statsample 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,12 @@
1
+ === 0.6.5 / 2010-02-24
2
+ * Bug fix on test: Use tempfile instead of tempdir
3
+ * Multiple Regression: Calculation of constant standard error, using covariance matrix.
4
+ * Calculation of R^2_yx and P^2_yx for Regression on Multiple Dependent variables
5
+ * Dominance Analysis can use a Correlation or Covariance Matrix as input.
6
+ * Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
7
+ * Two-step estimate of Polychoric correlation uses the minimization gem, so it can be executed without rb-gsl
8
+
9
+
1
10
  === 0.6.4 / 2010-02-19
2
11
  * Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis.
3
12
  * Test suite for Dominance Analysis, using Azen and Budescu papers as references
data/README.txt CHANGED
@@ -3,9 +3,11 @@
3
3
  http://ruby-statsample.rubyforge.org/
4
4
 
5
5
 
6
- == FEATURES:
6
+ == DESCRIPTION:
7
+
8
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
7
9
 
8
- A suite for basic and advanced statistics. Includes:
10
+ Includes:
9
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
10
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
11
13
  * Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
@@ -14,7 +16,7 @@ A suite for basic and advanced statistics. Includes:
14
16
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
15
17
  * Sample calculation related formulas
16
18
 
17
- == DETAILED FEATURES:
19
+ == FEATURES:
18
20
 
19
21
  * Factorial Analysis. Principal Component Analysis and Principal Axis extraction, with orthogonal rotations (Varimax, Equimax, Quartimax)
20
22
  * Multiple Regression. Listwise analysis optimized with use of Alglib library. Pairwise analysis is executed on pure ruby with matrixes and reports same values as SPSS
@@ -66,7 +68,9 @@ A suite for basic and advanced statistics. Includes:
66
68
  Optional:
67
69
 
68
70
  * Plotting: gnuplot and rbgnuplot, SVG::Graph
69
- * Factorial analysis and polychorical correlation: gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>
71
+ * Factorial analysis and polychoric correlation (joint estimate and polychoric series): gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>.
72
+
73
+ <b>Note</b>: Use gsl 1.12.109 or later.
70
74
 
71
75
  == DOWNLOAD
72
76
  * Gems and bugs report: http://rubyforge.org/projects/ruby-statsample/
data/demo/polychoric.rb CHANGED
@@ -5,10 +5,9 @@ require 'statsample'
5
5
  ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
6
6
 
7
7
  # Estimation of polychoric correlation using two-step (default)
8
- poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step")
8
+ poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
9
9
  puts poly.summary
10
10
 
11
-
12
11
  # Estimation of polychoric correlation using joint method (slow)
13
12
  poly=Statsample::Bivariate::Polychoric.new(ct, :method=>:joint, :name=>"Polychoric with joint")
14
13
  puts poly.summary
@@ -19,10 +19,9 @@ module Distribution
19
19
  # Probability density function for a given x, y and rho value.
20
20
  #
21
21
  # Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
22
- def pdf(x,y, rho, sigma1=1.0, sigma2=1.0)
23
- (1.quo(2 * Math::PI * sigma1*sigma2 * Math::sqrt( 1 - rho**2 ))) *
24
- Math::exp(-(1.quo(2*(1-rho**2))) *
25
- ((x**2/sigma1) + (y**2/sigma2) - (2*rho*x*y).quo(sigma1*sigma2) ))
22
+ def pdf(x,y, rho, s1=1.0, s2=1.0)
23
+ 1.quo(2 * Math::PI * s1 * s2 * Math::sqrt( 1 - rho**2 )) * (Math::exp(-(1.quo(2*(1-rho**2))) *
24
+ ((x**2.quo(s1)) + (y**2.quo(s2)) - (2*rho*x*y).quo(s1*s2))))
26
25
  end
27
26
 
28
27
  def f(x,y,aprime,bprime,rho)
@@ -147,7 +146,8 @@ module Distribution
147
146
  r=rho
148
147
  twopi = 6.283185307179586
149
148
 
150
- w=11.times.collect {[nil]*4}; x=11.times.collect {[nil]*4}
149
+ w=11.times.collect {[nil]*4};
150
+ x=11.times.collect {[nil]*4}
151
151
 
152
152
  data=[
153
153
  0.1713244923791705E+00, -0.9324695142031522E+00,
data/lib/statsample.rb CHANGED
@@ -109,7 +109,7 @@ end
109
109
  # * Dataset: An union of vectors.
110
110
  #
111
111
  module Statsample
112
- VERSION = '0.6.4'
112
+ VERSION = '0.6.5'
113
113
  SPLIT_TOKEN = ","
114
114
  autoload(:Database, 'statsample/converters')
115
115
  autoload(:Anova, 'statsample/anova')
@@ -1,3 +1,4 @@
1
+ require 'minimization'
1
2
  module Statsample
2
3
  module Bivariate
3
4
  # Calculate Polychoric correlation for two vectors.
@@ -79,7 +80,7 @@ module Statsample
79
80
 
80
81
  METHOD=:two_step
81
82
  MAX_ITERATIONS=300
82
- EPSILON=0.000001
83
+ EPSILON=1e-6
83
84
  MINIMIZER_TYPE_TWO_STEP="brent"
84
85
  MINIMIZER_TYPE_JOINT="nmsimplex"
85
86
  def new_with_vectors(v1,v2)
@@ -184,6 +185,52 @@ module Statsample
184
185
  def chi_square_df
185
186
  (@nr*@nc)-@nc-@nr
186
187
  end
188
+
189
+ def loglike_fd_rho(alpha,beta,rho)
190
+ if rho.abs>0.9999
191
+ rho= (rho>0) ? 0.9999 : -0.9999
192
+ end
193
+ #puts "rho: #{rho}"
194
+
195
+ loglike=0
196
+ pd=@nr.times.collect{ [0]*@nc}
197
+ pc=@nr.times.collect{ [0]*@nc}
198
+ @nr.times { |i|
199
+ @nc.times { |j|
200
+ if i==@nr-1 and j==@nc-1
201
+ pd[i][j]=1.0
202
+ a=100
203
+ b=100
204
+ else
205
+ a=(i==@nr-1) ? 100: alpha[i]
206
+ b=(j==@nc-1) ? 100: beta[j]
207
+ pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
208
+ end
209
+ pc[i][j] = pd[i][j]
210
+ pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
211
+ pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
212
+ pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
213
+
214
+ pij= pd[i][j]+EPSILON
215
+ if i==0
216
+ alpha_m1=-10
217
+ else
218
+ alpha_m1=alpha[i-1]
219
+ end
220
+
221
+ if j==0
222
+ beta_m1=-10
223
+ else
224
+ beta_m1=beta[j-1]
225
+ end
226
+
227
+ loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
228
+
229
+ }
230
+ }
231
+ #puts "derivative: #{loglike}"
232
+ -loglike
233
+ end
187
234
  def loglike(alpha,beta,rho)
188
235
  if rho.abs>0.9999
189
236
  rho= (rho>0) ? 0.9999 : -0.9999
@@ -249,6 +296,8 @@ module Statsample
249
296
  ac=@sumcac[i]
250
297
  end
251
298
  end
299
+
300
+
252
301
  # Computation of polychoric correlation using two-step ML estimation.
253
302
  #
254
303
  # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
@@ -260,6 +309,34 @@ module Statsample
260
309
  # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
261
310
  #
262
311
  def compute_two_step_mle_drasgow
312
+ if HAS_GSL
313
+ compute_two_step_mle_drasgow_gsl
314
+ else
315
+ compute_two_step_mle_drasgow_ruby
316
+ end
317
+ end
318
+
319
+ # Depends on minimization algorithm.
320
+
321
+ def compute_two_step_mle_drasgow_ruby #:nodoc:
322
+
323
+ f=proc {|rho|
324
+ loglike(@alpha,@beta, rho)
325
+ }
326
+ @log="Minimizing using GSL Brent method\n"
327
+ min=Minimization::Brent.new(-0.9999,0.9999,f)
328
+ min.epsilon=@epsilon
329
+ min.expected=0
330
+ min.iterate
331
+ @log+=min.log
332
+ @r=min.x_minimum
333
+ @loglike_model=-min.f_minimum
334
+ puts @log if @debug
335
+
336
+ end
337
+
338
+
339
+ def compute_two_step_mle_drasgow_gsl #:nodoc:
263
340
 
264
341
  fn1=GSL::Function.alloc {|rho|
265
342
  loglike(@alpha,@beta, rho)
@@ -86,7 +86,7 @@ module Statsample
86
86
  end
87
87
  }
88
88
  =end
89
- raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
89
+ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
90
90
  fields=row.to_a.collect{|c| c.downcase}
91
91
  fields.recode_repeated
92
92
  end
@@ -112,6 +112,7 @@ module Statsample
112
112
  @method_association=:r2
113
113
 
114
114
  end
115
+ @name=nil
115
116
  opts.each{|k,v|
116
117
  self.send("#{k}=",v) if self.respond_to? k
117
118
  }
@@ -133,7 +134,7 @@ module Statsample
133
134
  raise ArgumentError.new("You should use a Matrix or a Dataset")
134
135
  end
135
136
  @models=nil
136
-
137
+ @models_data=nil
137
138
  end
138
139
  # Compute models.
139
140
  def compute
@@ -227,7 +228,7 @@ module Statsample
227
228
  dominances.push(0)
228
229
  else
229
230
  return 0.5
230
- dominances.push(0.5)
231
+ #dominances.push(0.5)
231
232
  end
232
233
  end
233
234
  final=dominances.uniq
@@ -125,7 +125,7 @@ module Statsample
125
125
  out={}
126
126
  mse=sse.quo(df_e)
127
127
  coeffs.each {|k,v|
128
- out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
128
+ out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares * tolerance(k)))
129
129
  }
130
130
  out
131
131
  end
@@ -35,22 +35,36 @@ class MatrixEngine < BaseEngine
35
35
  def initialize(matrix,y_var, opts=Hash.new)
36
36
  matrix.extend Statsample::CovariateMatrix
37
37
  raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
38
-
39
- @matrix_cor=matrix.correlation
38
+ if matrix.type==:covariance
39
+ @matrix_cov=matrix
40
+ @matrix_cor=matrix.correlation
41
+ @no_covariance=false
42
+ else
43
+ @matrix_cor=matrix
44
+ @matrix_cov=matrix
45
+ @no_covariance=true
46
+ end
40
47
 
41
48
  @y_var=y_var
42
49
  @fields=matrix.fields-[y_var]
50
+
43
51
  @n_predictors=@fields.size
44
- @matrix=matrix
45
- @matrix_x= matrix.submatrix(@fields)
46
- @matrix_y = matrix.submatrix(@fields, [y_var])
47
- @matrix_y_cor=@matrix_cor.submatrix(@fields, [y_var])
48
- @result_matrix=@matrix_x.inverse * @matrix_y
49
- @y_sd=Math::sqrt(@matrix.submatrix([y_var])[0,0])
50
- @x_sd=@matrix_x.row_size.times.inject({}) {|ac,i|
51
- ac[@matrix_x.fields[i]]=Math::sqrt(@matrix_x[i,i])
52
+
53
+ @matrix_x= @matrix_cor.submatrix(@fields)
54
+ @matrix_x_cov= @matrix_cov.submatrix(@fields)
55
+
56
+ @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
57
+ @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
58
+
59
+
60
+
61
+ @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
62
+
63
+ @x_sd=@n_predictors.times.inject({}) {|ac,i|
64
+ ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
52
65
  ac;
53
66
  }
67
+
54
68
  @cases=nil
55
69
  @x_mean=@fields.inject({}) {|ac,f|
56
70
  ac[f]=0.0
@@ -64,13 +78,15 @@ class MatrixEngine < BaseEngine
64
78
  opts.each{|k,v|
65
79
  self.send("#{k}=",v) if self.respond_to? k
66
80
  }
81
+ result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
82
+
67
83
  if matrix.type==:covariance
68
- @coeffs=@result_matrix.column(0).to_a
84
+ @coeffs=result_matrix.column(0).to_a
69
85
  @coeffs_stan=coeffs.collect {|k,v|
70
86
  coeffs[k]*@x_sd[k].quo(@y_sd)
71
87
  }
72
88
  else
73
- @coeffs_stan=@result_matrix.column(0).to_a
89
+ @coeffs_stan=result_matrix.column(0).to_a
74
90
 
75
91
  @coeffs=standarized_coeffs.collect {|k,v|
76
92
  standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
@@ -87,8 +103,7 @@ class MatrixEngine < BaseEngine
87
103
  # * 1-(|R| / |R_x|) or
88
104
  # * Sum(b_i*r_yi)
89
105
  def r2
90
- @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y_cor[i,0]}
91
- #1-(@matrix.correlation.determinant.quo(@matrix_x.correlation.determinant))
106
+ @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
92
107
  end
93
108
  def r
94
109
  Math::sqrt(r2)
@@ -141,9 +156,36 @@ class MatrixEngine < BaseEngine
141
156
  }
142
157
  out
143
158
  end
144
- # Standard error for constant
159
+ def constant_t
160
+ return nil if constant_se.nil?
161
+ constant.to_f/constant_se
162
+ end
163
+ # Standard error for constant.
164
+ # Recreate the estimated variance-covariance matrix
165
+ # using means, standard deviation and covariance matrix
145
166
  def constant_se
146
- nil
167
+ return nil if @no_covariance
168
+ means=@x_mean
169
+ #means[@y_var]=@y_mean
170
+ means[:constant]=1
171
+ sd=@x_sd
172
+ #sd[@y_var]=@y_sd
173
+ sd[:constant]=0
174
+ fields=[:constant]+@matrix_cov.fields-[@y_var]
175
+ xt_x=Matrix.rows(fields.collect {|i|
176
+ fields.collect {|j|
177
+ if i==:constant or j==:constant
178
+ cov=0
179
+ elsif i==j
180
+ cov=sd[i]**2
181
+ else
182
+ cov=@matrix_cov.submatrix(i..i,j..j)[0,0]
183
+ end
184
+ cov*(@cases-1)+@cases*means[i]*means[j]
185
+ }
186
+ })
187
+ matrix=xt_x.inverse * mse
188
+ matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
147
189
  end
148
190
 
149
191
  def to_reportbuilder(generator)
@@ -25,47 +25,74 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
25
25
  end
26
26
  end
27
27
  end
28
+ def test_poly_vs_tetra
29
+ 10.times {
30
+ # Should be the same results as Tetrachoric for 2x2 matrix
31
+ matrix=Matrix[[150+rand(10),1000+rand(20)],[1000+rand(20),200+rand(20)]]
32
+ tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
33
+ poly = Statsample::Bivariate::Polychoric.new(matrix)
34
+ poly.compute_two_step_mle_drasgow_ruby
35
+ assert_in_delta(tetra.r,poly.r,0.0001)
36
+ if HAS_GSL
37
+ poly.compute_two_step_mle_drasgow_gsl
38
+ assert_in_delta(tetra.r,poly.r,0.0001)
39
+ end
40
+ }
41
+ end
28
42
  def test_polychoric
29
- # Should be the same results as Tetrachoric for 2x2 matrix
30
-
31
- matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
32
- tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
33
- poly = Statsample::Bivariate::Polychoric.new(matrix)
34
- assert_in_delta(tetra.r,poly.r,0.0001)
35
-
36
- # Example for Tallis(1962, cited by Drasgow, 2006)
37
-
38
- matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
39
- poly=Statsample::Bivariate::Polychoric.new(matrix)
40
- poly.method=:two_step
41
- poly.compute
42
- assert_in_delta(0.420, poly.r, 0.001)
43
- assert_in_delta(-0.240, poly.threshold_y[0],0.001)
44
- assert_in_delta(-0.027, poly.threshold_x[0],0.001)
45
- assert_in_delta(1.578, poly.threshold_y[1],0.001)
46
- assert_in_delta(1.137, poly.threshold_x[1],0.001)
47
-
48
-
49
- poly.method=:polychoric_series
50
- poly.compute
51
43
 
52
- assert_in_delta(0.556, poly.r, 0.001)
53
- assert_in_delta(-0.240, poly.threshold_y[0],0.001)
54
- assert_in_delta(-0.027, poly.threshold_x[0],0.001)
55
- assert_in_delta(1.578, poly.threshold_y[1],0.001)
56
- assert_in_delta(1.137, poly.threshold_x[1],0.001)
57
-
58
-
59
- poly.method=:joint
60
- poly.compute
61
-
62
-
63
- assert_in_delta(0.4192, poly.r, 0.0001)
64
- assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
65
- assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
66
- assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
67
- assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
44
+ matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
45
+ poly=Statsample::Bivariate::Polychoric.new(matrix)
46
+ poly.compute_two_step_mle_drasgow_ruby
47
+ assert_in_delta(0.420, poly.r, 0.001)
48
+ assert_in_delta(-0.240, poly.threshold_y[0],0.001)
49
+ assert_in_delta(-0.027, poly.threshold_x[0],0.001)
50
+ assert_in_delta(1.578, poly.threshold_y[1],0.001)
51
+ assert_in_delta(1.137, poly.threshold_x[1],0.001)
52
+
53
+
68
54
 
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+ if HAS_GSL
63
+ poly.method=:polychoric_series
64
+ poly.compute
65
+
66
+ assert_in_delta(0.556, poly.r, 0.001)
67
+ assert_in_delta(-0.240, poly.threshold_y[0],0.001)
68
+ assert_in_delta(-0.027, poly.threshold_x[0],0.001)
69
+ assert_in_delta(1.578, poly.threshold_y[1],0.001)
70
+ assert_in_delta(1.137, poly.threshold_x[1],0.001)
71
+
72
+ # Example for Tallis(1962, cited by Drasgow, 2006)
73
+
74
+ matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
75
+ poly=Statsample::Bivariate::Polychoric.new(matrix)
76
+ poly.compute_two_step_mle_drasgow_gsl
77
+ assert_in_delta(0.420, poly.r, 0.001)
78
+ assert_in_delta(-0.240, poly.threshold_y[0],0.001)
79
+ assert_in_delta(-0.027, poly.threshold_x[0],0.001)
80
+ assert_in_delta(1.578, poly.threshold_y[1],0.001)
81
+ assert_in_delta(1.137, poly.threshold_x[1],0.001)
82
+
83
+
84
+ poly.method=:joint
85
+ poly.compute
86
+
87
+
88
+ assert_in_delta(0.4192, poly.r, 0.0001)
89
+ assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
90
+ assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
91
+ assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
92
+ assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
93
+ else
94
+ puts "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
95
+ end
69
96
  end
70
97
  def test_tetrachoric
71
98
  a,b,c,d=0,0,0,0
data/test/test_csv.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
- require 'tmpdir'
3
+ require "tempfile"
4
4
  require 'test/unit'
5
5
 
6
6
  class StatsampleCSVTestCase < Test::Unit::TestCase
@@ -33,9 +33,10 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
33
33
  assert_equal(age,ds['age_2'])
34
34
  end
35
35
  def test_write
36
- filename=Dir::tmpdir+"/test_write.csv"
37
- Statsample::CSV.write(@ds,filename)
38
- ds2=Statsample::CSV.read(filename)
36
+ filename=Tempfile.new("afile")
37
+ # filename=Dir::tmpdir+"/test_write.csv"
38
+ Statsample::CSV.write(@ds, filename.path)
39
+ ds2=Statsample::CSV.read(filename.path)
39
40
  i=0
40
41
  ds2.each_array{|row|
41
42
  assert_equal(@ds.case_as_array(i),row)
data/test/test_dataset.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
- require 'tmpdir'
4
+ require 'tempfile'
5
5
  class StatsampleDatasetTestCase < Test::Unit::TestCase
6
6
  def setup
7
7
  @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
@@ -13,9 +13,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
13
13
  assert_equal(%w{id name age city a1}, @ds.fields)
14
14
  end
15
15
  def test_saveload
16
- outfile=Dir::tmpdir+"/dataset.ds"
17
- @ds.save(outfile)
18
- a=Statsample.load(outfile)
16
+ outfile=Tempfile.new("/dataset.ds")
17
+ @ds.save(outfile.path)
18
+ a=Statsample.load(outfile.path)
19
19
  assert_equal(@ds,a)
20
20
  end
21
21
 
data/test/test_factor.rb CHANGED
@@ -3,35 +3,43 @@ require 'statsample'
3
3
  require 'test/unit'
4
4
  class StatsampleFactorTestCase < Test::Unit::TestCase
5
5
  def test_pca
6
- require 'gsl'
7
- a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
8
- b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
9
- a.recode! {|c| c-a.mean}
10
- b.recode! {|c| c-b.mean}
11
- ds={'a'=>a,'b'=>b}.to_dataset
12
- cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
13
- pca=Statsample::Factor::PCA.new(cov_matrix)
14
- expected_eigenvalues=[1.284, 0.0490]
15
- expected_eigenvalues.each_with_index{|ev,i|
16
- assert_in_delta(ev,pca.eigenvalues[i],0.001)
17
- }
18
- expected_fm_1=GSL::Matrix[[0.677], [0.735]]
19
- expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
20
- _test_matrix(expected_fm_1,pca.feature_vector(1))
21
- _test_matrix(expected_fm_2,pca.feature_vector(2))
6
+ if HAS_GSL
7
+ require 'gsl'
8
+ a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
9
+ b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
10
+ a.recode! {|c| c-a.mean}
11
+ b.recode! {|c| c-b.mean}
12
+ ds={'a'=>a,'b'=>b}.to_dataset
13
+ cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
14
+ pca=Statsample::Factor::PCA.new(cov_matrix)
15
+ expected_eigenvalues=[1.284, 0.0490]
16
+ expected_eigenvalues.each_with_index{|ev,i|
17
+ assert_in_delta(ev,pca.eigenvalues[i],0.001)
18
+ }
19
+ expected_fm_1=GSL::Matrix[[0.677], [0.735]]
20
+ expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
21
+ _test_matrix(expected_fm_1,pca.feature_vector(1))
22
+ _test_matrix(expected_fm_2,pca.feature_vector(2))
23
+ else
24
+ puts "PCA not tested. Requires GSL"
25
+ end
22
26
  end
23
27
  def test_rotation_varimax
24
- a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
25
- [0.7950, -0.5416, 0.2565] ,
26
- [0.5944, 0.7234, -0.3441],
27
- [0.8945, -0.3921, -0.1863] ]
28
- expected= Matrix[[-0.0204423, 0.938674, -0.340334],
29
- [0.983662, 0.0730206, 0.134997],
30
- [0.0826106, 0.435975, -0.893379],
31
- [0.939901, -0.0965213, -0.309596]].to_gsl
32
- varimax=Statsample::Factor::Varimax.new(a)
33
- varimax.iterate
34
- _test_matrix(expected,varimax.rotated)
28
+ if HAS_GSL
29
+ a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
30
+ [0.7950, -0.5416, 0.2565] ,
31
+ [0.5944, 0.7234, -0.3441],
32
+ [0.8945, -0.3921, -0.1863] ]
33
+ expected= Matrix[[-0.0204423, 0.938674, -0.340334],
34
+ [0.983662, 0.0730206, 0.134997],
35
+ [0.0826106, 0.435975, -0.893379],
36
+ [0.939901, -0.0965213, -0.309596]].to_gsl
37
+ varimax=Statsample::Factor::Varimax.new(a)
38
+ varimax.iterate
39
+ _test_matrix(expected,varimax.rotated)
40
+ else
41
+ puts "Rotation not tested. Requires GSL"
42
+ end
35
43
  end
36
44
  def _test_matrix(a,b)
37
45
  a.size1.times {|i|
data/test/test_matrix.rb CHANGED
@@ -19,7 +19,6 @@ class StatsampleMatrixTestCase < Test::Unit::TestCase
19
19
  assert_equal(6,matrix.row_sum[0])
20
20
  assert_equal(12,matrix.column_sum[0])
21
21
  assert_equal(45,matrix.total_sum)
22
- m=matrix.to_gsl
23
22
  end
24
23
  def test_covariate
25
24
  a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
@@ -158,12 +158,14 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
158
158
 
159
159
  cor=Statsample::Bivariate.correlation_matrix(ds)
160
160
  lr=Statsample::Regression::Multiple::MatrixEngine.new(cor,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size, :y_sd=>@y.sd , :x_sd=>{'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd})
161
-
161
+ assert_nil(lr.constant_se)
162
+ assert_nil(lr.constant_t)
162
163
  model_test_matrix(lr, "correlation matrix")
163
164
 
164
165
  covariance=Statsample::Bivariate.covariance_matrix(ds)
165
166
  lr=Statsample::Regression::Multiple::MatrixEngine.new(covariance,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size)
166
- model_test_matrix(lr , "covariance matrix")
167
+
168
+ model_test(lr , "covariance matrix")
167
169
  end
168
170
  def test_regression_rubyengine
169
171
  @a=[nil,1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
@@ -2,6 +2,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'tmpdir'
4
4
  require 'tempfile'
5
+ require 'tempfile'
5
6
  require 'fileutils'
6
7
  require 'test/unit'
7
8
  begin
@@ -19,12 +20,10 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
19
20
  rand(10)
20
21
  }.to_vector(:scale)
21
22
  h=ar.histogram([0,2,5,11])
22
- file=@image_path+"/svg_histogram_only.svg"
23
+ file=Tempfile.new("svg_histogram_only.svg")
23
24
  graph = Statsample::Graph::SvgHistogram.new({})
24
25
  graph.histogram=h
25
- File.open(file,"w") {|f|
26
- f.puts(graph.burn)
27
- }
26
+ file.puts(graph.burn)
28
27
  else
29
28
  puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
30
29
  end
@@ -36,20 +35,20 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
36
35
  ar.push(rand(10))
37
36
  }
38
37
  vector=ar.to_vector
39
- file=@image_path+"/svggraph_default.svg"
38
+ file=Tempfile.new("svggraph_default.svg").path
40
39
  vector.svggraph_frequencies(file)
41
- file=@image_path+"/svggraph_Bar.svg"
40
+ file=Tempfile.new("svggraph_bar.svg").path
42
41
  vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
43
42
  assert(File.exists?(file))
44
- file=@image_path+"/svggraph_BarHorizontal.svg"
43
+ file=Tempfile.new("svggraph_bar_horizontal.svg").path
45
44
  vector.svggraph_frequencies(file,800,600,SVG::Graph::BarHorizontalNoOp,:graph_title=>'Horizontal Bar')
46
45
  assert(File.exists?(file))
47
- file=@image_path+"/svggraph_Pie.svg"
46
+ file=Tempfile.new("svggraph_pie.svg").path
48
47
  vector.svggraph_frequencies(file,800,600,SVG::Graph::PieNoOp,:graph_title=>'Pie')
49
48
  assert(File.exists?(file))
50
49
  vector.type=:scale
51
50
  if HAS_GSL
52
- file=@image_path+"/svggraph_histogram.svg"
51
+ file=Tempfile.new("svg_histogram.svg").path
53
52
  hist=vector.svggraph_histogram(5)
54
53
  File.open(file,"wb") {|fp|
55
54
  fp.write(hist.burn)
data/test/test_vector.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
- require 'tmpdir'
4
+ require 'tempfile'
5
5
  class TestStatsample
6
6
  end
7
7
  class TestStatsample::TestVector < Test::Unit::TestCase
@@ -12,9 +12,9 @@ class TestStatsample::TestVector < Test::Unit::TestCase
12
12
 
13
13
  end
14
14
  def test_save_load
15
- outfile=Dir::tmpdir+"/vector.vec"
16
- @c.save(outfile)
17
- a=Statsample.load(outfile)
15
+ outfile=Tempfile.new("vector.vec")
16
+ @c.save(outfile.path)
17
+ a=Statsample.load(outfile.path)
18
18
  assert_equal(@c,a)
19
19
 
20
20
  end
data/test/test_xls.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
- require 'tmpdir'
4
+ require 'tempfile'
5
5
  begin
6
6
  require 'spreadsheet'
7
7
  rescue LoadError
@@ -30,9 +30,9 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
30
30
  assert_equal(nil,@ds['age'][5])
31
31
  end
32
32
  def test_write
33
- filename=Dir::tmpdir+"/test_write.xls"
34
- Statsample::Excel.write(@ds,filename)
35
- ds2=Statsample::Excel.read(filename)
33
+ tempfile=Tempfile.new("test_write.xls")
34
+ Statsample::Excel.write(@ds,tempfile.path)
35
+ ds2=Statsample::Excel.read(tempfile.path)
36
36
  i=0
37
37
  ds2.each_array{|row|
38
38
  assert_equal(@ds.case_as_array(i),row)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.4
4
+ version: 0.6.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-02-19 00:00:00 -03:00
12
+ date: 2010-02-24 00:00:00 -03:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -43,24 +43,14 @@ dependencies:
43
43
  version: 0.2.0
44
44
  version:
45
45
  - !ruby/object:Gem::Dependency
46
- name: rubyforge
47
- type: :development
48
- version_requirement:
49
- version_requirements: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: 2.0.3
54
- version:
55
- - !ruby/object:Gem::Dependency
56
- name: gemcutter
57
- type: :development
46
+ name: minimization
47
+ type: :runtime
58
48
  version_requirement:
59
49
  version_requirements: !ruby/object:Gem::Requirement
60
50
  requirements:
61
51
  - - ">="
62
52
  - !ruby/object:Gem::Version
63
- version: 0.3.0
53
+ version: 0.1.0
64
54
  version:
65
55
  - !ruby/object:Gem::Dependency
66
56
  name: hoe
@@ -70,9 +60,19 @@ dependencies:
70
60
  requirements:
71
61
  - - ">="
72
62
  - !ruby/object:Gem::Version
73
- version: 2.5.0
63
+ version: 2.4.0
74
64
  version:
75
- description: ""
65
+ description: |-
66
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
67
+
68
+ Includes:
69
+ * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
70
+ * Imports and exports datasets from and to Excel, CSV and plain text files.
71
+ * Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
72
+ * Regression: Simple, Multiple, Probit and Logit
73
+ * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
74
+ * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
75
+ * Sample calculation related formulas
76
76
  email:
77
77
  - clbustos@gmail.com
78
78
  executables:
@@ -221,7 +221,7 @@ rubyforge_project: ruby-statsample
221
221
  rubygems_version: 1.3.5
222
222
  signing_key:
223
223
  specification_version: 3
224
- summary: ""
224
+ summary: A suite for basic and advanced statistics on Ruby
225
225
  test_files:
226
226
  - test/test_bivariate.rb
227
227
  - test/test_dominance_analysis.rb