statsample 0.6.4 → 0.6.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -0
- data/README.txt +8 -4
- data/demo/polychoric.rb +1 -2
- data/lib/distribution/normalbivariate.rb +5 -5
- data/lib/statsample.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +78 -1
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dominanceanalysis.rb +3 -2
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +58 -16
- data/test/test_bivariate.rb +65 -38
- data/test/test_csv.rb +5 -4
- data/test/test_dataset.rb +4 -4
- data/test/test_factor.rb +35 -27
- data/test/test_matrix.rb +0 -1
- data/test/test_regression.rb +4 -2
- data/test/test_svg_graph.rb +8 -9
- data/test/test_vector.rb +4 -4
- data/test/test_xls.rb +4 -4
- metadata +18 -18
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
=== 0.6.5 / 2010-02-24
|
2
|
+
* Bug fix on test: Use tempfile instead of tempdir
|
3
|
+
* Multiple Regression: Calculation of constant standard error, using covariance matrix.
|
4
|
+
* Calculation of R^2_yx and P^2_yx for Regression on Multiple Dependent variables
|
5
|
+
* Dominance Analysis could use Correlation or Covariance Matrix as input.
|
6
|
+
* Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
|
7
|
+
* Two-step estimate of Polychoric correlation uses minimization gem, so it can be executed without rb-gsl
|
8
|
+
|
9
|
+
|
1
10
|
=== 0.6.4 / 2010-02-19
|
2
11
|
* Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis.
|
3
12
|
* Test suite for Dominance Analysis, using Azen and Budescu papers as references
|
data/README.txt
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
http://ruby-statsample.rubyforge.org/
|
4
4
|
|
5
5
|
|
6
|
-
==
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
|
7
9
|
|
8
|
-
|
10
|
+
Includes:
|
9
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
10
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
11
13
|
* Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
|
@@ -14,7 +16,7 @@ A suite for basic and advanced statistics. Includes:
|
|
14
16
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
15
17
|
* Sample calculation related formulas
|
16
18
|
|
17
|
-
==
|
19
|
+
== FEATURES:
|
18
20
|
|
19
21
|
* Factorial Analysis. Principal Component Analysis and Principal Axis extraction, with orthogonal rotations (Varimax, Equimax, Quartimax)
|
20
22
|
* Multiple Regression. Listwise analysis optimized with use of Alglib library. Pairwise analysis is executed on pure ruby with matrixes and reports same values as SPSS
|
@@ -66,7 +68,9 @@ A suite for basic and advanced statistics. Includes:
|
|
66
68
|
Optional:
|
67
69
|
|
68
70
|
* Plotting: gnuplot and rbgnuplot, SVG::Graph
|
69
|
-
* Factorial analysis and polychorical correlation: gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt
|
71
|
+
* Factorial analysis and polychoric correlation (joint estimate and polychoric series): gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>.
|
72
|
+
|
73
|
+
<b>Note</b>: Use gsl 1.12.109 or later.
|
70
74
|
|
71
75
|
== DOWNLOAD
|
72
76
|
* Gems and bugs report: http://rubyforge.org/projects/ruby-statsample/
|
data/demo/polychoric.rb
CHANGED
@@ -5,10 +5,9 @@ require 'statsample'
|
|
5
5
|
ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
6
6
|
|
7
7
|
# Estimation of polychoric correlation using two-step (default)
|
8
|
-
poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step")
|
8
|
+
poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
|
9
9
|
puts poly.summary
|
10
10
|
|
11
|
-
|
12
11
|
# Estimation of polychoric correlation using joint method (slow)
|
13
12
|
poly=Statsample::Bivariate::Polychoric.new(ct, :method=>:joint, :name=>"Polychoric with joint")
|
14
13
|
puts poly.summary
|
@@ -19,10 +19,9 @@ module Distribution
|
|
19
19
|
# Probability density function for a given x, y and rho value.
|
20
20
|
#
|
21
21
|
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
22
|
-
def pdf(x,y, rho,
|
23
|
-
|
24
|
-
|
25
|
-
((x**2/sigma1) + (y**2/sigma2) - (2*rho*x*y).quo(sigma1*sigma2) ))
|
22
|
+
def pdf(x,y, rho, s1=1.0, s2=1.0)
|
23
|
+
1.quo(2 * Math::PI * s1 * s2 * Math::sqrt( 1 - rho**2 )) * (Math::exp(-(1.quo(2*(1-rho**2))) *
|
24
|
+
((x**2.quo(s1)) + (y**2.quo(s2)) - (2*rho*x*y).quo(s1*s2))))
|
26
25
|
end
|
27
26
|
|
28
27
|
def f(x,y,aprime,bprime,rho)
|
@@ -147,7 +146,8 @@ module Distribution
|
|
147
146
|
r=rho
|
148
147
|
twopi = 6.283185307179586
|
149
148
|
|
150
|
-
w=11.times.collect {[nil]*4};
|
149
|
+
w=11.times.collect {[nil]*4};
|
150
|
+
x=11.times.collect {[nil]*4}
|
151
151
|
|
152
152
|
data=[
|
153
153
|
0.1713244923791705E+00, -0.9324695142031522E+00,
|
data/lib/statsample.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'minimization'
|
1
2
|
module Statsample
|
2
3
|
module Bivariate
|
3
4
|
# Calculate Polychoric correlation for two vectors.
|
@@ -79,7 +80,7 @@ module Statsample
|
|
79
80
|
|
80
81
|
METHOD=:two_step
|
81
82
|
MAX_ITERATIONS=300
|
82
|
-
EPSILON=
|
83
|
+
EPSILON=1e-6
|
83
84
|
MINIMIZER_TYPE_TWO_STEP="brent"
|
84
85
|
MINIMIZER_TYPE_JOINT="nmsimplex"
|
85
86
|
def new_with_vectors(v1,v2)
|
@@ -184,6 +185,52 @@ module Statsample
|
|
184
185
|
def chi_square_df
|
185
186
|
(@nr*@nc)-@nc-@nr
|
186
187
|
end
|
188
|
+
|
189
|
+
def loglike_fd_rho(alpha,beta,rho)
|
190
|
+
if rho.abs>0.9999
|
191
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
192
|
+
end
|
193
|
+
#puts "rho: #{rho}"
|
194
|
+
|
195
|
+
loglike=0
|
196
|
+
pd=@nr.times.collect{ [0]*@nc}
|
197
|
+
pc=@nr.times.collect{ [0]*@nc}
|
198
|
+
@nr.times { |i|
|
199
|
+
@nc.times { |j|
|
200
|
+
if i==@nr-1 and j==@nc-1
|
201
|
+
pd[i][j]=1.0
|
202
|
+
a=100
|
203
|
+
b=100
|
204
|
+
else
|
205
|
+
a=(i==@nr-1) ? 100: alpha[i]
|
206
|
+
b=(j==@nc-1) ? 100: beta[j]
|
207
|
+
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
208
|
+
end
|
209
|
+
pc[i][j] = pd[i][j]
|
210
|
+
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
211
|
+
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
212
|
+
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
213
|
+
|
214
|
+
pij= pd[i][j]+EPSILON
|
215
|
+
if i==0
|
216
|
+
alpha_m1=-10
|
217
|
+
else
|
218
|
+
alpha_m1=alpha[i-1]
|
219
|
+
end
|
220
|
+
|
221
|
+
if j==0
|
222
|
+
beta_m1=-10
|
223
|
+
else
|
224
|
+
beta_m1=beta[j-1]
|
225
|
+
end
|
226
|
+
|
227
|
+
loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
|
228
|
+
|
229
|
+
}
|
230
|
+
}
|
231
|
+
#puts "derivative: #{loglike}"
|
232
|
+
-loglike
|
233
|
+
end
|
187
234
|
def loglike(alpha,beta,rho)
|
188
235
|
if rho.abs>0.9999
|
189
236
|
rho= (rho>0) ? 0.9999 : -0.9999
|
@@ -249,6 +296,8 @@ module Statsample
|
|
249
296
|
ac=@sumcac[i]
|
250
297
|
end
|
251
298
|
end
|
299
|
+
|
300
|
+
|
252
301
|
# Computation of polychoric correlation using two-step ML estimation.
|
253
302
|
#
|
254
303
|
# Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
|
@@ -260,6 +309,34 @@ module Statsample
|
|
260
309
|
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
261
310
|
#
|
262
311
|
def compute_two_step_mle_drasgow
|
312
|
+
if HAS_GSL
|
313
|
+
compute_two_step_mle_drasgow_gsl
|
314
|
+
else
|
315
|
+
compute_two_step_mle_drasgow_ruby
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
# Depends on minimization algorithm.
|
320
|
+
|
321
|
+
def compute_two_step_mle_drasgow_ruby #:nodoc:
|
322
|
+
|
323
|
+
f=proc {|rho|
|
324
|
+
loglike(@alpha,@beta, rho)
|
325
|
+
}
|
326
|
+
@log="Minimizing using GSL Brent method\n"
|
327
|
+
min=Minimization::Brent.new(-0.9999,0.9999,f)
|
328
|
+
min.epsilon=@epsilon
|
329
|
+
min.expected=0
|
330
|
+
min.iterate
|
331
|
+
@log+=min.log
|
332
|
+
@r=min.x_minimum
|
333
|
+
@loglike_model=-min.f_minimum
|
334
|
+
puts @log if @debug
|
335
|
+
|
336
|
+
end
|
337
|
+
|
338
|
+
|
339
|
+
def compute_two_step_mle_drasgow_gsl #:nodoc:
|
263
340
|
|
264
341
|
fn1=GSL::Function.alloc {|rho|
|
265
342
|
loglike(@alpha,@beta, rho)
|
@@ -86,7 +86,7 @@ module Statsample
|
|
86
86
|
end
|
87
87
|
}
|
88
88
|
=end
|
89
|
-
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.
|
89
|
+
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
|
90
90
|
fields=row.to_a.collect{|c| c.downcase}
|
91
91
|
fields.recode_repeated
|
92
92
|
end
|
@@ -112,6 +112,7 @@ module Statsample
|
|
112
112
|
@method_association=:r2
|
113
113
|
|
114
114
|
end
|
115
|
+
@name=nil
|
115
116
|
opts.each{|k,v|
|
116
117
|
self.send("#{k}=",v) if self.respond_to? k
|
117
118
|
}
|
@@ -133,7 +134,7 @@ module Statsample
|
|
133
134
|
raise ArgumentError.new("You should use a Matrix or a Dataset")
|
134
135
|
end
|
135
136
|
@models=nil
|
136
|
-
|
137
|
+
@models_data=nil
|
137
138
|
end
|
138
139
|
# Compute models.
|
139
140
|
def compute
|
@@ -227,7 +228,7 @@ module Statsample
|
|
227
228
|
dominances.push(0)
|
228
229
|
else
|
229
230
|
return 0.5
|
230
|
-
dominances.push(0.5)
|
231
|
+
#dominances.push(0.5)
|
231
232
|
end
|
232
233
|
end
|
233
234
|
final=dominances.uniq
|
@@ -35,22 +35,36 @@ class MatrixEngine < BaseEngine
|
|
35
35
|
def initialize(matrix,y_var, opts=Hash.new)
|
36
36
|
matrix.extend Statsample::CovariateMatrix
|
37
37
|
raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
|
38
|
-
|
39
|
-
|
38
|
+
if matrix.type==:covariance
|
39
|
+
@matrix_cov=matrix
|
40
|
+
@matrix_cor=matrix.correlation
|
41
|
+
@no_covariance=false
|
42
|
+
else
|
43
|
+
@matrix_cor=matrix
|
44
|
+
@matrix_cov=matrix
|
45
|
+
@no_covariance=true
|
46
|
+
end
|
40
47
|
|
41
48
|
@y_var=y_var
|
42
49
|
@fields=matrix.fields-[y_var]
|
50
|
+
|
43
51
|
@n_predictors=@fields.size
|
44
|
-
|
45
|
-
@matrix_x=
|
46
|
-
@
|
47
|
-
|
48
|
-
@
|
49
|
-
@
|
50
|
-
|
51
|
-
|
52
|
+
|
53
|
+
@matrix_x= @matrix_cor.submatrix(@fields)
|
54
|
+
@matrix_x_cov= @matrix_cov.submatrix(@fields)
|
55
|
+
|
56
|
+
@matrix_y = @matrix_cor.submatrix(@fields, [y_var])
|
57
|
+
@matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
@y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
|
62
|
+
|
63
|
+
@x_sd=@n_predictors.times.inject({}) {|ac,i|
|
64
|
+
ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
|
52
65
|
ac;
|
53
66
|
}
|
67
|
+
|
54
68
|
@cases=nil
|
55
69
|
@x_mean=@fields.inject({}) {|ac,f|
|
56
70
|
ac[f]=0.0
|
@@ -64,13 +78,15 @@ class MatrixEngine < BaseEngine
|
|
64
78
|
opts.each{|k,v|
|
65
79
|
self.send("#{k}=",v) if self.respond_to? k
|
66
80
|
}
|
81
|
+
result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
|
82
|
+
|
67
83
|
if matrix.type==:covariance
|
68
|
-
@coeffs
|
84
|
+
@coeffs=result_matrix.column(0).to_a
|
69
85
|
@coeffs_stan=coeffs.collect {|k,v|
|
70
86
|
coeffs[k]*@x_sd[k].quo(@y_sd)
|
71
87
|
}
|
72
88
|
else
|
73
|
-
@coeffs_stan
|
89
|
+
@coeffs_stan=result_matrix.column(0).to_a
|
74
90
|
|
75
91
|
@coeffs=standarized_coeffs.collect {|k,v|
|
76
92
|
standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
|
@@ -87,8 +103,7 @@ class MatrixEngine < BaseEngine
|
|
87
103
|
# * 1-(|R| / |R_x|) or
|
88
104
|
# * Sum(b_i*r_yi)
|
89
105
|
def r2
|
90
|
-
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @
|
91
|
-
#1-(@matrix.correlation.determinant.quo(@matrix_x.correlation.determinant))
|
106
|
+
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
|
92
107
|
end
|
93
108
|
def r
|
94
109
|
Math::sqrt(r2)
|
@@ -141,9 +156,36 @@ class MatrixEngine < BaseEngine
|
|
141
156
|
}
|
142
157
|
out
|
143
158
|
end
|
144
|
-
|
159
|
+
def constant_t
|
160
|
+
return nil if constant_se.nil?
|
161
|
+
constant.to_f/constant_se
|
162
|
+
end
|
163
|
+
# Standard error for constant.
|
164
|
+
# Recreate the estimated variance-covariance matrix
|
165
|
+
# using means, standard deviation and covariance matrix
|
145
166
|
def constant_se
|
146
|
-
|
167
|
+
return nil if @no_covariance
|
168
|
+
means=@x_mean
|
169
|
+
#means[@y_var]=@y_mean
|
170
|
+
means[:constant]=1
|
171
|
+
sd=@x_sd
|
172
|
+
#sd[@y_var]=@y_sd
|
173
|
+
sd[:constant]=0
|
174
|
+
fields=[:constant]+@matrix_cov.fields-[@y_var]
|
175
|
+
xt_x=Matrix.rows(fields.collect {|i|
|
176
|
+
fields.collect {|j|
|
177
|
+
if i==:constant or j==:constant
|
178
|
+
cov=0
|
179
|
+
elsif i==j
|
180
|
+
cov=sd[i]**2
|
181
|
+
else
|
182
|
+
cov=@matrix_cov.submatrix(i..i,j..j)[0,0]
|
183
|
+
end
|
184
|
+
cov*(@cases-1)+@cases*means[i]*means[j]
|
185
|
+
}
|
186
|
+
})
|
187
|
+
matrix=xt_x.inverse * mse
|
188
|
+
matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
|
147
189
|
end
|
148
190
|
|
149
191
|
def to_reportbuilder(generator)
|
data/test/test_bivariate.rb
CHANGED
@@ -25,47 +25,74 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
28
|
+
def test_poly_vs_tetra
|
29
|
+
10.times {
|
30
|
+
# Should be the same results as Tetrachoric for 2x2 matrix
|
31
|
+
matrix=Matrix[[150+rand(10),1000+rand(20)],[1000+rand(20),200+rand(20)]]
|
32
|
+
tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
|
33
|
+
poly = Statsample::Bivariate::Polychoric.new(matrix)
|
34
|
+
poly.compute_two_step_mle_drasgow_ruby
|
35
|
+
assert_in_delta(tetra.r,poly.r,0.0001)
|
36
|
+
if HAS_GSL
|
37
|
+
poly.compute_two_step_mle_drasgow_gsl
|
38
|
+
assert_in_delta(tetra.r,poly.r,0.0001)
|
39
|
+
end
|
40
|
+
}
|
41
|
+
end
|
28
42
|
def test_polychoric
|
29
|
-
# Should be the same results as Tetrachoric for 2x2 matrix
|
30
|
-
|
31
|
-
matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
|
32
|
-
tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
|
33
|
-
poly = Statsample::Bivariate::Polychoric.new(matrix)
|
34
|
-
assert_in_delta(tetra.r,poly.r,0.0001)
|
35
|
-
|
36
|
-
# Example for Tallis(1962, cited by Drasgow, 2006)
|
37
|
-
|
38
|
-
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
39
|
-
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
40
|
-
poly.method=:two_step
|
41
|
-
poly.compute
|
42
|
-
assert_in_delta(0.420, poly.r, 0.001)
|
43
|
-
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
44
|
-
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
45
|
-
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
46
|
-
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
47
|
-
|
48
|
-
|
49
|
-
poly.method=:polychoric_series
|
50
|
-
poly.compute
|
51
43
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
assert_in_delta(0.4192, poly.r, 0.0001)
|
64
|
-
assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
|
65
|
-
assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
|
66
|
-
assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
|
67
|
-
assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
|
44
|
+
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
45
|
+
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
46
|
+
poly.compute_two_step_mle_drasgow_ruby
|
47
|
+
assert_in_delta(0.420, poly.r, 0.001)
|
48
|
+
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
49
|
+
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
50
|
+
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
51
|
+
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
52
|
+
|
53
|
+
|
68
54
|
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
if HAS_GSL
|
63
|
+
poly.method=:polychoric_series
|
64
|
+
poly.compute
|
65
|
+
|
66
|
+
assert_in_delta(0.556, poly.r, 0.001)
|
67
|
+
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
68
|
+
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
69
|
+
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
70
|
+
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
71
|
+
|
72
|
+
# Example for Tallis(1962, cited by Drasgow, 2006)
|
73
|
+
|
74
|
+
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
75
|
+
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
76
|
+
poly.compute_two_step_mle_drasgow_gsl
|
77
|
+
assert_in_delta(0.420, poly.r, 0.001)
|
78
|
+
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
79
|
+
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
80
|
+
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
81
|
+
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
82
|
+
|
83
|
+
|
84
|
+
poly.method=:joint
|
85
|
+
poly.compute
|
86
|
+
|
87
|
+
|
88
|
+
assert_in_delta(0.4192, poly.r, 0.0001)
|
89
|
+
assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
|
90
|
+
assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
|
91
|
+
assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
|
92
|
+
assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
|
93
|
+
else
|
94
|
+
puts "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
|
95
|
+
end
|
69
96
|
end
|
70
97
|
def test_tetrachoric
|
71
98
|
a,b,c,d=0,0,0,0
|
data/test/test_csv.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
|
-
require
|
3
|
+
require "tempfile"
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
6
|
class StatsampleCSVTestCase < Test::Unit::TestCase
|
@@ -33,9 +33,10 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
|
|
33
33
|
assert_equal(age,ds['age_2'])
|
34
34
|
end
|
35
35
|
def test_write
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
filename=Tempfile.new("afile")
|
37
|
+
# filename=Dir::tmpdir+"/test_write.csv"
|
38
|
+
Statsample::CSV.write(@ds, filename.path)
|
39
|
+
ds2=Statsample::CSV.read(filename.path)
|
39
40
|
i=0
|
40
41
|
ds2.each_array{|row|
|
41
42
|
assert_equal(@ds.case_as_array(i),row)
|
data/test/test_dataset.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'tempfile'
|
5
5
|
class StatsampleDatasetTestCase < Test::Unit::TestCase
|
6
6
|
def setup
|
7
7
|
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
@@ -13,9 +13,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
13
13
|
assert_equal(%w{id name age city a1}, @ds.fields)
|
14
14
|
end
|
15
15
|
def test_saveload
|
16
|
-
|
17
|
-
@ds.save(outfile)
|
18
|
-
a=Statsample.load(outfile)
|
16
|
+
outfile=Tempfile.new("/dataset.ds")
|
17
|
+
@ds.save(outfile.path)
|
18
|
+
a=Statsample.load(outfile.path)
|
19
19
|
assert_equal(@ds,a)
|
20
20
|
end
|
21
21
|
|
data/test/test_factor.rb
CHANGED
@@ -3,35 +3,43 @@ require 'statsample'
|
|
3
3
|
require 'test/unit'
|
4
4
|
class StatsampleFactorTestCase < Test::Unit::TestCase
|
5
5
|
def test_pca
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
6
|
+
if HAS_GSL
|
7
|
+
require 'gsl'
|
8
|
+
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
|
9
|
+
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
|
10
|
+
a.recode! {|c| c-a.mean}
|
11
|
+
b.recode! {|c| c-b.mean}
|
12
|
+
ds={'a'=>a,'b'=>b}.to_dataset
|
13
|
+
cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
|
14
|
+
pca=Statsample::Factor::PCA.new(cov_matrix)
|
15
|
+
expected_eigenvalues=[1.284, 0.0490]
|
16
|
+
expected_eigenvalues.each_with_index{|ev,i|
|
17
|
+
assert_in_delta(ev,pca.eigenvalues[i],0.001)
|
18
|
+
}
|
19
|
+
expected_fm_1=GSL::Matrix[[0.677], [0.735]]
|
20
|
+
expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
|
21
|
+
_test_matrix(expected_fm_1,pca.feature_vector(1))
|
22
|
+
_test_matrix(expected_fm_2,pca.feature_vector(2))
|
23
|
+
else
|
24
|
+
puts "PCA not tested. Requires GSL"
|
25
|
+
end
|
22
26
|
end
|
23
27
|
def test_rotation_varimax
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
if HAS_GSL
|
29
|
+
a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
|
30
|
+
[0.7950, -0.5416, 0.2565] ,
|
31
|
+
[0.5944, 0.7234, -0.3441],
|
32
|
+
[0.8945, -0.3921, -0.1863] ]
|
33
|
+
expected= Matrix[[-0.0204423, 0.938674, -0.340334],
|
34
|
+
[0.983662, 0.0730206, 0.134997],
|
35
|
+
[0.0826106, 0.435975, -0.893379],
|
36
|
+
[0.939901, -0.0965213, -0.309596]].to_gsl
|
37
|
+
varimax=Statsample::Factor::Varimax.new(a)
|
38
|
+
varimax.iterate
|
39
|
+
_test_matrix(expected,varimax.rotated)
|
40
|
+
else
|
41
|
+
puts "Rotation not tested. Requires GSL"
|
42
|
+
end
|
35
43
|
end
|
36
44
|
def _test_matrix(a,b)
|
37
45
|
a.size1.times {|i|
|
data/test/test_matrix.rb
CHANGED
@@ -19,7 +19,6 @@ class StatsampleMatrixTestCase < Test::Unit::TestCase
|
|
19
19
|
assert_equal(6,matrix.row_sum[0])
|
20
20
|
assert_equal(12,matrix.column_sum[0])
|
21
21
|
assert_equal(45,matrix.total_sum)
|
22
|
-
m=matrix.to_gsl
|
23
22
|
end
|
24
23
|
def test_covariate
|
25
24
|
a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
|
data/test/test_regression.rb
CHANGED
@@ -158,12 +158,14 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
|
|
158
158
|
|
159
159
|
cor=Statsample::Bivariate.correlation_matrix(ds)
|
160
160
|
lr=Statsample::Regression::Multiple::MatrixEngine.new(cor,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size, :y_sd=>@y.sd , :x_sd=>{'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd})
|
161
|
-
|
161
|
+
assert_nil(lr.constant_se)
|
162
|
+
assert_nil(lr.constant_t)
|
162
163
|
model_test_matrix(lr, "correlation matrix")
|
163
164
|
|
164
165
|
covariance=Statsample::Bivariate.covariance_matrix(ds)
|
165
166
|
lr=Statsample::Regression::Multiple::MatrixEngine.new(covariance,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size)
|
166
|
-
|
167
|
+
|
168
|
+
model_test(lr , "covariance matrix")
|
167
169
|
end
|
168
170
|
def test_regression_rubyengine
|
169
171
|
@a=[nil,1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
data/test/test_svg_graph.rb
CHANGED
@@ -2,6 +2,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
2
2
|
require 'statsample'
|
3
3
|
require 'tmpdir'
|
4
4
|
require 'tempfile'
|
5
|
+
require 'tempfile'
|
5
6
|
require 'fileutils'
|
6
7
|
require 'test/unit'
|
7
8
|
begin
|
@@ -19,12 +20,10 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
|
|
19
20
|
rand(10)
|
20
21
|
}.to_vector(:scale)
|
21
22
|
h=ar.histogram([0,2,5,11])
|
22
|
-
file
|
23
|
+
file=Tempfile.new("svg_histogram_only.svg")
|
23
24
|
graph = Statsample::Graph::SvgHistogram.new({})
|
24
25
|
graph.histogram=h
|
25
|
-
|
26
|
-
f.puts(graph.burn)
|
27
|
-
}
|
26
|
+
file.puts(graph.burn)
|
28
27
|
else
|
29
28
|
puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
|
30
29
|
end
|
@@ -36,20 +35,20 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
|
|
36
35
|
ar.push(rand(10))
|
37
36
|
}
|
38
37
|
vector=ar.to_vector
|
39
|
-
file
|
38
|
+
file=Tempfile.new("svggraph_default.svg").path
|
40
39
|
vector.svggraph_frequencies(file)
|
41
|
-
file
|
40
|
+
file=Tempfile.new("svggraph_bar.svg").path
|
42
41
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
|
43
42
|
assert(File.exists?(file))
|
44
|
-
file
|
43
|
+
file=Tempfile.new("svggraph_bar_horizontal.svg").path
|
45
44
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::BarHorizontalNoOp,:graph_title=>'Horizontal Bar')
|
46
45
|
assert(File.exists?(file))
|
47
|
-
file
|
46
|
+
file=Tempfile.new("svggraph_pie.svg").path
|
48
47
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::PieNoOp,:graph_title=>'Pie')
|
49
48
|
assert(File.exists?(file))
|
50
49
|
vector.type=:scale
|
51
50
|
if HAS_GSL
|
52
|
-
|
51
|
+
file=Tempfile.new("svg_histogram.svg").path
|
53
52
|
hist=vector.svggraph_histogram(5)
|
54
53
|
File.open(file,"wb") {|fp|
|
55
54
|
fp.write(hist.burn)
|
data/test/test_vector.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'tempfile'
|
5
5
|
class TestStatsample
|
6
6
|
end
|
7
7
|
class TestStatsample::TestVector < Test::Unit::TestCase
|
@@ -12,9 +12,9 @@ class TestStatsample::TestVector < Test::Unit::TestCase
|
|
12
12
|
|
13
13
|
end
|
14
14
|
def test_save_load
|
15
|
-
outfile=
|
16
|
-
@c.save(outfile)
|
17
|
-
a=Statsample.load(outfile)
|
15
|
+
outfile=Tempfile.new("vector.vec")
|
16
|
+
@c.save(outfile.path)
|
17
|
+
a=Statsample.load(outfile.path)
|
18
18
|
assert_equal(@c,a)
|
19
19
|
|
20
20
|
end
|
data/test/test_xls.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'tempfile'
|
5
5
|
begin
|
6
6
|
require 'spreadsheet'
|
7
7
|
rescue LoadError
|
@@ -30,9 +30,9 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
|
|
30
30
|
assert_equal(nil,@ds['age'][5])
|
31
31
|
end
|
32
32
|
def test_write
|
33
|
-
|
34
|
-
Statsample::Excel.write(@ds,
|
35
|
-
ds2=Statsample::Excel.read(
|
33
|
+
tempfile=Tempfile.new("test_write.xls")
|
34
|
+
Statsample::Excel.write(@ds,tempfile.path)
|
35
|
+
ds2=Statsample::Excel.read(tempfile.path)
|
36
36
|
i=0
|
37
37
|
ds2.each_array{|row|
|
38
38
|
assert_equal(@ds.case_as_array(i),row)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-24 00:00:00 -03:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -43,24 +43,14 @@ dependencies:
|
|
43
43
|
version: 0.2.0
|
44
44
|
version:
|
45
45
|
- !ruby/object:Gem::Dependency
|
46
|
-
name:
|
47
|
-
type: :
|
48
|
-
version_requirement:
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
51
|
-
- - ">="
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: 2.0.3
|
54
|
-
version:
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: gemcutter
|
57
|
-
type: :development
|
46
|
+
name: minimization
|
47
|
+
type: :runtime
|
58
48
|
version_requirement:
|
59
49
|
version_requirements: !ruby/object:Gem::Requirement
|
60
50
|
requirements:
|
61
51
|
- - ">="
|
62
52
|
- !ruby/object:Gem::Version
|
63
|
-
version: 0.
|
53
|
+
version: 0.1.0
|
64
54
|
version:
|
65
55
|
- !ruby/object:Gem::Dependency
|
66
56
|
name: hoe
|
@@ -70,9 +60,19 @@ dependencies:
|
|
70
60
|
requirements:
|
71
61
|
- - ">="
|
72
62
|
- !ruby/object:Gem::Version
|
73
|
-
version: 2.
|
63
|
+
version: 2.4.0
|
74
64
|
version:
|
75
|
-
description:
|
65
|
+
description: |-
|
66
|
+
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
|
67
|
+
|
68
|
+
Includes:
|
69
|
+
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
70
|
+
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
71
|
+
* Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
|
72
|
+
* Regression: Simple, Multiple, Probit and Logit
|
73
|
+
* Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
|
74
|
+
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
75
|
+
* Sample calculation related formulas
|
76
76
|
email:
|
77
77
|
- clbustos@gmail.com
|
78
78
|
executables:
|
@@ -221,7 +221,7 @@ rubyforge_project: ruby-statsample
|
|
221
221
|
rubygems_version: 1.3.5
|
222
222
|
signing_key:
|
223
223
|
specification_version: 3
|
224
|
-
summary:
|
224
|
+
summary: A suite for basic and advanced statistics on Ruby
|
225
225
|
test_files:
|
226
226
|
- test/test_bivariate.rb
|
227
227
|
- test/test_dominance_analysis.rb
|