statsample 0.6.4 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/README.txt +8 -4
- data/demo/polychoric.rb +1 -2
- data/lib/distribution/normalbivariate.rb +5 -5
- data/lib/statsample.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +78 -1
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dominanceanalysis.rb +3 -2
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +58 -16
- data/test/test_bivariate.rb +65 -38
- data/test/test_csv.rb +5 -4
- data/test/test_dataset.rb +4 -4
- data/test/test_factor.rb +35 -27
- data/test/test_matrix.rb +0 -1
- data/test/test_regression.rb +4 -2
- data/test/test_svg_graph.rb +8 -9
- data/test/test_vector.rb +4 -4
- data/test/test_xls.rb +4 -4
- metadata +18 -18
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
=== 0.6.5 / 2010-02-24
|
2
|
+
* Bug fix on test: Use tempfile instead of tempdir
|
3
|
+
* Multiple Regression: Calculation of constant standard error , using covariance matrix.
|
4
|
+
* Calculation of R^2_yx and P^2_yx for Regresion on Multiple Dependents variables
|
5
|
+
* Dominance Analysis could use Correlation or Covariance Matrix as input.
|
6
|
+
* Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
|
7
|
+
* Two-step estimate of Polychoric correlation uses minimization gem, so could be executed without rb-gsl
|
8
|
+
|
9
|
+
|
1
10
|
=== 0.6.4 / 2010-02-19
|
2
11
|
* Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis.
|
3
12
|
* Test suite for Dominance Analysis, using Azen and Budescu papers as references
|
data/README.txt
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
http://ruby-statsample.rubyforge.org/
|
4
4
|
|
5
5
|
|
6
|
-
==
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
|
7
9
|
|
8
|
-
|
10
|
+
Includes:
|
9
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
10
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
11
13
|
* Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
|
@@ -14,7 +16,7 @@ A suite for basic and advanced statistics. Includes:
|
|
14
16
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
15
17
|
* Sample calculation related formulas
|
16
18
|
|
17
|
-
==
|
19
|
+
== FEATURES:
|
18
20
|
|
19
21
|
* Factorial Analysis. Principal Component Analysis and Principal Axis extraction, with orthogonal rotations (Varimax, Equimax, Quartimax)
|
20
22
|
* Multiple Regression. Listwise analysis optimized with use of Alglib library. Pairwise analysis is executed on pure ruby with matrixes and reports same values as SPSS
|
@@ -66,7 +68,9 @@ A suite for basic and advanced statistics. Includes:
|
|
66
68
|
Optional:
|
67
69
|
|
68
70
|
* Plotting: gnuplot and rbgnuplot, SVG::Graph
|
69
|
-
* Factorial analysis and polychorical correlation: gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt
|
71
|
+
* Factorial analysis and polychorical correlation(joint estimate and polychoric series): gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>.
|
72
|
+
|
73
|
+
<b>Note</b>: Use gsl 1.12.109 or later.
|
70
74
|
|
71
75
|
== DOWNLOAD
|
72
76
|
* Gems and bugs report: http://rubyforge.org/projects/ruby-statsample/
|
data/demo/polychoric.rb
CHANGED
@@ -5,10 +5,9 @@ require 'statsample'
|
|
5
5
|
ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
6
6
|
|
7
7
|
# Estimation of polychoric correlation using two-step (default)
|
8
|
-
poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step")
|
8
|
+
poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
|
9
9
|
puts poly.summary
|
10
10
|
|
11
|
-
|
12
11
|
# Estimation of polychoric correlation using joint method (slow)
|
13
12
|
poly=Statsample::Bivariate::Polychoric.new(ct, :method=>:joint, :name=>"Polychoric with joint")
|
14
13
|
puts poly.summary
|
data/lib/distribution/normalbivariate.rb
CHANGED
@@ -19,10 +19,9 @@ module Distribution
|
|
19
19
|
# Probability density function for a given x, y and rho value.
|
20
20
|
#
|
21
21
|
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
22
|
-
def pdf(x,y, rho,
|
23
|
-
|
24
|
-
|
25
|
-
((x**2/sigma1) + (y**2/sigma2) - (2*rho*x*y).quo(sigma1*sigma2) ))
|
22
|
+
def pdf(x,y, rho, s1=1.0, s2=1.0)
|
23
|
+
1.quo(2 * Math::PI * s1 * s2 * Math::sqrt( 1 - rho**2 )) * (Math::exp(-(1.quo(2*(1-rho**2))) *
|
24
|
+
((x**2.quo(s1)) + (y**2.quo(s2)) - (2*rho*x*y).quo(s1*s2))))
|
26
25
|
end
|
27
26
|
|
28
27
|
def f(x,y,aprime,bprime,rho)
|
@@ -147,7 +146,8 @@ module Distribution
|
|
147
146
|
r=rho
|
148
147
|
twopi = 6.283185307179586
|
149
148
|
|
150
|
-
w=11.times.collect {[nil]*4};
|
149
|
+
w=11.times.collect {[nil]*4};
|
150
|
+
x=11.times.collect {[nil]*4}
|
151
151
|
|
152
152
|
data=[
|
153
153
|
0.1713244923791705E+00, -0.9324695142031522E+00,
|
data/lib/statsample.rb
CHANGED
data/lib/statsample/bivariate/polychoric.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'minimization'
|
1
2
|
module Statsample
|
2
3
|
module Bivariate
|
3
4
|
# Calculate Polychoric correlation for two vectors.
|
@@ -79,7 +80,7 @@ module Statsample
|
|
79
80
|
|
80
81
|
METHOD=:two_step
|
81
82
|
MAX_ITERATIONS=300
|
82
|
-
EPSILON=
|
83
|
+
EPSILON=1e-6
|
83
84
|
MINIMIZER_TYPE_TWO_STEP="brent"
|
84
85
|
MINIMIZER_TYPE_JOINT="nmsimplex"
|
85
86
|
def new_with_vectors(v1,v2)
|
@@ -184,6 +185,52 @@ module Statsample
|
|
184
185
|
def chi_square_df
|
185
186
|
(@nr*@nc)-@nc-@nr
|
186
187
|
end
|
188
|
+
|
189
|
+
def loglike_fd_rho(alpha,beta,rho)
|
190
|
+
if rho.abs>0.9999
|
191
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
192
|
+
end
|
193
|
+
#puts "rho: #{rho}"
|
194
|
+
|
195
|
+
loglike=0
|
196
|
+
pd=@nr.times.collect{ [0]*@nc}
|
197
|
+
pc=@nr.times.collect{ [0]*@nc}
|
198
|
+
@nr.times { |i|
|
199
|
+
@nc.times { |j|
|
200
|
+
if i==@nr-1 and j==@nc-1
|
201
|
+
pd[i][j]=1.0
|
202
|
+
a=100
|
203
|
+
b=100
|
204
|
+
else
|
205
|
+
a=(i==@nr-1) ? 100: alpha[i]
|
206
|
+
b=(j==@nc-1) ? 100: beta[j]
|
207
|
+
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
208
|
+
end
|
209
|
+
pc[i][j] = pd[i][j]
|
210
|
+
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
211
|
+
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
212
|
+
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
213
|
+
|
214
|
+
pij= pd[i][j]+EPSILON
|
215
|
+
if i==0
|
216
|
+
alpha_m1=-10
|
217
|
+
else
|
218
|
+
alpha_m1=alpha[i-1]
|
219
|
+
end
|
220
|
+
|
221
|
+
if j==0
|
222
|
+
beta_m1=-10
|
223
|
+
else
|
224
|
+
beta_m1=beta[j-1]
|
225
|
+
end
|
226
|
+
|
227
|
+
loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
|
228
|
+
|
229
|
+
}
|
230
|
+
}
|
231
|
+
#puts "derivative: #{loglike}"
|
232
|
+
-loglike
|
233
|
+
end
|
187
234
|
def loglike(alpha,beta,rho)
|
188
235
|
if rho.abs>0.9999
|
189
236
|
rho= (rho>0) ? 0.9999 : -0.9999
|
@@ -249,6 +296,8 @@ module Statsample
|
|
249
296
|
ac=@sumcac[i]
|
250
297
|
end
|
251
298
|
end
|
299
|
+
|
300
|
+
|
252
301
|
# Computation of polychoric correlation usign two-step ML estimation.
|
253
302
|
#
|
254
303
|
# Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
|
@@ -260,6 +309,34 @@ module Statsample
|
|
260
309
|
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
261
310
|
#
|
262
311
|
def compute_two_step_mle_drasgow
|
312
|
+
if HAS_GSL
|
313
|
+
compute_two_step_mle_drasgow_gsl
|
314
|
+
else
|
315
|
+
compute_two_step_mle_drasgow_ruby
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
# Depends on minimization algorithm.
|
320
|
+
|
321
|
+
def compute_two_step_mle_drasgow_ruby #:nodoc:
|
322
|
+
|
323
|
+
f=proc {|rho|
|
324
|
+
loglike(@alpha,@beta, rho)
|
325
|
+
}
|
326
|
+
@log="Minimizing using GSL Brent method\n"
|
327
|
+
min=Minimization::Brent.new(-0.9999,0.9999,f)
|
328
|
+
min.epsilon=@epsilon
|
329
|
+
min.expected=0
|
330
|
+
min.iterate
|
331
|
+
@log+=min.log
|
332
|
+
@r=min.x_minimum
|
333
|
+
@loglike_model=-min.f_minimum
|
334
|
+
puts @log if @debug
|
335
|
+
|
336
|
+
end
|
337
|
+
|
338
|
+
|
339
|
+
def compute_two_step_mle_drasgow_gsl #:nodoc:
|
263
340
|
|
264
341
|
fn1=GSL::Function.alloc {|rho|
|
265
342
|
loglike(@alpha,@beta, rho)
|
data/lib/statsample/converters.rb
CHANGED
@@ -86,7 +86,7 @@ module Statsample
|
|
86
86
|
end
|
87
87
|
}
|
88
88
|
=end
|
89
|
-
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.
|
89
|
+
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
|
90
90
|
fields=row.to_a.collect{|c| c.downcase}
|
91
91
|
fields.recode_repeated
|
92
92
|
end
|
data/lib/statsample/dominanceanalysis.rb
CHANGED
@@ -112,6 +112,7 @@ module Statsample
|
|
112
112
|
@method_association=:r2
|
113
113
|
|
114
114
|
end
|
115
|
+
@name=nil
|
115
116
|
opts.each{|k,v|
|
116
117
|
self.send("#{k}=",v) if self.respond_to? k
|
117
118
|
}
|
@@ -133,7 +134,7 @@ module Statsample
|
|
133
134
|
raise ArgumentError.new("You should use a Matrix or a Dataset")
|
134
135
|
end
|
135
136
|
@models=nil
|
136
|
-
|
137
|
+
@models_data=nil
|
137
138
|
end
|
138
139
|
# Compute models.
|
139
140
|
def compute
|
@@ -227,7 +228,7 @@ module Statsample
|
|
227
228
|
dominances.push(0)
|
228
229
|
else
|
229
230
|
return 0.5
|
230
|
-
dominances.push(0.5)
|
231
|
+
#dominances.push(0.5)
|
231
232
|
end
|
232
233
|
end
|
233
234
|
final=dominances.uniq
|
data/lib/statsample/regression/multiple/matrixengine.rb
CHANGED
@@ -35,22 +35,36 @@ class MatrixEngine < BaseEngine
|
|
35
35
|
def initialize(matrix,y_var, opts=Hash.new)
|
36
36
|
matrix.extend Statsample::CovariateMatrix
|
37
37
|
raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
|
38
|
-
|
39
|
-
|
38
|
+
if matrix.type==:covariance
|
39
|
+
@matrix_cov=matrix
|
40
|
+
@matrix_cor=matrix.correlation
|
41
|
+
@no_covariance=false
|
42
|
+
else
|
43
|
+
@matrix_cor=matrix
|
44
|
+
@matrix_cov=matrix
|
45
|
+
@no_covariance=true
|
46
|
+
end
|
40
47
|
|
41
48
|
@y_var=y_var
|
42
49
|
@fields=matrix.fields-[y_var]
|
50
|
+
|
43
51
|
@n_predictors=@fields.size
|
44
|
-
|
45
|
-
@matrix_x=
|
46
|
-
@
|
47
|
-
|
48
|
-
@
|
49
|
-
@
|
50
|
-
|
51
|
-
|
52
|
+
|
53
|
+
@matrix_x= @matrix_cor.submatrix(@fields)
|
54
|
+
@matrix_x_cov= @matrix_cov.submatrix(@fields)
|
55
|
+
|
56
|
+
@matrix_y = @matrix_cor.submatrix(@fields, [y_var])
|
57
|
+
@matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
@y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
|
62
|
+
|
63
|
+
@x_sd=@n_predictors.times.inject({}) {|ac,i|
|
64
|
+
ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
|
52
65
|
ac;
|
53
66
|
}
|
67
|
+
|
54
68
|
@cases=nil
|
55
69
|
@x_mean=@fields.inject({}) {|ac,f|
|
56
70
|
ac[f]=0.0
|
@@ -64,13 +78,15 @@ class MatrixEngine < BaseEngine
|
|
64
78
|
opts.each{|k,v|
|
65
79
|
self.send("#{k}=",v) if self.respond_to? k
|
66
80
|
}
|
81
|
+
result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
|
82
|
+
|
67
83
|
if matrix.type==:covariance
|
68
|
-
@coeffs
|
84
|
+
@coeffs=result_matrix.column(0).to_a
|
69
85
|
@coeffs_stan=coeffs.collect {|k,v|
|
70
86
|
coeffs[k]*@x_sd[k].quo(@y_sd)
|
71
87
|
}
|
72
88
|
else
|
73
|
-
@coeffs_stan
|
89
|
+
@coeffs_stan=result_matrix.column(0).to_a
|
74
90
|
|
75
91
|
@coeffs=standarized_coeffs.collect {|k,v|
|
76
92
|
standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
|
@@ -87,8 +103,7 @@ class MatrixEngine < BaseEngine
|
|
87
103
|
# * 1-(|R| / |R_x|) or
|
88
104
|
# * Sum(b_i*r_yi)
|
89
105
|
def r2
|
90
|
-
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @
|
91
|
-
#1-(@matrix.correlation.determinant.quo(@matrix_x.correlation.determinant))
|
106
|
+
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
|
92
107
|
end
|
93
108
|
def r
|
94
109
|
Math::sqrt(r2)
|
@@ -141,9 +156,36 @@ class MatrixEngine < BaseEngine
|
|
141
156
|
}
|
142
157
|
out
|
143
158
|
end
|
144
|
-
|
159
|
+
def constant_t
|
160
|
+
return nil if constant_se.nil?
|
161
|
+
constant.to_f/constant_se
|
162
|
+
end
|
163
|
+
# Standard error for constant.
|
164
|
+
# Recreate the estimaded variance-covariance matrix
|
165
|
+
# using means, standard deviation and covariance matrix
|
145
166
|
def constant_se
|
146
|
-
|
167
|
+
return nil if @no_covariance
|
168
|
+
means=@x_mean
|
169
|
+
#means[@y_var]=@y_mean
|
170
|
+
means[:constant]=1
|
171
|
+
sd=@x_sd
|
172
|
+
#sd[@y_var]=@y_sd
|
173
|
+
sd[:constant]=0
|
174
|
+
fields=[:constant]+@matrix_cov.fields-[@y_var]
|
175
|
+
xt_x=Matrix.rows(fields.collect {|i|
|
176
|
+
fields.collect {|j|
|
177
|
+
if i==:constant or j==:constant
|
178
|
+
cov=0
|
179
|
+
elsif i==j
|
180
|
+
cov=sd[i]**2
|
181
|
+
else
|
182
|
+
cov=@matrix_cov.submatrix(i..i,j..j)[0,0]
|
183
|
+
end
|
184
|
+
cov*(@cases-1)+@cases*means[i]*means[j]
|
185
|
+
}
|
186
|
+
})
|
187
|
+
matrix=xt_x.inverse * mse
|
188
|
+
matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
|
147
189
|
end
|
148
190
|
|
149
191
|
def to_reportbuilder(generator)
|
data/test/test_bivariate.rb
CHANGED
@@ -25,47 +25,74 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
28
|
+
def test_poly_vs_tetra
|
29
|
+
10.times {
|
30
|
+
# Should be the same results as Tetrachoric for 2x2 matrix
|
31
|
+
matrix=Matrix[[150+rand(10),1000+rand(20)],[1000+rand(20),200+rand(20)]]
|
32
|
+
tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
|
33
|
+
poly = Statsample::Bivariate::Polychoric.new(matrix)
|
34
|
+
poly.compute_two_step_mle_drasgow_ruby
|
35
|
+
assert_in_delta(tetra.r,poly.r,0.0001)
|
36
|
+
if HAS_GSL
|
37
|
+
poly.compute_two_step_mle_drasgow_gsl
|
38
|
+
assert_in_delta(tetra.r,poly.r,0.0001)
|
39
|
+
end
|
40
|
+
}
|
41
|
+
end
|
28
42
|
def test_polychoric
|
29
|
-
# Should be the same results as Tetrachoric for 2x2 matrix
|
30
|
-
|
31
|
-
matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
|
32
|
-
tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
|
33
|
-
poly = Statsample::Bivariate::Polychoric.new(matrix)
|
34
|
-
assert_in_delta(tetra.r,poly.r,0.0001)
|
35
|
-
|
36
|
-
# Example for Tallis(1962, cited by Drasgow, 2006)
|
37
|
-
|
38
|
-
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
39
|
-
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
40
|
-
poly.method=:two_step
|
41
|
-
poly.compute
|
42
|
-
assert_in_delta(0.420, poly.r, 0.001)
|
43
|
-
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
44
|
-
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
45
|
-
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
46
|
-
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
47
|
-
|
48
|
-
|
49
|
-
poly.method=:polychoric_series
|
50
|
-
poly.compute
|
51
43
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
assert_in_delta(0.4192, poly.r, 0.0001)
|
64
|
-
assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
|
65
|
-
assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
|
66
|
-
assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
|
67
|
-
assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
|
44
|
+
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
45
|
+
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
46
|
+
poly.compute_two_step_mle_drasgow_ruby
|
47
|
+
assert_in_delta(0.420, poly.r, 0.001)
|
48
|
+
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
49
|
+
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
50
|
+
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
51
|
+
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
52
|
+
|
53
|
+
|
68
54
|
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
if HAS_GSL
|
63
|
+
poly.method=:polychoric_series
|
64
|
+
poly.compute
|
65
|
+
|
66
|
+
assert_in_delta(0.556, poly.r, 0.001)
|
67
|
+
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
68
|
+
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
69
|
+
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
70
|
+
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
71
|
+
|
72
|
+
# Example for Tallis(1962, cited by Drasgow, 2006)
|
73
|
+
|
74
|
+
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
75
|
+
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
76
|
+
poly.compute_two_step_mle_drasgow_gsl
|
77
|
+
assert_in_delta(0.420, poly.r, 0.001)
|
78
|
+
assert_in_delta(-0.240, poly.threshold_y[0],0.001)
|
79
|
+
assert_in_delta(-0.027, poly.threshold_x[0],0.001)
|
80
|
+
assert_in_delta(1.578, poly.threshold_y[1],0.001)
|
81
|
+
assert_in_delta(1.137, poly.threshold_x[1],0.001)
|
82
|
+
|
83
|
+
|
84
|
+
poly.method=:joint
|
85
|
+
poly.compute
|
86
|
+
|
87
|
+
|
88
|
+
assert_in_delta(0.4192, poly.r, 0.0001)
|
89
|
+
assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
|
90
|
+
assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
|
91
|
+
assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
|
92
|
+
assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
|
93
|
+
else
|
94
|
+
puts "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
|
95
|
+
end
|
69
96
|
end
|
70
97
|
def test_tetrachoric
|
71
98
|
a,b,c,d=0,0,0,0
|
data/test/test_csv.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
|
-
require
|
3
|
+
require "tempfile"
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
6
|
class StatsampleCSVTestCase < Test::Unit::TestCase
|
@@ -33,9 +33,10 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
|
|
33
33
|
assert_equal(age,ds['age_2'])
|
34
34
|
end
|
35
35
|
def test_write
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
filename=Tempfile.new("afile")
|
37
|
+
# filename=Dir::tmpdir+"/test_write.csv"
|
38
|
+
Statsample::CSV.write(@ds, filename.path)
|
39
|
+
ds2=Statsample::CSV.read(filename.path)
|
39
40
|
i=0
|
40
41
|
ds2.each_array{|row|
|
41
42
|
assert_equal(@ds.case_as_array(i),row)
|
data/test/test_dataset.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'tempfile'
|
5
5
|
class StatsampleDatasetTestCase < Test::Unit::TestCase
|
6
6
|
def setup
|
7
7
|
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
@@ -13,9 +13,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
13
13
|
assert_equal(%w{id name age city a1}, @ds.fields)
|
14
14
|
end
|
15
15
|
def test_saveload
|
16
|
-
|
17
|
-
@ds.save(outfile)
|
18
|
-
a=Statsample.load(outfile)
|
16
|
+
outfile=Tempfile.new("/dataset.ds")
|
17
|
+
@ds.save(outfile.path)
|
18
|
+
a=Statsample.load(outfile.path)
|
19
19
|
assert_equal(@ds,a)
|
20
20
|
end
|
21
21
|
|
data/test/test_factor.rb
CHANGED
@@ -3,35 +3,43 @@ require 'statsample'
|
|
3
3
|
require 'test/unit'
|
4
4
|
class StatsampleFactorTestCase < Test::Unit::TestCase
|
5
5
|
def test_pca
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
6
|
+
if HAS_GSL
|
7
|
+
require 'gsl'
|
8
|
+
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
|
9
|
+
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
|
10
|
+
a.recode! {|c| c-a.mean}
|
11
|
+
b.recode! {|c| c-b.mean}
|
12
|
+
ds={'a'=>a,'b'=>b}.to_dataset
|
13
|
+
cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
|
14
|
+
pca=Statsample::Factor::PCA.new(cov_matrix)
|
15
|
+
expected_eigenvalues=[1.284, 0.0490]
|
16
|
+
expected_eigenvalues.each_with_index{|ev,i|
|
17
|
+
assert_in_delta(ev,pca.eigenvalues[i],0.001)
|
18
|
+
}
|
19
|
+
expected_fm_1=GSL::Matrix[[0.677], [0.735]]
|
20
|
+
expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
|
21
|
+
_test_matrix(expected_fm_1,pca.feature_vector(1))
|
22
|
+
_test_matrix(expected_fm_2,pca.feature_vector(2))
|
23
|
+
else
|
24
|
+
puts "PCA not tested. Requires GSL"
|
25
|
+
end
|
22
26
|
end
|
23
27
|
def test_rotation_varimax
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
if HAS_GSL
|
29
|
+
a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
|
30
|
+
[0.7950, -0.5416, 0.2565] ,
|
31
|
+
[0.5944, 0.7234, -0.3441],
|
32
|
+
[0.8945, -0.3921, -0.1863] ]
|
33
|
+
expected= Matrix[[-0.0204423, 0.938674, -0.340334],
|
34
|
+
[0.983662, 0.0730206, 0.134997],
|
35
|
+
[0.0826106, 0.435975, -0.893379],
|
36
|
+
[0.939901, -0.0965213, -0.309596]].to_gsl
|
37
|
+
varimax=Statsample::Factor::Varimax.new(a)
|
38
|
+
varimax.iterate
|
39
|
+
_test_matrix(expected,varimax.rotated)
|
40
|
+
else
|
41
|
+
puts "Rotation not tested. Requires GSL"
|
42
|
+
end
|
35
43
|
end
|
36
44
|
def _test_matrix(a,b)
|
37
45
|
a.size1.times {|i|
|
data/test/test_matrix.rb
CHANGED
@@ -19,7 +19,6 @@ class StatsampleMatrixTestCase < Test::Unit::TestCase
|
|
19
19
|
assert_equal(6,matrix.row_sum[0])
|
20
20
|
assert_equal(12,matrix.column_sum[0])
|
21
21
|
assert_equal(45,matrix.total_sum)
|
22
|
-
m=matrix.to_gsl
|
23
22
|
end
|
24
23
|
def test_covariate
|
25
24
|
a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
|
data/test/test_regression.rb
CHANGED
@@ -158,12 +158,14 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
|
|
158
158
|
|
159
159
|
cor=Statsample::Bivariate.correlation_matrix(ds)
|
160
160
|
lr=Statsample::Regression::Multiple::MatrixEngine.new(cor,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size, :y_sd=>@y.sd , :x_sd=>{'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd})
|
161
|
-
|
161
|
+
assert_nil(lr.constant_se)
|
162
|
+
assert_nil(lr.constant_t)
|
162
163
|
model_test_matrix(lr, "correlation matrix")
|
163
164
|
|
164
165
|
covariance=Statsample::Bivariate.covariance_matrix(ds)
|
165
166
|
lr=Statsample::Regression::Multiple::MatrixEngine.new(covariance,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size)
|
166
|
-
|
167
|
+
|
168
|
+
model_test(lr , "covariance matrix")
|
167
169
|
end
|
168
170
|
def test_regression_rubyengine
|
169
171
|
@a=[nil,1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
data/test/test_svg_graph.rb
CHANGED
@@ -2,6 +2,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
2
2
|
require 'statsample'
|
3
3
|
require 'tmpdir'
|
4
4
|
require 'tempfile'
|
5
|
+
require 'tempfile'
|
5
6
|
require 'fileutils'
|
6
7
|
require 'test/unit'
|
7
8
|
begin
|
@@ -19,12 +20,10 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
|
|
19
20
|
rand(10)
|
20
21
|
}.to_vector(:scale)
|
21
22
|
h=ar.histogram([0,2,5,11])
|
22
|
-
file
|
23
|
+
file=Tempfile.new("svg_histogram_only.svg")
|
23
24
|
graph = Statsample::Graph::SvgHistogram.new({})
|
24
25
|
graph.histogram=h
|
25
|
-
|
26
|
-
f.puts(graph.burn)
|
27
|
-
}
|
26
|
+
file.puts(graph.burn)
|
28
27
|
else
|
29
28
|
puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
|
30
29
|
end
|
@@ -36,20 +35,20 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
|
|
36
35
|
ar.push(rand(10))
|
37
36
|
}
|
38
37
|
vector=ar.to_vector
|
39
|
-
file
|
38
|
+
file=Tempfile.new("svggraph_default.svg").path
|
40
39
|
vector.svggraph_frequencies(file)
|
41
|
-
file
|
40
|
+
file=Tempfile.new("svggraph_bar.svg").path
|
42
41
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
|
43
42
|
assert(File.exists?(file))
|
44
|
-
file
|
43
|
+
file=Tempfile.new("svggraph_bar_horizontal.svg").path
|
45
44
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::BarHorizontalNoOp,:graph_title=>'Horizontal Bar')
|
46
45
|
assert(File.exists?(file))
|
47
|
-
file
|
46
|
+
file=Tempfile.new("svggraph_pie.svg").path
|
48
47
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::PieNoOp,:graph_title=>'Pie')
|
49
48
|
assert(File.exists?(file))
|
50
49
|
vector.type=:scale
|
51
50
|
if HAS_GSL
|
52
|
-
|
51
|
+
file=Tempfile.new("svg_histogram.svg").path
|
53
52
|
hist=vector.svggraph_histogram(5)
|
54
53
|
File.open(file,"wb") {|fp|
|
55
54
|
fp.write(hist.burn)
|
data/test/test_vector.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'tempfile'
|
5
5
|
class TestStatsample
|
6
6
|
end
|
7
7
|
class TestStatsample::TestVector < Test::Unit::TestCase
|
@@ -12,9 +12,9 @@ class TestStatsample::TestVector < Test::Unit::TestCase
|
|
12
12
|
|
13
13
|
end
|
14
14
|
def test_save_load
|
15
|
-
outfile=
|
16
|
-
@c.save(outfile)
|
17
|
-
a=Statsample.load(outfile)
|
15
|
+
outfile=Tempfile.new("vector.vec")
|
16
|
+
@c.save(outfile.path)
|
17
|
+
a=Statsample.load(outfile.path)
|
18
18
|
assert_equal(@c,a)
|
19
19
|
|
20
20
|
end
|
data/test/test_xls.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'tempfile'
|
5
5
|
begin
|
6
6
|
require 'spreadsheet'
|
7
7
|
rescue LoadError
|
@@ -30,9 +30,9 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
|
|
30
30
|
assert_equal(nil,@ds['age'][5])
|
31
31
|
end
|
32
32
|
def test_write
|
33
|
-
|
34
|
-
Statsample::Excel.write(@ds,
|
35
|
-
ds2=Statsample::Excel.read(
|
33
|
+
tempfile=Tempfile.new("test_write.xls")
|
34
|
+
Statsample::Excel.write(@ds,tempfile.path)
|
35
|
+
ds2=Statsample::Excel.read(tempfile.path)
|
36
36
|
i=0
|
37
37
|
ds2.each_array{|row|
|
38
38
|
assert_equal(@ds.case_as_array(i),row)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.4
|
4
|
+
version: 0.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-24 00:00:00 -03:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -43,24 +43,14 @@ dependencies:
|
|
43
43
|
version: 0.2.0
|
44
44
|
version:
|
45
45
|
- !ruby/object:Gem::Dependency
|
46
|
-
name:
|
47
|
-
type: :
|
48
|
-
version_requirement:
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
51
|
-
- - ">="
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: 2.0.3
|
54
|
-
version:
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: gemcutter
|
57
|
-
type: :development
|
46
|
+
name: minimization
|
47
|
+
type: :runtime
|
58
48
|
version_requirement:
|
59
49
|
version_requirements: !ruby/object:Gem::Requirement
|
60
50
|
requirements:
|
61
51
|
- - ">="
|
62
52
|
- !ruby/object:Gem::Version
|
63
|
-
version: 0.
|
53
|
+
version: 0.1.0
|
64
54
|
version:
|
65
55
|
- !ruby/object:Gem::Dependency
|
66
56
|
name: hoe
|
@@ -70,9 +60,19 @@ dependencies:
|
|
70
60
|
requirements:
|
71
61
|
- - ">="
|
72
62
|
- !ruby/object:Gem::Version
|
73
|
-
version: 2.
|
63
|
+
version: 2.4.0
|
74
64
|
version:
|
75
|
-
description:
|
65
|
+
description: |-
|
66
|
+
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
|
67
|
+
|
68
|
+
Includes:
|
69
|
+
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
70
|
+
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
71
|
+
* Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
|
72
|
+
* Regression: Simple, Multiple, Probit and Logit
|
73
|
+
* Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
|
74
|
+
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
75
|
+
* Sample calculation related formulas
|
76
76
|
email:
|
77
77
|
- clbustos@gmail.com
|
78
78
|
executables:
|
@@ -221,7 +221,7 @@ rubyforge_project: ruby-statsample
|
|
221
221
|
rubygems_version: 1.3.5
|
222
222
|
signing_key:
|
223
223
|
specification_version: 3
|
224
|
-
summary:
|
224
|
+
summary: A suite for basic and advanced statistics on Ruby
|
225
225
|
test_files:
|
226
226
|
- test/test_bivariate.rb
|
227
227
|
- test/test_dominance_analysis.rb
|