statsample 0.18.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
data/test/test_codification.rb
CHANGED
data/test/test_crosstab.rb
CHANGED
data/test/test_csv.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
3
2
|
class StatsampleCSVTestCase < MiniTest::Unit::TestCase
|
4
3
|
def setup
|
5
|
-
@ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
|
4
|
+
@ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
|
6
5
|
end
|
7
6
|
def test_read
|
8
7
|
assert_equal(6,@ds.cases)
|
@@ -22,7 +21,7 @@ class StatsampleCSVTestCase < MiniTest::Unit::TestCase
|
|
22
21
|
assert_equal(nil,@ds['age'][5])
|
23
22
|
end
|
24
23
|
def test_repeated
|
25
|
-
ds=Statsample::CSV.read(File.dirname(__FILE__)+"
|
24
|
+
ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
|
26
25
|
assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
|
27
26
|
age=[3,4,5,6,nil,8].to_vector(:scale)
|
28
27
|
assert_equal(age,ds['age_2'])
|
data/test/test_dataset.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
3
2
|
class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
4
3
|
def setup
|
5
4
|
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
@@ -19,7 +18,15 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
|
19
18
|
a=Statsample.load(outfile.path)
|
20
19
|
assert_equal(@ds,a)
|
21
20
|
end
|
22
|
-
|
21
|
+
def test_gsl
|
22
|
+
if Statsample.has_gsl?
|
23
|
+
matrix=GSL::Matrix[[1,2],[3,4],[5,6]]
|
24
|
+
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
25
|
+
assert_equal(matrix,ds.to_gsl)
|
26
|
+
else
|
27
|
+
skip("Gsl needed")
|
28
|
+
end
|
29
|
+
end
|
23
30
|
def test_matrix
|
24
31
|
matrix=Matrix[[1,2],[3,4],[5,6]]
|
25
32
|
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
@@ -124,6 +131,20 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
|
124
131
|
mva=[2,3,0,1,0,1].to_vector(:scale)
|
125
132
|
assert_equal(mva,ds.vector_missing_values)
|
126
133
|
end
|
134
|
+
|
135
|
+
def test_has_missing_values
|
136
|
+
a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
|
137
|
+
a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
|
138
|
+
b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
|
139
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
140
|
+
c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
|
141
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
142
|
+
assert(ds.has_missing_data?)
|
143
|
+
clean=ds.dup_only_valid
|
144
|
+
assert(!clean.has_missing_data?)
|
145
|
+
end
|
146
|
+
|
147
|
+
|
127
148
|
def test_vector_count_characters
|
128
149
|
a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
|
129
150
|
a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
|
data/test/test_factor.rb
CHANGED
@@ -35,17 +35,18 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
|
|
35
35
|
end
|
36
36
|
def test_principalcomponents_ruby_gsl
|
37
37
|
|
38
|
-
ran=Distribution::Normal.rng_ugaussian
|
38
|
+
ran=Distribution::Normal.rng
|
39
39
|
|
40
40
|
# @r=::Rserve::Connection.new
|
41
41
|
|
42
42
|
samples=20
|
43
|
-
|
43
|
+
[3,5,7].each {|k|
|
44
44
|
v={}
|
45
45
|
v["x0"]=samples.times.map { ran.call()}.to_scale.centered
|
46
46
|
(1...k).each {|i|
|
47
47
|
v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered
|
48
48
|
}
|
49
|
+
|
49
50
|
ds=v.to_dataset
|
50
51
|
cm=ds.covariance_matrix
|
51
52
|
# @r.assign('ds',ds)
|
@@ -82,7 +83,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
|
|
82
83
|
|
83
84
|
end
|
84
85
|
def principalcomponents(gsl)
|
85
|
-
ran=Distribution::Normal.rng_ugaussian
|
86
|
+
ran=Distribution::Normal.rng
|
86
87
|
samples=50
|
87
88
|
x1=samples.times.map { ran.call()}.to_scale
|
88
89
|
x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale
|
@@ -94,9 +95,10 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
|
|
94
95
|
assert_in_delta(1+r,pca.eigenvalues[0],1e-10)
|
95
96
|
assert_in_delta(1-r,pca.eigenvalues[1],1e-10)
|
96
97
|
hs=1.0 / Math.sqrt(2)
|
97
|
-
|
98
|
-
m_1=gsl ?
|
99
|
-
|
98
|
+
assert_equal_vector(Vector[1, 1]*hs, pca.eigenvectors[0])
|
99
|
+
m_1=gsl ? Vector[-1,1] : Vector[1,-1]
|
100
|
+
|
101
|
+
assert_equal_vector(hs*m_1, pca.eigenvectors[1])
|
100
102
|
|
101
103
|
pcs=pca.principal_components(ds)
|
102
104
|
exp_pc_1=ds.collect_with_index {|row,i|
|
@@ -134,69 +136,6 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
|
|
134
136
|
assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m,i),0.01)
|
135
137
|
}
|
136
138
|
end
|
137
|
-
def test_parallelanalysis_with_data
|
138
|
-
if Statsample.has_gsl?
|
139
|
-
samples=100
|
140
|
-
variables=10
|
141
|
-
iterations=50
|
142
|
-
rng = Distribution::Normal.rng_ugaussian
|
143
|
-
f1=samples.times.collect {rng.call}.to_scale
|
144
|
-
f2=samples.times.collect {rng.call}.to_scale
|
145
|
-
vectors={}
|
146
|
-
variables.times do |i|
|
147
|
-
if i<5
|
148
|
-
vectors["v#{i}"]=samples.times.collect {|nv|
|
149
|
-
f1[nv]*5+f2[nv]*2+rng.call
|
150
|
-
}.to_scale
|
151
|
-
else
|
152
|
-
vectors["v#{i}"]=samples.times.collect {|nv|
|
153
|
-
f2[nv]*5+f1[nv]*2+rng.call
|
154
|
-
}.to_scale
|
155
|
-
end
|
156
|
-
|
157
|
-
end
|
158
|
-
ds=vectors.to_dataset
|
159
|
-
|
160
|
-
pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
|
161
|
-
pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
|
162
|
-
3.times do |n|
|
163
|
-
var="ev_0000#{n+1}"
|
164
|
-
assert_in_delta(pa1.ds_eigenvalues[var].mean,pa2.ds_eigenvalues[var].mean,0.04)
|
165
|
-
end
|
166
|
-
else
|
167
|
-
skip("Too slow without GSL")
|
168
|
-
end
|
169
|
-
|
170
|
-
end
|
171
|
-
def test_parallelanalysis
|
172
|
-
pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
|
173
|
-
assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
|
174
|
-
assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
|
175
|
-
assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
|
176
|
-
#puts pa.summary
|
177
|
-
assert(pa.summary.size>0)
|
178
|
-
#pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,100, 95, true)
|
179
|
-
#puts pa.summary
|
180
|
-
end
|
181
|
-
def test_map
|
182
|
-
#fields=%w{height arm.span forearm lower.leg weight bitro.diameter chest.girth chest.width}
|
183
|
-
m=Matrix[
|
184
|
-
[ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
|
185
|
-
[ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
|
186
|
-
[ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
|
187
|
-
[ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
|
188
|
-
[ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
|
189
|
-
[ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
|
190
|
-
[ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
|
191
|
-
[ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
|
192
|
-
]
|
193
|
-
map=Statsample::Factor::MAP.new(m)
|
194
|
-
assert_in_delta(map.minfm, 0.066445,0.00001)
|
195
|
-
assert_equal(map.number_of_factors, 2)
|
196
|
-
assert_in_delta(map.fm[0], 0.312475,0.00001)
|
197
|
-
assert_in_delta(map.fm[1], 0.245121,0.00001)
|
198
|
-
|
199
|
-
end
|
200
139
|
# Tested with SPSS and R
|
201
140
|
def test_pca
|
202
141
|
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
+
#require 'rserve'
|
3
|
+
#require 'statsample/rserve_extension'
|
4
|
+
|
5
|
+
class StatsampleFactorMpaTestCase < MiniTest::Unit::TestCase
|
6
|
+
context Statsample::Factor::MAP do
|
7
|
+
setup do
|
8
|
+
m=Matrix[
|
9
|
+
[ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
|
10
|
+
[ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
|
11
|
+
[ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
|
12
|
+
[ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
|
13
|
+
[ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
|
14
|
+
[ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
|
15
|
+
[ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
|
16
|
+
[ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
|
17
|
+
]
|
18
|
+
@map=Statsample::Factor::MAP.new(m)
|
19
|
+
end
|
20
|
+
should "return correct values with pure ruby" do
|
21
|
+
@map.use_gsl=false
|
22
|
+
map_assertions(@map)
|
23
|
+
end
|
24
|
+
should_with_gsl "return correct values with gsl" do
|
25
|
+
require 'ruby-prof'
|
26
|
+
|
27
|
+
@map.use_gsl=true
|
28
|
+
map_assertions(@map)
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
def map_assertions(map)
|
35
|
+
assert_in_delta(map.minfm, 0.066445,0.00001)
|
36
|
+
assert_equal(map.number_of_factors, 2)
|
37
|
+
assert_in_delta(map.fm[0], 0.312475,0.00001)
|
38
|
+
assert_in_delta(map.fm[1], 0.245121,0.00001)
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
end
|
43
|
+
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
+
#require 'rserve'
|
3
|
+
#require 'statsample/rserve_extension'
|
4
|
+
|
5
|
+
class StatsampleFactorTestCase < MiniTest::Unit::TestCase
|
6
|
+
include Statsample::Fixtures
|
7
|
+
# Based on Hardle and Simar
|
8
|
+
def setup
|
9
|
+
@fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
|
10
|
+
end
|
11
|
+
def test_parallelanalysis_with_data
|
12
|
+
if Statsample.has_gsl?
|
13
|
+
samples=100
|
14
|
+
variables=10
|
15
|
+
iterations=50
|
16
|
+
rng = Distribution::Normal.rng
|
17
|
+
f1=samples.times.collect {rng.call}.to_scale
|
18
|
+
f2=samples.times.collect {rng.call}.to_scale
|
19
|
+
vectors={}
|
20
|
+
variables.times do |i|
|
21
|
+
if i<5
|
22
|
+
vectors["v#{i}"]=samples.times.collect {|nv|
|
23
|
+
f1[nv]*5+f2[nv]*2+rng.call
|
24
|
+
}.to_scale
|
25
|
+
else
|
26
|
+
vectors["v#{i}"]=samples.times.collect {|nv|
|
27
|
+
f2[nv]*5+f1[nv]*2+rng.call
|
28
|
+
}.to_scale
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
ds=vectors.to_dataset
|
33
|
+
|
34
|
+
pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
|
35
|
+
pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
|
36
|
+
3.times do |n|
|
37
|
+
var="ev_0000#{n+1}"
|
38
|
+
assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean,0.04)
|
39
|
+
end
|
40
|
+
else
|
41
|
+
skip("Too slow without GSL")
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
def test_parallelanalysis
|
46
|
+
pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
|
47
|
+
assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
|
48
|
+
assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
|
49
|
+
assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
|
50
|
+
assert(pa.summary.size>0)
|
51
|
+
#pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,100, 95, true)
|
52
|
+
#puts pa.summary
|
53
|
+
end
|
54
|
+
end
|
data/test/test_ggobi.rb
CHANGED
data/test/test_gsl.rb
CHANGED
@@ -1,22 +1,16 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
3
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
4
2
|
class StatsampleGSLTestCase < MiniTest::Unit::TestCase
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
assert_equal(3,matrix.column_size)
|
17
|
-
else
|
18
|
-
skip("Needs GSL extension")
|
19
|
-
end
|
3
|
+
should_with_gsl "matrix with gsl" do
|
4
|
+
a=[1,2,3,4,20].to_vector(:scale)
|
5
|
+
b=[3,2,3,4,50].to_vector(:scale)
|
6
|
+
c=[6,2,3,4,3].to_vector(:scale)
|
7
|
+
ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
8
|
+
gsl=ds.to_matrix.to_gsl
|
9
|
+
assert_equal(5,gsl.size1)
|
10
|
+
assert_equal(3,gsl.size2)
|
11
|
+
matrix=gsl.to_matrix
|
12
|
+
assert_equal(5,matrix.row_size)
|
13
|
+
assert_equal(3,matrix.column_size)
|
20
14
|
end
|
21
15
|
end
|
22
16
|
|
data/test/test_histogram.rb
CHANGED
data/test/test_logit.rb
CHANGED
@@ -1,21 +1,65 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
+
$:.unshift("/home/cdx/dev/rserve-client/lib")
|
3
3
|
class StatsampleLogitTestCase < MiniTest::Unit::TestCase
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
context Statsample::Regression::Binomial::Logit do
|
5
|
+
should "return correct values for example" do
|
6
|
+
crime=File.dirname(__FILE__)+'/fixtures/test_binomial.csv'
|
7
|
+
ds=Statsample::CSV.read(crime)
|
8
|
+
lr=Statsample::Regression::Binomial::Logit.new(ds,'y')
|
9
|
+
assert_in_delta(-38.8669,lr.log_likehood,0.001)
|
10
|
+
assert_in_delta(-5.3658,lr.constant,0.001)
|
11
|
+
|
12
|
+
exp_coeffs={"a"=>0.3270,"b"=>0.8147, "c"=>-0.4031}
|
13
|
+
exp_coeffs.each{|k,v|
|
14
|
+
assert_in_delta(v,lr.coeffs[k],0.001)
|
15
|
+
}
|
16
|
+
exp_errors={'a'=>0.4390,'b'=>0.4270,'c'=>0.3819}
|
17
|
+
exp_errors.each{|k,v|
|
18
|
+
assert_in_delta(v,lr.coeffs_se[k],0.001)
|
19
|
+
}
|
20
|
+
assert_equal(7,lr.iterations)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
begin
|
24
|
+
require 'rserve'
|
25
|
+
require 'statsample/rserve_extension'
|
26
|
+
should "return same similat values to as R gml" do
|
27
|
+
|
28
|
+
r=Rserve::Connection.new
|
29
|
+
ran=Distribution::Normal.rng
|
30
|
+
samples=100
|
31
|
+
a,b,c=ran.call,ran.call,ran.call
|
32
|
+
logit=lambda {|x| Math.exp(x) / (1+Math.exp(x))}
|
33
|
+
|
34
|
+
x1=Statsample::Vector.new_scale(samples) {ran.call}
|
35
|
+
x2=Statsample::Vector.new_scale(samples) {ran.call}
|
36
|
+
x3=Statsample::Vector.new_scale(samples) {ran.call}
|
37
|
+
|
38
|
+
y= Statsample::Vector.new_scale(samples) {|i| logit.call(x1[i]*a+x2[i]*b+x3[i]*c+ran.call)}
|
39
|
+
# Generate R object
|
40
|
+
ds={'x1'=>x1,'x2'=>x2,'x3'=>x3,'y'=>y}.to_dataset
|
41
|
+
r.assign('ds',ds)
|
42
|
+
r.eval("mylogit<- glm(ds$y~ds$x1+ds$x2+ds$x3, family=binomial(link='logit'), na.action=na.pass)")
|
43
|
+
|
44
|
+
r_logit=r.eval('summary(mylogit)')
|
45
|
+
r_coeffs=r_logit.as_list['coefficients'].to_ruby
|
46
|
+
ruby_logit=Statsample::Regression::Binomial::Logit.new(ds,'y')
|
10
47
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
48
|
+
assert_in_delta(r_coeffs[0,0], ruby_logit.constant,1e-4)
|
49
|
+
assert_in_delta(r_coeffs[0,1], ruby_logit.constant_se,5e-3)
|
50
|
+
|
51
|
+
%w{x1 x2 x3}.each_with_index do |f,i|
|
52
|
+
assert_in_delta(r_coeffs[i+1,0], ruby_logit.coeffs[f],1e-4)
|
53
|
+
assert_in_delta(r_coeffs[i+1,1], ruby_logit.coeffs_se[f],5e-3)
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
r.close
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
rescue LoadError
|
62
|
+
puts "Require rserve extension"
|
63
|
+
|
64
|
+
end
|
21
65
|
end
|
data/test/test_matrix.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
3
2
|
|
4
3
|
class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
|
5
4
|
|
@@ -24,17 +23,17 @@ class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
|
|
24
23
|
a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
|
25
24
|
a.extend Statsample::CovariateMatrix
|
26
25
|
a.fields=%w{a b c}
|
27
|
-
assert_equal(:correlation, a.
|
26
|
+
assert_equal(:correlation, a._type)
|
28
27
|
|
29
28
|
assert_equal(Matrix[[0.5],[0.3]], a.submatrix(%w{c a}, %w{b}))
|
30
29
|
assert_equal(Matrix[[1.0, 0.2] , [0.2, 1.0]], a.submatrix(%w{c a}))
|
31
|
-
assert_equal(:correlation, a.submatrix(%w{c a}).
|
30
|
+
assert_equal(:correlation, a.submatrix(%w{c a})._type)
|
32
31
|
|
33
32
|
a=Matrix[[20,30,10], [30,60,50], [10,50,50]]
|
34
33
|
|
35
34
|
a.extend Statsample::CovariateMatrix
|
36
35
|
|
37
|
-
assert_equal(:covariance, a.
|
36
|
+
assert_equal(:covariance, a._type)
|
38
37
|
|
39
38
|
a=50.times.collect {rand()}.to_scale
|
40
39
|
b=50.times.collect {rand()}.to_scale
|