statsample 0.18.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
@@ -1,5 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
  class StatsampleCodificationTestCase < MiniTest::Unit::TestCase
4
3
 
5
4
  def initialize(*args)
@@ -1,5 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
  class StatsampleCrosstabTestCase < MiniTest::Unit::TestCase
4
3
 
5
4
  def initialize(*args)
data/test/test_csv.rb CHANGED
@@ -1,8 +1,7 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
  class StatsampleCSVTestCase < MiniTest::Unit::TestCase
4
3
  def setup
5
- @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
4
+ @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
6
5
  end
7
6
  def test_read
8
7
  assert_equal(6,@ds.cases)
@@ -22,7 +21,7 @@ class StatsampleCSVTestCase < MiniTest::Unit::TestCase
22
21
  assert_equal(nil,@ds['age'][5])
23
22
  end
24
23
  def test_repeated
25
- ds=Statsample::CSV.read(File.dirname(__FILE__)+"/../data/repeated_fields.csv")
24
+ ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
26
25
  assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
27
26
  age=[3,4,5,6,nil,8].to_vector(:scale)
28
27
  assert_equal(age,ds['age_2'])
data/test/test_dataset.rb CHANGED
@@ -1,5 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
  class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
4
3
  def setup
5
4
  @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
@@ -19,7 +18,15 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
19
18
  a=Statsample.load(outfile.path)
20
19
  assert_equal(@ds,a)
21
20
  end
22
-
21
+ def test_gsl
22
+ if Statsample.has_gsl?
23
+ matrix=GSL::Matrix[[1,2],[3,4],[5,6]]
24
+ ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
25
+ assert_equal(matrix,ds.to_gsl)
26
+ else
27
+ skip("Gsl needed")
28
+ end
29
+ end
23
30
  def test_matrix
24
31
  matrix=Matrix[[1,2],[3,4],[5,6]]
25
32
  ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
@@ -124,6 +131,20 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
124
131
  mva=[2,3,0,1,0,1].to_vector(:scale)
125
132
  assert_equal(mva,ds.vector_missing_values)
126
133
  end
134
+
135
+ def test_has_missing_values
136
+ a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
137
+ a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
138
+ b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
139
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
140
+ c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
141
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
142
+ assert(ds.has_missing_data?)
143
+ clean=ds.dup_only_valid
144
+ assert(!clean.has_missing_data?)
145
+ end
146
+
147
+
127
148
  def test_vector_count_characters
128
149
  a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
129
150
  a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
@@ -1,5 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
  class StatsampleDominanceAnalysisTestCase < MiniTest::Unit::TestCase
4
3
  def test_dominance_univariate
5
4
  # Example from Budescu (1993)
data/test/test_factor.rb CHANGED
@@ -35,17 +35,18 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
35
35
  end
36
36
  def test_principalcomponents_ruby_gsl
37
37
 
38
- ran=Distribution::Normal.rng_ugaussian
38
+ ran=Distribution::Normal.rng
39
39
 
40
40
  # @r=::Rserve::Connection.new
41
41
 
42
42
  samples=20
43
- (3..7).each {|k|
43
+ [3,5,7].each {|k|
44
44
  v={}
45
45
  v["x0"]=samples.times.map { ran.call()}.to_scale.centered
46
46
  (1...k).each {|i|
47
47
  v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered
48
48
  }
49
+
49
50
  ds=v.to_dataset
50
51
  cm=ds.covariance_matrix
51
52
  # @r.assign('ds',ds)
@@ -82,7 +83,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
82
83
 
83
84
  end
84
85
  def principalcomponents(gsl)
85
- ran=Distribution::Normal.rng_ugaussian
86
+ ran=Distribution::Normal.rng
86
87
  samples=50
87
88
  x1=samples.times.map { ran.call()}.to_scale
88
89
  x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale
@@ -94,9 +95,10 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
94
95
  assert_in_delta(1+r,pca.eigenvalues[0],1e-10)
95
96
  assert_in_delta(1-r,pca.eigenvalues[1],1e-10)
96
97
  hs=1.0 / Math.sqrt(2)
97
- assert_equal_matrix(hs*Matrix[[1],[1]],pca.eigenvectors[0])
98
- m_1=gsl ? Matrix[[-1],[1]] : Matrix[[1],[-1]]
99
- assert_equal_matrix(hs*m_1, pca.eigenvectors[1])
98
+ assert_equal_vector(Vector[1, 1]*hs, pca.eigenvectors[0])
99
+ m_1=gsl ? Vector[-1,1] : Vector[1,-1]
100
+
101
+ assert_equal_vector(hs*m_1, pca.eigenvectors[1])
100
102
 
101
103
  pcs=pca.principal_components(ds)
102
104
  exp_pc_1=ds.collect_with_index {|row,i|
@@ -134,69 +136,6 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
134
136
  assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m,i),0.01)
135
137
  }
136
138
  end
137
- def test_parallelanalysis_with_data
138
- if Statsample.has_gsl?
139
- samples=100
140
- variables=10
141
- iterations=50
142
- rng = Distribution::Normal.rng_ugaussian
143
- f1=samples.times.collect {rng.call}.to_scale
144
- f2=samples.times.collect {rng.call}.to_scale
145
- vectors={}
146
- variables.times do |i|
147
- if i<5
148
- vectors["v#{i}"]=samples.times.collect {|nv|
149
- f1[nv]*5+f2[nv]*2+rng.call
150
- }.to_scale
151
- else
152
- vectors["v#{i}"]=samples.times.collect {|nv|
153
- f2[nv]*5+f1[nv]*2+rng.call
154
- }.to_scale
155
- end
156
-
157
- end
158
- ds=vectors.to_dataset
159
-
160
- pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
161
- pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
162
- 3.times do |n|
163
- var="ev_0000#{n+1}"
164
- assert_in_delta(pa1.ds_eigenvalues[var].mean,pa2.ds_eigenvalues[var].mean,0.04)
165
- end
166
- else
167
- skip("Too slow without GSL")
168
- end
169
-
170
- end
171
- def test_parallelanalysis
172
- pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
173
- assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
174
- assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
175
- assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
176
- #puts pa.summary
177
- assert(pa.summary.size>0)
178
- #pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,100, 95, true)
179
- #puts pa.summary
180
- end
181
- def test_map
182
- #fields=%w{height arm.span forearm lower.leg weight bitro.diameter chest.girth chest.width}
183
- m=Matrix[
184
- [ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
185
- [ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
186
- [ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
187
- [ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
188
- [ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
189
- [ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
190
- [ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
191
- [ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
192
- ]
193
- map=Statsample::Factor::MAP.new(m)
194
- assert_in_delta(map.minfm, 0.066445,0.00001)
195
- assert_equal(map.number_of_factors, 2)
196
- assert_in_delta(map.fm[0], 0.312475,0.00001)
197
- assert_in_delta(map.fm[1], 0.245121,0.00001)
198
-
199
- end
200
139
  # Tested with SPSS and R
201
140
  def test_pca
202
141
  a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
@@ -0,0 +1,43 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
+ #require 'rserve'
3
+ #require 'statsample/rserve_extension'
4
+
5
+ class StatsampleFactorMpaTestCase < MiniTest::Unit::TestCase
6
+ context Statsample::Factor::MAP do
7
+ setup do
8
+ m=Matrix[
9
+ [ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
10
+ [ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
11
+ [ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
12
+ [ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
13
+ [ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
14
+ [ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
15
+ [ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
16
+ [ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
17
+ ]
18
+ @map=Statsample::Factor::MAP.new(m)
19
+ end
20
+ should "return correct values with pure ruby" do
21
+ @map.use_gsl=false
22
+ map_assertions(@map)
23
+ end
24
+ should_with_gsl "return correct values with gsl" do
25
+ require 'ruby-prof'
26
+
27
+ @map.use_gsl=true
28
+ map_assertions(@map)
29
+ end
30
+
31
+
32
+ end
33
+
34
+ def map_assertions(map)
35
+ assert_in_delta(map.minfm, 0.066445,0.00001)
36
+ assert_equal(map.number_of_factors, 2)
37
+ assert_in_delta(map.fm[0], 0.312475,0.00001)
38
+ assert_in_delta(map.fm[1], 0.245121,0.00001)
39
+ end
40
+
41
+
42
+ end
43
+
@@ -0,0 +1,54 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
+ #require 'rserve'
3
+ #require 'statsample/rserve_extension'
4
+
5
+ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
6
+ include Statsample::Fixtures
7
+ # Based on Hardle and Simar
8
+ def setup
9
+ @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
10
+ end
11
+ def test_parallelanalysis_with_data
12
+ if Statsample.has_gsl?
13
+ samples=100
14
+ variables=10
15
+ iterations=50
16
+ rng = Distribution::Normal.rng
17
+ f1=samples.times.collect {rng.call}.to_scale
18
+ f2=samples.times.collect {rng.call}.to_scale
19
+ vectors={}
20
+ variables.times do |i|
21
+ if i<5
22
+ vectors["v#{i}"]=samples.times.collect {|nv|
23
+ f1[nv]*5+f2[nv]*2+rng.call
24
+ }.to_scale
25
+ else
26
+ vectors["v#{i}"]=samples.times.collect {|nv|
27
+ f2[nv]*5+f1[nv]*2+rng.call
28
+ }.to_scale
29
+ end
30
+
31
+ end
32
+ ds=vectors.to_dataset
33
+
34
+ pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
35
+ pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
36
+ 3.times do |n|
37
+ var="ev_0000#{n+1}"
38
+ assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean,0.04)
39
+ end
40
+ else
41
+ skip("Too slow without GSL")
42
+ end
43
+
44
+ end
45
+ def test_parallelanalysis
46
+ pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
47
+ assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
48
+ assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
49
+ assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
50
+ assert(pa.summary.size>0)
51
+ #pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,100, 95, true)
52
+ #puts pa.summary
53
+ end
54
+ end
data/test/test_ggobi.rb CHANGED
@@ -1,4 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
2
  require 'ostruct'
3
3
  class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
4
4
 
data/test/test_gsl.rb CHANGED
@@ -1,22 +1,16 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
3
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
4
2
  class StatsampleGSLTestCase < MiniTest::Unit::TestCase
5
- def test_matrix_to_gsl
6
- if Statsample.has_gsl?
7
- a=[1,2,3,4,20].to_vector(:scale)
8
- b=[3,2,3,4,50].to_vector(:scale)
9
- c=[6,2,3,4,3].to_vector(:scale)
10
- ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
11
- gsl=ds.to_matrix.to_gsl
12
- assert_equal(5,gsl.size1)
13
- assert_equal(3,gsl.size2)
14
- matrix=gsl.to_matrix
15
- assert_equal(5,matrix.row_size)
16
- assert_equal(3,matrix.column_size)
17
- else
18
- skip("Needs GSL extension")
19
- end
3
+ should_with_gsl "matrix with gsl" do
4
+ a=[1,2,3,4,20].to_vector(:scale)
5
+ b=[3,2,3,4,50].to_vector(:scale)
6
+ c=[6,2,3,4,3].to_vector(:scale)
7
+ ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
8
+ gsl=ds.to_matrix.to_gsl
9
+ assert_equal(5,gsl.size1)
10
+ assert_equal(3,gsl.size2)
11
+ matrix=gsl.to_matrix
12
+ assert_equal(5,matrix.row_size)
13
+ assert_equal(3,matrix.column_size)
20
14
  end
21
15
  end
22
16
 
@@ -1,5 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
 
4
3
 
5
4
  class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
data/test/test_logit.rb CHANGED
@@ -1,21 +1,65 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
+ $:.unshift("/home/cdx/dev/rserve-client/lib")
3
3
  class StatsampleLogitTestCase < MiniTest::Unit::TestCase
4
- def test_logit_1
5
- crime=File.dirname(__FILE__)+'/../data/test_binomial.csv'
6
- ds=Statsample::CSV.read(crime)
7
- lr=Statsample::Regression::Binomial::Logit.new(ds,'y')
8
- assert_in_delta(-38.8669,lr.log_likehood,0.001)
9
- assert_in_delta(-5.3658,lr.constant,0.001)
4
+ context Statsample::Regression::Binomial::Logit do
5
+ should "return correct values for example" do
6
+ crime=File.dirname(__FILE__)+'/fixtures/test_binomial.csv'
7
+ ds=Statsample::CSV.read(crime)
8
+ lr=Statsample::Regression::Binomial::Logit.new(ds,'y')
9
+ assert_in_delta(-38.8669,lr.log_likehood,0.001)
10
+ assert_in_delta(-5.3658,lr.constant,0.001)
11
+
12
+ exp_coeffs={"a"=>0.3270,"b"=>0.8147, "c"=>-0.4031}
13
+ exp_coeffs.each{|k,v|
14
+ assert_in_delta(v,lr.coeffs[k],0.001)
15
+ }
16
+ exp_errors={'a'=>0.4390,'b'=>0.4270,'c'=>0.3819}
17
+ exp_errors.each{|k,v|
18
+ assert_in_delta(v,lr.coeffs_se[k],0.001)
19
+ }
20
+ assert_equal(7,lr.iterations)
21
+ end
22
+ end
23
+ begin
24
+ require 'rserve'
25
+ require 'statsample/rserve_extension'
26
+ should "return same similat values to as R gml" do
27
+
28
+ r=Rserve::Connection.new
29
+ ran=Distribution::Normal.rng
30
+ samples=100
31
+ a,b,c=ran.call,ran.call,ran.call
32
+ logit=lambda {|x| Math.exp(x) / (1+Math.exp(x))}
33
+
34
+ x1=Statsample::Vector.new_scale(samples) {ran.call}
35
+ x2=Statsample::Vector.new_scale(samples) {ran.call}
36
+ x3=Statsample::Vector.new_scale(samples) {ran.call}
37
+
38
+ y= Statsample::Vector.new_scale(samples) {|i| logit.call(x1[i]*a+x2[i]*b+x3[i]*c+ran.call)}
39
+ # Generate R object
40
+ ds={'x1'=>x1,'x2'=>x2,'x3'=>x3,'y'=>y}.to_dataset
41
+ r.assign('ds',ds)
42
+ r.eval("mylogit<- glm(ds$y~ds$x1+ds$x2+ds$x3, family=binomial(link='logit'), na.action=na.pass)")
43
+
44
+ r_logit=r.eval('summary(mylogit)')
45
+ r_coeffs=r_logit.as_list['coefficients'].to_ruby
46
+ ruby_logit=Statsample::Regression::Binomial::Logit.new(ds,'y')
10
47
 
11
- exp_coeffs={"a"=>0.3270,"b"=>0.8147, "c"=>-0.4031}
12
- exp_coeffs.each{|k,v|
13
- assert_in_delta(v,lr.coeffs[k],0.001)
14
- }
15
- exp_errors={'a'=>0.4390,'b'=>0.4270,'c'=>0.3819}
16
- exp_errors.each{|k,v|
17
- assert_in_delta(v,lr.coeffs_se[k],0.001)
18
- }
19
- assert_equal(7,lr.iterations)
20
- end
48
+ assert_in_delta(r_coeffs[0,0], ruby_logit.constant,1e-4)
49
+ assert_in_delta(r_coeffs[0,1], ruby_logit.constant_se,5e-3)
50
+
51
+ %w{x1 x2 x3}.each_with_index do |f,i|
52
+ assert_in_delta(r_coeffs[i+1,0], ruby_logit.coeffs[f],1e-4)
53
+ assert_in_delta(r_coeffs[i+1,1], ruby_logit.coeffs_se[f],5e-3)
54
+
55
+ end
56
+
57
+ r.close
58
+
59
+ end
60
+
61
+ rescue LoadError
62
+ puts "Require rserve extension"
63
+
64
+ end
21
65
  end
data/test/test_matrix.rb CHANGED
@@ -1,5 +1,4 @@
1
- require(File.dirname(__FILE__)+'/helpers_tests.rb')
2
-
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
3
2
 
4
3
  class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
5
4
 
@@ -24,17 +23,17 @@ class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
24
23
  a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
25
24
  a.extend Statsample::CovariateMatrix
26
25
  a.fields=%w{a b c}
27
- assert_equal(:correlation, a.type)
26
+ assert_equal(:correlation, a._type)
28
27
 
29
28
  assert_equal(Matrix[[0.5],[0.3]], a.submatrix(%w{c a}, %w{b}))
30
29
  assert_equal(Matrix[[1.0, 0.2] , [0.2, 1.0]], a.submatrix(%w{c a}))
31
- assert_equal(:correlation, a.submatrix(%w{c a}).type)
30
+ assert_equal(:correlation, a.submatrix(%w{c a})._type)
32
31
 
33
32
  a=Matrix[[20,30,10], [30,60,50], [10,50,50]]
34
33
 
35
34
  a.extend Statsample::CovariateMatrix
36
35
 
37
- assert_equal(:covariance, a.type)
36
+ assert_equal(:covariance, a._type)
38
37
 
39
38
  a=50.times.collect {rand()}.to_scale
40
39
  b=50.times.collect {rand()}.to_scale