statsample 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62)
  1. data/History.txt +7 -0
  2. data/Manifest.txt +15 -9
  3. data/README.txt +6 -0
  4. data/Rakefile +8 -0
  5. data/{demo → examples}/correlation_matrix.rb +0 -0
  6. data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
  7. data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
  8. data/{demo → examples}/levene.rb +0 -0
  9. data/{demo → examples}/multiple_regression.rb +5 -3
  10. data/{demo → examples}/multivariate_correlation.rb +0 -0
  11. data/{demo → examples}/polychoric.rb +0 -0
  12. data/{demo → examples}/principal_axis.rb +0 -0
  13. data/examples/t_test.rb +11 -0
  14. data/{demo → examples}/tetrachoric.rb +0 -0
  15. data/lib/statistics2.rb +1 -1
  16. data/lib/statsample.rb +57 -6
  17. data/lib/statsample/bivariate/polychoric.rb +12 -25
  18. data/lib/statsample/bivariate/tetrachoric.rb +1 -3
  19. data/lib/statsample/converter/csv.rb +11 -12
  20. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
  21. data/lib/statsample/factor/principalaxis.rb +0 -2
  22. data/lib/statsample/factor/rotation.rb +6 -8
  23. data/lib/statsample/graph.rb +8 -0
  24. data/lib/statsample/graph/svggraph.rb +0 -4
  25. data/lib/statsample/regression/multiple/baseengine.rb +25 -28
  26. data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
  27. data/lib/statsample/test.rb +36 -1
  28. data/lib/statsample/test/levene.rb +11 -7
  29. data/lib/statsample/test/t.rb +189 -0
  30. data/test/test_anova.rb +8 -10
  31. data/test/test_bivariate.rb +40 -37
  32. data/test/test_codification.rb +9 -13
  33. data/test/test_combination.rb +37 -39
  34. data/test/test_crosstab.rb +46 -48
  35. data/test/test_csv.rb +40 -45
  36. data/test/test_dataset.rb +150 -152
  37. data/test/test_distribution.rb +24 -21
  38. data/test/test_dominance_analysis.rb +10 -12
  39. data/test/test_factor.rb +95 -91
  40. data/test/test_ggobi.rb +30 -33
  41. data/test/test_gsl.rb +4 -4
  42. data/test/test_helpers.rb +26 -0
  43. data/test/test_histogram.rb +5 -6
  44. data/test/test_logit.rb +20 -21
  45. data/test/test_matrix.rb +47 -48
  46. data/test/test_mle.rb +130 -131
  47. data/test/test_multiset.rb +95 -96
  48. data/test/test_permutation.rb +35 -36
  49. data/test/test_promise_after.rb +39 -0
  50. data/test/test_regression.rb +49 -51
  51. data/test/test_reliability.rb +29 -30
  52. data/test/test_resample.rb +22 -23
  53. data/test/test_srs.rb +8 -9
  54. data/test/test_statistics.rb +12 -6
  55. data/test/test_stest.rb +18 -10
  56. data/test/test_stratified.rb +15 -16
  57. data/test/test_svg_graph.rb +11 -22
  58. data/test/test_test_t.rb +40 -0
  59. data/test/test_umannwhitney.rb +14 -15
  60. data/test/test_vector.rb +33 -37
  61. data/test/test_xls.rb +34 -41
  62. metadata +22 -11
data/test/test_dataset.rb CHANGED
@@ -1,11 +1,9 @@
1
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
- require 'statsample'
3
- require 'test/unit'
4
- require 'tempfile'
5
- class StatsampleDatasetTestCase < Test::Unit::TestCase
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
+
3
+ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
6
4
  def setup
7
5
  @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
8
- 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
6
+ 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
9
7
  'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
10
8
  end
11
9
  def test_basic
@@ -14,40 +12,40 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
14
12
  end
15
13
  def test_saveload
16
14
  outfile=Tempfile.new("dataset.ds")
17
- @ds.save(outfile.path)
18
- a=Statsample.load(outfile.path)
19
- assert_equal(@ds,a)
15
+ @ds.save(outfile.path)
16
+ a=Statsample.load(outfile.path)
17
+ assert_equal(@ds,a)
20
18
  end
21
-
19
+
22
20
  def test_matrix
23
21
  matrix=Matrix[[1,2],[3,4],[5,6]]
24
22
  ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
25
23
  assert_equal(matrix,ds.to_matrix)
26
24
  end
27
-
25
+
28
26
  def test_fields
29
27
  @ds.fields=%w{name a1 id age city}
30
28
  assert_equal(%w{name a1 id age city}, @ds.fields)
31
29
  @ds.fields=%w{id name age}
32
- assert_equal(%w{id name age a1 city}, @ds.fields)
30
+ assert_equal(%w{id name age a1 city}, @ds.fields)
33
31
  end
34
32
  def test_merge
35
- a=[1,2,3].to_scale
36
- b=[3,4,5].to_vector
37
- c=[4,5,6].to_scale
38
- d=[7,8,9].to_vector
39
- e=[10,20,30].to_vector
40
- ds1={'a'=>a,'b'=>b}.to_dataset
41
- ds2={'c'=>c,'d'=>d}.to_dataset
42
- exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
43
-
44
- assert_equal(exp,ds1.merge(ds2))
45
- exp.fields=%w{c d a b}
46
- assert_equal(exp,ds2.merge(ds1))
47
- ds3={'a'=>e}.to_dataset
48
- exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
49
- exp.fields=%w{a_1 b a_2}
50
- assert_equal(exp,ds1.merge(ds3))
33
+ a=[1,2,3].to_scale
34
+ b=[3,4,5].to_vector
35
+ c=[4,5,6].to_scale
36
+ d=[7,8,9].to_vector
37
+ e=[10,20,30].to_vector
38
+ ds1={'a'=>a,'b'=>b}.to_dataset
39
+ ds2={'c'=>c,'d'=>d}.to_dataset
40
+ exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
41
+
42
+ assert_equal(exp,ds1.merge(ds2))
43
+ exp.fields=%w{c d a b}
44
+ assert_equal(exp,ds2.merge(ds1))
45
+ ds3={'a'=>e}.to_dataset
46
+ exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
47
+ exp.fields=%w{a_1 b a_2}
48
+ assert_equal(exp,ds1.merge(ds3))
51
49
  end
52
50
  def test_each_vector
53
51
  a=[1,2,3].to_vector
@@ -56,13 +54,13 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
56
54
  ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
57
55
  res=[]
58
56
  ds.each_vector{|k,v|
59
- res.push([k,v])
57
+ res.push([k,v])
60
58
  }
61
59
  assert_equal([["a",a],["b",b]],res)
62
60
  ds.fields=["b","a"]
63
61
  res=[]
64
62
  ds.each_vector{|k,v|
65
- res.push([k,v])
63
+ res.push([k,v])
66
64
  }
67
65
  assert_equal([["b",b],["a",a]],res)
68
66
  end
@@ -82,84 +80,84 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
82
80
  @ds.add_vector('new',v)
83
81
  assert_equal(%w{id name age city a1 new},@ds.fields)
84
82
  x=Statsample::Vector.new(%w{a b c d e f g})
85
- assert_raise ArgumentError do
86
- @ds.add_vector('new2',x)
83
+ assert_raise ArgumentError do
84
+ @ds.add_vector('new2',x)
87
85
  end
88
86
  end
89
87
  def test_vector_by_calculation
90
- a1=[1,2,3,4,5,6,7].to_vector(:scale)
91
- a2=[10,20,30,40,50,60,70].to_vector(:scale)
92
- a3=[100,200,300,400,500,600,700].to_vector(:scale)
93
- ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
94
- total=ds.vector_by_calculation() {|row|
95
- row['a1']+row['a2']+row['a3']
96
- }
97
- expected=[111,222,333,444,555,666,777].to_vector(:scale)
98
- assert_equal(expected,total)
88
+ a1=[1,2,3,4,5,6,7].to_vector(:scale)
89
+ a2=[10,20,30,40,50,60,70].to_vector(:scale)
90
+ a3=[100,200,300,400,500,600,700].to_vector(:scale)
91
+ ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
92
+ total=ds.vector_by_calculation() {|row|
93
+ row['a1']+row['a2']+row['a3']
94
+ }
95
+ expected=[111,222,333,444,555,666,777].to_vector(:scale)
96
+ assert_equal(expected,total)
99
97
  end
100
98
  def test_vector_sum
101
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
102
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
103
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
104
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
105
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
106
- total=ds.vector_sum
107
- a=ds.vector_sum(['a1','a2'])
108
- b=ds.vector_sum(['b1','b2'])
109
- expected_a=[11,12,23,24,25,nil].to_vector(:scale)
110
- expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
111
- expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
112
- assert_equal(expected_a, a)
113
- assert_equal(expected_b, b)
114
- assert_equal(expected_total, total)
99
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
100
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
101
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
102
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
103
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
104
+ total=ds.vector_sum
105
+ a=ds.vector_sum(['a1','a2'])
106
+ b=ds.vector_sum(['b1','b2'])
107
+ expected_a=[11,12,23,24,25,nil].to_vector(:scale)
108
+ expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
109
+ expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
110
+ assert_equal(expected_a, a)
111
+ assert_equal(expected_b, b)
112
+ assert_equal(expected_total, total)
115
113
  end
116
114
  def test_vector_missing_values
117
115
  a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
118
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
119
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
120
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
116
+ a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
117
+ b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
118
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
121
119
  c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
122
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
120
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
123
121
  mva=[2,3,0,1,0,1].to_vector(:scale)
124
122
  assert_equal(mva,ds.vector_missing_values)
125
123
  end
126
124
  def test_vector_count_characters
127
125
  a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
128
- a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
129
- b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
130
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
126
+ a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
127
+ b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
128
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
131
129
  c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
132
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
130
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
133
131
  exp=[4,17,27,5,6,5].to_vector(:scale)
134
132
  assert_equal(exp,ds.vector_count_characters)
135
-
133
+
136
134
  end
137
135
  def test_vector_mean
138
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
139
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
140
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
141
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
136
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
137
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
138
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
139
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
142
140
  c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
143
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
144
- total=ds.vector_mean
145
- a=ds.vector_mean(['a1','a2'],1)
146
- b=ds.vector_mean(['b1','b2'],1)
141
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
142
+ total=ds.vector_mean
143
+ a=ds.vector_mean(['a1','a2'],1)
144
+ b=ds.vector_mean(['b1','b2'],1)
147
145
  c=ds.vector_mean(['b1','b2','c'],1)
148
- expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
149
- expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
150
- expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
151
- expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
152
- assert_equal(expected_a, a)
153
- assert_equal(expected_b, b)
154
- assert_equal(expected_c, c)
155
- assert_equal(expected_total, total)
156
- end
157
-
146
+ expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
147
+ expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
148
+ expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
149
+ expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
150
+ assert_equal(expected_a, a)
151
+ assert_equal(expected_b, b)
152
+ assert_equal(expected_c, c)
153
+ assert_equal(expected_total, total)
154
+ end
155
+
158
156
  def test_each_array
159
157
  expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
160
158
  out=[]
161
159
  @ds.each_array{ |a|
162
- out.push(a)
160
+ out.push(a)
163
161
  }
164
162
  assert_equal(expected,out)
165
163
  end
@@ -175,9 +173,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
175
173
  # Native methods
176
174
  assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
177
175
  assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
178
-
179
-
180
-
176
+
177
+
178
+
181
179
  end
182
180
  def test_delete_vector
183
181
  @ds.delete_vector('name')
@@ -204,16 +202,16 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
204
202
  assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
205
203
  end
206
204
  def test_percentiles
207
- v1=(1..100).to_a.to_scale
208
- assert_equal(50.5,v1.median)
209
- assert_equal(25.5, v1.percentil(25))
210
- v2=(1..99).to_a.to_scale
211
- assert_equal(50,v2.median)
212
- assert_equal(25,v2.percentil(25))
213
- v3=(1..50).to_a.to_scale
214
- assert_equal(25.5, v3.median)
215
- assert_equal(13, v3.percentil(25))
216
-
205
+ v1=(1..100).to_a.to_scale
206
+ assert_equal(50.5,v1.median)
207
+ assert_equal(25.5, v1.percentil(25))
208
+ v2=(1..99).to_a.to_scale
209
+ assert_equal(50,v2.median)
210
+ assert_equal(25,v2.percentil(25))
211
+ v3=(1..50).to_a.to_scale
212
+ assert_equal(25.5, v3.median)
213
+ assert_equal(13, v3.percentil(25))
214
+
217
215
  end
218
216
  def test_add_case
219
217
  ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
@@ -227,7 +225,7 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
227
225
  ds.add_case_array([6,7,1])
228
226
  ds.update_valid_data
229
227
  assert_equal([6,7,1],ds.case_as_array(4))
230
-
228
+
231
229
  end
232
230
  def test_marshaling
233
231
  ds_marshal=Marshal.load(Marshal.dump(@ds))
@@ -243,8 +241,8 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
243
241
  assert_equal(%w{v2 v1},ds2.fields)
244
242
  assert_same(ds1['v1'],ds2['v1'])
245
243
  assert_same(ds1['v2'],ds2['v2'])
246
-
247
-
244
+
245
+
248
246
  end
249
247
  def test_dup
250
248
  v1=[1,2,3,4].to_vector
@@ -267,8 +265,8 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
267
265
  assert_not_same(ds3['v1'],ds_exp['v1'])
268
266
  assert_equal(ds3.fields,ds_exp.fields)
269
267
  assert_not_same(ds3.fields,ds_exp.fields)
270
-
271
-
268
+
269
+
272
270
  # empty
273
271
  ds3=ds1.dup_empty
274
272
  assert_not_equal(ds1,ds3)
@@ -282,22 +280,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
282
280
  def test_from_to
283
281
  assert_equal(%w{name age city}, @ds.from_to("name","city"))
284
282
  assert_raise ArgumentError do
285
- @ds.from_to("name","a2")
283
+ @ds.from_to("name","a2")
286
284
  end
287
285
  end
288
- def test_each_array_with_nils
289
- v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
290
- v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
291
- v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
292
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
293
- ds2=ds1.dup_empty
294
- ds1.each_array_with_nils {|row|
295
- ds2.add_case_array(row)
296
- }
297
- ds2.update_valid_data
298
- assert_equal([1,nil,3,4,nil],ds2['v1'].data)
299
- assert_equal([5,6,nil,8,20],ds2['v2'].data)
300
- end
286
+ def test_each_array_with_nils
287
+ v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
288
+ v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
289
+ v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
290
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
291
+ ds2=ds1.dup_empty
292
+ ds1.each_array_with_nils {|row|
293
+ ds2.add_case_array(row)
294
+ }
295
+ ds2.update_valid_data
296
+ assert_equal([1,nil,3,4,nil],ds2['v1'].data)
297
+ assert_equal([5,6,nil,8,20],ds2['v2'].data)
298
+ end
301
299
  def test_dup_only_valid
302
300
  v1=[1,nil,3,4].to_vector(:scale)
303
301
  v2=[5,6,nil,8].to_vector(:scale)
@@ -306,22 +304,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
306
304
  ds2=ds1.dup_only_valid
307
305
  expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
308
306
  assert_equal(expected,ds2)
309
- assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
307
+ assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
310
308
  end
311
309
  def test_filter
312
310
  @ds['age'].type=:scale
313
311
  filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
314
312
  expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
315
- 'city'=>Statsample::Vector.new(['London','Paris']),
313
+ 'city'=>Statsample::Vector.new(['London','Paris']),
316
314
  'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
317
315
  assert_equal(expected,filtered)
318
- end
316
+ end
319
317
  def test_filter_field
320
- @ds['age'].type=:scale
318
+ @ds['age'].type=:scale
321
319
  filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
322
320
  expected=[2,4].to_vector
323
321
  assert_equal(expected,filtered)
324
-
322
+
325
323
  end
326
324
  def test_verify
327
325
  name=%w{r1 r2 r3 r4}.to_vector(:nominal)
@@ -351,42 +349,42 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
351
349
  vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
352
350
  vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
353
351
  ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
354
- assert_equal(vscale,ds.compute("v1/2"))
352
+ assert_equal(vscale,ds.compute("v1/2"))
355
353
  assert_equal(vsum,ds.compute("v1+v2+v3"))
356
354
  assert_equal(vmult,ds.compute("v1*v2"))
357
-
355
+
358
356
  end
359
357
  def test_crosstab_with_asignation
360
- v1=%w{a a a b b b c c c}.to_vector
361
- v2=%w{a b c a b c a b c}.to_vector
362
- v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
363
- ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
364
- assert_equal(:nominal, ds['_id'].type)
365
- assert_equal(:scale, ds['a'].type)
366
- assert_equal(:scale, ds['b'].type)
367
- ev_id=%w{a b c}.to_vector
368
- ev_a =%w{0 0 0}.to_scale
369
- ev_b =%w{1 1 0}.to_scale
370
- ev_c =%w{0 1 1}.to_scale
371
- ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
372
- assert_equal(ds, ds2)
358
+ v1=%w{a a a b b b c c c}.to_vector
359
+ v2=%w{a b c a b c a b c}.to_vector
360
+ v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
361
+ ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
362
+ assert_equal(:nominal, ds['_id'].type)
363
+ assert_equal(:scale, ds['a'].type)
364
+ assert_equal(:scale, ds['b'].type)
365
+ ev_id=%w{a b c}.to_vector
366
+ ev_a =%w{0 0 0}.to_scale
367
+ ev_b =%w{1 1 0}.to_scale
368
+ ev_c =%w{0 1 1}.to_scale
369
+ ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
370
+ assert_equal(ds, ds2)
373
371
  end
374
372
  def test_one_to_many
375
- cases=[
376
- ['1','george','red',10,'blue',20,nil,nil],
377
- ['2','fred','green',15,'orange',30,'white',20],
378
- ['3','alfred',nil,nil,nil,nil,nil,nil]
379
- ]
380
- ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
381
- cases.each {|c| ds.add_case_array c }
382
- ds.update_valid_data
383
- ids=%w{1 1 2 2 2}.to_vector
384
- colors=%w{red blue green orange white}.to_vector
385
- values=[10,20,15,30,20].to_vector
386
- col_ids=[1,2,1,2,3].to_scale
387
- ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
388
- assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
389
-
390
- end
391
-
373
+ cases=[
374
+ ['1','george','red',10,'blue',20,nil,nil],
375
+ ['2','fred','green',15,'orange',30,'white',20],
376
+ ['3','alfred',nil,nil,nil,nil,nil,nil]
377
+ ]
378
+ ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
379
+ cases.each {|c| ds.add_case_array c }
380
+ ds.update_valid_data
381
+ ids=%w{1 1 2 2 2}.to_vector
382
+ colors=%w{red blue green orange white}.to_vector
383
+ values=[10,20,15,30,20].to_vector
384
+ col_ids=[1,2,1,2,3].to_scale
385
+ ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
386
+ assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
387
+
388
+ end
389
+
392
390
  end
data/test/test_distribution.rb CHANGED
@@ -1,13 +1,16 @@
1
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
+
2
3
  require 'distribution'
3
- require 'test/unit'
4
+
4
5
  begin
5
- require 'rbgsl'
6
- NOT_GSL=false
6
+ require 'rbgsl'
7
+ NOT_GSL=false
7
8
  rescue LoadError
8
- NOT_GSL=true
9
+ NOT_GSL=true
9
10
  end
10
- class DistributionTestCase < Test::Unit::TestCase
11
+
12
+
13
+ class DistributionTestCase < MiniTest::Unit::TestCase
11
14
  def test_chi
12
15
  if !NOT_GSL
13
16
  [2,3,4,5].each{|k|
@@ -25,7 +28,7 @@ class DistributionTestCase < Test::Unit::TestCase
25
28
  area=Distribution::T.cdf(t,n)
26
29
  assert_in_delta(area, GSL::Cdf.tdist_P(t,n),0.0001)
27
30
  assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n),0.0001)
28
-
31
+
29
32
  }
30
33
  }
31
34
  end
@@ -44,34 +47,34 @@ class DistributionTestCase < Test::Unit::TestCase
44
47
  if !NOT_GSL
45
48
  [0.2,0.4,0.6,0.8,0.9, 0.99,0.999,0.999999].each {|rho|
46
49
  assert_equal(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1,1,rho), Distribution::NormalBivariate.pdf(0,0, rho , 1,1))
47
-
50
+
48
51
  }
49
52
  end
50
-
53
+
51
54
  [-3,-2,-1,0,1,1.5].each {|x|
52
55
  assert_in_delta(Distribution::NormalBivariate.cdf_hull(x,x,0.5), Distribution::NormalBivariate.cdf_genz(x,x,0.5), 0.001)
53
56
  assert_in_delta(Distribution::NormalBivariate.cdf_genz(x,x,0.5), Distribution::NormalBivariate.cdf_jantaravareerat(x,x,0.5), 0.001)
54
57
  }
55
-
58
+
56
59
  assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2,0.5,0.5), 0.001)
57
60
  assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2,0.0,0.5), 0.001)
58
61
  assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5,0.5,0.5), 0.001)
59
-
62
+
60
63
  assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10,0,0.9), 0.001)
61
64
  end
62
65
  def test_f
63
66
  if !NOT_GSL
64
- [0.1,0.5,1,2,10,20,30].each{|f|
65
- [2,5,10].each{|n2|
66
- [2,5,10].each{|n1|
67
- area=Distribution::F.cdf(f,n1,n2)
68
- assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
69
- assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
70
-
71
- }
72
- }
67
+ [0.1,0.5,1,2,10,20,30].each{|f|
68
+ [2,5,10].each{|n2|
69
+ [2,5,10].each{|n1|
70
+ area=Distribution::F.cdf(f,n1,n2)
71
+ assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
72
+ assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
73
+
74
+ }
73
75
  }
76
+ }
74
77
  end
75
78
  end
76
79
 
77
- end
80
+ end