statsample 0.7.0 → 0.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. data/History.txt +7 -0
  2. data/Manifest.txt +15 -9
  3. data/README.txt +6 -0
  4. data/Rakefile +8 -0
  5. data/{demo → examples}/correlation_matrix.rb +0 -0
  6. data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
  7. data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
  8. data/{demo → examples}/levene.rb +0 -0
  9. data/{demo → examples}/multiple_regression.rb +5 -3
  10. data/{demo → examples}/multivariate_correlation.rb +0 -0
  11. data/{demo → examples}/polychoric.rb +0 -0
  12. data/{demo → examples}/principal_axis.rb +0 -0
  13. data/examples/t_test.rb +11 -0
  14. data/{demo → examples}/tetrachoric.rb +0 -0
  15. data/lib/statistics2.rb +1 -1
  16. data/lib/statsample.rb +57 -6
  17. data/lib/statsample/bivariate/polychoric.rb +12 -25
  18. data/lib/statsample/bivariate/tetrachoric.rb +1 -3
  19. data/lib/statsample/converter/csv.rb +11 -12
  20. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
  21. data/lib/statsample/factor/principalaxis.rb +0 -2
  22. data/lib/statsample/factor/rotation.rb +6 -8
  23. data/lib/statsample/graph.rb +8 -0
  24. data/lib/statsample/graph/svggraph.rb +0 -4
  25. data/lib/statsample/regression/multiple/baseengine.rb +25 -28
  26. data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
  27. data/lib/statsample/test.rb +36 -1
  28. data/lib/statsample/test/levene.rb +11 -7
  29. data/lib/statsample/test/t.rb +189 -0
  30. data/test/test_anova.rb +8 -10
  31. data/test/test_bivariate.rb +40 -37
  32. data/test/test_codification.rb +9 -13
  33. data/test/test_combination.rb +37 -39
  34. data/test/test_crosstab.rb +46 -48
  35. data/test/test_csv.rb +40 -45
  36. data/test/test_dataset.rb +150 -152
  37. data/test/test_distribution.rb +24 -21
  38. data/test/test_dominance_analysis.rb +10 -12
  39. data/test/test_factor.rb +95 -91
  40. data/test/test_ggobi.rb +30 -33
  41. data/test/test_gsl.rb +4 -4
  42. data/test/test_helpers.rb +26 -0
  43. data/test/test_histogram.rb +5 -6
  44. data/test/test_logit.rb +20 -21
  45. data/test/test_matrix.rb +47 -48
  46. data/test/test_mle.rb +130 -131
  47. data/test/test_multiset.rb +95 -96
  48. data/test/test_permutation.rb +35 -36
  49. data/test/test_promise_after.rb +39 -0
  50. data/test/test_regression.rb +49 -51
  51. data/test/test_reliability.rb +29 -30
  52. data/test/test_resample.rb +22 -23
  53. data/test/test_srs.rb +8 -9
  54. data/test/test_statistics.rb +12 -6
  55. data/test/test_stest.rb +18 -10
  56. data/test/test_stratified.rb +15 -16
  57. data/test/test_svg_graph.rb +11 -22
  58. data/test/test_test_t.rb +40 -0
  59. data/test/test_umannwhitney.rb +14 -15
  60. data/test/test_vector.rb +33 -37
  61. data/test/test_xls.rb +34 -41
  62. metadata +22 -11
data/test/test_dataset.rb CHANGED
@@ -1,11 +1,9 @@
1
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
- require 'statsample'
3
- require 'test/unit'
4
- require 'tempfile'
5
- class StatsampleDatasetTestCase < Test::Unit::TestCase
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
+
3
+ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
6
4
  def setup
7
5
  @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
8
- 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
6
+ 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
9
7
  'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
10
8
  end
11
9
  def test_basic
@@ -14,40 +12,40 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
14
12
  end
15
13
  def test_saveload
16
14
  outfile=Tempfile.new("dataset.ds")
17
- @ds.save(outfile.path)
18
- a=Statsample.load(outfile.path)
19
- assert_equal(@ds,a)
15
+ @ds.save(outfile.path)
16
+ a=Statsample.load(outfile.path)
17
+ assert_equal(@ds,a)
20
18
  end
21
-
19
+
22
20
  def test_matrix
23
21
  matrix=Matrix[[1,2],[3,4],[5,6]]
24
22
  ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
25
23
  assert_equal(matrix,ds.to_matrix)
26
24
  end
27
-
25
+
28
26
  def test_fields
29
27
  @ds.fields=%w{name a1 id age city}
30
28
  assert_equal(%w{name a1 id age city}, @ds.fields)
31
29
  @ds.fields=%w{id name age}
32
- assert_equal(%w{id name age a1 city}, @ds.fields)
30
+ assert_equal(%w{id name age a1 city}, @ds.fields)
33
31
  end
34
32
  def test_merge
35
- a=[1,2,3].to_scale
36
- b=[3,4,5].to_vector
37
- c=[4,5,6].to_scale
38
- d=[7,8,9].to_vector
39
- e=[10,20,30].to_vector
40
- ds1={'a'=>a,'b'=>b}.to_dataset
41
- ds2={'c'=>c,'d'=>d}.to_dataset
42
- exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
43
-
44
- assert_equal(exp,ds1.merge(ds2))
45
- exp.fields=%w{c d a b}
46
- assert_equal(exp,ds2.merge(ds1))
47
- ds3={'a'=>e}.to_dataset
48
- exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
49
- exp.fields=%w{a_1 b a_2}
50
- assert_equal(exp,ds1.merge(ds3))
33
+ a=[1,2,3].to_scale
34
+ b=[3,4,5].to_vector
35
+ c=[4,5,6].to_scale
36
+ d=[7,8,9].to_vector
37
+ e=[10,20,30].to_vector
38
+ ds1={'a'=>a,'b'=>b}.to_dataset
39
+ ds2={'c'=>c,'d'=>d}.to_dataset
40
+ exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
41
+
42
+ assert_equal(exp,ds1.merge(ds2))
43
+ exp.fields=%w{c d a b}
44
+ assert_equal(exp,ds2.merge(ds1))
45
+ ds3={'a'=>e}.to_dataset
46
+ exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
47
+ exp.fields=%w{a_1 b a_2}
48
+ assert_equal(exp,ds1.merge(ds3))
51
49
  end
52
50
  def test_each_vector
53
51
  a=[1,2,3].to_vector
@@ -56,13 +54,13 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
56
54
  ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
57
55
  res=[]
58
56
  ds.each_vector{|k,v|
59
- res.push([k,v])
57
+ res.push([k,v])
60
58
  }
61
59
  assert_equal([["a",a],["b",b]],res)
62
60
  ds.fields=["b","a"]
63
61
  res=[]
64
62
  ds.each_vector{|k,v|
65
- res.push([k,v])
63
+ res.push([k,v])
66
64
  }
67
65
  assert_equal([["b",b],["a",a]],res)
68
66
  end
@@ -82,84 +80,84 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
82
80
  @ds.add_vector('new',v)
83
81
  assert_equal(%w{id name age city a1 new},@ds.fields)
84
82
  x=Statsample::Vector.new(%w{a b c d e f g})
85
- assert_raise ArgumentError do
86
- @ds.add_vector('new2',x)
83
+ assert_raise ArgumentError do
84
+ @ds.add_vector('new2',x)
87
85
  end
88
86
  end
89
87
  def test_vector_by_calculation
90
- a1=[1,2,3,4,5,6,7].to_vector(:scale)
91
- a2=[10,20,30,40,50,60,70].to_vector(:scale)
92
- a3=[100,200,300,400,500,600,700].to_vector(:scale)
93
- ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
94
- total=ds.vector_by_calculation() {|row|
95
- row['a1']+row['a2']+row['a3']
96
- }
97
- expected=[111,222,333,444,555,666,777].to_vector(:scale)
98
- assert_equal(expected,total)
88
+ a1=[1,2,3,4,5,6,7].to_vector(:scale)
89
+ a2=[10,20,30,40,50,60,70].to_vector(:scale)
90
+ a3=[100,200,300,400,500,600,700].to_vector(:scale)
91
+ ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
92
+ total=ds.vector_by_calculation() {|row|
93
+ row['a1']+row['a2']+row['a3']
94
+ }
95
+ expected=[111,222,333,444,555,666,777].to_vector(:scale)
96
+ assert_equal(expected,total)
99
97
  end
100
98
  def test_vector_sum
101
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
102
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
103
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
104
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
105
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
106
- total=ds.vector_sum
107
- a=ds.vector_sum(['a1','a2'])
108
- b=ds.vector_sum(['b1','b2'])
109
- expected_a=[11,12,23,24,25,nil].to_vector(:scale)
110
- expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
111
- expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
112
- assert_equal(expected_a, a)
113
- assert_equal(expected_b, b)
114
- assert_equal(expected_total, total)
99
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
100
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
101
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
102
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
103
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
104
+ total=ds.vector_sum
105
+ a=ds.vector_sum(['a1','a2'])
106
+ b=ds.vector_sum(['b1','b2'])
107
+ expected_a=[11,12,23,24,25,nil].to_vector(:scale)
108
+ expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
109
+ expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
110
+ assert_equal(expected_a, a)
111
+ assert_equal(expected_b, b)
112
+ assert_equal(expected_total, total)
115
113
  end
116
114
  def test_vector_missing_values
117
115
  a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
118
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
119
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
120
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
116
+ a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
117
+ b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
118
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
121
119
  c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
122
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
120
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
123
121
  mva=[2,3,0,1,0,1].to_vector(:scale)
124
122
  assert_equal(mva,ds.vector_missing_values)
125
123
  end
126
124
  def test_vector_count_characters
127
125
  a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
128
- a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
129
- b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
130
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
126
+ a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
127
+ b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
128
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
131
129
  c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
132
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
130
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
133
131
  exp=[4,17,27,5,6,5].to_vector(:scale)
134
132
  assert_equal(exp,ds.vector_count_characters)
135
-
133
+
136
134
  end
137
135
  def test_vector_mean
138
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
139
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
140
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
141
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
136
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
137
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
138
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
139
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
142
140
  c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
143
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
144
- total=ds.vector_mean
145
- a=ds.vector_mean(['a1','a2'],1)
146
- b=ds.vector_mean(['b1','b2'],1)
141
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
142
+ total=ds.vector_mean
143
+ a=ds.vector_mean(['a1','a2'],1)
144
+ b=ds.vector_mean(['b1','b2'],1)
147
145
  c=ds.vector_mean(['b1','b2','c'],1)
148
- expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
149
- expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
150
- expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
151
- expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
152
- assert_equal(expected_a, a)
153
- assert_equal(expected_b, b)
154
- assert_equal(expected_c, c)
155
- assert_equal(expected_total, total)
156
- end
157
-
146
+ expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
147
+ expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
148
+ expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
149
+ expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
150
+ assert_equal(expected_a, a)
151
+ assert_equal(expected_b, b)
152
+ assert_equal(expected_c, c)
153
+ assert_equal(expected_total, total)
154
+ end
155
+
158
156
  def test_each_array
159
157
  expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
160
158
  out=[]
161
159
  @ds.each_array{ |a|
162
- out.push(a)
160
+ out.push(a)
163
161
  }
164
162
  assert_equal(expected,out)
165
163
  end
@@ -175,9 +173,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
175
173
  # Native methods
176
174
  assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
177
175
  assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
178
-
179
-
180
-
176
+
177
+
178
+
181
179
  end
182
180
  def test_delete_vector
183
181
  @ds.delete_vector('name')
@@ -204,16 +202,16 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
204
202
  assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
205
203
  end
206
204
  def test_percentiles
207
- v1=(1..100).to_a.to_scale
208
- assert_equal(50.5,v1.median)
209
- assert_equal(25.5, v1.percentil(25))
210
- v2=(1..99).to_a.to_scale
211
- assert_equal(50,v2.median)
212
- assert_equal(25,v2.percentil(25))
213
- v3=(1..50).to_a.to_scale
214
- assert_equal(25.5, v3.median)
215
- assert_equal(13, v3.percentil(25))
216
-
205
+ v1=(1..100).to_a.to_scale
206
+ assert_equal(50.5,v1.median)
207
+ assert_equal(25.5, v1.percentil(25))
208
+ v2=(1..99).to_a.to_scale
209
+ assert_equal(50,v2.median)
210
+ assert_equal(25,v2.percentil(25))
211
+ v3=(1..50).to_a.to_scale
212
+ assert_equal(25.5, v3.median)
213
+ assert_equal(13, v3.percentil(25))
214
+
217
215
  end
218
216
  def test_add_case
219
217
  ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
@@ -227,7 +225,7 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
227
225
  ds.add_case_array([6,7,1])
228
226
  ds.update_valid_data
229
227
  assert_equal([6,7,1],ds.case_as_array(4))
230
-
228
+
231
229
  end
232
230
  def test_marshaling
233
231
  ds_marshal=Marshal.load(Marshal.dump(@ds))
@@ -243,8 +241,8 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
243
241
  assert_equal(%w{v2 v1},ds2.fields)
244
242
  assert_same(ds1['v1'],ds2['v1'])
245
243
  assert_same(ds1['v2'],ds2['v2'])
246
-
247
-
244
+
245
+
248
246
  end
249
247
  def test_dup
250
248
  v1=[1,2,3,4].to_vector
@@ -267,8 +265,8 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
267
265
  assert_not_same(ds3['v1'],ds_exp['v1'])
268
266
  assert_equal(ds3.fields,ds_exp.fields)
269
267
  assert_not_same(ds3.fields,ds_exp.fields)
270
-
271
-
268
+
269
+
272
270
  # empty
273
271
  ds3=ds1.dup_empty
274
272
  assert_not_equal(ds1,ds3)
@@ -282,22 +280,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
282
280
  def test_from_to
283
281
  assert_equal(%w{name age city}, @ds.from_to("name","city"))
284
282
  assert_raise ArgumentError do
285
- @ds.from_to("name","a2")
283
+ @ds.from_to("name","a2")
286
284
  end
287
285
  end
288
- def test_each_array_with_nils
289
- v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
290
- v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
291
- v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
292
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
293
- ds2=ds1.dup_empty
294
- ds1.each_array_with_nils {|row|
295
- ds2.add_case_array(row)
296
- }
297
- ds2.update_valid_data
298
- assert_equal([1,nil,3,4,nil],ds2['v1'].data)
299
- assert_equal([5,6,nil,8,20],ds2['v2'].data)
300
- end
286
+ def test_each_array_with_nils
287
+ v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
288
+ v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
289
+ v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
290
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
291
+ ds2=ds1.dup_empty
292
+ ds1.each_array_with_nils {|row|
293
+ ds2.add_case_array(row)
294
+ }
295
+ ds2.update_valid_data
296
+ assert_equal([1,nil,3,4,nil],ds2['v1'].data)
297
+ assert_equal([5,6,nil,8,20],ds2['v2'].data)
298
+ end
301
299
  def test_dup_only_valid
302
300
  v1=[1,nil,3,4].to_vector(:scale)
303
301
  v2=[5,6,nil,8].to_vector(:scale)
@@ -306,22 +304,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
306
304
  ds2=ds1.dup_only_valid
307
305
  expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
308
306
  assert_equal(expected,ds2)
309
- assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
307
+ assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
310
308
  end
311
309
  def test_filter
312
310
  @ds['age'].type=:scale
313
311
  filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
314
312
  expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
315
- 'city'=>Statsample::Vector.new(['London','Paris']),
313
+ 'city'=>Statsample::Vector.new(['London','Paris']),
316
314
  'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
317
315
  assert_equal(expected,filtered)
318
- end
316
+ end
319
317
  def test_filter_field
320
- @ds['age'].type=:scale
318
+ @ds['age'].type=:scale
321
319
  filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
322
320
  expected=[2,4].to_vector
323
321
  assert_equal(expected,filtered)
324
-
322
+
325
323
  end
326
324
  def test_verify
327
325
  name=%w{r1 r2 r3 r4}.to_vector(:nominal)
@@ -351,42 +349,42 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
351
349
  vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
352
350
  vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
353
351
  ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
354
- assert_equal(vscale,ds.compute("v1/2"))
352
+ assert_equal(vscale,ds.compute("v1/2"))
355
353
  assert_equal(vsum,ds.compute("v1+v2+v3"))
356
354
  assert_equal(vmult,ds.compute("v1*v2"))
357
-
355
+
358
356
  end
359
357
  def test_crosstab_with_asignation
360
- v1=%w{a a a b b b c c c}.to_vector
361
- v2=%w{a b c a b c a b c}.to_vector
362
- v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
363
- ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
364
- assert_equal(:nominal, ds['_id'].type)
365
- assert_equal(:scale, ds['a'].type)
366
- assert_equal(:scale, ds['b'].type)
367
- ev_id=%w{a b c}.to_vector
368
- ev_a =%w{0 0 0}.to_scale
369
- ev_b =%w{1 1 0}.to_scale
370
- ev_c =%w{0 1 1}.to_scale
371
- ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
372
- assert_equal(ds, ds2)
358
+ v1=%w{a a a b b b c c c}.to_vector
359
+ v2=%w{a b c a b c a b c}.to_vector
360
+ v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
361
+ ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
362
+ assert_equal(:nominal, ds['_id'].type)
363
+ assert_equal(:scale, ds['a'].type)
364
+ assert_equal(:scale, ds['b'].type)
365
+ ev_id=%w{a b c}.to_vector
366
+ ev_a =%w{0 0 0}.to_scale
367
+ ev_b =%w{1 1 0}.to_scale
368
+ ev_c =%w{0 1 1}.to_scale
369
+ ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
370
+ assert_equal(ds, ds2)
373
371
  end
374
372
  def test_one_to_many
375
- cases=[
376
- ['1','george','red',10,'blue',20,nil,nil],
377
- ['2','fred','green',15,'orange',30,'white',20],
378
- ['3','alfred',nil,nil,nil,nil,nil,nil]
379
- ]
380
- ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
381
- cases.each {|c| ds.add_case_array c }
382
- ds.update_valid_data
383
- ids=%w{1 1 2 2 2}.to_vector
384
- colors=%w{red blue green orange white}.to_vector
385
- values=[10,20,15,30,20].to_vector
386
- col_ids=[1,2,1,2,3].to_scale
387
- ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
388
- assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
389
-
390
- end
391
-
373
+ cases=[
374
+ ['1','george','red',10,'blue',20,nil,nil],
375
+ ['2','fred','green',15,'orange',30,'white',20],
376
+ ['3','alfred',nil,nil,nil,nil,nil,nil]
377
+ ]
378
+ ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
379
+ cases.each {|c| ds.add_case_array c }
380
+ ds.update_valid_data
381
+ ids=%w{1 1 2 2 2}.to_vector
382
+ colors=%w{red blue green orange white}.to_vector
383
+ values=[10,20,15,30,20].to_vector
384
+ col_ids=[1,2,1,2,3].to_scale
385
+ ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
386
+ assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
387
+
388
+ end
389
+
392
390
  end
data/test/test_distribution.rb CHANGED
@@ -1,13 +1,16 @@
1
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
+
2
3
  require 'distribution'
3
- require 'test/unit'
4
+
4
5
  begin
5
- require 'rbgsl'
6
- NOT_GSL=false
6
+ require 'rbgsl'
7
+ NOT_GSL=false
7
8
  rescue LoadError
8
- NOT_GSL=true
9
+ NOT_GSL=true
9
10
  end
10
- class DistributionTestCase < Test::Unit::TestCase
11
+
12
+
13
+ class DistributionTestCase < MiniTest::Unit::TestCase
11
14
  def test_chi
12
15
  if !NOT_GSL
13
16
  [2,3,4,5].each{|k|
@@ -25,7 +28,7 @@ class DistributionTestCase < Test::Unit::TestCase
25
28
  area=Distribution::T.cdf(t,n)
26
29
  assert_in_delta(area, GSL::Cdf.tdist_P(t,n),0.0001)
27
30
  assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n),0.0001)
28
-
31
+
29
32
  }
30
33
  }
31
34
  end
@@ -44,34 +47,34 @@ class DistributionTestCase < Test::Unit::TestCase
44
47
  if !NOT_GSL
45
48
  [0.2,0.4,0.6,0.8,0.9, 0.99,0.999,0.999999].each {|rho|
46
49
  assert_equal(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1,1,rho), Distribution::NormalBivariate.pdf(0,0, rho , 1,1))
47
-
50
+
48
51
  }
49
52
  end
50
-
53
+
51
54
  [-3,-2,-1,0,1,1.5].each {|x|
52
55
  assert_in_delta(Distribution::NormalBivariate.cdf_hull(x,x,0.5), Distribution::NormalBivariate.cdf_genz(x,x,0.5), 0.001)
53
56
  assert_in_delta(Distribution::NormalBivariate.cdf_genz(x,x,0.5), Distribution::NormalBivariate.cdf_jantaravareerat(x,x,0.5), 0.001)
54
57
  }
55
-
58
+
56
59
  assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2,0.5,0.5), 0.001)
57
60
  assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2,0.0,0.5), 0.001)
58
61
  assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5,0.5,0.5), 0.001)
59
-
62
+
60
63
  assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10,0,0.9), 0.001)
61
64
  end
62
65
  def test_f
63
66
  if !NOT_GSL
64
- [0.1,0.5,1,2,10,20,30].each{|f|
65
- [2,5,10].each{|n2|
66
- [2,5,10].each{|n1|
67
- area=Distribution::F.cdf(f,n1,n2)
68
- assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
69
- assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
70
-
71
- }
72
- }
67
+ [0.1,0.5,1,2,10,20,30].each{|f|
68
+ [2,5,10].each{|n2|
69
+ [2,5,10].each{|n1|
70
+ area=Distribution::F.cdf(f,n1,n2)
71
+ assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
72
+ assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
73
+
74
+ }
73
75
  }
76
+ }
74
77
  end
75
78
  end
76
79
 
77
- end
80
+ end