statsample 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/test/test_vector.rb CHANGED
@@ -30,12 +30,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
30
30
  end
31
31
  @correct_a=@correct_a.to_scale
32
32
  @correct_b=@correct_b.to_scale
33
-
33
+
34
34
  @common=lambda do |av,bv|
35
35
  assert_equal(@correct_a, av, "A no es esperado")
36
36
  assert_equal(@correct_b, bv, "B no es esperado")
37
37
  assert(!av.has_missing_data?, "A tiene datos faltantes")
38
- assert(!bv.has_missing_data?, "b tiene datos faltantes")
38
+ assert(!bv.has_missing_data?, "b tiene datos faltantes")
39
39
  end
40
40
  end
41
41
  should "return correct only_valid" do
@@ -53,10 +53,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
53
53
  assert_equal(av,av2)
54
54
  assert_same(av,av2)
55
55
  assert_same(bv,bv2)
56
- end
56
+ end
57
57
  end
58
58
  context Statsample::Vector do
59
- setup do
59
+ setup do
60
60
  @c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
61
61
  @c.name="Test Vector"
62
62
  @c.missing_values=[-99]
@@ -66,18 +66,18 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
66
66
  v=Statsample::Vector.new(gsl)
67
67
  assert_equal([1,2,3,4,5], v.to_a)
68
68
  refute(v.flawed?)
69
-
69
+
70
70
  end
71
-
71
+
72
72
  context "using matrix operations" do
73
73
  setup do
74
74
  @a=[1,2,3,4,5].to_scale
75
75
  end
76
- should "to_matrix returns a matrix with 1 row" do
76
+ should "to_matrix returns a matrix with 1 row" do
77
77
  mh=Matrix[[1,2,3,4,5]]
78
78
  assert_equal(mh,@a.to_matrix)
79
79
  end
80
- should "to_matrix(:vertical) returns a matrix with 1 column" do
80
+ should "to_matrix(:vertical) returns a matrix with 1 column" do
81
81
  mv=Matrix.columns([[1,2,3,4,5]])
82
82
  assert_equal(mv,@a.to_matrix(:vertical))
83
83
  end
@@ -89,7 +89,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
89
89
  end
90
90
  end
91
91
  context "when initializing" do
92
- setup do
92
+ setup do
93
93
  @data=(10.times.map{rand(100)})+[nil]
94
94
  @original=Statsample::Vector.new(@data, :scale)
95
95
  end
@@ -103,9 +103,9 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
103
103
  assert_equal(reference, Statsample::Vector[0,4..6,10])
104
104
  assert_equal(reference, Statsample::Vector[[0],[4,5,6],[10]])
105
105
  assert_equal(reference, Statsample::Vector[[0],[4,[5,[6]]],[10]])
106
-
106
+
107
107
  assert_equal(reference, Statsample::Vector[[0],[4,5,6].to_vector,[10]])
108
-
108
+
109
109
  end
110
110
  should "be the same usign #to_vector" do
111
111
  lazy1=@data.to_vector(:scale)
@@ -121,7 +121,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
121
121
  v1=10.times.map {nil}.to_scale
122
122
  v2=Statsample::Vector.new_scale(10)
123
123
  assert_equal(v1,v2)
124
-
124
+
125
125
  end
126
126
  should "could use new_scale with size and value" do
127
127
  a=rand
@@ -134,11 +134,11 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
134
134
  v2=Statsample::Vector.new_scale(10) {|i| i*2}
135
135
  assert_equal(v1,v2)
136
136
  end
137
-
137
+
138
138
  end
139
-
139
+
140
140
  context "#split_by_separator" do
141
-
141
+
142
142
  setup do
143
143
  @a = Statsample::Vector.new(["a","a,b","c,d","a,d",10,nil],:nominal)
144
144
  @b=@a.split_by_separator(",")
@@ -150,14 +150,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
150
150
  expected=['a','b','c','d',10]
151
151
  assert_equal(expected, @b.keys)
152
152
  end
153
-
153
+
154
154
  should "returns a Hash, which values are Statsample::Vector" do
155
155
  @b.each_key {|k| assert_instance_of(Statsample::Vector, @b[k])}
156
156
  end
157
157
  should "hash values are n times the tokens appears" do
158
158
  assert_counting_tokens(@b)
159
159
  end
160
- should "#split_by_separator_freq returns the number of ocurrences of tokens" do
160
+ should "#split_by_separator_freq returns the number of ocurrences of tokens" do
161
161
  assert_equal({'a'=>3,'b'=>1,'c'=>1,'d'=>2,10=>1}, @a.split_by_separator_freq())
162
162
  end
163
163
  should "using a different separator give the same values" do
@@ -177,7 +177,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
177
177
  3.times do |i|
178
178
  assert_in_delta(i*4.5, hist.get_range(i)[0], 1e-9)
179
179
  end
180
-
180
+
181
181
  end
182
182
  should "have a name" do
183
183
  @c.name=="Test Vector"
@@ -190,17 +190,17 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
190
190
  next_number=$1.to_i+1
191
191
  assert_equal("Vector #{next_number}",b.name)
192
192
  end
193
- should "save to a file and load the same Vector" do
193
+ should "save to a file and load the same Vector" do
194
194
  outfile=Tempfile.new("vector.vec")
195
195
  @c.save(outfile.path)
196
196
  a=Statsample.load(outfile.path)
197
- assert_equal(@c,a)
197
+ assert_equal(@c,a)
198
198
  end
199
199
  should "#collect returns an array" do
200
200
  val=@c.collect {|v| v}
201
201
  assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
202
202
  end
203
-
203
+
204
204
  should "#recode returns a recoded array" do
205
205
  a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
206
206
  exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
@@ -213,8 +213,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
213
213
  a=[1,2,3,4,5].to_vector(:scale)
214
214
  assert_equal(120,a.product)
215
215
  end
216
-
217
- should "missing values" do
216
+
217
+ should "missing values" do
218
218
  @c.missing_values=[10]
219
219
  assert_equal([-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9], @c.valid_data.sort)
220
220
  assert_equal([5,5,5,5,5,6,6,7,8,9,nil,1,2,3,4,nil,-99,-99], @c.data_with_nils)
@@ -224,13 +224,13 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
224
224
  @c.missing_values=[]
225
225
  assert_equal(@c.valid_data.sort,[-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
226
226
  assert_equal(@c.data_with_nils,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
227
-
227
+
228
228
  end
229
- should "correct has_missing_data? with missing data" do
229
+ should "correct has_missing_data? with missing data" do
230
230
  a=[1,2,3,nil].to_vector
231
231
  assert(a.has_missing_data?)
232
232
  end
233
- should "correct has_missing_data? without missing data" do
233
+ should "correct has_missing_data? without missing data" do
234
234
  a=[1,2,3,4,10].to_vector
235
235
  assert(!a.has_missing_data?)
236
236
  end
@@ -238,12 +238,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
238
238
  a=[1,2,3,4,10].to_vector
239
239
  a.missing_values=[10]
240
240
  assert(a.has_missing_data?)
241
- end
242
- should "label correctly fields" do
241
+ end
242
+ should "label correctly fields" do
243
243
  @c.labels={5=>'FIVE'}
244
244
  assert_equal(["FIVE","FIVE","FIVE","FIVE","FIVE",6,6,7,8,9,10,1,2,3,4,nil,-99, -99],@c.vector_labeled.to_a)
245
245
  end
246
- should "verify" do
246
+ should "verify" do
247
247
  h=@c.verify{|d| !d.nil? and d>0}
248
248
  e={15=>nil,16=>-99,17=>-99}
249
249
  assert_equal(e,h)
@@ -251,7 +251,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
251
251
  should "have a summary with name on it" do
252
252
  assert_match(/#{@c.name}/, @c.summary)
253
253
  end
254
-
254
+
255
255
  should "GSL::Vector based should push correcty" do
256
256
  if Statsample.has_gsl?
257
257
  v=GSL::Vector[1,2,3,4,5].to_scale
@@ -263,10 +263,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
263
263
  end
264
264
  end
265
265
 
266
-
266
+
267
267
  should "split correctly" do
268
268
  a = Statsample::Vector.new(["a","a,b","c,d","a,d","d",10,nil],:nominal)
269
- assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
269
+ assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
270
270
  end
271
271
  should "multiply correct for scalar" do
272
272
  a = [1,2,3].to_scale
@@ -275,20 +275,20 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
275
275
  should "multiply correct with other vector" do
276
276
  a = [1,2,3].to_scale
277
277
  b = [2,4,6].to_scale
278
-
278
+
279
279
  assert_equal([2,8,18].to_scale, a*b)
280
280
  end
281
281
  should "sum correct for scalar" do
282
282
  a = [1,2,3].to_scale
283
283
  assert_equal([11,12,13].to_scale, a+10)
284
284
  end
285
-
285
+
286
286
  should "raise NoMethodError when method requires ordinal and vector is nominal" do
287
287
  @c.type=:nominal
288
288
  assert_raise(::NoMethodError) { @c.median }
289
289
  end
290
-
291
- should "raise NoMethodError when method requires scalar and vector is ordinal" do
290
+
291
+ should "raise NoMethodError when method requires scalar and vector is ordinal" do
292
292
  @c.type=:ordinal
293
293
  assert_raise(::NoMethodError) { @c.mean }
294
294
  end
@@ -326,7 +326,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
326
326
  assert_in_delta(se, ds[:mean].sd, 0.02)
327
327
  end
328
328
 
329
-
329
+
330
330
  end
331
331
 
332
332
 
@@ -349,9 +349,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
349
349
  v1=[1,2,3].to_vector(:nominal)
350
350
  v2=[1,2,3].to_vector(:ordinal)
351
351
  assert_not_equal(v1,v2)
352
+ v2=[1,2,3]
353
+ assert_not_equal(v1,v2)
352
354
  v1=[1,2,3].to_vector()
353
355
  v2=[1,2,3].to_vector()
354
356
  assert_equal(v1,v2)
357
+ assert_equal(false, v1 == Object.new)
355
358
  end
356
359
  def test_vector_percentil
357
360
  a=[1,2,2,3,4,5,5,5,6,10].to_scale
@@ -360,8 +363,6 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
360
363
  a=[1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10].to_scale
361
364
  expected=[10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100].to_scale
362
365
  assert_equal(expected, a.vector_percentil)
363
-
364
-
365
366
  end
366
367
  def test_ordinal
367
368
  @c.type=:ordinal
@@ -377,6 +378,21 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
377
378
  assert_equal(9.5, a.percentil(75))
378
379
  assert_equal(3.0, a.percentil(10))
379
380
  end
381
+ def test_linear_percentil_strategy
382
+ values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_scale
383
+ assert_equal 102, values.percentil(0, :linear)
384
+ assert_equal 104.75, values.percentil(25, :linear)
385
+ assert_equal 108.5, values.percentil(50, :linear)
386
+ assert_equal 112.75, values.percentil(75, :linear)
387
+ assert_equal 116, values.percentil(100, :linear)
388
+
389
+ values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_scale
390
+ assert_equal 102, values.percentil(0, :linear)
391
+ assert_equal 105, values.percentil(25, :linear)
392
+ assert_equal 109, values.percentil(50, :linear)
393
+ assert_equal 115, values.percentil(75, :linear)
394
+ assert_equal 118, values.percentil(100, :linear)
395
+ end
380
396
  def test_ranked
381
397
  v1=[0.8,1.2,1.2,2.3,18].to_vector(:ordinal)
382
398
  expected=[1,2.5,2.5,4,5].to_vector(:ordinal)
@@ -415,7 +431,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
415
431
  assert_equal(0,vs.mean)
416
432
  assert_equal(1,vs.sds)
417
433
  end
418
-
434
+
419
435
  def test_vector_standarized_with_zero_variance
420
436
  v1=100.times.map {|i| 1}.to_scale
421
437
  exp=100.times.map {nil}.to_scale
@@ -428,14 +444,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
428
444
  assert_raise(NoMethodError) { v.check_type(:scale)}
429
445
  assert_raise(NoMethodError) { v.check_type(:ordinal)}
430
446
  assert(v.check_type(:nominal).nil?)
431
-
447
+
432
448
  v.type=:ordinal
433
-
449
+
434
450
  assert_raise(NoMethodError) { v.check_type(:scale)}
435
-
451
+
436
452
  assert(v.check_type(:ordinal).nil?)
437
453
  assert(v.check_type(:nominal).nil?)
438
-
454
+
439
455
 
440
456
  v.type=:scale
441
457
  assert(v.check_type(:scale).nil?)
@@ -446,10 +462,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
446
462
  assert_raise(NoMethodError) { v.check_type(:scale)}
447
463
  assert_raise(NoMethodError) { v.check_type(:ordinal)}
448
464
  assert_raise(NoMethodError) { v.check_type(:nominal)}
449
-
450
465
  end
451
-
452
-
466
+
453
467
  def test_add
454
468
  a=Statsample::Vector.new([1,2,3,4,5], :scale)
455
469
  b=Statsample::Vector.new([11,12,13,14,15], :scale)
@@ -527,7 +541,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
527
541
  def test_gsl
528
542
  if Statsample.has_gsl?
529
543
  a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
530
-
544
+
531
545
  assert_equal(2,a.mean)
532
546
  assert_equal(a.variance_sample_ruby,a.variance_sample)
533
547
  assert_equal(a.standard_deviation_sample_ruby,a.sds)