statsample 1.4.0 → 1.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/test/test_vector.rb CHANGED
@@ -30,12 +30,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
30
30
  end
31
31
  @correct_a=@correct_a.to_scale
32
32
  @correct_b=@correct_b.to_scale
33
-
33
+
34
34
  @common=lambda do |av,bv|
35
35
  assert_equal(@correct_a, av, "A no es esperado")
36
36
  assert_equal(@correct_b, bv, "B no es esperado")
37
37
  assert(!av.has_missing_data?, "A tiene datos faltantes")
38
- assert(!bv.has_missing_data?, "b tiene datos faltantes")
38
+ assert(!bv.has_missing_data?, "b tiene datos faltantes")
39
39
  end
40
40
  end
41
41
  should "return correct only_valid" do
@@ -53,10 +53,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
53
53
  assert_equal(av,av2)
54
54
  assert_same(av,av2)
55
55
  assert_same(bv,bv2)
56
- end
56
+ end
57
57
  end
58
58
  context Statsample::Vector do
59
- setup do
59
+ setup do
60
60
  @c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
61
61
  @c.name="Test Vector"
62
62
  @c.missing_values=[-99]
@@ -66,18 +66,18 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
66
66
  v=Statsample::Vector.new(gsl)
67
67
  assert_equal([1,2,3,4,5], v.to_a)
68
68
  refute(v.flawed?)
69
-
69
+
70
70
  end
71
-
71
+
72
72
  context "using matrix operations" do
73
73
  setup do
74
74
  @a=[1,2,3,4,5].to_scale
75
75
  end
76
- should "to_matrix returns a matrix with 1 row" do
76
+ should "to_matrix returns a matrix with 1 row" do
77
77
  mh=Matrix[[1,2,3,4,5]]
78
78
  assert_equal(mh,@a.to_matrix)
79
79
  end
80
- should "to_matrix(:vertical) returns a matrix with 1 column" do
80
+ should "to_matrix(:vertical) returns a matrix with 1 column" do
81
81
  mv=Matrix.columns([[1,2,3,4,5]])
82
82
  assert_equal(mv,@a.to_matrix(:vertical))
83
83
  end
@@ -89,7 +89,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
89
89
  end
90
90
  end
91
91
  context "when initializing" do
92
- setup do
92
+ setup do
93
93
  @data=(10.times.map{rand(100)})+[nil]
94
94
  @original=Statsample::Vector.new(@data, :scale)
95
95
  end
@@ -103,9 +103,9 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
103
103
  assert_equal(reference, Statsample::Vector[0,4..6,10])
104
104
  assert_equal(reference, Statsample::Vector[[0],[4,5,6],[10]])
105
105
  assert_equal(reference, Statsample::Vector[[0],[4,[5,[6]]],[10]])
106
-
106
+
107
107
  assert_equal(reference, Statsample::Vector[[0],[4,5,6].to_vector,[10]])
108
-
108
+
109
109
  end
110
110
  should "be the same usign #to_vector" do
111
111
  lazy1=@data.to_vector(:scale)
@@ -121,7 +121,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
121
121
  v1=10.times.map {nil}.to_scale
122
122
  v2=Statsample::Vector.new_scale(10)
123
123
  assert_equal(v1,v2)
124
-
124
+
125
125
  end
126
126
  should "could use new_scale with size and value" do
127
127
  a=rand
@@ -134,11 +134,11 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
134
134
  v2=Statsample::Vector.new_scale(10) {|i| i*2}
135
135
  assert_equal(v1,v2)
136
136
  end
137
-
137
+
138
138
  end
139
-
139
+
140
140
  context "#split_by_separator" do
141
-
141
+
142
142
  setup do
143
143
  @a = Statsample::Vector.new(["a","a,b","c,d","a,d",10,nil],:nominal)
144
144
  @b=@a.split_by_separator(",")
@@ -150,14 +150,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
150
150
  expected=['a','b','c','d',10]
151
151
  assert_equal(expected, @b.keys)
152
152
  end
153
-
153
+
154
154
  should "returns a Hash, which values are Statsample::Vector" do
155
155
  @b.each_key {|k| assert_instance_of(Statsample::Vector, @b[k])}
156
156
  end
157
157
  should "hash values are n times the tokens appears" do
158
158
  assert_counting_tokens(@b)
159
159
  end
160
- should "#split_by_separator_freq returns the number of ocurrences of tokens" do
160
+ should "#split_by_separator_freq returns the number of ocurrences of tokens" do
161
161
  assert_equal({'a'=>3,'b'=>1,'c'=>1,'d'=>2,10=>1}, @a.split_by_separator_freq())
162
162
  end
163
163
  should "using a different separator give the same values" do
@@ -177,7 +177,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
177
177
  3.times do |i|
178
178
  assert_in_delta(i*4.5, hist.get_range(i)[0], 1e-9)
179
179
  end
180
-
180
+
181
181
  end
182
182
  should "have a name" do
183
183
  @c.name=="Test Vector"
@@ -190,17 +190,17 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
190
190
  next_number=$1.to_i+1
191
191
  assert_equal("Vector #{next_number}",b.name)
192
192
  end
193
- should "save to a file and load the same Vector" do
193
+ should "save to a file and load the same Vector" do
194
194
  outfile=Tempfile.new("vector.vec")
195
195
  @c.save(outfile.path)
196
196
  a=Statsample.load(outfile.path)
197
- assert_equal(@c,a)
197
+ assert_equal(@c,a)
198
198
  end
199
199
  should "#collect returns an array" do
200
200
  val=@c.collect {|v| v}
201
201
  assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
202
202
  end
203
-
203
+
204
204
  should "#recode returns a recoded array" do
205
205
  a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
206
206
  exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
@@ -213,8 +213,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
213
213
  a=[1,2,3,4,5].to_vector(:scale)
214
214
  assert_equal(120,a.product)
215
215
  end
216
-
217
- should "missing values" do
216
+
217
+ should "missing values" do
218
218
  @c.missing_values=[10]
219
219
  assert_equal([-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9], @c.valid_data.sort)
220
220
  assert_equal([5,5,5,5,5,6,6,7,8,9,nil,1,2,3,4,nil,-99,-99], @c.data_with_nils)
@@ -224,13 +224,13 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
224
224
  @c.missing_values=[]
225
225
  assert_equal(@c.valid_data.sort,[-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
226
226
  assert_equal(@c.data_with_nils,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
227
-
227
+
228
228
  end
229
- should "correct has_missing_data? with missing data" do
229
+ should "correct has_missing_data? with missing data" do
230
230
  a=[1,2,3,nil].to_vector
231
231
  assert(a.has_missing_data?)
232
232
  end
233
- should "correct has_missing_data? without missing data" do
233
+ should "correct has_missing_data? without missing data" do
234
234
  a=[1,2,3,4,10].to_vector
235
235
  assert(!a.has_missing_data?)
236
236
  end
@@ -238,12 +238,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
238
238
  a=[1,2,3,4,10].to_vector
239
239
  a.missing_values=[10]
240
240
  assert(a.has_missing_data?)
241
- end
242
- should "label correctly fields" do
241
+ end
242
+ should "label correctly fields" do
243
243
  @c.labels={5=>'FIVE'}
244
244
  assert_equal(["FIVE","FIVE","FIVE","FIVE","FIVE",6,6,7,8,9,10,1,2,3,4,nil,-99, -99],@c.vector_labeled.to_a)
245
245
  end
246
- should "verify" do
246
+ should "verify" do
247
247
  h=@c.verify{|d| !d.nil? and d>0}
248
248
  e={15=>nil,16=>-99,17=>-99}
249
249
  assert_equal(e,h)
@@ -251,7 +251,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
251
251
  should "have a summary with name on it" do
252
252
  assert_match(/#{@c.name}/, @c.summary)
253
253
  end
254
-
254
+
255
255
  should "GSL::Vector based should push correcty" do
256
256
  if Statsample.has_gsl?
257
257
  v=GSL::Vector[1,2,3,4,5].to_scale
@@ -263,10 +263,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
263
263
  end
264
264
  end
265
265
 
266
-
266
+
267
267
  should "split correctly" do
268
268
  a = Statsample::Vector.new(["a","a,b","c,d","a,d","d",10,nil],:nominal)
269
- assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
269
+ assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
270
270
  end
271
271
  should "multiply correct for scalar" do
272
272
  a = [1,2,3].to_scale
@@ -275,20 +275,20 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
275
275
  should "multiply correct with other vector" do
276
276
  a = [1,2,3].to_scale
277
277
  b = [2,4,6].to_scale
278
-
278
+
279
279
  assert_equal([2,8,18].to_scale, a*b)
280
280
  end
281
281
  should "sum correct for scalar" do
282
282
  a = [1,2,3].to_scale
283
283
  assert_equal([11,12,13].to_scale, a+10)
284
284
  end
285
-
285
+
286
286
  should "raise NoMethodError when method requires ordinal and vector is nominal" do
287
287
  @c.type=:nominal
288
288
  assert_raise(::NoMethodError) { @c.median }
289
289
  end
290
-
291
- should "raise NoMethodError when method requires scalar and vector is ordinal" do
290
+
291
+ should "raise NoMethodError when method requires scalar and vector is ordinal" do
292
292
  @c.type=:ordinal
293
293
  assert_raise(::NoMethodError) { @c.mean }
294
294
  end
@@ -326,7 +326,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
326
326
  assert_in_delta(se, ds[:mean].sd, 0.02)
327
327
  end
328
328
 
329
-
329
+
330
330
  end
331
331
 
332
332
 
@@ -349,9 +349,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
349
349
  v1=[1,2,3].to_vector(:nominal)
350
350
  v2=[1,2,3].to_vector(:ordinal)
351
351
  assert_not_equal(v1,v2)
352
+ v2=[1,2,3]
353
+ assert_not_equal(v1,v2)
352
354
  v1=[1,2,3].to_vector()
353
355
  v2=[1,2,3].to_vector()
354
356
  assert_equal(v1,v2)
357
+ assert_equal(false, v1 == Object.new)
355
358
  end
356
359
  def test_vector_percentil
357
360
  a=[1,2,2,3,4,5,5,5,6,10].to_scale
@@ -360,8 +363,6 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
360
363
  a=[1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10].to_scale
361
364
  expected=[10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100].to_scale
362
365
  assert_equal(expected, a.vector_percentil)
363
-
364
-
365
366
  end
366
367
  def test_ordinal
367
368
  @c.type=:ordinal
@@ -377,6 +378,21 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
377
378
  assert_equal(9.5, a.percentil(75))
378
379
  assert_equal(3.0, a.percentil(10))
379
380
  end
381
+ def test_linear_percentil_strategy
382
+ values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_scale
383
+ assert_equal 102, values.percentil(0, :linear)
384
+ assert_equal 104.75, values.percentil(25, :linear)
385
+ assert_equal 108.5, values.percentil(50, :linear)
386
+ assert_equal 112.75, values.percentil(75, :linear)
387
+ assert_equal 116, values.percentil(100, :linear)
388
+
389
+ values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_scale
390
+ assert_equal 102, values.percentil(0, :linear)
391
+ assert_equal 105, values.percentil(25, :linear)
392
+ assert_equal 109, values.percentil(50, :linear)
393
+ assert_equal 115, values.percentil(75, :linear)
394
+ assert_equal 118, values.percentil(100, :linear)
395
+ end
380
396
  def test_ranked
381
397
  v1=[0.8,1.2,1.2,2.3,18].to_vector(:ordinal)
382
398
  expected=[1,2.5,2.5,4,5].to_vector(:ordinal)
@@ -415,7 +431,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
415
431
  assert_equal(0,vs.mean)
416
432
  assert_equal(1,vs.sds)
417
433
  end
418
-
434
+
419
435
  def test_vector_standarized_with_zero_variance
420
436
  v1=100.times.map {|i| 1}.to_scale
421
437
  exp=100.times.map {nil}.to_scale
@@ -428,14 +444,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
428
444
  assert_raise(NoMethodError) { v.check_type(:scale)}
429
445
  assert_raise(NoMethodError) { v.check_type(:ordinal)}
430
446
  assert(v.check_type(:nominal).nil?)
431
-
447
+
432
448
  v.type=:ordinal
433
-
449
+
434
450
  assert_raise(NoMethodError) { v.check_type(:scale)}
435
-
451
+
436
452
  assert(v.check_type(:ordinal).nil?)
437
453
  assert(v.check_type(:nominal).nil?)
438
-
454
+
439
455
 
440
456
  v.type=:scale
441
457
  assert(v.check_type(:scale).nil?)
@@ -446,10 +462,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
446
462
  assert_raise(NoMethodError) { v.check_type(:scale)}
447
463
  assert_raise(NoMethodError) { v.check_type(:ordinal)}
448
464
  assert_raise(NoMethodError) { v.check_type(:nominal)}
449
-
450
465
  end
451
-
452
-
466
+
453
467
  def test_add
454
468
  a=Statsample::Vector.new([1,2,3,4,5], :scale)
455
469
  b=Statsample::Vector.new([11,12,13,14,15], :scale)
@@ -527,7 +541,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
527
541
  def test_gsl
528
542
  if Statsample.has_gsl?
529
543
  a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
530
-
544
+
531
545
  assert_equal(2,a.mean)
532
546
  assert_equal(a.variance_sample_ruby,a.variance_sample)
533
547
  assert_equal(a.standard_deviation_sample_ruby,a.sds)