statsample 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +14 -0
- data/Gemfile +1 -16
- data/History.txt +51 -46
- data/LICENSE.txt +7 -82
- data/README.md +145 -150
- data/Rakefile +20 -102
- data/lib/spss.rb +17 -14
- data/lib/statsample/crosstab.rb +2 -2
- data/lib/statsample/dataset.rb +82 -81
- data/lib/statsample/matrix.rb +43 -43
- data/lib/statsample/reliability.rb +1 -2
- data/lib/statsample/vector.rb +157 -124
- data/lib/statsample/version.rb +1 -1
- data/lib/statsample.rb +91 -91
- data/references.txt +2 -1
- data/statsample.gemspec +89 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_crosstab.rb +8 -0
- data/test/test_histogram.rb +7 -0
- data/test/test_vector.rb +62 -48
- metadata +109 -120
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -78
- data/Manifest.txt +0 -157
- data/setup.rb +0 -1585
data/test/test_vector.rb
CHANGED
@@ -30,12 +30,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
30
30
|
end
|
31
31
|
@correct_a=@correct_a.to_scale
|
32
32
|
@correct_b=@correct_b.to_scale
|
33
|
-
|
33
|
+
|
34
34
|
@common=lambda do |av,bv|
|
35
35
|
assert_equal(@correct_a, av, "A no es esperado")
|
36
36
|
assert_equal(@correct_b, bv, "B no es esperado")
|
37
37
|
assert(!av.has_missing_data?, "A tiene datos faltantes")
|
38
|
-
assert(!bv.has_missing_data?, "b tiene datos faltantes")
|
38
|
+
assert(!bv.has_missing_data?, "b tiene datos faltantes")
|
39
39
|
end
|
40
40
|
end
|
41
41
|
should "return correct only_valid" do
|
@@ -53,10 +53,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
53
53
|
assert_equal(av,av2)
|
54
54
|
assert_same(av,av2)
|
55
55
|
assert_same(bv,bv2)
|
56
|
-
end
|
56
|
+
end
|
57
57
|
end
|
58
58
|
context Statsample::Vector do
|
59
|
-
setup do
|
59
|
+
setup do
|
60
60
|
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
|
61
61
|
@c.name="Test Vector"
|
62
62
|
@c.missing_values=[-99]
|
@@ -66,18 +66,18 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
66
66
|
v=Statsample::Vector.new(gsl)
|
67
67
|
assert_equal([1,2,3,4,5], v.to_a)
|
68
68
|
refute(v.flawed?)
|
69
|
-
|
69
|
+
|
70
70
|
end
|
71
|
-
|
71
|
+
|
72
72
|
context "using matrix operations" do
|
73
73
|
setup do
|
74
74
|
@a=[1,2,3,4,5].to_scale
|
75
75
|
end
|
76
|
-
should "to_matrix returns a matrix with 1 row" do
|
76
|
+
should "to_matrix returns a matrix with 1 row" do
|
77
77
|
mh=Matrix[[1,2,3,4,5]]
|
78
78
|
assert_equal(mh,@a.to_matrix)
|
79
79
|
end
|
80
|
-
should "to_matrix(:vertical) returns a matrix with 1 column" do
|
80
|
+
should "to_matrix(:vertical) returns a matrix with 1 column" do
|
81
81
|
mv=Matrix.columns([[1,2,3,4,5]])
|
82
82
|
assert_equal(mv,@a.to_matrix(:vertical))
|
83
83
|
end
|
@@ -89,7 +89,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
89
89
|
end
|
90
90
|
end
|
91
91
|
context "when initializing" do
|
92
|
-
setup do
|
92
|
+
setup do
|
93
93
|
@data=(10.times.map{rand(100)})+[nil]
|
94
94
|
@original=Statsample::Vector.new(@data, :scale)
|
95
95
|
end
|
@@ -103,9 +103,9 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
103
103
|
assert_equal(reference, Statsample::Vector[0,4..6,10])
|
104
104
|
assert_equal(reference, Statsample::Vector[[0],[4,5,6],[10]])
|
105
105
|
assert_equal(reference, Statsample::Vector[[0],[4,[5,[6]]],[10]])
|
106
|
-
|
106
|
+
|
107
107
|
assert_equal(reference, Statsample::Vector[[0],[4,5,6].to_vector,[10]])
|
108
|
-
|
108
|
+
|
109
109
|
end
|
110
110
|
should "be the same usign #to_vector" do
|
111
111
|
lazy1=@data.to_vector(:scale)
|
@@ -121,7 +121,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
121
121
|
v1=10.times.map {nil}.to_scale
|
122
122
|
v2=Statsample::Vector.new_scale(10)
|
123
123
|
assert_equal(v1,v2)
|
124
|
-
|
124
|
+
|
125
125
|
end
|
126
126
|
should "could use new_scale with size and value" do
|
127
127
|
a=rand
|
@@ -134,11 +134,11 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
134
134
|
v2=Statsample::Vector.new_scale(10) {|i| i*2}
|
135
135
|
assert_equal(v1,v2)
|
136
136
|
end
|
137
|
-
|
137
|
+
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
context "#split_by_separator" do
|
141
|
-
|
141
|
+
|
142
142
|
setup do
|
143
143
|
@a = Statsample::Vector.new(["a","a,b","c,d","a,d",10,nil],:nominal)
|
144
144
|
@b=@a.split_by_separator(",")
|
@@ -150,14 +150,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
150
150
|
expected=['a','b','c','d',10]
|
151
151
|
assert_equal(expected, @b.keys)
|
152
152
|
end
|
153
|
-
|
153
|
+
|
154
154
|
should "returns a Hash, which values are Statsample::Vector" do
|
155
155
|
@b.each_key {|k| assert_instance_of(Statsample::Vector, @b[k])}
|
156
156
|
end
|
157
157
|
should "hash values are n times the tokens appears" do
|
158
158
|
assert_counting_tokens(@b)
|
159
159
|
end
|
160
|
-
should "#split_by_separator_freq returns the number of ocurrences of tokens" do
|
160
|
+
should "#split_by_separator_freq returns the number of ocurrences of tokens" do
|
161
161
|
assert_equal({'a'=>3,'b'=>1,'c'=>1,'d'=>2,10=>1}, @a.split_by_separator_freq())
|
162
162
|
end
|
163
163
|
should "using a different separator give the same values" do
|
@@ -177,7 +177,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
177
177
|
3.times do |i|
|
178
178
|
assert_in_delta(i*4.5, hist.get_range(i)[0], 1e-9)
|
179
179
|
end
|
180
|
-
|
180
|
+
|
181
181
|
end
|
182
182
|
should "have a name" do
|
183
183
|
@c.name=="Test Vector"
|
@@ -190,17 +190,17 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
190
190
|
next_number=$1.to_i+1
|
191
191
|
assert_equal("Vector #{next_number}",b.name)
|
192
192
|
end
|
193
|
-
should "save to a file and load the same Vector" do
|
193
|
+
should "save to a file and load the same Vector" do
|
194
194
|
outfile=Tempfile.new("vector.vec")
|
195
195
|
@c.save(outfile.path)
|
196
196
|
a=Statsample.load(outfile.path)
|
197
|
-
assert_equal(@c,a)
|
197
|
+
assert_equal(@c,a)
|
198
198
|
end
|
199
199
|
should "#collect returns an array" do
|
200
200
|
val=@c.collect {|v| v}
|
201
201
|
assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
|
202
202
|
end
|
203
|
-
|
203
|
+
|
204
204
|
should "#recode returns a recoded array" do
|
205
205
|
a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
|
206
206
|
exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
|
@@ -213,8 +213,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
213
213
|
a=[1,2,3,4,5].to_vector(:scale)
|
214
214
|
assert_equal(120,a.product)
|
215
215
|
end
|
216
|
-
|
217
|
-
should "missing values" do
|
216
|
+
|
217
|
+
should "missing values" do
|
218
218
|
@c.missing_values=[10]
|
219
219
|
assert_equal([-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9], @c.valid_data.sort)
|
220
220
|
assert_equal([5,5,5,5,5,6,6,7,8,9,nil,1,2,3,4,nil,-99,-99], @c.data_with_nils)
|
@@ -224,13 +224,13 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
224
224
|
@c.missing_values=[]
|
225
225
|
assert_equal(@c.valid_data.sort,[-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
|
226
226
|
assert_equal(@c.data_with_nils,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
|
227
|
-
|
227
|
+
|
228
228
|
end
|
229
|
-
should "correct has_missing_data? with missing data" do
|
229
|
+
should "correct has_missing_data? with missing data" do
|
230
230
|
a=[1,2,3,nil].to_vector
|
231
231
|
assert(a.has_missing_data?)
|
232
232
|
end
|
233
|
-
should "correct has_missing_data? without missing data" do
|
233
|
+
should "correct has_missing_data? without missing data" do
|
234
234
|
a=[1,2,3,4,10].to_vector
|
235
235
|
assert(!a.has_missing_data?)
|
236
236
|
end
|
@@ -238,12 +238,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
238
238
|
a=[1,2,3,4,10].to_vector
|
239
239
|
a.missing_values=[10]
|
240
240
|
assert(a.has_missing_data?)
|
241
|
-
end
|
242
|
-
should "label correctly fields" do
|
241
|
+
end
|
242
|
+
should "label correctly fields" do
|
243
243
|
@c.labels={5=>'FIVE'}
|
244
244
|
assert_equal(["FIVE","FIVE","FIVE","FIVE","FIVE",6,6,7,8,9,10,1,2,3,4,nil,-99, -99],@c.vector_labeled.to_a)
|
245
245
|
end
|
246
|
-
should "verify" do
|
246
|
+
should "verify" do
|
247
247
|
h=@c.verify{|d| !d.nil? and d>0}
|
248
248
|
e={15=>nil,16=>-99,17=>-99}
|
249
249
|
assert_equal(e,h)
|
@@ -251,7 +251,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
251
251
|
should "have a summary with name on it" do
|
252
252
|
assert_match(/#{@c.name}/, @c.summary)
|
253
253
|
end
|
254
|
-
|
254
|
+
|
255
255
|
should "GSL::Vector based should push correcty" do
|
256
256
|
if Statsample.has_gsl?
|
257
257
|
v=GSL::Vector[1,2,3,4,5].to_scale
|
@@ -263,10 +263,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
263
263
|
end
|
264
264
|
end
|
265
265
|
|
266
|
-
|
266
|
+
|
267
267
|
should "split correctly" do
|
268
268
|
a = Statsample::Vector.new(["a","a,b","c,d","a,d","d",10,nil],:nominal)
|
269
|
-
assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
|
269
|
+
assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
|
270
270
|
end
|
271
271
|
should "multiply correct for scalar" do
|
272
272
|
a = [1,2,3].to_scale
|
@@ -275,20 +275,20 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
275
275
|
should "multiply correct with other vector" do
|
276
276
|
a = [1,2,3].to_scale
|
277
277
|
b = [2,4,6].to_scale
|
278
|
-
|
278
|
+
|
279
279
|
assert_equal([2,8,18].to_scale, a*b)
|
280
280
|
end
|
281
281
|
should "sum correct for scalar" do
|
282
282
|
a = [1,2,3].to_scale
|
283
283
|
assert_equal([11,12,13].to_scale, a+10)
|
284
284
|
end
|
285
|
-
|
285
|
+
|
286
286
|
should "raise NoMethodError when method requires ordinal and vector is nominal" do
|
287
287
|
@c.type=:nominal
|
288
288
|
assert_raise(::NoMethodError) { @c.median }
|
289
289
|
end
|
290
|
-
|
291
|
-
should "raise NoMethodError when method requires scalar and vector is ordinal" do
|
290
|
+
|
291
|
+
should "raise NoMethodError when method requires scalar and vector is ordinal" do
|
292
292
|
@c.type=:ordinal
|
293
293
|
assert_raise(::NoMethodError) { @c.mean }
|
294
294
|
end
|
@@ -326,7 +326,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
326
326
|
assert_in_delta(se, ds[:mean].sd, 0.02)
|
327
327
|
end
|
328
328
|
|
329
|
-
|
329
|
+
|
330
330
|
end
|
331
331
|
|
332
332
|
|
@@ -349,9 +349,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
349
349
|
v1=[1,2,3].to_vector(:nominal)
|
350
350
|
v2=[1,2,3].to_vector(:ordinal)
|
351
351
|
assert_not_equal(v1,v2)
|
352
|
+
v2=[1,2,3]
|
353
|
+
assert_not_equal(v1,v2)
|
352
354
|
v1=[1,2,3].to_vector()
|
353
355
|
v2=[1,2,3].to_vector()
|
354
356
|
assert_equal(v1,v2)
|
357
|
+
assert_equal(false, v1 == Object.new)
|
355
358
|
end
|
356
359
|
def test_vector_percentil
|
357
360
|
a=[1,2,2,3,4,5,5,5,6,10].to_scale
|
@@ -360,8 +363,6 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
360
363
|
a=[1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10].to_scale
|
361
364
|
expected=[10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100].to_scale
|
362
365
|
assert_equal(expected, a.vector_percentil)
|
363
|
-
|
364
|
-
|
365
366
|
end
|
366
367
|
def test_ordinal
|
367
368
|
@c.type=:ordinal
|
@@ -377,6 +378,21 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
377
378
|
assert_equal(9.5, a.percentil(75))
|
378
379
|
assert_equal(3.0, a.percentil(10))
|
379
380
|
end
|
381
|
+
def test_linear_percentil_strategy
|
382
|
+
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_scale
|
383
|
+
assert_equal 102, values.percentil(0, :linear)
|
384
|
+
assert_equal 104.75, values.percentil(25, :linear)
|
385
|
+
assert_equal 108.5, values.percentil(50, :linear)
|
386
|
+
assert_equal 112.75, values.percentil(75, :linear)
|
387
|
+
assert_equal 116, values.percentil(100, :linear)
|
388
|
+
|
389
|
+
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_scale
|
390
|
+
assert_equal 102, values.percentil(0, :linear)
|
391
|
+
assert_equal 105, values.percentil(25, :linear)
|
392
|
+
assert_equal 109, values.percentil(50, :linear)
|
393
|
+
assert_equal 115, values.percentil(75, :linear)
|
394
|
+
assert_equal 118, values.percentil(100, :linear)
|
395
|
+
end
|
380
396
|
def test_ranked
|
381
397
|
v1=[0.8,1.2,1.2,2.3,18].to_vector(:ordinal)
|
382
398
|
expected=[1,2.5,2.5,4,5].to_vector(:ordinal)
|
@@ -415,7 +431,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
415
431
|
assert_equal(0,vs.mean)
|
416
432
|
assert_equal(1,vs.sds)
|
417
433
|
end
|
418
|
-
|
434
|
+
|
419
435
|
def test_vector_standarized_with_zero_variance
|
420
436
|
v1=100.times.map {|i| 1}.to_scale
|
421
437
|
exp=100.times.map {nil}.to_scale
|
@@ -428,14 +444,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
428
444
|
assert_raise(NoMethodError) { v.check_type(:scale)}
|
429
445
|
assert_raise(NoMethodError) { v.check_type(:ordinal)}
|
430
446
|
assert(v.check_type(:nominal).nil?)
|
431
|
-
|
447
|
+
|
432
448
|
v.type=:ordinal
|
433
|
-
|
449
|
+
|
434
450
|
assert_raise(NoMethodError) { v.check_type(:scale)}
|
435
|
-
|
451
|
+
|
436
452
|
assert(v.check_type(:ordinal).nil?)
|
437
453
|
assert(v.check_type(:nominal).nil?)
|
438
|
-
|
454
|
+
|
439
455
|
|
440
456
|
v.type=:scale
|
441
457
|
assert(v.check_type(:scale).nil?)
|
@@ -446,10 +462,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
446
462
|
assert_raise(NoMethodError) { v.check_type(:scale)}
|
447
463
|
assert_raise(NoMethodError) { v.check_type(:ordinal)}
|
448
464
|
assert_raise(NoMethodError) { v.check_type(:nominal)}
|
449
|
-
|
450
465
|
end
|
451
|
-
|
452
|
-
|
466
|
+
|
453
467
|
def test_add
|
454
468
|
a=Statsample::Vector.new([1,2,3,4,5], :scale)
|
455
469
|
b=Statsample::Vector.new([11,12,13,14,15], :scale)
|
@@ -527,7 +541,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
527
541
|
def test_gsl
|
528
542
|
if Statsample.has_gsl?
|
529
543
|
a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
|
530
|
-
|
544
|
+
|
531
545
|
assert_equal(2,a.mean)
|
532
546
|
assert_equal(a.variance_sample_ruby,a.variance_sample)
|
533
547
|
assert_equal(a.standard_deviation_sample_ruby,a.sds)
|