statsample 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +14 -0
- data/Gemfile +1 -16
- data/History.txt +51 -46
- data/LICENSE.txt +7 -82
- data/README.md +145 -150
- data/Rakefile +20 -102
- data/lib/spss.rb +17 -14
- data/lib/statsample/crosstab.rb +2 -2
- data/lib/statsample/dataset.rb +82 -81
- data/lib/statsample/matrix.rb +43 -43
- data/lib/statsample/reliability.rb +1 -2
- data/lib/statsample/vector.rb +157 -124
- data/lib/statsample/version.rb +1 -1
- data/lib/statsample.rb +91 -91
- data/references.txt +2 -1
- data/statsample.gemspec +89 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_crosstab.rb +8 -0
- data/test/test_histogram.rb +7 -0
- data/test/test_vector.rb +62 -48
- metadata +109 -120
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -78
- data/Manifest.txt +0 -157
- data/setup.rb +0 -1585
data/test/test_vector.rb
CHANGED
@@ -30,12 +30,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
30
30
|
end
|
31
31
|
@correct_a=@correct_a.to_scale
|
32
32
|
@correct_b=@correct_b.to_scale
|
33
|
-
|
33
|
+
|
34
34
|
@common=lambda do |av,bv|
|
35
35
|
assert_equal(@correct_a, av, "A no es esperado")
|
36
36
|
assert_equal(@correct_b, bv, "B no es esperado")
|
37
37
|
assert(!av.has_missing_data?, "A tiene datos faltantes")
|
38
|
-
assert(!bv.has_missing_data?, "b tiene datos faltantes")
|
38
|
+
assert(!bv.has_missing_data?, "b tiene datos faltantes")
|
39
39
|
end
|
40
40
|
end
|
41
41
|
should "return correct only_valid" do
|
@@ -53,10 +53,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
53
53
|
assert_equal(av,av2)
|
54
54
|
assert_same(av,av2)
|
55
55
|
assert_same(bv,bv2)
|
56
|
-
end
|
56
|
+
end
|
57
57
|
end
|
58
58
|
context Statsample::Vector do
|
59
|
-
setup do
|
59
|
+
setup do
|
60
60
|
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
|
61
61
|
@c.name="Test Vector"
|
62
62
|
@c.missing_values=[-99]
|
@@ -66,18 +66,18 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
66
66
|
v=Statsample::Vector.new(gsl)
|
67
67
|
assert_equal([1,2,3,4,5], v.to_a)
|
68
68
|
refute(v.flawed?)
|
69
|
-
|
69
|
+
|
70
70
|
end
|
71
|
-
|
71
|
+
|
72
72
|
context "using matrix operations" do
|
73
73
|
setup do
|
74
74
|
@a=[1,2,3,4,5].to_scale
|
75
75
|
end
|
76
|
-
should "to_matrix returns a matrix with 1 row" do
|
76
|
+
should "to_matrix returns a matrix with 1 row" do
|
77
77
|
mh=Matrix[[1,2,3,4,5]]
|
78
78
|
assert_equal(mh,@a.to_matrix)
|
79
79
|
end
|
80
|
-
should "to_matrix(:vertical) returns a matrix with 1 column" do
|
80
|
+
should "to_matrix(:vertical) returns a matrix with 1 column" do
|
81
81
|
mv=Matrix.columns([[1,2,3,4,5]])
|
82
82
|
assert_equal(mv,@a.to_matrix(:vertical))
|
83
83
|
end
|
@@ -89,7 +89,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
89
89
|
end
|
90
90
|
end
|
91
91
|
context "when initializing" do
|
92
|
-
setup do
|
92
|
+
setup do
|
93
93
|
@data=(10.times.map{rand(100)})+[nil]
|
94
94
|
@original=Statsample::Vector.new(@data, :scale)
|
95
95
|
end
|
@@ -103,9 +103,9 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
103
103
|
assert_equal(reference, Statsample::Vector[0,4..6,10])
|
104
104
|
assert_equal(reference, Statsample::Vector[[0],[4,5,6],[10]])
|
105
105
|
assert_equal(reference, Statsample::Vector[[0],[4,[5,[6]]],[10]])
|
106
|
-
|
106
|
+
|
107
107
|
assert_equal(reference, Statsample::Vector[[0],[4,5,6].to_vector,[10]])
|
108
|
-
|
108
|
+
|
109
109
|
end
|
110
110
|
should "be the same usign #to_vector" do
|
111
111
|
lazy1=@data.to_vector(:scale)
|
@@ -121,7 +121,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
121
121
|
v1=10.times.map {nil}.to_scale
|
122
122
|
v2=Statsample::Vector.new_scale(10)
|
123
123
|
assert_equal(v1,v2)
|
124
|
-
|
124
|
+
|
125
125
|
end
|
126
126
|
should "could use new_scale with size and value" do
|
127
127
|
a=rand
|
@@ -134,11 +134,11 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
134
134
|
v2=Statsample::Vector.new_scale(10) {|i| i*2}
|
135
135
|
assert_equal(v1,v2)
|
136
136
|
end
|
137
|
-
|
137
|
+
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
context "#split_by_separator" do
|
141
|
-
|
141
|
+
|
142
142
|
setup do
|
143
143
|
@a = Statsample::Vector.new(["a","a,b","c,d","a,d",10,nil],:nominal)
|
144
144
|
@b=@a.split_by_separator(",")
|
@@ -150,14 +150,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
150
150
|
expected=['a','b','c','d',10]
|
151
151
|
assert_equal(expected, @b.keys)
|
152
152
|
end
|
153
|
-
|
153
|
+
|
154
154
|
should "returns a Hash, which values are Statsample::Vector" do
|
155
155
|
@b.each_key {|k| assert_instance_of(Statsample::Vector, @b[k])}
|
156
156
|
end
|
157
157
|
should "hash values are n times the tokens appears" do
|
158
158
|
assert_counting_tokens(@b)
|
159
159
|
end
|
160
|
-
should "#split_by_separator_freq returns the number of ocurrences of tokens" do
|
160
|
+
should "#split_by_separator_freq returns the number of ocurrences of tokens" do
|
161
161
|
assert_equal({'a'=>3,'b'=>1,'c'=>1,'d'=>2,10=>1}, @a.split_by_separator_freq())
|
162
162
|
end
|
163
163
|
should "using a different separator give the same values" do
|
@@ -177,7 +177,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
177
177
|
3.times do |i|
|
178
178
|
assert_in_delta(i*4.5, hist.get_range(i)[0], 1e-9)
|
179
179
|
end
|
180
|
-
|
180
|
+
|
181
181
|
end
|
182
182
|
should "have a name" do
|
183
183
|
@c.name=="Test Vector"
|
@@ -190,17 +190,17 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
190
190
|
next_number=$1.to_i+1
|
191
191
|
assert_equal("Vector #{next_number}",b.name)
|
192
192
|
end
|
193
|
-
should "save to a file and load the same Vector" do
|
193
|
+
should "save to a file and load the same Vector" do
|
194
194
|
outfile=Tempfile.new("vector.vec")
|
195
195
|
@c.save(outfile.path)
|
196
196
|
a=Statsample.load(outfile.path)
|
197
|
-
assert_equal(@c,a)
|
197
|
+
assert_equal(@c,a)
|
198
198
|
end
|
199
199
|
should "#collect returns an array" do
|
200
200
|
val=@c.collect {|v| v}
|
201
201
|
assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
|
202
202
|
end
|
203
|
-
|
203
|
+
|
204
204
|
should "#recode returns a recoded array" do
|
205
205
|
a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
|
206
206
|
exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
|
@@ -213,8 +213,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
213
213
|
a=[1,2,3,4,5].to_vector(:scale)
|
214
214
|
assert_equal(120,a.product)
|
215
215
|
end
|
216
|
-
|
217
|
-
should "missing values" do
|
216
|
+
|
217
|
+
should "missing values" do
|
218
218
|
@c.missing_values=[10]
|
219
219
|
assert_equal([-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9], @c.valid_data.sort)
|
220
220
|
assert_equal([5,5,5,5,5,6,6,7,8,9,nil,1,2,3,4,nil,-99,-99], @c.data_with_nils)
|
@@ -224,13 +224,13 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
224
224
|
@c.missing_values=[]
|
225
225
|
assert_equal(@c.valid_data.sort,[-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
|
226
226
|
assert_equal(@c.data_with_nils,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
|
227
|
-
|
227
|
+
|
228
228
|
end
|
229
|
-
should "correct has_missing_data? with missing data" do
|
229
|
+
should "correct has_missing_data? with missing data" do
|
230
230
|
a=[1,2,3,nil].to_vector
|
231
231
|
assert(a.has_missing_data?)
|
232
232
|
end
|
233
|
-
should "correct has_missing_data? without missing data" do
|
233
|
+
should "correct has_missing_data? without missing data" do
|
234
234
|
a=[1,2,3,4,10].to_vector
|
235
235
|
assert(!a.has_missing_data?)
|
236
236
|
end
|
@@ -238,12 +238,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
238
238
|
a=[1,2,3,4,10].to_vector
|
239
239
|
a.missing_values=[10]
|
240
240
|
assert(a.has_missing_data?)
|
241
|
-
end
|
242
|
-
should "label correctly fields" do
|
241
|
+
end
|
242
|
+
should "label correctly fields" do
|
243
243
|
@c.labels={5=>'FIVE'}
|
244
244
|
assert_equal(["FIVE","FIVE","FIVE","FIVE","FIVE",6,6,7,8,9,10,1,2,3,4,nil,-99, -99],@c.vector_labeled.to_a)
|
245
245
|
end
|
246
|
-
should "verify" do
|
246
|
+
should "verify" do
|
247
247
|
h=@c.verify{|d| !d.nil? and d>0}
|
248
248
|
e={15=>nil,16=>-99,17=>-99}
|
249
249
|
assert_equal(e,h)
|
@@ -251,7 +251,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
251
251
|
should "have a summary with name on it" do
|
252
252
|
assert_match(/#{@c.name}/, @c.summary)
|
253
253
|
end
|
254
|
-
|
254
|
+
|
255
255
|
should "GSL::Vector based should push correcty" do
|
256
256
|
if Statsample.has_gsl?
|
257
257
|
v=GSL::Vector[1,2,3,4,5].to_scale
|
@@ -263,10 +263,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
263
263
|
end
|
264
264
|
end
|
265
265
|
|
266
|
-
|
266
|
+
|
267
267
|
should "split correctly" do
|
268
268
|
a = Statsample::Vector.new(["a","a,b","c,d","a,d","d",10,nil],:nominal)
|
269
|
-
assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
|
269
|
+
assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
|
270
270
|
end
|
271
271
|
should "multiply correct for scalar" do
|
272
272
|
a = [1,2,3].to_scale
|
@@ -275,20 +275,20 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
275
275
|
should "multiply correct with other vector" do
|
276
276
|
a = [1,2,3].to_scale
|
277
277
|
b = [2,4,6].to_scale
|
278
|
-
|
278
|
+
|
279
279
|
assert_equal([2,8,18].to_scale, a*b)
|
280
280
|
end
|
281
281
|
should "sum correct for scalar" do
|
282
282
|
a = [1,2,3].to_scale
|
283
283
|
assert_equal([11,12,13].to_scale, a+10)
|
284
284
|
end
|
285
|
-
|
285
|
+
|
286
286
|
should "raise NoMethodError when method requires ordinal and vector is nominal" do
|
287
287
|
@c.type=:nominal
|
288
288
|
assert_raise(::NoMethodError) { @c.median }
|
289
289
|
end
|
290
|
-
|
291
|
-
should "raise NoMethodError when method requires scalar and vector is ordinal" do
|
290
|
+
|
291
|
+
should "raise NoMethodError when method requires scalar and vector is ordinal" do
|
292
292
|
@c.type=:ordinal
|
293
293
|
assert_raise(::NoMethodError) { @c.mean }
|
294
294
|
end
|
@@ -326,7 +326,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
326
326
|
assert_in_delta(se, ds[:mean].sd, 0.02)
|
327
327
|
end
|
328
328
|
|
329
|
-
|
329
|
+
|
330
330
|
end
|
331
331
|
|
332
332
|
|
@@ -349,9 +349,12 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
349
349
|
v1=[1,2,3].to_vector(:nominal)
|
350
350
|
v2=[1,2,3].to_vector(:ordinal)
|
351
351
|
assert_not_equal(v1,v2)
|
352
|
+
v2=[1,2,3]
|
353
|
+
assert_not_equal(v1,v2)
|
352
354
|
v1=[1,2,3].to_vector()
|
353
355
|
v2=[1,2,3].to_vector()
|
354
356
|
assert_equal(v1,v2)
|
357
|
+
assert_equal(false, v1 == Object.new)
|
355
358
|
end
|
356
359
|
def test_vector_percentil
|
357
360
|
a=[1,2,2,3,4,5,5,5,6,10].to_scale
|
@@ -360,8 +363,6 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
360
363
|
a=[1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10].to_scale
|
361
364
|
expected=[10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100].to_scale
|
362
365
|
assert_equal(expected, a.vector_percentil)
|
363
|
-
|
364
|
-
|
365
366
|
end
|
366
367
|
def test_ordinal
|
367
368
|
@c.type=:ordinal
|
@@ -377,6 +378,21 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
377
378
|
assert_equal(9.5, a.percentil(75))
|
378
379
|
assert_equal(3.0, a.percentil(10))
|
379
380
|
end
|
381
|
+
def test_linear_percentil_strategy
|
382
|
+
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_scale
|
383
|
+
assert_equal 102, values.percentil(0, :linear)
|
384
|
+
assert_equal 104.75, values.percentil(25, :linear)
|
385
|
+
assert_equal 108.5, values.percentil(50, :linear)
|
386
|
+
assert_equal 112.75, values.percentil(75, :linear)
|
387
|
+
assert_equal 116, values.percentil(100, :linear)
|
388
|
+
|
389
|
+
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_scale
|
390
|
+
assert_equal 102, values.percentil(0, :linear)
|
391
|
+
assert_equal 105, values.percentil(25, :linear)
|
392
|
+
assert_equal 109, values.percentil(50, :linear)
|
393
|
+
assert_equal 115, values.percentil(75, :linear)
|
394
|
+
assert_equal 118, values.percentil(100, :linear)
|
395
|
+
end
|
380
396
|
def test_ranked
|
381
397
|
v1=[0.8,1.2,1.2,2.3,18].to_vector(:ordinal)
|
382
398
|
expected=[1,2.5,2.5,4,5].to_vector(:ordinal)
|
@@ -415,7 +431,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
415
431
|
assert_equal(0,vs.mean)
|
416
432
|
assert_equal(1,vs.sds)
|
417
433
|
end
|
418
|
-
|
434
|
+
|
419
435
|
def test_vector_standarized_with_zero_variance
|
420
436
|
v1=100.times.map {|i| 1}.to_scale
|
421
437
|
exp=100.times.map {nil}.to_scale
|
@@ -428,14 +444,14 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
428
444
|
assert_raise(NoMethodError) { v.check_type(:scale)}
|
429
445
|
assert_raise(NoMethodError) { v.check_type(:ordinal)}
|
430
446
|
assert(v.check_type(:nominal).nil?)
|
431
|
-
|
447
|
+
|
432
448
|
v.type=:ordinal
|
433
|
-
|
449
|
+
|
434
450
|
assert_raise(NoMethodError) { v.check_type(:scale)}
|
435
|
-
|
451
|
+
|
436
452
|
assert(v.check_type(:ordinal).nil?)
|
437
453
|
assert(v.check_type(:nominal).nil?)
|
438
|
-
|
454
|
+
|
439
455
|
|
440
456
|
v.type=:scale
|
441
457
|
assert(v.check_type(:scale).nil?)
|
@@ -446,10 +462,8 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
446
462
|
assert_raise(NoMethodError) { v.check_type(:scale)}
|
447
463
|
assert_raise(NoMethodError) { v.check_type(:ordinal)}
|
448
464
|
assert_raise(NoMethodError) { v.check_type(:nominal)}
|
449
|
-
|
450
465
|
end
|
451
|
-
|
452
|
-
|
466
|
+
|
453
467
|
def test_add
|
454
468
|
a=Statsample::Vector.new([1,2,3,4,5], :scale)
|
455
469
|
b=Statsample::Vector.new([11,12,13,14,15], :scale)
|
@@ -527,7 +541,7 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
527
541
|
def test_gsl
|
528
542
|
if Statsample.has_gsl?
|
529
543
|
a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
|
530
|
-
|
544
|
+
|
531
545
|
assert_equal(2,a.mean)
|
532
546
|
assert_equal(a.variance_sample_ruby,a.variance_sample)
|
533
547
|
assert_equal(a.standard_deviation_sample_ruby,a.sds)
|