statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/test/test_crosstab.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleCrosstabTestCase < Minitest::Test
|
3
3
|
def initialize(*args)
|
4
|
-
@v1 = %w(black blonde black black red black brown black blonde black red black blonde)
|
5
|
-
@v2 = %w(woman man man woman man man man woman man woman woman man man)
|
4
|
+
@v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
|
5
|
+
@v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
|
6
6
|
@ct = Statsample::Crosstab.new(@v1, @v2)
|
7
7
|
super
|
8
8
|
end
|
@@ -12,7 +12,7 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
12
12
|
assert_raise ArgumentError do
|
13
13
|
Statsample::Crosstab.new(e1, @v2)
|
14
14
|
end
|
15
|
-
e2 = %w(black blonde black black red black brown black blonde black black)
|
15
|
+
e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
|
16
16
|
|
17
17
|
assert_raise ArgumentError do
|
18
18
|
Statsample::Crosstab.new(e2, @v2)
|
@@ -23,8 +23,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def test_crosstab_basic
|
26
|
-
assert_equal(%w(black blonde brown red), @ct.rows_names)
|
27
|
-
assert_equal(%w(man woman), @ct.cols_names)
|
26
|
+
assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
|
27
|
+
assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
|
28
28
|
assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
|
29
29
|
assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
|
30
30
|
end
|
@@ -51,15 +51,15 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
51
51
|
end
|
52
52
|
|
53
53
|
def test_expected
|
54
|
-
v1 = %w(1 1 1 1 1 0 0 0 0 0)
|
55
|
-
v2 = %w(0 0 0 0 0 1 1 1 1 1)
|
54
|
+
v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
|
55
|
+
v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
|
56
56
|
ct = Statsample::Crosstab.new(v1, v2)
|
57
57
|
assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_crosstab_with_scale
|
61
|
-
v1 = %w(1 1 1 1 1 0 0 0 0 0)
|
62
|
-
v2 = %w(0 0 0 0 0 1 1 1 1 1)
|
61
|
+
v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
|
62
|
+
v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
|
63
63
|
ct = Statsample::Crosstab.new(v1, v2)
|
64
64
|
assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
|
65
65
|
assert_nothing_raised { ct.summary }
|
data/test/test_dataset.rb
CHANGED
@@ -1,479 +1,49 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleDatasetTestCase < Minitest::Test
|
3
3
|
def setup
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
'c' => %w(f g h i j k).to_vector
|
14
|
-
}.to_dataset
|
15
|
-
nest = ds.nest('a', 'b')
|
16
|
-
assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
|
17
|
-
assert_equal([{ 'c' => 'h' }], nest['a']['d'])
|
18
|
-
assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_should_have_summary
|
22
|
-
assert(@ds.summary.size > 0)
|
4
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
5
|
+
@ds = Statsample::Dataset.new({
|
6
|
+
'id' => Statsample::Vector.new([1, 2, 3, 4, 5]),
|
7
|
+
'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)),
|
8
|
+
'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
|
9
|
+
'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
10
|
+
'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
11
|
+
%w(id name age city a1))
|
12
|
+
end
|
23
13
|
end
|
24
14
|
|
25
15
|
def test_basic
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_saveload
|
31
|
-
outfile = Tempfile.new('dataset.ds')
|
32
|
-
@ds.save(outfile.path)
|
33
|
-
a = Statsample.load(outfile.path)
|
34
|
-
assert_equal(@ds, a)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_gsl
|
38
|
-
if Statsample.has_gsl?
|
39
|
-
matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
|
40
|
-
ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
|
41
|
-
assert_equal(matrix, ds.to_gsl)
|
42
|
-
else
|
43
|
-
skip('Gsl needed')
|
16
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#nrows.\n") do
|
17
|
+
assert_equal(5, @ds.cases)
|
44
18
|
end
|
45
|
-
end
|
46
19
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
assert_equal(matrix, ds.to_matrix)
|
20
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
|
21
|
+
assert_equal([:id, :name, :age, :city, :a1], @ds.fields)
|
22
|
+
end
|
51
23
|
end
|
52
24
|
|
53
25
|
def test_fields
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
assert_equal(%w(id name age a1 city), @ds.fields)
|
58
|
-
end
|
59
|
-
|
60
|
-
def test_merge
|
61
|
-
a = [1, 2, 3].to_numeric
|
62
|
-
b = [3, 4, 5].to_vector
|
63
|
-
c = [4, 5, 6].to_numeric
|
64
|
-
d = [7, 8, 9].to_vector
|
65
|
-
e = [10, 20, 30].to_vector
|
66
|
-
ds1 = { 'a' => a, 'b' => b }.to_dataset
|
67
|
-
ds2 = { 'c' => c, 'd' => d }.to_dataset
|
68
|
-
exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
|
69
|
-
|
70
|
-
assert_equal(exp, ds1.merge(ds2))
|
71
|
-
exp.fields = %w(c d a b)
|
72
|
-
assert_equal(exp, ds2.merge(ds1))
|
73
|
-
ds3 = { 'a' => e }.to_dataset
|
74
|
-
exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
|
75
|
-
exp.fields = %w(a_1 b a_2)
|
76
|
-
assert_equal(exp, ds1.merge(ds3))
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_each_vector
|
80
|
-
a = [1, 2, 3].to_vector
|
81
|
-
b = [3, 4, 5].to_vector
|
82
|
-
fields = %w(a b)
|
83
|
-
ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
|
84
|
-
res = []
|
85
|
-
ds.each_vector{|k, v|
|
86
|
-
res.push([k, v])
|
87
|
-
}
|
88
|
-
assert_equal([['a', a], ['b', b]], res)
|
89
|
-
ds.fields = %w(b a)
|
90
|
-
res = []
|
91
|
-
ds.each_vector{|k, v|
|
92
|
-
res.push([k, v])
|
93
|
-
}
|
94
|
-
assert_equal([['b', b], ['a', a]], res)
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_equality
|
98
|
-
v1 = [1, 2, 3, 4].to_vector
|
99
|
-
v2 = [5, 6, 7, 8].to_vector
|
100
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
101
|
-
v3 = [1, 2, 3, 4].to_vector
|
102
|
-
v4 = [5, 6, 7, 8].to_vector
|
103
|
-
ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
|
104
|
-
assert_equal(ds1, ds2)
|
105
|
-
ds2.fields = %w(v1 v2)
|
106
|
-
assert_not_equal(ds1, ds2)
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_add_vector
|
110
|
-
v = Statsample::Vector.new(%w(a b c d e))
|
111
|
-
@ds.add_vector('new', v)
|
112
|
-
assert_equal(%w(id name age city a1 new), @ds.fields)
|
113
|
-
x = Statsample::Vector.new(%w(a b c d e f g))
|
114
|
-
assert_raise ArgumentError do
|
115
|
-
@ds.add_vector('new2', x)
|
26
|
+
assert_output(nil, "WARNING: Deprecated. Use Daru::DataFrame#reindex_vectors! instead.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
|
27
|
+
@ds.fields = %w(name a1 id age city)
|
28
|
+
assert_equal([:name, :a1, :id, :age, :city], @ds.fields)
|
116
29
|
end
|
117
|
-
end
|
118
|
-
|
119
|
-
def test_vector_by_calculation
|
120
|
-
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
|
121
|
-
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
|
122
|
-
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
|
123
|
-
ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
|
124
|
-
total = ds.vector_by_calculation {|row|
|
125
|
-
row['a1'] + row['a2'] + row['a3']
|
126
|
-
}
|
127
|
-
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
|
128
|
-
assert_equal(expected, total)
|
129
|
-
end
|
130
|
-
|
131
|
-
def test_vector_sum
|
132
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
133
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
134
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
135
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
136
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
|
137
|
-
total = ds.vector_sum
|
138
|
-
a = ds.vector_sum(%w(a1 a2))
|
139
|
-
b = ds.vector_sum(%w(b1 b2))
|
140
|
-
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
|
141
|
-
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
|
142
|
-
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
|
143
|
-
assert_equal(expected_a, a)
|
144
|
-
assert_equal(expected_b, b)
|
145
|
-
assert_equal(expected_total, total)
|
146
|
-
end
|
147
|
-
|
148
|
-
def test_vector_missing_values
|
149
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
150
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
151
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
152
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
153
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
154
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
155
|
-
mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
|
156
|
-
assert_equal(mva, ds.vector_missing_values)
|
157
|
-
end
|
158
|
-
|
159
|
-
def test_has_missing_values
|
160
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
161
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
162
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
163
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
164
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
165
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
166
|
-
assert(ds.has_missing_data?)
|
167
|
-
clean = ds.dup_only_valid
|
168
|
-
assert(!clean.has_missing_data?)
|
169
|
-
end
|
170
|
-
|
171
|
-
def test_vector_count_characters
|
172
|
-
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
|
173
|
-
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
|
174
|
-
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
|
175
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
176
|
-
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
|
177
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
178
|
-
exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
|
179
|
-
assert_equal(exp, ds.vector_count_characters)
|
180
|
-
end
|
181
|
-
|
182
|
-
def test_vector_mean
|
183
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
184
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
185
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
186
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
187
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
188
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
189
|
-
total = ds.vector_mean
|
190
|
-
a = ds.vector_mean(%w(a1 a2), 1)
|
191
|
-
b = ds.vector_mean(%w(b1 b2), 1)
|
192
|
-
c = ds.vector_mean(%w(b1 b2 c), 1)
|
193
|
-
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
|
194
|
-
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
|
195
|
-
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
|
196
|
-
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
|
197
|
-
assert_equal(expected_a, a)
|
198
|
-
assert_equal(expected_b, b)
|
199
|
-
assert_equal(expected_c, c)
|
200
|
-
assert_equal(expected_total, total)
|
201
|
-
end
|
202
|
-
|
203
|
-
def test_each_array
|
204
|
-
expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
|
205
|
-
out = []
|
206
|
-
@ds.each_array{ |a|
|
207
|
-
out.push(a)
|
208
|
-
}
|
209
|
-
assert_equal(expected, out)
|
210
|
-
end
|
211
30
|
|
212
|
-
|
213
|
-
|
214
|
-
@ds.recode!('age') { |c| c['id'] * 2 }
|
215
|
-
expected = [2, 4, 6, 8, 10].to_vector(:numeric)
|
216
|
-
assert_equal(expected, @ds['age'])
|
217
|
-
end
|
218
|
-
|
219
|
-
def test_case_as
|
220
|
-
assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
|
221
|
-
assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
|
222
|
-
# Native methods
|
223
|
-
assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
|
224
|
-
assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
|
225
|
-
end
|
226
|
-
|
227
|
-
def test_delete_vector
|
228
|
-
@ds.delete_vector('name')
|
229
|
-
assert_equal(%w(id age city a1), @ds.fields)
|
230
|
-
assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
|
231
|
-
end
|
232
|
-
|
233
|
-
def test_change_type
|
234
|
-
@ds.col('age').type = :numeric
|
235
|
-
assert_equal(:numeric, @ds.col('age').type)
|
236
|
-
end
|
237
|
-
|
238
|
-
def test_split_by_separator_recode
|
239
|
-
@ds.add_vectors_by_split_recode('a1', '_')
|
240
|
-
assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
|
241
|
-
assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
|
242
|
-
assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
|
243
|
-
assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
|
244
|
-
{ 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
|
245
|
-
assert_equal(v, @ds[k].name)
|
31
|
+
assert_raise ArgumentError, "Assigning less fields than vectors is no longer supported" do
|
32
|
+
@ds.fields = %w(id name age)
|
246
33
|
end
|
247
34
|
end
|
248
35
|
|
249
|
-
def
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
|
254
|
-
assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
|
255
|
-
end
|
256
|
-
|
257
|
-
def test_percentiles
|
258
|
-
v1 = (1..100).to_a.to_numeric
|
259
|
-
assert_equal(50.5, v1.median)
|
260
|
-
assert_equal(25.5, v1.percentil(25))
|
261
|
-
v2 = (1..99).to_a.to_numeric
|
262
|
-
assert_equal(50, v2.median)
|
263
|
-
assert_equal(25, v2.percentil(25))
|
264
|
-
v3 = (1..50).to_a.to_numeric
|
265
|
-
assert_equal(25.5, v3.median)
|
266
|
-
assert_equal(13, v3.percentil(25))
|
267
|
-
end
|
268
|
-
|
269
|
-
def test_add_case
|
270
|
-
ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
|
271
|
-
ds.add_case([1, 2, 3])
|
272
|
-
ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
|
273
|
-
ds.add_case([[7, 8, 9], %w(a b c)])
|
274
|
-
assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
|
275
|
-
assert_equal([4, 5, 6], ds.case_as_array(1))
|
276
|
-
assert_equal([7, 8, 9], ds.case_as_array(2))
|
277
|
-
assert_equal(%w(a b c), ds.case_as_array(3))
|
278
|
-
ds.add_case_array([6, 7, 1])
|
279
|
-
ds.update_valid_data
|
280
|
-
assert_equal([6, 7, 1], ds.case_as_array(4))
|
281
|
-
end
|
282
|
-
|
283
|
-
def test_marshaling
|
284
|
-
ds_marshal = Marshal.load(Marshal.dump(@ds))
|
285
|
-
assert_equal(ds_marshal, @ds)
|
286
|
-
end
|
287
|
-
|
288
|
-
def test_range
|
289
|
-
v1 = [1, 2, 3, 4].to_vector
|
290
|
-
v2 = [5, 6, 7, 8].to_vector
|
291
|
-
v3 = [9, 10, 11, 12].to_vector
|
292
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
|
293
|
-
assert_same(v1, ds1['v1'])
|
294
|
-
ds2 = ds1['v2'..'v1']
|
295
|
-
assert_equal(%w(v2 v1), ds2.fields)
|
296
|
-
assert_same(ds1['v1'], ds2['v1'])
|
297
|
-
assert_same(ds1['v2'], ds2['v2'])
|
298
|
-
end
|
299
|
-
|
300
|
-
def test_clone
|
301
|
-
v1 = [1, 2, 3, 4].to_vector
|
302
|
-
v2 = [5, 6, 7, 8].to_vector
|
303
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
304
|
-
ds2 = ds1.clone
|
305
|
-
assert_equal(ds1, ds2)
|
306
|
-
assert_not_same(ds1, ds2)
|
307
|
-
assert_equal(ds1['v1'], ds2['v1'])
|
308
|
-
assert_same(ds1['v1'], ds2['v1'])
|
309
|
-
assert_equal(ds1.fields, ds2.fields)
|
310
|
-
assert_not_same(ds1.fields, ds2.fields)
|
311
|
-
assert_equal(ds1.cases, ds2.cases)
|
312
|
-
|
313
|
-
# partial clone
|
314
|
-
ds3 = ds1.clone('v1')
|
315
|
-
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
316
|
-
assert_equal(ds_exp, ds3)
|
317
|
-
assert_not_same(ds_exp, ds3)
|
318
|
-
assert_equal(ds3['v1'], ds_exp['v1'])
|
319
|
-
assert_same(ds3['v1'], ds_exp['v1'])
|
320
|
-
assert_equal(ds3.fields, ds_exp.fields)
|
321
|
-
assert_equal(ds3.cases, ds_exp.cases)
|
322
|
-
|
323
|
-
assert_not_same(ds3.fields, ds_exp.fields)
|
324
|
-
end
|
325
|
-
|
326
|
-
def test_dup
|
327
|
-
v1 = [1, 2, 3, 4].to_vector
|
328
|
-
v2 = [5, 6, 7, 8].to_vector
|
329
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
330
|
-
ds2 = ds1.dup
|
331
|
-
assert_equal(ds1, ds2)
|
332
|
-
assert_not_same(ds1, ds2)
|
333
|
-
assert_equal(ds1['v1'], ds2['v1'])
|
334
|
-
assert_not_same(ds1['v1'], ds2['v1'])
|
335
|
-
assert_equal(ds1.cases, ds2.cases)
|
336
|
-
|
337
|
-
assert_equal(ds1.fields, ds2.fields)
|
338
|
-
assert_not_same(ds1.fields, ds2.fields)
|
339
|
-
ds1['v1'].type = :numeric
|
340
|
-
# dup partial
|
341
|
-
ds3 = ds1.dup('v1')
|
342
|
-
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
343
|
-
assert_equal(ds_exp, ds3)
|
344
|
-
assert_not_same(ds_exp, ds3)
|
345
|
-
assert_equal(ds3['v1'], ds_exp['v1'])
|
346
|
-
assert_not_same(ds3['v1'], ds_exp['v1'])
|
347
|
-
assert_equal(ds3.fields, ds_exp.fields)
|
348
|
-
assert_equal(ds3.cases, ds_exp.cases)
|
349
|
-
|
350
|
-
assert_not_same(ds3.fields, ds_exp.fields)
|
351
|
-
|
352
|
-
# empty
|
353
|
-
ds3 = ds1.dup_empty
|
354
|
-
assert_not_equal(ds1, ds3)
|
355
|
-
assert_not_equal(ds1['v1'], ds3['v1'])
|
356
|
-
assert_equal([], ds3['v1'].data)
|
357
|
-
assert_equal([], ds3['v2'].data)
|
358
|
-
assert_equal(:numeric, ds3['v1'].type)
|
359
|
-
assert_equal(ds1.fields, ds2.fields)
|
360
|
-
assert_not_same(ds1.fields, ds2.fields)
|
361
|
-
end
|
36
|
+
def test_crosstab_with_asignation
|
37
|
+
v1 = Daru::Vector.new(%w(a a a b b b c c c))
|
38
|
+
v2 = Daru::Vector.new(%w(a b c a b c a b c))
|
39
|
+
v3 = Daru::Vector.new(%w(0 1 0 0 1 1 0 0 1))
|
362
40
|
|
363
|
-
|
364
|
-
|
365
|
-
assert_raise ArgumentError do
|
366
|
-
@ds.from_to('name', 'a2')
|
41
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
42
|
+
@ds = Statsample::Dataset.crosstab_by_assignation(v1, v2, v3)
|
367
43
|
end
|
368
|
-
end
|
369
|
-
|
370
|
-
def test_each_array_with_nils
|
371
|
-
v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
|
372
|
-
v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
|
373
|
-
v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
|
374
|
-
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
375
|
-
ds2 = ds1.dup_empty
|
376
|
-
ds1.each_array_with_nils {|row|
|
377
|
-
ds2.add_case_array(row)
|
378
|
-
}
|
379
|
-
ds2.update_valid_data
|
380
|
-
assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
|
381
|
-
assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
|
382
|
-
end
|
383
44
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
v3 = [9, 10, 11, 12].to_vector(:numeric)
|
388
|
-
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
389
|
-
ds2 = ds1.dup_only_valid
|
390
|
-
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
|
391
|
-
assert_equal(expected, ds2)
|
392
|
-
assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
|
393
|
-
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
|
394
|
-
assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
|
395
|
-
end
|
396
|
-
|
397
|
-
def test_filter
|
398
|
-
@ds['age'].type = :numeric
|
399
|
-
filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
|
400
|
-
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
|
401
|
-
'city' => Statsample::Vector.new(%w(London Paris)),
|
402
|
-
'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
|
403
|
-
assert_equal(expected, filtered)
|
404
|
-
end
|
405
|
-
|
406
|
-
def test_filter_field
|
407
|
-
@ds['age'].type = :numeric
|
408
|
-
filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
|
409
|
-
expected = [2, 4].to_vector
|
410
|
-
assert_equal(expected, filtered)
|
411
|
-
end
|
412
|
-
|
413
|
-
def test_verify
|
414
|
-
name = %w(r1 r2 r3 r4).to_vector(:object)
|
415
|
-
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
416
|
-
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
417
|
-
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
418
|
-
v4 = %w(a b a b).to_vector(:object)
|
419
|
-
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
|
420
|
-
ds.fields = %w(v1 v2 v3 v4 id)
|
421
|
-
# Correct
|
422
|
-
t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
|
423
|
-
t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
|
424
|
-
# Fail!
|
425
|
-
t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
|
426
|
-
exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
|
427
|
-
exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
|
428
|
-
res = ds.verify(t3, t1, t2)
|
429
|
-
assert_equal(exp1, res)
|
430
|
-
res = ds.verify('id', t1, t2, t3)
|
431
|
-
assert_equal(exp2, res)
|
432
|
-
end
|
433
|
-
|
434
|
-
def test_compute_operation
|
435
|
-
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
436
|
-
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
437
|
-
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
438
|
-
vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
|
439
|
-
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
|
440
|
-
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
|
441
|
-
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
|
442
|
-
assert_equal(vnumeric, ds.compute('v1/2'))
|
443
|
-
assert_equal(vsum, ds.compute('v1+v2+v3'))
|
444
|
-
assert_equal(vmult, ds.compute('v1*v2'))
|
445
|
-
end
|
446
|
-
|
447
|
-
def test_crosstab_with_asignation
|
448
|
-
v1 = %w(a a a b b b c c c).to_vector
|
449
|
-
v2 = %w(a b c a b c a b c).to_vector
|
450
|
-
v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
|
451
|
-
ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
|
452
|
-
assert_equal(:object, ds['_id'].type)
|
453
|
-
assert_equal(:numeric, ds['a'].type)
|
454
|
-
assert_equal(:numeric, ds['b'].type)
|
455
|
-
ev_id = %w(a b c).to_vector
|
456
|
-
ev_a = %w(0 0 0).to_numeric
|
457
|
-
ev_b = %w(1 1 0).to_numeric
|
458
|
-
ev_c = %w(0 1 1).to_numeric
|
459
|
-
ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
|
460
|
-
assert_equal(ds, ds2)
|
461
|
-
end
|
462
|
-
|
463
|
-
def test_one_to_many
|
464
|
-
cases = [
|
465
|
-
['1', 'george', 'red', 10, 'blue', 20, nil, nil],
|
466
|
-
['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
|
467
|
-
['3', 'alfred', nil, nil, nil, nil, nil, nil]
|
468
|
-
]
|
469
|
-
ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
|
470
|
-
cases.each { |c| ds.add_case_array c }
|
471
|
-
ds.update_valid_data
|
472
|
-
ids = %w(1 1 2 2 2).to_vector
|
473
|
-
colors = %w(red blue green orange white).to_vector
|
474
|
-
values = [10, 20, 15, 30, 20].to_vector
|
475
|
-
col_ids = [1, 2, 1, 2, 3].to_numeric
|
476
|
-
ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
|
477
|
-
assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
|
45
|
+
assert_output(nil, "WARNING: Daru uses symbols instead of strings for naming vectors. Please switch to symbols.\n") do
|
46
|
+
assert_equal(:object, @ds['_id'].type)
|
47
|
+
end
|
478
48
|
end
|
479
49
|
end
|