statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/test/test_crosstab.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleCrosstabTestCase < Minitest::Test
|
3
3
|
def initialize(*args)
|
4
|
-
@v1 = %w(black blonde black black red black brown black blonde black red black blonde)
|
5
|
-
@v2 = %w(woman man man woman man man man woman man woman woman man man)
|
4
|
+
@v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
|
5
|
+
@v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
|
6
6
|
@ct = Statsample::Crosstab.new(@v1, @v2)
|
7
7
|
super
|
8
8
|
end
|
@@ -12,7 +12,7 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
12
12
|
assert_raise ArgumentError do
|
13
13
|
Statsample::Crosstab.new(e1, @v2)
|
14
14
|
end
|
15
|
-
e2 = %w(black blonde black black red black brown black blonde black black)
|
15
|
+
e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
|
16
16
|
|
17
17
|
assert_raise ArgumentError do
|
18
18
|
Statsample::Crosstab.new(e2, @v2)
|
@@ -23,8 +23,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def test_crosstab_basic
|
26
|
-
assert_equal(%w(black blonde brown red), @ct.rows_names)
|
27
|
-
assert_equal(%w(man woman), @ct.cols_names)
|
26
|
+
assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
|
27
|
+
assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
|
28
28
|
assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
|
29
29
|
assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
|
30
30
|
end
|
@@ -51,15 +51,15 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
51
51
|
end
|
52
52
|
|
53
53
|
def test_expected
|
54
|
-
v1 = %w(1 1 1 1 1 0 0 0 0 0)
|
55
|
-
v2 = %w(0 0 0 0 0 1 1 1 1 1)
|
54
|
+
v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
|
55
|
+
v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
|
56
56
|
ct = Statsample::Crosstab.new(v1, v2)
|
57
57
|
assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_crosstab_with_scale
|
61
|
-
v1 = %w(1 1 1 1 1 0 0 0 0 0)
|
62
|
-
v2 = %w(0 0 0 0 0 1 1 1 1 1)
|
61
|
+
v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
|
62
|
+
v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
|
63
63
|
ct = Statsample::Crosstab.new(v1, v2)
|
64
64
|
assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
|
65
65
|
assert_nothing_raised { ct.summary }
|
data/test/test_dataset.rb
CHANGED
@@ -1,479 +1,49 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleDatasetTestCase < Minitest::Test
|
3
3
|
def setup
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
'c' => %w(f g h i j k).to_vector
|
14
|
-
}.to_dataset
|
15
|
-
nest = ds.nest('a', 'b')
|
16
|
-
assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
|
17
|
-
assert_equal([{ 'c' => 'h' }], nest['a']['d'])
|
18
|
-
assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_should_have_summary
|
22
|
-
assert(@ds.summary.size > 0)
|
4
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
5
|
+
@ds = Statsample::Dataset.new({
|
6
|
+
'id' => Statsample::Vector.new([1, 2, 3, 4, 5]),
|
7
|
+
'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)),
|
8
|
+
'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
|
9
|
+
'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
10
|
+
'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
11
|
+
%w(id name age city a1))
|
12
|
+
end
|
23
13
|
end
|
24
14
|
|
25
15
|
def test_basic
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_saveload
|
31
|
-
outfile = Tempfile.new('dataset.ds')
|
32
|
-
@ds.save(outfile.path)
|
33
|
-
a = Statsample.load(outfile.path)
|
34
|
-
assert_equal(@ds, a)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_gsl
|
38
|
-
if Statsample.has_gsl?
|
39
|
-
matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
|
40
|
-
ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
|
41
|
-
assert_equal(matrix, ds.to_gsl)
|
42
|
-
else
|
43
|
-
skip('Gsl needed')
|
16
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#nrows.\n") do
|
17
|
+
assert_equal(5, @ds.cases)
|
44
18
|
end
|
45
|
-
end
|
46
19
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
assert_equal(matrix, ds.to_matrix)
|
20
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
|
21
|
+
assert_equal([:id, :name, :age, :city, :a1], @ds.fields)
|
22
|
+
end
|
51
23
|
end
|
52
24
|
|
53
25
|
def test_fields
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
assert_equal(%w(id name age a1 city), @ds.fields)
|
58
|
-
end
|
59
|
-
|
60
|
-
def test_merge
|
61
|
-
a = [1, 2, 3].to_numeric
|
62
|
-
b = [3, 4, 5].to_vector
|
63
|
-
c = [4, 5, 6].to_numeric
|
64
|
-
d = [7, 8, 9].to_vector
|
65
|
-
e = [10, 20, 30].to_vector
|
66
|
-
ds1 = { 'a' => a, 'b' => b }.to_dataset
|
67
|
-
ds2 = { 'c' => c, 'd' => d }.to_dataset
|
68
|
-
exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
|
69
|
-
|
70
|
-
assert_equal(exp, ds1.merge(ds2))
|
71
|
-
exp.fields = %w(c d a b)
|
72
|
-
assert_equal(exp, ds2.merge(ds1))
|
73
|
-
ds3 = { 'a' => e }.to_dataset
|
74
|
-
exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
|
75
|
-
exp.fields = %w(a_1 b a_2)
|
76
|
-
assert_equal(exp, ds1.merge(ds3))
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_each_vector
|
80
|
-
a = [1, 2, 3].to_vector
|
81
|
-
b = [3, 4, 5].to_vector
|
82
|
-
fields = %w(a b)
|
83
|
-
ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
|
84
|
-
res = []
|
85
|
-
ds.each_vector{|k, v|
|
86
|
-
res.push([k, v])
|
87
|
-
}
|
88
|
-
assert_equal([['a', a], ['b', b]], res)
|
89
|
-
ds.fields = %w(b a)
|
90
|
-
res = []
|
91
|
-
ds.each_vector{|k, v|
|
92
|
-
res.push([k, v])
|
93
|
-
}
|
94
|
-
assert_equal([['b', b], ['a', a]], res)
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_equality
|
98
|
-
v1 = [1, 2, 3, 4].to_vector
|
99
|
-
v2 = [5, 6, 7, 8].to_vector
|
100
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
101
|
-
v3 = [1, 2, 3, 4].to_vector
|
102
|
-
v4 = [5, 6, 7, 8].to_vector
|
103
|
-
ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
|
104
|
-
assert_equal(ds1, ds2)
|
105
|
-
ds2.fields = %w(v1 v2)
|
106
|
-
assert_not_equal(ds1, ds2)
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_add_vector
|
110
|
-
v = Statsample::Vector.new(%w(a b c d e))
|
111
|
-
@ds.add_vector('new', v)
|
112
|
-
assert_equal(%w(id name age city a1 new), @ds.fields)
|
113
|
-
x = Statsample::Vector.new(%w(a b c d e f g))
|
114
|
-
assert_raise ArgumentError do
|
115
|
-
@ds.add_vector('new2', x)
|
26
|
+
assert_output(nil, "WARNING: Deprecated. Use Daru::DataFrame#reindex_vectors! instead.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
|
27
|
+
@ds.fields = %w(name a1 id age city)
|
28
|
+
assert_equal([:name, :a1, :id, :age, :city], @ds.fields)
|
116
29
|
end
|
117
|
-
end
|
118
|
-
|
119
|
-
def test_vector_by_calculation
|
120
|
-
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
|
121
|
-
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
|
122
|
-
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
|
123
|
-
ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
|
124
|
-
total = ds.vector_by_calculation {|row|
|
125
|
-
row['a1'] + row['a2'] + row['a3']
|
126
|
-
}
|
127
|
-
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
|
128
|
-
assert_equal(expected, total)
|
129
|
-
end
|
130
|
-
|
131
|
-
def test_vector_sum
|
132
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
133
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
134
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
135
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
136
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
|
137
|
-
total = ds.vector_sum
|
138
|
-
a = ds.vector_sum(%w(a1 a2))
|
139
|
-
b = ds.vector_sum(%w(b1 b2))
|
140
|
-
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
|
141
|
-
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
|
142
|
-
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
|
143
|
-
assert_equal(expected_a, a)
|
144
|
-
assert_equal(expected_b, b)
|
145
|
-
assert_equal(expected_total, total)
|
146
|
-
end
|
147
|
-
|
148
|
-
def test_vector_missing_values
|
149
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
150
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
151
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
152
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
153
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
154
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
155
|
-
mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
|
156
|
-
assert_equal(mva, ds.vector_missing_values)
|
157
|
-
end
|
158
|
-
|
159
|
-
def test_has_missing_values
|
160
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
161
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
162
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
163
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
164
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
165
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
166
|
-
assert(ds.has_missing_data?)
|
167
|
-
clean = ds.dup_only_valid
|
168
|
-
assert(!clean.has_missing_data?)
|
169
|
-
end
|
170
|
-
|
171
|
-
def test_vector_count_characters
|
172
|
-
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
|
173
|
-
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
|
174
|
-
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
|
175
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
176
|
-
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
|
177
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
178
|
-
exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
|
179
|
-
assert_equal(exp, ds.vector_count_characters)
|
180
|
-
end
|
181
|
-
|
182
|
-
def test_vector_mean
|
183
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
184
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
185
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
186
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
187
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
188
|
-
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
189
|
-
total = ds.vector_mean
|
190
|
-
a = ds.vector_mean(%w(a1 a2), 1)
|
191
|
-
b = ds.vector_mean(%w(b1 b2), 1)
|
192
|
-
c = ds.vector_mean(%w(b1 b2 c), 1)
|
193
|
-
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
|
194
|
-
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
|
195
|
-
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
|
196
|
-
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
|
197
|
-
assert_equal(expected_a, a)
|
198
|
-
assert_equal(expected_b, b)
|
199
|
-
assert_equal(expected_c, c)
|
200
|
-
assert_equal(expected_total, total)
|
201
|
-
end
|
202
|
-
|
203
|
-
def test_each_array
|
204
|
-
expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
|
205
|
-
out = []
|
206
|
-
@ds.each_array{ |a|
|
207
|
-
out.push(a)
|
208
|
-
}
|
209
|
-
assert_equal(expected, out)
|
210
|
-
end
|
211
30
|
|
212
|
-
|
213
|
-
|
214
|
-
@ds.recode!('age') { |c| c['id'] * 2 }
|
215
|
-
expected = [2, 4, 6, 8, 10].to_vector(:numeric)
|
216
|
-
assert_equal(expected, @ds['age'])
|
217
|
-
end
|
218
|
-
|
219
|
-
def test_case_as
|
220
|
-
assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
|
221
|
-
assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
|
222
|
-
# Native methods
|
223
|
-
assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
|
224
|
-
assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
|
225
|
-
end
|
226
|
-
|
227
|
-
def test_delete_vector
|
228
|
-
@ds.delete_vector('name')
|
229
|
-
assert_equal(%w(id age city a1), @ds.fields)
|
230
|
-
assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
|
231
|
-
end
|
232
|
-
|
233
|
-
def test_change_type
|
234
|
-
@ds.col('age').type = :numeric
|
235
|
-
assert_equal(:numeric, @ds.col('age').type)
|
236
|
-
end
|
237
|
-
|
238
|
-
def test_split_by_separator_recode
|
239
|
-
@ds.add_vectors_by_split_recode('a1', '_')
|
240
|
-
assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
|
241
|
-
assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
|
242
|
-
assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
|
243
|
-
assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
|
244
|
-
{ 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
|
245
|
-
assert_equal(v, @ds[k].name)
|
31
|
+
assert_raise ArgumentError, "Assigning less fields than vectors is no longer supported" do
|
32
|
+
@ds.fields = %w(id name age)
|
246
33
|
end
|
247
34
|
end
|
248
35
|
|
249
|
-
def
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
|
254
|
-
assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
|
255
|
-
end
|
256
|
-
|
257
|
-
def test_percentiles
|
258
|
-
v1 = (1..100).to_a.to_numeric
|
259
|
-
assert_equal(50.5, v1.median)
|
260
|
-
assert_equal(25.5, v1.percentil(25))
|
261
|
-
v2 = (1..99).to_a.to_numeric
|
262
|
-
assert_equal(50, v2.median)
|
263
|
-
assert_equal(25, v2.percentil(25))
|
264
|
-
v3 = (1..50).to_a.to_numeric
|
265
|
-
assert_equal(25.5, v3.median)
|
266
|
-
assert_equal(13, v3.percentil(25))
|
267
|
-
end
|
268
|
-
|
269
|
-
def test_add_case
|
270
|
-
ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
|
271
|
-
ds.add_case([1, 2, 3])
|
272
|
-
ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
|
273
|
-
ds.add_case([[7, 8, 9], %w(a b c)])
|
274
|
-
assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
|
275
|
-
assert_equal([4, 5, 6], ds.case_as_array(1))
|
276
|
-
assert_equal([7, 8, 9], ds.case_as_array(2))
|
277
|
-
assert_equal(%w(a b c), ds.case_as_array(3))
|
278
|
-
ds.add_case_array([6, 7, 1])
|
279
|
-
ds.update_valid_data
|
280
|
-
assert_equal([6, 7, 1], ds.case_as_array(4))
|
281
|
-
end
|
282
|
-
|
283
|
-
def test_marshaling
|
284
|
-
ds_marshal = Marshal.load(Marshal.dump(@ds))
|
285
|
-
assert_equal(ds_marshal, @ds)
|
286
|
-
end
|
287
|
-
|
288
|
-
def test_range
|
289
|
-
v1 = [1, 2, 3, 4].to_vector
|
290
|
-
v2 = [5, 6, 7, 8].to_vector
|
291
|
-
v3 = [9, 10, 11, 12].to_vector
|
292
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
|
293
|
-
assert_same(v1, ds1['v1'])
|
294
|
-
ds2 = ds1['v2'..'v1']
|
295
|
-
assert_equal(%w(v2 v1), ds2.fields)
|
296
|
-
assert_same(ds1['v1'], ds2['v1'])
|
297
|
-
assert_same(ds1['v2'], ds2['v2'])
|
298
|
-
end
|
299
|
-
|
300
|
-
def test_clone
|
301
|
-
v1 = [1, 2, 3, 4].to_vector
|
302
|
-
v2 = [5, 6, 7, 8].to_vector
|
303
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
304
|
-
ds2 = ds1.clone
|
305
|
-
assert_equal(ds1, ds2)
|
306
|
-
assert_not_same(ds1, ds2)
|
307
|
-
assert_equal(ds1['v1'], ds2['v1'])
|
308
|
-
assert_same(ds1['v1'], ds2['v1'])
|
309
|
-
assert_equal(ds1.fields, ds2.fields)
|
310
|
-
assert_not_same(ds1.fields, ds2.fields)
|
311
|
-
assert_equal(ds1.cases, ds2.cases)
|
312
|
-
|
313
|
-
# partial clone
|
314
|
-
ds3 = ds1.clone('v1')
|
315
|
-
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
316
|
-
assert_equal(ds_exp, ds3)
|
317
|
-
assert_not_same(ds_exp, ds3)
|
318
|
-
assert_equal(ds3['v1'], ds_exp['v1'])
|
319
|
-
assert_same(ds3['v1'], ds_exp['v1'])
|
320
|
-
assert_equal(ds3.fields, ds_exp.fields)
|
321
|
-
assert_equal(ds3.cases, ds_exp.cases)
|
322
|
-
|
323
|
-
assert_not_same(ds3.fields, ds_exp.fields)
|
324
|
-
end
|
325
|
-
|
326
|
-
def test_dup
|
327
|
-
v1 = [1, 2, 3, 4].to_vector
|
328
|
-
v2 = [5, 6, 7, 8].to_vector
|
329
|
-
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
330
|
-
ds2 = ds1.dup
|
331
|
-
assert_equal(ds1, ds2)
|
332
|
-
assert_not_same(ds1, ds2)
|
333
|
-
assert_equal(ds1['v1'], ds2['v1'])
|
334
|
-
assert_not_same(ds1['v1'], ds2['v1'])
|
335
|
-
assert_equal(ds1.cases, ds2.cases)
|
336
|
-
|
337
|
-
assert_equal(ds1.fields, ds2.fields)
|
338
|
-
assert_not_same(ds1.fields, ds2.fields)
|
339
|
-
ds1['v1'].type = :numeric
|
340
|
-
# dup partial
|
341
|
-
ds3 = ds1.dup('v1')
|
342
|
-
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
343
|
-
assert_equal(ds_exp, ds3)
|
344
|
-
assert_not_same(ds_exp, ds3)
|
345
|
-
assert_equal(ds3['v1'], ds_exp['v1'])
|
346
|
-
assert_not_same(ds3['v1'], ds_exp['v1'])
|
347
|
-
assert_equal(ds3.fields, ds_exp.fields)
|
348
|
-
assert_equal(ds3.cases, ds_exp.cases)
|
349
|
-
|
350
|
-
assert_not_same(ds3.fields, ds_exp.fields)
|
351
|
-
|
352
|
-
# empty
|
353
|
-
ds3 = ds1.dup_empty
|
354
|
-
assert_not_equal(ds1, ds3)
|
355
|
-
assert_not_equal(ds1['v1'], ds3['v1'])
|
356
|
-
assert_equal([], ds3['v1'].data)
|
357
|
-
assert_equal([], ds3['v2'].data)
|
358
|
-
assert_equal(:numeric, ds3['v1'].type)
|
359
|
-
assert_equal(ds1.fields, ds2.fields)
|
360
|
-
assert_not_same(ds1.fields, ds2.fields)
|
361
|
-
end
|
36
|
+
def test_crosstab_with_asignation
|
37
|
+
v1 = Daru::Vector.new(%w(a a a b b b c c c))
|
38
|
+
v2 = Daru::Vector.new(%w(a b c a b c a b c))
|
39
|
+
v3 = Daru::Vector.new(%w(0 1 0 0 1 1 0 0 1))
|
362
40
|
|
363
|
-
|
364
|
-
|
365
|
-
assert_raise ArgumentError do
|
366
|
-
@ds.from_to('name', 'a2')
|
41
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
42
|
+
@ds = Statsample::Dataset.crosstab_by_assignation(v1, v2, v3)
|
367
43
|
end
|
368
|
-
end
|
369
|
-
|
370
|
-
def test_each_array_with_nils
|
371
|
-
v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
|
372
|
-
v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
|
373
|
-
v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
|
374
|
-
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
375
|
-
ds2 = ds1.dup_empty
|
376
|
-
ds1.each_array_with_nils {|row|
|
377
|
-
ds2.add_case_array(row)
|
378
|
-
}
|
379
|
-
ds2.update_valid_data
|
380
|
-
assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
|
381
|
-
assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
|
382
|
-
end
|
383
44
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
v3 = [9, 10, 11, 12].to_vector(:numeric)
|
388
|
-
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
389
|
-
ds2 = ds1.dup_only_valid
|
390
|
-
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
|
391
|
-
assert_equal(expected, ds2)
|
392
|
-
assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
|
393
|
-
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
|
394
|
-
assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
|
395
|
-
end
|
396
|
-
|
397
|
-
def test_filter
|
398
|
-
@ds['age'].type = :numeric
|
399
|
-
filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
|
400
|
-
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
|
401
|
-
'city' => Statsample::Vector.new(%w(London Paris)),
|
402
|
-
'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
|
403
|
-
assert_equal(expected, filtered)
|
404
|
-
end
|
405
|
-
|
406
|
-
def test_filter_field
|
407
|
-
@ds['age'].type = :numeric
|
408
|
-
filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
|
409
|
-
expected = [2, 4].to_vector
|
410
|
-
assert_equal(expected, filtered)
|
411
|
-
end
|
412
|
-
|
413
|
-
def test_verify
|
414
|
-
name = %w(r1 r2 r3 r4).to_vector(:object)
|
415
|
-
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
416
|
-
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
417
|
-
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
418
|
-
v4 = %w(a b a b).to_vector(:object)
|
419
|
-
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
|
420
|
-
ds.fields = %w(v1 v2 v3 v4 id)
|
421
|
-
# Correct
|
422
|
-
t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
|
423
|
-
t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
|
424
|
-
# Fail!
|
425
|
-
t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
|
426
|
-
exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
|
427
|
-
exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
|
428
|
-
res = ds.verify(t3, t1, t2)
|
429
|
-
assert_equal(exp1, res)
|
430
|
-
res = ds.verify('id', t1, t2, t3)
|
431
|
-
assert_equal(exp2, res)
|
432
|
-
end
|
433
|
-
|
434
|
-
def test_compute_operation
|
435
|
-
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
436
|
-
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
437
|
-
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
438
|
-
vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
|
439
|
-
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
|
440
|
-
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
|
441
|
-
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
|
442
|
-
assert_equal(vnumeric, ds.compute('v1/2'))
|
443
|
-
assert_equal(vsum, ds.compute('v1+v2+v3'))
|
444
|
-
assert_equal(vmult, ds.compute('v1*v2'))
|
445
|
-
end
|
446
|
-
|
447
|
-
def test_crosstab_with_asignation
|
448
|
-
v1 = %w(a a a b b b c c c).to_vector
|
449
|
-
v2 = %w(a b c a b c a b c).to_vector
|
450
|
-
v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
|
451
|
-
ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
|
452
|
-
assert_equal(:object, ds['_id'].type)
|
453
|
-
assert_equal(:numeric, ds['a'].type)
|
454
|
-
assert_equal(:numeric, ds['b'].type)
|
455
|
-
ev_id = %w(a b c).to_vector
|
456
|
-
ev_a = %w(0 0 0).to_numeric
|
457
|
-
ev_b = %w(1 1 0).to_numeric
|
458
|
-
ev_c = %w(0 1 1).to_numeric
|
459
|
-
ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
|
460
|
-
assert_equal(ds, ds2)
|
461
|
-
end
|
462
|
-
|
463
|
-
def test_one_to_many
|
464
|
-
cases = [
|
465
|
-
['1', 'george', 'red', 10, 'blue', 20, nil, nil],
|
466
|
-
['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
|
467
|
-
['3', 'alfred', nil, nil, nil, nil, nil, nil]
|
468
|
-
]
|
469
|
-
ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
|
470
|
-
cases.each { |c| ds.add_case_array c }
|
471
|
-
ds.update_valid_data
|
472
|
-
ids = %w(1 1 2 2 2).to_vector
|
473
|
-
colors = %w(red blue green orange white).to_vector
|
474
|
-
values = [10, 20, 15, 30, 20].to_vector
|
475
|
-
col_ids = [1, 2, 1, 2, 3].to_numeric
|
476
|
-
ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
|
477
|
-
assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
|
45
|
+
assert_output(nil, "WARNING: Daru uses symbols instead of strings for naming vectors. Please switch to symbols.\n") do
|
46
|
+
assert_equal(:object, @ds['_id'].type)
|
47
|
+
end
|
478
48
|
end
|
479
49
|
end
|