statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -1,8 +1,8 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleCrosstabTestCase < Minitest::Test
3
3
  def initialize(*args)
4
- @v1 = %w(black blonde black black red black brown black blonde black red black blonde).to_vector
5
- @v2 = %w(woman man man woman man man man woman man woman woman man man).to_vector
4
+ @v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
5
+ @v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
6
6
  @ct = Statsample::Crosstab.new(@v1, @v2)
7
7
  super
8
8
  end
@@ -12,7 +12,7 @@ class StatsampleCrosstabTestCase < Minitest::Test
12
12
  assert_raise ArgumentError do
13
13
  Statsample::Crosstab.new(e1, @v2)
14
14
  end
15
- e2 = %w(black blonde black black red black brown black blonde black black).to_vector
15
+ e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
16
16
 
17
17
  assert_raise ArgumentError do
18
18
  Statsample::Crosstab.new(e2, @v2)
@@ -23,8 +23,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
23
23
  end
24
24
 
25
25
  def test_crosstab_basic
26
- assert_equal(%w(black blonde brown red), @ct.rows_names)
27
- assert_equal(%w(man woman), @ct.cols_names)
26
+ assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
27
+ assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
28
28
  assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
29
29
  assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
30
30
  end
@@ -51,15 +51,15 @@ class StatsampleCrosstabTestCase < Minitest::Test
51
51
  end
52
52
 
53
53
  def test_expected
54
- v1 = %w(1 1 1 1 1 0 0 0 0 0).to_vector
55
- v2 = %w(0 0 0 0 0 1 1 1 1 1).to_vector
54
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
55
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
56
56
  ct = Statsample::Crosstab.new(v1, v2)
57
57
  assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
58
58
  end
59
59
 
60
60
  def test_crosstab_with_scale
61
- v1 = %w(1 1 1 1 1 0 0 0 0 0).to_numeric
62
- v2 = %w(0 0 0 0 0 1 1 1 1 1).to_numeric
61
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
62
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
63
63
  ct = Statsample::Crosstab.new(v1, v2)
64
64
  assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
65
65
  assert_nothing_raised { ct.summary }
@@ -1,479 +1,49 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleDatasetTestCase < Minitest::Test
3
3
  def setup
4
- @ds = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]), 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)), 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
5
- 'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
6
- 'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) }, %w(id name age city a1))
7
- end
8
-
9
- def test_nest
10
- ds = {
11
- 'a' => %w(a a a b b b).to_vector,
12
- 'b' => %w(c c d d e e).to_vector,
13
- 'c' => %w(f g h i j k).to_vector
14
- }.to_dataset
15
- nest = ds.nest('a', 'b')
16
- assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
17
- assert_equal([{ 'c' => 'h' }], nest['a']['d'])
18
- assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
19
- end
20
-
21
- def test_should_have_summary
22
- assert(@ds.summary.size > 0)
4
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
5
+ @ds = Statsample::Dataset.new({
6
+ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]),
7
+ 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)),
8
+ 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
9
+ 'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
10
+ 'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
11
+ %w(id name age city a1))
12
+ end
23
13
  end
24
14
 
25
15
  def test_basic
26
- assert_equal(5, @ds.cases)
27
- assert_equal(%w(id name age city a1), @ds.fields)
28
- end
29
-
30
- def test_saveload
31
- outfile = Tempfile.new('dataset.ds')
32
- @ds.save(outfile.path)
33
- a = Statsample.load(outfile.path)
34
- assert_equal(@ds, a)
35
- end
36
-
37
- def test_gsl
38
- if Statsample.has_gsl?
39
- matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
40
- ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
41
- assert_equal(matrix, ds.to_gsl)
42
- else
43
- skip('Gsl needed')
16
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#nrows.\n") do
17
+ assert_equal(5, @ds.cases)
44
18
  end
45
- end
46
19
 
47
- def test_matrix
48
- matrix = Matrix[[1, 2], [3, 4], [5, 6]]
49
- ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
50
- assert_equal(matrix, ds.to_matrix)
20
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
21
+ assert_equal([:id, :name, :age, :city, :a1], @ds.fields)
22
+ end
51
23
  end
52
24
 
53
25
  def test_fields
54
- @ds.fields = %w(name a1 id age city)
55
- assert_equal(%w(name a1 id age city), @ds.fields)
56
- @ds.fields = %w(id name age)
57
- assert_equal(%w(id name age a1 city), @ds.fields)
58
- end
59
-
60
- def test_merge
61
- a = [1, 2, 3].to_numeric
62
- b = [3, 4, 5].to_vector
63
- c = [4, 5, 6].to_numeric
64
- d = [7, 8, 9].to_vector
65
- e = [10, 20, 30].to_vector
66
- ds1 = { 'a' => a, 'b' => b }.to_dataset
67
- ds2 = { 'c' => c, 'd' => d }.to_dataset
68
- exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
69
-
70
- assert_equal(exp, ds1.merge(ds2))
71
- exp.fields = %w(c d a b)
72
- assert_equal(exp, ds2.merge(ds1))
73
- ds3 = { 'a' => e }.to_dataset
74
- exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
75
- exp.fields = %w(a_1 b a_2)
76
- assert_equal(exp, ds1.merge(ds3))
77
- end
78
-
79
- def test_each_vector
80
- a = [1, 2, 3].to_vector
81
- b = [3, 4, 5].to_vector
82
- fields = %w(a b)
83
- ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
84
- res = []
85
- ds.each_vector{|k, v|
86
- res.push([k, v])
87
- }
88
- assert_equal([['a', a], ['b', b]], res)
89
- ds.fields = %w(b a)
90
- res = []
91
- ds.each_vector{|k, v|
92
- res.push([k, v])
93
- }
94
- assert_equal([['b', b], ['a', a]], res)
95
- end
96
-
97
- def test_equality
98
- v1 = [1, 2, 3, 4].to_vector
99
- v2 = [5, 6, 7, 8].to_vector
100
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
101
- v3 = [1, 2, 3, 4].to_vector
102
- v4 = [5, 6, 7, 8].to_vector
103
- ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
104
- assert_equal(ds1, ds2)
105
- ds2.fields = %w(v1 v2)
106
- assert_not_equal(ds1, ds2)
107
- end
108
-
109
- def test_add_vector
110
- v = Statsample::Vector.new(%w(a b c d e))
111
- @ds.add_vector('new', v)
112
- assert_equal(%w(id name age city a1 new), @ds.fields)
113
- x = Statsample::Vector.new(%w(a b c d e f g))
114
- assert_raise ArgumentError do
115
- @ds.add_vector('new2', x)
26
+ assert_output(nil, "WARNING: Deprecated. Use Daru::DataFrame#reindex_vectors! instead.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
27
+ @ds.fields = %w(name a1 id age city)
28
+ assert_equal([:name, :a1, :id, :age, :city], @ds.fields)
116
29
  end
117
- end
118
-
119
- def test_vector_by_calculation
120
- a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
121
- a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
122
- a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
123
- ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
124
- total = ds.vector_by_calculation {|row|
125
- row['a1'] + row['a2'] + row['a3']
126
- }
127
- expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
128
- assert_equal(expected, total)
129
- end
130
-
131
- def test_vector_sum
132
- a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
133
- a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
134
- b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
135
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
136
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
137
- total = ds.vector_sum
138
- a = ds.vector_sum(%w(a1 a2))
139
- b = ds.vector_sum(%w(b1 b2))
140
- expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
141
- expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
142
- expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
143
- assert_equal(expected_a, a)
144
- assert_equal(expected_b, b)
145
- assert_equal(expected_total, total)
146
- end
147
-
148
- def test_vector_missing_values
149
- a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
150
- a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
151
- b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
152
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
153
- c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
154
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
155
- mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
156
- assert_equal(mva, ds.vector_missing_values)
157
- end
158
-
159
- def test_has_missing_values
160
- a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
161
- a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
162
- b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
163
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
164
- c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
165
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
166
- assert(ds.has_missing_data?)
167
- clean = ds.dup_only_valid
168
- assert(!clean.has_missing_data?)
169
- end
170
-
171
- def test_vector_count_characters
172
- a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
173
- a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
174
- b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
175
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
176
- c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
177
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
178
- exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
179
- assert_equal(exp, ds.vector_count_characters)
180
- end
181
-
182
- def test_vector_mean
183
- a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
184
- a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
185
- b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
186
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
187
- c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
188
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
189
- total = ds.vector_mean
190
- a = ds.vector_mean(%w(a1 a2), 1)
191
- b = ds.vector_mean(%w(b1 b2), 1)
192
- c = ds.vector_mean(%w(b1 b2 c), 1)
193
- expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
194
- expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
195
- expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
196
- expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
197
- assert_equal(expected_a, a)
198
- assert_equal(expected_b, b)
199
- assert_equal(expected_c, c)
200
- assert_equal(expected_total, total)
201
- end
202
-
203
- def test_each_array
204
- expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
205
- out = []
206
- @ds.each_array{ |a|
207
- out.push(a)
208
- }
209
- assert_equal(expected, out)
210
- end
211
30
 
212
- def test_recode
213
- @ds['age'].type = :numeric
214
- @ds.recode!('age') { |c| c['id'] * 2 }
215
- expected = [2, 4, 6, 8, 10].to_vector(:numeric)
216
- assert_equal(expected, @ds['age'])
217
- end
218
-
219
- def test_case_as
220
- assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
221
- assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
222
- # Native methods
223
- assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
224
- assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
225
- end
226
-
227
- def test_delete_vector
228
- @ds.delete_vector('name')
229
- assert_equal(%w(id age city a1), @ds.fields)
230
- assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
231
- end
232
-
233
- def test_change_type
234
- @ds.col('age').type = :numeric
235
- assert_equal(:numeric, @ds.col('age').type)
236
- end
237
-
238
- def test_split_by_separator_recode
239
- @ds.add_vectors_by_split_recode('a1', '_')
240
- assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
241
- assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
242
- assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
243
- assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
244
- { 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
245
- assert_equal(v, @ds[k].name)
31
+ assert_raise ArgumentError, "Assigning less fields than vectors is no longer supported" do
32
+ @ds.fields = %w(id name age)
246
33
  end
247
34
  end
248
35
 
249
- def test_split_by_separator
250
- @ds.add_vectors_by_split('a1', '_')
251
- assert_equal(%w(id name age city a1 a1_a a1_b a1_c), @ds.fields)
252
- assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
253
- assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
254
- assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
255
- end
256
-
257
- def test_percentiles
258
- v1 = (1..100).to_a.to_numeric
259
- assert_equal(50.5, v1.median)
260
- assert_equal(25.5, v1.percentil(25))
261
- v2 = (1..99).to_a.to_numeric
262
- assert_equal(50, v2.median)
263
- assert_equal(25, v2.percentil(25))
264
- v3 = (1..50).to_a.to_numeric
265
- assert_equal(25.5, v3.median)
266
- assert_equal(13, v3.percentil(25))
267
- end
268
-
269
- def test_add_case
270
- ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
271
- ds.add_case([1, 2, 3])
272
- ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
273
- ds.add_case([[7, 8, 9], %w(a b c)])
274
- assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
275
- assert_equal([4, 5, 6], ds.case_as_array(1))
276
- assert_equal([7, 8, 9], ds.case_as_array(2))
277
- assert_equal(%w(a b c), ds.case_as_array(3))
278
- ds.add_case_array([6, 7, 1])
279
- ds.update_valid_data
280
- assert_equal([6, 7, 1], ds.case_as_array(4))
281
- end
282
-
283
- def test_marshaling
284
- ds_marshal = Marshal.load(Marshal.dump(@ds))
285
- assert_equal(ds_marshal, @ds)
286
- end
287
-
288
- def test_range
289
- v1 = [1, 2, 3, 4].to_vector
290
- v2 = [5, 6, 7, 8].to_vector
291
- v3 = [9, 10, 11, 12].to_vector
292
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
293
- assert_same(v1, ds1['v1'])
294
- ds2 = ds1['v2'..'v1']
295
- assert_equal(%w(v2 v1), ds2.fields)
296
- assert_same(ds1['v1'], ds2['v1'])
297
- assert_same(ds1['v2'], ds2['v2'])
298
- end
299
-
300
- def test_clone
301
- v1 = [1, 2, 3, 4].to_vector
302
- v2 = [5, 6, 7, 8].to_vector
303
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
304
- ds2 = ds1.clone
305
- assert_equal(ds1, ds2)
306
- assert_not_same(ds1, ds2)
307
- assert_equal(ds1['v1'], ds2['v1'])
308
- assert_same(ds1['v1'], ds2['v1'])
309
- assert_equal(ds1.fields, ds2.fields)
310
- assert_not_same(ds1.fields, ds2.fields)
311
- assert_equal(ds1.cases, ds2.cases)
312
-
313
- # partial clone
314
- ds3 = ds1.clone('v1')
315
- ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
316
- assert_equal(ds_exp, ds3)
317
- assert_not_same(ds_exp, ds3)
318
- assert_equal(ds3['v1'], ds_exp['v1'])
319
- assert_same(ds3['v1'], ds_exp['v1'])
320
- assert_equal(ds3.fields, ds_exp.fields)
321
- assert_equal(ds3.cases, ds_exp.cases)
322
-
323
- assert_not_same(ds3.fields, ds_exp.fields)
324
- end
325
-
326
- def test_dup
327
- v1 = [1, 2, 3, 4].to_vector
328
- v2 = [5, 6, 7, 8].to_vector
329
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
330
- ds2 = ds1.dup
331
- assert_equal(ds1, ds2)
332
- assert_not_same(ds1, ds2)
333
- assert_equal(ds1['v1'], ds2['v1'])
334
- assert_not_same(ds1['v1'], ds2['v1'])
335
- assert_equal(ds1.cases, ds2.cases)
336
-
337
- assert_equal(ds1.fields, ds2.fields)
338
- assert_not_same(ds1.fields, ds2.fields)
339
- ds1['v1'].type = :numeric
340
- # dup partial
341
- ds3 = ds1.dup('v1')
342
- ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
343
- assert_equal(ds_exp, ds3)
344
- assert_not_same(ds_exp, ds3)
345
- assert_equal(ds3['v1'], ds_exp['v1'])
346
- assert_not_same(ds3['v1'], ds_exp['v1'])
347
- assert_equal(ds3.fields, ds_exp.fields)
348
- assert_equal(ds3.cases, ds_exp.cases)
349
-
350
- assert_not_same(ds3.fields, ds_exp.fields)
351
-
352
- # empty
353
- ds3 = ds1.dup_empty
354
- assert_not_equal(ds1, ds3)
355
- assert_not_equal(ds1['v1'], ds3['v1'])
356
- assert_equal([], ds3['v1'].data)
357
- assert_equal([], ds3['v2'].data)
358
- assert_equal(:numeric, ds3['v1'].type)
359
- assert_equal(ds1.fields, ds2.fields)
360
- assert_not_same(ds1.fields, ds2.fields)
361
- end
36
+ def test_crosstab_with_asignation
37
+ v1 = Daru::Vector.new(%w(a a a b b b c c c))
38
+ v2 = Daru::Vector.new(%w(a b c a b c a b c))
39
+ v3 = Daru::Vector.new(%w(0 1 0 0 1 1 0 0 1))
362
40
 
363
- def test_from_to
364
- assert_equal(%w(name age city), @ds.from_to('name', 'city'))
365
- assert_raise ArgumentError do
366
- @ds.from_to('name', 'a2')
41
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
42
+ @ds = Statsample::Dataset.crosstab_by_assignation(v1, v2, v3)
367
43
  end
368
- end
369
-
370
- def test_each_array_with_nils
371
- v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
372
- v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
373
- v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
374
- ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
375
- ds2 = ds1.dup_empty
376
- ds1.each_array_with_nils {|row|
377
- ds2.add_case_array(row)
378
- }
379
- ds2.update_valid_data
380
- assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
381
- assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
382
- end
383
44
 
384
- def test_dup_only_valid
385
- v1 = [1, nil, 3, 4].to_vector(:numeric)
386
- v2 = [5, 6, nil, 8].to_vector(:numeric)
387
- v3 = [9, 10, 11, 12].to_vector(:numeric)
388
- ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
389
- ds2 = ds1.dup_only_valid
390
- expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
391
- assert_equal(expected, ds2)
392
- assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
393
- expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
394
- assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
395
- end
396
-
397
- def test_filter
398
- @ds['age'].type = :numeric
399
- filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
400
- expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
401
- 'city' => Statsample::Vector.new(%w(London Paris)),
402
- 'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
403
- assert_equal(expected, filtered)
404
- end
405
-
406
- def test_filter_field
407
- @ds['age'].type = :numeric
408
- filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
409
- expected = [2, 4].to_vector
410
- assert_equal(expected, filtered)
411
- end
412
-
413
- def test_verify
414
- name = %w(r1 r2 r3 r4).to_vector(:object)
415
- v1 = [1, 2, 3, 4].to_vector(:numeric)
416
- v2 = [4, 3, 2, 1].to_vector(:numeric)
417
- v3 = [10, 20, 30, 40].to_vector(:numeric)
418
- v4 = %w(a b a b).to_vector(:object)
419
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
420
- ds.fields = %w(v1 v2 v3 v4 id)
421
- # Correct
422
- t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
423
- t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
424
- # Fail!
425
- t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
426
- exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
427
- exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
428
- res = ds.verify(t3, t1, t2)
429
- assert_equal(exp1, res)
430
- res = ds.verify('id', t1, t2, t3)
431
- assert_equal(exp2, res)
432
- end
433
-
434
- def test_compute_operation
435
- v1 = [1, 2, 3, 4].to_vector(:numeric)
436
- v2 = [4, 3, 2, 1].to_vector(:numeric)
437
- v3 = [10, 20, 30, 40].to_vector(:numeric)
438
- vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
439
- vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
440
- vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
441
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
442
- assert_equal(vnumeric, ds.compute('v1/2'))
443
- assert_equal(vsum, ds.compute('v1+v2+v3'))
444
- assert_equal(vmult, ds.compute('v1*v2'))
445
- end
446
-
447
- def test_crosstab_with_asignation
448
- v1 = %w(a a a b b b c c c).to_vector
449
- v2 = %w(a b c a b c a b c).to_vector
450
- v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
451
- ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
452
- assert_equal(:object, ds['_id'].type)
453
- assert_equal(:numeric, ds['a'].type)
454
- assert_equal(:numeric, ds['b'].type)
455
- ev_id = %w(a b c).to_vector
456
- ev_a = %w(0 0 0).to_numeric
457
- ev_b = %w(1 1 0).to_numeric
458
- ev_c = %w(0 1 1).to_numeric
459
- ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
460
- assert_equal(ds, ds2)
461
- end
462
-
463
- def test_one_to_many
464
- cases = [
465
- ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
466
- ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
467
- ['3', 'alfred', nil, nil, nil, nil, nil, nil]
468
- ]
469
- ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
470
- cases.each { |c| ds.add_case_array c }
471
- ds.update_valid_data
472
- ids = %w(1 1 2 2 2).to_vector
473
- colors = %w(red blue green orange white).to_vector
474
- values = [10, 20, 15, 30, 20].to_vector
475
- col_ids = [1, 2, 1, 2, 3].to_numeric
476
- ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
477
- assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
45
+ assert_output(nil, "WARNING: Daru uses symbols instead of strings for naming vectors. Please switch to symbols.\n") do
46
+ assert_equal(:object, @ds['_id'].type)
47
+ end
478
48
  end
479
49
  end