statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -1,8 +1,8 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleCrosstabTestCase < Minitest::Test
3
3
  def initialize(*args)
4
- @v1 = %w(black blonde black black red black brown black blonde black red black blonde).to_vector
5
- @v2 = %w(woman man man woman man man man woman man woman woman man man).to_vector
4
+ @v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
5
+ @v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
6
6
  @ct = Statsample::Crosstab.new(@v1, @v2)
7
7
  super
8
8
  end
@@ -12,7 +12,7 @@ class StatsampleCrosstabTestCase < Minitest::Test
12
12
  assert_raise ArgumentError do
13
13
  Statsample::Crosstab.new(e1, @v2)
14
14
  end
15
- e2 = %w(black blonde black black red black brown black blonde black black).to_vector
15
+ e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
16
16
 
17
17
  assert_raise ArgumentError do
18
18
  Statsample::Crosstab.new(e2, @v2)
@@ -23,8 +23,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
23
23
  end
24
24
 
25
25
  def test_crosstab_basic
26
- assert_equal(%w(black blonde brown red), @ct.rows_names)
27
- assert_equal(%w(man woman), @ct.cols_names)
26
+ assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
27
+ assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
28
28
  assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
29
29
  assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
30
30
  end
@@ -51,15 +51,15 @@ class StatsampleCrosstabTestCase < Minitest::Test
51
51
  end
52
52
 
53
53
  def test_expected
54
- v1 = %w(1 1 1 1 1 0 0 0 0 0).to_vector
55
- v2 = %w(0 0 0 0 0 1 1 1 1 1).to_vector
54
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
55
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
56
56
  ct = Statsample::Crosstab.new(v1, v2)
57
57
  assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
58
58
  end
59
59
 
60
60
  def test_crosstab_with_scale
61
- v1 = %w(1 1 1 1 1 0 0 0 0 0).to_numeric
62
- v2 = %w(0 0 0 0 0 1 1 1 1 1).to_numeric
61
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
62
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
63
63
  ct = Statsample::Crosstab.new(v1, v2)
64
64
  assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
65
65
  assert_nothing_raised { ct.summary }
@@ -1,479 +1,49 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleDatasetTestCase < Minitest::Test
3
3
  def setup
4
- @ds = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]), 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)), 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
5
- 'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
6
- 'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) }, %w(id name age city a1))
7
- end
8
-
9
- def test_nest
10
- ds = {
11
- 'a' => %w(a a a b b b).to_vector,
12
- 'b' => %w(c c d d e e).to_vector,
13
- 'c' => %w(f g h i j k).to_vector
14
- }.to_dataset
15
- nest = ds.nest('a', 'b')
16
- assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
17
- assert_equal([{ 'c' => 'h' }], nest['a']['d'])
18
- assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
19
- end
20
-
21
- def test_should_have_summary
22
- assert(@ds.summary.size > 0)
4
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
5
+ @ds = Statsample::Dataset.new({
6
+ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]),
7
+ 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)),
8
+ 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
9
+ 'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
10
+ 'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
11
+ %w(id name age city a1))
12
+ end
23
13
  end
24
14
 
25
15
  def test_basic
26
- assert_equal(5, @ds.cases)
27
- assert_equal(%w(id name age city a1), @ds.fields)
28
- end
29
-
30
- def test_saveload
31
- outfile = Tempfile.new('dataset.ds')
32
- @ds.save(outfile.path)
33
- a = Statsample.load(outfile.path)
34
- assert_equal(@ds, a)
35
- end
36
-
37
- def test_gsl
38
- if Statsample.has_gsl?
39
- matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
40
- ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
41
- assert_equal(matrix, ds.to_gsl)
42
- else
43
- skip('Gsl needed')
16
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#nrows.\n") do
17
+ assert_equal(5, @ds.cases)
44
18
  end
45
- end
46
19
 
47
- def test_matrix
48
- matrix = Matrix[[1, 2], [3, 4], [5, 6]]
49
- ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
50
- assert_equal(matrix, ds.to_matrix)
20
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
21
+ assert_equal([:id, :name, :age, :city, :a1], @ds.fields)
22
+ end
51
23
  end
52
24
 
53
25
  def test_fields
54
- @ds.fields = %w(name a1 id age city)
55
- assert_equal(%w(name a1 id age city), @ds.fields)
56
- @ds.fields = %w(id name age)
57
- assert_equal(%w(id name age a1 city), @ds.fields)
58
- end
59
-
60
- def test_merge
61
- a = [1, 2, 3].to_numeric
62
- b = [3, 4, 5].to_vector
63
- c = [4, 5, 6].to_numeric
64
- d = [7, 8, 9].to_vector
65
- e = [10, 20, 30].to_vector
66
- ds1 = { 'a' => a, 'b' => b }.to_dataset
67
- ds2 = { 'c' => c, 'd' => d }.to_dataset
68
- exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
69
-
70
- assert_equal(exp, ds1.merge(ds2))
71
- exp.fields = %w(c d a b)
72
- assert_equal(exp, ds2.merge(ds1))
73
- ds3 = { 'a' => e }.to_dataset
74
- exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
75
- exp.fields = %w(a_1 b a_2)
76
- assert_equal(exp, ds1.merge(ds3))
77
- end
78
-
79
- def test_each_vector
80
- a = [1, 2, 3].to_vector
81
- b = [3, 4, 5].to_vector
82
- fields = %w(a b)
83
- ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
84
- res = []
85
- ds.each_vector{|k, v|
86
- res.push([k, v])
87
- }
88
- assert_equal([['a', a], ['b', b]], res)
89
- ds.fields = %w(b a)
90
- res = []
91
- ds.each_vector{|k, v|
92
- res.push([k, v])
93
- }
94
- assert_equal([['b', b], ['a', a]], res)
95
- end
96
-
97
- def test_equality
98
- v1 = [1, 2, 3, 4].to_vector
99
- v2 = [5, 6, 7, 8].to_vector
100
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
101
- v3 = [1, 2, 3, 4].to_vector
102
- v4 = [5, 6, 7, 8].to_vector
103
- ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
104
- assert_equal(ds1, ds2)
105
- ds2.fields = %w(v1 v2)
106
- assert_not_equal(ds1, ds2)
107
- end
108
-
109
- def test_add_vector
110
- v = Statsample::Vector.new(%w(a b c d e))
111
- @ds.add_vector('new', v)
112
- assert_equal(%w(id name age city a1 new), @ds.fields)
113
- x = Statsample::Vector.new(%w(a b c d e f g))
114
- assert_raise ArgumentError do
115
- @ds.add_vector('new2', x)
26
+ assert_output(nil, "WARNING: Deprecated. Use Daru::DataFrame#reindex_vectors! instead.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
27
+ @ds.fields = %w(name a1 id age city)
28
+ assert_equal([:name, :a1, :id, :age, :city], @ds.fields)
116
29
  end
117
- end
118
-
119
- def test_vector_by_calculation
120
- a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
121
- a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
122
- a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
123
- ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
124
- total = ds.vector_by_calculation {|row|
125
- row['a1'] + row['a2'] + row['a3']
126
- }
127
- expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
128
- assert_equal(expected, total)
129
- end
130
-
131
- def test_vector_sum
132
- a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
133
- a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
134
- b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
135
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
136
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
137
- total = ds.vector_sum
138
- a = ds.vector_sum(%w(a1 a2))
139
- b = ds.vector_sum(%w(b1 b2))
140
- expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
141
- expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
142
- expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
143
- assert_equal(expected_a, a)
144
- assert_equal(expected_b, b)
145
- assert_equal(expected_total, total)
146
- end
147
-
148
- def test_vector_missing_values
149
- a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
150
- a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
151
- b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
152
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
153
- c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
154
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
155
- mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
156
- assert_equal(mva, ds.vector_missing_values)
157
- end
158
-
159
- def test_has_missing_values
160
- a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
161
- a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
162
- b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
163
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
164
- c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
165
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
166
- assert(ds.has_missing_data?)
167
- clean = ds.dup_only_valid
168
- assert(!clean.has_missing_data?)
169
- end
170
-
171
- def test_vector_count_characters
172
- a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
173
- a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
174
- b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
175
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
176
- c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
177
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
178
- exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
179
- assert_equal(exp, ds.vector_count_characters)
180
- end
181
-
182
- def test_vector_mean
183
- a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
184
- a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
185
- b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
186
- b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
187
- c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
188
- ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
189
- total = ds.vector_mean
190
- a = ds.vector_mean(%w(a1 a2), 1)
191
- b = ds.vector_mean(%w(b1 b2), 1)
192
- c = ds.vector_mean(%w(b1 b2 c), 1)
193
- expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
194
- expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
195
- expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
196
- expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
197
- assert_equal(expected_a, a)
198
- assert_equal(expected_b, b)
199
- assert_equal(expected_c, c)
200
- assert_equal(expected_total, total)
201
- end
202
-
203
- def test_each_array
204
- expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
205
- out = []
206
- @ds.each_array{ |a|
207
- out.push(a)
208
- }
209
- assert_equal(expected, out)
210
- end
211
30
 
212
- def test_recode
213
- @ds['age'].type = :numeric
214
- @ds.recode!('age') { |c| c['id'] * 2 }
215
- expected = [2, 4, 6, 8, 10].to_vector(:numeric)
216
- assert_equal(expected, @ds['age'])
217
- end
218
-
219
- def test_case_as
220
- assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
221
- assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
222
- # Native methods
223
- assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
224
- assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
225
- end
226
-
227
- def test_delete_vector
228
- @ds.delete_vector('name')
229
- assert_equal(%w(id age city a1), @ds.fields)
230
- assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
231
- end
232
-
233
- def test_change_type
234
- @ds.col('age').type = :numeric
235
- assert_equal(:numeric, @ds.col('age').type)
236
- end
237
-
238
- def test_split_by_separator_recode
239
- @ds.add_vectors_by_split_recode('a1', '_')
240
- assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
241
- assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
242
- assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
243
- assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
244
- { 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
245
- assert_equal(v, @ds[k].name)
31
+ assert_raise ArgumentError, "Assigning less fields than vectors is no longer supported" do
32
+ @ds.fields = %w(id name age)
246
33
  end
247
34
  end
248
35
 
249
- def test_split_by_separator
250
- @ds.add_vectors_by_split('a1', '_')
251
- assert_equal(%w(id name age city a1 a1_a a1_b a1_c), @ds.fields)
252
- assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
253
- assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
254
- assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
255
- end
256
-
257
- def test_percentiles
258
- v1 = (1..100).to_a.to_numeric
259
- assert_equal(50.5, v1.median)
260
- assert_equal(25.5, v1.percentil(25))
261
- v2 = (1..99).to_a.to_numeric
262
- assert_equal(50, v2.median)
263
- assert_equal(25, v2.percentil(25))
264
- v3 = (1..50).to_a.to_numeric
265
- assert_equal(25.5, v3.median)
266
- assert_equal(13, v3.percentil(25))
267
- end
268
-
269
- def test_add_case
270
- ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
271
- ds.add_case([1, 2, 3])
272
- ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
273
- ds.add_case([[7, 8, 9], %w(a b c)])
274
- assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
275
- assert_equal([4, 5, 6], ds.case_as_array(1))
276
- assert_equal([7, 8, 9], ds.case_as_array(2))
277
- assert_equal(%w(a b c), ds.case_as_array(3))
278
- ds.add_case_array([6, 7, 1])
279
- ds.update_valid_data
280
- assert_equal([6, 7, 1], ds.case_as_array(4))
281
- end
282
-
283
- def test_marshaling
284
- ds_marshal = Marshal.load(Marshal.dump(@ds))
285
- assert_equal(ds_marshal, @ds)
286
- end
287
-
288
- def test_range
289
- v1 = [1, 2, 3, 4].to_vector
290
- v2 = [5, 6, 7, 8].to_vector
291
- v3 = [9, 10, 11, 12].to_vector
292
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
293
- assert_same(v1, ds1['v1'])
294
- ds2 = ds1['v2'..'v1']
295
- assert_equal(%w(v2 v1), ds2.fields)
296
- assert_same(ds1['v1'], ds2['v1'])
297
- assert_same(ds1['v2'], ds2['v2'])
298
- end
299
-
300
- def test_clone
301
- v1 = [1, 2, 3, 4].to_vector
302
- v2 = [5, 6, 7, 8].to_vector
303
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
304
- ds2 = ds1.clone
305
- assert_equal(ds1, ds2)
306
- assert_not_same(ds1, ds2)
307
- assert_equal(ds1['v1'], ds2['v1'])
308
- assert_same(ds1['v1'], ds2['v1'])
309
- assert_equal(ds1.fields, ds2.fields)
310
- assert_not_same(ds1.fields, ds2.fields)
311
- assert_equal(ds1.cases, ds2.cases)
312
-
313
- # partial clone
314
- ds3 = ds1.clone('v1')
315
- ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
316
- assert_equal(ds_exp, ds3)
317
- assert_not_same(ds_exp, ds3)
318
- assert_equal(ds3['v1'], ds_exp['v1'])
319
- assert_same(ds3['v1'], ds_exp['v1'])
320
- assert_equal(ds3.fields, ds_exp.fields)
321
- assert_equal(ds3.cases, ds_exp.cases)
322
-
323
- assert_not_same(ds3.fields, ds_exp.fields)
324
- end
325
-
326
- def test_dup
327
- v1 = [1, 2, 3, 4].to_vector
328
- v2 = [5, 6, 7, 8].to_vector
329
- ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
330
- ds2 = ds1.dup
331
- assert_equal(ds1, ds2)
332
- assert_not_same(ds1, ds2)
333
- assert_equal(ds1['v1'], ds2['v1'])
334
- assert_not_same(ds1['v1'], ds2['v1'])
335
- assert_equal(ds1.cases, ds2.cases)
336
-
337
- assert_equal(ds1.fields, ds2.fields)
338
- assert_not_same(ds1.fields, ds2.fields)
339
- ds1['v1'].type = :numeric
340
- # dup partial
341
- ds3 = ds1.dup('v1')
342
- ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
343
- assert_equal(ds_exp, ds3)
344
- assert_not_same(ds_exp, ds3)
345
- assert_equal(ds3['v1'], ds_exp['v1'])
346
- assert_not_same(ds3['v1'], ds_exp['v1'])
347
- assert_equal(ds3.fields, ds_exp.fields)
348
- assert_equal(ds3.cases, ds_exp.cases)
349
-
350
- assert_not_same(ds3.fields, ds_exp.fields)
351
-
352
- # empty
353
- ds3 = ds1.dup_empty
354
- assert_not_equal(ds1, ds3)
355
- assert_not_equal(ds1['v1'], ds3['v1'])
356
- assert_equal([], ds3['v1'].data)
357
- assert_equal([], ds3['v2'].data)
358
- assert_equal(:numeric, ds3['v1'].type)
359
- assert_equal(ds1.fields, ds2.fields)
360
- assert_not_same(ds1.fields, ds2.fields)
361
- end
36
+ def test_crosstab_with_asignation
37
+ v1 = Daru::Vector.new(%w(a a a b b b c c c))
38
+ v2 = Daru::Vector.new(%w(a b c a b c a b c))
39
+ v3 = Daru::Vector.new(%w(0 1 0 0 1 1 0 0 1))
362
40
 
363
- def test_from_to
364
- assert_equal(%w(name age city), @ds.from_to('name', 'city'))
365
- assert_raise ArgumentError do
366
- @ds.from_to('name', 'a2')
41
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
42
+ @ds = Statsample::Dataset.crosstab_by_assignation(v1, v2, v3)
367
43
  end
368
- end
369
-
370
- def test_each_array_with_nils
371
- v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
372
- v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
373
- v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
374
- ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
375
- ds2 = ds1.dup_empty
376
- ds1.each_array_with_nils {|row|
377
- ds2.add_case_array(row)
378
- }
379
- ds2.update_valid_data
380
- assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
381
- assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
382
- end
383
44
 
384
- def test_dup_only_valid
385
- v1 = [1, nil, 3, 4].to_vector(:numeric)
386
- v2 = [5, 6, nil, 8].to_vector(:numeric)
387
- v3 = [9, 10, 11, 12].to_vector(:numeric)
388
- ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
389
- ds2 = ds1.dup_only_valid
390
- expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
391
- assert_equal(expected, ds2)
392
- assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
393
- expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
394
- assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
395
- end
396
-
397
- def test_filter
398
- @ds['age'].type = :numeric
399
- filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
400
- expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
401
- 'city' => Statsample::Vector.new(%w(London Paris)),
402
- 'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
403
- assert_equal(expected, filtered)
404
- end
405
-
406
- def test_filter_field
407
- @ds['age'].type = :numeric
408
- filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
409
- expected = [2, 4].to_vector
410
- assert_equal(expected, filtered)
411
- end
412
-
413
- def test_verify
414
- name = %w(r1 r2 r3 r4).to_vector(:object)
415
- v1 = [1, 2, 3, 4].to_vector(:numeric)
416
- v2 = [4, 3, 2, 1].to_vector(:numeric)
417
- v3 = [10, 20, 30, 40].to_vector(:numeric)
418
- v4 = %w(a b a b).to_vector(:object)
419
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
420
- ds.fields = %w(v1 v2 v3 v4 id)
421
- # Correct
422
- t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
423
- t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
424
- # Fail!
425
- t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
426
- exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
427
- exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
428
- res = ds.verify(t3, t1, t2)
429
- assert_equal(exp1, res)
430
- res = ds.verify('id', t1, t2, t3)
431
- assert_equal(exp2, res)
432
- end
433
-
434
- def test_compute_operation
435
- v1 = [1, 2, 3, 4].to_vector(:numeric)
436
- v2 = [4, 3, 2, 1].to_vector(:numeric)
437
- v3 = [10, 20, 30, 40].to_vector(:numeric)
438
- vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
439
- vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
440
- vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
441
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
442
- assert_equal(vnumeric, ds.compute('v1/2'))
443
- assert_equal(vsum, ds.compute('v1+v2+v3'))
444
- assert_equal(vmult, ds.compute('v1*v2'))
445
- end
446
-
447
- def test_crosstab_with_asignation
448
- v1 = %w(a a a b b b c c c).to_vector
449
- v2 = %w(a b c a b c a b c).to_vector
450
- v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
451
- ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
452
- assert_equal(:object, ds['_id'].type)
453
- assert_equal(:numeric, ds['a'].type)
454
- assert_equal(:numeric, ds['b'].type)
455
- ev_id = %w(a b c).to_vector
456
- ev_a = %w(0 0 0).to_numeric
457
- ev_b = %w(1 1 0).to_numeric
458
- ev_c = %w(0 1 1).to_numeric
459
- ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
460
- assert_equal(ds, ds2)
461
- end
462
-
463
- def test_one_to_many
464
- cases = [
465
- ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
466
- ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
467
- ['3', 'alfred', nil, nil, nil, nil, nil, nil]
468
- ]
469
- ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
470
- cases.each { |c| ds.add_case_array c }
471
- ds.update_valid_data
472
- ids = %w(1 1 2 2 2).to_vector
473
- colors = %w(red blue green orange white).to_vector
474
- values = [10, 20, 15, 30, 20].to_vector
475
- col_ids = [1, 2, 1, 2, 3].to_numeric
476
- ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
477
- assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
45
+ assert_output(nil, "WARNING: Daru uses symbols instead of strings for naming vectors. Please switch to symbols.\n") do
46
+ assert_equal(:object, @ds['_id'].type)
47
+ end
478
48
  end
479
49
  end