daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92):
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.1.3.1'.freeze
2
+ VERSION = '0.1.4'.freeze
3
3
  end
@@ -0,0 +1,23 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'ruby-prof'
4
+ require 'fileutils'
5
+
6
+ require 'daru'
7
+
8
# Profiles the given block with ruby-prof and writes an HTML call-graph
# report to out/<name>.html, resolved relative to this file's directory.
#
# @param name [String, nil] basename of the report file; when omitted it is
#   inferred from the calling script's file name (e.g. "sorting" when called
#   from profile/sorting.rb)
# @yield the code to be profiled
# @return the value returned by RubyProf::GraphHtmlPrinter#print
def __profile__(name = nil)
  # caller_locations gives the caller's path directly, so paths containing
  # ':' (e.g. Windows drive letters) are handled correctly.
  name ||= File.basename(caller_locations(1, 1).first.path, '.rb')

  path = File.expand_path("../out/#{name}.html", __FILE__)
  FileUtils.mkdir_p File.dirname(path)

  RubyProf.start

  yield

  res = RubyProf.stop

  # Block form guarantees the report file is flushed and closed.
  File.open(path, 'w') do |report|
    RubyProf::GraphHtmlPrinter.new(res).print(report)
  end
end
@@ -0,0 +1,10 @@
1
+ require_relative '_base'
2
+
3
# Profiles DataFrame#to_a on a two-column frame of 40_000 rows: a sequential
# :idx column and a :keys column of values drawn at random from 1..40_000.
row_count = 40_000
key_pool = (1..row_count).to_a
random_keys = Array.new(row_count) { key_pool[Random.rand(row_count)] }

df = Daru::DataFrame.new(idx: (1..row_count).to_a, keys: random_keys)

__profile__ { df.to_a }
@@ -0,0 +1,13 @@
1
+ require_relative '_base'
2
+
3
# Profiles a row-wise DataFrame#filter over 6000 rows with a shuffled
# integer index, keeping rows where :a is 2 or :c is 55.
columns = {
  a: [1, 2, 3, 4, 5, 6] * 1000,
  b: ['a', 'b', 'c', 'd', 'e', 'f'] * 1000,
  c: [11, 22, 33, 44, 55, 66] * 1000
}
df = Daru::DataFrame.new(columns, index: (1..6000).to_a.shuffle)

__profile__ do
  # `||` rather than the low-precedence `or` for boolean logic.
  df.filter(:row) { |r| r[:a] == 2 || r[:c] == 55 }
end
@@ -0,0 +1,13 @@
1
+ require_relative '_base'
2
+
3
# Profiles an inner DataFrame#join on :keys between a 40_000-row frame of
# randomly drawn keys and a lookup frame mapping every key to key * 100.
row_count = 40_000
key_pool = (1..row_count).to_a

base_data = {
  idx: (1..row_count).to_a,
  keys: Array.new(row_count) { key_pool[Random.rand(row_count)] }
}
lookup_hash = key_pool.each_with_object({}) { |k, table| table[k] = k * 100 }

base_data_df = Daru::DataFrame.new(base_data)
lookup_df = Daru::DataFrame.new(keys: lookup_hash.keys, values: lookup_hash.values)

__profile__ { base_data_df.join(lookup_df, on: [:keys], how: :inner) }
@@ -0,0 +1,12 @@
1
+ require_relative '_base'
2
+
3
# Profiles DataFrame#sort by column :a on a frame whose three columns all
# share one vector holding the integers 0...10_000 in shuffled order.
shuffled = (0...10_000).to_a.shuffle
vector = Daru::Vector.new(shuffled)
df = Daru::DataFrame.new({ a: vector, b: vector, c: vector })

__profile__ { df.sort([:a]) }
@@ -0,0 +1,9 @@
1
+ require_relative '_base'
2
+
3
# Profiles 100 passes of Vector#each_with_index (with an empty block) over a
# 6000-element vector that has a shuffled integer index.
letters = ['a', 'b', 'c', 'd', 'e', 'f'] * 1000
vector = Daru::Vector.new(letters, index: (1..6000).to_a.shuffle)

__profile__ do
  100.times { vector.each_with_index { |_val, _i| } }
end
@@ -1,5 +1,3 @@
1
- require 'spec_helper.rb'
2
-
3
1
  describe Daru::Accessors::NMatrixWrapper do
4
2
  before :each do
5
3
  stub_context = Object.new
@@ -32,7 +30,7 @@ describe Daru::Accessors::NMatrixWrapper do
32
30
  end
33
31
 
34
32
  describe Daru::Accessors::ArrayWrapper do
35
-
33
+
36
34
  end
37
35
 
38
36
  describe Daru::Accessors::GSLWrapper do
@@ -84,4 +82,4 @@ describe Daru::Accessors::GSLWrapper do
84
82
  )
85
83
  end
86
84
  end
87
- end
85
+ end
@@ -0,0 +1,1734 @@
1
+ describe Daru::Vector, "categorical" do
2
+ context "initialize" do
3
+ context "default parameters" do
4
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
5
+ subject { dv }
6
+
7
+ it { is_expected.to be_a Daru::Vector }
8
+ its(:size) { is_expected.to eq 5 }
9
+ its(:type) { is_expected.to eq :category }
10
+ its(:ordered?) { is_expected.to eq false }
11
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
12
+ its(:base_category) { is_expected.to eq :a }
13
+ its(:coding_scheme) { is_expected.to eq :dummy }
14
+ its(:index) { is_expected.to be_a Daru::Index }
15
+ its(:'index.to_a') { is_expected.to eq [0, 1, 2, 3, 4] }
16
+ end
17
+
18
+ context "with index" do
19
+ context "as array" do
20
+ let(:dv) do
21
+ Daru::Vector.new [:a, 1, :a, 1, :c],
22
+ type: :category,
23
+ index: ['a', 'b', 'c', 'd', 'e']
24
+ end
25
+ subject { dv }
26
+
27
+ its(:index) { is_expected.to be_a Daru::Index }
28
+ its(:'index.to_a') { is_expected.to eq ['a', 'b', 'c', 'd', 'e'] }
29
+ end
30
+
31
+ context "as range" do
32
+ let(:dv) do
33
+ Daru::Vector.new [:a, 1, :a, 1, :c],
34
+ type: :category,
35
+ index: 'a'..'e'
36
+ end
37
+ subject { dv }
38
+
39
+ its(:index) { is_expected.to be_a Daru::Index }
40
+ its(:'index.to_a') { is_expected.to eq ['a', 'b', 'c', 'd', 'e'] }
41
+ end
42
+
43
+ context "as index object" do
44
+ let(:tuples) do
45
+ [
46
+ [:one, :tin, :bar],
47
+ [:one, :pin, :bar],
48
+ [:two, :pin, :bar],
49
+ [:two, :tin, :bar],
50
+ [:thr, :pin, :foo]
51
+ ]
52
+ end
53
+ let(:idx) { Daru::MultiIndex.from_tuples tuples }
54
+ let(:dv) do
55
+ Daru::Vector.new [:a, 1, :a, 1, :c],
56
+ type: :category,
57
+ index: idx
58
+ end
59
+ subject { dv }
60
+
61
+ its(:index) { is_expected.to be_a Daru::MultiIndex }
62
+ its(:'index.to_a') { is_expected.to eq tuples }
63
+ end
64
+
65
+ context "invalid index" do
66
+ it { expect { Daru::Vector.new [1, 1, 2],
67
+ type: :category,
68
+ index: [1, 2]
69
+ }.to raise_error ArgumentError }
70
+ end
71
+ end
72
+
73
+ context '#category?' do
74
+ let(:non_cat) { Daru::Vector.new [1, 2, 3] }
75
+ let(:cat) { Daru::Vector.new [1, 2, 3], type: :category }
76
+ it { expect(non_cat.category?).to eq false }
77
+ it { expect(cat.category?).to eq true }
78
+ end
79
+
80
+ context "with categories" do
81
+ context "extra categories" do
82
+ subject { Daru::Vector.new [:a, 1, :a, 1, :c],
83
+ type: :category, categories: [:a, :b, :c, 1] }
84
+
85
+ it { is_expected.to be_a Daru::Vector }
86
+ its(:type) { is_expected.to eq :category }
87
+ its(:size) { is_expected.to eq 5 }
88
+ its(:order) { is_expected.to eq [:a, :b, :c, 1] }
89
+ its(:categories) { is_expected.to eq [:a, :b, :c, 1] }
90
+ end
91
+
92
+ context "incomplete" do
93
+ it do
94
+ expect { Daru::Vector.new [:a, 1, :a, 1, :c],
95
+ type: :category, categories: [:b, :c, 1] }.
96
+ to raise_error ArgumentError
97
+ end
98
+ end
99
+ end
100
+ end
101
+
102
+ context "#rename" do
103
+ let(:dv) { Daru::Vector.new [1, 2, 1], type: :category }
104
+ subject { dv.rename 'hello' }
105
+
106
+ it { is_expected.to be_a Daru::Vector }
107
+ its(:name) { is_expected.to eq 'hello' }
108
+ end
109
+
110
+ context '#index=' do
111
+ context Daru::Index do
112
+ let(:idx) { Daru::Index.new [1, 2, 3] }
113
+ let(:dv) { Daru::Vector.new ['a', 'b', 'c'], type: :category }
114
+ before { dv.index = idx }
115
+ subject { dv }
116
+
117
+ it { is_expected.to be_a Daru::Vector }
118
+ its(:index) { is_expected.to be_a Daru::Index }
119
+ its(:'index.to_a') { is_expected.to eq [1, 2, 3] }
120
+ end
121
+
122
+ context Range do
123
+ let(:dv) { Daru::Vector.new ['a', 'b', 'c'], type: :category }
124
+ before { dv.index = 1..3 }
125
+ subject { dv }
126
+
127
+ it { is_expected.to be_a Daru::Vector }
128
+ its(:index) { is_expected.to be_a Daru::Index }
129
+ its(:'index.to_a') { is_expected.to eq [1, 2, 3] }
130
+ end
131
+
132
+ context Daru::MultiIndex do
133
+ let(:idx) { Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one]] }
134
+ let(:dv) { Daru::Vector.new ['a', 'b', 'c'], type: :category }
135
+ before { dv.index = idx }
136
+ subject { dv }
137
+
138
+ it { is_expected.to be_a Daru::Vector }
139
+ its(:index) { is_expected.to be_a Daru::MultiIndex }
140
+ its(:'index.to_a') { is_expected.to eq [[:a, :one], [:a, :two], [:b, :one]] }
141
+ end
142
+ end
143
+
144
+ context "#cut" do
145
+ context "close at right end" do
146
+ let(:dv) { Daru::Vector.new [1, 2, 5, 14] }
147
+ subject { dv.cut (0..20).step(5) }
148
+
149
+ it { is_expected.to be_a Daru::Vector }
150
+ its(:type) { is_expected.to eq :category }
151
+ its(:size) { is_expected.to eq 4 }
152
+ its(:categories) { is_expected.to eq ['0-4', '5-9', '10-14', '15-19'] }
153
+ its(:to_a) { is_expected.to eq ['0-4', '0-4', '5-9', '10-14'] }
154
+ end
155
+
156
+ context "close at left end" do
157
+ let(:dv) { Daru::Vector.new [1, 2, 5, 14] }
158
+ subject { dv.cut (0..20).step(5), close_at: :left }
159
+
160
+ it { is_expected.to be_a Daru::Vector }
161
+ its(:type) { is_expected.to eq :category }
162
+ its(:size) { is_expected.to eq 4 }
163
+ its(:categories) { is_expected.to eq ['1-5', '6-10', '11-15', '16-20'] }
164
+ its(:to_a) { is_expected.to eq ['1-5', '1-5', '1-5', '11-15'] }
165
+ end
166
+
167
+ context "labels" do
168
+ let(:dv) { Daru::Vector.new [1, 2, 5, 14] }
169
+ subject { dv.cut (0..20).step(5), close_at: :left, labels: [:a, :b, :c, :d] }
170
+
171
+ it { is_expected.to be_a Daru::Vector }
172
+ its(:type) { is_expected.to eq :category }
173
+ its(:size) { is_expected.to eq 4 }
174
+ its(:categories) { is_expected.to eq [:a, :b, :c, :d] }
175
+ its(:to_a) { is_expected.to eq [:a, :a, :a, :c] }
176
+ end
177
+ end
178
+
179
+ context "#each" do
180
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c] }
181
+ subject { dv.each }
182
+
183
+ it { is_expected.to be_a Enumerator }
184
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
185
+ end
186
+
187
+ context "#to_a" do
188
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c] }
189
+ subject { dv.to_a }
190
+
191
+ it { is_expected.to be_a Array }
192
+ its(:size) { is_expected.to eq 5 }
193
+ it { is_expected.to eq [:a, 1, :a, 1, :c] }
194
+ end
195
+
196
+ context "#dup" do
197
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
198
+ before do
199
+ dv.categories = [:a, :b, :c, 1]
200
+ dv.name = 'daru'
201
+ dv.ordered = true
202
+ end
203
+ subject { dv.dup }
204
+
205
+ its(:type) { is_expected.to eq :category }
206
+ its(:ordered?) { is_expected.to eq true }
207
+ its(:categories) { is_expected.to eq [:a, :b, :c, 1] }
208
+ its(:name) { is_expected.to eq 'daru' }
209
+ end
210
+
211
+ context "#add_category" do
212
+ context "single category" do
213
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
214
+ subject { dv }
215
+ before { dv.add_category :b }
216
+
217
+ its(:categories) { is_expected.to eq [:a, 1, :c, :b] }
218
+ its(:order) { is_expected.to eq [:a, 1, :c, :b] }
219
+ end
220
+
221
+ context "multiple categories" do
222
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
223
+ subject { dv }
224
+ before { dv.add_category :b, :d }
225
+
226
+ its(:categories) { is_expected.to eq [:a, 1, :c, :b, :d] }
227
+ its(:order) { is_expected.to eq [:a, 1, :c, :b, :d] }
228
+ end
229
+ end
230
+
231
+ context '#remove_unused_categories' do
232
+ context 'base category not removed' do
233
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
234
+ before do
235
+ dv.categories = [:a, :b, :c, 1]
236
+ dv.base_category = 1
237
+ dv.remove_unused_categories
238
+ end
239
+ subject { dv }
240
+
241
+ its(:categories) { is_expected.to eq [:a, :c, 1] }
242
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
243
+ its(:base_category) { is_expected.to eq 1 }
244
+ end
245
+
246
+ context 'base category removed' do
247
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
248
+ before do
249
+ dv.categories = [:a, :b, :c, 1]
250
+ dv.base_category = :b
251
+ dv.remove_unused_categories
252
+ end
253
+ subject { dv }
254
+
255
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
256
+ its(:categories) { is_expected.to eq [:a, :c, 1] }
257
+ its(:base_category) { is_expected.to eq :a }
258
+ end
259
+ end
260
+
261
+ context "count" do
262
+ context "existant category" do
263
+ context "more than 0" do
264
+ subject(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
265
+
266
+ it { expect(dv.count :a).to eq 2 }
267
+ end
268
+
269
+ context "equal to 0" do
270
+ subject(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
271
+ before { dv.add_category :b }
272
+
273
+ it { expect(dv.count :b).to eq 0 }
274
+ end
275
+ end
276
+
277
+ context "non existant category" do
278
+ subject(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
279
+
280
+ it { expect { dv.count :k }.to raise_error ArgumentError }
281
+ end
282
+ end
283
+
284
+ context "#frequencies" do
285
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c],
286
+ type: :category,
287
+ name: :hello,
288
+ categories: [:a, :b, :c, :d, 1] }
289
+ context "counts" do
290
+ subject { dv.frequencies }
291
+
292
+ its(:'index.to_a') { is_expected.to eq [:a, :b, :c, :d, 1] }
293
+ its(:to_a) { is_expected.to eq [2, 0, 1, 0, 2] }
294
+ its(:name) { is_expected.to eq :hello }
295
+ end
296
+ context "percentage" do
297
+ subject { dv.frequencies :percentage }
298
+
299
+ its(:'index.to_a') { is_expected.to eq [:a, :b, :c, :d, 1] }
300
+ its(:to_a) { is_expected.to eq [40, 0, 20, 0, 40] }
301
+ end
302
+ context "fraction" do
303
+ subject { dv.frequencies :fraction }
304
+
305
+ its(:'index.to_a') { is_expected.to eq [:a, :b, :c, :d, 1] }
306
+ its(:to_a) { is_expected.to eq [0.4, 0, 0.2, 0, 0.4] }
307
+ end
308
+ end
309
+
310
+ context "#to_category" do
311
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], index: 1..5 }
312
+ subject { dv.to_category ordered: true, categories: [:a, :b, :c, 1] }
313
+
314
+ it { is_expected.to be_a Daru::Vector }
315
+ its(:size) { is_expected.to eq 5 }
316
+ its(:type) { is_expected.to eq :category }
317
+ its(:'index.to_a') { is_expected.to eq [1, 2, 3, 4, 5] }
318
+ its(:ordered?) { is_expected.to eq true }
319
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
320
+ its(:categories) { is_expected.to eq [:a, :b, :c, 1] }
321
+ end
322
+
323
+ context "#categories" do
324
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
325
+ subject { dv.categories }
326
+
327
+ it { is_expected.to be_a Array }
328
+ its(:size) { is_expected.to eq 3 }
329
+ its(:'to_a') { is_expected.to eq [:a, 1, :c] }
330
+ end
331
+
332
+ context "#categories=" do
333
+ context "extra categories" do
334
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c],
335
+ type: :category }
336
+ before { dv.categories = [:c, :b, :a, 1] }
337
+ subject { dv }
338
+
339
+ it { is_expected.to be_a Daru::Vector }
340
+ its(:type) { is_expected.to eq :category }
341
+ its(:categories) { is_expected.to eq [:c, :b, :a, 1] }
342
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
343
+ its(:base_category) { is_expected.to eq :a }
344
+ end
345
+
346
+ context "incomplete" do
347
+ subject { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
348
+
349
+ it do
350
+ expect { subject.categories = [:b, :c, 1] }.
351
+ to raise_error ArgumentError
352
+ end
353
+ end
354
+ end
355
+
356
+ context "#base_category" do
357
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
358
+ subject { dv }
359
+ before { dv.base_category = 1 }
360
+
361
+ its(:base_category) { is_expected.to eq 1 }
362
+ end
363
+
364
+ context "#coding_scheme" do
365
+ context "valid coding scheme" do
366
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
367
+ subject { dv }
368
+ before { dv.coding_scheme = :deviation }
369
+
370
+ its(:coding_scheme) { is_expected.to eq :deviation }
371
+ end
372
+
373
+ context "invalid coding scheme" do
374
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
375
+
376
+ it { expect { dv.coding_scheme = :foo }.to raise_error ArgumentError }
377
+ end
378
+ end
379
+
380
+ context "#rename_categories" do
381
+ context 'rename base category' do
382
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category,
383
+ categories: [:a, :x, :y, :c, :b, 1]}
384
+ subject { dv.rename_categories :a => 1, 1 => 2 }
385
+
386
+ it { is_expected.to be_a Daru::Vector }
387
+ its(:to_a) { is_expected.to eq [1, 2, 1, 2, :c] }
388
+ its(:categories) { is_expected.to eq [:x, :y, :c, :b, 1, 2] }
389
+ its(:base_category) { is_expected.to eq 1 }
390
+ end
391
+
392
+ context 'rename non-base category' do
393
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category,
394
+ categories: [:a, :b, :c, 1] }
395
+ subject { dv.rename_categories 1 => 2 }
396
+
397
+ it { is_expected.to be_a Daru::Vector }
398
+ its(:to_a) { is_expected.to eq [:a, 2, :a, 2, :c] }
399
+ its(:categories) { is_expected.to eq [:a, :b, :c, 2] }
400
+ its(:base_category) { is_expected.to eq :a }
401
+ end
402
+
403
+ context 'merge' do
404
+ let(:dv) { Daru::Vector.new [:a, :b, :c, :b, :e], type: :category }
405
+ before { dv.categories = [:a, :b, :c, :d, :e, :f] }
406
+ subject { dv.rename_categories :d => :a, :c => 1, :e => 1 }
407
+
408
+ it { is_expected.to be_a Daru::Vector }
409
+ its(:categories) { is_expected.to eq [:a, :b, :f, 1] }
410
+ its(:to_a) { is_expected.to eq [:a, :b, 1, :b, 1] }
411
+ end
412
+ end
413
+
414
+ context '#to_non_category' do
415
+ let(:dv) { Daru::Vector.new [1, 2, 3], type: :category,
416
+ index: [:a, :b, :c], name: :hello }
417
+ subject { dv.to_non_category }
418
+
419
+ it { is_expected.to be_a Daru::Vector }
420
+ its(:type) { is_expected.not_to eq :category }
421
+ its(:to_a) { is_expected.to eq [1, 2, 3] }
422
+ its(:'index.to_a') { is_expected.to eq [:a, :b, :c] }
423
+ its(:name) { is_expected.to eq :hello }
424
+ end
425
+
426
+ context '#to_category' do
427
+ let(:dv) { Daru::Vector.new [1, 2, 3], type: :category }
428
+ it { expect(dv.to_category).to eq dv }
429
+ end
430
+
431
+ context '#reindex!' do
432
+ context Daru::Index do
433
+ let(:dv) { Daru::Vector.new [3, 2, 1, 3, 2, 1],
434
+ index: 'a'..'f', type: :category, categories: [1, 2, 3, 4] }
435
+ before { dv.reindex! ['e', 'f', 'a', 'b', 'c', 'd'] }
436
+ subject { dv }
437
+
438
+ it { is_expected.to be_a Daru::Vector }
439
+ its(:categories) { is_expected.to eq [1, 2, 3, 4] }
440
+ its(:to_a) { is_expected.to eq [2, 1, 3, 2, 1, 3] }
441
+ end
442
+
443
+ context Daru::MultiIndex do
444
+ let(:tuples) do
445
+ [
446
+ [:a,:one,:baz],
447
+ [:a,:two,:bar],
448
+ [:a,:two,:baz],
449
+ [:b,:one,:bar],
450
+ [:b,:two,:bar],
451
+ [:b,:two,:baz]
452
+ ]
453
+ end
454
+ let(:idx) { Daru::MultiIndex.from_tuples tuples }
455
+ let(:dv) { Daru::Vector.new [3, 2, 1, 3, 2, 1],
456
+ index: idx, type: :category, categories: [1, 2, 3, 4] }
457
+ before { dv.reindex! [4, 5, 0, 1, 2, 3].map { |i| tuples[i] } }
458
+ subject { dv }
459
+
460
+ it { is_expected.to be_a Daru::Vector }
461
+ its(:categories) { is_expected.to eq [1, 2, 3, 4] }
462
+ its(:to_a) { is_expected.to eq [2, 1, 3, 2, 1, 3] }
463
+ its(:'index.to_a') { is_expected.to eq [4, 5, 0, 1, 2, 3]
464
+ .map { |i| tuples[i] } }
465
+ end
466
+
467
+ context 'invalid index' do
468
+ let(:dv) { Daru::Vector.new [1, 2, 3], type: :category }
469
+
470
+ it { expect { dv.reindex! [1, 1, 1] }.to raise_error ArgumentError }
471
+ end
472
+ end
473
+
474
+ context '#reorder!' do
475
+ context 'valid order' do
476
+ let(:dv) { Daru::Vector.new [3, 2, 1, 3, 2, 1], index: 'a'..'f', type: :category }
477
+ before { dv.reorder! [5, 4, 3, 2, 1, 0] }
478
+ subject { dv }
479
+
480
+ it { is_expected.to be_a Daru::Vector }
481
+ its(:categories) { is_expected.to eq [1, 2, 3] }
482
+ its(:to_a) { is_expected.to eq [1, 2, 3, 1, 2, 3] }
483
+ end
484
+
485
+ context 'invalid order' do
486
+ let(:dv) { Daru::Vector.new [1, 2, 3], type: :category }
487
+
488
+ it { expect { dv.reorder! [1, 1, 1] }.to raise_error ArgumentError }
489
+ end
490
+ end
491
+
492
+ context "#min" do
493
+ context "ordered" do
494
+ context "default ordering" do
495
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
496
+
497
+ it { expect(dv.min).to eq :a }
498
+ end
499
+
500
+ context "reorder" do
501
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
502
+ before { dv.categories = [1, :a, :c] }
503
+
504
+ it { expect(dv.min).to eq 1 }
505
+ end
506
+ end
507
+
508
+ context "unordered" do
509
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category }
510
+
511
+ it { expect { dv.min }.to raise_error ArgumentError }
512
+ end
513
+ end
514
+
515
+ context "#max" do
516
+ context "ordered" do
517
+ context "default ordering" do
518
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
519
+
520
+ it { expect(dv.max).to eq :c }
521
+ end
522
+
523
+ context "reorder" do
524
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
525
+ before { dv.categories = [1, :c, :a] }
526
+
527
+ it { expect(dv.max).to eq :a }
528
+ end
529
+ end
530
+
531
+ context "unordered" do
532
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c, :a], type: :category }
533
+
534
+ it { expect { dv.max }.to raise_error ArgumentError }
535
+ end
536
+ end
537
+
538
+ context "summary" do
539
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c, :a], type: :category }
540
+ subject { dv.describe }
541
+
542
+ it { is_expected.to be_a Daru::Vector }
543
+ its(:categories) { is_expected.to eq 3 }
544
+ its(:max_freq) { is_expected.to eq 3 }
545
+ its(:max_category) { is_expected.to eq :a }
546
+ its(:min_freq) { is_expected.to eq 1 }
547
+ its(:min_category) { is_expected.to eq :c }
548
+ end
549
+
550
+ context "#sort!" do
551
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
552
+ subject { dv }
553
+ before { dv.categories = [:c, :a, 1]; dv.sort! }
554
+
555
+ it { is_expected.to be_a Daru::Vector }
556
+ its(:size) { is_expected.to eq 5 }
557
+ its(:to_a) { is_expected.to eq [:c, :a, :a, 1, 1] }
558
+ its(:'index.to_a') { is_expected.to eq [4, 0, 2, 1, 3] }
559
+ end
560
+
561
+ context "#sort" do
562
+ context 'return sorted vector' do
563
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
564
+ subject { dv.sort }
565
+ before { dv.categories = [:c, :a, 1] }
566
+
567
+ it { is_expected.to be_a Daru::Vector }
568
+ its(:size) { is_expected.to eq 5 }
569
+ its(:to_a) { is_expected.to eq [:c, :a, :a, 1, 1] }
570
+ its(:'index.to_a') { is_expected.to eq [4, 0, 2, 1, 3] }
571
+ end
572
+
573
+ context 'original vector unaffected' do
574
+ let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category, ordered: true }
575
+ subject { dv }
576
+ before { dv.categories = [:c, :a, 1]; dv.sort }
577
+
578
+ it { is_expected.to be_a Daru::Vector }
579
+ its(:size) { is_expected.to eq 5 }
580
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
581
+ its(:'index.to_a') { is_expected.to eq [0, 1, 2, 3, 4] }
582
+ end
583
+ end
584
+
585
+ context "#[]" do
586
+ context Daru::Index do
587
+ before :each do
588
+ @dv = Daru::Vector.new [1,2,3,4,5], name: :yoga,
589
+ index: [:yoda, :anakin, :obi, :padme, :r2d2], type: :category
590
+ end
591
+
592
+ it "returns an element after passing an index" do
593
+ expect(@dv[:yoda]).to eq(1)
594
+ end
595
+
596
+ it "returns an element after passing a numeric index" do
597
+ expect(@dv[0]).to eq(1)
598
+ end
599
+
600
+ it "returns a vector with given indices for multiple indices" do
601
+ expect(@dv[:yoda, :anakin]).to eq(Daru::Vector.new([1,2], name: :yoda,
602
+ index: [:yoda, :anakin], type: :category))
603
+ end
604
+
605
+ it "returns a vector with given indices for multiple numeric indices" do
606
+ expect(@dv[0,1]).to eq(Daru::Vector.new([1,2], name: :yoda,
607
+ index: [:yoda, :anakin], type: :category))
608
+ end
609
+
610
+ it "returns a vector when specified symbol Range" do
611
+ expect(@dv[:yoda..:anakin]).to eq(Daru::Vector.new([1,2],
612
+ index: [:yoda, :anakin], name: :yoga, type: :category))
613
+ end
614
+
615
+ it "returns a vector when specified numeric Range" do
616
+ expect(@dv[3..4]).to eq(Daru::Vector.new([4,5], name: :yoga,
617
+ index: [:padme, :r2d2], type: :category))
618
+ end
619
+
620
+ it "returns correct results for index of multiple index" do
621
+ v = Daru::Vector.new([1,2,3,4], index: ['a','c',1,:a], type: :category)
622
+ expect(v['a']).to eq(1)
623
+ expect(v[:a]).to eq(4)
624
+ expect(v[1]).to eq(3)
625
+ expect(v[0]).to eq(1)
626
+ end
627
+
628
+ it "raises exception for invalid index" do
629
+ expect { @dv[:foo] }.to raise_error(IndexError)
630
+ expect { @dv[:obi, :foo] }.to raise_error(IndexError)
631
+ end
632
+
633
+ context "preserves old categories" do
634
+ let(:dv) do
635
+ Daru::Vector.new [:a, :a, :b, :c, :b],
636
+ type: :category,
637
+ categories: [:c, :b, :a, :e]
638
+ end
639
+ subject { dv[0, 1, 4] }
640
+
641
+ it { is_expected.to be_a Daru::Vector }
642
+ its(:categories) { is_expected.to eq [:c, :b, :a, :e] }
643
+ its(:to_a) { is_expected.to eq [:a, :a, :b] }
644
+ end
645
+ end
646
+
647
+ context Daru::MultiIndex do
648
+ before do
649
+ @tuples = [
650
+ [:a,:one,:bar],
651
+ [:a,:one,:baz],
652
+ [:a,:two,:bar],
653
+ [:a,:two,:baz],
654
+ [:b,:one,:bar],
655
+ [:b,:two,:bar],
656
+ [:b,:two,:baz],
657
+ [:b,:one,:foo],
658
+ [:c,:one,:bar],
659
+ [:c,:one,:baz],
660
+ [:c,:two,:foo],
661
+ [:c,:two,:bar],
662
+ [:d,:one,:foo]
663
+ ]
664
+ @multi_index = Daru::MultiIndex.from_tuples(@tuples)
665
+ @vector = Daru::Vector.new(
666
+ Array.new(13) { |i| i }, index: @multi_index,
667
+ name: :mi_vector, type: :category)
668
+ end
669
+
670
+ it "returns a single element when passed a row number" do
671
+ expect(@vector[1]).to eq(1)
672
+ end
673
+
674
+ it "returns a single element when passed the full tuple" do
675
+ expect(@vector[:a, :one, :baz]).to eq(1)
676
+ end
677
+
678
+ it "returns sub vector when passed first layer of tuple" do
679
+ mi = Daru::MultiIndex.from_tuples([
680
+ [:one,:bar],
681
+ [:one,:baz],
682
+ [:two,:bar],
683
+ [:two,:baz]])
684
+ expect(@vector[:a]).to eq(Daru::Vector.new([0,1,2,3], index: mi,
685
+ name: :sub_vector, type: :category))
686
+ end
687
+
688
+ it "returns sub vector when passed first and second layer of tuple" do
689
+ mi = Daru::MultiIndex.from_tuples([
690
+ [:foo],
691
+ [:bar]])
692
+ expect(@vector[:c,:two]).to eq(Daru::Vector.new([10,11], index: mi,
693
+ name: :sub_sub_vector, type: :category))
694
+ end
695
+
696
+ it "returns sub vector not a single element when passed the partial tuple" do
697
+ mi = Daru::MultiIndex.from_tuples([[:foo]])
698
+ expect(@vector[:d, :one]).to eq(Daru::Vector.new([12], index: mi,
699
+ name: :sub_sub_vector, type: :category))
700
+ end
701
+
702
+ it "returns a vector with corresponding MultiIndex when specified numeric Range" do
703
+ mi = Daru::MultiIndex.from_tuples([
704
+ [:a,:two,:baz],
705
+ [:b,:one,:bar],
706
+ [:b,:two,:bar],
707
+ [:b,:two,:baz],
708
+ [:b,:one,:foo],
709
+ [:c,:one,:bar],
710
+ [:c,:one,:baz]
711
+ ])
712
+ expect(@vector[3..9]).to eq(Daru::Vector.new([3,4,5,6,7,8,9], index: mi,
713
+ name: :slice, type: :category))
714
+ end
715
+
716
+ it "raises exception for invalid index" do
717
+ expect { @vector[:foo] }.to raise_error(IndexError)
718
+ expect { @vector[:a, :two, :foo] }.to raise_error(IndexError)
719
+ expect { @vector[:x, :one] }.to raise_error(IndexError)
720
+ end
721
+ end
722
+
723
+ context Daru::CategoricalIndex do
724
+ context "non-numerical index" do
725
+ let (:idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
726
+ let (:dv) { Daru::Vector.new 'a'..'e', index: idx, type: :category }
727
+
728
+ context "single category" do
729
+ context "multiple instances" do
730
+ subject { dv[:a] }
731
+
732
+ it { is_expected.to be_a Daru::Vector }
733
+ its(:type) { is_expected.to eq :category }
734
+ its(:size) { is_expected.to eq 3 }
735
+ its(:to_a) { is_expected.to eq ['a', 'c', 'd'] }
736
+ its(:index) { is_expected.to eq(
737
+ Daru::CategoricalIndex.new([:a, :a, :a])) }
738
+ end
739
+
740
+ context "single instance" do
741
+ subject { dv[:c] }
742
+
743
+ it { is_expected.to eq 'e' }
744
+ end
745
+ end
746
+
747
+ context "multiple categories" do
748
+ subject { dv[:a, :c] }
749
+
750
+ it { is_expected.to be_a Daru::Vector }
751
+ its(:type) { is_expected.to eq :category }
752
+ its(:size) { is_expected.to eq 4 }
753
+ its(:to_a) { is_expected.to eq ['a', 'c', 'd', 'e'] }
754
+ its(:index) { is_expected.to eq(
755
+ Daru::CategoricalIndex.new([:a, :a, :a, :c])) }
756
+ end
757
+
758
+ context "multiple positional indexes" do
759
+ subject { dv[0, 1, 2] }
760
+
761
+ it { is_expected.to be_a Daru::Vector }
762
+ its(:type) { is_expected.to eq :category }
763
+ its(:size) { is_expected.to eq 3 }
764
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
765
+ its(:index) { is_expected.to eq(
766
+ Daru::CategoricalIndex.new([:a, :b, :a])) }
767
+ end
768
+
769
+ context "single positional index" do
770
+ subject { dv[1] }
771
+
772
+ it { is_expected.to eq 'b' }
773
+ end
774
+
775
+ context "invalid category" do
776
+ it { expect { dv[:x] }.to raise_error IndexError }
777
+ end
778
+
779
+ context "invalid positional index" do
780
+ it { expect { dv[30] }.to raise_error IndexError }
781
+ end
782
+ end
783
+
784
+ context "numerical index" do
785
+ let (:idx) { Daru::CategoricalIndex.new [1, 1, 2, 2, 3] }
786
+ let (:dv) { Daru::Vector.new 'a'..'e', index: idx, type: :category }
787
+
788
+ context "single category" do
789
+ context "multiple instances" do
790
+ subject { dv[1] }
791
+
792
+ it { is_expected.to be_a Daru::Vector }
793
+ its(:type) { is_expected.to eq :category }
794
+ its(:size) { is_expected.to eq 2 }
795
+ its(:to_a) { is_expected.to eq ['a', 'b'] }
796
+ its(:index) { is_expected.to eq(
797
+ Daru::CategoricalIndex.new([1, 1])) }
798
+ end
799
+
800
+ context "single instance" do
801
+ subject { dv[3] }
802
+
803
+ it { is_expected.to eq 'e' }
804
+ end
805
+ end
806
+ end
807
+ end
808
+ end
809
+
810
# Element assignment (`[]=`) on categorical vectors, exercised once per kind
# of index the vector can carry. Every group registers the value it is about
# to assign as a category before the examples run.
context "#[]=" do
  context Daru::Index do
    let(:dv) do
      Daru::Vector.new(
        [1, 2, 3, 4, 5],
        name: :yoga,
        index: [:yoda, :anakin, :obi, :padme, :r2d2],
        type: :category
      )
    end

    before { dv.add_category(666) }

    it "assigns at the specified index" do
      dv[:yoda] = 666
      expect(dv[:yoda]).to eq(666)
    end

    it "assigns at the specified Integer index" do
      dv[0] = 666
      expect(dv[:yoda]).to eq(666)
    end

    it "assigns correctly for a mixed index Vector" do
      labels = ['a', :a, 0, 66]
      v = Daru::Vector.new([1, 2, 3, 4], index: labels, type: :category)
      v.add_category(666)

      v['a'] = 666
      expect(v['a']).to eq(666)

      v[0] = 666
      expect(v[0]).to eq(666)

      v[3] = 666
      expect(v[3]).to eq(666)

      expected = Daru::Vector.new(
        [666, 2, 666, 666], index: ['a', :a, 0, 66], type: :category
      )
      expect(v).to eq(expected)
    end
  end

  context Daru::MultiIndex do
    let(:tuples) do
      [
        [:a, :one, :bar],
        [:a, :one, :baz],
        [:a, :two, :bar],
        [:a, :two, :baz],
        [:b, :one, :bar],
        [:b, :two, :bar],
        [:b, :two, :baz],
        [:b, :one, :foo],
        [:c, :one, :bar],
        [:c, :one, :baz],
        [:c, :two, :foo],
        [:c, :two, :bar]
      ]
    end
    let(:multi_index) { Daru::MultiIndex.from_tuples(tuples) }
    let(:vector) do
      Daru::Vector.new(
        (0..11).to_a, index: multi_index, type: :category, name: :mi_vector
      )
    end

    before { vector.add_category(69) }

    it "assigns all lower layer indices when specified a first layer index" do
      vector[:b] = 69
      expected = Daru::Vector.new(
        [0, 1, 2, 3, 69, 69, 69, 69, 8, 9, 10, 11],
        index: multi_index, name: :top_layer_assignment, type: :category
      )
      expect(vector).to eq(expected)
    end

    it "assigns all lower indices when specified first and second layer index" do
      vector[:b, :one] = 69
      expected = Daru::Vector.new(
        [0, 1, 2, 3, 69, 5, 6, 69, 8, 9, 10, 11],
        index: multi_index, name: :second_layer_assignment, type: :category
      )
      expect(vector).to eq(expected)
    end

    it "assigns just the precise value when specified complete tuple" do
      vector[:b, :one, :foo] = 69
      expected = Daru::Vector.new(
        [0, 1, 2, 3, 4, 5, 6, 69, 8, 9, 10, 11],
        index: multi_index, name: :precise_assignment, type: :category
      )
      expect(vector).to eq(expected)
    end

    it "assigns correctly when numeric index" do
      vector[7] = 69
      expected = Daru::Vector.new(
        [0, 1, 2, 3, 4, 5, 6, 69, 8, 9, 10, 11],
        index: multi_index, name: :precise_assignment, type: :category
      )
      expect(vector).to eq(expected)
    end

    it "fails predictably on unknown index" do
      expect { vector[:d] = 69 }.to raise_error(IndexError)
      expect { vector[:b, :three] = 69 }.to raise_error(IndexError)
      expect { vector[:b, :two, :test] = 69 }.to raise_error(IndexError)
    end
  end

  context Daru::CategoricalIndex do
    context "non-numerical index" do
      let(:idx) { Daru::CategoricalIndex.new([:a, :b, :a, :a, :c]) }
      let(:dv) { Daru::Vector.new('a'..'e', index: idx, type: :category) }

      before { dv.add_category('x') }

      context "single category" do
        context "multiple instances" do
          subject { dv }

          before { dv[:a] = 'x' }

          its(:size) { is_expected.to eq(5) }
          its(:to_a) { is_expected.to eq(%w[x b x x e]) }
          its(:index) { is_expected.to eq(idx) }
        end

        context "single instance" do
          subject { dv }

          before { dv[:b] = 'x' }

          its(:size) { is_expected.to eq(5) }
          its(:to_a) { is_expected.to eq(%w[a x c d e]) }
          its(:index) { is_expected.to eq(idx) }
        end
      end

      context "multiple categories" do
        subject { dv }

        before { dv[:a, :c] = 'x' }

        its(:size) { is_expected.to eq(5) }
        its(:to_a) { is_expected.to eq(%w[x b x x x]) }
        its(:index) { is_expected.to eq(idx) }
      end

      context "multiple positional indexes" do
        subject { dv }

        before { dv[0, 1, 2] = 'x' }

        its(:size) { is_expected.to eq(5) }
        its(:to_a) { is_expected.to eq(%w[x x x d e]) }
        its(:index) { is_expected.to eq(idx) }
      end

      context "single positional index" do
        subject { dv }

        before { dv[1] = 'x' }

        its(:size) { is_expected.to eq(5) }
        its(:to_a) { is_expected.to eq(%w[a x c d e]) }
        its(:index) { is_expected.to eq(idx) }
      end

      context "invalid category" do
        it { expect { dv[:x] = 'x' }.to raise_error(IndexError) }
      end

      context "invalid positional index" do
        it { expect { dv[30] = 'x' }.to raise_error(IndexError) }
      end
    end

    context "numerical index" do
      let(:idx) { Daru::CategoricalIndex.new([1, 1, 2, 2, 3]) }
      let(:dv) { Daru::Vector.new('a'..'e', index: idx, type: :category) }

      before { dv.add_category('x') }

      context "single category" do
        subject { dv }

        before { dv[1] = 'x' }

        its(:size) { is_expected.to eq(5) }
        its(:to_a) { is_expected.to eq(%w[x x c d e]) }
        its(:index) { is_expected.to eq(idx) }
      end

      context "multiple categories" do
        subject { dv }

        before { dv[1, 2] = 'x' }

        its(:size) { is_expected.to eq(5) }
        its(:to_a) { is_expected.to eq(%w[x x x x e]) }
        its(:index) { is_expected.to eq(idx) }
      end
    end
  end
end
986
+
987
# Positional access through `#at` on categorical vectors: a single position
# is expected to give back the element, while several positions or a range
# are expected to give back a category-typed Daru::Vector.
context "#at" do
  context Daru::Index do
    let(:idx) { Daru::Index.new([1, 0, :c]) }
    let(:dv) { Daru::Vector.new(%w[a b c], index: idx, type: :category) }

    context "single position" do
      it { expect(dv.at(1)).to eq('b') }
    end

    context "multiple positions" do
      subject { dv.at(0, 2) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(2) }
      its(:to_a) { is_expected.to eq(%w[a c]) }
      its(:'index.to_a') { is_expected.to eq([1, :c]) }
    end

    context "invalid position" do
      it { expect { dv.at(3) }.to raise_error(IndexError) }
    end

    context "invalid positions" do
      it { expect { dv.at(2, 3) }.to raise_error(IndexError) }
    end

    context "range" do
      subject { dv.at(0..1) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(2) }
      its(:to_a) { is_expected.to eq(%w[a b]) }
      its(:'index.to_a') { is_expected.to eq([1, 0]) }
    end

    context "range with negative end" do
      subject { dv.at(0..-2) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(2) }
      its(:to_a) { is_expected.to eq(%w[a b]) }
      its(:'index.to_a') { is_expected.to eq([1, 0]) }
    end

    context "range with single element" do
      subject { dv.at(0..0) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(1) }
      its(:to_a) { is_expected.to eq(['a']) }
      its(:'index.to_a') { is_expected.to eq([1]) }
    end

    context "preserves old categories" do
      # Overrides the group-level vector with one whose category list is
      # given explicitly and includes :e, which never occurs in the data.
      let(:dv) do
        Daru::Vector.new(
          [:a, :a, :b, :c, :b],
          type: :category,
          categories: [:c, :b, :a, :e]
        )
      end

      subject { dv.at(0, 1, 4) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:categories) { is_expected.to eq([:c, :b, :a, :e]) }
      its(:to_a) { is_expected.to eq([:a, :a, :b]) }
    end
  end

  context Daru::MultiIndex do
    let(:idx) do
      Daru::MultiIndex.from_tuples(
        [
          [:a, :one, :bar],
          [:a, :one, :baz],
          [:b, :two, :bar],
          [:a, :two, :baz]
        ]
      )
    end
    let(:dv) { Daru::Vector.new(1..4, index: idx, type: :category) }

    context "single position" do
      it { expect(dv.at(1)).to eq(2) }
    end

    context "multiple positions" do
      subject { dv.at(2, 3) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(2) }
      its(:to_a) { is_expected.to eq([3, 4]) }
      its(:'index.to_a') do
        is_expected.to eq([[:b, :two, :bar], [:a, :two, :baz]])
      end
    end

    context "invalid position" do
      it { expect { dv.at(4) }.to raise_error(IndexError) }
    end

    context "invalid positions" do
      it { expect { dv.at(2, 4) }.to raise_error(IndexError) }
    end

    context "range" do
      subject { dv.at(2..3) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(2) }
      its(:to_a) { is_expected.to eq([3, 4]) }
      its(:'index.to_a') do
        is_expected.to eq([[:b, :two, :bar], [:a, :two, :baz]])
      end
    end

    context "range with negative end" do
      subject { dv.at(2..-1) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(2) }
      its(:to_a) { is_expected.to eq([3, 4]) }
      its(:'index.to_a') do
        is_expected.to eq([[:b, :two, :bar], [:a, :two, :baz]])
      end
    end

    context "range with single element" do
      subject { dv.at(2..2) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(1) }
      its(:to_a) { is_expected.to eq([3]) }
      its(:'index.to_a') { is_expected.to eq([[:b, :two, :bar]]) }
    end
  end

  context Daru::CategoricalIndex do
    let(:idx) { Daru::CategoricalIndex.new([:a, 1, 1, :a, :c]) }
    let(:dv) { Daru::Vector.new('a'..'e', index: idx, type: :category) }

    context "multiple positional indexes" do
      subject { dv.at(0, 1, 2) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(3) }
      its(:to_a) { is_expected.to eq(%w[a b c]) }
      its(:index) do
        is_expected.to eq(Daru::CategoricalIndex.new([:a, 1, 1]))
      end
    end

    context "single positional index" do
      subject { dv.at(1) }

      it { is_expected.to eq('b') }
    end

    context "invalid position" do
      it { expect { dv.at(5) }.to raise_error(IndexError) }
    end

    context "invalid positions" do
      it { expect { dv.at(2, 5) }.to raise_error(IndexError) }
    end

    context "range" do
      subject { dv.at(0..2) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(3) }
      its(:to_a) { is_expected.to eq(%w[a b c]) }
      its(:index) do
        is_expected.to eq(Daru::CategoricalIndex.new([:a, 1, 1]))
      end
    end

    context "range with negative end" do
      subject { dv.at(0..-3) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(3) }
      its(:to_a) { is_expected.to eq(%w[a b c]) }
      its(:index) do
        is_expected.to eq(Daru::CategoricalIndex.new([:a, 1, 1]))
      end
    end

    context "range with single element" do
      subject { dv.at(0..0) }

      it { is_expected.to be_a(Daru::Vector) }
      its(:type) { is_expected.to eq(:category) }
      its(:size) { is_expected.to eq(1) }
      its(:to_a) { is_expected.to eq(['a']) }
      its(:index) do
        is_expected.to eq(Daru::CategoricalIndex.new([:a]))
      end
    end
  end
end
1188
+
1189
# Positional assignment through `#set_at(positions, value)` on categorical
# vectors. Each group adds 'x' as a category first, then writes it at the
# given positions and checks the resulting contents.
context "#set_at" do
  context Daru::Index do
    let(:idx) { Daru::Index.new([1, 0, :c]) }
    let(:dv) { Daru::Vector.new(%w[a b c], index: idx, type: :category) }

    before { dv.add_category('x') }

    context "single position" do
      subject { dv }

      before { dv.set_at([1], 'x') }

      its(:to_a) { is_expected.to eq(%w[a x c]) }
    end

    context "multiple positions" do
      subject { dv }

      before { dv.set_at([0, 2], 'x') }

      its(:to_a) { is_expected.to eq(%w[x b x]) }
    end

    context "invalid position" do
      it { expect { dv.set_at([3], 'x') }.to raise_error(IndexError) }
    end

    context "invalid positions" do
      it { expect { dv.set_at([2, 3], 'x') }.to raise_error(IndexError) }
    end
  end

  context Daru::MultiIndex do
    let(:idx) do
      Daru::MultiIndex.from_tuples(
        [
          [:a, :one, :bar],
          [:a, :one, :baz],
          [:b, :two, :bar],
          [:a, :two, :baz]
        ]
      )
    end
    let(:dv) { Daru::Vector.new(1..4, index: idx, type: :category) }

    before { dv.add_category('x') }

    context "single position" do
      subject { dv }

      before { dv.set_at([1], 'x') }

      its(:to_a) { is_expected.to eq([1, 'x', 3, 4]) }
    end

    context "multiple positions" do
      subject { dv }

      before { dv.set_at([2, 3], 'x') }

      its(:to_a) { is_expected.to eq([1, 2, 'x', 'x']) }
    end

    context "invalid position" do
      it { expect { dv.set_at([4], 'x') }.to raise_error(IndexError) }
    end

    context "invalid positions" do
      it { expect { dv.set_at([2, 4], 'x') }.to raise_error(IndexError) }
    end
  end

  context Daru::CategoricalIndex do
    let(:idx) { Daru::CategoricalIndex.new([:a, 1, 1, :a, :c]) }
    let(:dv) { Daru::Vector.new('a'..'e', index: idx, type: :category) }

    before { dv.add_category('x') }

    context "multiple positional indexes" do
      subject { dv }

      before { dv.set_at([0, 1, 2], 'x') }

      its(:to_a) { is_expected.to eq(%w[x x x d e]) }
    end

    context "single positional index" do
      subject { dv }

      before { dv.set_at([1], 'x') }

      its(:to_a) { is_expected.to eq(%w[a x c d e]) }
    end

    context "invalid position" do
      it { expect { dv.set_at([5], 'x') }.to raise_error(IndexError) }
    end

    context "invalid positions" do
      it { expect { dv.set_at([2, 5], 'x') }.to raise_error(IndexError) }
    end
  end
end
1281
+
1282
# `#contrast_code` turns a categorical vector into a DataFrame of coded
# columns. The examples cover the coding schemes selected via
# `coding_scheme=`, a custom base category, a user-supplied coding table and
# the naming of the generated columns.
context "#contrast_code" do
  # Shared fixture; groups that need a different vector override it locally.
  let(:dv) do
    Daru::Vector.new([:a, 1, :a, 1, :c], type: :category, name: :abc)
  end

  context "dummy coding" do
    context "default base category" do
      subject { dv.contrast_code }

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:shape) { is_expected.to eq([5, 2]) }
      its(:'abc_1.to_a') { is_expected.to eq([0, 1, 0, 1, 0]) }
      its(:'abc_c.to_a') { is_expected.to eq([0, 0, 0, 0, 1]) }
    end

    context "manual base category" do
      subject { dv.contrast_code }

      before { dv.base_category = :c }

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:shape) { is_expected.to eq([5, 2]) }
      its(:'abc_a.to_a') { is_expected.to eq([1, 0, 1, 0, 0]) }
      its(:'abc_1.to_a') { is_expected.to eq([0, 1, 0, 1, 0]) }
    end
  end

  context "simple coding" do
    context "default base category" do
      subject { dv.contrast_code }

      before { dv.coding_scheme = :simple }

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:shape) { is_expected.to eq([5, 2]) }
      its(:'abc_1.to_a') do
        is_expected.to eq([-1/3.0, 2/3.0, -1/3.0, 2/3.0, -1/3.0])
      end
      its(:'abc_c.to_a') do
        is_expected.to eq([-1/3.0, -1/3.0, -1/3.0, -1/3.0, 2/3.0])
      end
    end

    context "manual base category" do
      subject { dv.contrast_code }

      before do
        dv.coding_scheme = :simple
        dv.base_category = :c
      end

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:shape) { is_expected.to eq([5, 2]) }
      its(:'abc_a.to_a') do
        is_expected.to eq([2/3.0, -1/3.0, 2/3.0, -1/3.0, -1/3.0])
      end
      its(:'abc_1.to_a') do
        is_expected.to eq([-1/3.0, 2/3.0, -1/3.0, 2/3.0, -1/3.0])
      end
    end
  end

  context "helmert coding" do
    subject { dv.contrast_code }

    before { dv.coding_scheme = :helmert }

    it { is_expected.to be_a(Daru::DataFrame) }
    its(:shape) { is_expected.to eq([5, 2]) }
    its(:'abc_a.to_a') do
      is_expected.to eq([2/3.0, -1/3.0, 2/3.0, -1/3.0, -1/3.0])
    end
    its(:'abc_1.to_a') do
      is_expected.to eq([0, 1/2.0, 0, 1/2.0, -1/2.0])
    end
  end

  context "deviation coding" do
    subject { dv.contrast_code }

    before { dv.coding_scheme = :deviation }

    it { is_expected.to be_a(Daru::DataFrame) }
    its(:shape) { is_expected.to eq([5, 2]) }
    its(:'abc_a.to_a') { is_expected.to eq([1, 0, 1, 0, -1]) }
    its(:'abc_1.to_a') { is_expected.to eq([0, 1, 0, 1, -1]) }
  end

  context "user-defined coding" do
    # Coding table passed as `user_defined:`; its index holds the categories.
    let(:df) do
      Daru::DataFrame.new(
        {
          rank_level1: [1, -2, -3],
          rank_level2: [-4, 2, -1],
          rank_level3: [-3, -1, 5]
        },
        index: ['I', 'II', 'III']
      )
    end
    let(:dv) do
      Daru::Vector.new(
        ['III', 'II', 'I', 'II', 'II'], name: :rank, type: :category
      )
    end

    subject { dv.contrast_code(user_defined: df) }

    it { is_expected.to be_a(Daru::DataFrame) }
    its(:shape) { is_expected.to eq([5, 3]) }
    its(:'rank_level1.to_a') { is_expected.to eq([-3, -2, 1, -2, -2]) }
    its(:'rank_level2.to_a') { is_expected.to eq([-1, 2, -4, 2, 2]) }
    its(:'rank_level3.to_a') { is_expected.to eq([5, -1, -3, -1, -1]) }
  end

  context 'naming' do
    context "string" do
      let(:dv) do
        Daru::Vector.new([:a, 1, :a, 1, :c], type: :category, name: 'abc')
      end

      subject { dv.contrast_code }

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:'vectors.to_a') { is_expected.to eq(['abc_1', 'abc_c']) }
    end

    context "symbol" do
      subject { dv.contrast_code }

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:'vectors.to_a') { is_expected.to eq([:abc_1, :abc_c]) }
    end
  end
end
1392
+
1393
# `#reject_values(*values)` on a categorical vector: the examples expect a
# new vector without the listed values, keeping the index labels of the
# surviving elements. nil and Float::NAN are among the values exercised.
context '#reject_values' do
  let(:dv) do
    Daru::Vector.new(
      [1, nil, 3, :a, Float::NAN, nil, Float::NAN, 1],
      index: 11..18,
      type: :category
    )
  end

  context 'reject only nils' do
    subject { dv.reject_values(nil) }

    it { is_expected.to be_a(Daru::Vector) }
    its(:type) { is_expected.to eq(:category) }
    its(:to_a) { is_expected.to eq([1, 3, :a, Float::NAN, Float::NAN, 1]) }
    its(:'index.to_a') { is_expected.to eq([11, 13, 14, 15, 17, 18]) }
  end

  context 'reject only float::NAN' do
    subject { dv.reject_values(Float::NAN) }

    it { is_expected.to be_a(Daru::Vector) }
    its(:type) { is_expected.to eq(:category) }
    its(:to_a) { is_expected.to eq([1, nil, 3, :a, nil, 1]) }
    its(:'index.to_a') { is_expected.to eq([11, 12, 13, 14, 16, 18]) }
  end

  context 'reject both nil and float::NAN' do
    subject { dv.reject_values(nil, Float::NAN) }

    it { is_expected.to be_a(Daru::Vector) }
    its(:type) { is_expected.to eq(:category) }
    its(:to_a) { is_expected.to eq([1, 3, :a, 1]) }
    its(:'index.to_a') { is_expected.to eq([11, 13, 14, 18]) }
  end

  context 'reject any other value' do
    # 20 is not present in the vector; listing it must not affect the result.
    subject { dv.reject_values(1, 3, 20) }

    it { is_expected.to be_a(Daru::Vector) }
    its(:type) { is_expected.to eq(:category) }
    its(:to_a) { is_expected.to eq([nil, :a, Float::NAN, nil, Float::NAN]) }
    its(:'index.to_a') { is_expected.to eq([12, 14, 15, 16, 17]) }
  end

  context 'when resultant vector has only one value' do
    subject { dv.reject_values(1, :a, nil, Float::NAN) }

    it { is_expected.to be_a(Daru::Vector) }
    its(:to_a) { is_expected.to eq([3]) }
    its(:'index.to_a') { is_expected.to eq([13]) }
  end

  context 'when resultant vector has no value' do
    subject { dv.reject_values(1, 3, :a, nil, Float::NAN, 5) }

    it { is_expected.to be_a(Daru::Vector) }
    its(:to_a) { is_expected.to eq([]) }
    its(:'index.to_a') { is_expected.to eq([]) }
  end
end
1448
+
1449
# `#include_values?(*values)` on a categorical vector: the examples expect
# true when at least one of the listed values occurs in the vector and false
# otherwise, with nil and Float::NAN among the values looked up.
context '#include_values?' do
  context 'only nils' do
    context 'true' do
      let(:dv) do
        Daru::Vector.new([1, 2, 3, :a, 'Unknown', nil], type: :category)
      end

      it { expect(dv.include_values?(nil)).to eq(true) }
    end

    context 'false' do
      let(:dv) do
        Daru::Vector.new([1, 2, 3, :a, 'Unknown'], type: :category)
      end

      it { expect(dv.include_values?(nil)).to eq(false) }
    end
  end

  context 'only Float::NAN' do
    context 'true' do
      let(:dv) do
        Daru::Vector.new([1, nil, 2, 3, Float::NAN], type: :category)
      end

      it { expect(dv.include_values?(Float::NAN)).to eq(true) }
    end

    context 'false' do
      let(:dv) { Daru::Vector.new([1, nil, 2, 3], type: :category) }

      it { expect(dv.include_values?(Float::NAN)).to eq(false) }
    end
  end

  context 'both nil and Float::NAN' do
    # The two "true" fixtures were attached to the wrong descriptions (the
    # "only nil" group held the NaN-only vector and vice versa); each group
    # now uses the vector its description names.
    context 'true with only nil' do
      let(:dv) { Daru::Vector.new([1, nil, 2, 3], type: :category) }

      it { expect(dv.include_values?(nil, Float::NAN)).to eq(true) }
    end

    context 'true with only Float::NAN' do
      let(:dv) { Daru::Vector.new([1, Float::NAN, 2, 3], type: :category) }

      it { expect(dv.include_values?(nil, Float::NAN)).to eq(true) }
    end

    context 'false' do
      let(:dv) { Daru::Vector.new([1, 2, 3], type: :category) }

      it { expect(dv.include_values?(nil, Float::NAN)).to eq(false) }
    end
  end

  context 'any other value' do
    context 'true' do
      let(:dv) { Daru::Vector.new([1, 2, 3, 4, nil], type: :category) }

      # 5 is absent from the vector; the other listed values are present.
      it { expect(dv.include_values?(1, 2, 3, 5)).to eq(true) }
    end

    context 'false' do
      let(:dv) { Daru::Vector.new([1, 2, 3, 4, nil], type: :category) }

      it { expect(dv.include_values?(5, 6)).to eq(false) }
    end
  end
end
1512
+
1513
# `#count_values(*values)`: the examples expect the number of elements that
# equal any of the listed values.
context '#count_values' do
  let(:dv) do
    Daru::Vector.new([1, 2, 3, 1, 2, nil, nil], type: :category)
  end

  it { expect(dv.count_values(1, 2)).to eq(4) }
  it { expect(dv.count_values(nil)).to eq(2) }
  it { expect(dv.count_values(3, Float::NAN)).to eq(1) }
  it { expect(dv.count_values(4)).to eq(0) }
end
1520
+
1521
# `#indexes(*values)`: the examples expect a plain Array holding the index
# labels of every element equal to one of the listed values.
context '#indexes' do
  context Daru::Index do
    let(:dv) do
      Daru::Vector.new(
        [1, 2, 1, 2, 3, nil, nil, Float::NAN],
        index: 11..18,
        type: :category
      )
    end

    subject { dv.indexes(1, 2, nil, Float::NAN) }

    it { is_expected.to be_a(Array) }
    it { is_expected.to eq([11, 12, 13, 14, 16, 17, 18]) }
  end

  context Daru::MultiIndex do
    # Tuples are ['M', 2000] .. ['M', 2003] followed by ['F', 2000] .. ['F', 2003].
    let(:mi) do
      Daru::MultiIndex.from_tuples(
        %w[M F].product([2000, 2001, 2002, 2003])
      )
    end
    let(:dv) do
      Daru::Vector.new(
        [1, 2, 1, 2, 3, nil, nil, Float::NAN],
        index: mi,
        type: :category
      )
    end

    subject { dv.indexes(1, 2, Float::NAN) }

    it { is_expected.to be_a(Array) }
    it do
      is_expected.to eq(
        [
          ['M', 2000],
          ['M', 2001],
          ['M', 2002],
          ['M', 2003],
          ['F', 2003]
        ]
      )
    end
  end
end
1559
+
1560
# `#replace_values(old, new)` called on the subject itself: the examples
# expect the listed value(s) to be replaced in place by the new value while
# the vector stays category-typed.
context '#replace_values' do
  subject do
    Daru::Vector.new(
      [1, 2, 1, 4, nil, Float::NAN, nil, Float::NAN],
      index: 11..18,
      type: :category
    )
  end

  context 'replace nils and NaNs' do
    before { subject.replace_values([nil, Float::NAN], 10) }

    its(:type) { is_expected.to eq(:category) }
    its(:to_a) { is_expected.to eq([1, 2, 1, 4, 10, 10, 10, 10]) }
  end

  context 'replace arbitrary values' do
    before { subject.replace_values([1, 2], 10) }

    its(:type) { is_expected.to eq(:category) }
    its(:to_a) do
      is_expected.to eq([10, 10, 10, 4, nil, Float::NAN, nil, Float::NAN])
    end
  end

  context 'works for single value' do
    before { subject.replace_values(nil, 10) }

    its(:type) { is_expected.to eq(:category) }
    its(:to_a) do
      is_expected.to eq([1, 2, 1, 4, 10, Float::NAN, 10, Float::NAN])
    end
  end
end
1588
+ end
1589
+
1590
# DataFrame-level behaviour for categorical columns: converting columns to
# category type, interaction coding across several categorical columns,
# sorting on a categorical column, and splitting a frame by category.
describe Daru::DataFrame, "categorical" do
  context "#to_category" do
    let(:df) do
      Daru::DataFrame.new(
        {
          a: [1, 2, 3, 4, 5],
          b: ['first', 'second', 'first', 'second', 'third'],
          c: ['a', 'b', 'a', 'b', 'c']
        }
      )
    end

    subject { df }

    before { df.to_category(:b, :c) }

    it { is_expected.to be_a(Daru::DataFrame) }
    its(:'b.type') { is_expected.to eq(:category) }
    its(:'c.type') { is_expected.to eq(:category) }
  end

  context "#interact_code" do
    context "two vectors" do
      let(:df) do
        Daru::DataFrame.new(
          {
            a: [1, 2, 3, 4, 5],
            b: ['first', 'second', 'first', 'second', 'third'],
            c: ['a', 'b', 'a', 'b', 'c']
          }
        )
      end

      # Category lists are set explicitly after conversion.
      before do
        df.to_category(:b, :c)
        df[:b].categories = ['first', 'second', 'third']
        df[:c].categories = ['a', 'b', 'c']
      end

      context "both full" do
        subject { df.interact_code([:b, :c], [true, true]) }

        it { is_expected.to be_a(Daru::DataFrame) }
        its(:shape) { is_expected.to eq([5, 9]) }
        it { expect(subject['b_first:c_a'].to_a).to eq([1, 0, 1, 0, 0]) }
        it { expect(subject['b_first:c_b'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_first:c_c'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_second:c_a'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_second:c_b'].to_a).to eq([0, 1, 0, 1, 0]) }
        it { expect(subject['b_second:c_c'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_a'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_b'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_c'].to_a).to eq([0, 0, 0, 0, 1]) }
      end

      context "one full" do
        subject { df.interact_code([:b, :c], [true, false]) }

        it { is_expected.to be_a(Daru::DataFrame) }
        its(:shape) { is_expected.to eq([5, 6]) }
        it { expect(subject['b_first:c_b'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_first:c_c'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_second:c_b'].to_a).to eq([0, 1, 0, 1, 0]) }
        it { expect(subject['b_second:c_c'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_b'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_c'].to_a).to eq([0, 0, 0, 0, 1]) }
      end

      context "none full" do
        subject { df.interact_code([:b, :c], [false, false]) }

        it { is_expected.to be_a(Daru::DataFrame) }
        its(:shape) { is_expected.to eq([5, 4]) }
        it { expect(subject['b_second:c_b'].to_a).to eq([0, 1, 0, 1, 0]) }
        it { expect(subject['b_second:c_c'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_b'].to_a).to eq([0, 0, 0, 0, 0]) }
        it { expect(subject['b_third:c_c'].to_a).to eq([0, 0, 0, 0, 1]) }
      end
    end

    context "more than two vectors" do
      let(:df) do
        Daru::DataFrame.new(
          {
            a: [1, 1, 2],
            b: [2, 2, 3],
            c: [3, 3, 4]
          }
        )
      end

      subject { df.interact_code([:a, :b, :c], [false, false, true]) }

      before { df.to_category(:a, :b, :c) }

      it { is_expected.to be_a(Daru::DataFrame) }
      its(:shape) { is_expected.to eq([3, 2]) }
      it { expect(subject['a_2:b_3:c_3'].to_a).to eq([0, 0, 0]) }
      it { expect(subject['a_2:b_3:c_4'].to_a).to eq([0, 0, 1]) }
    end
  end

  context "#sort!" do
    let(:df) do
      Daru::DataFrame.new(
        {
          a: [1, 2, 1, 4, 5],
          b: ['II', 'I', 'III', 'II', 'I']
        }
      )
    end

    subject { df }

    before do
      # The option key was previously misspelled `ordereed`; `ordered` is the
      # intended option, so the ordering is now actually requested.
      df[:b] = df[:b].to_category(ordered: true, categories: ['I', 'II', 'III'])
      df.sort!([:a, :b])
    end

    its(:shape) { is_expected.to eq([5, 2]) }
    its(:'a.to_a') { is_expected.to eq([1, 1, 2, 4, 5]) }
    its(:'b.to_a') { is_expected.to eq(['II', 'III', 'I', 'II', 'I']) }
  end

  context "#split_by_category" do
    let(:df) do
      Daru::DataFrame.new(
        {
          a: [1, 2, 3, 4, 5, 6, 7],
          b: [3, 2, 2, 35, 3, 2, 5],
          cat: [:I, :II, :I, :III, :I, :III, :II]
        }
      )
    end

    # Expected pieces: one frame per category, named after the category,
    # holding the remaining columns and the original row labels.
    let(:df1) do
      Daru::DataFrame.new(
        { a: [1, 3, 5], b: [3, 2, 3] },
        name: :I, index: [0, 2, 4]
      )
    end
    let(:df2) do
      Daru::DataFrame.new(
        { a: [2, 7], b: [2, 5] },
        name: :II, index: [1, 6]
      )
    end
    let(:df3) do
      Daru::DataFrame.new(
        { a: [4, 6], b: [35, 2] },
        name: :III, index: [3, 5]
      )
    end

    subject { df.split_by_category(:cat) }

    before { df.to_category(:cat) }

    it { is_expected.to be_a(Array) }
    its(:size) { is_expected.to eq(3) }
    its(:first) { is_expected.to eq(df1) }
    it { expect(subject[1]).to eq(df2) }
    its(:last) { is_expected.to eq(df3) }
  end
end