daru_lite 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +14 -1
  20. data/lib/daru_lite/index/multi_index.rb +9 -0
  21. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  22. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  23. data/lib/daru_lite/vector/calculatable.rb +78 -0
  24. data/lib/daru_lite/vector/convertible.rb +77 -0
  25. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  26. data/lib/daru_lite/vector/fetchable.rb +175 -0
  27. data/lib/daru_lite/vector/filterable.rb +128 -0
  28. data/lib/daru_lite/vector/indexable.rb +77 -0
  29. data/lib/daru_lite/vector/iterable.rb +95 -0
  30. data/lib/daru_lite/vector/joinable.rb +17 -0
  31. data/lib/daru_lite/vector/missable.rb +124 -0
  32. data/lib/daru_lite/vector/queryable.rb +45 -0
  33. data/lib/daru_lite/vector/setable.rb +47 -0
  34. data/lib/daru_lite/vector/sortable.rb +113 -0
  35. data/lib/daru_lite/vector.rb +36 -932
  36. data/lib/daru_lite/version.rb +1 -1
  37. data/spec/data_frame/aggregatable_example.rb +65 -0
  38. data/spec/data_frame/buildable_example.rb +109 -0
  39. data/spec/data_frame/calculatable_example.rb +135 -0
  40. data/spec/data_frame/convertible_example.rb +180 -0
  41. data/spec/data_frame/duplicatable_example.rb +111 -0
  42. data/spec/data_frame/fetchable_example.rb +476 -0
  43. data/spec/data_frame/filterable_example.rb +409 -0
  44. data/spec/data_frame/indexable_example.rb +221 -0
  45. data/spec/data_frame/iterable_example.rb +465 -0
  46. data/spec/data_frame/joinable_example.rb +106 -0
  47. data/spec/data_frame/missable_example.rb +47 -0
  48. data/spec/data_frame/pivotable_example.rb +297 -0
  49. data/spec/data_frame/queryable_example.rb +92 -0
  50. data/spec/data_frame/setable_example.rb +482 -0
  51. data/spec/data_frame/sortable_example.rb +350 -0
  52. data/spec/dataframe_spec.rb +181 -3289
  53. data/spec/index/categorical_index_spec.rb +27 -8
  54. data/spec/index/index_spec.rb +21 -0
  55. data/spec/index/multi_index_spec.rb +85 -76
  56. data/spec/vector/aggregatable_example.rb +27 -0
  57. data/spec/vector/calculatable_example.rb +82 -0
  58. data/spec/vector/convertible_example.rb +126 -0
  59. data/spec/vector/duplicatable_example.rb +48 -0
  60. data/spec/vector/fetchable_example.rb +463 -0
  61. data/spec/vector/filterable_example.rb +165 -0
  62. data/spec/vector/indexable_example.rb +201 -0
  63. data/spec/vector/iterable_example.rb +111 -0
  64. data/spec/vector/joinable_example.rb +25 -0
  65. data/spec/vector/missable_example.rb +88 -0
  66. data/spec/vector/queryable_example.rb +91 -0
  67. data/spec/vector/setable_example.rb +300 -0
  68. data/spec/vector/sortable_example.rb +242 -0
  69. data/spec/vector_spec.rb +111 -1805
  70. metadata +86 -2
@@ -0,0 +1,465 @@
1
+ shared_examples_for 'an iterable DataFrame' do
2
+ describe "#each_index" do
3
+ it "iterates over index" do
4
+ idxs = []
5
+ ret = df.each_index do |index|
6
+ idxs << index
7
+ end
8
+
9
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
10
+
11
+ expect(ret).to eq(df)
12
+ end
13
+ end
14
+
15
+ describe "#each_vector_with_index" do
16
+ it "iterates over vectors with index" do
17
+ idxs = []
18
+ ret = df.each_vector_with_index do |vector, index|
19
+ idxs << index
20
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
21
+ expect(vector.class).to eq(DaruLite::Vector)
22
+ end
23
+
24
+ expect(idxs).to eq([:a, :b, :c])
25
+
26
+ expect(ret).to eq(df)
27
+ end
28
+ end
29
+
30
+ describe "#each_row_with_index" do
31
+ it "iterates over rows with indexes" do
32
+ idxs = []
33
+ ret = df.each_row_with_index do |row, idx|
34
+ idxs << idx
35
+ expect(row.index).to eq([:a, :b, :c].to_index)
36
+ expect(row.class).to eq(DaruLite::Vector)
37
+ end
38
+
39
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
40
+ expect(ret) .to eq(df)
41
+ end
42
+ end
43
+
44
+ describe "#each" do
45
+ it "iterates over rows" do
46
+ ret = df.each(:row) do |row|
47
+ expect(row.index).to eq([:a, :b, :c].to_index)
48
+ expect(row.class).to eq(DaruLite::Vector)
49
+ end
50
+
51
+ expect(ret).to eq(df)
52
+ end
53
+
54
+ it "iterates over all vectors" do
55
+ ret = df.each do |vector|
56
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
57
+ expect(vector.class).to eq(DaruLite::Vector)
58
+ end
59
+
60
+ expect(ret).to eq(df)
61
+ end
62
+
63
+ it "returns Enumerable if no block specified" do # NOTE(review): the assertion below checks for Enumerator, not Enumerable -- the description looks like a typo
64
+ ret = df.each # called without a block
65
+ expect(ret.is_a?(Enumerator)).to eq(true)
66
+ end
67
+
68
+ it "raises on unknown axis" do
69
+ expect { df.each(:kitten) }.to raise_error(ArgumentError, /axis/)
70
+ end
71
+ end
72
+
73
+ describe "#collect" do
74
+ before do
75
+ @df = DaruLite::DataFrame.new({
76
+ a: [1,2,3,4,5],
77
+ b: [11,22,33,44,55],
78
+ c: [1,2,3,4,5]
79
+ })
80
+ end
81
+
82
+ it "collects calculation over rows and returns a Vector from the results" do
83
+ expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
84
+ DaruLite::Vector.new([2,8,18,32,50])
85
+ )
86
+ end
87
+
88
+ it "collects calculation over vectors and returns a Vector from the results" do
89
+ expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
90
+ DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
91
+ )
92
+ end
93
+ end
94
+
95
+ describe "#map" do
96
+ it "iterates over rows and returns an Array" do
97
+ ret = df.map(:row) do |row|
98
+ expect(row.class).to eq(DaruLite::Vector)
99
+ row[:a] * row[:c]
100
+ end
101
+
102
+ expect(ret).to eq([11, 44, 99, 176, 275])
103
+ expect(df.vectors.to_a).to eq([:a, :b, :c])
104
+ end
105
+
106
+ it "iterates over vectors and returns an Array" do
107
+ ret = df.map do |vector|
108
+ vector.mean
109
+ end
110
+ expect(ret).to eq([3.0, 13.0, 33.0])
111
+ end
112
+ end
113
+
114
+ describe "#map!" do
115
+ let(:ans_vector) do
116
+ DaruLite::DataFrame.new(
117
+ {
118
+ b: [21,22,23,24,25],
119
+ a: [11,12,13,14,15],
120
+ c: [21,32,43,54,65]
121
+ },
122
+ order: [:a, :b, :c],
123
+ index: [:one, :two, :three, :four, :five]
124
+ )
125
+ end
126
+ let(:ans_row) do
127
+ DaruLite::DataFrame.new(
128
+ {
129
+ b: [12,13,14,15,16],
130
+ a: [2,3,4,5,6],
131
+ c: [12,23,34,45,56]
132
+ },
133
+ order: [:a, :b, :c],
134
+ index: [:one, :two, :three, :four, :five]
135
+ )
136
+ end
137
+
138
+ it "destructively maps over the vectors and changes the DF" do
139
+ df.map! do |vector|
140
+ vector + 10
141
+ end
142
+ expect(df).to eq(ans_vector)
143
+ end
144
+
145
+ it "destructively maps over the rows and changes the DF" do
146
+ df.map!(:row) do |row|
147
+ row + 1
148
+ end
149
+
150
+ expect(df).to eq(ans_row)
151
+ end
152
+ end
153
+
154
+ describe "#map_vectors_with_index" do
155
+ it "iterates over vectors with index and returns an Array" do
156
+ idx = []
157
+ ret = df.map_vectors_with_index do |vector, index|
158
+ idx << index
159
+ vector.recode { |e| e += 10}
160
+ end
161
+
162
+ expect(ret).to eq([
163
+ DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
164
+ DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
165
+ DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
166
+ expect(idx).to eq([:a, :b, :c])
167
+ end
168
+ end
169
+
170
+ # FIXME: collect_VECTOR_with_index, but map_VECTORS_with_index -- inconsistent singular/plural naming -- zverok
171
+ # (Not to mention the unfortunate difference between them: the map_* example expects an Array, the collect_* example expects a Vector.)
172
+ describe "#collect_vector_with_index" do
173
+ it "iterates over vectors with index and returns an Array" do # NOTE(review): description says Array, but the expectation below is a DaruLite::Vector
174
+ idx = []
175
+ ret = df.collect_vector_with_index do |vector, index|
176
+ idx << index # record the index yielded with each vector
177
+ vector.sum # block result; expected to become one element of ret
178
+ end
179
+
180
+ expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c])) # one block result per vector, keyed by vector name
181
+ expect(idx).to eq([:a, :b, :c]) # vectors are expected to be yielded in :a, :b, :c order
182
+ end
183
+ end
184
+
185
+ describe "#map_rows_with_index" do
186
+ it "iterates over rows with index and returns an Array" do
187
+ idx = []
188
+ ret = df.map_rows_with_index do |row, index|
189
+ idx << index
190
+ expect(row.class).to eq(DaruLite::Vector)
191
+ row[:a] * row[:c]
192
+ end
193
+
194
+ expect(ret).to eq([11, 44, 99, 176, 275])
195
+ expect(idx).to eq([:one, :two, :three, :four, :five])
196
+ end
197
+ end
198
+
199
+ describe '#collect_row_with_index' do
200
+ it "iterates over rows with index and returns a Vector" do
201
+ idx = []
202
+ ret = df.collect_row_with_index do |row, index|
203
+ idx << index
204
+ expect(row.class).to eq(DaruLite::Vector)
205
+ row[:a] * row[:c]
206
+ end
207
+
208
+ expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: df.index)
209
+ expect(ret).to eq(expected)
210
+ expect(idx).to eq([:one, :two, :three, :four, :five])
211
+ end
212
+ end
213
+
214
+ describe "#recode" do
215
+ let(:ans_vector) do
216
+ DaruLite::DataFrame.new(
217
+ { b: [21,22,23,24,25],
218
+ a: [11,12,13,14,15],
219
+ c: [21,32,43,54,65]
220
+ },
221
+ order: [:a, :b, :c],
222
+ index: [:one, :two, :three, :four, :five]
223
+ )
224
+ end
225
+ let(:ans_rows) do
226
+ DaruLite::DataFrame.new(
227
+ {
228
+ b: [121, 144, 169, 196, 225],
229
+ a: [1,4,9,16,25],
230
+ c: [121, 484, 1089, 1936, 3025]
231
+ },
232
+ order: [:a, :b, :c],
233
+ index: [:one, :two, :three, :four, :five]
234
+ )
235
+ end
236
+ let(:ans_vector_date_time) do
237
+ DaruLite::DataFrame.new(
238
+ {
239
+ b: [21,22,23,24,25],
240
+ a: [11,12,13,14,15],
241
+ c: [21,32,43,54,65]
242
+ },
243
+ order: [:a, :b, :c],
244
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
245
+ )
246
+ end
247
+ let(:ans_rows_date_time) do
248
+ DaruLite::DataFrame.new(
249
+ {
250
+ b: [121, 144, 169, 196, 225],
251
+ a: [1,4,9,16,25],
252
+ c: [121, 484, 1089, 1936, 3025]
253
+ },
254
+ order: [:a, :b, :c],
255
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
256
+ )
257
+ end
258
+ let(:data_frame_date_time) do
259
+ df.dup.tap do |df_dt|
260
+ df_dt.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
261
+ end
262
+ end
263
+
264
+ it "maps over the vectors of a DataFrame and returns a DataFrame" do
265
+ ret = df.recode do |vector|
266
+ vector.map! { |e| e += 10}
267
+ end
268
+
269
+ expect(ret).to eq(ans_vector)
270
+ end
271
+
272
+ it "maps over the rows of a DataFrame and returns a DataFrame" do
273
+ ret = df.recode(:row) do |row|
274
+ expect(row.class).to eq(DaruLite::Vector)
275
+ row.map! { |e| e*e }
276
+ end
277
+
278
+ expect(ret).to eq(ans_rows)
279
+ end
280
+
281
+ it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
282
+ ret = data_frame_date_time.recode do |vector|
283
+ vector.map! { |e| e += 10}
284
+ end
285
+
286
+ expect(ret).to eq(ans_vector_date_time)
287
+ end
288
+
289
+ it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
290
+ ret = data_frame_date_time.recode(:row) do |row|
291
+ expect(row.class).to eq(DaruLite::Vector)
292
+ row.map! { |e| e*e }
293
+ end
294
+
295
+ expect(ret).to eq(ans_rows_date_time)
296
+ end
297
+ end
298
+
299
+ describe '#replace_values' do
300
+ subject do
301
+ DaruLite::DataFrame.new({
302
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
303
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
304
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
305
+ })
306
+ end
307
+ before { subject.to_category :b }
308
+
309
+ context 'replace nils only' do
310
+ before { subject.replace_values nil, 10 }
311
+
312
+ it { is_expected.to be_a DaruLite::DataFrame }
313
+ its(:'b.type') { is_expected.to eq :category }
314
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
315
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
316
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
317
+ end
318
+
319
+ context 'replace Float::NAN only' do
320
+ before { subject.replace_values Float::NAN, 10 }
321
+
322
+ it { is_expected.to be_a DaruLite::DataFrame }
323
+ its(:'b.type') { is_expected.to eq :category }
324
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
325
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
326
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
327
+ end
328
+
329
+ context 'replace both nil and Float::NAN' do
330
+ before { subject.replace_values [nil, Float::NAN], 10 }
331
+
332
+ it { is_expected.to be_a DaruLite::DataFrame }
333
+ its(:'b.type') { is_expected.to eq :category }
334
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
335
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
336
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
337
+ end
338
+
339
+ context 'replace other values' do
340
+ before { subject.replace_values [1, 5], 10 }
341
+
342
+ it { is_expected.to be_a DaruLite::DataFrame }
343
+ its(:'b.type') { is_expected.to eq :category }
344
+ its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
345
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
346
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
347
+ end
348
+ end
349
+
350
+
351
+ describe "#verify" do
352
+ def create_test(*args, &proc)
353
+ description = args.shift
354
+ fields = args
355
+ [description, fields, proc]
356
+ end
357
+
358
+ let(:df) do
359
+ name = DaruLite::Vector.new %w(r1 r2 r3 r4)
360
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
361
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
362
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
363
+ v4 = DaruLite::Vector.new %w(a b a b)
364
+
365
+ DaruLite::DataFrame.new({ v1:, v2:, v3:, v4:, id: name }, order: [:v1, :v2, :v3, :v4, :id])
366
+ end
367
+
368
+ it "correctly verifies data as per the block" do
369
+ # Tests that hold for every row of the fixture: the rows with v4 == 'a' have v1 = 1 and 3 (both odd), and v3 is always v1 * 10.
370
+ t1 = create_test('If v4=a, v1 odd') do |r|
371
+ r[:v4] == 'b' or (r[:v4] == 'a' and r[:v1].odd?)
372
+ end
373
+ t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
374
+ # Test that does not hold for the rows where v4 == 'a'; exp1 lists those rows as 1 and 3.
375
+ t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }
376
+ exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"] # messages expected when verify is called without an id field
377
+ exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"] # NOTE(review): never asserted in this example -- looks like the expectation for verify(:id, ...); confirm or remove
378
+
379
+ dataf = df.verify(t3, t1, t2) # only t3 is expected to produce messages
380
+ expect(dataf).to eq(exp1)
381
+ end
382
+
383
+ it "uses additional fields to extend error messages" do
384
+ t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
385
+
386
+ dataf = df.verify(:id, t)
387
+ expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
388
+ end
389
+ end
390
+
391
+ describe "#merge" do
392
+ it "merges one dataframe with another" do
393
+ a = DaruLite::Vector.new [1, 2, 3]
394
+ b = DaruLite::Vector.new [3, 4, 5]
395
+ c = DaruLite::Vector.new [4, 5, 6]
396
+ d = DaruLite::Vector.new [7, 8, 9]
397
+ e = DaruLite::Vector.new [10, 20, 30]
398
+ ds1 = DaruLite::DataFrame.new({ :a => a, :b => b })
399
+ ds2 = DaruLite::DataFrame.new({ :c => c, :d => d })
400
+ exp = DaruLite::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
401
+
402
+ expect(ds1.merge(ds2)).to eq(exp)
403
+ expect(ds2.merge(ds1)).to eq(
404
+ DaruLite::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
405
+
406
+ ds3 = DaruLite::DataFrame.new({ :a => e })
407
+ exp = DaruLite::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
408
+ order: [:a_1, :b, :a_2])
409
+
410
+ expect(ds1.merge(ds3)).to eq(exp)
411
+ end
412
+
413
+ context "preserves type of vector names" do
414
+ let(:df1) { DaruLite::DataFrame.new({'a'=> [1, 2, 3]}) }
415
+ let(:df2) { DaruLite::DataFrame.new({:b=> [4, 5, 6]}) }
416
+ subject { df1.merge df2 }
417
+
418
+ it { is_expected.to be_a DaruLite::DataFrame }
419
+ it { expect(subject['a'].to_a).to eq [1, 2, 3] }
420
+ it { expect(subject[:b].to_a).to eq [4, 5, 6] }
421
+ end
422
+
423
+ context "preserves indices for dataframes with same index" do
424
+ let(:index) { ['one','two','three'] }
425
+ let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
426
+ let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
427
+ subject { df1.merge df2 }
428
+
429
+ its(:index) { is_expected.to eq DaruLite::Index.new(index) }
430
+ end
431
+ end
432
+
433
+ describe "#one_to_many" do
434
+ subject { df.one_to_many(['id'], 'car_%v%n') }
435
+
436
+ let(:df) do
437
+ DaruLite::DataFrame.rows(
438
+ [
439
+ ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
440
+ ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
441
+ ['3', 'alfred', nil, nil, nil, nil, nil, nil]
442
+ ],
443
+ order: [
444
+ 'id', 'name', 'car_color1', 'car_value1', 'car_color2',
445
+ 'car_value2', 'car_color3', 'car_value3'
446
+ ]
447
+ )
448
+ end
449
+ let(:df_expected) do
450
+ ids = DaruLite::Vector.new %w(1 1 2 2 2)
451
+ colors = DaruLite::Vector.new %w(red blue green orange white)
452
+ values = DaruLite::Vector.new [10, 20, 15, 30, 20]
453
+ col_ids = DaruLite::Vector.new [1, 2, 1, 2, 3]
454
+
455
+ DaruLite::DataFrame.new(
456
+ {
457
+ 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values
458
+ },
459
+ order: ['id', '_col_id', 'color', 'value']
460
+ )
461
+ end
462
+
463
+ it { is_expected.to eq(df_expected) }
464
+ end
465
+ end
@@ -0,0 +1,106 @@
1
+ shared_examples_for 'a joinable DataFrame' do
2
+ describe "#concat" do
3
+ let(:df1) do
4
+ DaruLite::DataFrame.new({
5
+ a: [1, 2, 3],
6
+ b: [1, 2, 3]
7
+ })
8
+ end
9
+ let(:df2) do
10
+ DaruLite::DataFrame.new({
11
+ a: [4, 5, 6],
12
+ c: [4, 5, 6]
13
+ })
14
+ end
15
+
16
+ it 'does not modify the original dataframes' do
17
+ df1_a = df1[:a].to_a.dup # snapshot of df1[:a] taken before concat
18
+ df2_a = df2[:a].to_a.dup # snapshot of df2[:a] taken before concat
19
+
20
+ df_concat = df1.concat df2 # NOTE(review): df_concat is never read in this example; only the effect on df1/df2 is checked
21
+ expect(df1[:a].to_a).to eq df1_a
22
+ expect(df2[:a].to_a).to eq df2_a
23
+ end
24
+
25
+ it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
26
+ df1_a = df1[:a].to_a.dup
27
+ df2_a = df2[:a].to_a.dup
28
+
29
+ df_concat = df1.concat df2
30
+ expect(df_concat[:a].to_a).to eq df1_a + df2_a
31
+ end
32
+
33
+ it 'fills in missing vectors with nils' do
34
+ df1_b = df1[:b].to_a.dup
35
+ df2_c = df2[:c].to_a.dup
36
+
37
+ df_concat = df1.concat df2
38
+ expect(df_concat[:b].to_a).to eq df1_b + [nil] * df2.size
39
+ expect(df_concat[:c].to_a).to eq [nil] * df1.size + df2_c
40
+ end
41
+ end
42
+
43
+
44
+ context "#union" do
45
+ let(:df1) do
46
+ DaruLite::DataFrame.new({
47
+ a: [1, 2, 3],
48
+ b: [1, 2, 3]},
49
+ index: [1,3,5]
50
+ )
51
+ end
52
+ let(:df2) do
53
+ DaruLite::DataFrame.new({
54
+ a: [4, 5, 6],
55
+ c: [4, 5, 6]},
56
+ index: [7,9,11]
57
+ )
58
+ end
59
+ let(:df3) do
60
+ DaruLite::DataFrame.new({
61
+ a: [4, 5, 6],
62
+ c: [4, 5, 6]},
63
+ index: [5,7,9]
64
+ )
65
+ end
66
+
67
+ it 'does not modify the original dataframes' do
68
+ df1_a = df1[:a].to_a.dup
69
+ df2_a = df2[:a].to_a.dup
70
+
71
+ _ = df1.union df2
72
+ expect(df1[:a].to_a).to eq df1_a
73
+ expect(df2[:a].to_a).to eq df2_a
74
+ end
75
+
76
+ it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
77
+ df1_a = df1[:a].to_a.dup
78
+ df2_a = df2[:a].to_a.dup
79
+
80
+ df_union = df1.union df2
81
+ expect(df_union[:a].to_a).to eq df1_a + df2_a
82
+ end
83
+
84
+ it 'fills in missing vectors with nils' do
85
+ df1_b = df1[:b].to_a.dup
86
+ df2_c = df2[:c].to_a.dup
87
+
88
+ df_union = df1.union df2
89
+ expect(df_union[:b].to_a).to eq df1_b + [nil] * df2.size
90
+ expect(df_union[:c].to_a).to eq [nil] * df1.size + df2_c
91
+ end
92
+
93
+ it 'overwrites part of the first dataframe if there are double indices' do
94
+ vec = DaruLite::Vector.new({a: 4, b: nil, c: 4})
95
+ expect(df1.union(df3).row[5]).to eq vec
96
+ end
97
+
98
+ it 'concats the indices' do
99
+ v1 = df1.index.to_a
100
+ v2 = df2.index.to_a
101
+
102
+ df_union = df1.union df2
103
+ expect(df_union.index.to_a).to eq v1 + v2
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,47 @@
1
+ shared_examples_for 'a missable DataFrame' do
2
+ describe '#rolling_fillna!' do
3
+ subject do
4
+ DaruLite::DataFrame.new({
5
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
6
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
7
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
8
+ })
9
+ end
10
+
11
+ context 'rolling_fillna! forwards' do
12
+ before { subject.rolling_fillna!(:forward) }
13
+
14
+ it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
15
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
16
+ its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
17
+ its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
18
+ end
19
+
20
+ context 'rolling_fillna! backwards' do
21
+ before { subject.rolling_fillna!(:backward) } # mutate the subject in place before each example
22
+
23
+ it { expect(subject.rolling_fillna!(:backward)).to eq(subject) } # calling it again returns a frame equal to the already-filled subject
24
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] } # each nil/NaN takes the next non-missing value below it
25
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] } # NOTE(review): the trailing nil has no later value and is expected to become 0 -- presumably the implementation's fallback; confirm this is intended
26
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
27
+ end
28
+ end
29
+
30
+ describe "#missing_values_rows" do
31
+ subject { df.missing_values_rows }
32
+
33
+ let(:df) do
34
+ a1 = DaruLite::Vector.new [1, nil, 3, 4, 5, nil]
35
+ a2 = DaruLite::Vector.new [10, nil, 20, 20, 20, 30]
36
+ b1 = DaruLite::Vector.new [nil, nil, 1, 1, 1, 2]
37
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
38
+ c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
39
+
40
+ DaruLite::DataFrame.new({a1:, a2:, b1:, b2:, c: })
41
+ end
42
+
43
+ it "returns number of missing values in each row" do
44
+ expect(subject).to eq(DaruLite::Vector.new [2, 3, 0, 1, 0, 1])
45
+ end
46
+ end
47
+ end