daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,465 @@
1
+ shared_examples_for 'an iterable DataFrame' do
2
+ describe "#each_index" do
3
+ it "iterates over index" do
4
+ idxs = []
5
+ ret = df.each_index do |index|
6
+ idxs << index
7
+ end
8
+
9
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
10
+
11
+ expect(ret).to eq(df)
12
+ end
13
+ end
14
+
15
+ describe "#each_vector_with_index" do
16
+ it "iterates over vectors with index" do
17
+ idxs = []
18
+ ret = df.each_vector_with_index do |vector, index|
19
+ idxs << index
20
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
21
+ expect(vector.class).to eq(DaruLite::Vector)
22
+ end
23
+
24
+ expect(idxs).to eq([:a, :b, :c])
25
+
26
+ expect(ret).to eq(df)
27
+ end
28
+ end
29
+
30
+ describe "#each_row_with_index" do
31
+ it "iterates over rows with indexes" do
32
+ idxs = []
33
+ ret = df.each_row_with_index do |row, idx|
34
+ idxs << idx
35
+ expect(row.index).to eq([:a, :b, :c].to_index)
36
+ expect(row.class).to eq(DaruLite::Vector)
37
+ end
38
+
39
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
40
+ expect(ret) .to eq(df)
41
+ end
42
+ end
43
+
44
+ describe "#each" do
45
+ it "iterates over rows" do
46
+ ret = df.each(:row) do |row|
47
+ expect(row.index).to eq([:a, :b, :c].to_index)
48
+ expect(row.class).to eq(DaruLite::Vector)
49
+ end
50
+
51
+ expect(ret).to eq(df)
52
+ end
53
+
54
+ it "iterates over all vectors" do
55
+ ret = df.each do |vector|
56
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
57
+ expect(vector.class).to eq(DaruLite::Vector)
58
+ end
59
+
60
+ expect(ret).to eq(df)
61
+ end
62
+
63
+ it "returns Enumerable if no block specified" do
64
+ ret = df.each
65
+ expect(ret.is_a?(Enumerator)).to eq(true)
66
+ end
67
+
68
+ it "raises on unknown axis" do
69
+ expect { df.each(:kitten) }.to raise_error(ArgumentError, /axis/)
70
+ end
71
+ end
72
+
73
+ describe "#collect" do
74
+ before do
75
+ @df = DaruLite::DataFrame.new({
76
+ a: [1,2,3,4,5],
77
+ b: [11,22,33,44,55],
78
+ c: [1,2,3,4,5]
79
+ })
80
+ end
81
+
82
+ it "collects calculation over rows and returns a Vector from the results" do
83
+ expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
84
+ DaruLite::Vector.new([2,8,18,32,50])
85
+ )
86
+ end
87
+
88
+ it "collects calculation over vectors and returns a Vector from the results" do
89
+ expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
90
+ DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
91
+ )
92
+ end
93
+ end
94
+
95
+ describe "#map" do
96
+ it "iterates over rows and returns an Array" do
97
+ ret = df.map(:row) do |row|
98
+ expect(row.class).to eq(DaruLite::Vector)
99
+ row[:a] * row[:c]
100
+ end
101
+
102
+ expect(ret).to eq([11, 44, 99, 176, 275])
103
+ expect(df.vectors.to_a).to eq([:a, :b, :c])
104
+ end
105
+
106
+ it "iterates over vectors and returns an Array" do
107
+ ret = df.map do |vector|
108
+ vector.mean
109
+ end
110
+ expect(ret).to eq([3.0, 13.0, 33.0])
111
+ end
112
+ end
113
+
114
+ describe "#map!" do
115
+ let(:ans_vector) do
116
+ DaruLite::DataFrame.new(
117
+ {
118
+ b: [21,22,23,24,25],
119
+ a: [11,12,13,14,15],
120
+ c: [21,32,43,54,65]
121
+ },
122
+ order: [:a, :b, :c],
123
+ index: [:one, :two, :three, :four, :five]
124
+ )
125
+ end
126
+ let(:ans_row) do
127
+ DaruLite::DataFrame.new(
128
+ {
129
+ b: [12,13,14,15,16],
130
+ a: [2,3,4,5,6],
131
+ c: [12,23,34,45,56]
132
+ },
133
+ order: [:a, :b, :c],
134
+ index: [:one, :two, :three, :four, :five]
135
+ )
136
+ end
137
+
138
+ it "destructively maps over the vectors and changes the DF" do
139
+ df.map! do |vector|
140
+ vector + 10
141
+ end
142
+ expect(df).to eq(ans_vector)
143
+ end
144
+
145
+ it "destructively maps over the rows and changes the DF" do
146
+ df.map!(:row) do |row|
147
+ row + 1
148
+ end
149
+
150
+ expect(df).to eq(ans_row)
151
+ end
152
+ end
153
+
154
+ describe "#map_vectors_with_index" do
155
+ it "iterates over vectors with index and returns an Array" do
156
+ idx = []
157
+ ret = df.map_vectors_with_index do |vector, index|
158
+ idx << index
159
+ vector.recode { |e| e += 10}
160
+ end
161
+
162
+ expect(ret).to eq([
163
+ DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
164
+ DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
165
+ DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
166
+ expect(idx).to eq([:a, :b, :c])
167
+ end
168
+ end
169
+
170
+ # FIXME: map_VECTORS_with_index, but collect_VECTOR_with_index -- ??? -- zverok
171
+ # (Not to mention the unfortunate difference between them...)
172
+ describe "#collect_vector_with_index" do
173
+ it "iterates over vectors with index and returns an Array" do
174
+ idx = []
175
+ ret = df.collect_vector_with_index do |vector, index|
176
+ idx << index
177
+ vector.sum
178
+ end
179
+
180
+ expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c]))
181
+ expect(idx).to eq([:a, :b, :c])
182
+ end
183
+ end
184
+
185
+ describe "#map_rows_with_index" do
186
+ it "iterates over rows with index and returns an Array" do
187
+ idx = []
188
+ ret = df.map_rows_with_index do |row, index|
189
+ idx << index
190
+ expect(row.class).to eq(DaruLite::Vector)
191
+ row[:a] * row[:c]
192
+ end
193
+
194
+ expect(ret).to eq([11, 44, 99, 176, 275])
195
+ expect(idx).to eq([:one, :two, :three, :four, :five])
196
+ end
197
+ end
198
+
199
+ describe '#collect_row_with_index' do
200
+ it "iterates over rows with index and returns a Vector" do
201
+ idx = []
202
+ ret = df.collect_row_with_index do |row, index|
203
+ idx << index
204
+ expect(row.class).to eq(DaruLite::Vector)
205
+ row[:a] * row[:c]
206
+ end
207
+
208
+ expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: df.index)
209
+ expect(ret).to eq(expected)
210
+ expect(idx).to eq([:one, :two, :three, :four, :five])
211
+ end
212
+ end
213
+
214
+ describe "#recode" do
215
+ let(:ans_vector) do
216
+ DaruLite::DataFrame.new(
217
+ { b: [21,22,23,24,25],
218
+ a: [11,12,13,14,15],
219
+ c: [21,32,43,54,65]
220
+ },
221
+ order: [:a, :b, :c],
222
+ index: [:one, :two, :three, :four, :five]
223
+ )
224
+ end
225
+ let(:ans_rows) do
226
+ DaruLite::DataFrame.new(
227
+ {
228
+ b: [121, 144, 169, 196, 225],
229
+ a: [1,4,9,16,25],
230
+ c: [121, 484, 1089, 1936, 3025]
231
+ },
232
+ order: [:a, :b, :c],
233
+ index: [:one, :two, :three, :four, :five]
234
+ )
235
+ end
236
+ let(:ans_vector_date_time) do
237
+ DaruLite::DataFrame.new(
238
+ {
239
+ b: [21,22,23,24,25],
240
+ a: [11,12,13,14,15],
241
+ c: [21,32,43,54,65]
242
+ },
243
+ order: [:a, :b, :c],
244
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
245
+ )
246
+ end
247
+ let(:ans_rows_date_time) do
248
+ DaruLite::DataFrame.new(
249
+ {
250
+ b: [121, 144, 169, 196, 225],
251
+ a: [1,4,9,16,25],
252
+ c: [121, 484, 1089, 1936, 3025]
253
+ },
254
+ order: [:a, :b, :c],
255
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
256
+ )
257
+ end
258
+ let(:data_frame_date_time) do
259
+ df.dup.tap do |df_dt|
260
+ df_dt.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
261
+ end
262
+ end
263
+
264
+ it "maps over the vectors of a DataFrame and returns a DataFrame" do
265
+ ret = df.recode do |vector|
266
+ vector.map! { |e| e += 10}
267
+ end
268
+
269
+ expect(ret).to eq(ans_vector)
270
+ end
271
+
272
+ it "maps over the rows of a DataFrame and returns a DataFrame" do
273
+ ret = df.recode(:row) do |row|
274
+ expect(row.class).to eq(DaruLite::Vector)
275
+ row.map! { |e| e*e }
276
+ end
277
+
278
+ expect(ret).to eq(ans_rows)
279
+ end
280
+
281
+ it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
282
+ ret = data_frame_date_time.recode do |vector|
283
+ vector.map! { |e| e += 10}
284
+ end
285
+
286
+ expect(ret).to eq(ans_vector_date_time)
287
+ end
288
+
289
+ it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
290
+ ret = data_frame_date_time.recode(:row) do |row|
291
+ expect(row.class).to eq(DaruLite::Vector)
292
+ row.map! { |e| e*e }
293
+ end
294
+
295
+ expect(ret).to eq(ans_rows_date_time)
296
+ end
297
+ end
298
+
299
+ describe '#replace_values' do # replaces the given value(s) with a replacement in every vector of the subject
300
+ subject do # fixture mixing nil, Float::NAN, numbers, symbols and strings
301
+ DaruLite::DataFrame.new({
302
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
303
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
304
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
305
+ })
306
+ end
307
+ before { subject.to_category :b } # vector :b is converted up front so each context can check its type survives
308
+
309
+ context 'replace nils only' do
310
+ before { subject.replace_values nil, 10 } # mutates subject; the examples below inspect it afterwards
311
+
312
+ it { is_expected.to be_a DaruLite::DataFrame }
313
+ its(:'b.type') { is_expected.to eq :category }
314
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] } # NOTE(review): NaN != NaN, so this presumably passes only because both sides hold the same Float::NAN object - confirm
315
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
316
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
317
+ end
318
+
319
+ context 'replace Float::NAN only' do
320
+ before { subject.replace_values Float::NAN, 10 } # nils must be left untouched here
321
+
322
+ it { is_expected.to be_a DaruLite::DataFrame }
323
+ its(:'b.type') { is_expected.to eq :category }
324
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
325
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
326
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
327
+ end
328
+
329
+ context 'replace both nil and Float::NAN' do
330
+ before { subject.replace_values [nil, Float::NAN], 10 } # an Array lists several values to replace at once
331
+
332
+ it { is_expected.to be_a DaruLite::DataFrame }
333
+ its(:'b.type') { is_expected.to eq :category }
334
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
335
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
336
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
337
+ end
338
+
339
+ context 'replace other values' do
340
+ before { subject.replace_values [1, 5], 10 } # ordinary values are replaced too; nil and NaN stay as they were
341
+
342
+ it { is_expected.to be_a DaruLite::DataFrame }
343
+ its(:'b.type') { is_expected.to eq :category }
344
+ its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
345
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
346
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
347
+ end
348
+ end
349
+
350
+
351
+ describe "#verify" do # returns one message per (row, failed test) pair
352
+ def create_test(*args, &block) # builds the [description, extra_fields, predicate] triple handed to #verify
353
+ description = args.shift
354
+ fields = args
355
+ [description, fields, block]
356
+ end
357
+
358
+ let(:df) do # local frame that shadows any outer df for this group
359
+ name = DaruLite::Vector.new %w(r1 r2 r3 r4)
360
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
361
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
362
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
363
+ v4 = DaruLite::Vector.new %w(a b a b)
364
+
365
+ DaruLite::DataFrame.new({ v1:, v2:, v3:, v4:, id: name }, order: [:v1, :v2, :v3, :v4, :id])
366
+ end
367
+
368
+ it "correctly verifies data as per the block" do
369
+ # Correct
370
+ t1 = create_test('If v4=a, v1 odd') do |r|
371
+ r[:v4] == 'b' || (r[:v4] == 'a' && r[:v1].odd?)
372
+ end
373
+ t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
374
+ # Fail!
375
+ t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }
376
+ exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"] # expected when no id vector is named
377
+ exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"] # expected when :id is passed as the first argument
378
+
379
+ expect(df.verify(t3, t1, t2)).to eq(exp1)
380
+ expect(df.verify(:id, t3, t1, t2)).to eq(exp2)
381
+ end
382
+
383
+ it "uses additional fields to extend error messages" do
384
+ t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
385
+
386
+ dataf = df.verify(:id, t)
387
+ expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
388
+ end
389
+ end
390
+
391
+ describe "#merge" do
392
+ it "merges one dataframe with another" do
393
+ a = DaruLite::Vector.new [1, 2, 3]
394
+ b = DaruLite::Vector.new [3, 4, 5]
395
+ c = DaruLite::Vector.new [4, 5, 6]
396
+ d = DaruLite::Vector.new [7, 8, 9]
397
+ e = DaruLite::Vector.new [10, 20, 30]
398
+ ds1 = DaruLite::DataFrame.new({ :a => a, :b => b })
399
+ ds2 = DaruLite::DataFrame.new({ :c => c, :d => d })
400
+ exp = DaruLite::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
401
+
402
+ expect(ds1.merge(ds2)).to eq(exp)
403
+ expect(ds2.merge(ds1)).to eq(
404
+ DaruLite::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
405
+
406
+ ds3 = DaruLite::DataFrame.new({ :a => e })
407
+ exp = DaruLite::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
408
+ order: [:a_1, :b, :a_2])
409
+
410
+ expect(ds1.merge(ds3)).to eq(exp)
411
+ end
412
+
413
+ context "preserves type of vector names" do
414
+ let(:df1) { DaruLite::DataFrame.new({'a'=> [1, 2, 3]}) }
415
+ let(:df2) { DaruLite::DataFrame.new({:b=> [4, 5, 6]}) }
416
+ subject { df1.merge df2 }
417
+
418
+ it { is_expected.to be_a DaruLite::DataFrame }
419
+ it { expect(subject['a'].to_a).to eq [1, 2, 3] }
420
+ it { expect(subject[:b].to_a).to eq [4, 5, 6] }
421
+ end
422
+
423
+ context "preserves indices for dataframes with same index" do
424
+ let(:index) { ['one','two','three'] }
425
+ let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
426
+ let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
427
+ subject { df1.merge df2 }
428
+
429
+ its(:index) { is_expected.to eq DaruLite::Index.new(index) }
430
+ end
431
+ end
432
+
433
+ describe "#one_to_many" do
434
+ subject { df.one_to_many(['id'], 'car_%v%n') }
435
+
436
+ let(:df) do
437
+ DaruLite::DataFrame.rows(
438
+ [
439
+ ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
440
+ ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
441
+ ['3', 'alfred', nil, nil, nil, nil, nil, nil]
442
+ ],
443
+ order: [
444
+ 'id', 'name', 'car_color1', 'car_value1', 'car_color2',
445
+ 'car_value2', 'car_color3', 'car_value3'
446
+ ]
447
+ )
448
+ end
449
+ let(:df_expected) do
450
+ ids = DaruLite::Vector.new %w(1 1 2 2 2)
451
+ colors = DaruLite::Vector.new %w(red blue green orange white)
452
+ values = DaruLite::Vector.new [10, 20, 15, 30, 20]
453
+ col_ids = DaruLite::Vector.new [1, 2, 1, 2, 3]
454
+
455
+ DaruLite::DataFrame.new(
456
+ {
457
+ 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values
458
+ },
459
+ order: ['id', '_col_id', 'color', 'value']
460
+ )
461
+ end
462
+
463
+ it { is_expected.to eq(df_expected) }
464
+ end
465
+ end
@@ -0,0 +1,106 @@
1
+ shared_examples_for 'a joinable DataFrame' do
2
+ describe "#concat" do
3
+ let(:df1) do
4
+ DaruLite::DataFrame.new({
5
+ a: [1, 2, 3],
6
+ b: [1, 2, 3]
7
+ })
8
+ end
9
+ let(:df2) do
10
+ DaruLite::DataFrame.new({
11
+ a: [4, 5, 6],
12
+ c: [4, 5, 6]
13
+ })
14
+ end
15
+
16
+ it 'does not modify the original dataframes' do
17
+ df1_a = df1[:a].to_a.dup
18
+ df2_a = df2[:a].to_a.dup
19
+
20
+ df_concat = df1.concat df2
21
+ expect(df1[:a].to_a).to eq df1_a
22
+ expect(df2[:a].to_a).to eq df2_a
23
+ end
24
+
25
+ it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
26
+ df1_a = df1[:a].to_a.dup
27
+ df2_a = df2[:a].to_a.dup
28
+
29
+ df_concat = df1.concat df2
30
+ expect(df_concat[:a].to_a).to eq df1_a + df2_a
31
+ end
32
+
33
+ it 'fills in missing vectors with nils' do
34
+ df1_b = df1[:b].to_a.dup
35
+ df2_c = df2[:c].to_a.dup
36
+
37
+ df_concat = df1.concat df2
38
+ expect(df_concat[:b].to_a).to eq df1_b + [nil] * df2.size
39
+ expect(df_concat[:c].to_a).to eq [nil] * df1.size + df2_c
40
+ end
41
+ end
42
+
43
+
44
+ context "#union" do
45
+ let(:df1) do
46
+ DaruLite::DataFrame.new({
47
+ a: [1, 2, 3],
48
+ b: [1, 2, 3]},
49
+ index: [1,3,5]
50
+ )
51
+ end
52
+ let(:df2) do
53
+ DaruLite::DataFrame.new({
54
+ a: [4, 5, 6],
55
+ c: [4, 5, 6]},
56
+ index: [7,9,11]
57
+ )
58
+ end
59
+ let(:df3) do
60
+ DaruLite::DataFrame.new({
61
+ a: [4, 5, 6],
62
+ c: [4, 5, 6]},
63
+ index: [5,7,9]
64
+ )
65
+ end
66
+
67
+ it 'does not modify the original dataframes' do
68
+ df1_a = df1[:a].to_a.dup
69
+ df2_a = df2[:a].to_a.dup
70
+
71
+ _ = df1.union df2
72
+ expect(df1[:a].to_a).to eq df1_a
73
+ expect(df2[:a].to_a).to eq df2_a
74
+ end
75
+
76
+ it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
77
+ df1_a = df1[:a].to_a.dup
78
+ df2_a = df2[:a].to_a.dup
79
+
80
+ df_union = df1.union df2
81
+ expect(df_union[:a].to_a).to eq df1_a + df2_a
82
+ end
83
+
84
+ it 'fills in missing vectors with nils' do
85
+ df1_b = df1[:b].to_a.dup
86
+ df2_c = df2[:c].to_a.dup
87
+
88
+ df_union = df1.union df2
89
+ expect(df_union[:b].to_a).to eq df1_b + [nil] * df2.size
90
+ expect(df_union[:c].to_a).to eq [nil] * df1.size + df2_c
91
+ end
92
+
93
+ it 'overwrites part of the first dataframe if there are double indices' do
94
+ vec = DaruLite::Vector.new({a: 4, b: nil, c: 4})
95
+ expect(df1.union(df3).row[5]).to eq vec
96
+ end
97
+
98
+ it 'concats the indices' do
99
+ v1 = df1.index.to_a
100
+ v2 = df2.index.to_a
101
+
102
+ df_union = df1.union df2
103
+ expect(df_union.index.to_a).to eq v1 + v2
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,47 @@
1
+ shared_examples_for 'a missable DataFrame' do
2
+ describe '#rolling_fillna!' do # fills nil/NaN slots from a neighbouring value, in place
3
+ subject do # each vector mixes nil and Float::NAN gaps; :b also ends with a nil
4
+ DaruLite::DataFrame.new({
5
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
6
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
7
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
8
+ })
9
+ end
10
+
11
+ context 'rolling_fillna! forwards' do
12
+ before { subject.rolling_fillna!(:forward) } # mutates subject; the examples below inspect it afterwards
13
+
14
+ it { expect(subject.rolling_fillna!(:forward)).to eq(subject) } # a second call on the already-filled frame still equals the subject
15
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] } # gaps take the preceding value
16
+ its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
17
+ its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
18
+ end
19
+
20
+ context 'rolling_fillna! backwards' do
21
+ before { subject.rolling_fillna!(:backward) }
22
+
23
+ it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
24
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] } # gaps take the following value
25
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] } # trailing nil has no later value; the spec expects 0 there
26
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
27
+ end
28
+ end
29
+
30
+ describe "#missing_values_rows" do
31
+ subject { df.missing_values_rows }
32
+
33
+ let(:df) do
34
+ a1 = DaruLite::Vector.new [1, nil, 3, 4, 5, nil]
35
+ a2 = DaruLite::Vector.new [10, nil, 20, 20, 20, 30]
36
+ b1 = DaruLite::Vector.new [nil, nil, 1, 1, 1, 2]
37
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
38
+ c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
39
+
40
+ DaruLite::DataFrame.new({a1:, a2:, b1:, b2:, c: })
41
+ end
42
+
43
+ it "returns number of missing values in each row" do
44
+ expect(subject).to eq(DaruLite::Vector.new [2, 3, 0, 1, 0, 1])
45
+ end
46
+ end
47
+ end