daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,476 @@
1
+ shared_examples_for 'a fetchable DataFrame' do
2
+ describe "#[]" do
3
+ context DaruLite::Index do
4
+ let(:df) do
5
+ DaruLite::DataFrame.new(
6
+ {
7
+ b: [11,12,13,14,15],
8
+ a: [1,2,3,4,5],
9
+ c: [11,22,33,44,55]
10
+ },
11
+ order: [:a, :b, :c],
12
+ index: [:one, :two, :three, :four, :five]
13
+ )
14
+ end
15
+
16
+ it "returns a Vector" do
17
+ expect(df[:a]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
18
+ end
19
+
20
+ it "returns a Vector by default" do
21
+ expect(df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a,
22
+ index: [:one, :two, :three, :four, :five]))
23
+ end
24
+
25
+ it "returns a DataFrame" do
26
+ temp = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
27
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
28
+
29
+ expect(df[:a, :b]).to eq(temp)
30
+ end
31
+
32
+ it "accesses vector with Integer index" do
33
+ expect(df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
34
+ end
35
+
36
+ it "returns a subset of DataFrame when specified range" do
37
+ subset = df[:b..:c]
38
+ expect(subset).to eq(DaruLite::DataFrame.new({
39
+ b: [11,12,13,14,15],
40
+ c: [11,22,33,44,55]
41
+ }, index: [:one, :two, :three, :four, :five]))
42
+ end
43
+
44
+ it 'accepts axis parameter as a last argument' do
45
+ expect(df[:a, :vector]).to eq df[:a]
46
+ expect(df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
47
+ end
48
+ end
49
+
50
+ context DaruLite::MultiIndex do
51
+ it "accesses vector with an integer index" do
52
+ expect(df_mi[0]).to eq(
53
+ DaruLite::Vector.new(vector_arry1, index: multi_index))
54
+ end
55
+
56
+ it "returns a vector when specifying full tuple" do
57
+ expect(df_mi[:a, :one, :bar]).to eq(
58
+ DaruLite::Vector.new(vector_arry1, index: multi_index))
59
+ end
60
+
61
+ it "returns DataFrame when specified first layer of MultiIndex" do
62
+ sub_order = DaruLite::MultiIndex.from_tuples([
63
+ [:one, :bar],
64
+ [:two, :baz]
65
+ ])
66
+ expect(df_mi[:a]).to eq(
67
+ DaruLite::DataFrame.new([vector_arry1, vector_arry2], index: multi_index, order: sub_order)
68
+ )
69
+ end
70
+
71
+ it "returns a Vector if the last level of MultiIndex is tracked" do
72
+ expect(df_mi[:a, :one, :bar]).to eq(
73
+ DaruLite::Vector.new(vector_arry1, index: multi_index)
74
+ )
75
+ end
76
+ end
77
+ end
78
+
79
+ describe "#at" do
80
+ context DaruLite::Index do
81
+ let(:idx) { DaruLite::Index.new [:a, :b, :c] }
82
+ let(:df) do
83
+ DaruLite::DataFrame.new({
84
+ 1 => 1..3,
85
+ a: 'a'..'c',
86
+ b: 11..13
87
+ }, index: idx)
88
+ end
89
+
90
+ context "single position" do
91
+ subject { df.at 1 }
92
+
93
+ it { is_expected.to be_a DaruLite::Vector }
94
+ its(:size) { is_expected.to eq 3 }
95
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
96
+ its(:index) { is_expected.to eq idx }
97
+ end
98
+
99
+ context "multiple positions" do
100
+ subject { df.at 0, 2 }
101
+
102
+ it { is_expected.to be_a DaruLite::DataFrame }
103
+ its(:shape) { is_expected.to eq [3, 2] }
104
+ its(:index) { is_expected.to eq idx }
105
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
106
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
107
+ end
108
+
109
+ context "single invalid position" do
110
+ it { expect { df. at 3 }.to raise_error IndexError }
111
+ end
112
+
113
+ context "multiple invalid positions" do
114
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
115
+ end
116
+
117
+ context "range" do
118
+ subject { df.at 0..1 }
119
+
120
+ it { is_expected.to be_a DaruLite::DataFrame }
121
+ its(:shape) { is_expected.to eq [3, 2] }
122
+ its(:index) { is_expected.to eq idx }
123
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
124
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
125
+ end
126
+
127
+ context "range with negative end" do
128
+ subject { df.at 0..-2 }
129
+
130
+ it { is_expected.to be_a DaruLite::DataFrame }
131
+ its(:shape) { is_expected.to eq [3, 2] }
132
+ its(:index) { is_expected.to eq idx }
133
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
134
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
135
+ end
136
+
137
+ context "range with single element" do
138
+ subject { df.at 1..1 }
139
+
140
+ it { is_expected.to be_a DaruLite::DataFrame }
141
+ its(:shape) { is_expected.to eq [3, 1] }
142
+ its(:index) { is_expected.to eq idx }
143
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
144
+ end
145
+ end
146
+
147
+ context DaruLite::MultiIndex do
148
+ let (:idx) do
149
+ DaruLite::MultiIndex.from_tuples [
150
+ [:a,:one,:bar],
151
+ [:a,:one,:baz],
152
+ [:b,:two,:bar],
153
+ ]
154
+ end
155
+ let(:df) do
156
+ DaruLite::DataFrame.new({
157
+ 1 => 1..3,
158
+ a: 'a'..'c',
159
+ b: 11..13
160
+ }, index: idx)
161
+ end
162
+
163
+ context "single position" do
164
+ subject { df.at 1 }
165
+
166
+ it { is_expected.to be_a DaruLite::Vector }
167
+ its(:size) { is_expected.to eq 3 }
168
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
169
+ its(:index) { is_expected.to eq idx }
170
+ end
171
+
172
+ context "multiple positions" do
173
+ subject { df.at 0, 2 }
174
+
175
+ it { is_expected.to be_a DaruLite::DataFrame }
176
+ its(:shape) { is_expected.to eq [3, 2] }
177
+ its(:index) { is_expected.to eq idx }
178
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
179
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
180
+ end
181
+
182
+ context "single invalid position" do
183
+ it { expect { df. at 3 }.to raise_error IndexError }
184
+ end
185
+
186
+ context "multiple invalid positions" do
187
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
188
+ end
189
+
190
+ context "range" do
191
+ subject { df.at 0..1 }
192
+
193
+ it { is_expected.to be_a DaruLite::DataFrame }
194
+ its(:shape) { is_expected.to eq [3, 2] }
195
+ its(:index) { is_expected.to eq idx }
196
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
197
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
198
+ end
199
+
200
+ context "range with negative end" do
201
+ subject { df.at 0..-2 }
202
+
203
+ it { is_expected.to be_a DaruLite::DataFrame }
204
+ its(:shape) { is_expected.to eq [3, 2] }
205
+ its(:index) { is_expected.to eq idx }
206
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
207
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
208
+ end
209
+
210
+ context "range with single element" do
211
+ subject { df.at 1..1 }
212
+
213
+ it { is_expected.to be_a DaruLite::DataFrame }
214
+ its(:shape) { is_expected.to eq [3, 1] }
215
+ its(:index) { is_expected.to eq idx }
216
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
217
+ end
218
+ end
219
+
220
+ context DaruLite::CategoricalIndex do
221
+ let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] } # row index with a repeated category (1)
222
+ let(:df) do
223
+ DaruLite::DataFrame.new({
224
+ 1 => 1..3,
225
+ a: 'a'..'c',
226
+ b: 11..13
227
+ }, index: idx)
228
+ end
229
+
230
+ context "single position" do
231
+ subject { df.at 1 }
232
+
233
+ it { is_expected.to be_a DaruLite::Vector }
234
+ its(:size) { is_expected.to eq 3 }
235
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
236
+ its(:index) { is_expected.to eq idx }
237
+ end
238
+
239
+ context "multiple positions" do
240
+ subject { df.at 0, 2 }
241
+
242
+ it { is_expected.to be_a DaruLite::DataFrame }
243
+ its(:shape) { is_expected.to eq [3, 2] }
244
+ its(:index) { is_expected.to eq idx }
245
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
246
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
247
+ end
248
+
249
+ context "single invalid position" do
250
+ it { expect { df. at 3 }.to raise_error IndexError }
251
+ end
252
+
253
+ context "multiple invalid positions" do
254
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
255
+ end
256
+
257
+ context "range" do
258
+ subject { df.at 0..1 }
259
+
260
+ it { is_expected.to be_a DaruLite::DataFrame }
261
+ its(:shape) { is_expected.to eq [3, 2] }
262
+ its(:index) { is_expected.to eq idx }
263
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
264
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
265
+ end
266
+
267
+ context "range with negative index" do
268
+ subject { df.at 0..-2 }
269
+
270
+ it { is_expected.to be_a DaruLite::DataFrame }
271
+ its(:shape) { is_expected.to eq [3, 2] }
272
+ its(:index) { is_expected.to eq idx }
273
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
274
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
275
+ end
276
+
277
+ context "range with single element" do
278
+ subject { df.at 1..1 }
279
+
280
+ it { is_expected.to be_a DaruLite::DataFrame }
281
+ its(:shape) { is_expected.to eq [3, 1] }
282
+ its(:index) { is_expected.to eq idx }
283
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
284
+ end
285
+ end
286
+ end
287
+
288
+ context "#first" do
289
+ it 'works' do
290
+ expect(df.first(2)).to eq(
291
+ DaruLite::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
292
+ order: [:a, :b, :c],
293
+ index: [:one, :two]))
294
+ end
295
+
296
+ it 'works with too large values' do
297
+ expect(df.first(200)).to eq(df)
298
+ end
299
+
300
+ it 'has synonym' do
301
+ expect(df.first(2)).to eq(df.head(2))
302
+ end
303
+
304
+ it 'works on DateTime indexes' do
305
+ idx = DaruLite::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
306
+ df = DaruLite::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
307
+ first = DaruLite::DataFrame.new({col1: ['a']}, index: DaruLite::DateTimeIndex.new(['2017-01-01']))
308
+ expect(df.head(1)).to eq(first)
309
+ end
310
+ end
311
+
312
+ context "#last" do
313
+ it 'works' do
314
+ expect(df.last(2)).to eq(
315
+ DaruLite::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
316
+ order: [:a, :b, :c],
317
+ index: [:four, :five]))
318
+ end
319
+
320
+ it 'works with too large values' do
321
+ expect(df.last(200)).to eq(df)
322
+ end
323
+
324
+ it 'has synonym' do
325
+ expect(df.last(2)).to eq(df.tail(2))
326
+ end
327
+ end
328
+
329
+ context '#access_row_tuples_by_indexs' do
330
+ let(:df) { # fixture built without an explicit row index
331
+ DaruLite::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,7,17,1]}) }
332
+ let(:df_idx) {
333
+ DaruLite::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
334
+ }
335
+ let (:mi_idx) do
336
+ DaruLite::MultiIndex.from_tuples [
337
+ [:a,:one,:bar],
338
+ [:a,:one,:baz],
339
+ [:b,:two,:bar],
340
+ [:a,:two,:baz],
341
+ ]
342
+ end
343
+ let (:df_mi) do
344
+ DaruLite::DataFrame.new({
345
+ a: 1..4,
346
+ b: 'a'..'d'
347
+ }, index: mi_idx )
348
+ end
349
+ context 'when no index is given' do
350
+ it 'returns empty Array' do
351
+ expect(df.access_row_tuples_by_indexs()).to eq([])
352
+ end
353
+ end
354
+ context 'when index(s) are given' do
355
+ it 'returns Array of row tuples' do
356
+ expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
357
+ expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
358
+ end
359
+ end
360
+ context 'when custom index(s) are given' do
361
+ it 'returns Array of row tuples' do
362
+ expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
363
+ [[52, 1], [7, 3]]
364
+ )
365
+ end
366
+ end
367
+ context 'when multi index is given' do
368
+ it 'returns Array of row tuples' do
369
+ expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
370
+ [[1, "a"], [2, "b"], [4, "d"]]
371
+ )
372
+ expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
373
+ [[2, "b"]]
374
+ )
375
+ end
376
+ end
377
+ end
378
+
379
+ context "#only_numerics" do
380
+ subject { df.only_numerics }
381
+
382
+ let(:df) do
383
+ DaruLite::DataFrame.new({a: vector1, b: vector2, c: vector3 }, clone: false)
384
+ end
385
+ let(:vector1) { DaruLite::Vector.new([1,2,3,4,5]) }
386
+ let(:vector2) { DaruLite::Vector.new(%w(one two three four five)) }
387
+ let(:vector3) { DaruLite::Vector.new([11,22,33,44,55]) }
388
+
389
+ it "returns a clone of numeric vectors" do
390
+ expect(subject).to eq(
391
+ DaruLite::DataFrame.new({ a: vector1, c: vector3}, clone: false)
392
+ )
393
+ expect(subject[:a].object_id).to_not eq(vector1.object_id)
394
+ end
395
+
396
+ context 'clone is false' do
397
+ subject { df.only_numerics(clone: false) }
398
+
399
+ it "returns a view of only the numeric vectors" do
400
+ expect(subject).to eq(
401
+ DaruLite::DataFrame.new({ a: vector1, c: vector3 }, clone: false)
402
+ )
403
+ expect(subject[:a].object_id).to eq(vector1.object_id)
404
+ end
405
+ end
406
+
407
+ context DaruLite::MultiIndex do
408
+ let(:df) do
409
+ order = DaruLite::MultiIndex.from_tuples(
410
+ [
411
+ [:d, :one, :large],
412
+ [:d, :one, :small],
413
+ [:d, :two, :large],
414
+ [:d, :two, :small],
415
+ [:e, :one, :large],
416
+ [:e, :one, :small],
417
+ [:e, :two, :large],
418
+ [:e, :two, :small]
419
+ ]
420
+ )
421
+
422
+ index = DaruLite::MultiIndex.from_tuples(
423
+ [
424
+ [:bar],
425
+ [:foo]
426
+ ]
427
+ )
428
+ DaruLite::DataFrame.new(
429
+ [
430
+ [4.112,2.234],
431
+ %w(a b),
432
+ [6.342,nil],
433
+ [7.2344,3.23214],
434
+ [8.234,4.533],
435
+ [10.342,2.3432],
436
+ [12.0,nil],
437
+ %w(a b)
438
+ ],
439
+ order:,
440
+ index:
441
+ )
442
+ end
443
+
444
+ it "returns numeric vectors" do
445
+ vectors = DaruLite::MultiIndex.from_tuples(
446
+ [
447
+ [:d, :one, :large],
448
+ [:d, :two, :large],
449
+ [:d, :two, :small],
450
+ [:e, :one, :large],
451
+ [:e, :one, :small],
452
+ [:e, :two, :large]
453
+ ]
454
+ )
455
+ index = DaruLite::MultiIndex.from_tuples(
456
+ [
457
+ [:bar],
458
+ [:foo]
459
+ ]
460
+ )
461
+ answer = DaruLite::DataFrame.new(
462
+ [
463
+ [4.112,2.234],
464
+ [6.342,nil],
465
+ [7.2344,3.23214],
466
+ [8.234,4.533],
467
+ [10.342,2.3432],
468
+ [12.0,nil],
469
+ ], order: vectors, index: index
470
+ )
471
+
472
+ expect(subject).to eq(answer)
473
+ end
474
+ end
475
+ end
476
+ end