daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
data/spec/data_frame/fetchable_example.rb
@@ -0,0 +1,476 @@
1
+ shared_examples_for 'a fetchable DataFrame' do
2
+ describe "#[]" do
3
+ context DaruLite::Index do
4
+ let(:df) do
5
+ DaruLite::DataFrame.new(
6
+ {
7
+ b: [11,12,13,14,15],
8
+ a: [1,2,3,4,5],
9
+ c: [11,22,33,44,55]
10
+ },
11
+ order: [:a, :b, :c],
12
+ index: [:one, :two, :three, :four, :five]
13
+ )
14
+ end
15
+
16
+ it "returns a Vector" do
17
+ expect(df[:a]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
18
+ end
19
+
20
+ it "returns a Vector by default" do
21
+ expect(df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a,
22
+ index: [:one, :two, :three, :four, :five]))
23
+ end
24
+
25
+ it "returns a DataFrame" do
26
+ temp = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
27
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
28
+
29
+ expect(df[:a, :b]).to eq(temp)
30
+ end
31
+
32
+ it "accesses vector with Integer index" do
33
+ expect(df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
34
+ end
35
+
36
+ it "returns a subset of DataFrame when specified range" do
37
+ subset = df[:b..:c]
38
+ expect(subset).to eq(DaruLite::DataFrame.new({
39
+ b: [11,12,13,14,15],
40
+ c: [11,22,33,44,55]
41
+ }, index: [:one, :two, :three, :four, :five]))
42
+ end
43
+
44
+ it 'accepts axis parameter as a last argument' do
45
+ expect(df[:a, :vector]).to eq df[:a]
46
+ expect(df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
47
+ end
48
+ end
49
+
50
+ context DaruLite::MultiIndex do
51
+ it "accesses vector with an integer index" do
52
+ expect(df_mi[0]).to eq(
53
+ DaruLite::Vector.new(vector_arry1, index: multi_index))
54
+ end
55
+
56
+ it "returns a vector when specifying full tuple" do
57
+ expect(df_mi[:a, :one, :bar]).to eq(
58
+ DaruLite::Vector.new(vector_arry1, index: multi_index))
59
+ end
60
+
61
+ it "returns DataFrame when specified first layer of MultiIndex" do
62
+ sub_order = DaruLite::MultiIndex.from_tuples([
63
+ [:one, :bar],
64
+ [:two, :baz]
65
+ ])
66
+ expect(df_mi[:a]).to eq(
67
+ DaruLite::DataFrame.new([vector_arry1, vector_arry2], index: multi_index, order: sub_order)
68
+ )
69
+ end
70
+
71
+ it "returns a Vector if the last level of MultiIndex is tracked" do
72
+ expect(df_mi[:a, :one, :bar]).to eq(
73
+ DaruLite::Vector.new(vector_arry1, index: multi_index)
74
+ )
75
+ end
76
+ end
77
+ end
78
+
79
+ describe "#at" do
80
+ context DaruLite::Index do
81
+ let(:idx) { DaruLite::Index.new [:a, :b, :c] }
82
+ let(:df) do
83
+ DaruLite::DataFrame.new({
84
+ 1 => 1..3,
85
+ a: 'a'..'c',
86
+ b: 11..13
87
+ }, index: idx)
88
+ end
89
+
90
+ context "single position" do
91
+ subject { df.at 1 }
92
+
93
+ it { is_expected.to be_a DaruLite::Vector }
94
+ its(:size) { is_expected.to eq 3 }
95
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
96
+ its(:index) { is_expected.to eq idx }
97
+ end
98
+
99
+ context "multiple positions" do
100
+ subject { df.at 0, 2 }
101
+
102
+ it { is_expected.to be_a DaruLite::DataFrame }
103
+ its(:shape) { is_expected.to eq [3, 2] }
104
+ its(:index) { is_expected.to eq idx }
105
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
106
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
107
+ end
108
+
109
+ context "single invalid position" do
110
+ it { expect { df. at 3 }.to raise_error IndexError }
111
+ end
112
+
113
+ context "multiple invalid positions" do
114
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
115
+ end
116
+
117
+ context "range" do
118
+ subject { df.at 0..1 }
119
+
120
+ it { is_expected.to be_a DaruLite::DataFrame }
121
+ its(:shape) { is_expected.to eq [3, 2] }
122
+ its(:index) { is_expected.to eq idx }
123
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
124
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
125
+ end
126
+
127
+ context "range with negative end" do
128
+ subject { df.at 0..-2 }
129
+
130
+ it { is_expected.to be_a DaruLite::DataFrame }
131
+ its(:shape) { is_expected.to eq [3, 2] }
132
+ its(:index) { is_expected.to eq idx }
133
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
134
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
135
+ end
136
+
137
+ context "range with single element" do
138
+ subject { df.at 1..1 }
139
+
140
+ it { is_expected.to be_a DaruLite::DataFrame }
141
+ its(:shape) { is_expected.to eq [3, 1] }
142
+ its(:index) { is_expected.to eq idx }
143
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
144
+ end
145
+ end
146
+
147
+ context DaruLite::MultiIndex do
148
+ let (:idx) do
149
+ DaruLite::MultiIndex.from_tuples [
150
+ [:a,:one,:bar],
151
+ [:a,:one,:baz],
152
+ [:b,:two,:bar],
153
+ ]
154
+ end
155
+ let(:df) do
156
+ DaruLite::DataFrame.new({
157
+ 1 => 1..3,
158
+ a: 'a'..'c',
159
+ b: 11..13
160
+ }, index: idx)
161
+ end
162
+
163
+ context "single position" do
164
+ subject { df.at 1 }
165
+
166
+ it { is_expected.to be_a DaruLite::Vector }
167
+ its(:size) { is_expected.to eq 3 }
168
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
169
+ its(:index) { is_expected.to eq idx }
170
+ end
171
+
172
+ context "multiple positions" do
173
+ subject { df.at 0, 2 }
174
+
175
+ it { is_expected.to be_a DaruLite::DataFrame }
176
+ its(:shape) { is_expected.to eq [3, 2] }
177
+ its(:index) { is_expected.to eq idx }
178
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
179
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
180
+ end
181
+
182
+ context "single invalid position" do
183
+ it { expect { df. at 3 }.to raise_error IndexError }
184
+ end
185
+
186
+ context "multiple invalid positions" do
187
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
188
+ end
189
+
190
+ context "range" do
191
+ subject { df.at 0..1 }
192
+
193
+ it { is_expected.to be_a DaruLite::DataFrame }
194
+ its(:shape) { is_expected.to eq [3, 2] }
195
+ its(:index) { is_expected.to eq idx }
196
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
197
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
198
+ end
199
+
200
+ context "range with negative end" do
201
+ subject { df.at 0..-2 }
202
+
203
+ it { is_expected.to be_a DaruLite::DataFrame }
204
+ its(:shape) { is_expected.to eq [3, 2] }
205
+ its(:index) { is_expected.to eq idx }
206
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
207
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
208
+ end
209
+
210
+ context "range with single element" do
211
+ subject { df.at 1..1 }
212
+
213
+ it { is_expected.to be_a DaruLite::DataFrame }
214
+ its(:shape) { is_expected.to eq [3, 1] }
215
+ its(:index) { is_expected.to eq idx }
216
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
217
+ end
218
+ end
219
+
220
+ context DaruLite::CategoricalIndex do
221
+ let (:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] }
222
+ let(:df) do
223
+ DaruLite::DataFrame.new({
224
+ 1 => 1..3,
225
+ a: 'a'..'c',
226
+ b: 11..13
227
+ }, index: idx)
228
+ end
229
+
230
+ context "single position" do
231
+ subject { df.at 1 }
232
+
233
+ it { is_expected.to be_a DaruLite::Vector }
234
+ its(:size) { is_expected.to eq 3 }
235
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
236
+ its(:index) { is_expected.to eq idx }
237
+ end
238
+
239
+ context "multiple positions" do
240
+ subject { df.at 0, 2 }
241
+
242
+ it { is_expected.to be_a DaruLite::DataFrame }
243
+ its(:shape) { is_expected.to eq [3, 2] }
244
+ its(:index) { is_expected.to eq idx }
245
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
246
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
247
+ end
248
+
249
+ context "single invalid position" do
250
+ it { expect { df. at 3 }.to raise_error IndexError }
251
+ end
252
+
253
+ context "multiple invalid positions" do
254
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
255
+ end
256
+
257
+ context "range" do
258
+ subject { df.at 0..1 }
259
+
260
+ it { is_expected.to be_a DaruLite::DataFrame }
261
+ its(:shape) { is_expected.to eq [3, 2] }
262
+ its(:index) { is_expected.to eq idx }
263
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
264
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
265
+ end
266
+
267
+ context "range with negative index" do
268
+ subject { df.at 0..-2 }
269
+
270
+ it { is_expected.to be_a DaruLite::DataFrame }
271
+ its(:shape) { is_expected.to eq [3, 2] }
272
+ its(:index) { is_expected.to eq idx }
273
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
274
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
275
+ end
276
+
277
+ context "range with single element" do
278
+ subject { df.at 1..1 }
279
+
280
+ it { is_expected.to be_a DaruLite::DataFrame }
281
+ its(:shape) { is_expected.to eq [3, 1] }
282
+ its(:index) { is_expected.to eq idx }
283
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
284
+ end
285
+ end
286
+ end
287
+
288
+ context "#first" do
289
+ it 'works' do
290
+ expect(df.first(2)).to eq(
291
+ DaruLite::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
292
+ order: [:a, :b, :c],
293
+ index: [:one, :two]))
294
+ end
295
+
296
+ it 'works with too large values' do
297
+ expect(df.first(200)).to eq(df)
298
+ end
299
+
300
+ it 'has synonym' do
301
+ expect(df.first(2)).to eq(df.head(2))
302
+ end
303
+
304
+ it 'works on DateTime indexes' do
305
+ idx = DaruLite::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
306
+ df = DaruLite::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
307
+ first = DaruLite::DataFrame.new({col1: ['a']}, index: DaruLite::DateTimeIndex.new(['2017-01-01']))
308
+ expect(df.head(1)).to eq(first)
309
+ end
310
+ end
311
+
312
+ context "#last" do
313
+ it 'works' do
314
+ expect(df.last(2)).to eq(
315
+ DaruLite::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
316
+ order: [:a, :b, :c],
317
+ index: [:four, :five]))
318
+ end
319
+
320
+ it 'works with too large values' do
321
+ expect(df.last(200)).to eq(df)
322
+ end
323
+
324
+ it 'has synonym' do
325
+ expect(df.last(2)).to eq(df.tail(2))
326
+ end
327
+ end
328
+
329
+ context '#access_row_tuples_by_indexs' do
330
+ let(:df) {
331
+ DaruLite::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
332
+ let(:df_idx) {
333
+ DaruLite::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
334
+ }
335
+ let (:mi_idx) do
336
+ DaruLite::MultiIndex.from_tuples [
337
+ [:a,:one,:bar],
338
+ [:a,:one,:baz],
339
+ [:b,:two,:bar],
340
+ [:a,:two,:baz],
341
+ ]
342
+ end
343
+ let (:df_mi) do
344
+ DaruLite::DataFrame.new({
345
+ a: 1..4,
346
+ b: 'a'..'d'
347
+ }, index: mi_idx )
348
+ end
349
+ context 'when no index is given' do
350
+ it 'returns empty Array' do
351
+ expect(df.access_row_tuples_by_indexs()).to eq([])
352
+ end
353
+ end
354
+ context 'when index(s) are given' do
355
+ it 'returns Array of row tuples' do
356
+ expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
357
+ expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
358
+ end
359
+ end
360
+ context 'when custom index(s) are given' do
361
+ it 'returns Array of row tuples' do
362
+ expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
363
+ [[52, 1], [7, 3]]
364
+ )
365
+ end
366
+ end
367
+ context 'when multi index is given' do
368
+ it 'returns Array of row tuples' do
369
+ expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
370
+ [[1, "a"], [2, "b"], [4, "d"]]
371
+ )
372
+ expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
373
+ [[2, "b"]]
374
+ )
375
+ end
376
+ end
377
+ end
378
+
379
+ context "#only_numerics" do
380
+ subject { df.only_numerics }
381
+
382
+ let(:df) do
383
+ DaruLite::DataFrame.new({a: vector1, b: vector2, c: vector3 }, clone: false)
384
+ end
385
+ let(:vector1) { DaruLite::Vector.new([1,2,3,4,5]) }
386
+ let(:vector2) { DaruLite::Vector.new(%w(one two three four five)) }
387
+ let(:vector3) { DaruLite::Vector.new([11,22,33,44,55]) }
388
+
389
+ it "returns a clone of numeric vectors" do
390
+ expect(subject).to eq(
391
+ DaruLite::DataFrame.new({ a: vector1, c: vector3}, clone: false)
392
+ )
393
+ expect(subject[:a].object_id).to_not eq(vector1.object_id)
394
+ end
395
+
396
+ context 'clone is false' do
397
+ subject { df.only_numerics(clone: false) }
398
+
399
+ it "returns a view of only the numeric vectors" do
400
+ expect(subject).to eq(
401
+ DaruLite::DataFrame.new({ a: vector1, c: vector3 }, clone: false)
402
+ )
403
+ expect(subject[:a].object_id).to eq(vector1.object_id)
404
+ end
405
+ end
406
+
407
+ context DaruLite::MultiIndex do
408
+ let(:df) do
409
+ order = DaruLite::MultiIndex.from_tuples(
410
+ [
411
+ [:d, :one, :large],
412
+ [:d, :one, :small],
413
+ [:d, :two, :large],
414
+ [:d, :two, :small],
415
+ [:e, :one, :large],
416
+ [:e, :one, :small],
417
+ [:e, :two, :large],
418
+ [:e, :two, :small]
419
+ ]
420
+ )
421
+
422
+ index = DaruLite::MultiIndex.from_tuples(
423
+ [
424
+ [:bar],
425
+ [:foo]
426
+ ]
427
+ )
428
+ DaruLite::DataFrame.new(
429
+ [
430
+ [4.112,2.234],
431
+ %w(a b),
432
+ [6.342,nil],
433
+ [7.2344,3.23214],
434
+ [8.234,4.533],
435
+ [10.342,2.3432],
436
+ [12.0,nil],
437
+ %w(a b)
438
+ ],
439
+ order:,
440
+ index:
441
+ )
442
+ end
443
+
444
+ it "returns numeric vectors" do
445
+ vectors = DaruLite::MultiIndex.from_tuples(
446
+ [
447
+ [:d, :one, :large],
448
+ [:d, :two, :large],
449
+ [:d, :two, :small],
450
+ [:e, :one, :large],
451
+ [:e, :one, :small],
452
+ [:e, :two, :large]
453
+ ]
454
+ )
455
+ index = DaruLite::MultiIndex.from_tuples(
456
+ [
457
+ [:bar],
458
+ [:foo]
459
+ ]
460
+ )
461
+ answer = DaruLite::DataFrame.new(
462
+ [
463
+ [4.112,2.234],
464
+ [6.342,nil],
465
+ [7.2344,3.23214],
466
+ [8.234,4.533],
467
+ [10.342,2.3432],
468
+ [12.0,nil],
469
+ ], order: vectors, index: index
470
+ )
471
+
472
+ expect(subject).to eq(answer)
473
+ end
474
+ end
475
+ end
476
+ end