daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -0,0 +1,350 @@
1
# Shared examples covering DataFrame column ordering (#order=, #rotate_vectors)
# and row sorting (#sort!, #sort).
#
# The examples build their own DataFrames, with one exception: the MultiIndex
# example under "#sort!" reads `df_mi`, which is not defined in this file.
# NOTE(review): presumably the including spec supplies `df_mi` — confirm.
shared_examples_for 'a sortable DataFrame' do
  describe '#order=' do
    subject { df.order = new_order }

    let(:df) do
      DaruLite::DataFrame.new({
        a: [1, 2, 3],
        b: [4, 5, 6]
      }, order: [:a, :b])
    end

    context 'correct order' do
      let(:new_order) { [:b, :a] }

      it "sets correct order" do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end

      it 'vector data is unchanged' do
        expect { subject }.not_to change { [df[:a].to_a, df[:b].to_a] }
      end
    end

    # NOTE(review): the two bare `raise_error` matchers below accept any
    # exception. Pin the class raised by DataFrame#order= once it has been
    # confirmed against the implementation.
    context 'insufficient vectors' do
      it { expect { df.order = [:a] }.to raise_error }
    end

    context 'wrong vectors' do
      it { expect { df.order = [:a, :b, 'b'] }.to raise_error }
    end

    context "vectors labels are of mixed classes" do
      let(:df) do
        DaruLite::DataFrame.new({
          a: [1, 2, 3],
          'b' => [4, 5, 6],
          nil => [5, 7, 9],
          1 => [10, 11, 12]
        })
      end
      let(:new_order) { [nil, :a, 1, 'b'] }

      it "sets correct order" do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end
    end
  end

  describe "#rotate_vectors" do
    subject { df.rotate_vectors(-1) }

    context "only one vector in the dataframe" do
      let(:df) { DaruLite::DataFrame.new({ a: [1, 2, 3] }) }

      it "return the dataframe without any change" do
        expect { subject }.not_to change { df.vectors.to_a }
      end
    end

    context "several vectors in the dataframe" do
      let(:df) do
        DaruLite::DataFrame.new({
          a: [1, 2, 3],
          b: [4, 5, 6],
          total: [5, 7, 9]
        })
      end
      let(:new_order) { [:total, :a, :b] }

      it "return the dataframe with the position of the last vector change to first" do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end
    end

    context "vectors labels are of mixed classes" do
      let(:df) do
        DaruLite::DataFrame.new({
          a: [1, 2, 3],
          'b' => [4, 5, 6],
          nil => [5, 7, 9],
          1 => [10, 11, 12]
        })
      end
      let(:new_order) { [1, :a, 'b', nil] }

      it "return the dataframe with the position of the last vector change to first" do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end
    end
  end

  describe "#sort!" do
    context DaruLite::Index do
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [5, 1, -6, 7, 5, 5],
            b: [-2, -1, 5, 3, 9, 1],
            c: ['a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaaaa']
          }
        )
      end

      # Numeric frame whose first sort column contains nils.
      let(:with_nils) do
        DaruLite::DataFrame.new(
          {
            a: [1, 1, 1, nil, 44, 5, 5, nil],
            b: [44, 44, 333, 222, 111, 554, 22, 3],
            c: [3, 2, 5, 3, 3, 1, 5, 5]
          }
        )
      end

      # Frame with nils in both a numeric (:b) and a string (:c) column.
      let(:non_numeric) do
        DaruLite::DataFrame.new(
          {
            a: [5, 1, -6, 7, 5, 5],
            b: [nil, -1, 1, nil, -1, 1],
            c: ['aaa', 'aaa', nil, 'baaa', 'xxx', nil]
          }
        )
      end

      it "sorts according to given vector order (bang)" do
        identity = ->(value) { value }
        expected = DaruLite::DataFrame.new(
          {
            a: [-6, 1, 5, 5, 5, 7],
            b: [5, -1, -2, 9, 1, 3],
            c: ['aaa', 'aa', 'a', 'aaaaa', 'aaaaaa', 'aaaa']
          },
          index: [2, 1, 0, 4, 5, 3]
        )

        expect(df.sort!([:a], by: { a: identity })).to eq(expected)
      end

      it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
        expected = DaruLite::DataFrame.new(
          {
            a: [-6, 1, 5, 5, 5, 7],
            b: [5, -1, -2, 1, 9, 3],
            c: ['aaa', 'aa', 'a', 'aaaaaa', 'aaaaa', 'aaaa']
          },
          index: [2, 1, 0, 5, 4, 3]
        )

        expect(df.sort!([:a, :b])).to eq(expected)
      end

      it "sorts both vectors in descending order" do
        expected = DaruLite::DataFrame.new(
          {
            a: [7, 5, 5, 5, 1, -6],
            b: [3, 9, 1, -2, -1, 5],
            c: ['aaaa', 'aaaaa', 'aaaaaa', 'a', 'aa', 'aaa']
          },
          index: [3, 4, 5, 0, 1, 2]
        )

        expect(df.sort!([:a, :b], ascending: [false, false])).to eq(expected)
      end

      it "sorts one vector in desc and other is asc" do
        expected = DaruLite::DataFrame.new(
          {
            a: [7, 5, 5, 5, 1, -6],
            b: [3, -2, 1, 9, -1, 5],
            c: ['aaaa', 'a', 'aaaaaa', 'aaaaa', 'aa', 'aaa']
          },
          index: [3, 0, 5, 4, 1, 2]
        )

        expect(df.sort!([:a, :b], ascending: [false, true])).to eq(expected)
      end

      it "sorts many vectors" do
        many = DaruLite::DataFrame.new(
          {
            a: [1, 1, 1, 222, 44, 5, 5, 544],
            b: [44, 44, 333, 222, 111, 554, 22, 3],
            c: [3, 2, 5, 3, 3, 1, 5, 5]
          }
        )
        expected = DaruLite::DataFrame.new(
          {
            a: [544, 222, 44, 5, 5, 1, 1, 1],
            b: [3, 222, 111, 22, 554, 44, 44, 333],
            c: [5, 3, 3, 5, 1, 3, 2, 5]
          },
          index: [7, 3, 4, 6, 5, 0, 1, 2]
        )

        expect(many.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(expected)
      end

      it "places nils at the beginning when sorting ascendingly" do
        expected = DaruLite::DataFrame.new(
          {
            a: [nil, nil, 1, 1, 1, 5, 5, 44],
            b: [3, 222, 44, 44, 333, 22, 554, 111],
            c: [5, 3, 3, 2, 5, 5, 1, 3]
          },
          index: [7, 3, 0, 1, 2, 6, 5, 4]
        )

        expect(with_nils.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(expected)
      end

      it "places nils at the beginning when sorting descendingly" do
        expected = DaruLite::DataFrame.new(
          {
            a: [nil, nil, 44, 5, 5, 1, 1, 1],
            b: [3, 222, 111, 22, 554, 44, 44, 333],
            c: [5, 3, 3, 5, 1, 3, 2, 5]
          },
          index: [7, 3, 4, 6, 5, 0, 1, 2]
        )

        expect(with_nils.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(expected)
      end

      it "sorts vectors of non-numeric types with nils in ascending order" do
        expected = DaruLite::DataFrame.new(
          {
            a: [-6, 5, 5, 1, 7, 5],
            b: [1, 1, nil, -1, nil, -1],
            c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]
          },
          index: [2, 5, 0, 1, 3, 4]
        )

        expect(non_numeric.sort!([:c], ascending: [true])).to eq(expected)
      end

      it "sorts vectors of non-numeric types with nils in descending order" do
        expected = DaruLite::DataFrame.new(
          {
            a: [-6, 5, 5, 7, 5, 1],
            b: [1, 1, -1, nil, nil, -1],
            c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]
          },
          index: [2, 5, 4, 3, 0, 1]
        )

        expect(non_numeric.sort!([:c], ascending: [false])).to eq(expected)
      end

      it "sorts vectors with block provided and handle nils automatically" do
        # The key lambda would fail on nil; handle_nils: true is what makes this usable.
        by_abs = ->(value) { value.abs }
        expected = DaruLite::DataFrame.new(
          {
            a: [5, 7, 1, -6, 5, 5],
            b: [nil, nil, -1, 1, -1, 1],
            c: ["aaa", "baaa", "aaa", nil, "xxx", nil]
          },
          index: [0, 3, 1, 2, 4, 5]
        )

        expect(non_numeric.sort!([:b], by: { b: by_abs }, handle_nils: true)).to eq(expected)
      end

      it "sorts vectors with block provided and nils handled manually" do
        # Array keys: [1] for nil, [0, |value|] otherwise, so nils end up last here.
        nil_aware_abs = ->(value) { value.nil? ? [1] : [0, value.abs] }
        expected = DaruLite::DataFrame.new(
          {
            a: [1, -6, 5, 5, 5, 7],
            b: [-1, 1, -1, 1, nil, nil],
            c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]
          },
          index: [1, 2, 4, 5, 0, 3]
        )

        expect(non_numeric.sort!([:b], by: { b: nil_aware_abs }, handle_nils: false)).to eq(expected)
      end
    end

    context DaruLite::MultiIndex do
      # NOTE(review): bare group-level `pending` kept as in the original; the
      # example below makes no assertion and relies on an externally defined
      # `df_mi` — confirm intent before extending.
      pending
      it "sorts the DataFrame when specified full tuple" do
        df_mi.sort([[:a, :one, :bar]])
      end
    end
  end

  describe "#sort" do
    context DaruLite::Index do
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [5, 1, -6, 7, 5, 5],
            b: [-2, -1, 5, 3, 9, 1],
            c: ['a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaaaa']
          }
        )
      end

      it "sorts according to given vector order (non-bang)" do
        identity = ->(value) { value }
        sorted = df.sort([:a], by: { a: identity })

        expect(sorted).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 1, 5, 5, 5, 7],
              b: [5, -1, -2, 9, 1, 3],
              c: ['aaa', 'aa', 'a', 'aaaaa', 'aaaaaa', 'aaaa']
            },
            index: [2, 1, 0, 4, 5, 3]
          )
        )
        # The receiver must differ from the sorted copy.
        expect(sorted).not_to eq(df)
      end

      it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (non-bang)" do
        sorted = df.sort([:a, :b])

        expect(sorted).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 1, 5, 5, 5, 7],
              b: [5, -1, -2, 1, 9, 3],
              c: ['aaa', 'aa', 'a', 'aaaaaa', 'aaaaa', 'aaaa']
            },
            index: [2, 1, 0, 5, 4, 3]
          )
        )
        expect(sorted).not_to eq(df)
      end
    end

    context DaruLite::MultiIndex do
      pending
    end

    context DaruLite::CategoricalIndex do
      let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
      let(:df) do
        DaruLite::DataFrame.new({
          a: [2, -1, 3, 4, 5],
          b: ['x', 'y', 'x', 'a', 'y'],
          c: [nil, nil, -2, 2, 1]
        }, index: idx)
      end

      context "ascending order" do
        context "single vector" do
          subject { df.sort([:a]) }

          its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
          its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
          its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
          its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
        end

        context "multiple vectors" do
          subject { df.sort([:c, :b]) }

          its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
          its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
          its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
          its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
        end

        context "block" do
          context "automatic handle nils" do
            subject do
              df.sort([:c], by: { c: ->(value) { value.abs } }, handle_nils: true)
            end

            its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
            its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
            its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
            its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
          end

          context "manually handle nils" do
            subject do
              df.sort([:c], by: { c: ->(value) { value.nil? ? [1] : [0, value.abs] } })
            end

            its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
            its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
            its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
            its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
          end
        end
      end

      context "descending order" do
        context "single vector" do
          subject { df.sort([:a], ascending: false) }

          its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
          its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
          its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
          its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
        end

        context "multiple vectors" do
          subject { df.sort([:c, :b], ascending: false) }

          its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
          its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
          its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
          its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
        end

        context "block" do
          context "automatic handle nils" do
            subject do
              df.sort(
                [:c],
                by: { c: ->(value) { value.abs } },
                handle_nils: true,
                ascending: false
              )
            end

            its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
            its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
            its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
            its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
          end

          context "manually handle nils" do
            subject do
              df.sort(
                [:c],
                by: { c: ->(value) { value.nil? ? [1] : [0, value.abs] } },
                ascending: false
              )
            end

            its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
            its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
            its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
            its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
          end
        end
      end
    end
  end
end