daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,350 @@
# Shared examples for the ordering/sorting behaviour of DaruLite::DataFrame:
#
# * `#order=`         - reassigning the order of the vectors (columns)
# * `#rotate_vectors` - rotating the vector order
# * `#sort!`          - sorting rows by one or more vectors, returning the result
# * `#sort`           - same as `#sort!` but the receiver must be left untouched
#
# NOTE(review): the MultiIndex example under `#sort!` reads `df_mi`, which is
# not defined in this file - presumably the including spec provides it; confirm.
shared_examples_for 'a sortable DataFrame' do
  describe '#order=' do
    subject { df.order = new_order }

    let(:df) do
      DaruLite::DataFrame.new(
        {
          a: [1, 2, 3],
          b: [4, 5, 6]
        },
        order: [:a, :b]
      )
    end

    context 'correct order' do
      let(:new_order) { [:b, :a] }

      it 'sets correct order' do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end

      it 'vector data is unchanged' do
        expect { subject }.not_to change { [df[:a].to_a, df[:b].to_a] }
      end
    end

    # The error class is pinned so that an unrelated NameError/NoMethodError
    # cannot make these examples pass by accident.
    context 'insufficient vectors' do
      it { expect { df.order = [:a] }.to raise_error(ArgumentError) }
    end

    context 'wrong vectors' do
      it { expect { df.order = [:a, :b, 'b'] }.to raise_error(ArgumentError) }
    end

    context 'vectors labels are of mixed classes' do
      # Labels deliberately mix Symbol, String, nil and Integer.
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [1, 2, 3],
            'b' => [4, 5, 6],
            nil => [5, 7, 9],
            1 => [10, 11, 12]
          }
        )
      end
      let(:new_order) { [nil, :a, 1, 'b'] }

      it 'sets correct order' do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end
    end
  end

  describe '#rotate_vectors' do
    # Every example rotates by -1; the expectations below show the last
    # vector ending up in first position.
    subject { df.rotate_vectors(-1) }

    context 'only one vector in the dataframe' do
      let(:df) { DaruLite::DataFrame.new({ a: [1, 2, 3] }) }

      it 'return the dataframe without any change' do
        expect { subject }.not_to change { df.vectors.to_a }
      end
    end

    context 'several vectors in the dataframe' do
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [1, 2, 3],
            b: [4, 5, 6],
            total: [5, 7, 9]
          }
        )
      end
      let(:new_order) { [:total, :a, :b] }

      it 'return the dataframe with the position of the last vector change to first' do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end
    end

    context 'vectors labels are of mixed classes' do
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [1, 2, 3],
            'b' => [4, 5, 6],
            nil => [5, 7, 9],
            1 => [10, 11, 12]
          }
        )
      end
      let(:new_order) { [1, :a, 'b', nil] }

      it 'return the dataframe with the position of the last vector change to first' do
        expect { subject }.to change { df.vectors.to_a }.to(new_order)
      end
    end
  end

  describe '#sort!' do
    context DaruLite::Index do
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [5, 1, -6, 7, 5, 5],
            b: [-2, -1, 5, 3, 9, 1],
            c: ['a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaaaa']
          }
        )
      end

      # Numeric frame whose :a vector contains nils. Built per example
      # (`let`), so the in-place sort of one example cannot leak into another.
      let(:df_with_nils) do
        DaruLite::DataFrame.new(
          {
            a: [1, 1, 1, nil, 44, 5, 5, nil],
            b: [44, 44, 333, 222, 111, 554, 22, 3],
            c: [3, 2, 5, 3, 3, 1, 5, 5]
          }
        )
      end

      # Frame with nils in a numeric vector (:b) and in a String vector (:c).
      let(:non_numeric) do
        DaruLite::DataFrame.new(
          {
            a: [5, 1, -6, 7, 5, 5],
            b: [nil, -1, 1, nil, -1, 1],
            c: ['aaa', 'aaa', nil, 'baaa', 'xxx', nil]
          }
        )
      end

      it 'sorts according to given vector order (bang)' do
        a_sorter = ->(a) { a }

        expect(df.sort!([:a], by: { a: a_sorter })).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 1, 5, 5, 5, 7],
              b: [5, -1, -2, 9, 1, 3],
              c: ['aaa', 'aa', 'a', 'aaaaa', 'aaaaaa', 'aaaa']
            },
            index: [2, 1, 0, 4, 5, 3]
          )
        )
      end

      it 'sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)' do
        expect(df.sort!([:a, :b])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 1, 5, 5, 5, 7],
              b: [5, -1, -2, 1, 9, 3],
              c: ['aaa', 'aa', 'a', 'aaaaaa', 'aaaaa', 'aaaa']
            },
            index: [2, 1, 0, 5, 4, 3]
          )
        )
      end

      it 'sorts both vectors in descending order' do
        expect(df.sort!([:a, :b], ascending: [false, false])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [7, 5, 5, 5, 1, -6],
              b: [3, 9, 1, -2, -1, 5],
              c: ['aaaa', 'aaaaa', 'aaaaaa', 'a', 'aa', 'aaa']
            },
            index: [3, 4, 5, 0, 1, 2]
          )
        )
      end

      it 'sorts one vector in desc and other is asc' do
        expect(df.sort!([:a, :b], ascending: [false, true])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [7, 5, 5, 5, 1, -6],
              b: [3, -2, 1, 9, -1, 5],
              c: ['aaaa', 'a', 'aaaaaa', 'aaaaa', 'aa', 'aaa']
            },
            index: [3, 0, 5, 4, 1, 2]
          )
        )
      end

      it 'sorts many vectors' do
        d = DaruLite::DataFrame.new(
          {
            a: [1, 1, 1, 222, 44, 5, 5, 544],
            b: [44, 44, 333, 222, 111, 554, 22, 3],
            c: [3, 2, 5, 3, 3, 1, 5, 5]
          }
        )

        expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [544, 222, 44, 5, 5, 1, 1, 1],
              b: [3, 222, 111, 22, 554, 44, 44, 333],
              c: [5, 3, 3, 5, 1, 3, 2, 5]
            },
            index: [7, 3, 4, 6, 5, 0, 1, 2]
          )
        )
      end

      it 'places nils at the beginning when sorting ascendingly' do
        expect(df_with_nils.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [nil, nil, 1, 1, 1, 5, 5, 44],
              b: [3, 222, 44, 44, 333, 22, 554, 111],
              c: [5, 3, 3, 2, 5, 5, 1, 3]
            },
            index: [7, 3, 0, 1, 2, 6, 5, 4]
          )
        )
      end

      it 'places nils at the beginning when sorting descendingly' do
        expect(df_with_nils.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [nil, nil, 44, 5, 5, 1, 1, 1],
              b: [3, 222, 111, 22, 554, 44, 44, 333],
              c: [5, 3, 3, 5, 1, 3, 2, 5]
            },
            index: [7, 3, 4, 6, 5, 0, 1, 2]
          )
        )
      end

      it 'sorts vectors of non-numeric types with nils in ascending order' do
        expect(non_numeric.sort!([:c], ascending: [true])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 5, 5, 1, 7, 5],
              b: [1, 1, nil, -1, nil, -1],
              c: [nil, nil, 'aaa', 'aaa', 'baaa', 'xxx']
            },
            index: [2, 5, 0, 1, 3, 4]
          )
        )
      end

      it 'sorts vectors of non-numeric types with nils in descending order' do
        expect(non_numeric.sort!([:c], ascending: [false])).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 5, 5, 7, 5, 1],
              b: [1, 1, -1, nil, nil, -1],
              c: [nil, nil, 'xxx', 'baaa', 'aaa', 'aaa']
            },
            index: [2, 5, 4, 3, 0, 1]
          )
        )
      end

      it 'sorts vectors with block provided and handle nils automatically' do
        # The key lambda calls #abs and has no nil guard of its own; the
        # example passes `handle_nils: true` alongside it.
        by_abs = ->(a) { a.abs }

        expect(non_numeric.sort!([:b], by: { b: by_abs }, handle_nils: true)).to eq(
          DaruLite::DataFrame.new(
            {
              a: [5, 7, 1, -6, 5, 5],
              b: [nil, nil, -1, 1, -1, 1],
              c: ['aaa', 'baaa', 'aaa', nil, 'xxx', nil]
            },
            index: [0, 3, 1, 2, 4, 5]
          )
        )
      end

      it 'sorts vectors with block provided and nils handled manually' do
        # Array keys: nil maps to [1], everything else to [0, abs], so nils
        # compare greater than every non-nil value.
        nils_last = ->(a) { a.nil? ? [1] : [0, a.abs] }

        expect(non_numeric.sort!([:b], by: { b: nils_last }, handle_nils: false)).to eq(
          DaruLite::DataFrame.new(
            {
              a: [1, -6, 5, 5, 5, 7],
              b: [-1, 1, -1, 1, nil, nil],
              c: ['aaa', nil, 'xxx', nil, 'aaa', 'baaa']
            },
            index: [1, 2, 4, 5, 0, 3]
          )
        )
      end
    end

    context DaruLite::MultiIndex do
      # Body-less `pending` registers a placeholder example that is reported
      # as pending; the description makes it identifiable in the output.
      pending 'verifies the result of sorting a MultiIndex-ed DataFrame'

      # Smoke test only: passes as long as the call does not raise.
      # NOTE(review): this calls #sort (not #sort!) although it lives under
      # `describe '#sort!'`, and relies on `df_mi` from the including spec.
      it 'sorts the DataFrame when specified full tuple' do
        expect { df_mi.sort([[:a, :one, :bar]]) }.not_to raise_error
      end
    end
  end

  describe '#sort' do
    context DaruLite::Index do
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [5, 1, -6, 7, 5, 5],
            b: [-2, -1, 5, 3, 9, 1],
            c: ['a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaaaa']
          }
        )
      end

      it 'sorts according to given vector order' do
        a_sorter = ->(a) { a }
        ans = df.sort([:a], by: { a: a_sorter })

        expect(ans).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 1, 5, 5, 5, 7],
              b: [5, -1, -2, 9, 1, 3],
              c: ['aaa', 'aa', 'a', 'aaaaa', 'aaaaaa', 'aaaa']
            },
            index: [2, 1, 0, 4, 5, 3]
          )
        )
        # Non-bang variant: the receiver must differ from the sorted result.
        expect(ans).not_to eq(df)
      end

      it 'sorts according to vector order using default lambdas (index re ordered according to the last vector)' do
        ans = df.sort([:a, :b])

        expect(ans).to eq(
          DaruLite::DataFrame.new(
            {
              a: [-6, 1, 5, 5, 5, 7],
              b: [5, -1, -2, 1, 9, 3],
              c: ['aaa', 'aa', 'a', 'aaaaaa', 'aaaaa', 'aaaa']
            },
            index: [2, 1, 0, 5, 4, 3]
          )
        )
        expect(ans).not_to eq(df)
      end
    end

    context DaruLite::MultiIndex do
      # Placeholder: reported as pending until real examples are written.
      pending 'sorts a MultiIndex-ed DataFrame'
    end

    context DaruLite::CategoricalIndex do
      # The index holds repeated categories (:a and 1 both appear twice).
      let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
      let(:df) do
        DaruLite::DataFrame.new(
          {
            a: [2, -1, 3, 4, 5],
            b: ['x', 'y', 'x', 'a', 'y'],
            c: [nil, nil, -2, 2, 1]
          },
          index: idx
        )
      end

      context 'ascending order' do
        context 'single vector' do
          subject { df.sort [:a] }

          its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
          its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
          its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
          its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
        end

        context 'multiple vectors' do
          subject { df.sort [:c, :b] }

          its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
          its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
          its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
          its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
        end

        context 'block' do
          context 'automatic handle nils' do
            subject do
              df.sort [:c], by: { c: ->(a) { a.abs } }, handle_nils: true
            end

            its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
            its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
            its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
            its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
          end

          context 'manually handle nils' do
            subject do
              df.sort [:c], by: { c: ->(a) { a.nil? ? [1] : [0, a.abs] } }
            end

            its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
            its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
            its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
            its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
          end
        end
      end

      context 'descending order' do
        context 'single vector' do
          subject { df.sort [:a], ascending: false }

          its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
          its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
          its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
          its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
        end

        context 'multiple vectors' do
          subject { df.sort [:c, :b], ascending: false }

          its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
          its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
          its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
          its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
        end

        context 'block' do
          context 'automatic handle nils' do
            subject do
              df.sort [:c],
                      by: { c: ->(a) { a.abs } },
                      handle_nils: true,
                      ascending: false
            end

            its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
            its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
            its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
            its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
          end

          context 'manually handle nils' do
            subject do
              df.sort [:c],
                      by: { c: ->(a) { a.nil? ? [1] : [0, a.abs] } },
                      ascending: false
            end

            its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
            its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
            its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
            its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
          end
        end
      end
    end
  end
end