daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,297 @@
1
+ shared_examples_for 'a pivotable DataFrame' do
2
+ describe "#pivot_table" do
3
+ let(:df) do
4
+ DaruLite::DataFrame.new({
5
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
6
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
7
+ c: ['small','large','large','small','small','large','small','large','small'],
8
+ d: [1,2,2,3,3,4,5,6,7],
9
+ e: [2,4,4,6,6,8,10,12,14]
10
+ })
11
+ end
12
+
13
+ it "creates row index as per (single) index argument and default aggregates to mean" do
14
+ expect(df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
15
+ d: [5.5,2.2],
16
+ e: [11.0,4.4]
17
+ }, index: ['bar', 'foo']))
18
+ end
19
+
20
+ it "creates row index as per (double) index argument and default aggregates to mean" do
21
+ agg_mi = DaruLite::MultiIndex.from_tuples(
22
+ [
23
+ ['bar', 'large'],
24
+ ['bar', 'small'],
25
+ ['foo', 'large'],
26
+ ['foo', 'small']
27
+ ]
28
+ )
29
+ expect(df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
30
+ d: [5.0 , 6.0, 2.0, 2.33],
31
+ e: [10.0, 12.0, 4.0, 4.67]
32
+ }, index: agg_mi))
33
+ end
34
+
35
+ it "creates row and vector index as per (single) index and (single) vectors args" do
36
+ agg_vectors = DaruLite::MultiIndex.from_tuples([
37
+ [:d, 'one'],
38
+ [:d, 'two'],
39
+ [:e, 'one'],
40
+ [:e, 'two']
41
+ ])
42
+ agg_index = DaruLite::MultiIndex.from_tuples(
43
+ [
44
+ ['bar'],
45
+ ['foo']
46
+ ]
47
+ )
48
+
49
+ expect(df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
50
+ DaruLite::DataFrame.new(
51
+ [
52
+ [4.5, 1.67],
53
+ [6.5, 3.0],
54
+ [9.0, 3.33],
55
+ [13, 6]
56
+ ], order: agg_vectors, index: agg_index)
57
+ )
58
+ end
59
+
60
+ it "creates row and vector index as per (single) index and (double) vector args" do
61
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
62
+ [
63
+ [:d, 'one', 'large'],
64
+ [:d, 'one', 'small'],
65
+ [:d, 'two', 'large'],
66
+ [:d, 'two', 'small'],
67
+ [:e, 'one', 'large'],
68
+ [:e, 'one', 'small'],
69
+ [:e, 'two', 'large'],
70
+ [:e, 'two', 'small']
71
+ ]
72
+ )
73
+
74
+ agg_index = DaruLite::MultiIndex.from_tuples(
75
+ [
76
+ ['bar'],
77
+ ['foo']
78
+ ]
79
+ )
80
+
81
+ expect(df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
82
+ [
83
+ [4.0,2.0],
84
+ [5.0,1.0],
85
+ [6.0,nil],
86
+ [7.0,3.0],
87
+ [8.0,4.0],
88
+ [10.0,2.0],
89
+ [12.0,nil],
90
+ [14.0,6.0]
91
+ ], order: agg_vectors, index: agg_index
92
+ ))
93
+ end
94
+
95
+ it "creates row and vector index with (double) index and (double) vector args" do
96
+ agg_index = DaruLite::MultiIndex.from_tuples([
97
+ ['bar', 4],
98
+ ['bar', 5],
99
+ ['bar', 6],
100
+ ['bar', 7],
101
+ ['foo', 1],
102
+ ['foo', 2],
103
+ ['foo', 3]
104
+ ])
105
+
106
+ agg_vectors = DaruLite::MultiIndex.from_tuples([
107
+ [:e, 'one', 'large'],
108
+ [:e, 'one', 'small'],
109
+ [:e, 'two', 'large'],
110
+ [:e, 'two', 'small']
111
+ ])
112
+
113
+ expect(df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
114
+ DaruLite::DataFrame.new(
115
+ [
116
+ [8 ,nil,nil,nil,nil, 4,nil],
117
+ [nil, 10,nil,nil, 2,nil,nil],
118
+ [nil,nil, 12,nil,nil,nil,nil],
119
+ [nil,nil,nil, 14,nil,nil, 6],
120
+ ], index: agg_index, order: agg_vectors)
121
+ )
122
+ end
123
+
124
+ it "only aggregates over the vector specified in the values argument" do
125
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
126
+ [
127
+ [:e, 'one', 'large'],
128
+ [:e, 'one', 'small'],
129
+ [:e, 'two', 'large'],
130
+ [:e, 'two', 'small']
131
+ ]
132
+ )
133
+ agg_index = DaruLite::MultiIndex.from_tuples(
134
+ [
135
+ ['bar'],
136
+ ['foo']
137
+ ]
138
+ )
139
+ expect(df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
140
+ DaruLite::DataFrame.new(
141
+ [
142
+ [8, 4],
143
+ [10, 2],
144
+ [12,nil],
145
+ [14, 6]
146
+ ], order: agg_vectors, index: agg_index
147
+ )
148
+ )
149
+
150
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
151
+ [
152
+ [:d, 'one'],
153
+ [:d, 'two'],
154
+ [:e, 'one'],
155
+ [:e, 'two']
156
+ ]
157
+ )
158
+ expect(df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
159
+ DaruLite::DataFrame.new(
160
+ [
161
+ [4.5, 5.0/3],
162
+ [6.5, 3.0],
163
+ [9.0, 10.0/3],
164
+ [13.0, 6.0]
165
+ ], order: agg_vectors, index: agg_index
166
+ )
167
+ )
168
+ end
169
+
170
+ it "overrides default aggregate function to aggregate over sum" do
171
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
172
+ [
173
+ [:e, 'one', 'large'],
174
+ [:e, 'one', 'small'],
175
+ [:e, 'two', 'large'],
176
+ [:e, 'two', 'small']
177
+ ]
178
+ )
179
+ agg_index = DaruLite::MultiIndex.from_tuples(
180
+ [
181
+ ['bar'],
182
+ ['foo']
183
+ ]
184
+ )
185
+ expect(df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
186
+ DaruLite::DataFrame.new(
187
+ [
188
+ [8, 8],
189
+ [10, 2],
190
+ [12,nil],
191
+ [14, 12]
192
+ ], order: agg_vectors, index: agg_index
193
+ )
194
+ )
195
+ end
196
+
197
+ it "raises error if no non-numeric vectors are present" do
198
+ df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
199
+ expect {
200
+ df.pivot_table(index: [:a])
201
+ }.to raise_error
202
+ end
203
+
204
+ it "raises error if atleast a row index is not specified" do
205
+ expect {
206
+ df.pivot_table
207
+ }.to raise_error
208
+ end
209
+
210
+ it "aggregates when nils are present in value vector" do
211
+ df = DaruLite::DataFrame.new({
212
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
213
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
214
+ c: ['small','large','large','small','small','large','small','large','small'],
215
+ d: [1,2,2,3,3,4,5,6,7],
216
+ e: [2,nil,4,6,6,8,10,12,nil]
217
+ })
218
+
219
+ expect(df.pivot_table index: [:a]).to eq(
220
+ DaruLite::DataFrame.new({
221
+ d: [5.0, 2.2, 7],
222
+ e: [10.0, 4.5, nil]
223
+ }, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
224
+ end
225
+
226
+ it "works when nils are present in value vector" do
227
+ df = DaruLite::DataFrame.new({
228
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
229
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
230
+ c: ['small','large','large','small','small','large','small','large','small'],
231
+ d: [1,2,2,3,3,4,5,6,7],
232
+ e: [2,nil,4,6,6,8,10,12,nil]
233
+ })
234
+
235
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
236
+ [
237
+ [:e, 'one'],
238
+ [:e, 'two']
239
+ ]
240
+ )
241
+
242
+ agg_index = DaruLite::MultiIndex.from_tuples(
243
+ [
244
+ ['bar'],
245
+ ['foo'],
246
+ ['ice']
247
+ ]
248
+ )
249
+
250
+ expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
251
+ DaruLite::DataFrame.new(
252
+ [
253
+ [9, 3, nil],
254
+ [12, 6, nil]
255
+ ], order: agg_vectors, index: agg_index
256
+ )
257
+ )
258
+ end
259
+
260
+ it 'performs date pivoting' do
261
+ categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
262
+ df = DaruLite::DataFrame.rows([
263
+ [2014, 2, 1600.0, 20.0],
264
+ [2014, 3, 1680.0, 21.0],
265
+ [2016, 2, 1600.0, 20.0],
266
+ [2016, 4, 1520.0, 19.0],
267
+ ], order: [:year, :month, :visitors, :days])
268
+ df[:averages] = df[:visitors] / df[:days]
269
+ df[:month] = df[:month].map{|i| categories[i - 1]}
270
+ actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
271
+
272
+ # NB: As you can see, there are some "illogical" parts:
273
+ # months are sorted lexicographically, then made into multi-index
274
+ # with one-element-per-tuple, then order of columns is dependent
275
+ # on which month is lexicographically first (it's apr, so, apr-2016
276
+ # is first row to gather, so 2016 is first column).
277
+ #
278
+ # All of it is a consequence of our group_by implementation (which
279
+ # always sorts results & always makes array keys). I hope that fixing
280
+ # group_by, even to the extent described at https://github.com/v0dro/daru/issues/152,
281
+ # will fix this case also.
282
+ expected =
283
+ DaruLite::DataFrame.new(
284
+ [
285
+ [80.0, 80.0, nil],
286
+ [nil, 80.0, 80.0],
287
+ ], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
288
+ order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
289
+ )
290
+ # Comparing their parts before the full comparison makes it easier to
291
+ # find complicated differences.
292
+ expect(actual.vectors).to eq expected.vectors
293
+ expect(actual.index).to eq expected.index
294
+ expect(actual).to eq expected
295
+ end
296
+ end
297
+ end
@@ -0,0 +1,92 @@
1
+ shared_examples_for 'a queryable DataFrame' do
2
+ describe '#include_values?' do
3
+ let(:df) do
4
+ DaruLite::DataFrame.new({
5
+ a: [1, 2, 3, 4, Float::NAN, 6, 1],
6
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5],
7
+ c: ['a', 6, 3, 4, 3, 5, 3],
8
+ d: [1, 2, 3, 5, 1, 2, 5]
9
+ })
10
+ end
11
+ before { df.to_category :b }
12
+
13
+ context 'true' do
14
+ it { expect(df.include_values? nil).to eq true }
15
+ it { expect(df.include_values? Float::NAN).to eq true }
16
+ it { expect(df.include_values? nil, Float::NAN).to eq true }
17
+ it { expect(df.include_values? 1, 30).to eq true }
18
+ end
19
+
20
+ context 'false' do
21
+ it { expect(df[:a, :c].include_values? nil).to eq false }
22
+ it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
23
+ it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
24
+ it { expect(df.include_values? 10, 20).to eq false }
25
+ end
26
+ end
27
+
28
+
29
+ describe "#any?" do
30
+ let(:df) do
31
+ DaruLite::DataFrame.new(
32
+ {
33
+ a: [1,2,3,4,5],
34
+ b: [10,20,30,40,50],
35
+ c: [11,22,33,44,55]
36
+ }
37
+ )
38
+ end
39
+
40
+ it "returns true if any one of the vectors satisfy condition" do
41
+ expect(df.any? { |v| v[0] == 1 }).to eq(true)
42
+ end
43
+
44
+ it "returns false if none of the vectors satisfy the condition" do
45
+ expect(df.any? { |v| v.mean > 100 }).to eq(false)
46
+ end
47
+
48
+ it "returns true if any one of the rows satisfy condition" do
49
+ expect(df.any?(:row) { |r| r[:a] == 1 and r[:c] == 11 }).to eq(true)
50
+ end
51
+
52
+ it "returns false if none of the rows satisfy the condition" do
53
+ expect(df.any?(:row) { |r| r.mean > 100 }).to eq(false)
54
+ end
55
+
56
+ it 'fails on unknown axis' do
57
+ expect { df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
58
+ end
59
+ end
60
+
61
+ describe "#all?" do
62
+ let(:df) do
63
+ DaruLite::DataFrame.new(
64
+ {
65
+ a: [1,2,3,4,5],
66
+ b: [10,20,30,40,50],
67
+ c: [11,22,33,44,55]
68
+ }
69
+ )
70
+ end
71
+
72
+ it "returns true if all of the vectors satisfy condition" do
73
+ expect(df.all? { |v| v.mean < 40 }).to eq(true)
74
+ end
75
+
76
+ it "returns false if any one of the vectors does not satisfy condition" do
77
+ expect(df.all? { |v| v.mean == 30 }).to eq(false)
78
+ end
79
+
80
+ it "returns true if all of the rows satisfy condition" do
81
+ expect(df.all?(:row) { |r| r.mean < 70 }).to eq(true)
82
+ end
83
+
84
+ it "returns false if any one of the rows does not satisfy condition" do
85
+ expect(df.all?(:row) { |r| r.mean == 30 }).to eq(false)
86
+ end
87
+
88
+ it 'fails on unknown axis' do
89
+ expect { df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
90
+ end
91
+ end
92
+ end