daru_lite 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,297 @@
1
+ shared_examples_for 'a pivotable DataFrame' do
2
+ describe "#pivot_table" do
3
+ let(:df) do
4
+ DaruLite::DataFrame.new({
5
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
6
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
7
+ c: ['small','large','large','small','small','large','small','large','small'],
8
+ d: [1,2,2,3,3,4,5,6,7],
9
+ e: [2,4,4,6,6,8,10,12,14]
10
+ })
11
+ end
12
+
13
+ it "creates row index as per (single) index argument and default aggregates to mean" do
14
+ expect(df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
15
+ d: [5.5,2.2],
16
+ e: [11.0,4.4]
17
+ }, index: ['bar', 'foo']))
18
+ end
19
+
20
+ it "creates row index as per (double) index argument and default aggregates to mean" do
21
+ agg_mi = DaruLite::MultiIndex.from_tuples(
22
+ [
23
+ ['bar', 'large'],
24
+ ['bar', 'small'],
25
+ ['foo', 'large'],
26
+ ['foo', 'small']
27
+ ]
28
+ )
29
+ expect(df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
30
+ d: [5.0 , 6.0, 2.0, 2.33],
31
+ e: [10.0, 12.0, 4.0, 4.67]
32
+ }, index: agg_mi))
33
+ end
34
+
35
+ it "creates row and vector index as per (single) index and (single) vectors args" do
36
+ agg_vectors = DaruLite::MultiIndex.from_tuples([
37
+ [:d, 'one'],
38
+ [:d, 'two'],
39
+ [:e, 'one'],
40
+ [:e, 'two']
41
+ ])
42
+ agg_index = DaruLite::MultiIndex.from_tuples(
43
+ [
44
+ ['bar'],
45
+ ['foo']
46
+ ]
47
+ )
48
+
49
+ expect(df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
50
+ DaruLite::DataFrame.new(
51
+ [
52
+ [4.5, 1.67],
53
+ [6.5, 3.0],
54
+ [9.0, 3.33],
55
+ [13, 6]
56
+ ], order: agg_vectors, index: agg_index)
57
+ )
58
+ end
59
+
60
+ it "creates row and vector index as per (single) index and (double) vector args" do
61
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
62
+ [
63
+ [:d, 'one', 'large'],
64
+ [:d, 'one', 'small'],
65
+ [:d, 'two', 'large'],
66
+ [:d, 'two', 'small'],
67
+ [:e, 'one', 'large'],
68
+ [:e, 'one', 'small'],
69
+ [:e, 'two', 'large'],
70
+ [:e, 'two', 'small']
71
+ ]
72
+ )
73
+
74
+ agg_index = DaruLite::MultiIndex.from_tuples(
75
+ [
76
+ ['bar'],
77
+ ['foo']
78
+ ]
79
+ )
80
+
81
+ expect(df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
82
+ [
83
+ [4.0,2.0],
84
+ [5.0,1.0],
85
+ [6.0,nil],
86
+ [7.0,3.0],
87
+ [8.0,4.0],
88
+ [10.0,2.0],
89
+ [12.0,nil],
90
+ [14.0,6.0]
91
+ ], order: agg_vectors, index: agg_index
92
+ ))
93
+ end
94
+
95
+ it "creates row and vector index with (double) index and (double) vector args" do
96
+ agg_index = DaruLite::MultiIndex.from_tuples([
97
+ ['bar', 4],
98
+ ['bar', 5],
99
+ ['bar', 6],
100
+ ['bar', 7],
101
+ ['foo', 1],
102
+ ['foo', 2],
103
+ ['foo', 3]
104
+ ])
105
+
106
+ agg_vectors = DaruLite::MultiIndex.from_tuples([
107
+ [:e, 'one', 'large'],
108
+ [:e, 'one', 'small'],
109
+ [:e, 'two', 'large'],
110
+ [:e, 'two', 'small']
111
+ ])
112
+
113
+ expect(df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
114
+ DaruLite::DataFrame.new(
115
+ [
116
+ [8 ,nil,nil,nil,nil, 4,nil],
117
+ [nil, 10,nil,nil, 2,nil,nil],
118
+ [nil,nil, 12,nil,nil,nil,nil],
119
+ [nil,nil,nil, 14,nil,nil, 6],
120
+ ], index: agg_index, order: agg_vectors)
121
+ )
122
+ end
123
+
124
+ it "only aggregates over the vector specified in the values argument" do
125
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
126
+ [
127
+ [:e, 'one', 'large'],
128
+ [:e, 'one', 'small'],
129
+ [:e, 'two', 'large'],
130
+ [:e, 'two', 'small']
131
+ ]
132
+ )
133
+ agg_index = DaruLite::MultiIndex.from_tuples(
134
+ [
135
+ ['bar'],
136
+ ['foo']
137
+ ]
138
+ )
139
+ expect(df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
140
+ DaruLite::DataFrame.new(
141
+ [
142
+ [8, 4],
143
+ [10, 2],
144
+ [12,nil],
145
+ [14, 6]
146
+ ], order: agg_vectors, index: agg_index
147
+ )
148
+ )
149
+
150
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
151
+ [
152
+ [:d, 'one'],
153
+ [:d, 'two'],
154
+ [:e, 'one'],
155
+ [:e, 'two']
156
+ ]
157
+ )
158
+ expect(df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
159
+ DaruLite::DataFrame.new(
160
+ [
161
+ [4.5, 5.0/3],
162
+ [6.5, 3.0],
163
+ [9.0, 10.0/3],
164
+ [13.0, 6.0]
165
+ ], order: agg_vectors, index: agg_index
166
+ )
167
+ )
168
+ end
169
+
170
+ it "overrides default aggregate function to aggregate over sum" do
171
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
172
+ [
173
+ [:e, 'one', 'large'],
174
+ [:e, 'one', 'small'],
175
+ [:e, 'two', 'large'],
176
+ [:e, 'two', 'small']
177
+ ]
178
+ )
179
+ agg_index = DaruLite::MultiIndex.from_tuples(
180
+ [
181
+ ['bar'],
182
+ ['foo']
183
+ ]
184
+ )
185
+ expect(df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
186
+ DaruLite::DataFrame.new(
187
+ [
188
+ [8, 8],
189
+ [10, 2],
190
+ [12,nil],
191
+ [14, 12]
192
+ ], order: agg_vectors, index: agg_index
193
+ )
194
+ )
195
+ end
196
+
197
+ it "raises error if no non-numeric vectors are present" do
198
+ df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
199
+ expect {
200
+ df.pivot_table(index: [:a])
201
+ }.to raise_error
202
+ end
203
+
204
+ it "raises error if atleast a row index is not specified" do
205
+ expect {
206
+ df.pivot_table
207
+ }.to raise_error
208
+ end
209
+
210
+ it "aggregates when nils are present in value vector" do
211
+ df = DaruLite::DataFrame.new({
212
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
213
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
214
+ c: ['small','large','large','small','small','large','small','large','small'],
215
+ d: [1,2,2,3,3,4,5,6,7],
216
+ e: [2,nil,4,6,6,8,10,12,nil]
217
+ })
218
+
219
+ expect(df.pivot_table index: [:a]).to eq(
220
+ DaruLite::DataFrame.new({
221
+ d: [5.0, 2.2, 7],
222
+ e: [10.0, 4.5, nil]
223
+ }, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
224
+ end
225
+
226
+ it "works when nils are present in value vector" do
227
+ df = DaruLite::DataFrame.new({
228
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
229
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
230
+ c: ['small','large','large','small','small','large','small','large','small'],
231
+ d: [1,2,2,3,3,4,5,6,7],
232
+ e: [2,nil,4,6,6,8,10,12,nil]
233
+ })
234
+
235
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
236
+ [
237
+ [:e, 'one'],
238
+ [:e, 'two']
239
+ ]
240
+ )
241
+
242
+ agg_index = DaruLite::MultiIndex.from_tuples(
243
+ [
244
+ ['bar'],
245
+ ['foo'],
246
+ ['ice']
247
+ ]
248
+ )
249
+
250
+ expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
251
+ DaruLite::DataFrame.new(
252
+ [
253
+ [9, 3, nil],
254
+ [12, 6, nil]
255
+ ], order: agg_vectors, index: agg_index
256
+ )
257
+ )
258
+ end
259
+
260
+ it 'performs date pivoting' do
261
+ categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
262
+ df = DaruLite::DataFrame.rows([
263
+ [2014, 2, 1600.0, 20.0],
264
+ [2014, 3, 1680.0, 21.0],
265
+ [2016, 2, 1600.0, 20.0],
266
+ [2016, 4, 1520.0, 19.0],
267
+ ], order: [:year, :month, :visitors, :days])
268
+ df[:averages] = df[:visitors] / df[:days]
269
+ df[:month] = df[:month].map{|i| categories[i - 1]}
270
+ actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
271
+
272
+ # NB: As you can see, there are some "illogical" parts:
273
+ # months are sorted lexicographically, then made into multi-index
274
+ # with one-element-per-tuple, then order of columns is dependent
275
+ # on which month is lexicographically first (it's apr, so the apr-2016
276
+ # row is gathered first, so 2016 is the first column).
277
+ #
278
+ # All of it is a consequence of our group_by implementation (which
279
+ # always sorts results & always makes array keys). I hope that fixing
280
+ # group_by, even to the extent described at https://github.com/v0dro/daru/issues/152,
281
+ # will fix this case also.
282
+ expected =
283
+ DaruLite::DataFrame.new(
284
+ [
285
+ [80.0, 80.0, nil],
286
+ [nil, 80.0, 80.0],
287
+ ], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
288
+ order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
289
+ )
290
+ # Comparing their parts before the full comparison makes it easier to
291
+ # find complicated differences.
292
+ expect(actual.vectors).to eq expected.vectors
293
+ expect(actual.index).to eq expected.index
294
+ expect(actual).to eq expected
295
+ end
296
+ end
297
+ end
@@ -0,0 +1,92 @@
1
+ shared_examples_for 'a queryable DataFrame' do
2
+ describe '#include_values?' do
3
+ let(:df) do
4
+ DaruLite::DataFrame.new({
5
+ a: [1, 2, 3, 4, Float::NAN, 6, 1],
6
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5],
7
+ c: ['a', 6, 3, 4, 3, 5, 3],
8
+ d: [1, 2, 3, 5, 1, 2, 5]
9
+ })
10
+ end
11
+ before { df.to_category :b }
12
+
13
+ context 'true' do
14
+ it { expect(df.include_values? nil).to eq true }
15
+ it { expect(df.include_values? Float::NAN).to eq true }
16
+ it { expect(df.include_values? nil, Float::NAN).to eq true }
17
+ it { expect(df.include_values? 1, 30).to eq true }
18
+ end
19
+
20
+ context 'false' do
21
+ it { expect(df[:a, :c].include_values? nil).to eq false }
22
+ it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
23
+ it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
24
+ it { expect(df.include_values? 10, 20).to eq false }
25
+ end
26
+ end
27
+
28
+
29
+ describe "#any?" do
30
+ let(:df) do
31
+ DaruLite::DataFrame.new(
32
+ {
33
+ a: [1,2,3,4,5],
34
+ b: [10,20,30,40,50],
35
+ c: [11,22,33,44,55]
36
+ }
37
+ )
38
+ end
39
+
40
+ it "returns true if any one of the vectors satisfy condition" do
41
+ expect(df.any? { |v| v[0] == 1 }).to eq(true)
42
+ end
43
+
44
+ it "returns false if none of the vectors satisfy the condition" do
45
+ expect(df.any? { |v| v.mean > 100 }).to eq(false)
46
+ end
47
+
48
+ it "returns true if any one of the rows satisfy condition" do
49
+ expect(df.any?(:row) { |r| r[:a] == 1 and r[:c] == 11 }).to eq(true)
50
+ end
51
+
52
+ it "returns false if none of the rows satisfy the condition" do
53
+ expect(df.any?(:row) { |r| r.mean > 100 }).to eq(false)
54
+ end
55
+
56
+ it 'fails on unknown axis' do
57
+ expect { df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
58
+ end
59
+ end
60
+
61
+ describe "#all?" do
62
+ let(:df) do
63
+ DaruLite::DataFrame.new(
64
+ {
65
+ a: [1,2,3,4,5],
66
+ b: [10,20,30,40,50],
67
+ c: [11,22,33,44,55]
68
+ }
69
+ )
70
+ end
71
+
72
+ it "returns true if all of the vectors satisfy condition" do
73
+ expect(df.all? { |v| v.mean < 40 }).to eq(true)
74
+ end
75
+
76
+ it "returns false if any one of the vectors does not satisfy condition" do
77
+ expect(df.all? { |v| v.mean == 30 }).to eq(false)
78
+ end
79
+
80
+ it "returns true if all of the rows satisfy condition" do
81
+ expect(df.all?(:row) { |r| r.mean < 70 }).to eq(true)
82
+ end
83
+
84
+ it "returns false if any one of the rows does not satisfy condition" do
85
+ expect(df.all?(:row) { |r| r.mean == 30 }).to eq(false)
86
+ end
87
+
88
+ it 'fails on unknown axis' do
89
+ expect { df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
90
+ end
91
+ end
92
+ end