daru_lite 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
@@ -0,0 +1,297 @@
|
|
1
|
+
shared_examples_for 'a pivotable DataFrame' do
|
2
|
+
describe "#pivot_table" do
|
3
|
+
let(:df) do
|
4
|
+
DaruLite::DataFrame.new({
|
5
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
6
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
7
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
8
|
+
d: [1,2,2,3,3,4,5,6,7],
|
9
|
+
e: [2,4,4,6,6,8,10,12,14]
|
10
|
+
})
|
11
|
+
end
|
12
|
+
|
13
|
+
it "creates row index as per (single) index argument and default aggregates to mean" do
|
14
|
+
expect(df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
|
15
|
+
d: [5.5,2.2],
|
16
|
+
e: [11.0,4.4]
|
17
|
+
}, index: ['bar', 'foo']))
|
18
|
+
end
|
19
|
+
|
20
|
+
it "creates row index as per (double) index argument and default aggregates to mean" do
|
21
|
+
agg_mi = DaruLite::MultiIndex.from_tuples(
|
22
|
+
[
|
23
|
+
['bar', 'large'],
|
24
|
+
['bar', 'small'],
|
25
|
+
['foo', 'large'],
|
26
|
+
['foo', 'small']
|
27
|
+
]
|
28
|
+
)
|
29
|
+
expect(df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
|
30
|
+
d: [5.0 , 6.0, 2.0, 2.33],
|
31
|
+
e: [10.0, 12.0, 4.0, 4.67]
|
32
|
+
}, index: agg_mi))
|
33
|
+
end
|
34
|
+
|
35
|
+
it "creates row and vector index as per (single) index and (single) vectors args" do
|
36
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples([
|
37
|
+
[:d, 'one'],
|
38
|
+
[:d, 'two'],
|
39
|
+
[:e, 'one'],
|
40
|
+
[:e, 'two']
|
41
|
+
])
|
42
|
+
agg_index = DaruLite::MultiIndex.from_tuples(
|
43
|
+
[
|
44
|
+
['bar'],
|
45
|
+
['foo']
|
46
|
+
]
|
47
|
+
)
|
48
|
+
|
49
|
+
expect(df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
|
50
|
+
DaruLite::DataFrame.new(
|
51
|
+
[
|
52
|
+
[4.5, 1.67],
|
53
|
+
[6.5, 3.0],
|
54
|
+
[9.0, 3.33],
|
55
|
+
[13, 6]
|
56
|
+
], order: agg_vectors, index: agg_index)
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "creates row and vector index as per (single) index and (double) vector args" do
|
61
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples(
|
62
|
+
[
|
63
|
+
[:d, 'one', 'large'],
|
64
|
+
[:d, 'one', 'small'],
|
65
|
+
[:d, 'two', 'large'],
|
66
|
+
[:d, 'two', 'small'],
|
67
|
+
[:e, 'one', 'large'],
|
68
|
+
[:e, 'one', 'small'],
|
69
|
+
[:e, 'two', 'large'],
|
70
|
+
[:e, 'two', 'small']
|
71
|
+
]
|
72
|
+
)
|
73
|
+
|
74
|
+
agg_index = DaruLite::MultiIndex.from_tuples(
|
75
|
+
[
|
76
|
+
['bar'],
|
77
|
+
['foo']
|
78
|
+
]
|
79
|
+
)
|
80
|
+
|
81
|
+
expect(df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
|
82
|
+
[
|
83
|
+
[4.0,2.0],
|
84
|
+
[5.0,1.0],
|
85
|
+
[6.0,nil],
|
86
|
+
[7.0,3.0],
|
87
|
+
[8.0,4.0],
|
88
|
+
[10.0,2.0],
|
89
|
+
[12.0,nil],
|
90
|
+
[14.0,6.0]
|
91
|
+
], order: agg_vectors, index: agg_index
|
92
|
+
))
|
93
|
+
end
|
94
|
+
|
95
|
+
it "creates row and vector index with (double) index and (double) vector args" do
|
96
|
+
agg_index = DaruLite::MultiIndex.from_tuples([
|
97
|
+
['bar', 4],
|
98
|
+
['bar', 5],
|
99
|
+
['bar', 6],
|
100
|
+
['bar', 7],
|
101
|
+
['foo', 1],
|
102
|
+
['foo', 2],
|
103
|
+
['foo', 3]
|
104
|
+
])
|
105
|
+
|
106
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples([
|
107
|
+
[:e, 'one', 'large'],
|
108
|
+
[:e, 'one', 'small'],
|
109
|
+
[:e, 'two', 'large'],
|
110
|
+
[:e, 'two', 'small']
|
111
|
+
])
|
112
|
+
|
113
|
+
expect(df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
|
114
|
+
DaruLite::DataFrame.new(
|
115
|
+
[
|
116
|
+
[8 ,nil,nil,nil,nil, 4,nil],
|
117
|
+
[nil, 10,nil,nil, 2,nil,nil],
|
118
|
+
[nil,nil, 12,nil,nil,nil,nil],
|
119
|
+
[nil,nil,nil, 14,nil,nil, 6],
|
120
|
+
], index: agg_index, order: agg_vectors)
|
121
|
+
)
|
122
|
+
end
|
123
|
+
|
124
|
+
it "only aggregates over the vector specified in the values argument" do
|
125
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples(
|
126
|
+
[
|
127
|
+
[:e, 'one', 'large'],
|
128
|
+
[:e, 'one', 'small'],
|
129
|
+
[:e, 'two', 'large'],
|
130
|
+
[:e, 'two', 'small']
|
131
|
+
]
|
132
|
+
)
|
133
|
+
agg_index = DaruLite::MultiIndex.from_tuples(
|
134
|
+
[
|
135
|
+
['bar'],
|
136
|
+
['foo']
|
137
|
+
]
|
138
|
+
)
|
139
|
+
expect(df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
|
140
|
+
DaruLite::DataFrame.new(
|
141
|
+
[
|
142
|
+
[8, 4],
|
143
|
+
[10, 2],
|
144
|
+
[12,nil],
|
145
|
+
[14, 6]
|
146
|
+
], order: agg_vectors, index: agg_index
|
147
|
+
)
|
148
|
+
)
|
149
|
+
|
150
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples(
|
151
|
+
[
|
152
|
+
[:d, 'one'],
|
153
|
+
[:d, 'two'],
|
154
|
+
[:e, 'one'],
|
155
|
+
[:e, 'two']
|
156
|
+
]
|
157
|
+
)
|
158
|
+
expect(df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
|
159
|
+
DaruLite::DataFrame.new(
|
160
|
+
[
|
161
|
+
[4.5, 5.0/3],
|
162
|
+
[6.5, 3.0],
|
163
|
+
[9.0, 10.0/3],
|
164
|
+
[13.0, 6.0]
|
165
|
+
], order: agg_vectors, index: agg_index
|
166
|
+
)
|
167
|
+
)
|
168
|
+
end
|
169
|
+
|
170
|
+
it "overrides default aggregate function to aggregate over sum" do
|
171
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples(
|
172
|
+
[
|
173
|
+
[:e, 'one', 'large'],
|
174
|
+
[:e, 'one', 'small'],
|
175
|
+
[:e, 'two', 'large'],
|
176
|
+
[:e, 'two', 'small']
|
177
|
+
]
|
178
|
+
)
|
179
|
+
agg_index = DaruLite::MultiIndex.from_tuples(
|
180
|
+
[
|
181
|
+
['bar'],
|
182
|
+
['foo']
|
183
|
+
]
|
184
|
+
)
|
185
|
+
expect(df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
|
186
|
+
DaruLite::DataFrame.new(
|
187
|
+
[
|
188
|
+
[8, 8],
|
189
|
+
[10, 2],
|
190
|
+
[12,nil],
|
191
|
+
[14, 12]
|
192
|
+
], order: agg_vectors, index: agg_index
|
193
|
+
)
|
194
|
+
)
|
195
|
+
end
|
196
|
+
|
197
|
+
it "raises error if no non-numeric vectors are present" do
|
198
|
+
df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
|
199
|
+
expect {
|
200
|
+
df.pivot_table(index: [:a])
|
201
|
+
}.to raise_error
|
202
|
+
end
|
203
|
+
|
204
|
+
it "raises error if atleast a row index is not specified" do
|
205
|
+
expect {
|
206
|
+
df.pivot_table
|
207
|
+
}.to raise_error
|
208
|
+
end
|
209
|
+
|
210
|
+
it "aggregates when nils are present in value vector" do
|
211
|
+
df = DaruLite::DataFrame.new({
|
212
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
|
213
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
214
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
215
|
+
d: [1,2,2,3,3,4,5,6,7],
|
216
|
+
e: [2,nil,4,6,6,8,10,12,nil]
|
217
|
+
})
|
218
|
+
|
219
|
+
expect(df.pivot_table index: [:a]).to eq(
|
220
|
+
DaruLite::DataFrame.new({
|
221
|
+
d: [5.0, 2.2, 7],
|
222
|
+
e: [10.0, 4.5, nil]
|
223
|
+
}, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
|
224
|
+
end
|
225
|
+
|
226
|
+
it "works when nils are present in value vector" do
|
227
|
+
df = DaruLite::DataFrame.new({
|
228
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
|
229
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
230
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
231
|
+
d: [1,2,2,3,3,4,5,6,7],
|
232
|
+
e: [2,nil,4,6,6,8,10,12,nil]
|
233
|
+
})
|
234
|
+
|
235
|
+
agg_vectors = DaruLite::MultiIndex.from_tuples(
|
236
|
+
[
|
237
|
+
[:e, 'one'],
|
238
|
+
[:e, 'two']
|
239
|
+
]
|
240
|
+
)
|
241
|
+
|
242
|
+
agg_index = DaruLite::MultiIndex.from_tuples(
|
243
|
+
[
|
244
|
+
['bar'],
|
245
|
+
['foo'],
|
246
|
+
['ice']
|
247
|
+
]
|
248
|
+
)
|
249
|
+
|
250
|
+
expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
|
251
|
+
DaruLite::DataFrame.new(
|
252
|
+
[
|
253
|
+
[9, 3, nil],
|
254
|
+
[12, 6, nil]
|
255
|
+
], order: agg_vectors, index: agg_index
|
256
|
+
)
|
257
|
+
)
|
258
|
+
end
|
259
|
+
|
260
|
+
it 'performs date pivoting' do
|
261
|
+
categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
|
262
|
+
df = DaruLite::DataFrame.rows([
|
263
|
+
[2014, 2, 1600.0, 20.0],
|
264
|
+
[2014, 3, 1680.0, 21.0],
|
265
|
+
[2016, 2, 1600.0, 20.0],
|
266
|
+
[2016, 4, 1520.0, 19.0],
|
267
|
+
], order: [:year, :month, :visitors, :days])
|
268
|
+
df[:averages] = df[:visitors] / df[:days]
|
269
|
+
df[:month] = df[:month].map{|i| categories[i - 1]}
|
270
|
+
actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
|
271
|
+
|
272
|
+
# NB: As you can see, there are some "illogical" parts:
|
273
|
+
# months are sorted lexicographically, then made into multi-index
|
274
|
+
# with one-element-per-tuple, then order of columns is dependent
|
275
|
+
# on which month is lexicographically first (it's apr, so apr-2016
|
276
|
+
# is first row to gather, so 2016 is first column).
|
277
|
+
#
|
278
|
+
# All of it is a consequence of our group_by implementation (which
|
279
|
+
# always sorts results & always makes array keys). I hope that fixing
|
280
|
+
# group_by, even to the extent described at https://github.com/v0dro/daru/issues/152,
|
281
|
+
# will fix this case also.
|
282
|
+
expected =
|
283
|
+
DaruLite::DataFrame.new(
|
284
|
+
[
|
285
|
+
[80.0, 80.0, nil],
|
286
|
+
[nil, 80.0, 80.0],
|
287
|
+
], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
|
288
|
+
order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
|
289
|
+
)
|
290
|
+
# Comparing their parts before the full comparison makes it easier to
|
291
|
+
# find complicated differences.
|
292
|
+
expect(actual.vectors).to eq expected.vectors
|
293
|
+
expect(actual.index).to eq expected.index
|
294
|
+
expect(actual).to eq expected
|
295
|
+
end
|
296
|
+
end
|
297
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
shared_examples_for 'a queryable DataFrame' do
|
2
|
+
describe '#include_values?' do
|
3
|
+
let(:df) do
|
4
|
+
DaruLite::DataFrame.new({
|
5
|
+
a: [1, 2, 3, 4, Float::NAN, 6, 1],
|
6
|
+
b: [:a, :b, nil, Float::NAN, nil, 3, 5],
|
7
|
+
c: ['a', 6, 3, 4, 3, 5, 3],
|
8
|
+
d: [1, 2, 3, 5, 1, 2, 5]
|
9
|
+
})
|
10
|
+
end
|
11
|
+
before { df.to_category :b }
|
12
|
+
|
13
|
+
context 'true' do
|
14
|
+
it { expect(df.include_values? nil).to eq true }
|
15
|
+
it { expect(df.include_values? Float::NAN).to eq true }
|
16
|
+
it { expect(df.include_values? nil, Float::NAN).to eq true }
|
17
|
+
it { expect(df.include_values? 1, 30).to eq true }
|
18
|
+
end
|
19
|
+
|
20
|
+
context 'false' do
|
21
|
+
it { expect(df[:a, :c].include_values? nil).to eq false }
|
22
|
+
it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
|
23
|
+
it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
|
24
|
+
it { expect(df.include_values? 10, 20).to eq false }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
describe "#any?" do
|
30
|
+
let(:df) do
|
31
|
+
DaruLite::DataFrame.new(
|
32
|
+
{
|
33
|
+
a: [1,2,3,4,5],
|
34
|
+
b: [10,20,30,40,50],
|
35
|
+
c: [11,22,33,44,55]
|
36
|
+
}
|
37
|
+
)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "returns true if any one of the vectors satisfy condition" do
|
41
|
+
expect(df.any? { |v| v[0] == 1 }).to eq(true)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "returns false if none of the vectors satisfy the condition" do
|
45
|
+
expect(df.any? { |v| v.mean > 100 }).to eq(false)
|
46
|
+
end
|
47
|
+
|
48
|
+
it "returns true if any one of the rows satisfy condition" do
|
49
|
+
expect(df.any?(:row) { |r| r[:a] == 1 and r[:c] == 11 }).to eq(true)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "returns false if none of the rows satisfy the condition" do
|
53
|
+
expect(df.any?(:row) { |r| r.mean > 100 }).to eq(false)
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'fails on unknown axis' do
|
57
|
+
expect { df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe "#all?" do
|
62
|
+
let(:df) do
|
63
|
+
DaruLite::DataFrame.new(
|
64
|
+
{
|
65
|
+
a: [1,2,3,4,5],
|
66
|
+
b: [10,20,30,40,50],
|
67
|
+
c: [11,22,33,44,55]
|
68
|
+
}
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "returns true if all of the vectors satisfy condition" do
|
73
|
+
expect(df.all? { |v| v.mean < 40 }).to eq(true)
|
74
|
+
end
|
75
|
+
|
76
|
+
it "returns false if any one of the vectors does not satisfy condition" do
|
77
|
+
expect(df.all? { |v| v.mean == 30 }).to eq(false)
|
78
|
+
end
|
79
|
+
|
80
|
+
it "returns true if all of the rows satisfy condition" do
|
81
|
+
expect(df.all?(:row) { |r| r.mean < 70 }).to eq(true)
|
82
|
+
end
|
83
|
+
|
84
|
+
it "returns false if any one of the rows does not satisfy condition" do
|
85
|
+
expect(df.all?(:row) { |r| r.mean == 30 }).to eq(false)
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'fails on unknown axis' do
|
89
|
+
expect { df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|