daru_lite 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
@@ -0,0 +1,465 @@
|
|
1
|
+
shared_examples_for 'an iterable DataFrame' do
|
2
|
+
# Checks that #each_index yields every row label in order and returns the frame.
# `df` is supplied by the spec that includes these shared examples.
describe "#each_index" do
  it "iterates over index" do
    seen = []
    result = df.each_index { |label| seen << label }

    expect(seen).to eq(%i[one two three four five])
    expect(result).to eq(df)
  end
end
|
14
|
+
|
15
|
+
# Checks that #each_vector_with_index yields each vector with its name and returns the frame.
# `df` is supplied by the spec that includes these shared examples.
describe "#each_vector_with_index" do
  it "iterates over vectors with index" do
    row_labels = %i[one two three four five].to_index
    names = []

    result = df.each_vector_with_index do |column, name|
      names << name
      expect(column.index).to eq(row_labels)
      expect(column.class).to eq(DaruLite::Vector)
    end

    expect(names).to eq(%i[a b c])
    expect(result).to eq(df)
  end
end
|
29
|
+
|
30
|
+
# Checks that #each_row_with_index yields each row with its label and returns the frame.
# `df` is supplied by the spec that includes these shared examples.
describe "#each_row_with_index" do
  it "iterates over rows with indexes" do
    vector_names = %i[a b c].to_index
    labels = []

    result = df.each_row_with_index do |record, label|
      labels << label
      expect(record.index).to eq(vector_names)
      expect(record.class).to eq(DaruLite::Vector)
    end

    expect(labels).to eq(%i[one two three four five])
    expect(result).to eq(df)
  end
end
|
43
|
+
|
44
|
+
# Covers #each on both axes, the block-less form and the invalid-axis error.
# `df` is supplied by the spec that includes these shared examples.
describe "#each" do
  it "iterates over rows" do
    vector_names = %i[a b c].to_index

    ret = df.each(:row) do |row|
      expect(row.index).to eq(vector_names)
      expect(row.class).to eq(DaruLite::Vector)
    end

    expect(ret).to eq(df)
  end

  it "iterates over all vectors" do
    row_labels = %i[one two three four five].to_index

    ret = df.each do |vector|
      expect(vector.index).to eq(row_labels)
      expect(vector.class).to eq(DaruLite::Vector)
    end

    expect(ret).to eq(df)
  end

  # The assertion is about Enumerator (the lazy iterator object), not the
  # Enumerable mixin, so the description names Enumerator.
  it "returns an Enumerator if no block specified" do
    expect(df.each).to be_an(Enumerator)
  end

  it "raises on unknown axis" do
    expect { df.each(:kitten) }.to raise_error(ArgumentError, /axis/)
  end
end
|
72
|
+
|
73
|
+
# Checks that #collect gathers the block results into a Vector, per row or per vector.
# Uses its own small frame rather than the shared `df`.
describe "#collect" do
  let(:frame) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: [1, 2, 3, 4, 5]
      }
    )
  end

  it "collects calculation over rows and returns a Vector from the results" do
    per_row = frame.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }

    expect(per_row).to eq(DaruLite::Vector.new([2, 8, 18, 32, 50]))
  end

  it "collects calculation over vectors and returns a Vector from the results" do
    per_vector = frame.collect { |v| v[0] * v[1] + v[4] }

    expect(per_vector).to eq(DaruLite::Vector.new([7, 297, 7], index: %i[a b c]))
  end
end
|
94
|
+
|
95
|
+
# Checks that #map returns a plain Array of block results for either axis.
# `df` is supplied by the spec that includes these shared examples.
describe "#map" do
  it "iterates over rows and returns an Array" do
    products = df.map(:row) do |row|
      expect(row.class).to eq(DaruLite::Vector)
      row[:a] * row[:c]
    end

    expect(products).to eq([11, 44, 99, 176, 275])
    # The vector names are still the same after mapping.
    expect(df.vectors.to_a).to eq(%i[a b c])
  end

  it "iterates over vectors and returns an Array" do
    means = df.map { |vector| vector.mean }

    expect(means).to eq([3.0, 13.0, 33.0])
  end
end
|
113
|
+
|
114
|
+
# Checks that #map! rewrites the frame in place, per vector or per row.
# `df` is supplied by the spec that includes these shared examples.
describe "#map!" do
  # Expected frame once 10 has been added to every vector.
  let(:plus_ten) do
    DaruLite::DataFrame.new(
      { b: [21, 22, 23, 24, 25], a: [11, 12, 13, 14, 15], c: [21, 32, 43, 54, 65] },
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  # Expected frame once 1 has been added to every row.
  let(:plus_one) do
    DaruLite::DataFrame.new(
      { b: [12, 13, 14, 15, 16], a: [2, 3, 4, 5, 6], c: [12, 23, 34, 45, 56] },
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  it "destructively maps over the vectors and changes the DF" do
    df.map! { |vector| vector + 10 }

    expect(df).to eq(plus_ten)
  end

  it "destructively maps over the rows and changes the DF" do
    df.map!(:row) { |row| row + 1 }

    expect(df).to eq(plus_one)
  end
end
|
153
|
+
|
154
|
+
# Checks that #map_vectors_with_index yields each vector with its name and
# returns an Array holding whatever the block produced.
# `df` is supplied by the spec that includes these shared examples.
describe "#map_vectors_with_index" do
  it "iterates over vectors with index and returns an Array" do
    row_labels = %i[one two three four five]
    names = []

    ret = df.map_vectors_with_index do |vector, name|
      names << name
      # Plain `+`: reassigning the block parameter (`e += 10`) had no effect.
      vector.recode { |e| e + 10 }
    end

    expect(ret).to eq(
      [
        DaruLite::Vector.new([11, 12, 13, 14, 15], index: row_labels),
        DaruLite::Vector.new([21, 22, 23, 24, 25], index: row_labels),
        DaruLite::Vector.new([21, 32, 43, 54, 65], index: row_labels)
      ]
    )
    expect(names).to eq(%i[a b c])
  end
end
|
169
|
+
|
170
|
+
# FIXME: map_VECTORS_with_index, but collect_VECTOR_with_index -- the
# pluralisation is inconsistent (originally noted by zverok).
# (Not to mention the unfortunate difference between them: the map_* variant
# is expected to return an Array, the collect_* variant a Vector.)
describe "#collect_vector_with_index" do
  # The result asserted below is a DaruLite::Vector indexed by vector name,
  # so the description says Vector rather than Array.
  it "iterates over vectors with index and returns a Vector" do
    names = []

    ret = df.collect_vector_with_index do |vector, name|
      names << name
      vector.sum
    end

    expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: %i[a b c]))
    expect(names).to eq(%i[a b c])
  end
end
|
184
|
+
|
185
|
+
# Checks that #map_rows_with_index yields each row with its label and returns an Array.
# `df` is supplied by the spec that includes these shared examples.
describe "#map_rows_with_index" do
  it "iterates over rows with index and returns an Array" do
    labels = []

    products = df.map_rows_with_index do |record, label|
      labels << label
      expect(record.class).to eq(DaruLite::Vector)
      record[:a] * record[:c]
    end

    expect(products).to eq([11, 44, 99, 176, 275])
    expect(labels).to eq(%i[one two three four five])
  end
end
|
198
|
+
|
199
|
+
# Checks that #collect_row_with_index yields each row with its label and
# returns a Vector that carries the frame's own index.
# `df` is supplied by the spec that includes these shared examples.
describe '#collect_row_with_index' do
  it "iterates over rows with index and returns a Vector" do
    labels = []

    products = df.collect_row_with_index do |record, label|
      labels << label
      expect(record.class).to eq(DaruLite::Vector)
      record[:a] * record[:c]
    end

    expect(products).to eq(DaruLite::Vector.new([11, 44, 99, 176, 275], index: df.index))
    expect(labels).to eq(%i[one two three four five])
  end
end
|
213
|
+
|
214
|
+
# Checks that #recode maps over vectors or rows and returns a DataFrame, for
# both the symbol index of `df` and a DateTimeIndex.
# `df` is supplied by the spec that includes these shared examples.
describe "#recode" do
  # Fresh five-day DateTimeIndex; built anew on every call so that the frame
  # under test and the expected frame never share an index object.
  def date_time_index
    DaruLite::DateTimeIndex.date_range(start: "2016-02-11", periods: 5)
  end

  # Expected frame after adding 10 to every value, with the given row index.
  def plus_ten_frame(index)
    DaruLite::DataFrame.new(
      { b: [21, 22, 23, 24, 25], a: [11, 12, 13, 14, 15], c: [21, 32, 43, 54, 65] },
      order: %i[a b c],
      index: index
    )
  end

  # Expected frame after squaring every value, with the given row index.
  def squared_frame(index)
    DaruLite::DataFrame.new(
      { b: [121, 144, 169, 196, 225], a: [1, 4, 9, 16, 25], c: [121, 484, 1089, 1936, 3025] },
      order: %i[a b c],
      index: index
    )
  end

  let(:ans_vector) { plus_ten_frame(%i[one two three four five]) }
  let(:ans_rows) { squared_frame(%i[one two three four five]) }
  let(:ans_vector_date_time) { plus_ten_frame(date_time_index) }
  let(:ans_rows_date_time) { squared_frame(date_time_index) }

  # Copy of `df` re-indexed by date.
  let(:data_frame_date_time) do
    df.dup.tap { |df_dt| df_dt.index = date_time_index }
  end

  it "maps over the vectors of a DataFrame and returns a DataFrame" do
    # Plain `+`: reassigning the block parameter (`e += 10`) had no effect.
    ret = df.recode { |vector| vector.map! { |e| e + 10 } }

    expect(ret).to eq(ans_vector)
  end

  it "maps over the rows of a DataFrame and returns a DataFrame" do
    ret = df.recode(:row) do |row|
      expect(row.class).to eq(DaruLite::Vector)
      row.map! { |e| e * e }
    end

    expect(ret).to eq(ans_rows)
  end

  it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
    ret = data_frame_date_time.recode { |vector| vector.map! { |e| e + 10 } }

    expect(ret).to eq(ans_vector_date_time)
  end

  it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
    ret = data_frame_date_time.recode(:row) do |row|
      expect(row.class).to eq(DaruLite::Vector)
      row.map! { |e| e * e }
    end

    expect(ret).to eq(ans_rows_date_time)
  end
end
|
298
|
+
|
299
|
+
# Checks that #replace_values swaps the given value(s) in every vector in place
# and keeps vector :b (converted to category before each example) categorical.
describe '#replace_values' do
  subject do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
        b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
        c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
      }
    )
  end

  before { subject.to_category(:b) }

  context 'replace nils only' do
    before { subject.replace_values(nil, 10) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:'b.type') { is_expected.to eq(:category) }
    its(:'a.to_a') { is_expected.to eq([1, 2, 3, 10, Float::NAN, 10, 1, 7]) }
    its(:'b.to_a') { is_expected.to eq([:a, :b, 10, Float::NAN, 10, 3, 5, 8]) }
    its(:'c.to_a') { is_expected.to eq(['a', Float::NAN, 3, 4, 3, 5, 10, 7]) }
  end

  context 'replace Float::NAN only' do
    before { subject.replace_values(Float::NAN, 10) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:'b.type') { is_expected.to eq(:category) }
    its(:'a.to_a') { is_expected.to eq([1, 2, 3, nil, 10, nil, 1, 7]) }
    its(:'b.to_a') { is_expected.to eq([:a, :b, nil, 10, nil, 3, 5, 8]) }
    its(:'c.to_a') { is_expected.to eq(['a', 10, 3, 4, 3, 5, nil, 7]) }
  end

  context 'replace both nil and Float::NAN' do
    before { subject.replace_values([nil, Float::NAN], 10) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:'b.type') { is_expected.to eq(:category) }
    its(:'a.to_a') { is_expected.to eq([1, 2, 3, 10, 10, 10, 1, 7]) }
    its(:'b.to_a') { is_expected.to eq([:a, :b, 10, 10, 10, 3, 5, 8]) }
    its(:'c.to_a') { is_expected.to eq(['a', 10, 3, 4, 3, 5, 10, 7]) }
  end

  context 'replace other values' do
    before { subject.replace_values([1, 5], 10) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:'b.type') { is_expected.to eq(:category) }
    its(:'a.to_a') { is_expected.to eq([10, 2, 3, nil, Float::NAN, nil, 10, 7]) }
    its(:'b.to_a') { is_expected.to eq([:a, :b, nil, Float::NAN, nil, 3, 10, 8]) }
    its(:'c.to_a') { is_expected.to eq(['a', Float::NAN, 3, 4, 3, 10, nil, 7]) }
  end
end
|
349
|
+
|
350
|
+
|
351
|
+
# Checks that #verify reports one message per row failing a test, optionally
# labelled by an id vector and extended with extra field values.
describe "#verify" do
  # Packs a test definition as [description, fields, block], the form handed
  # to #verify in the examples below.
  def create_test(description, *fields, &check)
    [description, fields, check]
  end

  let(:df) do
    v1 = DaruLite::Vector.new [1, 2, 3, 4]
    v2 = DaruLite::Vector.new [4, 3, 2, 1]
    v3 = DaruLite::Vector.new [10, 20, 30, 40]
    v4 = DaruLite::Vector.new %w[a b a b]
    id = DaruLite::Vector.new %w[r1 r2 r3 r4]

    DaruLite::DataFrame.new({ v1:, v2:, v3:, v4:, id: }, order: %i[v1 v2 v3 v4 id])
  end

  it "correctly verifies data as per the block" do
    # Hold for every row of the frame.
    odd_when_a = create_test('If v4=a, v1 odd') do |r|
      r[:v4] == 'b' || (r[:v4] == 'a' && r[:v1].odd?)
    end
    times_ten = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
    # Fails for the rows whose v4 is 'a'.
    only_b = create_test("v4='b'") { |r| r[:v4] == 'b' }

    failures = df.verify(only_b, odd_when_a, times_ten)

    expect(failures).to eq(["1 [1]: v4='b'", "3 [3]: v4='b'"])
  end

  it "uses additional fields to extend error messages" do
    only_b = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }

    failures = df.verify(:id, only_b)

    expect(failures).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
  end
end
|
390
|
+
|
391
|
+
# Checks that #merge combines the vectors of two frames, suffixes clashing
# names, keeps the type of vector names and preserves a shared index.
describe "#merge" do
  it "merges one dataframe with another" do
    a = DaruLite::Vector.new([1, 2, 3])
    b = DaruLite::Vector.new([3, 4, 5])
    c = DaruLite::Vector.new([4, 5, 6])
    d = DaruLite::Vector.new([7, 8, 9])
    e = DaruLite::Vector.new([10, 20, 30])
    left = DaruLite::DataFrame.new({ a:, b: })
    right = DaruLite::DataFrame.new({ c:, d: })
    combined = DaruLite::DataFrame.new({ a:, b:, c:, d: })

    expect(left.merge(right)).to eq(combined)
    expect(right.merge(left)).to eq(
      DaruLite::DataFrame.new({ c:, d:, a:, b: }, order: %i[c d a b])
    )

    # Both frames have a vector named :a; the expected names are :a_1 and :a_2.
    clashing = DaruLite::DataFrame.new({ a: e })
    renamed = DaruLite::DataFrame.new({ a_1: a, a_2: e, b: }, order: %i[a_1 b a_2])

    expect(left.merge(clashing)).to eq(renamed)
  end

  context "preserves type of vector names" do
    subject { df1.merge(df2) }

    let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3] }) }
    let(:df2) { DaruLite::DataFrame.new({ b: [4, 5, 6] }) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    it { expect(subject['a'].to_a).to eq([1, 2, 3]) }
    it { expect(subject[:b].to_a).to eq([4, 5, 6]) }
  end

  context "preserves indices for dataframes with same index" do
    subject { df1.merge(df2) }

    let(:index) { %w[one two three] }
    let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
    let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }

    its(:index) { is_expected.to eq(DaruLite::Index.new(index)) }
  end
end
|
432
|
+
|
433
|
+
# Checks that #one_to_many turns the numbered car_color*/car_value* columns
# into one row per car, keyed by 'id' plus a '_col_id' counter.
describe "#one_to_many" do
  subject { df.one_to_many(['id'], 'car_%v%n') }

  let(:df) do
    records = [
      ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
      ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
      ['3', 'alfred', nil, nil, nil, nil, nil, nil]
    ]
    columns = %w[id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3]

    DaruLite::DataFrame.rows(records, order: columns)
  end

  let(:df_expected) do
    DaruLite::DataFrame.new(
      {
        'id' => DaruLite::Vector.new(%w[1 1 2 2 2]),
        '_col_id' => DaruLite::Vector.new([1, 2, 1, 2, 3]),
        'color' => DaruLite::Vector.new(%w[red blue green orange white]),
        'value' => DaruLite::Vector.new([10, 20, 15, 30, 20])
      },
      order: %w[id _col_id color value]
    )
  end

  it { is_expected.to eq(df_expected) }
end
|
465
|
+
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
shared_examples_for 'a joinable DataFrame' do
|
2
|
+
# Checks that #concat returns a new frame with the rows of both frames,
# leaves its operands untouched and pads missing vectors with nil.
describe "#concat" do
  let(:df1) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3],
        b: [1, 2, 3]
      }
    )
  end
  let(:df2) do
    DaruLite::DataFrame.new(
      {
        a: [4, 5, 6],
        c: [4, 5, 6]
      }
    )
  end

  it 'does not modify the original dataframes' do
    df1_a = df1[:a].to_a.dup
    df2_a = df2[:a].to_a.dup

    # Only the effect on the operands matters here; the result is discarded.
    df1.concat(df2)

    expect(df1[:a].to_a).to eq(df1_a)
    expect(df2[:a].to_a).to eq(df2_a)
  end

  it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
    df1_a = df1[:a].to_a.dup
    df2_a = df2[:a].to_a.dup

    df_concat = df1.concat(df2)

    expect(df_concat[:a].to_a).to eq(df1_a + df2_a)
  end

  it 'fills in missing vectors with nils' do
    df1_b = df1[:b].to_a.dup
    df2_c = df2[:c].to_a.dup

    df_concat = df1.concat(df2)

    expect(df_concat[:b].to_a).to eq(df1_b + [nil] * df2.size)
    expect(df_concat[:c].to_a).to eq([nil] * df1.size + df2_c)
  end
end
|
42
|
+
|
43
|
+
|
44
|
+
# Checks that #union returns a new frame combining both operands, leaves them
# untouched, pads missing vectors with nil, lets the second frame win on
# duplicate index labels and concatenates the indices.
describe "#union" do
  let(:df1) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3],
        b: [1, 2, 3]
      },
      index: [1, 3, 5]
    )
  end
  let(:df2) do
    DaruLite::DataFrame.new(
      {
        a: [4, 5, 6],
        c: [4, 5, 6]
      },
      index: [7, 9, 11]
    )
  end
  # Shares index label 5 with df1.
  let(:df3) do
    DaruLite::DataFrame.new(
      {
        a: [4, 5, 6],
        c: [4, 5, 6]
      },
      index: [5, 7, 9]
    )
  end

  it 'does not modify the original dataframes' do
    df1_a = df1[:a].to_a.dup
    df2_a = df2[:a].to_a.dup

    # Only the effect on the operands matters here; the result is discarded.
    df1.union(df2)

    expect(df1[:a].to_a).to eq(df1_a)
    expect(df2[:a].to_a).to eq(df2_a)
  end

  it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
    df1_a = df1[:a].to_a.dup
    df2_a = df2[:a].to_a.dup

    df_union = df1.union(df2)

    expect(df_union[:a].to_a).to eq(df1_a + df2_a)
  end

  it 'fills in missing vectors with nils' do
    df1_b = df1[:b].to_a.dup
    df2_c = df2[:c].to_a.dup

    df_union = df1.union(df2)

    expect(df_union[:b].to_a).to eq(df1_b + [nil] * df2.size)
    expect(df_union[:c].to_a).to eq([nil] * df1.size + df2_c)
  end

  it 'overwrites part of the first dataframe if there are double indices' do
    vec = DaruLite::Vector.new({ a: 4, b: nil, c: 4 })

    expect(df1.union(df3).row[5]).to eq(vec)
  end

  it 'concats the indices' do
    v1 = df1.index.to_a
    v2 = df2.index.to_a

    df_union = df1.union(df2)

    expect(df_union.index.to_a).to eq(v1 + v2)
  end
end
|
106
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
shared_examples_for 'a missable DataFrame' do
|
2
|
+
# Checks that #rolling_fillna! replaces nil and NaN in place with the nearest
# valid value in the requested direction, and returns the frame itself.
describe '#rolling_fillna!' do
  subject do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
        b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
        c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
      }
    )
  end

  context 'rolling_fillna! forwards' do
    before { subject.rolling_fillna!(:forward) }

    it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
    its(:'a.to_a') { is_expected.to eq([1, 2, 3, 3, 3, 3, 1, 7]) }
    its(:'b.to_a') { is_expected.to eq([:a, :b, :b, :b, :b, 3, 5, 5]) }
    its(:'c.to_a') { is_expected.to eq(['a', 'a', 3, 4, 3, 5, 5, 7]) }
  end

  context 'rolling_fillna! backwards' do
    before { subject.rolling_fillna!(:backward) }

    it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
    its(:'a.to_a') { is_expected.to eq([1, 2, 3, 1, 1, 1, 1, 7]) }
    # NOTE(review): the trailing nil of :b has no later value to copy and is
    # expected to become 0 -- presumably a default seed; confirm against the
    # implementation.
    its(:'b.to_a') { is_expected.to eq([:a, :b, 3, 3, 3, 3, 5, 0]) }
    its(:'c.to_a') { is_expected.to eq(['a', 3, 3, 4, 3, 5, 7, 7]) }
  end
end
|
29
|
+
|
30
|
+
# Checks that #missing_values_rows returns, for each row, how many of its
# values are missing.
describe "#missing_values_rows" do
  subject { df.missing_values_rows }

  let(:df) do
    DaruLite::DataFrame.new(
      {
        a1: DaruLite::Vector.new([1, nil, 3, 4, 5, nil]),
        a2: DaruLite::Vector.new([10, nil, 20, 20, 20, 30]),
        b1: DaruLite::Vector.new([nil, nil, 1, 1, 1, 2]),
        b2: DaruLite::Vector.new([2, 2, 2, nil, 2, 3]),
        c: DaruLite::Vector.new([nil, 2, 4, 2, 2, 2])
      }
    )
  end

  it "returns number of missing values in each row" do
    is_expected.to eq(DaruLite::Vector.new([2, 3, 0, 1, 0, 1]))
  end
end
|
47
|
+
end
|