daru_lite 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +14 -1
- data/lib/daru_lite/index/multi_index.rb +9 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +409 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/categorical_index_spec.rb +27 -8
- data/spec/index/index_spec.rb +21 -0
- data/spec/index/multi_index_spec.rb +85 -76
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
@@ -0,0 +1,409 @@
|
|
1
|
+
shared_examples_for 'a filterable DataFrame' do
|
2
|
+
describe '#uniq' do
  # Frame loaded from the duplicates fixture; shared by every context below.
  let(:df) { DaruLite::DataFrame.from_csv('spec/fixtures/duplicates.csv') }

  # context label => [arguments handed to #uniq, expected value of shape.first]
  {
    'with no args' => [[], 30],
    'given a vector' => [['color'], 2],
    'given an array of vectors' => [["color", "director_name"], 29]
  }.each do |label, (uniq_args, expected_first_dim)|
    context label do
      subject { df.uniq(*uniq_args) }

      it 'returns the correct result' do
        expect(subject.shape.first).to eq expected_first_dim
      end
    end
  end
end
|
29
|
+
|
30
|
+
# Specs for DataFrame#filter: the first argument selects the axis, and the
# result is compared against the axis-specific #filter_rows / #filter_vectors.
describe "#filter" do
  let(:df) { DaruLite::DataFrame.new({ a: [1, 2, 3], b: [2, 3, 4] }) }

  context 'axis is row' do
    subject { df.filter(:row) { |r| r[:a].even? } }

    it { is_expected.to eq(df.filter_rows { |r| r[:a].even? }) }
  end

  context 'axis is vector' do
    subject { df.filter(:vector) { |v| v[0] == 1 } }

    it { is_expected.to eq(df.filter_vectors { |v| v[0] == 1 }) }
  end

  context 'axis is unknown' do
    # Any other axis must raise an ArgumentError whose message mentions "axis".
    subject { df.filter(:kitten) {} }

    it { expect { subject }.to raise_error ArgumentError, /axis/ }
  end
end
|
51
|
+
|
52
|
+
|
53
|
+
describe '#reject_values' do
  # Three columns mixing ordinary values with nil and Float::NAN; rows are
  # labelled 11..18. The expectations below imply that a row is dropped as soon
  # as any of its columns holds one of the rejected values.
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
        b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
        c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
      },
      index: 11..18
    )
  end

  # Column :b is turned into a category vector first, so every context can
  # check that the type is still :category after rejecting values.
  before { df.to_category(:b) }

  context 'remove nils only' do
    subject { df.reject_values(nil) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its('b.type') { is_expected.to eq(:category) }
    its('a.to_a') { is_expected.to eq([1, 2, 7]) }
    its('b.to_a') { is_expected.to eq([:a, :b, 8]) }
    its('c.to_a') { is_expected.to eq(['a', Float::NAN, 7]) }
    its('index.to_a') { is_expected.to eq([11, 12, 18]) }
  end

  context 'remove Float::NAN only' do
    subject { df.reject_values(Float::NAN) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its('b.type') { is_expected.to eq(:category) }
    its('a.to_a') { is_expected.to eq([1, 3, nil, 1, 7]) }
    its('b.to_a') { is_expected.to eq([:a, nil, 3, 5, 8]) }
    its('c.to_a') { is_expected.to eq(['a', 3, 5, nil, 7]) }
    its('index.to_a') { is_expected.to eq([11, 13, 16, 17, 18]) }
  end

  context 'remove both nil and Float::NAN' do
    subject { df.reject_values(nil, Float::NAN) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its('b.type') { is_expected.to eq(:category) }
    its('a.to_a') { is_expected.to eq([1, 7]) }
    its('b.to_a') { is_expected.to eq([:a, 8]) }
    its('c.to_a') { is_expected.to eq(['a', 7]) }
    its('index.to_a') { is_expected.to eq([11, 18]) }
  end

  context 'any other values' do
    subject { df.reject_values(1, 5) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its('b.type') { is_expected.to eq(:category) }
    its('a.to_a') { is_expected.to eq([2, 3, nil, Float::NAN, 7]) }
    its('b.to_a') { is_expected.to eq([:b, nil, Float::NAN, nil, 8]) }
    its('c.to_a') { is_expected.to eq([Float::NAN, 3, 4, 3, 7]) }
    its('index.to_a') { is_expected.to eq([12, 13, 14, 15, 18]) }
  end

  context 'when resultant dataframe has one row' do
    subject { df.reject_values(1, 2, 3, 4, 5, nil, Float::NAN) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its('b.type') { is_expected.to eq(:category) }
    its('a.to_a') { is_expected.to eq([7]) }
    its('b.to_a') { is_expected.to eq([8]) }
    its('c.to_a') { is_expected.to eq([7]) }
    its('index.to_a') { is_expected.to eq([18]) }
  end

  context 'when resultant dataframe is empty' do
    subject { df.reject_values(1, 2, 3, 4, 5, 6, 7, nil, Float::NAN) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its('b.type') { is_expected.to eq(:category) }
    its('a.to_a') { is_expected.to eq([]) }
    its('b.to_a') { is_expected.to eq([]) }
    its('c.to_a') { is_expected.to eq([]) }
    its('index.to_a') { is_expected.to eq([]) }
  end
end
|
129
|
+
|
130
|
+
# Specs for DataFrame#keep_row_if. The method is called for its effect on `df`
# itself: the shared examples run the subject in a `before` hook and then
# inspect `df`, not the return value.
describe "#keep_row_if" do
  # Default row labels and column order; contexts override `index` to exercise
  # CategoricalIndex and MultiIndex.
  let(:index) { [:one, :two, :three, :four, :five] }
  let(:order) { [:a, :b, :c] }
  let(:df) do
    DaruLite::DataFrame.new(
      {
        b: [10, 12, 20, 23, 30],
        a: [50, 30, 30, 1, 5],
        c: [10, 20, 30, 40, 50]
      },
      order:,
      index:
    )
  end

  # Including contexts must define `subject`, `expected_df` and
  # `expected_index`.
  shared_examples_for '#keep_row_if' do
    before { subject }

    it "keeps row if block evaluates to true" do
      expect(df).to eq(expected_df)
    end

    it 'returns correct index' do
      expect(df.index).to eq(expected_index)
    end

    it "all vectors have the same index" do
      expect(df.map(&:index)).to all(eq(expected_index))
    end
  end

  context 'a single row is removed' do
    context DaruLite::Index do
      subject { df.keep_row_if { |row| row.name != :four } }

      let(:expected_index) { DaruLite::Index.new([:one, :two, :three, :five]) }
      let(:expected_df) do
        DaruLite::DataFrame.new(
          { b: [10, 12, 20, 30], a: [50, 30, 30, 5], c: [10, 20, 30, 50] },
          order:,
          index: expected_index
        )
      end

      it_behaves_like '#keep_row_if'
    end

    context DaruLite::CategoricalIndex do
      subject { df.keep_row_if { |row| row.name != :a } }

      let(:index) { DaruLite::CategoricalIndex.new([:a, 1, 1, :a, :c]) }
      let(:expected_index) { DaruLite::CategoricalIndex.new([1, 1, :c]) }
      let(:expected_df) do
        DaruLite::DataFrame.new(
          {
            b: [12, 20, 30],
            a: [30, 30, 5],
            c: [20, 30, 50]
          },
          order:,
          index: expected_index
        )
      end

      it_behaves_like '#keep_row_if'
    end

    context DaruLite::MultiIndex do
      subject { df.keep_row_if { |row| !row.name.include?('two') } }

      let(:index) do
        DaruLite::MultiIndex.from_tuples([[:a, :one], [:a, :two], [:b, :one], [:b, :two], [:c, :one]])
      end
      let(:expected_index) do
        DaruLite::MultiIndex.from_tuples([[:a, :one], [:b, :one], [:c, :one]])
      end
      let(:expected_df) do
        DaruLite::DataFrame.new(
          {
            b: [10, 20, 30],
            a: [50, 30, 5],
            c: [10, 30, 50]
          },
          order:,
          index: expected_index
        )
      end

      it_behaves_like '#keep_row_if'
    end
  end

  context 'several rows are removed' do
    # Keeps the rows whose :a value is a multiple of 10 (the first three).
    subject { df.keep_row_if { |row| row[:a] % 10 == 0 } }

    context DaruLite::Index do
      let(:expected_index) { DaruLite::Index.new([:one, :two, :three]) }
      let(:expected_df) do
        DaruLite::DataFrame.new(
          { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
          order:,
          index: expected_index
        )
      end

      it_behaves_like '#keep_row_if'
    end

    context DaruLite::CategoricalIndex do
      let(:index) { DaruLite::CategoricalIndex.new([:a, 1, 1, :a, :c]) }
      let(:expected_index) { DaruLite::CategoricalIndex.new([:a, 1, 1]) }
      let(:expected_df) do
        DaruLite::DataFrame.new(
          { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
          order:,
          index: expected_index
        )
      end

      it_behaves_like '#keep_row_if'
    end

    context DaruLite::MultiIndex do
      let(:index) { DaruLite::MultiIndex.from_tuples([[:a, :one], [:a, :two], [:b, :one], [:b, :two], [:c, :one]]) }
      let(:expected_index) do
        DaruLite::MultiIndex.from_tuples([[:a, :one], [:a, :two], [:b, :one]])
      end
      let(:expected_df) do
        DaruLite::DataFrame.new(
          { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
          order:,
          index: expected_index
        )
      end

      it_behaves_like '#keep_row_if'
    end
  end

  # NOTE(review): the spec below is disabled in the original; the reason is not
  # visible here. Re-enable or delete once that is confirmed.
  # context DaruLite::MultiIndex do
  #   subject { df.keep_row_if { |row| row.name != 'No answer' } }

  #   let(:df) do
  #     order = DaruLite::MultiIndex.from_tuples(
  #       [
  #         [:a, :total],
  #         [:a, "Male"],
  #         [:a, "Female"],
  #         [:a, "Prefer not to answer"],
  #         [:a, "No answer"],
  #         [:b, :total],
  #         [:b, "Male"],
  #         [:b, "Female"],
  #         [:b, "Prefer not to answer"],
  #         [:b, "No answer"],
  #         [:c, :total],
  #         [:c, "Male"],
  #         [:c, "Female"],
  #         [:c, "Prefer not to answer"],
  #         [:c, "No answer"]
  #       ]
  #     )
  #     index = [
  #       :base,
  #       "Single Malt Whisky",
  #       "Blended/ Other Whisky",
  #       "Vodka",
  #       "Cognac",
  #       "Brandy",
  #       "Rum",
  #       "Gin",
  #       "Tequila",
  #       "No answer",
  #       "NET"
  #     ]
  #     DaruLite::DataFrame.new(
  #       [
  #         [0.0, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [Float::NAN, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [nil, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [0, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [0, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [0, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [0, nil, nil, nil, nil, nil, nil, nil, nil],
  #         [0, nil, nil, nil, nil, nil, nil, nil, nil],
  #       ],
  #       index:,
  #       order:
  #     )
  #   end

  #   it "all vectors have the same index" do
  #     subject
  #     expect(df.map(&:index)).to all(eq(df.index))
  #   end
  # end
end
|
333
|
+
|
334
|
+
describe "#keep_vector_if" do
  # NOTE(review): `df` is not defined in this group; it has to be supplied by
  # the spec that includes these shared examples.
  it "keeps vector if block evaluates to true" do
    row_labels = %i[one two three four five]
    wanted = [1, 2, 3, 4, 5].dv(nil, row_labels)

    df.keep_vector_if { |vector| vector == wanted }

    remaining = DaruLite::DataFrame.new(
      { a: [1, 2, 3, 4, 5] },
      order: [:a],
      index: row_labels
    )
    expect(df).to eq(remaining)
  end
end
|
344
|
+
|
345
|
+
describe "#filter_vectors" do
  context DaruLite::Index do
    # Two-column frame; only column :a starts with 1.
    let(:df) { DaruLite::DataFrame.new({ a: [1, 2, 3], b: [2, 3, 4] }) }

    subject do
      df.filter_vectors { |column| column[0] == 1 }
    end

    it "filters vectors" do
      only_a = DaruLite::DataFrame.new({ a: [1, 2, 3] })
      is_expected.to eq(only_a)
    end
  end
end
|
356
|
+
|
357
|
+
describe "#filter_rows" do
  context DaruLite::Index do
    let(:df) { DaruLite::DataFrame.new(a: 1..3, b: 4..6) }

    subject do
      df.filter_rows { |record| record[:a] != 2 }
    end

    it "preserves names of vectors" do
      original_name = df[:a].name
      expect(subject[:a].name).to eq(original_name)
    end

    context "when specified no index" do
      let(:df) { DaruLite::DataFrame.new({ a: [1, 2, 3], b: [2, 3, 4] }) }

      subject do
        df.filter_rows { |record| (record[:a] % 2).zero? }
      end

      it "filters rows" do
        # Only the middle row has an even :a; it keeps its label, 1.
        kept = DaruLite::DataFrame.new({ a: [2], b: [3] }, order: [:a, :b], index: [1])
        is_expected.to eq(kept)
      end
    end

    context "when specified numerical index" do
      let(:df) { DaruLite::DataFrame.new({ a: [1, 2, 3], b: [2, 3, 4] }, index: [1, 2, 3]) }

      subject do
        df.filter_rows { |record| (record[:a] % 2).zero? }
      end

      it "filters rows" do
        # Same data with explicit labels 1..3; the kept row is labelled 2.
        kept = DaruLite::DataFrame.new({ a: [2], b: [3] }, order: [:a, :b], index: [2])
        is_expected.to eq(kept)
      end
    end
  end
end
|
388
|
+
|
389
|
+
# Specs for DataFrame#filter_vector: called with a column name and a block, the
# expectation is a Vector of that column's values for the rows where the block
# is truthy.
describe "#filter_vector" do
  # `||` instead of `or`: same result here, without the low-precedence trap.
  subject { df.filter_vector(:id) { |c| c[:id] == 2 || c[:id] == 4 } }

  let(:df) do
    DaruLite::DataFrame.new(
      {
        id: DaruLite::Vector.new([1, 2, 3, 4, 5]),
        name: DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
        age: DaruLite::Vector.new([20, 23, 25, 27, 5]),
        city: DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
        a1: DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
      },
      order: [:id, :name, :age, :city, :a1]
    )
  end

  it "creates new vector with the data of a given field for which block returns true" do
    expect(subject).to eq(DaruLite::Vector.new([2, 4]))
  end
end
|
409
|
+
end
|
@@ -0,0 +1,221 @@
|
|
1
|
+
shared_examples_for 'an indexable DataFrame' do
|
2
|
+
# Specs for DataFrame#set_index. The first example shows that it changes the
# receiver in place; the others cover keep:, categorical:, duplicate values and
# an array of columns (MultiIndex).
describe "#set_index" do
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: ['a', 'b', 'c', 'd', 'e'],
        c: [11, 22, 33, 44, 55]
      }
    )
  end

  it "sets a particular column as the index and deletes that column" do
    df.set_index(:b)
    expect(df).to eq(
      DaruLite::DataFrame.new(
        {
          a: [1, 2, 3, 4, 5],
          c: [11, 22, 33, 44, 55]
        },
        index: ['a', 'b', 'c', 'd', 'e']
      )
    )
  end

  it "sets a particular column as index but keeps that column" do
    expect(df.set_index(:c, keep: true)).to eq(
      DaruLite::DataFrame.new(
        {
          a: [1, 2, 3, 4, 5],
          b: ['a', 'b', 'c', 'd', 'e'],
          c: [11, 22, 33, 44, 55]
        },
        index: [11, 22, 33, 44, 55]
      )
    )
    # Check the kept column's values; the previous form compared df[:c] with
    # itself and could never fail.
    expect(df[:c].to_a).to eq([11, 22, 33, 44, 55])
  end

  it "sets categorical index if categorical is true" do
    data = {
      a: [1, 2, 3, 4, 5],
      b: [:a, 1, :a, 1, 'c'],
      c: %w[a b c d e]
    }
    df = DaruLite::DataFrame.new(data)
    df.set_index(:b, categorical: true)
    expected = DaruLite::DataFrame.new(
      data.slice(:a, :c),
      index: DaruLite::CategoricalIndex.new(data[:b])
    )
    expect(df).to eq(expected)
  end

  it "raises error if all elements in the column aren't unique" do
    # Column :a repeats 'a', so it cannot be used as a (non-categorical) index.
    with_duplicates = DaruLite::DataFrame.new(
      {
        a: ['a', 'b', 'a'],
        b: [1, 2, 4]
      }
    )

    expect { with_duplicates.set_index(:a) }.to raise_error(ArgumentError)
  end

  it "sets multiindex if array is given" do
    df = DaruLite::DataFrame.new(
      {
        a: %w[a a b b],
        b: [1, 2, 1, 2],
        c: %w[a b c d]
      }
    )
    df.set_index(%i[a b])
    # The block parameter is named `frame` so it no longer shadows `df`; tap
    # already returns its receiver.
    expected =
      DaruLite::DataFrame.new(
        { c: %w[a b c d] },
        index: DaruLite::MultiIndex.from_tuples(
          [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
        )
      ).tap { |frame| frame.index.name = %i[a b] }
    expect(df).to eq(expected)
  end
end
|
79
|
+
|
80
|
+
describe "#reindex" do
  # Frame with the default index; the labels requested below include 8, which
  # the frame does not have.
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end

  subject do
    df.reindex(DaruLite::Index.new([1, 3, 0, 8, 2]))
  end

  it "re indexes and aligns accordingly" do
    # Rows follow the requested label order; the label with no source row is nil.
    realigned = DaruLite::DataFrame.new(
      {
        a: [2, 4, 1, nil, 3],
        b: [22, 44, 11, nil, 33],
        c: ['b', 'd', 'a', nil, 'c']
      },
      index: DaruLite::Index.new([1, 3, 0, 8, 2])
    )

    is_expected.to eq(realigned)
  end

  it { is_expected.not_to eq(df) }
end
|
106
|
+
|
107
|
+
describe '#reset_index' do
  context 'when Index' do
    subject do
      named_index = DaruLite::Index.new(%w[a b c d e], name: 'indices')
      DaruLite::DataFrame.new({ 'vals' => [1, 2, 3, 4, 5] }, index: named_index).reset_index
    end

    # The index values are expected back as a column named after the index.
    it do
      is_expected.to eq DaruLite::DataFrame.new(
        'indices' => %w[a b c d e],
        'vals' => [1, 2, 3, 4, 5]
      )
    end
  end

  context 'when MultiIndex' do
    subject do
      mi = DaruLite::MultiIndex.from_tuples(
        [[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']]
      )
      mi.name = %w[nums alphas]
      DaruLite::DataFrame.new({ 'vals' => [1, 2, 3, 4] }, index: mi).reset_index
    end

    # One column per index level is expected, named from the index names.
    it do
      is_expected.to eq DaruLite::DataFrame.new(
        'nums' => [0, 0, 1, 1],
        'alphas' => %w[a b a b],
        'vals' => [1, 2, 3, 4]
      )
    end
  end
end
|
141
|
+
|
142
|
+
# Specs for DataFrame#index=: assigning an Index or a plain Array, and
# rejecting an index of the wrong length.
describe "#index=" do
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w(a b c d e)
      }
    )
  end

  it "simply reassigns the index" do
    df.index = DaruLite::Index.new(['4', 'foo', :bar, 0, 23])
    # 'foo' is the second label, so it addresses the second row.
    expect(df.row['foo']).to eq(DaruLite::Vector.new([2, 22, 'b'], index: [:a, :b, :c]))
  end

  it "raises error for improper length index" do
    # Two labels for a five-row frame.
    expect { df.index = DaruLite::Index.new([1, 2]) }.to raise_error(ArgumentError)
  end

  it "is able to accept array" do
    df.index = (1..5).to_a
    # Explicit call parentheses: `new (1..5).to_a` relied on the ambiguous
    # space-before-parenthesis form.
    expect(df.index).to eq DaruLite::Index.new((1..5).to_a)
  end
end
|
167
|
+
|
168
|
+
describe "#reindex_vectors" do
  it "re indexes vectors and aligns accordingly" do
    source = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
    requested = DaruLite::Index.new([:b, 'a', :a])

    # The String 'a' names no existing column (the Symbol :a does), so that
    # column is expected to be all nil.
    aligned = DaruLite::DataFrame.new(
      {
        :b => [11, 22, 33, 44, 55],
        'a' => Array.new(5),
        :a => [1, 2, 3, 4, 5]
      },
      order: [:b, 'a', :a]
    )

    expect(source.reindex_vectors(requested)).to eq(aligned)
  end

  it 'raises ArgumentError if argument was not an index' do
    empty_frame = DaruLite::DataFrame.new([])

    expect { empty_frame.reindex_vectors([]) }.to raise_error(ArgumentError)
  end
end
|
189
|
+
|
190
|
+
describe "#vectors=" do
  # Three-column frame; every example assigns a new set of column labels.
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end

  it "simply reassigns vectors" do
    relabelled = DaruLite::Index.new(['b', 0, 'm'])
    df.vectors = relabelled

    # Labels are replaced by position; the column data stays where it was.
    expect(df.vectors).to eq(DaruLite::Index.new(['b', 0, 'm']))
    expect(df['b']).to eq(DaruLite::Vector.new([1, 2, 3, 4, 5]))
    expect(df[0]).to eq(DaruLite::Vector.new([11, 22, 33, 44, 55]))
    expect(df['m']).to eq(DaruLite::Vector.new(%w[a b c d e]))
  end

  it "raises error for improper length index" do
    # Five labels for three columns.
    too_long = DaruLite::Index.new([1, 2, '3', 4, '5'])

    expect { df.vectors = too_long }.to raise_error(ArgumentError)
  end

  it "change name of vectors in @data" do
    new_index_array = [:k, :l, :m]
    df.vectors = DaruLite::Index.new(new_index_array)

    expect(df.data.map(&:name)).to eq(new_index_array)
  end
end
|
221
|
+
end
|