daru 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -5
- data/CONTRIBUTING.md +2 -11
- data/History.md +18 -0
- data/README.md +109 -11
- data/daru.gemspec +11 -6
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/images/plot0.png +0 -0
- data/lib/daru.rb +5 -2
- data/lib/daru/core/group_by.rb +45 -45
- data/lib/daru/core/merge.rb +59 -1
- data/lib/daru/dataframe.rb +255 -226
- data/lib/daru/exceptions.rb +2 -0
- data/lib/daru/io/io.rb +41 -19
- data/lib/daru/io/sql_data_source.rb +116 -0
- data/lib/daru/vector.rb +124 -104
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +12 -2
- data/spec/core/merge_spec.rb +14 -1
- data/spec/dataframe_spec.rb +189 -158
- data/spec/io/io_spec.rb +80 -2
- data/spec/io/sql_data_source_spec.rb +67 -0
- data/spec/spec_helper.rb +4 -2
- data/spec/support/database_helper.rb +30 -0
- data/spec/vector_spec.rb +45 -46
- metadata +104 -16
- data/.build.sh +0 -14
data/lib/daru/version.rb
CHANGED
data/spec/core/group_by_spec.rb
CHANGED
@@ -5,7 +5,7 @@ describe Daru::Core::GroupBy do
|
|
5
5
|
@df = Daru::DataFrame.new({
|
6
6
|
a: %w{foo bar foo bar foo bar foo foo},
|
7
7
|
b: %w{one one two three two two one three},
|
8
|
-
c: [1 ,2
|
8
|
+
c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
9
9
|
d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
10
10
|
})
|
11
11
|
|
@@ -33,6 +33,16 @@ describe Daru::Core::GroupBy do
|
|
33
33
|
])
|
34
34
|
end
|
35
35
|
|
36
|
+
context 'with nil values' do
|
37
|
+
before do
|
38
|
+
@df[:w_nils] = Daru::Vector.new([11 ,nil ,33 ,nil ,nil ,66 ,77 ,88])
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'groups by nil values' do
|
42
|
+
expect(@df.group_by(:w_nils).groups[[nil]]).to eq([1,3,4])
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
36
46
|
context "#initialize" do
|
37
47
|
it "groups by a single tuple" do
|
38
48
|
expect(@sl_group.groups).to eq({
|
@@ -326,4 +336,4 @@ describe Daru::Core::GroupBy do
|
|
326
336
|
context "#[]" do
|
327
337
|
pending
|
328
338
|
end
|
329
|
-
end
|
339
|
+
end
|
data/spec/core/merge_spec.rb
CHANGED
@@ -11,6 +11,10 @@ describe Daru::DataFrame do
|
|
11
11
|
:id => [1,2,3,4],
|
12
12
|
:name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
13
13
|
})
|
14
|
+
@right_many = Daru::DataFrame.new({
|
15
|
+
:id => [1,1,1,1],
|
16
|
+
:name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
17
|
+
})
|
14
18
|
end
|
15
19
|
|
16
20
|
it "performs an inner join of two dataframes" do
|
@@ -22,6 +26,15 @@ describe Daru::DataFrame do
|
|
22
26
|
expect(@left.join(@right, how: :inner, on: [:name])).to eq(answer)
|
23
27
|
end
|
24
28
|
|
29
|
+
it "performs an inner join of two dataframes that has one to many mapping" do
|
30
|
+
answer = Daru::DataFrame.new({
|
31
|
+
:id => [1,1,1,1],
|
32
|
+
:name_1 => ['Pirate', 'Pirate', 'Pirate', 'Pirate'],
|
33
|
+
:name_2 => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
34
|
+
}, order: [:id, :name_1, :name_2])
|
35
|
+
expect(@left.join(@right_many, how: :inner, on: [:id])).to eq(answer)
|
36
|
+
end
|
37
|
+
|
25
38
|
it "performs a full outer join" do
|
26
39
|
answer = Daru::DataFrame.new({
|
27
40
|
:id_1 => [1,2,3,4,nil,nil],
|
@@ -49,4 +62,4 @@ describe Daru::DataFrame do
|
|
49
62
|
expect(@left.join(@right, how: :right, on: [:name])).to eq(answer)
|
50
63
|
end
|
51
64
|
end
|
52
|
-
end
|
65
|
+
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -2,9 +2,9 @@ require 'spec_helper.rb'
|
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
4
|
before :each do
|
5
|
-
@data_frame = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
6
|
-
c: [11,22,33,44,55]},
|
7
|
-
order: [:a, :b, :c],
|
5
|
+
@data_frame = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
6
|
+
c: [11,22,33,44,55]},
|
7
|
+
order: [:a, :b, :c],
|
8
8
|
index: [:one, :two, :three, :four, :five])
|
9
9
|
tuples = [
|
10
10
|
[:a,:one,:bar],
|
@@ -32,14 +32,14 @@ describe Daru::DataFrame do
|
|
32
32
|
[:b,:one,:foo]])
|
33
33
|
|
34
34
|
@df_mi = Daru::DataFrame.new([
|
35
|
-
@vector_arry1,
|
36
|
-
@vector_arry2,
|
37
|
-
@vector_arry1,
|
35
|
+
@vector_arry1,
|
36
|
+
@vector_arry2,
|
37
|
+
@vector_arry1,
|
38
38
|
@vector_arry2], order: @order_mi, index: @multi_index)
|
39
39
|
end
|
40
40
|
|
41
41
|
context ".rows" do
|
42
|
-
before do
|
42
|
+
before do
|
43
43
|
@rows = [
|
44
44
|
[1,2,3,4,5],
|
45
45
|
[1,2,3,4,5],
|
@@ -80,9 +80,9 @@ describe Daru::DataFrame do
|
|
80
80
|
|
81
81
|
it "crates a DataFrame from rows (MultiIndex order)" do
|
82
82
|
rows = [
|
83
|
-
[11, 1, 11, 1],
|
84
|
-
[12, 2, 12, 2],
|
85
|
-
[13, 3, 13, 3],
|
83
|
+
[11, 1, 11, 1],
|
84
|
+
[12, 2, 12, 2],
|
85
|
+
[13, 3, 13, 3],
|
86
86
|
[14, 4, 14, 4]
|
87
87
|
]
|
88
88
|
index = Daru::MultiIndex.from_tuples([
|
@@ -119,7 +119,7 @@ describe Daru::DataFrame do
|
|
119
119
|
|
120
120
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
121
121
|
expect(df.a.class).to eq(Daru::Vector)
|
122
|
-
expect(df.a) .to eq([].dv(:a))
|
122
|
+
expect(df.a) .to eq([].dv(:a))
|
123
123
|
end
|
124
124
|
|
125
125
|
it "initializes from a Hash" do
|
@@ -129,29 +129,29 @@ describe Daru::DataFrame do
|
|
129
129
|
expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
|
130
130
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
131
131
|
expect(df.a.class).to eq(Daru::Vector)
|
132
|
-
expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
|
132
|
+
expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
|
133
133
|
end
|
134
134
|
|
135
135
|
it "initializes from a Hash of Vectors" do
|
136
|
-
df = Daru::DataFrame.new({b: [11,12,13,14,15].dv(:b, [:one, :two, :three, :four, :five]),
|
136
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15].dv(:b, [:one, :two, :three, :four, :five]),
|
137
137
|
a: [1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five])}, order: [:a, :b],
|
138
138
|
index: [:one, :two, :three, :four, :five])
|
139
139
|
|
140
140
|
expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
|
141
141
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
142
142
|
expect(df.a.class).to eq(Daru::Vector)
|
143
|
-
expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
143
|
+
expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
144
144
|
end
|
145
145
|
|
146
146
|
it "initializes from an Array of Hashes" do
|
147
147
|
df = Daru::DataFrame.new([{a: 1, b: 11}, {a: 2, b: 12}, {a: 3, b: 13},
|
148
|
-
{a: 4, b: 14}, {a: 5, b: 15}], order: [:b, :a],
|
148
|
+
{a: 4, b: 14}, {a: 5, b: 15}], order: [:b, :a],
|
149
149
|
index: [:one, :two, :three, :four, :five])
|
150
150
|
|
151
151
|
expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
|
152
152
|
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
153
153
|
expect(df.a.class).to eq(Daru::Vector)
|
154
|
-
expect(df.a) .to eq([1,2,3,4,5].dv(:a,[:one, :two, :three, :four, :five]))
|
154
|
+
expect(df.a) .to eq([1,2,3,4,5].dv(:a,[:one, :two, :three, :four, :five]))
|
155
155
|
end
|
156
156
|
|
157
157
|
it "initializes from Array of Arrays" do
|
@@ -175,7 +175,7 @@ describe Daru::DataFrame do
|
|
175
175
|
rows = Daru::Index.new [:one, :two, :three, :four, :five]
|
176
176
|
cols = Daru::Index.new [:a, :b]
|
177
177
|
|
178
|
-
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]}, order: cols,
|
178
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]}, order: cols,
|
179
179
|
index: rows)
|
180
180
|
|
181
181
|
expect(df.a) .to eq(Daru::Vector.new([1,2,3,4,5], order: [:a], index: rows))
|
@@ -193,14 +193,14 @@ describe Daru::DataFrame do
|
|
193
193
|
|
194
194
|
it "aligns indexes properly" do
|
195
195
|
df = Daru::DataFrame.new({
|
196
|
-
b: [11,12,13,14,15].dv(:b, [:two, :one, :four, :five, :three]),
|
196
|
+
b: [11,12,13,14,15].dv(:b, [:two, :one, :four, :five, :three]),
|
197
197
|
a: [1,2,3,4,5].dv(:a, [:two,:one,:three, :four, :five])
|
198
|
-
},
|
198
|
+
},
|
199
199
|
order: [:a, :b]
|
200
200
|
)
|
201
201
|
|
202
202
|
expect(df).to eq(Daru::DataFrame.new({
|
203
|
-
b: [14,13,12,15,11].dv(:b, [:five, :four, :one, :three, :two]),
|
203
|
+
b: [14,13,12,15,11].dv(:b, [:five, :four, :one, :three, :two]),
|
204
204
|
a: [5,4,2,3,1].dv(:a, [:five, :four, :one, :three, :two])
|
205
205
|
}, order: [:a, :b])
|
206
206
|
)
|
@@ -208,43 +208,43 @@ describe Daru::DataFrame do
|
|
208
208
|
|
209
209
|
it "adds nil values for missing indexes and aligns by index" do
|
210
210
|
df = Daru::DataFrame.new({
|
211
|
-
b: [11,12,13,14,15].dv(:b, [:two, :one, :four, :five, :three]),
|
211
|
+
b: [11,12,13,14,15].dv(:b, [:two, :one, :four, :five, :three]),
|
212
212
|
a: [1,2,3] .dv(:a, [:two,:one,:three])
|
213
|
-
},
|
213
|
+
},
|
214
214
|
order: [:a, :b]
|
215
215
|
)
|
216
216
|
|
217
217
|
expect(df).to eq(Daru::DataFrame.new({
|
218
|
-
b: [14,13,12,15,11].dv(:b, [:five, :four, :one, :three, :two]),
|
218
|
+
b: [14,13,12,15,11].dv(:b, [:five, :four, :one, :three, :two]),
|
219
219
|
a: [nil,nil,2,3,1].dv(:a, [:five, :four, :one, :three, :two])
|
220
|
-
},
|
220
|
+
},
|
221
221
|
order: [:a, :b])
|
222
222
|
)
|
223
223
|
end
|
224
224
|
|
225
225
|
it "adds nils in first vector when other vectors have many extra indexes" do
|
226
226
|
df = Daru::DataFrame.new({
|
227
|
-
b: [11] .dv(nil, [:one]),
|
228
|
-
a: [1,2,3] .dv(nil, [:one, :two, :three]),
|
227
|
+
b: [11] .dv(nil, [:one]),
|
228
|
+
a: [1,2,3] .dv(nil, [:one, :two, :three]),
|
229
229
|
c: [11,22,33,44,55] .dv(nil, [:one, :two, :three, :four, :five]),
|
230
230
|
d: [49,69,89,99,108,44].dv(nil, [:one, :two, :three, :four, :five, :six])
|
231
|
-
}, order: [:a, :b, :c, :d],
|
231
|
+
}, order: [:a, :b, :c, :d],
|
232
232
|
index: [:one, :two, :three, :four, :five, :six])
|
233
233
|
|
234
234
|
expect(df).to eq(Daru::DataFrame.new({
|
235
|
-
b: [11,nil,nil,nil,nil,nil].dv(nil, [:one, :two, :three, :four, :five, :six]),
|
236
|
-
a: [1,2,3,nil,nil,nil] .dv(nil, [:one, :two, :three, :four, :five, :six]),
|
235
|
+
b: [11,nil,nil,nil,nil,nil].dv(nil, [:one, :two, :three, :four, :five, :six]),
|
236
|
+
a: [1,2,3,nil,nil,nil] .dv(nil, [:one, :two, :three, :four, :five, :six]),
|
237
237
|
c: [11,22,33,44,55,nil] .dv(nil, [:one, :two, :three, :four, :five, :six]),
|
238
238
|
d: [49,69,89,99,108,44] .dv(nil, [:one, :two, :three, :four, :five, :six])
|
239
|
-
}, order: [:a, :b, :c, :d],
|
239
|
+
}, order: [:a, :b, :c, :d],
|
240
240
|
index: [:one, :two, :three, :four, :five, :six])
|
241
241
|
)
|
242
242
|
end
|
243
243
|
|
244
244
|
it "correctly matches the supplied DataFrame index with the individual vector indexes" do
|
245
245
|
df = Daru::DataFrame.new({
|
246
|
-
b: [11,12,13] .dv(nil, [:one, :bleh, :blah]),
|
247
|
-
a: [1,2,3,4,5].dv(nil, [:one, :two, :booh, :baah, :three]),
|
246
|
+
b: [11,12,13] .dv(nil, [:one, :bleh, :blah]),
|
247
|
+
a: [1,2,3,4,5].dv(nil, [:one, :two, :booh, :baah, :three]),
|
248
248
|
c: [11,22,33,44,55].dv(nil, [0,1,3,:three, :two])
|
249
249
|
}, order: [:a, :b, :c], index: [:one, :two, :three])
|
250
250
|
|
@@ -252,14 +252,14 @@ describe Daru::DataFrame do
|
|
252
252
|
b: [11,nil,nil].dv(nil, [:one, :two, :three]),
|
253
253
|
a: [1,2,5] .dv(nil, [:one, :two, :three]),
|
254
254
|
c: [nil,55,44] .dv(nil, [:one, :two, :three]),
|
255
|
-
},
|
255
|
+
},
|
256
256
|
order: [:a, :b, :c], index: [:one, :two, :three]
|
257
257
|
)
|
258
258
|
)
|
259
259
|
end
|
260
260
|
|
261
261
|
it "completes incomplete vectors" do
|
262
|
-
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
262
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
263
263
|
c: [11,22,33,44,55]}, order: [:a, :c])
|
264
264
|
|
265
265
|
expect(df.vectors).to eq([:a,:c,:b].to_index)
|
@@ -270,7 +270,7 @@ describe Daru::DataFrame do
|
|
270
270
|
b = Daru::Vector.new([1,2,3,4,5])
|
271
271
|
c = Daru::Vector.new([1,2,3,4,5])
|
272
272
|
df = Daru::DataFrame.new({a: a, b: b, c: c}, clone: false)
|
273
|
-
|
273
|
+
|
274
274
|
expect(df[:a].object_id).to eq(a.object_id)
|
275
275
|
expect(df[:b].object_id).to eq(b.object_id)
|
276
276
|
expect(df[:c].object_id).to eq(c.object_id)
|
@@ -298,21 +298,21 @@ describe Daru::DataFrame do
|
|
298
298
|
|
299
299
|
it "raises error for incomplete DataFrame index" do
|
300
300
|
expect {
|
301
|
-
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
302
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
301
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
302
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
303
303
|
index: [:one, :two, :three])
|
304
304
|
}.to raise_error
|
305
305
|
end
|
306
306
|
|
307
307
|
it "raises error for unequal sized vectors/arrays" do
|
308
308
|
expect {
|
309
|
-
df = Daru::DataFrame.new({b: [11,12,13], a: [1,2,3,4,5],
|
310
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
309
|
+
df = Daru::DataFrame.new({b: [11,12,13], a: [1,2,3,4,5],
|
310
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
311
311
|
index: [:one, :two, :three])
|
312
312
|
}.to raise_error
|
313
313
|
end
|
314
314
|
end
|
315
|
-
|
315
|
+
|
316
316
|
context Daru::MultiIndex do
|
317
317
|
it "creates empty DataFrame" do
|
318
318
|
df = Daru::DataFrame.new({}, order: @order_mi)
|
@@ -323,15 +323,15 @@ describe Daru::DataFrame do
|
|
323
323
|
|
324
324
|
it "creates from Hash" do
|
325
325
|
df = Daru::DataFrame.new({
|
326
|
-
[:a,:one,:bar] => @vector_arry1,
|
327
|
-
[:a,:two,:baz] => @vector_arry2,
|
328
|
-
[:b,:one,:foo] => @vector_arry1,
|
326
|
+
[:a,:one,:bar] => @vector_arry1,
|
327
|
+
[:a,:two,:baz] => @vector_arry2,
|
328
|
+
[:b,:one,:foo] => @vector_arry1,
|
329
329
|
[:b,:two,:foo] => @vector_arry2
|
330
330
|
}, order: @order_mi, index: @multi_index)
|
331
331
|
|
332
332
|
expect(df.index) .to eq(@multi_index)
|
333
333
|
expect(df.vectors) .to eq(@order_mi)
|
334
|
-
expect(df[:a,:one,:bar]).to eq(Daru::Vector.new(@vector_arry1,
|
334
|
+
expect(df[:a,:one,:bar]).to eq(Daru::Vector.new(@vector_arry1,
|
335
335
|
index: @multi_index))
|
336
336
|
end
|
337
337
|
|
@@ -340,12 +340,12 @@ describe Daru::DataFrame do
|
|
340
340
|
end
|
341
341
|
|
342
342
|
it "creates from Array of Arrays" do
|
343
|
-
df = Daru::DataFrame.new([@vector_arry1, @vector_arry2, @vector_arry1,
|
343
|
+
df = Daru::DataFrame.new([@vector_arry1, @vector_arry2, @vector_arry1,
|
344
344
|
@vector_arry2], index: @multi_index, order: @order_mi)
|
345
345
|
|
346
346
|
expect(df.index) .to eq(@multi_index)
|
347
347
|
expect(df.vectors).to eq(@order_mi)
|
348
|
-
expect(df[:a, :one, :bar]).to eq(Daru::Vector.new(@vector_arry1,
|
348
|
+
expect(df[:a, :one, :bar]).to eq(Daru::Vector.new(@vector_arry1,
|
349
349
|
index: @multi_index))
|
350
350
|
end
|
351
351
|
|
@@ -366,9 +366,9 @@ describe Daru::DataFrame do
|
|
366
366
|
[:a,:one,:baz]
|
367
367
|
])
|
368
368
|
mi_sorted = Daru::MultiIndex.from_tuples([
|
369
|
-
[:a, :one, :bar],
|
370
|
-
[:a, :one, :baz],
|
371
|
-
[:b, :one, :foo],
|
369
|
+
[:a, :one, :bar],
|
370
|
+
[:a, :one, :baz],
|
371
|
+
[:b, :one, :foo],
|
372
372
|
[:b, :two, :foo]
|
373
373
|
])
|
374
374
|
order = Daru::MultiIndex.from_tuples([
|
@@ -398,8 +398,8 @@ describe Daru::DataFrame do
|
|
398
398
|
context "#[]" do
|
399
399
|
context Daru::Index do
|
400
400
|
before :each do
|
401
|
-
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
402
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
401
|
+
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
402
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
403
403
|
index: [:one, :two, :three, :four, :five])
|
404
404
|
end
|
405
405
|
|
@@ -408,12 +408,12 @@ describe Daru::DataFrame do
|
|
408
408
|
end
|
409
409
|
|
410
410
|
it "returns a Vector by default" do
|
411
|
-
expect(@df[:a]).to eq(Daru::Vector.new([1,2,3,4,5], name: :a,
|
411
|
+
expect(@df[:a]).to eq(Daru::Vector.new([1,2,3,4,5], name: :a,
|
412
412
|
index: [:one, :two, :three, :four, :five]))
|
413
413
|
end
|
414
414
|
|
415
415
|
it "returns a DataFrame" do
|
416
|
-
temp = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
416
|
+
temp = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
417
417
|
order: [:a, :b], index: [:one, :two, :three, :four, :five])
|
418
418
|
|
419
419
|
expect(@df[:a, :b]).to eq(temp)
|
@@ -464,17 +464,17 @@ describe Daru::DataFrame do
|
|
464
464
|
context "#[]=" do
|
465
465
|
context Daru::Index do
|
466
466
|
before :each do
|
467
|
-
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
468
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
467
|
+
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
468
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
469
469
|
index: [:one, :two, :three, :four, :five])
|
470
470
|
end
|
471
471
|
|
472
472
|
it "assigns directly with the []= operator" do
|
473
473
|
@data_frame[:a] = [100,200,300,400,500]
|
474
474
|
expect(@data_frame).to eq(Daru::DataFrame.new({
|
475
|
-
b: [11,12,13,14,15],
|
476
|
-
a: [100,200,300,400,500],
|
477
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
475
|
+
b: [11,12,13,14,15],
|
476
|
+
a: [100,200,300,400,500],
|
477
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
478
478
|
index: [:one, :two, :three, :four, :five]))
|
479
479
|
end
|
480
480
|
|
@@ -500,12 +500,12 @@ describe Daru::DataFrame do
|
|
500
500
|
@df[:woo] = [69,99,108,85,49]
|
501
501
|
|
502
502
|
expect(@df.woo.index).to eq([:one, :two, :three, :four, :five].to_index)
|
503
|
-
end
|
503
|
+
end
|
504
504
|
|
505
505
|
it "matches index of vector to be inserted with the DataFrame index" do
|
506
506
|
@df[:shankar] = [69,99,108,85,49].dv(:shankar, [:two, :one, :three, :five, :four])
|
507
507
|
|
508
|
-
expect(@df.shankar).to eq([99,69,108,49,85].dv(:shankar,
|
508
|
+
expect(@df.shankar).to eq([99,69,108,49,85].dv(:shankar,
|
509
509
|
[:one, :two, :three, :four, :five]))
|
510
510
|
end
|
511
511
|
|
@@ -535,12 +535,12 @@ describe Daru::DataFrame do
|
|
535
535
|
|
536
536
|
it "assigns all sub-indexes when a top level index is specified" do
|
537
537
|
@df_mi[:a] = [100,200,300,400,100,200,300,400,100,200,300,400]
|
538
|
-
|
538
|
+
|
539
539
|
expect(@df_mi).to eq(Daru::DataFrame.new([
|
540
540
|
[100,200,300,400,100,200,300,400,100,200,300,400],
|
541
541
|
[100,200,300,400,100,200,300,400,100,200,300,400],
|
542
542
|
@vector_arry1,
|
543
|
-
@vector_arry2], index: @multi_index, order: @order_mi))
|
543
|
+
@vector_arry2], index: @multi_index, order: @order_mi))
|
544
544
|
end
|
545
545
|
|
546
546
|
it "creates a new vector when full index specfied" do
|
@@ -551,7 +551,7 @@ describe Daru::DataFrame do
|
|
551
551
|
[:b,:one,:foo],
|
552
552
|
[:c,:one,:bar]])
|
553
553
|
answer = Daru::DataFrame.new([
|
554
|
-
@vector_arry1,
|
554
|
+
@vector_arry1,
|
555
555
|
@vector_arry2,
|
556
556
|
@vector_arry1,
|
557
557
|
@vector_arry2,
|
@@ -567,8 +567,8 @@ describe Daru::DataFrame do
|
|
567
567
|
context "#row[]=" do
|
568
568
|
context Daru::Index do
|
569
569
|
before :each do
|
570
|
-
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
571
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
570
|
+
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
571
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
572
572
|
index: [:one, :two, :three, :four, :five])
|
573
573
|
end
|
574
574
|
|
@@ -612,7 +612,7 @@ describe Daru::DataFrame do
|
|
612
612
|
|
613
613
|
it "correctly aligns assigned DV by index" do
|
614
614
|
@df.row[:two] = [9,2,11].dv(nil, [:b, :a, :c])
|
615
|
-
|
615
|
+
|
616
616
|
expect(@df.row[:two]).to eq([2,9,11].dv(:two, [:a, :b, :c]))
|
617
617
|
end
|
618
618
|
|
@@ -649,8 +649,8 @@ describe Daru::DataFrame do
|
|
649
649
|
context "#row" do
|
650
650
|
context Daru::Index do
|
651
651
|
before :each do
|
652
|
-
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
653
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
652
|
+
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
653
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
654
654
|
index: [:one, :two, :three, :four, :five])
|
655
655
|
end
|
656
656
|
|
@@ -664,16 +664,16 @@ describe Daru::DataFrame do
|
|
664
664
|
|
665
665
|
it "returns a DataFrame when specifying numeric Range" do
|
666
666
|
expect(@df.row[0..2]).to eq(
|
667
|
-
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
668
|
-
c: [11,22,33]}, order: [:a, :b, :c],
|
667
|
+
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
668
|
+
c: [11,22,33]}, order: [:a, :b, :c],
|
669
669
|
index: [:one, :two, :three])
|
670
670
|
)
|
671
671
|
end
|
672
672
|
|
673
673
|
it "returns a DataFrame when specifying symbolic Range" do
|
674
674
|
expect(@df.row[:one..:three]).to eq(
|
675
|
-
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
676
|
-
c: [11,22,33]}, order: [:a, :b, :c],
|
675
|
+
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
676
|
+
c: [11,22,33]}, order: [:a, :b, :c],
|
677
677
|
index: [:one, :two, :three])
|
678
678
|
)
|
679
679
|
end
|
@@ -687,11 +687,11 @@ describe Daru::DataFrame do
|
|
687
687
|
end
|
688
688
|
|
689
689
|
it "returns a row with given Integer index for default index-less DataFrame" do
|
690
|
-
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
690
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
691
691
|
c: [11,22,33,44,55]}, order: [:a, :b, :c])
|
692
692
|
|
693
693
|
expect(df.row[0]).to eq([1,11,11].dv(nil, [:a, :b, :c]))
|
694
|
-
end
|
694
|
+
end
|
695
695
|
end
|
696
696
|
|
697
697
|
context Daru::MultiIndex do
|
@@ -762,10 +762,10 @@ describe Daru::DataFrame do
|
|
762
762
|
|
763
763
|
context "#==" do
|
764
764
|
it "compares by vectors, index and values of a DataFrame (ignores name)" do
|
765
|
-
a = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
765
|
+
a = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
766
766
|
order: [:a, :b], index: [:one, :two, :three, :four, :five])
|
767
767
|
|
768
|
-
b = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
768
|
+
b = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
769
769
|
order: [:a, :b], index: [:one, :two, :three, :four, :five])
|
770
770
|
|
771
771
|
expect(a).to eq(b)
|
@@ -778,11 +778,12 @@ describe Daru::DataFrame do
|
|
778
778
|
clo = @data_frame.dup
|
779
779
|
|
780
780
|
expect(clo.object_id) .not_to eq(@data_frame.object_id)
|
781
|
-
expect(clo.vectors.object_id).not_to eq(@data_frame.object_id)
|
782
|
-
expect(clo.index.object_id) .not_to eq(@data_frame.object_id)
|
781
|
+
expect(clo.vectors.object_id).not_to eq(@data_frame.vectors.object_id)
|
782
|
+
expect(clo.index.object_id) .not_to eq(@data_frame.index.object_id)
|
783
783
|
|
784
784
|
@data_frame.each_vector_with_index do |vector, index|
|
785
785
|
expect(vector.object_id).not_to eq(clo[index].object_id)
|
786
|
+
expect(vector.to_a.object_id).not_to eq(clo[index].to_a.object_id)
|
786
787
|
end
|
787
788
|
end
|
788
789
|
end
|
@@ -804,7 +805,7 @@ describe Daru::DataFrame do
|
|
804
805
|
a: [1 , 2, 3, nil, 4, nil, 5],
|
805
806
|
b: [nil, 2, 3, nil, 4, nil, 5],
|
806
807
|
c: [1, 2, 3, 43 , 4, nil, 5]
|
807
|
-
})
|
808
|
+
})
|
808
809
|
end
|
809
810
|
|
810
811
|
it "dups rows with non-missing data only" do
|
@@ -812,7 +813,7 @@ describe Daru::DataFrame do
|
|
812
813
|
a: [2, 3, 4, 5],
|
813
814
|
b: [2, 3, 4, 5],
|
814
815
|
c: [2, 3, 4, 5]
|
815
|
-
}, index: [1,2,4,6])
|
816
|
+
}, index: [1,2,4,6])
|
816
817
|
expect(@missing_data_df.dup_only_valid).to eq(df)
|
817
818
|
end
|
818
819
|
|
@@ -865,7 +866,7 @@ describe Daru::DataFrame do
|
|
865
866
|
ret = @data_frame.each_vector_with_index do |vector, index|
|
866
867
|
idxs << index
|
867
868
|
expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
|
868
|
-
expect(vector.class).to eq(Daru::Vector)
|
869
|
+
expect(vector.class).to eq(Daru::Vector)
|
869
870
|
end
|
870
871
|
|
871
872
|
expect(idxs).to eq([:a, :b, :c])
|
@@ -901,7 +902,7 @@ describe Daru::DataFrame do
|
|
901
902
|
it "iterates over all vectors" do
|
902
903
|
ret = @data_frame.each do |vector|
|
903
904
|
expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
|
904
|
-
expect(vector.class).to eq(Daru::Vector)
|
905
|
+
expect(vector.class).to eq(Daru::Vector)
|
905
906
|
end
|
906
907
|
|
907
908
|
expect(ret).to eq(@data_frame)
|
@@ -909,18 +910,18 @@ describe Daru::DataFrame do
|
|
909
910
|
|
910
911
|
it "returns Enumerable if no block specified" do
|
911
912
|
ret = @data_frame.each
|
912
|
-
expect(ret.is_a?(Enumerator)).to eq(true)
|
913
|
+
expect(ret.is_a?(Enumerator)).to eq(true)
|
913
914
|
end
|
914
915
|
end
|
915
916
|
|
916
917
|
context "#recode" do
|
917
918
|
before do
|
918
|
-
@ans_vector = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
919
|
-
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
919
|
+
@ans_vector = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
920
|
+
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
920
921
|
index: [:one, :two, :three, :four, :five])
|
921
922
|
|
922
|
-
@ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
923
|
-
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
923
|
+
@ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
924
|
+
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
924
925
|
index: [:one, :two, :three, :four, :five])
|
925
926
|
end
|
926
927
|
|
@@ -945,7 +946,7 @@ describe Daru::DataFrame do
|
|
945
946
|
context "#collect" do
|
946
947
|
before do
|
947
948
|
@df = Daru::DataFrame.new({
|
948
|
-
a: [1,2,3,4,5],
|
949
|
+
a: [1,2,3,4,5],
|
949
950
|
b: [11,22,33,44,55],
|
950
951
|
c: [1,2,3,4,5]
|
951
952
|
})
|
@@ -985,12 +986,12 @@ describe Daru::DataFrame do
|
|
985
986
|
|
986
987
|
context "#map!" do
|
987
988
|
before do
|
988
|
-
@ans_vector = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
989
|
-
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
989
|
+
@ans_vector = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
990
|
+
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
990
991
|
index: [:one, :two, :three, :four, :five])
|
991
992
|
|
992
|
-
@ans_row = Daru::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
|
993
|
-
c: [12,23,34,45,56]}, order: [:a, :b, :c],
|
993
|
+
@ans_row = Daru::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
|
994
|
+
c: [12,23,34,45,56]}, order: [:a, :b, :c],
|
994
995
|
index: [:one, :two, :three, :four, :five])
|
995
996
|
end
|
996
997
|
|
@@ -1020,7 +1021,7 @@ describe Daru::DataFrame do
|
|
1020
1021
|
|
1021
1022
|
expect(ret).to eq([
|
1022
1023
|
Daru::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
|
1023
|
-
Daru::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
|
1024
|
+
Daru::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
|
1024
1025
|
Daru::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
|
1025
1026
|
expect(idx).to eq([:a, :b, :c])
|
1026
1027
|
end
|
@@ -1045,9 +1046,9 @@ describe Daru::DataFrame do
|
|
1045
1046
|
it "deletes the specified vector" do
|
1046
1047
|
@data_frame.delete_vector :a
|
1047
1048
|
|
1048
|
-
expect(@data_frame).to eq(Daru::DataFrame.new({b: [11,12,13,14,15],
|
1049
|
-
c: [11,22,33,44,55]}, order: [:b, :c],
|
1050
|
-
index: [:one, :two, :three, :four, :five]))
|
1049
|
+
expect(@data_frame).to eq(Daru::DataFrame.new({b: [11,12,13,14,15],
|
1050
|
+
c: [11,22,33,44,55]}, order: [:b, :c],
|
1051
|
+
index: [:one, :two, :three, :four, :five]))
|
1051
1052
|
end
|
1052
1053
|
end
|
1053
1054
|
end
|
@@ -1056,7 +1057,7 @@ describe Daru::DataFrame do
|
|
1056
1057
|
it "deletes the specified row" do
|
1057
1058
|
@data_frame.delete_row :three
|
1058
1059
|
|
1059
|
-
expect(@data_frame).to eq(Daru::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
|
1060
|
+
expect(@data_frame).to eq(Daru::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
|
1060
1061
|
c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
|
1061
1062
|
end
|
1062
1063
|
end
|
@@ -1064,8 +1065,8 @@ describe Daru::DataFrame do
|
|
1064
1065
|
context "#keep_row_if" do
|
1065
1066
|
pending "changing row from under the iterator trips this"
|
1066
1067
|
it "keeps row if block evaluates to true" do
|
1067
|
-
df = Daru::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
|
1068
|
-
c: [10,20,30,40,50]}, order: [:a, :b, :c],
|
1068
|
+
df = Daru::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
|
1069
|
+
c: [10,20,30,40,50]}, order: [:a, :b, :c],
|
1069
1070
|
index: [:one, :two, :three, :four, :five])
|
1070
1071
|
|
1071
1072
|
df.keep_row_if do |row|
|
@@ -1081,16 +1082,16 @@ describe Daru::DataFrame do
|
|
1081
1082
|
vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
|
1082
1083
|
end
|
1083
1084
|
|
1084
|
-
expect(@data_frame).to eq(Daru::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
|
1085
|
+
expect(@data_frame).to eq(Daru::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
|
1085
1086
|
index: [:one, :two, :three, :four, :five]))
|
1086
1087
|
end
|
1087
1088
|
end
|
1088
1089
|
|
1089
1090
|
context "#filter_field" do
|
1090
1091
|
before do
|
1091
|
-
@df = Daru::DataFrame.new({
|
1092
|
-
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1093
|
-
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1092
|
+
@df = Daru::DataFrame.new({
|
1093
|
+
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1094
|
+
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1094
1095
|
:age => Daru::Vector.new([20, 23, 25, 27, 5]),
|
1095
1096
|
:city => Daru::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
1096
1097
|
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
@@ -1139,10 +1140,10 @@ describe Daru::DataFrame do
|
|
1139
1140
|
expect(arry).to eq(
|
1140
1141
|
[
|
1141
1142
|
[
|
1142
|
-
{a: 1, b: 11, c: 11},
|
1143
|
+
{a: 1, b: 11, c: 11},
|
1143
1144
|
{a: 2, b: 12, c: 22},
|
1144
1145
|
{a: 3, b: 13, c: 33},
|
1145
|
-
{a: 4, b: 14, c: 44},
|
1146
|
+
{a: 4, b: 14, c: 44},
|
1146
1147
|
{a: 5, b: 15, c: 55}
|
1147
1148
|
],
|
1148
1149
|
[
|
@@ -1159,13 +1160,13 @@ describe Daru::DataFrame do
|
|
1159
1160
|
|
1160
1161
|
context "#to_hash" do
|
1161
1162
|
it "converts to a hash" do
|
1162
|
-
expect(@data_frame.to_hash).to eq(
|
1163
|
+
expect(@data_frame.to_hash).to eq(
|
1163
1164
|
{
|
1164
|
-
a: Daru::Vector.new([1,2,3,4,5],
|
1165
|
-
index: [:one, :two, :three, :four, :five]),
|
1166
|
-
b: Daru::Vector.new([11,12,13,14,15],
|
1165
|
+
a: Daru::Vector.new([1,2,3,4,5],
|
1166
|
+
index: [:one, :two, :three, :four, :five]),
|
1167
|
+
b: Daru::Vector.new([11,12,13,14,15],
|
1167
1168
|
index: [:one, :two, :three, :four, :five]),
|
1168
|
-
c: Daru::Vector.new([11,22,33,44,55],
|
1169
|
+
c: Daru::Vector.new([11,22,33,44,55],
|
1169
1170
|
index: [:one, :two, :three, :four, :five])
|
1170
1171
|
}
|
1171
1172
|
)
|
@@ -1193,7 +1194,7 @@ describe Daru::DataFrame do
|
|
1193
1194
|
ans = @df.sort([:a], by: { a: a_sorter })
|
1194
1195
|
|
1195
1196
|
expect(ans).to eq(
|
1196
|
-
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3], c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']},
|
1197
|
+
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3], c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']},
|
1197
1198
|
index: [2,1,4,5,0,3])
|
1198
1199
|
)
|
1199
1200
|
expect(ans).to_not eq(@df)
|
@@ -1206,9 +1207,9 @@ describe Daru::DataFrame do
|
|
1206
1207
|
index: [2,1,0,5,4,3])
|
1207
1208
|
)
|
1208
1209
|
expect(ans).to_not eq(@df)
|
1209
|
-
end
|
1210
|
+
end
|
1210
1211
|
end
|
1211
|
-
|
1212
|
+
|
1212
1213
|
context Daru::MultiIndex do
|
1213
1214
|
pending
|
1214
1215
|
end
|
@@ -1217,7 +1218,7 @@ describe Daru::DataFrame do
|
|
1217
1218
|
context "#sort!" do
|
1218
1219
|
context Daru::Index do
|
1219
1220
|
before :each do
|
1220
|
-
@df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1],
|
1221
|
+
@df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1],
|
1221
1222
|
c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
|
1222
1223
|
end
|
1223
1224
|
|
@@ -1225,7 +1226,7 @@ describe Daru::DataFrame do
|
|
1225
1226
|
a_sorter = lambda { |a,b| a <=> b }
|
1226
1227
|
|
1227
1228
|
expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
|
1228
|
-
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3],
|
1229
|
+
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3],
|
1229
1230
|
c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']}, index: [2,1,4,5,0,3])
|
1230
1231
|
)
|
1231
1232
|
end
|
@@ -1253,14 +1254,14 @@ describe Daru::DataFrame do
|
|
1253
1254
|
|
1254
1255
|
it "sorts many vectors" do
|
1255
1256
|
d = Daru::DataFrame.new({a: [1,1,1,222,44,5,5,544], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
|
1256
|
-
|
1257
|
+
|
1257
1258
|
expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
|
1258
1259
|
Daru::DataFrame.new({a: [544,222,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
|
1259
1260
|
index: [7,3,4,6,5,0,1,2])
|
1260
1261
|
)
|
1261
1262
|
end
|
1262
1263
|
end
|
1263
|
-
|
1264
|
+
|
1264
1265
|
context Daru::MultiIndex do
|
1265
1266
|
pending
|
1266
1267
|
it "sorts the DataFrame when specified full tuple" do
|
@@ -1322,7 +1323,7 @@ describe Daru::DataFrame do
|
|
1322
1323
|
b: [11,22,33,44,55],
|
1323
1324
|
c: %w(a b c d e)
|
1324
1325
|
})
|
1325
|
-
|
1326
|
+
|
1326
1327
|
ans = df.reindex(Daru::Index.new([1,3,0,8,2]))
|
1327
1328
|
expect(ans).to eq(Daru::DataFrame.new({
|
1328
1329
|
a: [2,4,1,nil,3],
|
@@ -1352,9 +1353,9 @@ describe Daru::DataFrame do
|
|
1352
1353
|
|
1353
1354
|
context "#to_matrix" do
|
1354
1355
|
before do
|
1355
|
-
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
1356
|
-
c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
|
1357
|
-
order: [:a, :b, :c,:d,:e],
|
1356
|
+
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
1357
|
+
c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
|
1358
|
+
order: [:a, :b, :c,:d,:e],
|
1358
1359
|
index: [:one, :two, :three, :four, :five])
|
1359
1360
|
end
|
1360
1361
|
|
@@ -1371,9 +1372,9 @@ describe Daru::DataFrame do
|
|
1371
1372
|
|
1372
1373
|
context "#to_nmatrix" do
|
1373
1374
|
before do
|
1374
|
-
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
1375
|
-
c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
|
1376
|
-
order: [:a, :b, :c,:d,:e],
|
1375
|
+
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
1376
|
+
c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
|
1377
|
+
order: [:a, :b, :c,:d,:e],
|
1377
1378
|
index: [:one, :two, :three, :four, :five])
|
1378
1379
|
end
|
1379
1380
|
|
@@ -1383,7 +1384,7 @@ describe Daru::DataFrame do
|
|
1383
1384
|
2,12,22,
|
1384
1385
|
3,13,33,
|
1385
1386
|
4,14,44,
|
1386
|
-
5,15,55]
|
1387
|
+
5,15,55]
|
1387
1388
|
))
|
1388
1389
|
end
|
1389
1390
|
end
|
@@ -1392,10 +1393,10 @@ describe Daru::DataFrame do
|
|
1392
1393
|
context Daru::Index do
|
1393
1394
|
it "transposes a DataFrame including row and column indexing" do
|
1394
1395
|
expect(@data_frame.transpose).to eq(Daru::DataFrame.new({
|
1395
|
-
one: [1,11,11],
|
1396
|
-
two: [2,12,22],
|
1397
|
-
three: [3,13,33],
|
1398
|
-
four: [4,14,44],
|
1396
|
+
one: [1,11,11],
|
1397
|
+
two: [2,12,22],
|
1398
|
+
three: [3,13,33],
|
1399
|
+
four: [4,14,44],
|
1399
1400
|
five: [5,15,55]
|
1400
1401
|
}, index: [:a, :b, :c],
|
1401
1402
|
order: [:one, :two, :three, :four, :five])
|
@@ -1406,9 +1407,9 @@ describe Daru::DataFrame do
|
|
1406
1407
|
context Daru::MultiIndex do
|
1407
1408
|
it "transposes a DataFrame including row and column indexing" do
|
1408
1409
|
expect(@df_mi.transpose).to eq(Daru::DataFrame.new([
|
1409
|
-
@vector_arry1,
|
1410
|
-
@vector_arry2,
|
1411
|
-
@vector_arry1,
|
1410
|
+
@vector_arry1,
|
1411
|
+
@vector_arry2,
|
1412
|
+
@vector_arry1,
|
1412
1413
|
@vector_arry2].transpose, index: @order_mi, order: @multi_index))
|
1413
1414
|
end
|
1414
1415
|
end
|
@@ -1417,7 +1418,7 @@ describe Daru::DataFrame do
|
|
1417
1418
|
context "#pivot_table" do
|
1418
1419
|
before do
|
1419
1420
|
@df = Daru::DataFrame.new({
|
1420
|
-
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
1421
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
1421
1422
|
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
1422
1423
|
c: ['small','large','large','small','small','large','small','large','small'],
|
1423
1424
|
d: [1,2,2,3,3,4,5,6,7],
|
@@ -1434,7 +1435,7 @@ describe Daru::DataFrame do
|
|
1434
1435
|
|
1435
1436
|
it "creates row index as per (double) index argument and default aggregates to mean" do
|
1436
1437
|
agg_mi = Daru::MultiIndex.from_tuples(
|
1437
|
-
[
|
1438
|
+
[
|
1438
1439
|
['bar', 'large'],
|
1439
1440
|
['bar', 'small'],
|
1440
1441
|
['foo', 'large'],
|
@@ -1446,7 +1447,7 @@ describe Daru::DataFrame do
|
|
1446
1447
|
e: [10.0, 12.0, 4.0, 4.67]
|
1447
1448
|
}, index: agg_mi))
|
1448
1449
|
end
|
1449
|
-
|
1450
|
+
|
1450
1451
|
it "creates row and vector index as per (single) index and (single) vectors args" do
|
1451
1452
|
agg_vectors = Daru::MultiIndex.from_tuples([
|
1452
1453
|
[:d, 'one'],
|
@@ -1460,7 +1461,7 @@ describe Daru::DataFrame do
|
|
1460
1461
|
['foo']
|
1461
1462
|
]
|
1462
1463
|
)
|
1463
|
-
|
1464
|
+
|
1464
1465
|
expect(@df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
|
1465
1466
|
Daru::DataFrame.new(
|
1466
1467
|
[
|
@@ -1596,7 +1597,7 @@ describe Daru::DataFrame do
|
|
1596
1597
|
df.pivot_table(index: [:a])
|
1597
1598
|
}.to raise_error
|
1598
1599
|
end
|
1599
|
-
|
1600
|
+
|
1600
1601
|
it "raises error if atleast a row index is not specified" do
|
1601
1602
|
expect {
|
1602
1603
|
@df.pivot_table
|
@@ -1630,7 +1631,7 @@ describe Daru::DataFrame do
|
|
1630
1631
|
expect(@df_mi.summary.match("#{@df_mi.name}")).to_not eq(nil)
|
1631
1632
|
end
|
1632
1633
|
end
|
1633
|
-
|
1634
|
+
|
1634
1635
|
context "#to_gsl" do
|
1635
1636
|
it "converts to GSL::Matrix" do
|
1636
1637
|
rows = [[1,2,3,4,5],[11,12,13,14,15],[11,22,33,44,55]].transpose
|
@@ -1655,7 +1656,7 @@ describe Daru::DataFrame do
|
|
1655
1656
|
Daru::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
|
1656
1657
|
|
1657
1658
|
ds3 = Daru::DataFrame.new({ :a => e })
|
1658
|
-
exp = Daru::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
|
1659
|
+
exp = Daru::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
|
1659
1660
|
order: [:a_1, :b, :a_2])
|
1660
1661
|
|
1661
1662
|
expect(ds1.merge(ds3)).to eq(exp)
|
@@ -1703,7 +1704,7 @@ describe Daru::DataFrame do
|
|
1703
1704
|
b1 = Daru::Vector.new [nil, nil, 1, 1, 1, 2]
|
1704
1705
|
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
1705
1706
|
c = Daru::Vector.new [nil, 2, 4, 2, 2, 2]
|
1706
|
-
df = Daru::DataFrame.new({
|
1707
|
+
df = Daru::DataFrame.new({
|
1707
1708
|
:a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
|
1708
1709
|
|
1709
1710
|
expect(df.missing_values_rows).to eq(Daru::Vector.new [2, 3, 0, 1, 0, 1])
|
@@ -1763,12 +1764,12 @@ describe Daru::DataFrame do
|
|
1763
1764
|
|
1764
1765
|
context "#add_vectors_by_split_recode" do
|
1765
1766
|
before do
|
1766
|
-
@ds = Daru::DataFrame.new({
|
1767
|
-
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1768
|
-
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1767
|
+
@ds = Daru::DataFrame.new({
|
1768
|
+
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1769
|
+
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1769
1770
|
:age => Daru::Vector.new([20, 23, 25, 27, 5]),
|
1770
1771
|
:city => Daru::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
1771
|
-
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
1772
|
+
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
1772
1773
|
order: [:id, :name, :age, :city, :a1])
|
1773
1774
|
end
|
1774
1775
|
|
@@ -1783,12 +1784,12 @@ describe Daru::DataFrame do
|
|
1783
1784
|
|
1784
1785
|
context "#add_vectors_by_split" do
|
1785
1786
|
before do
|
1786
|
-
@ds = Daru::DataFrame.new({
|
1787
|
-
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1788
|
-
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1787
|
+
@ds = Daru::DataFrame.new({
|
1788
|
+
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1789
|
+
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1789
1790
|
:age => Daru::Vector.new([20, 23, 25, 27, 5]),
|
1790
1791
|
:city => Daru::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
1791
|
-
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
|
1792
|
+
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
|
1792
1793
|
}, order: [:id, :name, :age, :city, :a1])
|
1793
1794
|
end
|
1794
1795
|
|
@@ -1814,14 +1815,14 @@ describe Daru::DataFrame do
|
|
1814
1815
|
v2 = Daru::Vector.new [4, 3, 2, 1]
|
1815
1816
|
v3 = Daru::Vector.new [10, 20, 30, 40]
|
1816
1817
|
v4 = Daru::Vector.new %w(a b a b)
|
1817
|
-
@df = Daru::DataFrame.new({
|
1818
|
-
:v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :id => name
|
1818
|
+
@df = Daru::DataFrame.new({
|
1819
|
+
:v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :id => name
|
1819
1820
|
}, order: [:v1, :v2, :v3, :v4, :id])
|
1820
1821
|
end
|
1821
1822
|
|
1822
1823
|
it "correctly verifies data as per the block" do
|
1823
1824
|
# Correct
|
1824
|
-
t1 = create_test('If v4=a, v1 odd') do |r|
|
1825
|
+
t1 = create_test('If v4=a, v1 odd') do |r|
|
1825
1826
|
r[:v4] == 'b' or (r[:v4] == 'a' and r[:v1].odd?)
|
1826
1827
|
end
|
1827
1828
|
t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
|
@@ -1870,7 +1871,7 @@ describe Daru::DataFrame do
|
|
1870
1871
|
ev_a = Daru::Vector.new [0, 0, 0]
|
1871
1872
|
ev_b = Daru::Vector.new [1, 1, 0]
|
1872
1873
|
ev_c = Daru::Vector.new [0, 1, 1]
|
1873
|
-
df2 = Daru::DataFrame.new({
|
1874
|
+
df2 = Daru::DataFrame.new({
|
1874
1875
|
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c })
|
1875
1876
|
|
1876
1877
|
expect(df2).to eq(df)
|
@@ -1885,8 +1886,8 @@ describe Daru::DataFrame do
|
|
1885
1886
|
['3', 'alfred', nil, nil, nil, nil, nil, nil]
|
1886
1887
|
]
|
1887
1888
|
|
1888
|
-
df = Daru::DataFrame.rows(rows,
|
1889
|
-
order: ['id', 'name', 'car_color1', 'car_value1', 'car_color2',
|
1889
|
+
df = Daru::DataFrame.rows(rows,
|
1890
|
+
order: ['id', 'name', 'car_color1', 'car_value1', 'car_color2',
|
1890
1891
|
'car_value2', 'car_color3', 'car_value3'])
|
1891
1892
|
|
1892
1893
|
ids = Daru::Vector.new %w(1 1 2 2 2)
|
@@ -1905,7 +1906,7 @@ describe Daru::DataFrame do
|
|
1905
1906
|
context "#any?" do
|
1906
1907
|
before do
|
1907
1908
|
@df = Daru::DataFrame.new({
|
1908
|
-
a: [1,2,3,4,5],
|
1909
|
+
a: [1,2,3,4,5],
|
1909
1910
|
b: [10,20,30,40,50],
|
1910
1911
|
c: [11,22,33,44,55]})
|
1911
1912
|
end
|
@@ -1930,11 +1931,11 @@ describe Daru::DataFrame do
|
|
1930
1931
|
context "#all?" do
|
1931
1932
|
before do
|
1932
1933
|
@df = Daru::DataFrame.new({
|
1933
|
-
a: [1,2,3,4,5],
|
1934
|
+
a: [1,2,3,4,5],
|
1934
1935
|
b: [10,20,30,40,50],
|
1935
1936
|
c: [11,22,33,44,55]})
|
1936
1937
|
end
|
1937
|
-
|
1938
|
+
|
1938
1939
|
it "returns true if all of the vectors satisfy condition" do
|
1939
1940
|
expect(@df.all? { |v| v.mean < 40 }).to eq(true)
|
1940
1941
|
end
|
@@ -2087,4 +2088,34 @@ describe Daru::DataFrame do
|
|
2087
2088
|
}.to raise_error(ArgumentError)
|
2088
2089
|
end
|
2089
2090
|
end
|
2090
|
-
|
2091
|
+
|
2092
|
+
context "#concat" do
|
2093
|
+
before do
|
2094
|
+
@df1 = Daru::DataFrame.new({
|
2095
|
+
a: [1, 2, 3]
|
2096
|
+
})
|
2097
|
+
|
2098
|
+
@df2 = Daru::DataFrame.new({
|
2099
|
+
a: [4, 5, 6]
|
2100
|
+
})
|
2101
|
+
end
|
2102
|
+
|
2103
|
+
it 'does not modify the original dataframes' do
|
2104
|
+
df1_a = @df1[:a].to_a.dup
|
2105
|
+
df2_a = @df2[:a].to_a.dup
|
2106
|
+
|
2107
|
+
df_concat = @df1.concat @df2
|
2108
|
+
expect(@df1[:a].to_a).to eq df1_a
|
2109
|
+
expect(@df2[:a].to_a).to eq df2_a
|
2110
|
+
end
|
2111
|
+
|
2112
|
+
it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
|
2113
|
+
df1_a = @df1[:a].to_a.dup
|
2114
|
+
df2_a = @df2[:a].to_a.dup
|
2115
|
+
|
2116
|
+
df_concat = @df1.concat @df2
|
2117
|
+
expect(df_concat[:a].to_a).to eq df1_a + df2_a
|
2118
|
+
end
|
2119
|
+
|
2120
|
+
end
|
2121
|
+
end if mri?
|