daru 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Daru
|
2
|
-
VERSION =
|
3
|
-
end
|
2
|
+
VERSION = '0.1.3'.freeze
|
3
|
+
end
|
data/spec/core/group_by_spec.rb
CHANGED
@@ -41,6 +41,10 @@ describe Daru::Core::GroupBy do
|
|
41
41
|
it 'groups by nil values' do
|
42
42
|
expect(@df.group_by(:w_nils).groups[[nil]]).to eq([1,3,4])
|
43
43
|
end
|
44
|
+
|
45
|
+
it "uses a multi-index when nils are part of the grouping keys" do
|
46
|
+
expect(@df.group_by(:a, :w_nils).send(:multi_indexed_grouping?)).to be true
|
47
|
+
end
|
44
48
|
end
|
45
49
|
|
46
50
|
context "#initialize" do
|
@@ -85,6 +89,11 @@ describe Daru::Core::GroupBy do
|
|
85
89
|
it "returns a vector containing the size of each group" do
|
86
90
|
expect(@dl_group.size).to eq(Daru::Vector.new([1,1,1,2,1,2], index: @dl_multi_index))
|
87
91
|
end
|
92
|
+
|
93
|
+
it "returns an empty vector if given an empty dataframe" do
|
94
|
+
df = Daru::DataFrame.new({ a: [], b: [] })
|
95
|
+
expect(df.group_by(:a).size).to eq(Daru::Vector.new([]))
|
96
|
+
end
|
88
97
|
end
|
89
98
|
|
90
99
|
context "#get_group" do
|
@@ -336,4 +345,11 @@ describe Daru::Core::GroupBy do
|
|
336
345
|
context "#[]" do
|
337
346
|
pending
|
338
347
|
end
|
348
|
+
|
349
|
+
context "#reduce" do
|
350
|
+
it "returns a vector that concatenates strings in a group" do
|
351
|
+
string_concat = lambda { |result, row| result += row[:b] }
|
352
|
+
expect(@sl_group.reduce('', &string_concat)).to eq(Daru::Vector.new(['onethreetwo', 'onetwotwoonethree'], index: @sl_index))
|
353
|
+
end
|
354
|
+
end
|
339
355
|
end
|
data/spec/core/merge_spec.rb
CHANGED
@@ -15,51 +15,67 @@ describe Daru::DataFrame do
|
|
15
15
|
:id => [1,1,1,1],
|
16
16
|
:name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
17
17
|
})
|
18
|
+
@empty = Daru::DataFrame.new({
|
19
|
+
:id => [],
|
20
|
+
:name => []
|
21
|
+
})
|
18
22
|
end
|
19
23
|
|
20
24
|
it "performs an inner join of two dataframes" do
|
21
25
|
answer = Daru::DataFrame.new({
|
22
|
-
:id_1 => [1
|
23
|
-
:name => ['
|
24
|
-
:id_2 => [2
|
26
|
+
:id_1 => [3,1],
|
27
|
+
:name => ['Ninja', 'Pirate'],
|
28
|
+
:id_2 => [4,2]
|
25
29
|
}, order: [:id_1, :name, :id_2])
|
26
30
|
expect(@left.join(@right, how: :inner, on: [:name])).to eq(answer)
|
27
31
|
end
|
28
32
|
|
29
33
|
it "performs an inner join of two dataframes that has one to many mapping" do
|
30
34
|
answer = Daru::DataFrame.new({
|
31
|
-
:id => [1,1,1,1],
|
32
35
|
:name_1 => ['Pirate', 'Pirate', 'Pirate', 'Pirate'],
|
36
|
+
:id => [1,1,1,1],
|
33
37
|
:name_2 => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
34
|
-
}, order: [:
|
38
|
+
}, order: [:name_1, :id, :name_2])
|
35
39
|
expect(@left.join(@right_many, how: :inner, on: [:id])).to eq(answer)
|
36
40
|
end
|
37
41
|
|
38
42
|
it "performs a full outer join" do
|
39
43
|
answer = Daru::DataFrame.new({
|
40
|
-
:id_1 => [
|
41
|
-
:name => [
|
42
|
-
:id_2 => [
|
44
|
+
:id_1 => [nil,2,3,1,nil,4],
|
45
|
+
:name => ["Darth Vader", "Monkey", "Ninja", "Pirate", "Rutabaga", "Spaghetti"],
|
46
|
+
:id_2 => [3,nil,4,2,1,nil]
|
43
47
|
}, order: [:id_1, :name, :id_2])
|
44
48
|
expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
|
45
49
|
end
|
46
50
|
|
47
51
|
it "performs a left outer join", focus: true do
|
48
52
|
answer = Daru::DataFrame.new({
|
49
|
-
:id_1 => [
|
50
|
-
:name => [
|
51
|
-
:id_2 => [
|
53
|
+
:id_1 => [2,3,1,4],
|
54
|
+
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
55
|
+
:id_2 => [nil,4,2,nil]
|
52
56
|
}, order: [:id_1, :name, :id_2])
|
53
57
|
expect(@left.join(@right, how: :left, on: [:name])).to eq(answer)
|
54
58
|
end
|
55
59
|
|
60
|
+
it "performs a left join with an empty dataframe" do
|
61
|
+
|
62
|
+
answer = Daru::DataFrame.new({
|
63
|
+
:id_1 => [2,3,1,4],
|
64
|
+
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
65
|
+
:id_2 => [nil,nil,nil,nil]
|
66
|
+
}, order: [:id_1, :name, :id_2])
|
67
|
+
|
68
|
+
expect(@left.join(@empty, how: :left, on: [:name])).to eq(answer)
|
69
|
+
end
|
70
|
+
|
56
71
|
it "performs a right outer join" do
|
57
72
|
answer = Daru::DataFrame.new({
|
58
|
-
:id_1 => [nil,1,nil
|
59
|
-
:name => [
|
60
|
-
:id_2 => [
|
73
|
+
:id_1 => [nil,3,1,nil],
|
74
|
+
:name => ["Darth Vader", "Ninja", "Pirate", "Rutabaga"],
|
75
|
+
:id_2 => [3,4,2,1]
|
61
76
|
}, order: [:id_1, :name, :id_2])
|
62
77
|
expect(@left.join(@right, how: :right, on: [:name])).to eq(answer)
|
63
78
|
end
|
79
|
+
|
64
80
|
end
|
65
81
|
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -133,14 +133,17 @@ describe Daru::DataFrame do
|
|
133
133
|
end
|
134
134
|
|
135
135
|
it "initializes from a Hash of Vectors" do
|
136
|
-
|
137
|
-
|
138
|
-
|
136
|
+
va = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:one, :two, :three, :four, :five])
|
137
|
+
vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
|
138
|
+
|
139
|
+
df = Daru::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
|
139
140
|
|
140
141
|
expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
|
141
142
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
142
143
|
expect(df.a.class).to eq(Daru::Vector)
|
143
144
|
expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
145
|
+
expect(df.a.metadata).to eq({ cdc_type: 2 })
|
146
|
+
expect(df.b.metadata).to eq({})
|
144
147
|
end
|
145
148
|
|
146
149
|
it "initializes from an Array of Hashes" do
|
@@ -419,6 +422,13 @@ describe Daru::DataFrame do
|
|
419
422
|
expect(@df[:a, :b]).to eq(temp)
|
420
423
|
end
|
421
424
|
|
425
|
+
it "returns a DataFrame with metadata" do
|
426
|
+
@df[:a].metadata = "alpha"
|
427
|
+
@df[:b].metadata = "beta"
|
428
|
+
subset_df = @df[:a, :b]
|
429
|
+
expect([:a, :b].map { |v| subset_df[v].metadata }).to eq(["alpha", "beta"])
|
430
|
+
end
|
431
|
+
|
422
432
|
it "accesses vector with Integer index" do
|
423
433
|
expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
424
434
|
end
|
@@ -455,7 +465,7 @@ describe Daru::DataFrame do
|
|
455
465
|
end
|
456
466
|
|
457
467
|
it "returns a Vector if the last level of MultiIndex is tracked" do
|
458
|
-
expect(@df_mi[:a, :one]).to eq(
|
468
|
+
expect(@df_mi[:a, :one, :bar]).to eq(
|
459
469
|
Daru::Vector.new(@vector_arry1, index: @multi_index))
|
460
470
|
end
|
461
471
|
end
|
@@ -521,6 +531,31 @@ describe Daru::DataFrame do
|
|
521
531
|
}.to raise_error
|
522
532
|
end
|
523
533
|
|
534
|
+
it "assigns correct name given empty dataframe" do
|
535
|
+
df_empty = Daru::DataFrame.new({})
|
536
|
+
df_empty[:a] = 1..5
|
537
|
+
df_empty[:b] = 1..5
|
538
|
+
|
539
|
+
expect(df_empty[:a].name).to equal(:a)
|
540
|
+
expect(df_empty[:b].name).to equal(:b)
|
541
|
+
end
|
542
|
+
|
543
|
+
it "copies metadata when the target is a vector" do
|
544
|
+
vec = Daru::Vector.new(1.upto(@df.size), index: @df.index, metadata: { cdc_type: 2 })
|
545
|
+
@df[:woo] = vec.dup
|
546
|
+
expect(@df[:woo].metadata).to eq vec.metadata
|
547
|
+
end
|
548
|
+
|
549
|
+
it "doesn't delete metadata when the source is a dataframe with empty vectors" do
|
550
|
+
empty_df = Daru::DataFrame.new({
|
551
|
+
a: Daru::Vector.new([], metadata: 'alpha'),
|
552
|
+
b: Daru::Vector.new([], metadata: 'beta'),
|
553
|
+
})
|
554
|
+
|
555
|
+
empty_df[:c] = Daru::Vector.new(1.upto(3))
|
556
|
+
expect(empty_df[:a].metadata).to eq 'alpha'
|
557
|
+
end
|
558
|
+
|
524
559
|
it "appends multiple vectors at a time" do
|
525
560
|
# TODO
|
526
561
|
end
|
@@ -561,6 +596,13 @@ describe Daru::DataFrame do
|
|
561
596
|
|
562
597
|
expect(@df_mi).to eq(answer)
|
563
598
|
end
|
599
|
+
|
600
|
+
it "assigns correct name given empty dataframe" do
|
601
|
+
df_empty = Daru::DataFrame.new([], index: @multi_index, order: @order_mi)
|
602
|
+
df_empty[:c, :one, :bar] = 1..12
|
603
|
+
|
604
|
+
expect(df_empty[:c, :one, :bar].name).to eq "conebar"
|
605
|
+
end
|
564
606
|
end
|
565
607
|
end
|
566
608
|
|
@@ -848,6 +890,16 @@ describe Daru::DataFrame do
|
|
848
890
|
expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
|
849
891
|
expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
|
850
892
|
end
|
893
|
+
|
894
|
+
it "original dataframe remains unaffected when operations are applied
|
895
|
+
on cloned data frame" do
|
896
|
+
original = @data_frame.dup
|
897
|
+
cloned = @data_frame.clone
|
898
|
+
cloned.delete_vector :a
|
899
|
+
|
900
|
+
expect(@data_frame).to eq(original)
|
901
|
+
end
|
902
|
+
|
851
903
|
end
|
852
904
|
|
853
905
|
context "#clone_structure" do
|
@@ -923,6 +975,17 @@ describe Daru::DataFrame do
|
|
923
975
|
@ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
924
976
|
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
925
977
|
index: [:one, :two, :three, :four, :five])
|
978
|
+
|
979
|
+
@data_frame_date_time = @data_frame.dup
|
980
|
+
@data_frame_date_time.index = Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
|
981
|
+
|
982
|
+
@ans_vector_date_time = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
983
|
+
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
984
|
+
index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
|
985
|
+
|
986
|
+
@ans_rows_date_time = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
987
|
+
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
988
|
+
index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
|
926
989
|
end
|
927
990
|
|
928
991
|
it "maps over the vectors of a DataFrame and returns a DataFrame" do
|
@@ -941,6 +1004,24 @@ describe Daru::DataFrame do
|
|
941
1004
|
|
942
1005
|
expect(ret).to eq(@ans_rows)
|
943
1006
|
end
|
1007
|
+
|
1008
|
+
it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
|
1009
|
+
ret = @data_frame_date_time.recode do |vector|
|
1010
|
+
vector.map! { |e| e += 10}
|
1011
|
+
end
|
1012
|
+
|
1013
|
+
expect(ret).to eq(@ans_vector_date_time)
|
1014
|
+
end
|
1015
|
+
|
1016
|
+
it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
|
1017
|
+
ret = @data_frame_date_time.recode(:row) do |row|
|
1018
|
+
expect(row.class).to eq(Daru::Vector)
|
1019
|
+
row.map! { |e| e*e }
|
1020
|
+
end
|
1021
|
+
|
1022
|
+
expect(ret).to eq(@ans_rows_date_time)
|
1023
|
+
end
|
1024
|
+
|
944
1025
|
end
|
945
1026
|
|
946
1027
|
context "#collect" do
|
@@ -1053,6 +1134,18 @@ describe Daru::DataFrame do
|
|
1053
1134
|
end
|
1054
1135
|
end
|
1055
1136
|
|
1137
|
+
context "#delete_vectors" do
|
1138
|
+
context Daru::Index do
|
1139
|
+
it "deletes the specified vectors" do
|
1140
|
+
@data_frame.delete_vectors :a, :b
|
1141
|
+
|
1142
|
+
expect(@data_frame).to eq(Daru::DataFrame.new({
|
1143
|
+
c: [11,22,33,44,55]}, order: [:c],
|
1144
|
+
index: [:one, :two, :three, :four, :five]))
|
1145
|
+
end
|
1146
|
+
end
|
1147
|
+
end
|
1148
|
+
|
1056
1149
|
context "#delete_row" do
|
1057
1150
|
it "deletes the specified row" do
|
1058
1151
|
@data_frame.delete_row :three
|
@@ -1158,9 +1251,9 @@ describe Daru::DataFrame do
|
|
1158
1251
|
end
|
1159
1252
|
end
|
1160
1253
|
|
1161
|
-
context "#
|
1254
|
+
context "#to_h" do
|
1162
1255
|
it "converts to a hash" do
|
1163
|
-
expect(@data_frame.
|
1256
|
+
expect(@data_frame.to_h).to eq(
|
1164
1257
|
{
|
1165
1258
|
a: Daru::Vector.new([1,2,3,4,5],
|
1166
1259
|
index: [:one, :two, :three, :four, :five]),
|
@@ -1187,15 +1280,16 @@ describe Daru::DataFrame do
|
|
1187
1280
|
context Daru::Index do
|
1188
1281
|
before :each do
|
1189
1282
|
@df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
|
1283
|
+
@df[:a].metadata = { cdc_type: 2 }
|
1190
1284
|
end
|
1191
1285
|
|
1192
1286
|
it "sorts according to given vector order (bang)" do
|
1193
|
-
a_sorter = lambda { |a
|
1287
|
+
a_sorter = lambda { |a| a }
|
1194
1288
|
ans = @df.sort([:a], by: { a: a_sorter })
|
1195
1289
|
|
1196
1290
|
expect(ans).to eq(
|
1197
|
-
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1
|
1198
|
-
index: [2,1,4,5,
|
1291
|
+
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
|
1292
|
+
index: [2,1,0,4,5,3])
|
1199
1293
|
)
|
1200
1294
|
expect(ans).to_not eq(@df)
|
1201
1295
|
end
|
@@ -1208,6 +1302,12 @@ describe Daru::DataFrame do
|
|
1208
1302
|
)
|
1209
1303
|
expect(ans).to_not eq(@df)
|
1210
1304
|
end
|
1305
|
+
|
1306
|
+
it "retains the vector metadata from the original dataframe" do
|
1307
|
+
ans = @df.sort([:a])
|
1308
|
+
expect(ans[:a].metadata).to eq({ cdc_type: 2 })
|
1309
|
+
end
|
1310
|
+
|
1211
1311
|
end
|
1212
1312
|
|
1213
1313
|
context Daru::MultiIndex do
|
@@ -1223,11 +1323,11 @@ describe Daru::DataFrame do
|
|
1223
1323
|
end
|
1224
1324
|
|
1225
1325
|
it "sorts according to given vector order (bang)" do
|
1226
|
-
a_sorter = lambda { |a
|
1326
|
+
a_sorter = lambda { |a| a }
|
1227
1327
|
|
1228
1328
|
expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
|
1229
|
-
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1
|
1230
|
-
c: ['aaa','aa','
|
1329
|
+
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
|
1330
|
+
c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
|
1231
1331
|
)
|
1232
1332
|
end
|
1233
1333
|
|
@@ -1260,6 +1360,68 @@ describe Daru::DataFrame do
|
|
1260
1360
|
index: [7,3,4,6,5,0,1,2])
|
1261
1361
|
)
|
1262
1362
|
end
|
1363
|
+
|
1364
|
+
it "places nils at the beginning when sorting ascedingly" do
|
1365
|
+
d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
|
1366
|
+
|
1367
|
+
expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
|
1368
|
+
Daru::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
|
1369
|
+
index: [7,3,0,1,2,6,5,4])
|
1370
|
+
)
|
1371
|
+
end
|
1372
|
+
|
1373
|
+
it "places nils at the beginning when sorting decendingly" do
|
1374
|
+
d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
|
1375
|
+
|
1376
|
+
expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
|
1377
|
+
Daru::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
|
1378
|
+
index: [7,3,4,6,5,0,1,2])
|
1379
|
+
)
|
1380
|
+
end
|
1381
|
+
|
1382
|
+
it "sorts vectors of non-numeric types with nils in ascending order" do
|
1383
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1384
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1385
|
+
|
1386
|
+
expect(non_numeric.sort!([:c], ascending: [true])).to eq(
|
1387
|
+
Daru::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
|
1388
|
+
c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
|
1389
|
+
index: [2, 5, 0, 1, 3, 4])
|
1390
|
+
)
|
1391
|
+
end
|
1392
|
+
|
1393
|
+
it "sorts vectors of non-numeric types with nils in descending order" do
|
1394
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1395
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1396
|
+
|
1397
|
+
expect(non_numeric.sort!([:c], ascending: [false])).to eq(
|
1398
|
+
Daru::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
|
1399
|
+
c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
|
1400
|
+
index: [2, 5, 4, 3, 0, 1])
|
1401
|
+
)
|
1402
|
+
end
|
1403
|
+
|
1404
|
+
it "sorts vectors with block provided and handle nils automatically" do
|
1405
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1406
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1407
|
+
|
1408
|
+
expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
|
1409
|
+
Daru::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
|
1410
|
+
c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
|
1411
|
+
index: [0, 3, 1, 2, 4, 5])
|
1412
|
+
)
|
1413
|
+
end
|
1414
|
+
|
1415
|
+
it "sorts vectors with block provided and nils handled manually" do
|
1416
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1417
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1418
|
+
|
1419
|
+
expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
|
1420
|
+
Daru::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
|
1421
|
+
c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
|
1422
|
+
index: [1, 2, 4, 5, 0, 3])
|
1423
|
+
)
|
1424
|
+
end
|
1263
1425
|
end
|
1264
1426
|
|
1265
1427
|
context Daru::MultiIndex do
|
@@ -1316,6 +1478,37 @@ describe Daru::DataFrame do
|
|
1316
1478
|
end
|
1317
1479
|
end
|
1318
1480
|
|
1481
|
+
context "#rename_vectors" do
|
1482
|
+
before do
|
1483
|
+
@df = Daru::DataFrame.new({
|
1484
|
+
a: [1,2,3,4,5],
|
1485
|
+
b: [11,22,33,44,55],
|
1486
|
+
c: %w(a b c d e)
|
1487
|
+
})
|
1488
|
+
end
|
1489
|
+
|
1490
|
+
it "renames vectors using a hash map" do
|
1491
|
+
@df.rename_vectors :a => :alpha, :c => :gamma
|
1492
|
+
expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
|
1493
|
+
end
|
1494
|
+
|
1495
|
+
it "overwrites vectors if the new name already exists" do
|
1496
|
+
saved_vector = @df[:a].dup
|
1497
|
+
|
1498
|
+
@df.rename_vectors :a => :b
|
1499
|
+
expect(@df.vectors.to_a).to eq([:b, :c])
|
1500
|
+
expect(@df[:b]).to eq saved_vector
|
1501
|
+
end
|
1502
|
+
|
1503
|
+
it "makes no changes if the old and new names are the same" do
|
1504
|
+
saved_vector = @df[:a].dup
|
1505
|
+
|
1506
|
+
@df.rename_vectors :a => :a
|
1507
|
+
expect(@df.vectors.to_a).to eq([:a, :b, :c])
|
1508
|
+
expect(@df[:a]).to eq saved_vector
|
1509
|
+
end
|
1510
|
+
end
|
1511
|
+
|
1319
1512
|
context "#reindex" do
|
1320
1513
|
it "re indexes and aligns accordingly" do
|
1321
1514
|
df = Daru::DataFrame.new({
|
@@ -1603,6 +1796,56 @@ describe Daru::DataFrame do
|
|
1603
1796
|
@df.pivot_table
|
1604
1797
|
}.to raise_error
|
1605
1798
|
end
|
1799
|
+
|
1800
|
+
it "aggregates when nils are present in value vector" do
|
1801
|
+
df = Daru::DataFrame.new({
|
1802
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
|
1803
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
1804
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
1805
|
+
d: [1,2,2,3,3,4,5,6,7],
|
1806
|
+
e: [2,nil,4,6,6,8,10,12,nil]
|
1807
|
+
})
|
1808
|
+
|
1809
|
+
expect(df.pivot_table index: [:a]).to eq(
|
1810
|
+
Daru::DataFrame.new({
|
1811
|
+
d: [5.0, 2.2, 7],
|
1812
|
+
e: [10.0, 4.5, nil]
|
1813
|
+
}, index: Daru::Index.new(['bar', 'foo', 'ice'])))
|
1814
|
+
end
|
1815
|
+
|
1816
|
+
it "works when nils are present in value vector" do
|
1817
|
+
df = Daru::DataFrame.new({
|
1818
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
|
1819
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
1820
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
1821
|
+
d: [1,2,2,3,3,4,5,6,7],
|
1822
|
+
e: [2,nil,4,6,6,8,10,12,nil]
|
1823
|
+
})
|
1824
|
+
|
1825
|
+
agg_vectors = Daru::MultiIndex.from_tuples(
|
1826
|
+
[
|
1827
|
+
[:e, 'one'],
|
1828
|
+
[:e, 'two']
|
1829
|
+
]
|
1830
|
+
)
|
1831
|
+
|
1832
|
+
agg_index = Daru::MultiIndex.from_tuples(
|
1833
|
+
[
|
1834
|
+
['bar'],
|
1835
|
+
['foo'],
|
1836
|
+
['ice']
|
1837
|
+
]
|
1838
|
+
)
|
1839
|
+
|
1840
|
+
expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
|
1841
|
+
Daru::DataFrame.new(
|
1842
|
+
[
|
1843
|
+
[9, 3, nil],
|
1844
|
+
[12, 6, nil]
|
1845
|
+
], order: agg_vectors, index: agg_index
|
1846
|
+
)
|
1847
|
+
)
|
1848
|
+
end
|
1606
1849
|
end
|
1607
1850
|
|
1608
1851
|
context "#shape" do
|
@@ -2092,11 +2335,13 @@ describe Daru::DataFrame do
|
|
2092
2335
|
context "#concat" do
|
2093
2336
|
before do
|
2094
2337
|
@df1 = Daru::DataFrame.new({
|
2095
|
-
a: [1, 2, 3]
|
2338
|
+
a: [1, 2, 3],
|
2339
|
+
b: [1, 2, 3]
|
2096
2340
|
})
|
2097
2341
|
|
2098
2342
|
@df2 = Daru::DataFrame.new({
|
2099
|
-
a: [4, 5, 6]
|
2343
|
+
a: [4, 5, 6],
|
2344
|
+
c: [4, 5, 6]
|
2100
2345
|
})
|
2101
2346
|
end
|
2102
2347
|
|
@@ -2117,5 +2362,14 @@ describe Daru::DataFrame do
|
|
2117
2362
|
expect(df_concat[:a].to_a).to eq df1_a + df2_a
|
2118
2363
|
end
|
2119
2364
|
|
2365
|
+
it 'fills in missing vectors with nils' do
|
2366
|
+
df1_b = @df1[:b].to_a.dup
|
2367
|
+
df2_c = @df2[:c].to_a.dup
|
2368
|
+
|
2369
|
+
df_concat = @df1.concat @df2
|
2370
|
+
expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
|
2371
|
+
expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
|
2372
|
+
end
|
2373
|
+
|
2120
2374
|
end
|
2121
2375
|
end if mri?
|