daru 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Daru
|
2
|
-
VERSION =
|
3
|
-
end
|
2
|
+
VERSION = '0.1.3'.freeze
|
3
|
+
end
|
data/spec/core/group_by_spec.rb
CHANGED
@@ -41,6 +41,10 @@ describe Daru::Core::GroupBy do
|
|
41
41
|
it 'groups by nil values' do
|
42
42
|
expect(@df.group_by(:w_nils).groups[[nil]]).to eq([1,3,4])
|
43
43
|
end
|
44
|
+
|
45
|
+
it "uses a multi-index when nils are part of the grouping keys" do
|
46
|
+
expect(@df.group_by(:a, :w_nils).send(:multi_indexed_grouping?)).to be true
|
47
|
+
end
|
44
48
|
end
|
45
49
|
|
46
50
|
context "#initialize" do
|
@@ -85,6 +89,11 @@ describe Daru::Core::GroupBy do
|
|
85
89
|
it "returns a vector containing the size of each group" do
|
86
90
|
expect(@dl_group.size).to eq(Daru::Vector.new([1,1,1,2,1,2], index: @dl_multi_index))
|
87
91
|
end
|
92
|
+
|
93
|
+
it "returns an empty vector if given an empty dataframe" do
|
94
|
+
df = Daru::DataFrame.new({ a: [], b: [] })
|
95
|
+
expect(df.group_by(:a).size).to eq(Daru::Vector.new([]))
|
96
|
+
end
|
88
97
|
end
|
89
98
|
|
90
99
|
context "#get_group" do
|
@@ -336,4 +345,11 @@ describe Daru::Core::GroupBy do
|
|
336
345
|
context "#[]" do
|
337
346
|
pending
|
338
347
|
end
|
348
|
+
|
349
|
+
context "#reduce" do
|
350
|
+
it "returns a vector that concatenates strings in a group" do
|
351
|
+
string_concat = lambda { |result, row| result += row[:b] }
|
352
|
+
expect(@sl_group.reduce('', &string_concat)).to eq(Daru::Vector.new(['onethreetwo', 'onetwotwoonethree'], index: @sl_index))
|
353
|
+
end
|
354
|
+
end
|
339
355
|
end
|
data/spec/core/merge_spec.rb
CHANGED
@@ -15,51 +15,67 @@ describe Daru::DataFrame do
|
|
15
15
|
:id => [1,1,1,1],
|
16
16
|
:name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
17
17
|
})
|
18
|
+
@empty = Daru::DataFrame.new({
|
19
|
+
:id => [],
|
20
|
+
:name => []
|
21
|
+
})
|
18
22
|
end
|
19
23
|
|
20
24
|
it "performs an inner join of two dataframes" do
|
21
25
|
answer = Daru::DataFrame.new({
|
22
|
-
:id_1 => [1
|
23
|
-
:name => ['
|
24
|
-
:id_2 => [2
|
26
|
+
:id_1 => [3,1],
|
27
|
+
:name => ['Ninja', 'Pirate'],
|
28
|
+
:id_2 => [4,2]
|
25
29
|
}, order: [:id_1, :name, :id_2])
|
26
30
|
expect(@left.join(@right, how: :inner, on: [:name])).to eq(answer)
|
27
31
|
end
|
28
32
|
|
29
33
|
it "performs an inner join of two dataframes that has one to many mapping" do
|
30
34
|
answer = Daru::DataFrame.new({
|
31
|
-
:id => [1,1,1,1],
|
32
35
|
:name_1 => ['Pirate', 'Pirate', 'Pirate', 'Pirate'],
|
36
|
+
:id => [1,1,1,1],
|
33
37
|
:name_2 => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
34
|
-
}, order: [:
|
38
|
+
}, order: [:name_1, :id, :name_2])
|
35
39
|
expect(@left.join(@right_many, how: :inner, on: [:id])).to eq(answer)
|
36
40
|
end
|
37
41
|
|
38
42
|
it "performs a full outer join" do
|
39
43
|
answer = Daru::DataFrame.new({
|
40
|
-
:id_1 => [
|
41
|
-
:name => [
|
42
|
-
:id_2 => [
|
44
|
+
:id_1 => [nil,2,3,1,nil,4],
|
45
|
+
:name => ["Darth Vader", "Monkey", "Ninja", "Pirate", "Rutabaga", "Spaghetti"],
|
46
|
+
:id_2 => [3,nil,4,2,1,nil]
|
43
47
|
}, order: [:id_1, :name, :id_2])
|
44
48
|
expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
|
45
49
|
end
|
46
50
|
|
47
51
|
it "performs a left outer join", focus: true do
|
48
52
|
answer = Daru::DataFrame.new({
|
49
|
-
:id_1 => [
|
50
|
-
:name => [
|
51
|
-
:id_2 => [
|
53
|
+
:id_1 => [2,3,1,4],
|
54
|
+
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
55
|
+
:id_2 => [nil,4,2,nil]
|
52
56
|
}, order: [:id_1, :name, :id_2])
|
53
57
|
expect(@left.join(@right, how: :left, on: [:name])).to eq(answer)
|
54
58
|
end
|
55
59
|
|
60
|
+
it "performs a left join with an empty dataframe" do
|
61
|
+
|
62
|
+
answer = Daru::DataFrame.new({
|
63
|
+
:id_1 => [2,3,1,4],
|
64
|
+
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
65
|
+
:id_2 => [nil,nil,nil,nil]
|
66
|
+
}, order: [:id_1, :name, :id_2])
|
67
|
+
|
68
|
+
expect(@left.join(@empty, how: :left, on: [:name])).to eq(answer)
|
69
|
+
end
|
70
|
+
|
56
71
|
it "performs a right outer join" do
|
57
72
|
answer = Daru::DataFrame.new({
|
58
|
-
:id_1 => [nil,1,nil
|
59
|
-
:name => [
|
60
|
-
:id_2 => [
|
73
|
+
:id_1 => [nil,3,1,nil],
|
74
|
+
:name => ["Darth Vader", "Ninja", "Pirate", "Rutabaga"],
|
75
|
+
:id_2 => [3,4,2,1]
|
61
76
|
}, order: [:id_1, :name, :id_2])
|
62
77
|
expect(@left.join(@right, how: :right, on: [:name])).to eq(answer)
|
63
78
|
end
|
79
|
+
|
64
80
|
end
|
65
81
|
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -133,14 +133,17 @@ describe Daru::DataFrame do
|
|
133
133
|
end
|
134
134
|
|
135
135
|
it "initializes from a Hash of Vectors" do
|
136
|
-
|
137
|
-
|
138
|
-
|
136
|
+
va = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:one, :two, :three, :four, :five])
|
137
|
+
vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
|
138
|
+
|
139
|
+
df = Daru::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
|
139
140
|
|
140
141
|
expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
|
141
142
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
142
143
|
expect(df.a.class).to eq(Daru::Vector)
|
143
144
|
expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
145
|
+
expect(df.a.metadata).to eq({ cdc_type: 2 })
|
146
|
+
expect(df.b.metadata).to eq({})
|
144
147
|
end
|
145
148
|
|
146
149
|
it "initializes from an Array of Hashes" do
|
@@ -419,6 +422,13 @@ describe Daru::DataFrame do
|
|
419
422
|
expect(@df[:a, :b]).to eq(temp)
|
420
423
|
end
|
421
424
|
|
425
|
+
it "returns a DataFrame with metadata" do
|
426
|
+
@df[:a].metadata = "alpha"
|
427
|
+
@df[:b].metadata = "beta"
|
428
|
+
subset_df = @df[:a, :b]
|
429
|
+
expect([:a, :b].map { |v| subset_df[v].metadata }).to eq(["alpha", "beta"])
|
430
|
+
end
|
431
|
+
|
422
432
|
it "accesses vector with Integer index" do
|
423
433
|
expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
424
434
|
end
|
@@ -455,7 +465,7 @@ describe Daru::DataFrame do
|
|
455
465
|
end
|
456
466
|
|
457
467
|
it "returns a Vector if the last level of MultiIndex is tracked" do
|
458
|
-
expect(@df_mi[:a, :one]).to eq(
|
468
|
+
expect(@df_mi[:a, :one, :bar]).to eq(
|
459
469
|
Daru::Vector.new(@vector_arry1, index: @multi_index))
|
460
470
|
end
|
461
471
|
end
|
@@ -521,6 +531,31 @@ describe Daru::DataFrame do
|
|
521
531
|
}.to raise_error
|
522
532
|
end
|
523
533
|
|
534
|
+
it "assigns correct name given empty dataframe" do
|
535
|
+
df_empty = Daru::DataFrame.new({})
|
536
|
+
df_empty[:a] = 1..5
|
537
|
+
df_empty[:b] = 1..5
|
538
|
+
|
539
|
+
expect(df_empty[:a].name).to equal(:a)
|
540
|
+
expect(df_empty[:b].name).to equal(:b)
|
541
|
+
end
|
542
|
+
|
543
|
+
it "copies metadata when the target is a vector" do
|
544
|
+
vec = Daru::Vector.new(1.upto(@df.size), index: @df.index, metadata: { cdc_type: 2 })
|
545
|
+
@df[:woo] = vec.dup
|
546
|
+
expect(@df[:woo].metadata).to eq vec.metadata
|
547
|
+
end
|
548
|
+
|
549
|
+
it "doesn't delete metadata when the source is a dataframe with empty vectors" do
|
550
|
+
empty_df = Daru::DataFrame.new({
|
551
|
+
a: Daru::Vector.new([], metadata: 'alpha'),
|
552
|
+
b: Daru::Vector.new([], metadata: 'beta'),
|
553
|
+
})
|
554
|
+
|
555
|
+
empty_df[:c] = Daru::Vector.new(1.upto(3))
|
556
|
+
expect(empty_df[:a].metadata).to eq 'alpha'
|
557
|
+
end
|
558
|
+
|
524
559
|
it "appends multiple vectors at a time" do
|
525
560
|
# TODO
|
526
561
|
end
|
@@ -561,6 +596,13 @@ describe Daru::DataFrame do
|
|
561
596
|
|
562
597
|
expect(@df_mi).to eq(answer)
|
563
598
|
end
|
599
|
+
|
600
|
+
it "assigns correct name given empty dataframe" do
|
601
|
+
df_empty = Daru::DataFrame.new([], index: @multi_index, order: @order_mi)
|
602
|
+
df_empty[:c, :one, :bar] = 1..12
|
603
|
+
|
604
|
+
expect(df_empty[:c, :one, :bar].name).to eq "conebar"
|
605
|
+
end
|
564
606
|
end
|
565
607
|
end
|
566
608
|
|
@@ -848,6 +890,16 @@ describe Daru::DataFrame do
|
|
848
890
|
expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
|
849
891
|
expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
|
850
892
|
end
|
893
|
+
|
894
|
+
it "original dataframe remains unaffected when operations are applied
|
895
|
+
on cloned data frame" do
|
896
|
+
original = @data_frame.dup
|
897
|
+
cloned = @data_frame.clone
|
898
|
+
cloned.delete_vector :a
|
899
|
+
|
900
|
+
expect(@data_frame).to eq(original)
|
901
|
+
end
|
902
|
+
|
851
903
|
end
|
852
904
|
|
853
905
|
context "#clone_structure" do
|
@@ -923,6 +975,17 @@ describe Daru::DataFrame do
|
|
923
975
|
@ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
924
976
|
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
925
977
|
index: [:one, :two, :three, :four, :five])
|
978
|
+
|
979
|
+
@data_frame_date_time = @data_frame.dup
|
980
|
+
@data_frame_date_time.index = Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
|
981
|
+
|
982
|
+
@ans_vector_date_time = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
983
|
+
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
984
|
+
index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
|
985
|
+
|
986
|
+
@ans_rows_date_time = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
987
|
+
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
988
|
+
index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
|
926
989
|
end
|
927
990
|
|
928
991
|
it "maps over the vectors of a DataFrame and returns a DataFrame" do
|
@@ -941,6 +1004,24 @@ describe Daru::DataFrame do
|
|
941
1004
|
|
942
1005
|
expect(ret).to eq(@ans_rows)
|
943
1006
|
end
|
1007
|
+
|
1008
|
+
it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
|
1009
|
+
ret = @data_frame_date_time.recode do |vector|
|
1010
|
+
vector.map! { |e| e += 10}
|
1011
|
+
end
|
1012
|
+
|
1013
|
+
expect(ret).to eq(@ans_vector_date_time)
|
1014
|
+
end
|
1015
|
+
|
1016
|
+
it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
|
1017
|
+
ret = @data_frame_date_time.recode(:row) do |row|
|
1018
|
+
expect(row.class).to eq(Daru::Vector)
|
1019
|
+
row.map! { |e| e*e }
|
1020
|
+
end
|
1021
|
+
|
1022
|
+
expect(ret).to eq(@ans_rows_date_time)
|
1023
|
+
end
|
1024
|
+
|
944
1025
|
end
|
945
1026
|
|
946
1027
|
context "#collect" do
|
@@ -1053,6 +1134,18 @@ describe Daru::DataFrame do
|
|
1053
1134
|
end
|
1054
1135
|
end
|
1055
1136
|
|
1137
|
+
context "#delete_vectors" do
|
1138
|
+
context Daru::Index do
|
1139
|
+
it "deletes the specified vectors" do
|
1140
|
+
@data_frame.delete_vectors :a, :b
|
1141
|
+
|
1142
|
+
expect(@data_frame).to eq(Daru::DataFrame.new({
|
1143
|
+
c: [11,22,33,44,55]}, order: [:c],
|
1144
|
+
index: [:one, :two, :three, :four, :five]))
|
1145
|
+
end
|
1146
|
+
end
|
1147
|
+
end
|
1148
|
+
|
1056
1149
|
context "#delete_row" do
|
1057
1150
|
it "deletes the specified row" do
|
1058
1151
|
@data_frame.delete_row :three
|
@@ -1158,9 +1251,9 @@ describe Daru::DataFrame do
|
|
1158
1251
|
end
|
1159
1252
|
end
|
1160
1253
|
|
1161
|
-
context "#
|
1254
|
+
context "#to_h" do
|
1162
1255
|
it "converts to a hash" do
|
1163
|
-
expect(@data_frame.
|
1256
|
+
expect(@data_frame.to_h).to eq(
|
1164
1257
|
{
|
1165
1258
|
a: Daru::Vector.new([1,2,3,4,5],
|
1166
1259
|
index: [:one, :two, :three, :four, :five]),
|
@@ -1187,15 +1280,16 @@ describe Daru::DataFrame do
|
|
1187
1280
|
context Daru::Index do
|
1188
1281
|
before :each do
|
1189
1282
|
@df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
|
1283
|
+
@df[:a].metadata = { cdc_type: 2 }
|
1190
1284
|
end
|
1191
1285
|
|
1192
1286
|
it "sorts according to given vector order (bang)" do
|
1193
|
-
a_sorter = lambda { |a
|
1287
|
+
a_sorter = lambda { |a| a }
|
1194
1288
|
ans = @df.sort([:a], by: { a: a_sorter })
|
1195
1289
|
|
1196
1290
|
expect(ans).to eq(
|
1197
|
-
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1
|
1198
|
-
index: [2,1,4,5,
|
1291
|
+
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
|
1292
|
+
index: [2,1,0,4,5,3])
|
1199
1293
|
)
|
1200
1294
|
expect(ans).to_not eq(@df)
|
1201
1295
|
end
|
@@ -1208,6 +1302,12 @@ describe Daru::DataFrame do
|
|
1208
1302
|
)
|
1209
1303
|
expect(ans).to_not eq(@df)
|
1210
1304
|
end
|
1305
|
+
|
1306
|
+
it "retains the vector metadata from the original dataframe" do
|
1307
|
+
ans = @df.sort([:a])
|
1308
|
+
expect(ans[:a].metadata).to eq({ cdc_type: 2 })
|
1309
|
+
end
|
1310
|
+
|
1211
1311
|
end
|
1212
1312
|
|
1213
1313
|
context Daru::MultiIndex do
|
@@ -1223,11 +1323,11 @@ describe Daru::DataFrame do
|
|
1223
1323
|
end
|
1224
1324
|
|
1225
1325
|
it "sorts according to given vector order (bang)" do
|
1226
|
-
a_sorter = lambda { |a
|
1326
|
+
a_sorter = lambda { |a| a }
|
1227
1327
|
|
1228
1328
|
expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
|
1229
|
-
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1
|
1230
|
-
c: ['aaa','aa','
|
1329
|
+
Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
|
1330
|
+
c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
|
1231
1331
|
)
|
1232
1332
|
end
|
1233
1333
|
|
@@ -1260,6 +1360,68 @@ describe Daru::DataFrame do
|
|
1260
1360
|
index: [7,3,4,6,5,0,1,2])
|
1261
1361
|
)
|
1262
1362
|
end
|
1363
|
+
|
1364
|
+
it "places nils at the beginning when sorting ascedingly" do
|
1365
|
+
d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
|
1366
|
+
|
1367
|
+
expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
|
1368
|
+
Daru::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
|
1369
|
+
index: [7,3,0,1,2,6,5,4])
|
1370
|
+
)
|
1371
|
+
end
|
1372
|
+
|
1373
|
+
it "places nils at the beginning when sorting decendingly" do
|
1374
|
+
d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
|
1375
|
+
|
1376
|
+
expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
|
1377
|
+
Daru::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
|
1378
|
+
index: [7,3,4,6,5,0,1,2])
|
1379
|
+
)
|
1380
|
+
end
|
1381
|
+
|
1382
|
+
it "sorts vectors of non-numeric types with nils in ascending order" do
|
1383
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1384
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1385
|
+
|
1386
|
+
expect(non_numeric.sort!([:c], ascending: [true])).to eq(
|
1387
|
+
Daru::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
|
1388
|
+
c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
|
1389
|
+
index: [2, 5, 0, 1, 3, 4])
|
1390
|
+
)
|
1391
|
+
end
|
1392
|
+
|
1393
|
+
it "sorts vectors of non-numeric types with nils in descending order" do
|
1394
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1395
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1396
|
+
|
1397
|
+
expect(non_numeric.sort!([:c], ascending: [false])).to eq(
|
1398
|
+
Daru::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
|
1399
|
+
c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
|
1400
|
+
index: [2, 5, 4, 3, 0, 1])
|
1401
|
+
)
|
1402
|
+
end
|
1403
|
+
|
1404
|
+
it "sorts vectors with block provided and handle nils automatically" do
|
1405
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1406
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1407
|
+
|
1408
|
+
expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
|
1409
|
+
Daru::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
|
1410
|
+
c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
|
1411
|
+
index: [0, 3, 1, 2, 4, 5])
|
1412
|
+
)
|
1413
|
+
end
|
1414
|
+
|
1415
|
+
it "sorts vectors with block provided and nils handled manually" do
|
1416
|
+
non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
|
1417
|
+
c: ['aaa','aaa',nil,'baaa','xxx',nil]})
|
1418
|
+
|
1419
|
+
expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
|
1420
|
+
Daru::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
|
1421
|
+
c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
|
1422
|
+
index: [1, 2, 4, 5, 0, 3])
|
1423
|
+
)
|
1424
|
+
end
|
1263
1425
|
end
|
1264
1426
|
|
1265
1427
|
context Daru::MultiIndex do
|
@@ -1316,6 +1478,37 @@ describe Daru::DataFrame do
|
|
1316
1478
|
end
|
1317
1479
|
end
|
1318
1480
|
|
1481
|
+
context "#rename_vectors" do
|
1482
|
+
before do
|
1483
|
+
@df = Daru::DataFrame.new({
|
1484
|
+
a: [1,2,3,4,5],
|
1485
|
+
b: [11,22,33,44,55],
|
1486
|
+
c: %w(a b c d e)
|
1487
|
+
})
|
1488
|
+
end
|
1489
|
+
|
1490
|
+
it "renames vectors using a hash map" do
|
1491
|
+
@df.rename_vectors :a => :alpha, :c => :gamma
|
1492
|
+
expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
|
1493
|
+
end
|
1494
|
+
|
1495
|
+
it "overwrites vectors if the new name already exists" do
|
1496
|
+
saved_vector = @df[:a].dup
|
1497
|
+
|
1498
|
+
@df.rename_vectors :a => :b
|
1499
|
+
expect(@df.vectors.to_a).to eq([:b, :c])
|
1500
|
+
expect(@df[:b]).to eq saved_vector
|
1501
|
+
end
|
1502
|
+
|
1503
|
+
it "makes no changes if the old and new names are the same" do
|
1504
|
+
saved_vector = @df[:a].dup
|
1505
|
+
|
1506
|
+
@df.rename_vectors :a => :a
|
1507
|
+
expect(@df.vectors.to_a).to eq([:a, :b, :c])
|
1508
|
+
expect(@df[:a]).to eq saved_vector
|
1509
|
+
end
|
1510
|
+
end
|
1511
|
+
|
1319
1512
|
context "#reindex" do
|
1320
1513
|
it "re indexes and aligns accordingly" do
|
1321
1514
|
df = Daru::DataFrame.new({
|
@@ -1603,6 +1796,56 @@ describe Daru::DataFrame do
|
|
1603
1796
|
@df.pivot_table
|
1604
1797
|
}.to raise_error
|
1605
1798
|
end
|
1799
|
+
|
1800
|
+
it "aggregates when nils are present in value vector" do
|
1801
|
+
df = Daru::DataFrame.new({
|
1802
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
|
1803
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
1804
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
1805
|
+
d: [1,2,2,3,3,4,5,6,7],
|
1806
|
+
e: [2,nil,4,6,6,8,10,12,nil]
|
1807
|
+
})
|
1808
|
+
|
1809
|
+
expect(df.pivot_table index: [:a]).to eq(
|
1810
|
+
Daru::DataFrame.new({
|
1811
|
+
d: [5.0, 2.2, 7],
|
1812
|
+
e: [10.0, 4.5, nil]
|
1813
|
+
}, index: Daru::Index.new(['bar', 'foo', 'ice'])))
|
1814
|
+
end
|
1815
|
+
|
1816
|
+
it "works when nils are present in value vector" do
|
1817
|
+
df = Daru::DataFrame.new({
|
1818
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
|
1819
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
1820
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
1821
|
+
d: [1,2,2,3,3,4,5,6,7],
|
1822
|
+
e: [2,nil,4,6,6,8,10,12,nil]
|
1823
|
+
})
|
1824
|
+
|
1825
|
+
agg_vectors = Daru::MultiIndex.from_tuples(
|
1826
|
+
[
|
1827
|
+
[:e, 'one'],
|
1828
|
+
[:e, 'two']
|
1829
|
+
]
|
1830
|
+
)
|
1831
|
+
|
1832
|
+
agg_index = Daru::MultiIndex.from_tuples(
|
1833
|
+
[
|
1834
|
+
['bar'],
|
1835
|
+
['foo'],
|
1836
|
+
['ice']
|
1837
|
+
]
|
1838
|
+
)
|
1839
|
+
|
1840
|
+
expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
|
1841
|
+
Daru::DataFrame.new(
|
1842
|
+
[
|
1843
|
+
[9, 3, nil],
|
1844
|
+
[12, 6, nil]
|
1845
|
+
], order: agg_vectors, index: agg_index
|
1846
|
+
)
|
1847
|
+
)
|
1848
|
+
end
|
1606
1849
|
end
|
1607
1850
|
|
1608
1851
|
context "#shape" do
|
@@ -2092,11 +2335,13 @@ describe Daru::DataFrame do
|
|
2092
2335
|
context "#concat" do
|
2093
2336
|
before do
|
2094
2337
|
@df1 = Daru::DataFrame.new({
|
2095
|
-
a: [1, 2, 3]
|
2338
|
+
a: [1, 2, 3],
|
2339
|
+
b: [1, 2, 3]
|
2096
2340
|
})
|
2097
2341
|
|
2098
2342
|
@df2 = Daru::DataFrame.new({
|
2099
|
-
a: [4, 5, 6]
|
2343
|
+
a: [4, 5, 6],
|
2344
|
+
c: [4, 5, 6]
|
2100
2345
|
})
|
2101
2346
|
end
|
2102
2347
|
|
@@ -2117,5 +2362,14 @@ describe Daru::DataFrame do
|
|
2117
2362
|
expect(df_concat[:a].to_a).to eq df1_a + df2_a
|
2118
2363
|
end
|
2119
2364
|
|
2365
|
+
it 'fills in missing vectors with nils' do
|
2366
|
+
df1_b = @df1[:b].to_a.dup
|
2367
|
+
df2_c = @df2[:c].to_a.dup
|
2368
|
+
|
2369
|
+
df_concat = @df1.concat @df2
|
2370
|
+
expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
|
2371
|
+
expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
|
2372
|
+
end
|
2373
|
+
|
2120
2374
|
end
|
2121
2375
|
end if mri?
|