daru 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = "0.1.2"
3
- end
2
+ VERSION = '0.1.3'.freeze
3
+ end
@@ -41,6 +41,10 @@ describe Daru::Core::GroupBy do
41
41
  it 'groups by nil values' do
42
42
  expect(@df.group_by(:w_nils).groups[[nil]]).to eq([1,3,4])
43
43
  end
44
+
45
+ it "uses a multi-index when nils are part of the grouping keys" do
46
+ expect(@df.group_by(:a, :w_nils).send(:multi_indexed_grouping?)).to be true
47
+ end
44
48
  end
45
49
 
46
50
  context "#initialize" do
@@ -85,6 +89,11 @@ describe Daru::Core::GroupBy do
85
89
  it "returns a vector containing the size of each group" do
86
90
  expect(@dl_group.size).to eq(Daru::Vector.new([1,1,1,2,1,2], index: @dl_multi_index))
87
91
  end
92
+
93
+ it "returns an empty vector if given an empty dataframe" do
94
+ df = Daru::DataFrame.new({ a: [], b: [] })
95
+ expect(df.group_by(:a).size).to eq(Daru::Vector.new([]))
96
+ end
88
97
  end
89
98
 
90
99
  context "#get_group" do
@@ -336,4 +345,11 @@ describe Daru::Core::GroupBy do
336
345
  context "#[]" do
337
346
  pending
338
347
  end
348
+
349
+ context "#reduce" do
350
+ it "returns a vector that concatenates strings in a group" do
351
+ string_concat = lambda { |result, row| result += row[:b] }
352
+ expect(@sl_group.reduce('', &string_concat)).to eq(Daru::Vector.new(['onethreetwo', 'onetwotwoonethree'], index: @sl_index))
353
+ end
354
+ end
339
355
  end
@@ -15,51 +15,67 @@ describe Daru::DataFrame do
15
15
  :id => [1,1,1,1],
16
16
  :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
17
17
  })
18
+ @empty = Daru::DataFrame.new({
19
+ :id => [],
20
+ :name => []
21
+ })
18
22
  end
19
23
 
20
24
  it "performs an inner join of two dataframes" do
21
25
  answer = Daru::DataFrame.new({
22
- :id_1 => [1,3],
23
- :name => ['Pirate', 'Ninja'],
24
- :id_2 => [2,4]
26
+ :id_1 => [3,1],
27
+ :name => ['Ninja', 'Pirate'],
28
+ :id_2 => [4,2]
25
29
  }, order: [:id_1, :name, :id_2])
26
30
  expect(@left.join(@right, how: :inner, on: [:name])).to eq(answer)
27
31
  end
28
32
 
29
33
  it "performs an inner join of two dataframes that has one to many mapping" do
30
34
  answer = Daru::DataFrame.new({
31
- :id => [1,1,1,1],
32
35
  :name_1 => ['Pirate', 'Pirate', 'Pirate', 'Pirate'],
36
+ :id => [1,1,1,1],
33
37
  :name_2 => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
34
- }, order: [:id, :name_1, :name_2])
38
+ }, order: [:name_1, :id, :name_2])
35
39
  expect(@left.join(@right_many, how: :inner, on: [:id])).to eq(answer)
36
40
  end
37
41
 
38
42
  it "performs a full outer join" do
39
43
  answer = Daru::DataFrame.new({
40
- :id_1 => [1,2,3,4,nil,nil],
41
- :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti','Rutabaga', 'Darth Vader'],
42
- :id_2 => [2,nil,4,nil,1,3]
44
+ :id_1 => [nil,2,3,1,nil,4],
45
+ :name => ["Darth Vader", "Monkey", "Ninja", "Pirate", "Rutabaga", "Spaghetti"],
46
+ :id_2 => [3,nil,4,2,1,nil]
43
47
  }, order: [:id_1, :name, :id_2])
44
48
  expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
45
49
  end
46
50
 
47
51
  it "performs a left outer join", focus: true do
48
52
  answer = Daru::DataFrame.new({
49
- :id_1 => [1,2,3,4],
50
- :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti'],
51
- :id_2 => [2,nil,4,nil]
53
+ :id_1 => [2,3,1,4],
54
+ :name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
55
+ :id_2 => [nil,4,2,nil]
52
56
  }, order: [:id_1, :name, :id_2])
53
57
  expect(@left.join(@right, how: :left, on: [:name])).to eq(answer)
54
58
  end
55
59
 
60
+ it "performs a left join with an empty dataframe" do
61
+
62
+ answer = Daru::DataFrame.new({
63
+ :id_1 => [2,3,1,4],
64
+ :name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
65
+ :id_2 => [nil,nil,nil,nil]
66
+ }, order: [:id_1, :name, :id_2])
67
+
68
+ expect(@left.join(@empty, how: :left, on: [:name])).to eq(answer)
69
+ end
70
+
56
71
  it "performs a right outer join" do
57
72
  answer = Daru::DataFrame.new({
58
- :id_1 => [nil,1,nil,3],
59
- :name => ['Rutabaga','Pirate', 'Darth Vader', 'Ninja'],
60
- :id_2 => [1,2,3,4]
73
+ :id_1 => [nil,3,1,nil],
74
+ :name => ["Darth Vader", "Ninja", "Pirate", "Rutabaga"],
75
+ :id_2 => [3,4,2,1]
61
76
  }, order: [:id_1, :name, :id_2])
62
77
  expect(@left.join(@right, how: :right, on: [:name])).to eq(answer)
63
78
  end
79
+
64
80
  end
65
81
  end
@@ -133,14 +133,17 @@ describe Daru::DataFrame do
133
133
  end
134
134
 
135
135
  it "initializes from a Hash of Vectors" do
136
- df = Daru::DataFrame.new({b: [11,12,13,14,15].dv(:b, [:one, :two, :three, :four, :five]),
137
- a: [1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five])}, order: [:a, :b],
138
- index: [:one, :two, :three, :four, :five])
136
+ va = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:one, :two, :three, :four, :five])
137
+ vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
138
+
139
+ df = Daru::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
139
140
 
140
141
  expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
141
142
  expect(df.vectors).to eq(Daru::Index.new [:a, :b])
142
143
  expect(df.a.class).to eq(Daru::Vector)
143
144
  expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
145
+ expect(df.a.metadata).to eq({ cdc_type: 2 })
146
+ expect(df.b.metadata).to eq({})
144
147
  end
145
148
 
146
149
  it "initializes from an Array of Hashes" do
@@ -419,6 +422,13 @@ describe Daru::DataFrame do
419
422
  expect(@df[:a, :b]).to eq(temp)
420
423
  end
421
424
 
425
+ it "returns a DataFrame with metadata" do
426
+ @df[:a].metadata = "alpha"
427
+ @df[:b].metadata = "beta"
428
+ subset_df = @df[:a, :b]
429
+ expect([:a, :b].map { |v| subset_df[v].metadata }).to eq(["alpha", "beta"])
430
+ end
431
+
422
432
  it "accesses vector with Integer index" do
423
433
  expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
424
434
  end
@@ -455,7 +465,7 @@ describe Daru::DataFrame do
455
465
  end
456
466
 
457
467
  it "returns a Vector if the last level of MultiIndex is tracked" do
458
- expect(@df_mi[:a, :one]).to eq(
468
+ expect(@df_mi[:a, :one, :bar]).to eq(
459
469
  Daru::Vector.new(@vector_arry1, index: @multi_index))
460
470
  end
461
471
  end
@@ -521,6 +531,31 @@ describe Daru::DataFrame do
521
531
  }.to raise_error
522
532
  end
523
533
 
534
+ it "assigns correct name given empty dataframe" do
535
+ df_empty = Daru::DataFrame.new({})
536
+ df_empty[:a] = 1..5
537
+ df_empty[:b] = 1..5
538
+
539
+ expect(df_empty[:a].name).to equal(:a)
540
+ expect(df_empty[:b].name).to equal(:b)
541
+ end
542
+
543
+ it "copies metadata when the target is a vector" do
544
+ vec = Daru::Vector.new(1.upto(@df.size), index: @df.index, metadata: { cdc_type: 2 })
545
+ @df[:woo] = vec.dup
546
+ expect(@df[:woo].metadata).to eq vec.metadata
547
+ end
548
+
549
+ it "doesn't delete metadata when the source is a dataframe with empty vectors" do
550
+ empty_df = Daru::DataFrame.new({
551
+ a: Daru::Vector.new([], metadata: 'alpha'),
552
+ b: Daru::Vector.new([], metadata: 'beta'),
553
+ })
554
+
555
+ empty_df[:c] = Daru::Vector.new(1.upto(3))
556
+ expect(empty_df[:a].metadata).to eq 'alpha'
557
+ end
558
+
524
559
  it "appends multiple vectors at a time" do
525
560
  # TODO
526
561
  end
@@ -561,6 +596,13 @@ describe Daru::DataFrame do
561
596
 
562
597
  expect(@df_mi).to eq(answer)
563
598
  end
599
+
600
+ it "assigns correct name given empty dataframe" do
601
+ df_empty = Daru::DataFrame.new([], index: @multi_index, order: @order_mi)
602
+ df_empty[:c, :one, :bar] = 1..12
603
+
604
+ expect(df_empty[:c, :one, :bar].name).to eq "conebar"
605
+ end
564
606
  end
565
607
  end
566
608
 
@@ -848,6 +890,16 @@ describe Daru::DataFrame do
848
890
  expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
849
891
  expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
850
892
  end
893
+
894
+ it "original dataframe remains unaffected when operations are applied
895
+ on cloned data frame" do
896
+ original = @data_frame.dup
897
+ cloned = @data_frame.clone
898
+ cloned.delete_vector :a
899
+
900
+ expect(@data_frame).to eq(original)
901
+ end
902
+
851
903
  end
852
904
 
853
905
  context "#clone_structure" do
@@ -923,6 +975,17 @@ describe Daru::DataFrame do
923
975
  @ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
924
976
  c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
925
977
  index: [:one, :two, :three, :four, :five])
978
+
979
+ @data_frame_date_time = @data_frame.dup
980
+ @data_frame_date_time.index = Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
981
+
982
+ @ans_vector_date_time = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
983
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
984
+ index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
985
+
986
+ @ans_rows_date_time = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
987
+ c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
988
+ index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
926
989
  end
927
990
 
928
991
  it "maps over the vectors of a DataFrame and returns a DataFrame" do
@@ -941,6 +1004,24 @@ describe Daru::DataFrame do
941
1004
 
942
1005
  expect(ret).to eq(@ans_rows)
943
1006
  end
1007
+
1008
+ it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
1009
+ ret = @data_frame_date_time.recode do |vector|
1010
+ vector.map! { |e| e += 10}
1011
+ end
1012
+
1013
+ expect(ret).to eq(@ans_vector_date_time)
1014
+ end
1015
+
1016
+ it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
1017
+ ret = @data_frame_date_time.recode(:row) do |row|
1018
+ expect(row.class).to eq(Daru::Vector)
1019
+ row.map! { |e| e*e }
1020
+ end
1021
+
1022
+ expect(ret).to eq(@ans_rows_date_time)
1023
+ end
1024
+
944
1025
  end
945
1026
 
946
1027
  context "#collect" do
@@ -1053,6 +1134,18 @@ describe Daru::DataFrame do
1053
1134
  end
1054
1135
  end
1055
1136
 
1137
+ context "#delete_vectors" do
1138
+ context Daru::Index do
1139
+ it "deletes the specified vectors" do
1140
+ @data_frame.delete_vectors :a, :b
1141
+
1142
+ expect(@data_frame).to eq(Daru::DataFrame.new({
1143
+ c: [11,22,33,44,55]}, order: [:c],
1144
+ index: [:one, :two, :three, :four, :five]))
1145
+ end
1146
+ end
1147
+ end
1148
+
1056
1149
  context "#delete_row" do
1057
1150
  it "deletes the specified row" do
1058
1151
  @data_frame.delete_row :three
@@ -1158,9 +1251,9 @@ describe Daru::DataFrame do
1158
1251
  end
1159
1252
  end
1160
1253
 
1161
- context "#to_hash" do
1254
+ context "#to_h" do
1162
1255
  it "converts to a hash" do
1163
- expect(@data_frame.to_hash).to eq(
1256
+ expect(@data_frame.to_h).to eq(
1164
1257
  {
1165
1258
  a: Daru::Vector.new([1,2,3,4,5],
1166
1259
  index: [:one, :two, :three, :four, :five]),
@@ -1187,15 +1280,16 @@ describe Daru::DataFrame do
1187
1280
  context Daru::Index do
1188
1281
  before :each do
1189
1282
  @df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
1283
+ @df[:a].metadata = { cdc_type: 2 }
1190
1284
  end
1191
1285
 
1192
1286
  it "sorts according to given vector order (bang)" do
1193
- a_sorter = lambda { |a,b| a <=> b }
1287
+ a_sorter = lambda { |a| a }
1194
1288
  ans = @df.sort([:a], by: { a: a_sorter })
1195
1289
 
1196
1290
  expect(ans).to eq(
1197
- Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3], c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']},
1198
- index: [2,1,4,5,0,3])
1291
+ Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
1292
+ index: [2,1,0,4,5,3])
1199
1293
  )
1200
1294
  expect(ans).to_not eq(@df)
1201
1295
  end
@@ -1208,6 +1302,12 @@ describe Daru::DataFrame do
1208
1302
  )
1209
1303
  expect(ans).to_not eq(@df)
1210
1304
  end
1305
+
1306
+ it "retains the vector metadata from the original dataframe" do
1307
+ ans = @df.sort([:a])
1308
+ expect(ans[:a].metadata).to eq({ cdc_type: 2 })
1309
+ end
1310
+
1211
1311
  end
1212
1312
 
1213
1313
  context Daru::MultiIndex do
@@ -1223,11 +1323,11 @@ describe Daru::DataFrame do
1223
1323
  end
1224
1324
 
1225
1325
  it "sorts according to given vector order (bang)" do
1226
- a_sorter = lambda { |a,b| a <=> b }
1326
+ a_sorter = lambda { |a| a }
1227
1327
 
1228
1328
  expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
1229
- Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3],
1230
- c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']}, index: [2,1,4,5,0,3])
1329
+ Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
1330
+ c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
1231
1331
  )
1232
1332
  end
1233
1333
 
@@ -1260,6 +1360,68 @@ describe Daru::DataFrame do
1260
1360
  index: [7,3,4,6,5,0,1,2])
1261
1361
  )
1262
1362
  end
1363
+
1364
+ it "places nils at the beginning when sorting ascedingly" do
1365
+ d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
1366
+
1367
+ expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
1368
+ Daru::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
1369
+ index: [7,3,0,1,2,6,5,4])
1370
+ )
1371
+ end
1372
+
1373
+ it "places nils at the beginning when sorting decendingly" do
1374
+ d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
1375
+
1376
+ expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
1377
+ Daru::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
1378
+ index: [7,3,4,6,5,0,1,2])
1379
+ )
1380
+ end
1381
+
1382
+ it "sorts vectors of non-numeric types with nils in ascending order" do
1383
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1384
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1385
+
1386
+ expect(non_numeric.sort!([:c], ascending: [true])).to eq(
1387
+ Daru::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
1388
+ c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
1389
+ index: [2, 5, 0, 1, 3, 4])
1390
+ )
1391
+ end
1392
+
1393
+ it "sorts vectors of non-numeric types with nils in descending order" do
1394
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1395
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1396
+
1397
+ expect(non_numeric.sort!([:c], ascending: [false])).to eq(
1398
+ Daru::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
1399
+ c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
1400
+ index: [2, 5, 4, 3, 0, 1])
1401
+ )
1402
+ end
1403
+
1404
+ it "sorts vectors with block provided and handle nils automatically" do
1405
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1406
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1407
+
1408
+ expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
1409
+ Daru::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
1410
+ c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
1411
+ index: [0, 3, 1, 2, 4, 5])
1412
+ )
1413
+ end
1414
+
1415
+ it "sorts vectors with block provided and nils handled manually" do
1416
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1417
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1418
+
1419
+ expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
1420
+ Daru::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
1421
+ c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
1422
+ index: [1, 2, 4, 5, 0, 3])
1423
+ )
1424
+ end
1263
1425
  end
1264
1426
 
1265
1427
  context Daru::MultiIndex do
@@ -1316,6 +1478,37 @@ describe Daru::DataFrame do
1316
1478
  end
1317
1479
  end
1318
1480
 
1481
+ context "#rename_vectors" do
1482
+ before do
1483
+ @df = Daru::DataFrame.new({
1484
+ a: [1,2,3,4,5],
1485
+ b: [11,22,33,44,55],
1486
+ c: %w(a b c d e)
1487
+ })
1488
+ end
1489
+
1490
+ it "renames vectors using a hash map" do
1491
+ @df.rename_vectors :a => :alpha, :c => :gamma
1492
+ expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
1493
+ end
1494
+
1495
+ it "overwrites vectors if the new name already exists" do
1496
+ saved_vector = @df[:a].dup
1497
+
1498
+ @df.rename_vectors :a => :b
1499
+ expect(@df.vectors.to_a).to eq([:b, :c])
1500
+ expect(@df[:b]).to eq saved_vector
1501
+ end
1502
+
1503
+ it "makes no changes if the old and new names are the same" do
1504
+ saved_vector = @df[:a].dup
1505
+
1506
+ @df.rename_vectors :a => :a
1507
+ expect(@df.vectors.to_a).to eq([:a, :b, :c])
1508
+ expect(@df[:a]).to eq saved_vector
1509
+ end
1510
+ end
1511
+
1319
1512
  context "#reindex" do
1320
1513
  it "re indexes and aligns accordingly" do
1321
1514
  df = Daru::DataFrame.new({
@@ -1603,6 +1796,56 @@ describe Daru::DataFrame do
1603
1796
  @df.pivot_table
1604
1797
  }.to raise_error
1605
1798
  end
1799
+
1800
+ it "aggregates when nils are present in value vector" do
1801
+ df = Daru::DataFrame.new({
1802
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
1803
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
1804
+ c: ['small','large','large','small','small','large','small','large','small'],
1805
+ d: [1,2,2,3,3,4,5,6,7],
1806
+ e: [2,nil,4,6,6,8,10,12,nil]
1807
+ })
1808
+
1809
+ expect(df.pivot_table index: [:a]).to eq(
1810
+ Daru::DataFrame.new({
1811
+ d: [5.0, 2.2, 7],
1812
+ e: [10.0, 4.5, nil]
1813
+ }, index: Daru::Index.new(['bar', 'foo', 'ice'])))
1814
+ end
1815
+
1816
+ it "works when nils are present in value vector" do
1817
+ df = Daru::DataFrame.new({
1818
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
1819
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
1820
+ c: ['small','large','large','small','small','large','small','large','small'],
1821
+ d: [1,2,2,3,3,4,5,6,7],
1822
+ e: [2,nil,4,6,6,8,10,12,nil]
1823
+ })
1824
+
1825
+ agg_vectors = Daru::MultiIndex.from_tuples(
1826
+ [
1827
+ [:e, 'one'],
1828
+ [:e, 'two']
1829
+ ]
1830
+ )
1831
+
1832
+ agg_index = Daru::MultiIndex.from_tuples(
1833
+ [
1834
+ ['bar'],
1835
+ ['foo'],
1836
+ ['ice']
1837
+ ]
1838
+ )
1839
+
1840
+ expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
1841
+ Daru::DataFrame.new(
1842
+ [
1843
+ [9, 3, nil],
1844
+ [12, 6, nil]
1845
+ ], order: agg_vectors, index: agg_index
1846
+ )
1847
+ )
1848
+ end
1606
1849
  end
1607
1850
 
1608
1851
  context "#shape" do
@@ -2092,11 +2335,13 @@ describe Daru::DataFrame do
2092
2335
  context "#concat" do
2093
2336
  before do
2094
2337
  @df1 = Daru::DataFrame.new({
2095
- a: [1, 2, 3]
2338
+ a: [1, 2, 3],
2339
+ b: [1, 2, 3]
2096
2340
  })
2097
2341
 
2098
2342
  @df2 = Daru::DataFrame.new({
2099
- a: [4, 5, 6]
2343
+ a: [4, 5, 6],
2344
+ c: [4, 5, 6]
2100
2345
  })
2101
2346
  end
2102
2347
 
@@ -2117,5 +2362,14 @@ describe Daru::DataFrame do
2117
2362
  expect(df_concat[:a].to_a).to eq df1_a + df2_a
2118
2363
  end
2119
2364
 
2365
+ it 'fills in missing vectors with nils' do
2366
+ df1_b = @df1[:b].to_a.dup
2367
+ df2_c = @df2[:c].to_a.dup
2368
+
2369
+ df_concat = @df1.concat @df2
2370
+ expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
2371
+ expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
2372
+ end
2373
+
2120
2374
  end
2121
2375
  end if mri?