daru 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = "0.1.2"
3
- end
2
+ VERSION = '0.1.3'.freeze
3
+ end
@@ -41,6 +41,10 @@ describe Daru::Core::GroupBy do
41
41
  it 'groups by nil values' do
42
42
  expect(@df.group_by(:w_nils).groups[[nil]]).to eq([1,3,4])
43
43
  end
44
+
45
+ it "uses a multi-index when nils are part of the grouping keys" do
46
+ expect(@df.group_by(:a, :w_nils).send(:multi_indexed_grouping?)).to be true
47
+ end
44
48
  end
45
49
 
46
50
  context "#initialize" do
@@ -85,6 +89,11 @@ describe Daru::Core::GroupBy do
85
89
  it "returns a vector containing the size of each group" do
86
90
  expect(@dl_group.size).to eq(Daru::Vector.new([1,1,1,2,1,2], index: @dl_multi_index))
87
91
  end
92
+
93
+ it "returns an empty vector if given an empty dataframe" do
94
+ df = Daru::DataFrame.new({ a: [], b: [] })
95
+ expect(df.group_by(:a).size).to eq(Daru::Vector.new([]))
96
+ end
88
97
  end
89
98
 
90
99
  context "#get_group" do
@@ -336,4 +345,11 @@ describe Daru::Core::GroupBy do
336
345
  context "#[]" do
337
346
  pending
338
347
  end
348
+
349
+ context "#reduce" do
350
+ it "returns a vector that concatenates strings in a group" do
351
+ string_concat = lambda { |result, row| result += row[:b] }
352
+ expect(@sl_group.reduce('', &string_concat)).to eq(Daru::Vector.new(['onethreetwo', 'onetwotwoonethree'], index: @sl_index))
353
+ end
354
+ end
339
355
  end
@@ -15,51 +15,67 @@ describe Daru::DataFrame do
15
15
  :id => [1,1,1,1],
16
16
  :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
17
17
  })
18
+ @empty = Daru::DataFrame.new({
19
+ :id => [],
20
+ :name => []
21
+ })
18
22
  end
19
23
 
20
24
  it "performs an inner join of two dataframes" do
21
25
  answer = Daru::DataFrame.new({
22
- :id_1 => [1,3],
23
- :name => ['Pirate', 'Ninja'],
24
- :id_2 => [2,4]
26
+ :id_1 => [3,1],
27
+ :name => ['Ninja', 'Pirate'],
28
+ :id_2 => [4,2]
25
29
  }, order: [:id_1, :name, :id_2])
26
30
  expect(@left.join(@right, how: :inner, on: [:name])).to eq(answer)
27
31
  end
28
32
 
29
33
  it "performs an inner join of two dataframes that has one to many mapping" do
30
34
  answer = Daru::DataFrame.new({
31
- :id => [1,1,1,1],
32
35
  :name_1 => ['Pirate', 'Pirate', 'Pirate', 'Pirate'],
36
+ :id => [1,1,1,1],
33
37
  :name_2 => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
34
- }, order: [:id, :name_1, :name_2])
38
+ }, order: [:name_1, :id, :name_2])
35
39
  expect(@left.join(@right_many, how: :inner, on: [:id])).to eq(answer)
36
40
  end
37
41
 
38
42
  it "performs a full outer join" do
39
43
  answer = Daru::DataFrame.new({
40
- :id_1 => [1,2,3,4,nil,nil],
41
- :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti','Rutabaga', 'Darth Vader'],
42
- :id_2 => [2,nil,4,nil,1,3]
44
+ :id_1 => [nil,2,3,1,nil,4],
45
+ :name => ["Darth Vader", "Monkey", "Ninja", "Pirate", "Rutabaga", "Spaghetti"],
46
+ :id_2 => [3,nil,4,2,1,nil]
43
47
  }, order: [:id_1, :name, :id_2])
44
48
  expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
45
49
  end
46
50
 
47
51
  it "performs a left outer join", focus: true do
48
52
  answer = Daru::DataFrame.new({
49
- :id_1 => [1,2,3,4],
50
- :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti'],
51
- :id_2 => [2,nil,4,nil]
53
+ :id_1 => [2,3,1,4],
54
+ :name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
55
+ :id_2 => [nil,4,2,nil]
52
56
  }, order: [:id_1, :name, :id_2])
53
57
  expect(@left.join(@right, how: :left, on: [:name])).to eq(answer)
54
58
  end
55
59
 
60
+ it "performs a left join with an empty dataframe" do
61
+
62
+ answer = Daru::DataFrame.new({
63
+ :id_1 => [2,3,1,4],
64
+ :name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
65
+ :id_2 => [nil,nil,nil,nil]
66
+ }, order: [:id_1, :name, :id_2])
67
+
68
+ expect(@left.join(@empty, how: :left, on: [:name])).to eq(answer)
69
+ end
70
+
56
71
  it "performs a right outer join" do
57
72
  answer = Daru::DataFrame.new({
58
- :id_1 => [nil,1,nil,3],
59
- :name => ['Rutabaga','Pirate', 'Darth Vader', 'Ninja'],
60
- :id_2 => [1,2,3,4]
73
+ :id_1 => [nil,3,1,nil],
74
+ :name => ["Darth Vader", "Ninja", "Pirate", "Rutabaga"],
75
+ :id_2 => [3,4,2,1]
61
76
  }, order: [:id_1, :name, :id_2])
62
77
  expect(@left.join(@right, how: :right, on: [:name])).to eq(answer)
63
78
  end
79
+
64
80
  end
65
81
  end
@@ -133,14 +133,17 @@ describe Daru::DataFrame do
133
133
  end
134
134
 
135
135
  it "initializes from a Hash of Vectors" do
136
- df = Daru::DataFrame.new({b: [11,12,13,14,15].dv(:b, [:one, :two, :three, :four, :five]),
137
- a: [1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five])}, order: [:a, :b],
138
- index: [:one, :two, :three, :four, :five])
136
+ va = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:one, :two, :three, :four, :five])
137
+ vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
138
+
139
+ df = Daru::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
139
140
 
140
141
  expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
141
142
  expect(df.vectors).to eq(Daru::Index.new [:a, :b])
142
143
  expect(df.a.class).to eq(Daru::Vector)
143
144
  expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
145
+ expect(df.a.metadata).to eq({ cdc_type: 2 })
146
+ expect(df.b.metadata).to eq({})
144
147
  end
145
148
 
146
149
  it "initializes from an Array of Hashes" do
@@ -419,6 +422,13 @@ describe Daru::DataFrame do
419
422
  expect(@df[:a, :b]).to eq(temp)
420
423
  end
421
424
 
425
+ it "returns a DataFrame with metadata" do
426
+ @df[:a].metadata = "alpha"
427
+ @df[:b].metadata = "beta"
428
+ subset_df = @df[:a, :b]
429
+ expect([:a, :b].map { |v| subset_df[v].metadata }).to eq(["alpha", "beta"])
430
+ end
431
+
422
432
  it "accesses vector with Integer index" do
423
433
  expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
424
434
  end
@@ -455,7 +465,7 @@ describe Daru::DataFrame do
455
465
  end
456
466
 
457
467
  it "returns a Vector if the last level of MultiIndex is tracked" do
458
- expect(@df_mi[:a, :one]).to eq(
468
+ expect(@df_mi[:a, :one, :bar]).to eq(
459
469
  Daru::Vector.new(@vector_arry1, index: @multi_index))
460
470
  end
461
471
  end
@@ -521,6 +531,31 @@ describe Daru::DataFrame do
521
531
  }.to raise_error
522
532
  end
523
533
 
534
+ it "assigns correct name given empty dataframe" do
535
+ df_empty = Daru::DataFrame.new({})
536
+ df_empty[:a] = 1..5
537
+ df_empty[:b] = 1..5
538
+
539
+ expect(df_empty[:a].name).to equal(:a)
540
+ expect(df_empty[:b].name).to equal(:b)
541
+ end
542
+
543
+ it "copies metadata when the target is a vector" do
544
+ vec = Daru::Vector.new(1.upto(@df.size), index: @df.index, metadata: { cdc_type: 2 })
545
+ @df[:woo] = vec.dup
546
+ expect(@df[:woo].metadata).to eq vec.metadata
547
+ end
548
+
549
+ it "doesn't delete metadata when the source is a dataframe with empty vectors" do
550
+ empty_df = Daru::DataFrame.new({
551
+ a: Daru::Vector.new([], metadata: 'alpha'),
552
+ b: Daru::Vector.new([], metadata: 'beta'),
553
+ })
554
+
555
+ empty_df[:c] = Daru::Vector.new(1.upto(3))
556
+ expect(empty_df[:a].metadata).to eq 'alpha'
557
+ end
558
+
524
559
  it "appends multiple vectors at a time" do
525
560
  # TODO
526
561
  end
@@ -561,6 +596,13 @@ describe Daru::DataFrame do
561
596
 
562
597
  expect(@df_mi).to eq(answer)
563
598
  end
599
+
600
+ it "assigns correct name given empty dataframe" do
601
+ df_empty = Daru::DataFrame.new([], index: @multi_index, order: @order_mi)
602
+ df_empty[:c, :one, :bar] = 1..12
603
+
604
+ expect(df_empty[:c, :one, :bar].name).to eq "conebar"
605
+ end
564
606
  end
565
607
  end
566
608
 
@@ -848,6 +890,16 @@ describe Daru::DataFrame do
848
890
  expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
849
891
  expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
850
892
  end
893
+
894
+ it "original dataframe remains unaffected when operations are applied
895
+ on cloned data frame" do
896
+ original = @data_frame.dup
897
+ cloned = @data_frame.clone
898
+ cloned.delete_vector :a
899
+
900
+ expect(@data_frame).to eq(original)
901
+ end
902
+
851
903
  end
852
904
 
853
905
  context "#clone_structure" do
@@ -923,6 +975,17 @@ describe Daru::DataFrame do
923
975
  @ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
924
976
  c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
925
977
  index: [:one, :two, :three, :four, :five])
978
+
979
+ @data_frame_date_time = @data_frame.dup
980
+ @data_frame_date_time.index = Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
981
+
982
+ @ans_vector_date_time = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
983
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
984
+ index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
985
+
986
+ @ans_rows_date_time = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
987
+ c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
988
+ index: Daru::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
926
989
  end
927
990
 
928
991
  it "maps over the vectors of a DataFrame and returns a DataFrame" do
@@ -941,6 +1004,24 @@ describe Daru::DataFrame do
941
1004
 
942
1005
  expect(ret).to eq(@ans_rows)
943
1006
  end
1007
+
1008
+ it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
1009
+ ret = @data_frame_date_time.recode do |vector|
1010
+ vector.map! { |e| e += 10}
1011
+ end
1012
+
1013
+ expect(ret).to eq(@ans_vector_date_time)
1014
+ end
1015
+
1016
+ it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
1017
+ ret = @data_frame_date_time.recode(:row) do |row|
1018
+ expect(row.class).to eq(Daru::Vector)
1019
+ row.map! { |e| e*e }
1020
+ end
1021
+
1022
+ expect(ret).to eq(@ans_rows_date_time)
1023
+ end
1024
+
944
1025
  end
945
1026
 
946
1027
  context "#collect" do
@@ -1053,6 +1134,18 @@ describe Daru::DataFrame do
1053
1134
  end
1054
1135
  end
1055
1136
 
1137
+ context "#delete_vectors" do
1138
+ context Daru::Index do
1139
+ it "deletes the specified vectors" do
1140
+ @data_frame.delete_vectors :a, :b
1141
+
1142
+ expect(@data_frame).to eq(Daru::DataFrame.new({
1143
+ c: [11,22,33,44,55]}, order: [:c],
1144
+ index: [:one, :two, :three, :four, :five]))
1145
+ end
1146
+ end
1147
+ end
1148
+
1056
1149
  context "#delete_row" do
1057
1150
  it "deletes the specified row" do
1058
1151
  @data_frame.delete_row :three
@@ -1158,9 +1251,9 @@ describe Daru::DataFrame do
1158
1251
  end
1159
1252
  end
1160
1253
 
1161
- context "#to_hash" do
1254
+ context "#to_h" do
1162
1255
  it "converts to a hash" do
1163
- expect(@data_frame.to_hash).to eq(
1256
+ expect(@data_frame.to_h).to eq(
1164
1257
  {
1165
1258
  a: Daru::Vector.new([1,2,3,4,5],
1166
1259
  index: [:one, :two, :three, :four, :five]),
@@ -1187,15 +1280,16 @@ describe Daru::DataFrame do
1187
1280
  context Daru::Index do
1188
1281
  before :each do
1189
1282
  @df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
1283
+ @df[:a].metadata = { cdc_type: 2 }
1190
1284
  end
1191
1285
 
1192
1286
  it "sorts according to given vector order (bang)" do
1193
- a_sorter = lambda { |a,b| a <=> b }
1287
+ a_sorter = lambda { |a| a }
1194
1288
  ans = @df.sort([:a], by: { a: a_sorter })
1195
1289
 
1196
1290
  expect(ans).to eq(
1197
- Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3], c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']},
1198
- index: [2,1,4,5,0,3])
1291
+ Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
1292
+ index: [2,1,0,4,5,3])
1199
1293
  )
1200
1294
  expect(ans).to_not eq(@df)
1201
1295
  end
@@ -1208,6 +1302,12 @@ describe Daru::DataFrame do
1208
1302
  )
1209
1303
  expect(ans).to_not eq(@df)
1210
1304
  end
1305
+
1306
+ it "retains the vector metadata from the original dataframe" do
1307
+ ans = @df.sort([:a])
1308
+ expect(ans[:a].metadata).to eq({ cdc_type: 2 })
1309
+ end
1310
+
1211
1311
  end
1212
1312
 
1213
1313
  context Daru::MultiIndex do
@@ -1223,11 +1323,11 @@ describe Daru::DataFrame do
1223
1323
  end
1224
1324
 
1225
1325
  it "sorts according to given vector order (bang)" do
1226
- a_sorter = lambda { |a,b| a <=> b }
1326
+ a_sorter = lambda { |a| a }
1227
1327
 
1228
1328
  expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
1229
- Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,9,1,-2,3],
1230
- c: ['aaa','aa','aaaaa','aaaaaa','a','aaaa']}, index: [2,1,4,5,0,3])
1329
+ Daru::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
1330
+ c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
1231
1331
  )
1232
1332
  end
1233
1333
 
@@ -1260,6 +1360,68 @@ describe Daru::DataFrame do
1260
1360
  index: [7,3,4,6,5,0,1,2])
1261
1361
  )
1262
1362
  end
1363
+
1364
+ it "places nils at the beginning when sorting ascedingly" do
1365
+ d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
1366
+
1367
+ expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
1368
+ Daru::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
1369
+ index: [7,3,0,1,2,6,5,4])
1370
+ )
1371
+ end
1372
+
1373
+ it "places nils at the beginning when sorting decendingly" do
1374
+ d = Daru::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
1375
+
1376
+ expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
1377
+ Daru::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
1378
+ index: [7,3,4,6,5,0,1,2])
1379
+ )
1380
+ end
1381
+
1382
+ it "sorts vectors of non-numeric types with nils in ascending order" do
1383
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1384
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1385
+
1386
+ expect(non_numeric.sort!([:c], ascending: [true])).to eq(
1387
+ Daru::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
1388
+ c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
1389
+ index: [2, 5, 0, 1, 3, 4])
1390
+ )
1391
+ end
1392
+
1393
+ it "sorts vectors of non-numeric types with nils in descending order" do
1394
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1395
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1396
+
1397
+ expect(non_numeric.sort!([:c], ascending: [false])).to eq(
1398
+ Daru::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
1399
+ c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
1400
+ index: [2, 5, 4, 3, 0, 1])
1401
+ )
1402
+ end
1403
+
1404
+ it "sorts vectors with block provided and handle nils automatically" do
1405
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1406
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1407
+
1408
+ expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
1409
+ Daru::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
1410
+ c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
1411
+ index: [0, 3, 1, 2, 4, 5])
1412
+ )
1413
+ end
1414
+
1415
+ it "sorts vectors with block provided and nils handled manually" do
1416
+ non_numeric = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
1417
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
1418
+
1419
+ expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
1420
+ Daru::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
1421
+ c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
1422
+ index: [1, 2, 4, 5, 0, 3])
1423
+ )
1424
+ end
1263
1425
  end
1264
1426
 
1265
1427
  context Daru::MultiIndex do
@@ -1316,6 +1478,37 @@ describe Daru::DataFrame do
1316
1478
  end
1317
1479
  end
1318
1480
 
1481
+ context "#rename_vectors" do
1482
+ before do
1483
+ @df = Daru::DataFrame.new({
1484
+ a: [1,2,3,4,5],
1485
+ b: [11,22,33,44,55],
1486
+ c: %w(a b c d e)
1487
+ })
1488
+ end
1489
+
1490
+ it "renames vectors using a hash map" do
1491
+ @df.rename_vectors :a => :alpha, :c => :gamma
1492
+ expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
1493
+ end
1494
+
1495
+ it "overwrites vectors if the new name already exists" do
1496
+ saved_vector = @df[:a].dup
1497
+
1498
+ @df.rename_vectors :a => :b
1499
+ expect(@df.vectors.to_a).to eq([:b, :c])
1500
+ expect(@df[:b]).to eq saved_vector
1501
+ end
1502
+
1503
+ it "makes no changes if the old and new names are the same" do
1504
+ saved_vector = @df[:a].dup
1505
+
1506
+ @df.rename_vectors :a => :a
1507
+ expect(@df.vectors.to_a).to eq([:a, :b, :c])
1508
+ expect(@df[:a]).to eq saved_vector
1509
+ end
1510
+ end
1511
+
1319
1512
  context "#reindex" do
1320
1513
  it "re indexes and aligns accordingly" do
1321
1514
  df = Daru::DataFrame.new({
@@ -1603,6 +1796,56 @@ describe Daru::DataFrame do
1603
1796
  @df.pivot_table
1604
1797
  }.to raise_error
1605
1798
  end
1799
+
1800
+ it "aggregates when nils are present in value vector" do
1801
+ df = Daru::DataFrame.new({
1802
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
1803
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
1804
+ c: ['small','large','large','small','small','large','small','large','small'],
1805
+ d: [1,2,2,3,3,4,5,6,7],
1806
+ e: [2,nil,4,6,6,8,10,12,nil]
1807
+ })
1808
+
1809
+ expect(df.pivot_table index: [:a]).to eq(
1810
+ Daru::DataFrame.new({
1811
+ d: [5.0, 2.2, 7],
1812
+ e: [10.0, 4.5, nil]
1813
+ }, index: Daru::Index.new(['bar', 'foo', 'ice'])))
1814
+ end
1815
+
1816
+ it "works when nils are present in value vector" do
1817
+ df = Daru::DataFrame.new({
1818
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
1819
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
1820
+ c: ['small','large','large','small','small','large','small','large','small'],
1821
+ d: [1,2,2,3,3,4,5,6,7],
1822
+ e: [2,nil,4,6,6,8,10,12,nil]
1823
+ })
1824
+
1825
+ agg_vectors = Daru::MultiIndex.from_tuples(
1826
+ [
1827
+ [:e, 'one'],
1828
+ [:e, 'two']
1829
+ ]
1830
+ )
1831
+
1832
+ agg_index = Daru::MultiIndex.from_tuples(
1833
+ [
1834
+ ['bar'],
1835
+ ['foo'],
1836
+ ['ice']
1837
+ ]
1838
+ )
1839
+
1840
+ expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
1841
+ Daru::DataFrame.new(
1842
+ [
1843
+ [9, 3, nil],
1844
+ [12, 6, nil]
1845
+ ], order: agg_vectors, index: agg_index
1846
+ )
1847
+ )
1848
+ end
1606
1849
  end
1607
1850
 
1608
1851
  context "#shape" do
@@ -2092,11 +2335,13 @@ describe Daru::DataFrame do
2092
2335
  context "#concat" do
2093
2336
  before do
2094
2337
  @df1 = Daru::DataFrame.new({
2095
- a: [1, 2, 3]
2338
+ a: [1, 2, 3],
2339
+ b: [1, 2, 3]
2096
2340
  })
2097
2341
 
2098
2342
  @df2 = Daru::DataFrame.new({
2099
- a: [4, 5, 6]
2343
+ a: [4, 5, 6],
2344
+ c: [4, 5, 6]
2100
2345
  })
2101
2346
  end
2102
2347
 
@@ -2117,5 +2362,14 @@ describe Daru::DataFrame do
2117
2362
  expect(df_concat[:a].to_a).to eq df1_a + df2_a
2118
2363
  end
2119
2364
 
2365
+ it 'fills in missing vectors with nils' do
2366
+ df1_b = @df1[:b].to_a.dup
2367
+ df2_c = @df2[:c].to_a.dup
2368
+
2369
+ df_concat = @df1.concat @df2
2370
+ expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
2371
+ expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
2372
+ end
2373
+
2120
2374
  end
2121
2375
  end if mri?