daru 0.1.5 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +21 -7
  5. data/.travis.yml +10 -5
  6. data/CONTRIBUTING.md +15 -10
  7. data/History.md +124 -2
  8. data/README.md +37 -9
  9. data/ReleasePolicy.md +20 -0
  10. data/benchmarks/db_loading.rb +34 -0
  11. data/benchmarks/statistics.rb +6 -6
  12. data/benchmarks/where_clause.rb +1 -1
  13. data/benchmarks/where_vs_filter.rb +1 -1
  14. data/daru.gemspec +17 -41
  15. data/lib/daru.rb +10 -13
  16. data/lib/daru/accessors/gsl_wrapper.rb +1 -1
  17. data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
  18. data/lib/daru/category.rb +29 -15
  19. data/lib/daru/configuration.rb +34 -0
  20. data/lib/daru/core/group_by.rb +158 -77
  21. data/lib/daru/core/merge.rb +12 -3
  22. data/lib/daru/core/query.rb +20 -4
  23. data/lib/daru/dataframe.rb +692 -118
  24. data/lib/daru/date_time/index.rb +14 -11
  25. data/lib/daru/date_time/offsets.rb +9 -1
  26. data/lib/daru/extensions/which_dsl.rb +55 -0
  27. data/lib/daru/formatters/table.rb +3 -5
  28. data/lib/daru/index/categorical_index.rb +4 -4
  29. data/lib/daru/index/index.rb +131 -42
  30. data/lib/daru/index/multi_index.rb +118 -10
  31. data/lib/daru/io/csv/converters.rb +21 -0
  32. data/lib/daru/io/io.rb +105 -33
  33. data/lib/daru/io/sql_data_source.rb +10 -0
  34. data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
  35. data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
  36. data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  37. data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  38. data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
  39. data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
  40. data/lib/daru/iruby/templates/vector.html.erb +3 -25
  41. data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
  42. data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
  43. data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
  44. data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
  45. data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
  46. data/lib/daru/maths/arithmetic/vector.rb +38 -2
  47. data/lib/daru/maths/statistics/dataframe.rb +28 -30
  48. data/lib/daru/maths/statistics/vector.rb +295 -41
  49. data/lib/daru/plotting/gruff/dataframe.rb +13 -15
  50. data/lib/daru/plotting/nyaplot/category.rb +1 -1
  51. data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
  52. data/lib/daru/plotting/nyaplot/vector.rb +1 -2
  53. data/lib/daru/vector.rb +308 -96
  54. data/lib/daru/version.rb +1 -1
  55. data/profile/vector_new.rb +9 -0
  56. data/spec/accessors/gsl_wrapper_spec.rb +38 -35
  57. data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
  58. data/spec/category_spec.rb +24 -20
  59. data/spec/core/group_by_spec.rb +238 -4
  60. data/spec/core/merge_spec.rb +1 -1
  61. data/spec/core/query_spec.rb +65 -50
  62. data/spec/daru_spec.rb +22 -0
  63. data/spec/dataframe_spec.rb +473 -16
  64. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  65. data/spec/date_time/index_spec.rb +34 -16
  66. data/spec/date_time/offsets_spec.rb +14 -0
  67. data/spec/extensions/rserve_spec.rb +1 -1
  68. data/spec/extensions/which_dsl_spec.rb +38 -0
  69. data/spec/fixtures/boolean_converter_test.csv +5 -0
  70. data/spec/fixtures/duplicates.csv +32 -0
  71. data/spec/fixtures/eciresults.html +394 -0
  72. data/spec/fixtures/empty_rows_test.csv +17 -0
  73. data/spec/fixtures/macau.html +3691 -0
  74. data/spec/fixtures/macd_data.csv +150 -0
  75. data/spec/fixtures/matrix_test.csv +55 -55
  76. data/spec/fixtures/moneycontrol.html +6812 -0
  77. data/spec/fixtures/string_converter_test.csv +5 -0
  78. data/spec/fixtures/test_xls.xls +0 -0
  79. data/spec/fixtures/test_xls_2.xls +0 -0
  80. data/spec/fixtures/url_test.txt~ +0 -0
  81. data/spec/fixtures/valid_markup.html +62 -0
  82. data/spec/fixtures/wiki_climate.html +1243 -0
  83. data/spec/fixtures/wiki_table_info.html +631 -0
  84. data/spec/formatters/table_formatter_spec.rb +29 -0
  85. data/spec/index/categorical_index_spec.rb +33 -33
  86. data/spec/index/index_spec.rb +160 -41
  87. data/spec/index/multi_index_spec.rb +143 -33
  88. data/spec/io/io_spec.rb +246 -2
  89. data/spec/io/sql_data_source_spec.rb +31 -41
  90. data/spec/iruby/dataframe_spec.rb +17 -19
  91. data/spec/iruby/vector_spec.rb +26 -28
  92. data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
  93. data/spec/maths/arithmetic/vector_spec.rb +18 -0
  94. data/spec/maths/statistics/vector_spec.rb +153 -15
  95. data/spec/plotting/gruff/category_spec.rb +3 -3
  96. data/spec/plotting/gruff/dataframe_spec.rb +14 -4
  97. data/spec/plotting/gruff/vector_spec.rb +9 -9
  98. data/spec/plotting/nyaplot/category_spec.rb +5 -9
  99. data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
  100. data/spec/plotting/nyaplot/vector_spec.rb +5 -11
  101. data/spec/shared/vector_display_spec.rb +12 -14
  102. data/spec/spec_helper.rb +30 -7
  103. data/spec/support/matchers.rb +5 -0
  104. data/spec/vector_spec.rb +306 -72
  105. metadata +96 -55
  106. data/spec/fixtures/stock_data.csv +0 -500
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.1.5'.freeze
2
+ VERSION = '0.3'.freeze
3
3
  end
@@ -0,0 +1,9 @@
1
+ require_relative '_base'
2
+
3
+ n = 40_0000
4
+ idx = (1..n).to_a.map(&:to_s)
5
+
6
+
7
+ __profile__ do
8
+ Daru::Vector.new(1..n, index: idx)
9
+ end
@@ -1,50 +1,53 @@
1
- describe Daru::Accessors::GSLWrapper do
2
- before :each do
3
- @stub_context = Object.new
4
- @gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
5
- end
1
+ if Daru.has_gsl?
2
+ describe Daru::Accessors::GSLWrapper do
3
+ before :each do
4
+ @stub_context = Object.new
5
+ @gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
6
+ end
6
7
 
7
- context ".new" do
8
- it "actually creates a GSL Vector" do
9
- expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
8
+ context ".new" do
9
+ it "actually creates a GSL Vector" do
10
+ expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
11
+ end
10
12
  end
11
- end
12
13
 
13
- context "#mean" do
14
- it "computes mean" do
15
- expect(@gsl_wrapper.mean).to eq(3.5)
14
+ context "#mean" do
15
+ it "computes mean" do
16
+ expect(@gsl_wrapper.mean).to eq(3.5)
17
+ end
16
18
  end
17
- end
18
19
 
19
- context "#map!" do
20
- it "destructively maps" do
21
- expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
22
- Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
23
- )
20
+ context "#map!" do
21
+ it "destructively maps" do
22
+ expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
23
+ Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
24
+ )
25
+ end
24
26
  end
25
- end
26
27
 
27
- context "#delete_at" do
28
- it "deletes at key" do
29
- expect(@gsl_wrapper.delete_at(2)).to eq(3)
28
+ context "#delete_at" do
29
+ it "deletes at key" do
30
+ expect(@gsl_wrapper.delete_at(2)).to eq(3)
30
31
 
31
- expect(@gsl_wrapper).to eq(
32
- Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
33
- )
32
+ expect(@gsl_wrapper).to eq(
33
+ Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
34
+ )
35
+ end
34
36
  end
35
- end
36
37
 
37
- context "#index" do
38
- it "returns index of value" do
39
- expect(@gsl_wrapper.index(3)).to eq(2)
38
+ context "#index" do
39
+ it "returns index of value" do
40
+ expect(@gsl_wrapper.index(3)).to eq(2)
41
+ end
40
42
  end
41
- end
42
43
 
43
- context "#push" do
44
- it "appends element" do
45
- expect(@gsl_wrapper.push(15)).to eq(
46
- Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
47
- )
44
+ context "#push" do
45
+ it "appends element" do
46
+ expect(@gsl_wrapper.push(15)).to eq(
47
+ Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
48
+ )
49
+ end
48
50
  end
49
51
  end
50
52
  end
53
+
@@ -1,32 +1,35 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
- describe Daru::Accessors::NMatrixWrapper do
4
- before :each do
5
- stub_context = Object.new
6
- @nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
7
- end
3
+ if Daru.has_nmatrix?
4
+ describe Daru::Accessors::NMatrixWrapper do
5
+ before :each do
6
+ stub_context = Object.new
7
+ @nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
8
+ end
8
9
 
9
- it "checks for actual NMatrix creation" do
10
- expect(@nm_wrapper.data.class).to eq(NMatrix)
11
- end
10
+ it "checks for actual NMatrix creation" do
11
+ expect(@nm_wrapper.data.class).to eq(NMatrix)
12
+ end
12
13
 
13
- it "checks the actual size of the NMatrix object" do
14
- expect(@nm_wrapper.data.size).to eq(10)
15
- end
14
+ it "checks the actual size of the NMatrix object" do
15
+ expect(@nm_wrapper.data.size).to eq(10)
16
+ end
16
17
 
17
- it "checks that @size is the number of elements in the vector" do
18
- expect(@nm_wrapper.size).to eq(5)
19
- end
18
+ it "checks that @size is the number of elements in the vector" do
19
+ expect(@nm_wrapper.size).to eq(5)
20
+ end
20
21
 
21
- it "checks for underlying NMatrix data type" do
22
- expect(@nm_wrapper.data.dtype).to eq(:float32)
23
- end
22
+ it "checks for underlying NMatrix data type" do
23
+ expect(@nm_wrapper.data.dtype).to eq(:float32)
24
+ end
24
25
 
25
- it "resizes" do
26
- @nm_wrapper.resize(100)
26
+ it "resizes" do
27
+ @nm_wrapper.resize(100)
27
28
 
28
- expect(@nm_wrapper.size).to eq(5)
29
- expect(@nm_wrapper.data.size).to eq(100)
30
- expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
29
+ expect(@nm_wrapper.size).to eq(5)
30
+ expect(@nm_wrapper.data.size).to eq(100)
31
+ expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
32
+ end
31
33
  end
32
34
  end
35
+
@@ -383,13 +383,13 @@ describe Daru::Vector, "categorical" do
383
383
  context "#rename_categories" do
384
384
  context 'rename base category' do
385
385
  let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category,
386
- categories: [:a, :x, :y, :c, :b, 1]}
386
+ categories: [:a, :x, :y, :c, :b, 1]}
387
387
  subject { dv.rename_categories :a => 1, 1 => 2 }
388
388
 
389
389
  it { is_expected.to be_a Daru::Vector }
390
390
  its(:to_a) { is_expected.to eq [1, 2, 1, 2, :c] }
391
391
  its(:categories) { is_expected.to eq [:x, :y, :c, :b, 1, 2] }
392
- its(:base_category) { is_expected.to eq 1 }
392
+ its(:base_category) { is_expected.to eq 1 }
393
393
  end
394
394
 
395
395
  context 'rename non-base category' do
@@ -1398,7 +1398,7 @@ describe Daru::Vector, "categorical" do
1398
1398
  index: 11..18, type: :category }
1399
1399
  context 'reject only nils' do
1400
1400
  subject { dv.reject_values nil }
1401
-
1401
+
1402
1402
  it { is_expected.to be_a Daru::Vector }
1403
1403
  its(:type) { is_expected.to eq :category }
1404
1404
  its(:to_a) { is_expected.to eq [1, 3, :a, Float::NAN, Float::NAN, 1] }
@@ -1407,7 +1407,7 @@ describe Daru::Vector, "categorical" do
1407
1407
 
1408
1408
  context 'reject only float::NAN' do
1409
1409
  subject { dv.reject_values Float::NAN }
1410
-
1410
+
1411
1411
  it { is_expected.to be_a Daru::Vector }
1412
1412
  its(:type) { is_expected.to eq :category }
1413
1413
  its(:to_a) { is_expected.to eq [1, nil, 3, :a, nil, 1] }
@@ -1422,7 +1422,7 @@ describe Daru::Vector, "categorical" do
1422
1422
  its(:to_a) { is_expected.to eq [1, 3, :a, 1] }
1423
1423
  its(:'index.to_a') { is_expected.to eq [11, 13, 14, 18] }
1424
1424
  end
1425
-
1425
+
1426
1426
  context 'reject any other value' do
1427
1427
  subject { dv.reject_values 1, 3, 20 }
1428
1428
 
@@ -1434,19 +1434,19 @@ describe Daru::Vector, "categorical" do
1434
1434
 
1435
1435
  context 'when resultant vector has only one value' do
1436
1436
  subject { dv.reject_values 1, :a, nil, Float::NAN }
1437
-
1437
+
1438
1438
  it { is_expected.to be_a Daru::Vector }
1439
1439
  its(:to_a) { is_expected.to eq [3] }
1440
1440
  its(:'index.to_a') { is_expected.to eq [13] }
1441
1441
  end
1442
-
1442
+
1443
1443
  context 'when resultant vector has no value' do
1444
1444
  subject { dv.reject_values 1, 3, :a, nil, Float::NAN, 5 }
1445
-
1445
+
1446
1446
  it { is_expected.to be_a Daru::Vector }
1447
1447
  its(:to_a) { is_expected.to eq [] }
1448
1448
  its(:'index.to_a') { is_expected.to eq [] }
1449
- end
1449
+ end
1450
1450
  end
1451
1451
 
1452
1452
  context '#include_values?' do
@@ -1484,27 +1484,27 @@ describe Daru::Vector, "categorical" do
1484
1484
  type: :category}
1485
1485
  it { expect(dv.include_values? nil, Float::NAN).to eq true }
1486
1486
  end
1487
-
1487
+
1488
1488
  context 'true with only Float::NAN' do
1489
1489
  let(:dv) { Daru::Vector.new [1, nil, 2, 3],
1490
1490
  type: :category}
1491
1491
  it { expect(dv.include_values? nil, Float::NAN).to eq true }
1492
1492
  end
1493
-
1493
+
1494
1494
  context 'false' do
1495
1495
  let(:dv) { Daru::Vector.new [1, 2, 3],
1496
1496
  type: :category}
1497
1497
  it { expect(dv.include_values? nil, Float::NAN).to eq false }
1498
1498
  end
1499
1499
  end
1500
-
1500
+
1501
1501
  context 'any other value' do
1502
1502
  context 'true' do
1503
1503
  let(:dv) { Daru::Vector.new [1, 2, 3, 4, nil],
1504
1504
  type: :category }
1505
1505
  it { expect(dv.include_values? 1, 2, 3, 5).to eq true }
1506
1506
  end
1507
-
1507
+
1508
1508
  context 'false' do
1509
1509
  let(:dv) { Daru::Vector.new [1, 2, 3, 4, nil],
1510
1510
  type: :category }
@@ -1525,12 +1525,12 @@ describe Daru::Vector, "categorical" do
1525
1525
  context Daru::Index do
1526
1526
  let(:dv) { Daru::Vector.new [1, 2, 1, 2, 3, nil, nil, Float::NAN],
1527
1527
  index: 11..18, type: :category }
1528
-
1528
+
1529
1529
  subject { dv.indexes 1, 2, nil, Float::NAN }
1530
1530
  it { is_expected.to be_a Array }
1531
1531
  it { is_expected.to eq [11, 12, 13, 14, 16, 17, 18] }
1532
1532
  end
1533
-
1533
+
1534
1534
  context Daru::MultiIndex do
1535
1535
  let(:mi) do
1536
1536
  Daru::MultiIndex.from_tuples([
@@ -1546,7 +1546,7 @@ describe Daru::Vector, "categorical" do
1546
1546
  end
1547
1547
  let(:dv) { Daru::Vector.new [1, 2, 1, 2, 3, nil, nil, Float::NAN],
1548
1548
  index: mi, type: :category }
1549
-
1549
+
1550
1550
  subject { dv.indexes 1, 2, Float::NAN }
1551
1551
  it { is_expected.to be_a Array }
1552
1552
  it { is_expected.to eq(
@@ -1559,7 +1559,7 @@ describe Daru::Vector, "categorical" do
1559
1559
  ]) }
1560
1560
  end
1561
1561
  end
1562
-
1562
+
1563
1563
  context '#replace_values' do
1564
1564
  subject do
1565
1565
  Daru::Vector.new(
@@ -1573,14 +1573,14 @@ describe Daru::Vector, "categorical" do
1573
1573
  its(:type) { is_expected.to eq :category }
1574
1574
  its(:to_a) { is_expected.to eq [1, 2, 1, 4, 10, 10, 10, 10] }
1575
1575
  end
1576
-
1576
+
1577
1577
  context 'replace arbitrary values' do
1578
1578
  before { subject.replace_values [1, 2], 10 }
1579
1579
  its(:type) { is_expected.to eq :category }
1580
1580
  its(:to_a) { is_expected.to eq(
1581
1581
  [10, 10, 10, 4, nil, Float::NAN, nil, Float::NAN]) }
1582
1582
  end
1583
-
1583
+
1584
1584
  context 'works for single value' do
1585
1585
  before { subject.replace_values nil, 10 }
1586
1586
  its(:type) { is_expected.to eq :category }
@@ -1596,7 +1596,7 @@ describe Daru::DataFrame, "categorical" do
1596
1596
  Daru::DataFrame.new({
1597
1597
  a: [1, 2, 3, 4, 5],
1598
1598
  b: ['first', 'second', 'first', 'second', 'third'],
1599
- c: ['a', 'b', 'a', 'b', 'c']
1599
+ c: ['a', 'b', 'a', 'b', nil]
1600
1600
  })
1601
1601
  end
1602
1602
  before { df.to_category :b, :c }
@@ -1605,6 +1605,10 @@ describe Daru::DataFrame, "categorical" do
1605
1605
  it { is_expected.to be_a Daru::DataFrame }
1606
1606
  its(:'b.type') { is_expected.to eq :category }
1607
1607
  its(:'c.type') { is_expected.to eq :category }
1608
+ its(:'a.count') { is_expected.to eq 5 }
1609
+ its(:'c.count') { is_expected.to eq 5 }
1610
+ it { expect(df.c.count('a')).to eq 2 }
1611
+ it { expect(df.c.count(nil)).to eq 1 }
1608
1612
  end
1609
1613
 
1610
1614
  context "#interact_code" do
@@ -29,6 +29,7 @@ describe Daru::Core::GroupBy do
29
29
  ['foo', 'three', 8],
30
30
  ['foo', 'two' , 3]
31
31
  ])
32
+
32
33
  end
33
34
 
34
35
  context 'with nil values' do
@@ -46,6 +47,34 @@ describe Daru::Core::GroupBy do
46
47
  end
47
48
 
48
49
  context "#initialize" do
50
+ let(:df_emp) { Daru::DataFrame.new(
51
+ employee: %w[John Jane Mark John Jane Mark],
52
+ month: %w[June June June July July July],
53
+ salary: [1000, 500, 700, 1200, 600, 600]
54
+ ) }
55
+ let(:employee_grp) { df_emp.group_by(:employee).df }
56
+ let(:mi_single) { Daru::MultiIndex.from_tuples([
57
+ ['Jane', 1], ['Jane', 4], ['John', 0],
58
+ ['John', 3], ['Mark', 2], ['Mark', 5]
59
+ ]
60
+ )}
61
+
62
+ let(:emp_month_grp) { df_emp.group_by([:employee, :month]).df }
63
+ let(:mi_double) { Daru::MultiIndex.from_tuples([
64
+ ['Jane', 'July', 4], ['Jane', 'June', 1], ['John', 'July', 3],
65
+ ['John', 'June', 0], ['Mark', 'July', 5], ['Mark', 'June', 2]
66
+ ]
67
+ )}
68
+
69
+ let(:emp_month_salary_grp) {
70
+ df_emp.group_by([:employee, :month, :salary]).df }
71
+ let(:mi_triple) { Daru::MultiIndex.from_tuples([
72
+ ['Jane', 'July', 600, 4], ['Jane', 'June', 500, 1],
73
+ ['John', 'July', 1200, 3], ['John', 'June', 1000, 0],
74
+ ['Mark', 'July', 600, 5], ['Mark', 'June', 700, 2]
75
+ ]
76
+ )}
77
+
49
78
  it "groups by a single tuple" do
50
79
  expect(@sl_group.groups).to eq({
51
80
  ['bar'] => [1,3,5],
@@ -53,6 +82,24 @@ describe Daru::Core::GroupBy do
53
82
  })
54
83
  end
55
84
 
85
+ it "returns dataframe with MultiIndex, groups by single layer hierarchy" do
86
+ expect(employee_grp).to eq(Daru::DataFrame.new({
87
+ month: ["June", "July", "June", "July", "June", "July"],
88
+ salary: [500, 600, 1000, 1200, 700, 600]
89
+ }, index: mi_single))
90
+ end
91
+
92
+ it "returns dataframe with MultiIndex, groups by double layer hierarchy" do
93
+ expect(emp_month_grp).to eq(Daru::DataFrame.new({
94
+ salary: [600, 500, 1200, 1000, 600, 700]
95
+ }, index: mi_double))
96
+ end
97
+
98
+ it "returns dataframe with MultiIndex, groups by triple layer hierarchy" do
99
+ expect(emp_month_salary_grp).to eq(Daru::DataFrame.new({
100
+ }, index: mi_triple))
101
+ end
102
+
56
103
  it "groups by a double layer hierarchy" do
57
104
  expect(@dl_group.groups).to eq({
58
105
  ['foo', 'one'] => [0,6],
@@ -154,6 +201,22 @@ describe Daru::Core::GroupBy do
154
201
  end
155
202
  end
156
203
 
204
+ context '#each_group without block' do
205
+ it 'enumerates groups' do
206
+ enum = @dl_group.each_group
207
+
208
+ expect(enum.count).to eq 6
209
+ expect(enum).to all be_a(Daru::DataFrame)
210
+ expect(enum.to_a.last).to eq(Daru::DataFrame.new({
211
+ a: ['foo', 'foo'],
212
+ b: ['two', 'two'],
213
+ c: [3, 3],
214
+ d: [33, 55]
215
+ }, index: [2, 4]
216
+ ))
217
+ end
218
+ end
219
+
157
220
  context '#first' do
158
221
  it 'gets the first row from each group' do
159
222
  expect(@dl_group.first).to eq(Daru::DataFrame.new({
@@ -176,10 +239,6 @@ describe Daru::Core::GroupBy do
176
239
  end
177
240
  end
178
241
 
179
- context "#aggregate" do
180
- pending
181
- end
182
-
183
242
  context "#mean" do
184
243
  it "computes mean of the numeric columns of a single layer group" do
185
244
  expect(@sl_group.mean).to eq(Daru::DataFrame.new({
@@ -418,4 +477,179 @@ describe Daru::Core::GroupBy do
418
477
 
419
478
  it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
420
479
  end
480
+
481
+ context 'when dataframe tuples contain nils in mismatching positions' do
482
+
483
+ let(:df){
484
+ Daru::DataFrame.new(
485
+ {
486
+ 'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
487
+ 'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
488
+ 'num' => [1, nil, 3, 4, 5, 6, 7, nil]
489
+ }
490
+ )
491
+ }
492
+
493
+ it 'groups by without errors' do
494
+ expect { df.group_by(df.vectors.map(&:to_s)) }.to_not raise_error(ArgumentError)
495
+ end
496
+ end
497
+
498
+ context '#aggregate' do
499
+ let(:dataframe) { Daru::DataFrame.new({
500
+ employee: %w[John Jane Mark John Jane Mark],
501
+ month: %w[June June June July July July],
502
+ salary: [1000, 500, 700, 1200, 600, 600]})
503
+ }
504
+ context 'group and aggregate sum for particular single vector' do
505
+ subject { dataframe.group_by([:employee]).aggregate(salary: :sum) }
506
+
507
+ it { is_expected.to eq Daru::DataFrame.new({
508
+ salary: [1100, 2200, 1300]},
509
+ index: ['Jane', 'John', 'Mark'])
510
+ }
511
+ end
512
+
513
+ context 'group and aggregate sum and lambda function for vectors' do
514
+ subject { dataframe.group_by([:employee]).aggregate(
515
+ salary: :sum,
516
+ month: ->(vec) { vec.to_a.join('/') }) }
517
+
518
+ it { is_expected.to eq Daru::DataFrame.new({
519
+ salary: [1100, 2200, 1300],
520
+ month: ['June/July', 'June/July', 'June/July']},
521
+ index: ['Jane', 'John', 'Mark'],
522
+ order: [:salary, :month])
523
+ }
524
+ end
525
+
526
+ context 'group and aggregate sum and lambda functions on dataframe' do
527
+ subject { dataframe.group_by([:employee]).aggregate(
528
+ salary: :sum,
529
+ month: ->(vec) { vec.to_a.join('/') },
530
+ mean_salary: ->(df) { df.salary.mean },
531
+ periods: ->(df) { df.size }
532
+ )}
533
+
534
+ it { is_expected.to eq Daru::DataFrame.new({
535
+ salary: [1100, 2200, 1300],
536
+ month: ['June/July', 'June/July', 'June/July'],
537
+ mean_salary: [550.0, 1100.0, 650.0],
538
+ periods: [2, 2, 2]},
539
+ index: ['Jane', 'John', 'Mark'], order: [:salary, :month,
540
+ :mean_salary, :periods]) }
541
+ end
542
+
543
+ context 'group_by and aggregate on mixed MultiIndex' do
544
+ let(:df) { Daru::DataFrame.new(
545
+ name: ['Ram','Krishna','Ram','Krishna','Krishna'],
546
+ visited: [
547
+ 'Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
548
+ )
549
+ }
550
+ let(:df_mixed) { Daru::DataFrame.new(
551
+ name: ['Krishna','Ram','Krishna','Krishna'],
552
+ visited: [
553
+ 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
554
+ )
555
+ }
556
+ it 'group_by' do
557
+ expect(df.group_by(:name).df).to eq(
558
+ Daru::DataFrame.new({
559
+ visited: ['Delhi', 'Raipur', 'Banglore', 'Hyderabad', 'Mumbai']},
560
+ index: Daru::MultiIndex.from_tuples(
561
+ [['Krishna', 1], ['Krishna', 3], ['Krishna', 4],
562
+ ['Ram', 0], ['Ram', 2]]
563
+ )
564
+ )
565
+ )
566
+ end
567
+
568
+ it 'group_by and aggregate' do
569
+ expect(
570
+ df.group_by(:name).aggregate(
571
+ visited: -> (vec){vec.to_a.join(',')})).to eq(
572
+ Daru::DataFrame.new({
573
+ visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai']},
574
+ index: ['Krishna', 'Ram']
575
+ )
576
+ )
577
+ end
578
+
579
+ it 'group_by and aggregate when anyone index is not multiple times' do
580
+ expect(
581
+ df_mixed.group_by(:name).aggregate(
582
+ visited: -> (vec){vec.to_a.join(',')})).to eq(
583
+ Daru::DataFrame.new({
584
+ visited: ['Delhi,Raipur,Banglore', 'Mumbai']},
585
+ index: ['Krishna', 'Ram']
586
+ )
587
+ )
588
+ end
589
+ end
590
+
591
+ let(:spending_df) {
592
+ Daru::DataFrame.rows([
593
+ [2010, 'dev', 50, 1],
594
+ [2010, 'dev', 150, 1],
595
+ [2010, 'dev', 200, 1],
596
+ [2011, 'dev', 50, 1],
597
+ [2012, 'dev', 150, 1],
598
+
599
+ [2011, 'office', 300, 1],
600
+
601
+ [2010, 'market', 50, 1],
602
+ [2011, 'market', 500, 1],
603
+ [2012, 'market', 500, 1],
604
+ [2012, 'market', 300, 1],
605
+
606
+ [2012, 'R&D', 10, 1],],
607
+ order: [:year, :category, :spending, :nb_spending])
608
+ }
609
+ let(:multi_index_year_category) {
610
+ Daru::MultiIndex.from_tuples([
611
+ [2010, "dev"], [2010, "market"],
612
+ [2011, "dev"], [2011, "market"], [2011, "office"],
613
+ [2012, "R&D"], [2012, "dev"], [2012, "market"]])
614
+ }
615
+
616
+ context 'group_by and aggregate on multiple elements' do
617
+ it 'does aggregate' do
618
+ expect(spending_df.group_by([:year, :category]).aggregate(spending: :sum)).to eq(
619
+ Daru::DataFrame.new({spending: [400, 50, 50, 500, 300, 10, 150, 800]}, index: multi_index_year_category))
620
+ end
621
+
622
+ it 'works as older methods' do
623
+ older_way = spending_df.group_by([:year, :category]).sum
624
+
625
+ newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
626
+ expect(newer_way).to eq(older_way)
627
+
628
+ contrived_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending_lambda: ->(df) { df[:nb_spending].sum })
629
+ contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
630
+ expect(contrived_way).to eq(older_way)
631
+ end
632
+
633
+ context 'can aggregate on MultiIndex' do
634
+ let(:multi_indexed_aggregated_df) { spending_df.group_by([:year, :category]).aggregate(spending: :sum) }
635
+ let(:index_year) { Daru::Index.new([2010, 2011, 2012]) }
636
+ let(:index_category) { Daru::Index.new(["dev", "market", "office", "R&D"]) }
637
+
638
+ it 'aggregates by default on the last layer of MultiIndex' do
639
+ expect(multi_indexed_aggregated_df.aggregate(spending: :sum)).to eq(
640
+ Daru::DataFrame.new({spending: [450, 850, 960]}, index: index_year))
641
+ end
642
+
643
+ it 'can aggregate on the first layer of MultiIndex' do
644
+ expect(multi_indexed_aggregated_df.aggregate({spending: :sum},0)).to eq(
645
+ Daru::DataFrame.new({spending: [600, 1350, 300, 10]}, index: index_category))
646
+ end
647
+
648
+ it 'does coercion: when one layer is remaining, MultiIndex is coerced in Index that does not aggregate anymore' do
649
+ df_with_simple_index = multi_indexed_aggregated_df.aggregate(spending: :sum)
650
+ expect(df_with_simple_index.aggregate(spending: :sum)).to eq(df_with_simple_index)
651
+ end
652
+ end
653
+ end
654
+ end
421
655
  end