daru 0.1.5 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +21 -7
- data/.travis.yml +10 -5
- data/CONTRIBUTING.md +15 -10
- data/History.md +124 -2
- data/README.md +37 -9
- data/ReleasePolicy.md +20 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +17 -41
- data/lib/daru.rb +10 -13
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +29 -15
- data/lib/daru/configuration.rb +34 -0
- data/lib/daru/core/group_by.rb +158 -77
- data/lib/daru/core/merge.rb +12 -3
- data/lib/daru/core/query.rb +20 -4
- data/lib/daru/dataframe.rb +692 -118
- data/lib/daru/date_time/index.rb +14 -11
- data/lib/daru/date_time/offsets.rb +9 -1
- data/lib/daru/extensions/which_dsl.rb +55 -0
- data/lib/daru/formatters/table.rb +3 -5
- data/lib/daru/index/categorical_index.rb +4 -4
- data/lib/daru/index/index.rb +131 -42
- data/lib/daru/index/multi_index.rb +118 -10
- data/lib/daru/io/csv/converters.rb +21 -0
- data/lib/daru/io/io.rb +105 -33
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/arithmetic/vector.rb +38 -2
- data/lib/daru/maths/statistics/dataframe.rb +28 -30
- data/lib/daru/maths/statistics/vector.rb +295 -41
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +308 -96
- data/lib/daru/version.rb +1 -1
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/gsl_wrapper_spec.rb +38 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
- data/spec/category_spec.rb +24 -20
- data/spec/core/group_by_spec.rb +238 -4
- data/spec/core/merge_spec.rb +1 -1
- data/spec/core/query_spec.rb +65 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +473 -16
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/date_time/offsets_spec.rb +14 -0
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +55 -55
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +160 -41
- data/spec/index/multi_index_spec.rb +143 -33
- data/spec/io/io_spec.rb +246 -2
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
- data/spec/maths/arithmetic/vector_spec.rb +18 -0
- data/spec/maths/statistics/vector_spec.rb +153 -15
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +30 -7
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +306 -72
- metadata +96 -55
- data/spec/fixtures/stock_data.csv +0 -500
data/lib/daru/version.rb
CHANGED
@@ -1,50 +1,53 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
if Daru.has_gsl?
|
2
|
+
describe Daru::Accessors::GSLWrapper do
|
3
|
+
before :each do
|
4
|
+
@stub_context = Object.new
|
5
|
+
@gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
|
6
|
+
end
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
8
|
+
context ".new" do
|
9
|
+
it "actually creates a GSL Vector" do
|
10
|
+
expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
|
11
|
+
end
|
10
12
|
end
|
11
|
-
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
context "#mean" do
|
15
|
+
it "computes mean" do
|
16
|
+
expect(@gsl_wrapper.mean).to eq(3.5)
|
17
|
+
end
|
16
18
|
end
|
17
|
-
end
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
20
|
+
context "#map!" do
|
21
|
+
it "destructively maps" do
|
22
|
+
expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
|
23
|
+
Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
|
24
|
+
)
|
25
|
+
end
|
24
26
|
end
|
25
|
-
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
context "#delete_at" do
|
29
|
+
it "deletes at key" do
|
30
|
+
expect(@gsl_wrapper.delete_at(2)).to eq(3)
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
expect(@gsl_wrapper).to eq(
|
33
|
+
Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
|
34
|
+
)
|
35
|
+
end
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
|
38
|
+
context "#index" do
|
39
|
+
it "returns index of value" do
|
40
|
+
expect(@gsl_wrapper.index(3)).to eq(2)
|
41
|
+
end
|
40
42
|
end
|
41
|
-
end
|
42
43
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
context "#push" do
|
45
|
+
it "appends element" do
|
46
|
+
expect(@gsl_wrapper.push(15)).to eq(
|
47
|
+
Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
|
48
|
+
)
|
49
|
+
end
|
48
50
|
end
|
49
51
|
end
|
50
52
|
end
|
53
|
+
|
@@ -1,32 +1,35 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
if Daru.has_nmatrix?
|
4
|
+
describe Daru::Accessors::NMatrixWrapper do
|
5
|
+
before :each do
|
6
|
+
stub_context = Object.new
|
7
|
+
@nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
|
8
|
+
end
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
it "checks for actual NMatrix creation" do
|
11
|
+
expect(@nm_wrapper.data.class).to eq(NMatrix)
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
it "checks the actual size of the NMatrix object" do
|
15
|
+
expect(@nm_wrapper.data.size).to eq(10)
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
it "checks that @size is the number of elements in the vector" do
|
19
|
+
expect(@nm_wrapper.size).to eq(5)
|
20
|
+
end
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
it "checks for underlying NMatrix data type" do
|
23
|
+
expect(@nm_wrapper.data.dtype).to eq(:float32)
|
24
|
+
end
|
24
25
|
|
25
|
-
|
26
|
-
|
26
|
+
it "resizes" do
|
27
|
+
@nm_wrapper.resize(100)
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
expect(@nm_wrapper.size).to eq(5)
|
30
|
+
expect(@nm_wrapper.data.size).to eq(100)
|
31
|
+
expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
|
32
|
+
end
|
31
33
|
end
|
32
34
|
end
|
35
|
+
|
data/spec/category_spec.rb
CHANGED
@@ -383,13 +383,13 @@ describe Daru::Vector, "categorical" do
|
|
383
383
|
context "#rename_categories" do
|
384
384
|
context 'rename base category' do
|
385
385
|
let(:dv) { Daru::Vector.new [:a, 1, :a, 1, :c], type: :category,
|
386
|
-
categories: [:a, :x, :y, :c, :b, 1]}
|
386
|
+
categories: [:a, :x, :y, :c, :b, 1]}
|
387
387
|
subject { dv.rename_categories :a => 1, 1 => 2 }
|
388
388
|
|
389
389
|
it { is_expected.to be_a Daru::Vector }
|
390
390
|
its(:to_a) { is_expected.to eq [1, 2, 1, 2, :c] }
|
391
391
|
its(:categories) { is_expected.to eq [:x, :y, :c, :b, 1, 2] }
|
392
|
-
its(:base_category) { is_expected.to eq 1 }
|
392
|
+
its(:base_category) { is_expected.to eq 1 }
|
393
393
|
end
|
394
394
|
|
395
395
|
context 'rename non-base category' do
|
@@ -1398,7 +1398,7 @@ describe Daru::Vector, "categorical" do
|
|
1398
1398
|
index: 11..18, type: :category }
|
1399
1399
|
context 'reject only nils' do
|
1400
1400
|
subject { dv.reject_values nil }
|
1401
|
-
|
1401
|
+
|
1402
1402
|
it { is_expected.to be_a Daru::Vector }
|
1403
1403
|
its(:type) { is_expected.to eq :category }
|
1404
1404
|
its(:to_a) { is_expected.to eq [1, 3, :a, Float::NAN, Float::NAN, 1] }
|
@@ -1407,7 +1407,7 @@ describe Daru::Vector, "categorical" do
|
|
1407
1407
|
|
1408
1408
|
context 'reject only float::NAN' do
|
1409
1409
|
subject { dv.reject_values Float::NAN }
|
1410
|
-
|
1410
|
+
|
1411
1411
|
it { is_expected.to be_a Daru::Vector }
|
1412
1412
|
its(:type) { is_expected.to eq :category }
|
1413
1413
|
its(:to_a) { is_expected.to eq [1, nil, 3, :a, nil, 1] }
|
@@ -1422,7 +1422,7 @@ describe Daru::Vector, "categorical" do
|
|
1422
1422
|
its(:to_a) { is_expected.to eq [1, 3, :a, 1] }
|
1423
1423
|
its(:'index.to_a') { is_expected.to eq [11, 13, 14, 18] }
|
1424
1424
|
end
|
1425
|
-
|
1425
|
+
|
1426
1426
|
context 'reject any other value' do
|
1427
1427
|
subject { dv.reject_values 1, 3, 20 }
|
1428
1428
|
|
@@ -1434,19 +1434,19 @@ describe Daru::Vector, "categorical" do
|
|
1434
1434
|
|
1435
1435
|
context 'when resultant vector has only one value' do
|
1436
1436
|
subject { dv.reject_values 1, :a, nil, Float::NAN }
|
1437
|
-
|
1437
|
+
|
1438
1438
|
it { is_expected.to be_a Daru::Vector }
|
1439
1439
|
its(:to_a) { is_expected.to eq [3] }
|
1440
1440
|
its(:'index.to_a') { is_expected.to eq [13] }
|
1441
1441
|
end
|
1442
|
-
|
1442
|
+
|
1443
1443
|
context 'when resultant vector has no value' do
|
1444
1444
|
subject { dv.reject_values 1, 3, :a, nil, Float::NAN, 5 }
|
1445
|
-
|
1445
|
+
|
1446
1446
|
it { is_expected.to be_a Daru::Vector }
|
1447
1447
|
its(:to_a) { is_expected.to eq [] }
|
1448
1448
|
its(:'index.to_a') { is_expected.to eq [] }
|
1449
|
-
end
|
1449
|
+
end
|
1450
1450
|
end
|
1451
1451
|
|
1452
1452
|
context '#include_values?' do
|
@@ -1484,27 +1484,27 @@ describe Daru::Vector, "categorical" do
|
|
1484
1484
|
type: :category}
|
1485
1485
|
it { expect(dv.include_values? nil, Float::NAN).to eq true }
|
1486
1486
|
end
|
1487
|
-
|
1487
|
+
|
1488
1488
|
context 'true with only Float::NAN' do
|
1489
1489
|
let(:dv) { Daru::Vector.new [1, nil, 2, 3],
|
1490
1490
|
type: :category}
|
1491
1491
|
it { expect(dv.include_values? nil, Float::NAN).to eq true }
|
1492
1492
|
end
|
1493
|
-
|
1493
|
+
|
1494
1494
|
context 'false' do
|
1495
1495
|
let(:dv) { Daru::Vector.new [1, 2, 3],
|
1496
1496
|
type: :category}
|
1497
1497
|
it { expect(dv.include_values? nil, Float::NAN).to eq false }
|
1498
1498
|
end
|
1499
1499
|
end
|
1500
|
-
|
1500
|
+
|
1501
1501
|
context 'any other value' do
|
1502
1502
|
context 'true' do
|
1503
1503
|
let(:dv) { Daru::Vector.new [1, 2, 3, 4, nil],
|
1504
1504
|
type: :category }
|
1505
1505
|
it { expect(dv.include_values? 1, 2, 3, 5).to eq true }
|
1506
1506
|
end
|
1507
|
-
|
1507
|
+
|
1508
1508
|
context 'false' do
|
1509
1509
|
let(:dv) { Daru::Vector.new [1, 2, 3, 4, nil],
|
1510
1510
|
type: :category }
|
@@ -1525,12 +1525,12 @@ describe Daru::Vector, "categorical" do
|
|
1525
1525
|
context Daru::Index do
|
1526
1526
|
let(:dv) { Daru::Vector.new [1, 2, 1, 2, 3, nil, nil, Float::NAN],
|
1527
1527
|
index: 11..18, type: :category }
|
1528
|
-
|
1528
|
+
|
1529
1529
|
subject { dv.indexes 1, 2, nil, Float::NAN }
|
1530
1530
|
it { is_expected.to be_a Array }
|
1531
1531
|
it { is_expected.to eq [11, 12, 13, 14, 16, 17, 18] }
|
1532
1532
|
end
|
1533
|
-
|
1533
|
+
|
1534
1534
|
context Daru::MultiIndex do
|
1535
1535
|
let(:mi) do
|
1536
1536
|
Daru::MultiIndex.from_tuples([
|
@@ -1546,7 +1546,7 @@ describe Daru::Vector, "categorical" do
|
|
1546
1546
|
end
|
1547
1547
|
let(:dv) { Daru::Vector.new [1, 2, 1, 2, 3, nil, nil, Float::NAN],
|
1548
1548
|
index: mi, type: :category }
|
1549
|
-
|
1549
|
+
|
1550
1550
|
subject { dv.indexes 1, 2, Float::NAN }
|
1551
1551
|
it { is_expected.to be_a Array }
|
1552
1552
|
it { is_expected.to eq(
|
@@ -1559,7 +1559,7 @@ describe Daru::Vector, "categorical" do
|
|
1559
1559
|
]) }
|
1560
1560
|
end
|
1561
1561
|
end
|
1562
|
-
|
1562
|
+
|
1563
1563
|
context '#replace_values' do
|
1564
1564
|
subject do
|
1565
1565
|
Daru::Vector.new(
|
@@ -1573,14 +1573,14 @@ describe Daru::Vector, "categorical" do
|
|
1573
1573
|
its(:type) { is_expected.to eq :category }
|
1574
1574
|
its(:to_a) { is_expected.to eq [1, 2, 1, 4, 10, 10, 10, 10] }
|
1575
1575
|
end
|
1576
|
-
|
1576
|
+
|
1577
1577
|
context 'replace arbitrary values' do
|
1578
1578
|
before { subject.replace_values [1, 2], 10 }
|
1579
1579
|
its(:type) { is_expected.to eq :category }
|
1580
1580
|
its(:to_a) { is_expected.to eq(
|
1581
1581
|
[10, 10, 10, 4, nil, Float::NAN, nil, Float::NAN]) }
|
1582
1582
|
end
|
1583
|
-
|
1583
|
+
|
1584
1584
|
context 'works for single value' do
|
1585
1585
|
before { subject.replace_values nil, 10 }
|
1586
1586
|
its(:type) { is_expected.to eq :category }
|
@@ -1596,7 +1596,7 @@ describe Daru::DataFrame, "categorical" do
|
|
1596
1596
|
Daru::DataFrame.new({
|
1597
1597
|
a: [1, 2, 3, 4, 5],
|
1598
1598
|
b: ['first', 'second', 'first', 'second', 'third'],
|
1599
|
-
c: ['a', 'b', 'a', 'b',
|
1599
|
+
c: ['a', 'b', 'a', 'b', nil]
|
1600
1600
|
})
|
1601
1601
|
end
|
1602
1602
|
before { df.to_category :b, :c }
|
@@ -1605,6 +1605,10 @@ describe Daru::DataFrame, "categorical" do
|
|
1605
1605
|
it { is_expected.to be_a Daru::DataFrame }
|
1606
1606
|
its(:'b.type') { is_expected.to eq :category }
|
1607
1607
|
its(:'c.type') { is_expected.to eq :category }
|
1608
|
+
its(:'a.count') { is_expected.to eq 5 }
|
1609
|
+
its(:'c.count') { is_expected.to eq 5 }
|
1610
|
+
it { expect(df.c.count('a')).to eq 2 }
|
1611
|
+
it { expect(df.c.count(nil)).to eq 1 }
|
1608
1612
|
end
|
1609
1613
|
|
1610
1614
|
context "#interact_code" do
|
data/spec/core/group_by_spec.rb
CHANGED
@@ -29,6 +29,7 @@ describe Daru::Core::GroupBy do
|
|
29
29
|
['foo', 'three', 8],
|
30
30
|
['foo', 'two' , 3]
|
31
31
|
])
|
32
|
+
|
32
33
|
end
|
33
34
|
|
34
35
|
context 'with nil values' do
|
@@ -46,6 +47,34 @@ describe Daru::Core::GroupBy do
|
|
46
47
|
end
|
47
48
|
|
48
49
|
context "#initialize" do
|
50
|
+
let(:df_emp) { Daru::DataFrame.new(
|
51
|
+
employee: %w[John Jane Mark John Jane Mark],
|
52
|
+
month: %w[June June June July July July],
|
53
|
+
salary: [1000, 500, 700, 1200, 600, 600]
|
54
|
+
) }
|
55
|
+
let(:employee_grp) { df_emp.group_by(:employee).df }
|
56
|
+
let(:mi_single) { Daru::MultiIndex.from_tuples([
|
57
|
+
['Jane', 1], ['Jane', 4], ['John', 0],
|
58
|
+
['John', 3], ['Mark', 2], ['Mark', 5]
|
59
|
+
]
|
60
|
+
)}
|
61
|
+
|
62
|
+
let(:emp_month_grp) { df_emp.group_by([:employee, :month]).df }
|
63
|
+
let(:mi_double) { Daru::MultiIndex.from_tuples([
|
64
|
+
['Jane', 'July', 4], ['Jane', 'June', 1], ['John', 'July', 3],
|
65
|
+
['John', 'June', 0], ['Mark', 'July', 5], ['Mark', 'June', 2]
|
66
|
+
]
|
67
|
+
)}
|
68
|
+
|
69
|
+
let(:emp_month_salary_grp) {
|
70
|
+
df_emp.group_by([:employee, :month, :salary]).df }
|
71
|
+
let(:mi_triple) { Daru::MultiIndex.from_tuples([
|
72
|
+
['Jane', 'July', 600, 4], ['Jane', 'June', 500, 1],
|
73
|
+
['John', 'July', 1200, 3], ['John', 'June', 1000, 0],
|
74
|
+
['Mark', 'July', 600, 5], ['Mark', 'June', 700, 2]
|
75
|
+
]
|
76
|
+
)}
|
77
|
+
|
49
78
|
it "groups by a single tuple" do
|
50
79
|
expect(@sl_group.groups).to eq({
|
51
80
|
['bar'] => [1,3,5],
|
@@ -53,6 +82,24 @@ describe Daru::Core::GroupBy do
|
|
53
82
|
})
|
54
83
|
end
|
55
84
|
|
85
|
+
it "returns dataframe with MultiIndex, groups by single layer hierarchy" do
|
86
|
+
expect(employee_grp).to eq(Daru::DataFrame.new({
|
87
|
+
month: ["June", "July", "June", "July", "June", "July"],
|
88
|
+
salary: [500, 600, 1000, 1200, 700, 600]
|
89
|
+
}, index: mi_single))
|
90
|
+
end
|
91
|
+
|
92
|
+
it "returns dataframe with MultiIndex, groups by double layer hierarchy" do
|
93
|
+
expect(emp_month_grp).to eq(Daru::DataFrame.new({
|
94
|
+
salary: [600, 500, 1200, 1000, 600, 700]
|
95
|
+
}, index: mi_double))
|
96
|
+
end
|
97
|
+
|
98
|
+
it "returns dataframe with MultiIndex, groups by triple layer hierarchy" do
|
99
|
+
expect(emp_month_salary_grp).to eq(Daru::DataFrame.new({
|
100
|
+
}, index: mi_triple))
|
101
|
+
end
|
102
|
+
|
56
103
|
it "groups by a double layer hierarchy" do
|
57
104
|
expect(@dl_group.groups).to eq({
|
58
105
|
['foo', 'one'] => [0,6],
|
@@ -154,6 +201,22 @@ describe Daru::Core::GroupBy do
|
|
154
201
|
end
|
155
202
|
end
|
156
203
|
|
204
|
+
context '#each_group without block' do
|
205
|
+
it 'enumerates groups' do
|
206
|
+
enum = @dl_group.each_group
|
207
|
+
|
208
|
+
expect(enum.count).to eq 6
|
209
|
+
expect(enum).to all be_a(Daru::DataFrame)
|
210
|
+
expect(enum.to_a.last).to eq(Daru::DataFrame.new({
|
211
|
+
a: ['foo', 'foo'],
|
212
|
+
b: ['two', 'two'],
|
213
|
+
c: [3, 3],
|
214
|
+
d: [33, 55]
|
215
|
+
}, index: [2, 4]
|
216
|
+
))
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
157
220
|
context '#first' do
|
158
221
|
it 'gets the first row from each group' do
|
159
222
|
expect(@dl_group.first).to eq(Daru::DataFrame.new({
|
@@ -176,10 +239,6 @@ describe Daru::Core::GroupBy do
|
|
176
239
|
end
|
177
240
|
end
|
178
241
|
|
179
|
-
context "#aggregate" do
|
180
|
-
pending
|
181
|
-
end
|
182
|
-
|
183
242
|
context "#mean" do
|
184
243
|
it "computes mean of the numeric columns of a single layer group" do
|
185
244
|
expect(@sl_group.mean).to eq(Daru::DataFrame.new({
|
@@ -418,4 +477,179 @@ describe Daru::Core::GroupBy do
|
|
418
477
|
|
419
478
|
it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
|
420
479
|
end
|
480
|
+
|
481
|
+
context 'when dataframe tuples contain nils in mismatching positions' do
|
482
|
+
|
483
|
+
let(:df){
|
484
|
+
Daru::DataFrame.new(
|
485
|
+
{
|
486
|
+
'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
|
487
|
+
'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
|
488
|
+
'num' => [1, nil, 3, 4, 5, 6, 7, nil]
|
489
|
+
}
|
490
|
+
)
|
491
|
+
}
|
492
|
+
|
493
|
+
it 'groups by without errors' do
|
494
|
+
expect { df.group_by(df.vectors.map(&:to_s)) }.to_not raise_error(ArgumentError)
|
495
|
+
end
|
496
|
+
end
|
497
|
+
|
498
|
+
context '#aggregate' do
|
499
|
+
let(:dataframe) { Daru::DataFrame.new({
|
500
|
+
employee: %w[John Jane Mark John Jane Mark],
|
501
|
+
month: %w[June June June July July July],
|
502
|
+
salary: [1000, 500, 700, 1200, 600, 600]})
|
503
|
+
}
|
504
|
+
context 'group and aggregate sum for particular single vector' do
|
505
|
+
subject { dataframe.group_by([:employee]).aggregate(salary: :sum) }
|
506
|
+
|
507
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
508
|
+
salary: [1100, 2200, 1300]},
|
509
|
+
index: ['Jane', 'John', 'Mark'])
|
510
|
+
}
|
511
|
+
end
|
512
|
+
|
513
|
+
context 'group and aggregate sum and lambda function for vectors' do
|
514
|
+
subject { dataframe.group_by([:employee]).aggregate(
|
515
|
+
salary: :sum,
|
516
|
+
month: ->(vec) { vec.to_a.join('/') }) }
|
517
|
+
|
518
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
519
|
+
salary: [1100, 2200, 1300],
|
520
|
+
month: ['June/July', 'June/July', 'June/July']},
|
521
|
+
index: ['Jane', 'John', 'Mark'],
|
522
|
+
order: [:salary, :month])
|
523
|
+
}
|
524
|
+
end
|
525
|
+
|
526
|
+
context 'group and aggregate sum and lambda functions on dataframe' do
|
527
|
+
subject { dataframe.group_by([:employee]).aggregate(
|
528
|
+
salary: :sum,
|
529
|
+
month: ->(vec) { vec.to_a.join('/') },
|
530
|
+
mean_salary: ->(df) { df.salary.mean },
|
531
|
+
periods: ->(df) { df.size }
|
532
|
+
)}
|
533
|
+
|
534
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
535
|
+
salary: [1100, 2200, 1300],
|
536
|
+
month: ['June/July', 'June/July', 'June/July'],
|
537
|
+
mean_salary: [550.0, 1100.0, 650.0],
|
538
|
+
periods: [2, 2, 2]},
|
539
|
+
index: ['Jane', 'John', 'Mark'], order: [:salary, :month,
|
540
|
+
:mean_salary, :periods]) }
|
541
|
+
end
|
542
|
+
|
543
|
+
context 'group_by and aggregate on mixed MultiIndex' do
|
544
|
+
let(:df) { Daru::DataFrame.new(
|
545
|
+
name: ['Ram','Krishna','Ram','Krishna','Krishna'],
|
546
|
+
visited: [
|
547
|
+
'Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
|
548
|
+
)
|
549
|
+
}
|
550
|
+
let(:df_mixed) { Daru::DataFrame.new(
|
551
|
+
name: ['Krishna','Ram','Krishna','Krishna'],
|
552
|
+
visited: [
|
553
|
+
'Delhi', 'Mumbai', 'Raipur', 'Banglore']
|
554
|
+
)
|
555
|
+
}
|
556
|
+
it 'group_by' do
|
557
|
+
expect(df.group_by(:name).df).to eq(
|
558
|
+
Daru::DataFrame.new({
|
559
|
+
visited: ['Delhi', 'Raipur', 'Banglore', 'Hyderabad', 'Mumbai']},
|
560
|
+
index: Daru::MultiIndex.from_tuples(
|
561
|
+
[['Krishna', 1], ['Krishna', 3], ['Krishna', 4],
|
562
|
+
['Ram', 0], ['Ram', 2]]
|
563
|
+
)
|
564
|
+
)
|
565
|
+
)
|
566
|
+
end
|
567
|
+
|
568
|
+
it 'group_by and aggregate' do
|
569
|
+
expect(
|
570
|
+
df.group_by(:name).aggregate(
|
571
|
+
visited: -> (vec){vec.to_a.join(',')})).to eq(
|
572
|
+
Daru::DataFrame.new({
|
573
|
+
visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai']},
|
574
|
+
index: ['Krishna', 'Ram']
|
575
|
+
)
|
576
|
+
)
|
577
|
+
end
|
578
|
+
|
579
|
+
it 'group_by and aggregate when anyone index is not multiple times' do
|
580
|
+
expect(
|
581
|
+
df_mixed.group_by(:name).aggregate(
|
582
|
+
visited: -> (vec){vec.to_a.join(',')})).to eq(
|
583
|
+
Daru::DataFrame.new({
|
584
|
+
visited: ['Delhi,Raipur,Banglore', 'Mumbai']},
|
585
|
+
index: ['Krishna', 'Ram']
|
586
|
+
)
|
587
|
+
)
|
588
|
+
end
|
589
|
+
end
|
590
|
+
|
591
|
+
let(:spending_df) {
|
592
|
+
Daru::DataFrame.rows([
|
593
|
+
[2010, 'dev', 50, 1],
|
594
|
+
[2010, 'dev', 150, 1],
|
595
|
+
[2010, 'dev', 200, 1],
|
596
|
+
[2011, 'dev', 50, 1],
|
597
|
+
[2012, 'dev', 150, 1],
|
598
|
+
|
599
|
+
[2011, 'office', 300, 1],
|
600
|
+
|
601
|
+
[2010, 'market', 50, 1],
|
602
|
+
[2011, 'market', 500, 1],
|
603
|
+
[2012, 'market', 500, 1],
|
604
|
+
[2012, 'market', 300, 1],
|
605
|
+
|
606
|
+
[2012, 'R&D', 10, 1],],
|
607
|
+
order: [:year, :category, :spending, :nb_spending])
|
608
|
+
}
|
609
|
+
let(:multi_index_year_category) {
|
610
|
+
Daru::MultiIndex.from_tuples([
|
611
|
+
[2010, "dev"], [2010, "market"],
|
612
|
+
[2011, "dev"], [2011, "market"], [2011, "office"],
|
613
|
+
[2012, "R&D"], [2012, "dev"], [2012, "market"]])
|
614
|
+
}
|
615
|
+
|
616
|
+
context 'group_by and aggregate on multiple elements' do
|
617
|
+
it 'does aggregate' do
|
618
|
+
expect(spending_df.group_by([:year, :category]).aggregate(spending: :sum)).to eq(
|
619
|
+
Daru::DataFrame.new({spending: [400, 50, 50, 500, 300, 10, 150, 800]}, index: multi_index_year_category))
|
620
|
+
end
|
621
|
+
|
622
|
+
it 'works as older methods' do
|
623
|
+
older_way = spending_df.group_by([:year, :category]).sum
|
624
|
+
|
625
|
+
newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
|
626
|
+
expect(newer_way).to eq(older_way)
|
627
|
+
|
628
|
+
contrived_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending_lambda: ->(df) { df[:nb_spending].sum })
|
629
|
+
contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
|
630
|
+
expect(contrived_way).to eq(older_way)
|
631
|
+
end
|
632
|
+
|
633
|
+
context 'can aggregate on MultiIndex' do
|
634
|
+
let(:multi_indexed_aggregated_df) { spending_df.group_by([:year, :category]).aggregate(spending: :sum) }
|
635
|
+
let(:index_year) { Daru::Index.new([2010, 2011, 2012]) }
|
636
|
+
let(:index_category) { Daru::Index.new(["dev", "market", "office", "R&D"]) }
|
637
|
+
|
638
|
+
it 'aggregates by default on the last layer of MultiIndex' do
|
639
|
+
expect(multi_indexed_aggregated_df.aggregate(spending: :sum)).to eq(
|
640
|
+
Daru::DataFrame.new({spending: [450, 850, 960]}, index: index_year))
|
641
|
+
end
|
642
|
+
|
643
|
+
it 'can aggregate on the first layer of MultiIndex' do
|
644
|
+
expect(multi_indexed_aggregated_df.aggregate({spending: :sum},0)).to eq(
|
645
|
+
Daru::DataFrame.new({spending: [600, 1350, 300, 10]}, index: index_category))
|
646
|
+
end
|
647
|
+
|
648
|
+
it 'does coercion: when one layer is remaining, MultiIndex is coerced in Index that does not aggregate anymore' do
|
649
|
+
df_with_simple_index = multi_indexed_aggregated_df.aggregate(spending: :sum)
|
650
|
+
expect(df_with_simple_index.aggregate(spending: :sum)).to eq(df_with_simple_index)
|
651
|
+
end
|
652
|
+
end
|
653
|
+
end
|
654
|
+
end
|
421
655
|
end
|