daru 0.1.4.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -0
- data/CONTRIBUTING.md +27 -3
- data/Guardfile +7 -0
- data/History.md +39 -1
- data/README.md +1 -1
- data/daru.gemspec +9 -2
- data/lib/daru.rb +4 -1
- data/lib/daru/accessors/gsl_wrapper.rb +93 -91
- data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
- data/lib/daru/category.rb +22 -15
- data/lib/daru/core/group_by.rb +13 -2
- data/lib/daru/core/merge.rb +37 -31
- data/lib/daru/core/query.rb +10 -2
- data/lib/daru/dataframe.rb +95 -34
- data/lib/daru/date_time/index.rb +15 -16
- data/lib/daru/date_time/offsets.rb +14 -11
- data/lib/daru/formatters/table.rb +2 -2
- data/lib/daru/index/categorical_index.rb +201 -0
- data/lib/daru/index/index.rb +289 -0
- data/lib/daru/index/multi_index.rb +266 -0
- data/lib/daru/maths/statistics/vector.rb +13 -9
- data/lib/daru/monkeys.rb +0 -7
- data/lib/daru/plotting/gruff/category.rb +1 -0
- data/lib/daru/plotting/gruff/dataframe.rb +3 -3
- data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
- data/lib/daru/vector.rb +36 -21
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
- data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
- data/spec/core/group_by_spec.rb +17 -1
- data/spec/core/merge_spec.rb +38 -1
- data/spec/core/query_spec.rb +5 -0
- data/spec/dataframe_spec.rb +230 -57
- data/spec/date_time/offsets_spec.rb +84 -3
- data/spec/formatters/table_formatter_spec.rb +9 -0
- data/spec/index/categorical_index_spec.rb +2 -0
- data/spec/index/index_spec.rb +17 -2
- data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
- data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
- data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
- data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
- data/spec/plotting/gruff/category_spec.rb +44 -0
- data/spec/plotting/gruff/dataframe_spec.rb +84 -0
- data/spec/plotting/gruff/vector_spec.rb +70 -0
- data/spec/plotting/nyaplot/category_spec.rb +51 -0
- data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
- data/spec/plotting/nyaplot/vector_spec.rb +66 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/vector_spec.rb +68 -1
- metadata +53 -24
- data/lib/daru/index.rb +0 -761
- data/spec/plotting/vector_spec.rb +0 -230
data/lib/daru/version.rb
CHANGED
@@ -1,38 +1,3 @@
|
|
1
|
-
describe Daru::Accessors::NMatrixWrapper do
|
2
|
-
before :each do
|
3
|
-
stub_context = Object.new
|
4
|
-
@nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
|
5
|
-
end
|
6
|
-
|
7
|
-
it "checks for actual NMatrix creation" do
|
8
|
-
expect(@nm_wrapper.data.class).to eq(NMatrix)
|
9
|
-
end
|
10
|
-
|
11
|
-
it "checks the actual size of the NMatrix object" do
|
12
|
-
expect(@nm_wrapper.data.size).to eq(10)
|
13
|
-
end
|
14
|
-
|
15
|
-
it "checks that @size is the number of elements in the vector" do
|
16
|
-
expect(@nm_wrapper.size).to eq(5)
|
17
|
-
end
|
18
|
-
|
19
|
-
it "checks for underlying NMatrix data type" do
|
20
|
-
expect(@nm_wrapper.data.dtype).to eq(:float32)
|
21
|
-
end
|
22
|
-
|
23
|
-
it "resizes" do
|
24
|
-
@nm_wrapper.resize(100)
|
25
|
-
|
26
|
-
expect(@nm_wrapper.size).to eq(5)
|
27
|
-
expect(@nm_wrapper.data.size).to eq(100)
|
28
|
-
expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
describe Daru::Accessors::ArrayWrapper do
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
1
|
describe Daru::Accessors::GSLWrapper do
|
37
2
|
before :each do
|
38
3
|
@stub_context = Object.new
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper.rb'
|
2
|
+
|
3
|
+
describe Daru::Accessors::NMatrixWrapper do
|
4
|
+
before :each do
|
5
|
+
stub_context = Object.new
|
6
|
+
@nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "checks for actual NMatrix creation" do
|
10
|
+
expect(@nm_wrapper.data.class).to eq(NMatrix)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "checks the actual size of the NMatrix object" do
|
14
|
+
expect(@nm_wrapper.data.size).to eq(10)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "checks that @size is the number of elements in the vector" do
|
18
|
+
expect(@nm_wrapper.size).to eq(5)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "checks for underlying NMatrix data type" do
|
22
|
+
expect(@nm_wrapper.data.dtype).to eq(:float32)
|
23
|
+
end
|
24
|
+
|
25
|
+
it "resizes" do
|
26
|
+
@nm_wrapper.resize(100)
|
27
|
+
|
28
|
+
expect(@nm_wrapper.size).to eq(5)
|
29
|
+
expect(@nm_wrapper.data.size).to eq(100)
|
30
|
+
expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
|
31
|
+
end
|
32
|
+
end
|
@@ -305,6 +305,9 @@ describe Daru::Vector, "categorical" do
|
|
305
305
|
its(:'index.to_a') { is_expected.to eq [:a, :b, :c, :d, 1] }
|
306
306
|
its(:to_a) { is_expected.to eq [0.4, 0, 0.2, 0, 0.4] }
|
307
307
|
end
|
308
|
+
context "invalid argument" do
|
309
|
+
it { expect { dv.frequencies :hash }.to raise_error ArgumentError }
|
310
|
+
end
|
308
311
|
end
|
309
312
|
|
310
313
|
context "#to_category" do
|
data/spec/core/group_by_spec.rb
CHANGED
@@ -5,7 +5,7 @@ describe Daru::Core::GroupBy do
|
|
5
5
|
b: %w{one one two three two two one three},
|
6
6
|
c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
7
7
|
d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
8
|
-
})
|
8
|
+
}, order: [:a, :b, :c, :d])
|
9
9
|
|
10
10
|
@sl_group = @df.group_by(:a)
|
11
11
|
@dl_group = @df.group_by([:a, :b])
|
@@ -402,4 +402,20 @@ describe Daru::Core::GroupBy do
|
|
402
402
|
Daru::Vector.new(['one', 'three', 'two', 'oneone', 'three', 'twotwo'], index: @dl_multi_index)
|
403
403
|
end
|
404
404
|
end
|
405
|
+
|
406
|
+
context 'groups by first vector if no vector mentioned' do
|
407
|
+
subject { @df.group_by }
|
408
|
+
|
409
|
+
it { is_expected.to be_a Daru::Core::GroupBy }
|
410
|
+
its(:groups) { is_expected.to eq @sl_group.groups }
|
411
|
+
its(:size) { is_expected.to eq @sl_group.size }
|
412
|
+
end
|
413
|
+
|
414
|
+
context 'group and sum with numeric indices' do
|
415
|
+
let(:df) { Daru::DataFrame.new({ g: ['a','a','a'], num: [1,2,3]}, index: [2,12,23]) }
|
416
|
+
|
417
|
+
subject { df.group_by([:g]).sum }
|
418
|
+
|
419
|
+
it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
|
420
|
+
end
|
405
421
|
end
|
data/spec/core/merge_spec.rb
CHANGED
@@ -37,6 +37,18 @@ describe Daru::DataFrame do
|
|
37
37
|
expect(@left.join(@right_many, how: :inner, on: [:id])).to eq(answer)
|
38
38
|
end
|
39
39
|
|
40
|
+
it "performs an inner join of two dataframes that has many to one mapping" do
|
41
|
+
left_many = @right_many
|
42
|
+
right = @left
|
43
|
+
|
44
|
+
answer = Daru::DataFrame.new({
|
45
|
+
:name_2 => ['Pirate', 'Pirate', 'Pirate', 'Pirate'],
|
46
|
+
:id => [1,1,1,1],
|
47
|
+
:name_1 => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
48
|
+
}, order: [:name_1, :id, :name_2])
|
49
|
+
expect(left_many.join(right, how: :inner, on: [:id])).to eq(answer)
|
50
|
+
end
|
51
|
+
|
40
52
|
it "performs an inner join of two dataframes that has many to many mapping" do
|
41
53
|
@left[:id].recode! { |v| v == 2 ? 1 : v }
|
42
54
|
answer = Daru::DataFrame.new({
|
@@ -56,6 +68,18 @@ describe Daru::DataFrame do
|
|
56
68
|
expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
|
57
69
|
end
|
58
70
|
|
71
|
+
it "adds a left/right indicator" do
|
72
|
+
answer = Daru::DataFrame.new({
|
73
|
+
:id_1 => [nil,2,3,1,nil,4],
|
74
|
+
:name => ["Darth Vader", "Monkey", "Ninja", "Pirate", "Rutabaga", "Spaghetti"],
|
75
|
+
:id_2 => [3,nil,4,2,1,nil]
|
76
|
+
}, order: [:id_1, :name, :id_2])
|
77
|
+
|
78
|
+
outer = @left.join(@right, how: :outer, on: [:name], indicator: :my_indicator)
|
79
|
+
expect(outer[:my_indicator].to_a).to eq [:right_only, :left_only, :both, :both, :right_only, :left_only]
|
80
|
+
end
|
81
|
+
|
82
|
+
|
59
83
|
it "performs a full outer join when the right join keys have nils" do
|
60
84
|
@right[:name].recode! { |v| v == 'Rutabaga' ? nil : v }
|
61
85
|
answer = Daru::DataFrame.new({
|
@@ -117,8 +141,21 @@ describe Daru::DataFrame do
|
|
117
141
|
expect(@left.join(@right, how: :right, on: [:name])).to eq(answer)
|
118
142
|
end
|
119
143
|
|
144
|
+
it "doesn't convert false into nil when joining boolean values" do
|
145
|
+
left = Daru::DataFrame.new({ key: [1,2,3], left_value: [true, false, true] })
|
146
|
+
right = Daru::DataFrame.new({ key: [1,2,3], right_value: [true, false, true] })
|
147
|
+
|
148
|
+
answer = Daru::DataFrame.new({
|
149
|
+
left_value: [true, false, true],
|
150
|
+
key: [1,2,3],
|
151
|
+
right_value: [true, false, true]
|
152
|
+
}, order: [:left_value, :key, :right_value] )
|
153
|
+
|
154
|
+
expect(left.join(right, on: [:key], how: :inner)).to eq answer
|
155
|
+
end
|
156
|
+
|
120
157
|
it "raises if :on field are absent in one of dataframes" do
|
121
|
-
@right.vectors = [:id, :other_name]
|
158
|
+
@right.vectors = Daru::Index.new [:id, :other_name]
|
122
159
|
expect { @left.join(@right, how: :right, on: [:name]) }.to \
|
123
160
|
raise_error(ArgumentError, /Both dataframes expected .* :name/)
|
124
161
|
|
data/spec/core/query_spec.rb
CHANGED
@@ -266,6 +266,11 @@ describe "Arel-like syntax" do
|
|
266
266
|
@df.where (@df[:names].eq('james') | @df[:sym].eq(:four))
|
267
267
|
).to eq(answer)
|
268
268
|
end
|
269
|
+
|
270
|
+
it "does not give SystemStackError" do
|
271
|
+
v = Daru::Vector.new [1]*300_000
|
272
|
+
expect { v.where v.eq(1) }.not_to raise_error
|
273
|
+
end
|
269
274
|
end
|
270
275
|
|
271
276
|
context Daru::Vector do
|
data/spec/dataframe_spec.rb
CHANGED
@@ -55,6 +55,13 @@ describe Daru::DataFrame do
|
|
55
55
|
expect(df[:a]) .to eq(Daru::Vector.new [1,1,1,1])
|
56
56
|
end
|
57
57
|
|
58
|
+
it "creates empty dataframe" do
|
59
|
+
df = Daru::DataFrame.rows [], order: [:a, :b, :c]
|
60
|
+
|
61
|
+
expect(df.vectors).to eq(Daru::Index.new [:a,:b,:c])
|
62
|
+
expect(df.index).to be_empty
|
63
|
+
end
|
64
|
+
|
58
65
|
it "creates a DataFrame from Vector rows" do
|
59
66
|
rows = @rows.map { |r| Daru::Vector.new r, index: [:a,:b,:c,:d,:e] }
|
60
67
|
|
@@ -170,14 +177,14 @@ describe Daru::DataFrame do
|
|
170
177
|
end
|
171
178
|
|
172
179
|
it "initializes from an Array of Hashes" do
|
173
|
-
df = Daru::DataFrame.new([{a: 1, b: 11}, {a:
|
180
|
+
df = Daru::DataFrame.new([{a: 1, b: 11}, {a: false, b: 12}, {a: 3, b: 13},
|
174
181
|
{a: 4, b: 14}, {a: 5, b: 15}], order: [:b, :a],
|
175
182
|
index: [:one, :two, :three, :four, :five])
|
176
183
|
|
177
184
|
expect(df.index) .to eq(Daru::Index.new [:one, :two, :three, :four, :five])
|
178
185
|
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
179
186
|
expect(df.a.class).to eq(Daru::Vector)
|
180
|
-
expect(df.a) .to eq([1,
|
187
|
+
expect(df.a) .to eq([1,false,3,4,5].dv(:a,[:one, :two, :three, :four, :five]))
|
181
188
|
end
|
182
189
|
|
183
190
|
it "initializes from Array of Arrays" do
|
@@ -612,48 +619,63 @@ describe Daru::DataFrame do
|
|
612
619
|
end
|
613
620
|
|
614
621
|
context '#method_missing' do
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
}
|
622
|
+
let(:df) { Daru::DataFrame.new({
|
623
|
+
:a => [1, 2, 3, 4, 5],
|
624
|
+
'b' => [5, 4, 3, 2, 1]
|
625
|
+
}, index: 11..15)}
|
620
626
|
|
621
|
-
context '
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
627
|
+
context 'get vector' do
|
628
|
+
context 'by string' do
|
629
|
+
subject { df.b }
|
630
|
+
|
631
|
+
it { is_expected.to be_a Daru::Vector }
|
632
|
+
its(:to_a) { is_expected.to eq [5, 4, 3, 2, 1] }
|
633
|
+
its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
|
634
|
+
end
|
635
|
+
|
636
|
+
context 'by symbol' do
|
637
|
+
subject { df.a }
|
638
|
+
|
639
|
+
it { is_expected.to be_a Daru::Vector }
|
640
|
+
its(:to_a) { is_expected.to eq [1, 2, 3, 4, 5] }
|
641
|
+
its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
|
642
|
+
end
|
626
643
|
end
|
627
644
|
|
628
|
-
context '
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
645
|
+
context 'set existing vector' do
|
646
|
+
context 'by string' do
|
647
|
+
before { df.b = [:a, :b, :c, :d, :e] }
|
648
|
+
subject { df }
|
649
|
+
|
650
|
+
it { is_expected.to be_a Daru::DataFrame }
|
651
|
+
its(:'vectors.to_a') { is_expected.to eq [:a, 'b'] }
|
652
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, :c, :d, :e] }
|
653
|
+
its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
|
654
|
+
end
|
655
|
+
|
656
|
+
context 'by symbol' do
|
657
|
+
before { df.a = [:a, :b, :c, :d, :e] }
|
658
|
+
subject { df }
|
659
|
+
|
660
|
+
it { is_expected.to be_a Daru::DataFrame }
|
661
|
+
its(:'vectors.to_a') { is_expected.to eq [:a, 'b'] }
|
662
|
+
its(:'a.to_a') { is_expected.to eq [:a, :b, :c, :d, :e] }
|
663
|
+
its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
|
664
|
+
end
|
638
665
|
end
|
639
666
|
|
640
|
-
context '
|
641
|
-
before{
|
642
|
-
|
643
|
-
|
644
|
-
it { is_expected.to
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
c: [11,22,33,44,55]}, order: [:a, :b, :c, :d],
|
649
|
-
index: [:one, :two, :three, :four, :five]))
|
650
|
-
}
|
667
|
+
context 'set new vector' do
|
668
|
+
before { df.c = [5, 5, 5, 5, 5] }
|
669
|
+
subject { df }
|
670
|
+
|
671
|
+
it { is_expected.to be_a Daru::DataFrame }
|
672
|
+
its(:'vectors.to_a') { is_expected.to eq [:a, 'b', :c] }
|
673
|
+
its(:'c.to_a') { is_expected.to eq [5, 5, 5, 5, 5] }
|
674
|
+
its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
|
651
675
|
end
|
652
676
|
|
653
|
-
context '
|
654
|
-
it
|
655
|
-
expect { data_frame.e }.to raise_error(NoMethodError)
|
656
|
-
end
|
677
|
+
context 'reference invalid vector' do
|
678
|
+
it { expect { df.d }.to raise_error NoMethodError }
|
657
679
|
end
|
658
680
|
end
|
659
681
|
|
@@ -1370,6 +1392,15 @@ describe Daru::DataFrame do
|
|
1370
1392
|
c: [11,22,33,44,55]}, order: [:a, :b, :c])
|
1371
1393
|
|
1372
1394
|
expect(df.row[0]).to eq([1,11,11].dv(nil, [:a, :b, :c]))
|
1395
|
+
expect(df.row[3]).to eq([4,14,44].dv(nil, [:a, :b, :c]))
|
1396
|
+
end
|
1397
|
+
|
1398
|
+
it "returns a row with given Integer index for numerical index DataFrame" do
|
1399
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
1400
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c], index: [1,2,3,4,5])
|
1401
|
+
|
1402
|
+
expect(df.row[0]).to eq([1,11,11].dv(nil, [:a, :b, :c]))
|
1403
|
+
expect(df.row[3]).to eq([3,13,33].dv(nil, [:a, :b, :c]))
|
1373
1404
|
end
|
1374
1405
|
end
|
1375
1406
|
|
@@ -1616,7 +1647,7 @@ describe Daru::DataFrame do
|
|
1616
1647
|
}, index: 11..18)
|
1617
1648
|
end
|
1618
1649
|
before { df.to_category :b }
|
1619
|
-
|
1650
|
+
|
1620
1651
|
context 'remove nils only' do
|
1621
1652
|
subject { df.reject_values nil }
|
1622
1653
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1626,7 +1657,7 @@ describe Daru::DataFrame do
|
|
1626
1657
|
its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
|
1627
1658
|
its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
|
1628
1659
|
end
|
1629
|
-
|
1660
|
+
|
1630
1661
|
context 'remove Float::NAN only' do
|
1631
1662
|
subject { df.reject_values Float::NAN }
|
1632
1663
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1636,7 +1667,7 @@ describe Daru::DataFrame do
|
|
1636
1667
|
its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
|
1637
1668
|
its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
|
1638
1669
|
end
|
1639
|
-
|
1670
|
+
|
1640
1671
|
context 'remove both nil and Float::NAN' do
|
1641
1672
|
subject { df.reject_values nil, Float::NAN }
|
1642
1673
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1646,7 +1677,7 @@ describe Daru::DataFrame do
|
|
1646
1677
|
its(:'c.to_a') { is_expected.to eq ['a', 7] }
|
1647
1678
|
its(:'index.to_a') { is_expected.to eq [11, 18] }
|
1648
1679
|
end
|
1649
|
-
|
1680
|
+
|
1650
1681
|
context 'any other values' do
|
1651
1682
|
subject { df.reject_values 1, 5 }
|
1652
1683
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1664,9 +1695,9 @@ describe Daru::DataFrame do
|
|
1664
1695
|
its(:'a.to_a') { is_expected.to eq [7] }
|
1665
1696
|
its(:'b.to_a') { is_expected.to eq [8] }
|
1666
1697
|
its(:'c.to_a') { is_expected.to eq [7] }
|
1667
|
-
its(:'index.to_a') { is_expected.to eq [18] }
|
1698
|
+
its(:'index.to_a') { is_expected.to eq [18] }
|
1668
1699
|
end
|
1669
|
-
|
1700
|
+
|
1670
1701
|
context 'when resultant dataframe is empty' do
|
1671
1702
|
subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
|
1672
1703
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1674,10 +1705,10 @@ describe Daru::DataFrame do
|
|
1674
1705
|
its(:'a.to_a') { is_expected.to eq [] }
|
1675
1706
|
its(:'b.to_a') { is_expected.to eq [] }
|
1676
1707
|
its(:'c.to_a') { is_expected.to eq [] }
|
1677
|
-
its(:'index.to_a') { is_expected.to eq [] }
|
1708
|
+
its(:'index.to_a') { is_expected.to eq [] }
|
1678
1709
|
end
|
1679
1710
|
end
|
1680
|
-
|
1711
|
+
|
1681
1712
|
context '#replace_values' do
|
1682
1713
|
subject do
|
1683
1714
|
Daru::DataFrame.new({
|
@@ -1687,7 +1718,7 @@ describe Daru::DataFrame do
|
|
1687
1718
|
})
|
1688
1719
|
end
|
1689
1720
|
before { subject.to_category :b }
|
1690
|
-
|
1721
|
+
|
1691
1722
|
context 'replace nils only' do
|
1692
1723
|
before { subject.replace_values nil, 10 }
|
1693
1724
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1696,7 +1727,7 @@ describe Daru::DataFrame do
|
|
1696
1727
|
its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
|
1697
1728
|
its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
|
1698
1729
|
end
|
1699
|
-
|
1730
|
+
|
1700
1731
|
context 'replace Float::NAN only' do
|
1701
1732
|
before { subject.replace_values Float::NAN, 10 }
|
1702
1733
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1705,7 +1736,7 @@ describe Daru::DataFrame do
|
|
1705
1736
|
its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
|
1706
1737
|
its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
|
1707
1738
|
end
|
1708
|
-
|
1739
|
+
|
1709
1740
|
context 'replace both nil and Float::NAN' do
|
1710
1741
|
before { subject.replace_values [nil, Float::NAN], 10 }
|
1711
1742
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1714,7 +1745,7 @@ describe Daru::DataFrame do
|
|
1714
1745
|
its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
|
1715
1746
|
its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
|
1716
1747
|
end
|
1717
|
-
|
1748
|
+
|
1718
1749
|
context 'replace other values' do
|
1719
1750
|
before { subject.replace_values [1, 5], 10 }
|
1720
1751
|
it { is_expected.to be_a Daru::DataFrame }
|
@@ -1722,7 +1753,7 @@ describe Daru::DataFrame do
|
|
1722
1753
|
its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
|
1723
1754
|
its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
|
1724
1755
|
its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
|
1725
|
-
end
|
1756
|
+
end
|
1726
1757
|
end
|
1727
1758
|
|
1728
1759
|
context "#clone" do
|
@@ -2124,14 +2155,28 @@ describe Daru::DataFrame do
|
|
2124
2155
|
|
2125
2156
|
context "#filter_rows" do
|
2126
2157
|
context Daru::Index do
|
2127
|
-
|
2128
|
-
|
2158
|
+
context "when specified no index" do
|
2159
|
+
it "filters rows" do
|
2160
|
+
df = Daru::DataFrame.new({a: [1,2,3], b: [2,3,4]})
|
2129
2161
|
|
2130
|
-
|
2131
|
-
|
2162
|
+
a = df.filter_rows do |row|
|
2163
|
+
row[:a] % 2 == 0
|
2164
|
+
end
|
2165
|
+
|
2166
|
+
expect(a).to eq(Daru::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
|
2132
2167
|
end
|
2168
|
+
end
|
2133
2169
|
|
2134
|
-
|
2170
|
+
context "when specified numerical index" do
|
2171
|
+
it "filters rows" do
|
2172
|
+
df = Daru::DataFrame.new({a: [1,2,3], b: [2,3,4]}, index: [1,2,3])
|
2173
|
+
|
2174
|
+
a = df.filter_rows do |row|
|
2175
|
+
row[:a] % 2 == 0
|
2176
|
+
end
|
2177
|
+
|
2178
|
+
expect(a).to eq(Daru::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [2]))
|
2179
|
+
end
|
2135
2180
|
end
|
2136
2181
|
|
2137
2182
|
it "preserves names of vectors" do
|
@@ -2500,6 +2545,32 @@ describe Daru::DataFrame do
|
|
2500
2545
|
end
|
2501
2546
|
end
|
2502
2547
|
|
2548
|
+
context '#order=' do
|
2549
|
+
let(:df) do
|
2550
|
+
Daru::DataFrame.new({
|
2551
|
+
a: [1, 2, 3],
|
2552
|
+
b: [4, 5, 6]
|
2553
|
+
}, order: [:a, :b])
|
2554
|
+
end
|
2555
|
+
|
2556
|
+
context 'correct order' do
|
2557
|
+
before { df.order = [:b, :a] }
|
2558
|
+
subject { df }
|
2559
|
+
|
2560
|
+
its(:'vectors.to_a') { is_expected.to eq [:b, :a] }
|
2561
|
+
its(:'b.to_a') { is_expected.to eq [4, 5, 6] }
|
2562
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3] }
|
2563
|
+
end
|
2564
|
+
|
2565
|
+
context 'insufficient vectors' do
|
2566
|
+
it { expect { df.order = [:a] }.to raise_error }
|
2567
|
+
end
|
2568
|
+
|
2569
|
+
context 'wrong vectors' do
|
2570
|
+
it { expect { df.order = [:a, :b, 'b'] }.to raise_error }
|
2571
|
+
end
|
2572
|
+
end
|
2573
|
+
|
2503
2574
|
context "#vectors=" do
|
2504
2575
|
before :each do
|
2505
2576
|
@df = Daru::DataFrame.new({
|
@@ -2523,6 +2594,13 @@ describe Daru::DataFrame do
|
|
2523
2594
|
@df.vectors = Daru::Index.new([1,2,'3',4,'5'])
|
2524
2595
|
}.to raise_error(ArgumentError)
|
2525
2596
|
end
|
2597
|
+
|
2598
|
+
it "change name of vectors in @data" do
|
2599
|
+
new_index_array = [:k, :l, :m]
|
2600
|
+
@df.vectors = Daru::Index.new(new_index_array)
|
2601
|
+
|
2602
|
+
expect(@df.data.map { |vector| vector.name }).to eq(new_index_array)
|
2603
|
+
end
|
2526
2604
|
end
|
2527
2605
|
|
2528
2606
|
context "#rename_vectors" do
|
@@ -2912,6 +2990,43 @@ describe Daru::DataFrame do
|
|
2912
2990
|
)
|
2913
2991
|
)
|
2914
2992
|
end
|
2993
|
+
|
2994
|
+
it 'performs date pivoting' do
|
2995
|
+
categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
|
2996
|
+
df = Daru::DataFrame.rows([
|
2997
|
+
[2014, 2, 1600.0, 20.0],
|
2998
|
+
[2014, 3, 1680.0, 21.0],
|
2999
|
+
[2016, 2, 1600.0, 20.0],
|
3000
|
+
[2016, 4, 1520.0, 19.0],
|
3001
|
+
], order: [:year, :month, :visitors, :days])
|
3002
|
+
df[:averages] = df[:visitors] / df[:days]
|
3003
|
+
df[:month] = df[:month].map{|i| categories[i - 1]}
|
3004
|
+
actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
|
3005
|
+
|
3006
|
+
# NB: As you can see, there are some "illogical" parts:
|
3007
|
+
# months are sorted lexicographically, then made into multi-index
|
3008
|
+
# with one-element-per-tuple, then order of columns is dependent
|
3009
|
+
# on which month is lexicographically first (it's apr, so apr-2016
|
3010
|
+
# is first row to gather, so 2016 is first column).
|
3011
|
+
#
|
3012
|
+
# All of it is a consequence of our group_by implementation (which
|
3013
|
+
# always sorts results & always makes array keys). I hope that fixing
|
3014
|
+
# group_by, even to the extent described at https://github.com/v0dro/daru/issues/152,
|
3015
|
+
# will fix this case also.
|
3016
|
+
expected =
|
3017
|
+
Daru::DataFrame.new(
|
3018
|
+
[
|
3019
|
+
[80.0, 80.0, nil],
|
3020
|
+
[nil, 80.0, 80.0],
|
3021
|
+
], index: Daru::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
|
3022
|
+
order: Daru::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
|
3023
|
+
)
|
3024
|
+
# Comparing their parts before the full comparison makes it easier to
|
3025
|
+
# find complicated differences.
|
3026
|
+
expect(actual.vectors).to eq expected.vectors
|
3027
|
+
expect(actual.index).to eq expected.index
|
3028
|
+
expect(actual).to eq expected
|
3029
|
+
end
|
2915
3030
|
end
|
2916
3031
|
|
2917
3032
|
context "#shape" do
|
@@ -3059,14 +3174,14 @@ describe Daru::DataFrame do
|
|
3059
3174
|
})
|
3060
3175
|
end
|
3061
3176
|
before { df.to_category :b }
|
3062
|
-
|
3177
|
+
|
3063
3178
|
context 'true' do
|
3064
3179
|
it { expect(df.include_values? nil).to eq true }
|
3065
3180
|
it { expect(df.include_values? Float::NAN).to eq true }
|
3066
3181
|
it { expect(df.include_values? nil, Float::NAN).to eq true }
|
3067
3182
|
it { expect(df.include_values? 1, 30).to eq true }
|
3068
3183
|
end
|
3069
|
-
|
3184
|
+
|
3070
3185
|
context 'false' do
|
3071
3186
|
it { expect(df[:a, :c].include_values? nil).to eq false }
|
3072
3187
|
it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
|
@@ -3472,6 +3587,64 @@ describe Daru::DataFrame do
|
|
3472
3587
|
|
3473
3588
|
end
|
3474
3589
|
|
3590
|
+
context "#union" do
|
3591
|
+
before do
|
3592
|
+
@df1 = Daru::DataFrame.new({
|
3593
|
+
a: [1, 2, 3],
|
3594
|
+
b: [1, 2, 3]},
|
3595
|
+
index: [1,3,5] )
|
3596
|
+
|
3597
|
+
@df2 = Daru::DataFrame.new({
|
3598
|
+
a: [4, 5, 6],
|
3599
|
+
c: [4, 5, 6]},
|
3600
|
+
index: [7,9,11])
|
3601
|
+
|
3602
|
+
@df3 = Daru::DataFrame.new({
|
3603
|
+
a: [4, 5, 6],
|
3604
|
+
c: [4, 5, 6]},
|
3605
|
+
index: [5,7,9])
|
3606
|
+
end
|
3607
|
+
|
3608
|
+
it 'does not modify the original dataframes' do
|
3609
|
+
df1_a = @df1[:a].to_a.dup
|
3610
|
+
df2_a = @df2[:a].to_a.dup
|
3611
|
+
|
3612
|
+
_ = @df1.union @df2
|
3613
|
+
expect(@df1[:a].to_a).to eq df1_a
|
3614
|
+
expect(@df2[:a].to_a).to eq df2_a
|
3615
|
+
end
|
3616
|
+
|
3617
|
+
it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
|
3618
|
+
df1_a = @df1[:a].to_a.dup
|
3619
|
+
df2_a = @df2[:a].to_a.dup
|
3620
|
+
|
3621
|
+
df_union = @df1.union @df2
|
3622
|
+
expect(df_union[:a].to_a).to eq df1_a + df2_a
|
3623
|
+
end
|
3624
|
+
|
3625
|
+
it 'fills in missing vectors with nils' do
|
3626
|
+
df1_b = @df1[:b].to_a.dup
|
3627
|
+
df2_c = @df2[:c].to_a.dup
|
3628
|
+
|
3629
|
+
df_union = @df1.union @df2
|
3630
|
+
expect(df_union[:b].to_a).to eq df1_b + [nil] * @df2.size
|
3631
|
+
expect(df_union[:c].to_a).to eq [nil] * @df1.size + df2_c
|
3632
|
+
end
|
3633
|
+
|
3634
|
+
it 'overwrites part of the first dataframe if there are double indices' do
|
3635
|
+
vec = Daru::Vector.new({a: 4, b: nil, c: 4})
|
3636
|
+
expect(@df1.union(@df3).row[5]).to eq vec
|
3637
|
+
end
|
3638
|
+
|
3639
|
+
it 'concats the indices' do
|
3640
|
+
v1 = @df1.index.to_a
|
3641
|
+
v2 = @df2.index.to_a
|
3642
|
+
|
3643
|
+
df_union = @df1.union @df2
|
3644
|
+
expect(df_union.index.to_a).to eq v1 + v2
|
3645
|
+
end
|
3646
|
+
end
|
3647
|
+
|
3475
3648
|
context '#inspect' do
|
3476
3649
|
subject { df.inspect }
|
3477
3650
|
|