daru 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
@@ -0,0 +1,18 @@
|
|
1
|
+
Account,Name,Rep,Manager,Product,Quantity,Price,Status
|
2
|
+
714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented
|
3
|
+
714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented
|
4
|
+
714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending
|
5
|
+
737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined
|
6
|
+
146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won
|
7
|
+
218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending
|
8
|
+
218895,Kulas Inc,Daniel Hilton,Debra Henley,Software,1,10000,presented
|
9
|
+
412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending
|
10
|
+
740150,Barton LLC,John Smith,Debra Henley,CPU,1,35000,declined
|
11
|
+
141962,Herman LLC,Cedric Moss,Fred Anderson,CPU,2,65000,won
|
12
|
+
163416,Purdy-Kunde,Cedric Moss,Fred Anderson,CPU,1,30000,presented
|
13
|
+
239344,Stokes LLC,Cedric Moss,Fred Anderson,Maintenance,1,5000,pending
|
14
|
+
239344,Stokes LLC,Cedric Moss,Fred Anderson,Software,1,10000,presented
|
15
|
+
307599,"Kassulke, Ondricka and Metz",Wendy Yule,Fred Anderson,Maintenance,3,7000,won
|
16
|
+
688981,Keeling LLC,Wendy Yule,Fred Anderson,CPU,5,100000,won
|
17
|
+
729833,Koepp Ltd,Wendy Yule,Fred Anderson,CPU,2,65000,declined
|
18
|
+
729833,Koepp Ltd,Wendy Yule,Fred Anderson,Monitor,2,5000,presented
|
data/spec/index_spec.rb
CHANGED
@@ -17,25 +17,6 @@ describe Daru::Index do
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
context "#re_index" do
|
21
|
-
before :each do
|
22
|
-
@old = Daru::Index.new [:bob, :fisher, :zakir]
|
23
|
-
end
|
24
|
-
it "returns a new index object" do
|
25
|
-
n = @old.re_index(@old + [:john, :shrinivas])
|
26
|
-
|
27
|
-
expect(n.object_id).not_to eq(@old.object_id)
|
28
|
-
expect(n.to_a).to eq([:bob, :fisher, :zakir, :john, :shrinivas])
|
29
|
-
end
|
30
|
-
|
31
|
-
it "does not over-ride existing indexes" do
|
32
|
-
n = @old.re_index(@old + :bob)
|
33
|
-
|
34
|
-
expect(n.object_id).not_to eq(@old.object_id)
|
35
|
-
expect(n.to_a) .to eq([:bob, :fisher, :zakir])
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
20
|
context "#+" do
|
40
21
|
before :each do
|
41
22
|
@left = Daru::Index.new [:miles, :geddy, :eric]
|
@@ -53,10 +34,18 @@ describe Daru::Index do
|
|
53
34
|
end
|
54
35
|
|
55
36
|
context "#[]" do
|
37
|
+
before do
|
38
|
+
@id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
|
39
|
+
end
|
40
|
+
|
56
41
|
it "works with ranges" do
|
57
|
-
id
|
42
|
+
expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five],
|
43
|
+
[1,2,3,4]))
|
44
|
+
end
|
58
45
|
|
59
|
-
|
46
|
+
it "returns multiple keys if specified multiple indices" do
|
47
|
+
expect(@id[[0,1,3,4]]).to eq(Daru::Index.new([:one, :two, :four, :five],
|
48
|
+
[0,1,3,4]))
|
60
49
|
end
|
61
50
|
end
|
62
51
|
end
|
data/spec/io/io_spec.rb
CHANGED
@@ -4,21 +4,11 @@ describe Daru::DataFrame do
|
|
4
4
|
context ".from_csv" do
|
5
5
|
it "loads from a CSV file" do
|
6
6
|
df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
|
7
|
-
col_sep: ' ', headers: true)
|
8
|
-
|
9
|
-
|
10
|
-
when :true_transform
|
11
|
-
field.split(',').map { |s| s.to_f }
|
12
|
-
else
|
13
|
-
field
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
|
19
|
-
expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
|
7
|
+
col_sep: ' ', headers: true)
|
8
|
+
|
9
|
+
expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
|
20
10
|
expect(df.vector[:image_resolution].first).to eq(6.55779)
|
21
|
-
expect(df.vector[:true_transform].first
|
11
|
+
expect(df.vector[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
|
22
12
|
end
|
23
13
|
end
|
24
14
|
|
@@ -1,5 +1,71 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
|
+
before(:each) do
|
5
|
+
@df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a','e','i','o','u'],
|
6
|
+
c: [10,20,30,40,50]})
|
7
|
+
@left = Daru::DataFrame.new({a: [1,nil,nil,4], b: [10,nil,nil,40], c: [5,6,7,8]},
|
8
|
+
index: [0,4,5,3])
|
9
|
+
@right = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,20,30,40,50]},
|
10
|
+
index: [0,1,2,3,6])
|
11
|
+
end
|
4
12
|
|
13
|
+
context "#+" do
|
14
|
+
it "adds a number to all numeric vectors" do
|
15
|
+
expect(@df + 2).to eq(Daru::DataFrame.new({a: [3,4,5,6,7], b: ['a','e','i','o','u'],
|
16
|
+
c: [12,22,32,42,52] }))
|
17
|
+
end
|
18
|
+
|
19
|
+
it "adds two dataframes to produce a third" do
|
20
|
+
expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
|
21
|
+
b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
|
22
|
+
[0,1,2,3,4,5,6]))
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context "#-" do
|
27
|
+
it "subtracts a number from all numeric vectors" do
|
28
|
+
expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
|
29
|
+
c: [8,18,28,38,48]}))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "subtracts a data frame from another" do
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context "#*" do
|
38
|
+
it "multiplies a number with a DataFrame" do
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
context "#/" do
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
context "#%" do
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
context "#**" do
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
context "#sqrt" do
|
55
|
+
it "calculates sqrt" do
|
56
|
+
@df.sqrt
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
context "#round" do
|
61
|
+
it "rounds to precision" do
|
62
|
+
@df.round
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context "#exp" do
|
67
|
+
it "calculates exponential" do
|
68
|
+
@df.exp
|
69
|
+
end
|
70
|
+
end
|
5
71
|
end
|
@@ -4,21 +4,31 @@ describe Daru::Vector do
|
|
4
4
|
before :each do
|
5
5
|
@dv1 = Daru::Vector.new [1,2,3,4], name: :boozy, index: [:bud, :kf, :henie, :corona]
|
6
6
|
@dv2 = Daru::Vector.new [1,2,3,4], name: :mayer, index: [:obi, :wan, :kf, :corona]
|
7
|
+
@with_md1 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]
|
8
|
+
@with_md2 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c]
|
7
9
|
end
|
8
10
|
|
9
11
|
context "#+" do
|
10
12
|
it "adds matching indexes of the other vector" do
|
11
|
-
expect(@dv1 + @dv2).to eq(Daru::Vector.new([5,
|
13
|
+
expect(@dv1 + @dv2).to eq(Daru::Vector.new([nil,8,nil,5,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
|
12
14
|
end
|
13
15
|
|
14
16
|
it "adds number to each element of the entire vector" do
|
15
17
|
expect(@dv1 + 5).to eq(Daru::Vector.new [6,7,8,9], name: :boozy, index: [:bud, :kf, :henie, :corona])
|
16
18
|
end
|
19
|
+
|
20
|
+
it "does not add when a number is being added" do
|
21
|
+
expect(@with_md1 + 1).to eq(Daru::Vector.new([2,3,4,nil,6,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]))
|
22
|
+
end
|
23
|
+
|
24
|
+
it "puts a nil when one of the operands is nil" do
|
25
|
+
expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
|
26
|
+
end
|
17
27
|
end
|
18
28
|
|
19
29
|
context "#-" do
|
20
30
|
it "subtracts matching indexes of the other vector" do
|
21
|
-
expect(@dv1 - @dv2).to eq(Daru::Vector.new([
|
31
|
+
expect(@dv1 - @dv2).to eq(Daru::Vector.new([nil,0,nil,-1,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
|
22
32
|
end
|
23
33
|
|
24
34
|
it "subtracts number from each element of the entire vector" do
|
@@ -26,7 +36,7 @@ describe Daru::Vector do
|
|
26
36
|
end
|
27
37
|
end
|
28
38
|
|
29
|
-
context "#*"
|
39
|
+
context "#*" do
|
30
40
|
it "multiplies matching indexes of the other vector" do
|
31
41
|
|
32
42
|
end
|
@@ -48,4 +58,35 @@ describe Daru::Vector do
|
|
48
58
|
|
49
59
|
context "#%" do
|
50
60
|
|
51
|
-
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context "#**" do
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
context "#exp" do
|
68
|
+
it "calculates exp of all numbers" do
|
69
|
+
expect(@with_md1.exp.round(3)).to eq(Daru::Vector.new([2.718281828459045,
|
70
|
+
7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil], index:
|
71
|
+
[:a, :b, :c, :obi, :wan, :corona], name: :missing).round(3))
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context "#abs" do
|
76
|
+
it "calculates abs value" do
|
77
|
+
@with_md1.abs
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
context "#sqrt" do
|
82
|
+
it "calculates sqrt" do
|
83
|
+
@with_md1.sqrt
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "#round" do
|
88
|
+
it "rounds to given precision" do
|
89
|
+
@with_md1.round(2)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -1,5 +1,95 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
|
-
|
4
|
+
before do
|
5
|
+
@df = Daru::DataFrame.new({
|
6
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
7
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
8
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
9
|
+
d: [1,2,2,3,3,4,5,6,7],
|
10
|
+
e: [2,4,4,6,6,8,10,12,14],
|
11
|
+
f: [10,20,20,30,30,40,50,60,70]
|
12
|
+
})
|
13
|
+
end
|
14
|
+
|
15
|
+
context "#mean" do
|
16
|
+
it "calculates mean of single level numeric only vectors and returns values in a Vector" do
|
17
|
+
expect(@df.mean.round(2)).to eq(Daru::Vector.new([3.67, 7.33, 36.67],
|
18
|
+
index: [:d, :e, :f]
|
19
|
+
))
|
20
|
+
end
|
21
|
+
|
22
|
+
it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
|
23
|
+
# TODO - pending
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
context "#std" do
|
28
|
+
it "calculates standard deviation of single leavel numeric only vectors and returns values in a Vector" do
|
29
|
+
expect(@df.std).to eq(Daru::Vector.new([2, 4, 20], index: [:d, :e, :f]))
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context "#sum" do
|
34
|
+
it "calculates sum of single level numeric only vectors and returns values in a Vector" do
|
35
|
+
# TODO - write tests
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context "#count" do
|
40
|
+
# TODO
|
41
|
+
end
|
42
|
+
|
43
|
+
context "#mode" do
|
44
|
+
# TODO
|
45
|
+
end
|
46
|
+
|
47
|
+
context "#median" do
|
48
|
+
# TODO
|
49
|
+
end
|
50
|
+
|
51
|
+
context "#max" do
|
52
|
+
# TODO
|
53
|
+
end
|
54
|
+
|
55
|
+
context "#min" do
|
56
|
+
# TODO
|
57
|
+
end
|
58
|
+
|
59
|
+
context "#product" do
|
60
|
+
# TODO
|
61
|
+
end
|
62
|
+
|
63
|
+
context "#describe" do
|
64
|
+
it "generates mean, std, max, min and count of numeric vectors in one shot" do
|
65
|
+
expect(@df.describe.round(2)).to eq(Daru::DataFrame.new({
|
66
|
+
d: [9.00, 3.67 ,2.00 , 1.00, 7.00],
|
67
|
+
e: [9.00, 7.33 ,4.00 , 2.00, 14.00],
|
68
|
+
f: [9.00, 36.67,20.00,10.00, 70.00]
|
69
|
+
}, index: [:count, :mean, :std, :min, :max]
|
70
|
+
))
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
context "#cov" do
|
75
|
+
it "calculates the variance covariance of the numeric vectors of DataFrame" do
|
76
|
+
expect(@df.cov).to eq(Daru::DataFrame.new({
|
77
|
+
d: [4,8,40],
|
78
|
+
e: [8,16,80],
|
79
|
+
f: [40,80,400]
|
80
|
+
}, index: [:d, :e, :f]
|
81
|
+
))
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
context "#corr", focus: true do
|
86
|
+
it "calculates the correlation between the numeric vectors of DataFrame" do
|
87
|
+
expect(@df.corr).to eq(Daru::DataFrame.new({
|
88
|
+
d: [1,1,1],
|
89
|
+
e: [1,1,1],
|
90
|
+
f: [1,1,1]
|
91
|
+
}, index: [:d, :e, :f]
|
92
|
+
))
|
93
|
+
end
|
94
|
+
end
|
5
95
|
end
|
@@ -1,39 +1,41 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::Vector do
|
4
|
-
[
|
4
|
+
[:array, :nmatrix].each do |dtype|
|
5
5
|
describe dtype do
|
6
6
|
before :each do
|
7
7
|
@dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
|
8
|
+
@dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
|
8
9
|
end
|
9
10
|
|
10
11
|
context "#mean" do
|
11
12
|
it "calculates mean" do
|
12
13
|
expect(@dv.mean).to eq(282.2)
|
14
|
+
expect(@dv_with_md.mean).to eq(282.2)
|
13
15
|
end
|
14
16
|
end
|
15
17
|
|
16
18
|
context "#sum_of_squares" do
|
17
19
|
it "calcs sum of squares" do
|
18
|
-
|
20
|
+
@dv.sum_of_squares
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
24
|
context "#standard_deviation_sample" do
|
23
25
|
it "calcs standard deviation sample" do
|
24
|
-
|
26
|
+
@dv.standard_deviation_sample
|
25
27
|
end
|
26
28
|
end
|
27
29
|
|
28
30
|
context "#variance_sample" do
|
29
31
|
it "calculates sample variance" do
|
30
|
-
|
32
|
+
@dv.variance_sample
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
34
36
|
context "#standard_deviation_population" do
|
35
37
|
it "calculates standard deviation population" do
|
36
|
-
|
38
|
+
@dv.standard_deviation_population
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
@@ -124,7 +126,9 @@ describe Daru::Vector do
|
|
124
126
|
end
|
125
127
|
|
126
128
|
context "#proportion" do
|
127
|
-
|
129
|
+
it "calculates proportion" do
|
130
|
+
@dv.proportion
|
131
|
+
end
|
128
132
|
end
|
129
133
|
|
130
134
|
context "#proportions" do
|
@@ -140,7 +144,13 @@ describe Daru::Vector do
|
|
140
144
|
end
|
141
145
|
|
142
146
|
context "#count" do
|
147
|
+
it "counts specified element" do
|
148
|
+
@dv.count(323)
|
149
|
+
end
|
143
150
|
|
151
|
+
it "counts total number of elements" do
|
152
|
+
expect(@dv.count).to eq(10)
|
153
|
+
end
|
144
154
|
end
|
145
155
|
|
146
156
|
context "#coefficient_of_variation" do
|
@@ -148,6 +158,22 @@ describe Daru::Vector do
|
|
148
158
|
@dv.coefficient_of_variation
|
149
159
|
end
|
150
160
|
end
|
161
|
+
|
162
|
+
context "#factor" do
|
163
|
+
|
164
|
+
end
|
165
|
+
|
166
|
+
context "#median_absolute_deviation" do
|
167
|
+
it "calculates median_absolute_deviation" do
|
168
|
+
@dv.median_absolute_deviation
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
context "#standard_error" do
|
173
|
+
it "calculates standard error" do
|
174
|
+
@dv.standard_error
|
175
|
+
end
|
176
|
+
end
|
151
177
|
end
|
152
178
|
end
|
153
179
|
end
|
data/spec/monkeys_spec.rb
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe "Monkeys" do
|
4
|
-
context
|
4
|
+
context Array do
|
5
|
+
end
|
6
|
+
|
7
|
+
context Matrix do
|
8
|
+
it "performs elementwise division" do
|
9
|
+
left = Matrix[[3,6,9],[4,8,12],[2,4,6]]
|
10
|
+
right = Matrix[[3,6,9],[4,8,12],[2,4,6]]
|
11
|
+
|
12
|
+
expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
|
13
|
+
end
|
5
14
|
end
|
6
15
|
end
|