daru 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
@@ -0,0 +1,18 @@
|
|
1
|
+
Account,Name,Rep,Manager,Product,Quantity,Price,Status
|
2
|
+
714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented
|
3
|
+
714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented
|
4
|
+
714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending
|
5
|
+
737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined
|
6
|
+
146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won
|
7
|
+
218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending
|
8
|
+
218895,Kulas Inc,Daniel Hilton,Debra Henley,Software,1,10000,presented
|
9
|
+
412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending
|
10
|
+
740150,Barton LLC,John Smith,Debra Henley,CPU,1,35000,declined
|
11
|
+
141962,Herman LLC,Cedric Moss,Fred Anderson,CPU,2,65000,won
|
12
|
+
163416,Purdy-Kunde,Cedric Moss,Fred Anderson,CPU,1,30000,presented
|
13
|
+
239344,Stokes LLC,Cedric Moss,Fred Anderson,Maintenance,1,5000,pending
|
14
|
+
239344,Stokes LLC,Cedric Moss,Fred Anderson,Software,1,10000,presented
|
15
|
+
307599,"Kassulke, Ondricka and Metz",Wendy Yule,Fred Anderson,Maintenance,3,7000,won
|
16
|
+
688981,Keeling LLC,Wendy Yule,Fred Anderson,CPU,5,100000,won
|
17
|
+
729833,Koepp Ltd,Wendy Yule,Fred Anderson,CPU,2,65000,declined
|
18
|
+
729833,Koepp Ltd,Wendy Yule,Fred Anderson,Monitor,2,5000,presented
|
data/spec/index_spec.rb
CHANGED
@@ -17,25 +17,6 @@ describe Daru::Index do
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
context "#re_index" do
|
21
|
-
before :each do
|
22
|
-
@old = Daru::Index.new [:bob, :fisher, :zakir]
|
23
|
-
end
|
24
|
-
it "returns a new index object" do
|
25
|
-
n = @old.re_index(@old + [:john, :shrinivas])
|
26
|
-
|
27
|
-
expect(n.object_id).not_to eq(@old.object_id)
|
28
|
-
expect(n.to_a).to eq([:bob, :fisher, :zakir, :john, :shrinivas])
|
29
|
-
end
|
30
|
-
|
31
|
-
it "does not over-ride existing indexes" do
|
32
|
-
n = @old.re_index(@old + :bob)
|
33
|
-
|
34
|
-
expect(n.object_id).not_to eq(@old.object_id)
|
35
|
-
expect(n.to_a) .to eq([:bob, :fisher, :zakir])
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
20
|
context "#+" do
|
40
21
|
before :each do
|
41
22
|
@left = Daru::Index.new [:miles, :geddy, :eric]
|
@@ -53,10 +34,18 @@ describe Daru::Index do
|
|
53
34
|
end
|
54
35
|
|
55
36
|
context "#[]" do
|
37
|
+
before do
|
38
|
+
@id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
|
39
|
+
end
|
40
|
+
|
56
41
|
it "works with ranges" do
|
57
|
-
id
|
42
|
+
expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five],
|
43
|
+
[1,2,3,4]))
|
44
|
+
end
|
58
45
|
|
59
|
-
|
46
|
+
it "returns multiple keys if specified multiple indices" do
|
47
|
+
expect(@id[[0,1,3,4]]).to eq(Daru::Index.new([:one, :two, :four, :five],
|
48
|
+
[0,1,3,4]))
|
60
49
|
end
|
61
50
|
end
|
62
51
|
end
|
data/spec/io/io_spec.rb
CHANGED
@@ -4,21 +4,11 @@ describe Daru::DataFrame do
|
|
4
4
|
context ".from_csv" do
|
5
5
|
it "loads from a CSV file" do
|
6
6
|
df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
|
7
|
-
col_sep: ' ', headers: true)
|
8
|
-
|
9
|
-
|
10
|
-
when :true_transform
|
11
|
-
field.split(',').map { |s| s.to_f }
|
12
|
-
else
|
13
|
-
field
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
|
19
|
-
expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
|
7
|
+
col_sep: ' ', headers: true)
|
8
|
+
|
9
|
+
expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
|
20
10
|
expect(df.vector[:image_resolution].first).to eq(6.55779)
|
21
|
-
expect(df.vector[:true_transform].first
|
11
|
+
expect(df.vector[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
|
22
12
|
end
|
23
13
|
end
|
24
14
|
|
@@ -1,5 +1,71 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
|
+
before(:each) do
|
5
|
+
@df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a','e','i','o','u'],
|
6
|
+
c: [10,20,30,40,50]})
|
7
|
+
@left = Daru::DataFrame.new({a: [1,nil,nil,4], b: [10,nil,nil,40], c: [5,6,7,8]},
|
8
|
+
index: [0,4,5,3])
|
9
|
+
@right = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,20,30,40,50]},
|
10
|
+
index: [0,1,2,3,6])
|
11
|
+
end
|
4
12
|
|
13
|
+
context "#+" do
|
14
|
+
it "adds a number to all numeric vectors" do
|
15
|
+
expect(@df + 2).to eq(Daru::DataFrame.new({a: [3,4,5,6,7], b: ['a','e','i','o','u'],
|
16
|
+
c: [12,22,32,42,52] }))
|
17
|
+
end
|
18
|
+
|
19
|
+
it "adds two dataframes to produce a third" do
|
20
|
+
expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
|
21
|
+
b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
|
22
|
+
[0,1,2,3,4,5,6]))
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context "#-" do
|
27
|
+
it "subtracts a number from all numeric vectors" do
|
28
|
+
expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
|
29
|
+
c: [8,18,28,38,48]}))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "subtracts a data frame from another" do
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context "#*" do
|
38
|
+
it "multiplies a number with a DataFrame" do
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
context "#/" do
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
context "#%" do
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
context "#**" do
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
context "#sqrt" do
|
55
|
+
it "calculates sqrt" do
|
56
|
+
@df.sqrt
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
context "#round" do
|
61
|
+
it "rounds to precision" do
|
62
|
+
@df.round
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context "#exp" do
|
67
|
+
it "calculates exponential" do
|
68
|
+
@df.exp
|
69
|
+
end
|
70
|
+
end
|
5
71
|
end
|
@@ -4,21 +4,31 @@ describe Daru::Vector do
|
|
4
4
|
before :each do
|
5
5
|
@dv1 = Daru::Vector.new [1,2,3,4], name: :boozy, index: [:bud, :kf, :henie, :corona]
|
6
6
|
@dv2 = Daru::Vector.new [1,2,3,4], name: :mayer, index: [:obi, :wan, :kf, :corona]
|
7
|
+
@with_md1 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]
|
8
|
+
@with_md2 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c]
|
7
9
|
end
|
8
10
|
|
9
11
|
context "#+" do
|
10
12
|
it "adds matching indexes of the other vector" do
|
11
|
-
expect(@dv1 + @dv2).to eq(Daru::Vector.new([5,
|
13
|
+
expect(@dv1 + @dv2).to eq(Daru::Vector.new([nil,8,nil,5,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
|
12
14
|
end
|
13
15
|
|
14
16
|
it "adds number to each element of the entire vector" do
|
15
17
|
expect(@dv1 + 5).to eq(Daru::Vector.new [6,7,8,9], name: :boozy, index: [:bud, :kf, :henie, :corona])
|
16
18
|
end
|
19
|
+
|
20
|
+
it "does not add when a number is being added" do
|
21
|
+
expect(@with_md1 + 1).to eq(Daru::Vector.new([2,3,4,nil,6,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]))
|
22
|
+
end
|
23
|
+
|
24
|
+
it "puts a nil when one of the operands is nil" do
|
25
|
+
expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
|
26
|
+
end
|
17
27
|
end
|
18
28
|
|
19
29
|
context "#-" do
|
20
30
|
it "subtracts matching indexes of the other vector" do
|
21
|
-
expect(@dv1 - @dv2).to eq(Daru::Vector.new([
|
31
|
+
expect(@dv1 - @dv2).to eq(Daru::Vector.new([nil,0,nil,-1,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
|
22
32
|
end
|
23
33
|
|
24
34
|
it "subtracts number from each element of the entire vector" do
|
@@ -26,7 +36,7 @@ describe Daru::Vector do
|
|
26
36
|
end
|
27
37
|
end
|
28
38
|
|
29
|
-
context "#*"
|
39
|
+
context "#*" do
|
30
40
|
it "multiplies matching indexes of the other vector" do
|
31
41
|
|
32
42
|
end
|
@@ -48,4 +58,35 @@ describe Daru::Vector do
|
|
48
58
|
|
49
59
|
context "#%" do
|
50
60
|
|
51
|
-
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context "#**" do
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
context "#exp" do
|
68
|
+
it "calculates exp of all numbers" do
|
69
|
+
expect(@with_md1.exp.round(3)).to eq(Daru::Vector.new([2.718281828459045,
|
70
|
+
7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil], index:
|
71
|
+
[:a, :b, :c, :obi, :wan, :corona], name: :missing).round(3))
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context "#abs" do
|
76
|
+
it "calculates abs value" do
|
77
|
+
@with_md1.abs
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
context "#sqrt" do
|
82
|
+
it "calculates sqrt" do
|
83
|
+
@with_md1.sqrt
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "#round" do
|
88
|
+
it "rounds to given precision" do
|
89
|
+
@with_md1.round(2)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -1,5 +1,95 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
|
-
|
4
|
+
before do
|
5
|
+
@df = Daru::DataFrame.new({
|
6
|
+
a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
7
|
+
b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
8
|
+
c: ['small','large','large','small','small','large','small','large','small'],
|
9
|
+
d: [1,2,2,3,3,4,5,6,7],
|
10
|
+
e: [2,4,4,6,6,8,10,12,14],
|
11
|
+
f: [10,20,20,30,30,40,50,60,70]
|
12
|
+
})
|
13
|
+
end
|
14
|
+
|
15
|
+
context "#mean" do
|
16
|
+
it "calculates mean of single level numeric only vectors and returns values in a Vector" do
|
17
|
+
expect(@df.mean.round(2)).to eq(Daru::Vector.new([3.67, 7.33, 36.67],
|
18
|
+
index: [:d, :e, :f]
|
19
|
+
))
|
20
|
+
end
|
21
|
+
|
22
|
+
it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
|
23
|
+
# TODO - pending
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
context "#std" do
|
28
|
+
it "calculates standard deviation of single leavel numeric only vectors and returns values in a Vector" do
|
29
|
+
expect(@df.std).to eq(Daru::Vector.new([2, 4, 20], index: [:d, :e, :f]))
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context "#sum" do
|
34
|
+
it "calculates sum of single level numeric only vectors and returns values in a Vector" do
|
35
|
+
# TODO - write tests
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context "#count" do
|
40
|
+
# TODO
|
41
|
+
end
|
42
|
+
|
43
|
+
context "#mode" do
|
44
|
+
# TODO
|
45
|
+
end
|
46
|
+
|
47
|
+
context "#median" do
|
48
|
+
# TODO
|
49
|
+
end
|
50
|
+
|
51
|
+
context "#max" do
|
52
|
+
# TODO
|
53
|
+
end
|
54
|
+
|
55
|
+
context "#min" do
|
56
|
+
# TODO
|
57
|
+
end
|
58
|
+
|
59
|
+
context "#product" do
|
60
|
+
# TODO
|
61
|
+
end
|
62
|
+
|
63
|
+
context "#describe" do
|
64
|
+
it "generates mean, std, max, min and count of numeric vectors in one shot" do
|
65
|
+
expect(@df.describe.round(2)).to eq(Daru::DataFrame.new({
|
66
|
+
d: [9.00, 3.67 ,2.00 , 1.00, 7.00],
|
67
|
+
e: [9.00, 7.33 ,4.00 , 2.00, 14.00],
|
68
|
+
f: [9.00, 36.67,20.00,10.00, 70.00]
|
69
|
+
}, index: [:count, :mean, :std, :min, :max]
|
70
|
+
))
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
context "#cov" do
|
75
|
+
it "calculates the variance covariance of the numeric vectors of DataFrame" do
|
76
|
+
expect(@df.cov).to eq(Daru::DataFrame.new({
|
77
|
+
d: [4,8,40],
|
78
|
+
e: [8,16,80],
|
79
|
+
f: [40,80,400]
|
80
|
+
}, index: [:d, :e, :f]
|
81
|
+
))
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
context "#corr", focus: true do
|
86
|
+
it "calculates the correlation between the numeric vectors of DataFrame" do
|
87
|
+
expect(@df.corr).to eq(Daru::DataFrame.new({
|
88
|
+
d: [1,1,1],
|
89
|
+
e: [1,1,1],
|
90
|
+
f: [1,1,1]
|
91
|
+
}, index: [:d, :e, :f]
|
92
|
+
))
|
93
|
+
end
|
94
|
+
end
|
5
95
|
end
|
@@ -1,39 +1,41 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::Vector do
|
4
|
-
[
|
4
|
+
[:array, :nmatrix].each do |dtype|
|
5
5
|
describe dtype do
|
6
6
|
before :each do
|
7
7
|
@dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
|
8
|
+
@dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
|
8
9
|
end
|
9
10
|
|
10
11
|
context "#mean" do
|
11
12
|
it "calculates mean" do
|
12
13
|
expect(@dv.mean).to eq(282.2)
|
14
|
+
expect(@dv_with_md.mean).to eq(282.2)
|
13
15
|
end
|
14
16
|
end
|
15
17
|
|
16
18
|
context "#sum_of_squares" do
|
17
19
|
it "calcs sum of squares" do
|
18
|
-
|
20
|
+
@dv.sum_of_squares
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
24
|
context "#standard_deviation_sample" do
|
23
25
|
it "calcs standard deviation sample" do
|
24
|
-
|
26
|
+
@dv.standard_deviation_sample
|
25
27
|
end
|
26
28
|
end
|
27
29
|
|
28
30
|
context "#variance_sample" do
|
29
31
|
it "calculates sample variance" do
|
30
|
-
|
32
|
+
@dv.variance_sample
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
34
36
|
context "#standard_deviation_population" do
|
35
37
|
it "calculates standard deviation population" do
|
36
|
-
|
38
|
+
@dv.standard_deviation_population
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
@@ -124,7 +126,9 @@ describe Daru::Vector do
|
|
124
126
|
end
|
125
127
|
|
126
128
|
context "#proportion" do
|
127
|
-
|
129
|
+
it "calculates proportion" do
|
130
|
+
@dv.proportion
|
131
|
+
end
|
128
132
|
end
|
129
133
|
|
130
134
|
context "#proportions" do
|
@@ -140,7 +144,13 @@ describe Daru::Vector do
|
|
140
144
|
end
|
141
145
|
|
142
146
|
context "#count" do
|
147
|
+
it "counts specified element" do
|
148
|
+
@dv.count(323)
|
149
|
+
end
|
143
150
|
|
151
|
+
it "counts total number of elements" do
|
152
|
+
expect(@dv.count).to eq(10)
|
153
|
+
end
|
144
154
|
end
|
145
155
|
|
146
156
|
context "#coefficient_of_variation" do
|
@@ -148,6 +158,22 @@ describe Daru::Vector do
|
|
148
158
|
@dv.coefficient_of_variation
|
149
159
|
end
|
150
160
|
end
|
161
|
+
|
162
|
+
context "#factor" do
|
163
|
+
|
164
|
+
end
|
165
|
+
|
166
|
+
context "#median_absolute_deviation" do
|
167
|
+
it "calculates median_absolute_deviation" do
|
168
|
+
@dv.median_absolute_deviation
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
context "#standard_error" do
|
173
|
+
it "calculates standard error" do
|
174
|
+
@dv.standard_error
|
175
|
+
end
|
176
|
+
end
|
151
177
|
end
|
152
178
|
end
|
153
179
|
end
|
data/spec/monkeys_spec.rb
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe "Monkeys" do
|
4
|
-
context
|
4
|
+
context Array do
|
5
|
+
end
|
6
|
+
|
7
|
+
context Matrix do
|
8
|
+
it "performs elementwise division" do
|
9
|
+
left = Matrix[[3,6,9],[4,8,12],[2,4,6]]
|
10
|
+
right = Matrix[[3,6,9],[4,8,12],[2,4,6]]
|
11
|
+
|
12
|
+
expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
|
13
|
+
end
|
5
14
|
end
|
6
15
|
end
|