daru 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +0 -0
  3. data/Gemfile +0 -1
  4. data/History.txt +35 -0
  5. data/README.md +178 -198
  6. data/daru.gemspec +5 -7
  7. data/lib/daru.rb +10 -2
  8. data/lib/daru/accessors/array_wrapper.rb +36 -198
  9. data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
  10. data/lib/daru/core/group_by.rb +183 -0
  11. data/lib/daru/dataframe.rb +615 -167
  12. data/lib/daru/index.rb +17 -16
  13. data/lib/daru/io/io.rb +5 -12
  14. data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
  15. data/lib/daru/maths/arithmetic/vector.rb +19 -6
  16. data/lib/daru/maths/statistics/dataframe.rb +103 -2
  17. data/lib/daru/maths/statistics/vector.rb +102 -61
  18. data/lib/daru/monkeys.rb +8 -0
  19. data/lib/daru/multi_index.rb +199 -0
  20. data/lib/daru/plotting/dataframe.rb +24 -24
  21. data/lib/daru/plotting/vector.rb +14 -15
  22. data/lib/daru/vector.rb +402 -98
  23. data/lib/version.rb +1 -1
  24. data/notebooks/grouping_splitting_pivots.ipynb +529 -0
  25. data/notebooks/intro_with_music_data_.ipynb +104 -119
  26. data/spec/accessors/wrappers_spec.rb +36 -0
  27. data/spec/core/group_by_spec.rb +331 -0
  28. data/spec/dataframe_spec.rb +1237 -475
  29. data/spec/fixtures/sales-funnel.csv +18 -0
  30. data/spec/index_spec.rb +10 -21
  31. data/spec/io/io_spec.rb +4 -14
  32. data/spec/math/arithmetic/dataframe_spec.rb +66 -0
  33. data/spec/math/arithmetic/vector_spec.rb +45 -4
  34. data/spec/math/statistics/dataframe_spec.rb +91 -1
  35. data/spec/math/statistics/vector_spec.rb +32 -6
  36. data/spec/monkeys_spec.rb +10 -1
  37. data/spec/multi_index_spec.rb +216 -0
  38. data/spec/spec_helper.rb +1 -0
  39. data/spec/vector_spec.rb +505 -57
  40. metadata +21 -15
@@ -0,0 +1,18 @@
1
+ Account,Name,Rep,Manager,Product,Quantity,Price,Status
2
+ 714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented
3
+ 714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented
4
+ 714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending
5
+ 737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined
6
+ 146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won
7
+ 218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending
8
+ 218895,Kulas Inc,Daniel Hilton,Debra Henley,Software,1,10000,presented
9
+ 412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending
10
+ 740150,Barton LLC,John Smith,Debra Henley,CPU,1,35000,declined
11
+ 141962,Herman LLC,Cedric Moss,Fred Anderson,CPU,2,65000,won
12
+ 163416,Purdy-Kunde,Cedric Moss,Fred Anderson,CPU,1,30000,presented
13
+ 239344,Stokes LLC,Cedric Moss,Fred Anderson,Maintenance,1,5000,pending
14
+ 239344,Stokes LLC,Cedric Moss,Fred Anderson,Software,1,10000,presented
15
+ 307599,"Kassulke, Ondricka and Metz",Wendy Yule,Fred Anderson,Maintenance,3,7000,won
16
+ 688981,Keeling LLC,Wendy Yule,Fred Anderson,CPU,5,100000,won
17
+ 729833,Koepp Ltd,Wendy Yule,Fred Anderson,CPU,2,65000,declined
18
+ 729833,Koepp Ltd,Wendy Yule,Fred Anderson,Monitor,2,5000,presented
@@ -17,25 +17,6 @@ describe Daru::Index do
17
17
  end
18
18
  end
19
19
 
20
- context "#re_index" do
21
- before :each do
22
- @old = Daru::Index.new [:bob, :fisher, :zakir]
23
- end
24
- it "returns a new index object" do
25
- n = @old.re_index(@old + [:john, :shrinivas])
26
-
27
- expect(n.object_id).not_to eq(@old.object_id)
28
- expect(n.to_a).to eq([:bob, :fisher, :zakir, :john, :shrinivas])
29
- end
30
-
31
- it "does not over-ride existing indexes" do
32
- n = @old.re_index(@old + :bob)
33
-
34
- expect(n.object_id).not_to eq(@old.object_id)
35
- expect(n.to_a) .to eq([:bob, :fisher, :zakir])
36
- end
37
- end
38
-
39
20
  context "#+" do
40
21
  before :each do
41
22
  @left = Daru::Index.new [:miles, :geddy, :eric]
@@ -53,10 +34,18 @@ describe Daru::Index do
53
34
  end
54
35
 
55
36
  context "#[]" do
37
+ before do
38
+ @id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
39
+ end
40
+
56
41
  it "works with ranges" do
57
- id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
42
+ expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five],
43
+ [1,2,3,4]))
44
+ end
58
45
 
59
- expect(id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five]))
46
+ it "returns multiple keys if specified multiple indices" do
47
+ expect(@id[[0,1,3,4]]).to eq(Daru::Index.new([:one, :two, :four, :five],
48
+ [0,1,3,4]))
60
49
  end
61
50
  end
62
51
  end
@@ -4,21 +4,11 @@ describe Daru::DataFrame do
4
4
  context ".from_csv" do
5
5
  it "loads from a CSV file" do
6
6
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
7
- col_sep: ' ', headers: true) do |csv|
8
- csv.convert do |field, info|
9
- case info[:header]
10
- when :true_transform
11
- field.split(',').map { |s| s.to_f }
12
- else
13
- field
14
- end
15
- end
16
- end
17
-
18
-
19
- expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
7
+ col_sep: ' ', headers: true)
8
+
9
+ expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
20
10
  expect(df.vector[:image_resolution].first).to eq(6.55779)
21
- expect(df.vector[:true_transform].first[15]).to eq(1.0)
11
+ expect(df.vector[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
22
12
  end
23
13
  end
24
14
 
@@ -1,5 +1,71 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::DataFrame do
4
+ before(:each) do
5
+ @df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a','e','i','o','u'],
6
+ c: [10,20,30,40,50]})
7
+ @left = Daru::DataFrame.new({a: [1,nil,nil,4], b: [10,nil,nil,40], c: [5,6,7,8]},
8
+ index: [0,4,5,3])
9
+ @right = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,20,30,40,50]},
10
+ index: [0,1,2,3,6])
11
+ end
4
12
 
13
+ context "#+" do
14
+ it "adds a number to all numeric vectors" do
15
+ expect(@df + 2).to eq(Daru::DataFrame.new({a: [3,4,5,6,7], b: ['a','e','i','o','u'],
16
+ c: [12,22,32,42,52] }))
17
+ end
18
+
19
+ it "adds two dataframes to produce a third" do
20
+ expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
21
+ b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
22
+ [0,1,2,3,4,5,6]))
23
+ end
24
+ end
25
+
26
+ context "#-" do
27
+ it "subtracts a number from all numeric vectors" do
28
+ expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
29
+ c: [8,18,28,38,48]}))
30
+ end
31
+
32
+ it "subtracts a data frame from another" do
33
+
34
+ end
35
+ end
36
+
37
+ context "#*" do
38
+ it "multiplies a number with a DataFrame" do
39
+ end
40
+ end
41
+
42
+ context "#/" do
43
+
44
+ end
45
+
46
+ context "#%" do
47
+
48
+ end
49
+
50
+ context "#**" do
51
+
52
+ end
53
+
54
+ context "#sqrt" do
55
+ it "calculates sqrt" do
56
+ @df.sqrt
57
+ end
58
+ end
59
+
60
+ context "#round" do
61
+ it "rounds to precision" do
62
+ @df.round
63
+ end
64
+ end
65
+
66
+ context "#exp" do
67
+ it "calculates exponential" do
68
+ @df.exp
69
+ end
70
+ end
5
71
  end
@@ -4,21 +4,31 @@ describe Daru::Vector do
4
4
  before :each do
5
5
  @dv1 = Daru::Vector.new [1,2,3,4], name: :boozy, index: [:bud, :kf, :henie, :corona]
6
6
  @dv2 = Daru::Vector.new [1,2,3,4], name: :mayer, index: [:obi, :wan, :kf, :corona]
7
+ @with_md1 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]
8
+ @with_md2 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c]
7
9
  end
8
10
 
9
11
  context "#+" do
10
12
  it "adds matching indexes of the other vector" do
11
- expect(@dv1 + @dv2).to eq(Daru::Vector.new([5, 8], name: :boozy, index: [:kf, :corona]))
13
+ expect(@dv1 + @dv2).to eq(Daru::Vector.new([nil,8,nil,5,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
12
14
  end
13
15
 
14
16
  it "adds number to each element of the entire vector" do
15
17
  expect(@dv1 + 5).to eq(Daru::Vector.new [6,7,8,9], name: :boozy, index: [:bud, :kf, :henie, :corona])
16
18
  end
19
+
20
+ it "does not add when a number is being added" do
21
+ expect(@with_md1 + 1).to eq(Daru::Vector.new([2,3,4,nil,6,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]))
22
+ end
23
+
24
+ it "puts a nil when one of the operands is nil" do
25
+ expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
26
+ end
17
27
  end
18
28
 
19
29
  context "#-" do
20
30
  it "subtracts matching indexes of the other vector" do
21
- expect(@dv1 - @dv2).to eq(Daru::Vector.new([-1,0], name: :boozy, index: [:kf, :corona]))
31
+ expect(@dv1 - @dv2).to eq(Daru::Vector.new([nil,0,nil,-1,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
22
32
  end
23
33
 
24
34
  it "subtracts number from each element of the entire vector" do
@@ -26,7 +36,7 @@ describe Daru::Vector do
26
36
  end
27
37
  end
28
38
 
29
- context "#*"
39
+ context "#*" do
30
40
  it "multiplies matching indexes of the other vector" do
31
41
 
32
42
  end
@@ -48,4 +58,35 @@ describe Daru::Vector do
48
58
 
49
59
  context "#%" do
50
60
 
51
- end
61
+ end
62
+
63
+ context "#**" do
64
+
65
+ end
66
+
67
+ context "#exp" do
68
+ it "calculates exp of all numbers" do
69
+ expect(@with_md1.exp.round(3)).to eq(Daru::Vector.new([2.718281828459045,
70
+ 7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil], index:
71
+ [:a, :b, :c, :obi, :wan, :corona], name: :missing).round(3))
72
+ end
73
+ end
74
+
75
+ context "#abs" do
76
+ it "calculates abs value" do
77
+ @with_md1.abs
78
+ end
79
+ end
80
+
81
+ context "#sqrt" do
82
+ it "calculates sqrt" do
83
+ @with_md1.sqrt
84
+ end
85
+ end
86
+
87
+ context "#round" do
88
+ it "rounds to given precision" do
89
+ @with_md1.round(2)
90
+ end
91
+ end
92
+ end
@@ -1,5 +1,95 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::DataFrame do
4
-
4
+ before do
5
+ @df = Daru::DataFrame.new({
6
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
7
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
8
+ c: ['small','large','large','small','small','large','small','large','small'],
9
+ d: [1,2,2,3,3,4,5,6,7],
10
+ e: [2,4,4,6,6,8,10,12,14],
11
+ f: [10,20,20,30,30,40,50,60,70]
12
+ })
13
+ end
14
+
15
+ context "#mean" do
16
+ it "calculates mean of single level numeric only vectors and returns values in a Vector" do
17
+ expect(@df.mean.round(2)).to eq(Daru::Vector.new([3.67, 7.33, 36.67],
18
+ index: [:d, :e, :f]
19
+ ))
20
+ end
21
+
22
+ it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
23
+ # TODO - pending
24
+ end
25
+ end
26
+
27
+ context "#std" do
28
+ it "calculates standard deviation of single leavel numeric only vectors and returns values in a Vector" do
29
+ expect(@df.std).to eq(Daru::Vector.new([2, 4, 20], index: [:d, :e, :f]))
30
+ end
31
+ end
32
+
33
+ context "#sum" do
34
+ it "calculates sum of single level numeric only vectors and returns values in a Vector" do
35
+ # TODO - write tests
36
+ end
37
+ end
38
+
39
+ context "#count" do
40
+ # TODO
41
+ end
42
+
43
+ context "#mode" do
44
+ # TODO
45
+ end
46
+
47
+ context "#median" do
48
+ # TODO
49
+ end
50
+
51
+ context "#max" do
52
+ # TODO
53
+ end
54
+
55
+ context "#min" do
56
+ # TODO
57
+ end
58
+
59
+ context "#product" do
60
+ # TODO
61
+ end
62
+
63
+ context "#describe" do
64
+ it "generates mean, std, max, min and count of numeric vectors in one shot" do
65
+ expect(@df.describe.round(2)).to eq(Daru::DataFrame.new({
66
+ d: [9.00, 3.67 ,2.00 , 1.00, 7.00],
67
+ e: [9.00, 7.33 ,4.00 , 2.00, 14.00],
68
+ f: [9.00, 36.67,20.00,10.00, 70.00]
69
+ }, index: [:count, :mean, :std, :min, :max]
70
+ ))
71
+ end
72
+ end
73
+
74
+ context "#cov" do
75
+ it "calculates the variance covariance of the numeric vectors of DataFrame" do
76
+ expect(@df.cov).to eq(Daru::DataFrame.new({
77
+ d: [4,8,40],
78
+ e: [8,16,80],
79
+ f: [40,80,400]
80
+ }, index: [:d, :e, :f]
81
+ ))
82
+ end
83
+ end
84
+
85
+ context "#corr", focus: true do
86
+ it "calculates the correlation between the numeric vectors of DataFrame" do
87
+ expect(@df.corr).to eq(Daru::DataFrame.new({
88
+ d: [1,1,1],
89
+ e: [1,1,1],
90
+ f: [1,1,1]
91
+ }, index: [:d, :e, :f]
92
+ ))
93
+ end
94
+ end
5
95
  end
@@ -1,39 +1,41 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Vector do
4
- [NMatrix, Array].each do |dtype|
4
+ [:array, :nmatrix].each do |dtype|
5
5
  describe dtype do
6
6
  before :each do
7
7
  @dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
8
+ @dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
8
9
  end
9
10
 
10
11
  context "#mean" do
11
12
  it "calculates mean" do
12
13
  expect(@dv.mean).to eq(282.2)
14
+ expect(@dv_with_md.mean).to eq(282.2)
13
15
  end
14
16
  end
15
17
 
16
18
  context "#sum_of_squares" do
17
19
  it "calcs sum of squares" do
18
-
20
+ @dv.sum_of_squares
19
21
  end
20
22
  end
21
23
 
22
24
  context "#standard_deviation_sample" do
23
25
  it "calcs standard deviation sample" do
24
-
26
+ @dv.standard_deviation_sample
25
27
  end
26
28
  end
27
29
 
28
30
  context "#variance_sample" do
29
31
  it "calculates sample variance" do
30
-
32
+ @dv.variance_sample
31
33
  end
32
34
  end
33
35
 
34
36
  context "#standard_deviation_population" do
35
37
  it "calculates standard deviation population" do
36
-
38
+ @dv.standard_deviation_population
37
39
  end
38
40
  end
39
41
 
@@ -124,7 +126,9 @@ describe Daru::Vector do
124
126
  end
125
127
 
126
128
  context "#proportion" do
127
-
129
+ it "calculates proportion" do
130
+ @dv.proportion
131
+ end
128
132
  end
129
133
 
130
134
  context "#proportions" do
@@ -140,7 +144,13 @@ describe Daru::Vector do
140
144
  end
141
145
 
142
146
  context "#count" do
147
+ it "counts specified element" do
148
+ @dv.count(323)
149
+ end
143
150
 
151
+ it "counts total number of elements" do
152
+ expect(@dv.count).to eq(10)
153
+ end
144
154
  end
145
155
 
146
156
  context "#coefficient_of_variation" do
@@ -148,6 +158,22 @@ describe Daru::Vector do
148
158
  @dv.coefficient_of_variation
149
159
  end
150
160
  end
161
+
162
+ context "#factor" do
163
+
164
+ end
165
+
166
+ context "#median_absolute_deviation" do
167
+ it "calculates median_absolute_deviation" do
168
+ @dv.median_absolute_deviation
169
+ end
170
+ end
171
+
172
+ context "#standard_error" do
173
+ it "calculates standard error" do
174
+ @dv.standard_error
175
+ end
176
+ end
151
177
  end
152
178
  end
153
179
  end
@@ -1,6 +1,15 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe "Monkeys" do
4
- context "Array" do
4
+ context Array do
5
+ end
6
+
7
+ context Matrix do
8
+ it "performs elementwise division" do
9
+ left = Matrix[[3,6,9],[4,8,12],[2,4,6]]
10
+ right = Matrix[[3,6,9],[4,8,12],[2,4,6]]
11
+
12
+ expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
13
+ end
5
14
  end
6
15
  end