daru 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +0 -0
  3. data/Gemfile +0 -1
  4. data/History.txt +35 -0
  5. data/README.md +178 -198
  6. data/daru.gemspec +5 -7
  7. data/lib/daru.rb +10 -2
  8. data/lib/daru/accessors/array_wrapper.rb +36 -198
  9. data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
  10. data/lib/daru/core/group_by.rb +183 -0
  11. data/lib/daru/dataframe.rb +615 -167
  12. data/lib/daru/index.rb +17 -16
  13. data/lib/daru/io/io.rb +5 -12
  14. data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
  15. data/lib/daru/maths/arithmetic/vector.rb +19 -6
  16. data/lib/daru/maths/statistics/dataframe.rb +103 -2
  17. data/lib/daru/maths/statistics/vector.rb +102 -61
  18. data/lib/daru/monkeys.rb +8 -0
  19. data/lib/daru/multi_index.rb +199 -0
  20. data/lib/daru/plotting/dataframe.rb +24 -24
  21. data/lib/daru/plotting/vector.rb +14 -15
  22. data/lib/daru/vector.rb +402 -98
  23. data/lib/version.rb +1 -1
  24. data/notebooks/grouping_splitting_pivots.ipynb +529 -0
  25. data/notebooks/intro_with_music_data_.ipynb +104 -119
  26. data/spec/accessors/wrappers_spec.rb +36 -0
  27. data/spec/core/group_by_spec.rb +331 -0
  28. data/spec/dataframe_spec.rb +1237 -475
  29. data/spec/fixtures/sales-funnel.csv +18 -0
  30. data/spec/index_spec.rb +10 -21
  31. data/spec/io/io_spec.rb +4 -14
  32. data/spec/math/arithmetic/dataframe_spec.rb +66 -0
  33. data/spec/math/arithmetic/vector_spec.rb +45 -4
  34. data/spec/math/statistics/dataframe_spec.rb +91 -1
  35. data/spec/math/statistics/vector_spec.rb +32 -6
  36. data/spec/monkeys_spec.rb +10 -1
  37. data/spec/multi_index_spec.rb +216 -0
  38. data/spec/spec_helper.rb +1 -0
  39. data/spec/vector_spec.rb +505 -57
  40. metadata +21 -15
@@ -0,0 +1,18 @@
1
+ Account,Name,Rep,Manager,Product,Quantity,Price,Status
2
+ 714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented
3
+ 714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented
4
+ 714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending
5
+ 737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined
6
+ 146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won
7
+ 218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending
8
+ 218895,Kulas Inc,Daniel Hilton,Debra Henley,Software,1,10000,presented
9
+ 412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending
10
+ 740150,Barton LLC,John Smith,Debra Henley,CPU,1,35000,declined
11
+ 141962,Herman LLC,Cedric Moss,Fred Anderson,CPU,2,65000,won
12
+ 163416,Purdy-Kunde,Cedric Moss,Fred Anderson,CPU,1,30000,presented
13
+ 239344,Stokes LLC,Cedric Moss,Fred Anderson,Maintenance,1,5000,pending
14
+ 239344,Stokes LLC,Cedric Moss,Fred Anderson,Software,1,10000,presented
15
+ 307599,"Kassulke, Ondricka and Metz",Wendy Yule,Fred Anderson,Maintenance,3,7000,won
16
+ 688981,Keeling LLC,Wendy Yule,Fred Anderson,CPU,5,100000,won
17
+ 729833,Koepp Ltd,Wendy Yule,Fred Anderson,CPU,2,65000,declined
18
+ 729833,Koepp Ltd,Wendy Yule,Fred Anderson,Monitor,2,5000,presented
@@ -17,25 +17,6 @@ describe Daru::Index do
17
17
  end
18
18
  end
19
19
 
20
- context "#re_index" do
21
- before :each do
22
- @old = Daru::Index.new [:bob, :fisher, :zakir]
23
- end
24
- it "returns a new index object" do
25
- n = @old.re_index(@old + [:john, :shrinivas])
26
-
27
- expect(n.object_id).not_to eq(@old.object_id)
28
- expect(n.to_a).to eq([:bob, :fisher, :zakir, :john, :shrinivas])
29
- end
30
-
31
- it "does not over-ride existing indexes" do
32
- n = @old.re_index(@old + :bob)
33
-
34
- expect(n.object_id).not_to eq(@old.object_id)
35
- expect(n.to_a) .to eq([:bob, :fisher, :zakir])
36
- end
37
- end
38
-
39
20
  context "#+" do
40
21
  before :each do
41
22
  @left = Daru::Index.new [:miles, :geddy, :eric]
@@ -53,10 +34,18 @@ describe Daru::Index do
53
34
  end
54
35
 
55
36
  context "#[]" do
37
+ before do
38
+ @id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
39
+ end
40
+
56
41
  it "works with ranges" do
57
- id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
42
+ expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five],
43
+ [1,2,3,4]))
44
+ end
58
45
 
59
- expect(id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five]))
46
+ it "returns multiple keys if specified multiple indices" do
47
+ expect(@id[[0,1,3,4]]).to eq(Daru::Index.new([:one, :two, :four, :five],
48
+ [0,1,3,4]))
60
49
  end
61
50
  end
62
51
  end
@@ -4,21 +4,11 @@ describe Daru::DataFrame do
4
4
  context ".from_csv" do
5
5
  it "loads from a CSV file" do
6
6
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
7
- col_sep: ' ', headers: true) do |csv|
8
- csv.convert do |field, info|
9
- case info[:header]
10
- when :true_transform
11
- field.split(',').map { |s| s.to_f }
12
- else
13
- field
14
- end
15
- end
16
- end
17
-
18
-
19
- expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
7
+ col_sep: ' ', headers: true)
8
+
9
+ expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
20
10
  expect(df.vector[:image_resolution].first).to eq(6.55779)
21
- expect(df.vector[:true_transform].first[15]).to eq(1.0)
11
+ expect(df.vector[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
22
12
  end
23
13
  end
24
14
 
@@ -1,5 +1,71 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::DataFrame do
4
+ before(:each) do
5
+ @df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a','e','i','o','u'],
6
+ c: [10,20,30,40,50]})
7
+ @left = Daru::DataFrame.new({a: [1,nil,nil,4], b: [10,nil,nil,40], c: [5,6,7,8]},
8
+ index: [0,4,5,3])
9
+ @right = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,20,30,40,50]},
10
+ index: [0,1,2,3,6])
11
+ end
4
12
 
13
+ context "#+" do
14
+ it "adds a number to all numeric vectors" do
15
+ expect(@df + 2).to eq(Daru::DataFrame.new({a: [3,4,5,6,7], b: ['a','e','i','o','u'],
16
+ c: [12,22,32,42,52] }))
17
+ end
18
+
19
+ it "adds two dataframes to produce a third" do
20
+ expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
21
+ b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
22
+ [0,1,2,3,4,5,6]))
23
+ end
24
+ end
25
+
26
+ context "#-" do
27
+ it "subtracts a number from all numeric vectors" do
28
+ expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
29
+ c: [8,18,28,38,48]}))
30
+ end
31
+
32
+ it "subtracts a data frame from another" do
33
+
34
+ end
35
+ end
36
+
37
+ context "#*" do
38
+ it "multiplies a number with a DataFrame" do
39
+ end
40
+ end
41
+
42
+ context "#/" do
43
+
44
+ end
45
+
46
+ context "#%" do
47
+
48
+ end
49
+
50
+ context "#**" do
51
+
52
+ end
53
+
54
+ context "#sqrt" do
55
+ it "calculates sqrt" do
56
+ @df.sqrt
57
+ end
58
+ end
59
+
60
+ context "#round" do
61
+ it "rounds to precision" do
62
+ @df.round
63
+ end
64
+ end
65
+
66
+ context "#exp" do
67
+ it "calculates exponential" do
68
+ @df.exp
69
+ end
70
+ end
5
71
  end
@@ -4,21 +4,31 @@ describe Daru::Vector do
4
4
  before :each do
5
5
  @dv1 = Daru::Vector.new [1,2,3,4], name: :boozy, index: [:bud, :kf, :henie, :corona]
6
6
  @dv2 = Daru::Vector.new [1,2,3,4], name: :mayer, index: [:obi, :wan, :kf, :corona]
7
+ @with_md1 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]
8
+ @with_md2 = Daru::Vector.new [1,2,3,nil,5,nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c]
7
9
  end
8
10
 
9
11
  context "#+" do
10
12
  it "adds matching indexes of the other vector" do
11
- expect(@dv1 + @dv2).to eq(Daru::Vector.new([5, 8], name: :boozy, index: [:kf, :corona]))
13
+ expect(@dv1 + @dv2).to eq(Daru::Vector.new([nil,8,nil,5,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
12
14
  end
13
15
 
14
16
  it "adds number to each element of the entire vector" do
15
17
  expect(@dv1 + 5).to eq(Daru::Vector.new [6,7,8,9], name: :boozy, index: [:bud, :kf, :henie, :corona])
16
18
  end
19
+
20
+ it "does not add when a number is being added" do
21
+ expect(@with_md1 + 1).to eq(Daru::Vector.new([2,3,4,nil,6,nil], name: :missing, index: [:a, :b, :c, :obi, :wan, :corona]))
22
+ end
23
+
24
+ it "puts a nil when one of the operands is nil" do
25
+ expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
26
+ end
17
27
  end
18
28
 
19
29
  context "#-" do
20
30
  it "subtracts matching indexes of the other vector" do
21
- expect(@dv1 - @dv2).to eq(Daru::Vector.new([-1,0], name: :boozy, index: [:kf, :corona]))
31
+ expect(@dv1 - @dv2).to eq(Daru::Vector.new([nil,0,nil,-1,nil,nil], name: :boozy, index: [:bud,:corona,:henie,:kf,:obi,:wan]))
22
32
  end
23
33
 
24
34
  it "subtracts number from each element of the entire vector" do
@@ -26,7 +36,7 @@ describe Daru::Vector do
26
36
  end
27
37
  end
28
38
 
29
- context "#*"
39
+ context "#*" do
30
40
  it "multiplies matching indexes of the other vector" do
31
41
 
32
42
  end
@@ -48,4 +58,35 @@ describe Daru::Vector do
48
58
 
49
59
  context "#%" do
50
60
 
51
- end
61
+ end
62
+
63
+ context "#**" do
64
+
65
+ end
66
+
67
+ context "#exp" do
68
+ it "calculates exp of all numbers" do
69
+ expect(@with_md1.exp.round(3)).to eq(Daru::Vector.new([2.718281828459045,
70
+ 7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil], index:
71
+ [:a, :b, :c, :obi, :wan, :corona], name: :missing).round(3))
72
+ end
73
+ end
74
+
75
+ context "#abs" do
76
+ it "calculates abs value" do
77
+ @with_md1.abs
78
+ end
79
+ end
80
+
81
+ context "#sqrt" do
82
+ it "calculates sqrt" do
83
+ @with_md1.sqrt
84
+ end
85
+ end
86
+
87
+ context "#round" do
88
+ it "rounds to given precision" do
89
+ @with_md1.round(2)
90
+ end
91
+ end
92
+ end
@@ -1,5 +1,95 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::DataFrame do
4
-
4
+ before do
5
+ @df = Daru::DataFrame.new({
6
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
7
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
8
+ c: ['small','large','large','small','small','large','small','large','small'],
9
+ d: [1,2,2,3,3,4,5,6,7],
10
+ e: [2,4,4,6,6,8,10,12,14],
11
+ f: [10,20,20,30,30,40,50,60,70]
12
+ })
13
+ end
14
+
15
+ context "#mean" do
16
+ it "calculates mean of single level numeric only vectors and returns values in a Vector" do
17
+ expect(@df.mean.round(2)).to eq(Daru::Vector.new([3.67, 7.33, 36.67],
18
+ index: [:d, :e, :f]
19
+ ))
20
+ end
21
+
22
+ it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
23
+ # TODO - pending
24
+ end
25
+ end
26
+
27
+ context "#std" do
28
+ it "calculates standard deviation of single leavel numeric only vectors and returns values in a Vector" do
29
+ expect(@df.std).to eq(Daru::Vector.new([2, 4, 20], index: [:d, :e, :f]))
30
+ end
31
+ end
32
+
33
+ context "#sum" do
34
+ it "calculates sum of single level numeric only vectors and returns values in a Vector" do
35
+ # TODO - write tests
36
+ end
37
+ end
38
+
39
+ context "#count" do
40
+ # TODO
41
+ end
42
+
43
+ context "#mode" do
44
+ # TODO
45
+ end
46
+
47
+ context "#median" do
48
+ # TODO
49
+ end
50
+
51
+ context "#max" do
52
+ # TODO
53
+ end
54
+
55
+ context "#min" do
56
+ # TODO
57
+ end
58
+
59
+ context "#product" do
60
+ # TODO
61
+ end
62
+
63
+ context "#describe" do
64
+ it "generates mean, std, max, min and count of numeric vectors in one shot" do
65
+ expect(@df.describe.round(2)).to eq(Daru::DataFrame.new({
66
+ d: [9.00, 3.67 ,2.00 , 1.00, 7.00],
67
+ e: [9.00, 7.33 ,4.00 , 2.00, 14.00],
68
+ f: [9.00, 36.67,20.00,10.00, 70.00]
69
+ }, index: [:count, :mean, :std, :min, :max]
70
+ ))
71
+ end
72
+ end
73
+
74
+ context "#cov" do
75
+ it "calculates the variance covariance of the numeric vectors of DataFrame" do
76
+ expect(@df.cov).to eq(Daru::DataFrame.new({
77
+ d: [4,8,40],
78
+ e: [8,16,80],
79
+ f: [40,80,400]
80
+ }, index: [:d, :e, :f]
81
+ ))
82
+ end
83
+ end
84
+
85
+ context "#corr", focus: true do
86
+ it "calculates the correlation between the numeric vectors of DataFrame" do
87
+ expect(@df.corr).to eq(Daru::DataFrame.new({
88
+ d: [1,1,1],
89
+ e: [1,1,1],
90
+ f: [1,1,1]
91
+ }, index: [:d, :e, :f]
92
+ ))
93
+ end
94
+ end
5
95
  end
@@ -1,39 +1,41 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Vector do
4
- [NMatrix, Array].each do |dtype|
4
+ [:array, :nmatrix].each do |dtype|
5
5
  describe dtype do
6
6
  before :each do
7
7
  @dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
8
+ @dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
8
9
  end
9
10
 
10
11
  context "#mean" do
11
12
  it "calculates mean" do
12
13
  expect(@dv.mean).to eq(282.2)
14
+ expect(@dv_with_md.mean).to eq(282.2)
13
15
  end
14
16
  end
15
17
 
16
18
  context "#sum_of_squares" do
17
19
  it "calcs sum of squares" do
18
-
20
+ @dv.sum_of_squares
19
21
  end
20
22
  end
21
23
 
22
24
  context "#standard_deviation_sample" do
23
25
  it "calcs standard deviation sample" do
24
-
26
+ @dv.standard_deviation_sample
25
27
  end
26
28
  end
27
29
 
28
30
  context "#variance_sample" do
29
31
  it "calculates sample variance" do
30
-
32
+ @dv.variance_sample
31
33
  end
32
34
  end
33
35
 
34
36
  context "#standard_deviation_population" do
35
37
  it "calculates standard deviation population" do
36
-
38
+ @dv.standard_deviation_population
37
39
  end
38
40
  end
39
41
 
@@ -124,7 +126,9 @@ describe Daru::Vector do
124
126
  end
125
127
 
126
128
  context "#proportion" do
127
-
129
+ it "calculates proportion" do
130
+ @dv.proportion
131
+ end
128
132
  end
129
133
 
130
134
  context "#proportions" do
@@ -140,7 +144,13 @@ describe Daru::Vector do
140
144
  end
141
145
 
142
146
  context "#count" do
147
+ it "counts specified element" do
148
+ @dv.count(323)
149
+ end
143
150
 
151
+ it "counts total number of elements" do
152
+ expect(@dv.count).to eq(10)
153
+ end
144
154
  end
145
155
 
146
156
  context "#coefficient_of_variation" do
@@ -148,6 +158,22 @@ describe Daru::Vector do
148
158
  @dv.coefficient_of_variation
149
159
  end
150
160
  end
161
+
162
+ context "#factor" do
163
+
164
+ end
165
+
166
+ context "#median_absolute_deviation" do
167
+ it "calculates median_absolute_deviation" do
168
+ @dv.median_absolute_deviation
169
+ end
170
+ end
171
+
172
+ context "#standard_error" do
173
+ it "calculates standard error" do
174
+ @dv.standard_error
175
+ end
176
+ end
151
177
  end
152
178
  end
153
179
  end
@@ -1,6 +1,15 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe "Monkeys" do
4
- context "Array" do
4
+ context Array do
5
+ end
6
+
7
+ context Matrix do
8
+ it "performs elementwise division" do
9
+ left = Matrix[[3,6,9],[4,8,12],[2,4,6]]
10
+ right = Matrix[[3,6,9],[4,8,12],[2,4,6]]
11
+
12
+ expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
13
+ end
5
14
  end
6
15
  end