daru 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +14 -0
  3. data/.travis.yml +26 -4
  4. data/CONTRIBUTING.md +31 -0
  5. data/Gemfile +1 -2
  6. data/{History.txt → History.md} +110 -44
  7. data/README.md +21 -288
  8. data/Rakefile +1 -0
  9. data/daru.gemspec +12 -8
  10. data/lib/daru.rb +36 -1
  11. data/lib/daru/accessors/array_wrapper.rb +8 -3
  12. data/lib/daru/accessors/gsl_wrapper.rb +113 -0
  13. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
  14. data/lib/daru/core/group_by.rb +0 -1
  15. data/lib/daru/dataframe.rb +1192 -83
  16. data/lib/daru/extensions/rserve.rb +21 -0
  17. data/lib/daru/index.rb +14 -0
  18. data/lib/daru/io/io.rb +170 -8
  19. data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
  20. data/lib/daru/maths/arithmetic/vector.rb +4 -4
  21. data/lib/daru/maths/statistics/dataframe.rb +48 -27
  22. data/lib/daru/maths/statistics/vector.rb +215 -33
  23. data/lib/daru/monkeys.rb +53 -7
  24. data/lib/daru/multi_index.rb +21 -4
  25. data/lib/daru/plotting/dataframe.rb +83 -25
  26. data/lib/daru/plotting/vector.rb +9 -10
  27. data/lib/daru/vector.rb +596 -61
  28. data/lib/daru/version.rb +3 -0
  29. data/spec/accessors/wrappers_spec.rb +51 -0
  30. data/spec/core/group_by_spec.rb +0 -2
  31. data/spec/daru_spec.rb +58 -0
  32. data/spec/dataframe_spec.rb +768 -73
  33. data/spec/extensions/rserve_spec.rb +52 -0
  34. data/spec/fixtures/bank2.dat +200 -0
  35. data/spec/fixtures/repeated_fields.csv +7 -0
  36. data/spec/fixtures/scientific_notation.csv +4 -0
  37. data/spec/fixtures/test_xls.xls +0 -0
  38. data/spec/io/io_spec.rb +161 -24
  39. data/spec/math/arithmetic/dataframe_spec.rb +26 -7
  40. data/spec/math/arithmetic/vector_spec.rb +8 -0
  41. data/spec/math/statistics/dataframe_spec.rb +16 -1
  42. data/spec/math/statistics/vector_spec.rb +215 -47
  43. data/spec/spec_helper.rb +21 -2
  44. data/spec/vector_spec.rb +368 -12
  45. metadata +99 -16
  46. data/lib/version.rb +0 -3
  47. data/notebooks/grouping_splitting_pivots.ipynb +0 -529
  48. data/notebooks/intro_with_music_data_.ipynb +0 -303
@@ -17,16 +17,20 @@ describe Daru::DataFrame do
17
17
  end
18
18
 
19
19
  it "adds two dataframes to produce a third" do
20
- expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
21
- b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
22
- [0,1,2,3,4,5,6]))
20
+ expect(@left + @right).to eq(Daru::DataFrame.new({
21
+ a: [2,nil,nil,8,nil,nil,nil],
22
+ b: [20,nil,nil,80,nil,nil,nil],
23
+ c: [nil,nil,nil,nil,nil,nil]
24
+ }, index: [0,1,2,3,4,5,6]))
23
25
  end
24
26
  end
25
27
 
26
28
  context "#-" do
27
29
  it "subtracts a number from all numeric vectors" do
28
- expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
29
- c: [8,18,28,38,48]}))
30
+ expect(@df - 2).to eq(Daru::DataFrame.new({
31
+ a: [-1,0,1,2,3],
32
+ b: ['a','e','i','o','u'],
33
+ c: [8,18,28,38,48]}))
30
34
  end
31
35
 
32
36
  it "subtracts a data frame from another" do
@@ -53,13 +57,28 @@ describe Daru::DataFrame do
53
57
 
54
58
  context "#sqrt" do
55
59
  it "calculates sqrt" do
56
- @df.sqrt
60
+ expect_correct_df_in_delta(@df.sqrt,
61
+ Daru::DataFrame.new({
62
+ a: [1.0,1.41421356,1.73205080,2.0,2.23606797],
63
+ c: [3.16227766, 4.47213595 ,5.47722557 ,6.32455532, 7.07106781]
64
+ }), 0.001
65
+ )
57
66
  end
58
67
  end
59
68
 
60
69
  context "#round" do
61
70
  it "rounds to precision" do
62
- @df.round
71
+ df = Daru::DataFrame.new({
72
+ a: [1.3434,2.4332,5.6655,12.3344,32.233],
73
+ b: [1.3434,2.4332,5.6655,12.3344,32.233],
74
+ c: %w(a b c d e)
75
+ })
76
+ ans = Daru::DataFrame.new({
77
+ a: [1.34,2.43,5.67,12.33,32.23],
78
+ b: [1.34,2.43,5.67,12.33,32.23],
79
+ })
80
+
81
+ expect(df.round(2)).to eq(ans)
63
82
  end
64
83
  end
65
84
 
@@ -24,6 +24,14 @@ describe Daru::Vector do
24
24
  it "puts a nil when one of the operands is nil" do
25
25
  expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
26
26
  end
27
+
28
+ it "appropriately adds vectors with numeric and non-numeric indexes" do
29
+ pending "Need an alternate index implementation?"
30
+ v1 = Daru::Vector.new([1,2,3])
31
+ v2 = Daru::Vector.new([1,2,3], index: [:a,:b,:c])
32
+
33
+ expect(v1 + v2).to eq(Daru::Vector.new([nil]*6, index: [0,1,2,:a,:b,:c]))
34
+ end
27
35
  end
28
36
 
29
37
  context "#-" do
@@ -79,10 +79,25 @@ describe Daru::DataFrame do
79
79
  f: [40,80,400]
80
80
  }, index: [:d, :e, :f]
81
81
  ))
82
+
83
+ test = Daru::DataFrame.rows([
84
+ [0.3543,0.4535,0.2424],
85
+ [0.123,0.53323,0.544],
86
+ [0.4345,0.4552,0.425]
87
+ ], order: [:a, :b, :c])
88
+ ans = Daru::DataFrame.new({
89
+ a: [0.0261607, -0.0071019, -0.0153640],
90
+ b: [-0.0071019, 0.0020747, 0.0056071],
91
+ c: [-0.0153640, 0.0056071, 0.0230777]
92
+ })
93
+
94
+ test.cov.each_vector_with_index do |v, i|
95
+ expect_correct_vector_in_delta v, ans[i], 0.01
96
+ end
82
97
  end
83
98
  end
84
99
 
85
- context "#corr", focus: true do
100
+ context "#corr" do
86
101
  it "calculates the correlation between the numeric vectors of DataFrame" do
87
102
  expect(@df.corr).to eq(Daru::DataFrame.new({
88
103
  d: [1,1,1],
@@ -1,35 +1,36 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Vector do
4
- [:array, :nmatrix].each do |dtype|
4
+ [:array, :gsl].each do |dtype| #nmatrix still unstable
5
5
  describe dtype do
6
- before :each do
6
+ before do
7
7
  @dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
8
- @dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
8
+ @dv_with_nils = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
9
9
  end
10
10
 
11
11
  context "#mean" do
12
12
  it "calculates mean" do
13
13
  expect(@dv.mean).to eq(282.2)
14
- expect(@dv_with_md.mean).to eq(282.2)
14
+ expect(@dv_with_nils.mean).to eq(282.2)
15
15
  end
16
16
  end
17
17
 
18
18
  context "#sum_of_squares" do
19
- it "calcs sum of squares" do
20
- @dv.sum_of_squares
19
+ it "calcs sum of squares, omits nil values" do
20
+ v = Daru::Vector.new [1,2,3,4,5,6], dtype: dtype
21
+ expect(v.sum_of_squares).to eq(17.5)
21
22
  end
22
23
  end
23
24
 
24
25
  context "#standard_deviation_sample" do
25
26
  it "calcs standard deviation sample" do
26
- @dv.standard_deviation_sample
27
+ @dv_with_nils.standard_deviation_sample
27
28
  end
28
29
  end
29
30
 
30
31
  context "#variance_sample" do
31
32
  it "calculates sample variance" do
32
- @dv.variance_sample
33
+ expect(@dv.variance).to be_within(0.01).of(75118.84)
33
34
  end
34
35
  end
35
36
 
@@ -41,7 +42,7 @@ describe Daru::Vector do
41
42
 
42
43
  context "#variance_population" do
43
44
  it "calculates population variance" do
44
- expect(@dv.variance_population).to eq(67606.95999999999)
45
+ expect(@dv.variance_population).to be_within(0.001).of(67606.95999999999)
45
46
  end
46
47
  end
47
48
 
@@ -77,7 +78,8 @@ describe Daru::Vector do
77
78
 
78
79
  context "#product" do
79
80
  it "returns the product" do
80
- @dv.product
81
+ v = Daru::Vector.new [1, 2, 3, 4, 5], dtype: dtype
82
+ expect(v.product).to eq(120)
81
83
  end
82
84
  end
83
85
 
@@ -99,35 +101,38 @@ describe Daru::Vector do
99
101
  end
100
102
  end
101
103
 
102
- context "#percentile" do
103
- it "calculates percentile" do
104
- expect(@dv.percentile(50)).to eq(333.0)
104
+ context "#count" do
105
+ it "counts specified element" do
106
+ @dv.count(323)
105
107
  end
106
- end
107
-
108
- context "#recode" do
109
108
 
109
+ it "counts total number of elements" do
110
+ expect(@dv.count).to eq(10)
111
+ end
110
112
  end
111
113
 
112
- context "#recode!" do
113
-
114
+ context "#coefficient_of_variation" do
115
+ it "calculates coefficient_of_variation" do
116
+ @dv.coefficient_of_variation
117
+ end
114
118
  end
115
119
 
116
- context "#frequencies" do
117
- it "calculates frequencies" do
118
- @dv.frequencies
120
+ context "#percentile" do
121
+ it "calculates mid point percentile" do
122
+ expect(@dv.percentile(50)).to eq(278.5)
119
123
  end
120
124
  end
121
125
 
122
126
  context "#average_deviation_population" do
123
127
  it "calculates average_deviation_population" do
124
- @dv.average_deviation_population
128
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype: dtype)
129
+ expect(a.average_deviation_population).to eq(20.quo(9).to_f)
125
130
  end
126
131
  end
127
132
 
128
133
  context "#proportion" do
129
134
  it "calculates proportion" do
130
- @dv.proportion
135
+ expect(@dv.proportion(dtype == :gsl ? 1.0 : 1)).to eq(0.1)
131
136
  end
132
137
  end
133
138
 
@@ -137,43 +142,206 @@ describe Daru::Vector do
137
142
  end
138
143
  end
139
144
 
140
- context "#ranked" do
141
- it "curates by rank" do
142
- @dv.ranked
145
+ context "#standard_error" do
146
+ it "calculates standard error" do
147
+ @dv.standard_error
143
148
  end
144
149
  end
145
150
 
146
- context "#count" do
147
- it "counts specified element" do
148
- @dv.count(323)
149
- end
150
-
151
- it "counts total number of elements" do
152
- expect(@dv.count).to eq(10)
151
+ context "#vector_standardized_compute" do
152
+ it "calculates vector_standardized_compute" do
153
+ @dv.vector_standardized_compute(@dv.mean, @dv.sd)
154
+ @dv_with_nils.vector_standardized_compute(@dv.mean, @dv.sd)
153
155
  end
154
156
  end
155
157
 
156
- context "#coefficient_of_variation" do
157
- it "calculates coefficient_of_variation" do
158
- @dv.coefficient_of_variation
158
+ context "#vector_centered_compute" do
159
+ it "calculates vector_centered_compute" do
160
+ @dv.vector_centered_compute(@dv.mean)
161
+ @dv_with_nils.vector_centered_compute(@dv.mean)
159
162
  end
160
163
  end
164
+ end
165
+ end # ALL DTYPE tests
166
+
167
+ # Only Array tests
168
+ context "#percentile" do
169
+ it "tests linear percentile strategy" do
170
+ values = Daru::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle
171
+ expect(values.percentil(0, :linear)).to eq(102)
172
+ expect(values.percentil(25, :linear)).to eq(104.75)
173
+ expect(values.percentil(50, :linear)).to eq(108.5)
174
+ expect(values.percentil(75, :linear)).to eq(112.75)
175
+ expect(values.percentil(100, :linear)).to eq(116)
176
+
177
+ values = Daru::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle
178
+ expect(values.percentil(0, :linear)).to eq(102)
179
+ expect(values.percentil(25, :linear)).to eq(105)
180
+ expect(values.percentil(50, :linear)).to eq(109)
181
+ expect(values.percentil(75, :linear)).to eq(115)
182
+ expect(values.percentil(100, :linear)).to eq(118)
183
+ end
184
+ end
161
185
 
162
- context "#factor" do
186
+ context "#frequencies" do
187
+ it "calculates frequencies" do
188
+ vector = Daru::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
189
+ expect(vector.frequencies).to eq({
190
+ 1=>1, 2=>1, 3=>1, 4=>1, 5=>5,
191
+ 6=>2, 7=>1, 8=>1, 9=>1,10=>1, -99=>2
192
+ })
193
+ end
194
+ end
163
195
 
164
- end
196
+ context "#ranked" do
197
+ it "curates by rank" do
198
+ vector = Daru::Vector.new([nil, 0.8, 1.2, 1.2, 2.3, 18, nil])
199
+ expect(vector.ranked).to eq(Daru::Vector.new([nil,1,2.5,2.5,4,5,nil]))
165
200
 
166
- context "#median_absolute_deviation" do
167
- it "calculates median_absolute_deviation" do
168
- @dv.median_absolute_deviation
169
- end
170
- end
201
+ v = Daru::Vector.new [0.8, 1.2, 1.2, 2.3, 18]
202
+ expect(v.ranked).to eq(Daru::Vector.new [1, 2.5, 2.5, 4, 5])
203
+ end
171
204
 
172
- context "#standard_error" do
173
- it "calculates standard error" do
174
- @dv.standard_error
175
- end
205
+ it "tests paired ties" do
206
+ a = Daru::Vector.new [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4]
207
+ expected = Daru::Vector.new [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10]
208
+ expect(a.ranked).to eq(expected)
209
+ end
210
+ end
211
+
212
+ context "#dichotomize" do
213
+ it "dichotomizes" do
214
+ a = Daru::Vector.new [0, 0, 0, 1, 2, 3, nil]
215
+ exp = Daru::Vector.new [0, 0, 0, 1, 1, 1, nil]
216
+ expect(a.dichotomize).to eq(exp)
217
+
218
+ a = Daru::Vector.new [1, 1, 1, 2, 2, 2, 3]
219
+ exp = Daru::Vector.new [0, 0, 0, 1, 1, 1, 1]
220
+ expect(a.dichotomize).to eq(exp)
221
+
222
+ a = Daru::Vector.new [0, 0, 0, 1, 2, 3, nil]
223
+ exp = Daru::Vector.new [0, 0, 0, 0, 1, 1, nil]
224
+ expect(a.dichotomize(1)).to eq(exp)
225
+
226
+ a = Daru::Vector.new %w(a a a b c d)
227
+ exp = Daru::Vector.new [0, 0, 0, 1, 1, 1]
228
+ expect(a.dichotomize).to eq(exp)
229
+ end
230
+ end
231
+
232
+ context "#median_absolute_deviation" do
233
+ it "calculates median_absolute_deviation" do
234
+ a = Daru::Vector.new [1, 1, 2, 2, 4, 6, 9]
235
+ expect(a.median_absolute_deviation).to eq(1)
236
+ end
237
+ end
238
+
239
+ context "#round" do
240
+ it "rounds non-nil values" do
241
+ vector = Daru::Vector.new([1.44,55.32,nil,4])
242
+ expect(vector.round(1)).to eq(Daru::Vector.new([1.4,55.3,nil,4]))
243
+ end
244
+ end
245
+
246
+ context "#center" do
247
+ it "centers" do
248
+ mean = rand
249
+ samples = 11
250
+ centered = Daru::Vector.new(samples.times.map { |i| i - ((samples / 2).floor).to_i })
251
+ not_centered = centered.recode { |v| v + mean }
252
+ obs = not_centered.center
253
+ centered.each_with_index do |v, i|
254
+ expect(v).to be_within(0.0001).of(obs[i])
176
255
  end
177
256
  end
178
257
  end
258
+
259
+ context "#standardize" do
260
+ it "returns a standardized vector" do
261
+ vector = Daru::Vector.new([11,55,33,25,nil,22])
262
+ expect(vector.standardize.round(2)).to eq(
263
+ Daru::Vector.new([-1.11, 1.57, 0.23, -0.26,nil, -0.44])
264
+ )
265
+ end
266
+
267
+ it "tests for vector standardized with zero variance" do
268
+ v1 = Daru::Vector.new 100.times.map { |_i| 1 }
269
+ exp = Daru::Vector.new 100.times.map { nil }
270
+ expect(v1.standardize).to eq(exp)
271
+ end
272
+ end
273
+
274
+ context "#vector_percentile" do
275
+ it "replaces each non-nil value with its percentile value" do
276
+ vector = Daru::Vector.new([1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10])
277
+ expect(vector.vector_percentile).to eq(Daru::Vector.new(
278
+ [10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100])
279
+ )
280
+ end
281
+ end
282
+
283
+ context "#sample_with_replacement" do
284
+ it "calculates sample_with_replacement" do
285
+ vec = Daru::Vector.new(
286
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
287
+ name: :common_all_dtypes)
288
+ srand(1)
289
+ expect(vec.sample_with_replacement(100).size).to eq(100)
290
+
291
+ srand(1)
292
+ expect(vec.sample_with_replacement(100).size).to eq(100)
293
+ end
294
+ end
295
+
296
+ context "#sample_without_replacement" do
297
+ it "calculates sample_without_replacement" do
298
+ vec = Daru::Vector.new(
299
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
300
+ name: :common_all_dtypes)
301
+
302
+ srand(1)
303
+ expect(vec.sample_without_replacement(17).sort).to eq(
304
+ vec.only_valid.to_a.sort)
305
+ expect {
306
+ vec.sample_without_replacement(20)
307
+ }.to raise_error(ArgumentError)
308
+
309
+ srand(1)
310
+ expect(vec.sample_without_replacement(17).sort).to eq(
311
+ vec.only_valid.to_a.sort)
312
+ end
313
+ end
314
+
315
+ context "#jackknife" do
316
+ it "jack knife correctly with named method" do
317
+ a = Daru::Vector.new [1, 2, 3, 4]
318
+ df = a.jackknife(:mean)
319
+ expect(df[:mean].mean).to eq (a.mean)
320
+
321
+ df = a.jackknife([:mean, :sd])
322
+ expect(df[:mean].mean).to eq(a.mean)
323
+ expect(df[:mean].sd).to eq(a.sd)
324
+ end
325
+
326
+ it "jack knife correctly with custom method" do
327
+ a = Daru::Vector.new [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25]
328
+ ds = a.jackknife(log_s2: ->(v) { Math.log(v.variance) })
329
+ exp = Daru::Vector.new [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937]
330
+
331
+ expect_correct_vector_in_delta ds[:log_s2], exp, 0.001
332
+ # expect(ds[:log_s2]).to be_within(0.001).of(exp)
333
+ expect(ds[:log_s2].mean).to be_within(0.00001).of(2.00389)
334
+ expect(ds[:log_s2].variance).to be_within(0.001).of(1.091)
335
+ end
336
+
337
+ it "jack knife correctly with k > 1" do
338
+ rng = Distribution::Normal.rng(0,1)
339
+ a = Daru::Vector.new_with_size(6) { rng.call}
340
+
341
+ ds = a.jackknife(:mean, 2)
342
+ mean = a.mean
343
+ exp = Daru::Vector.new [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4]
344
+ expect_correct_vector_in_delta(exp, ds[:mean], 1e-13)
345
+ end
346
+ end
179
347
  end
@@ -1,6 +1,8 @@
1
1
  require 'rspec'
2
- require 'awesome_print'
3
2
  require 'matrix'
3
+ require 'awesome_print'
4
+ require 'distribution'
5
+ require 'tempfile'
4
6
 
5
7
  def mri?
6
8
  RUBY_ENGINE == 'ruby'
@@ -16,6 +18,23 @@ else
16
18
  require 'nmatrix'
17
19
  end
18
20
 
21
+
19
22
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
20
23
  $LOAD_PATH.unshift(File.dirname(__FILE__))
21
- require 'daru'
24
+ require 'daru'
25
+
26
+ ALL_DTYPES = [:nmatrix, :gsl, :array]
27
+
28
+ # FIXME: This must go! Need to be able to use be_within
29
+ def expect_correct_vector_in_delta v1, v2, delta
30
+ expect(v1.size).to eq(v2.size)
31
+ (0...v1.size).each do |v|
32
+ expect(v1[v]).to be_within(delta).of(v2[v])
33
+ end
34
+ end
35
+
36
+ def expect_correct_df_in_delta df1, df2, delta
37
+ df1.each_vector_with_index do |vector, i|
38
+ expect_correct_vector_in_delta vector, df2[i], delta
39
+ end
40
+ end