daru 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.build.sh +14 -0
  3. data/.travis.yml +26 -4
  4. data/CONTRIBUTING.md +31 -0
  5. data/Gemfile +1 -2
  6. data/{History.txt → History.md} +110 -44
  7. data/README.md +21 -288
  8. data/Rakefile +1 -0
  9. data/daru.gemspec +12 -8
  10. data/lib/daru.rb +36 -1
  11. data/lib/daru/accessors/array_wrapper.rb +8 -3
  12. data/lib/daru/accessors/gsl_wrapper.rb +113 -0
  13. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
  14. data/lib/daru/core/group_by.rb +0 -1
  15. data/lib/daru/dataframe.rb +1192 -83
  16. data/lib/daru/extensions/rserve.rb +21 -0
  17. data/lib/daru/index.rb +14 -0
  18. data/lib/daru/io/io.rb +170 -8
  19. data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
  20. data/lib/daru/maths/arithmetic/vector.rb +4 -4
  21. data/lib/daru/maths/statistics/dataframe.rb +48 -27
  22. data/lib/daru/maths/statistics/vector.rb +215 -33
  23. data/lib/daru/monkeys.rb +53 -7
  24. data/lib/daru/multi_index.rb +21 -4
  25. data/lib/daru/plotting/dataframe.rb +83 -25
  26. data/lib/daru/plotting/vector.rb +9 -10
  27. data/lib/daru/vector.rb +596 -61
  28. data/lib/daru/version.rb +3 -0
  29. data/spec/accessors/wrappers_spec.rb +51 -0
  30. data/spec/core/group_by_spec.rb +0 -2
  31. data/spec/daru_spec.rb +58 -0
  32. data/spec/dataframe_spec.rb +768 -73
  33. data/spec/extensions/rserve_spec.rb +52 -0
  34. data/spec/fixtures/bank2.dat +200 -0
  35. data/spec/fixtures/repeated_fields.csv +7 -0
  36. data/spec/fixtures/scientific_notation.csv +4 -0
  37. data/spec/fixtures/test_xls.xls +0 -0
  38. data/spec/io/io_spec.rb +161 -24
  39. data/spec/math/arithmetic/dataframe_spec.rb +26 -7
  40. data/spec/math/arithmetic/vector_spec.rb +8 -0
  41. data/spec/math/statistics/dataframe_spec.rb +16 -1
  42. data/spec/math/statistics/vector_spec.rb +215 -47
  43. data/spec/spec_helper.rb +21 -2
  44. data/spec/vector_spec.rb +368 -12
  45. metadata +99 -16
  46. data/lib/version.rb +0 -3
  47. data/notebooks/grouping_splitting_pivots.ipynb +0 -529
  48. data/notebooks/intro_with_music_data_.ipynb +0 -303
@@ -17,16 +17,20 @@ describe Daru::DataFrame do
17
17
  end
18
18
 
19
19
  it "adds two dataframes to produce a third" do
20
- expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
21
- b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
22
- [0,1,2,3,4,5,6]))
20
+ expect(@left + @right).to eq(Daru::DataFrame.new({
21
+ a: [2,nil,nil,8,nil,nil,nil],
22
+ b: [20,nil,nil,80,nil,nil,nil],
23
+ c: [nil,nil,nil,nil,nil,nil]
24
+ }, index: [0,1,2,3,4,5,6]))
23
25
  end
24
26
  end
25
27
 
26
28
  context "#-" do
27
29
  it "subtracts a number from all numeric vectors" do
28
- expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
29
- c: [8,18,28,38,48]}))
30
+ expect(@df - 2).to eq(Daru::DataFrame.new({
31
+ a: [-1,0,1,2,3],
32
+ b: ['a','e','i','o','u'],
33
+ c: [8,18,28,38,48]}))
30
34
  end
31
35
 
32
36
  it "subtracts a data frame from another" do
@@ -53,13 +57,28 @@ describe Daru::DataFrame do
53
57
 
54
58
  context "#sqrt" do
55
59
  it "calculates sqrt" do
56
- @df.sqrt
60
+ expect_correct_df_in_delta(@df.sqrt,
61
+ Daru::DataFrame.new({
62
+ a: [1.0,1.41421356,1.73205080,2.0,2.23606797],
63
+ c: [3.16227766, 4.47213595 ,5.47722557 ,6.32455532, 7.07106781]
64
+ }), 0.001
65
+ )
57
66
  end
58
67
  end
59
68
 
60
69
  context "#round" do
61
70
  it "rounds to precision" do
62
- @df.round
71
+ df = Daru::DataFrame.new({
72
+ a: [1.3434,2.4332,5.6655,12.3344,32.233],
73
+ b: [1.3434,2.4332,5.6655,12.3344,32.233],
74
+ c: %w(a b c d e)
75
+ })
76
+ ans = Daru::DataFrame.new({
77
+ a: [1.34,2.43,5.67,12.33,32.23],
78
+ b: [1.34,2.43,5.67,12.33,32.23],
79
+ })
80
+
81
+ expect(df.round(2)).to eq(ans)
63
82
  end
64
83
  end
65
84
 
@@ -24,6 +24,14 @@ describe Daru::Vector do
24
24
  it "puts a nil when one of the operands is nil" do
25
25
  expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
26
26
  end
27
+
28
+ it "appropriately adds vectors with numeric and non-numeric indexes" do
29
+ pending "Need an alternate index implementation?"
30
+ v1 = Daru::Vector.new([1,2,3])
31
+ v2 = Daru::Vector.new([1,2,3], index: [:a,:b,:c])
32
+
33
+ expect(v1 + v2).to eq(Daru::Vector.new([nil]*6, index: [0,1,2,:a,:b,:c]))
34
+ end
27
35
  end
28
36
 
29
37
  context "#-" do
@@ -79,10 +79,25 @@ describe Daru::DataFrame do
79
79
  f: [40,80,400]
80
80
  }, index: [:d, :e, :f]
81
81
  ))
82
+
83
+ test = Daru::DataFrame.rows([
84
+ [0.3543,0.4535,0.2424],
85
+ [0.123,0.53323,0.544],
86
+ [0.4345,0.4552,0.425]
87
+ ], order: [:a, :b, :c])
88
+ ans = Daru::DataFrame.new({
89
+ a: [0.0261607, -0.0071019, -0.0153640],
90
+ b: [-0.0071019, 0.0020747, 0.0056071],
91
+ c: [-0.0153640, 0.0056071, 0.0230777]
92
+ })
93
+
94
+ test.cov.each_vector_with_index do |v, i|
95
+ expect_correct_vector_in_delta v, ans[i], 0.01
96
+ end
82
97
  end
83
98
  end
84
99
 
85
- context "#corr", focus: true do
100
+ context "#corr" do
86
101
  it "calculates the correlation between the numeric vectors of DataFrame" do
87
102
  expect(@df.corr).to eq(Daru::DataFrame.new({
88
103
  d: [1,1,1],
@@ -1,35 +1,36 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Vector do
4
- [:array, :nmatrix].each do |dtype|
4
+ [:array, :gsl].each do |dtype| #nmatrix still unstable
5
5
  describe dtype do
6
- before :each do
6
+ before do
7
7
  @dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
8
- @dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
8
+ @dv_with_nils = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
9
9
  end
10
10
 
11
11
  context "#mean" do
12
12
  it "calculates mean" do
13
13
  expect(@dv.mean).to eq(282.2)
14
- expect(@dv_with_md.mean).to eq(282.2)
14
+ expect(@dv_with_nils.mean).to eq(282.2)
15
15
  end
16
16
  end
17
17
 
18
18
  context "#sum_of_squares" do
19
- it "calcs sum of squares" do
20
- @dv.sum_of_squares
19
+ it "calcs sum of squares, omits nil values" do
20
+ v = Daru::Vector.new [1,2,3,4,5,6], dtype: dtype
21
+ expect(v.sum_of_squares).to eq(17.5)
21
22
  end
22
23
  end
23
24
 
24
25
  context "#standard_deviation_sample" do
25
26
  it "calcs standard deviation sample" do
26
- @dv.standard_deviation_sample
27
+ @dv_with_nils.standard_deviation_sample
27
28
  end
28
29
  end
29
30
 
30
31
  context "#variance_sample" do
31
32
  it "calculates sample variance" do
32
- @dv.variance_sample
33
+ expect(@dv.variance).to be_within(0.01).of(75118.84)
33
34
  end
34
35
  end
35
36
 
@@ -41,7 +42,7 @@ describe Daru::Vector do
41
42
 
42
43
  context "#variance_population" do
43
44
  it "calculates population variance" do
44
- expect(@dv.variance_population).to eq(67606.95999999999)
45
+ expect(@dv.variance_population).to be_within(0.001).of(67606.95999999999)
45
46
  end
46
47
  end
47
48
 
@@ -77,7 +78,8 @@ describe Daru::Vector do
77
78
 
78
79
  context "#product" do
79
80
  it "returns the product" do
80
- @dv.product
81
+ v = Daru::Vector.new [1, 2, 3, 4, 5], dtype: dtype
82
+ expect(v.product).to eq(120)
81
83
  end
82
84
  end
83
85
 
@@ -99,35 +101,38 @@ describe Daru::Vector do
99
101
  end
100
102
  end
101
103
 
102
- context "#percentile" do
103
- it "calculates percentile" do
104
- expect(@dv.percentile(50)).to eq(333.0)
104
+ context "#count" do
105
+ it "counts specified element" do
106
+ @dv.count(323)
105
107
  end
106
- end
107
-
108
- context "#recode" do
109
108
 
109
+ it "counts total number of elements" do
110
+ expect(@dv.count).to eq(10)
111
+ end
110
112
  end
111
113
 
112
- context "#recode!" do
113
-
114
+ context "#coefficient_of_variation" do
115
+ it "calculates coefficient_of_variation" do
116
+ @dv.coefficient_of_variation
117
+ end
114
118
  end
115
119
 
116
- context "#frequencies" do
117
- it "calculates frequencies" do
118
- @dv.frequencies
120
+ context "#percentile" do
121
+ it "calculates mid point percentile" do
122
+ expect(@dv.percentile(50)).to eq(278.5)
119
123
  end
120
124
  end
121
125
 
122
126
  context "#average_deviation_population" do
123
127
  it "calculates average_deviation_population" do
124
- @dv.average_deviation_population
128
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype: dtype)
129
+ expect(a.average_deviation_population).to eq(20.quo(9).to_f)
125
130
  end
126
131
  end
127
132
 
128
133
  context "#proportion" do
129
134
  it "calculates proportion" do
130
- @dv.proportion
135
+ expect(@dv.proportion(dtype == :gsl ? 1.0 : 1)).to eq(0.1)
131
136
  end
132
137
  end
133
138
 
@@ -137,43 +142,206 @@ describe Daru::Vector do
137
142
  end
138
143
  end
139
144
 
140
- context "#ranked" do
141
- it "curates by rank" do
142
- @dv.ranked
145
+ context "#standard_error" do
146
+ it "calculates standard error" do
147
+ @dv.standard_error
143
148
  end
144
149
  end
145
150
 
146
- context "#count" do
147
- it "counts specified element" do
148
- @dv.count(323)
149
- end
150
-
151
- it "counts total number of elements" do
152
- expect(@dv.count).to eq(10)
151
+ context "#vector_standardized_compute" do
152
+ it "calculates vector_standardized_compute" do
153
+ @dv.vector_standardized_compute(@dv.mean, @dv.sd)
154
+ @dv_with_nils.vector_standardized_compute(@dv.mean, @dv.sd)
153
155
  end
154
156
  end
155
157
 
156
- context "#coefficient_of_variation" do
157
- it "calculates coefficient_of_variation" do
158
- @dv.coefficient_of_variation
158
+ context "#vector_centered_compute" do
159
+ it "calculates vector_centered_compute" do
160
+ @dv.vector_centered_compute(@dv.mean)
161
+ @dv_with_nils.vector_centered_compute(@dv.mean)
159
162
  end
160
163
  end
164
+ end
165
+ end # ALL DTYPE tests
166
+
167
+ # Only Array tests
168
+ context "#percentile" do
169
+ it "tests linear percentile strategy" do
170
+ values = Daru::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle
171
+ expect(values.percentil(0, :linear)).to eq(102)
172
+ expect(values.percentil(25, :linear)).to eq(104.75)
173
+ expect(values.percentil(50, :linear)).to eq(108.5)
174
+ expect(values.percentil(75, :linear)).to eq(112.75)
175
+ expect(values.percentil(100, :linear)).to eq(116)
176
+
177
+ values = Daru::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle
178
+ expect(values.percentil(0, :linear)).to eq(102)
179
+ expect(values.percentil(25, :linear)).to eq(105)
180
+ expect(values.percentil(50, :linear)).to eq(109)
181
+ expect(values.percentil(75, :linear)).to eq(115)
182
+ expect(values.percentil(100, :linear)).to eq(118)
183
+ end
184
+ end
161
185
 
162
- context "#factor" do
186
+ context "#frequencies" do
187
+ it "calculates frequencies" do
188
+ vector = Daru::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
189
+ expect(vector.frequencies).to eq({
190
+ 1=>1, 2=>1, 3=>1, 4=>1, 5=>5,
191
+ 6=>2, 7=>1, 8=>1, 9=>1,10=>1, -99=>2
192
+ })
193
+ end
194
+ end
163
195
 
164
- end
196
+ context "#ranked" do
197
+ it "curates by rank" do
198
+ vector = Daru::Vector.new([nil, 0.8, 1.2, 1.2, 2.3, 18, nil])
199
+ expect(vector.ranked).to eq(Daru::Vector.new([nil,1,2.5,2.5,4,5,nil]))
165
200
 
166
- context "#median_absolute_deviation" do
167
- it "calculates median_absolute_deviation" do
168
- @dv.median_absolute_deviation
169
- end
170
- end
201
+ v = Daru::Vector.new [0.8, 1.2, 1.2, 2.3, 18]
202
+ expect(v.ranked).to eq(Daru::Vector.new [1, 2.5, 2.5, 4, 5])
203
+ end
171
204
 
172
- context "#standard_error" do
173
- it "calculates standard error" do
174
- @dv.standard_error
175
- end
205
+ it "tests paired ties" do
206
+ a = Daru::Vector.new [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4]
207
+ expected = Daru::Vector.new [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10]
208
+ expect(a.ranked).to eq(expected)
209
+ end
210
+ end
211
+
212
+ context "#dichotomize" do
213
+ it "dichotomizes" do
214
+ a = Daru::Vector.new [0, 0, 0, 1, 2, 3, nil]
215
+ exp = Daru::Vector.new [0, 0, 0, 1, 1, 1, nil]
216
+ expect(a.dichotomize).to eq(exp)
217
+
218
+ a = Daru::Vector.new [1, 1, 1, 2, 2, 2, 3]
219
+ exp = Daru::Vector.new [0, 0, 0, 1, 1, 1, 1]
220
+ expect(a.dichotomize).to eq(exp)
221
+
222
+ a = Daru::Vector.new [0, 0, 0, 1, 2, 3, nil]
223
+ exp = Daru::Vector.new [0, 0, 0, 0, 1, 1, nil]
224
+ expect(a.dichotomize(1)).to eq(exp)
225
+
226
+ a = Daru::Vector.new %w(a a a b c d)
227
+ exp = Daru::Vector.new [0, 0, 0, 1, 1, 1]
228
+ expect(a.dichotomize).to eq(exp)
229
+ end
230
+ end
231
+
232
+ context "#median_absolute_deviation" do
233
+ it "calculates median_absolute_deviation" do
234
+ a = Daru::Vector.new [1, 1, 2, 2, 4, 6, 9]
235
+ expect(a.median_absolute_deviation).to eq(1)
236
+ end
237
+ end
238
+
239
+ context "#round" do
240
+ it "rounds non-nil values" do
241
+ vector = Daru::Vector.new([1.44,55.32,nil,4])
242
+ expect(vector.round(1)).to eq(Daru::Vector.new([1.4,55.3,nil,4]))
243
+ end
244
+ end
245
+
246
+ context "#center" do
247
+ it "centers" do
248
+ mean = rand
249
+ samples = 11
250
+ centered = Daru::Vector.new(samples.times.map { |i| i - ((samples / 2).floor).to_i })
251
+ not_centered = centered.recode { |v| v + mean }
252
+ obs = not_centered.center
253
+ centered.each_with_index do |v, i|
254
+ expect(v).to be_within(0.0001).of(obs[i])
176
255
  end
177
256
  end
178
257
  end
258
+
259
+ context "#standardize" do
260
+ it "returns a standardized vector" do
261
+ vector = Daru::Vector.new([11,55,33,25,nil,22])
262
+ expect(vector.standardize.round(2)).to eq(
263
+ Daru::Vector.new([-1.11, 1.57, 0.23, -0.26,nil, -0.44])
264
+ )
265
+ end
266
+
267
+ it "tests for vector standardized with zero variance" do
268
+ v1 = Daru::Vector.new 100.times.map { |_i| 1 }
269
+ exp = Daru::Vector.new 100.times.map { nil }
270
+ expect(v1.standardize).to eq(exp)
271
+ end
272
+ end
273
+
274
+ context "#vector_percentile" do
275
+ it "replaces each non-nil value with its percentile value" do
276
+ vector = Daru::Vector.new([1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10])
277
+ expect(vector.vector_percentile).to eq(Daru::Vector.new(
278
+ [10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100])
279
+ )
280
+ end
281
+ end
282
+
283
+ context "#sample_with_replacement" do
284
+ it "calculates sample_with_replacement" do
285
+ vec = Daru::Vector.new(
286
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
287
+ name: :common_all_dtypes)
288
+ srand(1)
289
+ expect(vec.sample_with_replacement(100).size).to eq(100)
290
+
291
+ srand(1)
292
+ expect(vec.sample_with_replacement(100).size).to eq(100)
293
+ end
294
+ end
295
+
296
+ context "#sample_without_replacement" do
297
+ it "calculates sample_without_replacement" do
298
+ vec = Daru::Vector.new(
299
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
300
+ name: :common_all_dtypes)
301
+
302
+ srand(1)
303
+ expect(vec.sample_without_replacement(17).sort).to eq(
304
+ vec.only_valid.to_a.sort)
305
+ expect {
306
+ vec.sample_without_replacement(20)
307
+ }.to raise_error(ArgumentError)
308
+
309
+ srand(1)
310
+ expect(vec.sample_without_replacement(17).sort).to eq(
311
+ vec.only_valid.to_a.sort)
312
+ end
313
+ end
314
+
315
+ context "#jackknife" do
316
+ it "jack knife correctly with named method" do
317
+ a = Daru::Vector.new [1, 2, 3, 4]
318
+ df = a.jackknife(:mean)
319
+ expect(df[:mean].mean).to eq (a.mean)
320
+
321
+ df = a.jackknife([:mean, :sd])
322
+ expect(df[:mean].mean).to eq(a.mean)
323
+ expect(df[:mean].sd).to eq(a.sd)
324
+ end
325
+
326
+ it "jack knife correctly with custom method" do
327
+ a = Daru::Vector.new [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25]
328
+ ds = a.jackknife(log_s2: ->(v) { Math.log(v.variance) })
329
+ exp = Daru::Vector.new [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937]
330
+
331
+ expect_correct_vector_in_delta ds[:log_s2], exp, 0.001
332
+ # expect(ds[:log_s2]).to be_within(0.001).of(exp)
333
+ expect(ds[:log_s2].mean).to be_within(0.00001).of(2.00389)
334
+ expect(ds[:log_s2].variance).to be_within(0.001).of(1.091)
335
+ end
336
+
337
+ it "jack knife correctly with k > 1" do
338
+ rng = Distribution::Normal.rng(0,1)
339
+ a = Daru::Vector.new_with_size(6) { rng.call}
340
+
341
+ ds = a.jackknife(:mean, 2)
342
+ mean = a.mean
343
+ exp = Daru::Vector.new [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4]
344
+ expect_correct_vector_in_delta(exp, ds[:mean], 1e-13)
345
+ end
346
+ end
179
347
  end
@@ -1,6 +1,8 @@
1
1
  require 'rspec'
2
- require 'awesome_print'
3
2
  require 'matrix'
3
+ require 'awesome_print'
4
+ require 'distribution'
5
+ require 'tempfile'
4
6
 
5
7
  def mri?
6
8
  RUBY_ENGINE == 'ruby'
@@ -16,6 +18,23 @@ else
16
18
  require 'nmatrix'
17
19
  end
18
20
 
21
+
19
22
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
20
23
  $LOAD_PATH.unshift(File.dirname(__FILE__))
21
- require 'daru'
24
+ require 'daru'
25
+
26
+ ALL_DTYPES = [:nmatrix, :gsl, :array]
27
+
28
+ # FIXME: This must go! Need to be able to use be_within
29
+ def expect_correct_vector_in_delta v1, v2, delta
30
+ expect(v1.size).to eq(v2.size)
31
+ (0...v1.size).each do |v|
32
+ expect(v1[v]).to be_within(delta).of(v2[v])
33
+ end
34
+ end
35
+
36
+ def expect_correct_df_in_delta df1, df2, delta
37
+ df1.each_vector_with_index do |vector, i|
38
+ expect_correct_vector_in_delta vector, df2[i], delta
39
+ end
40
+ end