daru 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -0,0 +1,27 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan="2">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
4
+ </tr>
5
+ <% if name %>
6
+ <tr>
7
+ <th> </th>
8
+ <th><%= name %></th>
9
+ </tr>
10
+ <% end %>
11
+
12
+ <% @index.each_with_index.first(threshold).each do |index, pos| %>
13
+ <tr>
14
+ <td><%= index %></td>
15
+ <td><%= self.at(pos) %></td>
16
+ </tr>
17
+ <% end %>
18
+
19
+ <% if size > threshold %>
20
+ <% last_index = @index.size-1 %>
21
+ <tr><td>...</td><td>...</td></tr>
22
+ <tr>
23
+ <td><%= last_index %></td>
24
+ <td><%= self.at last_index %></td>
25
+ </tr>
26
+ <% end %>
27
+ </table>
@@ -0,0 +1,36 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan="<%= index.width+1 %>">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
4
+ </tr>
5
+ <% if name %>
6
+ <tr>
7
+ <th colspan="<%= index.width %>"> </th>
8
+ <th><%= name %></th>
9
+ </tr>
10
+ <% end %>
11
+
12
+ <% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(to_a).each do |tuple, value| %>
13
+ <tr>
14
+ <% tuple.each do |idx, span| %>
15
+ <th rowspan="<%= span %>"><%= idx %></th>
16
+ <% end %>
17
+ <td><%= value %></td>
18
+ </tr>
19
+ <% end %>
20
+
21
+ <% if size > threshold %>
22
+ <% last_index = @index.to_a.last %>
23
+ <tr>
24
+ <% last_index.size.times do %>
25
+ <th>...</th>
26
+ <% end %>
27
+ <td>...</td>
28
+ </tr>
29
+ <tr>
30
+ <% last_index.each do |idx| %>
31
+ <th><%= idx %></th>
32
+ <% end %>
33
+ <td><%= self[last_index] %></td>
34
+ </tr>
35
+ <% end %>
36
+ </table>
@@ -62,30 +62,28 @@ module Daru
62
62
  all_vectors = (vectors.to_a | other.vectors.to_a).sort
63
63
  all_indexes = (index.to_a | other.index.to_a).sort
64
64
 
65
- hsh = {}
66
- all_vectors.each do |vector_name|
67
- this = has_vector?(vector_name) ? self[vector_name] : nil
68
- that = other.has_vector?(vector_name) ? other[vector_name] : nil
69
-
70
- hsh[vector_name] =
71
- if this && that
72
- this.send(operation, that)
73
- else
74
- Daru::Vector.new([], index: all_indexes, name: vector_name)
75
- end
76
- end
65
+ hsh =
66
+ all_vectors.map do |vector_name|
67
+ vector = dataframe_binary_operation_on_vectors other, vector_name, operation, all_indexes
68
+
69
+ [vector_name, vector]
70
+ end.to_h
77
71
 
78
72
  Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
79
73
  end
80
74
 
81
- def scalar_binary_operation operation, other
82
- clone = dup
83
- clone.map_vectors! do |vector|
84
- vector = vector.send(operation, other) if vector.type == :numeric
85
- vector
75
+ def dataframe_binary_operation_on_vectors other, name, operation, indexes
76
+ if has_vector?(name) && other.has_vector?(name)
77
+ self[name].send(operation, other[name])
78
+ else
79
+ Daru::Vector.new([], index: indexes, name: name)
86
80
  end
81
+ end
87
82
 
88
- clone
83
+ def scalar_binary_operation operation, other
84
+ dup.map_vectors! do |vector|
85
+ vector.numeric? ? vector.send(operation, other) : vector
86
+ end
89
87
  end
90
88
  end
91
89
  end
@@ -63,19 +63,17 @@ module Daru
63
63
  end
64
64
 
65
65
  def v2v_binary operation, other
66
- common_idxs = []
67
- elements = []
66
+ # FIXME: why the sorting?.. - zverok, 2016-05-18
68
67
  index = (@index.to_a | other.index.to_a).sort
69
68
 
70
- index.each do |idx|
69
+ elements = index.map do |idx|
71
70
  this = self.index.include?(idx) ? self[idx] : nil
72
71
  that = other.index.include?(idx) ? other[idx] : nil
73
72
 
74
- elements << (this && that ? this.send(operation, that) : nil)
75
- common_idxs << idx
73
+ this && that ? this.send(operation, that) : nil
76
74
  end
77
75
 
78
- Daru::Vector.new(elements, name: @name, index: common_idxs)
76
+ Daru::Vector.new(elements, name: @name, index: index)
79
77
  end
80
78
  end
81
79
  end
@@ -132,17 +132,15 @@ module Daru
132
132
 
133
133
  # Calculate sample variance-covariance between the numeric vectors.
134
134
  def covariance
135
- cache={}
135
+ cache = Hash.new do |h, (col, row)|
136
+ h[[col, row]] = vector_cov(self[row],self[col])
137
+ end
136
138
  vectors = numeric_vectors
137
139
 
138
140
  mat_rows = vectors.collect do |row|
139
141
  vectors.collect do |col|
140
142
  if row == col
141
143
  self[row].variance
142
- elsif cache[[col,row]].nil?
143
- cov = vector_cov(self[row],self[col])
144
- cache[[row,col]] = cov
145
- cov
146
144
  else
147
145
  cache[[col,row]]
148
146
  end
@@ -170,16 +168,11 @@ module Daru
170
168
  private
171
169
 
172
170
  def apply_method_to_numerics method, *args
173
- order = []
174
- computed = @vectors.to_a.each_with_object([]) do |n, memo|
175
- v = @data[@vectors[n]]
176
- if v.type == :numeric
177
- memo << v.send(method, *args)
178
- order << n
179
- end
180
- end
171
+ numerics = @vectors.to_a.map { |n| [n, @data[@vectors[n]]] }
172
+ .select { |_n, v| v.numeric? }
173
+ computed = numerics.map { |_n, v| v.send(method, *args) }
181
174
 
182
- Daru::DataFrame.new(computed, index: @index, order: order,clone: false)
175
+ Daru::DataFrame.new(computed, index: @index, order: numerics.map(&:first), clone: false)
183
176
  end
184
177
 
185
178
  def vector_cov v1a, v2a
@@ -187,7 +180,7 @@ module Daru
187
180
  end
188
181
 
189
182
  def sum_of_squares v1, v2
190
- v1a,v2a = v1.only_valid,v2.only_valid
183
+ v1a,v2a = v1.reject_values(*Daru::MISSING_VALUES),v2.reject_values(*Daru::MISSING_VALUES)
191
184
  v1a.reset_index!
192
185
  v2a.reset_index!
193
186
  m1 = v1a.mean
@@ -4,7 +4,7 @@ module Daru
4
4
  # is done inside the wrapper, so that native methods can be used for most of
5
5
  # the computationally intensive tasks.
6
6
  module Statistics
7
- module Vector
7
+ module Vector # rubocop:disable Metrics/ModuleLength
8
8
  def mean
9
9
  @data.mean
10
10
  end
@@ -55,16 +55,16 @@ module Daru
55
55
  alias :mad :median_absolute_deviation
56
56
 
57
57
  def standard_error
58
- standard_deviation_sample/Math.sqrt(n_valid)
58
+ standard_deviation_sample/Math.sqrt(size - count_values(*Daru::MISSING_VALUES))
59
59
  end
60
60
 
61
61
  def sum_of_squared_deviation
62
- (@data.inject(0) { |a,x| x.square + a } - sum.square.quo(n_valid).to_f).to_f
62
+ (@data.inject(0) { |a,x| x**2 + a } - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f
63
63
  end
64
64
 
65
65
  # Retrieve unique values of non-nil data
66
66
  def factors
67
- only_valid.uniq.reset_index!
67
+ reject_values(*Daru::MISSING_VALUES).uniq.reset_index!
68
68
  end
69
69
 
70
70
  # Maximum element of the vector.
@@ -98,15 +98,17 @@ module Daru
98
98
  end
99
99
 
100
100
  def proportions
101
- len = n_valid
102
- frequencies.each_with_object({}) { |arr, hash| hash[arr[0]] = arr[1] / len }
101
+ len = size - count_values(*Daru::MISSING_VALUES)
102
+ frequencies.each_with_object({}) do |(el, count), hash|
103
+ hash[el] = count / len
104
+ end
103
105
  end
104
106
 
105
107
  def ranked
106
108
  sum = 0
107
- r = frequencies.sort.each_with_object({}) do |val, memo|
108
- memo[val[0]] = ((sum + 1) + (sum + val[1])).quo(2)
109
- sum += val[1]
109
+ r = frequencies.sort.each_with_object({}) do |(el, count), memo|
110
+ memo[el] = ((sum + 1) + (sum + count)).quo(2)
111
+ sum += count
110
112
  end
111
113
 
112
114
  recode { |e| r[e] }
@@ -120,29 +122,27 @@ module Daru
120
122
  # retrieves number of instances where block returns true. If other
121
123
  # values given, retrieves the frequency for this value. If no value
122
124
  # given, counts the number of non-nil elements in the Vector.
123
- def count value=false
125
+ def count value=false, &block
124
126
  if block_given?
125
- @data.select { |val| yield(val) }.count
127
+ @data.select(&block).count
126
128
  elsif value
127
- val = frequencies[value]
128
- val.nil? ? 0 : val
129
+ count { |val| val == value }
129
130
  else
130
- size - @missing_positions.size
131
+ size - indexes(*Daru::MISSING_VALUES).size
131
132
  end
132
133
  end
133
134
 
134
135
  # Count number of occurrences of each value in the Vector
135
136
  def value_counts
136
- values = {}
137
- @data.each do |d|
138
- values[d] ? values[d] += 1 : values[d] = 1
137
+ values = @data.each_with_object(Hash.new(0)) do |d, memo|
138
+ memo[d] += 1
139
139
  end
140
140
 
141
141
  Daru::Vector.new(values)
142
142
  end
143
143
 
144
144
  def proportion value=1
145
- frequencies[value].quo(n_valid).to_f
145
+ frequencies[value].quo(size - count_values(*Daru::MISSING_VALUES)).to_f
146
146
  end
147
147
 
148
148
  # Sample variance with denominator (N-1)
@@ -151,7 +151,7 @@ module Daru
151
151
  if @data.respond_to? :variance_sample
152
152
  @data.variance_sample m
153
153
  else
154
- sum_of_squares(m).quo(n_valid - 1)
154
+ sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES) - 1)
155
155
  end
156
156
  end
157
157
 
@@ -161,38 +161,26 @@ module Daru
161
161
  if @data.respond_to? :variance_population
162
162
  @data.variance_population m
163
163
  else
164
- sum_of_squares(m).quo(n_valid).to_f
164
+ sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES)).to_f
165
165
  end
166
166
  end
167
167
 
168
168
  # Sample covariance with denominator (N-1)
169
169
  def covariance_sample other
170
- @size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
171
- mean_x = mean
172
- mean_y = other.mean
173
- sum = 0
174
- (0...size).each do |i|
175
- sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
176
- end
177
- sum / (n_valid - 1)
170
+ size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
171
+ covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES) - 1)
178
172
  end
179
173
 
180
174
  # Population covariance with denominator (N)
181
175
  def covariance_population other
182
- @size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
183
- mean_x = mean
184
- mean_y = other.mean
185
- sum = 0
186
- (0...size).each do |i|
187
- sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
188
- end
189
- sum / n_valid
176
+ size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
177
+ covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES))
190
178
  end
191
179
 
192
180
  def sum_of_squares(m=nil)
193
181
  m ||= mean
194
- @data.inject(0) { |memo, val|
195
- @missing_values.key?(val) ? memo : (memo + (val - m)**2)
182
+ reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
183
+ memo + (val - m)**2
196
184
  }
197
185
  end
198
186
 
@@ -221,7 +209,7 @@ module Daru
221
209
  else
222
210
  m ||= mean
223
211
  th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
224
- th.quo((@size - @missing_positions.size) * (standard_deviation_sample(m)**3))
212
+ th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
225
213
  end
226
214
  end
227
215
 
@@ -231,16 +219,16 @@ module Daru
231
219
  else
232
220
  m ||= mean
233
221
  fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
234
- fo.quo((@size - @missing_positions.size) * standard_deviation_sample(m) ** 4) - 3
222
+ fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m) ** 4) - 3
235
223
  end
236
224
  end
237
225
 
238
226
  def average_deviation_population m=nil
239
- type == :numeric or raise TypeError, 'Vector must be numeric'
227
+ must_be_numeric!
240
228
  m ||= mean
241
- (@data.inject(0) { |memo, val|
242
- @missing_values.key?(val) ? memo : (val - m).abs + memo
243
- }).quo(n_valid)
229
+ reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
230
+ (val - m).abs + memo
231
+ }.quo(size - count_values(*Daru::MISSING_VALUES))
244
232
  end
245
233
 
246
234
  # Returns the value of the percentile q
@@ -254,31 +242,13 @@ module Daru
254
242
  #
255
243
  # This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
256
244
  def percentile(q, strategy=:midpoint)
257
- sorted = only_valid(:array).sort
258
-
259
245
  case strategy
260
246
  when :midpoint
261
- v = (n_valid * q).quo(100)
262
- if v.to_i!=v
263
- sorted[v.to_i]
264
- else
265
- (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
266
- end
247
+ midpoint_percentile(q)
267
248
  when :linear
268
- index = (q / 100.0) * (n_valid + 1)
269
-
270
- k = index.truncate
271
- d = index % 1
272
-
273
- if k == 0
274
- sorted[0]
275
- elsif k >= sorted.size
276
- sorted[-1]
277
- else
278
- sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
279
- end
249
+ linear_percentile(q)
280
250
  else
281
- raise NotImplementedError, "Unknown strategy #{strategy}"
251
+ raise ArgumentError, "Unknown strategy #{strategy}"
282
252
  end
283
253
  end
284
254
 
@@ -313,17 +283,18 @@ module Daru
313
283
  def standardize use_population=false
314
284
  m ||= mean
315
285
  sd = use_population ? sdp : sds
316
- return Daru::Vector.new([nil]*@size) if m.nil? || sd == 0.0
286
+ return Daru::Vector.new([nil]*size) if m.nil? || sd == 0.0
317
287
 
318
288
  vector_standardized_compute m, sd
319
289
  end
320
290
 
291
+ # :nocov:
321
292
  def box_cox_transformation lambda # :nodoc:
322
- raise 'Should be a numeric' unless @type == :numeric
293
+ must_be_numeric!
323
294
 
324
295
  recode do |x|
325
296
  if !x.nil?
326
- if lambda == 0
297
+ if lambda.zero?
327
298
  Math.log(x)
328
299
  else
329
300
  (x ** lambda - 1).quo(lambda)
@@ -333,10 +304,11 @@ module Daru
333
304
  end
334
305
  end
335
306
  end
307
+ # :nocov:
336
308
 
337
309
  # Replace each non-nil value in the vector with its percentile.
338
310
  def vector_percentile
339
- c = size - missing_positions.size
311
+ c = size - indexes(*Daru::MISSING_VALUES).size
340
312
  ranked.recode! { |i| i.nil? ? nil : (i.quo(c)*100).to_f }
341
313
  end
342
314
 
@@ -367,7 +339,7 @@ module Daru
367
339
  if @data.respond_to? :sample_with_replacement
368
340
  @data.sample_with_replacement sample
369
341
  else
370
- valid = missing_positions.empty? ? self : only_valid
342
+ valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
371
343
  vds = valid.size
372
344
  (0...sample).collect { valid[rand(vds)] }
373
345
  end
@@ -383,17 +355,7 @@ module Daru
383
355
  if @data.respond_to? :sample_without_replacement
384
356
  @data.sample_without_replacement sample
385
357
  else
386
- valid = missing_positions.empty? ? self : only_valid
387
- raise ArgumentError, "Sample size couldn't be greater than n" if
388
- sample > valid.size
389
- out = []
390
- size = valid.size
391
- while out.size < sample
392
- value = rand(size)
393
- out.push(value) unless out.include?(value)
394
- end
395
-
396
- out.collect { |i| valid[i] }
358
+ raw_sample_without_replacement(sample)
397
359
  end
398
360
  end
399
361
 
@@ -407,7 +369,7 @@ module Daru
407
369
  # vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
408
370
  # vector.percent_change
409
371
  # #=>
410
- # # <Daru::Vector:28713060 @name = nil @size: 5 >
372
+ # # <Daru::Vector:28713060 @name = nil size: 5 >
411
373
  # # nil
412
374
  # # a
413
375
  # # f 0.5
@@ -415,19 +377,19 @@ module Daru
415
377
  # # i 0.3333333333333333
416
378
  # # k 0.25
417
379
  def percent_change periods=1
418
- type == :numeric or raise TypeError, 'Vector must be numeric'
419
- value = only_valid
420
- arr = []
421
- i = 1
422
- ind = @data.find_index { |x| !x.nil? }
423
- (periods...size).each do |j|
424
- if j==ind || @missing_values.key?(@data[j])
425
- arr[j] = nil
380
+ must_be_numeric!
381
+
382
+ prev = nil
383
+ arr = @data.each_with_index.map do |cur, i|
384
+ if i < periods ||
385
+ include_with_nan?(Daru::MISSING_VALUES, cur) ||
386
+ include_with_nan?(Daru::MISSING_VALUES, prev)
387
+ nil
426
388
  else
427
- arr[j] = (value.data[i] - value.data[i - 1]) / value.data[i - 1].to_f
428
- i+=1
429
- end
389
+ (cur - prev) / prev.to_f
390
+ end.tap { prev = cur if cur }
430
391
  end
392
+
431
393
  Daru::Vector.new(arr, index: @index, name: @name)
432
394
  end
433
395
 
@@ -533,7 +495,7 @@ module Daru
533
495
  # ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
534
496
  #
535
497
  # @return [Daru::Vector] Contains EMA
536
- def ema(n=10, wilder=false)
498
+ def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
537
499
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
538
500
  # need to start everything from the first non-nil observation
539
501
  start = @data.index { |i| !i.nil? }
@@ -567,7 +529,7 @@ module Daru
567
529
  # ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
568
530
  #
569
531
  # @return [Daru::Vector] contains EMV
570
- def emv(n=10, wilder=false)
532
+ def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
571
533
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
572
534
  # need to start everything from the first non-nil observation
573
535
  start = @data.index { |i| !i.nil? }
@@ -651,7 +613,7 @@ module Daru
651
613
  max_lags ||= (10 * Math.log10(size)).to_i
652
614
 
653
615
  (0..max_lags).map do |i|
654
- if i == 0
616
+ if i.zero?
655
617
  1.0
656
618
  else
657
619
  m = mean
@@ -672,7 +634,7 @@ module Daru
672
634
  # == Returns
673
635
  #
674
636
  # Autocovariance value
675
- def acvf(demean=true, unbiased=true)
637
+ def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
676
638
  opts = {
677
639
  demean: true,
678
640
  unbaised: true
@@ -700,7 +662,7 @@ module Daru
700
662
  result = []
701
663
  acc = 0
702
664
  @data.each do |d|
703
- if @missing_values.key?(d)
665
+ if include_with_nan? Daru::MISSING_VALUES, d
704
666
  result << nil
705
667
  else
706
668
  acc += d
@@ -722,6 +684,66 @@ module Daru
722
684
  alias :ss :sum_of_squares
723
685
  alias :percentil :percentile
724
686
  alias :se :standard_error
687
+
688
+ private
689
+
690
+ def must_be_numeric!
691
+ numeric? or raise TypeError, 'Vector must be numeric'
692
+ end
693
+
694
+ def covariance_sum other
695
+ self_mean = mean
696
+ other_mean = other.mean
697
+ @data
698
+ .zip(other.data).inject(0) do |res, (d, o)|
699
+ res + if !d || !o
700
+ 0
701
+ else
702
+ (d - self_mean) * (o - other_mean)
703
+ end
704
+ end
705
+ end
706
+
707
+ def midpoint_percentile(q) # rubocop:disable Metrics/AbcSize
708
+ sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
709
+
710
+ v = ((size - count_values(*Daru::MISSING_VALUES)) * q).quo(100)
711
+ if v.to_i!=v
712
+ sorted[v.to_i]
713
+ else
714
+ (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
715
+ end
716
+ end
717
+
718
+ def linear_percentile(q) # rubocop:disable Metrics/AbcSize
719
+ sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
720
+ index = (q / 100.0) * ((size - count_values(*Daru::MISSING_VALUES)) + 1)
721
+
722
+ k = index.truncate
723
+ d = index % 1
724
+
725
+ if k.zero?
726
+ sorted[0]
727
+ elsif k >= sorted.size
728
+ sorted[-1]
729
+ else
730
+ sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
731
+ end
732
+ end
733
+
734
+ def raw_sample_without_replacement sample
735
+ valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
736
+ raise ArgumentError, "Sample size couldn't be greater than n" if
737
+ sample > valid.size
738
+ out = []
739
+ size = valid.size
740
+ while out.size < sample
741
+ value = rand(size)
742
+ out.push(value) unless out.include?(value)
743
+ end
744
+
745
+ out.collect { |i| valid[i] }
746
+ end
725
747
  end
726
748
  end
727
749
  end