daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -0,0 +1,27 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan="2">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
4
+ </tr>
5
+ <% if name %>
6
+ <tr>
7
+ <th> </th>
8
+ <th><%= name %></th>
9
+ </tr>
10
+ <% end %>
11
+
12
+ <% @index.each_with_index.first(threshold).each do |index, pos| %>
13
+ <tr>
14
+ <td><%= index %></td>
15
+ <td><%= self.at(pos) %></td>
16
+ </tr>
17
+ <% end %>
18
+
19
+ <% if size > threshold %>
20
+ <% last_index = @index.size-1 %>
21
+ <tr><td>...</td><td>...</td></tr>
22
+ <tr>
23
+ <td><%= last_index %></td>
24
+ <td><%= self.at last_index %></td>
25
+ </tr>
26
+ <% end %>
27
+ </table>
@@ -0,0 +1,36 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan="<%= index.width+1 %>">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
4
+ </tr>
5
+ <% if name %>
6
+ <tr>
7
+ <th colspan="<%= index.width %>"> </th>
8
+ <th><%= name %></th>
9
+ </tr>
10
+ <% end %>
11
+
12
+ <% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(to_a).each do |tuple, value| %>
13
+ <tr>
14
+ <% tuple.each do |idx, span| %>
15
+ <th rowspan="<%= span %>"><%= idx %></th>
16
+ <% end %>
17
+ <td><%= value %></td>
18
+ </tr>
19
+ <% end %>
20
+
21
+ <% if size > threshold %>
22
+ <% last_index = @index.to_a.last %>
23
+ <tr>
24
+ <% last_index.size.times do %>
25
+ <th>...</th>
26
+ <% end %>
27
+ <td>...</td>
28
+ </tr>
29
+ <tr>
30
+ <% last_index.each do |idx| %>
31
+ <th><%= idx %></th>
32
+ <% end %>
33
+ <td><%= self[last_index] %></td>
34
+ </tr>
35
+ <% end %>
36
+ </table>
@@ -62,30 +62,28 @@ module Daru
62
62
  all_vectors = (vectors.to_a | other.vectors.to_a).sort
63
63
  all_indexes = (index.to_a | other.index.to_a).sort
64
64
 
65
- hsh = {}
66
- all_vectors.each do |vector_name|
67
- this = has_vector?(vector_name) ? self[vector_name] : nil
68
- that = other.has_vector?(vector_name) ? other[vector_name] : nil
69
-
70
- hsh[vector_name] =
71
- if this && that
72
- this.send(operation, that)
73
- else
74
- Daru::Vector.new([], index: all_indexes, name: vector_name)
75
- end
76
- end
65
+ hsh =
66
+ all_vectors.map do |vector_name|
67
+ vector = dataframe_binary_operation_on_vectors other, vector_name, operation, all_indexes
68
+
69
+ [vector_name, vector]
70
+ end.to_h
77
71
 
78
72
  Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
79
73
  end
80
74
 
81
- def scalar_binary_operation operation, other
82
- clone = dup
83
- clone.map_vectors! do |vector|
84
- vector = vector.send(operation, other) if vector.type == :numeric
85
- vector
75
+ def dataframe_binary_operation_on_vectors other, name, operation, indexes
76
+ if has_vector?(name) && other.has_vector?(name)
77
+ self[name].send(operation, other[name])
78
+ else
79
+ Daru::Vector.new([], index: indexes, name: name)
86
80
  end
81
+ end
87
82
 
88
- clone
83
+ def scalar_binary_operation operation, other
84
+ dup.map_vectors! do |vector|
85
+ vector.numeric? ? vector.send(operation, other) : vector
86
+ end
89
87
  end
90
88
  end
91
89
  end
@@ -63,19 +63,17 @@ module Daru
63
63
  end
64
64
 
65
65
  def v2v_binary operation, other
66
- common_idxs = []
67
- elements = []
66
+ # FIXME: why the sorting?.. - zverok, 2016-05-18
68
67
  index = (@index.to_a | other.index.to_a).sort
69
68
 
70
- index.each do |idx|
69
+ elements = index.map do |idx|
71
70
  this = self.index.include?(idx) ? self[idx] : nil
72
71
  that = other.index.include?(idx) ? other[idx] : nil
73
72
 
74
- elements << (this && that ? this.send(operation, that) : nil)
75
- common_idxs << idx
73
+ this && that ? this.send(operation, that) : nil
76
74
  end
77
75
 
78
- Daru::Vector.new(elements, name: @name, index: common_idxs)
76
+ Daru::Vector.new(elements, name: @name, index: index)
79
77
  end
80
78
  end
81
79
  end
@@ -132,17 +132,15 @@ module Daru
132
132
 
133
133
  # Calculate sample variance-covariance between the numeric vectors.
134
134
  def covariance
135
- cache={}
135
+ cache = Hash.new do |h, (col, row)|
136
+ h[[col, row]] = vector_cov(self[row],self[col])
137
+ end
136
138
  vectors = numeric_vectors
137
139
 
138
140
  mat_rows = vectors.collect do |row|
139
141
  vectors.collect do |col|
140
142
  if row == col
141
143
  self[row].variance
142
- elsif cache[[col,row]].nil?
143
- cov = vector_cov(self[row],self[col])
144
- cache[[row,col]] = cov
145
- cov
146
144
  else
147
145
  cache[[col,row]]
148
146
  end
@@ -170,16 +168,11 @@ module Daru
170
168
  private
171
169
 
172
170
  def apply_method_to_numerics method, *args
173
- order = []
174
- computed = @vectors.to_a.each_with_object([]) do |n, memo|
175
- v = @data[@vectors[n]]
176
- if v.type == :numeric
177
- memo << v.send(method, *args)
178
- order << n
179
- end
180
- end
171
+ numerics = @vectors.to_a.map { |n| [n, @data[@vectors[n]]] }
172
+ .select { |_n, v| v.numeric? }
173
+ computed = numerics.map { |_n, v| v.send(method, *args) }
181
174
 
182
- Daru::DataFrame.new(computed, index: @index, order: order,clone: false)
175
+ Daru::DataFrame.new(computed, index: @index, order: numerics.map(&:first), clone: false)
183
176
  end
184
177
 
185
178
  def vector_cov v1a, v2a
@@ -187,7 +180,7 @@ module Daru
187
180
  end
188
181
 
189
182
  def sum_of_squares v1, v2
190
- v1a,v2a = v1.only_valid,v2.only_valid
183
+ v1a,v2a = v1.reject_values(*Daru::MISSING_VALUES),v2.reject_values(*Daru::MISSING_VALUES)
191
184
  v1a.reset_index!
192
185
  v2a.reset_index!
193
186
  m1 = v1a.mean
@@ -4,7 +4,7 @@ module Daru
4
4
  # is done inside the wrapper, so that native methods can be used for most of
5
5
  # the computationally intensive tasks.
6
6
  module Statistics
7
- module Vector
7
+ module Vector # rubocop:disable Metrics/ModuleLength
8
8
  def mean
9
9
  @data.mean
10
10
  end
@@ -55,16 +55,16 @@ module Daru
55
55
  alias :mad :median_absolute_deviation
56
56
 
57
57
  def standard_error
58
- standard_deviation_sample/Math.sqrt(n_valid)
58
+ standard_deviation_sample/Math.sqrt(size - count_values(*Daru::MISSING_VALUES))
59
59
  end
60
60
 
61
61
  def sum_of_squared_deviation
62
- (@data.inject(0) { |a,x| x.square + a } - sum.square.quo(n_valid).to_f).to_f
62
+ (@data.inject(0) { |a,x| x**2 + a } - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f
63
63
  end
64
64
 
65
65
  # Retrieve unique values of non-nil data
66
66
  def factors
67
- only_valid.uniq.reset_index!
67
+ reject_values(*Daru::MISSING_VALUES).uniq.reset_index!
68
68
  end
69
69
 
70
70
  # Maximum element of the vector.
@@ -98,15 +98,17 @@ module Daru
98
98
  end
99
99
 
100
100
  def proportions
101
- len = n_valid
102
- frequencies.each_with_object({}) { |arr, hash| hash[arr[0]] = arr[1] / len }
101
+ len = size - count_values(*Daru::MISSING_VALUES)
102
+ frequencies.each_with_object({}) do |(el, count), hash|
103
+ hash[el] = count / len
104
+ end
103
105
  end
104
106
 
105
107
  def ranked
106
108
  sum = 0
107
- r = frequencies.sort.each_with_object({}) do |val, memo|
108
- memo[val[0]] = ((sum + 1) + (sum + val[1])).quo(2)
109
- sum += val[1]
109
+ r = frequencies.sort.each_with_object({}) do |(el, count), memo|
110
+ memo[el] = ((sum + 1) + (sum + count)).quo(2)
111
+ sum += count
110
112
  end
111
113
 
112
114
  recode { |e| r[e] }
@@ -120,29 +122,27 @@ module Daru
120
122
  # retrieves number of instances where block returns true. If other
121
123
  # values given, retrieves the frequency for this value. If no value
122
124
  # given, counts the number of non-nil elements in the Vector.
123
- def count value=false
125
+ def count value=false, &block
124
126
  if block_given?
125
- @data.select { |val| yield(val) }.count
127
+ @data.select(&block).count
126
128
  elsif value
127
- val = frequencies[value]
128
- val.nil? ? 0 : val
129
+ count { |val| val == value }
129
130
  else
130
- size - @missing_positions.size
131
+ size - indexes(*Daru::MISSING_VALUES).size
131
132
  end
132
133
  end
133
134
 
134
135
  # Count number of occurrences of each value in the Vector
135
136
  def value_counts
136
- values = {}
137
- @data.each do |d|
138
- values[d] ? values[d] += 1 : values[d] = 1
137
+ values = @data.each_with_object(Hash.new(0)) do |d, memo|
138
+ memo[d] += 1
139
139
  end
140
140
 
141
141
  Daru::Vector.new(values)
142
142
  end
143
143
 
144
144
  def proportion value=1
145
- frequencies[value].quo(n_valid).to_f
145
+ frequencies[value].quo(size - count_values(*Daru::MISSING_VALUES)).to_f
146
146
  end
147
147
 
148
148
  # Sample variance with denominator (N-1)
@@ -151,7 +151,7 @@ module Daru
151
151
  if @data.respond_to? :variance_sample
152
152
  @data.variance_sample m
153
153
  else
154
- sum_of_squares(m).quo(n_valid - 1)
154
+ sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES) - 1)
155
155
  end
156
156
  end
157
157
 
@@ -161,38 +161,26 @@ module Daru
161
161
  if @data.respond_to? :variance_population
162
162
  @data.variance_population m
163
163
  else
164
- sum_of_squares(m).quo(n_valid).to_f
164
+ sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES)).to_f
165
165
  end
166
166
  end
167
167
 
168
168
  # Sample covariance with denominator (N-1)
169
169
  def covariance_sample other
170
- @size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
171
- mean_x = mean
172
- mean_y = other.mean
173
- sum = 0
174
- (0...size).each do |i|
175
- sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
176
- end
177
- sum / (n_valid - 1)
170
+ size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
171
+ covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES) - 1)
178
172
  end
179
173
 
180
174
  # Population covariance with denominator (N)
181
175
  def covariance_population other
182
- @size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
183
- mean_x = mean
184
- mean_y = other.mean
185
- sum = 0
186
- (0...size).each do |i|
187
- sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
188
- end
189
- sum / n_valid
176
+ size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
177
+ covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES))
190
178
  end
191
179
 
192
180
  def sum_of_squares(m=nil)
193
181
  m ||= mean
194
- @data.inject(0) { |memo, val|
195
- @missing_values.key?(val) ? memo : (memo + (val - m)**2)
182
+ reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
183
+ memo + (val - m)**2
196
184
  }
197
185
  end
198
186
 
@@ -221,7 +209,7 @@ module Daru
221
209
  else
222
210
  m ||= mean
223
211
  th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
224
- th.quo((@size - @missing_positions.size) * (standard_deviation_sample(m)**3))
212
+ th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
225
213
  end
226
214
  end
227
215
 
@@ -231,16 +219,16 @@ module Daru
231
219
  else
232
220
  m ||= mean
233
221
  fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
234
- fo.quo((@size - @missing_positions.size) * standard_deviation_sample(m) ** 4) - 3
222
+ fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m) ** 4) - 3
235
223
  end
236
224
  end
237
225
 
238
226
  def average_deviation_population m=nil
239
- type == :numeric or raise TypeError, 'Vector must be numeric'
227
+ must_be_numeric!
240
228
  m ||= mean
241
- (@data.inject(0) { |memo, val|
242
- @missing_values.key?(val) ? memo : (val - m).abs + memo
243
- }).quo(n_valid)
229
+ reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
230
+ (val - m).abs + memo
231
+ }.quo(size - count_values(*Daru::MISSING_VALUES))
244
232
  end
245
233
 
246
234
  # Returns the value of the percentile q
@@ -254,31 +242,13 @@ module Daru
254
242
  #
255
243
  # This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
256
244
  def percentile(q, strategy=:midpoint)
257
- sorted = only_valid(:array).sort
258
-
259
245
  case strategy
260
246
  when :midpoint
261
- v = (n_valid * q).quo(100)
262
- if v.to_i!=v
263
- sorted[v.to_i]
264
- else
265
- (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
266
- end
247
+ midpoint_percentile(q)
267
248
  when :linear
268
- index = (q / 100.0) * (n_valid + 1)
269
-
270
- k = index.truncate
271
- d = index % 1
272
-
273
- if k == 0
274
- sorted[0]
275
- elsif k >= sorted.size
276
- sorted[-1]
277
- else
278
- sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
279
- end
249
+ linear_percentile(q)
280
250
  else
281
- raise NotImplementedError, "Unknown strategy #{strategy}"
251
+ raise ArgumentError, "Unknown strategy #{strategy}"
282
252
  end
283
253
  end
284
254
 
@@ -313,17 +283,18 @@ module Daru
313
283
  def standardize use_population=false
314
284
  m ||= mean
315
285
  sd = use_population ? sdp : sds
316
- return Daru::Vector.new([nil]*@size) if m.nil? || sd == 0.0
286
+ return Daru::Vector.new([nil]*size) if m.nil? || sd == 0.0
317
287
 
318
288
  vector_standardized_compute m, sd
319
289
  end
320
290
 
291
+ # :nocov:
321
292
  def box_cox_transformation lambda # :nodoc:
322
- raise 'Should be a numeric' unless @type == :numeric
293
+ must_be_numeric!
323
294
 
324
295
  recode do |x|
325
296
  if !x.nil?
326
- if lambda == 0
297
+ if lambda.zero?
327
298
  Math.log(x)
328
299
  else
329
300
  (x ** lambda - 1).quo(lambda)
@@ -333,10 +304,11 @@ module Daru
333
304
  end
334
305
  end
335
306
  end
307
+ # :nocov:
336
308
 
337
309
  # Replace each non-nil value in the vector with its percentile.
338
310
  def vector_percentile
339
- c = size - missing_positions.size
311
+ c = size - indexes(*Daru::MISSING_VALUES).size
340
312
  ranked.recode! { |i| i.nil? ? nil : (i.quo(c)*100).to_f }
341
313
  end
342
314
 
@@ -367,7 +339,7 @@ module Daru
367
339
  if @data.respond_to? :sample_with_replacement
368
340
  @data.sample_with_replacement sample
369
341
  else
370
- valid = missing_positions.empty? ? self : only_valid
342
+ valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
371
343
  vds = valid.size
372
344
  (0...sample).collect { valid[rand(vds)] }
373
345
  end
@@ -383,17 +355,7 @@ module Daru
383
355
  if @data.respond_to? :sample_without_replacement
384
356
  @data.sample_without_replacement sample
385
357
  else
386
- valid = missing_positions.empty? ? self : only_valid
387
- raise ArgumentError, "Sample size couldn't be greater than n" if
388
- sample > valid.size
389
- out = []
390
- size = valid.size
391
- while out.size < sample
392
- value = rand(size)
393
- out.push(value) unless out.include?(value)
394
- end
395
-
396
- out.collect { |i| valid[i] }
358
+ raw_sample_without_replacement(sample)
397
359
  end
398
360
  end
399
361
 
@@ -407,7 +369,7 @@ module Daru
407
369
  # vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
408
370
  # vector.percent_change
409
371
  # #=>
410
- # # <Daru::Vector:28713060 @name = nil @size: 5 >
372
+ # # <Daru::Vector:28713060 @name = nil size: 5 >
411
373
  # # nil
412
374
  # # a
413
375
  # # f 0.5
@@ -415,19 +377,19 @@ module Daru
415
377
  # # i 0.3333333333333333
416
378
  # # k 0.25
417
379
  def percent_change periods=1
418
- type == :numeric or raise TypeError, 'Vector must be numeric'
419
- value = only_valid
420
- arr = []
421
- i = 1
422
- ind = @data.find_index { |x| !x.nil? }
423
- (periods...size).each do |j|
424
- if j==ind || @missing_values.key?(@data[j])
425
- arr[j] = nil
380
+ must_be_numeric!
381
+
382
+ prev = nil
383
+ arr = @data.each_with_index.map do |cur, i|
384
+ if i < periods ||
385
+ include_with_nan?(Daru::MISSING_VALUES, cur) ||
386
+ include_with_nan?(Daru::MISSING_VALUES, prev)
387
+ nil
426
388
  else
427
- arr[j] = (value.data[i] - value.data[i - 1]) / value.data[i - 1].to_f
428
- i+=1
429
- end
389
+ (cur - prev) / prev.to_f
390
+ end.tap { prev = cur if cur }
430
391
  end
392
+
431
393
  Daru::Vector.new(arr, index: @index, name: @name)
432
394
  end
433
395
 
@@ -533,7 +495,7 @@ module Daru
533
495
  # ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
534
496
  #
535
497
  # @return [Daru::Vector] Contains EMA
536
- def ema(n=10, wilder=false)
498
+ def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
537
499
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
538
500
  # need to start everything from the first non-nil observation
539
501
  start = @data.index { |i| !i.nil? }
@@ -567,7 +529,7 @@ module Daru
567
529
  # ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
568
530
  #
569
531
  # @return [Daru::Vector] contains EMV
570
- def emv(n=10, wilder=false)
532
+ def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
571
533
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
572
534
  # need to start everything from the first non-nil observation
573
535
  start = @data.index { |i| !i.nil? }
@@ -651,7 +613,7 @@ module Daru
651
613
  max_lags ||= (10 * Math.log10(size)).to_i
652
614
 
653
615
  (0..max_lags).map do |i|
654
- if i == 0
616
+ if i.zero?
655
617
  1.0
656
618
  else
657
619
  m = mean
@@ -672,7 +634,7 @@ module Daru
672
634
  # == Returns
673
635
  #
674
636
  # Autocovariance value
675
- def acvf(demean=true, unbiased=true)
637
+ def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
676
638
  opts = {
677
639
  demean: true,
678
640
  unbaised: true
@@ -700,7 +662,7 @@ module Daru
700
662
  result = []
701
663
  acc = 0
702
664
  @data.each do |d|
703
- if @missing_values.key?(d)
665
+ if include_with_nan? Daru::MISSING_VALUES, d
704
666
  result << nil
705
667
  else
706
668
  acc += d
@@ -722,6 +684,66 @@ module Daru
722
684
  alias :ss :sum_of_squares
723
685
  alias :percentil :percentile
724
686
  alias :se :standard_error
687
+
688
+ private
689
+
690
+ def must_be_numeric!
691
+ numeric? or raise TypeError, 'Vector must be numeric'
692
+ end
693
+
694
+ def covariance_sum other
695
+ self_mean = mean
696
+ other_mean = other.mean
697
+ @data
698
+ .zip(other.data).inject(0) do |res, (d, o)|
699
+ res + if !d || !o
700
+ 0
701
+ else
702
+ (d - self_mean) * (o - other_mean)
703
+ end
704
+ end
705
+ end
706
+
707
+ def midpoint_percentile(q) # rubocop:disable Metrics/AbcSize
708
+ sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
709
+
710
+ v = ((size - count_values(*Daru::MISSING_VALUES)) * q).quo(100)
711
+ if v.to_i!=v
712
+ sorted[v.to_i]
713
+ else
714
+ (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
715
+ end
716
+ end
717
+
718
+ def linear_percentile(q) # rubocop:disable Metrics/AbcSize
719
+ sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
720
+ index = (q / 100.0) * ((size - count_values(*Daru::MISSING_VALUES)) + 1)
721
+
722
+ k = index.truncate
723
+ d = index % 1
724
+
725
+ if k.zero?
726
+ sorted[0]
727
+ elsif k >= sorted.size
728
+ sorted[-1]
729
+ else
730
+ sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
731
+ end
732
+ end
733
+
734
+ def raw_sample_without_replacement sample
735
+ valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
736
+ raise ArgumentError, "Sample size couldn't be greater than n" if
737
+ sample > valid.size
738
+ out = []
739
+ size = valid.size
740
+ while out.size < sample
741
+ value = rand(size)
742
+ out.push(value) unless out.include?(value)
743
+ end
744
+
745
+ out.collect { |i| valid[i] }
746
+ end
725
747
  end
726
748
  end
727
749
  end