daru 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
@@ -1,7 +1,7 @@
|
|
1
1
|
module Daru
|
2
2
|
module Maths
|
3
3
|
# Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
|
4
|
-
# is done inside the wrapper, so that native methods can be used for most of
|
4
|
+
# is done inside the wrapper, so that native methods can be used for most of
|
5
5
|
# the computationally intensive tasks.
|
6
6
|
module Statistics
|
7
7
|
module Vector
|
@@ -30,22 +30,36 @@ module Daru
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def mode
|
33
|
-
|
34
|
-
|
33
|
+
frequencies.max { |a,b| a[1]<=>b[1] }.first
|
34
|
+
end
|
35
|
+
|
36
|
+
# Create a summary of count, mean, standard deviation, min and max of
|
37
|
+
# the vector in one shot.
|
38
|
+
#
|
39
|
+
# == Arguments
|
40
|
+
#
|
41
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
42
|
+
# be applied to vectors. Default is [:count, :mean, :std, :min,
|
43
|
+
# :max]. Methods will be applied in the specified order.
|
44
|
+
def describe methods=nil
|
45
|
+
methods ||= [:count, :mean, :std, :min, :max]
|
46
|
+
description = methods.map { |m| send(m) }
|
47
|
+
Daru::Vector.new(description, index: methods, name: :statistics)
|
35
48
|
end
|
36
49
|
|
37
50
|
def median_absolute_deviation
|
38
51
|
m = median
|
39
|
-
recode {|val| (val - m).abs }.median
|
52
|
+
recode { |val| (val - m).abs }.median
|
40
53
|
end
|
54
|
+
|
41
55
|
alias :mad :median_absolute_deviation
|
42
56
|
|
43
57
|
def standard_error
|
44
|
-
standard_deviation_sample/
|
58
|
+
standard_deviation_sample/Math.sqrt(n_valid)
|
45
59
|
end
|
46
60
|
|
47
61
|
def sum_of_squared_deviation
|
48
|
-
(@data.inject(0) { |a,x| x.square + a } -
|
62
|
+
(@data.inject(0) { |a,x| x.square + a } - sum.square.quo(n_valid).to_f).to_f
|
49
63
|
end
|
50
64
|
|
51
65
|
# Retrieve unique values of non-nil data
|
@@ -54,7 +68,7 @@ module Daru
|
|
54
68
|
end
|
55
69
|
|
56
70
|
# Maximum element of the vector.
|
57
|
-
#
|
71
|
+
#
|
58
72
|
# @param return_type [Symbol] Data type of the returned value. Defaults
|
59
73
|
# to returning only the maximum number but passing *:vector* will return
|
60
74
|
# a Daru::Vector with the index of the corresponding maximum value.
|
@@ -74,12 +88,8 @@ module Daru
|
|
74
88
|
end
|
75
89
|
|
76
90
|
def frequencies
|
77
|
-
@data.
|
78
|
-
unless element.nil?
|
79
|
-
hash[element] ||= 0
|
80
|
-
hash[element] += 1
|
81
|
-
end
|
82
|
-
hash
|
91
|
+
@data.each_with_object(Hash.new(0)) do |element, hash|
|
92
|
+
hash[element] += 1 unless element.nil?
|
83
93
|
end
|
84
94
|
end
|
85
95
|
|
@@ -89,15 +99,14 @@ module Daru
|
|
89
99
|
|
90
100
|
def proportions
|
91
101
|
len = n_valid
|
92
|
-
frequencies.
|
102
|
+
frequencies.each_with_object({}) { |arr, hash| hash[arr[0]] = arr[1] / len }
|
93
103
|
end
|
94
104
|
|
95
105
|
def ranked
|
96
106
|
sum = 0
|
97
|
-
r = frequencies.sort.
|
107
|
+
r = frequencies.sort.each_with_object({}) do |val, memo|
|
98
108
|
memo[val[0]] = ((sum + 1) + (sum + val[1])).quo(2)
|
99
109
|
sum += val[1]
|
100
|
-
memo
|
101
110
|
end
|
102
111
|
|
103
112
|
recode { |e| r[e] }
|
@@ -107,13 +116,13 @@ module Daru
|
|
107
116
|
standard_deviation_sample / mean
|
108
117
|
end
|
109
118
|
|
110
|
-
# Retrieves number of cases which comply condition. If block given,
|
111
|
-
# retrieves number of instances where block returns true. If other
|
119
|
+
# Retrieves the number of cases that satisfy a condition. If block given,
|
120
|
+
# retrieves number of instances where block returns true. If other
|
112
121
|
# values given, retrieves the frequency for this value. If no value
|
113
122
|
# given, counts the number of non-nil elements in the Vector.
|
114
123
|
def count value=false
|
115
124
|
if block_given?
|
116
|
-
@data.
|
125
|
+
@data.select { |val| yield(val) }.count
|
117
126
|
elsif value
|
118
127
|
val = frequencies[value]
|
119
128
|
val.nil? ? 0 : val
|
@@ -122,7 +131,7 @@ module Daru
|
|
122
131
|
end
|
123
132
|
end
|
124
133
|
|
125
|
-
# Count number of
|
134
|
+
# Count number of occurrences of each value in the Vector
|
126
135
|
def value_counts
|
127
136
|
values = {}
|
128
137
|
@data.each do |d|
|
@@ -138,11 +147,11 @@ module Daru
|
|
138
147
|
|
139
148
|
# Sample variance with denominator (N-1)
|
140
149
|
def variance_sample m=nil
|
141
|
-
m ||=
|
150
|
+
m ||= mean
|
142
151
|
if @data.respond_to? :variance_sample
|
143
152
|
@data.variance_sample m
|
144
153
|
else
|
145
|
-
sum_of_squares(m).quo(
|
154
|
+
sum_of_squares(m).quo(n_valid - 1)
|
146
155
|
end
|
147
156
|
end
|
148
157
|
|
@@ -152,14 +161,38 @@ module Daru
|
|
152
161
|
if @data.respond_to? :variance_population
|
153
162
|
@data.variance_population m
|
154
163
|
else
|
155
|
-
sum_of_squares(m).quo(
|
164
|
+
sum_of_squares(m).quo(n_valid).to_f
|
156
165
|
end
|
157
166
|
end
|
158
167
|
|
168
|
+
# Sample covariance with denominator (N-1)
|
169
|
+
def covariance_sample other
|
170
|
+
@size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
|
171
|
+
mean_x = mean
|
172
|
+
mean_y = other.mean
|
173
|
+
sum = 0
|
174
|
+
(0...size).each do |i|
|
175
|
+
sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
|
176
|
+
end
|
177
|
+
sum / (n_valid - 1)
|
178
|
+
end
|
179
|
+
|
180
|
+
# Population covariance with denominator (N)
|
181
|
+
def covariance_population other
|
182
|
+
@size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
|
183
|
+
mean_x = mean
|
184
|
+
mean_y = other.mean
|
185
|
+
sum = 0
|
186
|
+
(0...size).each do |i|
|
187
|
+
sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
|
188
|
+
end
|
189
|
+
sum / n_valid
|
190
|
+
end
|
191
|
+
|
159
192
|
def sum_of_squares(m=nil)
|
160
193
|
m ||= mean
|
161
|
-
@data.inject(0) { |memo, val|
|
162
|
-
@missing_values.
|
194
|
+
@data.inject(0) { |memo, val|
|
195
|
+
@missing_values.key?(val) ? memo : (memo + (val - m)**2)
|
163
196
|
}
|
164
197
|
end
|
165
198
|
|
@@ -168,7 +201,7 @@ module Daru
|
|
168
201
|
if @data.respond_to? :standard_deviation_population
|
169
202
|
@data.standard_deviation_population(m)
|
170
203
|
else
|
171
|
-
Math
|
204
|
+
Math.sqrt(variance_population(m))
|
172
205
|
end
|
173
206
|
end
|
174
207
|
|
@@ -177,7 +210,7 @@ module Daru
|
|
177
210
|
if @data.respond_to? :standard_deviation_sample
|
178
211
|
@data.standard_deviation_sample m
|
179
212
|
else
|
180
|
-
Math
|
213
|
+
Math.sqrt(variance_sample(m))
|
181
214
|
end
|
182
215
|
end
|
183
216
|
|
@@ -188,7 +221,7 @@ module Daru
|
|
188
221
|
else
|
189
222
|
m ||= mean
|
190
223
|
th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
|
191
|
-
th.quo
|
224
|
+
th.quo((@size - @missing_positions.size) * (standard_deviation_sample(m)**3))
|
192
225
|
end
|
193
226
|
end
|
194
227
|
|
@@ -197,17 +230,17 @@ module Daru
|
|
197
230
|
@data.kurtosis
|
198
231
|
else
|
199
232
|
m ||= mean
|
200
|
-
fo = @data.inject(0){ |a, x| a + ((x - m) ** 4) }
|
233
|
+
fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
|
201
234
|
fo.quo((@size - @missing_positions.size) * standard_deviation_sample(m) ** 4) - 3
|
202
235
|
end
|
203
236
|
end
|
204
237
|
|
205
238
|
def average_deviation_population m=nil
|
206
|
-
type == :numeric or raise TypeError,
|
239
|
+
type == :numeric or raise TypeError, 'Vector must be numeric'
|
207
240
|
m ||= mean
|
208
|
-
(@data.inject(
|
209
|
-
@missing_values.
|
210
|
-
}).quo(
|
241
|
+
(@data.inject(0) { |memo, val|
|
242
|
+
@missing_values.key?(val) ? memo : (val - m).abs + memo
|
243
|
+
}).quo(n_valid)
|
211
244
|
end
|
212
245
|
|
213
246
|
# Returns the value of the percentile q
|
@@ -218,15 +251,15 @@ module Daru
|
|
218
251
|
# * :midpoint (Default): (a + b) / 2
|
219
252
|
# * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
|
220
253
|
# == References
|
221
|
-
#
|
254
|
+
#
|
222
255
|
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
223
|
-
def percentile(q, strategy
|
256
|
+
def percentile(q, strategy=:midpoint)
|
224
257
|
sorted = only_valid(:array).sort
|
225
258
|
|
226
259
|
case strategy
|
227
260
|
when :midpoint
|
228
261
|
v = (n_valid * q).quo(100)
|
229
|
-
if
|
262
|
+
if v.to_i!=v
|
230
263
|
sorted[v.to_i]
|
231
264
|
else
|
232
265
|
(sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
|
@@ -245,18 +278,18 @@ module Daru
|
|
245
278
|
sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
|
246
279
|
end
|
247
280
|
else
|
248
|
-
raise NotImplementedError
|
281
|
+
raise NotImplementedError, "Unknown strategy #{strategy}"
|
249
282
|
end
|
250
283
|
end
|
251
284
|
|
252
285
|
# Dichotomize the vector with 0 and 1, based on lowest value.
|
253
|
-
# If parameter is defined, this value and lower will be 0
|
286
|
+
# If parameter is defined, this value and lower will be 0
|
254
287
|
# and higher, 1.
|
255
|
-
def dichotomize(low
|
288
|
+
def dichotomize(low=nil)
|
256
289
|
low ||= factors.min
|
257
290
|
|
258
|
-
|
259
|
-
if x.nil?
|
291
|
+
recode do |x|
|
292
|
+
if x.nil?
|
260
293
|
nil
|
261
294
|
elsif x > low
|
262
295
|
1
|
@@ -272,25 +305,25 @@ module Daru
|
|
272
305
|
end
|
273
306
|
|
274
307
|
# Standardize data.
|
275
|
-
#
|
308
|
+
#
|
276
309
|
# == Arguments
|
277
|
-
#
|
310
|
+
#
|
278
311
|
# * use_population - Pass as *true* if you want to use population
|
279
312
|
# standard deviation instead of sample standard deviation.
|
280
313
|
def standardize use_population=false
|
281
314
|
m ||= mean
|
282
315
|
sd = use_population ? sdp : sds
|
283
|
-
return Daru::Vector.new([nil]*@size) if m.nil?
|
316
|
+
return Daru::Vector.new([nil]*@size) if m.nil? || sd == 0.0
|
284
317
|
|
285
318
|
vector_standardized_compute m, sd
|
286
319
|
end
|
287
320
|
|
288
321
|
def box_cox_transformation lambda # :nodoc:
|
289
|
-
raise
|
322
|
+
raise 'Should be a numeric' unless @type == :numeric
|
290
323
|
|
291
|
-
|
324
|
+
recode do |x|
|
292
325
|
if !x.nil?
|
293
|
-
if
|
326
|
+
if lambda == 0
|
294
327
|
Math.log(x)
|
295
328
|
else
|
296
329
|
(x ** lambda - 1).quo(lambda)
|
@@ -315,7 +348,7 @@ module Daru
|
|
315
348
|
index: index, name: name, dtype: dtype
|
316
349
|
end
|
317
350
|
end
|
318
|
-
|
351
|
+
|
319
352
|
def vector_centered_compute(m)
|
320
353
|
if @data.respond_to? :vector_centered_compute
|
321
354
|
@data.vector_centered_compute(m)
|
@@ -334,12 +367,12 @@ module Daru
|
|
334
367
|
if @data.respond_to? :sample_with_replacement
|
335
368
|
@data.sample_with_replacement sample
|
336
369
|
else
|
337
|
-
valid = missing_positions.empty? ? self :
|
370
|
+
valid = missing_positions.empty? ? self : only_valid
|
338
371
|
vds = valid.size
|
339
|
-
(0...sample).collect{ valid[rand(vds)] }
|
372
|
+
(0...sample).collect { valid[rand(vds)] }
|
340
373
|
end
|
341
374
|
end
|
342
|
-
|
375
|
+
|
343
376
|
# Returns a random sample of size n, without replacement,
|
344
377
|
# only with valid data.
|
345
378
|
#
|
@@ -350,18 +383,52 @@ module Daru
|
|
350
383
|
if @data.respond_to? :sample_without_replacement
|
351
384
|
@data.sample_without_replacement sample
|
352
385
|
else
|
353
|
-
valid = missing_positions.empty? ? self :
|
354
|
-
raise ArgumentError, "Sample size couldn't be greater than n" if
|
386
|
+
valid = missing_positions.empty? ? self : only_valid
|
387
|
+
raise ArgumentError, "Sample size couldn't be greater than n" if
|
355
388
|
sample > valid.size
|
356
389
|
out = []
|
357
390
|
size = valid.size
|
358
391
|
while out.size < sample
|
359
392
|
value = rand(size)
|
360
|
-
out.push(value)
|
393
|
+
out.push(value) unless out.include?(value)
|
361
394
|
end
|
362
395
|
|
363
|
-
out.collect{|i| valid[i]}
|
396
|
+
out.collect { |i| valid[i] }
|
397
|
+
end
|
398
|
+
end
|
399
|
+
|
400
|
+
# The percent_change method computes the percent change over
|
401
|
+
# the given number of periods.
|
402
|
+
#
|
403
|
+
# @param [Integer] periods (1) number of nils to insert at the beginning.
|
404
|
+
#
|
405
|
+
# @example
|
406
|
+
#
|
407
|
+
# vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
|
408
|
+
# vector.percent_change
|
409
|
+
# #=>
|
410
|
+
# # <Daru::Vector:28713060 @name = nil @size: 5 >
|
411
|
+
# # nil
|
412
|
+
# # a
|
413
|
+
# # f 0.5
|
414
|
+
# # t 0.0
|
415
|
+
# # i 0.3333333333333333
|
416
|
+
# # k 0.25
|
417
|
+
def percent_change periods=1
|
418
|
+
type == :numeric or raise TypeError, 'Vector must be numeric'
|
419
|
+
value = only_valid
|
420
|
+
arr = []
|
421
|
+
i = 1
|
422
|
+
ind = @data.find_index { |x| !x.nil? }
|
423
|
+
(periods...size).each do |j|
|
424
|
+
if j==ind || @missing_values.key?(@data[j])
|
425
|
+
arr[j] = nil
|
426
|
+
else
|
427
|
+
arr[j] = (value.data[i] - value.data[i - 1]) / value.data[i - 1].to_f
|
428
|
+
i+=1
|
429
|
+
end
|
364
430
|
end
|
431
|
+
Daru::Vector.new(arr, index: @index, name: @name)
|
365
432
|
end
|
366
433
|
|
367
434
|
# Performs the difference of the series.
|
@@ -372,7 +439,7 @@ module Daru
|
|
372
439
|
#
|
373
440
|
# == Arguments
|
374
441
|
#
|
375
|
-
|
442
|
+
# * *max_lags*: integer, (default: 1), number of differences required.
|
376
443
|
#
|
377
444
|
# @example Using #diff
|
378
445
|
#
|
@@ -382,7 +449,7 @@ module Daru
|
|
382
449
|
# ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
|
383
450
|
#
|
384
451
|
# @return [Daru::Vector]
|
385
|
-
def diff(max_lags
|
452
|
+
def diff(max_lags=1)
|
386
453
|
ts = self
|
387
454
|
difference = []
|
388
455
|
max_lags.times do
|
@@ -394,8 +461,8 @@ module Daru
|
|
394
461
|
|
395
462
|
# Calculate the rolling function for a loopback value.
|
396
463
|
#
|
397
|
-
# @param [Symbol] function The rolling function to be applied. Can be
|
398
|
-
# any function applicatble to Daru::Vector (:mean, :median, :count,
|
464
|
+
# @param [Symbol] function The rolling function to be applied. Can be
|
465
|
+
# any function applicable to Daru::Vector (:mean, :median, :count,
|
399
466
|
# :min, :max, etc.)
|
400
467
|
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
|
401
468
|
# @return [Daru::Vector] Vector containing rolling calculations.
|
@@ -406,17 +473,17 @@ module Daru
|
|
406
473
|
# ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
407
474
|
def rolling function, n=10
|
408
475
|
Daru::Vector.new(
|
409
|
-
[nil] * (n - 1) +
|
476
|
+
[nil] * (n - 1) +
|
410
477
|
(0..(size - n)).map do |i|
|
411
478
|
Daru::Vector.new(@data[i...(i + n)]).send(function)
|
412
479
|
end, index: @index
|
413
480
|
)
|
414
481
|
end
|
415
482
|
|
416
|
-
# @!method rolling_mean
|
483
|
+
# @!method rolling_mean
|
417
484
|
# Calculate rolling average
|
418
485
|
# @param [Integer] n (10) Loopback length
|
419
|
-
# @!method rolling_median
|
486
|
+
# @!method rolling_median
|
420
487
|
# Calculate rolling median
|
421
488
|
# @param [Integer] n (10) Loopback length
|
422
489
|
# @!method rolling_count
|
@@ -425,7 +492,7 @@ module Daru
|
|
425
492
|
# @!method rolling_max
|
426
493
|
# Calculate rolling max value
|
427
494
|
# @param [Integer] n (10) Loopback length
|
428
|
-
# @!method rolling_min
|
495
|
+
# @!method rolling_min
|
429
496
|
# Calculate rolling min value
|
430
497
|
# @param [Integer] n (10) Loopback length
|
431
498
|
# @!method rolling_sum
|
@@ -454,22 +521,22 @@ module Daru
|
|
454
521
|
# if the size of the series is >= 3.45 * (n + 1)
|
455
522
|
#
|
456
523
|
# @param [Integer] n (10) Loopback length.
|
457
|
-
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
524
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
458
525
|
# used for smoothing; if false, uses 2/(n+1) value
|
459
526
|
#
|
460
527
|
# @example Using ema
|
461
528
|
#
|
462
|
-
# ts = (1..100).map { rand }
|
463
|
-
# # => [0.
|
529
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
530
|
+
# # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
|
464
531
|
#
|
465
532
|
# # first 9 observations are nil
|
466
|
-
# ts.ema # => [ ... nil, 0.
|
533
|
+
# ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
|
467
534
|
#
|
468
535
|
# @return [Daru::Vector] Contains EMA
|
469
|
-
def ema(n
|
536
|
+
def ema(n=10, wilder=false)
|
470
537
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
471
538
|
# need to start everything from the first non-nil observation
|
472
|
-
start = @data.index { |i| i
|
539
|
+
start = @data.index { |i| !i.nil? }
|
473
540
|
# first n - 1 observations are nil
|
474
541
|
base = [nil] * (start + n - 1)
|
475
542
|
# nth observation is just a moving average
|
@@ -478,7 +545,72 @@ module Daru
|
|
478
545
|
base << self[i] * smoother + (1 - smoother) * base.last
|
479
546
|
end
|
480
547
|
|
481
|
-
Daru::Vector.new(base, index: @index)
|
548
|
+
Daru::Vector.new(base, index: @index, name: @name)
|
549
|
+
end
|
550
|
+
|
551
|
+
# Exponential Moving Variance.
|
552
|
+
# Calculates an exponential moving variance of the series using a
|
553
|
+
# specified parameter. If wilder is false (the default) then the EMV
|
554
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
555
|
+
# Welles Wilder smoother of 1 / n.
|
556
|
+
#
|
557
|
+
# @param [Integer] n (10) Loopback length.
|
558
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
559
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
560
|
+
#
|
561
|
+
# @example Using emv
|
562
|
+
#
|
563
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
564
|
+
# # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
|
565
|
+
#
|
566
|
+
# # first 9 observations are nil
|
567
|
+
# ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
|
568
|
+
#
|
569
|
+
# @return [Daru::Vector] contains EMV
|
570
|
+
def emv(n=10, wilder=false)
|
571
|
+
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
572
|
+
# need to start everything from the first non-nil observation
|
573
|
+
start = @data.index { |i| !i.nil? }
|
574
|
+
# first n - 1 observations are nil
|
575
|
+
var_base = [nil] * (start + n - 1)
|
576
|
+
mean_base = [nil] * (start + n - 1)
|
577
|
+
mean_base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
|
578
|
+
# nth observation is just a moving variance_population
|
579
|
+
var_base << @data[start...(start + n)].inject(0.0) { |s,x| x.nil? ? s : s + (x - mean_base.last)**2 } / n
|
580
|
+
(start + n).upto size - 1 do |i|
|
581
|
+
last = mean_base.last
|
582
|
+
mean_base << self[i] * smoother + (1 - smoother) * last
|
583
|
+
var_base << (1 - smoother) * var_base.last + smoother * (self[i] - last) * (self[i] - mean_base.last)
|
584
|
+
end
|
585
|
+
Daru::Vector.new(var_base, index: @index, name: @name)
|
586
|
+
end
|
587
|
+
|
588
|
+
# Exponential Moving Standard Deviation.
|
589
|
+
# Calculates an exponential moving standard deviation of the series using a
|
590
|
+
# specified parameter. If wilder is false (the default) then the EMSD
|
591
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
592
|
+
# Welles Wilder smoother of 1 / n.
|
593
|
+
#
|
594
|
+
# @param [Integer] n (10) Loopback length.
|
595
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
596
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
597
|
+
#
|
598
|
+
# @example Using emsd
|
599
|
+
#
|
600
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
601
|
+
# # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
|
602
|
+
#
|
603
|
+
# # first 9 observations are nil
|
604
|
+
# ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
|
605
|
+
#
|
606
|
+
# @return [Daru::Vector] contains EMSD
|
607
|
+
def emsd(n=10, wilder=false)
|
608
|
+
result = []
|
609
|
+
emv_return = emv(n, wilder)
|
610
|
+
emv_return.each do |d|
|
611
|
+
result << (d.nil? ? nil : Math.sqrt(d))
|
612
|
+
end
|
613
|
+
Daru::Vector.new(result, index: @index, name: @name)
|
482
614
|
end
|
483
615
|
|
484
616
|
# Moving Average Convergence-Divergence.
|
@@ -486,9 +618,9 @@ module Daru
|
|
486
618
|
# series - this is a comparison of a fast EMA with a slow EMA.
|
487
619
|
#
|
488
620
|
# == Arguments
|
489
|
-
|
490
|
-
|
491
|
-
|
621
|
+
# * *fast*: integer, (default = 12) - fast component of MACD
|
622
|
+
# * *slow*: integer, (default = 26) - slow component of MACD
|
623
|
+
# * *signal*: integer, (default = 9) - signal component of MACD
|
492
624
|
#
|
493
625
|
# == Usage
|
494
626
|
#
|
@@ -498,9 +630,9 @@ module Daru
|
|
498
630
|
#
|
499
631
|
# == Returns
|
500
632
|
#
|
501
|
-
# Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
|
633
|
+
# Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
|
502
634
|
# signal value
|
503
|
-
def macd(fast
|
635
|
+
def macd(fast=12, slow=26, signal=9)
|
504
636
|
series = ema(fast) - ema(slow)
|
505
637
|
[series, series.ema(signal)]
|
506
638
|
end
|
@@ -515,17 +647,17 @@ module Daru
|
|
515
647
|
#
|
516
648
|
# ts.acf # => array with first 21 autocorrelations
|
517
649
|
# ts.acf 3 # => array with first 3 autocorrelations
|
518
|
-
def acf(max_lags
|
650
|
+
def acf(max_lags=nil)
|
519
651
|
max_lags ||= (10 * Math.log10(size)).to_i
|
520
652
|
|
521
653
|
(0..max_lags).map do |i|
|
522
654
|
if i == 0
|
523
655
|
1.0
|
524
656
|
else
|
525
|
-
m =
|
657
|
+
m = mean
|
526
658
|
# can't use Pearson coefficient since the mean for the lagged series should
|
527
659
|
# be the same as the regular series
|
528
|
-
((self - m) * (
|
660
|
+
((self - m) * (lag(i) - m)).sum / variance_sample / (size - 1)
|
529
661
|
end
|
530
662
|
end
|
531
663
|
end
|
@@ -533,14 +665,14 @@ module Daru
|
|
533
665
|
# Provides autocovariance.
|
534
666
|
#
|
535
667
|
# == Options
|
536
|
-
#
|
537
|
-
|
538
|
-
|
668
|
+
#
|
669
|
+
# * *:demean* = true; optional. Supply false if series is not to be demeaned
|
670
|
+
# * *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
|
539
671
|
#
|
540
672
|
# == Returns
|
541
673
|
#
|
542
674
|
# Autocovariance value
|
543
|
-
def acvf(demean
|
675
|
+
def acvf(demean=true, unbiased=true)
|
544
676
|
opts = {
|
545
677
|
demean: true,
|
546
678
|
unbaised: true
|
@@ -548,22 +680,18 @@ module Daru
|
|
548
680
|
|
549
681
|
demean = opts[:demean]
|
550
682
|
unbiased = opts[:unbiased]
|
551
|
-
|
552
|
-
demeaned_series = self - self.mean
|
553
|
-
else
|
554
|
-
demeaned_series = self
|
555
|
-
end
|
683
|
+
demeaned_series = demean ? self - mean : self
|
556
684
|
|
557
685
|
n = (10 * Math.log10(size)).to_i + 1
|
558
|
-
m =
|
559
|
-
if unbiased
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
686
|
+
m = mean
|
687
|
+
d = if unbiased
|
688
|
+
Array.new(size, size)
|
689
|
+
else
|
690
|
+
(1..size).to_a.reverse[0..n]
|
691
|
+
end
|
564
692
|
|
565
693
|
0.upto(n - 1).map do |i|
|
566
|
-
(demeaned_series * (
|
694
|
+
(demeaned_series * (lag(i) - m)).sum / d[i]
|
567
695
|
end
|
568
696
|
end
|
569
697
|
|
@@ -572,7 +700,7 @@ module Daru
|
|
572
700
|
result = []
|
573
701
|
acc = 0
|
574
702
|
@data.each do |d|
|
575
|
-
if @missing_values.
|
703
|
+
if @missing_values.key?(d)
|
576
704
|
result << nil
|
577
705
|
else
|
578
706
|
acc += d
|
@@ -588,7 +716,8 @@ module Daru
|
|
588
716
|
alias :std :sds
|
589
717
|
alias :adp :average_deviation_population
|
590
718
|
alias :cov :coefficient_of_variation
|
591
|
-
alias :variance :variance_sample
|
719
|
+
alias :variance :variance_sample
|
720
|
+
alias :covariance :covariance_sample
|
592
721
|
alias :sd :standard_deviation_sample
|
593
722
|
alias :ss :sum_of_squares
|
594
723
|
alias :percentil :percentile
|
@@ -596,4 +725,4 @@ module Daru
|
|
596
725
|
end
|
597
726
|
end
|
598
727
|
end
|
599
|
-
end
|
728
|
+
end
|