daru 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
@@ -1,7 +1,7 @@
|
|
1
1
|
module Daru
|
2
2
|
module Maths
|
3
3
|
# Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
|
4
|
-
# is done inside the wrapper, so that native methods can be used for most of
|
4
|
+
# is done inside the wrapper, so that native methods can be used for most of
|
5
5
|
# the computationally intensive tasks.
|
6
6
|
module Statistics
|
7
7
|
module Vector
|
@@ -30,22 +30,36 @@ module Daru
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def mode
|
33
|
-
|
34
|
-
|
33
|
+
frequencies.max { |a,b| a[1]<=>b[1] }.first
|
34
|
+
end
|
35
|
+
|
36
|
+
# Create a summary of count, mean, standard deviation, min and max of
|
37
|
+
# the vector in one shot.
|
38
|
+
#
|
39
|
+
# == Arguments
|
40
|
+
#
|
41
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
42
|
+
# be applied to vectors. Default is [:count, :mean, :std, :max,
|
43
|
+
# :min]. Methods will be applied in the specified order.
|
44
|
+
def describe methods=nil
|
45
|
+
methods ||= [:count, :mean, :std, :min, :max]
|
46
|
+
description = methods.map { |m| send(m) }
|
47
|
+
Daru::Vector.new(description, index: methods, name: :statistics)
|
35
48
|
end
|
36
49
|
|
37
50
|
def median_absolute_deviation
|
38
51
|
m = median
|
39
|
-
recode {|val| (val - m).abs }.median
|
52
|
+
recode { |val| (val - m).abs }.median
|
40
53
|
end
|
54
|
+
|
41
55
|
alias :mad :median_absolute_deviation
|
42
56
|
|
43
57
|
def standard_error
|
44
|
-
standard_deviation_sample/
|
58
|
+
standard_deviation_sample/Math.sqrt(n_valid)
|
45
59
|
end
|
46
60
|
|
47
61
|
def sum_of_squared_deviation
|
48
|
-
(@data.inject(0) { |a,x| x.square + a } -
|
62
|
+
(@data.inject(0) { |a,x| x.square + a } - sum.square.quo(n_valid).to_f).to_f
|
49
63
|
end
|
50
64
|
|
51
65
|
# Retrieve unique values of non-nil data
|
@@ -54,7 +68,7 @@ module Daru
|
|
54
68
|
end
|
55
69
|
|
56
70
|
# Maximum element of the vector.
|
57
|
-
#
|
71
|
+
#
|
58
72
|
# @param return_type [Symbol] Data type of the returned value. Defaults
|
59
73
|
# to returning only the maximum number but passing *:vector* will return
|
60
74
|
# a Daru::Vector with the index of the corresponding maximum value.
|
@@ -74,12 +88,8 @@ module Daru
|
|
74
88
|
end
|
75
89
|
|
76
90
|
def frequencies
|
77
|
-
@data.
|
78
|
-
unless element.nil?
|
79
|
-
hash[element] ||= 0
|
80
|
-
hash[element] += 1
|
81
|
-
end
|
82
|
-
hash
|
91
|
+
@data.each_with_object(Hash.new(0)) do |element, hash|
|
92
|
+
hash[element] += 1 unless element.nil?
|
83
93
|
end
|
84
94
|
end
|
85
95
|
|
@@ -89,15 +99,14 @@ module Daru
|
|
89
99
|
|
90
100
|
def proportions
|
91
101
|
len = n_valid
|
92
|
-
frequencies.
|
102
|
+
frequencies.each_with_object({}) { |arr, hash| hash[arr[0]] = arr[1] / len }
|
93
103
|
end
|
94
104
|
|
95
105
|
def ranked
|
96
106
|
sum = 0
|
97
|
-
r = frequencies.sort.
|
107
|
+
r = frequencies.sort.each_with_object({}) do |val, memo|
|
98
108
|
memo[val[0]] = ((sum + 1) + (sum + val[1])).quo(2)
|
99
109
|
sum += val[1]
|
100
|
-
memo
|
101
110
|
end
|
102
111
|
|
103
112
|
recode { |e| r[e] }
|
@@ -107,13 +116,13 @@ module Daru
|
|
107
116
|
standard_deviation_sample / mean
|
108
117
|
end
|
109
118
|
|
110
|
-
# Retrieves number of cases which comply condition. If block given,
|
111
|
-
# retrieves number of instances where block returns true. If other
|
119
|
+
# Retrieves number of cases which comply condition. If block given,
|
120
|
+
# retrieves number of instances where block returns true. If other
|
112
121
|
# values given, retrieves the frequency for this value. If no value
|
113
122
|
# given, counts the number of non-nil elements in the Vector.
|
114
123
|
def count value=false
|
115
124
|
if block_given?
|
116
|
-
@data.
|
125
|
+
@data.select { |val| yield(val) }.count
|
117
126
|
elsif value
|
118
127
|
val = frequencies[value]
|
119
128
|
val.nil? ? 0 : val
|
@@ -122,7 +131,7 @@ module Daru
|
|
122
131
|
end
|
123
132
|
end
|
124
133
|
|
125
|
-
# Count number of
|
134
|
+
# Count number of occurrences of each value in the Vector
|
126
135
|
def value_counts
|
127
136
|
values = {}
|
128
137
|
@data.each do |d|
|
@@ -138,11 +147,11 @@ module Daru
|
|
138
147
|
|
139
148
|
# Sample variance with denominator (N-1)
|
140
149
|
def variance_sample m=nil
|
141
|
-
m ||=
|
150
|
+
m ||= mean
|
142
151
|
if @data.respond_to? :variance_sample
|
143
152
|
@data.variance_sample m
|
144
153
|
else
|
145
|
-
sum_of_squares(m).quo(
|
154
|
+
sum_of_squares(m).quo(n_valid - 1)
|
146
155
|
end
|
147
156
|
end
|
148
157
|
|
@@ -152,14 +161,38 @@ module Daru
|
|
152
161
|
if @data.respond_to? :variance_population
|
153
162
|
@data.variance_population m
|
154
163
|
else
|
155
|
-
sum_of_squares(m).quo(
|
164
|
+
sum_of_squares(m).quo(n_valid).to_f
|
156
165
|
end
|
157
166
|
end
|
158
167
|
|
168
|
+
# Sample covariance with denominator (N-1)
|
169
|
+
def covariance_sample other
|
170
|
+
@size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
|
171
|
+
mean_x = mean
|
172
|
+
mean_y = other.mean
|
173
|
+
sum = 0
|
174
|
+
(0...size).each do |i|
|
175
|
+
sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
|
176
|
+
end
|
177
|
+
sum / (n_valid - 1)
|
178
|
+
end
|
179
|
+
|
180
|
+
# Population covariance with denominator (N)
|
181
|
+
def covariance_population other
|
182
|
+
@size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
|
183
|
+
mean_x = mean
|
184
|
+
mean_y = other.mean
|
185
|
+
sum = 0
|
186
|
+
(0...size).each do |i|
|
187
|
+
sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
|
188
|
+
end
|
189
|
+
sum / n_valid
|
190
|
+
end
|
191
|
+
|
159
192
|
def sum_of_squares(m=nil)
|
160
193
|
m ||= mean
|
161
|
-
@data.inject(0) { |memo, val|
|
162
|
-
@missing_values.
|
194
|
+
@data.inject(0) { |memo, val|
|
195
|
+
@missing_values.key?(val) ? memo : (memo + (val - m)**2)
|
163
196
|
}
|
164
197
|
end
|
165
198
|
|
@@ -168,7 +201,7 @@ module Daru
|
|
168
201
|
if @data.respond_to? :standard_deviation_population
|
169
202
|
@data.standard_deviation_population(m)
|
170
203
|
else
|
171
|
-
Math
|
204
|
+
Math.sqrt(variance_population(m))
|
172
205
|
end
|
173
206
|
end
|
174
207
|
|
@@ -177,7 +210,7 @@ module Daru
|
|
177
210
|
if @data.respond_to? :standard_deviation_sample
|
178
211
|
@data.standard_deviation_sample m
|
179
212
|
else
|
180
|
-
Math
|
213
|
+
Math.sqrt(variance_sample(m))
|
181
214
|
end
|
182
215
|
end
|
183
216
|
|
@@ -188,7 +221,7 @@ module Daru
|
|
188
221
|
else
|
189
222
|
m ||= mean
|
190
223
|
th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
|
191
|
-
th.quo
|
224
|
+
th.quo((@size - @missing_positions.size) * (standard_deviation_sample(m)**3))
|
192
225
|
end
|
193
226
|
end
|
194
227
|
|
@@ -197,17 +230,17 @@ module Daru
|
|
197
230
|
@data.kurtosis
|
198
231
|
else
|
199
232
|
m ||= mean
|
200
|
-
fo = @data.inject(0){ |a, x| a + ((x - m) ** 4) }
|
233
|
+
fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
|
201
234
|
fo.quo((@size - @missing_positions.size) * standard_deviation_sample(m) ** 4) - 3
|
202
235
|
end
|
203
236
|
end
|
204
237
|
|
205
238
|
def average_deviation_population m=nil
|
206
|
-
type == :numeric or raise TypeError,
|
239
|
+
type == :numeric or raise TypeError, 'Vector must be numeric'
|
207
240
|
m ||= mean
|
208
|
-
(@data.inject(
|
209
|
-
@missing_values.
|
210
|
-
}).quo(
|
241
|
+
(@data.inject(0) { |memo, val|
|
242
|
+
@missing_values.key?(val) ? memo : (val - m).abs + memo
|
243
|
+
}).quo(n_valid)
|
211
244
|
end
|
212
245
|
|
213
246
|
# Returns the value of the percentile q
|
@@ -218,15 +251,15 @@ module Daru
|
|
218
251
|
# * :midpoint (Default): (a + b) / 2
|
219
252
|
# * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
|
220
253
|
# == References
|
221
|
-
#
|
254
|
+
#
|
222
255
|
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
223
|
-
def percentile(q, strategy
|
256
|
+
def percentile(q, strategy=:midpoint)
|
224
257
|
sorted = only_valid(:array).sort
|
225
258
|
|
226
259
|
case strategy
|
227
260
|
when :midpoint
|
228
261
|
v = (n_valid * q).quo(100)
|
229
|
-
if
|
262
|
+
if v.to_i!=v
|
230
263
|
sorted[v.to_i]
|
231
264
|
else
|
232
265
|
(sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
|
@@ -245,18 +278,18 @@ module Daru
|
|
245
278
|
sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
|
246
279
|
end
|
247
280
|
else
|
248
|
-
raise NotImplementedError
|
281
|
+
raise NotImplementedError, "Unknown strategy #{strategy}"
|
249
282
|
end
|
250
283
|
end
|
251
284
|
|
252
285
|
# Dichotomize the vector with 0 and 1, based on lowest value.
|
253
|
-
# If parameter is defined, this value and lower will be 0
|
286
|
+
# If parameter is defined, this value and lower will be 0
|
254
287
|
# and higher, 1.
|
255
|
-
def dichotomize(low
|
288
|
+
def dichotomize(low=nil)
|
256
289
|
low ||= factors.min
|
257
290
|
|
258
|
-
|
259
|
-
if x.nil?
|
291
|
+
recode do |x|
|
292
|
+
if x.nil?
|
260
293
|
nil
|
261
294
|
elsif x > low
|
262
295
|
1
|
@@ -272,25 +305,25 @@ module Daru
|
|
272
305
|
end
|
273
306
|
|
274
307
|
# Standardize data.
|
275
|
-
#
|
308
|
+
#
|
276
309
|
# == Arguments
|
277
|
-
#
|
310
|
+
#
|
278
311
|
# * use_population - Pass as *true* if you want to use population
|
279
312
|
# standard deviation instead of sample standard deviation.
|
280
313
|
def standardize use_population=false
|
281
314
|
m ||= mean
|
282
315
|
sd = use_population ? sdp : sds
|
283
|
-
return Daru::Vector.new([nil]*@size) if m.nil?
|
316
|
+
return Daru::Vector.new([nil]*@size) if m.nil? || sd == 0.0
|
284
317
|
|
285
318
|
vector_standardized_compute m, sd
|
286
319
|
end
|
287
320
|
|
288
321
|
def box_cox_transformation lambda # :nodoc:
|
289
|
-
raise
|
322
|
+
raise 'Should be a numeric' unless @type == :numeric
|
290
323
|
|
291
|
-
|
324
|
+
recode do |x|
|
292
325
|
if !x.nil?
|
293
|
-
if
|
326
|
+
if lambda == 0
|
294
327
|
Math.log(x)
|
295
328
|
else
|
296
329
|
(x ** lambda - 1).quo(lambda)
|
@@ -315,7 +348,7 @@ module Daru
|
|
315
348
|
index: index, name: name, dtype: dtype
|
316
349
|
end
|
317
350
|
end
|
318
|
-
|
351
|
+
|
319
352
|
def vector_centered_compute(m)
|
320
353
|
if @data.respond_to? :vector_centered_compute
|
321
354
|
@data.vector_centered_compute(m)
|
@@ -334,12 +367,12 @@ module Daru
|
|
334
367
|
if @data.respond_to? :sample_with_replacement
|
335
368
|
@data.sample_with_replacement sample
|
336
369
|
else
|
337
|
-
valid = missing_positions.empty? ? self :
|
370
|
+
valid = missing_positions.empty? ? self : only_valid
|
338
371
|
vds = valid.size
|
339
|
-
(0...sample).collect{ valid[rand(vds)] }
|
372
|
+
(0...sample).collect { valid[rand(vds)] }
|
340
373
|
end
|
341
374
|
end
|
342
|
-
|
375
|
+
|
343
376
|
# Returns an random sample of size n, without replacement,
|
344
377
|
# only with valid data.
|
345
378
|
#
|
@@ -350,18 +383,52 @@ module Daru
|
|
350
383
|
if @data.respond_to? :sample_without_replacement
|
351
384
|
@data.sample_without_replacement sample
|
352
385
|
else
|
353
|
-
valid = missing_positions.empty? ? self :
|
354
|
-
raise ArgumentError, "Sample size couldn't be greater than n" if
|
386
|
+
valid = missing_positions.empty? ? self : only_valid
|
387
|
+
raise ArgumentError, "Sample size couldn't be greater than n" if
|
355
388
|
sample > valid.size
|
356
389
|
out = []
|
357
390
|
size = valid.size
|
358
391
|
while out.size < sample
|
359
392
|
value = rand(size)
|
360
|
-
out.push(value)
|
393
|
+
out.push(value) unless out.include?(value)
|
361
394
|
end
|
362
395
|
|
363
|
-
out.collect{|i| valid[i]}
|
396
|
+
out.collect { |i| valid[i] }
|
397
|
+
end
|
398
|
+
end
|
399
|
+
|
400
|
+
# The percent_change method computes the percent change over
|
401
|
+
# the given number of periods.
|
402
|
+
#
|
403
|
+
# @param [Integer] periods (1) number of nils to insert at the beginning.
|
404
|
+
#
|
405
|
+
# @example
|
406
|
+
#
|
407
|
+
# vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
|
408
|
+
# vector.percent_change
|
409
|
+
# #=>
|
410
|
+
# # <Daru::Vector:28713060 @name = nil @size: 5 >
|
411
|
+
# # nil
|
412
|
+
# # a
|
413
|
+
# # f 0.5
|
414
|
+
# # t 0.0
|
415
|
+
# # i 0.3333333333333333
|
416
|
+
# # k 0.25
|
417
|
+
def percent_change periods=1
|
418
|
+
type == :numeric or raise TypeError, 'Vector must be numeric'
|
419
|
+
value = only_valid
|
420
|
+
arr = []
|
421
|
+
i = 1
|
422
|
+
ind = @data.find_index { |x| !x.nil? }
|
423
|
+
(periods...size).each do |j|
|
424
|
+
if j==ind || @missing_values.key?(@data[j])
|
425
|
+
arr[j] = nil
|
426
|
+
else
|
427
|
+
arr[j] = (value.data[i] - value.data[i - 1]) / value.data[i - 1].to_f
|
428
|
+
i+=1
|
429
|
+
end
|
364
430
|
end
|
431
|
+
Daru::Vector.new(arr, index: @index, name: @name)
|
365
432
|
end
|
366
433
|
|
367
434
|
# Performs the difference of the series.
|
@@ -372,7 +439,7 @@ module Daru
|
|
372
439
|
#
|
373
440
|
# == Arguments
|
374
441
|
#
|
375
|
-
|
442
|
+
# * *max_lags*: integer, (default: 1), number of differences reqd.
|
376
443
|
#
|
377
444
|
# @example Using #diff
|
378
445
|
#
|
@@ -382,7 +449,7 @@ module Daru
|
|
382
449
|
# ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
|
383
450
|
#
|
384
451
|
# @return [Daru::Vector]
|
385
|
-
def diff(max_lags
|
452
|
+
def diff(max_lags=1)
|
386
453
|
ts = self
|
387
454
|
difference = []
|
388
455
|
max_lags.times do
|
@@ -394,8 +461,8 @@ module Daru
|
|
394
461
|
|
395
462
|
# Calculate the rolling function for a loopback value.
|
396
463
|
#
|
397
|
-
# @param [Symbol] function The rolling function to be applied. Can be
|
398
|
-
# any function applicatble to Daru::Vector (:mean, :median, :count,
|
464
|
+
# @param [Symbol] function The rolling function to be applied. Can be
|
465
|
+
# any function applicatble to Daru::Vector (:mean, :median, :count,
|
399
466
|
# :min, :max, etc.)
|
400
467
|
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
|
401
468
|
# @return [Daru::Vector] Vector containin rolling calculations.
|
@@ -406,17 +473,17 @@ module Daru
|
|
406
473
|
# ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
407
474
|
def rolling function, n=10
|
408
475
|
Daru::Vector.new(
|
409
|
-
[nil] * (n - 1) +
|
476
|
+
[nil] * (n - 1) +
|
410
477
|
(0..(size - n)).map do |i|
|
411
478
|
Daru::Vector.new(@data[i...(i + n)]).send(function)
|
412
479
|
end, index: @index
|
413
480
|
)
|
414
481
|
end
|
415
482
|
|
416
|
-
# @!method rolling_mean
|
483
|
+
# @!method rolling_mean
|
417
484
|
# Calculate rolling average
|
418
485
|
# @param [Integer] n (10) Loopback length
|
419
|
-
# @!method rolling_median
|
486
|
+
# @!method rolling_median
|
420
487
|
# Calculate rolling median
|
421
488
|
# @param [Integer] n (10) Loopback length
|
422
489
|
# @!method rolling_count
|
@@ -425,7 +492,7 @@ module Daru
|
|
425
492
|
# @!method rolling_max
|
426
493
|
# Calculate rolling max value
|
427
494
|
# @param [Integer] n (10) Loopback length
|
428
|
-
# @!method rolling_min
|
495
|
+
# @!method rolling_min
|
429
496
|
# Calculate rolling min value
|
430
497
|
# @param [Integer] n (10) Loopback length
|
431
498
|
# @!method rolling_sum
|
@@ -454,22 +521,22 @@ module Daru
|
|
454
521
|
# if the size of the series is >= 3.45 * (n + 1)
|
455
522
|
#
|
456
523
|
# @param [Integer] n (10) Loopback length.
|
457
|
-
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
524
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
458
525
|
# used for smoothing; if false, uses 2/(n+1) value
|
459
526
|
#
|
460
527
|
# @example Using ema
|
461
528
|
#
|
462
|
-
# ts = (1..100).map { rand }
|
463
|
-
# # => [0.
|
529
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
530
|
+
# # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
|
464
531
|
#
|
465
532
|
# # first 9 observations are nil
|
466
|
-
# ts.ema # => [ ... nil, 0.
|
533
|
+
# ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
|
467
534
|
#
|
468
535
|
# @return [Daru::Vector] Contains EMA
|
469
|
-
def ema(n
|
536
|
+
def ema(n=10, wilder=false)
|
470
537
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
471
538
|
# need to start everything from the first non-nil observation
|
472
|
-
start = @data.index { |i| i
|
539
|
+
start = @data.index { |i| !i.nil? }
|
473
540
|
# first n - 1 observations are nil
|
474
541
|
base = [nil] * (start + n - 1)
|
475
542
|
# nth observation is just a moving average
|
@@ -478,7 +545,72 @@ module Daru
|
|
478
545
|
base << self[i] * smoother + (1 - smoother) * base.last
|
479
546
|
end
|
480
547
|
|
481
|
-
Daru::Vector.new(base, index: @index)
|
548
|
+
Daru::Vector.new(base, index: @index, name: @name)
|
549
|
+
end
|
550
|
+
|
551
|
+
# Exponential Moving Variance.
|
552
|
+
# Calculates an exponential moving variance of the series using a
|
553
|
+
# specified parameter. If wilder is false (the default) then the EMV
|
554
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
555
|
+
# Welles Wilder smoother of 1 / n.
|
556
|
+
#
|
557
|
+
# @param [Integer] n (10) Loopback length.
|
558
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
559
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
560
|
+
#
|
561
|
+
# @example Using emv
|
562
|
+
#
|
563
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
564
|
+
# # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
|
565
|
+
#
|
566
|
+
# # first 9 observations are nil
|
567
|
+
# ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
|
568
|
+
#
|
569
|
+
# @return [Daru::Vector] contains EMV
|
570
|
+
def emv(n=10, wilder=false)
|
571
|
+
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
572
|
+
# need to start everything from the first non-nil observation
|
573
|
+
start = @data.index { |i| !i.nil? }
|
574
|
+
# first n - 1 observations are nil
|
575
|
+
var_base = [nil] * (start + n - 1)
|
576
|
+
mean_base = [nil] * (start + n - 1)
|
577
|
+
mean_base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
|
578
|
+
# nth observation is just a moving variance_population
|
579
|
+
var_base << @data[start...(start + n)].inject(0.0) { |s,x| x.nil? ? s : s + (x - mean_base.last)**2 } / n
|
580
|
+
(start + n).upto size - 1 do |i|
|
581
|
+
last = mean_base.last
|
582
|
+
mean_base << self[i] * smoother + (1 - smoother) * last
|
583
|
+
var_base << (1 - smoother) * var_base.last + smoother * (self[i] - last) * (self[i] - mean_base.last)
|
584
|
+
end
|
585
|
+
Daru::Vector.new(var_base, index: @index, name: @name)
|
586
|
+
end
|
587
|
+
|
588
|
+
# Exponential Moving Standard Deviation.
|
589
|
+
# Calculates an exponential moving standard deviation of the series using a
|
590
|
+
# specified parameter. If wilder is false (the default) then the EMSD
|
591
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
592
|
+
# Welles Wilder smoother of 1 / n.
|
593
|
+
#
|
594
|
+
# @param [Integer] n (10) Loopback length.
|
595
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
596
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
597
|
+
#
|
598
|
+
# @example Using emsd
|
599
|
+
#
|
600
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
601
|
+
# # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
|
602
|
+
#
|
603
|
+
# # first 9 observations are nil
|
604
|
+
# ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
|
605
|
+
#
|
606
|
+
# @return [Daru::Vector] contains EMSD
|
607
|
+
def emsd(n=10, wilder=false)
|
608
|
+
result = []
|
609
|
+
emv_return = emv(n, wilder)
|
610
|
+
emv_return.each do |d|
|
611
|
+
result << (d.nil? ? nil : Math.sqrt(d))
|
612
|
+
end
|
613
|
+
Daru::Vector.new(result, index: @index, name: @name)
|
482
614
|
end
|
483
615
|
|
484
616
|
# Moving Average Convergence-Divergence.
|
@@ -486,9 +618,9 @@ module Daru
|
|
486
618
|
# series - this is a comparison of a fast EMA with a slow EMA.
|
487
619
|
#
|
488
620
|
# == Arguments
|
489
|
-
|
490
|
-
|
491
|
-
|
621
|
+
# * *fast*: integer, (default = 12) - fast component of MACD
|
622
|
+
# * *slow*: integer, (default = 26) - slow component of MACD
|
623
|
+
# * *signal*: integer, (default = 9) - signal component of MACD
|
492
624
|
#
|
493
625
|
# == Usage
|
494
626
|
#
|
@@ -498,9 +630,9 @@ module Daru
|
|
498
630
|
#
|
499
631
|
# == Returns
|
500
632
|
#
|
501
|
-
# Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
|
633
|
+
# Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
|
502
634
|
# signal value
|
503
|
-
def macd(fast
|
635
|
+
def macd(fast=12, slow=26, signal=9)
|
504
636
|
series = ema(fast) - ema(slow)
|
505
637
|
[series, series.ema(signal)]
|
506
638
|
end
|
@@ -515,17 +647,17 @@ module Daru
|
|
515
647
|
#
|
516
648
|
# ts.acf # => array with first 21 autocorrelations
|
517
649
|
# ts.acf 3 # => array with first 3 autocorrelations
|
518
|
-
def acf(max_lags
|
650
|
+
def acf(max_lags=nil)
|
519
651
|
max_lags ||= (10 * Math.log10(size)).to_i
|
520
652
|
|
521
653
|
(0..max_lags).map do |i|
|
522
654
|
if i == 0
|
523
655
|
1.0
|
524
656
|
else
|
525
|
-
m =
|
657
|
+
m = mean
|
526
658
|
# can't use Pearson coefficient since the mean for the lagged series should
|
527
659
|
# be the same as the regular series
|
528
|
-
((self - m) * (
|
660
|
+
((self - m) * (lag(i) - m)).sum / variance_sample / (size - 1)
|
529
661
|
end
|
530
662
|
end
|
531
663
|
end
|
@@ -533,14 +665,14 @@ module Daru
|
|
533
665
|
# Provides autocovariance.
|
534
666
|
#
|
535
667
|
# == Options
|
536
|
-
#
|
537
|
-
|
538
|
-
|
668
|
+
#
|
669
|
+
# * *:demean* = true; optional. Supply false if series is not to be demeaned
|
670
|
+
# * *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
|
539
671
|
#
|
540
672
|
# == Returns
|
541
673
|
#
|
542
674
|
# Autocovariance value
|
543
|
-
def acvf(demean
|
675
|
+
def acvf(demean=true, unbiased=true)
|
544
676
|
opts = {
|
545
677
|
demean: true,
|
546
678
|
unbaised: true
|
@@ -548,22 +680,18 @@ module Daru
|
|
548
680
|
|
549
681
|
demean = opts[:demean]
|
550
682
|
unbiased = opts[:unbiased]
|
551
|
-
|
552
|
-
demeaned_series = self - self.mean
|
553
|
-
else
|
554
|
-
demeaned_series = self
|
555
|
-
end
|
683
|
+
demeaned_series = demean ? self - mean : self
|
556
684
|
|
557
685
|
n = (10 * Math.log10(size)).to_i + 1
|
558
|
-
m =
|
559
|
-
if unbiased
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
686
|
+
m = mean
|
687
|
+
d = if unbiased
|
688
|
+
Array.new(size, size)
|
689
|
+
else
|
690
|
+
(1..size).to_a.reverse[0..n]
|
691
|
+
end
|
564
692
|
|
565
693
|
0.upto(n - 1).map do |i|
|
566
|
-
(demeaned_series * (
|
694
|
+
(demeaned_series * (lag(i) - m)).sum / d[i]
|
567
695
|
end
|
568
696
|
end
|
569
697
|
|
@@ -572,7 +700,7 @@ module Daru
|
|
572
700
|
result = []
|
573
701
|
acc = 0
|
574
702
|
@data.each do |d|
|
575
|
-
if @missing_values.
|
703
|
+
if @missing_values.key?(d)
|
576
704
|
result << nil
|
577
705
|
else
|
578
706
|
acc += d
|
@@ -588,7 +716,8 @@ module Daru
|
|
588
716
|
alias :std :sds
|
589
717
|
alias :adp :average_deviation_population
|
590
718
|
alias :cov :coefficient_of_variation
|
591
|
-
alias :variance :variance_sample
|
719
|
+
alias :variance :variance_sample
|
720
|
+
alias :covariance :covariance_sample
|
592
721
|
alias :sd :standard_deviation_sample
|
593
722
|
alias :ss :sum_of_squares
|
594
723
|
alias :percentil :percentile
|
@@ -596,4 +725,4 @@ module Daru
|
|
596
725
|
end
|
597
726
|
end
|
598
727
|
end
|
599
|
-
end
|
728
|
+
end
|