statsample-timeseries 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +22 -0
- data/README.rdoc +72 -0
- data/Rakefile +47 -0
- data/VERSION +1 -0
- data/bin/bio-statsample-timeseries +74 -0
- data/features/acf.feature +31 -0
- data/features/pacf.feature +42 -0
- data/features/step_definitions/bio-statsample-timeseries_steps.rb +0 -0
- data/features/step_definitions/step_definitions.rb +37 -0
- data/features/step_definitions/step_definitions_acf.rb +8 -0
- data/features/support/env.rb +15 -0
- data/lib/statsample-timeseries.rb +18 -0
- data/lib/statsample-timeseries/arima.rb +246 -0
- data/lib/statsample-timeseries/arima/kalman.rb +148 -0
- data/lib/statsample-timeseries/arima/likelihood.rb +101 -0
- data/lib/statsample-timeseries/timeseries.rb +291 -0
- data/lib/statsample-timeseries/timeseries/pacf.rb +164 -0
- data/lib/statsample-timeseries/utility.rb +154 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/helper.rb +81 -0
- data/test/test_arima_ks.rb +106 -0
- data/test/test_arima_simulators.rb +186 -0
- data/test/test_matrix.rb +92 -0
- data/test/test_pacf.rb +52 -0
- data/test/test_tseries.rb +103 -0
- data/test/test_wald.rb +71 -0
- metadata +273 -0
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'statsample-timeseries/arima/likelihood'
module Statsample
  module TimeSeries
    module Arima

      # Fits ARMA(p, q) coefficients to a time series by maximising the
      # Kalman-filter log-likelihood (see Arima::KF::LogLikelihood) with
      # GSL's Nelder-Mead simplex minimizer.
      class KalmanFilter
        include Statsample::TimeSeries
        include GSL::MultiMin

        #timeseries object
        attr_accessor :ts
        #Autoregressive order
        attr_accessor :p
        #Integrated part order
        attr_accessor :i
        #Moving average order
        attr_accessor :q

        # Autoregressive coefficients, populated by #ks
        attr_reader :ar
        # Moving average coefficients, populated by #ks
        attr_reader :ma

        #Creates a new KalmanFilter object and computes the likelihood
        #==Parameters
        #* *ts*: timeseries object
        #* *p*: integer, AR(p) order
        #* *i*: integer, order of integration
        #* *q*: integer, MA(q) order
        def initialize(ts=[].to_ts, p=0, i=0, q=0)
          @ts = ts
          @p = p
          @i = i
          @q = q
          ks #call the filter
        end

        # Human-readable summary of the model order and underlying series.
        def to_s
          sprintf("ARIMA model(p = %d, i = %d, q = %d) on series(%d elements) - [%s]",
            @p, @i, @q, @ts.size, @ts.to_a.join(','))
        end

        # = Kalman Filter
        # Function which minimizes KalmanFilter.ll iteratively for initial parameters
        # == Usage
        #   @kf = Statsample::TimeSeries::ARIMA.ks(@s, 1, 0, 0)
        #   #=> ks is implicitly called in above operation
        #   @kf.ar
        #   #=> AR coefficients
        # == Returns
        # Array of the estimated phi (AR) followed by theta (MA) parameters;
        # also stores them in @ar / @ma.
        def ks
          initial = Array.new((@p + @q), 0.0)

          # Objective: negative log-likelihood of the candidate phi/theta
          # vector x. In rb-gsl the params array stays fixed across
          # iterations; only x is varied by the simplex.
          my_f = Proc.new { |x, params|
            timeseries = params[0]
            p, q = params[1], params[2]
            -Arima::KF::LogLikelihood.new(x.to_a, timeseries, p, q).ll
          }

          np = @p + @q
          my_func = Function.alloc(my_f, np)
          my_func.set_params([@ts, @p, @q])
          x = GSL::Vector.alloc(initial)
          ss = GSL::Vector.alloc(np)
          ss.set_all(0.1) # initial simplex step size

          minimizer = FMinimizer.alloc("nmsimplex", np)
          minimizer.set(my_func, x, ss)
          status = GSL::CONTINUE
          iter = 0
          # Iterate the simplex until the size test converges (tol 1e-2)
          # or 100 iterations have elapsed.
          while status == GSL::CONTINUE && iter < 100
            iter += 1
            begin
              status = minimizer.iterate()
              status = minimizer.test_size(1e-2)
              x = minimizer.x
            rescue
              # GSL raises when the simplex degenerates; keep the best
              # point found so far rather than aborting the fit.
              break
            end
          end

          # BUGFIX: must use @p/@q here. The locals p and q assigned inside
          # my_f are scoped to that proc and are NOT visible in this method;
          # the previous code resolved `p` to Kernel#p (nil) and `q` to
          # nothing at all, raising at runtime.
          @ar = (@p > 0) ? x.to_a[0...@p] : []
          @ma = (@q > 0) ? x.to_a[@p...(@p + @q)] : []
          x.to_a
        end

        #=Log Likelihood
        #Computes Log likelihood on given parameters, ARMA order and timeseries
        #==params
        #* *params*: array of floats, contains phi/theta parameters
        #* *timeseries*: timeseries object
        #* *p*: integer, AR(p) order
        #* *q*: integer, MA(q) order
        #==Returns
        #LogLikelihood object
        #==Usage
        #  s = (1..100).map { rand }.to_ts
        #  p, q = 1, 0
        #  ll = KalmanFilter.log_likelihood([0.2], s, p, q)
        #  ll.log_likelihood
        #  #=> -22.66
        #  ll.sigma
        #  #=> 0.232
        def self.log_likelihood(params, timeseries, p, q)
          Arima::KF::LogLikelihood.new(params, timeseries, p, q)
        end

        #=T
        #The coefficient matrix for the state vector in state equation
        # It's dimensions is r+k x r+k
        #==Parameters
        #* *r*: integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
        #* *k*: integer, number of exogeneous variables in ARMA model
        #* *q*: integer, The AR coefficient of ARMA model
        #==References Statsmodels tsa, Durbin and Koopman Section 4.7
        # (kept commented out, as in the original source - never wired up)
        #def self.T(r, k, p)
        #  arr = Matrix.zero(r)
        #  params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
        #
        #  params_padded[0...p] = params[k...(p+k)]
        #  intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
        #  #appending an array filled with padded values in beginning
        #  intermediate_matrix[0,0] = [params_padded]
        #
        #  #now generating column matrix for that:
        #  arr = Matrix.columns(intermediate_matrix)
        #  arr_00 = arr[0,0]
        #
        #  #identify matrix substituition in matrix except row[0] and column[0]
        #  r.times do |i|
        #    arr[r,r] = 1
        #  end
        #  arr[0,0] = arr_00
        #  arr
        #end

      end
    end
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module Statsample
  module TimeSeries
    module Arima
      module KF
        # Exact log-likelihood of an ARMA(p, q) model evaluated through the
        # Kalman filter recursion (state-space form in the style of
        # statsmodels' tsa / Durbin & Koopman, per the sibling kalman.rb).
        class LogLikelihood

          #Gives log likelihood value of an ARMA(p, q) process on given parameters
          attr_reader :log_likelihood

          #Gives sigma value of an ARMA(p,q) process on given parameters
          attr_reader :sigma

          #Gives AIC(Akaike Information Criterion)
          #https://www.scss.tcd.ie/Rozenn.Dahyot/ST7005/13AICBIC.pdf
          attr_reader :aic

          # Stores the inputs and immediately evaluates the likelihood,
          # populating @log_likelihood, @sigma and @aic.
          #==Parameters
          #* *params*: array of floats - p phi values followed by q theta values
          #* *timeseries*: timeseries object to evaluate against
          #* *p*: integer, AR order
          #* *q*: integer, MA order
          def initialize(params, timeseries, p, q)
            @params = params
            @timeseries = timeseries
            @p = p
            @q = q
            ll
          end

          #===Log likelihood link function.
          #iteratively minimized by simplex algorithm via KalmanFilter.ks
          #Not meant to be used directly. Will make it private later.
          def ll
            params, timeseries = @params, @timeseries
            p, q = @p, @q

            # Split the parameter vector into AR (phi) and MA (theta) parts.
            phi = []
            theta = []
            phi = params[0...p] if p > 0
            theta = params[(p)...(p + q)] if q > 0

            # Reject candidates outside the unit L1 ball: returns nil and
            # leaves the reader attributes unset for such parameter vectors.
            [phi, theta].each do |v|
              if v.size>0 and v.map(&:abs).inject(:+) > 1
                return
              end
            end

            m = [p, q].max
            # h: m x 1 disturbance-loading vector combining phi and theta.
            h = Matrix.column_vector(Array.new(m,0))
            m.times do |i|
              h[i,0] = phi[i] if i< p
              h[i,0] = h[i,0] + theta[i] if i < q
            end

            # t: m x m transition matrix - phi down the first column, a
            # shifted identity in the remaining columns.
            t = Matrix.zero(m)
            #set_column is available in utility.rb
            t = t.set_column(0, phi)
            if(m > 1)
              t[0...(m-1), 1...m] = Matrix.I(m-1)
              #chances of extra constant 0 values as unbalanced column, so:
              t = Matrix.columns(t.column_vectors)
            end

            g = Matrix[[1]] # NOTE(review): unused below - left over from the derivation
            a_t = Matrix.column_vector(Array.new(m,0)) # state estimate
            n = timeseries.size
            z = Matrix.row_vector(Array.new(m,0)) # observation selector (picks state[0])
            z[0,0] = 1
            p_t = Matrix.I(m) # state covariance
            v_t, f_t = Array.new(n,0), Array.new(n, 0) # innovations and their variances

            # Kalman filter recursion over the series.
            n.times do |i|
              # innovation: observation minus its one-step-ahead prediction
              v_t[i] = (z * a_t).map { |x| timeseries[i] - x }[0,0]

              # innovation variance (with unit observation noise added)
              f_t[i] = (z * p_t * (z.transpose)).map { |x| x + 1 }[0,0]

              # Kalman gain
              k_t = ((t * p_t * z.transpose) + h).map { |x| x / f_t[i] }

              a_t = (t * a_t) + (k_t * v_t[i])
              l_t = t - k_t * z
              j_t = h - k_t

              p_t = (t * p_t * (l_t.transpose)) + (h * (j_t.transpose))
            end

            # Concentrated variance estimate: mean of v^2 / f.
            # (Numeric#square is supplied by the statsample extensions.)
            pot = v_t.map(&:square).zip(f_t).map { |x,y| x / y}.inject(:+)
            sigma_2 = pot.to_f / n.to_f

            # Gaussian log-likelihood with sigma^2 concentrated out.
            f_t_log_sum = f_t.map { |x| Math.log(x) }.inject(:+)
            @log_likelihood = -0.5 * (n*Math.log(2*Math::PI) + n*Math.log(sigma_2) + f_t_log_sum + n)

            @sigma = sigma_2
            # AIC = -2*ll + 2*(number of estimated parameters, incl. sigma)
            @aic = -(2 * @log_likelihood - 2*(p+q+1))
            #puts ("ll = #{-ll}")
            return @log_likelihood
          end

          # Human-readable summary of the model order and parameters.
          def to_s
            sprintf("LogLikelihood(p = %d, q = %d) on params: [%s]",
              @p, @q, @params.join(', '))
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,291 @@
|
|
1
|
+
require 'statsample-timeseries/timeseries/pacf'
# Conversion shorthands, mixed into Array below so that any plain array
# can be turned into a Statsample::TimeSeries::Series.
module Statsample::TimeSeriesShorthands
  # Creates a new Statsample::TimeSeries object
  # Argument should be equal to TimeSeries.new
  def to_time_series(*args)
    Statsample::TimeSeries::Series.new(self, :scale, *args)
  end

  # Short alias so `[1, 2, 3].to_ts` works.
  alias :to_ts :to_time_series
end

class Array
  # Core extension (monkey-patch): every Array gains #to_time_series / #to_ts.
  include Statsample::TimeSeriesShorthands
end
|
15
|
+
|
16
|
+
module Statsample
|
17
|
+
module TimeSeries
|
18
|
+
# Collection of data indexed by time.
|
19
|
+
# The order goes from earliest to latest.
|
20
|
+
class Series < Statsample::Vector
|
21
|
+
include Statsample::TimeSeries::Pacf
|
22
|
+
# Calculates the autocorrelation coefficients of the series.
#
# The first element is always 1, since that is the correlation
# of the series with itself.
#
# Usage:
#
#  ts = (1..100).map { rand }.to_time_series
#
#  ts.acf   # => array with first 21 autocorrelations
#  ts.acf 3 # => array with first 3 autocorrelations
#
def acf(max_lags = nil)
  max_lags ||= (10 * Math.log10(size)).to_i

  series_mean = mean
  (0..max_lags).map do |lag_size|
    next 1.0 if lag_size.zero?

    # Pearson's coefficient cannot be used here: the lagged series must
    # be centred on the mean of the full series, not on its own mean.
    ((self - series_mean) * (lag(lag_size) - series_mean)).sum /
      variance_sample / (size - 1)
  end
end
|
49
|
+
|
50
|
+
#=Partial Autocorrelation
#Generates partial autocorrelation series for a timeseries
#==Parameters
#* *max_lags*: integer, optional - provide number of lags
#* *method*: string. Default: 'yw'.
#  * *yw*:  For yule-walker algorithm unbiased approach
#  * *mle*: For Maximum likelihood algorithm approach
#  * *ld*:  For Levinson-Durbin recursive approach
#==Returns
# array of pacf
def pacf(max_lags = nil, method = :yw)
  method = method.downcase.to_sym
  max_lags ||= (10 * Math.log10(size)).to_i

  case method
  when :yw, :mle
    Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
  when :ld
    # Levinson-Durbin works on the autocovariance sequence; element [2]
    # of its result is the pacf.
    Pacf::Pacf.levinson_durbin(acvf, max_lags, true)[2]
  else
    raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
  end
end
|
73
|
+
|
74
|
+
#=Autoregressive estimation
#Generates AR(k) series for the calling timeseries by yule walker.
#==Parameters
#* *n*: integer, (default = 1500) number of observations for AR.
#* *k*: integer, (default = 1) order of AR process.
#==Returns
#Array constituting estimated AR series.
def ar(n = 1500, k = 1)
  # Delegate to the ARIMA estimator's Yule-Walker routine.
  estimator = Statsample::TimeSeries.arima
  estimator.yule_walker(self, n, k)
end
|
86
|
+
|
87
|
+
#=AutoCovariance
#Provides autocovariance of timeseries.
#==Parameters
#* *demean* = true; optional. Supply false if series is not to be demeaned
#* *unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
#==Returns
# Array of autocovariances, one per lag (lag 0 upward)
def acvf(demean = true, unbiased = true)
  #TODO: change parameters list in opts.merge as suggested by John
  #functionality: computes autocovariance of timeseries data
  #returns: array of autocovariances

  if demean
    demeaned_series = self - self.mean
  else
    demeaned_series = self
  end
  # Number of lags computed - same heuristic as #acf / #pacf, plus one.
  n = (10 * Math.log10(size)).to_i + 1
  m = self.mean
  if unbiased
    # Unbiased form: every lag divides by the full sample size.
    d = Array.new(self.size, self.size)
  else
    # Biased form: divisor shrinks with the lag (size, size-1, ...).
    d = ((1..self.size).to_a.reverse)[0..n]
  end

  # NOTE(review): the lagged factor is always centred on the series mean,
  # even when demean = false - confirm this asymmetry is intentional.
  0.upto(n - 1).map do |i|
    (demeaned_series * (self.lag(i) - m)).sum / d[i]
  end
end
|
117
|
+
|
118
|
+
#=Correlation
#Gives correlation of timeseries.
# WARNING: implementation is unfinished (see "#ongoing" below) - it
# currently returns the freshly allocated output array of nils without
# computing any correlation values.
def correlate(a, v, mode = 'full')
  #performs cross-correlation of two series
  #multiarray.correlate2(a, v, 'full')
  # NOTE(review): the guard only requires a to be at least as long as v,
  # but the message says "same size" - confirm the intended contract.
  if a.size < v.size
    raise("Should have same size!")
  end
  # Zero-pad a to the full cross-correlation length.
  ps = a.size + v.size - 1
  a_padded = Array.new(ps, 0)
  a_padded[0...a.size] = a

  # 'full' mode yields ps outputs; any other mode yields a.size outputs.
  out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
  #ongoing
end
|
133
|
+
|
134
|
+
# Lags the series by k periods.
#
# The convention is to set the oldest observations (the first ones
# in the series) to nil so that the size of the lagged series is the
# same as the original.
#
# Usage:
#
#  ts = (1..10).map { rand }.to_time_series
#           # => [0.69, 0.23, 0.44, 0.71, ...]
#
#  ts.lag   # => [nil, 0.69, 0.23, 0.44, ...]
#  ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
#
def lag(k = 1)
  return self if k == 0

  shifted = dup
  # Walk backwards so each value is copied before it is overwritten.
  (shifted.size - 1).downto(k) { |pos| shifted[pos] = shifted[pos - k] }
  # The first k slots have no predecessor; blank them out.
  k.times { |pos| shifted[pos] = nil }
  shifted.set_valid_data
  shifted
end
|
162
|
+
|
163
|
+
#=Diff
# Performs the difference of the series.
# Note: The first difference of series is X(t) - X(t-1)
# But, second difference of series is NOT X(t) - X(t-2)
# It is the first difference of the first difference
# => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
#==Params
#* *max_lags*: integer, (default: 1), number of differences reqd.
#==Usage
#
#  ts = (1..10).map { rand }.to_ts
#           # => [0.69, 0.23, 0.44, 0.71, ...]
#
#  ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]
#==Returns
# Timeseries object
def diff(max_lags = 1)
  current = self
  result = []
  # Apply the first difference max_lags times in a row.
  max_lags.times do
    result = current - current.lag
    current = result
  end
  result
end
|
188
|
+
|
189
|
+
#=Moving Average
# Calculates the moving average of the series using the provided
# lookback argument. The lookback defaults to 10 periods.
#==Parameters
#* *n*: integer, (default = 10) - lookback window size
#
#==Usage
#
#  ts = (1..100).map { rand }.to_ts
#           # => [0.69, 0.23, 0.44, 0.71, ...]
#
#  # first 9 observations are nil
#  ts.ma     # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
#
#==Returns
#Resulting moving average timeseries object
def ma(n = 10)
  # Not enough observations for even one full window: fall back to the mean.
  return mean if n >= size

  # The first n - 1 positions have no complete window behind them.
  padding = Array.new(n - 1, nil)
  averages = (0..(size - n)).map do |offset|
    self[offset...(offset + n)].inject(&:+) / n
  end
  (padding + averages).to_time_series
end
|
212
|
+
|
213
|
+
#=Exponential Moving Average
# Calculates an exponential moving average of the series using a
# specified parameter. If wilder is false (the default) then the EMA
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
# Welles Wilder smoother of 1 / n.
#
# Warning for EMA usage: EMAs are unstable for small series, as they
# use a lot more than n observations to calculate. The series is stable
# if the size of the series is >= 3.45 * (n + 1)
#
#==Parameters
#* *n*: integer, (default = 10)
#* *wilder*: boolean, (default = false), if true, 1/n value is used for smoothing; if false, uses 2/(n+1) value
#
#==Usage
#  ts = (1..100).map { rand }.to_ts
#           # => [0.69, 0.23, 0.44, 0.71, ...]
#
#  # first 9 observations are nil
#  ts.ema   # => [ ... nil, 0.509... , 0.433..., ... ]
#
#==Returns
#EMA timeseries
def ema(n = 10, wilder = false)
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)

  # need to start everything from the first non-nil observation
  start = self.data.index { |i| i != nil }

  # first n - 1 observations are nil
  base = [nil] * (start + n - 1)

  # nth observation is just a moving average
  # NOTE(review): nils inside the seed window are summed as 0 here, but
  # a nil after `start` would break the recurrence below - assumes the
  # series is nil-free after its first valid observation.
  base << self[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n

  # Recurrence: EMA_t = smoother * x_t + (1 - smoother) * EMA_{t-1}
  (start + n).upto size - 1 do |i|
    base << self[i] * smoother + (1 - smoother) * base.last
  end

  base.to_time_series
end
|
254
|
+
|
255
|
+
#=Moving Average Convergence-Divergence
# Calculates the MACD (moving average convergence-divergence) of the time
# series - this is a comparison of a fast EMA with a slow EMA.
#
#==Parameters*:
#* *fast*: integer, (default = 12) - fast component of MACD
#* *slow*: integer, (default = 26) - slow component of MACD
#* *signal*: integer, (default = 9) - signal component of MACD
#
#==Usage
#  ts = (1..100).map { rand }.to_ts
#           # => [0.69, 0.23, 0.44, 0.71, ...]
#  ts.macd(13)
#
#==Returns
# Array of two timeseries - comparison of fast EMA with slow and EMA with signal value
def macd(fast = 12, slow = 26, signal = 9)
  divergence = ema(fast) - ema(slow)
  [divergence, divergence.ema(signal)]
end
|
275
|
+
|
276
|
+
# Borrow the operations from Vector, but convert to time series
def + series
  # Statsample::Vector#+ returns a plain Vector; re-wrap it as a Series.
  super.to_a.to_ts
end

def - series
  # As with #+, coerce the Vector result back into a time series.
  super.to_a.to_ts
end
|
284
|
+
|
285
|
+
def to_s
|
286
|
+
sprintf("Time Series(type:%s, n:%d)[%s]", @type.to_s, @data.size,
|
287
|
+
@data.collect{|d| d.nil? ? "nil":d}.join(","))
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
291
|
+
end
|