statsample-timeseries 0.0.2

@@ -0,0 +1,148 @@
+ require 'statsample-timeseries/arima/likelihood'
+ module Statsample
+ module TimeSeries
+ module Arima
+
+ class KalmanFilter
+ include Statsample::TimeSeries
+ include GSL::MultiMin
+
+ #Timeseries object
+ attr_accessor :ts
+ #Autoregressive order
+ attr_accessor :p
+ #Integrated part order
+ attr_accessor :i
+ #Moving average order
+ attr_accessor :q
+
+ # Autoregressive coefficients
+ attr_reader :ar
+ # Moving average coefficients
+ attr_reader :ma
+
+ #Creates a new KalmanFilter object and computes the likelihood
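+ #==Usage (illustrative; estimates depend on the data)
+ #  series = (1..100).map { rand }.to_ts
+ #  kf = Statsample::TimeSeries::Arima::KalmanFilter.new(series, 1, 0, 1)
+ #  kf.ar #=> estimated AR coefficients
+ #  kf.ma #=> estimated MA coefficients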
+ def initialize(ts=[].to_ts, p=0, i=0, q=0)
+ @ts = ts
+ @p = p
+ @i = i
+ @q = q
+ ks #call the filter
+ end
+
+ def to_s
+ sprintf("ARIMA model(p = %d, i = %d, q = %d) on series(%d elements) - [%s]",
+ @p, @i, @q, @ts.size, @ts.to_a.join(','))
+ end
+
+ # = Kalman Filter
+ # Iteratively minimizes the negative log likelihood (Arima::KF::LogLikelihood) over the
+ # AR and MA coefficients using GSL's Nelder-Mead simplex, starting from zero-valued initial parameters.
+ # == Usage
+ # @s = [-1.16025577,0.64758021,0.77158601,0.14989543,2.31358162,3.49213868,1.14826956,0.58169457,-0.30813868,-0.34741084,-1.41175595,0.06040081, -0.78230232,0.86734837,0.95015787,-0.49781397,0.53247330,1.56495187,0.30936619,0.09750217,1.09698829,-0.81315490,-0.79425607,-0.64568547,-1.06460320,1.24647894,0.66695937,1.50284551,1.17631218,1.64082872,1.61462736,0.06443761,-0.17583741,0.83918339,0.46610988,-0.54915270,-0.56417108,-1.27696654,0.89460084,1.49970338,0.24520493,0.26249138,-1.33744834,-0.57725961,1.55819543,1.62143157,0.44421891,-0.74000084 ,0.57866347,3.51189333,2.39135077,1.73046244,1.81783890,0.21454040,0.43520890,-1.42443856,-2.72124685,-2.51313877,-1.20243091,-1.44268002 ,-0.16777305,0.05780661,2.03533992,0.39187242,0.54987983,0.57865693,-0.96592469,-0.93278473,-0.75962671,-0.63216906,1.06776183, 0.17476059 ,0.06635860,0.94906227,2.44498583,-1.04990407,-0.88440073,-1.99838258,-1.12955558,-0.62654882,-1.36589161,-2.67456821,-0.97187696, -0.84431782 ,-0.10051809,0.54239549,1.34622861,1.25598105,0.19707759,3.29286114,3.52423499,1.69146333,-0.10150024,0.45222903,-0.01730516, -0.49828727, -1.18484684,-1.09531773,-1.17190808,0.30207662].to_ts
+ # @kf = Statsample::TimeSeries::ARIMA.ks(@s, 1, 0, 0)
+ # #=> ks is implicitly called in the above operation
+ # @kf.ar
+ # #=> AR coefficients
+ def ks
+ initial = Array.new((@p+@q), 0.0)
+
+ my_f = Proc.new{ |x, params|
+ #In rb-gsl the params stay fixed while x is varied by the minimizer
+ #(in the reference R code the initial parameters are varied on each iteration)
+ #my_func.set_params([(1..100).to_a.to_ts, p_value, q_value])
+ timeseries = params[0]
+ p, q = params[1], params[2]
+ params = x
+ #puts x
+ #negative log likelihood, since the simplex routine minimizes
+ -Arima::KF::LogLikelihood.new(x.to_a, timeseries, p, q).ll
+ #KalmanFilter.ll(x.to_a, timeseries, p, q)
+ }
+ np = @p + @q
+ my_func = Function.alloc(my_f, np)
+ my_func.set_params([@ts, @p, @q])
+ x = GSL::Vector.alloc(initial)
+ ss = GSL::Vector.alloc(np)
+ ss.set_all(0.1)
+
+ minimizer = FMinimizer.alloc("nmsimplex", np)
+ minimizer.set(my_func, x, ss)
+ status = GSL::CONTINUE
+ iter = 0
+ while status == GSL::CONTINUE && iter < 100
+ iter += 1
+ begin
+ status = minimizer.iterate()
+ status = minimizer.test_size(1e-2)
+ x = minimizer.x
+ rescue
+ break
+ end
+ # printf("%5d ", iter)
+ # for i in 0...np do
+ # puts "#{x[i]}.to_f"
+ # #printf("%10.3e ", x[i].to_f)
+ # end
+ # printf("f() = %7.3f size = %.3f\n", minimizer.fval, minimizer.size)
+ end
+
+ @ar = (@p > 0) ? x.to_a[0...@p] : []
+ @ma = (@q > 0) ? x.to_a[@p...(@p+@q)] : []
+ x.to_a
+ end
+
+
+ #=Log Likelihood
+ #Computes the log likelihood for the given parameters, ARMA order and timeseries
+ #==Parameters
+ #* *params*: array of floats, contains phi/theta parameters
+ #* *timeseries*: timeseries object
+ #* *p*: integer, AR(p) order
+ #* *q*: integer, MA(q) order
+ #==Returns
+ #LogLikelihood object
+ #==Usage
+ # s = (1..100).map { rand }.to_ts
+ # p, q = 1, 0
+ # ll = KalmanFilter.log_likelihood([0.2], s, p, q)
+ # ll.log_likelihood
+ # #=> -22.66
+ # ll.sigma
+ # #=> 0.232
+ def self.log_likelihood(params, timeseries, p, q)
+ Arima::KF::LogLikelihood.new(params, timeseries, p, q)
+ end
+
+ #=T
+ #The coefficient matrix for the state vector in the state equation.
+ # Its dimensions are (r+k) x (r+k).
+ #==Parameters
+ #* *r*: integer, r is max(p, q+1), where p and q are the AR and MA orders respectively
+ #* *k*: integer, number of exogenous variables in the ARMA model
+ #* *p*: integer, order of the AR part of the ARMA model
+
+ #==References Statsmodels tsa, Durbin and Koopman Section 4.7
+ #def self.T(r, k, p)
+ # arr = Matrix.zero(r)
+ # params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
+ #
+ # params_padded[0...p] = params[k...(p+k)]
+ # intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
+ # #appending an array filled with padded values in the beginning
+ # intermediate_matrix[0,0] = [params_padded]
+ #
+ # #now generating a column matrix for that:
+ # arr = Matrix.columns(intermediate_matrix)
+ # arr_00 = arr[0,0]
+ #
+ # #identity matrix substitution in the matrix, except row[0] and column[0]
+ # r.times do |i|
+ # arr[r,r] = 1
+ # end
+ # arr[0,0] = arr_00
+ # arr
+ #end
+
+ end
+ end
+ end
+ end
@@ -0,0 +1,101 @@
+ module Statsample
+ module TimeSeries
+ module Arima
+ module KF
+ class LogLikelihood
+
+ #Gives the log likelihood value of an ARMA(p, q) process for the given parameters
+ attr_reader :log_likelihood
+
+ #Gives the estimated innovation variance (sigma^2) of an ARMA(p, q) process for the given parameters
+ attr_reader :sigma
+
+ #Gives the AIC (Akaike Information Criterion)
+ #https://www.scss.tcd.ie/Rozenn.Dahyot/ST7005/13AICBIC.pdf
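+ #Computed as -2 * log_likelihood + 2 * (p + q + 1)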
+ attr_reader :aic
+
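+ #==Usage (illustrative; values depend on the series)
+ #  series = (1..100).map { rand }.to_ts
+ #  ll = Statsample::TimeSeries::Arima::KF::LogLikelihood.new([0.2, 0.1], series, 1, 1)
+ #  ll.log_likelihood #=> concentrated log likelihood
+ #  ll.sigma          #=> estimated innovation variance
+ #  ll.aic            #=> Akaike Information Criterion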
+ def initialize(params, timeseries, p, q)
+ @params = params
+ @timeseries = timeseries
+ @p = p
+ @q = q
+ ll
+ end
+
+ #===Log likelihood link function
+ #Iteratively minimized by the simplex algorithm via KalmanFilter#ks.
+ #Not meant to be used directly; it will be made private later.
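+ #The returned value is the concentrated Gaussian log likelihood built from the
+ #one-step prediction errors v_t and their scaled variances f_t (see the recursion below):
+ #  sigma_2 = (1/n) * sum(v_t**2 / f_t)
+ #  ll      = -0.5 * (n*log(2*PI) + n*log(sigma_2) + sum(log(f_t)) + n)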
+ def ll
+ params, timeseries = @params, @timeseries
+ p, q = @p, @q
+
+ phi = []
+ theta = []
+ phi = params[0...p] if p > 0
+ theta = params[p...(p + q)] if q > 0
+
+ #guard against explosive parameter values during minimization
+ [phi, theta].each do |v|
+ if v.size > 0 and v.map(&:abs).inject(:+) > 1
+ return
+ end
+ end
+
+ m = [p, q].max
+ h = Matrix.column_vector(Array.new(m, 0))
+ m.times do |i|
+ h[i,0] = phi[i] if i < p
+ h[i,0] = h[i,0] + theta[i] if i < q
+ end
+
+ t = Matrix.zero(m)
+ #set_column is available in utility.rb
+ t = t.set_column(0, phi)
+ if(m > 1)
+ t[0...(m-1), 1...m] = Matrix.I(m-1)
+ #chances of extra constant 0 values as unbalanced columns, so:
+ t = Matrix.columns(t.column_vectors)
+ end
+
+ g = Matrix[[1]]
+ a_t = Matrix.column_vector(Array.new(m, 0))
+ n = timeseries.size
+ z = Matrix.row_vector(Array.new(m, 0))
+ z[0,0] = 1
+ p_t = Matrix.I(m)
+ v_t, f_t = Array.new(n, 0), Array.new(n, 0)
+
+ n.times do |i|
+ #one-step prediction error and its scaled variance
+ v_t[i] = (z * a_t).map { |x| timeseries[i] - x }[0,0]
+
+ f_t[i] = (z * p_t * (z.transpose)).map { |x| x + 1 }[0,0]
+
+ #Kalman gain and state/covariance updates
+ k_t = ((t * p_t * z.transpose) + h).map { |x| x / f_t[i] }
+
+ a_t = (t * a_t) + (k_t * v_t[i])
+ l_t = t - k_t * z
+ j_t = h - k_t
+
+ p_t = (t * p_t * (l_t.transpose)) + (h * (j_t.transpose))
+ end
+
+ pot = v_t.map(&:square).zip(f_t).map { |x, y| x / y }.inject(:+)
+ sigma_2 = pot.to_f / n.to_f
+
+ f_t_log_sum = f_t.map { |x| Math.log(x) }.inject(:+)
+ @log_likelihood = -0.5 * (n*Math.log(2*Math::PI) + n*Math.log(sigma_2) + f_t_log_sum + n)
+
+ @sigma = sigma_2
+ @aic = -(2 * @log_likelihood - 2*(p+q+1))
+ #puts ("ll = #{-ll}")
+ return @log_likelihood
+ end
+
+ def to_s
+ sprintf("LogLikelihood(p = %d, q = %d) on params: [%s]",
+ @p, @q, @params.join(', '))
+ end
+ end
+ end
+ end
+ end
+ end
@@ -0,0 +1,291 @@
+ require 'statsample-timeseries/timeseries/pacf'
+ module Statsample::TimeSeriesShorthands
+ # Creates a new Statsample::TimeSeries::Series object.
+ # Extra arguments are passed on to Series.new.
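+ # Example:
+ #   [1.2, 0.7, 3.1].to_ts # same as calling to_time_series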
+ def to_time_series(*args)
+ Statsample::TimeSeries::Series.new(self, :scale, *args)
+ end
+
+ alias :to_ts :to_time_series
+ end
+
+ class Array
+ include Statsample::TimeSeriesShorthands
+ end
+
+ module Statsample
+ module TimeSeries
+ # Collection of data indexed by time.
+ # The order goes from earliest to latest.
+ class Series < Statsample::Vector
+ include Statsample::TimeSeries::Pacf
+ # Calculates the autocorrelation coefficients of the series.
+ #
+ # The first element is always 1, since that is the correlation
+ # of the series with itself.
+ #
+ # Usage:
+ #
+ # ts = (1..100).map { rand }.to_time_series
+ #
+ # ts.acf # => array with first 21 autocorrelations
+ # ts.acf 3 # => array with first 3 autocorrelations
+ #
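+ # Each lag-k coefficient is computed as
+ #   sum((x_t - mean) * (x_{t-k} - mean)) / ((n - 1) * sample_variance)
+ # which scales the lag-k autocovariance by the overall sum of squared deviations.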
+ def acf(max_lags = nil)
+ max_lags ||= (10 * Math.log10(size)).to_i
+
+ (0..max_lags).map do |i|
+ if i == 0
+ 1.0
+ else
+ m = self.mean
+
+ # can't use the Pearson coefficient since the mean for the lagged series should
+ # be the same as for the regular series
+ ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
+ end
+ end
+ end
+
+ #=Partial Autocorrelation
+ #Generates the partial autocorrelation series for a timeseries
+ #==Parameters
+ #* *max_lags*: integer, optional - number of lags
+ #* *method*: symbol or string. Default: :yw.
+ # * *yw*: for the Yule-Walker unbiased approach
+ # * *mle*: for the maximum likelihood approach
+ # * *ld*: for the Levinson-Durbin recursive approach
+ #==Returns
+ # array of pacf
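+ #==Usage (illustrative)
+ #  ts = (1..100).map { rand }.to_ts
+ #  ts.pacf           # partial autocorrelations, Yule-Walker default
+ #  ts.pacf(10, :mle) # pacf up to lag 10, maximum likelihood variant
+ #  ts.pacf(10, :ld)  # pacf up to lag 10, Levinson-Durbin recursion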
+ def pacf(max_lags = nil, method = :yw)
+ method = method.downcase.to_sym
+ max_lags ||= (10 * Math.log10(size)).to_i
+ if method.eql? :yw or method.eql? :mle
+ Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
+ elsif method == :ld
+ series = self.acvf
+ Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
+ else
+ raise "Available methods for pacf are 'yw', 'mle' and 'ld'"
+ end
+ end
+
+ #=Autoregressive estimation
+ #Generates an AR(k) series for the calling timeseries by Yule-Walker.
+ #==Parameters
+ #* *n*: integer, (default = 1500) number of observations for AR.
+ #* *k*: integer, (default = 1) order of the AR process.
+ #==Returns
+ #Array constituting the estimated AR series.
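+ #==Usage (illustrative)
+ #  ts = (1..500).map { rand }.to_ts
+ #  ts.ar(ts.size, 2) # AR(2) estimate via Yule-Walker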
+ def ar(n = 1500, k = 1)
+ series = Statsample::TimeSeries.arima
+ #series = Statsample::TimeSeries::ARIMA.new
+ series.yule_walker(self, n, k)
+ end
+
+ #=AutoCovariance
+ #Provides the autocovariance of a timeseries.
+ #==Parameters
+ #* *demean* = true; optional. Supply false if the series is not to be demeaned
+ #* *unbiased* = true; optional. true/false for the unbiased/biased form of autocovariance
+ #==Returns
+ # Array of autocovariances
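+ #==Usage (illustrative)
+ #  ts = (1..100).map { rand }.to_ts
+ #  ts.acvf               # autocovariances of the demeaned series, unbiased form
+ #  ts.acvf(false, false) # biased form, without demeaning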
+ def acvf(demean = true, unbiased = true)
+ #TODO: change parameters list in opts.merge as suggested by John
+ #functionality: computes autocovariance of timeseries data
+ #returns: array of autocovariances
+
+ if demean
+ demeaned_series = self - self.mean
+ else
+ demeaned_series = self
+ end
+ n = (10 * Math.log10(size)).to_i + 1
+ m = self.mean
+ if unbiased
+ d = Array.new(self.size, self.size)
+ else
+ d = ((1..self.size).to_a.reverse)[0..n]
+ end
+
+ 0.upto(n - 1).map do |i|
+ (demeaned_series * (self.lag(i) - m)).sum / d[i]
+ end
+ end
+
+ #=Correlation
+ #Gives the cross-correlation of two timeseries.
+ def correlate(a, v, mode = 'full')
+ #performs cross-correlation of two series
+ #multiarray.correlate2(a, v, 'full')
+ if a.size < v.size
+ raise("Size of a must be at least the size of v!")
+ end
+ ps = a.size + v.size - 1
+ a_padded = Array.new(ps, 0)
+ a_padded[0...a.size] = a
+
+ out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
+ #ongoing
+ end
+ # Lags the series by k periods.
+ #
+ # The convention is to set the oldest observations (the first ones
+ # in the series) to nil so that the size of the lagged series is the
+ # same as the original.
+ #
+ # Usage:
+ #
+ # ts = (1..10).map { rand }.to_time_series
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ #
+ # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
+ # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
+ #
+ def lag(k = 1)
+ return self if k == 0
+
+ dup.tap do |lagged|
+ (lagged.size - 1).downto k do |i|
+ lagged[i] = lagged[i - k]
+ end
+
+ (0...k).each do |i|
+ lagged[i] = nil
+ end
+ lagged.set_valid_data
+ end
+ end
+
+ #=Diff
+ # Performs differencing of the series.
+ # Note: the first difference of the series is X(t) - X(t-1),
+ # but the second difference is NOT X(t) - X(t-2);
+ # it is the first difference of the first difference:
+ # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
+ #==Params
+ #* *max_lags*: integer, (default: 1), number of differences required.
+ #==Usage
+ #
+ # ts = (1..10).map { rand }.to_ts
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ #
+ # ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
+ #==Returns
+ # Timeseries object
+ def diff(max_lags = 1)
+ ts = self
+ difference = []
+ max_lags.times do
+ difference = ts - ts.lag
+ ts = difference
+ end
+ difference
+ end
+
+ #=Moving Average
+ # Calculates the moving average of the series using the provided
+ # lookback argument. The lookback defaults to 10 periods.
+ #==Parameters
+ #* *n*: integer, (default = 10) - lookback argument
+ #
+ #==Usage
+ #
+ # ts = (1..100).map { rand }.to_ts
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ #
+ # # first 9 observations are nil
+ # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
+ #
+ #==Returns
+ #Resulting moving average timeseries object
+ def ma(n = 10)
+ return mean if n >= size
+
+ ([nil] * (n - 1) + (0..(size - n)).map do |i|
+ self[i...(i + n)].inject(&:+) / n
+ end).to_time_series
+ end
+
+ #=Exponential Moving Average
+ # Calculates an exponential moving average of the series using a
+ # specified parameter. If wilder is false (the default) then the EMA
+ # uses a smoothing value of 2 / (n + 1); if it is true then it uses the
+ # Welles Wilder smoother of 1 / n.
+ #
+ # Warning for EMA usage: EMAs are unstable for small series, as they
+ # use a lot more than n observations to calculate. The series is stable
+ # if the size of the series is >= 3.45 * (n + 1).
+ #
+ #==Parameters
+ #* *n*: integer, (default = 10)
+ #* *wilder*: boolean, (default = false), if true, the 1/n value is used for smoothing; if false, 2/(n+1) is used
+ #
+ #==Usage
+ # ts = (1..100).map { rand }.to_ts
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ #
+ # # first 9 observations are nil
+ # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
+ #
+ #==Returns
+ #EMA timeseries
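+ #
+ # For the Welles Wilder variant (illustrative):
+ # ts.ema(10, true) # uses the 1/n smoothing factor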
+ def ema(n = 10, wilder = false)
+ smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
+
+ # need to start everything from the first non-nil observation
+ start = self.data.index { |i| i != nil }
+
+ # first n - 1 observations are nil
+ base = [nil] * (start + n - 1)
+
+ # nth observation is just a moving average
+ base << self[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
+
+ (start + n).upto size - 1 do |i|
+ base << self[i] * smoother + (1 - smoother) * base.last
+ end
+
+ base.to_time_series
+ end
+
+ #=Moving Average Convergence-Divergence
+ # Calculates the MACD (moving average convergence-divergence) of the time
+ # series - this is a comparison of a fast EMA with a slow EMA.
+ #
+ #==Parameters
+ #* *fast*: integer, (default = 12) - fast component of MACD
+ #* *slow*: integer, (default = 26) - slow component of MACD
+ #* *signal*: integer, (default = 9) - signal component of MACD
+ #
+ #==Usage
+ # ts = (1..100).map { rand }.to_ts
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ # ts.macd(13)
+ #
+ #==Returns
+ # Array of two timeseries: the MACD series (fast EMA minus slow EMA) and its signal line (EMA of the MACD series)
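+ #
+ # For example (illustrative):
+ # macd_series, signal = ts.macd
+ # # macd_series is ema(12) - ema(26); signal is its 9-period EMA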
+ def macd(fast = 12, slow = 26, signal = 9)
+ series = ema(fast) - ema(slow)
+ [series, series.ema(signal)]
+ end
+
+ # Borrow the operations from Vector, but convert to time series
+ def + series
+ super.to_a.to_ts
+ end
+
+ def - series
+ super.to_a.to_ts
+ end
+
+ def to_s
+ sprintf("Time Series(type:%s, n:%d)[%s]", @type.to_s, @data.size,
+ @data.collect{|d| d.nil? ? "nil":d}.join(","))
+ end
+ end
+ end
+ end