statsample-timeseries 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
require 'statsample-timeseries/arima/likelihood'
module Statsample
  module TimeSeries
    module Arima

      # Estimates ARMA coefficients by maximizing the Kalman-filter log
      # likelihood (KF::LogLikelihood) with GSL's Nelder-Mead simplex
      # ("nmsimplex") minimizer over the combined phi/theta vector.
      class KalmanFilter
        include Statsample::TimeSeries
        include GSL::MultiMin

        # Timeseries object the model is fitted on
        attr_accessor :ts
        # Autoregressive order
        attr_accessor :p
        # Integrated (differencing) order
        attr_accessor :i
        # Moving average order
        attr_accessor :q

        # Autoregressive coefficients, populated by #ks
        attr_reader :ar
        # Moving average coefficients, populated by #ks
        attr_reader :ma

        # Creates a new KalmanFilter object and immediately runs #ks,
        # which computes the likelihood and fills #ar and #ma.
        def initialize(ts=[].to_ts, p=0, i=0, q=0)
          @ts = ts
          @p = p
          @i = i
          @q = q
          ks #call the filter
        end

        def to_s
          sprintf("ARIMA model(p = %d, i = %d, q = %d) on series(%d elements) - [%s]",
                  @p, @i, @q, @ts.size, @ts.to_a.join(','))
        end

        # = Kalman Filter
        # Minimizes the negated KF::LogLikelihood iteratively, starting from an
        # all-zero parameter vector of length p + q, for at most 100 simplex
        # iterations or until the simplex size drops below 1e-2.
        # == Usage
        #   @s = (1..100).map { rand }.to_ts
        #   @kf = Statsample::TimeSeries::ARIMA.ks(@s, 1, 0, 0)
        #   # => ks is implicitly called in above operation
        #   @kf.ar
        #   # => AR coefficients
        # == Returns
        # Array of optimized parameters: the first @p entries are the AR (phi)
        # coefficients, the following @q entries the MA (theta) coefficients.
        def ks
          initial = Array.new((@p + @q), 0.0)

          my_f = Proc.new { |x, params|
            # In rb-gsl, params stay fixed while the minimizer varies x.
            timeseries = params[0]
            p, q = params[1], params[2]
            # Negated: GSL minimizes, we want to maximize the log likelihood.
            -Arima::KF::LogLikelihood.new(x.to_a, timeseries, p, q).ll
          }
          np = @p + @q
          my_func = Function.alloc(my_f, np)
          my_func.set_params([@ts, @p, @q])
          x = GSL::Vector.alloc(initial)
          ss = GSL::Vector.alloc(np)
          ss.set_all(0.1)

          minimizer = FMinimizer.alloc("nmsimplex", np)
          minimizer.set(my_func, x, ss)
          status = GSL::CONTINUE
          iter = 0
          while status == GSL::CONTINUE && iter < 100
            iter += 1
            begin
              status = minimizer.iterate()
              status = minimizer.test_size(1e-2)
              x = minimizer.x
            rescue
              # The likelihood can be undefined for candidate parameters
              # (e.g. LogLikelihood#ll returns nil for non-stationary
              # parameters, so negating it raises); keep the best point
              # found so far instead of aborting the fit.
              break
            end
          end
          @ar = (@p > 0) ? x.to_a[0...@p] : []
          @ma = (@q > 0) ? x.to_a[@p...(@p + @q)] : []
          x.to_a
        end

        # = Log Likelihood
        # Computes log likelihood on given parameters, ARMA order and timeseries.
        # == Parameters
        # * *params*: array of floats, contains phi/theta parameters
        # * *timeseries*: timeseries object
        # * *p*: integer, AR(p) order
        # * *q*: integer, MA(q) order
        # == Returns
        # LogLikelihood object
        # == Usage
        #   s = (1..100).map { rand }.to_ts
        #   p, q = 1, 0
        #   ll = KalmanFilter.log_likelihood([0.2], s, p, q)
        #   ll.log_likelihood
        #   # => -22.66
        #   ll.sigma
        #   # => 0.232
        def self.log_likelihood(params, timeseries, p, q)
          Arima::KF::LogLikelihood.new(params, timeseries, p, q)
        end

        # = T
        # The coefficient matrix for the state vector in the state equation.
        # Its dimensions are (r+k) x (r+k).
        # == Parameters
        # * *r*: integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
        # * *k*: integer, number of exogenous variables in the ARMA model
        # * *p*: integer, the AR order of the ARMA model
        # == References
        # Statsmodels tsa, Durbin and Koopman Section 4.7
        #def self.T(r, k, p)
        #  arr = Matrix.zero(r)
        #  params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
        #
        #  params_padded[0...p] = params[k...(p+k)]
        #  intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
        #  #appending an array filled with padded values in beginning
        #  intermediate_matrix[0,0] = [params_padded]
        #
        #  #now generating column matrix for that:
        #  arr = Matrix.columns(intermediate_matrix)
        #  arr_00 = arr[0,0]
        #
        #  #identity matrix substitution in matrix except row[0] and column[0]
        #  r.times do |i|
        #    arr[r,r] = 1
        #  end
        #  arr[0,0] = arr_00
        #  arr
        #end

      end
    end
  end
end
@@ -0,0 +1,101 @@
module Statsample
  module TimeSeries
    module Arima
      module KF
        # Log likelihood of an ARMA(p, q) process computed via Kalman-filter
        # recursions on the model's state-space form.
        class LogLikelihood

          # Gives log likelihood value of an ARMA(p, q) process on given parameters
          attr_reader :log_likelihood

          # Gives sigma value (innovation variance estimate) of an ARMA(p,q)
          # process on given parameters
          attr_reader :sigma

          # Gives AIC (Akaike Information Criterion)
          # https://www.scss.tcd.ie/Rozenn.Dahyot/ST7005/13AICBIC.pdf
          attr_reader :aic

          # params: Array of Floats — the p phi values followed by the q theta
          # values. timeseries: the series the likelihood is evaluated on.
          # The likelihood is computed eagerly: initialize calls #ll, so
          # #log_likelihood, #sigma and #aic are available right away.
          def initialize(params, timeseries, p, q)
            @params = params
            @timeseries = timeseries
            @p = p
            @q = q
            ll
          end

          # === Log likelihood link function.
          # Iteratively minimized by the simplex algorithm via KalmanFilter#ks.
          # Not meant to be used directly. Will make it private later.
          #
          # NOTE(review): returns nil (bare +return+ below) when the candidate
          # parameters look non-stationary/non-invertible; callers must be
          # prepared for a nil result (KalmanFilter#ks relies on the resulting
          # error being rescued).
          def ll
            params, timeseries = @params, @timeseries
            p, q = @p, @q

            # Split the flat parameter vector into AR (phi) and MA (theta) parts.
            phi = []
            theta = []
            phi = params[0...p] if p > 0
            theta = params[(p)...(p + q)] if q > 0

            # Crude stationarity/invertibility guard: reject parameter vectors
            # whose absolute values sum to more than 1.
            [phi, theta].each do |v|
              if v.size>0 and v.map(&:abs).inject(:+) > 1
                return
              end
            end

            # State dimension: m = max(p, q).
            m = [p, q].max
            # h combines phi and theta contributions per state component.
            # (Relies on statsample's public Matrix#[]= extension.)
            h = Matrix.column_vector(Array.new(m,0))
            m.times do |i|
              h[i,0] = phi[i] if i < p
              h[i,0] = h[i,0] + theta[i] if i < q
            end

            # Transition matrix t: phi in the first column, shifted identity
            # in the remaining columns.
            t = Matrix.zero(m)
            # set_column is available in utility.rb
            t = t.set_column(0, phi)
            if(m > 1)
              t[0...(m-1), 1...m] = Matrix.I(m-1)
              # chances of extra constant 0 values as unbalanced column, so:
              t = Matrix.columns(t.column_vectors)
            end

            # NOTE(review): g is assigned but never used below.
            g = Matrix[[1]]
            a_t = Matrix.column_vector(Array.new(m,0))  # state estimate
            n = timeseries.size
            z = Matrix.row_vector(Array.new(m,0))       # observation vector
            z[0,0] = 1
            p_t = Matrix.I(m)                           # state covariance
            v_t, f_t = Array.new(n,0), Array.new(n, 0)  # innovations / variances

            # Kalman filter recursion over the series: prediction error v_t,
            # its variance f_t, gain k_t, then state and covariance updates.
            n.times do |i|
              v_t[i] = (z * a_t).map { |x| timeseries[i] - x }[0,0]

              f_t[i] = (z * p_t * (z.transpose)).map { |x| x + 1 }[0,0]

              k_t = ((t * p_t * z.transpose) + h).map { |x| x / f_t[i] }

              a_t = (t * a_t) + (k_t * v_t[i])
              l_t = t - k_t * z
              j_t = h - k_t

              p_t = (t * p_t * (l_t.transpose)) + (h * (j_t.transpose))
            end

            # Concentrated variance estimate: mean of v^2 / f.
            # (Numeric#square comes from statsample's extensions.)
            pot = v_t.map(&:square).zip(f_t).map { |x,y| x / y}.inject(:+)
            sigma_2 = pot.to_f / n.to_f

            # Gaussian log likelihood with the concentrated sigma^2 plugged in.
            f_t_log_sum = f_t.map { |x| Math.log(x) }.inject(:+)
            @log_likelihood = -0.5 * (n*Math.log(2*Math::PI) + n*Math.log(sigma_2) + f_t_log_sum + n)

            @sigma = sigma_2
            # AIC with p + q + 1 estimated parameters (including sigma).
            @aic = -(2 * @log_likelihood - 2*(p+q+1))
            return @log_likelihood
          end

          def to_s
            sprintf("LogLikelihood(p = %d, q = %d) on params: [%s]",
              @p, @q, @params.join(', '))
          end
        end
      end
    end
  end
end
@@ -0,0 +1,291 @@
require 'statsample-timeseries/timeseries/pacf'
module Statsample::TimeSeriesShorthands
  # Builds a new Statsample::TimeSeries::Series (:scale typed) from the
  # receiver. Any extra arguments are forwarded straight to Series.new.
  def to_time_series(*args)
    Statsample::TimeSeries::Series.new(self, :scale, *args)
  end

  # Short form of #to_time_series.
  alias_method :to_ts, :to_time_series
end
11
+
# Mix the shorthand converters into Array so any array can be turned
# into a time series via #to_ts / #to_time_series.
class Array
  include Statsample::TimeSeriesShorthands
end
15
+
module Statsample
  module TimeSeries
    # Collection of data indexed by time.
    # The order goes from earliest to latest.
    class Series < Statsample::Vector
      include Statsample::TimeSeries::Pacf
      # Calculates the autocorrelation coefficients of the series.
      #
      # The first element is always 1, since that is the correlation
      # of the series with itself.
      #
      # Usage:
      #
      #   ts = (1..100).map { rand }.to_time_series
      #
      #   ts.acf     # => array with first 21 autocorrelations
      #   ts.acf 3   # => array with first 3 autocorrelations
      #
      def acf(max_lags = nil)
        # default lag count follows the common 10*log10(n) rule of thumb
        max_lags ||= (10 * Math.log10(size)).to_i

        (0..max_lags).map do |i|
          if i == 0
            1.0
          else
            m = self.mean

            # can't use Pearson coefficient since the mean for the lagged series should
            # be the same as the regular series
            ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
          end
        end
      end

      # = Partial Autocorrelation
      # Generates partial autocorrelation series for a timeseries
      # == Parameters
      # * *max_lags*: integer, optional - provide number of lags
      # * *method*: string. Default: 'yw'.
      #   * *yw*: For yule-walker algorithm unbiased approach
      #   * *mle*: For Maximum likelihood algorithm approach
      #   * *ld*: For Levinson-Durbin recursive approach
      # == Returns
      # array of pacf
      def pacf(max_lags = nil, method = :yw)

        method = method.downcase.to_sym
        max_lags ||= (10 * Math.log10(size)).to_i
        if method.eql? :yw or method.eql? :mle
          Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
        elsif method == :ld
          # Levinson-Durbin works on the autocovariance sequence.
          series = self.acvf
          Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
        else
          raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
        end
      end

      # = Autoregressive estimation
      # Generates AR(k) series for the calling timeseries by yule walker.
      # == Parameters
      # * *n*: integer, (default = 1500) number of observations for AR.
      # * *k*: integer, (default = 1) order of AR process.
      # == Returns
      # Array constituting estimated AR series.
      def ar(n = 1500, k = 1)
        series = Statsample::TimeSeries.arima
        #series = Statsample::TimeSeries::ARIMA.new
        series.yule_walker(self, n, k)
      end

      # = AutoCovariance
      # Provides autocovariance of timeseries.
      # == Parameters
      # * *demean* = true; optional. Supply false if series is not to be demeaned
      # * *unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
      # == Returns
      # Array of autocovariance values
      #
      # NOTE(review): the two denominator branches below look swapped — the
      # `unbiased` branch divides every lag by n (the biased form), while the
      # other branch divides by n - lag (the unbiased form). Confirm against
      # the intended statistics before relying on the flag.
      def acvf(demean = true, unbiased = true)
        #TODO: change parameters list in opts.merge as suggested by John
        #functionality: computes autocovariance of timeseries data
        #returns: array of autocovariances

        if demean
          demeaned_series = self - self.mean
        else
          demeaned_series = self
        end
        # number of lags computed, same 10*log10(n) rule as #acf
        n = (10 * Math.log10(size)).to_i + 1
        m = self.mean
        if unbiased
          d = Array.new(self.size, self.size)
        else
          d = ((1..self.size).to_a.reverse)[0..n]
        end


        0.upto(n - 1).map do |i|
          (demeaned_series * (self.lag(i) - m)).sum / d[i]
        end
      end

      # = Correlation
      # Gives correlation of timeseries.
      #
      # NOTE(review): unfinished implementation — the method allocates the
      # output array but never fills it (see the trailing #ongoing marker),
      # and the error message is misleading: the guard actually requires
      # a.size >= v.size, not equal sizes.
      def correlate(a, v, mode = 'full')
        #performs cross-correlation of two series
        #multiarray.correlate2(a, v, 'full')
        if a.size < v.size
          raise("Should have same size!")
        end
        # zero-pad a up to the full cross-correlation length
        ps = a.size + v.size - 1
        a_padded = Array.new(ps, 0)
        a_padded[0...a.size] = a

        out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
        #ongoing
      end

      # Lags the series by k periods.
      #
      # The convention is to set the oldest observations (the first ones
      # in the series) to nil so that the size of the lagged series is the
      # same as the original.
      #
      # Usage:
      #
      #   ts = (1..10).map { rand }.to_time_series
      #   # => [0.69, 0.23, 0.44, 0.71, ...]
      #
      #   ts.lag     # => [nil, 0.69, 0.23, 0.44, ...]
      #   ts.lag 2   # => [nil, nil, 0.69, 0.23, ...]
      #
      def lag(k = 1)
        return self if k == 0

        # shift in place on a copy, walking backwards so values are not
        # overwritten before they are moved
        dup.tap do |lagged|
          (lagged.size - 1).downto k do |i|
            lagged[i] = lagged[i - k]
          end

          (0...k).each do |i|
            lagged[i] = nil
          end
          # refresh statsample's internal valid-data bookkeeping
          lagged.set_valid_data
        end
      end

      # = Diff
      # Performs the difference of the series.
      # Note: The first difference of series is X(t) - X(t-1)
      # But, second difference of series is NOT X(t) - X(t-2)
      # It is the first difference of the first difference
      # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
      # == Params
      # * *max_lags*: integer, (default: 1), number of differences reqd.
      # == Usage
      #
      #   ts = (1..10).map { rand }.to_ts
      #   # => [0.69, 0.23, 0.44, 0.71, ...]
      #
      #   ts.diff    # => [nil, -0.46, 0.21, 0.27, ...]
      # == Returns
      # Timeseries object
      def diff(max_lags = 1)
        ts = self
        difference = []
        max_lags.times do
          difference = ts - ts.lag
          ts = difference
        end
        difference
      end

      # = Moving Average
      # Calculates the moving average of the series using the provided
      # lookback argument. The lookback defaults to 10 periods.
      # == Parameters
      # * *n*: integer, (default = 10) - lookback argument
      #
      # == Usage
      #
      #   ts = (1..100).map { rand }.to_ts
      #   # => [0.69, 0.23, 0.44, 0.71, ...]
      #
      #   # first 9 observations are nil
      #   ts.ma   # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
      #
      # == Returns
      # Resulting moving average timeseries object
      #
      # NOTE(review): when n >= size this returns the scalar mean, not a
      # timeseries — callers expecting a Series must handle that case.
      def ma(n = 10)
        return mean if n >= size

        ([nil] * (n - 1) + (0..(size - n)).map do |i|
          self[i...(i + n)].inject(&:+) / n
        end).to_time_series
      end

      # = Exponential Moving Average
      # Calculates an exponential moving average of the series using a
      # specified parameter. If wilder is false (the default) then the EMA
      # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
      # Welles Wilder smoother of 1 / n.
      #
      # Warning for EMA usage: EMAs are unstable for small series, as they
      # use a lot more than n observations to calculate. The series is stable
      # if the size of the series is >= 3.45 * (n + 1)
      #
      # == Parameters
      # * *n*: integer, (default = 10)
      # * *wilder*: boolean, (default = false), if true, 1/n value is used for smoothing; if false, uses 2/(n+1) value
      #
      # == Usage
      #   ts = (1..100).map { rand }.to_ts
      #   # => [0.69, 0.23, 0.44, 0.71, ...]
      #
      #   # first 9 observations are nil
      #   ts.ema   # => [ ... nil, 0.509... , 0.433..., ... ]
      #
      # == Returns
      # EMA timeseries
      def ema(n = 10, wilder = false)
        smoother = wilder ? 1.0 / n : 2.0 / (n + 1)

        # need to start everything from the first non-nil observation
        start = self.data.index { |i| i != nil }

        # first n - 1 observations are nil
        base = [nil] * (start + n - 1)

        # nth observation is just a moving average
        base << self[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n

        (start + n).upto size - 1 do |i|
          base << self[i] * smoother + (1 - smoother) * base.last
        end

        base.to_time_series
      end

      # = Moving Average Convergence-Divergence
      # Calculates the MACD (moving average convergence-divergence) of the time
      # series - this is a comparison of a fast EMA with a slow EMA.
      #
      # == Parameters
      # * *fast*: integer, (default = 12) - fast component of MACD
      # * *slow*: integer, (default = 26) - slow component of MACD
      # * *signal*: integer, (default = 9) - signal component of MACD
      #
      # == Usage
      #   ts = (1..100).map { rand }.to_ts
      #   # => [0.69, 0.23, 0.44, 0.71, ...]
      #   ts.macd(13)
      #
      # == Returns
      # Array of two timeseries - comparison of fast EMA with slow and EMA with signal value
      def macd(fast = 12, slow = 26, signal = 9)
        series = ema(fast) - ema(slow)
        [series, series.ema(signal)]
      end

      # Borrow the operations from Vector, but convert to time series
      def + series
        super.to_a.to_ts
      end

      def - series
        super.to_a.to_ts
      end

      def to_s
        sprintf("Time Series(type:%s, n:%d)[%s]", @type.to_s, @data.size,
          @data.collect{|d| d.nil? ? "nil":d}.join(","))
      end
    end
  end
end