bio-statsample-timeseries 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +3 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/bio-statsample-timeseries.rb +1 -1
- data/lib/bio-statsample-timeseries/arima.rb +162 -9
- data/lib/bio-statsample-timeseries/timeseries.rb +125 -13
- data/lib/bio-statsample-timeseries/timeseries/pacf.rb +74 -15
- data/lib/bio-statsample-timeseries/utility.rb +118 -0
- data/test/test_arima_simulators.rb +18 -8
- data/test/test_matrix.rb +92 -0
- data/test/test_tseries.rb +1 -1
- metadata +6 -5
- data/lib/bio-statsample-timeseries/statsample-timeseries.rb +0 -2
data/README.rdoc
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
= bio-statsample-timeseries
|
2
2
|
|
3
3
|
{<img
|
4
|
-
src="https://secure.travis-ci.org/
|
5
|
-
/>}[http://travis-ci.org/#!/
|
4
|
+
src="https://secure.travis-ci.org/AnkurGel/bioruby-statsample-timeseries.png"
|
5
|
+
/>}[http://travis-ci.org/#!/AnkurGel/bioruby-statsample-timeseries]
|
6
6
|
|
7
7
|
Full description goes here
|
8
8
|
|
@@ -27,7 +27,7 @@ the source tree.
|
|
27
27
|
|
28
28
|
Information on the source tree, documentation, issues and how to contribute, see
|
29
29
|
|
30
|
-
http://github.com/
|
30
|
+
http://github.com/AnkurGel/bioruby-statsample-timeseries
|
31
31
|
|
32
32
|
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
33
|
|
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
|
|
15
15
|
Jeweler::Tasks.new do |gem|
|
16
16
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
17
|
gem.name = "bio-statsample-timeseries"
|
18
|
-
gem.homepage = "http://github.com/
|
18
|
+
gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-timeseries"
|
19
19
|
gem.license = "MIT"
|
20
20
|
gem.summary = %Q{TimeSeries modules for Statsample}
|
21
21
|
gem.description = %Q{Statsample-timeseries is an extension to Statsample. It incorporates helpful timeseries functions and modules like ARMA, ARIMA, acf, pacf, lags etc.}
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.2
|
@@ -8,9 +8,9 @@
|
|
8
8
|
#
|
9
9
|
# In this file only require other files. Avoid other source code.
|
10
10
|
|
11
|
-
require 'bio-statsample-timeseries/statsample-timeseries.rb'
|
12
11
|
require 'statsample'
|
13
12
|
require_relative 'bio-statsample-timeseries/timeseries.rb'
|
14
13
|
require_relative 'bio-statsample-timeseries/arima.rb'
|
14
|
+
require_relative 'bio-statsample-timeseries/utility.rb'
|
15
15
|
|
16
16
|
|
@@ -1,15 +1,29 @@
|
|
1
1
|
#require 'debugger'
|
2
2
|
module Statsample
|
3
|
-
module
|
3
|
+
module TimeSeries
|
4
|
+
|
5
|
+
def self.arima
|
6
|
+
#not passing (ds,p,i,q) elements for now
|
7
|
+
#will do that once #arima is ready for all modelling
|
8
|
+
Statsample::TimeSeries::ARIMA.new
|
9
|
+
end
|
10
|
+
|
4
11
|
class ARIMA < Statsample::Vector
|
5
12
|
include Statsample::TimeSeries
|
6
|
-
|
13
|
+
# SUGGESTION: We could use an API similar to R
|
14
|
+
# like
|
15
|
+
# ar_obj=Statsample::TimeSeries.arima(ds,p,i,q)
|
16
|
+
# which calls
|
17
|
+
# Statsample::TimeSeries::Arima.new(ds,p,i,q)
|
7
18
|
def arima(ds, p, i, q)
|
8
19
|
#prototype
|
20
|
+
# ISSUE: We should difference the series now, if i>0.
|
21
|
+
# The result should be sent to the next step
|
9
22
|
if q.zero?
|
10
23
|
self.ar(p)
|
11
24
|
elsif p.zero?
|
12
25
|
self.ma(p)
|
26
|
+
# ISSUE-> ELSE -> simultaneous estimation of MA and AR parameters
|
13
27
|
end
|
14
28
|
end
|
15
29
|
|
@@ -20,15 +34,49 @@ module Statsample
|
|
20
34
|
#or Burg's algorithm(more efficient)
|
21
35
|
end
|
22
36
|
|
23
|
-
|
24
|
-
#To be implemented
|
25
|
-
end
|
26
|
-
|
37
|
+
#Converts a linear array into a vector
|
27
38
|
def create_vector(arr)
|
28
39
|
Statsample::Vector.new(arr, :scale)
|
29
40
|
end
|
30
41
|
|
31
|
-
|
42
|
+
|
43
|
+
def yule_walker(ts, n, k)
|
44
|
+
#parameters: timeseries, no of observations, order
|
45
|
+
#returns: simulated autoregression with phi parameters and sigma
|
46
|
+
phi, sigma = Pacf::Pacf.yule_walker(ts, k)
|
47
|
+
return phi, sigma
|
48
|
+
#return ar_sim(n, phi, sigma)
|
49
|
+
end
|
50
|
+
|
51
|
+
def levinson_durbin(ts, n, k)
|
52
|
+
#parameters;
|
53
|
+
#ts: timeseries against which to generate phi coefficients
|
54
|
+
#n: number of observations for simulation
|
55
|
+
#k: order of AR
|
56
|
+
intermediate = Pacf::Pacf.levinson_durbin(ts, k)
|
57
|
+
phi, sigma = intermediate[1], intermediate[0]
|
58
|
+
return phi, sigma
|
59
|
+
#return ar_sim(n, phi, sigma)
|
60
|
+
end
|
61
|
+
|
62
|
+
#=Autoregressive Simulator
|
63
|
+
#Simulates an autoregressive AR(p) model with specified number of
|
64
|
+
#observations(n), with phi number of values for order p and sigma.
|
65
|
+
#
|
66
|
+
#*Analysis*: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
|
67
|
+
#
|
68
|
+
#*Parameters*:
|
69
|
+
#-_n_::integer, number of observations
|
70
|
+
#-_phi_::array of phi values, e.g: [0.35, 0.213] for p = 2
|
71
|
+
#-_sigma_::float, sigma value for whitenoise error generation
|
72
|
+
#
|
73
|
+
#*Usage*:
|
74
|
+
# ar = ARIMA.new
|
75
|
+
# ar.ar_sim(1500, [0.3, 0.9], 0.12)
|
76
|
+
# # => AR(2) autoregressive series of 1500 values
|
77
|
+
#
|
78
|
+
#*Returns*:
|
79
|
+
#Array of generated autoregressive series against attributes
|
32
80
|
def ar_sim(n, phi, sigma)
|
33
81
|
#using random number generator for inclusion of white noise
|
34
82
|
err_nor = Distribution::Normal.rng(0, sigma)
|
@@ -58,7 +106,21 @@ module Statsample
|
|
58
106
|
x - buffer
|
59
107
|
end
|
60
108
|
|
61
|
-
|
109
|
+
#=Moving Average Simulator
|
110
|
+
#Simulates a moving average model with specified number of
|
111
|
+
#observations(n), with theta values for order k and sigma
|
112
|
+
#
|
113
|
+
#*Parameters*:
|
114
|
+
#-_n_::integer, number of observations
|
115
|
+
#-_theta_::array of floats, e.g: [0.23, 0.732], must be < 1
|
116
|
+
#-_sigma_::float, sigma value for whitenoise error
|
117
|
+
#
|
118
|
+
#*Usage*:
|
119
|
+
# ar = ARIMA.new
|
120
|
+
# ar.ma_sim(1500, [0.23, 0.732], 0.27)
|
121
|
+
#
|
122
|
+
#*Returns*:
|
123
|
+
#Array of generated MA(q) model
|
62
124
|
def ma_sim(n, theta, sigma)
|
63
125
|
#n is number of observations (eg: 1000)
|
64
126
|
#theta are the model parameters containing q values
|
@@ -84,7 +146,28 @@ module Statsample
|
|
84
146
|
x
|
85
147
|
end
|
86
148
|
|
87
|
-
#
|
149
|
+
#ARMA(Autoregressive and Moving Average) Simulator
|
150
|
+
#ARMA is represented by:
|
151
|
+
#http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
|
152
|
+
#This simulates the ARMA model against p, q and sigma.
|
153
|
+
#If p = 0, then model is pure MA(q),
|
154
|
+
#If q = 0, then model is pure AR(p),
|
155
|
+
#otherwise, model is ARMA(p, q) represented by above.
|
156
|
+
#
|
157
|
+
#Detailed analysis: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
|
158
|
+
#
|
159
|
+
#*Parameters*:
|
160
|
+
#-_n_::integer, number of observations
|
161
|
+
#-_p_::array, contains p number of phi values for AR(p) process
|
162
|
+
#-_q_::array, contains q number of theta values for MA(q) process
|
163
|
+
#-_sigma_::float, sigma value for whitenoise error generation
|
164
|
+
#
|
165
|
+
#*Usage*:
|
166
|
+
# ar = ARIMA.new
|
167
|
+
# ar.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)
|
168
|
+
#
|
169
|
+
#*Returns*:
|
170
|
+
#array of generated ARMA model values
|
88
171
|
def arma_sim(n, p, q, sigma)
|
89
172
|
#represented by :
|
90
173
|
#http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
|
@@ -119,6 +202,76 @@ module Statsample
|
|
119
202
|
end
|
120
203
|
x - buffer
|
121
204
|
end
|
205
|
+
|
206
|
+
#=Hannan-Rissanen for ARMA fit
|
207
|
+
def self.hannan(ts, p, q, k)
|
208
|
+
start_params = create_vector(Array.new(p+q+k, 0))
|
209
|
+
ts_dup = ts.dup
|
210
|
+
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
module Arima
|
215
|
+
class KalmanFilter < Statsample::Vector
|
216
|
+
include Statsample::TimeSeries
|
217
|
+
|
218
|
+
#=T
|
219
|
+
#The coefficient matrix for the state vector in state equation
|
220
|
+
# Its dimensions are r+k x r+k
|
221
|
+
#*Parameters*
|
222
|
+
#-_r_::integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
|
223
|
+
#-_k_::integer, number of exogenous variables in ARMA model
|
224
|
+
#-_p_::integer, The AR order in ARMA model
|
225
|
+
|
226
|
+
#*References*: Statsmodels tsa, Durbin and Koopman Section 4.7
|
227
|
+
def self.T(r, k, p)
|
228
|
+
arr = Matrix.zero(r)
|
229
|
+
params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
|
230
|
+
|
231
|
+
params_padded[0...p] = params[k...(p+k)]
|
232
|
+
intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
|
233
|
+
#appending an array filled with padded values in beginning
|
234
|
+
intermediate_matrix[0,0] = [params_padded]
|
235
|
+
|
236
|
+
#now generating column matrix for that:
|
237
|
+
arr = Matrix.columns(intermediate_matrix)
|
238
|
+
arr_00 = arr[0,0]
|
239
|
+
|
240
|
+
#identity matrix substitution in matrix except row[0] and column[0]
|
241
|
+
r.times do |i|
|
242
|
+
arr[r,r] = 1
|
243
|
+
end
|
244
|
+
arr[0,0] = arr_00
|
245
|
+
arr
|
246
|
+
end
|
247
|
+
|
248
|
+
|
249
|
+
#=R
|
250
|
+
#The coefficient matrix for the state vector in the observation matrix.
|
251
|
+
#Its dimension is r+k x 1
|
252
|
+
#*Parameters*
|
253
|
+
#-_r_::integer, r is max(p, q+1) where p and q are order of AR and MA respectively
|
254
|
+
#-_k_::integer, number of exogenous variables in ARMA model
|
255
|
+
#-_q_::integer, The MA order in ARMA model
|
256
|
+
#-_p_::integer, The AR order in ARMA model
|
257
|
+
#*References*: Statsmodels tsa, Durbin and Koopman
|
258
|
+
def self.R(r, k, q, p)
|
259
|
+
arr = Matrix.column_vector(Array.new(r,0.0))
|
260
|
+
|
261
|
+
#pending - in kind of difficult end here;
|
262
|
+
end
|
263
|
+
|
264
|
+
#=Z
|
265
|
+
#The Z selector matrix
|
266
|
+
#*Parameters*
|
267
|
+
#-_r_::integer, max(p, q+1)
|
268
|
+
#Returns: vector
|
269
|
+
def self.Z(r)
|
270
|
+
arr = Statsample::Vector.new(Array.new(r, 0.0), :scale)
|
271
|
+
arr[0] = 1.0
|
272
|
+
return arr
|
273
|
+
end
|
274
|
+
end
|
122
275
|
end
|
123
276
|
end
|
124
277
|
end
|
@@ -3,7 +3,7 @@ module Statsample::TimeSeriesShorthands
|
|
3
3
|
# Creates a new Statsample::TimeSeries object
|
4
4
|
# Argument should be equal to TimeSeries.new
|
5
5
|
def to_time_series(*args)
|
6
|
-
Statsample::TimeSeries::
|
6
|
+
Statsample::TimeSeries::Series.new(self, :scale, *args)
|
7
7
|
end
|
8
8
|
|
9
9
|
alias :to_ts :to_time_series
|
@@ -17,7 +17,7 @@ module Statsample
|
|
17
17
|
module TimeSeries
|
18
18
|
# Collection of data indexed by time.
|
19
19
|
# The order goes from earliest to latest.
|
20
|
-
class
|
20
|
+
class Series < Statsample::Vector
|
21
21
|
include Statsample::TimeSeries::Pacf
|
22
22
|
# Calculates the autocorrelation coefficients of the series.
|
23
23
|
#
|
@@ -31,7 +31,7 @@ module Statsample
|
|
31
31
|
# ts.acf # => array with first 21 autocorrelations
|
32
32
|
# ts.acf 3 # => array with first 3 autocorrelations
|
33
33
|
#
|
34
|
-
def acf
|
34
|
+
def acf(max_lags = nil)
|
35
35
|
max_lags ||= (10 * Math.log10(size)).to_i
|
36
36
|
|
37
37
|
(0..max_lags).map do |i|
|
@@ -47,14 +47,95 @@ module Statsample
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
-
|
50
|
+
#=Partial Autocorrelation
|
51
|
+
#Generates partial autocorrelation series for a timeseries
|
52
|
+
#*Parameters*:
|
53
|
+
#-_max_lags_::integer, optional - provide number of lags
|
54
|
+
#-_method_::string. Default: 'yw'.
|
55
|
+
# * _yw_:: For yule-walker algorithm unbiased approach
|
56
|
+
# * _mle_:: For Maximum likelihood algorithm approach
|
57
|
+
# * _ld_:: For Levinson-Durbin recursive approach
|
58
|
+
#Returns - array of pacf
|
59
|
+
#
|
60
|
+
def pacf(max_lags = nil, method = :yw)
|
51
61
|
#parameters:
|
52
62
|
#max_lags => maximum number of lags for pcf
|
53
63
|
#method => for autocovariance in yule_walker:
|
54
64
|
#'yw' for 'yule-walker unbiased', 'mle' for biased maximum likelihood
|
65
|
+
#'ld' for Levinson-Durbin recursion
|
55
66
|
|
67
|
+
method = method.downcase.to_sym
|
56
68
|
max_lags ||= (10 * Math.log10(size)).to_i
|
57
|
-
|
69
|
+
if method.eql? :yw or method.eql? :mle
|
70
|
+
Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
|
71
|
+
elsif method == :ld
|
72
|
+
series = self.acvf
|
73
|
+
Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
|
74
|
+
else
|
75
|
+
raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
#=Autoregressive estimation
|
80
|
+
#Generates AR(k) series for the calling timeseries by yule walker.
|
81
|
+
#*Parameters*:
|
82
|
+
#-_n_::integer, (default = 1500) number of observations for AR.
|
83
|
+
#-_k_::integer, (default = 1) order of AR process.
|
84
|
+
#*Returns*:
|
85
|
+
#Array constituting estimated AR series.
|
86
|
+
#
|
87
|
+
def ar(n = 1500, k = 1)
|
88
|
+
series = Statsample::TimeSeries.arima
|
89
|
+
#series = Statsample::TimeSeries::ARIMA.new
|
90
|
+
series.yule_walker(self, n, k)
|
91
|
+
end
|
92
|
+
|
93
|
+
#=AutoCovariance
|
94
|
+
#Provides autocovariance of timeseries.
|
95
|
+
#-Parameters:
|
96
|
+
#demean = true; optional. Supply false if series is not to be demeaned
|
97
|
+
#unbiased = true; optional. true/false for unbiased/biased form of autocovariance
|
98
|
+
#-Returns-: Autocovariance value
|
99
|
+
#
|
100
|
+
def acvf(demean = true, unbiased = true)
|
101
|
+
#TODO: change parameters list in opts.merge as suggested by John
|
102
|
+
#functionality: computes autocovariance of timeseries data
|
103
|
+
#returns: array of autocovariances
|
104
|
+
|
105
|
+
if demean
|
106
|
+
demeaned_series = self - self.mean
|
107
|
+
else
|
108
|
+
demeaned_series = self
|
109
|
+
end
|
110
|
+
n = self.acf.size
|
111
|
+
m = self.mean
|
112
|
+
if unbiased
|
113
|
+
d = Array.new(self.size, self.size)
|
114
|
+
else
|
115
|
+
d = ((1..self.size).to_a.reverse)[0..n]
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
0.upto(n - 1).map do |i|
|
120
|
+
(demeaned_series * (self.lag(i) - m)).sum / d[i]
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
#=Correlation
|
125
|
+
#Gives correlation of timeseries.
|
126
|
+
#
|
127
|
+
def correlate(a, v, mode = 'full')
|
128
|
+
#performs cross-correlation of two series
|
129
|
+
#multiarray.correlate2(a, v, 'full')
|
130
|
+
if a.size < v.size
|
131
|
+
raise("Should have same size!")
|
132
|
+
end
|
133
|
+
ps = a.size + v.size - 1
|
134
|
+
a_padded = Array.new(ps, 0)
|
135
|
+
a_padded[0...a.size] = a
|
136
|
+
|
137
|
+
out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
|
138
|
+
#ongoing
|
58
139
|
end
|
59
140
|
|
60
141
|
# Lags the series by k periods.
|
@@ -71,7 +152,7 @@ module Statsample
|
|
71
152
|
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
72
153
|
# ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
|
73
154
|
#
|
74
|
-
def lag
|
155
|
+
def lag(k = 1)
|
75
156
|
return self if k == 0
|
76
157
|
|
77
158
|
dup.tap do |lagged|
|
@@ -86,13 +167,14 @@ module Statsample
|
|
86
167
|
end
|
87
168
|
end
|
88
169
|
|
170
|
+
#=Diff
|
89
171
|
# Performs a first difference of the series.
|
90
172
|
#
|
91
173
|
# The convention is to set the oldest observations (the first ones
|
92
174
|
# in the series) to nil so that the size of the diffed series is the
|
93
175
|
# same as the original.
|
94
176
|
#
|
95
|
-
|
177
|
+
#*Usage*:
|
96
178
|
#
|
97
179
|
# ts = (1..10).map { rand }.to_ts
|
98
180
|
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
@@ -103,17 +185,23 @@ module Statsample
|
|
103
185
|
self - self.lag
|
104
186
|
end
|
105
187
|
|
106
|
-
|
188
|
+
#=Moving Average
|
189
|
+
# Calculates the moving average of the series using the provided
|
107
190
|
# lookback argument. The lookback defaults to 10 periods.
|
191
|
+
#*Parameters*:
|
192
|
+
#-_n_::integer, (default = 10) - lookback argument
|
108
193
|
#
|
109
|
-
|
194
|
+
#*Usage*:
|
110
195
|
#
|
111
196
|
# ts = (1..100).map { rand }.to_ts
|
112
197
|
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
113
198
|
#
|
114
199
|
# # first 9 observations are nil
|
115
200
|
# ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
116
|
-
|
201
|
+
#
|
202
|
+
#*Returns*:
|
203
|
+
#Resulting moving average timeseries object
|
204
|
+
def ma(n = 10)
|
117
205
|
return mean if n >= size
|
118
206
|
|
119
207
|
([nil] * (n - 1) + (0..(size - n)).map do |i|
|
@@ -121,6 +209,7 @@ module Statsample
|
|
121
209
|
end).to_time_series
|
122
210
|
end
|
123
211
|
|
212
|
+
#=Exponential Moving Average
|
124
213
|
# Calculates an exponential moving average of the series using a
|
125
214
|
# specified parameter. If wilder is false (the default) then the EMA
|
126
215
|
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
@@ -130,14 +219,22 @@ module Statsample
|
|
130
219
|
# use a lot more than n observations to calculate. The series is stable
|
131
220
|
# if the size of the series is >= 3.45 * (n + 1)
|
132
221
|
#
|
133
|
-
|
222
|
+
#*Parameters*:
|
223
|
+
#-_n_::integer, (default = 10)
|
224
|
+
#-_wilder_::boolean, (default = false), if true, 1/n value is used for smoothing;
|
225
|
+
#if false, uses 2/(n+1) value
|
226
|
+
#
|
227
|
+
#*Usage*:
|
134
228
|
#
|
135
229
|
# ts = (1..100).map { rand }.to_ts
|
136
230
|
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
137
231
|
#
|
138
232
|
# # first 9 observations are nil
|
139
233
|
# ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
|
140
|
-
|
234
|
+
#
|
235
|
+
#*Returns*:
|
236
|
+
#EMA timeseries
|
237
|
+
def ema(n = 10, wilder = false)
|
141
238
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
142
239
|
|
143
240
|
# need to start everything from the first non-nil observation
|
@@ -156,9 +253,24 @@ module Statsample
|
|
156
253
|
base.to_time_series
|
157
254
|
end
|
158
255
|
|
256
|
+
#=Moving Average Convergence-Divergence
|
159
257
|
# Calculates the MACD (moving average convergence-divergence) of the time
|
160
258
|
# series - this is a comparison of a fast EMA with a slow EMA.
|
161
|
-
|
259
|
+
#
|
260
|
+
# *Parameters*:
|
261
|
+
# -_fast_::integer, (default = 12) - fast component of MACD
|
262
|
+
# -_slow_::integer, (default = 26) - slow component of MACD
|
263
|
+
# -_signal_::integer, (default = 9) - signal component of MACD
|
264
|
+
#
|
265
|
+
# *Usage*:
|
266
|
+
# ts = (1..100).map { rand }.to_ts
|
267
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
268
|
+
# ts.macd(13)
|
269
|
+
#
|
270
|
+
# *Returns*:
|
271
|
+
# Array of two timeseries - comparison of fast EMA with slow
|
272
|
+
# and EMA with signal value
|
273
|
+
def macd(fast = 12, slow = 26, signal = 9)
|
162
274
|
series = ema(fast) - ema(slow)
|
163
275
|
[series, series.ema(signal)]
|
164
276
|
end
|
@@ -8,24 +8,79 @@ module Statsample
|
|
8
8
|
#Inspiration: StatsModels
|
9
9
|
pacf = [1.0]
|
10
10
|
(1..max_lags).map do |i|
|
11
|
-
pacf << yule_walker(timeseries, i, method)[-1]
|
11
|
+
pacf << yule_walker(timeseries, i, method)[0][-1]
|
12
12
|
end
|
13
13
|
pacf
|
14
14
|
end
|
15
15
|
|
16
|
+
|
17
|
+
#=Levinson-Durbin Algorithm
|
18
|
+
#*Parameters*:
|
19
|
+
#-_series_ : timeseries, or a series of autocovariances
|
20
|
+
#-_nlags_: integer(default: 10): largest lag to include in recursion or order of the AR process
|
21
|
+
#-_is_acovf_: boolean(default: false): series is timeseries if it is false, else contains autocovariances
|
22
|
+
|
23
|
+
#*returns*:
|
24
|
+
#-_sigma_v_: estimate of the error variance
|
25
|
+
#-_arcoefs_: AR coefficients
|
26
|
+
#-_pacf_: pacf function
|
27
|
+
#-_sig_: array of error variance estimates for each recursion order (index 0 is left unset)
|
28
|
+
def self.levinson_durbin(series, nlags = 10, is_acovf = false)
|
29
|
+
|
30
|
+
if is_acovf
|
31
|
+
series = series.map(&:to_f)
|
32
|
+
else
|
33
|
+
#nlags = order(k) of AR in this case
|
34
|
+
series = series.acvf.map(&:to_f)[0..nlags]
|
35
|
+
end
|
36
|
+
#phi = Array.new((nlags+1), 0.0) { Array.new(nlags+1, 0.0) }
|
37
|
+
order = nlags
|
38
|
+
phi = Matrix.zero(nlags + 1)
|
39
|
+
sig = Array.new(nlags+1)
|
40
|
+
|
41
|
+
#setting initial point for recursion:
|
42
|
+
phi[1,1] = series[1]/series[0]
|
43
|
+
#phi[1][1] = series[1]/series[0]
|
44
|
+
sig[1] = series[0] - phi[1, 1] * series[1]
|
45
|
+
|
46
|
+
2.upto(order).each do |k|
|
47
|
+
phi[k, k] = (series[k] - (Statsample::Vector.new(phi[1...k, k-1]) * series[1...k].reverse.to_ts).sum) / sig[k-1]
|
48
|
+
#some serious refinement needed in above for matrix manipulation. Will do today
|
49
|
+
1.upto(k-1).each do |j|
|
50
|
+
phi[j, k] = phi[j, k-1] - phi[k, k] * phi[k-j, k-1]
|
51
|
+
end
|
52
|
+
sig[k] = sig[k-1] * (1-phi[k, k] ** 2)
|
53
|
+
|
54
|
+
end
|
55
|
+
sigma_v = sig[-1]
|
56
|
+
arcoefs_delta = phi.column(phi.column_size - 1)
|
57
|
+
arcoefs = arcoefs_delta[1..arcoefs_delta.size]
|
58
|
+
pacf = diag(phi)
|
59
|
+
pacf[0] = 1.0
|
60
|
+
return [sigma_v, arcoefs, pacf, sig, phi]
|
61
|
+
end
|
62
|
+
|
63
|
+
def self.diag(mat)
|
64
|
+
#returns array of diagonal elements of a matrix.
|
65
|
+
#will later abstract it to matrix.rb in Statsample
|
66
|
+
return mat.each_with_index(:diagonal).map { |x, r, c| x }
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
#=Yule Walker Algorithm
|
71
|
+
#From the series, estimates AR(p)(autoregressive) parameter
|
72
|
+
#using Yule-Walker equation. See -
|
73
|
+
#http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
|
74
|
+
|
75
|
+
#*Parameters*:
|
76
|
+
#-_ts_::timeseries
|
77
|
+
#-_k_::order, default = 1
|
78
|
+
#-_method_:: can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator is (n - k)
|
79
|
+
|
80
|
+
#*returns*:
|
81
|
+
#-_rho_:: autoregressive coefficients
|
82
|
+
#-_sigma_:: sigma parameter
|
16
83
|
def self.yule_walker(ts, k = 1, method='yw')
|
17
|
-
#From the series, estimates AR(p)(autoregressive) parameter
|
18
|
-
#using Yule-Waler equation. See -
|
19
|
-
#http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
|
20
|
-
|
21
|
-
#parameters:
|
22
|
-
#ts = series
|
23
|
-
#k = order, default = 1
|
24
|
-
#method = can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator
|
25
|
-
#is (n - k)
|
26
|
-
|
27
|
-
#returns:
|
28
|
-
#rho => autoregressive coefficients
|
29
84
|
ts = ts - ts.mean
|
30
85
|
n = ts.size
|
31
86
|
if method.downcase.eql? 'yw'
|
@@ -37,7 +92,7 @@ module Statsample
|
|
37
92
|
denom =->(k) { n }
|
38
93
|
end
|
39
94
|
r = Array.new(k + 1) { 0.0 }
|
40
|
-
r[0] = ts.map { |x| x
|
95
|
+
r[0] = ts.map { |x| x**2 }.inject(:+).to_f / denom.call(0).to_f
|
41
96
|
|
42
97
|
1.upto(k) do |l|
|
43
98
|
r[l] = (ts[0...-l].zip(ts[l...ts.size])).map do |x|
|
@@ -48,7 +103,11 @@ module Statsample
|
|
48
103
|
r_R = toeplitz(r[0...-1])
|
49
104
|
|
50
105
|
mat = Matrix.columns(r_R).inverse()
|
51
|
-
solve_matrix(mat, r[1..r.size])
|
106
|
+
phi = solve_matrix(mat, r[1..r.size])
|
107
|
+
phi_vector = Statsample::Vector.new(phi, :scale)
|
108
|
+
r_vector = Statsample::Vector.new(r[1..r.size], :scale)
|
109
|
+
sigma = r[0] - (r_vector * phi_vector).sum
|
110
|
+
return [phi, sigma]
|
52
111
|
end
|
53
112
|
|
54
113
|
def self.toeplitz(arr)
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Statsample
|
2
|
+
class Vector
|
3
|
+
include Enumerable
|
4
|
+
include Writable
|
5
|
+
include Summarizable
|
6
|
+
|
7
|
+
#=Squares of sum
|
8
|
+
#---
|
9
|
+
#parameter:
|
10
|
+
#-demean::boolean - optional. __default__: false
|
11
|
+
#Sums the timeseries and then returns the square
|
12
|
+
def squares_of_sum(demean = false)
|
13
|
+
if demean
|
14
|
+
m = self.mean
|
15
|
+
self.map { |x| (x-m) }.sum ** 2
|
16
|
+
else
|
17
|
+
return self.sum.to_f ** 2
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
class ::Matrix
|
24
|
+
#=Squares of sum
|
25
|
+
#---
|
26
|
+
#Does squares of sum in column order.
|
27
|
+
#Necessary for computations in various processes
|
28
|
+
def squares_of_sum
|
29
|
+
(0...column_size).map do |j|
|
30
|
+
self.column(j).sum ** 2
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
#=Checks if given matrix is symmetric or not
|
35
|
+
#---
|
36
|
+
#returns bool
|
37
|
+
#`symmetric?` is present in Ruby Matrix 1.9.3+, but not in 1.8.*
|
38
|
+
def symmetric?
|
39
|
+
return false unless square?
|
40
|
+
|
41
|
+
(0...row_size).each do |i|
|
42
|
+
0.upto(i).each do |j|
|
43
|
+
return false if self[i, j] != self[j, i]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
true
|
47
|
+
end
|
48
|
+
|
49
|
+
#=Cholesky decomposition
|
50
|
+
#Reference: http://en.wikipedia.org/wiki/Cholesky_decomposition
|
51
|
+
#---
|
52
|
+
#==Description
|
53
|
+
#Cholesky decomposition is represented by `M = L X L*`, where
|
54
|
+
#M is the symmetric matrix and `L` is the lower half of cholesky matrix,
|
55
|
+
#and `L*` is the conjugate transpose of `L`.
|
56
|
+
#*Returns* : Cholesky decomposition for a given matrix(if symmetric)
|
57
|
+
#*Utility*: Essential matrix function, requisite in kalman filter, least squares
|
58
|
+
def cholesky
|
59
|
+
raise ArgumentError, "Given matrix should be symmetric" unless symmetric?
|
60
|
+
c = Matrix.zero(row_size)
|
61
|
+
0.upto(row_size - 1).each do |k|
|
62
|
+
0.upto(row_size - 1).each do |i|
|
63
|
+
if i == k
|
64
|
+
sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[k, j] ** 2 }
|
65
|
+
value = Math.sqrt(self[k,k] - sum)
|
66
|
+
c[k, k] = value
|
67
|
+
elsif i > k
|
68
|
+
sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[i, j] * c[k, j] }
|
69
|
+
value = (self[k,i] - sum) / c[k, k]
|
70
|
+
c[i, k] = value
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
c
|
75
|
+
end
|
76
|
+
|
77
|
+
#=Chain Product
|
78
|
+
#Class method
|
79
|
+
#Returns the chain product of two matrices
|
80
|
+
#==Usage:
|
81
|
+
#Let `a` be 4 * 3 matrix,
|
82
|
+
#Let `b` be 3 * 3 matrix,
|
83
|
+
#Let `c` be 3 * 1 matrix,
|
84
|
+
#then `Matrix.chain_dot(a, b, c)`
|
85
|
+
#===*NOTE*: Send the matrices in multiplicative order with proper dimensions
|
86
|
+
def self.chain_dot(*args)
|
87
|
+
#inspired by Statsmodels
|
88
|
+
begin
|
89
|
+
args.reduce { |x, y| x * y } #perform matrix multiplication in order
|
90
|
+
rescue ExceptionForMatrix::ErrDimensionMismatch
|
91
|
+
puts "ExceptionForMatrix: Please provide matrices with proper multiplicative dimensions"
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
#=Adds a column of constants.
|
97
|
+
#Appends a column of ones to the matrix/array if first argument is false
|
98
|
+
#If an n-array, first checks if one column of ones is already present
|
99
|
+
#if present, then original(self) is returned, else, prepends with a vector of ones
|
100
|
+
def add_constant(prepend = true)
|
101
|
+
#for Matrix
|
102
|
+
(0...column_size).each do |i|
|
103
|
+
if self.column(i).map(&:to_f) == Object::Vector.elements(Array.new(row_size, 1.0))
|
104
|
+
return self
|
105
|
+
end
|
106
|
+
end
|
107
|
+
#append/prepend a column of one's
|
108
|
+
vectors = (0...row_size).map do |r|
|
109
|
+
if prepend
|
110
|
+
[1.0].concat(self.row(r).to_a)
|
111
|
+
else
|
112
|
+
self.row(r).to_a.push(1.0)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
return Matrix.rows(vectors)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -11,10 +11,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
|
|
11
11
|
ts.pacf
|
12
12
|
end
|
13
13
|
context("AR(1) simulations") do
|
14
|
-
include Statsample
|
14
|
+
include Statsample
|
15
15
|
|
16
16
|
setup do
|
17
|
-
@series =
|
17
|
+
@series = TimeSeries.arima
|
18
18
|
@ar_1_positive = @series.ar_sim(1500, [0.9], 2)
|
19
19
|
@ar_1_negative = @series.ar_sim(1500, [-0.9], 2)
|
20
20
|
|
@@ -73,10 +73,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
|
|
73
73
|
end
|
74
74
|
|
75
75
|
context("AR(p) simulations") do
|
76
|
-
include Statsample
|
76
|
+
include Statsample
|
77
77
|
|
78
78
|
setup do
|
79
|
-
@series =
|
79
|
+
@series = TimeSeries.arima
|
80
80
|
@ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
|
81
81
|
@ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
|
82
82
|
end
|
@@ -120,9 +120,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
|
|
120
120
|
|
121
121
|
|
122
122
|
context("MA(1) simulations") do
|
123
|
-
include Statsample
|
123
|
+
include Statsample
|
124
124
|
setup do
|
125
|
-
@series =
|
125
|
+
@series = TimeSeries.arima
|
126
126
|
@ma_positive = @series.ar_sim(1500, [0.5], 2)
|
127
127
|
@ma_negative = @series.ar_sim(1500, [-0.5], 2)
|
128
128
|
end
|
@@ -153,9 +153,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
|
|
153
153
|
end
|
154
154
|
|
155
155
|
context("MA(q) simulations") do
|
156
|
-
include Statsample
|
156
|
+
include Statsample
|
157
157
|
setup do
|
158
|
-
@series =
|
158
|
+
@series = TimeSeries.arima
|
159
159
|
@ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
|
160
160
|
@ma_negative = @series.ar_sim(1500, [-0.5], 2)
|
161
161
|
end
|
@@ -172,5 +172,15 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
|
|
172
172
|
#visualization: http://jsfiddle.net/7keHK/2/
|
173
173
|
end
|
174
174
|
end
|
175
|
+
|
176
|
+
context("Yule walker estimations") do
|
177
|
+
include Statsample
|
178
|
+
|
179
|
+
setup do
|
180
|
+
@timeseries = 100.times.map { rand }.to_ts
|
181
|
+
@arma_simulation =->(n) { @timeseries.ar(n, k)}
|
182
|
+
end
|
183
|
+
#to write test
|
184
|
+
end
|
175
185
|
end
|
176
186
|
|
data/test/test_matrix.rb
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
|
2
|
+
class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
|
3
|
+
|
4
|
+
def setup_square_matrix(arr, n)
|
5
|
+
#returns n * n matrix by slicing arr
|
6
|
+
return Matrix.rows(arr.each_slice(n).to_a)
|
7
|
+
end
|
8
|
+
def setup
|
9
|
+
@arr_square = (1..16)
|
10
|
+
@mat_non_symmetric = setup_square_matrix(@arr_square, 4)
|
11
|
+
|
12
|
+
@arr_non_square = (1..12).to_a
|
13
|
+
#this is a 4 X 3 matrix
|
14
|
+
@mat_non_square = Matrix.rows(@arr_non_square.each_slice(3).to_a)
|
15
|
+
end
|
16
|
+
|
17
|
+
#TESTS for matrix symmetricity - Matrix#symmetric?
|
18
|
+
context("symmetric?") do
|
19
|
+
|
20
|
+
should "return false for non-symmetric matrix" do
|
21
|
+
assert_equal @mat_non_symmetric.symmetric?, false
|
22
|
+
end
|
23
|
+
|
24
|
+
should "return false for non-square matrix" do
|
25
|
+
assert_equal @mat_non_square.symmetric?, false
|
26
|
+
end
|
27
|
+
|
28
|
+
should "return true for symmetrix matrix" do
|
29
|
+
arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
|
30
|
+
mat = setup_square_matrix(arr, 3)
|
31
|
+
assert_equal mat.symmetric?, true
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
#TESTS for cholesky decomposition - Matrix#cholesky
|
36
|
+
context("Cholesky Decomposition") do
|
37
|
+
|
38
|
+
should "raise error for non symmetric matrix" do
|
39
|
+
assert_raises(ArgumentError) { @mat_non_symmetric.cholesky }
|
40
|
+
end
|
41
|
+
|
42
|
+
should "raise raise error if non-square matix" do
|
43
|
+
arr = (1..12).to_a
|
44
|
+
mat = Matrix.rows(arr.each_slice(3).to_a)
|
45
|
+
assert_raises(ArgumentError) { @mat_non_square.cholesky }
|
46
|
+
end
|
47
|
+
|
48
|
+
should "give hermitian cholesky decomposed matrix for symmetrix matrix" do
|
49
|
+
arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
|
50
|
+
mat = setup_square_matrix(arr, 3)
|
51
|
+
assert_equal Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]], mat.cholesky
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
#TESTS for matrix squares of sum - Matrix#squares_of_sum
|
56
|
+
context("Squares of sum") do
|
57
|
+
|
58
|
+
should "return array of size 4 for matrix - #{@mat_non_symmetric}" do
|
59
|
+
#equal to column size
|
60
|
+
assert_equal @mat_non_symmetric.squares_of_sum.size, 4
|
61
|
+
end
|
62
|
+
|
63
|
+
should "return [784, 1024, 1296, 1600] for matrix - #{@mat_non_symmetric}" do
|
64
|
+
assert_equal @mat_non_symmetric.squares_of_sum, [784, 1024, 1296, 1600]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
#TESTS for adding constants to matrix
|
69
|
+
context("Add constant") do
|
70
|
+
|
71
|
+
should "prepend all rows with ones" do
|
72
|
+
mat = @mat_non_symmetric.add_constant
|
73
|
+
assert_equal @mat_non_symmetric.column_size, 4
|
74
|
+
assert_equal mat.column_size, 5
|
75
|
+
assert_equal mat.column(0).to_a, [1.0, 1.0,1.0,1.0]
|
76
|
+
end
|
77
|
+
|
78
|
+
should "append all rows with ones if prepend = false" do
|
79
|
+
mat = @mat_non_symmetric.add_constant(false)
|
80
|
+
assert_equal @mat_non_symmetric.column_size, 4
|
81
|
+
assert_equal mat.column_size, 5
|
82
|
+
assert_equal mat.column(mat.column_size - 1).to_a, [1.0, 1.0,1.0,1.0]
|
83
|
+
end
|
84
|
+
|
85
|
+
should "not append/prepend if a column of ones already exists in matrix" do
|
86
|
+
matrix = Matrix[[1, 2, 1, 4], [5, 6, 1, 8], [9, 10, 1, 12]]
|
87
|
+
const_mat = matrix.add_constant
|
88
|
+
assert_equal matrix.column_size, const_mat.column_size
|
89
|
+
assert_equal matrix.row_size, const_mat.row_size
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
data/test/test_tseries.rb
CHANGED
@@ -7,7 +7,7 @@ class StatsampleTestTimeSeries < MiniTest::Unit::TestCase
|
|
7
7
|
|
8
8
|
def setup
|
9
9
|
# daily closes of iShares XIU on the TSX
|
10
|
-
@xiu = Statsample::TimeSeries::
|
10
|
+
@xiu = Statsample::TimeSeries::Series.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
|
11
11
|
16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
|
12
12
|
end
|
13
13
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-statsample-timeseries
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2013-
|
13
|
+
date: 2013-09-03 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: statsample
|
@@ -216,16 +216,17 @@ files:
|
|
216
216
|
- features/support/env.rb
|
217
217
|
- lib/bio-statsample-timeseries.rb
|
218
218
|
- lib/bio-statsample-timeseries/arima.rb
|
219
|
-
- lib/bio-statsample-timeseries/statsample-timeseries.rb
|
220
219
|
- lib/bio-statsample-timeseries/timeseries.rb
|
221
220
|
- lib/bio-statsample-timeseries/timeseries/pacf.rb
|
221
|
+
- lib/bio-statsample-timeseries/utility.rb
|
222
222
|
- test/fixtures/stock_data.csv
|
223
223
|
- test/helper.rb
|
224
224
|
- test/test_arima_simulators.rb
|
225
|
+
- test/test_matrix.rb
|
225
226
|
- test/test_pacf.rb
|
226
227
|
- test/test_tseries.rb
|
227
228
|
- test/test_wald.rb
|
228
|
-
homepage: http://github.com/
|
229
|
+
homepage: http://github.com/AnkurGel/bioruby-statsample-timeseries
|
229
230
|
licenses:
|
230
231
|
- MIT
|
231
232
|
post_install_message:
|
@@ -240,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
240
241
|
version: '0'
|
241
242
|
segments:
|
242
243
|
- 0
|
243
|
-
hash:
|
244
|
+
hash: -122253519
|
244
245
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
245
246
|
none: false
|
246
247
|
requirements:
|