bio-statsample-timeseries 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +3 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/bio-statsample-timeseries.rb +1 -1
- data/lib/bio-statsample-timeseries/arima.rb +162 -9
- data/lib/bio-statsample-timeseries/timeseries.rb +125 -13
- data/lib/bio-statsample-timeseries/timeseries/pacf.rb +74 -15
- data/lib/bio-statsample-timeseries/utility.rb +118 -0
- data/test/test_arima_simulators.rb +18 -8
- data/test/test_matrix.rb +92 -0
- data/test/test_tseries.rb +1 -1
- metadata +6 -5
- data/lib/bio-statsample-timeseries/statsample-timeseries.rb +0 -2
data/README.rdoc
CHANGED
@@ -1,8 +1,8 @@
 = bio-statsample-timeseries
 
 {<img
-src="https://secure.travis-ci.org/
-/>}[http://travis-ci.org/#!/
+src="https://secure.travis-ci.org/AnkurGel/bioruby-statsample-timeseries.png"
+/>}[http://travis-ci.org/#!/AnkurGel/bioruby-statsample-timeseries]
 
 Full description goes here
 
@@ -27,7 +27,7 @@ the source tree.
 
 Information on the source tree, documentation, issues and how to contribute, see
 
-  http://github.com/
+  http://github.com/AnkurGel/bioruby-statsample-timeseries
 
 The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
 
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
 Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
   gem.name = "bio-statsample-timeseries"
-  gem.homepage = "http://github.com/
+  gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-timeseries"
   gem.license = "MIT"
   gem.summary = %Q{TimeSeries modules for Statsample}
   gem.description = %Q{Statsample-timeseries is an extension to Statsample. It incorporates helpful timeseries functions and modules like ARMA, ARIMA, acf, pacf, lags etc.}
data/VERSION
CHANGED
@@ -1 +1 @@
-0.1.
+0.1.2
data/lib/bio-statsample-timeseries.rb
CHANGED
@@ -8,9 +8,9 @@
 #
 # In this file only require other files. Avoid other source code.
 
-require 'bio-statsample-timeseries/statsample-timeseries.rb'
 require 'statsample'
 require_relative 'bio-statsample-timeseries/timeseries.rb'
 require_relative 'bio-statsample-timeseries/arima.rb'
+require_relative 'bio-statsample-timeseries/utility.rb'
 
 
data/lib/bio-statsample-timeseries/arima.rb
CHANGED
@@ -1,15 +1,29 @@
 #require 'debugger'
 module Statsample
-  module
+  module TimeSeries
+
+    def self.arima
+      #not passing (ds,p,i,q) elements for now
+      #will do that once #arima is ready for all modelling
+      Statsample::TimeSeries::ARIMA.new
+    end
+
     class ARIMA < Statsample::Vector
       include Statsample::TimeSeries
-
+      # SUGGESTION: We could use an API similar to R
+      # like
+      # ar_obj=Statsample::TimeSeries.arima(ds,p,i,q)
+      # which calls
+      # Statsample::TimeSeries::Arima.new(ds,p,i,q)
       def arima(ds, p, i, q)
         #prototype
+        # ISSUE: We should differenciate now, if i>0.
+        # The result should be send to next step
         if q.zero?
           self.ar(p)
         elsif p.zero?
           self.ma(p)
+        # ISSUE-> ELSE -> simultaneuos estimation of MA and AR parameters
         end
       end
 
@@ -20,15 +34,49 @@ module Statsample
        #or Burg's algorithm(more efficient)
       end
 
-
-      #To be implemented
-      end
-
+      #Converts a linear array into a vector
       def create_vector(arr)
         Statsample::Vector.new(arr, :scale)
       end
 
-
+
+      def yule_walker(ts, n, k)
+        #parameters: timeseries, no of observations, order
+        #returns: simulated autoregression with phi parameters and sigma
+        phi, sigma = Pacf::Pacf.yule_walker(ts, k)
+        return phi, sigma
+        #return ar_sim(n, phi, sigma)
+      end
+
+      def levinson_durbin(ts, n, k)
+        #parameters;
+        #ts: timseries against which to generate phi coefficients
+        #n: number of observations for simulation
+        #k: order of AR
+        intermediate = Pacf::Pacf.levinson_durbin(ts, k)
+        phi, sigma = intermediate[1], intermediate[0]
+        return phi, sigma
+        #return ar_sim(n, phi, sigma)
+      end
+
+      #=Autoregressive Simulator
+      #Simulates an autoregressive AR(p) model with specified number of
+      #observations(n), with phi number of values for order p and sigma.
+      #
+      #*Analysis*: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_phi_::array of phi values, e.g: [0.35, 0.213] for p = 2
+      #-_sigma_::float, sigma value for error generalization
+      #
+      #*Usage*:
+      # ar = ARIMA.new
+      # ar.ar_sim(1500, [0.3, 0.9], 0.12)
+      # # => AR(2) autoregressive series of 1500 values
+      #
+      #*Returns*:
+      #Array of generated autoregressive series against attributes
       def ar_sim(n, phi, sigma)
         #using random number generator for inclusion of white noise
         err_nor = Distribution::Normal.rng(0, sigma)
@@ -58,7 +106,21 @@ module Statsample
         x - buffer
       end
 
-
+      #=Moving Average Simulator
+      #Simulates a moving average model with specified number of
+      #observations(n), with theta values for order k and sigma
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_theta_::array of floats, e.g: [0.23, 0.732], must be < 1
+      #-_sigma_::float, sigma value for whitenoise error
+      #
+      #*Usage*:
+      # ar = ARIMA.new
+      # ar.ma_sim(1500, [0.23, 0.732], 0.27)
+      #
+      #*Returns*:
+      #Array of generated MA(q) model
       def ma_sim(n, theta, sigma)
         #n is number of observations (eg: 1000)
         #theta are the model parameters containting q values
@@ -84,7 +146,28 @@ module Statsample
         x
       end
 
-      #
+      #ARMA(Autoregressive and Moving Average) Simulator
+      #ARMA is represented by:
+      #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
+      #This simulates the ARMA model against p, q and sigma.
+      #If p = 0, then model is pure MA(q),
+      #If q = 0, then model is pure AR(p),
+      #otherwise, model is ARMA(p, q) represented by above.
+      #
+      #Detailed analysis: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_p_::array, contains p number of phi values for AR(p) process
+      #-_q_::array, contains q number of theta values for MA(q) process
+      #-_sigma_::float, sigma value for whitenoise error generation
+      #
+      #*Usage*:
+      # ar = ARIMA.new
+      # ar.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)
+      #
+      #*Returns*:
+      #array of generated ARMA model values
       def arma_sim(n, p, q, sigma)
         #represented by :
         #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
@@ -119,6 +202,76 @@ module Statsample
         end
         x - buffer
       end
+
+      #=Hannan-Rissanen for ARMA fit
+      def self.hannan(ts, p, q, k)
+        start_params = create_vector(Array.new(p+q+k, 0))
+        ts_dup = ts.dup
+
+      end
+    end
+
+    module Arima
+      class KalmanFilter < Statsample::Vector
+        include Statsample::TimeSeries
+
+        #=T
+        #The coefficient matrix for the state vector in state equation
+        # It's dimensions is r+k x r+k
+        #*Parameters*
+        #-_r_::integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
+        #-_k_::integer, number of exogeneous variables in ARMA model
+        #-_q_::integer, The AR coefficient of ARMA model
+
+        #*References*: Statsmodels tsa, Durbin and Koopman Section 4.7
+        def self.T(r, k, p)
+          arr = Matrix.zero(r)
+          params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
+
+          params_padded[0...p] = params[k...(p+k)]
+          intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
+          #appending an array filled with padded values in beginning
+          intermediate_matrix[0,0] = [params_padded]
+
+          #now generating column matrix for that:
+          arr = Matrix.columns(intermediate_matrix)
+          arr_00 = arr[0,0]
+
+          #identify matrix substituition in matrix except row[0] and column[0]
+          r.times do |i|
+            arr[r,r] = 1
+          end
+          arr[0,0] = arr_00
+          arr
+        end
+
+
+        #=R
+        #The coefficient matrix for the state vector in the observation matrix.
+        #It's dimension is r+k x 1
+        #*Parameters*
+        #-_r_::integer, r is max(p, q+1) where p and q are order of AR and MA respectively
+        #-_k_::integer, number of exogeneous variables in ARMA model
+        #-_q_::integer, The MA order in ARMA model
+        #-_p_::integer, The AR order in ARMA model
+        #*References*: Statsmodels tsa, Durbin and Koopman
+        def self.R(r, k, q, p)
+          arr = Matrix.column_vector(Array.new(r,0.0))
+
+          #pending - in kind of difficult end here;
+        end
+
+        #=Z
+        #The Z selector matrix
+        #*Parameters*
+        #-_r_::integer, max(p, q+1)
+        #Returns: vector
+        def self.Z(r)
+          arr = Statsample::Vector.new(Array.new(r, 0.0), :scale)
+          arr[0] = 1.0
+          return arr
+        end
+      end
     end
   end
 end
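Taken together, the arima.rb hunks above add a small simulation API. The snippet below is not part of the diff; it is a hypothetical usage sketch assuming the 0.1.2 methods shown above (Statsample::TimeSeries.arima, #ar_sim, #ma_sim, #arma_sim) and an installed statsample plus this gem:

require 'bio-statsample-timeseries'

series = Statsample::TimeSeries.arima                           # bare ARIMA helper added by this release
ar   = series.ar_sim(1500, [0.3, 0.9], 0.12)                    # AR(2): 1500 values, phi = [0.3, 0.9], sigma = 0.12
ma   = series.ma_sim(1500, [0.23, 0.732], 0.27)                 # MA(2): theta values, per the doc comment, must be < 1
arma = series.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)  # ARMA(2, 2) with both phi and theta arrays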
data/lib/bio-statsample-timeseries/timeseries.rb
CHANGED
@@ -3,7 +3,7 @@ module Statsample::TimeSeriesShorthands
   # Creates a new Statsample::TimeSeries object
   # Argument should be equal to TimeSeries.new
   def to_time_series(*args)
-    Statsample::TimeSeries::
+    Statsample::TimeSeries::Series.new(self, :scale, *args)
   end
 
   alias :to_ts :to_time_series
@@ -17,7 +17,7 @@ module Statsample
   module TimeSeries
     # Collection of data indexed by time.
     # The order goes from earliest to latest.
-    class
+    class Series < Statsample::Vector
       include Statsample::TimeSeries::Pacf
       # Calculates the autocorrelation coefficients of the series.
       #
@@ -31,7 +31,7 @@ module Statsample
       # ts.acf # => array with first 21 autocorrelations
       # ts.acf 3 # => array with first 3 autocorrelations
       #
-      def acf
+      def acf(max_lags = nil)
         max_lags ||= (10 * Math.log10(size)).to_i
 
         (0..max_lags).map do |i|
@@ -47,14 +47,95 @@ module Statsample
         end
       end
 
-
+      #=Partial Autocorrelation
+      #Generates partial autocorrelation series for a timeseries
+      #*Parameters*:
+      #-_max_lags_::integer, optional - provide number of lags
+      #-_method_::string. Default: 'yw'.
+      # * _yw_:: For yule-walker algorithm unbiased approach
+      # * _mle_:: For Maximum likelihood algorithm approach
+      # * _ld_:: Forr Levinson-Durbin recursive approach
+      #Returns - array of pacf
+      #
+      def pacf(max_lags = nil, method = :yw)
         #parameters:
         #max_lags => maximum number of lags for pcf
         #method => for autocovariance in yule_walker:
         #'yw' for 'yule-walker unbaised', 'mle' for biased maximum likelihood
+        #'ld' for Levinson-Durbin recursion
 
+        method = method.downcase.to_sym
         max_lags ||= (10 * Math.log10(size)).to_i
-
+        if method.eql? :yw or method.eql? :mle
+          Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
+        elsif method == :ld
+          series = self.acvf
+          Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
+        else
+          raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
+        end
+      end
+
+      #=Autoregressive estimation
+      #Generates AR(k) series for the calling timeseries by yule walker.
+      #*Parameters*:
+      #-_n_::integer, (default = 1500) number of observations for AR.
+      #-_k_::integer, (default = 1) order of AR process.
+      #*Returns*:
+      #Array constituting estimated AR series.
+      #
+      def ar(n = 1500, k = 1)
+        series = Statsample::TimeSeries.arima
+        #series = Statsample::TimeSeries::ARIMA.new
+        series.yule_walker(self, n, k)
+      end
+
+      #=AutoCovariance
+      #Provides autocovariance of timeseries.
+      #-Parameters:
+      #demean = true; optional. Supply false if series is not to be demeaned
+      #unbiased = true; optional. true/false for unbiased/biased form of autocovariance
+      #-Returns-: Autocovariance value
+      #
+      def acvf(demean = true, unbiased = true)
+        #TODO: change parameters list in opts.merge as suggested by John
+        #functionality: computes autocovariance of timeseries data
+        #returns: array of autocovariances
+
+        if demean
+          demeaned_series = self - self.mean
+        else
+          demeaned_series = self
+        end
+        n = self.acf.size
+        m = self.mean
+        if unbiased
+          d = Array.new(self.size, self.size)
+        else
+          d = ((1..self.size).to_a.reverse)[0..n]
+        end
+
+
+        0.upto(n - 1).map do |i|
+          (demeaned_series * (self.lag(i) - m)).sum / d[i]
+        end
+      end
+
+      #=Correlation
+      #Gives correlation of timeseries.
+      #
+      def correlate(a, v, mode = 'full')
+        #peforms cross-correlation of two series
+        #multiarray.correlate2(a, v, 'full')
+        if a.size < v.size
+          raise("Should have same size!")
+        end
+        ps = a.size + v.size - 1
+        a_padded = Array.new(ps, 0)
+        a_padded[0...a.size] = a
+
+        out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
+        #ongoing
       end
 
       # Lags the series by k periods.
@@ -71,7 +152,7 @@ module Statsample
       # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
      # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
       #
-      def lag
+      def lag(k = 1)
         return self if k == 0
 
         dup.tap do |lagged|
@@ -86,13 +167,14 @@ module Statsample
         end
       end
 
+      #=Diff
       # Performs a first difference of the series.
       #
       # The convention is to set the oldest observations (the first ones
       # in the series) to nil so that the size of the diffed series is the
       # same as the original.
       #
-
+      #*Usage*:
       #
       # ts = (1..10).map { rand }.to_ts
       # # => [0.69, 0.23, 0.44, 0.71, ...]
@@ -103,17 +185,23 @@ module Statsample
         self - self.lag
       end
 
-
+      #=Moving Average
+      # Calculates the moving average of the series using the provided
       # lookback argument. The lookback defaults to 10 periods.
+      #*Parameters*:
+      #-_n_::integer, (default = 10) - loopback argument
       #
-
+      #*Usage*:
       #
       # ts = (1..100).map { rand }.to_ts
       # # => [0.69, 0.23, 0.44, 0.71, ...]
       #
       # # first 9 observations are nil
       # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
-
+      #
+      #*Returns*:
+      #Resulting moving average timeseries object
+      def ma(n = 10)
         return mean if n >= size
 
         ([nil] * (n - 1) + (0..(size - n)).map do |i|
@@ -121,6 +209,7 @@ module Statsample
         end).to_time_series
       end
 
+      #=Exponential Moving Average
       # Calculates an exponential moving average of the series using a
       # specified parameter. If wilder is false (the default) then the EMA
       # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
@@ -130,14 +219,22 @@ module Statsample
       # use a lot more than n observations to calculate. The series is stable
       # if the size of the series is >= 3.45 * (n + 1)
       #
-
+      #*Parameters*:
+      #-_n_::integer, (default = 10)
+      #-_wilder_::boolean, (default = false), if true, 1/n value is used for smoothing;
+      #if false, uses 2/(n+1) value
+      #
+      #*Usage*:
       #
       # ts = (1..100).map { rand }.to_ts
       # # => [0.69, 0.23, 0.44, 0.71, ...]
       #
       # # first 9 observations are nil
       # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
-
+      #
+      #*Returns*:
+      #EMA timeseries
+      def ema(n = 10, wilder = false)
         smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
 
         # need to start everything from the first non-nil observation
@@ -156,9 +253,24 @@ module Statsample
         base.to_time_series
       end
 
+      #=Moving Average Convergence-Divergence
       # Calculates the MACD (moving average convergence-divergence) of the time
       # series - this is a comparison of a fast EMA with a slow EMA.
-
+      #
+      # *Parameters*:
+      # -_fast_::integer, (default = 12) - fast component of MACD
+      # -_slow_::integer, (default = 26) - slow component of MACD
+      # -_signal_::integer, (default = 9) - signal component of MACD
+      #
+      # *Usage*:
+      # ts = (1..100).map { rand }.to_ts
+      # # => [0.69, 0.23, 0.44, 0.71, ...]
+      # ts.macd(13)
+      #
+      # *Returns*:
+      # Array of two timeseries - comparison of fast EMA with slow
+      # and EMA with signal value
+      def macd(fast = 12, slow = 26, signal = 9)
         series = ema(fast) - ema(slow)
         [series, series.ema(signal)]
       end
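The Series class gains several documented methods in this release (acf with a lag argument, pacf with a selectable estimation method, ar, acvf, lag(k), diff, ma, ema, macd). A minimal sketch of how they are meant to be called, assuming the API exactly as documented in the hunks above and the to_ts shorthand:

require 'bio-statsample-timeseries'

ts = 100.times.map { rand }.to_ts  # a Statsample::TimeSeries::Series

ts.acf(3)           # first autocorrelations, per the doc comment above
ts.pacf(10, :yw)    # partial autocorrelations via unbiased Yule-Walker
ts.pacf(10, :ld)    # same, via the new Levinson-Durbin recursion
ts.lag(2)           # series shifted back by two periods
ts.diff             # first difference (self - self.lag)
ts.ema(10)          # exponential moving average
ts.macd(12, 26, 9)  # [fast-minus-slow EMA series, its signal EMA]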
data/lib/bio-statsample-timeseries/timeseries/pacf.rb
CHANGED
@@ -8,24 +8,79 @@ module Statsample
         #Inspiration: StatsModels
         pacf = [1.0]
         (1..max_lags).map do |i|
-          pacf << yule_walker(timeseries, i, method)[-1]
+          pacf << yule_walker(timeseries, i, method)[0][-1]
         end
         pacf
       end
 
+
+      #=Levinson-Durbin Algorithm
+      #*Parameters*:
+      #-_series_ : timeseries, or a series of autocovariances
+      #-_nlags_: integer(default: 10): largest lag to include in recursion or order of the AR process
+      #-_is_acovf_: boolean(default: false): series is timeseries if it is false, else contains autocavariances
+
+      #*returns*:
+      #-_sigma_v_: estimate of the error variance
+      #-_arcoefs_: AR coefficients
+      #-_pacf_: pacf function
+      #-_sigma_: some function
+      def self.levinson_durbin(series, nlags = 10, is_acovf = false)
+
+        if is_acovf
+          series = series.map(&:to_f)
+        else
+          #nlags = order(k) of AR in this case
+          series = series.acvf.map(&:to_f)[0..nlags]
+        end
+        #phi = Array.new((nlags+1), 0.0) { Array.new(nlags+1, 0.0) }
+        order = nlags
+        phi = Matrix.zero(nlags + 1)
+        sig = Array.new(nlags+1)
+
+        #setting initial point for recursion:
+        phi[1,1] = series[1]/series[0]
+        #phi[1][1] = series[1]/series[0]
+        sig[1] = series[0] - phi[1, 1] * series[1]
+
+        2.upto(order).each do |k|
+          phi[k, k] = (series[k] - (Statsample::Vector.new(phi[1...k, k-1]) * series[1...k].reverse.to_ts).sum) / sig[k-1]
+          #some serious refinement needed in above for matrix manipulation. Will do today
+          1.upto(k-1).each do |j|
+            phi[j, k] = phi[j, k-1] - phi[k, k] * phi[k-j, k-1]
+          end
+          sig[k] = sig[k-1] * (1-phi[k, k] ** 2)
+
+        end
+        sigma_v = sig[-1]
+        arcoefs_delta = phi.column(phi.column_size - 1)
+        arcoefs = arcoefs_delta[1..arcoefs_delta.size]
+        pacf = diag(phi)
+        pacf[0] = 1.0
+        return [sigma_v, arcoefs, pacf, sig, phi]
+      end
+
+      def self.diag(mat)
+        #returns array of diagonal elements of a matrix.
+        #will later abstract it to matrix.rb in Statsample
+        return mat.each_with_index(:diagonal).map { |x, r, c| x }
+      end
+
+
+      #=Yule Walker Algorithm
+      #From the series, estimates AR(p)(autoregressive) parameter
+      #using Yule-Waler equation. See -
+      #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
+
+      #*Parameters*:
+      #-_ts_::timeseries
+      #-_k_::order, default = 1
+      #-_method_:: can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator is (n - k)
+
+      #*returns*:
+      #-_rho_:: autoregressive coefficients
+      #-_sigma_:: sigma parameter
       def self.yule_walker(ts, k = 1, method='yw')
-        #From the series, estimates AR(p)(autoregressive) parameter
-        #using Yule-Waler equation. See -
-        #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
-
-        #parameters:
-        #ts = series
-        #k = order, default = 1
-        #method = can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator
-        #is (n - k)
-
-        #returns:
-        #rho => autoregressive coefficients
         ts = ts - ts.mean
         n = ts.size
         if method.downcase.eql? 'yw'
@@ -37,7 +92,7 @@ module Statsample
          denom =->(k) { n }
        end
        r = Array.new(k + 1) { 0.0 }
-        r[0] = ts.map { |x| x
+        r[0] = ts.map { |x| x**2 }.inject(:+).to_f / denom.call(0).to_f
 
        1.upto(k) do |l|
          r[l] = (ts[0...-l].zip(ts[l...ts.size])).map do |x|
@@ -48,7 +103,11 @@ module Statsample
        r_R = toeplitz(r[0...-1])
 
        mat = Matrix.columns(r_R).inverse()
-        solve_matrix(mat, r[1..r.size])
+        phi = solve_matrix(mat, r[1..r.size])
+        phi_vector = Statsample::Vector.new(phi, :scale)
+        r_vector = Statsample::Vector.new(r[1..r.size], :scale)
+        sigma = r[0] - (r_vector * phi_vector).sum
+        return [phi, sigma]
       end
 
       def self.toeplitz(arr)
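Note the changed return shape in pacf.rb: yule_walker now returns a [phi, sigma] pair where it previously returned only the coefficient array (which is why pacf_yw now indexes [0][-1]), and the new levinson_durbin returns [sigma_v, arcoefs, pacf, sig, phi]. A hypothetical call sketch, assuming the Pacf::Pacf constant nests under Statsample::TimeSeries as the include in timeseries.rb suggests:

ts = 100.times.map { rand }.to_ts

phi, sigma = Statsample::TimeSeries::Pacf::Pacf.yule_walker(ts, 2, 'yw')  # AR(2) coefficients and the sigma parameter
sigma_v, arcoefs, pacf_series, = Statsample::TimeSeries::Pacf::Pacf.levinson_durbin(ts, 10, false)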
data/lib/bio-statsample-timeseries/utility.rb
ADDED
@@ -0,0 +1,118 @@
+module Statsample
+  class Vector
+    include Enumerable
+    include Writable
+    include Summarizable
+
+    #=Squares of sum
+    #---
+    #parameter:
+    #-demean::boolean - optional. __default__: false
+    #Sums the timeseries and then returns the square
+    def squares_of_sum(demean = false)
+      if demean
+        m = self.mean
+        self.map { |x| (x-m) }.sum ** 2
+      else
+        return self.sum.to_f ** 2
+      end
+    end
+  end
+
+
+  class ::Matrix
+    #=Squares of sum
+    #---
+    #Does squares of sum in column order.
+    #Necessary for computations in various processes
+    def squares_of_sum
+      (0...column_size).map do |j|
+        self.column(j).sum ** 2
+      end
+    end
+
+    #=Checks if given matrix is symmetric or not
+    #---
+    #returns bool
+    #`symmetric?` is present in Ruby Matrix 1.9.3+, but not in 1.8.*
+    def symmetric?
+      return false unless square?
+
+      (0...row_size).each do |i|
+        0.upto(i).each do |j|
+          return false if self[i, j] != self[j, i]
+        end
+      end
+      true
+    end
+
+    #=Cholesky decomposition
+    #Reference: http://en.wikipedia.org/wiki/Cholesky_decomposition
+    #---
+    #==Description
+    #Cholesky decomposition is reprsented by `M = L X L*`, where
+    #M is the symmetric matrix and `L` is the lower half of cholesky matrix,
+    #and `L*` is the conjugate form of `L`.
+    #*Returns* : Cholesky decomposition for a given matrix(if symmetric)
+    #*Utility*: Essential matrix function, requisite in kalman filter, least squares
+    def cholesky
+      raise ArgumentError, "Given matrix should be symmetric" unless symmetric?
+      c = Matrix.zero(row_size)
+      0.upto(row_size - 1).each do |k|
+        0.upto(row_size - 1).each do |i|
+          if i == k
+            sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[k, j] ** 2 }
+            value = Math.sqrt(self[k,k] - sum)
+            c[k, k] = value
+          elsif i > k
+            sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[i, j] * c[k, j] }
+            value = (self[k,i] - sum) / c[k, k]
+            c[i, k] = value
+          end
+        end
+      end
+      c
+    end
+
+    #=Chain Product
+    #Class method
+    #Returns the chain product of two matrices
+    #==Usage:
+    #Let `a` be 4 * 3 matrix,
+    #Let `b` be 3 * 3 matrix,
+    #Let `c` be 3 * 1 matrix,
+    #then `Matrix.chain_dot(a, b, c)`
+    #===*NOTE*: Send the matrices in multiplicative order with proper dimensions
+    def self.chain_dot(*args)
+      #inspired by Statsmodels
+      begin
+        args.reduce { |x, y| x * y } #perform matrix multiplication in order
+      rescue ExceptionForMatrix::ErrDimensionMismatch
+        puts "ExceptionForMatrix: Please provide matrices with proper multiplicative dimensions"
+      end
+    end
+
+
+    #=Adds a column of constants.
+    #Appends a column of ones to the matrix/array if first argument is false
+    #If an n-array, first checks if one column of ones is already present
+    #if present, then original(self) is returned, else, prepends with a vector of ones
+    def add_constant(prepend = true)
+      #for Matrix
+      (0...column_size).each do |i|
+        if self.column(i).map(&:to_f) == Object::Vector.elements(Array.new(row_size, 1.0))
+          return self
+        end
+      end
+      #append/prepend a column of one's
+      vectors = (0...row_size).map do |r|
+        if prepend
+          [1.0].concat(self.row(r).to_a)
+        else
+          self.row(r).to_a.push(1.0)
+        end
+      end
+      return Matrix.rows(vectors)
+    end
+  end
+end
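utility.rb is new in 0.1.2 and extends the standard Matrix class. The sketch below is a hypothetical illustration built from the assertions in the new test/test_matrix.rb (shown further down); it assumes the gem's extensions are loaded, since the stock Ruby Matrix has no cholesky or add_constant:

require 'bio-statsample-timeseries'   # pulls in utility.rb via the updated requires

m = Matrix[[4, 12, -16], [12, 37, -43], [-16, -43, 93]]
m.symmetric?   # => true
m.cholesky     # => Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]]  (lower-triangular L)

a = Matrix[[1, 2], [3, 4]]
a.add_constant         # => Matrix[[1.0, 1, 2], [1.0, 3, 4]]  (column of ones prepended)
a.add_constant(false)  # => Matrix[[1, 2, 1.0], [3, 4, 1.0]]  (appended instead)
Matrix.chain_dot(m, m, m)  # chained product M * M * M; prints a message on dimension mismatch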
data/test/test_arima_simulators.rb
CHANGED
@@ -11,10 +11,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
    ts.pacf
  end
  context("AR(1) simulations") do
-    include Statsample
+    include Statsample
 
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ar_1_positive = @series.ar_sim(1500, [0.9], 2)
      @ar_1_negative = @series.ar_sim(1500, [-0.9], 2)
 
@@ -73,10 +73,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end
 
  context("AR(p) simulations") do
-    include Statsample
+    include Statsample
 
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
      @ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
    end
@@ -120,9 +120,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
 
 
  context("MA(1) simulations") do
-    include Statsample
+    include Statsample
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ma_positive = @series.ar_sim(1500, [0.5], 2)
      @ma_negative = @series.ar_sim(1500, [-0.5], 2)
    end
@@ -153,9 +153,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end
 
  context("MA(q) simulations") do
-    include Statsample
+    include Statsample
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
      @ma_negative = @series.ar_sim(1500, [-0.5], 2)
    end
@@ -172,5 +172,15 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
      #visualization: http://jsfiddle.net/7keHK/2/
    end
  end
+
+  context("Yule walker estimations") do
+    include Statsample
+
+    setup do
+      @timeseries = 100.times.map { rand }.to_ts
+      @arma_simulation =->(n) { @timeseries.ar(n, k)}
+    end
+    #to write test
+  end
 end
 
data/test/test_matrix.rb
ADDED
@@ -0,0 +1,92 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
+
+  def setup_square_matrix(arr, n)
+    #returns n * n matrix by slicing arr
+    return Matrix.rows(arr.each_slice(n).to_a)
+  end
+  def setup
+    @arr_square = (1..16)
+    @mat_non_symmetric = setup_square_matrix(@arr_square, 4)
+
+    @arr_non_square = (1..12).to_a
+    #this is a 4 X 3 matrix
+    @mat_non_square = Matrix.rows(@arr_non_square.each_slice(3).to_a)
+  end
+
+  #TESTS for matrix symmetricity - Matrix#symmetric?
+  context("symmetric?") do
+
+    should "return false for non-symmetric matrix" do
+      assert_equal @mat_non_symmetric.symmetric?, false
+    end
+
+    should "return false for non-square matrix" do
+      assert_equal @mat_non_square.symmetric?, false
+    end
+
+    should "return true for symmetrix matrix" do
+      arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+      mat = setup_square_matrix(arr, 3)
+      assert_equal mat.symmetric?, true
+    end
+  end
+
+  #TESTS for cholesky decomposition - Matrix#cholesky
+  context("Cholesky Decomposition") do
+
+    should "raise error for non symmetric matrix" do
+      assert_raises(ArgumentError) { @mat_non_symmetric.cholesky }
+    end
+
+    should "raise raise error if non-square matix" do
+      arr = (1..12).to_a
+      mat = Matrix.rows(arr.each_slice(3).to_a)
+      assert_raises(ArgumentError) { @mat_non_square.cholesky }
+    end
+
+    should "give hermitian cholesky decomposed matrix for symmetrix matrix" do
+      arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+      mat = setup_square_matrix(arr, 3)
+      assert_equal Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]], mat.cholesky
+    end
+  end
+
+  #TESTS for matrix squares of sum - Matrix#squares_of_sum
+  context("Squares of sum") do
+
+    should "return array of size 4 for matrix - #{@mat_non_symmetric}" do
+      #equal to column size
+      assert_equal @mat_non_symmetric.squares_of_sum.size, 4
+    end
+
+    should "return [784, 1024, 1296, 1600] for matrix - #{@mat_non_symmetric}" do
+      assert_equal @mat_non_symmetric.squares_of_sum, [784, 1024, 1296, 1600]
+    end
+  end
+
+  #TESTS for adding constants to matrix
+  context("Add constant") do
+
+    should "prepend all rows with ones" do
+      mat = @mat_non_symmetric.add_constant
+      assert_equal @mat_non_symmetric.column_size, 4
+      assert_equal mat.column_size, 5
+      assert_equal mat.column(0).to_a, [1.0, 1.0,1.0,1.0]
+    end
+
+    should "append all rows with ones if prepend = false" do
+      mat = @mat_non_symmetric.add_constant(false)
+      assert_equal @mat_non_symmetric.column_size, 4
+      assert_equal mat.column_size, 5
+      assert_equal mat.column(mat.column_size - 1).to_a, [1.0, 1.0,1.0,1.0]
+    end
+
+    should "not append/prepend if a column of ones already exists in matrix" do
+      matrix = Matrix[[1, 2, 1, 4], [5, 6, 1, 8], [9, 10, 1, 12]]
+      const_mat = matrix.add_constant
+      assert_equal matrix.column_size, const_mat.column_size
+      assert_equal matrix.row_size, const_mat.row_size
+    end
+  end
+end
data/test/test_tseries.rb
CHANGED
@@ -7,7 +7,7 @@ class StatsampleTestTimeSeries < MiniTest::Unit::TestCase
 
  def setup
    # daily closes of iShares XIU on the TSX
-    @xiu = Statsample::TimeSeries::
+    @xiu = Statsample::TimeSeries::Series.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
      16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
  end
 
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bio-statsample-timeseries
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.2
 prerelease:
 platform: ruby
 authors:
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-09-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: statsample
@@ -216,16 +216,17 @@ files:
 - features/support/env.rb
 - lib/bio-statsample-timeseries.rb
 - lib/bio-statsample-timeseries/arima.rb
-- lib/bio-statsample-timeseries/statsample-timeseries.rb
 - lib/bio-statsample-timeseries/timeseries.rb
 - lib/bio-statsample-timeseries/timeseries/pacf.rb
+- lib/bio-statsample-timeseries/utility.rb
 - test/fixtures/stock_data.csv
 - test/helper.rb
 - test/test_arima_simulators.rb
+- test/test_matrix.rb
 - test/test_pacf.rb
 - test/test_tseries.rb
 - test/test_wald.rb
-homepage: http://github.com/
+homepage: http://github.com/AnkurGel/bioruby-statsample-timeseries
 licenses:
 - MIT
 post_install_message:
@@ -240,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
     segments:
     - 0
-    hash:
+    hash: -122253519
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
 requirements: