bio-statsample-timeseries 0.1.1 → 0.1.2

@@ -1,8 +1,8 @@
  = bio-statsample-timeseries

  {<img
- src="https://secure.travis-ci.org/ankurgel/bioruby-statsample-timeseries.png"
- />}[http://travis-ci.org/#!/ankurgel/bioruby-statsample-timeseries]
+ src="https://secure.travis-ci.org/AnkurGel/bioruby-statsample-timeseries.png"
+ />}[http://travis-ci.org/#!/AnkurGel/bioruby-statsample-timeseries]

  Full description goes here

@@ -27,7 +27,7 @@ the source tree.

  Information on the source tree, documentation, issues and how to contribute, see

- http://github.com/ankurgel/bioruby-statsample-timeseries
+ http://github.com/AnkurGel/bioruby-statsample-timeseries

  The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.

data/Rakefile CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
  Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-statsample-timeseries"
- gem.homepage = "http://github.com/ankurgel/bioruby-statsample-timeseries"
+ gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-timeseries"
  gem.license = "MIT"
  gem.summary = %Q{TimeSeries modules for Statsample}
  gem.description = %Q{Statsample-timeseries is an extension to Statsample. It incorporates helpful timeseries functions and modules like ARMA, ARIMA, acf, pacf, lags etc.}
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.1
+ 0.1.2
data/lib/bio-statsample-timeseries.rb CHANGED
@@ -8,9 +8,9 @@
  #
  # In this file only require other files. Avoid other source code.

- require 'bio-statsample-timeseries/statsample-timeseries.rb'
  require 'statsample'
  require_relative 'bio-statsample-timeseries/timeseries.rb'
  require_relative 'bio-statsample-timeseries/arima.rb'
+ require_relative 'bio-statsample-timeseries/utility.rb'

data/lib/bio-statsample-timeseries/arima.rb CHANGED
@@ -1,15 +1,29 @@
  #require 'debugger'
  module Statsample
- module ARIMA
+ module TimeSeries
+
+ def self.arima
+ #not passing (ds,p,i,q) elements for now
+ #will do that once #arima is ready for all modelling
+ Statsample::TimeSeries::ARIMA.new
+ end
+
  class ARIMA < Statsample::Vector
  include Statsample::TimeSeries
-
+ # SUGGESTION: We could use an API similar to R
+ # like
+ # ar_obj=Statsample::TimeSeries.arima(ds,p,i,q)
+ # which calls
+ # Statsample::TimeSeries::Arima.new(ds,p,i,q)
  def arima(ds, p, i, q)
  #prototype
+ # ISSUE: We should difference the series now, if i > 0.
+ # The result should be sent to the next step
  if q.zero?
  self.ar(p)
  elsif p.zero?
  self.ma(p)
+ # ISSUE -> ELSE -> simultaneous estimation of MA and AR parameters
  end
  end

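For reference, a minimal sketch of how the new module-level entry point is meant to be called, using the ar_sim signature documented further down in this file:

  require 'bio-statsample-timeseries'

  series = Statsample::TimeSeries.arima          # currently takes no arguments
  ar2 = series.ar_sim(1500, [0.3, 0.9], 0.12)    # Array of 1500 AR(2) values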
@@ -20,15 +34,49 @@ module Statsample
  #or Burg's algorithm(more efficient)
  end

- def yule_walker()
- #To be implemented
- end
-
+ #Converts a linear array into a vector
  def create_vector(arr)
  Statsample::Vector.new(arr, :scale)
  end

- #tentative AR(p) simulator
+
+ def yule_walker(ts, n, k)
+ #parameters: timeseries, number of observations, order
+ #returns: simulated autoregression with phi parameters and sigma
+ phi, sigma = Pacf::Pacf.yule_walker(ts, k)
+ return phi, sigma
+ #return ar_sim(n, phi, sigma)
+ end
+
+ def levinson_durbin(ts, n, k)
+ #parameters;
+ #ts: timeseries against which to generate phi coefficients
+ #n: number of observations for simulation
+ #k: order of AR
+ intermediate = Pacf::Pacf.levinson_durbin(ts, k)
+ phi, sigma = intermediate[1], intermediate[0]
+ return phi, sigma
+ #return ar_sim(n, phi, sigma)
+ end
+
+ #=Autoregressive Simulator
+ #Simulates an autoregressive AR(p) model with a specified number of
+ #observations (n), the phi values for order p, and sigma.
+ #
+ #*Analysis*: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+ #
+ #*Parameters*:
+ #-_n_::integer, number of observations
+ #-_phi_::array of phi values, e.g: [0.35, 0.213] for p = 2
+ #-_sigma_::float, sigma value for error generation
+ #
+ #*Usage*:
+ # ar = ARIMA.new
+ # ar.ar_sim(1500, [0.3, 0.9], 0.12)
+ # # => AR(2) autoregressive series of 1500 values
+ #
+ #*Returns*:
+ #Array of the generated autoregressive series for the given attributes
  def ar_sim(n, phi, sigma)
  #using random number generator for inclusion of white noise
  err_nor = Distribution::Normal.rng(0, sigma)
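A hedged sketch of the estimation round trip these wrappers enable, assuming the Pacf helpers return [phi, sigma] as shown in the pacf.rb hunk further below (exact estimates vary with the random noise):

  ar = Statsample::TimeSeries.arima
  sample = ar.ar_sim(1500, [0.4, 0.3], 1).to_ts   # simulate an AR(2) series
  phi, sigma = ar.yule_walker(sample, 1500, 2)    # phi = estimated coefficients, sigma = noise estimate
  # ar.levinson_durbin(sample, 1500, 2) returns an analogous [phi, sigma] pair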
@@ -58,7 +106,21 @@ module Statsample
  x - buffer
  end

- #moving average simulator
+ #=Moving Average Simulator
+ #Simulates a moving average model with a specified number of
+ #observations (n), the theta values for order q, and sigma
+ #
+ #*Parameters*:
+ #-_n_::integer, number of observations
+ #-_theta_::array of floats, e.g: [0.23, 0.732], values must be < 1
+ #-_sigma_::float, sigma value for white noise error
+ #
+ #*Usage*:
+ # ar = ARIMA.new
+ # ar.ma_sim(1500, [0.23, 0.732], 0.27)
+ #
+ #*Returns*:
+ #Array of the generated MA(q) series
  def ma_sim(n, theta, sigma)
  #n is number of observations (eg: 1000)
  #theta are the model parameters containing q values
@@ -84,7 +146,28 @@ module Statsample
  x
  end

- #arma simulator
+ #ARMA (Autoregressive Moving Average) Simulator
+ #ARMA is represented by:
+ #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
+ #This simulates the ARMA model against p, q and sigma.
+ #If p = 0, then model is pure MA(q),
+ #If q = 0, then model is pure AR(p),
+ #otherwise, model is ARMA(p, q) represented by above.
+ #
+ #Detailed analysis: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+ #
+ #*Parameters*:
+ #-_n_::integer, number of observations
+ #-_p_::array, contains p number of phi values for AR(p) process
+ #-_q_::array, contains q number of theta values for MA(q) process
+ #-_sigma_::float, sigma value for white noise error generation
+ #
+ #*Usage*:
+ # ar = ARIMA.new
+ # ar.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)
+ #
+ #*Returns*:
+ #Array of the generated ARMA model values
  def arma_sim(n, p, q, sigma)
  #represented by :
  #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
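For clarity, the formula the linked image encodes is the standard ARMA(p, q) recursion, with e_t the white noise term:

  X_t = e_t + (phi_1 * X_(t-1) + ... + phi_p * X_(t-p)) + (theta_1 * e_(t-1) + ... + theta_q * e_(t-q))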
@@ -119,6 +202,76 @@ module Statsample
  end
  x - buffer
  end
+
+ #=Hannan-Rissanen for ARMA fit
+ def self.hannan(ts, p, q, k)
+ start_params = create_vector(Array.new(p+q+k, 0))
+ ts_dup = ts.dup
+
+ end
+ end
+
+ module Arima
+ class KalmanFilter < Statsample::Vector
+ include Statsample::TimeSeries
+
+ #=T
+ #The coefficient matrix for the state vector in the state equation
+ # Its dimension is (r+k) x (r+k)
+ #*Parameters*
+ #-_r_::integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
+ #-_k_::integer, number of exogenous variables in the ARMA model
+ #-_p_::integer, the AR order of the ARMA model
+
+ #*References*: Statsmodels tsa, Durbin and Koopman Section 4.7
+ def self.T(r, k, p)
+ arr = Matrix.zero(r)
+ params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
+
+ params_padded[0...p] = params[k...(p+k)]
+ intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
+ #appending an array filled with padded values at the beginning
+ intermediate_matrix[0,0] = [params_padded]
+
+ #now generating column matrix for that:
+ arr = Matrix.columns(intermediate_matrix)
+ arr_00 = arr[0,0]
+
+ #identity matrix substitution in matrix except row[0] and column[0]
+ r.times do |i|
+ arr[r,r] = 1
+ end
+ arr[0,0] = arr_00
+ arr
+ end
+
+
+ #=R
+ #The coefficient matrix for the state vector in the observation matrix.
+ #Its dimension is (r+k) x 1
+ #*Parameters*
+ #-_r_::integer, r is max(p, q+1) where p and q are order of AR and MA respectively
+ #-_k_::integer, number of exogenous variables in the ARMA model
+ #-_q_::integer, The MA order in ARMA model
+ #-_p_::integer, The AR order in ARMA model
+ #*References*: Statsmodels tsa, Durbin and Koopman
+ def self.R(r, k, q, p)
+ arr = Matrix.column_vector(Array.new(r,0.0))
+
+ #pending - in kind of difficult end here;
+ end
+
+ #=Z
+ #The Z selector matrix
+ #*Parameters*
+ #-_r_::integer, max(p, q+1)
+ #Returns: vector
+ def self.Z(r)
+ arr = Statsample::Vector.new(Array.new(r, 0.0), :scale)
+ arr[0] = 1.0
+ return arr
+ end
+ end
  end
  end
  end
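A small usage sketch for the one finished piece of the new KalmanFilter class, the Z selector (T and R are still marked pending in this hunk):

  r = 3   # max(p, q + 1) for the ARMA(p, q) model
  Statsample::TimeSeries::Arima::KalmanFilter.Z(r)
  # => vector [1.0, 0.0, 0.0]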
data/lib/bio-statsample-timeseries/timeseries.rb CHANGED
@@ -3,7 +3,7 @@ module Statsample::TimeSeriesShorthands
  # Creates a new Statsample::TimeSeries object
  # Argument should be equal to TimeSeries.new
  def to_time_series(*args)
- Statsample::TimeSeries::TimeSeries.new(self, :scale, *args)
+ Statsample::TimeSeries::Series.new(self, :scale, *args)
  end

  alias :to_ts :to_time_series
@@ -17,7 +17,7 @@ module Statsample
  module TimeSeries
  # Collection of data indexed by time.
  # The order goes from earliest to latest.
- class TimeSeries < Statsample::Vector
+ class Series < Statsample::Vector
  include Statsample::TimeSeries::Pacf
  # Calculates the autocorrelation coefficients of the series.
  #
@@ -31,7 +31,7 @@ module Statsample
  # ts.acf # => array with first 21 autocorrelations
  # ts.acf 3 # => array with first 3 autocorrelations
  #
- def acf max_lags = nil
+ def acf(max_lags = nil)
  max_lags ||= (10 * Math.log10(size)).to_i

  (0..max_lags).map do |i|
@@ -47,14 +47,95 @@ module Statsample
  end
  end

- def pacf(max_lags = nil, method = 'yw')
+ #=Partial Autocorrelation
+ #Generates partial autocorrelation series for a timeseries
+ #*Parameters*:
+ #-_max_lags_::integer, optional - provide number of lags
+ #-_method_::string. Default: 'yw'.
+ # * _yw_:: For yule-walker algorithm unbiased approach
+ # * _mle_:: For Maximum likelihood algorithm approach
+ # * _ld_:: For Levinson-Durbin recursive approach
+ #Returns - array of pacf
+ #
+ def pacf(max_lags = nil, method = :yw)
  #parameters:
  #max_lags => maximum number of lags for pacf
  #method => for autocovariance in yule_walker:
  #'yw' for 'yule-walker unbiased', 'mle' for biased maximum likelihood
+ #'ld' for Levinson-Durbin recursion

+ method = method.downcase.to_sym
  max_lags ||= (10 * Math.log10(size)).to_i
- Pacf::Pacf.pacf_yw(self, max_lags, method)
+ if method.eql? :yw or method.eql? :mle
+ Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
+ elsif method == :ld
+ series = self.acvf
+ Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
+ else
+ raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
+ end
+ end
+
+ #=Autoregressive estimation
+ #Generates AR(k) series for the calling timeseries by Yule-Walker.
+ #*Parameters*:
+ #-_n_::integer, (default = 1500) number of observations for AR.
+ #-_k_::integer, (default = 1) order of AR process.
+ #*Returns*:
+ #Array constituting the estimated AR series.
+ #
+ def ar(n = 1500, k = 1)
+ series = Statsample::TimeSeries.arima
+ #series = Statsample::TimeSeries::ARIMA.new
+ series.yule_walker(self, n, k)
+ end
+
+ #=AutoCovariance
+ #Provides autocovariance of timeseries.
+ #-Parameters:
+ #demean = true; optional. Supply false if series is not to be demeaned
+ #unbiased = true; optional. true/false for unbiased/biased form of autocovariance
+ #-Returns-: Array of autocovariance values
+ #
+ def acvf(demean = true, unbiased = true)
+ #TODO: change parameters list in opts.merge as suggested by John
+ #functionality: computes autocovariance of timeseries data
+ #returns: array of autocovariances
+
+ if demean
+ demeaned_series = self - self.mean
+ else
+ demeaned_series = self
+ end
+ n = self.acf.size
+ m = self.mean
+ if unbiased
+ d = Array.new(self.size, self.size)
+ else
+ d = ((1..self.size).to_a.reverse)[0..n]
+ end
+
+
+ 0.upto(n - 1).map do |i|
+ (demeaned_series * (self.lag(i) - m)).sum / d[i]
+ end
+ end
+
+ #=Correlation
+ #Gives cross-correlation of two series.
+ #
+ def correlate(a, v, mode = 'full')
+ #performs cross-correlation of two series
+ #multiarray.correlate2(a, v, 'full')
+ if a.size < v.size
+ raise("Should have same size!")
+ end
+ ps = a.size + v.size - 1
+ a_padded = Array.new(ps, 0)
+ a_padded[0...a.size] = a
+
+ out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
+ #ongoing
  end

  # Lags the series by k periods.
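A short sketch of how the new estimation helpers hang together on a series (for a 100-point series the default max_lags is (10 * Math.log10(100)).to_i, i.e. 20; exact values depend on the random data):

  ts = 100.times.map { rand }.to_ts
  ts.acvf               # autocovariances, demeaned and unbiased by default
  ts.pacf               # Yule-Walker (unbiased) partial autocorrelations
  ts.pacf(10, 'mle')    # biased maximum-likelihood denominators
  ts.pacf(10, 'ld')     # Levinson-Durbin recursion over the autocovariances
  ts.ar(1500, 2)        # Yule-Walker AR(2) estimate, returns [phi, sigma]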
@@ -71,7 +152,7 @@ module Statsample
  # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
  # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
  #
- def lag k = 1
+ def lag(k = 1)
  return self if k == 0

  dup.tap do |lagged|
@@ -86,13 +167,14 @@ module Statsample
  end
  end

+ #=Diff
  # Performs a first difference of the series.
  #
  # The convention is to set the oldest observations (the first ones
  # in the series) to nil so that the size of the diffed series is the
  # same as the original.
  #
- # Usage:
+ #*Usage*:
  #
  # ts = (1..10).map { rand }.to_ts
  # # => [0.69, 0.23, 0.44, 0.71, ...]
@@ -103,17 +185,23 @@ module Statsample
  self - self.lag
  end

- # Calculates a moving average of the series using the provided
+ #=Moving Average
+ # Calculates the moving average of the series using the provided
  # lookback argument. The lookback defaults to 10 periods.
+ #*Parameters*:
+ #-_n_::integer, (default = 10) - lookback argument
  #
- # Usage:
+ #*Usage*:
  #
  # ts = (1..100).map { rand }.to_ts
  # # => [0.69, 0.23, 0.44, 0.71, ...]
  #
  # # first 9 observations are nil
  # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
- def ma n = 10
+ #
+ #*Returns*:
+ #Resulting moving average timeseries object
+ def ma(n = 10)
  return mean if n >= size

  ([nil] * (n - 1) + (0..(size - n)).map do |i|
@@ -121,6 +209,7 @@ module Statsample
  end).to_time_series
  end

+ #=Exponential Moving Average
  # Calculates an exponential moving average of the series using a
  # specified parameter. If wilder is false (the default) then the EMA
  # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
@@ -130,14 +219,22 @@ module Statsample
  # use a lot more than n observations to calculate. The series is stable
  # if the size of the series is >= 3.45 * (n + 1)
  #
- # Usage:
+ #*Parameters*:
+ #-_n_::integer, (default = 10)
+ #-_wilder_::boolean, (default = false), if true, 1/n value is used for smoothing;
+ #if false, uses 2/(n+1) value
+ #
+ #*Usage*:
  #
  # ts = (1..100).map { rand }.to_ts
  # # => [0.69, 0.23, 0.44, 0.71, ...]
  #
  # # first 9 observations are nil
  # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
- def ema n = 10, wilder = false
+ #
+ #*Returns*:
+ #EMA timeseries
+ def ema(n = 10, wilder = false)
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)

  # need to start everything from the first non-nil observation
@@ -156,9 +253,24 @@ module Statsample
  base.to_time_series
  end

+ #=Moving Average Convergence-Divergence
  # Calculates the MACD (moving average convergence-divergence) of the time
  # series - this is a comparison of a fast EMA with a slow EMA.
- def macd fast = 12, slow = 26, signal = 9
+ #
+ # *Parameters*:
+ # -_fast_::integer, (default = 12) - fast component of MACD
+ # -_slow_::integer, (default = 26) - slow component of MACD
+ # -_signal_::integer, (default = 9) - signal component of MACD
+ #
+ # *Usage*:
+ # ts = (1..100).map { rand }.to_ts
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ # ts.macd(13)
+ #
+ # *Returns*:
+ # Array of two timeseries - comparison of fast EMA with slow
+ # and EMA with signal value
+ def macd(fast = 12, slow = 26, signal = 9)
  series = ema(fast) - ema(slow)
  [series, series.ema(signal)]
  end
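A brief sketch tying the smoothing helpers together (series values are random, so outputs will differ from the sample values in the comments above):

  ts = (1..100).map { rand }.to_ts
  ts.ma(5)                      # 5-period simple moving average
  ts.ema(10)                    # EMA with 2 / (n + 1) smoothing
  ts.ema(10, true)              # EMA with Wilder's 1 / n smoothing
  macd_line, signal_line = ts.macd(12, 26, 9)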
data/lib/bio-statsample-timeseries/timeseries/pacf.rb CHANGED
@@ -8,24 +8,79 @@ module Statsample
  #Inspiration: StatsModels
  pacf = [1.0]
  (1..max_lags).map do |i|
- pacf << yule_walker(timeseries, i, method)[-1]
+ pacf << yule_walker(timeseries, i, method)[0][-1]
  end
  pacf
  end

+
+ #=Levinson-Durbin Algorithm
+ #*Parameters*:
+ #-_series_ : timeseries, or a series of autocovariances
+ #-_nlags_: integer(default: 10): largest lag to include in recursion or order of the AR process
+ #-_is_acovf_: boolean(default: false): series is a timeseries if false, else it contains autocovariances
+
+ #*returns*:
+ #-_sigma_v_: estimate of the error variance
+ #-_arcoefs_: AR coefficients
+ #-_pacf_: pacf function
+ #-_sigma_: the array of sig values computed by the recursion
+ def self.levinson_durbin(series, nlags = 10, is_acovf = false)
+
+ if is_acovf
+ series = series.map(&:to_f)
+ else
+ #nlags = order(k) of AR in this case
+ series = series.acvf.map(&:to_f)[0..nlags]
+ end
+ #phi = Array.new((nlags+1), 0.0) { Array.new(nlags+1, 0.0) }
+ order = nlags
+ phi = Matrix.zero(nlags + 1)
+ sig = Array.new(nlags+1)
+
+ #setting initial point for recursion:
+ phi[1,1] = series[1]/series[0]
+ #phi[1][1] = series[1]/series[0]
+ sig[1] = series[0] - phi[1, 1] * series[1]
+
+ 2.upto(order).each do |k|
+ phi[k, k] = (series[k] - (Statsample::Vector.new(phi[1...k, k-1]) * series[1...k].reverse.to_ts).sum) / sig[k-1]
+ #some serious refinement needed in above for matrix manipulation. Will do today
+ 1.upto(k-1).each do |j|
+ phi[j, k] = phi[j, k-1] - phi[k, k] * phi[k-j, k-1]
+ end
+ sig[k] = sig[k-1] * (1-phi[k, k] ** 2)
+
+ end
+ sigma_v = sig[-1]
+ arcoefs_delta = phi.column(phi.column_size - 1)
+ arcoefs = arcoefs_delta[1..arcoefs_delta.size]
+ pacf = diag(phi)
+ pacf[0] = 1.0
+ return [sigma_v, arcoefs, pacf, sig, phi]
+ end
+
+ def self.diag(mat)
+ #returns array of diagonal elements of a matrix.
+ #will later abstract it to matrix.rb in Statsample
+ return mat.each_with_index(:diagonal).map { |x, r, c| x }
+ end
+
+
+ #=Yule-Walker Algorithm
+ #From the series, estimates AR(p) (autoregressive) parameters
+ #using the Yule-Walker equations. See -
+ #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
+
+ #*Parameters*:
+ #-_ts_::timeseries
+ #-_k_::order, default = 1
+ #-_method_:: can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator is (n - k)
+
+ #*returns*:
+ #-_rho_:: autoregressive coefficients
+ #-_sigma_:: sigma parameter
  def self.yule_walker(ts, k = 1, method='yw')
- #From the series, estimates AR(p)(autoregressive) parameter
- #using Yule-Waler equation. See -
- #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
-
- #parameters:
- #ts = series
- #k = order, default = 1
- #method = can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator
- #is (n - k)
-
- #returns:
- #rho => autoregressive coefficients
  ts = ts - ts.mean
  n = ts.size
  if method.downcase.eql? 'yw'
@@ -37,7 +92,7 @@ module Statsample
  denom =->(k) { n }
  end
  r = Array.new(k + 1) { 0.0 }
- r[0] = ts.map { |x| x ** 2 }.inject(:+).to_f / denom.call(0).to_f
+ r[0] = ts.map { |x| x**2 }.inject(:+).to_f / denom.call(0).to_f

  1.upto(k) do |l|
  r[l] = (ts[0...-l].zip(ts[l...ts.size])).map do |x|
@@ -48,7 +103,11 @@ module Statsample
  r_R = toeplitz(r[0...-1])

  mat = Matrix.columns(r_R).inverse()
- solve_matrix(mat, r[1..r.size])
+ phi = solve_matrix(mat, r[1..r.size])
+ phi_vector = Statsample::Vector.new(phi, :scale)
+ r_vector = Statsample::Vector.new(r[1..r.size], :scale)
+ sigma = r[0] - (r_vector * phi_vector).sum
+ return [phi, sigma]
  end

  def self.toeplitz(arr)
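A sketch of the new return shapes for these class methods, with ts assumed to be a Statsample::TimeSeries::Series (e.g. built via to_ts); both now hand back the sigma estimate alongside the coefficients:

  pacf_mod = Statsample::TimeSeries::Pacf::Pacf
  phi, sigma = pacf_mod.yule_walker(ts, 2, 'yw')
  sigma_v, arcoefs, pacf, sig, phi_matrix = pacf_mod.levinson_durbin(ts.acvf, 10, true)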
data/lib/bio-statsample-timeseries/utility.rb ADDED
@@ -0,0 +1,118 @@
+ module Statsample
+ class Vector
+ include Enumerable
+ include Writable
+ include Summarizable
+
+ #=Squares of sum
+ #---
+ #parameter:
+ #-demean::boolean - optional. __default__: false
+ #Sums the timeseries and then returns the square
+ def squares_of_sum(demean = false)
+ if demean
+ m = self.mean
+ self.map { |x| (x-m) }.sum ** 2
+ else
+ return self.sum.to_f ** 2
+ end
+ end
+ end
+
+
+ class ::Matrix
+ #=Squares of sum
+ #---
+ #Does squares of sum in column order.
+ #Necessary for computations in various processes
+ def squares_of_sum
+ (0...column_size).map do |j|
+ self.column(j).sum ** 2
+ end
+ end
+
+ #=Checks if given matrix is symmetric or not
+ #---
+ #returns bool
+ #`symmetric?` is present in Ruby Matrix 1.9.3+, but not in 1.8.*
+ def symmetric?
+ return false unless square?
+
+ (0...row_size).each do |i|
+ 0.upto(i).each do |j|
+ return false if self[i, j] != self[j, i]
+ end
+ end
+ true
+ end
+
+ #=Cholesky decomposition
+ #Reference: http://en.wikipedia.org/wiki/Cholesky_decomposition
+ #---
+ #==Description
+ #Cholesky decomposition is represented by `M = L x L*`, where
+ #M is the symmetric matrix, `L` is the lower triangular Cholesky factor,
+ #and `L*` is the conjugate transpose of `L`.
+ #*Returns* : Cholesky decomposition for a given matrix (if symmetric)
+ #*Utility*: Essential matrix function, requisite in Kalman filter, least squares
+ def cholesky
+ raise ArgumentError, "Given matrix should be symmetric" unless symmetric?
+ c = Matrix.zero(row_size)
+ 0.upto(row_size - 1).each do |k|
+ 0.upto(row_size - 1).each do |i|
+ if i == k
+ sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[k, j] ** 2 }
+ value = Math.sqrt(self[k,k] - sum)
+ c[k, k] = value
+ elsif i > k
+ sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[i, j] * c[k, j] }
+ value = (self[k,i] - sum) / c[k, k]
+ c[i, k] = value
+ end
+ end
+ end
+ c
+ end
+
+ #=Chain Product
+ #Class method
+ #Returns the chain product of the given matrices
+ #==Usage:
+ #Let `a` be 4 * 3 matrix,
+ #Let `b` be 3 * 3 matrix,
+ #Let `c` be 3 * 1 matrix,
+ #then `Matrix.chain_dot(a, b, c)`
+ #===*NOTE*: Send the matrices in multiplicative order with proper dimensions
+ def self.chain_dot(*args)
+ #inspired by Statsmodels
+ begin
+ args.reduce { |x, y| x * y } #perform matrix multiplication in order
+ rescue ExceptionForMatrix::ErrDimensionMismatch
+ puts "ExceptionForMatrix: Please provide matrices with proper multiplicative dimensions"
+ end
+ end
+
+
+ #=Adds a column of constants.
+ #Appends a column of ones to the matrix/array if the first argument is false
+ #If an n-array, first checks if a column of ones is already present;
+ #if present, the original (self) is returned, else it prepends a vector of ones
+ def add_constant(prepend = true)
+ #for Matrix
+ (0...column_size).each do |i|
+ if self.column(i).map(&:to_f) == Object::Vector.elements(Array.new(row_size, 1.0))
+ return self
+ end
+ end
+ #append/prepend a column of ones
+ vectors = (0...row_size).map do |r|
+ if prepend
+ [1.0].concat(self.row(r).to_a)
+ else
+ self.row(r).to_a.push(1.0)
+ end
+ end
+ return Matrix.rows(vectors)
+ end
+ end
+ end
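A quick sketch of the new Matrix helpers, using the same symmetric matrix exercised by the tests below:

  m = Matrix[[4, 12, -16], [12, 37, -43], [-16, -43, 93]]
  m.symmetric?          # => true
  m.cholesky            # => Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]]
  m.squares_of_sum      # => [0, 36, 1156]
  m.add_constant        # prepends a column of 1.0s, giving a 3 x 4 matrix
  Matrix.chain_dot(m, m, Matrix.column_vector([1, 1, 1]))   # m * m * column vector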
data/test/test_arima_simulators.rb CHANGED
@@ -11,10 +11,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  ts.pacf
  end
  context("AR(1) simulations") do
- include Statsample::ARIMA
+ include Statsample

  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ar_1_positive = @series.ar_sim(1500, [0.9], 2)
  @ar_1_negative = @series.ar_sim(1500, [-0.9], 2)

@@ -73,10 +73,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end

  context("AR(p) simulations") do
- include Statsample::ARIMA
+ include Statsample

  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
  @ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
  end
@@ -120,9 +120,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase


  context("MA(1) simulations") do
- include Statsample::ARIMA
+ include Statsample
  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ma_positive = @series.ar_sim(1500, [0.5], 2)
  @ma_negative = @series.ar_sim(1500, [-0.5], 2)
  end
@@ -153,9 +153,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end

  context("MA(q) simulations") do
- include Statsample::ARIMA
+ include Statsample
  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
  @ma_negative = @series.ar_sim(1500, [-0.5], 2)
  end
@@ -172,5 +172,15 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  #visualization: http://jsfiddle.net/7keHK/2/
  end
  end
+
+ context("Yule walker estimations") do
+ include Statsample
+
+ setup do
+ @timeseries = 100.times.map { rand }.to_ts
+ @arma_simulation =->(n) { @timeseries.ar(n, k)}
+ end
+ #to write test
+ end

data/test/test_matrix.rb ADDED
@@ -0,0 +1,92 @@
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+ class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
+
+ def setup_square_matrix(arr, n)
+ #returns n * n matrix by slicing arr
+ return Matrix.rows(arr.each_slice(n).to_a)
+ end
+ def setup
+ @arr_square = (1..16)
+ @mat_non_symmetric = setup_square_matrix(@arr_square, 4)
+
+ @arr_non_square = (1..12).to_a
+ #this is a 4 X 3 matrix
+ @mat_non_square = Matrix.rows(@arr_non_square.each_slice(3).to_a)
+ end
+
+ #TESTS for matrix symmetricity - Matrix#symmetric?
+ context("symmetric?") do
+
+ should "return false for non-symmetric matrix" do
+ assert_equal @mat_non_symmetric.symmetric?, false
+ end
+
+ should "return false for non-square matrix" do
+ assert_equal @mat_non_square.symmetric?, false
+ end
+
+ should "return true for symmetrix matrix" do
+ arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+ mat = setup_square_matrix(arr, 3)
+ assert_equal mat.symmetric?, true
+ end
+ end
+
+ #TESTS for cholesky decomposition - Matrix#cholesky
+ context("Cholesky Decomposition") do
+
+ should "raise error for non symmetric matrix" do
+ assert_raises(ArgumentError) { @mat_non_symmetric.cholesky }
+ end
+
+ should "raise raise error if non-square matix" do
+ arr = (1..12).to_a
+ mat = Matrix.rows(arr.each_slice(3).to_a)
+ assert_raises(ArgumentError) { @mat_non_square.cholesky }
+ end
+
+ should "give hermitian cholesky decomposed matrix for symmetrix matrix" do
+ arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+ mat = setup_square_matrix(arr, 3)
+ assert_equal Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]], mat.cholesky
+ end
+ end
+
+ #TESTS for matrix squares of sum - Matrix#squares_of_sum
+ context("Squares of sum") do
+
+ should "return array of size 4 for matrix - #{@mat_non_symmetric}" do
+ #equal to column size
+ assert_equal @mat_non_symmetric.squares_of_sum.size, 4
+ end
+
+ should "return [784, 1024, 1296, 1600] for matrix - #{@mat_non_symmetric}" do
+ assert_equal @mat_non_symmetric.squares_of_sum, [784, 1024, 1296, 1600]
+ end
+ end
+
+ #TESTS for adding constants to matrix
+ context("Add constant") do
+
+ should "prepend all rows with ones" do
+ mat = @mat_non_symmetric.add_constant
+ assert_equal @mat_non_symmetric.column_size, 4
+ assert_equal mat.column_size, 5
+ assert_equal mat.column(0).to_a, [1.0, 1.0,1.0,1.0]
+ end
+
+ should "append all rows with ones if prepend = false" do
+ mat = @mat_non_symmetric.add_constant(false)
+ assert_equal @mat_non_symmetric.column_size, 4
+ assert_equal mat.column_size, 5
+ assert_equal mat.column(mat.column_size - 1).to_a, [1.0, 1.0,1.0,1.0]
+ end
+
+ should "not append/prepend if a column of ones already exists in matrix" do
+ matrix = Matrix[[1, 2, 1, 4], [5, 6, 1, 8], [9, 10, 1, 12]]
+ const_mat = matrix.add_constant
+ assert_equal matrix.column_size, const_mat.column_size
+ assert_equal matrix.row_size, const_mat.row_size
+ end
+ end
+ end
data/test/test_tseries.rb CHANGED
@@ -7,7 +7,7 @@ class StatsampleTestTimeSeries < MiniTest::Unit::TestCase

  def setup
  # daily closes of iShares XIU on the TSX
- @xiu = Statsample::TimeSeries::TimeSeries.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
+ @xiu = Statsample::TimeSeries::Series.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
  16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
  end

metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: bio-statsample-timeseries
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.1.2
  prerelease:
  platform: ruby
  authors:
@@ -10,7 +10,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-07-26 00:00:00.000000000 Z
+ date: 2013-09-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: statsample
@@ -216,16 +216,17 @@ files:
  - features/support/env.rb
  - lib/bio-statsample-timeseries.rb
  - lib/bio-statsample-timeseries/arima.rb
- - lib/bio-statsample-timeseries/statsample-timeseries.rb
  - lib/bio-statsample-timeseries/timeseries.rb
  - lib/bio-statsample-timeseries/timeseries/pacf.rb
+ - lib/bio-statsample-timeseries/utility.rb
  - test/fixtures/stock_data.csv
  - test/helper.rb
  - test/test_arima_simulators.rb
+ - test/test_matrix.rb
  - test/test_pacf.rb
  - test/test_tseries.rb
  - test/test_wald.rb
- homepage: http://github.com/ankurgel/bioruby-statsample-timeseries
+ homepage: http://github.com/AnkurGel/bioruby-statsample-timeseries
  licenses:
  - MIT
  post_install_message:
@@ -240,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  segments:
  - 0
- hash: 146794323
+ hash: -122253519
  required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements: