bio-statsample-timeseries 0.1.1 → 0.1.2

This diff shows the changes between two publicly released versions of the package, as they appear in its public registry. It is provided for informational purposes only.
@@ -1,8 +1,8 @@
  = bio-statsample-timeseries

  {<img
- src="https://secure.travis-ci.org/ankurgel/bioruby-statsample-timeseries.png"
- />}[http://travis-ci.org/#!/ankurgel/bioruby-statsample-timeseries]
+ src="https://secure.travis-ci.org/AnkurGel/bioruby-statsample-timeseries.png"
+ />}[http://travis-ci.org/#!/AnkurGel/bioruby-statsample-timeseries]

  Full description goes here

@@ -27,7 +27,7 @@ the source tree.

  Information on the source tree, documentation, issues and how to contribute, see

- http://github.com/ankurgel/bioruby-statsample-timeseries
+ http://github.com/AnkurGel/bioruby-statsample-timeseries

  The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.

data/Rakefile CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
  Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-statsample-timeseries"
- gem.homepage = "http://github.com/ankurgel/bioruby-statsample-timeseries"
+ gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-timeseries"
  gem.license = "MIT"
  gem.summary = %Q{TimeSeries modules for Statsample}
  gem.description = %Q{Statsample-timeseries is an extension to Statsample. It incorporates helpful timeseries functions and modules like ARMA, ARIMA, acf, pacf, lags etc.}
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.1
+ 0.1.2
@@ -8,9 +8,9 @@
  #
  # In this file only require other files. Avoid other source code.

- require 'bio-statsample-timeseries/statsample-timeseries.rb'
  require 'statsample'
  require_relative 'bio-statsample-timeseries/timeseries.rb'
  require_relative 'bio-statsample-timeseries/arima.rb'
+ require_relative 'bio-statsample-timeseries/utility.rb'

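With the require list above, loading the gem now also pulls in the new utility.rb extensions alongside the time-series and ARIMA code. A minimal load sketch (assuming a standard install of the 0.1.2 gem):

  require 'bio-statsample-timeseries'

  Matrix[[1, 2], [2, 1]].symmetric?   # => true, via the Matrix helpers added in utility.rb (shown further down)
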
@@ -1,15 +1,29 @@
  #require 'debugger'
  module Statsample
- module ARIMA
+ module TimeSeries
+
+ def self.arima
+ #not passing (ds,p,i,q) elements for now
+ #will do that once #arima is ready for all modelling
+ Statsample::TimeSeries::ARIMA.new
+ end
+
  class ARIMA < Statsample::Vector
  include Statsample::TimeSeries
-
+ # SUGGESTION: We could use an API similar to R
+ # like
+ # ar_obj=Statsample::TimeSeries.arima(ds,p,i,q)
+ # which calls
+ # Statsample::TimeSeries::Arima.new(ds,p,i,q)
  def arima(ds, p, i, q)
  #prototype
+ # ISSUE: We should differenciate now, if i>0.
+ # The result should be send to next step
  if q.zero?
  self.ar(p)
  elsif p.zero?
  self.ma(p)
+ # ISSUE-> ELSE -> simultaneuos estimation of MA and AR parameters
  end
  end

@@ -20,15 +34,49 @@ module Statsample
  #or Burg's algorithm(more efficient)
  end

- def yule_walker()
- #To be implemented
- end
-
+ #Converts a linear array into a vector
  def create_vector(arr)
  Statsample::Vector.new(arr, :scale)
  end

- #tentative AR(p) simulator
+
+ def yule_walker(ts, n, k)
+ #parameters: timeseries, no of observations, order
+ #returns: simulated autoregression with phi parameters and sigma
+ phi, sigma = Pacf::Pacf.yule_walker(ts, k)
+ return phi, sigma
+ #return ar_sim(n, phi, sigma)
+ end
+
+ def levinson_durbin(ts, n, k)
+ #parameters;
+ #ts: timseries against which to generate phi coefficients
+ #n: number of observations for simulation
+ #k: order of AR
+ intermediate = Pacf::Pacf.levinson_durbin(ts, k)
+ phi, sigma = intermediate[1], intermediate[0]
+ return phi, sigma
+ #return ar_sim(n, phi, sigma)
+ end
+
+ #=Autoregressive Simulator
+ #Simulates an autoregressive AR(p) model with specified number of
+ #observations(n), with phi number of values for order p and sigma.
+ #
+ #*Analysis*: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+ #
+ #*Parameters*:
+ #-_n_::integer, number of observations
+ #-_phi_::array of phi values, e.g: [0.35, 0.213] for p = 2
+ #-_sigma_::float, sigma value for error generalization
+ #
+ #*Usage*:
+ # ar = ARIMA.new
+ # ar.ar_sim(1500, [0.3, 0.9], 0.12)
+ # # => AR(2) autoregressive series of 1500 values
+ #
+ #*Returns*:
+ #Array of generated autoregressive series against attributes
  def ar_sim(n, phi, sigma)
  #using random number generator for inclusion of white noise
  err_nor = Distribution::Normal.rng(0, sigma)
@@ -58,7 +106,21 @@ module Statsample
  x - buffer
  end

- #moving average simulator
+ #=Moving Average Simulator
+ #Simulates a moving average model with specified number of
+ #observations(n), with theta values for order k and sigma
+ #
+ #*Parameters*:
+ #-_n_::integer, number of observations
+ #-_theta_::array of floats, e.g: [0.23, 0.732], must be < 1
+ #-_sigma_::float, sigma value for whitenoise error
+ #
+ #*Usage*:
+ # ar = ARIMA.new
+ # ar.ma_sim(1500, [0.23, 0.732], 0.27)
+ #
+ #*Returns*:
+ #Array of generated MA(q) model
  def ma_sim(n, theta, sigma)
  #n is number of observations (eg: 1000)
  #theta are the model parameters containting q values
@@ -84,7 +146,28 @@ module Statsample
  x
  end

- #arma simulator
+ #ARMA(Autoregressive and Moving Average) Simulator
+ #ARMA is represented by:
+ #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
+ #This simulates the ARMA model against p, q and sigma.
+ #If p = 0, then model is pure MA(q),
+ #If q = 0, then model is pure AR(p),
+ #otherwise, model is ARMA(p, q) represented by above.
+ #
+ #Detailed analysis: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+ #
+ #*Parameters*:
+ #-_n_::integer, number of observations
+ #-_p_::array, contains p number of phi values for AR(p) process
+ #-_q_::array, contains q number of theta values for MA(q) process
+ #-_sigma_::float, sigma value for whitenoise error generation
+ #
+ #*Usage*:
+ # ar = ARIMA.new
+ # ar.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)
+ #
+ #*Returns*:
+ #array of generated ARMA model values
  def arma_sim(n, p, q, sigma)
  #represented by :
  #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
@@ -119,6 +202,76 @@ module Statsample
  end
  x - buffer
  end
+
+ #=Hannan-Rissanen for ARMA fit
+ def self.hannan(ts, p, q, k)
+ start_params = create_vector(Array.new(p+q+k, 0))
+ ts_dup = ts.dup
+
+ end
+ end
+
+ module Arima
+ class KalmanFilter < Statsample::Vector
+ include Statsample::TimeSeries
+
+ #=T
+ #The coefficient matrix for the state vector in state equation
+ # It's dimensions is r+k x r+k
+ #*Parameters*
+ #-_r_::integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
+ #-_k_::integer, number of exogeneous variables in ARMA model
+ #-_q_::integer, The AR coefficient of ARMA model
+
+ #*References*: Statsmodels tsa, Durbin and Koopman Section 4.7
+ def self.T(r, k, p)
+ arr = Matrix.zero(r)
+ params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
+
+ params_padded[0...p] = params[k...(p+k)]
+ intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
+ #appending an array filled with padded values in beginning
+ intermediate_matrix[0,0] = [params_padded]
+
+ #now generating column matrix for that:
+ arr = Matrix.columns(intermediate_matrix)
+ arr_00 = arr[0,0]
+
+ #identify matrix substituition in matrix except row[0] and column[0]
+ r.times do |i|
+ arr[r,r] = 1
+ end
+ arr[0,0] = arr_00
+ arr
+ end
+
+
+ #=R
+ #The coefficient matrix for the state vector in the observation matrix.
+ #It's dimension is r+k x 1
+ #*Parameters*
+ #-_r_::integer, r is max(p, q+1) where p and q are order of AR and MA respectively
+ #-_k_::integer, number of exogeneous variables in ARMA model
+ #-_q_::integer, The MA order in ARMA model
+ #-_p_::integer, The AR order in ARMA model
+ #*References*: Statsmodels tsa, Durbin and Koopman
+ def self.R(r, k, q, p)
+ arr = Matrix.column_vector(Array.new(r,0.0))
+
+ #pending - in kind of difficult end here;
+ end
+
+ #=Z
+ #The Z selector matrix
+ #*Parameters*
+ #-_r_::integer, max(p, q+1)
+ #Returns: vector
+ def self.Z(r)
+ arr = Statsample::Vector.new(Array.new(r, 0.0), :scale)
+ arr[0] = 1.0
+ return arr
+ end
+ end
  end
  end
  end
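
The arima.rb changes above add a Statsample::TimeSeries.arima factory and documented simulators. Roughly, the intended usage looks like this (a sketch assembled from the Usage comments in the diff; the series are random by construction, so values differ run to run):

  series = Statsample::TimeSeries.arima           # replaces Statsample::ARIMA.new
  ar2  = series.ar_sim(1500, [0.3, 0.9], 0.12)    # AR(2) series, 1500 observations
  ma2  = series.ma_sim(1500, [0.23, 0.732], 0.27) # MA(2) series
  arma = series.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)  # ARMA(2, 2)
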
@@ -3,7 +3,7 @@ module Statsample::TimeSeriesShorthands
  # Creates a new Statsample::TimeSeries object
  # Argument should be equal to TimeSeries.new
  def to_time_series(*args)
- Statsample::TimeSeries::TimeSeries.new(self, :scale, *args)
+ Statsample::TimeSeries::Series.new(self, :scale, *args)
  end

  alias :to_ts :to_time_series
@@ -17,7 +17,7 @@ module Statsample
  module TimeSeries
  # Collection of data indexed by time.
  # The order goes from earliest to latest.
- class TimeSeries < Statsample::Vector
+ class Series < Statsample::Vector
  include Statsample::TimeSeries::Pacf
  # Calculates the autocorrelation coefficients of the series.
  #
@@ -31,7 +31,7 @@ module Statsample
  # ts.acf # => array with first 21 autocorrelations
  # ts.acf 3 # => array with first 3 autocorrelations
  #
- def acf max_lags = nil
+ def acf(max_lags = nil)
  max_lags ||= (10 * Math.log10(size)).to_i

  (0..max_lags).map do |i|
@@ -47,14 +47,95 @@ module Statsample
  end
  end

- def pacf(max_lags = nil, method = 'yw')
+ #=Partial Autocorrelation
+ #Generates partial autocorrelation series for a timeseries
+ #*Parameters*:
+ #-_max_lags_::integer, optional - provide number of lags
+ #-_method_::string. Default: 'yw'.
+ # * _yw_:: For yule-walker algorithm unbiased approach
+ # * _mle_:: For Maximum likelihood algorithm approach
+ # * _ld_:: Forr Levinson-Durbin recursive approach
+ #Returns - array of pacf
+ #
+ def pacf(max_lags = nil, method = :yw)
  #parameters:
  #max_lags => maximum number of lags for pcf
  #method => for autocovariance in yule_walker:
  #'yw' for 'yule-walker unbaised', 'mle' for biased maximum likelihood
+ #'ld' for Levinson-Durbin recursion

+ method = method.downcase.to_sym
  max_lags ||= (10 * Math.log10(size)).to_i
- Pacf::Pacf.pacf_yw(self, max_lags, method)
+ if method.eql? :yw or method.eql? :mle
+ Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
+ elsif method == :ld
+ series = self.acvf
+ Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
+ else
+ raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
+ end
+ end
+
+ #=Autoregressive estimation
+ #Generates AR(k) series for the calling timeseries by yule walker.
+ #*Parameters*:
+ #-_n_::integer, (default = 1500) number of observations for AR.
+ #-_k_::integer, (default = 1) order of AR process.
+ #*Returns*:
+ #Array constituting estimated AR series.
+ #
+ def ar(n = 1500, k = 1)
+ series = Statsample::TimeSeries.arima
+ #series = Statsample::TimeSeries::ARIMA.new
+ series.yule_walker(self, n, k)
+ end
+
+ #=AutoCovariance
+ #Provides autocovariance of timeseries.
+ #-Parameters:
+ #demean = true; optional. Supply false if series is not to be demeaned
+ #unbiased = true; optional. true/false for unbiased/biased form of autocovariance
+ #-Returns-: Autocovariance value
+ #
+ def acvf(demean = true, unbiased = true)
+ #TODO: change parameters list in opts.merge as suggested by John
+ #functionality: computes autocovariance of timeseries data
+ #returns: array of autocovariances
+
+ if demean
+ demeaned_series = self - self.mean
+ else
+ demeaned_series = self
+ end
+ n = self.acf.size
+ m = self.mean
+ if unbiased
+ d = Array.new(self.size, self.size)
+ else
+ d = ((1..self.size).to_a.reverse)[0..n]
+ end
+
+
+ 0.upto(n - 1).map do |i|
+ (demeaned_series * (self.lag(i) - m)).sum / d[i]
+ end
+ end
+
+ #=Correlation
+ #Gives correlation of timeseries.
+ #
+ def correlate(a, v, mode = 'full')
+ #peforms cross-correlation of two series
+ #multiarray.correlate2(a, v, 'full')
+ if a.size < v.size
+ raise("Should have same size!")
+ end
+ ps = a.size + v.size - 1
+ a_padded = Array.new(ps, 0)
+ a_padded[0...a.size] = a
+
+ out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
+ #ongoing
  end

  # Lags the series by k periods.
@@ -71,7 +152,7 @@ module Statsample
  # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
  # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
  #
- def lag k = 1
+ def lag(k = 1)
  return self if k == 0

  dup.tap do |lagged|
@@ -86,13 +167,14 @@ module Statsample
  end
  end

+ #=Diff
  # Performs a first difference of the series.
  #
  # The convention is to set the oldest observations (the first ones
  # in the series) to nil so that the size of the diffed series is the
  # same as the original.
  #
- # Usage:
+ #*Usage*:
  #
  # ts = (1..10).map { rand }.to_ts
  # # => [0.69, 0.23, 0.44, 0.71, ...]
@@ -103,17 +185,23 @@ module Statsample
  self - self.lag
  end

- # Calculates a moving average of the series using the provided
+ #=Moving Average
+ # Calculates the moving average of the series using the provided
  # lookback argument. The lookback defaults to 10 periods.
+ #*Parameters*:
+ #-_n_::integer, (default = 10) - loopback argument
  #
- # Usage:
+ #*Usage*:
  #
  # ts = (1..100).map { rand }.to_ts
  # # => [0.69, 0.23, 0.44, 0.71, ...]
  #
  # # first 9 observations are nil
  # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
- def ma n = 10
+ #
+ #*Returns*:
+ #Resulting moving average timeseries object
+ def ma(n = 10)
  return mean if n >= size

  ([nil] * (n - 1) + (0..(size - n)).map do |i|
@@ -121,6 +209,7 @@ module Statsample
  end).to_time_series
  end

+ #=Exponential Moving Average
  # Calculates an exponential moving average of the series using a
  # specified parameter. If wilder is false (the default) then the EMA
  # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
@@ -130,14 +219,22 @@ module Statsample
  # use a lot more than n observations to calculate. The series is stable
  # if the size of the series is >= 3.45 * (n + 1)
  #
- # Usage:
+ #*Parameters*:
+ #-_n_::integer, (default = 10)
+ #-_wilder_::boolean, (default = false), if true, 1/n value is used for smoothing;
+ #if false, uses 2/(n+1) value
+ #
+ #*Usage*:
  #
  # ts = (1..100).map { rand }.to_ts
  # # => [0.69, 0.23, 0.44, 0.71, ...]
  #
  # # first 9 observations are nil
  # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
- def ema n = 10, wilder = false
+ #
+ #*Returns*:
+ #EMA timeseries
+ def ema(n = 10, wilder = false)
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)

  # need to start everything from the first non-nil observation
@@ -156,9 +253,24 @@ module Statsample
  base.to_time_series
  end

+ #=Moving Average Convergence-Divergence
  # Calculates the MACD (moving average convergence-divergence) of the time
  # series - this is a comparison of a fast EMA with a slow EMA.
- def macd fast = 12, slow = 26, signal = 9
+ #
+ # *Parameters*:
+ # -_fast_::integer, (default = 12) - fast component of MACD
+ # -_slow_::integer, (default = 26) - slow component of MACD
+ # -_signal_::integer, (default = 9) - signal component of MACD
+ #
+ # *Usage*:
+ # ts = (1..100).map { rand }.to_ts
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
+ # ts.macd(13)
+ #
+ # *Returns*:
+ # Array of two timeseries - comparison of fast EMA with slow
+ # and EMA with signal value
+ def macd(fast = 12, slow = 26, signal = 9)
  series = ema(fast) - ema(slow)
  [series, series.ema(signal)]
  end
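
With the class renamed from TimeSeries to Series and the pacf/acvf additions above, the instance API reads roughly as follows (a sketch based on the Usage comments in this file; the input is random, so outputs are illustrative only):

  ts = 100.times.map { rand }.to_ts   # a Statsample::TimeSeries::Series

  ts.acf                 # autocorrelations, default lag count 10*log10(size)
  ts.pacf(10, :ld)       # partial autocorrelations via Levinson-Durbin
  ts.acvf                # autocovariances (demeaned, unbiased by default)
  ts.ma(10)              # 10-period moving average
  ts.ema(10, true)       # EMA with Wilder smoothing (1/n)
  ts.macd(12, 26, 9)     # [fast-minus-slow EMA series, its signal EMA]
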
@@ -8,24 +8,79 @@ module Statsample
  #Inspiration: StatsModels
  pacf = [1.0]
  (1..max_lags).map do |i|
- pacf << yule_walker(timeseries, i, method)[-1]
+ pacf << yule_walker(timeseries, i, method)[0][-1]
  end
  pacf
  end

+
+ #=Levinson-Durbin Algorithm
+ #*Parameters*:
+ #-_series_ : timeseries, or a series of autocovariances
+ #-_nlags_: integer(default: 10): largest lag to include in recursion or order of the AR process
+ #-_is_acovf_: boolean(default: false): series is timeseries if it is false, else contains autocavariances

+ #*returns*:
+ #-_sigma_v_: estimate of the error variance
+ #-_arcoefs_: AR coefficients
+ #-_pacf_: pacf function
+ #-_sigma_: some function
+ def self.levinson_durbin(series, nlags = 10, is_acovf = false)
+
+ if is_acovf
+ series = series.map(&:to_f)
+ else
+ #nlags = order(k) of AR in this case
+ series = series.acvf.map(&:to_f)[0..nlags]
+ end
+ #phi = Array.new((nlags+1), 0.0) { Array.new(nlags+1, 0.0) }
+ order = nlags
+ phi = Matrix.zero(nlags + 1)
+ sig = Array.new(nlags+1)
+
+ #setting initial point for recursion:
+ phi[1,1] = series[1]/series[0]
+ #phi[1][1] = series[1]/series[0]
+ sig[1] = series[0] - phi[1, 1] * series[1]
+
+ 2.upto(order).each do |k|
+ phi[k, k] = (series[k] - (Statsample::Vector.new(phi[1...k, k-1]) * series[1...k].reverse.to_ts).sum) / sig[k-1]
+ #some serious refinement needed in above for matrix manipulation. Will do today
+ 1.upto(k-1).each do |j|
+ phi[j, k] = phi[j, k-1] - phi[k, k] * phi[k-j, k-1]
+ end
+ sig[k] = sig[k-1] * (1-phi[k, k] ** 2)
+
+ end
+ sigma_v = sig[-1]
+ arcoefs_delta = phi.column(phi.column_size - 1)
+ arcoefs = arcoefs_delta[1..arcoefs_delta.size]
+ pacf = diag(phi)
+ pacf[0] = 1.0
+ return [sigma_v, arcoefs, pacf, sig, phi]
+ end
+
+ def self.diag(mat)
+ #returns array of diagonal elements of a matrix.
+ #will later abstract it to matrix.rb in Statsample
+ return mat.each_with_index(:diagonal).map { |x, r, c| x }
+ end
+
+
+ #=Yule Walker Algorithm
+ #From the series, estimates AR(p)(autoregressive) parameter
+ #using Yule-Waler equation. See -
+ #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model

+ #*Parameters*:
+ #-_ts_::timeseries
+ #-_k_::order, default = 1
+ #-_method_:: can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator is (n - k)

+ #*returns*:
+ #-_rho_:: autoregressive coefficients
+ #-_sigma_:: sigma parameter
  def self.yule_walker(ts, k = 1, method='yw')
- #From the series, estimates AR(p)(autoregressive) parameter
- #using Yule-Waler equation. See -
- #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
-
- #parameters:
- #ts = series
- #k = order, default = 1
- #method = can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator
- #is (n - k)
-
- #returns:
- #rho => autoregressive coefficients
  ts = ts - ts.mean
  n = ts.size
  if method.downcase.eql? 'yw'
@@ -37,7 +92,7 @@ module Statsample
  denom =->(k) { n }
  end
  r = Array.new(k + 1) { 0.0 }
- r[0] = ts.map { |x| x ** 2 }.inject(:+).to_f / denom.call(0).to_f
+ r[0] = ts.map { |x| x**2 }.inject(:+).to_f / denom.call(0).to_f

  1.upto(k) do |l|
  r[l] = (ts[0...-l].zip(ts[l...ts.size])).map do |x|
@@ -48,7 +103,11 @@ module Statsample
  r_R = toeplitz(r[0...-1])

  mat = Matrix.columns(r_R).inverse()
- solve_matrix(mat, r[1..r.size])
+ phi = solve_matrix(mat, r[1..r.size])
+ phi_vector = Statsample::Vector.new(phi, :scale)
+ r_vector = Statsample::Vector.new(r[1..r.size], :scale)
+ sigma = r[0] - (r_vector * phi_vector).sum
+ return [phi, sigma]
  end

  def self.toeplitz(arr)
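
Note that Pacf.yule_walker now returns a [phi, sigma] pair rather than just the coefficient array, which is why pacf_yw above indexes the result with [0]. A rough call sketch, assuming the Statsample::TimeSeries::Pacf::Pacf nesting implied by the calls in this file:

  ts = 200.times.map { rand }.to_ts
  phi, sigma = Statsample::TimeSeries::Pacf::Pacf.yule_walker(ts, 2, 'yw')
  # phi   => array of the 2 estimated AR coefficients
  # sigma => the accompanying noise-variance term
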
@@ -0,0 +1,118 @@
+ module Statsample
+ class Vector
+ include Enumerable
+ include Writable
+ include Summarizable
+
+ #=Squares of sum
+ #---
+ #parameter:
+ #-demean::boolean - optional. __default__: false
+ #Sums the timeseries and then returns the square
+ def squares_of_sum(demean = false)
+ if demean
+ m = self.mean
+ self.map { |x| (x-m) }.sum ** 2
+ else
+ return self.sum.to_f ** 2
+ end
+ end
+ end
+
+
+ class ::Matrix
+ #=Squares of sum
+ #---
+ #Does squares of sum in column order.
+ #Necessary for computations in various processes
+ def squares_of_sum
+ (0...column_size).map do |j|
+ self.column(j).sum ** 2
+ end
+ end
+
+ #=Checks if given matrix is symmetric or not
+ #---
+ #returns bool
+ #`symmetric?` is present in Ruby Matrix 1.9.3+, but not in 1.8.*
+ def symmetric?
+ return false unless square?
+
+ (0...row_size).each do |i|
+ 0.upto(i).each do |j|
+ return false if self[i, j] != self[j, i]
+ end
+ end
+ true
+ end
+
+ #=Cholesky decomposition
+ #Reference: http://en.wikipedia.org/wiki/Cholesky_decomposition
+ #---
+ #==Description
+ #Cholesky decomposition is reprsented by `M = L X L*`, where
+ #M is the symmetric matrix and `L` is the lower half of cholesky matrix,
+ #and `L*` is the conjugate form of `L`.
+ #*Returns* : Cholesky decomposition for a given matrix(if symmetric)
+ #*Utility*: Essential matrix function, requisite in kalman filter, least squares
+ def cholesky
+ raise ArgumentError, "Given matrix should be symmetric" unless symmetric?
+ c = Matrix.zero(row_size)
+ 0.upto(row_size - 1).each do |k|
+ 0.upto(row_size - 1).each do |i|
+ if i == k
+ sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[k, j] ** 2 }
+ value = Math.sqrt(self[k,k] - sum)
+ c[k, k] = value
+ elsif i > k
+ sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[i, j] * c[k, j] }
+ value = (self[k,i] - sum) / c[k, k]
+ c[i, k] = value
+ end
+ end
+ end
+ c
+ end
+
+ #=Chain Product
+ #Class method
+ #Returns the chain product of two matrices
+ #==Usage:
+ #Let `a` be 4 * 3 matrix,
+ #Let `b` be 3 * 3 matrix,
+ #Let `c` be 3 * 1 matrix,
+ #then `Matrix.chain_dot(a, b, c)`
+ #===*NOTE*: Send the matrices in multiplicative order with proper dimensions
+ def self.chain_dot(*args)
+ #inspired by Statsmodels
+ begin
+ args.reduce { |x, y| x * y } #perform matrix multiplication in order
+ rescue ExceptionForMatrix::ErrDimensionMismatch
+ puts "ExceptionForMatrix: Please provide matrices with proper multiplicative dimensions"
+ end
+ end
+
+
+ #=Adds a column of constants.
+ #Appends a column of ones to the matrix/array if first argument is false
+ #If an n-array, first checks if one column of ones is already present
+ #if present, then original(self) is returned, else, prepends with a vector of ones
+ def add_constant(prepend = true)
+ #for Matrix
+ (0...column_size).each do |i|
+ if self.column(i).map(&:to_f) == Object::Vector.elements(Array.new(row_size, 1.0))
+ return self
+ end
+ end
+ #append/prepend a column of one's
+ vectors = (0...row_size).map do |r|
+ if prepend
+ [1.0].concat(self.row(r).to_a)
+ else
+ self.row(r).to_a.push(1.0)
+ end
+ end
+ return Matrix.rows(vectors)
+ end
+ end
+ end
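
The new Matrix helpers in utility.rb can be exercised like this (a sketch mirroring the assertions in test/test_matrix.rb further down; the sample matrix is the one used there):

  m = Matrix[[4, 12, -16], [12, 37, -43], [-16, -43, 93]]
  m.symmetric?       # => true
  m.cholesky         # => Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]]
  m.squares_of_sum   # => column-wise squares of the column sums
  m.add_constant     # => same rows with a column of 1.0 prepended
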
@@ -11,10 +11,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  ts.pacf
  end
  context("AR(1) simulations") do
- include Statsample::ARIMA
+ include Statsample

  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ar_1_positive = @series.ar_sim(1500, [0.9], 2)
  @ar_1_negative = @series.ar_sim(1500, [-0.9], 2)

@@ -73,10 +73,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end

  context("AR(p) simulations") do
- include Statsample::ARIMA
+ include Statsample

  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
  @ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
  end
@@ -120,9 +120,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase


  context("MA(1) simulations") do
- include Statsample::ARIMA
+ include Statsample
  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ma_positive = @series.ar_sim(1500, [0.5], 2)
  @ma_negative = @series.ar_sim(1500, [-0.5], 2)
  end
@@ -153,9 +153,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end

  context("MA(q) simulations") do
- include Statsample::ARIMA
+ include Statsample
  setup do
- @series = ARIMA.new
+ @series = TimeSeries.arima
  @ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
  @ma_negative = @series.ar_sim(1500, [-0.5], 2)
  end
@@ -172,5 +172,15 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  #visualization: http://jsfiddle.net/7keHK/2/
  end
  end
+
+ context("Yule walker estimations") do
+ include Statsample
+
+ setup do
+ @timeseries = 100.times.map { rand }.to_ts
+ @arma_simulation =->(n) { @timeseries.ar(n, k)}
+ end
+ #to write test
+ end
  end

@@ -0,0 +1,92 @@
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+ class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
+
+ def setup_square_matrix(arr, n)
+ #returns n * n matrix by slicing arr
+ return Matrix.rows(arr.each_slice(n).to_a)
+ end
+ def setup
+ @arr_square = (1..16)
+ @mat_non_symmetric = setup_square_matrix(@arr_square, 4)
+
+ @arr_non_square = (1..12).to_a
+ #this is a 4 X 3 matrix
+ @mat_non_square = Matrix.rows(@arr_non_square.each_slice(3).to_a)
+ end
+
+ #TESTS for matrix symmetricity - Matrix#symmetric?
+ context("symmetric?") do
+
+ should "return false for non-symmetric matrix" do
+ assert_equal @mat_non_symmetric.symmetric?, false
+ end
+
+ should "return false for non-square matrix" do
+ assert_equal @mat_non_square.symmetric?, false
+ end
+
+ should "return true for symmetrix matrix" do
+ arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+ mat = setup_square_matrix(arr, 3)
+ assert_equal mat.symmetric?, true
+ end
+ end
+
+ #TESTS for cholesky decomposition - Matrix#cholesky
+ context("Cholesky Decomposition") do
+
+ should "raise error for non symmetric matrix" do
+ assert_raises(ArgumentError) { @mat_non_symmetric.cholesky }
+ end
+
+ should "raise raise error if non-square matix" do
+ arr = (1..12).to_a
+ mat = Matrix.rows(arr.each_slice(3).to_a)
+ assert_raises(ArgumentError) { @mat_non_square.cholesky }
+ end
+
+ should "give hermitian cholesky decomposed matrix for symmetrix matrix" do
+ arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+ mat = setup_square_matrix(arr, 3)
+ assert_equal Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]], mat.cholesky
+ end
+ end
+
+ #TESTS for matrix squares of sum - Matrix#squares_of_sum
+ context("Squares of sum") do
+
+ should "return array of size 4 for matrix - #{@mat_non_symmetric}" do
+ #equal to column size
+ assert_equal @mat_non_symmetric.squares_of_sum.size, 4
+ end
+
+ should "return [784, 1024, 1296, 1600] for matrix - #{@mat_non_symmetric}" do
+ assert_equal @mat_non_symmetric.squares_of_sum, [784, 1024, 1296, 1600]
+ end
+ end
+
+ #TESTS for adding constants to matrix
+ context("Add constant") do
+
+ should "prepend all rows with ones" do
+ mat = @mat_non_symmetric.add_constant
+ assert_equal @mat_non_symmetric.column_size, 4
+ assert_equal mat.column_size, 5
+ assert_equal mat.column(0).to_a, [1.0, 1.0,1.0,1.0]
+ end
+
+ should "append all rows with ones if prepend = false" do
+ mat = @mat_non_symmetric.add_constant(false)
+ assert_equal @mat_non_symmetric.column_size, 4
+ assert_equal mat.column_size, 5
+ assert_equal mat.column(mat.column_size - 1).to_a, [1.0, 1.0,1.0,1.0]
+ end
+
+ should "not append/prepend if a column of ones already exists in matrix" do
+ matrix = Matrix[[1, 2, 1, 4], [5, 6, 1, 8], [9, 10, 1, 12]]
+ const_mat = matrix.add_constant
+ assert_equal matrix.column_size, const_mat.column_size
+ assert_equal matrix.row_size, const_mat.row_size
+ end
+ end
+ end
@@ -7,7 +7,7 @@ class StatsampleTestTimeSeries < MiniTest::Unit::TestCase

  def setup
  # daily closes of iShares XIU on the TSX
- @xiu = Statsample::TimeSeries::TimeSeries.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
+ @xiu = Statsample::TimeSeries::Series.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
  16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
  end

metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: bio-statsample-timeseries
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.1.2
  prerelease:
  platform: ruby
  authors:
@@ -10,7 +10,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-07-26 00:00:00.000000000 Z
+ date: 2013-09-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: statsample
@@ -216,16 +216,17 @@ files:
  - features/support/env.rb
  - lib/bio-statsample-timeseries.rb
  - lib/bio-statsample-timeseries/arima.rb
- - lib/bio-statsample-timeseries/statsample-timeseries.rb
  - lib/bio-statsample-timeseries/timeseries.rb
  - lib/bio-statsample-timeseries/timeseries/pacf.rb
+ - lib/bio-statsample-timeseries/utility.rb
  - test/fixtures/stock_data.csv
  - test/helper.rb
  - test/test_arima_simulators.rb
+ - test/test_matrix.rb
  - test/test_pacf.rb
  - test/test_tseries.rb
  - test/test_wald.rb
- homepage: http://github.com/ankurgel/bioruby-statsample-timeseries
+ homepage: http://github.com/AnkurGel/bioruby-statsample-timeseries
  licenses:
  - MIT
  post_install_message:
@@ -240,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  segments:
  - 0
- hash: 146794323
+ hash: -122253519
  required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements: