statsample-timeseries 0.0.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.3
@@ -1,74 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # BioRuby bio-statsample-timeseries Plugin BioStatsampleTimeseries
4
- # Author:: Ankur Goel
5
- # Copyright:: 2013
6
-
7
- USAGE = "Describe bio-statsample-timeseries"
8
-
9
- if ARGV.size == 0
10
- print USAGE
11
- end
12
-
13
- require 'bio-statsample-timeseries'
14
- require 'optparse'
15
-
16
- # Uncomment when using the bio-logger
17
- # require 'bio-logger'
18
- # Bio::Log::CLI.logger('stderr')
19
- # Bio::Log::CLI.trace('info')
20
-
21
- options = {:example_switch=>false,:show_help=>false}
22
- opts = OptionParser.new do |o|
23
- o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"
24
-
25
- o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
26
- # TODO: your logic here, below an example
27
- options[:example_parameter] = 'this is a parameter'
28
- end
29
-
30
- o.separator ""
31
- o.on("--switch-example", 'TODO: put a description for the SWITCH') do
32
- # TODO: your logic here, below an example
33
- self[:example_switch] = true
34
- end
35
-
36
- # Uncomment the following when using the bio-logger
37
- # o.separator ""
38
- # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
39
- # Bio::Log::CLI.logger(name)
40
- # end
41
- #
42
- # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
43
- # Bio::Log::CLI.trace(s)
44
- # end
45
- #
46
- # o.on("-q", "--quiet", "Run quietly") do |q|
47
- # Bio::Log::CLI.trace('error')
48
- # end
49
- #
50
- # o.on("-v", "--verbose", "Run verbosely") do |v|
51
- # Bio::Log::CLI.trace('info')
52
- # end
53
- #
54
- # o.on("--debug", "Show debug messages") do |v|
55
- # Bio::Log::CLI.trace('debug')
56
- # end
57
-
58
- o.separator ""
59
- o.on_tail('-h', '--help', 'display this help and exit') do
60
- options[:show_help] = true
61
- end
62
- end
63
-
64
- begin
65
- opts.parse!(ARGV)
66
-
67
- # Uncomment the following when using the bio-logger
68
- # Bio::Log::CLI.configure('bio-statsample-timeseries')
69
-
70
- # TODO: your code here
71
- # use options for your logic
72
- rescue OptionParser::InvalidOption => e
73
- options[:invalid_argument] = e.message
74
- end
@@ -1,31 +0,0 @@
1
- Feature: ACF
2
-
3
- As a statistician
4
- So that I can evaluate autocorrelation of a series
5
- I want to evaluate acf
6
-
7
- Background: a timeseries
8
-
9
- Given the following values in a timeseries:
10
- | timeseries |
11
- | 10 20 30 40 50 60 70 80 90 100 |
12
- | 110 120 130 140 150 160 170 180 190 200 |
13
-
14
- Scenario: cross-check acf for 10 lags
15
- When I provide 10 lags for acf
16
- And I calculate acf
17
- Then I should get 11 values in resultant acf
18
- And I should see "1.0, 0.85, 0.7015037593984963, 0.556015037593985, 0.4150375939849624, 0.2800751879699248, 0.15263157894736842, 0.034210526315789476, -0.07368421052631578, -0.16954887218045114, -0.2518796992481203" as complete series
19
-
20
- Scenario: cross-check acf for 5 lags
21
- When I provide 5 lags for acf
22
- And I calculate acf
23
- Then I should get 6 values in resultant acf
24
- And I should see "1.0, 0.85, 0.7015037593984963, 0.556015037593985, 0.4150375939849624, 0.2800751879699248" as complete series
25
-
26
- Scenario: first value should be 1.0
27
- When I provide 2 lags for acf
28
- And I calculate acf
29
- Then I should get 3 values in resultant acf
30
- And I should see 1.0 as first value
31
-
@@ -1,42 +0,0 @@
1
- Feature: PACF
2
-
3
- As a statistician
4
- So that I can quickly evaluate partial autocorrelation of a series
5
- I want to evaluate pacf
6
-
7
- Background: a timeseries
8
-
9
- Given the following values in a timeseries:
10
- | timeseries |
11
- | 10 20 30 40 50 60 70 80 90 100 |
12
- | 110 120 130 140 150 160 170 180 190 200 |
13
-
14
- Scenario: check pacf for 10 lags with unbiased
15
- When I provide 10 lags for pacf
16
- When I provide yw yule walker as method
17
- Then I should get Array as resultant output
18
- Then I should get 11 values in resultant pacf
19
-
20
- Scenario: check pacf for 5 lags with mle
21
- When I provide 5 lags for pacf
22
- When I provide mle yule walker as method
23
- Then I should get Array as resultant output
24
- Then I should get 6 values in resultant pacf
25
-
26
- Scenario: check first value of pacf
27
- When I provide 5 lags for pacf
28
- When I provide yw yule walker as method
29
- Then I should get Array as resultant output
30
- And I should see 1.0 as first value
31
-
32
- Scenario: check all values in pacf for 5 lags with mle
33
- When I provide 5 lags for pacf
34
- When I provide mle yule walker as method
35
- Then I should get Array as resultant output
36
- And I should see "1.0, 0.85, -0.07566212829370711, -0.07635069706072706, -0.07698628638512295, -0.07747034005560738" as complete series
37
-
38
- Scenario: check all values in pacf for 5 lags with unbiased
39
- When I provide 5 lags for pacf
40
- When I provide yw yule walker as method
41
- Then I should get Array as resultant output
42
- And I should see "1.0, 0.8947368421052632, -0.10582010582010604, -0.11350188273265083, -0.12357534824820737, -0.13686534216335522" as complete series
@@ -1,37 +0,0 @@
1
- require 'statsample-timeseries'
2
- include Statsample::TimeSeries
3
-
4
- Given /^the following values in a timeseries:$/ do |series|
5
- arr = []
6
- series.hashes.each do |sequence|
7
- arr += sequence['timeseries'].split(' ').map(&:to_i).to_ts
8
- end
9
- @timeseries = arr.to_ts
10
- end
11
-
12
- When /^I provide (\d+) lags for p?acf$/ do |lags|
13
- @lags = lags.to_i
14
- end
15
-
16
- When /^I provide (\w+) yule walker as method$/ do |method|
17
- @method = method
18
- end
19
-
20
- Then /^I should get (\w+) as resultant output$/ do |klass|
21
- @result = @timeseries.pacf(@lags, @method)
22
- assert_equal @result.class.to_s, klass
23
- end
24
-
25
- Then /^I should get (\w+) values in resultant p?acf$/ do |values_count|
26
- assert_equal @result.size, values_count.to_i
27
- end
28
-
29
- And /^I should see (\d+\.\d) as first value$/ do |first_value|
30
- assert_equal @result.first, first_value.to_f
31
- end
32
-
33
- And /^I should see \"(.+)\" as complete series$/ do |series|
34
- series = series.split(',').map(&:to_f)
35
- assert_equal @result, series
36
- end
37
-
@@ -1,8 +0,0 @@
1
- require 'statsample-timeseries'
2
- include Statsample::TimeSeries
3
-
4
- # All instance variables and shared Cucumber steps are DRYed up in step_definitions.rb
5
- And /^I calculate acf$/ do
6
- @result = @timeseries.acf(@lags)
7
- end
8
-
@@ -1,15 +0,0 @@
1
- require 'bundler'
2
- begin
3
- Bundler.setup(:default, :development)
4
- rescue Bundler::BundlerError => e
5
- $stderr.puts e.message
6
- $stderr.puts "Run `bundle install` to install missing gems"
7
- exit e.status_code
8
- end
9
-
10
- $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
11
- require 'statsample-timeseries'
12
-
13
- require 'test/unit/assertions'
14
-
15
- World(Test::Unit::Assertions)
@@ -1,291 +0,0 @@
1
- require 'statsample-timeseries/timeseries/pacf'
2
- module Statsample::TimeSeriesShorthands
3
- # Creates a new Statsample::TimeSeries::Series object from the receiver.
4
- # Any extra arguments are passed through to Statsample::TimeSeries::Series.new
5
- def to_time_series(*args)
6
- Statsample::TimeSeries::Series.new(self, :scale, *args)
7
- end
8
-
9
- alias :to_ts :to_time_series
10
- end
11
-
12
- class Array
13
- include Statsample::TimeSeriesShorthands
14
- end
15
-
16
- module Statsample
17
- module TimeSeries
18
- # Collection of data indexed by time.
19
- # The order goes from earliest to latest.
20
- class Series < Statsample::Vector
21
- include Statsample::TimeSeries::Pacf
22
- # Calculates the autocorrelation coefficients of the series.
23
- #
24
- # The first element is always 1, since that is the correlation
25
- # of the series with itself.
26
- #
27
- # Usage:
28
- #
29
- # ts = (1..100).map { rand }.to_time_series
30
- #
31
- # ts.acf # => array with first 21 autocorrelations
32
- # ts.acf 3 # => array with 4 autocorrelations (lags 0 through 3)
33
- #
34
- def acf(max_lags = nil)
35
- max_lags ||= (10 * Math.log10(size)).to_i
36
-
37
- (0..max_lags).map do |i|
38
- if i == 0
39
- 1.0
40
- else
41
- m = self.mean
42
-
43
- # can't use Pearson coefficient since the mean for the lagged series should
44
- # be the same as the regular series
45
- ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
46
- end
47
- end
48
- end
49
-
50
- #=Partial Autocorrelation
51
- #Generates partial autocorrelation series for a timeseries
52
- #==Parameters
53
- #* *max_lags*: integer, optional - provide number of lags
54
- #* *method*: string. Default: 'yw'.
55
- # * *yw*: For yule-walker algorithm unbiased approach
56
- # * *mle*: For Maximum likelihood algorithm approach
57
- # * *ld*: For Levinson-Durbin recursive approach
58
- #==Returns
59
- # array of pacf
60
- def pacf(max_lags = nil, method = :yw)
61
-
62
- method = method.downcase.to_sym
63
- max_lags ||= (10 * Math.log10(size)).to_i
64
- if method.eql? :yw or method.eql? :mle
65
- Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
66
- elsif method == :ld
67
- series = self.acvf
68
- Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
69
- else
70
- raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
71
- end
72
- end
73
-
74
- #=Autoregressive estimation
75
- #Generates AR(k) series for the calling timeseries by yule walker.
76
- #==Parameters
77
- #* *n*: integer, (default = 1500) number of observations for AR.
78
- #* *k*: integer, (default = 1) order of AR process.
79
- #==Returns
80
- #Array constituting estimated AR series.
81
- def ar(n = 1500, k = 1)
82
- series = Statsample::TimeSeries.arima
83
- #series = Statsample::TimeSeries::ARIMA.new
84
- series.yule_walker(self, n, k)
85
- end
86
-
87
- #=AutoCovariance
88
- #Provides autocovariance of timeseries.
89
- #==Parameters
90
- #* *demean* = true; optional. Supply false if series is not to be demeaned
91
- #* *unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
92
- #==Returns
93
- # Array of autocovariances, one per lag
94
- def acvf(demean = true, unbiased = true)
95
- #TODO: change parameters list in opts.merge as suggested by John
96
- #functionality: computes autocovariance of timeseries data
97
- #returns: array of autocovariances
98
-
99
- if demean
100
- demeaned_series = self - self.mean
101
- else
102
- demeaned_series = self
103
- end
104
- n = (10 * Math.log10(size)).to_i + 1
105
- m = self.mean
106
- if unbiased
107
- d = Array.new(self.size, self.size)
108
- else
109
- d = ((1..self.size).to_a.reverse)[0..n]
110
- end
111
-
112
-
113
- 0.upto(n - 1).map do |i|
114
- (demeaned_series * (self.lag(i) - m)).sum / d[i]
115
- end
116
- end
117
-
118
- #=Correlation
119
- #Gives correlation of timeseries.
120
- def correlate(a, v, mode = 'full')
121
- #performs cross-correlation of two series
122
- #multiarray.correlate2(a, v, 'full')
123
- if a.size < v.size
124
- raise("Should have same size!")
125
- end
126
- ps = a.size + v.size - 1
127
- a_padded = Array.new(ps, 0)
128
- a_padded[0...a.size] = a
129
-
130
- out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
131
- #ongoing
132
- end
133
-
134
- # Lags the series by k periods.
135
- #
136
- # The convention is to set the oldest observations (the first ones
137
- # in the series) to nil so that the size of the lagged series is the
138
- # same as the original.
139
- #
140
- # Usage:
141
- #
142
- # ts = (1..10).map { rand }.to_time_series
143
- # # => [0.69, 0.23, 0.44, 0.71, ...]
144
- #
145
- # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
146
- # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
147
- #
148
- def lag(k = 1)
149
- return self if k == 0
150
-
151
- dup.tap do |lagged|
152
- (lagged.size - 1).downto k do |i|
153
- lagged[i] = lagged[i - k]
154
- end
155
-
156
- (0...k).each do |i|
157
- lagged[i] = nil
158
- end
159
- lagged.set_valid_data
160
- end
161
- end
162
-
163
- #=Diff
164
- # Performs the difference of the series.
165
- # Note: The first difference of series is X(t) - X(t-1)
166
- # But, second difference of series is NOT X(t) - X(t-2)
167
- # It is the first difference of the first difference
168
- # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
169
- #==Params
170
- #* *max_lags*: integer, (default: 1), number of differences reqd.
171
- #==Usage
172
- #
173
- # ts = (1..10).map { rand }.to_ts
174
- # # => [0.69, 0.23, 0.44, 0.71, ...]
175
- #
176
- # ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
177
- #==Returns
178
- # Timeseries object
179
- def diff(max_lags = 1)
180
- ts = self
181
- difference = []
182
- max_lags.times do
183
- difference = ts - ts.lag
184
- ts = difference
185
- end
186
- difference
187
- end
188
-
189
- #=Moving Average
190
- # Calculates the moving average of the series using the provided
191
- # lookback argument. The lookback defaults to 10 periods.
192
- #==Parameters
193
- #* *n*: integer, (default = 10) - lookback argument
194
- #
195
- #==Usage
196
- #
197
- # ts = (1..100).map { rand }.to_ts
198
- # # => [0.69, 0.23, 0.44, 0.71, ...]
199
- #
200
- # # first 9 observations are nil
201
- # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
202
- #
203
- #==Returns
204
- #Resulting moving average timeseries object
205
- def ma(n = 10)
206
- return mean if n >= size
207
-
208
- ([nil] * (n - 1) + (0..(size - n)).map do |i|
209
- self[i...(i + n)].inject(&:+) / n
210
- end).to_time_series
211
- end
212
-
213
- #=Exponential Moving Average
214
- # Calculates an exponential moving average of the series using a
215
- # specified parameter. If wilder is false (the default) then the EMA
216
- # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
217
- # Welles Wilder smoother of 1 / n.
218
- #
219
- # Warning for EMA usage: EMAs are unstable for small series, as they
220
- # use a lot more than n observations to calculate. The series is stable
221
- # if the size of the series is >= 3.45 * (n + 1)
222
- #
223
- #==Parameters
224
- #* *n*: integer, (default = 10)
225
- #* *wilder*: boolean, (default = false), if true, 1/n value is used for smoothing; if false, uses 2/(n+1) value
226
- #
227
- #==Usage
228
- # ts = (1..100).map { rand }.to_ts
229
- # # => [0.69, 0.23, 0.44, 0.71, ...]
230
- #
231
- # # first 9 observations are nil
232
- # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
233
- #
234
- #==Returns
235
- #EMA timeseries
236
- def ema(n = 10, wilder = false)
237
- smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
238
-
239
- # need to start everything from the first non-nil observation
240
- start = self.data.index { |i| i != nil }
241
-
242
- # first n - 1 observations are nil
243
- base = [nil] * (start + n - 1)
244
-
245
- # nth observation is just a moving average
246
- base << self[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
247
-
248
- (start + n).upto size - 1 do |i|
249
- base << self[i] * smoother + (1 - smoother) * base.last
250
- end
251
-
252
- base.to_time_series
253
- end
254
-
255
- #=Moving Average Convergence-Divergence
256
- # Calculates the MACD (moving average convergence-divergence) of the time
257
- # series - this is a comparison of a fast EMA with a slow EMA.
258
- #
259
- #==Parameters
260
- #* *fast*: integer, (default = 12) - fast component of MACD
261
- #* *slow*: integer, (default = 26) - slow component of MACD
262
- #* *signal*: integer, (default = 9) - signal component of MACD
263
- #
264
- #==Usage
265
- # ts = (1..100).map { rand }.to_ts
266
- # # => [0.69, 0.23, 0.44, 0.71, ...]
267
- # ts.macd(13)
268
- #
269
- #==Returns
270
- # Array of two timeseries: the MACD series (fast EMA minus slow EMA) and the EMA of that series over +signal+ periods
271
- def macd(fast = 12, slow = 26, signal = 9)
272
- series = ema(fast) - ema(slow)
273
- [series, series.ema(signal)]
274
- end
275
-
276
- # Borrow the operations from Vector, but convert to time series
277
- def + series
278
- super.to_a.to_ts
279
- end
280
-
281
- def - series
282
- super.to_a.to_ts
283
- end
284
-
285
- def to_s
286
- sprintf("Time Series(type:%s, n:%d)[%s]", @type.to_s, @data.size,
287
- @data.collect{|d| d.nil? ? "nil":d}.join(","))
288
- end
289
- end
290
- end
291
- end