statsample-timeseries 0.0.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.3
@@ -1,74 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # BioRuby bio-statsample-timeseries Plugin BioStatsampleTimeseries
4
- # Author:: Ankur Goel
5
- # Copyright:: 2013
6
-
7
- USAGE = "Describe bio-statsample-timeseries"
8
-
9
- if ARGV.size == 0
10
- print USAGE
11
- end
12
-
13
- require 'bio-statsample-timeseries'
14
- require 'optparse'
15
-
16
- # Uncomment when using the bio-logger
17
- # require 'bio-logger'
18
- # Bio::Log::CLI.logger('stderr')
19
- # Bio::Log::CLI.trace('info')
20
-
21
- options = {:example_switch=>false,:show_help=>false}
22
- opts = OptionParser.new do |o|
23
- o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"
24
-
25
- o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
26
- # TODO: your logic here, below an example
27
- options[:example_parameter] = 'this is a parameter'
28
- end
29
-
30
- o.separator ""
31
- o.on("--switch-example", 'TODO: put a description for the SWITCH') do
32
- # TODO: your logic here, below an example
33
- self[:example_switch] = true
34
- end
35
-
36
- # Uncomment the following when using the bio-logger
37
- # o.separator ""
38
- # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
39
- # Bio::Log::CLI.logger(name)
40
- # end
41
- #
42
- # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
43
- # Bio::Log::CLI.trace(s)
44
- # end
45
- #
46
- # o.on("-q", "--quiet", "Run quietly") do |q|
47
- # Bio::Log::CLI.trace('error')
48
- # end
49
- #
50
- # o.on("-v", "--verbose", "Run verbosely") do |v|
51
- # Bio::Log::CLI.trace('info')
52
- # end
53
- #
54
- # o.on("--debug", "Show debug messages") do |v|
55
- # Bio::Log::CLI.trace('debug')
56
- # end
57
-
58
- o.separator ""
59
- o.on_tail('-h', '--help', 'display this help and exit') do
60
- options[:show_help] = true
61
- end
62
- end
63
-
64
- begin
65
- opts.parse!(ARGV)
66
-
67
- # Uncomment the following when using the bio-logger
68
- # Bio::Log::CLI.configure('bio-statsample-timeseries')
69
-
70
- # TODO: your code here
71
- # use options for your logic
72
- rescue OptionParser::InvalidOption => e
73
- options[:invalid_argument] = e.message
74
- end
@@ -1,31 +0,0 @@
1
- Feature: ACF
2
-
3
- As a statistician
4
- So that I can evaluate autocorrelation of a series
5
- I want to evaluate acf
6
-
7
- Background: a timeseries
8
-
9
- Given the following values in a timeseries:
10
- | timeseries |
11
- | 10 20 30 40 50 60 70 80 90 100 |
12
- | 110 120 130 140 150 160 170 180 190 200 |
13
-
14
- Scenario: cross-check acf for 10 lags
15
- When I provide 10 lags for acf
16
- And I calculate acf
17
- Then I should get 11 values in resultant acf
18
- And I should see "1.0, 0.85, 0.7015037593984963, 0.556015037593985, 0.4150375939849624, 0.2800751879699248, 0.15263157894736842, 0.034210526315789476, -0.07368421052631578, -0.16954887218045114, -0.2518796992481203" as complete series
19
-
20
- Scenario: cross-check acf for 5 lags
21
- When I provide 5 lags for acf
22
- And I calculate acf
23
- Then I should get 6 values in resultant acf
24
- And I should see "1.0, 0.85, 0.7015037593984963, 0.556015037593985, 0.4150375939849624, 0.2800751879699248" as complete series
25
-
26
- Scenario: first value should be 1.0
27
- When I provide 2 lags for acf
28
- And I calculate acf
29
- Then I should get 3 values in resultant acf
30
- And I should see 1.0 as first value
31
-
@@ -1,42 +0,0 @@
1
- Feature: PACF
2
-
3
- As a statistician
4
- So that I can quickly evaluate partial autocorrelation of a series
5
- I want to evaluate pacf
6
-
7
- Background: a timeseries
8
-
9
- Given the following values in a timeseries:
10
- | timeseries |
11
- | 10 20 30 40 50 60 70 80 90 100 |
12
- | 110 120 130 140 150 160 170 180 190 200 |
13
-
14
- Scenario: check pacf for 10 lags with unbiased
15
- When I provide 10 lags for pacf
16
- When I provide yw yule walker as method
17
- Then I should get Array as resultant output
18
- Then I should get 11 values in resultant pacf
19
-
20
- Scenario: check pacf for 5 lags with mle
21
- When I provide 5 lags for pacf
22
- When I provide mle yule walker as method
23
- Then I should get Array as resultant output
24
- Then I should get 6 values in resultant pacf
25
-
26
- Scenario: check first value of pacf
27
- When I provide 5 lags for pacf
28
- When I provide yw yule walker as method
29
- Then I should get Array as resultant output
30
- And I should see 1.0 as first value
31
-
32
- Scenario: check all values in pacf for 5 lags with mle
33
- When I provide 5 lags for pacf
34
- When I provide mle yule walker as method
35
- Then I should get Array as resultant output
36
- And I should see "1.0, 0.85, -0.07566212829370711, -0.07635069706072706, -0.07698628638512295, -0.07747034005560738" as complete series
37
-
38
- Scenario: check all values in pacf for 5 lags with unbiased
39
- When I provide 5 lags for pacf
40
- When I provide yw yule walker as method
41
- Then I should get Array as resultant output
42
- And I should see "1.0, 0.8947368421052632, -0.10582010582010604, -0.11350188273265083, -0.12357534824820737, -0.13686534216335522" as complete series
@@ -1,37 +0,0 @@
1
- require 'statsample-timeseries'
2
- include Statsample::TimeSeries
3
-
4
- Given /^the following values in a timeseries:$/ do |series|
5
- arr = []
6
- series.hashes.each do |sequence|
7
- arr += sequence['timeseries'].split(' ').map(&:to_i).to_ts
8
- end
9
- @timeseries = arr.to_ts
10
- end
11
-
12
- When /^I provide (\d+) lags for p?acf$/ do |lags|
13
- @lags = lags.to_i
14
- end
15
-
16
- When /^I provide (\w+) yule walker as method$/ do |method|
17
- @method = method
18
- end
19
-
20
- Then /^I should get (\w+) as resultant output$/ do |klass|
21
- @result = @timeseries.pacf(@lags, @method)
22
- assert_equal @result.class.to_s, klass
23
- end
24
-
25
- Then /^I should get (\w+) values in resultant p?acf$/ do |values_count|
26
- assert_equal @result.size, values_count.to_i
27
- end
28
-
29
- And /^I should see (\d+\.\d) as first value$/ do |first_value|
30
- assert_equal @result.first, first_value.to_f
31
- end
32
-
33
- And /^I should see \"(.+)\" as complete series$/ do |series|
34
- series = series.split(',').map(&:to_f)
35
- assert_equal @result, series
36
- end
37
-
@@ -1,8 +0,0 @@
1
- require 'statsample-timeseries'
2
- include Statsample::TimeSeries
3
-
4
- # All instance variables and Cucumber DSL steps are DRYed up in step_definitions.rb
5
- And /^I calculate acf$/ do
6
- @result = @timeseries.acf(@lags)
7
- end
8
-
@@ -1,15 +0,0 @@
1
- require 'bundler'
2
- begin
3
- Bundler.setup(:default, :development)
4
- rescue Bundler::BundlerError => e
5
- $stderr.puts e.message
6
- $stderr.puts "Run `bundle install` to install missing gems"
7
- exit e.status_code
8
- end
9
-
10
- $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
11
- require 'statsample-timeseries'
12
-
13
- require 'test/unit/assertions'
14
-
15
- World(Test::Unit::Assertions)
@@ -1,291 +0,0 @@
1
- require 'statsample-timeseries/timeseries/pacf'
2
- module Statsample::TimeSeriesShorthands
3
- # Creates a new Statsample::TimeSeries object
4
- # Argument should be equal to TimeSeries.new
5
- def to_time_series(*args)
6
- Statsample::TimeSeries::Series.new(self, :scale, *args)
7
- end
8
-
9
- alias :to_ts :to_time_series
10
- end
11
-
12
- class Array
13
- include Statsample::TimeSeriesShorthands
14
- end
15
-
16
- module Statsample
17
- module TimeSeries
18
- # Collection of data indexed by time.
19
- # The order goes from earliest to latest.
20
- class Series < Statsample::Vector
21
- include Statsample::TimeSeries::Pacf
22
- # Calculates the autocorrelation coefficients of the series.
23
- #
24
- # The first element is always 1, since that is the correlation
25
- # of the series with itself.
26
- #
27
- # Usage:
28
- #
29
- # ts = (1..100).map { rand }.to_time_series
30
- #
31
- # ts.acf # => array with first 21 autocorrelations
32
- # ts.acf 3 # => array with first 4 autocorrelations (lags 0 through 3)
33
- #
34
- def acf(max_lags = nil)
35
- max_lags ||= (10 * Math.log10(size)).to_i
36
-
37
- (0..max_lags).map do |i|
38
- if i == 0
39
- 1.0
40
- else
41
- m = self.mean
42
-
43
- # can't use Pearson coefficient since the mean for the lagged series should
44
- # be the same as the regular series
45
- ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
46
- end
47
- end
48
- end
49
-
50
- #=Partial Autocorrelation
51
- #Generates partial autocorrelation series for a timeseries
52
- #==Parameters
53
- #* *max_lags*: integer, optional - provide number of lags
54
- #* *method*: string. Default: 'yw'.
55
- # * *yw*: For yule-walker algorithm unbiased approach
56
- # * *mle*: For Maximum likelihood algorithm approach
57
- # * *ld*: For Levinson-Durbin recursive approach
58
- #==Returns
59
- # array of pacf
60
- def pacf(max_lags = nil, method = :yw)
61
-
62
- method = method.downcase.to_sym
63
- max_lags ||= (10 * Math.log10(size)).to_i
64
- if method.eql? :yw or method.eql? :mle
65
- Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
66
- elsif method == :ld
67
- series = self.acvf
68
- Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
69
- else
70
- raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
71
- end
72
- end
73
-
74
- #=Autoregressive estimation
75
- #Generates AR(k) series for the calling timeseries by yule walker.
76
- #==Parameters
77
- #* *n*: integer, (default = 1500) number of observations for AR.
78
- #* *k*: integer, (default = 1) order of AR process.
79
- #==Returns
80
- #Array constituting estimated AR series.
81
- def ar(n = 1500, k = 1)
82
- series = Statsample::TimeSeries.arima
83
- #series = Statsample::TimeSeries::ARIMA.new
84
- series.yule_walker(self, n, k)
85
- end
86
-
87
- #=AutoCovariance
88
- #Provides autocovariance of timeseries.
89
- #==Parameters
90
- #* *demean* = true; optional. Supply false if series is not to be demeaned
91
- #* *unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
92
- #==Returns
93
- # Array of autocovariance values, one per lag
94
- def acvf(demean = true, unbiased = true)
95
- #TODO: change parameters list in opts.merge as suggested by John
96
- #functionality: computes autocovariance of timeseries data
97
- #returns: array of autocovariances
98
-
99
- if demean
100
- demeaned_series = self - self.mean
101
- else
102
- demeaned_series = self
103
- end
104
- n = (10 * Math.log10(size)).to_i + 1
105
- m = self.mean
106
- if unbiased
107
- d = Array.new(self.size, self.size)
108
- else
109
- d = ((1..self.size).to_a.reverse)[0..n]
110
- end
111
-
112
-
113
- 0.upto(n - 1).map do |i|
114
- (demeaned_series * (self.lag(i) - m)).sum / d[i]
115
- end
116
- end
117
-
118
- #=Correlation
119
- #Gives correlation of timeseries.
120
- def correlate(a, v, mode = 'full')
121
- #performs cross-correlation of two series
122
- #multiarray.correlate2(a, v, 'full')
123
- if a.size < v.size
124
- raise("Should have same size!")
125
- end
126
- ps = a.size + v.size - 1
127
- a_padded = Array.new(ps, 0)
128
- a_padded[0...a.size] = a
129
-
130
- out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
131
- #ongoing
132
- end
133
-
134
- # Lags the series by k periods.
135
- #
136
- # The convention is to set the oldest observations (the first ones
137
- # in the series) to nil so that the size of the lagged series is the
138
- # same as the original.
139
- #
140
- # Usage:
141
- #
142
- # ts = (1..10).map { rand }.to_time_series
143
- # # => [0.69, 0.23, 0.44, 0.71, ...]
144
- #
145
- # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
146
- # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
147
- #
148
- def lag(k = 1)
149
- return self if k == 0
150
-
151
- dup.tap do |lagged|
152
- (lagged.size - 1).downto k do |i|
153
- lagged[i] = lagged[i - k]
154
- end
155
-
156
- (0...k).each do |i|
157
- lagged[i] = nil
158
- end
159
- lagged.set_valid_data
160
- end
161
- end
162
-
163
- #=Diff
164
- # Performs the difference of the series.
165
- # Note: The first difference of series is X(t) - X(t-1)
166
- # But, second difference of series is NOT X(t) - X(t-2)
167
- # It is the first difference of the first difference
168
- # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
169
- #==Params
170
- #* *max_lags*: integer, (default: 1), number of differences reqd.
171
- #==Usage
172
- #
173
- # ts = (1..10).map { rand }.to_ts
174
- # # => [0.69, 0.23, 0.44, 0.71, ...]
175
- #
176
- # ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
177
- #==Returns
178
- # Timeseries object
179
- def diff(max_lags = 1)
180
- ts = self
181
- difference = []
182
- max_lags.times do
183
- difference = ts - ts.lag
184
- ts = difference
185
- end
186
- difference
187
- end
188
-
189
- #=Moving Average
190
- # Calculates the moving average of the series using the provided
191
- # lookback argument. The lookback defaults to 10 periods.
192
- #==Parameters
193
- #* *n*: integer, (default = 10) - lookback argument
194
- #
195
- #==Usage
196
- #
197
- # ts = (1..100).map { rand }.to_ts
198
- # # => [0.69, 0.23, 0.44, 0.71, ...]
199
- #
200
- # # first 9 observations are nil
201
- # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
202
- #
203
- #==Returns
204
- #Resulting moving average timeseries object
205
- def ma(n = 10)
206
- return mean if n >= size
207
-
208
- ([nil] * (n - 1) + (0..(size - n)).map do |i|
209
- self[i...(i + n)].inject(&:+) / n
210
- end).to_time_series
211
- end
212
-
213
- #=Exponential Moving Average
214
- # Calculates an exponential moving average of the series using a
215
- # specified parameter. If wilder is false (the default) then the EMA
216
- # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
217
- # Welles Wilder smoother of 1 / n.
218
- #
219
- # Warning for EMA usage: EMAs are unstable for small series, as they
220
- # use a lot more than n observations to calculate. The series is stable
221
- # if the size of the series is >= 3.45 * (n + 1)
222
- #
223
- #==Parameters
224
- #* *n*: integer, (default = 10)
225
- #* *wilder*: boolean, (default = false), if true, 1/n value is used for smoothing; if false, uses 2/(n+1) value
226
- #
227
- #==Usage
228
- # ts = (1..100).map { rand }.to_ts
229
- # # => [0.69, 0.23, 0.44, 0.71, ...]
230
- #
231
- # # first 9 observations are nil
232
- # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
233
- #
234
- #==Returns
235
- #EMA timeseries
236
- def ema(n = 10, wilder = false)
237
- smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
238
-
239
- # need to start everything from the first non-nil observation
240
- start = self.data.index { |i| i != nil }
241
-
242
- # first n - 1 observations are nil
243
- base = [nil] * (start + n - 1)
244
-
245
- # nth observation is just a moving average
246
- base << self[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
247
-
248
- (start + n).upto size - 1 do |i|
249
- base << self[i] * smoother + (1 - smoother) * base.last
250
- end
251
-
252
- base.to_time_series
253
- end
254
-
255
- #=Moving Average Convergence-Divergence
256
- # Calculates the MACD (moving average convergence-divergence) of the time
257
- # series - this is a comparison of a fast EMA with a slow EMA.
258
- #
259
- #==Parameters
260
- #* *fast*: integer, (default = 12) - fast component of MACD
261
- #* *slow*: integer, (default = 26) - slow component of MACD
262
- #* *signal*: integer, (default = 9) - signal component of MACD
263
- #
264
- #==Usage
265
- # ts = (1..100).map { rand }.to_ts
266
- # # => [0.69, 0.23, 0.44, 0.71, ...]
267
- # ts.macd(13)
268
- #
269
- #==Returns
270
- # Array of two timeseries - comparison of fast EMA with slow and EMA with signal value
271
- def macd(fast = 12, slow = 26, signal = 9)
272
- series = ema(fast) - ema(slow)
273
- [series, series.ema(signal)]
274
- end
275
-
276
- # Borrow the operations from Vector, but convert to time series
277
- def + series
278
- super.to_a.to_ts
279
- end
280
-
281
- def - series
282
- super.to_a.to_ts
283
- end
284
-
285
- def to_s
286
- sprintf("Time Series(type:%s, n:%d)[%s]", @type.to_s, @data.size,
287
- @data.collect{|d| d.nil? ? "nil":d}.join(","))
288
- end
289
- end
290
- end
291
- end