RubyGems - statsample-timeseries - Versions diffs - 0.0.3 → 0.3.0 - Mend

statsample-timeseries 0.0.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

checksums.yaml +7 -0
data/.gitignore +19 -0
data/.travis.yml +13 -10
data/Gemfile +2 -21
data/History.md +4 -0
data/LICENSE.txt +1 -1
data/README.md +62 -0
data/Rakefile +12 -17
data/lib/statsample-timeseries.rb +3 -13
data/lib/statsample-timeseries/arima.rb +72 -74
data/lib/statsample-timeseries/arima/kalman.rb +20 -40
data/lib/statsample-timeseries/arima/likelihood.rb +3 -4
data/lib/statsample-timeseries/daru_monkeys.rb +78 -0
data/lib/statsample-timeseries/timeseries/pacf.rb +47 -38
data/lib/statsample-timeseries/utility.rb +105 -133
data/lib/statsample-timeseries/version.rb +5 -0
data/statsample-timeseries.gemspec +31 -0
data/test/helper.rb +6 -29
data/test/test_acf.rb +41 -0
data/test/test_arima_ks.rb +28 -12
data/test/test_arima_simulators.rb +59 -42
data/test/test_matrix.rb +1 -1
data/test/test_pacf.rb +7 -2
data/test/test_wald.rb +7 -3
metadata +81 -132
data/README.rdoc +0 -72
data/VERSION +0 -1
data/bin/bio-statsample-timeseries +0 -74
data/features/acf.feature +0 -31
data/features/pacf.feature +0 -42
data/features/step_definitions/bio-statsample-timeseries_steps.rb +0 -0
data/features/step_definitions/step_definitions.rb +0 -37
data/features/step_definitions/step_definitions_acf.rb +0 -8
data/features/support/env.rb +0 -15
data/lib/statsample-timeseries/timeseries.rb +0 -291
data/test/test_tseries.rb +0 -103

data/VERSION DELETED

@@ -1 +0,0 @@
-0.0.3

data/bin/bio-statsample-timeseries DELETED

@@ -1,74 +0,0 @@
-#!/usr/bin/env ruby
-#
-# BioRuby bio-statsample-timeseries Plugin BioStatsampleTimeseries
-# Author:: Ankur Goel
-# Copyright:: 2013
- USAGE = "Describe bio-statsample-timeseries"
-if ARGV.size == 0
-  print USAGE
-end
-require 'bio-statsample-timeseries'
-require 'optparse'
-# Uncomment when using the bio-logger
-# require 'bio-logger'
-# Bio::Log::CLI.logger('stderr')
-# Bio::Log::CLI.trace('info')
-options = {:example_switch=>false,:show_help=>false}
-opts = OptionParser.new do |o|
-  o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"
-  o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
-    # TODO: your logic here, below an example
-    options[:example_parameter] = 'this is a parameter'
-  end
-  o.separator ""
-  o.on("--switch-example", 'TODO: put a description for the SWITCH') do
-    # TODO: your logic here, below an example
-    self[:example_switch] = true
-  end
-  # Uncomment the following when using the bio-logger
-  # o.separator ""
-  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
-  #   Bio::Log::CLI.logger(name)
-  # end
-  #
-  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
-  #   Bio::Log::CLI.trace(s)
-  # end
-  #
-  # o.on("-q", "--quiet", "Run quietly") do |q|
-  #   Bio::Log::CLI.trace('error')
-  # end
-  #
-  # o.on("-v", "--verbose", "Run verbosely") do |v|
-  #   Bio::Log::CLI.trace('info')
-  # end
-  #
-  # o.on("--debug", "Show debug messages") do |v|
-  #   Bio::Log::CLI.trace('debug')
-  # end
-  o.separator ""
-  o.on_tail('-h', '--help', 'display this help and exit') do
-    options[:show_help] = true
-  end
-end
-begin
-  opts.parse!(ARGV)
-  # Uncomment the following when using the bio-logger
-  # Bio::Log::CLI.configure('bio-statsample-timeseries')
-  # TODO: your code here
-  # use options for your logic
-rescue OptionParser::InvalidOption => e
-  options[:invalid_argument] = e.message
-end

data/features/acf.feature DELETED

@@ -1,31 +0,0 @@
-Feature: ACF
-  As a statistician
-  So that I can evaluate autocorrelation of a series
-  I want to evaluate acf
-Background: a timeseries
-  Given the following values in a timeseries:
-    | timeseries |
-    | 10  20  30  40  50  60  70  80  90  100 |
-    | 110 120 130 140 150 160 170 180 190 200 |
-Scenario: cross-check acf for 10 lags
-  When I provide 10 lags for acf
-  And I calculate acf
-  Then I should get 11 values in resultant acf
-  And I should see "1.0, 0.85, 0.7015037593984963, 0.556015037593985, 0.4150375939849624, 0.2800751879699248, 0.15263157894736842, 0.034210526315789476, -0.07368421052631578, -0.16954887218045114, -0.2518796992481203" as complete series
-Scenario: cross-check acf for 5 lags
-  When I provide 5 lags for acf
-  And I calculate acf
-  Then I should get 6 values in resultant acf
-  And I should see "1.0, 0.85, 0.7015037593984963, 0.556015037593985, 0.4150375939849624, 0.2800751879699248" as complete series
-Scenario: first value should be 1.0
-  When I provide 2 lags for acf
-  And I calculate acf
-  Then I should get 3 values in resultant acf
-  And I should see 1.0 as first value

data/features/pacf.feature DELETED

@@ -1,42 +0,0 @@
-Feature: PACF
-  As a statistician
-  So that I can quickly evaluate partial autocorrelation of a series
-  I want to evaluate pacf
-Background: a timeseries
-  Given the following values in a timeseries:
-    | timeseries |
-    | 10  20  30  40  50  60  70  80  90  100 |
-    | 110 120 130 140 150 160 170 180 190 200 |
-Scenario: check pacf for 10 lags with unbiased
-  When I provide 10 lags for pacf
-  When I provide yw yule walker as method
-  Then I should get Array as resultant output
-  Then I should get 11 values in resultant pacf
-Scenario: check pacf for 5 lags with mle
-  When I provide 5 lags for pacf
-  When I provide mle yule walker as method
-  Then I should get Array as resultant output
-  Then I should get 6 values in resultant pacf
-Scenario: check first value of pacf
-  When I provide 5 lags for pacf
-  When I provide yw yule walker as method
-  Then I should get Array as resultant output
-  And I should see 1.0 as first value
-Scenario: check all values in pacf for 5 lags with mle
-  When I provide 5 lags for pacf
-  When I provide mle yule walker as method
-  Then I should get Array as resultant output
-  And I should see "1.0, 0.85, -0.07566212829370711, -0.07635069706072706, -0.07698628638512295, -0.07747034005560738" as complete series
-Scenario: check all values in pacf for 5 lags with unbiased
-  When I provide 5 lags for pacf
-  When I provide yw yule walker as method
-  Then I should get Array as resultant output
-  And I should see "1.0, 0.8947368421052632, -0.10582010582010604, -0.11350188273265083, -0.12357534824820737, -0.13686534216335522" as complete series

data/features/step_definitions/bio-statsample-timeseries_steps.rb DELETED

File without changes

data/features/step_definitions/step_definitions.rb DELETED

@@ -1,37 +0,0 @@
-require 'statsample-timeseries'
-include Statsample::TimeSeries
-Given /^the following values in a timeseries:$/ do |series|
-  arr = []
-  series.hashes.each do |sequence|
-    arr += sequence['timeseries'].split(' ').map(&:to_i).to_ts
-  end
-  @timeseries = arr.to_ts
-end
-When /^I provide (\d+) lags for p?acf$/ do |lags|
-  @lags = lags.to_i
-end
-When /^I provide (\w+) yule walker as method$/ do |method|
-  @method = method
-end
-Then /^I should get (\w+) as resultant output$/ do |klass|
-  @result = @timeseries.pacf(@lags, @method)
-  assert_equal @result.class.to_s, klass
-end
-Then /^I should get (\w+) values in resultant p?acf$/ do |values_count|
-  assert_equal @result.size, values_count.to_i
-end
-And /^I should see (\d+\.\d) as first value$/ do |first_value|
-  assert_equal @result.first, first_value.to_f
-end
-And /^I should see \"(.+)\" as complete series$/ do |series|
-  series = series.split(',').map(&:to_f)
-  assert_equal @result, series
-end

data/features/step_definitions/step_definitions_acf.rb DELETED

@@ -1,8 +0,0 @@
-require 'statsample-timeseries'
-include Statsample::TimeSeries
-#all instance variable and cucumber DSL s DRYed up in step_definitions.rb
-And /^I calculate acf$/ do
-  @result = @timeseries.acf(@lags)
-end

data/features/support/env.rb DELETED

@@ -1,15 +0,0 @@
-require 'bundler'
-begin
-  Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
-  $stderr.puts e.message
-  $stderr.puts "Run `bundle install` to install missing gems"
-  exit e.status_code
-end
-$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
-require 'statsample-timeseries'
-require 'test/unit/assertions'
-World(Test::Unit::Assertions)

data/lib/statsample-timeseries/timeseries.rb DELETED

@@ -1,291 +0,0 @@
-require 'statsample-timeseries/timeseries/pacf'
-module Statsample::TimeSeriesShorthands
-  # Creates a new Statsample::TimeSeries object
-  # Argument should be equal to TimeSeries.new
-  def to_time_series(*args)
-    Statsample::TimeSeries::Series.new(self, :scale, *args)
-  end
-  alias :to_ts :to_time_series
-end
-class Array
-  include Statsample::TimeSeriesShorthands
-end
-module Statsample
-  module TimeSeries
-    # Collection of data indexed by time.
-    # The order goes from earliest to latest.
-    class Series < Statsample::Vector
-      include Statsample::TimeSeries::Pacf
-      # Calculates the autocorrelation coefficients of the series.
-      #
-      # The first element is always 1, since that is the correlation
-      # of the series with itself.
-      #
-      # Usage:
-      #
-      #  ts = (1..100).map { rand }.to_time_series
-      #
-      #  ts.acf   # => array with first 21 autocorrelations
-      #  ts.acf 3 # => array with first 3 autocorrelations
-      #
-      def acf(max_lags = nil)
-        max_lags ||= (10 * Math.log10(size)).to_i
-        (0..max_lags).map do |i|
-          if i == 0
-            1.0
-          else
-            m = self.mean
-            # can't use Pearson coefficient since the mean for the lagged series should
-            # be the same as the regular series
-            ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
-          end
-        end
-      end
-      #=Partial Autocorrelation
-      #Generates partial autocorrelation series for a timeseries
-      #==Parameters
-      #* *max_lags*: integer, optional - provide number of lags
-      #* *method*: string. Default: 'yw'.
-      #    * *yw*: For yule-walker algorithm unbiased approach
-      #    * *mle*: For Maximum likelihood algorithm approach
-      #    * *ld*: Forr Levinson-Durbin recursive approach
-      #==Returns
-      # array of pacf
-      def pacf(max_lags = nil, method = :yw)
-        method = method.downcase.to_sym
-        max_lags ||= (10 * Math.log10(size)).to_i
-        if method.eql? :yw or method.eql? :mle
-          Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
-        elsif method == :ld
-          series = self.acvf
-          Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
-        else
-          raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
-        end
-      end
-      #=Autoregressive estimation
-      #Generates AR(k) series for the calling timeseries by yule walker.
-      #==Parameters
-      #* *n*: integer, (default = 1500) number of observations for AR.
-      #* *k*: integer, (default = 1) order of AR process.
-      #==Returns
-      #Array constituting estimated AR series.
-      def ar(n = 1500, k = 1)
-        series = Statsample::TimeSeries.arima
-        #series = Statsample::TimeSeries::ARIMA.new
-        series.yule_walker(self, n, k)
-      end
-      #=AutoCovariance
-      #Provides autocovariance of timeseries.
-      #==Parameters
-      #* *demean* = true; optional. Supply false if series is not to be demeaned
-      #* *unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
-      #==Returns
-      # Autocovariance value
-      def acvf(demean = true, unbiased = true)
-        #TODO: change parameters list in opts.merge as suggested by John
-        #functionality: computes autocovariance of timeseries data
-        #returns: array of autocovariances
-        if demean
-          demeaned_series = self - self.mean
-        else
-          demeaned_series = self
-        end
-        n = (10 * Math.log10(size)).to_i + 1
-        m = self.mean
-        if unbiased
-          d = Array.new(self.size, self.size)
-        else
-          d = ((1..self.size).to_a.reverse)[0..n]
-        end
-        0.upto(n - 1).map do |i|
-          (demeaned_series * (self.lag(i) - m)).sum / d[i]
-        end
-      end
-      #=Correlation
-      #Gives correlation of timeseries.
-      def correlate(a, v, mode = 'full')
-        #peforms cross-correlation of two series
-        #multiarray.correlate2(a, v, 'full')
-        if a.size < v.size
-          raise("Should have same size!")
-        end
-        ps = a.size + v.size - 1
-        a_padded = Array.new(ps, 0)
-        a_padded[0...a.size] = a
-        out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
-        #ongoing
-      end
-      # Lags the series by k periods.
-      #
-      # The convention is to set the oldest observations (the first ones
-      # in the series) to nil so that the size of the lagged series is the
-      # same as the original.
-      #
-      # Usage:
-      #
-      #  ts = (1..10).map { rand }.to_time_series
-      #           # => [0.69, 0.23, 0.44, 0.71, ...]
-      #
-      #  ts.lag   # => [nil, 0.69, 0.23, 0.44, ...]
-      #  ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
-      #
-      def lag(k = 1)
-        return self if k == 0
-        dup.tap do |lagged|
-          (lagged.size - 1).downto k do |i|
-            lagged[i] = lagged[i - k]
-          end
-          (0...k).each do |i|
-            lagged[i] = nil
-          end
-          lagged.set_valid_data
-        end
-      end
-      #=Diff
-      # Performs the difference of the series.
-      # Note: The first difference of series is X(t) - X(t-1)
-      # But, second difference of series is NOT X(t) - X(t-2)
-      # It is the first difference of the first difference
-      # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
-      #==Params
-      #* *max_lags*: integer, (default: 1), number of differences reqd.
-      #==Usage
-      #
-      #  ts = (1..10).map { rand }.to_ts
-      #            # => [0.69, 0.23, 0.44, 0.71, ...]
-      #
-      #  ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]
-      #==Returns
-      # Timeseries object
-      def diff(max_lags = 1)
-        ts = self
-        difference = []
-        max_lags.times do
-          difference = ts - ts.lag
-          ts = difference
-        end
-        difference
-      end
-      #=Moving Average
-      # Calculates the moving average of the series using the provided
-      # lookback argument. The lookback defaults to 10 periods.
-      #==Parameters
-      #* *n*: integer, (default = 10) - loopback argument
-      #
-      #==Usage
-      #
-      #   ts = (1..100).map { rand }.to_ts
-      #            # => [0.69, 0.23, 0.44, 0.71, ...]
-      #
-      #   # first 9 observations are nil
-      #   ts.ma    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
-      #
-      #==Returns
-      #Resulting moving average timeseries object
-      def ma(n = 10)
-        return mean if n >= size
-        ([nil] * (n - 1) + (0..(size - n)).map do |i|
-          self[i...(i + n)].inject(&:+) / n
-        end).to_time_series
-      end
-      #=Exponential Moving Average
-      # Calculates an exponential moving average of the series using a
-      # specified parameter. If wilder is false (the default) then the EMA
-      # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
-      # Welles Wilder smoother of 1 / n.
-      #
-      # Warning for EMA usage: EMAs are unstable for small series, as they
-      # use a lot more than n observations to calculate. The series is stable
-      # if the size of the series is >= 3.45 * (n + 1)
-      #
-      #==Parameters
-      #* *n*: integer, (default = 10)
-      #* *wilder*: boolean, (default = false), if true, 1/n value is used for smoothing; if false, uses 2/(n+1) value
-      #
-      #==Usage
-      #   ts = (1..100).map { rand }.to_ts
-      #            # => [0.69, 0.23, 0.44, 0.71, ...]
-      #
-      #   # first 9 observations are nil
-      #   ts.ema   # => [ ... nil, 0.509... , 0.433..., ... ]
-      #
-      #==Returns
-      #EMA timeseries
-      def ema(n = 10, wilder = false)
-        smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
-        # need to start everything from the first non-nil observation
-        start = self.data.index { |i| i != nil }
-        # first n - 1 observations are nil
-        base = [nil] * (start + n - 1)
-        # nth observation is just a moving average
-        base << self[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
-        (start + n).upto size - 1 do |i|
-          base << self[i] * smoother + (1 - smoother) * base.last
-        end
-        base.to_time_series
-      end
-      #=Moving Average Convergence-Divergence
-      # Calculates the MACD (moving average convergence-divergence) of the time
-      # series - this is a comparison of a fast EMA with a slow EMA.
-      #
-      #==Parameters*:
-      #* *fast*: integer, (default = 12) - fast component of MACD
-      #* *slow*: integer, (default = 26) - slow component of MACD
-      #* *signal*: integer, (default = 9) - signal component of MACD
-      #
-      #==Usage
-      # ts = (1..100).map { rand }.to_ts
-      #            # => [0.69, 0.23, 0.44, 0.71, ...]
-      # ts.macd(13)
-      #
-      #==Returns
-      # Array of two timeseries - comparison of fast EMA with slow and EMA with signal value
-      def macd(fast = 12, slow = 26, signal = 9)
-        series = ema(fast) - ema(slow)
-        [series, series.ema(signal)]
-      end
-      # Borrow the operations from Vector, but convert to time series
-      def + series
-        super.to_a.to_ts
-      end
-      def - series
-        super.to_a.to_ts
-      end
-      def to_s
-        sprintf("Time Series(type:%s, n:%d)[%s]", @type.to_s, @data.size,
-                @data.collect{|d| d.nil? ? "nil":d}.join(","))
-      end
-    end
-  end
-end