RubyGems - bio-statsample-timeseries - Versions diffs - 0.1.1 - Mend

bio-statsample-timeseries 0.1.1

Files changed (27) hide show

data/.document +5 -0
data/.travis.yml +13 -0
data/Gemfile +21 -0
data/LICENSE.txt +20 -0
data/README.md +47 -0
data/README.rdoc +48 -0
data/Rakefile +48 -0
data/VERSION +1 -0
data/bin/bio-statsample-timeseries +74 -0
data/features/acf.feature +31 -0
data/features/pacf.feature +42 -0
data/features/step_definitions/bio-statsample-timeseries_steps.rb +0 -0
data/features/step_definitions/step_definitions.rb +37 -0
data/features/step_definitions/step_definitions_acf.rb +8 -0
data/features/support/env.rb +15 -0
data/lib/bio-statsample-timeseries.rb +16 -0
data/lib/bio-statsample-timeseries/arima.rb +124 -0
data/lib/bio-statsample-timeseries/statsample-timeseries.rb +2 -0
data/lib/bio-statsample-timeseries/timeseries.rb +181 -0
data/lib/bio-statsample-timeseries/timeseries/pacf.rb +100 -0
data/test/fixtures/stock_data.csv +500 -0
data/test/helper.rb +81 -0
data/test/test_arima_simulators.rb +176 -0
data/test/test_pacf.rb +52 -0
data/test/test_tseries.rb +103 -0
data/test/test_wald.rb +71 -0
metadata +256 -0

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,81 @@
+require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'minitest/unit'
+require 'shoulda'
+require 'shoulda-context'
+require 'mocha'
+require 'bio-statsample-timeseries'
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'bio-statsample-timeseries'
+module MiniTest
+  class Unit
+    class TestCase
+    include Shoulda::Context::Assertions
+    include Shoulda::Context::InstanceMethods
+    extend Shoulda::Context::ClassMethods
+      def self.should_with_gsl(name,&block)
+        should(name) do
+          if Statsample.has_gsl?
+            instance_eval(&block)
+          else
+            skip("Requires GSL")
+          end
+        end
+      end
+    end
+  end
+  module Assertions
+    def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
+      msg||="Different vectors #{exp} - #{obs}"
+      assert_equal(exp.size, obs.size)
+      exp.data_with_nils.each_with_index {|v,i|
+        assert_in_delta(v,obs[i],delta)
+      }
+    end
+    def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
+      assert_equal(exp.size, obs.size, "Different size.#{msg}")
+      exp.size.times {|i|
+        assert_in_delta(exp[i],obs[i],delta, "Different element #{i}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
+      }
+    end
+    def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
+       assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
+       assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
+       exp.row_size.times {|i|
+         exp.column_size.times {|j|
+           assert_in_delta(exp[i,j],obs[i,j], delta, "Different element #{i},#{j}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
+         }
+       }
+    end
+    alias :assert_raise :assert_raises unless method_defined? :assert_raise
+    alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
+    alias :assert_not_same :refute_same unless method_defined? :assert_not_same
+    unless method_defined? :assert_nothing_raised
+      def assert_nothing_raised(msg=nil)
+        msg||="Nothing should be raised, but raised %s"
+        begin
+          yield
+          not_raised=true
+        rescue Exception => e
+          not_raised=false
+          msg=sprintf(msg,e)
+        end
+        assert(not_raised,msg)
+      end
+    end
+  end
+end
+MiniTest::Unit.autorun # NOTE(review): presumably schedules the loaded test cases to run when the process exits - confirm for the MiniTest version in use

data/test/test_arima_simulators.rb ADDED Viewed

@@ -0,0 +1,176 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
+  def generate_acf(simulation)
+    ts = simulation.to_ts
+    ts.acf
+  end
+  def generate_pacf(simulation)
+    ts = simulation.to_ts
+    ts.pacf
+  end
+  context("AR(1) simulations") do
+    include Statsample::ARIMA
+    setup do
+      @series = ARIMA.new
+      @ar_1_positive = @series.ar_sim(1500, [0.9], 2)
+      @ar_1_negative = @series.ar_sim(1500, [-0.9], 2)
+      #generating acf
+      @positive_acf = generate_acf(@ar_1_positive)
+      @negative_acf = generate_acf(@ar_1_negative)
+      #generating pacf
+      @positive_pacf = generate_pacf(@ar_1_positive)
+      @negative_pacf = generate_pacf(@ar_1_negative)
+    end
+    should "have exponential decay of acf on positive side with phi > 0" do
+      @acf = @positive_acf
+      assert_equal @acf[0], 1.0
+      assert_operator @acf[1], :>=, 0.7
+      assert_operator @acf[@acf.size - 1], :<=, 0.2
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_positive_phi_acf.png
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_positive_phi_acf_line.png
+    end
+    should "have series with alternating sign on acf starting on negative side with phi < 0" do
+      @acf = @negative_acf
+      assert_equal @acf[0], 1.0
+      #testing for alternating series
+      assert_operator @acf[1], :<, 0
+      assert_operator @acf[2], :>, 0
+      assert_operator @acf[3], :<, 0
+      assert_operator @acf[4], :>, 0
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_negative_phi_acf.png
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_negative_phi_acf_line.png
+    end
+    should "have positive spike on pacf at lag 1 for phi > 0" do
+      @pacf = @positive_pacf
+      assert_operator @pacf[1], :>=, 0.7
+      assert_operator @pacf[2], :<=, 0.2
+      assert_operator @pacf[3], :<=, 0.14
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_postive_phi_pacf.png
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_postive_phi_pacf_line.png
+    end
+    should "have negative spike on pacf at lag 1 for phi < 0" do
+      @pacf = @negative_pacf
+      assert_operator @pacf[1], :<=, 0
+      assert_operator @pacf[1], :<=, -0.5
+      assert_operator @pacf[2], :>=, -0.5
+      #visualizaton:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%281%29_negative_phi_pacf.png
+      #[hided @pacf[0] = 1 to convey accurate picture]
+    end
+  end
+  context("AR(p) simulations") do
+    include Statsample::ARIMA
+    setup do
+      @series = ARIMA.new
+      @ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
+      @ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
+    end
+    should "have damped sine wave starting on positive side on acf" do
+      @ar = @ar_p_positive
+      @acf = generate_acf(@ar)
+      assert_operator @acf[0], :>=, @acf[1]
+      assert_operator @acf[1], :>=, 0.0
+      #caution: sine curve can split on cartesian plane,
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR(p)_positive_phi_sine_wave.png
+    end
+    should "have damped sine wave starting on negative side on acf" do
+      @ar = @ar_p_negative
+      @acf = generate_acf(@ar)
+      assert_operator @acf[0], :>=, @acf[1]
+      assert_operator @acf[1], :<=, 0.0
+      assert_operator @acf[1], :>=, @acf[2]
+      #caution: sine curve can split on cartesian plane,
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR%28p%29_negative_phi_acf_sine_wave.png
+    end
+    should "have spikes from 1 to p for pacf" do
+      #here p = 2
+      @ar = @ar_p_positive
+      @pacf = generate_pacf(@ar)
+      assert_equal @pacf[0], 1.0
+      assert_operator @pacf[1], :>, @pacf[3]
+      assert_operator @pacf[1], :>, @pacf[4]
+      assert_operator @pacf[1], :>, @pacf[5]
+      assert_operator @pacf[2], :>, @pacf[3]
+      assert_operator @pacf[2], :>, @pacf[4]
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/AR(p)_positive_phi_pacf_spikes.png
+    end
+  end
+  context("MA(1) simulations") do
+    include Statsample::ARIMA
+    setup do
+      @series = ARIMA.new
+      @ma_positive = @series.ar_sim(1500, [0.5], 2)
+      @ma_negative = @series.ar_sim(1500, [-0.5], 2)
+    end
+    should "have one positive spike at lag 1 on acf at positive theta" do
+      @acf = generate_acf(@ma_positive)
+      assert_equal @acf[0], 1.0
+      assert_operator @acf[1], :>=, 0 #test if positive
+      #test if spike
+      assert_operator @acf[2], :>=, 0.1
+      assert_operator @acf[3], :<=, 0.2
+      assert_operator @acf[4], :<=, 0.2
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/MA%281%29_postive_acf.png
+    end
+    should "have one negative spike at lag 1 on acf at negative theta" do
+      @acf = generate_acf(@ma_negative)
+      assert_operator @acf[1], :<, 0
+      assert_operator @acf[2], :>=, @acf[1]
+      assert_operator @acf[3], :>=, @acf[1]
+      #visualization:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/MA%281%29_negative_acf.png
+      #positive_vs_negative:
+      #https://dl.dropboxusercontent.com/u/102071534/sciruby/MA%281%29_acf_positive_vs_negative.png
+    end
+  end
+  context("MA(q) simulations") do
+    include Statsample::ARIMA
+    setup do
+      @series = ARIMA.new
+      @ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
+      @ma_negative = @series.ar_sim(1500, [-0.5], 2)
+    end
+    should "have q positive spikes at lag 1 to q on acf at positive thetas" do
+      @acf = generate_acf(@ma_positive)
+      assert_operator @acf[1], :>=, @acf[2]
+      assert_operator @acf[2], :>=, @acf[3]
+      assert_operator @acf[3], :>=, @acf[4]
+      #Visualization: http://jsfiddle.net/YeK2c/
+    end
+    should "have damped sine wave on pacf at positive thetas" do
+      #visualization: http://jsfiddle.net/7keHK/2/
+    end
+  end
+end

data/test/test_pacf.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleTimeSeriesPacfTestCase < MiniTest::Unit::TestCase
+  context(Statsample::TimeSeries) do
+    include Statsample::TimeSeries
+    setup do
+      @ts = (1..20).map { |x| x * 10 }.to_ts
+      #setting up a proc to get a closure for pacf calling with variable lags and methods
+      @pacf_proc =->(k, method) { @ts.pacf(k, method) }
+    end
+    should "return correct correct pacf size for lags = 5" do
+      assert_equal @pacf_proc.call(5, 'yw').size, 6
+      assert_equal @pacf_proc.call(5, 'mle').size, 6
+      #first element is 1.0
+    end
+    should "return correct correct pacf size for lags = 10" do
+      assert_equal @pacf_proc.call(10, 'yw').size, 11
+      assert_equal @pacf_proc.call(10, 'mle').size, 11
+      #first element is 1.0
+    end
+    should "have first element as 1.0" do
+      assert_equal @pacf_proc.call(10, 'yw')[0], 1.0
+      assert_equal @pacf_proc.call(10, 'mle')[0], 1.0
+    end
+    should "give correct pacf results for unbiased yule-walker" do
+      result_10 = [1.0, 0.8947368421052632, -0.10582010582010604, -0.11350188273265083, -0.12357534824820737, -0.13686534216335522, -0.15470588235294147, -0.17938011883732036, -0.2151192288178601, -0.2707082833133261, -0.3678160919540221]
+      result_5 = [1.0, 0.8947368421052632, -0.10582010582010604, -0.11350188273265083, -0.12357534824820737, -0.13686534216335522]
+      assert_equal @pacf_proc.call(10, 'yw'), result_10
+      assert_equal @pacf_proc.call(5, 'yw'), result_5
+      #Checking for lag = (1..10)
+      1.upto(10) do |i|
+        assert_equal @pacf_proc.call(i, 'yw'), result_10[0..i]
+      end
+    end
+    should "give correct pacf results for mle yule-walker" do
+      result_10 = [1.0, 0.85, -0.07566212829370711, -0.07635069706072706, -0.07698628638512295, -0.07747034005560738, -0.0776780981161499, -0.07744984679625189, -0.0765803323191094, -0.07480650005932366, -0.07179435184923755]
+      result_5 = [1.0, 0.85, -0.07566212829370711, -0.07635069706072706, -0.07698628638512295, -0.07747034005560738]
+      assert_equal @pacf_proc.call(10, 'mle'), result_10
+      assert_equal @pacf_proc.call(5, 'mle'), result_5
+      #Checking for lag = (1..10)
+      1.upto(10) do |i|
+        assert_equal @pacf_proc.call(i, 'mle'), result_10[0..i]
+      end
+    end
+  end
+end

data/test/test_tseries.rb ADDED Viewed

@@ -0,0 +1,103 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleTestTimeSeries < MiniTest::Unit::TestCase
+  include Statsample::Shorthand
+  # All calculations are compared to the output of the equivalent function in R
+  def setup
+    # daily closes of iShares XIU on the TSX
+    @xiu = Statsample::TimeSeries::TimeSeries.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
+      16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
+  end
+  def test_acf
+    acf = @xiu.acf
+    assert_equal 14,       acf.length
+    # test the first few autocorrelations
+    assert_in_delta 1.0,   acf[0], 0.0001
+    assert_in_delta 0.852, acf[1], 0.001
+    assert_in_delta 0.669, acf[2], 0.001
+    assert_in_delta 0.486, acf[3], 0.001
+  end
+  def test_lag
+    #test of default lag
+    lag1 = @xiu.lag
+    assert_in_delta 16.66, lag1[lag1.size - 1], 0.001
+    assert_in_delta 16.36, lag1[lag1.size - 2], 0.001
+    #test with different lagging unit
+    lag2 = @xiu.lag(2)
+    assert_in_delta 16.36, lag2[lag2.size - 1], 0.001
+    assert_in_delta 16.56, lag2[lag2.size - 2], 0.001
+  end
+  def test_delta
+    diff = @xiu.diff
+    assert_in_delta  0.11, diff[@xiu.size - 1], 0.001
+    assert_in_delta  0.30, diff[@xiu.size - 2], 0.001
+    assert_in_delta -0.20, diff[@xiu.size - 3], 0.001
+  end
+  def test_ma
+    # test default
+    ma10 = @xiu.ma
+    assert_in_delta ma10[-1],  16.897, 0.001
+    assert_in_delta ma10[-5],  17.233, 0.001
+    assert_in_delta ma10[-10], 17.587, 0.001
+    # test with a different lookback period
+    ma5 = @xiu.ma 5
+    assert_in_delta ma5[-1],  16.642, 0.001
+    assert_in_delta ma5[-10], 17.434, 0.001
+    assert_in_delta ma5[-15], 17.74,  0.001
+  end
+  def test_ema
+    # test default
+    ema10 = @xiu.ema
+    assert_in_delta ema10[-1],  16.87187, 0.00001
+    assert_in_delta ema10[-5],  17.19187, 0.00001
+    assert_in_delta ema10[-10], 17.54918, 0.00001
+    # test with a different lookback period
+    ema5 = @xiu.ema 5
+    assert_in_delta ema5[-1],  16.71299, 0.0001
+    assert_in_delta ema5[-10], 17.49079, 0.0001
+    assert_in_delta ema5[-15], 17.70067, 0.0001
+    # test with a different smoother
+    ema_w = @xiu.ema 10, true
+    assert_in_delta ema_w[-1],  17.08044, 0.00001
+    assert_in_delta ema_w[-5],  17.33219, 0.00001
+    assert_in_delta ema_w[-10], 17.55810, 0.00001
+  end
+  def test_macd
+    # MACD uses a lot more data than the other ones, so we need a bigger vector
+    data = File.readlines(File.dirname(__FILE__) + "/fixtures/stock_data.csv").map(&:to_f).to_time_series
+    macd, signal = data.macd
+    # check the MACD
+    assert_in_delta  3.12e-4, macd[-1],  1e-6
+    assert_in_delta -1.07e-2, macd[-10], 1e-4
+    assert_in_delta -5.65e-3, macd[-20], 1e-5
+    # check the signal
+    assert_in_delta -0.00628, signal[-1],  1e-5
+    assert_in_delta -0.00971, signal[-10], 1e-5
+    assert_in_delta -0.00338, signal[-20], 1e-5
+  end
+end

data/test/test_wald.rb ADDED Viewed

@@ -0,0 +1,71 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleWaldTest < MiniTest::Unit::TestCase
+  # Wald test is useful to test a series of n acf with Chi-square
+  # degree of freedom. It is extremely useful to test fit the fit of
+  # an ARIMA model to test the residuals.
+  include Statsample::TimeSeries
+  include Statsample::Shorthand
+  include Distribution
+  def setup
+    #create time series to evaluate later
+    @wald = 100.times.map { rand(100) }.to_ts
+  end
+  def sum_of_squares_of_acf_series(lags)
+    #perform sum of squares for a series of acf with specified lags
+    acf = @wald.acf(lags)
+    return acf.map { |x| x ** 2 }.inject(:+)
+  end
+  def chisquare_cdf(sum_of_squares, lags)
+    1 - ChiSquare.cdf(sum_of_squares, lags)
+  end
+  def test_wald_with_5_lags
+    #number of lags for acf = 5
+    lags = 5
+    sum_of_squares = sum_of_squares_of_acf_series(lags)
+    assert_in_delta chisquare_cdf(sum_of_squares, lags), 1, 0.05
+    assert_equal @wald.acf(lags).size, lags + 1
+  end
+  def test_wald_with_10_lags
+    #number of lags for acf = 10
+    lags = 10
+    sum_of_squares = sum_of_squares_of_acf_series(lags)
+    assert_in_delta chisquare_cdf(sum_of_squares, lags), 1, 0.05
+    assert_equal @wald.acf(lags).size, lags + 1
+  end
+  def test_wald_with_15_lags
+    #number of lags for acf = 15
+    lags = 15
+    sum_of_squares = sum_of_squares_of_acf_series(lags)
+    assert_in_delta chisquare_cdf(sum_of_squares, lags), 1, 0.05
+    assert_equal @wald.acf(lags).size, lags + 1
+  end
+  def test_wald_with_20_lags
+    #number of lags for acf = 20
+    lags = 20
+    sum_of_squares = sum_of_squares_of_acf_series(lags)
+    assert_in_delta chisquare_cdf(sum_of_squares, lags), 1, 0.05
+    assert_equal @wald.acf(lags).size, lags + 1
+  end
+  def test_wald_with_25_lags
+    #number of lags for acf = 25
+    lags = 25
+    sum_of_squares = sum_of_squares_of_acf_series(lags)
+    assert_in_delta chisquare_cdf(sum_of_squares, lags), 1, 0.05
+    assert_equal @wald.acf(lags).size, lags + 1
+  end
+end