RubyGems - bio-statsample-timeseries - Versions diffs - 0.1.1 → 0.1.2 - Mend

bio-statsample-timeseries 0.1.1 → 0.1.2

Files changed (13) hide show

data/README.rdoc +3 -3
data/Rakefile +1 -1
data/VERSION +1 -1
data/lib/bio-statsample-timeseries.rb +1 -1
data/lib/bio-statsample-timeseries/arima.rb +162 -9
data/lib/bio-statsample-timeseries/timeseries.rb +125 -13
data/lib/bio-statsample-timeseries/timeseries/pacf.rb +74 -15
data/lib/bio-statsample-timeseries/utility.rb +118 -0
data/test/test_arima_simulators.rb +18 -8
data/test/test_matrix.rb +92 -0
data/test/test_tseries.rb +1 -1
metadata +6 -5
data/lib/bio-statsample-timeseries/statsample-timeseries.rb +0 -2

data/README.rdoc CHANGED

@@ -1,8 +1,8 @@
 = bio-statsample-timeseries
 {<img
-src="https://secure.travis-ci.org/ankurgel/bioruby-statsample-timeseries.png"
-/>}[http://travis-ci.org/#!/ankurgel/bioruby-statsample-timeseries]
+src="https://secure.travis-ci.org/AnkurGel/bioruby-statsample-timeseries.png"
+/>}[http://travis-ci.org/#!/AnkurGel/bioruby-statsample-timeseries]
 Full description goes here
@@ -27,7 +27,7 @@ the source tree.
 Information on the source tree, documentation, issues and how to contribute, see
-  http://github.com/ankurgel/bioruby-statsample-timeseries
+  http://github.com/AnkurGel/bioruby-statsample-timeseries
 The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.

data/Rakefile CHANGED

@@ -15,7 +15,7 @@ require 'jeweler'
 Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
   gem.name = "bio-statsample-timeseries"
-  gem.homepage = "http://github.com/ankurgel/bioruby-statsample-timeseries"
+  gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-timeseries"
   gem.license = "MIT"
   gem.summary = %Q{TimeSeries modules for Statsample}
   gem.description = %Q{Statsample-timeseries is an extension to Statsample. It incorporates helpful timeseries functions and modules like ARMA, ARIMA, acf, pacf, lags etc.}

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.1
1	+ 0.1.2

data/lib/bio-statsample-timeseries.rb CHANGED

@@ -8,9 +8,9 @@
 #
 # In this file only require other files. Avoid other source code.
-require 'bio-statsample-timeseries/statsample-timeseries.rb'
 require 'statsample'
 require_relative 'bio-statsample-timeseries/timeseries.rb'
 require_relative 'bio-statsample-timeseries/arima.rb'
+require_relative 'bio-statsample-timeseries/utility.rb'

data/lib/bio-statsample-timeseries/arima.rb CHANGED

@@ -1,15 +1,29 @@
 #require 'debugger'
 module Statsample
-  module ARIMA
+  module TimeSeries
+    def self.arima
+      #not passing (ds,p,i,q) elements for now
+      #will do that once #arima is ready for all modelling
+      Statsample::TimeSeries::ARIMA.new
+    end
     class ARIMA < Statsample::Vector
       include Statsample::TimeSeries
+      # SUGGESTION: We could use an API similar to R
+      #             like
+      #             ar_obj=Statsample::TimeSeries.arima(ds,p,i,q)
+      # 			which calls
+      # 			Statsample::TimeSeries::Arima.new(ds,p,i,q)
       def arima(ds, p, i, q)
         #prototype
+        # ISSUE: We should difference the series now, if i>0.
+        #	     The result should be sent to the next step
         if q.zero?
           self.ar(p)
         elsif p.zero?
           self.ma(p)
+          # ISSUE-> ELSE -> simultaneous estimation of MA and AR parameters
         end
       end
@@ -20,15 +34,49 @@ module Statsample
         #or Burg's algorithm(more efficient)
       end
-      def yule_walker()
-        #To be implemented
-      end
+      #Converts a linear array into a vector
       def create_vector(arr)
         Statsample::Vector.new(arr, :scale)
       end
-      #tentative AR(p) simulator
+      def yule_walker(ts, n, k)
+        #parameters: timeseries, no of observations, order
+        #returns: phi parameters and sigma (the ar_sim simulation call below is commented out)
+        phi, sigma = Pacf::Pacf.yule_walker(ts, k)
+        return phi, sigma
+        #return ar_sim(n, phi, sigma)
+      end
+      def levinson_durbin(ts, n, k)
+        #parameters;
+        #ts: timeseries against which to generate phi coefficients
+        #n: number of observations for simulation
+        #k: order of AR
+        intermediate = Pacf::Pacf.levinson_durbin(ts, k)
+        phi, sigma = intermediate[1], intermediate[0]
+        return phi, sigma
+        #return ar_sim(n, phi, sigma)
+      end
+      #=Autoregressive Simulator
+      #Simulates an autoregressive AR(p) model with specified number of
+      #observations(n), with phi number of values for order p and sigma.
+      #
+      #*Analysis*:  http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_phi_::array of phi values, e.g: [0.35, 0.213] for p = 2
+      #-_sigma_::float, sigma value for error generation
+      #
+      #*Usage*:
+      #  ar = ARIMA.new
+      #  ar.ar_sim(1500, [0.3, 0.9], 0.12)
+      #    # => AR(2) autoregressive series of 1500 values
+      #
+      #*Returns*:
+      #Array of generated autoregressive series against attributes
       def ar_sim(n, phi, sigma)
         #using random number generator for inclusion of white noise
         err_nor = Distribution::Normal.rng(0, sigma)
@@ -58,7 +106,21 @@ module Statsample
         x - buffer
       end
-      #moving average simulator
+      #=Moving Average Simulator
+      #Simulates a moving average model with specified number of
+      #observations(n), with theta values for order k and sigma
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_theta_::array of floats, e.g: [0.23, 0.732], must be < 1
+      #-_sigma_::float, sigma value for whitenoise error
+      #
+      #*Usage*:
+      #  ar = ARIMA.new
+      #  ar.ma_sim(1500, [0.23, 0.732], 0.27)
+      #
+      #*Returns*:
+      #Array of generated MA(q) model
       def ma_sim(n, theta, sigma)
         #n is number of observations (eg: 1000)
         #theta are the model parameters containing q values
@@ -84,7 +146,28 @@ module Statsample
         x
       end
-      #arma simulator
+      #ARMA(Autoregressive and Moving Average) Simulator
+      #ARMA is represented by:
+      #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
+      #This simulates the ARMA model against p, q and sigma.
+      #If p = 0, then model is pure MA(q),
+      #If q = 0, then model is pure AR(p),
+      #otherwise, model is ARMA(p, q) represented by above.
+      #
+      #Detailed analysis: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_p_::array, contains p number of phi values for AR(p) process
+      #-_q_::array, contains q number of theta values for MA(q) process
+      #-_sigma_::float, sigma value for whitenoise error generation
+      #
+      #*Usage*:
+      #  ar = ARIMA.new
+      #  ar.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)
+      #
+      #*Returns*:
+      #array of generated ARMA model values
       def arma_sim(n, p, q, sigma)
         #represented by :
         #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
@@ -119,6 +202,76 @@ module Statsample
         end
         x - buffer
       end
+      #=Hannan-Rissanen for ARMA fit
+      def self.hannan(ts, p, q, k)
+        start_params = create_vector(Array.new(p+q+k, 0))
+        ts_dup = ts.dup
+      end
+    end
+    module Arima
+      class KalmanFilter < Statsample::Vector
+        include Statsample::TimeSeries
+        #=T
+        #The coefficient matrix for the state vector in state equation
+        # Its dimensions are r+k x r+k
+        #*Parameters*
+        #-_r_::integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
+        #-_k_::integer, number of exogenous variables in ARMA model
+        #-_p_::integer, the AR order of the ARMA model (number of AR coefficients)
+        #*References*: Statsmodels tsa, Durbin and Koopman Section 4.7
+        def self.T(r, k, p)
+          arr = Matrix.zero(r)
+          params_padded  = Statsample::Vector.new(Array.new(r, 0), :scale)
+          params_padded[0...p] = params[k...(p+k)]
+          intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
+          #appending an array filled with padded values in beginning
+          intermediate_matrix[0,0] = [params_padded]
+          #now generating column matrix for that:
+          arr = Matrix.columns(intermediate_matrix)
+          arr_00 = arr[0,0]
+          #identity matrix substitution in matrix, except row[0] and column[0]
+          r.times do |i|
+            arr[r,r] = 1
+          end
+          arr[0,0] = arr_00
+          arr
+        end
+        #=R
+        #The coefficient matrix for the state vector in the observation matrix.
+        #Its dimension is r+k x 1
+        #*Parameters*
+        #-_r_::integer, r is max(p, q+1) where p and q are order of AR and MA respectively
+        #-_k_::integer, number of exogenous variables in ARMA model
+        #-_q_::integer, The MA order in ARMA model
+        #-_p_::integer, The AR order in ARMA model
+        #*References*: Statsmodels tsa, Durbin and Koopman
+        def self.R(r, k, q, p)
+          arr = Matrix.column_vector(Array.new(r,0.0))
+          #pending - in kind of difficult end here;
+        end
+        #=Z
+        #The Z selector matrix
+        #*Parameters*
+        #-_r_::integer, max(p, q+1)
+        #Returns: vector
+        def self.Z(r)
+          arr = Statsample::Vector.new(Array.new(r, 0.0), :scale)
+          arr[0] = 1.0
+          return arr
+        end
+      end
     end
   end
 end

data/lib/bio-statsample-timeseries/timeseries.rb CHANGED

@@ -3,7 +3,7 @@ module Statsample::TimeSeriesShorthands
   # Creates a new Statsample::TimeSeries object
   # Argument should be equal to TimeSeries.new
   def to_time_series(*args)
-    Statsample::TimeSeries::TimeSeries.new(self, :scale, *args)
+    Statsample::TimeSeries::Series.new(self, :scale, *args)
   end
   alias :to_ts :to_time_series
@@ -17,7 +17,7 @@ module Statsample
   module TimeSeries
     # Collection of data indexed by time.
     # The order goes from earliest to latest.
-    class TimeSeries < Statsample::Vector
+    class Series < Statsample::Vector
       include Statsample::TimeSeries::Pacf
       # Calculates the autocorrelation coefficients of the series.
       #
@@ -31,7 +31,7 @@ module Statsample
       #  ts.acf   # => array with first 21 autocorrelations
       #  ts.acf 3 # => array with first 3 autocorrelations
       #
-      def acf max_lags = nil
+      def acf(max_lags = nil)
         max_lags ||= (10 * Math.log10(size)).to_i
         (0..max_lags).map do |i|
@@ -47,14 +47,95 @@ module Statsample
         end
       end
-      def pacf(max_lags = nil, method = 'yw')
+      #=Partial Autocorrelation
+      #Generates partial autocorrelation series for a timeseries
+      #*Parameters*:
+      #-_max_lags_::integer, optional - provide number of lags
+      #-_method_::string. Default: 'yw'.
+      #    * _yw_:: For yule-walker algorithm unbiased approach
+      #    * _mle_:: For Maximum likelihood algorithm approach
+      #    * _ld_:: For Levinson-Durbin recursive approach
+      #Returns - array of pacf
+      #
+      def pacf(max_lags = nil, method = :yw)
         #parameters:
         #max_lags => maximum number of lags for pcf
         #method => for autocovariance in yule_walker:
           #'yw' for 'yule-walker unbiased', 'mle' for biased maximum likelihood
+          #'ld' for Levinson-Durbin recursion
+        method = method.downcase.to_sym
         max_lags ||= (10 * Math.log10(size)).to_i
-       Pacf::Pacf.pacf_yw(self, max_lags, method)
+        if method.eql? :yw or method.eql? :mle
+          Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
+        elsif method == :ld
+          series = self.acvf
+          Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
+        else
+          raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
+        end
+      end
+      #=Autoregressive estimation
+      #Estimates AR(k) parameters for the calling timeseries by yule walker.
+      #*Parameters*:
+      #-_n_::integer, (default = 1500) number of observations for AR.
+      #-_k_::integer, (default = 1) order of AR process.
+      #*Returns*:
+      #Array of [phi, sigma] as returned by ARIMA#yule_walker.
+      #
+      def ar(n = 1500, k = 1)
+        series = Statsample::TimeSeries.arima
+        #series = Statsample::TimeSeries::ARIMA.new
+        series.yule_walker(self, n, k)
+      end
+      #=AutoCovariance
+      #Provides autocovariance of timeseries.
+      #-Parameters:
+      #demean = true; optional. Supply false if series is not to be demeaned
+      #unbiased = true; optional. true/false for unbiased/biased form of autocovariance
+      #-Returns-: Autocovariance value
+      #
+      def acvf(demean = true, unbiased = true)
+        #TODO: change parameters list in opts.merge as suggested by John
+        #functionality: computes autocovariance of timeseries data
+        #returns: array of autocovariances
+        if demean
+          demeaned_series = self - self.mean
+        else
+          demeaned_series = self
+        end
+        n = self.acf.size
+        m = self.mean
+        if unbiased
+          d = Array.new(self.size, self.size)
+        else
+          d = ((1..self.size).to_a.reverse)[0..n]
+        end
+        0.upto(n - 1).map do |i|
+          (demeaned_series * (self.lag(i) - m)).sum / d[i]
+        end
+      end
+      #=Correlation
+      #Gives correlation of timeseries.
+      #
+      def correlate(a, v, mode = 'full')
+        #performs cross-correlation of two series
+        #multiarray.correlate2(a, v, 'full')
+        if a.size < v.size
+          raise("Should have same size!")
+        end
+        ps = a.size + v.size - 1
+        a_padded = Array.new(ps, 0)
+        a_padded[0...a.size] = a
+        out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
+        #ongoing
       end
       # Lags the series by k periods.
@@ -71,7 +152,7 @@ module Statsample
       #  ts.lag   # => [nil, 0.69, 0.23, 0.44, ...]
       #  ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
       #
-      def lag k = 1
+      def lag(k = 1)
         return self if k == 0
         dup.tap do |lagged|
@@ -86,13 +167,14 @@ module Statsample
         end
       end
+      #=Diff
       # Performs a first difference of the series.
       #
       # The convention is to set the oldest observations (the first ones
       # in the series) to nil so that the size of the diffed series is the
       # same as the original.
       #
-      # Usage:
+      #*Usage*:
       #
       #  ts = (1..10).map { rand }.to_ts
       #            # => [0.69, 0.23, 0.44, 0.71, ...]
@@ -103,17 +185,23 @@ module Statsample
         self - self.lag
       end
-      # Calculates a moving average of the series using the provided
+      #=Moving Average
+      # Calculates the moving average of the series using the provided
       # lookback argument. The lookback defaults to 10 periods.
+      #*Parameters*:
+      #-_n_::integer, (default = 10) - lookback argument
       #
-      # Usage:
+      #*Usage*:
       #
       #   ts = (1..100).map { rand }.to_ts
       #            # => [0.69, 0.23, 0.44, 0.71, ...]
       #
       #   # first 9 observations are nil
       #   ts.ma    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
-      def ma n = 10
+      #
+      #*Returns*:
+      #Resulting moving average timeseries object
+      def ma(n = 10)
         return mean if n >= size
         ([nil] * (n - 1) + (0..(size - n)).map do |i|
@@ -121,6 +209,7 @@ module Statsample
         end).to_time_series
       end
+      #=Exponential Moving Average
       # Calculates an exponential moving average of the series using a
       # specified parameter. If wilder is false (the default) then the EMA
       # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
@@ -130,14 +219,22 @@ module Statsample
       # use a lot more than n observations to calculate. The series is stable
       # if the size of the series is >= 3.45 * (n + 1)
       #
-      # Usage:
+      #*Parameters*:
+      #-_n_::integer, (default = 10)
+      #-_wilder_::boolean, (default = false), if true, 1/n value is used for smoothing;
+      #if false, uses 2/(n+1) value
+      #
+      #*Usage*:
       #
       #   ts = (1..100).map { rand }.to_ts
       #            # => [0.69, 0.23, 0.44, 0.71, ...]
       #
       #   # first 9 observations are nil
       #   ts.ema   # => [ ... nil, 0.509... , 0.433..., ... ]
-      def ema n = 10, wilder = false
+      #
+      #*Returns*:
+      #EMA timeseries
+      def ema(n = 10, wilder = false)
         smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
         # need to start everything from the first non-nil observation
@@ -156,9 +253,24 @@ module Statsample
         base.to_time_series
       end
+      #=Moving Average Convergence-Divergence
       # Calculates the MACD (moving average convergence-divergence) of the time
       # series - this is a comparison of a fast EMA with a slow EMA.
-      def macd fast = 12, slow = 26, signal = 9
+      #
+      # *Parameters*:
+      # -_fast_::integer, (default = 12) - fast component of MACD
+      # -_slow_::integer, (default = 26) - slow component of MACD
+      # -_signal_::integer, (default = 9) - signal component of MACD
+      #
+      # *Usage*:
+      # ts = (1..100).map { rand }.to_ts
+      #            # => [0.69, 0.23, 0.44, 0.71, ...]
+      # ts.macd(13)
+      #
+      # *Returns*:
+      # Array of two timeseries - comparison of fast EMA with slow
+      # and EMA with signal value
+      def macd(fast = 12, slow = 26, signal = 9)
         series = ema(fast) - ema(slow)
         [series, series.ema(signal)]
       end

data/lib/bio-statsample-timeseries/timeseries/pacf.rb CHANGED

@@ -8,24 +8,79 @@ module Statsample
           #Inspiration: StatsModels
           pacf = [1.0]
           (1..max_lags).map do |i|
-            pacf << yule_walker(timeseries, i, method)[-1]
+            pacf << yule_walker(timeseries, i, method)[0][-1]
           end
           pacf
         end
+        #=Levinson-Durbin Algorithm
+        #*Parameters*:
+        #-_series_ : timeseries, or a series of autocovariances
+        #-_nlags_: integer(default: 10): largest lag to include in recursion or order of the AR process
+        #-_is_acovf_: boolean(default: false): series is timeseries if it is false, else contains autocovariances
+        #*returns*:
+        #-_sigma_v_: estimate of the error variance
+        #-_arcoefs_: AR coefficients
+        #-_pacf_: pacf function
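+        #-_sig_: array of error-variance estimates indexed by order (index 0 is unused; sigma_v is its last element)
+        #-_phi_: matrix of coefficients built up by the recursion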
+        def self.levinson_durbin(series, nlags = 10, is_acovf = false)
+          if is_acovf
+            series = series.map(&:to_f)
+          else
+            #nlags = order(k) of AR in this case
+            series = series.acvf.map(&:to_f)[0..nlags]
+          end
+          #phi = Array.new((nlags+1), 0.0) { Array.new(nlags+1, 0.0) }
+          order = nlags
+          phi = Matrix.zero(nlags + 1)
+          sig = Array.new(nlags+1)
+          #setting initial point for recursion:
+          phi[1,1] = series[1]/series[0]
+          #phi[1][1] = series[1]/series[0]
+          sig[1] = series[0] - phi[1, 1] * series[1]
+          2.upto(order).each do |k|
+            phi[k, k] = (series[k] - (Statsample::Vector.new(phi[1...k, k-1]) * series[1...k].reverse.to_ts).sum) / sig[k-1]
+            #some serious refinement needed in above for matrix manipulation. Will do today
+            1.upto(k-1).each do |j|
+              phi[j, k] = phi[j, k-1] - phi[k, k] * phi[k-j, k-1]
+            end
+            sig[k] = sig[k-1] * (1-phi[k, k] ** 2)
+          end
+          sigma_v = sig[-1]
+          arcoefs_delta = phi.column(phi.column_size - 1)
+          arcoefs = arcoefs_delta[1..arcoefs_delta.size]
+          pacf = diag(phi)
+          pacf[0] = 1.0
+          return [sigma_v, arcoefs, pacf, sig, phi]
+        end
+        def self.diag(mat)
+          #returns array of diagonal elements of a matrix.
+          #will later abstract it to matrix.rb in Statsample
+          return mat.each_with_index(:diagonal).map { |x, r, c| x }
+        end
+        #=Yule Walker Algorithm
+        #From the series, estimates AR(p)(autoregressive) parameter
+        #using Yule-Walker equation. See -
+        #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
+        #*Parameters*:
+        #-_ts_::timeseries
+        #-_k_::order, default = 1
+        #-_method_:: can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator is (n - k)
+        #*returns*:
+        #-_rho_:: autoregressive coefficients
+        #-_sigma_:: sigma parameter
         def self.yule_walker(ts, k = 1, method='yw')
-          #From the series, estimates AR(p)(autoregressive) parameter
-          #using Yule-Waler equation. See -
-          #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
-          #parameters:
-          #ts = series
-          #k = order, default = 1
-          #method = can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator
-          #is (n - k)
-          #returns:
-          #rho => autoregressive coefficients
           ts = ts - ts.mean
           n = ts.size
           if method.downcase.eql? 'yw'
@@ -37,7 +92,7 @@ module Statsample
             denom =->(k) { n }
           end
           r = Array.new(k + 1) { 0.0 }
-          r[0] = ts.map { |x| x ** 2 }.inject(:+).to_f / denom.call(0).to_f
+          r[0] = ts.map { |x| x**2 }.inject(:+).to_f / denom.call(0).to_f
           1.upto(k) do |l|
             r[l] = (ts[0...-l].zip(ts[l...ts.size])).map do |x|
@@ -48,7 +103,11 @@ module Statsample
           r_R = toeplitz(r[0...-1])
           mat = Matrix.columns(r_R).inverse()
-          solve_matrix(mat, r[1..r.size])
+          phi = solve_matrix(mat, r[1..r.size])
+          phi_vector = Statsample::Vector.new(phi, :scale)
+          r_vector = Statsample::Vector.new(r[1..r.size], :scale)
+          sigma = r[0] - (r_vector * phi_vector).sum
+          return [phi, sigma]
         end
         def self.toeplitz(arr)

data/lib/bio-statsample-timeseries/utility.rb ADDED

@@ -0,0 +1,118 @@
+module Statsample
+  class Vector
+    include Enumerable
+    include Writable
+    include Summarizable
+    #=Squares of sum
+    #---
+    #parameter:
+    #-demean::boolean - optional. __default__: false
+    #Sums the timeseries and then returns the square
+    def squares_of_sum(demean = false)
+      if demean
+        m = self.mean
+        self.map { |x| (x-m) }.sum ** 2
+      else
+        return self.sum.to_f ** 2
+      end
+    end
+  end
+  class ::Matrix
+    #=Squares of sum
+    #---
+    #Does squares of sum in column order.
+    #Necessary for computations in various processes
+    def squares_of_sum
+      (0...column_size).map do |j|
+        self.column(j).sum ** 2
+      end
+    end
+    #=Checks if given matrix is symmetric or not
+    #---
+    #returns bool
+    #`symmetric?` is present in Ruby Matrix 1.9.3+, but not in 1.8.*
+    def symmetric?
+      return false unless square?
+      (0...row_size).each do |i|
+        0.upto(i).each do |j|
+          return false if self[i, j] != self[j, i]
+        end
+      end
+      true
+    end
+    #=Cholesky decomposition
+    #Reference: http://en.wikipedia.org/wiki/Cholesky_decomposition
+    #---
+    #==Description
+    #Cholesky decomposition is represented by `M = L X L*`, where
+    #M is the symmetric matrix and `L` is the lower half of cholesky matrix,
+    #and `L*` is the conjugate form of `L`.
+    #*Returns* : Cholesky decomposition for a given matrix(if symmetric)
+    #*Utility*: Essential matrix function, requisite in kalman filter, least squares
+    def cholesky
+      raise ArgumentError, "Given matrix should be symmetric" unless symmetric?
+      c = Matrix.zero(row_size)
+      0.upto(row_size - 1).each do |k|
+        0.upto(row_size - 1).each do |i|
+          if i == k
+            sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[k, j] ** 2 }
+            value = Math.sqrt(self[k,k] - sum)
+            c[k, k] = value
+          elsif i > k
+            sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[i, j] * c[k, j] }
+            value = (self[k,i] - sum) / c[k, k]
+            c[i, k] = value
+          end
+        end
+      end
+      c
+    end
+    #=Chain Product
+    #Class method
+    #Returns the chain product of the given matrices
+    #==Usage:
+    #Let `a` be 4 * 3 matrix,
+    #Let `b` be 3 * 3 matrix,
+    #Let `c` be 3 * 1 matrix,
+    #then `Matrix.chain_dot(a, b, c)`
+    #===*NOTE*: Send the matrices in multiplicative order with proper dimensions
+    def self.chain_dot(*args)
+      #inspired by Statsmodels
+      begin
+        args.reduce { |x, y| x * y } #perform matrix multiplication in order
+      rescue ExceptionForMatrix::ErrDimensionMismatch
+        puts "ExceptionForMatrix: Please provide matrices with proper multiplicative dimensions"
+      end
+    end
+    #=Adds a column of constants.
+    #Appends a column of ones to the matrix/array if first argument is false
+    #If an n-array, first checks if one column of ones is already present
+    #if present, then original(self) is returned, else, prepends with a vector of ones
+    def add_constant(prepend = true)
+      #for Matrix
+      (0...column_size).each do |i|
+        if self.column(i).map(&:to_f) == Object::Vector.elements(Array.new(row_size, 1.0))
+          return self
+        end
+      end
+      #append/prepend a column of one's
+      vectors = (0...row_size).map do |r|
+        if prepend
+          [1.0].concat(self.row(r).to_a)
+        else
+          self.row(r).to_a.push(1.0)
+        end
+      end
+      return Matrix.rows(vectors)
+    end
+  end
+end

data/test/test_arima_simulators.rb CHANGED

@@ -11,10 +11,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
     ts.pacf
   end
   context("AR(1) simulations") do
-    include Statsample::ARIMA
+    include Statsample
     setup do
-      @series = ARIMA.new
+      @series = TimeSeries.arima
       @ar_1_positive = @series.ar_sim(1500, [0.9], 2)
       @ar_1_negative = @series.ar_sim(1500, [-0.9], 2)
@@ -73,10 +73,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
   end
   context("AR(p) simulations") do
-    include Statsample::ARIMA
+    include Statsample
     setup do
-      @series = ARIMA.new
+      @series = TimeSeries.arima
       @ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
       @ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
     end
@@ -120,9 +120,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
   context("MA(1) simulations") do
-    include Statsample::ARIMA
+    include Statsample
     setup do
-      @series = ARIMA.new
+      @series = TimeSeries.arima
       @ma_positive = @series.ar_sim(1500, [0.5], 2)
       @ma_negative = @series.ar_sim(1500, [-0.5], 2)
     end
@@ -153,9 +153,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
   end
   context("MA(q) simulations") do
-    include Statsample::ARIMA
+    include Statsample
     setup do
-      @series = ARIMA.new
+      @series = TimeSeries.arima
       @ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
       @ma_negative = @series.ar_sim(1500, [-0.5], 2)
     end
@@ -172,5 +172,15 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
       #visualization: http://jsfiddle.net/7keHK/2/
     end
   end
+  context("Yule walker estimations") do
+    include Statsample
+    setup do
+      @timeseries = 100.times.map { rand }.to_ts
+      @arma_simulation =->(n) { @timeseries.ar(n, k)}
+    end
+    #to write test
+  end
 end

data/test/test_matrix.rb ADDED

@@ -0,0 +1,92 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
+  def setup_square_matrix(arr, n)
+    #returns n * n matrix by slicing arr
+    return Matrix.rows(arr.each_slice(n).to_a)
+  end
+  def setup
+    @arr_square = (1..16)
+    @mat_non_symmetric = setup_square_matrix(@arr_square, 4)
+    @arr_non_square = (1..12).to_a
+    #this is a 4 X 3 matrix
+    @mat_non_square = Matrix.rows(@arr_non_square.each_slice(3).to_a)
+  end
+  #TESTS for matrix symmetricity - Matrix#symmetric?
+  context("symmetric?") do
+    should "return false for non-symmetric matrix" do
+      assert_equal @mat_non_symmetric.symmetric?, false
+    end
+    should "return false for non-square matrix" do
+      assert_equal @mat_non_square.symmetric?, false
+    end
+    should "return true for symmetrix matrix" do
+      arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+      mat = setup_square_matrix(arr, 3)
+      assert_equal mat.symmetric?, true
+    end
+  end
+  #TESTS for cholesky decomposition - Matrix#cholesky
+  context("Cholesky Decomposition") do
+    should "raise error for non symmetric matrix" do
+      assert_raises(ArgumentError) { @mat_non_symmetric.cholesky }
+    end
+    should "raise raise error if non-square matix" do
+      arr = (1..12).to_a
+      mat = Matrix.rows(arr.each_slice(3).to_a)
+      assert_raises(ArgumentError) { @mat_non_square.cholesky }
+    end
+    should "give hermitian cholesky decomposed matrix for symmetrix matrix" do
+       arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+       mat = setup_square_matrix(arr, 3)
+       assert_equal Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]], mat.cholesky
+    end
+  end
+  #TESTS for matrix squares of sum - Matrix#squares_of_sum
+  context("Squares of sum") do
+    should "return array of size 4 for matrix - #{@mat_non_symmetric}" do
+      #equal to column size
+      assert_equal @mat_non_symmetric.squares_of_sum.size, 4
+    end
+    should "return [784, 1024, 1296, 1600] for matrix - #{@mat_non_symmetric}" do
+      assert_equal @mat_non_symmetric.squares_of_sum, [784, 1024, 1296, 1600]
+    end
+  end
+  #TESTS for adding constants to matrix
+  context("Add constant") do
+    should "prepend all rows with ones" do
+      mat = @mat_non_symmetric.add_constant
+      assert_equal @mat_non_symmetric.column_size, 4
+      assert_equal mat.column_size, 5
+      assert_equal mat.column(0).to_a, [1.0, 1.0,1.0,1.0]
+    end
+    should "append all rows with ones if prepend = false" do
+      mat = @mat_non_symmetric.add_constant(false)
+      assert_equal @mat_non_symmetric.column_size, 4
+      assert_equal mat.column_size, 5
+      assert_equal mat.column(mat.column_size - 1).to_a, [1.0, 1.0,1.0,1.0]
+    end
+    should "not append/prepend if a column of ones already exists in matrix" do
+      matrix = Matrix[[1, 2, 1, 4], [5, 6, 1, 8], [9, 10, 1, 12]]
+      const_mat = matrix.add_constant
+      assert_equal matrix.column_size, const_mat.column_size
+      assert_equal matrix.row_size, const_mat.row_size
+    end
+  end
+end

data/test/test_tseries.rb CHANGED

@@ -7,7 +7,7 @@ class StatsampleTestTimeSeries < MiniTest::Unit::TestCase
   def setup
     # daily closes of iShares XIU on the TSX
-    @xiu = Statsample::TimeSeries::TimeSeries.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
+    @xiu = Statsample::TimeSeries::Series.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
       16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
   end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bio-statsample-timeseries
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
   prerelease:
 platform: ruby
 authors:
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-07-26 00:00:00.000000000 Z
+date: 2013-09-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: statsample
@@ -216,16 +216,17 @@ files:
 - features/support/env.rb
 - lib/bio-statsample-timeseries.rb
 - lib/bio-statsample-timeseries/arima.rb
-- lib/bio-statsample-timeseries/statsample-timeseries.rb
 - lib/bio-statsample-timeseries/timeseries.rb
 - lib/bio-statsample-timeseries/timeseries/pacf.rb
+- lib/bio-statsample-timeseries/utility.rb
 - test/fixtures/stock_data.csv
 - test/helper.rb
 - test/test_arima_simulators.rb
+- test/test_matrix.rb
 - test/test_pacf.rb
 - test/test_tseries.rb
 - test/test_wald.rb
-homepage: http://github.com/ankurgel/bioruby-statsample-timeseries
+homepage: http://github.com/AnkurGel/bioruby-statsample-timeseries
 licenses:
 - MIT
 post_install_message:
@@ -240,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 146794323
+      hash: -122253519
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:

data/lib/bio-statsample-timeseries/statsample-timeseries.rb DELETED

	@@ -1,2 +0,0 @@
1	-
2	-