bio-statsample-timeseries 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +3 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/bio-statsample-timeseries.rb +1 -1
- data/lib/bio-statsample-timeseries/arima.rb +162 -9
- data/lib/bio-statsample-timeseries/timeseries.rb +125 -13
- data/lib/bio-statsample-timeseries/timeseries/pacf.rb +74 -15
- data/lib/bio-statsample-timeseries/utility.rb +118 -0
- data/test/test_arima_simulators.rb +18 -8
- data/test/test_matrix.rb +92 -0
- data/test/test_tseries.rb +1 -1
- metadata +6 -5
- data/lib/bio-statsample-timeseries/statsample-timeseries.rb +0 -2
data/README.rdoc
CHANGED
@@ -1,8 +1,8 @@
 = bio-statsample-timeseries
 
 {<img
-src="https://secure.travis-ci.org/
-/>}[http://travis-ci.org/#!/
+src="https://secure.travis-ci.org/AnkurGel/bioruby-statsample-timeseries.png"
+/>}[http://travis-ci.org/#!/AnkurGel/bioruby-statsample-timeseries]
 
 Full description goes here
 
@@ -27,7 +27,7 @@ the source tree.
 
 Information on the source tree, documentation, issues and how to contribute, see
 
-  http://github.com/
+  http://github.com/AnkurGel/bioruby-statsample-timeseries
 
 The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
 
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
 Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
   gem.name = "bio-statsample-timeseries"
-  gem.homepage = "http://github.com/
+  gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-timeseries"
   gem.license = "MIT"
   gem.summary = %Q{TimeSeries modules for Statsample}
   gem.description = %Q{Statsample-timeseries is an extension to Statsample. It incorporates helpful timeseries functions and modules like ARMA, ARIMA, acf, pacf, lags etc.}
data/VERSION
CHANGED
@@ -1 +1 @@
-0.1.
+0.1.2
data/lib/bio-statsample-timeseries.rb
CHANGED
@@ -8,9 +8,9 @@
 #
 # In this file only require other files. Avoid other source code.
 
-require 'bio-statsample-timeseries/statsample-timeseries.rb'
 require 'statsample'
 require_relative 'bio-statsample-timeseries/timeseries.rb'
 require_relative 'bio-statsample-timeseries/arima.rb'
+require_relative 'bio-statsample-timeseries/utility.rb'
 
 
data/lib/bio-statsample-timeseries/arima.rb
CHANGED
@@ -1,15 +1,29 @@
 #require 'debugger'
 module Statsample
-  module
+  module TimeSeries
+
+    def self.arima
+      #not passing (ds,p,i,q) elements for now
+      #will do that once #arima is ready for all modelling
+      Statsample::TimeSeries::ARIMA.new
+    end
+
     class ARIMA < Statsample::Vector
       include Statsample::TimeSeries
-
+      # SUGGESTION: We could use an API similar to R
+      # like
+      # ar_obj=Statsample::TimeSeries.arima(ds,p,i,q)
+      # which calls
+      # Statsample::TimeSeries::Arima.new(ds,p,i,q)
       def arima(ds, p, i, q)
         #prototype
+        # ISSUE: We should differenciate now, if i>0.
+        # The result should be send to next step
         if q.zero?
           self.ar(p)
         elsif p.zero?
           self.ma(p)
+        # ISSUE-> ELSE -> simultaneuos estimation of MA and AR parameters
         end
       end
 
@@ -20,15 +34,49 @@ module Statsample
        #or Burg's algorithm(more efficient)
       end
 
-
-      #To be implemented
-      end
-
+      #Converts a linear array into a vector
       def create_vector(arr)
         Statsample::Vector.new(arr, :scale)
       end
 
-
+
+      def yule_walker(ts, n, k)
+        #parameters: timeseries, no of observations, order
+        #returns: simulated autoregression with phi parameters and sigma
+        phi, sigma = Pacf::Pacf.yule_walker(ts, k)
+        return phi, sigma
+        #return ar_sim(n, phi, sigma)
+      end
+
+      def levinson_durbin(ts, n, k)
+        #parameters;
+        #ts: timseries against which to generate phi coefficients
+        #n: number of observations for simulation
+        #k: order of AR
+        intermediate = Pacf::Pacf.levinson_durbin(ts, k)
+        phi, sigma = intermediate[1], intermediate[0]
+        return phi, sigma
+        #return ar_sim(n, phi, sigma)
+      end
+
+      #=Autoregressive Simulator
+      #Simulates an autoregressive AR(p) model with specified number of
+      #observations(n), with phi number of values for order p and sigma.
+      #
+      #*Analysis*: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_phi_::array of phi values, e.g: [0.35, 0.213] for p = 2
+      #-_sigma_::float, sigma value for error generalization
+      #
+      #*Usage*:
+      # ar = ARIMA.new
+      # ar.ar_sim(1500, [0.3, 0.9], 0.12)
+      # # => AR(2) autoregressive series of 1500 values
+      #
+      #*Returns*:
+      #Array of generated autoregressive series against attributes
       def ar_sim(n, phi, sigma)
         #using random number generator for inclusion of white noise
         err_nor = Distribution::Normal.rng(0, sigma)
@@ -58,7 +106,21 @@ module Statsample
         x - buffer
       end
 
-
+      #=Moving Average Simulator
+      #Simulates a moving average model with specified number of
+      #observations(n), with theta values for order k and sigma
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_theta_::array of floats, e.g: [0.23, 0.732], must be < 1
+      #-_sigma_::float, sigma value for whitenoise error
+      #
+      #*Usage*:
+      # ar = ARIMA.new
+      # ar.ma_sim(1500, [0.23, 0.732], 0.27)
+      #
+      #*Returns*:
+      #Array of generated MA(q) model
       def ma_sim(n, theta, sigma)
         #n is number of observations (eg: 1000)
         #theta are the model parameters containting q values
@@ -84,7 +146,28 @@ module Statsample
         x
       end
 
-      #
+      #ARMA(Autoregressive and Moving Average) Simulator
+      #ARMA is represented by:
+      #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
+      #This simulates the ARMA model against p, q and sigma.
+      #If p = 0, then model is pure MA(q),
+      #If q = 0, then model is pure AR(p),
+      #otherwise, model is ARMA(p, q) represented by above.
+      #
+      #Detailed analysis: http://ankurgoel.com/blog/2013/07/20/ar-ma-arma-acf-pacf-visualizations/
+      #
+      #*Parameters*:
+      #-_n_::integer, number of observations
+      #-_p_::array, contains p number of phi values for AR(p) process
+      #-_q_::array, contains q number of theta values for MA(q) process
+      #-_sigma_::float, sigma value for whitenoise error generation
+      #
+      #*Usage*:
+      # ar = ARIMA.new
+      # ar.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)
+      #
+      #*Returns*:
+      #array of generated ARMA model values
       def arma_sim(n, p, q, sigma)
         #represented by :
         #http://upload.wikimedia.org/math/2/e/d/2ed0485927b4370ae288f1bc1fe2fc8b.png
@@ -119,6 +202,76 @@ module Statsample
         end
         x - buffer
       end
+
+      #=Hannan-Rissanen for ARMA fit
+      def self.hannan(ts, p, q, k)
+        start_params = create_vector(Array.new(p+q+k, 0))
+        ts_dup = ts.dup
+
+      end
+    end
+
+    module Arima
+      class KalmanFilter < Statsample::Vector
+        include Statsample::TimeSeries
+
+        #=T
+        #The coefficient matrix for the state vector in state equation
+        # It's dimensions is r+k x r+k
+        #*Parameters*
+        #-_r_::integer, r is max(p, q+1), where p and q are orders of AR and MA respectively
+        #-_k_::integer, number of exogeneous variables in ARMA model
+        #-_q_::integer, The AR coefficient of ARMA model
+
+        #*References*: Statsmodels tsa, Durbin and Koopman Section 4.7
+        def self.T(r, k, p)
+          arr = Matrix.zero(r)
+          params_padded = Statsample::Vector.new(Array.new(r, 0), :scale)
+
+          params_padded[0...p] = params[k...(p+k)]
+          intermediate_matrix = (r-1).times.map { Array.new(r, 0) }
+          #appending an array filled with padded values in beginning
+          intermediate_matrix[0,0] = [params_padded]
+
+          #now generating column matrix for that:
+          arr = Matrix.columns(intermediate_matrix)
+          arr_00 = arr[0,0]
+
+          #identify matrix substituition in matrix except row[0] and column[0]
+          r.times do |i|
+            arr[r,r] = 1
+          end
+          arr[0,0] = arr_00
+          arr
+        end
+
+
+        #=R
+        #The coefficient matrix for the state vector in the observation matrix.
+        #It's dimension is r+k x 1
+        #*Parameters*
+        #-_r_::integer, r is max(p, q+1) where p and q are order of AR and MA respectively
+        #-_k_::integer, number of exogeneous variables in ARMA model
+        #-_q_::integer, The MA order in ARMA model
+        #-_p_::integer, The AR order in ARMA model
+        #*References*: Statsmodels tsa, Durbin and Koopman
+        def self.R(r, k, q, p)
+          arr = Matrix.column_vector(Array.new(r,0.0))
+
+          #pending - in kind of difficult end here;
+        end
+
+        #=Z
+        #The Z selector matrix
+        #*Parameters*
+        #-_r_::integer, max(p, q+1)
+        #Returns: vector
+        def self.Z(r)
+          arr = Statsample::Vector.new(Array.new(r, 0.0), :scale)
+          arr[0] = 1.0
+          return arr
+        end
+      end
     end
   end
 end
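Taken together, the arima.rb hunks above add a small simulation API. The snippet below is not part of the diff; it is a hypothetical usage sketch assuming the 0.1.2 methods shown above (Statsample::TimeSeries.arima, #ar_sim, #ma_sim, #arma_sim) and an installed statsample plus this gem:

require 'bio-statsample-timeseries'

series = Statsample::TimeSeries.arima                           # bare ARIMA helper added by this release
ar   = series.ar_sim(1500, [0.3, 0.9], 0.12)                    # AR(2): 1500 values, phi = [0.3, 0.9], sigma = 0.12
ma   = series.ma_sim(1500, [0.23, 0.732], 0.27)                 # MA(2): theta values, per the doc comment, must be < 1
arma = series.arma_sim(1500, [0.3, 0.272], [0.8, 0.317], 0.92)  # ARMA(2, 2) with both phi and theta arrays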
data/lib/bio-statsample-timeseries/timeseries.rb
CHANGED
@@ -3,7 +3,7 @@ module Statsample::TimeSeriesShorthands
   # Creates a new Statsample::TimeSeries object
   # Argument should be equal to TimeSeries.new
   def to_time_series(*args)
-    Statsample::TimeSeries::
+    Statsample::TimeSeries::Series.new(self, :scale, *args)
   end
 
   alias :to_ts :to_time_series
@@ -17,7 +17,7 @@ module Statsample
   module TimeSeries
     # Collection of data indexed by time.
     # The order goes from earliest to latest.
-    class
+    class Series < Statsample::Vector
       include Statsample::TimeSeries::Pacf
       # Calculates the autocorrelation coefficients of the series.
       #
@@ -31,7 +31,7 @@ module Statsample
       # ts.acf # => array with first 21 autocorrelations
       # ts.acf 3 # => array with first 3 autocorrelations
       #
-      def acf
+      def acf(max_lags = nil)
         max_lags ||= (10 * Math.log10(size)).to_i
 
         (0..max_lags).map do |i|
@@ -47,14 +47,95 @@ module Statsample
         end
       end
 
-
+      #=Partial Autocorrelation
+      #Generates partial autocorrelation series for a timeseries
+      #*Parameters*:
+      #-_max_lags_::integer, optional - provide number of lags
+      #-_method_::string. Default: 'yw'.
+      # * _yw_:: For yule-walker algorithm unbiased approach
+      # * _mle_:: For Maximum likelihood algorithm approach
+      # * _ld_:: Forr Levinson-Durbin recursive approach
+      #Returns - array of pacf
+      #
+      def pacf(max_lags = nil, method = :yw)
         #parameters:
         #max_lags => maximum number of lags for pcf
         #method => for autocovariance in yule_walker:
         #'yw' for 'yule-walker unbaised', 'mle' for biased maximum likelihood
+        #'ld' for Levinson-Durbin recursion
 
+        method = method.downcase.to_sym
         max_lags ||= (10 * Math.log10(size)).to_i
-
+        if method.eql? :yw or method.eql? :mle
+          Pacf::Pacf.pacf_yw(self, max_lags, method.to_s)
+        elsif method == :ld
+          series = self.acvf
+          Pacf::Pacf.levinson_durbin(series, max_lags, true)[2]
+        else
+          raise "Method presents for pacf are 'yw', 'mle' or 'ld'"
+        end
+      end
+
+      #=Autoregressive estimation
+      #Generates AR(k) series for the calling timeseries by yule walker.
+      #*Parameters*:
+      #-_n_::integer, (default = 1500) number of observations for AR.
+      #-_k_::integer, (default = 1) order of AR process.
+      #*Returns*:
+      #Array constituting estimated AR series.
+      #
+      def ar(n = 1500, k = 1)
+        series = Statsample::TimeSeries.arima
+        #series = Statsample::TimeSeries::ARIMA.new
+        series.yule_walker(self, n, k)
+      end
+
+      #=AutoCovariance
+      #Provides autocovariance of timeseries.
+      #-Parameters:
+      #demean = true; optional. Supply false if series is not to be demeaned
+      #unbiased = true; optional. true/false for unbiased/biased form of autocovariance
+      #-Returns-: Autocovariance value
+      #
+      def acvf(demean = true, unbiased = true)
+        #TODO: change parameters list in opts.merge as suggested by John
+        #functionality: computes autocovariance of timeseries data
+        #returns: array of autocovariances
+
+        if demean
+          demeaned_series = self - self.mean
+        else
+          demeaned_series = self
+        end
+        n = self.acf.size
+        m = self.mean
+        if unbiased
+          d = Array.new(self.size, self.size)
+        else
+          d = ((1..self.size).to_a.reverse)[0..n]
+        end
+
+
+        0.upto(n - 1).map do |i|
+          (demeaned_series * (self.lag(i) - m)).sum / d[i]
+        end
+      end
+
+      #=Correlation
+      #Gives correlation of timeseries.
+      #
+      def correlate(a, v, mode = 'full')
+        #peforms cross-correlation of two series
+        #multiarray.correlate2(a, v, 'full')
+        if a.size < v.size
+          raise("Should have same size!")
+        end
+        ps = a.size + v.size - 1
+        a_padded = Array.new(ps, 0)
+        a_padded[0...a.size] = a
+
+        out = (mode.downcase.eql? 'full') ? Array.new(ps) : Array.new(a.size)
+        #ongoing
       end
 
       # Lags the series by k periods.
@@ -71,7 +152,7 @@ module Statsample
       # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
      # ts.lag 2 # => [nil, nil, 0.69, 0.23, ...]
       #
-      def lag
+      def lag(k = 1)
         return self if k == 0
 
         dup.tap do |lagged|
@@ -86,13 +167,14 @@ module Statsample
         end
       end
 
+      #=Diff
       # Performs a first difference of the series.
       #
       # The convention is to set the oldest observations (the first ones
       # in the series) to nil so that the size of the diffed series is the
       # same as the original.
       #
-
+      #*Usage*:
       #
       # ts = (1..10).map { rand }.to_ts
       # # => [0.69, 0.23, 0.44, 0.71, ...]
@@ -103,17 +185,23 @@ module Statsample
         self - self.lag
       end
 
-
+      #=Moving Average
+      # Calculates the moving average of the series using the provided
       # lookback argument. The lookback defaults to 10 periods.
+      #*Parameters*:
+      #-_n_::integer, (default = 10) - loopback argument
       #
-
+      #*Usage*:
       #
       # ts = (1..100).map { rand }.to_ts
       # # => [0.69, 0.23, 0.44, 0.71, ...]
       #
       # # first 9 observations are nil
       # ts.ma # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
-
+      #
+      #*Returns*:
+      #Resulting moving average timeseries object
+      def ma(n = 10)
         return mean if n >= size
 
         ([nil] * (n - 1) + (0..(size - n)).map do |i|
@@ -121,6 +209,7 @@ module Statsample
         end).to_time_series
       end
 
+      #=Exponential Moving Average
       # Calculates an exponential moving average of the series using a
       # specified parameter. If wilder is false (the default) then the EMA
       # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
@@ -130,14 +219,22 @@ module Statsample
       # use a lot more than n observations to calculate. The series is stable
       # if the size of the series is >= 3.45 * (n + 1)
       #
-
+      #*Parameters*:
+      #-_n_::integer, (default = 10)
+      #-_wilder_::boolean, (default = false), if true, 1/n value is used for smoothing;
+      #if false, uses 2/(n+1) value
+      #
+      #*Usage*:
       #
       # ts = (1..100).map { rand }.to_ts
       # # => [0.69, 0.23, 0.44, 0.71, ...]
       #
       # # first 9 observations are nil
       # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
-
+      #
+      #*Returns*:
+      #EMA timeseries
+      def ema(n = 10, wilder = false)
         smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
 
         # need to start everything from the first non-nil observation
@@ -156,9 +253,24 @@ module Statsample
         base.to_time_series
       end
 
+      #=Moving Average Convergence-Divergence
       # Calculates the MACD (moving average convergence-divergence) of the time
       # series - this is a comparison of a fast EMA with a slow EMA.
-
+      #
+      # *Parameters*:
+      # -_fast_::integer, (default = 12) - fast component of MACD
+      # -_slow_::integer, (default = 26) - slow component of MACD
+      # -_signal_::integer, (default = 9) - signal component of MACD
+      #
+      # *Usage*:
+      # ts = (1..100).map { rand }.to_ts
+      # # => [0.69, 0.23, 0.44, 0.71, ...]
+      # ts.macd(13)
+      #
+      # *Returns*:
+      # Array of two timeseries - comparison of fast EMA with slow
+      # and EMA with signal value
+      def macd(fast = 12, slow = 26, signal = 9)
         series = ema(fast) - ema(slow)
         [series, series.ema(signal)]
       end
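The Series class gains several documented methods in this release (acf with a lag argument, pacf with a selectable estimation method, ar, acvf, lag(k), diff, ma, ema, macd). A minimal sketch of how they are meant to be called, assuming the API exactly as documented in the hunks above and the to_ts shorthand:

require 'bio-statsample-timeseries'

ts = 100.times.map { rand }.to_ts  # a Statsample::TimeSeries::Series

ts.acf(3)           # first autocorrelations, per the doc comment above
ts.pacf(10, :yw)    # partial autocorrelations via unbiased Yule-Walker
ts.pacf(10, :ld)    # same, via the new Levinson-Durbin recursion
ts.lag(2)           # series shifted back by two periods
ts.diff             # first difference (self - self.lag)
ts.ema(10)          # exponential moving average
ts.macd(12, 26, 9)  # [fast-minus-slow EMA series, its signal EMA]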
data/lib/bio-statsample-timeseries/timeseries/pacf.rb
CHANGED
@@ -8,24 +8,79 @@ module Statsample
         #Inspiration: StatsModels
         pacf = [1.0]
         (1..max_lags).map do |i|
-          pacf << yule_walker(timeseries, i, method)[-1]
+          pacf << yule_walker(timeseries, i, method)[0][-1]
         end
         pacf
       end
 
+
+      #=Levinson-Durbin Algorithm
+      #*Parameters*:
+      #-_series_ : timeseries, or a series of autocovariances
+      #-_nlags_: integer(default: 10): largest lag to include in recursion or order of the AR process
+      #-_is_acovf_: boolean(default: false): series is timeseries if it is false, else contains autocavariances
+
+      #*returns*:
+      #-_sigma_v_: estimate of the error variance
+      #-_arcoefs_: AR coefficients
+      #-_pacf_: pacf function
+      #-_sigma_: some function
+      def self.levinson_durbin(series, nlags = 10, is_acovf = false)
+
+        if is_acovf
+          series = series.map(&:to_f)
+        else
+          #nlags = order(k) of AR in this case
+          series = series.acvf.map(&:to_f)[0..nlags]
+        end
+        #phi = Array.new((nlags+1), 0.0) { Array.new(nlags+1, 0.0) }
+        order = nlags
+        phi = Matrix.zero(nlags + 1)
+        sig = Array.new(nlags+1)
+
+        #setting initial point for recursion:
+        phi[1,1] = series[1]/series[0]
+        #phi[1][1] = series[1]/series[0]
+        sig[1] = series[0] - phi[1, 1] * series[1]
+
+        2.upto(order).each do |k|
+          phi[k, k] = (series[k] - (Statsample::Vector.new(phi[1...k, k-1]) * series[1...k].reverse.to_ts).sum) / sig[k-1]
+          #some serious refinement needed in above for matrix manipulation. Will do today
+          1.upto(k-1).each do |j|
+            phi[j, k] = phi[j, k-1] - phi[k, k] * phi[k-j, k-1]
+          end
+          sig[k] = sig[k-1] * (1-phi[k, k] ** 2)
+
+        end
+        sigma_v = sig[-1]
+        arcoefs_delta = phi.column(phi.column_size - 1)
+        arcoefs = arcoefs_delta[1..arcoefs_delta.size]
+        pacf = diag(phi)
+        pacf[0] = 1.0
+        return [sigma_v, arcoefs, pacf, sig, phi]
+      end
+
+      def self.diag(mat)
+        #returns array of diagonal elements of a matrix.
+        #will later abstract it to matrix.rb in Statsample
+        return mat.each_with_index(:diagonal).map { |x, r, c| x }
+      end
+
+
+      #=Yule Walker Algorithm
+      #From the series, estimates AR(p)(autoregressive) parameter
+      #using Yule-Waler equation. See -
+      #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
+
+      #*Parameters*:
+      #-_ts_::timeseries
+      #-_k_::order, default = 1
+      #-_method_:: can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator is (n - k)
+
+      #*returns*:
+      #-_rho_:: autoregressive coefficients
+      #-_sigma_:: sigma parameter
       def self.yule_walker(ts, k = 1, method='yw')
-        #From the series, estimates AR(p)(autoregressive) parameter
-        #using Yule-Waler equation. See -
-        #http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
-
-        #parameters:
-        #ts = series
-        #k = order, default = 1
-        #method = can be 'yw' or 'mle'. If 'yw' then it is unbiased, denominator
-        #is (n - k)
-
-        #returns:
-        #rho => autoregressive coefficients
         ts = ts - ts.mean
         n = ts.size
         if method.downcase.eql? 'yw'
@@ -37,7 +92,7 @@ module Statsample
          denom =->(k) { n }
        end
        r = Array.new(k + 1) { 0.0 }
-        r[0] = ts.map { |x| x
+        r[0] = ts.map { |x| x**2 }.inject(:+).to_f / denom.call(0).to_f
 
        1.upto(k) do |l|
          r[l] = (ts[0...-l].zip(ts[l...ts.size])).map do |x|
@@ -48,7 +103,11 @@ module Statsample
        r_R = toeplitz(r[0...-1])
 
        mat = Matrix.columns(r_R).inverse()
-        solve_matrix(mat, r[1..r.size])
+        phi = solve_matrix(mat, r[1..r.size])
+        phi_vector = Statsample::Vector.new(phi, :scale)
+        r_vector = Statsample::Vector.new(r[1..r.size], :scale)
+        sigma = r[0] - (r_vector * phi_vector).sum
+        return [phi, sigma]
       end
 
       def self.toeplitz(arr)
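Note the changed return shape in pacf.rb: yule_walker now returns a [phi, sigma] pair where it previously returned only the coefficient array (which is why pacf_yw now indexes [0][-1]), and the new levinson_durbin returns [sigma_v, arcoefs, pacf, sig, phi]. A hypothetical call sketch, assuming the Pacf::Pacf constant nests under Statsample::TimeSeries as the include in timeseries.rb suggests:

ts = 100.times.map { rand }.to_ts

phi, sigma = Statsample::TimeSeries::Pacf::Pacf.yule_walker(ts, 2, 'yw')  # AR(2) coefficients and the sigma parameter
sigma_v, arcoefs, pacf_series, = Statsample::TimeSeries::Pacf::Pacf.levinson_durbin(ts, 10, false)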
data/lib/bio-statsample-timeseries/utility.rb
ADDED
@@ -0,0 +1,118 @@
+module Statsample
+  class Vector
+    include Enumerable
+    include Writable
+    include Summarizable
+
+    #=Squares of sum
+    #---
+    #parameter:
+    #-demean::boolean - optional. __default__: false
+    #Sums the timeseries and then returns the square
+    def squares_of_sum(demean = false)
+      if demean
+        m = self.mean
+        self.map { |x| (x-m) }.sum ** 2
+      else
+        return self.sum.to_f ** 2
+      end
+    end
+  end
+
+
+  class ::Matrix
+    #=Squares of sum
+    #---
+    #Does squares of sum in column order.
+    #Necessary for computations in various processes
+    def squares_of_sum
+      (0...column_size).map do |j|
+        self.column(j).sum ** 2
+      end
+    end
+
+    #=Checks if given matrix is symmetric or not
+    #---
+    #returns bool
+    #`symmetric?` is present in Ruby Matrix 1.9.3+, but not in 1.8.*
+    def symmetric?
+      return false unless square?
+
+      (0...row_size).each do |i|
+        0.upto(i).each do |j|
+          return false if self[i, j] != self[j, i]
+        end
+      end
+      true
+    end
+
+    #=Cholesky decomposition
+    #Reference: http://en.wikipedia.org/wiki/Cholesky_decomposition
+    #---
+    #==Description
+    #Cholesky decomposition is reprsented by `M = L X L*`, where
+    #M is the symmetric matrix and `L` is the lower half of cholesky matrix,
+    #and `L*` is the conjugate form of `L`.
+    #*Returns* : Cholesky decomposition for a given matrix(if symmetric)
+    #*Utility*: Essential matrix function, requisite in kalman filter, least squares
+    def cholesky
+      raise ArgumentError, "Given matrix should be symmetric" unless symmetric?
+      c = Matrix.zero(row_size)
+      0.upto(row_size - 1).each do |k|
+        0.upto(row_size - 1).each do |i|
+          if i == k
+            sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[k, j] ** 2 }
+            value = Math.sqrt(self[k,k] - sum)
+            c[k, k] = value
+          elsif i > k
+            sum = (0..(k-1)).inject(0.0){ |sum, j| sum + c[i, j] * c[k, j] }
+            value = (self[k,i] - sum) / c[k, k]
+            c[i, k] = value
+          end
+        end
+      end
+      c
+    end
+
+    #=Chain Product
+    #Class method
+    #Returns the chain product of two matrices
+    #==Usage:
+    #Let `a` be 4 * 3 matrix,
+    #Let `b` be 3 * 3 matrix,
+    #Let `c` be 3 * 1 matrix,
+    #then `Matrix.chain_dot(a, b, c)`
+    #===*NOTE*: Send the matrices in multiplicative order with proper dimensions
+    def self.chain_dot(*args)
+      #inspired by Statsmodels
+      begin
+        args.reduce { |x, y| x * y } #perform matrix multiplication in order
+      rescue ExceptionForMatrix::ErrDimensionMismatch
+        puts "ExceptionForMatrix: Please provide matrices with proper multiplicative dimensions"
+      end
+    end
+
+
+    #=Adds a column of constants.
+    #Appends a column of ones to the matrix/array if first argument is false
+    #If an n-array, first checks if one column of ones is already present
+    #if present, then original(self) is returned, else, prepends with a vector of ones
+    def add_constant(prepend = true)
+      #for Matrix
+      (0...column_size).each do |i|
+        if self.column(i).map(&:to_f) == Object::Vector.elements(Array.new(row_size, 1.0))
+          return self
+        end
+      end
+      #append/prepend a column of one's
+      vectors = (0...row_size).map do |r|
+        if prepend
+          [1.0].concat(self.row(r).to_a)
+        else
+          self.row(r).to_a.push(1.0)
+        end
+      end
+      return Matrix.rows(vectors)
+    end
+  end
+end
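utility.rb is new in 0.1.2 and extends the standard Matrix class. The sketch below is a hypothetical illustration built from the assertions in the new test/test_matrix.rb (shown further down); it assumes the gem's extensions are loaded, since the stock Ruby Matrix has no cholesky or add_constant:

require 'bio-statsample-timeseries'   # pulls in utility.rb via the updated requires

m = Matrix[[4, 12, -16], [12, 37, -43], [-16, -43, 93]]
m.symmetric?   # => true
m.cholesky     # => Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]]  (lower-triangular L)

a = Matrix[[1, 2], [3, 4]]
a.add_constant         # => Matrix[[1.0, 1, 2], [1.0, 3, 4]]  (column of ones prepended)
a.add_constant(false)  # => Matrix[[1, 2, 1.0], [3, 4, 1.0]]  (appended instead)
Matrix.chain_dot(m, m, m)  # chained product M * M * M; prints a message on dimension mismatch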
data/test/test_arima_simulators.rb
CHANGED
@@ -11,10 +11,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
    ts.pacf
  end
  context("AR(1) simulations") do
-    include Statsample
+    include Statsample
 
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ar_1_positive = @series.ar_sim(1500, [0.9], 2)
      @ar_1_negative = @series.ar_sim(1500, [-0.9], 2)
 
@@ -73,10 +73,10 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end
 
  context("AR(p) simulations") do
-    include Statsample
+    include Statsample
 
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ar_p_positive = @series.ar_sim(1500, [0.3, 0.5], 2)
      @ar_p_negative = @series.ar_sim(1500, [-0.3, -0.5], 2)
    end
@@ -120,9 +120,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
 
 
  context("MA(1) simulations") do
-    include Statsample
+    include Statsample
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ma_positive = @series.ar_sim(1500, [0.5], 2)
      @ma_negative = @series.ar_sim(1500, [-0.5], 2)
    end
@@ -153,9 +153,9 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
  end
 
  context("MA(q) simulations") do
-    include Statsample
+    include Statsample
    setup do
-      @series =
+      @series = TimeSeries.arima
      @ma_positive = @series.ar_sim(1500, [0.5, 0.3, 0.2], 2)
      @ma_negative = @series.ar_sim(1500, [-0.5], 2)
    end
@@ -172,5 +172,15 @@ class StatsampleArimaSimulatorsTest < MiniTest::Unit::TestCase
      #visualization: http://jsfiddle.net/7keHK/2/
    end
  end
+
+  context("Yule walker estimations") do
+    include Statsample
+
+    setup do
+      @timeseries = 100.times.map { rand }.to_ts
+      @arma_simulation =->(n) { @timeseries.ar(n, k)}
+    end
+    #to write test
+  end
 end
 
data/test/test_matrix.rb
ADDED
@@ -0,0 +1,92 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
+
+  def setup_square_matrix(arr, n)
+    #returns n * n matrix by slicing arr
+    return Matrix.rows(arr.each_slice(n).to_a)
+  end
+  def setup
+    @arr_square = (1..16)
+    @mat_non_symmetric = setup_square_matrix(@arr_square, 4)
+
+    @arr_non_square = (1..12).to_a
+    #this is a 4 X 3 matrix
+    @mat_non_square = Matrix.rows(@arr_non_square.each_slice(3).to_a)
+  end
+
+  #TESTS for matrix symmetricity - Matrix#symmetric?
+  context("symmetric?") do
+
+    should "return false for non-symmetric matrix" do
+      assert_equal @mat_non_symmetric.symmetric?, false
+    end
+
+    should "return false for non-square matrix" do
+      assert_equal @mat_non_square.symmetric?, false
+    end
+
+    should "return true for symmetrix matrix" do
+      arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+      mat = setup_square_matrix(arr, 3)
+      assert_equal mat.symmetric?, true
+    end
+  end
+
+  #TESTS for cholesky decomposition - Matrix#cholesky
+  context("Cholesky Decomposition") do
+
+    should "raise error for non symmetric matrix" do
+      assert_raises(ArgumentError) { @mat_non_symmetric.cholesky }
+    end
+
+    should "raise raise error if non-square matix" do
+      arr = (1..12).to_a
+      mat = Matrix.rows(arr.each_slice(3).to_a)
+      assert_raises(ArgumentError) { @mat_non_square.cholesky }
+    end
+
+    should "give hermitian cholesky decomposed matrix for symmetrix matrix" do
+      arr = %w[4 12 -16 12 37 -43 -16 -43 93].map(&:to_i)
+      mat = setup_square_matrix(arr, 3)
+      assert_equal Matrix[[2.0, 0, 0], [6.0, 1.0, 0], [-8.0, 5.0, 2.0]], mat.cholesky
+    end
+  end
+
+  #TESTS for matrix squares of sum - Matrix#squares_of_sum
+  context("Squares of sum") do
+
+    should "return array of size 4 for matrix - #{@mat_non_symmetric}" do
+      #equal to column size
+      assert_equal @mat_non_symmetric.squares_of_sum.size, 4
+    end
+
+    should "return [784, 1024, 1296, 1600] for matrix - #{@mat_non_symmetric}" do
+      assert_equal @mat_non_symmetric.squares_of_sum, [784, 1024, 1296, 1600]
+    end
+  end
+
+  #TESTS for adding constants to matrix
+  context("Add constant") do
+
+    should "prepend all rows with ones" do
+      mat = @mat_non_symmetric.add_constant
+      assert_equal @mat_non_symmetric.column_size, 4
+      assert_equal mat.column_size, 5
+      assert_equal mat.column(0).to_a, [1.0, 1.0,1.0,1.0]
+    end
+
+    should "append all rows with ones if prepend = false" do
+      mat = @mat_non_symmetric.add_constant(false)
+      assert_equal @mat_non_symmetric.column_size, 4
+      assert_equal mat.column_size, 5
+      assert_equal mat.column(mat.column_size - 1).to_a, [1.0, 1.0,1.0,1.0]
+    end
+
+    should "not append/prepend if a column of ones already exists in matrix" do
+      matrix = Matrix[[1, 2, 1, 4], [5, 6, 1, 8], [9, 10, 1, 12]]
+      const_mat = matrix.add_constant
+      assert_equal matrix.column_size, const_mat.column_size
+      assert_equal matrix.row_size, const_mat.row_size
+    end
+  end
+end
data/test/test_tseries.rb
CHANGED
@@ -7,7 +7,7 @@ class StatsampleTestTimeSeries < MiniTest::Unit::TestCase
 
  def setup
    # daily closes of iShares XIU on the TSX
-    @xiu = Statsample::TimeSeries::
+    @xiu = Statsample::TimeSeries::Series.new [17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
      16.86, 16.86, 16.56, 16.36, 16.66, 16.77], :scale
  end
 
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bio-statsample-timeseries
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.2
 prerelease:
 platform: ruby
 authors:
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-09-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: statsample
@@ -216,16 +216,17 @@ files:
 - features/support/env.rb
 - lib/bio-statsample-timeseries.rb
 - lib/bio-statsample-timeseries/arima.rb
-- lib/bio-statsample-timeseries/statsample-timeseries.rb
 - lib/bio-statsample-timeseries/timeseries.rb
 - lib/bio-statsample-timeseries/timeseries/pacf.rb
+- lib/bio-statsample-timeseries/utility.rb
 - test/fixtures/stock_data.csv
 - test/helper.rb
 - test/test_arima_simulators.rb
+- test/test_matrix.rb
 - test/test_pacf.rb
 - test/test_tseries.rb
 - test/test_wald.rb
-homepage: http://github.com/
+homepage: http://github.com/AnkurGel/bioruby-statsample-timeseries
 licenses:
 - MIT
 post_install_message:
@@ -240,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
     segments:
     - 0
-    hash:
+    hash: -122253519
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
 requirements: