statsample 2.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/History.txt +7 -0
  3. data/README.md +2 -4
  4. data/Rakefile +6 -0
  5. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +0 -1
  6. data/examples/correlation_matrix.rb +0 -16
  7. data/examples/dataset.rb +0 -7
  8. data/examples/dominance_analysis_bootstrap.rb +0 -6
  9. data/examples/reliability.rb +0 -2
  10. data/lib/statsample.rb +3 -2
  11. data/lib/statsample/anova/oneway.rb +1 -1
  12. data/lib/statsample/bivariate.rb +4 -4
  13. data/lib/statsample/converter/spss.rb +1 -1
  14. data/lib/statsample/crosstab.rb +3 -3
  15. data/lib/statsample/daru.rb +1 -3
  16. data/lib/statsample/factor/parallelanalysis.rb +1 -3
  17. data/lib/statsample/formula/fit_model.rb +46 -0
  18. data/lib/statsample/formula/formula.rb +306 -0
  19. data/lib/statsample/matrix.rb +0 -2
  20. data/lib/statsample/regression.rb +1 -3
  21. data/lib/statsample/regression/multiple/alglibengine.rb +1 -1
  22. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  23. data/lib/statsample/regression/multiple/rubyengine.rb +1 -3
  24. data/lib/statsample/reliability.rb +3 -3
  25. data/lib/statsample/reliability/icc.rb +1 -2
  26. data/lib/statsample/reliability/multiscaleanalysis.rb +0 -1
  27. data/lib/statsample/reliability/scaleanalysis.rb +2 -3
  28. data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
  29. data/lib/statsample/test/levene.rb +4 -4
  30. data/lib/statsample/test/t.rb +10 -10
  31. data/lib/statsample/test/umannwhitney.rb +3 -3
  32. data/lib/statsample/version.rb +1 -1
  33. data/statsample.gemspec +4 -1
  34. data/test/fixtures/df.csv +15 -0
  35. data/test/helpers_tests.rb +7 -0
  36. data/test/test_factor.rb +0 -5
  37. data/test/test_factor_pa.rb +1 -6
  38. data/test/test_fit_model.rb +88 -0
  39. data/test/test_reliability.rb +0 -10
  40. data/test/test_statistics.rb +1 -1
  41. metadata +52 -48
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ee62b72f947f9760824885a479c92ce6dbc55127
4
- data.tar.gz: 7cb2c7057856eee78f69be2f7c1e43671cc8007e
3
+ metadata.gz: e2a80fff135f963dcabfe1593de243611eb8ab9a
4
+ data.tar.gz: cb2d80e85339201f8a37ea1b8e934953f26b5591
5
5
  SHA512:
6
- metadata.gz: 26b9d9aab40c4dc700fc4632a30cd195f5f0f0dfe6ab36a84d055f5d0e22ba7992c143ac2507069c02e3ec6e3b02b31f20ae1bf4c1f47a8846339c6d0e0b67b6
7
- data.tar.gz: 8eeac7c1f6aca3ed959ff15cfd97407876d46dfb8d9adf13aa892831772b02209142d73a46ac1581e848cf020b86cfccb93e21bd9f450da106d08ed755d5bd1b
6
+ metadata.gz: b9412e202d3364c6fe7f982a9ceb2828061312ffd0274c7bc25e8b5747abd4b11aca5edd960e22fb6ee8cfec409a6352961c51e283d9523fb608e5d66bf65377
7
+ data.tar.gz: 5db058e78ae638c155727ca51af3e303503bfd42b518de095aa93e9639194c9a273a8b4e3ae3e069d91e25500612beba48140f2e7024e2c329ba16adeca54bef
@@ -1,3 +1,10 @@
1
+ === 2.1.0 / 2017-08-10
2
+ * Update documentation to reflect methods that have been removed (@lokeshh)
3
+ * Update daru dependency to v0.1.6 (@lokeshh)
4
+ * Remove pre-daru legacy methods like n_valid, missing value functions (@lokeshh)
5
+ * Update test suite with rubocop and rake. New tests for methods like Regression (@lokeshh)
6
+ * Introduce fitting a regression using string formulas (@lokeshh)
7
+
1
8
  === 2.0.2 / 2016-03-11
2
9
  * Update dependencies (spreadsheet, GSL)
3
10
 
data/README.md CHANGED
@@ -11,7 +11,7 @@ Homepage :: https://github.com/sciruby/statsample
11
11
  You should have a recent version of GSL and R (with the `irr` and `Rserve` libraries) installed. In Ubuntu:
12
12
 
13
13
  ```bash
14
- $ sudo apt-get install libgs10-dev r-base r-base-dev
14
+ $ sudo apt-get install libgsl0-dev r-base r-base-dev
15
15
  $ sudo Rscript -e "install.packages(c('Rserve', 'irr'))"
16
16
  ```
17
17
 
@@ -86,7 +86,7 @@ Include:
86
86
  - Intra-class correlation
87
87
  - Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for One-way ANOVA.
88
88
  - Tests: F, T, Levene, U-Mannwhitney.
89
- - Regression: Simple, Multiple (OLS), Probit and Logit
89
+ - Regression: Simple, Multiple (OLS)
90
90
  - Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for estimation of number of factors.
91
91
  - Reliability analysis for simple scale and a DSL to easily analyze multiple scales using factor analysis and correlations, if you want it.
92
92
  - Basic time series support
@@ -120,8 +120,6 @@ Include:
120
120
  - Multiple types of regression.
121
121
  - Simple Regression : Statsample::Regression::Simple
122
122
  - Multiple Regression: Statsample::Regression::Multiple
123
- - Logit Regression: Statsample::Regression::Binomial::Logit
124
- - Probit Regression: Statsample::Regression::Binomial::Probit
125
123
  - Factorial Analysis algorithms on Statsample::Factor module.
126
124
  - Classes for Extraction of factors:
127
125
  - Statsample::Factor::PCA
data/Rakefile CHANGED
@@ -1,4 +1,5 @@
1
1
  $:.unshift File.expand_path("../lib/", __FILE__)
2
+ lib_folder = File.expand_path("../lib", __FILE__)
2
3
 
3
4
  require 'statsample/version'
4
5
  require 'rake'
@@ -36,3 +37,8 @@ task "gettext:makemo" do
36
37
  require 'gettext/tools'
37
38
  GetText.create_mofiles()
38
39
  end
40
+
41
+ desc 'Run pry'
42
+ task :pry do |task|
43
+ sh "pry -r #{lib_folder}/statsample.rb"
44
+ end
@@ -60,7 +60,6 @@ end
60
60
 
61
61
  rs[:c_v] = rs.collect {|row| row[:cases]*row[:vars]}
62
62
 
63
- rs.update
64
63
  rs.save("correlation_matrix.ds")
65
64
  Statsample::Excel.write(rs,"correlation_matrix.xls")
66
65
 
@@ -7,18 +7,6 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
7
7
 
8
8
  require 'statsample'
9
9
  Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
10
- # It so happens that Daru::Vector and Daru::DataFrame must update metadata
11
- # like positions of missing values every time they are created.
12
- #
13
- # Since we dont have any missing values in the data that we are creating,
14
- # we set Daru.lazy_update = true so that missing data is not updated every
15
- # time and things happen much faster.
16
- #
17
- # In case you do have missing data and lazy_update has been set to *true*,
18
- # you _SHOULD_ called `#update` on the concerned Vector or DataFrame object
19
- # everytime an assingment or deletion cycle is complete.
20
- Daru.lazy_update = true
21
-
22
10
  # Create a Daru::DataFrame containing 4 vectors a, b, c and d.
23
11
  #
24
12
  # Notice that the `clone` option has been set to *false*. This tells Daru
@@ -36,10 +24,6 @@ Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
36
24
  # Calculate correlation matrix by calling the `cor` shorthand.
37
25
  cm = cor(ds)
38
26
  summary(cm)
39
-
40
- # Set lazy_update to *false* once our job is done so that this analysis does
41
- # not accidentally affect code elsewhere.
42
- Daru.lazy_update = false
43
27
  end
44
28
 
45
29
  if __FILE__==$0
@@ -6,10 +6,6 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
6
6
  require 'statsample'
7
7
 
8
8
  Statsample::Analysis.store(Daru::DataFrame) do
9
- # We set lazy_update to *true* so that time is not wasted in updating
10
- # metdata every time an assignment happens.
11
- Daru.lazy_update = true
12
-
13
9
  samples = 1000
14
10
 
15
11
  # The 'new_with_size' function lets you specify the size of the
@@ -26,9 +22,6 @@ Statsample::Analysis.store(Daru::DataFrame) do
26
22
  # order by default.
27
23
  ds = Daru::DataFrame.new({:a=>a,:b=>b}, order: [:b, :a])
28
24
  summary(ds)
29
-
30
- # Reset lazy_update to *false* to prevent other code from breaking.
31
- Daru.lazy_update = false
32
25
  end
33
26
 
34
27
  if __FILE__==$0
@@ -3,10 +3,6 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
  require 'statsample'
4
4
 
5
5
  Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
6
- # Remember to call *update* after an assignment/deletion cycle if lazy_update
7
- # is *false*.
8
- Daru.lazy_update = true
9
-
10
6
  sample=300
11
7
  a=rnorm(sample)
12
8
  b=rnorm(sample)
@@ -29,8 +25,6 @@ Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
29
25
  dab2=dominance_analysis_bootstrap(ds2, :y1, :debug=>true)
30
26
  dab2.bootstrap(100,nil)
31
27
  summary(dab2)
32
-
33
- Daru.lazy_update = false
34
28
  end
35
29
 
36
30
  if __FILE__==$0
@@ -15,8 +15,6 @@ Statsample::Analysis.store(Statsample::Reliability) do
15
15
  ds["v#{i}".to_sym]= a + rnorm(samples,0,0.2)
16
16
  end
17
17
 
18
- ds.update
19
-
20
18
  rel=Statsample::Reliability::ScaleAnalysis.new(ds)
21
19
  summary rel
22
20
 
@@ -160,6 +160,7 @@ module Statsample
160
160
  autoload(:StratifiedSample, 'statsample/multiset')
161
161
  autoload(:MLE, 'statsample/mle')
162
162
  autoload(:Regression, 'statsample/regression')
163
+ autoload(:FitModel, 'statsample/formula/fit_model')
163
164
  autoload(:Test, 'statsample/test')
164
165
  autoload(:Factor, 'statsample/factor')
165
166
  autoload(:Graph, 'statsample/graph')
@@ -206,7 +207,7 @@ module Statsample
206
207
  def only_valid(*vs)
207
208
  i = 1
208
209
  h = vs.inject({}) { |acc, v| acc["v#{i}".to_sym] = v; i += 1; acc }
209
- df = Daru::DataFrame.new(h).dup_only_valid
210
+ df = Daru::DataFrame.new(h).reject_values(*Daru::MISSING_VALUES)
210
211
  df.map { |v| v }
211
212
  end
212
213
 
@@ -214,7 +215,7 @@ module Statsample
214
215
  # If any vectors have missing_values, return only valid.
215
216
  # If not, return the vectors itself
216
217
  def only_valid_clone(*vs)
217
- if vs.any?(&:has_missing_data?)
218
+ if vs.any? { |v| v.include_values?(*Daru::MISSING_VALUES) }
218
219
  only_valid(*vs)
219
220
  else
220
221
  vs
@@ -164,7 +164,7 @@ module Statsample
164
164
  if summary_descriptives
165
165
  s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
166
166
  @vectors.each do |v|
167
- t.row [v.name, v.n_valid, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
167
+ t.row [v.name, v.reject_values(*Daru::MISSING_VALUES).size, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
168
168
  end
169
169
  end
170
170
  end
@@ -159,7 +159,7 @@ module Statsample
159
159
 
160
160
  def covariance_matrix(ds)
161
161
  vars,cases = ds.ncols, ds.nrows
162
- if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
162
+ if !ds.include_values?(*Daru::MISSING_VALUES) and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
163
163
  cm=covariance_matrix_optimized(ds)
164
164
  else
165
165
  cm=covariance_matrix_pairwise(ds)
@@ -198,7 +198,7 @@ module Statsample
198
198
  # Order of rows and columns depends on Dataset#fields order
199
199
  def correlation_matrix(ds)
200
200
  vars, cases = ds.ncols, ds.nrows
201
- if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
201
+ if !ds.include_values?(*Daru::MISSING_VALUES) and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
202
202
  cm=correlation_matrix_optimized(ds)
203
203
  else
204
204
  cm=correlation_matrix_pairwise(ds)
@@ -248,7 +248,7 @@ module Statsample
248
248
  m = vectors.collect do |row|
249
249
  vectors.collect do |col|
250
250
  if row==col
251
- ds[row].only_valid.size
251
+ ds[row].reject_values(*Daru::MISSING_VALUES).size
252
252
  else
253
253
  rowa,rowb = Statsample.only_valid_clone(ds[row],ds[col])
254
254
  rowa.size
@@ -281,7 +281,7 @@ module Statsample
281
281
  # Calculate Point biserial correlation. Equal to Pearson correlation, with
282
282
  # one dichotomous value replaced by "0" and the other by "1"
283
283
  def point_biserial(dichotomous,continous)
284
- ds = Daru::DataFrame.new({:d=>dichotomous,:c=>continous}).dup_only_valid
284
+ ds = Daru::DataFrame.new({:d=>dichotomous,:c=>continous}).reject_values(*Daru::MISSING_VALUES)
285
285
  raise(TypeError, "First vector should be dichotomous") if ds[:d].factors.size != 2
286
286
  raise(TypeError, "Second vector should be continous") if ds[:c].type != :numeric
287
287
  f0=ds[:d].factors.sort.to_a[0]
@@ -7,7 +7,7 @@ module Statsample
7
7
  # ds=Daru::DataFrame.from_excel("my_data.xls")
8
8
  # puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
9
9
  def tetrachoric_correlation_matrix(ds)
10
- dsv=ds.dup_only_valid
10
+ dsv=ds.reject_values(*Daru::MISSING_VALUES)
11
11
  # Delete all vectors doesn't have variation
12
12
  dsv.vectors.each { |f|
13
13
  if dsv[f].factors.size==1
@@ -29,10 +29,10 @@ module Statsample
29
29
  @v_cols.factors.sort.reset_index!
30
30
  end
31
31
  def rows_total
32
- @v_rows.frequencies
32
+ @v_rows.frequencies.to_h
33
33
  end
34
34
  def cols_total
35
- @v_cols.frequencies
35
+ @v_cols.frequencies.to_h
36
36
  end
37
37
 
38
38
  def frequencies
@@ -42,7 +42,7 @@ module Statsample
42
42
  s[par]=0
43
43
  s
44
44
  end
45
- base.update(Daru::Vector.new(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a).frequencies)
45
+ base.update(Daru::Vector.new(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a).frequencies.to_h)
46
46
  end
47
47
  def to_matrix
48
48
  f = frequencies
@@ -11,7 +11,7 @@ module Daru
11
11
  # ugly patch. The upper limit for a bin has the form
12
12
  # x < range
13
13
  #h=Statsample::Histogram.new(self, bins)
14
- valid = only_valid
14
+ valid = reject_values(*Daru::MISSING_VALUES)
15
15
  min,max=Statsample::Util.nice(valid.min,valid.max)
16
16
  # fix last data
17
17
  if max == valid.max
@@ -72,7 +72,6 @@ module Daru
72
72
  end
73
73
  #puts "Ingreso a los dataset"
74
74
  ms.datasets.each do |k,ds|
75
- ds.update
76
75
  ds.rename self[field].index_of(k)
77
76
  end
78
77
 
@@ -102,7 +101,6 @@ module Daru
102
101
  each_row { |r| p1.call(r) }
103
102
 
104
103
  ms.datasets.each do |k,ds|
105
- ds.update
106
104
  ds.rename(
107
105
  fields.size.times.map do |i|
108
106
  f = fields[i]
@@ -142,8 +142,7 @@ module Statsample
142
142
  raise "bootstrap_method doesn't recogniced"
143
143
  end
144
144
  end
145
- ds_bootstrap.update
146
-
145
+
147
146
  matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
148
147
  matrix=matrix.to_gsl if @use_gsl
149
148
  if smc
@@ -159,7 +158,6 @@ module Statsample
159
158
  redo
160
159
  end
161
160
  end
162
- @ds_eigenvalues.update
163
161
  end
164
162
  dirty_memoize :number_of_factors, :ds_eigenvalues
165
163
  dirty_writer :iterations, :bootstrap_method, :percentil, :smc
@@ -0,0 +1,46 @@
1
+ require 'statsample/formula/formula'
2
+
3
+ module Statsample
4
+ # Class for performing regression
5
+ class FitModel
6
+ def initialize(formula, df, opts = {})
7
+ @formula = FormulaWrapper.new formula, df
8
+ @df = df
9
+ @opts = opts
10
+ end
11
+
12
+ def model
13
+ @model || fit_model
14
+ end
15
+
16
+ def predict(new_data)
17
+ model.predict(df_for_prediction(new_data))
18
+ end
19
+
20
+ def df_for_prediction df
21
+ canonicalize_df(df)
22
+ end
23
+
24
+ def df_for_regression
25
+ df = canonicalize_df(@df)
26
+ df[@formula.y.value] = @df[@formula.y.value]
27
+ df
28
+ end
29
+
30
+ def canonicalize_df(orig_df)
31
+ tokens = @formula.canonical_tokens
32
+ tokens.shift if tokens.first.value == '1'
33
+ df = tokens.map { |t| t.to_df orig_df }.reduce(&:merge)
34
+ df
35
+ end
36
+
37
+ def fit_model
38
+ # TODO: Add support for inclusion/exclusion of intercept
39
+ @model = Statsample::Regression.multiple(
40
+ df_for_regression,
41
+ @formula.y.value,
42
+ @opts
43
+ )
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,306 @@
1
+ module Statsample
2
+ # This class recognizes what terms are numeric
3
+ # and accordingly forms groups which are fed to Formula
4
+ # Once they are parsed with Formula, they are combined back
5
+ class FormulaWrapper
6
+ attr_reader :tokens, :y, :canonical_tokens
7
+
8
+ # Initializes formula wrapper object to parse a given formula into
9
+ # some tokens which do not overlap one another.
10
+ # @note Specify 0 as a term in the formula if you do not want constant
11
+ # to be included in the parsed formula
12
+ # @param [string] formula to parse
13
+ # @param [Daru::DataFrame] df dataframe required to know what vectors
14
+ # are numerical
15
+ # @example
16
+ # df = Daru::DataFrame.from_csv 'spec/data/df.csv'
17
+ # df.to_category 'c', 'd', 'e'
18
+ # formula = Statsample::GLM::FormulaWrapper.new 'y~a+d:c', df
19
+ # formula.canonical_to_s
20
+ # #=> "1+c(-)+d(-):c+a"
21
+ def initialize(formula, df)
22
+ @df = df
23
+ # @y store the LHS term that is name of vector to be predicted
24
+ # @tokens store the RHS terms of the formula
25
+ @y, *@tokens = split_to_tokens(formula)
26
+ @tokens = @tokens.uniq.sort
27
+ manage_constant_term
28
+ @canonical_tokens = non_redundant_tokens
29
+ end
30
+
31
+ # Returns canonical tokens in a readable form.
32
+ # @return [String] canonical tokens in a readable form.
33
+ # @note 'y~a+b(-)' means 'a' exist in full rank expansion
34
+ # and 'b(-)' exist in reduced rank expansion
35
+ # @example
36
+ # df = Daru::DataFrame.from_csv 'spec/data/df.csv'
37
+ # df.to_category 'c', 'd', 'e'
38
+ # formula = Statsample::GLM::FormulaWrapper.new 'y~a+d:c', df
39
+ # formula.canonical_to_s
40
+ # #=> "1+c(-)+d(-):c+a"
41
+ def canonical_to_s
42
+ canonical_tokens.join '+'
43
+ end
44
+
45
+ # Returns tokens to produce non-redundant design matrix
46
+ # @return [Array] array of tokens that do not produce redundant matrix
47
+ def non_redundant_tokens
48
+ groups = split_to_groups
49
+ # TODO: An enhancement
50
+ # Right now x:c appears as c:x
51
+ groups.each { |k, v| groups[k] = strip_numeric v, k }
52
+ groups.each { |k, v| groups[k] = Formula.new(v).canonical_tokens }
53
+ groups.flat_map { |k, v| add_numeric v, k }
54
+ end
55
+
56
+ private
57
+
58
+ # Removes intercept token if term '0' is found in the formula.
59
+ # Intercept token remains if term '1' is found.
60
+ # If neither term '0' nor term '1' is found then, intercept token is added.
61
+ def manage_constant_term
62
+ @tokens.unshift Token.new('1') unless
63
+ @tokens.include?(Token.new('1')) ||
64
+ @tokens.include?(Token.new('0'))
65
+ @tokens.delete Token.new('0')
66
+ end
67
+
68
+ # Groups the tokens into groups based on the numerical terms
69
+ # they are interacting with.
70
+ def split_to_groups
71
+ @tokens.group_by { |t| extract_numeric t }
72
+ end
73
+
74
+ # Add numeric interaction term which was removed earlier
75
+ # @param [Array] tokens tokens on which to add numerical terms
76
+ # @param [Array] numeric array of numeric terms to add
77
+ def add_numeric(tokens, numeric)
78
+ tokens.map do |t|
79
+ terms = t.interact_terms + numeric
80
+ if terms == ['1']
81
+ Token.new('1')
82
+ else
83
+ terms = terms.reject { |i| i == '1' }
84
+ Token.new terms.join(':'), t.full
85
+ end
86
+ end
87
+ end
88
+
89
+ # Strip numerical interacting terms
90
+ # @param [Array] tokens tokens from which to strip numeric
91
+ # @param [Array] numeric array of numeric terms to strip from tokens
92
+ # @return [Array] array of tokens with striped numerical terms
93
+ def strip_numeric(tokens, numeric)
94
+ tokens.map do |t|
95
+ terms = t.interact_terms - numeric
96
+ terms = ['1'] if terms.empty?
97
+ Token.new terms.join(':')
98
+ end
99
+ end
100
+
101
+ # Extract numeric interacting terms
102
+ # @param [Statsample::GLM::Token] token from which to extract numeric terms
103
+ # @return [Array] array of numerical terms
104
+ def extract_numeric(token)
105
+ terms = token.interact_terms
106
+ return [] if terms == ['1']
107
+ terms.reject { |t| @df[t].category? }
108
+ end
109
+
110
+ def split_to_tokens(formula)
111
+ formula = formula.gsub(/\s+/, '')
112
+ lhs_term, rhs = formula.split '~'
113
+ rhs_terms = rhs.split '+'
114
+ ([lhs_term] + rhs_terms).map { |t| Token.new t }
115
+ end
116
+ end
117
+
118
+ # To process formula language
119
+ class Formula
120
+ attr_reader :tokens, :canonical_tokens
121
+
122
+ def initialize(tokens)
123
+ @tokens = tokens
124
+ @canonical_tokens = parse_formula
125
+ end
126
+
127
+ def canonical_to_s
128
+ canonical_tokens.join '+'
129
+ end
130
+
131
+ private
132
+
133
+ def parse_formula
134
+ @tokens.inject([]) do |acc, token|
135
+ acc + add_non_redundant_elements(token, acc)
136
+ end
137
+ end
138
+
139
+ def add_non_redundant_elements(token, result_so_far)
140
+ return [token] if token.value == '1'
141
+ tokens = token.expand
142
+ result_so_far = result_so_far.flat_map(&:expand)
143
+ tokens -= result_so_far
144
+ contract_if_possible tokens
145
+ end
146
+
147
+ def contract_if_possible(tokens)
148
+ tokens.combination(2).each do |a, b|
149
+ result = a.add b
150
+ next unless result
151
+ tokens.delete a
152
+ tokens.delete b
153
+ tokens << result
154
+ return contract_if_possible tokens
155
+ end
156
+ tokens.sort
157
+ end
158
+ end
159
+
160
+ # To encapsulate interaction as well as non-interaction terms
161
+ class Token
162
+ attr_reader :value, :full, :interact_terms
163
+
164
+ def initialize(value, full = true)
165
+ @interact_terms = value.include?(':') ? value.split(':') : [value]
166
+ @full = coerce_full full
167
+ end
168
+
169
+ def value
170
+ interact_terms.join(':')
171
+ end
172
+
173
+ def size
174
+ # TODO: Return size 1 for value '1' also
175
+ # Can't do this at the moment because we have to make
176
+ # changes in sorting first
177
+ value == '1' ? 0 : interact_terms.size
178
+ end
179
+
180
+ def add(other)
181
+ # ANYTHING + FACTOR- : ANYTHING = FACTOR : ANYTHING
182
+ # ANYTHING + ANYTHING : FACTOR- = ANYTHING : FACTOR
183
+ if size > other.size
184
+ other.add self
185
+
186
+ elsif other.size == 2 &&
187
+ size == 1 &&
188
+ other.interact_terms.last == value &&
189
+ other.full.last == full.first &&
190
+ other.full.first == false
191
+ Token.new(
192
+ "#{other.interact_terms.first}:#{value}",
193
+ [true, other.full.last]
194
+ )
195
+
196
+ elsif other.size == 2 &&
197
+ size == 1 &&
198
+ other.interact_terms.first == value &&
199
+ other.full.first == full.first &&
200
+ other.full.last == false
201
+ Token.new(
202
+ "#{value}:#{other.interact_terms.last}",
203
+ [other.full.first, true]
204
+ )
205
+
206
+ elsif value == '1' &&
207
+ other.size == 1
208
+ Token.new(other.value, true)
209
+ end
210
+ end
211
+
212
+ def ==(other)
213
+ value == other.value &&
214
+ full == other.full
215
+ end
216
+
217
+ alias eql? ==
218
+
219
+ def hash
220
+ value.hash ^ full.hash
221
+ end
222
+
223
+ def <=>(other)
224
+ size <=> other.size
225
+ end
226
+
227
+ def to_s
228
+ interact_terms
229
+ .zip(full)
230
+ .map { |t, f| f ? t : t + '(-)' }
231
+ .join ':'
232
+ end
233
+
234
+ def expand
235
+ case size
236
+ when 0
237
+ [self]
238
+ when 1
239
+ [Token.new('1'), Token.new(value, false)]
240
+ when 2
241
+ a, b = interact_terms
242
+ [Token.new('1'), Token.new(a, false), Token.new(b, false),
243
+ Token.new(a + ':' + b, [false, false])]
244
+ end
245
+ end
246
+
247
+ def to_df(df)
248
+ case size
249
+ when 1
250
+ if df[value].category?
251
+ df[value].contrast_code full: full.first
252
+ else
253
+ Daru::DataFrame.new value => df[value].to_a
254
+ end
255
+ when 2
256
+ to_df_when_interaction(df)
257
+ end
258
+ end
259
+
260
+ private
261
+
262
+ def coerce_full(value)
263
+ if value.is_a? Array
264
+ value + Array.new((@interact_terms.size - value.size), true)
265
+ else
266
+ [value] * @interact_terms.size
267
+ end
268
+ end
269
+
270
+ def to_df_when_interaction(df)
271
+ case interact_terms.map { |t| df[t].category? }
272
+ when [true, true]
273
+ df.interact_code(interact_terms, full)
274
+ when [false, false]
275
+ to_df_numeric_interact_with_numeric df
276
+ when [true, false]
277
+ to_df_category_interact_with_numeric df
278
+ when [false, true]
279
+ to_df_numeric_interact_with_category df
280
+ end
281
+ end
282
+
283
+ def to_df_numeric_interact_with_numeric(df)
284
+ Daru::DataFrame.new value => (df[interact_terms.first] *
285
+ df[interact_terms.last]).to_a
286
+ end
287
+
288
+ def to_df_category_interact_with_numeric(df)
289
+ a, b = interact_terms
290
+ Daru::DataFrame.new(
291
+ df[a].contrast_code(full: full.first)
292
+ .map { |dv| ["#{dv.name}:#{b}", (dv * df[b]).to_a] }
293
+ .to_h
294
+ )
295
+ end
296
+
297
+ def to_df_numeric_interact_with_category(df)
298
+ a, b = interact_terms
299
+ Daru::DataFrame.new(
300
+ df[b].contrast_code(full: full.last)
301
+ .map { |dv| ["#{a}:#{dv.name}", (dv * df[a]).to_a] }
302
+ .to_h
303
+ )
304
+ end
305
+ end
306
+ end