statsample 2.0.2 → 2.1.0
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/History.txt +7 -0
- data/README.md +2 -4
- data/Rakefile +6 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +0 -1
- data/examples/correlation_matrix.rb +0 -16
- data/examples/dataset.rb +0 -7
- data/examples/dominance_analysis_bootstrap.rb +0 -6
- data/examples/reliability.rb +0 -2
- data/lib/statsample.rb +3 -2
- data/lib/statsample/anova/oneway.rb +1 -1
- data/lib/statsample/bivariate.rb +4 -4
- data/lib/statsample/converter/spss.rb +1 -1
- data/lib/statsample/crosstab.rb +3 -3
- data/lib/statsample/daru.rb +1 -3
- data/lib/statsample/factor/parallelanalysis.rb +1 -3
- data/lib/statsample/formula/fit_model.rb +46 -0
- data/lib/statsample/formula/formula.rb +306 -0
- data/lib/statsample/matrix.rb +0 -2
- data/lib/statsample/regression.rb +1 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -3
- data/lib/statsample/reliability.rb +3 -3
- data/lib/statsample/reliability/icc.rb +1 -2
- data/lib/statsample/reliability/multiscaleanalysis.rb +0 -1
- data/lib/statsample/reliability/scaleanalysis.rb +2 -3
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +10 -10
- data/lib/statsample/test/umannwhitney.rb +3 -3
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +4 -1
- data/test/fixtures/df.csv +15 -0
- data/test/helpers_tests.rb +7 -0
- data/test/test_factor.rb +0 -5
- data/test/test_factor_pa.rb +1 -6
- data/test/test_fit_model.rb +88 -0
- data/test/test_reliability.rb +0 -10
- data/test/test_statistics.rb +1 -1
- metadata +52 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e2a80fff135f963dcabfe1593de243611eb8ab9a
+  data.tar.gz: cb2d80e85339201f8a37ea1b8e934953f26b5591
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b9412e202d3364c6fe7f982a9ceb2828061312ffd0274c7bc25e8b5747abd4b11aca5edd960e22fb6ee8cfec409a6352961c51e283d9523fb608e5d66bf65377
+  data.tar.gz: 5db058e78ae638c155727ca51af3e303503bfd42b518de095aa93e9639194c9a273a8b4e3ae3e069d91e25500612beba48140f2e7024e2c329ba16adeca54bef
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
+=== 2.1.0 / 2017-08-10
+* Update documentation to reflect methods that have been removed (@lokeshh)
+* Update daru dependency to v0.1.6 (@lokeshh)
+* Remove pre-daru legacy methods like n_valid, missing value functions (@lokeshh)
+* Update test suite with rubocop and rake. New tests for methods like Regression (@lokeshh)
+* Introduce fitting a regression using string formulas (@lokeshh)
+
 === 2.0.2 / 2016-03-11
 * Update dependencies (spreadsheet, GSL)
 
data/README.md
CHANGED
@@ -11,7 +11,7 @@ Homepage :: https://github.com/sciruby/statsample
 You should have a recent version of GSL and R (with the `irr` and `Rserve` libraries) installed. In Ubuntu:
 
 ```bash
-$ sudo apt-get install
+$ sudo apt-get install libgsl0-dev r-base r-base-dev
 $ sudo Rscript -e "install.packages(c('Rserve', 'irr'))"
 ```
 
@@ -86,7 +86,7 @@ Include:
 - Intra-class correlation
 - Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for One-way ANOVA.
 - Tests: F, T, Levene, U-Mannwhitney.
-- Regression: Simple, Multiple (OLS)
+- Regression: Simple, Multiple (OLS)
 - Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for estimation of number of factors.
 - Reliability analysis for simple scale and a DSL to easily analyze multiple scales using factor analysis and correlations, if you want it.
 - Basic time series support
@@ -120,8 +120,6 @@ Include:
 - Multiple types of regression.
   - Simple Regression : Statsample::Regression::Simple
   - Multiple Regression: Statsample::Regression::Multiple
-  - Logit Regression: Statsample::Regression::Binomial::Logit
-  - Probit Regression: Statsample::Regression::Binomial::Probit
 - Factorial Analysis algorithms on Statsample::Factor module.
   - Classes for Extraction of factors:
     - Statsample::Factor::PCA
data/Rakefile
CHANGED
@@ -1,4 +1,5 @@
 $:.unshift File.expand_path("../lib/", __FILE__)
+lib_folder = File.expand_path("../lib", __FILE__)
 
 require 'statsample/version'
 require 'rake'
@@ -36,3 +37,8 @@ task "gettext:makemo" do
   require 'gettext/tools'
   GetText.create_mofiles()
 end
+
+desc 'Run pry'
+task :pry do |task|
+  sh "pry -r #{lib_folder}/statsample.rb"
+end
data/examples/correlation_matrix.rb
CHANGED
@@ -7,18 +7,6 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
 
 require 'statsample'
 Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
-  # It so happens that Daru::Vector and Daru::DataFrame must update metadata
-  # like positions of missing values every time they are created.
-  #
-  # Since we dont have any missing values in the data that we are creating,
-  # we set Daru.lazy_update = true so that missing data is not updated every
-  # time and things happen much faster.
-  #
-  # In case you do have missing data and lazy_update has been set to *true*,
-  # you _SHOULD_ called `#update` on the concerned Vector or DataFrame object
-  # everytime an assingment or deletion cycle is complete.
-  Daru.lazy_update = true
-
   # Create a Daru::DataFrame containing 4 vectors a, b, c and d.
   #
   # Notice that the `clone` option has been set to *false*. This tells Daru
@@ -36,10 +24,6 @@ Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
   # Calculate correlation matrix by calling the `cor` shorthand.
   cm = cor(ds)
   summary(cm)
-
-  # Set lazy_update to *false* once our job is done so that this analysis does
-  # not accidentally affect code elsewhere.
-  Daru.lazy_update = false
 end
 
 if __FILE__==$0
data/examples/dataset.rb
CHANGED
@@ -6,10 +6,6 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 
 Statsample::Analysis.store(Daru::DataFrame) do
-  # We set lazy_update to *true* so that time is not wasted in updating
-  # metdata every time an assignment happens.
-  Daru.lazy_update = true
-
   samples = 1000
 
   # The 'new_with_size' function lets you specify the size of the
@@ -26,9 +22,6 @@ Statsample::Analysis.store(Daru::DataFrame) do
   # order by default.
   ds = Daru::DataFrame.new({:a=>a,:b=>b}, order: [:b, :a])
   summary(ds)
-
-  # Reset lazy_update to *false* to prevent other code from breaking.
-  Daru.lazy_update = false
 end
 
 if __FILE__==$0
data/examples/dominance_analysis_bootstrap.rb
CHANGED
@@ -3,10 +3,6 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 
 Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
-  # Remember to call *update* after an assignment/deletion cycle if lazy_update
-  # is *false*.
-  Daru.lazy_update = true
-
   sample=300
   a=rnorm(sample)
   b=rnorm(sample)
@@ -29,8 +25,6 @@ Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
   dab2=dominance_analysis_bootstrap(ds2, :y1, :debug=>true)
   dab2.bootstrap(100,nil)
   summary(dab2)
-
-  Daru.lazy_update = false
 end
 
 if __FILE__==$0
data/examples/reliability.rb
CHANGED
data/lib/statsample.rb
CHANGED
@@ -160,6 +160,7 @@ module Statsample
   autoload(:StratifiedSample, 'statsample/multiset')
   autoload(:MLE, 'statsample/mle')
   autoload(:Regression, 'statsample/regression')
+  autoload(:FitModel, 'statsample/formula/fit_model')
   autoload(:Test, 'statsample/test')
   autoload(:Factor, 'statsample/factor')
   autoload(:Graph, 'statsample/graph')
@@ -206,7 +207,7 @@ module Statsample
   def only_valid(*vs)
     i = 1
     h = vs.inject({}) { |acc, v| acc["v#{i}".to_sym] = v; i += 1; acc }
-    df = Daru::DataFrame.new(h).
+    df = Daru::DataFrame.new(h).reject_values(*Daru::MISSING_VALUES)
     df.map { |v| v }
   end
 
@@ -214,7 +215,7 @@ module Statsample
   # If any vectors have missing_values, return only valid.
   # If not, return the vectors itself
   def only_valid_clone(*vs)
-    if vs.any?(
+    if vs.any? { |v| v.include_values?(*Daru::MISSING_VALUES) }
       only_valid(*vs)
     else
       vs
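The two `only_valid` hunks above replace statsample's old missing-data bookkeeping with daru's own API. A minimal sketch of what those daru calls do (not part of the diff; it assumes daru >= 0.1.5, where `Daru::MISSING_VALUES`, `#include_values?` and `#reject_values` are available, and the sample data is made up):

```ruby
require 'daru'

v1 = Daru::Vector.new([1, 2, nil, 4])
v2 = Daru::Vector.new([5, nil, 7, 8])

# include_values? reports whether a vector contains any of the given values.
v1.include_values?(*Daru::MISSING_VALUES)   # => true

# reject_values on a DataFrame drops every row holding one of those values,
# which is what the rewritten Statsample.only_valid relies on.
df    = Daru::DataFrame.new(v1: v1, v2: v2)
clean = df.reject_values(*Daru::MISSING_VALUES)
clean[:v1].to_a                             # => [1, 4]
```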
data/lib/statsample/anova/oneway.rb
CHANGED
@@ -164,7 +164,7 @@ module Statsample
       if summary_descriptives
         s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
           @vectors.each do |v|
-            t.row [v.name, v.
+            t.row [v.name, v.reject_values(*Daru::MISSING_VALUES).size, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
           end
         end
       end
data/lib/statsample/bivariate.rb
CHANGED
@@ -159,7 +159,7 @@ module Statsample
 
     def covariance_matrix(ds)
       vars,cases = ds.ncols, ds.nrows
-      if !ds.
+      if !ds.include_values?(*Daru::MISSING_VALUES) and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
         cm=covariance_matrix_optimized(ds)
       else
         cm=covariance_matrix_pairwise(ds)
@@ -198,7 +198,7 @@ module Statsample
     # Order of rows and columns depends on Dataset#fields order
     def correlation_matrix(ds)
       vars, cases = ds.ncols, ds.nrows
-      if !ds.
+      if !ds.include_values?(*Daru::MISSING_VALUES) and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
         cm=correlation_matrix_optimized(ds)
       else
         cm=correlation_matrix_pairwise(ds)
@@ -248,7 +248,7 @@ module Statsample
       m = vectors.collect do |row|
         vectors.collect do |col|
           if row==col
-            ds[row].
+            ds[row].reject_values(*Daru::MISSING_VALUES).size
           else
             rowa,rowb = Statsample.only_valid_clone(ds[row],ds[col])
             rowa.size
@@ -281,7 +281,7 @@ module Statsample
     # Calculate Point biserial correlation. Equal to Pearson correlation, with
     # one dichotomous value replaced by "0" and the other by "1"
     def point_biserial(dichotomous,continous)
-      ds = Daru::DataFrame.new({:d=>dichotomous,:c=>continous}).
+      ds = Daru::DataFrame.new({:d=>dichotomous,:c=>continous}).reject_values(*Daru::MISSING_VALUES)
       raise(TypeError, "First vector should be dichotomous") if ds[:d].factors.size != 2
       raise(TypeError, "Second vector should be continous") if ds[:c].type != :numeric
       f0=ds[:d].factors.sort.to_a[0]
data/lib/statsample/converter/spss.rb
CHANGED
@@ -7,7 +7,7 @@ module Statsample
     #   ds=Daru::DataFrame.from_excel("my_data.xls")
     #   puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
     def tetrachoric_correlation_matrix(ds)
-      dsv=ds.
+      dsv=ds.reject_values(*Daru::MISSING_VALUES)
       # Delete all vectors doesn't have variation
       dsv.vectors.each { |f|
         if dsv[f].factors.size==1
data/lib/statsample/crosstab.rb
CHANGED
@@ -29,10 +29,10 @@ module Statsample
       @v_cols.factors.sort.reset_index!
     end
     def rows_total
-      @v_rows.frequencies
+      @v_rows.frequencies.to_h
     end
     def cols_total
-      @v_cols.frequencies
+      @v_cols.frequencies.to_h
     end
 
     def frequencies
@@ -42,7 +42,7 @@ module Statsample
         s[par]=0
         s
       end
-      base.update(Daru::Vector.new(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a).frequencies)
+      base.update(Daru::Vector.new(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a).frequencies.to_h)
     end
     def to_matrix
       f = frequencies
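For context on the added `.to_h` calls: with current daru, `Daru::Vector#frequencies` returns a Daru::Vector of counts indexed by value, while the crosstab arithmetic expects a plain Hash. A rough illustration with toy data (an assumption inferred from the change itself, not shown in this diff):

```ruby
require 'daru'

v = Daru::Vector.new(%w[a b a c a b])
v.frequencies        # => a Daru::Vector of counts, indexed by "a", "b", "c"
v.frequencies.to_h   # => {"a"=>3, "b"=>2, "c"=>1}
```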
data/lib/statsample/daru.rb
CHANGED
@@ -11,7 +11,7 @@ module Daru
       # ugly patch. The upper limit for a bin has the form
       # x < range
       #h=Statsample::Histogram.new(self, bins)
-      valid =
+      valid = reject_values(*Daru::MISSING_VALUES)
       min,max=Statsample::Util.nice(valid.min,valid.max)
       # fix last data
       if max == valid.max
@@ -72,7 +72,6 @@ module Daru
       end
       #puts "Ingreso a los dataset"
       ms.datasets.each do |k,ds|
-        ds.update
         ds.rename self[field].index_of(k)
       end
 
@@ -102,7 +101,6 @@ module Daru
       each_row { |r| p1.call(r) }
 
       ms.datasets.each do |k,ds|
-        ds.update
         ds.rename(
           fields.size.times.map do |i|
             f = fields[i]
data/lib/statsample/factor/parallelanalysis.rb
CHANGED
@@ -142,8 +142,7 @@ module Statsample
             raise "bootstrap_method doesn't recogniced"
           end
         end
-
-
+
         matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
         matrix=matrix.to_gsl if @use_gsl
         if smc
@@ -159,7 +158,6 @@ module Statsample
             redo
           end
         end
-        @ds_eigenvalues.update
       end
       dirty_memoize :number_of_factors, :ds_eigenvalues
       dirty_writer :iterations, :bootstrap_method, :percentil, :smc
data/lib/statsample/formula/fit_model.rb
ADDED
@@ -0,0 +1,46 @@
+require 'statsample/formula/formula'
+
+module Statsample
+  # Class for performing regression
+  class FitModel
+    def initialize(formula, df, opts = {})
+      @formula = FormulaWrapper.new formula, df
+      @df = df
+      @opts = opts
+    end
+
+    def model
+      @model || fit_model
+    end
+
+    def predict(new_data)
+      model.predict(df_for_prediction(new_data))
+    end
+
+    def df_for_prediction df
+      canonicalize_df(df)
+    end
+
+    def df_for_regression
+      df = canonicalize_df(@df)
+      df[@formula.y.value] = @df[@formula.y.value]
+      df
+    end
+
+    def canonicalize_df(orig_df)
+      tokens = @formula.canonical_tokens
+      tokens.shift if tokens.first.value == '1'
+      df = tokens.map { |t| t.to_df orig_df }.reduce(&:merge)
+      df
+    end
+
+    def fit_model
+      # TODO: Add support for inclusion/exclusion of intercept
+      @model = Statsample::Regression.multiple(
+        df_for_regression,
+        @formula.y.value,
+        @opts
+      )
+    end
+  end
+end
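This is the string-formula front end mentioned in the changelog. A hedged usage sketch based only on the class above; the dataframe, its column names and the `coeffs` call on the returned regression engine are illustrative, not taken from this diff:

```ruby
require 'statsample'

df = Daru::DataFrame.new(
  'a' => [1, 2, 3, 4, 5],
  'b' => [3, 1, 4, 1, 5],
  'y' => [4, 4, 8, 7, 12]
)

# FitModel parses the R-style formula, builds the design dataframe through
# FormulaWrapper and hands it to Statsample::Regression.multiple.
fit = Statsample::FitModel.new('y~a+b', df)
lr  = fit.model   # a multiple-regression (OLS) engine
lr.coeffs         # => coefficients for 'a' and 'b'
```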
data/lib/statsample/formula/formula.rb
ADDED
@@ -0,0 +1,306 @@
+module Statsample
+  # This class recognizes what terms are numeric
+  # and accordingly forms groups which are fed to Formula
+  # Once they are parsed with Formula, they are combined back
+  class FormulaWrapper
+    attr_reader :tokens, :y, :canonical_tokens
+
+    # Initializes formula wrapper object to parse a given formula into
+    # some tokens which do not overlap one another.
+    # @note Specify 0 as a term in the formula if you do not want constant
+    #   to be included in the parsed formula
+    # @param [string] formula to parse
+    # @param [Daru::DataFrame] df dataframe requried to know what vectors
+    #   are numerical
+    # @example
+    #   df = Daru::DataFrame.from_csv 'spec/data/df.csv'
+    #   df.to_category 'c', 'd', 'e'
+    #   formula = Statsample::GLM::FormulaWrapper.new 'y~a+d:c', df
+    #   formula.canonical_to_s
+    #   #=> "1+c(-)+d(-):c+a"
+    def initialize(formula, df)
+      @df = df
+      # @y store the LHS term that is name of vector to be predicted
+      # @tokens store the RHS terms of the formula
+      @y, *@tokens = split_to_tokens(formula)
+      @tokens = @tokens.uniq.sort
+      manage_constant_term
+      @canonical_tokens = non_redundant_tokens
+    end
+
+    # Returns canonical tokens in a readable form.
+    # @return [String] canonical tokens in a readable form.
+    # @note 'y~a+b(-)' means 'a' exist in full rank expansion
+    #   and 'b(-)' exist in reduced rank expansion
+    # @example
+    #   df = Daru::DataFrame.from_csv 'spec/data/df.csv'
+    #   df.to_category 'c', 'd', 'e'
+    #   formula = Statsample::GLM::FormulaWrapper.new 'y~a+d:c', df
+    #   formula.canonical_to_s
+    #   #=> "1+c(-)+d(-):c+a"
+    def canonical_to_s
+      canonical_tokens.join '+'
+    end
+
+    # Returns tokens to produce non-redundant design matrix
+    # @return [Array] array of tokens that do not produce redundant matrix
+    def non_redundant_tokens
+      groups = split_to_groups
+      # TODO: An enhancement
+      # Right now x:c appears as c:x
+      groups.each { |k, v| groups[k] = strip_numeric v, k }
+      groups.each { |k, v| groups[k] = Formula.new(v).canonical_tokens }
+      groups.flat_map { |k, v| add_numeric v, k }
+    end
+
+    private
+
+    # Removes intercept token if term '0' is found in the formula.
+    # Intercept token remains if term '1' is found.
+    # If neither term '0' nor term '1' is found then, intercept token is added.
+    def manage_constant_term
+      @tokens.unshift Token.new('1') unless
+        @tokens.include?(Token.new('1')) ||
+        @tokens.include?(Token.new('0'))
+      @tokens.delete Token.new('0')
+    end
+
+    # Groups the tokens to gropus based on the numerical terms
+    # they are interacting with.
+    def split_to_groups
+      @tokens.group_by { |t| extract_numeric t }
+    end
+
+    # Add numeric interaction term which was removed earlier
+    # @param [Array] tokens tokens on which to add numerical terms
+    # @param [Array] numeric array of numeric terms to add
+    def add_numeric(tokens, numeric)
+      tokens.map do |t|
+        terms = t.interact_terms + numeric
+        if terms == ['1']
+          Token.new('1')
+        else
+          terms = terms.reject { |i| i == '1' }
+          Token.new terms.join(':'), t.full
+        end
+      end
+    end
+
+    # Strip numerical interacting terms
+    # @param [Array] tokens tokens from which to strip numeric
+    # @param [Array] numeric array of numeric terms to strip from tokens
+    # @return [Array] array of tokens with striped numerical terms
+    def strip_numeric(tokens, numeric)
+      tokens.map do |t|
+        terms = t.interact_terms - numeric
+        terms = ['1'] if terms.empty?
+        Token.new terms.join(':')
+      end
+    end
+
+    # Extract numeric interacting terms
+    # @param [Statsample::GLM::Token] token form which to extract numeric terms
+    # @return [Array] array of numericl terms
+    def extract_numeric(token)
+      terms = token.interact_terms
+      return [] if terms == ['1']
+      terms.reject { |t| @df[t].category? }
+    end
+
+    def split_to_tokens(formula)
+      formula = formula.gsub(/\s+/, '')
+      lhs_term, rhs = formula.split '~'
+      rhs_terms = rhs.split '+'
+      ([lhs_term] + rhs_terms).map { |t| Token.new t }
+    end
+  end
+
+  # To process formula language
+  class Formula
+    attr_reader :tokens, :canonical_tokens
+
+    def initialize(tokens)
+      @tokens = tokens
+      @canonical_tokens = parse_formula
+    end
+
+    def canonical_to_s
+      canonical_tokens.join '+'
+    end
+
+    private
+
+    def parse_formula
+      @tokens.inject([]) do |acc, token|
+        acc + add_non_redundant_elements(token, acc)
+      end
+    end
+
+    def add_non_redundant_elements(token, result_so_far)
+      return [token] if token.value == '1'
+      tokens = token.expand
+      result_so_far = result_so_far.flat_map(&:expand)
+      tokens -= result_so_far
+      contract_if_possible tokens
+    end
+
+    def contract_if_possible(tokens)
+      tokens.combination(2).each do |a, b|
+        result = a.add b
+        next unless result
+        tokens.delete a
+        tokens.delete b
+        tokens << result
+        return contract_if_possible tokens
+      end
+      tokens.sort
+    end
+  end
+
+  # To encapsulate interaction as well as non-interaction terms
+  class Token
+    attr_reader :value, :full, :interact_terms
+
+    def initialize(value, full = true)
+      @interact_terms = value.include?(':') ? value.split(':') : [value]
+      @full = coerce_full full
+    end
+
+    def value
+      interact_terms.join(':')
+    end
+
+    def size
+      # TODO: Return size 1 for value '1' also
+      # CAn't do this at the moment because have to make
+      # changes in sorting first
+      value == '1' ? 0 : interact_terms.size
+    end
+
+    def add(other)
+      # ANYTHING + FACTOR- : ANYTHING = FACTOR : ANYTHING
+      # ANYTHING + ANYTHING : FACTOR- = ANYTHING : FACTOR
+      if size > other.size
+        other.add self
+
+      elsif other.size == 2 &&
+            size == 1 &&
+            other.interact_terms.last == value &&
+            other.full.last == full.first &&
+            other.full.first == false
+        Token.new(
+          "#{other.interact_terms.first}:#{value}",
+          [true, other.full.last]
+        )
+
+      elsif other.size == 2 &&
+            size == 1 &&
+            other.interact_terms.first == value &&
+            other.full.first == full.first &&
+            other.full.last == false
+        Token.new(
+          "#{value}:#{other.interact_terms.last}",
+          [other.full.first, true]
+        )
+
+      elsif value == '1' &&
+            other.size == 1
+        Token.new(other.value, true)
+      end
+    end
+
+    def ==(other)
+      value == other.value &&
+        full == other.full
+    end
+
+    alias eql? ==
+
+    def hash
+      value.hash ^ full.hash
+    end
+
+    def <=>(other)
+      size <=> other.size
+    end
+
+    def to_s
+      interact_terms
+        .zip(full)
+        .map { |t, f| f ? t : t + '(-)' }
+        .join ':'
+    end
+
+    def expand
+      case size
+      when 0
+        [self]
+      when 1
+        [Token.new('1'), Token.new(value, false)]
+      when 2
+        a, b = interact_terms
+        [Token.new('1'), Token.new(a, false), Token.new(b, false),
+         Token.new(a + ':' + b, [false, false])]
+      end
+    end
+
+    def to_df(df)
+      case size
+      when 1
+        if df[value].category?
+          df[value].contrast_code full: full.first
+        else
+          Daru::DataFrame.new value => df[value].to_a
+        end
+      when 2
+        to_df_when_interaction(df)
+      end
+    end
+
+    private
+
+    def coerce_full(value)
+      if value.is_a? Array
+        value + Array.new((@interact_terms.size - value.size), true)
+      else
+        [value] * @interact_terms.size
+      end
+    end
+
+    def to_df_when_interaction(df)
+      case interact_terms.map { |t| df[t].category? }
+      when [true, true]
+        df.interact_code(interact_terms, full)
+      when [false, false]
+        to_df_numeric_interact_with_numeric df
+      when [true, false]
+        to_df_category_interact_with_numeric df
+      when [false, true]
+        to_df_numeric_interact_with_category df
+      end
+    end
+
+    def to_df_numeric_interact_with_numeric(df)
+      Daru::DataFrame.new value => (df[interact_terms.first] *
+        df[interact_terms.last]).to_a
+    end
+
+    def to_df_category_interact_with_numeric(df)
+      a, b = interact_terms
+      Daru::DataFrame.new(
+        df[a].contrast_code(full: full.first)
+          .map { |dv| ["#{dv.name}:#{b}", (dv * df[b]).to_a] }
+          .to_h
+      )
+    end
+
+    def to_df_numeric_interact_with_category(df)
+      a, b = interact_terms
+      Daru::DataFrame.new(
+        df[b].contrast_code(full: full.last)
+          .map { |dv| ["#{a}:#{dv.name}", (dv * df[a]).to_a] }
+          .to_h
+      )
+    end
+  end
+end