RubyGems - fselector - Versions diffs - 0.8.1 → 0.9.0 - Mend

fselector 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

data/ChangeLog +6 -0
data/README.md +5 -2
data/lib/fselector.rb +1 -1
data/lib/fselector/algo_base/base.rb +5 -20
data/lib/fselector/algo_continuous/CFS_c.rb +2 -2
data/lib/fselector/algo_discrete/CFS_d.rb +2 -2
data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb +2 -2
data/lib/fselector/algo_discrete/InformationGain.rb +1 -1
data/lib/fselector/algo_discrete/LasVegasFilter.rb +110 -0
data/lib/fselector/algo_discrete/LasVegasIncremental.rb +179 -0
data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb +2 -6
data/lib/fselector/discretizer.rb +108 -15
data/lib/fselector/entropy.rb +20 -0
metadata +6 -4

data/ChangeLog CHANGED Viewed

@@ -1,3 +1,9 @@
+2012-04-25	version 0.9.0
+  * add new discretization algorithm (Three-Interval Discretization, TID)
+  * add new algorithm Las Vegas Filter (LVF) for discrete feature
+  * add new algorithm Las Vegas Incremental (LVI) for discrete feature
 2012-04-23	version 0.8.1
   * correct a bug in the example in the README file because discretize\_by\_ChiMerge!() now takes confidence alpha value as argument instead of chi-square value

data/README.md CHANGED Viewed

@@ -8,8 +8,8 @@ FSelector: a Ruby gem for feature selection and ranking
 **Email**: [need47@gmail.com](mailto:need47@gmail.com)
 **Copyright**: 2012
 **License**: MIT License
-**Latest Version**: 0.8.1
-**Release Date**: April 23 2012
+**Latest Version**: 0.9.0
+**Release Date**: April 25 2012
 Synopsis
 --------
@@ -58,6 +58,8 @@ Feature List
     GMean                             GM          discrete
     GSSCoefficient                    GSS         discrete
     InformationGain                   IG          discrete
+    LasVegasFilter                    LVF         discrete
+    LasVegasIncremental               LVI         discrete
     MatthewsCorrelationCoefficient    MCC, PHI    discrete
     McNemarsTest                      MNT         discrete
     OddsRatio                         OR          discrete
@@ -104,6 +106,7 @@ Feature List
     discretize_by_ChiMerge!           discretize by ChiMerge algorithm
     discretize_by_Chi2!               discretize by Chi2 algorithm
     discretize_by_MID!                discretize by Multi-Interval Discretization
+    discretize_by_TID!                discretize by Three-Interval Discretization
 **5. available algorithms for replacing missing feature values**

data/lib/fselector.rb CHANGED Viewed

@@ -6,7 +6,7 @@ require 'rinruby'
 #
 module FSelector
   # module version
-  VERSION = '0.8.1'
+  VERSION = '0.9.0'
 end
 # the root dir of FSelector

data/lib/fselector/algo_base/base.rb CHANGED Viewed

@@ -270,12 +270,7 @@ module FSelector
       each_sample do |k, s|
         my_data[k] ||= []
-        my_s = {}
-        s.each do |f, v|
-          my_s[f] = v if subset.include? f
-        end
+        my_s = s.select { |f, v| subset.include? f }
         my_data[k] << my_s if not my_s.empty?
       end
@@ -287,7 +282,7 @@ module FSelector
     # reconstruct data with feature scores satisfying cutoff
     #
     # @param [String] criterion
-    #   valid criterion can be '>0.5', '>= 0.4', '==2', '<=1' or '<0.2'
+    #   valid criterion can be '>0.5', '>=0.4', '==2', '<=1' or '<0.2'
     # @param [Hash] my_scores
     #   user customized feature scores
     # @return [Hash] data after feature selection
@@ -301,12 +296,7 @@ module FSelector
       each_sample do |k, s|
         my_data[k] ||= []
-        my_s = {}
-        s.each do |f, v|
-          my_s[f] = v if eval("#{scores[f][:BEST]} #{criterion}")
-        end
+        my_s = s.select { |f, v| eval("#{scores[f][:BEST]} #{criterion}") }
         my_data[k] << my_s if not my_s.empty?
       end
@@ -318,7 +308,7 @@ module FSelector
     # reconstruct data by rank
     #
     # @param [String] criterion
-    #   valid criterion can be '>11', '>= 10', '==1', '<=10' or '<20'
+    #   valid criterion can be '>11', '>=10', '==1', '<=10' or '<20'
     # @param [Hash] my_ranks
     #   user customized feature ranks
     # @return [Hash] data after feature selection
@@ -332,12 +322,7 @@ module FSelector
       each_sample do |k, s|
         my_data[k] ||= []
-        my_s = {}
-        s.each do |f,v|
-          my_s[f] = v if eval("#{ranks[f]} #{criterion}")
-        end
+        my_s = s.select { |f, v| eval("#{ranks[f]} #{criterion}") }
         my_data[k] << my_s if not my_s.empty?
       end

data/lib/fselector/algo_continuous/CFS_c.rb CHANGED Viewed

@@ -3,8 +3,8 @@
 #
 module FSelector
 #
-# Correlation-based Feature Selection (CFS) algorithm for continuous feature (CFS\_c).
-# For CFS\_c, use **select\_feature!** for feature selection
+# Correlation-based Feature Selection (CFS) algorithm for continuous feature (CFS\_c),
+# use **select\_feature!** for feature selection
 #
 # ref: [Feature Selection for Discrete and Numeric Class Machine Learning](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.30.5673)
 #

data/lib/fselector/algo_discrete/CFS_d.rb CHANGED Viewed

@@ -3,8 +3,8 @@
 #
 module FSelector
 #
-# Correlation-based Feature Selection (CFS) algorithm for discrete feature (CFS\_d).
-# For CFS\_d, use **select\_feature!** for feature selection
+# Correlation-based Feature Selection (CFS) algorithm for discrete feature (CFS\_d),
+# use **select\_feature!** for feature selection
 #
 # ref: [Feature Selection for Discrete and Numeric Class Machine Learning](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.30.5673)
 #

data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb CHANGED Viewed

@@ -3,8 +3,8 @@
 #
 module FSelector
 #
-# Fast Correlation-Based Filter for feature with discrete data (FCBF),
-# for FCBF, use **select\_feature!** for feature selection
+# Fast Correlation-Based Filter (FCBF) for discrete feature,
+# use **select\_feature!** for feature selection
 #
 # ref: [Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution](http://www.hpl.hp.com/conferences/icml2003/papers/144.pdf)
 #

data/lib/fselector/algo_discrete/InformationGain.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 #
 module FSelector
 #
-# Information Gain for feature with discrete data (IG)
+# Information Gain (IG) for discrete feature
 #
 #     IG(c,f) = H(c) - H(c|f)
 #

data/lib/fselector/algo_discrete/LasVegasFilter.rb ADDED Viewed

@@ -0,0 +1,110 @@
+#
+# FSelector: a Ruby gem for feature selection and ranking
+#
+module FSelector
+#
+# Las Vegas Filter (LVF) for discrete feature,
+# use **select\_feature!** for feature selection
+#
+# @note we only keep one of the equivalently good solutions
+#
+# ref: [Review and Evaluation of Feature Selection Algorithms in Synthetic Problems](http://arxiv.org/abs/1101.2320)
+#
+  class LasVegasFilter < BaseDiscrete
+    #
+    # initialize from existing data structure
+    #
+    # @param [Integer] max_iter maximum number of iterations
+    # @param [Hash] data existing data structure
+    #
+    def initialize(max_iter=100, data=nil)
+      super(data)
+      @max_iter = max_iter || 100
+    end
+    private
+    # Las Vegas Filter (LVF) algorithm
+    def get_feature_subset
+      feats = get_features # initial best solution
+      data = get_data # working dataset
+      j0 = check_J(data, feats)
+      subset = lvf(data, feats, j0)
+      subset
+    end #get_feature_subset
+    # compute evaluation measure J in (0, 1], where J = 1/(1 + inconsistency rate)
+    def check_J(data, feats)
+      # create a reduced dataset within feats
+      dt = {}
+      data.each do |k, ss|
+        dt[k] ||= []
+        ss.each do |s|
+          my_s = s.select { |f,v| feats.include? f }
+          dt[k] << my_s if not my_s.empty?
+        end
+      end
+      # check data inconsistency rate
+      # get unique instances (except class label)
+      inst_u = dt.values.flatten.uniq
+      inst_u_cnt = {} # occurrences for each unique instance in each class
+      ks = dt.keys
+      # count
+      inst_u.each_with_index do |inst, idx|
+        inst_u_cnt[idx] = [] # record for all classes
+        ks.each do |k|
+          inst_u_cnt[idx] << dt[k].count(inst)
+        end
+      end
+      # inconsistency count
+      inconsis = 0.0
+      inst_u_cnt.each do |idx, cnts|
+        inconsis += cnts.sum-cnts.max
+      end
+      # inconsistency rate
+      sz = dt.values.flatten.size # inconsis / num_of_sample
+      ir = (sz.zero?) ? 0.0 : inconsis/sz
+      1.0/(1.0 + ir)
+    end
+    # lvf
+    def lvf(data, feats, j0)
+      subset_best = feats
+      sz_best = subset_best.size
+      #pp [sz_best, j0]
+      @max_iter.times do
+        # always sample a smaller feature subset than sz_best at random
+        f_try = feats.sample(rand(sz_best-1)+1)
+        j = check_J(data, f_try)
+        #pp [f_try.size, j]
+        if j >= j0
+          subset_best = f_try
+          sz_best = f_try.size
+          #pp [sz_best, j, 'best']
+        end
+      end
+      subset_best
+    end
+  end # class
+  # shortcut so that you can use FSelector::LVF instead of FSelector::LasVegasFilter
+  LVF = LasVegasFilter
+end # module

data/lib/fselector/algo_discrete/LasVegasIncremental.rb ADDED Viewed

@@ -0,0 +1,179 @@
+#
+# FSelector: a Ruby gem for feature selection and ranking
+#
+module FSelector
+#
+# Las Vegas Incremental (LVI) for discrete feature,
+# use **select\_feature!** for feature selection
+#
+# ref: [Incremental Feature Selection](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.34.8218)
+#
+  class LasVegasIncremental < BaseDiscrete
+    #
+    # initialize from existing data structure
+    #
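+    # @param [Integer] max_iter maximum number of iterations
+    # @param [Float] portion fraction of each class's samples drawn at random into the initial subset
+    # @param [Hash] data existing data structure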
+    #
+    def initialize(max_iter=100, portion=0.10, data=nil)
+      super(data)
+      @max_iter = max_iter || 100
+      @portion = portion || 0.10
+    end
+    private
+    # Las Vegas Incremental (LVI) algorithm
+    def get_feature_subset
+      data = get_data # working dataset
+      s0, s1 = portion(data)
+      feats = get_features # initial best solution
+      j0 = check_incon_rate(data, feats)[0] # initial data inconsistency rate
+      subset = feats # initial feature subset
+      while true
+        f_try = lvf(s0, feats, j0) # keep only one equivalently good subset
+        #pp f_try
+        j_s0 = check_incon_rate(s0, f_try)[0]
+        j_s1, inconC = check_incon_rate(s1, f_try)
+        #pp [j0, j_s0, j_s1, s0.values.flatten.size, s1.values.flatten.size, f_try.size]
+        if j_s0+j_s1 <= j0 or inconC.empty?
+          subset = f_try
+          break
+        else
+          update(s0, s1, inconC)
+        end
+      end
+      #pp check_incon_rate(data, subset)[0]
+      subset
+    end #get_feature_subset
+    def portion(data)
+      s0, s1 = {}, {}
+      data.each do |k, ss|
+        sz = ss.size
+        n0 = (sz * @portion).to_i
+        indices = (0...sz).to_a
+        n0_indices = indices.sample(n0)
+        n1_indices = indices - n0_indices
+        s0[k] = ss.values_at(*n0_indices)
+        s1[k] = ss.values_at(*n1_indices)
+      end
+      [s0, s1]
+    end
+    # check data inconsistency rate, returns [inconsistency rate, inconsistent instances]
+    def check_incon_rate(data, feats)
+      #pp feats
+      ir, inconC = 0.0, []
+      # create a reduced dataset within feats
+      dt = {}
+      data.each do |k, ss|
+        dt[k] ||= []
+        ss.each do |s|
+          my_s = s.select { |f,v| feats.include? f }
+          dt[k] << my_s if not my_s.empty?
+        end
+      end
+      # check data inconsistency rate
+      # get unique instances (except class label)
+      inst_u = dt.values.flatten.uniq
+      inst_u_cnt = {} # occurrences for each unique instance in each class
+      ks = dt.keys
+      # count
+      inst_u.each_with_index do |inst, idx|
+        inst_u_cnt[idx] = [] # record for all classes
+        ks.each do |k|
+          inst_u_cnt[idx] << dt[k].count(inst)
+        end
+      end
+      # inconsistency count
+      inconsis = 0.0
+      inst_u_cnt.each do |idx, cnts|
+        diff = cnts.sum-cnts.max
+        inconsis += diff
+        if not diff.zero? # inconsistent instance
+          inconC << inst_u[idx]
+        end
+      end
+      # inconsistency rate
+      sz = dt.values.flatten.size # inconsis / num_of_sample
+      ir = inconsis/sz if not sz.zero?
+      [ir, inconC]
+    end
+    # lvf
+    def lvf(data, feats, j0)
+      subset_best = feats
+      sz_best = subset_best.size
+      @max_iter.times do
+        # always sample a smaller feature subset than sz_best at random
+        f_try = feats.sample(rand(sz_best-1)+1)
+        if check_incon_rate(data, f_try)[0] <= j0
+          subset_best = f_try
+          sz_best = f_try.size
+        end
+      end
+      subset_best
+    end
+    # update s0, s1
+    def update(s0, s1, inconC)
+      inconC.each do |inst|
+        s1.each do |k, sams|
+          sams.each_with_index do |sam, i|
+            if is_subset?(inst, sam)
+              s0[k] << sam
+              sams[i] = nil
+            end
+          end
+          sams.compact!
+        end
+      end
+    end
+    # check whether Hash a is a subset of Hash b
+    def is_subset?(ha, hb)
+      ha.each do |k, v|
+        if hb.has_key? k and v == hb[k]
+          next
+        else
+          return false
+        end
+      end
+      return true
+    end
+  end # class
+  # shortcut so that you can use FSelector::LVI instead of FSelector::LasVegasIncremental
+  LVI = LasVegasIncremental
+end # module

data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 #
 module FSelector
 #
-# Symmetrical Uncertainty for feature with discrete data (SU)
+# Symmetrical Uncertainty (SU) for discrete feature
 #
 #                      IG(c|f)       H(c) - H(c|f)
 #     SU(c,f) = 2 * ------------- = ---------------
@@ -27,11 +27,7 @@ module FSelector
       cv = get_class_labels
       fv = get_feature_values(f, :include_missing_values)
-      hc = get_marginal_entropy(cv)
-      hcf = get_conditional_entropy(cv, fv)
-      hf = get_marginal_entropy(fv)
-      s =  2*(hc-hcf)/(hc+hf)
+      s = get_symmetrical_uncertainty(cv, fv)
       set_feature_score(f, :BEST, s)
     end # calc_contribution

data/lib/fselector/discretizer.rb CHANGED Viewed

@@ -63,10 +63,6 @@ module Discretizer
   # @note data structure will be altered
   #
   # ref: [ChiMerge: Discretization of Numeric Attributes](http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf)
-  #
-  # chi-squared values and associated p values can be looked up at
-  # [Wikipedia](http://en.wikipedia.org/wiki/Chi-squared_distribution)
-  # degrees of freedom: one less than the number of classes
   #
   def discretize_by_ChiMerge!(alpha=0.10)
     df = get_classes.size-1
@@ -302,7 +298,7 @@ module Discretizer
       fv = get_feature_values(f)
       n = cv.size
-      # sort cv and fv according ascending order of fv
+      # sort cv and fv according to ascending order of fv
       sis = (0...n).to_a.sort { |i,j| fv[i] <=> fv[j] }
       cv = cv.values_at(*sis)
       fv = fv.values_at(*sis)
@@ -331,6 +327,82 @@ module Discretizer
     discretize_at_cutpoints!(f2cp)
   end # discretize_by_MID!
+  #
+  # discretize by Three-Interval Discretization (TID) algorithm
+  #
+  # @note no missing feature value is allowed, and data structure will be altered
+  #
+  # ref: [Filter versus wrapper gene selection approaches in DNA microarray domains](http://www.sciencedirect.com/science/article/pii/S0933365704000193)
+  #
+  def discretize_by_TID!
+    # cut points for each feature
+    f2cp = {}
+    each_feature do |f|
+      cv = get_class_labels
+      fv = get_feature_values(f)
+      n = cv.size
+      # sort cv and fv according to ascending order of fv
+      sis = (0...n).to_a.sort { |i,j| fv[i] <=> fv[j] }
+      cv = cv.values_at(*sis)
+      fv = fv.values_at(*sis)
+      # get initial boundaries
+      bs = []
+      fv_u = fv.uniq
+      fv_u.each_with_index do |v, i|
+        # cut points are the mean of two adjacent data points
+        if i < fv_u.size-1
+          bs << (v+fv_u[i+1])/2.0
+        end
+      end
+      # test each pair of cut points
+      s_best, h1_best, h2_best = nil, nil, nil
+      bs.each_with_index do |h1, i|
+        bs.each_with_index do |h2, j|
+          next if j <= i
+          n_h1 = (0...n).to_a.select { |x| fv[x] < h1 }.size.to_f
+          n_h1_h2 = (0...n).to_a.select { |x| fv[x] > h1 and fv[x] < h2 }.size.to_f
+          n_h2 = (0...n).to_a.select { |x| fv[x] > h2 }.size.to_f
+          s = 0.0
+          each_class do |k|
+            n_h1_k = (0...n).to_a.select { |x| fv[x] < h1 and cv[x] == k }.size.to_f
+            n_h1_h2_k = (0...n).to_a.select { |x| fv[x] > h1 and fv[x] < h2 and cv[x] == k }.size.to_f
+            n_h2_k = (0...n).to_a.select { |x| fv[x] > h2 and cv[x] == k }.size.to_f
+            s += n_h1_k * Math.log2(n_h1_k/n_h1) if not n_h1_k.zero?
+            s += n_h1_h2_k * Math.log2(n_h1_h2_k/n_h1_h2) if not n_h1_h2_k.zero?
+            s += n_h2_k * Math.log2(n_h2_k/n_h2) if not n_h2_k.zero?
+            #pp [s_best, s, h1, h2] + [n_h1, n_h1_k] + [n_h1_h2, n_h1_h2_k] + [n_h2, n_h2_k]
+          end
+          if not s_best or s > s_best
+            s_best, h1_best, h2_best = s, h1, h2
+            #pp [s_best, h1_best, h2_best]
+          end
+          break if s_best.zero? # allow early termination at maximum value 0.0
+        end
+        break if s_best.zero? # allow early termination at maximum value 0.0
+      end
+      #pp [s_best, h1_best, h2_best]
+      f2cp[f] = [h1_best, h2_best]
+    end
+    # discretize based on cut points
+    discretize_at_cutpoints!(f2cp, true)
+  end # discretize_by_TID!
   private
   #
@@ -349,18 +421,36 @@ module Discretizer
   #
   # get index from sorted cut points
   #
-  # cp1 -- cp2 ... cpn # cp1 is the min
+  # cp1 -- cp2 ... cpn
+  #
+  # if cut points are drawn from single data point, then
   #
   # [cp1, cp2) -> 1
   # [cp2, cp3) -> 2
   # ...
-  # [cpn, ) -> n
+  # [cpn, ) -> n
+  #
+  # if cut points are drawn from the mean of two adjacent data points, then
+  #
+  # (, cp1) -> 1
+  # (cp1, cp2) -> 2
+  # ...
+  # (cpn, ) -> n+1
   #
-  def get_index(v, cut_points)
-    i = cut_points.rindex { |x| v >= x }
-    i ? i+1 : 0
-    #i = cut_points.index { |x| v <= x }
-    #i ? i+1 : cut_points.size+1
+  # @param [Float] v continuous data to be discretized
+  # @param [Array<Float>] cut_points cut points
+  # @param [Boolean] mid_point true if cut points are drawn from the mean of
+  #   two adjacent data points, false if drawn from single data point
+  # @return [Integer] discretized index for v
+  #
+  def get_index(v, cut_points, mid_point=false)
+    if mid_point
+      i = cut_points.index { |x| v < x }
+      return i ? i+1 : cut_points.size+1
+    else
+      i = cut_points.rindex { |x| v >= x }
+      return i ? i+1 : 0
+    end
   end # get_index
@@ -387,12 +477,15 @@ module Discretizer
   #
   # discretize data at given cut points
   #
+  # @param [Hash] f2cp cut points for each feature
+  # @param [Boolean] mid_point true if cut points are drawn from the mean of
+  #   two adjacent data points, false if drawn from single data point
   # @note data structure will be altered
   #
-  def discretize_at_cutpoints!(f2cp)
+  def discretize_at_cutpoints!(f2cp, mid_point=false)
     each_sample do |k, s|
       s.keys.each do |f|
-        s[f] = get_index(s[f], f2cp[f])
+        s[f] = get_index(s[f], f2cp[f], mid_point)
       end
     end
@@ -578,7 +671,7 @@ module Discretizer
       inconsis += cnts.sum-cnts.max
     end
-    inconsis/get_sample_size
+    inconsis/dt.values.flatten.size # inconsis / num_of_sample
   end
   #

data/lib/fselector/entropy.rb CHANGED Viewed

@@ -78,6 +78,26 @@ module Entropy
   end # get_joint_entropy
+  #
+  # get the symmetrical uncertainty of array (X) and array (Y)
+  #
+  # @param [Array] arrX the first array
+  # @param [Array] arrY the second array
+  # @return [Float] SU(X,Y)
+  #
+  def get_symmetrical_uncertainty(arrX, arrY)
+    abort "[#{__FILE__}@#{__LINE__}]: "+
+        "array must be of same length" if not arrX.size == arrY.size
+    hx = get_marginal_entropy(arrX)
+    hxy = get_conditional_entropy(arrX, arrY)
+    hy = get_marginal_entropy(arrY)
+    su = 0.0
+    su = 2*(hx-hxy)/(hx+hy) if not (hx+hy).zero?
+  end
 end # module

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fselector
 version: !ruby/object:Gem::Version
-  version: 0.8.1
+  version: 0.9.0
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-04-23 00:00:00.000000000 Z
+date: 2012-04-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rinruby
-  requirement: &25316676 !ruby/object:Gem::Requirement
+  requirement: &25980288 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
         version: 2.0.2
   type: :runtime
   prerelease: false
-  version_requirements: *25316676
+  version_requirements: *25980288
 description: FSelector is a Ruby gem that aims to integrate various feature selection/ranking
   algorithms and related functions into one single package. Welcome to contact me
   (need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
@@ -70,6 +70,8 @@ files:
 - lib/fselector/algo_discrete/GMean.rb
 - lib/fselector/algo_discrete/GSSCoefficient.rb
 - lib/fselector/algo_discrete/InformationGain.rb
+- lib/fselector/algo_discrete/LasVegasFilter.rb
+- lib/fselector/algo_discrete/LasVegasIncremental.rb
 - lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb
 - lib/fselector/algo_discrete/McNemarsTest.rb
 - lib/fselector/algo_discrete/MutualInformation.rb