fselector 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +9 -0
- data/README.md +62 -26
- data/lib/fselector.rb +1 -1
- data/lib/fselector/algo_base/base.rb +89 -34
- data/lib/fselector/algo_base/base_CFS.rb +20 -7
- data/lib/fselector/algo_base/base_Relief.rb +5 -5
- data/lib/fselector/algo_base/base_ReliefF.rb +11 -3
- data/lib/fselector/algo_base/base_discrete.rb +8 -0
- data/lib/fselector/algo_continuous/BSS_WSS.rb +3 -1
- data/lib/fselector/algo_continuous/CFS_c.rb +3 -1
- data/lib/fselector/algo_continuous/FTest.rb +2 -0
- data/lib/fselector/algo_continuous/PMetric.rb +4 -2
- data/lib/fselector/algo_continuous/ReliefF_c.rb +11 -0
- data/lib/fselector/algo_continuous/Relief_c.rb +14 -3
- data/lib/fselector/algo_continuous/TScore.rb +5 -3
- data/lib/fselector/algo_continuous/WilcoxonRankSum.rb +5 -3
- data/lib/fselector/algo_discrete/Accuracy.rb +2 -0
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +2 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +3 -1
- data/lib/fselector/algo_discrete/CFS_d.rb +3 -0
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +3 -0
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +2 -0
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +2 -0
- data/lib/fselector/algo_discrete/F1Measure.rb +2 -0
- data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb +12 -1
- data/lib/fselector/algo_discrete/FishersExactTest.rb +3 -1
- data/lib/fselector/algo_discrete/GMean.rb +2 -0
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +2 -0
- data/lib/fselector/algo_discrete/GiniIndex.rb +3 -1
- data/lib/fselector/algo_discrete/INTERACT.rb +3 -0
- data/lib/fselector/algo_discrete/InformationGain.rb +12 -1
- data/lib/fselector/algo_discrete/LasVegasFilter.rb +3 -0
- data/lib/fselector/algo_discrete/LasVegasIncremental.rb +3 -0
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +2 -0
- data/lib/fselector/algo_discrete/McNemarsTest.rb +3 -0
- data/lib/fselector/algo_discrete/MutualInformation.rb +3 -1
- data/lib/fselector/algo_discrete/OddsRatio.rb +2 -0
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +2 -0
- data/lib/fselector/algo_discrete/Power.rb +4 -1
- data/lib/fselector/algo_discrete/Precision.rb +2 -0
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +2 -0
- data/lib/fselector/algo_discrete/Random.rb +3 -0
- data/lib/fselector/algo_discrete/ReliefF_d.rb +3 -1
- data/lib/fselector/algo_discrete/Relief_d.rb +4 -2
- data/lib/fselector/algo_discrete/Sensitivity.rb +2 -0
- data/lib/fselector/algo_discrete/Specificity.rb +2 -0
- data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb +4 -1
- data/lib/fselector/discretizer.rb +7 -7
- data/lib/fselector/ensemble.rb +375 -115
- data/lib/fselector/entropy.rb +2 -2
- data/lib/fselector/fileio.rb +83 -70
- data/lib/fselector/normalizer.rb +2 -2
- data/lib/fselector/replace_missing_values.rb +137 -3
- data/lib/fselector/util.rb +17 -5
- metadata +4 -4
data/lib/fselector/algo_discrete/InformationGain.rb CHANGED

```diff
@@ -17,6 +17,9 @@ module FSelector
     # include Entropy module
     include Entropy
 
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     private
 
     # calculate contribution of each feature (f) across all classes
@@ -39,7 +42,15 @@ module FSelector
 
       set_feature_score(f, :BEST, s)
     end # calc_contribution
-
+
+
+    # override clear\_vars for InformationGain
+    def clear_vars
+      super
+
+      @hc = nil
+    end # clear_vars
+
 
   end # class
 
```
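The new `@algo_type = :feature_weighting` line that this and the following algo hunks add is a class-level instance variable; the reworked `base.rb` (+89 -34 in the list above, not excerpted here) is what reads it back. A minimal sketch of that tag-and-read pattern, under assumed names (not the gem's actual `Base` code):

```ruby
# Minimal sketch of the class-level @algo_type tag; the module and reader
# shown here are placeholders, not fselector's real Base implementation.
module FSelectorSketch
  class Base
    # fetch the tag set in the body of the concrete algorithm class
    def algo_type
      self.class.instance_variable_get(:@algo_type)
    end
  end

  class InformationGain < Base
    # this algo outputs weight for each feature
    @algo_type = :feature_weighting
  end
end

puts FSelectorSketch::InformationGain.new.algo_type   # => feature_weighting
```

The ensemble classes later in this diff rely on exactly this kind of query (`algo.algo_type`) to check that all member selectors are of the same type.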
data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb CHANGED

```diff
@@ -16,6 +16,8 @@ module FSelector
   # ref: [Wikipedia](http://en.wikipedia.org/wiki/Matthews_correlation_coefficient)
   #
   class MatthewsCorrelationCoefficient < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
 
     private
 
```
data/lib/fselector/algo_discrete/McNemarsTest.rb CHANGED

```diff
@@ -14,6 +14,9 @@ module FSelector
   # ref: [Wikipedia](http://en.wikipedia.org/wiki/McNemar%27s_test)
   #
   class McNemarsTest < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     #
     # intialize from an existing data structure
     #
```
data/lib/fselector/algo_discrete/MutualInformation.rb CHANGED

```diff
@@ -16,7 +16,9 @@ module FSelector
   # ref: [A Comparative Study on Feature Selection Methods for Drug Discovery](http://pubs.acs.org/doi/abs/10.1021/ci049875d)
   #
   class MutualInformation < BaseDiscrete
-
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     private
 
     # calculate contribution of each feature (f) for each class (k)
```
data/lib/fselector/algo_discrete/OddsRatio.rb CHANGED

```diff
@@ -16,6 +16,8 @@ module FSelector
   # ref: [Wikipedia](http://en.wikipedia.org/wiki/Odds_ratio) and [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974) and [Optimally Combining Positive and Negative Features for Text Categorization](http://www.site.uottawa.ca/~nat/Workshop2003/zheng.pdf)
   #
   class OddsRatio < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
 
     private
 
```
data/lib/fselector/algo_discrete/OddsRatioNumerator.rb CHANGED

```diff
@@ -14,6 +14,8 @@ module FSelector
   # ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974)
   #
   class OddsRatioNumerator < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
 
     private
 
```
data/lib/fselector/algo_discrete/Power.rb CHANGED

```diff
@@ -13,7 +13,10 @@ module FSelector
   #
   # ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974)
   #
-  class Power < BaseDiscrete
+  class Power < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     #
     # initialize from an existing data structure
     #
```
data/lib/fselector/algo_discrete/ProbabilityRatio.rb CHANGED

```diff
@@ -14,6 +14,8 @@ module FSelector
   # ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974)
   #
   class ProbabilityRatio < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
 
     private
 
```
data/lib/fselector/algo_discrete/Random.rb CHANGED

```diff
@@ -10,6 +10,9 @@ module FSelector
   # ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974)
   #
   class Random < BaseDiscrete
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     #
     # initialize from an existing data structure
     #
```
data/lib/fselector/algo_discrete/ReliefF_d.rb CHANGED

```diff
@@ -9,7 +9,9 @@ module FSelector
   # ref: [Estimating Attributes: Analysis and Extensions of RELIEF](http://www.springerlink.com/content/fp23jh2h0426ww45/)
   #
   class ReliefF_d < BaseReliefF
-
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     private
 
     # difference beween the feature (f) of two samples
```
data/lib/fselector/algo_discrete/Relief_d.rb CHANGED

```diff
@@ -10,6 +10,8 @@ module FSelector
   # ref: [The Feature Selection Problem: Traditional Methods and a New Algorithm](http://www.aaai.org/Papers/AAAI/1992/AAAI92-020.pdf)
   #
   class Relief_d < BaseRelief
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
 
     private
 
@@ -19,8 +21,8 @@ module FSelector
       d = 0.0
 
       if not s1.has_key?(f) or not s2.has_key?(f)
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "Relief does not allow missing values"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " Relief does not allow missing values"
       end
 
       (s1[f] == s2[f]) ? 0.0 : 1.0
```
data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb CHANGED

```diff
@@ -14,12 +14,15 @@ module FSelector
   # H(C|f_j) = -1 * sigma_k (P(c_k|f_j) log2 P(c_k|f_j))
   # H(F) = -1 * sigma_i (P(f_i) log2 P(f_i))
   #
-  # ref: [Wikipedia](http://en.wikipedia.org/wiki/Symmetric_uncertainty)
+  # ref: [Wikipedia](http://en.wikipedia.org/wiki/Symmetric_uncertainty) and [Robust Feature Selection Using Ensemble Feature Selection Techniques](http://dl.acm.org/citation.cfm?id=1432021)
   #
   class SymmetricalUncertainty < BaseDiscrete
     # include Entropy module
     include Entropy
 
+    # this algo outputs weight for each feature
+    @algo_type = :feature_weighting
+
     private
 
     # calculate contribution of each feature (f) across all classes
```
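For reference, the symmetrical uncertainty scored by this class combines the entropies shown in the context lines above in the usual way (standard definition, stated here for convenience rather than quoted from the diff):

```latex
SU(C, F) = \frac{2\,\bigl(H(C) - H(C \mid F)\bigr)}{H(C) + H(F)}
```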
data/lib/fselector/discretizer.rb CHANGED

```diff
@@ -11,7 +11,7 @@ module Discretizer
   # discretize by equal-width intervals
   #
   # @param [Integer] n_interval
-  #
+  #   desired number of intervals
   # @note data structure will be altered
   #
   def discretize_by_equal_width!(n_interval)
@@ -38,7 +38,7 @@ module Discretizer
   # discretize by equal-frequency intervals
   #
   # @param [Integer] n_interval
-  #
+  #   desired number of intervals
   # @note data structure will be altered
   #
   def discretize_by_equal_frequency!(n_interval)
@@ -251,7 +251,7 @@ module Discretizer
       end
     end
     #pp f2bs
-    #pp f2sig_level
+    #pp f2sig_level
 
     # if there is only one interval, remove this feature
     each_sample do |k, s|
@@ -278,8 +278,8 @@ module Discretizer
     fv = get_feature_values(f)
 
     n = cv.size
-    abort "[#{__FILE__}@#{__LINE__}]: "+
-          "missing feature value is not allowed!" if n != fv.size
+    abort "[#{__FILE__}@#{__LINE__}]: \n"+
+          " missing feature value is not allowed!" if n != fv.size
 
     # sort cv and fv according to ascending order of fv
     sis = (0...n).to_a.sort { |i,j| fv[i] <=> fv[j] }
@@ -327,8 +327,8 @@ module Discretizer
     fv = get_feature_values(f)
 
     n = cv.size
-    abort "[#{__FILE__}@#{__LINE__}]: "+
-          "missing feature value is not allowed!" if n != fv.size
+    abort "[#{__FILE__}@#{__LINE__}]: \n"+
+          " missing feature value is not allowed!" if n != fv.size
 
     # sort cv and fv according to ascending order of fv
     sis = (0...n).to_a.sort { |i,j| fv[i] <=> fv[j] }
```
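The first two hunks above only clarify that `n_interval` is the desired number of intervals. As a reminder of what the two strategies produce, here is a small standalone sketch of equal-width versus equal-frequency cut points on a toy feature (illustrative only; the variable names and boundary convention are not taken from the gem):

```ruby
# Illustrative cut points for n_interval bins over one feature's values.
values     = [1, 3, 4, 7, 9, 12, 15, 20]
n_interval = 4

# equal-width: split [min, max] into bins of identical width
min, max = values.min, values.max
width = (max - min) / n_interval.to_f
equal_width_cuts = (1...n_interval).map { |i| min + i * width }

# equal-frequency: each bin holds (roughly) the same number of samples
sorted  = values.sort
per_bin = (values.size / n_interval.to_f).ceil
equal_freq_cuts = (1...n_interval).map { |i| sorted[[i * per_bin, sorted.size - 1].min] }

p equal_width_cuts  # => [5.75, 10.5, 15.25]
p equal_freq_cuts   # => [4, 9, 15]
```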
data/lib/fselector/ensemble.rb CHANGED

```diff
@@ -3,64 +3,50 @@
 #
 module FSelector
   #
-  # feature selection by an ensemble of
-  # sharing the same interface as single algo
+  # feature selection by an ensemble of feature selectors
   #
-  # for the type of weighting algorithms,
-  #
+  # for the type of feature weighting algorithms, call one of the following two
+  # functions first before calling select\_feature\_by\_score! or
   # select\_feature\_by\_rank! for feature selection:
-  # - ensemble\_by\_score()
-  # - ensemble\_by\_rank()
+  # - ensemble\_by\_score()  # ensemble scores are based on that of individual selector
+  # - ensemble\_by\_rank()   # ensemble ranks are based on that of individual selector
   #
-  # for the type of subset selection
-  # select\_feature! for feature selection (based on
+  # for the type of feature subset selection algorithms, use
+  # select\_feature! for feature selection (based on feature frequency count)
   #
-
-
-
+  # @note ensemble feature selectors share the same feature selection
+  #   interface as single feature selector
+  #
+  class BaseEnsemble < Base
     #
-    #
-    # @note different algorithms must be of the same type,
-    #   either weighting or subset selection (see {file:README.md})
+    # initialize from an existing data structure
     #
-    def initialize(
-      super(
-
-      @algos = []
-      algos.each do |r|
-        @algos << r
-      end
+    def initialize(data=nil)
+      super(data)
     end
 
 
+    # override algo\_type for BaseEnsemble
     #
-    #
-
-
-    # @note all algos share the same data structure
-    #
-    def set_data(data)
-      super
-
-      @algos.each do |r|
-        r.set_data(data)
-      end
+    # get the type of ensemble feature selectors at instance-level
+    def algo_type
+      @algo_type # instance-level variable
     end
 
 
     #
-    #
+    # override get\_feature\_scores() for BaseEnsemble
     #
     def get_feature_scores
       return @scores if @scores
 
-      abort "[#{__FILE__}@#{__LINE__}]: "+
-            "please call one ensemble method first!"
+      abort "[#{__FILE__}@#{__LINE__}]: \n"+
+            " please call one ensemble method first!"
     end
 
 
     #
-    #
+    # override get\_feature\_ranks() for BaseEnsemble
     #
     def get_feature_ranks
       return @ranks if @ranks
@@ -69,81 +55,91 @@ module FSelector
         set_ranks_from_scores
         return @ranks
       else
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "please call one ensemble method first!"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " please call one ensemble method first!"
       end
     end
 
 
     #
-    # ensemble scores are made from
+    # ensemble scores are made from that of individual feature selector
     #
     # @param [Symbol] ensem_method how the ensemble score should
-    #   be derived from those of individual
+    #   be derived from those of individual feature selector
     #   allowed values are:
-    #   - :by\_min
-    #   - :by\_max
-    #   - :by\_ave
-    #
-    #
-    # :
+    #   - :by\_min  # use min score
+    #   - :by\_max  # use max score
+    #   - :by\_ave  # use ave score
+    #   - :by\_sum  # use sum score
+    # @param [Symbol] norm_method score normalization method
+    #   - :none  # use score as is
+    #   - :by\_min\_max  # score scaled to [0, 1]
+    #   - :by\_zscore  # score converted to zscore
     #
-    # @note scores from different
-    #   each other, so we need to normalize
+    # @note scores from different feature selectors are often incompatible
+    #   with each other, so we need to normalize them first
     #
     def ensemble_by_score(ensem_method=:by_max, norm_method=:by_zscore)
-      if not [:by_min, :by_max, :by_ave].include? ensem_method
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "only :by_min, :by_max and :by_ave are supported ensemble methods!"
+      if not [:by_min, :by_max, :by_ave, :by_sum].include? ensem_method
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " only :by_min, :by_max and :by_ave are supported ensemble methods!"
       end
 
-      if not [:by_min_max, :by_zscore].include? norm_method
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "only :by_min_max and :by_zscore are supported normalization methods!"
+      if not [:none, :by_min_max, :by_zscore].include? norm_method
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " only :none, :by_min_max and :by_zscore are supported normalization methods!"
       end
 
-      #
-
-
-
+      # get score from each feature selector in the ensemble
+      ensem_scores = get_ensemble_scores
+
+      # normalization (if needed)
+      self.send(norm_method, ensem_scores) if not norm_method == :none
 
-
+      scores = {}
 
       each_feature do |f|
-
-        # score from individual
-        score_arr =
+        scores[f] = {}
+        # feature score from individual feature selector
+        score_arr = ensem_scores.collect { |es| es[f][:BEST] }
         # ensemble score
-
+        scores[f][:BEST] = self.send(ensem_method, score_arr)
       end
+
+      #pp scores
+      @scores = scores
     end
 
 
     #
-    # ensemble ranks are made from
+    # ensemble ranks are made from that of individual feature selector
     #
     # @param [Symbol] ensem_method how the ensemble rank should
-    #   be derived from those of individual
+    #   be derived from those of individual feature selector
     #   allowed values are:
-    #   - :by\_min
-    #   - :by\_max
-    #   - :by\_ave
+    #   - :by\_min  # use min rank
+    #   - :by\_max  # use max rank
+    #   - :by\_ave  # use ave rank
+    #   - :by\_sum  # use sum rank
     #
-    def ensemble_by_rank(ensem_method=:
-      if not [:by_min, :by_max, :by_ave].include? ensem_method
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "only :by_min, :by_max and :by_ave are supported ensemble methods!"
+    def ensemble_by_rank(ensem_method=:by_sum)
+      if not [:by_min, :by_max, :by_ave, :by_sum].include? ensem_method
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " only :by_min, :by_max and :by_ave are supported ensemble methods!"
       end
 
+      # get ranks from individual feature selector in ensemble
+      ensem_ranks = get_ensemble_ranks
+
       ranks = {}
-
+
       each_feature do |f|
-        #
-        rank_arr =
+        # feature rank from individual feature selector
+        rank_arr = ensem_ranks.collect { |er| er[f] }
        # ensemble rank
         ranks[f] = self.send(ensem_method, rank_arr)
       end
-
+      #pp ranks
       new_ranks = {}
 
       sorted_features = ranks.keys.sort do |x, y|
```
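The hunk above makes the data flow of `ensemble_by_score` explicit: `get_ensemble_scores` yields one hash per selector of the form `{feature => {:BEST => score}}`, each hash is normalized, and the per-feature values are then folded with `:by_min`/`:by_max`/`:by_ave`/`:by_sum`. A toy, self-contained reproduction of that combination step (plain Ruby, not the gem's classes):

```ruby
# Two selectors' scores in the {feature => {:BEST => score}} shape used above.
ensem_scores = [
  { :f1 => { :BEST => 3.0 }, :f2 => { :BEST => 1.0 } },   # selector 1
  { :f1 => { :BEST => 5.0 }, :f2 => { :BEST => 3.0 } }    # selector 2
]

# z-score each selector's scores so the two scales become comparable
ensem_scores.each do |score|
  vals = score.values.map { |ks| ks[:BEST] }
  ave  = vals.sum / vals.size
  sd   = Math.sqrt(vals.map { |v| (v - ave)**2 }.sum / vals.size)
  score.each { |f, ks| ks[:BEST] = (ks[:BEST] - ave) / sd }
end

# ensemble score per feature: the :by_max strategy keeps the max across selectors
scores = {}
ensem_scores.first.keys.each do |f|
  scores[f] = { :BEST => ensem_scores.map { |es| es[f][:BEST] }.max }
end

p scores   # both selectors agree here: f1 ends up at +1.0, f2 at -1.0
```

The gem's own `by_zscore` leans on `Array#ave`/`Array#sd` (apparently the `util.rb` extensions listed above); the manual mean and standard deviation in the sketch just keep it dependency-free.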
```diff
@@ -156,29 +152,7 @@ module FSelector
       @ranks = new_ranks
     end
 
-    private
-
-    #
-    # reload get\_feature\_subset() for Ensemble
-    #
-    # select a subset of consensus features selected by multiple algos
-    #
-    # @note the subset of features are based on the consensus features
-    #   selected by multiple algos. This is suitable only for the type
-    #   of subset selection algorithms
-    #
-    def get_feature_subset
-      subset = get_features.dup
-
-      @algos.each do |r|
-        # note we call a private method here
-        r_subset = r.send(:get_feature_subset)
-        subset = subset & r_subset
-      end
-
-      subset
-    end
-
+    private
 
     # by average value of an array
     def by_ave(arr)
@@ -197,41 +171,327 @@ module FSelector
       arr.max if arr.class == Array
     end
 
+
+    # by sum of an array
+    def by_sum(arr)
+      arr.sum if arr.class == Array
+    end
+
+
     #
-    # normalize feature scores
+    # normalize feature scores
     # by scaling to [0, 1]
     #
     # @note original scores will be altered in place
     #
-    def by_min_max(
-      scores
-
-
-
-
-
+    def by_min_max(scores)
+      scores.each do |score| # score from each feature selector
+        score_best = score.collect { |f, ks| ks[:BEST] }
+        min, max = score_best.min, score_best.max
+
+        score.each do |f, ks|
+          ks[:BEST] = (ks[:BEST]-min) / (max-min)
+        end
       end
     end
 
 
     #
-    # normalize feature scores
+    # normalize feature scores
     # by z-score
     #
     # @note original scores will be altered in place
     #
-    def by_zscore(
-      scores
-
-
+    def by_zscore(scores)
+      scores.each do |score| # score from each feature selector
+        score_best = score.collect { |f, ks| ks[:BEST] }
+        ave, sd = score_best.ave, score_best.sd
+
+        score.each do |f, ks|
+          ks[:BEST] = (ks[:BEST]-ave) / sd
+        end
+      end
+    end
+
+
+  end # BaseEnsemble
+
+
+  #
+  # feature selection by an ensemble of feature selectors
+  # that created by using a single feature selection algorithm
+  #
+  # for the type of feature weighting algorithms, call one of the following two
+  # functions first before calling select\_feature\_by\_score! or
+  # select\_feature\_by\_rank! for feature selection:
+  # - ensemble\_by\_score()  # ensemble scores are based on that of individual selector
+  # - ensemble\_by\_rank()   # ensemble ranks are based on that of individual selector
+  #
+  # for the type of feature subset selection algorithms, use
+  # select\_feature! for feature selection (based on feature frequency count)
+  #
+  # @note ensemble feature selectors share the same feature selection
+  #   interface as single feature selector
+  #
+  class EnsembleSingle < BaseEnsemble
+    #
+    # initialize from a single feature selection algorithm
+    #
+    # @param [Algorithm] algo feature selection algorithm
+    # @param [Integer] nselector number of feature selectors
+    # @param [Float] pdata percentage of data used by each feature selector
+    # @param [Symbol] sampling_method sampling method
+    #   - :bootstrap\_sampling  # random sampling with replacement
+    #   - :random\_sampling     # random sampling without replacement
+    #
+    # ref: [Robust Feature Selection Using Ensemble Feature Selection Techniques](http://dl.acm.org/citation.cfm?id=1432021)
+    #
+    def initialize(algo, nselector=40, pdata=0.90, sampling_method=:bootstrap_sampling, data=nil)
+      super(data)
+
+      @algo = algo
+      @nselector = nselector || 40
+      @pdata = pdata || 0.90
+      @sampling_method = sampling_method || :bootstrap_sampling
+
+      # set feature selector type
+      @algo_type = algo.algo_type
+    end
+
+
+    #
+    # get ensemble feature scores
+    #
+    # @return [Array] feature scores from all feature selectors
+    #
+    def get_ensemble_scores
+      ensem_scores = []
+
+      @nselector.times do
+        # sampling
+        my_data = self.send(@sampling_method)
+
+        # score from this feature selector
+        r = @algo
+        r.set_data(my_data)
+        ensem_scores << r.get_feature_scores
+      end
+
+      ensem_scores
+      #pp ensem_scores
+    end # get_feature_scores
+
+
+    #
+    # get ensemble feature ranks
+    #
+    # @return [Array] feature ranks from all feature selectors
+    #
+    def get_ensemble_ranks
+      ensem_ranks = []
 
-
-
+      @nselector.times do
+        # sampling
+        my_data = self.send(@sampling_method)
+
+        # rank from this feature selector
+        r = @algo
+        r.set_data(my_data)
+        ensem_ranks << r.get_feature_ranks
+      end
+
+      ensem_ranks
+      #pp ensem_ranks
+    end # get_ensemble_ranks
+
+    private
+
+    #
+    # override get\_feature\_subset() for EnsembleSingle,
+    # select a subset of features based on frequency count
+    #
+    # @note only the features that occur in the ensemble
+    #   with above average count are selected
+    #
+    def get_feature_subset
+      f2count = Hash.new(0)
+      total_count = 0.0
+
+      @nselector.times do
+        # sampling
+        my_data = self.send(@sampling_method)
+
+        # subset from this selector
+        r = @algo
+        r.set_data(my_data)
+        # note we call a private method here
+        r_subset = r.send(:get_feature_subset)
+
+        # record count
+        r_subset.each do |f|
+          total_count += 1
+          f2count[f] += 1
+        end
+      end
+      #pp f2count
+      #pp total_count
+
+      # only the features that occur in the ensemble
+      # with above average count are selected
+      subset = f2count.keys.select { |f| f2count[f] > total_count/f2count.keys.size }
+
+      subset
+    end # get_feature_subset
+
+    # sampling with replacement
+    # @note sampling will be done stratifily in each class
+    def bootstrap_sampling
+      my_data = {}
+
+      each_class do |k|
+        my_data[k] = []
+
+        n = (get_data[k].size * @pdata).to_i
+        n.times { # with replacement
+          my_data[k] << get_data[k].sample
+        }
+      end
+
+      my_data
+    end # bootstrap_sampling
+
+
+    # sampling without replacement
+    # @note sampling will be done stratifily in each class
+    def random_sampling
+      my_data = {}
+
+      each_class do |k|
+        n = (get_data[k].size * @pdata).to_i
+        my_data[k] = get_data[k].sample(n) # without replacement
+      end
+
+      my_data
+    end # random_sampling
+
+  end # EnsembleSingle
+
+
+  #
+  # feature selection by an ensemble of feature selectors
+  # that created by using multiple algorithms of the same type
+  #
+  # for the type of feature weighting algorithms, call one of the following two
+  # functions first before calling select\_feature\_by\_score! or
+  # select\_feature\_by\_rank! for feature selection:
+  # - ensemble\_by\_score()  # ensemble scores are based on that of individual selector
+  # - ensemble\_by\_rank()   # ensemble ranks are based on that of individual selector
+  #
+  # for the type of feature subset selection algorithms, use
+  # select\_feature! for feature selection (based on feature frequency count)
+  #
+  # @note ensemble feature selectors share the same feature selection
+  #   interface as single feature selector
+  #
+  class EnsembleMultiple < BaseEnsemble
+    #
+    # initialize from multiple algorithms
+    #
+    # @param [Array] algos multiple feature selection algorithms
+    # @note different algorithms must be of the same type,
+    #   either weighting or subset selection (see {file:README.md})
+    #
+    def initialize(*algos)
+      super(nil)
+
+      @algos = []
+      algos.each do |r|
+        @algos << r
+      end
+
+      @algo_type = algos.first.algo_type
+      # all algorithms must be of the same type
+      algos.each do |r|
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " all algorithms must be of the same type" if not r.algo_type == @algo_type
       end
     end
 
+    #
+    # get ensemble feature scores
+    #
+    # @return [Array] feature scores from all algorithms
+    #
+    def get_ensemble_scores
+      ensem_scores = []
+
+      @algos.each do |r|
+        # score from this feature selector
+        r.set_data(get_data) # share same data structure
+        ensem_scores << r.get_feature_scores
+      end
+
+      ensem_scores
+      #pp ensem_scores
+    end # get_feature_scores
+
+
+    #
+    # get ensemble feature ranks
+    #
+    # @return [Array] feature ranks from all feature selectors
+    #
+    def get_ensemble_ranks
+      ensem_ranks = []
+
+      @algos.each do |r|
+        # rank from this feature selector
+        r.set_data(get_data)
+        ensem_ranks << r.get_feature_ranks
+      end
+
+      ensem_ranks
+      #pp ensem_ranks
+    end # get_ensemble_ranks
+
+
+    private
+
+    #
+    # override get\_feature\_subset() for EnsembleMultiple,
+    # select a subset of features based on frequency count
+    #
+    # @note only the features that occur in the ensemble
+    #   with above average count are selected
+    #
+    def get_feature_subset
+      f2count = Hash.new(0)
+      total_count = 0.0
+
+      @algos.each do |r|
+        # subset from this selector
+        r.set_data(get_data)
+        # note we call a private method here
+        r_subset = r.send(:get_feature_subset)
+
+        # record count
+        r_subset.each do |f|
+          total_count += 1
+          f2count[f] += 1
+        end
+      end
+      #pp f2count
+      #pp total_count
+
+      # only the features that occur in the ensemble
+      # with above average count are selected
+      subset = f2count.keys.select { |f| f2count[f] > total_count/f2count.keys.size }
+
+      subset
+    end # get_feature_subset
+
 
-  end #
+  end # EnsembleMultiple
 
 
 end # module
```
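Taken together, the new classes follow the call order documented in the comments above. A hedged end-to-end sketch (the class names and ensemble methods come from this diff; the inline data-hash format, the algorithm constructors' default arguments, and the criterion strings passed to `select_feature_by_rank!`/`select_feature_by_score!` are assumptions based on the gem's README and may need adjusting):

```ruby
require 'fselector'

# toy two-class data; the {class => [{feature => value, ...}, ...]} shape is
# inferred from the accessors used in this diff and may differ in detail
data = {
  :c1 => [ { :f1 => 1, :f2 => 0 }, { :f1 => 1, :f2 => 1 } ],
  :c2 => [ { :f1 => 0, :f2 => 1 }, { :f1 => 0, :f2 => 0 } ]
}

# one algorithm, 40 selectors, each trained on a 90% bootstrap sample
re_single = FSelector::EnsembleSingle.new(
  FSelector::InformationGain.new, 40, 0.90, :bootstrap_sampling)
re_single.set_data(data)
re_single.ensemble_by_rank(:by_sum)        # must precede rank-based selection
re_single.select_feature_by_rank!('<=1')   # criterion string: assumed syntax

# several weighting algorithms of the same type sharing one data structure
re_multi = FSelector::EnsembleMultiple.new(
  FSelector::InformationGain.new, FSelector::SymmetricalUncertainty.new)
re_multi.set_data(data)
re_multi.ensemble_by_score(:by_max, :by_zscore)
re_multi.select_feature_by_score!('>0')    # criterion string: assumed syntax
```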