fselector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
 - data/README.md +195 -0
 - data/lib/fselector.rb +41 -0
 - data/lib/fselector/algo_continuous/PMetric.rb +51 -0
 - data/lib/fselector/algo_continuous/ReliefF_c.rb +190 -0
 - data/lib/fselector/algo_continuous/Relief_c.rb +150 -0
 - data/lib/fselector/algo_continuous/TScore.rb +52 -0
 - data/lib/fselector/algo_continuous/discretizer.rb +219 -0
 - data/lib/fselector/algo_continuous/normalizer.rb +59 -0
 - data/lib/fselector/algo_discrete/Accuracy.rb +35 -0
 - data/lib/fselector/algo_discrete/AccuracyBalanced.rb +37 -0
 - data/lib/fselector/algo_discrete/BiNormalSeparation.rb +45 -0
 - data/lib/fselector/algo_discrete/ChiSquaredTest.rb +69 -0
 - data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +42 -0
 - data/lib/fselector/algo_discrete/DocumentFrequency.rb +36 -0
 - data/lib/fselector/algo_discrete/F1Measure.rb +41 -0
 - data/lib/fselector/algo_discrete/FishersExactTest.rb +47 -0
 - data/lib/fselector/algo_discrete/GMean.rb +37 -0
 - data/lib/fselector/algo_discrete/GSSCoefficient.rb +43 -0
 - data/lib/fselector/algo_discrete/GiniIndex.rb +44 -0
 - data/lib/fselector/algo_discrete/InformationGain.rb +96 -0
 - data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +45 -0
 - data/lib/fselector/algo_discrete/McNemarsTest.rb +57 -0
 - data/lib/fselector/algo_discrete/MutualInformation.rb +42 -0
 - data/lib/fselector/algo_discrete/OddsRatio.rb +46 -0
 - data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +41 -0
 - data/lib/fselector/algo_discrete/Power.rb +46 -0
 - data/lib/fselector/algo_discrete/Precision.rb +31 -0
 - data/lib/fselector/algo_discrete/ProbabilityRatio.rb +41 -0
 - data/lib/fselector/algo_discrete/Random.rb +40 -0
 - data/lib/fselector/algo_discrete/ReliefF_d.rb +173 -0
 - data/lib/fselector/algo_discrete/Relief_d.rb +135 -0
 - data/lib/fselector/algo_discrete/Sensitivity.rb +38 -0
 - data/lib/fselector/algo_discrete/Specificity.rb +35 -0
 - data/lib/fselector/base.rb +322 -0
 - data/lib/fselector/base_continuous.rb +25 -0
 - data/lib/fselector/base_discrete.rb +355 -0
 - data/lib/fselector/ensemble.rb +181 -0
 - data/lib/fselector/fileio.rb +455 -0
 - data/lib/fselector/util.rb +707 -0
 - metadata +86 -0
 
| 
         @@ -0,0 +1,25 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.expand_path(File.dirname(__FILE__) + '/algo_continuous/normalizer.rb')
         
     | 
| 
      
 2 
     | 
    
         
            +
            require File.expand_path(File.dirname(__FILE__) + '/algo_continuous/discretizer.rb')
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # FSelector: a Ruby gem for feature selection and ranking
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            module FSelector
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            # base ranking algorithm for handling continuous features
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
              class BaseContinuous < Base
                # mix in the Normalizer module
                include Normalizer
                # mix in the Discretilizer module
                include Discretilizer

                # Create a new instance, optionally from an existing data structure.
                #
                # @param data optional data structure, forwarded unchanged to
                #   the parent class constructor
                def initialize(data=nil)
                  # bare super forwards the current value of data
                  super
                end
              end # class
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            end # module
         
     | 
| 
         @@ -0,0 +1,355 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # FSelector: a Ruby gem for feature selection and ranking
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            module FSelector
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            #  base ranking algorithm for handling discrete features
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            #     2 x 2 contingency table
         
     | 
| 
      
 9 
     | 
    
         
            +
            #     
         
     | 
| 
      
 10 
     | 
    
         
            +
            #           c   c'
         
     | 
| 
      
 11 
     | 
    
         
            +
            #         ---------
         
     | 
| 
      
 12 
     | 
    
         
            +
            #      f  | A | B | A+B
         
     | 
| 
      
 13 
     | 
    
         
            +
            #         |---|---| 
         
     | 
| 
      
 14 
     | 
    
         
            +
            #      f' | C | D | C+D
         
     | 
| 
      
 15 
     | 
    
         
            +
            #         ---------
         
     | 
| 
      
 16 
     | 
    
         
            +
            #          A+C B+D  N = A+B+C+D
         
     | 
| 
      
 17 
     | 
    
         
            +
            #     
         
     | 
| 
      
 18 
     | 
    
         
            +
            #      P(f)     = (A+B)/N
         
     | 
| 
      
 19 
     | 
    
         
            +
            #      P(f')    = (C+D)/N
         
     | 
| 
      
 20 
     | 
    
         
            +
            #      P(c)     = (A+C)/N
         
     | 
| 
      
 21 
     | 
    
         
            +
            #      P(c')    = (B+D)/N
         
     | 
| 
      
 22 
     | 
    
         
            +
            #      P(f,c)   = A/N
         
     | 
| 
      
 23 
     | 
    
         
            +
            #      P(f,c')  = B/N
         
     | 
| 
      
 24 
     | 
    
         
            +
            #      P(f',c)  = C/N
         
     | 
| 
      
 25 
     | 
    
         
            +
            #      P(f',c') = D/N
         
     | 
| 
      
 26 
     | 
    
         
            +
            #
         
     | 
| 
      
 27 
     | 
    
         
            +
              class BaseDiscrete < Base
         
     | 
| 
      
 28 
     | 
    
         
            +
                # initialize from an existing data structure
         
     | 
| 
      
 29 
     | 
    
         
            +
                def initialize(data=nil)
                  # data: optional existing data structure; bare super forwards
                  # it unchanged to the parent class constructor
                  super
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                private
         
     | 
| 
      
 34 
     | 
    
         
            +
                
         
     | 
| 
      
 35 
     | 
    
         
            +
                # count of sample (i.e. 'A' or CT00) that
         
     | 
| 
      
 36 
     | 
    
         
            +
                # contains feature (f = v) and belongs to class (k)
         
     | 
| 
      
 37 
     | 
    
         
            +
                def get_Av(f, k, v)
                  # f: feature, k: class label, v: feature value.
                  # Returns the count stored at @Av[k][f][v]; the table is built
                  # by calc_Av on first use and cached in @Av afterwards.
                  @Av = calc_Av unless @Av

                  # the 0.5 zero-count adjustment is disabled for this accessor,
                  # so the stored count is returned unchanged
                  @Av[k][f][v]
                end
         
     | 
| 
      
 46 
     | 
    
         
            +
                
         
     | 
| 
      
 47 
     | 
    
         
            +
                
         
     | 
| 
      
 48 
     | 
    
         
            +
                # pre-compute 'A' or CT00
         
     | 
| 
      
 49 
     | 
    
         
            +
                # feature (f) has categorical values
         
     | 
| 
      
 50 
     | 
    
         
            +
                def calc_Av
                  # Builds table[class][feature][value] => Float count of the
                  # samples that belong to the class and carry the feature with
                  # exactly that value.
                  table = {}

                  each_class do |klass|
                    by_feature = (table[klass] = {})

                    each_feature do |feat|
                      by_value = (by_feature[feat] = {})

                      get_feature_values(feat).each do |val|
                        hits = 0.0

                        each_sample do |sample_class, sample|
                          # only samples of the current class are counted
                          next unless sample_class == klass

                          hits += 1 if sample.has_key?(feat) && sample[feat] == val
                        end

                        by_value[val] = hits
                      end
                    end
                  end

                  table
                end
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
                
         
     | 
| 
      
 78 
     | 
    
         
            +
                # count of sample (i.e. 'B' or CT01) that
         
     | 
| 
      
 79 
     | 
    
         
            +
                # contains feature (f = v) but does not belong to class (k)
         
     | 
| 
      
 80 
     | 
    
         
            +
                def get_Bv(f, k, v)
                  # f: feature, k: class label, v: feature value.
                  # Returns the count stored at @Bv[k][f][v]; the table is built
                  # by calc_Bv on first use and cached in @Bv afterwards.
                  @Bv = calc_Bv unless @Bv

                  # the 0.5 zero-count adjustment is disabled for this accessor,
                  # so the stored count is returned unchanged
                  @Bv[k][f][v]
                end
         
     | 
| 
      
 89 
     | 
    
         
            +
                
         
     | 
| 
      
 90 
     | 
    
         
            +
                
         
     | 
| 
      
 91 
     | 
    
         
            +
                # pre-compute 'B' or CT01
         
     | 
| 
      
 92 
     | 
    
         
            +
                # feature (f) has categorical values
         
     | 
| 
      
 93 
     | 
    
         
            +
                def calc_Bv
                  # Builds table[class][feature][value] => Float count of the
                  # samples that do NOT belong to the class but carry the
                  # feature with exactly that value.
                  table = {}

                  each_class do |klass|
                    by_feature = (table[klass] = {})

                    each_feature do |feat|
                      by_value = (by_feature[feat] = {})

                      get_feature_values(feat).each do |val|
                        hits = 0.0

                        each_sample do |sample_class, sample|
                          # only samples outside the current class are counted
                          next unless sample_class != klass

                          hits += 1 if sample.has_key?(feat) && sample[feat] == val
                        end

                        by_value[val] = hits
                      end
                    end
                  end

                  table
                end
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
             
     | 
| 
      
 120 
     | 
    
         
            +
                # count of sample (i.e. 'C' or CT10) that
         
     | 
| 
      
 121 
     | 
    
         
            +
                # does not contain feature (f != v) but belongs to class (k)
         
     | 
| 
      
 122 
     | 
    
         
            +
                def get_Cv(f, k, v)
                  # f: feature, k: class label, v: feature value.
                  # Returns the count stored at @Cv[k][f][v]; the table is built
                  # by calc_Cv on first use and cached in @Cv afterwards.
                  @Cv = calc_Cv unless @Cv

                  # the 0.5 zero-count adjustment is disabled for this accessor,
                  # so the stored count is returned unchanged
                  @Cv[k][f][v]
                end
         
     | 
| 
      
 131 
     | 
    
         
            +
                
         
     | 
| 
      
 132 
     | 
    
         
            +
              
         
     | 
| 
      
 133 
     | 
    
         
            +
                # pre-compute 'C' or CT10
         
     | 
| 
      
 134 
     | 
    
         
            +
                # feature (f) has categorical values
         
     | 
| 
      
 135 
     | 
    
         
            +
                def calc_Cv
                  # Builds table[class][feature][value] => Float count of the
                  # samples that belong to the class and either lack the feature
                  # or carry it with a different value.
                  table = {}

                  each_class do |klass|
                    by_feature = (table[klass] = {})

                    each_feature do |feat|
                      by_value = (by_feature[feat] = {})

                      get_feature_values(feat).each do |val|
                        misses = 0.0

                        each_sample do |sample_class, sample|
                          # only samples of the current class are counted
                          next unless sample_class == klass

                          misses += 1 if !sample.has_key?(feat) || sample[feat] != val
                        end

                        by_value[val] = misses
                      end
                    end
                  end

                  table
                end
         
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
                # count of sample (i.e. 'D' or CT11) that
         
     | 
| 
      
 163 
     | 
    
         
            +
                # does not contain feature (f != v) and does not belong to class (k)
         
     | 
| 
      
 164 
     | 
    
         
            +
                def get_Dv(f, k, v)
                  # f: feature, k: class label, v: feature value.
                  # Returns the count stored at @Dv[k][f][v]; the table is built
                  # by calc_Dv on first use and cached in @Dv afterwards.
                  @Dv = calc_Dv unless @Dv

                  # the 0.5 zero-count adjustment is disabled for this accessor,
                  # so the stored count is returned unchanged
                  @Dv[k][f][v]
                end
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
      
 174 
     | 
    
         
            +
                
         
     | 
| 
      
 175 
     | 
    
         
            +
                # pre-compute 'D' or CT11
         
     | 
| 
      
 176 
     | 
    
         
            +
                # feature (f) has categorical values
         
     | 
| 
      
 177 
     | 
    
         
            +
                def calc_Dv
                  # Builds table[class][feature][value] => Float count of the
                  # samples that do NOT belong to the class and either lack the
                  # feature or carry it with a different value.
                  table = {}

                  each_class do |klass|
                    by_feature = (table[klass] = {})

                    each_feature do |feat|
                      by_value = (by_feature[feat] = {})

                      get_feature_values(feat).each do |val|
                        misses = 0.0

                        each_sample do |sample_class, sample|
                          # only samples outside the current class are counted
                          next unless sample_class != klass

                          misses += 1 if !sample.has_key?(feat) || sample[feat] != val
                        end

                        by_value[val] = misses
                      end
                    end
                  end

                  table
                end
         
     | 
| 
      
 202 
     | 
    
         
            +
                
         
     | 
| 
      
 203 
     | 
    
         
            +
                
         
     | 
| 
      
 204 
     | 
    
         
            +
                # count of sample (i.e. 'A') that
         
     | 
| 
      
 205 
     | 
    
         
            +
                # contains feature (f) and belongs to class (k)
         
     | 
| 
      
 206 
     | 
    
         
            +
                def get_A(f, k)
                  # f: feature, k: class label.
                  # Looks up @A[k][f]; the table is built by calc_A on first use
                  # and cached in @A afterwards.
                  @A = calc_A unless @A
                  stored = @A[k][f]

                  # a zero count is reported as 0.5 to avoid any ZERO in
                  # denominator or numerator; the cached table is not modified
                  stored.zero? ? stored + 0.5 : stored
                end
         
     | 
| 
      
 215 
     | 
    
         
            +
             
     | 
| 
      
 216 
     | 
    
         
            +
                
         
     | 
| 
      
 217 
     | 
    
         
            +
                # pre-compute 'A'
         
     | 
| 
      
 218 
     | 
    
         
            +
                def calc_A
                  # Builds table[class][feature] => Float count of the samples
                  # that belong to the class and have the feature as a key.
                  table = {}

                  each_class do |klass|
                    by_feature = (table[klass] = {})

                    each_feature do |feat|
                      hits = 0.0

                      each_sample do |sample_class, sample|
                        hits += 1 if sample_class == klass && sample.has_key?(feat)
                      end

                      by_feature[feat] = hits
                    end
                  end

                  table
                end
         
     | 
| 
      
 239 
     | 
    
         
            +
                
         
     | 
| 
      
 240 
     | 
    
         
            +
              
         
     | 
| 
      
 241 
     | 
    
         
            +
                # count of sample (i.e. 'B') that
         
     | 
| 
      
 242 
     | 
    
         
            +
                # contains feature (f) but does not belong to class (k)
         
     | 
| 
      
 243 
     | 
    
         
            +
                def get_B(f, k)
                  # f: feature, k: class label.
                  # Looks up @B[k][f]; the table is built by calc_B on first use
                  # and cached in @B afterwards.
                  @B = calc_B unless @B
                  stored = @B[k][f]

                  # a zero count is reported as 0.5 to avoid any ZERO in
                  # denominator or numerator; the cached table is not modified
                  stored.zero? ? stored + 0.5 : stored
                end
         
     | 
| 
      
 252 
     | 
    
         
            +
                
         
     | 
| 
      
 253 
     | 
    
         
            +
                
         
     | 
| 
      
 254 
     | 
    
         
            +
                # pre-compute 'B'
         
     | 
| 
      
 255 
     | 
    
         
            +
                def calc_B
                  # Builds table[class][feature] => Float count of the samples
                  # that do NOT belong to the class but have the feature as a key.
                  table = {}

                  each_class do |klass|
                    by_feature = (table[klass] = {})

                    each_feature do |feat|
                      hits = 0.0

                      each_sample do |sample_class, sample|
                        hits += 1 if sample_class != klass && sample.has_key?(feat)
                      end

                      by_feature[feat] = hits
                    end
                  end

                  table
                end
         
     | 
| 
      
 276 
     | 
    
         
            +
                
         
     | 
| 
      
 277 
     | 
    
         
            +
                
         
     | 
| 
      
 278 
     | 
    
         
            +
                # count of sample (i.e. 'C') that
         
     | 
| 
      
 279 
     | 
    
         
            +
                # does not contain feature (f) but belongs to class (k)
         
     | 
| 
      
 280 
     | 
    
         
            +
                def get_C(f, k)
                  # build the table of 'C' counts on first use, then reuse it
                  @C = calc_C unless @C
                  count = @C[k][f]

                  # a count of exactly zero is reported as 0.5 so that it never
                  # puts a ZERO into a denominator or numerator
                  count.zero? ? count + 0.5 : count
                end
         
     | 
| 
      
 289 
     | 
    
         
            +
                
         
     | 
| 
      
 290 
     | 
    
         
            +
              
         
     | 
| 
      
 291 
     | 
    
         
            +
                # pre-compute 'C'
         
     | 
| 
      
 292 
     | 
    
         
            +
                def calc_C
                  # table[class][feature] => number of samples labelled with the
                  # class that do not carry the feature
                  table = {}

                  each_class do |klass|
                    per_feature = (table[klass] = {})

                    each_feature do |feat|
                      tally = 0.0

                      each_sample do |sample_class, sample|
                        tally += 1 if sample_class == klass && !sample.has_key?(feat)
                      end

                      per_feature[feat] = tally
                    end
                  end

                  table
                end
         
     | 
| 
      
 313 
     | 
    
         
            +
                
         
     | 
| 
      
 314 
     | 
    
         
            +
                
         
     | 
| 
      
 315 
     | 
    
         
            +
                # count of sample (i.e. 'D') that
         
     | 
| 
      
 316 
     | 
    
         
            +
                # does not contain feature (f) and does not belong to class (k)
         
     | 
| 
      
 317 
     | 
    
         
            +
                def get_D(f, k)
                  # build the table of 'D' counts on first use, then reuse it
                  @D = calc_D unless @D
                  count = @D[k][f]

                  # a count of exactly zero is reported as 0.5 so that it never
                  # puts a ZERO into a denominator or numerator
                  count.zero? ? count + 0.5 : count
                end
         
     | 
| 
      
 326 
     | 
    
         
            +
             
     | 
| 
      
 327 
     | 
    
         
            +
                
         
     | 
| 
      
 328 
     | 
    
         
            +
                # pre-compute 'D'
         
     | 
| 
      
 329 
     | 
    
         
            +
                def calc_D
                  # table[class][feature] => number of samples that neither carry
                  # the feature nor are labelled with the class
                  table = {}

                  each_class do |klass|
                    per_feature = (table[klass] = {})

                    each_feature do |feat|
                      tally = 0.0

                      each_sample do |sample_class, sample|
                        tally += 1 if sample_class != klass && !sample.has_key?(feat)
                      end

                      per_feature[feat] = tally
                    end
                  end

                  table
                end
         
     | 
| 
      
 350 
     | 
    
         
            +
                
         
     | 
| 
      
 351 
     | 
    
         
            +
                
         
     | 
| 
      
 352 
     | 
    
         
            +
              end # class
         
     | 
| 
      
 353 
     | 
    
         
            +
             
     | 
| 
      
 354 
     | 
    
         
            +
             
     | 
| 
      
 355 
     | 
    
         
            +
            end # module
         
     | 
| 
         @@ -0,0 +1,181 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # FSelector: a Ruby gem for feature selection and ranking
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            module FSelector
         
     | 
| 
      
 5 
     | 
    
         
            +
              # select feature by an ensemble of ranking algorithms
         
     | 
| 
      
 6 
     | 
    
         
            +
              class Ensemble < Base
         
     | 
| 
      
 7 
     | 
    
         
            +
                # new()
         
     | 
| 
      
 8 
     | 
    
         
            +
                #
         
     | 
| 
      
 9 
     | 
    
         
            +
                # @param [Array] algos multiple feature ranking algorithms
         
     | 
| 
      
 10 
     | 
    
         
            +
                def initialize(*algos)
                  # the parent initializer is invoked with nil
                  super(nil)

                  # keep our own list of the member algorithms, in the order given
                  @algos = algos.dup
                end
         
     | 
| 
      
 18 
     | 
    
         
            +
                
         
     | 
| 
      
 19 
     | 
    
         
            +
                
         
     | 
| 
      
 20 
     | 
    
         
            +
                #
         
     | 
| 
      
 21 
     | 
    
         
            +
                # reload set\_data
         
     | 
| 
      
 22 
     | 
    
         
            +
                #
         
     | 
| 
      
 23 
     | 
    
         
            +
                # @note all algos share the same data structure
         
     | 
| 
      
 24 
     | 
    
         
            +
                #
         
     | 
| 
      
 25 
     | 
    
         
            +
                def set_data(data)
                  # let the parent class take the data first ...
                  super(data)

                  # ... then hand the very same object to every member algorithm
                  @algos.each { |algo| algo.set_data(data) }
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
                
         
     | 
| 
      
 33 
     | 
    
         
            +
                
         
     | 
| 
      
 34 
     | 
    
         
            +
                #
         
     | 
| 
      
 35 
     | 
    
         
            +
                # reload get\_feature\_scores
         
     | 
| 
      
 36 
     | 
    
         
            +
                #
         
     | 
| 
      
 37 
     | 
    
         
            +
                def get_feature_scores
                  # without stored consensus scores there is nothing to return
                  unless @scores
                    abort "[#{__FILE__}@#{__LINE__}]: "+
                            "please call one consensus scoring method first!"
                  end

                  @scores
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
                
         
     | 
| 
      
 44 
     | 
    
         
            +
                
         
     | 
| 
      
 45 
     | 
    
         
            +
                #
         
     | 
| 
      
 46 
     | 
    
         
            +
                # reload get\_feature\_ranks
         
     | 
| 
      
 47 
     | 
    
         
            +
                #
         
     | 
| 
      
 48 
     | 
    
         
            +
                def get_feature_ranks
                  # without stored consensus ranks there is nothing to return
                  unless @ranks
                    abort "[#{__FILE__}@#{__LINE__}]: "+
                            "please call one consensus ranking method first!"
                  end

                  @ranks
                end
         
     | 
| 
      
 54 
     | 
    
         
            +
                
         
     | 
| 
      
 55 
     | 
    
         
            +
                
         
     | 
| 
      
 56 
     | 
    
         
            +
                # ensemble based on score
         
     | 
| 
      
 57 
     | 
    
         
            +
                #
         
     | 
| 
      
 58 
     | 
    
         
            +
                # @param [Method] by_what by what criterion that ensemble
         
     | 
| 
      
 59 
     | 
    
         
            +
                #   score should be obtained from those of individual algorithms  
         
     | 
| 
      
 60 
     | 
    
         
            +
                #   allowed values are:  
         
     | 
| 
      
 61 
     | 
    
         
            +
                #   receiver.method(:by\_min) # by min score  
         
     | 
| 
      
 62 
     | 
    
         
            +
                #   receiver.method(:by\_max) # by max score  
         
     | 
| 
      
 63 
     | 
    
         
            +
                #   receiver.method(:by\_ave) # by ave score
         
     | 
| 
      
 64 
     | 
    
         
            +
                # @param [Symbol] norm normalization  
         
     | 
| 
      
 65 
     | 
    
         
            +
                #   :min\_max, score scaled to [0, 1]  
         
     | 
| 
      
 66 
     | 
    
         
            +
                #   :zscore, score converted to zscore
         
     | 
| 
      
 67 
     | 
    
         
            +
                #
         
     | 
| 
      
 68 
     | 
    
         
            +
                # @note scores from different algos are usually incompatible with
         
     | 
| 
      
 69 
     | 
    
         
            +
                #   each other, we have to normalize it first
         
     | 
| 
      
 70 
     | 
    
         
            +
                #
         
     | 
| 
      
 71 
     | 
    
         
            +
                def ensemble_by_score(by_what=method(:by_max), norm=:min_max)
                  # Combines the :BEST scores of all member algorithms into one
                  # consensus score per feature, stored in @scores[f][:BEST].
                  #
                  # by_what: callable that receives the Array of per-algorithm
                  #          scores of one feature and returns the combined score
                  # norm:    :min_max or :zscore; anything else aborts
                  #
                  # The normalizer is chosen once, up front, so an unsupported
                  # +norm+ is rejected even when no algorithm is registered.
                  normalizer =
                    case norm
                    when :min_max then method(:normalize_min_max!)
                    when :zscore  then method(:normalize_zscore!)
                    else
                      abort "[#{__FILE__}@#{__LINE__}]: "+
                              "invalid normalizer, only :min_max and :zscore supported!"
                    end

                  # scores from different algorithms are on different scales, so
                  # each algorithm's scores are normalized (in place) first
                  @algos.each { |r| normalizer.call(r) }

                  # look up every algorithm's score table once, not once per feature
                  score_tables = @algos.collect { |r| r.get_feature_scores }

                  @scores = {}

                  each_feature do |f|
                    @scores[f] = {
                      :BEST => by_what.call(score_tables.collect { |t| t[f][:BEST] })
                    }
                  end
                end
         
     | 
| 
      
 92 
     | 
    
         
            +
                
         
     | 
| 
      
 93 
     | 
    
         
            +
                
         
     | 
| 
      
 94 
     | 
    
         
            +
                # ensemble based on rank
         
     | 
| 
      
 95 
     | 
    
         
            +
                #
         
     | 
| 
      
 96 
     | 
    
         
            +
                # @param [Method] by_what by what criterion that ensemble
         
     | 
| 
      
 97 
     | 
    
         
            +
                #   rank should be obtained from those of individual algorithms  
         
     | 
| 
      
 98 
     | 
    
         
            +
                #   allowed values are:  
         
     | 
| 
      
 99 
     | 
    
         
            +
                #   method(:by\_min) # by min rank  
         
     | 
| 
      
 100 
     | 
    
         
            +
                #   method(:by\_max) # by max rank  
         
     | 
| 
      
 101 
     | 
    
         
            +
                #   method(:by\_ave) # by ave rank
         
     | 
| 
      
 102 
     | 
    
         
            +
                #
         
     | 
| 
      
 103 
     | 
    
         
            +
                def ensemble_by_rank(by_what=method(:by_min))
                  # Combines the ranks given by all member algorithms into one
                  # consensus rank per feature; the result is stored in @ranks
                  # and returned.
                  #
                  # by_what: callable that receives the Array of per-algorithm
                  #          ranks of one feature and returns the combined value

                  # look up every algorithm's rank table once, not once per feature
                  rank_tables = @algos.collect { |r| r.get_feature_ranks }

                  # combined (not yet consecutive) rank of every feature
                  ranks = {}
                  each_feature do |f|
                    ranks[f] = by_what.call(rank_tables.collect { |t| t[f] })
                  end

                  # Array#sort is not stable, so features with the same combined
                  # rank are ordered explicitly by the position at which
                  # each_feature yielded them; this keeps the outcome reproducible
                  ordered = ranks.keys.each_with_index.sort_by do |f, i|
                    [ranks[f], i]
                  end

                  # re-number to consecutive ranks starting at 1
                  new_ranks = {}
                  ordered.each_with_index do |(f, _), si|
                    new_ranks[f] = si + 1
                  end

                  @ranks = new_ranks
                end
         
     | 
| 
      
 123 
     | 
    
         
            +
                
         
     | 
| 
      
 124 
     | 
    
         
            +
                
         
     | 
| 
      
 125 
     | 
    
         
            +
                # by average value of an array
         
     | 
| 
      
 126 
     | 
    
         
            +
                def by_ave(arr)
                  # consensus criterion: value of arr.ave for the per-algorithm
                  # values of one feature; Array subclasses are accepted as well,
                  # anything that is not an Array yields nil
                  # NOTE(review): Array#ave is not Ruby core -- presumably a
                  # project extension; confirm it is loaded
                  arr.ave if arr.is_a?(Array)
                end
         
     | 
| 
      
 129 
     | 
    
         
            +
                
         
     | 
| 
      
 130 
     | 
    
         
            +
                
         
     | 
| 
      
 131 
     | 
    
         
            +
                # by min value of an array
         
     | 
| 
      
 132 
     | 
    
         
            +
                def by_min(arr)
                  # consensus criterion: smallest of the per-algorithm values of
                  # one feature; Array subclasses are accepted as well, anything
                  # that is not an Array yields nil
                  arr.min if arr.is_a?(Array)
                end
         
     | 
| 
      
 135 
     | 
    
         
            +
                
         
     | 
| 
      
 136 
     | 
    
         
            +
                
         
     | 
| 
      
 137 
     | 
    
         
            +
                # by max value of an array
         
     | 
| 
      
 138 
     | 
    
         
            +
                def by_max(arr)
                  # consensus criterion: largest of the per-algorithm values of
                  # one feature; Array subclasses are accepted as well, anything
                  # that is not an Array yields nil
                  arr.max if arr.is_a?(Array)
                end
         
     | 
| 
      
 141 
     | 
    
         
            +
                
         
     | 
| 
      
 142 
     | 
    
         
            +
                private
         
     | 
| 
      
 143 
     | 
    
         
            +
                
         
     | 
| 
      
 144 
     | 
    
         
            +
                #
         
     | 
| 
      
 145 
     | 
    
         
            +
                # normalize feature scores of each individual algorithm (r)
         
     | 
| 
      
 146 
     | 
    
         
            +
                # by scaling to [0, 1]
         
     | 
| 
      
 147 
     | 
    
         
            +
                #
         
     | 
| 
      
 148 
     | 
    
         
            +
                # @note original scores will be altered in place
         
     | 
| 
      
 149 
     | 
    
         
            +
                #
         
     | 
| 
      
 150 
     | 
    
         
            +
                def normalize_min_max!(r)
                  # Rescales the :BEST score of every feature of algorithm +r+
                  # to [0, 1]; the score hashes returned by
                  # r.get_feature_scores are modified in place.
                  scores = r.get_feature_scores
                  return scores if scores.empty?

                  scores_best = scores.collect { |_f, ks| ks[:BEST] }
                  min, max = scores_best.min, scores_best.max

                  # float span: keeps Integer scores from being truncated by
                  # integer division
                  span = (max - min).to_f

                  scores.each do |_f, ks|
                    # when all scores are equal there is no spread to scale by;
                    # every feature then gets 0.0 instead of a division by zero
                    ks[:BEST] = span.zero? ? 0.0 : (ks[:BEST] - min) / span
                  end
                end
         
     | 
| 
      
 159 
     | 
    
         
            +
                
         
     | 
| 
      
 160 
     | 
    
         
            +
                
         
     | 
| 
      
 161 
     | 
    
         
            +
                #
         
     | 
| 
      
 162 
     | 
    
         
            +
                # normalize feature scores of each individual algorithm (r)
         
     | 
| 
      
 163 
     | 
    
         
            +
                # by z-score
         
     | 
| 
      
 164 
     | 
    
         
            +
                #
         
     | 
| 
      
 165 
     | 
    
         
            +
                # @note original scores will be altered in place
         
     | 
| 
      
 166 
     | 
    
         
            +
                #
         
     | 
| 
      
 167 
     | 
    
         
            +
                def normalize_zscore!(r)
                  # Converts the :BEST score of every feature of algorithm +r+
                  # to (score - ave) / sd; the score hashes returned by
                  # r.get_feature_scores are modified in place.
                  # NOTE(review): Array#ave and Array#sd are not Ruby core --
                  # presumably project extensions; confirm they are loaded
                  scores = r.get_feature_scores
                  scores_best = scores.collect { |_f, ks| ks[:BEST] }
                  ave, sd = scores_best.ave, scores_best.sd.to_f

                  # a zero or NaN spread cannot be divided by; every feature
                  # then gets 0.0 instead of NaN or Infinity
                  flat = sd.zero? || sd.nan?

                  scores.each do |_f, ks|
                    ks[:BEST] = flat ? 0.0 : (ks[:BEST] - ave) / sd
                  end
                end
         
     | 
| 
      
 176 
     | 
    
         
            +
                
         
     | 
| 
      
 177 
     | 
    
         
            +
                
         
     | 
| 
      
 178 
     | 
    
         
            +
              end # class
         
     | 
| 
      
 179 
     | 
    
         
            +
              
         
     | 
| 
      
 180 
     | 
    
         
            +
              
         
     | 
| 
      
 181 
     | 
    
         
            +
            end # module
         
     |