fselector 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/README.md +14 -12
- data/lib/fselector.rb +11 -10
- data/lib/fselector/{base.rb → algo_base/base.rb} +33 -41
- data/lib/fselector/algo_base/base_CFS.rb +135 -0
- data/lib/fselector/algo_base/base_Relief.rb +130 -0
- data/lib/fselector/algo_base/base_ReliefF.rb +157 -0
- data/lib/fselector/{base_continuous.rb → algo_base/base_continuous.rb} +2 -2
- data/lib/fselector/algo_base/base_discrete.rb +190 -0
- data/lib/fselector/algo_continuous/CFS_c.rb +47 -0
- data/lib/fselector/algo_continuous/ReliefF_c.rb +4 -133
- data/lib/fselector/algo_continuous/Relief_c.rb +3 -103
- data/lib/fselector/algo_discrete/CFS_d.rb +41 -0
- data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb +1 -1
- data/lib/fselector/algo_discrete/InformationGain.rb +15 -2
- data/lib/fselector/algo_discrete/ReliefF_d.rb +3 -132
- data/lib/fselector/algo_discrete/Relief_d.rb +3 -103
- data/lib/fselector/entropy.rb +125 -0
- data/lib/fselector/util.rb +22 -2
- metadata +20 -6
- data/lib/fselector/base_discrete.rb +0 -502
    
        data/LICENSE
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -8,22 +8,22 @@ FSelector: a Ruby gem for feature selection and ranking | |
| 8 8 | 
             
            **Email**: [need47@gmail.com](mailto:need47@gmail.com)  
         | 
| 9 9 | 
             
            **Copyright**: 2012  
         | 
| 10 10 | 
             
            **License**: MIT License  
         | 
| 11 | 
            -
            **Latest Version**: 0. | 
| 12 | 
            -
            **Release Date**:  | 
| 11 | 
            +
            **Latest Version**: 0.2.0  
         | 
| 12 | 
            +
            **Release Date**: April 1st 2012
         | 
| 13 13 |  | 
| 14 14 | 
             
            Synopsis
         | 
| 15 15 | 
             
            --------
         | 
| 16 16 |  | 
| 17 | 
            -
            FSelector is  | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
            feature selection  | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 17 | 
            +
            FSelector is a Ruby gem that aims to integrate various feature selection/ranking 
         | 
| 18 | 
            +
            algorithms into one single package. You are welcome to contact me (need47@gmail.com) 
         | 
| 19 | 
            +
            if you want to contribute your own algorithms or report a bug. FSelector enables 
         | 
| 20 | 
            +
            the user to perform feature selection by using either a single algorithm or an 
         | 
| 21 | 
            +
            ensemble of algorithms. FSelector acts on a full-feature data set with CSV, LibSVM 
         | 
| 22 | 
            +
            or WEKA file format and outputs a reduced data set with only the selected subset of 
         | 
| 23 | 
            +
            features, which can later be used as the input for various machine learning software 
         | 
| 24 | 
            +
            including LibSVM and WEKA. FSelector, itself, does not implement any of the machine 
         | 
| 25 | 
            +
            learning algorithms such as support vector machines and random forest. Below is a 
         | 
| 26 | 
            +
            summary of FSelector's features.
         | 
| 27 27 |  | 
| 28 28 | 
             
            Feature List
         | 
| 29 29 | 
             
            ------------
         | 
| @@ -35,6 +35,7 @@ Feature List | |
| 35 35 | 
             
                Accuracy                        Acc         discrete
         | 
| 36 36 | 
             
                AccuracyBalanced                Acc2        discrete
         | 
| 37 37 | 
             
                BiNormalSeparation              BNS         discrete
         | 
| 38 | 
            +
                CFS_d                           CFS_d       discrete
         | 
| 38 39 | 
             
                ChiSquaredTest                  CHI         discrete
         | 
| 39 40 | 
             
                CorrelationCoefficient          CC          discrete
         | 
| 40 41 | 
             
                DocumentFrequency               DF          discrete
         | 
| @@ -60,6 +61,7 @@ Feature List | |
| 60 61 | 
             
                Sensitivity                     SN, Recall  discrete
         | 
| 61 62 | 
             
                Specificity                     SP          discrete
         | 
| 62 63 | 
             
                SymmetricalUncertainty          SU          discrete
         | 
| 64 | 
            +
                CFS_c                           CFS_c       continuous
         | 
| 63 65 | 
             
                PMetric                         PM          continuous
         | 
| 64 66 | 
             
                Relief_c                        Relief_c    continuous
         | 
| 65 67 | 
             
                ReliefF_c                       ReliefF_c   continuous
         | 
    
        data/lib/fselector.rb
    CHANGED
    
    | @@ -3,7 +3,7 @@ | |
| 3 3 | 
             
            #
         | 
| 4 4 | 
             
            module FSelector
         | 
| 5 5 | 
             
              # module version
         | 
| 6 | 
            -
              VERSION = '0. | 
| 6 | 
            +
              VERSION = '0.2.0'
         | 
| 7 7 | 
             
            end
         | 
| 8 8 |  | 
| 9 9 | 
             
            ROOT = File.expand_path(File.dirname(__FILE__))
         | 
| @@ -13,18 +13,13 @@ ROOT = File.expand_path(File.dirname(__FILE__)) | |
| 13 13 | 
             
            #
         | 
| 14 14 | 
             
            require "#{ROOT}/fselector/fileio.rb"
         | 
| 15 15 | 
             
            require "#{ROOT}/fselector/util.rb"
         | 
| 16 | 
            +
            require "#{ROOT}/fselector/entropy.rb"
         | 
| 16 17 |  | 
| 17 18 | 
             
            #
         | 
| 18 19 | 
             
            # base class
         | 
| 19 | 
            -
            #
         | 
| 20 | 
            -
            require  | 
| 21 | 
            -
             | 
| 22 | 
            -
            require "#{ROOT}/fselector/base_continuous.rb"
         | 
| 23 | 
            -
             | 
| 24 | 
            -
            #
         | 
| 25 | 
            -
            # feature selection use an ensemble of algorithms
         | 
| 26 | 
            -
            #
         | 
| 27 | 
            -
            require "#{ROOT}/fselector/ensemble.rb"
         | 
| 20 | 
            +
            Dir.glob("#{ROOT}/fselector/algo_base/*").each do |f|
         | 
| 21 | 
            +
              require f
         | 
| 22 | 
            +
            end
         | 
| 28 23 |  | 
| 29 24 | 
             
            #
         | 
| 30 25 | 
             
            # algorithms for handling discrete feature
         | 
| @@ -39,3 +34,9 @@ end | |
| 39 34 | 
             
            Dir.glob("#{ROOT}/fselector/algo_continuous/*").each do |f|
         | 
| 40 35 | 
             
              require f
         | 
| 41 36 | 
             
            end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            #
         | 
| 39 | 
            +
            # feature selection use an ensemble of algorithms
         | 
| 40 | 
            +
            #
         | 
| 41 | 
            +
            require "#{ROOT}/fselector/ensemble.rb"
         | 
| 42 | 
            +
             | 
| @@ -80,6 +80,20 @@ module FSelector | |
| 80 80 | 
             
                end
         | 
| 81 81 |  | 
| 82 82 |  | 
| 83 | 
            +
                # get class labels
         | 
| 84 | 
            +
                def get_class_labels
         | 
| 85 | 
            +
                  if not @cv
         | 
| 86 | 
            +
                    @cv = []
         | 
| 87 | 
            +
                    
         | 
| 88 | 
            +
                    each_sample do |k, s|
         | 
| 89 | 
            +
                      @cv << k
         | 
| 90 | 
            +
                    end
         | 
| 91 | 
            +
                  end
         | 
| 92 | 
            +
                  
         | 
| 93 | 
            +
                  @cv
         | 
| 94 | 
            +
                end
         | 
| 95 | 
            +
                
         | 
| 96 | 
            +
                
         | 
| 83 97 | 
             
                # set classes
         | 
| 84 98 | 
             
                def set_classes(classes)
         | 
| 85 99 | 
             
                  if classes and classes.class == Array
         | 
| @@ -101,22 +115,34 @@ module FSelector | |
| 101 115 | 
             
                # get feature values
         | 
| 102 116 | 
             
                #
         | 
| 103 117 | 
             
                # @param [Symbol] f feature of interest
         | 
| 118 | 
            +
                # @param [Symbol] mv including missing feature values?
         | 
| 119 | 
            +
                #   don't include missing feature values (recorded as nils)
         | 
| 120 | 
            +
                #   if mv==nil, include otherwise
         | 
| 104 121 | 
             
                # @param [Symbol] ck class of interest.
         | 
| 105 | 
            -
                #    | 
| 106 | 
            -
                # | 
| 122 | 
            +
                #   if ck==nil, return feature values for all classes, otherwise return feature
         | 
| 123 | 
            +
                # values for the specific class (ck)
         | 
| 107 124 | 
             
                #
         | 
| 108 | 
            -
                def get_feature_values(f, ck=nil)
         | 
| 125 | 
            +
                def get_feature_values(f, mv=nil, ck=nil)
         | 
| 109 126 | 
             
                  @fvs ||= {}
         | 
| 110 127 |  | 
| 111 128 | 
             
                  if not @fvs.has_key? f
         | 
| 112 129 | 
             
                    @fvs[f] = {}
         | 
| 130 | 
            +
                    
         | 
| 113 131 | 
             
                    each_sample do |k, s|
         | 
| 114 132 | 
             
                      @fvs[f][k] = [] if not @fvs[f].has_key? k
         | 
| 115 | 
            -
                       | 
| 133 | 
            +
                      if s.has_key? f
         | 
| 134 | 
            +
                        @fvs[f][k] << s[f]
         | 
| 135 | 
            +
                      else
         | 
| 136 | 
            +
                        @fvs[f][k] << nil # for missing featue values
         | 
| 137 | 
            +
                      end
         | 
| 116 138 | 
             
                    end
         | 
| 117 139 | 
             
                  end
         | 
| 118 140 |  | 
| 119 | 
            -
                   | 
| 141 | 
            +
                  if mv # include missing feature values
         | 
| 142 | 
            +
                    return ck ? @fvs[f][ck] : @fvs[f].values.flatten
         | 
| 143 | 
            +
                  else # don't include
         | 
| 144 | 
            +
                    return ck ? @fvs[f][ck].compact : @fvs[f].values.flatten.compact
         | 
| 145 | 
            +
                  end  
         | 
| 120 146 | 
             
                end
         | 
| 121 147 |  | 
| 122 148 |  | 
| @@ -136,6 +162,7 @@ module FSelector | |
| 136 162 | 
             
                  @data
         | 
| 137 163 | 
             
                end
         | 
| 138 164 |  | 
| 165 | 
            +
                
         | 
| 139 166 | 
             
                # set data
         | 
| 140 167 | 
             
                def set_data(data)
         | 
| 141 168 | 
             
                  if data and data.class == Hash
         | 
| @@ -167,42 +194,7 @@ module FSelector | |
| 167 194 | 
             
                def get_sample_size
         | 
| 168 195 | 
             
                  @sz ||= get_data.values.flatten.size
         | 
| 169 196 | 
             
                end
         | 
| 170 | 
            -
             | 
| 171 | 
            -
                
         | 
| 172 | 
            -
                #
         | 
| 173 | 
            -
                # print feature scores
         | 
| 174 | 
            -
                #
         | 
| 175 | 
            -
                # @param [String] kclass class of interest
         | 
| 176 | 
            -
                #
         | 
| 177 | 
            -
                def print_feature_scores(feat=nil, kclass=nil)
         | 
| 178 | 
            -
                  scores = get_feature_scores
         | 
| 179 | 
            -
                  
         | 
| 180 | 
            -
                  scores.each do |f, ks|
         | 
| 181 | 
            -
                    next if feat and feat != f
         | 
| 182 | 
            -
                    
         | 
| 183 | 
            -
                    print "#{f} =>"
         | 
| 184 | 
            -
                    ks.each do |k, s|
         | 
| 185 | 
            -
                      if kclass
         | 
| 186 | 
            -
                        print " #{k}->#{s}" if k == kclass
         | 
| 187 | 
            -
                      else
         | 
| 188 | 
            -
                        print " #{k}->#{s}"
         | 
| 189 | 
            -
                      end
         | 
| 190 | 
            -
                    end
         | 
| 191 | 
            -
                    puts
         | 
| 192 | 
            -
                  end
         | 
| 193 | 
            -
                end
         | 
| 194 | 
            -
                
         | 
| 195 | 
            -
                
         | 
| 196 | 
            -
                # print feature ranks
         | 
| 197 | 
            -
                def print_feature_ranks
         | 
| 198 | 
            -
                  ranks = get_feature_ranks
         | 
| 199 | 
            -
                  
         | 
| 200 | 
            -
                  ranks.each do |f, r|
         | 
| 201 | 
            -
                    puts "#{f} => #{r}"
         | 
| 202 | 
            -
                  end
         | 
| 203 | 
            -
                end
         | 
| 204 | 
            -
                
         | 
| 205 | 
            -
                
         | 
| 197 | 
            +
                   
         | 
| 206 198 | 
             
                #
         | 
| 207 199 | 
             
                # get scores of all features for all classes
         | 
| 208 200 | 
             
                #
         | 
| @@ -0,0 +1,135 @@ | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            # FSelector: a Ruby gem for feature selection and ranking
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            module FSelector
         | 
| 5 | 
            +
            #
         | 
| 6 | 
            +
            # base class for Correlation-based Feature Selection (CFS) algorithm, see specialized
         | 
| 7 | 
            +
            # versions for discrete feature (CFS_d) and continuous feature (CFS_c), respectively
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # @note for simplicity, we use *sequential forward search* for optimal feature subset,
         | 
| 10 | 
            +
            # the original CFS that uses *best first search* only produces slightly better results
         | 
| 11 | 
            +
            # but demands much more computational resources
         | 
| 12 | 
            +
            #
         | 
| 13 | 
            +
            # ref: [Feature Selection for Discrete and Numeric Class Machine Learning](http://www.cs.waikato.ac.nz/ml/publications/1999/99MH-Feature-Select.pdf)
         | 
| 14 | 
            +
            #
         | 
| 15 | 
            +
              class BaseCFS < Base
         | 
| 16 | 
            +
                # undefine superclass methods
         | 
| 17 | 
            +
                undef :select_feature_by_score!
         | 
| 18 | 
            +
                undef :select_feature_by_rank!
         | 
| 19 | 
            +
                
         | 
| 20 | 
            +
                private
         | 
| 21 | 
            +
                
         | 
| 22 | 
            +
                # use sequential forward search
         | 
| 23 | 
            +
                def get_feature_subset
         | 
| 24 | 
            +
                  subset = []
         | 
| 25 | 
            +
                  feats = get_features.dup
         | 
| 26 | 
            +
            	  
         | 
| 27 | 
            +
            	    s_best = -100.0
         | 
| 28 | 
            +
                  # use cache
         | 
| 29 | 
            +
                  @rcf_best, @rff_best = 0.0, 0.0
         | 
| 30 | 
            +
            	  
         | 
| 31 | 
            +
                  improvement = true
         | 
| 32 | 
            +
                  
         | 
| 33 | 
            +
                  while improvement
         | 
| 34 | 
            +
                    improvement = false
         | 
| 35 | 
            +
                    f_max, s_max = nil, -100.0
         | 
| 36 | 
            +
                    rcf_max, rff_max = -100.0, -100.0
         | 
| 37 | 
            +
                    
         | 
| 38 | 
            +
                    feats.each do |f|
         | 
| 39 | 
            +
                      s_try, rcf_try, rff_try = calc_merit(subset, f)
         | 
| 40 | 
            +
                      
         | 
| 41 | 
            +
                      if s_try > s_best and s_try > s_max
         | 
| 42 | 
            +
                        f_max, s_max = f, s_try
         | 
| 43 | 
            +
                        rcf_max, rff_max = rcf_try, rff_try
         | 
| 44 | 
            +
                      end
         | 
| 45 | 
            +
                    end
         | 
| 46 | 
            +
                    
         | 
| 47 | 
            +
                    # add f_max to subset and remove it from feats
         | 
| 48 | 
            +
                    if f_max
         | 
| 49 | 
            +
                      subset << f_max
         | 
| 50 | 
            +
                      feats.delete(f_max)
         | 
| 51 | 
            +
                      improvement = true
         | 
| 52 | 
            +
                      # update info
         | 
| 53 | 
            +
                      s_best, @rcf_best, @rff_best = s_max, rcf_max, rff_max
         | 
| 54 | 
            +
                    end
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
                  
         | 
| 57 | 
            +
                  subset
         | 
| 58 | 
            +
                end # get_feature_subset
         | 
| 59 | 
            +
                
         | 
| 60 | 
            +
                
         | 
| 61 | 
            +
                # calc new merit of subset when adding feature (f)
         | 
| 62 | 
            +
                def calc_merit(subset, f)
         | 
| 63 | 
            +
                  k = subset.size.to_f + 1
         | 
| 64 | 
            +
                  
         | 
| 65 | 
            +
                  # use cache
         | 
| 66 | 
            +
                  rcf = @rcf_best + calc_rcf(f)
         | 
| 67 | 
            +
                  rff = @rff_best      
         | 
| 68 | 
            +
                  subset.each do |s|
         | 
| 69 | 
            +
                    rff += 2*calc_rff(f, s)
         | 
| 70 | 
            +
                  end
         | 
| 71 | 
            +
                  
         | 
| 72 | 
            +
                  m = rcf/Math.sqrt(k+rff)
         | 
| 73 | 
            +
                  
         | 
| 74 | 
            +
                  [m, rcf, rff]
         | 
| 75 | 
            +
                end # calc_metrit
         | 
| 76 | 
            +
                   
         | 
| 77 | 
            +
                
         | 
| 78 | 
            +
            	  # calc feature-class correlation
         | 
| 79 | 
            +
                def calc_rcf(f)
         | 
| 80 | 
            +
            	    @f2rcf ||= {} # use cache
         | 
| 81 | 
            +
            	    
         | 
| 82 | 
            +
            	    if not @f2rcf.has_key? f
         | 
| 83 | 
            +
            	      cv = get_class_labels
         | 
| 84 | 
            +
            	      fv = get_feature_values(f, :include_missing_values)
         | 
| 85 | 
            +
            	      @f2rcf[f] = do_rcf(cv, fv)
         | 
| 86 | 
            +
            	    end
         | 
| 87 | 
            +
            	    
         | 
| 88 | 
            +
            	    @f2rcf[f]
         | 
| 89 | 
            +
                end # calc_rcf
         | 
| 90 | 
            +
                
         | 
| 91 | 
            +
                
         | 
| 92 | 
            +
                # calc feature-feature intercorrelation
         | 
| 93 | 
            +
                def calc_rff(f, s)
         | 
| 94 | 
            +
            	    @fs2rff ||= {} # use cache
         | 
| 95 | 
            +
            	    
         | 
| 96 | 
            +
            	    if not @f2idx
         | 
| 97 | 
            +
            	      @f2idx = {}
         | 
| 98 | 
            +
            	      fvs = get_features
         | 
| 99 | 
            +
            		    fvs.each_with_index { |f, idx| @f2idx[f] = idx }
         | 
| 100 | 
            +
            	    end
         | 
| 101 | 
            +
            	    
         | 
| 102 | 
            +
            	    if @f2idx[f] > @f2idx[s]
         | 
| 103 | 
            +
            	      k = [f, s].join('_')
         | 
| 104 | 
            +
            	    else
         | 
| 105 | 
            +
            	      k = [s, f].join('_')
         | 
| 106 | 
            +
            	    end
         | 
| 107 | 
            +
            	    
         | 
| 108 | 
            +
            	    if not @fs2rff.has_key? k
         | 
| 109 | 
            +
            	      fv = get_feature_values(f, :include_missing_values)
         | 
| 110 | 
            +
            	      sv = get_feature_values(s, :include_missing_values)
         | 
| 111 | 
            +
            	      @fs2rff[k] = do_rff(fv, sv)
         | 
| 112 | 
            +
            	    end
         | 
| 113 | 
            +
            	    
         | 
| 114 | 
            +
            	    @fs2rff[k]
         | 
| 115 | 
            +
                end # calc_rff
         | 
| 116 | 
            +
                
         | 
| 117 | 
            +
            	
         | 
| 118 | 
            +
            	  # calc the feature-class correlation of two vectors
         | 
| 119 | 
            +
                def do_rcf(cv, fv)
         | 
| 120 | 
            +
                  abort "[#{__FILE__}@#{__LINE__}]: "+
         | 
| 121 | 
            +
                         "derived CFS algo must implement its own do_rcf()"
         | 
| 122 | 
            +
                end # do_rcf
         | 
| 123 | 
            +
                
         | 
| 124 | 
            +
                
         | 
| 125 | 
            +
                # calc the feature-feature intercorrelation of two vectors
         | 
| 126 | 
            +
                def do_rff(fv, sv)
         | 
| 127 | 
            +
                  abort "[#{__FILE__}@#{__LINE__}]: "+
         | 
| 128 | 
            +
                         "derived CFS algo must implement its own do_rff()"
         | 
| 129 | 
            +
                end # do_rff
         | 
| 130 | 
            +
                
         | 
| 131 | 
            +
                
         | 
| 132 | 
            +
              end # class
         | 
| 133 | 
            +
              
         | 
| 134 | 
            +
              
         | 
| 135 | 
            +
            end # module
         | 
| @@ -0,0 +1,130 @@ | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            # FSelector: a Ruby gem for feature selection and ranking
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            module FSelector
         | 
| 5 | 
            +
            #
         | 
| 6 | 
            +
            # base class for Relief algorithm, see specialized versions for discrete
         | 
| 7 | 
            +
            # feature (Relief_d) and continuous feature (Relief_c), respectively
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # @note Relief applicable only to two-class problem without missing data
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # ref: [The Feature Selection Problem: Traditional Methods and a New Algorithm](http://www.aaai.org/Papers/AAAI/1992/AAAI92-020.pdf)
         | 
| 12 | 
            +
            #
         | 
| 13 | 
            +
              class BaseRelief < Base
         | 
| 14 | 
            +
                #
         | 
| 15 | 
            +
                # new()
         | 
| 16 | 
            +
                #
         | 
| 17 | 
            +
                # @param [Integer] m number of samples to be used 
         | 
| 18 | 
            +
                #   for estimating feature contribution. max can be
         | 
| 19 | 
            +
                #   the number of training samples
         | 
| 20 | 
            +
                # @param [Hash] data existing data structure
         | 
| 21 | 
            +
                #
         | 
| 22 | 
            +
                def initialize(m=nil, data=nil)
         | 
| 23 | 
            +
                  super(data)
         | 
| 24 | 
            +
                  @m = (m || 30) # default 30
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
                
         | 
| 27 | 
            +
                private
         | 
| 28 | 
            +
                
         | 
| 29 | 
            +
                # calculate contribution of each feature (f) across all classes
         | 
| 30 | 
            +
                def calc_contribution(f)
         | 
| 31 | 
            +
                  if not get_classes.size == 2
         | 
| 32 | 
            +
                    abort "[#{__FILE__}@#{__LINE__}]: "+
         | 
| 33 | 
            +
                    "Relief applicable only to two-class problems without missing data"
         | 
| 34 | 
            +
                  end
         | 
| 35 | 
            +
                  
         | 
| 36 | 
            +
                  ## use all samples if @m not provided
         | 
| 37 | 
            +
                  #@m = get_sample_size if not @m
         | 
| 38 | 
            +
                  
         | 
| 39 | 
            +
                  k1, k2 = get_classes
         | 
| 40 | 
            +
                  score = 0.0
         | 
| 41 | 
            +
                  
         | 
| 42 | 
            +
                  @m.times do        
         | 
| 43 | 
            +
                    # pick a sample at random
         | 
| 44 | 
            +
                    rs, rk = pick_a_sample_at_random
         | 
| 45 | 
            +
                    
         | 
| 46 | 
            +
                    # find the nearest neighbor for each class
         | 
| 47 | 
            +
                    nbrs = find_nearest_nb(rs, rk)
         | 
| 48 | 
            +
                    
         | 
| 49 | 
            +
                    # calc contribution from neighbors
         | 
| 50 | 
            +
                    score += calc_score(f, rs, rk, nbrs)
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
                  
         | 
| 53 | 
            +
                  s = score / @m
         | 
| 54 | 
            +
                  
         | 
| 55 | 
            +
                  set_feature_score(f, :BEST, s)
         | 
| 56 | 
            +
                end # calc_contribution
         | 
| 57 | 
            +
                
         | 
| 58 | 
            +
                
         | 
| 59 | 
            +
                # pick a sample at random
         | 
| 60 | 
            +
                def pick_a_sample_at_random
         | 
| 61 | 
            +
                  rk = get_classes[rand(get_classes.size)]
         | 
| 62 | 
            +
                  rks = get_data[rk]
         | 
| 63 | 
            +
                  
         | 
| 64 | 
            +
                  [ rks[rand(rks.size)], rk ]
         | 
| 65 | 
            +
                end # pick_a_sample_at_random
         | 
| 66 | 
            +
                
         | 
| 67 | 
            +
                
         | 
| 68 | 
            +
                # find the nearest neighbor of the given sample (rs) in each class; rk is the class of rs
         | 
| 69 | 
            +
                def find_nearest_nb(rs, rk)
         | 
| 70 | 
            +
                  nbrs = {}
         | 
| 71 | 
            +
                  
         | 
| 72 | 
            +
                  each_class do |k|
         | 
| 73 | 
            +
                    nb, dmin = nil, 999
         | 
| 74 | 
            +
                    get_data[k].each do |s|
         | 
| 75 | 
            +
                      next if s.object_id == rs.object_id # exclude self
         | 
| 76 | 
            +
                      
         | 
| 77 | 
            +
                      d = diff_sample(rs, s)
         | 
| 78 | 
            +
                      
         | 
| 79 | 
            +
                      if d < dmin
         | 
| 80 | 
            +
                        dmin = d
         | 
| 81 | 
            +
                        nb = s
         | 
| 82 | 
            +
                      end
         | 
| 83 | 
            +
                    end
         | 
| 84 | 
            +
                    
         | 
| 85 | 
            +
                    nbrs[k] = nb
         | 
| 86 | 
            +
                  end
         | 
| 87 | 
            +
                  
         | 
| 88 | 
            +
                  nbrs
         | 
| 89 | 
            +
                end # find_nearest_nb
         | 
| 90 | 
            +
                
         | 
| 91 | 
            +
                
         | 
| 92 | 
            +
                # difference between two samples
         | 
| 93 | 
            +
                def diff_sample(s1, s2)
         | 
| 94 | 
            +
                  d = 0.0
         | 
| 95 | 
            +
                  
         | 
| 96 | 
            +
                  each_feature do |f|
         | 
| 97 | 
            +
                    d += diff_feature(f, s1, s2)**2
         | 
| 98 | 
            +
                  end
         | 
| 99 | 
            +
                  
         | 
| 100 | 
            +
                  d
         | 
| 101 | 
            +
                end # diff_sample
         | 
| 102 | 
            +
                
         | 
| 103 | 
            +
                
         | 
| 104 | 
            +
                # difference between the feature (f) of two samples
         | 
| 105 | 
            +
                def diff_feature(f, s1, s2)
         | 
| 106 | 
            +
                  abort "[#{__FILE__}@#{__LINE__}]: "+
         | 
| 107 | 
            +
                          "derived Relief algo must implement its own diff_feature()"
         | 
| 108 | 
            +
                end # diff_feature
         | 
| 109 | 
            +
                
         | 
| 110 | 
            +
                
         | 
| 111 | 
            +
                # calc feature (f) contribution from neighbors
         | 
| 112 | 
            +
                def calc_score(f, rs, rk, nbrs)
         | 
| 113 | 
            +
                  score = 0.0
         | 
| 114 | 
            +
                  
         | 
| 115 | 
            +
                  nbrs.each do |k, s|
         | 
| 116 | 
            +
                    if k == rk # near hit
         | 
| 117 | 
            +
                      score -= diff_feature(f, rs, s)**2
         | 
| 118 | 
            +
                    else # near_miss
         | 
| 119 | 
            +
                      score += diff_feature(f, rs, s)**2
         | 
| 120 | 
            +
                    end
         | 
| 121 | 
            +
                  end
         | 
| 122 | 
            +
                  
         | 
| 123 | 
            +
                  score
         | 
| 124 | 
            +
                end # calc_score
         | 
| 125 | 
            +
                
         | 
| 126 | 
            +
                
         | 
| 127 | 
            +
              end # class
         | 
| 128 | 
            +
              
         | 
| 129 | 
            +
              
         | 
| 130 | 
            +
            end # module
         |