fselector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/README.md +195 -0
- data/lib/fselector.rb +41 -0
- data/lib/fselector/algo_continuous/PMetric.rb +51 -0
- data/lib/fselector/algo_continuous/ReliefF_c.rb +190 -0
- data/lib/fselector/algo_continuous/Relief_c.rb +150 -0
- data/lib/fselector/algo_continuous/TScore.rb +52 -0
- data/lib/fselector/algo_continuous/discretizer.rb +219 -0
- data/lib/fselector/algo_continuous/normalizer.rb +59 -0
- data/lib/fselector/algo_discrete/Accuracy.rb +35 -0
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +37 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +45 -0
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +69 -0
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +42 -0
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +36 -0
- data/lib/fselector/algo_discrete/F1Measure.rb +41 -0
- data/lib/fselector/algo_discrete/FishersExactTest.rb +47 -0
- data/lib/fselector/algo_discrete/GMean.rb +37 -0
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +43 -0
- data/lib/fselector/algo_discrete/GiniIndex.rb +44 -0
- data/lib/fselector/algo_discrete/InformationGain.rb +96 -0
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +45 -0
- data/lib/fselector/algo_discrete/McNemarsTest.rb +57 -0
- data/lib/fselector/algo_discrete/MutualInformation.rb +42 -0
- data/lib/fselector/algo_discrete/OddsRatio.rb +46 -0
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +41 -0
- data/lib/fselector/algo_discrete/Power.rb +46 -0
- data/lib/fselector/algo_discrete/Precision.rb +31 -0
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +41 -0
- data/lib/fselector/algo_discrete/Random.rb +40 -0
- data/lib/fselector/algo_discrete/ReliefF_d.rb +173 -0
- data/lib/fselector/algo_discrete/Relief_d.rb +135 -0
- data/lib/fselector/algo_discrete/Sensitivity.rb +38 -0
- data/lib/fselector/algo_discrete/Specificity.rb +35 -0
- data/lib/fselector/base.rb +322 -0
- data/lib/fselector/base_continuous.rb +25 -0
- data/lib/fselector/base_discrete.rb +355 -0
- data/lib/fselector/ensemble.rb +181 -0
- data/lib/fselector/fileio.rb +455 -0
- data/lib/fselector/util.rb +707 -0
- metadata +86 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
Copyright (c) 2011-2012 Tiejun Cheng

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,195 @@
FSelector: a Ruby package for feature selection and ranking
===========================================================

**Git**: [https://github.com/need47/fselector](https://github.com/need47/fselector)
**Author**: Tiejun Cheng
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
**Copyright**: 2011-2012
**License**: MIT License
**Latest Version**: 0.1.0
**Release Date**: March 1st 2012

Synopsis
--------

FSelector is an open-access Ruby package that aims to integrate as many
feature selection/ranking algorithms as possible. It enables the
user to perform feature selection by either a single algorithm or by an
ensemble of algorithms. Below is a summary of FSelector's features.

Feature List
------------

**1. available algorithms**

    algorithm                       alias       feature type
    -------------------------------------------------------
    Accuracy                        Acc         discrete
    AccuracyBalanced                Acc2        discrete
    BiNormalSeparation              BNS         discrete
    ChiSquaredTest                  CHI         discrete
    CorrelationCoefficient          CC          discrete
    DocumentFrequency               DF          discrete
    F1Measure                       F1          discrete
    FishersExactTest                FET         discrete
    GiniIndex                       GI          discrete
    GMean                           GM          discrete
    GSSCoefficient                  GSS         discrete
    InformationGain                 IG          discrete
    MatthewsCorrelationCoefficient  MCC, PHI    discrete
    McNemarsTest                    MNT         discrete
    OddsRatio                       OR          discrete
    OddsRatioNumerator              ORN         discrete
    PhiCoefficient                  Phi         discrete
    Power                           Power       discrete
    Precision                       Precision   discrete
    ProbabilityRatio                PR          discrete
    Random                          Random      discrete
    Recall                          Recall      discrete
    Relief_d                        Relief_d    discrete
    ReliefF_d                       ReliefF_d   discrete
    Sensitivity                     SN, Recall  discrete
    Specificity                     SP          discrete
    PMetric                         PM          continuous
    Relief_c                        Relief_c    continuous
    ReliefF_c                       ReliefF_c   continuous
    TScore                          TS          continuous

**2. feature selection approaches**

- by a single algorithm
- by multiple algorithms in a tandem manner
- by multiple algorithms in a consensus manner

**3. available normalization and discretization algorithms for continuous feature**

    algorithm          note
    --------------------------------------------------------------------
    log                normalization by logarithmic transformation
    min_max            normalization by scaling into [min, max]
    zscore             normalization by converting into zscore
    equal_width        discretization by equal width among intervals
    equal_frequency    discretization by equal frequency among intervals
    ChiMerge           discretization by ChiMerge method

**4. supported input/output file types**

- csv
- libsvm
- weka ARFF
- random (for test purpose)

Installing
----------

To install FSelector, use the following command:

    $ gem install fselector

Usage
-----

**1. feature selection by a single algorithm**

    require 'fselector'

    # use InformationGain as a feature ranking algorithm
    r1 = FSelector::InformationGain.new

    # read from random data (or csv, libsvm, weka ARFF file)
    # no. of samples: 100
    # no. of classes: 2
    # no. of features: 10
    # no. of possible values for each feature: 3
    # allow missing values: true
    r1.data_from_random(100, 2, 10, 3, true)

    # number of features before feature selection
    puts "# features (before): "+ r1.get_features.size.to_s

    # select the top-ranked features with scores >0.01
    r1.select_data_by_score!('>0.01')

    # number of features after feature selection
    puts "# features (after): "+ r1.get_features.size.to_s

    # you can also use multiple algorithms in a tandem manner
    # e.g. use the ChiSquaredTest with Yates' continuity correction
    # initialize from r1's data
    r2 = FSelector::ChiSquaredTest.new(:yates, r1.get_data)

    # number of features before feature selection
    puts "# features (before): "+ r2.get_features.size.to_s

    # select the top-ranked 3 features
    r2.select_data_by_rank!('<=3')

    # number of features after feature selection
    puts "# features (after): "+ r2.get_features.size.to_s

    # save data to standard output as a weka ARFF file (sparse format)
    # with selected features only
    r2.data_to_weka(:stdout, :sparse)


**2. feature selection by an ensemble of algorithms**

    require 'fselector'

    # use both InformationGain and ChiSquaredTest
    r1 = FSelector::InformationGain.new
    r2 = FSelector::ChiSquaredTest.new

    # ensemble ranker
    re = FSelector::Ensemble.new(r1, r2)

    # read random data
    re.data_from_random(100, 2, 10, 3, true)

    # number of features before feature selection
    puts '# features before feature selection: ' + re.get_features.size.to_s

    # based on the min feature rank among
    # ensemble feature selection algorithms
    re.ensemble_by_rank(re.method(:by_min))

    # select the top-ranked 3 features
    re.select_data_by_rank!('<=3')

    # number of features after feature selection
    puts '# features after feature selection: ' + re.get_features.size.to_s


**3. normalization and discretization before feature selection**

In addition to the algorithms designed for continuous feature, one
can apply those designed for discrete feature after (optionally
normalization and) discretization

    require 'fselector'

    # for continuous feature
    r1 = FSelector::BaseContinuous.new

    # read the Iris data set (under the test/ directory)
    r1.data_from_csv(File.expand_path(File.dirname(__FILE__))+'/iris.csv')

    # normalization by log2 (optional)
    # r1.normalize_log!(2)

    # discretization by ChiMerge algorithm
    # chi-squared value = 4.60 for a three-class problem at alpha=0.10
    r1.discretize_chimerge!(4.60)

    # apply ReliefF_d for discrete feature
    # initialize with discretized data from r1
    r2 = FSelector::ReliefF_d.new(r1.get_sample_size, 10, r1.get_data)

    # print feature ranks
    r2.print_feature_ranks

Copyright
---------
FSelector © 2011-2012 by [Tiejun Cheng](mailto:need47@gmail.com).
FSelector is licensed under the MIT license. Please see the {file:LICENSE} for
more information.
data/lib/fselector.rb
ADDED
@@ -0,0 +1,41 @@
#
# FSelector: a Ruby gem for feature selection and ranking
#
# This file is the gem entry point: it defines the FSelector namespace
# and loads every component of the library relative to this file's path.
#
module FSelector
  # module version
  VERSION = '0.1.0'
end

# absolute path of the lib/ directory, used as the require base below
ROOT = File.expand_path(File.dirname(__FILE__))

#
# include necessary files (I/O helpers and core-class extensions)
#
require "#{ROOT}/fselector/fileio.rb"
require "#{ROOT}/fselector/util.rb"

#
# base classes — order matters: base.rb must be loaded before its
# discrete/continuous specializations
#
require "#{ROOT}/fselector/base.rb"
require "#{ROOT}/fselector/base_discrete.rb"
require "#{ROOT}/fselector/base_continuous.rb"

#
# feature selection using an ensemble of algorithms
#
require "#{ROOT}/fselector/ensemble.rb"

#
# algorithms for handling discrete feature
#
Dir.glob("#{ROOT}/fselector/algo_discrete/*").each do |f|
  require f
end

#
# algorithms for handling continuous feature
#
Dir.glob("#{ROOT}/fselector/algo_continuous/*").each do |f|
  require f
end
@@ -0,0 +1,51 @@
#
# FSelector: a Ruby gem for feature selection and ranking
#
module FSelector
  #
  # P-Metric (PM) for continuous feature
  #
  #            |u1 - u2|
  #  PM(f) = ---------------
  #          sigma1 + sigma2
  #
  # where u and sigma are the per-class mean and standard deviation of
  # feature f's values.
  #
  # @note PM is applicable only to two-class problems
  #
  # ref: [Filter versus wrapper gene selection approaches][url]
  # [url]: http://www.sciencedirect.com/science/article/pii/S0933365704000193
  #
  class PMetric < BaseContinuous

    private

    # Calculate the contribution of feature (f) across all classes and
    # record it via set_feature_score. Aborts unless the data set has
    # exactly two classes.
    #
    # Samples missing a value for f are skipped. `ave` and `sd` are the
    # mean/standard-deviation extensions provided by fselector/util.rb.
    def calc_contribution(f)
      if not get_classes.size == 2
        abort "[#{__FILE__}@#{__LINE__}]: "+
              "suitable only for two-class problem with continuous feature"
      end

      # collect data for class 1 and 2, respectively
      s1, s2 = [], []
      k1, k2 = get_classes

      each_sample do |k, ss|
        s1 << ss[f] if k == k1 and ss.has_key? f
        s2 << ss[f] if k == k2 and ss.has_key? f
      end

      # P-Metric score: |u1 - u2| / (sigma1 + sigma2)
      s = (s1.ave-s2.ave).abs / (s1.sd+s2.sd)

      set_feature_score(f, :BEST, s)
    end # calc_contribution


  end # class


  # shortcut so that you can use FSelector::PM instead of FSelector::PMetric
  PM = PMetric


end # module
@@ -0,0 +1,190 @@
#
# FSelector: a Ruby gem for feature selection and ranking
#
module FSelector
  #
  # extended Relief algorithm for continuous feature (ReliefF_c)
  #
  # @note applicable to multi-class problem with missing data
  #
  # ref: [Estimating Attributes: Analysis and Extensions of RELIEF][url]
  # [url]: http://www.springerlink.com/content/fp23jh2h0426ww45/
  #
  class ReliefF_c < BaseContinuous
    #
    # new()
    #
    # @param [Integer] m number of samples to be used
    #        for estimating feature contribution. max can be
    #        the number of training samples
    # @param [Integer] k number of k-nearest neighbors
    # @param [Hash] data existing data structure
    #
    def initialize(m=nil, k=10, data=nil)
      super(data)
      @m = m # nil means: use all samples (resolved lazily in calc_contribution)
      @k = (k || 10) # default 10
    end

    private

    # Calculate the contribution of feature (f) across all classes:
    # average, over @m randomly picked samples, of the per-sample score
    # derived from its k nearest neighbors in each class.
    def calc_contribution(f)
      score = 0.0

      # use all samples if @m not provided
      @m = get_sample_size if not @m

      @m.times do
        # pick a sample at random
        rs, rk = pick_a_sample_at_random

        # find k nearest neighbors for each class
        nbrs = find_k_nearest_nb(rs, rk)

        # accumulate contribution from neighbors
        score += calc_score(f, rs, rk, nbrs)
      end

      s = score / @m

      set_feature_score(f, :BEST, s)
    end # calc_contribution


    # Pick a sample at random: first a random class, then a random sample
    # within it. Returns [sample, class].
    # NOTE(review): picking class-first is not uniform over samples when
    # class sizes differ — presumably intentional, but worth confirming.
    def pick_a_sample_at_random
      rk = get_classes[rand(get_classes.size)]
      rks = get_data[rk]

      [ rks[rand(rks.size)], rk ]
    end # pick_a_sample_at_random

    # Find the k nearest neighbors of sample (rs), of class (rk), within
    # each class. Returns a Hash of class => [neighbor samples].
    def find_k_nearest_nb(rs, rk)
      nbrs = {}

      each_class do |k|
        res = []

        get_data[k].each do |s|
          next if s == rs # exclude self

          d = diff_sample(rs, s, rk, k)
          res << [d, s]
        end

        # keep the @k samples with the smallest distance
        nbrs[k] = (res.sort { |x, y| x[0] <=> y[0] }[0...@k]).collect { |z| z[1] }
      end

      nbrs
    end # find_k_nearest_nb


    # Squared-difference distance between two samples (sum over features).
    def diff_sample(s1, s2, k1, k2)
      d = 0.0

      each_feature do |f|
        d += diff_feature(f, s1, s2, k1, k2)**2
      end

      d
    end # diff_sample


    # Difference between the feature (f) of two samples, with the ReliefF
    # missing-value conventions (probability-based estimates when one or
    # both values are absent).
    def diff_feature(f, s1, s2, k1, k2)
      d = 0.0

      if s1.has_key?(f) and s2.has_key?(f) # no missing value
        # normalize by the feature's value range; zero range => no difference
        nu = get_normalization_unit(f)
        d = (nu.zero?) ? 0.0 : (s1[f]-s2[f])/nu
      elsif not s1.has_key?(f) and not s2.has_key?(f) # two missing values
        # diff = 1 - sum_v P(v|class(s1)) * P(v|class(s2))
        fvs = get_feature_values(f).uniq
        fvs.each do |mv|
          d -= calc_p(f, mv, k1)*calc_p(f, mv, k2)
        end
        d += 1
      elsif not s1.has_key?(f) # s1: one missing value
        # diff(f, s1, s2) = 1 - P(value(f, s2)|class(s1))
        d = 1 - calc_p(f, s2[f], k1)
      else # s2: one missing value
        # diff(f, s1, s2) = 1 - P(value(f, s1)|class(s2))
        d = 1 - calc_p(f, s1[f], k2)
      end

      d
    end # diff_feature


    # Conditional probability P(value mv of feature f | class k), computed
    # once for all (feature, class, value) triples and cached in @f2mvp.
    def calc_p(f, mv, k)
      # build the cache on first use; block params are deliberately named
      # differently from the method params to avoid shadowing them
      if not @f2mvp
        @f2mvp = {}

        each_feature do |cf|
          @f2mvp[cf] = {}

          each_class do |ck|
            @f2mvp[cf][ck] = {}

            fvs = get_feature_values(cf).uniq
            fvs.each do |v|
              n = 0.0

              get_data[ck].each do |s|
                n += 1 if s.has_key?(cf) and s[cf] == v
              end

              @f2mvp[cf][ck][v] = n/get_data[ck].size
            end
          end
        end
      end

      @f2mvp[f][k][mv]
    end


    # Normalization unit (value range max-min) for feature (fi), cached
    # for all features in @f2nu on first use.
    def get_normalization_unit(fi)
      return @f2nu[fi] if @f2nu

      @f2nu = {}

      each_feature do |f|
        fvs = get_feature_values(f)
        @f2nu[f] = (fvs.max-fvs.min).to_f
      end

      @f2nu[fi]
    end # get_normalization_unit


    # Feature (f) contribution from the neighbors of sample (rs): near
    # hits (same class) subtract, near misses (other classes) add weighted
    # by the class prior.
    def calc_score(f, rs, rk, nbrs)
      score = 0.0

      nbrs.each do |k, nbs|
        if k == rk # near hit
          nbs.each do |s|
            score -= (diff_feature(f, rs, s, rk, k)**2/nbs.size)
          end
        else # near miss
          nbs.each do |s|
            score += (get_data[k].size/get_sample_size.to_f *
                      diff_feature(f, rs, s, rk, k)**2/nbs.size)
          end
        end
      end

      score
    end


  end # class


end # module