fselector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
- data/README.md +195 -0
- data/lib/fselector.rb +41 -0
- data/lib/fselector/algo_continuous/PMetric.rb +51 -0
- data/lib/fselector/algo_continuous/ReliefF_c.rb +190 -0
- data/lib/fselector/algo_continuous/Relief_c.rb +150 -0
- data/lib/fselector/algo_continuous/TScore.rb +52 -0
- data/lib/fselector/algo_continuous/discretizer.rb +219 -0
- data/lib/fselector/algo_continuous/normalizer.rb +59 -0
- data/lib/fselector/algo_discrete/Accuracy.rb +35 -0
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +37 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +45 -0
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +69 -0
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +42 -0
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +36 -0
- data/lib/fselector/algo_discrete/F1Measure.rb +41 -0
- data/lib/fselector/algo_discrete/FishersExactTest.rb +47 -0
- data/lib/fselector/algo_discrete/GMean.rb +37 -0
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +43 -0
- data/lib/fselector/algo_discrete/GiniIndex.rb +44 -0
- data/lib/fselector/algo_discrete/InformationGain.rb +96 -0
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +45 -0
- data/lib/fselector/algo_discrete/McNemarsTest.rb +57 -0
- data/lib/fselector/algo_discrete/MutualInformation.rb +42 -0
- data/lib/fselector/algo_discrete/OddsRatio.rb +46 -0
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +41 -0
- data/lib/fselector/algo_discrete/Power.rb +46 -0
- data/lib/fselector/algo_discrete/Precision.rb +31 -0
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +41 -0
- data/lib/fselector/algo_discrete/Random.rb +40 -0
- data/lib/fselector/algo_discrete/ReliefF_d.rb +173 -0
- data/lib/fselector/algo_discrete/Relief_d.rb +135 -0
- data/lib/fselector/algo_discrete/Sensitivity.rb +38 -0
- data/lib/fselector/algo_discrete/Specificity.rb +35 -0
- data/lib/fselector/base.rb +322 -0
- data/lib/fselector/base_continuous.rb +25 -0
- data/lib/fselector/base_discrete.rb +355 -0
- data/lib/fselector/ensemble.rb +181 -0
- data/lib/fselector/fileio.rb +455 -0
- data/lib/fselector/util.rb +707 -0
- metadata +86 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2011-2012 Tiejun Cheng
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
16
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
18
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
19
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
20
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
21
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
FSelector: a Ruby package for feature selection and ranking
|
2
|
+
===========================================================
|
3
|
+
|
4
|
+
**Git**: [https://github.com/need47/fselector](https://github.com/need47/fselector)
|
5
|
+
**Author**: Tiejun Cheng
|
6
|
+
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
7
|
+
**Copyright**: 2011-2012
|
8
|
+
**License**: MIT License
|
9
|
+
**Latest Version**: 0.1.0
|
10
|
+
**Release Date**: March 1st 2012
|
11
|
+
|
12
|
+
Synopsis
|
13
|
+
--------
|
14
|
+
|
15
|
+
FSelector is an open-access Ruby package that aims to integrate as many
|
16
|
+
feature selection/ranking algorithms as possible. It enables the
|
17
|
+
user to perform feature selection by either a single algorithm or by an
|
18
|
+
ensemble of algorithms. Below is a summary of FSelector's features.
|
19
|
+
|
20
|
+
Feature List
|
21
|
+
------------
|
22
|
+
|
23
|
+
**1. available algorithms**
|
24
|
+
|
25
|
+
algorithm alias feature type
|
26
|
+
-------------------------------------------------------
|
27
|
+
Accuracy Acc discrete
|
28
|
+
AccuracyBalanced Acc2 discrete
|
29
|
+
BiNormalSeparation BNS discrete
|
30
|
+
ChiSquaredTest CHI discrete
|
31
|
+
CorrelationCoefficient CC discrete
|
32
|
+
DocumentFrequency DF discrete
|
33
|
+
F1Measure F1 discrete
|
34
|
+
FishersExactTest FET discrete
|
35
|
+
GiniIndex GI discrete
|
36
|
+
GMean GM discrete
|
37
|
+
GSSCoefficient GSS discrete
|
38
|
+
InformationGain IG discrete
|
39
|
+
MatthewsCorrelationCoefficient MCC, PHI discrete
|
40
|
+
McNemarsTest MNT discrete
|
41
|
+
OddsRatio OR discrete
|
42
|
+
OddsRatioNumerator ORN discrete
|
43
|
+
PhiCoefficient Phi discrete
|
44
|
+
Power Power discrete
|
45
|
+
Precision Precision discrete
|
46
|
+
ProbabilityRatio PR discrete
|
47
|
+
Random Random discrete
|
48
|
+
Recall Recall discrete
|
49
|
+
Relief_d Relief_d discrete
|
50
|
+
ReliefF_d ReliefF_d discrete
|
51
|
+
Sensitivity SN, Recall discrete
|
52
|
+
Specificity SP discrete
|
53
|
+
PMetric PM continuous
|
54
|
+
Relief_c Relief_c continuous
|
55
|
+
ReliefF_c ReliefF_c continuous
|
56
|
+
TScore TS continuous
|
57
|
+
|
58
|
+
**2. feature selection approaches**
|
59
|
+
|
60
|
+
- by a single algorithm
|
61
|
+
- by multiple algorithms in a tandem manner
|
62
|
+
- by multiple algorithms in a consensus manner
|
63
|
+
|
64
|
+
**3. available normalization and discretization algorithms for continuous feature**
|
65
|
+
|
66
|
+
algorithm note
|
67
|
+
--------------------------------------------------------------------
|
68
|
+
log normalization by logarithmic transformation
|
69
|
+
min_max normalization by scaling into [min, max]
|
70
|
+
zscore normalization by converting into zscore
|
71
|
+
equal_width discretization by equal width among intervals
|
72
|
+
equal_frequency discretization by equal frequency among intervals
|
73
|
+
ChiMerge discretization by ChiMerge method
|
74
|
+
|
75
|
+
**4. supported input/output file types**
|
76
|
+
|
77
|
+
- csv
|
78
|
+
- libsvm
|
79
|
+
- weka ARFF
|
80
|
+
- random (for test purpose)
|
81
|
+
|
82
|
+
Installing
|
83
|
+
----------
|
84
|
+
|
85
|
+
To install FSelector, use the following command:
|
86
|
+
|
87
|
+
$ gem install fselector
|
88
|
+
|
89
|
+
Usage
|
90
|
+
-----
|
91
|
+
|
92
|
+
**1. feature selection by a single algorithm**
|
93
|
+
|
94
|
+
require 'fselector'
|
95
|
+
|
96
|
+
# use InformationGain as a feature ranking algorithm
|
97
|
+
r1 = FSelector::InformationGain.new
|
98
|
+
|
99
|
+
# read from random data (or csv, libsvm, weka ARFF file)
|
100
|
+
# no. of samples: 100
|
101
|
+
# no. of classes: 2
|
102
|
+
# no. of features: 10
|
103
|
+
# no. of possible values for each feature: 3
|
104
|
+
# allow missing values: true
|
105
|
+
r1.data_from_random(100, 2, 10, 3, true)
|
106
|
+
|
107
|
+
# number of features before feature selection
|
108
|
+
puts "# features (before): "+ r1.get_features.size.to_s
|
109
|
+
|
110
|
+
# select the top-ranked features with scores >0.01
|
111
|
+
r1.select_data_by_score!('>0.01')
|
112
|
+
|
113
|
+
# number of features after feature selection
|
114
|
+
puts "# features (after): "+ r1.get_features.size.to_s
|
115
|
+
|
116
|
+
# you can also use multiple algorithms in a tandem manner
|
117
|
+
# e.g. use the ChiSquaredTest with Yates' continuity correction
|
118
|
+
# initialize from r1's data
|
119
|
+
r2 = FSelector::ChiSquaredTest.new(:yates, r1.get_data)
|
120
|
+
|
121
|
+
# number of features before feature selection
|
122
|
+
puts "# features (before): "+ r2.get_features.size.to_s
|
123
|
+
|
124
|
+
# select the top-ranked 3 features
|
125
|
+
r2.select_data_by_rank!('<=3')
|
126
|
+
|
127
|
+
# number of features after feature selection
|
128
|
+
puts "# features (after): "+ r2.get_features.size.to_s
|
129
|
+
|
130
|
+
# save data to standard output as a weka ARFF file (sparse format)
|
131
|
+
# with selected features only
|
132
|
+
r2.data_to_weka(:stdout, :sparse)
|
133
|
+
|
134
|
+
|
135
|
+
**2. feature selection by an ensemble of algorithms**
|
136
|
+
|
137
|
+
require 'fselector'
|
138
|
+
|
139
|
+
# use both Information and ChiSquaredTest
|
140
|
+
r1 = FSelector::InformationGain.new
|
141
|
+
r2 = FSelector::ChiSquaredTest.new
|
142
|
+
|
143
|
+
# ensemble ranker
|
144
|
+
re = FSelector::Ensemble.new(r1, r2)
|
145
|
+
|
146
|
+
# read random data
|
147
|
+
re.data_from_random(100, 2, 10, 3, true)
|
148
|
+
|
149
|
+
# number of features before feature selection
|
150
|
+
puts '# features before feature selection: ' + re.get_features.size.to_s
|
151
|
+
|
152
|
+
# based on the min feature rank among
|
153
|
+
# ensemble feature selection algorithms
|
154
|
+
re.ensemble_by_rank(re.method(:by_min))
|
155
|
+
|
156
|
+
# select the top-ranked 3 features
|
157
|
+
re.select_data_by_rank!('<=3')
|
158
|
+
|
159
|
+
# number of features before feature selection
|
160
|
+
puts '# features before feature selection: ' + re.get_features.size.to_s
|
161
|
+
|
162
|
+
|
163
|
+
**3. normalization and discretization before feature selection**
|
164
|
+
|
165
|
+
In addition to the algorithms designed for continuous feature, one
|
166
|
+
can apply those designed for discrete feature after (optionally
|
167
|
+
normalization and) discretization
|
168
|
+
|
169
|
+
require 'fselector'
|
170
|
+
|
171
|
+
# for continuous feature
|
172
|
+
r1 = FSelector::BaseContinuous.new
|
173
|
+
|
174
|
+
# read the Iris data set (under the test/ directory)
|
175
|
+
r1.data_from_csv(File.expand_path(File.dirname(__FILE__))+'/iris.csv')
|
176
|
+
|
177
|
+
# normalization by log2 (optional)
|
178
|
+
# r1.normalize_log!(2)
|
179
|
+
|
180
|
+
# discretization by ChiMerge algorithm
|
181
|
+
# chi-squared value = 4.60 for a three-class problem at alpha=0.10
|
182
|
+
r1.discretize_chimerge!(4.60)
|
183
|
+
|
184
|
+
# apply Relief_d for discrete feature
|
185
|
+
# initialize with discretized data from r1
|
186
|
+
r2 = FSelector::ReliefF_d.new(r1.get_sample_size, 10, r1.get_data)
|
187
|
+
|
188
|
+
# print feature ranks
|
189
|
+
r2.print_feature_ranks
|
190
|
+
|
191
|
+
Copyright
|
192
|
+
---------
|
193
|
+
FSelector © 2011-2012 by [Tiejun Cheng](mailto:need47@gmail.com).
|
194
|
+
FSelector is licensed under the MIT license. Please see the {file:LICENSE} for
|
195
|
+
more information.
|
data/lib/fselector.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
# module version
|
6
|
+
VERSION = '0.1.0'
|
7
|
+
end
|
8
|
+
|
9
|
+
ROOT = File.expand_path(File.dirname(__FILE__))
|
10
|
+
|
11
|
+
#
|
12
|
+
# include necessary files
|
13
|
+
#
|
14
|
+
require "#{ROOT}/fselector/fileio.rb"
|
15
|
+
require "#{ROOT}/fselector/util.rb"
|
16
|
+
|
17
|
+
#
|
18
|
+
# base class
|
19
|
+
#
|
20
|
+
require "#{ROOT}/fselector/base.rb"
|
21
|
+
require "#{ROOT}/fselector/base_discrete.rb"
|
22
|
+
require "#{ROOT}/fselector/base_continuous.rb"
|
23
|
+
|
24
|
+
#
|
25
|
+
# feature selection use an ensemble of algorithms
|
26
|
+
#
|
27
|
+
require "#{ROOT}/fselector/ensemble.rb"
|
28
|
+
|
29
|
+
#
|
30
|
+
# algorithms for handling discrete feature
|
31
|
+
#
|
32
|
+
Dir.glob("#{ROOT}/fselector/algo_discrete/*").each do |f|
|
33
|
+
require f
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
# algorithms for handling continuous feature
|
38
|
+
#
|
39
|
+
Dir.glob("#{ROOT}/fselector/algo_continuous/*").each do |f|
|
40
|
+
require f
|
41
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
#
|
6
|
+
# P-Metric (PM) for continous feature
|
7
|
+
#
|
8
|
+
# |u1 - u2|
|
9
|
+
# PM(f) = -----------------
|
10
|
+
# sigma1 + sigma2
|
11
|
+
#
|
12
|
+
# @note PM applicable only to two-class problems
|
13
|
+
#
|
14
|
+
# ref: [Filter versus wrapper gene selection approaches][url]
|
15
|
+
# [url]: http://www.sciencedirect.com/science/article/pii/S0933365704000193
|
16
|
+
#
|
17
|
+
class PMetric < BaseContinuous
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
# calculate contribution of each feature (f) across all classes
|
22
|
+
def calc_contribution(f)
|
23
|
+
if not get_classes.size == 2
|
24
|
+
abort "[#{__FILE__}@#{__LINE__}]: "+
|
25
|
+
"suitable only for two-class problem with continuous feature"
|
26
|
+
end
|
27
|
+
|
28
|
+
# collect data for class 1 and 2, respectively
|
29
|
+
s1, s2 = [], []
|
30
|
+
k1, k2 = get_classes
|
31
|
+
|
32
|
+
each_sample do |k, ss|
|
33
|
+
s1 << ss[f] if k == k1 and ss.has_key? f
|
34
|
+
s2 << ss[f] if k == k2 and ss.has_key? f
|
35
|
+
end
|
36
|
+
|
37
|
+
# calc
|
38
|
+
s = (s1.ave-s2.ave).abs / (s1.sd+s2.sd)
|
39
|
+
|
40
|
+
set_feature_score(f, :BEST, s)
|
41
|
+
end # calc_contribution
|
42
|
+
|
43
|
+
|
44
|
+
end # class
|
45
|
+
|
46
|
+
|
47
|
+
# shortcut so that you can use FSelector::PM instead of FSelector::PMetric
|
48
|
+
PM = PMetric
|
49
|
+
|
50
|
+
|
51
|
+
end # module
|
@@ -0,0 +1,190 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
#
|
6
|
+
# extended Relief algorithm for continuous feature (ReliefF_c)
|
7
|
+
#
|
8
|
+
# @note applicable to multi-class problem with missing data
|
9
|
+
#
|
10
|
+
# ref: [Estimating Attributes: Analysis and Extensions of RELIEF][url]
|
11
|
+
# [url]: http://www.springerlink.com/content/fp23jh2h0426ww45/
|
12
|
+
#
|
13
|
+
class ReliefF_c < BaseContinuous
|
14
|
+
#
|
15
|
+
# new()
|
16
|
+
#
|
17
|
+
# @param [Integer] m number of samples to be used
|
18
|
+
# for estimating feature contribution. max can be
|
19
|
+
# the number of training samples
|
20
|
+
# @param [Integer] k number of k-nearest neighbor
|
21
|
+
# @param [Hash] data existing data structure
|
22
|
+
#
|
23
|
+
def initialize(m=nil, k=10, data=nil)
|
24
|
+
super(data)
|
25
|
+
@m = m # use all samples
|
26
|
+
@k = (k || 10) # default 10
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
# calculate contribution of each feature (f) across all classes
|
32
|
+
def calc_contribution(f)
|
33
|
+
score = 0.0
|
34
|
+
|
35
|
+
# use all samples if @m not provided
|
36
|
+
@m = get_sample_size if not @m
|
37
|
+
|
38
|
+
@m.times do
|
39
|
+
# pick a sample at random
|
40
|
+
rs, rk = pick_a_sample_at_random
|
41
|
+
|
42
|
+
# find k nearest neighbor for each class
|
43
|
+
nbrs = find_k_nearest_nb(rs, rk)
|
44
|
+
|
45
|
+
# calc contribution from neighbors
|
46
|
+
score += calc_score(f, rs, rk, nbrs)
|
47
|
+
end
|
48
|
+
|
49
|
+
s = score / @m
|
50
|
+
|
51
|
+
set_feature_score(f, :BEST, s)
|
52
|
+
end # calc_contribution
|
53
|
+
|
54
|
+
|
55
|
+
# pick a sample at random
|
56
|
+
def pick_a_sample_at_random
|
57
|
+
rk = get_classes[rand(get_classes.size)]
|
58
|
+
rks = get_data[rk]
|
59
|
+
|
60
|
+
[ rks[rand(rks.size)], rk ]
|
61
|
+
end # pick_a_sample_at_random
|
62
|
+
|
63
|
+
# # find k nearest neighbors of sample (rs) for each class
|
64
|
+
def find_k_nearest_nb(rs, rk)
|
65
|
+
nbrs = {}
|
66
|
+
|
67
|
+
each_class do |k|
|
68
|
+
res = []
|
69
|
+
|
70
|
+
get_data[k].each do |s|
|
71
|
+
next if s == rs # exclude self
|
72
|
+
|
73
|
+
d = diff_sample(rs, s, rk, k)
|
74
|
+
res << [d, s]
|
75
|
+
end
|
76
|
+
|
77
|
+
nbrs[k] = (res.sort { |x, y| x[0] <=> y[0] }[0...@k]).collect { |z| z[1] }
|
78
|
+
end
|
79
|
+
|
80
|
+
nbrs
|
81
|
+
end # find_k_nearest_nb
|
82
|
+
|
83
|
+
|
84
|
+
# difference between two samples
|
85
|
+
def diff_sample(s1, s2, k1, k2)
|
86
|
+
d = 0.0
|
87
|
+
|
88
|
+
each_feature do |f|
|
89
|
+
d += diff_feature(f, s1, s2, k1, k2)**2
|
90
|
+
end
|
91
|
+
|
92
|
+
d
|
93
|
+
end # diff_sample
|
94
|
+
|
95
|
+
|
96
|
+
# difference beween the feature (f) of two samples
|
97
|
+
def diff_feature(f, s1, s2, k1, k2)
|
98
|
+
d = 0.0
|
99
|
+
|
100
|
+
if s1.has_key?(f) and s2.has_key?(f) # no missing value
|
101
|
+
nu = get_normalization_unit(f)
|
102
|
+
d = (nu.zero?) ? 0.0 : (s1[f]-s2[f])/nu
|
103
|
+
elsif not s1.has_key?(f) and not s2.has_key?(f) # two missing values
|
104
|
+
fvs = get_feature_values(f).uniq
|
105
|
+
fvs.each do |mv|
|
106
|
+
d -= calc_p(f, mv, k1)*calc_p(f, mv, k2)
|
107
|
+
end
|
108
|
+
d += 1
|
109
|
+
elsif not s1.has_key?(f) # s1: one missing value
|
110
|
+
# diff(f, s1, s2) = 1 - P(value(f, s2)|class(s1))
|
111
|
+
d = 1 - calc_p(f, s2[f], k1)
|
112
|
+
else # s2: one missing value
|
113
|
+
# diff(f, s1, s2) = 1 - P(value(f, s1)|class(s2))
|
114
|
+
d = 1 - calc_p(f, s1[f], k2)
|
115
|
+
end
|
116
|
+
|
117
|
+
d
|
118
|
+
end # diff_feature
|
119
|
+
|
120
|
+
|
121
|
+
# calc probability of missing value (mv)
|
122
|
+
def calc_p(f, mv, k)
|
123
|
+
# cache
|
124
|
+
if not @f2mvp
|
125
|
+
@f2mvp = {}
|
126
|
+
|
127
|
+
each_feature do |f|
|
128
|
+
@f2mvp[f] = {}
|
129
|
+
|
130
|
+
each_class do |k|
|
131
|
+
@f2mvp[f][k] = {}
|
132
|
+
|
133
|
+
fvs = get_feature_values(f).uniq
|
134
|
+
fvs.each do |v|
|
135
|
+
n = 0.0
|
136
|
+
|
137
|
+
get_data[k].each do |s|
|
138
|
+
n += 1 if s.has_key?(f) and s[f] == v
|
139
|
+
end
|
140
|
+
|
141
|
+
@f2mvp[f][k][v] = n/get_data[k].size
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
@f2mvp[f][k][mv]
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
# get normalization unit for each feature
|
152
|
+
def get_normalization_unit(fi)
|
153
|
+
return @f2nu[fi] if @f2nu
|
154
|
+
|
155
|
+
@f2nu = {}
|
156
|
+
|
157
|
+
each_feature do |f|
|
158
|
+
fvs = get_feature_values(f)
|
159
|
+
@f2nu[f] = (fvs.max-fvs.min).to_f
|
160
|
+
end
|
161
|
+
|
162
|
+
@f2nu[fi]
|
163
|
+
end # get_normalization_unit
|
164
|
+
|
165
|
+
|
166
|
+
# calc feature (f) contribution from neighbors
|
167
|
+
def calc_score(f, rs, rk, nbrs)
|
168
|
+
score = 0.0
|
169
|
+
|
170
|
+
nbrs.each do |k, nbs|
|
171
|
+
if k == rk # near hit
|
172
|
+
nbs.each do |s|
|
173
|
+
score -= (diff_feature(f, rs, s, rk, k)**2/nbs.size)
|
174
|
+
end
|
175
|
+
else # near_miss
|
176
|
+
nbs.each do |s|
|
177
|
+
score += (get_data[k].size/get_sample_size.to_f *
|
178
|
+
diff_feature(f, rs, s, rk, k)**2/nbs.size)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
score
|
184
|
+
end
|
185
|
+
|
186
|
+
|
187
|
+
end # class
|
188
|
+
|
189
|
+
|
190
|
+
end # module
|