fselector 1.0.1 → 1.1.0
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/ChangeLog +9 -0
- data/README.md +62 -26
- data/lib/fselector.rb +1 -1
- data/lib/fselector/algo_base/base.rb +89 -34
- data/lib/fselector/algo_base/base_CFS.rb +20 -7
- data/lib/fselector/algo_base/base_Relief.rb +5 -5
- data/lib/fselector/algo_base/base_ReliefF.rb +11 -3
- data/lib/fselector/algo_base/base_discrete.rb +8 -0
- data/lib/fselector/algo_continuous/BSS_WSS.rb +3 -1
- data/lib/fselector/algo_continuous/CFS_c.rb +3 -1
- data/lib/fselector/algo_continuous/FTest.rb +2 -0
- data/lib/fselector/algo_continuous/PMetric.rb +4 -2
- data/lib/fselector/algo_continuous/ReliefF_c.rb +11 -0
- data/lib/fselector/algo_continuous/Relief_c.rb +14 -3
- data/lib/fselector/algo_continuous/TScore.rb +5 -3
- data/lib/fselector/algo_continuous/WilcoxonRankSum.rb +5 -3
- data/lib/fselector/algo_discrete/Accuracy.rb +2 -0
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +2 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +3 -1
- data/lib/fselector/algo_discrete/CFS_d.rb +3 -0
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +3 -0
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +2 -0
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +2 -0
- data/lib/fselector/algo_discrete/F1Measure.rb +2 -0
- data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb +12 -1
- data/lib/fselector/algo_discrete/FishersExactTest.rb +3 -1
- data/lib/fselector/algo_discrete/GMean.rb +2 -0
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +2 -0
- data/lib/fselector/algo_discrete/GiniIndex.rb +3 -1
- data/lib/fselector/algo_discrete/INTERACT.rb +3 -0
- data/lib/fselector/algo_discrete/InformationGain.rb +12 -1
- data/lib/fselector/algo_discrete/LasVegasFilter.rb +3 -0
- data/lib/fselector/algo_discrete/LasVegasIncremental.rb +3 -0
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +2 -0
- data/lib/fselector/algo_discrete/McNemarsTest.rb +3 -0
- data/lib/fselector/algo_discrete/MutualInformation.rb +3 -1
- data/lib/fselector/algo_discrete/OddsRatio.rb +2 -0
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +2 -0
- data/lib/fselector/algo_discrete/Power.rb +4 -1
- data/lib/fselector/algo_discrete/Precision.rb +2 -0
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +2 -0
- data/lib/fselector/algo_discrete/Random.rb +3 -0
- data/lib/fselector/algo_discrete/ReliefF_d.rb +3 -1
- data/lib/fselector/algo_discrete/Relief_d.rb +4 -2
- data/lib/fselector/algo_discrete/Sensitivity.rb +2 -0
- data/lib/fselector/algo_discrete/Specificity.rb +2 -0
- data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb +4 -1
- data/lib/fselector/discretizer.rb +7 -7
- data/lib/fselector/ensemble.rb +375 -115
- data/lib/fselector/entropy.rb +2 -2
- data/lib/fselector/fileio.rb +83 -70
- data/lib/fselector/normalizer.rb +2 -2
- data/lib/fselector/replace_missing_values.rb +137 -3
- data/lib/fselector/util.rb +17 -5
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -1,3 +1,12 @@
+2012-05-15 version 1.1.0
+
+* add replace\_by\_median\_value! for replacing missing value with feature median value
+* add replace\_by\_knn\_value! for replacing missing value with weighted feature value from k-nearest neighbors
+* replace\_by\_mean\_value! and replace\_by\_median\_value! now support both column and row mode
+* add EnsembleSingle class for ensemble feature selection by creating an ensemble of feature selectors using a single feature selection algorithm
+* rename Ensemble to EnsembleMultiple for ensemble feature selection by creating an ensemble of feature selectors using multiple feature selection algorithms of the same type
+* bug fix in FileIO module
+
 2012-05-08 version 1.0.1
 
 * modify Ensemble module so that ensemble\_by\_score() and ensemble\_by\_rank() now take Symbol, instead of Method, as argument. This allows easier and clearer function call
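
The missing-value helpers added in 1.1.0 are called like the existing replace_by_* methods. Below is a minimal, hypothetical sketch based only on the method names listed in the ChangeLog above; the choice of algorithm (BSS_WSS, picked arbitrarily), the CSV file name, the no-argument call to replace_by_median_value!, and the neighbor count passed to replace_by_knn_value! are illustrative assumptions, not taken from the gem's documentation.

    require 'fselector'

    # any algorithm for continuous features will do for loading data;
    # constructor arguments, if any, are omitted here
    r = FSelector::BSS_WSS.new
    r.data_from_csv('my_continuous_data.csv')   # hypothetical input file

    # fill missing entries with each feature's median value
    # (per the ChangeLog, mean/median replacement also supports a row mode)
    r.replace_by_median_value!

    # or: replace missing entries with a weighted value from the k nearest neighbors
    # (the neighbor count 5 is an assumed argument)
    r.replace_by_knn_value!(5)
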
data/README.md
CHANGED
@@ -8,8 +8,8 @@ FSelector: a Ruby gem for feature selection and ranking
 **Email**: [need47@gmail.com](mailto:need47@gmail.com)
 **Copyright**: 2012
 **License**: MIT License
-**Latest Version**: 1.0.1
-**Release Date**: 2012-05-08
+**Latest Version**: 1.1.0
+**Release Date**: 2012-05-15
 
 Synopsis
 --------
@@ -86,17 +86,16 @@ Feature List
     WilcoxonRankSum    WRS    weighting    continuous    two-class
 
 **note for feature selection interface:**
-there are two types of filter methods, i.e., weighting algorithms and subset selection algorithms
+there are two types of filter methods, i.e., feature weighting algorithms and feature subset selection algorithms
 
 - for weighting type: use either **select\_feature\_by\_rank!** or **select\_feature\_by\_score!**
 - for subset type: use **select\_feature!**
-
 
 **3. feature selection approaches**
 
 - by a single algorithm
 - by multiple algorithms in a tandem manner
-- by multiple algorithms in an ensemble manner
+- by multiple algorithms in an ensemble manner (share same feature selection interface as single algorithm)
 
 **4. availabe normalization and discretization algorithms for continuous feature**
 
@@ -114,11 +113,13 @@
 
 **5. availabe algorithms for replacing missing feature values**
 
-    algorithm                      note
-
-    replace_by_fixed_value!        replace by a fixed value
-    replace_by_mean_value!         replace by mean feature value
-
+    algorithm                      note                                       feature_type
+    ---------------------------------------------------------------------------------------------------------
+    replace_by_fixed_value!        replace by a fixed value                   discrete, continuous
+    replace_by_mean_value!         replace by mean feature value              continuous
+    replace_by_median_value!       replace by median feature value            continuous
+    replace_by_knn_value!          replace by weighted knn feature value      continuous
+    replace_by_most_seen_value!    replace by most seen feature value         discrete
 
 Installing
 ----------
@@ -140,7 +141,7 @@ Usage
 
     require 'fselector'
 
-    # use InformationGain as a feature
+    # use InformationGain as a feature selection algorithm
     r1 = FSelector::InformationGain.new
 
     # read from random data (or csv, libsvm, weka ARFF file)
@@ -152,13 +153,13 @@
     r1.data_from_random(100, 2, 15, 3, true)
 
     # number of features before feature selection
-    puts "# features (before): "+ r1.get_features.size.to_s
+    puts " # features (before): "+ r1.get_features.size.to_s
 
     # select the top-ranked features with scores >0.01
     r1.select_feature_by_score!('>0.01')
 
     # number of features after feature selection
-    puts "# features (after): "+ r1.get_features.size.to_s
+    puts " # features (after): "+ r1.get_features.size.to_s
 
     # you can also use multiple alogirithms in a tandem manner
     # e.g. use the ChiSquaredTest with Yates' continuity correction
@@ -166,29 +167,65 @@
     r2 = FSelector::ChiSquaredTest.new(:yates, r1.get_data)
 
     # number of features before feature selection
-    puts "# features (before): "+ r2.get_features.size.to_s
+    puts " # features (before): "+ r2.get_features.size.to_s
 
     # select the top-ranked 3 features
     r2.select_feature_by_rank!('<=3')
 
     # number of features after feature selection
-    puts "# features (after): "+ r2.get_features.size.to_s
+    puts " # features (after): "+ r2.get_features.size.to_s
 
     # save data to standard ouput as a weka ARFF file (sparse format)
     # with selected features only
     r2.data_to_weka(:stdout, :sparse)
 
 
-**2. feature selection by an ensemble of multiple
+**2. feature selection by an ensemble of multiple feature selectors**
 
     require 'fselector'
 
-    #
+    # example 1
+    #
+
+
+    # creating an ensemble of feature selectors by using
+    # a single feature selection algorithm (INTERACT)
+    # by instance perturbation (e.g. bootstrap sampling)
+
+    # test for the type of feature subset selection algorithms
+    r = FSelector::INTERACT.new(0.0001)
+
+    # an ensemble of 40 feature selectors with 90% data by random sampling
+    re = FSelector::EnsembleSingle.new(r, 40, 0.90, :random_sampling)
+
+    # read SPECT data set (under the test/ directory)
+    re.data_from_csv('test/SPECT_train.csv')
+
+    # number of features before feature selection
+    puts ' # features (before): ' + re.get_features.size.to_s
+
+    # only features with above average count among ensemble are selected
+    re.select_feature!
+
+    # number of features after feature selection
+    puts ' # features before (after): ' + re.get_features.size.to_s
+
+
+    # example 2
+    #
+
+
+    # creating an ensemble of feature selectors by using
+    # two feature selection algorithms (InformationGain and Relief_d).
+    # note: can be 2+ algorithms, as long as they are of the same type,
+    # either feature weighting or feature subset selection algorithms
+
+    # test for the type of feature weighting algorithms
     r1 = FSelector::InformationGain.new
-    r2 = FSelector::Relief_d.new
+    r2 = FSelector::Relief_d.new(10)
 
-    # ensemble
-    re = FSelector::Ensemble.new(r1, r2)
+    # an ensemble of two feature selectors
+    re = FSelector::EnsembleMultiple.new(r1, r2)
 
     # read random data
     re.data_from_random(100, 2, 15, 3, true)
@@ -198,18 +235,17 @@
     re.replace_by_most_seen_value!
 
     # number of features before feature selection
-    puts '# features (before): ' + re.get_features.size.to_s
+    puts ' # features (before): ' + re.get_features.size.to_s
 
     # based on the max feature score (z-score standardized) among
-    # an ensemble of feature
+    # an ensemble of feature selectors
     re.ensemble_by_score(:by_max, :by_zscore)
 
     # select the top-ranked 3 features
     re.select_feature_by_rank!('<=3')
 
     # number of features after feature selection
-    puts '# features (after): ' + re.get_features.size.to_s
-
+    puts ' # features (after): ' + re.get_features.size.to_s
 
 **3. normalization and discretization before feature selection**
 
@@ -233,13 +269,13 @@
     r2 = FSelector::FCBF.new(0.0, r1.get_data)
 
     # number of features before feature selection
-    puts '# features (before): ' + r2.get_features.size.to_s
+    puts ' # features (before): ' + r2.get_features.size.to_s
 
     # feature selection
     r2.select_feature!
 
     # number of features after feature selection
-    puts '# features (after): ' + r2.get_features.size.to_s
+    puts ' # features (after): ' + r2.get_features.size.to_s
 
 **4. see more examples test_*.rb under the test/ directory**
 
data/lib/fselector/algo_base/base.rb
CHANGED
@@ -11,25 +11,39 @@ module FSelector
     # include ReplaceMissingValues
     include ReplaceMissingValues
 
+    class << self
+      # class-level instance variable, type of feature selection algorithm.
+      #
+      # @note derived class (except for Base*** class) must set its own type with
+      #       one of the following two:
+      #       - :feature\_weighting        # when algo outputs weight for each feature
+      #       - :feature\_subset_selection # when algo outputs a subset of features
+      attr_accessor :algo_type
+    end
+
+    # get the type of feature selection algorithm at class-level
+    def algo_type
+      self.class.algo_type
+    end
+
+
     # initialize from an existing data structure
     def initialize(data=nil)
-      @data = data
-      @opts = {} # store non-data information
+      @data = data # store data
     end
 
 
     #
-    # iterator for each class, a block must be given
+    # iterator for each class, a block must be given. e.g.
     #
-    #
-    #     self.each_class do |k|
+    #     each_class do |k|
     #       puts k
     #     end
     #
     def each_class
       if not block_given?
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "block must be given!"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " block must be given!"
       else
         get_classes.each { |k| yield k }
       end
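
The class-level accessor added above is how 1.1.0 tells feature weighting algorithms apart from feature subset selection algorithms. As a rough illustration (the subclass below is hypothetical, not part of the gem; the pattern of setting the class-level instance variable is inferred from the @note in the hunk), a derived algorithm declares its type once, and instances read it back through the delegating algo_type method:

    require 'fselector'

    # hypothetical subclass, shown only to illustrate the new class-level type flag
    module FSelector
      class MyWeightingAlgo < Base
        @algo_type = :feature_weighting   # read by the inherited class-level accessor
      end
    end

    FSelector::MyWeightingAlgo.algo_type      # => :feature_weighting
    FSelector::MyWeightingAlgo.new.algo_type  # => :feature_weighting (delegates to self.class.algo_type)
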
@@ -37,17 +51,16 @@ module FSelector
 
 
     #
-    # iterator for each feature, a block must be given
+    # iterator for each feature, a block must be given. e.g.
     #
-    #
-    #     self.each_feature do |f|
+    #     each_feature do |f|
     #       puts f
     #     end
     #
     def each_feature
       if not block_given?
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "block must be given!"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " block must be given!"
       else
         get_features.each { |f| yield f }
       end
@@ -55,10 +68,10 @@ module FSelector
 
 
     #
-    # iterator for each sample with class label,
+    # iterator for each sample with class label,
+    # a block must be given. e.g.
     #
-    #
-    #     self.each_sample do |k, s|
+    #     each_sample do |k, s|
     #       print k
     #       s.each { |f, v| print " #{v}" }
     #       puts
@@ -66,7 +79,7 @@ module FSelector
     #
     def each_sample
       if not block_given?
-        abort "[#{__FILE__}@#{__LINE__}]: "+
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
               " block must be given!"
       else
         get_data.each do |k, samples|
@@ -114,8 +127,8 @@ module FSelector
       if classes and classes.class == Array
         @classes = classes
       else
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "classes must be a Array object!"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " classes must be a Array object!"
       end
     end
 
@@ -125,7 +138,7 @@ module FSelector
     # @return [Array<Symbol>] unique features
     #
     def get_features
-      @features ||= @data.
+      @features ||= @data.collect { |x| x[1].collect { |y| y.keys } }.flatten.uniq
     end
 
 
@@ -174,8 +187,8 @@ module FSelector
       if features and features.class == Array
         @features = features
       else
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "features must be a Array object!"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " features must be a Array object!"
       end
     end
 
@@ -204,27 +217,40 @@ module FSelector
     # set data and clean relevant variables in case of data change
     #
     # @param [Hash] data source data structure
+    # @return [nil] to suppress console echo of data in irb
     #
     def set_data(data)
       if data and data.class == Hash
-        @data = data
         # clear variables
-        clear_vars
+        clear_vars if @data
+        @data = data # set new data structure
       else
-        abort "[#{__FILE__}@#{__LINE__}]: "+
-              "data must be a Hash object!"
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " data must be a Hash object!"
       end
+
+      nil # suppress console echo of data in irb
     end
 
 
+    #
     # get non-data information for a given key
-
-
+    #
+    # @param [Symbol] key key of non-data
+    # @return [Any] value of non-data, can be any type
+    #
+    # @note return all non-data as a Hash if key == nil
+    #
+    def get_opt(key=nil)
+      key ? @opts[key] : @opts
     end
 
 
     # set non-data information as a key-value pair
+    # @param [Symbol] key key of non-data
+    # @param [Any] value value of non-data, can be any type
    def set_opt(key, value)
+      @opts ||= {} # store non-data information
       @opts[key] = value
     end
 
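
The set_opt/get_opt pair documented above is the way to attach non-data information to a selector instance. A minimal sketch of the documented signatures; the :k key and its value are arbitrary examples, not options defined by the gem:

    r = FSelector::InformationGain.new
    r.set_opt(:k, 10)   # store a non-data key-value pair (@opts is created on first use)
    r.get_opt(:k)       # => 10
    r.get_opt           # => {:k=>10}, all non-data returned as a Hash when key is nil
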
@@ -235,7 +261,7 @@ module FSelector
     # @return [Integer] sample size
     #
     def get_sample_size
-      @sz ||= get_data.
+      @sz ||= get_classes.inject(0) { |sz, k| sz+get_data[k].size }
     end
 
 
@@ -286,6 +312,13 @@ module FSelector
     # the subset selection type of algorithms, see {file:README.md}
     #
     def select_feature!
+      if not self.algo_type == :feature_subset_selection
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " select_feature! is the interface for the type of feature subset selection algorithms only. \n" +
+              " please consider select_featue_by_score! or select_feature_by_rank!, \n" +
+              " which is the interface for the type of feature weighting algorithms"
+      end
+
       # derived class must implement its own one
       subset = get_feature_subset
       return if subset.empty?
@@ -313,8 +346,16 @@ module FSelector
     # the weighting type of algorithms, see {file:README.md}
     #
     def select_feature_by_score!(criterion, my_scores=nil)
+      if not self.algo_type == :feature_weighting
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " select_feature_by_score! is the interface for the type of feature weighting algorithms only. \n" +
+              " please consider select_featue!, \n" +
+              " which is the interface for the type of feature subset selection algorithms"
+      end
+
       # user scores or internal scores
       scores = my_scores || get_feature_scores
+      return if scores.empty?
 
       my_data = {}
 
@@ -339,8 +380,16 @@ module FSelector
     # the weighting type of algorithms, see {file:README.md}
     #
     def select_feature_by_rank!(criterion, my_ranks=nil)
+      if not self.algo_type == :feature_weighting
+        abort "[#{__FILE__}@#{__LINE__}]: \n"+
+              " select_feature_by_rank! is the interface for the type of feature weighting algorithms only. \n" +
+              " please consider select_featue!, \n" +
+              " which is the interface for the type of feature subset selection algorithms"
+      end
+
       # user ranks or internal ranks
       ranks = my_ranks || get_feature_ranks
+      return if ranks.empty?
 
       my_data = {}
 
@@ -355,12 +404,18 @@ module FSelector
 
     private
 
-    #
-    #
+    #
+    # clear variables when data structure is altered, this is
+    # useful when data structure has changed while
+    # you still want to use the same instance
+    #
+    # @note the variables of original data structure (@data) and
+    #       algorithm type (@algo_type) are retained
+    #
     def clear_vars
       @classes, @features, @fvs = nil, nil, nil
       @scores, @ranks, @sz = nil, nil, nil
-      @cv, @fvs = nil, nil
+      @cv, @fvs, @opts = nil, nil, {}
     end
 
 
@@ -399,10 +454,10 @@ module FSelector
     end
 
 
-    # get subset
+    # get feature subset, for the type of subset selection algorithms
     def get_feature_subset
-      abort "[#{__FILE__}@#{__LINE__}]: "+
-
+      abort "[#{__FILE__}@#{__LINE__}]: \n"+
+            " derived subclass must implement its own get_feature_subset()"
     end
 
 
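
With the algo_type flag in place, the three selection entry points shown above now guard against being called on the wrong kind of algorithm. A minimal sketch of the effect, reusing the README's own example calls (InformationGain is listed there as a feature weighting algorithm, so only the rank/score interfaces apply to it):

    require 'fselector'

    r = FSelector::InformationGain.new
    r.data_from_random(100, 2, 15, 3, true)

    r.select_feature_by_rank!('<=3')   # fine: matches :feature_weighting

    # r.select_feature!                # would abort: select_feature! is reserved for
                                       # :feature_subset_selection algorithms such as FCBF or INTERACT
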