fselector 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +6 -0
- data/README.md +20 -19
- data/lib/fselector.rb +1 -1
- data/lib/fselector/algo_base/base.rb +3 -3
- data/lib/fselector/algo_both/LasVegasFilter.rb +1 -1
- data/lib/fselector/algo_both/LasVegasIncremental.rb +1 -1
- data/lib/fselector/algo_both/Random.rb +1 -1
- data/lib/fselector/algo_both/RandomSubset.rb +1 -1
- data/lib/fselector/algo_continuous/BSS_WSS.rb +1 -1
- data/lib/fselector/algo_continuous/CFS_c.rb +1 -1
- data/lib/fselector/algo_continuous/F-Test.rb +1 -1
- data/lib/fselector/algo_continuous/KS-CCBF.rb +1 -1
- data/lib/fselector/algo_continuous/KS-Test.rb +1 -1
- data/lib/fselector/algo_continuous/P-Metric.rb +1 -1
- data/lib/fselector/algo_continuous/ReliefF_c.rb +1 -1
- data/lib/fselector/algo_continuous/Relief_c.rb +1 -1
- data/lib/fselector/algo_continuous/T-Score.rb +1 -1
- data/lib/fselector/algo_continuous/WilcoxonRankSum.rb +1 -1
- data/lib/fselector/algo_discrete/Accuracy.rb +3 -1
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +1 -1
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +1 -1
- data/lib/fselector/algo_discrete/CFS_d.rb +1 -1
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +1 -1
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +1 -1
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +1 -1
- data/lib/fselector/algo_discrete/F1Measure.rb +1 -1
- data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb +1 -1
- data/lib/fselector/algo_discrete/FishersExactTest.rb +1 -1
- data/lib/fselector/algo_discrete/G-Mean.rb +1 -1
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +1 -1
- data/lib/fselector/algo_discrete/GiniIndex.rb +1 -1
- data/lib/fselector/algo_discrete/INTERACT.rb +1 -1
- data/lib/fselector/algo_discrete/InformationGain.rb +1 -1
- data/lib/fselector/algo_discrete/J-Measure.rb +1 -1
- data/lib/fselector/algo_discrete/KL-Divergence.rb +1 -1
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +1 -1
- data/lib/fselector/algo_discrete/McNemarsTest.rb +1 -1
- data/lib/fselector/algo_discrete/MutualInformation.rb +1 -1
- data/lib/fselector/algo_discrete/OddsRatio.rb +1 -1
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +1 -1
- data/lib/fselector/algo_discrete/Power.rb +1 -1
- data/lib/fselector/algo_discrete/Precision.rb +1 -1
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +1 -1
- data/lib/fselector/algo_discrete/ReliefF_d.rb +1 -1
- data/lib/fselector/algo_discrete/Relief_d.rb +1 -1
- data/lib/fselector/algo_discrete/Sensitivity.rb +1 -1
- data/lib/fselector/algo_discrete/Specificity.rb +1 -1
- data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb +1 -1
- data/lib/fselector/fileio.rb +12 -1
- metadata +6 -6
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -1,21 +1,22 @@
|
|
1
|
-
FSelector: a Ruby gem for feature selection
|
2
|
-
|
1
|
+
FSelector: a Ruby gem for feature selection
|
2
|
+
===========================================
|
3
3
|
|
4
4
|
**Home**: [https://rubygems.org/gems/fselector](https://rubygems.org/gems/fselector)
|
5
5
|
**Source Code**: [https://github.com/need47/fselector](https://github.com/need47/fselector)
|
6
|
-
**Documentation
|
6
|
+
**Documentation**: [http://rubydoc.info/gems/fselector/frames](http://rubydoc.info/gems/fselector/frames)
|
7
|
+
**Publication**: [Bioinformatics, 2012, 28, 2851-2852](http://bioinformatics.oxfordjournals.org/content/28/21/2851)
|
7
8
|
**Author**: Tiejun Cheng
|
8
9
|
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
9
10
|
**Copyright**: 2012
|
10
11
|
**License**: MIT License
|
11
|
-
**Latest Version**: 1.3.1
|
12
|
-
**Release Date**: 2012-05
|
12
|
+
**Latest Version**: 1.4.0
|
13
|
+
**Release Date**: 2012-11-05
|
13
14
|
|
14
15
|
Synopsis
|
15
16
|
--------
|
16
17
|
|
17
18
|
FSelector is a Ruby gem that aims to integrate various feature
|
18
|
-
selection
|
19
|
+
selection algorithms and related functions into one single
|
19
20
|
package. Welcome to contact me (need47@gmail.com) if you'd like to
|
20
21
|
contribute your own algorithms or report a bug. FSelector allows user
|
21
22
|
to perform feature selection by using either a single algorithm or an
|
@@ -49,18 +50,18 @@ Feature List
|
|
49
50
|
Accuracy Acc weighting multi-class discrete
|
50
51
|
AccuracyBalanced Acc2 weighting multi-class discrete
|
51
52
|
BiNormalSeparation BNS weighting multi-class discrete
|
52
|
-
CFS_d CFS_d
|
53
|
+
CFS_d CFS_d searching multi-class discrete
|
53
54
|
ChiSquaredTest CHI weighting multi-class discrete
|
54
55
|
CorrelationCoefficient CC weighting multi-class discrete
|
55
56
|
DocumentFrequency DF weighting multi-class discrete
|
56
57
|
F1Measure F1 weighting multi-class discrete
|
57
58
|
FishersExactTest FET weighting multi-class discrete
|
58
|
-
FastCorrelationBasedFilter FCBF
|
59
|
+
FastCorrelationBasedFilter FCBF searching multi-class discrete
|
59
60
|
GiniIndex GI weighting multi-class discrete
|
60
61
|
GMean GM weighting multi-class discrete
|
61
62
|
GSSCoefficient GSS weighting multi-class discrete
|
62
63
|
InformationGain IG weighting multi-class discrete
|
63
|
-
INTERACT INTERACT
|
64
|
+
INTERACT INTERACT searching multi-class discrete
|
64
65
|
JMeasure JM weighting multi-class discrete
|
65
66
|
KLDivergence KLD weighting multi-class discrete
|
66
67
|
MatthewsCorrelationCoefficient MCC, PHI weighting multi-class discrete
|
@@ -78,25 +79,25 @@ Feature List
|
|
78
79
|
Specificity SP weighting multi-class discrete
|
79
80
|
SymmetricalUncertainty SU weighting multi-class discrete
|
80
81
|
BetweenWithinClassesSumOfSquare BSS_WSS weighting multi-class continuous
|
81
|
-
CFS_c CFS_c
|
82
|
+
CFS_c CFS_c searching multi-class continuous
|
82
83
|
FTest FT weighting multi-class continuous
|
83
|
-
KS_CCBF KS_CCBF
|
84
|
+
KS_CCBF KS_CCBF searching multi-class continuous
|
84
85
|
KSTest KST weighting two-class continuous
|
85
86
|
PMetric PM weighting two-class continuous
|
86
87
|
Relief_c Relief_c weighting two-class continuous
|
87
88
|
ReliefF_c ReliefF_c weighting multi-class continuous
|
88
89
|
TScore TS weighting two-class continuous
|
89
90
|
WilcoxonRankSum WRS weighting two-class continuous
|
90
|
-
LasVegasFilter LVF
|
91
|
-
LasVegasIncremental LVI
|
91
|
+
LasVegasFilter LVF searching multi-class discrete, continuous, mixed
|
92
|
+
LasVegasIncremental LVI searching multi-class discrete, continuous, mixed
|
92
93
|
Random Rand weighting multi-class discrete, continuous, mixed
|
93
|
-
RandomSubset RandS
|
94
|
+
RandomSubset RandS searching multi-class discrete, continuous, mixed
|
94
95
|
|
95
96
|
**note for feature selection interface:**
|
96
|
-
there are two types of filter
|
97
|
+
there are two types of filter algorithms: filter\_by\_feature\_weighting and filter\_by\_feature\_searching
|
97
98
|
|
98
|
-
- for
|
99
|
-
- for
|
99
|
+
- for former: use either **select\_feature\_by\_score!** or **select\_feature\_by\_rank!**
|
100
|
+
- for latter: use **select\_feature!**
|
100
101
|
|
101
102
|
**3. feature selection approaches**
|
102
103
|
|
@@ -205,7 +206,7 @@ Usage
|
|
205
206
|
# an ensemble of 40 feature selectors with 90% data by random sampling
|
206
207
|
re = FSelector::EnsembleSingle.new(r, 40, 0.90, :random_sampling)
|
207
208
|
|
208
|
-
# read SPECT data set
|
209
|
+
# read SPECT data set (under the test/ directory)
|
209
210
|
re.data_from_csv('test/SPECT_train.csv')
|
210
211
|
|
211
212
|
# number of features before feature selection
|
@@ -225,7 +226,7 @@ Usage
|
|
225
226
|
# creating an ensemble of feature selectors by using
|
226
227
|
# two feature selection algorithms: InformationGain (IG) and Relief_d.
|
227
228
|
# note: can be 2+ algorithms, as long as they are of the same type,
|
228
|
-
# either
|
229
|
+
# either filter_by_feature_weighting or filter_by_feature_searching
|
229
230
|
|
230
231
|
# test for the type of feature weighting algorithms
|
231
232
|
r1 = FSelector::IG.new
|
data/lib/fselector.rb
CHANGED
@@ -343,7 +343,7 @@ module FSelector
|
|
343
343
|
# the subset selection type of algorithms, see {file:README.md}
|
344
344
|
#
|
345
345
|
def select_feature!
|
346
|
-
if not self.algo_type == :
|
346
|
+
if not self.algo_type == :filter_by_feature_searching
|
347
347
|
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
348
348
|
" select_feature! is the interface for the type of feature subset selection algorithms only. \n" +
|
349
349
|
" please consider select_featue_by_score! or select_feature_by_rank!, \n" +
|
@@ -377,7 +377,7 @@ module FSelector
|
|
377
377
|
# the weighting type of algorithms, see {file:README.md}
|
378
378
|
#
|
379
379
|
def select_feature_by_score!(criterion, my_scores=nil)
|
380
|
-
if not self.algo_type == :
|
380
|
+
if not self.algo_type == :filter_by_feature_weighting
|
381
381
|
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
382
382
|
" select_feature_by_score! is the interface for the type of feature weighting algorithms only. \n" +
|
383
383
|
" please consider select_featue!, \n" +
|
@@ -411,7 +411,7 @@ module FSelector
|
|
411
411
|
# the weighting type of algorithms, see {file:README.md}
|
412
412
|
#
|
413
413
|
def select_feature_by_rank!(criterion, my_ranks=nil)
|
414
|
-
if not self.algo_type == :
|
414
|
+
if not self.algo_type == :filter_by_feature_weighting
|
415
415
|
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
416
416
|
" select_feature_by_rank! is the interface for the type of feature weighting algorithms only. \n" +
|
417
417
|
" please consider select_featue!, \n" +
|
@@ -8,10 +8,12 @@ module FSelector
|
|
8
8
|
# tp+tn A+D
|
9
9
|
# Acc = ------------- = ---------
|
10
10
|
# tp+fn+tn+fp A+B+C+D
|
11
|
+
#
|
12
|
+
# ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974)
|
11
13
|
#
|
12
14
|
class Accuracy < BaseDiscrete
|
13
15
|
# this algo outputs weight for each feature
|
14
|
-
@algo_type = :
|
16
|
+
@algo_type = :filter_by_feature_weighting
|
15
17
|
|
16
18
|
private
|
17
19
|
|
data/lib/fselector/fileio.rb
CHANGED
@@ -107,6 +107,11 @@ module FileIO
|
|
107
107
|
ifs.close if not ifs == $stdin
|
108
108
|
|
109
109
|
set_data(data)
|
110
|
+
|
111
|
+
# feature name-type pairs
|
112
|
+
each_feature do |f|
|
113
|
+
set_feature_type(f, :numeric)
|
114
|
+
end
|
110
115
|
end # data_from_libsvm
|
111
116
|
|
112
117
|
|
@@ -134,7 +139,13 @@ module FileIO
|
|
134
139
|
each_sample do |k, s|
|
135
140
|
ofs.print "#{k2idx[k]} "
|
136
141
|
s.keys.sort { |x, y| f2idx[x] <=> f2idx[y] }.each do |f|
|
137
|
-
|
142
|
+
if not s[f].is_a? Numeric
|
143
|
+
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
144
|
+
" LibSVM format only supports the following feature type: \n"+
|
145
|
+
" integer, real, numeric, float, double, continuous"
|
146
|
+
else
|
147
|
+
ofs.print " #{f2idx[f]}:#{s[f]}" if not s[f].zero? # implicit mode
|
148
|
+
end
|
138
149
|
end
|
139
150
|
ofs.puts
|
140
151
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fselector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05
|
12
|
+
date: 2012-11-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rinruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &24606276 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,8 +21,8 @@ dependencies:
|
|
21
21
|
version: 2.0.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
-
description: FSelector is a Ruby gem that aims to integrate various feature selection
|
24
|
+
version_requirements: *24606276
|
25
|
+
description: FSelector is a Ruby gem that aims to integrate various feature selection
|
26
26
|
algorithms and related functions into one single package. Welcome to contact me
|
27
27
|
(need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
|
28
28
|
FSelector allows user to perform feature selection by using either a single algorithm
|
@@ -125,5 +125,5 @@ rubyforge_project:
|
|
125
125
|
rubygems_version: 1.8.17
|
126
126
|
signing_key:
|
127
127
|
specification_version: 3
|
128
|
-
summary: feature selection
|
128
|
+
summary: ! 'FSelector: a Ruby gem for feature selection'
|
129
129
|
test_files: []
|