fselector 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +6 -0
- data/README.md +20 -19
- data/lib/fselector.rb +1 -1
- data/lib/fselector/algo_base/base.rb +3 -3
- data/lib/fselector/algo_both/LasVegasFilter.rb +1 -1
- data/lib/fselector/algo_both/LasVegasIncremental.rb +1 -1
- data/lib/fselector/algo_both/Random.rb +1 -1
- data/lib/fselector/algo_both/RandomSubset.rb +1 -1
- data/lib/fselector/algo_continuous/BSS_WSS.rb +1 -1
- data/lib/fselector/algo_continuous/CFS_c.rb +1 -1
- data/lib/fselector/algo_continuous/F-Test.rb +1 -1
- data/lib/fselector/algo_continuous/KS-CCBF.rb +1 -1
- data/lib/fselector/algo_continuous/KS-Test.rb +1 -1
- data/lib/fselector/algo_continuous/P-Metric.rb +1 -1
- data/lib/fselector/algo_continuous/ReliefF_c.rb +1 -1
- data/lib/fselector/algo_continuous/Relief_c.rb +1 -1
- data/lib/fselector/algo_continuous/T-Score.rb +1 -1
- data/lib/fselector/algo_continuous/WilcoxonRankSum.rb +1 -1
- data/lib/fselector/algo_discrete/Accuracy.rb +3 -1
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +1 -1
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +1 -1
- data/lib/fselector/algo_discrete/CFS_d.rb +1 -1
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +1 -1
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +1 -1
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +1 -1
- data/lib/fselector/algo_discrete/F1Measure.rb +1 -1
- data/lib/fselector/algo_discrete/FastCorrelationBasedFilter.rb +1 -1
- data/lib/fselector/algo_discrete/FishersExactTest.rb +1 -1
- data/lib/fselector/algo_discrete/G-Mean.rb +1 -1
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +1 -1
- data/lib/fselector/algo_discrete/GiniIndex.rb +1 -1
- data/lib/fselector/algo_discrete/INTERACT.rb +1 -1
- data/lib/fselector/algo_discrete/InformationGain.rb +1 -1
- data/lib/fselector/algo_discrete/J-Measure.rb +1 -1
- data/lib/fselector/algo_discrete/KL-Divergence.rb +1 -1
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +1 -1
- data/lib/fselector/algo_discrete/McNemarsTest.rb +1 -1
- data/lib/fselector/algo_discrete/MutualInformation.rb +1 -1
- data/lib/fselector/algo_discrete/OddsRatio.rb +1 -1
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +1 -1
- data/lib/fselector/algo_discrete/Power.rb +1 -1
- data/lib/fselector/algo_discrete/Precision.rb +1 -1
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +1 -1
- data/lib/fselector/algo_discrete/ReliefF_d.rb +1 -1
- data/lib/fselector/algo_discrete/Relief_d.rb +1 -1
- data/lib/fselector/algo_discrete/Sensitivity.rb +1 -1
- data/lib/fselector/algo_discrete/Specificity.rb +1 -1
- data/lib/fselector/algo_discrete/SymmetricalUncertainty.rb +1 -1
- data/lib/fselector/fileio.rb +12 -1
- metadata +6 -6
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -1,21 +1,22 @@
|
|
1
|
-
FSelector: a Ruby gem for feature selection
|
2
|
-
|
1
|
+
FSelector: a Ruby gem for feature selection
|
2
|
+
===========================================
|
3
3
|
|
4
4
|
**Home**: [https://rubygems.org/gems/fselector](https://rubygems.org/gems/fselector)
|
5
5
|
**Source Code**: [https://github.com/need47/fselector](https://github.com/need47/fselector)
|
6
|
-
**Documentation
|
6
|
+
**Documentation**: [http://rubydoc.info/gems/fselector/frames](http://rubydoc.info/gems/fselector/frames)
|
7
|
+
**Publication**: [Bioinformatics, 2012, 28, 2851-2852](http://bioinformatics.oxfordjournals.org/content/28/21/2851)
|
7
8
|
**Author**: Tiejun Cheng
|
8
9
|
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
9
10
|
**Copyright**: 2012
|
10
11
|
**License**: MIT License
|
11
|
-
**Latest Version**: 1.
|
12
|
-
**Release Date**: 2012-05
|
12
|
+
**Latest Version**: 1.4.0
|
13
|
+
**Release Date**: 2012-11-05
|
13
14
|
|
14
15
|
Synopsis
|
15
16
|
--------
|
16
17
|
|
17
18
|
FSelector is a Ruby gem that aims to integrate various feature
|
18
|
-
selection
|
19
|
+
selection algorithms and related functions into one single
|
19
20
|
package. You are welcome to contact me (need47@gmail.com) if you'd like to
|
20
21
|
contribute your own algorithms or report a bug. FSelector allows users
|
21
22
|
to perform feature selection by using either a single algorithm or an
|
@@ -49,18 +50,18 @@ Feature List
|
|
49
50
|
Accuracy Acc weighting multi-class discrete
|
50
51
|
AccuracyBalanced Acc2 weighting multi-class discrete
|
51
52
|
BiNormalSeparation BNS weighting multi-class discrete
|
52
|
-
CFS_d CFS_d
|
53
|
+
CFS_d CFS_d searching multi-class discrete
|
53
54
|
ChiSquaredTest CHI weighting multi-class discrete
|
54
55
|
CorrelationCoefficient CC weighting multi-class discrete
|
55
56
|
DocumentFrequency DF weighting multi-class discrete
|
56
57
|
F1Measure F1 weighting multi-class discrete
|
57
58
|
FishersExactTest FET weighting multi-class discrete
|
58
|
-
FastCorrelationBasedFilter FCBF
|
59
|
+
FastCorrelationBasedFilter FCBF searching multi-class discrete
|
59
60
|
GiniIndex GI weighting multi-class discrete
|
60
61
|
GMean GM weighting multi-class discrete
|
61
62
|
GSSCoefficient GSS weighting multi-class discrete
|
62
63
|
InformationGain IG weighting multi-class discrete
|
63
|
-
INTERACT INTERACT
|
64
|
+
INTERACT INTERACT searching multi-class discrete
|
64
65
|
JMeasure JM weighting multi-class discrete
|
65
66
|
KLDivergence KLD weighting multi-class discrete
|
66
67
|
MatthewsCorrelationCoefficient MCC, PHI weighting multi-class discrete
|
@@ -78,25 +79,25 @@ Feature List
|
|
78
79
|
Specificity SP weighting multi-class discrete
|
79
80
|
SymmetricalUncertainty SU weighting multi-class discrete
|
80
81
|
BetweenWithinClassesSumOfSquare BSS_WSS weighting multi-class continuous
|
81
|
-
CFS_c CFS_c
|
82
|
+
CFS_c CFS_c searching multi-class continuous
|
82
83
|
FTest FT weighting multi-class continuous
|
83
|
-
KS_CCBF KS_CCBF
|
84
|
+
KS_CCBF KS_CCBF searching multi-class continuous
|
84
85
|
KSTest KST weighting two-class continuous
|
85
86
|
PMetric PM weighting two-class continuous
|
86
87
|
Relief_c Relief_c weighting two-class continuous
|
87
88
|
ReliefF_c ReliefF_c weighting multi-class continuous
|
88
89
|
TScore TS weighting two-class continuous
|
89
90
|
WilcoxonRankSum WRS weighting two-class continuous
|
90
|
-
LasVegasFilter LVF
|
91
|
-
LasVegasIncremental LVI
|
91
|
+
LasVegasFilter LVF searching multi-class discrete, continuous, mixed
|
92
|
+
LasVegasIncremental LVI searching multi-class discrete, continuous, mixed
|
92
93
|
Random Rand weighting multi-class discrete, continuous, mixed
|
93
|
-
RandomSubset RandS
|
94
|
+
RandomSubset RandS searching multi-class discrete, continuous, mixed
|
94
95
|
|
95
96
|
**note for feature selection interface:**
|
96
|
-
there are two types of filter
|
97
|
+
there are two types of filter algorithms: filter\_by\_feature\_weighting and filter\_by\_feature\_searching
|
97
98
|
|
98
|
-
- for
|
99
|
-
- for
|
99
|
+
- for the former: use either **select\_feature\_by\_score!** or **select\_feature\_by\_rank!**
|
100
|
+
- for the latter: use **select\_feature!**
|
100
101
|
|
101
102
|
**3. feature selection approaches**
|
102
103
|
|
@@ -205,7 +206,7 @@ Usage
|
|
205
206
|
# an ensemble of 40 feature selectors with 90% data by random sampling
|
206
207
|
re = FSelector::EnsembleSingle.new(r, 40, 0.90, :random_sampling)
|
207
208
|
|
208
|
-
# read SPECT data set
|
209
|
+
# read SPECT data set (under the test/ directory)
|
209
210
|
re.data_from_csv('test/SPECT_train.csv')
|
210
211
|
|
211
212
|
# number of features before feature selection
|
@@ -225,7 +226,7 @@ Usage
|
|
225
226
|
# creating an ensemble of feature selectors by using
|
226
227
|
# two feature selection algorithms: InformationGain (IG) and Relief_d.
|
227
228
|
# note: can be 2+ algorithms, as long as they are of the same type,
|
228
|
-
# either
|
229
|
+
# either filter_by_feature_weighting or filter_by_feature_searching
|
229
230
|
|
230
231
|
# test for the type of feature weighting algorithms
|
231
232
|
r1 = FSelector::IG.new
|
data/lib/fselector.rb
CHANGED
@@ -343,7 +343,7 @@ module FSelector
|
|
343
343
|
# the subset selection type of algorithms, see {file:README.md}
|
344
344
|
#
|
345
345
|
def select_feature!
|
346
|
-
if not self.algo_type == :
|
346
|
+
if not self.algo_type == :filter_by_feature_searching
|
347
347
|
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
348
348
|
" select_feature! is the interface for the type of feature subset selection algorithms only. \n" +
|
349
349
|
" please consider select_featue_by_score! or select_feature_by_rank!, \n" +
|
@@ -377,7 +377,7 @@ module FSelector
|
|
377
377
|
# the weighting type of algorithms, see {file:README.md}
|
378
378
|
#
|
379
379
|
def select_feature_by_score!(criterion, my_scores=nil)
|
380
|
-
if not self.algo_type == :
|
380
|
+
if not self.algo_type == :filter_by_feature_weighting
|
381
381
|
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
382
382
|
" select_feature_by_score! is the interface for the type of feature weighting algorithms only. \n" +
|
383
383
|
" please consider select_featue!, \n" +
|
@@ -411,7 +411,7 @@ module FSelector
|
|
411
411
|
# the weighting type of algorithms, see {file:README.md}
|
412
412
|
#
|
413
413
|
def select_feature_by_rank!(criterion, my_ranks=nil)
|
414
|
-
if not self.algo_type == :
|
414
|
+
if not self.algo_type == :filter_by_feature_weighting
|
415
415
|
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
416
416
|
" select_feature_by_rank! is the interface for the type of feature weighting algorithms only. \n" +
|
417
417
|
" please consider select_featue!, \n" +
|
@@ -8,10 +8,12 @@ module FSelector
|
|
8
8
|
# tp+tn A+D
|
9
9
|
# Acc = ------------- = ---------
|
10
10
|
# tp+fn+tn+fp A+B+C+D
|
11
|
+
#
|
12
|
+
# ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974)
|
11
13
|
#
|
12
14
|
class Accuracy < BaseDiscrete
|
13
15
|
# this algo outputs weight for each feature
|
14
|
-
@algo_type = :
|
16
|
+
@algo_type = :filter_by_feature_weighting
|
15
17
|
|
16
18
|
private
|
17
19
|
|
data/lib/fselector/fileio.rb
CHANGED
@@ -107,6 +107,11 @@ module FileIO
|
|
107
107
|
ifs.close if not ifs == $stdin
|
108
108
|
|
109
109
|
set_data(data)
|
110
|
+
|
111
|
+
# feature name-type pairs
|
112
|
+
each_feature do |f|
|
113
|
+
set_feature_type(f, :numeric)
|
114
|
+
end
|
110
115
|
end # data_from_libsvm
|
111
116
|
|
112
117
|
|
@@ -134,7 +139,13 @@ module FileIO
|
|
134
139
|
each_sample do |k, s|
|
135
140
|
ofs.print "#{k2idx[k]} "
|
136
141
|
s.keys.sort { |x, y| f2idx[x] <=> f2idx[y] }.each do |f|
|
137
|
-
|
142
|
+
if not s[f].is_a? Numeric
|
143
|
+
abort "[#{__FILE__}@#{__LINE__}]: \n"+
|
144
|
+
" LibSVM format only supports the following feature type: \n"+
|
145
|
+
" integer, real, numeric, float, double, continuous"
|
146
|
+
else
|
147
|
+
ofs.print " #{f2idx[f]}:#{s[f]}" if not s[f].zero? # implicit mode
|
148
|
+
end
|
138
149
|
end
|
139
150
|
ofs.puts
|
140
151
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fselector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05
|
12
|
+
date: 2012-11-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rinruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &24606276 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,8 +21,8 @@ dependencies:
|
|
21
21
|
version: 2.0.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
-
description: FSelector is a Ruby gem that aims to integrate various feature selection
|
24
|
+
version_requirements: *24606276
|
25
|
+
description: FSelector is a Ruby gem that aims to integrate various feature selection
|
26
26
|
algorithms and related functions into one single package. Welcome to contact me
|
27
27
|
(need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
|
28
28
|
FSelector allows user to perform feature selection by using either a single algorithm
|
@@ -125,5 +125,5 @@ rubyforge_project:
|
|
125
125
|
rubygems_version: 1.8.17
|
126
126
|
signing_key:
|
127
127
|
specification_version: 3
|
128
|
-
summary: feature selection
|
128
|
+
summary: ! 'FSelector: a Ruby gem for feature selection'
|
129
129
|
test_files: []
|