fselector 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +9 -0
- data/README.md +12 -10
- data/lib/fselector.rb +2 -1
- data/lib/fselector/algo_continuous/BSS_WSS.rb +48 -0
- data/lib/fselector/algo_continuous/PMetric.rb +3 -7
- data/lib/fselector/algo_continuous/TScore.rb +3 -7
- data/lib/fselector/algo_continuous/WilcoxonRankSum.rb +49 -0
- metadata +6 -4
data/ChangeLog
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
2012-04-20 Tiejun Cheng <need47@gmail.com>
|
2
|
+
|
3
|
+
* update to version 0.7.0
|
4
|
+
|
5
|
+
2012-04-19 Tiejun Cheng <need47@gmail.com>
|
6
|
+
|
7
|
+
* add new algorithm BetweenWithinClassesSumOfSquare (BSS_WSS) for continuous feature
|
8
|
+
* add new algorithm WilcoxonRankSum (WRS) for continuous feature
|
9
|
+
|
1
10
|
2012-04-18 Tiejun Cheng <need47@gmail.com>
|
2
11
|
|
3
12
|
* require the RinRuby gem (http://rinruby.ddahl.org) to access the
|
data/README.md
CHANGED
@@ -3,13 +3,13 @@ FSelector: a Ruby gem for feature selection and ranking
|
|
3
3
|
|
4
4
|
**Home** [https://rubygems.org/gems/fselector](https://rubygems.org/gems/fselector)
|
5
5
|
**Source Code**: [https://github.com/need47/fselector](https://github.com/need47/fselector)
|
6
|
-
**Documentation** [http://rubydoc.info/
|
6
|
+
**Documentation** [http://rubydoc.info/gems/fselector/frames](http://rubydoc.info/gems/fselector/frames)
|
7
7
|
**Author**: Tiejun Cheng
|
8
8
|
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
9
9
|
**Copyright**: 2012
|
10
10
|
**License**: MIT License
|
11
|
-
**Latest Version**: 0.
|
12
|
-
**Release Date**: April
|
11
|
+
**Latest Version**: 0.7.0
|
12
|
+
**Release Date**: April 20 2012
|
13
13
|
|
14
14
|
Synopsis
|
15
15
|
--------
|
@@ -41,8 +41,8 @@ Feature List
|
|
41
41
|
|
42
42
|
**2. available feature selection/ranking algorithms**
|
43
43
|
|
44
|
-
algorithm alias feature_type
|
45
|
-
|
44
|
+
algorithm alias feature_type applicability
|
45
|
+
--------------------------------------------------------------------------------------
|
46
46
|
Accuracy Acc discrete
|
47
47
|
AccuracyBalanced Acc2 discrete
|
48
48
|
BiNormalSeparation BNS discrete
|
@@ -67,16 +67,18 @@ Feature List
|
|
67
67
|
ProbabilityRatio PR discrete
|
68
68
|
Random Random discrete
|
69
69
|
Recall Recall discrete
|
70
|
-
Relief_d Relief_d discrete
|
70
|
+
Relief_d Relief_d discrete two-class, no missing data
|
71
71
|
ReliefF_d ReliefF_d discrete
|
72
72
|
Sensitivity SN, Recall discrete
|
73
73
|
Specificity SP discrete
|
74
74
|
SymmetricalUncertainty SU discrete
|
75
|
+
BetweenWithinClassesSumOfSquare BSS_WSS continuous
|
75
76
|
CFS_c CFS_c continuous
|
76
|
-
PMetric PM continuous
|
77
|
-
Relief_c Relief_c continuous
|
77
|
+
PMetric PM continuous two-class
|
78
|
+
Relief_c Relief_c continuous two-class, no missing data
|
78
79
|
ReliefF_c ReliefF_c continuous
|
79
|
-
TScore TS continuous
|
80
|
+
TScore TS continuous two-class
|
81
|
+
WilcoxonRankSum WRS continuous two-class
|
80
82
|
|
81
83
|
**note for feature selection interface:**
|
82
84
|
- for the algorithms of CFS\_d, FCBF and CFS\_c, use select\_feature!
|
@@ -120,7 +122,7 @@ To install FSelector, use the following command:
|
|
120
122
|
as a seamless bridge to access the statistical routines in the R package (http://www.r-project.org),
|
121
123
|
which will greatly expand the inclusion of algorithms to FSelector, especially for those relying
|
122
124
|
on statistical test. To this end, please pre-install the R package. RinRuby should have been
|
123
|
-
auto-installed with FSelector.
|
125
|
+
auto-installed with FSelector by the above command.
|
124
126
|
|
125
127
|
Usage
|
126
128
|
-----
|
data/lib/fselector.rb
CHANGED
@@ -0,0 +1,48 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
#
|
6
|
+
# between-within classes sum of squares (BSS/WSS) for continuous feature
|
7
|
+
#
|
8
|
+
# sigma_i sigma_k I(yi=k)(xbar_kj - xbar_xj)2
|
9
|
+
# BSS_WSS(f) = ----------------------------------------------
|
10
|
+
# sigma_i sigma_k I(yi=k)(x_ij - xbar_kj)2
|
11
|
+
#
|
12
|
+
# ref: [Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data](http://amstat.tandfonline.com/doi/abs/10.1198/016214502753479248)
|
13
|
+
#
|
14
|
+
class BetweenWithinClassesSumOfSquare < BaseContinuous
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
# calculate contribution of each feature (f) across all classes
|
19
|
+
def calc_contribution(f)
|
20
|
+
xbar_xj = get_feature_values(f).mean
|
21
|
+
|
22
|
+
a, b, s = 0.0, 0.0, 0.0
|
23
|
+
|
24
|
+
each_sample do |k, sam|
|
25
|
+
# get the feature values for class k, without missing values
|
26
|
+
xbar_kj = get_feature_values(f, nil, k).mean
|
27
|
+
a += (xbar_kj - xbar_xj)**2
|
28
|
+
|
29
|
+
if sam.has_key? f
|
30
|
+
x_ij = sam[f]
|
31
|
+
b += (x_ij - xbar_kj)**2
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
s = a/b if not b.zero?
|
36
|
+
|
37
|
+
set_feature_score(f, :BEST, s)
|
38
|
+
end # calc_contribution
|
39
|
+
|
40
|
+
|
41
|
+
end # class
|
42
|
+
|
43
|
+
|
44
|
+
# shortcut so that you can use FSelector::BSS_WSS instead of FSelector::BetweenWithinClassesSumOfSquare
|
45
|
+
BSS_WSS = BetweenWithinClassesSumOfSquare
|
46
|
+
|
47
|
+
|
48
|
+
end # module
|
@@ -24,14 +24,10 @@ module FSelector
|
|
24
24
|
"suitable only for two-class problem with continuous feature"
|
25
25
|
end
|
26
26
|
|
27
|
-
# collect data for class 1 and 2, respectively
|
28
|
-
s1, s2 = [], []
|
27
|
+
# collect data for class 1 and 2, respectively
|
29
28
|
k1, k2 = get_classes
|
30
|
-
|
31
|
-
|
32
|
-
s1 << ss[f] if k == k1 and ss.has_key? f
|
33
|
-
s2 << ss[f] if k == k2 and ss.has_key? f
|
34
|
-
end
|
29
|
+
s1 = get_feature_values(f, nil, k1)
|
30
|
+
s2 = get_feature_values(f, nil, k2)
|
35
31
|
|
36
32
|
# calc
|
37
33
|
s = 0.0
|
@@ -24,14 +24,10 @@ module FSelector
|
|
24
24
|
"suitable only for two-class problem with continuous feature"
|
25
25
|
end
|
26
26
|
|
27
|
-
# collect data for class 1 and 2, respectively
|
28
|
-
s1, s2 = [], []
|
27
|
+
# collect data for class 1 and 2, respectively
|
29
28
|
k1, k2 = get_classes
|
30
|
-
|
31
|
-
|
32
|
-
s1 << ss[f] if k == k1 and ss.has_key? f
|
33
|
-
s2 << ss[f] if k == k2 and ss.has_key? f
|
34
|
-
end
|
29
|
+
s1 = get_feature_values(f, nil, k1)
|
30
|
+
s2 = get_feature_values(f, nil, k2)
|
35
31
|
|
36
32
|
# calc
|
37
33
|
n1, n2 = s1.size, s2.size
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
#
|
6
|
+
# Wilcoxon Rank Sum (WRS) for continuous feature
|
7
|
+
#
|
8
|
+
# @note WRS applicable only to two-class problems
|
9
|
+
#
|
10
|
+
# for WRS (p-value), the smaller, the better, but we intentionally negate it
|
11
|
+
# so that the larger is always the better (consistent with other algorithms).
|
12
|
+
# R equivalent: wilcox.test
|
13
|
+
#
|
14
|
+
# ref: [An Efficient and Robust Statistical Modeling Approach to Discover Differentially Expressed Genes Using Genomic Expression Profiles](http://genome.cshlp.org/content/11/7/1227)
|
15
|
+
#
|
16
|
+
class WilcoxonRankSum < BaseContinuous
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# calculate contribution of each feature (f) across all classes
|
21
|
+
def calc_contribution(f)
|
22
|
+
if not get_classes.size == 2
|
23
|
+
abort "[#{__FILE__}@#{__LINE__}]: "+
|
24
|
+
"suitable only for two-class problem with continuous feature"
|
25
|
+
end
|
26
|
+
|
27
|
+
# collect data for class 1 and 2, respectively
|
28
|
+
k1, k2 = get_classes
|
29
|
+
R.s1 = get_feature_values(f, nil, k1) # class 1
|
30
|
+
R.s2 = get_feature_values(f, nil, k2) # class 2
|
31
|
+
|
32
|
+
# Wilcoxon rank sum test
|
33
|
+
R.eval "rv <- wilcox.test(s1, s2)$p.value"
|
34
|
+
|
35
|
+
# intentionally negate it
|
36
|
+
s = -1.0 * R.rv # pull the p-value from R
|
37
|
+
|
38
|
+
set_feature_score(f, :BEST, s)
|
39
|
+
end # calc_contribution
|
40
|
+
|
41
|
+
|
42
|
+
end # class
|
43
|
+
|
44
|
+
|
45
|
+
# shortcut so that you can use FSelector::WRS instead of FSelector::WilcoxonRankSum
|
46
|
+
WRS = WilcoxonRankSum
|
47
|
+
|
48
|
+
|
49
|
+
end # module
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fselector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rinruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &26347848 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 2.0.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *26347848
|
25
25
|
description: FSelector is a Ruby gem that aims to integrate various feature selection/ranking
|
26
26
|
algorithms and related functions into one single package. Welcome to contact me
|
27
27
|
(need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
|
@@ -49,11 +49,13 @@ files:
|
|
49
49
|
- lib/fselector/algo_base/base_discrete.rb
|
50
50
|
- lib/fselector/algo_base/base_Relief.rb
|
51
51
|
- lib/fselector/algo_base/base_ReliefF.rb
|
52
|
+
- lib/fselector/algo_continuous/BSS_WSS.rb
|
52
53
|
- lib/fselector/algo_continuous/CFS_c.rb
|
53
54
|
- lib/fselector/algo_continuous/PMetric.rb
|
54
55
|
- lib/fselector/algo_continuous/ReliefF_c.rb
|
55
56
|
- lib/fselector/algo_continuous/Relief_c.rb
|
56
57
|
- lib/fselector/algo_continuous/TScore.rb
|
58
|
+
- lib/fselector/algo_continuous/WilcoxonRankSum.rb
|
57
59
|
- lib/fselector/algo_discrete/Accuracy.rb
|
58
60
|
- lib/fselector/algo_discrete/AccuracyBalanced.rb
|
59
61
|
- lib/fselector/algo_discrete/BiNormalSeparation.rb
|