fselector 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +9 -0
- data/README.md +12 -10
- data/lib/fselector.rb +2 -1
- data/lib/fselector/algo_continuous/BSS_WSS.rb +48 -0
- data/lib/fselector/algo_continuous/PMetric.rb +3 -7
- data/lib/fselector/algo_continuous/TScore.rb +3 -7
- data/lib/fselector/algo_continuous/WilcoxonRankSum.rb +49 -0
- metadata +6 -4
data/ChangeLog
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
2012-04-20 Tiejun Cheng <need47@gmail.com>
|
2
|
+
|
3
|
+
* update to version 0.7.0
|
4
|
+
|
5
|
+
2012-04-19 Tiejun Cheng <need47@gmail.com>
|
6
|
+
|
7
|
+
* add new algorithm BetweenWithinClassesSumOfSquare (BSS_WSS) for continuous feature
|
8
|
+
* add new algorithm WilcoxonRankSum (WRS) for continuous feature
|
9
|
+
|
1
10
|
2012-04-18 Tiejun Cheng <need47@gmail.com>
|
2
11
|
|
3
12
|
* require the RinRuby gem (http://rinruby.ddahl.org) to access the
|
data/README.md
CHANGED
@@ -3,13 +3,13 @@ FSelector: a Ruby gem for feature selection and ranking
|
|
3
3
|
|
4
4
|
**Home** [https://rubygems.org/gems/fselector](https://rubygems.org/gems/fselector)
|
5
5
|
**Source Code**: [https://github.com/need47/fselector](https://github.com/need47/fselector)
|
6
|
-
**Documentation** [http://rubydoc.info/
|
6
|
+
**Documentation** [http://rubydoc.info/gems/fselector/frames](http://rubydoc.info/gems/fselector/frames)
|
7
7
|
**Author**: Tiejun Cheng
|
8
8
|
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
9
9
|
**Copyright**: 2012
|
10
10
|
**License**: MIT License
|
11
|
-
**Latest Version**: 0.6.0
|
12
|
-
**Release Date**: April
|
11
|
+
**Latest Version**: 0.7.0
|
12
|
+
**Release Date**: April 20 2012
|
13
13
|
|
14
14
|
Synopsis
|
15
15
|
--------
|
@@ -41,8 +41,8 @@ Feature List
|
|
41
41
|
|
42
42
|
**2. available feature selection/ranking algorithms**
|
43
43
|
|
44
|
-
algorithm alias feature_type
|
45
|
-
|
44
|
+
algorithm alias feature_type applicability
|
45
|
+
--------------------------------------------------------------------------------------
|
46
46
|
Accuracy Acc discrete
|
47
47
|
AccuracyBalanced Acc2 discrete
|
48
48
|
BiNormalSeparation BNS discrete
|
@@ -67,16 +67,18 @@ Feature List
|
|
67
67
|
ProbabilityRatio PR discrete
|
68
68
|
Random Random discrete
|
69
69
|
Recall Recall discrete
|
70
|
-
Relief_d Relief_d discrete
|
70
|
+
Relief_d Relief_d discrete two-class, no missing data
|
71
71
|
ReliefF_d ReliefF_d discrete
|
72
72
|
Sensitivity SN, Recall discrete
|
73
73
|
Specificity SP discrete
|
74
74
|
SymmetricalUncertainty SU discrete
|
75
|
+
BetweenWithinClassesSumOfSquare BSS_WSS continuous
|
75
76
|
CFS_c CFS_c continuous
|
76
|
-
PMetric PM continuous
|
77
|
-
Relief_c Relief_c continuous
|
77
|
+
PMetric PM continuous two-class
|
78
|
+
Relief_c Relief_c continuous two-class, no missing data
|
78
79
|
ReliefF_c ReliefF_c continuous
|
79
|
-
TScore TS continuous
|
80
|
+
TScore TS continuous two-class
|
81
|
+
WilcoxonRankSum WRS continuous two-class
|
80
82
|
|
81
83
|
**note for feature selection interface:**
|
82
84
|
- for the algorithms of CFS\_d, FCBF and CFS\_c, use select\_feature!
|
@@ -120,7 +122,7 @@ To install FSelector, use the following command:
|
|
120
122
|
as a seamless bridge to access the statistical routines in the R package (http://www.r-project.org),
|
121
123
|
which will greatly expand the inclusion of algorithms to FSelector, especially for those relying
|
122
124
|
on statistical test. To this end, please pre-install the R package. RinRuby should have been
|
123
|
-
auto-installed with FSelector.
|
125
|
+
auto-installed with FSelector by the above command.
|
124
126
|
|
125
127
|
Usage
|
126
128
|
-----
|
data/lib/fselector.rb
CHANGED
data/lib/fselector/algo_continuous/BSS_WSS.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
#
|
6
|
+
# between-within classes sum of squares (BSS/WSS) for continuous feature
|
7
|
+
#
|
8
|
+
# sigma_i sigma_k I(y_i=k)(xbar_kj - xbar_xj)^2
|
9
|
+
# BSS_WSS(f) = ----------------------------------------------
|
10
|
+
# sigma_i sigma_k I(y_i=k)(x_ij - xbar_kj)^2
|
11
|
+
#
|
12
|
+
# ref: [Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data](http://amstat.tandfonline.com/doi/abs/10.1198/016214502753479248)
|
13
|
+
#
|
14
|
+
class BetweenWithinClassesSumOfSquare < BaseContinuous
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
# calculate contribution of each feature (f) across all classes
|
19
|
+
def calc_contribution(f)
|
20
|
+
xbar_xj = get_feature_values(f).mean
|
21
|
+
|
22
|
+
a, b, s = 0.0, 0.0, 0.0
|
23
|
+
|
24
|
+
each_sample do |k, sam|
|
25
|
+
# get the feature values for class k, without missing values
|
26
|
+
xbar_kj = get_feature_values(f, nil, k).mean
|
27
|
+
a += (xbar_kj - xbar_xj)**2
|
28
|
+
|
29
|
+
if sam.has_key? f
|
30
|
+
x_ij = sam[f]
|
31
|
+
b += (x_ij - xbar_kj)**2
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
s = a/b if not b.zero?
|
36
|
+
|
37
|
+
set_feature_score(f, :BEST, s)
|
38
|
+
end # calc_contribution
|
39
|
+
|
40
|
+
|
41
|
+
end # class
|
42
|
+
|
43
|
+
|
44
|
+
# shortcut so that you can use FSelector::BSS_WSS instead of FSelector::BetweenWithinClassesSumOfSquare
|
45
|
+
BSS_WSS = BetweenWithinClassesSumOfSquare
|
46
|
+
|
47
|
+
|
48
|
+
end # module
|
data/lib/fselector/algo_continuous/PMetric.rb
CHANGED
@@ -24,14 +24,10 @@ module FSelector
|
|
24
24
|
"suitable only for two-class problem with continuous feature"
|
25
25
|
end
|
26
26
|
|
27
|
-
# collect data for class 1 and 2, respectively
|
28
|
-
s1, s2 = [], []
|
27
|
+
# collect data for class 1 and 2, respectively
|
29
28
|
k1, k2 = get_classes
|
30
|
-
|
31
|
-
|
32
|
-
s1 << ss[f] if k == k1 and ss.has_key? f
|
33
|
-
s2 << ss[f] if k == k2 and ss.has_key? f
|
34
|
-
end
|
29
|
+
s1 = get_feature_values(f, nil, k1)
|
30
|
+
s2 = get_feature_values(f, nil, k2)
|
35
31
|
|
36
32
|
# calc
|
37
33
|
s = 0.0
|
data/lib/fselector/algo_continuous/TScore.rb
CHANGED
@@ -24,14 +24,10 @@ module FSelector
|
|
24
24
|
"suitable only for two-class problem with continuous feature"
|
25
25
|
end
|
26
26
|
|
27
|
-
# collect data for class 1 and 2, respectively
|
28
|
-
s1, s2 = [], []
|
27
|
+
# collect data for class 1 and 2, respectively
|
29
28
|
k1, k2 = get_classes
|
30
|
-
|
31
|
-
|
32
|
-
s1 << ss[f] if k == k1 and ss.has_key? f
|
33
|
-
s2 << ss[f] if k == k2 and ss.has_key? f
|
34
|
-
end
|
29
|
+
s1 = get_feature_values(f, nil, k1)
|
30
|
+
s2 = get_feature_values(f, nil, k2)
|
35
31
|
|
36
32
|
# calc
|
37
33
|
n1, n2 = s1.size, s2.size
|
data/lib/fselector/algo_continuous/WilcoxonRankSum.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
#
|
2
|
+
# FSelector: a Ruby gem for feature selection and ranking
|
3
|
+
#
|
4
|
+
module FSelector
|
5
|
+
#
|
6
|
+
# Wilcoxon Rank Sum (WRS) for continuous feature
|
7
|
+
#
|
8
|
+
# @note WRS applicable only to two-class problems
|
9
|
+
#
|
10
|
+
# for WRS (p-value), the smaller, the better, but we intentionally negate it
|
11
|
+
# so that the larger is always the better (consistent with other algorithms).
|
12
|
+
# R equivalent: wilcox.test
|
13
|
+
#
|
14
|
+
# ref: [An Efficient and Robust Statistical Modeling Approach to Discover Differentially Expressed Genes Using Genomic Expression Profiles](http://genome.cshlp.org/content/11/7/1227)
|
15
|
+
#
|
16
|
+
class WilcoxonRankSum < BaseContinuous
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# calculate contribution of each feature (f) across all classes
|
21
|
+
def calc_contribution(f)
|
22
|
+
if not get_classes.size == 2
|
23
|
+
abort "[#{__FILE__}@#{__LINE__}]: "+
|
24
|
+
"suitable only for two-class problem with continuous feature"
|
25
|
+
end
|
26
|
+
|
27
|
+
# collect data for class 1 and 2, respectively
|
28
|
+
k1, k2 = get_classes
|
29
|
+
R.s1 = get_feature_values(f, nil, k1) # class 1
|
30
|
+
R.s2 = get_feature_values(f, nil, k2) # class 2
|
31
|
+
|
32
|
+
# Wilcoxon rank sum test
|
33
|
+
R.eval "rv <- wilcox.test(s1, s2)$p.value"
|
34
|
+
|
35
|
+
# intentionally negate it
|
36
|
+
s = -1.0 * R.rv # pull the p-value from R
|
37
|
+
|
38
|
+
set_feature_score(f, :BEST, s)
|
39
|
+
end # calc_contribution
|
40
|
+
|
41
|
+
|
42
|
+
end # class
|
43
|
+
|
44
|
+
|
45
|
+
# shortcut so that you can use FSelector::WRS instead of FSelector::WilcoxonRankSum
|
46
|
+
WRS = WilcoxonRankSum
|
47
|
+
|
48
|
+
|
49
|
+
end # module
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fselector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rinruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &26347848 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 2.0.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *26347848
|
25
25
|
description: FSelector is a Ruby gem that aims to integrate various feature selection/ranking
|
26
26
|
algorithms and related functions into one single package. Welcome to contact me
|
27
27
|
(need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
|
@@ -49,11 +49,13 @@ files:
|
|
49
49
|
- lib/fselector/algo_base/base_discrete.rb
|
50
50
|
- lib/fselector/algo_base/base_Relief.rb
|
51
51
|
- lib/fselector/algo_base/base_ReliefF.rb
|
52
|
+
- lib/fselector/algo_continuous/BSS_WSS.rb
|
52
53
|
- lib/fselector/algo_continuous/CFS_c.rb
|
53
54
|
- lib/fselector/algo_continuous/PMetric.rb
|
54
55
|
- lib/fselector/algo_continuous/ReliefF_c.rb
|
55
56
|
- lib/fselector/algo_continuous/Relief_c.rb
|
56
57
|
- lib/fselector/algo_continuous/TScore.rb
|
58
|
+
- lib/fselector/algo_continuous/WilcoxonRankSum.rb
|
57
59
|
- lib/fselector/algo_discrete/Accuracy.rb
|
58
60
|
- lib/fselector/algo_discrete/AccuracyBalanced.rb
|
59
61
|
- lib/fselector/algo_discrete/BiNormalSeparation.rb
|