feature_selection 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +12 -0
- data/VERSION +1 -1
- data/feature_selection.gemspec +6 -5
- data/lib/feature_selection/{chi_squared.rb → algorithms/chi_squared.rb} +5 -0
- data/lib/feature_selection/{frequency_based.rb → algorithms/frequency_based.rb} +15 -0
- data/lib/feature_selection/{mutual_information.rb → algorithms/mutual_information.rb} +5 -0
- data/lib/feature_selection/base.rb +7 -1
- data/lib/feature_selection/log_helpers.rb +25 -0
- data/lib/feature_selection.rb +4 -3
- data/spec/feature_selection/base_spec.rb +70 -0
- metadata +6 -5
data/README.rdoc
CHANGED
|
@@ -32,6 +32,18 @@ Example:
|
|
|
32
32
|
|
|
33
33
|
a.rank_features
|
|
34
34
|
#=> {:spam => {term => score, term => score}, :ham => {term => score}}
|
|
35
|
+
|
|
36
|
+
== Logging
|
|
37
|
+
|
|
38
|
+
There are two ways to log the activity of algorithms:
|
|
39
|
+
|
|
40
|
+
# Provide a path to somewhere to log to
|
|
41
|
+
log = File.expand_path(File.dirname(__FILE__) + '/log.txt')
|
|
42
|
+
FeatureSelection::MutualInformation.new(data, :log_to => log)
|
|
43
|
+
|
|
44
|
+
# Provide an existing Logger object
|
|
45
|
+
log = Logger.new('log.txt')
|
|
46
|
+
FeatureSelection::MutualInformation.new(data, :log_to => log)
|
|
35
47
|
|
|
36
48
|
== Copyright
|
|
37
49
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0.1
|
|
1
|
+
0.0.2
|
data/feature_selection.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{feature_selection}
|
|
8
|
-
s.version = "0.0.1"
|
|
8
|
+
s.version = "0.0.2"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["reddavis"]
|
|
12
|
-
s.date = %q{2010-01-
|
|
12
|
+
s.date = %q{2010-01-11}
|
|
13
13
|
s.description = %q{A library of feature selection algorithms}
|
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
|
15
15
|
s.extra_rdoc_files = [
|
|
@@ -26,10 +26,11 @@ Gem::Specification.new do |s|
|
|
|
26
26
|
"benchmark/benchmark.rb",
|
|
27
27
|
"feature_selection.gemspec",
|
|
28
28
|
"lib/feature_selection.rb",
|
|
29
|
+
"lib/feature_selection/algorithms/chi_squared.rb",
|
|
30
|
+
"lib/feature_selection/algorithms/frequency_based.rb",
|
|
31
|
+
"lib/feature_selection/algorithms/mutual_information.rb",
|
|
29
32
|
"lib/feature_selection/base.rb",
|
|
30
|
-
"lib/feature_selection/
|
|
31
|
-
"lib/feature_selection/frequency_based.rb",
|
|
32
|
-
"lib/feature_selection/mutual_information.rb",
|
|
33
|
+
"lib/feature_selection/log_helpers.rb",
|
|
33
34
|
"spec/feature_selection/base_spec.rb",
|
|
34
35
|
"spec/feature_selection/chi_squared_spec.rb",
|
|
35
36
|
"spec/feature_selection/frequency_based_spec.rb",
|
|
@@ -10,10 +10,15 @@ module FeatureSelection
|
|
|
10
10
|
#=> {:class => {'term' => score, 'term' => score}}
|
|
11
11
|
@results = {}
|
|
12
12
|
|
|
13
|
+
n = 1
|
|
14
|
+
|
|
13
15
|
classes.each do |klass|
|
|
14
16
|
@results[klass] = {}
|
|
15
17
|
|
|
16
18
|
uniq_terms.each do |term|
|
|
19
|
+
log_calculations_complete(n)
|
|
20
|
+
n += 1
|
|
21
|
+
|
|
17
22
|
answer = calculate_contribution(term, klass)
|
|
18
23
|
@results[klass][term] = answer
|
|
19
24
|
end #terms.each
|
|
@@ -2,14 +2,22 @@ module FeatureSelection
|
|
|
2
2
|
class FrequencyBased < Base
|
|
3
3
|
|
|
4
4
|
def rank_features
|
|
5
|
+
write_to_log("Starting to rank features...")
|
|
5
6
|
# Returns:
|
|
6
7
|
#=> {:class => {'term' => count, 'term' => count}}
|
|
7
8
|
@results = {}
|
|
8
9
|
|
|
10
|
+
# For logger
|
|
11
|
+
total_calculations = classes.size * terms.size
|
|
12
|
+
n = 1
|
|
13
|
+
|
|
9
14
|
classes.each do |klass|
|
|
10
15
|
@results[klass] = {}
|
|
11
16
|
|
|
12
17
|
terms.each do |term|
|
|
18
|
+
log_calculations_complete(n)
|
|
19
|
+
n += 1
|
|
20
|
+
|
|
13
21
|
if @results[klass].key?(term)
|
|
14
22
|
@results[klass][term] += 1
|
|
15
23
|
else
|
|
@@ -19,6 +27,13 @@ module FeatureSelection
|
|
|
19
27
|
end #classes.each
|
|
20
28
|
@results
|
|
21
29
|
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
# Overwrite Base#total_calculations
|
|
34
|
+
def total_calculations
|
|
35
|
+
classes.size * terms.size
|
|
36
|
+
end
|
|
22
37
|
|
|
23
38
|
end
|
|
24
39
|
end
|
|
@@ -21,10 +21,15 @@ module FeatureSelection
|
|
|
21
21
|
#=> {:class => {'term' => score, 'term' => score}}
|
|
22
22
|
@results = {}
|
|
23
23
|
|
|
24
|
+
n = 1
|
|
25
|
+
|
|
24
26
|
classes.each do |klass|
|
|
25
27
|
@results[klass] = {}
|
|
26
28
|
|
|
27
29
|
uniq_terms.each do |term|
|
|
30
|
+
log_calculations_complete(n)
|
|
31
|
+
n += 1
|
|
32
|
+
|
|
28
33
|
answer = calculate_contribution(term, klass)
|
|
29
34
|
@results[klass][term] = answer
|
|
30
35
|
end #terms.each
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
module FeatureSelection
|
|
2
2
|
class Base
|
|
3
|
+
include LogHelpers
|
|
3
4
|
|
|
4
|
-
def initialize(data)
|
|
5
|
+
def initialize(data, options={})
|
|
5
6
|
@data = data
|
|
7
|
+
create_log(options[:log_to]) if options[:log_to]
|
|
6
8
|
end
|
|
7
9
|
|
|
8
10
|
def classes
|
|
@@ -160,5 +162,9 @@ module FeatureSelection
|
|
|
160
162
|
@terms ||= @data.map {|x| x[1]}.flatten
|
|
161
163
|
end
|
|
162
164
|
|
|
165
|
+
def total_calculations
|
|
166
|
+
@total_calculations ||= uniq_terms.size * classes.size
|
|
167
|
+
end
|
|
168
|
+
|
|
163
169
|
end
|
|
164
170
|
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require 'logger'
|
|
2
|
+
|
|
3
|
+
module LogHelpers
|
|
4
|
+
# 2 outcomes
|
|
5
|
+
# - Filepath given: create a log to that file
|
|
6
|
+
# - Logger object given: write to that object
|
|
7
|
+
def create_log(log_to)
|
|
8
|
+
if log_to.is_a?(Logger)
|
|
9
|
+
@log = log_to
|
|
10
|
+
else
|
|
11
|
+
@log = Logger.new(log_to)
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def write_to_log(message)
|
|
16
|
+
if @log
|
|
17
|
+
@log.info(message)
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Writes the number of calculations completed to the log
|
|
22
|
+
def log_calculations_complete(n)
|
|
23
|
+
write_to_log("#{n}/#{total_calculations} calculations complete.")
|
|
24
|
+
end
|
|
25
|
+
end
|
data/lib/feature_selection.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/log_helpers')
|
|
1
2
|
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/base')
|
|
2
|
-
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/mutual_information')
|
|
3
|
-
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/chi_squared')
|
|
4
|
-
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/frequency_based')
|
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/mutual_information')
|
|
4
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/chi_squared')
|
|
5
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/frequency_based')
|
|
@@ -10,5 +10,75 @@ describe "Base" do
|
|
|
10
10
|
@a.classes.should include(:spam)
|
|
11
11
|
@a.classes.should include(:ham)
|
|
12
12
|
end
|
|
13
|
+
|
|
14
|
+
describe "Logger" do
|
|
15
|
+
before do
|
|
16
|
+
# Remove test files
|
|
17
|
+
Dir[File.expand_path(File.dirname(__FILE__) + "/../logger/*")].each do |file|
|
|
18
|
+
FileUtils.rm(file)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
describe "New Logger" do
|
|
23
|
+
before do
|
|
24
|
+
@log_file = log_path
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
describe "Base" do
|
|
28
|
+
it "should work create a log file called test_1.txt" do
|
|
29
|
+
FeatureSelection::Base.new(data, :log_to => @log_file)
|
|
30
|
+
File.exist?(@log_file).should be_true
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
describe "MutualInformation" do
|
|
35
|
+
it "should work and create a log file called test_1.txt" do
|
|
36
|
+
FeatureSelection::MutualInformation.new(data, :log_to => @log_file).rank_features
|
|
37
|
+
File.exist?(@log_file).should be_true
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
describe "ChiSquared" do
|
|
42
|
+
it "should work and create a log file called test_1.txt" do
|
|
43
|
+
FeatureSelection::ChiSquared.new(data, :log_to => @log_file).rank_features
|
|
44
|
+
File.exist?(@log_file).should be_true
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
describe "FrequencyBased" do
|
|
49
|
+
it "should work and create a log file called test_1.txt" do
|
|
50
|
+
FeatureSelection::FrequencyBased.new(data, :log_to => @log_file).rank_features
|
|
51
|
+
File.exist?(@log_file).should be_true
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
describe "Existing Log" do
|
|
57
|
+
describe "FrequencyBased" do
|
|
58
|
+
it "should work, therefore return a hash" do
|
|
59
|
+
a = FeatureSelection::FrequencyBased.new(data, :log_to => @log_file)
|
|
60
|
+
a.rank_features.should be_a(Hash)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
describe "MutualInformation" do
|
|
65
|
+
it "should work, therefore return a hash" do
|
|
66
|
+
a = FeatureSelection::MutualInformation.new(data, :log_to => @log_file)
|
|
67
|
+
a.rank_features.should be_a(Hash)
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
describe "ChiSquared" do
|
|
72
|
+
it "should work, therefore return a hash" do
|
|
73
|
+
a = FeatureSelection::ChiSquared.new(data, :log_to => @log_file)
|
|
74
|
+
a.rank_features.should be_a(Hash)
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def log_path
|
|
80
|
+
File.expand_path(File.dirname(__FILE__) + "/../logger/test_1.txt")
|
|
81
|
+
end
|
|
82
|
+
end
|
|
13
83
|
|
|
14
84
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: feature_selection
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.1
|
|
4
|
+
version: 0.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- reddavis
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2010-01-
|
|
12
|
+
date: 2010-01-11 00:00:00 +00:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
@@ -41,10 +41,11 @@ files:
|
|
|
41
41
|
- benchmark/benchmark.rb
|
|
42
42
|
- feature_selection.gemspec
|
|
43
43
|
- lib/feature_selection.rb
|
|
44
|
+
- lib/feature_selection/algorithms/chi_squared.rb
|
|
45
|
+
- lib/feature_selection/algorithms/frequency_based.rb
|
|
46
|
+
- lib/feature_selection/algorithms/mutual_information.rb
|
|
44
47
|
- lib/feature_selection/base.rb
|
|
45
|
-
- lib/feature_selection/chi_squared.rb
|
|
46
|
-
- lib/feature_selection/frequency_based.rb
|
|
47
|
-
- lib/feature_selection/mutual_information.rb
|
|
48
|
+
- lib/feature_selection/log_helpers.rb
|
|
48
49
|
- spec/feature_selection/base_spec.rb
|
|
49
50
|
- spec/feature_selection/chi_squared_spec.rb
|
|
50
51
|
- spec/feature_selection/frequency_based_spec.rb
|