feature_selection 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +12 -0
- data/VERSION +1 -1
- data/feature_selection.gemspec +6 -5
- data/lib/feature_selection/{chi_squared.rb → algorithms/chi_squared.rb} +5 -0
- data/lib/feature_selection/{frequency_based.rb → algorithms/frequency_based.rb} +15 -0
- data/lib/feature_selection/{mutual_information.rb → algorithms/mutual_information.rb} +5 -0
- data/lib/feature_selection/base.rb +7 -1
- data/lib/feature_selection/log_helpers.rb +25 -0
- data/lib/feature_selection.rb +4 -3
- data/spec/feature_selection/base_spec.rb +70 -0
- metadata +6 -5
data/README.rdoc
CHANGED
@@ -32,6 +32,18 @@ Example:
|
|
32
32
|
|
33
33
|
a.rank_features
|
34
34
|
#=> {:spam => {term => score, term => score}, :ham => {term => score}}
|
35
|
+
|
36
|
+
== Logging
|
37
|
+
|
38
|
+
There are two ways to log the activity of algorithms:
|
39
|
+
|
40
|
+
# Provide a path to somewhere to log to
|
41
|
+
log = File.expand_path(File.dirname(__FILE__) + '/log.txt')
|
42
|
+
FeatureSelection::MutualInformation.new(data, :log_to => log)
|
43
|
+
|
44
|
+
# Provide an existing Logger object
|
45
|
+
log = Logger.new('log.txt')
|
46
|
+
FeatureSelection::MutualInformation.new(data, :log_to => log)
|
35
47
|
|
36
48
|
== Copyright
|
37
49
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/feature_selection.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{feature_selection}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-11}
|
13
13
|
s.description = %q{A library of feature selection algorithms}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,10 +26,11 @@ Gem::Specification.new do |s|
|
|
26
26
|
"benchmark/benchmark.rb",
|
27
27
|
"feature_selection.gemspec",
|
28
28
|
"lib/feature_selection.rb",
|
29
|
+
"lib/feature_selection/algorithms/chi_squared.rb",
|
30
|
+
"lib/feature_selection/algorithms/frequency_based.rb",
|
31
|
+
"lib/feature_selection/algorithms/mutual_information.rb",
|
29
32
|
"lib/feature_selection/base.rb",
|
30
|
-
"lib/feature_selection/chi_squared.rb",
|
31
|
-
"lib/feature_selection/frequency_based.rb",
|
32
|
-
"lib/feature_selection/mutual_information.rb",
|
33
|
+
"lib/feature_selection/log_helpers.rb",
|
33
34
|
"spec/feature_selection/base_spec.rb",
|
34
35
|
"spec/feature_selection/chi_squared_spec.rb",
|
35
36
|
"spec/feature_selection/frequency_based_spec.rb",
|
@@ -10,10 +10,15 @@ module FeatureSelection
|
|
10
10
|
#=> {:class => {'term' => score, 'term' => score}}
|
11
11
|
@results = {}
|
12
12
|
|
13
|
+
n = 1
|
14
|
+
|
13
15
|
classes.each do |klass|
|
14
16
|
@results[klass] = {}
|
15
17
|
|
16
18
|
uniq_terms.each do |term|
|
19
|
+
log_calculations_complete(n)
|
20
|
+
n += 1
|
21
|
+
|
17
22
|
answer = calculate_contribution(term, klass)
|
18
23
|
@results[klass][term] = answer
|
19
24
|
end #terms.each
|
@@ -2,14 +2,22 @@ module FeatureSelection
|
|
2
2
|
class FrequencyBased < Base
|
3
3
|
|
4
4
|
def rank_features
|
5
|
+
write_to_log("Starting to rank features...")
|
5
6
|
# Returns:
|
6
7
|
#=> {:class => {'term' => count, 'term' => count}}
|
7
8
|
@results = {}
|
8
9
|
|
10
|
+
# For logger
|
11
|
+
total_calculations = classes.size * terms.size
|
12
|
+
n = 1
|
13
|
+
|
9
14
|
classes.each do |klass|
|
10
15
|
@results[klass] = {}
|
11
16
|
|
12
17
|
terms.each do |term|
|
18
|
+
log_calculations_complete(n)
|
19
|
+
n += 1
|
20
|
+
|
13
21
|
if @results[klass].key?(term)
|
14
22
|
@results[klass][term] += 1
|
15
23
|
else
|
@@ -19,6 +27,13 @@ module FeatureSelection
|
|
19
27
|
end #classes.each
|
20
28
|
@results
|
21
29
|
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
# Overwrite Base#total_calculations
|
34
|
+
def total_calculations
|
35
|
+
classes.size * terms.size
|
36
|
+
end
|
22
37
|
|
23
38
|
end
|
24
39
|
end
|
@@ -21,10 +21,15 @@ module FeatureSelection
|
|
21
21
|
#=> {:class => {'term' => score, 'term' => score}}
|
22
22
|
@results = {}
|
23
23
|
|
24
|
+
n = 1
|
25
|
+
|
24
26
|
classes.each do |klass|
|
25
27
|
@results[klass] = {}
|
26
28
|
|
27
29
|
uniq_terms.each do |term|
|
30
|
+
log_calculations_complete(n)
|
31
|
+
n += 1
|
32
|
+
|
28
33
|
answer = calculate_contribution(term, klass)
|
29
34
|
@results[klass][term] = answer
|
30
35
|
end #terms.each
|
@@ -1,8 +1,10 @@
|
|
1
1
|
module FeatureSelection
|
2
2
|
class Base
|
3
|
+
include LogHelpers
|
3
4
|
|
4
|
-
def initialize(data)
|
5
|
+
def initialize(data, options={})
|
5
6
|
@data = data
|
7
|
+
create_log(options[:log_to]) if options[:log_to]
|
6
8
|
end
|
7
9
|
|
8
10
|
def classes
|
@@ -160,5 +162,9 @@ module FeatureSelection
|
|
160
162
|
@terms ||= @data.map {|x| x[1]}.flatten
|
161
163
|
end
|
162
164
|
|
165
|
+
def total_calculations
|
166
|
+
@total_calculations ||= uniq_terms.size * classes.size
|
167
|
+
end
|
168
|
+
|
163
169
|
end
|
164
170
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module LogHelpers
|
4
|
+
# 2 outcomes
|
5
|
+
# - Filepath given: create a log to that file
|
6
|
+
# - Logger object given: write to that object
|
7
|
+
def create_log(log_to)
|
8
|
+
if log_to.is_a?(Logger)
|
9
|
+
@log = log_to
|
10
|
+
else
|
11
|
+
@log = Logger.new(log_to)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def write_to_log(message)
|
16
|
+
if @log
|
17
|
+
@log.info(message)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Writes the number of calculations completed to the log
|
22
|
+
def log_calculations_complete(n)
|
23
|
+
write_to_log("#{n}/#{total_calculations} calculations complete.")
|
24
|
+
end
|
25
|
+
end
|
data/lib/feature_selection.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/log_helpers')
|
1
2
|
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/base')
|
2
|
-
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/mutual_information')
|
3
|
-
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/chi_squared')
|
4
|
-
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/frequency_based')
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/mutual_information')
|
4
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/chi_squared')
|
5
|
+
require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/frequency_based')
|
@@ -10,5 +10,75 @@ describe "Base" do
|
|
10
10
|
@a.classes.should include(:spam)
|
11
11
|
@a.classes.should include(:ham)
|
12
12
|
end
|
13
|
+
|
14
|
+
describe "Logger" do
|
15
|
+
before do
|
16
|
+
# Remove test files
|
17
|
+
Dir[File.expand_path(File.dirname(__FILE__) + "/../logger/*")].each do |file|
|
18
|
+
FileUtils.rm(file)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe "New Logger" do
|
23
|
+
before do
|
24
|
+
@log_file = log_path
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "Base" do
|
28
|
+
it "should work create a log file called test_1.txt" do
|
29
|
+
FeatureSelection::Base.new(data, :log_to => @log_file)
|
30
|
+
File.exist?(@log_file).should be_true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe "MutualInformation" do
|
35
|
+
it "should work and create a log file called test_1.txt" do
|
36
|
+
FeatureSelection::MutualInformation.new(data, :log_to => @log_file).rank_features
|
37
|
+
File.exist?(@log_file).should be_true
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "ChiSquared" do
|
42
|
+
it "should work and create a log file called test_1.txt" do
|
43
|
+
FeatureSelection::ChiSquared.new(data, :log_to => @log_file).rank_features
|
44
|
+
File.exist?(@log_file).should be_true
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe "FrequencyBased" do
|
49
|
+
it "should work and create a log file called test_1.txt" do
|
50
|
+
FeatureSelection::FrequencyBased.new(data, :log_to => @log_file).rank_features
|
51
|
+
File.exist?(@log_file).should be_true
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "Existing Log" do
|
57
|
+
describe "FrequencyBased" do
|
58
|
+
it "should work, therefore return a hash" do
|
59
|
+
a = FeatureSelection::FrequencyBased.new(data, :log_to => @log_file)
|
60
|
+
a.rank_features.should be_a(Hash)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
describe "MutualInformation" do
|
65
|
+
it "should work, therefore return a hash" do
|
66
|
+
a = FeatureSelection::MutualInformation.new(data, :log_to => @log_file)
|
67
|
+
a.rank_features.should be_a(Hash)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "ChiSquared" do
|
72
|
+
it "should work, therefore return a hash" do
|
73
|
+
a = FeatureSelection::ChiSquared.new(data, :log_to => @log_file)
|
74
|
+
a.rank_features.should be_a(Hash)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def log_path
|
80
|
+
File.expand_path(File.dirname(__FILE__) + "/../logger/test_1.txt")
|
81
|
+
end
|
82
|
+
end
|
13
83
|
|
14
84
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feature_selection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- reddavis
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-11 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -41,10 +41,11 @@ files:
|
|
41
41
|
- benchmark/benchmark.rb
|
42
42
|
- feature_selection.gemspec
|
43
43
|
- lib/feature_selection.rb
|
44
|
+
- lib/feature_selection/algorithms/chi_squared.rb
|
45
|
+
- lib/feature_selection/algorithms/frequency_based.rb
|
46
|
+
- lib/feature_selection/algorithms/mutual_information.rb
|
44
47
|
- lib/feature_selection/base.rb
|
45
|
-
- lib/feature_selection/chi_squared.rb
|
46
|
-
- lib/feature_selection/frequency_based.rb
|
47
|
-
- lib/feature_selection/mutual_information.rb
|
48
|
+
- lib/feature_selection/log_helpers.rb
|
48
49
|
- spec/feature_selection/base_spec.rb
|
49
50
|
- spec/feature_selection/chi_squared_spec.rb
|
50
51
|
- spec/feature_selection/frequency_based_spec.rb
|