feature_selection 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -32,6 +32,18 @@ Example:
32
32
 
33
33
  a.rank_features
34
34
  #=> {:spam => {term => score, term => score}, :ham => {term => score}}
35
+
36
+ == Logging
37
+
38
+ There are two ways to log the activity of algorithms:
39
+
40
+ # Provide the path of a file to log to
41
+ log = File.expand_path(File.dirname(__FILE__) + '/log.txt')
42
+ FeatureSelection::MutualInformation.new(data, :log_to => log)
43
+
44
+ # Provide an existing Logger object
45
+ log = Logger.new('log.txt')
46
+ FeatureSelection::MutualInformation.new(data, :log_to => log)
35
47
 
36
48
  == Copyright
37
49
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{feature_selection}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2010-01-07}
12
+ s.date = %q{2010-01-11}
13
13
  s.description = %q{A library of feature selection algorithms}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -26,10 +26,11 @@ Gem::Specification.new do |s|
26
26
  "benchmark/benchmark.rb",
27
27
  "feature_selection.gemspec",
28
28
  "lib/feature_selection.rb",
29
+ "lib/feature_selection/algorithms/chi_squared.rb",
30
+ "lib/feature_selection/algorithms/frequency_based.rb",
31
+ "lib/feature_selection/algorithms/mutual_information.rb",
29
32
  "lib/feature_selection/base.rb",
30
- "lib/feature_selection/chi_squared.rb",
31
- "lib/feature_selection/frequency_based.rb",
32
- "lib/feature_selection/mutual_information.rb",
33
+ "lib/feature_selection/log_helpers.rb",
33
34
  "spec/feature_selection/base_spec.rb",
34
35
  "spec/feature_selection/chi_squared_spec.rb",
35
36
  "spec/feature_selection/frequency_based_spec.rb",
@@ -10,10 +10,15 @@ module FeatureSelection
10
10
  #=> {:class => {'term' => score, 'term' => score}}
11
11
  @results = {}
12
12
 
13
+ n = 1
14
+
13
15
  classes.each do |klass|
14
16
  @results[klass] = {}
15
17
 
16
18
  uniq_terms.each do |term|
19
+ log_calculations_complete(n)
20
+ n += 1
21
+
17
22
  answer = calculate_contribution(term, klass)
18
23
  @results[klass][term] = answer
19
24
  end #terms.each
@@ -2,14 +2,22 @@ module FeatureSelection
2
2
  class FrequencyBased < Base
3
3
 
4
4
  def rank_features
5
+ write_to_log("Starting to rank features...")
5
6
  # Returns:
6
7
  #=> {:class => {'term' => count, 'term' => count}}
7
8
  @results = {}
8
9
 
10
+ # For logger
11
+ total_calculations = classes.size * terms.size
12
+ n = 1
13
+
9
14
  classes.each do |klass|
10
15
  @results[klass] = {}
11
16
 
12
17
  terms.each do |term|
18
+ log_calculations_complete(n)
19
+ n += 1
20
+
13
21
  if @results[klass].key?(term)
14
22
  @results[klass][term] += 1
15
23
  else
@@ -19,6 +27,13 @@ module FeatureSelection
19
27
  end #classes.each
20
28
  @results
21
29
  end
30
+
31
+ private
32
+
33
+ # Override Base#total_calculations
34
+ def total_calculations
35
+ classes.size * terms.size
36
+ end
22
37
 
23
38
  end
24
39
  end
@@ -21,10 +21,15 @@ module FeatureSelection
21
21
  #=> {:class => {'term' => score, 'term' => score}}
22
22
  @results = {}
23
23
 
24
+ n = 1
25
+
24
26
  classes.each do |klass|
25
27
  @results[klass] = {}
26
28
 
27
29
  uniq_terms.each do |term|
30
+ log_calculations_complete(n)
31
+ n += 1
32
+
28
33
  answer = calculate_contribution(term, klass)
29
34
  @results[klass][term] = answer
30
35
  end #terms.each
@@ -1,8 +1,10 @@
1
1
  module FeatureSelection
2
2
  class Base
3
+ include LogHelpers
3
4
 
4
- def initialize(data)
5
+ def initialize(data, options={})
5
6
  @data = data
7
+ create_log(options[:log_to]) if options[:log_to]
6
8
  end
7
9
 
8
10
  def classes
@@ -160,5 +162,9 @@ module FeatureSelection
160
162
  @terms ||= @data.map {|x| x[1]}.flatten
161
163
  end
162
164
 
165
+ def total_calculations
166
+ @total_calculations ||= uniq_terms.size * classes.size
167
+ end
168
+
163
169
  end
164
170
  end
@@ -0,0 +1,25 @@
1
+ require 'logger'
2
+
3
+ module LogHelpers
4
+ # 2 outcomes
5
+ # - File path given: create a Logger that writes to that file
6
+ # - Logger object given: write to that object
7
+ def create_log(log_to)
8
+ if log_to.is_a?(Logger)
9
+ @log = log_to
10
+ else
11
+ @log = Logger.new(log_to)
12
+ end
13
+ end
14
+
15
+ def write_to_log(message)
16
+ if @log
17
+ @log.info(message)
18
+ end
19
+ end
20
+
21
+ # Writes the number of calculations completed to the log
22
+ def log_calculations_complete(n)
23
+ write_to_log("#{n}/#{total_calculations} calculations complete.")
24
+ end
25
+ end
@@ -1,4 +1,5 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/log_helpers')
1
2
  require File.expand_path(File.dirname(__FILE__) + '/feature_selection/base')
2
- require File.expand_path(File.dirname(__FILE__) + '/feature_selection/mutual_information')
3
- require File.expand_path(File.dirname(__FILE__) + '/feature_selection/chi_squared')
4
- require File.expand_path(File.dirname(__FILE__) + '/feature_selection/frequency_based')
3
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/mutual_information')
4
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/chi_squared')
5
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/frequency_based')
@@ -10,5 +10,75 @@ describe "Base" do
10
10
  @a.classes.should include(:spam)
11
11
  @a.classes.should include(:ham)
12
12
  end
13
+
14
+ describe "Logger" do
15
+ before do
16
+ # Remove test files
17
+ Dir[File.expand_path(File.dirname(__FILE__) + "/../logger/*")].each do |file|
18
+ FileUtils.rm(file)
19
+ end
20
+ end
21
+
22
+ describe "New Logger" do
23
+ before do
24
+ @log_file = log_path
25
+ end
26
+
27
+ describe "Base" do
28
+ it "should work create a log file called test_1.txt" do
29
+ FeatureSelection::Base.new(data, :log_to => @log_file)
30
+ File.exist?(@log_file).should be_true
31
+ end
32
+ end
33
+
34
+ describe "MutualInformation" do
35
+ it "should work and create a log file called test_1.txt" do
36
+ FeatureSelection::MutualInformation.new(data, :log_to => @log_file).rank_features
37
+ File.exist?(@log_file).should be_true
38
+ end
39
+ end
40
+
41
+ describe "ChiSquared" do
42
+ it "should work and create a log file called test_1.txt" do
43
+ FeatureSelection::ChiSquared.new(data, :log_to => @log_file).rank_features
44
+ File.exist?(@log_file).should be_true
45
+ end
46
+ end
47
+
48
+ describe "FrequencyBased" do
49
+ it "should work and create a log file called test_1.txt" do
50
+ FeatureSelection::FrequencyBased.new(data, :log_to => @log_file).rank_features
51
+ File.exist?(@log_file).should be_true
52
+ end
53
+ end
54
+ end
55
+
56
+ describe "Existing Log" do
57
+ describe "FrequencyBased" do
58
+ it "should work, therefore return a hash" do
59
+ a = FeatureSelection::FrequencyBased.new(data, :log_to => @log_file)
60
+ a.rank_features.should be_a(Hash)
61
+ end
62
+ end
63
+
64
+ describe "MutualInformation" do
65
+ it "should work, therefore return a hash" do
66
+ a = FeatureSelection::MutualInformation.new(data, :log_to => @log_file)
67
+ a.rank_features.should be_a(Hash)
68
+ end
69
+ end
70
+
71
+ describe "ChiSquared" do
72
+ it "should work, therefore return a hash" do
73
+ a = FeatureSelection::ChiSquared.new(data, :log_to => @log_file)
74
+ a.rank_features.should be_a(Hash)
75
+ end
76
+ end
77
+ end
78
+
79
+ def log_path
80
+ File.expand_path(File.dirname(__FILE__) + "/../logger/test_1.txt")
81
+ end
82
+ end
13
83
 
14
84
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feature_selection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-07 00:00:00 +00:00
12
+ date: 2010-01-11 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -41,10 +41,11 @@ files:
41
41
  - benchmark/benchmark.rb
42
42
  - feature_selection.gemspec
43
43
  - lib/feature_selection.rb
44
+ - lib/feature_selection/algorithms/chi_squared.rb
45
+ - lib/feature_selection/algorithms/frequency_based.rb
46
+ - lib/feature_selection/algorithms/mutual_information.rb
44
47
  - lib/feature_selection/base.rb
45
- - lib/feature_selection/chi_squared.rb
46
- - lib/feature_selection/frequency_based.rb
47
- - lib/feature_selection/mutual_information.rb
48
+ - lib/feature_selection/log_helpers.rb
48
49
  - spec/feature_selection/base_spec.rb
49
50
  - spec/feature_selection/chi_squared_spec.rb
50
51
  - spec/feature_selection/frequency_based_spec.rb