feature_selection 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -32,6 +32,18 @@ Example:
32
32
 
33
33
  a.rank_features
34
34
  #=> {:spam => {term => score, term => score}, :ham => {term => score}}
35
+
36
+ == Logging
37
+
38
+ There are two ways to log the activity of algorithms:
39
+
40
+ # Provide a path to a file to log to
41
+ log = File.expand_path(File.dirname(__FILE__) + '/log.txt')
42
+ FeatureSelection::MutualInformation.new(data, :log_to => log)
43
+
44
+ # Provide an existing Logger object
45
+ log = Logger.new('log.txt')
46
+ FeatureSelection::MutualInformation.new(data, :log_to => log)
35
47
 
36
48
  == Copyright
37
49
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{feature_selection}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2010-01-07}
12
+ s.date = %q{2010-01-11}
13
13
  s.description = %q{A library of feature selection algorithms}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -26,10 +26,11 @@ Gem::Specification.new do |s|
26
26
  "benchmark/benchmark.rb",
27
27
  "feature_selection.gemspec",
28
28
  "lib/feature_selection.rb",
29
+ "lib/feature_selection/algorithms/chi_squared.rb",
30
+ "lib/feature_selection/algorithms/frequency_based.rb",
31
+ "lib/feature_selection/algorithms/mutual_information.rb",
29
32
  "lib/feature_selection/base.rb",
30
- "lib/feature_selection/chi_squared.rb",
31
- "lib/feature_selection/frequency_based.rb",
32
- "lib/feature_selection/mutual_information.rb",
33
+ "lib/feature_selection/log_helpers.rb",
33
34
  "spec/feature_selection/base_spec.rb",
34
35
  "spec/feature_selection/chi_squared_spec.rb",
35
36
  "spec/feature_selection/frequency_based_spec.rb",
@@ -10,10 +10,15 @@ module FeatureSelection
10
10
  #=> {:class => {'term' => score, 'term' => score}}
11
11
  @results = {}
12
12
 
13
+ n = 1
14
+
13
15
  classes.each do |klass|
14
16
  @results[klass] = {}
15
17
 
16
18
  uniq_terms.each do |term|
19
+ log_calculations_complete(n)
20
+ n += 1
21
+
17
22
  answer = calculate_contribution(term, klass)
18
23
  @results[klass][term] = answer
19
24
  end #terms.each
@@ -2,14 +2,22 @@ module FeatureSelection
2
2
  class FrequencyBased < Base
3
3
 
4
4
  def rank_features
5
+ write_to_log("Starting to rank features...")
5
6
  # Returns:
6
7
  #=> {:class => {'term' => count, 'term' => count}}
7
8
  @results = {}
8
9
 
10
+ # For logger
11
+ total_calculations = classes.size * terms.size
12
+ n = 1
13
+
9
14
  classes.each do |klass|
10
15
  @results[klass] = {}
11
16
 
12
17
  terms.each do |term|
18
+ log_calculations_complete(n)
19
+ n += 1
20
+
13
21
  if @results[klass].key?(term)
14
22
  @results[klass][term] += 1
15
23
  else
@@ -19,6 +27,13 @@ module FeatureSelection
19
27
  end #classes.each
20
28
  @results
21
29
  end
30
+
31
+ private
32
+
33
+ # Override Base#total_calculations
34
+ def total_calculations
35
+ classes.size * terms.size
36
+ end
22
37
 
23
38
  end
24
39
  end
@@ -21,10 +21,15 @@ module FeatureSelection
21
21
  #=> {:class => {'term' => score, 'term' => score}}
22
22
  @results = {}
23
23
 
24
+ n = 1
25
+
24
26
  classes.each do |klass|
25
27
  @results[klass] = {}
26
28
 
27
29
  uniq_terms.each do |term|
30
+ log_calculations_complete(n)
31
+ n += 1
32
+
28
33
  answer = calculate_contribution(term, klass)
29
34
  @results[klass][term] = answer
30
35
  end #terms.each
@@ -1,8 +1,10 @@
1
1
  module FeatureSelection
2
2
  class Base
3
+ include LogHelpers
3
4
 
4
- def initialize(data)
5
+ def initialize(data, options={})
5
6
  @data = data
7
+ create_log(options[:log_to]) if options[:log_to]
6
8
  end
7
9
 
8
10
  def classes
@@ -160,5 +162,9 @@ module FeatureSelection
160
162
  @terms ||= @data.map {|x| x[1]}.flatten
161
163
  end
162
164
 
165
+ def total_calculations
166
+ @total_calculations ||= uniq_terms.size * classes.size
167
+ end
168
+
163
169
  end
164
170
  end
@@ -0,0 +1,25 @@
1
+ require 'logger'
2
+
3
+ module LogHelpers
4
+ # 2 outcomes
5
+ # - Filepath given: create a log to that file
6
+ # - Logger object given: write to that object
7
+ def create_log(log_to)
8
+ if log_to.is_a?(Logger)
9
+ @log = log_to
10
+ else
11
+ @log = Logger.new(log_to)
12
+ end
13
+ end
14
+
15
+ def write_to_log(message)
16
+ if @log
17
+ @log.info(message)
18
+ end
19
+ end
20
+
21
+ # Writes the number of calculations completed to the log
22
+ def log_calculations_complete(n)
23
+ write_to_log("#{n}/#{total_calculations} calculations complete.")
24
+ end
25
+ end
@@ -1,4 +1,5 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/log_helpers')
1
2
  require File.expand_path(File.dirname(__FILE__) + '/feature_selection/base')
2
- require File.expand_path(File.dirname(__FILE__) + '/feature_selection/mutual_information')
3
- require File.expand_path(File.dirname(__FILE__) + '/feature_selection/chi_squared')
4
- require File.expand_path(File.dirname(__FILE__) + '/feature_selection/frequency_based')
3
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/mutual_information')
4
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/chi_squared')
5
+ require File.expand_path(File.dirname(__FILE__) + '/feature_selection/algorithms/frequency_based')
@@ -10,5 +10,75 @@ describe "Base" do
10
10
  @a.classes.should include(:spam)
11
11
  @a.classes.should include(:ham)
12
12
  end
13
+
14
+ describe "Logger" do
15
+ before do
16
+ # Remove test files
17
+ Dir[File.expand_path(File.dirname(__FILE__) + "/../logger/*")].each do |file|
18
+ FileUtils.rm(file)
19
+ end
20
+ end
21
+
22
+ describe "New Logger" do
23
+ before do
24
+ @log_file = log_path
25
+ end
26
+
27
+ describe "Base" do
28
+ it "should work create a log file called test_1.txt" do
29
+ FeatureSelection::Base.new(data, :log_to => @log_file)
30
+ File.exist?(@log_file).should be_true
31
+ end
32
+ end
33
+
34
+ describe "MutualInformation" do
35
+ it "should work and create a log file called test_1.txt" do
36
+ FeatureSelection::MutualInformation.new(data, :log_to => @log_file).rank_features
37
+ File.exist?(@log_file).should be_true
38
+ end
39
+ end
40
+
41
+ describe "ChiSquared" do
42
+ it "should work and create a log file called test_1.txt" do
43
+ FeatureSelection::ChiSquared.new(data, :log_to => @log_file).rank_features
44
+ File.exist?(@log_file).should be_true
45
+ end
46
+ end
47
+
48
+ describe "FrequencyBased" do
49
+ it "should work and create a log file called test_1.txt" do
50
+ FeatureSelection::FrequencyBased.new(data, :log_to => @log_file).rank_features
51
+ File.exist?(@log_file).should be_true
52
+ end
53
+ end
54
+ end
55
+
56
+ describe "Existing Log" do
57
+ describe "FrequencyBased" do
58
+ it "should work, therefore return a hash" do
59
+ a = FeatureSelection::FrequencyBased.new(data, :log_to => @log_file)
60
+ a.rank_features.should be_a(Hash)
61
+ end
62
+ end
63
+
64
+ describe "MutualInformation" do
65
+ it "should work, therefore return a hash" do
66
+ a = FeatureSelection::MutualInformation.new(data, :log_to => @log_file)
67
+ a.rank_features.should be_a(Hash)
68
+ end
69
+ end
70
+
71
+ describe "ChiSquared" do
72
+ it "should work, therefore return a hash" do
73
+ a = FeatureSelection::ChiSquared.new(data, :log_to => @log_file)
74
+ a.rank_features.should be_a(Hash)
75
+ end
76
+ end
77
+ end
78
+
79
+ def log_path
80
+ File.expand_path(File.dirname(__FILE__) + "/../logger/test_1.txt")
81
+ end
82
+ end
13
83
 
14
84
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feature_selection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-07 00:00:00 +00:00
12
+ date: 2010-01-11 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -41,10 +41,11 @@ files:
41
41
  - benchmark/benchmark.rb
42
42
  - feature_selection.gemspec
43
43
  - lib/feature_selection.rb
44
+ - lib/feature_selection/algorithms/chi_squared.rb
45
+ - lib/feature_selection/algorithms/frequency_based.rb
46
+ - lib/feature_selection/algorithms/mutual_information.rb
44
47
  - lib/feature_selection/base.rb
45
- - lib/feature_selection/chi_squared.rb
46
- - lib/feature_selection/frequency_based.rb
47
- - lib/feature_selection/mutual_information.rb
48
+ - lib/feature_selection/log_helpers.rb
48
49
  - spec/feature_selection/base_spec.rb
49
50
  - spec/feature_selection/chi_squared_spec.rb
50
51
  - spec/feature_selection/frequency_based_spec.rb