green_midget 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,8 +5,8 @@ describe GreenMidget::Examples do
5
5
  include GreenMidget
6
6
 
7
7
  before(:each) do
8
- GreenMidgetRecords.delete_all
9
- GreenMidgetRecords.class_variable_set("@@cache", {})
8
+ Records.delete_all
9
+ Records.cache = {}
10
10
  end
11
11
 
12
12
  describe "#[]()" do
@@ -17,17 +17,17 @@ describe GreenMidget::Examples do
17
17
  end
18
18
 
19
19
  it "should return the general feature examples if passed a (new) feature key that has no examples yet" do
20
- GreenMidgetRecords.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count", :value => 1000)
21
- GreenMidgetRecords.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ ALTERNATIVE }_count", :value => 1000)
22
- GreenMidgetRecords.find_by_key(Examples.prefix + "new::#{ NULL }_count").should == nil
23
- GreenMidgetRecords.fetch_all
20
+ Records.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count", :value => 1000)
21
+ Records.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ ALTERNATIVE }_count", :value => 1000)
22
+ Records.find_by_key(Examples.prefix + "new::#{ NULL }_count").should == nil
23
+ Records.fetch_all
24
24
  CATEGORIES.each do |category|
25
25
  Examples['new'][category].should == Examples[Examples::GENERAL_FEATURE_NAME][category]
26
26
  end
27
27
  end
28
28
  it "should return the feature's own example counts if these exist" do
29
- GreenMidgetRecords.create(:key => Examples.prefix + "new::#{ NULL }_count", :value => 3)
30
- GreenMidgetRecords.create(:key => Examples.prefix + "new::#{ ALTERNATIVE }_count", :value => 1)
29
+ Records.create(:key => Examples.prefix + "new::#{ NULL }_count", :value => 3)
30
+ Records.create(:key => Examples.prefix + "new::#{ ALTERNATIVE }_count", :value => 1)
31
31
  Examples['new'][NULL].should == 3
32
32
  end
33
33
 
@@ -36,34 +36,34 @@ describe GreenMidget::Examples do
36
36
  end
37
37
 
38
38
  it "should throw an error if the general feature examples has a zero spam_count and ham_count" do
39
- GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count")
39
+ Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count")
40
40
  @call_any.should raise_error
41
41
  end
42
42
 
43
43
  it "should throw an error if the general feature examples has a zero spam_count or ham_count" do
44
- GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 0)
44
+ Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 0)
45
45
  @call_any.should raise_error
46
46
  end
47
47
 
48
48
  it "should not throw an error if both columns are positive" do
49
- GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 2)
50
- GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ ALTERNATIVE }_count", :value => 1)
49
+ Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 2)
50
+ Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ ALTERNATIVE }_count", :value => 1)
51
51
  @call_any.should_not raise_error
52
52
  end
53
53
  end
54
54
 
55
55
  describe "#probability_for" do
56
56
  it "should return the probability of a feature falling into category as: Examples[feature][category] / (Examples[feature][ALTERNATIVE] + Examples[feature][NULL])" do
57
- GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(NULL), :value => 1000)
58
- GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 150 )
57
+ Records.create(:key => Examples['url_in_text'].record_key(NULL), :value => 1000)
58
+ Records.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 150 )
59
59
  Examples['url_in_text'].probability_for(ALTERNATIVE).should == 150.0/(1000 + 150)
60
60
  end
61
61
  end
62
62
 
63
63
  describe "#no_examples?" do
64
64
  before(:each) do
65
- GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE))
66
- GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(NULL))
65
+ Records.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE))
66
+ Records.create(:key => Examples['url_in_text'].record_key(NULL))
67
67
  @object = Examples['url_in_text']
68
68
  end
69
69
 
@@ -72,13 +72,13 @@ describe GreenMidget::Examples do
72
72
  end
73
73
 
74
74
  it "should return true if spam_count or ham_count are zero" do
75
- GreenMidgetRecords.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
75
+ Records.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
76
76
  @object.no_examples?.should be_true
77
77
  end
78
78
 
79
79
  it "should should return false if both spam_count and ham_count are positive" do
80
- GreenMidgetRecords.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
81
- GreenMidgetRecords.find_by_key(@object.record_key(ALTERNATIVE)).update_attribute(:value, 1)
80
+ Records.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
81
+ Records.find_by_key(@object.record_key(ALTERNATIVE)).update_attribute(:value, 1)
82
82
  @object.no_examples?.should be_false
83
83
  end
84
84
  end
@@ -5,17 +5,17 @@ describe GreenMidget::Features do
5
5
  include GreenMidget
6
6
 
7
7
  before(:each) do
8
- GreenMidgetRecords.delete_all
9
- GreenMidgetRecords.class_variable_set("@@cache", {})
8
+ Records.delete_all
9
+ Records.cache = {}
10
10
  end
11
11
 
12
12
  describe "#probability_for" do
13
13
  it "should return Feature[feature] / Examples[feature]" do
14
- GreenMidgetRecords.create(:key => Features["url_in_text"].record_key(NULL), :value => 20 )
15
- GreenMidgetRecords.create(:key => Features["url_in_text"].record_key(ALTERNATIVE), :value => 10 )
14
+ Records.create(:key => Features["url_in_text"].record_key(NULL), :value => 20 )
15
+ Records.create(:key => Features["url_in_text"].record_key(ALTERNATIVE), :value => 10 )
16
16
 
17
- GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(NULL), :value => 100 )
18
- GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 1000)
17
+ Records.create(:key => Examples['url_in_text'].record_key(NULL), :value => 100 )
18
+ Records.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 1000)
19
19
 
20
20
  Features['url_in_text'].probability_for(NULL).should == 20.0/100
21
21
  Features['url_in_text'].probability_for(ALTERNATIVE).should == 10.0/1000
@@ -1,64 +1,69 @@
1
1
  # Copyright (c) 2011, SoundCloud Ltd., Nikola Chochkov
2
2
  require 'spec_helper'
3
3
 
4
- describe GreenMidget::GreenMidgetRecords do
4
+ describe GreenMidget::Records do
5
5
  include GreenMidget
6
6
 
7
7
  before(:each) do
8
- GreenMidgetRecords.delete_all
8
+ Records.delete_all
9
9
  end
10
10
 
11
11
  describe "#[]()" do
12
12
  it "should take words from data store if not found in the cache" do
13
13
  word_key, phrase_key = [ 'word', 'phrase' ].map { |w| Words[w].record_key(NULL) }
14
- GreenMidgetRecords.fetch_all([ 'word' ])
15
- GreenMidgetRecords.create(:key => phrase_key)
16
- GreenMidgetRecords.find_by_key(word_key).should == nil
17
- GreenMidgetRecords.find_by_key(phrase_key).should_not == nil
18
- GreenMidgetRecords[phrase_key].should == ''
14
+ Records.fetch_all([ 'word' ])
15
+ Records.create(:key => phrase_key)
16
+ Records.find_by_key(word_key).should == nil
17
+ Records.find_by_key(phrase_key).should_not == nil
18
+ Records[phrase_key].should == ''
19
19
  end
20
+
20
21
  it "should add a {key => ''} to the cache if key not found in cache and in the data store" do
21
22
  key = Words['nonexisting'].record_key(NULL)
22
- GreenMidgetRecords[key].should == ''
23
- GreenMidgetRecords.find_by_key(key).should == nil
23
+ Records[key].should == ''
24
+ Records.find_by_key(key).should == nil
24
25
  end
25
26
  end
26
27
 
27
28
  describe "#fetch_all" do
28
29
  it "should empty cache before fetching" do
29
30
  bar_key = Words['bar'].record_key(ALTERNATIVE)
30
- GreenMidgetRecords.fetch_all([ 'foo', 'bar' ])
31
- GreenMidgetRecords.class_variable_get("@@cache").key?(bar_key).should be_true
32
- GreenMidgetRecords.fetch_all([ 'foo', 'newbar' ])
33
- GreenMidgetRecords.class_variable_get("@@cache").key?(bar_key).should be_false
31
+ Records.fetch_all([ 'foo', 'bar' ])
32
+ Records.cache.key?(bar_key).should be_true
33
+ Records.fetch_all([ 'foo', 'newbar' ])
34
+ Records.cache.key?(bar_key).should be_false
34
35
  end
36
+
35
37
  it "does a multi get on all words and keys" do
36
- cache = GreenMidgetRecords.fetch_all([ 'foo', 'bar' ])
37
- cache['foo'].should.eql? GreenMidgetRecords.class_eval{new('foo')}
38
+ cache = Records.fetch_all([ 'foo', 'bar' ])
39
+ cache['foo'].should.eql? Records.class_eval{new(:key => 'foo')}
38
40
  end
41
+
39
42
  it "should fetch the system keys along with the given words" do
40
43
  key = Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count"
41
- GreenMidgetRecords.create(:key => key)
42
- GreenMidgetRecords.fetch_all([])
43
- cache = GreenMidgetRecords.class_variable_get("@@cache")
44
+ Records.create(:key => key)
45
+ Records.fetch_all([])
46
+ cache = Records.cache
44
47
  cache.key?(key).should be_true
45
48
  cache.count.should == 1
46
49
  end
50
+
47
51
  it "words with zero examples or no record in the database should be present in the cache" do
48
- GreenMidgetRecords.create(:key => Words['kotoba'].record_key(NULL))
49
- GreenMidgetRecords.fetch_all(['kotoba'])
50
- GreenMidgetRecords.class_variable_get("@@cache").key?(Words['kotoba'].record_key(ALTERNATIVE)).should be_true
51
- GreenMidgetRecords.create(:key => Words['mouichidou'].record_key(NULL), :value => 0)
52
- GreenMidgetRecords.create(:key => Words['mouichidou'].record_key(ALTERNATIVE), :value => 3)
53
- GreenMidgetRecords.fetch_all(['mouichidou'])
54
- GreenMidgetRecords.class_variable_get("@@cache")[Words['mouichidou'].record_key(NULL)].should_not == nil
55
- GreenMidgetRecords.class_variable_get("@@cache")[Words['mouichidou'].record_key(ALTERNATIVE)].should_not == nil
52
+ Records.create(:key => Words['kotoba'].record_key(NULL))
53
+ Records.fetch_all(['kotoba'])
54
+ Records.cache.key?(Words['kotoba'].record_key(ALTERNATIVE)).should be_true
55
+ Records.create(:key => Words['mouichidou'].record_key(NULL), :value => 0)
56
+ Records.create(:key => Words['mouichidou'].record_key(ALTERNATIVE), :value => 3)
57
+ Records.fetch_all(['mouichidou'])
58
+ Records.cache[Words['mouichidou'].record_key(NULL)].should_not == nil
59
+ Records.cache[Words['mouichidou'].record_key(ALTERNATIVE)].should_not == nil
56
60
  end
61
+
57
62
  it "the cache should be a hash; its keys should be strings" do
58
- GreenMidgetRecords.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count")
59
- GreenMidgetRecords.create(:key => Features.prefix + "url_in_text::#{ NULL }_count")
60
- GreenMidgetRecords.fetch_all([])
61
- cache = GreenMidgetRecords.class_variable_get("@@cache")
63
+ Records.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count")
64
+ Records.create(:key => Features.prefix + "url_in_text::#{ NULL }_count")
65
+ Records.fetch_all([])
66
+ cache = Records.cache
62
67
  cache.class.should.eql? Hash
63
68
  cache.count.should == 2
64
69
  cache.keys.each do |key|
@@ -70,11 +75,11 @@ describe GreenMidget::GreenMidgetRecords do
70
75
  describe "#increment" do
71
76
  it "should increment counts first in cache and write! to store only if explicitly called" do
72
77
  record_key = Words['stuff'].record_key(NULL)
73
- GreenMidgetRecords.create(:key => record_key)
78
+ Records.create(:key => record_key)
74
79
 
75
80
  lambda {
76
- GreenMidgetRecords.increment(record_key)
77
- }.should change { GreenMidgetRecords.find_by_key(record_key).value.to_f }.by(1)
81
+ Records.increment(record_key)
82
+ }.should change { Records.find_by_key(record_key).value.to_f }.by(1)
78
83
  end
79
84
  end
80
85
  end
data/spec/spec_helper.rb CHANGED
@@ -1,13 +1,16 @@
1
1
  # Copyright (c) 2011, SoundCloud Ltd., Nikola Chochkov
2
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
- $LOAD_PATH.unshift(File.dirname(__FILE__))
4
- require 'rspec'
2
+
5
3
  require 'green_midget'
4
+ require 'sqlite3'
6
5
 
7
- # Requires supporting files with custom matchers and macros, etc,
8
- # in ./support/ and its subdirectories.
9
- Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |file| require file }
6
+ conn = { :adapter => 'sqlite3', :database => ':memory:' }
7
+ ActiveRecord::Base.establish_connection(conn)
10
8
 
11
- RSpec.configure do |config|
9
+ require 'green_midget/db/migrate/create_green_midget_records'
10
+ GreenMidget::CreateGreenMidgetRecords.verbose = false
11
+ GreenMidget::CreateGreenMidgetRecords.up
12
12
 
13
+ GreenMidget::Records.class_eval do
14
+ cattr_accessor :cache
13
15
  end
16
+
data/spec/tester.rb CHANGED
@@ -11,7 +11,7 @@ class Tester < GreenMidget::Base
11
11
  end
12
12
 
13
13
  def log_ratio
14
- GreenMidgetRecords.fetch_all(words)
14
+ Records.fetch_all(words)
15
15
  super
16
16
  end
17
17
  end
data/spec/words_spec.rb CHANGED
@@ -5,7 +5,7 @@ describe GreenMidget::Words do
5
5
  include GreenMidget
6
6
 
7
7
  before(:each) do
8
- GreenMidgetRecords.delete_all
8
+ Records.delete_all
9
9
  end
10
10
 
11
11
  describe "self.record_keys" do
@@ -17,7 +17,7 @@ describe GreenMidget::Words do
17
17
 
18
18
  describe "#probability_for" do
19
19
  it "should return the smoother constant if the word has zero examples" do
20
- GreenMidgetRecords[Words['word'].record_key(ALTERNATIVE)].should == ''
20
+ Records[Words['word'].record_key(ALTERNATIVE)].should == ''
21
21
  Words['word'].probability_for(ALTERNATIVE).should == (1.0 / Examples.total)
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: green_midget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-05 00:00:00.000000000 +02:00
12
+ date: 2012-02-17 00:00:00.000000000 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord
17
- requirement: &2153074740 !ruby/object:Gem::Requirement
17
+ requirement: &2153348200 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,29 +22,7 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2153074740
26
- - !ruby/object:Gem::Dependency
27
- name: rspec
28
- requirement: &2153074320 !ruby/object:Gem::Requirement
29
- none: false
30
- requirements:
31
- - - ! '>='
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: *2153074320
37
- - !ruby/object:Gem::Dependency
38
- name: bundler
39
- requirement: &2153073900 !ruby/object:Gem::Requirement
40
- none: false
41
- requirements:
42
- - - ! '>='
43
- - !ruby/object:Gem::Version
44
- version: '0'
45
- type: :development
46
- prerelease: false
47
- version_requirements: *2153073900
25
+ version_requirements: *2153348200
48
26
  description: Naive Bayesian Classifier with customizable features
49
27
  email:
50
28
  - nikola@howkul.info
@@ -52,26 +30,32 @@ executables: []
52
30
  extensions: []
53
31
  extra_rdoc_files: []
54
32
  files:
55
- - .document
56
33
  - .gitignore
34
+ - .travis.yml
57
35
  - Gemfile
58
36
  - Gemfile.lock
59
37
  - LICENSE.txt
60
38
  - README.md
61
39
  - Rakefile
40
+ - benchmark/benchmark.rb
41
+ - benchmark/test.rb
62
42
  - green_midget.gemspec
63
43
  - lib/green_midget.rb
64
44
  - lib/green_midget/base.rb
65
45
  - lib/green_midget/constants.rb
66
46
  - lib/green_midget/db/migrate/create_green_midget_records.rb
47
+ - lib/green_midget/default_features.rb
48
+ - lib/green_midget/errors/feature_method_not_implemented.rb
49
+ - lib/green_midget/errors/no_examples_given.rb
50
+ - lib/green_midget/errors/no_text_found.rb
67
51
  - lib/green_midget/extensions/classifier.rb
68
52
  - lib/green_midget/extensions/sample.rb
69
- - lib/green_midget/green_midget.rb
53
+ - lib/green_midget/heuristic_checks.rb
70
54
  - lib/green_midget/logger.rb
71
55
  - lib/green_midget/models/countable.rb
72
56
  - lib/green_midget/models/examples.rb
73
57
  - lib/green_midget/models/features.rb
74
- - lib/green_midget/models/green_midget_records.rb
58
+ - lib/green_midget/models/records.rb
75
59
  - lib/green_midget/models/words.rb
76
60
  - lib/green_midget/url_detection.rb
77
61
  - lib/green_midget/version.rb
@@ -105,7 +89,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
89
  - !ruby/object:Gem::Version
106
90
  version: '0'
107
91
  requirements: []
108
- rubyforge_project: green_midget
92
+ rubyforge_project:
109
93
  rubygems_version: 1.6.2
110
94
  signing_key:
111
95
  specification_version: 3
data/.document DELETED
@@ -1,5 +0,0 @@
1
- lib/**/*.rb
2
- bin/*
3
- -
4
- features/**/*.feature
5
- LICENSE.txt
@@ -1,6 +0,0 @@
1
- # Copyright (c) 2011, SoundCloud Ltd., Nikola Chochkov
2
- require File.join(File.dirname(__FILE__), 'constants')
3
-
4
- module GreenMidget
5
- include Constants
6
- end
@@ -1,49 +0,0 @@
1
- # Copyright (c) 2011, SoundCloud Ltd., Nikola Chochkov
2
- module GreenMidget
3
- class GreenMidgetRecords < ActiveRecord::Base
4
- set_table_name :green_midget_records
5
-
6
- def self.fetch_all(words = [])
7
- words_keys = Words.record_keys(words)
8
-
9
- pairs = connection.select_rows(
10
- "SELECT `key`, `value` FROM %s WHERE `key` IN ('%s') OR `key` LIKE '%s' OR `key` LIKE '%s'" %
11
- [ table_name, words_keys.join("', '"), "#{ Features.prefix }%", "#{ Examples.prefix }%" ]
12
- )
13
-
14
- @@cache = pairs.inject({}) do |memo, pair|
15
- memo[pair.first] = pair.last
16
- memo
17
- end
18
-
19
- words_keys.inject(@@cache) do |memo, word|
20
- memo[word] ||= ''
21
- memo
22
- end
23
- end
24
-
25
- def self.[](key)
26
- key = key.to_s
27
- @@cache ||= {}
28
- @@cache[key] || @@cache[key] = connection.select_value("SELECT `value` FROM #{ table_name } WHERE `key` = '#{ key }'") || @@cache[key] = ''
29
- end
30
-
31
- def self.increment(keys)
32
- keys = Array(keys)
33
- records = all(:conditions => [ "`key` IN (?)", keys ])
34
-
35
- @@objects = records.inject({}) do |memo, record|
36
- memo[record.key] = record
37
- memo
38
- end
39
-
40
- keys.inject(@@objects) do |memo, key|
41
- memo[key] ||= new(:key => key, :value => '0.0')
42
- memo
43
- end
44
-
45
- @@objects.each { |key, record| record.update_attribute(:value, record.value.to_f + 1) }
46
- @@objects = {}
47
- end
48
- end
49
- end