linnaeus 1.0.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b380bea103cd6825850436184c4ab35c9c07c089
4
+ data.tar.gz: 147508ab8b2610250a82acf5d11721040d9c721d
5
+ SHA512:
6
+ metadata.gz: 12ef0f679970ad545fc3942f26a709626ed43647bbeb454656fb39c908691434746dd3cf21c3cc658be534a814c73aa5572af1e1c466c1f89048d326516dc97f
7
+ data.tar.gz: d982a11641d6be711117b270850d7309881ad74b4e5278f484bced8f1703a36bd058e7de093883f520916c378eba353ae3c01b331461301f41139699657bed8a
data/.rspec CHANGED
@@ -1 +1,3 @@
1
- --color
1
+ --colour
2
+ --order rand
3
+ -f d
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p0
data/.travis.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - 2.0.0
3
4
  - 1.9.3
4
5
  - 1.9.2
5
6
  services:
data/Gemfile.lock CHANGED
@@ -1,20 +1,51 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ addressable (2.3.5)
5
+ builder (3.2.2)
4
6
  diff-lcs (1.1.3)
7
+ faraday (0.8.7)
8
+ multipart-post (~> 1.1)
5
9
  git (1.2.5)
6
- jeweler (1.8.4)
10
+ github_api (0.10.1)
11
+ addressable
12
+ faraday (~> 0.8.1)
13
+ hashie (>= 1.2)
14
+ multi_json (~> 1.4)
15
+ nokogiri (~> 1.5.2)
16
+ oauth2
17
+ hashie (2.0.5)
18
+ highline (1.6.19)
19
+ httpauth (0.2.0)
20
+ jeweler (1.8.6)
21
+ builder
7
22
  bundler (~> 1.0)
8
23
  git (>= 1.2.5)
24
+ github_api (= 0.10.1)
25
+ highline (>= 1.6.15)
26
+ nokogiri (= 1.5.10)
9
27
  rake
10
28
  rdoc
11
- json (1.7.5)
12
- multi_json (1.3.6)
13
- rake (0.9.2.2)
14
- rdoc (3.12)
29
+ json (1.8.0)
30
+ jwt (0.1.8)
31
+ multi_json (>= 1.5)
32
+ multi_json (1.7.7)
33
+ multi_xml (0.5.4)
34
+ multipart-post (1.2.0)
35
+ nokogiri (1.5.10)
36
+ oauth2 (0.9.2)
37
+ faraday (~> 0.8)
38
+ httpauth (~> 0.2)
39
+ jwt (~> 0.1.4)
40
+ multi_json (~> 1.0)
41
+ multi_xml (~> 0.5)
42
+ rack (~> 1.2)
43
+ rack (1.5.2)
44
+ rake (10.1.0)
45
+ rdoc (3.12.2)
15
46
  json (~> 1.4)
16
- redcarpet (2.2.2)
17
- redis (3.0.2)
47
+ redcarpet (3.0.0)
48
+ redis (3.0.4)
18
49
  rspec (2.11.0)
19
50
  rspec-core (~> 2.11.0)
20
51
  rspec-expectations (~> 2.11.0)
@@ -28,7 +59,7 @@ GEM
28
59
  simplecov-html (~> 0.7.1)
29
60
  simplecov-html (0.7.1)
30
61
  stemmer (1.0.1)
31
- yard (0.8.3)
62
+ yard (0.8.6.2)
32
63
 
33
64
  PLATFORMS
34
65
  ruby
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.3
1
+ 1.1.0
@@ -1,10 +1,5 @@
1
1
  # The redis persistence layer.
2
2
  class Linnaeus::Persistence < Linnaeus
3
- # The Set (in the redis sense) of categories are stored in this key.
4
- CATEGORIES_KEY = 'Linnaeus:category'
5
- # The base key for a category in the redis corpus. Word occurrence counts for a category appear under here.
6
- BASE_CATEGORY_KEY = 'Linnaeus:cat:'
7
-
8
3
  attr_accessor :redis
9
4
 
10
5
  def initialize(opts = {})
@@ -17,9 +12,12 @@ class Linnaeus::Persistence < Linnaeus
17
12
  redis_timeout: 5.0,
18
13
  redis_password: nil,
19
14
  redis_id: nil,
20
- redis_tcp_keepalive: 0
15
+ redis_tcp_keepalive: 0,
16
+ scope: nil
21
17
  }.merge(opts)
22
18
 
19
+ @scope = options[:scope]
20
+
23
21
  @redis = Redis.new(
24
22
  host: options[:redis_host],
25
23
  port: options[:redis_port],
@@ -41,7 +39,7 @@ class Linnaeus::Persistence < Linnaeus
41
39
  # categories::
42
40
  # A string or array of categories.
43
41
  def add_categories(categories)
44
- @redis.sadd CATEGORIES_KEY, categories
42
+ @redis.sadd category_collection_key, categories
45
43
  end
46
44
 
47
45
  # Remove categories from the bayesian corpus
@@ -50,7 +48,7 @@ class Linnaeus::Persistence < Linnaeus
50
48
  # categories::
51
49
  # A string or array of categories.
52
50
  def remove_category(category)
53
- @redis.srem CATEGORIES_KEY, category
51
+ @redis.srem category_collection_key, category
54
52
  end
55
53
 
56
54
  # Get categories from the bayesian corpus
@@ -59,7 +57,7 @@ class Linnaeus::Persistence < Linnaeus
59
57
  # categories::
60
58
  # A string or array of categories.
61
59
  def get_categories
62
- @redis.smembers CATEGORIES_KEY
60
+ @redis.smembers category_collection_key
63
61
  end
64
62
 
65
63
  # Get a list of words with their number of occurrences.
@@ -71,7 +69,7 @@ class Linnaeus::Persistence < Linnaeus
71
69
  # == Returns
72
70
  # A hash with the word counts for this category.
73
71
  def get_words_with_count_for_category(category)
74
- @redis.hgetall BASE_CATEGORY_KEY + category
72
+ @redis.hgetall base_category_key + category
75
73
  end
76
74
 
77
75
  # Clear all training data from the backend.
@@ -79,6 +77,15 @@ class Linnaeus::Persistence < Linnaeus
79
77
  @redis.flushdb
80
78
  end
81
79
 
80
+ # Clear training data for the scope associated with this instance.
81
+ def clear_training_data
82
+ keys = @redis.keys(base_key.join(':') + '*')
83
+
84
+ keys.each do |key|
85
+ @redis.del key
86
+ end
87
+ end
88
+
82
89
  # Increment word counts within a category
83
90
  #
84
91
  # == Parameters
@@ -88,7 +95,7 @@ class Linnaeus::Persistence < Linnaeus
88
95
  # A hash containing a count of the number of word occurrences in a document
89
96
  def increment_word_counts_for_category(category, word_occurrences)
90
97
  word_occurrences.each do|word,count|
91
- @redis.hincrby BASE_CATEGORY_KEY + category, word, count
98
+ @redis.hincrby base_category_key + category, word, count
92
99
  end
93
100
  end
94
101
 
@@ -101,7 +108,7 @@ class Linnaeus::Persistence < Linnaeus
101
108
  # A hash containing a count of the number of word occurrences in a document
102
109
  def decrement_word_counts_for_category(category, word_occurrences)
103
110
  word_occurrences.each do|word,count|
104
- @redis.hincrby BASE_CATEGORY_KEY + category, word, - count
111
+ @redis.hincrby base_category_key + category, word, - count
105
112
  end
106
113
  end
107
114
 
@@ -111,15 +118,32 @@ class Linnaeus::Persistence < Linnaeus
111
118
  # category::
112
119
  # A string representing a category.
113
120
  def cleanup_empty_words_in_category(category)
114
- word_counts = @redis.hgetall BASE_CATEGORY_KEY + category
121
+ word_counts = @redis.hgetall base_category_key + category
115
122
  empty_words = word_counts.select{|word, count| count.to_i <= 0}
116
123
  if empty_words == word_counts
117
- @redis.del BASE_CATEGORY_KEY + category
124
+ @redis.del base_category_key + category
118
125
  else
119
126
  if empty_words.any?
120
- @redis.hdel BASE_CATEGORY_KEY + category, empty_words.keys
127
+ @redis.hdel base_category_key + category, empty_words.keys
121
128
  end
122
129
  end
123
130
  end
124
131
 
132
+ private
133
+
134
+ # The Set (in the redis sense) of categories are stored in this key.
135
+ def category_collection_key
136
+ [ base_key, 'category' ].compact.join(':')
137
+ end
138
+
139
+ # The base key for a category within a scope in the redis corpus. Word
140
+ # occurrence counts for a category appear under here.
141
+ def base_category_key
142
+ [ base_key, 'cat:' ].flatten.join(':')
143
+ end
144
+
145
+ def base_key
146
+ [ 'Linnaeus', @scope ].compact
147
+ end
148
+
125
149
  end
data/linnaeus.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "linnaeus"
8
- s.version = "1.0.3"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["djcp"]
12
- s.date = "2012-11-02"
12
+ s.date = "2013-07-12"
13
13
  s.description = "Linnaeus provides a redis-backed Bayesian classifier. Words are stemmed, stopwords are stopped, and redis is used to allow for persistent and concurrent training and classification."
14
14
  s.email = "dan@collispuro.net"
15
15
  s.extra_rdoc_files = [
@@ -19,6 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.files = [
20
20
  ".document",
21
21
  ".rspec",
22
+ ".ruby-version",
22
23
  ".travis.yml",
23
24
  "Gemfile",
24
25
  "Gemfile.lock",
@@ -43,11 +44,11 @@ Gem::Specification.new do |s|
43
44
  s.homepage = "http://github.com/djcp/linnaeus"
44
45
  s.licenses = ["MIT"]
45
46
  s.require_paths = ["lib"]
46
- s.rubygems_version = "1.8.24"
47
+ s.rubygems_version = "2.0.0"
47
48
  s.summary = "Another redis-backed Bayesian classifier"
48
49
 
49
50
  if s.respond_to? :specification_version then
50
- s.specification_version = 3
51
+ s.specification_version = 4
51
52
 
52
53
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
54
  s.add_runtime_dependency(%q<redis>, ["~> 3.0.0"])
@@ -3,7 +3,58 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  describe Linnaeus::Persistence do
4
4
  before do
5
5
  lp = get_linnaeus_persistence
6
- lp.clear_all_training_data
6
+ lp.clear_training_data
7
+ end
8
+
9
+ it 'sets keys properly with defaults' do
10
+ lp2 = get_linnaeus_persistence
11
+ train_a_document_in('foobar')
12
+ lp2.redis.keys('*').should eq ['Linnaeus:category', 'Linnaeus:cat:foobar']
13
+ end
14
+
15
+ context "custom scopes" do
16
+ it 'sets keys properly' do
17
+ lp2 = get_linnaeus_persistence(scope: 'new-scope')
18
+ lp2.clear_all_training_data
19
+
20
+ train_a_document_in('foobar', scope: 'new-scope')
21
+
22
+ lp2.redis.keys('*').sort.should eq [
23
+ 'Linnaeus:new-scope:cat:foobar', 'Linnaeus:new-scope:category'
24
+ ]
25
+ end
26
+
27
+ it 'can clear scoped training data separately' do
28
+ lp = get_linnaeus_persistence
29
+
30
+ train_a_document_in('foobar')
31
+
32
+ lp2 = get_linnaeus_persistence(scope: 'new-scope')
33
+
34
+ train_a_document_in('foobar', scope: 'new-scope')
35
+
36
+ lp.redis.keys.sort.should eq [
37
+ "Linnaeus:cat:foobar", "Linnaeus:category",
38
+ "Linnaeus:new-scope:cat:foobar", "Linnaeus:new-scope:category"
39
+ ]
40
+
41
+ lp2.clear_training_data
42
+
43
+ lp.redis.keys.sort.should eq [
44
+ "Linnaeus:cat:foobar", "Linnaeus:category"
45
+ ]
46
+ end
47
+
48
+ it 'stores categories successfully into different scopes' do
49
+ lp = get_linnaeus_persistence
50
+ add_categories lp
51
+
52
+ lp2 = get_linnaeus_persistence(scope: 'new-scope')
53
+ add_categories lp2, ['slack' , 'frop']
54
+
55
+ lp2.get_categories.sort.should eq ['frop', 'slack']
56
+ lp.get_categories.sort.should eq ['bar','baz','foo']
57
+ end
7
58
  end
8
59
 
9
60
  it '#clear_all_training_data' do
@@ -64,21 +115,21 @@ describe Linnaeus::Persistence do
64
115
  lp.get_words_with_count_for_category('testcategory').should eq ({})
65
116
  end
66
117
 
67
- def add_categories(lp)
68
- lp.add_categories(['foo','bar','baz','foo', 'bar'])
118
+ def add_categories(lp, categories = ['foo','bar','baz','foo', 'bar'])
119
+ lp.add_categories(categories)
69
120
  end
70
121
 
71
- def get_linnaeus_persistence
72
- @lp ||= Linnaeus::Persistence.new
122
+ def get_linnaeus_persistence(options = {})
123
+ Linnaeus::Persistence.new(options)
73
124
  end
74
125
 
75
- def train_a_document_in(category)
76
- lt = Linnaeus::Trainer.new
126
+ def train_a_document_in(category, options = {})
127
+ lt = Linnaeus::Trainer.new(options)
77
128
  lt.train category, document
78
129
  end
79
130
 
80
- def untrain_a_document_in(category)
81
- lt = Linnaeus::Trainer.new
131
+ def untrain_a_document_in(category, options = {})
132
+ lt = Linnaeus::Trainer.new(options)
82
133
  lt.untrain category, document
83
134
  end
84
135
 
@@ -17,17 +17,8 @@ describe Linnaeus::Trainer do
17
17
  subject.count_word_occurrences.should == { }
18
18
  end
19
19
 
20
- it 'should train on documents properly' do
21
- lp = Linnaeus::Persistence.new
22
- lp.clear_all_training_data
23
- subject.train 'fruit', grape
24
- subject.train 'fruit', orange
25
- lp.get_words_with_count_for_category('fruit').should eq(
26
- {
27
- "grape"=>"1", "purpl"=>"1", "blue"=>"1", "green"=>"1",
28
- "fruit"=>"2", "sweet"=>"2", "wine"=>"1", "oval"=>"1",
29
- "orang"=>"1", "round"=>"1", "citru"=>"1"
30
- })
20
+ it 'should train documents properly' do
21
+ train_documents_properly
31
22
  end
32
23
 
33
24
  it 'should partially untrain properly' do
@@ -57,6 +48,26 @@ describe Linnaeus::Trainer do
57
48
  end
58
49
  end
59
50
 
51
+ context 'with a custom scope' do
52
+ it 'should train on documents properly' do
53
+ train_documents_properly(scope: 'new-scope')
54
+ end
55
+ end
56
+
57
+ def train_documents_properly(options = {})
58
+ lp = Linnaeus::Persistence.new(options)
59
+ lp.clear_all_training_data
60
+ subject = described_class.new(options)
61
+ subject.train 'fruit', grape
62
+ subject.train 'fruit', orange
63
+ lp.get_words_with_count_for_category('fruit').should eq(
64
+ {
65
+ "grape"=>"1", "purpl"=>"1", "blue"=>"1", "green"=>"1",
66
+ "fruit"=>"2", "sweet"=>"2", "wine"=>"1", "oval"=>"1",
67
+ "orang"=>"1", "round"=>"1", "citru"=>"1"
68
+ })
69
+ end
70
+
60
71
  def grape
61
72
  'grape purple blue green fruit sweet wine oval'
62
73
  end
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linnaeus
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
5
- prerelease:
4
+ version: 1.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - djcp
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-11-02 00:00:00.000000000 Z
11
+ date: 2013-07-12 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: redis
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ~>
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ~>
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: stemmer
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ~>
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ~>
44
39
  - !ruby/object:Gem::Version
@@ -46,7 +41,6 @@ dependencies:
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rspec
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ~>
52
46
  - !ruby/object:Gem::Version
@@ -54,7 +48,6 @@ dependencies:
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
52
  - - ~>
60
53
  - !ruby/object:Gem::Version
@@ -62,7 +55,6 @@ dependencies:
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: yard
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
59
  - - ~>
68
60
  - !ruby/object:Gem::Version
@@ -70,7 +62,6 @@ dependencies:
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
66
  - - ~>
76
67
  - !ruby/object:Gem::Version
@@ -78,7 +69,6 @@ dependencies:
78
69
  - !ruby/object:Gem::Dependency
79
70
  name: rdoc
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
73
  - - ~>
84
74
  - !ruby/object:Gem::Version
@@ -86,7 +76,6 @@ dependencies:
86
76
  type: :development
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
80
  - - ~>
92
81
  - !ruby/object:Gem::Version
@@ -94,65 +83,57 @@ dependencies:
94
83
  - !ruby/object:Gem::Dependency
95
84
  name: bundler
96
85
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
86
  requirements:
99
- - - ! '>='
87
+ - - '>='
100
88
  - !ruby/object:Gem::Version
101
89
  version: '0'
102
90
  type: :development
103
91
  prerelease: false
104
92
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
93
  requirements:
107
- - - ! '>='
94
+ - - '>='
108
95
  - !ruby/object:Gem::Version
109
96
  version: '0'
110
97
  - !ruby/object:Gem::Dependency
111
98
  name: jeweler
112
99
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
100
  requirements:
115
- - - ! '>='
101
+ - - '>='
116
102
  - !ruby/object:Gem::Version
117
103
  version: '0'
118
104
  type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
107
  requirements:
123
- - - ! '>='
108
+ - - '>='
124
109
  - !ruby/object:Gem::Version
125
110
  version: '0'
126
111
  - !ruby/object:Gem::Dependency
127
112
  name: simplecov
128
113
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
114
  requirements:
131
- - - ! '>='
115
+ - - '>='
132
116
  - !ruby/object:Gem::Version
133
117
  version: '0'
134
118
  type: :development
135
119
  prerelease: false
136
120
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
121
  requirements:
139
- - - ! '>='
122
+ - - '>='
140
123
  - !ruby/object:Gem::Version
141
124
  version: '0'
142
125
  - !ruby/object:Gem::Dependency
143
126
  name: redcarpet
144
127
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
128
  requirements:
147
- - - ! '>='
129
+ - - '>='
148
130
  - !ruby/object:Gem::Version
149
131
  version: '0'
150
132
  type: :development
151
133
  prerelease: false
152
134
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
135
  requirements:
155
- - - ! '>='
136
+ - - '>='
156
137
  - !ruby/object:Gem::Version
157
138
  version: '0'
158
139
  description: Linnaeus provides a redis-backed Bayesian classifier. Words are stemmed,
@@ -167,6 +148,7 @@ extra_rdoc_files:
167
148
  files:
168
149
  - .document
169
150
  - .rspec
151
+ - .ruby-version
170
152
  - .travis.yml
171
153
  - Gemfile
172
154
  - Gemfile.lock
@@ -190,29 +172,25 @@ files:
190
172
  homepage: http://github.com/djcp/linnaeus
191
173
  licenses:
192
174
  - MIT
175
+ metadata: {}
193
176
  post_install_message:
194
177
  rdoc_options: []
195
178
  require_paths:
196
179
  - lib
197
180
  required_ruby_version: !ruby/object:Gem::Requirement
198
- none: false
199
181
  requirements:
200
- - - ! '>='
182
+ - - '>='
201
183
  - !ruby/object:Gem::Version
202
184
  version: '0'
203
- segments:
204
- - 0
205
- hash: 2790078718663664512
206
185
  required_rubygems_version: !ruby/object:Gem::Requirement
207
- none: false
208
186
  requirements:
209
- - - ! '>='
187
+ - - '>='
210
188
  - !ruby/object:Gem::Version
211
189
  version: '0'
212
190
  requirements: []
213
191
  rubyforge_project:
214
- rubygems_version: 1.8.24
192
+ rubygems_version: 2.0.0
215
193
  signing_key:
216
- specification_version: 3
194
+ specification_version: 4
217
195
  summary: Another redis-backed Bayesian classifier
218
196
  test_files: []