linnaeus 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b380bea103cd6825850436184c4ab35c9c07c089
4
+ data.tar.gz: 147508ab8b2610250a82acf5d11721040d9c721d
5
+ SHA512:
6
+ metadata.gz: 12ef0f679970ad545fc3942f26a709626ed43647bbeb454656fb39c908691434746dd3cf21c3cc658be534a814c73aa5572af1e1c466c1f89048d326516dc97f
7
+ data.tar.gz: d982a11641d6be711117b270850d7309881ad74b4e5278f484bced8f1703a36bd058e7de093883f520916c378eba353ae3c01b331461301f41139699657bed8a
data/.rspec CHANGED
@@ -1 +1,3 @@
1
- --color
1
+ --colour
2
+ --order rand
3
+ -f d
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p0
data/.travis.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - 2.0.0
3
4
  - 1.9.3
4
5
  - 1.9.2
5
6
  services:
data/Gemfile.lock CHANGED
@@ -1,20 +1,51 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ addressable (2.3.5)
5
+ builder (3.2.2)
4
6
  diff-lcs (1.1.3)
7
+ faraday (0.8.7)
8
+ multipart-post (~> 1.1)
5
9
  git (1.2.5)
6
- jeweler (1.8.4)
10
+ github_api (0.10.1)
11
+ addressable
12
+ faraday (~> 0.8.1)
13
+ hashie (>= 1.2)
14
+ multi_json (~> 1.4)
15
+ nokogiri (~> 1.5.2)
16
+ oauth2
17
+ hashie (2.0.5)
18
+ highline (1.6.19)
19
+ httpauth (0.2.0)
20
+ jeweler (1.8.6)
21
+ builder
7
22
  bundler (~> 1.0)
8
23
  git (>= 1.2.5)
24
+ github_api (= 0.10.1)
25
+ highline (>= 1.6.15)
26
+ nokogiri (= 1.5.10)
9
27
  rake
10
28
  rdoc
11
- json (1.7.5)
12
- multi_json (1.3.6)
13
- rake (0.9.2.2)
14
- rdoc (3.12)
29
+ json (1.8.0)
30
+ jwt (0.1.8)
31
+ multi_json (>= 1.5)
32
+ multi_json (1.7.7)
33
+ multi_xml (0.5.4)
34
+ multipart-post (1.2.0)
35
+ nokogiri (1.5.10)
36
+ oauth2 (0.9.2)
37
+ faraday (~> 0.8)
38
+ httpauth (~> 0.2)
39
+ jwt (~> 0.1.4)
40
+ multi_json (~> 1.0)
41
+ multi_xml (~> 0.5)
42
+ rack (~> 1.2)
43
+ rack (1.5.2)
44
+ rake (10.1.0)
45
+ rdoc (3.12.2)
15
46
  json (~> 1.4)
16
- redcarpet (2.2.2)
17
- redis (3.0.2)
47
+ redcarpet (3.0.0)
48
+ redis (3.0.4)
18
49
  rspec (2.11.0)
19
50
  rspec-core (~> 2.11.0)
20
51
  rspec-expectations (~> 2.11.0)
@@ -28,7 +59,7 @@ GEM
28
59
  simplecov-html (~> 0.7.1)
29
60
  simplecov-html (0.7.1)
30
61
  stemmer (1.0.1)
31
- yard (0.8.3)
62
+ yard (0.8.6.2)
32
63
 
33
64
  PLATFORMS
34
65
  ruby
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.3
1
+ 1.1.0
@@ -1,10 +1,5 @@
1
1
  # The redis persistence layer.
2
2
  class Linnaeus::Persistence < Linnaeus
3
- # The Set (in the redis sense) of categories are stored in this key.
4
- CATEGORIES_KEY = 'Linnaeus:category'
5
- # The base key for a category in the redis corpus. Word occurrence counts for a category appear under here.
6
- BASE_CATEGORY_KEY = 'Linnaeus:cat:'
7
-
8
3
  attr_accessor :redis
9
4
 
10
5
  def initialize(opts = {})
@@ -17,9 +12,12 @@ class Linnaeus::Persistence < Linnaeus
17
12
  redis_timeout: 5.0,
18
13
  redis_password: nil,
19
14
  redis_id: nil,
20
- redis_tcp_keepalive: 0
15
+ redis_tcp_keepalive: 0,
16
+ scope: nil
21
17
  }.merge(opts)
22
18
 
19
+ @scope = options[:scope]
20
+
23
21
  @redis = Redis.new(
24
22
  host: options[:redis_host],
25
23
  port: options[:redis_port],
@@ -41,7 +39,7 @@ class Linnaeus::Persistence < Linnaeus
41
39
  # categories::
42
40
  # A string or array of categories.
43
41
  def add_categories(categories)
44
- @redis.sadd CATEGORIES_KEY, categories
42
+ @redis.sadd category_collection_key, categories
45
43
  end
46
44
 
47
45
  # Remove categories from the bayesian corpus
@@ -50,7 +48,7 @@ class Linnaeus::Persistence < Linnaeus
50
48
  # categories::
51
49
  # A string or array of categories.
52
50
  def remove_category(category)
53
- @redis.srem CATEGORIES_KEY, category
51
+ @redis.srem category_collection_key, category
54
52
  end
55
53
 
56
54
  # Get categories from the bayesian corpus
@@ -59,7 +57,7 @@ class Linnaeus::Persistence < Linnaeus
59
57
  # categories::
60
58
  # A string or array of categories.
61
59
  def get_categories
62
- @redis.smembers CATEGORIES_KEY
60
+ @redis.smembers category_collection_key
63
61
  end
64
62
 
65
63
  # Get a list of words with their number of occurrences.
@@ -71,7 +69,7 @@ class Linnaeus::Persistence < Linnaeus
71
69
  # == Returns
72
70
  # A hash with the word counts for this category.
73
71
  def get_words_with_count_for_category(category)
74
- @redis.hgetall BASE_CATEGORY_KEY + category
72
+ @redis.hgetall base_category_key + category
75
73
  end
76
74
 
77
75
  # Clear all training data from the backend.
@@ -79,6 +77,15 @@ class Linnaeus::Persistence < Linnaeus
79
77
  @redis.flushdb
80
78
  end
81
79
 
80
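+ # Clear training data for this instance's scope by deleting every key matching the "Linnaeus[:scope]*" prefix glob; note that an unscoped instance's glob ("Linnaeus*") also matches keys of every custom scope.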
81
+ def clear_training_data
82
+ keys = @redis.keys(base_key.join(':') + '*')
83
+
84
+ keys.each do |key|
85
+ @redis.del key
86
+ end
87
+ end
88
+
82
89
  # Increment word counts within a category
83
90
  #
84
91
  # == Parameters
@@ -88,7 +95,7 @@ class Linnaeus::Persistence < Linnaeus
88
95
  # A hash containing a count of the number of word occurrences in a document
89
96
  def increment_word_counts_for_category(category, word_occurrences)
90
97
  word_occurrences.each do|word,count|
91
- @redis.hincrby BASE_CATEGORY_KEY + category, word, count
98
+ @redis.hincrby base_category_key + category, word, count
92
99
  end
93
100
  end
94
101
 
@@ -101,7 +108,7 @@ class Linnaeus::Persistence < Linnaeus
101
108
  # A hash containing a count of the number of word occurrences in a document
102
109
  def decrement_word_counts_for_category(category, word_occurrences)
103
110
  word_occurrences.each do|word,count|
104
- @redis.hincrby BASE_CATEGORY_KEY + category, word, - count
111
+ @redis.hincrby base_category_key + category, word, - count
105
112
  end
106
113
  end
107
114
 
@@ -111,15 +118,32 @@ class Linnaeus::Persistence < Linnaeus
111
118
  # category::
112
119
  # A string representing a category.
113
120
  def cleanup_empty_words_in_category(category)
114
- word_counts = @redis.hgetall BASE_CATEGORY_KEY + category
121
+ word_counts = @redis.hgetall base_category_key + category
115
122
  empty_words = word_counts.select{|word, count| count.to_i <= 0}
116
123
  if empty_words == word_counts
117
- @redis.del BASE_CATEGORY_KEY + category
124
+ @redis.del base_category_key + category
118
125
  else
119
126
  if empty_words.any?
120
- @redis.hdel BASE_CATEGORY_KEY + category, empty_words.keys
127
+ @redis.hdel base_category_key + category, empty_words.keys
121
128
  end
122
129
  end
123
130
  end
124
131
 
132
+ private
133
+
134
+ # The Set (in the redis sense) of categories is stored in this key.
135
+ def category_collection_key
136
+ [ base_key, 'category' ].compact.join(':')
137
+ end
138
+
139
+ # The base key for a category within a scope in the redis corpus. Word
140
+ # occurrence counts for a category appear under here.
141
+ def base_category_key
142
+ [ base_key, 'cat:' ].flatten.join(':')
143
+ end
144
+
145
+ def base_key
146
+ [ 'Linnaeus', @scope ].compact
147
+ end
148
+
125
149
  end
data/linnaeus.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "linnaeus"
8
- s.version = "1.0.3"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["djcp"]
12
- s.date = "2012-11-02"
12
+ s.date = "2013-07-12"
13
13
  s.description = "Linnaeus provides a redis-backed Bayesian classifier. Words are stemmed, stopwords are stopped, and redis is used to allow for persistent and concurrent training and classification."
14
14
  s.email = "dan@collispuro.net"
15
15
  s.extra_rdoc_files = [
@@ -19,6 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.files = [
20
20
  ".document",
21
21
  ".rspec",
22
+ ".ruby-version",
22
23
  ".travis.yml",
23
24
  "Gemfile",
24
25
  "Gemfile.lock",
@@ -43,11 +44,11 @@ Gem::Specification.new do |s|
43
44
  s.homepage = "http://github.com/djcp/linnaeus"
44
45
  s.licenses = ["MIT"]
45
46
  s.require_paths = ["lib"]
46
- s.rubygems_version = "1.8.24"
47
+ s.rubygems_version = "2.0.0"
47
48
  s.summary = "Another redis-backed Bayesian classifier"
48
49
 
49
50
  if s.respond_to? :specification_version then
50
- s.specification_version = 3
51
+ s.specification_version = 4
51
52
 
52
53
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
54
  s.add_runtime_dependency(%q<redis>, ["~> 3.0.0"])
@@ -3,7 +3,58 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  describe Linnaeus::Persistence do
4
4
  before do
5
5
  lp = get_linnaeus_persistence
6
- lp.clear_all_training_data
6
+ lp.clear_training_data
7
+ end
8
+
9
+ it 'sets keys properly with defaults' do
10
+ lp2 = get_linnaeus_persistence
11
+ train_a_document_in('foobar')
12
+ lp2.redis.keys('*').should eq ['Linnaeus:category', 'Linnaeus:cat:foobar']
13
+ end
14
+
15
+ context "custom scopes" do
16
+ it 'sets keys properly' do
17
+ lp2 = get_linnaeus_persistence(scope: 'new-scope')
18
+ lp2.clear_all_training_data
19
+
20
+ train_a_document_in('foobar', scope: 'new-scope')
21
+
22
+ lp2.redis.keys('*').sort.should eq [
23
+ 'Linnaeus:new-scope:cat:foobar', 'Linnaeus:new-scope:category'
24
+ ]
25
+ end
26
+
27
+ it 'can clear scoped training data separately' do
28
+ lp = get_linnaeus_persistence
29
+
30
+ train_a_document_in('foobar')
31
+
32
+ lp2 = get_linnaeus_persistence(scope: 'new-scope')
33
+
34
+ train_a_document_in('foobar', scope: 'new-scope')
35
+
36
+ lp.redis.keys.sort.should eq [
37
+ "Linnaeus:cat:foobar", "Linnaeus:category",
38
+ "Linnaeus:new-scope:cat:foobar", "Linnaeus:new-scope:category"
39
+ ]
40
+
41
+ lp2.clear_training_data
42
+
43
+ lp.redis.keys.sort.should eq [
44
+ "Linnaeus:cat:foobar", "Linnaeus:category"
45
+ ]
46
+ end
47
+
48
+ it 'stores categories successfully into different scopes' do
49
+ lp = get_linnaeus_persistence
50
+ add_categories lp
51
+
52
+ lp2 = get_linnaeus_persistence(scope: 'new-scope')
53
+ add_categories lp2, ['slack' , 'frop']
54
+
55
+ lp2.get_categories.sort.should eq ['frop', 'slack']
56
+ lp.get_categories.sort.should eq ['bar','baz','foo']
57
+ end
7
58
  end
8
59
 
9
60
  it '#clear_all_training_data' do
@@ -64,21 +115,21 @@ describe Linnaeus::Persistence do
64
115
  lp.get_words_with_count_for_category('testcategory').should eq ({})
65
116
  end
66
117
 
67
- def add_categories(lp)
68
- lp.add_categories(['foo','bar','baz','foo', 'bar'])
118
+ def add_categories(lp, categories = ['foo','bar','baz','foo', 'bar'])
119
+ lp.add_categories(categories)
69
120
  end
70
121
 
71
- def get_linnaeus_persistence
72
- @lp ||= Linnaeus::Persistence.new
122
+ def get_linnaeus_persistence(options = {})
123
+ Linnaeus::Persistence.new(options)
73
124
  end
74
125
 
75
- def train_a_document_in(category)
76
- lt = Linnaeus::Trainer.new
126
+ def train_a_document_in(category, options = {})
127
+ lt = Linnaeus::Trainer.new(options)
77
128
  lt.train category, document
78
129
  end
79
130
 
80
- def untrain_a_document_in(category)
81
- lt = Linnaeus::Trainer.new
131
+ def untrain_a_document_in(category, options = {})
132
+ lt = Linnaeus::Trainer.new(options)
82
133
  lt.untrain category, document
83
134
  end
84
135
 
@@ -17,17 +17,8 @@ describe Linnaeus::Trainer do
17
17
  subject.count_word_occurrences.should == { }
18
18
  end
19
19
 
20
- it 'should train on documents properly' do
21
- lp = Linnaeus::Persistence.new
22
- lp.clear_all_training_data
23
- subject.train 'fruit', grape
24
- subject.train 'fruit', orange
25
- lp.get_words_with_count_for_category('fruit').should eq(
26
- {
27
- "grape"=>"1", "purpl"=>"1", "blue"=>"1", "green"=>"1",
28
- "fruit"=>"2", "sweet"=>"2", "wine"=>"1", "oval"=>"1",
29
- "orang"=>"1", "round"=>"1", "citru"=>"1"
30
- })
20
+ it 'should train documents properly' do
21
+ train_documents_properly
31
22
  end
32
23
 
33
24
  it 'should partially untrain properly' do
@@ -57,6 +48,26 @@ describe Linnaeus::Trainer do
57
48
  end
58
49
  end
59
50
 
51
+ context 'with a custom scope' do
52
+ it 'should train on documents properly' do
53
+ train_documents_properly(scope: 'new-scope')
54
+ end
55
+ end
56
+
57
+ def train_documents_properly(options = {})
58
+ lp = Linnaeus::Persistence.new(options)
59
+ lp.clear_all_training_data
60
+ subject = described_class.new(options)
61
+ subject.train 'fruit', grape
62
+ subject.train 'fruit', orange
63
+ lp.get_words_with_count_for_category('fruit').should eq(
64
+ {
65
+ "grape"=>"1", "purpl"=>"1", "blue"=>"1", "green"=>"1",
66
+ "fruit"=>"2", "sweet"=>"2", "wine"=>"1", "oval"=>"1",
67
+ "orang"=>"1", "round"=>"1", "citru"=>"1"
68
+ })
69
+ end
70
+
60
71
  def grape
61
72
  'grape purple blue green fruit sweet wine oval'
62
73
  end
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linnaeus
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
5
- prerelease:
4
+ version: 1.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - djcp
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-11-02 00:00:00.000000000 Z
11
+ date: 2013-07-12 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: redis
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ~>
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ~>
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: stemmer
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ~>
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ~>
44
39
  - !ruby/object:Gem::Version
@@ -46,7 +41,6 @@ dependencies:
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rspec
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ~>
52
46
  - !ruby/object:Gem::Version
@@ -54,7 +48,6 @@ dependencies:
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
52
  - - ~>
60
53
  - !ruby/object:Gem::Version
@@ -62,7 +55,6 @@ dependencies:
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: yard
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
59
  - - ~>
68
60
  - !ruby/object:Gem::Version
@@ -70,7 +62,6 @@ dependencies:
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
66
  - - ~>
76
67
  - !ruby/object:Gem::Version
@@ -78,7 +69,6 @@ dependencies:
78
69
  - !ruby/object:Gem::Dependency
79
70
  name: rdoc
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
73
  - - ~>
84
74
  - !ruby/object:Gem::Version
@@ -86,7 +76,6 @@ dependencies:
86
76
  type: :development
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
80
  - - ~>
92
81
  - !ruby/object:Gem::Version
@@ -94,65 +83,57 @@ dependencies:
94
83
  - !ruby/object:Gem::Dependency
95
84
  name: bundler
96
85
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
86
  requirements:
99
- - - ! '>='
87
+ - - '>='
100
88
  - !ruby/object:Gem::Version
101
89
  version: '0'
102
90
  type: :development
103
91
  prerelease: false
104
92
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
93
  requirements:
107
- - - ! '>='
94
+ - - '>='
108
95
  - !ruby/object:Gem::Version
109
96
  version: '0'
110
97
  - !ruby/object:Gem::Dependency
111
98
  name: jeweler
112
99
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
100
  requirements:
115
- - - ! '>='
101
+ - - '>='
116
102
  - !ruby/object:Gem::Version
117
103
  version: '0'
118
104
  type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
107
  requirements:
123
- - - ! '>='
108
+ - - '>='
124
109
  - !ruby/object:Gem::Version
125
110
  version: '0'
126
111
  - !ruby/object:Gem::Dependency
127
112
  name: simplecov
128
113
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
114
  requirements:
131
- - - ! '>='
115
+ - - '>='
132
116
  - !ruby/object:Gem::Version
133
117
  version: '0'
134
118
  type: :development
135
119
  prerelease: false
136
120
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
121
  requirements:
139
- - - ! '>='
122
+ - - '>='
140
123
  - !ruby/object:Gem::Version
141
124
  version: '0'
142
125
  - !ruby/object:Gem::Dependency
143
126
  name: redcarpet
144
127
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
128
  requirements:
147
- - - ! '>='
129
+ - - '>='
148
130
  - !ruby/object:Gem::Version
149
131
  version: '0'
150
132
  type: :development
151
133
  prerelease: false
152
134
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
135
  requirements:
155
- - - ! '>='
136
+ - - '>='
156
137
  - !ruby/object:Gem::Version
157
138
  version: '0'
158
139
  description: Linnaeus provides a redis-backed Bayesian classifier. Words are stemmed,
@@ -167,6 +148,7 @@ extra_rdoc_files:
167
148
  files:
168
149
  - .document
169
150
  - .rspec
151
+ - .ruby-version
170
152
  - .travis.yml
171
153
  - Gemfile
172
154
  - Gemfile.lock
@@ -190,29 +172,25 @@ files:
190
172
  homepage: http://github.com/djcp/linnaeus
191
173
  licenses:
192
174
  - MIT
175
+ metadata: {}
193
176
  post_install_message:
194
177
  rdoc_options: []
195
178
  require_paths:
196
179
  - lib
197
180
  required_ruby_version: !ruby/object:Gem::Requirement
198
- none: false
199
181
  requirements:
200
- - - ! '>='
182
+ - - '>='
201
183
  - !ruby/object:Gem::Version
202
184
  version: '0'
203
- segments:
204
- - 0
205
- hash: 2790078718663664512
206
185
  required_rubygems_version: !ruby/object:Gem::Requirement
207
- none: false
208
186
  requirements:
209
- - - ! '>='
187
+ - - '>='
210
188
  - !ruby/object:Gem::Version
211
189
  version: '0'
212
190
  requirements: []
213
191
  rubyforge_project:
214
- rubygems_version: 1.8.24
192
+ rubygems_version: 2.0.0
215
193
  signing_key:
216
- specification_version: 3
194
+ specification_version: 4
217
195
  summary: Another redis-backed Bayesian classifier
218
196
  test_files: []