rrbayes 0.0.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/rrbayes/category.rb +18 -10
- data/lib/rrbayes/rrbayes.rb +2 -3
- data/rrbayes.gemspec +7 -7
- data/test/classifier_test.rb +48 -0
- metadata +5 -5
- data/test/rrbayes_test.rb +0 -52
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ begin
|
|
10
10
|
gem.email = "hungryblank@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/hungryblank/rrbayes"
|
12
12
|
gem.authors = ["hungryblank"]
|
13
|
-
gem.add_dependency "redis", ">= 0"
|
13
|
+
gem.add_dependency "redis", ">= 2.0.0"
|
14
14
|
gem.add_development_dependency "riot", ">= 0"
|
15
15
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
16
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
1.0.0
|
data/lib/rrbayes/category.rb
CHANGED
@@ -14,18 +14,16 @@ module Rrbayes
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def learn(frequency_map)
|
17
|
-
@db.pipelined do
|
17
|
+
@db.pipelined do
|
18
18
|
frequency_map.each do |attribute, evidences|
|
19
|
-
|
20
|
-
pipe.incrby(evidences_key, evidences)
|
19
|
+
store_evidences(attribute, evidences)
|
21
20
|
end
|
22
|
-
|
23
|
-
pipe.incr(DOCUMENTS_SCOPE)
|
21
|
+
increment_documents
|
24
22
|
end
|
25
23
|
end
|
26
24
|
|
27
25
|
def attributes_score(attributes)
|
28
|
-
total =
|
26
|
+
total = evidences_total.to_f
|
29
27
|
attributes.map do |attribute, evidences|
|
30
28
|
(evidences_for(attribute) || 0.1).to_f / total
|
31
29
|
end.inject(0) do |score, attr_likelyhood|
|
@@ -34,17 +32,27 @@ module Rrbayes
|
|
34
32
|
end
|
35
33
|
|
36
34
|
def evidences_for(attribute)
|
37
|
-
@db.
|
35
|
+
@db.hget(name, attribute)
|
38
36
|
end
|
39
37
|
|
40
|
-
def
|
38
|
+
def evidences_total
|
41
39
|
@db.get(evidences_key)
|
42
40
|
end
|
43
41
|
|
42
|
+
def documents_total
|
43
|
+
@db.get(documents_key)
|
44
|
+
end
|
45
|
+
|
44
46
|
private
|
45
47
|
|
46
|
-
def
|
47
|
-
|
48
|
+
def store_evidences(attribute, evidences)
|
49
|
+
@db.hincrby(name, attribute, evidences)
|
50
|
+
@db.incrby(evidences_key, evidences)
|
51
|
+
end
|
52
|
+
|
53
|
+
def increment_documents
|
54
|
+
@db.incr(documents_key)
|
55
|
+
@db.incr(DOCUMENTS_SCOPE)
|
48
56
|
end
|
49
57
|
|
50
58
|
def evidences_key
|
data/lib/rrbayes/rrbayes.rb
CHANGED
@@ -15,7 +15,6 @@ module Rrbayes
|
|
15
15
|
#
|
16
16
|
def initialize(options = {}, redis_options = {})
|
17
17
|
@db = Redis.new(redis_options)
|
18
|
-
@db.connect_to_server
|
19
18
|
@categories = find_categories(options).map { |c| Category.new(c, self) }
|
20
19
|
end
|
21
20
|
|
@@ -37,7 +36,7 @@ module Rrbayes
|
|
37
36
|
# => 'spam'
|
38
37
|
#
|
39
38
|
def classify(frequency_map)
|
40
|
-
@categories.
|
39
|
+
@categories.sort_by { |c| -c.attributes_score(frequency_map) }.first.name
|
41
40
|
end
|
42
41
|
|
43
42
|
#Returns the category objects with the name provided in the argument
|
@@ -48,7 +47,7 @@ module Rrbayes
|
|
48
47
|
private
|
49
48
|
|
50
49
|
def load_categories
|
51
|
-
@db.
|
50
|
+
@db.smembers('categories')
|
52
51
|
end
|
53
52
|
|
54
53
|
def find_categories(options)
|
data/rrbayes.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rrbayes}
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "1.0.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["hungryblank"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-05-26}
|
13
13
|
s.description = %q{Implementation of a naive Bayes classifier with a Redis backend}
|
14
14
|
s.email = %q{hungryblank@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -27,7 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"lib/rrbayes/category.rb",
|
28
28
|
"lib/rrbayes/rrbayes.rb",
|
29
29
|
"rrbayes.gemspec",
|
30
|
-
"test/rrbayes_test.rb",
|
30
|
+
"test/classifier_test.rb",
|
31
31
|
"test/teststrap.rb"
|
32
32
|
]
|
33
33
|
s.homepage = %q{http://github.com/hungryblank/rrbayes}
|
@@ -37,7 +37,7 @@ Gem::Specification.new do |s|
|
|
37
37
|
s.summary = %q{Rrbayes: Ruby Redis Bayes}
|
38
38
|
s.test_files = [
|
39
39
|
"test/teststrap.rb",
|
40
|
-
"test/rrbayes_test.rb"
|
40
|
+
"test/classifier_test.rb"
|
41
41
|
]
|
42
42
|
|
43
43
|
if s.respond_to? :specification_version then
|
@@ -45,14 +45,14 @@ Gem::Specification.new do |s|
|
|
45
45
|
s.specification_version = 3
|
46
46
|
|
47
47
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
48
|
-
s.add_runtime_dependency(%q<redis>, [">= 0"])
|
48
|
+
s.add_runtime_dependency(%q<redis>, [">= 2.0.0"])
|
49
49
|
s.add_development_dependency(%q<riot>, [">= 0"])
|
50
50
|
else
|
51
|
-
s.add_dependency(%q<redis>, [">= 0"])
|
51
|
+
s.add_dependency(%q<redis>, [">= 2.0.0"])
|
52
52
|
s.add_dependency(%q<riot>, [">= 0"])
|
53
53
|
end
|
54
54
|
else
|
55
|
-
s.add_dependency(%q<redis>, [">= 0"])
|
55
|
+
s.add_dependency(%q<redis>, [">= 2.0.0"])
|
56
56
|
s.add_dependency(%q<riot>, [">= 0"])
|
57
57
|
end
|
58
58
|
end
|
data/test/classifier_test.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'teststrap'
|
2
|
+
|
3
|
+
include Rrbayes
|
4
|
+
|
5
|
+
DB_NUM = 0
|
6
|
+
|
7
|
+
context "a new spam/ham classifier" do
|
8
|
+
|
9
|
+
setup do
|
10
|
+
db = Redis.new :db => DB_NUM
|
11
|
+
db.flushdb
|
12
|
+
@classifier = Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM)
|
13
|
+
end
|
14
|
+
|
15
|
+
asserts("categories") { topic.db.smembers('categories') }.equals %w(spam ham)
|
16
|
+
|
17
|
+
asserts("loaded categories") { topic.send(:load_categories) }.equals %w(spam ham)
|
18
|
+
|
19
|
+
asserts("recognized categories") { Classifier.new({}, :db => DB_NUM).categories.map { |c| c.name } }.equals %w(spam ham)
|
20
|
+
|
21
|
+
|
22
|
+
asserts("initialize with same categories") { Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM).categories.map { |c| c.name } == %w(spam ham) }
|
23
|
+
|
24
|
+
asserts("category mismatch") { Classifier.new({:categories => %w(bad good)}, :db => DB_NUM) }.raises(LoadingError)
|
25
|
+
|
26
|
+
context "trained with spam and ham" do
|
27
|
+
|
28
|
+
setup do
|
29
|
+
spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
|
30
|
+
@classifier.learn(spam_frequencies, :as => 'spam')
|
31
|
+
|
32
|
+
ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
|
33
|
+
@classifier.learn(ham_frequencies, :as => 'ham')
|
34
|
+
end
|
35
|
+
|
36
|
+
asserts("evidences number for spam") { @classifier.category('spam').evidences_for('viagra') }.equals '3'
|
37
|
+
|
38
|
+
asserts("category documents total") { @classifier.category('spam').documents_total }.equals '1'
|
39
|
+
|
40
|
+
asserts("evidences total") { @classifier.db['documents'] }.equals '2'
|
41
|
+
|
42
|
+
should("classify spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) == 'spam' }
|
43
|
+
|
44
|
+
should("classify ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) == 'ham' }
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rrbayes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hungryblank
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-05-26 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 2.0.0
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: riot
|
@@ -52,7 +52,7 @@ files:
|
|
52
52
|
- lib/rrbayes/category.rb
|
53
53
|
- lib/rrbayes/rrbayes.rb
|
54
54
|
- rrbayes.gemspec
|
55
|
-
- test/rrbayes_test.rb
|
55
|
+
- test/classifier_test.rb
|
56
56
|
- test/teststrap.rb
|
57
57
|
has_rdoc: true
|
58
58
|
homepage: http://github.com/hungryblank/rrbayes
|
@@ -84,4 +84,4 @@ specification_version: 3
|
|
84
84
|
summary: "Rrbayes: Ruby Redis Bayes"
|
85
85
|
test_files:
|
86
86
|
- test/teststrap.rb
|
87
|
-
- test/rrbayes_test.rb
|
87
|
+
- test/classifier_test.rb
|
data/test/rrbayes_test.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'teststrap'
|
2
|
-
|
3
|
-
include Rrbayes
|
4
|
-
|
5
|
-
DB_NUM = 0
|
6
|
-
|
7
|
-
context "a new classifier" do
|
8
|
-
|
9
|
-
setup do
|
10
|
-
db = Redis.new :db => DB_NUM
|
11
|
-
db.connect_to_server
|
12
|
-
db.flushdb
|
13
|
-
@classifier = Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM)
|
14
|
-
end
|
15
|
-
|
16
|
-
should("persist categories") { topic.db.set_members('categories') }.equals %w(spam ham)
|
17
|
-
|
18
|
-
should("load categories") { topic.send(:load_categories) }.equals %w(spam ham)
|
19
|
-
|
20
|
-
should("initialize with no categories") { Classifier.new({}, :db => DB_NUM).categories.map { |c| c.name } }.equals %w(spam ham)
|
21
|
-
|
22
|
-
should("initialize with same categories") { Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM).categories.map { |c| c.name } }.equals %w(spam ham)
|
23
|
-
|
24
|
-
should("detect category mismatch") { Classifier.new({:categories => %w(bad good)}, :db => DB_NUM) }.raises(LoadingError)
|
25
|
-
|
26
|
-
context "in training" do
|
27
|
-
|
28
|
-
setup do
|
29
|
-
spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
|
30
|
-
@classifier.learn(spam_frequencies, :as => 'spam')
|
31
|
-
|
32
|
-
ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
|
33
|
-
@classifier.learn(ham_frequencies, :as => 'ham')
|
34
|
-
end
|
35
|
-
|
36
|
-
should("store evidences number") { @classifier.db['spam:viagra'] }.equals '3'
|
37
|
-
|
38
|
-
should("store category evidences total") { @classifier.db['documents:spam'] }.equals '1'
|
39
|
-
|
40
|
-
should("store evidences total") { @classifier.db['documents'] }.equals '2'
|
41
|
-
|
42
|
-
context "for a while" do
|
43
|
-
|
44
|
-
should("classify_spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) }.equals 'spam'
|
45
|
-
|
46
|
-
should("classify_ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) }.equals 'ham'
|
47
|
-
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|