rrbayes 0.0.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/rrbayes/category.rb +18 -10
- data/lib/rrbayes/rrbayes.rb +2 -3
- data/rrbayes.gemspec +7 -7
- data/test/classifier_test.rb +48 -0
- metadata +5 -5
- data/test/rrbayes_test.rb +0 -52
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ begin
|
|
10
10
|
gem.email = "hungryblank@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/hungryblank/rrbayes"
|
12
12
|
gem.authors = ["hungryblank"]
|
13
|
-
gem.add_dependency "redis", ">= 0"
|
13
|
+
gem.add_dependency "redis", ">= 2.0.0"
|
14
14
|
gem.add_development_dependency "riot", ">= 0"
|
15
15
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
16
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
1.0.0
|
data/lib/rrbayes/category.rb
CHANGED
@@ -14,18 +14,16 @@ module Rrbayes
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def learn(frequency_map)
|
17
|
-
@db.pipelined do |pipe|
|
17
|
+
@db.pipelined do
|
18
18
|
frequency_map.each do |attribute, evidences|
|
19
|
-
|
20
|
-
pipe.incrby(evidences_key, evidences)
|
19
|
+
store_evidences(attribute, evidences)
|
21
20
|
end
|
22
|
-
|
23
|
-
pipe.incr(DOCUMENTS_SCOPE)
|
21
|
+
increment_documents
|
24
22
|
end
|
25
23
|
end
|
26
24
|
|
27
25
|
def attributes_score(attributes)
|
28
|
-
total =
|
26
|
+
total = evidences_total.to_f
|
29
27
|
attributes.map do |attribute, evidences|
|
30
28
|
(evidences_for(attribute) || 0.1).to_f / total
|
31
29
|
end.inject(0) do |score, attr_likelyhood|
|
@@ -34,17 +32,27 @@ module Rrbayes
|
|
34
32
|
end
|
35
33
|
|
36
34
|
def evidences_for(attribute)
|
37
|
-
@db.
|
35
|
+
@db.hget(name, attribute)
|
38
36
|
end
|
39
37
|
|
40
|
-
def
|
38
|
+
def evidences_total
|
41
39
|
@db.get(evidences_key)
|
42
40
|
end
|
43
41
|
|
42
|
+
def documents_total
|
43
|
+
@db.get(documents_key)
|
44
|
+
end
|
45
|
+
|
44
46
|
private
|
45
47
|
|
46
|
-
def
|
47
|
-
|
48
|
+
def store_evidences(attribute, evidences)
|
49
|
+
@db.hincrby(name, attribute, evidences)
|
50
|
+
@db.incrby(evidences_key, evidences)
|
51
|
+
end
|
52
|
+
|
53
|
+
def increment_documents
|
54
|
+
@db.incr(documents_key)
|
55
|
+
@db.incr(DOCUMENTS_SCOPE)
|
48
56
|
end
|
49
57
|
|
50
58
|
def evidences_key
|
data/lib/rrbayes/rrbayes.rb
CHANGED
@@ -15,7 +15,6 @@ module Rrbayes
|
|
15
15
|
#
|
16
16
|
def initialize(options = {}, redis_options = {})
|
17
17
|
@db = Redis.new(redis_options)
|
18
|
-
@db.connect_to_server
|
19
18
|
@categories = find_categories(options).map { |c| Category.new(c, self) }
|
20
19
|
end
|
21
20
|
|
@@ -37,7 +36,7 @@ module Rrbayes
|
|
37
36
|
# => 'spam'
|
38
37
|
#
|
39
38
|
def classify(frequency_map)
|
40
|
-
@categories.
|
39
|
+
@categories.sort_by { |c| -c.attributes_score(frequency_map) }.first.name
|
41
40
|
end
|
42
41
|
|
43
42
|
#Returns the category objects with the name provided in the argument
|
@@ -48,7 +47,7 @@ module Rrbayes
|
|
48
47
|
private
|
49
48
|
|
50
49
|
def load_categories
|
51
|
-
@db.set_members('categories')
|
50
|
+
@db.smembers('categories')
|
52
51
|
end
|
53
52
|
|
54
53
|
def find_categories(options)
|
data/rrbayes.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rrbayes}
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "1.0.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["hungryblank"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-05-26}
|
13
13
|
s.description = %q{Implementation of a naive Bayes classifier with a Redis backend}
|
14
14
|
s.email = %q{hungryblank@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -27,7 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"lib/rrbayes/category.rb",
|
28
28
|
"lib/rrbayes/rrbayes.rb",
|
29
29
|
"rrbayes.gemspec",
|
30
|
-
"test/rrbayes_test.rb",
|
30
|
+
"test/classifier_test.rb",
|
31
31
|
"test/teststrap.rb"
|
32
32
|
]
|
33
33
|
s.homepage = %q{http://github.com/hungryblank/rrbayes}
|
@@ -37,7 +37,7 @@ Gem::Specification.new do |s|
|
|
37
37
|
s.summary = %q{Rrbayes: Ruby Redis Bayes}
|
38
38
|
s.test_files = [
|
39
39
|
"test/teststrap.rb",
|
40
|
-
"test/rrbayes_test.rb"
|
40
|
+
"test/classifier_test.rb"
|
41
41
|
]
|
42
42
|
|
43
43
|
if s.respond_to? :specification_version then
|
@@ -45,14 +45,14 @@ Gem::Specification.new do |s|
|
|
45
45
|
s.specification_version = 3
|
46
46
|
|
47
47
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
48
|
-
s.add_runtime_dependency(%q<redis>, [">= 0"])
|
48
|
+
s.add_runtime_dependency(%q<redis>, [">= 2.0.0"])
|
49
49
|
s.add_development_dependency(%q<riot>, [">= 0"])
|
50
50
|
else
|
51
|
-
s.add_dependency(%q<redis>, [">= 0"])
|
51
|
+
s.add_dependency(%q<redis>, [">= 2.0.0"])
|
52
52
|
s.add_dependency(%q<riot>, [">= 0"])
|
53
53
|
end
|
54
54
|
else
|
55
|
-
s.add_dependency(%q<redis>, [">= 0"])
|
55
|
+
s.add_dependency(%q<redis>, [">= 2.0.0"])
|
56
56
|
s.add_dependency(%q<riot>, [">= 0"])
|
57
57
|
end
|
58
58
|
end
|
data/test/classifier_test.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'teststrap'
|
2
|
+
|
3
|
+
include Rrbayes
|
4
|
+
|
5
|
+
DB_NUM = 0
|
6
|
+
|
7
|
+
context "a new spam/ham classifier" do
|
8
|
+
|
9
|
+
setup do
|
10
|
+
db = Redis.new :db => DB_NUM
|
11
|
+
db.flushdb
|
12
|
+
@classifier = Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM)
|
13
|
+
end
|
14
|
+
|
15
|
+
asserts("categories") { topic.db.smembers('categories') }.equals %w(spam ham)
|
16
|
+
|
17
|
+
asserts("loaded categories") { topic.send(:load_categories) }.equals %w(spam ham)
|
18
|
+
|
19
|
+
asserts("recognized categories") { Classifier.new({}, :db => DB_NUM).categories.map { |c| c.name } }.equals %w(spam ham)
|
20
|
+
|
21
|
+
|
22
|
+
asserts("initialize with same categories") { Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM).categories.map { |c| c.name } == %w(spam ham) }
|
23
|
+
|
24
|
+
asserts("category mismatch") { Classifier.new({:categories => %w(bad good)}, :db => DB_NUM) }.raises(LoadingError)
|
25
|
+
|
26
|
+
context "trained with spam and ham" do
|
27
|
+
|
28
|
+
setup do
|
29
|
+
spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
|
30
|
+
@classifier.learn(spam_frequencies, :as => 'spam')
|
31
|
+
|
32
|
+
ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
|
33
|
+
@classifier.learn(ham_frequencies, :as => 'ham')
|
34
|
+
end
|
35
|
+
|
36
|
+
asserts("evidences number for spam") { @classifier.category('spam').evidences_for('viagra') }.equals '3'
|
37
|
+
|
38
|
+
asserts("category documents total") { @classifier.category('spam').documents_total }.equals '1'
|
39
|
+
|
40
|
+
asserts("evidences total") { @classifier.db['documents'] }.equals '2'
|
41
|
+
|
42
|
+
should("classify spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) == 'spam' }
|
43
|
+
|
44
|
+
should("classify ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) == 'ham' }
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rrbayes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hungryblank
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-05-26 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: "0"
|
23
|
+
version: 2.0.0
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: riot
|
@@ -52,7 +52,7 @@ files:
|
|
52
52
|
- lib/rrbayes/category.rb
|
53
53
|
- lib/rrbayes/rrbayes.rb
|
54
54
|
- rrbayes.gemspec
|
55
|
-
- test/rrbayes_test.rb
|
55
|
+
- test/classifier_test.rb
|
56
56
|
- test/teststrap.rb
|
57
57
|
has_rdoc: true
|
58
58
|
homepage: http://github.com/hungryblank/rrbayes
|
@@ -84,4 +84,4 @@ specification_version: 3
|
|
84
84
|
summary: "Rrbayes: Ruby Redis Bayes"
|
85
85
|
test_files:
|
86
86
|
- test/teststrap.rb
|
87
|
-
- test/rrbayes_test.rb
|
87
|
+
- test/classifier_test.rb
|
data/test/rrbayes_test.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'teststrap'
|
2
|
-
|
3
|
-
include Rrbayes
|
4
|
-
|
5
|
-
DB_NUM = 0
|
6
|
-
|
7
|
-
context "a new classifier" do
|
8
|
-
|
9
|
-
setup do
|
10
|
-
db = Redis.new :db => DB_NUM
|
11
|
-
db.connect_to_server
|
12
|
-
db.flushdb
|
13
|
-
@classifier = Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM)
|
14
|
-
end
|
15
|
-
|
16
|
-
should("persist categories") { topic.db.set_members('categories') }.equals %w(spam ham)
|
17
|
-
|
18
|
-
should("load categories") { topic.send(:load_categories) }.equals %w(spam ham)
|
19
|
-
|
20
|
-
should("initialize with no categories") { Classifier.new({}, :db => DB_NUM).categories.map { |c| c.name } }.equals %w(spam ham)
|
21
|
-
|
22
|
-
should("initialize with same categories") { Classifier.new({:categories => %w(spam ham)}, :db => DB_NUM).categories.map { |c| c.name } }.equals %w(spam ham)
|
23
|
-
|
24
|
-
should("detect category mismatch") { Classifier.new({:categories => %w(bad good)}, :db => DB_NUM) }.raises(LoadingError)
|
25
|
-
|
26
|
-
context "in training" do
|
27
|
-
|
28
|
-
setup do
|
29
|
-
spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
|
30
|
-
@classifier.learn(spam_frequencies, :as => 'spam')
|
31
|
-
|
32
|
-
ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
|
33
|
-
@classifier.learn(ham_frequencies, :as => 'ham')
|
34
|
-
end
|
35
|
-
|
36
|
-
should("store evidences number") { @classifier.db['spam:viagra'] }.equals '3'
|
37
|
-
|
38
|
-
should("store category evidences total") { @classifier.db['documents:spam'] }.equals '1'
|
39
|
-
|
40
|
-
should("store evidences total") { @classifier.db['documents'] }.equals '2'
|
41
|
-
|
42
|
-
context "for a while" do
|
43
|
-
|
44
|
-
should("classify_spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) }.equals 'spam'
|
45
|
-
|
46
|
-
should("classify_ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) }.equals 'ham'
|
47
|
-
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|