rrbayes 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +40 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/lib/rrbayes/category.rb +68 -0
- data/lib/rrbayes/rrbayes.rb +54 -0
- data/lib/rrbayes.rb +2 -0
- data/rrbayes.gemspec +59 -0
- data/test/rrbayes_test.rb +44 -0
- data/test/teststrap.rb +3 -0
- metadata +87 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 hungryblank
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
= Rrbayes (Ruby Redis Bayes)
|
2
|
+
|
3
|
+
|
4
|
+
* http://github.com/hungryblank/rrbayes
|
5
|
+
|
6
|
+
== Description
|
7
|
+
|
8
|
+
Rrbayes is a basic and simplified implementation of a naive bayesian classifier.
|
9
|
+
|
10
|
+
It uses Redis http://github.com/antirez/redis to store the information
|
11
|
+
necessary for the classification; this gives Rrbayes persistence and
|
12
|
+
parallel processing support
|
13
|
+
|
14
|
+
Rrbayes has been inspired by http://github.com/cardmagic/classifier
|
15
|
+
|
16
|
+
== Development status
|
17
|
+
|
18
|
+
At this stage Rrbayes is more than anything else an excuse to use Redis
|
19
|
+
|
20
|
+
== Usage
|
21
|
+
|
22
|
+
require 'rrbayes'
|
23
|
+
|
24
|
+
classifier = Rrbayes::Classifier.new :categories => %w(spam ham)
|
25
|
+
|
26
|
+
classifier.learn({'enlarge' => 1, 'your' => 2 ,'viagra' => 3}, :as => 'spam')
|
27
|
+
|
28
|
+
classifier.learn({'dear' => 1, 'Jon' => 2 ,'how' => 3}, :as => 'ham')
|
29
|
+
|
30
|
+
classifier.classify({'viagra' => 1, 'cheap' => 1})
|
31
|
+
=> 'spam'
|
32
|
+
|
33
|
+
== Thanks
|
34
|
+
|
35
|
+
* Salvatore Sanfilippo for Redis
|
36
|
+
* Rrbayes has been inspired by http://github.com/cardmagic/classifier
|
37
|
+
|
38
|
+
== Author
|
39
|
+
|
40
|
+
Copyright (c) 2009 hungryblank. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Packaging tasks. When Jeweler cannot be loaded only a hint is printed, so
# the test and documentation tasks below stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rrbayes"
    gem.summary = %Q{Rrbayes: Ruby Redis Bayes}
    gem.description = %Q{Implementation of a naive Bayes classifier with a Redis backend}
    gem.email = "hungryblank@gmail.com"
    gem.homepage = "http://github.com/hungryblank/rrbayes"
    gem.authors = ["hungryblank"]
    gem.add_dependency "redis", ">= 0"
    gem.add_development_dependency "riot", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every test/**/*_test.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task; replaced by a stub that aborts with install instructions
# when rcov is not installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# is contributed by the Jeweler tasks above -- confirm against the installed
# Jeweler version). Only depend on it when it exists, otherwise `rake test`
# would abort on an unknown prerequisite.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Prefer the RDoc-provided task and fall back to the one that shipped with
# older Rake releases.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip drops the trailing newline of the VERSION file from the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rrbayes #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Rrbayes

  # One classification category (for example "spam"). All counters are kept
  # in the database handle exposed by the owning classifier (+classifier.db+).
  #
  # Keys touched by this class:
  #   "<name>:<attribute>"  evidences recorded for an attribute in this category
  #   "evidences:<name>"    sum of all evidences recorded in this category
  #   "documents:<name>"    number of frequency maps learned by this category
  #   "documents"           number of frequency maps learned by any category
  #   "categories"          set to which every category name is added
  class Category

    DOCUMENTS_SCOPE = 'documents'.freeze
    EVIDENCES_SCOPE = 'evidences'.freeze

    # Evidence count assumed for an attribute the category has no counter
    # for, so that its logarithm stays finite.
    UNSEEN_EVIDENCES = 0.1

    # Score of a category without any recorded evidence (negative infinity).
    # Written as a division so it does not depend on Float::INFINITY.
    NO_EVIDENCE_SCORE = -1.0 / 0.0

    attr_reader :name

    # name       - the category name, used as part of every key
    # classifier - any object responding to +db+; the returned handle is
    #              used for all reads and writes
    #
    # The category name is added to the 'categories' set right away.
    def initialize(name, classifier)
      @name = name
      @db = classifier.db
      persist
    end

    # Records one document for this category.
    #
    # frequency_map - enumerable of attribute => evidences pairs; each
    #                 attribute counter and the category total are
    #                 incremented by the given amount
    #
    # All increments are issued inside a single +pipelined+ block. Both
    # document counters are incremented by one per call.
    def learn(frequency_map)
      @db.pipelined do |pipe|
        frequency_map.each do |attribute, evidences|
          pipe.incrby(attribute_key(attribute), evidences)
          pipe.incrby(evidences_key, evidences)
        end
        pipe.incr(documents_key)
        pipe.incr(DOCUMENTS_SCOPE)
      end
    end

    # Returns the sum of log(evidences_for(attribute) / total_evidences) over
    # the given attributes; higher means a better match.
    #
    # attributes - enumerable of attribute => evidences pairs; only the
    #              attribute names are used, the supplied counts are ignored
    #
    # A category with no recorded evidence returns negative infinity.
    # Dividing by its zero total would otherwise yield +Infinity and let an
    # untrained category out-score every trained one.
    def attributes_score(attributes)
      total = total_evidences.to_f
      return NO_EVIDENCE_SCORE unless total > 0

      attributes.inject(0) do |score, (attribute, _count)|
        seen = evidences_for(attribute) || UNSEEN_EVIDENCES
        score + Math.log(seen.to_f / total)
      end
    end

    # Stored evidence counter for the attribute in this category, exactly as
    # returned by the database (nil when the key is missing).
    def evidences_for(attribute)
      @db.get(attribute_key(attribute))
    end

    # Stored total of evidences for this category, exactly as returned by
    # the database (nil when nothing has been learned yet).
    def total_evidences
      @db.get(evidences_key)
    end

    private

    # "<name>:<attribute>"
    def attribute_key(attribute)
      key_for(name, attribute)
    end

    # "evidences:<name>", memoized
    def evidences_key
      @evidences_key ||= key_for(EVIDENCES_SCOPE, name)
    end

    # "documents:<name>", memoized
    def documents_key
      @documents_key ||= key_for(DOCUMENTS_SCOPE, name)
    end

    # Joins the key segments with ':'
    def key_for(*args)
      args.join(':')
    end

    # Adds this category name to the 'categories' set.
    def persist
      @db.sadd('categories', name)
    end

  end

end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Rrbayes

  # Entry point of the library: owns the Redis connection and one Category
  # object per category name, and delegates learning and scoring to them.
  class Classifier

    attr_reader :categories, :db

    # Creates a new classifier, takes 2 hashes:
    # the first hash contains Rrbayes specific options,
    # the second hash is passed to the backend Redis.new constructor.
    #
    #   Rrbayes::Classifier.new({:categories => %w(spam ham)}, {:host => '127.0.0.1'})
    #
    # When :categories is omitted, the names stored in the 'categories' set
    # of the database are used instead. Raises a RuntimeError when neither
    # source yields at least one category.
    def initialize(options = {}, redis_options = {})
      # The connection has to exist before load_categories can query it.
      @db = Redis.new(redis_options)
      @db.connect_to_server
      names = options[:categories] || load_categories
      raise "No categories specified for the classifier" if names.nil? || names.empty?
      @categories = names.map { |c| Category.new(c, self) }
    end

    # Given a frequency hash and a category, stores the frequency data
    # for the given category.
    #
    #   classifier = Rrbayes::Classifier.new(:categories => %w(spam ham))
    #   classifier.learn({'viagra' => 1, 'buy' => 1}, :as => 'spam')
    #
    # Raises ArgumentError when options[:as] does not name a known category.
    def learn(frequency_map, options)
      target = category(options[:as])
      raise ArgumentError, "Unknown category: #{options[:as].inspect}" unless target
      target.learn(frequency_map)
    end

    # Given a frequency hash, returns the name of the category with the
    # highest attributes_score for it.
    #
    #   unknown_data = {'viagra' => 1, 'buy' => 1}
    #   classifier.classify(unknown_data)
    #   => 'spam'
    #
    def classify(frequency_map)
      @categories.max_by { |c| c.attributes_score(frequency_map) }.name
    end

    # Returns the category object with the name provided in the argument,
    # or nil when there is none. Names are compared as strings, so 'spam'
    # and :spam refer to the same category.
    def category(name)
      wanted = name.to_s
      @categories.find { |c| c.name.to_s == wanted }
    end

    private

    # Members of the 'categories' set stored in the database.
    def load_categories
      @db.set_members('categories')
    end

  end

end
|
data/lib/rrbayes.rb
ADDED
data/rrbayes.gemspec
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{rrbayes}
|
8
|
+
s.version = "0.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["hungryblank"]
|
12
|
+
s.date = %q{2009-12-22}
|
13
|
+
s.description = %q{Implementation of a naive Bayes classifier with a Redis backend}
|
14
|
+
s.email = %q{hungryblank@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/rrbayes.rb",
|
27
|
+
"lib/rrbayes/category.rb",
|
28
|
+
"lib/rrbayes/rrbayes.rb",
|
29
|
+
"rrbayes.gemspec",
|
30
|
+
"test/rrbayes_test.rb",
|
31
|
+
"test/teststrap.rb"
|
32
|
+
]
|
33
|
+
s.homepage = %q{http://github.com/hungryblank/rrbayes}
|
34
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
35
|
+
s.require_paths = ["lib"]
|
36
|
+
s.rubygems_version = %q{1.3.5}
|
37
|
+
s.summary = %q{Rrbayes: Ruby Redis Bayes}
|
38
|
+
s.test_files = [
|
39
|
+
"test/teststrap.rb",
|
40
|
+
"test/rrbayes_test.rb"
|
41
|
+
]
|
42
|
+
|
43
|
+
if s.respond_to? :specification_version then
|
44
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
45
|
+
s.specification_version = 3
|
46
|
+
|
47
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
48
|
+
s.add_runtime_dependency(%q<redis>, [">= 0"])
|
49
|
+
s.add_development_dependency(%q<riot>, [">= 0"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<redis>, [">= 0"])
|
52
|
+
s.add_dependency(%q<riot>, [">= 0"])
|
53
|
+
end
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<redis>, [">= 0"])
|
56
|
+
s.add_dependency(%q<riot>, [">= 0"])
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'teststrap'
|
2
|
+
|
3
|
+
include Rrbayes
|
4
|
+
|
5
|
+
context "a new classifier" do
|
6
|
+
|
7
|
+
setup do
|
8
|
+
db = Redis.new :db => 'rrbayes_test'
|
9
|
+
db.connect_to_server
|
10
|
+
db.flushdb
|
11
|
+
@classifier = Classifier.new({:categories => %w(spam ham)}, :db => 'rrbayes_test')
|
12
|
+
end
|
13
|
+
|
14
|
+
should("persist categories") { topic.db.set_members('categories') }.equals %w(spam ham)
|
15
|
+
|
16
|
+
should("load categories") { topic.send(:load_categories) }.equals %w(spam ham)
|
17
|
+
|
18
|
+
context "in training" do
|
19
|
+
|
20
|
+
setup do
|
21
|
+
spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
|
22
|
+
@classifier.learn(spam_frequencies, :as => 'spam')
|
23
|
+
|
24
|
+
ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
|
25
|
+
@classifier.learn(ham_frequencies, :as => 'ham')
|
26
|
+
end
|
27
|
+
|
28
|
+
should("store evidences number") { @classifier.db['spam:viagra'] }.equals '3'
|
29
|
+
|
30
|
+
should("store category evidences total") { @classifier.db['documents:spam'] }.equals '1'
|
31
|
+
|
32
|
+
should("store evidences total") { @classifier.db['documents'] }.equals '2'
|
33
|
+
|
34
|
+
context "for a while" do
|
35
|
+
|
36
|
+
should("classify_spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) }.equals 'spam'
|
37
|
+
|
38
|
+
should("classify_ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) }.equals 'ham'
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/test/teststrap.rb
ADDED
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rrbayes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hungryblank
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-22 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: redis
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: riot
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
description: Implementation of a naive Bayes classifier with a Redis backend
|
36
|
+
email: hungryblank@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README.rdoc
|
44
|
+
files:
|
45
|
+
- .document
|
46
|
+
- .gitignore
|
47
|
+
- LICENSE
|
48
|
+
- README.rdoc
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- lib/rrbayes.rb
|
52
|
+
- lib/rrbayes/category.rb
|
53
|
+
- lib/rrbayes/rrbayes.rb
|
54
|
+
- rrbayes.gemspec
|
55
|
+
- test/rrbayes_test.rb
|
56
|
+
- test/teststrap.rb
|
57
|
+
has_rdoc: true
|
58
|
+
homepage: http://github.com/hungryblank/rrbayes
|
59
|
+
licenses: []
|
60
|
+
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options:
|
63
|
+
- --charset=UTF-8
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.3.5
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: "Rrbayes: Ruby Redis Bayes"
|
85
|
+
test_files:
|
86
|
+
- test/teststrap.rb
|
87
|
+
- test/rrbayes_test.rb
|