rrbayes 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +40 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/lib/rrbayes/category.rb +68 -0
- data/lib/rrbayes/rrbayes.rb +54 -0
- data/lib/rrbayes.rb +2 -0
- data/rrbayes.gemspec +59 -0
- data/test/rrbayes_test.rb +44 -0
- data/test/teststrap.rb +3 -0
- metadata +87 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 hungryblank
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
= Rrbayes (Ruby Redis Bayes)
|
2
|
+
|
3
|
+
|
4
|
+
* http://github.com/hungryblank/rrbayes
|
5
|
+
|
6
|
+
== Description
|
7
|
+
|
8
|
+
Rrbayes is a basic and simplified implementation of a naive bayesian classifier.
|
9
|
+
|
10
|
+
It uses Redis http://github.com/antirez/redis to store the information
|
11
|
+
necessary for the classification; this gives Rrbayes persistence and
|
12
|
+
parallel processing support
|
13
|
+
|
14
|
+
Rrbayes has been inspired by http://github.com/cardmagic/classifier
|
15
|
+
|
16
|
+
== Development status
|
17
|
+
|
18
|
+
At this stage Rrbayes is more than anything else an excuse to use Redis
|
19
|
+
|
20
|
+
== Usage
|
21
|
+
|
22
|
+
require 'rrbayes'
|
23
|
+
|
24
|
+
classifier = Rrbayes::Classifier.new :categories => %w(spam ham)
|
25
|
+
|
26
|
+
classifier.learn({'enlarge' => 1, 'your' => 2, 'viagra' => 3}, :as => 'spam')
|
27
|
+
|
28
|
+
classifier.learn({'dear' => 1, 'Jon' => 2, 'how' => 3}, :as => 'ham')
|
29
|
+
|
30
|
+
classifier.classify({'viagra' => 1, 'cheap' => 1})
|
31
|
+
=> 'spam'
|
32
|
+
|
33
|
+
== Thanks
|
34
|
+
|
35
|
+
* Salvatore Sanfilippo for Redis
|
36
|
+
* Rrbayes has been inspired by http://github.com/cardmagic/classifier
|
37
|
+
|
38
|
+
== Author
|
39
|
+
|
40
|
+
Copyright (c) 2009 hungryblank. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it is not installed only the
# packaging tasks are unavailable, the rest of the Rakefile keeps working.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rrbayes"
    gem.summary = %Q{Rrbayes: Ruby Redis Bayes}
    gem.description = %Q{Implementation of a naive Bayes classifier with a Redis backend}
    gem.email = "hungryblank@gmail.com"
    gem.homepage = "http://github.com/hungryblank/rrbayes"
    gem.authors = ["hungryblank"]
    gem.add_dependency "redis", ">= 0"
    gem.add_development_dependency "riot", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every test/**/*_test.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task; replaced by a stub that explains how to install RCov when the
# library cannot be loaded.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only referenced when something defined it (the
# Jeweler block above is rescued on LoadError); otherwise `rake test` would
# abort on an unknown prerequisite.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Prefer rdoc/task and fall back to the older
# rake/rdoctask location when rdoc/task cannot be loaded.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that does not belong in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rrbayes #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Rrbayes

  # One category (e.g. 'spam') known to a classifier.
  #
  # All counters live in the classifier's db object under these keys:
  #
  #   "<name>:<attribute>"  evidences of an attribute within this category
  #   "evidences:<name>"    sum of all evidences learned for this category
  #   "documents:<name>"    number of frequency maps learned for this category
  #   "documents"           number of frequency maps learned overall
  #   "categories"          set holding the names of all categories
  class Category

    DOCUMENTS_SCOPE = 'documents'
    EVIDENCES_SCOPE = 'evidences'

    # Pseudo count used for an attribute that has no stored evidences in this
    # category, so that its likelihood is small but not zero (log(0) is
    # -Infinity).
    UNSEEN_EVIDENCES = 0.1

    attr_reader :name

    # name::       the category name
    # classifier:: any object responding to #db; the returned db is used for
    #              every read and write performed by this category
    #
    # Creating a category immediately adds its name to the 'categories' set.
    def initialize(name, classifier)
      @name = name
      @db = classifier.db
      persist
    end

    # Adds the evidences of one document to this category.
    #
    # frequency_map:: enumerable of attribute => evidences pairs
    #
    # All increments are issued inside a single @db.pipelined block.
    def learn(frequency_map)
      @db.pipelined do |pipe|
        frequency_map.each do |attribute, evidences|
          pipe.incrby(attribute_key(attribute), evidences)
          pipe.incrby(evidences_key, evidences)
        end
        pipe.incr(documents_key)
        pipe.incr(DOCUMENTS_SCOPE)
      end
    end

    # Returns the sum of the log likelihoods of the given attributes in this
    # category; the higher the score, the better the match. Only the
    # attribute names are used, the evidences in +attributes+ are ignored.
    #
    # A category without any learned evidence returns -Infinity: dividing by
    # a zero total would otherwise produce +Infinity and make the untrained
    # category win every classification.
    def attributes_score(attributes)
      total = total_evidences.to_f
      return -Float::INFINITY unless total > 0

      attributes.inject(0) do |score, (attribute, _evidences)|
        likelihood = (evidences_for(attribute) || UNSEEN_EVIDENCES).to_f / total
        score + Math.log(likelihood)
      end
    end

    # Raw value stored for the attribute in this category (whatever @db.get
    # returns; nil when the key is missing).
    def evidences_for(attribute)
      @db.get(attribute_key(attribute))
    end

    # Raw value stored as the total evidences of this category (whatever
    # @db.get returns; nil when nothing was learned yet).
    def total_evidences
      @db.get(evidences_key)
    end

    private

    def attribute_key(attribute)
      key_for(name, attribute)
    end

    def evidences_key
      @evidences_key ||= key_for(EVIDENCES_SCOPE, name)
    end

    def documents_key
      @documents_key ||= key_for(DOCUMENTS_SCOPE, name)
    end

    # Joins the key parts with ':'.
    def key_for(*args)
      args.join(':')
    end

    # Registers the category name in the 'categories' set.
    def persist
      @db.sadd('categories', name)
    end

  end

end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Rrbayes

  # Naive Bayes classifier whose counters are stored through a Redis client.
  class Classifier

    attr_reader :categories, :db

    # Creates a new classifier, takes 2 hashes:
    # the first hash contains Rrbayes specific options,
    # the second hash is passed to the backend Redis#new constructor.
    #
    #  Rrbayes::Classifier.new({:categories => %w(spam ham)}, {:host => '127.0.0.1'})
    #
    # When :categories is omitted, the names stored in the 'categories' set
    # of the db are used instead. Raises a RuntimeError when neither source
    # provides any category.
    def initialize(options = {}, redis_options = {})
      # The connection has to exist before load_categories can query it.
      @db = Redis.new(redis_options)
      @db.connect_to_server
      names = options[:categories] || load_categories
      raise "No categories specified for the classifier" if names.nil? || names.empty?
      @categories = names.map { |c| Category.new(c, self) }
    end

    # Given a frequency hash and a category, stores the frequency data
    # for the given category.
    #
    #  classifier = Rrbayes::Classifier.new(:categories => %w(spam ham))
    #  classifier.learn({'viagra' => 1, 'buy' => 1}, :as => 'spam')
    #
    # Raises ArgumentError when options[:as] does not name a known category.
    def learn(frequency_map, options)
      target = category(options[:as])
      raise ArgumentError, "Unknown category: #{options[:as].inspect}" unless target
      target.learn(frequency_map)
    end

    # Given a frequency hash tries to guess to which category
    # the hash is most likely to belong to. Returns the name of the category
    # with the highest attributes score.
    #
    #  unknown_data = {'viagra' => 1, 'buy' => 1}
    #  classifier.classify(unknown_data)
    #  => 'spam'
    #
    def classify(frequency_map)
      @categories.max_by { |c| c.attributes_score(frequency_map) }.name
    end

    # Returns the category object with the name provided in the argument, or
    # nil when there is none. Names are compared as strings, so :spam and
    # 'spam' refer to the same category.
    def category(name)
      @categories.find { |c| c.name.to_s == name.to_s }
    end

    private

    # Names of the categories stored in the db's 'categories' set.
    def load_categories
      @db.set_members('categories')
    end

  end

end
|
data/lib/rrbayes.rb
ADDED
data/rrbayes.gemspec
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{rrbayes}
|
8
|
+
s.version = "0.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["hungryblank"]
|
12
|
+
s.date = %q{2009-12-22}
|
13
|
+
s.description = %q{Implementation of a naive Bayes classifier with a Redis backend}
|
14
|
+
s.email = %q{hungryblank@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/rrbayes.rb",
|
27
|
+
"lib/rrbayes/category.rb",
|
28
|
+
"lib/rrbayes/rrbayes.rb",
|
29
|
+
"rrbayes.gemspec",
|
30
|
+
"test/rrbayes_test.rb",
|
31
|
+
"test/teststrap.rb"
|
32
|
+
]
|
33
|
+
s.homepage = %q{http://github.com/hungryblank/rrbayes}
|
34
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
35
|
+
s.require_paths = ["lib"]
|
36
|
+
s.rubygems_version = %q{1.3.5}
|
37
|
+
s.summary = %q{Rrbayes: Ruby Redis Bayes}
|
38
|
+
s.test_files = [
|
39
|
+
"test/teststrap.rb",
|
40
|
+
"test/rrbayes_test.rb"
|
41
|
+
]
|
42
|
+
|
43
|
+
if s.respond_to? :specification_version then
|
44
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
45
|
+
s.specification_version = 3
|
46
|
+
|
47
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
48
|
+
s.add_runtime_dependency(%q<redis>, [">= 0"])
|
49
|
+
s.add_development_dependency(%q<riot>, [">= 0"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<redis>, [">= 0"])
|
52
|
+
s.add_dependency(%q<riot>, [">= 0"])
|
53
|
+
end
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<redis>, [">= 0"])
|
56
|
+
s.add_dependency(%q<riot>, [">= 0"])
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'teststrap'
|
2
|
+
|
3
|
+
include Rrbayes
|
4
|
+
|
5
|
+
context "a new classifier" do
|
6
|
+
|
7
|
+
setup do
|
8
|
+
db = Redis.new :db => 'rrbayes_test'
|
9
|
+
db.connect_to_server
|
10
|
+
db.flushdb
|
11
|
+
@classifier = Classifier.new({:categories => %w(spam ham)}, :db => 'rrbayes_test')
|
12
|
+
end
|
13
|
+
|
14
|
+
should("persist categories") { topic.db.set_members('categories') }.equals %w(spam ham)
|
15
|
+
|
16
|
+
should("load categories") { topic.send(:load_categories) }.equals %w(spam ham)
|
17
|
+
|
18
|
+
context "in training" do
|
19
|
+
|
20
|
+
setup do
|
21
|
+
spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
|
22
|
+
@classifier.learn(spam_frequencies, :as => 'spam')
|
23
|
+
|
24
|
+
ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
|
25
|
+
@classifier.learn(ham_frequencies, :as => 'ham')
|
26
|
+
end
|
27
|
+
|
28
|
+
should("store evidences number") { @classifier.db['spam:viagra'] }.equals '3'
|
29
|
+
|
30
|
+
should("store category evidences total") { @classifier.db['documents:spam'] }.equals '1'
|
31
|
+
|
32
|
+
should("store evidences total") { @classifier.db['documents'] }.equals '2'
|
33
|
+
|
34
|
+
context "for a while" do
|
35
|
+
|
36
|
+
should("classify_spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) }.equals 'spam'
|
37
|
+
|
38
|
+
should("classify_ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) }.equals 'ham'
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/test/teststrap.rb
ADDED
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rrbayes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hungryblank
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-22 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: redis
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: riot
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
description: Implementation of a naive Bayes classifier with a Redis backend
|
36
|
+
email: hungryblank@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README.rdoc
|
44
|
+
files:
|
45
|
+
- .document
|
46
|
+
- .gitignore
|
47
|
+
- LICENSE
|
48
|
+
- README.rdoc
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- lib/rrbayes.rb
|
52
|
+
- lib/rrbayes/category.rb
|
53
|
+
- lib/rrbayes/rrbayes.rb
|
54
|
+
- rrbayes.gemspec
|
55
|
+
- test/rrbayes_test.rb
|
56
|
+
- test/teststrap.rb
|
57
|
+
has_rdoc: true
|
58
|
+
homepage: http://github.com/hungryblank/rrbayes
|
59
|
+
licenses: []
|
60
|
+
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options:
|
63
|
+
- --charset=UTF-8
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.3.5
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: "Rrbayes: Ruby Redis Bayes"
|
85
|
+
test_files:
|
86
|
+
- test/teststrap.rb
|
87
|
+
- test/rrbayes_test.rb
|