rrbayes 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 hungryblank
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,40 @@
1
+ = Rrbayes (Ruby Redis Bayes)
2
+
3
+
4
+ * http://github.com/hungryblank/rrbayes
5
+
6
+ == Description
7
+
8
+ Rrbayes is a basic and simplified implementation of a naive bayesian classifier.
9
+
10
+ It uses Redis http://github.com/antirez/redis to store the information
11
+ necessary for the classification. This gives Rrbayes persistence and
12
+ parallel processing support.
13
+
14
+ Rrbayes has been inspired by http://github.com/cardmagic/classifier
15
+
16
+ == Development status
17
+
18
+ At this stage Rrbayes is more than anything else an excuse to use Redis
19
+
20
+ == Usage
21
+
22
+ require 'rrbayes'
23
+
24
+ classifier = Rrbayes::Classifier.new(:categories => %w(spam ham))
25
+
26
+ classifier.learn({'enlarge' => 1, 'your' => 2, 'viagra' => 3}, :as => 'spam')
27
+
28
+ classifier.learn({'dear' => 1, 'Jon' => 2, 'how' => 3}, :as => 'ham')
29
+
30
+ classifier.classify({'viagra' => 1, 'cheap' => 1})
31
+ => 'spam'
32
+
33
+ == Thanks
34
+
35
+ * Salvatore Sanfilippo for Redis
36
+ * Rrbayes has been inspired by http://github.com/cardmagic/classifier
37
+
38
+ == Author
39
+
40
+ Copyright (c) 2009 hungryblank. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,54 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "rrbayes"
8
+ gem.summary = %Q{Rrbayes: Ruby Redis Bayes}
9
+ gem.description = %Q{Implementation of a naive Bayes classifier with a Redis backend}
10
+ gem.email = "hungryblank@gmail.com"
11
+ gem.homepage = "http://github.com/hungryblank/rrbayes"
12
+ gem.authors = ["hungryblank"]
13
+ gem.add_dependency "redis", ">= 0"
14
+ gem.add_development_dependency "riot", ">= 0"
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+ Jeweler::GemcutterTasks.new
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
+ end
21
+
22
+ require 'rake/testtask'
23
+ Rake::TestTask.new(:test) do |test|
24
+ test.libs << 'lib' << 'test'
25
+ test.pattern = 'test/**/*_test.rb'
26
+ test.verbose = true
27
+ end
28
+
29
+ begin
30
+ require 'rcov/rcovtask'
31
+ Rcov::RcovTask.new do |test|
32
+ test.libs << 'test'
33
+ test.pattern = 'test/**/*_test.rb'
34
+ test.verbose = true
35
+ end
36
+ rescue LoadError
37
+ task :rcov do
38
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
39
+ end
40
+ end
41
+
42
+ task :test => :check_dependencies
43
+
44
+ task :default => :test
45
+
46
+ require 'rake/rdoctask'
47
+ Rake::RDocTask.new do |rdoc|
48
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
49
+
50
+ rdoc.rdoc_dir = 'rdoc'
51
+ rdoc.title = "rrbayes #{version}"
52
+ rdoc.rdoc_files.include('README*')
53
+ rdoc.rdoc_files.include('lib/**/*.rb')
54
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,68 @@
1
+ module Rrbayes
2
+
3
+ class Category
4
+
5
+ DOCUMENTS_SCOPE = 'documents'
6
+ EVIDENCES_SCOPE = 'evidences'
7
+
8
+ attr_reader :name
9
+
10
+ def initialize(name, classifier)
11
+ @name = name
12
+ @db = classifier.db
13
+ persist
14
+ end
15
+
16
+ def learn(frequency_map)
17
+ @db.pipelined do |pipe|
18
+ frequency_map.each do |attribute, evidences|
19
+ pipe.incrby(attribute_key(attribute), evidences)
20
+ pipe.incrby(evidences_key, evidences)
21
+ end
22
+ pipe.incr(documents_key)
23
+ pipe.incr(DOCUMENTS_SCOPE)
24
+ end
25
+ end
26
+
27
+ def attributes_score(attributes)
28
+ total = total_evidences.to_f
29
+ attributes.map do |attribute, evidences|
30
+ (evidences_for(attribute) || 0.1).to_f / total
31
+ end.inject(0) do |score, attr_likelyhood|
32
+ score + Math.log(attr_likelyhood)
33
+ end
34
+ end
35
+
36
+ def evidences_for(attribute)
37
+ @db.get(attribute_key(attribute))
38
+ end
39
+
40
+ def total_evidences
41
+ @db.get(evidences_key)
42
+ end
43
+
44
+ private
45
+
46
+ def attribute_key(attribute)
47
+ key_for(name, attribute)
48
+ end
49
+
50
+ def evidences_key
51
+ @evidences_key ||= key_for(EVIDENCES_SCOPE, name)
52
+ end
53
+
54
+ def documents_key
55
+ @documents_key ||= key_for(DOCUMENTS_SCOPE, name)
56
+ end
57
+
58
+ def key_for(*args)
59
+ args.join(':')
60
+ end
61
+
62
+ def persist
63
+ @db.sadd('categories', name)
64
+ end
65
+
66
+ end
67
+
68
+ end
@@ -0,0 +1,54 @@
1
+ module Rrbayes
2
+
3
+ class Classifier
4
+
5
+ attr_reader :categories, :db
6
+
7
+ #creates a new classifier, takes 2 hashes
8
+ #the first hash contains Rrbayes specific options
9
+ #the second hash is passed to the backend Redis#new constructor
10
+ #
11
+ # Rrbayes::Classifier.new({:categories => %w(spam ham)}, {:host => '127.0.0.1'})
12
+ #
13
+ def initialize(options = {}, redis_options = {})
14
+ raise "No categories specified for the classifier" unless options[:categories] || load_categories
15
+ @db = Redis.new(redis_options)
16
+ @db.connect_to_server
17
+ @categories = options[:categories].map { |c| Category.new(c, self) }
18
+ end
19
+
20
+ #given a frequency hash and a category, stores the frequency data
21
+ #for the given category
22
+ #
23
+ # classifier = Rrbayes::Classifier.new(:categories => %w(spam ham))
24
+ # classifier.learn({'viagra' => 1, 'buy' => 1}, :as => 'spam')
25
+ #
26
+ def learn(frequency_map, options)
27
+ category(options[:as]).learn(frequency_map)
28
+ end
29
+
30
+ #given a frequency hash tries to guess to which category
31
+ #the hash is most likely to belong to
32
+ #
33
+ # unknown_data = {'viagra' => 1, 'buy' => 1}
34
+ # classifier.classify(unknown_data)
35
+ # => 'spam'
36
+ #
37
+ def classify(frequency_map)
38
+ @categories.map { |c| [c.name, c.attributes_score(frequency_map)] }.sort_by { |c| -c[1] }.first[0]
39
+ end
40
+
41
+ #Returns the category object whose name matches the argument, or nil if there is none
42
+ def category(name)
43
+ @categories.find { |c| c.name == name}
44
+ end
45
+
46
+ private
47
+
48
+ def load_categories
49
+ @db.set_members('categories')
50
+ end
51
+
52
+ end
53
+
54
+ end
data/lib/rrbayes.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'redis'
2
+ Dir.glob(File.join(File.dirname(__FILE__), "rrbayes", "*.rb")).each { |file| require file }
data/rrbayes.gemspec ADDED
@@ -0,0 +1,59 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{rrbayes}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["hungryblank"]
12
+ s.date = %q{2009-12-22}
13
+ s.description = %q{Implementation of a naive Bayes classifier with a Redis backend}
14
+ s.email = %q{hungryblank@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/rrbayes.rb",
27
+ "lib/rrbayes/category.rb",
28
+ "lib/rrbayes/rrbayes.rb",
29
+ "rrbayes.gemspec",
30
+ "test/rrbayes_test.rb",
31
+ "test/teststrap.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/hungryblank/rrbayes}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.5}
37
+ s.summary = %q{Rrbayes: Ruby Redis Bayes}
38
+ s.test_files = [
39
+ "test/teststrap.rb",
40
+ "test/rrbayes_test.rb"
41
+ ]
42
+
43
+ if s.respond_to? :specification_version then
44
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
48
+ s.add_runtime_dependency(%q<redis>, [">= 0"])
49
+ s.add_development_dependency(%q<riot>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<redis>, [">= 0"])
52
+ s.add_dependency(%q<riot>, [">= 0"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<redis>, [">= 0"])
56
+ s.add_dependency(%q<riot>, [">= 0"])
57
+ end
58
+ end
59
+
@@ -0,0 +1,44 @@
1
+ require 'teststrap'
2
+
3
+ include Rrbayes
4
+
5
+ context "a new classifier" do
6
+
7
+ setup do
8
+ db = Redis.new :db => 'rrbayes_test'
9
+ db.connect_to_server
10
+ db.flushdb
11
+ @classifier = Classifier.new({:categories => %w(spam ham)}, :db => 'rrbayes_test')
12
+ end
13
+
14
+ should("persist categories") { topic.db.set_members('categories') }.equals %w(spam ham)
15
+
16
+ should("load categories") { topic.send(:load_categories) }.equals %w(spam ham)
17
+
18
+ context "in training" do
19
+
20
+ setup do
21
+ spam_frequencies = {'enlarge' => 1, 'your' => 2 ,'viagra' => 3}
22
+ @classifier.learn(spam_frequencies, :as => 'spam')
23
+
24
+ ham_frequencies = {'dear' => 1, 'Jon' => 2 ,'how' => 3}
25
+ @classifier.learn(ham_frequencies, :as => 'ham')
26
+ end
27
+
28
+ should("store evidences number") { @classifier.db['spam:viagra'] }.equals '3'
29
+
30
+ should("store category evidences total") { @classifier.db['documents:spam'] }.equals '1'
31
+
32
+ should("store evidences total") { @classifier.db['documents'] }.equals '2'
33
+
34
+ context "for a while" do
35
+
36
+ should("classify_spam") { @classifier.classify({'viagra' => 1, 'cheap' => 1}) }.equals 'spam'
37
+
38
+ should("classify_ham") { @classifier.classify({'dear' => 1, 'molly' => 2}) }.equals 'ham'
39
+
40
+ end
41
+
42
+ end
43
+
44
+ end
data/test/teststrap.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'rubygems'
2
+ require 'riot'
3
+ require 'rrbayes'
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rrbayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - hungryblank
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-22 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: redis
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: riot
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: Implementation of a naive Bayes classifier with a Redis backend
36
+ email: hungryblank@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/rrbayes.rb
52
+ - lib/rrbayes/category.rb
53
+ - lib/rrbayes/rrbayes.rb
54
+ - rrbayes.gemspec
55
+ - test/rrbayes_test.rb
56
+ - test/teststrap.rb
57
+ has_rdoc: true
58
+ homepage: http://github.com/hungryblank/rrbayes
59
+ licenses: []
60
+
61
+ post_install_message:
62
+ rdoc_options:
63
+ - --charset=UTF-8
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: "0"
77
+ version:
78
+ requirements: []
79
+
80
+ rubyforge_project:
81
+ rubygems_version: 1.3.5
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: "Rrbayes: Ruby Redis Bayes"
85
+ test_files:
86
+ - test/teststrap.rb
87
+ - test/rrbayes_test.rb