rrbayes 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 hungryblank
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,40 @@
1
+ = Rrbayes (Ruby Redis Bayes)
2
+
3
+
4
+ * http://github.com/hungryblank/rrbayes
5
+
6
+ == Description
7
+
8
+ Rrbayes is a basic and simplified implementation of a naive bayesian classifier.
9
+
10
+ It uses Redis (http://github.com/antirez/redis) to store the information
11
+ necessary for the classification; this gives Rrbayes persistence and
12
+ parallel processing support.
13
+
14
+ Rrbayes has been inspired by http://github.com/cardmagic/classifier
15
+
16
+ == Development status
17
+
18
+ At this stage Rrbayes is more than anything else an excuse to use Redis
19
+
20
+ == Usage
21
+
22
+ require 'rrbayes'
23
+
24
+ classifier = Rrbayes::Classifier.new(:categories => %w(spam ham))
25
+
26
+ classifier.learn({'enlarge' => 1, 'your' => 2, 'viagra' => 3}, :as => 'spam')
27
+
28
+ classifier.learn({'dear' => 1, 'Jon' => 2, 'how' => 3}, :as => 'ham')
29
+
30
+ classifier.classify({'viagra' => 1, 'cheap' => 1})
31
+ => 'spam'
32
+
33
+ == Thanks
34
+
35
+ * Salvatore Sanfilippo for Redis
36
+ * Rrbayes has been inspired by http://github.com/cardmagic/classifier
37
+
38
+ == Author
39
+
40
+ Copyright (c) 2009 hungryblank. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,54 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded only a
# hint is printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rrbayes"
    gem.summary = %Q{Rrbayes: Ruby Redis Bayes}
    gem.description = %Q{Implementation of a naive Bayes classifier with a Redis backend}
    gem.email = "hungryblank@gmail.com"
    gem.homepage = "http://github.com/hungryblank/rrbayes"
    gem.authors = ["hungryblank"]
    gem.add_dependency "redis", ">= 0"
    gem.add_development_dependency "riot", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every test/**/*_test.rb file with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task; replaced by an aborting stub when rcov is not installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (presumably it is
# provided by Jeweler::Tasks -- verify). Only depend on it when it exists,
# otherwise `rake test` would fail on an unknown prerequisite whenever the
# Jeweler block above was skipped.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Prefer the RDoc-provided task; fall back to the legacy Rake one where
# 'rdoc/task' is not available.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: keep the VERSION file's trailing newline out of the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rrbayes #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,68 @@
1
module Rrbayes

  # One category known to a classifier. All counters live in the classifier's
  # db object under these keys:
  #
  #   "<name>:<attribute>"  evidences of an attribute within this category
  #   "evidences:<name>"    total evidences learned for this category
  #   "documents:<name>"    number of frequency maps learned for this category
  #   "documents"           number of frequency maps learned overall
  #
  class Category

    DOCUMENTS_SCOPE = 'documents'
    EVIDENCES_SCOPE = 'evidences'

    #evidence count assumed for an attribute with no recorded evidences
    UNSEEN_EVIDENCES = 0.1

    #score of a category that has not learned anything yet (-Infinity)
    NEVER_LIKELY = -1.0 / 0.0

    attr_reader :name

    #creates the category and adds its name to the 'categories' set
    #
    # name       - the category name
    # classifier - any object whose #db returns the storage connection
    #
    def initialize(name, classifier)
      @name = name
      @db = classifier.db
      persist
    end

    #adds the evidences of a frequency map (attribute => count) to this
    #category, all the increments are sent inside a single pipelined block
    def learn(frequency_map)
      @db.pipelined do |pipe|
        frequency_map.each do |attribute, evidences|
          pipe.incrby(attribute_key(attribute), evidences)
          pipe.incrby(evidences_key, evidences)
        end
        pipe.incr(documents_key)
        pipe.incr(DOCUMENTS_SCOPE)
      end
    end

    #returns the sum of the logarithms of evidences(attribute) / total
    #evidences for every attribute given, the counts passed in the argument
    #are not used, only the attribute names are
    #
    #a category without any evidence returns -Infinity so it can never
    #outscore a trained one (dividing by a zero total used to give +Infinity)
    def attributes_score(attributes)
      total = total_evidences.to_f
      return NEVER_LIKELY unless total > 0
      attributes.inject(0) do |score, (attribute, _evidences)|
        count = evidences_for(attribute).to_f
        #missing (nil) and zero counts share the same floor, log(0) is avoided
        count = UNSEEN_EVIDENCES unless count > 0
        score + Math.log(count / total)
      end
    end

    #raw stored evidences of the attribute in this category, nil when the
    #key has never been written
    def evidences_for(attribute)
      @db.get(attribute_key(attribute))
    end

    #raw stored total of the evidences learned for this category
    def total_evidences
      @db.get(evidences_key)
    end

    private

    def attribute_key(attribute)
      key_for(name, attribute)
    end

    def evidences_key
      @evidences_key ||= key_for(EVIDENCES_SCOPE, name)
    end

    def documents_key
      @documents_key ||= key_for(DOCUMENTS_SCOPE, name)
    end

    #joins the key segments with ':'
    def key_for(*args)
      args.join(':')
    end

    def persist
      @db.sadd('categories', name)
    end

  end

end
@@ -0,0 +1,54 @@
1
module Rrbayes

  class Classifier

    attr_reader :categories, :db

    #creates a new classifier, takes 2 hashes
    #the first hash contains Rrbayes specific options
    #the second hash is passed to the backend Redis#new constructor
    #
    # Rrbayes::Classifier.new({:categories => %w(spam ham)}, {:host => '127.0.0.1'})
    #
    #when :categories is not given the names stored in the 'categories' set
    #of the backend are used instead, a RuntimeError is raised when neither
    #source provides at least one category
    #
    def initialize(options = {}, redis_options = {})
      #the connection has to exist before load_categories can read from it
      @db = Redis.new(redis_options)
      @db.connect_to_server
      names = options[:categories] || load_categories
      raise "No categories specified for the classifier" if names.nil? || names.empty?
      @categories = names.map { |c| Category.new(c, self) }
    end

    #given a frequency hash and a category, stores the frequency data
    #for the given category
    #
    # classifier = Rrbayes::Classifier.new(:categories => %w(spam ham))
    # classifier.learn({'viagra' => 1, 'buy' => 1}, :as => 'spam')
    #
    #raises ArgumentError when options[:as] does not name a known category
    #
    def learn(frequency_map, options)
      target = category(options[:as])
      raise ArgumentError, "Unknown category: #{options[:as].inspect}" unless target
      target.learn(frequency_map)
    end

    #given a frequency hash tries to guess to which category
    #the hash is most likely to belong to, returns the name of the
    #category with the highest attributes score
    #
    # unknown_data = {'viagra' => 1, 'buy' => 1}
    # classifier.classify(unknown_data)
    # => 'spam'
    #
    def classify(frequency_map)
      scored = @categories.map { |c| [c.name, c.attributes_score(frequency_map)] }
      scored.max_by { |_name, score| score }.first
    end

    #Returns the category object with the name provided in the argument,
    #nil when there is none; names are compared as strings so that :spam
    #and 'spam' refer to the same category
    def category(name)
      wanted = name.to_s
      @categories.find { |c| c.name.to_s == wanted }
    end

    private

    #names stored in the 'categories' set of the backend
    def load_categories
      @db.set_members('categories')
    end

  end

end
data/lib/rrbayes.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'redis'
2
+ Dir.glob(File.join(File.dirname(__FILE__), "rrbayes", "*.rb")).each { |file| require file }
data/rrbayes.gemspec ADDED
@@ -0,0 +1,59 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{rrbayes}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["hungryblank"]
12
+ s.date = %q{2009-12-22}
13
+ s.description = %q{Implementation of a naive Bayes classifier with a Redis backend}
14
+ s.email = %q{hungryblank@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/rrbayes.rb",
27
+ "lib/rrbayes/category.rb",
28
+ "lib/rrbayes/rrbayes.rb",
29
+ "rrbayes.gemspec",
30
+ "test/rrbayes_test.rb",
31
+ "test/teststrap.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/hungryblank/rrbayes}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.5}
37
+ s.summary = %q{Rrbayes: Ruby Redis Bayes}
38
+ s.test_files = [
39
+ "test/teststrap.rb",
40
+ "test/rrbayes_test.rb"
41
+ ]
42
+
43
+ if s.respond_to? :specification_version then
44
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
48
+ s.add_runtime_dependency(%q<redis>, [">= 0"])
49
+ s.add_development_dependency(%q<riot>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<redis>, [">= 0"])
52
+ s.add_dependency(%q<riot>, [">= 0"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<redis>, [">= 0"])
56
+ s.add_dependency(%q<riot>, [">= 0"])
57
+ end
58
+ end
59
+
@@ -0,0 +1,44 @@
1
require 'teststrap'

include Rrbayes

# Exercises Classifier through a Redis connection opened with
# :db => 'rrbayes_test'.
context "a new classifier" do

  # Calls flushdb on a separate connection before building the classifier
  # under test; the classifier is the value of this setup block.
  setup do
    redis = Redis.new(:db => 'rrbayes_test')
    redis.connect_to_server
    redis.flushdb
    @classifier = Classifier.new({ :categories => ['spam', 'ham'] }, { :db => 'rrbayes_test' })
  end

  should("persist categories") { topic.db.set_members('categories') }.equals ['spam', 'ham']

  should("load categories") { topic.send(:load_categories) }.equals ['spam', 'ham']

  context "in training" do

    # Teaches one frequency map per category.
    setup do
      @classifier.learn({ 'enlarge' => 1, 'your' => 2, 'viagra' => 3 }, :as => 'spam')
      @classifier.learn({ 'dear' => 1, 'Jon' => 2, 'how' => 3 }, :as => 'ham')
    end

    should("store evidences number") { @classifier.db['spam:viagra'] }.equals '3'

    should("store category evidences total") { @classifier.db['documents:spam'] }.equals '1'

    should("store evidences total") { @classifier.db['documents'] }.equals '2'

    context "for a while" do

      should("classify_spam") { @classifier.classify({ 'viagra' => 1, 'cheap' => 1 }) }.equals 'spam'

      should("classify_ham") { @classifier.classify({ 'dear' => 1, 'molly' => 2 }) }.equals 'ham'

    end

  end

end
data/test/teststrap.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'rubygems'
2
+ require 'riot'
3
+ require 'rrbayes'
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rrbayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - hungryblank
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-22 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: redis
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: riot
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: Implementation of a naive Bayes classifier with a Redis backend
36
+ email: hungryblank@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/rrbayes.rb
52
+ - lib/rrbayes/category.rb
53
+ - lib/rrbayes/rrbayes.rb
54
+ - rrbayes.gemspec
55
+ - test/rrbayes_test.rb
56
+ - test/teststrap.rb
57
+ has_rdoc: true
58
+ homepage: http://github.com/hungryblank/rrbayes
59
+ licenses: []
60
+
61
+ post_install_message:
62
+ rdoc_options:
63
+ - --charset=UTF-8
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: "0"
77
+ version:
78
+ requirements: []
79
+
80
+ rubyforge_project:
81
+ rubygems_version: 1.3.5
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: "Rrbayes: Ruby Redis Bayes"
85
+ test_files:
86
+ - test/teststrap.rb
87
+ - test/rrbayes_test.rb