bayes_on_redis 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.markdown +13 -0
  2. data/lib/bayes_on_redis.rb +74 -0
  3. metadata +68 -0
data/README.markdown ADDED
@@ -0,0 +1,13 @@
1
+ # bayes_on_redis
2
+
3
+ Bayesian classifier on top of Redis
4
+
5
+ ## Why on Redis?
6
+
7
+ Redis is perfect for building a fast Bayesian filter.
8
+
9
+ ## Getting started
10
+
11
+ ## Contributing
12
+
13
+ [Fork the project](http://github.com/didip/bayes_on_redis) and send pull requests.
@@ -0,0 +1,74 @@
1
+ require "rubygems"
2
+ require "redis"
3
+
4
# Naive-Bayes-style text classifier that keeps its training data in Redis.
#
# Storage layout:
# * CATEGORIES_KEY is a Redis set holding every (downcased) category name.
# * "BayesOnRedis:cat:<category>" is a Redis hash mapping word => occurrence count.
class BayesOnRedis
  CATEGORIES_KEY = "BayesOnRedis:categories".freeze

  # options - Hash with the optional keys :redis_host, :redis_port and
  #           :redis_db, forwarded unchanged to Redis.new.
  def initialize(options = {})
    @redis = Redis.new(:host => options[:redis_host], :port => options[:redis_port], :db => options[:redis_db])
  end

  # Issues FLUSHDB on the connection. This wipes the whole selected Redis
  # database, not only the keys written by this class.
  def flushdb
    @redis.flushdb
  end

  # Registers +category+ and adds the word counts of +text+ to it.
  #
  # category - String or Symbol; stored downcased.
  # text     - String of whitespace-separated words; matched case-insensitively.
  def train(category, text)
    category = category.to_s.downcase
    @redis.sadd(CATEGORIES_KEY, category)

    key = redis_category_key(category)
    count_occurance(text).each do |word, count|
      @redis.hincrby(key, word, count)
    end
  end

  # Subtracts the word counts of +text+ from +category+. A count never drops
  # below zero.
  #
  # category - String or Symbol; downcased before lookup.
  # text     - String of whitespace-separated words.
  def untrain(category, text)
    key = redis_category_key(category.to_s.downcase)

    count_occurance(text).each do |word, count|
      # HGET yields a String, or nil when the field is missing; coerce it
      # before doing arithmetic so the comparison cannot raise.
      current = @redis.hget(key, word).to_i
      remaining = current - count
      remaining = 0 if remaining < 0
      @redis.hset(key, word, remaining)
    end
  end

  # Scores +text+ against every trained category.
  #
  # Returns a Hash of category name => summed log-probability (Float, <= 0
  # unless the text is empty, in which case 0). Higher means a better match.
  # Categories whose total word count is zero are removed from the category
  # set and left out of the result.
  def classify(text)
    scores = {}
    words = count_occurance(text) # same for every category, so compute once

    @redis.smembers(CATEGORIES_KEY).each do |category|
      key = redis_category_key(category)
      total = @redis.hvals(key).inject(0) { |sum, value| sum + value.to_i }

      if total <= 0
        # Nothing left to compare against. Scoring it anyway would divide by
        # zero and produce +Infinity, making the empty category always win.
        @redis.srem(CATEGORIES_KEY, category)
        next
      end

      scores[category] = 0

      words.each_key do |word|
        seen = @redis.hget(key, word).to_i
        seen = 0.1 if seen <= 0 # small stand-in so an unseen word does not yield log(0)

        scores[category] += Math.log(seen / total.to_f)
      end
    end

    scores
  end

  # Returns the name of the best-scoring category for +text+, or nil when no
  # category has any training data.
  def classify_for_human(text)
    best = classify(text).max_by { |_category, score| score }
    best && best.first
  end

  private

  # Redis key of the hash that stores word counts for +category+.
  def redis_category_key(category)
    "BayesOnRedis:cat:#{category}"
  end

  # Splits +text+ on whitespace and returns a Hash of downcased word => count.
  def count_occurance(text)
    text.downcase.split.inject(Hash.new(0)) do |container, word|
      container[word] += 1
      container
    end
  end
end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bayes_on_redis
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Didip Kerabat
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-24 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: The bayes_on_redis library provides Bayesian classification of a given text, similar to many spam/ham filters.
23
+ email: didipk@gmail.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - README.markdown
32
+ - lib/bayes_on_redis.rb
33
+ has_rdoc: true
34
+ homepage: https://github.com/didip/bayes_on_redis
35
+ licenses: []
36
+
37
+ post_install_message:
38
+ rdoc_options: []
39
+
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ hash: 3
48
+ segments:
49
+ - 0
50
+ version: "0"
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ requirements: []
61
+
62
+ rubyforge_project:
63
+ rubygems_version: 1.3.7
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Bayesian filter on top of Redis
67
+ test_files: []
68
+