bayes_on_redis_internal 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Bundler manifest for developing this gem.
# Fetch gems over TLS; the plain-http endpoint allows tampering in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in bayes_on_redis_internal.gemspec
gemspec

# Redis client used by the classifier at runtime.
gem 'redis'
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "bayes_on_redis_internal/version"
4
+
5
# Gem specification for bayes_on_redis_internal.
# The packaged file lists are derived from the files tracked by git at build time.
Gem::Specification.new do |s|
  s.name        = "bayes_on_redis_internal"
  s.version     = BayesOnRedisInternal::VERSION
  s.authors     = ["Dermot Haughey"]
  s.email       = ["hderms@gmail.com"]
  s.homepage    = ""
  s.summary     = "fork of bayes_on_redis that includes more redis integration"
  s.description = "library for performing bayesian analysis by way of redis hashes"

  s.rubyforge_project = "bayes_on_redis_internal"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # The library builds its own Redis client, so the client gem has to be
  # installed together with this gem rather than only listed in the Gemfile.
  s.add_runtime_dependency "redis"

  # specify any further dependencies here; for example:
  # s.add_development_dependency "rspec"
end
@@ -0,0 +1,120 @@
1
+ require "rubygems"
2
+
3
# Naive Bayes text classifier that keeps its word counts in Redis.
#
# Storage layout used by this class:
# * CATEGORIES_KEY            - Redis set with the name of every known category
# * "BayesOnRedis:cat:<name>" - Redis hash mapping word => occurrence count
class BayesOnRedis
  CATEGORIES_KEY = "BayesOnRedis:categories"
  ONE_OR_TWO_WORDS_RE = /\b\w{1,2}\b/mi
  NON_ALPHANUMERIC_AND_NON_DOT_RE = /[^\w\.]/mi

  attr_reader :redis, :stopwords

  # options - either a Hash with :redis_host, :redis_port and :redis_db
  #           (a new Redis client is created from it), or an already
  #           constructed Redis-like client that is used as is.
  def initialize(options)
    if options.is_a?(Hash)
      @redis = Redis.new(:host => options[:redis_host], :port => options[:redis_port], :db => options[:redis_db])
    else
      @redis = options
    end

    @stopwords = Stopword.new
  end

  # Wipes the whole Redis database the client is connected to.
  def flushdb
    @redis.flushdb
  end

  # Adds the words of +text+ to the counts of +category+ (case-insensitive),
  # registering the category if it is new.
  def train(category, text)
    category = category.downcase
    @redis.sadd(CATEGORIES_KEY, category)

    count_occurance(text).each do |word, count|
      @redis.hincrby(redis_category_key(category), word, count)
    end
  end
  alias_method :learn, :train

  # Deletes a category together with all of its word counts.
  def remove(category)
    category = category.downcase
    @redis.del(redis_category_key(category))
    @redis.srem(CATEGORIES_KEY, category)
  end

  # Returns the names of all known categories.
  def list_members
    @redis.smembers(CATEGORIES_KEY)
  end

  # Subtracts the words of +text+ from the counts of +category+.
  # Counts never drop below zero.
  def untrain(category, text)
    key = redis_category_key(category.downcase)

    count_occurance(text).each do |word, count|
      # Hash values come back from Redis as strings (nil when the field is
      # missing), so convert before doing arithmetic on them.
      current = @redis.hget(key, word).to_i
      new_count = current >= count ? current - count : 0
      @redis.hset(key, word, new_count)
    end
  end
  alias_method :unlearn, :untrain

  # Returns a Hash of category => log-likelihood score for +text+.
  #
  # Categories whose total word count is zero are dropped from the category
  # set and left out of the result; scoring them would divide by zero and
  # yield an infinite score.
  #
  # Raises RuntimeError when +text+ is not a String.
  def score(text)
    scores = {}
    occurrences = count_occurance(text) # identical for every category

    @redis.smembers(CATEGORIES_KEY).each do |category|
      key = redis_category_key(category)
      words_count_per_category = @redis.hvals(key).inject(0) { |sum, value| sum + value.to_i }

      if words_count_per_category <= 0
        @redis.srem(CATEGORIES_KEY, category)
        next
      end

      scores[category] = occurrences.inject(0) do |sum, (word, _count)|
        hits = @redis.hget(key, word).to_i
        hits = 0.1 if hits <= 0 # smoothing so unseen words do not produce log(0)
        sum + Math.log(hits / words_count_per_category.to_f)
      end
    end

    scores
  end

  # Returns the name of the best scoring category for +text+, or nil when
  # no category could be scored.
  def classify(text)
    best = score(text).max_by { |_category, value| value }
    best && best.first
  end

  private

  def redis_category_key(category)
    "BayesOnRedis:cat:#{category}"
  end

  # Tokenizes +text+ and returns a Hash of word => number of occurrences.
  # The text is downcased; one- and two-character words, stopwords and
  # punctuation are stripped before counting.
  def count_occurance(text='')
    raise "input must be instance of String" unless text.is_a?(String)

    text_chunks = text.downcase.gsub(ONE_OR_TWO_WORDS_RE, '').gsub(NON_ALPHANUMERIC_AND_NON_DOT_RE, ' ').gsub(@stopwords.to_re, '').gsub(/\./, '').split
    text_chunks.each_with_object(Hash.new(0)) do |word, container|
      container[word] += 1
    end
  end

  # Deletes every stopword field from every category hash.
  def remove_stopwords
    @redis.smembers(CATEGORIES_KEY).each do |category|
      @stopwords.to_a.each do |stopword|
        @redis.hdel(redis_category_key(category), stopword)
      end
    end
  end
end
106
+
107
+
108
# List of stopwords loaded from a whitespace-separated text file.
class Stopword
  # Default word list location: datasets/stopwords.txt two levels above this file.
  DEFAULT_PATH = File.expand_path(File.join(__FILE__, "..", "..", "datasets", "stopwords.txt"))

  # path - file to read the stopwords from (defaults to DEFAULT_PATH).
  #
  # Raises the usual File.read errors (e.g. Errno::ENOENT) when the file
  # cannot be read.
  def initialize(path = DEFAULT_PATH)
    @stopwords = File.read(path).split
  end

  # Returns the stopwords as an Array of Strings.
  def to_a
    @stopwords
  end

  # Returns a memoized, case-insensitive Regexp matching any stopword as a
  # whole word. Each word is escaped so that regex metacharacters in the
  # list (".", "(", "+", ...) are matched literally instead of corrupting
  # the pattern.
  def to_re
    @to_re ||= /\b(#{@stopwords.map { |word| Regexp.escape(word) }.join('|')})\b/mi
  end
end
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's release metadata.
module BayesOnRedisInternal
  # Version string of this gem release; read by the gemspec.
  VERSION = "0.0.1"
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bayes_on_redis_internal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dermot Haughey
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-11 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: library for performing bayesian analysis by way of redis hashes
15
+ email:
16
+ - hderms@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - Rakefile
24
+ - bayes_on_redis_internal.gemspec
25
+ - lib/bayes_on_redis_internal.rb
26
+ - lib/bayes_on_redis_internal/version.rb
27
+ homepage: ''
28
+ licenses: []
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project: bayes_on_redis_internal
47
+ rubygems_version: 1.8.10
48
+ signing_key:
49
+ specification_version: 3
50
+ summary: fork of bayes_on_redis that includes more redis integration
51
+ test_files: []