bayes_on_redis_internal 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in bayes_on_redis_internal.gemspec
gemspec

# Redis client used by lib/bayes_on_redis_internal.rb (it calls Redis.new).
gem 'redis'
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Make lib/ loadable so the version constant can be read while building.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "bayes_on_redis_internal/version"

Gem::Specification.new do |s|
  s.name        = "bayes_on_redis_internal"
  s.version     = BayesOnRedisInternal::VERSION
  s.authors     = ["Dermot Haughey"]
  s.email       = ["hderms@gmail.com"]
  s.homepage    = ""
  s.summary     = "fork of bayes_on_redis that includes more redis integration"
  s.description = "library for performing bayesian analysis by way of redis hashes"

  # The rubyforge_project setter is deprecated in newer RubyGems; only call it
  # where it still exists so the spec keeps loading everywhere.
  s.rubyforge_project = "bayes_on_redis_internal" if s.respond_to?(:rubyforge_project=)

  # Package exactly what is tracked by git.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # The library instantiates Redis.new, so the client must be installed
  # together with this gem.
  s.add_runtime_dependency "redis"

  # specify any further dependencies here; for example:
  # s.add_development_dependency "rspec"
end
@@ -0,0 +1,120 @@
1
+ require "rubygems"
2
+
3
# Naive Bayes text classifier that keeps its training data in Redis.
#
# Category names are stored in the Redis set CATEGORIES_KEY. Every category
# owns a Redis hash ("BayesOnRedis:cat:<category>") that maps a word to the
# number of times the word was seen while training that category.
class BayesOnRedis
  CATEGORIES_KEY = "BayesOnRedis:categories"
  ONE_OR_TWO_WORDS_RE = /\b\w{1,2}\b/mi
  NON_ALPHANUMERIC_AND_NON_DOT_RE = /[^\w\.]/mi

  attr_reader :redis, :stopwords

  # options - either a Hash with :redis_host, :redis_port and :redis_db
  #           (a new Redis connection is opened from them), or any other
  #           object, which is used as the Redis client as-is.
  def initialize(options)
    if options.is_a?(Hash)
      # Lazy-load the client: this file does not require it at the top, so
      # without this the Hash form fails unless the caller loaded redis first.
      require "redis" unless defined?(Redis)
      @redis = Redis.new(:host => options[:redis_host], :port => options[:redis_port], :db => options[:redis_db])
    else
      @redis = options
    end

    @stopwords = Stopword.new
  end

  # Delegates to the client's flushdb.
  def flushdb
    @redis.flushdb
  end

  # Registers +category+ (downcased) and adds the word counts of +text+ to it.
  def train(category, text)
    category = category.downcase
    @redis.sadd(CATEGORIES_KEY, category)

    key = redis_category_key(category)
    count_occurance(text).each do |word, count|
      @redis.hincrby(key, word, count)
    end
  end
  alias_method :learn, :train

  # Deletes the word hash of +category+ and unregisters the category.
  def remove(category)
    category = category.downcase
    @redis.del(redis_category_key(category))
    @redis.srem(CATEGORIES_KEY, category)
  end

  # Returns the names of all registered categories.
  def list_members
    @redis.smembers(CATEGORIES_KEY)
  end

  # Subtracts the word counts of +text+ from +category+, never going below 0.
  def untrain(category, text)
    key = redis_category_key(category.downcase)

    count_occurance(text).each do |word, count|
      # hget yields a String (or nil for an unknown word); coerce it before
      # doing arithmetic, otherwise the comparison with an Integer raises.
      current = @redis.hget(key, word).to_i
      @redis.hset(key, word, [current - count, 0].max)
    end
  end
  alias_method :unlearn, :untrain

  # Returns a Hash of category => log-likelihood score for +text+.
  #
  # A category whose stored counts sum to zero or less is unregistered and
  # left out of the result. Scoring it would divide by zero and give it a
  # score of +Infinity, which would make it win every classification.
  #
  # Raises RuntimeError when +text+ is not a String.
  def score(text)
    # Tokenize once; the result does not depend on the category.
    occurrences = count_occurance(text)
    scores = {}

    @redis.smembers(CATEGORIES_KEY).each do |category|
      key = redis_category_key(category)
      total = @redis.hvals(key).inject(0) { |sum, value| sum + value.to_i }

      if total <= 0
        @redis.srem(CATEGORIES_KEY, category)
        next
      end

      scores[category] = 0
      occurrences.each_key do |word|
        seen = @redis.hget(key, word).to_i
        # Words never seen in this category get a small non-zero weight so
        # the logarithm stays finite.
        seen = 0.1 if seen <= 0

        scores[category] += Math.log(seen / total.to_f)
      end
    end

    scores
  end

  # Returns the name of the best scoring category for +text+, or nil when
  # there is no trained category to choose from.
  def classify(text)
    best = score(text).max_by { |_category, value| value }
    best && best.first
  end

  private

  def redis_category_key(category)
    "BayesOnRedis:cat:#{category}"
  end

  # Splits +text+ into words and returns a Hash of word => occurrences.
  # Incoming text is always downcased. Words of one or two characters and
  # stopwords are dropped; every character that is neither a word character
  # nor a dot becomes a separator, and dots are removed last.
  def count_occurance(text='')
    raise "input must be instance of String" unless text.is_a?(String)

    words = text.downcase.
      gsub(ONE_OR_TWO_WORDS_RE, '').
      gsub(NON_ALPHANUMERIC_AND_NON_DOT_RE, ' ').
      gsub(@stopwords.to_re, '').
      gsub(/\./, '').
      split

    counts = Hash.new(0)
    words.each { |word| counts[word] += 1 }
    counts
  end

  # Deletes every stopword field from every registered category hash.
  def remove_stopwords
    @redis.smembers(CATEGORIES_KEY).each do |category|
      key = redis_category_key(category)
      @stopwords.to_a.each do |stopword|
        @redis.hdel(key, stopword)
      end
    end
  end
end
106
+
107
+
108
# Whitespace-separated list of stopwords loaded from a text file.
class Stopword
  # Word list that is read when no explicit path is given.
  DEFAULT_PATH = File.expand_path(File.join(__FILE__, "..", "..", "datasets", "stopwords.txt"))

  # path - file to read the stopwords from (defaults to DEFAULT_PATH).
  #
  # Raises the usual File.read errors (e.g. Errno::ENOENT) when the file
  # cannot be read.
  def initialize(path = DEFAULT_PATH)
    @stopwords = File.read(path).split
  end

  # Returns the stopwords as an Array of Strings.
  def to_a
    @stopwords
  end

  # Returns a memoized, case-insensitive Regexp matching any stopword as a
  # whole word. Each word is escaped so that characters such as "." are
  # matched literally instead of being interpreted as regex syntax.
  def to_re
    @to_re ||= begin
      alternatives = @stopwords.map { |word| Regexp.escape(word) }.join('|')
      /\b(#{alternatives})\b/mi
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace carrying the gem's version, which the gemspec reads.
module BayesOnRedisInternal
  # Frozen so the version string cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bayes_on_redis_internal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dermot Haughey
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-11 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: library for performing bayesian analysis by way of redis hashes
15
+ email:
16
+ - hderms@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - Rakefile
24
+ - bayes_on_redis_internal.gemspec
25
+ - lib/bayes_on_redis_internal.rb
26
+ - lib/bayes_on_redis_internal/version.rb
27
+ homepage: ''
28
+ licenses: []
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project: bayes_on_redis_internal
47
+ rubygems_version: 1.8.10
48
+ signing_key:
49
+ specification_version: 3
50
+ summary: fork of bayes_on_redis that includes more redis integration
51
+ test_files: []