judgee 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Yjg5MGQ0YjhlNTcwYmM4ZTMwNjgxMzBkMGEyNjNmOWI4NWNhZDJmNg==
5
+ data.tar.gz: !binary |-
6
+ ZTkwY2E4ZjZiZjUxNzljOWQwNTcwOWRjZTFiNmVmYmQyODVkNGFjZQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MjJlYTFhMmQ4MzVmZDEyMjk1ZjdlYjk0MjdlOWU3YzI0NzI3ODE4YWIyY2U0
10
+ NWJjOTE2ZmQ2ZjMwMjg5NTJjODUxZmU3Yzg4ODRiZDdkZTM5ZGQyY2Q3ZTFj
11
+ NTJiMmUzOWE3YTMzZWU1NGRlOTljM2MzZGI1NmEyMWRlNTQzOTA=
12
+ data.tar.gz: !binary |-
13
+ ZWQwZmJlNDkxOTgxNmJlNWFjYTM0MWE2Mjc1NzYyMWNlYzZlMTZkNTQxM2Nl
14
+ ODUxYTkzZjU1OWRmNWIzNGQ5OWJhOThmYzMzOTg5NTU3NDdiYTQ2ZTNlNDMx
15
+ MGMyMWE0MWNhMTQyZmNlYzgwODFmYmVlYTMxODhhYTkyMGY5ZjU=
data/README.md CHANGED
@@ -32,7 +32,6 @@ It supports strings, hashes, lists, sets, sorted sets and offers an incredible p
32
32
  judgee = Judgee::Classifier.new(:path => "/tmp/redis.sock")
33
33
 
34
34
 
35
-
36
35
  # Now you can train the classifier
37
36
  judgee.train(:spam, ["bad", "worse", "stupid", "idiotic"])
38
37
  judgee.train(:ham, ["good", "better", "best", "lovely"])
@@ -51,4 +50,17 @@ If you read the source code, you might stumble upon the confusing method names.
51
50
  There are two methods for training (train, train_fast), two methods for untraining (untrain, untrain_fast) and two methods for classification (classify, classify_fast).
52
51
  The difference is quite simple. As the name suggests, all methods with the suffix '_fast' are (really) faster (3x to 10x) in processing the data, but virtually unreadable.
53
52
 
54
- So use the '_fast' methods if you need performance, e.g. in production and the methods without the suffix for learning purposes.
53
+ So use the '_fast' methods when you need performance (e.g. in production), and the methods without the suffix for learning purposes.
54
+
55
+ ## For higher performance, use the _fast methods
56
+
57
+ # Now you can train the classifier
58
+ judgee.train_fast(:spam, ["bad", "worse", "stupid", "idiotic"])
59
+ judgee.train_fast(:ham, ["good", "better", "best", "lovely"])
60
+
61
+ # After training, classify your text sample
62
+ judgee.classify_fast(["good", "better", "best", "worse"]) # => :ham
63
+
64
+
65
+ # Want to untrain some words?
66
+ judgee.untrain_fast(:spam, ["bad", "worse"])
@@ -5,9 +5,7 @@ require "redis"
5
5
  module Judgee
6
6
  class Classifier
7
7
 
8
- ###
9
- # Constants
10
- ###
8
+ ### Constants ###
11
9
  CATEGORIES_KEY = "judgee:categories"
12
10
  CATEGORY_KEY = "judgee:category"
13
11
  ALPHA = 1.0
@@ -84,8 +82,8 @@ module Judgee
84
82
  occurances = count_occurance(data)
85
83
 
86
84
  categories.each do |category|
87
- numerator = Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))].inject({}) { |hash, (key, value)| hash[key] = value.to_f + ALPHA; hash }
88
- denominator = categories.map { |category| Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))] }.inject(Hash.new(0)) { |main_hash, sub_hash| main_hash.merge(sub_hash) { |key, value_first, value_second| value_first.to_f + value_second.to_f} }.inject(Hash.new(0)) { |hash, (key, value)| hash[key] = value.to_f + (ALPHA * data.length); hash }
85
+ numerator = Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))].inject({}) { |hash, (key, value)| hash[key] = value.to_f + ALPHA; hash }
86
+ denominator = categories.map { |category| Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))] }.inject(Hash.new(0)) { |main_hash, sub_hash| main_hash.merge(sub_hash) { |key, value_first, value_second| value_first.to_f + value_second.to_f} }.inject(Hash.new(0)) { |hash, (key, value)| hash[key] = value.to_f + (ALPHA * data.length); hash }
89
87
  result[category] += numerator.merge(denominator) { |key, value_numerator, value_denominator| (occurances[key] * Math.log(value_numerator / value_denominator)).abs }.values.inject(0, :+)
90
88
  end
91
89
 
@@ -112,7 +110,7 @@ module Judgee
112
110
  def count_occurance(data='')
113
111
  bag_of_words = Hash.new(0)
114
112
 
115
- data = [data].flatten.map! do |word|
113
+ data = [data].flatten.map do |word|
116
114
  word.to_s.strip
117
115
  end.delete_if(&:empty?)
118
116
 
@@ -121,7 +119,7 @@ module Judgee
121
119
  end
122
120
  bag_of_words
123
121
  rescue
124
- raise ArgumentError, 'input must be a single String or an Array of Strings'
122
+ raise ArgumentError, 'Input must be a single String or an Array of Strings'
125
123
  end
126
124
 
127
125
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  module Judgee
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: judgee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
5
- prerelease:
4
+ version: 0.1.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Railsmechanic
@@ -14,7 +13,6 @@ dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: redis
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ! '>='
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rspec
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
@@ -62,27 +57,26 @@ files:
62
57
  - spec/judgee_spec.rb
63
58
  homepage: https://github.com/railsmechanic/judgee
64
59
  licenses: []
60
+ metadata: {}
65
61
  post_install_message:
66
62
  rdoc_options: []
67
63
  require_paths:
68
64
  - lib
69
65
  required_ruby_version: !ruby/object:Gem::Requirement
70
- none: false
71
66
  requirements:
72
67
  - - ! '>='
73
68
  - !ruby/object:Gem::Version
74
69
  version: '0'
75
70
  required_rubygems_version: !ruby/object:Gem::Requirement
76
- none: false
77
71
  requirements:
78
72
  - - ! '>='
79
73
  - !ruby/object:Gem::Version
80
74
  version: '0'
81
75
  requirements: []
82
76
  rubyforge_project:
83
- rubygems_version: 1.8.24
77
+ rubygems_version: 2.0.3
84
78
  signing_key:
85
- specification_version: 3
79
+ specification_version: 4
86
80
  summary: Judgee is a simple Bayesian Classifier with additive smoothing, which uses
87
81
  Redis for persistence.
88
82
  test_files: