judgee 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Yjg5MGQ0YjhlNTcwYmM4ZTMwNjgxMzBkMGEyNjNmOWI4NWNhZDJmNg==
5
+ data.tar.gz: !binary |-
6
+ ZTkwY2E4ZjZiZjUxNzljOWQwNTcwOWRjZTFiNmVmYmQyODVkNGFjZQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MjJlYTFhMmQ4MzVmZDEyMjk1ZjdlYjk0MjdlOWU3YzI0NzI3ODE4YWIyY2U0
10
+ NWJjOTE2ZmQ2ZjMwMjg5NTJjODUxZmU3Yzg4ODRiZDdkZTM5ZGQyY2Q3ZTFj
11
+ NTJiMmUzOWE3YTMzZWU1NGRlOTljM2MzZGI1NmEyMWRlNTQzOTA=
12
+ data.tar.gz: !binary |-
13
+ ZWQwZmJlNDkxOTgxNmJlNWFjYTM0MWE2Mjc1NzYyMWNlYzZlMTZkNTQxM2Nl
14
+ ODUxYTkzZjU1OWRmNWIzNGQ5OWJhOThmYzMzOTg5NTU3NDdiYTQ2ZTNlNDMx
15
+ MGMyMWE0MWNhMTQyZmNlYzgwODFmYmVlYTMxODhhYTkyMGY5ZjU=
data/README.md CHANGED
@@ -32,7 +32,6 @@ It supports strings, hashes, lists, sets, sorted sets and offers an incredible p
32
32
  judgee = Judgee::Classifier.new(:path => "/tmp/redis.sock")
33
33
 
34
34
 
35
-
36
35
  # Now you can train the classifier
37
36
  judgee.train(:spam, ["bad", "worse", "stupid", "idiotic"])
38
37
  judgee.train(:ham, ["good", "better", "best", "lovely"])
@@ -51,4 +50,17 @@ If you read the source code, you might stumble upon the confusing method names.
51
50
  There are two methods for training (train, train_fast), two methods for untraining (untrain, untrain_fast) and two methods for classification (classify, classify_fast).
52
51
  The difference is quite simple. As the name suggests, all methods with the suffix '_fast' are (really) faster (3x to 10x) in processing the data, but virtually unreadable.
53
52
 
54
- So use the '_fast' methods if you need performance, e.g. in production and the methods without the suffix for learning purposes.
53
+ So use the '_fast' methods when you need performance (e.g. in production), and the methods without the suffix for learning purposes.
54
+
55
+ ## For higher performance, use the _fast methods
56
+
57
+ # Now you can train the classifier
58
+ judgee.train_fast(:spam, ["bad", "worse", "stupid", "idiotic"])
59
+ judgee.train_fast(:ham, ["good", "better", "best", "lovely"])
60
+
61
+ # After training, classify your text sample
62
+ judgee.classify_fast(["good", "better", "best", "worse"]) # => :ham
63
+
64
+
65
+ # Want to untrain some words?
66
+ judgee.untrain_fast(:spam, ["bad", "worse"])
@@ -5,9 +5,7 @@ require "redis"
5
5
  module Judgee
6
6
  class Classifier
7
7
 
8
- ###
9
- # Constants
10
- ###
8
+ ### Constants ###
11
9
  CATEGORIES_KEY = "judgee:categories"
12
10
  CATEGORY_KEY = "judgee:category"
13
11
  ALPHA = 1.0
@@ -84,8 +82,8 @@ module Judgee
84
82
  occurances = count_occurance(data)
85
83
 
86
84
  categories.each do |category|
87
- numerator = Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))].inject({}) { |hash, (key, value)| hash[key] = value.to_f + ALPHA; hash }
88
- denominator = categories.map { |category| Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))] }.inject(Hash.new(0)) { |main_hash, sub_hash| main_hash.merge(sub_hash) { |key, value_first, value_second| value_first.to_f + value_second.to_f} }.inject(Hash.new(0)) { |hash, (key, value)| hash[key] = value.to_f + (ALPHA * data.length); hash }
85
+ numerator = Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))].inject({}) { |hash, (key, value)| hash[key] = value.to_f + ALPHA; hash }
86
+ denominator = categories.map { |category| Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))] }.inject(Hash.new(0)) { |main_hash, sub_hash| main_hash.merge(sub_hash) { |key, value_first, value_second| value_first.to_f + value_second.to_f} }.inject(Hash.new(0)) { |hash, (key, value)| hash[key] = value.to_f + (ALPHA * data.length); hash }
89
87
  result[category] += numerator.merge(denominator) { |key, value_numerator, value_denominator| (occurances[key] * Math.log(value_numerator / value_denominator)).abs }.values.inject(0, :+)
90
88
  end
91
89
 
@@ -112,7 +110,7 @@ module Judgee
112
110
  def count_occurance(data='')
113
111
  bag_of_words = Hash.new(0)
114
112
 
115
- data = [data].flatten.map! do |word|
113
+ data = [data].flatten.map do |word|
116
114
  word.to_s.strip
117
115
  end.delete_if(&:empty?)
118
116
 
@@ -121,7 +119,7 @@ module Judgee
121
119
  end
122
120
  bag_of_words
123
121
  rescue
124
- raise ArgumentError, 'input must be a single String or an Array of Strings'
122
+ raise ArgumentError, 'Input must be a single String or an Array of Strings'
125
123
  end
126
124
 
127
125
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  module Judgee
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: judgee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
5
- prerelease:
4
+ version: 0.1.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Railsmechanic
@@ -14,7 +13,6 @@ dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: redis
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ! '>='
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rspec
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
@@ -62,27 +57,26 @@ files:
62
57
  - spec/judgee_spec.rb
63
58
  homepage: https://github.com/railsmechanic/judgee
64
59
  licenses: []
60
+ metadata: {}
65
61
  post_install_message:
66
62
  rdoc_options: []
67
63
  require_paths:
68
64
  - lib
69
65
  required_ruby_version: !ruby/object:Gem::Requirement
70
- none: false
71
66
  requirements:
72
67
  - - ! '>='
73
68
  - !ruby/object:Gem::Version
74
69
  version: '0'
75
70
  required_rubygems_version: !ruby/object:Gem::Requirement
76
- none: false
77
71
  requirements:
78
72
  - - ! '>='
79
73
  - !ruby/object:Gem::Version
80
74
  version: '0'
81
75
  requirements: []
82
76
  rubyforge_project:
83
- rubygems_version: 1.8.24
77
+ rubygems_version: 2.0.3
84
78
  signing_key:
85
- specification_version: 3
79
+ specification_version: 4
86
80
  summary: Judgee is a simple Bayesian Classifier with additive smoothing, which uses
87
81
  Redis for persistence.
88
82
  test_files: