judgee 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/README.md +14 -2
- data/lib/judgee/classifier.rb +5 -7
- data/lib/judgee/version.rb +1 -1
- metadata +4 -10
checksums.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
!binary "U0hBMQ==":
|
|
3
|
+
metadata.gz: !binary |-
|
|
4
|
+
Yjg5MGQ0YjhlNTcwYmM4ZTMwNjgxMzBkMGEyNjNmOWI4NWNhZDJmNg==
|
|
5
|
+
data.tar.gz: !binary |-
|
|
6
|
+
ZTkwY2E4ZjZiZjUxNzljOWQwNTcwOWRjZTFiNmVmYmQyODVkNGFjZQ==
|
|
7
|
+
!binary "U0hBNTEy":
|
|
8
|
+
metadata.gz: !binary |-
|
|
9
|
+
MjJlYTFhMmQ4MzVmZDEyMjk1ZjdlYjk0MjdlOWU3YzI0NzI3ODE4YWIyY2U0
|
|
10
|
+
NWJjOTE2ZmQ2ZjMwMjg5NTJjODUxZmU3Yzg4ODRiZDdkZTM5ZGQyY2Q3ZTFj
|
|
11
|
+
NTJiMmUzOWE3YTMzZWU1NGRlOTljM2MzZGI1NmEyMWRlNTQzOTA=
|
|
12
|
+
data.tar.gz: !binary |-
|
|
13
|
+
ZWQwZmJlNDkxOTgxNmJlNWFjYTM0MWE2Mjc1NzYyMWNlYzZlMTZkNTQxM2Nl
|
|
14
|
+
ODUxYTkzZjU1OWRmNWIzNGQ5OWJhOThmYzMzOTg5NTU3NDdiYTQ2ZTNlNDMx
|
|
15
|
+
MGMyMWE0MWNhMTQyZmNlYzgwODFmYmVlYTMxODhhYTkyMGY5ZjU=
|
data/README.md
CHANGED
|
@@ -32,7 +32,6 @@ It supports strings, hashes, lists, sets, sorted sets and offers an incredible p
|
|
|
32
32
|
judgee = Judgee::Classifier.new(:path => "/tmp/redis.sock")
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
|
|
36
35
|
# Now you can train the classifier
|
|
37
36
|
judgee.train(:spam, ["bad", "worse", "stupid", "idiotic"])
|
|
38
37
|
judgee.train(:ham, ["good", "better", "best", "lovely"])
|
|
@@ -51,4 +50,17 @@ If you read the source code, you might stumble upon the confusing method names.
|
|
|
51
50
|
There are two methods for training (train, train_fast), two methods for untraining (untrain, untrain_fast) and two methods for classification (classify, classify_fast).
|
|
52
51
|
The difference is quite simple. As the name suggests, all methods with the suffix '_fast' are (really) faster (3x to 10x) in processing the data, but virtually unreadable.
|
|
53
52
|
|
|
54
|
-
So use the '_fast' methods if you need performance, e.g. in production and the methods without the suffix for learning purposes.
|
|
53
|
+
So use the '_fast' methods if you need performance, e.g. in production and the methods without the suffix for learning purposes.
|
|
54
|
+
|
|
55
|
+
## For higher performance, use the _fast methods
|
|
56
|
+
|
|
57
|
+
# Now you can train the classifier
|
|
58
|
+
judgee.train_fast(:spam, ["bad", "worse", "stupid", "idiotic"])
|
|
59
|
+
judgee.train_fast(:ham, ["good", "better", "best", "lovely"])
|
|
60
|
+
|
|
61
|
+
# After training, classify your text sample
|
|
62
|
+
judgee.classify_fast(["good", "better", "best", "worse"]) # => :ham
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Want to untrain some words?
|
|
66
|
+
judgee.untrain_fast(:spam, ["bad", "worse"])
|
data/lib/judgee/classifier.rb
CHANGED
|
@@ -5,9 +5,7 @@ require "redis"
|
|
|
5
5
|
module Judgee
|
|
6
6
|
class Classifier
|
|
7
7
|
|
|
8
|
-
###
|
|
9
|
-
# Constants
|
|
10
|
-
###
|
|
8
|
+
### Constants ###
|
|
11
9
|
CATEGORIES_KEY = "judgee:categories"
|
|
12
10
|
CATEGORY_KEY = "judgee:category"
|
|
13
11
|
ALPHA = 1.0
|
|
@@ -84,8 +82,8 @@ module Judgee
|
|
|
84
82
|
occurances = count_occurance(data)
|
|
85
83
|
|
|
86
84
|
categories.each do |category|
|
|
87
|
-
numerator
|
|
88
|
-
denominator
|
|
85
|
+
numerator = Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))].inject({}) { |hash, (key, value)| hash[key] = value.to_f + ALPHA; hash }
|
|
86
|
+
denominator = categories.map { |category| Hash[occurances.keys.zip(redis.hmget(redis_category_key(category), occurances.keys))] }.inject(Hash.new(0)) { |main_hash, sub_hash| main_hash.merge(sub_hash) { |key, value_first, value_second| value_first.to_f + value_second.to_f} }.inject(Hash.new(0)) { |hash, (key, value)| hash[key] = value.to_f + (ALPHA * data.length); hash }
|
|
89
87
|
result[category] += numerator.merge(denominator) { |key, value_numerator, value_denominator| (occurances[key] * Math.log(value_numerator / value_denominator)).abs }.values.inject(0, :+)
|
|
90
88
|
end
|
|
91
89
|
|
|
@@ -112,7 +110,7 @@ module Judgee
|
|
|
112
110
|
def count_occurance(data='')
|
|
113
111
|
bag_of_words = Hash.new(0)
|
|
114
112
|
|
|
115
|
-
data = [data].flatten.map
|
|
113
|
+
data = [data].flatten.map do |word|
|
|
116
114
|
word.to_s.strip
|
|
117
115
|
end.delete_if(&:empty?)
|
|
118
116
|
|
|
@@ -121,7 +119,7 @@ module Judgee
|
|
|
121
119
|
end
|
|
122
120
|
bag_of_words
|
|
123
121
|
rescue
|
|
124
|
-
raise ArgumentError, '
|
|
122
|
+
raise ArgumentError, 'Input must be a single String or an Array of Strings'
|
|
125
123
|
end
|
|
126
124
|
|
|
127
125
|
|
data/lib/judgee/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: judgee
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
5
|
-
prerelease:
|
|
4
|
+
version: 0.1.1
|
|
6
5
|
platform: ruby
|
|
7
6
|
authors:
|
|
8
7
|
- Railsmechanic
|
|
@@ -14,7 +13,6 @@ dependencies:
|
|
|
14
13
|
- !ruby/object:Gem::Dependency
|
|
15
14
|
name: redis
|
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
|
17
|
-
none: false
|
|
18
16
|
requirements:
|
|
19
17
|
- - ! '>='
|
|
20
18
|
- !ruby/object:Gem::Version
|
|
@@ -22,7 +20,6 @@ dependencies:
|
|
|
22
20
|
type: :runtime
|
|
23
21
|
prerelease: false
|
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
-
none: false
|
|
26
23
|
requirements:
|
|
27
24
|
- - ! '>='
|
|
28
25
|
- !ruby/object:Gem::Version
|
|
@@ -30,7 +27,6 @@ dependencies:
|
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
|
31
28
|
name: rspec
|
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
|
33
|
-
none: false
|
|
34
30
|
requirements:
|
|
35
31
|
- - ! '>='
|
|
36
32
|
- !ruby/object:Gem::Version
|
|
@@ -38,7 +34,6 @@ dependencies:
|
|
|
38
34
|
type: :development
|
|
39
35
|
prerelease: false
|
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
41
|
-
none: false
|
|
42
37
|
requirements:
|
|
43
38
|
- - ! '>='
|
|
44
39
|
- !ruby/object:Gem::Version
|
|
@@ -62,27 +57,26 @@ files:
|
|
|
62
57
|
- spec/judgee_spec.rb
|
|
63
58
|
homepage: https://github.com/railsmechanic/judgee
|
|
64
59
|
licenses: []
|
|
60
|
+
metadata: {}
|
|
65
61
|
post_install_message:
|
|
66
62
|
rdoc_options: []
|
|
67
63
|
require_paths:
|
|
68
64
|
- lib
|
|
69
65
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
70
|
-
none: false
|
|
71
66
|
requirements:
|
|
72
67
|
- - ! '>='
|
|
73
68
|
- !ruby/object:Gem::Version
|
|
74
69
|
version: '0'
|
|
75
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
|
-
none: false
|
|
77
71
|
requirements:
|
|
78
72
|
- - ! '>='
|
|
79
73
|
- !ruby/object:Gem::Version
|
|
80
74
|
version: '0'
|
|
81
75
|
requirements: []
|
|
82
76
|
rubyforge_project:
|
|
83
|
-
rubygems_version:
|
|
77
|
+
rubygems_version: 2.0.3
|
|
84
78
|
signing_key:
|
|
85
|
-
specification_version:
|
|
79
|
+
specification_version: 4
|
|
86
80
|
summary: Judgee is a simple Bayesian Classifier with additive smoothing, which uses
|
|
87
81
|
Redis for persistance.
|
|
88
82
|
test_files:
|