omnicat-bayes 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +122 -0
- data/Rakefile +20 -0
- data/lib/omnicat/bayes/version.rb +5 -0
- data/lib/omnicat/bayes.rb +3 -0
- data/lib/omnicat/classifiers/bayes.rb +153 -0
- data/lib/omnicat/classifiers/bayes_internals/category.rb +14 -0
- data/lib/test/test_helper.rb +2 -0
- data/lib/test/unit/bayes_test.rb +1 -0
- data/lib/test/unit/classifiers/bayes_test.rb +142 -0
- data/omnicat-bayes.gemspec +24 -0
- metadata +107 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Mustafa Turan
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# OmniCat Bayes
|
2
|
+
|
3
|
+
[Build Status](https://travis-ci.org/mustafaturan/omnicat-bayes) [Code Climate](https://codeclimate.com/github/mustafaturan/omnicat-bayes)
|
4
|
+
|
5
|
+
A Naive Bayes text classification implementation as an OmniCat classifier strategy.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'omnicat-bayes'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install omnicat-bayes
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
See rdoc for detailed usage.
|
24
|
+
|
25
|
+
### Configurations
|
26
|
+
|
27
|
+
Optional configuration sample:
|
28
|
+
|
29
|
+
OmniCat.configure do |config|
|
30
|
+
config.exclude_tokens = ['something', 'anything'] # exclude token list
|
31
|
+
config.token_patterns = {
|
32
|
+
# exclude token Regex patterns
|
33
|
+
minus: [/[\s\t\n\r]+/, /(@[\w\d]+)/],
|
34
|
+
# include token Regex patterns
|
35
|
+
plus: [/[\p{L}\-0-9]{2,}/, /[\!\?]/, /[\:\)\(\;\-\|]{2,3}/]
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
### Bayes classifier
|
40
|
+
Create a classifier object with Bayes strategy.
|
41
|
+
|
42
|
+
# If you need to change the strategy at runtime, prefer this initialization
|
43
|
+
bayes = OmniCat::Classifier.new(OmniCat::Classifiers::Bayes.new)
|
44
|
+
or
|
45
|
+
|
46
|
+
# If you only need Bayes classification, you can instantiate the strategy directly
|
47
|
+
bayes = OmniCat::Classifiers::Bayes.new
|
48
|
+
|
49
|
+
### Create categories
|
50
|
+
Create a classification category.
|
51
|
+
|
52
|
+
bayes.add_category('positive')
|
53
|
+
bayes.add_category('negative')
|
54
|
+
|
55
|
+
### Train
|
56
|
+
Train category with a document.
|
57
|
+
|
58
|
+
bayes.train('positive', 'great if you are in a slap happy mood .')
|
59
|
+
bayes.train('negative', 'bad tracking issue')
|
60
|
+
|
61
|
+
### Train batch
|
62
|
+
Train category with multiple documents.
|
63
|
+
|
64
|
+
bayes.train_batch('positive', [
|
65
|
+
'a feel-good picture in the best sense of the term...',
|
66
|
+
'it is a feel-good movie about which you can actually feel good.',
|
67
|
+
'love and money both of them are good choises'
|
68
|
+
])
|
69
|
+
bayes.train_batch('negative', [
|
70
|
+
'simplistic , silly and tedious .',
|
71
|
+
'interesting , but not compelling . ',
|
72
|
+
'seems clever but not especially compelling'
|
73
|
+
])
|
74
|
+
|
75
|
+
### Classify
|
76
|
+
Classify a document.
|
77
|
+
|
78
|
+
result = bayes.classify('I feel so good and happy')
|
79
|
+
=> #<OmniCat::Result:0x007fd20296aad8 @category={:name=>"positive", :percentage=>73}, @scores={"positive"=>5.4253472222222225e-09, "negative"=>1.9600796074572086e-09}, @total_score=7.385426829679431e-09>
|
80
|
+
result.to_hash
|
81
|
+
=> {:category=>{:name=>"positive", :percentage=>73}, :scores=>{"positive"=>5.4253472222222225e-09, "negative"=>1.9600796074572086e-09}, :total_score=>7.385426829679431e-09}
|
82
|
+
|
83
|
+
### Classify batch
|
84
|
+
Classify multiple documents at a time.
|
85
|
+
|
86
|
+
results = bayes.classify_batch(
|
87
|
+
[
|
88
|
+
'the movie is silly so not compelling enough',
|
89
|
+
'a good piece of work'
|
90
|
+
]
|
91
|
+
)
|
92
|
+
=> [#<OmniCat::Result:0x007fd2029341b8 @category={:name=>"negative", :percentage=>78}, @scores={"positive"=>2.5521869888765736e-14, "negative"=>9.074442627116706e-14}, @total_score=1.162662961599328e-13>, #<OmniCat::Result:0x007fd20292e7e0 @category={:name=>"positive", :percentage=>80}, @scores={"positive"=>2.411265432098765e-07, "negative"=>5.880238822371627e-08}, @total_score=2.999289314335928e-07>]
|
93
|
+
|
94
|
+
### Convert to hash
|
95
|
+
Convert full Bayes object to hash.
|
96
|
+
|
97
|
+
# For storing and restoring model data
|
98
|
+
bayes_hash = bayes.to_hash
|
99
|
+
=> {:categories=>{"positive"=>{:doc_count=>4, :tokens=>{"great"=>1, "if"=>1, "you"=>2, "are"=>2, "in"=>2, "slap"=>1, "happy"=>1, "mood"=>1, "feel-good"=>2, "picture"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>2, "term"=>1, "it"=>1, "is"=>1, "movie"=>1, "about"=>1, "which"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>2, "love"=>1, "and"=>1, "money"=>1, "both"=>1, "them"=>1, "choises"=>1}, :token_count=>37}, "negative"=>{:doc_count=>4, :tokens=>{"bad"=>1, "tracking"=>1, "issue"=>1, "simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1, "interesting"=>1, "but"=>2, "not"=>2, "compelling"=>2, "seems"=>1, "clever"=>1, "especially"=>1}, :token_count=>17}}, :category_count=>2, :doc_count=>8, :k_value=>1.0, :token_count=>54, :uniq_token_count=>43}
|
100
|
+
|
101
|
+
### Load from hash
|
102
|
+
Load full Bayes object from hash.
|
103
|
+
|
104
|
+
another_bayes_obj = OmniCat::Classifiers::Bayes.new(bayes_hash)
|
105
|
+
=> #<OmniCat::Classifiers::Bayes:0x007fd20308cff0 @categories={"positive"=>#<OmniCat::Classifiers::BayesInternals::Category:0x007fd20308cf78 @doc_count=4, @tokens={"great"=>1, "if"=>1, "you"=>2, "are"=>2, "in"=>2, "slap"=>1, "happy"=>1, "mood"=>1, "feel-good"=>2, "picture"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>2, "term"=>1, "it"=>1, "is"=>1, "movie"=>1, "about"=>1, "which"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>2, "love"=>1, "and"=>1, "money"=>1, "both"=>1, "them"=>1, "choises"=>1}, @token_count=37>, "negative"=>#<OmniCat::Classifiers::BayesInternals::Category:0x007fd20308cf00 @doc_count=4, @tokens={"bad"=>1, "tracking"=>1, "issue"=>1, "simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1, "interesting"=>1, "but"=>2, "not"=>2, "compelling"=>2, "seems"=>1, "clever"=>1, "especially"=>1}, @token_count=17>}, @category_count=2, @doc_count=8, @k_value=1.0, @token_count=54, @uniq_token_count=43>
|
106
|
+
another_bayes_obj.classify('best senses')
|
107
|
+
=> #<OmniCat::Result:0x007fd203075008 @category={:name=>"positive", :percentage=>57}, @scores={"positive"=>0.0002314814814814815, "negative"=>0.00017146776406035664}, @total_score=0.00040294924554183816>
|
108
|
+
|
109
|
+
## Todo
|
110
|
+
* Implement all [OmniCat](http://github.com/mustafaturan/omnicat) classifier strategy abstract methods
|
111
|
+
|
112
|
+
## Contributing
|
113
|
+
|
114
|
+
1. Fork it
|
115
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
116
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
117
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
118
|
+
5. Create new Pull Request
|
119
|
+
|
120
|
+
## Copyright
|
121
|
+
Copyright © 2013 Mustafa Turan. See LICENSE for details.
|
122
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
# `rake` with no arguments runs the unit tests.
desc "Default Task"
task default: [:test]

# Unit test runner: puts lib/ on the load path and loads every *_test.rb
# file directly under lib/test/unit.
desc "Run all unit tests"
Rake::TestTask.new do |test_task|
  test_task.libs << 'lib'
  test_task.test_files = FileList['lib/test/unit/*_test.rb']
  test_task.verbose = true
end

# Interactive console with the gem preloaded, for manual experiments.
desc "Generate a test console"
task :console do
  verbose(false) do
    sh "irb -I lib/ -r 'omnicat/bayes'"
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'omnicat/classifiers/strategy'
|
2
|
+
|
3
|
+
module OmniCat
  module Classifiers
    # Naive Bayes classification strategy for OmniCat.
    #
    # Categories are trained with documents. A document is classified by
    # scoring it against every category (category prior multiplied by the
    # per-token probabilities) and picking the highest score.
    class Bayes < ::OmniCat::Classifiers::Strategy
      # Numeric - Helper value for skipping some Bayes algorithm errors. It is
      # added to trained token counts and used as the weight of tokens a
      # category has never seen. Defaults to 1.0.
      attr_accessor :k_value

      # Builds a classifier, optionally restoring one from a hash.
      #
      # ==== Parameters
      #
      # * +bayes_hash+ - Hash of classifier data. When it has a +:categories+
      #   key, every entry is rebuilt as a BayesInternals::Category. The
      #   +:k_value+ entry is used when present, otherwise 1.0.
      def initialize(bayes_hash = {})
        super(bayes_hash)
        if bayes_hash.has_key?(:categories)
          bayes_hash[:categories].each do |name, category|
            @categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new(category)
          end
        end
        @k_value = bayes_hash[:k_value] || 1.0
      end

      # Allows adding new classification category
      #
      # ==== Parameters
      #
      # * +category_name+ - Name for category
      #
      # ==== Raises
      #
      # * +StandardError+ - when a category with this name already exists
      #
      # ==== Examples
      #
      #   # Create a classification category
      #   bayes = Bayes.new
      #   bayes.add_category("positive")
      def add_category(category_name)
        if category_exists?(category_name)
          raise StandardError,
                "Category with name '#{category_name}' is already exists!"
        end

        increment_category_count
        @categories[category_name] = ::OmniCat::Classifiers::BayesInternals::Category.new
      end

      # Train the desired category with a document
      #
      # ==== Parameters
      #
      # * +category_name+ - Name of the category from added categories list
      # * +doc_content+ - Document text
      #
      # ==== Raises
      #
      # * +StandardError+ - when the category has not been added
      #
      # ==== Examples
      #
      #   # Train the desired category
      #   bayes.train("positive", "clear documentation")
      #   bayes.train("positive", "good, very well")
      #   bayes.train("negative", "bad dog")
      #   bayes.train("neutral", "how is the management gui")
      def train(category_name, doc_content)
        unless category_exists?(category_name)
          raise StandardError,
                "Category with name '#{category_name}' does not exist!"
        end

        increment_doc_counts(category_name)
        update_priors
        category = @categories[category_name]
        doc = OmniCat::Doc.new(content: doc_content)
        doc.tokens.each do |token, count|
          # The global counters must be updated before the token is stored in
          # the category, otherwise a brand-new token would already look known
          # and would not be counted as unique.
          increment_token_counts(category_name, token, count)
          category.tokens[token] = category.tokens[token].to_i + count
        end
      end

      # Classify the given document
      #
      # ==== Parameters
      #
      # * +doc_content+ - The document for classification
      #
      # ==== Returns
      #
      # * +result+ - OmniCat::Result object holding the score of every
      #   category, the total score, and the winning category with its
      #   percentage (floor of the winning score's share of the total score)
      #
      # ==== Examples
      #
      #   # Classify a document
      #   result = bayes.classify("good documentation")
      #   result.category[:name] # => name of the best scoring category
      def classify(doc_content)
        return unless classifiable?

        # Start below every possible score so the first category always wins
        # the initial comparison.
        best_score = -Float::INFINITY
        result = ::OmniCat::Result.new
        @categories.each do |category_name, category|
          category_score = doc_probability(category, doc_content)
          result.scores[category_name] = category_score
          if category_score > best_score
            result.category[:name] = category_name
            best_score = category_score
          end
          result.total_score += category_score
        end
        # Avoid dividing by zero when every category scored 0.
        result.total_score = 1 if result.total_score == 0
        result.category[:percentage] = (
          result.scores[result.category[:name]] * 100.0 / result.total_score
        ).floor
        result
      end

      private

      # Recomputes the prior of every category as the category's document
      # count divided by the classifier's document count.
      def update_priors
        @categories.each do |_, category|
          category.prior = category.doc_count / doc_count.to_f
        end
      end

      # Adds +count+ occurrences of +token+ to the classifier-wide and the
      # per-category token counters, registering the token as unique when no
      # category has seen it yet.
      def increment_token_counts(category_name, token, count)
        increment_uniq_token_count(token)
        @token_count += count
        @categories[category_name].token_count += count
      end

      # Increments the unique token counter unless some category already
      # knows +token+.
      def increment_uniq_token_count(token)
        already_known = categories.any? do |_, category|
          category.tokens.has_key?(token)
        end
        @uniq_token_count += already_known ? 0 : 1
      end

      # Score of +doc_content+ for +category+: the category prior multiplied
      # by k_value and by the probability of every token of the document.
      def doc_probability(category, doc_content)
        score = k_value.to_f
        doc = OmniCat::Doc.new(content: doc_content)
        doc.tokens.each do |token, count|
          score *= token_probability(category, token, count)
        end
        category.prior * score
      end

      # Probability contribution of a token that occurs +count+ times in the
      # document being classified.
      #
      # k_value is coerced to Float so an Integer k_value (e.g. +1+) cannot
      # turn the divisions below into truncating integer divisions.
      #
      # NOTE(review): the occurrence count is applied as a multiplier, not as
      # an exponent; kept as is because published scores depend on it.
      def token_probability(category, token, count)
        smoothing = k_value.to_f
        trained_count = category.tokens[token].to_i
        if trained_count == 0
          smoothing / token_count
        else
          count * (
            (trained_count + smoothing) /
            (category.token_count + uniq_token_count)
          )
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module OmniCat
  module Classifiers
    module BayesInternals
      # Category as used by the Bayes strategy: the generic strategy category
      # extended with a prior.
      class Category < ::OmniCat::Classifiers::StrategyInternals::Category
        # Float - prior of the category
        attr_accessor :prior

        # Builds the category from +category_hash+. The +:prior+ entry is
        # converted with +to_f+, so a missing entry yields 0.0.
        def initialize(category_hash = {})
          super(category_hash)
          stored_prior = category_hash[:prior]
          @prior = stored_prior.to_f
        end
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/classifiers/bayes_test'
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'test_helper'))
|
3
|
+
|
4
|
+
# Unit tests for OmniCat::Classifiers::Bayes: category management, training,
# classification and restoring a classifier from its hash form.
class TestBayes < Test::Unit::TestCase
  # Configures the tokenizer and creates a fresh, empty classifier.
  def setup
    OmniCat.configure do |config|
      config.exclude_tokens = %w(are at by)
      config.token_patterns = {
        minus: [/[\s\t\n\r]+/, /(@[\w\d]+)/],
        plus: [/[\p{L}\-0-9]{2,}/, /[\!\?]/, /[\:\)\(\;\-\|]{2,3}/]
      }
    end
    @bayes = OmniCat::Classifiers::Bayes.new
  end

  # A new category starts without documents and without tokens.
  def test_add_category
    @bayes.add_category('neutral')
    neutral = @bayes.categories['neutral']
    assert_not_nil(neutral)
    assert_equal(%w(neutral), @bayes.categories.keys)
    assert_equal(0, neutral.doc_count)
    assert_equal({}, neutral.tokens)
    assert_equal(0, neutral.token_count)
  end

  # Adding the same category twice is an error.
  def test_add_category_that_already_exists
    @bayes.add_category('neutral')
    assert_raise(StandardError) do
      @bayes.add_category('neutral')
    end
  end

  # Several categories can be added at once, keeping their order.
  def test_add_categories
    names = %w(neutral positive negative)
    @bayes.add_categories(names)
    assert_not_nil(@bayes.categories['neutral'])
    assert_equal(%w(neutral positive negative), @bayes.categories.keys)
  end

  # Training stores token counts; 'are' is excluded by the configuration.
  def test_train_valid_category
    @bayes.add_category('neutral')
    @bayes.train('neutral', 'how are you?? : :| :) ;-) :(')
    neutral = @bayes.categories['neutral']
    expected_tokens = {
      'how' => 1, 'you' => 1, '?' => 2, ':|' => 1,
      ':)' => 1, ';-)' => 1, ':(' => 1
    }
    assert_equal(1, neutral.doc_count)
    assert_equal(expected_tokens, neutral.tokens)
    assert_equal(8, neutral.token_count)
  end

  # Batch training counts every document.
  def test_train_batch
    @bayes.add_category('positive')
    docs = ['good job ever', 'valid syntax', 'best moments of my life']
    @bayes.train_batch('positive', docs)
    assert_equal(3, @bayes.categories['positive'].doc_count)
  end

  # Training an unknown category is an error.
  def test_train_missing_category
    assert_raise(StandardError) do
      @bayes.train('neutral', 'how are you?')
    end
  end

  # Classification fails until every category has been trained.
  def test_classifiability_error
    @bayes.add_category('positive')
    @bayes.add_category('negative')
    assert_raise(StandardError) do
      @bayes.classify('good job')
    end
    @bayes.train('positive', 'good job')
    assert_raise(StandardError) do
      @bayes.classify('good job')
    end
  end

  # Documents are assigned to the category sharing their vocabulary.
  def test_classify
    @bayes.add_category('positive')
    @bayes.add_category('negative')
    @bayes.train('positive', 'good job')
    @bayes.train('negative', 'bad work')

    positive_result = @bayes.classify('very good position for this sentence')
    assert_equal('positive', positive_result.category[:name])

    negative_result = @bayes.classify('bad words')
    assert_equal('negative', negative_result.category[:name])
  end

  # Batch classification returns one result per document, in order.
  def test_classify_batch
    @bayes.add_category('positive')
    @bayes.add_category('negative')
    @bayes.train_batch(
      'positive',
      ['good job ever', 'valid syntax', 'best moments of my life']
    )
    @bayes.train_batch('negative', ['bad work', 'awfull day', 'never liked it'])

    results = @bayes.classify_batch(['good sytanx research', 'bad words'])

    assert_equal(2, results.count)
    assert_equal('positive', results[0].category[:name])
    assert_equal('negative', results[1].category[:name])
  end

  # A classifier rebuilt from a hash exports the same hash again.
  def test_initialize_with_hash
    bayes1 = ::OmniCat::Classifiers::Bayes.new
    bayes1.add_category('positive')
    bayes1.add_category('negative')
    bayes1.train('positive', 'good job')
    bayes1.train('negative', 'bad work')
    h1 = bayes1.to_hash

    bayes2 = ::OmniCat::Classifiers::Bayes.new(h1)
    assert_equal(h1, bayes2.to_hash)
  end

  # Classifying without any category is an error.
  def test_classify_with_insufficient_categories
    assert_raise(StandardError) do
      @bayes.classify('blank')
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put the gem's own lib/ directory on the load path so the version file can
# be required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.include?(lib) || $LOAD_PATH.unshift(lib)
require 'omnicat/bayes/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name          = 'omnicat-bayes'
  gem.version       = Omnicat::Bayes::VERSION
  gem.authors       = ['Mustafa Turan']
  gem.email         = ['mustafaturan.net@gmail.com']
  gem.description   = 'Naive Bayes classifier strategy for OmniCat'
  gem.summary       = 'Naive Bayes text classification implementation as an OmniCat classifier strategy.'
  gem.homepage      = 'https://github.com/mustafaturan/omnicat-bayes'
  gem.license       = 'MIT'

  # Packaged files: everything tracked by git, with executables and test
  # files derived from their paths.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Dependencies
  gem.add_dependency 'omnicat', '~> 0.2.0'
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: omnicat-bayes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mustafa Turan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-07-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: omnicat
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.2.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.2.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bundler
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '1.3'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '1.3'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: Naive Bayes classifier strategy for OmniCat
|
63
|
+
email:
|
64
|
+
- mustafaturan.net@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- Gemfile
|
71
|
+
- LICENSE.txt
|
72
|
+
- README.md
|
73
|
+
- Rakefile
|
74
|
+
- lib/omnicat/bayes.rb
|
75
|
+
- lib/omnicat/bayes/version.rb
|
76
|
+
- lib/omnicat/classifiers/bayes.rb
|
77
|
+
- lib/omnicat/classifiers/bayes_internals/category.rb
|
78
|
+
- lib/test/test_helper.rb
|
79
|
+
- lib/test/unit/bayes_test.rb
|
80
|
+
- lib/test/unit/classifiers/bayes_test.rb
|
81
|
+
- omnicat-bayes.gemspec
|
82
|
+
homepage: https://github.com/mustafaturan/omnicat-bayes
|
83
|
+
licenses:
|
84
|
+
- MIT
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ! '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
97
|
+
requirements:
|
98
|
+
- - ! '>='
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
requirements: []
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 1.8.23
|
104
|
+
signing_key:
|
105
|
+
specification_version: 3
|
106
|
+
summary: Naive Bayes text classification implementation as an OmniCat classifier strategy.
|
107
|
+
test_files: []
|