omnicat 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +6 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +99 -0
- data/Rakefile +1 -0
- data/lib/omnicat.rb +7 -0
- data/lib/omnicat/array.rb +9 -0
- data/lib/omnicat/base.rb +16 -0
- data/lib/omnicat/bayes.rb +3 -0
- data/lib/omnicat/classifiers/base.rb +55 -0
- data/lib/omnicat/classifiers/bayes.rb +127 -0
- data/lib/omnicat/classifiers/bayes_internals/category.rb +15 -0
- data/lib/omnicat/hash.rb +13 -0
- data/lib/omnicat/result.rb +11 -0
- data/lib/omnicat/string.rb +9 -0
- data/lib/omnicat/version.rb +3 -0
- data/lib/test/test_helper.rb +2 -0
- data/lib/test/unit/array_test.rb +10 -0
- data/lib/test/unit/base_test.rb +49 -0
- data/lib/test/unit/bayes_test.rb +85 -0
- data/lib/test/unit/hash_test.rb +10 -0
- data/lib/test/unit/string_test.rb +17 -0
- data/omnicat.gemspec +23 -0
- metadata +95 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1468554ebab3a7d69abfb6b806ed2acc474a0fbb
|
4
|
+
data.tar.gz: 7cafcd36ce8030ba19dc1f48f3be8cc016f49c1d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ce946b646239bfec96ce58bc296d9b866c72603d5113b56fb4c65418b14457fad6072026fe90330250e04dc4467476d874fdd29327253793b10fdf5a90526f37
|
7
|
+
data.tar.gz: 1281d217ed03836696b9025e368f274941b481ba42d94af5de045fb7c06a6ee231ae8c6bec4e0758fe6e438f707559322f955f60b4c26b7f0e3c1ad3993328b1
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Mustafa Turan
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# OmniCat
|
2
|
+
|
3
|
+
A generalized framework for text classification. For now, it only supports the Naive Bayes algorithm.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'omnicat'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install omnicat
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
See rdoc for detailed usage.
|
22
|
+
|
23
|
+
### Bayes classifier
|
24
|
+
Create a Bayes classifier object.
|
25
|
+
|
26
|
+
bayes = OmniCat::Classifiers::Bayes.new
|
27
|
+
|
28
|
+
### Create categories
|
29
|
+
Create a classification category.
|
30
|
+
|
31
|
+
bayes.add_category('positive')
|
32
|
+
bayes.add_category('negative')
|
33
|
+
|
34
|
+
### Train
|
35
|
+
Train category with a document.
|
36
|
+
|
37
|
+
bayes.train('positive', 'great if you are in a slap happy mood .')
|
38
|
+
bayes.train('negative', 'bad tracking issue')
|
39
|
+
|
40
|
+
### Train batch
|
41
|
+
Train category with multiple documents.
|
42
|
+
|
43
|
+
bayes.train_batch('positive', [
|
44
|
+
'a feel-good picture in the best sense of the term...',
|
45
|
+
'it is a feel-good movie about which you can actually feel good.',
|
46
|
+
'love and money both of them are good choises'
|
47
|
+
])
|
48
|
+
bayes.train_batch('negative', [
|
49
|
+
'simplistic , silly and tedious .',
|
50
|
+
'interesting , but not compelling . ',
|
51
|
+
'seems clever but not especially compelling'
|
52
|
+
])
|
53
|
+
|
54
|
+
### Classify
|
55
|
+
Classify a document.
|
56
|
+
|
57
|
+
result = bayes.classify('I feel so good and happy')
|
58
|
+
=> #<OmniCat::Result:0x007fe59b97b548 @category={:name=>"negative", :percentage=>99}, @scores={"positive"=>1.749909854122994e-07, "negative"=>0.014084507042253521}, @total_score=0.014084682033238934>
|
59
|
+
result.to_hash
|
60
|
+
=> {:category=>{:name=>"negative", :percentage=>99}, :scores=>{"positive"=>1.749909854122994e-07, "negative"=>0.014084507042253521}, :total_score=>0.014084682033238934}
|
61
|
+
|
62
|
+
### Classify batch
|
63
|
+
Classify multiple documents at a time.
|
64
|
+
|
65
|
+
results = bayes.classify_batch(
|
66
|
+
[
|
67
|
+
'the movie is silly so not compelling enough',
|
68
|
+
'a good piece of work'
|
69
|
+
]
|
70
|
+
)
|
71
|
+
=> [#<OmniCat::Result:0x007fe59b949d90 @category={:name=>"negative", :percentage=>75}, @scores={"positive"=>7.962089836259623e-06, "negative"=>2.5145916163515512e-05}, @total_score=3.3108005999775135e-05>, #<OmniCat::Result:0x007fe59c9d7d10 @category={:name=>"positive", :percentage=>100}, @scores={"positive"=>0.0005434126313247192, "negative"=>0}, @total_score=0.0005434126313247192>]
|
72
|
+
|
73
|
+
### Convert to hash
|
74
|
+
Convert full Bayes object to hash.
|
75
|
+
|
76
|
+
# For storing and restoring model data
|
77
|
+
bayes_hash = bayes.to_hash
|
78
|
+
|
79
|
+
### Load from hash
|
80
|
+
Load full Bayes object from hash.
|
81
|
+
|
82
|
+
another_bayes_obj = OmniCat::Classifiers::Bayes.new(bayes_hash)
|
83
|
+
another_bayes_obj.classify('best senses')
|
84
|
+
|
85
|
+
## Todo
|
86
|
+
* Add more text classification modules such as Support Vector Machine (SVM).
|
87
|
+
* Add text cleaning/manipulation extensions such as a stopword cleaner, a stemmer, and a POS tagger.
|
88
|
+
|
89
|
+
## Contributing
|
90
|
+
|
91
|
+
1. Fork it
|
92
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
93
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
94
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
95
|
+
5. Create new Pull Request
|
96
|
+
|
97
|
+
## Copyright
|
98
|
+
Copyright © 2013 Mustafa Turan. See LICENSE for details.
|
99
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/lib/omnicat.rb
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/omnicat/version'
|
2
|
+
require File.dirname(__FILE__) + '/omnicat/string'
|
3
|
+
require File.dirname(__FILE__) + '/omnicat/array'
|
4
|
+
require File.dirname(__FILE__) + '/omnicat/hash'
|
5
|
+
require File.dirname(__FILE__) + '/omnicat/base'
|
6
|
+
require File.dirname(__FILE__) + '/omnicat/result'
|
7
|
+
require File.dirname(__FILE__) + '/omnicat/bayes'
|
data/lib/omnicat/base.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module OmniCat
  # Common ancestor of OmniCat objects; provides hash serialization.
  class Base
    # Returns the object's state as a Hash.
    #
    # Every instance variable becomes a symbol key (without the leading "@").
    # Variables holding +nil+ are omitted. Values whose class name contains
    # "OmniCat" are serialized through their own +to_hash+; all other values
    # are stored as they are.
    #
    # @return [Hash] instance variable names (as symbols) mapped to values
    def to_hash
      instance_variables.each_with_object({}) do |ivar, hash|
        value = instance_variable_get(ivar)
        # Skip only unset (nil) values. A plain truthiness test would also
        # drop legitimate +false+ values from the serialized form.
        next if value.nil?

        nested = value.class.to_s.include?('OmniCat')
        hash[ivar.to_s[1..-1].to_sym] = nested ? value.to_hash : value
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module OmniCat
  module Classifiers
    # Batch helpers shared by classifiers.
    #
    # Each helper delegates to a single-item method (+add_category+, +train+,
    # +classify+) that is not defined in this class; the concrete classifier
    # is expected to supply them.
    class Base < ::OmniCat::Base
      # Allows adding multiple classification categories
      #
      # ==== Parameters
      #
      # * +names+ - Array of categories
      #
      # ==== Examples
      #
      #   # Add multiple categories for classification
      #   bayes.add_categories(["positive", "negative", "neutral"])
      def add_categories(names)
        names.each { |name| add_category(name) }
      end

      # Train the desired category with multiple documents
      #
      # ==== Parameters
      #
      # * +category+ - Name of the category from added categories list
      # * +docs+ - Array of documents
      #
      # ==== Examples
      #
      #   # Add multiple docs for training the category
      #   bayes.train_batch("positive", ["clear documentation", "good, very well"])
      #   bayes.train_batch("negative", ["bad interface", "damn"])
      def train_batch(category, docs)
        docs.each { |doc| train(category, doc) }
      end

      # Classify the multiple documents at a time
      #
      # ==== Parameters
      #
      # * +docs+ - Array of documents
      #
      # ==== Returns
      #
      # * +result_set+ - Array of OmniCat::Result objects, one per document,
      #   in the same order as +docs+
      #
      # ==== Examples
      #
      #   # Classify multiple documents
      #   bayes.classify_batch(["good documentation", "damn workin again"])
      #   # => [#<OmniCat::Result ...>, #<OmniCat::Result ...>]
      def classify_batch(docs)
        docs.map { |doc| classify(doc) }
      end
    end
  end
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module OmniCat
  module Classifiers
    # Naive Bayes text classifier.
    #
    # Keeps per-category token statistics plus global document and token
    # totals, and scores a document against every category.
    class Bayes < ::OmniCat::Classifiers::Base

      attr_accessor :categories, :category_count, :doc_count, :token_count
      attr_accessor :k_value # helper val for skipping some Bayes theorem errors

      # Builds an empty classifier, or restores one from a hash such as the
      # one produced by +to_hash+.
      #
      # ==== Parameters
      #
      # * +bayes_hash+ - Optional Hash with the keys +:categories+,
      #   +:category_count+, +:doc_count+, +:k_value+ and +:token_count+.
      #   Missing counters default to 0 and a missing +:k_value+ to 1.0.
      def initialize(bayes_hash = {})
        self.categories = ::OmniCat::Hash.new
        if bayes_hash.has_key?(:categories)
          bayes_hash[:categories].each do |name, category|
            categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new(category)
          end
        end
        self.category_count = bayes_hash[:category_count].to_i
        self.doc_count = bayes_hash[:doc_count].to_i
        self.k_value = bayes_hash[:k_value] || 1.0
        self.token_count = bayes_hash[:token_count].to_i
      end

      # Allows adding new classification category
      #
      # ==== Parameters
      #
      # * +name+ - Name for category
      #
      # ==== Raises
      #
      # * +StandardError+ - when a category with the same name already exists
      #
      # ==== Examples
      #
      #   # Create a classification category
      #   bayes = Bayes.new
      #   bayes.add_category("positive")
      def add_category(name)
        if category_exists?(name)
          raise StandardError,
                "Category with name '#{name}' is already exists!"
        end

        self.category_count += 1
        categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new
      end

      # Train the desired category with a document
      #
      # ==== Parameters
      #
      # * +category+ - Name of the category from added categories list
      # * +doc+ - Document text
      #
      # ==== Raises
      #
      # * +StandardError+ - when the category has not been added
      #
      # ==== Examples
      #
      #   # Train the desired category
      #   bayes.train("positive", "clear documentation")
      #   bayes.train("positive", "good, very well")
      #   bayes.train("negative", "bad dog")
      #   bayes.train("neutral", "how is the management gui")
      def train(category, doc)
        unless category_exists?(category)
          raise StandardError,
                "Category with name '#{category}' does not exist!"
        end

        trained = categories[category]
        self.doc_count += 1
        trained.doc_count += 1
        doc.tokenize_with_counts.each do |token, count|
          self.token_count += count
          trained.tokens[token] = trained.tokens[token].to_i + count
          trained.token_count += count
        end
      end

      # Classify the given document
      #
      # A category's score is its prior (share of trained documents) times
      # the product, over the document tokens known to that category, of
      # (token count + k_value) / (category token count + total token count).
      # Tokens the category has never seen are skipped, and a category that
      # matches no token at all scores 0.
      #
      # ==== Parameters
      #
      # * +doc+ - The document for classification
      #
      # ==== Returns
      #
      # * +result+ - OmniCat::Result object holding the per-category scores,
      #   their total, and the winning category with its floored percentage
      #
      # ==== Raises
      #
      # * +StandardError+ - when fewer than 2 categories have been added
      #
      # ==== Examples
      #
      #   # Classify a document
      #   bayes.classify("good documentation")
      #   # => #<OmniCat::Result ...>
      def classify(doc)
        if category_count < 2
          raise StandardError,
                "At least 2 categories needed for classification process!"
        end

        # Tokenize once; the token list is the same for every category.
        doc_tokens = doc.tokenize_with_counts
        best_score = -1000000
        result = ::OmniCat::Result.new
        categories.each do |name, category|
          likelihood = k_value
          matched = false
          doc_tokens.each do |token, _count|
            next unless category.tokens.has_key?(token)

            matched = true
            # Float denominator keeps the ratio fractional even when k_value
            # was supplied as an Integer.
            likelihood *= (category.tokens[token].to_i + k_value) /
                          (category.token_count + token_count).to_f
          end
          # Track matches explicitly instead of comparing the product with
          # 1.0: that comparison misfires for a non-default k_value and for
          # a matched category whose product happens to equal 1.0.
          result.scores[name] =
            if matched
              (category.doc_count / doc_count.to_f) * likelihood
            else
              0
            end
          if result.scores[name] > best_score
            result.category[:name] = name
            best_score = result.scores[name]
          end
          result.total_score += result.scores[name]
        end
        # Avoid dividing by zero when no category matched any token.
        result.total_score = 1 if result.total_score == 0
        result.category[:percentage] = (
          result.scores[result.category[:name]] * 100.0 /
          result.total_score
        ).floor
        result
      end

      private

      # Tells whether a category with the given name has been added.
      def category_exists?(category_name)
        categories.has_key?(category_name)
      end

    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module OmniCat
  module Classifiers
    module BayesInternals
      # Training statistics kept for one classification category: number of
      # documents, per-token counts, and the total token count.
      class Category < ::OmniCat::Base
        attr_accessor :doc_count, :tokens, :token_count

        # Builds an empty category, or restores one from a hash carrying the
        # keys +:doc_count+, +:tokens+ and +:token_count+. Missing counters
        # become 0 and missing tokens an empty Hash.
        def initialize(category_hash = {})
          stored_tokens = category_hash[:tokens]

          self.doc_count = category_hash[:doc_count].to_i
          self.tokens = stored_tokens ? stored_tokens : {}
          self.token_count = category_hash[:token_count].to_i
        end
      end
    end
  end
end
|
data/lib/omnicat/hash.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'test_helper'))
|
2
|
+
|
3
|
+
# Exercises the hashify_with_counts extension on arrays.
class TestArray < Test::Unit::TestCase
  # Repeated elements are tallied; unique elements count once.
  def test_hashify_with_counts
    words = ["omnicat", "written", "at", "2011", "omnicat"]
    expected = {"omnicat" => 2, "written" => 1, "at" => 1, "2011" => 1}

    assert_equal(expected, words.hashify_with_counts)
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'test_helper'))
|
2
|
+
|
3
|
+
# Exercises the batch helpers (add_categories, train_batch, classify_batch)
# through a Bayes classifier instance.
class TestBase < Test::Unit::TestCase
  def setup
    @bayes = OmniCat::Classifiers::Bayes.new
  end

  # All names are registered, in the order given.
  def test_add_categories
    names = ["neutral", "positive", "negative"]
    @bayes.add_categories(names)

    assert_not_nil(@bayes.categories["neutral"])
    assert_equal(["neutral", "positive", "negative"], @bayes.categories.keys)
  end

  # Every document in the batch counts towards the category.
  def test_train_batch
    @bayes.add_category("positive")
    @bayes.train_batch(
      "positive",
      ["good job ever", "valid syntax", "best moments of my life"]
    )

    assert_equal(3, @bayes.categories["positive"].doc_count)
  end

  # One result per document, in input order.
  def test_classify_batch
    @bayes.add_category("positive")
    @bayes.add_category("negative")
    @bayes.train_batch(
      "positive",
      ["good job ever", "valid syntax", "best moments of my life"]
    )
    @bayes.train_batch("negative", ["bad work", "awfull day", "never liked it"])

    results = @bayes.classify_batch(["good sytanx research", "bad words"])
    first, second = results[0], results[1]

    assert_equal(2, results.count)
    assert_equal("positive", first.category[:name])
    assert_equal("negative", second.category[:name])
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'test_helper'))
|
2
|
+
|
3
|
+
# Exercises the Bayes classifier: category management, training,
# classification and the hash round trip.
class TestBayes < Test::Unit::TestCase
  def setup
    @bayes = OmniCat::Classifiers::Bayes.new
  end

  # A fresh category starts with zeroed counters and no tokens.
  def test_add_category
    @bayes.add_category("neutral")
    neutral = @bayes.categories["neutral"]

    assert_not_nil(neutral)
    assert_equal(["neutral"], @bayes.categories.keys)
    assert_equal(0, @bayes.categories["neutral"].doc_count)
    assert_equal({}, @bayes.categories["neutral"].tokens)
    assert_equal(0, @bayes.categories["neutral"].token_count)
  end

  # Adding the same name twice is rejected.
  def test_add_category_that_already_exists
    @bayes.add_category("neutral")

    assert_raise(StandardError) { @bayes.add_category("neutral") }
  end

  # Training updates the document count, token table and token total.
  def test_train_valid_category
    @bayes.add_category("neutral")
    @bayes.train("neutral", "how are you?")
    expected_tokens = {"how" => 1, "are" => 1, "you" => 1}

    assert_equal(1, @bayes.categories["neutral"].doc_count)
    assert_equal(expected_tokens, @bayes.categories["neutral"].tokens)
    assert_equal(3, @bayes.categories["neutral"].token_count)
  end

  # Training an unknown category is rejected.
  def test_train_missing_category
    assert_raise(StandardError) { @bayes.train("neutral", "how are you?") }
  end

  # Documents are assigned to the category whose training text they share
  # tokens with.
  def test_classify
    @bayes.add_category("positive")
    @bayes.add_category("negative")
    @bayes.train("positive", "good job")
    @bayes.train("negative", "bad work")

    upbeat = @bayes.classify("very good position for this sentence")
    gloomy = @bayes.classify("bad words")

    assert_equal("positive", upbeat.category[:name])
    assert_equal("negative", gloomy.category[:name])
  end

  # A classifier rebuilt from to_hash serializes to the same hash.
  def test_initialize_with_hash
    source = ::OmniCat::Classifiers::Bayes.new
    source.add_category("positive")
    source.add_category("negative")
    source.train("positive", "good job")
    source.train("negative", "bad work")
    snapshot = source.to_hash

    restored = ::OmniCat::Classifiers::Bayes.new(snapshot)

    assert_equal(snapshot, restored.to_hash)
  end

  # Classification requires at least two categories.
  def test_classify_with_insufficient_categories
    assert_raise(StandardError) { @bayes.classify("blank") }
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'test_helper'))
|
2
|
+
|
3
|
+
# Exercises OmniCat::Hash#to_hash with Category values.
class TestHash < Test::Unit::TestCase
  # Category values are expected to come back as plain hashes.
  def test_to_hash
    categories_hash = { "pos" => { doc_count: 0, tokens: {}, token_count: 0 } }
    pos_category = OmniCat::Classifiers::BayesInternals::Category.new(
      categories_hash["pos"]
    )

    categories = OmniCat::Hash.new
    categories["pos"] = pos_category

    assert_equal(categories_hash, categories.to_hash)
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'test_helper'))
|
2
|
+
|
3
|
+
# Exercises the tokenizing extensions called on strings.
class TestString < Test::Unit::TestCase
  # The trailing single-character words are absent from the expected tokens.
  def test_omnicat_tokenize
    text = "mustafa turan omni-cat-v0 1986 1 a s d"
    expected = ["mustafa", "turan", "omni-cat-v0", "1986"]

    assert_equal(expected, text.omnicat_tokenize)
  end

  # Tokens are tallied; the comma does not end up in any token.
  def test_tokenize_with_counts
    text = "omnicat written at 2011, omnicat"
    expected = {"omnicat" => 2, "written" => 1, "at" => 1, "2011" => 1}

    assert_equal(expected, text.tokenize_with_counts)
  end
end
|
data/omnicat.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'omnicat/version'
|
5
|
+
|
6
|
+
# Gem packaging metadata for omnicat.
Gem::Specification.new do |spec|
  spec.name          = "omnicat"
  spec.version       = OmniCat::VERSION
  spec.authors       = ["Mustafa Turan"]
  spec.email         = ["mustafaturan.net@gmail.com"]
  spec.description   = %q{A generalized framework for text classifications.}
  spec.summary       = spec.description
  spec.homepage      = "https://github.com/mustafaturan/omnicat"
  spec.license       = "MIT"

  # Package every file tracked by git.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  # The tests of this gem live under lib/test/, so accept an optional lib/
  # prefix; the bare ^(test|spec|features)/ pattern matches none of them and
  # leaves test_files empty.
  spec.test_files    = spec.files.grep(%r{^(lib/)?(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
|
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: omnicat
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Mustafa Turan
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-06-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: A generalized framework for text classifications.
|
42
|
+
email:
|
43
|
+
- mustafaturan.net@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .travis.yml
|
49
|
+
- Gemfile
|
50
|
+
- LICENSE.txt
|
51
|
+
- README.md
|
52
|
+
- Rakefile
|
53
|
+
- lib/omnicat.rb
|
54
|
+
- lib/omnicat/array.rb
|
55
|
+
- lib/omnicat/base.rb
|
56
|
+
- lib/omnicat/bayes.rb
|
57
|
+
- lib/omnicat/classifiers/base.rb
|
58
|
+
- lib/omnicat/classifiers/bayes.rb
|
59
|
+
- lib/omnicat/classifiers/bayes_internals/category.rb
|
60
|
+
- lib/omnicat/hash.rb
|
61
|
+
- lib/omnicat/result.rb
|
62
|
+
- lib/omnicat/string.rb
|
63
|
+
- lib/omnicat/version.rb
|
64
|
+
- lib/test/test_helper.rb
|
65
|
+
- lib/test/unit/array_test.rb
|
66
|
+
- lib/test/unit/base_test.rb
|
67
|
+
- lib/test/unit/bayes_test.rb
|
68
|
+
- lib/test/unit/hash_test.rb
|
69
|
+
- lib/test/unit/string_test.rb
|
70
|
+
- omnicat.gemspec
|
71
|
+
homepage: https://github.com/mustafaturan/omnicat
|
72
|
+
licenses:
|
73
|
+
- MIT
|
74
|
+
metadata: {}
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 2.0.3
|
92
|
+
signing_key:
|
93
|
+
specification_version: 4
|
94
|
+
summary: A generalized framework for text classifications.
|
95
|
+
test_files: []
|