stuff-classifier-zh 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +162 -0
- data/Rakefile +12 -0
- data/lib/stuff-classifier.rb +16 -0
- data/lib/stuff-classifier/base.rb +190 -0
- data/lib/stuff-classifier/bayes.rb +81 -0
- data/lib/stuff-classifier/storage.rb +122 -0
- data/lib/stuff-classifier/tf-idf.rb +44 -0
- data/lib/stuff-classifier/tokenizer.rb +94 -0
- data/lib/stuff-classifier/tokenizer/tokenizer_properties.rb +107 -0
- data/lib/stuff-classifier/version.rb +3 -0
- data/stuff-classifier.gemspec +36 -0
- data/test/helper.rb +49 -0
- data/test/test_001_tokenizer.rb +62 -0
- data/test/test_002_base.rb +38 -0
- data/test/test_003_naive_bayes.rb +56 -0
- data/test/test_004_tf_idf.rb +37 -0
- data/test/test_005_in_memory_storage.rb +31 -0
- data/test/test_006_file_storage.rb +77 -0
- data/test/test_007_redis_storage.rb +81 -0
- metadata +228 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'helper.rb'
|
3
|
+
|
4
|
+
# Pins the exact token lists that StuffClassifier::Tokenizer#each_word returns
# for English, French and Chinese sentences.
class Test001Tokenizer < TestBase
  # Builds one tokenizer per language: the default one, a French one, and a
  # Chinese one created with stemming switched off.
  before do
    @en_tokenizer = StuffClassifier::Tokenizer.new
    @fr_tokenizer = StuffClassifier::Tokenizer.new(:language => "fr")
    @zh_tokenizer = StuffClassifier::Tokenizer.new(:language => 'zh', :stemming => false)
  end

  # Only "hello" and "world" are expected back from the greeting; the other
  # words are expected to be dropped (presumably as stop words - TODO confirm).
  def test_simple_tokens
    tokens = @en_tokenizer.each_word('Hello world! How are you?')

    assert_equal %w[hello world], tokens
  end

  # The expected tokens are stemmed forms ("dogs" -> "dog", "really" -> "realli").
  def test_with_stemming
    tokens = @en_tokenizer.each_word('Lots of dogs, lots of cats! This really is the information highway')

    assert_equal %w[lot dog lot cat realli inform highway], tokens
  end

  # Multi-line input containing identifiers, punctuation and an emoticon.
  # The continuation lines of the literal are deliberately left unindented so
  # the string handed to the tokenizer stays the same.
  def test_complicated_tokens
    tokens = @en_tokenizer.each_word("I don't really get what you want to
accomplish. There is a class TestEval2, you can do test_eval2 =
TestEval2.new afterwards. And: class A ... end always yields nil, so
your output is ok I guess ;-)")

    expected = %w[
      realli want accomplish class
      testeval2 test eval2 testeval2 new class end
      yield nil output ok guess
    ]

    assert_equal expected, tokens
  end

  # French sentence with accented characters; expected tokens keep the accents.
  def test_unicode
    tokens = @fr_tokenizer.each_word("il s'appelle le vilain petit canard : en référence à Hans Christian Andersen, se démarquer négativement")

    expected = %w[
      appel vilain pet canard référent
      han christian andersen démarqu négat
    ]

    assert_equal expected, tokens
  end

  # Chinese sentence without whitespace; the tokenizer is expected to segment
  # it into the seven words listed below.
  def test_chinese
    tokens = @zh_tokenizer.each_word("今天我得到二个最重要的人生启示")

    assert_equal %w[今天 我 得到 二个 最重要的 人生 启示], tokens
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Checks the bookkeeping counters of a trained classifier: number of training
# documents per category and word counts overall and per category.
class Test002Base < TestBase
  # Trains a fresh Bayes classifier on nine sentences (five :dog, four :cat).
  before do
    @cls = StuffClassifier::Bayes.new("Cats or Dogs")
    set_classifier @cls

    [
      [:dog, "Dogs are awesome, cats too. I love my dog"],
      [:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog"],
      [:dog, "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs"],
      [:cat, "Cats are difficult animals, unlike dogs, really annoying, I hate them all"],
      [:dog, "So which one should you choose? A dog, definitely."],
      [:cat, "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy"],
      [:dog, "A dog will eat anything, including birds or whatever meat"],
      [:cat, "My cat's favorite place to purr is on my keyboard"],
      [:dog, "My dog's favorite place to take a leak is the tree in front of our house"]
    ].each { |category, text| train category, text }
  end

  # Verifies that the different ways of summing the counters agree with the
  # expected totals (9 documents, 58 words, 29 words per category).
  def test_count
    # Folds a list of numbers into their sum, starting from 0.
    total = lambda { |numbers| numbers.inject(0) { |acc, number| acc + number } }

    assert @cls.total_cat_count == 9
    docs_per_category = @cls.categories.map { |category| @cls.cat_count(category) }
    assert total.call(docs_per_category) == 9

    # compare word count sum to word by cat count sum
    totals_per_word = @cls.word_list.map { |entry| @cls.total_word_count(entry[0]) }
    assert total.call(totals_per_word) == 58
    totals_per_category = @cls.categories.map { |category| @cls.total_word_count_in_cat(category) }
    assert total.call(totals_per_category) == 58

    # test word count by categories
    dog_counts = @cls.word_list.map { |entry| @cls.word_count(entry[0], :dog) }
    assert total.call(dog_counts) == 29
    cat_counts = @cls.word_list.map { |entry| @cls.word_count(entry[0], :cat) }
    assert total.call(cat_counts) == 29

    # for all categories
    sums_by_category = @cls.categories.map do |category|
      total.call(@cls.word_list.map { |entry| @cls.word_count(entry[0], category) })
    end
    assert total.call(sums_by_category) == 58
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Classification checks for StuffClassifier::Bayes trained on a small
# cats-versus-dogs corpus.
class Test003NaiveBayesClassification < TestBase
  # Installs a fresh Bayes classifier and trains it on nine sentences.
  before do
    set_classifier StuffClassifier::Bayes.new("Cats or Dogs")

    [
      [:dog, "Dogs are awesome, cats too. I love my dog"],
      [:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog"],
      [:dog, "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs"],
      [:cat, "Cats are difficult animals, unlike dogs, really annoying, I hate them all"],
      [:dog, "So which one should you choose? A dog, definitely."],
      [:cat, "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy"],
      [:dog, "A dog will eat anything, including birds or whatever meat"],
      [:cat, "My cat's favorite place to purr is on my keyboard"],
      [:dog, "My dog's favorite place to take a leak is the tree in front of our house"]
    ].each { |category, text| train category, text }
  end

  # Every sentence below must be classified as :cat.
  def test_for_cats
    [
      "This test is about cats.",
      "I hate ...",
      "The most annoying animal on earth.",
      "The preferred company of software developers.",
      "My precious, my favorite!",
      "Kill that bird!"
    ].each { |text| should_be :cat, text }
  end

  # Every sentence below must be classified as :dog.
  def test_for_dogs
    [
      "This test is about dogs.",
      "Cats or Dogs?",
      "What pet will I love more?",
      "Willy, where the heck are you?",
      "I like big buts and I cannot lie.",
      "Why is the front door of our house open?",
      "Who ate my meat?"
    ].each { |text| should_be :dog, text }
  end

  # With min_prob set to 0.001 some of the sentences above are expected to
  # yield nil instead of a category.
  def test_min_prob
    classifier.min_prob = 0.001

    [
      [:cat, "This test is about cats."],
      [:cat, "I hate ..."],
      [nil,  "The most annoying animal on earth."],
      [nil,  "The preferred company of software developers."],
      [:cat, "My precious, my favorite!"],
      [:cat, "Kill that bird!"],
      [:dog, "This test is about dogs."],
      [:dog, "Cats or Dogs?"],
      [:dog, "What pet will I love more?"],
      [:dog, "Willy, where the heck are you?"],
      [nil,  "I like big buts and I cannot lie."],
      [nil,  "Why is the front door of our house open?"],
      [:dog, "Who ate my meat?"]
    ].each { |expected, text| should_be expected, text }
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Classification checks for StuffClassifier::TfIdf trained on a small
# cats-versus-dogs corpus.
class Test004TfIdfClassification < TestBase
  # Installs a fresh TfIdf classifier and trains it on nine sentences.
  before do
    set_classifier StuffClassifier::TfIdf.new("Cats or Dogs")

    [
      [:dog, "Dogs are awesome, cats too. I love my dog"],
      [:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog"],
      [:dog, "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs"],
      [:cat, "Cats are difficult animals, unlike dogs, really annoying, I hate them all"],
      [:dog, "So which one should you choose? A dog, definitely."],
      [:cat, "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy"],
      [:dog, "A dog will eat anything, including birds or whatever meat"],
      [:cat, "My cat's favorite place to purr is on my keyboard"],
      [:dog, "My dog's favorite place to take a leak is the tree in front of our house"]
    ].each { |category, text| train category, text }
  end

  # Every sentence below must be classified as :cat.
  def test_for_cats
    [
      "This test is about cats.",
      "I hate ...",
      "The most annoying animal on earth.",
      "The preferred company of software developers.",
      "My precious, my favorite!",
      "Kill that bird!"
    ].each { |text| should_be :cat, text }
  end

  # Every sentence below must be classified as :dog.
  def test_for_dogs
    [
      "This test is about dogs.",
      "Cats or Dogs?",
      "What pet will I love more?",
      "Willy, where the heck are you?",
      "I like big buts and I cannot lie.",
      "Why is the front door of our house open?",
      "Who is eating my meat?"
    ].each { |text| should_be :dog, text }
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Checks that classifier state survives between instances when
# StuffClassifier::InMemoryStorage is the configured storage, and that
# :purge_state discards it.
class Test005InMemoryStorage < TestBase
  # Installs a new in-memory storage and trains a classifier through
  # Bayes.open, so that later instances with the same name can pick it up.
  before do
    StuffClassifier::Base.storage = StuffClassifier::InMemoryStorage.new

    StuffClassifier::Bayes.open("Cats or Dogs") do |cls|
      cls.train(:dog, "Dogs are awesome, cats too. I love my dog")
      cls.train(:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog")
    end
  end

  # A new classifier with the same name must see the previously trained data.
  def test_for_persistance
    # instance_eval rebinds self to the classifier, so keep a handle on the
    # test case in order to reach its assertion methods.
    test = self
    StuffClassifier::Bayes.new("Cats or Dogs").instance_eval do
      # The message used to name FileStorage, which would have pointed at the
      # wrong class whenever this assertion failed.
      test.assert @storage.instance_of?(StuffClassifier::InMemoryStorage),
        "@storage should be an instance of InMemoryStorage"
      test.assert @word_list.length > 0, "Word count should be persisted"
      test.assert @category_list.length > 0, "Category count should be persisted"
    end
  end

  # With :purge_state => true the new classifier must start out empty.
  def test_purge_state
    test = self
    StuffClassifier::Bayes.new("Cats or Dogs", :purge_state => true).instance_eval do
      test.assert @word_list.length == 0, "Word count should be purged"
      test.assert @category_list.length == 0, "Category count should be purged"
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Checks that classifier state is written to and read back from disk when
# StuffClassifier::FileStorage is the configured storage.
class Test006FileStorage < TestBase
  # Trains a classifier against a FileStorage at @storage_path, then swaps in
  # a second FileStorage on the same path so the tests read what was saved.
  before do
    @storage_path = "/tmp/test_classifier.db"
    @storage = StuffClassifier::FileStorage.new(@storage_path)
    StuffClassifier::Base.storage = @storage

    StuffClassifier::Bayes.open("Cats or Dogs") do |cls|
      cls.train(:dog, "Dogs are awesome, cats too. I love my dog.")
      cls.train(:dog, "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs")
      cls.train(:dog, "So which one should you choose? A dog, definitely.")
      cls.train(:dog, "A dog will eat anything, including birds or whatever meat")
      cls.train(:dog, "My dog's favorite place to take a leak is the tree in front of our house")

      cls.train(:cat, "My cat's favorite place to purr is on my keyboard")
      cls.train(:cat, "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy")
      cls.train(:cat, "Cats are difficult animals, unlike dogs, really annoying, I hate them all")
      cls.train(:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog")
    end

    # redefining storage instance, forcing it to read from file again
    StuffClassifier::Base.storage = StuffClassifier::FileStorage.new(@storage_path)
  end

  # Removes the storage file so each test starts from a clean slate.
  def teardown
    # File.exists? is deprecated and was removed in Ruby 3.2; File.exist? is
    # the supported spelling and is what test_file_created already uses.
    File.unlink @storage_path if File.exist? @storage_path
  end

  # A classifier built from the reloaded storage must classify as trained.
  def test_result
    set_classifier StuffClassifier::Bayes.new("Cats or Dogs")

    should_be :cat, "This test is about cats."
    should_be :cat, "I hate ..."
    should_be :cat, "The most annoying animal on earth."
    should_be :cat, "The preferred company of software developers."
    should_be :cat, "My precious, my favorite!"
    should_be :cat, "Kill that bird!"

    should_be :dog, "This test is about dogs."
    should_be :dog, "Cats or Dogs?"
    should_be :dog, "What pet will I love more?"
    should_be :dog, "Willy, where the heck are you?"
    should_be :dog, "I like big buts and I cannot lie."
    should_be :dog, "Why is the front door of our house open?"
    should_be :dog, "Who ate my meat?"
  end

  # The storage in use must be the second instance, and a new classifier
  # must see the trained data through it.
  def test_for_persistance
    assert !@storage.equal?(StuffClassifier::Base.storage), "Storage instance should not be the same"

    # instance_eval rebinds self to the classifier, so keep a handle on the
    # test case in order to reach its assertion methods.
    test = self
    StuffClassifier::Bayes.new("Cats or Dogs").instance_eval do
      test.assert @storage.instance_of?(StuffClassifier::FileStorage), "@storage should be an instance of FileStorage"
      test.assert @word_list.length > 0, "Word count should be persisted"
      test.assert @category_list.length > 0, "Category count should be persisted"
    end
  end

  # Training must have produced a non-trivial file at @storage_path.
  def test_file_created
    assert File.exist?(@storage_path), "File #@storage_path should exist"

    content = File.read(@storage_path)
    assert content.length > 100, "Serialized content should have more than 100 chars"
  end

  # With :purge_state => true the new classifier must start out empty.
  def test_purge_state
    test = self
    StuffClassifier::Bayes.new("Cats or Dogs", :purge_state => true).instance_eval do
      test.assert @storage.instance_of?(StuffClassifier::FileStorage), "@storage should be an instance of FileStorage"
      test.assert @word_list.length == 0, "Word count should be purged"
      test.assert @category_list.length == 0, "Category count should be purged"
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'redis'
|
3
|
+
|
4
|
+
|
5
|
+
# Checks that classifier state is written to and read back from Redis when
# StuffClassifier::RedisStorage is the configured storage. Connects to a Redis
# server on localhost:6379.
class Test007RedisStorage < TestBase
  # Trains a classifier against a RedisStorage under @key, then swaps in a
  # second RedisStorage on the same key so the tests read what was saved.
  before do
    @key = "test_classifier"
    @redis_options = { host: 'localhost', port: 6379 }
    @redis = Redis.new(@redis_options)

    @storage = StuffClassifier::RedisStorage.new(@key, @redis_options)
    StuffClassifier::Base.storage = @storage

    StuffClassifier::Bayes.open("Cats or Dogs") do |cls|
      cls.train(:dog, "Dogs are awesome, cats too. I love my dog.")
      cls.train(:dog, "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs")
      cls.train(:dog, "So which one should you choose? A dog, definitely.")
      cls.train(:dog, "A dog will eat anything, including birds or whatever meat")
      cls.train(:dog, "My dog's favorite place to take a leak is the tree in front of our house")

      cls.train(:cat, "My cat's favorite place to purr is on my keyboard")
      cls.train(:cat, "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy")
      cls.train(:cat, "Cats are difficult animals, unlike dogs, really annoying, I hate them all")
      cls.train(:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog")
    end

    # redefining storage instance, forcing it to read from Redis again
    StuffClassifier::Base.storage = StuffClassifier::RedisStorage.new(@key, @redis_options)
  end

  # Deletes the test key so each test starts from a clean slate.
  def teardown
    @redis.del(@key)
  end

  # A classifier built from the reloaded storage must classify as trained.
  def test_result
    set_classifier StuffClassifier::Bayes.new("Cats or Dogs")

    should_be :cat, "This test is about cats."
    should_be :cat, "I hate ..."
    should_be :cat, "The most annoying animal on earth."
    should_be :cat, "The preferred company of software developers."
    should_be :cat, "My precious, my favorite!"
    should_be :cat, "Kill that bird!"

    should_be :dog, "This test is about dogs."
    should_be :dog, "Cats or Dogs?"
    should_be :dog, "What pet will I love more?"
    should_be :dog, "Willy, where the heck are you?"
    should_be :dog, "I like big buts and I cannot lie."
    should_be :dog, "Why is the front door of our house open?"
    should_be :dog, "Who ate my meat?"
  end

  # The storage in use must be the second instance, and a new classifier
  # must see the trained data through it.
  def test_for_persistance
    assert !@storage.equal?(StuffClassifier::Base.storage), "Storage instance should not be the same"

    # instance_eval rebinds self to the classifier, so keep a handle on the
    # test case in order to reach its assertion methods.
    test = self
    StuffClassifier::Bayes.new("Cats or Dogs").instance_eval do
      test.assert @storage.instance_of?(StuffClassifier::RedisStorage), "@storage should be an instance of RedisStorage"
      test.assert @word_list.length > 0, "Word count should be persisted"
      test.assert @category_list.length > 0, "Category count should be persisted"
    end
  end

  # Training must have stored a non-trivial value under @key.
  def test_key_created
    # Check the fetched value rather than Redis#exists: on redis-rb versions
    # where #exists returns an Integer, a missing key yields 0, which is
    # truthy in Ruby and would let the assertion pass for a missing key.
    content = @redis.get(@key)
    assert !content.nil?, "Redis key #{@key} should exist"

    assert content.length > 100, "Serialized content should have more than 100 chars"
  end

  # With :purge_state => true the new classifier must start out empty.
  def test_purge_state
    test = self
    StuffClassifier::Bayes.new("Cats or Dogs", :purge_state => true).instance_eval do
      test.assert @storage.instance_of?(StuffClassifier::RedisStorage), "@storage should be an instance of RedisStorage"
      test.assert @word_list.length == 0, "Word count should be purged"
      test.assert @category_list.length == 0, "Category count should be purged"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,228 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stuff-classifier-zh
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alexandru Nedelcu
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-12-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ruby-stemmer
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: sequel
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: redis
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rmmseg-cpp
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.9.2
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.9.2
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: minitest
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '4'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '4'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: turn
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.8.3
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.8.3
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: simplecov
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: awesome_print
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: byebug
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
description: 2 methods are provided for now - (1) naive bayes implementation + (2)
|
168
|
+
tf-idf weights
|
169
|
+
email:
|
170
|
+
- github@contact.bionicspirit.com
|
171
|
+
executables: []
|
172
|
+
extensions: []
|
173
|
+
extra_rdoc_files: []
|
174
|
+
files:
|
175
|
+
- ".gitignore"
|
176
|
+
- Gemfile
|
177
|
+
- LICENSE.txt
|
178
|
+
- README.md
|
179
|
+
- Rakefile
|
180
|
+
- lib/stuff-classifier.rb
|
181
|
+
- lib/stuff-classifier/base.rb
|
182
|
+
- lib/stuff-classifier/bayes.rb
|
183
|
+
- lib/stuff-classifier/storage.rb
|
184
|
+
- lib/stuff-classifier/tf-idf.rb
|
185
|
+
- lib/stuff-classifier/tokenizer.rb
|
186
|
+
- lib/stuff-classifier/tokenizer/tokenizer_properties.rb
|
187
|
+
- lib/stuff-classifier/version.rb
|
188
|
+
- stuff-classifier.gemspec
|
189
|
+
- test/helper.rb
|
190
|
+
- test/test_001_tokenizer.rb
|
191
|
+
- test/test_002_base.rb
|
192
|
+
- test/test_003_naive_bayes.rb
|
193
|
+
- test/test_004_tf_idf.rb
|
194
|
+
- test/test_005_in_memory_storage.rb
|
195
|
+
- test/test_006_file_storage.rb
|
196
|
+
- test/test_007_redis_storage.rb
|
197
|
+
homepage: https://github.com/alexandru/stuff-classifier/
|
198
|
+
licenses: []
|
199
|
+
metadata: {}
|
200
|
+
post_install_message:
|
201
|
+
rdoc_options: []
|
202
|
+
require_paths:
|
203
|
+
- lib
|
204
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - ">="
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: 1.9.1
|
209
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
210
|
+
requirements:
|
211
|
+
- - ">="
|
212
|
+
- !ruby/object:Gem::Version
|
213
|
+
version: '0'
|
214
|
+
requirements: []
|
215
|
+
rubyforge_project:
|
216
|
+
rubygems_version: 2.4.5.1
|
217
|
+
signing_key:
|
218
|
+
specification_version: 4
|
219
|
+
summary: Simple text classifier(s) implementation
|
220
|
+
test_files:
|
221
|
+
- test/helper.rb
|
222
|
+
- test/test_001_tokenizer.rb
|
223
|
+
- test/test_002_base.rb
|
224
|
+
- test/test_003_naive_bayes.rb
|
225
|
+
- test/test_004_tf_idf.rb
|
226
|
+
- test/test_005_in_memory_storage.rb
|
227
|
+
- test/test_006_file_storage.rb
|
228
|
+
- test/test_007_redis_storage.rb
|