stuff-classifier 0.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +49 -4
- data/Rakefile +9 -9
- data/lib/stuff-classifier.rb +4 -0
- data/lib/stuff-classifier/base.rb +34 -1
- data/lib/stuff-classifier/bayes.rb +2 -0
- data/lib/stuff-classifier/stop_words.rb +2 -0
- data/lib/stuff-classifier/storage.rb +71 -0
- data/lib/stuff-classifier/tokenizer.rb +2 -6
- data/lib/stuff-classifier/version.rb +1 -1
- data/stuff-classifier.gemspec +4 -0
- data/test/test_004_in_memory_storage.rb +31 -0
- data/test/test_005_file_storage.rb +54 -0
- metadata +61 -17
- data/.gitignore +0 -48
- data/Gemfile.lock +0 -46
- data/stuff-classifier-0.1.gem +0 -0
data/README.md
CHANGED
@@ -43,7 +43,16 @@ cls = StuffClassifier::Bayes.new("Cats or Dogs")
|
|
43
43
|
|
44
44
|
# for the Tf-Idf based implementation
|
45
45
|
cls = StuffClassifier::TfIdf.new("Cats or Dogs")
|
46
|
-
|
46
|
+
|
47
|
+
# these classifiers use word stemming by default, but if it has weird
|
48
|
+
# behavior, then you can disable it on init:
|
49
|
+
cls = StuffClassifier::TfIdf.new("Cats or Dogs", :stemming => false)
|
50
|
+
|
51
|
+
# also by default, the parsing phase filters out stop words, to
|
52
|
+
# disable or to come up with your own list of stop words, on a
|
53
|
+
# classifier instance you can do this:
|
54
|
+
cls.ignore_words = [ 'the', 'my', 'i', 'dont' ]
|
55
|
+
```
|
47
56
|
|
48
57
|
Training the classifier:
|
49
58
|
|
@@ -93,10 +102,46 @@ cls.classify("Who is eating my meat?")
|
|
93
102
|
#=> :dog
|
94
103
|
```
|
95
104
|
|
96
|
-
##
|
105
|
+
## Persistency
|
106
|
+
|
107
|
+
2 persistency layers for saving the training data are implemented:
|
108
|
+
|
109
|
+
- in memory (by default)
|
110
|
+
- on disk
|
111
|
+
|
112
|
+
To persist the data on disk, you can do this:
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
store = StuffClassifier::FileStorage.new(@storage_path)
|
116
|
+
|
117
|
+
# global setting
|
118
|
+
StuffClassifier::Base.storage = store
|
97
119
|
|
98
|
-
|
99
|
-
|
120
|
+
# or alternative local setting on instantiation, by means of an
|
121
|
+
# optional param ...
|
122
|
+
cls = StuffClassifier::Bayes.new("Cats or Dogs", :storage => store)
|
123
|
+
|
124
|
+
# after training is done, to persist the data ...
|
125
|
+
cls.save_state
|
126
|
+
|
127
|
+
# or you could just do this:
|
128
|
+
StuffClassifier::Bayes.open("Cats or Dogs") do |cls|
|
129
|
+
# when done, save_state is called on END
|
130
|
+
end
|
131
|
+
|
132
|
+
# to start fresh, deleting the saved training data for this classifier
|
133
|
+
StuffClassifier::Bayes.new("Cats or Dogs", :purge_state => true)
|
134
|
+
```
|
135
|
+
|
136
|
+
The name you give your classifier is important, as based on it the
|
137
|
+
data will get loaded and saved. For instance, the following 3 classifiers
|
138
|
+
will be stored in different buckets, being independent of each other.
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
cls1 = StuffClassifier::Bayes.new("Cats or Dogs")
|
142
|
+
cls2 = StuffClassifier::Bayes.new("True or False")
|
143
|
+
cls3 = StuffClassifier::Bayes.new("Spam or Ham")
|
144
|
+
```
|
100
145
|
|
101
146
|
## License
|
102
147
|
|
data/Rakefile
CHANGED
@@ -16,15 +16,15 @@ Rcov::RcovTask.new do |test|
|
|
16
16
|
test.rcov_opts << '--exclude "gems/*"'
|
17
17
|
end
|
18
18
|
|
19
|
-
require 'rdoc/task'
|
20
|
-
RDoc::Task.new do |rdoc|
|
21
|
-
version = StuffClassifier::VERSION
|
22
|
-
|
23
|
-
rdoc.rdoc_dir = 'rdoc'
|
24
|
-
rdoc.title = "stuff-classifier #{version}"
|
25
|
-
rdoc.rdoc_files.include('README*')
|
26
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
27
|
-
end
|
19
|
+
#require 'rdoc/task'
|
20
|
+
#RDoc::Task.new do |rdoc|
|
21
|
+
# version = StuffClassifier::VERSION
|
22
|
+
#
|
23
|
+
# rdoc.rdoc_dir = 'rdoc'
|
24
|
+
# rdoc.title = "stuff-classifier #{version}"
|
25
|
+
# rdoc.rdoc_files.include('README*')
|
26
|
+
# rdoc.rdoc_files.include('lib/**/*.rb')
|
27
|
+
#end
|
28
28
|
|
29
29
|
task :default => :test
|
30
30
|
|
data/lib/stuff-classifier.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
module StuffClassifier
|
2
2
|
autoload :VERSION, 'stuff-classifier/version'
|
3
3
|
autoload :STOP_WORDS, 'stuff-classifier/stop_words'
|
4
|
+
|
4
5
|
autoload :Tokenizer, 'stuff-classifier/tokenizer'
|
5
6
|
autoload :Base, 'stuff-classifier/base'
|
6
7
|
autoload :Bayes, 'stuff-classifier/bayes'
|
7
8
|
autoload :TfIdf, 'stuff-classifier/tf-idf'
|
9
|
+
|
10
|
+
autoload :InMemoryStorage, 'stuff-classifier/storage'
|
11
|
+
autoload :FileStorage, 'stuff-classifier/storage'
|
8
12
|
end
|
data/lib/stuff-classifier/base.rb
CHANGED
@@ -1,12 +1,22 @@
|
|
1
1
|
class StuffClassifier::Base
|
2
2
|
include StuffClassifier::Tokenizer
|
3
|
+
attr_reader :name
|
3
4
|
|
4
5
|
def initialize(name, opts={})
|
5
|
-
@name = name
|
6
6
|
@stemming = opts.key?(:stemming) ? opts[:stemming] : true
|
7
|
+
purge_state = opts[:purge_state]
|
8
|
+
|
9
|
+
@name = name
|
7
10
|
@wcount = {}
|
8
11
|
@ccount = {}
|
9
12
|
@ignore_words = nil
|
13
|
+
|
14
|
+
@storage = opts[:storage] || StuffClassifier::Base.storage
|
15
|
+
unless purge_state
|
16
|
+
@storage.load_state(self)
|
17
|
+
else
|
18
|
+
@storage.purge_state(self)
|
19
|
+
end
|
10
20
|
end
|
11
21
|
|
12
22
|
def incr_word(word, category)
|
@@ -63,4 +73,27 @@ class StuffClassifier::Base
|
|
63
73
|
# the final weighted average
|
64
74
|
(weight * assumed_prob + totals * basic_prob) / (weight + totals)
|
65
75
|
end
|
76
|
+
|
77
|
+
def save_state
|
78
|
+
@storage.save_state(self)
|
79
|
+
end
|
80
|
+
|
81
|
+
class << self
|
82
|
+
attr_writer :storage
|
83
|
+
|
84
|
+
def storage
|
85
|
+
@storage = StuffClassifier::InMemoryStorage.new unless defined? @storage
|
86
|
+
@storage
|
87
|
+
end
|
88
|
+
|
89
|
+
def open(name)
|
90
|
+
inst = self.new(name)
|
91
|
+
if block_given?
|
92
|
+
yield inst
|
93
|
+
inst.save_state
|
94
|
+
else
|
95
|
+
inst
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
66
99
|
end
|
data/lib/stuff-classifier/storage.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'msgpack'
|
2
|
+
|
3
|
+
module StuffClassifier
|
4
|
+
class InMemoryStorage
|
5
|
+
def initialize
|
6
|
+
@storage = {}
|
7
|
+
end
|
8
|
+
|
9
|
+
def load_state(classifier)
|
10
|
+
if @storage.key? classifier.name
|
11
|
+
_wcount, _ccount = @storage[classifier.name]
|
12
|
+
classifier.instance_eval do
|
13
|
+
@wcount = _wcount
|
14
|
+
@ccount = _ccount
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def save_state(classifier)
|
20
|
+
name = classifier.name
|
21
|
+
wcount = classifier.instance_variable_get :@wcount
|
22
|
+
ccount = classifier.instance_variable_get :@ccount
|
23
|
+
@storage[name] = [wcount, ccount]
|
24
|
+
end
|
25
|
+
|
26
|
+
def purge_state(classifier)
|
27
|
+
@storage.delete(classifier.name)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
class FileStorage
|
32
|
+
def initialize(path)
|
33
|
+
@storage = {}
|
34
|
+
@path = path
|
35
|
+
end
|
36
|
+
|
37
|
+
def load_state(classifier)
|
38
|
+
if @storage.length == 0 && File.exists?(@path)
|
39
|
+
@storage = MessagePack.unpack(File.read(@path))
|
40
|
+
end
|
41
|
+
|
42
|
+
if @storage.key? classifier.name
|
43
|
+
_wcount, _ccount = @storage[classifier.name]
|
44
|
+
classifier.instance_eval do
|
45
|
+
@wcount = _wcount
|
46
|
+
@ccount = _ccount
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def save_state(classifier)
|
52
|
+
name = classifier.name
|
53
|
+
wcount = classifier.instance_variable_get :@wcount
|
54
|
+
ccount = classifier.instance_variable_get :@ccount
|
55
|
+
@storage[name] = [wcount, ccount]
|
56
|
+
_write_to_file
|
57
|
+
end
|
58
|
+
|
59
|
+
def purge_state(classifier)
|
60
|
+
@storage.delete(classifier.name)
|
61
|
+
_write_to_file
|
62
|
+
end
|
63
|
+
|
64
|
+
def _write_to_file
|
65
|
+
File.open(@path, 'w') do |fh|
|
66
|
+
fh.flock(File::LOCK_EX)
|
67
|
+
fh.write(@storage.to_msgpack)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
data/lib/stuff-classifier/tokenizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'fast_stemmer'
|
2
2
|
|
3
3
|
module StuffClassifier::Tokenizer
|
4
|
+
attr_writer :stemming
|
4
5
|
|
5
6
|
def ignore_words=(value)
|
6
7
|
@ignore_words = value
|
@@ -14,10 +15,6 @@ module StuffClassifier::Tokenizer
|
|
14
15
|
defined?(@stemming) ? @stemming : false
|
15
16
|
end
|
16
17
|
|
17
|
-
def stemming=(value)
|
18
|
-
@stemming = value
|
19
|
-
end
|
20
|
-
|
21
18
|
def each_word(string)
|
22
19
|
string = string.strip
|
23
20
|
return if string == ''
|
@@ -37,8 +34,7 @@ module StuffClassifier::Tokenizer
|
|
37
34
|
w = w.downcase
|
38
35
|
end
|
39
36
|
|
40
|
-
yield w
|
41
|
-
words << w
|
37
|
+
words << (block_given? ? (yield w) : w)
|
42
38
|
end
|
43
39
|
end
|
44
40
|
|
data/stuff-classifier.gemspec
CHANGED
@@ -17,7 +17,11 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.require_paths = ["lib"]
|
18
18
|
|
19
19
|
s.add_runtime_dependency "fast-stemmer", ">= 1.0"
|
20
|
+
s.add_runtime_dependency "sqlite3"
|
21
|
+
s.add_runtime_dependency "sequel"
|
22
|
+
s.add_runtime_dependency "msgpack"
|
20
23
|
|
24
|
+
s.add_development_dependency "ruby-debug19"
|
21
25
|
s.add_development_dependency "bundler"
|
22
26
|
s.add_development_dependency "rake", ">= 0.9.2"
|
23
27
|
s.add_development_dependency "minitest", ">= 2.10"
|
data/test/test_004_in_memory_storage.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
class Test004InMemoryStorage < TestBase
|
5
|
+
before do
|
6
|
+
StuffClassifier::Base.storage = StuffClassifier::InMemoryStorage.new
|
7
|
+
|
8
|
+
StuffClassifier::Bayes.open("Cats or Dogs") do |cls|
|
9
|
+
cls.train(:dog, "Dogs are awesome, cats too. I love my dog")
|
10
|
+
cls.train(:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog")
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_for_persistance
|
15
|
+
test = self
|
16
|
+
StuffClassifier::Bayes.new("Cats or Dogs").instance_eval do
|
17
|
+
test.assert @storage.instance_of?(StuffClassifier::InMemoryStorage),
|
18
|
+
"@storage should be an instance of FileStorage"
|
19
|
+
test.assert @wcount.length > 0, "Word count should be persisted"
|
20
|
+
test.assert @ccount.length > 0, "Category count should be persisted"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_purge_state
|
25
|
+
test = self
|
26
|
+
StuffClassifier::Bayes.new("Cats or Dogs", :purge_state => true).instance_eval do
|
27
|
+
test.assert @wcount.length == 0, "Word count should be purged"
|
28
|
+
test.assert @ccount.length == 0, "Category count should be purged"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/test/test_005_file_storage.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
|
4
|
+
class Test005FileStorage < TestBase
|
5
|
+
before do
|
6
|
+
@storage_path = "/tmp/test_classifier.db"
|
7
|
+
@storage = StuffClassifier::FileStorage.new(@storage_path)
|
8
|
+
StuffClassifier::Base.storage = @storage
|
9
|
+
|
10
|
+
StuffClassifier::Bayes.open("Cats or Dogs") do |cls|
|
11
|
+
cls.train(:dog, "Dogs are awesome, cats too. I love my dog")
|
12
|
+
cls.train(:cat, "Cats are more preferred by software developers. I never could stand cats. I have a dog")
|
13
|
+
end
|
14
|
+
|
15
|
+
# redefining storage instance, forcing it to read from file again
|
16
|
+
StuffClassifier::Base.storage = StuffClassifier::FileStorage.new(@storage_path)
|
17
|
+
end
|
18
|
+
|
19
|
+
def teardown
|
20
|
+
File.unlink @storage_path if File.exists? @storage_path
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_for_persistance
|
24
|
+
assert ! @storage.equal?(StuffClassifier::Base.storage),
|
25
|
+
"Storage instance should not be the same"
|
26
|
+
|
27
|
+
test = self
|
28
|
+
StuffClassifier::Bayes.new("Cats or Dogs").instance_eval do
|
29
|
+
test.assert @storage.instance_of?(StuffClassifier::FileStorage),
|
30
|
+
"@storage should be an instance of FileStorage"
|
31
|
+
test.assert @wcount.length > 0, "Word count should be persisted"
|
32
|
+
test.assert @ccount.length > 0, "Category count should be persisted"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_file_created
|
37
|
+
assert File.exist?(@storage_path),
|
38
|
+
"File #@storage_path should exist"
|
39
|
+
|
40
|
+
content = File.read(@storage_path)
|
41
|
+
assert content.length > 100,
|
42
|
+
"Serialized content should have more than 100 chars"
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_purge_state
|
46
|
+
test = self
|
47
|
+
StuffClassifier::Bayes.new("Cats or Dogs", :purge_state => true).instance_eval do
|
48
|
+
test.assert @storage.instance_of?(StuffClassifier::FileStorage),
|
49
|
+
"@storage should be an instance of FileStorage"
|
50
|
+
test.assert @wcount.length == 0, "Word count should be purged"
|
51
|
+
test.assert @ccount.length == 0, "Category count should be purged"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stuff-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.4'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-20 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fast-stemmer
|
16
|
-
requirement: &
|
16
|
+
requirement: &77637680 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,54 @@ dependencies:
|
|
21
21
|
version: '1.0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *77637680
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: sqlite3
|
27
|
+
requirement: &77637470 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *77637470
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: sequel
|
38
|
+
requirement: &77637240 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *77637240
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: msgpack
|
49
|
+
requirement: &77637030 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *77637030
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: ruby-debug19
|
60
|
+
requirement: &77636820 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *77636820
|
25
69
|
- !ruby/object:Gem::Dependency
|
26
70
|
name: bundler
|
27
|
-
requirement: &
|
71
|
+
requirement: &77636610 !ruby/object:Gem::Requirement
|
28
72
|
none: false
|
29
73
|
requirements:
|
30
74
|
- - ! '>='
|
@@ -32,10 +76,10 @@ dependencies:
|
|
32
76
|
version: '0'
|
33
77
|
type: :development
|
34
78
|
prerelease: false
|
35
|
-
version_requirements: *
|
79
|
+
version_requirements: *77636610
|
36
80
|
- !ruby/object:Gem::Dependency
|
37
81
|
name: rake
|
38
|
-
requirement: &
|
82
|
+
requirement: &77636360 !ruby/object:Gem::Requirement
|
39
83
|
none: false
|
40
84
|
requirements:
|
41
85
|
- - ! '>='
|
@@ -43,10 +87,10 @@ dependencies:
|
|
43
87
|
version: 0.9.2
|
44
88
|
type: :development
|
45
89
|
prerelease: false
|
46
|
-
version_requirements: *
|
90
|
+
version_requirements: *77636360
|
47
91
|
- !ruby/object:Gem::Dependency
|
48
92
|
name: minitest
|
49
|
-
requirement: &
|
93
|
+
requirement: &77636110 !ruby/object:Gem::Requirement
|
50
94
|
none: false
|
51
95
|
requirements:
|
52
96
|
- - ! '>='
|
@@ -54,10 +98,10 @@ dependencies:
|
|
54
98
|
version: '2.10'
|
55
99
|
type: :development
|
56
100
|
prerelease: false
|
57
|
-
version_requirements: *
|
101
|
+
version_requirements: *77636110
|
58
102
|
- !ruby/object:Gem::Dependency
|
59
103
|
name: turn
|
60
|
-
requirement: &
|
104
|
+
requirement: &77635880 !ruby/object:Gem::Requirement
|
61
105
|
none: false
|
62
106
|
requirements:
|
63
107
|
- - ! '>='
|
@@ -65,10 +109,10 @@ dependencies:
|
|
65
109
|
version: 0.8.3
|
66
110
|
type: :development
|
67
111
|
prerelease: false
|
68
|
-
version_requirements: *
|
112
|
+
version_requirements: *77635880
|
69
113
|
- !ruby/object:Gem::Dependency
|
70
114
|
name: rcov
|
71
|
-
requirement: &
|
115
|
+
requirement: &77635650 !ruby/object:Gem::Requirement
|
72
116
|
none: false
|
73
117
|
requirements:
|
74
118
|
- - ! '>='
|
@@ -76,7 +120,7 @@ dependencies:
|
|
76
120
|
version: '0.9'
|
77
121
|
type: :development
|
78
122
|
prerelease: false
|
79
|
-
version_requirements: *
|
123
|
+
version_requirements: *77635650
|
80
124
|
description: 2 methods are provided for now - (1) naive bayes implementation + (2)
|
81
125
|
tf-idf weights
|
82
126
|
email:
|
@@ -85,9 +129,7 @@ executables: []
|
|
85
129
|
extensions: []
|
86
130
|
extra_rdoc_files: []
|
87
131
|
files:
|
88
|
-
- .gitignore
|
89
132
|
- Gemfile
|
90
|
-
- Gemfile.lock
|
91
133
|
- LICENSE.txt
|
92
134
|
- README.md
|
93
135
|
- Rakefile
|
@@ -95,15 +137,17 @@ files:
|
|
95
137
|
- lib/stuff-classifier/base.rb
|
96
138
|
- lib/stuff-classifier/bayes.rb
|
97
139
|
- lib/stuff-classifier/stop_words.rb
|
140
|
+
- lib/stuff-classifier/storage.rb
|
98
141
|
- lib/stuff-classifier/tf-idf.rb
|
99
142
|
- lib/stuff-classifier/tokenizer.rb
|
100
143
|
- lib/stuff-classifier/version.rb
|
101
|
-
- stuff-classifier-0.1.gem
|
102
144
|
- stuff-classifier.gemspec
|
103
145
|
- test/helper.rb
|
104
146
|
- test/test_001_tokenizer.rb
|
105
147
|
- test/test_002_naive_bayes.rb
|
106
148
|
- test/test_003_tf_idf.rb
|
149
|
+
- test/test_004_in_memory_storage.rb
|
150
|
+
- test/test_005_file_storage.rb
|
107
151
|
homepage: https://github.com/alexandru/stuff-classifier/
|
108
152
|
licenses: []
|
109
153
|
post_install_message:
|
data/.gitignore
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
# rcov generated
|
2
|
-
coverage
|
3
|
-
|
4
|
-
# rdoc generated
|
5
|
-
rdoc
|
6
|
-
|
7
|
-
# yard generated
|
8
|
-
doc
|
9
|
-
.yardoc
|
10
|
-
|
11
|
-
# bundler
|
12
|
-
.bundle
|
13
|
-
|
14
|
-
# jeweler generated
|
15
|
-
pkg
|
16
|
-
|
17
|
-
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
18
|
-
#
|
19
|
-
# * Create a file at ~/.gitignore
|
20
|
-
# * Include files you want ignored
|
21
|
-
# * Run: git config --global core.excludesfile ~/.gitignore
|
22
|
-
#
|
23
|
-
# After doing this, these files will be ignored in all your git projects,
|
24
|
-
# saving you from having to 'pollute' every project you touch with them
|
25
|
-
#
|
26
|
-
# Not sure what to needs to be ignored for particular editors/OSes? Here's some ideas to get you started. (Remember, remove the leading # of the line)
|
27
|
-
#
|
28
|
-
# For MacOS:
|
29
|
-
#
|
30
|
-
#.DS_Store
|
31
|
-
|
32
|
-
# For TextMate
|
33
|
-
#*.tmproj
|
34
|
-
#tmtags
|
35
|
-
|
36
|
-
# For emacs:
|
37
|
-
#*~
|
38
|
-
#\#*
|
39
|
-
#.\#*
|
40
|
-
|
41
|
-
# For vim:
|
42
|
-
#*.swp
|
43
|
-
|
44
|
-
# For redcar:
|
45
|
-
#.redcar
|
46
|
-
|
47
|
-
# For rubinius:
|
48
|
-
#*.rbc
|
data/Gemfile.lock
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
stuff-classifier (0.1)
|
5
|
-
fast-stemmer (>= 1.0)
|
6
|
-
|
7
|
-
GEM
|
8
|
-
remote: http://rubygems.org/
|
9
|
-
specs:
|
10
|
-
ansi (1.4.1)
|
11
|
-
archive-tar-minitar (0.5.2)
|
12
|
-
columnize (0.3.4)
|
13
|
-
fast-stemmer (1.0.0)
|
14
|
-
json (1.6.5)
|
15
|
-
linecache19 (0.5.12)
|
16
|
-
ruby_core_source (>= 0.1.4)
|
17
|
-
minitest (2.10.1)
|
18
|
-
rake (0.9.2.2)
|
19
|
-
rcov (0.9.11)
|
20
|
-
rdoc (3.12)
|
21
|
-
json (~> 1.4)
|
22
|
-
ruby-debug-base19 (0.11.25)
|
23
|
-
columnize (>= 0.3.1)
|
24
|
-
linecache19 (>= 0.5.11)
|
25
|
-
ruby_core_source (>= 0.1.4)
|
26
|
-
ruby-debug19 (0.11.6)
|
27
|
-
columnize (>= 0.3.1)
|
28
|
-
linecache19 (>= 0.5.11)
|
29
|
-
ruby-debug-base19 (>= 0.11.19)
|
30
|
-
ruby_core_source (0.1.5)
|
31
|
-
archive-tar-minitar (>= 0.5.2)
|
32
|
-
turn (0.8.3)
|
33
|
-
ansi
|
34
|
-
|
35
|
-
PLATFORMS
|
36
|
-
ruby
|
37
|
-
|
38
|
-
DEPENDENCIES
|
39
|
-
bundler
|
40
|
-
minitest (>= 2.10)
|
41
|
-
rake (>= 0.9.2)
|
42
|
-
rcov (>= 0.9)
|
43
|
-
rdoc (>= 3.1)
|
44
|
-
ruby-debug19
|
45
|
-
stuff-classifier!
|
46
|
-
turn (>= 0.8.3)
|
data/stuff-classifier-0.1.gem
DELETED
Binary file
|