nbayes 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -4
- data/LICENSE.txt +1 -1
- data/{README.rdoc → README.md} +13 -7
- data/VERSION +1 -1
- data/lib/nbayes.rb +243 -89
- data/nbayes.gemspec +28 -28
- data/spec/nbayes_spec.rb +213 -131
- metadata +68 -66
- data/Gemfile.lock +0 -37
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c4c286395465f5e97e4ecba9407ce021039e2809
|
4
|
+
data.tar.gz: 7ac74a9b05b4fcfb1051a9a9ed2359c860b8564e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6359cc15db183b65f376062297c9c1f143befd795365d969b92f0ea55fd831a3c93aba8247f52fa3da1e6348a5f7aedff85bec3b52f3d7da204c0bb5a79bfbca
|
7
|
+
data.tar.gz: ee66196fab0c55a70557947dac32005c868451ec2b4032b64cc2c9c2ee3da8eafe14fb13ab8ad968fba7ebd30352e9eb1619dc96e646fa6bfe0d862cbd089074
|
data/Gemfile
CHANGED
@@ -6,9 +6,9 @@ source "http://rubygems.org"
|
|
6
6
|
# Add dependencies to develop your gem here.
|
7
7
|
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development do
|
9
|
-
gem "rspec", ">=
|
10
|
-
gem "rdoc", ">= 3.
|
11
|
-
gem "bundler", ">=
|
12
|
-
gem "jeweler", ">=
|
9
|
+
gem "rspec", ">= 3.9.0"
|
10
|
+
gem "rdoc", ">= 3.0.0"
|
11
|
+
gem "bundler", ">= 2.0.0"
|
12
|
+
gem "jeweler", ">= 2.3.0"
|
13
13
|
end
|
14
14
|
gem 'simplecov', :require => false, :group => :test
|
data/LICENSE.txt
CHANGED
data/{README.rdoc → README.md}
RENAMED
@@ -1,8 +1,10 @@
|
|
1
|
-
|
1
|
+
# nbayes
|
2
2
|
|
3
|
-
|
3
|
+
```
|
4
|
+
gem install nbayes
|
5
|
+
```
|
4
6
|
|
5
|
-
NBayes is a full-featured, Ruby implementation of Naive Bayes
|
7
|
+
NBayes is a full-featured, Ruby implementation of ``Naive Bayes``. Some of the features include:
|
6
8
|
|
7
9
|
* allows prior distribution on classes to be assumed uniform (optional)
|
8
10
|
* generic to work with all types of tokens, not just text
|
@@ -14,7 +16,7 @@ NBayes is a full-featured, Ruby implementation of Naive Bayes. Some of the feat
|
|
14
16
|
|
15
17
|
For more information, view this blog post: http://blog.oasic.net/2012/06/naive-bayes-for-ruby.html
|
16
18
|
|
17
|
-
|
19
|
+
## Contributing to nbayes
|
18
20
|
|
19
21
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
20
22
|
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
@@ -24,8 +26,12 @@ For more information, view this blog post: http://blog.oasic.net/2012/06/naive-b
|
|
24
26
|
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
25
27
|
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
26
28
|
|
27
|
-
|
29
|
+
## Acknowledgements
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
+
This project is supported by the GrammarBot [grammar checker](http://www.GrammarBot.io/)
|
32
|
+
|
33
|
+
|
34
|
+
## Copyright
|
35
|
+
|
36
|
+
Copyright (c) 2012-2020 Oasic Technologies LLC. See LICENSE.txt for further details.
|
31
37
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.3
|
data/lib/nbayes.rb
CHANGED
@@ -8,24 +8,44 @@ require 'yaml'
|
|
8
8
|
# - allows binarized or standard NB
|
9
9
|
# - allows Prior distribution on category to be assumed uniform (optional)
|
10
10
|
# - generic to work with all types of tokens, not just text
|
11
|
-
|
11
|
+
|
12
12
|
|
13
13
|
module NBayes
|
14
14
|
|
15
|
-
class
|
15
|
+
class Vocab
|
16
|
+
attr_accessor :log_size, :tokens
|
16
17
|
|
17
|
-
|
18
|
-
|
18
|
+
def initialize(options = {})
|
19
|
+
@tokens = Hash.new
|
20
|
+
# for smoothing, use log of vocab size, rather than vocab size
|
21
|
+
@log_size = options[:log_size]
|
22
|
+
end
|
19
23
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
def delete(token)
|
25
|
+
tokens.delete(token)
|
26
|
+
end
|
27
|
+
|
28
|
+
def each(&block)
|
29
|
+
tokens.keys.each(&block)
|
30
|
+
end
|
31
|
+
|
32
|
+
def size
|
33
|
+
if log_size
|
34
|
+
Math.log(tokens.count)
|
35
|
+
else
|
36
|
+
tokens.count
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def seen_token(token)
|
41
|
+
tokens[token] = 1
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
class Data
|
46
|
+
attr_accessor :data
|
47
|
+
def initialize(options = {})
|
27
48
|
@data = Hash.new
|
28
|
-
@data.default_proc = get_default_proc()
|
29
49
|
#@data = {
|
30
50
|
# "category1": {
|
31
51
|
# "tokens": Hash.new(0),
|
@@ -36,82 +56,199 @@ module NBayes
|
|
36
56
|
#}
|
37
57
|
end
|
38
58
|
|
59
|
+
def categories
|
60
|
+
data.keys
|
61
|
+
end
|
62
|
+
|
63
|
+
def token_trained?(token, category)
|
64
|
+
data[category] ? data[category][:tokens].has_key?(token) : false
|
65
|
+
end
|
66
|
+
|
67
|
+
def cat_data(category)
|
68
|
+
unless data[category].is_a? Hash
|
69
|
+
data[category] = new_category
|
70
|
+
end
|
71
|
+
data[category]
|
72
|
+
end
|
73
|
+
|
74
|
+
def category_stats
|
75
|
+
tmp = []
|
76
|
+
total_example_count = total_examples
|
77
|
+
self.each do |category|
|
78
|
+
e = example_count(category)
|
79
|
+
t = token_count(category)
|
80
|
+
tmp << "For category #{category}, %d examples (%.02f%% of the total) and %d total_tokens" % [e, 100.0 * e / total_example_count, t]
|
81
|
+
end
|
82
|
+
tmp.join("\n")
|
83
|
+
end
|
84
|
+
|
85
|
+
def each(&block)
|
86
|
+
data.keys.each(&block)
|
87
|
+
end
|
88
|
+
|
89
|
+
# Increment the number of training examples for this category
|
90
|
+
def increment_examples(category)
|
91
|
+
cat_data(category)[:examples] += 1
|
92
|
+
end
|
93
|
+
|
94
|
+
# Decrement the number of training examples for this category.
|
95
|
+
# Delete the category if the examples counter is 0.
|
96
|
+
def decrement_examples(category)
|
97
|
+
cat_data(category)[:examples] -= 1
|
98
|
+
delete_category(category) if cat_data(category)[:examples] < 1
|
99
|
+
end
|
100
|
+
|
101
|
+
def example_count(category)
|
102
|
+
cat_data(category)[:examples]
|
103
|
+
end
|
104
|
+
|
105
|
+
def token_count(category)
|
106
|
+
cat_data(category)[:total_tokens]
|
107
|
+
end
|
108
|
+
|
109
|
+
# XXX - Add Enumerable and see if I get inject?
|
110
|
+
# Total number of training instances
|
111
|
+
def total_examples
|
112
|
+
sum = 0
|
113
|
+
self.each {|category| sum += example_count(category) }
|
114
|
+
sum
|
115
|
+
end
|
116
|
+
|
117
|
+
# Add this token to this category
|
118
|
+
def add_token_to_category(category, token)
|
119
|
+
cat_data(category)[:tokens][token] += 1
|
120
|
+
cat_data(category)[:total_tokens] += 1
|
121
|
+
end
|
122
|
+
|
123
|
+
# Decrement the token counter in a category
|
124
|
+
# If the counter is 0, delete the token.
|
125
|
+
# If the total number of tokens is 0, delete the category.
|
126
|
+
def remove_token_from_category(category, token)
|
127
|
+
cat_data(category)[:tokens][token] -= 1
|
128
|
+
delete_token_from_category(category, token) if cat_data(category)[:tokens][token] < 1
|
129
|
+
cat_data(category)[:total_tokens] -= 1
|
130
|
+
delete_category(category) if cat_data(category)[:total_tokens] < 1
|
131
|
+
end
|
132
|
+
|
133
|
+
# How many times does this token appear in this category?
|
134
|
+
def count_of_token_in_category(category, token)
|
135
|
+
cat_data(category)[:tokens][token]
|
136
|
+
end
|
137
|
+
|
138
|
+
def delete_token_from_category(category, token)
|
139
|
+
count = count_of_token_in_category(category, token)
|
140
|
+
cat_data(category)[:tokens].delete(token)
|
141
|
+
# Update this category's total token count
|
142
|
+
cat_data(category)[:total_tokens] -= count
|
143
|
+
end
|
144
|
+
|
145
|
+
def purge_less_than(token, x)
|
146
|
+
return if token_count_across_categories(token) >= x
|
147
|
+
self.each do |category|
|
148
|
+
delete_token_from_category(category, token)
|
149
|
+
end
|
150
|
+
true # Let caller know we removed this token
|
151
|
+
end
|
152
|
+
|
153
|
+
# XXX - TODO - use count_of_token_in_category
|
154
|
+
# Return the total number of tokens we've seen across all categories
|
155
|
+
def token_count_across_categories(token)
|
156
|
+
data.keys.inject(0){|sum, cat| sum + @data[cat][:tokens][token] }
|
157
|
+
end
|
158
|
+
|
159
|
+
def reset_after_import
|
160
|
+
self.each {|category| cat_data(category)[:tokens].default = 0 }
|
161
|
+
end
|
162
|
+
|
163
|
+
def new_category
|
164
|
+
{
|
165
|
+
:tokens => Hash.new(0), # holds freq counts
|
166
|
+
:total_tokens => 0,
|
167
|
+
:examples => 0
|
168
|
+
}
|
169
|
+
end
|
170
|
+
|
171
|
+
def delete_category(category)
|
172
|
+
data.delete(category) if data.has_key?(category)
|
173
|
+
categories
|
174
|
+
end
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
class Base
|
179
|
+
|
180
|
+
attr_accessor :assume_uniform, :debug, :k, :vocab, :data
|
181
|
+
attr_reader :binarized
|
182
|
+
|
183
|
+
def initialize(options={})
|
184
|
+
@debug = false
|
185
|
+
@k = 1
|
186
|
+
@binarized = options[:binarized] || false
|
187
|
+
@assume_uniform = false
|
188
|
+
@vocab = Vocab.new(:log_size => options[:log_vocab])
|
189
|
+
@data = Data.new
|
190
|
+
end
|
39
191
|
|
40
192
|
# Allows removal of low frequency words that increase processing time and may overfit
|
41
193
|
# - tokens with a count less than x (measured by summing across all classes) are removed
|
42
194
|
# Ex: nb.purge_less_than(2)
|
43
195
|
#
|
44
|
-
# NOTE: this does not decrement the "examples" count, so purging is not *always* the same
|
45
|
-
#
|
196
|
+
# NOTE: this does not decrement the "examples" count, so purging is not *always* the same
|
197
|
+
# as if the item was never added in the first place, but usually so
|
46
198
|
def purge_less_than(x)
|
47
199
|
remove_list = {}
|
48
|
-
@vocab.
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
cat_data[:tokens].delete(token) # delete and retrieve count
|
54
|
-
cat_data[:total_tokens] -= count # subtract that count from cat counts
|
55
|
-
end # each category hash
|
56
|
-
#print "removing #{token}\n"
|
57
|
-
remove_list[token]=1
|
200
|
+
@vocab.each do |token|
|
201
|
+
if data.purge_less_than(token, x)
|
202
|
+
# print "removing #{token}\n"
|
203
|
+
remove_list[token] = 1
|
204
|
+
end
|
58
205
|
end # each vocab word
|
59
206
|
remove_list.keys.each {|token| @vocab.delete(token) }
|
60
|
-
#print "total vocab size is now #{
|
207
|
+
# print "total vocab size is now #{vocab.size}\n"
|
61
208
|
end
|
62
209
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
def get_default_proc
|
67
|
-
return lambda do |hash, category|
|
68
|
-
hash[category]= {
|
69
|
-
:tokens => Hash.new(0), # holds freq counts
|
70
|
-
:total_tokens => 0,
|
71
|
-
:examples => 0
|
72
|
-
}
|
73
|
-
end
|
210
|
+
# Delete an entire category from the classification data
|
211
|
+
def delete_category(category)
|
212
|
+
data.delete_category(category)
|
74
213
|
end
|
75
214
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
215
|
+
def train(tokens, category)
|
216
|
+
tokens = tokens.uniq if binarized
|
217
|
+
data.increment_examples(category)
|
218
|
+
tokens.each do |token|
|
219
|
+
vocab.seen_token(token)
|
220
|
+
data.add_token_to_category(category, token)
|
221
|
+
end
|
80
222
|
end
|
81
223
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
224
|
+
# Be carefull with this function:
|
225
|
+
# * It decrement the number of examples for the category.
|
226
|
+
# If the being-untrained category has no more examples, it is removed from the category list.
|
227
|
+
# * It untrain already trained tokens, non existing tokens are not considered.
|
228
|
+
def untrain(tokens, category)
|
229
|
+
tokens = tokens.uniq if binarized
|
230
|
+
data.decrement_examples(category)
|
231
|
+
|
232
|
+
tokens.each do |token|
|
233
|
+
if data.token_trained?(token, category)
|
234
|
+
vocab.delete(token)
|
235
|
+
data.remove_token_from_category(category, token)
|
236
|
+
end
|
90
237
|
end
|
91
238
|
end
|
92
239
|
|
93
240
|
def classify(tokens)
|
94
241
|
print "classify: #{tokens.join(', ')}\n" if @debug
|
95
242
|
probs = {}
|
96
|
-
tokens = tokens.uniq
|
243
|
+
tokens = tokens.uniq if binarized
|
97
244
|
probs = calculate_probabilities(tokens)
|
98
245
|
print "results: #{probs.to_yaml}\n" if @debug
|
99
246
|
probs.extend(NBayes::Result)
|
100
247
|
probs
|
101
248
|
end
|
102
249
|
|
103
|
-
|
104
|
-
|
105
|
-
sum = 0
|
106
|
-
@data.each {|cat, cat_data| sum += cat_data[:examples] }
|
107
|
-
sum
|
108
|
-
end
|
109
|
-
|
110
|
-
# Returns the size of the "vocab" - the number of unique tokens found in the text
|
111
|
-
# This is used in the Laplacian smoothing.
|
112
|
-
def vocab_size
|
113
|
-
return Math.log(@vocab.keys.length) if @log_vocab
|
114
|
-
@vocab.keys.length
|
250
|
+
def category_stats
|
251
|
+
data.category_stats
|
115
252
|
end
|
116
253
|
|
117
254
|
# Calculates the actual probability of a class given the tokens
|
@@ -119,21 +256,31 @@ module NBayes
|
|
119
256
|
def calculate_probabilities(tokens)
|
120
257
|
# P(class|words) = P(w1,...,wn|class) * P(class) / P(w1,...,wn)
|
121
258
|
# = argmax P(w1,...,wn|class) * P(class)
|
122
|
-
#
|
259
|
+
#
|
123
260
|
# P(wi|class) = (count(wi, class) + k)/(count(w,class) + kV)
|
124
261
|
prob_numerator = {}
|
125
|
-
v_size =
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
262
|
+
v_size = vocab.size
|
263
|
+
|
264
|
+
cat_prob = Math.log(1 / data.categories.count.to_f)
|
265
|
+
total_example_count = data.total_examples.to_f
|
266
|
+
|
267
|
+
data.each do |category|
|
268
|
+
unless assume_uniform
|
269
|
+
cat_prob = Math.log(data.example_count(category) / total_example_count)
|
270
|
+
end
|
271
|
+
|
272
|
+
log_probs = 0
|
273
|
+
denominator = (data.token_count(category) + @k * v_size).to_f
|
274
|
+
tokens.each do |token|
|
275
|
+
numerator = data.count_of_token_in_category(category, token) + @k
|
276
|
+
log_probs += Math.log( numerator / denominator )
|
277
|
+
end
|
278
|
+
prob_numerator[category] = log_probs + cat_prob
|
136
279
|
end
|
280
|
+
normalize(prob_numerator)
|
281
|
+
end
|
282
|
+
|
283
|
+
def normalize(prob_numerator)
|
137
284
|
# calculate the denominator, which normalizes this into a probability; it's just the sum of all numerators from above
|
138
285
|
normalizer = 0
|
139
286
|
prob_numerator.each {|cat, numerator| normalizer += numerator }
|
@@ -148,37 +295,47 @@ module NBayes
|
|
148
295
|
intermed = {}
|
149
296
|
renormalizer = 0
|
150
297
|
prob_numerator.each do |cat, numerator|
|
151
|
-
|
152
|
-
|
298
|
+
intermed[cat] = normalizer / numerator.to_f
|
299
|
+
renormalizer += intermed[cat]
|
153
300
|
end
|
154
301
|
# calculate final probs
|
155
302
|
final_probs = {}
|
156
303
|
intermed.each do |cat, value|
|
157
|
-
|
304
|
+
final_probs[cat] = value / renormalizer.to_f
|
158
305
|
end
|
159
306
|
final_probs
|
160
307
|
end
|
161
308
|
|
309
|
+
# called internally after yaml import to reset Hash defaults
|
310
|
+
def reset_after_import
|
311
|
+
data.reset_after_import
|
312
|
+
end
|
313
|
+
|
314
|
+
def self.from_yml(yml_data)
|
315
|
+
nbayes = YAML.load(yml_data)
|
316
|
+
nbayes.reset_after_import() # yaml does not properly set the defaults on the Hashes
|
317
|
+
nbayes
|
318
|
+
end
|
319
|
+
|
162
320
|
# Loads class instance from a data file (e.g., yaml)
|
163
321
|
def self.from(yml_file)
|
164
|
-
|
165
|
-
|
166
|
-
|
322
|
+
File.open(yml_file, "rb") do |file|
|
323
|
+
self.from_yml(file.read)
|
324
|
+
end
|
167
325
|
end
|
168
326
|
|
169
327
|
# Load class instance
|
170
328
|
def load(yml)
|
171
329
|
if yml.nil?
|
172
|
-
|
330
|
+
nbayes = NBayes::Base.new
|
173
331
|
elsif yml[0..2] == "---"
|
174
|
-
nbayes =
|
332
|
+
nbayes = self.class.from_yml(yml)
|
175
333
|
else
|
176
|
-
nbayes =
|
334
|
+
nbayes = self.class.from(yml)
|
177
335
|
end
|
178
|
-
nbayes.reset_after_import() # yaml does not properly set the defaults on the Hashes
|
179
336
|
nbayes
|
180
337
|
end
|
181
|
-
|
338
|
+
|
182
339
|
# Dumps class instance to a data file (e.g., yaml) or a string
|
183
340
|
def dump(arg)
|
184
341
|
if arg.instance_of? String
|
@@ -190,14 +347,11 @@ module NBayes
|
|
190
347
|
|
191
348
|
end
|
192
349
|
|
193
|
-
|
194
350
|
module Result
|
351
|
+
# Return the key having the largest value
|
195
352
|
def max_class
|
196
|
-
keys.max{|a,b| self[a] <=> self[b] }
|
353
|
+
keys.max{ |a,b| self[a] <=> self[b] }
|
197
354
|
end
|
198
355
|
end
|
199
356
|
|
200
357
|
end
|
201
|
-
|
202
|
-
|
203
|
-
|
data/nbayes.gemspec
CHANGED
@@ -2,27 +2,28 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: nbayes 0.1.3 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
|
-
s.name = "nbayes"
|
8
|
-
s.version = "0.1.
|
8
|
+
s.name = "nbayes".freeze
|
9
|
+
s.version = "0.1.3"
|
9
10
|
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.
|
14
|
-
s.
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib".freeze]
|
13
|
+
s.authors = ["oasic".freeze]
|
14
|
+
s.date = "2020-06-26"
|
15
|
+
s.description = "Ruby implementation of Naive Bayes that generates true probabilities per class, works with many token types, and provides lots of bells and whistles while being optimized for performance.".freeze
|
16
|
+
s.email = "j@oasic.net".freeze
|
15
17
|
s.extra_rdoc_files = [
|
16
18
|
"LICENSE.txt",
|
17
|
-
"README.
|
19
|
+
"README.md"
|
18
20
|
]
|
19
21
|
s.files = [
|
20
22
|
".document",
|
21
23
|
".rspec",
|
22
24
|
"Gemfile",
|
23
|
-
"Gemfile.lock",
|
24
25
|
"LICENSE.txt",
|
25
|
-
"README.
|
26
|
+
"README.md",
|
26
27
|
"Rakefile",
|
27
28
|
"VERSION",
|
28
29
|
"lib/nbayes.rb",
|
@@ -30,31 +31,30 @@ Gem::Specification.new do |s|
|
|
30
31
|
"spec/nbayes_spec.rb",
|
31
32
|
"spec/spec_helper.rb"
|
32
33
|
]
|
33
|
-
s.homepage = "http://github.com/oasic/nbayes"
|
34
|
-
s.licenses = ["MIT"]
|
35
|
-
s.
|
36
|
-
s.
|
37
|
-
s.summary = "Full-featured Ruby implementation of Naive Bayes classifier"
|
34
|
+
s.homepage = "http://github.com/oasic/nbayes".freeze
|
35
|
+
s.licenses = ["MIT".freeze]
|
36
|
+
s.rubygems_version = "2.6.14".freeze
|
37
|
+
s.summary = "Full-featured Ruby implementation of Naive Bayes classifier".freeze
|
38
38
|
|
39
39
|
if s.respond_to? :specification_version then
|
40
|
-
s.specification_version =
|
40
|
+
s.specification_version = 4
|
41
41
|
|
42
42
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
43
|
-
s.add_development_dependency(%q<rspec
|
44
|
-
s.add_development_dependency(%q<rdoc
|
45
|
-
s.add_development_dependency(%q<bundler
|
46
|
-
s.add_development_dependency(%q<jeweler
|
43
|
+
s.add_development_dependency(%q<rspec>.freeze, [">= 3.9.0"])
|
44
|
+
s.add_development_dependency(%q<rdoc>.freeze, [">= 3.0.0"])
|
45
|
+
s.add_development_dependency(%q<bundler>.freeze, [">= 2.0.0"])
|
46
|
+
s.add_development_dependency(%q<jeweler>.freeze, [">= 2.3.0"])
|
47
47
|
else
|
48
|
-
s.add_dependency(%q<rspec
|
49
|
-
s.add_dependency(%q<rdoc
|
50
|
-
s.add_dependency(%q<bundler
|
51
|
-
s.add_dependency(%q<jeweler
|
48
|
+
s.add_dependency(%q<rspec>.freeze, [">= 3.9.0"])
|
49
|
+
s.add_dependency(%q<rdoc>.freeze, [">= 3.0.0"])
|
50
|
+
s.add_dependency(%q<bundler>.freeze, [">= 2.0.0"])
|
51
|
+
s.add_dependency(%q<jeweler>.freeze, [">= 2.3.0"])
|
52
52
|
end
|
53
53
|
else
|
54
|
-
s.add_dependency(%q<rspec
|
55
|
-
s.add_dependency(%q<rdoc
|
56
|
-
s.add_dependency(%q<bundler
|
57
|
-
s.add_dependency(%q<jeweler
|
54
|
+
s.add_dependency(%q<rspec>.freeze, [">= 3.9.0"])
|
55
|
+
s.add_dependency(%q<rdoc>.freeze, [">= 3.0.0"])
|
56
|
+
s.add_dependency(%q<bundler>.freeze, [">= 2.0.0"])
|
57
|
+
s.add_dependency(%q<jeweler>.freeze, [">= 2.3.0"])
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
data/spec/nbayes_spec.rb
CHANGED
@@ -1,161 +1,243 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
4
|
+
describe NBayes do
|
5
|
+
let(:nbayes) { NBayes::Base.new }
|
6
|
+
|
7
|
+
describe 'should assign equal probability to each class' do
|
8
|
+
let(:results) { nbayes.classify(%w(a b c)) }
|
9
|
+
|
10
|
+
before do
|
11
|
+
nbayes.train(%w(a b c d e f g), 'classA')
|
12
|
+
nbayes.train(%w(a b c d e f g), 'classB')
|
13
|
+
end
|
14
|
+
|
15
|
+
specify { expect(results['classA']).to eq(0.5) }
|
16
|
+
specify { expect(results['classB']).to eq(0.5) }
|
17
|
+
end
|
18
|
+
|
19
|
+
describe 'should handle more than 2 classes' do
|
20
|
+
let(:results) { nbayes.classify(%w(a a a a b c)) }
|
21
|
+
|
22
|
+
before do
|
23
|
+
nbayes.train(%w(a a a a), 'classA')
|
24
|
+
nbayes.train(%w(b b b b), 'classB')
|
25
|
+
nbayes.train(%w(c c), 'classC')
|
26
|
+
end
|
27
|
+
|
28
|
+
specify { expect(results.max_class).to eq('classA') }
|
29
|
+
specify { expect(results['classA']).to be >= 0.4 }
|
30
|
+
specify { expect(results['classB']).to be <= 0.3 }
|
31
|
+
specify { expect(results['classC']).to be <= 0.3 }
|
32
|
+
end
|
33
|
+
|
34
|
+
describe 'should use smoothing by default to eliminate errors' do
|
35
|
+
context 'when dividing by zero' do
|
36
|
+
let(:results) { nbayes.classify(%w(x y z)) }
|
37
|
+
|
38
|
+
before do
|
39
|
+
nbayes.train(%w(a a a a), 'classA')
|
40
|
+
nbayes.train(%w(b b b b), 'classB')
|
41
|
+
end
|
42
|
+
|
43
|
+
specify { expect(results['classA']).to be >= 0.0 }
|
44
|
+
specify { expect(results['classB']).to be >= 0.0 }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe 'should optionally purge low frequency data' do
|
49
|
+
let(:results) { nbayes.classify(%w(c)) }
|
50
|
+
let(:token_count) { nbayes.data.count_of_token_in_category('classB', 'c') }
|
51
|
+
|
52
|
+
before do
|
53
|
+
100.times do
|
54
|
+
nbayes.train(%w(a a a a), 'classA')
|
55
|
+
nbayes.train(%w(b b b b), 'classB')
|
56
|
+
end
|
57
|
+
nbayes.train(%w(a), 'classA')
|
58
|
+
nbayes.train(%w(c b), 'classB')
|
59
|
+
end
|
60
|
+
|
61
|
+
context 'before purge' do
|
62
|
+
specify { expect(results.max_class).to eq('classB') }
|
63
|
+
specify { expect(results['classB']).to be > 0.5 }
|
64
|
+
specify { expect(token_count).to eq(1) }
|
65
|
+
end
|
66
|
+
|
67
|
+
context 'after purge' do
|
68
|
+
before { nbayes.purge_less_than(2) }
|
69
|
+
|
70
|
+
specify { expect(results['classA']).to eq(0.5) }
|
71
|
+
specify { expect(results['classB']).to eq(0.5) }
|
72
|
+
specify { expect(token_count).to be_zero }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'works on all tokens - not just strings' do
|
77
|
+
nbayes.train([1, 2, 3], 'low')
|
78
|
+
nbayes.train([5, 6, 7], 'high')
|
79
|
+
results = nbayes.classify([2])
|
80
|
+
expect(results.max_class).to eq('low')
|
81
|
+
results = nbayes.classify([6])
|
82
|
+
expect(results.max_class).to eq('high')
|
83
|
+
end
|
84
|
+
|
85
|
+
describe 'should optionally allow class distribution to be assumed uniform' do
|
86
|
+
context 'before uniform distribution' do
|
87
|
+
let(:before_results) { nbayes.classify(['a']) }
|
88
|
+
|
89
|
+
before do
|
90
|
+
nbayes.train(%w(a a a a b), 'classA')
|
91
|
+
nbayes.train(%w(a a a a), 'classA')
|
92
|
+
nbayes.train(%w(a a a a), 'classB')
|
93
|
+
end
|
94
|
+
|
95
|
+
specify { expect(before_results.max_class).to eq('classA') }
|
96
|
+
specify { expect(before_results['classA']).to be > 0.5 }
|
97
|
+
|
98
|
+
context 'and after uniform distribution assumption' do
|
99
|
+
let(:after_results) { nbayes.classify(['a']) }
|
100
|
+
|
101
|
+
before { nbayes.assume_uniform = true }
|
102
|
+
|
103
|
+
specify { expect(after_results.max_class).to eq('classB') }
|
104
|
+
specify { expect(after_results['classB']).to be > 0.5 }
|
105
|
+
end
|
40
106
|
end
|
41
|
-
@nbayes.train( %w[a], 'classA' )
|
42
|
-
@nbayes.train( %w[c b], 'classB' )
|
43
|
-
results = @nbayes.classify( %w[c] )
|
44
|
-
results.max_class.should == 'classB'
|
45
|
-
results['classB'].should > 0.5
|
46
|
-
@nbayes.data['classB'][:tokens]['c'].should == 1
|
47
|
-
|
48
|
-
@nbayes.purge_less_than(2) # this removes the entry for 'c' in 'classB' because it has freq of 1
|
49
|
-
# NOTE: this does not decrement the 'example' count
|
50
|
-
results = @nbayes.classify( %w[c] )
|
51
|
-
@nbayes.data['classB'][:tokens]['c'].should == 0
|
52
|
-
results['classA'].should == 0.5
|
53
|
-
results['classB'].should == 0.5
|
54
|
-
end
|
55
|
-
|
56
|
-
it "works on all tokens - not just strings" do
|
57
|
-
@nbayes.train( [1, 2, 3], 'low' )
|
58
|
-
@nbayes.train( [5, 6, 7], 'high' )
|
59
|
-
results = @nbayes.classify( [2] )
|
60
|
-
results.max_class.should == 'low'
|
61
|
-
results = @nbayes.classify( [6] )
|
62
|
-
results.max_class.should == 'high'
|
63
|
-
end
|
64
|
-
|
65
|
-
it "should optionally allow class distribution to be assumed uniform" do
|
66
|
-
# before uniform distribution
|
67
|
-
@nbayes.train( %w[a a a a b], 'classA' )
|
68
|
-
@nbayes.train( %w[a a a a], 'classA' )
|
69
|
-
@nbayes.train( %w[a a a a], 'classB' )
|
70
|
-
results = @nbayes.classify( ['a'] )
|
71
|
-
results.max_class.should == 'classA'
|
72
|
-
results['classA'].should > 0.5
|
73
|
-
# after uniform distribution assumption
|
74
|
-
@nbayes.assume_uniform = true
|
75
|
-
results = @nbayes.classify( ['a'] )
|
76
|
-
results.max_class.should == 'classB'
|
77
|
-
results['classB'].should > 0.5
|
78
|
-
end
|
79
|
-
|
80
|
-
it "should allow log of vocab size in smoothing" do
|
81
|
-
|
82
107
|
end
|
83
108
|
|
84
109
|
# In binarized mode, the frequency count is set to 1 for each token in each instance
|
85
110
|
# For text, this is "set of words" rather than "bag of words"
|
86
|
-
it
|
111
|
+
it 'should allow binarized mode' do
|
87
112
|
# w/o binarized mode, token repetition can skew the results
|
88
|
-
def train_it
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
end
|
95
|
-
train_it
|
96
|
-
results =
|
97
|
-
results.max_class.
|
98
|
-
results['classA'].
|
113
|
+
# def train_it
|
114
|
+
nbayes.train(%w(a a a a a a a a a a a), 'classA')
|
115
|
+
nbayes.train(%w(b b), 'classA')
|
116
|
+
nbayes.train(%w(a c), 'classB')
|
117
|
+
nbayes.train(%w(a c), 'classB')
|
118
|
+
nbayes.train(%w(a c), 'classB')
|
119
|
+
# end
|
120
|
+
# train_it
|
121
|
+
results = nbayes.classify(['a'])
|
122
|
+
expect(results.max_class).to eq('classA')
|
123
|
+
expect(results['classA']).to be > 0.5
|
99
124
|
# this does not happen in binarized mode
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
125
|
+
nbayes = NBayes::Base.new(binarized: true)
|
126
|
+
nbayes.train(%w(a a a a a a a a a a a), 'classA')
|
127
|
+
nbayes.train(%w(b b), 'classA')
|
128
|
+
nbayes.train(%w(a c), 'classB')
|
129
|
+
nbayes.train(%w(a c), 'classB')
|
130
|
+
nbayes.train(%w(a c), 'classB')
|
131
|
+
results = nbayes.classify(['a'])
|
132
|
+
expect(results.max_class).to eq('classB')
|
133
|
+
expect(results['classB']).to be > 0.5
|
105
134
|
end
|
106
135
|
|
107
|
-
it
|
136
|
+
it 'allows smoothing constant k to be set to any value' do
|
108
137
|
# increasing k increases smoothing
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
results =
|
138
|
+
nbayes.train(%w(a a a c), 'classA')
|
139
|
+
nbayes.train(%w(b b b d), 'classB')
|
140
|
+
expect(nbayes.k).to eq(1)
|
141
|
+
results = nbayes.classify(['c'])
|
113
142
|
prob_k1 = results['classA']
|
114
|
-
|
115
|
-
results =
|
143
|
+
nbayes.k = 5
|
144
|
+
results = nbayes.classify(['c'])
|
116
145
|
prob_k5 = results['classA']
|
117
|
-
prob_k1.
|
146
|
+
expect(prob_k1).to be > prob_k5 # increasing smoothing constant dampens the effect of the rare token 'c'
|
118
147
|
end
|
119
148
|
|
120
|
-
it
|
121
|
-
10_000.times do
|
122
|
-
|
123
|
-
|
149
|
+
it 'optionally allows using the log of vocab size during smoothing' do
|
150
|
+
10_000.times do
|
151
|
+
nbayes.train([rand(100)], 'classA')
|
152
|
+
nbayes.train(%w(b b b d), 'classB')
|
124
153
|
end
|
125
154
|
end
|
126
155
|
|
127
|
-
describe
|
156
|
+
describe 'saving' do
|
157
|
+
let(:tmp_dir) { File.join(File.dirname(__FILE__), 'tmp') }
|
158
|
+
let(:yml_file) { File.join(tmp_dir, 'test.yml') }
|
159
|
+
|
160
|
+
before { FileUtils.mkdir(tmp_dir) unless File.exist?(tmp_dir) }
|
161
|
+
|
162
|
+
after { FileUtils.rm(yml_file) if File.exist?(yml_file) }
|
163
|
+
|
164
|
+
it 'should save to yaml and load from yaml' do
|
165
|
+
nbayes.train(%w(a a a a), 'classA')
|
166
|
+
nbayes.train(%w(b b b b), 'classB')
|
167
|
+
results = nbayes.classify(['b'])
|
168
|
+
expect(results['classB']).to be >= 0.5
|
169
|
+
nbayes.dump(yml_file)
|
170
|
+
expect(File.exist?(yml_file)).to eq(true)
|
171
|
+
nbayes2 = NBayes::Base.from(yml_file)
|
172
|
+
results = nbayes.classify(['b'])
|
173
|
+
expect(results['classB']).to be >= 0.5
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'should dump to yaml string and load from yaml string' do
|
178
|
+
nbayes.train(%w(a a a a), 'classA')
|
179
|
+
nbayes.train(%w(b b b b), 'classB')
|
180
|
+
results = nbayes.classify(['b'])
|
181
|
+
expect(results['classB']).to be >= 0.5
|
182
|
+
yml = nbayes.dump(nbayes)
|
183
|
+
nbayes2 = NBayes::Base.new.load(yml)
|
184
|
+
results = nbayes.classify(['b'])
|
185
|
+
expect(results['classB']).to be >= 0.5
|
186
|
+
end
|
187
|
+
|
188
|
+
describe 'should delete a category' do
|
128
189
|
before do
|
129
|
-
|
130
|
-
|
131
|
-
|
190
|
+
nbayes.train(%w(a a a a), 'classA')
|
191
|
+
nbayes.train(%w(b b b b), 'classB')
|
192
|
+
expect(nbayes.data.categories).to eq(%w(classA classB))
|
193
|
+
expect(nbayes.delete_category('classB')).to eq(['classA'])
|
132
194
|
end
|
133
195
|
|
134
|
-
|
135
|
-
|
196
|
+
specify { expect(nbayes.data.categories).to eq(['classA']) }
|
197
|
+
end
|
198
|
+
|
199
|
+
describe 'should do nothing if asked to delete an inexistant category' do
|
200
|
+
before { nbayes.train(%w(a a a a), 'classA') }
|
201
|
+
|
202
|
+
specify { expect(nbayes.data.categories).to eq(['classA']) }
|
203
|
+
specify { expect(nbayes.delete_category('classB')).to eq(['classA']) }
|
204
|
+
specify { expect(nbayes.data.categories).to eq(['classA']) }
|
205
|
+
end
|
206
|
+
|
207
|
+
describe 'should untrain a class' do
|
208
|
+
let(:results) { nbayes.classify(%w(a b c)) }
|
209
|
+
|
210
|
+
before do
|
211
|
+
nbayes.train(%w(a b c d e f g), 'classA')
|
212
|
+
nbayes.train(%w(a b c d e f g), 'classB')
|
213
|
+
nbayes.train(%w(a b c d e f g), 'classB')
|
214
|
+
nbayes.untrain(%w(a b c d e f g), 'classB')
|
136
215
|
end
|
137
216
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
results = @nbayes.classify( ['b'] )
|
147
|
-
results['classB'].should >= 0.5
|
217
|
+
specify { expect(results['classA']).to eq(0.5) }
|
218
|
+
specify { expect(results['classB']).to eq(0.5) }
|
219
|
+
end
|
220
|
+
|
221
|
+
describe 'should remove the category when the only example is untrained' do
|
222
|
+
before do
|
223
|
+
nbayes.train(%w(a b c d e f g), 'classA')
|
224
|
+
nbayes.untrain(%w(a b c d e f g), 'classA')
|
148
225
|
end
|
226
|
+
|
227
|
+
specify { expect(nbayes.data.categories).to eq([]) }
|
149
228
|
end
|
150
229
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
230
|
+
describe 'try untraining a non-existant category' do
|
231
|
+
let(:results) { nbayes.classify(%w(a b c)) }
|
232
|
+
|
233
|
+
before do
|
234
|
+
nbayes.train(%w(a b c d e f g), 'classA')
|
235
|
+
nbayes.train(%w(a b c d e f g), 'classB')
|
236
|
+
nbayes.untrain(%w(a b c d e f g), 'classC')
|
237
|
+
end
|
238
|
+
|
239
|
+
specify { expect(nbayes.data.categories).to eq(%w(classA classB)) }
|
240
|
+
specify { expect(results['classA']).to eq(0.5) }
|
241
|
+
specify { expect(results['classB']).to eq(0.5) }
|
160
242
|
end
|
161
243
|
end
|
metadata
CHANGED
@@ -1,77 +1,86 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: nbayes
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 0.1.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.3
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
6
|
+
authors:
|
8
7
|
- oasic
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2020-06-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
16
14
|
name: rspec
|
17
|
-
requirement:
|
18
|
-
|
19
|
-
requirements:
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
20
17
|
- - ">="
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version:
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.9.0
|
23
20
|
type: :development
|
24
21
|
prerelease: false
|
25
|
-
version_requirements:
|
26
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.9.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
27
28
|
name: rdoc
|
28
|
-
requirement:
|
29
|
-
|
30
|
-
requirements:
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
31
|
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version:
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.0.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements:
|
37
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.0.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
38
42
|
name: bundler
|
39
|
-
requirement:
|
40
|
-
|
41
|
-
requirements:
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
42
45
|
- - ">="
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
version:
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.0.0
|
45
48
|
type: :development
|
46
49
|
prerelease: false
|
47
|
-
version_requirements:
|
48
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.0.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
49
56
|
name: jeweler
|
50
|
-
requirement:
|
51
|
-
|
52
|
-
requirements:
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
53
59
|
- - ">="
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version:
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.3.0
|
56
62
|
type: :development
|
57
63
|
prerelease: false
|
58
|
-
version_requirements:
|
59
|
-
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.3.0
|
69
|
+
description: Ruby implementation of Naive Bayes that generates true probabilities
|
70
|
+
per class, works with many token types, and provides lots of bells and whistles
|
71
|
+
while being optimized for performance.
|
60
72
|
email: j@oasic.net
|
61
73
|
executables: []
|
62
|
-
|
63
74
|
extensions: []
|
64
|
-
|
65
|
-
extra_rdoc_files:
|
75
|
+
extra_rdoc_files:
|
66
76
|
- LICENSE.txt
|
67
|
-
- README.
|
68
|
-
files:
|
69
|
-
- .document
|
70
|
-
- .rspec
|
77
|
+
- README.md
|
78
|
+
files:
|
79
|
+
- ".document"
|
80
|
+
- ".rspec"
|
71
81
|
- Gemfile
|
72
|
-
- Gemfile.lock
|
73
82
|
- LICENSE.txt
|
74
|
-
- README.
|
83
|
+
- README.md
|
75
84
|
- Rakefile
|
76
85
|
- VERSION
|
77
86
|
- lib/nbayes.rb
|
@@ -79,34 +88,27 @@ files:
|
|
79
88
|
- spec/nbayes_spec.rb
|
80
89
|
- spec/spec_helper.rb
|
81
90
|
homepage: http://github.com/oasic/nbayes
|
82
|
-
licenses:
|
91
|
+
licenses:
|
83
92
|
- MIT
|
93
|
+
metadata: {}
|
84
94
|
post_install_message:
|
85
95
|
rdoc_options: []
|
86
|
-
|
87
|
-
require_paths:
|
96
|
+
require_paths:
|
88
97
|
- lib
|
89
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
-
|
91
|
-
requirements:
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
92
100
|
- - ">="
|
93
|
-
- !ruby/object:Gem::Version
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
version: "0"
|
98
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
-
none: false
|
100
|
-
requirements:
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
101
105
|
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version:
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
104
108
|
requirements: []
|
105
|
-
|
106
109
|
rubyforge_project:
|
107
|
-
rubygems_version:
|
110
|
+
rubygems_version: 2.6.14
|
108
111
|
signing_key:
|
109
|
-
specification_version:
|
112
|
+
specification_version: 4
|
110
113
|
summary: Full-featured Ruby implementation of Naive Bayes classifier
|
111
114
|
test_files: []
|
112
|
-
|
data/Gemfile.lock
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: http://rubygems.org/
|
3
|
-
specs:
|
4
|
-
diff-lcs (1.1.3)
|
5
|
-
git (1.2.5)
|
6
|
-
jeweler (1.8.3)
|
7
|
-
bundler (~> 1.0)
|
8
|
-
git (>= 1.2.5)
|
9
|
-
rake
|
10
|
-
rdoc
|
11
|
-
json (1.7.3)
|
12
|
-
multi_json (1.3.6)
|
13
|
-
rake (0.9.2.2)
|
14
|
-
rdoc (3.12)
|
15
|
-
json (~> 1.4)
|
16
|
-
rspec (2.10.0)
|
17
|
-
rspec-core (~> 2.10.0)
|
18
|
-
rspec-expectations (~> 2.10.0)
|
19
|
-
rspec-mocks (~> 2.10.0)
|
20
|
-
rspec-core (2.10.1)
|
21
|
-
rspec-expectations (2.10.0)
|
22
|
-
diff-lcs (~> 1.1.3)
|
23
|
-
rspec-mocks (2.10.1)
|
24
|
-
simplecov (0.6.4)
|
25
|
-
multi_json (~> 1.0)
|
26
|
-
simplecov-html (~> 0.5.3)
|
27
|
-
simplecov-html (0.5.3)
|
28
|
-
|
29
|
-
PLATFORMS
|
30
|
-
ruby
|
31
|
-
|
32
|
-
DEPENDENCIES
|
33
|
-
bundler (>= 1.0.0)
|
34
|
-
jeweler (>= 1.8.3)
|
35
|
-
rdoc (>= 3.12)
|
36
|
-
rspec (>= 2.8.0)
|
37
|
-
simplecov
|