bayesball 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/Gemfile +10 -0
- data/Guardfile +5 -0
- data/LICENSE +28 -0
- data/README.md +29 -0
- data/Rakefile +7 -0
- data/bayesball.gemspec +17 -0
- data/lib/bayesball/classifier.rb +42 -0
- data/lib/bayesball/persistence/mongo.rb +47 -0
- data/lib/bayesball/persistence.rb +5 -0
- data/lib/bayesball/stopwords.txt +667 -0
- data/lib/bayesball/version.rb +3 -0
- data/lib/bayesball.rb +6 -0
- data/spec/bayesball/classifier_spec.rb +50 -0
- data/spec/bayesball/persistence/mongo_spec.rb +36 -0
- data/spec/spec_helper.rb +9 -0
- metadata +70 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
Copyright (c) 2012, Jason Staten, Katalus
|
2
|
+
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
7
|
+
|
8
|
+
- Redistributions of source code must retain the above copyright notice, this
|
9
|
+
list of conditions and the following disclaimer.
|
10
|
+
|
11
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
13
|
+
and/or other materials provided with the distribution.
|
14
|
+
|
15
|
+
- Neither the name of Katalus nor the names of its contributors may be used
|
16
|
+
to endorse or promote products derived from this software without specific
|
17
|
+
prior written permission.
|
18
|
+
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
20
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
21
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
28
|
+
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Bayesball
|
2
|
+
|
3
|
+
Bayesball is a naive Bayes text classifier for Ruby. Training data can be kept in memory (a plain Hash) or stored in MongoDB.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'bayesball'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install bayesball
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
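Train the classifier with example text for each category, then classify new text:

    require 'bayesball'

    classifier = Bayesball::Classifier.new
    classifier.train('basketball', 'The ball went in the hoop')
    classifier.train('baseball', 'He hit another grand slam!')
    classifier.classify('He put the ball through the hoop') # => "basketball"

By default the word counts are held in an in-memory Hash. To keep them in MongoDB, pass a `Bayesball::Persistence::Mongo.new('mongodb://localhost/bayesball')` instance to `Bayesball::Classifier.new`.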
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bayesball.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/bayesball/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for bayesball.
#
# File lists come from `git ls-files`, so the gem has to be built from inside
# a git checkout of the project.
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "bayesball"
  spec.version = Bayesball::VERSION

  # Authorship
  spec.authors = ["Jason Staten"]
  spec.email   = ["jstaten07@gmail.com"]

  # Descriptive metadata
  spec.summary     = 'A bayes classifier'
  spec.description = 'A bayes classifier'
  spec.homepage    = ""

  # Packaged files
  tracked_files     = `git ls-files`
  tracked_tests     = `git ls-files -- {test,spec,features}/*`
  tracked_binaries  = `git ls-files -- bin/*`

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.executables   = tracked_binaries.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'set'

module Bayesball
  # Naive Bayes text classifier.
  #
  # Training data lives in a "persistence" object: anything that responds to
  # +[]+ and +[]=+ keyed by category name and that iterates
  # +[category, counts]+ pairs, where +counts+ maps a word to the number of
  # times it was seen for that category. A plain Hash (the default) works.
  class Classifier
    # Words ignored while tokenizing, read once from the stopwords.txt file
    # that sits next to this source file.
    STOP_WORDS = IO.read(File.expand_path('../stopwords.txt', __FILE__)).split.freeze

    # The same words indexed for constant-time lookup; the list has several
    # hundred entries and is consulted once per token.
    STOP_WORD_SET = Set.new(STOP_WORDS).freeze

    # Pseudo-count used for a word a category has never seen, so that a single
    # unknown word does not produce log(0).
    UNSEEN_WORD_COUNT = 0.0001

    # @param persistence [#[], #[]=, Enumerable] store for per-category word
    #   counts; defaults to an in-memory Hash
    def initialize(persistence = {})
      @persistence = persistence
    end

    # Adds the words of +payload+ to the counts stored for +category+.
    #
    # @param category [String] category name
    # @param payload [String] training text
    # @return [Hash] the updated word counts for the category
    def train(category, payload)
      counts = @persistence[category] ||= {}

      word_counts(payload).each do |word, count|
        counts[word] = counts.fetch(word, 0) + count
      end

      # Written back explicitly because a persistence object may hand out a
      # copy from #[] rather than the stored hash itself.
      @persistence[category] = counts
    end

    # Tokenizes +payload+ and counts the remaining words.
    #
    # The text is lower-cased; every non-word character and every word of one
    # or two characters is replaced by a space; stop words are dropped.
    #
    # @param payload [String] text to tokenize
    # @return [Hash{String => Integer}] occurrences per word (default value 0)
    def word_counts(payload)
      cleaned = payload.downcase.gsub(/[^\w]|(\b\w{1,2}\b)/, ' ')

      cleaned.split.each_with_object(Hash.new(0)) do |word, tally|
        tally[word] += 1 unless STOP_WORD_SET.include?(word)
      end
    end

    # Computes a log-likelihood score of +payload+ for every trained category.
    #
    # Each distinct word of the payload contributes log(count / total) once,
    # where +count+ is the category's count for the word (UNSEEN_WORD_COUNT if
    # absent) and +total+ is the sum of all counts of the category.
    #
    # Categories without any counted words are skipped: dividing by their
    # zero total would yield +Infinity and make them win every comparison.
    #
    # @param payload [String] text to score
    # @return [Hash{String => Float}] score per category (default value 0);
    #   empty when the payload has no usable words or nothing is trained
    def score(payload)
      # Tokenize once; the result does not depend on the category.
      words = word_counts(payload).keys

      @persistence.each_with_object(Hash.new(0)) do |(category, counts), scores|
        total = counts.values.inject(0) { |sum, n| sum + n }.to_f
        next if total.zero?

        words.each do |word|
          scores[category] += Math.log(counts.fetch(word, UNSEEN_WORD_COUNT) / total)
        end
      end
    end

    # Picks the category with the highest score for +payload+.
    #
    # @param payload [String] text to classify
    # @return [String, nil] best-scoring category, or nil when no category
    #   could be scored (see #score)
    def classify(payload)
      best = score(payload).max_by { |_category, value| value }
      best && best.first
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'uri'

module Bayesball
  module Persistence
    # Stores per-category word counts in a MongoDB collection, one document
    # per category of the form { name: <category>, counts: <word counts> }.
    #
    # Exposes the Hash-like subset (+[]+, +[]=+, +each+ plus Enumerable) that
    # the classifier relies on.
    class Mongo
      include Enumerable

      # Collection used when the :collection option is not given.
      DEFAULT_COLLECTION = 'bayesball_categories'

      # The database handle returned by the Mongo driver.
      attr_reader :db

      # @param uri [String] connection string; its scheme must be "mongodb"
      #   and its path names the database
      # @param options [Hash] :collection selects the collection name; all
      #   other entries are passed on to ::Mongo::Connection.new
      # @raise [RuntimeError] if the URI scheme is not "mongodb"
      def initialize(uri, options = {})
        # Work on a copy so the caller's hash is not changed by #delete.
        options = options.dup
        collection_name = options.delete(:collection) || DEFAULT_COLLECTION
        @db = build_db(uri, options)
        @collection = db.collection(collection_name)
      end

      # @return [Boolean] true when the collection holds no documents
      def empty?
        @collection.count.zero?
      end

      # @param key [String] category name
      # @return [Hash] stored counts for the category, or an empty Hash when
      #   the category (or its counts field) is missing
      def [](key)
        doc = @collection.find_one(name: key)
        (doc && doc['counts']) || {}
      end

      # Replaces the stored counts for +key+, creating the document if needed.
      #
      # @param key [String] category name
      # @param value [Hash] word counts to store
      def []=(key, value)
        @collection.update({ name: key }, { name: key, counts: value }, { upsert: true })
      end

      # Yields a [name, counts] pair for every stored category.
      #
      # @return [Enumerator] when called without a block
      def each
        # enum_for replaces Enumerator.new(self, :each), a form that newer
        # Ruby versions no longer accept.
        return enum_for(:each) unless block_given?

        @collection.find.each do |item|
          yield [item['name'], item['counts']]
        end
      end

      private

      # Connects to the server named by +uri+ and selects its database,
      # authenticating when the URI carries both a user and a password.
      #
      # @param uri [String] mongodb:// connection string
      # @param options [Hash] passed to ::Mongo::Connection.new
      # @return the database handle
      # @raise [RuntimeError] if the URI scheme is not "mongodb"
      def build_db(uri, options = {})
        parts = URI.parse(uri)
        raise "scheme must be mongodb, found #{parts.scheme}" unless parts.scheme == 'mongodb'

        # The URI path is "/<database>"; drop the leading slash.
        db_name = parts.path.sub(%r{\A/}, '')
        db = ::Mongo::Connection.new(parts.host, parts.port, options).db(db_name)
        db.authenticate(parts.user, parts.password) if parts.user && parts.password
        db
      end
    end
  end
end
|
@@ -0,0 +1,667 @@
|
|
1
|
+
a
|
2
|
+
able
|
3
|
+
about
|
4
|
+
above
|
5
|
+
abst
|
6
|
+
accordance
|
7
|
+
according
|
8
|
+
accordingly
|
9
|
+
across
|
10
|
+
act
|
11
|
+
actually
|
12
|
+
added
|
13
|
+
adj
|
14
|
+
affected
|
15
|
+
affecting
|
16
|
+
affects
|
17
|
+
after
|
18
|
+
afterwards
|
19
|
+
again
|
20
|
+
against
|
21
|
+
ah
|
22
|
+
all
|
23
|
+
almost
|
24
|
+
alone
|
25
|
+
along
|
26
|
+
already
|
27
|
+
also
|
28
|
+
although
|
29
|
+
always
|
30
|
+
am
|
31
|
+
among
|
32
|
+
amongst
|
33
|
+
an
|
34
|
+
and
|
35
|
+
announce
|
36
|
+
another
|
37
|
+
any
|
38
|
+
anybody
|
39
|
+
anyhow
|
40
|
+
anymore
|
41
|
+
anyone
|
42
|
+
anything
|
43
|
+
anyway
|
44
|
+
anyways
|
45
|
+
anywhere
|
46
|
+
apparently
|
47
|
+
approximately
|
48
|
+
are
|
49
|
+
aren
|
50
|
+
arent
|
51
|
+
arise
|
52
|
+
around
|
53
|
+
as
|
54
|
+
aside
|
55
|
+
ask
|
56
|
+
asking
|
57
|
+
at
|
58
|
+
auth
|
59
|
+
available
|
60
|
+
away
|
61
|
+
awfully
|
62
|
+
b
|
63
|
+
back
|
64
|
+
be
|
65
|
+
became
|
66
|
+
because
|
67
|
+
become
|
68
|
+
becomes
|
69
|
+
becoming
|
70
|
+
been
|
71
|
+
before
|
72
|
+
beforehand
|
73
|
+
begin
|
74
|
+
beginning
|
75
|
+
beginnings
|
76
|
+
begins
|
77
|
+
behind
|
78
|
+
being
|
79
|
+
believe
|
80
|
+
below
|
81
|
+
beside
|
82
|
+
besides
|
83
|
+
between
|
84
|
+
beyond
|
85
|
+
biol
|
86
|
+
both
|
87
|
+
brief
|
88
|
+
briefly
|
89
|
+
but
|
90
|
+
by
|
91
|
+
c
|
92
|
+
ca
|
93
|
+
came
|
94
|
+
can
|
95
|
+
cannot
|
96
|
+
can't
|
97
|
+
cause
|
98
|
+
causes
|
99
|
+
certain
|
100
|
+
certainly
|
101
|
+
co
|
102
|
+
com
|
103
|
+
come
|
104
|
+
comes
|
105
|
+
contain
|
106
|
+
containing
|
107
|
+
contains
|
108
|
+
could
|
109
|
+
couldnt
|
110
|
+
d
|
111
|
+
date
|
112
|
+
did
|
113
|
+
didn't
|
114
|
+
different
|
115
|
+
do
|
116
|
+
does
|
117
|
+
doesn't
|
118
|
+
doing
|
119
|
+
done
|
120
|
+
don't
|
121
|
+
down
|
122
|
+
downwards
|
123
|
+
due
|
124
|
+
during
|
125
|
+
e
|
126
|
+
each
|
127
|
+
ed
|
128
|
+
edu
|
129
|
+
effect
|
130
|
+
eg
|
131
|
+
eight
|
132
|
+
eighty
|
133
|
+
either
|
134
|
+
else
|
135
|
+
elsewhere
|
136
|
+
end
|
137
|
+
ending
|
138
|
+
enough
|
139
|
+
especially
|
140
|
+
et
|
141
|
+
et-al
|
142
|
+
etc
|
143
|
+
even
|
144
|
+
ever
|
145
|
+
every
|
146
|
+
everybody
|
147
|
+
everyone
|
148
|
+
everything
|
149
|
+
everywhere
|
150
|
+
ex
|
151
|
+
except
|
152
|
+
f
|
153
|
+
far
|
154
|
+
few
|
155
|
+
ff
|
156
|
+
fifth
|
157
|
+
first
|
158
|
+
five
|
159
|
+
fix
|
160
|
+
followed
|
161
|
+
following
|
162
|
+
follows
|
163
|
+
for
|
164
|
+
former
|
165
|
+
formerly
|
166
|
+
forth
|
167
|
+
found
|
168
|
+
four
|
169
|
+
from
|
170
|
+
further
|
171
|
+
furthermore
|
172
|
+
g
|
173
|
+
gave
|
174
|
+
get
|
175
|
+
gets
|
176
|
+
getting
|
177
|
+
give
|
178
|
+
given
|
179
|
+
gives
|
180
|
+
giving
|
181
|
+
go
|
182
|
+
goes
|
183
|
+
gone
|
184
|
+
got
|
185
|
+
gotten
|
186
|
+
h
|
187
|
+
had
|
188
|
+
happens
|
189
|
+
hardly
|
190
|
+
has
|
191
|
+
hasn't
|
192
|
+
have
|
193
|
+
haven't
|
194
|
+
having
|
195
|
+
he
|
196
|
+
hed
|
197
|
+
hence
|
198
|
+
her
|
199
|
+
here
|
200
|
+
hereafter
|
201
|
+
hereby
|
202
|
+
herein
|
203
|
+
heres
|
204
|
+
hereupon
|
205
|
+
hers
|
206
|
+
herself
|
207
|
+
hes
|
208
|
+
hi
|
209
|
+
hid
|
210
|
+
him
|
211
|
+
himself
|
212
|
+
his
|
213
|
+
hither
|
214
|
+
home
|
215
|
+
how
|
216
|
+
howbeit
|
217
|
+
however
|
218
|
+
hundred
|
219
|
+
i
|
220
|
+
id
|
221
|
+
ie
|
222
|
+
if
|
223
|
+
i'll
|
224
|
+
im
|
225
|
+
immediate
|
226
|
+
immediately
|
227
|
+
importance
|
228
|
+
important
|
229
|
+
in
|
230
|
+
inc
|
231
|
+
indeed
|
232
|
+
index
|
233
|
+
information
|
234
|
+
instead
|
235
|
+
into
|
236
|
+
invention
|
237
|
+
inward
|
238
|
+
is
|
239
|
+
isn't
|
240
|
+
it
|
241
|
+
itd
|
242
|
+
it'll
|
243
|
+
its
|
244
|
+
itself
|
245
|
+
i've
|
246
|
+
j
|
247
|
+
just
|
248
|
+
k
|
249
|
+
keep
|
250
|
+
keeps
|
251
|
+
kept
|
252
|
+
kg
|
253
|
+
km
|
254
|
+
know
|
255
|
+
known
|
256
|
+
knows
|
257
|
+
l
|
258
|
+
largely
|
259
|
+
last
|
260
|
+
lately
|
261
|
+
later
|
262
|
+
latter
|
263
|
+
latterly
|
264
|
+
least
|
265
|
+
less
|
266
|
+
lest
|
267
|
+
let
|
268
|
+
lets
|
269
|
+
like
|
270
|
+
liked
|
271
|
+
likely
|
272
|
+
line
|
273
|
+
little
|
274
|
+
'll
|
275
|
+
look
|
276
|
+
looking
|
277
|
+
looks
|
278
|
+
ltd
|
279
|
+
m
|
280
|
+
made
|
281
|
+
mainly
|
282
|
+
make
|
283
|
+
makes
|
284
|
+
many
|
285
|
+
may
|
286
|
+
maybe
|
287
|
+
me
|
288
|
+
mean
|
289
|
+
means
|
290
|
+
meantime
|
291
|
+
meanwhile
|
292
|
+
merely
|
293
|
+
mg
|
294
|
+
might
|
295
|
+
million
|
296
|
+
miss
|
297
|
+
ml
|
298
|
+
more
|
299
|
+
moreover
|
300
|
+
most
|
301
|
+
mostly
|
302
|
+
mr
|
303
|
+
mrs
|
304
|
+
much
|
305
|
+
mug
|
306
|
+
must
|
307
|
+
my
|
308
|
+
myself
|
309
|
+
n
|
310
|
+
na
|
311
|
+
name
|
312
|
+
namely
|
313
|
+
nay
|
314
|
+
nd
|
315
|
+
near
|
316
|
+
nearly
|
317
|
+
necessarily
|
318
|
+
necessary
|
319
|
+
need
|
320
|
+
needs
|
321
|
+
neither
|
322
|
+
never
|
323
|
+
nevertheless
|
324
|
+
new
|
325
|
+
next
|
326
|
+
nine
|
327
|
+
ninety
|
328
|
+
no
|
329
|
+
nobody
|
330
|
+
non
|
331
|
+
none
|
332
|
+
nonetheless
|
333
|
+
noone
|
334
|
+
nor
|
335
|
+
normally
|
336
|
+
nos
|
337
|
+
not
|
338
|
+
noted
|
339
|
+
nothing
|
340
|
+
now
|
341
|
+
nowhere
|
342
|
+
o
|
343
|
+
obtain
|
344
|
+
obtained
|
345
|
+
obviously
|
346
|
+
of
|
347
|
+
off
|
348
|
+
often
|
349
|
+
oh
|
350
|
+
ok
|
351
|
+
okay
|
352
|
+
old
|
353
|
+
omitted
|
354
|
+
on
|
355
|
+
once
|
356
|
+
one
|
357
|
+
ones
|
358
|
+
only
|
359
|
+
onto
|
360
|
+
or
|
361
|
+
ord
|
362
|
+
other
|
363
|
+
others
|
364
|
+
otherwise
|
365
|
+
ought
|
366
|
+
our
|
367
|
+
ours
|
368
|
+
ourselves
|
369
|
+
out
|
370
|
+
outside
|
371
|
+
over
|
372
|
+
overall
|
373
|
+
owing
|
374
|
+
own
|
375
|
+
p
|
376
|
+
page
|
377
|
+
pages
|
378
|
+
part
|
379
|
+
particular
|
380
|
+
particularly
|
381
|
+
past
|
382
|
+
per
|
383
|
+
perhaps
|
384
|
+
placed
|
385
|
+
please
|
386
|
+
plus
|
387
|
+
poorly
|
388
|
+
possible
|
389
|
+
possibly
|
390
|
+
potentially
|
391
|
+
pp
|
392
|
+
predominantly
|
393
|
+
present
|
394
|
+
previously
|
395
|
+
primarily
|
396
|
+
probably
|
397
|
+
promptly
|
398
|
+
proud
|
399
|
+
provides
|
400
|
+
put
|
401
|
+
q
|
402
|
+
que
|
403
|
+
quickly
|
404
|
+
quite
|
405
|
+
qv
|
406
|
+
r
|
407
|
+
ran
|
408
|
+
rather
|
409
|
+
rd
|
410
|
+
re
|
411
|
+
readily
|
412
|
+
really
|
413
|
+
recent
|
414
|
+
recently
|
415
|
+
ref
|
416
|
+
refs
|
417
|
+
regarding
|
418
|
+
regardless
|
419
|
+
regards
|
420
|
+
related
|
421
|
+
relatively
|
422
|
+
research
|
423
|
+
respectively
|
424
|
+
resulted
|
425
|
+
resulting
|
426
|
+
results
|
427
|
+
right
|
428
|
+
run
|
429
|
+
s
|
430
|
+
said
|
431
|
+
same
|
432
|
+
saw
|
433
|
+
say
|
434
|
+
saying
|
435
|
+
says
|
436
|
+
sec
|
437
|
+
section
|
438
|
+
see
|
439
|
+
seeing
|
440
|
+
seem
|
441
|
+
seemed
|
442
|
+
seeming
|
443
|
+
seems
|
444
|
+
seen
|
445
|
+
self
|
446
|
+
selves
|
447
|
+
sent
|
448
|
+
seven
|
449
|
+
several
|
450
|
+
shall
|
451
|
+
she
|
452
|
+
shed
|
453
|
+
she'll
|
454
|
+
shes
|
455
|
+
should
|
456
|
+
shouldn't
|
457
|
+
show
|
458
|
+
showed
|
459
|
+
shown
|
460
|
+
showns
|
461
|
+
shows
|
462
|
+
significant
|
463
|
+
significantly
|
464
|
+
similar
|
465
|
+
similarly
|
466
|
+
since
|
467
|
+
six
|
468
|
+
slightly
|
469
|
+
so
|
470
|
+
some
|
471
|
+
somebody
|
472
|
+
somehow
|
473
|
+
someone
|
474
|
+
somethan
|
475
|
+
something
|
476
|
+
sometime
|
477
|
+
sometimes
|
478
|
+
somewhat
|
479
|
+
somewhere
|
480
|
+
soon
|
481
|
+
sorry
|
482
|
+
specifically
|
483
|
+
specified
|
484
|
+
specify
|
485
|
+
specifying
|
486
|
+
still
|
487
|
+
stop
|
488
|
+
strongly
|
489
|
+
sub
|
490
|
+
substantially
|
491
|
+
successfully
|
492
|
+
such
|
493
|
+
sufficiently
|
494
|
+
suggest
|
495
|
+
sup
|
496
|
+
sure
|
497
|
+
t
|
498
|
+
take
|
499
|
+
taken
|
500
|
+
taking
|
501
|
+
tell
|
502
|
+
tends
|
503
|
+
th
|
504
|
+
than
|
505
|
+
thank
|
506
|
+
thanks
|
507
|
+
thanx
|
508
|
+
that
|
509
|
+
that'll
|
510
|
+
thats
|
511
|
+
that've
|
512
|
+
the
|
513
|
+
their
|
514
|
+
theirs
|
515
|
+
them
|
516
|
+
themselves
|
517
|
+
then
|
518
|
+
thence
|
519
|
+
there
|
520
|
+
thereafter
|
521
|
+
thereby
|
522
|
+
thered
|
523
|
+
therefore
|
524
|
+
therein
|
525
|
+
there'll
|
526
|
+
thereof
|
527
|
+
therere
|
528
|
+
theres
|
529
|
+
thereto
|
530
|
+
thereupon
|
531
|
+
there've
|
532
|
+
these
|
533
|
+
they
|
534
|
+
theyd
|
535
|
+
they'll
|
536
|
+
theyre
|
537
|
+
they've
|
538
|
+
think
|
539
|
+
this
|
540
|
+
those
|
541
|
+
thou
|
542
|
+
though
|
543
|
+
thoughh
|
544
|
+
thousand
|
545
|
+
throug
|
546
|
+
through
|
547
|
+
throughout
|
548
|
+
thru
|
549
|
+
thus
|
550
|
+
til
|
551
|
+
tip
|
552
|
+
to
|
553
|
+
together
|
554
|
+
too
|
555
|
+
took
|
556
|
+
toward
|
557
|
+
towards
|
558
|
+
tried
|
559
|
+
tries
|
560
|
+
truly
|
561
|
+
try
|
562
|
+
trying
|
563
|
+
ts
|
564
|
+
twice
|
565
|
+
two
|
566
|
+
u
|
567
|
+
un
|
568
|
+
under
|
569
|
+
unfortunately
|
570
|
+
unless
|
571
|
+
unlike
|
572
|
+
unlikely
|
573
|
+
until
|
574
|
+
unto
|
575
|
+
up
|
576
|
+
upon
|
577
|
+
ups
|
578
|
+
us
|
579
|
+
use
|
580
|
+
used
|
581
|
+
useful
|
582
|
+
usefully
|
583
|
+
usefulness
|
584
|
+
uses
|
585
|
+
using
|
586
|
+
usually
|
587
|
+
v
|
588
|
+
value
|
589
|
+
various
|
590
|
+
've
|
591
|
+
very
|
592
|
+
via
|
593
|
+
viz
|
594
|
+
vol
|
595
|
+
vols
|
596
|
+
vs
|
597
|
+
w
|
598
|
+
want
|
599
|
+
wants
|
600
|
+
was
|
601
|
+
wasn't
|
602
|
+
way
|
603
|
+
we
|
604
|
+
wed
|
605
|
+
welcome
|
606
|
+
we'll
|
607
|
+
went
|
608
|
+
were
|
609
|
+
weren't
|
610
|
+
we've
|
611
|
+
what
|
612
|
+
whatever
|
613
|
+
what'll
|
614
|
+
whats
|
615
|
+
when
|
616
|
+
whence
|
617
|
+
whenever
|
618
|
+
where
|
619
|
+
whereafter
|
620
|
+
whereas
|
621
|
+
whereby
|
622
|
+
wherein
|
623
|
+
wheres
|
624
|
+
whereupon
|
625
|
+
wherever
|
626
|
+
whether
|
627
|
+
which
|
628
|
+
while
|
629
|
+
whim
|
630
|
+
whither
|
631
|
+
who
|
632
|
+
whod
|
633
|
+
whoever
|
634
|
+
whole
|
635
|
+
who'll
|
636
|
+
whom
|
637
|
+
whomever
|
638
|
+
whos
|
639
|
+
whose
|
640
|
+
why
|
641
|
+
widely
|
642
|
+
willing
|
643
|
+
wish
|
644
|
+
with
|
645
|
+
within
|
646
|
+
without
|
647
|
+
won't
|
648
|
+
words
|
649
|
+
world
|
650
|
+
would
|
651
|
+
wouldn't
|
652
|
+
www
|
653
|
+
x
|
654
|
+
y
|
655
|
+
yes
|
656
|
+
yet
|
657
|
+
you
|
658
|
+
youd
|
659
|
+
you'll
|
660
|
+
your
|
661
|
+
youre
|
662
|
+
yours
|
663
|
+
yourself
|
664
|
+
yourselves
|
665
|
+
you've
|
666
|
+
z
|
667
|
+
zero
|
data/lib/bayesball.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Bayesball::Classifier, run against the Mongo persistence at
# MONGO_URI, so the trained counts are written to that database.
describe Bayesball::Classifier do
  let(:subject) { Bayesball::Classifier.new(Bayesball::Persistence::Mongo.new(MONGO_URI)) }

  # Punctuation is stripped and words are lower-cased before counting.
  it 'should do word counts' do
    result = subject.word_counts('Hello, friend. Hello!')
    result.must_equal({'hello' => 2, 'friend' => 1})
  end

  # Smoke test only: checks that scoring runs after training; the returned
  # scores are not asserted.
  it 'should score a payload' do
    subject.train('basketball', 'The ball went in the hoop')
    subject.train('baseball', 'He hit another grand slam!')

    result = subject.score('The ball went in the hoop')
  end

  it 'should classify' do
    # Training text for each category is taken from Wikipedia.
    subject.train('basketball', <<-EOF)
Basketball is a team sport, the objective being to shoot a ball through a basket horizontally positioned to score points while following a set of rules. Usually, two teams of five players play on a marked rectangular court with a basket at each width end. Basketball is one of the world's most popular and widely viewed sports.[1]
A regulation basketball ring consists of a rim 18 inches in diameter and 10 feet high mounted to a backboard. A team can score a field goal by shooting the ball through the basket during regular play. A field goal scores two points for the shooting team if a player is touching or closer to the basket than the three-point line, and three points (known commonly as a 3 pointer or three) if the player is behind the three-point line. The team with the most points at the end of the game wins, but additional time (overtime) may be issued when the game ends with a draw. The ball can be advanced on the court by bouncing it while walking or running (dribbling) or throwing (passing) it to a teammate. It is a violation to move without dribbling the ball (traveling), to carry it, or to double dribble (to hold the ball with both hands then resume dribbling).
Various violations are generally called "fouls". Disruptive physical contact (a personal foul) is penalized, and a free throw is usually awarded to an offensive player if he is fouled while shooting the ball. A technical foul may also be issued when certain infractions occur, most commonly for unsportsmanlike conduct on the part of a player or coach. A technical foul gives the opposing team a free throw.
Basketball has evolved many commonly used techniques of shooting, passing, dribbling, and rebounding, as well as specialized player positions and offensive and defensive structures (player positioning) and techniques. Typically, the tallest members of a team will play "center", "power forward" or "small forward" positions, while shorter players or those who possess the best ball handling skills and speed play "point guard" or "shooting guard".
While competitive basketball is carefully regulated, numerous variations of basketball have developed for casual play. Competitive basketball is primarily an indoor sport played on a carefully marked and maintained basketball court, but less regulated variations are often played outdoors in both inner city and remote areas.
    EOF

    subject.train('baseball', <<-EOF)
Baseball is a bat-and-ball sport played between two teams of nine players each. The aim is to score runs by hitting a thrown ball with a bat and touching a series of four bases arranged at the corners of a ninety-foot diamond. Players on the batting team take turns hitting against the pitcher of the fielding team, which tries to stop them from scoring runs by getting hitters out in any of several ways. A player on the batting team can stop at any of the bases and later advance via a teammate's hit or other means. The teams switch between batting and fielding whenever the fielding team records three outs. One turn at bat for each team constitutes an inning and nine innings make up a professional game. The team with the most runs at the end of the game wins.
Evolving from older bat-and-ball games, an early form of baseball was being played in England by the mid-eighteenth century. This game was brought by immigrants to North America, where the modern version developed. By the late nineteenth century, baseball was widely recognized as the national sport of the United States. Baseball is now popular in North America, parts of Central and South America and the Caribbean, and parts of East Asia.
In North America, professional Major League Baseball (MLB) teams are divided into the National League (NL) and American League (AL), each with three divisions: East, West, and Central. The major league champion is determined by playoffs that culminate in the World Series. Five teams make the playoffs from each league: the three regular season division winners, plus two wild card teams. Baseball is the leading team sport in both Japan and Cuba, and the top level of play is similarly split between two leagues: Japan's Central League and Pacific League; Cuba's West League and East League. In the National and Central leagues, the pitcher is required to bat, per the traditional rules. In the American, Pacific, and both Cuban leagues, there is a tenth player, a designated hitter, who bats for the pitcher. Each top-level team has a farm system of one or more minor league teams.
    EOF

    subject.train('racquetball', <<-EOF)
Racquetball is a racquet sport played with a hollow rubber ball in an indoor or outdoor court. Joseph Sobek[1] is credited with inventing the modern sport of racquetball in 1950 (the outdoor, one-wall game goes back to at least 1910 in N.Y.C.),[2] adding a stringed racquet to paddleball in order to increase velocity and control. Unlike most racquet sports, such as tennis and badminton, there is no net to hit the ball over, and unlike squash no tin (out of bounds area at the bottom of front wall) to hit the ball above. Also, the court's walls, floor, and ceiling are legal playing surfaces, with the exception of court-specific designated hinders being out-of-bounds.[3] It is very similar to 40x20 handball, which is played in many countries.
    EOF

    subject.train('football', <<-EOF)
Football refers to a number of sports that involve, to varying degrees, kicking a ball with the foot to score a goal. The most popular of these sports worldwide is association football, more commonly known as just "football" or "soccer". Unqualified, the word football applies to whichever form of football is the most popular in the regional context in which the word appears, including association football, as well as American football, Australian rules football, Canadian football, Gaelic football, rugby league, rugby union[1] and other related games. These variations of football are known as football "codes".
Various forms of 'football' can be identified in history, often as popular peasant games. Contemporary codes of football can be traced back to the codification of these games at English public schools in the eighteenth and nineteenth century.[2][3] The influence and power of the British Empire allowed these rules of football to spread, including to areas of British influence outside of the directly controlled Empire,[4] though by the end of the nineteenth century, distinct regional codes were already developing: Gaelic Football, for example, deliberately incorporated the rules of local traditional football games in order to maintain their heritage.[5] In 1888, The Football League was founded in England, becoming the first of many professional football competitions. In the twentieth century, the various codes of football have become amongst the most popular team sports in the world.[6]
    EOF

    # A second training call for an existing category adds to its counts.
    subject.train('football', 'field goal')

    # Each sentence should land in the category whose training text shares
    # its distinctive words.
    subject.classify('the shot did not count because he was traveling').must_equal 'basketball'
    subject.classify('I want to play Major League Baseball some day').must_equal 'baseball'
    subject.classify('Hitting a ball made of rubber').must_equal 'racquetball'
    subject.classify('The winning team is kicking butt. They always make the ball go in the hoop every time').must_equal 'basketball'
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Bayesball
  module Persistence
    # Specs for the MongoDB-backed persistence, run against MONGO_URI.
    describe Mongo do
      let(:persistence) { Mongo.new(MONGO_URI) }

      # Reading a category always yields something, never nil.
      it 'should return counts' do
        truck_counts = persistence['truck']
        truck_counts.wont_be_nil
      end

      # Writing counts for a category must not raise.
      it 'should set counts' do
        car_counts = {'x' => 3, 'y' => 2}
        persistence['car'] = car_counts
      end

      # The store is Enumerable over [category, counts] pairs.
      it 'should reduce' do
        persistence['duck'] = {'x' => 8, 'y' => 5}
        persistence['cat'] = {'x' => 7, 'y' => 1}
        persistence['dog'] = {'x' => 2, 'y' => 2}

        totals = persistence.reduce({}) do |sums, (name, word_counts)|
          sums[name] = word_counts.values.reduce(:+).to_f
          sums
        end

        totals['duck'].must_equal 13
        totals['cat'].must_equal 8
        totals['dog'].must_equal 4
      end

      # Without a block, #each hands back an Enumerator.
      it 'should return enumerator from each' do
        enumerator = persistence.each
        enumerator.class.must_equal Enumerator
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bayesball
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jason Staten
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-04-09 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: A bayes classifier
|
15
|
+
email:
|
16
|
+
- jstaten07@gmail.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- .gitignore
|
22
|
+
- Gemfile
|
23
|
+
- Guardfile
|
24
|
+
- LICENSE
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- bayesball.gemspec
|
28
|
+
- lib/bayesball.rb
|
29
|
+
- lib/bayesball/classifier.rb
|
30
|
+
- lib/bayesball/persistence.rb
|
31
|
+
- lib/bayesball/persistence/mongo.rb
|
32
|
+
- lib/bayesball/stopwords.txt
|
33
|
+
- lib/bayesball/version.rb
|
34
|
+
- spec/bayesball/classifier_spec.rb
|
35
|
+
- spec/bayesball/persistence/mongo_spec.rb
|
36
|
+
- spec/spec_helper.rb
|
37
|
+
homepage: ''
|
38
|
+
licenses: []
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
none: false
|
45
|
+
requirements:
|
46
|
+
- - ! '>='
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
hash: -3818961451437177514
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
hash: -3818961451437177514
|
61
|
+
requirements: []
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 1.8.18
|
64
|
+
signing_key:
|
65
|
+
specification_version: 3
|
66
|
+
summary: A bayes classifier
|
67
|
+
test_files:
|
68
|
+
- spec/bayesball/classifier_spec.rb
|
69
|
+
- spec/bayesball/persistence/mongo_spec.rb
|
70
|
+
- spec/spec_helper.rb
|