classifier 1.4.1 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/classifier/bayes.rb +2 -1
- data/lib/classifier/extensions/string.rb +3 -1
- data/lib/classifier/extensions/word_hash.rb +84 -82
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e2d12a6941acf386b0567d5f504d20bffad8486111675977446867c6caf5e865
|
4
|
+
data.tar.gz: b44dca735ec32321183dc9291f339e68ef115af145d0d9ec78c767e9b3e132b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a37d6482fac59b1b6d3cf1c22f0144a08e580ab4ef681cb01189c266fa3de6d6a11668dfd2f1175db0f9d587c01302570be1814a457d2b05e2a9a72d9b9b975
|
7
|
+
data.tar.gz: b9a62dc7243527ae95cd89f946d1caf30ca0c2f52527a34427b4dbe68698b920dce8644b0ffd4f34cba4d646f2a17d8711698c096062b0596ed9228885bd822b
|
data/lib/classifier/bayes.rb
CHANGED
@@ -152,10 +152,11 @@ module Classifier
|
|
152
152
|
category = category.prepare_category_name
|
153
153
|
raise StandardError, "No such category: #{category}" unless @categories.key?(category)
|
154
154
|
|
155
|
+
@total_words -= @category_word_count[category].to_i
|
156
|
+
|
155
157
|
@categories.delete(category)
|
156
158
|
@category_counts.delete(category)
|
157
159
|
@category_word_count.delete(category)
|
158
|
-
@total_words -= @category_word_count[category].to_i
|
159
160
|
end
|
160
161
|
end
|
161
162
|
end
|
data/lib/classifier/extensions/word_hash.rb
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
3
3
|
# License:: LGPL
|
4
4
|
|
5
|
+
require 'set'
|
6
|
+
|
5
7
|
# These are extensions to the String class to provide convenience
|
6
8
|
# methods for the Classifier package.
|
7
9
|
class String
|
@@ -45,86 +47,86 @@ class String
|
|
45
47
|
d
|
46
48
|
end
|
47
49
|
|
48
|
-
CORPUS_SKIP_WORDS = Set.new(%w[
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
50
|
+
CORPUS_SKIP_WORDS = ::Set.new(%w[
|
51
|
+
a
|
52
|
+
again
|
53
|
+
all
|
54
|
+
along
|
55
|
+
are
|
56
|
+
also
|
57
|
+
an
|
58
|
+
and
|
59
|
+
as
|
60
|
+
at
|
61
|
+
but
|
62
|
+
by
|
63
|
+
came
|
64
|
+
can
|
65
|
+
cant
|
66
|
+
couldnt
|
67
|
+
did
|
68
|
+
didn
|
69
|
+
didnt
|
70
|
+
do
|
71
|
+
doesnt
|
72
|
+
dont
|
73
|
+
ever
|
74
|
+
first
|
75
|
+
from
|
76
|
+
have
|
77
|
+
her
|
78
|
+
here
|
79
|
+
him
|
80
|
+
how
|
81
|
+
i
|
82
|
+
if
|
83
|
+
in
|
84
|
+
into
|
85
|
+
is
|
86
|
+
isnt
|
87
|
+
it
|
88
|
+
itll
|
89
|
+
just
|
90
|
+
last
|
91
|
+
least
|
92
|
+
like
|
93
|
+
most
|
94
|
+
my
|
95
|
+
new
|
96
|
+
no
|
97
|
+
not
|
98
|
+
now
|
99
|
+
of
|
100
|
+
on
|
101
|
+
or
|
102
|
+
should
|
103
|
+
sinc
|
104
|
+
so
|
105
|
+
some
|
106
|
+
th
|
107
|
+
than
|
108
|
+
this
|
109
|
+
that
|
110
|
+
the
|
111
|
+
their
|
112
|
+
then
|
113
|
+
those
|
114
|
+
to
|
115
|
+
told
|
116
|
+
too
|
117
|
+
true
|
118
|
+
try
|
119
|
+
until
|
120
|
+
url
|
121
|
+
us
|
122
|
+
were
|
123
|
+
when
|
124
|
+
whether
|
125
|
+
while
|
126
|
+
with
|
127
|
+
within
|
128
|
+
yes
|
129
|
+
you
|
130
|
+
youll
|
131
|
+
])
|
130
132
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lucas Carlson
|
@@ -16,14 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.0
|
19
|
+
version: '1.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.0
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mutex_m
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.2'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rake
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|