classifier 1.4.2 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/classifier/extensions/string.rb +3 -1
- data/lib/classifier/extensions/word_hash.rb +84 -82
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e2d12a6941acf386b0567d5f504d20bffad8486111675977446867c6caf5e865
|
4
|
+
data.tar.gz: b44dca735ec32321183dc9291f339e68ef115af145d0d9ec78c767e9b3e132b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a37d6482fac59b1b6d3cf1c22f0144a08e580ab4ef681cb01189c266fa3de6d6a11668dfd2f1175db0f9d587c01302570be1814a457d2b05e2a9a72d9b9b975
|
7
|
+
data.tar.gz: b9a62dc7243527ae95cd89f946d1caf30ca0c2f52527a34427b4dbe68698b920dce8644b0ffd4f34cba4d646f2a17d8711698c096062b0596ed9228885bd822b
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
3
3
|
# License:: LGPL
|
4
4
|
|
5
|
+
require 'set'
|
6
|
+
|
5
7
|
# These are extensions to the String class to provide convenience
|
6
8
|
# methods for the Classifier package.
|
7
9
|
class String
|
@@ -45,86 +47,86 @@ class String
|
|
45
47
|
d
|
46
48
|
end
|
47
49
|
|
48
|
-
CORPUS_SKIP_WORDS = Set.new(%w[
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
50
|
+
CORPUS_SKIP_WORDS = ::Set.new(%w[
|
51
|
+
a
|
52
|
+
again
|
53
|
+
all
|
54
|
+
along
|
55
|
+
are
|
56
|
+
also
|
57
|
+
an
|
58
|
+
and
|
59
|
+
as
|
60
|
+
at
|
61
|
+
but
|
62
|
+
by
|
63
|
+
came
|
64
|
+
can
|
65
|
+
cant
|
66
|
+
couldnt
|
67
|
+
did
|
68
|
+
didn
|
69
|
+
didnt
|
70
|
+
do
|
71
|
+
doesnt
|
72
|
+
dont
|
73
|
+
ever
|
74
|
+
first
|
75
|
+
from
|
76
|
+
have
|
77
|
+
her
|
78
|
+
here
|
79
|
+
him
|
80
|
+
how
|
81
|
+
i
|
82
|
+
if
|
83
|
+
in
|
84
|
+
into
|
85
|
+
is
|
86
|
+
isnt
|
87
|
+
it
|
88
|
+
itll
|
89
|
+
just
|
90
|
+
last
|
91
|
+
least
|
92
|
+
like
|
93
|
+
most
|
94
|
+
my
|
95
|
+
new
|
96
|
+
no
|
97
|
+
not
|
98
|
+
now
|
99
|
+
of
|
100
|
+
on
|
101
|
+
or
|
102
|
+
should
|
103
|
+
sinc
|
104
|
+
so
|
105
|
+
some
|
106
|
+
th
|
107
|
+
than
|
108
|
+
this
|
109
|
+
that
|
110
|
+
the
|
111
|
+
their
|
112
|
+
then
|
113
|
+
those
|
114
|
+
to
|
115
|
+
told
|
116
|
+
too
|
117
|
+
true
|
118
|
+
try
|
119
|
+
until
|
120
|
+
url
|
121
|
+
us
|
122
|
+
were
|
123
|
+
when
|
124
|
+
whether
|
125
|
+
while
|
126
|
+
with
|
127
|
+
within
|
128
|
+
yes
|
129
|
+
you
|
130
|
+
youll
|
131
|
+
])
|
130
132
|
end
|