classifier 1.4.2 → 1.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/classifier/extensions/string.rb +3 -1
- data/lib/classifier/extensions/word_hash.rb +84 -82
- data/lib/classifier/lsi/content_node.rb +2 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5bbc7714c3f1b5b6bf2b81484e071eb34c5510455d9520f8bd743ffe25a3bb6
|
4
|
+
data.tar.gz: f9236a1e0c086e1bda93645d94ea21f6634b76acbb4c99e62709e1b011311509
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79596fac37a6591587859335d4dbb280f038c1504a6fba9a48f7f3d5c83c50bee959887a827588d7418503ff141bff81401125604ddc104af35863dd84842e28
|
7
|
+
data.tar.gz: 5cd28357e92c65e10630700a65097350030e401f16d4aacaaf9407e8bd8dd2d200739965bab52d547054a90bfe25ab53935e6544f06ebe3f61d356cc972d8ead
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
3
3
|
# License:: LGPL
|
4
4
|
|
5
|
+
require 'set'
|
6
|
+
|
5
7
|
# These are extensions to the String class to provide convenience
|
6
8
|
# methods for the Classifier package.
|
7
9
|
class String
|
@@ -45,86 +47,86 @@ class String
|
|
45
47
|
d
|
46
48
|
end
|
47
49
|
|
48
|
-
CORPUS_SKIP_WORDS = Set.new(%w[
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
50
|
+
CORPUS_SKIP_WORDS = ::Set.new(%w[
|
51
|
+
a
|
52
|
+
again
|
53
|
+
all
|
54
|
+
along
|
55
|
+
are
|
56
|
+
also
|
57
|
+
an
|
58
|
+
and
|
59
|
+
as
|
60
|
+
at
|
61
|
+
but
|
62
|
+
by
|
63
|
+
came
|
64
|
+
can
|
65
|
+
cant
|
66
|
+
couldnt
|
67
|
+
did
|
68
|
+
didn
|
69
|
+
didnt
|
70
|
+
do
|
71
|
+
doesnt
|
72
|
+
dont
|
73
|
+
ever
|
74
|
+
first
|
75
|
+
from
|
76
|
+
have
|
77
|
+
her
|
78
|
+
here
|
79
|
+
him
|
80
|
+
how
|
81
|
+
i
|
82
|
+
if
|
83
|
+
in
|
84
|
+
into
|
85
|
+
is
|
86
|
+
isnt
|
87
|
+
it
|
88
|
+
itll
|
89
|
+
just
|
90
|
+
last
|
91
|
+
least
|
92
|
+
like
|
93
|
+
most
|
94
|
+
my
|
95
|
+
new
|
96
|
+
no
|
97
|
+
not
|
98
|
+
now
|
99
|
+
of
|
100
|
+
on
|
101
|
+
or
|
102
|
+
should
|
103
|
+
sinc
|
104
|
+
so
|
105
|
+
some
|
106
|
+
th
|
107
|
+
than
|
108
|
+
this
|
109
|
+
that
|
110
|
+
the
|
111
|
+
their
|
112
|
+
then
|
113
|
+
those
|
114
|
+
to
|
115
|
+
told
|
116
|
+
too
|
117
|
+
true
|
118
|
+
try
|
119
|
+
until
|
120
|
+
url
|
121
|
+
us
|
122
|
+
were
|
123
|
+
when
|
124
|
+
whether
|
125
|
+
while
|
126
|
+
with
|
127
|
+
within
|
128
|
+
yes
|
129
|
+
you
|
130
|
+
youll
|
131
|
+
])
|
130
132
|
end
|
@@ -45,10 +45,11 @@ module Classifier
|
|
45
45
|
|
46
46
|
# Perform the scaling transform
|
47
47
|
total_words = $GSL ? vec.sum : vec.sum_with_identity
|
48
|
+
total_unique_words = vec.count { |word| word != 0 }
|
48
49
|
|
49
50
|
# Perform first-order association transform if this vector has more
|
50
51
|
# than one word in it.
|
51
|
-
if total_words > 1.0
|
52
|
+
if total_words > 1.0 && total_unique_words > 1
|
52
53
|
weighted_total = 0.0
|
53
54
|
|
54
55
|
vec.each do |term|
|