jekyll_ranked_search 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll_ranked_search.rb +9 -13
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de4d3c57baffb900a2d63699eb24a6f625ad620d0a5b11601bae89c38259ac77
|
4
|
+
data.tar.gz: 8673a43723f4a2a3c26e3f9b7e0bc3ad8c468775ab2a619761fdac66d3de8223
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 763bda3f5f7be27375fd589a68a1fae591d189aebf7618965eeb3a46ca40538e81495b04621821f8a71a37fdbfac152bbac3f6e2b4232c1b3bf335b389c8788c
|
7
|
+
data.tar.gz: a6ee82171745c1d46e1ea3dba7f74985f0ff00d4ff84f569e41a888f7b95f920e74195bad8a5fb253312b583b4ef4df3c349e58cbf1eae4302ff327bce2fe1e0
|
data/lib/jekyll_ranked_search.rb
CHANGED
@@ -135,31 +135,27 @@ class TfidfConverter < Jekyll::Generator
|
|
135
135
|
# Split document into tokens
|
136
136
|
splitted_doc = doc.strip.downcase.split
|
137
137
|
|
138
|
-
# Remove
|
139
|
-
splitted_doc.delete_if do |word|
|
140
|
-
if @stopwords.include?(word)
|
141
|
-
Jekyll.logger.debug "Removing stopword:", word
|
142
|
-
end
|
143
|
-
@stopwords.include?(word)
|
144
|
-
end
|
145
|
-
|
146
|
-
# Remove special characters (only at beginning and end)
|
138
|
+
# Remove special characters
|
147
139
|
splitted_doc.map! { |word| word.gsub(/[^a-z0-9_\/\-\s]/i, '') }
|
148
140
|
|
141
|
+
# Remove stopwords in place
|
142
|
+
splitted_doc.delete_if { |t| @stopwords.include? t }
|
143
|
+
|
149
144
|
splitted_doc
|
150
145
|
end
|
151
146
|
|
152
147
|
# Load english stopwords from file
|
153
|
-
# @return [
|
148
|
+
# @return [Set<String>] the stopwords
|
154
149
|
def load_stopwords
|
155
|
-
|
150
|
+
filename = File.join(File.dirname(__FILE__), "stopwords/en.txt")
|
151
|
+
Jekyll.logger.info "Loading stopwords: ", filename
|
156
152
|
stopwords = Set.new
|
157
|
-
File.open(
|
153
|
+
File.open(filename, "r") do |f|
|
158
154
|
f.each_line do |line|
|
159
155
|
stopwords.add line.strip
|
160
156
|
end
|
161
157
|
end
|
162
|
-
Jekyll.logger.info "
|
158
|
+
Jekyll.logger.info "Loaded #{stopwords.length} stopwords"
|
163
159
|
stopwords
|
164
160
|
end
|
165
161
|
|