jekyll_ranked_search 0.0.6 → 0.0.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll_ranked_search.rb +9 -13
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de4d3c57baffb900a2d63699eb24a6f625ad620d0a5b11601bae89c38259ac77
|
4
|
+
data.tar.gz: 8673a43723f4a2a3c26e3f9b7e0bc3ad8c468775ab2a619761fdac66d3de8223
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 763bda3f5f7be27375fd589a68a1fae591d189aebf7618965eeb3a46ca40538e81495b04621821f8a71a37fdbfac152bbac3f6e2b4232c1b3bf335b389c8788c
|
7
|
+
data.tar.gz: a6ee82171745c1d46e1ea3dba7f74985f0ff00d4ff84f569e41a888f7b95f920e74195bad8a5fb253312b583b4ef4df3c349e58cbf1eae4302ff327bce2fe1e0
|
data/lib/jekyll_ranked_search.rb
CHANGED
@@ -135,31 +135,27 @@ class TfidfConverter < Jekyll::Generator
|
|
135
135
|
# Split document into tokens
|
136
136
|
splitted_doc = doc.strip.downcase.split
|
137
137
|
|
138
|
-
# Remove
|
139
|
-
splitted_doc.delete_if do |word|
|
140
|
-
if @stopwords.include?(word)
|
141
|
-
Jekyll.logger.debug "Removing stopword:", word
|
142
|
-
end
|
143
|
-
@stopwords.include?(word)
|
144
|
-
end
|
145
|
-
|
146
|
-
# Remove special characters (only at beginning and end)
|
138
|
+
# Remove special characters
|
147
139
|
splitted_doc.map! { |word| word.gsub(/[^a-z0-9_\/\-\s]/i, '') }
|
148
140
|
|
141
|
+
# Remove stopwords in place
|
142
|
+
splitted_doc.delete_if { |t| @stopwords.include? t }
|
143
|
+
|
149
144
|
splitted_doc
|
150
145
|
end
|
151
146
|
|
152
147
|
# Load english stopwords from file
|
153
|
-
# @return [
|
148
|
+
# @return [Set<String>] the stopwords
|
154
149
|
def load_stopwords
|
155
|
-
|
150
|
+
filename = File.join(File.dirname(__FILE__), "stopwords/en.txt")
|
151
|
+
Jekyll.logger.info "Loading stopwords: ", filename
|
156
152
|
stopwords = Set.new
|
157
|
-
File.open(
|
153
|
+
File.open(filename, "r") do |f|
|
158
154
|
f.each_line do |line|
|
159
155
|
stopwords.add line.strip
|
160
156
|
end
|
161
157
|
end
|
162
|
-
Jekyll.logger.info "
|
158
|
+
Jekyll.logger.info "Loaded #{stopwords.length} stopwords"
|
163
159
|
stopwords
|
164
160
|
end
|
165
161
|
|