jekyll_ranked_search 0.0.5 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll_ranked_search.rb +16 -8
- data/lib/{stopwords.txt → stopwords/en.txt} +0 -2
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de4d3c57baffb900a2d63699eb24a6f625ad620d0a5b11601bae89c38259ac77
|
4
|
+
data.tar.gz: 8673a43723f4a2a3c26e3f9b7e0bc3ad8c468775ab2a619761fdac66d3de8223
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 763bda3f5f7be27375fd589a68a1fae591d189aebf7618965eeb3a46ca40538e81495b04621821f8a71a37fdbfac152bbac3f6e2b4232c1b3bf335b389c8788c
|
7
|
+
data.tar.gz: a6ee82171745c1d46e1ea3dba7f74985f0ff00d4ff84f569e41a888f7b95f920e74195bad8a5fb253312b583b4ef4df3c349e58cbf1eae4302ff327bce2fe1e0
|
data/lib/jekyll_ranked_search.rb
CHANGED
@@ -122,32 +122,40 @@ class TfidfConverter < Jekyll::Generator
|
|
122
122
|
site.data['tfidf'] = tfidf.to_json
|
123
123
|
end
|
124
124
|
|
125
|
+
# Tokenize document by removing special characters and splitting
|
126
|
+
# the document into tokens.
|
127
|
+
# @param [String] doc The document to tokenize
|
128
|
+
# @return [Array<String>] individual tokens/words
|
125
129
|
def tokenize_words(doc)
|
126
130
|
# Remove stopwords from document
|
127
131
|
@stopwords ||= self.load_stopwords
|
128
132
|
|
133
|
+
# TODO: Remove Liquid tags via regex
|
134
|
+
|
129
135
|
# Split document into tokens
|
130
136
|
splitted_doc = doc.strip.downcase.split
|
131
137
|
|
132
|
-
# Remove
|
133
|
-
splitted_doc.delete_if { |word| @stopwords.include?(word) }
|
134
|
-
|
135
|
-
# Remove special characters (only at beginning and end)
|
138
|
+
# Remove special characters
|
136
139
|
splitted_doc.map! { |word| word.gsub(/[^a-z0-9_\/\-\s]/i, '') }
|
137
140
|
|
141
|
+
# Remove stopwords in place
|
142
|
+
splitted_doc.delete_if { |t| @stopwords.include? t }
|
143
|
+
|
138
144
|
splitted_doc
|
139
145
|
end
|
140
146
|
|
141
|
-
# Load stopwords from file
|
147
|
+
# Load english stopwords from file
|
148
|
+
# @return [Set<String>] the stopwords
|
142
149
|
def load_stopwords
|
143
|
-
|
150
|
+
filename = File.join(File.dirname(__FILE__), "stopwords/en.txt")
|
151
|
+
Jekyll.logger.info "Loading stopwords: ", filename
|
144
152
|
stopwords = Set.new
|
145
|
-
File.open(
|
153
|
+
File.open(filename, "r") do |f|
|
146
154
|
f.each_line do |line|
|
147
155
|
stopwords.add line.strip
|
148
156
|
end
|
149
157
|
end
|
150
|
-
Jekyll.logger.info "
|
158
|
+
Jekyll.logger.info "Loaded #{stopwords.length} stopwords"
|
151
159
|
stopwords
|
152
160
|
end
|
153
161
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll_ranked_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Friedrich Ewald
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redcarpet
|
@@ -24,7 +24,10 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '3.6'
|
27
|
-
description:
|
27
|
+
description: |
|
28
|
+
A webcomponent based search box that provides search functionality for your Jekyll blog.
|
29
|
+
|
30
|
+
If you have any feedback or suggestions for improvement, please open an issue on Github.
|
28
31
|
email: freddiemailster@gmail.com
|
29
32
|
executables: []
|
30
33
|
extensions: []
|
@@ -33,7 +36,7 @@ files:
|
|
33
36
|
- lib/jekyll_ranked_search.rb
|
34
37
|
- lib/search.js
|
35
38
|
- lib/search.json
|
36
|
-
- lib/stopwords.txt
|
39
|
+
- lib/stopwords/en.txt
|
37
40
|
homepage: https://github.com/f-ewald/jekyll_ranked_search
|
38
41
|
licenses:
|
39
42
|
- MIT
|