jekyll_ranked_search 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 285d83435623362b1b2a64895f39e9083a23e994f44772b8d7d0ee119bb0a2a8
4
- data.tar.gz: 3f87f6708fb4bb8070ffbf22a388345ec0ead46c251aa94b7828d3cb8e0653d6
3
+ metadata.gz: b7a50962fa0617b6acc80b702c06695a27f2ad056030e80e4c099230f958d39b
4
+ data.tar.gz: c26e47d1f49e7d50edd91c1263dde8a6188b0a2c491bc99d22223f98dc60b9a6
5
5
  SHA512:
6
- metadata.gz: c5b400d6ad614276e3f325923c7a0ba56a78fc216e4eeaf26acb5fd070a89e90b41e995c16760f337c983b1902d892d8f96bfdba1312000e8f9bfff71104cb00
7
- data.tar.gz: 132c37490020d2205f526ae84c0f190236e265ac19f33f12abb4654d0350547055e0549517e3df554b5a46d9271f43e80556dafbeadf5fd98f79c4992a126d70
6
+ metadata.gz: d6f1fa610a8afa056eb0451ea234294044beae405e4375a46e495c83cf6ab697a57f978e394082705f0692057ffb316e53a99d6d80099cfa6cebfdcd15a7df9e
7
+ data.tar.gz: 4557557cbbc06268b6fbe725de5d7cbe31ab72ffaffa3393b46ec9e9c4ba9eee68cff3ba39ce655d3e1c97f420eb152f86246b7eb88eb8333fab32d3ec029e66
@@ -122,15 +122,26 @@ class TfidfConverter < Jekyll::Generator
122
122
  site.data['tfidf'] = tfidf.to_json
123
123
  end
124
124
 
125
+ # Tokenize document by removing special characters and splitting
126
+ # the document into tokens.
127
+ # @param [String] doc The document to tokenize
128
+ # @return [Array<String>] individual tokens/words
125
129
  def tokenize_words(doc)
126
130
  # Remove stopwords from document
127
131
  @stopwords ||= self.load_stopwords
128
132
 
133
+ # TODO: Remove Liquid tags via regex
134
+
129
135
  # Split document into tokens
130
136
  splitted_doc = doc.strip.downcase.split
131
137
 
132
138
  # Remove stopwords in place
133
- splitted_doc.delete_if { |word| @stopwords.include?(word) }
139
+ splitted_doc.delete_if do |word|
140
+ if @stopwords.include?(word)
141
+ Jekyll.logger.debug "Removing stopword:", word
142
+ end
143
+ @stopwords.include?(word)
144
+ end
134
145
 
135
146
  # Remove special characters (only at beginning and end)
136
147
  splitted_doc.map! { |word| word.gsub(/[^a-z0-9_\/\-\s]/i, '') }
@@ -138,11 +149,12 @@ class TfidfConverter < Jekyll::Generator
138
149
  splitted_doc
139
150
  end
140
151
 
141
- # Load stopwords from file
152
+ # Load english stopwords from file
153
+ # @return [Array<String>] the stopwords
142
154
  def load_stopwords
143
155
  Jekyll.logger.info "Loading stopwords"
144
156
  stopwords = Set.new
145
- File.open(File.join(File.dirname(__FILE__), "stopwords.txt"), "r") do |f|
157
+ File.open(File.join(File.dirname(__FILE__), "stopwords/en.txt"), "r") do |f|
146
158
  f.each_line do |line|
147
159
  stopwords.add line.strip
148
160
  end
@@ -2,8 +2,6 @@
2
2
  'tis
3
3
  'twas
4
4
  've
5
- 10
6
- 39
7
5
  a
8
6
  a's
9
7
  able
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll_ranked_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Friedrich Ewald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-28 00:00:00.000000000 Z
11
+ date: 2024-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redcarpet
@@ -24,7 +24,10 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.6'
27
- description: Offline search plugin for Jekyll posts using TF-IDF
27
+ description: |
28
+ A webcomponent based search box that provides search functionality for your Jekyll blog.
29
+
30
+ If you have any feedback or suggestions for improvement, please open an issue on Github.
28
31
  email: freddiemailster@gmail.com
29
32
  executables: []
30
33
  extensions: []
@@ -33,7 +36,7 @@ files:
33
36
  - lib/jekyll_ranked_search.rb
34
37
  - lib/search.js
35
38
  - lib/search.json
36
- - lib/stopwords.txt
39
+ - lib/stopwords/en.txt
37
40
  homepage: https://github.com/f-ewald/jekyll_ranked_search
38
41
  licenses:
39
42
  - MIT