jekyll_ranked_search 0.0.5 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 285d83435623362b1b2a64895f39e9083a23e994f44772b8d7d0ee119bb0a2a8
4
- data.tar.gz: 3f87f6708fb4bb8070ffbf22a388345ec0ead46c251aa94b7828d3cb8e0653d6
3
+ metadata.gz: de4d3c57baffb900a2d63699eb24a6f625ad620d0a5b11601bae89c38259ac77
4
+ data.tar.gz: 8673a43723f4a2a3c26e3f9b7e0bc3ad8c468775ab2a619761fdac66d3de8223
5
5
  SHA512:
6
- metadata.gz: c5b400d6ad614276e3f325923c7a0ba56a78fc216e4eeaf26acb5fd070a89e90b41e995c16760f337c983b1902d892d8f96bfdba1312000e8f9bfff71104cb00
7
- data.tar.gz: 132c37490020d2205f526ae84c0f190236e265ac19f33f12abb4654d0350547055e0549517e3df554b5a46d9271f43e80556dafbeadf5fd98f79c4992a126d70
6
+ metadata.gz: 763bda3f5f7be27375fd589a68a1fae591d189aebf7618965eeb3a46ca40538e81495b04621821f8a71a37fdbfac152bbac3f6e2b4232c1b3bf335b389c8788c
7
+ data.tar.gz: a6ee82171745c1d46e1ea3dba7f74985f0ff00d4ff84f569e41a888f7b95f920e74195bad8a5fb253312b583b4ef4df3c349e58cbf1eae4302ff327bce2fe1e0
@@ -122,32 +122,40 @@ class TfidfConverter < Jekyll::Generator
122
122
  site.data['tfidf'] = tfidf.to_json
123
123
  end
124
124
 
125
+ # Tokenize document by removing special characters and splitting
126
+ # the document into tokens.
127
+ # @param [String] doc The document to tokenize
128
+ # @return [Array<String>] individual tokens/words
125
129
  def tokenize_words(doc)
126
130
  # Remove stopwords from document
127
131
  @stopwords ||= self.load_stopwords
128
132
 
133
+ # TODO: Remove Liquid tags via regex
134
+
129
135
  # Split document into tokens
130
136
  splitted_doc = doc.strip.downcase.split
131
137
 
132
- # Remove stopwords in place
133
- splitted_doc.delete_if { |word| @stopwords.include?(word) }
134
-
135
- # Remove special characters (only at beginning and end)
138
+ # Remove special characters
136
139
  splitted_doc.map! { |word| word.gsub(/[^a-z0-9_\/\-\s]/i, '') }
137
140
 
141
+ # Remove stopwords in place
142
+ splitted_doc.delete_if { |t| @stopwords.include? t }
143
+
138
144
  splitted_doc
139
145
  end
140
146
 
141
- # Load stopwords from file
147
+ # Load english stopwords from file
148
+ # @return [Set<String>] the stopwords
142
149
  def load_stopwords
143
- Jekyll.logger.info "Loading stopwords"
150
+ filename = File.join(File.dirname(__FILE__), "stopwords/en.txt")
151
+ Jekyll.logger.info "Loading stopwords: ", filename
144
152
  stopwords = Set.new
145
- File.open(File.join(File.dirname(__FILE__), "stopwords.txt"), "r") do |f|
153
+ File.open(filename, "r") do |f|
146
154
  f.each_line do |line|
147
155
  stopwords.add line.strip
148
156
  end
149
157
  end
150
- Jekyll.logger.info "Done loading #{stopwords.length} stopwords"
158
+ Jekyll.logger.info "Loaded #{stopwords.length} stopwords"
151
159
  stopwords
152
160
  end
153
161
 
@@ -2,8 +2,6 @@
2
2
  'tis
3
3
  'twas
4
4
  've
5
- 10
6
- 39
7
5
  a
8
6
  a's
9
7
  able
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll_ranked_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Friedrich Ewald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-28 00:00:00.000000000 Z
11
+ date: 2024-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redcarpet
@@ -24,7 +24,10 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.6'
27
- description: Offline search plugin for Jekyll posts using TF-IDF
27
+ description: |
28
+ A webcomponent based search box that provides search functionality for your Jekyll blog.
29
+
30
+ If you have any feedback or suggestions for improvement, please open an issue on Github.
28
31
  email: freddiemailster@gmail.com
29
32
  executables: []
30
33
  extensions: []
@@ -33,7 +36,7 @@ files:
33
36
  - lib/jekyll_ranked_search.rb
34
37
  - lib/search.js
35
38
  - lib/search.json
36
- - lib/stopwords.txt
39
+ - lib/stopwords/en.txt
37
40
  homepage: https://github.com/f-ewald/jekyll_ranked_search
38
41
  licenses:
39
42
  - MIT