jekyll-meilisearch 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d924f1a1f32890e0bcce4fc23c6c93e518a9f58db20e297d0ee4e15836faaade
4
- data.tar.gz: 72c34b6ed21590f870169ae75607001fa503bd22b071e2394a644b2483855a36
3
+ metadata.gz: b88f29e6292e57d37582a005ccd165d9916bead6896ec325effc4f7122a841dc
4
+ data.tar.gz: 44cc18269f75390ebb9849c2a6eee43f8441a1046b90379697171a10e8cb070b
5
5
  SHA512:
6
- metadata.gz: 1871cdbe57d39c430d6b726b9aeede35915726ae3d857b2b207092bea8819d912f6f34db05f165fd8bb0167f66ba666328fea1d772d4defa36b69c3b925f38d5
7
- data.tar.gz: a4fc047ff2c6261c14cbf7517c6a02ce2267024405212795c4c87d087b976daba0a826afd1526219632b727dd20ddd13d5f30c311453fe6c775b97f14043ab63
6
+ metadata.gz: fc89972661de0f3c9163fd9086bc60b8a28500745514f15f5155ef65265d4c7aeb6210d3fb64d62e58730c7d9a4b080b2a7b145717119a59746f86429330a80e
7
+ data.tar.gz: c3f56c3d650ab6038247eba09c25896e21e70c387d459a6a6fb2e5373469fe936a7c917996406c14379bb8ab16aa7f560e76870bea0459b5ff30aa0aa4a057a3
data/README.md ADDED
@@ -0,0 +1,133 @@
1
+ # Jekyll Meilisearch Plugin
2
+
3
+ A Jekyll plugin that indexes your site’s content into Meilisearch, a fast and lightweight search engine. This plugin supports incremental indexing, ensuring efficient updates by only syncing changes between your Jekyll site and Meilisearch.
4
+
5
+ [![Continuous Integration](https://github.com/unicolored/jekyll-meilisearch/actions/workflows/ruby.yml/badge.svg)](https://github.com/unicolored/jekyll-meilisearch/actions/workflows/ruby.yml) [![Gem Version](https://badge.fury.io/rb/jekyll-meilisearch.svg)](https://badge.fury.io/rb/jekyll-meilisearch)
6
+
7
+ ## Features
8
+ - Indexes Jekyll collections (e.g., posts, pages) into Meilisearch.
9
+ - Incremental updates: adds new documents, deletes obsolete ones, and skips unchanged content.
10
+ - Configurable via _config.yml: customize fields, collections, and ID formats.
11
+ - Robust error handling with retries and fallback to full indexing if needed.
12
+ - Pagination support for large sites.
13
+
14
+ ## Installation
15
+
16
+ Add the gem to your Jekyll site’s Gemfile:
17
+
18
+ ```ruby
19
+ gem "jekyll-meilisearch"
20
+ ```
21
+
22
+ And then add this line to your site's `_config.yml`:
23
+
24
+ ```yml
25
+ plugins:
26
+ - jekyll-meilisearch
27
+ ```
28
+
29
+ ## Configuration
30
+ Add the following to your Jekyll _config.yml (or a separate config file like _config.prod.yml):
31
+
32
+ ```yaml
33
+ meilisearch:
34
+ url: "http://localhost:7700" # Your Meilisearch instance URL
35
+ api_key: "your-api-key" # Meilisearch API key
36
+ index_name: "my_site" # Optional: defaults to "jekyll_documents"
37
+ collections:
38
+ posts:
39
+ fields: ["title", "content", "url", "date"] # Fields to index
40
+ id_format: "default" # Optional: "default", "id", or "url"
41
+ pages:
42
+ fields: ["title", "content", "url"]
43
+ ```
44
+
45
+ ## Configuration Options
46
+ * `url`: The Meilisearch server URL (required).
47
+ * `api_key`: The Meilisearch API key (required). Recommended: use a dedicated API key for your index, not the admin one.
48
+ * `index_name`: The name of the Meilisearch index (optional, defaults to jekyll_documents).
49
+ * `collections`: A hash of Jekyll collections to index.
50
+ * `fields`: Array of fields to extract from each document (e.g., title, content, url, date).
51
+ * `id_format`: How to generate document IDs:
52
+ * "default" | "id": Uses collection-name-number if a number field exists, otherwise sanitizes the document ID.
53
+ * "url": Uses the document’s URL, sanitized.
54
+ * fallback (any other value): if the document has a "number" field, uses "collection_name" + "-" + "number"; otherwise sanitizes the document ID.
55
+
56
+ Run your Jekyll build:
57
+
58
+ ```shell
59
+ bundle exec jekyll build
60
+ ```
61
+
62
+ Or with multiple config files:
63
+
64
+ ```shell
65
+ bundle exec jekyll build --config _config.yml,_config.prod.yml
66
+ ```
67
+
68
+ ## Usage
69
+ Ensure Meilisearch is running and accessible at the configured url.
70
+ Configure your _config.yml with the necessary meilisearch settings.
71
+ Build your site. The plugin will:
72
+ - Create the Meilisearch index if it doesn’t exist.
73
+ - Fetch existing documents from Meilisearch.
74
+ - Delete obsolete documents.
75
+ - Index new or updated documents.
76
+ - Log details about the indexing process to STDOUT.
77
+
78
+ Add the following to your front end to enable search:
79
+ ```html
80
+
81
+ <!-- Search Input -->
82
+ <div class="border m-6 mb-6 p-4">
83
+ <input type="text" id="search" class="border p-2 w-full" placeholder="Rechercher...">
84
+ <div id="results" class="mt-2 border p-4">Results will appear here.</div>
85
+ </div>
86
+
87
+ <!-- Meilisearch JS SDK -->
88
+ <script src="https://cdn.jsdelivr.net/npm/meilisearch@0.40.0/dist/bundles/meilisearch.umd.js"></script>
89
+ <script>
90
+ const meilisearchConfig = {
91
+ host: "{{ site.meilisearch.url | default: 'http://localhost:7700' }}",
92
+ apiKey: "{{ site.meilisearch.search_api_key}}"
93
+ };
94
+ const client = new MeiliSearch(meilisearchConfig);
95
+ const index = client.index('{{site.meilisearch.index_name}}');
96
+
97
+ document.getElementById('search').addEventListener('input', async (e) => {
98
+ const query = e.target.value;
99
+ if (query.length < 2) {
100
+ document.getElementById('results').innerHTML = '';
101
+ return;
102
+ }
103
+ try {
104
+ const results = await index.search(query);
105
+ document.getElementById('results').innerHTML = results.hits
106
+ .map(hit => `<p><a href="${hit.url}" class="text-blue-500 hover:underline">${hit.title}</a></p>`)
107
+ .join('');
108
+ } catch (error) {
109
+ console.error('Search error:', error);
110
+ document.getElementById('results').innerHTML = '<p class="text-red-500">Search failed. Please try again.</p>';
111
+ }
112
+ });
113
+ </script>
114
+
115
+ ```
116
+
117
+ ## Skip development
118
+
119
+ Use `disable_in_development: true` if you want to turn off Meilisearch indexing when `jekyll.environment == "development"`,
120
+ but don't want to remove the plugin (so you don't accidentally commit the removal). Default value is `false`.
121
+
122
+ ```yml
123
+ meilisearch:
124
+ disable_in_development: true
125
+ ```
126
+
127
+ ## Contributing
128
+
129
+ 1. Fork it (https://github.com/unicolored/jekyll-meilisearch/fork)
130
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
131
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
132
+ 4. Push to the branch (`git push origin my-new-feature`)
133
+ 5. Create a new Pull Request
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JekyllMeilisearch
4
+ class MeilisearchIndexer < Jekyll::Generator
5
+ safe true
6
+ priority :lowest
7
+
8
+ # Main plugin action, called by Jekyll-core
9
+ def generate(site)
10
+ @site = site
11
+ if disabled_in_development?
12
+ Jekyll.logger.info "Jekyll Meilisearch:", "Skipping meilisearch indexation in development"
13
+ return
14
+ end
15
+ Jekyll.logger.info "Starting Meilisearch incremental indexing..."
16
+ return unless validate_config
17
+
18
+ @documents = build_documents
19
+ sync_with_meilisearch
20
+ end
21
+
22
+ private
23
+
24
+ # Returns the plugin's config or an empty hash if not set
25
+ def config
26
+ @config ||= @site.config["meilisearch"] || {}
27
+ end
28
+
29
+ def validate_config
30
+ unless config["url"]
31
+ Jekyll.logger.info "Error: Meilisearch URL not set in config. Skipping indexing."
32
+ return false
33
+ end
34
+ unless config["api_key"]
35
+ Jekyll.logger.info "Error: Meilisearch API key not set in config. Skipping indexing."
36
+ return false
37
+ end
38
+ true
39
+ end
40
+
41
+ def build_headers(api_key)
42
+ {
43
+ "Content-Type" => "application/json",
44
+ "Authorization" => "Bearer #{api_key}",
45
+ }
46
+ end
47
+
48
+ def build_documents
49
+ documents = []
50
+ collections_config = config["collections"] || { "posts" => { "fields" => %w(title content url date) } }
51
+
52
+ collections_config.each do |collection_name, collection_settings|
53
+ collection = @site.collections[collection_name]
54
+ if collection
55
+ Jekyll.logger.info "Processing collection: '#{collection_name}'..."
56
+ fields_to_index = collection_settings["fields"] || %w(title content url date)
57
+ id_format = collection_settings["id_format"] || :default
58
+
59
+ collection_docs = collection.docs.map do |doc|
60
+ sanitized_id = generate_id(doc, collection_name, id_format)
61
+ doc_data = {
62
+ "id" => sanitized_id,
63
+ "content" => doc.content.strip,
64
+ "url" => doc.url,
65
+ }
66
+ fields_to_index.each do |field|
67
+ next if %w(id content url).include?(field)
68
+
69
+ value = doc.data[field]
70
+ doc_data[field] = field == "date" && value ? value.strftime("%Y-%m-%d") : value
71
+ end
72
+ doc_data
73
+ end
74
+ documents.concat(collection_docs)
75
+ else
76
+ Jekyll.logger.info "Warning: Collection '#{collection_name}' not found. Skipping."
77
+ end
78
+ end
79
+
80
+ if documents.empty?
81
+ Jekyll.logger.info "No documents found across configured collections: #{collections_config.keys.join(", ")}. Cleaning up index..."
82
+ end
83
+ documents
84
+ end
85
+
86
+ def generate_id(doc, collection_name, id_format)
87
+ # Helper method to normalize strings
88
+ normalize = lambda do |str|
89
+ str.tr("/", "-")
90
+ .squeeze(%r![^a-zA-Z0-9_-]!, "-").squeeze("-")
91
+ .downcase
92
+ .slice(0, 100)
93
+ end
94
+
95
+ case id_format
96
+ when :default, :id
97
+ normalize.call(doc.id)
98
+ when :url
99
+ normalize.call(doc.url)
100
+ else
101
+ doc.data["number"] ? "#{collection_name}-#{doc.data["number"]}" : normalize.call(doc.id)
102
+ end
103
+ end
104
+
105
+ def sync_with_meilisearch
106
+ headers = build_headers(config["api_key"])
107
+ index_name = config["index_name"] || "jekyll_documents"
108
+ create_index_if_missing(config["url"], index_name, headers)
109
+
110
+ meili_docs = fetch_all_documents(config["url"], index_name, headers)
111
+ if meili_docs.nil?
112
+ Jekyll.logger.info "Failed to fetch existing documents. Falling back to full indexing."
113
+ return full_index(config["url"], index_name, @documents, headers)
114
+ end
115
+
116
+ meili_ids = meili_docs.map { |doc| doc["id"] }
117
+ jekyll_ids = @documents.map { |doc| doc["id"] }
118
+
119
+ delete_obsolete_documents(config["url"], index_name, meili_ids - jekyll_ids, headers)
120
+ index_new_documents(config["url"], index_name, @documents, headers) if @documents.any?
121
+ end
122
+
123
+ def fetch_all_documents(url, index_name, headers)
124
+ documents = []
125
+ offset = 0
126
+ limit = 1000
127
+ loop do
128
+ response = attempt_request(
129
+ lambda {
130
+ HTTParty.get("#{url}/indexes/#{index_name}/documents?limit=#{limit}&offset=#{offset}", :headers => headers,
131
+ :timeout => 30)
132
+ },
133
+ "fetching documents"
134
+ )
135
+ return nil unless response&.success?
136
+
137
+ results = JSON.parse(response.body)["results"]
138
+ documents.concat(results)
139
+ break if results.size < limit
140
+
141
+ offset += limit
142
+ end
143
+ documents
144
+ end
145
+
146
+ def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
147
+ return Jekyll.logger.info "No documents to delete from Meilisearch." if ids_to_delete.empty?
148
+
149
+ Jekyll.logger.info "Deleting #{ids_to_delete.size} obsolete documents from Meilisearch..."
150
+ response = attempt_request(
151
+ lambda {
152
+ HTTParty.post("#{url}/indexes/#{index_name}/documents/delete-batch", :body => ids_to_delete.to_json, :headers => headers,
153
+ :timeout => 30)
154
+ },
155
+ "deleting documents"
156
+ )
157
+ if response&.success?
158
+ Jekyll.logger.info "Delete task queued successfully."
159
+ elsif response
160
+ Jekyll.logger.info "Failed to delete obsolete documents: #{response.code} - #{response.body}"
161
+ end
162
+ end
163
+
164
+ def index_new_documents(url, index_name, documents, headers)
165
+ Jekyll.logger.info "Indexing #{documents.size} documents to Meilisearch..."
166
+ batch_size = 1000
167
+ documents.each_slice(batch_size) do |batch|
168
+ response = attempt_request(
169
+ lambda {
170
+ HTTParty.post("#{url}/indexes/#{index_name}/documents", :body => batch.to_json, :headers => headers, :timeout => 30)
171
+ },
172
+ "indexing documents"
173
+ )
174
+ if response&.code == 202
175
+ if response.body
176
+ task = JSON.parse(response.body)
177
+ Jekyll.logger.info "Task queued: UID #{task["taskUid"]}. Check status at #{url}/tasks/#{task["taskUid"]}"
178
+ else
179
+ Jekyll.logger.info "Task queued (202), but no response body received."
180
+ end
181
+ elsif response.nil?
182
+ Jekyll.logger.info "Failed to queue indexing task: No response received from Meilisearch."
183
+ else
184
+ Jekyll.logger.info "Failed to queue indexing task: #{response.code} - #{response.body}"
185
+ end
186
+ end
187
+ end
188
+
189
+ def create_index_if_missing(url, index_name, headers)
190
+ Jekyll.logger.info "Checking if index '#{index_name}' exists..."
191
+ response = HTTParty.get("#{url}/indexes/#{index_name}", :headers => headers, :timeout => 30)
192
+ return if response.success?
193
+
194
+ if response.code == 404
195
+ Jekyll.logger.info "Index '#{index_name}' not found. Creating it..."
196
+ response = attempt_request(
197
+ -> { HTTParty.post("#{url}/indexes", :body => { "uid" => index_name }.to_json, :headers => headers, :timeout => 30) },
198
+ "creating index"
199
+ )
200
+ if response&.success? || response&.code == 202
201
+ Jekyll.logger.info "Index '#{index_name}' created successfully."
202
+ elsif response
203
+ Jekyll.logger.info "Failed to create index: #{response.code} - #{response.body}"
204
+ end
205
+ else
206
+ Jekyll.logger.info "Error checking index: #{response.code} - #{response.body}"
207
+ end
208
+ end
209
+
210
+ def full_index(url, index_name, documents, headers)
211
+ Jekyll.logger.info "Performing full index reset as fallback..."
212
+ response = attempt_request(
213
+ -> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", :headers => headers, :timeout => 30) },
214
+ "resetting index"
215
+ )
216
+ unless response&.success? || response&.code == 404
217
+ if response.nil?
218
+ Jekyll.logger.info "Failed to reset index: No response received from Meilisearch."
219
+ else
220
+ Jekyll.logger.info "Failed to reset index: #{response.code} - #{response.body}"
221
+ end
222
+ return
223
+ end
224
+
225
+ index_new_documents(url, index_name, documents, headers) if documents.any?
226
+ end
227
+
228
+ def attempt_request(request, action, retries: 3)
229
+ retries.times do |i|
230
+ response = request.call
231
+ return response if response.success? || [202, 404].include?(response.code)
232
+ rescue HTTParty::Error => e
233
+ Jekyll.logger.info "Attempt #{i + 1} failed while #{action}: #{e.message}"
234
+ sleep(2**i) # Exponential backoff
235
+ end
236
+ Jekyll.logger.info "All retries failed for #{action}."
237
+ nil
238
+ end
239
+
240
+ def disabled_in_development?
241
+ config && config["disable_in_development"] && Jekyll.env == "development"
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module Meilisearch
5
+ VERSION = "0.4.0"
6
+ end
7
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "jekyll"
4
+ require "httparty"
5
+ require "json"
6
+ require "logger"
7
+ require "jekyll-meilisearch/generator"
8
+
9
+ module JekyllMeilisearch
10
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-meilisearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - unicolored
@@ -30,7 +30,7 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '3.0'
33
+ version: '3.7'
34
34
  - - "<"
35
35
  - !ruby/object:Gem::Version
36
36
  version: '5.0'
@@ -40,10 +40,50 @@ dependencies:
40
40
  requirements:
41
41
  - - ">="
42
42
  - !ruby/object:Gem::Version
43
- version: '3.0'
43
+ version: '3.7'
44
44
  - - "<"
45
45
  - !ruby/object:Gem::Version
46
46
  version: '5.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: json
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.10'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 2.10.2
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '2.10'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.10.2
67
+ - !ruby/object:Gem::Dependency
68
+ name: logger
69
+ requirement: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - "~>"
72
+ - !ruby/object:Gem::Version
73
+ version: '1.6'
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: 1.6.6
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '1.6'
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: 1.6.6
47
87
  - !ruby/object:Gem::Dependency
48
88
  name: bundler
49
89
  requirement: !ruby/object:Gem::Requirement
@@ -58,6 +98,20 @@ dependencies:
58
98
  - - "~>"
59
99
  - !ruby/object:Gem::Version
60
100
  version: '2.0'
101
+ - !ruby/object:Gem::Dependency
102
+ name: nokogiri
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '1.6'
108
+ type: :development
109
+ prerelease: false
110
+ version_requirements: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - "~>"
113
+ - !ruby/object:Gem::Version
114
+ version: '1.6'
61
115
  - !ruby/object:Gem::Dependency
62
116
  name: rake
63
117
  requirement: !ruby/object:Gem::Requirement
@@ -72,15 +126,66 @@ dependencies:
72
126
  - - "~>"
73
127
  - !ruby/object:Gem::Version
74
128
  version: '13.0'
129
+ - !ruby/object:Gem::Dependency
130
+ name: rspec
131
+ requirement: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - "~>"
134
+ - !ruby/object:Gem::Version
135
+ version: '3.0'
136
+ type: :development
137
+ prerelease: false
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - "~>"
141
+ - !ruby/object:Gem::Version
142
+ version: '3.0'
143
+ - !ruby/object:Gem::Dependency
144
+ name: rubocop-jekyll
145
+ requirement: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - "~>"
148
+ - !ruby/object:Gem::Version
149
+ version: 0.14.0
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ requirements:
154
+ - - "~>"
155
+ - !ruby/object:Gem::Version
156
+ version: 0.14.0
157
+ - !ruby/object:Gem::Dependency
158
+ name: typhoeus
159
+ requirement: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - ">="
162
+ - !ruby/object:Gem::Version
163
+ version: '0.7'
164
+ - - "<"
165
+ - !ruby/object:Gem::Version
166
+ version: '2.0'
167
+ type: :development
168
+ prerelease: false
169
+ version_requirements: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0.7'
174
+ - - "<"
175
+ - !ruby/object:Gem::Version
176
+ version: '2.0'
75
177
  description: This plugin incrementally indexes Jekyll collections into Meilisearch
76
178
  for fast search capabilities.
77
179
  email: hello@gilles.dev
78
180
  executables: []
79
181
  extensions: []
80
- extra_rdoc_files: []
182
+ extra_rdoc_files:
183
+ - README.md
81
184
  files:
82
- - lib/jekyll/meilisearch_indexer.rb
83
- - lib/jekyll_meilisearch.rb
185
+ - README.md
186
+ - lib/jekyll-meilisearch.rb
187
+ - lib/jekyll-meilisearch/generator.rb
188
+ - lib/jekyll-meilisearch/version.rb
84
189
  homepage: https://github.com/unicolored/jekyll-meilisearch
85
190
  licenses:
86
191
  - MIT
@@ -1,232 +0,0 @@
1
- require 'httparty'
2
- require 'json'
3
- require 'logger'
4
-
5
- module Jekyll
6
- class MeilisearchIndexer < Generator
7
- def generate(site)
8
- @logger = Logger.new(STDOUT)
9
- @logger.level = Logger::INFO
10
-
11
- log_info("Starting Meilisearch incremental indexing...")
12
- config = load_config(site)
13
- return unless validate_config(config)
14
-
15
- documents = build_documents(site, config)
16
- sync_with_meilisearch(config, documents)
17
- end
18
-
19
- private
20
-
21
- def log_info(message)
22
- @logger.info(message)
23
- end
24
-
25
- def load_config(site)
26
- site.config['meilisearch'] || {}
27
- end
28
-
29
- def validate_config(config)
30
- unless config['url']
31
- log_info("Error: Meilisearch URL not set in config. Skipping indexing.")
32
- return false
33
- end
34
- unless config['api_key']
35
- log_info("Error: Meilisearch API key not set in config. Skipping indexing.")
36
- return false
37
- end
38
- true
39
- end
40
-
41
- def build_headers(api_key)
42
- {
43
- 'Content-Type' => 'application/json',
44
- 'Authorization' => "Bearer #{api_key}"
45
- }
46
- end
47
-
48
- def build_documents(site, config)
49
- documents = []
50
- collections_config = config['collections'] || { 'posts' => { 'fields' => %w[title content url date] } }
51
-
52
- collections_config.each do |collection_name, collection_settings|
53
- collection = site.collections[collection_name]
54
- if collection
55
- log_info("Processing collection: '#{collection_name}'...")
56
- fields_to_index = collection_settings['fields'] || %w[title content url date]
57
- id_format = collection_settings['id_format'] || :default
58
-
59
- collection_docs = collection.docs.map do |doc|
60
- sanitized_id = generate_id(doc, collection_name, id_format)
61
- doc_data = {
62
- 'id' => sanitized_id,
63
- 'content' => doc.content.strip,
64
- 'url' => doc.url
65
- }
66
- fields_to_index.each do |field|
67
- next if %w[id content url].include?(field)
68
- value = doc.data[field]
69
- doc_data[field] = field == 'date' && value ? value.strftime('%Y-%m-%d') : value
70
- end
71
- doc_data
72
- end
73
- documents.concat(collection_docs)
74
- else
75
- log_info("Warning: Collection '#{collection_name}' not found. Skipping.")
76
- end
77
- end
78
-
79
- if documents.empty?
80
- log_info("No documents found across configured collections: #{collections_config.keys.join(', ')}. Cleaning up index...")
81
- end
82
- documents
83
- end
84
-
85
- def generate_id(doc, collection_name, id_format)
86
- # Helper method to normalize strings
87
- normalize = ->(str) do
88
- str.gsub('/', '-')
89
- .gsub(/[^a-zA-Z0-9_-]/, '-')
90
- .gsub(/-+/, '-')
91
- .downcase
92
- .slice(0, 100)
93
- end
94
-
95
- case id_format
96
- when :default, :id
97
- normalize.call(doc.id)
98
- when :url
99
- normalize.call(doc.url)
100
- else
101
- doc.data['number'] ? "#{collection_name}-#{doc.data['number']}" : normalize.call(doc.id)
102
- end
103
- end
104
-
105
- def sync_with_meilisearch(config, documents)
106
- headers = build_headers(config['api_key'])
107
- index_name = config['index_name'] || 'jekyll_documents'
108
- create_index_if_missing(config['url'], index_name, headers)
109
-
110
- meili_docs = fetch_all_documents(config['url'], index_name, headers)
111
- if meili_docs.nil?
112
- log_info("Failed to fetch existing documents. Falling back to full indexing.")
113
- return full_index(config['url'], index_name, documents, headers)
114
- end
115
-
116
- meili_ids = meili_docs.map { |doc| doc['id'] }
117
- jekyll_ids = documents.map { |doc| doc['id'] }
118
-
119
- delete_obsolete_documents(config['url'], index_name, meili_ids - jekyll_ids, headers)
120
- index_new_documents(config['url'], index_name, documents, headers) if documents.any?
121
- end
122
-
123
- def fetch_all_documents(url, index_name, headers)
124
- documents = []
125
- offset = 0
126
- limit = 1000
127
- loop do
128
- response = attempt_request(
129
- -> { HTTParty.get("#{url}/indexes/#{index_name}/documents?limit=#{limit}&offset=#{offset}", headers: headers, timeout: 30) },
130
- "fetching documents"
131
- )
132
- return nil unless response&.success?
133
- results = JSON.parse(response.body)['results']
134
- documents.concat(results)
135
- break if results.size < limit
136
- offset += limit
137
- end
138
- documents
139
- end
140
-
141
- def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
142
- return log_info("No documents to delete from Meilisearch.") if ids_to_delete.empty?
143
-
144
- log_info("Deleting #{ids_to_delete.size} obsolete documents from Meilisearch...")
145
- response = attempt_request(
146
- -> { HTTParty.post("#{url}/indexes/#{index_name}/documents/delete-batch", body: ids_to_delete.to_json, headers: headers, timeout: 30) },
147
- "deleting documents"
148
- )
149
- if response&.success?
150
- log_info("Delete task queued successfully.")
151
- elsif response
152
- log_info("Failed to delete obsolete documents: #{response.code} - #{response.body}")
153
- end
154
- end
155
-
156
- def index_new_documents(url, index_name, documents, headers)
157
- log_info("Indexing #{documents.size} documents to Meilisearch...")
158
- batch_size = 1000
159
- documents.each_slice(batch_size) do |batch|
160
- response = attempt_request(
161
- -> { HTTParty.post("#{url}/indexes/#{index_name}/documents", body: batch.to_json, headers: headers, timeout: 30) },
162
- "indexing documents"
163
- )
164
- if response&.code == 202
165
- if response.body
166
- task = JSON.parse(response.body)
167
- log_info("Task queued: UID #{task['taskUid']}. Check status at #{url}/tasks/#{task['taskUid']}")
168
- else
169
- log_info("Task queued (202), but no response body received.")
170
- end
171
- elsif response.nil?
172
- log_info("Failed to queue indexing task: No response received from Meilisearch.")
173
- else
174
- log_info("Failed to queue indexing task: #{response.code} - #{response.body}")
175
- end
176
- end
177
- end
178
-
179
- def create_index_if_missing(url, index_name, headers)
180
- log_info("Checking if index '#{index_name}' exists...")
181
- response = HTTParty.get("#{url}/indexes/#{index_name}", headers: headers, timeout: 30)
182
- return if response.success?
183
-
184
- if response.code == 404
185
- log_info("Index '#{index_name}' not found. Creating it...")
186
- response = attempt_request(
187
- -> { HTTParty.post("#{url}/indexes", body: { "uid" => index_name }.to_json, headers: headers, timeout: 30) },
188
- "creating index"
189
- )
190
- if response&.success? || response&.code == 202
191
- log_info("Index '#{index_name}' created successfully.")
192
- elsif response
193
- log_info("Failed to create index: #{response.code} - #{response.body}")
194
- end
195
- else
196
- log_info("Error checking index: #{response.code} - #{response.body}")
197
- end
198
- end
199
-
200
- def full_index(url, index_name, documents, headers)
201
- log_info("Performing full index reset as fallback...")
202
- response = attempt_request(
203
- -> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", headers: headers, timeout: 30) },
204
- "resetting index"
205
- )
206
- unless response&.success? || response&.code == 404
207
- if response.nil?
208
- log_info("Failed to reset index: No response received from Meilisearch.")
209
- else
210
- log_info("Failed to reset index: #{response.code} - #{response.body}")
211
- end
212
- return
213
- end
214
-
215
- index_new_documents(url, index_name, documents, headers) if documents.any?
216
- end
217
-
218
- def attempt_request(request, action, retries: 3)
219
- retries.times do |i|
220
- begin
221
- response = request.call
222
- return response if response.success? || [202, 404].include?(response.code)
223
- rescue HTTParty::Error => e
224
- log_info("Attempt #{i + 1} failed while #{action}: #{e.message}")
225
- sleep(2 ** i) # Exponential backoff
226
- end
227
- end
228
- log_info("All retries failed for #{action}.")
229
- nil
230
- end
231
- end
232
- end
@@ -1 +0,0 @@
1
- require 'jekyll/meilisearch_indexer'