jekyll-meilisearch 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +153 -0
- data/lib/jekyll-meilisearch/generator.rb +245 -0
- data/lib/jekyll-meilisearch/version.rb +7 -0
- data/lib/jekyll-meilisearch.rb +10 -1
- metadata +52 -9
- data/lib/jekyll/meilisearch_indexer.rb +0 -236
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ea5866f93ee6d60be594b225c5c161329787250f6ead78082f141cd723e1e27
|
4
|
+
data.tar.gz: 9e3b6508edcbd98d94d302369246f4984463019e3bb21909e8263b185d3cd072
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55858897684e4042eb808f737d892c902d2c0d9be925fb6421da478224f75a2ceae2af59a699a097c1d6fa9f9641e4ccbf29264b3a60bf86837f4fdc50aac04c
|
7
|
+
data.tar.gz: 7b2ac53a64ef94ea33294c240560b6e1df667d4625e3416ce8221dd1efc43d8e78320caa4665d3fad22dbdad50d8d7d6c69a954c05394d347a352da69312cb1b
|
data/README.md
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
# Jekyll Meilisearch Plugin
|
2
|
+
A Jekyll plugin that indexes your site’s content into Meilisearch, a fast and lightweight search engine. This plugin supports incremental indexing, ensuring efficient updates by only syncing changes between your Jekyll site and Meilisearch.
|
3
|
+
|
4
|
+
## Features
|
5
|
+
- Indexes Jekyll collections (e.g., posts, pages) into Meilisearch.
|
6
|
+
- Incremental updates: adds new documents, deletes obsolete ones, and skips unchanged content.
|
7
|
+
- Configurable via _config.yml: customize fields, collections, and ID formats.
|
8
|
+
- Robust error handling with retries and fallback to full indexing if needed.
|
9
|
+
- Pagination support for large sites.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
Add the gem to your Jekyll site’s Gemfile:
|
13
|
+
|
14
|
+
```shell
|
15
|
+
gem "jekyll-meilisearch", "~> 0.3.0"
|
16
|
+
```
|
17
|
+
|
18
|
+
Then run:
|
19
|
+
|
20
|
+
```shell
|
21
|
+
bundle install
|
22
|
+
```
|
23
|
+
|
24
|
+
Alternatively, install it directly:
|
25
|
+
|
26
|
+
```shell
|
27
|
+
gem install jekyll-meilisearch
|
28
|
+
```
|
29
|
+
|
30
|
+
## Configuration
|
31
|
+
Add the following to your Jekyll _config.yml (or a separate config file like _config.prod.yml):
|
32
|
+
|
33
|
+
```yaml
|
34
|
+
meilisearch:
|
35
|
+
url: "http://localhost:7700" # Your Meilisearch instance URL
|
36
|
+
api_key: "your-api-key" # Meilisearch API key
|
37
|
+
index_name: "my_site" # Optional: defaults to "jekyll_documents"
|
38
|
+
collections:
|
39
|
+
posts:
|
40
|
+
fields: ["title", "content", "url", "date"] # Fields to index
|
41
|
+
id_format: "default" # Optional: "default", "id", or "url"
|
42
|
+
pages:
|
43
|
+
fields: ["title", "content", "url"]
|
44
|
+
```
|
45
|
+
|
46
|
+
## Configuration Options
|
47
|
+
- url: The Meilisearch server URL (required).
|
48
|
+
- api_key: The Meilisearch API key (required). Recommended: use a dedicated api key for your index, not the admin one.
|
49
|
+
- index_name: The name of the Meilisearch index (optional, defaults to jekyll_documents).
|
50
|
+
- collections: A hash of Jekyll collections to index.
|
51
|
+
- fields: Array of fields to extract from each document (e.g., title, content, url, date).
|
52
|
+
- id_format: How to generate document IDs:
|
53
|
+
- "default" | "id": Uses collection-name-number if a number field exists, otherwise sanitizes the document ID.
|
54
|
+
- "url": Uses the document’s URL, sanitized.
|
55
|
+
- fallback: if "number" exists, uses "collection_name" + "number"
|
56
|
+
|
57
|
+
Run your Jekyll build:
|
58
|
+
|
59
|
+
```shell
|
60
|
+
bundle exec jekyll build
|
61
|
+
```
|
62
|
+
|
63
|
+
Or with multiple config files:
|
64
|
+
|
65
|
+
```shell
|
66
|
+
bundle exec jekyll build --config _config.yml,_config.prod.yml
|
67
|
+
```
|
68
|
+
|
69
|
+
## Usage
|
70
|
+
Ensure Meilisearch is running and accessible at the configured url.
|
71
|
+
Configure your _config.yml with the necessary meilisearch settings.
|
72
|
+
Build your site. The plugin will:
|
73
|
+
- Create the Meilisearch index if it doesn’t exist.
|
74
|
+
- Fetch existing documents from Meilisearch.
|
75
|
+
- Delete obsolete documents.
|
76
|
+
- Index new or updated documents.
|
77
|
+
- Logs will output to STDOUT with details about the indexing process.
|
78
|
+
|
79
|
+
To add search to your site's front end, include the following:
|
80
|
+
```html
|
81
|
+
|
82
|
+
<!-- Search Input -->
|
83
|
+
<div class="border m-6 mb-6 p-4">
|
84
|
+
<input type="text" id="search" class="border p-2 w-full" placeholder="Rechercher...">
|
85
|
+
<div id="results" class="mt-2 border p-4">Results will appear here.</div>
|
86
|
+
</div>
|
87
|
+
|
88
|
+
<!-- Meilisearch JS SDK -->
|
89
|
+
<script src="https://cdn.jsdelivr.net/npm/meilisearch@0.40.0/dist/bundles/meilisearch.umd.js"></script>
|
90
|
+
<script>
|
91
|
+
const meilisearchConfig = {
|
92
|
+
host: "{{ site.meilisearch.url | default: 'http://localhost:7700' }}",
|
93
|
+
apiKey: "{{ site.meilisearch.search_api_key}}"
|
94
|
+
};
|
95
|
+
const client = new MeiliSearch(meilisearchConfig);
|
96
|
+
const index = client.index('{{site.meilisearch.index_name}}');
|
97
|
+
|
98
|
+
document.getElementById('search').addEventListener('input', async (e) => {
|
99
|
+
const query = e.target.value;
|
100
|
+
if (query.length < 2) {
|
101
|
+
document.getElementById('results').innerHTML = '';
|
102
|
+
return;
|
103
|
+
}
|
104
|
+
try {
|
105
|
+
const results = await index.search(query);
|
106
|
+
document.getElementById('results').innerHTML = results.hits
|
107
|
+
.map(hit => `<p><a href="${hit.url}" class="text-blue-500 hover:underline">${hit.title}</a></p>`)
|
108
|
+
.join('');
|
109
|
+
} catch (error) {
|
110
|
+
console.error('Search error:', error);
|
111
|
+
document.getElementById('results').innerHTML = '<p class="text-red-500">Search failed. Please try again.</p>';
|
112
|
+
}
|
113
|
+
});
|
114
|
+
</script>
|
115
|
+
|
116
|
+
```
|
117
|
+
|
118
|
+
## Requirements
|
119
|
+
- Ruby >= 2.7
|
120
|
+
- Jekyll >= 3.0, < 5.0
|
121
|
+
- Meilisearch server (local or hosted)
|
122
|
+
|
123
|
+
## Dependencies:
|
124
|
+
- httparty (for HTTP requests)
|
125
|
+
|
126
|
+
These are automatically installed when you add the gem to your Gemfile.
|
127
|
+
|
128
|
+
## Development
|
129
|
+
To contribute or modify the plugin:
|
130
|
+
|
131
|
+
- Clone the repository: git clone https://github.com/unicolored/jekyll-meilisearch.git cd jekyll-meilisearch
|
132
|
+
- Install dependencies: bundle install
|
133
|
+
- Make changes and test locally: gem build jekyll-meilisearch.gemspec, then gem install ./jekyll-meilisearch-x.x.x.gem
|
134
|
+
|
135
|
+
## Releasing a New Version
|
136
|
+
- Update the version in jekyll-meilisearch.gemspec.
|
137
|
+
- Build the gem: gem build jekyll-meilisearch.gemspec
|
138
|
+
- Push to RubyGems: gem push jekyll-meilisearch-x.x.x.gem
|
139
|
+
|
140
|
+
## License
|
141
|
+
This project is licensed under the MIT License.
|
142
|
+
|
143
|
+
## Contributing
|
144
|
+
Feel free to open issues or submit pull requests on GitHub.
|
145
|
+
|
146
|
+
```shell
|
147
|
+
bundle install
|
148
|
+
# Update version in ./jekyll-meilisearch.gemspec
|
149
|
+
# Build the gem
|
150
|
+
gem build jekyll-meilisearch.gemspec
|
151
|
+
# Push the gem
|
152
|
+
gem push jekyll-meilisearch-${version}.gem
|
153
|
+
```
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JekyllMeilisearch
|
4
|
+
class MeilisearchIndexer < Jekyll::Generator
|
5
|
+
safe true
|
6
|
+
priority :lowest
|
7
|
+
|
8
|
+
# Main plugin action, called by Jekyll-core
|
9
|
+
def generate(site)
|
10
|
+
@site = site
|
11
|
+
if disabled_in_development?
|
12
|
+
Jekyll.logger.info "Jekyll Meilisearch:", "Skipping meilisearch indexation in development"
|
13
|
+
return
|
14
|
+
end
|
15
|
+
Jekyll.logger.info 'Starting Meilisearch incremental indexing...'
|
16
|
+
return unless validate_config
|
17
|
+
|
18
|
+
@documents = build_documents
|
19
|
+
sync_with_meilisearch
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
# Returns the plugin's config or an empty hash if not set
|
25
|
+
def config
|
26
|
+
@config ||= @site.config["meilisearch"] || {}
|
27
|
+
end
|
28
|
+
|
29
|
+
def validate_config
|
30
|
+
unless config['url']
|
31
|
+
Jekyll.logger.info 'Error: Meilisearch URL not set in config. Skipping indexing.'
|
32
|
+
return false
|
33
|
+
end
|
34
|
+
unless config['api_key']
|
35
|
+
Jekyll.logger.info 'Error: Meilisearch API key not set in config. Skipping indexing.'
|
36
|
+
return false
|
37
|
+
end
|
38
|
+
true
|
39
|
+
end
|
40
|
+
|
41
|
+
def build_headers(api_key)
|
42
|
+
{
|
43
|
+
'Content-Type' => 'application/json',
|
44
|
+
'Authorization' => "Bearer #{api_key}"
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
def build_documents
|
49
|
+
documents = []
|
50
|
+
collections_config = config['collections'] || { 'posts' => { 'fields' => %w[title content url date] } }
|
51
|
+
|
52
|
+
collections_config.each do |collection_name, collection_settings|
|
53
|
+
collection = @site.collections[collection_name]
|
54
|
+
if collection
|
55
|
+
Jekyll.logger.info "Processing collection: '#{collection_name}'..."
|
56
|
+
fields_to_index = collection_settings['fields'] || %w[title content url date]
|
57
|
+
id_format = collection_settings['id_format'] || :default
|
58
|
+
|
59
|
+
collection_docs = collection.docs.map do |doc|
|
60
|
+
sanitized_id = generate_id(doc, collection_name, id_format)
|
61
|
+
doc_data = {
|
62
|
+
'id' => sanitized_id,
|
63
|
+
'content' => doc.content.strip,
|
64
|
+
'url' => doc.url
|
65
|
+
}
|
66
|
+
fields_to_index.each do |field|
|
67
|
+
next if %w[id content url].include?(field)
|
68
|
+
|
69
|
+
value = doc.data[field]
|
70
|
+
doc_data[field] = field == 'date' && value ? value.strftime('%Y-%m-%d') : value
|
71
|
+
end
|
72
|
+
doc_data
|
73
|
+
end
|
74
|
+
documents.concat(collection_docs)
|
75
|
+
else
|
76
|
+
Jekyll.logger.info "Warning: Collection '#{collection_name}' not found. Skipping."
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
if documents.empty?
|
81
|
+
Jekyll.logger.info "No documents found across configured collections: #{collections_config.keys.join(', ')}. Cleaning up index..."
|
82
|
+
end
|
83
|
+
documents
|
84
|
+
end
|
85
|
+
|
86
|
+
# Builds the Meilisearch document id for a Jekyll document.
#
# Sanitized ids are lowercased; every character outside [a-zA-Z0-9_-]
# (including "/") becomes "-", runs of "-" are collapsed, and the result is
# truncated to 100 characters.
#
# @param doc [#id, #url, #data] the Jekyll document
# @param collection_name [String] prefix used for number-based ids
# @param id_format [Symbol, String] :default / :id sanitize doc.id;
#   :url or "url" sanitizes doc.url; anything else uses
#   "<collection_name>-<number>" when the front matter has a 'number',
#   falling back to the sanitized doc.id
# @return [String] the document id
def generate_id(doc, collection_name, id_format)
  # "/" is already outside the allowed character class, so a single
  # substitution followed by squeeze covers all three original passes.
  normalize = lambda do |str|
    str.gsub(/[^a-zA-Z0-9_-]/, '-')
       .squeeze('-')
       .downcase
       .slice(0, 100)
  end

  case id_format
  when :default, :id
    normalize.call(doc.id)
  when :url, 'url'
    # Values such as `id_format: "url"` in _config.yml are Strings, so the
    # String form must be matched as well or the URL format is never used.
    normalize.call(doc.url)
  else
    doc.data['number'] ? "#{collection_name}-#{doc.data['number']}" : normalize.call(doc.id)
  end
end
|
105
|
+
|
106
|
+
def sync_with_meilisearch
|
107
|
+
headers = build_headers(config['api_key'])
|
108
|
+
index_name = config['index_name'] || 'jekyll_documents'
|
109
|
+
create_index_if_missing(config['url'], index_name, headers)
|
110
|
+
|
111
|
+
meili_docs = fetch_all_documents(config['url'], index_name, headers)
|
112
|
+
if meili_docs.nil?
|
113
|
+
Jekyll.logger.info 'Failed to fetch existing documents. Falling back to full indexing.'
|
114
|
+
return full_index(config['url'], index_name, @documents, headers)
|
115
|
+
end
|
116
|
+
|
117
|
+
meili_ids = meili_docs.map { |doc| doc['id'] }
|
118
|
+
jekyll_ids = @documents.map { |doc| doc['id'] }
|
119
|
+
|
120
|
+
delete_obsolete_documents(config['url'], index_name, meili_ids - jekyll_ids, headers)
|
121
|
+
index_new_documents(config['url'], index_name, @documents, headers) if @documents.any?
|
122
|
+
end
|
123
|
+
|
124
|
+
# Fetches the documents currently stored in the index, paging through the
# documents endpoint 1000 entries at a time.
#
# Only the `id` field is requested: the result is used solely to compare ids
# against the Jekyll documents, so transferring each document's content
# would be wasted bandwidth.
#
# @param url [String] base URL of the Meilisearch instance
# @param index_name [String] uid of the index to read
# @param headers [Hash] HTTP headers (content type and authorization)
# @return [Array<Hash>, nil] the stored documents (id only), or nil when a
#   page could not be fetched or did not contain a `results` array
def fetch_all_documents(url, index_name, headers)
  limit = 1000
  offset = 0
  documents = []

  loop do
    response = attempt_request(
      lambda {
        HTTParty.get("#{url}/indexes/#{index_name}/documents?limit=#{limit}&offset=#{offset}&fields=id",
                     headers: headers, timeout: 30)
      },
      'fetching documents'
    )
    return nil unless response&.success?

    payload = JSON.parse(response.body)
    results = payload.is_a?(Hash) ? payload['results'] : nil
    # An unexpected body shape is treated like a failed fetch so the caller
    # can fall back instead of crashing on nil.
    return nil unless results.is_a?(Array)

    documents.concat(results)
    # A short page means the last page has been reached.
    break if results.size < limit

    offset += limit
  end
  documents
end
|
146
|
+
|
147
|
+
def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
|
148
|
+
return Jekyll.logger.info 'No documents to delete from Meilisearch.' if ids_to_delete.empty?
|
149
|
+
|
150
|
+
Jekyll.logger.info "Deleting #{ids_to_delete.size} obsolete documents from Meilisearch..."
|
151
|
+
response = attempt_request(
|
152
|
+
-> {
|
153
|
+
HTTParty.post("#{url}/indexes/#{index_name}/documents/delete-batch", body: ids_to_delete.to_json, headers: headers,
|
154
|
+
timeout: 30)
|
155
|
+
},
|
156
|
+
'deleting documents'
|
157
|
+
)
|
158
|
+
if response&.success?
|
159
|
+
Jekyll.logger.info 'Delete task queued successfully.'
|
160
|
+
elsif response
|
161
|
+
Jekyll.logger.info "Failed to delete obsolete documents: #{response.code} - #{response.body}"
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def index_new_documents(url, index_name, documents, headers)
|
166
|
+
Jekyll.logger.info "Indexing #{documents.size} documents to Meilisearch..."
|
167
|
+
batch_size = 1000
|
168
|
+
documents.each_slice(batch_size) do |batch|
|
169
|
+
response = attempt_request(
|
170
|
+
-> {
|
171
|
+
HTTParty.post("#{url}/indexes/#{index_name}/documents", body: batch.to_json, headers: headers, timeout: 30)
|
172
|
+
},
|
173
|
+
'indexing documents'
|
174
|
+
)
|
175
|
+
if response&.code == 202
|
176
|
+
if response.body
|
177
|
+
task = JSON.parse(response.body)
|
178
|
+
Jekyll.logger.info "Task queued: UID #{task['taskUid']}. Check status at #{url}/tasks/#{task['taskUid']}"
|
179
|
+
else
|
180
|
+
Jekyll.logger.info 'Task queued (202), but no response body received.'
|
181
|
+
end
|
182
|
+
elsif response.nil?
|
183
|
+
Jekyll.logger.info 'Failed to queue indexing task: No response received from Meilisearch.'
|
184
|
+
else
|
185
|
+
Jekyll.logger.info "Failed to queue indexing task: #{response.code} - #{response.body}"
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def create_index_if_missing(url, index_name, headers)
|
191
|
+
Jekyll.logger.info "Checking if index '#{index_name}' exists..."
|
192
|
+
response = HTTParty.get("#{url}/indexes/#{index_name}", headers: headers, timeout: 30)
|
193
|
+
return if response.success?
|
194
|
+
|
195
|
+
if response.code == 404
|
196
|
+
Jekyll.logger.info "Index '#{index_name}' not found. Creating it..."
|
197
|
+
response = attempt_request(
|
198
|
+
-> { HTTParty.post("#{url}/indexes", body: { 'uid' => index_name }.to_json, headers: headers, timeout: 30) },
|
199
|
+
'creating index'
|
200
|
+
)
|
201
|
+
if response&.success? || response&.code == 202
|
202
|
+
Jekyll.logger.info "Index '#{index_name}' created successfully."
|
203
|
+
elsif response
|
204
|
+
Jekyll.logger.info "Failed to create index: #{response.code} - #{response.body}"
|
205
|
+
end
|
206
|
+
else
|
207
|
+
Jekyll.logger.info "Error checking index: #{response.code} - #{response.body}"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
def full_index(url, index_name, documents, headers)
|
212
|
+
Jekyll.logger.info 'Performing full index reset as fallback...'
|
213
|
+
response = attempt_request(
|
214
|
+
-> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", headers: headers, timeout: 30) },
|
215
|
+
'resetting index'
|
216
|
+
)
|
217
|
+
unless response&.success? || response&.code == 404
|
218
|
+
if response.nil?
|
219
|
+
Jekyll.logger.info 'Failed to reset index: No response received from Meilisearch.'
|
220
|
+
else
|
221
|
+
Jekyll.logger.info "Failed to reset index: #{response.code} - #{response.body}"
|
222
|
+
end
|
223
|
+
return
|
224
|
+
end
|
225
|
+
|
226
|
+
index_new_documents(url, index_name, documents, headers) if documents.any?
|
227
|
+
end
|
228
|
+
|
229
|
+
# Runs an HTTP request with bounded retries and exponential backoff.
#
# A response is returned as soon as it is successful or has status 202 or
# 404 (callers handle those codes themselves). Any other response, and any
# transport-level failure, counts as a failed attempt.
#
# @param request [#call] callable that performs the request and returns a
#   response object answering #success? and #code
# @param action [String] description of the operation, used in log messages
# @param retries [Integer] maximum number of attempts
# @return [Object, nil] the accepted response, or nil when every attempt failed
def attempt_request(request, action, retries: 3)
  retries.times do |i|
    begin
      response = request.call
      return response if response.success? || [202, 404].include?(response.code)

      Jekyll.logger.info "Attempt #{i + 1} failed while #{action}: HTTP #{response.code}"
    rescue HTTParty::Error, SocketError, SystemCallError, Timeout::Error => e
      # Connection refused, DNS failures and open/read timeouts are not
      # HTTParty::Error subclasses; without rescuing them here an unreachable
      # server would abort the whole build instead of being retried.
      Jekyll.logger.info "Attempt #{i + 1} failed while #{action}: #{e.message}"
    end
    # Exponential backoff between attempts; no point waiting after the last one.
    sleep(2**i) if i < retries - 1
  end
  Jekyll.logger.info "All retries failed for #{action}."
  nil
end
|
240
|
+
|
241
|
+
def disabled_in_development?
|
242
|
+
config && config["disable_in_development"] && Jekyll.env == "development"
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
data/lib/jekyll-meilisearch.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-meilisearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- unicolored
|
@@ -10,13 +10,27 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2025-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: httparty
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.21'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.21'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: jekyll
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - ">="
|
18
32
|
- !ruby/object:Gem::Version
|
19
|
-
version: '3.
|
33
|
+
version: '3.7'
|
20
34
|
- - "<"
|
21
35
|
- !ruby/object:Gem::Version
|
22
36
|
version: '5.0'
|
@@ -26,24 +40,50 @@ dependencies:
|
|
26
40
|
requirements:
|
27
41
|
- - ">="
|
28
42
|
- !ruby/object:Gem::Version
|
29
|
-
version: '3.
|
43
|
+
version: '3.7'
|
30
44
|
- - "<"
|
31
45
|
- !ruby/object:Gem::Version
|
32
46
|
version: '5.0'
|
33
47
|
- !ruby/object:Gem::Dependency
|
34
|
-
name:
|
48
|
+
name: json
|
35
49
|
requirement: !ruby/object:Gem::Requirement
|
36
50
|
requirements:
|
37
51
|
- - "~>"
|
38
52
|
- !ruby/object:Gem::Version
|
39
|
-
version: '
|
53
|
+
version: '2.10'
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 2.10.2
|
40
57
|
type: :runtime
|
41
58
|
prerelease: false
|
42
59
|
version_requirements: !ruby/object:Gem::Requirement
|
43
60
|
requirements:
|
44
61
|
- - "~>"
|
45
62
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
63
|
+
version: '2.10'
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 2.10.2
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: logger
|
69
|
+
requirement: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - "~>"
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '1.6'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 1.6.6
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '1.6'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 1.6.6
|
47
87
|
- !ruby/object:Gem::Dependency
|
48
88
|
name: bundler
|
49
89
|
requirement: !ruby/object:Gem::Requirement
|
@@ -77,10 +117,13 @@ description: This plugin incrementally indexes Jekyll collections into Meilisear
|
|
77
117
|
email: hello@gilles.dev
|
78
118
|
executables: []
|
79
119
|
extensions: []
|
80
|
-
extra_rdoc_files:
|
120
|
+
extra_rdoc_files:
|
121
|
+
- README.md
|
81
122
|
files:
|
123
|
+
- README.md
|
82
124
|
- lib/jekyll-meilisearch.rb
|
83
|
-
- lib/jekyll/
|
125
|
+
- lib/jekyll-meilisearch/generator.rb
|
126
|
+
- lib/jekyll-meilisearch/version.rb
|
84
127
|
homepage: https://github.com/unicolored/jekyll-meilisearch
|
85
128
|
licenses:
|
86
129
|
- MIT
|
@@ -93,7 +136,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
136
|
requirements:
|
94
137
|
- - ">="
|
95
138
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
139
|
+
version: '2.7'
|
97
140
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
141
|
requirements:
|
99
142
|
- - ">="
|
@@ -1,236 +0,0 @@
|
|
1
|
-
require 'httparty'
|
2
|
-
require 'json'
|
3
|
-
require 'logger'
|
4
|
-
|
5
|
-
module Jekyll
|
6
|
-
class MeilisearchIndexer < Generator
|
7
|
-
def generate(site)
|
8
|
-
@logger = Logger.new(STDOUT)
|
9
|
-
@logger.level = Logger::INFO
|
10
|
-
|
11
|
-
log_info("Starting Meilisearch incremental indexing...")
|
12
|
-
config = load_config(site)
|
13
|
-
return unless validate_config(config)
|
14
|
-
|
15
|
-
documents = build_documents(site, config)
|
16
|
-
sync_with_meilisearch(config, documents)
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def log_info(message)
|
22
|
-
@logger.info(message)
|
23
|
-
end
|
24
|
-
|
25
|
-
def load_config(site)
|
26
|
-
site.config['meilisearch'] || {}
|
27
|
-
end
|
28
|
-
|
29
|
-
def validate_config(config)
|
30
|
-
unless config['url']
|
31
|
-
log_info("Error: Meilisearch URL not set in config. Skipping indexing.")
|
32
|
-
return false
|
33
|
-
end
|
34
|
-
unless config['api_key']
|
35
|
-
log_info("Error: Meilisearch API key not set in config. Skipping indexing.")
|
36
|
-
return false
|
37
|
-
end
|
38
|
-
true
|
39
|
-
end
|
40
|
-
|
41
|
-
def build_headers(api_key)
|
42
|
-
{
|
43
|
-
'Content-Type' => 'application/json',
|
44
|
-
'Authorization' => "Bearer #{api_key}"
|
45
|
-
}
|
46
|
-
end
|
47
|
-
|
48
|
-
def build_documents(site, config)
|
49
|
-
documents = []
|
50
|
-
collections_config = config['collections'] || { 'posts' => { 'fields' => ['title', 'content', 'url', 'date'] } }
|
51
|
-
|
52
|
-
collections_config.each do |collection_name, collection_settings|
|
53
|
-
collection = site.collections[collection_name]
|
54
|
-
if collection
|
55
|
-
log_info("Processing collection: '#{collection_name}'...")
|
56
|
-
fields_to_index = collection_settings['fields'] || ['title', 'content', 'url', 'date']
|
57
|
-
id_format = collection_settings['id_format'] || :default
|
58
|
-
|
59
|
-
collection_docs = collection.docs.map do |doc|
|
60
|
-
sanitized_id = generate_id(doc, collection_name, id_format)
|
61
|
-
doc_data = {
|
62
|
-
'id' => sanitized_id,
|
63
|
-
'content' => doc.content.strip,
|
64
|
-
'url' => doc.url
|
65
|
-
}
|
66
|
-
fields_to_index.each do |field|
|
67
|
-
next if ['id', 'content', 'url'].include?(field)
|
68
|
-
value = doc.data[field]
|
69
|
-
doc_data[field] = field == 'date' && value ? value.strftime('%Y-%m-%d') : value
|
70
|
-
end
|
71
|
-
doc_data
|
72
|
-
end
|
73
|
-
documents.concat(collection_docs)
|
74
|
-
else
|
75
|
-
log_info("Warning: Collection '#{collection_name}' not found. Skipping.")
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
if documents.empty?
|
80
|
-
log_info("No documents found across configured collections: #{collections_config.keys.join(', ')}. Cleaning up index...")
|
81
|
-
end
|
82
|
-
documents
|
83
|
-
end
|
84
|
-
|
85
|
-
def generate_id(doc, collection_name, id_format)
|
86
|
-
case id_format
|
87
|
-
when :default | :id
|
88
|
-
doc.id.gsub('/', '-')
|
89
|
-
.gsub(/[^a-zA-Z0-9_-]/, '-')
|
90
|
-
.gsub(/-+/, '-')
|
91
|
-
.downcase
|
92
|
-
.slice(0, 100)
|
93
|
-
when :url
|
94
|
-
doc.url
|
95
|
-
.gsub('/', '-')
|
96
|
-
.gsub(/[^a-zA-Z0-9_-]/, '-')
|
97
|
-
.gsub(/-+/, '-')
|
98
|
-
.downcase
|
99
|
-
.slice(0, 100)
|
100
|
-
else
|
101
|
-
doc.data['number'] ? "#{collection_name}-#{doc.data['number']}" : doc.id.gsub('/', '-')
|
102
|
-
.gsub(/[^a-zA-Z0-9_-]/, '-')
|
103
|
-
.gsub(/-+/, '-')
|
104
|
-
.downcase
|
105
|
-
.slice(0, 100)
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def sync_with_meilisearch(config, documents)
|
110
|
-
headers = build_headers(config['api_key'])
|
111
|
-
index_name = config['index_name'] || 'jekyll_documents'
|
112
|
-
create_index_if_missing(config['url'], index_name, headers)
|
113
|
-
|
114
|
-
meili_docs = fetch_all_documents(config['url'], index_name, headers)
|
115
|
-
if meili_docs.nil?
|
116
|
-
log_info("Failed to fetch existing documents. Falling back to full indexing.")
|
117
|
-
return full_index(config['url'], index_name, documents, headers)
|
118
|
-
end
|
119
|
-
|
120
|
-
meili_ids = meili_docs.map { |doc| doc['id'] }
|
121
|
-
jekyll_ids = documents.map { |doc| doc['id'] }
|
122
|
-
|
123
|
-
delete_obsolete_documents(config['url'], index_name, meili_ids - jekyll_ids, headers)
|
124
|
-
index_new_documents(config['url'], index_name, documents, headers) if documents.any?
|
125
|
-
end
|
126
|
-
|
127
|
-
def fetch_all_documents(url, index_name, headers)
|
128
|
-
documents = []
|
129
|
-
offset = 0
|
130
|
-
limit = 1000
|
131
|
-
loop do
|
132
|
-
response = attempt_request(
|
133
|
-
-> { HTTParty.get("#{url}/indexes/#{index_name}/documents?limit=#{limit}&offset=#{offset}", headers: headers, timeout: 30) },
|
134
|
-
"fetching documents"
|
135
|
-
)
|
136
|
-
return nil unless response&.success?
|
137
|
-
results = JSON.parse(response.body)['results']
|
138
|
-
documents.concat(results)
|
139
|
-
break if results.size < limit
|
140
|
-
offset += limit
|
141
|
-
end
|
142
|
-
documents
|
143
|
-
end
|
144
|
-
|
145
|
-
def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
|
146
|
-
return log_info("No documents to delete from Meilisearch.") if ids_to_delete.empty?
|
147
|
-
|
148
|
-
log_info("Deleting #{ids_to_delete.size} obsolete documents from Meilisearch...")
|
149
|
-
response = attempt_request(
|
150
|
-
-> { HTTParty.post("#{url}/indexes/#{index_name}/documents/delete-batch", body: ids_to_delete.to_json, headers: headers, timeout: 30) },
|
151
|
-
"deleting documents"
|
152
|
-
)
|
153
|
-
if response&.success?
|
154
|
-
log_info("Delete task queued successfully.")
|
155
|
-
elsif response
|
156
|
-
log_info("Failed to delete obsolete documents: #{response.code} - #{response.body}")
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
def index_new_documents(url, index_name, documents, headers)
|
161
|
-
log_info("Indexing #{documents.size} documents to Meilisearch...")
|
162
|
-
batch_size = 1000
|
163
|
-
documents.each_slice(batch_size) do |batch|
|
164
|
-
response = attempt_request(
|
165
|
-
-> { HTTParty.post("#{url}/indexes/#{index_name}/documents", body: batch.to_json, headers: headers, timeout: 30) },
|
166
|
-
"indexing documents"
|
167
|
-
)
|
168
|
-
if response&.code == 202
|
169
|
-
if response.body
|
170
|
-
task = JSON.parse(response.body)
|
171
|
-
log_info("Task queued: UID #{task['taskUid']}. Check status at #{url}/tasks/#{task['taskUid']}")
|
172
|
-
else
|
173
|
-
log_info("Task queued (202), but no response body received.")
|
174
|
-
end
|
175
|
-
elsif response.nil?
|
176
|
-
log_info("Failed to queue indexing task: No response received from Meilisearch.")
|
177
|
-
else
|
178
|
-
log_info("Failed to queue indexing task: #{response.code} - #{response.body}")
|
179
|
-
end
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
def create_index_if_missing(url, index_name, headers)
|
184
|
-
log_info("Checking if index '#{index_name}' exists...")
|
185
|
-
response = HTTParty.get("#{url}/indexes/#{index_name}", headers: headers, timeout: 30)
|
186
|
-
return if response.success?
|
187
|
-
|
188
|
-
if response.code == 404
|
189
|
-
log_info("Index '#{index_name}' not found. Creating it...")
|
190
|
-
response = attempt_request(
|
191
|
-
-> { HTTParty.post("#{url}/indexes", body: { "uid" => index_name }.to_json, headers: headers, timeout: 30) },
|
192
|
-
"creating index"
|
193
|
-
)
|
194
|
-
if response&.success? || response&.code == 202
|
195
|
-
log_info("Index '#{index_name}' created successfully.")
|
196
|
-
elsif response
|
197
|
-
log_info("Failed to create index: #{response.code} - #{response.body}")
|
198
|
-
end
|
199
|
-
else
|
200
|
-
log_info("Error checking index: #{response.code} - #{response.body}")
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
def full_index(url, index_name, documents, headers)
|
205
|
-
log_info("Performing full index reset as fallback...")
|
206
|
-
response = attempt_request(
|
207
|
-
-> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", headers: headers, timeout: 30) },
|
208
|
-
"resetting index"
|
209
|
-
)
|
210
|
-
unless response&.success? || response&.code == 404
|
211
|
-
if response.nil?
|
212
|
-
log_info("Failed to reset index: No response received from Meilisearch.")
|
213
|
-
else
|
214
|
-
log_info("Failed to reset index: #{response.code} - #{response.body}")
|
215
|
-
end
|
216
|
-
return
|
217
|
-
end
|
218
|
-
|
219
|
-
index_new_documents(url, index_name, documents, headers) if documents.any?
|
220
|
-
end
|
221
|
-
|
222
|
-
def attempt_request(request, action, retries: 3)
|
223
|
-
retries.times do |i|
|
224
|
-
begin
|
225
|
-
response = request.call
|
226
|
-
return response if response.success? || [202, 404].include?(response.code)
|
227
|
-
rescue HTTParty::Error => e
|
228
|
-
log_info("Attempt #{i + 1} failed while #{action}: #{e.message}")
|
229
|
-
sleep(2 ** i) # Exponential backoff
|
230
|
-
end
|
231
|
-
end
|
232
|
-
log_info("All retries failed for #{action}.")
|
233
|
-
nil
|
234
|
-
end
|
235
|
-
end
|
236
|
-
end
|