jekyll-meilisearch 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +133 -0
- data/lib/jekyll-meilisearch/generator.rb +244 -0
- data/lib/jekyll-meilisearch/version.rb +7 -0
- data/lib/jekyll-meilisearch.rb +10 -0
- metadata +111 -6
- data/lib/jekyll/meilisearch_indexer.rb +0 -232
- data/lib/jekyll_meilisearch.rb +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b88f29e6292e57d37582a005ccd165d9916bead6896ec325effc4f7122a841dc
|
4
|
+
data.tar.gz: 44cc18269f75390ebb9849c2a6eee43f8441a1046b90379697171a10e8cb070b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc89972661de0f3c9163fd9086bc60b8a28500745514f15f5155ef65265d4c7aeb6210d3fb64d62e58730c7d9a4b080b2a7b145717119a59746f86429330a80e
|
7
|
+
data.tar.gz: c3f56c3d650ab6038247eba09c25896e21e70c387d459a6a6fb2e5373469fe936a7c917996406c14379bb8ab16aa7f560e76870bea0459b5ff30aa0aa4a057a3
|
data/README.md
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
# Jekyll Meilisearch Plugin
|
2
|
+
|
3
|
+
A Jekyll plugin that indexes your site’s content into Meilisearch, a fast and lightweight search engine. This plugin supports incremental indexing, ensuring efficient updates by only syncing changes between your Jekyll site and Meilisearch.
|
4
|
+
|
5
|
+
[CI status](https://github.com/unicolored/jekyll-meilisearch/actions/workflows/ruby.yml) [Gem version](https://badge.fury.io/rb/jekyll-meilisearch)
|
6
|
+
|
7
|
+
## Features
|
8
|
+
- Indexes Jekyll collections (e.g., posts, pages) into Meilisearch.
|
9
|
+
- Incremental updates: adds new documents, deletes obsolete ones, and skips unchanged content.
|
10
|
+
- Configurable via _config.yml: customize fields, collections, and ID formats.
|
11
|
+
- Robust error handling with retries and fallback to full indexing if needed.
|
12
|
+
- Pagination support for large sites.
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
Add the gem to your Jekyll site’s Gemfile:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
gem "jekyll-meilisearch"
|
20
|
+
```
|
21
|
+
|
22
|
+
And then add this line to your site's `_config.yml`:
|
23
|
+
|
24
|
+
```yml
|
25
|
+
plugins:
|
26
|
+
- jekyll-meilisearch
|
27
|
+
```
|
28
|
+
|
29
|
+
## Configuration
|
30
|
+
Add the following to your Jekyll _config.yml (or a separate config file like _config.prod.yml):
|
31
|
+
|
32
|
+
```yaml
|
33
|
+
meilisearch:
|
34
|
+
url: "http://localhost:7700" # Your Meilisearch instance URL
|
35
|
+
api_key: "your-api-key" # Meilisearch API key
|
36
|
+
index_name: "my_site" # Optional: defaults to "jekyll_documents"
|
37
|
+
collections:
|
38
|
+
posts:
|
39
|
+
fields: ["title", "content", "url", "date"] # Fields to index
|
40
|
+
id_format: "default" # Optional: "default", "id" or "url"
|
41
|
+
pages:
|
42
|
+
fields: ["title", "content", "url"]
|
43
|
+
```
|
44
|
+
|
45
|
+
## Configuration Options
|
46
|
+
* `url`: The Meilisearch server URL (required).
|
47
|
+
* `api_key`: The Meilisearch API key (required). Recommended: use a dedicated api key for your index, not the admin one.
|
48
|
+
* `index_name`: The name of the Meilisearch index (optional, defaults to jekyll_documents).
|
49
|
+
* `collections`: A hash of Jekyll collections to index.
|
50
|
+
* `fields`: Array of fields to extract from each document (e.g., title, content, url, date).
|
51
|
+
* `id_format`: How to generate document IDs:
|
52
|
+
* "default" | "id": Uses collection-name-number if a number field exists, otherwise sanitizes the document ID.
|
53
|
+
* "url": Uses the document’s URL, sanitized.
|
54
|
+
* fallback: if "number" exists, uses "collection_name" + "number"
|
55
|
+
|
56
|
+
Run your Jekyll build:
|
57
|
+
|
58
|
+
```shell
|
59
|
+
bundle exec jekyll build
|
60
|
+
```
|
61
|
+
|
62
|
+
Or with multiple config files:
|
63
|
+
|
64
|
+
```shell
|
65
|
+
bundle exec jekyll build --config _config.yml,_config.prod.yml
|
66
|
+
```
|
67
|
+
|
68
|
+
## Usage
|
69
|
+
Ensure Meilisearch is running and accessible at the configured url.
|
70
|
+
Configure your _config.yml with the necessary meilisearch settings.
|
71
|
+
Build your site. The plugin will:
|
72
|
+
- Create the Meilisearch index if it doesn’t exist.
|
73
|
+
- Fetch existing documents from Meilisearch.
|
74
|
+
- Delete obsolete documents.
|
75
|
+
- Index new or updated documents.
|
76
|
+
- Log details about the indexing process through Jekyll's logger.
|
77
|
+
|
78
|
+
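Include the following snippet to add search to your site's front end. It reads its key from `meilisearch.search_api_key` in `_config.yml`; that value is embedded in the generated page, so use a search-only key there rather than the indexing `api_key`: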
|
79
|
+
```html
|
80
|
+
|
81
|
+
<!-- Search Input -->
|
82
|
+
<div class="border m-6 mb-6 p-4">
|
83
|
+
<input type="text" id="search" class="border p-2 w-full" placeholder="Rechercher...">
|
84
|
+
<div id="results" class="mt-2 border p-4">Results will appear here.</div>
|
85
|
+
</div>
|
86
|
+
|
87
|
+
<!-- Meilisearch JS SDK -->
|
88
|
+
<script src="https://cdn.jsdelivr.net/npm/meilisearch@0.40.0/dist/bundles/meilisearch.umd.js"></script>
|
89
|
+
<script>
|
90
|
+
const meilisearchConfig = {
|
91
|
+
host: "{{ site.meilisearch.url | default: 'http://localhost:7700' }}",
|
92
|
+
apiKey: "{{ site.meilisearch.search_api_key}}"
|
93
|
+
};
|
94
|
+
const client = new MeiliSearch(meilisearchConfig);
|
95
|
+
const index = client.index('{{site.meilisearch.index_name}}');
|
96
|
+
|
97
|
+
document.getElementById('search').addEventListener('input', async (e) => {
|
98
|
+
const query = e.target.value;
|
99
|
+
if (query.length < 2) {
|
100
|
+
document.getElementById('results').innerHTML = '';
|
101
|
+
return;
|
102
|
+
}
|
103
|
+
try {
|
104
|
+
const results = await index.search(query);
|
105
|
+
document.getElementById('results').innerHTML = results.hits
|
106
|
+
.map(hit => `<p><a href="${hit.url}" class="text-blue-500 hover:underline">${hit.title}</a></p>`)
|
107
|
+
.join('');
|
108
|
+
} catch (error) {
|
109
|
+
console.error('Search error:', error);
|
110
|
+
document.getElementById('results').innerHTML = '<p class="text-red-500">Search failed. Please try again.</p>';
|
111
|
+
}
|
112
|
+
});
|
113
|
+
</script>
|
114
|
+
|
115
|
+
```
|
116
|
+
|
117
|
+
## Skip development
|
118
|
+
|
119
|
+
Use `disable_in_development: true` if you want to turn off meilisearch indexation when `jekyll.environment == "development"`,
|
120
|
+
but don't want to remove the plugin (so you don't accidentally commit the removal). Default value is `false`.
|
121
|
+
|
122
|
+
```yml
|
123
|
+
meilisearch:
|
124
|
+
disable_in_development: true
|
125
|
+
```
|
126
|
+
|
127
|
+
## Contributing
|
128
|
+
|
129
|
+
1. Fork it (https://github.com/unicolored/jekyll-meilisearch/fork)
|
130
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
131
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
132
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
133
|
+
5. Create a new Pull Request
|
@@ -0,0 +1,244 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JekyllMeilisearch
  # Jekyll generator that mirrors the configured collections into a
  # Meilisearch index.
  #
  # On every build it turns the documents of each configured collection into
  # plain hashes, fetches the documents currently stored in the index, deletes
  # the ones that no longer exist in the site and (re)submits the current ones.
  # All settings are read from the `meilisearch` key of the site configuration.
  class MeilisearchIndexer < Jekyll::Generator
    safe true
    priority :lowest

    # Fields indexed when a collection does not list its own.
    DEFAULT_FIELDS = %w(title content url date).freeze
    # Fields that are always taken from the document itself, never from front matter.
    RESERVED_FIELDS = %w(id content url).freeze
    # Index used when `index_name` is not configured.
    DEFAULT_INDEX_NAME = "jekyll_documents"
    # Page size for fetching and batch size for submitting documents.
    BATCH_SIZE = 1000
    # Per-request timeout, in seconds.
    REQUEST_TIMEOUT = 30
    # Generated ids are truncated to this many characters.
    MAX_ID_LENGTH = 100

    # Main plugin action, called by Jekyll-core.
    #
    # @param site [Jekyll::Site] the site being built
    # @return [void]
    def generate(site)
      @site = site
      if disabled_in_development?
        Jekyll.logger.info "Jekyll Meilisearch:", "Skipping meilisearch indexation in development"
        return
      end
      Jekyll.logger.info "Starting Meilisearch incremental indexing..."
      return unless validate_config

      @documents = build_documents
      sync_with_meilisearch
    end

    private

    # Returns the plugin's config or an empty hash if not set.
    def config
      @config ||= @site.config["meilisearch"] || {}
    end

    # Checks that the two mandatory settings (`url`, `api_key`) are present.
    #
    # @return [Boolean] false (after logging the reason) when indexing must be skipped
    def validate_config
      unless config["url"]
        Jekyll.logger.info "Error: Meilisearch URL not set in config. Skipping indexing."
        return false
      end
      unless config["api_key"]
        Jekyll.logger.info "Error: Meilisearch API key not set in config. Skipping indexing."
        return false
      end
      true
    end

    # Builds the HTTP headers sent with every Meilisearch request.
    #
    # @param api_key [String] key placed in the bearer Authorization header
    # @return [Hash{String => String}]
    def build_headers(api_key)
      {
        "Content-Type"  => "application/json",
        "Authorization" => "Bearer #{api_key}",
      }
    end

    # Collects one hash per document for every configured collection.
    # Collections that are configured but not present in the site are skipped
    # with a log line.
    #
    # @return [Array<Hash>] documents ready to be serialised to JSON
    def build_documents
      documents = []
      collections_config = config["collections"] || { "posts" => { "fields" => DEFAULT_FIELDS } }

      collections_config.each do |collection_name, collection_settings|
        collection = @site.collections[collection_name]
        unless collection
          Jekyll.logger.info "Warning: Collection '#{collection_name}' not found. Skipping."
          next
        end

        Jekyll.logger.info "Processing collection: '#{collection_name}'..."
        # A collection declared without any settings (`posts:`) parses to nil.
        settings = collection_settings || {}
        fields_to_index = settings["fields"] || DEFAULT_FIELDS
        id_format = settings["id_format"] || :default

        collection_docs = collection.docs.map do |doc|
          build_document(doc, collection_name, fields_to_index, id_format)
        end
        documents.concat(collection_docs)
      end

      if documents.empty?
        Jekyll.logger.info "No documents found across configured collections: #{collections_config.keys.join(", ")}. Cleaning up index..."
      end
      documents
    end

    # Converts a single Jekyll document into the hash sent to Meilisearch.
    # `id`, `content` and `url` are always present; every other requested field
    # is read from the document's front matter.
    def build_document(doc, collection_name, fields_to_index, id_format)
      doc_data = {
        "id"      => generate_id(doc, collection_name, id_format),
        "content" => doc.content.to_s.strip,
        "url"     => doc.url,
      }
      fields_to_index.each do |field|
        next if RESERVED_FIELDS.include?(field)

        value = doc.data[field]
        # Only format values that can actually be formatted; a date given as a
        # plain string in the front matter is passed through unchanged.
        format_as_date = field == "date" && value.respond_to?(:strftime)
        doc_data[field] = format_as_date ? value.strftime("%Y-%m-%d") : value
      end
      doc_data
    end

    # Produces the Meilisearch primary key for a document.
    #
    # `id_format` comes from YAML and is therefore normally a String, while the
    # built-in default is a Symbol, so the comparison is done on `to_s`.
    #
    # - "url": the sanitised document URL.
    # - anything else ("default", "id", unset): "<collection>-<number>" when the
    #   front matter has a `number` field, otherwise the sanitised document id.
    #
    # @return [String]
    def generate_id(doc, collection_name, id_format)
      return normalize_id(doc.url) if id_format.to_s == "url"

      number = doc.data["number"]
      number ? "#{collection_name}-#{number}" : normalize_id(doc.id)
    end

    # Replaces every character outside [a-zA-Z0-9_-] (slashes included) with
    # "-", collapses runs of "-", lowercases and truncates the result.
    def normalize_id(str)
      str.to_s
        .gsub(%r![^a-zA-Z0-9_-]!, "-")
        .squeeze("-")
        .downcase
        .slice(0, MAX_ID_LENGTH)
    end

    # Brings the index in line with @documents: creates the index when needed,
    # removes documents that are no longer part of the site and submits the
    # current ones. Falls back to a full reset when the existing documents
    # cannot be fetched.
    def sync_with_meilisearch
      headers = build_headers(config["api_key"])
      index_name = config["index_name"] || DEFAULT_INDEX_NAME
      create_index_if_missing(config["url"], index_name, headers)

      meili_docs = fetch_all_documents(config["url"], index_name, headers)
      if meili_docs.nil?
        Jekyll.logger.info "Failed to fetch existing documents. Falling back to full indexing."
        return full_index(config["url"], index_name, @documents, headers)
      end

      meili_ids = meili_docs.map { |doc| doc["id"] }
      jekyll_ids = @documents.map { |doc| doc["id"] }

      delete_obsolete_documents(config["url"], index_name, meili_ids - jekyll_ids, headers)
      index_new_documents(config["url"], index_name, @documents, headers) if @documents.any?
    end

    # Pages through the index and returns every stored document.
    #
    # @return [Array<Hash>, nil] nil when a page could not be fetched or its
    #   body did not contain a `results` array
    def fetch_all_documents(url, index_name, headers)
      documents = []
      offset = 0
      loop do
        response = attempt_request(
          lambda {
            HTTParty.get("#{url}/indexes/#{index_name}/documents?limit=#{BATCH_SIZE}&offset=#{offset}",
                         :headers => headers, :timeout => REQUEST_TIMEOUT)
          },
          "fetching documents"
        )
        return nil unless response&.success?

        payload = JSON.parse(response.body)
        results = payload["results"] if payload.is_a?(Hash)
        return nil unless results.is_a?(Array)

        documents.concat(results)
        # A short page means there is nothing left to fetch.
        break if results.size < BATCH_SIZE

        offset += BATCH_SIZE
      end
      documents
    rescue JSON::ParserError => e
      Jekyll.logger.info "Failed to parse documents response: #{e.message}"
      nil
    end

    # Asks Meilisearch to delete the given ids in one batch request.
    def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
      if ids_to_delete.empty?
        Jekyll.logger.info "No documents to delete from Meilisearch."
        return
      end

      Jekyll.logger.info "Deleting #{ids_to_delete.size} obsolete documents from Meilisearch..."
      response = attempt_request(
        lambda {
          HTTParty.post("#{url}/indexes/#{index_name}/documents/delete-batch",
                        :body => ids_to_delete.to_json, :headers => headers, :timeout => REQUEST_TIMEOUT)
        },
        "deleting documents"
      )
      if response&.success?
        Jekyll.logger.info "Delete task queued successfully."
      elsif response
        Jekyll.logger.info "Failed to delete obsolete documents: #{response.code} - #{response.body}"
      end
    end

    # Submits the documents in batches of BATCH_SIZE and logs the task uid
    # returned for each accepted batch.
    def index_new_documents(url, index_name, documents, headers)
      Jekyll.logger.info "Indexing #{documents.size} documents to Meilisearch..."
      documents.each_slice(BATCH_SIZE) do |batch|
        response = attempt_request(
          lambda {
            HTTParty.post("#{url}/indexes/#{index_name}/documents",
                          :body => batch.to_json, :headers => headers, :timeout => REQUEST_TIMEOUT)
          },
          "indexing documents"
        )
        if response.nil?
          Jekyll.logger.info "Failed to queue indexing task: No response received from Meilisearch."
        elsif response.code != 202
          Jekyll.logger.info "Failed to queue indexing task: #{response.code} - #{response.body}"
        elsif response.body
          task = JSON.parse(response.body)
          Jekyll.logger.info "Task queued: UID #{task["taskUid"]}. Check status at #{url}/tasks/#{task["taskUid"]}"
        else
          Jekyll.logger.info "Task queued (202), but no response body received."
        end
      end
    end

    # Looks the index up and creates it when Meilisearch answers 404.
    # The lookup goes through attempt_request so that a refused connection or a
    # timeout is retried and logged instead of aborting the build.
    def create_index_if_missing(url, index_name, headers)
      Jekyll.logger.info "Checking if index '#{index_name}' exists..."
      response = attempt_request(
        -> { HTTParty.get("#{url}/indexes/#{index_name}", :headers => headers, :timeout => REQUEST_TIMEOUT) },
        "checking index"
      )
      # nil: every attempt failed and was already logged; success: index exists.
      return if response.nil? || response.success?

      # attempt_request only hands back successful or 404 responses, so the
      # index is missing at this point.
      Jekyll.logger.info "Index '#{index_name}' not found. Creating it..."
      response = attempt_request(
        lambda {
          HTTParty.post("#{url}/indexes",
                        :body => { "uid" => index_name }.to_json, :headers => headers, :timeout => REQUEST_TIMEOUT)
        },
        "creating index"
      )
      if response&.success?
        Jekyll.logger.info "Index '#{index_name}' created successfully."
      elsif response
        Jekyll.logger.info "Failed to create index: #{response.code} - #{response.body}"
      end
    end

    # Fallback used when the existing documents cannot be listed: removes every
    # document from the index, then submits the current ones. Nothing is
    # submitted when the reset itself fails.
    def full_index(url, index_name, documents, headers)
      Jekyll.logger.info "Performing full index reset as fallback..."
      response = attempt_request(
        -> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", :headers => headers, :timeout => REQUEST_TIMEOUT) },
        "resetting index"
      )
      if response.nil?
        Jekyll.logger.info "Failed to reset index: No response received from Meilisearch."
        return
      end
      unless response.success? || response.code == 404
        Jekyll.logger.info "Failed to reset index: #{response.code} - #{response.body}"
        return
      end

      index_new_documents(url, index_name, documents, headers) if documents.any?
    end

    # Runs `request` up to `retries` times with exponential backoff.
    #
    # @param request [#call] callable performing the HTTP request
    # @param action [String] description used in log messages
    # @param retries [Integer] maximum number of attempts
    # @return [Object, nil] the response when it is successful or a 404 (callers
    #   treat "not found" as a meaningful answer), nil when every attempt failed
    def attempt_request(request, action, retries: 3)
      retries.times do |i|
        begin
          response = request.call
          return response if response.success? || response.code == 404

          Jekyll.logger.info "Attempt #{i + 1} failed while #{action}: HTTP #{response.code}"
        rescue HTTParty::Error, SystemCallError, SocketError, Timeout::Error, IOError => e
          # Connection refused, DNS failures and timeouts are not HTTParty::Error
          # subclasses, so they are listed explicitly.
          Jekyll.logger.info "Attempt #{i + 1} failed while #{action}: #{e.message}"
        end
        sleep(2**i) if i < retries - 1 # Exponential backoff; no wait after the last attempt
      end
      Jekyll.logger.info "All retries failed for #{action}."
      nil
    end

    # True when `disable_in_development` is set and Jekyll runs in the
    # "development" environment.
    def disabled_in_development?
      config["disable_in_development"] && Jekyll.env == "development"
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-meilisearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- unicolored
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3.
|
33
|
+
version: '3.7'
|
34
34
|
- - "<"
|
35
35
|
- !ruby/object:Gem::Version
|
36
36
|
version: '5.0'
|
@@ -40,10 +40,50 @@ dependencies:
|
|
40
40
|
requirements:
|
41
41
|
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: '3.
|
43
|
+
version: '3.7'
|
44
44
|
- - "<"
|
45
45
|
- !ruby/object:Gem::Version
|
46
46
|
version: '5.0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: json
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '2.10'
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 2.10.2
|
57
|
+
type: :runtime
|
58
|
+
prerelease: false
|
59
|
+
version_requirements: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - "~>"
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '2.10'
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 2.10.2
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: logger
|
69
|
+
requirement: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - "~>"
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '1.6'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 1.6.6
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '1.6'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 1.6.6
|
47
87
|
- !ruby/object:Gem::Dependency
|
48
88
|
name: bundler
|
49
89
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,6 +98,20 @@ dependencies:
|
|
58
98
|
- - "~>"
|
59
99
|
- !ruby/object:Gem::Version
|
60
100
|
version: '2.0'
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: nokogiri
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - "~>"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '1.6'
|
108
|
+
type: :development
|
109
|
+
prerelease: false
|
110
|
+
version_requirements: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - "~>"
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '1.6'
|
61
115
|
- !ruby/object:Gem::Dependency
|
62
116
|
name: rake
|
63
117
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,15 +126,66 @@ dependencies:
|
|
72
126
|
- - "~>"
|
73
127
|
- !ruby/object:Gem::Version
|
74
128
|
version: '13.0'
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
name: rspec
|
131
|
+
requirement: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - "~>"
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '3.0'
|
136
|
+
type: :development
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - "~>"
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: '3.0'
|
143
|
+
- !ruby/object:Gem::Dependency
|
144
|
+
name: rubocop-jekyll
|
145
|
+
requirement: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - "~>"
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 0.14.0
|
150
|
+
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
requirements:
|
154
|
+
- - "~>"
|
155
|
+
- !ruby/object:Gem::Version
|
156
|
+
version: 0.14.0
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
name: typhoeus
|
159
|
+
requirement: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
161
|
+
- - ">="
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0.7'
|
164
|
+
- - "<"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '2.0'
|
167
|
+
type: :development
|
168
|
+
prerelease: false
|
169
|
+
version_requirements: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.7'
|
174
|
+
- - "<"
|
175
|
+
- !ruby/object:Gem::Version
|
176
|
+
version: '2.0'
|
75
177
|
description: This plugin incrementally indexes Jekyll collections into Meilisearch
|
76
178
|
for fast search capabilities.
|
77
179
|
email: hello@gilles.dev
|
78
180
|
executables: []
|
79
181
|
extensions: []
|
80
|
-
extra_rdoc_files:
|
182
|
+
extra_rdoc_files:
|
183
|
+
- README.md
|
81
184
|
files:
|
82
|
-
-
|
83
|
-
- lib/
|
185
|
+
- README.md
|
186
|
+
- lib/jekyll-meilisearch.rb
|
187
|
+
- lib/jekyll-meilisearch/generator.rb
|
188
|
+
- lib/jekyll-meilisearch/version.rb
|
84
189
|
homepage: https://github.com/unicolored/jekyll-meilisearch
|
85
190
|
licenses:
|
86
191
|
- MIT
|
@@ -1,232 +0,0 @@
|
|
1
|
-
require 'httparty'
|
2
|
-
require 'json'
|
3
|
-
require 'logger'
|
4
|
-
|
5
|
-
module Jekyll
|
6
|
-
class MeilisearchIndexer < Generator
|
7
|
-
def generate(site)
|
8
|
-
@logger = Logger.new(STDOUT)
|
9
|
-
@logger.level = Logger::INFO
|
10
|
-
|
11
|
-
log_info("Starting Meilisearch incremental indexing...")
|
12
|
-
config = load_config(site)
|
13
|
-
return unless validate_config(config)
|
14
|
-
|
15
|
-
documents = build_documents(site, config)
|
16
|
-
sync_with_meilisearch(config, documents)
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def log_info(message)
|
22
|
-
@logger.info(message)
|
23
|
-
end
|
24
|
-
|
25
|
-
def load_config(site)
|
26
|
-
site.config['meilisearch'] || {}
|
27
|
-
end
|
28
|
-
|
29
|
-
def validate_config(config)
|
30
|
-
unless config['url']
|
31
|
-
log_info("Error: Meilisearch URL not set in config. Skipping indexing.")
|
32
|
-
return false
|
33
|
-
end
|
34
|
-
unless config['api_key']
|
35
|
-
log_info("Error: Meilisearch API key not set in config. Skipping indexing.")
|
36
|
-
return false
|
37
|
-
end
|
38
|
-
true
|
39
|
-
end
|
40
|
-
|
41
|
-
def build_headers(api_key)
|
42
|
-
{
|
43
|
-
'Content-Type' => 'application/json',
|
44
|
-
'Authorization' => "Bearer #{api_key}"
|
45
|
-
}
|
46
|
-
end
|
47
|
-
|
48
|
-
def build_documents(site, config)
|
49
|
-
documents = []
|
50
|
-
collections_config = config['collections'] || { 'posts' => { 'fields' => %w[title content url date] } }
|
51
|
-
|
52
|
-
collections_config.each do |collection_name, collection_settings|
|
53
|
-
collection = site.collections[collection_name]
|
54
|
-
if collection
|
55
|
-
log_info("Processing collection: '#{collection_name}'...")
|
56
|
-
fields_to_index = collection_settings['fields'] || %w[title content url date]
|
57
|
-
id_format = collection_settings['id_format'] || :default
|
58
|
-
|
59
|
-
collection_docs = collection.docs.map do |doc|
|
60
|
-
sanitized_id = generate_id(doc, collection_name, id_format)
|
61
|
-
doc_data = {
|
62
|
-
'id' => sanitized_id,
|
63
|
-
'content' => doc.content.strip,
|
64
|
-
'url' => doc.url
|
65
|
-
}
|
66
|
-
fields_to_index.each do |field|
|
67
|
-
next if %w[id content url].include?(field)
|
68
|
-
value = doc.data[field]
|
69
|
-
doc_data[field] = field == 'date' && value ? value.strftime('%Y-%m-%d') : value
|
70
|
-
end
|
71
|
-
doc_data
|
72
|
-
end
|
73
|
-
documents.concat(collection_docs)
|
74
|
-
else
|
75
|
-
log_info("Warning: Collection '#{collection_name}' not found. Skipping.")
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
if documents.empty?
|
80
|
-
log_info("No documents found across configured collections: #{collections_config.keys.join(', ')}. Cleaning up index...")
|
81
|
-
end
|
82
|
-
documents
|
83
|
-
end
|
84
|
-
|
85
|
-
def generate_id(doc, collection_name, id_format)
|
86
|
-
# Helper method to normalize strings
|
87
|
-
normalize = ->(str) do
|
88
|
-
str.gsub('/', '-')
|
89
|
-
.gsub(/[^a-zA-Z0-9_-]/, '-')
|
90
|
-
.gsub(/-+/, '-')
|
91
|
-
.downcase
|
92
|
-
.slice(0, 100)
|
93
|
-
end
|
94
|
-
|
95
|
-
case id_format
|
96
|
-
when :default, :id
|
97
|
-
normalize.call(doc.id)
|
98
|
-
when :url
|
99
|
-
normalize.call(doc.url)
|
100
|
-
else
|
101
|
-
doc.data['number'] ? "#{collection_name}-#{doc.data['number']}" : normalize.call(doc.id)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
def sync_with_meilisearch(config, documents)
|
106
|
-
headers = build_headers(config['api_key'])
|
107
|
-
index_name = config['index_name'] || 'jekyll_documents'
|
108
|
-
create_index_if_missing(config['url'], index_name, headers)
|
109
|
-
|
110
|
-
meili_docs = fetch_all_documents(config['url'], index_name, headers)
|
111
|
-
if meili_docs.nil?
|
112
|
-
log_info("Failed to fetch existing documents. Falling back to full indexing.")
|
113
|
-
return full_index(config['url'], index_name, documents, headers)
|
114
|
-
end
|
115
|
-
|
116
|
-
meili_ids = meili_docs.map { |doc| doc['id'] }
|
117
|
-
jekyll_ids = documents.map { |doc| doc['id'] }
|
118
|
-
|
119
|
-
delete_obsolete_documents(config['url'], index_name, meili_ids - jekyll_ids, headers)
|
120
|
-
index_new_documents(config['url'], index_name, documents, headers) if documents.any?
|
121
|
-
end
|
122
|
-
|
123
|
-
def fetch_all_documents(url, index_name, headers)
|
124
|
-
documents = []
|
125
|
-
offset = 0
|
126
|
-
limit = 1000
|
127
|
-
loop do
|
128
|
-
response = attempt_request(
|
129
|
-
-> { HTTParty.get("#{url}/indexes/#{index_name}/documents?limit=#{limit}&offset=#{offset}", headers: headers, timeout: 30) },
|
130
|
-
"fetching documents"
|
131
|
-
)
|
132
|
-
return nil unless response&.success?
|
133
|
-
results = JSON.parse(response.body)['results']
|
134
|
-
documents.concat(results)
|
135
|
-
break if results.size < limit
|
136
|
-
offset += limit
|
137
|
-
end
|
138
|
-
documents
|
139
|
-
end
|
140
|
-
|
141
|
-
def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
|
142
|
-
return log_info("No documents to delete from Meilisearch.") if ids_to_delete.empty?
|
143
|
-
|
144
|
-
log_info("Deleting #{ids_to_delete.size} obsolete documents from Meilisearch...")
|
145
|
-
response = attempt_request(
|
146
|
-
-> { HTTParty.post("#{url}/indexes/#{index_name}/documents/delete-batch", body: ids_to_delete.to_json, headers: headers, timeout: 30) },
|
147
|
-
"deleting documents"
|
148
|
-
)
|
149
|
-
if response&.success?
|
150
|
-
log_info("Delete task queued successfully.")
|
151
|
-
elsif response
|
152
|
-
log_info("Failed to delete obsolete documents: #{response.code} - #{response.body}")
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
def index_new_documents(url, index_name, documents, headers)
|
157
|
-
log_info("Indexing #{documents.size} documents to Meilisearch...")
|
158
|
-
batch_size = 1000
|
159
|
-
documents.each_slice(batch_size) do |batch|
|
160
|
-
response = attempt_request(
|
161
|
-
-> { HTTParty.post("#{url}/indexes/#{index_name}/documents", body: batch.to_json, headers: headers, timeout: 30) },
|
162
|
-
"indexing documents"
|
163
|
-
)
|
164
|
-
if response&.code == 202
|
165
|
-
if response.body
|
166
|
-
task = JSON.parse(response.body)
|
167
|
-
log_info("Task queued: UID #{task['taskUid']}. Check status at #{url}/tasks/#{task['taskUid']}")
|
168
|
-
else
|
169
|
-
log_info("Task queued (202), but no response body received.")
|
170
|
-
end
|
171
|
-
elsif response.nil?
|
172
|
-
log_info("Failed to queue indexing task: No response received from Meilisearch.")
|
173
|
-
else
|
174
|
-
log_info("Failed to queue indexing task: #{response.code} - #{response.body}")
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
178
|
-
|
179
|
-
def create_index_if_missing(url, index_name, headers)
|
180
|
-
log_info("Checking if index '#{index_name}' exists...")
|
181
|
-
response = HTTParty.get("#{url}/indexes/#{index_name}", headers: headers, timeout: 30)
|
182
|
-
return if response.success?
|
183
|
-
|
184
|
-
if response.code == 404
|
185
|
-
log_info("Index '#{index_name}' not found. Creating it...")
|
186
|
-
response = attempt_request(
|
187
|
-
-> { HTTParty.post("#{url}/indexes", body: { "uid" => index_name }.to_json, headers: headers, timeout: 30) },
|
188
|
-
"creating index"
|
189
|
-
)
|
190
|
-
if response&.success? || response&.code == 202
|
191
|
-
log_info("Index '#{index_name}' created successfully.")
|
192
|
-
elsif response
|
193
|
-
log_info("Failed to create index: #{response.code} - #{response.body}")
|
194
|
-
end
|
195
|
-
else
|
196
|
-
log_info("Error checking index: #{response.code} - #{response.body}")
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
def full_index(url, index_name, documents, headers)
|
201
|
-
log_info("Performing full index reset as fallback...")
|
202
|
-
response = attempt_request(
|
203
|
-
-> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", headers: headers, timeout: 30) },
|
204
|
-
"resetting index"
|
205
|
-
)
|
206
|
-
unless response&.success? || response&.code == 404
|
207
|
-
if response.nil?
|
208
|
-
log_info("Failed to reset index: No response received from Meilisearch.")
|
209
|
-
else
|
210
|
-
log_info("Failed to reset index: #{response.code} - #{response.body}")
|
211
|
-
end
|
212
|
-
return
|
213
|
-
end
|
214
|
-
|
215
|
-
index_new_documents(url, index_name, documents, headers) if documents.any?
|
216
|
-
end
|
217
|
-
|
218
|
-
def attempt_request(request, action, retries: 3)
|
219
|
-
retries.times do |i|
|
220
|
-
begin
|
221
|
-
response = request.call
|
222
|
-
return response if response.success? || [202, 404].include?(response.code)
|
223
|
-
rescue HTTParty::Error => e
|
224
|
-
log_info("Attempt #{i + 1} failed while #{action}: #{e.message}")
|
225
|
-
sleep(2 ** i) # Exponential backoff
|
226
|
-
end
|
227
|
-
end
|
228
|
-
log_info("All retries failed for #{action}.")
|
229
|
-
nil
|
230
|
-
end
|
231
|
-
end
|
232
|
-
end
|
data/lib/jekyll_meilisearch.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require 'jekyll/meilisearch_indexer'
|