jekyll-meilisearch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 2bab52d0823a5c5f10cc3a9a77490caef8e7632c7503b1fbdeb44fec7522915f
4
+ data.tar.gz: '062748a5c5060016fb9742bfbc5c2c459ff51e2dd6fe0b1151670c1dae1de14f'
5
+ SHA512:
6
+ metadata.gz: ffdae7915bfb6a64ccd99f7824bf1b663741efa8af0f09db3a307c1dae33a57eb3cf52cec136bebde796b84171b1e1f74688e08245c7978fa0e13e4af7079dee
7
+ data.tar.gz: db38552dbf384b23072925a16831f484ceb3481c3a41cf3c1c567890be3a972c81b4f36bba196b32891558048b86914461c10532a240c24d31c7e33c38e2d35f
@@ -0,0 +1,215 @@
1
+ require 'httparty'
2
+ require 'json'
3
+ require 'logger'
4
+
5
+ module Jekyll
6
+ class MeilisearchIndexer < Generator
7
# Generator entry point: indexes the configured collections into Meilisearch.
#
# Creates an INFO-level logger writing to STDOUT, reads the plugin settings
# and stops quietly when they do not validate. Otherwise the documents are
# built from the site and synchronised with the remote index.
#
# @param site [Object] the site handed to the generator (must expose
#   `config` and `collections`)
# @return [Object, nil] whatever the synchronisation step returns, or nil
#   when the configuration is invalid
def generate(site)
  @logger = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }

  log_info("Starting Meilisearch incremental indexing...")
  settings = load_config(site)
  if validate_config(settings)
    sync_with_meilisearch(settings, build_documents(site, settings))
  end
end
18
+
19
+ private
20
+
21
# Writes +message+ to the plugin logger at INFO severity.
#
# @param message [String] text to log
def log_info(message)
  @logger.add(Logger::INFO, message)
end
24
+
25
# Reads the `meilisearch` section of the site configuration.
#
# @param site [Object] object whose `config` responds to `[]`
# @return [Hash] the configured section, or an empty hash when it is absent
def load_config(site)
  section = site.config['meilisearch']
  return {} unless section

  section
end
28
+
29
# Checks that the settings needed to reach Meilisearch are present.
#
# The URL is checked before the API key; the first missing setting is
# logged and validation stops there.
#
# @param config [Hash] the `meilisearch` configuration section
# @return [Boolean] true when both `url` and `api_key` are set
def validate_config(config)
  required = {
    'url' => "Error: Meilisearch URL not set in config. Skipping indexing.",
    'api_key' => "Error: Meilisearch API key not set in config. Skipping indexing."
  }
  _missing_key, complaint = required.find { |key, _message| !config[key] }
  return true unless complaint

  log_info(complaint)
  false
end
40
+
41
# Builds the HTTP headers sent with every Meilisearch request.
#
# @param api_key [String] key placed in the Bearer authorization header
# @return [Hash{String => String}] JSON content type plus authorization
def build_headers(api_key)
  bearer_token = "Bearer #{api_key}"
  { 'Content-Type' => 'application/json' }.merge('Authorization' => bearer_token)
end
47
+
48
# Builds the Meilisearch documents for every configured collection.
#
# Collections default to `posts` with the fields title, content, url and
# date. A configured collection that the site does not have is skipped with
# a warning. A collection configured without any settings (nil) falls back
# to the default fields and id format instead of raising.
#
# @param site [Object] object whose `collections[name]` yields a collection
#   exposing `docs`
# @param config [Hash] the `meilisearch` configuration section
# @return [Array<Hash>] one hash per document, always containing
#   'id', 'content' and 'url'
def build_documents(site, config)
  default_fields = %w[title content url date]
  collections_config = config['collections'] || { 'posts' => { 'fields' => default_fields } }

  documents = collections_config.flat_map do |collection_name, collection_settings|
    collection = site.collections[collection_name]
    unless collection
      log_info("Warning: Collection '#{collection_name}' not found. Skipping.")
      next []
    end

    log_info("Processing collection: '#{collection_name}'...")
    # A YAML entry such as `posts:` with no body parses to nil.
    collection_settings ||= {}
    fields_to_index = collection_settings['fields'] || default_fields
    id_format = collection_settings['id_format'] || :default

    collection.docs.map do |doc|
      build_document(doc, collection_name, fields_to_index, id_format)
    end
  end

  if documents.empty?
    log_info("No documents found across configured collections: #{collections_config.keys.join(', ')}. Cleaning up index...")
  end
  documents
end

# Converts one site document into the hash sent to Meilisearch.
#
# 'id', 'content' and 'url' are always set from the document itself; the
# remaining requested fields are copied from the document's data. A 'date'
# value is formatted as YYYY-MM-DD only when it responds to `strftime`, so
# string or missing dates are passed through unchanged.
def build_document(doc, collection_name, fields, id_format)
  record = {
    'id' => generate_id(doc, collection_name, id_format),
    'content' => doc.content.to_s.strip,
    'url' => doc.url
  }
  fields.each do |field|
    next if %w[id content url].include?(field)

    value = doc.data[field]
    formattable_date = field == 'date' && value.respond_to?(:strftime)
    record[field] = formattable_date ? value.strftime('%Y-%m-%d') : value
  end
  record
end
85
+
86
# Derives the Meilisearch document id for a site document.
#
# `id_format` is the Symbol :default when unset, but arrives as a String
# when it comes from the YAML configuration, so both spellings are accepted.
#
# * default - "<collection>-<number>" when the document data has a
#   'number'; otherwise the document id, sanitised, with runs of hyphens
#   collapsed.
# * path    - the document URL, sanitised.
# * other   - the document id, sanitised.
#
# Generated ids are lower-cased and limited to 100 characters (the
# "<collection>-<number>" form is used as is).
#
# @param doc [Object] document exposing `id`, `url` and `data`
# @param collection_name [String] name of the owning collection
# @param id_format [Symbol, String] id strategy
# @return [String] the document id
def generate_id(doc, collection_name, id_format)
  case id_format.to_s
  when 'default'
    number = doc.data['number']
    return "#{collection_name}-#{number}" if number

    sanitize_document_id(doc.id).gsub(/-+/, '-').downcase.slice(0, 100)
  when 'path'
    sanitize_document_id(doc.url).downcase.slice(0, 100)
  else
    sanitize_document_id(doc.id).downcase.slice(0, 100)
  end
end

# Replaces every character outside a-z, A-Z, 0-9, '_' and '-' with a hyphen,
# since Meilisearch rejects document ids containing anything else. Ids that
# are already valid are returned unchanged.
def sanitize_document_id(raw)
  raw.gsub(/[^a-zA-Z0-9_-]/, '-')
end
97
+
98
# Brings the remote index in line with the documents built from the site.
#
# Ensures the index exists, then reads the documents currently stored. When
# that read fails, a full reset-and-reindex is performed instead. Otherwise
# ids present remotely but no longer produced by the site are deleted and
# the current documents are (re)submitted.
#
# @param config [Hash] the `meilisearch` configuration section
# @param documents [Array<Hash>] documents built from the site
def sync_with_meilisearch(config, documents)
  base_url = config['url']
  index_name = config['index_name'] || 'jekyll_documents'
  headers = build_headers(config['api_key'])
  create_index_if_missing(base_url, index_name, headers)

  remote_docs = fetch_all_documents(base_url, index_name, headers)
  if remote_docs.nil?
    log_info("Failed to fetch existing documents. Falling back to full indexing.")
    full_index(base_url, index_name, documents, headers)
  else
    remote_ids = remote_docs.map { |remote| remote['id'] }
    local_ids = documents.map { |local| local['id'] }

    delete_obsolete_documents(base_url, index_name, remote_ids - local_ids, headers)
    index_new_documents(base_url, index_name, documents, headers) if documents.any?
  end
end
115
+
116
# Retrieves the documents currently stored in the index, page by page.
#
# Only the `id` attribute is requested (`fields=id`): the sole use of the
# result is comparing ids, and fetching full documents would transfer every
# page body on each build.
#
# @param url [String] Meilisearch base URL
# @param index_name [String] index uid
# @param headers [Hash] request headers
# @return [Array<Hash>, nil] stored documents (id only), or nil when a page
#   could not be fetched or its body was not the expected JSON
def fetch_all_documents(url, index_name, headers)
  page_size = 1000
  documents = []
  offset = 0
  loop do
    page_url = "#{url}/indexes/#{index_name}/documents?limit=#{page_size}&offset=#{offset}&fields=id"
    response = attempt_request(
      -> { HTTParty.get(page_url, headers: headers, timeout: 30) },
      "fetching documents"
    )
    return nil unless response&.success?

    results = parse_document_page(response.body)
    return nil unless results

    documents.concat(results)
    # A short page means the end of the index has been reached.
    break if results.size < page_size

    offset += page_size
  end
  documents
end

# Extracts the 'results' array from a documents response body.
# Returns nil (after logging) when the body is not JSON or has no such array.
def parse_document_page(body)
  payload = JSON.parse(body)
  results = payload['results'] if payload.is_a?(Hash)
  return results if results.is_a?(Array)

  log_info("Unexpected response while fetching documents: missing 'results' array.")
  nil
rescue JSON::ParserError, TypeError => e
  log_info("Could not parse documents response: #{e.message}")
  nil
end
133
+
134
# Asks Meilisearch to delete the given document ids in one batch request.
#
# Nothing is sent when the list is empty. The outcome is logged when a
# response is available; a nil response produces no further log line here.
#
# @param url [String] Meilisearch base URL
# @param index_name [String] index uid
# @param ids_to_delete [Array] ids no longer produced by the site
# @param headers [Hash] request headers
def delete_obsolete_documents(url, index_name, ids_to_delete, headers)
  if ids_to_delete.empty?
    return log_info("No documents to delete from Meilisearch.")
  end

  log_info("Deleting #{ids_to_delete.size} obsolete documents from Meilisearch...")
  endpoint = "#{url}/indexes/#{index_name}/documents/delete-batch"
  send_delete = -> { HTTParty.post(endpoint, body: ids_to_delete.to_json, headers: headers, timeout: 30) }
  response = attempt_request(send_delete, "deleting documents")
  return unless response

  outcome =
    if response.success?
      "Delete task queued successfully."
    else
      "Failed to delete obsolete documents: #{response.code} - #{response.body}"
    end
  log_info(outcome)
end
148
+
149
# Submits the documents to the index in batches of 1000.
#
# A 202 response is treated as "task queued" and its task UID is logged;
# any other response is logged as a failure. A batch with no response at
# all is skipped without an additional log line.
#
# @param url [String] Meilisearch base URL
# @param index_name [String] index uid
# @param documents [Array<Hash>] documents to add or replace
# @param headers [Hash] request headers
def index_new_documents(url, index_name, documents, headers)
  log_info("Indexing #{documents.size} documents to Meilisearch...")
  endpoint = "#{url}/indexes/#{index_name}/documents"
  documents.each_slice(1000) do |chunk|
    response = attempt_request(
      -> { HTTParty.post(endpoint, body: chunk.to_json, headers: headers, timeout: 30) },
      "indexing documents"
    )
    next unless response

    if response.code == 202
      task_uid = JSON.parse(response.body)['taskUid']
      log_info("Task queued: UID #{task_uid}. Check status at #{url}/tasks/#{task_uid}")
    else
      log_info("Failed to queue indexing task: #{response.code} - #{response.body}")
    end
  end
end
165
+
166
# Creates the index when Meilisearch reports that it does not exist.
#
# The existence check goes through `attempt_request` like every other
# call, so it is retried and a request that yields no usable response is
# logged instead of being dereferenced. Only a 404 on the check triggers
# creation; any other non-success status is logged and left alone.
#
# @param url [String] Meilisearch base URL
# @param index_name [String] index uid
# @param headers [Hash] request headers
# @return [void]
def create_index_if_missing(url, index_name, headers)
  log_info("Checking if index '#{index_name}' exists...")
  lookup = attempt_request(
    -> { HTTParty.get("#{url}/indexes/#{index_name}", headers: headers, timeout: 30) },
    "checking index"
  )
  if lookup.nil?
    log_info("Error checking index: request failed after retries.")
    return
  end
  return if lookup.success?

  unless lookup.code == 404
    log_info("Error checking index: #{lookup.code} - #{lookup.body}")
    return
  end

  log_info("Index '#{index_name}' not found. Creating it...")
  creation = attempt_request(
    -> { HTTParty.post("#{url}/indexes", body: { "uid" => index_name }.to_json, headers: headers, timeout: 30) },
    "creating index"
  )
  if creation&.success?
    log_info("Index '#{index_name}' created successfully.")
  elsif creation
    log_info("Failed to create index: #{creation.code} - #{creation.body}")
  end
  nil
end
186
+
187
# Fallback path: clears every document from the index, then re-submits the
# documents built from the site.
#
# The reset counts as done when the delete request succeeds or answers 404.
# Any other outcome - including no response at all - is logged and the
# re-indexing is skipped.
#
# @param url [String] Meilisearch base URL
# @param index_name [String] index uid
# @param documents [Array<Hash>] documents to index after the reset
# @param headers [Hash] request headers
def full_index(url, index_name, documents, headers)
  log_info("Performing full index reset as fallback...")
  response = attempt_request(
    -> { HTTParty.delete("#{url}/indexes/#{index_name}/documents", headers: headers, timeout: 30) },
    "resetting index"
  )
  unless response && (response.success? || response.code == 404)
    # response is nil when the request never produced a usable answer.
    detail = response ? "#{response.code} - #{response.body}" : "no response received"
    log_info("Failed to reset index: #{detail}")
    return
  end

  index_new_documents(url, index_name, documents, headers) if documents.any?
end
200
+
201
# Runs an HTTP request with bounded retries and exponential backoff.
#
# A response is returned immediately unless it looks transient (HTTP 408,
# 429 or 5xx). Transient responses and raised errors are logged and retried
# after 1s, 2s, 4s, ... with no wait after the final attempt. Any
# StandardError is rescued, because connection, timeout and TLS failures
# are not HTTParty::Error subclasses and would otherwise abort the build.
#
# @param request [#call] callable performing the request and returning a
#   response that exposes `code`
# @param action [String] description used in log messages
# @param retries [Integer] maximum number of attempts
# @return [Object, nil] the response; after exhausting retries, the last
#   response received, or nil when the final attempt raised
def attempt_request(request, action, retries: 3)
  last_response = nil
  retries.times do |attempt|
    begin
      last_response = request.call
      return last_response unless retryable_response?(last_response)

      log_info("Attempt #{attempt + 1} failed while #{action}: HTTP #{last_response.code}")
    rescue StandardError => e
      last_response = nil
      log_info("Attempt #{attempt + 1} failed while #{action}: #{e.message}")
    end
    sleep(2**attempt) if attempt < retries - 1 # Exponential backoff
  end
  log_info("All retries failed for #{action}.")
  last_response
end

# True for statuses worth retrying: request timeout, rate limiting and
# server-side errors.
def retryable_response?(response)
  code = response.code.to_i
  code == 408 || code == 429 || code >= 500
end
214
+ end
215
+ end
@@ -0,0 +1 @@
1
+ require "jekyll/meilisearch_indexer"
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-meilisearch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - unicolored
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-03-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '5.0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '3.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '5.0'
33
+ - !ruby/object:Gem::Dependency
34
+ name: httparty
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '0.21'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.21'
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '2.0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: rake
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '13.0'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '13.0'
75
+ description: This plugin incrementally indexes Jekyll collections into Meilisearch
76
+ for fast search capabilities.
77
+ email: hello@gilles.dev
78
+ executables: []
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - lib/jekyll-meilisearch.rb
83
+ - lib/jekyll/meilisearch_indexer.rb
84
+ homepage: https://github.com/unicolored/jekyll-meilisearch
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubygems_version: 3.5.6
104
+ signing_key:
105
+ specification_version: 4
106
+ summary: A Jekyll plugin to index site content in Meilisearch.
107
+ test_files: []