jekyll-algolia 1.2.7 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/errors/invalid_credentials.txt +3 -0
- data/lib/errors/too_many_records.txt +14 -0
- data/lib/jekyll/algolia/configurator.rb +5 -0
- data/lib/jekyll/algolia/error_handler.rb +46 -23
- data/lib/jekyll/algolia/indexer.rb +163 -50
- data/lib/jekyll/algolia/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf200adeeada4a57c74a46db83ca98f29fe69bde
|
4
|
+
data.tar.gz: 4098806de6e5cc74d79021b99bc8875fad5cacee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25b04ae1214a048234d7ad9512a00939cc99fcc4d35e025a08bdfd826c4267d355f21d67a2e559fde018d25a5fd45cb611c27cc4d6c846f0641ead7c034a7107
|
7
|
+
data.tar.gz: dd36674e3de277bf3fa62daf3b24fb1e165d68979f8ad6b2935235587779d660e4ae0ffb29453168fde47a75c29890a08cb5061a4c2bc54606d04a72fd581f4d
|
@@ -3,6 +3,9 @@ E:
|
|
3
3
|
E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided.
|
4
4
|
W:
|
5
5
|
W:Make sure your API key has access to your {application_id} application.
|
6
|
+
W:It should also have the rights to push to the following indices:
|
7
|
+
W: - {index_name}
|
8
|
+
W: - {index_object_ids_name}
|
6
9
|
I:
|
7
10
|
I:You can find your API key in your Algolia dashboard here:
|
8
11
|
I: https://www.algolia.com/licensing
|
@@ -0,0 +1,14 @@
|
|
1
|
+
E:[✗ Error] Too many records
|
2
|
+
E:
|
3
|
+
E:The jekyll-algolia plugin could not push your records because it exceeds the maximum number of records allowed in your current plan.
|
4
|
+
W:
|
5
|
+
W:Community plans can host up to 10k records and Essential plans start at 50k.
|
6
|
+
W:
|
7
|
+
W:Check our pricing page for more details:
|
8
|
+
W: https://www.algolia.com/pricing
|
9
|
+
W:
|
10
|
+
I:You might want to upgrade your plan or exclude records from indexing using the `files_to_exclude` option:
|
11
|
+
I: https://community.algolia.com/jekyll-algolia/options.html#files-to-exclude
|
12
|
+
I:
|
13
|
+
I:If you're having trouble solving this issue, feel free to file a bug on GitHub, ideally with a link to a repository where we can reproduce the issue as well as the APPID you're trying to push to.
|
14
|
+
I: https://github.com/algolia/jekyll-algolia/issues
|
@@ -148,6 +148,11 @@ module Jekyll
|
|
148
148
|
ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
|
149
149
|
end
|
150
150
|
|
151
|
+
# Public: Return the name of the index used to store the object ids
|
152
|
+
def self.index_object_ids_name
|
153
|
+
"#{index_name}_object_ids"
|
154
|
+
end
|
155
|
+
|
151
156
|
# Public: Get the index settings
|
152
157
|
#
|
153
158
|
# This will be a merge of default settings and the one defined in the
|
@@ -48,7 +48,8 @@ module Jekyll
|
|
48
48
|
unknown_application_id
|
49
49
|
invalid_credentials
|
50
50
|
record_too_big
|
51
|
-
|
51
|
+
too_many_records
|
52
|
+
unknown_setting
|
52
53
|
invalid_index_name
|
53
54
|
]
|
54
55
|
|
@@ -134,6 +135,24 @@ module Jekyll
|
|
134
135
|
hash
|
135
136
|
end
|
136
137
|
|
138
|
+
# Public: Returns a string explaining which attributes are the largest in
|
139
|
+
# the record
|
140
|
+
#
|
141
|
+
# record - The record hash to analyze
|
142
|
+
#
|
143
|
+
# This will be used on the `record_too_big` error, to guide users in
|
144
|
+
# finding which record is causing trouble
|
145
|
+
def self.readable_largest_record_keys(record)
|
146
|
+
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
|
147
|
+
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
|
148
|
+
output = []
|
149
|
+
largest_keys.each do |key, size|
|
150
|
+
size = Filesize.from("#{size} B").to_s('Kb')
|
151
|
+
output << "#{key} (#{size})"
|
152
|
+
end
|
153
|
+
output.join(', ')
|
154
|
+
end
|
155
|
+
|
137
156
|
# Public: Check if the application id is available
|
138
157
|
#
|
139
158
|
# _context - Not used
|
@@ -161,34 +180,19 @@ module Jekyll
|
|
161
180
|
# Application ID and API key submitted don't match any credentials known
|
162
181
|
def self.invalid_credentials?(error, _context = {})
|
163
182
|
details = error_hash(error.message)
|
183
|
+
return false if details == false
|
164
184
|
|
165
185
|
if details['message'] != 'Invalid Application-ID or API key'
|
166
186
|
return false
|
167
187
|
end
|
168
188
|
|
169
189
|
{
|
170
|
-
'application_id' => details['application_id']
|
190
|
+
'application_id' => details['application_id'],
|
191
|
+
'index_name' => Configurator.index_name,
|
192
|
+
'index_object_ids_name' => Configurator.index_object_ids_name
|
171
193
|
}
|
172
194
|
end
|
173
195
|
|
174
|
-
# Public: Returns a string explaining which attributes are the largest in
|
175
|
-
# the record
|
176
|
-
#
|
177
|
-
# record - The record hash to analyze
|
178
|
-
#
|
179
|
-
# This will be used on the `record_too_big` error, to guide users in
|
180
|
-
# finding which record is causing trouble
|
181
|
-
def self.readable_largest_record_keys(record)
|
182
|
-
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
|
183
|
-
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
|
184
|
-
output = []
|
185
|
-
largest_keys.each do |key, size|
|
186
|
-
size = Filesize.from("#{size} B").to_s('Kb')
|
187
|
-
output << "#{key} (#{size})"
|
188
|
-
end
|
189
|
-
output.join(', ')
|
190
|
-
end
|
191
|
-
|
192
196
|
# Public: Check if the sent records are not too big
|
193
197
|
#
|
194
198
|
# context[:records] - list of records sent in the batch
|
@@ -198,6 +202,7 @@ module Jekyll
|
|
198
202
|
# information about it so the user can debug it.
|
199
203
|
def self.record_too_big?(error, context = {})
|
200
204
|
details = error_hash(error.message)
|
205
|
+
return false if details == false
|
201
206
|
|
202
207
|
message = details['message']
|
203
208
|
return false if message !~ /^Record .* is too big .*/
|
@@ -207,8 +212,11 @@ module Jekyll
|
|
207
212
|
size = Filesize.from("#{size} B").to_s('Kb')
|
208
213
|
object_id = details['objectID']
|
209
214
|
|
210
|
-
#
|
211
|
-
|
215
|
+
# Finding the record in all the operations
|
216
|
+
operation = context[:operations].find do |o|
|
217
|
+
o[:action] == 'addObject' && o[:body][:objectID] == object_id
|
218
|
+
end
|
219
|
+
record = operation[:body]
|
212
220
|
probable_wrong_keys = readable_largest_record_keys(record)
|
213
221
|
|
214
222
|
# Writing the full record to disk for inspection
|
@@ -236,8 +244,9 @@ module Jekyll
|
|
236
244
|
# The API will block any call that tries to update a setting value that is
|
237
245
|
# not available. We'll tell the user which one so they can fix their
|
238
246
|
# issue.
|
239
|
-
def self.
|
247
|
+
def self.unknown_setting?(error, context = {})
|
240
248
|
details = error_hash(error.message)
|
249
|
+
return false if details == false
|
241
250
|
|
242
251
|
message = details['message']
|
243
252
|
return false if message !~ /^Invalid object attributes.*/
|
@@ -258,6 +267,7 @@ module Jekyll
|
|
258
267
|
# Some characters are forbidden in index names
|
259
268
|
def self.invalid_index_name?(error, _context = {})
|
260
269
|
details = error_hash(error.message)
|
270
|
+
return false if details == false
|
261
271
|
|
262
272
|
message = details['message']
|
263
273
|
return false if message !~ /^indexName is not valid.*/
|
@@ -266,6 +276,19 @@ module Jekyll
|
|
266
276
|
'index_name' => Configurator.index_name
|
267
277
|
}
|
268
278
|
end
|
279
|
+
|
280
|
+
# Public: Check if the application has too many records
|
281
|
+
#
|
282
|
+
# We're trying to push too many records and it goes over quota
|
283
|
+
def self.too_many_records?(error, _context = {})
|
284
|
+
details = error_hash(error.message)
|
285
|
+
return false if details == false
|
286
|
+
|
287
|
+
message = details['message']
|
288
|
+
return false if message !~ /^Record quota exceeded.*/
|
289
|
+
|
290
|
+
{}
|
291
|
+
end
|
269
292
|
end
|
270
293
|
end
|
271
294
|
end
|
@@ -4,6 +4,7 @@ require 'algoliasearch'
|
|
4
4
|
require 'yaml'
|
5
5
|
require 'algolia_html_extractor'
|
6
6
|
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
7
8
|
module Jekyll
|
8
9
|
module Algolia
|
9
10
|
# Module to push records to Algolia and configure the index
|
@@ -11,15 +12,15 @@ module Jekyll
|
|
11
12
|
include Jekyll::Algolia
|
12
13
|
|
13
14
|
# Public: Init the module
|
14
|
-
#
|
15
|
-
# This call will instanciate the Algolia API client, set the custom
|
16
|
-
# User Agent and give an easy access to the main index
|
17
15
|
def self.init
|
18
16
|
::Algolia.init(
|
19
17
|
application_id: Configurator.application_id,
|
20
18
|
api_key: Configurator.api_key
|
21
19
|
)
|
22
|
-
|
20
|
+
index_name = Configurator.index_name
|
21
|
+
@index = ::Algolia::Index.new(index_name)
|
22
|
+
index_object_ids_name = Configurator.index_object_ids_name
|
23
|
+
@index_object_ids = ::Algolia::Index.new(index_object_ids_name)
|
23
24
|
|
24
25
|
set_user_agent
|
25
26
|
|
@@ -31,6 +32,42 @@ module Jekyll
|
|
31
32
|
@index
|
32
33
|
end
|
33
34
|
|
35
|
+
# Public: Returns the Algolia index used to store object ids
|
36
|
+
def self.index_object_ids
|
37
|
+
@index_object_ids
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: Check if an index exists
|
41
|
+
#
|
42
|
+
# index - Index to check
|
43
|
+
#
|
44
|
+
# Note: there is no API endpoint to do that, so we try to get the settings
|
45
|
+
# instead, which will fail if the index does not exist
|
46
|
+
def self.index_exist?(index)
|
47
|
+
index.get_settings
|
48
|
+
true
|
49
|
+
rescue StandardError
|
50
|
+
false
|
51
|
+
end
|
52
|
+
|
53
|
+
# Public: Get the number of records in an index
|
54
|
+
#
|
55
|
+
# index - Index to check
|
56
|
+
#
|
57
|
+
# Note: We'll do an empty query search, to match everything, but we'll
|
58
|
+
# only return the objectID and one element, to get the shortest response
|
59
|
+
# possible. It will still contain the nbHits
|
60
|
+
def self.record_count(index)
|
61
|
+
index.search(
|
62
|
+
'',
|
63
|
+
attributesToRetrieve: 'objectID',
|
64
|
+
distinct: false,
|
65
|
+
hitsPerPage: 1
|
66
|
+
)['nbHits']
|
67
|
+
rescue StandardError
|
68
|
+
0
|
69
|
+
end
|
70
|
+
|
34
71
|
# Public: Set the User-Agent to send to the API
|
35
72
|
#
|
36
73
|
# Every integration should follow the "YYY Integration" pattern, and
|
@@ -48,27 +85,71 @@ module Jekyll
|
|
48
85
|
::Algolia.set_extra_header('User-Agent', user_agent)
|
49
86
|
end
|
50
87
|
|
51
|
-
# Public:
|
88
|
+
# Public: Get an array of all object IDs stored in the main index
|
52
89
|
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
def self.
|
90
|
+
# Note: As this will be slow (grabbing them 1000 at a time), we display
|
91
|
+
# a progress bar.
|
92
|
+
def self.remote_object_ids_from_main_index
|
93
|
+
Logger.verbose("I:Inspecting existing records in index #{index.name}")
|
94
|
+
|
56
95
|
list = []
|
57
|
-
|
58
|
-
|
96
|
+
|
97
|
+
# As it might take some time, we display a progress bar
|
98
|
+
progress_bar = ProgressBar.create(
|
99
|
+
total: record_count(index),
|
100
|
+
format: 'Inspecting existing records (%j%%) |%B|'
|
59
101
|
)
|
60
102
|
begin
|
61
|
-
index.browse(
|
103
|
+
index.browse(
|
104
|
+
attributesToRetrieve: 'objectID',
|
105
|
+
hitsPerPage: 1000
|
106
|
+
) do |hit|
|
62
107
|
list << hit['objectID']
|
108
|
+
progress_bar.increment
|
63
109
|
end
|
64
110
|
rescue StandardError
|
65
|
-
# The index might not exist if it's the first time we use the plugin
|
66
|
-
# so we'll consider that it means there are no records there
|
67
111
|
return []
|
68
112
|
end
|
113
|
+
|
69
114
|
list.sort
|
70
115
|
end
|
71
116
|
|
117
|
+
# Public: Get an array of all the object ids, stored in the dedicated
|
118
|
+
# index
|
119
|
+
#
|
120
|
+
# Note: This will be very fast. Each record contains up to 100 object ids, so it
|
121
|
+
# will fit in one call each time.
|
122
|
+
def self.remote_object_ids_from_dedicated_index
|
123
|
+
list = []
|
124
|
+
begin
|
125
|
+
index_object_ids.browse(
|
126
|
+
attributesToRetrieve: 'content',
|
127
|
+
hitsPerPage: 1000
|
128
|
+
) do |hit|
|
129
|
+
list += hit['content']
|
130
|
+
end
|
131
|
+
rescue StandardError
|
132
|
+
return []
|
133
|
+
end
|
134
|
+
|
135
|
+
list.sort
|
136
|
+
end
|
137
|
+
|
138
|
+
# Public: Returns an array of all the objectIDs in the index
|
139
|
+
#
|
140
|
+
# Note: We use a dedicated index to store the objectIDs for faster
|
141
|
+
# browsing, but if the index does not exist we read the main index.
|
142
|
+
def self.remote_object_ids
|
143
|
+
Logger.log('I:Getting list of existing records')
|
144
|
+
|
145
|
+
# Fast version, using the dedicated index
|
146
|
+
has_dedicated_index = index_exist?(index_object_ids)
|
147
|
+
return remote_object_ids_from_dedicated_index if has_dedicated_index
|
148
|
+
|
149
|
+
# Slow version, browsing the full index
|
150
|
+
remote_object_ids_from_main_index
|
151
|
+
end
|
152
|
+
|
72
153
|
# Public: Returns an array of the local objectIDs
|
73
154
|
#
|
74
155
|
# records - Array of all local records
|
@@ -78,38 +159,84 @@ module Jekyll
|
|
78
159
|
|
79
160
|
# Public: Update records of the index
|
80
161
|
#
|
81
|
-
#
|
82
|
-
# new_records - Records to add to the index
|
162
|
+
# records - All records extracted from Jekyll
|
83
163
|
#
|
84
164
|
# Note: All operations will be done in one batch, assuring an atomic
|
85
165
|
# update
|
86
166
|
# Does nothing in dry run mode
|
87
|
-
def self.update_records(
|
167
|
+
def self.update_records(records)
|
168
|
+
# Getting list of objectID in remote and locally
|
169
|
+
remote_ids = remote_object_ids
|
170
|
+
local_ids = local_object_ids(records)
|
171
|
+
|
172
|
+
# Making a diff, to see what to add and what to delete
|
173
|
+
ids_to_delete = remote_ids - local_ids
|
174
|
+
ids_to_add = local_ids - remote_ids
|
175
|
+
|
176
|
+
# What changes should we do to the indexes?
|
177
|
+
has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
|
178
|
+
has_dedicated_index = index_exist?(index_object_ids)
|
179
|
+
|
88
180
|
# Stop if nothing to change
|
89
|
-
if
|
181
|
+
if !has_records_to_update && has_dedicated_index
|
90
182
|
Logger.log('I:Content is already up to date.')
|
91
183
|
return
|
92
184
|
end
|
93
185
|
|
94
|
-
|
95
|
-
Logger.log("I:Records to delete: #{old_records_ids.length}")
|
96
|
-
Logger.log("I:Records to add: #{new_records.length}")
|
97
|
-
return if Configurator.dry_run?
|
98
|
-
|
99
|
-
# We group delete and add operations into the same batch. Delete
|
100
|
-
# operations should still come first, to avoid hitting an overquota too
|
101
|
-
# soon
|
186
|
+
# We group all operations into one batch
|
102
187
|
operations = []
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
}
|
188
|
+
|
189
|
+
# We update records only if there are records to update
|
190
|
+
if has_records_to_update
|
191
|
+
Logger.log("I:Updating records in index #{index.name}...")
|
192
|
+
Logger.log("I:Records to delete: #{ids_to_delete.length}")
|
193
|
+
Logger.log("I:Records to add: #{ids_to_add.length}")
|
194
|
+
|
195
|
+
# Transforming ids into real records to add
|
196
|
+
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
|
197
|
+
records_to_add = ids_to_add.map { |id| records_by_id[id] }
|
198
|
+
|
199
|
+
# Deletion operations come first, to avoid hitting an overquota too
|
200
|
+
# soon if it can be avoided
|
201
|
+
ids_to_delete.each do |object_id|
|
202
|
+
operations << {
|
203
|
+
action: 'deleteObject', indexName: index.name,
|
204
|
+
body: { objectID: object_id }
|
205
|
+
}
|
206
|
+
end
|
207
|
+
# Then we add the new records
|
208
|
+
operations += records_to_add.map do |new_record|
|
209
|
+
{ action: 'addObject', indexName: index.name, body: new_record }
|
210
|
+
end
|
108
211
|
end
|
109
|
-
|
110
|
-
|
212
|
+
|
213
|
+
# We update the dedicated index every time we update records, but we also
|
214
|
+
# create it if it does not exist
|
215
|
+
should_update_dedicated_index = has_records_to_update ||
|
216
|
+
!has_dedicated_index
|
217
|
+
if should_update_dedicated_index
|
218
|
+
operations << { action: 'clear', indexName: index_object_ids.name }
|
219
|
+
local_ids.each_slice(100).each do |ids|
|
220
|
+
operations << {
|
221
|
+
action: 'addObject', indexName: index_object_ids.name,
|
222
|
+
body: { content: ids }
|
223
|
+
}
|
224
|
+
end
|
111
225
|
end
|
112
226
|
|
227
|
+
execute_operations(operations)
|
228
|
+
end
|
229
|
+
|
230
|
+
# Public: Execute a series of operations in a batch
|
231
|
+
#
|
232
|
+
# operations - Operations to batch
|
233
|
+
#
|
234
|
+
# Note: Will split the batch in several calls if too big, and will display
|
235
|
+
# a progress bar if this happens
|
236
|
+
def self.execute_operations(operations)
|
237
|
+
return if Configurator.dry_run?
|
238
|
+
return if operations.empty?
|
239
|
+
|
113
240
|
# Run the batches in slices if they are too large
|
114
241
|
batch_size = Configurator.algolia('indexing_batch_size')
|
115
242
|
slices = operations.each_slice(batch_size).to_a
|
@@ -118,7 +245,7 @@ module Jekyll
|
|
118
245
|
if should_have_progress_bar
|
119
246
|
progress_bar = ProgressBar.create(
|
120
247
|
total: slices.length,
|
121
|
-
format: '
|
248
|
+
format: 'Updating index (%j%%) |%B|'
|
122
249
|
)
|
123
250
|
end
|
124
251
|
|
@@ -128,10 +255,7 @@ module Jekyll
|
|
128
255
|
|
129
256
|
progress_bar.increment if should_have_progress_bar
|
130
257
|
rescue StandardError => error
|
131
|
-
|
132
|
-
record[:body]
|
133
|
-
end
|
134
|
-
ErrorHandler.stop(error, records: records)
|
258
|
+
ErrorHandler.stop(error, operations: slice)
|
135
259
|
end
|
136
260
|
end
|
137
261
|
end
|
@@ -253,23 +377,12 @@ module Jekyll
|
|
253
377
|
exit 1
|
254
378
|
end
|
255
379
|
|
256
|
-
# Update settings
|
257
380
|
update_settings
|
258
|
-
|
259
|
-
# Getting list of objectID in remote and locally
|
260
|
-
remote_ids = remote_object_ids
|
261
|
-
local_ids = local_object_ids(records)
|
262
|
-
|
263
|
-
# Getting list of what to add and what to delete
|
264
|
-
old_records_ids = remote_ids - local_ids
|
265
|
-
new_records_ids = local_ids - remote_ids
|
266
|
-
new_records = records.select do |record|
|
267
|
-
new_records_ids.include?(record[:objectID])
|
268
|
-
end
|
269
|
-
update_records(old_records_ids, new_records)
|
381
|
+
update_records(records)
|
270
382
|
|
271
383
|
Logger.log('I:✔ Indexing complete')
|
272
384
|
end
|
273
385
|
end
|
274
386
|
end
|
275
387
|
end
|
388
|
+
# rubocop:enable Metrics/ModuleLength
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-algolia
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Carry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: algolia_html_extractor
|
@@ -292,6 +292,7 @@ files:
|
|
292
292
|
- lib/errors/no_records_found.txt
|
293
293
|
- lib/errors/record_too_big.txt
|
294
294
|
- lib/errors/settings_manually_edited.txt
|
295
|
+
- lib/errors/too_many_records.txt
|
295
296
|
- lib/errors/unknown_application_id.txt
|
296
297
|
- lib/errors/unknown_settings.txt
|
297
298
|
- lib/jekyll-algolia.rb
|