jekyll-algolia 1.2.7 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/errors/invalid_credentials.txt +3 -0
- data/lib/errors/too_many_records.txt +14 -0
- data/lib/jekyll/algolia/configurator.rb +5 -0
- data/lib/jekyll/algolia/error_handler.rb +46 -23
- data/lib/jekyll/algolia/indexer.rb +163 -50
- data/lib/jekyll/algolia/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf200adeeada4a57c74a46db83ca98f29fe69bde
|
4
|
+
data.tar.gz: 4098806de6e5cc74d79021b99bc8875fad5cacee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25b04ae1214a048234d7ad9512a00939cc99fcc4d35e025a08bdfd826c4267d355f21d67a2e559fde018d25a5fd45cb611c27cc4d6c846f0641ead7c034a7107
|
7
|
+
data.tar.gz: dd36674e3de277bf3fa62daf3b24fb1e165d68979f8ad6b2935235587779d660e4ae0ffb29453168fde47a75c29890a08cb5061a4c2bc54606d04a72fd581f4d
|
@@ -3,6 +3,9 @@ E:
|
|
3
3
|
E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided.
|
4
4
|
W:
|
5
5
|
W:Make sure your API key has access to your {application_id} application.
|
6
|
+
W:It should also have the rights to push to the following indices:
|
7
|
+
W: - {index_name}
|
8
|
+
W: - {index_object_ids_name}
|
6
9
|
I:
|
7
10
|
I:You can find your API key in your Algolia dashboard here:
|
8
11
|
I: https://www.algolia.com/licensing
|
@@ -0,0 +1,14 @@
|
|
1
|
+
E:[✗ Error] Too many records
|
2
|
+
E:
|
3
|
+
E:The jekyll-algolia plugin could not push your records because it exceeds the maximum number of records allowed in your current plan.
|
4
|
+
W:
|
5
|
+
W:Community plans can host up to 10k records and Essential plans start at 50k.
|
6
|
+
W:
|
7
|
+
W:Check our pricing page for more details:
|
8
|
+
W: https://www.algolia.com/pricing
|
9
|
+
W:
|
10
|
+
I:You might want to upgrade your plan or exclude records from indexing using the `files_to_exclude` option:
|
11
|
+
I: https://community.algolia.com/jekyll-algolia/options.html#files-to-exclude
|
12
|
+
I:
|
13
|
+
I:If you're having trouble solving this issue, feel free to file a bug on GitHub, ideally with a link to a repository where we can reproduce the issue as well as the APPID you're trying to push to.
|
14
|
+
I: https://github.com/algolia/jekyll-algolia/issues
|
@@ -148,6 +148,11 @@ module Jekyll
|
|
148
148
|
ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
|
149
149
|
end
|
150
150
|
|
151
|
+
# Public: Return the name of the index used to store the object ids
|
152
|
+
def self.index_object_ids_name
|
153
|
+
"#{index_name}_object_ids"
|
154
|
+
end
|
155
|
+
|
151
156
|
# Public: Get the index settings
|
152
157
|
#
|
153
158
|
# This will be a merge of default settings and the one defined in the
|
@@ -48,7 +48,8 @@ module Jekyll
|
|
48
48
|
unknown_application_id
|
49
49
|
invalid_credentials
|
50
50
|
record_too_big
|
51
|
-
|
51
|
+
too_many_records
|
52
|
+
unknown_setting
|
52
53
|
invalid_index_name
|
53
54
|
]
|
54
55
|
|
@@ -134,6 +135,24 @@ module Jekyll
|
|
134
135
|
hash
|
135
136
|
end
|
136
137
|
|
138
|
+
# Public: Returns a string explaining which attributes are the largest in
|
139
|
+
# the record
|
140
|
+
#
|
141
|
+
# record - The record hash to analyze
|
142
|
+
#
|
143
|
+
# This will be used on the `record_too_big` error, to guide users in
|
144
|
+
# finding which record is causing trouble
|
145
|
+
def self.readable_largest_record_keys(record)
|
146
|
+
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
|
147
|
+
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
|
148
|
+
output = []
|
149
|
+
largest_keys.each do |key, size|
|
150
|
+
size = Filesize.from("#{size} B").to_s('Kb')
|
151
|
+
output << "#{key} (#{size})"
|
152
|
+
end
|
153
|
+
output.join(', ')
|
154
|
+
end
|
155
|
+
|
137
156
|
# Public: Check if the application id is available
|
138
157
|
#
|
139
158
|
# _context - Not used
|
@@ -161,34 +180,19 @@ module Jekyll
|
|
161
180
|
# Application ID and API key submitted don't match any credentials known
|
162
181
|
def self.invalid_credentials?(error, _context = {})
|
163
182
|
details = error_hash(error.message)
|
183
|
+
return false if details == false
|
164
184
|
|
165
185
|
if details['message'] != 'Invalid Application-ID or API key'
|
166
186
|
return false
|
167
187
|
end
|
168
188
|
|
169
189
|
{
|
170
|
-
'application_id' => details['application_id']
|
190
|
+
'application_id' => details['application_id'],
|
191
|
+
'index_name' => Configurator.index_name,
|
192
|
+
'index_object_ids_name' => Configurator.index_object_ids_name
|
171
193
|
}
|
172
194
|
end
|
173
195
|
|
174
|
-
# Public: Returns a string explaining which attributes are the largest in
|
175
|
-
# the record
|
176
|
-
#
|
177
|
-
# record - The record hash to analyze
|
178
|
-
#
|
179
|
-
# This will be used on the `record_too_big` error, to guide users in
|
180
|
-
# finding which record is causing trouble
|
181
|
-
def self.readable_largest_record_keys(record)
|
182
|
-
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
|
183
|
-
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
|
184
|
-
output = []
|
185
|
-
largest_keys.each do |key, size|
|
186
|
-
size = Filesize.from("#{size} B").to_s('Kb')
|
187
|
-
output << "#{key} (#{size})"
|
188
|
-
end
|
189
|
-
output.join(', ')
|
190
|
-
end
|
191
|
-
|
192
196
|
# Public: Check if the sent records are not too big
|
193
197
|
#
|
194
198
|
# context[:records] - list of records sent in the batch
|
@@ -198,6 +202,7 @@ module Jekyll
|
|
198
202
|
# information about it so the user can debug it.
|
199
203
|
def self.record_too_big?(error, context = {})
|
200
204
|
details = error_hash(error.message)
|
205
|
+
return false if details == false
|
201
206
|
|
202
207
|
message = details['message']
|
203
208
|
return false if message !~ /^Record .* is too big .*/
|
@@ -207,8 +212,11 @@ module Jekyll
|
|
207
212
|
size = Filesize.from("#{size} B").to_s('Kb')
|
208
213
|
object_id = details['objectID']
|
209
214
|
|
210
|
-
#
|
211
|
-
|
215
|
+
# Finding the record in all the operations
|
216
|
+
operation = context[:operations].find do |o|
|
217
|
+
o[:action] == 'addObject' && o[:body][:objectID] == object_id
|
218
|
+
end
|
219
|
+
record = operation[:body]
|
212
220
|
probable_wrong_keys = readable_largest_record_keys(record)
|
213
221
|
|
214
222
|
# Writing the full record to disk for inspection
|
@@ -236,8 +244,9 @@ module Jekyll
|
|
236
244
|
# The API will block any call that tries to update a setting value that is
|
237
245
|
# not available. We'll tell the user which one so they can fix their
|
238
246
|
# issue.
|
239
|
-
def self.
|
247
|
+
def self.unknown_setting?(error, context = {})
|
240
248
|
details = error_hash(error.message)
|
249
|
+
return false if details == false
|
241
250
|
|
242
251
|
message = details['message']
|
243
252
|
return false if message !~ /^Invalid object attributes.*/
|
@@ -258,6 +267,7 @@ module Jekyll
|
|
258
267
|
# Some characters are forbidden in index names
|
259
268
|
def self.invalid_index_name?(error, _context = {})
|
260
269
|
details = error_hash(error.message)
|
270
|
+
return false if details == false
|
261
271
|
|
262
272
|
message = details['message']
|
263
273
|
return false if message !~ /^indexName is not valid.*/
|
@@ -266,6 +276,19 @@ module Jekyll
|
|
266
276
|
'index_name' => Configurator.index_name
|
267
277
|
}
|
268
278
|
end
|
279
|
+
|
280
|
+
# Public: Check if the application has too many records
|
281
|
+
#
|
282
|
+
# We're trying to push too many records and it goes over quota
|
283
|
+
def self.too_many_records?(error, _context = {})
|
284
|
+
details = error_hash(error.message)
|
285
|
+
return false if details == false
|
286
|
+
|
287
|
+
message = details['message']
|
288
|
+
return false if message !~ /^Record quota exceeded.*/
|
289
|
+
|
290
|
+
{}
|
291
|
+
end
|
269
292
|
end
|
270
293
|
end
|
271
294
|
end
|
@@ -4,6 +4,7 @@ require 'algoliasearch'
|
|
4
4
|
require 'yaml'
|
5
5
|
require 'algolia_html_extractor'
|
6
6
|
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
7
8
|
module Jekyll
|
8
9
|
module Algolia
|
9
10
|
# Module to push records to Algolia and configure the index
|
@@ -11,15 +12,15 @@ module Jekyll
|
|
11
12
|
include Jekyll::Algolia
|
12
13
|
|
13
14
|
# Public: Init the module
|
14
|
-
#
|
15
|
-
# This call will instantiate the Algolia API client, set the custom
|
16
|
-
# User Agent and give an easy access to the main index
|
17
15
|
def self.init
|
18
16
|
::Algolia.init(
|
19
17
|
application_id: Configurator.application_id,
|
20
18
|
api_key: Configurator.api_key
|
21
19
|
)
|
22
|
-
|
20
|
+
index_name = Configurator.index_name
|
21
|
+
@index = ::Algolia::Index.new(index_name)
|
22
|
+
index_object_ids_name = Configurator.index_object_ids_name
|
23
|
+
@index_object_ids = ::Algolia::Index.new(index_object_ids_name)
|
23
24
|
|
24
25
|
set_user_agent
|
25
26
|
|
@@ -31,6 +32,42 @@ module Jekyll
|
|
31
32
|
@index
|
32
33
|
end
|
33
34
|
|
35
|
+
# Public: Returns the Algolia index used to store object ids
|
36
|
+
def self.index_object_ids
|
37
|
+
@index_object_ids
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: Check if an index exists
|
41
|
+
#
|
42
|
+
# index - Index to check
|
43
|
+
#
|
44
|
+
# Note: there is no API endpoint to do that, so we try to get the settings
|
45
|
+
# instead, which will fail if the index does not exist
|
46
|
+
def self.index_exist?(index)
|
47
|
+
index.get_settings
|
48
|
+
true
|
49
|
+
rescue StandardError
|
50
|
+
false
|
51
|
+
end
|
52
|
+
|
53
|
+
# Public: Get the number of records in an index
|
54
|
+
#
|
55
|
+
# index - Index to check
|
56
|
+
#
|
57
|
+
# Note: We'll do an empty query search, to match everything, but we'll
|
58
|
+
# only return the objectID and one element, to get the shortest response
|
59
|
+
# possible. It will still contain the nbHits
|
60
|
+
def self.record_count(index)
|
61
|
+
index.search(
|
62
|
+
'',
|
63
|
+
attributesToRetrieve: 'objectID',
|
64
|
+
distinct: false,
|
65
|
+
hitsPerPage: 1
|
66
|
+
)['nbHits']
|
67
|
+
rescue StandardError
|
68
|
+
0
|
69
|
+
end
|
70
|
+
|
34
71
|
# Public: Set the User-Agent to send to the API
|
35
72
|
#
|
36
73
|
# Every integration should follow the "YYY Integration" pattern, and
|
@@ -48,27 +85,71 @@ module Jekyll
|
|
48
85
|
::Algolia.set_extra_header('User-Agent', user_agent)
|
49
86
|
end
|
50
87
|
|
51
|
-
# Public:
|
88
|
+
# Public: Get an array of all object IDs stored in the main index
|
52
89
|
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
def self.
|
90
|
+
# Note: As this will be slow (grabbing them 1000 at a time), we display
|
91
|
+
# a progress bar.
|
92
|
+
def self.remote_object_ids_from_main_index
|
93
|
+
Logger.verbose("I:Inspecting existing records in index #{index.name}")
|
94
|
+
|
56
95
|
list = []
|
57
|
-
|
58
|
-
|
96
|
+
|
97
|
+
# As it might take some time, we display a progress bar
|
98
|
+
progress_bar = ProgressBar.create(
|
99
|
+
total: record_count(index),
|
100
|
+
format: 'Inspecting existing records (%j%%) |%B|'
|
59
101
|
)
|
60
102
|
begin
|
61
|
-
index.browse(
|
103
|
+
index.browse(
|
104
|
+
attributesToRetrieve: 'objectID',
|
105
|
+
hitsPerPage: 1000
|
106
|
+
) do |hit|
|
62
107
|
list << hit['objectID']
|
108
|
+
progress_bar.increment
|
63
109
|
end
|
64
110
|
rescue StandardError
|
65
|
-
# The index might not exist if it's the first time we use the plugin
|
66
|
-
# so we'll consider that it means there are no records there
|
67
111
|
return []
|
68
112
|
end
|
113
|
+
|
69
114
|
list.sort
|
70
115
|
end
|
71
116
|
|
117
|
+
# Public: Get an array of all the object ids, stored in the dedicated
|
118
|
+
# index
|
119
|
+
#
|
120
|
+
# Note: This will be very fast. Each record contains 100 object ids, so it
|
121
|
+
# will fit in one call each time.
|
122
|
+
def self.remote_object_ids_from_dedicated_index
|
123
|
+
list = []
|
124
|
+
begin
|
125
|
+
index_object_ids.browse(
|
126
|
+
attributesToRetrieve: 'content',
|
127
|
+
hitsPerPage: 1000
|
128
|
+
) do |hit|
|
129
|
+
list += hit['content']
|
130
|
+
end
|
131
|
+
rescue StandardError
|
132
|
+
return []
|
133
|
+
end
|
134
|
+
|
135
|
+
list.sort
|
136
|
+
end
|
137
|
+
|
138
|
+
# Public: Returns an array of all the objectIDs in the index
|
139
|
+
#
|
140
|
+
# Note: We use a dedicated index to store the objectIDs for faster
|
141
|
+
# browsing, but if the index does not exist we read the main index.
|
142
|
+
def self.remote_object_ids
|
143
|
+
Logger.log('I:Getting list of existing records')
|
144
|
+
|
145
|
+
# Fast version, using the dedicated index
|
146
|
+
has_dedicated_index = index_exist?(index_object_ids)
|
147
|
+
return remote_object_ids_from_dedicated_index if has_dedicated_index
|
148
|
+
|
149
|
+
# Slow version, browsing the full index
|
150
|
+
remote_object_ids_from_main_index
|
151
|
+
end
|
152
|
+
|
72
153
|
# Public: Returns an array of the local objectIDs
|
73
154
|
#
|
74
155
|
# records - Array of all local records
|
@@ -78,38 +159,84 @@ module Jekyll
|
|
78
159
|
|
79
160
|
# Public: Update records of the index
|
80
161
|
#
|
81
|
-
#
|
82
|
-
# new_records - Records to add to the index
|
162
|
+
# records - All records extracted from Jekyll
|
83
163
|
#
|
84
164
|
# Note: All operations will be done in one batch, assuring an atomic
|
85
165
|
# update
|
86
166
|
# Does nothing in dry run mode
|
87
|
-
def self.update_records(
|
167
|
+
def self.update_records(records)
|
168
|
+
# Getting list of objectID in remote and locally
|
169
|
+
remote_ids = remote_object_ids
|
170
|
+
local_ids = local_object_ids(records)
|
171
|
+
|
172
|
+
# Making a diff, to see what to add and what to delete
|
173
|
+
ids_to_delete = remote_ids - local_ids
|
174
|
+
ids_to_add = local_ids - remote_ids
|
175
|
+
|
176
|
+
# What changes should we do to the indexes?
|
177
|
+
has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
|
178
|
+
has_dedicated_index = index_exist?(index_object_ids)
|
179
|
+
|
88
180
|
# Stop if nothing to change
|
89
|
-
if
|
181
|
+
if !has_records_to_update && has_dedicated_index
|
90
182
|
Logger.log('I:Content is already up to date.')
|
91
183
|
return
|
92
184
|
end
|
93
185
|
|
94
|
-
|
95
|
-
Logger.log("I:Records to delete: #{old_records_ids.length}")
|
96
|
-
Logger.log("I:Records to add: #{new_records.length}")
|
97
|
-
return if Configurator.dry_run?
|
98
|
-
|
99
|
-
# We group delete and add operations into the same batch. Delete
|
100
|
-
# operations should still come first, to avoid hitting an overquota too
|
101
|
-
# soon
|
186
|
+
# We group all operations into one batch
|
102
187
|
operations = []
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
}
|
188
|
+
|
189
|
+
# We update records only if there are records to update
|
190
|
+
if has_records_to_update
|
191
|
+
Logger.log("I:Updating records in index #{index.name}...")
|
192
|
+
Logger.log("I:Records to delete: #{ids_to_delete.length}")
|
193
|
+
Logger.log("I:Records to add: #{ids_to_add.length}")
|
194
|
+
|
195
|
+
# Transforming ids into real records to add
|
196
|
+
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
|
197
|
+
records_to_add = ids_to_add.map { |id| records_by_id[id] }
|
198
|
+
|
199
|
+
# Deletion operations come first, to avoid hitting an overquota too
|
200
|
+
# soon if it can be avoided
|
201
|
+
ids_to_delete.each do |object_id|
|
202
|
+
operations << {
|
203
|
+
action: 'deleteObject', indexName: index.name,
|
204
|
+
body: { objectID: object_id }
|
205
|
+
}
|
206
|
+
end
|
207
|
+
# Then we add the new records
|
208
|
+
operations += records_to_add.map do |new_record|
|
209
|
+
{ action: 'addObject', indexName: index.name, body: new_record }
|
210
|
+
end
|
108
211
|
end
|
109
|
-
|
110
|
-
|
212
|
+
|
213
|
+
# We update the dedicated index every time we update records, but we also
|
214
|
+
# create it if it does not exist
|
215
|
+
should_update_dedicated_index = has_records_to_update ||
|
216
|
+
!has_dedicated_index
|
217
|
+
if should_update_dedicated_index
|
218
|
+
operations << { action: 'clear', indexName: index_object_ids.name }
|
219
|
+
local_ids.each_slice(100).each do |ids|
|
220
|
+
operations << {
|
221
|
+
action: 'addObject', indexName: index_object_ids.name,
|
222
|
+
body: { content: ids }
|
223
|
+
}
|
224
|
+
end
|
111
225
|
end
|
112
226
|
|
227
|
+
execute_operations(operations)
|
228
|
+
end
|
229
|
+
|
230
|
+
# Public: Execute a series of operations in a batch
|
231
|
+
#
|
232
|
+
# operations - Operations to batch
|
233
|
+
#
|
234
|
+
# Note: Will split the batch in several calls if too big, and will display
|
235
|
+
# a progress bar if this happens
|
236
|
+
def self.execute_operations(operations)
|
237
|
+
return if Configurator.dry_run?
|
238
|
+
return if operations.empty?
|
239
|
+
|
113
240
|
# Run the batches in slices if they are too large
|
114
241
|
batch_size = Configurator.algolia('indexing_batch_size')
|
115
242
|
slices = operations.each_slice(batch_size).to_a
|
@@ -118,7 +245,7 @@ module Jekyll
|
|
118
245
|
if should_have_progress_bar
|
119
246
|
progress_bar = ProgressBar.create(
|
120
247
|
total: slices.length,
|
121
|
-
format: '
|
248
|
+
format: 'Updating index (%j%%) |%B|'
|
122
249
|
)
|
123
250
|
end
|
124
251
|
|
@@ -128,10 +255,7 @@ module Jekyll
|
|
128
255
|
|
129
256
|
progress_bar.increment if should_have_progress_bar
|
130
257
|
rescue StandardError => error
|
131
|
-
|
132
|
-
record[:body]
|
133
|
-
end
|
134
|
-
ErrorHandler.stop(error, records: records)
|
258
|
+
ErrorHandler.stop(error, operations: slice)
|
135
259
|
end
|
136
260
|
end
|
137
261
|
end
|
@@ -253,23 +377,12 @@ module Jekyll
|
|
253
377
|
exit 1
|
254
378
|
end
|
255
379
|
|
256
|
-
# Update settings
|
257
380
|
update_settings
|
258
|
-
|
259
|
-
# Getting list of objectID in remote and locally
|
260
|
-
remote_ids = remote_object_ids
|
261
|
-
local_ids = local_object_ids(records)
|
262
|
-
|
263
|
-
# Getting list of what to add and what to delete
|
264
|
-
old_records_ids = remote_ids - local_ids
|
265
|
-
new_records_ids = local_ids - remote_ids
|
266
|
-
new_records = records.select do |record|
|
267
|
-
new_records_ids.include?(record[:objectID])
|
268
|
-
end
|
269
|
-
update_records(old_records_ids, new_records)
|
381
|
+
update_records(records)
|
270
382
|
|
271
383
|
Logger.log('I:✔ Indexing complete')
|
272
384
|
end
|
273
385
|
end
|
274
386
|
end
|
275
387
|
end
|
388
|
+
# rubocop:enable Metrics/ModuleLength
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-algolia
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.7
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Carry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: algolia_html_extractor
|
@@ -292,6 +292,7 @@ files:
|
|
292
292
|
- lib/errors/no_records_found.txt
|
293
293
|
- lib/errors/record_too_big.txt
|
294
294
|
- lib/errors/settings_manually_edited.txt
|
295
|
+
- lib/errors/too_many_records.txt
|
295
296
|
- lib/errors/unknown_application_id.txt
|
296
297
|
- lib/errors/unknown_settings.txt
|
297
298
|
- lib/jekyll-algolia.rb
|