jekyll-algolia 1.2.7 → 1.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 6243fd7aa41bd8a50531f03d81c2162ff2c0c8a1
-   data.tar.gz: 16da8529592e1cf701cce7781588486d41a11390
+   metadata.gz: cf200adeeada4a57c74a46db83ca98f29fe69bde
+   data.tar.gz: 4098806de6e5cc74d79021b99bc8875fad5cacee
  SHA512:
-   metadata.gz: b95eb1944bca76b4fd0520f3f9ad6c29b0763c515dc0160edfb2a8e5086b9da1b9827741e258f5b27334efef4e49e4c86235dd02c2f8a4ad69f95b30dc254a2e
-   data.tar.gz: e40caac42d9d8793bc2d15025c0a3f017b9ee04235d84bfd4bb4fb2b6e392159edb7b290333752e373ec96931c7f3ef7ef7b46e2ea4ddeb1b3e826b16db262cb
+   metadata.gz: 25b04ae1214a048234d7ad9512a00939cc99fcc4d35e025a08bdfd826c4267d355f21d67a2e559fde018d25a5fd45cb611c27cc4d6c846f0641ead7c034a7107
+   data.tar.gz: dd36674e3de277bf3fa62daf3b24fb1e165d68979f8ad6b2935235587779d660e4ae0ffb29453168fde47a75c29890a08cb5061a4c2bc54606d04a72fd581f4d
@@ -3,6 +3,9 @@ E:
  E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided.
  W:
  W:Make sure your API key has access to your {application_id} application.
+ W:It should also have the rights to push to the following indices:
+ W:   - {index_name}
+ W:   - {index_object_ids_name}
  I:
  I:You can find your API key in your Algolia dashboard here:
  I:   https://www.algolia.com/licensing
@@ -0,0 +1,14 @@
+ E:[✗ Error] Too many records
+ E:
+ E:The jekyll-algolia plugin could not push your records because doing so would exceed the maximum number of records allowed in your current plan.
+ W:
+ W:Community plans can host up to 10k records and Essential plans start at 50k.
+ W:
+ W:Check our pricing page for more details:
+ W:   https://www.algolia.com/pricing
+ W:
+ I:You might want to upgrade your plan or exclude records from indexing using the `files_to_exclude` option:
+ I:   https://community.algolia.com/jekyll-algolia/options.html#files-to-exclude
+ I:
+ I:If you're having trouble solving this issue, feel free to file a bug on GitHub, ideally with a link to a repository where we can reproduce the issue, as well as the APPID you're trying to push to.
+ I:  https://github.com/algolia/jekyll-algolia/issues
@@ -148,6 +148,11 @@ module Jekyll
    ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
  end
 
+ # Public: Return the name of the index used to store the object ids
+ def self.index_object_ids_name
+   "#{index_name}_object_ids"
+ end
+
  # Public: Get the index settings
  #
  # This will be a merge of default settings and the one defined in the
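In plain terms, the new Configurator helper just derives a companion name from the configured index. A minimal illustration (the `blog` value is made up):

    # If ALGOLIA_INDEX_NAME (or the algolia.index_name setting) resolves to "blog"...
    index_name = 'blog'
    index_object_ids_name = "#{index_name}_object_ids" # => "blog_object_ids"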
@@ -48,7 +48,8 @@ module Jekyll
    unknown_application_id
    invalid_credentials
    record_too_big
-   unknown_settings
+   too_many_records
+   unknown_setting
    invalid_index_name
  ]
 
@@ -134,6 +135,24 @@ module Jekyll
    hash
  end
 
+ # Public: Returns a string explaining which attributes are the largest in
+ # the record
+ #
+ # record - The record hash to analyze
+ #
+ # This will be used on the `record_too_big` error, to guide users in
+ # finding which record is causing trouble
+ def self.readable_largest_record_keys(record)
+   keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
+   largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
+   output = []
+   largest_keys.each do |key, size|
+     size = Filesize.from("#{size} B").to_s('Kb')
+     output << "#{key} (#{size})"
+   end
+   output.join(', ')
+ end
+
  # Public: Check if the application id is available
  #
  # _context - Not used
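The method above (moved here from lower in the file, as the matching removal later in this diff shows) is easiest to read with a concrete record. A self-contained sketch of the same logic, with a made-up record and approximate output:

    require 'filesize'

    record = {
      objectID: 'abc123',
      title: 'Hello world',
      html: '<p>' + ('long content ' * 500) + '</p>'
    }

    keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
    largest = keys.sort_by { |_, value| value }.reverse[0..2]
    puts(largest.map { |k, size| "#{k} (#{Filesize.from("#{size} B").to_s('Kb')})" }.join(', '))
    # => something like "html (6.51 Kb), title (11.00 B), objectID (6.00 B)"
    #    (exact formatting depends on the filesize gem)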
@@ -161,34 +180,19 @@ module Jekyll
  # Application ID and API key submitted don't match any credentials known
  def self.invalid_credentials?(error, _context = {})
    details = error_hash(error.message)
+   return false if details == false
 
    if details['message'] != 'Invalid Application-ID or API key'
      return false
    end
 
    {
-     'application_id' => details['application_id']
+     'application_id' => details['application_id'],
+     'index_name' => Configurator.index_name,
+     'index_object_ids_name' => Configurator.index_object_ids_name
    }
  end
 
- # Public: Returns a string explaining which attributes are the largest in
- # the record
- #
- # record - The record hash to analyze
- #
- # This will be used on the `record_too_big` error, to guide users in
- # finding which record is causing trouble
- def self.readable_largest_record_keys(record)
-   keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
-   largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
-   output = []
-   largest_keys.each do |key, size|
-     size = Filesize.from("#{size} B").to_s('Kb')
-     output << "#{key} (#{size})"
-   end
-   output.join(', ')
- end
-
  # Public: Check if the sent records are not too big
  #
  # context[:records] - list of records sent in the batch
@@ -198,6 +202,7 @@ module Jekyll
  # information about it so the user can debug it.
  def self.record_too_big?(error, context = {})
    details = error_hash(error.message)
+   return false if details == false
 
    message = details['message']
    return false if message !~ /^Record .* is too big .*/
@@ -207,8 +212,11 @@ module Jekyll
    size = Filesize.from("#{size} B").to_s('Kb')
    object_id = details['objectID']
 
-   # Getting record details
-   record = Utils.find_by_key(context[:records], :objectID, object_id)
+   # Finding the record in all the operations
+   operation = context[:operations].find do |o|
+     o[:action] == 'addObject' && o[:body][:objectID] == object_id
+   end
+   record = operation[:body]
    probable_wrong_keys = readable_largest_record_keys(record)
 
    # Writing the full record to disk for inspection
@@ -236,8 +244,9 @@ module Jekyll
  # The API will block any call that tries to update a setting value that is
  # not available. We'll tell the user which one so they can fix their
  # issue.
- def self.unknown_settings?(error, context = {})
+ def self.unknown_setting?(error, context = {})
    details = error_hash(error.message)
+   return false if details == false
 
    message = details['message']
    return false if message !~ /^Invalid object attributes.*/
@@ -258,6 +267,7 @@ module Jekyll
  # Some characters are forbidden in index names
  def self.invalid_index_name?(error, _context = {})
    details = error_hash(error.message)
+   return false if details == false
 
    message = details['message']
    return false if message !~ /^indexName is not valid.*/
@@ -266,6 +276,19 @@ module Jekyll
      'index_name' => Configurator.index_name
    }
  end
+
+ # Public: Check if the application has too many records
+ #
+ # We're trying to push too many records and it goes over quota
+ def self.too_many_records?(error, _context = {})
+   details = error_hash(error.message)
+   return false if details == false
+
+   message = details['message']
+   return false if message !~ /^Record quota exceeded.*/
+
+   {}
+ end
  end
  end
  end
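All of these `xxx?` matchers share one contract: `error_hash` returns `false` when the raw API message cannot be parsed (hence the new guard lines), and a matcher returns either `false` ("not this error") or a hash of values, which appear to be used to fill the `{placeholders}` of the corresponding `lib/errors/*.txt` template. A standalone illustration of that contract, with a made-up matcher and messages:

    # Illustrative only; mirrors the matcher pattern used above.
    def quota_exceeded?(message)
      return false if message.nil?                        # stands in for `return false if details == false`
      return false if message !~ /^Record quota exceeded.*/
      {}                                                  # empty metadata: nothing to interpolate in the template
    end

    puts quota_exceeded?('Record quota exceeded. Change plan or delete records.').inspect # => {}
    puts quota_exceeded?('Invalid Application-ID or API key').inspect                     # => false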
@@ -4,6 +4,7 @@ require 'algoliasearch'
  require 'yaml'
  require 'algolia_html_extractor'
 
+ # rubocop:disable Metrics/ModuleLength
  module Jekyll
    module Algolia
      # Module to push records to Algolia and configure the index
@@ -11,15 +12,15 @@ module Jekyll
    include Jekyll::Algolia
 
    # Public: Init the module
-   #
-   # This call will instanciate the Algolia API client, set the custom
-   # User Agent and give an easy access to the main index
    def self.init
      ::Algolia.init(
        application_id: Configurator.application_id,
        api_key: Configurator.api_key
      )
-     @index = ::Algolia::Index.new(Configurator.index_name)
+     index_name = Configurator.index_name
+     @index = ::Algolia::Index.new(index_name)
+     index_object_ids_name = Configurator.index_object_ids_name
+     @index_object_ids = ::Algolia::Index.new(index_object_ids_name)
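Worth noting for readers new to the `algoliasearch` gem: instantiating an `::Algolia::Index` is purely local and does not create anything on Algolia's side; the companion index only starts existing remotely once records are pushed to it, which is why the plugin later has to probe for it (see `index_exist?` in the next hunk). A sketch with an illustrative name:

    # Local handle only; no remote index is created by this call.
    index_object_ids = ::Algolia::Index.new('blog_object_ids')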
 
      set_user_agent
 
@@ -31,6 +32,42 @@ module Jekyll
    @index
  end
 
+ # Public: Returns the Algolia index used to store object ids
+ def self.index_object_ids
+   @index_object_ids
+ end
+
+ # Public: Check if an index exists
+ #
+ # index - Index to check
+ #
+ # Note: there is no API endpoint to do that, so we try to get the settings
+ # instead, which will fail if the index does not exist
+ def self.index_exist?(index)
+   index.get_settings
+   true
+ rescue StandardError
+   false
+ end
+
+ # Public: Get the number of records in an index
+ #
+ # index - Index to check
+ #
+ # Note: We'll do an empty query search, to match everything, but we'll
+ # only return the objectID and one element, to get the shortest response
+ # possible. It will still contain the nbHits
+ def self.record_count(index)
+   index.search(
+     '',
+     attributesToRetrieve: 'objectID',
+     distinct: false,
+     hitsPerPage: 1
+   )['nbHits']
+ rescue StandardError
+   0
+ end
+
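Two conventions in these helpers deserve a note: existence is probed via `get_settings` because there is no dedicated "does this index exist" endpoint, and counting relies on an empty search that asks for a single hit yet still reports the total `nbHits`. A standalone sketch using the bare `algoliasearch` gem (credentials and index name are placeholders):

    require 'algoliasearch'

    Algolia.init(application_id: 'YOURAPPID', api_key: 'YOURADMINKEY') # placeholder credentials
    index = Algolia::Index.new('blog')                                 # illustrative name

    begin
      index.get_settings # raises if the index does not exist yet
      count = index.search('', attributesToRetrieve: 'objectID', hitsPerPage: 1)['nbHits']
      puts "#{index.name} holds #{count} records"
    rescue StandardError
      puts "#{index.name} does not exist yet"
    end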
  # Public: Set the User-Agent to send to the API
  #
  # Every integration should follow the "YYY Integration" pattern, and
@@ -48,27 +85,71 @@ module Jekyll
    ::Algolia.set_extra_header('User-Agent', user_agent)
  end
 
- # Public: Returns an array of all the objectIDs in the index
+ # Public: Get an array of all object IDs stored in the main index
  #
- # The returned array is sorted. It won't have any impact on the way it is
- # processed, but makes debugging easier when comparing arrays is needed.
- def self.remote_object_ids
+ # Note: As this will be slow (grabbing them 1000 at a time), we display
+ # a progress bar.
+ def self.remote_object_ids_from_main_index
+   Logger.verbose("I:Inspecting existing records in index #{index.name}")
+
    list = []
-   Logger.verbose(
-     "I:Inspecting existing records in index #{index.name}..."
+
+   # As it might take some time, we display a progress bar
+   progress_bar = ProgressBar.create(
+     total: record_count(index),
+     format: 'Inspecting existing records (%j%%) |%B|'
    )
    begin
-     index.browse(attributesToRetrieve: 'objectID') do |hit|
+     index.browse(
+       attributesToRetrieve: 'objectID',
+       hitsPerPage: 1000
+     ) do |hit|
        list << hit['objectID']
+       progress_bar.increment
      end
    rescue StandardError
-     # The index might not exist if it's the first time we use the plugin
-     # so we'll consider that it means there are no records there
      return []
    end
+
    list.sort
  end
 
+ # Public: Get an array of all the object ids, stored in the dedicated
+ # index
+ #
+ # Note: This will be very fast. Each record contains 100 object IDs, so it
+ # will fit in one call each time.
+ def self.remote_object_ids_from_dedicated_index
+   list = []
+   begin
+     index_object_ids.browse(
+       attributesToRetrieve: 'content',
+       hitsPerPage: 1000
+     ) do |hit|
+       list += hit['content']
+     end
+   rescue StandardError
+     return []
+   end
+
+   list.sort
+ end
+
+ # Public: Returns an array of all the objectIDs in the index
+ #
+ # Note: We use a dedicated index to store the objectIDs for faster
+ # browsing, but if the index does not exist we read the main index.
+ def self.remote_object_ids
+   Logger.log('I:Getting list of existing records')
+
+   # Fast version, using the dedicated index
+   has_dedicated_index = index_exist?(index_object_ids)
+   return remote_object_ids_from_dedicated_index if has_dedicated_index
+
+   # Slow version, browsing the full index
+   remote_object_ids_from_main_index
+ end
+
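The dedicated `*_object_ids` index introduced here holds no searchable content: each of its records is just a bucket of objectIDs under a `content` key (pushed 100 at a time later in this diff), which is what makes re-reading the full list cheap. A rough sketch of the shape being stored and read back (IDs invented):

    # What gets pushed to the dedicated index, 100 ids per record:
    #   { content: ['id1', 'id2', ..., 'id100'] }, { content: ['id101', ...] }, ...
    # What remote_object_ids_from_dedicated_index rebuilds from those buckets:
    hits = [{ 'content' => %w[id3 id1] }, { 'content' => %w[id2] }]
    list = []
    hits.each { |hit| list += hit['content'] }
    puts list.sort.inspect # => ["id1", "id2", "id3"]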
  # Public: Returns an array of the local objectIDs
  #
  # records - Array of all local records
@@ -78,38 +159,84 @@ module Jekyll
 
  # Public: Update records of the index
  #
- # old_records_ids - Ids of records to delete from the index
- # new_records - Records to add to the index
+ # records - All records extracted from Jekyll
  #
  # Note: All operations will be done in one batch, assuring an atomic
  # update
  # Does nothing in dry run mode
- def self.update_records(old_records_ids, new_records)
+ def self.update_records(records)
+   # Getting the list of objectIDs, both remote and local
+   remote_ids = remote_object_ids
+   local_ids = local_object_ids(records)
+
+   # Making a diff, to see what to add and what to delete
+   ids_to_delete = remote_ids - local_ids
+   ids_to_add = local_ids - remote_ids
+
+   # What changes should we do to the indexes?
+   has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
+   has_dedicated_index = index_exist?(index_object_ids)
+
    # Stop if nothing to change
-   if old_records_ids.empty? && new_records.empty?
+   if !has_records_to_update && has_dedicated_index
      Logger.log('I:Content is already up to date.')
      return
    end
 
-   Logger.log("I:Updating records in index #{index.name}...")
-   Logger.log("I:Records to delete: #{old_records_ids.length}")
-   Logger.log("I:Records to add: #{new_records.length}")
-   return if Configurator.dry_run?
-
-   # We group delete and add operations into the same batch. Delete
-   # operations should still come first, to avoid hitting an overquota too
-   # soon
+   # We group all operations into one batch
    operations = []
-   old_records_ids.each do |object_id|
-     operations << {
-       action: 'deleteObject', indexName: index.name,
-       body: { objectID: object_id }
-     }
+
+   # We update records only if there are records to update
+   if has_records_to_update
+     Logger.log("I:Updating records in index #{index.name}...")
+     Logger.log("I:Records to delete: #{ids_to_delete.length}")
+     Logger.log("I:Records to add: #{ids_to_add.length}")
+
+     # Transforming ids into real records to add
+     records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
+     records_to_add = ids_to_add.map { |id| records_by_id[id] }
+
+     # Deletion operations come first, to avoid hitting an overquota too
+     # soon if it can be avoided
+     ids_to_delete.each do |object_id|
+       operations << {
+         action: 'deleteObject', indexName: index.name,
+         body: { objectID: object_id }
+       }
+     end
+     # Then we add the new records
+     operations += records_to_add.map do |new_record|
+       { action: 'addObject', indexName: index.name, body: new_record }
+     end
    end
-   operations += new_records.map do |new_record|
-     { action: 'addObject', indexName: index.name, body: new_record }
+
+   # We update the dedicated index every time we update records, but we also
+   # create it if it does not exist
+   should_update_dedicated_index = has_records_to_update ||
+                                   !has_dedicated_index
+   if should_update_dedicated_index
+     operations << { action: 'clear', indexName: index_object_ids.name }
+     local_ids.each_slice(100).each do |ids|
+       operations << {
+         action: 'addObject', indexName: index_object_ids.name,
+         body: { content: ids }
+       }
+     end
    end
 
+   execute_operations(operations)
+ end
+
+ # Public: Execute a series of operations in a batch
+ #
+ # operations - Operations to batch
+ #
+ # Note: Will split the batch into several calls if too big, and will display
+ # a progress bar if this happens
+ def self.execute_operations(operations)
+   return if Configurator.dry_run?
+   return if operations.empty?
+
    # Run the batches in slices if they are too large
    batch_size = Configurator.algolia('indexing_batch_size')
    slices = operations.each_slice(batch_size).to_a
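The heart of the reworked `update_records` is a plain set difference on objectIDs; the rest is bookkeeping around it. A toy illustration of the diff step (IDs are made up):

    remote_ids = %w[a1 b2 c3] # what the Algolia index currently holds
    local_ids  = %w[b2 c3 d4] # what the freshly generated site contains

    ids_to_delete = remote_ids - local_ids # => ["a1"] -> deleteObject operations
    ids_to_add    = local_ids - remote_ids # => ["d4"] -> addObject operations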
@@ -118,7 +245,7 @@ module Jekyll
    if should_have_progress_bar
      progress_bar = ProgressBar.create(
        total: slices.length,
-       format: 'Pushing records (%j%%) |%B|'
+       format: 'Updating index (%j%%) |%B|'
      )
    end
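For context, the renamed progress bar counts batch slices: `execute_operations` (added earlier in this diff) cuts the operation list into chunks of `indexing_batch_size` before pushing. A small illustration of the slicing (the batch size here is only an example; the plugin reads it from its configuration):

    operations = Array.new(2500) { |i| { action: 'addObject', body: { objectID: i.to_s } } }
    batch_size = 1000 # illustrative value for indexing_batch_size
    slices = operations.each_slice(batch_size).to_a
    puts slices.length # => 3 (1000 + 1000 + 500)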
 
@@ -128,10 +255,7 @@ module Jekyll
 
      progress_bar.increment if should_have_progress_bar
    rescue StandardError => error
-     records = slice.map do |record|
-       record[:body]
-     end
-     ErrorHandler.stop(error, records: records)
+     ErrorHandler.stop(error, operations: slice)
    end
  end
 
@@ -253,23 +377,12 @@ module Jekyll
    exit 1
  end
 
- # Update settings
  update_settings
-
- # Getting list of objectID in remote and locally
- remote_ids = remote_object_ids
- local_ids = local_object_ids(records)
-
- # Getting list of what to add and what to delete
- old_records_ids = remote_ids - local_ids
- new_records_ids = local_ids - remote_ids
- new_records = records.select do |record|
-   new_records_ids.include?(record[:objectID])
- end
- update_records(old_records_ids, new_records)
+ update_records(records)
 
  Logger.log('I:✔ Indexing complete')
  end
  end
  end
  end
+ # rubocop:enable Metrics/ModuleLength
@@ -2,6 +2,6 @@
 
  module Jekyll
    module Algolia
-     VERSION = '1.2.7'
+     VERSION = '1.3.0'
    end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: jekyll-algolia
  version: !ruby/object:Gem::Version
-   version: 1.2.7
+   version: 1.3.0
  platform: ruby
  authors:
  - Tim Carry
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-03-28 00:00:00.000000000 Z
+ date: 2018-04-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: algolia_html_extractor
@@ -292,6 +292,7 @@ files:
  - lib/errors/no_records_found.txt
  - lib/errors/record_too_big.txt
  - lib/errors/settings_manually_edited.txt
+ - lib/errors/too_many_records.txt
  - lib/errors/unknown_application_id.txt
  - lib/errors/unknown_settings.txt
  - lib/jekyll-algolia.rb