jekyll-algolia 1.2.7 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6243fd7aa41bd8a50531f03d81c2162ff2c0c8a1
4
- data.tar.gz: 16da8529592e1cf701cce7781588486d41a11390
3
+ metadata.gz: cf200adeeada4a57c74a46db83ca98f29fe69bde
4
+ data.tar.gz: 4098806de6e5cc74d79021b99bc8875fad5cacee
5
5
  SHA512:
6
- metadata.gz: b95eb1944bca76b4fd0520f3f9ad6c29b0763c515dc0160edfb2a8e5086b9da1b9827741e258f5b27334efef4e49e4c86235dd02c2f8a4ad69f95b30dc254a2e
7
- data.tar.gz: e40caac42d9d8793bc2d15025c0a3f017b9ee04235d84bfd4bb4fb2b6e392159edb7b290333752e373ec96931c7f3ef7ef7b46e2ea4ddeb1b3e826b16db262cb
6
+ metadata.gz: 25b04ae1214a048234d7ad9512a00939cc99fcc4d35e025a08bdfd826c4267d355f21d67a2e559fde018d25a5fd45cb611c27cc4d6c846f0641ead7c034a7107
7
+ data.tar.gz: dd36674e3de277bf3fa62daf3b24fb1e165d68979f8ad6b2935235587779d660e4ae0ffb29453168fde47a75c29890a08cb5061a4c2bc54606d04a72fd581f4d
@@ -3,6 +3,9 @@ E:
3
3
  E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided.
4
4
  W:
5
5
  W:Make sure your API key has access to your {application_id} application.
6
+ W:It should also have the rights to push to the following indices:
7
+ W:   - {index_name}
8
+ W:   - {index_object_ids_name}
6
9
  I:
7
10
  I:You can find your API key in your Algolia dashboard here:
8
11
  I:   https://www.algolia.com/licensing
@@ -0,0 +1,14 @@
1
+ E:[✗ Error] Too many records
2
+ E:
3
+ E:The jekyll-algolia plugin could not push your records because it exceeds the maximum number of records allowed in your current plan.
4
+ W:
5
+ W:Community plans can host up to 10k records and Essential plans start at 50k.
6
+ W:
7
+ W:Check our pricing page for more details:
8
+ W:   https://www.algolia.com/pricing
9
+ W:
10
+ I:You might want to upgrade your plan or exclude records from indexing using the `files_to_exclude` option:
11
+ I:   https://community.algolia.com/jekyll-algolia/options.html#files-to-exclude
12
+ I:
13
+ I:If you're having trouble solving this issue, feel free to file a bug on GitHub, ideally with a link to a repository where we can reproduce the issue as well as the APPID you're trying to push to.
14
+ I:  https://github.com/algolia/jekyll-algolia/issues
@@ -148,6 +148,11 @@ module Jekyll
148
148
  ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
149
149
  end
150
150
 
151
+ # Public: Return the name of the index used to store the object ids
152
+ def self.index_object_ids_name
153
+ "#{index_name}_object_ids"
154
+ end
155
+
151
156
  # Public: Get the index settings
152
157
  #
153
158
  # This will be a merge of default settings and the one defined in the
@@ -48,7 +48,8 @@ module Jekyll
48
48
  unknown_application_id
49
49
  invalid_credentials
50
50
  record_too_big
51
- unknown_settings
51
+ too_many_records
52
+ unknown_setting
52
53
  invalid_index_name
53
54
  ]
54
55
 
@@ -134,6 +135,24 @@ module Jekyll
134
135
  hash
135
136
  end
136
137
 
138
+ # Public: Returns a string explaining which attributes are the largest in
139
+ # the record
140
+ #
141
+ # record - The record hash to analyze
142
+ #
143
+ # This will be used on the `record_too_big` error, to guide users in
144
+ # finding which record is causing trouble
145
+ def self.readable_largest_record_keys(record)
146
+ keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
147
+ largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
148
+ output = []
149
+ largest_keys.each do |key, size|
150
+ size = Filesize.from("#{size} B").to_s('Kb')
151
+ output << "#{key} (#{size})"
152
+ end
153
+ output.join(', ')
154
+ end
155
+
137
156
  # Public: Check if the application id is available
138
157
  #
139
158
  # _context - Not used
@@ -161,34 +180,19 @@ module Jekyll
161
180
  # Application ID and API key submitted don't match any credentials known
162
181
  def self.invalid_credentials?(error, _context = {})
163
182
  details = error_hash(error.message)
183
+ return false if details == false
164
184
 
165
185
  if details['message'] != 'Invalid Application-ID or API key'
166
186
  return false
167
187
  end
168
188
 
169
189
  {
170
- 'application_id' => details['application_id']
190
+ 'application_id' => details['application_id'],
191
+ 'index_name' => Configurator.index_name,
192
+ 'index_object_ids_name' => Configurator.index_object_ids_name
171
193
  }
172
194
  end
173
195
 
174
- # Public: Returns a string explaining which attributes are the largest in
175
- # the record
176
- #
177
- # record - The record hash to analyze
178
- #
179
- # This will be used on the `record_too_big` error, to guide users in
180
- # finding which record is causing trouble
181
- def self.readable_largest_record_keys(record)
182
- keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
183
- largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
184
- output = []
185
- largest_keys.each do |key, size|
186
- size = Filesize.from("#{size} B").to_s('Kb')
187
- output << "#{key} (#{size})"
188
- end
189
- output.join(', ')
190
- end
191
-
192
196
  # Public: Check if the sent records are not too big
193
197
  #
194
198
  # context[:records] - list of records sent in the batch
@@ -198,6 +202,7 @@ module Jekyll
198
202
  # information about it so the user can debug it.
199
203
  def self.record_too_big?(error, context = {})
200
204
  details = error_hash(error.message)
205
+ return false if details == false
201
206
 
202
207
  message = details['message']
203
208
  return false if message !~ /^Record .* is too big .*/
@@ -207,8 +212,11 @@ module Jekyll
207
212
  size = Filesize.from("#{size} B").to_s('Kb')
208
213
  object_id = details['objectID']
209
214
 
210
- # Getting record details
211
- record = Utils.find_by_key(context[:records], :objectID, object_id)
215
+ # Finding the record in all the operations
216
+ operation = context[:operations].find do |o|
217
+ o[:action] == 'addObject' && o[:body][:objectID] == object_id
218
+ end
219
+ record = operation[:body]
212
220
  probable_wrong_keys = readable_largest_record_keys(record)
213
221
 
214
222
  # Writing the full record to disk for inspection
@@ -236,8 +244,9 @@ module Jekyll
236
244
  # The API will block any call that tries to update a setting value that is
237
245
  # not available. We'll tell the user which one so they can fix their
238
246
  # issue.
239
- def self.unknown_settings?(error, context = {})
247
+ def self.unknown_setting?(error, context = {})
240
248
  details = error_hash(error.message)
249
+ return false if details == false
241
250
 
242
251
  message = details['message']
243
252
  return false if message !~ /^Invalid object attributes.*/
@@ -258,6 +267,7 @@ module Jekyll
258
267
  # Some characters are forbidden in index names
259
268
  def self.invalid_index_name?(error, _context = {})
260
269
  details = error_hash(error.message)
270
+ return false if details == false
261
271
 
262
272
  message = details['message']
263
273
  return false if message !~ /^indexName is not valid.*/
@@ -266,6 +276,19 @@ module Jekyll
266
276
  'index_name' => Configurator.index_name
267
277
  }
268
278
  end
279
+
280
+ # Public: Check if the application has too many records
281
+ #
282
+ # We're trying to push too many records and it goes over quota
283
+ def self.too_many_records?(error, _context = {})
284
+ details = error_hash(error.message)
285
+ return false if details == false
286
+
287
+ message = details['message']
288
+ return false if message !~ /^Record quota exceeded.*/
289
+
290
+ {}
291
+ end
269
292
  end
270
293
  end
271
294
  end
@@ -4,6 +4,7 @@ require 'algoliasearch'
4
4
  require 'yaml'
5
5
  require 'algolia_html_extractor'
6
6
 
7
+ # rubocop:disable Metrics/ModuleLength
7
8
  module Jekyll
8
9
  module Algolia
9
10
  # Module to push records to Algolia and configure the index
@@ -11,15 +12,15 @@ module Jekyll
11
12
  include Jekyll::Algolia
12
13
 
13
14
  # Public: Init the module
14
- #
15
- # This call will instanciate the Algolia API client, set the custom
16
- # User Agent and give an easy access to the main index
17
15
  def self.init
18
16
  ::Algolia.init(
19
17
  application_id: Configurator.application_id,
20
18
  api_key: Configurator.api_key
21
19
  )
22
- @index = ::Algolia::Index.new(Configurator.index_name)
20
+ index_name = Configurator.index_name
21
+ @index = ::Algolia::Index.new(index_name)
22
+ index_object_ids_name = Configurator.index_object_ids_name
23
+ @index_object_ids = ::Algolia::Index.new(index_object_ids_name)
23
24
 
24
25
  set_user_agent
25
26
 
@@ -31,6 +32,42 @@ module Jekyll
31
32
  @index
32
33
  end
33
34
 
35
+ # Public: Returns the Algolia index used to store object ids
36
+ def self.index_object_ids
37
+ @index_object_ids
38
+ end
39
+
40
+ # Public: Check if an index exists
41
+ #
42
+ # index - Index to check
43
+ #
44
+ # Note: there is no API endpoint to do that, so we try to get the settings
45
+ # instead, which will fail if the index does not exist
46
+ def self.index_exist?(index)
47
+ index.get_settings
48
+ true
49
+ rescue StandardError
50
+ false
51
+ end
52
+
53
+ # Public: Get the number of records in an index
54
+ #
55
+ # index - Index to check
56
+ #
57
+ # Note: We'll do an empty query search, to match everything, but we'll
58
+ # only return the objectID and one element, to get the shortest response
59
+ # possible. It will still contain the nbHits
60
+ def self.record_count(index)
61
+ index.search(
62
+ '',
63
+ attributesToRetrieve: 'objectID',
64
+ distinct: false,
65
+ hitsPerPage: 1
66
+ )['nbHits']
67
+ rescue StandardError
68
+ 0
69
+ end
70
+
34
71
  # Public: Set the User-Agent to send to the API
35
72
  #
36
73
  # Every integration should follow the "YYY Integration" pattern, and
@@ -48,27 +85,71 @@ module Jekyll
48
85
  ::Algolia.set_extra_header('User-Agent', user_agent)
49
86
  end
50
87
 
51
- # Public: Returns an array of all the objectIDs in the index
88
+ # Public: Get an array of all object IDs stored in the main index
52
89
  #
53
- # The returned array is sorted. It won't have any impact on the way it is
54
- # processed, but makes debugging easier when comparing arrays is needed.
55
- def self.remote_object_ids
90
+ # Note: As this will be slow (grabbing them 1000 at a time), we display
91
+ # a progress bar.
92
+ def self.remote_object_ids_from_main_index
93
+ Logger.verbose("I:Inspecting existing records in index #{index.name}")
94
+
56
95
  list = []
57
- Logger.verbose(
58
- "I:Inspecting existing records in index #{index.name}..."
96
+
97
+ # As it might take some time, we display a progress bar
98
+ progress_bar = ProgressBar.create(
99
+ total: record_count(index),
100
+ format: 'Inspecting existing records (%j%%) |%B|'
59
101
  )
60
102
  begin
61
- index.browse(attributesToRetrieve: 'objectID') do |hit|
103
+ index.browse(
104
+ attributesToRetrieve: 'objectID',
105
+ hitsPerPage: 1000
106
+ ) do |hit|
62
107
  list << hit['objectID']
108
+ progress_bar.increment
63
109
  end
64
110
  rescue StandardError
65
- # The index might not exist if it's the first time we use the plugin
66
- # so we'll consider that it means there are no records there
67
111
  return []
68
112
  end
113
+
69
114
  list.sort
70
115
  end
71
116
 
117
+ # Public: Get an array of all the object ids, stored in the dedicated
118
+ # index
119
+ #
120
+ # Note: This will be very fast. Each record contains 100 object ids, so it
121
+ # will fit in one call each time.
122
+ def self.remote_object_ids_from_dedicated_index
123
+ list = []
124
+ begin
125
+ index_object_ids.browse(
126
+ attributesToRetrieve: 'content',
127
+ hitsPerPage: 1000
128
+ ) do |hit|
129
+ list += hit['content']
130
+ end
131
+ rescue StandardError
132
+ return []
133
+ end
134
+
135
+ list.sort
136
+ end
137
+
138
+ # Public: Returns an array of all the objectIDs in the index
139
+ #
140
+ # Note: We use a dedicated index to store the objectIDs for faster
141
+ # browsing, but if the index does not exist we read the main index.
142
+ def self.remote_object_ids
143
+ Logger.log('I:Getting list of existing records')
144
+
145
+ # Fast version, using the dedicated index
146
+ has_dedicated_index = index_exist?(index_object_ids)
147
+ return remote_object_ids_from_dedicated_index if has_dedicated_index
148
+
149
+ # Slow version, browsing the full index
150
+ remote_object_ids_from_main_index
151
+ end
152
+
72
153
  # Public: Returns an array of the local objectIDs
73
154
  #
74
155
  # records - Array of all local records
@@ -78,38 +159,84 @@ module Jekyll
78
159
 
79
160
  # Public: Update records of the index
80
161
  #
81
- # old_records_ids - Ids of records to delete from the index
82
- # new_records - Records to add to the index
162
+ # records - All records extracted from Jekyll
83
163
  #
84
164
  # Note: All operations will be done in one batch, assuring an atomic
85
165
  # update
86
166
  # Does nothing in dry run mode
87
- def self.update_records(old_records_ids, new_records)
167
+ def self.update_records(records)
168
+ # Getting list of objectID in remote and locally
169
+ remote_ids = remote_object_ids
170
+ local_ids = local_object_ids(records)
171
+
172
+ # Making a diff, to see what to add and what to delete
173
+ ids_to_delete = remote_ids - local_ids
174
+ ids_to_add = local_ids - remote_ids
175
+
176
+ # What changes should we do to the indexes?
177
+ has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
178
+ has_dedicated_index = index_exist?(index_object_ids)
179
+
88
180
  # Stop if nothing to change
89
- if old_records_ids.empty? && new_records.empty?
181
+ if !has_records_to_update && has_dedicated_index
90
182
  Logger.log('I:Content is already up to date.')
91
183
  return
92
184
  end
93
185
 
94
- Logger.log("I:Updating records in index #{index.name}...")
95
- Logger.log("I:Records to delete: #{old_records_ids.length}")
96
- Logger.log("I:Records to add: #{new_records.length}")
97
- return if Configurator.dry_run?
98
-
99
- # We group delete and add operations into the same batch. Delete
100
- # operations should still come first, to avoid hitting an overquota too
101
- # soon
186
+ # We group all operations into one batch
102
187
  operations = []
103
- old_records_ids.each do |object_id|
104
- operations << {
105
- action: 'deleteObject', indexName: index.name,
106
- body: { objectID: object_id }
107
- }
188
+
189
+ # We update records only if there are records to update
190
+ if has_records_to_update
191
+ Logger.log("I:Updating records in index #{index.name}...")
192
+ Logger.log("I:Records to delete: #{ids_to_delete.length}")
193
+ Logger.log("I:Records to add: #{ids_to_add.length}")
194
+
195
+ # Transforming ids into real records to add
196
+ records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
197
+ records_to_add = ids_to_add.map { |id| records_by_id[id] }
198
+
199
+ # Deletion operations come first, to avoid hitting an overquota too
200
+ # soon if it can be avoided
201
+ ids_to_delete.each do |object_id|
202
+ operations << {
203
+ action: 'deleteObject', indexName: index.name,
204
+ body: { objectID: object_id }
205
+ }
206
+ end
207
+ # Then we add the new records
208
+ operations += records_to_add.map do |new_record|
209
+ { action: 'addObject', indexName: index.name, body: new_record }
210
+ end
108
211
  end
109
- operations += new_records.map do |new_record|
110
- { action: 'addObject', indexName: index.name, body: new_record }
212
+
213
+ # We update the dedicated index every time we update records, but we also
214
+ # create it if it does not exist
215
+ should_update_dedicated_index = has_records_to_update ||
216
+ !has_dedicated_index
217
+ if should_update_dedicated_index
218
+ operations << { action: 'clear', indexName: index_object_ids.name }
219
+ local_ids.each_slice(100).each do |ids|
220
+ operations << {
221
+ action: 'addObject', indexName: index_object_ids.name,
222
+ body: { content: ids }
223
+ }
224
+ end
111
225
  end
112
226
 
227
+ execute_operations(operations)
228
+ end
229
+
230
+ # Public: Execute a series of operations in a batch
231
+ #
232
+ # operations - Operations to batch
233
+ #
234
+ # Note: Will split the batch in several calls if too big, and will display
235
+ # a progress bar if this happens
236
+ def self.execute_operations(operations)
237
+ return if Configurator.dry_run?
238
+ return if operations.empty?
239
+
113
240
  # Run the batches in slices if they are too large
114
241
  batch_size = Configurator.algolia('indexing_batch_size')
115
242
  slices = operations.each_slice(batch_size).to_a
@@ -118,7 +245,7 @@ module Jekyll
118
245
  if should_have_progress_bar
119
246
  progress_bar = ProgressBar.create(
120
247
  total: slices.length,
121
- format: 'Pushing records (%j%%) |%B|'
248
+ format: 'Updating index (%j%%) |%B|'
122
249
  )
123
250
  end
124
251
 
@@ -128,10 +255,7 @@ module Jekyll
128
255
 
129
256
  progress_bar.increment if should_have_progress_bar
130
257
  rescue StandardError => error
131
- records = slice.map do |record|
132
- record[:body]
133
- end
134
- ErrorHandler.stop(error, records: records)
258
+ ErrorHandler.stop(error, operations: slice)
135
259
  end
136
260
  end
137
261
  end
@@ -253,23 +377,12 @@ module Jekyll
253
377
  exit 1
254
378
  end
255
379
 
256
- # Update settings
257
380
  update_settings
258
-
259
- # Getting list of objectID in remote and locally
260
- remote_ids = remote_object_ids
261
- local_ids = local_object_ids(records)
262
-
263
- # Getting list of what to add and what to delete
264
- old_records_ids = remote_ids - local_ids
265
- new_records_ids = local_ids - remote_ids
266
- new_records = records.select do |record|
267
- new_records_ids.include?(record[:objectID])
268
- end
269
- update_records(old_records_ids, new_records)
381
+ update_records(records)
270
382
 
271
383
  Logger.log('I:✔ Indexing complete')
272
384
  end
273
385
  end
274
386
  end
275
387
  end
388
+ # rubocop:enable Metrics/ModuleLength
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Jekyll
4
4
  module Algolia
5
- VERSION = '1.2.7'
5
+ VERSION = '1.3.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-algolia
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-28 00:00:00.000000000 Z
11
+ date: 2018-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia_html_extractor
@@ -292,6 +292,7 @@ files:
292
292
  - lib/errors/no_records_found.txt
293
293
  - lib/errors/record_too_big.txt
294
294
  - lib/errors/settings_manually_edited.txt
295
+ - lib/errors/too_many_records.txt
295
296
  - lib/errors/unknown_application_id.txt
296
297
  - lib/errors/unknown_settings.txt
297
298
  - lib/jekyll-algolia.rb