jekyll-algolia 1.0.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +51 -30
- data/README.md +69 -27
- data/lib/errors/invalid_credentials.txt +12 -0
- data/lib/errors/invalid_index_name.txt +9 -0
- data/lib/errors/missing_api_key.txt +15 -0
- data/lib/errors/missing_application_id.txt +11 -0
- data/lib/errors/missing_index_name.txt +18 -0
- data/lib/errors/no_records_found.txt +14 -0
- data/lib/errors/record_too_big.txt +27 -0
- data/lib/errors/record_too_big_api.txt +10 -0
- data/lib/errors/settings_manually_edited.txt +17 -0
- data/lib/errors/too_many_records.txt +14 -0
- data/lib/errors/unknown_application_id.txt +16 -0
- data/lib/errors/unknown_settings.txt +12 -0
- data/lib/jekyll-algolia.rb +45 -60
- data/lib/jekyll/algolia/configurator.rb +137 -44
- data/lib/jekyll/algolia/error_handler.rb +36 -48
- data/lib/jekyll/algolia/extractor.rb +16 -6
- data/lib/jekyll/algolia/file_browser.rb +161 -68
- data/lib/jekyll/algolia/hooks.rb +18 -6
- data/lib/jekyll/algolia/indexer.rb +283 -145
- data/lib/jekyll/algolia/logger.rb +39 -8
- data/lib/jekyll/algolia/overwrites/githubpages-configuration.rb +32 -0
- data/lib/jekyll/algolia/overwrites/jekyll-algolia-site.rb +151 -0
- data/lib/jekyll/algolia/overwrites/jekyll-document.rb +13 -0
- data/lib/jekyll/algolia/overwrites/jekyll-paginate-pager.rb +20 -0
- data/lib/jekyll/algolia/overwrites/jekyll-tags-link.rb +33 -0
- data/lib/jekyll/algolia/progress_bar.rb +27 -0
- data/lib/jekyll/algolia/shrinker.rb +112 -0
- data/lib/jekyll/algolia/utils.rb +118 -2
- data/lib/jekyll/algolia/version.rb +1 -1
- data/lib/jekyll/commands/algolia.rb +3 -14
- metadata +75 -31
- data/errors/invalid_credentials.txt +0 -10
- data/errors/invalid_credentials_for_tmp_index.txt +0 -17
- data/errors/invalid_index_name.txt +0 -11
- data/errors/missing_api_key.txt +0 -17
- data/errors/missing_application_id.txt +0 -12
- data/errors/missing_index_name.txt +0 -19
- data/errors/no_records_found.txt +0 -20
- data/errors/record_too_big.txt +0 -25
- data/errors/unknown_application_id.txt +0 -20
- data/errors/unknown_settings.txt +0 -15
@@ -11,13 +11,15 @@ module Jekyll
|
|
11
11
|
# Public: Extract records from the file
|
12
12
|
#
|
13
13
|
# file - The Jekyll file to process
|
14
|
-
# TOTEST
|
15
14
|
def self.run(file)
|
16
|
-
# Getting all
|
15
|
+
# Getting all nodes from the HTML input
|
17
16
|
raw_records = extract_raw_records(file.content)
|
18
17
|
# Getting file metadata
|
19
18
|
shared_metadata = FileBrowser.metadata(file)
|
20
19
|
|
20
|
+
# If no content, we still index the metadata
|
21
|
+
raw_records = [shared_metadata] if raw_records.empty?
|
22
|
+
|
21
23
|
# Building the list of records
|
22
24
|
records = []
|
23
25
|
raw_records.map do |record|
|
@@ -31,7 +33,7 @@ module Jekyll
|
|
31
33
|
# Apply custom user-defined hooks
|
32
34
|
# Users can return `nil` from the hook to signal we should not index
|
33
35
|
# such a record
|
34
|
-
record = Hooks.apply_each(record, node)
|
36
|
+
record = Hooks.apply_each(record, node, Jekyll::Algolia.site)
|
35
37
|
next if record.nil?
|
36
38
|
|
37
39
|
records << record
|
@@ -48,16 +50,24 @@ module Jekyll
|
|
48
50
|
end
|
49
51
|
|
50
52
|
# Public: Extract raw records from the file, including content for each
|
51
|
-
# node
|
53
|
+
# node and its headings
|
52
54
|
#
|
53
55
|
# content - The HTML content to parse
|
54
56
|
def self.extract_raw_records(content)
|
55
|
-
AlgoliaHTMLExtractor.run(
|
57
|
+
records = AlgoliaHTMLExtractor.run(
|
56
58
|
content,
|
57
59
|
options: {
|
58
|
-
css_selector: Configurator.algolia('nodes_to_index')
|
60
|
+
css_selector: Configurator.algolia('nodes_to_index'),
|
61
|
+
tags_to_exclude: 'script,style,iframe'
|
59
62
|
}
|
60
63
|
)
|
64
|
+
# We remove objectIDs, as they will be added at the very end, after all
|
65
|
+
# the hooks and shrinkage
|
66
|
+
records.each do |record|
|
67
|
+
record.delete(:objectID)
|
68
|
+
end
|
69
|
+
|
70
|
+
records
|
61
71
|
end
|
62
72
|
end
|
63
73
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'algolia_html_extractor'
|
4
|
+
require 'pathname'
|
5
|
+
require 'time'
|
4
6
|
|
5
7
|
module Jekyll
|
6
8
|
module Algolia
|
@@ -13,6 +15,50 @@ module Jekyll
|
|
13
15
|
module FileBrowser
|
14
16
|
include Jekyll::Algolia
|
15
17
|
|
18
|
+
# Public: Return the absolute path of a Jekyll file
|
19
|
+
#
|
20
|
+
# filepath - The path of the Jekyll file to inspect
|
21
|
+
def self.absolute_path(filepath)
|
22
|
+
pathname = Pathname.new(filepath)
|
23
|
+
return pathname.cleanpath.to_s if pathname.absolute?
|
24
|
+
|
25
|
+
File.expand_path(File.join(Configurator.get('source'), filepath))
|
26
|
+
end
|
27
|
+
|
28
|
+
# Public: Return the path of a Jekyll file relative to the Jekyll source
|
29
|
+
#
|
30
|
+
# filepath - The path of the Jekyll file to inspect
|
31
|
+
def self.relative_path(filepath)
|
32
|
+
pathname = Pathname.new(filepath)
|
33
|
+
config_source = Configurator.get('source') || ''
|
34
|
+
jekyll_source = Pathname.new(File.expand_path(config_source))
|
35
|
+
|
36
|
+
# Removing any starting ./
|
37
|
+
if pathname.relative?
|
38
|
+
fullpath = File.expand_path(File.join(jekyll_source, pathname))
|
39
|
+
return fullpath.gsub(%r{^#{jekyll_source}/}, '')
|
40
|
+
end
|
41
|
+
|
42
|
+
pathname.relative_path_from(jekyll_source).cleanpath.to_s
|
43
|
+
end
|
44
|
+
|
45
|
+
# Public: Check if the file should be indexed
|
46
|
+
#
|
47
|
+
# file - The Jekyll file
|
48
|
+
#
|
49
|
+
# There are many reasons a file should not be indexed. We need to exclude
|
50
|
+
# all the static assets, only keep the actual content.
|
51
|
+
def self.indexable?(file)
|
52
|
+
return false if static_file?(file)
|
53
|
+
return false if is_404?(file)
|
54
|
+
return false if redirect?(file)
|
55
|
+
return false unless allowed_extension?(file)
|
56
|
+
return false if excluded_from_config?(file)
|
57
|
+
return false if excluded_from_hook?(file)
|
58
|
+
|
59
|
+
true
|
60
|
+
end
|
61
|
+
|
16
62
|
# Public: Check if the specified file is a static Jekyll asset
|
17
63
|
#
|
18
64
|
# file - The Jekyll file
|
@@ -30,20 +76,27 @@ module Jekyll
|
|
30
76
|
# pages. We don't want to index those.
|
31
77
|
# Source: https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
|
32
78
|
#
|
33
|
-
# rubocop:disable Naming/PredicateName
|
34
79
|
def self.is_404?(file)
|
35
|
-
|
80
|
+
['404.md', '404.html'].include?(File.basename(file.path))
|
36
81
|
end
|
37
|
-
# rubocop:enable Naming/PredicateName
|
38
82
|
|
39
|
-
# Public: Check if the
|
83
|
+
# Public: Check if the file is a redirect page
|
40
84
|
#
|
41
85
|
# file - The Jekyll file
|
42
86
|
#
|
43
|
-
#
|
44
|
-
# We
|
45
|
-
|
46
|
-
|
87
|
+
# Plugins like jekyll-redirect-from add dynamic pages that only contain
|
88
|
+
# an HTML meta refresh. We need to exclude those files from indexing.
|
89
|
+
# https://github.com/jekyll/jekyll-redirect-from
|
90
|
+
def self.redirect?(file)
|
91
|
+
# When using redirect_from, jekyll-redirect-from creates a page named
|
92
|
+
# `redirect.html`
|
93
|
+
return true if file.respond_to?(:name) && file.name == 'redirect.html'
|
94
|
+
# When using redirect_to, it sets the layout to `redirect`
|
95
|
+
if file.respond_to?(:data) && file.data['layout'] == 'redirect'
|
96
|
+
return true
|
97
|
+
end
|
98
|
+
|
99
|
+
false
|
47
100
|
end
|
48
101
|
|
49
102
|
# Public: Check if the file has one of the allowed extensions
|
@@ -55,36 +108,24 @@ module Jekyll
|
|
55
108
|
# and raw HTML files but this list can be extended using the
|
56
109
|
# `extensions_to_index` config option.
|
57
110
|
def self.allowed_extension?(file)
|
58
|
-
extensions = Configurator.
|
111
|
+
extensions = Configurator.extensions_to_index
|
59
112
|
extname = File.extname(file.path)[1..-1]
|
60
113
|
extensions.include?(extname)
|
61
114
|
end
|
62
115
|
|
63
|
-
# Public: Check if the file has been excluded by the user
|
64
|
-
#
|
65
|
-
# file - The Jekyll file
|
66
|
-
#
|
67
|
-
# Files can be excluded either by setting the `files_to_exclude` option,
|
68
|
-
# or by defining a custom hook
|
69
|
-
def self.excluded_by_user?(file)
|
70
|
-
excluded_from_config?(file) || excluded_from_hook?(file)
|
71
|
-
end
|
72
|
-
|
73
116
|
# Public: Check if the file has been excluded by `files_to_exclude`
|
74
117
|
#
|
75
118
|
# file - The Jekyll file
|
76
119
|
def self.excluded_from_config?(file)
|
77
120
|
excluded_patterns = Configurator.algolia('files_to_exclude')
|
78
|
-
|
121
|
+
jekyll_source = Configurator.get('source')
|
122
|
+
path = absolute_path(file.path)
|
79
123
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
excluded_files += Dir.glob(pattern)
|
84
|
-
end
|
124
|
+
excluded_patterns.each do |pattern|
|
125
|
+
pattern = File.expand_path(File.join(jekyll_source, pattern))
|
126
|
+
return true if File.fnmatch(pattern, path, File::FNM_PATHNAME)
|
85
127
|
end
|
86
|
-
|
87
|
-
excluded_files.include?(file.path)
|
128
|
+
false
|
88
129
|
end
|
89
130
|
|
90
131
|
# Public: Check if the file has been excluded by running a custom user
|
@@ -95,34 +136,6 @@ module Jekyll
|
|
95
136
|
Hooks.should_be_excluded?(file.path)
|
96
137
|
end
|
97
138
|
|
98
|
-
# Public: Return the path to the original file, relative from the Jekyll
|
99
|
-
# source
|
100
|
-
#
|
101
|
-
# file - The Jekyll file
|
102
|
-
#
|
103
|
-
# Pages have their .path property relative to the source, but collections
|
104
|
-
# (including posts) have an absolute file path.
|
105
|
-
def self.path_from_root(file)
|
106
|
-
source = Configurator.get('source')
|
107
|
-
file.path.gsub(%r{^#{source}/}, '')
|
108
|
-
end
|
109
|
-
|
110
|
-
# Public: Check if the file should be indexed
|
111
|
-
#
|
112
|
-
# file - The Jekyll file
|
113
|
-
#
|
114
|
-
# There are many reasons a file should not be indexed. We need to exclude
|
115
|
-
# all the static assets, only keep the actual content.
|
116
|
-
def self.indexable?(file)
|
117
|
-
return false if static_file?(file)
|
118
|
-
return false if is_404?(file)
|
119
|
-
return false if pagination_page?(file)
|
120
|
-
return false unless allowed_extension?(file)
|
121
|
-
return false if excluded_by_user?(file)
|
122
|
-
|
123
|
-
true
|
124
|
-
end
|
125
|
-
|
126
139
|
# Public: Return a hash of all the file metadata
|
127
140
|
#
|
128
141
|
# file - The Jekyll file
|
@@ -134,6 +147,8 @@ module Jekyll
|
|
134
147
|
raw_data = raw_data(file)
|
135
148
|
specific_data = {
|
136
149
|
collection: collection(file),
|
150
|
+
tags: tags(file),
|
151
|
+
categories: categories(file),
|
137
152
|
date: date(file),
|
138
153
|
excerpt_html: excerpt_html(file),
|
139
154
|
excerpt_text: excerpt_text(file),
|
@@ -164,10 +179,16 @@ module Jekyll
|
|
164
179
|
data.each_key do |key|
|
165
180
|
data.delete(key) if respond_to?(key)
|
166
181
|
end
|
167
|
-
|
168
|
-
# Also delete keys we manually handle
|
169
182
|
data.delete('excerpt')
|
170
183
|
|
184
|
+
# Delete other keys added by Jekyll that are not in the front-matter and
|
185
|
+
# not needed for search
|
186
|
+
data.delete('draft')
|
187
|
+
data.delete('ext')
|
188
|
+
|
189
|
+
# Convert all values to a version that can be serialized to JSON
|
190
|
+
data = Utils.jsonify(data)
|
191
|
+
|
171
192
|
# Convert all keys to symbols
|
172
193
|
data = Utils.keys_to_symbols(data)
|
173
194
|
|
@@ -196,29 +217,102 @@ module Jekyll
|
|
196
217
|
file.url
|
197
218
|
end
|
198
219
|
|
220
|
+
# Public: Returns the list of tags of a file, defaults to an empty array
|
221
|
+
#
|
222
|
+
# file - The Jekyll file
|
223
|
+
def self.tags(file)
|
224
|
+
file.data['tags'] || []
|
225
|
+
end
|
226
|
+
|
227
|
+
# Public: Returns the list of categories of a file, defaults to an empty array
|
228
|
+
#
|
229
|
+
# file - The Jekyll file
|
230
|
+
def self.categories(file)
|
231
|
+
file.data['categories'] || []
|
232
|
+
end
|
233
|
+
|
199
234
|
# Public: Returns a timestamp of the file date
|
200
235
|
#
|
201
236
|
# file - The Jekyll file
|
202
237
|
#
|
203
|
-
#
|
204
|
-
#
|
205
|
-
#
|
238
|
+
# Posts have their date coming from the filepath, or the front-matter.
|
239
|
+
# Pages and other collection items can only have a date set in
|
240
|
+
# front-matter.
|
206
241
|
def self.date(file)
|
207
|
-
date
|
242
|
+
# Collections get their date from .date, while pages read it from .data.
|
243
|
+
# Jekyll by default will set the date of collection to the current date,
|
244
|
+
# but we monkey-patched that so it returns nil for collection items
|
245
|
+
date = if file.respond_to?(:date)
|
246
|
+
file.date
|
247
|
+
else
|
248
|
+
file.data['date']
|
249
|
+
end
|
250
|
+
|
208
251
|
return nil if date.nil?
|
209
252
|
|
210
|
-
date
|
253
|
+
# If date is a string, we try to parse it
|
254
|
+
if date.is_a? String
|
255
|
+
begin
|
256
|
+
date = Time.parse(date)
|
257
|
+
rescue StandardError
|
258
|
+
return nil
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
date.to_time.to_i
|
211
263
|
end
|
212
264
|
|
213
|
-
# Public: Returns the
|
265
|
+
# Public: Returns the raw excerpt of a file, directly as returned by
|
266
|
+
# Jekyll. Swallow any error that could occur when reading.
|
214
267
|
#
|
215
268
|
# file - The Jekyll file
|
216
269
|
#
|
217
|
-
#
|
270
|
+
# This might throw an exception if the excerpt is invalid. We also
|
271
|
+
# silence all logger output as Jekyll is quite verbose and will display
|
272
|
+
# the potential Liquid error in the terminal, even if we catch the actual
|
273
|
+
# error.
|
274
|
+
def self.excerpt_raw(file)
|
275
|
+
Logger.silent do
|
276
|
+
return file.data['excerpt'].to_s.strip
|
277
|
+
end
|
278
|
+
rescue StandardError
|
279
|
+
nil
|
280
|
+
end
|
281
|
+
|
282
|
+
# Public: Return true if the Jekyll default excerpt should be used for
|
283
|
+
# this file
|
284
|
+
#
|
285
|
+
# file - The Jekyll file
|
286
|
+
#
|
287
|
+
# Most of the time, we'll use our own excerpt (the first matching
|
288
|
+
# element), but in some cases, we'll fallback to Jekyll's default excerpt
|
289
|
+
# if it seems to be what the user wants
|
290
|
+
def self.use_default_excerpt?(file)
|
291
|
+
# Only posts can have excerpt
|
292
|
+
return false unless type(file) == 'post'
|
293
|
+
|
294
|
+
# User defined their own separator in the config
|
295
|
+
custom_separator = file.excerpt_separator.to_s.strip
|
296
|
+
return false if custom_separator.empty?
|
297
|
+
|
298
|
+
# This specific post contains this separator
|
299
|
+
file.content.include?(custom_separator)
|
300
|
+
end
|
301
|
+
|
302
|
+
# Public: Returns the HTML version of the excerpt
|
303
|
+
#
|
304
|
+
# file - The Jekyll file
|
218
305
|
def self.excerpt_html(file)
|
219
|
-
|
220
|
-
return
|
221
|
-
|
306
|
+
# If it's a post with a custom separator for the excerpt, we honor it
|
307
|
+
return excerpt_raw(file) if use_default_excerpt?(file)
|
308
|
+
|
309
|
+
# Otherwise we take the first matching node
|
310
|
+
html = file.content
|
311
|
+
selector = Configurator.algolia('nodes_to_index')
|
312
|
+
first_node = Nokogiri::HTML(html).css(selector).first
|
313
|
+
return nil if first_node.nil?
|
314
|
+
|
315
|
+
first_node.to_s
|
222
316
|
end
|
223
317
|
|
224
318
|
# Public: Returns the text version of the excerpt
|
@@ -228,7 +322,6 @@ module Jekyll
|
|
228
322
|
# Only collections (including posts) have an excerpt. Pages don't.
|
229
323
|
def self.excerpt_text(file)
|
230
324
|
html = excerpt_html(file)
|
231
|
-
return nil if html.nil?
|
232
325
|
Utils.html_to_text(html)
|
233
326
|
end
|
234
327
|
|
data/lib/jekyll/algolia/hooks.rb
CHANGED
@@ -11,8 +11,15 @@ module Jekyll
|
|
11
11
|
#
|
12
12
|
# record - The hash of the record to be pushed
|
13
13
|
# node - The Nokogiri node of the element
|
14
|
-
def self.apply_each(record, node)
|
15
|
-
before_indexing_each
|
14
|
+
def self.apply_each(record, node, context)
|
15
|
+
case method(:before_indexing_each).arity
|
16
|
+
when 1
|
17
|
+
before_indexing_each(record)
|
18
|
+
when 2
|
19
|
+
before_indexing_each(record, node)
|
20
|
+
else
|
21
|
+
before_indexing_each(record, node, context)
|
22
|
+
end
|
16
23
|
end
|
17
24
|
|
18
25
|
# Public: Apply the before_indexing_all hook to all records.
|
@@ -21,8 +28,13 @@ module Jekyll
|
|
21
28
|
# as they can be mocked in tests.
|
22
29
|
#
|
23
30
|
# records - The list of all records to be indexed
|
24
|
-
def self.apply_all(records)
|
25
|
-
before_indexing_all
|
31
|
+
def self.apply_all(records, context)
|
32
|
+
case method(:before_indexing_all).arity
|
33
|
+
when 1
|
34
|
+
before_indexing_all(records)
|
35
|
+
else
|
36
|
+
before_indexing_all(records, context)
|
37
|
+
end
|
26
38
|
end
|
27
39
|
|
28
40
|
# Public: Check if the file should be indexed or not
|
@@ -47,7 +59,7 @@ module Jekyll
|
|
47
59
|
# information from the HTML node.
|
48
60
|
#
|
49
61
|
# Users can return nil to signal that the record should not be indexed
|
50
|
-
def self.before_indexing_each(record, _node)
|
62
|
+
def self.before_indexing_each(record, _node, _context)
|
51
63
|
record
|
52
64
|
end
|
53
65
|
|
@@ -59,7 +71,7 @@ module Jekyll
|
|
59
71
|
# Users can modify the full list from here. It might provide an easier
|
60
72
|
# interface than `hook_before_indexing_each` when knowing the full context
|
61
73
|
# is necessary
|
62
|
-
def self.before_indexing_all(records)
|
74
|
+
def self.before_indexing_all(records, _context)
|
63
75
|
records
|
64
76
|
end
|
65
77
|
end
|
@@ -1,7 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'algoliasearch'
|
4
|
+
require 'yaml'
|
5
|
+
require 'algolia_html_extractor'
|
4
6
|
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
5
8
|
module Jekyll
|
6
9
|
module Algolia
|
7
10
|
# Module to push records to Algolia and configure the index
|
@@ -9,16 +12,60 @@ module Jekyll
|
|
9
12
|
include Jekyll::Algolia
|
10
13
|
|
11
14
|
# Public: Init the module
|
12
|
-
#
|
13
|
-
# This call will instanciate the Algolia API client, set the custom
|
14
|
-
# User Agent and give an easy access to the main index
|
15
15
|
def self.init
|
16
16
|
::Algolia.init(
|
17
17
|
application_id: Configurator.application_id,
|
18
18
|
api_key: Configurator.api_key
|
19
19
|
)
|
20
|
+
index_name = Configurator.index_name
|
21
|
+
@index = ::Algolia::Index.new(index_name)
|
22
|
+
index_object_ids_name = Configurator.index_object_ids_name
|
23
|
+
@index_object_ids = ::Algolia::Index.new(index_object_ids_name)
|
20
24
|
|
21
25
|
set_user_agent
|
26
|
+
|
27
|
+
self
|
28
|
+
end
|
29
|
+
|
30
|
+
# Public: Returns the Algolia index object
|
31
|
+
def self.index
|
32
|
+
@index
|
33
|
+
end
|
34
|
+
|
35
|
+
# Public: Returns the Algolia index used to store object ids
|
36
|
+
def self.index_object_ids
|
37
|
+
@index_object_ids
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: Check if an index exists
|
41
|
+
#
|
42
|
+
# index - Index to check
|
43
|
+
#
|
44
|
+
# Note: there is no API endpoint to do that, so we try to get the settings
|
45
|
+
# instead, which will fail if the index does not exist
|
46
|
+
def self.index_exist?(index)
|
47
|
+
index.get_settings
|
48
|
+
true
|
49
|
+
rescue StandardError
|
50
|
+
false
|
51
|
+
end
|
52
|
+
|
53
|
+
# Public: Get the number of records in an index
|
54
|
+
#
|
55
|
+
# index - Index to check
|
56
|
+
#
|
57
|
+
# Note: We'll do an empty query search, to match everything, but we'll
|
58
|
+
# only return the objectID and one element, to get the shortest response
|
59
|
+
# possible. It will still contain the nbHits
|
60
|
+
def self.record_count(index)
|
61
|
+
index.search(
|
62
|
+
'',
|
63
|
+
attributesToRetrieve: 'objectID',
|
64
|
+
distinct: false,
|
65
|
+
hitsPerPage: 1
|
66
|
+
)['nbHits']
|
67
|
+
rescue StandardError
|
68
|
+
0
|
22
69
|
end
|
23
70
|
|
24
71
|
# Public: Set the User-Agent to send to the API
|
@@ -38,74 +85,75 @@ module Jekyll
|
|
38
85
|
::Algolia.set_extra_header('User-Agent', user_agent)
|
39
86
|
end
|
40
87
|
|
41
|
-
# Public:
|
88
|
+
# Public: Get an array of all object IDs stored in the main index
|
42
89
|
#
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
|
90
|
+
# Note: As this will be slow (grabbing them 1000 at a time), we display
|
91
|
+
# a progress bar.
|
92
|
+
def self.remote_object_ids_from_main_index
|
93
|
+
Logger.verbose("I:Inspecting existing records in index #{index.name}")
|
47
94
|
|
48
|
-
|
49
|
-
#
|
50
|
-
# index - Algolia Index to update
|
51
|
-
# records - Array of records to update
|
52
|
-
#
|
53
|
-
# New records will be automatically added. Technically existing records
|
54
|
-
# should be updated but this case should never happen as changing a record
|
55
|
-
# content will change its objectID as well.
|
56
|
-
#
|
57
|
-
# Does nothing in dry run mode
|
58
|
-
def self.update_records(index, records)
|
59
|
-
batch_size = Configurator.algolia('indexing_batch_size')
|
60
|
-
records.each_slice(batch_size) do |batch|
|
61
|
-
Logger.log("I:Pushing #{batch.size} records")
|
62
|
-
next if Configurator.dry_run?
|
63
|
-
begin
|
64
|
-
index.add_objects!(batch)
|
65
|
-
rescue StandardError => error
|
66
|
-
ErrorHandler.stop(error, records: records)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# Public: Delete records whose objectIDs are passed
|
72
|
-
#
|
73
|
-
# index - Algolia Index to target
|
74
|
-
# ids - Array of objectIDs to delete
|
75
|
-
#
|
76
|
-
# Does nothing in dry run mode
|
77
|
-
def self.delete_records_by_id(index, ids)
|
78
|
-
return if ids.empty?
|
79
|
-
Logger.log("I:Deleting #{ids.length} records")
|
80
|
-
return if Configurator.dry_run?
|
95
|
+
list = []
|
81
96
|
|
97
|
+
# As it might take some time, we display a progress bar
|
98
|
+
progress_bar = ProgressBar.create(
|
99
|
+
total: record_count(index),
|
100
|
+
format: 'Inspecting existing records (%j%%) |%B|'
|
101
|
+
)
|
82
102
|
begin
|
83
|
-
index.
|
84
|
-
|
85
|
-
|
103
|
+
index.browse(
|
104
|
+
attributesToRetrieve: 'objectID',
|
105
|
+
hitsPerPage: 1000
|
106
|
+
) do |hit|
|
107
|
+
list << hit['objectID']
|
108
|
+
progress_bar.increment
|
109
|
+
end
|
110
|
+
rescue StandardError
|
111
|
+
return []
|
86
112
|
end
|
113
|
+
|
114
|
+
list.sort
|
87
115
|
end
|
88
116
|
|
89
|
-
# Public:
|
90
|
-
#
|
91
|
-
# index - Algolia Index to target
|
117
|
+
# Public: Get an array of all the object ids, stored in a dedicated
|
118
|
+
# index
|
92
119
|
#
|
93
|
-
#
|
94
|
-
#
|
95
|
-
def self.
|
120
|
+
# Note: This will be very fast. Each record contains up to 100 object ids, so it
|
121
|
+
# will fit in one call each time.
|
122
|
+
def self.remote_object_ids_from_dedicated_index
|
96
123
|
list = []
|
97
124
|
begin
|
98
|
-
|
99
|
-
|
125
|
+
index_object_ids.browse(
|
126
|
+
attributesToRetrieve: 'content',
|
127
|
+
hitsPerPage: 1000
|
128
|
+
) do |hit|
|
129
|
+
list += hit['content']
|
100
130
|
end
|
101
131
|
rescue StandardError
|
102
|
-
# The index might not exist if it's the first time we use the plugin
|
103
|
-
# so we'll consider that it means there are no records there
|
104
132
|
return []
|
105
133
|
end
|
134
|
+
|
106
135
|
list.sort
|
107
136
|
end
|
108
137
|
|
138
|
+
# Public: Returns an array of all the objectIDs in the index
|
139
|
+
#
|
140
|
+
# Note: We use a dedicated index to store the objectIDs for faster
|
141
|
+
# browsing, but if the index does not exist we read the main index.
|
142
|
+
def self.remote_object_ids
|
143
|
+
Logger.log('I:Getting list of existing records')
|
144
|
+
|
145
|
+
# Main index empty, the list is empty no matter what (we don't use the
|
146
|
+
# dedicated index in that case)
|
147
|
+
return [] if record_count(index).zero?
|
148
|
+
|
149
|
+
# Fast version, using the dedicated index
|
150
|
+
has_object_id_index = index_exist?(index_object_ids)
|
151
|
+
return remote_object_ids_from_dedicated_index if has_object_id_index
|
152
|
+
|
153
|
+
# Slow version, browsing the full index
|
154
|
+
remote_object_ids_from_main_index
|
155
|
+
end
|
156
|
+
|
109
157
|
# Public: Returns an array of the local objectIDs
|
110
158
|
#
|
111
159
|
# records - Array of all local records
|
@@ -113,116 +161,211 @@ module Jekyll
|
|
113
161
|
records.map { |record| record[:objectID] }.compact.sort
|
114
162
|
end
|
115
163
|
|
116
|
-
# Public: Update
|
164
|
+
# Public: Update records of the index
|
117
165
|
#
|
118
|
-
#
|
119
|
-
# settings - The hash of settings to pass to the index
|
166
|
+
# records - All records extracted from Jekyll
|
120
167
|
#
|
168
|
+
# Note: All operations will be done in one batch, assuring an atomic
|
169
|
+
# update
|
121
170
|
# Does nothing in dry run mode
|
122
|
-
def self.
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
171
|
+
def self.update_records(records)
|
172
|
+
# Getting list of objectID in remote and locally
|
173
|
+
remote_ids = remote_object_ids
|
174
|
+
local_ids = local_object_ids(records)
|
175
|
+
|
176
|
+
# Making a diff, to see what to add and what to delete
|
177
|
+
ids_to_delete = remote_ids - local_ids
|
178
|
+
ids_to_add = local_ids - remote_ids
|
179
|
+
|
180
|
+
# What changes should we do to the indexes?
|
181
|
+
has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
|
182
|
+
has_object_id_index = index_exist?(index_object_ids)
|
183
|
+
|
184
|
+
# Stop if nothing to change
|
185
|
+
if !has_records_to_update && has_object_id_index
|
186
|
+
Logger.log('I:Content is already up to date.')
|
187
|
+
return
|
188
|
+
end
|
189
|
+
|
190
|
+
# We group all operations into one batch
|
191
|
+
operations = []
|
192
|
+
|
193
|
+
# We update records only if there are records to update
|
194
|
+
if has_records_to_update
|
195
|
+
Logger.log("I:Updating records in index #{index.name}...")
|
196
|
+
Logger.log("I:Records to delete: #{ids_to_delete.length}")
|
197
|
+
Logger.log("I:Records to add: #{ids_to_add.length}")
|
198
|
+
|
199
|
+
# Transforming ids into real records to add
|
200
|
+
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
|
201
|
+
records_to_add = ids_to_add.map { |id| records_by_id[id] }
|
202
|
+
|
203
|
+
# Deletion operations come first, to avoid hitting an overquota too
|
204
|
+
# soon if it can be avoided
|
205
|
+
ids_to_delete.each do |object_id|
|
206
|
+
operations << {
|
207
|
+
action: 'deleteObject', indexName: index.name,
|
208
|
+
body: { objectID: object_id }
|
209
|
+
}
|
210
|
+
end
|
211
|
+
# Then we add the new records
|
212
|
+
operations += records_to_add.map do |new_record|
|
213
|
+
{ action: 'addObject', indexName: index.name, body: new_record }
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# We update the dedicated index every time we update records, but we also
|
218
|
+
# create it if it does not exist
|
219
|
+
should_update_object_id_index = has_records_to_update ||
|
220
|
+
!has_object_id_index
|
221
|
+
if should_update_object_id_index
|
222
|
+
operations << { action: 'clear', indexName: index_object_ids.name }
|
223
|
+
local_ids.each_slice(100).each do |ids|
|
224
|
+
operations << {
|
225
|
+
action: 'addObject', indexName: index_object_ids.name,
|
226
|
+
body: { content: ids }
|
227
|
+
}
|
228
|
+
end
|
129
229
|
end
|
230
|
+
|
231
|
+
execute_operations(operations)
|
130
232
|
end
|
131
233
|
|
132
|
-
# Public:
|
234
|
+
# Public: Execute a series of operations in a batch
|
133
235
|
#
|
134
|
-
#
|
236
|
+
# operations - Operations to batch
|
135
237
|
#
|
136
|
-
#
|
137
|
-
#
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
index = index(Configurator.index_name)
|
142
|
-
|
143
|
-
# Update settings
|
144
|
-
update_settings(index, Configurator.settings)
|
238
|
+
# Note: Will split the batch in several calls if too big, and will display
|
239
|
+
# a progress bar if this happens
|
240
|
+
def self.execute_operations(operations)
|
241
|
+
return if Configurator.dry_run?
|
242
|
+
return if operations.empty?
|
145
243
|
|
146
|
-
#
|
147
|
-
|
148
|
-
|
244
|
+
# Run the batches in slices if they are too large
|
245
|
+
batch_size = Configurator.algolia('indexing_batch_size')
|
246
|
+
slices = operations.each_slice(batch_size).to_a
|
149
247
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
248
|
+
should_have_progress_bar = (slices.length > 1)
|
249
|
+
if should_have_progress_bar
|
250
|
+
progress_bar = ProgressBar.create(
|
251
|
+
total: slices.length,
|
252
|
+
format: 'Updating index (%j%%) |%B|'
|
253
|
+
)
|
155
254
|
end
|
156
255
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
delete_records_by_id(index, old_records_ids)
|
256
|
+
slices.each do |slice|
|
257
|
+
begin
|
258
|
+
::Algolia.batch!(slice)
|
161
259
|
|
162
|
-
|
163
|
-
|
164
|
-
|
260
|
+
progress_bar.increment if should_have_progress_bar
|
261
|
+
rescue StandardError => e
|
262
|
+
ErrorHandler.stop(e, operations: slice)
|
263
|
+
end
|
165
264
|
end
|
166
|
-
|
265
|
+
end
|
167
266
|
|
168
|
-
|
267
|
+
# Public: Get a unique settingID for the current settings
|
268
|
+
#
|
269
|
+
# The settingID is generated as a hash of the current settings. As it will
|
270
|
+
# be stored in the userData key of the resulting config, we exclude that
|
271
|
+
# key from the hashing.
|
272
|
+
def self.local_setting_id
|
273
|
+
settings = Configurator.settings
|
274
|
+
settings.delete('userData')
|
275
|
+
AlgoliaHTMLExtractor.uuid(settings)
|
169
276
|
end
|
170
277
|
|
171
278
|
# Public: Get the settings of the remote index
|
172
279
|
#
|
173
|
-
# index
|
174
|
-
def self.remote_settings
|
280
|
+
# In case the index is not accessible, it will return nil
|
281
|
+
def self.remote_settings
|
175
282
|
index.get_settings
|
176
|
-
rescue StandardError
|
177
|
-
|
283
|
+
rescue StandardError
|
284
|
+
nil
|
178
285
|
end
|
179
286
|
|
180
|
-
# Public:
|
287
|
+
# Public: Smart update of the settings of the index
|
181
288
|
#
|
182
|
-
#
|
183
|
-
#
|
289
|
+
# This will first compare the settings about to be pushed with the
|
290
|
+
# settings already pushed. It will compare userData.settingID for that.
|
291
|
+
# If the settingID is the same, we don't push as this won't change
|
292
|
+
# anything. We will still check if the remote config seems to have been
|
293
|
+
# manually altered though, and warn the user that this is not the
|
294
|
+
# preferred way of doing so.
|
184
295
|
#
|
185
|
-
#
|
186
|
-
|
187
|
-
|
188
|
-
return if Configurator.
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
296
|
+
# If the settingIDs do not match, it means our config is different, so
|
297
|
+
# we push it, overriding the settingID for next push.
|
298
|
+
def self.update_settings
|
299
|
+
return if Configurator.settings.empty?
|
300
|
+
|
301
|
+
current_remote_settings = remote_settings || {}
|
302
|
+
remote_setting_id = current_remote_settings.dig('userData', 'settingID')
|
303
|
+
|
304
|
+
settings = Configurator.settings
|
305
|
+
setting_id = local_setting_id
|
306
|
+
|
307
|
+
are_settings_forced = Configurator.force_settings?
|
308
|
+
|
309
|
+
# The config we're about to push is the same we pushed previously. We
|
310
|
+
# won't push again.
|
311
|
+
if setting_id == remote_setting_id && !are_settings_forced
|
312
|
+
Logger.log('I:Settings are already up to date.')
|
313
|
+
# Check if remote config has been changed outside of the plugin, so we
|
314
|
+
# can warn users that they should not alter their config from outside
|
315
|
+
# of the plugin.
|
316
|
+
current_remote_settings.delete('userData')
|
317
|
+
changed_keys = Utils.diff_keys(settings, current_remote_settings)
|
318
|
+
unless changed_keys.nil?
|
319
|
+
warn_of_manual_dashboard_editing(changed_keys)
|
320
|
+
end
|
195
321
|
|
196
|
-
|
197
|
-
|
198
|
-
# records - Array of records to push
|
199
|
-
#
|
200
|
-
# The `atomic` indexing mode will push all records to a brand new index,
|
201
|
-
# configure it, and then overwrite the previous index with this new one.
|
202
|
-
# For the end-user, it will make all the changes in one go, making sure
|
203
|
-
# people are always searching into a fully configured index. It will
|
204
|
-
# consume more operations, but will never leave the index in a transient
|
205
|
-
# state.
|
206
|
-
def self.run_atomic_mode(records)
|
207
|
-
index_name = Configurator.index_name
|
208
|
-
index = index(index_name)
|
209
|
-
index_tmp_name = "#{Configurator.index_name}_tmp"
|
210
|
-
index_tmp = index(index_tmp_name)
|
322
|
+
return
|
323
|
+
end
|
211
324
|
|
212
|
-
|
325
|
+
# Settings have changed, we push them
|
326
|
+
settings['userData'] = {
|
327
|
+
'settingID' => setting_id,
|
328
|
+
'pluginVersion' => VERSION
|
329
|
+
}
|
213
330
|
|
214
|
-
|
215
|
-
|
216
|
-
new_settings = remote_settings.merge(Configurator.settings)
|
217
|
-
update_settings(index_tmp, new_settings)
|
331
|
+
Logger.log("I:Updating settings of index #{index.name}")
|
332
|
+
return if Configurator.dry_run?
|
218
333
|
|
219
|
-
|
220
|
-
|
334
|
+
set_settings(settings)
|
335
|
+
end
|
221
336
|
|
222
|
-
|
223
|
-
|
337
|
+
# Public: Set new settings to an index
|
338
|
+
#
|
339
|
+
# Will dispatch to the error handler if it fails
|
340
|
+
# rubocop:disable Naming/AccessorMethodName
|
341
|
+
def self.set_settings(settings)
|
342
|
+
index.set_settings!(settings)
|
343
|
+
rescue StandardError => e
|
344
|
+
ErrorHandler.stop(e, settings: settings)
|
345
|
+
end
|
346
|
+
# rubocop:enable Naming/AccessorMethodName
|
224
347
|
|
225
|
-
|
348
|
+
# Public: Warn users that they have some settings manually configured in
|
349
|
+
# their dashboard
|
350
|
+
#
|
351
|
+
# When users change some settings in their dashboard, those settings might
|
352
|
+
# get overwritten by the plugin. We can't prevent that, but we can warn
|
353
|
+
# them when we detect they changed something.
|
354
|
+
def self.warn_of_manual_dashboard_editing(changed_keys)
|
355
|
+
# Transform the hash into readable YAML
|
356
|
+
yaml_lines = changed_keys
|
357
|
+
.to_yaml(indentation: 2)
|
358
|
+
.split("\n")[1..-1]
|
359
|
+
yaml_lines.map! do |line|
|
360
|
+
line = line.gsub(/^ */) { |spaces| ' ' * spaces.length }
|
361
|
+
line = line.gsub('- ', ' - ')
|
362
|
+
"W: #{line}"
|
363
|
+
end
|
364
|
+
Logger.known_message(
|
365
|
+
'settings_manually_edited',
|
366
|
+
settings: yaml_lines.join("\n"),
|
367
|
+
index_name: Configurator.index_name
|
368
|
+
)
|
226
369
|
end
|
227
370
|
|
228
371
|
# Public: Push all records to Algolia and configure the index
|
@@ -231,10 +374,8 @@ module Jekyll
|
|
231
374
|
def self.run(records)
|
232
375
|
init
|
233
376
|
|
234
|
-
record_count = records.length
|
235
|
-
|
236
377
|
# Indexing zero record is surely a misconfiguration
|
237
|
-
if
|
378
|
+
if records.length.zero?
|
238
379
|
files_to_exclude = Configurator.algolia('files_to_exclude').join(', ')
|
239
380
|
Logger.known_message(
|
240
381
|
'no_records_found',
|
@@ -244,15 +385,12 @@ module Jekyll
|
|
244
385
|
exit 1
|
245
386
|
end
|
246
387
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
run_diff_mode(records)
|
252
|
-
when 'atomic'
|
253
|
-
run_atomic_mode(records)
|
254
|
-
end
|
388
|
+
update_settings
|
389
|
+
update_records(records)
|
390
|
+
|
391
|
+
Logger.log('I:✔ Indexing complete')
|
255
392
|
end
|
256
393
|
end
|
257
394
|
end
|
258
395
|
end
|
396
|
+
# rubocop:enable Metrics/ModuleLength
|