jekyll-algolia 1.0.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +51 -30
- data/README.md +69 -27
- data/lib/errors/invalid_credentials.txt +12 -0
- data/lib/errors/invalid_index_name.txt +9 -0
- data/lib/errors/missing_api_key.txt +15 -0
- data/lib/errors/missing_application_id.txt +11 -0
- data/lib/errors/missing_index_name.txt +18 -0
- data/lib/errors/no_records_found.txt +14 -0
- data/lib/errors/record_too_big.txt +27 -0
- data/lib/errors/record_too_big_api.txt +10 -0
- data/lib/errors/settings_manually_edited.txt +17 -0
- data/lib/errors/too_many_records.txt +14 -0
- data/lib/errors/unknown_application_id.txt +16 -0
- data/lib/errors/unknown_settings.txt +12 -0
- data/lib/jekyll-algolia.rb +45 -60
- data/lib/jekyll/algolia/configurator.rb +137 -44
- data/lib/jekyll/algolia/error_handler.rb +36 -48
- data/lib/jekyll/algolia/extractor.rb +16 -6
- data/lib/jekyll/algolia/file_browser.rb +161 -68
- data/lib/jekyll/algolia/hooks.rb +18 -6
- data/lib/jekyll/algolia/indexer.rb +283 -145
- data/lib/jekyll/algolia/logger.rb +39 -8
- data/lib/jekyll/algolia/overwrites/githubpages-configuration.rb +32 -0
- data/lib/jekyll/algolia/overwrites/jekyll-algolia-site.rb +151 -0
- data/lib/jekyll/algolia/overwrites/jekyll-document.rb +13 -0
- data/lib/jekyll/algolia/overwrites/jekyll-paginate-pager.rb +20 -0
- data/lib/jekyll/algolia/overwrites/jekyll-tags-link.rb +33 -0
- data/lib/jekyll/algolia/progress_bar.rb +27 -0
- data/lib/jekyll/algolia/shrinker.rb +112 -0
- data/lib/jekyll/algolia/utils.rb +118 -2
- data/lib/jekyll/algolia/version.rb +1 -1
- data/lib/jekyll/commands/algolia.rb +3 -14
- metadata +75 -31
- data/errors/invalid_credentials.txt +0 -10
- data/errors/invalid_credentials_for_tmp_index.txt +0 -17
- data/errors/invalid_index_name.txt +0 -11
- data/errors/missing_api_key.txt +0 -17
- data/errors/missing_application_id.txt +0 -12
- data/errors/missing_index_name.txt +0 -19
- data/errors/no_records_found.txt +0 -20
- data/errors/record_too_big.txt +0 -25
- data/errors/unknown_application_id.txt +0 -20
- data/errors/unknown_settings.txt +0 -15
@@ -11,13 +11,15 @@ module Jekyll
|
|
11
11
|
# Public: Extract records from the file
|
12
12
|
#
|
13
13
|
# file - The Jekyll file to process
|
14
|
-
# TOTEST
|
15
14
|
def self.run(file)
|
16
|
-
# Getting all
|
15
|
+
# Getting all nodes from the HTML input
|
17
16
|
raw_records = extract_raw_records(file.content)
|
18
17
|
# Getting file metadata
|
19
18
|
shared_metadata = FileBrowser.metadata(file)
|
20
19
|
|
20
|
+
# If no content, we still index the metadata
|
21
|
+
raw_records = [shared_metadata] if raw_records.empty?
|
22
|
+
|
21
23
|
# Building the list of records
|
22
24
|
records = []
|
23
25
|
raw_records.map do |record|
|
@@ -31,7 +33,7 @@ module Jekyll
|
|
31
33
|
# Apply custom user-defined hooks
|
32
34
|
# Users can return `nil` from the hook to signal we should not index
|
33
35
|
# such a record
|
34
|
-
record = Hooks.apply_each(record, node)
|
36
|
+
record = Hooks.apply_each(record, node, Jekyll::Algolia.site)
|
35
37
|
next if record.nil?
|
36
38
|
|
37
39
|
records << record
|
@@ -48,16 +50,24 @@ module Jekyll
|
|
48
50
|
end
|
49
51
|
|
50
52
|
# Public: Extract raw records from the file, including content for each
|
51
|
-
# node
|
53
|
+
# node and its headings
|
52
54
|
#
|
53
55
|
# content - The HTML content to parse
|
54
56
|
def self.extract_raw_records(content)
|
55
|
-
AlgoliaHTMLExtractor.run(
|
57
|
+
records = AlgoliaHTMLExtractor.run(
|
56
58
|
content,
|
57
59
|
options: {
|
58
|
-
css_selector: Configurator.algolia('nodes_to_index')
|
60
|
+
css_selector: Configurator.algolia('nodes_to_index'),
|
61
|
+
tags_to_exclude: 'script,style,iframe'
|
59
62
|
}
|
60
63
|
)
|
64
|
+
# We remove objectIDs, as they will be added at the very end, after all
|
65
|
+
# the hooks and shrinkage
|
66
|
+
records.each do |record|
|
67
|
+
record.delete(:objectID)
|
68
|
+
end
|
69
|
+
|
70
|
+
records
|
61
71
|
end
|
62
72
|
end
|
63
73
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'algolia_html_extractor'
|
4
|
+
require 'pathname'
|
5
|
+
require 'time'
|
4
6
|
|
5
7
|
module Jekyll
|
6
8
|
module Algolia
|
@@ -13,6 +15,50 @@ module Jekyll
|
|
13
15
|
module FileBrowser
|
14
16
|
include Jekyll::Algolia
|
15
17
|
|
18
|
+
# Public: Return the absolute path of a Jekyll file
|
19
|
+
#
|
20
|
+
# filepath - The path of the Jekyll file to inspect
|
21
|
+
def self.absolute_path(filepath)
|
22
|
+
pathname = Pathname.new(filepath)
|
23
|
+
return pathname.cleanpath.to_s if pathname.absolute?
|
24
|
+
|
25
|
+
File.expand_path(File.join(Configurator.get('source'), filepath))
|
26
|
+
end
|
27
|
+
|
28
|
+
# Public: Return the path of a Jekyll file relative to the Jekyll source
|
29
|
+
#
|
30
|
+
# filepath - The path of the Jekyll file to inspect
|
31
|
+
def self.relative_path(filepath)
|
32
|
+
pathname = Pathname.new(filepath)
|
33
|
+
config_source = Configurator.get('source') || ''
|
34
|
+
jekyll_source = Pathname.new(File.expand_path(config_source))
|
35
|
+
|
36
|
+
# Removing any starting ./
|
37
|
+
if pathname.relative?
|
38
|
+
fullpath = File.expand_path(File.join(jekyll_source, pathname))
|
39
|
+
return fullpath.gsub(%r{^#{jekyll_source}/}, '')
|
40
|
+
end
|
41
|
+
|
42
|
+
pathname.relative_path_from(jekyll_source).cleanpath.to_s
|
43
|
+
end
|
44
|
+
|
45
|
+
# Public: Check if the file should be indexed
|
46
|
+
#
|
47
|
+
# file - The Jekyll file
|
48
|
+
#
|
49
|
+
# There are many reasons a file should not be indexed. We need to exclude
|
50
|
+
# all the static assets, only keep the actual content.
|
51
|
+
def self.indexable?(file)
|
52
|
+
return false if static_file?(file)
|
53
|
+
return false if is_404?(file)
|
54
|
+
return false if redirect?(file)
|
55
|
+
return false unless allowed_extension?(file)
|
56
|
+
return false if excluded_from_config?(file)
|
57
|
+
return false if excluded_from_hook?(file)
|
58
|
+
|
59
|
+
true
|
60
|
+
end
|
61
|
+
|
16
62
|
# Public: Check if the specified file is a static Jekyll asset
|
17
63
|
#
|
18
64
|
# file - The Jekyll file
|
@@ -30,20 +76,27 @@ module Jekyll
|
|
30
76
|
# pages. We don't want to index those.
|
31
77
|
# Source: https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
|
32
78
|
#
|
33
|
-
# rubocop:disable Naming/PredicateName
|
34
79
|
def self.is_404?(file)
|
35
|
-
|
80
|
+
['404.md', '404.html'].include?(File.basename(file.path))
|
36
81
|
end
|
37
|
-
# rubocop:enable Naming/PredicateName
|
38
82
|
|
39
|
-
# Public: Check if the
|
83
|
+
# Public: Check if the file is a redirect page
|
40
84
|
#
|
41
85
|
# file - The Jekyll file
|
42
86
|
#
|
43
|
-
#
|
44
|
-
# We
|
45
|
-
|
46
|
-
|
87
|
+
# Plugins like jekyll-redirect-from add dynamic pages that only contain
|
88
|
+
# an HTML meta refresh. We need to exclude those files from indexing.
|
89
|
+
# https://github.com/jekyll/jekyll-redirect-from
|
90
|
+
def self.redirect?(file)
|
91
|
+
# When using redirect_from, jekyll-redirect-from creates a page named
|
92
|
+
# `redirect.html`
|
93
|
+
return true if file.respond_to?(:name) && file.name == 'redirect.html'
|
94
|
+
# When using redirect_to, it sets the layout to `redirect`
|
95
|
+
if file.respond_to?(:data) && file.data['layout'] == 'redirect'
|
96
|
+
return true
|
97
|
+
end
|
98
|
+
|
99
|
+
false
|
47
100
|
end
|
48
101
|
|
49
102
|
# Public: Check if the file has one of the allowed extensions
|
@@ -55,36 +108,24 @@ module Jekyll
|
|
55
108
|
# and raw HTML files but this list can be extended using the
|
56
109
|
# `extensions_to_index` config option.
|
57
110
|
def self.allowed_extension?(file)
|
58
|
-
extensions = Configurator.
|
111
|
+
extensions = Configurator.extensions_to_index
|
59
112
|
extname = File.extname(file.path)[1..-1]
|
60
113
|
extensions.include?(extname)
|
61
114
|
end
|
62
115
|
|
63
|
-
# Public: Check if the file has been excluded by the user
|
64
|
-
#
|
65
|
-
# file - The Jekyll file
|
66
|
-
#
|
67
|
-
# Files can be excluded either by setting the `files_to_exclude` option,
|
68
|
-
# or by defining a custom hook
|
69
|
-
def self.excluded_by_user?(file)
|
70
|
-
excluded_from_config?(file) || excluded_from_hook?(file)
|
71
|
-
end
|
72
|
-
|
73
116
|
# Public: Check if the file has been excluded by `files_to_exclude`
|
74
117
|
#
|
75
118
|
# file - The Jekyll file
|
76
119
|
def self.excluded_from_config?(file)
|
77
120
|
excluded_patterns = Configurator.algolia('files_to_exclude')
|
78
|
-
|
121
|
+
jekyll_source = Configurator.get('source')
|
122
|
+
path = absolute_path(file.path)
|
79
123
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
excluded_files += Dir.glob(pattern)
|
84
|
-
end
|
124
|
+
excluded_patterns.each do |pattern|
|
125
|
+
pattern = File.expand_path(File.join(jekyll_source, pattern))
|
126
|
+
return true if File.fnmatch(pattern, path, File::FNM_PATHNAME)
|
85
127
|
end
|
86
|
-
|
87
|
-
excluded_files.include?(file.path)
|
128
|
+
false
|
88
129
|
end
|
89
130
|
|
90
131
|
# Public: Check if the file has been excluded by running a custom user
|
@@ -95,34 +136,6 @@ module Jekyll
|
|
95
136
|
Hooks.should_be_excluded?(file.path)
|
96
137
|
end
|
97
138
|
|
98
|
-
# Public: Return the path to the original file, relative from the Jekyll
|
99
|
-
# source
|
100
|
-
#
|
101
|
-
# file - The Jekyll file
|
102
|
-
#
|
103
|
-
# Pages have their .path property relative to the source, but collections
|
104
|
-
# (including posts) have an absolute file path.
|
105
|
-
def self.path_from_root(file)
|
106
|
-
source = Configurator.get('source')
|
107
|
-
file.path.gsub(%r{^#{source}/}, '')
|
108
|
-
end
|
109
|
-
|
110
|
-
# Public: Check if the file should be indexed
|
111
|
-
#
|
112
|
-
# file - The Jekyll file
|
113
|
-
#
|
114
|
-
# There are many reasons a file should not be indexed. We need to exclude
|
115
|
-
# all the static assets, only keep the actual content.
|
116
|
-
def self.indexable?(file)
|
117
|
-
return false if static_file?(file)
|
118
|
-
return false if is_404?(file)
|
119
|
-
return false if pagination_page?(file)
|
120
|
-
return false unless allowed_extension?(file)
|
121
|
-
return false if excluded_by_user?(file)
|
122
|
-
|
123
|
-
true
|
124
|
-
end
|
125
|
-
|
126
139
|
# Public: Return a hash of all the file metadata
|
127
140
|
#
|
128
141
|
# file - The Jekyll file
|
@@ -134,6 +147,8 @@ module Jekyll
|
|
134
147
|
raw_data = raw_data(file)
|
135
148
|
specific_data = {
|
136
149
|
collection: collection(file),
|
150
|
+
tags: tags(file),
|
151
|
+
categories: categories(file),
|
137
152
|
date: date(file),
|
138
153
|
excerpt_html: excerpt_html(file),
|
139
154
|
excerpt_text: excerpt_text(file),
|
@@ -164,10 +179,16 @@ module Jekyll
|
|
164
179
|
data.each_key do |key|
|
165
180
|
data.delete(key) if respond_to?(key)
|
166
181
|
end
|
167
|
-
|
168
|
-
# Also delete keys we manually handle
|
169
182
|
data.delete('excerpt')
|
170
183
|
|
184
|
+
# Delete other keys added by Jekyll that are not in the front-matter and
|
185
|
+
# not needed for search
|
186
|
+
data.delete('draft')
|
187
|
+
data.delete('ext')
|
188
|
+
|
189
|
+
# Convert all values to a version that can be serialized to JSON
|
190
|
+
data = Utils.jsonify(data)
|
191
|
+
|
171
192
|
# Convert all keys to symbols
|
172
193
|
data = Utils.keys_to_symbols(data)
|
173
194
|
|
@@ -196,29 +217,102 @@ module Jekyll
|
|
196
217
|
file.url
|
197
218
|
end
|
198
219
|
|
220
|
+
# Public: Returns the list of tags of a file, defaults to an empty array
|
221
|
+
#
|
222
|
+
# file - The Jekyll file
|
223
|
+
def self.tags(file)
|
224
|
+
file.data['tags'] || []
|
225
|
+
end
|
226
|
+
|
227
|
+
# Public: Returns the list of categories of a file, defaults to an empty array
|
228
|
+
#
|
229
|
+
# file - The Jekyll file
|
230
|
+
def self.categories(file)
|
231
|
+
file.data['categories'] || []
|
232
|
+
end
|
233
|
+
|
199
234
|
# Public: Returns a timestamp of the file date
|
200
235
|
#
|
201
236
|
# file - The Jekyll file
|
202
237
|
#
|
203
|
-
#
|
204
|
-
#
|
205
|
-
#
|
238
|
+
# Posts have their date coming from the filepath, or the front-matter.
|
239
|
+
# Pages and other collection items can only have a date set in
|
240
|
+
# front-matter.
|
206
241
|
def self.date(file)
|
207
|
-
date
|
242
|
+
# Collections get their date from .date, while pages read it from .data.
|
243
|
+
# Jekyll by default will set the date of collection to the current date,
|
244
|
+
# but we monkey-patched that so it returns nil for collection items
|
245
|
+
date = if file.respond_to?(:date)
|
246
|
+
file.date
|
247
|
+
else
|
248
|
+
file.data['date']
|
249
|
+
end
|
250
|
+
|
208
251
|
return nil if date.nil?
|
209
252
|
|
210
|
-
date
|
253
|
+
# If date is a string, we try to parse it
|
254
|
+
if date.is_a? String
|
255
|
+
begin
|
256
|
+
date = Time.parse(date)
|
257
|
+
rescue StandardError
|
258
|
+
return nil
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
date.to_time.to_i
|
211
263
|
end
|
212
264
|
|
213
|
-
# Public: Returns the
|
265
|
+
# Public: Returns the raw excerpt of a file, directly as returned by
|
266
|
+
# Jekyll. Swallow any error that could occur when reading.
|
214
267
|
#
|
215
268
|
# file - The Jekyll file
|
216
269
|
#
|
217
|
-
#
|
270
|
+
# This might throw an exception if the excerpt is invalid. We also
|
271
|
+
# silence all logger output as Jekyll is quite verbose and will display
|
272
|
+
# the potential Liquid error in the terminal, even if we catch the actual
|
273
|
+
# error.
|
274
|
+
def self.excerpt_raw(file)
|
275
|
+
Logger.silent do
|
276
|
+
return file.data['excerpt'].to_s.strip
|
277
|
+
end
|
278
|
+
rescue StandardError
|
279
|
+
nil
|
280
|
+
end
|
281
|
+
|
282
|
+
# Public: Return true if the Jekyll default excerpt should be used for
|
283
|
+
# this file
|
284
|
+
#
|
285
|
+
# file - The Jekyll file
|
286
|
+
#
|
287
|
+
# Most of the time, we'll use our own excerpt (the first matching
|
288
|
+
# element), but in some cases, we'll fallback to Jekyll's default excerpt
|
289
|
+
# if it seems to be what the user wants
|
290
|
+
def self.use_default_excerpt?(file)
|
291
|
+
# Only posts can have excerpt
|
292
|
+
return false unless type(file) == 'post'
|
293
|
+
|
294
|
+
# User defined their own separator in the config
|
295
|
+
custom_separator = file.excerpt_separator.to_s.strip
|
296
|
+
return false if custom_separator.empty?
|
297
|
+
|
298
|
+
# This specific post contains this separator
|
299
|
+
file.content.include?(custom_separator)
|
300
|
+
end
|
301
|
+
|
302
|
+
# Public: Returns the HTML version of the excerpt
|
303
|
+
#
|
304
|
+
# file - The Jekyll file
|
218
305
|
def self.excerpt_html(file)
|
219
|
-
|
220
|
-
return
|
221
|
-
|
306
|
+
# If it's a post with a custom separator for the excerpt, we honor it
|
307
|
+
return excerpt_raw(file) if use_default_excerpt?(file)
|
308
|
+
|
309
|
+
# Otherwise we take the first matching node
|
310
|
+
html = file.content
|
311
|
+
selector = Configurator.algolia('nodes_to_index')
|
312
|
+
first_node = Nokogiri::HTML(html).css(selector).first
|
313
|
+
return nil if first_node.nil?
|
314
|
+
|
315
|
+
first_node.to_s
|
222
316
|
end
|
223
317
|
|
224
318
|
# Public: Returns the text version of the excerpt
|
@@ -228,7 +322,6 @@ module Jekyll
|
|
228
322
|
# Only collections (including posts) have an excerpt. Pages don't.
|
229
323
|
def self.excerpt_text(file)
|
230
324
|
html = excerpt_html(file)
|
231
|
-
return nil if html.nil?
|
232
325
|
Utils.html_to_text(html)
|
233
326
|
end
|
234
327
|
|
data/lib/jekyll/algolia/hooks.rb
CHANGED
@@ -11,8 +11,15 @@ module Jekyll
|
|
11
11
|
#
|
12
12
|
# record - The hash of the record to be pushed
|
13
13
|
# node - The Nokogiri node of the element
|
14
|
-
def self.apply_each(record, node)
|
15
|
-
before_indexing_each
|
14
|
+
def self.apply_each(record, node, context)
|
15
|
+
case method(:before_indexing_each).arity
|
16
|
+
when 1
|
17
|
+
before_indexing_each(record)
|
18
|
+
when 2
|
19
|
+
before_indexing_each(record, node)
|
20
|
+
else
|
21
|
+
before_indexing_each(record, node, context)
|
22
|
+
end
|
16
23
|
end
|
17
24
|
|
18
25
|
# Public: Apply the before_indexing_all hook to all records.
|
@@ -21,8 +28,13 @@ module Jekyll
|
|
21
28
|
# as they can be mocked in tests.
|
22
29
|
#
|
23
30
|
# records - The list of all records to be indexed
|
24
|
-
def self.apply_all(records)
|
25
|
-
before_indexing_all
|
31
|
+
def self.apply_all(records, context)
|
32
|
+
case method(:before_indexing_all).arity
|
33
|
+
when 1
|
34
|
+
before_indexing_all(records)
|
35
|
+
else
|
36
|
+
before_indexing_all(records, context)
|
37
|
+
end
|
26
38
|
end
|
27
39
|
|
28
40
|
# Public: Check if the file should be indexed or not
|
@@ -47,7 +59,7 @@ module Jekyll
|
|
47
59
|
# information from the HTML node.
|
48
60
|
#
|
49
61
|
# Users can return nil to signal that the record should not be indexed
|
50
|
-
def self.before_indexing_each(record, _node)
|
62
|
+
def self.before_indexing_each(record, _node, _context)
|
51
63
|
record
|
52
64
|
end
|
53
65
|
|
@@ -59,7 +71,7 @@ module Jekyll
|
|
59
71
|
# Users can modify the full list from here. It might provide an easier
|
60
72
|
# interface than `hook_before_indexing_each` when knowing the full context
|
61
73
|
# is necessary
|
62
|
-
def self.before_indexing_all(records)
|
74
|
+
def self.before_indexing_all(records, _context)
|
63
75
|
records
|
64
76
|
end
|
65
77
|
end
|
@@ -1,7 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'algoliasearch'
|
4
|
+
require 'yaml'
|
5
|
+
require 'algolia_html_extractor'
|
4
6
|
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
5
8
|
module Jekyll
|
6
9
|
module Algolia
|
7
10
|
# Module to push records to Algolia and configure the index
|
@@ -9,16 +12,60 @@ module Jekyll
|
|
9
12
|
include Jekyll::Algolia
|
10
13
|
|
11
14
|
# Public: Init the module
|
12
|
-
#
|
13
|
-
# This call will instanciate the Algolia API client, set the custom
|
14
|
-
# User Agent and give an easy access to the main index
|
15
15
|
def self.init
|
16
16
|
::Algolia.init(
|
17
17
|
application_id: Configurator.application_id,
|
18
18
|
api_key: Configurator.api_key
|
19
19
|
)
|
20
|
+
index_name = Configurator.index_name
|
21
|
+
@index = ::Algolia::Index.new(index_name)
|
22
|
+
index_object_ids_name = Configurator.index_object_ids_name
|
23
|
+
@index_object_ids = ::Algolia::Index.new(index_object_ids_name)
|
20
24
|
|
21
25
|
set_user_agent
|
26
|
+
|
27
|
+
self
|
28
|
+
end
|
29
|
+
|
30
|
+
# Public: Returns the Algolia index object
|
31
|
+
def self.index
|
32
|
+
@index
|
33
|
+
end
|
34
|
+
|
35
|
+
# Public: Returns the Algolia index used to store object ids
|
36
|
+
def self.index_object_ids
|
37
|
+
@index_object_ids
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: Check if an index exists
|
41
|
+
#
|
42
|
+
# index - Index to check
|
43
|
+
#
|
44
|
+
# Note: there is no API endpoint to do that, so we try to get the settings
|
45
|
+
# instead, which will fail if the index does not exist
|
46
|
+
def self.index_exist?(index)
|
47
|
+
index.get_settings
|
48
|
+
true
|
49
|
+
rescue StandardError
|
50
|
+
false
|
51
|
+
end
|
52
|
+
|
53
|
+
# Public: Get the number of records in an index
|
54
|
+
#
|
55
|
+
# index - Index to check
|
56
|
+
#
|
57
|
+
# Note: We'll do an empty query search, to match everything, but we'll
|
58
|
+
# only return the objectID and one element, to get the shortest response
|
59
|
+
# possible. It will still contain the nbHits
|
60
|
+
def self.record_count(index)
|
61
|
+
index.search(
|
62
|
+
'',
|
63
|
+
attributesToRetrieve: 'objectID',
|
64
|
+
distinct: false,
|
65
|
+
hitsPerPage: 1
|
66
|
+
)['nbHits']
|
67
|
+
rescue StandardError
|
68
|
+
0
|
22
69
|
end
|
23
70
|
|
24
71
|
# Public: Set the User-Agent to send to the API
|
@@ -38,74 +85,75 @@ module Jekyll
|
|
38
85
|
::Algolia.set_extra_header('User-Agent', user_agent)
|
39
86
|
end
|
40
87
|
|
41
|
-
# Public:
|
88
|
+
# Public: Get an array of all object IDs stored in the main index
|
42
89
|
#
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
|
90
|
+
# Note: As this will be slow (grabbing them 1000 at a time), we display
|
91
|
+
# a progress bar.
|
92
|
+
def self.remote_object_ids_from_main_index
|
93
|
+
Logger.verbose("I:Inspecting existing records in index #{index.name}")
|
47
94
|
|
48
|
-
|
49
|
-
#
|
50
|
-
# index - Algolia Index to update
|
51
|
-
# records - Array of records to update
|
52
|
-
#
|
53
|
-
# New records will be automatically added. Technically existing records
|
54
|
-
# should be updated but this case should never happen as changing a record
|
55
|
-
# content will change its objectID as well.
|
56
|
-
#
|
57
|
-
# Does nothing in dry run mode
|
58
|
-
def self.update_records(index, records)
|
59
|
-
batch_size = Configurator.algolia('indexing_batch_size')
|
60
|
-
records.each_slice(batch_size) do |batch|
|
61
|
-
Logger.log("I:Pushing #{batch.size} records")
|
62
|
-
next if Configurator.dry_run?
|
63
|
-
begin
|
64
|
-
index.add_objects!(batch)
|
65
|
-
rescue StandardError => error
|
66
|
-
ErrorHandler.stop(error, records: records)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# Public: Delete records whose objectIDs are passed
|
72
|
-
#
|
73
|
-
# index - Algolia Index to target
|
74
|
-
# ids - Array of objectIDs to delete
|
75
|
-
#
|
76
|
-
# Does nothing in dry run mode
|
77
|
-
def self.delete_records_by_id(index, ids)
|
78
|
-
return if ids.empty?
|
79
|
-
Logger.log("I:Deleting #{ids.length} records")
|
80
|
-
return if Configurator.dry_run?
|
95
|
+
list = []
|
81
96
|
|
97
|
+
# As it might take some time, we display a progress bar
|
98
|
+
progress_bar = ProgressBar.create(
|
99
|
+
total: record_count(index),
|
100
|
+
format: 'Inspecting existing records (%j%%) |%B|'
|
101
|
+
)
|
82
102
|
begin
|
83
|
-
index.
|
84
|
-
|
85
|
-
|
103
|
+
index.browse(
|
104
|
+
attributesToRetrieve: 'objectID',
|
105
|
+
hitsPerPage: 1000
|
106
|
+
) do |hit|
|
107
|
+
list << hit['objectID']
|
108
|
+
progress_bar.increment
|
109
|
+
end
|
110
|
+
rescue StandardError
|
111
|
+
return []
|
86
112
|
end
|
113
|
+
|
114
|
+
list.sort
|
87
115
|
end
|
88
116
|
|
89
|
-
# Public:
|
90
|
-
#
|
91
|
-
# index - Algolia Index to target
|
117
|
+
# Public: Get an array of all the object ids, stored in a dedicated
|
118
|
+
# index
|
92
119
|
#
|
93
|
-
#
|
94
|
-
#
|
95
|
-
def self.
|
120
|
+
# Note: This will be very fast. Each record contains 100 object ids, so it
|
121
|
+
# will fit in one call each time.
|
122
|
+
def self.remote_object_ids_from_dedicated_index
|
96
123
|
list = []
|
97
124
|
begin
|
98
|
-
|
99
|
-
|
125
|
+
index_object_ids.browse(
|
126
|
+
attributesToRetrieve: 'content',
|
127
|
+
hitsPerPage: 1000
|
128
|
+
) do |hit|
|
129
|
+
list += hit['content']
|
100
130
|
end
|
101
131
|
rescue StandardError
|
102
|
-
# The index might not exist if it's the first time we use the plugin
|
103
|
-
# so we'll consider that it means there are no records there
|
104
132
|
return []
|
105
133
|
end
|
134
|
+
|
106
135
|
list.sort
|
107
136
|
end
|
108
137
|
|
138
|
+
# Public: Returns an array of all the objectIDs in the index
|
139
|
+
#
|
140
|
+
# Note: We use a dedicated index to store the objectIDs for faster
|
141
|
+
# browsing, but if the index does not exist we read the main index.
|
142
|
+
def self.remote_object_ids
|
143
|
+
Logger.log('I:Getting list of existing records')
|
144
|
+
|
145
|
+
# Main index empty, the list is empty no matter what (we don't use the
|
146
|
+
# dedicated index in that case)
|
147
|
+
return [] if record_count(index).zero?
|
148
|
+
|
149
|
+
# Fast version, using the dedicated index
|
150
|
+
has_object_id_index = index_exist?(index_object_ids)
|
151
|
+
return remote_object_ids_from_dedicated_index if has_object_id_index
|
152
|
+
|
153
|
+
# Slow version, browsing the full index
|
154
|
+
remote_object_ids_from_main_index
|
155
|
+
end
|
156
|
+
|
109
157
|
# Public: Returns an array of the local objectIDs
|
110
158
|
#
|
111
159
|
# records - Array of all local records
|
@@ -113,116 +161,211 @@ module Jekyll
|
|
113
161
|
records.map { |record| record[:objectID] }.compact.sort
|
114
162
|
end
|
115
163
|
|
116
|
-
# Public: Update
|
164
|
+
# Public: Update records of the index
|
117
165
|
#
|
118
|
-
#
|
119
|
-
# settings - The hash of settings to pass to the index
|
166
|
+
# records - All records extracted from Jekyll
|
120
167
|
#
|
168
|
+
# Note: All operations will be done in one batch, assuring an atomic
|
169
|
+
# update
|
121
170
|
# Does nothing in dry run mode
|
122
|
-
def self.
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
171
|
+
def self.update_records(records)
|
172
|
+
# Getting list of objectID in remote and locally
|
173
|
+
remote_ids = remote_object_ids
|
174
|
+
local_ids = local_object_ids(records)
|
175
|
+
|
176
|
+
# Making a diff, to see what to add and what to delete
|
177
|
+
ids_to_delete = remote_ids - local_ids
|
178
|
+
ids_to_add = local_ids - remote_ids
|
179
|
+
|
180
|
+
# What changes should we do to the indexes?
|
181
|
+
has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
|
182
|
+
has_object_id_index = index_exist?(index_object_ids)
|
183
|
+
|
184
|
+
# Stop if nothing to change
|
185
|
+
if !has_records_to_update && has_object_id_index
|
186
|
+
Logger.log('I:Content is already up to date.')
|
187
|
+
return
|
188
|
+
end
|
189
|
+
|
190
|
+
# We group all operations into one batch
|
191
|
+
operations = []
|
192
|
+
|
193
|
+
# We update records only if there are records to update
|
194
|
+
if has_records_to_update
|
195
|
+
Logger.log("I:Updating records in index #{index.name}...")
|
196
|
+
Logger.log("I:Records to delete: #{ids_to_delete.length}")
|
197
|
+
Logger.log("I:Records to add: #{ids_to_add.length}")
|
198
|
+
|
199
|
+
# Transforming ids into real records to add
|
200
|
+
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
|
201
|
+
records_to_add = ids_to_add.map { |id| records_by_id[id] }
|
202
|
+
|
203
|
+
# Deletion operations come first, to avoid hitting an overquota too
|
204
|
+
# soon if it can be avoided
|
205
|
+
ids_to_delete.each do |object_id|
|
206
|
+
operations << {
|
207
|
+
action: 'deleteObject', indexName: index.name,
|
208
|
+
body: { objectID: object_id }
|
209
|
+
}
|
210
|
+
end
|
211
|
+
# Then we add the new records
|
212
|
+
operations += records_to_add.map do |new_record|
|
213
|
+
{ action: 'addObject', indexName: index.name, body: new_record }
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# We update the dedicated index every time we update records, but we also
|
218
|
+
# create it if it does not exist
|
219
|
+
should_update_object_id_index = has_records_to_update ||
|
220
|
+
!has_object_id_index
|
221
|
+
if should_update_object_id_index
|
222
|
+
operations << { action: 'clear', indexName: index_object_ids.name }
|
223
|
+
local_ids.each_slice(100).each do |ids|
|
224
|
+
operations << {
|
225
|
+
action: 'addObject', indexName: index_object_ids.name,
|
226
|
+
body: { content: ids }
|
227
|
+
}
|
228
|
+
end
|
129
229
|
end
|
230
|
+
|
231
|
+
execute_operations(operations)
|
130
232
|
end
|
131
233
|
|
132
|
-
# Public:
|
234
|
+
# Public: Execute a series of operations in a batch
|
133
235
|
#
|
134
|
-
#
|
236
|
+
# operations - Operations to batch
|
135
237
|
#
|
136
|
-
#
|
137
|
-
#
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
index = index(Configurator.index_name)
|
142
|
-
|
143
|
-
# Update settings
|
144
|
-
update_settings(index, Configurator.settings)
|
238
|
+
# Note: Will split the batch in several calls if too big, and will display
|
239
|
+
# a progress bar if this happens
|
240
|
+
def self.execute_operations(operations)
|
241
|
+
return if Configurator.dry_run?
|
242
|
+
return if operations.empty?
|
145
243
|
|
146
|
-
#
|
147
|
-
|
148
|
-
|
244
|
+
# Run the batches in slices if they are too large
|
245
|
+
batch_size = Configurator.algolia('indexing_batch_size')
|
246
|
+
slices = operations.each_slice(batch_size).to_a
|
149
247
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
248
|
+
should_have_progress_bar = (slices.length > 1)
|
249
|
+
if should_have_progress_bar
|
250
|
+
progress_bar = ProgressBar.create(
|
251
|
+
total: slices.length,
|
252
|
+
format: 'Updating index (%j%%) |%B|'
|
253
|
+
)
|
155
254
|
end
|
156
255
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
delete_records_by_id(index, old_records_ids)
|
256
|
+
slices.each do |slice|
|
257
|
+
begin
|
258
|
+
::Algolia.batch!(slice)
|
161
259
|
|
162
|
-
|
163
|
-
|
164
|
-
|
260
|
+
progress_bar.increment if should_have_progress_bar
|
261
|
+
rescue StandardError => e
|
262
|
+
ErrorHandler.stop(e, operations: slice)
|
263
|
+
end
|
165
264
|
end
|
166
|
-
|
265
|
+
end
|
167
266
|
|
168
|
-
|
267
|
+
# Public: Get a unique settingID for the current settings
|
268
|
+
#
|
269
|
+
# The settingID is generated as a hash of the current settings. As it will
|
270
|
+
# be stored in the userData key of the resulting config, we exclude that
|
271
|
+
# key from the hashing.
|
272
|
+
def self.local_setting_id
|
273
|
+
settings = Configurator.settings
|
274
|
+
settings.delete('userData')
|
275
|
+
AlgoliaHTMLExtractor.uuid(settings)
|
169
276
|
end
|
170
277
|
|
171
278
|
# Public: Get the settings of the remote index
|
172
279
|
#
|
173
|
-
# index
|
174
|
-
def self.remote_settings
|
280
|
+
# In case the index is not accessible, it will return nil
|
281
|
+
def self.remote_settings
|
175
282
|
index.get_settings
|
176
|
-
rescue StandardError
|
177
|
-
|
283
|
+
rescue StandardError
|
284
|
+
nil
|
178
285
|
end
|
179
286
|
|
180
|
-
# Public:
|
287
|
+
# Public: Smart update of the settings of the index
|
181
288
|
#
|
182
|
-
#
|
183
|
-
#
|
289
|
+
# This will first compare the settings about to be pushed with the
|
290
|
+
# settings already pushed. It will compare userData.settingID for that.
|
291
|
+
# If the settingID is the same, we don't push as this won't change
|
292
|
+
# anything. We will still check if the remote config seems to have been
|
293
|
+
# manually altered though, and warn the user that this is not the
|
294
|
+
# preferred way of doing so.
|
184
295
|
#
|
185
|
-
#
|
186
|
-
|
187
|
-
|
188
|
-
return if Configurator.
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
296
|
+
# If the settingIDs do not match, it means our config is different, so
|
297
|
+
# we push it, overriding the settingID for next push.
|
298
|
+
def self.update_settings
|
299
|
+
return if Configurator.settings.empty?
|
300
|
+
|
301
|
+
current_remote_settings = remote_settings || {}
|
302
|
+
remote_setting_id = current_remote_settings.dig('userData', 'settingID')
|
303
|
+
|
304
|
+
settings = Configurator.settings
|
305
|
+
setting_id = local_setting_id
|
306
|
+
|
307
|
+
are_settings_forced = Configurator.force_settings?
|
308
|
+
|
309
|
+
# The config we're about to push is the same we pushed previously. We
|
310
|
+
# won't push again.
|
311
|
+
if setting_id == remote_setting_id && !are_settings_forced
|
312
|
+
Logger.log('I:Settings are already up to date.')
|
313
|
+
# Check if remote config has been changed outside of the plugin, so we
|
314
|
+
# can warn users that they should not alter their config from outside
|
315
|
+
# of the plugin.
|
316
|
+
current_remote_settings.delete('userData')
|
317
|
+
changed_keys = Utils.diff_keys(settings, current_remote_settings)
|
318
|
+
unless changed_keys.nil?
|
319
|
+
warn_of_manual_dashboard_editing(changed_keys)
|
320
|
+
end
|
195
321
|
|
196
|
-
|
197
|
-
|
198
|
-
# records - Array of records to push
|
199
|
-
#
|
200
|
-
# The `atomic` indexing mode will push all records to a brand new index,
|
201
|
-
# configure it, and then overwrite the previous index with this new one.
|
202
|
-
# For the end-user, it will make all the changes in one go, making sure
|
203
|
-
# people are always searching into a fully configured index. It will
|
204
|
-
# consume more operations, but will never leave the index in a transient
|
205
|
-
# state.
|
206
|
-
def self.run_atomic_mode(records)
|
207
|
-
index_name = Configurator.index_name
|
208
|
-
index = index(index_name)
|
209
|
-
index_tmp_name = "#{Configurator.index_name}_tmp"
|
210
|
-
index_tmp = index(index_tmp_name)
|
322
|
+
return
|
323
|
+
end
|
211
324
|
|
212
|
-
|
325
|
+
# Settings have changed, we push them
|
326
|
+
settings['userData'] = {
|
327
|
+
'settingID' => setting_id,
|
328
|
+
'pluginVersion' => VERSION
|
329
|
+
}
|
213
330
|
|
214
|
-
|
215
|
-
|
216
|
-
new_settings = remote_settings.merge(Configurator.settings)
|
217
|
-
update_settings(index_tmp, new_settings)
|
331
|
+
Logger.log("I:Updating settings of index #{index.name}")
|
332
|
+
return if Configurator.dry_run?
|
218
333
|
|
219
|
-
|
220
|
-
|
334
|
+
set_settings(settings)
|
335
|
+
end
|
221
336
|
|
222
|
-
|
223
|
-
|
337
|
+
# Public: Set new settings to an index
|
338
|
+
#
|
339
|
+
# Will dispatch to the error handler if it fails
|
340
|
+
# rubocop:disable Naming/AccessorMethodName
|
341
|
+
def self.set_settings(settings)
|
342
|
+
index.set_settings!(settings)
|
343
|
+
rescue StandardError => e
|
344
|
+
ErrorHandler.stop(e, settings: settings)
|
345
|
+
end
|
346
|
+
# rubocop:enable Naming/AccessorMethodName
|
224
347
|
|
225
|
-
|
348
|
+
# Public: Warn users that they have some settings manually configured in
|
349
|
+
# their dashboard
|
350
|
+
#
|
351
|
+
# When users change some settings in their dashboard, those settings might
|
352
|
+
# get overwritten by the plugin. We can't prevent that, but we can warn
|
353
|
+
# them when we detect they changed something.
|
354
|
+
def self.warn_of_manual_dashboard_editing(changed_keys)
|
355
|
+
# Transform the hash into readable YAML
|
356
|
+
yaml_lines = changed_keys
|
357
|
+
.to_yaml(indentation: 2)
|
358
|
+
.split("\n")[1..-1]
|
359
|
+
yaml_lines.map! do |line|
|
360
|
+
line = line.gsub(/^ */) { |spaces| ' ' * spaces.length }
|
361
|
+
line = line.gsub('- ', ' - ')
|
362
|
+
"W: #{line}"
|
363
|
+
end
|
364
|
+
Logger.known_message(
|
365
|
+
'settings_manually_edited',
|
366
|
+
settings: yaml_lines.join("\n"),
|
367
|
+
index_name: Configurator.index_name
|
368
|
+
)
|
226
369
|
end
|
227
370
|
|
228
371
|
# Public: Push all records to Algolia and configure the index
|
@@ -231,10 +374,8 @@ module Jekyll
|
|
231
374
|
def self.run(records)
|
232
375
|
init
|
233
376
|
|
234
|
-
record_count = records.length
|
235
|
-
|
236
377
|
# Indexing zero records is surely a misconfiguration
|
237
|
-
if
|
378
|
+
if records.length.zero?
|
238
379
|
files_to_exclude = Configurator.algolia('files_to_exclude').join(', ')
|
239
380
|
Logger.known_message(
|
240
381
|
'no_records_found',
|
@@ -244,15 +385,12 @@ module Jekyll
|
|
244
385
|
exit 1
|
245
386
|
end
|
246
387
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
run_diff_mode(records)
|
252
|
-
when 'atomic'
|
253
|
-
run_atomic_mode(records)
|
254
|
-
end
|
388
|
+
update_settings
|
389
|
+
update_records(records)
|
390
|
+
|
391
|
+
Logger.log('I:✔ Indexing complete')
|
255
392
|
end
|
256
393
|
end
|
257
394
|
end
|
258
395
|
end
|
396
|
+
# rubocop:enable Metrics/ModuleLength
|