jekyll-algolia 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +94 -0
- data/README.md +99 -0
- data/errors/invalid_credentials.txt +10 -0
- data/errors/invalid_credentials_for_tmp_index.txt +17 -0
- data/errors/invalid_index_name.txt +11 -0
- data/errors/missing_api_key.txt +17 -0
- data/errors/missing_application_id.txt +12 -0
- data/errors/missing_index_name.txt +19 -0
- data/errors/no_records_found.txt +20 -0
- data/errors/record_too_big.txt +25 -0
- data/errors/unknown_application_id.txt +20 -0
- data/errors/unknown_settings.txt +15 -0
- data/lib/jekyll-algolia.rb +107 -0
- data/lib/jekyll/algolia/configurator.rb +202 -0
- data/lib/jekyll/algolia/error_handler.rb +270 -0
- data/lib/jekyll/algolia/extractor.rb +64 -0
- data/lib/jekyll/algolia/file_browser.rb +269 -0
- data/lib/jekyll/algolia/hooks.rb +67 -0
- data/lib/jekyll/algolia/indexer.rb +258 -0
- data/lib/jekyll/algolia/logger.rb +63 -0
- data/lib/jekyll/algolia/utils.rb +68 -0
- data/lib/jekyll/algolia/version.rb +7 -0
- data/lib/jekyll/commands/algolia.rb +49 -0
- metadata +304 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'algolia_html_extractor'
|
4
|
+
|
5
|
+
module Jekyll
  module Algolia
    # Module to extract records from Jekyll files
    module Extractor
      include Jekyll::Algolia

      # Public: Extract records from the file
      #
      # file - The Jekyll file to process
      #
      # Each raw record coming out of the HTML extractor is stripped of its
      # :node key, merged with the metadata of the file (file metadata wins on
      # key conflicts), cleaned with Utils.compact_empty and finally passed
      # through the user-defined `apply_each` hook.
      #
      # Returns an Array of records. Records for which the hook returned `nil`
      # are left out.
      # TOTEST
      def self.run(file)
        # Getting all hierarchical nodes from the HTML input
        raw_records = extract_raw_records(file.content)
        # Getting file metadata
        shared_metadata = FileBrowser.metadata(file)

        records = raw_records.map do |raw_record|
          # We do not need to pass the HTML node element to the final record,
          # but the hook still receives it
          node = raw_record.delete(:node)

          # Merging each record info with file info
          record = Utils.compact_empty(raw_record.merge(shared_metadata))

          # Apply custom user-defined hooks
          # Users can return `nil` from the hook to signal we should not index
          # such a record
          Hooks.apply_each(record, node)
        end

        records.compact
      end

      # Public: Adds a unique :objectID field to the hash, representing the
      # current content of the record
      #
      # record - The record hash; it is modified in place
      #
      # Returns the same record, with its :objectID key set
      def self.add_unique_object_id(record)
        record[:objectID] = AlgoliaHTMLExtractor.uuid(record)
        record
      end

      # Public: Extract raw records from the file, including content for each
      # node to index and hierarchy
      #
      # content - The HTML content to parse
      #
      # The CSS selector used to pick the nodes comes from the
      # `nodes_to_index` option.
      #
      # Returns whatever AlgoliaHTMLExtractor.run returns
      def self.extract_raw_records(content)
        AlgoliaHTMLExtractor.run(
          content,
          options: {
            css_selector: Configurator.algolia('nodes_to_index')
          }
        )
      end
    end
  end
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'algolia_html_extractor'
|
4
|
+
|
5
|
+
module Jekyll
|
6
|
+
module Algolia
|
7
|
+
# Module to get information about Jekyll file. Jekyll handles posts, pages,
|
8
|
+
# collection, etc. They each need specific processing, so knowing which kind
|
9
|
+
# of file we're working on will help.
|
10
|
+
#
|
11
|
+
# We also do not index all files. This module will help in defining which
|
12
|
+
# files should be indexed and which should not.
|
13
|
+
module FileBrowser
|
14
|
+
include Jekyll::Algolia
|
15
|
+
|
16
|
+
# Public: Check if the specified file is a static Jekyll asset
|
17
|
+
#
|
18
|
+
# file - The Jekyll file
|
19
|
+
#
|
20
|
+
# We don't index static assets (js, css, images)
|
21
|
+
def self.static_file?(file)
|
22
|
+
file.is_a?(Jekyll::StaticFile)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Public: Check if the file is a 404 error page
|
26
|
+
#
|
27
|
+
# file - The Jekyll file
|
28
|
+
#
|
29
|
+
# 404 pages are not Jekyll defaults but a convention adopted by GitHub
|
30
|
+
# pages. We don't want to index those.
|
31
|
+
# Source: https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
|
32
|
+
#
|
33
|
+
# rubocop:disable Naming/PredicateName
|
34
|
+
def self.is_404?(file)
|
35
|
+
File.basename(file.path, File.extname(file.path)) == '404'
|
36
|
+
end
|
37
|
+
# rubocop:enable Naming/PredicateName
|
38
|
+
|
39
|
+
# Public: Check if the page is a pagination page
|
40
|
+
#
|
41
|
+
# file - The Jekyll file
|
42
|
+
#
|
43
|
+
# `jekyll-paginate` automatically creates pages to paginate through posts.
|
44
|
+
# We don't want to index those
|
45
|
+
def self.pagination_page?(file)
|
46
|
+
Utils.match?(file.path, %r{page([0-9]*)/index\.html$})
|
47
|
+
end
|
48
|
+
|
49
|
+
# Public: Check if the file has one of the allowed extensions
|
50
|
+
#
|
51
|
+
# file - The Jekyll file
|
52
|
+
#
|
53
|
+
# Jekyll can transform markdown files to HTML by default. With plugins, it
|
54
|
+
# can convert many more file formats. By default we'll only index markdown
|
55
|
+
# and raw HTML files but this list can be extended using the
|
56
|
+
# `extensions_to_index` config option.
|
57
|
+
def self.allowed_extension?(file)
|
58
|
+
extensions = Configurator.algolia('extensions_to_index')
|
59
|
+
extname = File.extname(file.path)[1..-1]
|
60
|
+
extensions.include?(extname)
|
61
|
+
end
|
62
|
+
|
63
|
+
# Public: Check if the file has been excluded by the user
|
64
|
+
#
|
65
|
+
# file - The Jekyll file
|
66
|
+
#
|
67
|
+
# Files can be excluded either by setting the `files_to_exclude` option,
|
68
|
+
# or by defining a custom hook
|
69
|
+
def self.excluded_by_user?(file)
|
70
|
+
excluded_from_config?(file) || excluded_from_hook?(file)
|
71
|
+
end
|
72
|
+
|
73
|
+
# Public: Check if the file has been excluded by `files_to_exclude`
|
74
|
+
#
|
75
|
+
# file - The Jekyll file
|
76
|
+
def self.excluded_from_config?(file)
|
77
|
+
excluded_patterns = Configurator.algolia('files_to_exclude')
|
78
|
+
excluded_files = []
|
79
|
+
|
80
|
+
# Transform the glob patterns into a real list of files
|
81
|
+
Dir.chdir(Configurator.get('source')) do
|
82
|
+
excluded_patterns.each do |pattern|
|
83
|
+
excluded_files += Dir.glob(pattern)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
excluded_files.include?(file.path)
|
88
|
+
end
|
89
|
+
|
90
|
+
# Public: Check if the file has been excluded by running a custom user
|
91
|
+
# hook
|
92
|
+
#
|
93
|
+
# file - The Jekyll file
|
94
|
+
def self.excluded_from_hook?(file)
|
95
|
+
Hooks.should_be_excluded?(file.path)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Public: Return the path to the original file, relative from the Jekyll
|
99
|
+
# source
|
100
|
+
#
|
101
|
+
# file - The Jekyll file
|
102
|
+
#
|
103
|
+
# Pages have their .path property relative to the source, but collections
|
104
|
+
# (including posts) have an absolute file path.
|
105
|
+
def self.path_from_root(file)
|
106
|
+
source = Configurator.get('source')
|
107
|
+
file.path.gsub(%r{^#{source}/}, '')
|
108
|
+
end
|
109
|
+
|
110
|
+
# Public: Check if the file should be indexed
|
111
|
+
#
|
112
|
+
# file - The Jekyll file
|
113
|
+
#
|
114
|
+
# There are many reasons a file should not be indexed. We need to exclude
|
115
|
+
# all the static assets, only keep the actual content.
|
116
|
+
def self.indexable?(file)
|
117
|
+
return false if static_file?(file)
|
118
|
+
return false if is_404?(file)
|
119
|
+
return false if pagination_page?(file)
|
120
|
+
return false unless allowed_extension?(file)
|
121
|
+
return false if excluded_by_user?(file)
|
122
|
+
|
123
|
+
true
|
124
|
+
end
|
125
|
+
|
126
|
+
# Public: Return a hash of all the file metadata
|
127
|
+
#
|
128
|
+
# file - The Jekyll file
|
129
|
+
#
|
130
|
+
# It contains both the raw metadata extracted from the front-matter, as
|
131
|
+
# well as more specific fields like the collection name, date timestamp,
|
132
|
+
# slug, type and url
|
133
|
+
def self.metadata(file)
|
134
|
+
raw_data = raw_data(file)
|
135
|
+
specific_data = {
|
136
|
+
collection: collection(file),
|
137
|
+
date: date(file),
|
138
|
+
excerpt_html: excerpt_html(file),
|
139
|
+
excerpt_text: excerpt_text(file),
|
140
|
+
slug: slug(file),
|
141
|
+
type: type(file),
|
142
|
+
url: url(file)
|
143
|
+
}
|
144
|
+
|
145
|
+
metadata = Utils.compact_empty(raw_data.merge(specific_data))
|
146
|
+
|
147
|
+
metadata
|
148
|
+
end
|
149
|
+
|
150
|
+
# Public: Return a hash of all the raw data, as defined in the
|
151
|
+
# front-matter and including default values
|
152
|
+
#
|
153
|
+
# file - The Jekyll file
|
154
|
+
#
|
155
|
+
# Any custom data passed to the front-matter will be returned by this
|
156
|
+
# method. It ignores any key where we have a better, custom, getter.
|
157
|
+
|
158
|
+
# Note that even if you define tags and categories in a collection item,
|
159
|
+
# it will not be included in the data. It's always an empty array.
|
160
|
+
def self.raw_data(file)
|
161
|
+
data = file.data.clone
|
162
|
+
|
163
|
+
# Remove all keys where we have a specific getter
|
164
|
+
data.each_key do |key|
|
165
|
+
data.delete(key) if respond_to?(key)
|
166
|
+
end
|
167
|
+
|
168
|
+
# Also delete keys we manually handle
|
169
|
+
data.delete('excerpt')
|
170
|
+
|
171
|
+
# Convert all keys to symbols
|
172
|
+
data = Utils.keys_to_symbols(data)
|
173
|
+
|
174
|
+
data
|
175
|
+
end
|
176
|
+
|
177
|
+
# Public: Get the type of the document (page, post, collection, etc)
|
178
|
+
#
|
179
|
+
# file - The Jekyll file
|
180
|
+
#
|
181
|
+
# Pages are simple html and markdown documents in the tree
|
182
|
+
# Elements from a collection are called Documents
|
183
|
+
# Posts are a custom kind of Documents
|
184
|
+
def self.type(file)
|
185
|
+
type = file.class.name.split('::')[-1].downcase
|
186
|
+
|
187
|
+
type = 'post' if type == 'document' && file.collection.label == 'posts'
|
188
|
+
|
189
|
+
type
|
190
|
+
end
|
191
|
+
|
192
|
+
# Public: Returns the url of the file, starting from the root
|
193
|
+
#
|
194
|
+
# file - The Jekyll file
|
195
|
+
def self.url(file)
|
196
|
+
file.url
|
197
|
+
end
|
198
|
+
|
199
|
+
# Public: Returns a timestamp of the file date
|
200
|
+
#
|
201
|
+
# file - The Jekyll file
|
202
|
+
#
|
203
|
+
# All collections have a date, either taken from the filename, or the
|
204
|
+
# `date` config set in the front-matter. Even if none is set, the current
|
205
|
+
# date is taken by default.
|
206
|
+
def self.date(file)
|
207
|
+
date = file.data['date']
|
208
|
+
return nil if date.nil?
|
209
|
+
|
210
|
+
date.to_i
|
211
|
+
end
|
212
|
+
|
213
|
+
# Public: Returns the HTML version of the excerpt
|
214
|
+
#
|
215
|
+
# file - The Jekyll file
|
216
|
+
#
|
217
|
+
# Only collections (including posts) have an excerpt. Pages don't.
|
218
|
+
def self.excerpt_html(file)
|
219
|
+
excerpt = file.data['excerpt']
|
220
|
+
return nil if excerpt.nil?
|
221
|
+
excerpt.to_s.tr("\n", ' ').strip
|
222
|
+
end
|
223
|
+
|
224
|
+
# Public: Returns the text version of the excerpt
|
225
|
+
#
|
226
|
+
# file - The Jekyll file
|
227
|
+
#
|
228
|
+
# Only collections (including posts) have an excerpt. Pages don't.
|
229
|
+
def self.excerpt_text(file)
|
230
|
+
html = excerpt_html(file)
|
231
|
+
return nil if html.nil?
|
232
|
+
Utils.html_to_text(html)
|
233
|
+
end
|
234
|
+
|
235
|
+
# Public: Returns the slug of the file
|
236
|
+
#
|
237
|
+
# file - The Jekyll file
|
238
|
+
#
|
239
|
+
# Slugs can be automatically extracted from collections, but for other
|
240
|
+
# files, we have to create them from the basename
|
241
|
+
def self.slug(file)
|
242
|
+
# We get the real slug from the file data if available
|
243
|
+
return file.data['slug'] if file.data.key?('slug')
|
244
|
+
|
245
|
+
# We create it ourselves from the filepath otherwise
|
246
|
+
File.basename(file.path, File.extname(file.path)).downcase
|
247
|
+
end
|
248
|
+
|
249
|
+
# Public: Returns the name of the collection
|
250
|
+
#
|
251
|
+
# file - The Jekyll file
|
252
|
+
#
|
253
|
+
# Only collection documents can have a collection name. Pages don't. Posts
|
254
|
+
# are purposefully excluded from it as well even if they are technically
|
255
|
+
# part of a collection
|
256
|
+
def self.collection(file)
|
257
|
+
return nil unless file.respond_to?(:collection)
|
258
|
+
|
259
|
+
collection_name = file.collection.label
|
260
|
+
|
261
|
+
# Posts are a special kind of collection, but it's an implementation
|
262
|
+
# detail from my POV, so I'll exclude them
|
263
|
+
return nil if collection_name == 'posts'
|
264
|
+
|
265
|
+
collection_name
|
266
|
+
end
|
267
|
+
end
|
268
|
+
end
|
269
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module Algolia
    # User-defined hooks applied at various steps of the processing pipeline.
    #
    # The `apply_*` methods are thin wrappers around the methods users are
    # expected to overwrite; going through a wrapper makes the hooks easier to
    # mock in tests.
    module Hooks
      class << self
        # Public: Apply the before_indexing_each hook to the record.
        #
        # record - The hash of the record to be pushed
        # node - The Nokogiri node of the element
        #
        # Returns whatever before_indexing_each returns
        def apply_each(record, node)
          before_indexing_each(record, node)
        end

        # Public: Apply the before_indexing_all hook to all records.
        #
        # records - The list of all records to be indexed
        #
        # Returns whatever before_indexing_all returns
        def apply_all(records)
          before_indexing_all(records)
        end

        # Public: Check if the file should be indexed or not
        #
        # filepath - The path to the file, before transformation
        #
        # This hook allows users to decide if a specific file should be
        # indexed or not. Basic exclusion can be done through the
        # `files_to_exclude` option, but a custom hook like this one allows
        # more fine-grained customisation.
        #
        # The default implementation never excludes anything.
        def should_be_excluded?(_filepath)
          false
        end

        # Public: Custom method to be run on the record before indexing it
        #
        # record - The hash of the record to be pushed
        # node - The Nokogiri node of the element
        #
        # Users can modify the record (adding/editing/removing keys) here. It
        # can be used to remove keys that should not be indexed, or access
        # more information from the HTML node.
        #
        # Users can return nil to signal that the record should not be
        # indexed. The default implementation returns the record untouched.
        def before_indexing_each(record, _node)
          record
        end

        # Public: Custom method to be run on the list of all records before
        # indexing them
        #
        # records - The list of all records to be indexed
        #
        # Users can modify the full list from here. It might provide an easier
        # interface than `before_indexing_each` when knowing the full context
        # is necessary. The default implementation returns the list untouched.
        def before_indexing_all(records)
          records
        end
      end
    end
  end
end
|
@@ -0,0 +1,258 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'algoliasearch'
|
4
|
+
|
5
|
+
module Jekyll
  module Algolia
    # Module to push records to Algolia and configure the index
    module Indexer
      include Jekyll::Algolia

      # Public: Init the module
      #
      # This call will instantiate the Algolia API client with the configured
      # credentials and set the custom User-Agent
      def self.init
        ::Algolia.init(
          application_id: Configurator.application_id,
          api_key: Configurator.api_key
        )

        set_user_agent
      end

      # Public: Set the User-Agent to send to the API
      #
      # Every integrations should follow the "YYY Integration" pattern, and
      # every API client should follow the "Algolia for YYY" pattern. Even if
      # each integration version is pinned to a specific API client version, we
      # are explicit in defining it to help debug from the dashboard.
      def self.set_user_agent
        user_agent = [
          "Jekyll Integration (#{VERSION})",
          "Algolia for Ruby (#{::Algolia::VERSION})",
          "Jekyll (#{::Jekyll::VERSION})",
          "Ruby (#{RUBY_VERSION})"
        ].join('; ')

        ::Algolia.set_extra_header('User-Agent', user_agent)
      end

      # Public: Returns an Algolia Index object from an index name
      #
      # index_name - String name of the index
      def self.index(index_name)
        ::Algolia::Index.new(index_name)
      end

      # Public: Update records of the specified index
      #
      # index - Algolia Index to update
      # records - Array of records to update
      #
      # Records are pushed in batches of `indexing_batch_size`.
      #
      # New records will be automatically added. Technically existing records
      # should be updated but this case should never happen as changing a record
      # content will change its objectID as well.
      #
      # Does nothing in dry run mode (batches are still logged)
      def self.update_records(index, records)
        batch_size = Configurator.algolia('indexing_batch_size')
        records.each_slice(batch_size) do |batch|
          Logger.log("I:Pushing #{batch.size} records")
          next if Configurator.dry_run?

          begin
            index.add_objects!(batch)
          rescue StandardError => error
            ErrorHandler.stop(error, records: records)
          end
        end
      end

      # Public: Delete records whose objectIDs are passed
      #
      # index - Algolia Index to target
      # ids - Array of objectIDs to delete
      #
      # Does nothing in dry run mode, nor when the list of ids is empty
      def self.delete_records_by_id(index, ids)
        return if ids.empty?

        Logger.log("I:Deleting #{ids.length} records")
        return if Configurator.dry_run?

        begin
          index.delete_objects!(ids)
        rescue StandardError => error
          ErrorHandler.stop(error)
        end
      end

      # Public: Returns an array of all the objectIDs in the index
      #
      # index - Algolia Index to target
      #
      # The returned array is sorted. It won't have any impact on the way it is
      # processed, but makes debugging easier when comparing arrays is needed.
      def self.remote_object_ids(index)
        list = []
        begin
          index.browse(attributesToRetrieve: 'objectID') do |hit|
            list << hit['objectID']
          end
        rescue StandardError
          # The index might not exist if it's the first time we use the plugin
          # so we'll consider that it means there are no records there
          return []
        end
        list.sort
      end

      # Public: Returns an array of the local objectIDs
      #
      # records - Array of all local records
      #
      # Records without an :objectID are ignored. The result is sorted.
      def self.local_object_ids(records)
        records.map { |record| record[:objectID] }.compact.sort
      end

      # Public: Update settings of the index
      #
      # index - The Algolia Index
      # settings - The hash of settings to pass to the index
      #
      # Does nothing in dry run mode
      def self.update_settings(index, settings)
        Logger.verbose('I:Updating settings')
        return if Configurator.dry_run?

        begin
          index.set_settings(settings)
        rescue StandardError => error
          ErrorHandler.stop(error, settings: settings)
        end
      end

      # Public: Index content following the `diff` indexing mode
      #
      # records - Array of local records
      #
      # The `diff` indexing mode will only push new content to the index and
      # remove old content from it. It won't touch records that haven't been
      # updated. It will be a bit slower as it will first need to get the list
      # of all records in the index, but it will consume less operations.
      def self.run_diff_mode(records)
        target_index = index(Configurator.index_name)

        # Update settings
        update_settings(target_index, Configurator.settings)

        # Getting list of objectID in remote and locally
        remote_ids = remote_object_ids(target_index)
        local_ids = local_object_ids(records)

        old_records_ids = remote_ids - local_ids
        new_records_ids = local_ids - remote_ids
        if old_records_ids.empty? && new_records_ids.empty?
          Logger.log('I:Nothing to index. Your content is already up to date.')
          return
        end

        Logger.log('I:Pushing records to Algolia...')

        # Delete remote records that are no longer available locally
        delete_records_by_id(target_index, old_records_ids)

        # Add only records that are not yet already in the remote. The ids are
        # indexed in a hash first so each membership check is a constant-time
        # lookup instead of a scan of the whole id list.
        ids_to_push = new_records_ids.each_with_object({}) do |id, lookup|
          lookup[id] = true
        end
        new_records = records.select do |record|
          ids_to_push.key?(record[:objectID])
        end
        update_records(target_index, new_records)

        Logger.log('I:✔ Indexing complete')
      end

      # Public: Get the settings of the remote index
      #
      # index - The Algolia Index
      #
      # Any error raised by the API is handed over to ErrorHandler.stop
      def self.remote_settings(index)
        index.get_settings
      rescue StandardError => error
        ErrorHandler.stop(error)
      end

      # Public: Rename an index
      #
      # old_name - Current name of the index
      # new_name - New name of the index
      #
      # Does nothing in dry run mode
      def self.rename_index(old_name, new_name)
        Logger.verbose("I:Renaming `#{old_name}` to `#{new_name}`")
        return if Configurator.dry_run?

        begin
          ::Algolia.move_index(old_name, new_name)
        rescue StandardError => error
          ErrorHandler.stop(error, new_name: new_name)
        end
      end

      # Public: Index content following the `atomic` indexing mode
      #
      # records - Array of records to push
      #
      # The `atomic` indexing mode will push all records to a brand new index,
      # configure it, and then overwrite the previous index with this new one.
      # For the end-user, it will make all the changes in one go, making sure
      # people are always searching into a fully configured index. It will
      # consume more operations, but will never leave the index in a transient
      # state.
      def self.run_atomic_mode(records)
        index_name = Configurator.index_name
        main_index = index(index_name)
        index_tmp_name = "#{index_name}_tmp"
        index_tmp = index(index_tmp_name)

        Logger.verbose("I:Using `#{index_tmp_name}` as temporary index")

        # Copying original settings to the new index; settings coming from the
        # configuration win over the ones already set remotely
        current_settings = remote_settings(main_index)
        new_settings = current_settings.merge(Configurator.settings)
        update_settings(index_tmp, new_settings)

        # Pushing everything to a brand new index
        update_records(index_tmp, records)

        # Renaming the new index in place of the old
        rename_index(index_tmp_name, index_name)

        Logger.log('I:✔ Indexing complete')
      end

      # Public: Push all records to Algolia and configure the index
      #
      # records - Records to push
      #
      # Exits the process with status 1 when there is no record to push.
      def self.run(records)
        init

        record_count = records.length

        # Indexing zero record is surely a misconfiguration
        if record_count.zero?
          files_to_exclude = Configurator.algolia('files_to_exclude').join(', ')
          Logger.known_message(
            'no_records_found',
            'files_to_exclude' => files_to_exclude,
            'nodes_to_index' => Configurator.algolia('nodes_to_index')
          )
          exit 1
        end

        indexing_mode = Configurator.indexing_mode
        Logger.verbose("I:Indexing mode: #{indexing_mode}")
        case indexing_mode
        when 'diff'
          run_diff_mode(records)
        when 'atomic'
          run_atomic_mode(records)
        end
      end
    end
  end
end
|