jekyll-algolia 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +94 -0
- data/README.md +99 -0
- data/errors/invalid_credentials.txt +10 -0
- data/errors/invalid_credentials_for_tmp_index.txt +17 -0
- data/errors/invalid_index_name.txt +11 -0
- data/errors/missing_api_key.txt +17 -0
- data/errors/missing_application_id.txt +12 -0
- data/errors/missing_index_name.txt +19 -0
- data/errors/no_records_found.txt +20 -0
- data/errors/record_too_big.txt +25 -0
- data/errors/unknown_application_id.txt +20 -0
- data/errors/unknown_settings.txt +15 -0
- data/lib/jekyll-algolia.rb +107 -0
- data/lib/jekyll/algolia/configurator.rb +202 -0
- data/lib/jekyll/algolia/error_handler.rb +270 -0
- data/lib/jekyll/algolia/extractor.rb +64 -0
- data/lib/jekyll/algolia/file_browser.rb +269 -0
- data/lib/jekyll/algolia/hooks.rb +67 -0
- data/lib/jekyll/algolia/indexer.rb +258 -0
- data/lib/jekyll/algolia/logger.rb +63 -0
- data/lib/jekyll/algolia/utils.rb +68 -0
- data/lib/jekyll/algolia/version.rb +7 -0
- data/lib/jekyll/commands/algolia.rb +49 -0
- metadata +304 -0
@@ -0,0 +1,202 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
|
4
|
+
module Algolia
|
5
|
+
# Single source of truth for access to configuration variables
|
6
|
+
module Configurator
|
7
|
+
include Jekyll::Algolia
|
8
|
+
|
9
|
+
# Default values of the options read through `algolia(key)`.
#
# Options set to nil have no static default: `algolia` then looks for a
# matching `default_<key>` method to compute the value.
ALGOLIA_DEFAULTS = {
  'extensions_to_index' => nil,
  'files_to_exclude' => nil,
  'nodes_to_index' => 'p',
  'indexing_batch_size' => 1000,
  'indexing_mode' => 'diff',
  # Index settings, merged with the user-defined ones in `settings`
  'settings' => {
    'distinct' => true,
    'attributeForDistinct' => 'url',
    'attributesForFaceting' => %w[searchable(tags) searchable(type) searchable(title)],
    'customRanking' => ['desc(date)', 'desc(weight.heading)', 'asc(weight.position)'],
    'highlightPreTag' => '<em class="ais-Highlight">',
    'highlightPostTag' => '</em>',
    # The last entry is a single comma-separated string on purpose
    'searchableAttributes' => %w[
      title
      hierarchy.lvl0 hierarchy.lvl1 hierarchy.lvl2
      hierarchy.lvl3 hierarchy.lvl4 hierarchy.lvl5
      unordered(content)
      collection,unordered(categories),unordered(tags)
    ],
    # We want to allow highlight in more keys than what we search on
    'attributesToHighlight' => %w[
      title
      hierarchy.lvl0 hierarchy.lvl1 hierarchy.lvl2
      hierarchy.lvl3 hierarchy.lvl4 hierarchy.lvl5
      content html collection categories tags
    ]
  }
}.freeze
|
59
|
+
|
60
|
+
# Public: Read one option from the Jekyll configuration
#
# key - Name of the option to read
#
# Returns whatever Jekyll::Algolia.config holds under that key
def self.get(key)
  jekyll_config = Jekyll::Algolia.config
  jekyll_config[key]
end
|
68
|
+
|
69
|
+
# Public: Read one option of the `algolia` configuration key
#
# key - Algolia option to read
#
# The lookup order is: value set by the user, then ALGOLIA_DEFAULTS, then
# the result of a `default_<key>` method if this module responds to one.
#
# Returns the first value found that way, nil if there is none
def self.algolia(key)
  user_config = get('algolia') || {}
  found = user_config[key] || ALGOLIA_DEFAULTS[key]
  return found unless found.nil?

  # Nothing configured: some defaults are computed rather than static
  fallback = "default_#{key}"
  respond_to?(fallback) ? send(fallback) : found
end
|
86
|
+
|
87
|
+
# Public: Return the application id
#
# The ALGOLIA_APPLICATION_ID environment variable wins when it is set;
# otherwise the `application_id` option of the Jekyll config is used
def self.application_id
  env_value = ENV['ALGOLIA_APPLICATION_ID']
  return env_value if env_value

  algolia('application_id')
end
|
94
|
+
|
95
|
+
# Public: Return the api key
#
# Will first try to read the ALGOLIA_API_KEY environment variable. Will
# otherwise try to read the _algolia_api_key file in the Jekyll source
# folder.
#
# Returns the key as a String, or nil when no key can be found (no ENV
# variable, no source folder, missing file, or a file holding only
# whitespace)
def self.api_key
  # Always taking the ENV variable first
  env_key = ENV['ALGOLIA_API_KEY']
  return env_key if env_key

  # Reading from file on disk otherwise
  source_dir = get('source')
  return nil unless source_dir

  api_key_file = File.join(source_dir, '_algolia_api_key')
  return nil unless File.exist?(api_key_file) && File.size(api_key_file).positive?

  # File.read closes the file once read, unlike a bare File.open
  key = File.read(api_key_file).strip
  # A blank file is reported as a missing key rather than as an empty one
  key.empty? ? nil : key
end
|
114
|
+
|
115
|
+
# Public: Return the index name
#
# Reads the ALGOLIA_INDEX_NAME environment variable, and only looks at the
# `index_name` option of the Jekyll config when that variable is not set
def self.index_name
  ENV.fetch('ALGOLIA_INDEX_NAME') { algolia('index_name') }
end
|
122
|
+
|
123
|
+
# Public: Get the index settings
#
# Returns a new hash made of the default settings, overridden key by key
# with the settings defined in the _config.yml file
def self.settings
  defaults = ALGOLIA_DEFAULTS['settings']
  overrides = algolia('settings')
  defaults.merge(overrides || {})
end
|
131
|
+
|
132
|
+
# Public: Return the current indexing mode
#
# Users pick the mode with the `indexing_mode` option in _config.yml. The
# two recognized values are `diff` (the default) and `atomic`; any other
# value is replaced by `diff`.
def self.indexing_mode
  requested = algolia('indexing_mode') || ALGOLIA_DEFAULTS['indexing_mode']
  %w[diff atomic].include?(requested) ? requested : 'diff'
end
|
143
|
+
|
144
|
+
# Public: Check that all credentials are set
#
# The application id, index name and api key are checked in that order.
# The check stops at the first one that is nil and displays the matching
# `missing_*` known message.
#
# Returns true if everything is ok, false otherwise
def self.assert_valid_credentials
  credentials = %w[application_id index_name api_key]
  missing = credentials.find { |credential| send(credential.to_sym).nil? }
  return true if missing.nil?

  Logger.known_message("missing_#{missing}")
  false
end
|
159
|
+
|
160
|
+
# Public: Default value of `extensions_to_index`: html and markdown files
#
# Markdown files can have many different extensions. We keep the ones
# listed (comma-separated) in the `markdown_ext` option of the Jekyll
# config.
#
# Returns an array starting with 'html', followed by those extensions
def self.default_extensions_to_index
  markdown_extensions = get('markdown_ext').split(',')
  ['html', *markdown_extensions]
end
|
167
|
+
|
168
|
+
# Public: Default value of `files_to_exclude`: the index.* files
#
# Chances are high that the main page is not worth indexing (it can be
# the list of the most recent posts or some landing page without much
# content), so it is ignored by default.
#
# Users can still index it by setting `files_to_exclude` to an empty
# array.
#
# Returns one "index.<extension>" entry per extension to index
def self.default_files_to_exclude
  extensions = algolia('extensions_to_index')
  extensions.map { |ext| "index.#{ext}" }
end
|
182
|
+
|
183
|
+
# Public: Tell if the command is run in verbose mode
#
# When the mode is on, more logs will be displayed.
#
# Returns true only when the `verbose` option is exactly `true`, false
# for any other value
def self.verbose?
  get('verbose') == true
end
|
191
|
+
|
192
|
+
# Public: Returns true if the command is run in dry run mode
#
# When set to true, no indexing operations will be sent to the API.
#
# Returns true only when the `dry_run` option is exactly `true`, false
# for any other value
def self.dry_run?
  get('dry_run') == true
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
@@ -0,0 +1,270 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'verbal_expressions'
|
4
|
+
require 'filesize'
|
5
|
+
require 'cgi'
|
6
|
+
|
7
|
+
module Jekyll
|
8
|
+
module Algolia
|
9
|
+
# Catch API errors and display messages
|
10
|
+
module ErrorHandler
|
11
|
+
include Jekyll::Algolia
|
12
|
+
|
13
|
+
# Public: Display the error and stop the execution of the plugin
#
# error - The caught error
# context - A hash of values that will be passed from where the error
# happened to the display
#
# A known error (see `identify`) is displayed through its human-readable
# known message; anything else is logged as is. The raw error and the
# context are always sent to the verbose log first.
#
# Never returns: exits the process with status 1
def self.stop(error, context = {})
  Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
  Logger.verbose("E:[jekyll-algolia] Context: #{context}")

  known = identify(error, context)

  unless known == false
    Logger.known_message(known[:name], known[:details])
    exit 1
  end

  Logger.log('E:[jekyll-algolia] Error:')
  Logger.log("E:#{error}")
  exit 1
end
|
37
|
+
|
38
|
+
# Public: Match the error against the list of known issues
#
# error - The caught error
# context - A hash of additional information that can be passed from the
# code intercepting the user
#
# Each known issue has a `<name>?` checker taking the error and the
# context. Checkers are called in order and the first one that does not
# answer false wins.
#
# Returns false if no checker matches, or a hash of :name (name of the
# issue) and :details (what its checker returned) otherwise
def self.identify(error, context = {})
  checkers = %w[
    unknown_application_id
    invalid_credentials_for_tmp_index
    invalid_credentials
    record_too_big
    unknown_settings
    invalid_index_name
  ]

  checkers.each do |name|
    details = send("#{name}?", error, context)
    return { name: name, details: details } unless details == false
  end

  false
end
|
68
|
+
|
69
|
+
# Public: Break an Algolia error message down into a hash of its parts
#
# message - The raw message as returned by the API
#
# Ex: Cannot PUT to https://appid.algolia.net/1/indexes/index_name/settings:
# {"message":"Invalid Application-ID or API key","status":403} (403)
#
# Returns false when the message does not follow that shape. Otherwise
# returns a hash of the named parts of the message, with the keys of the
# JSON payload and of the query string merged at the top level, to be
# more easily consumed by our error matchers
def self.error_hash(message)
  flattened = message.delete("\n")

  pattern = VerEx.new do
    find 'Cannot '
    capture('verb') { word }
    find ' to '
    capture('scheme') { word }
    find '://'
    capture('application_id') { word }
    anything_but '/'
    find '/'
    capture('api_version') { digit }
    find '/'
    capture('api_section') { word }
    find '/'
    capture('index_name') do
      anything_but('/')
    end
    find '/'
    capture do
      capture('api_action') { word }
      maybe '?'
      capture('query_parameters') do
        anything_but(':')
      end
    end
    find ': '
    capture('json') do
      find '{'
      anything_but('}')
      find '}'
    end
    find ' ('
    capture('http_error') { word }
    find ')'
  end

  parsed = pattern.match(flattened)
  return false unless parsed

  # One entry per named capture of the pattern
  parts = parsed.names.each_with_object({}) do |name, memo|
    memo[name] = parsed[name]
  end

  parts['api_version'] = parts['api_version'].to_i
  parts['http_error'] = parts['http_error'].to_i

  # The keys of the JSON payload replace the raw 'json' entry
  parts = parts.merge(JSON.parse(parts['json']))
  parts.delete('json')

  # Same for the query string; only the first value of each key is kept
  CGI.parse(parts['query_parameters']).each do |key, values|
    parts[key] = values.first
  end
  parts.delete('query_parameters')

  parts
end
|
138
|
+
|
139
|
+
# Public: Check if the application id is available
#
# error - The caught error
# _context - Not used
#
# If the call to the cluster fails, chances are that the application ID
# is invalid. As we cannot actually contact the server, the error is raw
# and does not follow our error spec.
#
# Returns false if the message does not start with 'Cannot reach any
# host' or names no *.algolia.net host, a hash with the application_id
# otherwise
def self.unknown_application_id?(error, _context = {})
  message = error.message
  return false if message !~ /^Cannot reach any host/

  matches = /.*\((.*)\.algolia\.net.*/.match(message)
  # No host to read the application id from: leave the error unidentified
  return false unless matches

  # The API will browse on APP_ID-dsn, but push/delete on APP_ID only
  # We need to catch both potential errors
  app_id = matches[1].sub(/-dsn$/, '')

  { 'application_id' => app_id }
end
|
158
|
+
|
159
|
+
# Public: Check if credentials specifically can't access the _tmp index
#
# error - The caught error
# _context - Not used
#
# If the error happens on a _tmp index, it might mean that the key does
# not have access to the _tmp indices and the error message will reflect
# that.
#
# Returns false if the error is something else (including messages that
# error_hash cannot parse), or a hash with the application_id, index_name
# and index_name_tmp otherwise
def self.invalid_credentials_for_tmp_index?(error, _context = {})
  details = error_hash(error.message)
  # error_hash answers false for messages it cannot parse
  return false if details == false

  index_name_tmp = details['index_name']
  return false if details['message'] != 'Index not allowed with this API key'
  return false if index_name_tmp !~ /_tmp$/

  {
    'application_id' => Configurator.application_id,
    'index_name' => Configurator.index_name,
    'index_name_tmp' => index_name_tmp
  }
end
|
181
|
+
|
182
|
+
# Public: Check if the credentials are working
#
# error - The caught error
# _context - Not used
#
# Application ID and API key submitted don't match any credentials known.
#
# Returns false if the error is something else (including messages that
# error_hash cannot parse), or a hash with the application_id otherwise
def self.invalid_credentials?(error, _context = {})
  details = error_hash(error.message)
  # error_hash answers false for messages it cannot parse
  return false if details == false
  return false if details['message'] != 'Invalid Application-ID or API key'

  { 'application_id' => details['application_id'] }
end
|
198
|
+
|
199
|
+
# Public: Check if the sent records are not too big
#
# error - The caught error
# context[:records] - list of records to push
#
# Records cannot weigh more than 10Kb. If we're getting this error it
# means that one of the records is too big, so we'll try to give
# information about it so the user can debug it.
#
# Returns false if the error is something else (including messages that
# error_hash cannot parse), or a hash describing the record otherwise.
# Parts that cannot be determined (size missing from the message, record
# not found in context[:records], record without content) are set to nil
def self.record_too_big?(error, context = {})
  details = error_hash(error.message)
  # error_hash answers false for messages it cannot parse
  return false if details == false

  message = details['message']
  return false if message !~ /^Record .* is too big .*/

  # Getting the record size
  size_match = /.*size=(.*) bytes.*/.match(message)
  size = size_match ? Filesize.from("#{size_match[1]} B").pretty : nil
  object_id = details['objectID']

  # Getting record details; the record may not be in the given list
  record = Utils.find_by_key(context[:records], :objectID, object_id) || {}
  content = record[:content]

  {
    'object_id' => object_id,
    'object_title' => record[:title],
    'object_url' => record[:url],
    'object_hint' => content ? content[0..100] : nil,
    'nodes_to_index' => Configurator.algolia('nodes_to_index'),
    'size' => size,
    'size_limit' => '10 Kb'
  }
end
|
230
|
+
|
231
|
+
# Public: Check if one of the index settings is invalid
#
# error - The caught error
# context[:settings] - The settings passed to update the index
#
# The API will block any call that tries to update a setting value that is
# not available. We'll tell the user which one so they can fix their
# issue.
#
# Returns false if the error is something else (including messages that
# error_hash cannot parse, or that do not name the setting), or a hash
# with the setting_name and setting_value otherwise. setting_value is nil
# when the setting is not in context[:settings]
def self.unknown_settings?(error, context = {})
  details = error_hash(error.message)
  # error_hash answers false for messages it cannot parse
  return false if details == false

  message = details['message']
  return false if message !~ /^Invalid object attributes.*/

  # Getting the unknown setting name
  matches = /^Invalid object attributes: (.*) near line.*/.match(message)
  return false unless matches

  setting_name = matches[1]
  settings = context[:settings] || {}

  {
    'setting_name' => setting_name,
    'setting_value' => settings[setting_name]
  }
end
|
254
|
+
|
255
|
+
# Public: Check if the index name is invalid
#
# error - The caught error
# _context - Not used
#
# Some characters are forbidden in index names.
#
# Returns false if the error is something else (including messages that
# error_hash cannot parse), or a hash with the configured index_name
# otherwise
def self.invalid_index_name?(error, _context = {})
  details = error_hash(error.message)
  # error_hash answers false for messages it cannot parse
  return false if details == false

  message = details['message']
  return false if message !~ /^indexName is not valid.*/

  { 'index_name' => Configurator.index_name }
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|