jekyll-algolia 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  module Algolia
    # Single source of truth for access to configuration variables.
    #
    # Every value is read through Jekyll::Algolia.config (the Jekyll
    # configuration hash); Algolia-specific options live under its 'algolia'
    # key and fall back to ALGOLIA_DEFAULTS or to a `default_<key>` method.
    module Configurator
      include Jekyll::Algolia

      # Algolia default values
      ALGOLIA_DEFAULTS = {
        # nil here means "computed lazily by default_extensions_to_index"
        'extensions_to_index' => nil,
        # nil here means "computed lazily by default_files_to_exclude"
        'files_to_exclude' => nil,
        'nodes_to_index' => 'p',
        'indexing_batch_size' => 1000,
        'indexing_mode' => 'diff',
        'settings' => {
          'distinct' => true,
          'attributeForDistinct' => 'url',
          'attributesForFaceting' => %w[
            searchable(tags)
            searchable(type)
            searchable(title)
          ],
          'customRanking' => [
            'desc(date)',
            'desc(weight.heading)',
            'asc(weight.position)'
          ],
          'highlightPreTag' => '<em class="ais-Highlight">',
          'highlightPostTag' => '</em>',
          'searchableAttributes' => %w[
            title
            hierarchy.lvl0
            hierarchy.lvl1
            hierarchy.lvl2
            hierarchy.lvl3
            hierarchy.lvl4
            hierarchy.lvl5
            unordered(content)
            collection,unordered(categories),unordered(tags)
          ],
          # We want to allow highlight in more keys than what we search on
          'attributesToHighlight' => %w[
            title
            hierarchy.lvl0
            hierarchy.lvl1
            hierarchy.lvl2
            hierarchy.lvl3
            hierarchy.lvl4
            hierarchy.lvl5
            content
            html
            collection
            categories
            tags
          ]
        }
      }.freeze

      # Public: Get the value of a specific Jekyll configuration option
      #
      # key - Key to read
      #
      # Returns the value of this configuration option, nil otherwise
      def self.get(key)
        Jekyll::Algolia.config[key]
      end

      # Public: Get the value of a specific Algolia configuration option, or
      # revert to the default value otherwise
      #
      # key - Algolia key to read
      #
      # Lookup order: the user's `algolia` config, then ALGOLIA_DEFAULTS, then
      # a `default_<key>` method of this module when one exists.
      #
      # Returns the value of this option, or the default value
      def self.algolia(key)
        user_config = get('algolia') || {}
        value = user_config[key] || ALGOLIA_DEFAULTS[key]
        return value unless value.nil?

        # No value found but we may have a method to define the default value
        default_method = "default_#{key}"
        respond_to?(default_method) ? send(default_method) : value
      end

      # Public: Return the application id
      #
      # Will first try to read the ENV variable, and fallback to the one
      # configured in Jekyll config
      def self.application_id
        ENV['ALGOLIA_APPLICATION_ID'] || algolia('application_id')
      end

      # Public: Return the api key
      #
      # Will first try to read the ENV variable. Will otherwise try to read the
      # _algolia_api_key file in the Jekyll source folder.
      #
      # Returns the key as a String, or nil when no usable key is found. A key
      # file that is empty or contains only whitespace counts as "no key".
      def self.api_key
        # Always taking the ENV variable first
        env_key = ENV['ALGOLIA_API_KEY']
        return env_key if env_key

        # Reading from file on disk otherwise
        source_dir = get('source')
        return nil unless source_dir

        api_key_file = File.join(source_dir, '_algolia_api_key')
        unless File.exist?(api_key_file) && File.size(api_key_file).positive?
          return nil
        end

        # File.read closes the file for us (File.open without a block did not)
        key = File.read(api_key_file).strip
        key.empty? ? nil : key
      end

      # Public: Return the index name
      #
      # Will first try to read the ENV variable, and fallback to the one
      # configured in Jekyll config
      def self.index_name
        ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
      end

      # Public: Get the index settings
      #
      # This will be a (shallow) merge of default settings and the one defined
      # in the _config.yml file; user values win on conflicting keys.
      def self.settings
        ALGOLIA_DEFAULTS['settings'].merge(algolia('settings') || {})
      end

      # Public: Return the current indexing mode
      #
      # Default mode is `diff`, but users can configure their own by updating
      # the `indexing_mode` config in _config.yml. The only other authorized
      # value is `atomic`. If an unrecognized mode is defined, it defaults to
      # `diff`.
      def self.indexing_mode
        mode = algolia('indexing_mode') || ALGOLIA_DEFAULTS['indexing_mode']
        %w[diff atomic].include?(mode) ? mode : 'diff'
      end

      # Public: Check that all credentials are set
      #
      # Credentials are checked in order (application_id, index_name, api_key)
      # and the check stops at the first missing one, after asking the Logger
      # to display the matching `missing_<credential>` message.
      #
      # Returns true if everything is ok, false otherwise
      def self.assert_valid_credentials
        %w[application_id index_name api_key].each do |credential|
          next unless send(credential).nil?

          Logger.known_message("missing_#{credential}")
          return false
        end

        true
      end

      # Public: Setting a default values to index only html and markdown files
      #
      # Markdown files can have many different extensions. We keep the one
      # defined in the Jekyll config (`markdown_ext`, a comma-separated list)
      def self.default_extensions_to_index
        ['html'] + get('markdown_ext').split(',')
      end

      # Public: Setting a default value to ignore index.html/index.md files in
      # the root
      #
      # Chances are high that the main page is not worthy of indexing (it can be
      # the list of the most recent posts or some landing page without much
      # content). We ignore it by default.
      #
      # User can still add it by manually specifying a `files_to_exclude` to an
      # empty array
      def self.default_files_to_exclude
        algolia('extensions_to_index').map { |extension| "index.#{extension}" }
      end

      # Public: Returns true if the command is run in verbose mode
      #
      # When set to true, more logs will be displayed. Only the literal `true`
      # enables it; any other value (including truthy ones) yields false.
      def self.verbose?
        get('verbose') == true
      end

      # Public: Returns true if the command is run in dry run mode
      #
      # When set to true, no indexing operations will be sent to the API. Only
      # the literal `true` enables it; any other value yields false.
      def self.dry_run?
        get('dry_run') == true
      end
    end
  end
end
@@ -0,0 +1,270 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'verbal_expressions'
4
+ require 'filesize'
5
+ require 'cgi'
6
+
7
module Jekyll
  module Algolia
    # Catch API errors and display messages
    #
    # Each `<name>?` matcher below receives the raised error (and an optional
    # context hash) and returns either false (not this error) or a hash of
    # details used to build a human-readable message.
    module ErrorHandler
      include Jekyll::Algolia

      # Public: Stop the execution of the plugin and display if possible
      # a human-readable error message
      #
      # error - The caught error
      # context - A hash of values that will be passed from where the error
      # happened to the display
      #
      # Always terminates the process with exit status 1.
      def self.stop(error, context = {})
        Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
        Logger.verbose("E:[jekyll-algolia] Context: #{context}")

        identified_error = identify(error, context)

        if identified_error == false
          # Unknown error: fall back to displaying it raw
          Logger.log('E:[jekyll-algolia] Error:')
          Logger.log("E:#{error}")
        else
          Logger.known_message(
            identified_error[:name],
            identified_error[:details]
          )
        end

        exit 1
      end

      # Public: Will identify the error and return its internal name
      #
      # error - The caught error
      # context - A hash of additional information that can be passed from the
      # code intercepting the user
      #
      # It will parse in order all potential known issues until it finds one
      # that matches. Returns false if no match, or a hash of :name and :details
      # further identifying the issue.
      def self.identify(error, context = {})
        # Order matters: the _tmp index check must run before the generic
        # credentials check
        known_errors = %w[
          unknown_application_id
          invalid_credentials_for_tmp_index
          invalid_credentials
          record_too_big
          unknown_settings
          invalid_index_name
        ]

        # Checking the errors against our known list
        known_errors.each do |potential_error|
          error_check = send("#{potential_error}?", error, context)
          next if error_check == false

          return { name: potential_error, details: error_check }
        end

        false
      end

      # Public: Parses an Algolia error message into a hash of its content
      #
      # message - The raw message as returned by the API
      #
      # Returns a hash of all parts of the message, to be more easily consumed
      # by our error matchers, or false when the message does not follow the
      # expected "Cannot VERB to URL: {json} (code)" shape.
      def self.error_hash(message)
        message = message.delete("\n")

        # Ex: Cannot PUT to https://appid.algolia.net/1/indexes/index_name/settings:
        # {"message":"Invalid Application-ID or API key","status":403} (403)
        regex = VerEx.new do
          find 'Cannot '
          capture('verb') { word }
          find ' to '
          capture('scheme') { word }
          find '://'
          capture('application_id') { word }
          anything_but '/'
          find '/'
          capture('api_version') { digit }
          find '/'
          capture('api_section') { word }
          find '/'
          capture('index_name') do
            anything_but('/')
          end
          find '/'
          capture do
            capture('api_action') { word }
            maybe '?'
            capture('query_parameters') do
              anything_but(':')
            end
          end
          find ': '
          capture('json') do
            find '{'
            anything_but('}')
            find '}'
          end
          find ' ('
          capture('http_error') { word }
          find ')'
        end

        matches = regex.match(message)
        return false unless matches

        # Convert the named captures to a hash
        hash = matches.names.each_with_object({}) do |name, memo|
          memo[name] = matches[name]
        end

        hash['api_version'] = hash['api_version'].to_i
        hash['http_error'] = hash['http_error'].to_i

        # Merging the JSON key directly in the answer
        hash = hash.merge(JSON.parse(hash['json']))
        hash.delete('json')

        # Merging the query parameters in the answer (first value of each key)
        CGI.parse(hash['query_parameters']).each do |key, values|
          hash[key] = values[0]
        end
        hash.delete('query_parameters')

        hash
      end

      # Public: Check if the application id is available
      #
      # _context - Not used
      #
      # If the call to the cluster fails, chances are that the application ID
      # is invalid. As we cannot actually contact the server, the error is raw
      # and does not follow our error spec
      #
      # Returns false when the error is not a "Cannot reach any host" error or
      # when no *.algolia.net host can be found in it.
      def self.unknown_application_id?(error, _context = {})
        message = error.message
        return false if message !~ /^Cannot reach any host/

        matches = /.*\((.*)\.algolia.net.*/.match(message)
        # Without a host name we cannot tell which application id failed
        return false unless matches

        # The API will browse on APP_ID-dsn, but push/delete on APP_ID only
        # We need to catch both potential errors
        app_id = matches[1].gsub(/-dsn$/, '')

        { 'application_id' => app_id }
      end

      # Public: Check if credentials specifically can't access the _tmp index
      #
      # _context - Not used
      #
      # If the error happens on a _tmp folder, it might mean that the key does
      # not have access to the _tmp indices and the error message will reflect
      # that.
      def self.invalid_credentials_for_tmp_index?(error, _context = {})
        details = error_hash(error.message)
        # error_hash returns false for messages it cannot parse
        return false unless details

        index_name_tmp = details['index_name']
        if details['message'] != 'Index not allowed with this API key' ||
           index_name_tmp !~ /_tmp$/
          return false
        end

        {
          'application_id' => Configurator.application_id,
          'index_name' => Configurator.index_name,
          'index_name_tmp' => index_name_tmp
        }
      end

      # Public: Check if the credentials are working
      #
      # _context - Not used
      #
      # Application ID and API key submitted don't match any credentials known
      def self.invalid_credentials?(error, _context = {})
        details = error_hash(error.message)
        # error_hash returns false for messages it cannot parse
        return false unless details
        return false if details['message'] != 'Invalid Application-ID or API key'

        { 'application_id' => details['application_id'] }
      end

      # Public: Check if the sent records are not too big
      #
      # context[:records] - list of records to push
      #
      # Records cannot weight more that 10Kb. If we're getting this error it
      # means that one of the records is too big, so we'll try to give
      # informations about it so the user can debug it.
      def self.record_too_big?(error, context = {})
        details = error_hash(error.message)
        # error_hash returns false for messages it cannot parse
        return false unless details

        message = details['message']
        return false if message !~ /^Record .* is too big .*/

        # Getting the record size
        size, = /.*size=(.*) bytes.*/.match(message).captures
        size = Filesize.from("#{size} B").pretty
        object_id = details['objectID']

        # Getting record details. The record may not be found in the context;
        # we then still report the size instead of crashing.
        record = Utils.find_by_key(context[:records], :objectID, object_id) || {}

        {
          'object_id' => object_id,
          'object_title' => record[:title],
          'object_url' => record[:url],
          'object_hint' => record[:content].to_s[0..100],
          'nodes_to_index' => Configurator.algolia('nodes_to_index'),
          'size' => size,
          'size_limit' => '10 Kb'
        }
      end

      # Public: Check if one of the index settings is invalid
      #
      # context[:settings] - The settings passed to update the index
      #
      # The API will block any call that tries to update a setting value that is
      # not available. We'll tell the user which one so they can fix their
      # issue.
      def self.unknown_settings?(error, context = {})
        details = error_hash(error.message)
        # error_hash returns false for messages it cannot parse
        return false unless details

        message = details['message']
        return false if message !~ /^Invalid object attributes.*/

        # Getting the unknown setting name
        name_match = /^Invalid object attributes: (.*) near line.*/.match(message)
        return false unless name_match

        setting_name, = name_match.captures
        # The settings may be absent from the context (it defaults to {})
        setting_value = (context[:settings] || {})[setting_name]

        {
          'setting_name' => setting_name,
          'setting_value' => setting_value
        }
      end

      # Public: Check if the index name is invalid
      #
      # _context - Not used
      #
      # Some characters are forbidden in index names
      def self.invalid_index_name?(error, _context = {})
        details = error_hash(error.message)
        # error_hash returns false for messages it cannot parse
        return false unless details

        message = details['message']
        return false if message !~ /^indexName is not valid.*/

        { 'index_name' => Configurator.index_name }
      end
    end
  end
end