jekyll-algolia 1.1.5 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,45 +15,49 @@ module Jekyll
15
15
  'nodes_to_index' => 'p',
16
16
  'indexing_batch_size' => 1000,
17
17
  'settings' => {
18
- 'distinct' => true,
19
- 'attributeForDistinct' => 'url',
20
- 'attributesForFaceting' => %w[
21
- searchable(tags)
22
- searchable(type)
23
- searchable(title)
18
+ # Searchable attributes
19
+ 'searchableAttributes' => %w[
20
+ title
21
+ headings
22
+ unordered(content)
23
+ collection,categories,tags
24
24
  ],
25
+ # Custom Ranking
25
26
  'customRanking' => [
26
27
  'desc(date)',
27
- 'desc(weight.heading)',
28
- 'asc(weight.position)'
28
+ 'desc(custom_ranking.heading)',
29
+ 'asc(custom_ranking.position)'
29
30
  ],
30
- 'highlightPreTag' => '<em class="ais-Highlight">',
31
- 'highlightPostTag' => '</em>',
32
- 'searchableAttributes' => %w[
33
- title
34
- hierarchy.lvl0
35
- hierarchy.lvl1
36
- hierarchy.lvl2
37
- hierarchy.lvl3
38
- hierarchy.lvl4
39
- hierarchy.lvl5
40
- unordered(content)
41
- collection,unordered(categories),unordered(tags)
31
+ 'unretrievableAttributes' => [
32
+ 'custom_ranking'
42
33
  ],
43
- # We want to allow highlight in more keys than what we search on
34
+ # Highlight
44
35
  'attributesToHighlight' => %w[
45
36
  title
46
- hierarchy.lvl0
47
- hierarchy.lvl1
48
- hierarchy.lvl2
49
- hierarchy.lvl3
50
- hierarchy.lvl4
51
- hierarchy.lvl5
37
+ headings
52
38
  content
53
39
  html
54
40
  collection
55
41
  categories
56
42
  tags
43
+ ],
44
+ 'highlightPreTag' => '<em class="ais-Highlight">',
45
+ 'highlightPostTag' => '</em>',
46
+ # Snippet
47
+ 'attributesToSnippet' => %w[
48
+ content:55
49
+ ],
50
+ 'snippetEllipsisText' => '…',
51
+ # Distinct
52
+ 'distinct' => true,
53
+ 'attributeForDistinct' => 'url',
54
+ # Faceting
55
+ 'attributesForFaceting' => %w[
56
+ type
57
+ searchable(collection)
58
+ searchable(categories)
59
+ searchable(tags)
60
+ searchable(title)
57
61
  ]
58
62
  }
59
63
  }.freeze
@@ -68,7 +72,6 @@ module Jekyll
68
72
  Logger.silent { config = Jekyll.configuration } if config.nil?
69
73
 
70
74
  @config = config
71
- @config['exclude'] = files_excluded_from_render
72
75
 
73
76
  @config = disable_other_plugins(@config)
74
77
 
@@ -212,23 +215,14 @@ module Jekyll
212
215
  false
213
216
  end
214
217
 
215
- # Public: List of files to exclude from the Jekyll build
218
+ # Public: Returns true if the command should always update the settings
216
219
  #
217
- # We skip all files usually ignored by Jekyll, plus any file that should
218
- # not be indexed.
219
- def self.files_excluded_from_render
220
- site_exclude = get('exclude') || []
221
- algolia_exclude = algolia('files_to_exclude') || []
222
-
223
- excluded_files = site_exclude + algolia_exclude
224
-
225
- # 404 pages are not Jekyll defaults but a convention adopted by GitHub
226
- # pages. We don't want to index those.
227
- # https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
228
- excluded_files << '404.html'
229
- excluded_files << '404.md'
230
-
231
- excluded_files
220
+ # When set to true, the index settings will always be updated, no matter
221
+ # if they've been modified or not
222
+ def self.force_settings?
223
+ value = get('force_settings')
224
+ return true if value == true
225
+ false
232
226
  end
233
227
 
234
228
  # Public: Disable features from other Jekyll plugins that might interfere
@@ -12,7 +12,7 @@ module Jekyll
12
12
  #
13
13
  # file - The Jekyll file to process
14
14
  def self.run(file)
15
- # Getting all hierarchical nodes from the HTML input
15
+ # Getting all nodes from the HTML input
16
16
  raw_records = extract_raw_records(file.content)
17
17
  # Getting file metadata
18
18
  shared_metadata = FileBrowser.metadata(file)
@@ -47,7 +47,7 @@ module Jekyll
47
47
  end
48
48
 
49
49
  # Public: Extract raw records from the file, including content for each
50
- # node to index and hierarchy
50
+ # node and its headings
51
51
  #
52
52
  # content - The HTML content to parse
53
53
  def self.extract_raw_records(content)
@@ -14,46 +14,72 @@ module Jekyll
14
14
  module FileBrowser
15
15
  include Jekyll::Algolia
16
16
 
17
- # Public: Check if the specified file is a static Jekyll asset
18
- #
19
- # file - The Jekyll file
20
- #
21
- # We don't index static assets (js, css, images)
22
- def self.static_file?(file)
23
- file.is_a?(Jekyll::StaticFile)
24
- end
25
-
26
17
  # Public: Return the absolute path of a Jekyll file
27
18
  #
28
19
  # file - The Jekyll file to inspect
29
- #
30
- # Jekyll handles the .path property of some files as relative to the root
31
- # (pages) or as an absolute paths (posts and static assets). We make sure
32
- # we have a consistent way of accessing it
33
- def self.absolute_path(file)
34
- pathname = Pathname.new(file.path)
20
+ def self.absolute_path(filepath)
21
+ pathname = Pathname.new(filepath)
35
22
  return pathname.cleanpath.to_s if pathname.absolute?
36
23
 
37
- File.expand_path(File.join(Configurator.get('source'), file.path))
24
+ File.expand_path(File.join(Configurator.get('source'), filepath))
38
25
  end
39
26
 
40
27
  # Public: Return the path of a Jekyll file relative to the Jekyll source
41
28
  #
42
29
  # file - The Jekyll file to inspect
43
- #
44
- # Jekyll handles the .path property of some files as relative to the root
45
- # (pages) or as an absolute paths (posts and static assets). We make sure
46
- # we have a consistent way of accessing it
47
- def self.relative_path(file)
48
- pathname = Pathname.new(file.path)
49
- return file.path if pathname.relative?
30
+ def self.relative_path(filepath)
31
+ pathname = Pathname.new(filepath)
32
+ config_source = Configurator.get('source') || ''
33
+ jekyll_source = Pathname.new(File.expand_path(config_source))
34
+
35
+ # Removing any starting ./
36
+ if pathname.relative?
37
+ fullpath = File.expand_path(File.join(jekyll_source, pathname))
38
+ return fullpath.gsub(%r{^#{jekyll_source}/}, '')
39
+ end
50
40
 
51
- jekyll_source = Pathname.new(
52
- File.expand_path(Configurator.get('source'))
53
- )
54
41
  pathname.relative_path_from(jekyll_source).cleanpath.to_s
55
42
  end
56
43
 
44
+ # Public: Check if the file should be indexed
45
+ #
46
+ # file - The Jekyll file
47
+ #
48
+ # There are many reasons a file should not be indexed. We need to exclude
49
+ # all the static assets, only keep the actual content.
50
+ def self.indexable?(file)
51
+ return false if static_file?(file)
52
+ return false if is_404?(file)
53
+ return false unless allowed_extension?(file)
54
+ return false if excluded_from_config?(file)
55
+ return false if excluded_from_hook?(file)
56
+
57
+ true
58
+ end
59
+
60
+ # Public: Check if the specified file is a static Jekyll asset
61
+ #
62
+ # file - The Jekyll file
63
+ #
64
+ # We don't index static assets (js, css, images)
65
+ def self.static_file?(file)
66
+ file.is_a?(Jekyll::StaticFile)
67
+ end
68
+
69
+ # Public: Check if the file is a 404 error page
70
+ #
71
+ # file - The Jekyll file
72
+ #
73
+ # 404 pages are not Jekyll defaults but a convention adopted by GitHub
74
+ # pages. We don't want to index those.
75
+ # Source: https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
76
+ #
77
+ # rubocop:disable Naming/PredicateName
78
+ def self.is_404?(file)
79
+ ['404.md', '404.html'].include?(File.basename(file.path))
80
+ end
81
+ # rubocop:enable Naming/PredicateName
82
+
57
83
  # Public: Check if the file has one of the allowed extensions
58
84
  #
59
85
  # file - The Jekyll file
@@ -74,18 +100,13 @@ module Jekyll
74
100
  def self.excluded_from_config?(file)
75
101
  excluded_patterns = Configurator.algolia('files_to_exclude')
76
102
  jekyll_source = Configurator.get('source')
103
+ path = absolute_path(file.path)
77
104
 
78
- # Transform the glob patterns into a real list of files
79
- excluded_files = []
80
- Dir.chdir(jekyll_source) do
81
- excluded_patterns.each do |pattern|
82
- Dir.glob(pattern).each do |match|
83
- excluded_files << File.expand_path(match)
84
- end
85
- end
105
+ excluded_patterns.each do |pattern|
106
+ pattern = File.expand_path(File.join(jekyll_source, pattern))
107
+ return true if File.fnmatch(pattern, path, File::FNM_PATHNAME)
86
108
  end
87
-
88
- excluded_files.include?(absolute_path(file))
109
+ false
89
110
  end
90
111
 
91
112
  # Public: Check if the file has been excluded by running a custom user
@@ -96,20 +117,6 @@ module Jekyll
96
117
  Hooks.should_be_excluded?(file.path)
97
118
  end
98
119
 
99
- # Public: Check if the file should be indexed
100
- #
101
- # file - The Jekyll file
102
- #
103
- # There are many reasons a file should not be indexed. We need to exclude
104
- # all the static assets, only keep the actual content.
105
- def self.indexable?(file)
106
- return false if static_file?(file)
107
- return false unless allowed_extension?(file)
108
- return false if excluded_from_hook?(file)
109
-
110
- true
111
- end
112
-
113
120
  # Public: Return a hash of all the file metadata
114
121
  #
115
122
  # file - The Jekyll file
@@ -121,6 +128,8 @@ module Jekyll
121
128
  raw_data = raw_data(file)
122
129
  specific_data = {
123
130
  collection: collection(file),
131
+ tags: tags(file),
132
+ categories: categories(file),
124
133
  date: date(file),
125
134
  excerpt_html: excerpt_html(file),
126
135
  excerpt_text: excerpt_text(file),
@@ -153,6 +162,11 @@ module Jekyll
153
162
  end
154
163
  data.delete('excerpt')
155
164
 
165
+ # Delete other keys added by Jekyll that are not in the front-matter and
166
+ # not needed for search
167
+ data.delete('draft')
168
+ data.delete('ext')
169
+
156
170
  # Convert all values to a version that can be serialized to JSON
157
171
  data = Utils.jsonify(data)
158
172
 
@@ -184,30 +198,39 @@ module Jekyll
184
198
  file.url
185
199
  end
186
200
 
187
- # Public: Returns a timestamp of the file date
201
+ # Public: Returns the list of tags of a file, defaults to an empty array
188
202
  #
189
203
  # file - The Jekyll file
204
+ def self.tags(file)
205
+ file.data['tags'] || []
206
+ end
207
+
208
+ # Public: Returns the list of categories of a file, defaults to an empty array
190
209
  #
191
- # All collections (including posts) will have a date taken either from the
192
- # front-matter or the filename prefix. If none is set, Jekyll will use the
193
- # current date.
210
+ # file - The Jekyll file
211
+ def self.categories(file)
212
+ file.data['categories'] || []
213
+ end
214
+
215
+ # Public: Returns a timestamp of the file date
194
216
  #
195
- # For pages, only dates defined in the front-matter will be used.
217
+ # file - The Jekyll file
196
218
  #
197
- # Note that because the default date is the current one if none is
198
- # defined, we have to make sure the date is actually nil when we index it.
199
- # Otherwise the diff indexing mode will think that records have changed
200
- # while they haven't.
219
+ # Posts have their date coming from the filepath, or the front-matter.
220
+ # Pages and other collection items can only have a date set in
221
+ # front-matter.
201
222
  def self.date(file)
202
- date = file.data['date']
203
- return nil if date.nil?
223
+ # Collections get their date from .date, while pages read it from .data.
224
+ # Jekyll by default will set the date of collection to the current date,
225
+ # but we overwrote this.
226
+ date = if file.respond_to?(:date)
227
+ file.date
228
+ else
229
+ file.data['date']
230
+ end
204
231
 
205
- # The date is *exactly* the time where the `jekyll algolia` was run.
206
- # What a coincidence! It's a safe bet to assume that the original date
207
- # was nil and has been overwritten by Jekyll
208
- return nil if date.to_i == Jekyll::Algolia.start_time.to_i
209
-
210
- date.to_i
232
+ return nil if date.nil?
233
+ date.to_time.to_i
211
234
  end
212
235
 
213
236
  # Public: Returns the raw excerpt of a file, directly as returned by
@@ -224,7 +247,7 @@ module Jekyll
224
247
  return file.data['excerpt'].to_s
225
248
  end
226
249
  rescue StandardError
227
- return nil
250
+ nil
228
251
  end
229
252
 
230
253
  # Public: Returns the HTML version of the excerpt
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'algoliasearch'
4
+ require 'yaml'
5
+ require 'algolia_html_extractor'
4
6
 
5
7
  module Jekyll
6
8
  module Algolia
@@ -17,8 +19,16 @@ module Jekyll
17
19
  application_id: Configurator.application_id,
18
20
  api_key: Configurator.api_key
19
21
  )
22
+ @index = ::Algolia::Index.new(Configurator.index_name)
20
23
 
21
24
  set_user_agent
25
+
26
+ self
27
+ end
28
+
29
+ # Public: Returns the Algolia index object
30
+ def self.index
31
+ @index
22
32
  end
23
33
 
24
34
  # Public: Set the User-Agent to send to the API
@@ -38,33 +48,11 @@ module Jekyll
38
48
  ::Algolia.set_extra_header('User-Agent', user_agent)
39
49
  end
40
50
 
41
- # Public: Returns an Algolia Index object from an index name
42
- #
43
- # index_name - String name of the index
44
- def self.index(index_name)
45
- ::Algolia::Index.new(index_name)
46
- end
47
-
48
- # Public: Check if an index exists
49
- #
50
- # index_name - Name of the index
51
- #
52
- # Note: there is no API endpoint to do that, so we try to get the settings
53
- # instead, which will fail if the index does not exist
54
- def self.index?(index_name)
55
- index(index_name).get_settings
56
- return true
57
- rescue StandardError
58
- return false
59
- end
60
-
61
51
  # Public: Returns an array of all the objectIDs in the index
62
52
  #
63
- # index - Algolia Index to target
64
- #
65
53
  # The returned array is sorted. It won't have any impact on the way it is
66
54
  # processed, but makes debugging easier when comparing arrays is needed.
67
- def self.remote_object_ids(index)
55
+ def self.remote_object_ids
68
56
  list = []
69
57
  Logger.verbose(
70
58
  "I:Inspecting existing records in index #{index.name}..."
@@ -88,49 +76,22 @@ module Jekyll
88
76
  records.map { |record| record[:objectID] }.compact.sort
89
77
  end
90
78
 
91
- # Public: Update settings of the index
92
- #
93
- # index - The Algolia Index
94
- #
95
- # Does nothing in dry run mode
96
- # Settings will only be updated in the first push, and if custom settings
97
- # are defined in _config.yml. Otherwise, they are left untouched, allowing
98
- # users to configure them through their dashboard.
99
- def self.update_settings(index)
100
- has_custom_settings = !Configurator.algolia('settings').nil?
101
- index_exists = index?(index.name)
102
-
103
- # No need to update the settings if the index is already configured and
104
- # the user did not specify custom settings
105
- return if index_exists && !has_custom_settings
106
-
107
- Logger.verbose('I:Updating settings')
108
- return if Configurator.dry_run?
109
- settings = Configurator.settings
110
- begin
111
- index.set_settings!(settings)
112
- rescue StandardError => error
113
- ErrorHandler.stop(error, settings: settings)
114
- end
115
- end
116
-
117
79
  # Public: Update records of the index
118
80
  #
119
- # index_name - The Algolia index
120
81
  # old_records_ids - Ids of records to delete from the index
121
82
  # new_records - Records to add to the index
122
83
  #
123
84
  # Note: All operations will be done in one batch, assuring an atomic
124
85
  # update
125
86
  # Does nothing in dry run mode
126
- def self.update_records(index_name, old_records_ids, new_records)
87
+ def self.update_records(old_records_ids, new_records)
127
88
  # Stop if nothing to change
128
89
  if old_records_ids.empty? && new_records.empty?
129
- Logger.log('I:Nothing to index. Your content is already up to date.')
90
+ Logger.log('I:Content is already up to date.')
130
91
  return
131
92
  end
132
93
 
133
- Logger.log("I:Updating records in index #{index_name}...")
94
+ Logger.log("I:Updating records in index #{index.name}...")
134
95
  Logger.log("I:Records to delete: #{old_records_ids.length}")
135
96
  Logger.log("I:Records to add: #{new_records.length}")
136
97
  return if Configurator.dry_run?
@@ -141,19 +102,31 @@ module Jekyll
141
102
  operations = []
142
103
  old_records_ids.each do |object_id|
143
104
  operations << {
144
- action: 'deleteObject', indexName: index_name,
105
+ action: 'deleteObject', indexName: index.name,
145
106
  body: { objectID: object_id }
146
107
  }
147
108
  end
148
109
  operations += new_records.map do |new_record|
149
- { action: 'addObject', indexName: index_name, body: new_record }
110
+ { action: 'addObject', indexName: index.name, body: new_record }
150
111
  end
151
112
 
152
113
  # Run the batches in slices if they are too large
153
114
  batch_size = Configurator.algolia('indexing_batch_size')
154
- operations.each_slice(batch_size) do |slice|
115
+ slices = operations.each_slice(batch_size).to_a
116
+
117
+ should_have_progress_bar = (slices.length > 1)
118
+ if should_have_progress_bar
119
+ progress_bar = ProgressBar.create(
120
+ total: slices.length,
121
+ format: 'Pushing records (%j%%) |%B|'
122
+ )
123
+ end
124
+
125
+ slices.each do |slice|
155
126
  begin
156
127
  ::Algolia.batch!(slice)
128
+
129
+ progress_bar.increment if should_have_progress_bar
157
130
  rescue StandardError => error
158
131
  records = slice.map do |record|
159
132
  record[:body]
@@ -163,6 +136,106 @@ module Jekyll
163
136
  end
164
137
  end
165
138
 
139
+ # Public: Get a unique settingID for the current settings
140
+ #
141
+ # The settingID is generated as a hash of the current settings. As it will
142
+ # be stored in the userData key of the resulting config, we exclude that
143
+ # key from the hashing.
144
+ def self.local_setting_id
145
+ settings = Configurator.settings
146
+ settings.delete('userData')
147
+ AlgoliaHTMLExtractor.uuid(settings)
148
+ end
149
+
150
+ # Public: Get the settings of the remote index
151
+ #
152
+ # In case the index is not accessible, it will return nil
153
+ def self.remote_settings
154
+ index.get_settings
155
+ rescue StandardError
156
+ nil
157
+ end
158
+
159
+ # Public: Smart update of the settings of the index
160
+ #
161
+ # This will first compare the settings about to be pushed with the
162
+ # settings already pushed. It will compare userData.settingID for that.
163
+ # If the settingID is the same, we don't push as this won't change
164
+ # anything. We will still check if the remote config seems to have been
165
+ # manually altered though, and warn the user that this is not the
166
+ # preferred way of doing so.
167
+ #
168
+ # If the settingIDs are not matching, it means our config is different, so
169
+ # we push it, overriding the settingID for next push.
170
+ def self.update_settings
171
+ current_remote_settings = remote_settings || {}
172
+ remote_setting_id = current_remote_settings.dig('userData', 'settingID')
173
+
174
+ settings = Configurator.settings
175
+ setting_id = local_setting_id
176
+
177
+ are_settings_forced = Configurator.force_settings?
178
+
179
+ # The config we're about to push is the same we pushed previously. We
180
+ # won't push again.
181
+ if setting_id == remote_setting_id && !are_settings_forced
182
+ Logger.log('I:Settings are already up to date.')
183
+ # Check if remote config has been changed outside of the plugin, so we
184
+ # can warn users that they should not alter their config from outside
185
+ # of the plugin.
186
+ current_remote_settings.delete('userData')
187
+ changed_keys = Utils.diff_keys(settings, current_remote_settings)
188
+ unless changed_keys.nil?
189
+ warn_of_manual_dashboard_editing(changed_keys)
190
+ end
191
+
192
+ return
193
+ end
194
+
195
+ # Settings have changed, we push them
196
+ settings['userData'] = {
197
+ 'settingID' => setting_id,
198
+ 'pluginVersion' => VERSION
199
+ }
200
+
201
+ Logger.log("I:Updating settings of index #{index.name}")
202
+ return if Configurator.dry_run?
203
+ set_settings(settings)
204
+ end
205
+
206
+ # Public: Set new settings to an index
207
+ #
208
+ # Will dispatch to the error handler if it fails
209
+ # rubocop:disable Naming/AccessorMethodName
210
+ def self.set_settings(settings)
211
+ index.set_settings!(settings)
212
+ rescue StandardError => error
213
+ ErrorHandler.stop(error, settings: settings)
214
+ end
215
+ # rubocop:enable Naming/AccessorMethodName
216
+
217
+ # Public: Warn users that they have some settings manually configured in
218
+ # their dashboard
219
+ #
220
+ # When users change some settings in their dashboard, those settings might
221
+ # get overwritten by the plugin. We can't prevent that, but we can warn
222
+ # them when we detect they changed something.
223
+ def self.warn_of_manual_dashboard_editing(changed_keys)
224
+ # Transform the hash into readable YAML
225
+ yaml_lines = changed_keys
226
+ .to_yaml(indentation: 2)
227
+ .split("\n")[1..-1]
228
+ yaml_lines.map! do |line|
229
+ line = line.gsub(/^ */) { |spaces| ' ' * spaces.length }
230
+ line = line.gsub('- ', '  - ')
231
+ "W:    #{line}"
232
+ end
233
+ Logger.known_message(
234
+ 'settings_manually_edited',
235
+ settings: yaml_lines.join("\n")
236
+ )
237
+ end
238
+
166
239
  # Public: Push all records to Algolia and configure the index
167
240
  #
168
241
  # records - Records to push
@@ -180,14 +253,11 @@ module Jekyll
180
253
  exit 1
181
254
  end
182
255
 
183
- index_name = Configurator.index_name
184
- index = index(index_name)
185
-
186
256
  # Update settings
187
- update_settings(index)
257
+ update_settings
188
258
 
189
259
  # Getting list of objectID in remote and locally
190
- remote_ids = remote_object_ids(index)
260
+ remote_ids = remote_object_ids
191
261
  local_ids = local_object_ids(records)
192
262
 
193
263
  # Getting list of what to add and what to delete
@@ -196,7 +266,7 @@ module Jekyll
196
266
  new_records = records.select do |record|
197
267
  new_records_ids.include?(record[:objectID])
198
268
  end
199
- update_records(index_name, old_records_ids, new_records)
269
+ update_records(old_records_ids, new_records)
200
270
 
201
271
  Logger.log('I:✔ Indexing complete')
202
272
  end