jekyll-algolia 1.1.5 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,45 +15,49 @@ module Jekyll
15
15
  'nodes_to_index' => 'p',
16
16
  'indexing_batch_size' => 1000,
17
17
  'settings' => {
18
- 'distinct' => true,
19
- 'attributeForDistinct' => 'url',
20
- 'attributesForFaceting' => %w[
21
- searchable(tags)
22
- searchable(type)
23
- searchable(title)
18
+ # Searchable attributes
19
+ 'searchableAttributes' => %w[
20
+ title
21
+ headings
22
+ unordered(content)
23
+ collection,categories,tags
24
24
  ],
25
+ # Custom Ranking
25
26
  'customRanking' => [
26
27
  'desc(date)',
27
- 'desc(weight.heading)',
28
- 'asc(weight.position)'
28
+ 'desc(custom_ranking.heading)',
29
+ 'asc(custom_ranking.position)'
29
30
  ],
30
- 'highlightPreTag' => '<em class="ais-Highlight">',
31
- 'highlightPostTag' => '</em>',
32
- 'searchableAttributes' => %w[
33
- title
34
- hierarchy.lvl0
35
- hierarchy.lvl1
36
- hierarchy.lvl2
37
- hierarchy.lvl3
38
- hierarchy.lvl4
39
- hierarchy.lvl5
40
- unordered(content)
41
- collection,unordered(categories),unordered(tags)
31
+ 'unretrievableAttributes' => [
32
+ 'custom_ranking'
42
33
  ],
43
- # We want to allow highlight in more keys than what we search on
34
+ # Highlight
44
35
  'attributesToHighlight' => %w[
45
36
  title
46
- hierarchy.lvl0
47
- hierarchy.lvl1
48
- hierarchy.lvl2
49
- hierarchy.lvl3
50
- hierarchy.lvl4
51
- hierarchy.lvl5
37
+ headings
52
38
  content
53
39
  html
54
40
  collection
55
41
  categories
56
42
  tags
43
+ ],
44
+ 'highlightPreTag' => '<em class="ais-Highlight">',
45
+ 'highlightPostTag' => '</em>',
46
+ # Snippet
47
+ 'attributesToSnippet' => %w[
48
+ content:55
49
+ ],
50
+ 'snippetEllipsisText' => '…',
51
+ # Distinct
52
+ 'distinct' => true,
53
+ 'attributeForDistinct' => 'url',
54
+ # Faceting
55
+ 'attributesForFaceting' => %w[
56
+ type
57
+ searchable(collection)
58
+ searchable(categories)
59
+ searchable(tags)
60
+ searchable(title)
57
61
  ]
58
62
  }
59
63
  }.freeze
@@ -68,7 +72,6 @@ module Jekyll
68
72
  Logger.silent { config = Jekyll.configuration } if config.nil?
69
73
 
70
74
  @config = config
71
- @config['exclude'] = files_excluded_from_render
72
75
 
73
76
  @config = disable_other_plugins(@config)
74
77
 
@@ -212,23 +215,14 @@ module Jekyll
212
215
  false
213
216
  end
214
217
 
215
- # Public: List of files to exclude from the Jekyll build
218
+ # Public: Returns true if the command should always update the settings
216
219
  #
217
- # We skip all files usually ignored by Jekyll, plus any file that should
218
- # not be indexed.
219
- def self.files_excluded_from_render
220
- site_exclude = get('exclude') || []
221
- algolia_exclude = algolia('files_to_exclude') || []
222
-
223
- excluded_files = site_exclude + algolia_exclude
224
-
225
- # 404 pages are not Jekyll defaults but a convention adopted by GitHub
226
- # pages. We don't want to index those.
227
- # https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
228
- excluded_files << '404.html'
229
- excluded_files << '404.md'
230
-
231
- excluded_files
220
+ # When set to true, the index settings will always be updated, no matter
221
+ # if they've been modified or not
222
+ def self.force_settings?
223
+ value = get('force_settings')
224
+ return true if value == true
225
+ false
232
226
  end
233
227
 
234
228
  # Public: Disable features from other Jekyll plugins that might interfere
@@ -12,7 +12,7 @@ module Jekyll
12
12
  #
13
13
  # file - The Jekyll file to process
14
14
  def self.run(file)
15
- # Getting all hierarchical nodes from the HTML input
15
+ # Getting all nodes from the HTML input
16
16
  raw_records = extract_raw_records(file.content)
17
17
  # Getting file metadata
18
18
  shared_metadata = FileBrowser.metadata(file)
@@ -47,7 +47,7 @@ module Jekyll
47
47
  end
48
48
 
49
49
  # Public: Extract raw records from the file, including content for each
50
- # node to index and hierarchy
50
+ # node and its headings
51
51
  #
52
52
  # content - The HTML content to parse
53
53
  def self.extract_raw_records(content)
@@ -14,46 +14,72 @@ module Jekyll
14
14
  module FileBrowser
15
15
  include Jekyll::Algolia
16
16
 
17
- # Public: Check if the specified file is a static Jekyll asset
18
- #
19
- # file - The Jekyll file
20
- #
21
- # We don't index static assets (js, css, images)
22
- def self.static_file?(file)
23
- file.is_a?(Jekyll::StaticFile)
24
- end
25
-
26
17
  # Public: Return the absolute path of a Jekyll file
27
18
  #
28
19
  # filepath - The path of the Jekyll file to inspect
29
- #
30
- # Jekyll handles the .path property of some files as relative to the root
31
- # (pages) or as an absolute paths (posts and static assets). We make sure
32
- # we have a consistent way of accessing it
33
- def self.absolute_path(file)
34
- pathname = Pathname.new(file.path)
20
+ def self.absolute_path(filepath)
21
+ pathname = Pathname.new(filepath)
35
22
  return pathname.cleanpath.to_s if pathname.absolute?
36
23
 
37
- File.expand_path(File.join(Configurator.get('source'), file.path))
24
+ File.expand_path(File.join(Configurator.get('source'), filepath))
38
25
  end
39
26
 
40
27
  # Public: Return the path of a Jekyll file relative to the Jekyll source
41
28
  #
42
29
  # filepath - The path of the Jekyll file to inspect
43
- #
44
- # Jekyll handles the .path property of some files as relative to the root
45
- # (pages) or as an absolute paths (posts and static assets). We make sure
46
- # we have a consistent way of accessing it
47
- def self.relative_path(file)
48
- pathname = Pathname.new(file.path)
49
- return file.path if pathname.relative?
30
+ def self.relative_path(filepath)
31
+ pathname = Pathname.new(filepath)
32
+ config_source = Configurator.get('source') || ''
33
+ jekyll_source = Pathname.new(File.expand_path(config_source))
34
+
35
+ # Removing any starting ./
36
+ if pathname.relative?
37
+ fullpath = File.expand_path(File.join(jekyll_source, pathname))
38
+ return fullpath.gsub(%r{^#{jekyll_source}/}, '')
39
+ end
50
40
 
51
- jekyll_source = Pathname.new(
52
- File.expand_path(Configurator.get('source'))
53
- )
54
41
  pathname.relative_path_from(jekyll_source).cleanpath.to_s
55
42
  end
56
43
 
44
+ # Public: Check if the file should be indexed
45
+ #
46
+ # file - The Jekyll file
47
+ #
48
+ # There are many reasons a file should not be indexed. We need to exclude
49
+ # all the static assets, only keep the actual content.
50
+ def self.indexable?(file)
51
+ return false if static_file?(file)
52
+ return false if is_404?(file)
53
+ return false unless allowed_extension?(file)
54
+ return false if excluded_from_config?(file)
55
+ return false if excluded_from_hook?(file)
56
+
57
+ true
58
+ end
59
+
60
+ # Public: Check if the specified file is a static Jekyll asset
61
+ #
62
+ # file - The Jekyll file
63
+ #
64
+ # We don't index static assets (js, css, images)
65
+ def self.static_file?(file)
66
+ file.is_a?(Jekyll::StaticFile)
67
+ end
68
+
69
+ # Public: Check if the file is a 404 error page
70
+ #
71
+ # file - The Jekyll file
72
+ #
73
+ # 404 pages are not Jekyll defaults but a convention adopted by GitHub
74
+ # pages. We don't want to index those.
75
+ # Source: https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/
76
+ #
77
+ # rubocop:disable Naming/PredicateName
78
+ def self.is_404?(file)
79
+ ['404.md', '404.html'].include?(File.basename(file.path))
80
+ end
81
+ # rubocop:enable Naming/PredicateName
82
+
57
83
  # Public: Check if the file has one of the allowed extensions
58
84
  #
59
85
  # file - The Jekyll file
@@ -74,18 +100,13 @@ module Jekyll
74
100
  def self.excluded_from_config?(file)
75
101
  excluded_patterns = Configurator.algolia('files_to_exclude')
76
102
  jekyll_source = Configurator.get('source')
103
+ path = absolute_path(file.path)
77
104
 
78
- # Transform the glob patterns into a real list of files
79
- excluded_files = []
80
- Dir.chdir(jekyll_source) do
81
- excluded_patterns.each do |pattern|
82
- Dir.glob(pattern).each do |match|
83
- excluded_files << File.expand_path(match)
84
- end
85
- end
105
+ excluded_patterns.each do |pattern|
106
+ pattern = File.expand_path(File.join(jekyll_source, pattern))
107
+ return true if File.fnmatch(pattern, path, File::FNM_PATHNAME)
86
108
  end
87
-
88
- excluded_files.include?(absolute_path(file))
109
+ false
89
110
  end
90
111
 
91
112
  # Public: Check if the file has been excluded by running a custom user
@@ -96,20 +117,6 @@ module Jekyll
96
117
  Hooks.should_be_excluded?(file.path)
97
118
  end
98
119
 
99
- # Public: Check if the file should be indexed
100
- #
101
- # file - The Jekyll file
102
- #
103
- # There are many reasons a file should not be indexed. We need to exclude
104
- # all the static assets, only keep the actual content.
105
- def self.indexable?(file)
106
- return false if static_file?(file)
107
- return false unless allowed_extension?(file)
108
- return false if excluded_from_hook?(file)
109
-
110
- true
111
- end
112
-
113
120
  # Public: Return a hash of all the file metadata
114
121
  #
115
122
  # file - The Jekyll file
@@ -121,6 +128,8 @@ module Jekyll
121
128
  raw_data = raw_data(file)
122
129
  specific_data = {
123
130
  collection: collection(file),
131
+ tags: tags(file),
132
+ categories: categories(file),
124
133
  date: date(file),
125
134
  excerpt_html: excerpt_html(file),
126
135
  excerpt_text: excerpt_text(file),
@@ -153,6 +162,11 @@ module Jekyll
153
162
  end
154
163
  data.delete('excerpt')
155
164
 
165
+ # Delete other keys added by Jekyll that are not in the front-matter and
166
+ # not needed for search
167
+ data.delete('draft')
168
+ data.delete('ext')
169
+
156
170
  # Convert all values to a version that can be serialized to JSON
157
171
  data = Utils.jsonify(data)
158
172
 
@@ -184,30 +198,39 @@ module Jekyll
184
198
  file.url
185
199
  end
186
200
 
187
- # Public: Returns a timestamp of the file date
201
+ # Public: Returns the list of tags of a file, defaults to an empty array
188
202
  #
189
203
  # file - The Jekyll file
204
+ def self.tags(file)
205
+ file.data['tags'] || []
206
+ end
207
+
208
+ # Public: Returns the list of categories of a file, defaults to an empty array
190
209
  #
191
- # All collections (including posts) will have a date taken either from the
192
- # front-matter or the filename prefix. If none is set, Jekyll will use the
193
- # current date.
210
+ # file - The Jekyll file
211
+ def self.categories(file)
212
+ file.data['categories'] || []
213
+ end
214
+
215
+ # Public: Returns a timestamp of the file date
194
216
  #
195
- # For pages, only dates defined in the front-matter will be used.
217
+ # file - The Jekyll file
196
218
  #
197
- # Note that because the default date is the current one if none is
198
- # defined, we have to make sure the date is actually nil when we index it.
199
- # Otherwise the diff indexing mode will think that records have changed
200
- # while they haven't.
219
+ # Posts have their date coming from the filepath, or the front-matter.
220
+ # Pages and other collection items can only have a date set in
221
+ # front-matter.
201
222
  def self.date(file)
202
- date = file.data['date']
203
- return nil if date.nil?
223
+ # Collections get their date from .date, while pages read it from .data.
224
+ # Jekyll by default will set the date of collection to the current date,
225
+ # but we overwrote this.
226
+ date = if file.respond_to?(:date)
227
+ file.date
228
+ else
229
+ file.data['date']
230
+ end
204
231
 
205
- # The date is *exactly* the time where the `jekyll algolia` was run.
206
- # What a coincidence! It's a safe bet to assume that the original date
207
- # was nil and has been overwritten by Jekyll
208
- return nil if date.to_i == Jekyll::Algolia.start_time.to_i
209
-
210
- date.to_i
232
+ return nil if date.nil?
233
+ date.to_time.to_i
211
234
  end
212
235
 
213
236
  # Public: Returns the raw excerpt of a file, directly as returned by
@@ -224,7 +247,7 @@ module Jekyll
224
247
  return file.data['excerpt'].to_s
225
248
  end
226
249
  rescue StandardError
227
- return nil
250
+ nil
228
251
  end
229
252
 
230
253
  # Public: Returns the HTML version of the excerpt
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'algoliasearch'
4
+ require 'yaml'
5
+ require 'algolia_html_extractor'
4
6
 
5
7
  module Jekyll
6
8
  module Algolia
@@ -17,8 +19,16 @@ module Jekyll
17
19
  application_id: Configurator.application_id,
18
20
  api_key: Configurator.api_key
19
21
  )
22
+ @index = ::Algolia::Index.new(Configurator.index_name)
20
23
 
21
24
  set_user_agent
25
+
26
+ self
27
+ end
28
+
29
+ # Public: Returns the Algolia index object
30
+ def self.index
31
+ @index
22
32
  end
23
33
 
24
34
  # Public: Set the User-Agent to send to the API
@@ -38,33 +48,11 @@ module Jekyll
38
48
  ::Algolia.set_extra_header('User-Agent', user_agent)
39
49
  end
40
50
 
41
- # Public: Returns an Algolia Index object from an index name
42
- #
43
- # index_name - String name of the index
44
- def self.index(index_name)
45
- ::Algolia::Index.new(index_name)
46
- end
47
-
48
- # Public: Check if an index exists
49
- #
50
- # index_name - Name of the index
51
- #
52
- # Note: there is no API endpoint to do that, so we try to get the settings
53
- # instead, which will fail if the index does not exist
54
- def self.index?(index_name)
55
- index(index_name).get_settings
56
- return true
57
- rescue StandardError
58
- return false
59
- end
60
-
61
51
  # Public: Returns an array of all the objectIDs in the index
62
52
  #
63
- # index - Algolia Index to target
64
- #
65
53
  # The returned array is sorted. It won't have any impact on the way it is
66
54
  # processed, but makes debugging easier when comparing arrays is needed.
67
- def self.remote_object_ids(index)
55
+ def self.remote_object_ids
68
56
  list = []
69
57
  Logger.verbose(
70
58
  "I:Inspecting existing records in index #{index.name}..."
@@ -88,49 +76,22 @@ module Jekyll
88
76
  records.map { |record| record[:objectID] }.compact.sort
89
77
  end
90
78
 
91
- # Public: Update settings of the index
92
- #
93
- # index - The Algolia Index
94
- #
95
- # Does nothing in dry run mode
96
- # Settings will only be updated in the first push, and if custom settings
97
- # are defined in _config.yml. Otherwise, they are left untouched, allowing
98
- # users to configure them through their dashboard.
99
- def self.update_settings(index)
100
- has_custom_settings = !Configurator.algolia('settings').nil?
101
- index_exists = index?(index.name)
102
-
103
- # No need to update the settings if the index is already configured and
104
- # the user did not specify custom settings
105
- return if index_exists && !has_custom_settings
106
-
107
- Logger.verbose('I:Updating settings')
108
- return if Configurator.dry_run?
109
- settings = Configurator.settings
110
- begin
111
- index.set_settings!(settings)
112
- rescue StandardError => error
113
- ErrorHandler.stop(error, settings: settings)
114
- end
115
- end
116
-
117
79
  # Public: Update records of the index
118
80
  #
119
- # index_name - The Algolia index
120
81
  # old_records_ids - Ids of records to delete from the index
121
82
  # new_records - Records to add to the index
122
83
  #
123
84
  # Note: All operations will be done in one batch, assuring an atomic
124
85
  # update
125
86
  # Does nothing in dry run mode
126
- def self.update_records(index_name, old_records_ids, new_records)
87
+ def self.update_records(old_records_ids, new_records)
127
88
  # Stop if nothing to change
128
89
  if old_records_ids.empty? && new_records.empty?
129
- Logger.log('I:Nothing to index. Your content is already up to date.')
90
+ Logger.log('I:Content is already up to date.')
130
91
  return
131
92
  end
132
93
 
133
- Logger.log("I:Updating records in index #{index_name}...")
94
+ Logger.log("I:Updating records in index #{index.name}...")
134
95
  Logger.log("I:Records to delete: #{old_records_ids.length}")
135
96
  Logger.log("I:Records to add: #{new_records.length}")
136
97
  return if Configurator.dry_run?
@@ -141,19 +102,31 @@ module Jekyll
141
102
  operations = []
142
103
  old_records_ids.each do |object_id|
143
104
  operations << {
144
- action: 'deleteObject', indexName: index_name,
105
+ action: 'deleteObject', indexName: index.name,
145
106
  body: { objectID: object_id }
146
107
  }
147
108
  end
148
109
  operations += new_records.map do |new_record|
149
- { action: 'addObject', indexName: index_name, body: new_record }
110
+ { action: 'addObject', indexName: index.name, body: new_record }
150
111
  end
151
112
 
152
113
  # Run the batches in slices if they are too large
153
114
  batch_size = Configurator.algolia('indexing_batch_size')
154
- operations.each_slice(batch_size) do |slice|
115
+ slices = operations.each_slice(batch_size).to_a
116
+
117
+ should_have_progress_bar = (slices.length > 1)
118
+ if should_have_progress_bar
119
+ progress_bar = ProgressBar.create(
120
+ total: slices.length,
121
+ format: 'Pushing records (%j%%) |%B|'
122
+ )
123
+ end
124
+
125
+ slices.each do |slice|
155
126
  begin
156
127
  ::Algolia.batch!(slice)
128
+
129
+ progress_bar.increment if should_have_progress_bar
157
130
  rescue StandardError => error
158
131
  records = slice.map do |record|
159
132
  record[:body]
@@ -163,6 +136,106 @@ module Jekyll
163
136
  end
164
137
  end
165
138
 
139
+ # Public: Get a unique settingID for the current settings
140
+ #
141
+ # The settingID is generated as a hash of the current settings. As it will
142
+ # be stored in the userData key of the resulting config, we exclude that
143
+ # key from the hashing.
144
+ def self.local_setting_id
145
+ settings = Configurator.settings
146
+ settings.delete('userData')
147
+ AlgoliaHTMLExtractor.uuid(settings)
148
+ end
149
+
150
+ # Public: Get the settings of the remote index
151
+ #
152
+ # In case the index is not accessible, it will return nil
153
+ def self.remote_settings
154
+ index.get_settings
155
+ rescue StandardError
156
+ nil
157
+ end
158
+
159
+ # Public: Smart update of the settings of the index
160
+ #
161
+ # This will first compare the settings about to be pushed with the
162
+ # settings already pushed. It will compare userData.settingID for that.
163
+ # If the settingID is the same, we don't push as this won't change
164
+ # anything. We will still check if the remote config seems to have been
165
+ # manually altered though, and warn the user that this is not the
166
+ # preferred way of doing so.
167
+ #
168
+ # If the settingIDs do not match, it means our config is different, so
169
+ # we push it, overriding the settingID for next push.
170
+ def self.update_settings
171
+ current_remote_settings = remote_settings || {}
172
+ remote_setting_id = current_remote_settings.dig('userData', 'settingID')
173
+
174
+ settings = Configurator.settings
175
+ setting_id = local_setting_id
176
+
177
+ are_settings_forced = Configurator.force_settings?
178
+
179
+ # The config we're about to push is the same we pushed previously. We
180
+ # won't push again.
181
+ if setting_id == remote_setting_id && !are_settings_forced
182
+ Logger.log('I:Settings are already up to date.')
183
+ # Check if remote config has been changed outside of the plugin, so we
184
+ # can warn users that they should not alter their config from outside
185
+ # of the plugin.
186
+ current_remote_settings.delete('userData')
187
+ changed_keys = Utils.diff_keys(settings, current_remote_settings)
188
+ unless changed_keys.nil?
189
+ warn_of_manual_dashboard_editing(changed_keys)
190
+ end
191
+
192
+ return
193
+ end
194
+
195
+ # Settings have changed, we push them
196
+ settings['userData'] = {
197
+ 'settingID' => setting_id,
198
+ 'pluginVersion' => VERSION
199
+ }
200
+
201
+ Logger.log("I:Updating settings of index #{index.name}")
202
+ return if Configurator.dry_run?
203
+ set_settings(settings)
204
+ end
205
+
206
+ # Public: Set new settings to an index
207
+ #
208
+ # Will dispatch to the error handler if it fails
209
+ # rubocop:disable Naming/AccessorMethodName
210
+ def self.set_settings(settings)
211
+ index.set_settings!(settings)
212
+ rescue StandardError => error
213
+ ErrorHandler.stop(error, settings: settings)
214
+ end
215
+ # rubocop:enable Naming/AccessorMethodName
216
+
217
+ # Public: Warn users that they have some settings manually configured in
218
+ # their dashboard
219
+ #
220
+ # When users change some settings in their dashboard, those settings might
221
+ # get overwritten by the plugin. We can't prevent that, but we can warn
222
+ # them when we detect they changed something.
223
+ def self.warn_of_manual_dashboard_editing(changed_keys)
224
+ # Transform the hash into readable YAML
225
+ yaml_lines = changed_keys
226
+ .to_yaml(indentation: 2)
227
+ .split("\n")[1..-1]
228
+ yaml_lines.map! do |line|
229
+ line = line.gsub(/^ */) { |spaces| ' ' * spaces.length }
230
+ line = line.gsub('- ', '  - ')
231
+ "W:    #{line}"
232
+ end
233
+ Logger.known_message(
234
+ 'settings_manually_edited',
235
+ settings: yaml_lines.join("\n")
236
+ )
237
+ end
238
+
166
239
  # Public: Push all records to Algolia and configure the index
167
240
  #
168
241
  # records - Records to push
@@ -180,14 +253,11 @@ module Jekyll
180
253
  exit 1
181
254
  end
182
255
 
183
- index_name = Configurator.index_name
184
- index = index(index_name)
185
-
186
256
  # Update settings
187
- update_settings(index)
257
+ update_settings
188
258
 
189
259
  # Getting list of objectID in remote and locally
190
- remote_ids = remote_object_ids(index)
260
+ remote_ids = remote_object_ids
191
261
  local_ids = local_object_ids(records)
192
262
 
193
263
  # Getting list of what to add and what to delete
@@ -196,7 +266,7 @@ module Jekyll
196
266
  new_records = records.select do |record|
197
267
  new_records_ids.include?(record[:objectID])
198
268
  end
199
- update_records(index_name, old_records_ids, new_records)
269
+ update_records(old_records_ids, new_records)
200
270
 
201
271
  Logger.log('I:✔ Indexing complete')
202
272
  end