jekyll-algolia 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18ee9d2d725bb531703ddc05d9f31250959d81ff
4
- data.tar.gz: 93115f75f4d41d301898b77809732453d5c195f2
3
+ metadata.gz: 145d078774c95a6e9dda69331854efa6fbc51672
4
+ data.tar.gz: 34e00c41dee01556fe49811604a7819a3e4b252a
5
5
  SHA512:
6
- metadata.gz: fed961094843fdf503620f86fcb83313c96b68061ce0d170b1a66f018d60cf1edf0a68a9dd5df7f17dee5c2a6e6e4fa05e677ae5a743383c859c42f3443e6bba
7
- data.tar.gz: d66ea9ca93b8cafc3749711f17d27f03753ddfc6f4de70d2cc94296e5209b9247f2e3bbbefbf924414e5fe3c380f13c13fac87f80b8eed9322ccb0338d2dad2b
6
+ metadata.gz: 3a6d8b20abbeba1af7ee8de0bf8a111d41d40f6119caa156a1db433b1ca502cdff793cc8ffde2691fa60389b29b270f6fe4e2a3afb2294414d88de3c3c699af2
7
+ data.tar.gz: 3bbcff1330a507c2f16a9c13f5f102741086bb34c362e71a746640f2ff55ec1f125985114bfe8598adb23f66c8ab71bb9de2e341168cb69c608242ae6e9e64d8
@@ -5,20 +5,22 @@ E: the {size_limit} size limit.
5
5
  W:
6
6
  W: The plugin will create one record for each element matching your
7
7
  W: `nodes_to_index` value (currently set to "{nodes_to_index}"). Each record
8
- W: should not weight more than {size_limit}. One of your records weights {size}
9
- W: and has been rejected.
8
+ W: should not weight more than {size_limit}.
9
+ W: One of your records weights {size} and has been rejected.
10
10
  W:
11
- W: Here are more information about the rejected record:
12
- W: {
13
- W: "objectID": "{object_id}",
14
- W: "title": "{object_title}",
15
- W: "url": "{object_url}",
16
- W: "text": "{object_hint}…",
17
- W: […]
18
- W: }
11
+ W: objectID: {object_id}
12
+ W: title: {object_title}
13
+ W: url: {object_url}
14
+ W:
15
+ W: Most probable keys causing the issue:
16
+ W: {probable_wrong_keys}
17
+ W:
18
+ W: Complete log of the record has been extracted to:
19
+ W: {record_log_path}
19
20
  W:
20
- I: This issue is sometimes caused by malformed HTML preventing the parser to
21
- I: correctly grab the content of the nodes.
21
+ I: This issue can be caused by malformed HTML preventing the parser to correctly
22
+ I: grab the content of the nodes. Double check that the page actually renders
23
+ I: correctly with a regular `jekyll build`.
22
24
  I:
23
25
  I: If you're having trouble solving this issue, feel free to file a bug on
24
26
  I: GitHub, ideally with a link to a repository where we can reproduce the issue.
@@ -91,14 +91,15 @@ module Jekyll
91
91
  files = []
92
92
  Logger.log('I:Extracting records...')
93
93
  each_site_file do |file|
94
+ path = FileBrowser.relative_path(file)
95
+
94
96
  # Skip files that should not be indexed
95
97
  is_indexable = FileBrowser.indexable?(file)
96
98
  unless is_indexable
97
- Logger.verbose("W:Skipping #{file.path}")
99
+ Logger.verbose("W:Skipping #{path}")
98
100
  next
99
101
  end
100
102
 
101
- path = FileBrowser.path_from_root(file)
102
103
  Logger.verbose("I:Extracting records from #{path}")
103
104
  file_records = Extractor.run(file)
104
105
 
@@ -18,7 +18,6 @@ module Jekyll
18
18
  # happened to the display
19
19
  def self.stop(error, context = {})
20
20
  Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
21
- Logger.verbose("E:[jekyll-algolia] Context: #{context}")
22
21
 
23
22
  identified_error = identify(error, context)
24
23
 
@@ -172,9 +171,27 @@ module Jekyll
172
171
  }
173
172
  end
174
173
 
174
+ # Public: Returns a string explaining which attributes are the largest in
175
+ # the record
176
+ #
177
+ # record - The record hash to analyze
178
+ #
179
+ # This will be used on the `record_too_big` error, to guide users in
180
+ # finding which record is causing trouble
181
+ def self.readable_largest_record_keys(record)
182
+ keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
183
+ largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
184
+ output = []
185
+ largest_keys.each do |key, size|
186
+ size = Filesize.from("#{size} B").to_s('Kb')
187
+ output << "#{key} (#{size})"
188
+ end
189
+ output.join(', ')
190
+ end
191
+
175
192
  # Public: Check if the sent records are not too big
176
193
  #
177
- # context[:records] - list of records to push
194
+ # context[:records] - list of records sent in the batch
178
195
  #
179
196
  # Records cannot weight more that 10Kb. If we're getting this error it
180
197
  # means that one of the records is too big, so we'll try to give
@@ -187,17 +204,25 @@ module Jekyll
187
204
 
188
205
  # Getting the record size
189
206
  size, = /.*size=(.*) bytes.*/.match(message).captures
190
- size = Filesize.from("#{size} B").pretty
207
+ size = Filesize.from("#{size} B").to_s('Kb')
191
208
  object_id = details['objectID']
192
209
 
193
210
  # Getting record details
194
211
  record = Utils.find_by_key(context[:records], :objectID, object_id)
212
+ probable_wrong_keys = readable_largest_record_keys(record)
213
+
214
+ # Writing the full record to disk for inspection
215
+ record_log_path = Logger.write_to_file(
216
+ "jekyll-algolia-record-too-big-#{object_id}.log",
217
+ JSON.pretty_generate(record)
218
+ )
195
219
 
196
220
  {
197
221
  'object_id' => object_id,
198
222
  'object_title' => record[:title],
199
223
  'object_url' => record[:url],
200
- 'object_hint' => record[:content][0..100],
224
+ 'probable_wrong_keys' => probable_wrong_keys,
225
+ 'record_log_path' => record_log_path,
201
226
  'nodes_to_index' => Configurator.algolia('nodes_to_index'),
202
227
  'size' => size,
203
228
  'size_limit' => '10 Kb'
@@ -11,7 +11,6 @@ module Jekyll
11
11
  # Public: Extract records from the file
12
12
  #
13
13
  # file - The Jekyll file to process
14
- # TOTEST
15
14
  def self.run(file)
16
15
  # Getting all hierarchical nodes from the HTML input
17
16
  raw_records = extract_raw_records(file.content)
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'algolia_html_extractor'
4
+ require 'pathname'
4
5
 
5
6
  module Jekyll
6
7
  module Algolia
@@ -22,6 +23,35 @@ module Jekyll
22
23
  file.is_a?(Jekyll::StaticFile)
23
24
  end
24
25
 
26
+ # Public: Return the absolute path of a Jekyll file
27
+ #
28
+ # file - The Jekyll file to inspect
29
+ #
30
+ # Jekyll handles the .path property of some files as relative to the root
31
+ # (pages) or as an absolute paths (posts and static assets). We make sure
32
+ # we have a consistent way of accessing it
33
+ def self.absolute_path(file)
34
+ pathname = Pathname.new(file.path)
35
+ return pathname.cleanpath.to_s if pathname.absolute?
36
+
37
+ File.join(Configurator.get('source'), file.path)
38
+ end
39
+
40
+ # Public: Return the path of a Jekyll file relative to the Jekyll source
41
+ #
42
+ # file - The Jekyll file to inspect
43
+ #
44
+ # Jekyll handles the .path property of some files as relative to the root
45
+ # (pages) or as an absolute paths (posts and static assets). We make sure
46
+ # we have a consistent way of accessing it
47
+ def self.relative_path(file)
48
+ pathname = Pathname.new(file.path)
49
+ return file.path if pathname.relative?
50
+
51
+ jekyll_source = Pathname.new(Configurator.get('source'))
52
+ pathname.relative_path_from(jekyll_source).cleanpath.to_s
53
+ end
54
+
25
55
  # Public: Check if the file is a 404 error page
26
56
  #
27
57
  # file - The Jekyll file
@@ -86,16 +116,19 @@ module Jekyll
86
116
  # file - The Jekyll file
87
117
  def self.excluded_from_config?(file)
88
118
  excluded_patterns = Configurator.algolia('files_to_exclude')
89
- excluded_files = []
119
+ jekyll_source = Configurator.get('source')
90
120
 
91
121
  # Transform the glob patterns into a real list of files
92
- Dir.chdir(Configurator.get('source')) do
122
+ excluded_files = []
123
+ Dir.chdir(jekyll_source) do
93
124
  excluded_patterns.each do |pattern|
94
- excluded_files += Dir.glob(pattern)
125
+ Dir.glob(pattern).each do |match|
126
+ excluded_files << File.expand_path(match)
127
+ end
95
128
  end
96
129
  end
97
130
 
98
- excluded_files.include?(file.path)
131
+ excluded_files.include?(absolute_path(file))
99
132
  end
100
133
 
101
134
  # Public: Check if the file has been excluded by running a custom user
@@ -106,18 +139,6 @@ module Jekyll
106
139
  Hooks.should_be_excluded?(file.path)
107
140
  end
108
141
 
109
- # Public: Return the path to the original file, relative from the Jekyll
110
- # source
111
- #
112
- # file - The Jekyll file
113
- #
114
- # Pages have their .path property relative to the source, but collections
115
- # (including posts) have an absolute file path.
116
- def self.path_from_root(file)
117
- source = Configurator.get('source')
118
- file.path.gsub(%r{^#{source}/}, '')
119
- end
120
-
121
142
  # Public: Check if the file should be indexed
122
143
  #
123
144
  # file - The Jekyll file
@@ -233,14 +254,32 @@ module Jekyll
233
254
  date.to_i
234
255
  end
235
256
 
257
+ # Public: Returns the raw excerpt of a file, directly as returned by
258
+ # Jekyll. Swallow any error that could occur when reading.
259
+ #
260
+ # file - The Jekyll file
261
+ #
262
+ # This might throw an exception if the excerpt is invalid. We also
263
+ # silence all logger output as Jekyll is quite verbose and will display
264
+ # the potential Liquid error in the terminal, even if we catch the actual
265
+ # error.
266
+ def self.excerpt_raw(file)
267
+ Logger.silent do
268
+ return file.data['excerpt'].to_s
269
+ end
270
+ rescue StandardError
271
+ return nil
272
+ end
273
+
236
274
  # Public: Returns the HTML version of the excerpt
237
275
  #
238
276
  # file - The Jekyll file
239
277
  #
240
278
  # Only collections (including posts) have an excerpt. Pages don't.
241
279
  def self.excerpt_html(file)
242
- excerpt = file.data['excerpt']
280
+ excerpt = excerpt_raw(file)
243
281
  return nil if excerpt.nil?
282
+ return nil if excerpt.empty?
244
283
  excerpt.to_s.tr("\n", ' ').strip
245
284
  end
246
285
 
@@ -251,7 +290,6 @@ module Jekyll
251
290
  # Only collections (including posts) have an excerpt. Pages don't.
252
291
  def self.excerpt_text(file)
253
292
  html = excerpt_html(file)
254
- return nil if html.nil?
255
293
  Utils.html_to_text(html)
256
294
  end
257
295
 
@@ -151,7 +151,10 @@ module Jekyll
151
151
  begin
152
152
  ::Algolia.batch!(slice)
153
153
  rescue StandardError => error
154
- ErrorHandler.stop(error)
154
+ records = slice.map do |record|
155
+ record[:body]
156
+ end
157
+ ErrorHandler.stop(error, records: records)
155
158
  end
156
159
  end
157
160
  end
@@ -4,6 +4,25 @@ module Jekyll
4
4
  module Algolia
5
5
  # Display helpful error messages
6
6
  module Logger
7
+ # Public: Silence all Jekyll log output in this block
8
+ # Usage:
9
+ # Logger.silence do
10
+ # # whatever Jekyll code here
11
+ # end
12
+ #
13
+ # This is especially useful when Jekyll is too talkative about what is
14
+ # loggued. It works by redefining Jekyll.logger.write to a noop
15
+ # temporarily and re-attributing the original method once finished.
16
+ def self.silent
17
+ initial_method = Jekyll.logger.method(:write)
18
+ Utils.monkey_patch(Jekyll.logger, :write, proc { |*args| })
19
+ begin
20
+ yield
21
+ ensure
22
+ Utils.monkey_patch(Jekyll.logger, :write, initial_method)
23
+ end
24
+ end
25
+
7
26
  # Public: Displays a log line
8
27
  #
9
28
  # line - Line to display. Expected to be of the following format:
@@ -32,6 +51,16 @@ module Jekyll
32
51
  log(line)
33
52
  end
34
53
 
54
+ # Public: Write the specified content to a file in the source directory
55
+ #
56
+ # filename - the file basename
57
+ # content - the actual content of the file
58
+ def self.write_to_file(filename, content)
59
+ filepath = File.join(Configurator.get('source'), filename)
60
+ File.write(filepath, content)
61
+ filepath
62
+ end
63
+
35
64
  # Public: Displays a helpful error message for one of the knows errors
36
65
  #
37
66
  # message_id: A string identifying a know message
@@ -6,6 +6,19 @@ module Jekyll
6
6
  module Algolia
7
7
  # Generic language-wide utils
8
8
  module Utils
9
+ # Public: Allow redefining an instance method on the fly with a new one
10
+ #
11
+ # instance - The instance to overwrite
12
+ # method - The method symbol to overwrite
13
+ # block - The new block to use for replacing (as a proc)
14
+ #
15
+ # Solution found on
16
+ # https://stackoverflow.com/questions/803020/redefining-a-single-ruby-method-on-a-single-instance-with-a-lambda/16631789
17
+ def self.monkey_patch(instance, method, block)
18
+ metaclass = class << instance; self; end
19
+ metaclass.send(:define_method, method, block)
20
+ end
21
+
9
22
  # Public: Convert a hash with string keys to a hash with symbol keys
10
23
  #
11
24
  # hash - The input hash, with string keys
@@ -17,6 +30,7 @@ module Jekyll
17
30
  #
18
31
  # html - String representation of the HTML node
19
32
  def self.html_to_text(html)
33
+ return nil if html.nil?
20
34
  text = Nokogiri::HTML(html).text
21
35
  text.tr("\n", ' ').squeeze(' ').strip
22
36
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Jekyll
4
4
  module Algolia
5
- VERSION = '1.1.0'
5
+ VERSION = '1.1.1'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-algolia
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-30 00:00:00.000000000 Z
11
+ date: 2018-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia_html_extractor
@@ -66,20 +66,6 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.0'
69
- - !ruby/object:Gem::Dependency
70
- name: jekyll-paginate
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
74
- - !ruby/object:Gem::Version
75
- version: '1.1'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - "~>"
81
- - !ruby/object:Gem::Version
82
- version: '1.1'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: json
85
71
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +164,20 @@ dependencies:
178
164
  - - "~>"
179
165
  - !ruby/object:Gem::Version
180
166
  version: '4.6'
167
+ - !ruby/object:Gem::Dependency
168
+ name: jekyll-paginate
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.1'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.1'
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: rake
183
183
  requirement: !ruby/object:Gem::Requirement