jekyll-algolia 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18ee9d2d725bb531703ddc05d9f31250959d81ff
4
- data.tar.gz: 93115f75f4d41d301898b77809732453d5c195f2
3
+ metadata.gz: 145d078774c95a6e9dda69331854efa6fbc51672
4
+ data.tar.gz: 34e00c41dee01556fe49811604a7819a3e4b252a
5
5
  SHA512:
6
- metadata.gz: fed961094843fdf503620f86fcb83313c96b68061ce0d170b1a66f018d60cf1edf0a68a9dd5df7f17dee5c2a6e6e4fa05e677ae5a743383c859c42f3443e6bba
7
- data.tar.gz: d66ea9ca93b8cafc3749711f17d27f03753ddfc6f4de70d2cc94296e5209b9247f2e3bbbefbf924414e5fe3c380f13c13fac87f80b8eed9322ccb0338d2dad2b
6
+ metadata.gz: 3a6d8b20abbeba1af7ee8de0bf8a111d41d40f6119caa156a1db433b1ca502cdff793cc8ffde2691fa60389b29b270f6fe4e2a3afb2294414d88de3c3c699af2
7
+ data.tar.gz: 3bbcff1330a507c2f16a9c13f5f102741086bb34c362e71a746640f2ff55ec1f125985114bfe8598adb23f66c8ab71bb9de2e341168cb69c608242ae6e9e64d8
@@ -5,20 +5,22 @@ E: the {size_limit} size limit.
5
5
  W:
6
6
  W: The plugin will create one record for each element matching your
7
7
  W: `nodes_to_index` value (currently set to "{nodes_to_index}"). Each record
8
- W: should not weight more than {size_limit}. One of your records weights {size}
9
- W: and has been rejected.
8
+ W: should not weigh more than {size_limit}.
9
+ W: One of your records weighs {size} and has been rejected.
10
10
  W:
11
- W: Here are more information about the rejected record:
12
- W: {
13
- W: "objectID": "{object_id}",
14
- W: "title": "{object_title}",
15
- W: "url": "{object_url}",
16
- W: "text": "{object_hint}…",
17
- W: […]
18
- W: }
11
+ W: objectID: {object_id}
12
+ W: title: {object_title}
13
+ W: url: {object_url}
14
+ W:
15
+ W: Most probable keys causing the issue:
16
+ W: {probable_wrong_keys}
17
+ W:
18
+ W: Complete log of the record has been extracted to:
19
+ W: {record_log_path}
19
20
  W:
20
- I: This issue is sometimes caused by malformed HTML preventing the parser to
21
- I: correctly grab the content of the nodes.
21
+ I: This issue can be caused by malformed HTML preventing the parser from correctly
22
+ I: grabbing the content of the nodes. Double check that the page actually renders
23
+ I: correctly with a regular `jekyll build`.
22
24
  I:
23
25
  I: If you're having trouble solving this issue, feel free to file a bug on
24
26
  I: GitHub, ideally with a link to a repository where we can reproduce the issue.
@@ -91,14 +91,15 @@ module Jekyll
91
91
  files = []
92
92
  Logger.log('I:Extracting records...')
93
93
  each_site_file do |file|
94
+ path = FileBrowser.relative_path(file)
95
+
94
96
  # Skip files that should not be indexed
95
97
  is_indexable = FileBrowser.indexable?(file)
96
98
  unless is_indexable
97
- Logger.verbose("W:Skipping #{file.path}")
99
+ Logger.verbose("W:Skipping #{path}")
98
100
  next
99
101
  end
100
102
 
101
- path = FileBrowser.path_from_root(file)
102
103
  Logger.verbose("I:Extracting records from #{path}")
103
104
  file_records = Extractor.run(file)
104
105
 
@@ -18,7 +18,6 @@ module Jekyll
18
18
  # happened to the display
19
19
  def self.stop(error, context = {})
20
20
  Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
21
- Logger.verbose("E:[jekyll-algolia] Context: #{context}")
22
21
 
23
22
  identified_error = identify(error, context)
24
23
 
@@ -172,9 +171,27 @@ module Jekyll
172
171
  }
173
172
  end
174
173
 
174
+ # Public: Returns a string explaining which attributes are the largest in
175
+ # the record
176
+ #
177
+ # record - The record hash to analyze
178
+ #
179
+ # This will be used on the `record_too_big` error, to guide users in
180
+ # finding which record is causing trouble
181
+ def self.readable_largest_record_keys(record)
182
+ keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
183
+ largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
184
+ output = []
185
+ largest_keys.each do |key, size|
186
+ size = Filesize.from("#{size} B").to_s('Kb')
187
+ output << "#{key} (#{size})"
188
+ end
189
+ output.join(', ')
190
+ end
191
+
175
192
  # Public: Check if the sent records are not too big
176
193
  #
177
- # context[:records] - list of records to push
194
+ # context[:records] - list of records sent in the batch
178
195
  #
179
196
  # Records cannot weigh more than 10Kb. If we're getting this error it
180
197
  # means that one of the records is too big, so we'll try to give
@@ -187,17 +204,25 @@ module Jekyll
187
204
 
188
205
  # Getting the record size
189
206
  size, = /.*size=(.*) bytes.*/.match(message).captures
190
- size = Filesize.from("#{size} B").pretty
207
+ size = Filesize.from("#{size} B").to_s('Kb')
191
208
  object_id = details['objectID']
192
209
 
193
210
  # Getting record details
194
211
  record = Utils.find_by_key(context[:records], :objectID, object_id)
212
+ probable_wrong_keys = readable_largest_record_keys(record)
213
+
214
+ # Writing the full record to disk for inspection
215
+ record_log_path = Logger.write_to_file(
216
+ "jekyll-algolia-record-too-big-#{object_id}.log",
217
+ JSON.pretty_generate(record)
218
+ )
195
219
 
196
220
  {
197
221
  'object_id' => object_id,
198
222
  'object_title' => record[:title],
199
223
  'object_url' => record[:url],
200
- 'object_hint' => record[:content][0..100],
224
+ 'probable_wrong_keys' => probable_wrong_keys,
225
+ 'record_log_path' => record_log_path,
201
226
  'nodes_to_index' => Configurator.algolia('nodes_to_index'),
202
227
  'size' => size,
203
228
  'size_limit' => '10 Kb'
@@ -11,7 +11,6 @@ module Jekyll
11
11
  # Public: Extract records from the file
12
12
  #
13
13
  # file - The Jekyll file to process
14
- # TOTEST
15
14
  def self.run(file)
16
15
  # Getting all hierarchical nodes from the HTML input
17
16
  raw_records = extract_raw_records(file.content)
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'algolia_html_extractor'
4
+ require 'pathname'
4
5
 
5
6
  module Jekyll
6
7
  module Algolia
@@ -22,6 +23,35 @@ module Jekyll
22
23
  file.is_a?(Jekyll::StaticFile)
23
24
  end
24
25
 
26
+ # Public: Return the absolute path of a Jekyll file
27
+ #
28
+ # file - The Jekyll file to inspect
29
+ #
30
+ # Jekyll handles the .path property of some files as relative to the root
31
+ # (pages) or as absolute paths (posts and static assets). We make sure
32
+ # we have a consistent way of accessing it
33
+ def self.absolute_path(file)
34
+ pathname = Pathname.new(file.path)
35
+ return pathname.cleanpath.to_s if pathname.absolute?
36
+
37
+ File.join(Configurator.get('source'), file.path)
38
+ end
39
+
40
+ # Public: Return the path of a Jekyll file relative to the Jekyll source
41
+ #
42
+ # file - The Jekyll file to inspect
43
+ #
44
+ # Jekyll handles the .path property of some files as relative to the root
45
+ # (pages) or as absolute paths (posts and static assets). We make sure
46
+ # we have a consistent way of accessing it
47
+ def self.relative_path(file)
48
+ pathname = Pathname.new(file.path)
49
+ return file.path if pathname.relative?
50
+
51
+ jekyll_source = Pathname.new(Configurator.get('source'))
52
+ pathname.relative_path_from(jekyll_source).cleanpath.to_s
53
+ end
54
+
25
55
  # Public: Check if the file is a 404 error page
26
56
  #
27
57
  # file - The Jekyll file
@@ -86,16 +116,19 @@ module Jekyll
86
116
  # file - The Jekyll file
87
117
  def self.excluded_from_config?(file)
88
118
  excluded_patterns = Configurator.algolia('files_to_exclude')
89
- excluded_files = []
119
+ jekyll_source = Configurator.get('source')
90
120
 
91
121
  # Transform the glob patterns into a real list of files
92
- Dir.chdir(Configurator.get('source')) do
122
+ excluded_files = []
123
+ Dir.chdir(jekyll_source) do
93
124
  excluded_patterns.each do |pattern|
94
- excluded_files += Dir.glob(pattern)
125
+ Dir.glob(pattern).each do |match|
126
+ excluded_files << File.expand_path(match)
127
+ end
95
128
  end
96
129
  end
97
130
 
98
- excluded_files.include?(file.path)
131
+ excluded_files.include?(absolute_path(file))
99
132
  end
100
133
 
101
134
  # Public: Check if the file has been excluded by running a custom user
@@ -106,18 +139,6 @@ module Jekyll
106
139
  Hooks.should_be_excluded?(file.path)
107
140
  end
108
141
 
109
- # Public: Return the path to the original file, relative from the Jekyll
110
- # source
111
- #
112
- # file - The Jekyll file
113
- #
114
- # Pages have their .path property relative to the source, but collections
115
- # (including posts) have an absolute file path.
116
- def self.path_from_root(file)
117
- source = Configurator.get('source')
118
- file.path.gsub(%r{^#{source}/}, '')
119
- end
120
-
121
142
  # Public: Check if the file should be indexed
122
143
  #
123
144
  # file - The Jekyll file
@@ -233,14 +254,32 @@ module Jekyll
233
254
  date.to_i
234
255
  end
235
256
 
257
+ # Public: Returns the raw excerpt of a file, directly as returned by
258
+ # Jekyll. Swallow any error that could occur when reading.
259
+ #
260
+ # file - The Jekyll file
261
+ #
262
+ # This might throw an exception if the excerpt is invalid. We also
263
+ # silence all logger output as Jekyll is quite verbose and will display
264
+ # the potential Liquid error in the terminal, even if we catch the actual
265
+ # error.
266
+ def self.excerpt_raw(file)
267
+ Logger.silent do
268
+ return file.data['excerpt'].to_s
269
+ end
270
+ rescue StandardError
271
+ return nil
272
+ end
273
+
236
274
  # Public: Returns the HTML version of the excerpt
237
275
  #
238
276
  # file - The Jekyll file
239
277
  #
240
278
  # Only collections (including posts) have an excerpt. Pages don't.
241
279
  def self.excerpt_html(file)
242
- excerpt = file.data['excerpt']
280
+ excerpt = excerpt_raw(file)
243
281
  return nil if excerpt.nil?
282
+ return nil if excerpt.empty?
244
283
  excerpt.to_s.tr("\n", ' ').strip
245
284
  end
246
285
 
@@ -251,7 +290,6 @@ module Jekyll
251
290
  # Only collections (including posts) have an excerpt. Pages don't.
252
291
  def self.excerpt_text(file)
253
292
  html = excerpt_html(file)
254
- return nil if html.nil?
255
293
  Utils.html_to_text(html)
256
294
  end
257
295
 
@@ -151,7 +151,10 @@ module Jekyll
151
151
  begin
152
152
  ::Algolia.batch!(slice)
153
153
  rescue StandardError => error
154
- ErrorHandler.stop(error)
154
+ records = slice.map do |record|
155
+ record[:body]
156
+ end
157
+ ErrorHandler.stop(error, records: records)
155
158
  end
156
159
  end
157
160
  end
@@ -4,6 +4,25 @@ module Jekyll
4
4
  module Algolia
5
5
  # Display helpful error messages
6
6
  module Logger
7
+ # Public: Silence all Jekyll log output in this block
8
+ # Usage:
9
+ # Logger.silent do
10
+ # # whatever Jekyll code here
11
+ # end
12
+ #
13
+ # This is especially useful when Jekyll is too talkative about what is
14
+ # logged. It works by redefining Jekyll.logger.write to a noop
15
+ # temporarily and re-attributing the original method once finished.
16
+ def self.silent
17
+ initial_method = Jekyll.logger.method(:write)
18
+ Utils.monkey_patch(Jekyll.logger, :write, proc { |*args| })
19
+ begin
20
+ yield
21
+ ensure
22
+ Utils.monkey_patch(Jekyll.logger, :write, initial_method)
23
+ end
24
+ end
25
+
7
26
  # Public: Displays a log line
8
27
  #
9
28
  # line - Line to display. Expected to be of the following format:
@@ -32,6 +51,16 @@ module Jekyll
32
51
  log(line)
33
52
  end
34
53
 
54
+ # Public: Write the specified content to a file in the source directory
55
+ #
56
+ # filename - the file basename
57
+ # content - the actual content of the file
58
+ def self.write_to_file(filename, content)
59
+ filepath = File.join(Configurator.get('source'), filename)
60
+ File.write(filepath, content)
61
+ filepath
62
+ end
63
+
35
64
  # Public: Displays a helpful error message for one of the known errors
36
65
  #
37
66
  # message_id: A string identifying a known message
@@ -6,6 +6,19 @@ module Jekyll
6
6
  module Algolia
7
7
  # Generic language-wide utils
8
8
  module Utils
9
+ # Public: Allow redefining an instance method on the fly with a new one
10
+ #
11
+ # instance - The instance to overwrite
12
+ # method - The method symbol to overwrite
13
+ # block - The new block to use for replacing (as a proc)
14
+ #
15
+ # Solution found on
16
+ # https://stackoverflow.com/questions/803020/redefining-a-single-ruby-method-on-a-single-instance-with-a-lambda/16631789
17
+ def self.monkey_patch(instance, method, block)
18
+ metaclass = class << instance; self; end
19
+ metaclass.send(:define_method, method, block)
20
+ end
21
+
9
22
  # Public: Convert a hash with string keys to a hash with symbol keys
10
23
  #
11
24
  # hash - The input hash, with string keys
@@ -17,6 +30,7 @@ module Jekyll
17
30
  #
18
31
  # html - String representation of the HTML node
19
32
  def self.html_to_text(html)
33
+ return nil if html.nil?
20
34
  text = Nokogiri::HTML(html).text
21
35
  text.tr("\n", ' ').squeeze(' ').strip
22
36
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Jekyll
4
4
  module Algolia
5
- VERSION = '1.1.0'
5
+ VERSION = '1.1.1'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-algolia
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-30 00:00:00.000000000 Z
11
+ date: 2018-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia_html_extractor
@@ -66,20 +66,6 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.0'
69
- - !ruby/object:Gem::Dependency
70
- name: jekyll-paginate
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
74
- - !ruby/object:Gem::Version
75
- version: '1.1'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - "~>"
81
- - !ruby/object:Gem::Version
82
- version: '1.1'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: json
85
71
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +164,20 @@ dependencies:
178
164
  - - "~>"
179
165
  - !ruby/object:Gem::Version
180
166
  version: '4.6'
167
+ - !ruby/object:Gem::Dependency
168
+ name: jekyll-paginate
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.1'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.1'
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: rake
183
183
  requirement: !ruby/object:Gem::Requirement