jekyll-algolia 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/errors/record_too_big.txt +14 -12
- data/lib/jekyll-algolia.rb +3 -2
- data/lib/jekyll/algolia/error_handler.rb +29 -4
- data/lib/jekyll/algolia/extractor.rb +0 -1
- data/lib/jekyll/algolia/file_browser.rb +56 -18
- data/lib/jekyll/algolia/indexer.rb +4 -1
- data/lib/jekyll/algolia/logger.rb +29 -0
- data/lib/jekyll/algolia/utils.rb +14 -0
- data/lib/jekyll/algolia/version.rb +1 -1
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 145d078774c95a6e9dda69331854efa6fbc51672
|
4
|
+
data.tar.gz: 34e00c41dee01556fe49811604a7819a3e4b252a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a6d8b20abbeba1af7ee8de0bf8a111d41d40f6119caa156a1db433b1ca502cdff793cc8ffde2691fa60389b29b270f6fe4e2a3afb2294414d88de3c3c699af2
|
7
|
+
data.tar.gz: 3bbcff1330a507c2f16a9c13f5f102741086bb34c362e71a746640f2ff55ec1f125985114bfe8598adb23f66c8ab71bb9de2e341168cb69c608242ae6e9e64d8
|
@@ -5,20 +5,22 @@ E: the {size_limit} size limit.
|
|
5
5
|
W:
|
6
6
|
W: The plugin will create one record for each element matching your
|
7
7
|
W: `nodes_to_index` value (currently set to "{nodes_to_index}"). Each record
|
8
|
-
W: should not weight more than {size_limit}.
|
9
|
-
W: and has been rejected.
|
8
|
+
W: should not weight more than {size_limit}.
|
9
|
+
W: One of your records weights {size} and has been rejected.
|
10
10
|
W:
|
11
|
-
W:
|
12
|
-
W: {
|
13
|
-
W:
|
14
|
-
W:
|
15
|
-
W:
|
16
|
-
W:
|
17
|
-
W:
|
18
|
-
W:
|
11
|
+
W: objectID: {object_id}
|
12
|
+
W: title: {object_title}
|
13
|
+
W: url: {object_url}
|
14
|
+
W:
|
15
|
+
W: Most probable keys causing the issue:
|
16
|
+
W: {probable_wrong_keys}
|
17
|
+
W:
|
18
|
+
W: Complete log of the record has been extracted to:
|
19
|
+
W: {record_log_path}
|
19
20
|
W:
|
20
|
-
I: This issue
|
21
|
-
I:
|
21
|
+
I: This issue can be caused by malformed HTML preventing the parser to correctly
|
22
|
+
I: grab the content of the nodes. Double check that the page actually renders
|
23
|
+
I: correctly with a regular `jekyll build`.
|
22
24
|
I:
|
23
25
|
I: If you're having trouble solving this issue, feel free to file a bug on
|
24
26
|
I: GitHub, ideally with a link to a repository where we can reproduce the issue.
|
data/lib/jekyll-algolia.rb
CHANGED
@@ -91,14 +91,15 @@ module Jekyll
|
|
91
91
|
files = []
|
92
92
|
Logger.log('I:Extracting records...')
|
93
93
|
each_site_file do |file|
|
94
|
+
path = FileBrowser.relative_path(file)
|
95
|
+
|
94
96
|
# Skip files that should not be indexed
|
95
97
|
is_indexable = FileBrowser.indexable?(file)
|
96
98
|
unless is_indexable
|
97
|
-
Logger.verbose("W:Skipping #{
|
99
|
+
Logger.verbose("W:Skipping #{path}")
|
98
100
|
next
|
99
101
|
end
|
100
102
|
|
101
|
-
path = FileBrowser.path_from_root(file)
|
102
103
|
Logger.verbose("I:Extracting records from #{path}")
|
103
104
|
file_records = Extractor.run(file)
|
104
105
|
|
@@ -18,7 +18,6 @@ module Jekyll
|
|
18
18
|
# happened to the display
|
19
19
|
def self.stop(error, context = {})
|
20
20
|
Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
|
21
|
-
Logger.verbose("E:[jekyll-algolia] Context: #{context}")
|
22
21
|
|
23
22
|
identified_error = identify(error, context)
|
24
23
|
|
@@ -172,9 +171,27 @@ module Jekyll
|
|
172
171
|
}
|
173
172
|
end
|
174
173
|
|
174
|
+
# Public: Returns a string explaining which attributes are the largest in
|
175
|
+
# the record
|
176
|
+
#
|
177
|
+
# record - The record hash to analyze
|
178
|
+
#
|
179
|
+
# This will be used on the `record_too_big` error, to guide users in
|
180
|
+
# finding which record is causing trouble
|
181
|
+
def self.readable_largest_record_keys(record)
|
182
|
+
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
|
183
|
+
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
|
184
|
+
output = []
|
185
|
+
largest_keys.each do |key, size|
|
186
|
+
size = Filesize.from("#{size} B").to_s('Kb')
|
187
|
+
output << "#{key} (#{size})"
|
188
|
+
end
|
189
|
+
output.join(', ')
|
190
|
+
end
|
191
|
+
|
175
192
|
# Public: Check if the sent records are not too big
|
176
193
|
#
|
177
|
-
# context[:records] - list of records
|
194
|
+
# context[:records] - list of records sent in the batch
|
178
195
|
#
|
179
196
|
# Records cannot weigh more than 10Kb. If we're getting this error it
|
180
197
|
# means that one of the records is too big, so we'll try to give
|
@@ -187,17 +204,25 @@ module Jekyll
|
|
187
204
|
|
188
205
|
# Getting the record size
|
189
206
|
size, = /.*size=(.*) bytes.*/.match(message).captures
|
190
|
-
size = Filesize.from("#{size} B").
|
207
|
+
size = Filesize.from("#{size} B").to_s('Kb')
|
191
208
|
object_id = details['objectID']
|
192
209
|
|
193
210
|
# Getting record details
|
194
211
|
record = Utils.find_by_key(context[:records], :objectID, object_id)
|
212
|
+
probable_wrong_keys = readable_largest_record_keys(record)
|
213
|
+
|
214
|
+
# Writing the full record to disk for inspection
|
215
|
+
record_log_path = Logger.write_to_file(
|
216
|
+
"jekyll-algolia-record-too-big-#{object_id}.log",
|
217
|
+
JSON.pretty_generate(record)
|
218
|
+
)
|
195
219
|
|
196
220
|
{
|
197
221
|
'object_id' => object_id,
|
198
222
|
'object_title' => record[:title],
|
199
223
|
'object_url' => record[:url],
|
200
|
-
'
|
224
|
+
'probable_wrong_keys' => probable_wrong_keys,
|
225
|
+
'record_log_path' => record_log_path,
|
201
226
|
'nodes_to_index' => Configurator.algolia('nodes_to_index'),
|
202
227
|
'size' => size,
|
203
228
|
'size_limit' => '10 Kb'
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'algolia_html_extractor'
|
4
|
+
require 'pathname'
|
4
5
|
|
5
6
|
module Jekyll
|
6
7
|
module Algolia
|
@@ -22,6 +23,35 @@ module Jekyll
|
|
22
23
|
file.is_a?(Jekyll::StaticFile)
|
23
24
|
end
|
24
25
|
|
26
|
+
# Public: Return the absolute path of a Jekyll file
|
27
|
+
#
|
28
|
+
# file - The Jekyll file to inspect
|
29
|
+
#
|
30
|
+
# Jekyll handles the .path property of some files as relative to the root
|
31
|
+
# (pages) or as absolute paths (posts and static assets). We make sure
|
32
|
+
# we have a consistent way of accessing it
|
33
|
+
def self.absolute_path(file)
|
34
|
+
pathname = Pathname.new(file.path)
|
35
|
+
return pathname.cleanpath.to_s if pathname.absolute?
|
36
|
+
|
37
|
+
File.join(Configurator.get('source'), file.path)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: Return the path of a Jekyll file relative to the Jekyll source
|
41
|
+
#
|
42
|
+
# file - The Jekyll file to inspect
|
43
|
+
#
|
44
|
+
# Jekyll handles the .path property of some files as relative to the root
|
45
|
+
# (pages) or as absolute paths (posts and static assets). We make sure
|
46
|
+
# we have a consistent way of accessing it
|
47
|
+
def self.relative_path(file)
|
48
|
+
pathname = Pathname.new(file.path)
|
49
|
+
return file.path if pathname.relative?
|
50
|
+
|
51
|
+
jekyll_source = Pathname.new(Configurator.get('source'))
|
52
|
+
pathname.relative_path_from(jekyll_source).cleanpath.to_s
|
53
|
+
end
|
54
|
+
|
25
55
|
# Public: Check if the file is a 404 error page
|
26
56
|
#
|
27
57
|
# file - The Jekyll file
|
@@ -86,16 +116,19 @@ module Jekyll
|
|
86
116
|
# file - The Jekyll file
|
87
117
|
def self.excluded_from_config?(file)
|
88
118
|
excluded_patterns = Configurator.algolia('files_to_exclude')
|
89
|
-
|
119
|
+
jekyll_source = Configurator.get('source')
|
90
120
|
|
91
121
|
# Transform the glob patterns into a real list of files
|
92
|
-
|
122
|
+
excluded_files = []
|
123
|
+
Dir.chdir(jekyll_source) do
|
93
124
|
excluded_patterns.each do |pattern|
|
94
|
-
|
125
|
+
Dir.glob(pattern).each do |match|
|
126
|
+
excluded_files << File.expand_path(match)
|
127
|
+
end
|
95
128
|
end
|
96
129
|
end
|
97
130
|
|
98
|
-
excluded_files.include?(file
|
131
|
+
excluded_files.include?(absolute_path(file))
|
99
132
|
end
|
100
133
|
|
101
134
|
# Public: Check if the file has been excluded by running a custom user
|
@@ -106,18 +139,6 @@ module Jekyll
|
|
106
139
|
Hooks.should_be_excluded?(file.path)
|
107
140
|
end
|
108
141
|
|
109
|
-
# Public: Return the path to the original file, relative from the Jekyll
|
110
|
-
# source
|
111
|
-
#
|
112
|
-
# file - The Jekyll file
|
113
|
-
#
|
114
|
-
# Pages have their .path property relative to the source, but collections
|
115
|
-
# (including posts) have an absolute file path.
|
116
|
-
def self.path_from_root(file)
|
117
|
-
source = Configurator.get('source')
|
118
|
-
file.path.gsub(%r{^#{source}/}, '')
|
119
|
-
end
|
120
|
-
|
121
142
|
# Public: Check if the file should be indexed
|
122
143
|
#
|
123
144
|
# file - The Jekyll file
|
@@ -233,14 +254,32 @@ module Jekyll
|
|
233
254
|
date.to_i
|
234
255
|
end
|
235
256
|
|
257
|
+
# Public: Returns the raw excerpt of a file, directly as returned by
|
258
|
+
# Jekyll. Swallow any error that could occur when reading.
|
259
|
+
#
|
260
|
+
# file - The Jekyll file
|
261
|
+
#
|
262
|
+
# This might throw an exception if the excerpt is invalid. We also
|
263
|
+
# silence all logger output as Jekyll is quite verbose and will display
|
264
|
+
# the potential Liquid error in the terminal, even if we catch the actual
|
265
|
+
# error.
|
266
|
+
def self.excerpt_raw(file)
|
267
|
+
Logger.silent do
|
268
|
+
return file.data['excerpt'].to_s
|
269
|
+
end
|
270
|
+
rescue StandardError
|
271
|
+
return nil
|
272
|
+
end
|
273
|
+
|
236
274
|
# Public: Returns the HTML version of the excerpt
|
237
275
|
#
|
238
276
|
# file - The Jekyll file
|
239
277
|
#
|
240
278
|
# Only collections (including posts) have an excerpt. Pages don't.
|
241
279
|
def self.excerpt_html(file)
|
242
|
-
excerpt = file
|
280
|
+
excerpt = excerpt_raw(file)
|
243
281
|
return nil if excerpt.nil?
|
282
|
+
return nil if excerpt.empty?
|
244
283
|
excerpt.to_s.tr("\n", ' ').strip
|
245
284
|
end
|
246
285
|
|
@@ -251,7 +290,6 @@ module Jekyll
|
|
251
290
|
# Only collections (including posts) have an excerpt. Pages don't.
|
252
291
|
def self.excerpt_text(file)
|
253
292
|
html = excerpt_html(file)
|
254
|
-
return nil if html.nil?
|
255
293
|
Utils.html_to_text(html)
|
256
294
|
end
|
257
295
|
|
@@ -151,7 +151,10 @@ module Jekyll
|
|
151
151
|
begin
|
152
152
|
::Algolia.batch!(slice)
|
153
153
|
rescue StandardError => error
|
154
|
-
|
154
|
+
records = slice.map do |record|
|
155
|
+
record[:body]
|
156
|
+
end
|
157
|
+
ErrorHandler.stop(error, records: records)
|
155
158
|
end
|
156
159
|
end
|
157
160
|
end
|
@@ -4,6 +4,25 @@ module Jekyll
|
|
4
4
|
module Algolia
|
5
5
|
# Display helpful error messages
|
6
6
|
module Logger
|
7
|
+
# Public: Silence all Jekyll log output in this block
|
8
|
+
# Usage:
|
9
|
+
# Logger.silent do
|
10
|
+
# # whatever Jekyll code here
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# This is especially useful when Jekyll is too talkative about what is
|
14
|
+
# logged. It works by redefining Jekyll.logger.write to a noop
|
15
|
+
# temporarily and re-attributing the original method once finished.
|
16
|
+
def self.silent
|
17
|
+
initial_method = Jekyll.logger.method(:write)
|
18
|
+
Utils.monkey_patch(Jekyll.logger, :write, proc { |*args| })
|
19
|
+
begin
|
20
|
+
yield
|
21
|
+
ensure
|
22
|
+
Utils.monkey_patch(Jekyll.logger, :write, initial_method)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
7
26
|
# Public: Displays a log line
|
8
27
|
#
|
9
28
|
# line - Line to display. Expected to be of the following format:
|
@@ -32,6 +51,16 @@ module Jekyll
|
|
32
51
|
log(line)
|
33
52
|
end
|
34
53
|
|
54
|
+
# Public: Write the specified content to a file in the source directory
|
55
|
+
#
|
56
|
+
# filename - the file basename
|
57
|
+
# content - the actual content of the file
|
58
|
+
def self.write_to_file(filename, content)
|
59
|
+
filepath = File.join(Configurator.get('source'), filename)
|
60
|
+
File.write(filepath, content)
|
61
|
+
filepath
|
62
|
+
end
|
63
|
+
|
35
64
|
# Public: Displays a helpful error message for one of the known errors
|
36
65
|
#
|
37
66
|
# message_id: A string identifying a known message
|
data/lib/jekyll/algolia/utils.rb
CHANGED
@@ -6,6 +6,19 @@ module Jekyll
|
|
6
6
|
module Algolia
|
7
7
|
# Generic language-wide utils
|
8
8
|
module Utils
|
9
|
+
# Public: Allow redefining an instance method on the fly with a new one
|
10
|
+
#
|
11
|
+
# instance - The instance to overwrite
|
12
|
+
# method - The method symbol to overwrite
|
13
|
+
# block - The new block to use for replacing (as a proc)
|
14
|
+
#
|
15
|
+
# Solution found on
|
16
|
+
# https://stackoverflow.com/questions/803020/redefining-a-single-ruby-method-on-a-single-instance-with-a-lambda/16631789
|
17
|
+
def self.monkey_patch(instance, method, block)
|
18
|
+
metaclass = class << instance; self; end
|
19
|
+
metaclass.send(:define_method, method, block)
|
20
|
+
end
|
21
|
+
|
9
22
|
# Public: Convert a hash with string keys to a hash with symbol keys
|
10
23
|
#
|
11
24
|
# hash - The input hash, with string keys
|
@@ -17,6 +30,7 @@ module Jekyll
|
|
17
30
|
#
|
18
31
|
# html - String representation of the HTML node
|
19
32
|
def self.html_to_text(html)
|
33
|
+
return nil if html.nil?
|
20
34
|
text = Nokogiri::HTML(html).text
|
21
35
|
text.tr("\n", ' ').squeeze(' ').strip
|
22
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-algolia
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Carry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: algolia_html_extractor
|
@@ -66,20 +66,6 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '3.0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: jekyll-paginate
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '1.1'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '1.1'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
70
|
name: json
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,6 +164,20 @@ dependencies:
|
|
178
164
|
- - "~>"
|
179
165
|
- !ruby/object:Gem::Version
|
180
166
|
version: '4.6'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: jekyll-paginate
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '1.1'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '1.1'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
182
|
name: rake
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|