jekyll-algolia 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/errors/record_too_big.txt +14 -12
- data/lib/jekyll-algolia.rb +3 -2
- data/lib/jekyll/algolia/error_handler.rb +29 -4
- data/lib/jekyll/algolia/extractor.rb +0 -1
- data/lib/jekyll/algolia/file_browser.rb +56 -18
- data/lib/jekyll/algolia/indexer.rb +4 -1
- data/lib/jekyll/algolia/logger.rb +29 -0
- data/lib/jekyll/algolia/utils.rb +14 -0
- data/lib/jekyll/algolia/version.rb +1 -1
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 145d078774c95a6e9dda69331854efa6fbc51672
|
4
|
+
data.tar.gz: 34e00c41dee01556fe49811604a7819a3e4b252a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a6d8b20abbeba1af7ee8de0bf8a111d41d40f6119caa156a1db433b1ca502cdff793cc8ffde2691fa60389b29b270f6fe4e2a3afb2294414d88de3c3c699af2
|
7
|
+
data.tar.gz: 3bbcff1330a507c2f16a9c13f5f102741086bb34c362e71a746640f2ff55ec1f125985114bfe8598adb23f66c8ab71bb9de2e341168cb69c608242ae6e9e64d8
|
@@ -5,20 +5,22 @@ E: the {size_limit} size limit.
|
|
5
5
|
W:
|
6
6
|
W: The plugin will create one record for each element matching your
|
7
7
|
W: `nodes_to_index` value (currently set to "{nodes_to_index}"). Each record
|
8
|
-
W: should not weight more than {size_limit}.
|
9
|
-
W: and has been rejected.
|
8
|
+
W: should not weight more than {size_limit}.
|
9
|
+
W: One of your records weights {size} and has been rejected.
|
10
10
|
W:
|
11
|
-
W:
|
12
|
-
W: {
|
13
|
-
W:
|
14
|
-
W:
|
15
|
-
W:
|
16
|
-
W:
|
17
|
-
W:
|
18
|
-
W:
|
11
|
+
W: objectID: {object_id}
|
12
|
+
W: title: {object_title}
|
13
|
+
W: url: {object_url}
|
14
|
+
W:
|
15
|
+
W: Most probable keys causing the issue:
|
16
|
+
W: {probable_wrong_keys}
|
17
|
+
W:
|
18
|
+
W: Complete log of the record has been extracted to:
|
19
|
+
W: {record_log_path}
|
19
20
|
W:
|
20
|
-
I: This issue
|
21
|
-
I:
|
21
|
+
I: This issue can be caused by malformed HTML preventing the parser to correctly
|
22
|
+
I: grab the content of the nodes. Double check that the page actually renders
|
23
|
+
I: correctly with a regular `jekyll build`.
|
22
24
|
I:
|
23
25
|
I: If you're having trouble solving this issue, feel free to file a bug on
|
24
26
|
I: GitHub, ideally with a link to a repository where we can reproduce the issue.
|
data/lib/jekyll-algolia.rb
CHANGED
@@ -91,14 +91,15 @@ module Jekyll
|
|
91
91
|
files = []
|
92
92
|
Logger.log('I:Extracting records...')
|
93
93
|
each_site_file do |file|
|
94
|
+
path = FileBrowser.relative_path(file)
|
95
|
+
|
94
96
|
# Skip files that should not be indexed
|
95
97
|
is_indexable = FileBrowser.indexable?(file)
|
96
98
|
unless is_indexable
|
97
|
-
Logger.verbose("W:Skipping #{
|
99
|
+
Logger.verbose("W:Skipping #{path}")
|
98
100
|
next
|
99
101
|
end
|
100
102
|
|
101
|
-
path = FileBrowser.path_from_root(file)
|
102
103
|
Logger.verbose("I:Extracting records from #{path}")
|
103
104
|
file_records = Extractor.run(file)
|
104
105
|
|
@@ -18,7 +18,6 @@ module Jekyll
|
|
18
18
|
# happened to the display
|
19
19
|
def self.stop(error, context = {})
|
20
20
|
Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
|
21
|
-
Logger.verbose("E:[jekyll-algolia] Context: #{context}")
|
22
21
|
|
23
22
|
identified_error = identify(error, context)
|
24
23
|
|
@@ -172,9 +171,27 @@ module Jekyll
|
|
172
171
|
}
|
173
172
|
end
|
174
173
|
|
174
|
+
# Public: Returns a string explaining which attributes are the largest in
|
175
|
+
# the record
|
176
|
+
#
|
177
|
+
# record - The record hash to analyze
|
178
|
+
#
|
179
|
+
# This will be used on the `record_too_big` error, to guide users in
|
180
|
+
# finding which record is causing trouble
|
181
|
+
def self.readable_largest_record_keys(record)
|
182
|
+
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
|
183
|
+
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
|
184
|
+
output = []
|
185
|
+
largest_keys.each do |key, size|
|
186
|
+
size = Filesize.from("#{size} B").to_s('Kb')
|
187
|
+
output << "#{key} (#{size})"
|
188
|
+
end
|
189
|
+
output.join(', ')
|
190
|
+
end
|
191
|
+
|
175
192
|
# Public: Check if the sent records are not too big
|
176
193
|
#
|
177
|
-
# context[:records] - list of records
|
194
|
+
# context[:records] - list of records sent in the batch
|
178
195
|
#
|
179
196
|
# Records cannot weigh more than 10Kb. If we're getting this error it
|
180
197
|
# means that one of the records is too big, so we'll try to give
|
@@ -187,17 +204,25 @@ module Jekyll
|
|
187
204
|
|
188
205
|
# Getting the record size
|
189
206
|
size, = /.*size=(.*) bytes.*/.match(message).captures
|
190
|
-
size = Filesize.from("#{size} B").
|
207
|
+
size = Filesize.from("#{size} B").to_s('Kb')
|
191
208
|
object_id = details['objectID']
|
192
209
|
|
193
210
|
# Getting record details
|
194
211
|
record = Utils.find_by_key(context[:records], :objectID, object_id)
|
212
|
+
probable_wrong_keys = readable_largest_record_keys(record)
|
213
|
+
|
214
|
+
# Writing the full record to disk for inspection
|
215
|
+
record_log_path = Logger.write_to_file(
|
216
|
+
"jekyll-algolia-record-too-big-#{object_id}.log",
|
217
|
+
JSON.pretty_generate(record)
|
218
|
+
)
|
195
219
|
|
196
220
|
{
|
197
221
|
'object_id' => object_id,
|
198
222
|
'object_title' => record[:title],
|
199
223
|
'object_url' => record[:url],
|
200
|
-
'
|
224
|
+
'probable_wrong_keys' => probable_wrong_keys,
|
225
|
+
'record_log_path' => record_log_path,
|
201
226
|
'nodes_to_index' => Configurator.algolia('nodes_to_index'),
|
202
227
|
'size' => size,
|
203
228
|
'size_limit' => '10 Kb'
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'algolia_html_extractor'
|
4
|
+
require 'pathname'
|
4
5
|
|
5
6
|
module Jekyll
|
6
7
|
module Algolia
|
@@ -22,6 +23,35 @@ module Jekyll
|
|
22
23
|
file.is_a?(Jekyll::StaticFile)
|
23
24
|
end
|
24
25
|
|
26
|
+
# Public: Return the absolute path of a Jekyll file
|
27
|
+
#
|
28
|
+
# file - The Jekyll file to inspect
|
29
|
+
#
|
30
|
+
# Jekyll handles the .path property of some files as relative to the root
|
31
|
+
# (pages) or as absolute paths (posts and static assets). We make sure
|
32
|
+
# we have a consistent way of accessing it
|
33
|
+
def self.absolute_path(file)
|
34
|
+
pathname = Pathname.new(file.path)
|
35
|
+
return pathname.cleanpath.to_s if pathname.absolute?
|
36
|
+
|
37
|
+
File.join(Configurator.get('source'), file.path)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: Return the path of a Jekyll file relative to the Jekyll source
|
41
|
+
#
|
42
|
+
# file - The Jekyll file to inspect
|
43
|
+
#
|
44
|
+
# Jekyll handles the .path property of some files as relative to the root
|
45
|
+
# (pages) or as absolute paths (posts and static assets). We make sure
|
46
|
+
# we have a consistent way of accessing it
|
47
|
+
def self.relative_path(file)
|
48
|
+
pathname = Pathname.new(file.path)
|
49
|
+
return file.path if pathname.relative?
|
50
|
+
|
51
|
+
jekyll_source = Pathname.new(Configurator.get('source'))
|
52
|
+
pathname.relative_path_from(jekyll_source).cleanpath.to_s
|
53
|
+
end
|
54
|
+
|
25
55
|
# Public: Check if the file is a 404 error page
|
26
56
|
#
|
27
57
|
# file - The Jekyll file
|
@@ -86,16 +116,19 @@ module Jekyll
|
|
86
116
|
# file - The Jekyll file
|
87
117
|
def self.excluded_from_config?(file)
|
88
118
|
excluded_patterns = Configurator.algolia('files_to_exclude')
|
89
|
-
|
119
|
+
jekyll_source = Configurator.get('source')
|
90
120
|
|
91
121
|
# Transform the glob patterns into a real list of files
|
92
|
-
|
122
|
+
excluded_files = []
|
123
|
+
Dir.chdir(jekyll_source) do
|
93
124
|
excluded_patterns.each do |pattern|
|
94
|
-
|
125
|
+
Dir.glob(pattern).each do |match|
|
126
|
+
excluded_files << File.expand_path(match)
|
127
|
+
end
|
95
128
|
end
|
96
129
|
end
|
97
130
|
|
98
|
-
excluded_files.include?(file
|
131
|
+
excluded_files.include?(absolute_path(file))
|
99
132
|
end
|
100
133
|
|
101
134
|
# Public: Check if the file has been excluded by running a custom user
|
@@ -106,18 +139,6 @@ module Jekyll
|
|
106
139
|
Hooks.should_be_excluded?(file.path)
|
107
140
|
end
|
108
141
|
|
109
|
-
# Public: Return the path to the original file, relative from the Jekyll
|
110
|
-
# source
|
111
|
-
#
|
112
|
-
# file - The Jekyll file
|
113
|
-
#
|
114
|
-
# Pages have their .path property relative to the source, but collections
|
115
|
-
# (including posts) have an absolute file path.
|
116
|
-
def self.path_from_root(file)
|
117
|
-
source = Configurator.get('source')
|
118
|
-
file.path.gsub(%r{^#{source}/}, '')
|
119
|
-
end
|
120
|
-
|
121
142
|
# Public: Check if the file should be indexed
|
122
143
|
#
|
123
144
|
# file - The Jekyll file
|
@@ -233,14 +254,32 @@ module Jekyll
|
|
233
254
|
date.to_i
|
234
255
|
end
|
235
256
|
|
257
|
+
# Public: Returns the raw excerpt of a file, directly as returned by
|
258
|
+
# Jekyll. Swallow any error that could occur when reading.
|
259
|
+
#
|
260
|
+
# file - The Jekyll file
|
261
|
+
#
|
262
|
+
# This might throw an exception if the excerpt is invalid. We also
|
263
|
+
# silence all logger output as Jekyll is quite verbose and will display
|
264
|
+
# the potential Liquid error in the terminal, even if we catch the actual
|
265
|
+
# error.
|
266
|
+
def self.excerpt_raw(file)
|
267
|
+
Logger.silent do
|
268
|
+
return file.data['excerpt'].to_s
|
269
|
+
end
|
270
|
+
rescue StandardError
|
271
|
+
return nil
|
272
|
+
end
|
273
|
+
|
236
274
|
# Public: Returns the HTML version of the excerpt
|
237
275
|
#
|
238
276
|
# file - The Jekyll file
|
239
277
|
#
|
240
278
|
# Only collections (including posts) have an excerpt. Pages don't.
|
241
279
|
def self.excerpt_html(file)
|
242
|
-
excerpt = file
|
280
|
+
excerpt = excerpt_raw(file)
|
243
281
|
return nil if excerpt.nil?
|
282
|
+
return nil if excerpt.empty?
|
244
283
|
excerpt.to_s.tr("\n", ' ').strip
|
245
284
|
end
|
246
285
|
|
@@ -251,7 +290,6 @@ module Jekyll
|
|
251
290
|
# Only collections (including posts) have an excerpt. Pages don't.
|
252
291
|
def self.excerpt_text(file)
|
253
292
|
html = excerpt_html(file)
|
254
|
-
return nil if html.nil?
|
255
293
|
Utils.html_to_text(html)
|
256
294
|
end
|
257
295
|
|
@@ -151,7 +151,10 @@ module Jekyll
|
|
151
151
|
begin
|
152
152
|
::Algolia.batch!(slice)
|
153
153
|
rescue StandardError => error
|
154
|
-
|
154
|
+
records = slice.map do |record|
|
155
|
+
record[:body]
|
156
|
+
end
|
157
|
+
ErrorHandler.stop(error, records: records)
|
155
158
|
end
|
156
159
|
end
|
157
160
|
end
|
@@ -4,6 +4,25 @@ module Jekyll
|
|
4
4
|
module Algolia
|
5
5
|
# Display helpful error messages
|
6
6
|
module Logger
|
7
|
+
# Public: Silence all Jekyll log output in this block
|
8
|
+
# Usage:
|
9
|
+
# Logger.silent do
|
10
|
+
# # whatever Jekyll code here
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# This is especially useful when Jekyll is too talkative about what is
|
14
|
+
# logged. It works by redefining Jekyll.logger.write to a noop
|
15
|
+
# temporarily and re-attributing the original method once finished.
|
16
|
+
def self.silent
|
17
|
+
initial_method = Jekyll.logger.method(:write)
|
18
|
+
Utils.monkey_patch(Jekyll.logger, :write, proc { |*args| })
|
19
|
+
begin
|
20
|
+
yield
|
21
|
+
ensure
|
22
|
+
Utils.monkey_patch(Jekyll.logger, :write, initial_method)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
7
26
|
# Public: Displays a log line
|
8
27
|
#
|
9
28
|
# line - Line to display. Expected to be of the following format:
|
@@ -32,6 +51,16 @@ module Jekyll
|
|
32
51
|
log(line)
|
33
52
|
end
|
34
53
|
|
54
|
+
# Public: Write the specified content to a file in the source directory
|
55
|
+
#
|
56
|
+
# filename - the file basename
|
57
|
+
# content - the actual content of the file
|
58
|
+
def self.write_to_file(filename, content)
|
59
|
+
filepath = File.join(Configurator.get('source'), filename)
|
60
|
+
File.write(filepath, content)
|
61
|
+
filepath
|
62
|
+
end
|
63
|
+
|
35
64
|
# Public: Displays a helpful error message for one of the known errors
|
36
65
|
#
|
37
66
|
# message_id: A string identifying a known message
|
data/lib/jekyll/algolia/utils.rb
CHANGED
@@ -6,6 +6,19 @@ module Jekyll
|
|
6
6
|
module Algolia
|
7
7
|
# Generic language-wide utils
|
8
8
|
module Utils
|
9
|
+
# Public: Allow redefining an instance method on the fly with a new one
|
10
|
+
#
|
11
|
+
# instance - The instance to overwrite
|
12
|
+
# method - The method symbol to overwrite
|
13
|
+
# block - The new block to use for replacing (as a proc)
|
14
|
+
#
|
15
|
+
# Solution found on
|
16
|
+
# https://stackoverflow.com/questions/803020/redefining-a-single-ruby-method-on-a-single-instance-with-a-lambda/16631789
|
17
|
+
def self.monkey_patch(instance, method, block)
|
18
|
+
metaclass = class << instance; self; end
|
19
|
+
metaclass.send(:define_method, method, block)
|
20
|
+
end
|
21
|
+
|
9
22
|
# Public: Convert a hash with string keys to a hash with symbol keys
|
10
23
|
#
|
11
24
|
# hash - The input hash, with string keys
|
@@ -17,6 +30,7 @@ module Jekyll
|
|
17
30
|
#
|
18
31
|
# html - String representation of the HTML node
|
19
32
|
def self.html_to_text(html)
|
33
|
+
return nil if html.nil?
|
20
34
|
text = Nokogiri::HTML(html).text
|
21
35
|
text.tr("\n", ' ').squeeze(' ').strip
|
22
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-algolia
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Carry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: algolia_html_extractor
|
@@ -66,20 +66,6 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '3.0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: jekyll-paginate
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '1.1'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '1.1'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
70
|
name: json
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,6 +164,20 @@ dependencies:
|
|
178
164
|
- - "~>"
|
179
165
|
- !ruby/object:Gem::Version
|
180
166
|
version: '4.6'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: jekyll-paginate
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '1.1'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '1.1'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
182
|
name: rake
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|