grover 0.11.2 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/grover.rb +9 -200
- data/lib/grover/errors.rb +18 -0
- data/lib/grover/html_preprocessor.rb +2 -2
- data/lib/grover/js/processor.js +154 -0
- data/lib/grover/middleware.rb +8 -2
- data/lib/grover/options_builder.rb +50 -0
- data/lib/grover/options_fixer.rb +61 -0
- data/lib/grover/processor.rb +104 -0
- data/lib/grover/version.rb +1 -1
- metadata +14 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7376874743ddb5f08d3fc68c02f84a72eb6cbe98c63306b2ce2a5f57d37867c9
|
4
|
+
data.tar.gz: 546007fe354d88c117f70469c8cf80ba88fd22415ebb350aa79512c64592be3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 010bb2d49e78af9cfe56cc0480d803f217dd1f644d99d36640fad060e8a70a2be7b4d422057a761a5d0710347649dd44d3da9efde553b4ff23bff62838973af0
|
7
|
+
data.tar.gz: 5879e39b9d00a6b1c5904a4e71cdf00fc695caa29de7b85ca4dbcbf7ec857d87822c2be42ded362efd3ef2466f8c5db338182e8115cbe6b4b822a6114d5e50d8
|
data/lib/grover.rb
CHANGED
@@ -5,137 +5,20 @@ require 'grover/version'
|
|
5
5
|
require 'grover/utils'
|
6
6
|
require 'active_support_ext/object/deep_dup' unless defined?(ActiveSupport)
|
7
7
|
|
8
|
+
require 'grover/errors'
|
8
9
|
require 'grover/html_preprocessor'
|
9
10
|
require 'grover/middleware'
|
10
11
|
require 'grover/configuration'
|
12
|
+
require 'grover/options_builder'
|
13
|
+
require 'grover/processor'
|
11
14
|
|
12
15
|
require 'nokogiri'
|
13
|
-
require 'schmooze'
|
14
16
|
require 'yaml'
|
15
17
|
|
16
18
|
#
|
17
19
|
# Grover interface for converting HTML to PDF
|
18
20
|
#
|
19
21
|
class Grover
|
20
|
-
#
|
21
|
-
# Processor helper class for calling out to Puppeteer NodeJS library
|
22
|
-
#
|
23
|
-
class Processor < Schmooze::Base
|
24
|
-
dependencies puppeteer: 'puppeteer'
|
25
|
-
|
26
|
-
def self.launch_params
|
27
|
-
ENV['GROVER_NO_SANDBOX'] == 'true' ? "{args: ['--no-sandbox', '--disable-setuid-sandbox']}" : '{args: []}'
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.convert_function(convert_action)
|
31
|
-
<<~FUNCTION
|
32
|
-
async (url_or_html, options) => {
|
33
|
-
let browser;
|
34
|
-
try {
|
35
|
-
let launchParams = #{launch_params};
|
36
|
-
|
37
|
-
// Configure puppeteer debugging options
|
38
|
-
const debug = options.debug; delete options.debug;
|
39
|
-
if (typeof debug === 'object' && !!debug) {
|
40
|
-
if (debug.headless != undefined) { launchParams.headless = debug.headless; }
|
41
|
-
if (debug.devtools != undefined) { launchParams.devtools = debug.devtools; }
|
42
|
-
}
|
43
|
-
|
44
|
-
// Configure additional launch arguments
|
45
|
-
const args = options.launchArgs; delete options.launchArgs;
|
46
|
-
if (Array.isArray(args)) {
|
47
|
-
launchParams.args = launchParams.args.concat(args);
|
48
|
-
}
|
49
|
-
|
50
|
-
// Set executable path if given
|
51
|
-
const executablePath = options.executablePath; delete options.executablePath;
|
52
|
-
if (executablePath) {
|
53
|
-
launchParams.executablePath = executablePath;
|
54
|
-
}
|
55
|
-
|
56
|
-
// Launch the browser and create a page
|
57
|
-
browser = await puppeteer.launch(launchParams);
|
58
|
-
const page = await browser.newPage();
|
59
|
-
|
60
|
-
// Basic auth
|
61
|
-
const username = options.username; delete options.username
|
62
|
-
const password = options.password; delete options.password
|
63
|
-
if (username != undefined && password != undefined) {
|
64
|
-
await page.authenticate({ username, password });
|
65
|
-
}
|
66
|
-
|
67
|
-
// Set caching flag (if provided)
|
68
|
-
const cache = options.cache; delete options.cache;
|
69
|
-
if (cache != undefined) {
|
70
|
-
await page.setCacheEnabled(cache);
|
71
|
-
}
|
72
|
-
|
73
|
-
// Setup timeout option (if provided)
|
74
|
-
let request_options = {};
|
75
|
-
const timeout = options.timeout; delete options.timeout;
|
76
|
-
if (timeout != undefined) {
|
77
|
-
request_options.timeout = timeout;
|
78
|
-
}
|
79
|
-
|
80
|
-
// Setup viewport options (if provided)
|
81
|
-
const viewport = options.viewport; delete options.viewport;
|
82
|
-
if (viewport != undefined) {
|
83
|
-
await page.setViewport(viewport);
|
84
|
-
}
|
85
|
-
|
86
|
-
const waitUntil = options.waitUntil; delete options.waitUntil;
|
87
|
-
if (url_or_html.match(/^http/i)) {
|
88
|
-
// Request is for a URL, so request it
|
89
|
-
request_options.waitUntil = waitUntil || 'networkidle2';
|
90
|
-
await page.goto(url_or_html, request_options);
|
91
|
-
} else {
|
92
|
-
// Request is some HTML content. Use request interception to assign the body
|
93
|
-
request_options.waitUntil = waitUntil || 'networkidle0';
|
94
|
-
await page.setRequestInterception(true);
|
95
|
-
page.once('request', request => {
|
96
|
-
request.respond({ body: url_or_html });
|
97
|
-
// Reset the request interception
|
98
|
-
// (we only want to intercept the first request - ie our HTML)
|
99
|
-
page.on('request', request => request.continue());
|
100
|
-
});
|
101
|
-
const displayUrl = options.displayUrl; delete options.displayUrl;
|
102
|
-
await page.goto(displayUrl || 'http://example.com', request_options);
|
103
|
-
}
|
104
|
-
|
105
|
-
// If specified, emulate the media type
|
106
|
-
const emulateMedia = options.emulateMedia; delete options.emulateMedia;
|
107
|
-
if (emulateMedia != undefined) {
|
108
|
-
if (typeof page.emulateMediaType == 'function') {
|
109
|
-
await page.emulateMediaType(emulateMedia);
|
110
|
-
} else {
|
111
|
-
await page.emulateMedia(emulateMedia);
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
115
|
-
// If specified, evaluate script on the page
|
116
|
-
const executeScript = options.executeScript; delete options.executeScript;
|
117
|
-
if (executeScript != undefined) {
|
118
|
-
await page.evaluate(executeScript);
|
119
|
-
}
|
120
|
-
|
121
|
-
// If we're running puppeteer in headless mode, return the converted PDF
|
122
|
-
if (debug == undefined || (typeof debug === 'object' && (debug.headless == undefined || debug.headless))) {
|
123
|
-
return await page.#{convert_action}(options);
|
124
|
-
}
|
125
|
-
} finally {
|
126
|
-
if (browser) {
|
127
|
-
await browser.close();
|
128
|
-
}
|
129
|
-
}
|
130
|
-
}
|
131
|
-
FUNCTION
|
132
|
-
end
|
133
|
-
|
134
|
-
method :convert_pdf, convert_function('pdf')
|
135
|
-
method :convert_screenshot, convert_function('screenshot')
|
136
|
-
end
|
137
|
-
private_constant :Processor
|
138
|
-
|
139
22
|
DEFAULT_HEADER_TEMPLATE = "<div class='date text left'></div><div class='title text center'></div>"
|
140
23
|
DEFAULT_FOOTER_TEMPLATE = <<~HTML
|
141
24
|
<div class='url text left grow'></div>
|
@@ -151,8 +34,7 @@ class Grover
|
|
151
34
|
#
|
152
35
|
def initialize(url, options = {})
|
153
36
|
@url = url
|
154
|
-
@options =
|
155
|
-
|
37
|
+
@options = OptionsBuilder.new(options, url)
|
156
38
|
@root_path = @options.delete 'root_path'
|
157
39
|
@front_cover_path = @options.delete 'front_cover_path'
|
158
40
|
@back_cover_path = @options.delete 'back_cover_path'
|
@@ -165,10 +47,7 @@ class Grover
|
|
165
47
|
# @return [String] The resulting PDF data
|
166
48
|
#
|
167
49
|
def to_pdf(path = nil)
|
168
|
-
|
169
|
-
return unless result
|
170
|
-
|
171
|
-
result['data'].pack('C*')
|
50
|
+
processor.convert :pdf, @url, normalized_options(path: path)
|
172
51
|
end
|
173
52
|
|
174
53
|
#
|
@@ -180,11 +59,8 @@ class Grover
|
|
180
59
|
#
|
181
60
|
def screenshot(path: nil, format: nil)
|
182
61
|
options = normalized_options(path: path)
|
183
|
-
options['type'] = format if
|
184
|
-
|
185
|
-
return unless result
|
186
|
-
|
187
|
-
result['data'].pack('C*')
|
62
|
+
options['type'] = format if %w[png jpeg].include? format
|
63
|
+
processor.convert :screenshot, @url, options
|
188
64
|
end
|
189
65
|
|
190
66
|
#
|
@@ -194,7 +70,7 @@ class Grover
|
|
194
70
|
# @return [String] The resulting PNG data
|
195
71
|
#
|
196
72
|
def to_png(path = nil)
|
197
|
-
screenshot
|
73
|
+
screenshot path: path, format: 'png'
|
198
74
|
end
|
199
75
|
|
200
76
|
#
|
@@ -204,7 +80,7 @@ class Grover
|
|
204
80
|
# @return [String] The resulting JPEG data
|
205
81
|
#
|
206
82
|
def to_jpeg(path = nil)
|
207
|
-
screenshot
|
83
|
+
screenshot path: path, format: 'jpeg'
|
208
84
|
end
|
209
85
|
|
210
86
|
#
|
@@ -258,73 +134,6 @@ class Grover
|
|
258
134
|
Processor.new(root_path)
|
259
135
|
end
|
260
136
|
|
261
|
-
def combine_options(options)
|
262
|
-
combined = Utils.deep_stringify_keys Grover.configuration.options
|
263
|
-
Utils.deep_merge! combined, Utils.deep_stringify_keys(options)
|
264
|
-
Utils.deep_merge! combined, meta_options unless url_source?
|
265
|
-
|
266
|
-
fix_boolean_options! combined
|
267
|
-
fix_integer_options! combined
|
268
|
-
fix_float_options! combined
|
269
|
-
fix_array_options! combined
|
270
|
-
|
271
|
-
combined
|
272
|
-
end
|
273
|
-
|
274
|
-
#
|
275
|
-
# Extract out options from meta tags in the source - based on code from PDFKit project
|
276
|
-
#
|
277
|
-
def meta_options
|
278
|
-
meta_opts = {}
|
279
|
-
|
280
|
-
meta_tags.each do |meta|
|
281
|
-
tag_name = meta['name'] && meta['name'][/#{Grover.configuration.meta_tag_prefix}([a-z_-]+)/, 1]
|
282
|
-
next unless tag_name
|
283
|
-
|
284
|
-
Utils.deep_assign meta_opts, tag_name.split('-'), meta['content']
|
285
|
-
end
|
286
|
-
|
287
|
-
meta_opts
|
288
|
-
end
|
289
|
-
|
290
|
-
def meta_tags
|
291
|
-
Nokogiri::HTML(@url).xpath('//meta')
|
292
|
-
end
|
293
|
-
|
294
|
-
def url_source?
|
295
|
-
@url.match(/\Ahttp/i)
|
296
|
-
end
|
297
|
-
|
298
|
-
def fix_boolean_options!(options)
|
299
|
-
%w[display_header_footer print_background landscape prefer_css_page_size].each do |opt|
|
300
|
-
next unless options.key? opt
|
301
|
-
|
302
|
-
options[opt] = !FALSE_VALUES.include?(options[opt])
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
FALSE_VALUES = [nil, false, 0, '0', 'f', 'F', 'false', 'FALSE', 'off', 'OFF'].freeze
|
307
|
-
|
308
|
-
def fix_integer_options!(options)
|
309
|
-
['viewport.width', 'viewport.height'].each do |opt|
|
310
|
-
keys = opt.split('.')
|
311
|
-
Utils.deep_assign(options, keys, options.dig(*keys).to_i) if options.dig(*keys)
|
312
|
-
end
|
313
|
-
end
|
314
|
-
|
315
|
-
def fix_float_options!(options)
|
316
|
-
['viewport.device_scale_factor', 'scale'].each do |opt|
|
317
|
-
keys = opt.split('.')
|
318
|
-
Utils.deep_assign(options, keys, options.dig(*keys).to_f) if options.dig(*keys)
|
319
|
-
end
|
320
|
-
end
|
321
|
-
|
322
|
-
def fix_array_options!(options)
|
323
|
-
return unless options['launch_args'].is_a? String
|
324
|
-
|
325
|
-
options['launch_args'] = YAML.safe_load options['launch_args']
|
326
|
-
end
|
327
|
-
|
328
137
|
def normalized_options(path:)
|
329
138
|
normalized_options = Utils.normalize_object @options
|
330
139
|
normalized_options['path'] = path if path.is_a? ::String
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Grover
|
4
|
+
#
|
5
|
+
# Error classes for calling out to Puppeteer NodeJS library
|
6
|
+
#
|
7
|
+
# Heavily based on the Schmooze library https://github.com/Shopify/schmooze
|
8
|
+
#
|
9
|
+
Error = Class.new(StandardError)
|
10
|
+
DependencyError = Class.new(Error)
|
11
|
+
module JavaScript # rubocop:disable Style/Documentation
|
12
|
+
Error = Class.new(::Grover::Error)
|
13
|
+
UnknownError = Class.new(Error)
|
14
|
+
def self.const_missing(name)
|
15
|
+
const_set name, Class.new(Error)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -17,13 +17,13 @@ class Grover
|
|
17
17
|
|
18
18
|
def self.translate_relative_paths(html, root_url)
|
19
19
|
# Try out this regexp using rubular http://rubular.com/r/hiAxBNX7KE
|
20
|
-
html.gsub(%r{(href|src)=(['"])/([^/"']([
|
20
|
+
html.gsub(%r{(href|src)=(['"])/([^/"']([^"']*|[^"']*))?['"]}, "\\1=\\2#{root_url}\\3\\2")
|
21
21
|
end
|
22
22
|
private_class_method :translate_relative_paths
|
23
23
|
|
24
24
|
def self.translate_relative_protocols(body, protocol)
|
25
25
|
# Try out this regexp using rubular http://rubular.com/r/0Ohk0wFYxV
|
26
|
-
body.gsub(%r{(href|src)=(['"])//([
|
26
|
+
body.gsub(%r{(href|src)=(['"])//([^"']*|[^"']*)['"]}, "\\1=\\2#{protocol}://\\3\\2")
|
27
27
|
end
|
28
28
|
private_class_method :translate_relative_protocols
|
29
29
|
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
// Setup imports
|
2
|
+
try {
|
3
|
+
const Module = require('module');
|
4
|
+
// resolve puppeteer from the CWD instead of where this script is located
|
5
|
+
var puppeteer = require(require.resolve('puppeteer', { paths: Module._nodeModulePaths(process.cwd()) }));
|
6
|
+
} catch (e) {
|
7
|
+
process.stdout.write(JSON.stringify(['err', e.toString()]));
|
8
|
+
process.stdout.write("\n");
|
9
|
+
process.exit(1);
|
10
|
+
}
|
11
|
+
process.stdout.write("[\"ok\"]\n");
|
12
|
+
|
13
|
+
const _processPage = (async (convertAction, urlOrHtml, options) => {
|
14
|
+
let browser;
|
15
|
+
try {
|
16
|
+
const launchParams = {
|
17
|
+
args: process.env.GROVER_NO_SANDBOX === 'true' ? ['--no-sandbox', '--disable-setuid-sandbox'] : []
|
18
|
+
};
|
19
|
+
|
20
|
+
// Configure puppeteer debugging options
|
21
|
+
const debug = options.debug; delete options.debug;
|
22
|
+
if (typeof debug === 'object' && !!debug) {
|
23
|
+
if (debug.headless !== undefined) { launchParams.headless = debug.headless; }
|
24
|
+
if (debug.devtools !== undefined) { launchParams.devtools = debug.devtools; }
|
25
|
+
}
|
26
|
+
|
27
|
+
// Configure additional launch arguments
|
28
|
+
const args = options.launchArgs; delete options.launchArgs;
|
29
|
+
if (Array.isArray(args)) {
|
30
|
+
launchParams.args = launchParams.args.concat(args);
|
31
|
+
}
|
32
|
+
|
33
|
+
// Set executable path if given
|
34
|
+
const executablePath = options.executablePath; delete options.executablePath;
|
35
|
+
if (executablePath) {
|
36
|
+
launchParams.executablePath = executablePath;
|
37
|
+
}
|
38
|
+
|
39
|
+
// Launch the browser and create a page
|
40
|
+
browser = await puppeteer.launch(launchParams);
|
41
|
+
const page = await browser.newPage();
|
42
|
+
|
43
|
+
// Basic auth
|
44
|
+
const username = options.username; delete options.username
|
45
|
+
const password = options.password; delete options.password
|
46
|
+
if (username !== undefined && password !== undefined) {
|
47
|
+
await page.authenticate({ username, password });
|
48
|
+
}
|
49
|
+
|
50
|
+
// Setting cookies
|
51
|
+
const cookies = options.cookies; delete options.cookies
|
52
|
+
if (Array.isArray(cookies)) {
|
53
|
+
await page.setCookie(...cookies);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Set caching flag (if provided)
|
57
|
+
const cache = options.cache; delete options.cache;
|
58
|
+
if (cache !== undefined) {
|
59
|
+
await page.setCacheEnabled(cache);
|
60
|
+
}
|
61
|
+
|
62
|
+
// Setup timeout option (if provided)
|
63
|
+
let requestOptions = {};
|
64
|
+
const timeout = options.timeout; delete options.timeout;
|
65
|
+
if (timeout !== undefined) {
|
66
|
+
requestOptions.timeout = timeout;
|
67
|
+
}
|
68
|
+
|
69
|
+
// Setup viewport options (if provided)
|
70
|
+
const viewport = options.viewport; delete options.viewport;
|
71
|
+
if (viewport !== undefined) {
|
72
|
+
await page.setViewport(viewport);
|
73
|
+
}
|
74
|
+
|
75
|
+
const waitUntil = options.waitUntil; delete options.waitUntil;
|
76
|
+
if (urlOrHtml.match(/^http/i)) {
|
77
|
+
// Request is for a URL, so request it
|
78
|
+
requestOptions.waitUntil = waitUntil || 'networkidle2';
|
79
|
+
await page.goto(urlOrHtml, requestOptions);
|
80
|
+
} else {
|
81
|
+
// Request is some HTML content. Use request interception to assign the body
|
82
|
+
requestOptions.waitUntil = waitUntil || 'networkidle0';
|
83
|
+
await page.setRequestInterception(true);
|
84
|
+
page.once('request', request => {
|
85
|
+
request.respond({ body: urlOrHtml });
|
86
|
+
// Reset the request interception
|
87
|
+
// (we only want to intercept the first request - ie our HTML)
|
88
|
+
page.on('request', request => request.continue());
|
89
|
+
});
|
90
|
+
const displayUrl = options.displayUrl; delete options.displayUrl;
|
91
|
+
await page.goto(displayUrl || 'http://example.com', requestOptions);
|
92
|
+
}
|
93
|
+
|
94
|
+
// If specified, emulate the media type
|
95
|
+
const emulateMedia = options.emulateMedia; delete options.emulateMedia;
|
96
|
+
if (emulateMedia !== undefined) {
|
97
|
+
if (typeof page.emulateMediaType == 'function') {
|
98
|
+
await page.emulateMediaType(emulateMedia);
|
99
|
+
} else {
|
100
|
+
await page.emulateMedia(emulateMedia);
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
// If specified, evaluate script on the page
|
105
|
+
const executeScript = options.executeScript; delete options.executeScript;
|
106
|
+
if (executeScript !== undefined) {
|
107
|
+
await page.evaluate(executeScript);
|
108
|
+
}
|
109
|
+
|
110
|
+
// If specified, wait for selector
|
111
|
+
const waitForSelector = options.waitForSelector; delete options.waitForSelector;
|
112
|
+
const waitForSelectorOptions = options.waitForSelectorOptions; delete options.waitForSelectorOptions;
|
113
|
+
if (waitForSelector !== undefined) {
|
114
|
+
await page.waitForSelector(waitForSelector, waitForSelectorOptions)
|
115
|
+
}
|
116
|
+
|
117
|
+
// If we're running puppeteer in headless mode, return the converted PDF
|
118
|
+
if (debug === undefined || (typeof debug === 'object' && (debug.headless === undefined || debug.headless))) {
|
119
|
+
return await page[convertAction](options);
|
120
|
+
}
|
121
|
+
} finally {
|
122
|
+
if (browser) {
|
123
|
+
await browser.close();
|
124
|
+
}
|
125
|
+
}
|
126
|
+
});
|
127
|
+
|
128
|
+
function _handleError(error) {
|
129
|
+
if (error instanceof Error) {
|
130
|
+
process.stdout.write(
|
131
|
+
JSON.stringify(['err', error.toString().replace(new RegExp('^' + error.name + ': '), ''), error.name])
|
132
|
+
);
|
133
|
+
} else {
|
134
|
+
process.stdout.write(JSON.stringify(['err', error.toString()]));
|
135
|
+
}
|
136
|
+
process.stdout.write("\n");
|
137
|
+
}
|
138
|
+
|
139
|
+
// Interface for communicating between Ruby processor and Node processor
|
140
|
+
require('readline').createInterface({
|
141
|
+
input: process.stdin,
|
142
|
+
terminal: false,
|
143
|
+
}).on('line', function(line) {
|
144
|
+
try {
|
145
|
+
Promise.resolve(_processPage.apply(null, JSON.parse(line)))
|
146
|
+
.then(function (result) {
|
147
|
+
process.stdout.write(JSON.stringify(['ok', result]));
|
148
|
+
process.stdout.write("\n");
|
149
|
+
})
|
150
|
+
.catch(_handleError);
|
151
|
+
} catch(error) {
|
152
|
+
_handleError(error);
|
153
|
+
}
|
154
|
+
});
|
data/lib/grover/middleware.rb
CHANGED
@@ -97,9 +97,15 @@ class Grover
|
|
97
97
|
def create_grover_for_response(response)
|
98
98
|
body = response.respond_to?(:body) ? response.body : response.join
|
99
99
|
body = body.join if body.is_a?(Array)
|
100
|
-
|
101
100
|
body = HTMLPreprocessor.process body, root_url, protocol
|
102
|
-
|
101
|
+
|
102
|
+
options = { display_url: request_url }
|
103
|
+
cookies = Rack::Utils.parse_cookies(env).map do |name, value|
|
104
|
+
{ name: name, value: Rack::Utils.escape(value), domain: env['HTTP_HOST'] }
|
105
|
+
end
|
106
|
+
options[:cookies] = cookies if cookies.any?
|
107
|
+
|
108
|
+
Grover.new(body, options)
|
103
109
|
end
|
104
110
|
|
105
111
|
def add_cover_content(grover)
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'grover/utils'
|
4
|
+
require 'grover/options_fixer'
|
5
|
+
|
6
|
+
class Grover
|
7
|
+
#
|
8
|
+
# Build options from Grover.configuration, meta_options, and passed-in options
|
9
|
+
#
|
10
|
+
class OptionsBuilder < Hash
|
11
|
+
def initialize(options, url)
|
12
|
+
@url = url
|
13
|
+
combined = grover_configuration
|
14
|
+
Utils.deep_merge! combined, Utils.deep_stringify_keys(options)
|
15
|
+
Utils.deep_merge! combined, meta_options unless url_source?
|
16
|
+
|
17
|
+
update OptionsFixer.new(combined).run
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def grover_configuration
|
23
|
+
Utils.deep_stringify_keys Grover.configuration.options
|
24
|
+
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# Extract out options from meta tags in the source - based on code from PDFKit project
|
28
|
+
#
|
29
|
+
def meta_options
|
30
|
+
meta_opts = {}
|
31
|
+
|
32
|
+
meta_tags.each do |meta|
|
33
|
+
tag_name = meta['name'] && meta['name'][/#{Grover.configuration.meta_tag_prefix}([a-z_-]+)/, 1]
|
34
|
+
next unless tag_name
|
35
|
+
|
36
|
+
Utils.deep_assign meta_opts, tag_name.split('-'), meta['content']
|
37
|
+
end
|
38
|
+
|
39
|
+
meta_opts
|
40
|
+
end
|
41
|
+
|
42
|
+
def meta_tags
|
43
|
+
Nokogiri::HTML(@url).xpath('//meta')
|
44
|
+
end
|
45
|
+
|
46
|
+
def url_source?
|
47
|
+
@url.match(/\Ahttp/i)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'grover/utils'
|
4
|
+
|
5
|
+
class Grover
|
6
|
+
#
|
7
|
+
# Convert string option values to boolean, numeric, and array literals
|
8
|
+
#
|
9
|
+
class OptionsFixer
|
10
|
+
FALSE_VALUES = [nil, false, 0, '0', 'f', 'F', 'false', 'FALSE', 'off', 'OFF'].freeze
|
11
|
+
|
12
|
+
def initialize(options)
|
13
|
+
@options = options
|
14
|
+
end
|
15
|
+
|
16
|
+
def run
|
17
|
+
fix_boolean_options!
|
18
|
+
fix_integer_options!
|
19
|
+
fix_float_options!
|
20
|
+
fix_array_options!
|
21
|
+
@options
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def fix_options!(*option_paths)
|
27
|
+
option_paths.each do |option_path|
|
28
|
+
keys = option_path.split '.'
|
29
|
+
value = @options.dig(*keys)
|
30
|
+
Utils.deep_assign(@options, keys, yield(value)) if value
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def fix_boolean_options!
|
35
|
+
fix_options!(
|
36
|
+
'display_header_footer', 'full_page', 'landscape', 'omit_background', 'prefer_css_page_size',
|
37
|
+
'print_background', 'viewport.has_touch', 'viewport.is_landscape', 'viewport.is_mobile'
|
38
|
+
) { |value| !FALSE_VALUES.include?(value) }
|
39
|
+
end
|
40
|
+
|
41
|
+
def fix_integer_options!
|
42
|
+
fix_options!(
|
43
|
+
'viewport.height', 'viewport.width',
|
44
|
+
&:to_i
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
def fix_float_options!
|
49
|
+
fix_options!(
|
50
|
+
'clip.height', 'clip.width', 'clip.x', 'clip.y', 'quality', 'scale', 'viewport.device_scale_factor',
|
51
|
+
&:to_f
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
def fix_array_options!
|
56
|
+
fix_options!('launch_args') do |value|
|
57
|
+
value.is_a?(String) ? YAML.safe_load(value) : value
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'open3'
|
5
|
+
|
6
|
+
class Grover
|
7
|
+
#
|
8
|
+
# Processor helper class for calling out to Puppeteer NodeJS library
|
9
|
+
#
|
10
|
+
# Heavily based on the Schmooze library https://github.com/Shopify/schmooze
|
11
|
+
#
|
12
|
+
class Processor
|
13
|
+
def initialize(app_root)
|
14
|
+
@app_root = app_root
|
15
|
+
end
|
16
|
+
|
17
|
+
def convert(method, url_or_html, options)
|
18
|
+
spawn_process
|
19
|
+
ensure_packages_are_initiated
|
20
|
+
result = call_js_method method, url_or_html, options
|
21
|
+
return unless result
|
22
|
+
|
23
|
+
result['data'].pack('C*')
|
24
|
+
ensure
|
25
|
+
cleanup_process if stdin
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
attr_reader :app_root, :stdin, :stdout, :stderr, :wait_thr
|
31
|
+
|
32
|
+
def spawn_process
|
33
|
+
@stdin, @stdout, @stderr, @wait_thr = Open3.popen3(
|
34
|
+
'node',
|
35
|
+
File.expand_path(File.join(__dir__, 'js/processor.js')),
|
36
|
+
chdir: app_root
|
37
|
+
)
|
38
|
+
end
|
39
|
+
|
40
|
+
def ensure_packages_are_initiated
|
41
|
+
input = stdout.gets
|
42
|
+
raise Grover::Error, "Failed to instantiate worker process:\n#{stderr.read}" if input.nil?
|
43
|
+
|
44
|
+
result = JSON.parse(input)
|
45
|
+
return if result[0] == 'ok'
|
46
|
+
|
47
|
+
cleanup_process
|
48
|
+
parse_package_error result[1]
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse_package_error(error_message) # rubocop:disable Metrics/MethodLength
|
52
|
+
package_name = error_message[/^Error: Cannot find module '(.*)'$/, 1]
|
53
|
+
raise Grover::Error, error_message unless package_name
|
54
|
+
|
55
|
+
begin
|
56
|
+
%w[dependencies devDependencies].each do |key|
|
57
|
+
next unless package_json.key?(key) && package_json[key].key?(package_name)
|
58
|
+
|
59
|
+
raise Grover::DependencyError, Utils.squish(<<~ERROR)
|
60
|
+
Cannot find module '#{package_name}'.
|
61
|
+
The module was found in '#{package_json_path}' however, please run 'npm install' from '#{app_root}'
|
62
|
+
ERROR
|
63
|
+
end
|
64
|
+
rescue Errno::ENOENT # rubocop:disable Lint/SuppressedException
|
65
|
+
end
|
66
|
+
raise Grover::DependencyError, Utils.squish(<<~ERROR)
|
67
|
+
Cannot find module '#{package_name}'. You need to add it to '#{package_json_path}' and run 'npm install'
|
68
|
+
ERROR
|
69
|
+
end
|
70
|
+
|
71
|
+
def package_json_path
|
72
|
+
@package_json_path ||= File.join(app_root, 'package.json')
|
73
|
+
end
|
74
|
+
|
75
|
+
def package_json
|
76
|
+
@package_json ||= JSON.parse(File.read(package_json_path))
|
77
|
+
end
|
78
|
+
|
79
|
+
def call_js_method(method, url_or_html, options) # rubocop:disable Metrics/MethodLength
|
80
|
+
stdin.puts JSON.dump([method, url_or_html, options])
|
81
|
+
input = stdout.gets
|
82
|
+
raise Errno::EPIPE, "Can't read from worker" if input.nil?
|
83
|
+
|
84
|
+
status, message, error_class = JSON.parse(input)
|
85
|
+
|
86
|
+
if status == 'ok'
|
87
|
+
message
|
88
|
+
elsif error_class.nil?
|
89
|
+
raise Grover::JavaScript::UnknownError, message
|
90
|
+
else
|
91
|
+
raise Grover::JavaScript.const_get(error_class, false), message
|
92
|
+
end
|
93
|
+
rescue Errno::EPIPE, IOError
|
94
|
+
raise Grover::Error, "Worker process failed:\n#{stderr.read}"
|
95
|
+
end
|
96
|
+
|
97
|
+
def cleanup_process
|
98
|
+
stdin.close
|
99
|
+
stdout.close
|
100
|
+
stderr.close
|
101
|
+
wait_thr.join
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/lib/grover/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Bromwich
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: combine_pdf
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: schmooze
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0.2'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0.2'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: mini_magick
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +143,9 @@ dependencies:
|
|
157
143
|
- - "~>"
|
158
144
|
- !ruby/object:Gem::Version
|
159
145
|
version: '0.17'
|
146
|
+
- - "<"
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '0.18'
|
160
149
|
type: :development
|
161
150
|
prerelease: false
|
162
151
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -164,6 +153,9 @@ dependencies:
|
|
164
153
|
- - "~>"
|
165
154
|
- !ruby/object:Gem::Version
|
166
155
|
version: '0.17'
|
156
|
+
- - "<"
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0.18'
|
167
159
|
description: Transform HTML into PDF/PNG/JPEG using Google Puppeteer/Chromium
|
168
160
|
email:
|
169
161
|
- abromwich@studiosity.com
|
@@ -175,8 +167,13 @@ files:
|
|
175
167
|
- lib/active_support_ext/object/duplicable.rb
|
176
168
|
- lib/grover.rb
|
177
169
|
- lib/grover/configuration.rb
|
170
|
+
- lib/grover/errors.rb
|
178
171
|
- lib/grover/html_preprocessor.rb
|
172
|
+
- lib/grover/js/processor.js
|
179
173
|
- lib/grover/middleware.rb
|
174
|
+
- lib/grover/options_builder.rb
|
175
|
+
- lib/grover/options_fixer.rb
|
176
|
+
- lib/grover/processor.rb
|
180
177
|
- lib/grover/utils.rb
|
181
178
|
- lib/grover/version.rb
|
182
179
|
homepage: https://github.com/Studiosity/grover
|
@@ -198,8 +195,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
198
195
|
- !ruby/object:Gem::Version
|
199
196
|
version: '0'
|
200
197
|
requirements: []
|
201
|
-
|
202
|
-
rubygems_version: 2.7.6.2
|
198
|
+
rubygems_version: 3.0.6
|
203
199
|
signing_key:
|
204
200
|
specification_version: 4
|
205
201
|
summary: A Ruby gem to transform HTML into PDF, PNG or JPEG by wrapping the NodeJS
|