grover 0.11.2 → 0.12.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/grover.rb +9 -200
- data/lib/grover/errors.rb +18 -0
- data/lib/grover/html_preprocessor.rb +2 -2
- data/lib/grover/js/processor.js +154 -0
- data/lib/grover/middleware.rb +8 -2
- data/lib/grover/options_builder.rb +50 -0
- data/lib/grover/options_fixer.rb +61 -0
- data/lib/grover/processor.rb +104 -0
- data/lib/grover/version.rb +1 -1
- metadata +14 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7376874743ddb5f08d3fc68c02f84a72eb6cbe98c63306b2ce2a5f57d37867c9
|
4
|
+
data.tar.gz: 546007fe354d88c117f70469c8cf80ba88fd22415ebb350aa79512c64592be3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 010bb2d49e78af9cfe56cc0480d803f217dd1f644d99d36640fad060e8a70a2be7b4d422057a761a5d0710347649dd44d3da9efde553b4ff23bff62838973af0
|
7
|
+
data.tar.gz: 5879e39b9d00a6b1c5904a4e71cdf00fc695caa29de7b85ca4dbcbf7ec857d87822c2be42ded362efd3ef2466f8c5db338182e8115cbe6b4b822a6114d5e50d8
|
data/lib/grover.rb
CHANGED
@@ -5,137 +5,20 @@ require 'grover/version'
|
|
5
5
|
require 'grover/utils'
|
6
6
|
require 'active_support_ext/object/deep_dup' unless defined?(ActiveSupport)
|
7
7
|
|
8
|
+
require 'grover/errors'
|
8
9
|
require 'grover/html_preprocessor'
|
9
10
|
require 'grover/middleware'
|
10
11
|
require 'grover/configuration'
|
12
|
+
require 'grover/options_builder'
|
13
|
+
require 'grover/processor'
|
11
14
|
|
12
15
|
require 'nokogiri'
|
13
|
-
require 'schmooze'
|
14
16
|
require 'yaml'
|
15
17
|
|
16
18
|
#
|
17
19
|
# Grover interface for converting HTML to PDF
|
18
20
|
#
|
19
21
|
class Grover
|
20
|
-
#
|
21
|
-
# Processor helper class for calling out to Puppeteer NodeJS library
|
22
|
-
#
|
23
|
-
class Processor < Schmooze::Base
|
24
|
-
dependencies puppeteer: 'puppeteer'
|
25
|
-
|
26
|
-
def self.launch_params
|
27
|
-
ENV['GROVER_NO_SANDBOX'] == 'true' ? "{args: ['--no-sandbox', '--disable-setuid-sandbox']}" : '{args: []}'
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.convert_function(convert_action)
|
31
|
-
<<~FUNCTION
|
32
|
-
async (url_or_html, options) => {
|
33
|
-
let browser;
|
34
|
-
try {
|
35
|
-
let launchParams = #{launch_params};
|
36
|
-
|
37
|
-
// Configure puppeteer debugging options
|
38
|
-
const debug = options.debug; delete options.debug;
|
39
|
-
if (typeof debug === 'object' && !!debug) {
|
40
|
-
if (debug.headless != undefined) { launchParams.headless = debug.headless; }
|
41
|
-
if (debug.devtools != undefined) { launchParams.devtools = debug.devtools; }
|
42
|
-
}
|
43
|
-
|
44
|
-
// Configure additional launch arguments
|
45
|
-
const args = options.launchArgs; delete options.launchArgs;
|
46
|
-
if (Array.isArray(args)) {
|
47
|
-
launchParams.args = launchParams.args.concat(args);
|
48
|
-
}
|
49
|
-
|
50
|
-
// Set executable path if given
|
51
|
-
const executablePath = options.executablePath; delete options.executablePath;
|
52
|
-
if (executablePath) {
|
53
|
-
launchParams.executablePath = executablePath;
|
54
|
-
}
|
55
|
-
|
56
|
-
// Launch the browser and create a page
|
57
|
-
browser = await puppeteer.launch(launchParams);
|
58
|
-
const page = await browser.newPage();
|
59
|
-
|
60
|
-
// Basic auth
|
61
|
-
const username = options.username; delete options.username
|
62
|
-
const password = options.password; delete options.password
|
63
|
-
if (username != undefined && password != undefined) {
|
64
|
-
await page.authenticate({ username, password });
|
65
|
-
}
|
66
|
-
|
67
|
-
// Set caching flag (if provided)
|
68
|
-
const cache = options.cache; delete options.cache;
|
69
|
-
if (cache != undefined) {
|
70
|
-
await page.setCacheEnabled(cache);
|
71
|
-
}
|
72
|
-
|
73
|
-
// Setup timeout option (if provided)
|
74
|
-
let request_options = {};
|
75
|
-
const timeout = options.timeout; delete options.timeout;
|
76
|
-
if (timeout != undefined) {
|
77
|
-
request_options.timeout = timeout;
|
78
|
-
}
|
79
|
-
|
80
|
-
// Setup viewport options (if provided)
|
81
|
-
const viewport = options.viewport; delete options.viewport;
|
82
|
-
if (viewport != undefined) {
|
83
|
-
await page.setViewport(viewport);
|
84
|
-
}
|
85
|
-
|
86
|
-
const waitUntil = options.waitUntil; delete options.waitUntil;
|
87
|
-
if (url_or_html.match(/^http/i)) {
|
88
|
-
// Request is for a URL, so request it
|
89
|
-
request_options.waitUntil = waitUntil || 'networkidle2';
|
90
|
-
await page.goto(url_or_html, request_options);
|
91
|
-
} else {
|
92
|
-
// Request is some HTML content. Use request interception to assign the body
|
93
|
-
request_options.waitUntil = waitUntil || 'networkidle0';
|
94
|
-
await page.setRequestInterception(true);
|
95
|
-
page.once('request', request => {
|
96
|
-
request.respond({ body: url_or_html });
|
97
|
-
// Reset the request interception
|
98
|
-
// (we only want to intercept the first request - ie our HTML)
|
99
|
-
page.on('request', request => request.continue());
|
100
|
-
});
|
101
|
-
const displayUrl = options.displayUrl; delete options.displayUrl;
|
102
|
-
await page.goto(displayUrl || 'http://example.com', request_options);
|
103
|
-
}
|
104
|
-
|
105
|
-
// If specified, emulate the media type
|
106
|
-
const emulateMedia = options.emulateMedia; delete options.emulateMedia;
|
107
|
-
if (emulateMedia != undefined) {
|
108
|
-
if (typeof page.emulateMediaType == 'function') {
|
109
|
-
await page.emulateMediaType(emulateMedia);
|
110
|
-
} else {
|
111
|
-
await page.emulateMedia(emulateMedia);
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
115
|
-
// If specified, evaluate script on the page
|
116
|
-
const executeScript = options.executeScript; delete options.executeScript;
|
117
|
-
if (executeScript != undefined) {
|
118
|
-
await page.evaluate(executeScript);
|
119
|
-
}
|
120
|
-
|
121
|
-
// If we're running puppeteer in headless mode, return the converted PDF
|
122
|
-
if (debug == undefined || (typeof debug === 'object' && (debug.headless == undefined || debug.headless))) {
|
123
|
-
return await page.#{convert_action}(options);
|
124
|
-
}
|
125
|
-
} finally {
|
126
|
-
if (browser) {
|
127
|
-
await browser.close();
|
128
|
-
}
|
129
|
-
}
|
130
|
-
}
|
131
|
-
FUNCTION
|
132
|
-
end
|
133
|
-
|
134
|
-
method :convert_pdf, convert_function('pdf')
|
135
|
-
method :convert_screenshot, convert_function('screenshot')
|
136
|
-
end
|
137
|
-
private_constant :Processor
|
138
|
-
|
139
22
|
DEFAULT_HEADER_TEMPLATE = "<div class='date text left'></div><div class='title text center'></div>"
|
140
23
|
DEFAULT_FOOTER_TEMPLATE = <<~HTML
|
141
24
|
<div class='url text left grow'></div>
|
@@ -151,8 +34,7 @@ class Grover
|
|
151
34
|
#
|
152
35
|
def initialize(url, options = {})
|
153
36
|
@url = url
|
154
|
-
@options = combine_options options
|
155
|
-
|
37
|
+
@options = OptionsBuilder.new(options, url)
|
156
38
|
@root_path = @options.delete 'root_path'
|
157
39
|
@front_cover_path = @options.delete 'front_cover_path'
|
158
40
|
@back_cover_path = @options.delete 'back_cover_path'
|
@@ -165,10 +47,7 @@ class Grover
|
|
165
47
|
# @return [String] The resulting PDF data
|
166
48
|
#
|
167
49
|
def to_pdf(path = nil)
|
168
|
-
|
169
|
-
return unless result
|
170
|
-
|
171
|
-
result['data'].pack('C*')
|
50
|
+
processor.convert :pdf, @url, normalized_options(path: path)
|
172
51
|
end
|
173
52
|
|
174
53
|
#
|
@@ -180,11 +59,8 @@ class Grover
|
|
180
59
|
#
|
181
60
|
def screenshot(path: nil, format: nil)
|
182
61
|
options = normalized_options(path: path)
|
183
|
-
options['type'] = format if
|
184
|
-
|
185
|
-
return unless result
|
186
|
-
|
187
|
-
result['data'].pack('C*')
|
62
|
+
options['type'] = format if %w[png jpeg].include? format
|
63
|
+
processor.convert :screenshot, @url, options
|
188
64
|
end
|
189
65
|
|
190
66
|
#
|
@@ -194,7 +70,7 @@ class Grover
|
|
194
70
|
# @return [String] The resulting PNG data
|
195
71
|
#
|
196
72
|
def to_png(path = nil)
|
197
|
-
screenshot
|
73
|
+
screenshot path: path, format: 'png'
|
198
74
|
end
|
199
75
|
|
200
76
|
#
|
@@ -204,7 +80,7 @@ class Grover
|
|
204
80
|
# @return [String] The resulting JPEG data
|
205
81
|
#
|
206
82
|
def to_jpeg(path = nil)
|
207
|
-
screenshot
|
83
|
+
screenshot path: path, format: 'jpeg'
|
208
84
|
end
|
209
85
|
|
210
86
|
#
|
@@ -258,73 +134,6 @@ class Grover
|
|
258
134
|
Processor.new(root_path)
|
259
135
|
end
|
260
136
|
|
261
|
-
def combine_options(options)
|
262
|
-
combined = Utils.deep_stringify_keys Grover.configuration.options
|
263
|
-
Utils.deep_merge! combined, Utils.deep_stringify_keys(options)
|
264
|
-
Utils.deep_merge! combined, meta_options unless url_source?
|
265
|
-
|
266
|
-
fix_boolean_options! combined
|
267
|
-
fix_integer_options! combined
|
268
|
-
fix_float_options! combined
|
269
|
-
fix_array_options! combined
|
270
|
-
|
271
|
-
combined
|
272
|
-
end
|
273
|
-
|
274
|
-
#
|
275
|
-
# Extract out options from meta tags in the source - based on code from PDFKit project
|
276
|
-
#
|
277
|
-
def meta_options
|
278
|
-
meta_opts = {}
|
279
|
-
|
280
|
-
meta_tags.each do |meta|
|
281
|
-
tag_name = meta['name'] && meta['name'][/#{Grover.configuration.meta_tag_prefix}([a-z_-]+)/, 1]
|
282
|
-
next unless tag_name
|
283
|
-
|
284
|
-
Utils.deep_assign meta_opts, tag_name.split('-'), meta['content']
|
285
|
-
end
|
286
|
-
|
287
|
-
meta_opts
|
288
|
-
end
|
289
|
-
|
290
|
-
def meta_tags
|
291
|
-
Nokogiri::HTML(@url).xpath('//meta')
|
292
|
-
end
|
293
|
-
|
294
|
-
def url_source?
|
295
|
-
@url.match(/\Ahttp/i)
|
296
|
-
end
|
297
|
-
|
298
|
-
def fix_boolean_options!(options)
|
299
|
-
%w[display_header_footer print_background landscape prefer_css_page_size].each do |opt|
|
300
|
-
next unless options.key? opt
|
301
|
-
|
302
|
-
options[opt] = !FALSE_VALUES.include?(options[opt])
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
FALSE_VALUES = [nil, false, 0, '0', 'f', 'F', 'false', 'FALSE', 'off', 'OFF'].freeze
|
307
|
-
|
308
|
-
def fix_integer_options!(options)
|
309
|
-
['viewport.width', 'viewport.height'].each do |opt|
|
310
|
-
keys = opt.split('.')
|
311
|
-
Utils.deep_assign(options, keys, options.dig(*keys).to_i) if options.dig(*keys)
|
312
|
-
end
|
313
|
-
end
|
314
|
-
|
315
|
-
def fix_float_options!(options)
|
316
|
-
['viewport.device_scale_factor', 'scale'].each do |opt|
|
317
|
-
keys = opt.split('.')
|
318
|
-
Utils.deep_assign(options, keys, options.dig(*keys).to_f) if options.dig(*keys)
|
319
|
-
end
|
320
|
-
end
|
321
|
-
|
322
|
-
def fix_array_options!(options)
|
323
|
-
return unless options['launch_args'].is_a? String
|
324
|
-
|
325
|
-
options['launch_args'] = YAML.safe_load options['launch_args']
|
326
|
-
end
|
327
|
-
|
328
137
|
def normalized_options(path:)
|
329
138
|
normalized_options = Utils.normalize_object @options
|
330
139
|
normalized_options['path'] = path if path.is_a? ::String
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Grover
|
4
|
+
#
|
5
|
+
# Error classes for calling out to Puppeteer NodeJS library
|
6
|
+
#
|
7
|
+
# Heavily based on the Schmooze library https://github.com/Shopify/schmooze
|
8
|
+
#
|
9
|
+
Error = Class.new(StandardError)
|
10
|
+
DependencyError = Class.new(Error)
|
11
|
+
module JavaScript # rubocop:disable Style/Documentation
|
12
|
+
Error = Class.new(::Grover::Error)
|
13
|
+
UnknownError = Class.new(Error)
|
14
|
+
def self.const_missing(name)
|
15
|
+
const_set name, Class.new(Error)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -17,13 +17,13 @@ class Grover
|
|
17
17
|
|
18
18
|
def self.translate_relative_paths(html, root_url)
|
19
19
|
# Try out this regexp using rubular http://rubular.com/r/hiAxBNX7KE
|
20
|
-
html.gsub(%r{(href|src)=(['"])/([^/"']([
|
20
|
+
html.gsub(%r{(href|src)=(['"])/([^/"']([^"']*|[^"']*))?['"]}, "\\1=\\2#{root_url}\\3\\2")
|
21
21
|
end
|
22
22
|
private_class_method :translate_relative_paths
|
23
23
|
|
24
24
|
def self.translate_relative_protocols(body, protocol)
|
25
25
|
# Try out this regexp using rubular http://rubular.com/r/0Ohk0wFYxV
|
26
|
-
body.gsub(%r{(href|src)=(['"])//([
|
26
|
+
body.gsub(%r{(href|src)=(['"])//([^"']*|[^"']*)['"]}, "\\1=\\2#{protocol}://\\3\\2")
|
27
27
|
end
|
28
28
|
private_class_method :translate_relative_protocols
|
29
29
|
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
// Setup imports
|
2
|
+
try {
|
3
|
+
const Module = require('module');
|
4
|
+
// resolve puppeteer from the CWD instead of where this script is located
|
5
|
+
var puppeteer = require(require.resolve('puppeteer', { paths: Module._nodeModulePaths(process.cwd()) }));
|
6
|
+
} catch (e) {
|
7
|
+
process.stdout.write(JSON.stringify(['err', e.toString()]));
|
8
|
+
process.stdout.write("\n");
|
9
|
+
process.exit(1);
|
10
|
+
}
|
11
|
+
process.stdout.write("[\"ok\"]\n");
|
12
|
+
|
13
|
+
const _processPage = (async (convertAction, urlOrHtml, options) => {
|
14
|
+
let browser;
|
15
|
+
try {
|
16
|
+
const launchParams = {
|
17
|
+
args: process.env.GROVER_NO_SANDBOX === 'true' ? ['--no-sandbox', '--disable-setuid-sandbox'] : []
|
18
|
+
};
|
19
|
+
|
20
|
+
// Configure puppeteer debugging options
|
21
|
+
const debug = options.debug; delete options.debug;
|
22
|
+
if (typeof debug === 'object' && !!debug) {
|
23
|
+
if (debug.headless !== undefined) { launchParams.headless = debug.headless; }
|
24
|
+
if (debug.devtools !== undefined) { launchParams.devtools = debug.devtools; }
|
25
|
+
}
|
26
|
+
|
27
|
+
// Configure additional launch arguments
|
28
|
+
const args = options.launchArgs; delete options.launchArgs;
|
29
|
+
if (Array.isArray(args)) {
|
30
|
+
launchParams.args = launchParams.args.concat(args);
|
31
|
+
}
|
32
|
+
|
33
|
+
// Set executable path if given
|
34
|
+
const executablePath = options.executablePath; delete options.executablePath;
|
35
|
+
if (executablePath) {
|
36
|
+
launchParams.executablePath = executablePath;
|
37
|
+
}
|
38
|
+
|
39
|
+
// Launch the browser and create a page
|
40
|
+
browser = await puppeteer.launch(launchParams);
|
41
|
+
const page = await browser.newPage();
|
42
|
+
|
43
|
+
// Basic auth
|
44
|
+
const username = options.username; delete options.username
|
45
|
+
const password = options.password; delete options.password
|
46
|
+
if (username !== undefined && password !== undefined) {
|
47
|
+
await page.authenticate({ username, password });
|
48
|
+
}
|
49
|
+
|
50
|
+
// Setting cookies
|
51
|
+
const cookies = options.cookies; delete options.cookies
|
52
|
+
if (Array.isArray(cookies)) {
|
53
|
+
await page.setCookie(...cookies);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Set caching flag (if provided)
|
57
|
+
const cache = options.cache; delete options.cache;
|
58
|
+
if (cache !== undefined) {
|
59
|
+
await page.setCacheEnabled(cache);
|
60
|
+
}
|
61
|
+
|
62
|
+
// Setup timeout option (if provided)
|
63
|
+
let requestOptions = {};
|
64
|
+
const timeout = options.timeout; delete options.timeout;
|
65
|
+
if (timeout !== undefined) {
|
66
|
+
requestOptions.timeout = timeout;
|
67
|
+
}
|
68
|
+
|
69
|
+
// Setup viewport options (if provided)
|
70
|
+
const viewport = options.viewport; delete options.viewport;
|
71
|
+
if (viewport !== undefined) {
|
72
|
+
await page.setViewport(viewport);
|
73
|
+
}
|
74
|
+
|
75
|
+
const waitUntil = options.waitUntil; delete options.waitUntil;
|
76
|
+
if (urlOrHtml.match(/^http/i)) {
|
77
|
+
// Request is for a URL, so request it
|
78
|
+
requestOptions.waitUntil = waitUntil || 'networkidle2';
|
79
|
+
await page.goto(urlOrHtml, requestOptions);
|
80
|
+
} else {
|
81
|
+
// Request is some HTML content. Use request interception to assign the body
|
82
|
+
requestOptions.waitUntil = waitUntil || 'networkidle0';
|
83
|
+
await page.setRequestInterception(true);
|
84
|
+
page.once('request', request => {
|
85
|
+
request.respond({ body: urlOrHtml });
|
86
|
+
// Reset the request interception
|
87
|
+
// (we only want to intercept the first request - ie our HTML)
|
88
|
+
page.on('request', request => request.continue());
|
89
|
+
});
|
90
|
+
const displayUrl = options.displayUrl; delete options.displayUrl;
|
91
|
+
await page.goto(displayUrl || 'http://example.com', requestOptions);
|
92
|
+
}
|
93
|
+
|
94
|
+
// If specified, emulate the media type
|
95
|
+
const emulateMedia = options.emulateMedia; delete options.emulateMedia;
|
96
|
+
if (emulateMedia !== undefined) {
|
97
|
+
if (typeof page.emulateMediaType == 'function') {
|
98
|
+
await page.emulateMediaType(emulateMedia);
|
99
|
+
} else {
|
100
|
+
await page.emulateMedia(emulateMedia);
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
// If specified, evaluate script on the page
|
105
|
+
const executeScript = options.executeScript; delete options.executeScript;
|
106
|
+
if (executeScript !== undefined) {
|
107
|
+
await page.evaluate(executeScript);
|
108
|
+
}
|
109
|
+
|
110
|
+
// If specified, wait for selector
|
111
|
+
const waitForSelector = options.waitForSelector; delete options.waitForSelector;
|
112
|
+
const waitForSelectorOptions = options.waitForSelectorOptions; delete options.waitForSelectorOptions;
|
113
|
+
if (waitForSelector !== undefined) {
|
114
|
+
await page.waitForSelector(waitForSelector, waitForSelectorOptions)
|
115
|
+
}
|
116
|
+
|
117
|
+
// If we're running puppeteer in headless mode, return the converted PDF
|
118
|
+
if (debug === undefined || (typeof debug === 'object' && (debug.headless === undefined || debug.headless))) {
|
119
|
+
return await page[convertAction](options);
|
120
|
+
}
|
121
|
+
} finally {
|
122
|
+
if (browser) {
|
123
|
+
await browser.close();
|
124
|
+
}
|
125
|
+
}
|
126
|
+
});
|
127
|
+
|
128
|
+
function _handleError(error) {
|
129
|
+
if (error instanceof Error) {
|
130
|
+
process.stdout.write(
|
131
|
+
JSON.stringify(['err', error.toString().replace(new RegExp('^' + error.name + ': '), ''), error.name])
|
132
|
+
);
|
133
|
+
} else {
|
134
|
+
process.stdout.write(JSON.stringify(['err', error.toString()]));
|
135
|
+
}
|
136
|
+
process.stdout.write("\n");
|
137
|
+
}
|
138
|
+
|
139
|
+
// Interface for communicating between Ruby processor and Node processor
|
140
|
+
require('readline').createInterface({
|
141
|
+
input: process.stdin,
|
142
|
+
terminal: false,
|
143
|
+
}).on('line', function(line) {
|
144
|
+
try {
|
145
|
+
Promise.resolve(_processPage.apply(null, JSON.parse(line)))
|
146
|
+
.then(function (result) {
|
147
|
+
process.stdout.write(JSON.stringify(['ok', result]));
|
148
|
+
process.stdout.write("\n");
|
149
|
+
})
|
150
|
+
.catch(_handleError);
|
151
|
+
} catch(error) {
|
152
|
+
_handleError(error);
|
153
|
+
}
|
154
|
+
});
|
data/lib/grover/middleware.rb
CHANGED
@@ -97,9 +97,15 @@ class Grover
|
|
97
97
|
def create_grover_for_response(response)
|
98
98
|
body = response.respond_to?(:body) ? response.body : response.join
|
99
99
|
body = body.join if body.is_a?(Array)
|
100
|
-
|
101
100
|
body = HTMLPreprocessor.process body, root_url, protocol
|
102
|
-
|
101
|
+
|
102
|
+
options = { display_url: request_url }
|
103
|
+
cookies = Rack::Utils.parse_cookies(env).map do |name, value|
|
104
|
+
{ name: name, value: Rack::Utils.escape(value), domain: env['HTTP_HOST'] }
|
105
|
+
end
|
106
|
+
options[:cookies] = cookies if cookies.any?
|
107
|
+
|
108
|
+
Grover.new(body, options)
|
103
109
|
end
|
104
110
|
|
105
111
|
def add_cover_content(grover)
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'grover/utils'
|
4
|
+
require 'grover/options_fixer'
|
5
|
+
|
6
|
+
class Grover
|
7
|
+
#
|
8
|
+
# Build options from Grover.configuration, meta_options, and passed-in options
|
9
|
+
#
|
10
|
+
class OptionsBuilder < Hash
|
11
|
+
def initialize(options, url)
|
12
|
+
@url = url
|
13
|
+
combined = grover_configuration
|
14
|
+
Utils.deep_merge! combined, Utils.deep_stringify_keys(options)
|
15
|
+
Utils.deep_merge! combined, meta_options unless url_source?
|
16
|
+
|
17
|
+
update OptionsFixer.new(combined).run
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def grover_configuration
|
23
|
+
Utils.deep_stringify_keys Grover.configuration.options
|
24
|
+
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# Extract out options from meta tags in the source - based on code from PDFKit project
|
28
|
+
#
|
29
|
+
def meta_options
|
30
|
+
meta_opts = {}
|
31
|
+
|
32
|
+
meta_tags.each do |meta|
|
33
|
+
tag_name = meta['name'] && meta['name'][/#{Grover.configuration.meta_tag_prefix}([a-z_-]+)/, 1]
|
34
|
+
next unless tag_name
|
35
|
+
|
36
|
+
Utils.deep_assign meta_opts, tag_name.split('-'), meta['content']
|
37
|
+
end
|
38
|
+
|
39
|
+
meta_opts
|
40
|
+
end
|
41
|
+
|
42
|
+
def meta_tags
|
43
|
+
Nokogiri::HTML(@url).xpath('//meta')
|
44
|
+
end
|
45
|
+
|
46
|
+
def url_source?
|
47
|
+
@url.match(/\Ahttp/i)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'grover/utils'
|
4
|
+
|
5
|
+
class Grover
|
6
|
+
#
|
7
|
+
# Convert string option values to boolean, numeric, and array literals
|
8
|
+
#
|
9
|
+
class OptionsFixer
|
10
|
+
FALSE_VALUES = [nil, false, 0, '0', 'f', 'F', 'false', 'FALSE', 'off', 'OFF'].freeze
|
11
|
+
|
12
|
+
def initialize(options)
|
13
|
+
@options = options
|
14
|
+
end
|
15
|
+
|
16
|
+
def run
|
17
|
+
fix_boolean_options!
|
18
|
+
fix_integer_options!
|
19
|
+
fix_float_options!
|
20
|
+
fix_array_options!
|
21
|
+
@options
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def fix_options!(*option_paths)
|
27
|
+
option_paths.each do |option_path|
|
28
|
+
keys = option_path.split '.'
|
29
|
+
value = @options.dig(*keys)
|
30
|
+
Utils.deep_assign(@options, keys, yield(value)) if value
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def fix_boolean_options!
|
35
|
+
fix_options!(
|
36
|
+
'display_header_footer', 'full_page', 'landscape', 'omit_background', 'prefer_css_page_size',
|
37
|
+
'print_background', 'viewport.has_touch', 'viewport.is_landscape', 'viewport.is_mobile'
|
38
|
+
) { |value| !FALSE_VALUES.include?(value) }
|
39
|
+
end
|
40
|
+
|
41
|
+
def fix_integer_options!
|
42
|
+
fix_options!(
|
43
|
+
'viewport.height', 'viewport.width',
|
44
|
+
&:to_i
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
def fix_float_options!
|
49
|
+
fix_options!(
|
50
|
+
'clip.height', 'clip.width', 'clip.x', 'clip.y', 'quality', 'scale', 'viewport.device_scale_factor',
|
51
|
+
&:to_f
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
def fix_array_options!
|
56
|
+
fix_options!('launch_args') do |value|
|
57
|
+
value.is_a?(String) ? YAML.safe_load(value) : value
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'open3'
|
5
|
+
|
6
|
+
class Grover
|
7
|
+
#
|
8
|
+
# Processor helper class for calling out to Puppeteer NodeJS library
|
9
|
+
#
|
10
|
+
# Heavily based on the Schmooze library https://github.com/Shopify/schmooze
|
11
|
+
#
|
12
|
+
class Processor
|
13
|
+
def initialize(app_root)
|
14
|
+
@app_root = app_root
|
15
|
+
end
|
16
|
+
|
17
|
+
def convert(method, url_or_html, options)
|
18
|
+
spawn_process
|
19
|
+
ensure_packages_are_initiated
|
20
|
+
result = call_js_method method, url_or_html, options
|
21
|
+
return unless result
|
22
|
+
|
23
|
+
result['data'].pack('C*')
|
24
|
+
ensure
|
25
|
+
cleanup_process if stdin
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
attr_reader :app_root, :stdin, :stdout, :stderr, :wait_thr
|
31
|
+
|
32
|
+
def spawn_process
|
33
|
+
@stdin, @stdout, @stderr, @wait_thr = Open3.popen3(
|
34
|
+
'node',
|
35
|
+
File.expand_path(File.join(__dir__, 'js/processor.js')),
|
36
|
+
chdir: app_root
|
37
|
+
)
|
38
|
+
end
|
39
|
+
|
40
|
+
def ensure_packages_are_initiated
|
41
|
+
input = stdout.gets
|
42
|
+
raise Grover::Error, "Failed to instantiate worker process:\n#{stderr.read}" if input.nil?
|
43
|
+
|
44
|
+
result = JSON.parse(input)
|
45
|
+
return if result[0] == 'ok'
|
46
|
+
|
47
|
+
cleanup_process
|
48
|
+
parse_package_error result[1]
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse_package_error(error_message) # rubocop:disable Metrics/MethodLength
|
52
|
+
package_name = error_message[/^Error: Cannot find module '(.*)'$/, 1]
|
53
|
+
raise Grover::Error, error_message unless package_name
|
54
|
+
|
55
|
+
begin
|
56
|
+
%w[dependencies devDependencies].each do |key|
|
57
|
+
next unless package_json.key?(key) && package_json[key].key?(package_name)
|
58
|
+
|
59
|
+
raise Grover::DependencyError, Utils.squish(<<~ERROR)
|
60
|
+
Cannot find module '#{package_name}'.
|
61
|
+
The module was found in '#{package_json_path}' however, please run 'npm install' from '#{app_root}'
|
62
|
+
ERROR
|
63
|
+
end
|
64
|
+
rescue Errno::ENOENT # rubocop:disable Lint/SuppressedException
|
65
|
+
end
|
66
|
+
raise Grover::DependencyError, Utils.squish(<<~ERROR)
|
67
|
+
Cannot find module '#{package_name}'. You need to add it to '#{package_json_path}' and run 'npm install'
|
68
|
+
ERROR
|
69
|
+
end
|
70
|
+
|
71
|
+
def package_json_path
|
72
|
+
@package_json_path ||= File.join(app_root, 'package.json')
|
73
|
+
end
|
74
|
+
|
75
|
+
def package_json
|
76
|
+
@package_json ||= JSON.parse(File.read(package_json_path))
|
77
|
+
end
|
78
|
+
|
79
|
+
def call_js_method(method, url_or_html, options) # rubocop:disable Metrics/MethodLength
|
80
|
+
stdin.puts JSON.dump([method, url_or_html, options])
|
81
|
+
input = stdout.gets
|
82
|
+
raise Errno::EPIPE, "Can't read from worker" if input.nil?
|
83
|
+
|
84
|
+
status, message, error_class = JSON.parse(input)
|
85
|
+
|
86
|
+
if status == 'ok'
|
87
|
+
message
|
88
|
+
elsif error_class.nil?
|
89
|
+
raise Grover::JavaScript::UnknownError, message
|
90
|
+
else
|
91
|
+
raise Grover::JavaScript.const_get(error_class, false), message
|
92
|
+
end
|
93
|
+
rescue Errno::EPIPE, IOError
|
94
|
+
raise Grover::Error, "Worker process failed:\n#{stderr.read}"
|
95
|
+
end
|
96
|
+
|
97
|
+
def cleanup_process
|
98
|
+
stdin.close
|
99
|
+
stdout.close
|
100
|
+
stderr.close
|
101
|
+
wait_thr.join
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/lib/grover/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.2
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Bromwich
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: combine_pdf
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: schmooze
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0.2'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0.2'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: mini_magick
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +143,9 @@ dependencies:
|
|
157
143
|
- - "~>"
|
158
144
|
- !ruby/object:Gem::Version
|
159
145
|
version: '0.17'
|
146
|
+
- - "<"
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '0.18'
|
160
149
|
type: :development
|
161
150
|
prerelease: false
|
162
151
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -164,6 +153,9 @@ dependencies:
|
|
164
153
|
- - "~>"
|
165
154
|
- !ruby/object:Gem::Version
|
166
155
|
version: '0.17'
|
156
|
+
- - "<"
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0.18'
|
167
159
|
description: Transform HTML into PDF/PNG/JPEG using Google Puppeteer/Chromium
|
168
160
|
email:
|
169
161
|
- abromwich@studiosity.com
|
@@ -175,8 +167,13 @@ files:
|
|
175
167
|
- lib/active_support_ext/object/duplicable.rb
|
176
168
|
- lib/grover.rb
|
177
169
|
- lib/grover/configuration.rb
|
170
|
+
- lib/grover/errors.rb
|
178
171
|
- lib/grover/html_preprocessor.rb
|
172
|
+
- lib/grover/js/processor.js
|
179
173
|
- lib/grover/middleware.rb
|
174
|
+
- lib/grover/options_builder.rb
|
175
|
+
- lib/grover/options_fixer.rb
|
176
|
+
- lib/grover/processor.rb
|
180
177
|
- lib/grover/utils.rb
|
181
178
|
- lib/grover/version.rb
|
182
179
|
homepage: https://github.com/Studiosity/grover
|
@@ -198,8 +195,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
198
195
|
- !ruby/object:Gem::Version
|
199
196
|
version: '0'
|
200
197
|
requirements: []
|
201
|
-
|
202
|
-
rubygems_version: 2.7.6.2
|
198
|
+
rubygems_version: 3.0.6
|
203
199
|
signing_key:
|
204
200
|
specification_version: 4
|
205
201
|
summary: A Ruby gem to transform HTML into PDF, PNG or JPEG by wrapping the NodeJS
|