grover 0.11.4 → 0.12.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 130c5f7d5181084098d85f0c6c8ff4afa0ba285746a3532cea2cbf6b3f28eb26
4
- data.tar.gz: dbd1c01477c9c8133511e0c0999ceef67eb92be82b113ce06761678e82019542
3
+ metadata.gz: 18d371bdb009b39ba004d06f24c35ddb4f51a0cd253082481d04b1c1dac45b57
4
+ data.tar.gz: f20bfd80f394ef4c3c7e98128a6d0d5615387af7c1c6733f867ee24ead91c9e5
5
5
  SHA512:
6
- metadata.gz: 9454063053df73117ce711fc4560ba38be688310399ba5f16f806ce9fe572529b84d152cd22ce51c94ac582866e26db253dd011bbf8fcb807ef7648cf5e19208
7
- data.tar.gz: 921c5e905f3f1416f543037cb85c358552558b27cd8535a6720cc62f6aa1c7d3d3c7a62b07481543ccdfc704c533c3a610ff312061089c1f7b2c304c91e1848d
6
+ metadata.gz: 280be92cb4f2fd14c94341d75b3564add657ec7acc496fe3cffd0b1d0d7bcde86ab492112fec7522fe6c666cc95045f8f5eaf06e5e6cd6d034abab443c8955ed
7
+ data.tar.gz: 5a30bf3b21ea80eeb6b6f54c559f0fe7d0fd0559941028cebc71fd96fbd00dae24b2d67ecec29a413931cc0f883b4323282cc3308f69f370649720bd7a64639d
@@ -5,144 +5,20 @@ require 'grover/version'
5
5
  require 'grover/utils'
6
6
  require 'active_support_ext/object/deep_dup' unless defined?(ActiveSupport)
7
7
 
8
+ require 'grover/errors'
8
9
  require 'grover/html_preprocessor'
9
10
  require 'grover/middleware'
10
11
  require 'grover/configuration'
11
12
  require 'grover/options_builder'
13
+ require 'grover/processor'
12
14
 
13
15
  require 'nokogiri'
14
- require 'schmooze'
15
16
  require 'yaml'
16
17
 
17
18
  #
18
19
  # Grover interface for converting HTML to PDF
19
20
  #
20
21
  class Grover
21
- #
22
- # Processor helper class for calling out to Puppeteer NodeJS library
23
- #
24
- class Processor < Schmooze::Base
25
- dependencies puppeteer: 'puppeteer'
26
-
27
- def self.launch_params
28
- ENV['GROVER_NO_SANDBOX'] == 'true' ? "{args: ['--no-sandbox', '--disable-setuid-sandbox']}" : '{args: []}'
29
- end
30
-
31
- def self.convert_function(convert_action)
32
- <<~FUNCTION
33
- async (url_or_html, options) => {
34
- let browser;
35
- try {
36
- let launchParams = #{launch_params};
37
-
38
- // Configure puppeteer debugging options
39
- const debug = options.debug; delete options.debug;
40
- if (typeof debug === 'object' && !!debug) {
41
- if (debug.headless != undefined) { launchParams.headless = debug.headless; }
42
- if (debug.devtools != undefined) { launchParams.devtools = debug.devtools; }
43
- }
44
-
45
- // Configure additional launch arguments
46
- const args = options.launchArgs; delete options.launchArgs;
47
- if (Array.isArray(args)) {
48
- launchParams.args = launchParams.args.concat(args);
49
- }
50
-
51
- // Set executable path if given
52
- const executablePath = options.executablePath; delete options.executablePath;
53
- if (executablePath) {
54
- launchParams.executablePath = executablePath;
55
- }
56
-
57
- // Launch the browser and create a page
58
- browser = await puppeteer.launch(launchParams);
59
- const page = await browser.newPage();
60
-
61
- // Basic auth
62
- const username = options.username; delete options.username
63
- const password = options.password; delete options.password
64
- if (username != undefined && password != undefined) {
65
- await page.authenticate({ username, password });
66
- }
67
-
68
- // Setting cookies
69
- const cookies = options.cookies; delete options.cookies
70
- if (Array.isArray(cookies)) {
71
- await page.setCookie(...cookies);
72
- }
73
-
74
- // Set caching flag (if provided)
75
- const cache = options.cache; delete options.cache;
76
- if (cache != undefined) {
77
- await page.setCacheEnabled(cache);
78
- }
79
-
80
- // Setup timeout option (if provided)
81
- let request_options = {};
82
- const timeout = options.timeout; delete options.timeout;
83
- if (timeout != undefined) {
84
- request_options.timeout = timeout;
85
- }
86
-
87
- // Setup viewport options (if provided)
88
- const viewport = options.viewport; delete options.viewport;
89
- if (viewport != undefined) {
90
- await page.setViewport(viewport);
91
- }
92
-
93
- const waitUntil = options.waitUntil; delete options.waitUntil;
94
- if (url_or_html.match(/^http/i)) {
95
- // Request is for a URL, so request it
96
- request_options.waitUntil = waitUntil || 'networkidle2';
97
- await page.goto(url_or_html, request_options);
98
- } else {
99
- // Request is some HTML content. Use request interception to assign the body
100
- request_options.waitUntil = waitUntil || 'networkidle0';
101
- await page.setRequestInterception(true);
102
- page.once('request', request => {
103
- request.respond({ body: url_or_html });
104
- // Reset the request interception
105
- // (we only want to intercept the first request - ie our HTML)
106
- page.on('request', request => request.continue());
107
- });
108
- const displayUrl = options.displayUrl; delete options.displayUrl;
109
- await page.goto(displayUrl || 'http://example.com', request_options);
110
- }
111
-
112
- // If specified, emulate the media type
113
- const emulateMedia = options.emulateMedia; delete options.emulateMedia;
114
- if (emulateMedia != undefined) {
115
- if (typeof page.emulateMediaType == 'function') {
116
- await page.emulateMediaType(emulateMedia);
117
- } else {
118
- await page.emulateMedia(emulateMedia);
119
- }
120
- }
121
-
122
- // If specified, evaluate script on the page
123
- const executeScript = options.executeScript; delete options.executeScript;
124
- if (executeScript != undefined) {
125
- await page.evaluate(executeScript);
126
- }
127
-
128
- // If we're running puppeteer in headless mode, return the converted PDF
129
- if (debug == undefined || (typeof debug === 'object' && (debug.headless == undefined || debug.headless))) {
130
- return await page.#{convert_action}(options);
131
- }
132
- } finally {
133
- if (browser) {
134
- await browser.close();
135
- }
136
- }
137
- }
138
- FUNCTION
139
- end
140
-
141
- method :convert_pdf, convert_function('pdf')
142
- method :convert_screenshot, convert_function('screenshot')
143
- end
144
- private_constant :Processor
145
-
146
22
  DEFAULT_HEADER_TEMPLATE = "<div class='date text left'></div><div class='title text center'></div>"
147
23
  DEFAULT_FOOTER_TEMPLATE = <<~HTML
148
24
  <div class='url text left grow'></div>
@@ -171,10 +47,7 @@ class Grover
171
47
  # @return [String] The resulting PDF data
172
48
  #
173
49
  def to_pdf(path = nil)
174
- result = processor.convert_pdf @url, normalized_options(path: path)
175
- return unless result
176
-
177
- result['data'].pack('C*')
50
+ processor.convert :pdf, @url, normalized_options(path: path)
178
51
  end
179
52
 
180
53
  #
@@ -186,11 +59,8 @@ class Grover
186
59
  #
187
60
  def screenshot(path: nil, format: nil)
188
61
  options = normalized_options(path: path)
189
- options['type'] = format if format.is_a? ::String
190
- result = processor.convert_screenshot @url, options
191
- return unless result
192
-
193
- result['data'].pack('C*')
62
+ options['type'] = format if %w[png jpeg].include? format
63
+ processor.convert :screenshot, @url, options
194
64
  end
195
65
 
196
66
  #
@@ -200,7 +70,7 @@ class Grover
200
70
  # @return [String] The resulting PNG data
201
71
  #
202
72
  def to_png(path = nil)
203
- screenshot(path: path, format: 'png')
73
+ screenshot path: path, format: 'png'
204
74
  end
205
75
 
206
76
  #
@@ -210,7 +80,7 @@ class Grover
210
80
  # @return [String] The resulting JPEG data
211
81
  #
212
82
  def to_jpeg(path = nil)
213
- screenshot(path: path, format: 'jpeg')
83
+ screenshot path: path, format: 'jpeg'
214
84
  end
215
85
 
216
86
  #
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Grover
4
+ #
5
+ # Error classes for calling out to Puppeteer NodeJS library
6
+ #
7
+ # Heavily based on the Schmooze library https://github.com/Shopify/schmooze
8
+ #
9
+ Error = Class.new(StandardError)
10
+ DependencyError = Class.new(Error)
11
+ module JavaScript # rubocop:disable Style/Documentation
12
+ Error = Class.new(::Grover::Error)
13
+ UnknownError = Class.new(Error)
14
+ def self.const_missing(name)
15
+ const_set name, Class.new(Error)
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,147 @@
1
+ // Setup imports
2
+ try {
3
+ const Module = require('module');
4
+ // resolve puppeteer from the CWD instead of where this script is located
5
+ var puppeteer = require(require.resolve('puppeteer', { paths: Module._nodeModulePaths(process.cwd()) }));
6
+ } catch (e) {
7
+ process.stdout.write(JSON.stringify(['err', e.toString()]));
8
+ process.stdout.write("\n");
9
+ process.exit(1);
10
+ }
11
+ process.stdout.write("[\"ok\"]\n");
12
+
13
+ const _processPage = (async (convertAction, urlOrHtml, options) => {
14
+ let browser;
15
+ try {
16
+ const launchParams = {
17
+ args: process.env.GROVER_NO_SANDBOX === 'true' ? ['--no-sandbox', '--disable-setuid-sandbox'] : []
18
+ };
19
+
20
+ // Configure puppeteer debugging options
21
+ const debug = options.debug; delete options.debug;
22
+ if (typeof debug === 'object' && !!debug) {
23
+ if (debug.headless !== undefined) { launchParams.headless = debug.headless; }
24
+ if (debug.devtools !== undefined) { launchParams.devtools = debug.devtools; }
25
+ }
26
+
27
+ // Configure additional launch arguments
28
+ const args = options.launchArgs; delete options.launchArgs;
29
+ if (Array.isArray(args)) {
30
+ launchParams.args = launchParams.args.concat(args);
31
+ }
32
+
33
+ // Set executable path if given
34
+ const executablePath = options.executablePath; delete options.executablePath;
35
+ if (executablePath) {
36
+ launchParams.executablePath = executablePath;
37
+ }
38
+
39
+ // Launch the browser and create a page
40
+ browser = await puppeteer.launch(launchParams);
41
+ const page = await browser.newPage();
42
+
43
+ // Basic auth
44
+ const username = options.username; delete options.username
45
+ const password = options.password; delete options.password
46
+ if (username !== undefined && password !== undefined) {
47
+ await page.authenticate({ username, password });
48
+ }
49
+
50
+ // Setting cookies
51
+ const cookies = options.cookies; delete options.cookies
52
+ if (Array.isArray(cookies)) {
53
+ await page.setCookie(...cookies);
54
+ }
55
+
56
+ // Set caching flag (if provided)
57
+ const cache = options.cache; delete options.cache;
58
+ if (cache !== undefined) {
59
+ await page.setCacheEnabled(cache);
60
+ }
61
+
62
+ // Setup timeout option (if provided)
63
+ let requestOptions = {};
64
+ const timeout = options.timeout; delete options.timeout;
65
+ if (timeout !== undefined) {
66
+ requestOptions.timeout = timeout;
67
+ }
68
+
69
+ // Setup viewport options (if provided)
70
+ const viewport = options.viewport; delete options.viewport;
71
+ if (viewport !== undefined) {
72
+ await page.setViewport(viewport);
73
+ }
74
+
75
+ const waitUntil = options.waitUntil; delete options.waitUntil;
76
+ if (urlOrHtml.match(/^http/i)) {
77
+ // Request is for a URL, so request it
78
+ requestOptions.waitUntil = waitUntil || 'networkidle2';
79
+ await page.goto(urlOrHtml, requestOptions);
80
+ } else {
81
+ // Request is some HTML content. Use request interception to assign the body
82
+ requestOptions.waitUntil = waitUntil || 'networkidle0';
83
+ await page.setRequestInterception(true);
84
+ page.once('request', request => {
85
+ request.respond({ body: urlOrHtml });
86
+ // Reset the request interception
87
+ // (we only want to intercept the first request - ie our HTML)
88
+ page.on('request', request => request.continue());
89
+ });
90
+ const displayUrl = options.displayUrl; delete options.displayUrl;
91
+ await page.goto(displayUrl || 'http://example.com', requestOptions);
92
+ }
93
+
94
+ // If specified, emulate the media type
95
+ const emulateMedia = options.emulateMedia; delete options.emulateMedia;
96
+ if (emulateMedia !== undefined) {
97
+ if (typeof page.emulateMediaType == 'function') {
98
+ await page.emulateMediaType(emulateMedia);
99
+ } else {
100
+ await page.emulateMedia(emulateMedia);
101
+ }
102
+ }
103
+
104
+ // If specified, evaluate script on the page
105
+ const executeScript = options.executeScript; delete options.executeScript;
106
+ if (executeScript !== undefined) {
107
+ await page.evaluate(executeScript);
108
+ }
109
+
110
+ // If we're running puppeteer in headless mode, return the converted PDF
111
+ if (debug === undefined || (typeof debug === 'object' && (debug.headless === undefined || debug.headless))) {
112
+ return await page[convertAction](options);
113
+ }
114
+ } finally {
115
+ if (browser) {
116
+ await browser.close();
117
+ }
118
+ }
119
+ });
120
+
121
+ function _handleError(error) {
122
+ if (error instanceof Error) {
123
+ process.stdout.write(
124
+ JSON.stringify(['err', error.toString().replace(new RegExp('^' + error.name + ': '), ''), error.name])
125
+ );
126
+ } else {
127
+ process.stdout.write(JSON.stringify(['err', error.toString()]));
128
+ }
129
+ process.stdout.write("\n");
130
+ }
131
+
132
+ // Interface for communicating between Ruby processor and Node processor
133
+ require('readline').createInterface({
134
+ input: process.stdin,
135
+ terminal: false,
136
+ }).on('line', function(line) {
137
+ try {
138
+ Promise.resolve(_processPage.apply(null, JSON.parse(line)))
139
+ .then(function (result) {
140
+ process.stdout.write(JSON.stringify(['ok', result]));
141
+ process.stdout.write("\n");
142
+ })
143
+ .catch(_handleError);
144
+ } catch(error) {
145
+ _handleError(error);
146
+ }
147
+ });
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'open3'
5
+
6
+ class Grover
7
+ #
8
+ # Processor helper class for calling out to Puppeteer NodeJS library
9
+ #
10
+ # Heavily based on the Schmooze library https://github.com/Shopify/schmooze
11
+ #
12
+ class Processor
13
+ def initialize(app_root)
14
+ @app_root = app_root
15
+ end
16
+
17
+ def convert(method, url_or_html, options)
18
+ spawn_process
19
+ ensure_packages_are_initiated
20
+ result = call_js_method method, url_or_html, options
21
+ return unless result
22
+
23
+ result['data'].pack('C*')
24
+ ensure
25
+ cleanup_process
26
+ end
27
+
28
+ private
29
+
30
+ attr_reader :app_root, :stdin, :stdout, :stderr, :wait_thr
31
+
32
+ def spawn_process
33
+ @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(
34
+ 'node',
35
+ File.expand_path(File.join(__dir__, 'js/processor.js')),
36
+ chdir: app_root
37
+ )
38
+ end
39
+
40
+ def ensure_packages_are_initiated
41
+ input = stdout.gets
42
+ raise Grover::Error, "Failed to instantiate worker process:\n#{stderr.read}" if input.nil?
43
+
44
+ result = JSON.parse(input)
45
+ return if result[0] == 'ok'
46
+
47
+ cleanup_process
48
+ parse_package_error result[1]
49
+ end
50
+
51
+ def parse_package_error(error_message) # rubocop:disable Metrics/MethodLength
52
+ package_name = error_message[/^Error: Cannot find module '(.*)'$/, 1]
53
+ raise Grover::Error, error_message unless package_name
54
+
55
+ begin
56
+ %w[dependencies devDependencies].each do |key|
57
+ next unless package_json.key?(key) && package_json[key].key?(package_name)
58
+
59
+ raise Grover::DependencyError, Utils.squish(<<~ERROR)
60
+ Cannot find module '#{package_name}'.
61
+ The module was found in '#{package_json_path}' however, please run 'npm install' from '#{app_root}'
62
+ ERROR
63
+ end
64
+ rescue Errno::ENOENT # rubocop:disable Lint/SuppressedException
65
+ end
66
+ raise Grover::DependencyError, Utils.squish(<<~ERROR)
67
+ Cannot find module '#{package_name}'. You need to add it to '#{package_json_path}' and run 'npm install'
68
+ ERROR
69
+ end
70
+
71
+ def package_json_path
72
+ @package_json_path ||= File.join(app_root, 'package.json')
73
+ end
74
+
75
+ def package_json
76
+ @package_json ||= JSON.parse(File.read(package_json_path))
77
+ end
78
+
79
+ def call_js_method(method, url_or_html, options) # rubocop:disable Metrics/MethodLength
80
+ stdin.puts JSON.dump([method, url_or_html, options])
81
+ input = stdout.gets
82
+ raise Errno::EPIPE, "Can't read from worker" if input.nil?
83
+
84
+ status, message, error_class = JSON.parse(input)
85
+
86
+ if status == 'ok'
87
+ message
88
+ elsif error_class.nil?
89
+ raise Grover::JavaScript::UnknownError, message
90
+ else
91
+ raise Grover::JavaScript.const_get(error_class, false), message
92
+ end
93
+ rescue Errno::EPIPE, IOError
94
+ raise Grover::Error, "Worker process failed:\n#{stderr.read}"
95
+ end
96
+
97
+ def cleanup_process
98
+ stdin.close
99
+ stdout.close
100
+ stderr.close
101
+ wait_thr.join
102
+ end
103
+ end
104
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Grover
4
- VERSION = '0.11.4'
4
+ VERSION = '0.12.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grover
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.4
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Bromwich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-24 00:00:00.000000000 Z
11
+ date: 2020-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: combine_pdf
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.0'
41
- - !ruby/object:Gem::Dependency
42
- name: schmooze
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '0.2'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0.2'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: mini_magick
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -181,10 +167,13 @@ files:
181
167
  - lib/active_support_ext/object/duplicable.rb
182
168
  - lib/grover.rb
183
169
  - lib/grover/configuration.rb
170
+ - lib/grover/errors.rb
184
171
  - lib/grover/html_preprocessor.rb
172
+ - lib/grover/js/processor.js
185
173
  - lib/grover/middleware.rb
186
174
  - lib/grover/options_builder.rb
187
175
  - lib/grover/options_fixer.rb
176
+ - lib/grover/processor.rb
188
177
  - lib/grover/utils.rb
189
178
  - lib/grover/version.rb
190
179
  homepage: https://github.com/Studiosity/grover