grover 0.11.4 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 130c5f7d5181084098d85f0c6c8ff4afa0ba285746a3532cea2cbf6b3f28eb26
4
- data.tar.gz: dbd1c01477c9c8133511e0c0999ceef67eb92be82b113ce06761678e82019542
3
+ metadata.gz: 18d371bdb009b39ba004d06f24c35ddb4f51a0cd253082481d04b1c1dac45b57
4
+ data.tar.gz: f20bfd80f394ef4c3c7e98128a6d0d5615387af7c1c6733f867ee24ead91c9e5
5
5
  SHA512:
6
- metadata.gz: 9454063053df73117ce711fc4560ba38be688310399ba5f16f806ce9fe572529b84d152cd22ce51c94ac582866e26db253dd011bbf8fcb807ef7648cf5e19208
7
- data.tar.gz: 921c5e905f3f1416f543037cb85c358552558b27cd8535a6720cc62f6aa1c7d3d3c7a62b07481543ccdfc704c533c3a610ff312061089c1f7b2c304c91e1848d
6
+ metadata.gz: 280be92cb4f2fd14c94341d75b3564add657ec7acc496fe3cffd0b1d0d7bcde86ab492112fec7522fe6c666cc95045f8f5eaf06e5e6cd6d034abab443c8955ed
7
+ data.tar.gz: 5a30bf3b21ea80eeb6b6f54c559f0fe7d0fd0559941028cebc71fd96fbd00dae24b2d67ecec29a413931cc0f883b4323282cc3308f69f370649720bd7a64639d
@@ -5,144 +5,20 @@ require 'grover/version'
5
5
  require 'grover/utils'
6
6
  require 'active_support_ext/object/deep_dup' unless defined?(ActiveSupport)
7
7
 
8
+ require 'grover/errors'
8
9
  require 'grover/html_preprocessor'
9
10
  require 'grover/middleware'
10
11
  require 'grover/configuration'
11
12
  require 'grover/options_builder'
13
+ require 'grover/processor'
12
14
 
13
15
  require 'nokogiri'
14
- require 'schmooze'
15
16
  require 'yaml'
16
17
 
17
18
  #
18
19
  # Grover interface for converting HTML to PDF
19
20
  #
20
21
  class Grover
21
- #
22
- # Processor helper class for calling out to Puppeteer NodeJS library
23
- #
24
- class Processor < Schmooze::Base
25
- dependencies puppeteer: 'puppeteer'
26
-
27
- def self.launch_params
28
- ENV['GROVER_NO_SANDBOX'] == 'true' ? "{args: ['--no-sandbox', '--disable-setuid-sandbox']}" : '{args: []}'
29
- end
30
-
31
- def self.convert_function(convert_action)
32
- <<~FUNCTION
33
- async (url_or_html, options) => {
34
- let browser;
35
- try {
36
- let launchParams = #{launch_params};
37
-
38
- // Configure puppeteer debugging options
39
- const debug = options.debug; delete options.debug;
40
- if (typeof debug === 'object' && !!debug) {
41
- if (debug.headless != undefined) { launchParams.headless = debug.headless; }
42
- if (debug.devtools != undefined) { launchParams.devtools = debug.devtools; }
43
- }
44
-
45
- // Configure additional launch arguments
46
- const args = options.launchArgs; delete options.launchArgs;
47
- if (Array.isArray(args)) {
48
- launchParams.args = launchParams.args.concat(args);
49
- }
50
-
51
- // Set executable path if given
52
- const executablePath = options.executablePath; delete options.executablePath;
53
- if (executablePath) {
54
- launchParams.executablePath = executablePath;
55
- }
56
-
57
- // Launch the browser and create a page
58
- browser = await puppeteer.launch(launchParams);
59
- const page = await browser.newPage();
60
-
61
- // Basic auth
62
- const username = options.username; delete options.username
63
- const password = options.password; delete options.password
64
- if (username != undefined && password != undefined) {
65
- await page.authenticate({ username, password });
66
- }
67
-
68
- // Setting cookies
69
- const cookies = options.cookies; delete options.cookies
70
- if (Array.isArray(cookies)) {
71
- await page.setCookie(...cookies);
72
- }
73
-
74
- // Set caching flag (if provided)
75
- const cache = options.cache; delete options.cache;
76
- if (cache != undefined) {
77
- await page.setCacheEnabled(cache);
78
- }
79
-
80
- // Setup timeout option (if provided)
81
- let request_options = {};
82
- const timeout = options.timeout; delete options.timeout;
83
- if (timeout != undefined) {
84
- request_options.timeout = timeout;
85
- }
86
-
87
- // Setup viewport options (if provided)
88
- const viewport = options.viewport; delete options.viewport;
89
- if (viewport != undefined) {
90
- await page.setViewport(viewport);
91
- }
92
-
93
- const waitUntil = options.waitUntil; delete options.waitUntil;
94
- if (url_or_html.match(/^http/i)) {
95
- // Request is for a URL, so request it
96
- request_options.waitUntil = waitUntil || 'networkidle2';
97
- await page.goto(url_or_html, request_options);
98
- } else {
99
- // Request is some HTML content. Use request interception to assign the body
100
- request_options.waitUntil = waitUntil || 'networkidle0';
101
- await page.setRequestInterception(true);
102
- page.once('request', request => {
103
- request.respond({ body: url_or_html });
104
- // Reset the request interception
105
- // (we only want to intercept the first request - ie our HTML)
106
- page.on('request', request => request.continue());
107
- });
108
- const displayUrl = options.displayUrl; delete options.displayUrl;
109
- await page.goto(displayUrl || 'http://example.com', request_options);
110
- }
111
-
112
- // If specified, emulate the media type
113
- const emulateMedia = options.emulateMedia; delete options.emulateMedia;
114
- if (emulateMedia != undefined) {
115
- if (typeof page.emulateMediaType == 'function') {
116
- await page.emulateMediaType(emulateMedia);
117
- } else {
118
- await page.emulateMedia(emulateMedia);
119
- }
120
- }
121
-
122
- // If specified, evaluate script on the page
123
- const executeScript = options.executeScript; delete options.executeScript;
124
- if (executeScript != undefined) {
125
- await page.evaluate(executeScript);
126
- }
127
-
128
- // If we're running puppeteer in headless mode, return the converted PDF
129
- if (debug == undefined || (typeof debug === 'object' && (debug.headless == undefined || debug.headless))) {
130
- return await page.#{convert_action}(options);
131
- }
132
- } finally {
133
- if (browser) {
134
- await browser.close();
135
- }
136
- }
137
- }
138
- FUNCTION
139
- end
140
-
141
- method :convert_pdf, convert_function('pdf')
142
- method :convert_screenshot, convert_function('screenshot')
143
- end
144
- private_constant :Processor
145
-
146
22
  DEFAULT_HEADER_TEMPLATE = "<div class='date text left'></div><div class='title text center'></div>"
147
23
  DEFAULT_FOOTER_TEMPLATE = <<~HTML
148
24
  <div class='url text left grow'></div>
@@ -171,10 +47,7 @@ class Grover
171
47
  # @return [String] The resulting PDF data
172
48
  #
173
49
  def to_pdf(path = nil)
174
- result = processor.convert_pdf @url, normalized_options(path: path)
175
- return unless result
176
-
177
- result['data'].pack('C*')
50
+ processor.convert :pdf, @url, normalized_options(path: path)
178
51
  end
179
52
 
180
53
  #
@@ -186,11 +59,8 @@ class Grover
186
59
  #
187
60
  def screenshot(path: nil, format: nil)
188
61
  options = normalized_options(path: path)
189
- options['type'] = format if format.is_a? ::String
190
- result = processor.convert_screenshot @url, options
191
- return unless result
192
-
193
- result['data'].pack('C*')
62
+ options['type'] = format if %w[png jpeg].include? format
63
+ processor.convert :screenshot, @url, options
194
64
  end
195
65
 
196
66
  #
@@ -200,7 +70,7 @@ class Grover
200
70
  # @return [String] The resulting PNG data
201
71
  #
202
72
  def to_png(path = nil)
203
- screenshot(path: path, format: 'png')
73
+ screenshot path: path, format: 'png'
204
74
  end
205
75
 
206
76
  #
@@ -210,7 +80,7 @@ class Grover
210
80
  # @return [String] The resulting JPEG data
211
81
  #
212
82
  def to_jpeg(path = nil)
213
- screenshot(path: path, format: 'jpeg')
83
+ screenshot path: path, format: 'jpeg'
214
84
  end
215
85
 
216
86
  #
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Grover
4
+ #
5
+ # Error classes for calling out to Puppeteer NodeJS library
6
+ #
7
+ # Heavily based on the Schmooze library https://github.com/Shopify/schmooze
8
+ #
9
+ Error = Class.new(StandardError)
10
+ DependencyError = Class.new(Error)
11
+ module JavaScript # rubocop:disable Style/Documentation
12
+ Error = Class.new(::Grover::Error)
13
+ UnknownError = Class.new(Error)
14
+ def self.const_missing(name)
15
+ const_set name, Class.new(Error)
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,147 @@
1
+ // Setup imports
2
+ try {
3
+ const Module = require('module');
4
+ // resolve puppeteer from the CWD instead of where this script is located
5
+ var puppeteer = require(require.resolve('puppeteer', { paths: Module._nodeModulePaths(process.cwd()) }));
6
+ } catch (e) {
7
+ process.stdout.write(JSON.stringify(['err', e.toString()]));
8
+ process.stdout.write("\n");
9
+ process.exit(1);
10
+ }
11
+ process.stdout.write("[\"ok\"]\n");
12
+
13
+ const _processPage = (async (convertAction, urlOrHtml, options) => {
14
+ let browser;
15
+ try {
16
+ const launchParams = {
17
+ args: process.env.GROVER_NO_SANDBOX === 'true' ? ['--no-sandbox', '--disable-setuid-sandbox'] : []
18
+ };
19
+
20
+ // Configure puppeteer debugging options
21
+ const debug = options.debug; delete options.debug;
22
+ if (typeof debug === 'object' && !!debug) {
23
+ if (debug.headless !== undefined) { launchParams.headless = debug.headless; }
24
+ if (debug.devtools !== undefined) { launchParams.devtools = debug.devtools; }
25
+ }
26
+
27
+ // Configure additional launch arguments
28
+ const args = options.launchArgs; delete options.launchArgs;
29
+ if (Array.isArray(args)) {
30
+ launchParams.args = launchParams.args.concat(args);
31
+ }
32
+
33
+ // Set executable path if given
34
+ const executablePath = options.executablePath; delete options.executablePath;
35
+ if (executablePath) {
36
+ launchParams.executablePath = executablePath;
37
+ }
38
+
39
+ // Launch the browser and create a page
40
+ browser = await puppeteer.launch(launchParams);
41
+ const page = await browser.newPage();
42
+
43
+ // Basic auth
44
+ const username = options.username; delete options.username
45
+ const password = options.password; delete options.password
46
+ if (username !== undefined && password !== undefined) {
47
+ await page.authenticate({ username, password });
48
+ }
49
+
50
+ // Setting cookies
51
+ const cookies = options.cookies; delete options.cookies
52
+ if (Array.isArray(cookies)) {
53
+ await page.setCookie(...cookies);
54
+ }
55
+
56
+ // Set caching flag (if provided)
57
+ const cache = options.cache; delete options.cache;
58
+ if (cache !== undefined) {
59
+ await page.setCacheEnabled(cache);
60
+ }
61
+
62
+ // Setup timeout option (if provided)
63
+ let requestOptions = {};
64
+ const timeout = options.timeout; delete options.timeout;
65
+ if (timeout !== undefined) {
66
+ requestOptions.timeout = timeout;
67
+ }
68
+
69
+ // Setup viewport options (if provided)
70
+ const viewport = options.viewport; delete options.viewport;
71
+ if (viewport !== undefined) {
72
+ await page.setViewport(viewport);
73
+ }
74
+
75
+ const waitUntil = options.waitUntil; delete options.waitUntil;
76
+ if (urlOrHtml.match(/^http/i)) {
77
+ // Request is for a URL, so request it
78
+ requestOptions.waitUntil = waitUntil || 'networkidle2';
79
+ await page.goto(urlOrHtml, requestOptions);
80
+ } else {
81
+ // Request is some HTML content. Use request interception to assign the body
82
+ requestOptions.waitUntil = waitUntil || 'networkidle0';
83
+ await page.setRequestInterception(true);
84
+ page.once('request', request => {
85
+ request.respond({ body: urlOrHtml });
86
+ // Reset the request interception
87
+ // (we only want to intercept the first request - ie our HTML)
88
+ page.on('request', request => request.continue());
89
+ });
90
+ const displayUrl = options.displayUrl; delete options.displayUrl;
91
+ await page.goto(displayUrl || 'http://example.com', requestOptions);
92
+ }
93
+
94
+ // If specified, emulate the media type
95
+ const emulateMedia = options.emulateMedia; delete options.emulateMedia;
96
+ if (emulateMedia !== undefined) {
97
+ if (typeof page.emulateMediaType == 'function') {
98
+ await page.emulateMediaType(emulateMedia);
99
+ } else {
100
+ await page.emulateMedia(emulateMedia);
101
+ }
102
+ }
103
+
104
+ // If specified, evaluate script on the page
105
+ const executeScript = options.executeScript; delete options.executeScript;
106
+ if (executeScript !== undefined) {
107
+ await page.evaluate(executeScript);
108
+ }
109
+
110
+ // If we're running puppeteer in headless mode, return the converted PDF
111
+ if (debug === undefined || (typeof debug === 'object' && (debug.headless === undefined || debug.headless))) {
112
+ return await page[convertAction](options);
113
+ }
114
+ } finally {
115
+ if (browser) {
116
+ await browser.close();
117
+ }
118
+ }
119
+ });
120
+
121
+ function _handleError(error) {
122
+ if (error instanceof Error) {
123
+ process.stdout.write(
124
+ JSON.stringify(['err', error.toString().replace(new RegExp('^' + error.name + ': '), ''), error.name])
125
+ );
126
+ } else {
127
+ process.stdout.write(JSON.stringify(['err', error.toString()]));
128
+ }
129
+ process.stdout.write("\n");
130
+ }
131
+
132
+ // Interface for communicating between Ruby processor and Node processor
133
+ require('readline').createInterface({
134
+ input: process.stdin,
135
+ terminal: false,
136
+ }).on('line', function(line) {
137
+ try {
138
+ Promise.resolve(_processPage.apply(null, JSON.parse(line)))
139
+ .then(function (result) {
140
+ process.stdout.write(JSON.stringify(['ok', result]));
141
+ process.stdout.write("\n");
142
+ })
143
+ .catch(_handleError);
144
+ } catch(error) {
145
+ _handleError(error);
146
+ }
147
+ });
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'open3'
5
+
6
+ class Grover
7
+ #
8
+ # Processor helper class for calling out to Puppeteer NodeJS library
9
+ #
10
+ # Heavily based on the Schmooze library https://github.com/Shopify/schmooze
11
+ #
12
+ class Processor
13
+ def initialize(app_root)
14
+ @app_root = app_root
15
+ end
16
+
17
+ def convert(method, url_or_html, options)
18
+ spawn_process
19
+ ensure_packages_are_initiated
20
+ result = call_js_method method, url_or_html, options
21
+ return unless result
22
+
23
+ result['data'].pack('C*')
24
+ ensure
25
+ cleanup_process
26
+ end
27
+
28
+ private
29
+
30
+ attr_reader :app_root, :stdin, :stdout, :stderr, :wait_thr
31
+
32
+ def spawn_process
33
+ @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(
34
+ 'node',
35
+ File.expand_path(File.join(__dir__, 'js/processor.js')),
36
+ chdir: app_root
37
+ )
38
+ end
39
+
40
+ def ensure_packages_are_initiated
41
+ input = stdout.gets
42
+ raise Grover::Error, "Failed to instantiate worker process:\n#{stderr.read}" if input.nil?
43
+
44
+ result = JSON.parse(input)
45
+ return if result[0] == 'ok'
46
+
47
+ cleanup_process
48
+ parse_package_error result[1]
49
+ end
50
+
51
+ def parse_package_error(error_message) # rubocop:disable Metrics/MethodLength
52
+ package_name = error_message[/^Error: Cannot find module '(.*)'$/, 1]
53
+ raise Grover::Error, error_message unless package_name
54
+
55
+ begin
56
+ %w[dependencies devDependencies].each do |key|
57
+ next unless package_json.key?(key) && package_json[key].key?(package_name)
58
+
59
+ raise Grover::DependencyError, Utils.squish(<<~ERROR)
60
+ Cannot find module '#{package_name}'.
61
+ The module was found in '#{package_json_path}' however, please run 'npm install' from '#{app_root}'
62
+ ERROR
63
+ end
64
+ rescue Errno::ENOENT # rubocop:disable Lint/SuppressedException
65
+ end
66
+ raise Grover::DependencyError, Utils.squish(<<~ERROR)
67
+ Cannot find module '#{package_name}'. You need to add it to '#{package_json_path}' and run 'npm install'
68
+ ERROR
69
+ end
70
+
71
+ def package_json_path
72
+ @package_json_path ||= File.join(app_root, 'package.json')
73
+ end
74
+
75
+ def package_json
76
+ @package_json ||= JSON.parse(File.read(package_json_path))
77
+ end
78
+
79
+ def call_js_method(method, url_or_html, options) # rubocop:disable Metrics/MethodLength
80
+ stdin.puts JSON.dump([method, url_or_html, options])
81
+ input = stdout.gets
82
+ raise Errno::EPIPE, "Can't read from worker" if input.nil?
83
+
84
+ status, message, error_class = JSON.parse(input)
85
+
86
+ if status == 'ok'
87
+ message
88
+ elsif error_class.nil?
89
+ raise Grover::JavaScript::UnknownError, message
90
+ else
91
+ raise Grover::JavaScript.const_get(error_class, false), message
92
+ end
93
+ rescue Errno::EPIPE, IOError
94
+ raise Grover::Error, "Worker process failed:\n#{stderr.read}"
95
+ end
96
+
97
+ def cleanup_process
98
+ stdin.close
99
+ stdout.close
100
+ stderr.close
101
+ wait_thr.join
102
+ end
103
+ end
104
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Grover
4
- VERSION = '0.11.4'
4
+ VERSION = '0.12.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grover
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.4
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Bromwich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-24 00:00:00.000000000 Z
11
+ date: 2020-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: combine_pdf
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.0'
41
- - !ruby/object:Gem::Dependency
42
- name: schmooze
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '0.2'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0.2'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: mini_magick
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -181,10 +167,13 @@ files:
181
167
  - lib/active_support_ext/object/duplicable.rb
182
168
  - lib/grover.rb
183
169
  - lib/grover/configuration.rb
170
+ - lib/grover/errors.rb
184
171
  - lib/grover/html_preprocessor.rb
172
+ - lib/grover/js/processor.js
185
173
  - lib/grover/middleware.rb
186
174
  - lib/grover/options_builder.rb
187
175
  - lib/grover/options_fixer.rb
176
+ - lib/grover/processor.rb
188
177
  - lib/grover/utils.rb
189
178
  - lib/grover/version.rb
190
179
  homepage: https://github.com/Studiosity/grover