Dhalang 0.6.6 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +18 -18
- data/README.md +29 -26
- data/lib/Dhalang/configuration.rb +89 -0
- data/lib/Dhalang/node_script_invoker.rb +29 -0
- data/lib/Dhalang/version.rb +1 -1
- data/lib/Dhalang.rb +3 -1
- data/lib/PDF.rb +4 -3
- data/lib/Scraper.rb +26 -0
- data/lib/Screenshot.rb +4 -3
- data/lib/js/dhalang.js +13 -5
- data/lib/js/html-scraper.js +29 -0
- data/lib/js/pdf-generator.js +5 -2
- data/lib/js/screenshot-generator.js +5 -2
- data/package-lock.json +2 -2
- data/package.json +1 -1
- metadata +11 -8
- data/lib/Dhalang/puppeteer.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 737f66adc02fc8014c5351e4b679a4d709931c43729fdba71c161e1b55a38551
|
4
|
+
data.tar.gz: a4c5721dda854821f3e705654c550a7edd80f2e51a0200ed64245899faa1e571
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eca69779f1f79d082dc78dfacb059b4078583aa9adb3e71082f08cee9eab63eb787773be4b93633944f229c302a6c812cbc909ac89df6608e1018466eeb427df
|
7
|
+
data.tar.gz: 3447cf8432a31e3f6e0e4d4edb9ed722c981bd16d659c542f1a82091cade222c65328b0764e73da540cb543188f413f4a82c57bcb851c7d500a2c76e1e7b7fe9
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
Dhalang (0.
|
4
|
+
Dhalang (0.7.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
Ascii85 (1.1.0)
|
10
10
|
afm (0.2.2)
|
11
|
-
|
12
|
-
|
11
|
+
bigdecimal (3.1.7)
|
12
|
+
diff-lcs (1.5.1)
|
13
|
+
fastimage (2.2.7)
|
13
14
|
hashery (2.1.2)
|
14
15
|
pdf-reader (2.9.2)
|
15
16
|
Ascii85 (~> 1.0)
|
@@ -18,26 +19,25 @@ GEM
|
|
18
19
|
ruby-rc4
|
19
20
|
ttfunk
|
20
21
|
rake (13.0.6)
|
21
|
-
rspec (3.
|
22
|
-
rspec-core (~> 3.
|
23
|
-
rspec-expectations (~> 3.
|
24
|
-
rspec-mocks (~> 3.
|
25
|
-
rspec-core (3.
|
26
|
-
rspec-support (~> 3.
|
27
|
-
rspec-expectations (3.
|
22
|
+
rspec (3.13.0)
|
23
|
+
rspec-core (~> 3.13.0)
|
24
|
+
rspec-expectations (~> 3.13.0)
|
25
|
+
rspec-mocks (~> 3.13.0)
|
26
|
+
rspec-core (3.13.0)
|
27
|
+
rspec-support (~> 3.13.0)
|
28
|
+
rspec-expectations (3.13.0)
|
28
29
|
diff-lcs (>= 1.2.0, < 2.0)
|
29
|
-
rspec-support (~> 3.
|
30
|
-
rspec-mocks (3.
|
30
|
+
rspec-support (~> 3.13.0)
|
31
|
+
rspec-mocks (3.13.0)
|
31
32
|
diff-lcs (>= 1.2.0, < 2.0)
|
32
|
-
rspec-support (~> 3.
|
33
|
-
rspec-support (3.
|
33
|
+
rspec-support (~> 3.13.0)
|
34
|
+
rspec-support (3.13.1)
|
34
35
|
ruby-rc4 (0.1.5)
|
35
|
-
ttfunk (1.
|
36
|
+
ttfunk (1.8.0)
|
37
|
+
bigdecimal (~> 3.1)
|
36
38
|
|
37
39
|
PLATFORMS
|
38
40
|
arm64-darwin-21
|
39
|
-
x86-mingw32
|
40
|
-
x86_64-darwin-20
|
41
41
|
x86_64-linux
|
42
42
|
|
43
43
|
DEPENDENCIES
|
@@ -49,4 +49,4 @@ DEPENDENCIES
|
|
49
49
|
rspec (~> 3.0)
|
50
50
|
|
51
51
|
BUNDLED WITH
|
52
|
-
2.3.
|
52
|
+
2.3.7
|
data/README.md
CHANGED
@@ -1,16 +1,21 @@
|
|
1
|
-
# Dhalang [](https://github.com/NielsSteensma/Dhalang/actions/workflows/build.yml)
|
1
|
+
# Dhalang [](https://github.com/NielsSteensma/Dhalang/actions/workflows/build.yml) [](https://badge.fury.io/rb/Dhalang)
|
2
2
|
|
3
3
|
> Dhalang is a Ruby wrapper for Google's Puppeteer.
|
4
4
|
|
5
5
|
|
6
6
|
|
7
7
|
## Features
|
8
|
-
* Generate PDFs from
|
9
|
-
* Generate PDFs from
|
10
|
-
* Capture
|
11
|
-
|
8
|
+
* Generate PDFs from webpages
|
9
|
+
* Generate PDFs from HTML ( external images/stylesheets supported )
|
10
|
+
* Capture screenshots from webpages
|
11
|
+
* Scrape HTML from webpages
|
12
12
|
|
13
13
|
|
14
|
+
## Prerequisites
|
15
|
+
* Node ≥ 18
|
16
|
+
* Puppeteer ≥ 22
|
17
|
+
* Unix shell ( Dhalang will not work on Windows shells )
|
18
|
+
|
14
19
|
## Installation
|
15
20
|
Add this line to your application's Gemfile:
|
16
21
|
|
@@ -20,43 +25,49 @@ And then execute:
|
|
20
25
|
|
21
26
|
$ bundle update
|
22
27
|
|
23
|
-
Install puppeteer in your application's root directory:
|
28
|
+
Install puppeteer or puppeteer-core in your application's root directory:
|
24
29
|
|
25
|
-
$ npm install puppeteer
|
30
|
+
$ npm install puppeteer
|
31
|
+
or
|
32
|
+
$ npm install puppeteer-core
|
26
33
|
|
27
|
-
<sub>Dhalang and Puppeteer require Node ≥ 18 and Puppeteer ≥ 22</sub>
|
28
34
|
## Usage
|
29
|
-
|
35
|
+
__PDF of a website url__
|
30
36
|
```ruby
|
31
37
|
Dhalang::PDF.get_from_url("https://www.google.com")
|
32
38
|
```
|
33
39
|
It is important to pass the complete URL; leaving out https://, http:// or www. will result in an error.
|
34
40
|
|
35
|
-
|
41
|
+
__PDF of a HTML string__
|
36
42
|
```ruby
|
37
43
|
Dhalang::PDF.get_from_html("<html><head></head><body><h1>examplestring</h1></body></html>")
|
38
44
|
```
|
39
45
|
|
40
|
-
|
46
|
+
__PNG screenshot of a website__
|
41
47
|
```ruby
|
42
48
|
Dhalang::Screenshot.get_from_url("https://www.google.com", :png)
|
43
49
|
```
|
44
50
|
|
45
|
-
|
51
|
+
__JPEG screenshot of a website__
|
46
52
|
```ruby
|
47
53
|
Dhalang::Screenshot.get_from_url("https://www.google.com", :jpeg)
|
48
54
|
```
|
49
55
|
|
50
|
-
|
56
|
+
__WEBP screenshot of a website__
|
51
57
|
```ruby
|
52
58
|
Dhalang::Screenshot.get_from_url("https://www.google.com", :webp)
|
53
59
|
```
|
54
60
|
|
55
|
-
|
61
|
+
__HTML of a website__
|
62
|
+
```ruby
|
63
|
+
Dhalang::Scraper.html("https://www.google.com")
|
64
|
+
```
|
65
|
+
|
66
|
+
The above methods return a string containing either the PDF/JPEG/PNG/WEBP in binary or the scraped HTML.
|
56
67
|
|
57
68
|
|
58
69
|
|
59
|
-
## Custom
|
70
|
+
## Custom options
|
60
71
|
To override the default options that are set by Dhalang you can pass as last argument a hash with the custom options you want to set.
|
61
72
|
|
62
73
|
For example to set custom margins for PDFs:
|
@@ -80,18 +91,10 @@ A list of all possible screenshot options that can be set, can be found at: http
|
|
80
91
|
>
|
81
92
|
> For example: `Dhalang::PDF.get_from_url("https://www.google.com", {headerTemplateFile: '/tmp/header.html', footerTemplateFile: '/tmp/footer.html'})`
|
82
93
|
|
83
|
-
|
84
|
-
## Custom user options
|
85
|
-
You may want to change the way Dhalang interacts with Puppeteer in general. User options can be set by providing them in a hash as last argument to any calls you make to the library. Are you setting both custom PDF and user options? Then they should be passed as a single hash.
|
86
|
-
|
87
|
-
For example to set a custom navigation timeout:
|
88
|
-
```ruby
|
89
|
-
Dhalang::Screenshot.get_from_url("https://www.google.com", :jpeg, {navigationTimeout: 20000})
|
90
|
-
```
|
91
|
-
|
92
|
-
Below table lists all possible configuration parameters that can be set:
|
94
|
+
The table below lists more configuration parameters that can be set:
|
93
95
|
| Key | Description | Default |
|
94
96
|
|--------------------|-----------------------------------------------------------------------------------------|---------------------------------|
|
97
|
+
| browserWebsocketUrl | Websocket url of remote chromium browser to use | None |
|
95
98
|
| navigationTimeout | Number of milliseconds until Puppeteer will time out when navigating to the given page | 10000 |
|
96
99
|
| printToPDFTimeout | Number of milliseconds until Puppeteer will time out when calling Page.printToPDF | 0 (unlimited) |
|
97
100
|
| navigationWaitForSelector | If set, Dhalang will wait for the specified selector to appear before creating the screenshot or PDF | None |
|
@@ -119,4 +122,4 @@ def example_controller_method
|
|
119
122
|
binary_png = Dhalang::Screenshot.get_from_url("https://www.google.com", :png)
|
120
123
|
send_data(binary_png, filename: 'screenshotofgoogle.png', type: 'image/png')
|
121
124
|
end
|
122
|
-
```
|
125
|
+
```
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module Dhalang
  # Groups Puppeteer and Dhalang configuration.
  #
  # An instance combines the caller-supplied options with the defaults below
  # and serializes the result as the JSON string handed to the Node scripts.
  class Configuration
    # `node_modules` folder of the working directory at the moment this class
    # is loaded. The whole resulting string is frozen (not just the literal).
    NODE_MODULES_PATH = "#{Dir.pwd}/node_modules/".freeze

    # Options controlling how Dhalang drives Puppeteer.
    USER_OPTIONS = {
      browserWebsocketUrl: '',
      navigationTimeout: 10000,
      printToPDFTimeout: 0, # unlimited
      navigationWaitUntil: 'load',
      navigationWaitForSelector: '',
      navigationWaitForXPath: '',
      userAgent: '',
      isHeadless: true,
      viewPort: '',
      httpAuthenticationCredentials: '',
      isAutoHeight: false,
      chromeOptions: []
    }.freeze

    # Defaults serialized under `pdfOptions`.
    DEFAULT_PDF_OPTIONS = {
      scale: 1,
      displayHeaderFooter: false,
      headerTemplate: '',
      footerTemplate: '',
      headerTemplateFile: '',
      footerTemplateFile: '',
      printBackground: true,
      landscape: false,
      pageRanges: '',
      format: 'A4',
      width: '',
      height: '',
      margin: { top: 36, right: 36, bottom: 20, left: 36 },
      preferCSSPageSize: true,
      omitBackground: false
    }.freeze

    # Defaults serialized under `screenshotOptions`.
    DEFAULT_SCREENSHOT_OPTIONS = {
      fullPage: true,
      clip: nil,
      omitBackground: false
    }.freeze

    # Defaults serialized under `jpegOptions`.
    DEFAULT_JPEG_OPTIONS = {
      quality: 100
    }.freeze

    private_constant :NODE_MODULES_PATH
    private_constant :USER_OPTIONS
    private_constant :DEFAULT_PDF_OPTIONS
    private_constant :DEFAULT_SCREENSHOT_OPTIONS
    private_constant :DEFAULT_JPEG_OPTIONS

    # @param [Hash] custom_options        Changes that override default. Only keys that exist in one of
    #                                     the default option sets are used; nil is treated as "no overrides".
    # @param [String] page_url            Url for Puppeteer to visit.
    # @param [String] temp_file_path      Absolute path of temp file to use for writing script results.
    #                                     Can be nil for scripts using stdout.
    # @param [String] temp_file_extension Extension of temp file. Can be nil for scripts using stdout.
    def initialize(custom_options, page_url, temp_file_path = nil, temp_file_extension = nil)
      overrides = custom_options || {}

      self.page_url = page_url
      self.temp_file_path = temp_file_path
      self.temp_file_extension = temp_file_extension
      self.user_options = merge_with_defaults(USER_OPTIONS, overrides)
      self.pdf_options = merge_with_defaults(DEFAULT_PDF_OPTIONS, overrides)
      self.screenshot_options = merge_with_defaults(DEFAULT_SCREENSHOT_OPTIONS, overrides)
      self.jpeg_options = merge_with_defaults(DEFAULT_JPEG_OPTIONS, overrides)
    end

    # Returns configuration as JSON string.
    #
    # @return [String] JSON object with the keys webPageUrl, tempFilePath, puppeteerPath,
    #                  imageType, userOptions, pdfOptions, screenshotOptions and jpegOptions.
    def json
      {
        webPageUrl: page_url,
        tempFilePath: temp_file_path,
        puppeteerPath: NODE_MODULES_PATH,
        imageType: temp_file_extension,
        userOptions: user_options,
        pdfOptions: pdf_options,
        screenshotOptions: screenshot_options,
        jpegOptions: jpeg_options
      }.to_json
    end

    private

    attr_accessor :page_url
    attr_accessor :temp_file_path
    attr_accessor :temp_file_extension
    attr_accessor :user_options
    attr_accessor :pdf_options
    attr_accessor :screenshot_options
    attr_accessor :jpeg_options

    # Builds a Hash holding every key of `defaults`, taking the value from
    # `overrides` when that key is present there and the default otherwise.
    def merge_with_defaults(defaults, overrides)
      defaults.map { |key, default_value| [key, overrides.fetch(key, default_value)] }.to_h
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Dhalang
  # Runs JS scripts in a separate Node process.
  class NodeScriptInvoker

    # Executes JS script under given script_path by launching a new Node process.
    #
    # @param [String] script_path           Absolute path of JS script to execute.
    # @param [Configuration] configuration  Configuration to use.
    #
    # @return [nil] When the Node process exits successfully.
    # @raise [DhalangError] When the Node process exits unsuccessfully. The message is the combined
    #                       stdout/stderr output of the process, or its exit status if it printed nothing.
    def self.execute_script(script_path, configuration)
      command = create_node_command(script_path, configuration)

      # capture2e reads the combined stdout/stderr stream to the end before
      # collecting the exit status. Waiting for the process first can block
      # forever once the script has filled the pipe buffer.
      output, status = Open3.capture2e(command)
      return nil if status.success?

      message = output.strip
      message = "Exited with status #{status.exitstatus}" if message.empty?
      raise DhalangError, message
    end


    # Returns a [String] with node command that invokes the provided script with the configuration.
    # Both the script path and the JSON configuration are shell-escaped, so paths
    # containing spaces or shell metacharacters are passed to Node as one argument.
    #
    # @param [String] script_path           Absolute path of JS script to invoke.
    # @param [Configuration] configuration  Configuration to use.
    private_class_method def self.create_node_command(script_path, configuration)
      "node #{Shellwords.escape(script_path)} #{Shellwords.escape(configuration.json)}"
    end
  end
end
|
data/lib/Dhalang/version.rb
CHANGED
data/lib/Dhalang.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Dhalang
|
2
2
|
require_relative 'PDF'
|
3
3
|
require_relative 'Screenshot'
|
4
|
+
require_relative 'Scraper'
|
4
5
|
require_relative 'Dhalang/version'
|
5
6
|
require_relative 'Dhalang/url_utils'
|
6
7
|
require_relative 'Dhalang/file_utils'
|
7
8
|
require_relative 'Dhalang/error'
|
8
|
-
require_relative 'Dhalang/
|
9
|
+
require_relative 'Dhalang/configuration'
|
10
|
+
require_relative 'Dhalang/node_script_invoker'
|
9
11
|
require 'uri'
|
10
12
|
require 'tempfile'
|
11
13
|
require 'shellwords'
|
data/lib/PDF.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module Dhalang
|
2
2
|
# Allows consumers of this library to create PDFs with Puppeteer.
|
3
3
|
class PDF
|
4
|
-
|
5
|
-
private_constant :
|
4
|
+
SCRIPT_PATH = File.expand_path('../js/pdf-generator.js', __FILE__).freeze
|
5
|
+
private_constant :SCRIPT_PATH
|
6
6
|
|
7
7
|
# Captures the full webpage under the given url as PDF.
|
8
8
|
#
|
@@ -43,7 +43,8 @@ module Dhalang
|
|
43
43
|
private_class_method def self.get(url, options)
|
44
44
|
temp_file = FileUtils.create_temp_file("pdf")
|
45
45
|
begin
|
46
|
-
|
46
|
+
configuration = Configuration.new(options, url, temp_file.path, "pdf")
|
47
|
+
NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration)
|
47
48
|
binary_pdf_content = FileUtils.read_binary(temp_file.path)
|
48
49
|
ensure
|
49
50
|
FileUtils.delete(temp_file)
|
data/lib/Scraper.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module Dhalang
  # Provides functionality for scraping webpages.
  class Scraper
    # Absolute path of the Node script that performs the scrape.
    SCRIPT_PATH = File.expand_path('../js/html-scraper.js', __FILE__).freeze
    private_constant :SCRIPT_PATH

    # Scrapes full HTML content under given url.
    #
    # The Node script writes the HTML to a temp file, which is read back and
    # deleted again whether or not the scrape succeeded.
    #
    # @param  [String] url      Url to scrape.
    # @param  [Hash]   options  User configurable options.
    #
    # @return [String] Scraped HTML content.
    def self.html(url, options = {})
      UrlUtils.validate(url)
      temp_file = FileUtils.create_temp_file("html")
      begin
        configuration = Configuration.new(options, url, temp_file.path, "html")
        NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration)
        # File.read instead of IO.read: IO.read treats a path starting with "|"
        # as a command to run, File.read always opens a file.
        File.read(temp_file.path)
      ensure
        FileUtils.delete(temp_file)
      end
    end
  end
end
|
data/lib/Screenshot.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Dhalang
|
2
2
|
# Allows consumers of this library to take screenshots with Puppeteer.
|
3
3
|
class Screenshot
|
4
|
-
|
4
|
+
SCRIPT_PATH = File.expand_path('../js/screenshot-generator.js', __FILE__).freeze
|
5
5
|
IMAGE_TYPES = [:jpeg, :png, :webp].freeze
|
6
|
-
private_constant :
|
6
|
+
private_constant :SCRIPT_PATH
|
7
7
|
private_constant :IMAGE_TYPES
|
8
8
|
|
9
9
|
# <b>DEPRECATED:</b> Please use `get_from_url(url, :jpeg)` instead.
|
@@ -44,7 +44,8 @@ module Dhalang
|
|
44
44
|
|
45
45
|
temp_file = FileUtils.create_temp_file(image_type)
|
46
46
|
begin
|
47
|
-
|
47
|
+
configuration = Configuration.new(options, url, temp_file.path, image_type)
|
48
|
+
NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration)
|
48
49
|
binary_image_content = FileUtils.read_binary(temp_file.path)
|
49
50
|
ensure
|
50
51
|
FileUtils.delete(temp_file)
|
data/lib/js/dhalang.js
CHANGED
@@ -14,6 +14,7 @@ const fs = require('fs')
|
|
14
14
|
|
15
15
|
/**
|
16
16
|
* @typedef {Object} UserOptions
|
17
|
+
* @property {string} browserWebsocketUrl - The websocket url of remote Chromium browser to use.
|
17
18
|
* @property {number} navigationTimeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
|
18
19
|
* @property {string} navigationWaitUntil - Determines when the navigation was finished, we wait here until the Window.load event is fired ( meaning all images, stylesheet, etc was loaded ).
|
19
20
|
* @property {string} navigationWaitForSelector - If set, specifies the selector Puppeteer should wait for to appear before continuing.
|
@@ -47,7 +48,7 @@ exports.getConfiguration = function () {
|
|
47
48
|
|
48
49
|
/**
|
49
50
|
* Launches Puppeteer and returns its instance.
|
50
|
-
* @param {
|
51
|
+
* @param {Configuration} configuration - The configuration to use.
|
51
52
|
* @returns {Promise<Object>}
|
52
53
|
* The launched instance of Puppeteer.
|
53
54
|
*/
|
@@ -55,10 +56,17 @@ exports.launchPuppeteer = async function (configuration) {
|
|
55
56
|
module.paths.push(configuration.puppeteerPath);
|
56
57
|
const puppeteer = require('puppeteer');
|
57
58
|
const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox'].concat(configuration.userOptions.chromeOptions).filter((item, index, self) => self.indexOf(item) === index);
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
|
60
|
+
if (configuration.userOptions['browserWebsocketUrl'] !== "") {
|
61
|
+
return await puppeteer.connect( {
|
62
|
+
"browserWSEndpoint": configuration.userOptions.browserWebsocketUrl
|
63
|
+
})
|
64
|
+
} else {
|
65
|
+
return await puppeteer.launch({
|
66
|
+
args: launchArgs,
|
67
|
+
headless: configuration.userOptions.isHeadless
|
68
|
+
});
|
69
|
+
}
|
62
70
|
}
|
63
71
|
|
64
72
|
/**
|
@@ -0,0 +1,29 @@
|
|
1
|
+
'use strict';
const dhalang = require('./dhalang');
const fs = require('node:fs');

/**
 * Opens the configured web page in Puppeteer and writes its HTML content to
 * the temp file named in the configuration.
 *
 * The process exits with status 0 on success. On failure the error message is
 * printed to stderr and the process exits with status 1.
 */
const scrapeHtml = async () => {
    const configuration = dhalang.getConfiguration();
    // A non-empty websocket url means we are attached to a browser we did not launch.
    const usesRemoteBrowser = configuration.userOptions['browserWebsocketUrl'] !== "";

    let browser;
    let page;
    let exitCode = 0;
    try {
        browser = await dhalang.launchPuppeteer(configuration);
        page = await browser.newPage();
        await dhalang.configure(page, configuration.userOptions);
        await dhalang.navigate(page, configuration);
        const html = await page.content();
        fs.writeFileSync(configuration.tempFilePath, html);
    } catch (error) {
        console.error(error.message);
        // Only record the failure here: calling process.exit() in this branch
        // would skip the cleanup below and leave the page open in a remote browser.
        exitCode = 1;
    } finally {
        try {
            if (usesRemoteBrowser) {
                // Leave the remote browser running and close only the page we opened.
                if (page) {
                    await page.close();
                }
            } else if (browser) {
                await browser.close();
            }
        } catch (cleanupError) {
            // The scrape result is already decided; report the cleanup problem without changing it.
            console.error(cleanupError.message);
        }
        process.exit(exitCode);
    }
};
scrapeHtml();
|
data/lib/js/pdf-generator.js
CHANGED
@@ -5,9 +5,10 @@ const createPdf = async () => {
|
|
5
5
|
const configuration = dhalang.getConfiguration();
|
6
6
|
|
7
7
|
let browser;
|
8
|
+
let page;
|
8
9
|
try {
|
9
10
|
browser = await dhalang.launchPuppeteer(configuration);
|
10
|
-
|
11
|
+
page = await browser.newPage();
|
11
12
|
await dhalang.configure(page, configuration.userOptions);
|
12
13
|
await dhalang.navigate(page, configuration);
|
13
14
|
const pdfOptions = await dhalang.getConfiguredPdfOptions(page, configuration);
|
@@ -21,8 +22,10 @@ const createPdf = async () => {
|
|
21
22
|
console.error(error.message);
|
22
23
|
process.exit(1);
|
23
24
|
} finally {
|
24
|
-
if (browser) {
|
25
|
+
if (browser && configuration.userOptions['browserWebsocketUrl'] === "") {
|
25
26
|
browser.close();
|
27
|
+
} else {
|
28
|
+
page.close();
|
26
29
|
}
|
27
30
|
process.exit();
|
28
31
|
}
|
@@ -5,9 +5,10 @@ const createScreenshot = async () => {
|
|
5
5
|
const configuration = dhalang.getConfiguration();
|
6
6
|
|
7
7
|
let browser;
|
8
|
+
let page;
|
8
9
|
try {
|
9
10
|
browser = await dhalang.launchPuppeteer(configuration);
|
10
|
-
|
11
|
+
page = await browser.newPage();
|
11
12
|
await dhalang.configure(page, configuration.userOptions);
|
12
13
|
await dhalang.navigate(page, configuration);
|
13
14
|
|
@@ -23,8 +24,10 @@ const createScreenshot = async () => {
|
|
23
24
|
console.error(error.message);
|
24
25
|
process.exit(1);
|
25
26
|
} finally {
|
26
|
-
if (browser) {
|
27
|
+
if (browser && configuration.userOptions['browserWebsocketUrl'] === "") {
|
27
28
|
browser.close();
|
29
|
+
} else {
|
30
|
+
page.close();
|
28
31
|
}
|
29
32
|
process.exit();
|
30
33
|
}
|
data/package-lock.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
{
|
2
2
|
"name": "dhalang",
|
3
|
-
"version": "0.
|
3
|
+
"version": "0.7.1",
|
4
4
|
"lockfileVersion": 3,
|
5
5
|
"requires": true,
|
6
6
|
"packages": {
|
7
7
|
"": {
|
8
8
|
"name": "dhalang",
|
9
|
-
"version": "0.
|
9
|
+
"version": "0.7.1",
|
10
10
|
"license": "MIT",
|
11
11
|
"dependencies": {
|
12
12
|
"puppeteer": "^22.5.0"
|
data/package.json
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: Dhalang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Niels Steensma
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,7 +80,7 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '3.0'
|
83
|
-
description:
|
83
|
+
description:
|
84
84
|
email:
|
85
85
|
- nielssteensma@yahoo.nl
|
86
86
|
executables: []
|
@@ -97,14 +97,17 @@ files:
|
|
97
97
|
- README.md
|
98
98
|
- Rakefile
|
99
99
|
- lib/Dhalang.rb
|
100
|
+
- lib/Dhalang/configuration.rb
|
100
101
|
- lib/Dhalang/error.rb
|
101
102
|
- lib/Dhalang/file_utils.rb
|
102
|
-
- lib/Dhalang/
|
103
|
+
- lib/Dhalang/node_script_invoker.rb
|
103
104
|
- lib/Dhalang/url_utils.rb
|
104
105
|
- lib/Dhalang/version.rb
|
105
106
|
- lib/PDF.rb
|
107
|
+
- lib/Scraper.rb
|
106
108
|
- lib/Screenshot.rb
|
107
109
|
- lib/js/dhalang.js
|
110
|
+
- lib/js/html-scraper.js
|
108
111
|
- lib/js/pdf-generator.js
|
109
112
|
- lib/js/screenshot-generator.js
|
110
113
|
- package-lock.json
|
@@ -113,7 +116,7 @@ homepage: https://github.com/NielsSteensma/Dhalang
|
|
113
116
|
licenses:
|
114
117
|
- MIT
|
115
118
|
metadata: {}
|
116
|
-
post_install_message:
|
119
|
+
post_install_message:
|
117
120
|
rdoc_options: []
|
118
121
|
require_paths:
|
119
122
|
- lib
|
@@ -128,8 +131,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
131
|
- !ruby/object:Gem::Version
|
129
132
|
version: '0'
|
130
133
|
requirements: []
|
131
|
-
rubygems_version: 3.3.
|
132
|
-
signing_key:
|
134
|
+
rubygems_version: 3.3.7
|
135
|
+
signing_key:
|
133
136
|
specification_version: 4
|
134
137
|
summary: Ruby wrapper for Puppeteer. Generate screenshots and PDF's from HTML!
|
135
138
|
test_files: []
|
data/lib/Dhalang/puppeteer.rb
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
module Dhalang
|
2
|
-
# Contains common logic for interacting with Puppeteer.
|
3
|
-
class Puppeteer
|
4
|
-
NODE_MODULES_PATH = Dir.pwd + '/node_modules/'.freeze
|
5
|
-
private_constant :NODE_MODULES_PATH
|
6
|
-
|
7
|
-
USER_OPTIONS = {
|
8
|
-
navigationTimeout: 10000,
|
9
|
-
printToPDFTimeout: 0, # unlimited
|
10
|
-
navigationWaitUntil: 'load',
|
11
|
-
navigationWaitForSelector: '',
|
12
|
-
navigationWaitForXPath: '',
|
13
|
-
userAgent: '',
|
14
|
-
isHeadless: true,
|
15
|
-
viewPort: '',
|
16
|
-
httpAuthenticationCredentials: '',
|
17
|
-
isAutoHeight: false,
|
18
|
-
chromeOptions: []
|
19
|
-
}
|
20
|
-
private_constant :USER_OPTIONS
|
21
|
-
|
22
|
-
DEFAULT_PDF_OPTIONS = {
|
23
|
-
scale: 1,
|
24
|
-
displayHeaderFooter: false,
|
25
|
-
headerTemplate: '',
|
26
|
-
footerTemplate: '',
|
27
|
-
headerTemplateFile: '',
|
28
|
-
footerTemplateFile: '',
|
29
|
-
printBackground: true,
|
30
|
-
landscape: false,
|
31
|
-
pageRanges: '',
|
32
|
-
format: 'A4',
|
33
|
-
width: '',
|
34
|
-
height: '',
|
35
|
-
margin: { top: 36, right: 36, bottom: 20, left: 36 },
|
36
|
-
preferCSSPageSize: true,
|
37
|
-
omitBackground: false
|
38
|
-
}
|
39
|
-
private_constant :DEFAULT_PDF_OPTIONS
|
40
|
-
|
41
|
-
DEFAULT_SCREENSHOT_OPTIONS = {
|
42
|
-
fullPage: true,
|
43
|
-
clip: nil,
|
44
|
-
omitBackground: false
|
45
|
-
}
|
46
|
-
private_constant :DEFAULT_SCREENSHOT_OPTIONS
|
47
|
-
|
48
|
-
DEFAULT_JPEG_OPTIONS = {
|
49
|
-
quality: 100
|
50
|
-
}
|
51
|
-
private_constant :DEFAULT_JPEG_OPTIONS
|
52
|
-
|
53
|
-
|
54
|
-
# Launches a new Node process, executing the (Puppeteer) script under the given script_path.
|
55
|
-
#
|
56
|
-
# @param [String] page_url The url to pass to the goTo method of Puppeteer.
|
57
|
-
# @param [String] script_path The absolute path of the JS script to execute.
|
58
|
-
# @param [String] temp_file_path The absolute path of the temp file to use to write any actions from Puppeteer.
|
59
|
-
# @param [String] temp_file_extension The extension of the temp file.
|
60
|
-
# @param [Object] options Set of options to use, configurable by the user.
|
61
|
-
def self.visit(page_url, script_path, temp_file_path, temp_file_extension, options)
|
62
|
-
configuration = create_configuration(page_url, script_path, temp_file_path, temp_file_extension, options)
|
63
|
-
|
64
|
-
command = "node #{script_path} #{Shellwords.escape(configuration)}"
|
65
|
-
|
66
|
-
Open3.popen2e(command) do |_stdin, stdouterr, wait|
|
67
|
-
return nil if wait.value.success?
|
68
|
-
|
69
|
-
output = stdouterr.read.strip
|
70
|
-
output = nil if output == ''
|
71
|
-
message = output || "Exited with status #{wait.value.exitstatus}"
|
72
|
-
raise DhalangError, message
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
|
77
|
-
# Returns a JSON string with the configuration to use within the Puppeteer script.
|
78
|
-
#
|
79
|
-
# @param [String] page_url The url to pass to the goTo method of Puppeteer.
|
80
|
-
# @param [String] script_path The absolute path of the JS script to execute.
|
81
|
-
# @param [String] temp_file_path The absolute path of the temp file to use to write any actions from Puppeteer.
|
82
|
-
# @param [String] temp_file_extension The extension of the temp file.
|
83
|
-
# @param [Hash] options Set of options to use, configurable by the user.
|
84
|
-
private_class_method def self.create_configuration(page_url, script_path, temp_file_path, temp_file_extension, options)
|
85
|
-
{
|
86
|
-
webPageUrl: page_url,
|
87
|
-
tempFilePath: temp_file_path,
|
88
|
-
puppeteerPath: NODE_MODULES_PATH,
|
89
|
-
imageType: temp_file_extension,
|
90
|
-
userOptions: USER_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value]}.to_h,
|
91
|
-
pdfOptions: DEFAULT_PDF_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value] }.to_h,
|
92
|
-
screenshotOptions: DEFAULT_SCREENSHOT_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value] }.to_h,
|
93
|
-
jpegOptions: DEFAULT_JPEG_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value] }.to_h
|
94
|
-
}.to_json
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|