Dhalang 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -16
- data/lib/Dhalang/configuration.rb +2 -0
- data/lib/Dhalang/version.rb +1 -1
- data/lib/js/dhalang.js +13 -5
- data/lib/js/html-scraper.js +5 -2
- data/lib/js/pdf-generator.js +5 -2
- data/lib/js/screenshot-generator.js +5 -2
- data/package-lock.json +2 -2
- data/package.json +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 737f66adc02fc8014c5351e4b679a4d709931c43729fdba71c161e1b55a38551
|
4
|
+
data.tar.gz: a4c5721dda854821f3e705654c550a7edd80f2e51a0200ed64245899faa1e571
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eca69779f1f79d082dc78dfacb059b4078583aa9adb3e71082f08cee9eab63eb787773be4b93633944f229c302a6c812cbc909ac89df6608e1018466eeb427df
|
7
|
+
data.tar.gz: 3447cf8432a31e3f6e0e4d4edb9ed722c981bd16d659c542f1a82091cade222c65328b0764e73da540cb543188f413f4a82c57bcb851c7d500a2c76e1e7b7fe9
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Dhalang [](https://github.com/NielsSteensma/Dhalang/actions/workflows/build.yml)
|
1
|
+
# Dhalang [](https://github.com/NielsSteensma/Dhalang/actions/workflows/build.yml) [](https://badge.fury.io/rb/Dhalang)
|
2
2
|
|
3
3
|
> Dhalang is a Ruby wrapper for Google's Puppeteer.
|
4
4
|
|
@@ -11,7 +11,11 @@
|
|
11
11
|
* Scrape HTML from webpages
|
12
12
|
|
13
13
|
|
14
|
-
|
14
|
+
## Prerequisites
|
15
|
+
* Node ≥ 18
|
16
|
+
* Puppeteer ≥ 22
|
17
|
+
* Unix shell ( Dhalang will not work on Windows shells )
|
18
|
+
|
15
19
|
## Installation
|
16
20
|
Add this line to your application's Gemfile:
|
17
21
|
|
@@ -21,11 +25,12 @@ And then execute:
|
|
21
25
|
|
22
26
|
$ bundle update
|
23
27
|
|
24
|
-
Install puppeteer in your application's root directory:
|
28
|
+
Install puppeteer or puppeteer-core in your application's root directory:
|
25
29
|
|
26
|
-
$ npm install puppeteer
|
30
|
+
$ npm install puppeteer
|
31
|
+
or
|
32
|
+
$ npm install puppeteer-core
|
27
33
|
|
28
|
-
<sub>Dhalang and Puppeteer require Node ≥ 18 and Puppeteer ≥ 22</sub>
|
29
34
|
## Usage
|
30
35
|
__PDF of a website url__
|
31
36
|
```ruby
|
@@ -86,18 +91,10 @@ A list of all possible screenshot options that can be set, can be found at: http
|
|
86
91
|
>
|
87
92
|
> For example: `Dhalang::PDF.get_from_url("https://www.google.com", {headerTemplateFile: '/tmp/header.html', footerTemplateFile: '/tmp/footer.html'})`
|
88
93
|
|
89
|
-
|
90
|
-
## Custom user options
|
91
|
-
You may want to change the way Dhalang interacts with Puppeteer in general. User options can be set by providing them in a hash as last argument to any calls you make to the library. Are you setting both custom PDF and user options? Then they should be passed as a single hash.
|
92
|
-
|
93
|
-
For example to set a custom navigation timeout:
|
94
|
-
```ruby
|
95
|
-
Dhalang::Screenshot.get_from_url("https://www.google.com", :jpeg, {navigationTimeout: 20000})
|
96
|
-
```
|
97
|
-
|
98
|
-
Below table lists all possible configuration parameters that can be set:
|
94
|
+
The table below lists more configuration parameters that can be set:
|
99
95
|
| Key | Description | Default |
|
100
96
|
|--------------------|-----------------------------------------------------------------------------------------|---------------------------------|
|
97
|
+
| browserWebsocketUrl | Websocket url of remote chromium browser to use | None |
|
101
98
|
| navigationTimeout | Number of milliseconds until Puppeteer will time out when navigating to the given page | 10000 |
|
102
99
|
| printToPDFTimeout | Number of milliseconds until Puppeteer will time out when calling Page.printToPDF | 0 (unlimited) |
|
103
100
|
| navigationWaitForSelector | If set, Dhalang will wait for the specified selector to appear before creating the screenshot or PDF | None |
|
@@ -125,4 +122,4 @@ def example_controller_method
|
|
125
122
|
binary_png = Dhalang::Screenshot.get_from_url("https://www.google.com", :png)
|
126
123
|
send_data(binary_png, filename: 'screenshotofgoogle.png', type: 'image/png')
|
127
124
|
end
|
128
|
-
```
|
125
|
+
```
|
@@ -3,6 +3,7 @@ module Dhalang
|
|
3
3
|
class Configuration
|
4
4
|
NODE_MODULES_PATH = Dir.pwd + '/node_modules/'.freeze
|
5
5
|
USER_OPTIONS = {
|
6
|
+
browserWebsocketUrl: '',
|
6
7
|
navigationTimeout: 10000,
|
7
8
|
printToPDFTimeout: 0, # unlimited
|
8
9
|
navigationWaitUntil: 'load',
|
@@ -48,6 +49,7 @@ module Dhalang
|
|
48
49
|
private_constant :DEFAULT_JPEG_OPTIONS
|
49
50
|
|
50
51
|
private attr_accessor :page_url
|
52
|
+
private attr_accessor :browser_websocket_url
|
51
53
|
private attr_accessor :temp_file_path
|
52
54
|
private attr_accessor :temp_file_extension
|
53
55
|
private attr_accessor :user_options
|
data/lib/Dhalang/version.rb
CHANGED
data/lib/js/dhalang.js
CHANGED
@@ -14,6 +14,7 @@ const fs = require('fs')
|
|
14
14
|
|
15
15
|
/**
|
16
16
|
* @typedef {Object} UserOptions
|
17
|
+
* @property {string} browserWebsocketUrl - The websocket url of remote Chromium browser to use.
|
17
18
|
* @property {number} navigationTimeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
|
18
19
|
* @property {string} navigationWaitUntil - Determines when the navigation is considered finished; we wait here until the Window.load event is fired ( meaning all images, stylesheets, etc. have been loaded ).
|
19
20
|
* @property {string} navigationWaitForSelector - If set, specifies the selector Puppeteer should wait for to appear before continuing.
|
@@ -47,7 +48,7 @@ exports.getConfiguration = function () {
|
|
47
48
|
|
48
49
|
/**
|
49
50
|
* Launches Puppeteer and returns its instance.
|
50
|
-
* @param {
|
51
|
+
* @param {Configuration} configuration - The configuration to use.
|
51
52
|
* @returns {Promise<Object>}
|
52
53
|
* The launched instance of Puppeteer.
|
53
54
|
*/
|
@@ -55,10 +56,17 @@ exports.launchPuppeteer = async function (configuration) {
|
|
55
56
|
module.paths.push(configuration.puppeteerPath);
|
56
57
|
const puppeteer = require('puppeteer');
|
57
58
|
const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox'].concat(configuration.userOptions.chromeOptions).filter((item, index, self) => self.indexOf(item) === index);
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
|
60
|
+
if (configuration.userOptions['browserWebsocketUrl'] !== "") {
|
61
|
+
return await puppeteer.connect( {
|
62
|
+
"browserWSEndpoint": configuration.userOptions.browserWebsocketUrl
|
63
|
+
})
|
64
|
+
} else {
|
65
|
+
return await puppeteer.launch({
|
66
|
+
args: launchArgs,
|
67
|
+
headless: configuration.userOptions.isHeadless
|
68
|
+
});
|
69
|
+
}
|
62
70
|
}
|
63
71
|
|
64
72
|
/**
|
data/lib/js/html-scraper.js
CHANGED
@@ -6,9 +6,10 @@ const scrapeHtml = async () => {
|
|
6
6
|
const configuration = dhalang.getConfiguration();
|
7
7
|
|
8
8
|
let browser;
|
9
|
+
let page;
|
9
10
|
try {
|
10
11
|
browser = await dhalang.launchPuppeteer(configuration);
|
11
|
-
|
12
|
+
page = await browser.newPage();
|
12
13
|
await dhalang.configure(page, configuration.userOptions);
|
13
14
|
await dhalang.navigate(page, configuration);
|
14
15
|
const html = await page.content();
|
@@ -17,8 +18,10 @@ const scrapeHtml = async () => {
|
|
17
18
|
console.error(error.message);
|
18
19
|
process.exit(1);
|
19
20
|
} finally {
|
20
|
-
if (browser) {
|
21
|
+
if (browser && configuration.userOptions['browserWebsocketUrl'] === "") {
|
21
22
|
browser.close();
|
23
|
+
} else {
|
24
|
+
page.close();
|
22
25
|
}
|
23
26
|
process.exit(0);
|
24
27
|
}
|
data/lib/js/pdf-generator.js
CHANGED
@@ -5,9 +5,10 @@ const createPdf = async () => {
|
|
5
5
|
const configuration = dhalang.getConfiguration();
|
6
6
|
|
7
7
|
let browser;
|
8
|
+
let page;
|
8
9
|
try {
|
9
10
|
browser = await dhalang.launchPuppeteer(configuration);
|
10
|
-
|
11
|
+
page = await browser.newPage();
|
11
12
|
await dhalang.configure(page, configuration.userOptions);
|
12
13
|
await dhalang.navigate(page, configuration);
|
13
14
|
const pdfOptions = await dhalang.getConfiguredPdfOptions(page, configuration);
|
@@ -21,8 +22,10 @@ const createPdf = async () => {
|
|
21
22
|
console.error(error.message);
|
22
23
|
process.exit(1);
|
23
24
|
} finally {
|
24
|
-
if (browser) {
|
25
|
+
if (browser && configuration.userOptions['browserWebsocketUrl'] === "") {
|
25
26
|
browser.close();
|
27
|
+
} else {
|
28
|
+
page.close();
|
26
29
|
}
|
27
30
|
process.exit();
|
28
31
|
}
|
@@ -5,9 +5,10 @@ const createScreenshot = async () => {
|
|
5
5
|
const configuration = dhalang.getConfiguration();
|
6
6
|
|
7
7
|
let browser;
|
8
|
+
let page;
|
8
9
|
try {
|
9
10
|
browser = await dhalang.launchPuppeteer(configuration);
|
10
|
-
|
11
|
+
page = await browser.newPage();
|
11
12
|
await dhalang.configure(page, configuration.userOptions);
|
12
13
|
await dhalang.navigate(page, configuration);
|
13
14
|
|
@@ -23,8 +24,10 @@ const createScreenshot = async () => {
|
|
23
24
|
console.error(error.message);
|
24
25
|
process.exit(1);
|
25
26
|
} finally {
|
26
|
-
if (browser) {
|
27
|
+
if (browser && configuration.userOptions['browserWebsocketUrl'] === "") {
|
27
28
|
browser.close();
|
29
|
+
} else {
|
30
|
+
page.close();
|
28
31
|
}
|
29
32
|
process.exit();
|
30
33
|
}
|
data/package-lock.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
{
|
2
2
|
"name": "dhalang",
|
3
|
-
"version": "0.7.
|
3
|
+
"version": "0.7.1",
|
4
4
|
"lockfileVersion": 3,
|
5
5
|
"requires": true,
|
6
6
|
"packages": {
|
7
7
|
"": {
|
8
8
|
"name": "dhalang",
|
9
|
-
"version": "0.7.
|
9
|
+
"version": "0.7.1",
|
10
10
|
"license": "MIT",
|
11
11
|
"dependencies": {
|
12
12
|
"puppeteer": "^22.5.0"
|
data/package.json
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: Dhalang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Niels Steensma
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|