html-get 2.9.14 → 2.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -38,21 +38,37 @@ $ npm install puppeteer html-get --save
38
38
  ## Usage
39
39
 
40
40
  ```js
41
- 'use strict'
42
-
41
+ const createBrowserless = require('browserless')
43
42
  const getHTML = require('html-get')
44
43
 
45
- getHTML('https://example.com').then(
46
- ({ url, html, stats, headers, statusCode }) =>
47
- console.log(`
48
- url: ${url}
49
- html: ${Buffer.from(html).byteLength} bytes (HTTP ${statusCode})
50
- time: ${stats.timing} (${stats.mode})
51
- headers: ${Object.keys(headers).reduce(
52
- (acc, key) => `${acc}${key}=${headers[key]} `,
53
- ''
54
- )}
55
- `))
44
+ // Spawn Chromium process once
45
+ const browserlessFactory = createBrowserless()
46
+
47
+ // Kill the process when Node.js exits
48
+ process.on('exit', () => {
49
+ console.log('closing resources!')
50
+ browserlessFactory.close()
51
+ })
52
+
53
+ const getContent = async url => {
54
+ // create a browser context inside Chromium process
55
+ const browserContext = browserlessFactory.createContext()
56
+ const getBrowserless = () => browserContext
57
+ const result = await getHTML(url, { getBrowserless })
58
+ // close the browser context after it's used
59
+ await getBrowserless((browser) => browser.destroyContext())
60
+ return result
61
+ }
62
+
63
+ getContent('https://example.com')
64
+ .then(content => {
65
+ console.log(content)
66
+ process.exit()
67
+ })
68
+ .catch(error => {
69
+ console.error(error)
70
+ process.exit(1)
71
+ })
56
72
  ```
57
73
 
58
74
  ### Command Line
package/bin/index.js CHANGED
@@ -2,18 +2,24 @@
2
2
 
3
3
  'use strict'
4
4
 
5
+ const createBrowserless = require('browserless')
5
6
  const minimist = require('minimist')
6
7
  const { URL } = require('url')
7
8
 
8
9
  const getHTML = require('..')
9
10
 
11
+ const browserlessFactory = createBrowserless()
12
+
10
13
  const [input, ...argv] = process.argv.slice(2)
11
14
  const url = new URL(input).toString()
12
15
 
13
16
  const { debug: isDebug, ...args } = minimist(argv)
14
17
 
15
- getHTML(url, args)
16
- .then(({ html, stats, headers, statusCode }) => {
18
+ const browserContext = browserlessFactory.createContext()
19
+ const getBrowserless = () => browserContext
20
+
21
+ getHTML(url, { getBrowserless, ...args })
22
+ .then(async ({ html, stats, headers, statusCode }) => {
17
23
  if (isDebug) {
18
24
  console.log(`
19
25
  url: ${url}
@@ -31,10 +37,13 @@ getHTML(url, args)
31
37
  } else {
32
38
  console.log(html)
33
39
  }
34
-
35
40
  process.exit(0)
36
41
  })
37
- .catch(err => {
42
+ .catch(async err => {
38
43
  console.error(err)
39
44
  process.exit(1)
40
45
  })
46
+ .finally(async () => {
47
+ await getBrowserless(browser => browser.destroyContext())
48
+ browserlessFactory.close()
49
+ })
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "html-get",
3
3
  "description": "Get the HTML from any website, using prerendering when is necessary.",
4
4
  "homepage": "https://nicedoc.com/microlinkhq/html-get",
5
- "version": "2.9.14",
5
+ "version": "2.9.15",
6
6
  "main": "src/index.js",
7
7
  "bin": {
8
8
  "html-get": "bin/index.js"
@@ -1,37 +1,37 @@
1
1
  [
2
2
  "youtube",
3
3
  "google",
4
- "microsoft",
5
4
  "apple",
6
- "wikipedia",
5
+ "microsoft",
7
6
  "wordpress",
7
+ "wikipedia",
8
8
  "blogspot",
9
- "vimeo",
10
9
  "github",
11
- "nytimes",
10
+ "vimeo",
12
11
  "slideshare",
13
- "bbc",
14
12
  "imdb",
13
+ "bbc",
15
14
  "theguardian",
15
+ "nytimes",
16
+ "huffingtonpost",
16
17
  "telegraph",
17
18
  "pinterest",
18
- "huffingtonpost",
19
- "spotify",
19
+ "yelp",
20
20
  "eventbrite",
21
+ "engadget",
21
22
  "zoom",
22
23
  "techcrunch",
23
- "yelp",
24
- "soundcloud",
25
- "engadget",
26
24
  "theverge",
25
+ "spotify",
26
+ "soundcloud",
27
+ "etsy",
27
28
  "flickr",
28
29
  "stackoverflow",
29
- "giphy",
30
- "imgur",
31
30
  "csdn",
32
31
  "digg",
33
- "etsy",
34
32
  "ghost",
33
+ "giphy",
34
+ "imgur",
35
35
  "meetup",
36
36
  "producthunt",
37
37
  "reddit",