html-get 2.9.22 → 2.9.24

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "html-get",
3
3
  "description": "Get the HTML from any website, using prerendering when is necessary.",
4
4
  "homepage": "https://nicedoc.com/microlinkhq/html-get",
5
- "version": "2.9.22",
5
+ "version": "2.9.24",
6
6
  "main": "src/index.js",
7
7
  "bin": {
8
8
  "html-get": "bin/index.js"
@@ -29,14 +29,14 @@
29
29
  "request"
30
30
  ],
31
31
  "dependencies": {
32
- "@metascraper/helpers": "~5.30.2",
32
+ "@metascraper/helpers": "~5.31.3",
33
33
  "cheerio": "~1.0.0-rc.12",
34
34
  "css-url-regex": "~4.0.0",
35
35
  "debug-logfmt": "~1.0.4",
36
36
  "execall": "~2.0.0",
37
37
  "got": "~11.8.5",
38
38
  "html-encode": "~2.1.6",
39
- "html-urls": "~2.4.37",
39
+ "html-urls": "~2.4.39",
40
40
  "is-html-content": "~1.0.0",
41
41
  "lodash": "~4.17.21",
42
42
  "minimist": "~1.2.6",
@@ -44,8 +44,7 @@
44
44
  "p-retry": "~4.6.0",
45
45
  "replace-string": "~3.1.0",
46
46
  "time-span": "~4.0.0",
47
- "tldts": "~5.7.89",
48
- "top-sites": "~1.1.117",
47
+ "top-sites": "~1.1.132",
49
48
  "write-json-file": "~4.3.0"
50
49
  },
51
50
  "devDependencies": {
@@ -94,7 +93,6 @@
94
93
  },
95
94
  "license": "MIT",
96
95
  "ava": {
97
- "workerThreads": false,
98
96
  "files": [
99
97
  "test/**/*.js",
100
98
  "!test/util.js"
@@ -3,11 +3,10 @@
3
3
  'use strict'
4
4
 
5
5
  const { compact, reduce, findIndex } = require('lodash')
6
+ const { parseUrl } = require('@metascraper/helpers')
6
7
  const writeJsonFile = require('write-json-file')
7
8
  const topsites = require('top-sites')
8
9
 
9
- const { getDomainWithoutSuffix } = require('tldts')
10
-
11
10
  const domains = [
12
11
  'apple',
13
12
  'bbc',
@@ -55,7 +54,7 @@ const { top, rest } = reduce(
55
54
  (acc, domain) => {
56
55
  const index = findIndex(
57
56
  topsites,
58
- ({ rootDomain }) => getDomainWithoutSuffix(rootDomain) === domain
57
+ ({ rootDomain }) => parseUrl(rootDomain).domainWithoutSuffix === domain
59
58
  )
60
59
  if (index !== -1) acc.top[index] = domain
61
60
  else acc.rest.push(domain)
@@ -1,41 +1,41 @@
1
1
  [
2
- "google",
3
2
  "youtube",
3
+ "google",
4
4
  "apple",
5
5
  "microsoft",
6
- "wordpress",
7
6
  "wikipedia",
7
+ "wordpress",
8
8
  "blogspot",
9
9
  "vimeo",
10
10
  "github",
11
- "imdb",
12
11
  "bbc",
12
+ "imdb",
13
+ "theguardian",
13
14
  "nytimes",
14
15
  "slideshare",
15
- "theguardian",
16
16
  "huffingtonpost",
17
17
  "soundcloud",
18
- "telegraph",
19
18
  "pinterest",
19
+ "telegraph",
20
+ "zoom",
21
+ "techcrunch",
20
22
  "spotify",
21
23
  "yelp",
22
- "eventbrite",
23
24
  "engadget",
24
25
  "theverge",
25
- "techcrunch",
26
- "zoom",
27
- "flickr",
28
- "stackoverflow",
29
- "reddit",
26
+ "eventbrite",
30
27
  "giphy",
31
- "etsy",
32
28
  "digg",
29
+ "imgur",
33
30
  "csdn",
31
+ "etsy",
32
+ "flickr",
34
33
  "ghost",
35
- "imgur",
36
34
  "meetup",
37
35
  "producthunt",
36
+ "reddit",
38
37
  "sourceforge",
38
+ "stackoverflow",
39
39
  "tumblr",
40
40
  "ycombinator"
41
41
  ]
package/src/html.js CHANGED
@@ -1,11 +1,11 @@
1
1
  'use strict'
2
2
 
3
3
  const { get, split, nth, castArray, forEach } = require('lodash')
4
+ const { parseUrl } = require('@metascraper/helpers')
4
5
  const { TAGS: URL_TAGS } = require('html-urls')
5
6
  const replaceString = require('replace-string')
6
7
  const isHTML = require('is-html-content')
7
8
  const cssUrl = require('css-url-regex')
8
- const { getDomain } = require('tldts')
9
9
  const execall = require('execall')
10
10
  const cheerio = require('cheerio')
11
11
  const { URL } = require('url')
@@ -36,7 +36,7 @@ const addHead = ({ $, url, headers }) => {
36
36
  upsert(
37
37
  head.find('meta[property="og:site_name"]'),
38
38
  tags,
39
- `<meta property="og:site_name" content="${getDomain(url)}">`
39
+ `<meta property="og:site_name" content="${parseUrl(url).domain}">`
40
40
  )
41
41
 
42
42
  if (date) {
package/src/index.js CHANGED
@@ -1,7 +1,6 @@
1
1
  'use strict'
2
2
 
3
- const { isMediaUrl } = require('@metascraper/helpers')
4
- const { getDomainWithoutSuffix } = require('tldts')
3
+ const { parseUrl, isMediaUrl } = require('@metascraper/helpers')
5
4
  const debug = require('debug-logfmt')('html-get')
6
5
  const PCancelable = require('p-cancelable')
7
6
  const { AbortError } = require('p-retry')
@@ -122,7 +121,8 @@ const prerender = async (
122
121
 
123
122
  const modes = { fetch, prerender }
124
123
 
125
- const isFetchMode = url => autoDomains.includes(getDomainWithoutSuffix(url))
124
+ const isFetchMode = url =>
125
+ autoDomains.includes(parseUrl(url).domainWithoutSuffix)
126
126
 
127
127
  const determinateMode = (url, { prerender }) => {
128
128
  if (prerender === false || isMediaUrl(url)) return 'fetch'