html-get 2.17.0-0 → 2.17.0-2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/html.js +16 -15
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "html-get",
|
|
3
3
|
"description": "Get the HTML from any website, fine-tuned for correction & speed",
|
|
4
4
|
"homepage": "https://nicedoc.com/microlinkhq/html-get",
|
|
5
|
-
"version": "2.17.0-0",
|
|
5
|
+
"version": "2.17.0-2",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"html-get": "bin/index.js"
|
package/src/html.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
3
|
const { get, split, nth, castArray, forEach } = require('lodash')
|
|
4
|
+
const debug = require('debug-logfmt')('html-get:rewrite')
|
|
4
5
|
const localhostUrl = require('localhost-url-regex')
|
|
5
6
|
const { TAGS: URL_TAGS } = require('html-urls')
|
|
6
7
|
const isHTML = require('is-html-content')
|
|
@@ -89,21 +90,24 @@ const addBody = ({ url, headers, html }) => {
|
|
|
89
90
|
return `<!DOCTYPE html><html><head></head><body>${element}</body></html>`
|
|
90
91
|
}
|
|
91
92
|
|
|
92
|
-
const rewriteOpenGraph = ({ $ }) =>
|
|
93
|
-
$('meta
|
|
93
|
+
const rewriteMetaTags = ({ $ }) => {
|
|
94
|
+
$('meta').each((_, element) => {
|
|
94
95
|
const el = $(element)
|
|
95
|
-
const name = el.attr('name')
|
|
96
|
-
el.removeAttr('name')
|
|
97
|
-
el.attr('property', name)
|
|
98
|
-
})
|
|
99
96
|
|
|
100
|
-
const rewriteMetaProperty = ({ $ }) =>
|
|
101
|
-
$('meta[property]:not([property^="og"])').each((_, element) => {
|
|
102
|
-
const el = $(element)
|
|
97
|
+
const name = el.attr('name')
|
|
103
98
|
const property = el.attr('property')
|
|
104
|
-
|
|
105
|
-
|
|
99
|
+
|
|
100
|
+
// Convert 'name' to 'property' for Open Graph tags if 'property' is not already set correctly
|
|
101
|
+
if (name?.startsWith('og:') && property !== name) {
|
|
102
|
+
el.removeAttr('name').attr('property', name)
|
|
103
|
+
debug('og', el.attr())
|
|
104
|
+
// Convert 'property' to 'name' for non-Open Graph tags
|
|
105
|
+
} else if (property && !property.startsWith('og')) {
|
|
106
|
+
el.removeAttr('property').attr('name', property)
|
|
107
|
+
debug('meta', el.attr())
|
|
108
|
+
}
|
|
106
109
|
})
|
|
110
|
+
}
|
|
107
111
|
|
|
108
112
|
const rewriteHtmlUrls = ({ $, url }) => {
|
|
109
113
|
forEach(URL_TAGS, (tagName, urlAttr) => {
|
|
@@ -184,10 +188,7 @@ module.exports = ({
|
|
|
184
188
|
|
|
185
189
|
if (rewriteUrls) rewriteHtmlUrls({ $, url })
|
|
186
190
|
|
|
187
|
-
if (rewriteHtml) {
|
|
188
|
-
rewriteOpenGraph({ $ })
|
|
189
|
-
rewriteMetaProperty({ $ })
|
|
190
|
-
}
|
|
191
|
+
if (rewriteHtml) rewriteMetaTags({ $, url })
|
|
191
192
|
|
|
192
193
|
addHead({ $, url, headers })
|
|
193
194
|
|