@unavatar/core 3.7.72 → 3.7.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@unavatar/core",
|
|
3
3
|
"description": "Get unified user avatar from social networks, including Instagram, SoundCloud, Telegram, Twitter, YouTube & more.",
|
|
4
4
|
"homepage": "https://unavatar.io",
|
|
5
|
-
"version": "3.7.72",
|
|
5
|
+
"version": "3.7.74",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"exports": {
|
|
8
8
|
".": "./src/index.js",
|
|
@@ -132,6 +132,7 @@
|
|
|
132
132
|
"html-get": "~2.22.0",
|
|
133
133
|
"https-tls": "~1.0.24",
|
|
134
134
|
"is-absolute-url": "~3.0.3",
|
|
135
|
+
"is-antibot": "~1.3.0",
|
|
135
136
|
"is-email-like": "~1.0.0",
|
|
136
137
|
"lodash": "~4.17.23",
|
|
137
138
|
"ms": "~2.1.3",
|
|
@@ -2,12 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
const randomCrawlerAgent = require('../util/crawler-agent')
|
|
4
4
|
|
|
5
|
-
const isBlocked = $ => $('title').text().includes('Login')
|
|
6
|
-
|
|
7
5
|
module.exports = ({ createHtmlProvider, getOgImage }) =>
|
|
8
6
|
createHtmlProvider({
|
|
9
7
|
name: 'instagram',
|
|
10
8
|
url: input => `https://www.instagram.com/${input}`,
|
|
11
|
-
getter: $ => (isBlocked($) ? false : getOgImage($)),
|
|
9
|
+
getter: getOgImage,
|
|
12
10
|
htmlOpts: () => ({ headers: { 'user-agent': randomCrawlerAgent() } })
|
|
13
11
|
})
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
const { normalizeUrl } = require('@metascraper/helpers')
|
|
4
4
|
const debug = require('debug-logfmt')('html-provider')
|
|
5
|
+
const isAntibot = require('is-antibot')
|
|
5
6
|
|
|
6
7
|
const httpStatus = require('./http-status')
|
|
7
8
|
const ExtendableError = require('./error')
|
|
@@ -18,7 +19,6 @@ const createEmptyProviderValueError = ({ provider, statusCode }) =>
|
|
|
18
19
|
message: 'Empty value returned by the provider.'
|
|
19
20
|
})
|
|
20
21
|
|
|
21
|
-
|
|
22
22
|
const getOgImage = $ =>
|
|
23
23
|
$('meta[property="og:image"]').attr('content') ||
|
|
24
24
|
$('meta[name="og:image"]').attr('content')
|
|
@@ -28,14 +28,16 @@ module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
|
|
|
28
28
|
* @param {object} opts
|
|
29
29
|
* @param {string} opts.name - Provider identifier used in logs and metrics.
|
|
30
30
|
* @param {(input: string) => string | Promise<string>} opts.url - Builds the URL to fetch for a given input.
|
|
31
|
-
* @param {($: cheerio.CheerioAPI) => string | undefined | false} opts.getter
|
|
31
|
+
* @param {($: cheerio.CheerioAPI) => string | undefined} opts.getter
|
|
32
32
|
* Extracts the avatar URL from the fetched HTML.
|
|
33
33
|
* - `string` — avatar URL found (success).
|
|
34
34
|
* - `undefined` — avatar not found (normal failure, no retry).
|
|
35
|
-
*
|
|
35
|
+
* @param {($: cheerio.CheerioAPI) => boolean} [opts.isBlocked]
|
|
36
|
+
* Optional provider-specific blocked-page detector, checked after the
|
|
37
|
+
* default `is-antibot` check when getter returns empty/undefined.
|
|
36
38
|
* @param {() => object} [opts.htmlOpts] - Returns extra options merged into the fetch call.
|
|
37
39
|
*/
|
|
38
|
-
const createHtmlProvider = ({ name, url, getter, htmlOpts }) => {
|
|
40
|
+
const createHtmlProvider = ({ name, url, getter, isBlocked, htmlOpts }) => {
|
|
39
41
|
const provider = async function ({ input, req = {}, res = {} }) {
|
|
40
42
|
const providerUrl = await url(input)
|
|
41
43
|
const context = { provider: name, input, providerUrl }
|
|
@@ -47,7 +49,11 @@ module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
|
|
|
47
49
|
|
|
48
50
|
const log = debug.duration({ ...context, tier })
|
|
49
51
|
|
|
50
|
-
const { $, statusCode } = await getHTML(providerUrl, fetchOpts)
|
|
52
|
+
const {
|
|
53
|
+
$,
|
|
54
|
+
statusCode,
|
|
55
|
+
headers: responseHeaders = {}
|
|
56
|
+
} = await getHTML(providerUrl, fetchOpts)
|
|
51
57
|
|
|
52
58
|
attempt.lastHtml =
|
|
53
59
|
typeof $ === 'function' && typeof $.html === 'function'
|
|
@@ -71,9 +77,18 @@ module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
|
|
|
71
77
|
statusCode
|
|
72
78
|
})
|
|
73
79
|
error.html = attempt.lastHtml
|
|
74
|
-
if (result === false) error.blocked = true
|
|
75
80
|
|
|
76
|
-
|
|
81
|
+
const { detected: antibotDetected } = isAntibot({
|
|
82
|
+
url: providerUrl,
|
|
83
|
+
headers: responseHeaders,
|
|
84
|
+
body: attempt.lastHtml
|
|
85
|
+
})
|
|
86
|
+
if (antibotDetected || isBlocked?.($) === true) error.blocked = true
|
|
87
|
+
|
|
88
|
+
log.error({
|
|
89
|
+
statusCode,
|
|
90
|
+
status: error.blocked ? 'blocked' : undefined
|
|
91
|
+
})
|
|
77
92
|
|
|
78
93
|
throw error
|
|
79
94
|
}
|