@unavatar/core 3.7.71 → 3.7.73

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@unavatar/core",
3
3
  "description": "Get unified user avatar from social networks, including Instagram, SoundCloud, Telegram, Twitter, YouTube & more.",
4
4
  "homepage": "https://unavatar.io",
5
- "version": "3.7.71",
5
+ "version": "3.7.73",
6
6
  "main": "src/index.js",
7
7
  "exports": {
8
8
  ".": "./src/index.js",
@@ -132,6 +132,7 @@
132
132
  "html-get": "~2.22.0",
133
133
  "https-tls": "~1.0.24",
134
134
  "is-absolute-url": "~3.0.3",
135
+ "is-antibot": "~1.3.0",
135
136
  "is-email-like": "~1.0.0",
136
137
  "lodash": "~4.17.23",
137
138
  "ms": "~2.1.3",
@@ -2,12 +2,10 @@
2
2
 
3
3
  const randomCrawlerAgent = require('../util/crawler-agent')
4
4
 
5
- const isBlocked = $ => $('title').text().includes('Login')
6
-
7
5
  module.exports = ({ createHtmlProvider, getOgImage }) =>
8
6
  createHtmlProvider({
9
7
  name: 'instagram',
10
8
  url: input => `https://www.instagram.com/${input}`,
11
- getter: $ => !isBlocked($) && getOgImage($),
9
+ getter: getOgImage,
12
10
  htmlOpts: () => ({ headers: { 'user-agent': randomCrawlerAgent() } })
13
11
  })
@@ -4,5 +4,5 @@ module.exports = ({ createHtmlProvider, getOgImage }) =>
4
4
  createHtmlProvider({
5
5
  name: 'linkedin',
6
6
  url: input => `https://www.linkedin.com/in/${input}`,
7
- getter: $ => getOgImage($) || false
7
+ getter: getOgImage
8
8
  })
@@ -2,6 +2,7 @@
2
2
 
3
3
  const { normalizeUrl } = require('@metascraper/helpers')
4
4
  const debug = require('debug-logfmt')('html-provider')
5
+ const isAntibot = require('is-antibot')
5
6
 
6
7
  const httpStatus = require('./http-status')
7
8
  const ExtendableError = require('./error')
@@ -18,7 +19,6 @@ const createEmptyProviderValueError = ({ provider, statusCode }) =>
18
19
  message: 'Empty value returned by the provider.'
19
20
  })
20
21
 
21
-
22
22
  const getOgImage = $ =>
23
23
  $('meta[property="og:image"]').attr('content') ||
24
24
  $('meta[name="og:image"]').attr('content')
@@ -28,14 +28,16 @@ module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
28
28
  * @param {object} opts
29
29
  * @param {string} opts.name - Provider identifier used in logs and metrics.
30
30
  * @param {(input: string) => string | Promise<string>} opts.url - Builds the URL to fetch for a given input.
31
- * @param {($: cheerio.CheerioAPI) => string | false | undefined} opts.getter
31
+ * @param {($: cheerio.CheerioAPI) => string | undefined} opts.getter
32
32
  * Extracts the avatar URL from the fetched HTML.
33
33
  * - `string` — avatar URL found (success).
34
34
  * - `undefined` — avatar not found (normal failure, no retry).
35
- * - `false` — page is blocked; error.blocked will be set to true.
35
+ * @param {($: cheerio.CheerioAPI) => boolean} [opts.isBlocked]
36
+ * Optional provider-specific blocked-page detector, checked after the
37
+ * default `is-antibot` check when getter returns empty/undefined.
36
38
  * @param {() => object} [opts.htmlOpts] - Returns extra options merged into the fetch call.
37
39
  */
38
- const createHtmlProvider = ({ name, url, getter, htmlOpts }) => {
40
+ const createHtmlProvider = ({ name, url, getter, isBlocked, htmlOpts }) => {
39
41
  const provider = async function ({ input, req = {}, res = {} }) {
40
42
  const providerUrl = await url(input)
41
43
  const context = { provider: name, input, providerUrl }
@@ -71,9 +73,16 @@ module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
71
73
  statusCode
72
74
  })
73
75
  error.html = attempt.lastHtml
74
- if (result === false) error.blocked = true
75
76
 
76
- log.error({ statusCode, status: result === false ? 'blocked' : undefined })
77
+ const { detected: antibotDetected } = isAntibot({
78
+ body: attempt.lastHtml
79
+ })
80
+ if (antibotDetected || isBlocked?.($) === true) error.blocked = true
81
+
82
+ log.error({
83
+ statusCode,
84
+ status: error.blocked ? 'blocked' : undefined
85
+ })
77
86
 
78
87
  throw error
79
88
  }