@unavatar/core 3.7.67 → 3.7.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@unavatar/core",
3
3
  "description": "Get unified user avatar from social networks, including Instagram, SoundCloud, Telegram, Twitter, YouTube & more.",
4
4
  "homepage": "https://unavatar.io",
5
- "version": "3.7.67",
5
+ "version": "3.7.69",
6
6
  "main": "src/index.js",
7
7
  "exports": {
8
8
  ".": "./src/index.js",
@@ -2,10 +2,12 @@
2
2
 
3
3
  const randomCrawlerAgent = require('../util/crawler-agent')
4
4
 
5
/**
 * Tells whether the fetched page should be treated as blocked.
 *
 * The only signal inspected is the document title: the page counts as
 * blocked when the text of its `<title>` element contains `Login`
 * (case-sensitive).
 *
 * @param {Function} $ - Cheerio-style selector function for the fetched HTML.
 * @returns {boolean} `true` when the title text contains `Login`.
 */
const isBlocked = $ => {
  const pageTitle = $('title').text()
  return pageTitle.includes('Login')
}
6
+
5
7
  // Instagram provider factory: receives `createHtmlProvider` and `getOgImage`
  // by injection and returns whatever `createHtmlProvider` builds from the
  // options below. The `-` / `+` lines are the old and new side of the
  // 3.7.67 → 3.7.69 diff.
  module.exports = ({ createHtmlProvider, getOgImage }) =>
6
8
  createHtmlProvider({
7
9
  name: 'instagram',
8
10
  // Page to fetch for the requested input.
  url: input => `https://www.instagram.com/${input}`,
9
  // Old side: the getter was `getOgImage` on its own.
- getter: getOgImage,
11
  // New side: evaluates to `false` when `isBlocked($)` is true, otherwise to
  // the result of `getOgImage($)`.
  // NOTE(review): `false` is presumably the "blocked" signal consumed by
  // `createHtmlProvider` — confirm against its getter contract.
+ getter: $ => !isBlocked($) && getOgImage($),
10
12
  // Supplies a `user-agent` header whose value comes from `randomCrawlerAgent()`.
  htmlOpts: () => ({ headers: { 'user-agent': randomCrawlerAgent() } })
11
13
  })
@@ -2,9 +2,37 @@
2
2
 
3
3
  const { $jsonld } = require('@metascraper/helpers')
4
4
 
5
- module.exports = ({ createHtmlProvider }) =>
5
/**
 * Replaces every literal `\uXXXX` sequence (backslash, `u`, four hex digits)
 * in a string with the UTF-16 code unit it names.
 *
 * Sequences that are not followed by exactly four hex digits are left as-is.
 *
 * @param {string} str - Text that may contain escaped code units.
 * @returns {string} The text with each `\uXXXX` sequence decoded.
 */
const unescapeUnicode = str => {
  const decodeEscape = (_sequence, hexDigits) => {
    const codeUnit = parseInt(hexDigits, 16)
    return String.fromCharCode(codeUnit)
  }
  return str.replace(/\\u([0-9a-fA-F]{4})/g, decodeEscape)
}
9
+
10
/**
 * Looks for an avatar URL embedded in the page's `<script>` contents.
 *
 * Each script's inner HTML is checked for the text `avatarPhotoImageUrls`
 * followed (lazily, across any characters) by a backslash-escaped
 * `\"original\":\"` entry. The captured value is passed through
 * `unescapeUnicode`, and iteration stops at the first script that matches.
 *
 * @param {Function} $ - Cheerio-style selector function for the fetched HTML.
 * @returns {string | undefined} The decoded URL, or `undefined` when no script matches.
 */
const getRscAvatar = $ => {
  const originalUrlPattern =
    /avatarPhotoImageUrls[\s\S]*?\\"original\\":\\"((?:[^\\"]|\\u[0-9a-fA-F]{4})+)/
  let avatarUrl
  $('script').each((_index, scriptEl) => {
    const source = $(scriptEl).html() || ''
    // Cheap substring test first; the regex only runs on candidate scripts.
    const found = source.includes('avatarPhotoImageUrls')
      ? source.match(originalUrlPattern)
      : null
    if (!found) return undefined
    avatarUrl = unescapeUnicode(found[1])
    // Returning false ends the `.each` iteration early.
    return false
  })
  return avatarUrl
}
25
+
26
/**
 * Resolves the avatar URL for a fetched page.
 *
 * Tries the JSON-LD value at `mainEntity.image.contentUrl` first; when that
 * is falsy, falls back to `getRscAvatar`.
 *
 * @param {Function} $ - Cheerio-style selector function for the fetched HTML.
 * @returns {string | undefined} The first truthy URL found, otherwise the fallback's result.
 */
const getAvatar = $ => {
  const jsonLdImage = $jsonld('mainEntity.image.contentUrl')($)
  if (jsonLdImage) return jsonLdImage
  return getRscAvatar($)
}
28
+
29
// Patreon provider factory: receives `createHtmlProvider` by injection and
// returns whatever it builds from the options below. The `-` / `+` lines are
// the old and new side of the 3.7.67 → 3.7.69 diff.
+ const factory = ({ createHtmlProvider }) =>
6
30
  createHtmlProvider({
7
31
  name: 'patreon',
8
32
  // Page to fetch for the requested username.
  url: username => `https://www.patreon.com/${username}`,
9
// Old side: the getter read the JSON-LD `mainEntity.image.thumbnailUrl` value.
- getter: $ => $jsonld('mainEntity.image.thumbnailUrl')($)
33
// New side: the getter is `getAvatar`, which reads JSON-LD
// `mainEntity.image.contentUrl` and falls back to `getRscAvatar`.
+ getter: getAvatar
10
34
  })
35
+
36
// Attach `getAvatar` to the factory function so it is reachable from the
// module's export as `.getAvatar`.
// NOTE(review): presumably exposed for direct use or testing — confirm with callers.
+ factory.getAvatar = getAvatar
37
+
38
// The factory function itself is the module's export.
+ module.exports = factory
@@ -18,9 +18,21 @@ const createEmptyProviderValueError = ({ provider, statusCode }) =>
18
18
  message: 'Empty value returned by the provider.'
19
19
  })
20
20
 
21
+
21
22
  const getOgImage = $ => $('meta[property="og:image"]').attr('content')
22
23
 
23
24
  module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
25
+ /**
26
+ * @param {object} opts
27
+ * @param {string} opts.name - Provider identifier used in logs and metrics.
28
+ * @param {(input: string) => string | Promise<string>} opts.url - Builds the URL to fetch for a given input.
29
+ * @param {($: cheerio.CheerioAPI) => string | false | undefined} opts.getter
30
+ * Extracts the avatar URL from the fetched HTML.
31
+ * - `string` — avatar URL found (success).
32
+ * - `undefined` — avatar not found (normal failure, no retry).
33
+ * - `false` — page is blocked; error.blocked will be set to true.
34
+ * @param {() => object} [opts.htmlOpts] - Returns extra options merged into the fetch call.
35
+ */
24
36
  const createHtmlProvider = ({ name, url, getter, htmlOpts }) => {
25
37
  const provider = async function ({ input, req = {}, res = {} }) {
26
38
  const providerUrl = await url(input)
@@ -57,8 +69,9 @@ module.exports = ({ PROXY_TIMEOUT, getHTML, onFetchHTML }) => {
57
69
  statusCode
58
70
  })
59
71
  error.html = attempt.lastHtml
72
+ if (result === false) error.blocked = true
60
73
 
61
- log.error({ statusCode })
74
+ log.error({ statusCode, status: result === false ? 'blocked' : undefined })
62
75
 
63
76
  throw error
64
77
  }