@unavatar/core 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +407 -0
- package/bin/index.js +163 -0
- package/bin/unavatar +3 -0
- package/bin/unavatar-dev +3 -0
- package/package.json +192 -0
- package/src/avatar/auto.js +94 -0
- package/src/constant.js +18 -0
- package/src/index.js +51 -0
- package/src/providers/apple-music.js +97 -0
- package/src/providers/bluesky.js +10 -0
- package/src/providers/deviantart.js +8 -0
- package/src/providers/dribbble.js +8 -0
- package/src/providers/duckduckgo.js +6 -0
- package/src/providers/github.js +10 -0
- package/src/providers/gitlab.js +8 -0
- package/src/providers/google.js +6 -0
- package/src/providers/gravatar.js +15 -0
- package/src/providers/index.js +60 -0
- package/src/providers/instagram.js +8 -0
- package/src/providers/microlink.js +15 -0
- package/src/providers/onlyfans.js +20 -0
- package/src/providers/openstreetmap.js +20 -0
- package/src/providers/patreon.js +10 -0
- package/src/providers/reddit.js +9 -0
- package/src/providers/soundcloud.js +17 -0
- package/src/providers/spotify.js +18 -0
- package/src/providers/substack.js +35 -0
- package/src/providers/telegram.js +8 -0
- package/src/providers/tiktok.js +26 -0
- package/src/providers/twitch.js +8 -0
- package/src/providers/vimeo.js +8 -0
- package/src/providers/whatsapp.js +31 -0
- package/src/providers/x.js +35 -0
- package/src/providers/youtube.js +8 -0
- package/src/util/browserless.js +38 -0
- package/src/util/cacheable-lookup.js +22 -0
- package/src/util/error.js +6 -0
- package/src/util/got.js +32 -0
- package/src/util/html-get.js +25 -0
- package/src/util/html-provider.js +190 -0
- package/src/util/http-status.js +17 -0
- package/src/util/is-iterable.js +9 -0
- package/src/util/keyv.js +27 -0
- package/src/util/reachable-url.js +21 -0
- package/src/util/stringify.js +5 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
module.exports = ({ createHtmlProvider }) =>
|
|
4
|
+
createHtmlProvider({
|
|
5
|
+
name: 'reddit',
|
|
6
|
+
url: input => `https://www.reddit.com/user/${input}/`,
|
|
7
|
+
getter: $ => $('img[alt*="avatar"]').attr('src'),
|
|
8
|
+
htmlOpts: () => ({ headers: { 'accept-language': 'en' } })
|
|
9
|
+
})
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const uniqueRandomArray = require('unique-random-array')
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* SoundCloud is serving an old app for desktop users,
|
|
7
|
+
* so we need to use a mobile user agent to get the avatar.
|
|
8
|
+
*/
|
|
9
|
+
const randomUserAgent = uniqueRandomArray(require('top-user-agents/mobile'))
|
|
10
|
+
|
|
11
|
+
module.exports = ({ createHtmlProvider, getOgImage }) =>
|
|
12
|
+
createHtmlProvider({
|
|
13
|
+
name: 'soundcloud',
|
|
14
|
+
url: input => `https://soundcloud.com/${input}`,
|
|
15
|
+
getter: getOgImage,
|
|
16
|
+
htmlOpts: () => ({ headers: { 'user-agent': randomUserAgent() } })
|
|
17
|
+
})
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
/**
 * Converts a `type:id` input into the `type/id` path used by open.spotify.com.
 *
 * Only the first two `:`-separated segments are considered. When there is no
 * non-empty second segment the type falls back to `user`; the id is the second
 * segment whenever one exists (even if empty), otherwise the first segment.
 *
 * @param {string} input - e.g. `artist:123` or a bare user id.
 * @returns {string} path such as `artist/123` or `user/<id>`.
 */
const spotifyUri = input => {
  const segments = input.split(':')
  const head = segments[0]
  const tail = segments[1]

  const type = tail ? head : 'user'
  const id = tail === undefined ? head : tail

  return [type, id].join('/')
}
|
|
9
|
+
|
|
10
|
+
module.exports = ({ createHtmlProvider, getOgImage }) =>
|
|
11
|
+
createHtmlProvider({
|
|
12
|
+
name: 'spotify',
|
|
13
|
+
url: input => `https://open.spotify.com/${spotifyUri(input)}`,
|
|
14
|
+
getter: getOgImage,
|
|
15
|
+
htmlOpts: () => ({
|
|
16
|
+
prerender: true
|
|
17
|
+
})
|
|
18
|
+
})
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { $jsonld } = require('@metascraper/helpers')
|
|
4
|
+
const { parse: parseSrcset } = require('srcset')
|
|
5
|
+
|
|
6
|
+
/**
 * Picks the URL of the highest-scoring candidate of a `srcset` attribute value.
 *
 * A candidate's score is its `width`, else its `density`, else 0. On ties the
 * earliest candidate wins.
 *
 * @param {*} srcset - raw `srcset` attribute value.
 * @returns {string|undefined} the best candidate URL, or `undefined` when the
 *   input is not a non-blank string or yields no candidates.
 */
const getBestSrcsetUrl = srcset => {
  const isUsable = typeof srcset === 'string' && srcset.trim() !== ''
  if (!isUsable) return undefined

  let best
  for (const { url, width, density } of parseSrcset(srcset)) {
    const score = width ?? density ?? 0
    // Strict comparison keeps the first candidate when scores are equal.
    if (best === undefined || score > best.score) best = { url, score }
  }

  return best === undefined ? undefined : best.url
}
|
|
17
|
+
|
|
18
|
+
/**
 * Extracts an avatar URL from the `picture img` selection of the page.
 *
 * @param {Function} $ - DOM query function for the fetched page.
 * @returns {string|undefined} best `srcset` candidate when available, otherwise the `src` attribute.
 */
const getPictureAvatar = $ => {
  const img = $('picture img')

  const fromSrcset = getBestSrcsetUrl(img.attr('srcset'))
  if (fromSrcset) return fromSrcset

  return img.attr('src')
}
|
|
23
|
+
|
|
24
|
+
/**
 * Resolves the avatar URL for a Substack page.
 *
 * @param {Function} $ - DOM query function for the fetched page.
 * @returns {*} the JSON-LD `publisher.logo.url` value when truthy, otherwise
 *   the result of `getPictureAvatar`.
 */
const getAvatarUrl = $ => {
  const publisherLogo = $jsonld('publisher.logo.url')($)
  if (publisherLogo) return publisherLogo
  return getPictureAvatar($)
}
|
|
25
|
+
|
|
26
|
+
/**
 * Substack provider factory.
 *
 * @param {object} deps
 * @param {Function} deps.createHtmlProvider - builder that receives the provider definition.
 * @returns {*} whatever `createHtmlProvider` returns for the `substack` definition.
 */
const factory = ({ createHtmlProvider }) => {
  // The input is used as the publication subdomain.
  const buildPublicationUrl = input => `https://${input}.substack.com`

  return createHtmlProvider({
    name: 'substack',
    url: buildPublicationUrl,
    getter: getAvatarUrl
  })
}

// The getter is also exposed as a property of the exported factory.
module.exports = Object.assign(factory, { getAvatarUrl })
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { get } = require('lodash')
|
|
4
|
+
|
|
5
|
+
/**
 * Reads the avatar URL from the JSON stored in the
 * `#__UNIVERSAL_DATA_FOR_REHYDRATION__` element of a TikTok profile page.
 *
 * @param {Function} $ - DOM query function for the fetched page.
 * @returns {*} the `avatarLarger` value, or `undefined` when the element has
 *   no text or the path is absent.
 * @throws {SyntaxError} when the element text is not valid JSON.
 */
const getAvatarUrl = $ => {
  const rehydrationJson = $('#__UNIVERSAL_DATA_FOR_REHYDRATION__').contents().text()

  if (rehydrationJson) {
    // Array path: `webapp.user-detail` is a single key that contains a dot.
    const avatarPath = ['__DEFAULT_SCOPE__', 'webapp.user-detail', 'userInfo', 'user', 'avatarLarger']
    return get(JSON.parse(rehydrationJson), avatarPath)
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * TikTok provider factory.
 *
 * @param {object} deps
 * @param {Function} deps.createHtmlProvider - builder that receives the provider definition.
 * @returns {*} whatever `createHtmlProvider` returns for the `tiktok` definition.
 */
const factory = ({ createHtmlProvider }) => {
  const buildProfileUrl = input => `https://www.tiktok.com/@${input}`

  return createHtmlProvider({
    name: 'tiktok',
    url: buildProfileUrl,
    getter: getAvatarUrl
  })
}

// The getter is also exposed as a property of the exported factory.
module.exports = Object.assign(factory, { getAvatarUrl })
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
/**
 * Splits a `type:id` input into its parts.
 *
 * Only the first two `:`-separated segments are considered. Without a
 * non-empty second segment the type falls back to `phone`; the id is the
 * second segment whenever one exists (even if empty), otherwise the first.
 *
 * @param {string} input - e.g. `channel:abc` or a bare phone number.
 * @returns {{ type: string, id: string }}
 */
const whatsappURI = input => {
  const segments = input.split(':')
  const head = segments[0]
  const tail = segments[1]

  // No separator at all: the whole input is the id.
  if (tail === undefined) return { type: 'phone', id: head }

  return { type: tail ? head : 'phone', id: tail }
}
|
|
10
|
+
|
|
11
|
+
/**
 * Builds the WhatsApp URL to fetch for the given input.
 *
 * Supported types: `phone`, `channel`, `chat` and `group` (the last two share
 * the same URL shape).
 *
 * @param {string} input - `type:id` or a bare phone number.
 * @returns {string} the URL for the resolved type.
 * @throws {Error} when the type is not one of the supported values.
 */
const whatsappURL = input => {
  const { type, id } = whatsappURI(input)

  if (type === 'phone') return `https://api.whatsapp.com/send/?phone=${id}`
  if (type === 'channel') return `https://www.whatsapp.com/channel/${id}`
  if (type === 'chat' || type === 'group') return `https://chat.whatsapp.com/${id}`

  throw new Error(`Unsupported WhatsApp type: ${type}`)
}
|
|
25
|
+
|
|
26
|
+
module.exports = ({ createHtmlProvider, getOgImage }) =>
|
|
27
|
+
createHtmlProvider({
|
|
28
|
+
name: 'whatsapp',
|
|
29
|
+
url: whatsappURL,
|
|
30
|
+
getter: getOgImage
|
|
31
|
+
})
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const uniqueRandomArray = require('unique-random-array')
|
|
4
|
+
const { $jsonld } = require('@metascraper/helpers')
|
|
5
|
+
|
|
6
|
+
// Only crawler user agents that start with "Slackbot" are kept.
// NOTE(review): if the list ever contains no such entry the picker has nothing to choose from — confirm.
const slackbotAgents = require('top-crawler-agents').filter(agent => agent.startsWith('Slackbot'))

const randomCrawlerAgent = uniqueRandomArray(slackbotAgents)
|
|
9
|
+
|
|
10
|
+
// Low resolution suffixes that get upgraded, and the suffix they are upgraded to.
const LOW_RESOLUTION_SUFFIXES = ['_200x200.jpg', '_normal.jpg']
const HIGH_RESOLUTION_SUFFIX = '_400x400.jpg'

/**
 * Upgrades a profile image URL ending in `_200x200.jpg` or `_normal.jpg`
 * to its `_400x400.jpg` variant.
 *
 * Only the trailing suffix is rewritten, so an identical marker appearing
 * earlier in the URL is left untouched. Non-string values (including
 * `undefined`) are returned unchanged instead of throwing.
 *
 * @param {*} url - image URL candidate.
 * @returns {*} the upgraded URL, or the input as-is when nothing applies.
 */
const toHighResolution = url => {
  if (typeof url !== 'string') return url

  const suffix = LOW_RESOLUTION_SUFFIXES.find(candidate => url.endsWith(candidate))
  if (suffix === undefined) return url

  // Slice off the matched suffix rather than `replace`, which targets the first occurrence.
  return `${url.slice(0, -suffix.length)}${HIGH_RESOLUTION_SUFFIX}`
}
|
|
19
|
+
|
|
20
|
+
/**
 * Resolves the profile image for an X page.
 *
 * Prefers the JSON-LD `mainEntity.image.contentUrl` value and falls back to
 * the `og:image` meta tag; the result is passed through `toHighResolution`.
 *
 * @param {Function} $ - DOM query function for the fetched page.
 * @returns {*} the value returned by `toHighResolution` for the chosen candidate.
 */
const getProfileImage = $ => {
  const fromJsonLd = $jsonld('mainEntity.image.contentUrl')($)
  const candidate = fromJsonLd || $('meta[property="og:image"]').attr('content')
  return toHighResolution(candidate)
}
|
|
24
|
+
|
|
25
|
+
/**
 * X provider factory.
 *
 * @param {object} deps
 * @param {Function} deps.createHtmlProvider - builder that receives the provider definition.
 * @returns {*} whatever `createHtmlProvider` returns for the `x` definition.
 */
const factory = ({ createHtmlProvider }) => {
  const buildProfileUrl = input => `https://x.com/${input}`

  // A crawler user agent is picked every time the options are built.
  const buildHtmlOpts = () => {
    const headers = { 'user-agent': randomCrawlerAgent() }
    return { headers }
  }

  return createHtmlProvider({
    name: 'x',
    url: buildProfileUrl,
    getter: getProfileImage,
    htmlOpts: buildHtmlOpts
  })
}

// The getter is also exposed as a property of the exported factory.
module.exports = Object.assign(factory, { getProfileImage })
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const createBrowser = require('browserless')
|
|
4
|
+
const { randomUUID } = require('crypto')
|
|
5
|
+
const puppeteer = require('puppeteer')
|
|
6
|
+
const path = require('path')
|
|
7
|
+
|
|
8
|
+
module.exports = ({ TMP_FOLDER, PUPPETEER_RANDOM_DIR }) => {
|
|
9
|
+
const PUPPETEER_BASE_DIR = path.join(TMP_FOLDER, 'puppeteer')
|
|
10
|
+
|
|
11
|
+
const getPuppeteerDir = PUPPETEER_RANDOM_DIR
|
|
12
|
+
? () => `${PUPPETEER_BASE_DIR}-${randomUUID()}`
|
|
13
|
+
: () => PUPPETEER_BASE_DIR
|
|
14
|
+
|
|
15
|
+
const getArgs = () => {
|
|
16
|
+
const PUPPETEER_DIR = getPuppeteerDir()
|
|
17
|
+
const DATA_DIR = path.join(PUPPETEER_DIR, 'profile')
|
|
18
|
+
const CACHE_DIR = path.join(PUPPETEER_DIR, 'cache')
|
|
19
|
+
|
|
20
|
+
const args = createBrowser.driver.defaultArgs.concat([
|
|
21
|
+
'--allow-running-insecure-content',
|
|
22
|
+
`--disk-cache-dir=${CACHE_DIR}`,
|
|
23
|
+
`--user-data-dir=${DATA_DIR}`
|
|
24
|
+
])
|
|
25
|
+
|
|
26
|
+
return { PUPPETEER_DIR, DATA_DIR, CACHE_DIR, args }
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
const browser = createBrowser({
|
|
30
|
+
args: getArgs().args,
|
|
31
|
+
dumpio: false,
|
|
32
|
+
pipe: true,
|
|
33
|
+
puppeteer,
|
|
34
|
+
waitForInitialPage: false
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
return () => browser
|
|
38
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const CacheableLookup = require('cacheable-lookup')
|
|
4
|
+
const Tangerine = require('tangerine')
|
|
5
|
+
|
|
6
|
+
module.exports = ({ TTL_DEFAULT, DNS_TIMEOUT, createMemoryCache }) =>
|
|
7
|
+
new CacheableLookup({
|
|
8
|
+
maxTtl: TTL_DEFAULT,
|
|
9
|
+
cache: createMemoryCache({ namespace: 'dns' }),
|
|
10
|
+
resolver: new Tangerine(
|
|
11
|
+
{
|
|
12
|
+
cache: false,
|
|
13
|
+
timeout: DNS_TIMEOUT,
|
|
14
|
+
servers: ['1.1.1.1', '8.8.8.8']
|
|
15
|
+
},
|
|
16
|
+
require('got').extend({
|
|
17
|
+
responseType: 'buffer',
|
|
18
|
+
decompress: false,
|
|
19
|
+
retry: 0
|
|
20
|
+
})
|
|
21
|
+
)
|
|
22
|
+
})
|
package/src/util/got.js
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const uniqueRandomArray = require('unique-random-array')
|
|
4
|
+
const tlsHook = require('https-tls/hook')
|
|
5
|
+
const uaHints = require('ua-hints')
|
|
6
|
+
const got = require('got')
|
|
7
|
+
|
|
8
|
+
const topUserAgents = require('top-user-agents')
const randomUserAgent = uniqueRandomArray(topUserAgents)

/**
 * `beforeRequest` hook that normalizes the user agent related headers.
 *
 * When the request still carries got's default user agent it is replaced by a
 * random one; afterwards every header returned by `uaHints` for the effective
 * user agent is written onto the request headers.
 *
 * @param {object} options - got request options; `options.headers` is mutated in place.
 * @returns {void}
 */
const userAgentHook = options => {
  const { headers } = options

  const usesGotDefault = headers['user-agent'] === 'got (https://github.com/sindresorhus/got)'
  if (usesGotDefault) headers['user-agent'] = randomUserAgent()

  const hints = uaHints(headers['user-agent'])
  Object.entries(hints).forEach(([key, value]) => {
    headers[key] = value
  })
}
|
|
20
|
+
|
|
21
|
+
module.exports = ({ cacheableLookup }) => {
|
|
22
|
+
const gotOpts = {
|
|
23
|
+
dnsCache: cacheableLookup,
|
|
24
|
+
https: { rejectUnauthorized: false },
|
|
25
|
+
hooks: { beforeRequest: [userAgentHook, tlsHook] }
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const instance = got.extend(gotOpts)
|
|
29
|
+
instance.gotOpts = gotOpts
|
|
30
|
+
|
|
31
|
+
return instance
|
|
32
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
module.exports = ({ createBrowser, got }) =>
|
|
4
|
+
async function (url, { puppeteerOpts, timeout, gotOpts, ...opts } = {}) {
|
|
5
|
+
const browser = await createBrowser()
|
|
6
|
+
const browserContext = await browser.createContext()
|
|
7
|
+
|
|
8
|
+
const promise = require('html-get')(url, {
|
|
9
|
+
prerender: false,
|
|
10
|
+
...opts,
|
|
11
|
+
getBrowserless: () => browserContext,
|
|
12
|
+
serializeHtml: $ => ({ $ }),
|
|
13
|
+
puppeteerOpts: {
|
|
14
|
+
timeout,
|
|
15
|
+
...puppeteerOpts
|
|
16
|
+
},
|
|
17
|
+
gotOpts: {
|
|
18
|
+
timeout,
|
|
19
|
+
...got.gotOpts,
|
|
20
|
+
...gotOpts
|
|
21
|
+
}
|
|
22
|
+
})
|
|
23
|
+
|
|
24
|
+
return Promise.resolve(promise).finally(() => browserContext.destroyContext())
|
|
25
|
+
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { mkdir, writeFile } = require('fs/promises')
|
|
4
|
+
const { normalizeUrl } = require('@metascraper/helpers')
|
|
5
|
+
const debug = require('debug-logfmt')('html-provider')
|
|
6
|
+
const path = require('path')
|
|
7
|
+
|
|
8
|
+
const httpStatus = require('./http-status')
|
|
9
|
+
const ExtendableError = require('./error')
|
|
10
|
+
|
|
11
|
+
// Directory where HTML debug dumps are written by `writeHtmlDebugFile`.
const HTML_DEBUG_DIR = '/tmp/html'
|
|
12
|
+
|
|
13
|
+
/**
 * Tells whether a response came back without a usable status code.
 *
 * @param {*} statusCode
 * @returns {boolean} `true` for `undefined`, `null` or the empty string.
 */
const isStatusCodeMissing = statusCode => [undefined, null, ''].includes(statusCode)
|
|
15
|
+
|
|
16
|
+
/**
 * Creates the error raised when a provider getter yields no usable value.
 *
 * @param {object} params
 * @param {string} params.provider - provider name attached to the error.
 * @param {*} params.statusCode - status code of the response that was inspected.
 * @returns {ExtendableError}
 */
const createEmptyProviderValueError = ({ provider, statusCode }) => {
  const props = {
    provider,
    statusCode,
    message: 'Empty value returned by the provider.'
  }

  return new ExtendableError(props)
}
|
|
22
|
+
|
|
23
|
+
// Sentinel returned by the origin lookup when the response status is 404.
const NOT_FOUND = Symbol('NOT_FOUND')

// Sentinel returned when a lookup tier failed or produced no answer.
const UNRESOLVED = Symbol('UNRESOLVED')
|
|
26
|
+
|
|
27
|
+
/**
 * Reads the `content` attribute of the `og:image` meta tag.
 *
 * @param {Function} $ - DOM query function for the fetched page.
 * @returns {*} the attribute value as returned by `.attr('content')`.
 */
const getOgImage = $ => {
  const ogImageTag = $('meta[property="og:image"]')
  return ogImageTag.attr('content')
}
|
|
28
|
+
|
|
29
|
+
/**
 * Turns an arbitrary value into a token that is safe to embed in a file name.
 *
 * The value is stringified (falsy values become the empty string), trimmed and
 * lowercased; every run of characters outside `a-z0-9._-` collapses into a
 * single `-`, and leading/trailing dashes are stripped.
 *
 * @param {*} input
 * @returns {string} the sanitized token, or `'unknown'` when nothing is left.
 */
const sanitizeFileToken = input => {
  const normalized = String(input || '').trim().toLowerCase()
  const dashed = normalized.replace(/[^a-z0-9._-]+/g, '-')
  const token = dashed.replace(/^-+|-+$/g, '')

  return token || 'unknown'
}
|
|
35
|
+
|
|
36
|
+
/**
 * Dumps the HTML of a failed lookup to `HTML_DEBUG_DIR` for later inspection.
 *
 * The file is named `<provider>-<tier>-<requestId>.html`, each part being
 * passed through `sanitizeFileToken`.
 *
 * @param {object} params
 * @param {*} params.debugEnabled - nothing is written unless this is truthy.
 * @param {*} params.provider - provider name.
 * @param {*} params.tier - lookup tier.
 * @param {*} params.requestId - request identifier.
 * @param {*} params.html - HTML to write; anything other than a non-empty string is skipped.
 * @returns {Promise<string|undefined>} path of the written file, or `undefined` when skipped.
 */
const writeHtmlDebugFile = async ({ debugEnabled, provider, tier, requestId, html }) => {
  const hasHtml = typeof html === 'string' && html !== ''
  if (!debugEnabled || !hasHtml) return

  const tokens = [provider, tier, requestId].map(part => sanitizeFileToken(part))
  const filePath = path.join(HTML_DEBUG_DIR, `${tokens.join('-')}.html`)

  // Make sure the target directory exists before writing.
  await mkdir(path.dirname(filePath), { recursive: true })
  await writeFile(filePath, html, 'utf8')

  return filePath
}
|
|
48
|
+
|
|
49
|
+
/**
 * Collects the serialized HTML of a page, and its length, for debugging.
 *
 * @param {*} $ - DOM query function; anything that is not a function yields `{}`.
 * @returns {Promise<object>} `{ html, htmlLength }`, where `html` is the result
 *   of `$.html()` or the empty string when `$.html` is not a function.
 */
const getHtmlDebugInfo = async $ => {
  if (typeof $ !== 'function') return {}

  let html = ''
  if (typeof $.html === 'function') html = $.html()

  return { html, htmlLength: html.length }
}
|
|
59
|
+
|
|
60
|
+
module.exports = ({ PROXY_TIMEOUT, DEBUG_HTML_TO_FILE, getHTML }) => {
|
|
61
|
+
const createHtmlProvider = ({ name, url, getter, htmlOpts }) => {
|
|
62
|
+
const provider = async function ({ input, opts, req = {}, res = {} }) {
|
|
63
|
+
const providerUrl = await url(input)
|
|
64
|
+
const context = { provider: name, input, providerUrl }
|
|
65
|
+
|
|
66
|
+
const forceProxy = req.query?.proxy === true && typeof opts === 'function'
|
|
67
|
+
|
|
68
|
+
const logProviderError = payload => debug.error({ ...context, ...payload })
|
|
69
|
+
|
|
70
|
+
const logProviderLookup = payload => debug({ ...context, ...payload })
|
|
71
|
+
|
|
72
|
+
const getResult = async ($, statusCode, log, tier) => {
|
|
73
|
+
const result = getter($)
|
|
74
|
+
if (typeof result !== 'string' || result === '') {
|
|
75
|
+
const { html, ...htmlDebugInfo } = await getHtmlDebugInfo($)
|
|
76
|
+
const requestId = typeof res.getHeader === 'function'
|
|
77
|
+
? res.getHeader('x-request-id')
|
|
78
|
+
: undefined
|
|
79
|
+
const htmlFile = await writeHtmlDebugFile({
|
|
80
|
+
debugEnabled: DEBUG_HTML_TO_FILE,
|
|
81
|
+
provider: name,
|
|
82
|
+
tier,
|
|
83
|
+
requestId,
|
|
84
|
+
html
|
|
85
|
+
}).catch(() => undefined)
|
|
86
|
+
|
|
87
|
+
log.error({
|
|
88
|
+
statusCode,
|
|
89
|
+
...htmlDebugInfo,
|
|
90
|
+
...(htmlFile ? { htmlFile } : {})
|
|
91
|
+
})
|
|
92
|
+
|
|
93
|
+
throw createEmptyProviderValueError({ provider: name, statusCode })
|
|
94
|
+
}
|
|
95
|
+
const normalizedResult = normalizeUrl(providerUrl, result)
|
|
96
|
+
log({
|
|
97
|
+
statusCode,
|
|
98
|
+
status: 'success',
|
|
99
|
+
result: normalizedResult
|
|
100
|
+
})
|
|
101
|
+
return normalizedResult
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
const reportSuccess = (requestType, result) => {
|
|
105
|
+
if (typeof res.setHeader === 'function') {
|
|
106
|
+
res.setHeader('x-proxy-tier', requestType)
|
|
107
|
+
}
|
|
108
|
+
return result
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
const getResultOrUndefined = async ({
|
|
112
|
+
tier,
|
|
113
|
+
resolve,
|
|
114
|
+
onError
|
|
115
|
+
} = {}) => {
|
|
116
|
+
const log = debug.duration({ ...context, tier })
|
|
117
|
+
try {
|
|
118
|
+
return await resolve(log)
|
|
119
|
+
} catch (error) {
|
|
120
|
+
if (error?.provider !== name && error?.name !== 'TimeoutError') {
|
|
121
|
+
logProviderError({ tier, status: 'failed', message: error.message })
|
|
122
|
+
}
|
|
123
|
+
onError?.(error)
|
|
124
|
+
return UNRESOLVED
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
const getProxyResultOrUndefined = async ({ superProxy = false } = {}) => {
|
|
129
|
+
const tier = superProxy ? 'residential' : 'datacenter'
|
|
130
|
+
return getResultOrUndefined({
|
|
131
|
+
tier,
|
|
132
|
+
resolve: async log => {
|
|
133
|
+
const { $, statusCode } = await getHTML(
|
|
134
|
+
providerUrl,
|
|
135
|
+
await opts(providerUrl, { superProxy, timeout: PROXY_TIMEOUT })
|
|
136
|
+
)
|
|
137
|
+
return getResult($, statusCode, log, tier)
|
|
138
|
+
}
|
|
139
|
+
})
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
const getOriginResultOrUndefined = () =>
|
|
143
|
+
getResultOrUndefined({
|
|
144
|
+
tier: 'origin',
|
|
145
|
+
resolve: async log => {
|
|
146
|
+
const { $, statusCode } = await getHTML(providerUrl, {
|
|
147
|
+
...htmlOpts?.(),
|
|
148
|
+
timeout: PROXY_TIMEOUT
|
|
149
|
+
})
|
|
150
|
+
|
|
151
|
+
if (isStatusCodeMissing(statusCode)) {
|
|
152
|
+
log.error({ statusCode, status: 'missing_status_code' })
|
|
153
|
+
return UNRESOLVED
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
if (statusCode === httpStatus.NOT_FOUND) {
|
|
157
|
+
log.error({ statusCode, status: 'not_found' })
|
|
158
|
+
return NOT_FOUND
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
return getResult($, statusCode, log, 'origin')
|
|
162
|
+
}
|
|
163
|
+
})
|
|
164
|
+
|
|
165
|
+
if (forceProxy) {
|
|
166
|
+
logProviderLookup({ tier: 'origin', status: 'skipped', reason: 'force_proxy' })
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
if (!forceProxy) {
|
|
170
|
+
const resultOrigin = await getOriginResultOrUndefined()
|
|
171
|
+
if (resultOrigin === NOT_FOUND) return
|
|
172
|
+
if (resultOrigin !== UNRESOLVED) return reportSuccess('origin', resultOrigin)
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if (typeof opts !== 'function') return
|
|
176
|
+
|
|
177
|
+
const resultDatacenter = await getProxyResultOrUndefined()
|
|
178
|
+
if (resultDatacenter !== UNRESOLVED) return reportSuccess('datacenter', resultDatacenter)
|
|
179
|
+
|
|
180
|
+
const resultResidential = await getProxyResultOrUndefined({ superProxy: true })
|
|
181
|
+
if (resultResidential === UNRESOLVED) return
|
|
182
|
+
|
|
183
|
+
return reportSuccess('residential', resultResidential)
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return provider
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
return { createHtmlProvider, getOgImage }
|
|
190
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { STATUS_CODES } = require('http')
|
|
4
|
+
|
|
5
|
+
// Maps a constant-style reason phrase (e.g. `NOT_FOUND`) to its status code,
// kept as the string key found in `STATUS_CODES` (e.g. `'404'`).
const byCode = {}
for (const [code, reason] of Object.entries(STATUS_CODES)) {
  const constantName = reason.replace(/[^a-zA-Z0-9]+/g, '_').toUpperCase()
  byCode[constantName] = code
}

/**
 * Translates between status codes and their names.
 *
 * @param {number|string} input - a numeric status code or a constant-style name.
 * @returns {*} for numbers, the reason phrase (or the number itself when it is
 *   unknown); for anything else, the `byCode` entry for that key.
 */
const fn = input => {
  if (typeof input !== 'number') return byCode[input]
  return STATUS_CODES[input] ?? input
}

// Numeric constants are also exposed as properties, e.g. `fn.NOT_FOUND === 404`.
for (const [constantName, code] of Object.entries(byCode)) {
  fn[constantName] = Number(code)
}
|
|
16
|
+
|
|
17
|
+
module.exports = fn
|
package/src/util/keyv.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const keyvCompress = require('@keyvhq/compress')
|
|
4
|
+
const KeyvRedis = require('@keyvhq/redis')
|
|
5
|
+
const KeyvMulti = require('@keyvhq/multi')
|
|
6
|
+
const Keyv = require('@keyvhq/core')
|
|
7
|
+
const assert = require('assert')
|
|
8
|
+
|
|
9
|
+
module.exports = ({ TTL_DEFAULT }) => {
|
|
10
|
+
const createMultiCache = remote => new Keyv({ store: new KeyvMulti({ remote }) })
|
|
11
|
+
|
|
12
|
+
const createKeyv = opts => new Keyv({ ttl: TTL_DEFAULT, ...opts })
|
|
13
|
+
|
|
14
|
+
const createKeyvNamespace = opts => {
|
|
15
|
+
assert(opts.namespace, '`opts.namespace` is required.')
|
|
16
|
+
return keyvCompress(createKeyv(opts))
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const createMemoryCache = opts => createKeyvNamespace({ ...opts, store: new Map() })
|
|
20
|
+
|
|
21
|
+
const createRedisCache = (opts = {}) => {
|
|
22
|
+
const store = new Map()
|
|
23
|
+
return createKeyvNamespace({ ...opts, store })
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
return { createMemoryCache, createMultiCache, createRedisCache }
|
|
27
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const createPingUrl = require('@microlink/ping-url')
|
|
4
|
+
|
|
5
|
+
module.exports = ({ got, createMemoryCache }) => {
|
|
6
|
+
const pingCache = createMemoryCache({ namespace: 'ping' })
|
|
7
|
+
|
|
8
|
+
const pingUrl = createPingUrl(pingCache, {
|
|
9
|
+
value: ({ url, statusCode }) => ({ url, statusCode })
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
const reachableUrl = (url, opts) =>
|
|
13
|
+
pingUrl(url, {
|
|
14
|
+
...got.gotOpts,
|
|
15
|
+
...opts
|
|
16
|
+
})
|
|
17
|
+
|
|
18
|
+
reachableUrl.isReachable = createPingUrl.isReachable
|
|
19
|
+
|
|
20
|
+
return reachableUrl
|
|
21
|
+
}
|