metanova 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +7 -0
- package/dist/index.cjs +644 -69
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +644 -69
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1322,6 +1322,95 @@ function uniqueStrings(values) {
|
|
|
1322
1322
|
return [...new Set(values.filter(Boolean))];
|
|
1323
1323
|
}
|
|
1324
1324
|
|
|
1325
|
+
// src/utils/redditMedia.ts
|
|
1326
|
+
// Case-insensitive, unanchored substrings that disqualify an image URL on Reddit pages
// (thumbnail host, avatars, community/subreddit icons, awards, emoji).
// NOTE(review): the bare words ("icon", "award", "avatar", ...) match anywhere in the URL,
// including inside longer words or path slugs - confirm this breadth is intended.
var REDDIT_BAD_IMAGE_URL_PATTERN = /thumbs\.redditmedia\.com|avatar|community_icon|subreddit|icon|award|emoji/i;

// Values of `image.source` that are treated as trusted when filtering Reddit image candidates.
var TRUSTED_REDDIT_SOURCE_NAMES = [
  "adapter",
  "openGraph",
  "twitter",
  "jsonLd",
  "oEmbed",
  "nextData",
  "nuxt",
  "initialState",
  "preloadedState",
  "apollo",
  "applicationJson",
  "jsonScript"
];
var TRUSTED_REDDIT_SOURCES = /* @__PURE__ */ new Set(TRUSTED_REDDIT_SOURCE_NAMES);
|
|
1341
|
+
/**
 * Reports whether `value` parses as a URL whose host is reddit.com, redd.it,
 * or any subdomain of reddit.com (a leading "www." is ignored).
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} `false` for unparseable input or any other host.
 */
function isRedditUrl(value) {
  let hostname;
  try {
    hostname = new URL(value).hostname;
  } catch {
    // Not a parseable absolute URL, so it cannot be a Reddit URL.
    return false;
  }
  const host = hostname.toLowerCase().replace(/^www\./, "");
  if (host === "reddit.com" || host === "redd.it") {
    return true;
  }
  return host.endsWith(".reddit.com");
}
|
|
1349
|
+
/**
 * Narrows image candidates for a Reddit page and orders them by Reddit priority.
 *
 * Candidates rejected by `isAllowedRedditImageCandidate` are dropped. If any survivor is
 * trusted (per `isTrustedRedditImageCandidate`), only trusted survivors are kept;
 * otherwise every survivor is kept.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} A new, prioritized array.
 */
function filterRedditImageCandidates(images) {
  const permitted = images.filter((candidate) => isAllowedRedditImageCandidate(candidate));
  const trustedOnly = permitted.filter((candidate) => isTrustedRedditImageCandidate(candidate));
  const pool = trustedOnly.length === 0 ? permitted : trustedOnly;
  return prioritizeRedditImages(pool);
}
|
|
1354
|
+
/**
 * Returns a sorted copy of `images`: highest `redditImagePriority` first,
 * ties broken by larger pixel area. The input array is not mutated.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} Sorted copy.
 */
function prioritizeRedditImages(images) {
  const byPriorityThenArea = (left, right) => {
    const priorityDelta = redditImagePriority(right) - redditImagePriority(left);
    // Area is only consulted when the priorities are equal.
    return priorityDelta || imageArea(right) - imageArea(left);
  };
  const ordered = images.slice();
  ordered.sort(byPriorityThenArea);
  return ordered;
}
|
|
1359
|
+
/**
 * Decides whether an image candidate may be used on a Reddit page.
 *
 * A candidate is rejected when a known width or height is below 200, or when its URL
 * matches `REDDIT_BAD_IMAGE_URL_PATTERN`. Undefined dimensions do not disqualify.
 *
 * @param {{url: string, width?: number, height?: number}} image - Candidate.
 * @returns {boolean} `true` when the candidate is acceptable.
 */
function isAllowedRedditImageCandidate(image) {
  const minimumSide = 200;
  const isTooSmall = (side) => side !== void 0 && side < minimumSide;
  if (isTooSmall(image.width) || isTooSmall(image.height)) {
    return false;
  }
  return !REDDIT_BAD_IMAGE_URL_PATTERN.test(image.url);
}
|
|
1368
|
+
/**
 * Ranks an image candidate for Reddit content; larger values are preferred.
 *
 * Checks run in this order and the first match wins:
 *   metadata.redditMediaKind "gallery" (700), "previewOriginal" (620), "directImage" (580),
 *   preview.redd.it host (560), i.redd.it host (540), source "openGraph"/"twitter" (420),
 *   external-preview.redd.it host (300), thumbs.redditmedia.com host (1),
 *   any other URL accepted by `isRedditMediaUrl` (250), everything else (0).
 *
 * @param {{url: string, source?: string, metadata?: object}} image - Candidate.
 * @returns {number} Priority score.
 */
function redditImagePriority(image) {
  const declaredKind = image.metadata?.redditMediaKind;
  const mediaKind = typeof declaredKind === "string" ? declaredKind : void 0;
  const url = image.url.toLowerCase();
  switch (mediaKind) {
    case "gallery":
      return 700;
    case "previewOriginal":
      return 620;
    case "directImage":
      return 580;
    default:
      break;
  }
  if (/\/\/preview\.redd\.it\//i.test(url)) {
    return 560;
  }
  if (/\/\/i\.redd\.it\//i.test(url)) {
    return 540;
  }
  const { source } = image;
  if (source === "openGraph" || source === "twitter") {
    return 420;
  }
  if (/\/\/external-preview\.redd\.it\//i.test(url)) {
    return 300;
  }
  if (/\/\/thumbs\.redditmedia\.com\//i.test(url)) {
    return 1;
  }
  if (isRedditMediaUrl(url)) {
    return 250;
  }
  return 0;
}
|
|
1397
|
+
/**
 * Reports whether any image carries Reddit-specific metadata: `metadata.adapter` equal to
 * "redditAdapter", `metadata.originalSource` equal to "redditJsonEndpoint", or a truthy
 * `metadata.redditMediaKind`.
 *
 * @param {Array<{metadata?: object}>} images - Image candidates.
 * @returns {boolean}
 */
function hasRedditImageContext(images) {
  const textOrEmpty = (candidate) => (typeof candidate === "string" ? candidate : "");
  return images.some(({ metadata }) => {
    if (textOrEmpty(metadata?.adapter) === "redditAdapter") {
      return true;
    }
    if (textOrEmpty(metadata?.originalSource) === "redditJsonEndpoint") {
      return true;
    }
    return Boolean(metadata?.redditMediaKind);
  });
}
|
|
1404
|
+
/**
 * Reports whether `value` contains a Reddit media host (i.redd.it, preview.redd.it,
 * external-preview.redd.it, v.redd.it or thumbs.redditmedia.com) introduced by "//"
 * and followed by "/". Matching is case-insensitive.
 *
 * @param {string} value - URL text to inspect.
 * @returns {boolean}
 */
function isRedditMediaUrl(value) {
  const redditMediaHost = /(?:^https?:)?\/\/(?:(?:i|preview|external-preview)\.redd\.it|thumbs\.redditmedia\.com|v\.redd\.it)\//i;
  return redditMediaHost.test(value);
}
|
|
1407
|
+
/**
 * Reports whether a candidate is trusted on Reddit pages: its `source` is listed in
 * `TRUSTED_REDDIT_SOURCES`, or it carries a truthy `metadata.redditMediaKind`.
 *
 * @param {{source?: string, metadata?: object}} image - Candidate.
 * @returns {boolean}
 */
function isTrustedRedditImageCandidate(image) {
  if (TRUSTED_REDDIT_SOURCES.has(image.source)) {
    return true;
  }
  return Boolean(image.metadata?.redditMediaKind);
}
|
|
1410
|
+
/**
 * Pixel area of an image; a null or undefined dimension counts as 0.
 *
 * @param {{width?: number, height?: number}} image
 * @returns {number} width * height.
 */
function imageArea(image) {
  const { width, height } = image;
  return (width ?? 0) * (height ?? 0);
}
|
|
1413
|
+
|
|
1325
1414
|
// src/scorers/image.ts
|
|
1326
1415
|
var SOURCE_WEIGHT = {
|
|
1327
1416
|
adapter: 98,
|
|
@@ -1342,6 +1431,7 @@ var SOURCE_WEIGHT = {
|
|
|
1342
1431
|
};
|
|
1343
1432
|
function scoreImages(images, customScorers = []) {
|
|
1344
1433
|
const duplicateCounts = countDuplicates(images);
|
|
1434
|
+
const redditContext = hasRedditImageContext(images);
|
|
1345
1435
|
return images.map((image, index) => {
|
|
1346
1436
|
const scored = scoreImageWithDetails(image, index, images, duplicateCounts);
|
|
1347
1437
|
const customScore = customScorers.reduce((total, scorer) => total + scorer(image, { index, images }), 0);
|
|
@@ -1357,7 +1447,7 @@ function scoreImages(images, customScorers = []) {
|
|
|
1357
1447
|
}
|
|
1358
1448
|
};
|
|
1359
1449
|
}).sort(
|
|
1360
|
-
(left, right) => (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) ||
|
|
1450
|
+
(left, right) => (redditContext ? redditImagePriority(right) - redditImagePriority(left) : 0) || (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea2(right) - imageArea2(left)
|
|
1361
1451
|
);
|
|
1362
1452
|
}
|
|
1363
1453
|
function selectBestImage(images, customScorers = []) {
|
|
@@ -1375,15 +1465,17 @@ function scoreImageWithDetails(image, index, images, duplicateCounts) {
|
|
|
1375
1465
|
const dimensions = scoreDimensions(image);
|
|
1376
1466
|
const format = scoreFormat(image);
|
|
1377
1467
|
const urlSignal = scoreUrlSignal(image);
|
|
1468
|
+
const redditMedia = scoreRedditMedia(image);
|
|
1378
1469
|
const urlPenalty = scoreUrlPenalty(image);
|
|
1379
1470
|
const duplicatePenalty = scoreDuplicatePenalty(image, duplicateCounts);
|
|
1380
1471
|
score += dimensions.score;
|
|
1381
1472
|
score += format.score;
|
|
1382
1473
|
score += urlSignal.score;
|
|
1474
|
+
score += redditMedia.score;
|
|
1383
1475
|
score -= urlPenalty;
|
|
1384
1476
|
score -= duplicatePenalty.score;
|
|
1385
1477
|
score -= Math.min(index * 1.5, 10);
|
|
1386
|
-
reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...duplicatePenalty.reasons);
|
|
1478
|
+
reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...redditMedia.reasons, ...duplicatePenalty.reasons);
|
|
1387
1479
|
if (images.length === 1) {
|
|
1388
1480
|
score += 4;
|
|
1389
1481
|
reasons.push("only candidate image added 4 points");
|
|
@@ -1483,6 +1575,33 @@ function platformThumbnailScore(url) {
|
|
|
1483
1575
|
}
|
|
1484
1576
|
return { score: 0, reasons: [] };
|
|
1485
1577
|
}
|
|
1578
|
+
/**
 * Computes the Reddit-specific score adjustment for an image, with explanatory reasons.
 *
 * Images with a zero `redditImagePriority` that are not Reddit media URLs get no adjustment.
 * Otherwise the first matching rule wins: redditMediaKind "gallery" (+24),
 * "previewOriginal" (+20), i.redd.it (+18), preview.redd.it (+16),
 * external-preview.redd.it (-8), thumbs.redditmedia.com (-60), anything else (0).
 * NOTE(review): redditMediaKind "directImage" is ranked by `redditImagePriority` but has no
 * rule here unless the URL is on i.redd.it - confirm that is intended.
 *
 * @param {{url: string, source?: string, metadata?: object}} image - Candidate.
 * @returns {{score: number, reasons: string[]}} A fresh result object on every call.
 */
function scoreRedditMedia(image) {
  if (redditImagePriority(image) === 0 && !isRedditMediaUrl(image.url)) {
    return { score: 0, reasons: [] };
  }
  const url = image.url.toLowerCase();
  const declaredKind = image.metadata?.redditMediaKind;
  const mediaKind = typeof declaredKind === "string" ? declaredKind : void 0;
  if (mediaKind === "gallery") {
    return { score: 24, reasons: ["Reddit gallery media added 24 points"] };
  }
  if (mediaKind === "previewOriginal") {
    return { score: 20, reasons: ["Reddit original preview media added 20 points"] };
  }
  // Host-based rules, evaluated in order; the first matching host decides the adjustment.
  const hostRules = [
    [/\/\/i\.redd\.it\//i, 18, "Reddit direct image media added 18 points"],
    [/\/\/preview\.redd\.it\//i, 16, "Reddit preview media added 16 points"],
    [/\/\/external-preview\.redd\.it\//i, -8, "Reddit external preview media subtracted 8 points"],
    [/\/\/thumbs\.redditmedia\.com\//i, -60, "Reddit thumbnail host subtracted 60 points"]
  ];
  for (const [hostPattern, score, reason] of hostRules) {
    if (hostPattern.test(url)) {
      return { score, reasons: [reason] };
    }
  }
  return { score: 0, reasons: [] };
}
|
|
1486
1605
|
function scoreUrlPenalty(image) {
|
|
1487
1606
|
const url = image.url.toLowerCase();
|
|
1488
1607
|
let penalty = 0;
|
|
@@ -1555,7 +1674,7 @@ function countDuplicates(images) {
|
|
|
1555
1674
|
}
|
|
1556
1675
|
return counts;
|
|
1557
1676
|
}
|
|
1558
|
-
function
|
|
1677
|
+
/**
 * Pixel area of an image; a null or undefined dimension counts as 0.
 *
 * @param {{width?: number, height?: number}} image
 * @returns {number} width * height.
 */
function imageArea2(image) {
  const { width, height } = image;
  return (width ?? 0) * (height ?? 0);
}
|
|
1561
1680
|
function sourceSortWeight(image) {
|
|
@@ -1673,8 +1792,9 @@ function discoverMedia(rawSources, finalUrl) {
|
|
|
1673
1792
|
if (externalResults.some((result) => (result.images?.length ?? 0) > 0 || (result.videos?.length ?? 0) > 0)) {
|
|
1674
1793
|
trace.push("media discovery included adapter and plugin media");
|
|
1675
1794
|
}
|
|
1795
|
+
const dedupedImages = dedupeMediaBySignature(images);
|
|
1676
1796
|
return {
|
|
1677
|
-
images:
|
|
1797
|
+
images: isRedditUrl(finalUrl) ? filterRedditImageCandidates(dedupedImages) : dedupedImages,
|
|
1678
1798
|
videos: dedupeMediaBySignature(uniqueMediaByUrl(videos)),
|
|
1679
1799
|
audio: dedupeMediaBySignature(uniqueMediaByUrl(audio)),
|
|
1680
1800
|
trace
|
|
@@ -1766,7 +1886,8 @@ function mediaFromJsonValue(value, kind, source) {
|
|
|
1766
1886
|
height: parseNumber(stringFromUnknown(value.height)) ?? parseNumber(stringFromUnknown(nestedDetails?.height)),
|
|
1767
1887
|
alt: stringFromUnknown(value.alt) ?? stringFromUnknown(value.caption) ?? stringFromUnknown(value.name) ?? stringFromUnknown(nestedDetails?.alt),
|
|
1768
1888
|
title: stringFromUnknown(value.title) ?? stringFromUnknown(nestedDetails?.title),
|
|
1769
|
-
type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type)
|
|
1889
|
+
type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type),
|
|
1890
|
+
metadata: isRecord3(value.metadata) ? value.metadata : void 0
|
|
1770
1891
|
},
|
|
1771
1892
|
...srcsetAssets
|
|
1772
1893
|
];
|
|
@@ -1874,7 +1995,7 @@ function dedupeMediaBySignature(assets) {
|
|
|
1874
1995
|
for (const asset of assets) {
|
|
1875
1996
|
const key = mediaSignature2(asset.url);
|
|
1876
1997
|
const current = seen.get(key);
|
|
1877
|
-
if (!current ||
|
|
1998
|
+
if (!current || mediaRank(asset) > mediaRank(current)) {
|
|
1878
1999
|
seen.set(key, asset);
|
|
1879
2000
|
}
|
|
1880
2001
|
}
|
|
@@ -1908,6 +2029,10 @@ function sourceRank(source) {
|
|
|
1908
2029
|
};
|
|
1909
2030
|
return ranks[source] ?? 50;
|
|
1910
2031
|
}
|
|
2032
|
+
/**
 * Rank used to choose between two assets that share a media signature.
 *
 * Assets with a positive `redditImagePriority` rank at 1000 + that priority; all others
 * fall back to `sourceRank(asset.source)`.
 * NOTE(review): `redditImagePriority` is positive for any asset whose source is "openGraph"
 * or "twitter", whatever its host - confirm the 1000 offset should apply off Reddit too.
 *
 * @param {{url: string, source?: string, metadata?: object}} asset
 * @returns {number}
 */
function mediaRank(asset) {
  const priority = redditImagePriority(asset);
  if (priority > 0) {
    return 1e3 + priority;
  }
  return sourceRank(asset.source);
}
|
|
1911
2036
|
function shouldIgnoreMediaUrl2(url) {
|
|
1912
2037
|
const normalized = url.toLowerCase();
|
|
1913
2038
|
return normalized.startsWith("data:") || normalized.startsWith("blob:") || normalized.startsWith("javascript:") || /(?:sprite|spacer|blank|transparent|placeholder|tracking|beacon|pixel|emoji|favicon|apple-touch-icon)(?:[._/-]|$|\?)/i.test(normalized) || /(?:^|[/?_-])1x1(?:[._/-]|$|\?)/i.test(normalized);
|
|
@@ -2232,7 +2357,7 @@ function normalizeMetadata(rawSources, context = {}) {
|
|
|
2232
2357
|
...mediaDiscovery.trace,
|
|
2233
2358
|
...selectedImage.best ? [`selected image from ${sourceLabel2(selectedImage.best)}`] : []
|
|
2234
2359
|
]);
|
|
2235
|
-
|
|
2360
|
+
const normalized = stripUndefined2({
|
|
2236
2361
|
ok: true,
|
|
2237
2362
|
url,
|
|
2238
2363
|
finalUrl,
|
|
@@ -2262,6 +2387,12 @@ function normalizeMetadata(rawSources, context = {}) {
|
|
|
2262
2387
|
diagnostics,
|
|
2263
2388
|
trace: diagnostics.trace
|
|
2264
2389
|
});
|
|
2390
|
+
normalized.images = selectedImage.images;
|
|
2391
|
+
normalized.videos = videos;
|
|
2392
|
+
normalized.audio = audio;
|
|
2393
|
+
normalized.favicons = favicons;
|
|
2394
|
+
normalized.trace = diagnostics.trace;
|
|
2395
|
+
return normalized;
|
|
2265
2396
|
}
|
|
2266
2397
|
function normalizeAssets2(assets, baseUrl) {
|
|
2267
2398
|
return assets.map((asset) => {
|
|
@@ -2897,8 +3028,8 @@ var redditAdapter = {
|
|
|
2897
3028
|
type: reddit.isPost ? "social_post" : "website",
|
|
2898
3029
|
siteName: "Reddit",
|
|
2899
3030
|
canonicalUrl: context.raw.openGraph.url ?? context.raw.html.canonicalUrl,
|
|
2900
|
-
title:
|
|
2901
|
-
description: descriptionSelection.value,
|
|
3031
|
+
title: cleanRedditTitle(titleSelection.value),
|
|
3032
|
+
description: cleanRedditDescription(descriptionSelection.value),
|
|
2902
3033
|
images: markAdapterMedia(mediaFromContext(context).images, "redditAdapter"),
|
|
2903
3034
|
videos: markAdapterMedia(mediaFromContext(context).videos, "redditAdapter"),
|
|
2904
3035
|
author: username ? { name: username } : entityFromContext(context, ["author", "submitter", "user"]),
|
|
@@ -2984,7 +3115,7 @@ var tiktokAdapter = {
|
|
|
2984
3115
|
return this.detect?.(url) ?? false;
|
|
2985
3116
|
},
|
|
2986
3117
|
extract(context) {
|
|
2987
|
-
return
|
|
3118
|
+
return tiktokResult(context);
|
|
2988
3119
|
},
|
|
2989
3120
|
normalize(rawData) {
|
|
2990
3121
|
return normalizePlatformResult(rawData);
|
|
@@ -3203,25 +3334,215 @@ function redditDescriptionFromContext(context) {
|
|
|
3203
3334
|
}
|
|
3204
3335
|
return { value: context.raw.html.description, method: context.raw.html.description ? "reddit:html" : void 0 };
|
|
3205
3336
|
}
|
|
3206
|
-
function
|
|
3337
|
+
/**
 * Builds the TikTok adapter result for a fetched page.
 *
 * The username and post id are read from the final URL's path; the matching embedded
 * `itemStruct` (when found) supplies title, description, author, media, publish time
 * and video details, with context-level extraction as the fallback.
 *
 * @param {object} context - Adapter context; `context.finalUrl` must be an absolute URL.
 * @returns {object} Whatever `compactAdapterResult` returns for the assembled fields.
 */
function tiktokResult(context) {
  const { pathname } = new URL(context.finalUrl);
  const username = pathname.match(/@([^/]+)/)?.[1];
  // Prefer the id after /video/ or /photo/; otherwise use the last path segment.
  const idFromMediaPath = pathname.match(/\/(?:video|photo)\/([^/]+)/)?.[1];
  const postId = idFromMediaPath ?? pathname.split("/").filter(Boolean).at(-1);
  const itemStruct = tiktokItemStructFromContext(context, postId);
  const titleSelection = tiktokTitleFromContext(context, itemStruct, username);
  const descriptionSelection = tiktokDescriptionFromContext(context, itemStruct);
  const author = tiktokAuthorFromItemStruct(itemStruct, username);
  const media = tiktokMediaFromContext(context, itemStruct);
  const publishedTime = tiktokPublishedTime(itemStruct) ?? publishedTimeFromContext(context);
  const extractionMethod = titleSelection.method ?? descriptionSelection.method ?? "tiktok:htmlFallback";
  return compactAdapterResult({
    source: "tiktokAdapter",
    platform: "TikTok",
    type: "social_post",
    siteName: "TikTok",
    canonicalUrl: context.raw.openGraph.url,
    title: titleSelection.value,
    description: descriptionSelection.value,
    images: markAdapterMedia(media.images, "tiktokAdapter"),
    videos: markAdapterMedia(media.videos, "tiktokAdapter"),
    author,
    article: { publishedTime },
    // Video details are only emitted when a post id could be derived from the URL.
    video: postId ? {
      id: postId,
      title: titleSelection.value,
      channel: author,
      publishedTime,
      duration: tiktokVideoDuration(itemStruct),
      viewCount: tiktokStatCount(itemStruct, "playCount")
    } : void 0,
    identifiers: { username, postId },
    raw: { extractionMethod }
  });
}
|
|
3373
|
+
/**
 * Picks a title for a TikTok post and names the method that produced it.
 *
 * Sources are tried in order and the first truthy value wins: the cleaned `itemStruct.desc`,
 * the music title, then `titleFromContext`. When all fail, a generic
 * "TikTok post by @username" title is used if a username is known; otherwise both
 * `value` and `method` are undefined.
 *
 * @param {object} context - Adapter context.
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{value: (string|undefined), method: (string|undefined)}}
 */
function tiktokTitleFromContext(context, itemStruct, username) {
  // Resolvers are lazy so later sources are only consulted when earlier ones yield nothing.
  const sources = [
    ["tiktok:itemStruct.desc", () => cleanTikTokText(stringFromUnknown3(itemStruct?.desc))],
    ["tiktok:itemStruct.music", () => tiktokMusicTitle(itemStruct)],
    ["tiktok:fallback", () => cleanTikTokText(titleFromContext(context, ["desc", "caption", "title", "description"]))]
  ];
  for (const [method, resolve] of sources) {
    const value = resolve();
    if (value) {
      return { value, method };
    }
  }
  if (!username) {
    return { value: void 0, method: void 0 };
  }
  return { value: `TikTok post by @${username}`, method: "tiktok:urlFallback" };
}
|
|
3391
|
+
/**
 * Picks a description for a TikTok post and names the method that produced it.
 *
 * The cleaned `itemStruct.desc` is preferred; otherwise the cleaned result of
 * `descriptionFromContext` is used. `method` is only set when a value was actually found,
 * so callers that chain `titleMethod ?? descriptionMethod ?? default` can reach their
 * default (previously "tiktok:fallback" was reported even for an empty result).
 *
 * @param {object} context - Adapter context.
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {{value: (string|undefined), method: (string|undefined)}}
 */
function tiktokDescriptionFromContext(context, itemStruct) {
  const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
  if (desc) {
    return { value: desc, method: "tiktok:itemStruct.desc" };
  }
  const fallback = cleanTikTokText(descriptionFromContext(context));
  return {
    value: fallback,
    // Do not claim an extraction method when nothing was extracted.
    method: fallback ? "tiktok:fallback" : void 0
  };
}
|
|
3401
|
+
/**
 * Locates the TikTok `itemStruct` record for a post inside the page's embedded data.
 *
 * Pass 1 checks the fixed path
 * `__DEFAULT_SCOPE__ -> "webapp.video-detail" -> itemInfo -> itemStruct` on every item.
 * Pass 2 walks each item's data with `walkData` and takes the first record stored under
 * an "itemStruct" key. In both passes a candidate is accepted when no `postId` was given
 * or when its stringified `id` equals `postId`.
 *
 * @param {object} context - Adapter context; reads `context.raw.embeddedData.items`.
 * @param {string|undefined} postId - Post id parsed from the URL, if any.
 * @returns {object|undefined} The matching record, or undefined.
 */
function tiktokItemStructFromContext(context, postId) {
  const matchesPost = (candidate) => !postId || stringFromUnknown3(candidate.id) === postId;
  const { items } = context.raw.embeddedData;
  for (const { data } of items) {
    const scope = data["__DEFAULT_SCOPE__"];
    const detail = isRecord4(scope) ? scope["webapp.video-detail"] : void 0;
    const info = isRecord4(detail) ? detail.itemInfo : void 0;
    const candidate = isRecord4(info) ? info.itemStruct : void 0;
    if (isRecord4(candidate) && matchesPost(candidate)) {
      return candidate;
    }
  }
  let found;
  for (const { data } of items) {
    walkData(data, (value, key) => {
      // Once a match is recorded, remaining visits are ignored.
      const isCandidate = !found && key === "itemStruct" && isRecord4(value);
      if (isCandidate && matchesPost(value)) {
        found = value;
      }
    });
    if (found) {
      return found;
    }
  }
  return void 0;
}
|
|
3427
|
+
/**
 * Combines media taken from the TikTok `itemStruct` with media discovered from the
 * context; itemStruct-derived entries come first in each list.
 *
 * @param {object} context - Adapter context.
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {{images: object[], videos: object[]}}
 */
function tiktokMediaFromContext(context, itemStruct) {
  const { images, videos } = mediaFromContext(context);
  const coverImages = tiktokImagesFromItemStruct(itemStruct);
  const playableVideos = tiktokVideosFromItemStruct(itemStruct);
  return {
    images: [...coverImages, ...images],
    videos: [...playableVideos, ...videos]
  };
}
|
|
3434
|
+
/**
 * Builds image assets from the cover URLs on `itemStruct.video`
 * (originCover, cover, dynamicCover, shareCover), de-duplicated in that order.
 *
 * Each asset is tagged with source "applicationJson" and
 * metadata.tiktokMediaKind "videoCover", and carries the video's width and height.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {object[]} Image assets; empty when there is no video record.
 */
function tiktokImagesFromItemStruct(itemStruct) {
  if (!isRecord4(itemStruct?.video)) {
    return [];
  }
  const { video } = itemStruct;
  const width = numberFromUnknown(video.width);
  const height = numberFromUnknown(video.height);
  const coverUrls = uniqueStrings3([
    stringFromUnknown3(video.originCover),
    stringFromUnknown3(video.cover),
    stringFromUnknown3(video.dynamicCover),
    ...urlsFromUnknown(video.shareCover)
  ]);
  const covers = [];
  for (const url of coverUrls) {
    covers.push({
      url,
      kind: "image",
      source: "applicationJson",
      width,
      height,
      metadata: {
        tiktokMediaKind: "videoCover"
      }
    });
  }
  return covers;
}
|
|
3458
|
+
/**
 * Builds video assets from the playable URLs on `itemStruct.video`: playAddr, downloadAddr,
 * PlayAddrStruct.UrlList, then each bitrateInfo[].PlayAddr.UrlList. URLs are de-duplicated
 * in that order and only http(s) URLs are kept.
 *
 * Each asset is tagged with source "applicationJson", type "video/mp4" and
 * metadata.tiktokMediaKind "videoPlay", and carries the video's width and height.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {object[]} Video assets; empty when there is no video record.
 */
function tiktokVideosFromItemStruct(itemStruct) {
  if (!isRecord4(itemStruct?.video)) {
    return [];
  }
  const { video } = itemStruct;
  const width = numberFromUnknown(video.width);
  const height = numberFromUnknown(video.height);
  const directUrls = [stringFromUnknown3(video.playAddr), stringFromUnknown3(video.downloadAddr)];
  const structUrls = urlsFromTikTokPlayAddr(video.PlayAddrStruct);
  const bitrateUrls = Array.isArray(video.bitrateInfo)
    ? video.bitrateInfo.flatMap((entry) => (isRecord4(entry) ? urlsFromTikTokPlayAddr(entry.PlayAddr) : []))
    : [];
  const playableUrls = uniqueStrings3([...directUrls, ...structUrls, ...bitrateUrls]).filter((url) => /^https?:\/\//i.test(url));
  const videos = [];
  for (const url of playableUrls) {
    videos.push({
      url,
      kind: "video",
      source: "applicationJson",
      width,
      height,
      type: "video/mp4",
      metadata: {
        tiktokMediaKind: "videoPlay"
      }
    });
  }
  return videos;
}
|
|
3483
|
+
/**
 * Extracts the URLs under `value.UrlList`; anything that is not a record yields [].
 *
 * @param {unknown} value - Candidate play-address record.
 * @returns {string[]}
 */
function urlsFromTikTokPlayAddr(value) {
  return isRecord4(value) ? urlsFromUnknown(value.UrlList) : [];
}
|
|
3489
|
+
/**
 * Derives the post author from `itemStruct.author`, falling back to the URL username.
 *
 * The name is the author's nickname, else uniqueId, else `username`. The profile URL is
 * built only from `username`, so it stays undefined when no username was parsed.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{name: string, url: (string|undefined)}|undefined} Undefined when no name is known.
 */
function tiktokAuthorFromItemStruct(itemStruct, username) {
  const authorRecord = isRecord4(itemStruct?.author) ? itemStruct.author : void 0;
  let name = stringFromUnknown3(authorRecord?.nickname);
  name ??= stringFromUnknown3(authorRecord?.uniqueId);
  name ??= username;
  if (!name) {
    return void 0;
  }
  const profile = { name, url: void 0 };
  if (username) {
    profile.url = `https://www.tiktok.com/@${username}`;
  }
  return profile;
}
|
|
3500
|
+
/**
 * Converts `itemStruct.createTime` (multiplied by 1000 to obtain milliseconds) into an
 * ISO-8601 timestamp.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {string|undefined} ISO timestamp, or undefined when the value is missing,
 *   zero, or outside the range a Date can represent.
 */
function tiktokPublishedTime(itemStruct) {
  const created = numberFromUnknown(itemStruct?.createTime);
  if (!created) {
    return void 0;
  }
  const publishedAt = new Date(created * 1e3);
  // toISOString() throws RangeError on an invalid Date; page-embedded data is untrusted,
  // so an out-of-range createTime must not abort the whole extraction.
  return Number.isNaN(publishedAt.getTime()) ? void 0 : publishedAt.toISOString();
}
|
|
3504
|
+
/**
 * Reads `itemStruct.video.duration` through `stringFromUnknown3`.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {string|undefined} Whatever `stringFromUnknown3` yields for the duration.
 */
function tiktokVideoDuration(itemStruct) {
  const videoRecord = itemStruct?.video;
  const duration = isRecord4(videoRecord) ? videoRecord.duration : void 0;
  return stringFromUnknown3(duration);
}
|
|
3508
|
+
/**
 * Reads one counter from `itemStruct.stats` through `numberFromUnknown`.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @param {string} key - Stat name, e.g. "playCount".
 * @returns {number|undefined} Whatever `numberFromUnknown` yields for that stat.
 */
function tiktokStatCount(itemStruct, key) {
  const statsRecord = itemStruct?.stats;
  const rawCount = isRecord4(statsRecord) ? statsRecord[key] : void 0;
  return numberFromUnknown(rawCount);
}
|
|
3512
|
+
/**
 * Builds a display title from `itemStruct.music`.
 *
 * Returns "title - authorName" when both are present and the title does not contain
 * "original sound" (case-insensitive); otherwise just the title.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item, if found.
 * @returns {string|undefined} Undefined when there is no usable music title.
 */
function tiktokMusicTitle(itemStruct) {
  const music = isRecord4(itemStruct?.music) ? itemStruct.music : void 0;
  const trackTitle = cleanTikTokText(stringFromUnknown3(music?.title));
  const artist = cleanTikTokText(stringFromUnknown3(music?.authorName));
  if (!trackTitle) {
    return void 0;
  }
  const isGenericSound = /original sound/i.test(trackTitle);
  return artist && !isGenericSound ? `${trackTitle} - ${artist}` : trackTitle;
}
|
|
3524
|
+
/**
 * Normalizes TikTok text: collapses whitespace runs to single spaces and trims.
 *
 * @param {string|undefined|null} value - Raw text.
 * @returns {string|undefined} The cleaned text, or undefined when the input is nullish,
 *   empty after cleaning, or flagged by `isLowQualityTikTokText`.
 */
function cleanTikTokText(value) {
  if (value == null) {
    return void 0;
  }
  const collapsed = value.replace(/\s+/g, " ").trim();
  if (collapsed.length === 0 || isLowQualityTikTokText(collapsed)) {
    return void 0;
  }
  return collapsed;
}
|
|
3531
|
+
/**
 * Flags text that matches one of the boilerplate patterns below: a "{...}" span,
 * TikTok Shop / promotional phrases, or generic TikTok site titles.
 *
 * @param {string} value - Text to check.
 * @returns {boolean} `true` when any pattern matches.
 */
function isLowQualityTikTokText(value) {
  const boilerplatePatterns = [
    /\{[^}]+\}/,
    /tiktok\s*shop|free shipping|eligible items|exclusive collections/i,
    /^tiktok live creator networks$/i,
    /^tiktok\s*-\s*make your day$/i,
    /^discover (?:new |popular )?videos?\b.*\btiktok\b/i
  ];
  return boilerplatePatterns.some((pattern) => pattern.test(value));
}
|
|
3534
|
+
/**
 * Collects trimmed, non-empty strings from a value that may be a string or an
 * arbitrarily nested array of strings; every other input contributes nothing.
 *
 * @param {unknown} value - String, array, or anything else.
 * @returns {string[]} Flat list of trimmed strings in encounter order.
 */
function urlsFromUnknown(value) {
  if (Array.isArray(value)) {
    return value.flatMap((entry) => urlsFromUnknown(entry));
  }
  if (typeof value !== "string") {
    return [];
  }
  const trimmed = value.trim();
  return trimmed ? [trimmed] : [];
}
|
|
3543
|
+
/**
 * Removes falsy entries and duplicates, keeping first-occurrence order.
 *
 * @param {Array<string|undefined|null>} values
 * @returns {string[]}
 */
function uniqueStrings3(values) {
  const seen = new Set();
  for (const value of values) {
    if (value) {
      seen.add(value);
    }
  }
  return Array.from(seen);
}
|
|
3225
3546
|
function normalizePlatformResult(rawData) {
|
|
3226
3547
|
const type = rawData.type ?? inferAdapterType(rawData);
|
|
3227
3548
|
return compactAdapterResult({
|
|
@@ -3572,6 +3893,20 @@ function parseRedditUrl(url) {
|
|
|
3572
3893
|
/**
 * Strips a trailing " : r/<subreddit>" suffix from a title and trims the result.
 *
 * @param {string|undefined|null} title
 * @returns {string|undefined} Cleaned title; undefined for nullish input.
 */
function cleanSocialTitle(title) {
  if (title == null) {
    return void 0;
  }
  const withoutSubredditSuffix = title.replace(/\s*:\s*r\/[A-Za-z0-9_]+$/i, "");
  return withoutSubredditSuffix.trim();
}
|
|
3896
|
+
/**
 * Cleans a Reddit title with `cleanSocialTitle` and rejects the titles of Reddit's
 * verification and rate-limit pages.
 *
 * @param {string|undefined|null} title
 * @returns {string|undefined} Cleaned title, or undefined when empty or a block-page title.
 */
function cleanRedditTitle(title) {
  const cleaned = cleanSocialTitle(title);
  if (!cleaned) {
    return void 0;
  }
  const blockPageTitle = /reddit\s*-\s*please wait for verification|please wait for verification|whoa there, pardner/i;
  return blockPageTitle.test(cleaned) ? void 0 : cleaned;
}
|
|
3903
|
+
/**
 * Collapses whitespace in a Reddit description and rejects text from Reddit's
 * verification and block pages.
 *
 * @param {string|undefined|null} description
 * @returns {string|undefined} Cleaned description, or undefined when nullish, empty,
 *   or block-page text.
 */
function cleanRedditDescription(description) {
  if (description == null) {
    return void 0;
  }
  const collapsed = description.replace(/\s+/g, " ").trim();
  if (collapsed.length === 0) {
    return void 0;
  }
  const blockPageText = /please wait for verification|whoa there, pardner|request has been blocked/i;
  return blockPageText.test(collapsed) ? void 0 : collapsed;
}
|
|
3575
3910
|
function hostMatches(url, domains) {
|
|
3576
3911
|
const host = url.hostname.toLowerCase().replace(/^www\./, "");
|
|
3577
3912
|
return domains.some((domain) => host === domain || host.endsWith(`.${domain}`));
|
|
@@ -3961,12 +4296,17 @@ function ascii(bytes, offset, length) {
|
|
|
3961
4296
|
}
|
|
3962
4297
|
|
|
3963
4298
|
// src/fetchMetadata.ts
|
|
4299
|
+
// Warning text attached to results built for a blocked provider response.
var REDDIT_BLOCKED_METADATA_WARNING = "Reddit returned a verification/block page; metadata is incomplete.";

// Machine-readable remediation hint for blocked provider responses.
// NOTE(review): no consumer is visible in this part of the file - confirm where it is reported.
var PROVIDER_BLOCKED_SUGGESTED_ACTION = "retry_on_different_host_or_use_supported_proxy";
|
|
3964
4301
|
async function fetchMetadata(url, options = {}) {
|
|
3965
4302
|
const startedAt = Date.now();
|
|
3966
4303
|
try {
|
|
3967
4304
|
const requestedUrl = normalizeUrl(url);
|
|
3968
4305
|
const fetchResult = await fetchPageWithStrategies(requestedUrl, options);
|
|
3969
4306
|
const page = fetchResult.page;
|
|
4307
|
+
if (fetchResult.providerDiagnostics?.blocked) {
|
|
4308
|
+
return createBlockedProviderMetadata(requestedUrl, fetchResult, Date.now() - startedAt);
|
|
4309
|
+
}
|
|
3970
4310
|
const directMedia = createDirectMediaMetadata(page, requestedUrl, Date.now() - startedAt);
|
|
3971
4311
|
if (directMedia) {
|
|
3972
4312
|
return directMedia;
|
|
@@ -3993,7 +4333,7 @@ async function fetchMetadata(url, options = {}) {
|
|
|
3993
4333
|
...metadata.canonicalUrl ? ["resolved canonical URL"] : []
|
|
3994
4334
|
];
|
|
3995
4335
|
metadata.diagnostics.fallbacksAttempted = mergeFallbackAttempts2(metadata.diagnostics.fallbacksAttempted, fetchResult.fallbacksAttempted);
|
|
3996
|
-
metadata.diagnostics.sourcePriority =
|
|
4336
|
+
metadata.diagnostics.sourcePriority = uniqueStrings4([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
|
|
3997
4337
|
metadata.diagnostics.extractionMethod = metadata.diagnostics.extractionMethod ?? fetchResult.extractionMethod;
|
|
3998
4338
|
metadata.diagnostics.retryInfo = metadata.diagnostics.retryInfo ?? fetchResult.retryInfo;
|
|
3999
4339
|
metadata.trace = metadata.diagnostics.trace;
|
|
@@ -4035,8 +4375,64 @@ async function fetchMetadata(url, options = {}) {
|
|
|
4035
4375
|
};
|
|
4036
4376
|
}
|
|
4037
4377
|
}
|
|
4378
|
+
/**
 * Builds the `ok: false` metadata result returned when the provider served a block page.
 *
 * All scores are zero and all media lists are empty; the result carries the fetch
 * diagnostics (redirects, status, fallbacks, provider diagnostics) plus a de-duplicated
 * trace and warning list.
 * NOTE(review): the Reddit-specific warning is always added, while `siteName` is only set
 * when `providerDiagnostics.platform` is "reddit" - confirm only Reddit reaches this path.
 *
 * @param {string} requestedUrl - Normalized URL that was requested.
 * @param {object} fetchResult - Fetch strategy result (page, trace, warnings, diagnostics).
 * @param {number} fetchDurationMs - Elapsed fetch time in milliseconds.
 * @returns {object} Metadata object describing the blocked response.
 */
function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationMs) {
  const { page, providerDiagnostics } = fetchResult;

  const traceEntries = [];
  if (page.isShortUrl) {
    traceEntries.push(`detected short URL provider: ${page.shortUrlProvider ?? "unknown"}`);
  }
  const redirectCount = page.redirects.length;
  if (redirectCount > 0) {
    traceEntries.push(`resolved ${redirectCount} redirect${redirectCount === 1 ? "" : "s"}`);
  }
  traceEntries.push(...fetchResult.trace, "detected blocked provider response");
  const trace = uniqueStrings4(traceEntries);

  const warningEntries = [...fetchResult.warnings, REDDIT_BLOCKED_METADATA_WARNING];
  // Written as two comparisons so a missing status code adds no warning.
  if (page.statusCode < 200 || page.statusCode >= 300) {
    warningEntries.push(`Fetch completed with non-success status code ${page.statusCode}.`);
  }
  const warnings = uniqueStrings4(warningEntries);

  return {
    ok: false,
    url: requestedUrl,
    finalUrl: page.finalUrl,
    type: "unknown",
    siteName: providerDiagnostics?.platform === "reddit" ? "Reddit" : void 0,
    confidence: 0,
    completeness: 0,
    reliability: 0,
    images: [],
    videos: [],
    audio: [],
    favicons: [],
    trace,
    diagnostics: {
      originalUrl: requestedUrl,
      finalUrl: page.finalUrl,
      isShortUrl: page.isShortUrl,
      shortUrlProvider: page.shortUrlProvider,
      statusCode: page.statusCode,
      contentType: page.contentType,
      redirects: page.redirects,
      sourcesUsed: [],
      warnings,
      fallbacksAttempted: mergeFallbackAttempts2(void 0, fetchResult.fallbacksAttempted),
      trace,
      sourcePriority: fetchResult.sourcePriority,
      extractionMethod: fetchResult.extractionMethod,
      retryInfo: fetchResult.retryInfo,
      providerDiagnostics,
      confidenceBreakdown: {
        title: 0,
        description: 0,
        image: 0,
        structuredData: 0,
        adapter: 0
      },
      fetchDurationMs,
      extractedAt: (/* @__PURE__ */ new Date()).toISOString()
    }
  };
}
|
|
4038
4434
|
async function fetchPageWithStrategies(requestedUrl, options) {
|
|
4039
|
-
if (
|
|
4435
|
+
if (isRedditUrl2(requestedUrl)) {
|
|
4040
4436
|
return fetchRedditPageWithStrategy(requestedUrl, options);
|
|
4041
4437
|
}
|
|
4042
4438
|
return {
|
|
@@ -4049,6 +4445,7 @@ async function fetchPageWithStrategies(requestedUrl, options) {
|
|
|
4049
4445
|
async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
4050
4446
|
const attempts = [];
|
|
4051
4447
|
const warnings = [];
|
|
4448
|
+
const informationalFallbacks = [];
|
|
4052
4449
|
const sourcePriority = ["redditJsonEndpoint", "oldReddit", "embeddedStructuredData", "openGraph", "html"];
|
|
4053
4450
|
let lastError;
|
|
4054
4451
|
const jsonUrl = redditJsonEndpoint(requestedUrl);
|
|
@@ -4059,7 +4456,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
4059
4456
|
});
|
|
4060
4457
|
attempts.push(attempt);
|
|
4061
4458
|
lastError = attempt.error;
|
|
4062
|
-
if (attempt.page && attempt.ok
|
|
4459
|
+
if (attempt.page && attempt.ok) {
|
|
4063
4460
|
const redditPost = parseRedditJsonPayload(attempt.page.html);
|
|
4064
4461
|
if (redditPost?.title) {
|
|
4065
4462
|
return {
|
|
@@ -4074,7 +4471,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
4074
4471
|
}
|
|
4075
4472
|
warnings.push("Reddit JSON endpoint responded, but no post payload could be extracted.");
|
|
4076
4473
|
} else if (attempt.blocked) {
|
|
4077
|
-
|
|
4474
|
+
informationalFallbacks.push("Informational fallback: Reddit JSON endpoint appears to have blocked access; continuing with fallback extraction.");
|
|
4078
4475
|
}
|
|
4079
4476
|
}
|
|
4080
4477
|
const oldRedditUrl = redditOldUrl(requestedUrl);
|
|
@@ -4082,12 +4479,12 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
4082
4479
|
const attempt = await attemptFetch("oldReddit", oldRedditUrl, options);
|
|
4083
4480
|
attempts.push(attempt);
|
|
4084
4481
|
lastError = attempt.error;
|
|
4085
|
-
if (attempt.page && attempt.ok
|
|
4482
|
+
if (attempt.page && attempt.ok) {
|
|
4086
4483
|
return {
|
|
4087
4484
|
page: attempt.page,
|
|
4088
4485
|
fallbacksAttempted: attempts,
|
|
4089
4486
|
warnings,
|
|
4090
|
-
trace: ["retried Reddit page through old.reddit"],
|
|
4487
|
+
trace: [...informationalFallbacks, "retried Reddit page through old.reddit"],
|
|
4091
4488
|
sourcePriority,
|
|
4092
4489
|
extractionMethod: "reddit:oldReddit",
|
|
4093
4490
|
retryInfo: redditRetryInfo(attempts)
|
|
@@ -4100,33 +4497,48 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
4100
4497
|
const htmlAttempt = await attemptFetch("redditHtmlFallback", requestedUrl, options);
|
|
4101
4498
|
attempts.push(htmlAttempt);
|
|
4102
4499
|
lastError = htmlAttempt.error;
|
|
4103
|
-
if (htmlAttempt.page) {
|
|
4104
|
-
if (htmlAttempt.blocked) {
|
|
4105
|
-
warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
|
|
4106
|
-
}
|
|
4500
|
+
if (htmlAttempt.page && htmlAttempt.ok) {
|
|
4107
4501
|
return {
|
|
4108
4502
|
page: htmlAttempt.page,
|
|
4109
4503
|
fallbacksAttempted: attempts,
|
|
4110
4504
|
warnings,
|
|
4111
|
-
trace: ["used Reddit HTML fallback"],
|
|
4505
|
+
trace: [...informationalFallbacks, "used Reddit HTML fallback"],
|
|
4112
4506
|
sourcePriority,
|
|
4113
4507
|
extractionMethod: "reddit:htmlFallback",
|
|
4114
4508
|
retryInfo: redditRetryInfo(attempts)
|
|
4115
4509
|
};
|
|
4116
4510
|
}
|
|
4511
|
+
if (htmlAttempt.blocked) {
|
|
4512
|
+
warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
|
|
4513
|
+
}
|
|
4514
|
+
const providerDiagnostics = redditProviderDiagnosticsFromAttempts(attempts);
|
|
4515
|
+
if (providerDiagnostics) {
|
|
4516
|
+
return {
|
|
4517
|
+
page: synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics),
|
|
4518
|
+
fallbacksAttempted: attempts,
|
|
4519
|
+
warnings: uniqueStrings4([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
|
|
4520
|
+
trace: [...informationalFallbacks, "Reddit provider blocked metadata extraction"],
|
|
4521
|
+
sourcePriority,
|
|
4522
|
+
extractionMethod: "reddit:blockedProvider",
|
|
4523
|
+
retryInfo: redditRetryInfo(attempts),
|
|
4524
|
+
providerDiagnostics
|
|
4525
|
+
};
|
|
4526
|
+
}
|
|
4117
4527
|
throw lastError ?? new Error("All Reddit extraction fetch attempts failed.");
|
|
4118
4528
|
}
|
|
4119
4529
|
async function attemptFetch(method, url, options) {
|
|
4120
4530
|
try {
|
|
4121
4531
|
const page = await fetchPage(url, options);
|
|
4122
4532
|
const retryAfter = page.headers["retry-after"];
|
|
4123
|
-
const
|
|
4533
|
+
const blockReason = redditBlockReason(page);
|
|
4534
|
+
const blocked = Boolean(blockReason);
|
|
4124
4535
|
return {
|
|
4125
4536
|
method,
|
|
4126
4537
|
url,
|
|
4127
4538
|
ok: page.statusCode >= 200 && page.statusCode < 300 && !blocked,
|
|
4128
4539
|
statusCode: page.statusCode,
|
|
4129
4540
|
blocked,
|
|
4541
|
+
blockReason,
|
|
4130
4542
|
retryAfter,
|
|
4131
4543
|
page
|
|
4132
4544
|
};
|
|
@@ -4139,7 +4551,7 @@ async function attemptFetch(method, url, options) {
|
|
|
4139
4551
|
};
|
|
4140
4552
|
}
|
|
4141
4553
|
}
|
|
4142
|
-
function
|
|
4554
|
+
function isRedditUrl2(url) {
|
|
4143
4555
|
try {
|
|
4144
4556
|
const host = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
|
|
4145
4557
|
return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
|
|
@@ -4255,46 +4667,163 @@ function findRedditPostRecord(value) {
|
|
|
4255
4667
|
return void 0;
|
|
4256
4668
|
}
|
|
4257
4669
|
/**
 * Builds the ordered, de-duplicated list of image assets for a Reddit post record.
 *
 * Candidates are gathered in this order: gallery images, a direct image link,
 * one asset per `post.preview.images` entry (the entry's `source`, or failing
 * that its largest `resolutions` record), and finally `post.thumbnail` when it
 * is an absolute http(s) URL. The combined list is passed through
 * `prioritizeRedditImages` and then `dedupeRedditImages`.
 *
 * @param {object} post Reddit post record from the JSON endpoint.
 * @returns {object[]} Image assets; each URL appears at most once.
 */
function redditImagesFromPost(post) {
  const collected = redditGalleryImagesFromPost(post).concat(redditDirectImagesFromPost(post));

  const previewEntries = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
  for (const entry of previewEntries) {
    if (!isRecord5(entry)) {
      continue;
    }
    // Prefer the full-size source; only consult the scaled resolutions when
    // the source does not yield an allowed asset.
    const original = redditImageFromRecord(entry.source, "previewOriginal");
    if (original) {
      collected.push(original);
      continue;
    }
    const resolutions = Array.isArray(entry.resolutions) ? entry.resolutions : [];
    const scaled = redditImageFromRecord(largestRedditImageRecord(resolutions), "previewResolution");
    if (scaled) {
      collected.push(scaled);
    }
  }

  const thumbnailUrl = redditMediaUrl(stringFromUnknown4(post.thumbnail));
  // Only absolute http(s) thumbnails are considered.
  const hasAbsoluteThumbnail = Boolean(thumbnailUrl) && /^https?:\/\//i.test(thumbnailUrl);
  if (hasAbsoluteThumbnail) {
    const thumbnailAsset = redditImageAsset(thumbnailUrl, void 0, void 0, "thumbnail");
    if (thumbnailAsset) {
      collected.push(thumbnailAsset);
    }
  }

  const prioritized = prioritizeRedditImages(collected);
  return dedupeRedditImages(prioritized);
}
|
|
4699
|
+
/**
 * Extracts gallery image assets from `post.media_metadata`.
 *
 * Entries referenced by `post.gallery_data.items[].media_id` are emitted first,
 * in that order; every remaining `media_metadata` entry that has not already
 * produced an asset follows in `Object.entries` order.
 *
 * @param {object} post Reddit post record.
 * @returns {object[]} Gallery image assets; empty when `media_metadata` is not a record.
 */
function redditGalleryImagesFromPost(post) {
  if (!isRecord5(post.media_metadata)) {
    return [];
  }
  const metadataById = post.media_metadata;
  const galleryItems = isRecord5(post.gallery_data) && Array.isArray(post.gallery_data.items) ? post.gallery_data.items : [];

  const declaredIds = [];
  for (const item of galleryItems) {
    const mediaId = isRecord5(item) ? stringFromUnknown4(item.media_id) : void 0;
    if (mediaId) {
      declaredIds.push(mediaId);
    }
  }

  const emittedIds = /* @__PURE__ */ new Set();
  const collected = [];

  // Pass 1: honour the order declared by gallery_data.
  for (const mediaId of declaredIds) {
    const asset = redditImageFromMediaMetadata(metadataById[mediaId], mediaId);
    if (!asset) {
      continue;
    }
    collected.push(asset);
    emittedIds.add(mediaId);
  }

  // Pass 2: append metadata entries that pass 1 did not emit.
  for (const [mediaId, entry] of Object.entries(metadataById)) {
    if (emittedIds.has(mediaId)) {
      continue;
    }
    const asset = redditImageFromMediaMetadata(entry, mediaId);
    if (asset) {
      collected.push(asset);
    }
  }
  return collected;
}
|
|
4726
|
+
/**
 * Returns the post's own link as an image asset when it points at a direct
 * Reddit image host (as decided by `isDirectRedditImageUrl`).
 *
 * The URL is taken from `post.url_overridden_by_dest`, falling back to
 * `post.url`. Width and height are borrowed from the first preview source
 * record, when one exists.
 *
 * @param {object} post Reddit post record.
 * @returns {object[]} A one-element array with the asset, or an empty array.
 */
function redditDirectImagesFromPost(post) {
  const rawUrl = stringFromUnknown4(post.url_overridden_by_dest) ?? stringFromUnknown4(post.url);
  const mediaUrl = redditMediaUrl(rawUrl);
  if (mediaUrl && isDirectRedditImageUrl(mediaUrl)) {
    const dimensions = previewSourceRecord(post);
    const directAsset = redditImageAsset(
      mediaUrl,
      numberFromUnknown2(dimensions?.width),
      numberFromUnknown2(dimensions?.height),
      "directImage"
    );
    if (directAsset) {
      return [directAsset];
    }
  }
  return [];
}
|
|
4740
|
+
/**
 * Converts one `media_metadata` entry into a gallery image asset.
 *
 * Reads the entry's `s` record: the URL comes from `s.u`, then `s.gif`, then
 * `s.mp4`; dimensions come from `s.x`/`s.y`, falling back to
 * `s.width`/`s.height`. The entry's `m` value is forwarded as the asset `type`.
 * The resulting asset's metadata is extended with `redditMediaId`.
 *
 * NOTE(review): an `s.mp4` fallback URL is still emitted with kind "image"
 * (via `redditImageAsset`); confirm that this is intended.
 *
 * @param {unknown} value The `media_metadata[mediaId]` entry.
 * @param {string} mediaId Key of the entry within `media_metadata`.
 * @returns {object | undefined} The asset, or undefined when no usable URL exists
 *   or the candidate is rejected by `redditImageAsset`.
 */
function redditImageFromMediaMetadata(value, mediaId) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const original = isRecord5(value.s) ? value.s : void 0;
  const rawUrl = stringFromUnknown4(original?.u) ?? stringFromUnknown4(original?.gif) ?? stringFromUnknown4(original?.mp4);
  const mediaUrl = redditMediaUrl(rawUrl);
  if (!mediaUrl) {
    return void 0;
  }
  const width = numberFromUnknown2(original?.x) ?? numberFromUnknown2(original?.width);
  const height = numberFromUnknown2(original?.y) ?? numberFromUnknown2(original?.height);
  const mimeType = stringFromUnknown4(value.m);
  const baseAsset = redditImageAsset(mediaUrl, width, height, "gallery", mimeType);
  if (!baseAsset) {
    return void 0;
  }
  return {
    ...baseAsset,
    metadata: {
      ...baseAsset.metadata,
      redditMediaId: mediaId
    }
  };
}
|
|
4766
|
+
/**
 * Converts a generic image record into an image asset.
 *
 * Accepts both naming schemes seen in this file: `url`/`width`/`height` and
 * the short `u`/`x`/`y` form, preferring the long names.
 *
 * @param {unknown} value Candidate record (non-records yield undefined).
 * @param {string} redditMediaKind Label stored in the asset's metadata.
 * @returns {object | undefined} The asset, or undefined when there is no URL or
 *   the candidate is rejected by `redditImageAsset`.
 */
function redditImageFromRecord(value, redditMediaKind) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const rawUrl = stringFromUnknown4(value.url) ?? stringFromUnknown4(value.u);
  const mediaUrl = redditMediaUrl(rawUrl);
  if (!mediaUrl) {
    return void 0;
  }
  const width = numberFromUnknown2(value.width) ?? numberFromUnknown2(value.x);
  const height = numberFromUnknown2(value.height) ?? numberFromUnknown2(value.y);
  return redditImageAsset(mediaUrl, width, height, redditMediaKind);
}
|
|
4781
|
+
/**
 * Assembles an adapter-sourced image asset and validates it.
 *
 * @param {string} url Image URL.
 * @param {number | undefined} width Pixel width, when known.
 * @param {number | undefined} height Pixel height, when known.
 * @param {string} redditMediaKind Label recorded in `metadata.redditMediaKind`.
 * @param {string} [type] Optional value stored as the asset's `type`.
 * @returns {object | undefined} The asset when `isAllowedRedditImageCandidate`
 *   accepts it, otherwise undefined.
 */
function redditImageAsset(url, width, height, redditMediaKind, type) {
  const metadata = {
    adapter: "redditJsonEndpoint",
    originalSource: "redditJsonEndpoint",
    redditMediaKind
  };
  const candidate = {
    url,
    kind: "image",
    source: "adapter",
    width,
    height,
    type,
    metadata
  };
  if (!isAllowedRedditImageCandidate(candidate)) {
    return void 0;
  }
  return candidate;
}
|
|
4797
|
+
/**
 * Picks the record with the greatest `width * height` from a list.
 *
 * Non-record entries are ignored; a missing width or height counts as 0.
 * The input array is not mutated (sorting happens on the filtered copy).
 *
 * @param {unknown[]} values Candidate records, e.g. preview resolutions.
 * @returns {object | undefined} The largest record, or undefined when none qualify.
 */
function largestRedditImageRecord(values) {
  const pixelArea = (record) => (numberFromUnknown2(record.width) ?? 0) * (numberFromUnknown2(record.height) ?? 0);
  const records = values.filter(isRecord5);
  // Descending by area, so the winner ends up at index 0.
  records.sort((left, right) => pixelArea(right) - pixelArea(left));
  return records[0];
}
|
|
4802
|
+
/**
 * Returns the `source` record of the first record-valued entry in
 * `post.preview.images`.
 *
 * @param {object} post Reddit post record.
 * @returns {object | undefined} The source record, or undefined when the post
 *   has no preview images or the first image has no record-valued `source`.
 */
function previewSourceRecord(post) {
  const hasPreviewImages = isRecord5(post.preview) && Array.isArray(post.preview.images);
  const previewImages = hasPreviewImages ? post.preview.images : [];
  const first = previewImages.find(isRecord5);
  if (first && isRecord5(first.source)) {
    return first.source;
  }
  return void 0;
}
|
|
4807
|
+
/**
 * Reports whether a URL is served from one of Reddit's direct image hosts
 * (`i.redd.it` or `preview.redd.it`).
 *
 * @param {string} value URL to inspect.
 * @returns {boolean} True for the two hosts above; false otherwise, including
 *   when `value` cannot be parsed as a URL.
 */
function isDirectRedditImageUrl(value) {
  let hostname;
  try {
    hostname = new URL(value).hostname.toLowerCase();
  } catch {
    // Unparseable input is simply "not a direct image URL".
    return false;
  }
  switch (hostname) {
    case "i.redd.it":
    case "preview.redd.it":
      return true;
    default:
      return false;
  }
}
|
|
4816
|
+
/**
 * Removes assets whose `url` has already been seen, keeping the first
 * occurrence and the original relative order.
 *
 * @param {Array<{ url: string }>} images Image assets, already in priority order.
 * @returns {Array<{ url: string }>} A new array with one asset per URL.
 */
function dedupeRedditImages(images) {
  const seenUrls = /* @__PURE__ */ new Set();
  return images.filter((image) => {
    if (seenUrls.has(image.url)) {
      return false;
    }
    seenUrls.add(image.url);
    return true;
  });
}
|
|
4299
4828
|
function redditVideosFromPost(post) {
|
|
4300
4829
|
const videos = [];
|
|
@@ -4321,7 +4850,9 @@ function redditVideosFromPost(post) {
|
|
|
4321
4850
|
}
|
|
4322
4851
|
function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
|
|
4323
4852
|
const finalUrl = post.canonicalUrl ?? requestedUrl;
|
|
4324
|
-
const bestImage = post.images.
|
|
4853
|
+
const bestImage = post.images.slice().sort(
|
|
4854
|
+
(left, right) => redditImagePriority(right) - redditImagePriority(left) || (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0)
|
|
4855
|
+
)[0];
|
|
4325
4856
|
const video = post.videos[0];
|
|
4326
4857
|
const structuredData = {
|
|
4327
4858
|
"@context": "https://schema.org",
|
|
@@ -4377,8 +4908,52 @@ function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
|
|
|
4377
4908
|
statusCode: jsonPage.statusCode
|
|
4378
4909
|
};
|
|
4379
4910
|
}
|
|
4380
|
-
function
|
|
4381
|
-
|
|
4911
|
+
/**
 * Summarises blocked fetch attempts into a provider-diagnostics object.
 *
 * The first blocked attempt whose `blockReason` is
 * "provider_verification_required" is preferred; otherwise the last blocked
 * attempt is used. The chosen attempt supplies `statusCode` and `reason`
 * (defaulting to "provider_blocked_request").
 *
 * @param {object[]} attempts Fetch attempts recorded by the Reddit strategy.
 * @returns {object | undefined} Diagnostics, or undefined when no attempt was blocked.
 */
function redditProviderDiagnosticsFromAttempts(attempts) {
  let firstVerification;
  let lastBlocked;
  for (const attempt of attempts) {
    if (!attempt.blocked) {
      continue;
    }
    lastBlocked = attempt;
    if (firstVerification === void 0 && attempt.blockReason === "provider_verification_required") {
      firstVerification = attempt;
    }
  }
  if (lastBlocked === void 0) {
    return void 0;
  }
  const chosen = firstVerification ?? lastBlocked;
  return {
    platform: "reddit",
    blocked: true,
    statusCode: chosen?.statusCode,
    reason: chosen?.blockReason ?? "provider_blocked_request",
    suggestedAction: PROVIDER_BLOCKED_SUGGESTED_ACTION
  };
}
|
|
4925
|
+
/**
 * Builds an empty placeholder page for a Reddit URL whose fetches were blocked.
 *
 * Response details (short-URL flags, content type, redirects, headers) are
 * copied from the first attempt whose `blockReason` equals
 * `providerDiagnostics.reason`, or failing that from the last attempt that has
 * a page. The body is always empty, and all three URL fields are the requested URL.
 *
 * @param {string} requestedUrl URL the caller asked for.
 * @param {object[]} attempts Fetch attempts, each optionally carrying a `page`.
 * @param {{ reason?: string, statusCode?: number }} providerDiagnostics Block diagnostics.
 * @returns {object} Page object with empty `html`/`bytes`; `statusCode` falls
 *   back to the selected page's status and finally to 403.
 */
function synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics) {
  const matchingAttempt = attempts.find((attempt) => attempt.blockReason === providerDiagnostics.reason);
  const lastAttemptWithPage = [...attempts].reverse().find((attempt) => attempt.page);
  const selectedPage = matchingAttempt?.page ?? lastAttemptWithPage?.page;

  const statusCode = providerDiagnostics.statusCode ?? selectedPage?.statusCode ?? 403;
  return {
    url: requestedUrl,
    originalUrl: requestedUrl,
    finalUrl: requestedUrl,
    isShortUrl: selectedPage?.isShortUrl ?? false,
    shortUrlProvider: selectedPage?.shortUrlProvider,
    html: "",
    bytes: new Uint8Array(),
    statusCode,
    contentType: selectedPage?.contentType,
    redirects: selectedPage?.redirects ?? [],
    headers: selectedPage?.headers ?? {}
  };
}
|
|
4941
|
+
/**
 * Classifies a fetched page as a Reddit verification/block response.
 *
 * A 2xx response whose body parses as a JSON object or array is treated as a
 * genuine API payload and is never classified from its text. Without that
 * guard, any post or comment that merely mentions "forbidden", "too many
 * requests", "verification required", etc. would mark a successful JSON fetch
 * as blocked and push the caller into fallback extraction.
 *
 * NOTE(review): the same phrase heuristics still run over full HTML bodies, so
 * an HTML page whose visible text contains those phrases can still be
 * misclassified; tightening that needs knowledge of real block-page shapes.
 *
 * @param {{ html: string, statusCode: number }} page Fetched page; only `html`
 *   and `statusCode` are read.
 * @returns {"provider_verification_required" | "provider_blocked_request" | undefined}
 *   The block reason, or undefined when the page does not look blocked.
 */
function redditBlockReason(page) {
  const html = page.html ?? "";
  const isSuccessStatus = page.statusCode >= 200 && page.statusCode < 300;
  if (isSuccessStatus && redditBodyIsJsonDocument(html)) {
    // Successful structured payload: user-generated text inside it must not
    // trigger the phrase heuristics below.
    return void 0;
  }
  const title = htmlTitle(html);
  const text = normalizeText(`${title ?? ""} ${html}`);
  if (/reddit\s*-\s*please wait for verification/i.test(title ?? "") || /please wait for verification|verification required|verify you are human/i.test(text)) {
    return "provider_verification_required";
  }
  if (page.statusCode === 403 || page.statusCode === 429 || /whoa there, pardner|request has been blocked|too many requests|forbidden|you're blocked|you are blocked|youre blocked|blocked by network security/i.test(text) || /^blocked$/i.test(title ?? "")) {
    return "provider_blocked_request";
  }
  return void 0;
}

/**
 * Reports whether a response body is a parseable JSON object or array.
 * Private helper for `redditBlockReason`.
 *
 * @param {string} body Raw response body.
 * @returns {boolean} True only when the trimmed body starts with `{` or `[`
 *   and `JSON.parse` accepts it.
 */
function redditBodyIsJsonDocument(body) {
  const trimmed = body.trim();
  // Cheap pre-check so HTML bodies are never handed to JSON.parse.
  if (!trimmed.startsWith("{") && !trimmed.startsWith("[")) {
    return false;
  }
  try {
    JSON.parse(trimmed);
    return true;
  } catch {
    // A parse failure is the expected "not JSON" answer, not an error.
    return false;
  }
}
|
|
4952
|
+
/**
 * Extracts the text of the first `<title>` element from an HTML string.
 *
 * @param {string} html Raw HTML.
 * @returns {string} The title passed through `normalizeText`; an empty string
 *   when no `<title>` element is found.
 */
function htmlTitle(html) {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const rawTitle = titleMatch ? titleMatch[1] : void 0;
  return normalizeText(rawTitle);
}
|
|
4955
|
+
/**
 * Reduces markup to plain text: replaces each `<...>` tag with a space,
 * collapses whitespace runs to a single space, and trims the ends.
 *
 * @param {string | null | undefined} value Text or HTML fragment.
 * @returns {string} The cleaned text; an empty string for null/undefined input.
 */
function normalizeText(value) {
  if (value === null || value === void 0) {
    return "";
  }
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const singleSpaced = withoutTags.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
4383
4958
|
function redditRetryInfo(attempts) {
|
|
4384
4959
|
const blockedAttempts = attempts.filter((attempt) => attempt.blocked || attempt.statusCode === 429 || attempt.statusCode === 403);
|
|
@@ -4412,7 +4987,7 @@ function mergeFallbackAttempts2(existing, incoming) {
|
|
|
4412
4987
|
}
|
|
4413
4988
|
const seen = /* @__PURE__ */ new Set();
|
|
4414
4989
|
return attempts.map((value) => {
|
|
4415
|
-
const { page: _page, ...attempt } = value;
|
|
4990
|
+
const { page: _page, blockReason: _blockReason, ...attempt } = value;
|
|
4416
4991
|
return attempt;
|
|
4417
4992
|
}).filter((attempt) => {
|
|
4418
4993
|
const key = `${attempt.method}:${attempt.url ?? ""}:${attempt.statusCode ?? ""}:${attempt.error ?? ""}`;
|
|
@@ -4423,7 +4998,7 @@ function mergeFallbackAttempts2(existing, incoming) {
|
|
|
4423
4998
|
return true;
|
|
4424
4999
|
});
|
|
4425
5000
|
}
|
|
4426
|
-
function
|
|
5001
|
+
/**
 * Drops falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array<string | null | undefined>} values Candidate strings.
 * @returns {string[]} Distinct truthy values in their original order.
 */
function uniqueStrings4(values) {
  const truthyValues = values.filter(Boolean);
  const distinct = new Set(truthyValues);
  return Array.from(distinct);
}
|
|
4429
5004
|
function redditMediaUrl(value) {
|