metanova 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +7 -0
- package/dist/index.cjs +644 -69
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +644 -69
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1239,6 +1239,95 @@ function uniqueStrings(values) {
|
|
|
1239
1239
|
return [...new Set(values.filter(Boolean))];
|
|
1240
1240
|
}
|
|
1241
1241
|
|
|
1242
|
+
// src/utils/redditMedia.ts
|
|
1243
|
+
// Case-insensitive pattern tested against candidate image URLs on Reddit pages;
// isAllowedRedditImageCandidate rejects every URL that matches it.
// NOTE(review): the alternatives are unanchored substrings, so "icon" also matches
// URLs such as ".../silicon-chip.jpg" — confirm this breadth is intended.
var REDDIT_BAD_IMAGE_URL_PATTERN = /thumbs\.redditmedia\.com|avatar|community_icon|subreddit|icon|award|emoji/i;
|
|
1244
|
+
// Media `source` labels that isTrustedRedditImageCandidate accepts without
// requiring Reddit-specific metadata on the asset.
var TRUSTED_REDDIT_SOURCES = /* @__PURE__ */ new Set([
  "adapter", "openGraph", "twitter", "jsonLd",
  "oEmbed", "nextData", "nuxt", "initialState",
  "preloadedState", "apollo", "applicationJson", "jsonScript"
]);
|
|
1258
|
+
/**
 * Reports whether `value` is an absolute URL hosted on reddit.com (bare, www,
 * or any other subdomain) or on the bare redd.it host.
 *
 * @param {string} value - Candidate URL string.
 * @returns {boolean} `false` when `value` cannot be parsed as a URL.
 */
function isRedditUrl(value) {
  let hostname;
  try {
    hostname = new URL(value).hostname;
  } catch {
    // Unparseable input is simply "not a Reddit URL".
    return false;
  }
  const bareHost = hostname.toLowerCase().replace(/^www\./, "");
  if (bareHost === "reddit.com" || bareHost === "redd.it") {
    return true;
  }
  return bareHost.endsWith(".reddit.com");
}
|
|
1266
|
+
/**
 * Narrows and orders image candidates for a Reddit page.
 *
 * Candidates rejected by isAllowedRedditImageCandidate are dropped. If at least
 * one remaining candidate is trusted, only the trusted ones are kept; otherwise
 * all allowed candidates are kept. The survivors are returned in the order
 * produced by prioritizeRedditImages.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} A new, prioritized array.
 */
function filterRedditImageCandidates(images) {
  const permitted = images.filter((candidate) => isAllowedRedditImageCandidate(candidate));
  const trustedOnly = permitted.filter((candidate) => isTrustedRedditImageCandidate(candidate));
  const pool = trustedOnly.length === 0 ? permitted : trustedOnly;
  return prioritizeRedditImages(pool);
}
|
|
1271
|
+
/**
 * Returns a sorted copy of `images`: highest redditImagePriority first, with
 * larger pixel area (imageArea) breaking ties. The input array is not mutated.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} Sorted shallow copy.
 */
function prioritizeRedditImages(images) {
  const byPriorityThenArea = (left, right) => {
    const priorityDelta = redditImagePriority(right) - redditImagePriority(left);
    if (priorityDelta) {
      return priorityDelta;
    }
    return imageArea(right) - imageArea(left);
  };
  const copy = images.slice();
  copy.sort(byPriorityThenArea);
  return copy;
}
|
|
1276
|
+
/**
 * Decides whether an image may be used as a Reddit preview candidate.
 *
 * An image is rejected when a known width or height is below 200, or when its
 * URL matches REDDIT_BAD_IMAGE_URL_PATTERN.
 *
 * @param {{url: string, width?: number|null, height?: number|null}} image
 * @returns {boolean} `true` when the candidate is acceptable.
 */
function isAllowedRedditImageCandidate(image) {
  const minimumSide = 200;
  // `!= null` treats both undefined and null as "dimension unknown". A strict
  // `!== undefined` check would let null coerce to 0 and reject the image,
  // unlike imageArea, which treats null as unknown via `??`.
  const isKnownAndTooSmall = (side) => side != null && side < minimumSide;
  if (isKnownAndTooSmall(image.width)) {
    return false;
  }
  if (isKnownAndTooSmall(image.height)) {
    return false;
  }
  return !REDDIT_BAD_IMAGE_URL_PATTERN.test(image.url);
}
|
|
1285
|
+
/**
 * Ranks an image for Reddit-aware ordering; larger numbers are preferred.
 *
 * Checks run in this order and the first match wins:
 *   metadata.redditMediaKind "gallery" (700), "previewOriginal" (620),
 *   "directImage" (580); host preview.redd.it (560); host i.redd.it (540);
 *   host thumbs.redditmedia.com (1); source "openGraph"/"twitter" (420);
 *   host external-preview.redd.it (300); any other URL accepted by
 *   isRedditMediaUrl (250); otherwise 0.
 *
 * @param {{url: string, source?: string, metadata?: object}} image
 * @returns {number} Priority value.
 */
function redditImagePriority(image) {
  const mediaKind = typeof image.metadata?.redditMediaKind === "string" ? image.metadata.redditMediaKind : void 0;
  const url = image.url.toLowerCase();
  if (mediaKind === "gallery") {
    return 700;
  }
  if (mediaKind === "previewOriginal") {
    return 620;
  }
  if (mediaKind === "directImage") {
    return 580;
  }
  if (/\/\/preview\.redd\.it\//i.test(url)) {
    return 560;
  }
  if (/\/\/i\.redd\.it\//i.test(url)) {
    return 540;
  }
  // The thumbnail host is demoted before the source check: otherwise a
  // thumbnail that was merely declared through openGraph/twitter tags would
  // receive 420 and outrank full-size Reddit media, even though
  // scoreRedditMedia penalises this host regardless of source.
  if (/\/\/thumbs\.redditmedia\.com\//i.test(url)) {
    return 1;
  }
  if (image.source === "openGraph" || image.source === "twitter") {
    return 420;
  }
  if (/\/\/external-preview\.redd\.it\//i.test(url)) {
    return 300;
  }
  return isRedditMediaUrl(url) ? 250 : 0;
}
|
|
1314
|
+
/**
 * Reports whether any image carries Reddit-specific metadata: an `adapter` of
 * "redditAdapter", an `originalSource` of "redditJsonEndpoint", or a truthy
 * `redditMediaKind`.
 *
 * @param {Array<{metadata?: object}>} images - Image candidates.
 * @returns {boolean}
 */
function hasRedditImageContext(images) {
  const carriesRedditMarker = ({ metadata }) => {
    if (metadata?.adapter === "redditAdapter") {
      return true;
    }
    if (metadata?.originalSource === "redditJsonEndpoint") {
      return true;
    }
    return Boolean(metadata?.redditMediaKind);
  };
  return images.some(carriesRedditMarker);
}
|
|
1321
|
+
/**
 * Tests whether `value` contains a `//host/` segment for one of Reddit's media
 * hosts: i.redd.it, preview.redd.it, external-preview.redd.it, v.redd.it or
 * thumbs.redditmedia.com. Matching is case-insensitive.
 *
 * @param {string} value - URL text to inspect.
 * @returns {boolean}
 */
function isRedditMediaUrl(value) {
  const redditMediaHost = /(?:^https?:)?\/\/(?:(?:i|preview|external-preview)\.redd\.it|thumbs\.redditmedia\.com|v\.redd\.it)\//i;
  return redditMediaHost.test(value);
}
|
|
1324
|
+
/**
 * An image is trusted when its `source` is listed in TRUSTED_REDDIT_SOURCES or
 * when it carries a truthy `metadata.redditMediaKind`.
 *
 * @param {{source?: string, metadata?: object}} image
 * @returns {boolean}
 */
function isTrustedRedditImageCandidate(image) {
  if (TRUSTED_REDDIT_SOURCES.has(image.source)) {
    return true;
  }
  return Boolean(image.metadata?.redditMediaKind);
}
|
|
1327
|
+
/**
 * Pixel area of an image; a missing (null/undefined) width or height counts
 * as 0, so the area is 0 unless both dimensions are known.
 *
 * @param {{width?: number|null, height?: number|null}} image
 * @returns {number}
 */
function imageArea(image) {
  const { width, height } = image;
  return (width ?? 0) * (height ?? 0);
}
|
|
1330
|
+
|
|
1242
1331
|
// src/scorers/image.ts
|
|
1243
1332
|
var SOURCE_WEIGHT = {
|
|
1244
1333
|
adapter: 98,
|
|
@@ -1259,6 +1348,7 @@ var SOURCE_WEIGHT = {
|
|
|
1259
1348
|
};
|
|
1260
1349
|
function scoreImages(images, customScorers = []) {
|
|
1261
1350
|
const duplicateCounts = countDuplicates(images);
|
|
1351
|
+
const redditContext = hasRedditImageContext(images);
|
|
1262
1352
|
return images.map((image, index) => {
|
|
1263
1353
|
const scored = scoreImageWithDetails(image, index, images, duplicateCounts);
|
|
1264
1354
|
const customScore = customScorers.reduce((total, scorer) => total + scorer(image, { index, images }), 0);
|
|
@@ -1274,7 +1364,7 @@ function scoreImages(images, customScorers = []) {
|
|
|
1274
1364
|
}
|
|
1275
1365
|
};
|
|
1276
1366
|
}).sort(
|
|
1277
|
-
(left, right) => (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) ||
|
|
1367
|
+
(left, right) => (redditContext ? redditImagePriority(right) - redditImagePriority(left) : 0) || (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea2(right) - imageArea2(left)
|
|
1278
1368
|
);
|
|
1279
1369
|
}
|
|
1280
1370
|
function selectBestImage(images, customScorers = []) {
|
|
@@ -1292,15 +1382,17 @@ function scoreImageWithDetails(image, index, images, duplicateCounts) {
|
|
|
1292
1382
|
const dimensions = scoreDimensions(image);
|
|
1293
1383
|
const format = scoreFormat(image);
|
|
1294
1384
|
const urlSignal = scoreUrlSignal(image);
|
|
1385
|
+
const redditMedia = scoreRedditMedia(image);
|
|
1295
1386
|
const urlPenalty = scoreUrlPenalty(image);
|
|
1296
1387
|
const duplicatePenalty = scoreDuplicatePenalty(image, duplicateCounts);
|
|
1297
1388
|
score += dimensions.score;
|
|
1298
1389
|
score += format.score;
|
|
1299
1390
|
score += urlSignal.score;
|
|
1391
|
+
score += redditMedia.score;
|
|
1300
1392
|
score -= urlPenalty;
|
|
1301
1393
|
score -= duplicatePenalty.score;
|
|
1302
1394
|
score -= Math.min(index * 1.5, 10);
|
|
1303
|
-
reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...duplicatePenalty.reasons);
|
|
1395
|
+
reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...redditMedia.reasons, ...duplicatePenalty.reasons);
|
|
1304
1396
|
if (images.length === 1) {
|
|
1305
1397
|
score += 4;
|
|
1306
1398
|
reasons.push("only candidate image added 4 points");
|
|
@@ -1400,6 +1492,33 @@ function platformThumbnailScore(url) {
|
|
|
1400
1492
|
}
|
|
1401
1493
|
return { score: 0, reasons: [] };
|
|
1402
1494
|
}
|
|
1495
|
+
/**
 * Computes the Reddit-specific score adjustment for an image together with a
 * human-readable reason.
 *
 * Images with zero Reddit priority and a non-Reddit media URL get no
 * adjustment. Otherwise `metadata.redditMediaKind` is checked first
 * ("gallery" +24, "previewOriginal" +20), then the URL host: i.redd.it +18,
 * preview.redd.it +16, external-preview.redd.it -8, thumbs.redditmedia.com -60.
 *
 * @param {{url: string, source?: string, metadata?: object}} image
 * @returns {{score: number, reasons: string[]}}
 */
function scoreRedditMedia(image) {
  const noAdjustment = () => ({ score: 0, reasons: [] });
  if (redditImagePriority(image) === 0 && !isRedditMediaUrl(image.url)) {
    return noAdjustment();
  }
  const lowered = image.url.toLowerCase();
  const kind = typeof image.metadata?.redditMediaKind === "string" ? image.metadata.redditMediaKind : void 0;
  if (kind === "gallery") {
    return { score: 24, reasons: ["Reddit gallery media added 24 points"] };
  }
  if (kind === "previewOriginal") {
    return { score: 20, reasons: ["Reddit original preview media added 20 points"] };
  }
  // Host rules, evaluated top to bottom; the first matching host decides.
  const hostRules = [
    [/\/\/i\.redd\.it\//i, 18, "Reddit direct image media added 18 points"],
    [/\/\/preview\.redd\.it\//i, 16, "Reddit preview media added 16 points"],
    [/\/\/external-preview\.redd\.it\//i, -8, "Reddit external preview media subtracted 8 points"],
    [/\/\/thumbs\.redditmedia\.com\//i, -60, "Reddit thumbnail host subtracted 60 points"]
  ];
  for (const [hostPattern, score, reason] of hostRules) {
    if (hostPattern.test(lowered)) {
      return { score, reasons: [reason] };
    }
  }
  return noAdjustment();
}
|
|
1403
1522
|
function scoreUrlPenalty(image) {
|
|
1404
1523
|
const url = image.url.toLowerCase();
|
|
1405
1524
|
let penalty = 0;
|
|
@@ -1472,7 +1591,7 @@ function countDuplicates(images) {
|
|
|
1472
1591
|
}
|
|
1473
1592
|
return counts;
|
|
1474
1593
|
}
|
|
1475
|
-
function
|
|
1594
|
+
/**
 * Width × height of an image, with a null/undefined dimension counted as 0.
 *
 * @param {{width?: number|null, height?: number|null}} image
 * @returns {number}
 */
function imageArea2(image) {
  const knownWidth = image.width ?? 0;
  const knownHeight = image.height ?? 0;
  return knownWidth * knownHeight;
}
|
|
1478
1597
|
function sourceSortWeight(image) {
|
|
@@ -1590,8 +1709,9 @@ function discoverMedia(rawSources, finalUrl) {
|
|
|
1590
1709
|
if (externalResults.some((result) => (result.images?.length ?? 0) > 0 || (result.videos?.length ?? 0) > 0)) {
|
|
1591
1710
|
trace.push("media discovery included adapter and plugin media");
|
|
1592
1711
|
}
|
|
1712
|
+
const dedupedImages = dedupeMediaBySignature(images);
|
|
1593
1713
|
return {
|
|
1594
|
-
images:
|
|
1714
|
+
images: isRedditUrl(finalUrl) ? filterRedditImageCandidates(dedupedImages) : dedupedImages,
|
|
1595
1715
|
videos: dedupeMediaBySignature(uniqueMediaByUrl(videos)),
|
|
1596
1716
|
audio: dedupeMediaBySignature(uniqueMediaByUrl(audio)),
|
|
1597
1717
|
trace
|
|
@@ -1683,7 +1803,8 @@ function mediaFromJsonValue(value, kind, source) {
|
|
|
1683
1803
|
height: parseNumber(stringFromUnknown(value.height)) ?? parseNumber(stringFromUnknown(nestedDetails?.height)),
|
|
1684
1804
|
alt: stringFromUnknown(value.alt) ?? stringFromUnknown(value.caption) ?? stringFromUnknown(value.name) ?? stringFromUnknown(nestedDetails?.alt),
|
|
1685
1805
|
title: stringFromUnknown(value.title) ?? stringFromUnknown(nestedDetails?.title),
|
|
1686
|
-
type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type)
|
|
1806
|
+
type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type),
|
|
1807
|
+
metadata: isRecord3(value.metadata) ? value.metadata : void 0
|
|
1687
1808
|
},
|
|
1688
1809
|
...srcsetAssets
|
|
1689
1810
|
];
|
|
@@ -1791,7 +1912,7 @@ function dedupeMediaBySignature(assets) {
|
|
|
1791
1912
|
for (const asset of assets) {
|
|
1792
1913
|
const key = mediaSignature2(asset.url);
|
|
1793
1914
|
const current = seen.get(key);
|
|
1794
|
-
if (!current ||
|
|
1915
|
+
if (!current || mediaRank(asset) > mediaRank(current)) {
|
|
1795
1916
|
seen.set(key, asset);
|
|
1796
1917
|
}
|
|
1797
1918
|
}
|
|
@@ -1825,6 +1946,10 @@ function sourceRank(source) {
|
|
|
1825
1946
|
};
|
|
1826
1947
|
return ranks[source] ?? 50;
|
|
1827
1948
|
}
|
|
1949
|
+
/**
 * Rank used by dedupeMediaBySignature to decide which of two assets with the
 * same signature is kept. Assets with a positive redditImagePriority rank at
 * 1000 + that priority; all others fall back to sourceRank of their source.
 *
 * NOTE(review): redditImagePriority returns 420 for any asset whose source is
 * "openGraph" or "twitter", whatever its URL, so such assets appear to get the
 * 1000+ rank on non-Reddit pages as well — confirm this is intended.
 *
 * @param {{url: string, source?: string, metadata?: object}} asset
 * @returns {number}
 */
function mediaRank(asset) {
  const redditPriority = redditImagePriority(asset);
  if (redditPriority > 0) {
    return 1e3 + redditPriority;
  }
  return sourceRank(asset.source);
}
|
|
1828
1953
|
function shouldIgnoreMediaUrl2(url) {
|
|
1829
1954
|
const normalized = url.toLowerCase();
|
|
1830
1955
|
return normalized.startsWith("data:") || normalized.startsWith("blob:") || normalized.startsWith("javascript:") || /(?:sprite|spacer|blank|transparent|placeholder|tracking|beacon|pixel|emoji|favicon|apple-touch-icon)(?:[._/-]|$|\?)/i.test(normalized) || /(?:^|[/?_-])1x1(?:[._/-]|$|\?)/i.test(normalized);
|
|
@@ -2149,7 +2274,7 @@ function normalizeMetadata(rawSources, context = {}) {
|
|
|
2149
2274
|
...mediaDiscovery.trace,
|
|
2150
2275
|
...selectedImage.best ? [`selected image from ${sourceLabel2(selectedImage.best)}`] : []
|
|
2151
2276
|
]);
|
|
2152
|
-
|
|
2277
|
+
const normalized = stripUndefined2({
|
|
2153
2278
|
ok: true,
|
|
2154
2279
|
url,
|
|
2155
2280
|
finalUrl,
|
|
@@ -2179,6 +2304,12 @@ function normalizeMetadata(rawSources, context = {}) {
|
|
|
2179
2304
|
diagnostics,
|
|
2180
2305
|
trace: diagnostics.trace
|
|
2181
2306
|
});
|
|
2307
|
+
normalized.images = selectedImage.images;
|
|
2308
|
+
normalized.videos = videos;
|
|
2309
|
+
normalized.audio = audio;
|
|
2310
|
+
normalized.favicons = favicons;
|
|
2311
|
+
normalized.trace = diagnostics.trace;
|
|
2312
|
+
return normalized;
|
|
2182
2313
|
}
|
|
2183
2314
|
function normalizeAssets2(assets, baseUrl) {
|
|
2184
2315
|
return assets.map((asset) => {
|
|
@@ -2814,8 +2945,8 @@ var redditAdapter = {
|
|
|
2814
2945
|
type: reddit.isPost ? "social_post" : "website",
|
|
2815
2946
|
siteName: "Reddit",
|
|
2816
2947
|
canonicalUrl: context.raw.openGraph.url ?? context.raw.html.canonicalUrl,
|
|
2817
|
-
title:
|
|
2818
|
-
description: descriptionSelection.value,
|
|
2948
|
+
title: cleanRedditTitle(titleSelection.value),
|
|
2949
|
+
description: cleanRedditDescription(descriptionSelection.value),
|
|
2819
2950
|
images: markAdapterMedia(mediaFromContext(context).images, "redditAdapter"),
|
|
2820
2951
|
videos: markAdapterMedia(mediaFromContext(context).videos, "redditAdapter"),
|
|
2821
2952
|
author: username ? { name: username } : entityFromContext(context, ["author", "submitter", "user"]),
|
|
@@ -2901,7 +3032,7 @@ var tiktokAdapter = {
|
|
|
2901
3032
|
return this.detect?.(url) ?? false;
|
|
2902
3033
|
},
|
|
2903
3034
|
extract(context) {
|
|
2904
|
-
return
|
|
3035
|
+
return tiktokResult(context);
|
|
2905
3036
|
},
|
|
2906
3037
|
normalize(rawData) {
|
|
2907
3038
|
return normalizePlatformResult(rawData);
|
|
@@ -3120,25 +3251,215 @@ function redditDescriptionFromContext(context) {
|
|
|
3120
3251
|
}
|
|
3121
3252
|
return { value: context.raw.html.description, method: context.raw.html.description ? "reddit:html" : void 0 };
|
|
3122
3253
|
}
|
|
3123
|
-
function
|
|
3254
|
+
/**
 * Builds the TikTok adapter result for a fetched page.
 *
 * The username and post id are taken from the path of `context.finalUrl`; the
 * matching embedded `itemStruct` (when one is found) supplies title,
 * description, author, media and publish time, with the generic context
 * helpers as fallbacks.
 *
 * @param {object} context - Adapter context; reads `finalUrl` and `raw.openGraph.url` directly.
 * @returns {object} Whatever compactAdapterResult returns for the assembled fields.
 */
function tiktokResult(context) {
  const { pathname } = new URL(context.finalUrl);
  const username = pathname.match(/@([^/]+)/)?.[1];
  // Without a /video/ or /photo/ segment, the last non-empty path segment is used as the id.
  const postId = pathname.match(/\/(?:video|photo)\/([^/]+)/)?.[1] ?? pathname.split("/").filter(Boolean).at(-1);
  const itemStruct = tiktokItemStructFromContext(context, postId);
  const titleSelection = tiktokTitleFromContext(context, itemStruct, username);
  const descriptionSelection = tiktokDescriptionFromContext(context, itemStruct);
  const author = tiktokAuthorFromItemStruct(itemStruct, username);
  const media = tiktokMediaFromContext(context, itemStruct);
  const publishedTime = tiktokPublishedTime(itemStruct) ?? publishedTimeFromContext(context);
  const title = titleSelection.value;
  return compactAdapterResult({
    source: "tiktokAdapter",
    platform: "TikTok",
    type: "social_post",
    siteName: "TikTok",
    canonicalUrl: context.raw.openGraph.url,
    title,
    description: descriptionSelection.value,
    images: markAdapterMedia(media.images, "tiktokAdapter"),
    videos: markAdapterMedia(media.videos, "tiktokAdapter"),
    author,
    article: { publishedTime },
    // Video details are only emitted when a post id could be derived from the URL.
    video: postId ? {
      id: postId,
      title,
      channel: author,
      publishedTime,
      duration: tiktokVideoDuration(itemStruct),
      viewCount: tiktokStatCount(itemStruct, "playCount")
    } : void 0,
    identifiers: { username, postId },
    raw: {
      extractionMethod: titleSelection.method ?? descriptionSelection.method ?? "tiktok:htmlFallback"
    }
  });
}
|
|
3290
|
+
/**
 * Picks a title for a TikTok post and records which strategy produced it.
 *
 * Strategies are tried in order and the first non-empty value wins: the
 * cleaned `itemStruct.desc`, the music title, the cleaned generic context
 * title, and finally a "TikTok post by @user" string built from the URL
 * username. When nothing applies, both `value` and `method` are undefined.
 *
 * @param {object} context - Adapter context passed to titleFromContext.
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{value: (string|undefined), method: (string|undefined)}}
 */
function tiktokTitleFromContext(context, itemStruct, username) {
  // Lazy resolvers so later strategies only run when earlier ones yield nothing.
  const strategies = [
    ["tiktok:itemStruct.desc", () => cleanTikTokText(stringFromUnknown3(itemStruct?.desc))],
    ["tiktok:itemStruct.music", () => tiktokMusicTitle(itemStruct)],
    ["tiktok:fallback", () => cleanTikTokText(titleFromContext(context, ["desc", "caption", "title", "description"]))]
  ];
  for (const [method, resolve] of strategies) {
    const value = resolve();
    if (value) {
      return { value, method };
    }
  }
  if (!username) {
    return { value: void 0, method: void 0 };
  }
  return { value: `TikTok post by @${username}`, method: "tiktok:urlFallback" };
}
|
|
3308
|
+
/**
 * Picks a description for a TikTok post and records which strategy produced it.
 *
 * The cleaned `itemStruct.desc` is preferred; otherwise the cleaned generic
 * context description is used.
 *
 * @param {object} context - Adapter context passed to descriptionFromContext.
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {{value: (string|undefined), method: (string|undefined)}}
 *   `method` is only set when `value` is set.
 */
function tiktokDescriptionFromContext(context, itemStruct) {
  const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
  if (desc) {
    return { value: desc, method: "tiktok:itemStruct.desc" };
  }
  const fallback = cleanTikTokText(descriptionFromContext(context));
  return {
    value: fallback,
    // Report a method only when a description was actually found. An
    // unconditional "tiktok:fallback" would make the caller's
    // `?? "tiktok:htmlFallback"` default unreachable and mislabel empty results.
    method: fallback ? "tiktok:fallback" : void 0
  };
}
|
|
3318
|
+
/**
 * Locates the TikTok `itemStruct` record inside the page's embedded data.
 *
 * Pass 1 checks the fixed path
 * `__DEFAULT_SCOPE__["webapp.video-detail"].itemInfo.itemStruct` on every
 * embedded item. Pass 2 walks each item's data with walkData and takes the
 * first record stored under a key named "itemStruct". In both passes, when
 * `postId` is given the record's `id` must equal it.
 *
 * @param {object} context - Adapter context; reads `raw.embeddedData.items`.
 * @param {string|undefined} postId - Post id parsed from the URL, if any.
 * @returns {object|undefined} The matching record, or undefined.
 */
function tiktokItemStructFromContext(context, postId) {
  const embeddedItems = context.raw.embeddedData.items;
  const asRecord = (value) => (isRecord4(value) ? value : void 0);
  const matchesPost = (candidate) => !postId || stringFromUnknown3(candidate.id) === postId;

  // Pass 1: the known fixed location.
  for (const { data } of embeddedItems) {
    const scope = asRecord(data["__DEFAULT_SCOPE__"]);
    const detail = asRecord(scope?.["webapp.video-detail"]);
    const info = asRecord(detail?.itemInfo);
    const direct = asRecord(info?.itemStruct);
    if (direct && matchesPost(direct)) {
      return direct;
    }
  }

  // Pass 2: search anywhere in the embedded data.
  for (const { data } of embeddedItems) {
    let match;
    walkData(data, (value, key) => {
      const alreadyMatched = Boolean(match);
      if (alreadyMatched || key !== "itemStruct" || !isRecord4(value)) {
        return;
      }
      if (matchesPost(value)) {
        match = value;
      }
    });
    if (match) {
      return match;
    }
  }
  return void 0;
}
|
|
3344
|
+
/**
 * Combines media taken from the TikTok `itemStruct` with media discovered by
 * mediaFromContext. The itemStruct entries come first in each list.
 *
 * @param {object} context - Adapter context passed to mediaFromContext.
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {{images: object[], videos: object[]}}
 */
function tiktokMediaFromContext(context, itemStruct) {
  const discovered = mediaFromContext(context);
  const images = [...tiktokImagesFromItemStruct(itemStruct), ...discovered.images];
  const videos = [...tiktokVideosFromItemStruct(itemStruct), ...discovered.videos];
  return { images, videos };
}
|
|
3351
|
+
/**
 * Builds image assets from the cover fields of `itemStruct.video`
 * (`originCover`, `cover`, `dynamicCover`, `shareCover`), de-duplicated in
 * that order. Each asset is tagged with the video's width/height and
 * `metadata.tiktokMediaKind = "videoCover"`.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {object[]} Image assets; empty when there is no video record.
 */
function tiktokImagesFromItemStruct(itemStruct) {
  if (!isRecord4(itemStruct?.video)) {
    return [];
  }
  const { video } = itemStruct;
  const width = numberFromUnknown(video.width);
  const height = numberFromUnknown(video.height);
  const singleCoverFields = ["originCover", "cover", "dynamicCover"];
  const coverUrls = uniqueStrings3([
    ...singleCoverFields.map((field) => stringFromUnknown3(video[field])),
    ...urlsFromUnknown(video.shareCover)
  ]);
  return coverUrls.map((url) => {
    return {
      url,
      kind: "image",
      source: "applicationJson",
      width,
      height,
      metadata: { tiktokMediaKind: "videoCover" }
    };
  });
}
|
|
3375
|
+
/**
 * Builds video assets from the playback fields of `itemStruct.video`:
 * `playAddr`, `downloadAddr`, `PlayAddrStruct.UrlList` and each
 * `bitrateInfo[].PlayAddr.UrlList`. URLs are de-duplicated in that order and
 * only absolute http(s) URLs are kept. Each asset is typed "video/mp4" and
 * tagged with `metadata.tiktokMediaKind = "videoPlay"`.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {object[]} Video assets; empty when there is no video record.
 */
function tiktokVideosFromItemStruct(itemStruct) {
  if (!isRecord4(itemStruct?.video)) {
    return [];
  }
  const { video } = itemStruct;
  const width = numberFromUnknown(video.width);
  const height = numberFromUnknown(video.height);
  const directUrls = [stringFromUnknown3(video.playAddr), stringFromUnknown3(video.downloadAddr)];
  const structUrls = urlsFromTikTokPlayAddr(video.PlayAddrStruct);
  const bitrateUrls = Array.isArray(video.bitrateInfo)
    ? video.bitrateInfo.flatMap((entry) => (isRecord4(entry) ? urlsFromTikTokPlayAddr(entry.PlayAddr) : []))
    : [];
  const isAbsoluteHttp = (url) => /^https?:\/\//i.test(url);
  return uniqueStrings3([...directUrls, ...structUrls, ...bitrateUrls])
    .filter(isAbsoluteHttp)
    .map((url) => {
      return {
        url,
        kind: "video",
        source: "applicationJson",
        width,
        height,
        type: "video/mp4",
        metadata: { tiktokMediaKind: "videoPlay" }
      };
    });
}
|
|
3400
|
+
/**
 * Extracts the URLs listed under `UrlList` of a play-address record.
 *
 * @param {unknown} value - Candidate play-address record.
 * @returns {string[]} URLs from `value.UrlList`; empty when `value` is not a record.
 */
function urlsFromTikTokPlayAddr(value) {
  return isRecord4(value) ? urlsFromUnknown(value.UrlList) : [];
}
|
|
3406
|
+
/**
 * Builds the author entity for a TikTok post.
 *
 * The display name is the first available of `author.nickname`,
 * `author.uniqueId` and the URL `username`. The profile URL is only built from
 * the URL `username`.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{name: string, url: (string|undefined)}|undefined} Undefined when no name is available.
 */
function tiktokAuthorFromItemStruct(itemStruct, username) {
  const profile = isRecord4(itemStruct?.author) ? itemStruct.author : void 0;
  let name = stringFromUnknown3(profile?.nickname);
  name ??= stringFromUnknown3(profile?.uniqueId);
  name ??= username;
  if (!name) {
    return void 0;
  }
  const url = username ? `https://www.tiktok.com/@${username}` : void 0;
  return { name, url };
}
|
|
3417
|
+
/**
 * Converts `itemStruct.createTime` (interpreted as seconds, since it is
 * multiplied by 1000 before building the Date) into an ISO-8601 timestamp.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {string|undefined} ISO timestamp, or undefined when the value is
 *   missing, zero, or does not map to a representable date.
 */
function tiktokPublishedTime(itemStruct) {
  const created = numberFromUnknown(itemStruct?.createTime);
  if (!created) {
    return void 0;
  }
  const published = new Date(created * 1e3);
  // toISOString() throws RangeError on an invalid Date (out-of-range value),
  // which would abort the whole extraction; treat such values as "unknown".
  if (Number.isNaN(published.getTime())) {
    return void 0;
  }
  return published.toISOString();
}
|
|
3421
|
+
/**
 * Reads `itemStruct.video.duration` and passes it through stringFromUnknown3.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {string|undefined} Whatever stringFromUnknown3 yields for the raw duration.
 */
function tiktokVideoDuration(itemStruct) {
  const rawDuration = isRecord4(itemStruct?.video) ? itemStruct.video.duration : void 0;
  return stringFromUnknown3(rawDuration);
}
|
|
3425
|
+
/**
 * Reads one counter from `itemStruct.stats` and passes it through numberFromUnknown.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @param {string} key - Name of the stats field, e.g. "playCount".
 * @returns {number|undefined} Whatever numberFromUnknown yields for the raw value.
 */
function tiktokStatCount(itemStruct, key) {
  const rawCount = isRecord4(itemStruct?.stats) ? itemStruct.stats[key] : void 0;
  return numberFromUnknown(rawCount);
}
|
|
3429
|
+
/**
 * Derives a title from `itemStruct.music`.
 *
 * Returns "<title> - <authorName>" when both are present and the title does not
 * contain "original sound" (case-insensitive); otherwise just the title.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if found.
 * @returns {string|undefined} Undefined when no usable music title exists.
 */
function tiktokMusicTitle(itemStruct) {
  const music = isRecord4(itemStruct?.music) ? itemStruct.music : void 0;
  const title = cleanTikTokText(stringFromUnknown3(music?.title));
  const author = cleanTikTokText(stringFromUnknown3(music?.authorName));
  if (!title) {
    return void 0;
  }
  const isGenericSound = /original sound/i.test(title);
  return author && !isGenericSound ? `${title} - ${author}` : title;
}
|
|
3441
|
+
/**
 * Normalizes TikTok text: collapses whitespace runs to single spaces and trims.
 * Empty results and text flagged by isLowQualityTikTokText become undefined.
 *
 * @param {string|undefined|null} value - Raw text.
 * @returns {string|undefined}
 */
function cleanTikTokText(value) {
  const collapsed = value?.replace(/\s+/g, " ").trim();
  if (collapsed && !isLowQualityTikTokText(collapsed)) {
    return collapsed;
  }
  return void 0;
}
|
|
3448
|
+
/**
 * Flags text that should not be used as a title or description: anything
 * containing a `{...}` group, shop/promotion phrases, and a few exact or
 * prefix boilerplate strings. All checks except the brace check are
 * case-insensitive.
 *
 * @param {string} value - Already whitespace-normalized text.
 * @returns {boolean}
 */
function isLowQualityTikTokText(value) {
  const lowQualityPatterns = [
    /\{[^}]+\}/,
    /tiktok\s*shop|free shipping|eligible items|exclusive collections/i,
    /^tiktok live creator networks$/i,
    /^tiktok\s*-\s*make your day$/i,
    /^discover (?:new |popular )?videos?\b.*\btiktok\b/i
  ];
  return lowQualityPatterns.some((pattern) => pattern.test(value));
}
|
|
3451
|
+
/**
 * Collects trimmed, non-empty strings from a value that may be a string or an
 * arbitrarily nested array of such values. Anything else contributes nothing.
 *
 * @param {unknown} value - String, (nested) array, or other.
 * @returns {string[]} Flat list in encounter order.
 */
function urlsFromUnknown(value) {
  if (Array.isArray(value)) {
    return value.flatMap((entry) => urlsFromUnknown(entry));
  }
  if (typeof value !== "string") {
    return [];
  }
  const trimmed = value.trim();
  return trimmed ? [trimmed] : [];
}
|
|
3460
|
+
/**
 * Removes falsy entries and duplicates, keeping first-occurrence order.
 *
 * @param {Array<string|undefined|null>} values - Candidate strings.
 * @returns {string[]} A new array.
 */
function uniqueStrings3(values) {
  const seen = /* @__PURE__ */ new Set();
  values.forEach((value) => {
    if (value) {
      seen.add(value);
    }
  });
  return [...seen];
}
|
|
3142
3463
|
function normalizePlatformResult(rawData) {
|
|
3143
3464
|
const type = rawData.type ?? inferAdapterType(rawData);
|
|
3144
3465
|
return compactAdapterResult({
|
|
@@ -3489,6 +3810,20 @@ function parseRedditUrl(url) {
|
|
|
3489
3810
|
/**
 * Strips a trailing " : r/<subreddit>" suffix from a title and trims it.
 *
 * @param {string|undefined|null} title - Raw title.
 * @returns {string|undefined} Undefined when `title` is null or undefined.
 */
function cleanSocialTitle(title) {
  if (title == null) {
    return void 0;
  }
  const withoutSubredditSuffix = title.replace(/\s*:\s*r\/[A-Za-z0-9_]+$/i, "");
  return withoutSubredditSuffix.trim();
}
|
|
3813
|
+
/**
 * Cleans a Reddit title via cleanSocialTitle and discards it when it is empty
 * or contains verification/rate-limit page text ("please wait for
 * verification", "whoa there, pardner").
 *
 * @param {string|undefined|null} title - Raw title.
 * @returns {string|undefined}
 */
function cleanRedditTitle(title) {
  const cleaned = cleanSocialTitle(title);
  const blockedPageText = /reddit\s*-\s*please wait for verification|please wait for verification|whoa there, pardner/i;
  if (cleaned && !blockedPageText.test(cleaned)) {
    return cleaned;
  }
  return void 0;
}
|
|
3820
|
+
/**
 * Collapses whitespace in a Reddit description and discards it when it is
 * empty or contains verification/block page text ("please wait for
 * verification", "whoa there, pardner", "request has been blocked").
 *
 * @param {string|undefined|null} description - Raw description.
 * @returns {string|undefined}
 */
function cleanRedditDescription(description) {
  const collapsed = description?.replace(/\s+/g, " ").trim();
  const blockedPageText = /please wait for verification|whoa there, pardner|request has been blocked/i;
  if (collapsed && !blockedPageText.test(collapsed)) {
    return collapsed;
  }
  return void 0;
}
|
|
3492
3827
|
function hostMatches(url, domains) {
|
|
3493
3828
|
const host = url.hostname.toLowerCase().replace(/^www\./, "");
|
|
3494
3829
|
return domains.some((domain) => host === domain || host.endsWith(`.${domain}`));
|
|
@@ -3878,12 +4213,17 @@ function ascii(bytes, offset, length) {
|
|
|
3878
4213
|
}
|
|
3879
4214
|
|
|
3880
4215
|
// src/fetchMetadata.ts
|
|
4216
|
+
// Warning text that createBlockedProviderMetadata adds to diagnostics.warnings.
var REDDIT_BLOCKED_METADATA_WARNING = "Reddit returned a verification/block page; metadata is incomplete.";
|
|
4217
|
+
// Identifier string for the action suggested when a provider blocks the request.
// NOTE(review): not referenced in the portion of the file visible here; presumably
// attached to provider diagnostics elsewhere — confirm against its usage.
var PROVIDER_BLOCKED_SUGGESTED_ACTION = "retry_on_different_host_or_use_supported_proxy";
|
|
3881
4218
|
async function fetchMetadata(url, options = {}) {
|
|
3882
4219
|
const startedAt = Date.now();
|
|
3883
4220
|
try {
|
|
3884
4221
|
const requestedUrl = normalizeUrl(url);
|
|
3885
4222
|
const fetchResult = await fetchPageWithStrategies(requestedUrl, options);
|
|
3886
4223
|
const page = fetchResult.page;
|
|
4224
|
+
if (fetchResult.providerDiagnostics?.blocked) {
|
|
4225
|
+
return createBlockedProviderMetadata(requestedUrl, fetchResult, Date.now() - startedAt);
|
|
4226
|
+
}
|
|
3887
4227
|
const directMedia = createDirectMediaMetadata(page, requestedUrl, Date.now() - startedAt);
|
|
3888
4228
|
if (directMedia) {
|
|
3889
4229
|
return directMedia;
|
|
@@ -3910,7 +4250,7 @@ async function fetchMetadata(url, options = {}) {
|
|
|
3910
4250
|
...metadata.canonicalUrl ? ["resolved canonical URL"] : []
|
|
3911
4251
|
];
|
|
3912
4252
|
metadata.diagnostics.fallbacksAttempted = mergeFallbackAttempts2(metadata.diagnostics.fallbacksAttempted, fetchResult.fallbacksAttempted);
|
|
3913
|
-
metadata.diagnostics.sourcePriority =
|
|
4253
|
+
metadata.diagnostics.sourcePriority = uniqueStrings4([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
|
|
3914
4254
|
metadata.diagnostics.extractionMethod = metadata.diagnostics.extractionMethod ?? fetchResult.extractionMethod;
|
|
3915
4255
|
metadata.diagnostics.retryInfo = metadata.diagnostics.retryInfo ?? fetchResult.retryInfo;
|
|
3916
4256
|
metadata.trace = metadata.diagnostics.trace;
|
|
@@ -3952,8 +4292,64 @@ async function fetchMetadata(url, options = {}) {
|
|
|
3952
4292
|
};
|
|
3953
4293
|
}
|
|
3954
4294
|
}
|
|
4295
|
+
/**
 * Builds the `ok: false` metadata result returned when the provider responded
 * with a block/verification page.
 *
 * All scores are zero and all media lists are empty; the fetch details
 * (redirects, status, fallbacks, retry info, provider diagnostics) are carried
 * over into `diagnostics`. The same `trace` array is exposed at the top level
 * and under `diagnostics.trace`.
 *
 * @param {string} requestedUrl - Normalized URL that was requested.
 * @param {object} fetchResult - Result of the page fetch strategy; reads `page`,
 *   `providerDiagnostics`, `trace`, `warnings`, `fallbacksAttempted`,
 *   `sourcePriority`, `extractionMethod` and `retryInfo`.
 * @param {number} fetchDurationMs - Elapsed fetch time in milliseconds.
 * @returns {object} Metadata result describing the blocked response.
 */
function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationMs) {
  const { page, providerDiagnostics } = fetchResult;

  const traceEntries = [];
  if (page.isShortUrl) {
    traceEntries.push(`detected short URL provider: ${page.shortUrlProvider ?? "unknown"}`);
  }
  if (page.redirects.length > 0) {
    traceEntries.push(`resolved ${page.redirects.length} redirect${page.redirects.length === 1 ? "" : "s"}`);
  }
  traceEntries.push(...fetchResult.trace, "detected blocked provider response");
  const trace = uniqueStrings4(traceEntries);

  const warningEntries = [...fetchResult.warnings, REDDIT_BLOCKED_METADATA_WARNING];
  if (page.statusCode < 200 || page.statusCode >= 300) {
    warningEntries.push(`Fetch completed with non-success status code ${page.statusCode}.`);
  }
  const warnings = uniqueStrings4(warningEntries);

  const diagnostics = {
    originalUrl: requestedUrl,
    finalUrl: page.finalUrl,
    isShortUrl: page.isShortUrl,
    shortUrlProvider: page.shortUrlProvider,
    statusCode: page.statusCode,
    contentType: page.contentType,
    redirects: page.redirects,
    sourcesUsed: [],
    warnings,
    fallbacksAttempted: mergeFallbackAttempts2(void 0, fetchResult.fallbacksAttempted),
    trace,
    sourcePriority: fetchResult.sourcePriority,
    extractionMethod: fetchResult.extractionMethod,
    retryInfo: fetchResult.retryInfo,
    providerDiagnostics,
    confidenceBreakdown: {
      title: 0,
      description: 0,
      image: 0,
      structuredData: 0,
      adapter: 0
    },
    fetchDurationMs,
    extractedAt: (/* @__PURE__ */ new Date()).toISOString()
  };

  return {
    ok: false,
    url: requestedUrl,
    finalUrl: page.finalUrl,
    type: "unknown",
    siteName: providerDiagnostics?.platform === "reddit" ? "Reddit" : void 0,
    confidence: 0,
    completeness: 0,
    reliability: 0,
    images: [],
    videos: [],
    audio: [],
    favicons: [],
    trace,
    diagnostics
  };
}
|
|
3955
4351
|
async function fetchPageWithStrategies(requestedUrl, options) {
|
|
3956
|
-
if (
|
|
4352
|
+
if (isRedditUrl2(requestedUrl)) {
|
|
3957
4353
|
return fetchRedditPageWithStrategy(requestedUrl, options);
|
|
3958
4354
|
}
|
|
3959
4355
|
return {
|
|
@@ -3966,6 +4362,7 @@ async function fetchPageWithStrategies(requestedUrl, options) {
|
|
|
3966
4362
|
async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
3967
4363
|
const attempts = [];
|
|
3968
4364
|
const warnings = [];
|
|
4365
|
+
const informationalFallbacks = [];
|
|
3969
4366
|
const sourcePriority = ["redditJsonEndpoint", "oldReddit", "embeddedStructuredData", "openGraph", "html"];
|
|
3970
4367
|
let lastError;
|
|
3971
4368
|
const jsonUrl = redditJsonEndpoint(requestedUrl);
|
|
@@ -3976,7 +4373,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
3976
4373
|
});
|
|
3977
4374
|
attempts.push(attempt);
|
|
3978
4375
|
lastError = attempt.error;
|
|
3979
|
-
if (attempt.page && attempt.ok
|
|
4376
|
+
if (attempt.page && attempt.ok) {
|
|
3980
4377
|
const redditPost = parseRedditJsonPayload(attempt.page.html);
|
|
3981
4378
|
if (redditPost?.title) {
|
|
3982
4379
|
return {
|
|
@@ -3991,7 +4388,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
3991
4388
|
}
|
|
3992
4389
|
warnings.push("Reddit JSON endpoint responded, but no post payload could be extracted.");
|
|
3993
4390
|
} else if (attempt.blocked) {
|
|
3994
|
-
|
|
4391
|
+
informationalFallbacks.push("Informational fallback: Reddit JSON endpoint appears to have blocked access; continuing with fallback extraction.");
|
|
3995
4392
|
}
|
|
3996
4393
|
}
|
|
3997
4394
|
const oldRedditUrl = redditOldUrl(requestedUrl);
|
|
@@ -3999,12 +4396,12 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
3999
4396
|
const attempt = await attemptFetch("oldReddit", oldRedditUrl, options);
|
|
4000
4397
|
attempts.push(attempt);
|
|
4001
4398
|
lastError = attempt.error;
|
|
4002
|
-
if (attempt.page && attempt.ok
|
|
4399
|
+
if (attempt.page && attempt.ok) {
|
|
4003
4400
|
return {
|
|
4004
4401
|
page: attempt.page,
|
|
4005
4402
|
fallbacksAttempted: attempts,
|
|
4006
4403
|
warnings,
|
|
4007
|
-
trace: ["retried Reddit page through old.reddit"],
|
|
4404
|
+
trace: [...informationalFallbacks, "retried Reddit page through old.reddit"],
|
|
4008
4405
|
sourcePriority,
|
|
4009
4406
|
extractionMethod: "reddit:oldReddit",
|
|
4010
4407
|
retryInfo: redditRetryInfo(attempts)
|
|
@@ -4017,33 +4414,48 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
|
|
|
4017
4414
|
const htmlAttempt = await attemptFetch("redditHtmlFallback", requestedUrl, options);
|
|
4018
4415
|
attempts.push(htmlAttempt);
|
|
4019
4416
|
lastError = htmlAttempt.error;
|
|
4020
|
-
if (htmlAttempt.page) {
|
|
4021
|
-
if (htmlAttempt.blocked) {
|
|
4022
|
-
warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
|
|
4023
|
-
}
|
|
4417
|
+
if (htmlAttempt.page && htmlAttempt.ok) {
|
|
4024
4418
|
return {
|
|
4025
4419
|
page: htmlAttempt.page,
|
|
4026
4420
|
fallbacksAttempted: attempts,
|
|
4027
4421
|
warnings,
|
|
4028
|
-
trace: ["used Reddit HTML fallback"],
|
|
4422
|
+
trace: [...informationalFallbacks, "used Reddit HTML fallback"],
|
|
4029
4423
|
sourcePriority,
|
|
4030
4424
|
extractionMethod: "reddit:htmlFallback",
|
|
4031
4425
|
retryInfo: redditRetryInfo(attempts)
|
|
4032
4426
|
};
|
|
4033
4427
|
}
|
|
4428
|
+
if (htmlAttempt.blocked) {
|
|
4429
|
+
warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
|
|
4430
|
+
}
|
|
4431
|
+
const providerDiagnostics = redditProviderDiagnosticsFromAttempts(attempts);
|
|
4432
|
+
if (providerDiagnostics) {
|
|
4433
|
+
return {
|
|
4434
|
+
page: synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics),
|
|
4435
|
+
fallbacksAttempted: attempts,
|
|
4436
|
+
warnings: uniqueStrings4([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
|
|
4437
|
+
trace: [...informationalFallbacks, "Reddit provider blocked metadata extraction"],
|
|
4438
|
+
sourcePriority,
|
|
4439
|
+
extractionMethod: "reddit:blockedProvider",
|
|
4440
|
+
retryInfo: redditRetryInfo(attempts),
|
|
4441
|
+
providerDiagnostics
|
|
4442
|
+
};
|
|
4443
|
+
}
|
|
4034
4444
|
throw lastError ?? new Error("All Reddit extraction fetch attempts failed.");
|
|
4035
4445
|
}
|
|
4036
4446
|
async function attemptFetch(method, url, options) {
|
|
4037
4447
|
try {
|
|
4038
4448
|
const page = await fetchPage(url, options);
|
|
4039
4449
|
const retryAfter = page.headers["retry-after"];
|
|
4040
|
-
const
|
|
4450
|
+
const blockReason = redditBlockReason(page);
|
|
4451
|
+
const blocked = Boolean(blockReason);
|
|
4041
4452
|
return {
|
|
4042
4453
|
method,
|
|
4043
4454
|
url,
|
|
4044
4455
|
ok: page.statusCode >= 200 && page.statusCode < 300 && !blocked,
|
|
4045
4456
|
statusCode: page.statusCode,
|
|
4046
4457
|
blocked,
|
|
4458
|
+
blockReason,
|
|
4047
4459
|
retryAfter,
|
|
4048
4460
|
page
|
|
4049
4461
|
};
|
|
@@ -4056,7 +4468,7 @@ async function attemptFetch(method, url, options) {
|
|
|
4056
4468
|
};
|
|
4057
4469
|
}
|
|
4058
4470
|
}
|
|
4059
|
-
function
|
|
4471
|
+
function isRedditUrl2(url) {
|
|
4060
4472
|
try {
|
|
4061
4473
|
const host = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
|
|
4062
4474
|
return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
|
|
@@ -4172,46 +4584,163 @@ function findRedditPostRecord(value) {
|
|
|
4172
4584
|
return void 0;
|
|
4173
4585
|
}
|
|
4174
4586
|
/**
 * Collects every image candidate exposed by a Reddit post record.
 *
 * Candidates are gathered in this order: gallery media, the direct image
 * link, one entry per preview image (the original `source` when usable,
 * otherwise the largest entry of `resolutions`), and finally the post
 * thumbnail when it is an absolute http(s) URL. The combined list is passed
 * through `prioritizeRedditImages` and then de-duplicated by URL.
 *
 * @param {object} post - Reddit post record taken from the JSON payload.
 * @returns {object[]} Prioritized, URL-unique image assets.
 */
function redditImagesFromPost(post) {
  const collected = redditGalleryImagesFromPost(post).concat(redditDirectImagesFromPost(post));
  const previewEntries = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
  for (const entry of previewEntries) {
    if (!isRecord5(entry)) {
      continue;
    }
    // Prefer the original preview source; only look at the scaled
    // resolutions when the source yields no usable asset.
    const picked = redditImageFromRecord(entry.source, "previewOriginal") || redditImageFromRecord(
      largestRedditImageRecord(Array.isArray(entry.resolutions) ? entry.resolutions : []),
      "previewResolution"
    );
    if (picked) {
      collected.push(picked);
    }
  }
  const thumbnailUrl = redditMediaUrl(stringFromUnknown4(post.thumbnail));
  const thumbnailAsset = thumbnailUrl && /^https?:\/\//i.test(thumbnailUrl) ? redditImageAsset(thumbnailUrl, void 0, void 0, "thumbnail") : void 0;
  if (thumbnailAsset) {
    collected.push(thumbnailAsset);
  }
  return dedupeRedditImages(prioritizeRedditImages(collected));
}
|
|
4616
|
+
/**
 * Builds image assets from a post's `media_metadata` map.
 *
 * Entries referenced by `gallery_data.items[].media_id` are emitted first, in
 * gallery order; any remaining `media_metadata` entries follow in object
 * enumeration order. Entries that do not produce an asset are dropped.
 *
 * @param {object} post - Reddit post record taken from the JSON payload.
 * @returns {object[]} Gallery image assets, or an empty array when the post
 *   has no `media_metadata` record.
 */
function redditGalleryImagesFromPost(post) {
  const metadataById = post.media_metadata;
  if (!isRecord5(metadataById)) {
    return [];
  }
  const galleryItems = isRecord5(post.gallery_data) && Array.isArray(post.gallery_data.items) ? post.gallery_data.items : [];
  const galleryOrder = [];
  for (const item of galleryItems) {
    const mediaId = isRecord5(item) ? stringFromUnknown4(item.media_id) : void 0;
    if (mediaId) {
      galleryOrder.push(mediaId);
    }
  }
  const emittedIds = /* @__PURE__ */ new Set();
  const assets = [];
  // Pass 1: honour the explicit gallery ordering.
  for (const mediaId of galleryOrder) {
    const asset = redditImageFromMediaMetadata(metadataById[mediaId], mediaId);
    if (asset) {
      assets.push(asset);
      emittedIds.add(mediaId);
    }
  }
  // Pass 2: append metadata entries that pass 1 did not emit.
  const remaining = Object.entries(metadataById).filter(([mediaId]) => !emittedIds.has(mediaId));
  for (const [mediaId, record] of remaining) {
    const asset = redditImageFromMediaMetadata(record, mediaId);
    if (asset) {
      assets.push(asset);
    }
  }
  return assets;
}
|
|
4643
|
+
/**
 * Returns the post's own link as an image asset when it points directly at a
 * Reddit-hosted image (as decided by `isDirectRedditImageUrl`).
 *
 * The link is read from `url_overridden_by_dest`, falling back to `url`.
 * Width and height are borrowed from the first preview image's `source`
 * record when one exists.
 *
 * @param {object} post - Reddit post record taken from the JSON payload.
 * @returns {object[]} A one-element array with the asset, or an empty array.
 */
function redditDirectImagesFromPost(post) {
  const rawLink = stringFromUnknown4(post.url_overridden_by_dest) ?? stringFromUnknown4(post.url);
  const link = redditMediaUrl(rawLink);
  if (!link || !isDirectRedditImageUrl(link)) {
    return [];
  }
  const dimensions = previewSourceRecord(post);
  const width = numberFromUnknown2(dimensions?.width);
  const height = numberFromUnknown2(dimensions?.height);
  const asset = redditImageAsset(link, width, height, "directImage");
  if (!asset) {
    return [];
  }
  return [asset];
}
|
|
4657
|
+
/**
 * Converts one `media_metadata` entry into a gallery image asset.
 *
 * The URL is taken from the entry's `s` record, trying `u`, then `gif`, then
 * `mp4`; dimensions come from `x`/`y`, falling back to `width`/`height`. The
 * entry's `m` value is forwarded as the asset `type`.
 * NOTE(review): an `mp4` URL ends up on an asset whose kind is "image" —
 * confirm this is intended.
 *
 * @param {unknown} value - Candidate `media_metadata` entry.
 * @param {string} mediaId - Key of the entry, stored as `metadata.redditMediaId`.
 * @returns {object|undefined} The asset, or `undefined` when the entry is not
 *   a record, has no URL, or is rejected by `redditImageAsset`.
 */
function redditImageFromMediaMetadata(value, mediaId) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const sourceRecord = isRecord5(value.s) ? value.s : void 0;
  const rawUrl = stringFromUnknown4(sourceRecord?.u) ?? stringFromUnknown4(sourceRecord?.gif) ?? stringFromUnknown4(sourceRecord?.mp4);
  const url = redditMediaUrl(rawUrl);
  if (!url) {
    return void 0;
  }
  const width = numberFromUnknown2(sourceRecord?.x) ?? numberFromUnknown2(sourceRecord?.width);
  const height = numberFromUnknown2(sourceRecord?.y) ?? numberFromUnknown2(sourceRecord?.height);
  const baseAsset = redditImageAsset(url, width, height, "gallery", stringFromUnknown4(value.m));
  if (!baseAsset) {
    return void 0;
  }
  const metadata = { ...baseAsset.metadata, redditMediaId: mediaId };
  return { ...baseAsset, metadata };
}
|
|
4683
|
+
/**
 * Converts a preview-style image record into an image asset.
 *
 * The URL is read from `url` (falling back to `u`) and the dimensions from
 * `width`/`height` (falling back to `x`/`y`).
 *
 * @param {unknown} value - Candidate image record.
 * @param {string} redditMediaKind - Label stored in the asset metadata.
 * @returns {object|undefined} The asset, or `undefined` when `value` is not a
 *   record, has no URL, or is rejected by `redditImageAsset`.
 */
function redditImageFromRecord(value, redditMediaKind) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const rawUrl = stringFromUnknown4(value.url) ?? stringFromUnknown4(value.u);
  const url = redditMediaUrl(rawUrl);
  if (!url) {
    return void 0;
  }
  const width = numberFromUnknown2(value.width) ?? numberFromUnknown2(value.x);
  const height = numberFromUnknown2(value.height) ?? numberFromUnknown2(value.y);
  return redditImageAsset(url, width, height, redditMediaKind);
}
|
|
4698
|
+
/**
 * Assembles an adapter image asset attributed to the Reddit JSON endpoint and
 * returns it only when `isAllowedRedditImageCandidate` accepts it.
 *
 * @param {string} url - Image URL.
 * @param {number|undefined} width - Pixel width, when known.
 * @param {number|undefined} height - Pixel height, when known.
 * @param {string} redditMediaKind - Label stored as `metadata.redditMediaKind`.
 * @param {string} [type] - Optional value stored as the asset `type`.
 * @returns {object|undefined} The asset, or `undefined` when it is not allowed.
 */
function redditImageAsset(url, width, height, redditMediaKind, type) {
  const metadata = {
    adapter: "redditJsonEndpoint",
    originalSource: "redditJsonEndpoint",
    redditMediaKind
  };
  const candidate = {
    url,
    kind: "image",
    source: "adapter",
    width,
    height,
    type,
    metadata
  };
  if (!isAllowedRedditImageCandidate(candidate)) {
    return void 0;
  }
  return candidate;
}
|
|
4714
|
+
/**
 * Picks the record with the greatest `width * height` from a list of
 * candidates. Non-record entries are ignored and missing dimensions count
 * as 0.
 *
 * @param {unknown[]} values - Candidate image records.
 * @returns {object|undefined} The largest record, or `undefined` when the
 *   list contains no records.
 */
function largestRedditImageRecord(values) {
  const pixelArea = (record) => (numberFromUnknown2(record.width) ?? 0) * (numberFromUnknown2(record.height) ?? 0);
  // `filter` returns a fresh array, so sorting it does not touch the caller's list.
  const [largest] = values.filter(isRecord5).sort((left, right) => pixelArea(right) - pixelArea(left));
  return largest;
}
|
|
4719
|
+
/**
 * Returns the `source` record of the first record-shaped entry in
 * `post.preview.images`.
 *
 * @param {object} post - Reddit post record taken from the JSON payload.
 * @returns {object|undefined} The source record, or `undefined` when the post
 *   has no preview images or the first one has no record-shaped `source`.
 */
function previewSourceRecord(post) {
  if (!isRecord5(post.preview) || !Array.isArray(post.preview.images)) {
    return void 0;
  }
  const leadingImage = post.preview.images.find(isRecord5);
  if (!leadingImage || !isRecord5(leadingImage.source)) {
    return void 0;
  }
  return leadingImage.source;
}
|
|
4724
|
+
/**
 * Reports whether a URL points directly at a Reddit-hosted image, i.e. an
 * http(s) URL whose host is `i.redd.it` or `preview.redd.it`.
 *
 * The scheme is checked as well as the host so that non-web URLs such as
 * `ftp://i.redd.it/...` are not treated as image links; this matches the
 * http(s) requirement applied to thumbnails elsewhere in this file.
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} `true` for a direct Reddit image URL; `false` otherwise,
 *   including when `value` cannot be parsed as a URL.
 */
function isDirectRedditImageUrl(value) {
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    // Unparseable input is simply "not a direct image URL".
    return false;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return false;
  }
  const host = parsed.hostname.toLowerCase();
  return host === "i.redd.it" || host === "preview.redd.it";
}
|
|
4733
|
+
/**
 * Removes images whose `url` has already been seen, keeping the first
 * occurrence of each URL and preserving the input order.
 *
 * @param {object[]} images - Image assets, each with a `url` property.
 * @returns {object[]} A new array with one asset per distinct URL.
 */
function dedupeRedditImages(images) {
  // A Map keeps insertion order, so first-seen assets stay in place.
  const firstByUrl = /* @__PURE__ */ new Map();
  for (const candidate of images) {
    if (!firstByUrl.has(candidate.url)) {
      firstByUrl.set(candidate.url, candidate);
    }
  }
  return Array.from(firstByUrl.values());
}
|
|
4216
4745
|
function redditVideosFromPost(post) {
|
|
4217
4746
|
const videos = [];
|
|
@@ -4238,7 +4767,9 @@ function redditVideosFromPost(post) {
|
|
|
4238
4767
|
}
|
|
4239
4768
|
function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
|
|
4240
4769
|
const finalUrl = post.canonicalUrl ?? requestedUrl;
|
|
4241
|
-
const bestImage = post.images.
|
|
4770
|
+
const bestImage = post.images.slice().sort(
|
|
4771
|
+
(left, right) => redditImagePriority(right) - redditImagePriority(left) || (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0)
|
|
4772
|
+
)[0];
|
|
4242
4773
|
const video = post.videos[0];
|
|
4243
4774
|
const structuredData = {
|
|
4244
4775
|
"@context": "https://schema.org",
|
|
@@ -4294,8 +4825,52 @@ function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
|
|
|
4294
4825
|
statusCode: jsonPage.statusCode
|
|
4295
4826
|
};
|
|
4296
4827
|
}
|
|
4297
|
-
function
|
|
4298
|
-
|
|
4828
|
+
/**
 * Summarises blocked fetch attempts as a provider diagnostics record.
 *
 * The first attempt whose block reason is "provider_verification_required"
 * is preferred; otherwise the last blocked attempt is used.
 *
 * @param {object[]} attempts - Fetch attempts with `blocked`, `blockReason`
 *   and `statusCode` fields.
 * @returns {object|undefined} Diagnostics for the chosen attempt, or
 *   `undefined` when no attempt was blocked.
 */
function redditProviderDiagnosticsFromAttempts(attempts) {
  const blocked = [];
  for (const attempt of attempts) {
    if (attempt.blocked) {
      blocked.push(attempt);
    }
  }
  if (blocked.length === 0) {
    return void 0;
  }
  const verificationAttempt = blocked.find((attempt) => attempt.blockReason === "provider_verification_required");
  // `blocked` is non-empty here, so the fallback always yields an attempt.
  const chosen = verificationAttempt ?? blocked.at(-1);
  return {
    platform: "reddit",
    blocked: true,
    statusCode: chosen.statusCode,
    reason: chosen.blockReason ?? "provider_blocked_request",
    suggestedAction: PROVIDER_BLOCKED_SUGGESTED_ACTION
  };
}
|
|
4842
|
+
/**
 * Builds an empty placeholder page describing a Reddit request that the
 * provider blocked, so callers still receive a page-shaped result.
 *
 * Response details (short-URL flags, content type, redirects, headers) are
 * copied from one fetched attempt. That attempt is chosen to agree with the
 * diagnostics: first an attempt matching both the diagnostics' reason and
 * status code, then any attempt matching the reason, then the most recent
 * attempt that has a page. Matching on the status code keeps the copied
 * details and the reported `statusCode` from describing two different
 * responses when several attempts were blocked with the same reason.
 *
 * @param {string} requestedUrl - URL the caller asked for; used for `url`,
 *   `originalUrl` and `finalUrl`.
 * @param {object[]} attempts - Fetch attempts, each optionally holding a `page`.
 * @param {object} providerDiagnostics - Diagnostics with `reason` and
 *   optional `statusCode`.
 * @returns {object} Page record with empty `html`/`bytes`; `statusCode`
 *   defaults to 403 when neither the diagnostics nor the attempt supply one.
 */
function synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics) {
  const attemptsWithPage = attempts.filter((attempt) => attempt.page);
  const sameReason = attemptsWithPage.filter((attempt) => attempt.blockReason === providerDiagnostics.reason);
  const selectedAttempt = sameReason.find((attempt) => attempt.statusCode === providerDiagnostics.statusCode) ?? sameReason[0] ?? attemptsWithPage.at(-1);
  const selectedPage = selectedAttempt?.page;
  return {
    url: requestedUrl,
    originalUrl: requestedUrl,
    finalUrl: requestedUrl,
    isShortUrl: selectedPage?.isShortUrl ?? false,
    shortUrlProvider: selectedPage?.shortUrlProvider,
    html: "",
    bytes: new Uint8Array(),
    statusCode: providerDiagnostics.statusCode ?? selectedPage?.statusCode ?? 403,
    contentType: selectedPage?.contentType,
    redirects: selectedPage?.redirects ?? [],
    headers: selectedPage?.headers ?? {}
  };
}
|
|
4858
|
+
/**
 * Classifies a fetched Reddit response as blocked, if it looks blocked.
 *
 * Checks run in this order:
 *   1. verification wording in the page title or text
 *      -> "provider_verification_required";
 *   2. HTTP 403/429, block wording in the text, or a title of exactly
 *      "blocked" -> "provider_blocked_request".
 *
 * For a successful (2xx) JSON response only the title is scanned. Such a
 * body is post data, and user-written text containing words like
 * "forbidden" or "too many requests" would otherwise mark a good response
 * as blocked. Status-code detection is unaffected.
 * NOTE(review): assumes `page.contentType` holds the response Content-Type
 * string; when it is absent the previous whole-body scan still applies.
 * HTML bodies are still scanned in full, so the same false positive remains
 * possible on HTML fallbacks.
 *
 * @param {object} page - Fetched page with `html`, `statusCode` and
 *   optionally `contentType`.
 * @returns {string|undefined} The block reason, or `undefined` when the
 *   response does not look blocked.
 */
function redditBlockReason(page) {
  const title = htmlTitle(page.html);
  const contentType = typeof page.contentType === "string" ? page.contentType : "";
  const isSuccessfulJson = page.statusCode >= 200 && page.statusCode < 300 && /json/i.test(contentType);
  const text = isSuccessfulJson ? normalizeText(title ?? "") : normalizeText(`${title ?? ""} ${page.html}`);
  if (/reddit\s*-\s*please wait for verification/i.test(title ?? "") || /please wait for verification|verification required|verify you are human/i.test(text)) {
    return "provider_verification_required";
  }
  if (page.statusCode === 403 || page.statusCode === 429 || /whoa there, pardner|request has been blocked|too many requests|forbidden|you're blocked|you are blocked|youre blocked|blocked by network security/i.test(text) || /^blocked$/i.test(title ?? "")) {
    return "provider_blocked_request";
  }
  return void 0;
}
|
|
4869
|
+
/**
 * Extracts the contents of the first `<title>` element from an HTML string
 * and returns it after `normalizeText` has cleaned it up.
 *
 * @param {string} html - Raw document text.
 * @returns {string} Whatever `normalizeText` returns for the captured title
 *   (or for `undefined` when no `<title>` element is found).
 */
function htmlTitle(html) {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const rawTitle = titleMatch?.[1];
  return normalizeText(rawTitle);
}
|
|
4872
|
+
/**
 * Strips markup tags from a string and collapses whitespace.
 *
 * Each `<...>` tag is replaced with a space, runs of whitespace become a
 * single space, and the result is trimmed.
 *
 * @param {string|null|undefined} value - Text to clean.
 * @returns {string} The cleaned text; an empty string for `null`/`undefined`.
 */
function normalizeText(value) {
  if (value == null) {
    return "";
  }
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const collapsed = withoutTags.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
4300
4875
|
function redditRetryInfo(attempts) {
|
|
4301
4876
|
const blockedAttempts = attempts.filter((attempt) => attempt.blocked || attempt.statusCode === 429 || attempt.statusCode === 403);
|
|
@@ -4329,7 +4904,7 @@ function mergeFallbackAttempts2(existing, incoming) {
|
|
|
4329
4904
|
}
|
|
4330
4905
|
const seen = /* @__PURE__ */ new Set();
|
|
4331
4906
|
return attempts.map((value) => {
|
|
4332
|
-
const { page: _page, ...attempt } = value;
|
|
4907
|
+
const { page: _page, blockReason: _blockReason, ...attempt } = value;
|
|
4333
4908
|
return attempt;
|
|
4334
4909
|
}).filter((attempt) => {
|
|
4335
4910
|
const key = `${attempt.method}:${attempt.url ?? ""}:${attempt.statusCode ?? ""}:${attempt.error ?? ""}`;
|
|
@@ -4340,7 +4915,7 @@ function mergeFallbackAttempts2(existing, incoming) {
|
|
|
4340
4915
|
return true;
|
|
4341
4916
|
});
|
|
4342
4917
|
}
|
|
4343
|
-
function
|
|
4918
|
+
/**
 * Returns the distinct truthy entries of a list, in first-seen order.
 *
 * @param {Array<string|null|undefined>} values - Candidate strings.
 * @returns {string[]} A new array without falsy entries or duplicates.
 */
function uniqueStrings4(values) {
  const distinct = /* @__PURE__ */ new Set();
  for (const entry of values) {
    if (entry) {
      distinct.add(entry);
    }
  }
  return Array.from(distinct);
}
|
|
4346
4921
|
function redditMediaUrl(value) {
|