metanova 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1322,6 +1322,95 @@ function uniqueStrings(values) {
1322
1322
  return [...new Set(values.filter(Boolean))];
1323
1323
  }
1324
1324
 
1325
+ // src/utils/redditMedia.ts
1326
// URL fragments that disqualify a Reddit image candidate: the thumbnail host plus
// avatar / community icon / subreddit / icon / award / emoji assets. Matching is a
// case-insensitive substring search anywhere in the URL.
var REDDIT_BAD_IMAGE_URL_PATTERN = /thumbs\.redditmedia\.com|avatar|community_icon|subreddit|icon|award|emoji/i;

// Values of a candidate's `source` field that count as trusted on Reddit pages.
var TRUSTED_REDDIT_SOURCES = /* @__PURE__ */ new Set([
  // adapter and meta-tag style sources
  "adapter",
  "openGraph",
  "twitter",
  "jsonLd",
  "oEmbed",
  // embedded application state / JSON script sources
  "nextData",
  "nuxt",
  "initialState",
  "preloadedState",
  "apollo",
  "applicationJson",
  "jsonScript"
]);
1341
/**
 * Reports whether `value` is a URL whose host is reddit.com, redd.it, or any
 * subdomain of reddit.com (a leading "www." is ignored).
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} `false` when the value cannot be parsed as a URL or the host does not match.
 */
function isRedditUrl(value) {
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    // Unparseable input is simply "not a Reddit URL".
    return false;
  }
  const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
  if (host === "reddit.com" || host === "redd.it") {
    return true;
  }
  return host.endsWith(".reddit.com");
}
1349
/**
 * Narrows image candidates for a Reddit page and orders them by Reddit priority.
 *
 * Candidates rejected by isAllowedRedditImageCandidate are dropped. If any of
 * the remaining candidates are trusted, only those are kept; otherwise all
 * allowed candidates are kept.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} A new, prioritized array.
 */
function filterRedditImageCandidates(images) {
  const allowed = [];
  const trusted = [];
  for (const candidate of images) {
    if (!isAllowedRedditImageCandidate(candidate)) {
      continue;
    }
    allowed.push(candidate);
    if (isTrustedRedditImageCandidate(candidate)) {
      trusted.push(candidate);
    }
  }
  const pool = trusted.length > 0 ? trusted : allowed;
  return prioritizeRedditImages(pool);
}
1354
/**
 * Returns a sorted copy of `images`: highest redditImagePriority first, ties
 * broken by larger pixel area. The input array is not modified.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} Sorted copy.
 */
function prioritizeRedditImages(images) {
  const byPriorityThenArea = (left, right) => {
    const priorityDelta = redditImagePriority(right) - redditImagePriority(left);
    return priorityDelta || imageArea(right) - imageArea(left);
  };
  const ordered = images.slice();
  ordered.sort(byPriorityThenArea);
  return ordered;
}
1359
/**
 * Decides whether an image candidate may be used for a Reddit page.
 *
 * A candidate is rejected when a known width or height is below 200, or when
 * its URL matches REDDIT_BAD_IMAGE_URL_PATTERN. Unknown (undefined)
 * dimensions do not cause rejection.
 *
 * @param {{url: string, width?: number, height?: number}} image - Candidate.
 * @returns {boolean} `true` when the candidate is acceptable.
 */
function isAllowedRedditImageCandidate(image) {
  const isKnownAndTooSmall = (edge) => edge !== void 0 && edge < 200;
  if (isKnownAndTooSmall(image.width) || isKnownAndTooSmall(image.height)) {
    return false;
  }
  const looksLikeBadAsset = REDDIT_BAD_IMAGE_URL_PATTERN.test(image.url);
  return !looksLikeBadAsset;
}
1368
/**
 * Ranks a media asset for Reddit pages; larger numbers are preferred.
 *
 * Checks run in this order and the first match wins:
 *   metadata.redditMediaKind "gallery" (700), "previewOriginal" (620),
 *   "directImage" (580); then URL host preview.redd.it (560), i.redd.it (540);
 *   then source "openGraph"/"twitter" (420); then external-preview.redd.it (300),
 *   thumbs.redditmedia.com (1); finally any other Reddit media URL (250) or 0.
 *
 * @param {{url: string, source?: string, metadata?: object}} image - Asset to rank.
 * @returns {number} Priority value.
 */
function redditImagePriority(image) {
  const mediaKind = image.metadata?.redditMediaKind;
  // Lower-cased once, so the host checks below can be plain substring tests.
  const url = image.url.toLowerCase();
  switch (mediaKind) {
    case "gallery":
      return 700;
    case "previewOriginal":
      return 620;
    case "directImage":
      return 580;
    default:
      break;
  }
  if (url.includes("//preview.redd.it/")) {
    return 560;
  }
  if (url.includes("//i.redd.it/")) {
    return 540;
  }
  const fromSocialMeta = image.source === "openGraph" || image.source === "twitter";
  if (fromSocialMeta) {
    return 420;
  }
  if (url.includes("//external-preview.redd.it/")) {
    return 300;
  }
  if (url.includes("//thumbs.redditmedia.com/")) {
    return 1;
  }
  return isRedditMediaUrl(url) ? 250 : 0;
}
1397
/**
 * Reports whether any image carries Reddit-specific metadata: adapter
 * "redditAdapter", originalSource "redditJsonEndpoint", or a truthy
 * redditMediaKind.
 *
 * @param {Array<{metadata?: object}>} images - Image candidates.
 * @returns {boolean} `true` when at least one image is Reddit-tagged.
 */
function hasRedditImageContext(images) {
  const isRedditTagged = ({ metadata }) => {
    if (metadata?.adapter === "redditAdapter") {
      return true;
    }
    if (metadata?.originalSource === "redditJsonEndpoint") {
      return true;
    }
    return Boolean(metadata?.redditMediaKind);
  };
  return images.some(isRedditTagged);
}
1404
/**
 * Tests whether `value` points at a Reddit media host: i.redd.it,
 * preview.redd.it, external-preview.redd.it, thumbs.redditmedia.com or
 * v.redd.it. The host must be preceded by "//" and followed by "/".
 *
 * @param {string} value - URL text.
 * @returns {boolean} `true` on a match (case-insensitive).
 */
function isRedditMediaUrl(value) {
  const redditMediaHost = /(?:^https?:)?\/\/(?:(?:i|preview|external-preview)\.redd\.it|thumbs\.redditmedia\.com|v\.redd\.it)\//i;
  return redditMediaHost.test(value);
}
1407
/**
 * A candidate is trusted when its `source` is listed in TRUSTED_REDDIT_SOURCES
 * or when it carries a truthy metadata.redditMediaKind.
 *
 * @param {{source?: string, metadata?: object}} image - Candidate.
 * @returns {boolean} Whether the candidate is trusted.
 */
function isTrustedRedditImageCandidate(image) {
  if (TRUSTED_REDDIT_SOURCES.has(image.source)) {
    return true;
  }
  return Boolean(image.metadata?.redditMediaKind);
}
1410
/**
 * Pixel area of an image; a missing (null/undefined) width or height counts as 0.
 *
 * @param {{width?: number, height?: number}} image - Image candidate.
 * @returns {number} width × height.
 */
function imageArea(image) {
  const width = image.width ?? 0;
  const height = image.height ?? 0;
  return width * height;
}
1413
+
1325
1414
  // src/scorers/image.ts
1326
1415
  var SOURCE_WEIGHT = {
1327
1416
  adapter: 98,
@@ -1342,6 +1431,7 @@ var SOURCE_WEIGHT = {
1342
1431
  };
1343
1432
  function scoreImages(images, customScorers = []) {
1344
1433
  const duplicateCounts = countDuplicates(images);
1434
+ const redditContext = hasRedditImageContext(images);
1345
1435
  return images.map((image, index) => {
1346
1436
  const scored = scoreImageWithDetails(image, index, images, duplicateCounts);
1347
1437
  const customScore = customScorers.reduce((total, scorer) => total + scorer(image, { index, images }), 0);
@@ -1357,7 +1447,7 @@ function scoreImages(images, customScorers = []) {
1357
1447
  }
1358
1448
  };
1359
1449
  }).sort(
1360
- (left, right) => (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea(right) - imageArea(left)
1450
+ (left, right) => (redditContext ? redditImagePriority(right) - redditImagePriority(left) : 0) || (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea2(right) - imageArea2(left)
1361
1451
  );
1362
1452
  }
1363
1453
  function selectBestImage(images, customScorers = []) {
@@ -1375,15 +1465,17 @@ function scoreImageWithDetails(image, index, images, duplicateCounts) {
1375
1465
  const dimensions = scoreDimensions(image);
1376
1466
  const format = scoreFormat(image);
1377
1467
  const urlSignal = scoreUrlSignal(image);
1468
+ const redditMedia = scoreRedditMedia(image);
1378
1469
  const urlPenalty = scoreUrlPenalty(image);
1379
1470
  const duplicatePenalty = scoreDuplicatePenalty(image, duplicateCounts);
1380
1471
  score += dimensions.score;
1381
1472
  score += format.score;
1382
1473
  score += urlSignal.score;
1474
+ score += redditMedia.score;
1383
1475
  score -= urlPenalty;
1384
1476
  score -= duplicatePenalty.score;
1385
1477
  score -= Math.min(index * 1.5, 10);
1386
- reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...duplicatePenalty.reasons);
1478
+ reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...redditMedia.reasons, ...duplicatePenalty.reasons);
1387
1479
  if (images.length === 1) {
1388
1480
  score += 4;
1389
1481
  reasons.push("only candidate image added 4 points");
@@ -1483,6 +1575,33 @@ function platformThumbnailScore(url) {
1483
1575
  }
1484
1576
  return { score: 0, reasons: [] };
1485
1577
  }
1578
/**
 * Score adjustment for Reddit-hosted or Reddit-tagged images.
 *
 * Returns a neutral result when the image has zero Reddit priority and is not
 * a Reddit media URL. Otherwise the first matching rule applies:
 * redditMediaKind "gallery" (+24) or "previewOriginal" (+20), then URL host
 * i.redd.it (+18), preview.redd.it (+16), external-preview.redd.it (-8),
 * thumbs.redditmedia.com (-60); anything else is neutral.
 *
 * @param {{url: string, source?: string, metadata?: object}} image - Candidate.
 * @returns {{score: number, reasons: string[]}} Adjustment and its explanation.
 */
function scoreRedditMedia(image) {
  const neutral = () => ({ score: 0, reasons: [] });
  const adjusted = (score, reason) => ({ score, reasons: [reason] });

  const priority = redditImagePriority(image);
  if (priority === 0 && !isRedditMediaUrl(image.url)) {
    return neutral();
  }

  const url = image.url.toLowerCase();
  const mediaKind = image.metadata?.redditMediaKind;
  switch (mediaKind) {
    case "gallery":
      return adjusted(24, "Reddit gallery media added 24 points");
    case "previewOriginal":
      return adjusted(20, "Reddit original preview media added 20 points");
    default:
      break;
  }

  // The url is already lower-cased, so substring checks suffice for the hosts.
  if (url.includes("//i.redd.it/")) {
    return adjusted(18, "Reddit direct image media added 18 points");
  }
  if (url.includes("//preview.redd.it/")) {
    return adjusted(16, "Reddit preview media added 16 points");
  }
  if (url.includes("//external-preview.redd.it/")) {
    return adjusted(-8, "Reddit external preview media subtracted 8 points");
  }
  if (url.includes("//thumbs.redditmedia.com/")) {
    return adjusted(-60, "Reddit thumbnail host subtracted 60 points");
  }
  return neutral();
}
1486
1605
  function scoreUrlPenalty(image) {
1487
1606
  const url = image.url.toLowerCase();
1488
1607
  let penalty = 0;
@@ -1555,7 +1674,7 @@ function countDuplicates(images) {
1555
1674
  }
1556
1675
  return counts;
1557
1676
  }
1558
/**
 * Pixel area used as the final sort tie-breaker; a null/undefined dimension
 * is treated as 0.
 *
 * @param {{width?: number, height?: number}} image - Image candidate.
 * @returns {number} width × height.
 */
function imageArea2(image) {
  const width = image.width ?? 0;
  const height = image.height ?? 0;
  return width * height;
}
1561
1680
  function sourceSortWeight(image) {
@@ -1673,8 +1792,9 @@ function discoverMedia(rawSources, finalUrl) {
1673
1792
  if (externalResults.some((result) => (result.images?.length ?? 0) > 0 || (result.videos?.length ?? 0) > 0)) {
1674
1793
  trace.push("media discovery included adapter and plugin media");
1675
1794
  }
1795
+ const dedupedImages = dedupeMediaBySignature(images);
1676
1796
  return {
1677
- images: dedupeMediaBySignature(uniqueMediaByUrl(images)),
1797
+ images: isRedditUrl(finalUrl) ? filterRedditImageCandidates(dedupedImages) : dedupedImages,
1678
1798
  videos: dedupeMediaBySignature(uniqueMediaByUrl(videos)),
1679
1799
  audio: dedupeMediaBySignature(uniqueMediaByUrl(audio)),
1680
1800
  trace
@@ -1766,7 +1886,8 @@ function mediaFromJsonValue(value, kind, source) {
1766
1886
  height: parseNumber(stringFromUnknown(value.height)) ?? parseNumber(stringFromUnknown(nestedDetails?.height)),
1767
1887
  alt: stringFromUnknown(value.alt) ?? stringFromUnknown(value.caption) ?? stringFromUnknown(value.name) ?? stringFromUnknown(nestedDetails?.alt),
1768
1888
  title: stringFromUnknown(value.title) ?? stringFromUnknown(nestedDetails?.title),
1769
- type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type)
1889
+ type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type),
1890
+ metadata: isRecord3(value.metadata) ? value.metadata : void 0
1770
1891
  },
1771
1892
  ...srcsetAssets
1772
1893
  ];
@@ -1874,7 +1995,7 @@ function dedupeMediaBySignature(assets) {
1874
1995
  for (const asset of assets) {
1875
1996
  const key = mediaSignature2(asset.url);
1876
1997
  const current = seen.get(key);
1877
- if (!current || sourceRank(asset.source) > sourceRank(current.source)) {
1998
+ if (!current || mediaRank(asset) > mediaRank(current)) {
1878
1999
  seen.set(key, asset);
1879
2000
  }
1880
2001
  }
@@ -1908,6 +2029,10 @@ function sourceRank(source) {
1908
2029
  };
1909
2030
  return ranks[source] ?? 50;
1910
2031
  }
2032
/**
 * Rank used when de-duplicating media that share a signature.
 *
 * Assets with a positive Reddit priority rank at 1000 + that priority; every
 * other asset falls back to sourceRank of its `source`.
 *
 * @param {{url: string, source?: string, metadata?: object}} asset - Media asset.
 * @returns {number} Rank; higher wins.
 */
function mediaRank(asset) {
  const redditPriority = redditImagePriority(asset);
  if (redditPriority > 0) {
    return 1e3 + redditPriority;
  }
  return sourceRank(asset.source);
}
1911
2036
  function shouldIgnoreMediaUrl2(url) {
1912
2037
  const normalized = url.toLowerCase();
1913
2038
  return normalized.startsWith("data:") || normalized.startsWith("blob:") || normalized.startsWith("javascript:") || /(?:sprite|spacer|blank|transparent|placeholder|tracking|beacon|pixel|emoji|favicon|apple-touch-icon)(?:[._/-]|$|\?)/i.test(normalized) || /(?:^|[/?_-])1x1(?:[._/-]|$|\?)/i.test(normalized);
@@ -2232,7 +2357,7 @@ function normalizeMetadata(rawSources, context = {}) {
2232
2357
  ...mediaDiscovery.trace,
2233
2358
  ...selectedImage.best ? [`selected image from ${sourceLabel2(selectedImage.best)}`] : []
2234
2359
  ]);
2235
- return stripUndefined2({
2360
+ const normalized = stripUndefined2({
2236
2361
  ok: true,
2237
2362
  url,
2238
2363
  finalUrl,
@@ -2262,6 +2387,12 @@ function normalizeMetadata(rawSources, context = {}) {
2262
2387
  diagnostics,
2263
2388
  trace: diagnostics.trace
2264
2389
  });
2390
+ normalized.images = selectedImage.images;
2391
+ normalized.videos = videos;
2392
+ normalized.audio = audio;
2393
+ normalized.favicons = favicons;
2394
+ normalized.trace = diagnostics.trace;
2395
+ return normalized;
2265
2396
  }
2266
2397
  function normalizeAssets2(assets, baseUrl) {
2267
2398
  return assets.map((asset) => {
@@ -2897,8 +3028,8 @@ var redditAdapter = {
2897
3028
  type: reddit.isPost ? "social_post" : "website",
2898
3029
  siteName: "Reddit",
2899
3030
  canonicalUrl: context.raw.openGraph.url ?? context.raw.html.canonicalUrl,
2900
- title: cleanSocialTitle(titleSelection.value),
2901
- description: descriptionSelection.value,
3031
+ title: cleanRedditTitle(titleSelection.value),
3032
+ description: cleanRedditDescription(descriptionSelection.value),
2902
3033
  images: markAdapterMedia(mediaFromContext(context).images, "redditAdapter"),
2903
3034
  videos: markAdapterMedia(mediaFromContext(context).videos, "redditAdapter"),
2904
3035
  author: username ? { name: username } : entityFromContext(context, ["author", "submitter", "user"]),
@@ -2984,7 +3115,7 @@ var tiktokAdapter = {
2984
3115
  return this.detect?.(url) ?? false;
2985
3116
  },
2986
3117
  extract(context) {
2987
- return socialVideoResult("tiktokAdapter", "TikTok", context);
3118
+ return tiktokResult(context);
2988
3119
  },
2989
3120
  normalize(rawData) {
2990
3121
  return normalizePlatformResult(rawData);
@@ -3203,25 +3334,215 @@ function redditDescriptionFromContext(context) {
3203
3334
  }
3204
3335
  return { value: context.raw.html.description, method: context.raw.html.description ? "reddit:html" : void 0 };
3205
3336
  }
3206
/**
 * Builds the TikTok adapter result from the page's final URL and embedded data.
 *
 * The username comes from the "@user" path segment. The post id is the segment
 * after /video/ or /photo/, falling back to the last non-empty path segment.
 * Title, description, author, media and publish time are taken from the
 * matching item struct when available, otherwise from the generic context
 * helpers.
 *
 * @param {object} context - Adapter context; reads `finalUrl` and `raw.openGraph.url` here.
 * @returns {object} Whatever compactAdapterResult returns for the assembled fields.
 */
function tiktokResult(context) {
  const { pathname } = new URL(context.finalUrl);
  const username = pathname.match(/@([^/]+)/)?.[1];
  const explicitPostId = pathname.match(/\/(?:video|photo)\/([^/]+)/)?.[1];
  const postId = explicitPostId ?? pathname.split("/").filter(Boolean).at(-1);

  const itemStruct = tiktokItemStructFromContext(context, postId);
  const { value: title, method: titleMethod } = tiktokTitleFromContext(context, itemStruct, username);
  const { value: description, method: descriptionMethod } = tiktokDescriptionFromContext(context, itemStruct);
  const author = tiktokAuthorFromItemStruct(itemStruct, username);
  const { images, videos } = tiktokMediaFromContext(context, itemStruct);
  const publishedTime = tiktokPublishedTime(itemStruct) ?? publishedTimeFromContext(context);

  return compactAdapterResult({
    source: "tiktokAdapter",
    platform: "TikTok",
    type: "social_post",
    siteName: "TikTok",
    canonicalUrl: context.raw.openGraph.url,
    title,
    description,
    images: markAdapterMedia(images, "tiktokAdapter"),
    videos: markAdapterMedia(videos, "tiktokAdapter"),
    author,
    article: { publishedTime },
    // Video details are only emitted when a post id could be derived from the URL.
    video: postId ? {
      id: postId,
      title,
      channel: author,
      publishedTime,
      duration: tiktokVideoDuration(itemStruct),
      viewCount: tiktokStatCount(itemStruct, "playCount")
    } : void 0,
    identifiers: { username, postId },
    raw: {
      extractionMethod: titleMethod ?? descriptionMethod ?? "tiktok:htmlFallback"
    }
  });
}
3373
/**
 * Picks a title for a TikTok post, trying sources in order and stopping at the
 * first non-empty one: item struct description, music title, generic context
 * title, and finally a "TikTok post by @user" placeholder built from the URL.
 *
 * @param {object} context - Adapter context passed to titleFromContext.
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{value: (string|undefined), method: (string|undefined)}} Title and the method that produced it.
 */
function tiktokTitleFromContext(context, itemStruct, username) {
  // Each resolver runs lazily so later sources are only consulted when needed.
  const resolvers = [
    ["tiktok:itemStruct.desc", () => cleanTikTokText(stringFromUnknown3(itemStruct?.desc))],
    ["tiktok:itemStruct.music", () => tiktokMusicTitle(itemStruct)],
    ["tiktok:fallback", () => cleanTikTokText(titleFromContext(context, ["desc", "caption", "title", "description"]))]
  ];
  for (const [method, resolve] of resolvers) {
    const value = resolve();
    if (value) {
      return { value, method };
    }
  }
  if (!username) {
    return { value: void 0, method: void 0 };
  }
  return { value: `TikTok post by @${username}`, method: "tiktok:urlFallback" };
}
3391
/**
 * Picks a description for a TikTok post: the cleaned item struct description
 * when present, otherwise the cleaned generic context description.
 *
 * The returned `method` is only set when a value was actually found. That
 * matches tiktokTitleFromContext and lets callers fall through to their own
 * default extraction-method label when nothing was extracted.
 *
 * @param {object} context - Adapter context passed to descriptionFromContext.
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {{value: (string|undefined), method: (string|undefined)}} Description and its origin.
 */
function tiktokDescriptionFromContext(context, itemStruct) {
  const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
  if (desc) {
    return { value: desc, method: "tiktok:itemStruct.desc" };
  }
  const fallback = cleanTikTokText(descriptionFromContext(context));
  return {
    value: fallback,
    // Do not claim a method for a description that was never found.
    method: fallback ? "tiktok:fallback" : void 0
  };
}
3401
/**
 * Locates the item struct describing a post inside the page's embedded data.
 *
 * Pass 1 looks at the fixed path
 * data.__DEFAULT_SCOPE__["webapp.video-detail"].itemInfo.itemStruct of every
 * embedded item. Pass 2 walks each item's data for any record stored under
 * the key "itemStruct". In both passes a candidate is accepted when `postId`
 * is empty or equals the candidate's `id`.
 *
 * @param {object} context - Adapter context; reads `raw.embeddedData.items`.
 * @param {string|undefined} postId - Post id parsed from the URL.
 * @returns {object|undefined} The matching item struct, if any.
 */
function tiktokItemStructFromContext(context, postId) {
  const embeddedItems = context.raw.embeddedData.items;
  const matchesPost = (candidate) => !postId || stringFromUnknown3(candidate.id) === postId;
  const knownPath = ["__DEFAULT_SCOPE__", "webapp.video-detail", "itemInfo", "itemStruct"];

  for (const item of embeddedItems) {
    let node = item.data;
    for (const key of knownPath) {
      const child = node[key];
      node = isRecord4(child) ? child : void 0;
      if (!node) {
        break;
      }
    }
    if (node && matchesPost(node)) {
      return node;
    }
  }

  for (const item of embeddedItems) {
    let match;
    walkData(item.data, (value, key) => {
      // Keep the first match; later visits are ignored.
      if (match || key !== "itemStruct" || !isRecord4(value)) {
        return;
      }
      if (matchesPost(value)) {
        match = value;
      }
    });
    if (match) {
      return match;
    }
  }
  return void 0;
}
3427
/**
 * Combines media taken from the item struct with media discovered in the
 * context; item struct entries come first in each list.
 *
 * @param {object} context - Adapter context passed to mediaFromContext.
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {{images: Array<object>, videos: Array<object>}} Merged media lists.
 */
function tiktokMediaFromContext(context, itemStruct) {
  const { images: discoveredImages, videos: discoveredVideos } = mediaFromContext(context);
  const images = [...tiktokImagesFromItemStruct(itemStruct), ...discoveredImages];
  const videos = [...tiktokVideosFromItemStruct(itemStruct), ...discoveredVideos];
  return { images, videos };
}
3434
/**
 * Builds image assets from the cover fields of an item struct's `video`
 * record (originCover, cover, dynamicCover, shareCover), de-duplicated.
 * Every asset carries the video's width/height and is tagged
 * metadata.tiktokMediaKind = "videoCover".
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {Array<object>} Image assets; empty when there is no video record.
 */
function tiktokImagesFromItemStruct(itemStruct) {
  const video = isRecord4(itemStruct?.video) ? itemStruct.video : void 0;
  if (!video) {
    return [];
  }
  const width = numberFromUnknown(video.width);
  const height = numberFromUnknown(video.height);
  const singleCovers = ["originCover", "cover", "dynamicCover"].map((field) => stringFromUnknown3(video[field]));
  const coverUrls = uniqueStrings3([...singleCovers, ...urlsFromUnknown(video.shareCover)]);
  const toAsset = (url) => ({
    url,
    kind: "image",
    source: "applicationJson",
    width,
    height,
    metadata: {
      tiktokMediaKind: "videoCover"
    }
  });
  return coverUrls.map(toAsset);
}
3458
/**
 * Builds video assets from an item struct's `video` record. URLs are gathered
 * from playAddr, downloadAddr, PlayAddrStruct.UrlList and each
 * bitrateInfo[].PlayAddr.UrlList, de-duplicated, and limited to http(s) URLs.
 * Every asset is typed "video/mp4" and tagged
 * metadata.tiktokMediaKind = "videoPlay".
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {Array<object>} Video assets; empty when there is no video record.
 */
function tiktokVideosFromItemStruct(itemStruct) {
  const video = isRecord4(itemStruct?.video) ? itemStruct.video : void 0;
  if (!video) {
    return [];
  }
  const width = numberFromUnknown(video.width);
  const height = numberFromUnknown(video.height);

  const candidates = [stringFromUnknown3(video.playAddr), stringFromUnknown3(video.downloadAddr)];
  candidates.push(...urlsFromTikTokPlayAddr(video.PlayAddrStruct));
  if (Array.isArray(video.bitrateInfo)) {
    for (const entry of video.bitrateInfo) {
      if (isRecord4(entry)) {
        candidates.push(...urlsFromTikTokPlayAddr(entry.PlayAddr));
      }
    }
  }

  const isHttpUrl = (url) => /^https?:\/\//i.test(url);
  const toAsset = (url) => ({
    url,
    kind: "video",
    source: "applicationJson",
    width,
    height,
    type: "video/mp4",
    metadata: {
      tiktokMediaKind: "videoPlay"
    }
  });
  return uniqueStrings3(candidates).filter(isHttpUrl).map(toAsset);
}
3483
/**
 * Extracts the URLs held in a play-address record's `UrlList` field.
 *
 * @param {unknown} value - Candidate play-address record.
 * @returns {string[]} URLs, or an empty array when `value` is not a record.
 */
function urlsFromTikTokPlayAddr(value) {
  return isRecord4(value) ? urlsFromUnknown(value.UrlList) : [];
}
3489
/**
 * Builds the author entity for a TikTok post.
 *
 * The display name is the item struct author's nickname, then uniqueId, then
 * the username parsed from the URL. The profile URL is built from the URL
 * username; when the URL carried no "@user" segment it now falls back to the
 * author's uniqueId instead of being omitted.
 * NOTE(review): assumes author.uniqueId is the account's @handle — confirm
 * against real TikTok payloads.
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{name: string, url: (string|undefined)}|undefined} Author, or undefined when no name is known.
 */
function tiktokAuthorFromItemStruct(itemStruct, username) {
  const author = isRecord4(itemStruct?.author) ? itemStruct.author : void 0;
  const uniqueId = stringFromUnknown3(author?.uniqueId);
  const name = stringFromUnknown3(author?.nickname) ?? uniqueId ?? username;
  if (!name) {
    return void 0;
  }
  // Prefer the handle from the URL (unchanged behavior); otherwise use the payload's handle.
  const handle = username || uniqueId;
  return {
    name,
    url: handle ? `https://www.tiktok.com/@${handle}` : void 0
  };
}
3500
/**
 * Converts an item struct's `createTime` (multiplied by 1000 to get
 * milliseconds) into an ISO-8601 timestamp.
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {string|undefined} ISO timestamp, or undefined when createTime is
 *   missing, zero, or does not map to a representable date.
 */
function tiktokPublishedTime(itemStruct) {
  const created = numberFromUnknown(itemStruct?.createTime);
  if (!created) {
    return void 0;
  }
  const published = new Date(created * 1e3);
  // toISOString() throws RangeError on an invalid Date; a malformed timestamp
  // should yield "no publish time" rather than abort the whole extraction.
  if (Number.isNaN(published.getTime())) {
    return void 0;
  }
  return published.toISOString();
}
3504
/**
 * Reads `video.duration` from an item struct via stringFromUnknown3.
 * NOTE(review): the result depends on how stringFromUnknown3 treats numeric
 * input, which is not visible here — confirm numeric durations survive.
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {string|undefined} Duration as returned by stringFromUnknown3.
 */
function tiktokVideoDuration(itemStruct) {
  const videoRecord = itemStruct?.video;
  const duration = isRecord4(videoRecord) ? itemStruct.video.duration : void 0;
  return stringFromUnknown3(duration);
}
3508
/**
 * Reads one counter from an item struct's `stats` record.
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @param {string} key - Name of the stat (for example "playCount").
 * @returns {number|undefined} Result of numberFromUnknown on that stat.
 */
function tiktokStatCount(itemStruct, key) {
  const statsRecord = itemStruct?.stats;
  const rawCount = isRecord4(statsRecord) ? itemStruct.stats[key] : void 0;
  return numberFromUnknown(rawCount);
}
3512
/**
 * Derives a title from an item struct's `music` record.
 *
 * Returns "<title> - <authorName>" when both are present and the title does
 * not contain "original sound"; otherwise just the title.
 *
 * @param {object|undefined} itemStruct - Item struct for the post, if found.
 * @returns {string|undefined} Music-based title, or undefined without a usable music title.
 */
function tiktokMusicTitle(itemStruct) {
  const music = isRecord4(itemStruct?.music) ? itemStruct.music : void 0;
  const title = cleanTikTokText(stringFromUnknown3(music?.title));
  const author = cleanTikTokText(stringFromUnknown3(music?.authorName));
  if (!title) {
    return void 0;
  }
  const isOriginalSound = /original sound/i.test(title);
  return author && !isOriginalSound ? `${title} - ${author}` : title;
}
3524
/**
 * Normalizes TikTok text: collapses whitespace runs to single spaces and
 * trims. Empty results and text flagged by isLowQualityTikTokText are
 * discarded.
 *
 * @param {string|null|undefined} value - Raw text.
 * @returns {string|undefined} Cleaned text, or undefined when unusable.
 */
function cleanTikTokText(value) {
  const collapsed = value?.replace(/\s+/g, " ").trim();
  const usable = Boolean(collapsed) && !isLowQualityTikTokText(collapsed);
  return usable ? collapsed : void 0;
}
3531
/**
 * Flags text that should not be used as a TikTok title or description:
 * brace-delimited placeholders, shop/shipping promo copy, and a few generic
 * TikTok page titles.
 *
 * @param {string} value - Text to inspect.
 * @returns {boolean} `true` when any low-quality pattern matches.
 */
function isLowQualityTikTokText(value) {
  const lowQualityPatterns = [
    /\{[^}]+\}/,
    /tiktok\s*shop|free shipping|eligible items|exclusive collections/i,
    /^tiktok live creator networks$/i,
    /^tiktok\s*-\s*make your day$/i,
    /^discover (?:new |popular )?videos?\b.*\btiktok\b/i
  ];
  return lowQualityPatterns.some((pattern) => pattern.test(value));
}
3534
/**
 * Collects trimmed, non-empty strings from a value that may be a string or an
 * arbitrarily nested array of strings. Anything else contributes nothing.
 *
 * @param {unknown} value - String, array, or other value.
 * @returns {string[]} Flattened list of trimmed strings.
 */
function urlsFromUnknown(value) {
  if (Array.isArray(value)) {
    return value.flatMap((entry) => urlsFromUnknown(entry));
  }
  if (typeof value !== "string") {
    return [];
  }
  const trimmed = value.trim();
  return trimmed ? [trimmed] : [];
}
3543
/**
 * Removes falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array<string|null|undefined>} values - Candidate strings.
 * @returns {string[]} Unique truthy values.
 */
function uniqueStrings3(values) {
  const seen = new Set();
  for (const value of values) {
    if (value) {
      seen.add(value);
    }
  }
  return [...seen];
}
3225
3546
  function normalizePlatformResult(rawData) {
3226
3547
  const type = rawData.type ?? inferAdapterType(rawData);
3227
3548
  return compactAdapterResult({
@@ -3572,6 +3893,20 @@ function parseRedditUrl(url) {
3572
3893
  function cleanSocialTitle(title) {
3573
3894
  return title?.replace(/\s*:\s*r\/[A-Za-z0-9_]+$/i, "").trim();
3574
3895
  }
3896
/**
 * Cleans a Reddit title with cleanSocialTitle and discards it when it is
 * empty or contains the wording of Reddit's verification / rate-limit pages.
 *
 * @param {string|null|undefined} title - Raw title.
 * @returns {string|undefined} Usable title, or undefined.
 */
function cleanRedditTitle(title) {
  const cleaned = cleanSocialTitle(title);
  if (!cleaned) {
    return void 0;
  }
  const isBlockPageTitle = /reddit\s*-\s*please wait for verification|please wait for verification|whoa there, pardner/i.test(cleaned);
  return isBlockPageTitle ? void 0 : cleaned;
}
3903
/**
 * Collapses whitespace in a Reddit description and discards it when it is
 * empty or contains the wording of Reddit's verification / block pages.
 *
 * @param {string|null|undefined} description - Raw description.
 * @returns {string|undefined} Usable description, or undefined.
 */
function cleanRedditDescription(description) {
  const cleaned = description?.replace(/\s+/g, " ").trim();
  if (!cleaned) {
    return void 0;
  }
  const isBlockPageText = /please wait for verification|whoa there, pardner|request has been blocked/i.test(cleaned);
  return isBlockPageText ? void 0 : cleaned;
}
3575
3910
  function hostMatches(url, domains) {
3576
3911
  const host = url.hostname.toLowerCase().replace(/^www\./, "");
3577
3912
  return domains.some((domain) => host === domain || host.endsWith(`.${domain}`));
@@ -3961,12 +4296,17 @@ function ascii(bytes, offset, length) {
3961
4296
  }
3962
4297
 
3963
4298
  // src/fetchMetadata.ts
4299
// Warning text attached to results when Reddit answered with a verification/block page.
var REDDIT_BLOCKED_METADATA_WARNING = "Reddit returned a verification/block page; metadata is incomplete.";

// Suggested-action identifier for blocked providers.
// NOTE(review): its consumers are outside this excerpt — confirm where it is reported.
var PROVIDER_BLOCKED_SUGGESTED_ACTION = "retry_on_different_host_or_use_supported_proxy";
3964
4301
  async function fetchMetadata(url, options = {}) {
3965
4302
  const startedAt = Date.now();
3966
4303
  try {
3967
4304
  const requestedUrl = normalizeUrl(url);
3968
4305
  const fetchResult = await fetchPageWithStrategies(requestedUrl, options);
3969
4306
  const page = fetchResult.page;
4307
+ if (fetchResult.providerDiagnostics?.blocked) {
4308
+ return createBlockedProviderMetadata(requestedUrl, fetchResult, Date.now() - startedAt);
4309
+ }
3970
4310
  const directMedia = createDirectMediaMetadata(page, requestedUrl, Date.now() - startedAt);
3971
4311
  if (directMedia) {
3972
4312
  return directMedia;
@@ -3993,7 +4333,7 @@ async function fetchMetadata(url, options = {}) {
3993
4333
  ...metadata.canonicalUrl ? ["resolved canonical URL"] : []
3994
4334
  ];
3995
4335
  metadata.diagnostics.fallbacksAttempted = mergeFallbackAttempts2(metadata.diagnostics.fallbacksAttempted, fetchResult.fallbacksAttempted);
3996
- metadata.diagnostics.sourcePriority = uniqueStrings3([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
4336
+ metadata.diagnostics.sourcePriority = uniqueStrings4([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
3997
4337
  metadata.diagnostics.extractionMethod = metadata.diagnostics.extractionMethod ?? fetchResult.extractionMethod;
3998
4338
  metadata.diagnostics.retryInfo = metadata.diagnostics.retryInfo ?? fetchResult.retryInfo;
3999
4339
  metadata.trace = metadata.diagnostics.trace;
@@ -4035,8 +4375,64 @@ async function fetchMetadata(url, options = {}) {
4035
4375
  };
4036
4376
  }
4037
4377
  }
4378
/**
 * Builds the failure result returned when the provider served a block or
 * verification page instead of content.
 *
 * The result has ok: false, type "unknown", zeroed confidence, completeness
 * and reliability, empty media lists, and diagnostics describing the fetch
 * (status, redirects, fallbacks attempted, provider diagnostics).
 *
 * @param {string} requestedUrl - Normalized URL that was requested.
 * @param {object} fetchResult - Fetch outcome; reads page, providerDiagnostics,
 *   trace, warnings, fallbacksAttempted, sourcePriority, extractionMethod and retryInfo.
 * @param {number} fetchDurationMs - Elapsed fetch time in milliseconds.
 * @returns {object} Metadata result describing the blocked response.
 */
function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationMs) {
  const { page, providerDiagnostics } = fetchResult;

  const traceEntries = [];
  if (page.isShortUrl) {
    traceEntries.push(`detected short URL provider: ${page.shortUrlProvider ?? "unknown"}`);
  }
  if (page.redirects.length > 0) {
    traceEntries.push(`resolved ${page.redirects.length} redirect${page.redirects.length === 1 ? "" : "s"}`);
  }
  traceEntries.push(...fetchResult.trace, "detected blocked provider response");
  const trace = uniqueStrings4(traceEntries);

  const warningEntries = [...fetchResult.warnings, REDDIT_BLOCKED_METADATA_WARNING];
  if (page.statusCode < 200 || page.statusCode >= 300) {
    warningEntries.push(`Fetch completed with non-success status code ${page.statusCode}.`);
  }
  const warnings = uniqueStrings4(warningEntries);

  const diagnostics = {
    originalUrl: requestedUrl,
    finalUrl: page.finalUrl,
    isShortUrl: page.isShortUrl,
    shortUrlProvider: page.shortUrlProvider,
    statusCode: page.statusCode,
    contentType: page.contentType,
    redirects: page.redirects,
    sourcesUsed: [],
    warnings,
    fallbacksAttempted: mergeFallbackAttempts2(void 0, fetchResult.fallbacksAttempted),
    trace,
    sourcePriority: fetchResult.sourcePriority,
    extractionMethod: fetchResult.extractionMethod,
    retryInfo: fetchResult.retryInfo,
    providerDiagnostics,
    // Nothing was extracted, so every confidence component is zero.
    confidenceBreakdown: {
      title: 0,
      description: 0,
      image: 0,
      structuredData: 0,
      adapter: 0
    },
    fetchDurationMs,
    extractedAt: (/* @__PURE__ */ new Date()).toISOString()
  };

  return {
    ok: false,
    url: requestedUrl,
    finalUrl: page.finalUrl,
    type: "unknown",
    siteName: providerDiagnostics?.platform === "reddit" ? "Reddit" : void 0,
    confidence: 0,
    completeness: 0,
    reliability: 0,
    images: [],
    videos: [],
    audio: [],
    favicons: [],
    trace,
    diagnostics
  };
}
4038
4434
  async function fetchPageWithStrategies(requestedUrl, options) {
4039
- if (isRedditUrl(requestedUrl)) {
4435
+ if (isRedditUrl2(requestedUrl)) {
4040
4436
  return fetchRedditPageWithStrategy(requestedUrl, options);
4041
4437
  }
4042
4438
  return {
@@ -4049,6 +4445,7 @@ async function fetchPageWithStrategies(requestedUrl, options) {
4049
4445
  async function fetchRedditPageWithStrategy(requestedUrl, options) {
4050
4446
  const attempts = [];
4051
4447
  const warnings = [];
4448
+ const informationalFallbacks = [];
4052
4449
  const sourcePriority = ["redditJsonEndpoint", "oldReddit", "embeddedStructuredData", "openGraph", "html"];
4053
4450
  let lastError;
4054
4451
  const jsonUrl = redditJsonEndpoint(requestedUrl);
@@ -4059,7 +4456,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4059
4456
  });
4060
4457
  attempts.push(attempt);
4061
4458
  lastError = attempt.error;
4062
- if (attempt.page && attempt.ok && !attempt.blocked) {
4459
+ if (attempt.page && attempt.ok) {
4063
4460
  const redditPost = parseRedditJsonPayload(attempt.page.html);
4064
4461
  if (redditPost?.title) {
4065
4462
  return {
@@ -4074,7 +4471,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4074
4471
  }
4075
4472
  warnings.push("Reddit JSON endpoint responded, but no post payload could be extracted.");
4076
4473
  } else if (attempt.blocked) {
4077
- warnings.push("Reddit JSON endpoint appears to have blocked access.");
4474
+ informationalFallbacks.push("Informational fallback: Reddit JSON endpoint appears to have blocked access; continuing with fallback extraction.");
4078
4475
  }
4079
4476
  }
4080
4477
  const oldRedditUrl = redditOldUrl(requestedUrl);
@@ -4082,12 +4479,12 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4082
4479
  const attempt = await attemptFetch("oldReddit", oldRedditUrl, options);
4083
4480
  attempts.push(attempt);
4084
4481
  lastError = attempt.error;
4085
- if (attempt.page && attempt.ok && !attempt.blocked) {
4482
+ if (attempt.page && attempt.ok) {
4086
4483
  return {
4087
4484
  page: attempt.page,
4088
4485
  fallbacksAttempted: attempts,
4089
4486
  warnings,
4090
- trace: ["retried Reddit page through old.reddit"],
4487
+ trace: [...informationalFallbacks, "retried Reddit page through old.reddit"],
4091
4488
  sourcePriority,
4092
4489
  extractionMethod: "reddit:oldReddit",
4093
4490
  retryInfo: redditRetryInfo(attempts)
@@ -4100,33 +4497,48 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4100
4497
  const htmlAttempt = await attemptFetch("redditHtmlFallback", requestedUrl, options);
4101
4498
  attempts.push(htmlAttempt);
4102
4499
  lastError = htmlAttempt.error;
4103
- if (htmlAttempt.page) {
4104
- if (htmlAttempt.blocked) {
4105
- warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
4106
- }
4500
+ if (htmlAttempt.page && htmlAttempt.ok) {
4107
4501
  return {
4108
4502
  page: htmlAttempt.page,
4109
4503
  fallbacksAttempted: attempts,
4110
4504
  warnings,
4111
- trace: ["used Reddit HTML fallback"],
4505
+ trace: [...informationalFallbacks, "used Reddit HTML fallback"],
4112
4506
  sourcePriority,
4113
4507
  extractionMethod: "reddit:htmlFallback",
4114
4508
  retryInfo: redditRetryInfo(attempts)
4115
4509
  };
4116
4510
  }
4511
+ if (htmlAttempt.blocked) {
4512
+ warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
4513
+ }
4514
+ const providerDiagnostics = redditProviderDiagnosticsFromAttempts(attempts);
4515
+ if (providerDiagnostics) {
4516
+ return {
4517
+ page: synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics),
4518
+ fallbacksAttempted: attempts,
4519
+ warnings: uniqueStrings4([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
4520
+ trace: [...informationalFallbacks, "Reddit provider blocked metadata extraction"],
4521
+ sourcePriority,
4522
+ extractionMethod: "reddit:blockedProvider",
4523
+ retryInfo: redditRetryInfo(attempts),
4524
+ providerDiagnostics
4525
+ };
4526
+ }
4117
4527
  throw lastError ?? new Error("All Reddit extraction fetch attempts failed.");
4118
4528
  }
4119
4529
  async function attemptFetch(method, url, options) {
4120
4530
  try {
4121
4531
  const page = await fetchPage(url, options);
4122
4532
  const retryAfter = page.headers["retry-after"];
4123
- const blocked = isRedditBlocked(page);
4533
+ const blockReason = redditBlockReason(page);
4534
+ const blocked = Boolean(blockReason);
4124
4535
  return {
4125
4536
  method,
4126
4537
  url,
4127
4538
  ok: page.statusCode >= 200 && page.statusCode < 300 && !blocked,
4128
4539
  statusCode: page.statusCode,
4129
4540
  blocked,
4541
+ blockReason,
4130
4542
  retryAfter,
4131
4543
  page
4132
4544
  };
@@ -4139,7 +4551,7 @@ async function attemptFetch(method, url, options) {
4139
4551
  };
4140
4552
  }
4141
4553
  }
4142
- function isRedditUrl(url) {
4554
+ function isRedditUrl2(url) {
4143
4555
  try {
4144
4556
  const host = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
4145
4557
  return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
@@ -4255,46 +4667,163 @@ function findRedditPostRecord(value) {
4255
4667
  return void 0;
4256
4668
  }
4257
4669
  function redditImagesFromPost(post) {
4258
- const images = [];
4670
+ const images = [
4671
+ ...redditGalleryImagesFromPost(post),
4672
+ ...redditDirectImagesFromPost(post)
4673
+ ];
4259
4674
  const preview = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
4260
4675
  for (const image of preview) {
4261
4676
  if (!isRecord5(image)) {
4262
4677
  continue;
4263
4678
  }
4264
- for (const candidate of [image.source, ...Array.isArray(image.resolutions) ? image.resolutions : []]) {
4265
- if (!isRecord5(candidate)) {
4266
- continue;
4267
- }
4268
- const url = redditMediaUrl(stringFromUnknown4(candidate.url));
4269
- if (!url) {
4270
- continue;
4271
- }
4272
- images.push({
4273
- url,
4274
- kind: "image",
4275
- source: "adapter",
4276
- width: numberFromUnknown2(candidate.width),
4277
- height: numberFromUnknown2(candidate.height),
4278
- metadata: {
4279
- adapter: "redditJsonEndpoint",
4280
- originalSource: "redditJsonEndpoint"
4281
- }
4282
- });
4679
+ const source = redditImageFromRecord(image.source, "previewOriginal");
4680
+ if (source) {
4681
+ images.push(source);
4682
+ continue;
4683
+ }
4684
+ const fallbackResolution = largestRedditImageRecord(Array.isArray(image.resolutions) ? image.resolutions : []);
4685
+ const fallback = redditImageFromRecord(fallbackResolution, "previewResolution");
4686
+ if (fallback) {
4687
+ images.push(fallback);
4283
4688
  }
4284
4689
  }
4285
4690
  const thumbnail = redditMediaUrl(stringFromUnknown4(post.thumbnail));
4286
4691
  if (thumbnail && /^https?:\/\//i.test(thumbnail)) {
4287
- images.push({
4288
- url: thumbnail,
4289
- kind: "image",
4290
- source: "adapter",
4291
- metadata: {
4292
- adapter: "redditJsonEndpoint",
4293
- originalSource: "redditJsonEndpoint"
4294
- }
4295
- });
4692
+ const thumbnailAsset = redditImageAsset(thumbnail, void 0, void 0, "thumbnail");
4693
+ if (thumbnailAsset) {
4694
+ images.push(thumbnailAsset);
4695
+ }
4696
+ }
4697
+ return dedupeRedditImages(prioritizeRedditImages(images));
4698
+ }
4699
+ function redditGalleryImagesFromPost(post) {
4700
+ const mediaMetadata = isRecord5(post.media_metadata) ? post.media_metadata : void 0;
4701
+ if (!mediaMetadata) {
4702
+ return [];
4296
4703
  }
4297
- return images;
4704
+ const galleryItems = isRecord5(post.gallery_data) && Array.isArray(post.gallery_data.items) ? post.gallery_data.items : [];
4705
+ const orderedIds = galleryItems.map((item) => isRecord5(item) ? stringFromUnknown4(item.media_id) : void 0).filter((item) => Boolean(item));
4706
+ const seenIds = /* @__PURE__ */ new Set();
4707
+ const assets = [];
4708
+ for (const id of orderedIds) {
4709
+ const asset = redditImageFromMediaMetadata(mediaMetadata[id], id);
4710
+ if (asset) {
4711
+ assets.push(asset);
4712
+ seenIds.add(id);
4713
+ }
4714
+ }
4715
+ for (const [id, value] of Object.entries(mediaMetadata)) {
4716
+ if (seenIds.has(id)) {
4717
+ continue;
4718
+ }
4719
+ const asset = redditImageFromMediaMetadata(value, id);
4720
+ if (asset) {
4721
+ assets.push(asset);
4722
+ }
4723
+ }
4724
+ return assets;
4725
+ }
4726
+ function redditDirectImagesFromPost(post) {
4727
+ const url = redditMediaUrl(stringFromUnknown4(post.url_overridden_by_dest) ?? stringFromUnknown4(post.url));
4728
+ if (!url || !isDirectRedditImageUrl(url)) {
4729
+ return [];
4730
+ }
4731
+ const previewSource = previewSourceRecord(post);
4732
+ const asset = redditImageAsset(
4733
+ url,
4734
+ numberFromUnknown2(previewSource?.width),
4735
+ numberFromUnknown2(previewSource?.height),
4736
+ "directImage"
4737
+ );
4738
+ return asset ? [asset] : [];
4739
+ }
4740
+ function redditImageFromMediaMetadata(value, mediaId) {
4741
+ if (!isRecord5(value)) {
4742
+ return void 0;
4743
+ }
4744
+ const source = isRecord5(value.s) ? value.s : void 0;
4745
+ const url = redditMediaUrl(
4746
+ stringFromUnknown4(source?.u) ?? stringFromUnknown4(source?.gif) ?? stringFromUnknown4(source?.mp4)
4747
+ );
4748
+ if (!url) {
4749
+ return void 0;
4750
+ }
4751
+ const asset = redditImageAsset(
4752
+ url,
4753
+ numberFromUnknown2(source?.x) ?? numberFromUnknown2(source?.width),
4754
+ numberFromUnknown2(source?.y) ?? numberFromUnknown2(source?.height),
4755
+ "gallery",
4756
+ stringFromUnknown4(value.m)
4757
+ );
4758
+ return asset ? {
4759
+ ...asset,
4760
+ metadata: {
4761
+ ...asset.metadata,
4762
+ redditMediaId: mediaId
4763
+ }
4764
+ } : void 0;
4765
+ }
4766
+ function redditImageFromRecord(value, redditMediaKind) {
4767
+ if (!isRecord5(value)) {
4768
+ return void 0;
4769
+ }
4770
+ const url = redditMediaUrl(stringFromUnknown4(value.url) ?? stringFromUnknown4(value.u));
4771
+ if (!url) {
4772
+ return void 0;
4773
+ }
4774
+ return redditImageAsset(
4775
+ url,
4776
+ numberFromUnknown2(value.width) ?? numberFromUnknown2(value.x),
4777
+ numberFromUnknown2(value.height) ?? numberFromUnknown2(value.y),
4778
+ redditMediaKind
4779
+ );
4780
+ }
4781
+ function redditImageAsset(url, width, height, redditMediaKind, type) {
4782
+ const asset = {
4783
+ url,
4784
+ kind: "image",
4785
+ source: "adapter",
4786
+ width,
4787
+ height,
4788
+ type,
4789
+ metadata: {
4790
+ adapter: "redditJsonEndpoint",
4791
+ originalSource: "redditJsonEndpoint",
4792
+ redditMediaKind
4793
+ }
4794
+ };
4795
+ return isAllowedRedditImageCandidate(asset) ? asset : void 0;
4796
+ }
4797
+ function largestRedditImageRecord(values) {
4798
+ return values.filter(isRecord5).sort(
4799
+ (left, right) => (numberFromUnknown2(right.width) ?? 0) * (numberFromUnknown2(right.height) ?? 0) - (numberFromUnknown2(left.width) ?? 0) * (numberFromUnknown2(left.height) ?? 0)
4800
+ )[0];
4801
+ }
4802
+ function previewSourceRecord(post) {
4803
+ const images = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
4804
+ const firstImage = images.find(isRecord5);
4805
+ return firstImage && isRecord5(firstImage.source) ? firstImage.source : void 0;
4806
+ }
4807
+ function isDirectRedditImageUrl(value) {
4808
+ try {
4809
+ const parsed = new URL(value);
4810
+ const host = parsed.hostname.toLowerCase();
4811
+ return host === "i.redd.it" || host === "preview.redd.it";
4812
+ } catch {
4813
+ return false;
4814
+ }
4815
+ }
4816
+ function dedupeRedditImages(images) {
4817
+ const seen = /* @__PURE__ */ new Set();
4818
+ const unique = [];
4819
+ for (const image of images) {
4820
+ if (seen.has(image.url)) {
4821
+ continue;
4822
+ }
4823
+ seen.add(image.url);
4824
+ unique.push(image);
4825
+ }
4826
+ return unique;
4298
4827
  }
4299
4828
  function redditVideosFromPost(post) {
4300
4829
  const videos = [];
@@ -4321,7 +4850,9 @@ function redditVideosFromPost(post) {
4321
4850
  }
4322
4851
  function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
4323
4852
  const finalUrl = post.canonicalUrl ?? requestedUrl;
4324
- const bestImage = post.images.sort((left, right) => (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0))[0];
4853
+ const bestImage = post.images.slice().sort(
4854
+ (left, right) => redditImagePriority(right) - redditImagePriority(left) || (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0)
4855
+ )[0];
4325
4856
  const video = post.videos[0];
4326
4857
  const structuredData = {
4327
4858
  "@context": "https://schema.org",
@@ -4377,8 +4908,52 @@ function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
4377
4908
  statusCode: jsonPage.statusCode
4378
4909
  };
4379
4910
  }
4380
- function isRedditBlocked(page) {
4381
- return page.statusCode === 403 || page.statusCode === 429 || /please wait for verification|whoa there, pardner|blocked|forbidden|too many requests|request has been blocked/i.test(page.html);
4911
+ function redditProviderDiagnosticsFromAttempts(attempts) {
4912
+ const blockedAttempts = attempts.filter((attempt) => attempt.blocked);
4913
+ if (blockedAttempts.length === 0) {
4914
+ return void 0;
4915
+ }
4916
+ const selectedAttempt = blockedAttempts.find((attempt) => attempt.blockReason === "provider_verification_required") ?? blockedAttempts.at(-1);
4917
+ return {
4918
+ platform: "reddit",
4919
+ blocked: true,
4920
+ statusCode: selectedAttempt?.statusCode,
4921
+ reason: selectedAttempt?.blockReason ?? "provider_blocked_request",
4922
+ suggestedAction: PROVIDER_BLOCKED_SUGGESTED_ACTION
4923
+ };
4924
+ }
4925
+ function synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics) {
4926
+ const selectedPage = attempts.find((attempt) => attempt.blockReason === providerDiagnostics.reason)?.page ?? attempts.slice().reverse().find((attempt) => attempt.page)?.page;
4927
+ return {
4928
+ url: requestedUrl,
4929
+ originalUrl: requestedUrl,
4930
+ finalUrl: requestedUrl,
4931
+ isShortUrl: selectedPage?.isShortUrl ?? false,
4932
+ shortUrlProvider: selectedPage?.shortUrlProvider,
4933
+ html: "",
4934
+ bytes: new Uint8Array(),
4935
+ statusCode: providerDiagnostics.statusCode ?? selectedPage?.statusCode ?? 403,
4936
+ contentType: selectedPage?.contentType,
4937
+ redirects: selectedPage?.redirects ?? [],
4938
+ headers: selectedPage?.headers ?? {}
4939
+ };
4940
+ }
4941
+ function redditBlockReason(page) {
4942
+ const title = htmlTitle(page.html);
4943
+ const text = normalizeText(`${title ?? ""} ${page.html}`);
4944
+ if (/reddit\s*-\s*please wait for verification/i.test(title ?? "") || /please wait for verification|verification required|verify you are human/i.test(text)) {
4945
+ return "provider_verification_required";
4946
+ }
4947
+ if (page.statusCode === 403 || page.statusCode === 429 || /whoa there, pardner|request has been blocked|too many requests|forbidden|you're blocked|you are blocked|youre blocked|blocked by network security/i.test(text) || /^blocked$/i.test(title ?? "")) {
4948
+ return "provider_blocked_request";
4949
+ }
4950
+ return void 0;
4951
+ }
4952
+ function htmlTitle(html) {
4953
+ return normalizeText(html.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1]);
4954
+ }
4955
+ function normalizeText(value) {
4956
+ return value?.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim() ?? "";
4382
4957
  }
4383
4958
  function redditRetryInfo(attempts) {
4384
4959
  const blockedAttempts = attempts.filter((attempt) => attempt.blocked || attempt.statusCode === 429 || attempt.statusCode === 403);
@@ -4412,7 +4987,7 @@ function mergeFallbackAttempts2(existing, incoming) {
4412
4987
  }
4413
4988
  const seen = /* @__PURE__ */ new Set();
4414
4989
  return attempts.map((value) => {
4415
- const { page: _page, ...attempt } = value;
4990
+ const { page: _page, blockReason: _blockReason, ...attempt } = value;
4416
4991
  return attempt;
4417
4992
  }).filter((attempt) => {
4418
4993
  const key = `${attempt.method}:${attempt.url ?? ""}:${attempt.statusCode ?? ""}:${attempt.error ?? ""}`;
@@ -4423,7 +4998,7 @@ function mergeFallbackAttempts2(existing, incoming) {
4423
4998
  return true;
4424
4999
  });
4425
5000
  }
4426
- function uniqueStrings3(values) {
5001
+ function uniqueStrings4(values) {
4427
5002
  return [...new Set(values.filter((value) => Boolean(value)))];
4428
5003
  }
4429
5004
  function redditMediaUrl(value) {