metanova 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1239,6 +1239,95 @@ function uniqueStrings(values) {
1239
1239
  return [...new Set(values.filter(Boolean))];
1240
1240
  }
1241
1241
 
1242
+ // src/utils/redditMedia.ts
1243
// URL fragments that disqualify an image candidate on Reddit pages;
// isAllowedRedditImageCandidate rejects any candidate whose URL matches.
// NOTE(review): the alternatives are unanchored substrings, so "icon" also
// matches inside longer words such as "silicon" — confirm this is intended.
var REDDIT_BAD_IMAGE_URL_PATTERN = /thumbs\.redditmedia\.com|avatar|community_icon|subreddit|icon|award|emoji/i;

// `image.source` values that isTrustedRedditImageCandidate accepts as trusted.
var TRUSTED_REDDIT_SOURCES = /* @__PURE__ */ new Set([
  "adapter", "openGraph", "twitter", "jsonLd",
  "oEmbed", "nextData", "nuxt", "initialState",
  "preloadedState", "apollo", "applicationJson", "jsonScript"
]);
1258
/**
 * Reports whether `value` parses as a URL whose host is reddit.com, redd.it,
 * or any subdomain of reddit.com (a leading "www." is ignored).
 *
 * @param {string} value - Candidate absolute URL.
 * @returns {boolean} `false` for non-Reddit hosts and for unparseable input.
 */
function isRedditUrl(value) {
  let hostname;
  try {
    hostname = new URL(value).hostname;
  } catch {
    // Not a parseable absolute URL, so it cannot be a Reddit URL.
    return false;
  }
  const host = hostname.toLowerCase().replace(/^www\./, "");
  if (host === "reddit.com" || host === "redd.it") {
    return true;
  }
  return host.endsWith(".reddit.com");
}
1266
/**
 * Narrows image candidates for a Reddit page and orders them by priority.
 *
 * Candidates failing isAllowedRedditImageCandidate are dropped. If any of the
 * survivors is trusted (isTrustedRedditImageCandidate), only the trusted ones
 * are kept; otherwise all allowed candidates are kept.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} A new, prioritized array.
 */
function filterRedditImageCandidates(images) {
  const allowed = images.filter((image) => isAllowedRedditImageCandidate(image));
  const trusted = allowed.filter((image) => isTrustedRedditImageCandidate(image));
  const pool = trusted.length === 0 ? allowed : trusted;
  return prioritizeRedditImages(pool);
}
1271
/**
 * Returns a sorted copy of `images`: highest redditImagePriority first, with
 * ties broken by larger pixel area (imageArea). The input is not mutated.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} Sorted copy.
 */
function prioritizeRedditImages(images) {
  const byPriorityThenArea = (left, right) => {
    const priorityDelta = redditImagePriority(right) - redditImagePriority(left);
    if (priorityDelta) {
      return priorityDelta;
    }
    return imageArea(right) - imageArea(left);
  };
  const copy = images.slice();
  copy.sort(byPriorityThenArea);
  return copy;
}
1276
/**
 * Decides whether an image candidate may be used for a Reddit page.
 *
 * A candidate is rejected when a known dimension is below 200, or when its
 * URL matches REDDIT_BAD_IMAGE_URL_PATTERN. Unknown (undefined) dimensions
 * do not disqualify a candidate.
 *
 * @param {{url: string, width?: number, height?: number}} image
 * @returns {boolean}
 */
function isAllowedRedditImageCandidate(image) {
  const isKnownAndTooSmall = (side) => side !== undefined && side < 200;
  if (isKnownAndTooSmall(image.width) || isKnownAndTooSmall(image.height)) {
    return false;
  }
  const hasBadUrl = REDDIT_BAD_IMAGE_URL_PATTERN.test(image.url);
  return !hasBadUrl;
}
1285
/**
 * Ranks an image candidate for Reddit pages; larger numbers are preferred.
 *
 * Precedence, first match wins:
 *   metadata.redditMediaKind "gallery" (700), "previewOriginal" (620),
 *   "directImage" (580); then URL on preview.redd.it (560), i.redd.it (540);
 *   then source "openGraph"/"twitter" (420); then external-preview.redd.it
 *   (300), thumbs.redditmedia.com (1); finally 250 for any other URL that
 *   isRedditMediaUrl accepts, else 0.
 *
 * @param {{url: string, source?: string, metadata?: object}} image
 * @returns {number}
 */
function redditImagePriority(image) {
  const rawKind = image.metadata?.redditMediaKind;
  const mediaKind = typeof rawKind === "string" ? rawKind : undefined;
  // Lower-cased once so the host checks below can be plain substring tests.
  const url = image.url.toLowerCase();

  const kindPriority = new Map([
    ["gallery", 700],
    ["previewOriginal", 620],
    ["directImage", 580]
  ]).get(mediaKind);
  if (kindPriority !== undefined) {
    return kindPriority;
  }

  if (url.includes("//preview.redd.it/")) {
    return 560;
  }
  if (url.includes("//i.redd.it/")) {
    return 540;
  }
  if (image.source === "openGraph" || image.source === "twitter") {
    return 420;
  }
  if (url.includes("//external-preview.redd.it/")) {
    return 300;
  }
  if (url.includes("//thumbs.redditmedia.com/")) {
    return 1;
  }
  return isRedditMediaUrl(url) ? 250 : 0;
}
1314
/**
 * Reports whether any image carries Reddit-specific metadata: an `adapter` of
 * "redditAdapter", an `originalSource` of "redditJsonEndpoint", or a truthy
 * `redditMediaKind`.
 *
 * @param {Array<{metadata?: object}>} images
 * @returns {boolean}
 */
function hasRedditImageContext(images) {
  const isRedditSourced = (image) => {
    const metadata = image.metadata;
    if (metadata?.adapter === "redditAdapter") {
      return true;
    }
    if (metadata?.originalSource === "redditJsonEndpoint") {
      return true;
    }
    return Boolean(metadata?.redditMediaKind);
  };
  return images.some((image) => isRedditSourced(image));
}
1321
/**
 * Tests whether `value` contains a Reddit media host reference
 * (i/preview/external-preview/v .redd.it or thumbs.redditmedia.com),
 * case-insensitively. Protocol-relative ("//host/") forms match too.
 *
 * @param {string} value - URL text to test.
 * @returns {boolean}
 */
function isRedditMediaUrl(value) {
  const redditMediaHost = /(?:^https?:)?\/\/(?:(?:i|preview|external-preview)\.redd\.it|thumbs\.redditmedia\.com|v\.redd\.it)\//i;
  return redditMediaHost.test(value);
}
1324
/**
 * A candidate is trusted when its `source` is listed in
 * TRUSTED_REDDIT_SOURCES or it carries a truthy `metadata.redditMediaKind`.
 *
 * @param {{source?: string, metadata?: object}} image
 * @returns {boolean}
 */
function isTrustedRedditImageCandidate(image) {
  if (TRUSTED_REDDIT_SOURCES.has(image.source)) {
    return true;
  }
  return Boolean(image.metadata?.redditMediaKind);
}
1327
/**
 * Pixel area of an image; a missing (null/undefined) dimension counts as 0.
 *
 * @param {{width?: number, height?: number}} image
 * @returns {number}
 */
function imageArea(image) {
  const { width, height } = image;
  return (width ?? 0) * (height ?? 0);
}
1330
+
1242
1331
  // src/scorers/image.ts
1243
1332
  var SOURCE_WEIGHT = {
1244
1333
  adapter: 98,
@@ -1259,6 +1348,7 @@ var SOURCE_WEIGHT = {
1259
1348
  };
1260
1349
  function scoreImages(images, customScorers = []) {
1261
1350
  const duplicateCounts = countDuplicates(images);
1351
+ const redditContext = hasRedditImageContext(images);
1262
1352
  return images.map((image, index) => {
1263
1353
  const scored = scoreImageWithDetails(image, index, images, duplicateCounts);
1264
1354
  const customScore = customScorers.reduce((total, scorer) => total + scorer(image, { index, images }), 0);
@@ -1274,7 +1364,7 @@ function scoreImages(images, customScorers = []) {
1274
1364
  }
1275
1365
  };
1276
1366
  }).sort(
1277
- (left, right) => (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea(right) - imageArea(left)
1367
+ (left, right) => (redditContext ? redditImagePriority(right) - redditImagePriority(left) : 0) || (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea2(right) - imageArea2(left)
1278
1368
  );
1279
1369
  }
1280
1370
  function selectBestImage(images, customScorers = []) {
@@ -1292,15 +1382,17 @@ function scoreImageWithDetails(image, index, images, duplicateCounts) {
1292
1382
  const dimensions = scoreDimensions(image);
1293
1383
  const format = scoreFormat(image);
1294
1384
  const urlSignal = scoreUrlSignal(image);
1385
+ const redditMedia = scoreRedditMedia(image);
1295
1386
  const urlPenalty = scoreUrlPenalty(image);
1296
1387
  const duplicatePenalty = scoreDuplicatePenalty(image, duplicateCounts);
1297
1388
  score += dimensions.score;
1298
1389
  score += format.score;
1299
1390
  score += urlSignal.score;
1391
+ score += redditMedia.score;
1300
1392
  score -= urlPenalty;
1301
1393
  score -= duplicatePenalty.score;
1302
1394
  score -= Math.min(index * 1.5, 10);
1303
- reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...duplicatePenalty.reasons);
1395
+ reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...redditMedia.reasons, ...duplicatePenalty.reasons);
1304
1396
  if (images.length === 1) {
1305
1397
  score += 4;
1306
1398
  reasons.push("only candidate image added 4 points");
@@ -1400,6 +1492,33 @@ function platformThumbnailScore(url) {
1400
1492
  }
1401
1493
  return { score: 0, reasons: [] };
1402
1494
  }
1495
/**
 * Computes the Reddit-specific score adjustment for an image candidate.
 *
 * Candidates with redditImagePriority 0 that are not Reddit media URLs get no
 * adjustment. Otherwise the first matching rule applies: redditMediaKind
 * "gallery" (+24) or "previewOriginal" (+20), then URL host i.redd.it (+18),
 * preview.redd.it (+16), external-preview.redd.it (-8),
 * thumbs.redditmedia.com (-60); anything else scores 0.
 *
 * @param {{url: string, metadata?: object}} image
 * @returns {{score: number, reasons: string[]}}
 */
function scoreRedditMedia(image) {
  if (redditImagePriority(image) === 0 && !isRedditMediaUrl(image.url)) {
    return { score: 0, reasons: [] };
  }

  const url = image.url.toLowerCase();
  const rawKind = image.metadata?.redditMediaKind;
  const mediaKind = typeof rawKind === "string" ? rawKind : undefined;

  if (mediaKind === "gallery") {
    return { score: 24, reasons: ["Reddit gallery media added 24 points"] };
  }
  if (mediaKind === "previewOriginal") {
    return { score: 20, reasons: ["Reddit original preview media added 20 points"] };
  }

  // Host rules in precedence order: [url fragment, score, reason].
  const hostRules = [
    ["//i.redd.it/", 18, "Reddit direct image media added 18 points"],
    ["//preview.redd.it/", 16, "Reddit preview media added 16 points"],
    ["//external-preview.redd.it/", -8, "Reddit external preview media subtracted 8 points"],
    ["//thumbs.redditmedia.com/", -60, "Reddit thumbnail host subtracted 60 points"]
  ];
  const matched = hostRules.find(([fragment]) => url.includes(fragment));
  if (!matched) {
    return { score: 0, reasons: [] };
  }
  const [, score, reason] = matched;
  return { score, reasons: [reason] };
}
1403
1522
  function scoreUrlPenalty(image) {
1404
1523
  const url = image.url.toLowerCase();
1405
1524
  let penalty = 0;
@@ -1472,7 +1591,7 @@ function countDuplicates(images) {
1472
1591
  }
1473
1592
  return counts;
1474
1593
  }
1475
- function imageArea(image) {
1594
/**
 * Pixel area used as the final tie-breaker when sorting scored images.
 * A missing (null/undefined) width or height is treated as 0.
 *
 * @param {{width?: number, height?: number}} image
 * @returns {number}
 */
function imageArea2(image) {
  const width = image.width ?? 0;
  const height = image.height ?? 0;
  return width * height;
}
1478
1597
  function sourceSortWeight(image) {
@@ -1590,8 +1709,9 @@ function discoverMedia(rawSources, finalUrl) {
1590
1709
  if (externalResults.some((result) => (result.images?.length ?? 0) > 0 || (result.videos?.length ?? 0) > 0)) {
1591
1710
  trace.push("media discovery included adapter and plugin media");
1592
1711
  }
1712
+ const dedupedImages = dedupeMediaBySignature(images);
1593
1713
  return {
1594
- images: dedupeMediaBySignature(uniqueMediaByUrl(images)),
1714
+ images: isRedditUrl(finalUrl) ? filterRedditImageCandidates(dedupedImages) : dedupedImages,
1595
1715
  videos: dedupeMediaBySignature(uniqueMediaByUrl(videos)),
1596
1716
  audio: dedupeMediaBySignature(uniqueMediaByUrl(audio)),
1597
1717
  trace
@@ -1683,7 +1803,8 @@ function mediaFromJsonValue(value, kind, source) {
1683
1803
  height: parseNumber(stringFromUnknown(value.height)) ?? parseNumber(stringFromUnknown(nestedDetails?.height)),
1684
1804
  alt: stringFromUnknown(value.alt) ?? stringFromUnknown(value.caption) ?? stringFromUnknown(value.name) ?? stringFromUnknown(nestedDetails?.alt),
1685
1805
  title: stringFromUnknown(value.title) ?? stringFromUnknown(nestedDetails?.title),
1686
- type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type)
1806
+ type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type),
1807
+ metadata: isRecord3(value.metadata) ? value.metadata : void 0
1687
1808
  },
1688
1809
  ...srcsetAssets
1689
1810
  ];
@@ -1791,7 +1912,7 @@ function dedupeMediaBySignature(assets) {
1791
1912
  for (const asset of assets) {
1792
1913
  const key = mediaSignature2(asset.url);
1793
1914
  const current = seen.get(key);
1794
- if (!current || sourceRank(asset.source) > sourceRank(current.source)) {
1915
+ if (!current || mediaRank(asset) > mediaRank(current)) {
1795
1916
  seen.set(key, asset);
1796
1917
  }
1797
1918
  }
@@ -1825,6 +1946,10 @@ function sourceRank(source) {
1825
1946
  };
1826
1947
  return ranks[source] ?? 50;
1827
1948
  }
1949
/**
 * Rank used to pick between duplicate media assets.
 *
 * Assets with a positive redditImagePriority rank at 1000 + that priority;
 * all others fall back to sourceRank of their `source`.
 *
 * @param {{url: string, source?: string, metadata?: object}} asset
 * @returns {number}
 */
function mediaRank(asset) {
  const priority = redditImagePriority(asset);
  if (priority > 0) {
    return 1e3 + priority;
  }
  return sourceRank(asset.source);
}
1828
1953
  function shouldIgnoreMediaUrl2(url) {
1829
1954
  const normalized = url.toLowerCase();
1830
1955
  return normalized.startsWith("data:") || normalized.startsWith("blob:") || normalized.startsWith("javascript:") || /(?:sprite|spacer|blank|transparent|placeholder|tracking|beacon|pixel|emoji|favicon|apple-touch-icon)(?:[._/-]|$|\?)/i.test(normalized) || /(?:^|[/?_-])1x1(?:[._/-]|$|\?)/i.test(normalized);
@@ -2149,7 +2274,7 @@ function normalizeMetadata(rawSources, context = {}) {
2149
2274
  ...mediaDiscovery.trace,
2150
2275
  ...selectedImage.best ? [`selected image from ${sourceLabel2(selectedImage.best)}`] : []
2151
2276
  ]);
2152
- return stripUndefined2({
2277
+ const normalized = stripUndefined2({
2153
2278
  ok: true,
2154
2279
  url,
2155
2280
  finalUrl,
@@ -2179,6 +2304,12 @@ function normalizeMetadata(rawSources, context = {}) {
2179
2304
  diagnostics,
2180
2305
  trace: diagnostics.trace
2181
2306
  });
2307
+ normalized.images = selectedImage.images;
2308
+ normalized.videos = videos;
2309
+ normalized.audio = audio;
2310
+ normalized.favicons = favicons;
2311
+ normalized.trace = diagnostics.trace;
2312
+ return normalized;
2182
2313
  }
2183
2314
  function normalizeAssets2(assets, baseUrl) {
2184
2315
  return assets.map((asset) => {
@@ -2814,8 +2945,8 @@ var redditAdapter = {
2814
2945
  type: reddit.isPost ? "social_post" : "website",
2815
2946
  siteName: "Reddit",
2816
2947
  canonicalUrl: context.raw.openGraph.url ?? context.raw.html.canonicalUrl,
2817
- title: cleanSocialTitle(titleSelection.value),
2818
- description: descriptionSelection.value,
2948
+ title: cleanRedditTitle(titleSelection.value),
2949
+ description: cleanRedditDescription(descriptionSelection.value),
2819
2950
  images: markAdapterMedia(mediaFromContext(context).images, "redditAdapter"),
2820
2951
  videos: markAdapterMedia(mediaFromContext(context).videos, "redditAdapter"),
2821
2952
  author: username ? { name: username } : entityFromContext(context, ["author", "submitter", "user"]),
@@ -2901,7 +3032,7 @@ var tiktokAdapter = {
2901
3032
  return this.detect?.(url) ?? false;
2902
3033
  },
2903
3034
  extract(context) {
2904
- return socialVideoResult("tiktokAdapter", "TikTok", context);
3035
+ return tiktokResult(context);
2905
3036
  },
2906
3037
  normalize(rawData) {
2907
3038
  return normalizePlatformResult(rawData);
@@ -3120,25 +3251,215 @@ function redditDescriptionFromContext(context) {
3120
3251
  }
3121
3252
  return { value: context.raw.html.description, method: context.raw.html.description ? "reddit:html" : void 0 };
3122
3253
  }
3123
- function socialVideoResult(source, platform, context) {
3254
/**
 * Builds the TikTok adapter result for a fetched page.
 *
 * The username and post id are read from the final URL path; the post id
 * falls back to the last path segment when no /video/ or /photo/ segment is
 * present. Title, description, author, media and publish time come from the
 * embedded itemStruct when one is found, with context-based fallbacks.
 *
 * @param {object} context - Adapter context; `finalUrl` and `raw` are read.
 * @returns {object} Whatever compactAdapterResult returns for the assembled record.
 */
function tiktokResult(context) {
  const { pathname } = new URL(context.finalUrl);
  const username = pathname.match(/@([^/]+)/)?.[1];
  const postId = pathname.match(/\/(?:video|photo)\/([^/]+)/)?.[1] ?? pathname.split("/").filter(Boolean).at(-1);

  const itemStruct = tiktokItemStructFromContext(context, postId);
  const titleSelection = tiktokTitleFromContext(context, itemStruct, username);
  const descriptionSelection = tiktokDescriptionFromContext(context, itemStruct);
  const author = tiktokAuthorFromItemStruct(itemStruct, username);
  const media = tiktokMediaFromContext(context, itemStruct);
  const publishedTime = tiktokPublishedTime(itemStruct) ?? publishedTimeFromContext(context);

  const images = markAdapterMedia(media.images, "tiktokAdapter");
  const videos = markAdapterMedia(media.videos, "tiktokAdapter");

  // Video details are only attached when a post id could be derived.
  let video;
  if (postId) {
    video = {
      id: postId,
      title: titleSelection.value,
      channel: author,
      publishedTime,
      duration: tiktokVideoDuration(itemStruct),
      viewCount: tiktokStatCount(itemStruct, "playCount")
    };
  }

  const extractionMethod = titleSelection.method ?? descriptionSelection.method ?? "tiktok:htmlFallback";

  return compactAdapterResult({
    source: "tiktokAdapter",
    platform: "TikTok",
    type: "social_post",
    siteName: "TikTok",
    canonicalUrl: context.raw.openGraph.url,
    title: titleSelection.value,
    description: descriptionSelection.value,
    images,
    videos,
    author,
    article: { publishedTime },
    video,
    identifiers: { username, postId },
    raw: { extractionMethod }
  });
}
3290
/**
 * Selects a title for a TikTok post and records which strategy produced it.
 *
 * Strategies are tried lazily in order: itemStruct.desc, the music title,
 * a context-based title, and finally a "TikTok post by @user" placeholder
 * when a username is known.
 *
 * @param {object} context - Adapter context passed to titleFromContext.
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{value: string|undefined, method: string|undefined}}
 */
function tiktokTitleFromContext(context, itemStruct, username) {
  // Each resolver is only invoked if every earlier one produced nothing.
  const strategies = [
    ["tiktok:itemStruct.desc", () => cleanTikTokText(stringFromUnknown3(itemStruct?.desc))],
    ["tiktok:itemStruct.music", () => tiktokMusicTitle(itemStruct)],
    ["tiktok:fallback", () => cleanTikTokText(titleFromContext(context, ["desc", "caption", "title", "description"]))]
  ];
  for (const [method, resolve] of strategies) {
    const value = resolve();
    if (value) {
      return { value, method };
    }
  }
  if (username) {
    return { value: `TikTok post by @${username}`, method: "tiktok:urlFallback" };
  }
  return { value: undefined, method: undefined };
}
3308
/**
 * Selects a description for a TikTok post and records which strategy
 * produced it.
 *
 * itemStruct.desc is preferred; otherwise the context-based description is
 * used. `method` is only set when a value was actually found, matching
 * tiktokTitleFromContext and redditDescriptionFromContext. Previously the
 * fallback branch always reported "tiktok:fallback", which hid the
 * "tiktok:htmlFallback" default that tiktokResult applies when neither a
 * title nor a description method is available.
 *
 * @param {object} context - Adapter context passed to descriptionFromContext.
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {{value: string|undefined, method: string|undefined}}
 */
function tiktokDescriptionFromContext(context, itemStruct) {
  const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
  if (desc) {
    return { value: desc, method: "tiktok:itemStruct.desc" };
  }
  const fallback = cleanTikTokText(descriptionFromContext(context));
  return {
    value: fallback,
    // Do not claim an extraction method when nothing was extracted.
    method: fallback ? "tiktok:fallback" : undefined
  };
}
3318
/**
 * Locates the TikTok `itemStruct` record inside the page's embedded data.
 *
 * First pass: the fixed path
 * __DEFAULT_SCOPE__ -> "webapp.video-detail" -> itemInfo -> itemStruct on
 * every embedded item. Second pass: a walkData search of each item for any
 * key named "itemStruct". When `postId` is given, only a record whose `id`
 * equals it is accepted.
 *
 * @param {object} context - Adapter context; reads `raw.embeddedData.items`.
 * @param {string|undefined} postId - Post id parsed from the URL.
 * @returns {object|undefined} The matching record, or undefined.
 */
function tiktokItemStructFromContext(context, postId) {
  const asRecord = (value) => (isRecord4(value) ? value : undefined);
  const matchesPost = (candidate) => !postId || stringFromUnknown3(candidate.id) === postId;

  // Pass 1: the known location of the item record.
  for (const item of context.raw.embeddedData.items) {
    const defaultScope = asRecord(item.data["__DEFAULT_SCOPE__"]);
    const videoDetail = asRecord(defaultScope?.["webapp.video-detail"]);
    const itemInfo = asRecord(videoDetail?.itemInfo);
    const itemStruct = asRecord(itemInfo?.itemStruct);
    if (itemStruct && matchesPost(itemStruct)) {
      return itemStruct;
    }
  }

  // Pass 2: search each embedded item for a matching "itemStruct" key.
  for (const item of context.raw.embeddedData.items) {
    let found;
    walkData(item.data, (value, key) => {
      // Keep the first match; later visits are ignored.
      if (!found && key === "itemStruct" && isRecord4(value) && matchesPost(value)) {
        found = value;
      }
    });
    if (found) {
      return found;
    }
  }
  return undefined;
}
3344
/**
 * Combines media taken from the TikTok itemStruct with media discovered from
 * the page context. itemStruct media is listed first in both arrays.
 *
 * @param {object} context - Adapter context passed to mediaFromContext.
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {{images: object[], videos: object[]}}
 */
function tiktokMediaFromContext(context, itemStruct) {
  const fromPage = mediaFromContext(context);
  const structImages = tiktokImagesFromItemStruct(itemStruct);
  const images = [...structImages, ...fromPage.images];
  const structVideos = tiktokVideosFromItemStruct(itemStruct);
  const videos = [...structVideos, ...fromPage.videos];
  return { images, videos };
}
3351
/**
 * Builds image assets from the cover fields of `itemStruct.video`
 * (originCover, cover, dynamicCover, shareCover), de-duplicated.
 *
 * Every asset carries the video's width/height, source "applicationJson" and
 * metadata.tiktokMediaKind "videoCover".
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {object[]} Empty when there is no video record.
 */
function tiktokImagesFromItemStruct(itemStruct) {
  const videoRecord = isRecord4(itemStruct?.video) ? itemStruct.video : undefined;
  if (!videoRecord) {
    return [];
  }
  const width = numberFromUnknown(videoRecord.width);
  const height = numberFromUnknown(videoRecord.height);
  const coverUrls = uniqueStrings3([
    stringFromUnknown3(videoRecord.originCover),
    stringFromUnknown3(videoRecord.cover),
    stringFromUnknown3(videoRecord.dynamicCover),
    ...urlsFromUnknown(videoRecord.shareCover)
  ]);

  const assets = [];
  for (const url of coverUrls) {
    assets.push({
      url,
      kind: "image",
      source: "applicationJson",
      width,
      height,
      metadata: { tiktokMediaKind: "videoCover" }
    });
  }
  return assets;
}
3375
/**
 * Builds video assets from the play/download addresses of `itemStruct.video`
 * (playAddr, downloadAddr, PlayAddrStruct.UrlList and each
 * bitrateInfo[].PlayAddr.UrlList), de-duplicated and limited to http(s) URLs.
 *
 * Every asset carries the video's width/height, type "video/mp4", source
 * "applicationJson" and metadata.tiktokMediaKind "videoPlay".
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {object[]} Empty when there is no video record.
 */
function tiktokVideosFromItemStruct(itemStruct) {
  const videoRecord = isRecord4(itemStruct?.video) ? itemStruct.video : undefined;
  if (!videoRecord) {
    return [];
  }
  const width = numberFromUnknown(videoRecord.width);
  const height = numberFromUnknown(videoRecord.height);

  const directUrls = [
    stringFromUnknown3(videoRecord.playAddr),
    stringFromUnknown3(videoRecord.downloadAddr)
  ];
  const structUrls = urlsFromTikTokPlayAddr(videoRecord.PlayAddrStruct);
  const bitrateEntries = Array.isArray(videoRecord.bitrateInfo) ? videoRecord.bitrateInfo : [];
  const bitrateUrls = bitrateEntries.flatMap(
    (entry) => (isRecord4(entry) ? urlsFromTikTokPlayAddr(entry.PlayAddr) : [])
  );

  const assets = [];
  for (const url of uniqueStrings3([...directUrls, ...structUrls, ...bitrateUrls])) {
    // Skip anything that is not an absolute http(s) URL.
    if (!/^https?:\/\//i.test(url)) {
      continue;
    }
    assets.push({
      url,
      kind: "video",
      source: "applicationJson",
      width,
      height,
      type: "video/mp4",
      metadata: { tiktokMediaKind: "videoPlay" }
    });
  }
  return assets;
}
3400
/**
 * Extracts the URLs in the `UrlList` field of a TikTok play-address record.
 *
 * @param {unknown} value - Candidate play-address record.
 * @returns {string[]} Empty when `value` is not a record.
 */
function urlsFromTikTokPlayAddr(value) {
  return isRecord4(value) ? urlsFromUnknown(value.UrlList) : [];
}
3406
/**
 * Builds the author entity for a TikTok post.
 *
 * The name is the author's nickname, else uniqueId, else the URL username.
 * The profile URL is derived from the URL username only.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @param {string|undefined} username - Username parsed from the URL.
 * @returns {{name: string, url: string|undefined}|undefined}
 *   Undefined when no name could be determined.
 */
function tiktokAuthorFromItemStruct(itemStruct, username) {
  const authorRecord = isRecord4(itemStruct?.author) ? itemStruct.author : undefined;
  const displayName = stringFromUnknown3(authorRecord?.nickname) ?? stringFromUnknown3(authorRecord?.uniqueId) ?? username;
  if (!displayName) {
    return undefined;
  }
  const profileUrl = username ? `https://www.tiktok.com/@${username}` : undefined;
  return { name: displayName, url: profileUrl };
}
3417
/**
 * Converts `itemStruct.createTime` to an ISO-8601 timestamp.
 *
 * The value is multiplied by 1000 before building the Date, i.e. it is
 * treated as seconds. A missing or zero value yields undefined. A value that
 * produces an invalid Date (for example one far outside the representable
 * range) also yields undefined instead of letting `toISOString()` throw a
 * RangeError and abort extraction of the whole page.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {string|undefined}
 */
function tiktokPublishedTime(itemStruct) {
  const createdSeconds = numberFromUnknown(itemStruct?.createTime);
  if (!createdSeconds) {
    return undefined;
  }
  const published = new Date(createdSeconds * 1e3);
  // Page-supplied data is untrusted: an out-of-range value gives an invalid Date.
  if (Number.isNaN(published.getTime())) {
    return undefined;
  }
  return published.toISOString();
}
3421
/**
 * Reads `itemStruct.video.duration` through stringFromUnknown3.
 * NOTE(review): if TikTok supplies the duration as a number, the result
 * depends on how stringFromUnknown3 treats non-strings — confirm.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {string|undefined}
 */
function tiktokVideoDuration(itemStruct) {
  const videoRecord = isRecord4(itemStruct?.video) ? itemStruct.video : undefined;
  const rawDuration = videoRecord?.duration;
  return stringFromUnknown3(rawDuration);
}
3425
/**
 * Reads one counter from `itemStruct.stats` through numberFromUnknown.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @param {string} key - Stat name, e.g. "playCount".
 * @returns {number|undefined}
 */
function tiktokStatCount(itemStruct, key) {
  const statsRecord = isRecord4(itemStruct?.stats) ? itemStruct.stats : undefined;
  const rawCount = statsRecord?.[key];
  return numberFromUnknown(rawCount);
}
3429
/**
 * Derives a title from `itemStruct.music`.
 *
 * Returns "<title> - <authorName>" when both are usable and the title does
 * not contain "original sound"; otherwise just the title. Returns undefined
 * when there is no usable music title.
 *
 * @param {object|undefined} itemStruct - Embedded TikTok item record, if any.
 * @returns {string|undefined}
 */
function tiktokMusicTitle(itemStruct) {
  const musicRecord = isRecord4(itemStruct?.music) ? itemStruct.music : undefined;
  const title = cleanTikTokText(stringFromUnknown3(musicRecord?.title));
  const author = cleanTikTokText(stringFromUnknown3(musicRecord?.authorName));
  if (!title) {
    return undefined;
  }
  const shouldCredit = Boolean(author) && !/original sound/i.test(title);
  return shouldCredit ? `${title} - ${author}` : title;
}
3441
/**
 * Normalizes TikTok text: collapses whitespace runs to single spaces and
 * trims. Returns undefined for missing/empty text and for text that
 * isLowQualityTikTokText flags.
 *
 * @param {string|undefined|null} value
 * @returns {string|undefined}
 */
function cleanTikTokText(value) {
  const collapsed = value?.replace(/\s+/g, " ").trim();
  if (!collapsed) {
    return undefined;
  }
  return isLowQualityTikTokText(collapsed) ? undefined : collapsed;
}
3448
/**
 * Flags text that should not be used as a TikTok title or description:
 * anything containing a "{...}" placeholder, shop/promotional phrases, and a
 * few exact generic page titles.
 *
 * @param {string} value - Already whitespace-normalized text.
 * @returns {boolean}
 */
function isLowQualityTikTokText(value) {
  const lowQualityPatterns = [
    /\{[^}]+\}/,
    /tiktok\s*shop|free shipping|eligible items|exclusive collections/i,
    /^tiktok live creator networks$/i,
    /^tiktok\s*-\s*make your day$/i,
    /^discover (?:new |popular )?videos?\b.*\btiktok\b/i
  ];
  return lowQualityPatterns.some((pattern) => pattern.test(value));
}
3451
/**
 * Collects trimmed, non-empty strings from a value that may be a string or an
 * arbitrarily nested array of strings. Anything else contributes nothing.
 *
 * @param {unknown} value
 * @returns {string[]}
 */
function urlsFromUnknown(value) {
  if (Array.isArray(value)) {
    return value.flatMap((entry) => urlsFromUnknown(entry));
  }
  if (typeof value !== "string") {
    return [];
  }
  const trimmed = value.trim();
  return trimmed ? [trimmed] : [];
}
3460
/**
 * Returns the truthy entries of `values` with duplicates removed, keeping the
 * order of first occurrence.
 *
 * @param {Array<string|undefined|null>} values
 * @returns {string[]}
 */
function uniqueStrings3(values) {
  const seen = new Set();
  for (const value of values) {
    if (value) {
      seen.add(value);
    }
  }
  return Array.from(seen);
}
3142
3463
  function normalizePlatformResult(rawData) {
3143
3464
  const type = rawData.type ?? inferAdapterType(rawData);
3144
3465
  return compactAdapterResult({
@@ -3489,6 +3810,20 @@ function parseRedditUrl(url) {
3489
3810
  function cleanSocialTitle(title) {
3490
3811
  return title?.replace(/\s*:\s*r\/[A-Za-z0-9_]+$/i, "").trim();
3491
3812
  }
3813
/**
 * Cleans a Reddit title via cleanSocialTitle and discards titles that come
 * from Reddit's verification / rate-limit interstitial pages.
 *
 * @param {string|undefined|null} title
 * @returns {string|undefined}
 */
function cleanRedditTitle(title) {
  const cleaned = cleanSocialTitle(title);
  if (!cleaned) {
    return undefined;
  }
  const isInterstitialTitle = /reddit\s*-\s*please wait for verification|please wait for verification|whoa there, pardner/i.test(cleaned);
  return isInterstitialTitle ? undefined : cleaned;
}
3820
/**
 * Collapses whitespace in a Reddit description and discards text that comes
 * from Reddit's verification / block interstitial pages.
 *
 * @param {string|undefined|null} description
 * @returns {string|undefined}
 */
function cleanRedditDescription(description) {
  const collapsed = description?.replace(/\s+/g, " ").trim();
  if (!collapsed) {
    return undefined;
  }
  const isInterstitialText = /please wait for verification|whoa there, pardner|request has been blocked/i.test(collapsed);
  return isInterstitialText ? undefined : collapsed;
}
3492
3827
  function hostMatches(url, domains) {
3493
3828
  const host = url.hostname.toLowerCase().replace(/^www\./, "");
3494
3829
  return domains.some((domain) => host === domain || host.endsWith(`.${domain}`));
@@ -3878,12 +4213,17 @@ function ascii(bytes, offset, length) {
3878
4213
  }
3879
4214
 
3880
4215
  // src/fetchMetadata.ts
4216
// Warning attached to results when Reddit served a verification/block page.
var REDDIT_BLOCKED_METADATA_WARNING = "Reddit returned a verification/block page; metadata is incomplete.";

// Machine-readable hint for blocked providers.
// NOTE(review): no use of this constant is visible in this part of the file — confirm where it is consumed.
var PROVIDER_BLOCKED_SUGGESTED_ACTION = "retry_on_different_host_or_use_supported_proxy";
3881
4218
  async function fetchMetadata(url, options = {}) {
3882
4219
  const startedAt = Date.now();
3883
4220
  try {
3884
4221
  const requestedUrl = normalizeUrl(url);
3885
4222
  const fetchResult = await fetchPageWithStrategies(requestedUrl, options);
3886
4223
  const page = fetchResult.page;
4224
+ if (fetchResult.providerDiagnostics?.blocked) {
4225
+ return createBlockedProviderMetadata(requestedUrl, fetchResult, Date.now() - startedAt);
4226
+ }
3887
4227
  const directMedia = createDirectMediaMetadata(page, requestedUrl, Date.now() - startedAt);
3888
4228
  if (directMedia) {
3889
4229
  return directMedia;
@@ -3910,7 +4250,7 @@ async function fetchMetadata(url, options = {}) {
3910
4250
  ...metadata.canonicalUrl ? ["resolved canonical URL"] : []
3911
4251
  ];
3912
4252
  metadata.diagnostics.fallbacksAttempted = mergeFallbackAttempts2(metadata.diagnostics.fallbacksAttempted, fetchResult.fallbacksAttempted);
3913
- metadata.diagnostics.sourcePriority = uniqueStrings3([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
4253
+ metadata.diagnostics.sourcePriority = uniqueStrings4([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
3914
4254
  metadata.diagnostics.extractionMethod = metadata.diagnostics.extractionMethod ?? fetchResult.extractionMethod;
3915
4255
  metadata.diagnostics.retryInfo = metadata.diagnostics.retryInfo ?? fetchResult.retryInfo;
3916
4256
  metadata.trace = metadata.diagnostics.trace;
@@ -3952,8 +4292,64 @@ async function fetchMetadata(url, options = {}) {
3952
4292
  };
3953
4293
  }
3954
4294
  }
4295
/**
 * Builds the `ok: false` metadata result returned when the provider blocked
 * the fetch. All media lists are empty and every confidence value is 0; the
 * diagnostics carry the fetch details, warnings and trace.
 *
 * NOTE(review): REDDIT_BLOCKED_METADATA_WARNING is always added, while
 * `siteName` is only set for platform "reddit" — confirm no other provider
 * can reach this path.
 *
 * @param {string} requestedUrl - Normalized URL the caller asked for.
 * @param {object} fetchResult - Strategy result; reads page, trace, warnings,
 *   fallbacksAttempted, sourcePriority, extractionMethod, retryInfo and
 *   providerDiagnostics.
 * @param {number} fetchDurationMs - Elapsed fetch time in milliseconds.
 * @returns {object} Metadata result with `ok: false`.
 */
function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationMs) {
  const { page, providerDiagnostics } = fetchResult;

  const shortUrlTrace = page.isShortUrl ? [`detected short URL provider: ${page.shortUrlProvider ?? "unknown"}`] : [];
  const redirectCount = page.redirects.length;
  const redirectTrace = redirectCount > 0 ? [`resolved ${redirectCount} redirect${redirectCount === 1 ? "" : "s"}`] : [];
  const trace = uniqueStrings4([
    ...shortUrlTrace,
    ...redirectTrace,
    ...fetchResult.trace,
    "detected blocked provider response"
  ]);

  const isNonSuccessStatus = page.statusCode < 200 || page.statusCode >= 300;
  const statusWarning = isNonSuccessStatus ? [`Fetch completed with non-success status code ${page.statusCode}.`] : [];
  const warnings = uniqueStrings4([
    ...fetchResult.warnings,
    REDDIT_BLOCKED_METADATA_WARNING,
    ...statusWarning
  ]);

  const diagnostics = {
    originalUrl: requestedUrl,
    finalUrl: page.finalUrl,
    isShortUrl: page.isShortUrl,
    shortUrlProvider: page.shortUrlProvider,
    statusCode: page.statusCode,
    contentType: page.contentType,
    redirects: page.redirects,
    sourcesUsed: [],
    warnings,
    fallbacksAttempted: mergeFallbackAttempts2(undefined, fetchResult.fallbacksAttempted),
    trace,
    sourcePriority: fetchResult.sourcePriority,
    extractionMethod: fetchResult.extractionMethod,
    retryInfo: fetchResult.retryInfo,
    providerDiagnostics,
    confidenceBreakdown: {
      title: 0,
      description: 0,
      image: 0,
      structuredData: 0,
      adapter: 0
    },
    fetchDurationMs,
    extractedAt: (/* @__PURE__ */ new Date()).toISOString()
  };

  return {
    ok: false,
    url: requestedUrl,
    finalUrl: page.finalUrl,
    type: "unknown",
    siteName: providerDiagnostics?.platform === "reddit" ? "Reddit" : undefined,
    confidence: 0,
    completeness: 0,
    reliability: 0,
    images: [],
    videos: [],
    audio: [],
    favicons: [],
    trace,
    diagnostics
  };
}
3955
4351
  async function fetchPageWithStrategies(requestedUrl, options) {
3956
- if (isRedditUrl(requestedUrl)) {
4352
+ if (isRedditUrl2(requestedUrl)) {
3957
4353
  return fetchRedditPageWithStrategy(requestedUrl, options);
3958
4354
  }
3959
4355
  return {
@@ -3966,6 +4362,7 @@ async function fetchPageWithStrategies(requestedUrl, options) {
3966
4362
  async function fetchRedditPageWithStrategy(requestedUrl, options) {
3967
4363
  const attempts = [];
3968
4364
  const warnings = [];
4365
+ const informationalFallbacks = [];
3969
4366
  const sourcePriority = ["redditJsonEndpoint", "oldReddit", "embeddedStructuredData", "openGraph", "html"];
3970
4367
  let lastError;
3971
4368
  const jsonUrl = redditJsonEndpoint(requestedUrl);
@@ -3976,7 +4373,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
3976
4373
  });
3977
4374
  attempts.push(attempt);
3978
4375
  lastError = attempt.error;
3979
- if (attempt.page && attempt.ok && !attempt.blocked) {
4376
+ if (attempt.page && attempt.ok) {
3980
4377
  const redditPost = parseRedditJsonPayload(attempt.page.html);
3981
4378
  if (redditPost?.title) {
3982
4379
  return {
@@ -3991,7 +4388,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
3991
4388
  }
3992
4389
  warnings.push("Reddit JSON endpoint responded, but no post payload could be extracted.");
3993
4390
  } else if (attempt.blocked) {
3994
- warnings.push("Reddit JSON endpoint appears to have blocked access.");
4391
+ informationalFallbacks.push("Informational fallback: Reddit JSON endpoint appears to have blocked access; continuing with fallback extraction.");
3995
4392
  }
3996
4393
  }
3997
4394
  const oldRedditUrl = redditOldUrl(requestedUrl);
@@ -3999,12 +4396,12 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
3999
4396
  const attempt = await attemptFetch("oldReddit", oldRedditUrl, options);
4000
4397
  attempts.push(attempt);
4001
4398
  lastError = attempt.error;
4002
- if (attempt.page && attempt.ok && !attempt.blocked) {
4399
+ if (attempt.page && attempt.ok) {
4003
4400
  return {
4004
4401
  page: attempt.page,
4005
4402
  fallbacksAttempted: attempts,
4006
4403
  warnings,
4007
- trace: ["retried Reddit page through old.reddit"],
4404
+ trace: [...informationalFallbacks, "retried Reddit page through old.reddit"],
4008
4405
  sourcePriority,
4009
4406
  extractionMethod: "reddit:oldReddit",
4010
4407
  retryInfo: redditRetryInfo(attempts)
@@ -4017,33 +4414,48 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4017
4414
  const htmlAttempt = await attemptFetch("redditHtmlFallback", requestedUrl, options);
4018
4415
  attempts.push(htmlAttempt);
4019
4416
  lastError = htmlAttempt.error;
4020
- if (htmlAttempt.page) {
4021
- if (htmlAttempt.blocked) {
4022
- warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
4023
- }
4417
+ if (htmlAttempt.page && htmlAttempt.ok) {
4024
4418
  return {
4025
4419
  page: htmlAttempt.page,
4026
4420
  fallbacksAttempted: attempts,
4027
4421
  warnings,
4028
- trace: ["used Reddit HTML fallback"],
4422
+ trace: [...informationalFallbacks, "used Reddit HTML fallback"],
4029
4423
  sourcePriority,
4030
4424
  extractionMethod: "reddit:htmlFallback",
4031
4425
  retryInfo: redditRetryInfo(attempts)
4032
4426
  };
4033
4427
  }
4428
+ if (htmlAttempt.blocked) {
4429
+ warnings.push("Reddit HTML fallback appears to have been blocked; metadata may be incomplete.");
4430
+ }
4431
+ const providerDiagnostics = redditProviderDiagnosticsFromAttempts(attempts);
4432
+ if (providerDiagnostics) {
4433
+ return {
4434
+ page: synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics),
4435
+ fallbacksAttempted: attempts,
4436
+ warnings: uniqueStrings4([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
4437
+ trace: [...informationalFallbacks, "Reddit provider blocked metadata extraction"],
4438
+ sourcePriority,
4439
+ extractionMethod: "reddit:blockedProvider",
4440
+ retryInfo: redditRetryInfo(attempts),
4441
+ providerDiagnostics
4442
+ };
4443
+ }
4034
4444
  throw lastError ?? new Error("All Reddit extraction fetch attempts failed.");
4035
4445
  }
4036
4446
  async function attemptFetch(method, url, options) {
4037
4447
  try {
4038
4448
  const page = await fetchPage(url, options);
4039
4449
  const retryAfter = page.headers["retry-after"];
4040
- const blocked = isRedditBlocked(page);
4450
+ const blockReason = redditBlockReason(page);
4451
+ const blocked = Boolean(blockReason);
4041
4452
  return {
4042
4453
  method,
4043
4454
  url,
4044
4455
  ok: page.statusCode >= 200 && page.statusCode < 300 && !blocked,
4045
4456
  statusCode: page.statusCode,
4046
4457
  blocked,
4458
+ blockReason,
4047
4459
  retryAfter,
4048
4460
  page
4049
4461
  };
@@ -4056,7 +4468,7 @@ async function attemptFetch(method, url, options) {
4056
4468
  };
4057
4469
  }
4058
4470
  }
4059
- function isRedditUrl(url) {
4471
+ function isRedditUrl2(url) {
4060
4472
  try {
4061
4473
  const host = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
4062
4474
  return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
@@ -4172,46 +4584,163 @@ function findRedditPostRecord(value) {
4172
4584
  return void 0;
4173
4585
  }
4174
4586
  function redditImagesFromPost(post) {
4175
- const images = [];
4587
+ const images = [
4588
+ ...redditGalleryImagesFromPost(post),
4589
+ ...redditDirectImagesFromPost(post)
4590
+ ];
4176
4591
  const preview = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
4177
4592
  for (const image of preview) {
4178
4593
  if (!isRecord5(image)) {
4179
4594
  continue;
4180
4595
  }
4181
- for (const candidate of [image.source, ...Array.isArray(image.resolutions) ? image.resolutions : []]) {
4182
- if (!isRecord5(candidate)) {
4183
- continue;
4184
- }
4185
- const url = redditMediaUrl(stringFromUnknown4(candidate.url));
4186
- if (!url) {
4187
- continue;
4188
- }
4189
- images.push({
4190
- url,
4191
- kind: "image",
4192
- source: "adapter",
4193
- width: numberFromUnknown2(candidate.width),
4194
- height: numberFromUnknown2(candidate.height),
4195
- metadata: {
4196
- adapter: "redditJsonEndpoint",
4197
- originalSource: "redditJsonEndpoint"
4198
- }
4199
- });
4596
+ const source = redditImageFromRecord(image.source, "previewOriginal");
4597
+ if (source) {
4598
+ images.push(source);
4599
+ continue;
4600
+ }
4601
+ const fallbackResolution = largestRedditImageRecord(Array.isArray(image.resolutions) ? image.resolutions : []);
4602
+ const fallback = redditImageFromRecord(fallbackResolution, "previewResolution");
4603
+ if (fallback) {
4604
+ images.push(fallback);
4200
4605
  }
4201
4606
  }
4202
4607
  const thumbnail = redditMediaUrl(stringFromUnknown4(post.thumbnail));
4203
4608
  if (thumbnail && /^https?:\/\//i.test(thumbnail)) {
4204
- images.push({
4205
- url: thumbnail,
4206
- kind: "image",
4207
- source: "adapter",
4208
- metadata: {
4209
- adapter: "redditJsonEndpoint",
4210
- originalSource: "redditJsonEndpoint"
4211
- }
4212
- });
4609
+ const thumbnailAsset = redditImageAsset(thumbnail, void 0, void 0, "thumbnail");
4610
+ if (thumbnailAsset) {
4611
+ images.push(thumbnailAsset);
4612
+ }
4613
+ }
4614
+ return dedupeRedditImages(prioritizeRedditImages(images));
4615
+ }
4616
/**
 * Builds image assets for a Reddit gallery post from `post.media_metadata`.
 *
 * Ids listed in `post.gallery_data.items` are emitted first, in that order;
 * any remaining own keys of `media_metadata` follow in object key order.
 * Each media id is processed at most once, so a `media_id` repeated in
 * `gallery_data.items` no longer yields a duplicate asset and ids that
 * produced no asset are not re-evaluated in the second pass.
 *
 * @param {object} post - Reddit post record from the JSON endpoint.
 * @returns {object[]} Gallery image assets (empty when there is no metadata).
 */
function redditGalleryImagesFromPost(post) {
  if (!isRecord5(post.media_metadata)) {
    return [];
  }
  const mediaMetadata = post.media_metadata;
  const galleryItems = isRecord5(post.gallery_data) && Array.isArray(post.gallery_data.items) ? post.gallery_data.items : [];
  const orderedIds = galleryItems
    .map((item) => (isRecord5(item) ? stringFromUnknown4(item.media_id) : void 0))
    .filter((id) => Boolean(id));

  const visitedIds = new Set();
  const assets = [];
  const collect = (id) => {
    if (visitedIds.has(id)) {
      return;
    }
    visitedIds.add(id);
    // Ids come from remote JSON; never resolve them against inherited properties.
    if (!Object.hasOwn(mediaMetadata, id)) {
      return;
    }
    const asset = redditImageFromMediaMetadata(mediaMetadata[id], id);
    if (asset) {
      assets.push(asset);
    }
  };

  for (const id of orderedIds) {
    collect(id);
  }
  for (const id of Object.keys(mediaMetadata)) {
    collect(id);
  }
  return assets;
}
4643
/**
 * Returns the post's own link as an image asset when it points at a direct
 * Reddit image host.
 *
 * The URL is taken from `url_overridden_by_dest`, falling back to `url`.
 * Width and height are borrowed from the first preview source record, if any.
 *
 * @param {object} post - Reddit post record from the JSON endpoint.
 * @returns {object[]} A one-element array with the asset, or an empty array.
 */
function redditDirectImagesFromPost(post) {
  const rawUrl = stringFromUnknown4(post.url_overridden_by_dest) ?? stringFromUnknown4(post.url);
  const directUrl = redditMediaUrl(rawUrl);
  if (!directUrl) {
    return [];
  }
  if (!isDirectRedditImageUrl(directUrl)) {
    return [];
  }

  const dimensions = previewSourceRecord(post);
  const width = numberFromUnknown2(dimensions?.width);
  const height = numberFromUnknown2(dimensions?.height);
  const asset = redditImageAsset(directUrl, width, height, "directImage");
  if (!asset) {
    return [];
  }
  return [asset];
}
4657
/**
 * Converts one `media_metadata` entry into a gallery image asset.
 *
 * The URL is read from the entry's `s` record, trying `u`, then `gif`, then
 * `mp4`; dimensions come from `x`/`y` with `width`/`height` as fallbacks, and
 * the entry's `m` value is passed through as the asset type.
 * NOTE(review): an `mp4` URL is still emitted with kind "image" — confirm
 * that downstream consumers expect this.
 *
 * @param {unknown} value - The `media_metadata` entry.
 * @param {string} mediaId - Key of the entry, stored as `metadata.redditMediaId`.
 * @returns {object|undefined} The asset, or undefined when none can be built.
 */
function redditImageFromMediaMetadata(value, mediaId) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const source = isRecord5(value.s) ? value.s : void 0;
  const rawUrl = stringFromUnknown4(source?.u) ?? stringFromUnknown4(source?.gif) ?? stringFromUnknown4(source?.mp4);
  const url = redditMediaUrl(rawUrl);
  if (!url) {
    return void 0;
  }

  const width = numberFromUnknown2(source?.x) ?? numberFromUnknown2(source?.width);
  const height = numberFromUnknown2(source?.y) ?? numberFromUnknown2(source?.height);
  const mimeType = stringFromUnknown4(value.m);
  const asset = redditImageAsset(url, width, height, "gallery", mimeType);
  if (!asset) {
    return void 0;
  }

  // Tag the asset with the gallery media id without mutating the original.
  const metadata = { ...asset.metadata, redditMediaId: mediaId };
  return { ...asset, metadata };
}
4683
/**
 * Builds an image asset from a record carrying a URL and dimensions.
 *
 * Accepts both the long field names (`url`, `width`, `height`) and the short
 * ones (`u`, `x`, `y`); the long names win when both are present.
 *
 * @param {unknown} value - Candidate record.
 * @param {string} redditMediaKind - Label stored in the asset metadata.
 * @returns {object|undefined} The asset, or undefined when none can be built.
 */
function redditImageFromRecord(value, redditMediaKind) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const rawUrl = stringFromUnknown4(value.url) ?? stringFromUnknown4(value.u);
  const url = redditMediaUrl(rawUrl);
  if (!url) {
    return void 0;
  }
  const width = numberFromUnknown2(value.width) ?? numberFromUnknown2(value.x);
  const height = numberFromUnknown2(value.height) ?? numberFromUnknown2(value.y);
  return redditImageAsset(url, width, height, redditMediaKind);
}
4698
/**
 * Creates an adapter-sourced image asset and keeps it only if it passes
 * `isAllowedRedditImageCandidate`.
 *
 * @param {string} url - Image URL.
 * @param {number|undefined} width - Pixel width, when known.
 * @param {number|undefined} height - Pixel height, when known.
 * @param {string} redditMediaKind - Label stored in `metadata.redditMediaKind`.
 * @param {string} [type] - Optional type value stored on the asset.
 * @returns {object|undefined} The asset, or undefined when it is not allowed.
 */
function redditImageAsset(url, width, height, redditMediaKind, type) {
  const metadata = {
    adapter: "redditJsonEndpoint",
    originalSource: "redditJsonEndpoint",
    redditMediaKind
  };
  const candidate = { url, kind: "image", source: "adapter", width, height, type, metadata };
  if (!isAllowedRedditImageCandidate(candidate)) {
    return void 0;
  }
  return candidate;
}
4714
/**
 * Picks the record with the greatest width × height from a list.
 *
 * Non-record entries are ignored and a missing dimension counts as 0.
 *
 * @param {unknown[]} values - Candidate resolution records.
 * @returns {object|undefined} The largest record, or undefined if there is none.
 */
function largestRedditImageRecord(values) {
  const pixelArea = (record) => (numberFromUnknown2(record.width) ?? 0) * (numberFromUnknown2(record.height) ?? 0);
  // filter() returns a fresh array, so sorting it leaves the caller's list untouched.
  const records = values.filter(isRecord5);
  records.sort((left, right) => pixelArea(right) - pixelArea(left));
  return records[0];
}
4719
/**
 * Returns the `source` record of the first record-shaped preview image.
 *
 * @param {object} post - Reddit post record from the JSON endpoint.
 * @returns {object|undefined} The source record, or undefined when absent.
 */
function previewSourceRecord(post) {
  if (!isRecord5(post.preview) || !Array.isArray(post.preview.images)) {
    return void 0;
  }
  const firstRecord = post.preview.images.find(isRecord5);
  if (!firstRecord) {
    return void 0;
  }
  return isRecord5(firstRecord.source) ? firstRecord.source : void 0;
}
4724
/**
 * Tells whether a URL is served from one of Reddit's direct image hosts
 * (`i.redd.it` or `preview.redd.it`).
 *
 * @param {string} value - URL to inspect.
 * @returns {boolean} True for a direct image host; false otherwise, including
 *   when the value cannot be parsed as a URL.
 */
function isDirectRedditImageUrl(value) {
  let hostname;
  try {
    hostname = new URL(value).hostname;
  } catch {
    // Unparseable input is simply not a direct image URL.
    return false;
  }
  return ["i.redd.it", "preview.redd.it"].includes(hostname.toLowerCase());
}
4733
/**
 * Removes images whose `url` has already been seen, keeping the first
 * occurrence and the original relative order.
 *
 * @param {Iterable<object>} images - Image assets, each with a `url`.
 * @returns {object[]} New array with one asset per distinct URL.
 */
function dedupeRedditImages(images) {
  // A Map keeps insertion order, so values() yields first occurrences in order.
  const firstByUrl = new Map();
  for (const image of images) {
    if (!firstByUrl.has(image.url)) {
      firstByUrl.set(image.url, image);
    }
  }
  return [...firstByUrl.values()];
}
4216
4745
  function redditVideosFromPost(post) {
4217
4746
  const videos = [];
@@ -4238,7 +4767,9 @@ function redditVideosFromPost(post) {
4238
4767
  }
4239
4768
  function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
4240
4769
  const finalUrl = post.canonicalUrl ?? requestedUrl;
4241
- const bestImage = post.images.sort((left, right) => (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0))[0];
4770
+ const bestImage = post.images.slice().sort(
4771
+ (left, right) => redditImagePriority(right) - redditImagePriority(left) || (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0)
4772
+ )[0];
4242
4773
  const video = post.videos[0];
4243
4774
  const structuredData = {
4244
4775
  "@context": "https://schema.org",
@@ -4294,8 +4825,52 @@ function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
4294
4825
  statusCode: jsonPage.statusCode
4295
4826
  };
4296
4827
  }
4297
- function isRedditBlocked(page) {
4298
- return page.statusCode === 403 || page.statusCode === 429 || /please wait for verification|whoa there, pardner|blocked|forbidden|too many requests|request has been blocked/i.test(page.html);
4828
/**
 * Summarizes blocked fetch attempts into a provider diagnostics record.
 *
 * An attempt whose block reason is "provider_verification_required" is
 * preferred; otherwise the last blocked attempt is used.
 *
 * @param {object[]} attempts - Fetch attempts made so far.
 * @returns {object|undefined} Diagnostics, or undefined when nothing was blocked.
 */
function redditProviderDiagnosticsFromAttempts(attempts) {
  const blocked = attempts.filter((attempt) => attempt.blocked);
  if (blocked.length === 0) {
    return void 0;
  }

  const verificationAttempt = blocked.find((attempt) => attempt.blockReason === "provider_verification_required");
  const chosen = verificationAttempt ?? blocked.at(-1);
  const reason = chosen?.blockReason ?? "provider_blocked_request";

  return {
    platform: "reddit",
    blocked: true,
    statusCode: chosen?.statusCode,
    reason,
    suggestedAction: PROVIDER_BLOCKED_SUGGESTED_ACTION
  };
}
4842
/**
 * Builds an empty placeholder page for a request that Reddit blocked.
 *
 * Response details (short-URL flags, content type, redirects, headers) are
 * copied from the first attempt whose block reason matches the diagnostics,
 * or failing that from the most recent attempt that has a page. The body is
 * always empty, and the status code falls back to 403.
 *
 * @param {string} requestedUrl - URL the caller asked for.
 * @param {object[]} attempts - Fetch attempts made so far.
 * @param {object} providerDiagnostics - Diagnostics with `reason` and `statusCode`.
 * @returns {object} Synthesized page record.
 */
function synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics) {
  const matchingAttempt = attempts.find((attempt) => attempt.blockReason === providerDiagnostics.reason);
  const referencePage = matchingAttempt?.page ?? attempts.slice().reverse().find((attempt) => attempt.page)?.page;

  const statusCode = providerDiagnostics.statusCode ?? referencePage?.statusCode ?? 403;
  const redirects = referencePage?.redirects ?? [];
  const headers = referencePage?.headers ?? {};

  return {
    url: requestedUrl,
    originalUrl: requestedUrl,
    finalUrl: requestedUrl,
    isShortUrl: referencePage?.isShortUrl ?? false,
    shortUrlProvider: referencePage?.shortUrlProvider,
    html: "",
    bytes: new Uint8Array(),
    statusCode,
    contentType: referencePage?.contentType,
    redirects,
    headers
  };
}
4858
/**
 * Classifies a fetched page as blocked by Reddit, if it looks that way.
 *
 * Verification interstitials are checked first, then HTTP 403/429 and
 * known block phrases in the tag-stripped title and body text.
 * NOTE(review): the phrase checks run against the whole response text, so a
 * page whose own content contains e.g. "forbidden" would presumably be
 * classified as blocked as well — confirm against callers.
 *
 * @param {object} page - Fetched page with `html` and `statusCode`.
 * @returns {string|undefined} "provider_verification_required",
 *   "provider_blocked_request", or undefined when the page looks fine.
 */
function redditBlockReason(page) {
  const title = htmlTitle(page.html);
  const text = normalizeText(`${title ?? ""} ${page.html}`);
  const titleText = title ?? "";

  if (/reddit\s*-\s*please wait for verification/i.test(titleText)) {
    return "provider_verification_required";
  }
  if (/please wait for verification|verification required|verify you are human/i.test(text)) {
    return "provider_verification_required";
  }

  if (page.statusCode === 403 || page.statusCode === 429) {
    return "provider_blocked_request";
  }
  if (/whoa there, pardner|request has been blocked|too many requests|forbidden|you're blocked|you are blocked|youre blocked|blocked by network security/i.test(text)) {
    return "provider_blocked_request";
  }
  if (/^blocked$/i.test(titleText)) {
    return "provider_blocked_request";
  }
  return void 0;
}
4869
/**
 * Extracts the contents of the first `<title>` element and normalizes it
 * with `normalizeText`.
 *
 * @param {string} html - Raw HTML document.
 * @returns {string} Normalized title text.
 */
function htmlTitle(html) {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const rawTitle = titleMatch?.[1];
  return normalizeText(rawTitle);
}
4872
/**
 * Strips markup tags, collapses whitespace runs to single spaces and trims.
 *
 * @param {string|null|undefined} value - Text that may contain HTML tags.
 * @returns {string} Cleaned text; an empty string for null or undefined input.
 */
function normalizeText(value) {
  if (value == null) {
    return "";
  }
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const collapsed = withoutTags.replace(/\s+/g, " ");
  return collapsed.trim();
}
4300
4875
  function redditRetryInfo(attempts) {
4301
4876
  const blockedAttempts = attempts.filter((attempt) => attempt.blocked || attempt.statusCode === 429 || attempt.statusCode === 403);
@@ -4329,7 +4904,7 @@ function mergeFallbackAttempts2(existing, incoming) {
4329
4904
  }
4330
4905
  const seen = /* @__PURE__ */ new Set();
4331
4906
  return attempts.map((value) => {
4332
- const { page: _page, ...attempt } = value;
4907
+ const { page: _page, blockReason: _blockReason, ...attempt } = value;
4333
4908
  return attempt;
4334
4909
  }).filter((attempt) => {
4335
4910
  const key = `${attempt.method}:${attempt.url ?? ""}:${attempt.statusCode ?? ""}:${attempt.error ?? ""}`;
@@ -4340,7 +4915,7 @@ function mergeFallbackAttempts2(existing, incoming) {
4340
4915
  return true;
4341
4916
  });
4342
4917
  }
4343
- function uniqueStrings3(values) {
4918
/**
 * Returns the distinct truthy entries of a list, in first-seen order.
 *
 * @param {Array<string|null|undefined>} values - Values to de-duplicate.
 * @returns {string[]} Unique truthy values.
 */
function uniqueStrings4(values) {
  const unique = new Set();
  for (const value of values) {
    if (value) {
      unique.add(value);
    }
  }
  return Array.from(unique);
}
4346
4921
  function redditMediaUrl(value) {