metanova 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1322,6 +1322,95 @@ function uniqueStrings(values) {
1322
1322
  return [...new Set(values.filter(Boolean))];
1323
1323
  }
1324
1324
 
1325
// src/utils/redditMedia.ts

// URL fragments that disqualify an image candidate on Reddit pages. The test is a
// case-insensitive substring match against the whole URL, so short alternatives
// such as "icon" also match inside longer words.
var REDDIT_BAD_IMAGE_URL_PATTERN = /thumbs\.redditmedia\.com|avatar|community_icon|subreddit|icon|award|emoji/i;

// `source` values that isTrustedRedditImageCandidate accepts without further
// evidence.
var TRUSTED_REDDIT_SOURCES = /* @__PURE__ */ new Set([
  "adapter", "openGraph", "twitter", "jsonLd",
  "oEmbed", "nextData", "nuxt", "initialState",
  "preloadedState", "apollo", "applicationJson", "jsonScript"
]);
1341
/**
 * Reports whether `value` parses as a URL whose host is reddit.com, redd.it,
 * or any subdomain of reddit.com (a leading "www." is ignored).
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} `false` when the value cannot be parsed as a URL.
 */
function isRedditUrl(value) {
  let hostname;
  try {
    hostname = new URL(value).hostname;
  } catch {
    return false;
  }
  const host = hostname.toLowerCase().replace(/^www\./, "");
  if (host === "reddit.com" || host === "redd.it") {
    return true;
  }
  return host.endsWith(".reddit.com");
}
1349
/**
 * Narrows image candidates for a Reddit page and orders them by priority.
 *
 * Disallowed candidates are always dropped. If any remaining candidate is
 * trusted, only the trusted ones are kept; otherwise every allowed candidate
 * is kept.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} A new, prioritized array.
 */
function filterRedditImageCandidates(images) {
  const allowed = images.filter((image) => isAllowedRedditImageCandidate(image));
  const trusted = allowed.filter((image) => isTrustedRedditImageCandidate(image));
  const pool = trusted.length === 0 ? allowed : trusted;
  return prioritizeRedditImages(pool);
}
1354
/**
 * Returns a sorted copy of `images`: highest Reddit priority first, with ties
 * broken by larger pixel area. The input array is not modified.
 *
 * @param {Array<object>} images - Image candidates.
 * @returns {Array<object>} Sorted copy.
 */
function prioritizeRedditImages(images) {
  const byPriorityThenArea = (left, right) => {
    const priorityDelta = redditImagePriority(right) - redditImagePriority(left);
    if (priorityDelta) {
      return priorityDelta;
    }
    return imageArea(right) - imageArea(left);
  };
  return images.slice().sort(byPriorityThenArea);
}
1359
/**
 * Decides whether an image may be used as a Reddit media candidate.
 *
 * A candidate is rejected when a known dimension is below 200 or when its URL
 * matches REDDIT_BAD_IMAGE_URL_PATTERN. Dimensions that are `undefined` are
 * treated as unknown and do not cause rejection.
 *
 * @param {{url: string, width?: number, height?: number}} image
 * @returns {boolean}
 */
function isAllowedRedditImageCandidate(image) {
  const isTooSmall = (dimension) => dimension !== undefined && dimension < 200;
  if (isTooSmall(image.width) || isTooSmall(image.height)) {
    return false;
  }
  const hasBadUrl = REDDIT_BAD_IMAGE_URL_PATTERN.test(image.url);
  return !hasBadUrl;
}
1368
/**
 * Ranks an image candidate for Reddit pages; a larger number means a better
 * candidate and 0 means "no Reddit signal".
 *
 * Order of checks: explicit `metadata.redditMediaKind` (gallery 700,
 * previewOriginal 620, directImage 580), then the URL host (preview.redd.it
 * 560, i.redd.it 540), then openGraph/twitter sources (420),
 * external-preview.redd.it (300), thumbnail hosts (1), and finally any other
 * URL recognised by isRedditMediaUrl (250).
 *
 * @param {{url: string, source?: string, metadata?: object}} image
 * @returns {number}
 */
function redditImagePriority(image) {
  const mediaKind = typeof image.metadata?.redditMediaKind === "string" ? image.metadata.redditMediaKind : void 0;
  const url = image.url.toLowerCase();
  if (mediaKind === "gallery") {
    return 700;
  }
  if (mediaKind === "previewOriginal") {
    return 620;
  }
  if (mediaKind === "directImage") {
    return 580;
  }
  if (/\/\/preview\.redd\.it\//i.test(url)) {
    return 560;
  }
  if (/\/\/i\.redd\.it\//i.test(url)) {
    return 540;
  }
  if (image.source === "openGraph" || image.source === "twitter") {
    return 420;
  }
  if (/\/\/external-preview\.redd\.it\//i.test(url)) {
    return 300;
  }
  // Thumbnails are commonly served from sharded hosts such as
  // a.thumbs.redditmedia.com / b.thumbs.redditmedia.com, so accept an optional
  // single-label prefix in addition to the bare host.
  if (/\/\/(?:[a-z0-9-]+\.)?thumbs\.redditmedia\.com\//i.test(url)) {
    return 1;
  }
  return isRedditMediaUrl(url) ? 250 : 0;
}
1397
/**
 * Reports whether any image in the list carries Reddit-specific metadata:
 * `metadata.adapter === "redditAdapter"`,
 * `metadata.originalSource === "redditJsonEndpoint"`, or a truthy
 * `metadata.redditMediaKind`.
 *
 * @param {Array<{metadata?: object}>} images
 * @returns {boolean}
 */
function hasRedditImageContext(images) {
  const carriesRedditMarker = (image) => {
    const metadata = image.metadata;
    if (metadata?.adapter === "redditAdapter") {
      return true;
    }
    if (metadata?.originalSource === "redditJsonEndpoint") {
      return true;
    }
    return Boolean(metadata?.redditMediaKind);
  };
  return images.some(carriesRedditMarker);
}
1404
/**
 * Tests whether `value` contains a Reddit media host: i.redd.it,
 * preview.redd.it, external-preview.redd.it, v.redd.it, or
 * thumbs.redditmedia.com. Protocol-relative URLs ("//host/...") match too.
 *
 * NOTE(review): the host must directly follow "//", so prefixed hosts such as
 * b.thumbs.redditmedia.com do not match — confirm whether that is intended.
 *
 * @param {string} value - URL text to test.
 * @returns {boolean}
 */
function isRedditMediaUrl(value) {
  const redditMediaHost = /(?:^https?:)?\/\/(?:(?:i|preview|external-preview)\.redd\.it|thumbs\.redditmedia\.com|v\.redd\.it)\//i;
  return redditMediaHost.test(value);
}
1407
/**
 * A candidate is trusted when its `source` is listed in TRUSTED_REDDIT_SOURCES
 * or when it carries a truthy `metadata.redditMediaKind`.
 *
 * @param {{source?: string, metadata?: object}} image
 * @returns {boolean}
 */
function isTrustedRedditImageCandidate(image) {
  if (TRUSTED_REDDIT_SOURCES.has(image.source)) {
    return true;
  }
  return Boolean(image.metadata?.redditMediaKind);
}
1410
/**
 * Pixel area of an image; a missing (null/undefined) dimension counts as 0.
 *
 * @param {{width?: number, height?: number}} image
 * @returns {number}
 */
function imageArea(image) {
  const { width, height } = image;
  return (width ?? 0) * (height ?? 0);
}
1413
+
1325
1414
  // src/scorers/image.ts
1326
1415
  var SOURCE_WEIGHT = {
1327
1416
  adapter: 98,
@@ -1342,6 +1431,7 @@ var SOURCE_WEIGHT = {
1342
1431
  };
1343
1432
  function scoreImages(images, customScorers = []) {
1344
1433
  const duplicateCounts = countDuplicates(images);
1434
+ const redditContext = hasRedditImageContext(images);
1345
1435
  return images.map((image, index) => {
1346
1436
  const scored = scoreImageWithDetails(image, index, images, duplicateCounts);
1347
1437
  const customScore = customScorers.reduce((total, scorer) => total + scorer(image, { index, images }), 0);
@@ -1357,7 +1447,7 @@ function scoreImages(images, customScorers = []) {
1357
1447
  }
1358
1448
  };
1359
1449
  }).sort(
1360
- (left, right) => (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea(right) - imageArea(left)
1450
+ (left, right) => (redditContext ? redditImagePriority(right) - redditImagePriority(left) : 0) || (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea2(right) - imageArea2(left)
1361
1451
  );
1362
1452
  }
1363
1453
  function selectBestImage(images, customScorers = []) {
@@ -1375,15 +1465,17 @@ function scoreImageWithDetails(image, index, images, duplicateCounts) {
1375
1465
  const dimensions = scoreDimensions(image);
1376
1466
  const format = scoreFormat(image);
1377
1467
  const urlSignal = scoreUrlSignal(image);
1468
+ const redditMedia = scoreRedditMedia(image);
1378
1469
  const urlPenalty = scoreUrlPenalty(image);
1379
1470
  const duplicatePenalty = scoreDuplicatePenalty(image, duplicateCounts);
1380
1471
  score += dimensions.score;
1381
1472
  score += format.score;
1382
1473
  score += urlSignal.score;
1474
+ score += redditMedia.score;
1383
1475
  score -= urlPenalty;
1384
1476
  score -= duplicatePenalty.score;
1385
1477
  score -= Math.min(index * 1.5, 10);
1386
- reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...duplicatePenalty.reasons);
1478
+ reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...redditMedia.reasons, ...duplicatePenalty.reasons);
1387
1479
  if (images.length === 1) {
1388
1480
  score += 4;
1389
1481
  reasons.push("only candidate image added 4 points");
@@ -1483,6 +1575,33 @@ function platformThumbnailScore(url) {
1483
1575
  }
1484
1576
  return { score: 0, reasons: [] };
1485
1577
  }
1578
/**
 * Score adjustment for Reddit-hosted media, with human-readable reasons.
 *
 * Candidates with no Reddit signal score 0. Otherwise: gallery +24,
 * previewOriginal +20, i.redd.it +18, preview.redd.it +16,
 * external-preview.redd.it -8, thumbnail hosts -60.
 *
 * @param {{url: string, source?: string, metadata?: object}} image
 * @returns {{score: number, reasons: string[]}}
 */
function scoreRedditMedia(image) {
  const priority = redditImagePriority(image);
  const url = image.url.toLowerCase();
  // Thumbnails are commonly served from sharded hosts (a.thumbs / b.thumbs), so
  // the penalty must not depend on the bare-host match alone.
  const isThumbnailHost = /\/\/(?:[a-z0-9-]+\.)?thumbs\.redditmedia\.com\//i.test(url);
  if (priority === 0 && !isRedditMediaUrl(image.url) && !isThumbnailHost) {
    return { score: 0, reasons: [] };
  }
  const mediaKind = typeof image.metadata?.redditMediaKind === "string" ? image.metadata.redditMediaKind : void 0;
  if (mediaKind === "gallery") {
    return { score: 24, reasons: ["Reddit gallery media added 24 points"] };
  }
  if (mediaKind === "previewOriginal") {
    return { score: 20, reasons: ["Reddit original preview media added 20 points"] };
  }
  if (/\/\/i\.redd\.it\//i.test(url)) {
    return { score: 18, reasons: ["Reddit direct image media added 18 points"] };
  }
  if (/\/\/preview\.redd\.it\//i.test(url)) {
    return { score: 16, reasons: ["Reddit preview media added 16 points"] };
  }
  if (/\/\/external-preview\.redd\.it\//i.test(url)) {
    return { score: -8, reasons: ["Reddit external preview media subtracted 8 points"] };
  }
  if (isThumbnailHost) {
    return { score: -60, reasons: ["Reddit thumbnail host subtracted 60 points"] };
  }
  return { score: 0, reasons: [] };
}
1486
1605
  function scoreUrlPenalty(image) {
1487
1606
  const url = image.url.toLowerCase();
1488
1607
  let penalty = 0;
@@ -1555,7 +1674,7 @@ function countDuplicates(images) {
1555
1674
  }
1556
1675
  return counts;
1557
1676
  }
1558
- function imageArea(image) {
1677
+ function imageArea2(image) {
1559
1678
  return (image.width ?? 0) * (image.height ?? 0);
1560
1679
  }
1561
1680
  function sourceSortWeight(image) {
@@ -1673,8 +1792,9 @@ function discoverMedia(rawSources, finalUrl) {
1673
1792
  if (externalResults.some((result) => (result.images?.length ?? 0) > 0 || (result.videos?.length ?? 0) > 0)) {
1674
1793
  trace.push("media discovery included adapter and plugin media");
1675
1794
  }
1795
+ const dedupedImages = dedupeMediaBySignature(images);
1676
1796
  return {
1677
- images: dedupeMediaBySignature(uniqueMediaByUrl(images)),
1797
+ images: isRedditUrl(finalUrl) ? filterRedditImageCandidates(dedupedImages) : dedupedImages,
1678
1798
  videos: dedupeMediaBySignature(uniqueMediaByUrl(videos)),
1679
1799
  audio: dedupeMediaBySignature(uniqueMediaByUrl(audio)),
1680
1800
  trace
@@ -1766,7 +1886,8 @@ function mediaFromJsonValue(value, kind, source) {
1766
1886
  height: parseNumber(stringFromUnknown(value.height)) ?? parseNumber(stringFromUnknown(nestedDetails?.height)),
1767
1887
  alt: stringFromUnknown(value.alt) ?? stringFromUnknown(value.caption) ?? stringFromUnknown(value.name) ?? stringFromUnknown(nestedDetails?.alt),
1768
1888
  title: stringFromUnknown(value.title) ?? stringFromUnknown(nestedDetails?.title),
1769
- type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type)
1889
+ type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type),
1890
+ metadata: isRecord3(value.metadata) ? value.metadata : void 0
1770
1891
  },
1771
1892
  ...srcsetAssets
1772
1893
  ];
@@ -1874,7 +1995,7 @@ function dedupeMediaBySignature(assets) {
1874
1995
  for (const asset of assets) {
1875
1996
  const key = mediaSignature2(asset.url);
1876
1997
  const current = seen.get(key);
1877
- if (!current || sourceRank(asset.source) > sourceRank(current.source)) {
1998
+ if (!current || mediaRank(asset) > mediaRank(current)) {
1878
1999
  seen.set(key, asset);
1879
2000
  }
1880
2001
  }
@@ -1908,6 +2029,10 @@ function sourceRank(source) {
1908
2029
  };
1909
2030
  return ranks[source] ?? 50;
1910
2031
  }
2032
/**
 * Rank used when choosing between assets that share a media signature.
 *
 * Any asset with a positive Reddit priority ranks at 1000 + that priority;
 * everything else falls back to sourceRank of its source.
 *
 * NOTE(review): redditImagePriority returns 420 for every openGraph/twitter
 * asset regardless of host, so this boost is not limited to Reddit pages —
 * confirm that is intended.
 *
 * @param {{url: string, source?: string, metadata?: object}} asset
 * @returns {number}
 */
function mediaRank(asset) {
  const redditPriority = redditImagePriority(asset);
  if (redditPriority > 0) {
    return 1e3 + redditPriority;
  }
  return sourceRank(asset.source);
}
1911
2036
  function shouldIgnoreMediaUrl2(url) {
1912
2037
  const normalized = url.toLowerCase();
1913
2038
  return normalized.startsWith("data:") || normalized.startsWith("blob:") || normalized.startsWith("javascript:") || /(?:sprite|spacer|blank|transparent|placeholder|tracking|beacon|pixel|emoji|favicon|apple-touch-icon)(?:[._/-]|$|\?)/i.test(normalized) || /(?:^|[/?_-])1x1(?:[._/-]|$|\?)/i.test(normalized);
@@ -2232,7 +2357,7 @@ function normalizeMetadata(rawSources, context = {}) {
2232
2357
  ...mediaDiscovery.trace,
2233
2358
  ...selectedImage.best ? [`selected image from ${sourceLabel2(selectedImage.best)}`] : []
2234
2359
  ]);
2235
- return stripUndefined2({
2360
+ const normalized = stripUndefined2({
2236
2361
  ok: true,
2237
2362
  url,
2238
2363
  finalUrl,
@@ -2262,6 +2387,12 @@ function normalizeMetadata(rawSources, context = {}) {
2262
2387
  diagnostics,
2263
2388
  trace: diagnostics.trace
2264
2389
  });
2390
+ normalized.images = selectedImage.images;
2391
+ normalized.videos = videos;
2392
+ normalized.audio = audio;
2393
+ normalized.favicons = favicons;
2394
+ normalized.trace = diagnostics.trace;
2395
+ return normalized;
2265
2396
  }
2266
2397
  function normalizeAssets2(assets, baseUrl) {
2267
2398
  return assets.map((asset) => {
@@ -2984,7 +3115,7 @@ var tiktokAdapter = {
2984
3115
  return this.detect?.(url) ?? false;
2985
3116
  },
2986
3117
  extract(context) {
2987
- return socialVideoResult("tiktokAdapter", "TikTok", context);
3118
+ return tiktokResult(context);
2988
3119
  },
2989
3120
  normalize(rawData) {
2990
3121
  return normalizePlatformResult(rawData);
@@ -3203,25 +3334,215 @@ function redditDescriptionFromContext(context) {
3203
3334
  }
3204
3335
  return { value: context.raw.html.description, method: context.raw.html.description ? "reddit:html" : void 0 };
3205
3336
  }
3206
- function socialVideoResult(source, platform, context) {
3337
+ function tiktokResult(context) {
3207
3338
  const url = new URL(context.finalUrl);
3208
3339
  const username = url.pathname.match(/@([^/]+)/)?.[1];
3209
3340
  const postId = url.pathname.match(/\/(?:video|photo)\/([^/]+)/)?.[1] ?? url.pathname.split("/").filter(Boolean).at(-1);
3341
+ const itemStruct = tiktokItemStructFromContext(context, postId);
3342
+ const titleSelection = tiktokTitleFromContext(context, itemStruct, username);
3343
+ const descriptionSelection = tiktokDescriptionFromContext(context, itemStruct);
3344
+ const author = tiktokAuthorFromItemStruct(itemStruct, username);
3345
+ const media = tiktokMediaFromContext(context, itemStruct);
3346
+ const publishedTime = tiktokPublishedTime(itemStruct) ?? publishedTimeFromContext(context);
3210
3347
  return compactAdapterResult({
3211
- source,
3212
- platform,
3348
+ source: "tiktokAdapter",
3349
+ platform: "TikTok",
3213
3350
  type: "social_post",
3214
- siteName: platform,
3351
+ siteName: "TikTok",
3215
3352
  canonicalUrl: context.raw.openGraph.url,
3216
- title: titleFromContext(context, ["title", "desc", "description", "caption"]),
3217
- description: descriptionFromContext(context),
3218
- images: markAdapterMedia(mediaFromContext(context).images, source),
3219
- videos: markAdapterMedia(mediaFromContext(context).videos, source),
3220
- author: username ? { name: username } : entityFromContext(context, ["author", "user", "creator", "owner"]),
3221
- article: { publishedTime: publishedTimeFromContext(context) },
3222
- identifiers: { username, postId }
3353
+ title: titleSelection.value,
3354
+ description: descriptionSelection.value,
3355
+ images: markAdapterMedia(media.images, "tiktokAdapter"),
3356
+ videos: markAdapterMedia(media.videos, "tiktokAdapter"),
3357
+ author,
3358
+ article: { publishedTime },
3359
+ video: postId ? {
3360
+ id: postId,
3361
+ title: titleSelection.value,
3362
+ channel: author,
3363
+ publishedTime,
3364
+ duration: tiktokVideoDuration(itemStruct),
3365
+ viewCount: tiktokStatCount(itemStruct, "playCount")
3366
+ } : void 0,
3367
+ identifiers: { username, postId },
3368
+ raw: {
3369
+ extractionMethod: titleSelection.method ?? descriptionSelection.method ?? "tiktok:htmlFallback"
3370
+ }
3223
3371
  });
3224
3372
  }
3373
/**
 * Picks a title for a TikTok post and records which strategy produced it.
 *
 * Strategies, in order: cleaned `itemStruct.desc`, the music title, a cleaned
 * title from the generic page context, and finally a synthetic
 * "TikTok post by @user" string when a username is known.
 *
 * @param {object} context - Adapter extraction context.
 * @param {object|undefined} itemStruct - TikTok item payload, if found.
 * @param {string|undefined} username - Handle parsed from the page URL.
 * @returns {{value: (string|undefined), method: (string|undefined)}}
 */
function tiktokTitleFromContext(context, itemStruct, username) {
  const caption = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
  if (caption) {
    return { value: caption, method: "tiktok:itemStruct.desc" };
  }

  const soundTitle = tiktokMusicTitle(itemStruct);
  if (soundTitle) {
    return { value: soundTitle, method: "tiktok:itemStruct.music" };
  }

  const pageTitle = cleanTikTokText(titleFromContext(context, ["desc", "caption", "title", "description"]));
  if (pageTitle) {
    return { value: pageTitle, method: "tiktok:fallback" };
  }

  if (!username) {
    return { value: void 0, method: void 0 };
  }
  return { value: `TikTok post by @${username}`, method: "tiktok:urlFallback" };
}
3391
/**
 * Picks a description for a TikTok post and records which strategy produced it.
 *
 * Prefers the cleaned `itemStruct.desc`; otherwise uses the cleaned description
 * from the generic page context. `method` is only set when a value was actually
 * found, so callers chaining `a.method ?? b.method ?? default` can still reach
 * their default when nothing was extracted.
 *
 * @param {object} context - Adapter extraction context.
 * @param {object|undefined} itemStruct - TikTok item payload, if found.
 * @returns {{value: (string|undefined), method: (string|undefined)}}
 */
function tiktokDescriptionFromContext(context, itemStruct) {
  const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
  if (desc) {
    return { value: desc, method: "tiktok:itemStruct.desc" };
  }
  const fallback = cleanTikTokText(descriptionFromContext(context));
  return {
    value: fallback,
    // Do not claim a method for a value that was not found.
    method: fallback ? "tiktok:fallback" : void 0
  };
}
3401
/**
 * Locates the TikTok `itemStruct` record inside the page's embedded data.
 *
 * Pass 1 looks at the fixed path
 * `__DEFAULT_SCOPE__ → webapp.video-detail → itemInfo → itemStruct` of every
 * embedded item. Pass 2 walks each item's data for any key named "itemStruct".
 * When `postId` is given, only a struct whose `id` equals it is accepted.
 *
 * @param {object} context - Adapter extraction context.
 * @param {string|undefined} postId - Post id parsed from the URL.
 * @returns {object|undefined}
 */
function tiktokItemStructFromContext(context, postId) {
  const embeddedItems = context.raw.embeddedData.items;
  const asRecord = (candidate) => (isRecord4(candidate) ? candidate : void 0);
  const matchesPost = (candidate) => !postId || stringFromUnknown3(candidate.id) === postId;

  for (const item of embeddedItems) {
    const defaultScope = asRecord(item.data["__DEFAULT_SCOPE__"]);
    const videoDetail = asRecord(defaultScope?.["webapp.video-detail"]);
    const itemInfo = asRecord(videoDetail?.itemInfo);
    const itemStruct = asRecord(itemInfo?.itemStruct);
    if (itemStruct && matchesPost(itemStruct)) {
      return itemStruct;
    }
  }

  for (const item of embeddedItems) {
    let found;
    walkData(item.data, (value, key) => {
      // The walker cannot be stopped early, so ignore everything after a hit.
      if (!found && key === "itemStruct" && isRecord4(value) && matchesPost(value)) {
        found = value;
      }
    });
    if (found) {
      return found;
    }
  }
  return void 0;
}
3427
/**
 * Combines media taken from the TikTok item payload with media discovered from
 * the generic page context. Item-payload entries are listed first.
 *
 * @param {object} context - Adapter extraction context.
 * @param {object|undefined} itemStruct - TikTok item payload, if found.
 * @returns {{images: Array<object>, videos: Array<object>}}
 */
function tiktokMediaFromContext(context, itemStruct) {
  const discovered = mediaFromContext(context);
  const images = [...tiktokImagesFromItemStruct(itemStruct), ...discovered.images];
  const videos = [...tiktokVideosFromItemStruct(itemStruct), ...discovered.videos];
  return { images, videos };
}
3434
/**
 * Builds image assets from the cover fields of `itemStruct.video`
 * (originCover, cover, dynamicCover, shareCover), de-duplicated by URL.
 *
 * Every asset is tagged with the video's width/height.
 * NOTE(review): those are the video's dimensions, not necessarily the cover
 * image's — confirm downstream scoring tolerates that.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @returns {Array<object>} Empty when there is no video record.
 */
function tiktokImagesFromItemStruct(itemStruct) {
  const video = itemStruct?.video;
  if (!isRecord4(video)) {
    return [];
  }
  const dimensions = {
    width: numberFromUnknown(video.width),
    height: numberFromUnknown(video.height)
  };
  const coverUrls = uniqueStrings3([
    stringFromUnknown3(video.originCover),
    stringFromUnknown3(video.cover),
    stringFromUnknown3(video.dynamicCover),
    ...urlsFromUnknown(video.shareCover)
  ]);
  return coverUrls.map((url) => ({
    url,
    kind: "image",
    source: "applicationJson",
    ...dimensions,
    metadata: { tiktokMediaKind: "videoCover" }
  }));
}
3458
/**
 * Builds video assets from the playback fields of `itemStruct.video`:
 * playAddr, downloadAddr, PlayAddrStruct.UrlList and each
 * bitrateInfo[].PlayAddr.UrlList. URLs are de-duplicated and only absolute
 * http(s) URLs are kept.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @returns {Array<object>} Empty when there is no video record.
 */
function tiktokVideosFromItemStruct(itemStruct) {
  const video = itemStruct?.video;
  if (!isRecord4(video)) {
    return [];
  }
  const dimensions = {
    width: numberFromUnknown(video.width),
    height: numberFromUnknown(video.height)
  };
  const bitrateUrls = Array.isArray(video.bitrateInfo)
    ? video.bitrateInfo.flatMap((item) => (isRecord4(item) ? urlsFromTikTokPlayAddr(item.PlayAddr) : []))
    : [];
  const playUrls = uniqueStrings3([
    stringFromUnknown3(video.playAddr),
    stringFromUnknown3(video.downloadAddr),
    ...urlsFromTikTokPlayAddr(video.PlayAddrStruct),
    ...bitrateUrls
  ]);
  const isAbsoluteHttp = (url) => /^https?:\/\//i.test(url);
  return playUrls.filter(isAbsoluteHttp).map((url) => ({
    url,
    kind: "video",
    source: "applicationJson",
    ...dimensions,
    type: "video/mp4",
    metadata: { tiktokMediaKind: "videoPlay" }
  }));
}
3483
/**
 * Extracts the URLs held in the `UrlList` field of a TikTok play-address
 * record.
 *
 * @param {unknown} value - Candidate play-address record.
 * @returns {string[]} Empty when `value` is not a record.
 */
function urlsFromTikTokPlayAddr(value) {
  return isRecord4(value) ? urlsFromUnknown(value.UrlList) : [];
}
3489
/**
 * Builds the author entity for a TikTok post.
 *
 * The display name prefers `author.nickname`, then `author.uniqueId`, then the
 * handle parsed from the page URL. The profile URL uses the URL handle when
 * present and otherwise falls back to `author.uniqueId`, so pages whose URL
 * carries no "@handle" still get an author link.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @param {string|undefined} username - Handle parsed from the page URL.
 * @returns {{name: string, url: (string|undefined)}|undefined}
 */
function tiktokAuthorFromItemStruct(itemStruct, username) {
  const author = isRecord4(itemStruct?.author) ? itemStruct.author : void 0;
  const uniqueId = stringFromUnknown3(author?.uniqueId);
  const name = stringFromUnknown3(author?.nickname) ?? uniqueId ?? username;
  if (!name) {
    return void 0;
  }
  // `username` comes straight from the URL path and is used verbatim, as
  // before; the payload-derived id is escaped because it has not been through
  // URL parsing.
  const handle = username || (uniqueId ? encodeURIComponent(uniqueId) : void 0);
  return {
    name,
    url: handle ? `https://www.tiktok.com/@${handle}` : void 0
  };
}
3500
/**
 * Converts `itemStruct.createTime` (multiplied by 1000 to get milliseconds)
 * into an ISO-8601 timestamp.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @returns {string|undefined} `undefined` when the field is missing, zero, or
 *   does not map to a representable date.
 */
function tiktokPublishedTime(itemStruct) {
  const created = numberFromUnknown(itemStruct?.createTime);
  if (!created) {
    return void 0;
  }
  const published = new Date(created * 1e3);
  // toISOString() throws RangeError on an invalid Date; a malformed payload
  // value must not abort the whole extraction.
  return Number.isNaN(published.getTime()) ? void 0 : published.toISOString();
}
3504
/**
 * Reads `itemStruct.video.duration` through stringFromUnknown3.
 *
 * NOTE(review): whether a numeric duration survives depends on how
 * stringFromUnknown3 treats numbers — confirm against its definition.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @returns {string|undefined}
 */
function tiktokVideoDuration(itemStruct) {
  const video = itemStruct?.video;
  const duration = isRecord4(video) ? video.duration : void 0;
  return stringFromUnknown3(duration);
}
3508
/**
 * Reads one numeric counter (e.g. "playCount") from `itemStruct.stats`.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @param {string} key - Name of the counter inside `stats`.
 * @returns {number|undefined}
 */
function tiktokStatCount(itemStruct, key) {
  const stats = itemStruct?.stats;
  const rawCount = isRecord4(stats) ? stats[key] : void 0;
  return numberFromUnknown(rawCount);
}
3512
/**
 * Derives a title from `itemStruct.music`.
 *
 * Returns "<title> - <authorName>" when both are usable and the title does not
 * contain "original sound"; otherwise returns the title alone.
 *
 * @param {object|undefined} itemStruct - TikTok item payload.
 * @returns {string|undefined} `undefined` when there is no usable music title.
 */
function tiktokMusicTitle(itemStruct) {
  const music = isRecord4(itemStruct?.music) ? itemStruct.music : void 0;
  const title = cleanTikTokText(stringFromUnknown3(music?.title));
  const author = cleanTikTokText(stringFromUnknown3(music?.authorName));
  if (!title) {
    return void 0;
  }
  const isOriginalSound = /original sound/i.test(title);
  return author && !isOriginalSound ? `${title} - ${author}` : title;
}
3524
/**
 * Collapses whitespace runs to single spaces and trims the text. Text that is
 * empty afterwards or that isLowQualityTikTokText flags is discarded.
 *
 * @param {string|null|undefined} value
 * @returns {string|undefined}
 */
function cleanTikTokText(value) {
  const collapsed = value?.replace(/\s+/g, " ").trim();
  const isUsable = Boolean(collapsed) && !isLowQualityTikTokText(collapsed);
  return isUsable ? collapsed : void 0;
}
3531
/**
 * Flags text that should not be used as a TikTok title or description:
 * anything containing a "{...}" span, shop/promotional phrases, and a few
 * generic TikTok page titles.
 *
 * @param {string} value - Already whitespace-normalised text.
 * @returns {boolean}
 */
function isLowQualityTikTokText(value) {
  const lowQualityPatterns = [
    /\{[^}]+\}/,
    /tiktok\s*shop|free shipping|eligible items|exclusive collections/i,
    /^tiktok live creator networks$/i,
    /^tiktok\s*-\s*make your day$/i,
    /^discover (?:new |popular )?videos?\b.*\btiktok\b/i
  ];
  return lowQualityPatterns.some((pattern) => pattern.test(value));
}
3534
/**
 * Collects trimmed, non-empty strings from a value that may be a string or an
 * arbitrarily nested array of strings. Anything else contributes nothing.
 *
 * @param {unknown} value
 * @returns {string[]}
 */
function urlsFromUnknown(value) {
  if (Array.isArray(value)) {
    return value.flatMap((entry) => urlsFromUnknown(entry));
  }
  if (typeof value !== "string") {
    return [];
  }
  const trimmed = value.trim();
  return trimmed ? [trimmed] : [];
}
3543
/**
 * Drops falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array<string|undefined>} values
 * @returns {string[]}
 */
function uniqueStrings3(values) {
  const truthyValues = values.filter(Boolean);
  return Array.from(new Set(truthyValues));
}
3225
3546
  function normalizePlatformResult(rawData) {
3226
3547
  const type = rawData.type ?? inferAdapterType(rawData);
3227
3548
  return compactAdapterResult({
@@ -4012,7 +4333,7 @@ async function fetchMetadata(url, options = {}) {
4012
4333
  ...metadata.canonicalUrl ? ["resolved canonical URL"] : []
4013
4334
  ];
4014
4335
  metadata.diagnostics.fallbacksAttempted = mergeFallbackAttempts2(metadata.diagnostics.fallbacksAttempted, fetchResult.fallbacksAttempted);
4015
- metadata.diagnostics.sourcePriority = uniqueStrings3([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
4336
+ metadata.diagnostics.sourcePriority = uniqueStrings4([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
4016
4337
  metadata.diagnostics.extractionMethod = metadata.diagnostics.extractionMethod ?? fetchResult.extractionMethod;
4017
4338
  metadata.diagnostics.retryInfo = metadata.diagnostics.retryInfo ?? fetchResult.retryInfo;
4018
4339
  metadata.trace = metadata.diagnostics.trace;
@@ -4057,13 +4378,13 @@ async function fetchMetadata(url, options = {}) {
4057
4378
  function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationMs) {
4058
4379
  const page = fetchResult.page;
4059
4380
  const providerDiagnostics = fetchResult.providerDiagnostics;
4060
- const trace = uniqueStrings3([
4381
+ const trace = uniqueStrings4([
4061
4382
  ...page.isShortUrl ? [`detected short URL provider: ${page.shortUrlProvider ?? "unknown"}`] : [],
4062
4383
  ...page.redirects.length > 0 ? [`resolved ${page.redirects.length} redirect${page.redirects.length === 1 ? "" : "s"}`] : [],
4063
4384
  ...fetchResult.trace,
4064
4385
  "detected blocked provider response"
4065
4386
  ]);
4066
- const warnings = uniqueStrings3([
4387
+ const warnings = uniqueStrings4([
4067
4388
  ...fetchResult.warnings,
4068
4389
  REDDIT_BLOCKED_METADATA_WARNING,
4069
4390
  ...page.statusCode < 200 || page.statusCode >= 300 ? [`Fetch completed with non-success status code ${page.statusCode}.`] : []
@@ -4111,7 +4432,7 @@ function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationM
4111
4432
  };
4112
4433
  }
4113
4434
  async function fetchPageWithStrategies(requestedUrl, options) {
4114
- if (isRedditUrl(requestedUrl)) {
4435
+ if (isRedditUrl2(requestedUrl)) {
4115
4436
  return fetchRedditPageWithStrategy(requestedUrl, options);
4116
4437
  }
4117
4438
  return {
@@ -4124,6 +4445,7 @@ async function fetchPageWithStrategies(requestedUrl, options) {
4124
4445
  async function fetchRedditPageWithStrategy(requestedUrl, options) {
4125
4446
  const attempts = [];
4126
4447
  const warnings = [];
4448
+ const informationalFallbacks = [];
4127
4449
  const sourcePriority = ["redditJsonEndpoint", "oldReddit", "embeddedStructuredData", "openGraph", "html"];
4128
4450
  let lastError;
4129
4451
  const jsonUrl = redditJsonEndpoint(requestedUrl);
@@ -4149,7 +4471,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4149
4471
  }
4150
4472
  warnings.push("Reddit JSON endpoint responded, but no post payload could be extracted.");
4151
4473
  } else if (attempt.blocked) {
4152
- warnings.push("Reddit JSON endpoint appears to have blocked access.");
4474
+ informationalFallbacks.push("Informational fallback: Reddit JSON endpoint appears to have blocked access; continuing with fallback extraction.");
4153
4475
  }
4154
4476
  }
4155
4477
  const oldRedditUrl = redditOldUrl(requestedUrl);
@@ -4162,7 +4484,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4162
4484
  page: attempt.page,
4163
4485
  fallbacksAttempted: attempts,
4164
4486
  warnings,
4165
- trace: ["retried Reddit page through old.reddit"],
4487
+ trace: [...informationalFallbacks, "retried Reddit page through old.reddit"],
4166
4488
  sourcePriority,
4167
4489
  extractionMethod: "reddit:oldReddit",
4168
4490
  retryInfo: redditRetryInfo(attempts)
@@ -4180,7 +4502,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4180
4502
  page: htmlAttempt.page,
4181
4503
  fallbacksAttempted: attempts,
4182
4504
  warnings,
4183
- trace: ["used Reddit HTML fallback"],
4505
+ trace: [...informationalFallbacks, "used Reddit HTML fallback"],
4184
4506
  sourcePriority,
4185
4507
  extractionMethod: "reddit:htmlFallback",
4186
4508
  retryInfo: redditRetryInfo(attempts)
@@ -4194,8 +4516,8 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4194
4516
  return {
4195
4517
  page: synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics),
4196
4518
  fallbacksAttempted: attempts,
4197
- warnings: uniqueStrings3([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
4198
- trace: ["Reddit provider blocked metadata extraction"],
4519
+ warnings: uniqueStrings4([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
4520
+ trace: [...informationalFallbacks, "Reddit provider blocked metadata extraction"],
4199
4521
  sourcePriority,
4200
4522
  extractionMethod: "reddit:blockedProvider",
4201
4523
  retryInfo: redditRetryInfo(attempts),
@@ -4229,7 +4551,7 @@ async function attemptFetch(method, url, options) {
4229
4551
  };
4230
4552
  }
4231
4553
  }
4232
- function isRedditUrl(url) {
4554
+ function isRedditUrl2(url) {
4233
4555
  try {
4234
4556
  const host = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
4235
4557
  return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
@@ -4345,46 +4667,163 @@ function findRedditPostRecord(value) {
4345
4667
  return void 0;
4346
4668
  }
4347
4669
  function redditImagesFromPost(post) {
4348
- const images = [];
4670
+ const images = [
4671
+ ...redditGalleryImagesFromPost(post),
4672
+ ...redditDirectImagesFromPost(post)
4673
+ ];
4349
4674
  const preview = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
4350
4675
  for (const image of preview) {
4351
4676
  if (!isRecord5(image)) {
4352
4677
  continue;
4353
4678
  }
4354
- for (const candidate of [image.source, ...Array.isArray(image.resolutions) ? image.resolutions : []]) {
4355
- if (!isRecord5(candidate)) {
4356
- continue;
4357
- }
4358
- const url = redditMediaUrl(stringFromUnknown4(candidate.url));
4359
- if (!url) {
4360
- continue;
4361
- }
4362
- images.push({
4363
- url,
4364
- kind: "image",
4365
- source: "adapter",
4366
- width: numberFromUnknown2(candidate.width),
4367
- height: numberFromUnknown2(candidate.height),
4368
- metadata: {
4369
- adapter: "redditJsonEndpoint",
4370
- originalSource: "redditJsonEndpoint"
4371
- }
4372
- });
4679
+ const source = redditImageFromRecord(image.source, "previewOriginal");
4680
+ if (source) {
4681
+ images.push(source);
4682
+ continue;
4683
+ }
4684
+ const fallbackResolution = largestRedditImageRecord(Array.isArray(image.resolutions) ? image.resolutions : []);
4685
+ const fallback = redditImageFromRecord(fallbackResolution, "previewResolution");
4686
+ if (fallback) {
4687
+ images.push(fallback);
4373
4688
  }
4374
4689
  }
4375
4690
  const thumbnail = redditMediaUrl(stringFromUnknown4(post.thumbnail));
4376
4691
  if (thumbnail && /^https?:\/\//i.test(thumbnail)) {
4377
- images.push({
4378
- url: thumbnail,
4379
- kind: "image",
4380
- source: "adapter",
4381
- metadata: {
4382
- adapter: "redditJsonEndpoint",
4383
- originalSource: "redditJsonEndpoint"
4384
- }
4385
- });
4692
+ const thumbnailAsset = redditImageAsset(thumbnail, void 0, void 0, "thumbnail");
4693
+ if (thumbnailAsset) {
4694
+ images.push(thumbnailAsset);
4695
+ }
4386
4696
  }
4387
- return images;
4697
+ return dedupeRedditImages(prioritizeRedditImages(images));
4698
+ }
4699
/**
 * Builds gallery image assets from a Reddit post's `media_metadata`.
 *
 * Entries referenced by `gallery_data.items[].media_id` come first, in gallery
 * order; remaining `media_metadata` entries that did not already yield an
 * asset follow in object order.
 *
 * @param {object} post - Reddit post record.
 * @returns {Array<object>} Empty when the post has no `media_metadata` record.
 */
function redditGalleryImagesFromPost(post) {
  if (!isRecord5(post.media_metadata)) {
    return [];
  }
  const mediaMetadata = post.media_metadata;
  const galleryData = post.gallery_data;
  const galleryItems = isRecord5(galleryData) && Array.isArray(galleryData.items) ? galleryData.items : [];
  const orderedIds = galleryItems
    .map((item) => (isRecord5(item) ? stringFromUnknown4(item.media_id) : void 0))
    .filter(Boolean);

  const placedIds = /* @__PURE__ */ new Set();
  const inGalleryOrder = orderedIds.flatMap((id) => {
    const asset = redditImageFromMediaMetadata(mediaMetadata[id], id);
    if (!asset) {
      return [];
    }
    placedIds.add(id);
    return [asset];
  });
  const leftovers = Object.entries(mediaMetadata)
    .filter(([id]) => !placedIds.has(id))
    .flatMap(([id, entry]) => {
      const asset = redditImageFromMediaMetadata(entry, id);
      return asset ? [asset] : [];
    });
  return [...inGalleryOrder, ...leftovers];
}
4726
/**
 * Produces a single "directImage" asset when the post's destination URL
 * (`url_overridden_by_dest`, else `url`) points at i.redd.it or
 * preview.redd.it. Width and height are borrowed from the first preview
 * source record, when there is one.
 *
 * @param {object} post - Reddit post record.
 * @returns {Array<object>} Zero or one asset.
 */
function redditDirectImagesFromPost(post) {
  const destination = stringFromUnknown4(post.url_overridden_by_dest) ?? stringFromUnknown4(post.url);
  const url = redditMediaUrl(destination);
  if (!url || !isDirectRedditImageUrl(url)) {
    return [];
  }
  const previewSource = previewSourceRecord(post);
  const width = numberFromUnknown2(previewSource?.width);
  const height = numberFromUnknown2(previewSource?.height);
  const asset = redditImageAsset(url, width, height, "directImage");
  if (!asset) {
    return [];
  }
  return [asset];
}
4740
/**
 * Converts one `media_metadata` entry into a "gallery" image asset.
 *
 * The URL is taken from the entry's `s` record (`u`, then `gif`, then `mp4`),
 * dimensions from `s.x`/`s.y` (falling back to `s.width`/`s.height`), and the
 * asset type from the entry's `m` field. The media id is stored in
 * `metadata.redditMediaId`.
 *
 * NOTE(review): the `mp4` fallback yields an asset of kind "image" that points
 * at a video file — confirm this is intended.
 *
 * @param {unknown} value - The `media_metadata` entry.
 * @param {string} mediaId - Key of the entry.
 * @returns {object|undefined}
 */
function redditImageFromMediaMetadata(value, mediaId) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const source = isRecord5(value.s) ? value.s : void 0;
  const rawUrl = stringFromUnknown4(source?.u) ?? stringFromUnknown4(source?.gif) ?? stringFromUnknown4(source?.mp4);
  const url = redditMediaUrl(rawUrl);
  if (!url) {
    return void 0;
  }
  const width = numberFromUnknown2(source?.x) ?? numberFromUnknown2(source?.width);
  const height = numberFromUnknown2(source?.y) ?? numberFromUnknown2(source?.height);
  const asset = redditImageAsset(url, width, height, "gallery", stringFromUnknown4(value.m));
  if (!asset) {
    return void 0;
  }
  return {
    ...asset,
    metadata: { ...asset.metadata, redditMediaId: mediaId }
  };
}
4766
/**
 * Converts a preview-style record (`url`/`width`/`height`, or the short
 * `u`/`x`/`y` form) into an image asset tagged with `redditMediaKind`.
 *
 * @param {unknown} value - Candidate record.
 * @param {string} redditMediaKind - Kind stored in the asset metadata.
 * @returns {object|undefined} `undefined` when the record has no usable URL or
 *   the asset is not an allowed candidate.
 */
function redditImageFromRecord(value, redditMediaKind) {
  if (!isRecord5(value)) {
    return void 0;
  }
  const rawUrl = stringFromUnknown4(value.url) ?? stringFromUnknown4(value.u);
  const url = redditMediaUrl(rawUrl);
  if (!url) {
    return void 0;
  }
  const width = numberFromUnknown2(value.width) ?? numberFromUnknown2(value.x);
  const height = numberFromUnknown2(value.height) ?? numberFromUnknown2(value.y);
  return redditImageAsset(url, width, height, redditMediaKind);
}
4781
/**
 * Assembles an adapter-sourced image asset marked as coming from the Reddit
 * JSON endpoint, and returns it only if isAllowedRedditImageCandidate accepts
 * it.
 *
 * @param {string} url - Image URL.
 * @param {number|undefined} width
 * @param {number|undefined} height
 * @param {string} redditMediaKind - Kind stored in the asset metadata.
 * @param {string} [type] - Optional media type.
 * @returns {object|undefined}
 */
function redditImageAsset(url, width, height, redditMediaKind, type) {
  const metadata = {
    adapter: "redditJsonEndpoint",
    originalSource: "redditJsonEndpoint",
    redditMediaKind
  };
  const candidate = { url, kind: "image", source: "adapter", width, height, type, metadata };
  if (!isAllowedRedditImageCandidate(candidate)) {
    return void 0;
  }
  return candidate;
}
4797
/**
 * Picks the record with the largest width × height from a list, ignoring
 * non-record entries. Missing dimensions count as 0. The input list is not
 * modified.
 *
 * @param {Array<unknown>} values
 * @returns {object|undefined} `undefined` when the list holds no records.
 */
function largestRedditImageRecord(values) {
  const pixelCount = (record) => (numberFromUnknown2(record.width) ?? 0) * (numberFromUnknown2(record.height) ?? 0);
  const records = values.filter(isRecord5);
  records.sort((left, right) => pixelCount(right) - pixelCount(left));
  return records[0];
}
4802
/**
 * Returns the `source` record of the first record in `post.preview.images`.
 *
 * @param {object} post - Reddit post record.
 * @returns {object|undefined} `undefined` when any step of that path is
 *   missing or not a record.
 */
function previewSourceRecord(post) {
  const preview = post.preview;
  if (!isRecord5(preview) || !Array.isArray(preview.images)) {
    return void 0;
  }
  const firstImage = preview.images.find(isRecord5);
  if (!firstImage) {
    return void 0;
  }
  return isRecord5(firstImage.source) ? firstImage.source : void 0;
}
4807
/**
 * Reports whether `value` is a URL hosted on i.redd.it or preview.redd.it.
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} `false` when the value cannot be parsed as a URL.
 */
function isDirectRedditImageUrl(value) {
  let host;
  try {
    host = new URL(value).hostname.toLowerCase();
  } catch {
    return false;
  }
  return ["i.redd.it", "preview.redd.it"].includes(host);
}
4816
/**
 * Removes images whose `url` was already seen, keeping the first occurrence
 * and the original order.
 *
 * @param {Array<{url: string}>} images
 * @returns {Array<{url: string}>} A new array.
 */
function dedupeRedditImages(images) {
  const firstByUrl = /* @__PURE__ */ new Map();
  for (const image of images) {
    if (!firstByUrl.has(image.url)) {
      firstByUrl.set(image.url, image);
    }
  }
  return [...firstByUrl.values()];
}
4389
4828
  function redditVideosFromPost(post) {
4390
4829
  const videos = [];
@@ -4411,7 +4850,9 @@ function redditVideosFromPost(post) {
4411
4850
  }
4412
4851
  function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
4413
4852
  const finalUrl = post.canonicalUrl ?? requestedUrl;
4414
- const bestImage = post.images.sort((left, right) => (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0))[0];
4853
+ const bestImage = post.images.slice().sort(
4854
+ (left, right) => redditImagePriority(right) - redditImagePriority(left) || (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0)
4855
+ )[0];
4415
4856
  const video = post.videos[0];
4416
4857
  const structuredData = {
4417
4858
  "@context": "https://schema.org",
@@ -4557,7 +4998,7 @@ function mergeFallbackAttempts2(existing, incoming) {
4557
4998
  return true;
4558
4999
  });
4559
5000
  }
4560
- function uniqueStrings3(values) {
5001
+ function uniqueStrings4(values) {
4561
5002
  return [...new Set(values.filter((value) => Boolean(value)))];
4562
5003
  }
4563
5004
  function redditMediaUrl(value) {