metanova 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1239,6 +1239,95 @@ function uniqueStrings(values) {
1239
1239
  return [...new Set(values.filter(Boolean))];
1240
1240
  }
1241
1241
 
1242
+ // src/utils/redditMedia.ts
1243
+ var REDDIT_BAD_IMAGE_URL_PATTERN = /thumbs\.redditmedia\.com|avatar|community_icon|subreddit|icon|award|emoji/i;
1244
+ var TRUSTED_REDDIT_SOURCES = /* @__PURE__ */ new Set([
1245
+ "adapter",
1246
+ "openGraph",
1247
+ "twitter",
1248
+ "jsonLd",
1249
+ "oEmbed",
1250
+ "nextData",
1251
+ "nuxt",
1252
+ "initialState",
1253
+ "preloadedState",
1254
+ "apollo",
1255
+ "applicationJson",
1256
+ "jsonScript"
1257
+ ]);
1258
+ function isRedditUrl(value) {
1259
+ try {
1260
+ const host = new URL(value).hostname.toLowerCase().replace(/^www\./, "");
1261
+ return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
1262
+ } catch {
1263
+ return false;
1264
+ }
1265
+ }
1266
+ function filterRedditImageCandidates(images) {
1267
+ const allowed = images.filter(isAllowedRedditImageCandidate);
1268
+ const trusted = allowed.filter(isTrustedRedditImageCandidate);
1269
+ return prioritizeRedditImages(trusted.length > 0 ? trusted : allowed);
1270
+ }
1271
+ function prioritizeRedditImages(images) {
1272
+ return images.slice().sort(
1273
+ (left, right) => redditImagePriority(right) - redditImagePriority(left) || imageArea(right) - imageArea(left)
1274
+ );
1275
+ }
1276
+ function isAllowedRedditImageCandidate(image) {
1277
+ if (image.width !== void 0 && image.width < 200) {
1278
+ return false;
1279
+ }
1280
+ if (image.height !== void 0 && image.height < 200) {
1281
+ return false;
1282
+ }
1283
+ return !REDDIT_BAD_IMAGE_URL_PATTERN.test(image.url);
1284
+ }
1285
+ function redditImagePriority(image) {
1286
+ const mediaKind = typeof image.metadata?.redditMediaKind === "string" ? image.metadata.redditMediaKind : void 0;
1287
+ const url = image.url.toLowerCase();
1288
+ if (mediaKind === "gallery") {
1289
+ return 700;
1290
+ }
1291
+ if (mediaKind === "previewOriginal") {
1292
+ return 620;
1293
+ }
1294
+ if (mediaKind === "directImage") {
1295
+ return 580;
1296
+ }
1297
+ if (/\/\/preview\.redd\.it\//i.test(url)) {
1298
+ return 560;
1299
+ }
1300
+ if (/\/\/i\.redd\.it\//i.test(url)) {
1301
+ return 540;
1302
+ }
1303
+ if (image.source === "openGraph" || image.source === "twitter") {
1304
+ return 420;
1305
+ }
1306
+ if (/\/\/external-preview\.redd\.it\//i.test(url)) {
1307
+ return 300;
1308
+ }
1309
+ if (/\/\/thumbs\.redditmedia\.com\//i.test(url)) {
1310
+ return 1;
1311
+ }
1312
+ return isRedditMediaUrl(url) ? 250 : 0;
1313
+ }
1314
+ function hasRedditImageContext(images) {
1315
+ return images.some((image) => {
1316
+ const adapter = typeof image.metadata?.adapter === "string" ? image.metadata.adapter : "";
1317
+ const originalSource = typeof image.metadata?.originalSource === "string" ? image.metadata.originalSource : "";
1318
+ return adapter === "redditAdapter" || originalSource === "redditJsonEndpoint" || Boolean(image.metadata?.redditMediaKind);
1319
+ });
1320
+ }
1321
+ function isRedditMediaUrl(value) {
1322
+ return /(?:^https?:)?\/\/(?:(?:i|preview|external-preview)\.redd\.it|thumbs\.redditmedia\.com|v\.redd\.it)\//i.test(value);
1323
+ }
1324
+ function isTrustedRedditImageCandidate(image) {
1325
+ return TRUSTED_REDDIT_SOURCES.has(image.source) || Boolean(image.metadata?.redditMediaKind);
1326
+ }
1327
+ function imageArea(image) {
1328
+ return (image.width ?? 0) * (image.height ?? 0);
1329
+ }
1330
+
1242
1331
  // src/scorers/image.ts
1243
1332
  var SOURCE_WEIGHT = {
1244
1333
  adapter: 98,
@@ -1259,6 +1348,7 @@ var SOURCE_WEIGHT = {
1259
1348
  };
1260
1349
  function scoreImages(images, customScorers = []) {
1261
1350
  const duplicateCounts = countDuplicates(images);
1351
+ const redditContext = hasRedditImageContext(images);
1262
1352
  return images.map((image, index) => {
1263
1353
  const scored = scoreImageWithDetails(image, index, images, duplicateCounts);
1264
1354
  const customScore = customScorers.reduce((total, scorer) => total + scorer(image, { index, images }), 0);
@@ -1274,7 +1364,7 @@ function scoreImages(images, customScorers = []) {
1274
1364
  }
1275
1365
  };
1276
1366
  }).sort(
1277
- (left, right) => (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea(right) - imageArea(left)
1367
+ (left, right) => (redditContext ? redditImagePriority(right) - redditImagePriority(left) : 0) || (right.score ?? 0) - (left.score ?? 0) || sourceSortWeight(right) - sourceSortWeight(left) || imageArea2(right) - imageArea2(left)
1278
1368
  );
1279
1369
  }
1280
1370
  function selectBestImage(images, customScorers = []) {
@@ -1292,15 +1382,17 @@ function scoreImageWithDetails(image, index, images, duplicateCounts) {
1292
1382
  const dimensions = scoreDimensions(image);
1293
1383
  const format = scoreFormat(image);
1294
1384
  const urlSignal = scoreUrlSignal(image);
1385
+ const redditMedia = scoreRedditMedia(image);
1295
1386
  const urlPenalty = scoreUrlPenalty(image);
1296
1387
  const duplicatePenalty = scoreDuplicatePenalty(image, duplicateCounts);
1297
1388
  score += dimensions.score;
1298
1389
  score += format.score;
1299
1390
  score += urlSignal.score;
1391
+ score += redditMedia.score;
1300
1392
  score -= urlPenalty;
1301
1393
  score -= duplicatePenalty.score;
1302
1394
  score -= Math.min(index * 1.5, 10);
1303
- reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...duplicatePenalty.reasons);
1395
+ reasons.push(...dimensions.reasons, ...format.reasons, ...urlSignal.reasons, ...redditMedia.reasons, ...duplicatePenalty.reasons);
1304
1396
  if (images.length === 1) {
1305
1397
  score += 4;
1306
1398
  reasons.push("only candidate image added 4 points");
@@ -1400,6 +1492,33 @@ function platformThumbnailScore(url) {
1400
1492
  }
1401
1493
  return { score: 0, reasons: [] };
1402
1494
  }
1495
+ function scoreRedditMedia(image) {
1496
+ const priority = redditImagePriority(image);
1497
+ if (priority === 0 && !isRedditMediaUrl(image.url)) {
1498
+ return { score: 0, reasons: [] };
1499
+ }
1500
+ const url = image.url.toLowerCase();
1501
+ const mediaKind = typeof image.metadata?.redditMediaKind === "string" ? image.metadata.redditMediaKind : void 0;
1502
+ if (mediaKind === "gallery") {
1503
+ return { score: 24, reasons: ["Reddit gallery media added 24 points"] };
1504
+ }
1505
+ if (mediaKind === "previewOriginal") {
1506
+ return { score: 20, reasons: ["Reddit original preview media added 20 points"] };
1507
+ }
1508
+ if (/\/\/i\.redd\.it\//i.test(url)) {
1509
+ return { score: 18, reasons: ["Reddit direct image media added 18 points"] };
1510
+ }
1511
+ if (/\/\/preview\.redd\.it\//i.test(url)) {
1512
+ return { score: 16, reasons: ["Reddit preview media added 16 points"] };
1513
+ }
1514
+ if (/\/\/external-preview\.redd\.it\//i.test(url)) {
1515
+ return { score: -8, reasons: ["Reddit external preview media subtracted 8 points"] };
1516
+ }
1517
+ if (/\/\/thumbs\.redditmedia\.com\//i.test(url)) {
1518
+ return { score: -60, reasons: ["Reddit thumbnail host subtracted 60 points"] };
1519
+ }
1520
+ return { score: 0, reasons: [] };
1521
+ }
1403
1522
  function scoreUrlPenalty(image) {
1404
1523
  const url = image.url.toLowerCase();
1405
1524
  let penalty = 0;
@@ -1472,7 +1591,7 @@ function countDuplicates(images) {
1472
1591
  }
1473
1592
  return counts;
1474
1593
  }
1475
- function imageArea(image) {
1594
+ function imageArea2(image) {
1476
1595
  return (image.width ?? 0) * (image.height ?? 0);
1477
1596
  }
1478
1597
  function sourceSortWeight(image) {
@@ -1590,8 +1709,9 @@ function discoverMedia(rawSources, finalUrl) {
1590
1709
  if (externalResults.some((result) => (result.images?.length ?? 0) > 0 || (result.videos?.length ?? 0) > 0)) {
1591
1710
  trace.push("media discovery included adapter and plugin media");
1592
1711
  }
1712
+ const dedupedImages = dedupeMediaBySignature(images);
1593
1713
  return {
1594
- images: dedupeMediaBySignature(uniqueMediaByUrl(images)),
1714
+ images: isRedditUrl(finalUrl) ? filterRedditImageCandidates(dedupedImages) : dedupedImages,
1595
1715
  videos: dedupeMediaBySignature(uniqueMediaByUrl(videos)),
1596
1716
  audio: dedupeMediaBySignature(uniqueMediaByUrl(audio)),
1597
1717
  trace
@@ -1683,7 +1803,8 @@ function mediaFromJsonValue(value, kind, source) {
1683
1803
  height: parseNumber(stringFromUnknown(value.height)) ?? parseNumber(stringFromUnknown(nestedDetails?.height)),
1684
1804
  alt: stringFromUnknown(value.alt) ?? stringFromUnknown(value.caption) ?? stringFromUnknown(value.name) ?? stringFromUnknown(nestedDetails?.alt),
1685
1805
  title: stringFromUnknown(value.title) ?? stringFromUnknown(nestedDetails?.title),
1686
- type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type)
1806
+ type: stringFromUnknown(value.type) ?? stringFromUnknown(value.mimeType) ?? stringFromUnknown(value.encodingFormat) ?? stringFromUnknown(nestedDetails?.type),
1807
+ metadata: isRecord3(value.metadata) ? value.metadata : void 0
1687
1808
  },
1688
1809
  ...srcsetAssets
1689
1810
  ];
@@ -1791,7 +1912,7 @@ function dedupeMediaBySignature(assets) {
1791
1912
  for (const asset of assets) {
1792
1913
  const key = mediaSignature2(asset.url);
1793
1914
  const current = seen.get(key);
1794
- if (!current || sourceRank(asset.source) > sourceRank(current.source)) {
1915
+ if (!current || mediaRank(asset) > mediaRank(current)) {
1795
1916
  seen.set(key, asset);
1796
1917
  }
1797
1918
  }
@@ -1825,6 +1946,10 @@ function sourceRank(source) {
1825
1946
  };
1826
1947
  return ranks[source] ?? 50;
1827
1948
  }
1949
+ function mediaRank(asset) {
1950
+ const redditPriority = redditImagePriority(asset);
1951
+ return redditPriority > 0 ? 1e3 + redditPriority : sourceRank(asset.source);
1952
+ }
1828
1953
  function shouldIgnoreMediaUrl2(url) {
1829
1954
  const normalized = url.toLowerCase();
1830
1955
  return normalized.startsWith("data:") || normalized.startsWith("blob:") || normalized.startsWith("javascript:") || /(?:sprite|spacer|blank|transparent|placeholder|tracking|beacon|pixel|emoji|favicon|apple-touch-icon)(?:[._/-]|$|\?)/i.test(normalized) || /(?:^|[/?_-])1x1(?:[._/-]|$|\?)/i.test(normalized);
@@ -2149,7 +2274,7 @@ function normalizeMetadata(rawSources, context = {}) {
2149
2274
  ...mediaDiscovery.trace,
2150
2275
  ...selectedImage.best ? [`selected image from ${sourceLabel2(selectedImage.best)}`] : []
2151
2276
  ]);
2152
- return stripUndefined2({
2277
+ const normalized = stripUndefined2({
2153
2278
  ok: true,
2154
2279
  url,
2155
2280
  finalUrl,
@@ -2179,6 +2304,12 @@ function normalizeMetadata(rawSources, context = {}) {
2179
2304
  diagnostics,
2180
2305
  trace: diagnostics.trace
2181
2306
  });
2307
+ normalized.images = selectedImage.images;
2308
+ normalized.videos = videos;
2309
+ normalized.audio = audio;
2310
+ normalized.favicons = favicons;
2311
+ normalized.trace = diagnostics.trace;
2312
+ return normalized;
2182
2313
  }
2183
2314
  function normalizeAssets2(assets, baseUrl) {
2184
2315
  return assets.map((asset) => {
@@ -2901,7 +3032,7 @@ var tiktokAdapter = {
2901
3032
  return this.detect?.(url) ?? false;
2902
3033
  },
2903
3034
  extract(context) {
2904
- return socialVideoResult("tiktokAdapter", "TikTok", context);
3035
+ return tiktokResult(context);
2905
3036
  },
2906
3037
  normalize(rawData) {
2907
3038
  return normalizePlatformResult(rawData);
@@ -3120,25 +3251,215 @@ function redditDescriptionFromContext(context) {
3120
3251
  }
3121
3252
  return { value: context.raw.html.description, method: context.raw.html.description ? "reddit:html" : void 0 };
3122
3253
  }
3123
- function socialVideoResult(source, platform, context) {
3254
+ function tiktokResult(context) {
3124
3255
  const url = new URL(context.finalUrl);
3125
3256
  const username = url.pathname.match(/@([^/]+)/)?.[1];
3126
3257
  const postId = url.pathname.match(/\/(?:video|photo)\/([^/]+)/)?.[1] ?? url.pathname.split("/").filter(Boolean).at(-1);
3258
+ const itemStruct = tiktokItemStructFromContext(context, postId);
3259
+ const titleSelection = tiktokTitleFromContext(context, itemStruct, username);
3260
+ const descriptionSelection = tiktokDescriptionFromContext(context, itemStruct);
3261
+ const author = tiktokAuthorFromItemStruct(itemStruct, username);
3262
+ const media = tiktokMediaFromContext(context, itemStruct);
3263
+ const publishedTime = tiktokPublishedTime(itemStruct) ?? publishedTimeFromContext(context);
3127
3264
  return compactAdapterResult({
3128
- source,
3129
- platform,
3265
+ source: "tiktokAdapter",
3266
+ platform: "TikTok",
3130
3267
  type: "social_post",
3131
- siteName: platform,
3268
+ siteName: "TikTok",
3132
3269
  canonicalUrl: context.raw.openGraph.url,
3133
- title: titleFromContext(context, ["title", "desc", "description", "caption"]),
3134
- description: descriptionFromContext(context),
3135
- images: markAdapterMedia(mediaFromContext(context).images, source),
3136
- videos: markAdapterMedia(mediaFromContext(context).videos, source),
3137
- author: username ? { name: username } : entityFromContext(context, ["author", "user", "creator", "owner"]),
3138
- article: { publishedTime: publishedTimeFromContext(context) },
3139
- identifiers: { username, postId }
3270
+ title: titleSelection.value,
3271
+ description: descriptionSelection.value,
3272
+ images: markAdapterMedia(media.images, "tiktokAdapter"),
3273
+ videos: markAdapterMedia(media.videos, "tiktokAdapter"),
3274
+ author,
3275
+ article: { publishedTime },
3276
+ video: postId ? {
3277
+ id: postId,
3278
+ title: titleSelection.value,
3279
+ channel: author,
3280
+ publishedTime,
3281
+ duration: tiktokVideoDuration(itemStruct),
3282
+ viewCount: tiktokStatCount(itemStruct, "playCount")
3283
+ } : void 0,
3284
+ identifiers: { username, postId },
3285
+ raw: {
3286
+ extractionMethod: titleSelection.method ?? descriptionSelection.method ?? "tiktok:htmlFallback"
3287
+ }
3140
3288
  });
3141
3289
  }
3290
+ function tiktokTitleFromContext(context, itemStruct, username) {
3291
+ const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
3292
+ if (desc) {
3293
+ return { value: desc, method: "tiktok:itemStruct.desc" };
3294
+ }
3295
+ const musicTitle = tiktokMusicTitle(itemStruct);
3296
+ if (musicTitle) {
3297
+ return { value: musicTitle, method: "tiktok:itemStruct.music" };
3298
+ }
3299
+ const fallback = cleanTikTokText(titleFromContext(context, ["desc", "caption", "title", "description"]));
3300
+ if (fallback) {
3301
+ return { value: fallback, method: "tiktok:fallback" };
3302
+ }
3303
+ return {
3304
+ value: username ? `TikTok post by @${username}` : void 0,
3305
+ method: username ? "tiktok:urlFallback" : void 0
3306
+ };
3307
+ }
3308
+ function tiktokDescriptionFromContext(context, itemStruct) {
3309
+ const desc = cleanTikTokText(stringFromUnknown3(itemStruct?.desc));
3310
+ if (desc) {
3311
+ return { value: desc, method: "tiktok:itemStruct.desc" };
3312
+ }
3313
+ return {
3314
+ value: cleanTikTokText(descriptionFromContext(context)),
3315
+ method: "tiktok:fallback"
3316
+ };
3317
+ }
3318
+ function tiktokItemStructFromContext(context, postId) {
3319
+ for (const item of context.raw.embeddedData.items) {
3320
+ const defaultScope = isRecord4(item.data["__DEFAULT_SCOPE__"]) ? item.data["__DEFAULT_SCOPE__"] : void 0;
3321
+ const videoDetail = isRecord4(defaultScope?.["webapp.video-detail"]) ? defaultScope["webapp.video-detail"] : void 0;
3322
+ const itemInfo = isRecord4(videoDetail?.itemInfo) ? videoDetail.itemInfo : void 0;
3323
+ const itemStruct = isRecord4(itemInfo?.itemStruct) ? itemInfo.itemStruct : void 0;
3324
+ if (itemStruct && (!postId || stringFromUnknown3(itemStruct.id) === postId)) {
3325
+ return itemStruct;
3326
+ }
3327
+ }
3328
+ let found;
3329
+ for (const item of context.raw.embeddedData.items) {
3330
+ walkData(item.data, (value, key) => {
3331
+ if (found || key !== "itemStruct" || !isRecord4(value)) {
3332
+ return;
3333
+ }
3334
+ if (!postId || stringFromUnknown3(value.id) === postId) {
3335
+ found = value;
3336
+ }
3337
+ });
3338
+ if (found) {
3339
+ return found;
3340
+ }
3341
+ }
3342
+ return void 0;
3343
+ }
3344
+ function tiktokMediaFromContext(context, itemStruct) {
3345
+ const discovered = mediaFromContext(context);
3346
+ return {
3347
+ images: [...tiktokImagesFromItemStruct(itemStruct), ...discovered.images],
3348
+ videos: [...tiktokVideosFromItemStruct(itemStruct), ...discovered.videos]
3349
+ };
3350
+ }
3351
+ function tiktokImagesFromItemStruct(itemStruct) {
3352
+ const video = isRecord4(itemStruct?.video) ? itemStruct.video : void 0;
3353
+ if (!video) {
3354
+ return [];
3355
+ }
3356
+ const width = numberFromUnknown(video.width);
3357
+ const height = numberFromUnknown(video.height);
3358
+ const candidates = [
3359
+ stringFromUnknown3(video.originCover),
3360
+ stringFromUnknown3(video.cover),
3361
+ stringFromUnknown3(video.dynamicCover),
3362
+ ...urlsFromUnknown(video.shareCover)
3363
+ ];
3364
+ return uniqueStrings3(candidates).map((url) => ({
3365
+ url,
3366
+ kind: "image",
3367
+ source: "applicationJson",
3368
+ width,
3369
+ height,
3370
+ metadata: {
3371
+ tiktokMediaKind: "videoCover"
3372
+ }
3373
+ }));
3374
+ }
3375
+ function tiktokVideosFromItemStruct(itemStruct) {
3376
+ const video = isRecord4(itemStruct?.video) ? itemStruct.video : void 0;
3377
+ if (!video) {
3378
+ return [];
3379
+ }
3380
+ const width = numberFromUnknown(video.width);
3381
+ const height = numberFromUnknown(video.height);
3382
+ const candidates = [
3383
+ stringFromUnknown3(video.playAddr),
3384
+ stringFromUnknown3(video.downloadAddr),
3385
+ ...urlsFromTikTokPlayAddr(video.PlayAddrStruct),
3386
+ ...Array.isArray(video.bitrateInfo) ? video.bitrateInfo.flatMap((item) => isRecord4(item) ? urlsFromTikTokPlayAddr(item.PlayAddr) : []) : []
3387
+ ];
3388
+ return uniqueStrings3(candidates).filter((url) => /^https?:\/\//i.test(url)).map((url) => ({
3389
+ url,
3390
+ kind: "video",
3391
+ source: "applicationJson",
3392
+ width,
3393
+ height,
3394
+ type: "video/mp4",
3395
+ metadata: {
3396
+ tiktokMediaKind: "videoPlay"
3397
+ }
3398
+ }));
3399
+ }
3400
+ function urlsFromTikTokPlayAddr(value) {
3401
+ if (!isRecord4(value)) {
3402
+ return [];
3403
+ }
3404
+ return urlsFromUnknown(value.UrlList);
3405
+ }
3406
+ function tiktokAuthorFromItemStruct(itemStruct, username) {
3407
+ const author = isRecord4(itemStruct?.author) ? itemStruct.author : void 0;
3408
+ const name = stringFromUnknown3(author?.nickname) ?? stringFromUnknown3(author?.uniqueId) ?? username;
3409
+ if (!name) {
3410
+ return void 0;
3411
+ }
3412
+ return {
3413
+ name,
3414
+ url: username ? `https://www.tiktok.com/@${username}` : void 0
3415
+ };
3416
+ }
3417
+ function tiktokPublishedTime(itemStruct) {
3418
+ const created = numberFromUnknown(itemStruct?.createTime);
3419
+ return created ? new Date(created * 1e3).toISOString() : void 0;
3420
+ }
3421
+ function tiktokVideoDuration(itemStruct) {
3422
+ const video = isRecord4(itemStruct?.video) ? itemStruct.video : void 0;
3423
+ return stringFromUnknown3(video?.duration);
3424
+ }
3425
+ function tiktokStatCount(itemStruct, key) {
3426
+ const stats = isRecord4(itemStruct?.stats) ? itemStruct.stats : void 0;
3427
+ return numberFromUnknown(stats?.[key]);
3428
+ }
3429
+ function tiktokMusicTitle(itemStruct) {
3430
+ const music = isRecord4(itemStruct?.music) ? itemStruct.music : void 0;
3431
+ const title = cleanTikTokText(stringFromUnknown3(music?.title));
3432
+ const author = cleanTikTokText(stringFromUnknown3(music?.authorName));
3433
+ if (!title) {
3434
+ return void 0;
3435
+ }
3436
+ if (author && !/original sound/i.test(title)) {
3437
+ return `${title} - ${author}`;
3438
+ }
3439
+ return title;
3440
+ }
3441
+ function cleanTikTokText(value) {
3442
+ const cleaned = value?.replace(/\s+/g, " ").trim();
3443
+ if (!cleaned || isLowQualityTikTokText(cleaned)) {
3444
+ return void 0;
3445
+ }
3446
+ return cleaned;
3447
+ }
3448
+ function isLowQualityTikTokText(value) {
3449
+ return /\{[^}]+\}/.test(value) || /tiktok\s*shop|free shipping|eligible items|exclusive collections/i.test(value) || /^tiktok live creator networks$/i.test(value) || /^tiktok\s*-\s*make your day$/i.test(value) || /^discover (?:new |popular )?videos?\b.*\btiktok\b/i.test(value);
3450
+ }
3451
+ function urlsFromUnknown(value) {
3452
+ if (typeof value === "string" && value.trim()) {
3453
+ return [value.trim()];
3454
+ }
3455
+ if (Array.isArray(value)) {
3456
+ return value.flatMap(urlsFromUnknown);
3457
+ }
3458
+ return [];
3459
+ }
3460
+ function uniqueStrings3(values) {
3461
+ return [...new Set(values.filter((value) => Boolean(value)))];
3462
+ }
3142
3463
  function normalizePlatformResult(rawData) {
3143
3464
  const type = rawData.type ?? inferAdapterType(rawData);
3144
3465
  return compactAdapterResult({
@@ -3929,7 +4250,7 @@ async function fetchMetadata(url, options = {}) {
3929
4250
  ...metadata.canonicalUrl ? ["resolved canonical URL"] : []
3930
4251
  ];
3931
4252
  metadata.diagnostics.fallbacksAttempted = mergeFallbackAttempts2(metadata.diagnostics.fallbacksAttempted, fetchResult.fallbacksAttempted);
3932
- metadata.diagnostics.sourcePriority = uniqueStrings3([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
4253
+ metadata.diagnostics.sourcePriority = uniqueStrings4([...metadata.diagnostics.sourcePriority ?? [], ...fetchResult.sourcePriority ?? []]);
3933
4254
  metadata.diagnostics.extractionMethod = metadata.diagnostics.extractionMethod ?? fetchResult.extractionMethod;
3934
4255
  metadata.diagnostics.retryInfo = metadata.diagnostics.retryInfo ?? fetchResult.retryInfo;
3935
4256
  metadata.trace = metadata.diagnostics.trace;
@@ -3974,13 +4295,13 @@ async function fetchMetadata(url, options = {}) {
3974
4295
  function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationMs) {
3975
4296
  const page = fetchResult.page;
3976
4297
  const providerDiagnostics = fetchResult.providerDiagnostics;
3977
- const trace = uniqueStrings3([
4298
+ const trace = uniqueStrings4([
3978
4299
  ...page.isShortUrl ? [`detected short URL provider: ${page.shortUrlProvider ?? "unknown"}`] : [],
3979
4300
  ...page.redirects.length > 0 ? [`resolved ${page.redirects.length} redirect${page.redirects.length === 1 ? "" : "s"}`] : [],
3980
4301
  ...fetchResult.trace,
3981
4302
  "detected blocked provider response"
3982
4303
  ]);
3983
- const warnings = uniqueStrings3([
4304
+ const warnings = uniqueStrings4([
3984
4305
  ...fetchResult.warnings,
3985
4306
  REDDIT_BLOCKED_METADATA_WARNING,
3986
4307
  ...page.statusCode < 200 || page.statusCode >= 300 ? [`Fetch completed with non-success status code ${page.statusCode}.`] : []
@@ -4028,7 +4349,7 @@ function createBlockedProviderMetadata(requestedUrl, fetchResult, fetchDurationM
4028
4349
  };
4029
4350
  }
4030
4351
  async function fetchPageWithStrategies(requestedUrl, options) {
4031
- if (isRedditUrl(requestedUrl)) {
4352
+ if (isRedditUrl2(requestedUrl)) {
4032
4353
  return fetchRedditPageWithStrategy(requestedUrl, options);
4033
4354
  }
4034
4355
  return {
@@ -4041,6 +4362,7 @@ async function fetchPageWithStrategies(requestedUrl, options) {
4041
4362
  async function fetchRedditPageWithStrategy(requestedUrl, options) {
4042
4363
  const attempts = [];
4043
4364
  const warnings = [];
4365
+ const informationalFallbacks = [];
4044
4366
  const sourcePriority = ["redditJsonEndpoint", "oldReddit", "embeddedStructuredData", "openGraph", "html"];
4045
4367
  let lastError;
4046
4368
  const jsonUrl = redditJsonEndpoint(requestedUrl);
@@ -4066,7 +4388,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4066
4388
  }
4067
4389
  warnings.push("Reddit JSON endpoint responded, but no post payload could be extracted.");
4068
4390
  } else if (attempt.blocked) {
4069
- warnings.push("Reddit JSON endpoint appears to have blocked access.");
4391
+ informationalFallbacks.push("Informational fallback: Reddit JSON endpoint appears to have blocked access; continuing with fallback extraction.");
4070
4392
  }
4071
4393
  }
4072
4394
  const oldRedditUrl = redditOldUrl(requestedUrl);
@@ -4079,7 +4401,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4079
4401
  page: attempt.page,
4080
4402
  fallbacksAttempted: attempts,
4081
4403
  warnings,
4082
- trace: ["retried Reddit page through old.reddit"],
4404
+ trace: [...informationalFallbacks, "retried Reddit page through old.reddit"],
4083
4405
  sourcePriority,
4084
4406
  extractionMethod: "reddit:oldReddit",
4085
4407
  retryInfo: redditRetryInfo(attempts)
@@ -4097,7 +4419,7 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4097
4419
  page: htmlAttempt.page,
4098
4420
  fallbacksAttempted: attempts,
4099
4421
  warnings,
4100
- trace: ["used Reddit HTML fallback"],
4422
+ trace: [...informationalFallbacks, "used Reddit HTML fallback"],
4101
4423
  sourcePriority,
4102
4424
  extractionMethod: "reddit:htmlFallback",
4103
4425
  retryInfo: redditRetryInfo(attempts)
@@ -4111,8 +4433,8 @@ async function fetchRedditPageWithStrategy(requestedUrl, options) {
4111
4433
  return {
4112
4434
  page: synthesizeRedditBlockedPage(requestedUrl, attempts, providerDiagnostics),
4113
4435
  fallbacksAttempted: attempts,
4114
- warnings: uniqueStrings3([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
4115
- trace: ["Reddit provider blocked metadata extraction"],
4436
+ warnings: uniqueStrings4([...warnings, REDDIT_BLOCKED_METADATA_WARNING]),
4437
+ trace: [...informationalFallbacks, "Reddit provider blocked metadata extraction"],
4116
4438
  sourcePriority,
4117
4439
  extractionMethod: "reddit:blockedProvider",
4118
4440
  retryInfo: redditRetryInfo(attempts),
@@ -4146,7 +4468,7 @@ async function attemptFetch(method, url, options) {
4146
4468
  };
4147
4469
  }
4148
4470
  }
4149
- function isRedditUrl(url) {
4471
+ function isRedditUrl2(url) {
4150
4472
  try {
4151
4473
  const host = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
4152
4474
  return host === "reddit.com" || host === "redd.it" || host.endsWith(".reddit.com");
@@ -4262,46 +4584,163 @@ function findRedditPostRecord(value) {
4262
4584
  return void 0;
4263
4585
  }
4264
4586
  function redditImagesFromPost(post) {
4265
- const images = [];
4587
+ const images = [
4588
+ ...redditGalleryImagesFromPost(post),
4589
+ ...redditDirectImagesFromPost(post)
4590
+ ];
4266
4591
  const preview = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
4267
4592
  for (const image of preview) {
4268
4593
  if (!isRecord5(image)) {
4269
4594
  continue;
4270
4595
  }
4271
- for (const candidate of [image.source, ...Array.isArray(image.resolutions) ? image.resolutions : []]) {
4272
- if (!isRecord5(candidate)) {
4273
- continue;
4274
- }
4275
- const url = redditMediaUrl(stringFromUnknown4(candidate.url));
4276
- if (!url) {
4277
- continue;
4278
- }
4279
- images.push({
4280
- url,
4281
- kind: "image",
4282
- source: "adapter",
4283
- width: numberFromUnknown2(candidate.width),
4284
- height: numberFromUnknown2(candidate.height),
4285
- metadata: {
4286
- adapter: "redditJsonEndpoint",
4287
- originalSource: "redditJsonEndpoint"
4288
- }
4289
- });
4596
+ const source = redditImageFromRecord(image.source, "previewOriginal");
4597
+ if (source) {
4598
+ images.push(source);
4599
+ continue;
4600
+ }
4601
+ const fallbackResolution = largestRedditImageRecord(Array.isArray(image.resolutions) ? image.resolutions : []);
4602
+ const fallback = redditImageFromRecord(fallbackResolution, "previewResolution");
4603
+ if (fallback) {
4604
+ images.push(fallback);
4290
4605
  }
4291
4606
  }
4292
4607
  const thumbnail = redditMediaUrl(stringFromUnknown4(post.thumbnail));
4293
4608
  if (thumbnail && /^https?:\/\//i.test(thumbnail)) {
4294
- images.push({
4295
- url: thumbnail,
4296
- kind: "image",
4297
- source: "adapter",
4298
- metadata: {
4299
- adapter: "redditJsonEndpoint",
4300
- originalSource: "redditJsonEndpoint"
4301
- }
4302
- });
4609
+ const thumbnailAsset = redditImageAsset(thumbnail, void 0, void 0, "thumbnail");
4610
+ if (thumbnailAsset) {
4611
+ images.push(thumbnailAsset);
4612
+ }
4303
4613
  }
4304
- return images;
4614
+ return dedupeRedditImages(prioritizeRedditImages(images));
4615
+ }
4616
+ function redditGalleryImagesFromPost(post) {
4617
+ const mediaMetadata = isRecord5(post.media_metadata) ? post.media_metadata : void 0;
4618
+ if (!mediaMetadata) {
4619
+ return [];
4620
+ }
4621
+ const galleryItems = isRecord5(post.gallery_data) && Array.isArray(post.gallery_data.items) ? post.gallery_data.items : [];
4622
+ const orderedIds = galleryItems.map((item) => isRecord5(item) ? stringFromUnknown4(item.media_id) : void 0).filter((item) => Boolean(item));
4623
+ const seenIds = /* @__PURE__ */ new Set();
4624
+ const assets = [];
4625
+ for (const id of orderedIds) {
4626
+ const asset = redditImageFromMediaMetadata(mediaMetadata[id], id);
4627
+ if (asset) {
4628
+ assets.push(asset);
4629
+ seenIds.add(id);
4630
+ }
4631
+ }
4632
+ for (const [id, value] of Object.entries(mediaMetadata)) {
4633
+ if (seenIds.has(id)) {
4634
+ continue;
4635
+ }
4636
+ const asset = redditImageFromMediaMetadata(value, id);
4637
+ if (asset) {
4638
+ assets.push(asset);
4639
+ }
4640
+ }
4641
+ return assets;
4642
+ }
4643
+ function redditDirectImagesFromPost(post) {
4644
+ const url = redditMediaUrl(stringFromUnknown4(post.url_overridden_by_dest) ?? stringFromUnknown4(post.url));
4645
+ if (!url || !isDirectRedditImageUrl(url)) {
4646
+ return [];
4647
+ }
4648
+ const previewSource = previewSourceRecord(post);
4649
+ const asset = redditImageAsset(
4650
+ url,
4651
+ numberFromUnknown2(previewSource?.width),
4652
+ numberFromUnknown2(previewSource?.height),
4653
+ "directImage"
4654
+ );
4655
+ return asset ? [asset] : [];
4656
+ }
4657
+ function redditImageFromMediaMetadata(value, mediaId) {
4658
+ if (!isRecord5(value)) {
4659
+ return void 0;
4660
+ }
4661
+ const source = isRecord5(value.s) ? value.s : void 0;
4662
+ const url = redditMediaUrl(
4663
+ stringFromUnknown4(source?.u) ?? stringFromUnknown4(source?.gif) ?? stringFromUnknown4(source?.mp4)
4664
+ );
4665
+ if (!url) {
4666
+ return void 0;
4667
+ }
4668
+ const asset = redditImageAsset(
4669
+ url,
4670
+ numberFromUnknown2(source?.x) ?? numberFromUnknown2(source?.width),
4671
+ numberFromUnknown2(source?.y) ?? numberFromUnknown2(source?.height),
4672
+ "gallery",
4673
+ stringFromUnknown4(value.m)
4674
+ );
4675
+ return asset ? {
4676
+ ...asset,
4677
+ metadata: {
4678
+ ...asset.metadata,
4679
+ redditMediaId: mediaId
4680
+ }
4681
+ } : void 0;
4682
+ }
4683
+ function redditImageFromRecord(value, redditMediaKind) {
4684
+ if (!isRecord5(value)) {
4685
+ return void 0;
4686
+ }
4687
+ const url = redditMediaUrl(stringFromUnknown4(value.url) ?? stringFromUnknown4(value.u));
4688
+ if (!url) {
4689
+ return void 0;
4690
+ }
4691
+ return redditImageAsset(
4692
+ url,
4693
+ numberFromUnknown2(value.width) ?? numberFromUnknown2(value.x),
4694
+ numberFromUnknown2(value.height) ?? numberFromUnknown2(value.y),
4695
+ redditMediaKind
4696
+ );
4697
+ }
4698
+ function redditImageAsset(url, width, height, redditMediaKind, type) {
4699
+ const asset = {
4700
+ url,
4701
+ kind: "image",
4702
+ source: "adapter",
4703
+ width,
4704
+ height,
4705
+ type,
4706
+ metadata: {
4707
+ adapter: "redditJsonEndpoint",
4708
+ originalSource: "redditJsonEndpoint",
4709
+ redditMediaKind
4710
+ }
4711
+ };
4712
+ return isAllowedRedditImageCandidate(asset) ? asset : void 0;
4713
+ }
4714
+ function largestRedditImageRecord(values) {
4715
+ return values.filter(isRecord5).sort(
4716
+ (left, right) => (numberFromUnknown2(right.width) ?? 0) * (numberFromUnknown2(right.height) ?? 0) - (numberFromUnknown2(left.width) ?? 0) * (numberFromUnknown2(left.height) ?? 0)
4717
+ )[0];
4718
+ }
4719
+ function previewSourceRecord(post) {
4720
+ const images = isRecord5(post.preview) && Array.isArray(post.preview.images) ? post.preview.images : [];
4721
+ const firstImage = images.find(isRecord5);
4722
+ return firstImage && isRecord5(firstImage.source) ? firstImage.source : void 0;
4723
+ }
4724
+ function isDirectRedditImageUrl(value) {
4725
+ try {
4726
+ const parsed = new URL(value);
4727
+ const host = parsed.hostname.toLowerCase();
4728
+ return host === "i.redd.it" || host === "preview.redd.it";
4729
+ } catch {
4730
+ return false;
4731
+ }
4732
+ }
4733
+ function dedupeRedditImages(images) {
4734
+ const seen = /* @__PURE__ */ new Set();
4735
+ const unique = [];
4736
+ for (const image of images) {
4737
+ if (seen.has(image.url)) {
4738
+ continue;
4739
+ }
4740
+ seen.add(image.url);
4741
+ unique.push(image);
4742
+ }
4743
+ return unique;
4305
4744
  }
4306
4745
  function redditVideosFromPost(post) {
4307
4746
  const videos = [];
@@ -4328,7 +4767,9 @@ function redditVideosFromPost(post) {
4328
4767
  }
4329
4768
  function synthesizeRedditJsonPage(jsonPage, requestedUrl, post) {
4330
4769
  const finalUrl = post.canonicalUrl ?? requestedUrl;
4331
- const bestImage = post.images.sort((left, right) => (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0))[0];
4770
+ const bestImage = post.images.slice().sort(
4771
+ (left, right) => redditImagePriority(right) - redditImagePriority(left) || (right.width ?? 0) * (right.height ?? 0) - (left.width ?? 0) * (left.height ?? 0)
4772
+ )[0];
4332
4773
  const video = post.videos[0];
4333
4774
  const structuredData = {
4334
4775
  "@context": "https://schema.org",
@@ -4474,7 +4915,7 @@ function mergeFallbackAttempts2(existing, incoming) {
4474
4915
  return true;
4475
4916
  });
4476
4917
  }
4477
- function uniqueStrings3(values) {
4918
+ function uniqueStrings4(values) {
4478
4919
  return [...new Set(values.filter((value) => Boolean(value)))];
4479
4920
  }
4480
4921
  function redditMediaUrl(value) {