ultra-igdl 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3270 @@
1
// Bundler-generated runtime helpers shared by every bundled module below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Wraps a single-entry module table in a lazy initialiser.
 * The first call runs the table's only function and caches its result;
 * later calls return the cached result without running it again.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initialise = fn[__getOwnPropNames(fn)[0]];
    // `fn` is cleared before the module body runs, so a re-entrant call
    // (modules that initialise each other) falls through to `return res`.
    fn = 0;
    res = initialise(0);
  }
  return res;
};

/**
 * Defines an enumerable getter on `target` for every key of `all`.
 */
var __export = (target, all) => {
  for (var key in all) {
    __defProp(target, key, { get: all[key], enumerable: true });
  }
};
10
+
11
+ // node_modules/tsup/assets/esm_shims.js
12
+ import path from "path";
13
+ import { fileURLToPath } from "url";
14
// Lazy initialiser for the tsup ESM shim module; its body holds only the directive.
var init_esm_shims = __esm({
  "node_modules/tsup/assets/esm_shims.js": function() {
    "use strict";
  }
});
19
+
20
+ // src/utils/logger.ts
21
/**
 * Decides whether a message at `level` should be emitted.
 * Debug messages always pass while `verboseEnabled` is set; otherwise the
 * level's rank in `LEVELS` must be at least the rank of `globalLevel`.
 * @param {string} level - A key of `LEVELS`.
 * @returns {boolean}
 */
function shouldLog(level) {
  const forcedByVerbose = verboseEnabled && level === "debug";
  if (forcedByVerbose) return true;
  const requested = LEVELS[level];
  const threshold = LEVELS[globalLevel];
  return requested >= threshold;
}
25
// Logger state; assigned when init_logger() first runs.
var globalLevel, verboseEnabled, LEVELS, logger;
var init_logger = __esm({
  "src/utils/logger.ts"() {
    "use strict";
    init_esm_shims();
    globalLevel = "info";
    verboseEnabled = false;
    // Higher rank means more severe; "silent" outranks every emitting level.
    LEVELS = { debug: 0, info: 1, warn: 2, error: 3, silent: 4 };
    // Each method forwards to the matching console method with a fixed prefix.
    logger = {
      debug(...args) {
        if (!shouldLog("debug")) return;
        console.debug("[ultra-igdl]", ...args);
      },
      info(...args) {
        if (!shouldLog("info")) return;
        console.info("[ultra-igdl]", ...args);
      },
      warn(...args) {
        if (!shouldLog("warn")) return;
        console.warn("[ultra-igdl]", ...args);
      },
      error(...args) {
        if (!shouldLog("error")) return;
        console.error("[ultra-igdl]", ...args);
      }
    };
  }
});
55
+
56
+ // src/utils/caption-normalize.ts
57
/**
 * Converts escaped newline sequences (backslash-prefixed u000d/u000a, r, n)
 * into real "\n" characters, repeating up to four passes until the text stops
 * changing, then maps U+2028 / U+2029 to "\n" as well.
 * @param {string} text
 * @returns {string}
 */
function unescapeCaptionEscapes(text) {
  // Order matters: CRLF pairs are collapsed before their single halves.
  const escapedNewlines = [
    /\\+u000d\\+u000a/gi,
    /\\+u000a/gi,
    /\\+u000d/gi,
    /\\+r\\+n/gi,
    /\\+n/g,
    /\\+r/g
  ];
  let current = text;
  for (let pass = 0; pass < 4; pass += 1) {
    let replaced = current;
    for (const pattern of escapedNewlines) {
      replaced = replaced.replace(pattern, "\n");
    }
    if (replaced === current) break;
    current = replaced;
  }
  const withoutLineSeparator = current.replace(/\u2028/g, "\n");
  return withoutLineSeparator.replace(/\u2029/g, "\n");
}
66
/**
 * Normalises raw caption text: decodes HTML entities, unescapes newline
 * escapes, strips surrounding whitespace and then any leading/trailing
 * straight or curly quote characters.
 * @param {string} raw
 * @returns {string} "" when `raw` is falsy.
 */
function normalizeCaptionText(raw) {
  if (!raw) return "";
  const unescaped = unescapeCaptionEscapes(decodeHtmlEntities(raw));
  const withoutLeadingSpace = unescaped.replace(/^\s+/, "");
  const trimmed = withoutLeadingSpace.replace(/\s+$/, "");
  return trimmed.replace(/^["'\u201c\u201d]+|["'\u201d\u201c]+$/g, "");
}
73
/**
 * Flattens a multi-line caption into one line: trims each line, drops empty
 * lines and lines made only of dots or ellipsis characters, joins the rest
 * with single spaces and collapses whitespace runs.
 * @param {string} text
 * @returns {string}
 */
function cleanInstagramCaptionLayout(text) {
  const meaningful = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) continue;
    if (/^\.+$/.test(line)) continue;
    if (/^[\u2026…]+$/.test(line)) continue;
    meaningful.push(line);
  }
  return meaningful.join(" ").replace(/\s+/g, " ").trim();
}
78
/**
 * Normalises a caption with normalizeCaptionText and then flattens its
 * layout with cleanInstagramCaptionLayout.
 * @param {string} raw
 * @returns {string}
 */
function normalizePostCaptionText(raw) {
  const normalized = normalizeCaptionText(raw);
  return cleanInstagramCaptionLayout(normalized);
}
81
// Lazy initialiser for src/utils/caption-normalize.ts: initialises the modules it relies on.
var init_caption_normalize = __esm({
  "src/utils/caption-normalize.ts": function() {
    "use strict";
    init_esm_shims();
    // The parser module defines decodeHtmlEntities, used by normalizeCaptionText.
    init_parser();
  }
});
88
+
89
+ // src/utils/engagement.ts
90
/**
 * Replaces typographic double/single quote and prime characters with their
 * ASCII equivalents and trims the result.
 * @param {string} raw
 * @returns {string}
 */
function normalizeInstagramQuotes(raw) {
  const doublesNormalised = raw.replace(/[\u201c\u201d\u201e\u2033\u2036]/g, '"');
  const singlesNormalised = doublesNormalised.replace(/[\u2018\u2019\u2032]/g, "'");
  return singlesNormalised.trim();
}
93
/**
 * Parses a human-readable count such as "1,234", "1.2K" or "3M" into an integer.
 * Commas are treated as thousands separators and removed before parsing.
 * @param {string} token - The count text; surrounding whitespace is ignored.
 * @returns {number|undefined} The rounded count, or undefined when the token
 *   is not a count (including tokens with no digits, such as ".").
 */
function parseCount(token) {
  const m = token.trim().match(/^([\d.,]+)\s*([KMB])?$/i);
  if (!m) return void 0;
  const value = parseFloat(m[1].replace(/,/g, ""));
  // "." or "," pass the character class but carry no number.
  if (Number.isNaN(value)) return void 0;
  const multipliers = { K: 1e3, M: 1e6, B: 1e9 };
  const suffix = (m[2] ?? "").toUpperCase();
  return Math.round(value * (multipliers[suffix] ?? 1));
}
103
/**
 * Extracts like/comment/view/share counts from a stats string such as
 * "1,234 likes, 56 comments".
 * Comma-grouped numbers are captured whole (previously "1,234 likes" was read
 * as 234) and the commas are stripped before the token is handed to parseCount.
 * @param {string} statsSegment
 * @returns {{likes?: number, comments?: number, views?: number, shares?: number}}
 *   Only the fields whose label was found are set.
 */
function extractEngagementCounts(statsSegment) {
  const labelledCounts = [
    ["likes", /([\d.][\d.,]*[KMB]?)\s+likes?/i],
    ["comments", /([\d.][\d.,]*[KMB]?)\s+comments?/i],
    ["views", /([\d.][\d.,]*[KMB]?)\s+views?/i],
    ["shares", /([\d.][\d.,]*[KMB]?)\s+shares?/i]
  ];
  const engagement = {};
  for (const [field, pattern] of labelledCounts) {
    const found = statsSegment.match(pattern);
    if (found) engagement[field] = parseCount(found[1].replace(/,/g, ""));
  }
  return engagement;
}
115
/**
 * Splits an Instagram description string into caption, username and
 * engagement counts. Three shapes are tried in order:
 *   1. stats - user on <date>: "quoted caption"
 *   2. stats - user on <date>: unquoted body
 *   3. a stats-only string with no " on ...:" part (caption becomes "" and the
 *      original text is kept in engagement.raw)
 * If none applies, the quote-normalised text is returned as the caption.
 * @param {string} raw
 * @returns {{caption: string, username: string, engagement: object}}
 */
function parseInstagramDescription(raw) {
  const text = normalizeInstagramQuotes(raw);

  const quotedMatch = text.match(
    /^([\d\s,KMB.likescommentsviews]+?)\s*-\s*@?([\w.]+)\s+on\s+[\s\S]+?:\s*["']([\s\S]+)["']\s*\.?\s*$/i
  );
  if (quotedMatch) {
    const [, stats, handle, body] = quotedMatch;
    const engagement = Object.assign({}, extractEngagementCounts(stats));
    return { caption: normalizeCaptionText(body.trim()), username: handle, engagement };
  }

  const colonMatch = text.match(
    /^([\d\s,KMB.likescommentsviews]+?)\s*-\s*@?([\w.]+)\s+on\s+([^:]+):\s*([\s\S]+)\s*$/i
  );
  if (colonMatch) {
    const engagement = Object.assign({}, extractEngagementCounts(colonMatch[1]));
    // Strip one wrapping quote on each side and a trailing period before normalising.
    const body = colonMatch[4].replace(/^["']|["']$/g, "").replace(/\s*\.\s*$/, "").trim();
    return { caption: normalizeCaptionText(body), username: colonMatch[2], engagement };
  }

  const engagement = {};
  let caption = text;
  const hasDatePrefix = /\s+on\s+[\w\s\d,]+:\s*/i.test(text);
  if (!hasDatePrefix && /^\d[\d.KMB,\s]*(?:likes?|comments?|views?)/i.test(text)) {
    Object.assign(engagement, extractEngagementCounts(text));
    caption = "";
    engagement.raw = text;
  }
  return { caption, username: "", engagement };
}
147
/**
 * Extracts a username and, where available, a caption from a page title.
 * Recognised shapes, in order: "<caption> = @user", "<name> (@user) •",
 * "<something> on Instagram" (skipped for "watch this story" titles), and
 * finally any "@user" mention.
 * @param {string} raw
 * @returns {{username: string, caption: string}}
 */
function parseInstagramTitle(raw) {
  const text = raw.trim();

  const highlightMatch = text.match(/^(.+?)\s*=\s*@([\w.]+)/);
  if (highlightMatch) {
    return { username: highlightMatch[2], caption: highlightMatch[1].trim() };
  }

  const reelMatch = text.match(/^(.+?)\s*\(@([\w.]+)\)\s*•/);
  if (reelMatch) {
    return { username: reelMatch[2], caption: "" };
  }

  let username = text.match(/\(@([\w.]+)\)/)?.[1] ?? "";
  let caption = "";
  const onInstagram = text.match(/^(.+?)\s+on Instagram/i);
  if (onInstagram && !text.toLowerCase().includes("watch this story")) {
    const lead = onInstagram[1].trim();
    const handle = lead.match(/@([\w.]+)/);
    if (handle) {
      username = handle[1];
    } else {
      caption = lead;
    }
  }
  if (!username) {
    const anyHandle = text.match(/@([\w.]+)/);
    if (anyHandle) username = anyHandle[1];
  }
  return { username, caption };
}
176
// Lazy initialiser for src/utils/engagement.ts: initialises the modules it relies on.
var init_engagement = __esm({
  "src/utils/engagement.ts": function() {
    "use strict";
    init_esm_shims();
    init_caption_normalize();
  }
});
183
+
184
+ // src/utils/caption.ts
185
/**
 * Reports whether text starts with an engagement count ("123 likes", ...)
 * and also contains an " on <something>:" segment.
 * @param {string} text
 * @returns {boolean}
 */
function isEngagementPrefixedCaption(text) {
  const startsWithCounts = /^\d[\d.KMB,\s]*(?:likes?|comments?|views?)/i.test(text);
  if (!startsWithCounts) return false;
  return /\s+on\s+[\w\s\d,.]+:\s*/i.test(text);
}
188
/**
 * Normalises a caption and, when it carries an engagement prefix, returns
 * only the caption part parsed out by parseInstagramDescription.
 * @param {string} raw
 * @returns {string}
 */
function captionWithoutEngagementPrefix(raw) {
  const text = normalizeCaptionText(raw);
  if (!text) return "";
  return isEngagementPrefixedCaption(text)
    ? normalizeCaptionText(parseInstagramDescription(text).caption)
    : text;
}
194
/**
 * Returns the longest cleaned caption among the candidates.
 * Falsy candidates are skipped; on equal length the earlier candidate wins.
 * @param {...string} candidates
 * @returns {string} "" when no candidate yields text.
 */
function pickBestCaption(...candidates) {
  return candidates.reduce((longest, raw) => {
    if (!raw) return longest;
    const text = captionWithoutEngagementPrefix(raw);
    return text && text.length > longest.length ? text : longest;
  }, "");
}
204
/**
 * Scans raw HTML for JSON `"caption": {... "text": "..."}` and
 * `"edge_media_to_caption": {... "text": "..."}` fragments, JSON-decodes each
 * text value and returns the best caption that is not engagement-prefixed.
 * Fragments that fail to decode are skipped.
 * @param {string} html
 * @returns {string}
 */
function scrapeCaptionFromHtml(html) {
  if (!html) return "";
  const captionPatterns = [
    /"caption"\s*:\s*\{[^}]*"text"\s*:\s*"((?:\\.|[^"\\])*)"/g,
    /"edge_media_to_caption"\s*:\s*\{[^}]*"text"\s*:\s*"((?:\\.|[^"\\])*)"/g
  ];
  const captions = [];
  for (const pattern of captionPatterns) {
    for (let hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
      try {
        // Re-wrap the captured body in quotes so JSON.parse resolves its escapes.
        const decoded = JSON.parse(`"${hit[1]}"`);
        if (decoded && !isEngagementPrefixedCaption(decoded)) captions.push(decoded);
      } catch {
        // Best effort: an undecodable fragment is ignored.
      }
    }
  }
  return pickBestCaption(...captions);
}
223
/**
 * Chooses the final caption from several sources.
 * For "reel" and "tv" a non-empty embed caption wins outright; otherwise the
 * longest of the remaining sources is used. For other types the longest of
 * all four sources is used.
 * @param {string} type - Content type.
 * @param {{embed?: string, scraped?: string, extracted?: string, og?: string}} sources
 * @returns {string}
 */
function resolveCaptionForContent(type, sources) {
  const stripped = (value) => (value ? captionWithoutEngagementPrefix(value) : "");
  const embed = sources.embed ? normalizeCaptionText(sources.embed) : "";
  const scraped = stripped(sources.scraped);
  const extracted = stripped(sources.extracted);
  const og = stripped(sources.og);

  const isVideoContent = type === "reel" || type === "tv";
  if (!isVideoContent) {
    return pickBestCaption(scraped, embed, extracted, og);
  }
  return embed ? embed : pickBestCaption(scraped, extracted, og);
}
234
/**
 * Reads a caption from an API item: `item.caption.text` if it is a string,
 * otherwise `item.caption_text` if it is a string, otherwise "".
 * @param {object} item
 * @returns {string} The normalised caption.
 */
function extractCaptionFromApiItem(item) {
  const captionObject = item.caption;
  const hasCaptionText = Boolean(captionObject) && typeof captionObject.text === "string";
  if (hasCaptionText) {
    return normalizeCaptionText(captionObject.text);
  }
  return typeof item.caption_text === "string" ? normalizeCaptionText(item.caption_text) : "";
}
240
// Lazy initialiser for src/utils/caption.ts: initialises the modules it relies on.
var init_caption = __esm({
  "src/utils/caption.ts"() {
    "use strict";
    init_esm_shims();
    init_engagement();
    // Called once: an __esm initialiser clears its module table before running,
    // so the second identical call that used to follow never did any work.
    init_caption_normalize();
  }
});
249
+
250
+ // src/core/parser.ts
251
// Namespace object for src/core/parser.ts; every entry is installed by
// __export as an enumerable getter that reads the binding when accessed.
var parser_exports = {};
__export(parser_exports, {
  decodeEscapedUrl: () => decodeEscapedUrl,
  decodeHtmlEntities: () => decodeHtmlEntities,
  detectNotFound: () => detectNotFound,
  detectRateLimit: () => detectRateLimit,
  mergeExtracted: () => mergeExtracted,
  needsVideoEmbedFallback: () => needsVideoEmbedFallback,
  parseEmbedHtml: () => parseEmbedHtml,
  parseHtml: () => parseHtml,
  pickBestImage: () => pickBestImage,
  pickBestVideo: () => pickBestVideo
});
264
+ import * as cheerio from "cheerio";
265
/**
 * Pixel area of a width/height pair; a null or undefined side counts as 0.
 * @param {number} [w]
 * @param {number} [h]
 * @returns {number}
 */
function area(w, h) {
  const width = w ?? 0;
  const height = h ?? 0;
  return width * height;
}
268
/**
 * Returns the candidate with the largest width*height area.
 * On equal area the earlier candidate is kept.
 * @param {Array<{width?: number, height?: number}>} candidates
 * @returns {object|null} null when the list is empty.
 */
function pickBestImage(candidates) {
  if (!candidates.length) return null;
  let winner = candidates[0];
  for (let i = 1; i < candidates.length; i++) {
    const contender = candidates[i];
    if (area(contender.width, contender.height) > area(winner.width, winner.height)) {
      winner = contender;
    }
  }
  return winner;
}
274
/**
 * Returns the candidate with the highest score, where the score is the
 * width*height area plus the duration (missing duration counts as 0).
 * On equal score the earlier candidate is kept.
 * @param {Array<{width?: number, height?: number, duration?: number}>} candidates
 * @returns {object|null} null when the list is empty.
 */
function pickBestVideo(candidates) {
  if (!candidates.length) return null;
  const score = (item) => area(item.width, item.height) + (item.duration ?? 0);
  let winner = candidates[0];
  for (let i = 1; i < candidates.length; i++) {
    if (score(candidates[i]) > score(winner)) winner = candidates[i];
  }
  return winner;
}
282
/**
 * Cleans a URL lifted from escaped JSON/HTML: resolves a few literal unicode
 * escapes and `&amp;`, unescapes slashes, removes every remaining backslash
 * and repairs an "https:/" prefix that lost its second slash.
 * @param {string} url
 * @returns {string}
 */
function decodeEscapedUrl(url) {
  const literalEscapes = [
    [/\\u0026/g, "&"],
    [/\\u00253D/g, "="],
    [/\\u003c/g, "<"],
    [/\\u003e/g, ">"],
    [/&amp;/g, "&"]
  ];
  let decoded = url;
  for (const [pattern, replacement] of literalEscapes) {
    decoded = decoded.replace(pattern, replacement);
  }
  // Peel escaping layers until no escaped slash or doubled backslash is left.
  while (decoded.includes("\\/") || decoded.includes("\\\\")) {
    decoded = decoded.replace(/\\+\//g, "/").replace(/\\\\/g, "\\");
  }
  decoded = decoded.replace(/\\+$/, "").replace(/\\/g, "");
  const hasBrokenScheme = decoded.startsWith("https:/") && !decoded.startsWith("https://");
  return hasBrokenScheme ? decoded.replace("https:/", "https://") : decoded;
}
293
/**
 * Decodes the HTML entities &amp; &quot; &lt; &gt; plus decimal (&#39;) and
 * hexadecimal (&#x27;) numeric references.
 *
 * Decoding is done in a single pass, so the output of one replacement is never
 * decoded again ("&amp;lt;" becomes "&lt;", not "<"). Numeric references that
 * are not valid Unicode code points are left untouched instead of throwing.
 * @param {string} text
 * @returns {string}
 */
function decodeHtmlEntities(text) {
  const named = { amp: "&", quot: '"', lt: "<", gt: ">" };
  const MAX_CODE_POINT = 0x10ffff;
  return text.replace(
    /&(?:(amp|quot|lt|gt)|#[xX]([0-9a-fA-F]+)|#(\d+));/g,
    (entity, name, hex, dec) => {
      if (name !== void 0) return named[name];
      const codePoint = hex !== void 0 ? parseInt(hex, 16) : parseInt(dec, 10);
      // String.fromCodePoint throws RangeError above U+10FFFF.
      if (!Number.isInteger(codePoint) || codePoint > MAX_CODE_POINT) return entity;
      return String.fromCodePoint(codePoint);
    }
  );
}
299
/**
 * Reports whether a URL points at the static CDN host, contains the
 * "/t51.2885-19/" path segment, or carries an "stp=dst-jpg_s<W>x<H>" marker.
 * @param {string} url
 * @returns {boolean}
 */
function isProfileOrStaticCdn(url) {
  if (url.includes("static.cdninstagram.com")) return true;
  if (/\/t51\.2885-19\//.test(url)) return true;
  return /stp=dst-jpg_s\d+x\d+/.test(url);
}
302
/**
 * Layer 1: reads media from `<script type="application/ld+json">` blocks.
 * Each script is handled independently; the first one that yields any media
 * determines the result. A script that fails to parse or process is skipped.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractJsonLd(html) {
  const $ = cheerio.load(html);
  const scripts = $('script[type="application/ld+json"]');
  const numberOrUndefined = (value) => (typeof value === "number" ? value : void 0);

  for (let index = 0; index < scripts.length; index++) {
    try {
      const raw = $(scripts[index]).html();
      if (!raw) continue;
      const parsed = JSON.parse(raw);
      const entries = Array.isArray(parsed) ? parsed : [parsed];
      const media = [];
      let caption = "";
      let username = "";

      for (const entry of entries) {
        if (typeof entry !== "object" || !entry) continue;

        if (typeof entry.description === "string") {
          caption = pickBestCaption(caption, entry.description);
        }

        const author = entry.author;
        if (typeof author === "object" && author) {
          // The identifier (minus a leading "@") takes precedence over the name.
          if (typeof author.identifier === "string") {
            username = author.identifier.replace(/^@/, "");
          } else if (typeof author.name === "string") {
            username = author.name;
          }
        }

        const contentUrl = entry.contentUrl ?? entry.embedUrl;
        if (typeof contentUrl === "string") {
          const isVideo = entry["@type"] === "VideoObject" || contentUrl.includes(".mp4");
          media.push({
            type: isVideo ? "video" : "image",
            url: decodeEscapedUrl(contentUrl),
            thumbnail: typeof entry.thumbnailUrl === "string" ? decodeEscapedUrl(entry.thumbnailUrl) : void 0,
            width: numberOrUndefined(entry.width),
            height: numberOrUndefined(entry.height),
            duration: typeof entry.duration === "string" ? parseDuration(entry.duration) : void 0
          });
        }

        // `image` is either a list of strings/objects or a single string.
        let images = [];
        if (Array.isArray(entry.image)) {
          images = entry.image;
        } else if (typeof entry.image === "string") {
          images = [entry.image];
        }
        for (const image of images) {
          if (typeof image === "string") {
            media.push({ type: "image", url: decodeEscapedUrl(image) });
          } else if (image && typeof image === "object" && typeof image.url === "string") {
            media.push({
              type: "image",
              url: decodeEscapedUrl(image.url),
              width: numberOrUndefined(image.width),
              height: numberOrUndefined(image.height)
            });
          }
        }
      }

      if (media.length) {
        logger.debug("Layer 1 (JSON-LD) succeeded");
        return { media: dedupeMedia(media), caption, username };
      }
    } catch {
      continue;
    }
  }
  return null;
}
369
/**
 * Converts an ISO-8601 style time duration ("PT1H2M3S", "PT12.5S") to seconds.
 * Fractional seconds are supported; previously "PT12.5S" silently produced 0.
 * @param {string} iso
 * @returns {number|undefined} Seconds, or undefined when the text holds no
 *   "PT" duration with at least one H/M/S component.
 */
function parseDuration(iso) {
  const match = iso.match(/PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?/);
  if (!match) return void 0;
  const [, hours, minutes, seconds] = match;
  // "PT" followed by nothing recognisable carries no duration at all.
  if (hours === void 0 && minutes === void 0 && seconds === void 0) return void 0;
  const h = parseInt(hours ?? "0", 10);
  const m = parseInt(minutes ?? "0", 10);
  const s = parseFloat(seconds ?? "0");
  return h * 3600 + m * 60 + s;
}
377
/**
 * Finds the first "{" after `marker` in `html`, scans forward to its matching
 * "}" (ignoring braces inside double-quoted strings, with backslash escapes
 * honoured) and JSON-parses that slice.
 * @param {string} html
 * @param {string} marker
 * @returns {*} The parsed value, or null when the marker or a balanced object
 *   is missing or the slice is not valid JSON.
 */
function extractWindowJson(html, marker) {
  const markerAt = html.indexOf(marker);
  if (markerAt === -1) return null;
  const open = html.indexOf("{", markerAt + marker.length);
  if (open === -1) return null;

  const parseOrNull = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return null;
    }
  };

  let depth = 0;
  let insideString = false;
  let escaped = false;
  for (let pos = open; pos < html.length; pos++) {
    const ch = html[pos];
    if (insideString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        insideString = false;
      }
      continue;
    }
    switch (ch) {
      case '"':
        insideString = true;
        break;
      case "{":
        depth += 1;
        break;
      case "}":
        depth -= 1;
        // The brace that brings depth back to zero closes the object.
        if (depth === 0) return parseOrNull(html.slice(open, pos + 1));
        break;
      default:
        break;
    }
  }
  return null;
}
411
/**
 * Builds media entries (at most one) from a single media node.
 * Source preference: widest `video_versions` entry for video nodes, then the
 * widest `image_versions2.candidates` entry, then `display_url`, then the
 * widest `display_resources` entry.
 *
 * Empty or non-array version lists no longer throw: a video node without
 * usable `video_versions` falls through to the image sources, and an empty
 * `display_resources` list simply yields nothing.
 * @param {object} node
 * @returns {object[]}
 */
function mediaFromNode(node) {
  const results = [];

  // Widest object in `list` by `widthKey`; undefined when nothing is usable.
  // Ties keep the earlier entry.
  const widest = (list, widthKey) => {
    if (!Array.isArray(list)) return void 0;
    const entries = list.filter((entry) => entry && typeof entry === "object");
    if (!entries.length) return void 0;
    return entries.reduce((a, b) => (b[widthKey] ?? 0) > (a[widthKey] ?? 0) ? b : a);
  };

  const isVideo = node.is_video === true || node.media_type === 2 || node.__typename === "GraphVideo";
  const bestVideo = isVideo ? widest(node.video_versions, "width") : void 0;

  if (bestVideo) {
    if (typeof bestVideo.url === "string") {
      results.push({
        type: "video",
        url: decodeEscapedUrl(bestVideo.url),
        width: bestVideo.width,
        height: bestVideo.height,
        duration: node.video_duration ?? node.duration,
        thumbnail: extractThumbnail(node)
      });
    }
  } else if (node.image_versions2) {
    const best = widest(node.image_versions2.candidates, "width");
    if (best && typeof best.url === "string") {
      results.push({
        type: "image",
        url: decodeEscapedUrl(best.url),
        width: best.width,
        height: best.height
      });
    }
  } else if (node.display_url) {
    results.push({
      type: "image",
      url: decodeEscapedUrl(String(node.display_url)),
      width: node.original_width,
      height: node.original_height
    });
  } else if (node.display_resources) {
    const best = widest(node.display_resources, "config_width");
    if (best && typeof best.src === "string") {
      results.push({
        type: "image",
        url: decodeEscapedUrl(best.src),
        width: best.config_width,
        height: best.config_height
      });
    }
  }
  return results;
}
467
/**
 * Picks a thumbnail URL for a node: `thumbnail_src` when it is a string,
 * otherwise the URL of the first `image_versions2` candidate.
 * @param {object} node
 * @returns {string|undefined}
 */
function extractThumbnail(node) {
  if (typeof node.thumbnail_src === "string") {
    return decodeEscapedUrl(node.thumbnail_src);
  }
  const firstCandidate = node.image_versions2?.candidates?.[0];
  const hasUrl = Boolean(firstCandidate) && typeof firstCandidate.url === "string";
  return hasUrl ? decodeEscapedUrl(firstCandidate.url) : void 0;
}
475
/**
 * Recursively walks a parsed JSON value, appending media found along the way
 * to `media` and recording the best caption and the owner username in `meta`.
 *
 * Carousel children (`carousel_media`, `edge_sidecar_to_children`) are read as
 * slides of their parent and are not walked again as ordinary properties.
 * Null or malformed caption/sidecar containers are skipped instead of
 * throwing, so one odd record cannot abort the whole walk.
 * @param {*} obj - Any JSON value; non-objects are ignored.
 * @param {object[]} media - Mutated: receives the media entries.
 * @param {{caption: string, username: string}} meta - Mutated.
 * @returns {void}
 */
function walkForMedia(obj, media, meta) {
  if (!obj || typeof obj !== "object") return;
  if (Array.isArray(obj)) {
    for (const item of obj) walkForMedia(item, media, meta);
    return;
  }
  const record = obj;

  if (typeof record.caption === "object" && record.caption) {
    const cap = record.caption;
    if (typeof cap.text === "string") {
      meta.caption = pickBestCaption(meta.caption, normalizeCaptionText(cap.text));
    }
  } else if (record.edge_media_to_caption && typeof record.edge_media_to_caption === "object") {
    // `typeof null` is "object", hence the explicit truthiness check above.
    const text = record.edge_media_to_caption.edges?.[0]?.node?.text;
    if (typeof text === "string") {
      meta.caption = pickBestCaption(meta.caption, normalizeCaptionText(text));
    }
  }

  if (typeof record.owner === "object" && record.owner) {
    const owner = record.owner;
    if (typeof owner.username === "string") meta.username = owner.username;
  }

  const carouselSlides = [];
  if (Array.isArray(record.carousel_media)) {
    for (const item of record.carousel_media) {
      if (item && typeof item === "object") carouselSlides.push(item);
    }
  }
  const sidecarEdges = record.edge_sidecar_to_children?.edges;
  if (Array.isArray(sidecarEdges)) {
    for (const edge of sidecarEdges) {
      const node = edge?.node;
      if (node && typeof node === "object") carouselSlides.push(node);
    }
  }

  if (carouselSlides.length > 0) {
    for (const slide of carouselSlides) {
      media.push(...mediaFromNode(slide));
    }
  } else if (record.shortcode || record.display_url || record.video_versions || record.image_versions2 || record.is_video !== void 0) {
    media.push(...mediaFromNode(record));
  }

  for (const [key, value] of Object.entries(record)) {
    if (key === "carousel_media" || key === "edge_sidecar_to_children") continue;
    if (value && typeof value === "object") walkForMedia(value, media, meta);
  }
}
525
/**
 * Layer 2: reads media from the JSON object that follows
 * "window.__additionalDataLoaded(" in the page.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractAdditionalData(html) {
  const payload = extractWindowJson(html, "window.__additionalDataLoaded(");
  if (!payload) return null;
  const collected = [];
  const meta = { caption: "", username: "" };
  walkForMedia(payload, collected, meta);
  if (!collected.length) return null;
  logger.debug("Layer 2 (__additionalDataLoaded) succeeded");
  const { caption, username } = meta;
  return { media: dedupeMedia(collected), caption, username };
}
537
/**
 * Layer 3: reads media from the JSON object that follows
 * "window._sharedData" in the page.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractSharedData(html) {
  const payload = extractWindowJson(html, "window._sharedData");
  if (!payload) return null;
  const collected = [];
  const meta = { caption: "", username: "" };
  walkForMedia(payload, collected, meta);
  if (!collected.length) return null;
  logger.debug("Layer 3 (_sharedData) succeeded");
  const { caption, username } = meta;
  return { media: dedupeMedia(collected), caption, username };
}
549
/**
 * Layer 4: reads media from the JSON inside the `#__NEXT_DATA__` element.
 * Any error while parsing or walking the data results in null.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractNextData(html) {
  const $ = cheerio.load(html);
  const script = $("#__NEXT_DATA__").html();
  if (!script) return null;
  try {
    const payload = JSON.parse(script);
    const collected = [];
    const meta = { caption: "", username: "" };
    walkForMedia(payload, collected, meta);
    if (!collected.length) return null;
    logger.debug("Layer 4 (Next.js) succeeded");
    return { media: dedupeMedia(collected), caption: meta.caption, username: meta.username };
  } catch {
    return null;
  }
}
567
/**
 * Layer 5: reads media, caption and username from Open Graph / description
 * meta tags. The username is taken from the og:title, trying "(@user)",
 * then the text before " on Instagram", then any "@user" mention.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractOpenGraph(html) {
  const $ = cheerio.load(html);
  const metaContent = (selector) => $(selector).attr("content");
  const media = [];

  const rawDescription = metaContent('meta[property="og:description"]') ?? metaContent('meta[name="description"]') ?? "";
  const caption = normalizeCaptionText(decodeHtmlEntities(rawDescription));

  const ogTitle = decodeHtmlEntities(metaContent('meta[property="og:title"]') ?? "");
  let username = ogTitle.match(/\(@([^)]+)\)/)?.[1] ?? ogTitle.split("(@")[1]?.replace(")", "").trim() ?? "";
  if (!username && ogTitle.includes(" on Instagram")) {
    username = ogTitle.split(" on Instagram")[0]?.replace(/^.*@/, "").trim() ?? "";
  }
  if (!username && ogTitle.includes("@")) {
    const mention = ogTitle.match(/@([\w.]+)/);
    if (mention) username = mention[1];
  }

  const ogVideo = decodeHtmlEntities(
    metaContent('meta[property="og:video:secure_url"], meta[property="og:video"]') ?? ""
  );
  const ogImage = decodeHtmlEntities(
    metaContent('meta[property="og:image"]') ?? metaContent('meta[property="og:image:url"]') ?? metaContent('meta[name="twitter:image"]') ?? ""
  );

  if (ogVideo) {
    media.push({
      type: "video",
      url: decodeEscapedUrl(ogVideo),
      thumbnail: ogImage ? decodeEscapedUrl(ogImage) : void 0
    });
  }
  if (ogImage) {
    const imageUrl = decodeEscapedUrl(ogImage);
    const type = imageUrl.includes(".mp4") ? "video" : "image";
    // When a video is present, an og:image that is an image acts as its thumbnail.
    const isThumbForVideo = Boolean(ogVideo) && type === "image";
    const alreadyListed = media.some((m) => m.url === imageUrl);
    const sourceAllowed = !isProfileOrStaticCdn(imageUrl) || !ogVideo;
    if (!isThumbForVideo && !alreadyListed && sourceAllowed) {
      media.push({ type, url: imageUrl });
    } else if (ogVideo && imageUrl && !media[0]?.thumbnail) {
      media[0].thumbnail = imageUrl;
    }
  }

  if (!media.length) return null;
  logger.debug("Layer 5 (Open Graph) succeeded");
  return { media: dedupeMedia(media), caption, username };
}
613
/**
 * Layer 5b: walks every `<script type="application/json">` payload of at
 * least 500 characters for media. Scripts that fail to parse or walk are
 * skipped.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractScriptJson(html) {
  const $ = cheerio.load(html);
  const collected = [];
  const meta = { caption: "", username: "" };
  for (const element of $('script[type="application/json"]').toArray()) {
    const raw = $(element).html();
    if (!raw || raw.length < 500) continue;
    try {
      walkForMedia(JSON.parse(raw), collected, meta);
    } catch {
      // Best effort: an unusable script is ignored.
    }
  }
  if (!collected.length) return null;
  logger.debug("Layer 5b (application/json scripts) succeeded");
  return { media: dedupeMedia(collected), caption: meta.caption, username: meta.username };
}
632
/**
 * Extracts video media from embed-page HTML. The JSON-script result is used
 * when it already contains a video; otherwise the HTML is scanned for
 * `video_url` / `playback_url` values (plain and backslash-escaped).
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function parseEmbedHtml(html) {
  const fromScripts = extractScriptJson(html);
  if (fromScripts?.media.some((m) => m.type === "video")) {
    return fromScripts;
  }

  const urlPatterns = [
    /video_url\\":\\"([^"]+)/g,
    /"video_url":"(https?:[^"]+)"/g,
    /playback_url\\":\\"([^"]+)/g,
    /"playback_url":"(https?:[^"]+)"/g
  ];
  const videos = [];
  for (const pattern of urlPatterns) {
    for (let hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
      let url = decodeEscapedUrl(hit[1]);
      if (!url.startsWith("http")) {
        url = `https://${url.replace(/^\/+/, "")}`;
      }
      const looksLikeVideo = url.includes(".mp4") || url.includes("fbcdn") || url.includes("cdninstagram");
      if (looksLikeVideo) videos.push({ type: "video", url });
    }
  }
  return videos.length ? { media: dedupeMedia(videos), caption: "", username: "" } : null;
}
657
/**
 * Layer 6b: collects scontent.cdninstagram.com URLs (plain and
 * slash-escaped) directly from the HTML, skipping profile/static assets and
 * repeated URLs. URLs containing ".mp4" are typed as video, others as image.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractDirectCdn(html) {
  const cdnPatterns = [
    /https:\/\/scontent\.cdninstagram\.com\/[^"'\s<>\\&]+/g,
    /https:\\\/\\\/scontent\.cdninstagram\.com\/[^"\\]+/g
  ];
  const seenUrls = /* @__PURE__ */ new Set();
  const collected = [];
  for (const pattern of cdnPatterns) {
    for (let hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
      const url = decodeEscapedUrl(hit[0]);
      if (isProfileOrStaticCdn(url) || seenUrls.has(url)) continue;
      seenUrls.add(url);
      collected.push({ type: url.includes(".mp4") ? "video" : "image", url });
    }
  }
  if (!collected.length) return null;
  logger.debug("Layer 6b (direct CDN) succeeded");
  return { media: dedupeMedia(collected), caption: "", username: "" };
}
680
/**
 * Layer 6: only runs when the page mentions a "doc_id" or "query_id"; then
 * collects `video_url`, `display_url` and escaped cdninstagram `url` values.
 * A URL containing ".mp4" or "video" is typed as video; a cdninstagram/fbcdn
 * URL as image; anything else is dropped.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractGraphQLFromPage(html) {
  const hasDocId = html.match(/"doc_id":"(\d+)"/);
  const hasQueryId = html.match(/"query_id":"(\d+)"/);
  if (!hasDocId && !hasQueryId) return null;

  const cdnPatterns = [
    /"video_url":"([^"]+)"/g,
    /"display_url":"([^"]+)"/g,
    /"url":"(https:\\\/\\\/[^"]+?cdninstagram[^"]+)"/g
  ];
  const collected = [];
  for (const pattern of cdnPatterns) {
    for (let hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
      const url = decodeEscapedUrl(hit[1]);
      const isVideo = url.includes(".mp4") || url.includes("video");
      if (isVideo) {
        collected.push({ type: "video", url });
        continue;
      }
      if (url.includes("cdninstagram") || url.includes("fbcdn")) {
        collected.push({ type: "image", url });
      }
    }
  }
  if (!collected.length) return null;
  logger.debug("Layer 6 (GraphQL discovery) succeeded");
  return { media: dedupeMedia(collected), caption: "", username: "" };
}
707
/**
 * Layer 7: regex-only fallback over the raw HTML.
 *   - one video per "video_versions" array (its widest url/width pair),
 *   - one image: the widest of all display_url / src candidates,
 *   - if nothing was found, every escaped "url" that looks like an .mp4 or a
 *     cdninstagram asset.
 * @param {string} html
 * @returns {{media: object[], caption: string, username: string}|null}
 */
function extractFallback(html) {
  const collected = [];
  // Ties keep the earlier entry.
  const widest = (list) => list.reduce((a, b) => b.width > a.width ? b : a);

  const videoBlockPattern = /"video_versions":\s*\[([^\]]+)\]/g;
  for (let block = videoBlockPattern.exec(html); block !== null; block = videoBlockPattern.exec(html)) {
    const versionPattern = /"url":"([^"]+)","width":(\d+)/g;
    const versions = [];
    for (let hit = versionPattern.exec(block[1]); hit !== null; hit = versionPattern.exec(block[1])) {
      versions.push({ url: decodeEscapedUrl(hit[1]), width: parseInt(hit[2], 10) });
    }
    if (versions.length) {
      const best = widest(versions);
      collected.push({ type: "video", url: best.url, width: best.width });
    }
  }

  const imageCandidates = [];
  const imagePattern = /"display_url":"([^"]+)"|"src":"(https?:\\\/\\\/[^"]+?)"/g;
  for (let hit = imagePattern.exec(html); hit !== null; hit = imagePattern.exec(html)) {
    const url = decodeEscapedUrl(hit[1] ?? hit[2]);
    if (url.includes("cdninstagram") || url.includes("fbcdn")) {
      // Width is unknown for these matches.
      imageCandidates.push({ url, width: 0 });
    }
  }
  const sizedPattern = /"src":"(https?:\\\/\\\/[^"]+)","config_width":(\d+)/g;
  for (let hit = sizedPattern.exec(html); hit !== null; hit = sizedPattern.exec(html)) {
    imageCandidates.push({ url: decodeEscapedUrl(hit[1]), width: parseInt(hit[2], 10) });
  }
  if (imageCandidates.length) {
    const best = widest(imageCandidates);
    const alreadyListed = collected.some((m) => m.url === best.url);
    if (!alreadyListed) {
      collected.push({ type: "image", url: best.url, width: best.width || void 0 });
    }
  }

  if (!collected.length) {
    const anyUrlPattern = /"url":"(https?:\\\/\\\/[^"]+)"/g;
    for (let hit = anyUrlPattern.exec(html); hit !== null; hit = anyUrlPattern.exec(html)) {
      const url = decodeEscapedUrl(hit[1]);
      if (url.includes(".mp4")) {
        collected.push({ type: "video", url });
      } else if (url.includes("cdninstagram")) {
        collected.push({ type: "image", url });
      }
    }
  }

  if (!collected.length) return null;
  logger.debug("Layer 7 (fallback) succeeded");
  return { media: dedupeMedia(collected), caption: "", username: "" };
}
761
/**
 * Removes media whose URL (ignoring the query string) was already seen, drops
 * entries that do not start with "http" or that point at
 * static.cdninstagram.com, and returns copies with their URLs passed through
 * decodeEscapedUrl. The first occurrence of each URL is the one kept.
 * @param {Array<{url: string}>} media
 * @returns {object[]}
 */
function dedupeMedia(media) {
  const seenPaths = /* @__PURE__ */ new Set();
  const kept = media.filter((item) => {
    const pathOnly = item.url.split("?")[0];
    if (seenPaths.has(pathOnly)) return false;
    // Recorded before the validity checks, exactly like a first sighting.
    seenPaths.add(pathOnly);
    return item.url.startsWith("http") && !item.url.includes("static.cdninstagram.com");
  });
  return kept.map((item) => ({ ...item, url: decodeEscapedUrl(item.url) }));
}
777
/**
 * Merges two extraction results. Media lists are concatenated (primary first)
 * and de-duplicated, tags are unioned, the best caption is chosen, and scalar
 * fields prefer the primary result. Engagement fields from the primary
 * override those from the secondary.
 * @param {object|null} primary
 * @param {object|null} secondary
 * @returns {object|null} The other argument when one is missing; null when both are.
 */
function mergeExtracted(primary, secondary) {
  if (!primary) return secondary ? secondary : null;
  if (!secondary) return primary;

  const media = dedupeMedia([...primary.media, ...secondary.media]);
  const tagSet = /* @__PURE__ */ new Set([...primary.tags ?? [], ...secondary.tags ?? []]);
  const tags = [...tagSet];
  const caption = pickBestCaption(primary.caption, secondary.caption);
  const hasEngagement = primary.engagement || secondary.engagement;
  return {
    media,
    caption,
    username: primary.username || secondary.username,
    engagement: hasEngagement ? { ...secondary.engagement, ...primary.engagement } : void 0,
    tags: tags.length ? tags : void 0,
    isPrivate: primary.isPrivate || secondary.isPrivate
  };
}
792
/**
 * Reports whether a reel/tv item still lacks a video and therefore needs the
 * embed fallback. Non-reel/tv content never needs it.
 * @param {{type: string}} parsed
 * @param {{media: Array<{type: string}>}|null|undefined} data
 * @returns {boolean}
 */
function needsVideoEmbedFallback(parsed, data) {
  const hasNoMedia = !data?.media.length;
  const isVideoContent = parsed.type === "reel" || parsed.type === "tv";
  if (hasNoMedia) return isVideoContent;
  if (!isVideoContent) return false;
  return data.media.every((m) => m.type !== "video");
}
797
/**
 * Decides whether the merged result is good enough to stop trying layers.
 * Any video is always enough. For "post", the first image must be wider than
 * 640px, judged by its width/height fields or, failing that, by an "stp=c..."
 * crop marker in its URL. For "reel"/"tv" a video is required. For any other
 * type, any media is enough.
 * @param {{media: object[]}} data
 * @param {string} contentType
 * @returns {boolean}
 */
function extractionComplete(data, contentType) {
  const hasVideo = data.media.some((m) => m.type === "video");
  if (hasVideo) return true;

  switch (contentType) {
    case "post": {
      const firstImage = data.media.find((m) => m.type === "image");
      if (!firstImage) return false;
      if (firstImage.width && firstImage.height && firstImage.width > 640) return true;
      const cropInfo = firstImage.url.match(/stp=c[\d.]+?\.(\d+)\.(\d+)a/i);
      return cropInfo ? parseInt(cropInfo[1], 10) > 640 : false;
    }
    case "reel":
    case "tv":
      // No video was found above, so video content is not complete.
      return hasVideo;
    default:
      return data.media.length > 0;
  }
}
815
/**
 * Runs the extraction layers over a page and merges their results.
 * post/reel/tv/highlight/story use a short layer list; other types use the
 * full list. In the short list, every type except "post" stops as soon as the
 * merged result is complete.
 * @param {string} html
 * @param {string} contentType
 * @returns {object|null} The merged result, a private marker
 *   (`isPrivate: true`, empty media) when the page looks like a login wall,
 *   or null when nothing was found.
 */
function parseHtml(html, contentType) {
  const fastTypes = ["post", "reel", "tv", "highlight", "story"];
  const fast = fastTypes.includes(contentType);
  const fastLayers = [
    extractOpenGraph,
    extractScriptJson,
    extractAdditionalData,
    extractSharedData,
    extractFallback
  ];
  const fullLayers = [
    extractScriptJson,
    extractJsonLd,
    extractAdditionalData,
    extractSharedData,
    extractNextData,
    extractOpenGraph,
    extractGraphQLFromPage,
    extractDirectCdn,
    extractFallback
  ];
  const layers = fast ? fastLayers : fullLayers;
  const mayStopEarly = fast && contentType !== "post";

  let merged = null;
  for (const layer of layers) {
    const result = layer(html);
    if (!result?.media.length) continue;
    merged = mergeExtracted(merged, result);
    if (mayStopEarly && merged && extractionComplete(merged, contentType)) break;
  }

  if (merged?.media.length) {
    return upgradeMediaQuality(merged);
  }
  const looksLikeLoginWall = html.includes("login") && html.includes("Log in to Instagram");
  return looksLikeLoginWall ? { media: [], caption: "", username: "", isPrivate: true } : null;
}
852
/**
 * Reorders and trims the media list of a result.
 *   - With a video: the best video first, then images whose URL (ignoring the
 *     query string) is not already listed.
 *   - Without a video and with more than one image: all images sorted by
 *     area, largest first.
 *   - Otherwise: the de-duplicated list, or the original media if it is empty.
 * @param {{media: object[]}} data
 * @returns {object} A copy of `data` with the adjusted `media`.
 */
function upgradeMediaQuality(data) {
  const videos = data.media.filter((m) => m.type === "video");
  const images = data.media.filter((m) => m.type === "image");
  const pathOnly = (url) => url.split("?")[0];

  const bestVideo = pickBestVideo(videos);
  const leadVideo = bestVideo || videos[0];
  const upgraded = leadVideo ? [leadVideo] : [];
  for (const image of images) {
    const alreadyListed = upgraded.some((u) => pathOnly(u.url) === pathOnly(image.url));
    if (!alreadyListed) upgraded.push(image);
  }

  if (!bestVideo && images.length > 1) {
    const byAreaDesc = [...images].sort(
      (a, b) => area(b.width, b.height) - area(a.width, a.height)
    );
    return { ...data, media: byAreaDesc };
  }
  return { ...data, media: upgraded.length ? upgraded : data.media };
}
872
/**
 * Reports whether the page body contains one of the known rate-limit phrases.
 *
 * @param {string} html - Response body.
 * @returns {boolean}
 */
function detectRateLimit(html) {
  const markers = ["Please wait a few minutes", "429 Too Many Requests", "rate limit"];
  return markers.some((marker) => html.includes(marker));
}
875
/**
 * Reports whether a response represents a missing page: either a 404 status
 * or one of the known "not available" phrases in the body.
 *
 * @param {string} html - Response body.
 * @param {number} statusCode - HTTP status of the response.
 * @returns {boolean}
 */
function detectNotFound(html, statusCode) {
  if (statusCode === 404) return true;
  if (html.includes("Sorry, this page isn't available")) return true;
  return html.includes("Page Not Found");
}
878
// Lazy initializer for the parser module. `__esm` (defined at the top of the
// bundle) runs the body on the first call only; the body in turn initializes
// the modules the parser depends on, in this order.
var init_parser = __esm({
  "src/core/parser.ts"() {
    "use strict";
    init_esm_shims();
    init_caption();
    init_caption_normalize();
    init_logger();
  }
});
887
+
888
+ // src/index.ts
889
+ init_esm_shims();
890
+
891
+ // src/core/downloader.ts
892
+ init_esm_shims();
893
+
894
+ // src/types/index.ts
895
+ init_esm_shims();
896
+
897
+ // src/version.ts
898
+ init_esm_shims();
899
+ var PACKAGE_VERSION = "1.0.0";
900
+
901
+ // src/types/index.ts
902
+ var EXTRACTOR_NAME = "ultra-igdl";
903
+
904
+ // src/network/client.ts
905
+ init_esm_shims();
906
+
907
+ // src/network/request.ts
908
+ init_esm_shims();
909
+ import { request } from "undici";
910
+
911
+ // src/network/headers.ts
912
+ init_esm_shims();
913
// Pool of User-Agent strings used for header rotation.
// NOTE: order matters — MOBILE_UA is read from index 5 (the iPhone entry).
var USER_AGENTS = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
  "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1"
];
// Accept-Language values rotated by buildHeaders.
var ACCEPT_LANGUAGES = [
  "en-US,en;q=0.9",
  "en-GB,en;q=0.9",
  "en-US,en;q=0.8,es;q=0.6"
];
// Sec-Ch-Ua client-hint values rotated by buildHeaders.
var SEC_CH_UA = [
  '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
  '"Chromium";v="131", "Not_A Brand";v="24"'
];
930
// Shared monotonically increasing counter behind every rotateIndex() call.
var rotationIndex = 0;
/**
 * Returns the next rotation slot for a list of `max` entries and advances the
 * shared counter by one.
 *
 * @param {number} max - Length of the list being rotated over.
 * @returns {number} The counter value before the increment, modulo `max`.
 */
function rotateIndex(max) {
  const current = rotationIndex++;
  return current % max;
}
936
+ var MOBILE_UA = USER_AGENTS[5];
937
/**
 * Headers for fetching an instagram.com HTML page. Always uses the mobile
 * User-Agent; the `_rotate` argument is accepted but ignored.
 *
 * @param {boolean} [_rotate=false] - Unused.
 * @returns {Record<string, string>}
 */
function buildInstagramPageHeaders(_rotate = false) {
  const headers = {};
  headers["User-Agent"] = MOBILE_UA;
  headers.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8";
  headers["Accept-Language"] = "en-US,en;q=0.9";
  // No Accept-Encoding on purpose: requesting br made the undici/IG combo
  // return stripped HTML without OG tags.
  headers.Referer = "https://www.instagram.com/";
  return headers;
}
947
/**
 * Builds browser-like request headers for `url`.
 *
 * instagram.com page URLs (anything containing "instagram.com" that is not a
 * cdninstagram/fbcdn URL) get the dedicated page headers. Every other URL gets
 * a desktop-style header set, optionally rotated through the UA / language /
 * client-hint pools.
 *
 * Changes from the previous version:
 * - No `Accept-Encoding` header. Responses are read with undici `request()`
 *   + `body.text()`, which is understood not to decode Content-Encoding, so
 *   advertising gzip/br invites a compressed body that would be read as text.
 * - Dropped the conditional spread that re-set `Referer` to the value it
 *   already had.
 *
 * NOTE(review): the client hints always describe Chrome on Windows, even when
 * the rotated User-Agent is Firefox or iPhone — consider aligning them.
 *
 * @param {string} url - Request URL.
 * @param {boolean} [rotate=true] - Rotate UA/language/client hints when true.
 * @returns {Record<string, string>}
 */
function buildHeaders(url, rotate = true) {
  const isCdnUrl = url.includes("cdninstagram") || url.includes("fbcdn");
  if (url.includes("instagram.com") && !isCdnUrl) {
    return buildInstagramPageHeaders(rotate);
  }
  const uaIdx = rotate ? rotateIndex(USER_AGENTS.length) : 0;
  const langIdx = rotate ? rotateIndex(ACCEPT_LANGUAGES.length) : 0;
  const secIdx = rotate ? rotateIndex(SEC_CH_UA.length) : 0;
  return {
    "User-Agent": USER_AGENTS[uaIdx],
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": ACCEPT_LANGUAGES[langIdx],
    "Cache-Control": "no-cache",
    Pragma: "no-cache",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Ch-Ua": SEC_CH_UA[secIdx],
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    Referer: "https://www.instagram.com/",
    Origin: "https://www.instagram.com"
  };
}
974
/**
 * Headers for Instagram web-API (XHR-style) calls: the headers produced by
 * `buildHeaders` for the instagram.com root, with the API-specific fields
 * layered on top (`Accept` is overridden to any type).
 *
 * @returns {Record<string, string>}
 */
function buildApiHeaders() {
  const apiOverrides = {
    "X-Requested-With": "XMLHttpRequest",
    "X-IG-App-ID": "936619743392459",
    "X-ASBD-ID": "129477",
    Accept: "*/*"
  };
  return Object.assign({}, buildHeaders("https://www.instagram.com/"), apiOverrides);
}
984
+
985
+ // src/network/retry.ts
986
+ init_esm_shims();
987
// Error codes (Node and undici) that indicate a transient transport failure.
var RETRYABLE_ERROR_CODES = /* @__PURE__ */ new Set([
  "ECONNRESET",
  "ETIMEDOUT",
  "EPIPE",
  "UND_ERR_CONNECT_TIMEOUT",
  "UND_ERR_HEADERS_TIMEOUT",
  "UND_ERR_BODY_TIMEOUT",
  "UND_ERR_SOCKET"
]);
/**
 * Default retry predicate: decides whether a failed attempt is worth repeating.
 *
 * Retries on HTTP 429/502/503/504 and on errors that look transient.
 *
 * Changes from the previous version:
 * - `AbortError` / `TimeoutError` are retried. A timeout enforced through an
 *   AbortController surfaces as an abort whose message does not contain
 *   "timeout", so it was never retried before.
 * - Known transient error codes are recognised even when the message text
 *   does not mention them.
 * - 504 is treated like 502/503.
 *
 * @param {unknown} error - The thrown value.
 * @param {number} [statusCode] - HTTP status attached to the error, if any.
 * @returns {boolean} True when the attempt should be retried.
 */
var DEFAULT_RETRY_ON = (error, statusCode) => {
  if (statusCode === 429 || statusCode === 502 || statusCode === 503 || statusCode === 504) {
    return true;
  }
  if (!(error instanceof Error)) return false;
  if (error.name === "AbortError" || error.name === "TimeoutError") return true;
  if (typeof error.code === "string" && RETRYABLE_ERROR_CODES.has(error.code)) return true;
  const msg = error.message.toLowerCase();
  return msg.includes("timeout") || msg.includes("econnreset") || msg.includes("socket") || msg.includes("network");
};
995
/**
 * Calls `fn(attempt)` until it resolves, retrying with exponential backoff.
 *
 * A failure is rethrown immediately when the attempt budget is used up or
 * when `retryOn(error, statusCode)` returns false. The wait before the next
 * attempt is `baseDelayMs * 2^attempt` plus up to 100 ms of random jitter,
 * capped at `maxDelayMs`.
 *
 * @param {(attempt: number) => Promise<*>} fn - Operation; receives the zero-based attempt number.
 * @param {{maxRetries?: number, baseDelayMs?: number, maxDelayMs?: number,
 *          retryOn?: (error: unknown, statusCode?: number) => boolean}} [options]
 * @returns {Promise<*>} Whatever `fn` resolves with.
 * @throws The last error thrown by `fn`.
 */
async function withRetry(fn, options = {}) {
  const {
    maxRetries = 3,
    baseDelayMs = 300,
    maxDelayMs = 8e3,
    retryOn = DEFAULT_RETRY_ON
  } = options;
  let failure;
  let attempt = 0;
  while (attempt <= maxRetries) {
    try {
      return await fn(attempt);
    } catch (err) {
      failure = err;
      const carriesStatus = err && typeof err === "object" && "statusCode" in err;
      const status = carriesStatus ? err.statusCode : void 0;
      const exhausted = attempt >= maxRetries;
      if (exhausted || !retryOn(err, status)) {
        throw err;
      }
      const backoff = baseDelayMs * 2 ** attempt + Math.random() * 100;
      await sleep(Math.min(backoff, maxDelayMs));
    }
    attempt += 1;
  }
  throw failure;
}
1022
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
1025
+
1026
+ // src/network/pool.ts
1027
+ init_esm_shims();
1028
+ import { Agent, Pool } from "undici";
1029
// Connection limit used when getAgent() is called without an argument.
var DEFAULT_CONNECTIONS = 100;
// Lazily created undici Agent shared by all callers.
var sharedAgent = null;
/**
 * Returns the shared undici Agent, creating it on first use.
 *
 * `maxConnections` only takes effect on the call that creates the agent;
 * later calls return the existing instance unchanged.
 *
 * @param {number} [maxConnections=DEFAULT_CONNECTIONS]
 * @returns {Agent}
 */
function getAgent(maxConnections = DEFAULT_CONNECTIONS) {
  if (sharedAgent) return sharedAgent;
  const agentOptions = {
    connections: maxConnections,
    pipelining: 1,
    keepAliveTimeout: 6e4,
    keepAliveMaxTimeout: 12e4,
    connect: { rejectUnauthorized: true }
  };
  sharedAgent = new Agent(agentOptions);
  return sharedAgent;
}
1043
/**
 * Returns static pool figures: the configured default connection limit and a
 * pending count that is always 0 (nothing is read from the live agent).
 *
 * @returns {{connections: number, pending: number}}
 */
function getPoolStats() {
  const stats = { connections: DEFAULT_CONNECTIONS, pending: 0 };
  return stats;
}
1049
+
1050
+ // src/network/client.ts
1051
+ init_logger();
1052
// In-flight request promises, keyed by URL (plain fetches) or by URL plus a
// cookie prefix (cookie fetches), so concurrent identical requests share one call.
var inFlight = /* @__PURE__ */ new Map();
var inFlightCookie = /* @__PURE__ */ new Map();

// Registers `promise` under `key` and removes it once settled. Uses
// then(release, release) rather than finally(): finally() returns a derived
// promise that rejects with the original error, and nobody observed it, so
// every failed request raised an unhandledRejection.
function trackInFlightRequest(registry, key, promise) {
  registry.set(key, promise);
  const release = () => {
    registry.delete(key);
  };
  promise.then(release, release);
  return promise;
}

// Flattens undici response headers into string values; array values are
// joined with ", ", anything else is skipped.
function flattenResponseHeaders(rawHeaders) {
  const flat = {};
  for (const [name, value] of Object.entries(rawHeaders)) {
    if (typeof value === "string") flat[name] = value;
    else if (Array.isArray(value)) flat[name] = value.join(", ");
  }
  return flat;
}

/**
 * Small HTTP GET client with per-request timeout, retry and in-flight
 * de-duplication.
 *
 * Options: `timeoutMs` (default 8000), `retries` (default 2),
 * `userAgentRotation` (default true).
 */
var HttpClient = class {
  timeoutMs;
  retries;
  userAgentRotation;
  constructor(options = {}) {
    this.timeoutMs = options.timeoutMs ?? 8e3;
    this.retries = options.retries ?? 2;
    this.userAgentRotation = options.userAgentRotation ?? true;
  }
  /**
   * GETs `url` with the Instagram page headers plus `cookie`. No retry.
   * With `dedupe`, concurrent calls for the same URL and the same first 48
   * cookie characters share one request.
   *
   * @returns {Promise<{body: string, statusCode: number, headers: Record<string, string>}>}
   */
  async fetchWithCookie(url, cookie, dedupe = true) {
    const key = `${url}\0${cookie.slice(0, 48)}`;
    if (dedupe && inFlightCookie.has(key)) {
      return inFlightCookie.get(key);
    }
    const promise = this.fetchWithCookieInternal(url, cookie);
    return dedupe ? trackInFlightRequest(inFlightCookie, key, promise) : promise;
  }
  async fetchWithCookieInternal(url, cookie) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeoutMs);
    try {
      const response = await request(url, {
        method: "GET",
        headers: { ...buildInstagramPageHeaders(), Cookie: cookie },
        signal: controller.signal
      });
      const body = await response.body.text();
      return {
        body,
        statusCode: response.statusCode,
        headers: flattenResponseHeaders(response.headers)
      };
    } finally {
      clearTimeout(timer);
    }
  }
  /**
   * GETs `url` with retry. With `dedupe`, concurrent calls for the same URL
   * share one request.
   *
   * @returns {Promise<{body: string, statusCode: number, headers: Record<string, string>}>}
   * @throws Error with `statusCode` set for 429 and 5xx responses once retries are exhausted.
   */
  async fetch(url, dedupe = true) {
    if (dedupe && inFlight.has(url)) {
      return inFlight.get(url);
    }
    const promise = this.fetchInternal(url);
    return dedupe ? trackInFlightRequest(inFlight, url, promise) : promise;
  }
  async fetchInternal(url) {
    return withRetry(
      async (attempt) => {
        // Retries always rotate headers, even when rotation is disabled.
        const headers = buildHeaders(url, this.userAgentRotation || attempt > 0);
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), this.timeoutMs);
        try {
          logger.debug(`Fetching ${url} (attempt ${attempt + 1})`);
          // Only CDN hosts go through the shared keep-alive agent.
          const usePool = url.includes("cdninstagram") || url.includes("fbcdn.net") || url.includes("fbsbx.com");
          const response = await request(url, {
            method: "GET",
            headers,
            ...usePool ? { dispatcher: getAgent() } : {},
            signal: controller.signal
          });
          const body = await response.body.text();
          const statusCode = response.statusCode;
          const resHeaders = flattenResponseHeaders(response.headers);
          if (statusCode === 429) {
            const err = new Error("Rate limited");
            err.statusCode = 429;
            throw err;
          }
          if (statusCode >= 500) {
            const err = new Error(`Server error ${statusCode}`);
            err.statusCode = statusCode;
            throw err;
          }
          return { body, statusCode, headers: resHeaders };
        } finally {
          clearTimeout(timer);
        }
      },
      { maxRetries: this.retries }
    );
  }
  /** Number of plain (non-cookie) requests currently in flight. */
  getInFlightCount() {
    return inFlight.size;
  }
};
1150
+
1151
+ // src/core/cache.ts
1152
+ init_esm_shims();
1153
+ import { LRUCache } from "lru-cache";
1154
// Attaches a no-op rejection handler to a fire-and-forget result so a failing
// optional remote cache cannot raise an unhandledRejection.
function ignoreCacheRejection(result) {
  if (result && typeof result.then === "function") {
    result.then(void 0, () => {
    });
  }
}

/**
 * Two-tier response cache: an in-memory LRU plus an optional remote store
 * (`options.redis`, expected to expose `get`, `set` and optionally `del`).
 *
 * Each entry carries a fresh window (`ttlMs`, default 5 min) and a longer
 * stale window (`staleTtlMs`, default 24 h). Values are stored as JSON and
 * re-parsed on every read, so callers get their own copy.
 *
 * Changes from the previous version:
 * - Remote `set`/`del` are best-effort: their rejections are swallowed instead
 *   of surfacing as unhandled rejections.
 * - A remote payload that is not valid JSON counts as a miss instead of
 *   making `get` throw.
 */
var ResponseCache = class {
  lru;
  redis;
  freshTtlMs;
  staleTtlMs;
  hits = 0;
  misses = 0;
  staleHits = 0;
  constructor(options = {}) {
    this.freshTtlMs = options.ttlMs ?? 3e5;
    this.staleTtlMs = options.staleTtlMs ?? 864e5;
    this.lru = new LRUCache({
      max: options.maxSize ?? 500,
      ttl: this.staleTtlMs
    });
    this.redis = options.redis;
  }
  /** Serializes `value` and stamps its fresh/stale deadlines. */
  wrap(value) {
    const now = Date.now();
    return {
      payload: JSON.stringify(value),
      freshUntil: now + this.freshTtlMs,
      staleUntil: now + this.staleTtlMs
    };
  }
  /** In-memory lookup; returns the value only while it is still fresh. */
  getFreshSync(key) {
    const entry = this.lru.get(key);
    if (!entry || Date.now() > entry.freshUntil) return null;
    this.hits++;
    return JSON.parse(entry.payload);
  }
  /** In-memory lookup; returns the value only when it is past fresh but not past stale. */
  getStaleSync(key) {
    const entry = this.lru.get(key);
    if (!entry || Date.now() > entry.staleUntil) return null;
    if (Date.now() <= entry.freshUntil) return null;
    this.staleHits++;
    return JSON.parse(entry.payload);
  }
  /**
   * Fresh in-memory value, else the remote value (which is then re-cached
   * locally), else null.
   */
  async get(key) {
    const fresh = this.getFreshSync(key);
    if (fresh) return fresh;
    if (this.redis) {
      const remote = await this.redis.get(`ultra-igdl:${key}`);
      if (remote) {
        let parsed;
        let readable = true;
        try {
          parsed = JSON.parse(remote);
        } catch {
          // Corrupt remote entry: fall through and report a miss.
          readable = false;
        }
        if (readable) {
          this.hits++;
          this.lru.set(key, this.wrap(parsed));
          return parsed;
        }
      }
    }
    this.misses++;
    return null;
  }
  /** Stores locally and, best-effort, remotely (remote TTL = stale window). */
  set(key, value) {
    this.lru.set(key, this.wrap(value));
    if (this.redis) {
      ignoreCacheRejection(this.redis.set(`ultra-igdl:${key}`, JSON.stringify(value), this.staleTtlMs));
    }
  }
  /** Removes the key locally and, when the remote supports `del`, remotely. */
  delete(key) {
    this.lru.delete(key);
    ignoreCacheRejection(this.redis?.del?.(`ultra-igdl:${key}`));
  }
  /** Clears the in-memory tier only. */
  clear() {
    this.lru.clear();
  }
  getStats() {
    const total = this.hits + this.misses + this.staleHits;
    return {
      size: this.lru.size,
      maxSize: this.lru.max,
      hits: this.hits,
      misses: this.misses,
      staleHits: this.staleHits,
      hitRate: total > 0 ? (this.hits + this.staleHits) / total : 0
    };
  }
};
1234
+
1235
+ // src/core/extractor.ts
1236
+ init_esm_shims();
1237
+
1238
+ // src/extractors/reel.ts
1239
+ init_esm_shims();
1240
+
1241
+ // src/extractors/post.ts
1242
+ init_esm_shims();
1243
+ init_parser();
1244
/**
 * Extracts media from an already-fetched post page.
 *
 * @param {{html: string, parsed: {type: string}}} ctx
 * @returns {Promise<object|null>} Parsed result, or null when the page is a
 *   "not found" page or nothing could be parsed.
 * @throws {Error} "Rate limited" with `code` 429 when the body looks rate limited.
 */
async function extractPost(ctx) {
  const { html } = ctx;
  if (detectRateLimit(html)) {
    const rateLimited = new Error("Rate limited");
    rateLimited.code = 429;
    throw rateLimited;
  }
  // The real status is not available here; 200 makes the check body-only.
  return detectNotFound(html, 200) ? null : parseHtml(html, ctx.parsed.type);
}
1253
+
1254
+ // src/extractors/reel.ts
1255
/**
 * Reels are parsed exactly like posts; this simply delegates to extractPost.
 *
 * @param {object} ctx - Same context object extractPost takes.
 * @returns {Promise<object|null>}
 */
async function extractReel(ctx) {
  const extracted = await extractPost(ctx);
  return extracted;
}
1258
+
1259
+ // src/extractors/story.ts
1260
+ init_esm_shims();
1261
+ init_parser();
1262
+
1263
+ // src/utils/media-quality.ts
1264
+ init_esm_shims();
1265
+
1266
+ // src/utils/media-dimensions.ts
1267
+ init_esm_shims();
1268
+ init_parser();
1269
/**
 * Scans the page for `original_width` / `original_height` pairs (plain and
 * backslash-escaped JSON) and returns the pair with the largest area.
 *
 * @param {string} html - Page source.
 * @returns {{width: (number|undefined), height: (number|undefined)}} Both
 *   undefined when no pair with a positive area is found.
 */
function maxOriginalDimensionsFromHtml(html) {
  const matchers = [
    /"original_width":\s*(\d+)\s*,\s*"original_height":\s*(\d+)/g,
    /"original_width":(\d+),"original_height":(\d+)/g,
    /original_width\\":(\d+),\\"original_height\\":(\d+)/g
  ];
  const largest = { area: 0, width: void 0, height: void 0 };
  for (const matcher of matchers) {
    for (let hit = matcher.exec(html); hit !== null; hit = matcher.exec(html)) {
      const w = parseInt(hit[1], 10);
      const h = parseInt(hit[2], 10);
      if (w * h > largest.area) {
        largest.area = w * h;
        largest.width = w;
        largest.height = h;
      }
    }
  }
  return { width: largest.width, height: largest.height };
}
1293
/**
 * Collects image URL candidates with their dimensions from embedded JSON.
 *
 * Four textual shapes are scanned in a fixed order (width/height before url,
 * url before width/height, config_width/config_height + src, and display_url
 * followed by original dimensions). Each URL path (query string ignored) is
 * recorded once, first occurrence wins.
 *
 * @param {string} html - Page source.
 * @returns {Array<{url: string, width: number, height: number}>}
 */
function scrapeImageCandidates(html) {
  const found = [];
  const seenPaths = /* @__PURE__ */ new Set();
  const record = (rawUrl, w, h) => {
    const unescaped = rawUrl.replace(/\\u0026/g, "&").replace(/\\\//g, "/");
    const decoded = decodeEscapedUrl(unescaped);
    if (!decoded.startsWith("http")) return;
    const [path] = decoded.split("?");
    if (seenPaths.has(path)) return;
    seenPaths.add(path);
    found.push({ url: decoded, width: w, height: h });
  };
  // Each source names the capture groups holding the url, width and height.
  const sources = [
    { re: /"width":\s*(\d+)\s*,\s*"height":\s*(\d+)[\s\S]*?"url":\s*"([^"]+)"/g, url: 3, width: 1, height: 2 },
    { re: /"url":\s*"([^"]+)"[\s\S]*?"width":\s*(\d+)\s*,\s*"height":\s*(\d+)/g, url: 1, width: 2, height: 3 },
    { re: /"config_width":\s*(\d+)\s*,\s*"config_height":\s*(\d+)[^}]*"src":\s*"([^"]+)"/g, url: 3, width: 1, height: 2 },
    { re: /"display_url":\s*"([^"]+)"[\s\S]{0,400}?"original_width":\s*(\d+)\s*,\s*"original_height":\s*(\d+)/g, url: 1, width: 2, height: 3 }
  ];
  for (const source of sources) {
    let hit;
    while ((hit = source.re.exec(html)) !== null) {
      record(hit[source.url], parseInt(hit[source.width], 10), parseInt(hit[source.height], 10));
    }
  }
  return found;
}
1323
/**
 * Extracts the "<digits>_<digits>" prefix of a path segment shaped like
 * "/<digits>_<digits>_…" from a media URL.
 *
 * @param {string} url
 * @returns {string|null} The two numbers joined by "_", or null when absent.
 */
function mediaAssetKey(url) {
  const hit = url.match(/\/(\d+)_(\d+)_/);
  if (!hit) return null;
  const [, first, second] = hit;
  return `${first}_${second}`;
}
1327
/**
 * Reports whether a URL carries a low-resolution size marker: `_s640x640`
 * or any three-digit `_sNNNxNNN_` size.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isLowResDeliveryUrl(url) {
  const lowResMarker = /_s640x640|_s\d{3}x\d{3}_|e\d+_s640x640/i;
  return lowResMarker.exec(url) !== null;
}
1330
/**
 * Picks the best scraped image candidate for `mediaUrl`: among candidates
 * whose URL contains the media's asset key when there are any, otherwise
 * among all candidates.
 *
 * @param {string} html - Page source to scrape.
 * @param {string} mediaUrl - URL of the media being improved.
 * @returns {{url: string, width: number, height: number}|null} Null when the
 *   page yields no candidates.
 */
function bestImageCandidateForMedia(html, mediaUrl) {
  const all = scrapeImageCandidates(html);
  if (all.length === 0) return null;
  const assetKey = mediaAssetKey(mediaUrl);
  const sameAsset = assetKey ? all.filter((c) => c.url.includes(assetKey)) : [];
  return pickLargestCandidate(sameAsset.length > 0 ? sameAsset : all);
}
1340
/**
 * Chooses the largest-area candidate, preferring valid media URLs and, among
 * those, URLs without a low-resolution marker.
 *
 * @param {Array<{url: string, width: number, height: number}>} candidates
 * @returns {object|undefined} Undefined for an empty list.
 */
function pickLargestCandidate(candidates) {
  const usable = candidates.filter((c) => isValidMediaUrl(c.url));
  const ranked = (usable.length ? usable : candidates).slice();
  ranked.sort((a, b) => b.width * b.height - a.width * a.height);
  for (const candidate of ranked) {
    if (!isLowResDeliveryUrl(candidate.url)) return candidate;
  }
  return ranked[0];
}
1347
/**
 * Finds every URL in the page that contains `assetKey` and passes
 * `isValidMediaUrl` after decoding. Both quoted plain URLs and
 * backslash-escaped `https:\/\/…` URLs are searched.
 *
 * Fix: `assetKey` is now escaped before being embedded in the RegExp, so
 * regex metacharacters in the key are matched literally instead of changing
 * the pattern (or throwing on an invalid one).
 *
 * @param {string} html - Page source.
 * @param {string} assetKey - Literal text the URL must contain.
 * @returns {string[]} Decoded URLs, in match order (duplicates possible).
 */
function scrapeAssetUrlsFromHtml(html, assetKey) {
  const literalKey = String(assetKey).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const urls = [];
  const patterns = [
    new RegExp(`"(https?:[^"]*${literalKey}[^"]*)"`, "gi"),
    new RegExp(`(https?:\\\\/\\\\/[^"\\\\]*${literalKey}[^"\\\\]*)`, "gi")
  ];
  for (const re of patterns) {
    let m;
    while ((m = re.exec(html)) !== null) {
      const decoded = decodeEscapedUrl(m[1]);
      if (isValidMediaUrl(decoded)) urls.push(decoded);
    }
  }
  return urls;
}
1362
/**
 * Tries to improve an image entry's dimensions and URL using data scraped
 * from the page. Non-image entries are returned untouched.
 *
 * Sources are applied in this order, each one only winning when it is an
 * improvement over the current state: the largest original_width/height pair
 * on the page, dimensions encoded in the media URL, the best scraped
 * candidate, and finally (only while the URL is still low-res) alternate
 * URLs for the same asset key.
 *
 * @param {{type: string, url: string, width?: number, height?: number}} media
 * @param {string} html - Page source.
 * @returns {object} Copy of `media` with possibly updated url/width/height.
 */
function enrichImageDimensions(media, html) {
  if (media.type !== "image") return media;
  const best = bestImageCandidateForMedia(html, media.url);
  const fromOriginal = maxOriginalDimensionsFromHtml(html);
  const fromStp = dimensionsFromImageUrl(media.url);
  // Running best state, mutated by apply() below.
  let width = media.width;
  let height = media.height;
  let url = media.url;
  // Considers one (w, h[, candidateUrl]) proposal against the running state.
  const apply = (w, h, candidateUrl) => {
    if (!w || !h) return;
    const area2 = w * h;
    const cur = (width ?? 0) * (height ?? 0);
    // Area implied by the size markers of the currently selected URL.
    const urlArea = dimensionsFromImageUrl(url);
    const curUrlArea = (urlArea.width ?? 0) * (urlArea.height ?? 0);
    const betterDims = area2 > cur;
    // A candidate URL qualifies when it is not low-res, or when the current
    // URL is low-res too and the candidate is at least as large.
    const betterUrl = candidateUrl && (!isLowResDeliveryUrl(candidateUrl) || isLowResDeliveryUrl(url) && area2 >= curUrlArea);
    if (betterDims) {
      width = w;
      height = h;
    }
    if (betterUrl && (betterDims || area2 >= curUrlArea)) {
      url = candidateUrl;
    }
  };
  // NOTE(review): fromOriginal is the largest pair anywhere on the page, not
  // necessarily this media's — presumably why multi-image lists skip this
  // function; confirm against applyPageHtmlToMedia.
  apply(fromOriginal.width, fromOriginal.height);
  apply(fromStp.width, fromStp.height);
  if (best) apply(best.width, best.height, best.url);
  const assetKey = mediaAssetKey(media.url);
  if (assetKey && isLowResDeliveryUrl(url)) {
    // Still low-res: look for other URLs of the same asset that are not
    // low-res and carry dimensions in the URL; take the largest.
    const alternates = scrapeAssetUrlsFromHtml(html, assetKey);
    const better = alternates.filter((u) => !isLowResDeliveryUrl(u)).map((u) => ({ url: u, ...dimensionsFromImageUrl(u) })).filter((c) => c.width && c.height).sort((a, b) => b.width * b.height - a.width * a.height)[0];
    if (better) apply(better.width, better.height, better.url);
  }
  // Last resort for missing dimensions: whatever the original URL encodes.
  if ((!width || !height) && fromStp.width && fromStp.height) {
    width = fromStp.width;
    height = fromStp.height;
  }
  // Never hand back a URL that fails validation; revert to the original.
  if (!isValidMediaUrl(url)) {
    url = media.url;
  }
  return { ...media, url, width, height };
}
1404
/**
 * Fills in image dimensions for a media list using the page HTML.
 *
 * With more than one image, only dimensions encoded in each image's own URL
 * are used to fill missing width/height. With a single image, the full
 * page-based enrichment is applied. Non-image entries pass through unchanged.
 *
 * @param {Array<object>} media
 * @param {string} html - Page source.
 * @returns {Array<object>} New array; entries are copied when modified.
 */
function applyPageHtmlToMedia(media, html) {
  const imageCount = media.reduce((count, m) => (m.type === "image" ? count + 1 : count), 0);
  const hasSeveralImages = imageCount > 1;
  return media.map((entry) => {
    if (entry.type !== "image") return entry;
    if (!hasSeveralImages) return enrichImageDimensions(entry, html);
    const fromUrl = dimensionsFromImageUrl(entry.url);
    return {
      ...entry,
      width: entry.width ?? fromUrl.width,
      height: entry.height ?? fromUrl.height
    };
  });
}
1419
/**
 * Pixel area of a media entry; a missing width or height counts as 0.
 *
 * @param {{width?: number, height?: number}} m
 * @returns {number}
 */
function mediaArea(m) {
  const w = m.width ?? 0;
  const h = m.height ?? 0;
  return w * h;
}
1422
/**
 * True when the first image in the list lacks a width or a height.
 *
 * @param {Array<{type: string, width?: number, height?: number}>} media
 * @returns {boolean} False when the list contains no image.
 */
function imageNeedsDimensions(media) {
  const firstImage = media.find((m) => m.type === "image");
  if (!firstImage) return false;
  return !firstImage.width || !firstImage.height;
}
1426
/**
 * True when the first image is served through a low-resolution URL although
 * its known dimensions (declared, or encoded in the URL) exceed 640x640.
 *
 * @param {Array<{type: string, url: string, width?: number, height?: number}>} media
 * @returns {boolean}
 */
function imageNeedsHigherResolution(media) {
  const firstImage = media.find((m) => m.type === "image");
  if (!firstImage || !isLowResDeliveryUrl(firstImage.url)) return false;
  const threshold = 640 * 640;
  const declaredArea = (firstImage.width ?? 0) * (firstImage.height ?? 0);
  if (declaredArea > threshold) return true;
  const encoded = dimensionsFromImageUrl(firstImage.url);
  const encodedArea = (encoded.width ?? 0) * (encoded.height ?? 0);
  return encodedArea > threshold;
}
1435
/**
 * True when the first image is missing dimensions or is a low-resolution
 * delivery of a larger image.
 *
 * @param {Array<object>} media
 * @returns {boolean}
 */
function postNeedsEmbedFetch(media) {
  if (imageNeedsDimensions(media)) return true;
  return imageNeedsHigherResolution(media);
}
1438
+
1439
+ // src/utils/media-quality.ts
1440
/**
 * True for CDN URLs that are not script/resource files and are not served
 * from the static asset host.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isValidMediaUrl(url) {
  if (!isCdnMediaUrl(url)) return false;
  const looksLikeScript = /\.js(?:\?|$)/i.test(url) || /rsrc\.php/i.test(url);
  const fromStaticHost = /static\.cdninstagram\.com/i.test(url);
  return !(looksLikeScript || fromStaticHost);
}
1446
/**
 * Keeps only the media entries whose URL passes `isValidMediaUrl`.
 *
 * @param {Array<{url: string}>} media
 * @returns {Array<{url: string}>} New array; entries are not copied.
 */
function filterValidMedia(media) {
  const kept = [];
  for (const entry of media) {
    if (isValidMediaUrl(entry.url)) kept.push(entry);
  }
  return kept;
}
1449
/**
 * Reads image dimensions encoded in a delivery URL: first from an `stp=c…`
 * crop parameter, otherwise from an `_s<W>x<H>_` size marker.
 *
 * @param {string} url
 * @returns {{width?: number, height?: number}} Empty object when neither form is present.
 */
function dimensionsFromImageUrl(url) {
  const hit = url.match(/stp=c[\d.]+?\.(\d+)\.(\d+)a/i) ?? url.match(/_s(\d+)x(\d+)_/i);
  if (!hit) return {};
  const [, rawWidth, rawHeight] = hit;
  return { width: parseInt(rawWidth, 10), height: parseInt(rawHeight, 10) };
}
1460
/**
 * Improves a single image entry. With page HTML, runs the full page-based
 * enrichment; without it, only fills missing width/height from the URL.
 * Non-image entries are returned as-is.
 *
 * @param {{type: string, url: string, width?: number, height?: number}} media
 * @param {string} [html] - Page source, when available.
 * @returns {object}
 */
function upgradeMediaItem(media, html) {
  const isImage = media.type === "image";
  if (!isImage) return media;
  if (html) return enrichImageDimensions(media, html);
  const fromUrl = dimensionsFromImageUrl(media.url);
  const upgraded = { ...media, url: media.url };
  upgraded.width = media.width ?? fromUrl.width;
  upgraded.height = media.height ?? fromUrl.height;
  return upgraded;
}
1471
/**
 * True when a URL looks like a profile picture rather than story media:
 * a `/t51.2885-19/` path, an `stp=dst-jpg_s<W>x<H>` transform, or
 * "profile_pic" anywhere in the URL.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isStoryProfileImage(url) {
  const profileMarkers = [/\/t51\.2885-19\//, /stp=dst-jpg_s\d+x\d+/, /profile_pic/i];
  return profileMarkers.some((marker) => marker.test(url));
}
1474
/**
 * True when `url` points at one of the media CDNs (cdninstagram.com,
 * fbcdn.net, fbsbx.com, or a subdomain of them).
 *
 * Fix: the host is now checked for absolute URLs. Previously any URL that
 * merely mentioned a CDN domain anywhere — in the query string, the path, or
 * as a prefix of an unrelated host such as `fbcdn.net.evil.example` — was
 * accepted. Strings that do not parse as absolute URLs (e.g. protocol-relative
 * ones) keep the old substring check.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isCdnMediaUrl(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not an absolute URL: fall back to the original textual check.
    return /cdninstagram\.com|fbcdn\.net|fbsbx\.com/i.test(url);
  }
  return /(?:^|\.)(?:cdninstagram\.com|fbcdn\.net|fbsbx\.com)$/i.test(hostname);
}
1477
/**
 * True for a non-empty CDN URL that is not a script/resource file.
 *
 * @param {string|undefined|null} url
 * @returns {boolean}
 */
function isValidThumbnailUrl(url) {
  if (!url) return false;
  const looksLikeScript = [/\.js(?:\?|$)/i, /rsrc\.php/i].some((pattern) => pattern.test(url));
  return looksLikeScript ? false : isCdnMediaUrl(url);
}
1482
+
1483
+ // src/network/instagram-api.ts
1484
+ init_esm_shims();
1485
+ init_parser();
1486
+ init_caption();
1487
+
1488
+ // src/utils/engagement-tags.ts
1489
+ init_esm_shims();
1490
/**
 * True when the trimmed og:description starts with a number followed by
 * "like(s)", "comment(s)" or "view(s)" (e.g. "1,234 likes, 56 comments …").
 *
 * @param {string} ogDescription
 * @returns {boolean}
 */
function ogDescriptionHasPublicCounts(ogDescription) {
  const leadingCounts = /^\d[\d.KMB,\s]*(?:likes?|comments?|views?)/i;
  const text = ogDescription.trim();
  return leadingCounts.test(text);
}
1493
/**
 * Looks for JSON flags in the page that say like/view counts or comments are
 * disabled.
 *
 * @param {string} html - Page source; falsy input yields both flags false.
 * @returns {{likesHidden: boolean, commentsHidden: boolean}}
 */
function scrapeEngagementFlagsFromHtml(html) {
  if (!html) return { likesHidden: false, commentsHidden: false };
  const likesHidden = /like_and_view_counts_disabled":\s*true/i.test(html) || /"hide_like_and_view_counts":\s*true/i.test(html);
  const commentsHidden = /comments_disabled":\s*true/i.test(html);
  return { likesHidden, commentsHidden };
}
1505
/**
 * Builds an engagement summary from an API media item.
 *
 * Numeric `like_count` / `comment_count` become `likes` / `comments`;
 * `views` comes from `view_count`, falling back to `play_count`. The
 * "disabled" flags become `likesHidden` / `commentsHidden`.
 *
 * @param {object} item - API media item.
 * @returns {object|undefined} Undefined when the item carries no metric and no flag.
 */
function engagementFromApiItem(item) {
  const isNumber = (value) => typeof value === "number";
  const summary = {};
  if (isNumber(item.like_count)) summary.likes = item.like_count;
  if (isNumber(item.comment_count)) summary.comments = item.comment_count;
  if (isNumber(item.view_count)) {
    summary.views = item.view_count;
  } else if (isNumber(item.play_count)) {
    summary.views = item.play_count;
  }
  if (item.like_and_view_counts_disabled === true) summary.likesHidden = true;
  if (item.comments_disabled === true) summary.commentsHidden = true;
  // Every key above is only set to a number or to true, so "any key present"
  // is the same as "has a metric or a flag".
  return Object.keys(summary).length > 0 ? summary : void 0;
}
1523
/**
 * Derives engagement visibility tags ("likes_hidden", "comments_hidden",
 * "engagement_hidden") for a piece of content.
 *
 * Tags come from explicit flags on `engagement`, from flags scraped out of
 * the page, and — for reel/post/tv with an og:description — from a heuristic:
 * a caption-style description without leading counts means any metric that is
 * also missing from `engagement` is treated as hidden.
 *
 * Fix: "engagement_hidden" is now added whenever both individual tags are
 * present. Previously the early return for non-countable types or an empty
 * og:description skipped it even when both flags were explicitly set.
 *
 * @param {string} parsedType - Content type.
 * @param {string|undefined|null} ogDescription - og:description text.
 * @param {{likes?: number, comments?: number, likesHidden?: boolean, commentsHidden?: boolean}} engagement
 * @param {string} html - Page source.
 * @returns {string[]} Tags in insertion order.
 */
function buildEngagementTags(parsedType, ogDescription, engagement, html) {
  const tags = /* @__PURE__ */ new Set();
  const countable = parsedType === "reel" || parsedType === "post" || parsedType === "tv";
  if (engagement.likesHidden) tags.add("likes_hidden");
  if (engagement.commentsHidden) tags.add("comments_hidden");
  const fromHtml = scrapeEngagementFlagsFromHtml(html);
  if (fromHtml.likesHidden) tags.add("likes_hidden");
  if (fromHtml.commentsHidden) tags.add("comments_hidden");
  if (countable && ogDescription?.trim()) {
    const publicCounts = ogDescriptionHasPublicCounts(ogDescription);
    const captionOnlyOg = /\bon\s+[\w\s\d,.]+:\s*["']/i.test(ogDescription);
    if (captionOnlyOg && !publicCounts) {
      if (engagement.likes == null) tags.add("likes_hidden");
      if (engagement.comments == null) tags.add("comments_hidden");
    }
  }
  if (tags.has("likes_hidden") && tags.has("comments_hidden")) {
    tags.add("engagement_hidden");
  }
  return [...tags];
}
1545
+
1546
+ // src/network/instagram-api.ts
1547
+ init_logger();
1548
+ var IG_APP_ID = "936619743392459";
1549
+ var API_HOSTS = ["https://www.instagram.com", "https://i.instagram.com"];
1550
/**
 * Trims a session id and percent-decodes it; when decoding fails (malformed
 * escape sequence) the trimmed value is returned as-is.
 *
 * @param {string} sessionId
 * @returns {string}
 */
function normalizeSessionId(sessionId) {
  const raw = sessionId.trim();
  let decoded = raw;
  try {
    decoded = decodeURIComponent(raw);
  } catch {
    // Malformed percent-escape: keep the raw value.
  }
  return decoded;
}
1558
/**
 * Builds the Cookie header value for authenticated requests.
 *
 * A non-blank `cookies` string wins: it is parsed into name/value pairs
 * (later duplicates replace earlier ones), its `sessionid` value is
 * normalized, and the pairs are re-joined with "; ". Otherwise a non-blank
 * `sessionId` yields a single `sessionid=…` pair.
 *
 * @param {string} [sessionId]
 * @param {string} [cookies] - Raw "name=value; …" cookie string.
 * @returns {string|undefined} Undefined when neither input has content.
 */
function buildSessionCookie(sessionId, cookies) {
  const rawCookies = cookies?.trim();
  if (rawCookies) {
    const jar = /* @__PURE__ */ new Map();
    for (const piece of rawCookies.split(";")) {
      const pair = piece.trim();
      const eq = pair.indexOf("=");
      const name = eq === -1 ? pair : pair.slice(0, eq);
      if (!name) continue;
      const value = eq === -1 ? "" : pair.slice(eq + 1);
      jar.set(name.trim(), value.trim());
    }
    if (jar.has("sessionid")) {
      jar.set("sessionid", normalizeSessionId(jar.get("sessionid")));
    }
    return Array.from(jar, ([name, value]) => `${name}=${value}`).join("; ");
  }
  return sessionId?.trim() ? `sessionid=${normalizeSessionId(sessionId)}` : void 0;
}
1573
/**
 * Merges Set-Cookie response header(s) into an existing Cookie string.
 * Only the leading `name=value` of each Set-Cookie line is used (attributes
 * are dropped); incoming names replace existing ones.
 *
 * @param {string} existing - Current "name=value; …" cookie string.
 * @param {string|string[]|undefined} setCookieHeader
 * @returns {string} Merged cookie string.
 */
function mergeSetCookie(existing, setCookieHeader) {
  const jar = /* @__PURE__ */ new Map();
  existing.split(";").forEach((piece) => {
    const [name, ...valueParts] = piece.trim().split("=");
    if (name) jar.set(name.trim(), valueParts.join("=").trim());
  });
  let incoming;
  if (Array.isArray(setCookieHeader)) incoming = setCookieHeader;
  else if (setCookieHeader) incoming = [setCookieHeader];
  else incoming = [];
  for (const line of incoming) {
    const pair = line.split(";")[0];
    const eq = pair.indexOf("=");
    if (eq <= 0) continue;
    jar.set(pair.slice(0, eq).trim(), pair.slice(eq + 1).trim());
  }
  return Array.from(jar, ([name, value]) => `${name}=${value}`).join("; ");
}
1587
/**
 * Reads the `ds_user_id` value from a cookie string.
 *
 * @param {string} sessionCookie
 * @returns {string|null} The id when present and all digits, else null.
 */
function userIdFromCookie(sessionCookie) {
  const hit = sessionCookie.match(/ds_user_id=([^;]+)/);
  const id = hit?.[1];
  if (!id) return null;
  return /^\d+$/.test(id) ? id : null;
}
1591
/**
 * True when the cookie string already carries both `csrftoken` and
 * `ds_user_id`.
 *
 * @param {string} cookie
 * @returns {boolean}
 */
function sessionCookieReady(cookie) {
  const required = ["csrftoken=", "ds_user_id="];
  return required.every((token) => cookie.includes(token));
}
1594
/**
 * Makes sure a session cookie carries `csrftoken` and `ds_user_id`. When it
 * does not, the Instagram home page is requested with the cookie and whatever
 * Set-Cookie headers come back are merged in.
 *
 * Fix: the request is now aborted after `API_TIMEOUT_MS`. It previously had
 * no timeout of its own, unlike the other requests in this module, so a
 * stalled connection could block the caller far longer than intended.
 *
 * @param {string} cookie - Current cookie string.
 * @returns {Promise<string>} The original cookie when already complete,
 *   otherwise the merged cookie string.
 * @throws When the request fails or is aborted by the timeout.
 */
async function enrichSessionCookie(cookie) {
  if (sessionCookieReady(cookie)) return cookie;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS);
  try {
    const res = await request("https://www.instagram.com/", {
      headers: {
        ...buildInstagramPageHeaders(),
        Cookie: cookie
      },
      signal: controller.signal
    });
    // Drain the body so the connection can be reused; only headers matter.
    await res.body.text();
    return mergeSetCookie(cookie, res.headers["set-cookie"]);
  } finally {
    clearTimeout(timer);
  }
}
1605
/**
 * Reads the `csrftoken` value from a cookie string.
 *
 * @param {string} cookie
 * @returns {string} The token, or "" when the cookie has none.
 */
function parseCsrfFromCookie(cookie) {
  const hit = cookie.match(/csrftoken=([^;]+)/);
  return hit?.[1] ?? "";
}
1608
/**
 * Converts one API media item into media entries.
 *
 * For videos (`media_type === 2` or `is_video`), the widest entry of
 * `video_versions` becomes a video entry. The widest image candidate becomes
 * an image entry — unless a video entry was produced, in which case the image
 * URL is attached to it as `thumbnail`.
 *
 * Fix: an empty `video_versions` array no longer throws. `reduce` without an
 * initial value raises a TypeError on an empty array, which previously
 * aborted processing of the whole item.
 *
 * @param {object} item - API media item.
 * @returns {Array<object>} Zero, one video, or one image entry.
 */
function mediaFromStoryItem(item) {
  const media = [];
  const isVideo = item.media_type === 2 || item.is_video === true;
  // Callers guarantee a non-empty list.
  const widest = (variants) => variants.reduce(
    (a, b) => (b.width ?? 0) > (a.width ?? 0) ? b : a
  );
  const versions = item.video_versions;
  if (isVideo && Array.isArray(versions) && versions.length > 0) {
    const best = widest(versions);
    if (typeof best.url === "string") {
      media.push({
        type: "video",
        url: decodeEscapedUrl(best.url),
        width: best.width,
        height: best.height,
        duration: item.video_duration
      });
    }
  }
  const candidates = item.image_versions2?.candidates;
  if (candidates?.length) {
    const best = widest(candidates);
    if (typeof best.url === "string") {
      const image = {
        type: "image",
        url: decodeEscapedUrl(best.url),
        width: best.width ?? item.original_width,
        height: best.height ?? item.original_height
      };
      if (!media.length || !isVideo) media.push(image);
      else if (media[0] && !media[0].thumbnail) media[0].thumbnail = image.url;
    }
  }
  return media;
}
1644
/**
 * Converts an API item into media entries. Carousel items are expanded slide
 * by slide (non-object slides are skipped); anything else is treated as a
 * single media item.
 *
 * @param {object} item - API media item.
 * @returns {Array<object>}
 */
function mediaFromApiItem(item) {
  const slides = item.carousel_media;
  if (!Array.isArray(slides) || slides.length === 0) {
    return mediaFromStoryItem(item);
  }
  return slides.filter((slide) => slide && typeof slide === "object").flatMap((slide) => mediaFromStoryItem(slide));
}
1657
// Default timeout for Instagram API calls, in milliseconds.
var API_TIMEOUT_MS = 6e3;
/**
 * Performs an authenticated GET against the Instagram web API.
 *
 * @param {string} path2 - Absolute URL, or a path appended to `host`.
 * @param {string} sessionCookie - Cookie header value; its csrftoken is also
 *   sent as `X-CSRFToken`.
 * @param {string} referer - Referer header value.
 * @param {string} [host=API_HOSTS[0]] - Origin used for relative paths.
 * @param {number} [timeoutMs=API_TIMEOUT_MS] - Abort the request after this long.
 * @returns {Promise<{statusCode: number, body: string}>}
 */
async function igApiGet(path2, sessionCookie, referer, host = API_HOSTS[0], timeoutMs = API_TIMEOUT_MS) {
  const target = path2.startsWith("http") ? path2 : `${host}${path2}`;
  const headers = {
    ...buildApiHeaders(),
    Cookie: sessionCookie,
    Referer: referer,
    "X-CSRFToken": parseCsrfFromCookie(sessionCookie),
    "X-IG-App-ID": IG_APP_ID
  };
  const abort = new AbortController();
  const timeoutHandle = setTimeout(() => abort.abort(), timeoutMs);
  try {
    const response = await request(target, { method: "GET", headers, signal: abort.signal });
    const text = await response.body.text();
    return { statusCode: response.statusCode, body: text };
  } finally {
    clearTimeout(timeoutHandle);
  }
}
1682
/**
 * Fetches a story page with the Instagram page headers and a session cookie.
 *
 * Fix: the request is now aborted after `timeoutMs`. It previously had no
 * timeout of its own, unlike `igApiGet`, so a stalled connection could block
 * the caller far longer than intended. The new parameter is optional, so
 * existing two-argument calls are unaffected.
 *
 * @param {string} url - Story page URL.
 * @param {string} sessionCookie - Cookie header value.
 * @param {number} [timeoutMs=API_TIMEOUT_MS] - Abort the request after this long.
 * @returns {Promise<{statusCode: number, body: string}>}
 * @throws When the request fails or is aborted by the timeout.
 */
async function fetchStoryPageWithSession(url, sessionCookie, timeoutMs = API_TIMEOUT_MS) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await request(url, {
      headers: {
        ...buildInstagramPageHeaders(),
        Cookie: sessionCookie
      },
      signal: controller.signal
    });
    const body = await response.body.text();
    return { statusCode: response.statusCode, body };
  } finally {
    clearTimeout(timer);
  }
}
1692
/**
 * Collects story media URLs from a page.
 *
 * When `storyId` is given, a window around every occurrence of the id
 * (12,000 characters before, 16,000 after) is scanned first, so media near
 * the id is listed first; the whole page is scanned afterwards. Profile
 * images are skipped and each URL path (query string ignored) is listed once.
 *
 * @param {string} html - Page source.
 * @param {string} [storyId] - Story id to prioritise.
 * @returns {Array<{type: string, url: string}>}
 */
function extractStoryMediaFromHtml(html, storyId) {
  const collected = [];
  const seenPaths = /* @__PURE__ */ new Set();
  const addUnique = (item) => {
    if (isStoryProfileImage(item.url)) return;
    const [path] = item.url.split("?");
    if (seenPaths.has(path)) return;
    seenPaths.add(path);
    collected.push(item);
  };
  if (storyId) {
    for (let at = html.indexOf(storyId, 0); at !== -1; at = html.indexOf(storyId, at + storyId.length)) {
      const windowStart = Math.max(0, at - 12e3);
      collectFromChunk(html.slice(windowStart, at + 16e3), addUnique);
    }
  }
  collectFromChunk(html, addUnique);
  return collected;
}
1713
/**
 * Scans a piece of page source for media URLs and reports each one through
 * `add`. URLs containing ".mp4" or "video" are reported as videos; other
 * cdninstagram/fbcdn URLs as images; anything else is ignored.
 *
 * @param {string} chunk - Text to scan.
 * @param {(item: {type: string, url: string}) => void} add - Receives each hit.
 * @returns {void}
 */
function collectFromChunk(chunk, add) {
  const matchers = [
    /"video_versions":\s*\[[^\]]*?"url":\s*"([^"]+)"/g,
    /video_url\\":\\"([^"]+)/g,
    /"playback_url":"(https?:[^"]+)"/g,
    /"url":"(https?:\\\/\\\/[^"]*?fbcdn[^"]+)"/g,
    /"url":"(https?:\\\/\\\/[^"]*?cdninstagram[^"]+)"/g
  ];
  const report = (raw) => {
    let url = decodeEscapedUrl(raw);
    if (!url.startsWith("http")) url = `https://${url.replace(/^\/+/, "")}`;
    const looksLikeVideo = url.includes(".mp4") || url.includes("video");
    if (looksLikeVideo) {
      add({ type: "video", url });
      return;
    }
    if (url.includes("cdninstagram") || url.includes("fbcdn")) {
      add({ type: "image", url });
    }
  };
  for (const matcher of matchers) {
    for (let hit = matcher.exec(chunk); hit !== null; hit = matcher.exec(chunk)) {
      report(hit[1]);
    }
  }
}
1734
/**
 * Resolves a numeric user id.
 *
 * The `ds_user_id` found in the session cookie is returned first when
 * present. Otherwise the web profile endpoint is queried on each API host in
 * turn: a 429 stops the search, other non-200 responses move on to the next
 * host, and a 200 body that is not valid JSON is searched for a quoted
 * numeric "id".
 *
 * NOTE(review): the cookie shortcut returns the id stored in the session
 * cookie without looking at `username` — confirm callers only rely on it
 * when `username` is the session owner.
 *
 * @param {string} username - Profile to look up.
 * @param {string} sessionCookie - Cookie header value.
 * @param {string} referer - Referer header value.
 * @returns {Promise<string|null>} The id as a string, or null when not found.
 */
async function fetchUserId(username, sessionCookie, referer) {
  const idFromSession = userIdFromCookie(sessionCookie);
  if (idFromSession) return idFromSession;
  const profilePath = () => `/api/v1/users/web_profile_info/?username=${encodeURIComponent(username)}`;
  for (const host of API_HOSTS) {
    const reply = await igApiGet(profilePath(), sessionCookie, referer, host);
    if (reply.statusCode === 429) break;
    if (reply.statusCode !== 200) continue;
    try {
      const profile = JSON.parse(reply.body).data?.user;
      const id = profile?.id ?? profile?.pk;
      if (id != null) return String(id);
    } catch {
      const hit = reply.body.match(/"id":"(\d+)"/);
      if (hit) return hit[1];
    }
  }
  return null;
}
1758
/**
 * Tells whether a story item refers to the requested story id.
 *
 * Each of the item's id-like fields (`pk`, `id`, `media_id`, `story_media_id`,
 * `fbid`, `code`) is stringified (null/undefined become "") and accepted when
 * it equals `storyPk`, starts with `${storyPk}_`, is a `_`-terminated prefix
 * of `storyPk`, or contains `storyPk` as a substring.
 *
 * @param {object} item - Story item from an API payload.
 * @param {string} storyPk - Story id being searched for.
 * @returns {boolean} True when any id field matches.
 */
function storyItemMatches(item, storyPk) {
  const rawIds = [item.pk, item.id, item.media_id, item.story_media_id, item.fbid, item.code];
  for (const rawId of rawIds) {
    const id = rawId != null ? String(rawId) : "";
    if (id === storyPk) return true;
    if (id.startsWith(`${storyPk}_`)) return true;
    if (storyPk.startsWith(`${id}_`)) return true;
    if (id.includes(storyPk)) return true;
  }
  return false;
}
1766
/**
 * Parses a media-info API response body into an extraction result.
 *
 * The first entry of `items` is used when present, otherwise the payload
 * itself. Any error raised along the way (invalid JSON, unexpected shape,
 * helper failure) yields null.
 *
 * @param {string} body - Raw response body.
 * @returns {{media: Array, caption: *, username: string, engagement: *}|null}
 *   Parsed result, or null when nothing usable was found.
 */
function parseMediaInfoBody(body) {
  try {
    const payload = JSON.parse(body);
    const item = payload.items?.[0] ?? payload;
    if (!item || typeof item !== "object") return null;
    const media = mediaFromApiItem(item);
    if (!media.length) return null;
    const owner = item.user;
    const caption = extractCaptionFromApiItem(item);
    const username = typeof owner?.username === "string" ? owner.username : "";
    const engagement = engagementFromApiItem(item);
    return { media, caption, username, engagement };
  } catch {
    return null;
  }
}
1785
/**
 * Performs a single media-info request against one API host.
 *
 * @param {string} id - Media id placed in both the path and the `media_id` query.
 * @param {string} sessionCookie - Session cookie forwarded to the API helper.
 * @param {string} referer - Referer forwarded to the API helper.
 * @param {string} host - API host to query.
 * @param {number} [timeoutMs] - Timeout; falls back to `API_TIMEOUT_MS` when nullish.
 * @returns {Promise<object|null>} Parsed media info, or null on a non-200
 *   status or when no media could be parsed.
 */
async function fetchMediaInfoOnce(id, sessionCookie, referer, host, timeoutMs) {
  const endpoint = `/api/v1/media/${id}/info/?media_id=${id}`;
  const reply = await igApiGet(endpoint, sessionCookie, referer, host, timeoutMs ?? API_TIMEOUT_MS);
  if (reply.statusCode !== 200) {
    logger.debug(`media info ${reply.statusCode} ${host} id=${id}`);
    return null;
  }
  const info = parseMediaInfoBody(reply.body);
  if (!info?.media.length) return null;
  return info;
}
1800
/**
 * Looks up media info by primary key, starting one request per API host and
 * per id variant at once and returning the first one that yields media.
 *
 * The id variants are `mediaPk` and, when `ownerUserId` is given,
 * `${mediaPk}_${ownerUserId}`.
 *
 * @param {string} mediaPk - Media primary key.
 * @param {string} sessionCookie - Session cookie forwarded to each request.
 * @param {string} referer - Referer forwarded to each request.
 * @param {string} [ownerUserId] - Optional owner id for the compound id variant.
 * @param {number} [timeoutMs] - Per-attempt timeout; defaults to `API_TIMEOUT_MS`.
 * @returns {Promise<object|null>} First successful result, or null when every attempt missed.
 */
async function fetchMediaInfoByPk(mediaPk, sessionCookie, referer, ownerUserId, timeoutMs) {
  const idVariants = [mediaPk];
  if (ownerUserId) idVariants.push(`${mediaPk}_${ownerUserId}`);
  const perAttempt = timeoutMs ?? API_TIMEOUT_MS;
  // Promise.any only settles on fulfilment, so an empty result must reject.
  const requireMedia = (result) => {
    if (!result?.media.length) throw new Error("miss");
    return result;
  };
  const attempts = [];
  for (const host of API_HOSTS) {
    attempts.push(
      ...idVariants.map(
        (id) => fetchMediaInfoOnce(id, sessionCookie, referer, host, perAttempt).then(requireMedia)
      )
    );
  }
  try {
    return await Promise.any(attempts);
  } catch {
    return null;
  }
}
1820
/**
 * Fetches a user's story feed and extracts media from one item.
 *
 * Hosts are tried in order. The item matching `storyPk` is preferred; when no
 * item matches (or no `storyPk` is given) the first item is used.
 *
 * @param {string} userId - Owner of the story feed.
 * @param {string} sessionCookie - Session cookie forwarded to the API helper.
 * @param {string} referer - Referer forwarded to the API helper.
 * @param {string} [storyPk] - Story id to look for among the feed items.
 * @returns {Promise<{media: Array, caption: string, username: string}|null>}
 *   Result with empty caption/username, or null when no host yielded media.
 */
async function fetchUserStoryFeed(userId, sessionCookie, referer, storyPk) {
  const endpoint = `/api/v1/feed/user/${userId}/story/`;
  for (const host of API_HOSTS) {
    const reply = await igApiGet(endpoint, sessionCookie, referer, host);
    if (reply.statusCode !== 200) {
      logger.debug(`user story feed ${reply.statusCode} ${host}`);
      continue;
    }
    try {
      const payload = JSON.parse(reply.body);
      const items = payload.reel?.items ?? payload.items;
      if (!items?.length) continue;
      const wanted = storyPk ? items.find((entry) => storyItemMatches(entry, storyPk)) : void 0;
      const media = mediaFromStoryItem(wanted ?? items[0]);
      if (!media.length) continue;
      return { media, caption: "", username: "" };
    } catch {
      // Unusable payload from this host: try the next one.
    }
  }
  return null;
}
1847
/**
 * Fetches story items through the `reels_media` endpoint and extracts media
 * from one item.
 *
 * The reel keyed by `userId` is preferred, otherwise the first reel in the
 * payload. The item matching `storyPk` is preferred, otherwise the first item.
 *
 * @param {string} userId - Reel id to request.
 * @param {string} sessionCookie - Session cookie forwarded to the API helper.
 * @param {string} referer - Referer forwarded to the API helper.
 * @param {string} [storyPk] - Story id to look for among the reel items.
 * @returns {Promise<{media: Array, caption: string, username: string}|null>}
 *   Result with empty caption/username, or null when no host yielded media.
 */
async function fetchReelsMedia(userId, sessionCookie, referer, storyPk) {
  const endpoint = `/api/v1/feed/reels_media/?reel_ids=${userId}`;
  for (const host of API_HOSTS) {
    const reply = await igApiGet(endpoint, sessionCookie, referer, host);
    if (reply.statusCode !== 200) {
      logger.debug(`reels_media ${reply.statusCode} ${host}`);
      continue;
    }
    try {
      const { reels } = JSON.parse(reply.body);
      const reel = reels?.[userId] ?? Object.values(reels ?? {})[0];
      const items = reel?.items;
      if (!items?.length) continue;
      const wanted = storyPk ? items.find((entry) => storyItemMatches(entry, storyPk)) : void 0;
      const media = mediaFromStoryItem(wanted ?? items[0]);
      if (!media.length) continue;
      return { media, caption: "", username: "" };
    } catch {
      // Unusable payload from this host: try the next one.
    }
  }
  return null;
}
1875
/**
 * Derives story media from already-fetched page HTML.
 *
 * CDN-hosted videos found by `extractStoryMediaFromHtml` win. Otherwise the
 * media from `parseHtml(html, "story")` is used, minus profile images and
 * minus videos that are not CDN-hosted.
 *
 * @param {string} html - Story page HTML.
 * @param {string} [storyPk] - Story id used to focus the HTML scan.
 * @returns {{media: Array, caption: string, username: string}|null}
 *   Result with empty caption/username, or null when nothing was found.
 */
function storyMediaFromPageHtml(html, storyPk) {
  const asResult = (media) => ({ media, caption: "", username: "" });
  const cdnVideos = extractStoryMediaFromHtml(html, storyPk).filter(
    (entry) => entry.type === "video" && isCdnMediaUrl(entry.url)
  );
  if (cdnVideos.length) return asResult(cdnVideos);
  const page = parseHtml(html, "story");
  const pageMedia = page?.media.filter((entry) => {
    if (isStoryProfileImage(entry.url)) return false;
    return entry.type === "video" ? isCdnMediaUrl(entry.url) : true;
  }) ?? [];
  return pageMedia.length ? asResult(pageMedia) : null;
}
1890
/**
 * Resolves story media using an authenticated session, trying sources in a
 * fixed order and returning the first that yields media:
 *
 *  1. direct media-info lookup by story id (when the URL carries one);
 *  2. the supplied page HTML, or a freshly fetched authenticated page when no
 *     HTML was supplied;
 *  3. the user's story feed;
 *  4. the `reels_media` endpoint;
 *  5. a second media-info lookup, this time with the resolved user id.
 *
 * Every returned result has its `username` replaced by `parsed.username`.
 *
 * @param {object} parsed - Parsed story URL (`username`, `normalized`, `storyId`).
 * @param {string} sessionCookie - Session cookie used for all requests.
 * @param {string} [existingPageHtml] - Page HTML the caller already fetched.
 * @returns {Promise<object|null>} Extraction result, or null when every source failed.
 */
async function fetchStoryViaSession(parsed, sessionCookie, existingPageHtml) {
  if (!parsed.username) return null;
  const referer = parsed.normalized;
  const storyPk = parsed.storyId;
  const withUsername = (result) => ({ ...result, username: parsed.username });
  const cookieUserId = userIdFromCookie(sessionCookie);

  if (storyPk) {
    const direct = await fetchMediaInfoByPk(storyPk, sessionCookie, referer, cookieUserId ?? void 0);
    if (direct?.media.length) return withUsername(direct);
  }

  if (existingPageHtml) {
    const fromPage = storyMediaFromPageHtml(existingPageHtml, storyPk);
    if (fromPage?.media.length) return withUsername(fromPage);
  } else {
    const authPage = await fetchStoryPageWithSession(referer, sessionCookie);
    if (authPage.statusCode === 200) {
      const fromPage = storyMediaFromPageHtml(authPage.body, storyPk);
      if (fromPage?.media.length) return withUsername(fromPage);
    }
  }

  const userId = cookieUserId ?? await fetchUserId(parsed.username, sessionCookie, referer);
  if (!userId) return null;

  const feed = await fetchUserStoryFeed(userId, sessionCookie, referer, storyPk);
  if (feed?.media.length) return withUsername(feed);

  const reels = await fetchReelsMedia(userId, sessionCookie, referer, storyPk);
  if (reels?.media.length) return withUsername(reels);

  if (storyPk) {
    const retry = await fetchMediaInfoByPk(storyPk, sessionCookie, referer, userId);
    if (retry?.media.length) return withUsername(retry);
  }
  return null;
}
1939
+
1940
+ // src/extractors/story.ts
1941
/**
 * Extracts story media from the page HTML held in `ctx`.
 *
 * Media found near the story id is listed first, followed by media from the
 * generic HTML parse (minus profile images); duplicates are removed by URL
 * without query string.
 *
 * @param {{html: string, parsed: object}} ctx - Page HTML plus the parsed URL.
 * @returns {Promise<object|null>} Extraction result; a result with empty
 *   `media` when the page parsed but held no media; null for a not-found page
 *   or when nothing could be parsed.
 * @throws {Error} "Rate limited" with `code: 429` when the page signals rate limiting.
 */
async function extractStory(ctx) {
  const { html } = ctx;
  if (detectRateLimit(html)) {
    throw Object.assign(new Error("Rate limited"), { code: 429 });
  }
  if (detectNotFound(html, 200)) return null;

  const nearId = extractStoryMediaFromHtml(html, ctx.parsed.storyId);
  const page = parseHtml(html, "story");
  const pageMedia = page?.media.filter((entry) => !isStoryProfileImage(entry.url)) ?? [];

  const seenKeys = /* @__PURE__ */ new Set();
  const unique = [];
  for (const entry of [...nearId, ...pageMedia]) {
    const key = entry.url.split("?")[0];
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    unique.push(entry);
  }

  if (unique.length) {
    return {
      media: unique,
      caption: page?.caption ?? "",
      username: ctx.parsed.username ?? page?.username ?? ""
    };
  }
  if (!page) return null;
  return { media: [], caption: "", username: ctx.parsed.username ?? "" };
}
1975
+
1976
+ // src/extractors/highlight.ts
1977
+ init_esm_shims();
1978
+ init_parser();
1979
/**
 * Parses one serialized media block into a media item.
 *
 * A block flagged as video (`"media_type": 2` or `"is_video": true`) is first
 * matched against `video_versions` with dimensions, then without. If neither
 * matches, or the block is not a video, the first `image_versions2` candidate
 * is used.
 *
 * @param {string} chunk - Text containing one serialized media entry.
 * @returns {{type: string, url: string, width?: number, height?: number}|null}
 *   The media item, or null when nothing matched.
 */
function parseMediaBlock(chunk) {
  // Undo JSON escaping of "&" and "/" before the shared URL decoder runs.
  const cleanUrl = (raw) => decodeEscapedUrl(raw.replace(/\\u0026/g, "&").replace(/\\\//g, "/"));
  const flaggedVideo = /"media_type":\s*2/.test(chunk) || /"is_video":\s*true/.test(chunk);
  if (flaggedVideo) {
    const sized = chunk.match(
      /"video_versions":\s*\[[^\]]*?"width":\s*(\d+)[^}]*"height":\s*(\d+)[^}]*"url":\s*"([^"]+)"/
    );
    if (sized) {
      const [, width, height, rawUrl] = sized;
      return {
        type: "video",
        url: cleanUrl(rawUrl),
        width: parseInt(width, 10),
        height: parseInt(height, 10)
      };
    }
    const bare = /"video_versions":\[[^\]]*?"url":"([^"]+)"/.exec(chunk);
    if (bare) {
      return { type: "video", url: cleanUrl(bare[1]) };
    }
  }
  const image = chunk.match(
    /"image_versions2":\s*\{\s*"candidates":\s*\[[^\]]*?"width":\s*(\d+)\s*,\s*"height":\s*(\d+)[^}]*"url":\s*"([^"]+)"/
  );
  if (!image) return null;
  const [, width, height, rawUrl] = image;
  return {
    type: "image",
    url: cleanUrl(rawUrl),
    width: parseInt(width, 10),
    height: parseInt(height, 10)
  };
}
2012
/**
 * Collects up to eight distinct media items found near occurrences of a media
 * primary key in the page HTML.
 *
 * The key is the part of `storyMediaId` before its first underscore. For each
 * occurrence, a window of 14k characters before and 18k after is parsed.
 *
 * @param {string} html - Page HTML.
 * @param {string} [storyMediaId] - Story media id, possibly `pk_ownerId`.
 * @returns {Array<object>} Media items with distinct URLs; empty without a key.
 */
function extractNearMediaId(html, storyMediaId) {
  const pk = storyMediaId?.split("_")[0];
  if (!pk) return [];
  const found = [];
  for (
    let at = html.indexOf(pk, 0);
    at !== -1 && found.length < 8;
    at = html.indexOf(pk, at + pk.length)
  ) {
    const windowed = html.slice(Math.max(0, at - 14e3), at + 18e3);
    const item = parseMediaBlock(windowed);
    if (!item) continue;
    const alreadyKnown = found.some((known) => known.url === item.url);
    if (!alreadyKnown) found.push(item);
  }
  return found;
}
2027
/**
 * Removes media entries whose URL, ignoring the query string, was already
 * seen. The first occurrence wins and input order is kept.
 *
 * @param {Array<{url: string}>} media - Media entries.
 * @returns {Array<{url: string}>} New array without duplicates.
 */
function dedupeMediaList(media) {
  const firstByKey = /* @__PURE__ */ new Map();
  for (const entry of media) {
    const [key] = entry.url.split("?");
    if (!firstByKey.has(key)) firstByKey.set(key, entry);
  }
  return [...firstByKey.values()];
}
2036
/**
 * Extracts highlight media from the page HTML held in `ctx`.
 *
 * Media is gathered from three sources in order: markup near the story media
 * id, the generic HTML parse, and every `"media_type"` block in the page.
 * The combined list is de-duplicated and images are passed through
 * `enrichImageDimensions`.
 *
 * @param {{html: string, parsed: object}} ctx - Page HTML plus the parsed URL.
 * @returns {Promise<object|null>} Extraction result; the raw parse result when
 *   no media was gathered; null for a not-found page.
 * @throws {Error} "Rate limited" with `code: 429` when the page signals rate limiting.
 */
async function extractHighlight(ctx) {
  const { html } = ctx;
  if (detectRateLimit(html)) {
    throw Object.assign(new Error("Rate limited"), { code: 429 });
  }
  if (detectNotFound(html, 200)) return null;

  const page = parseHtml(html, "highlight");
  const gathered = [...extractNearMediaId(html, ctx.parsed.storyMediaId)];
  if (page?.media.length) gathered.push(...page.media);

  // Each block runs from one "media_type" key up to the next (max 4000 chars).
  const blockPattern = /"media_type":\s*(\d+)[\s\S]{0,4000}?(?="media_type":|$)/g;
  for (const hit of html.matchAll(blockPattern)) {
    const item = parseMediaBlock(hit[0]);
    if (item && !gathered.some((known) => known.url === item.url)) {
      gathered.push(item);
    }
  }

  const unique = dedupeMediaList(gathered).map(
    (entry) => entry.type === "image" ? enrichImageDimensions(entry, html) : entry
  );
  if (!unique.length) return page;
  return {
    media: unique,
    caption: page?.caption ?? "",
    username: ctx.parsed.username ?? page?.username ?? ""
  };
}
2068
+
2069
+ // src/core/extractor.ts
2070
/**
 * Dispatches to the extractor matching the parsed content type. Reels and TV
 * share one extractor; any unrecognised type is handled as a post.
 *
 * @param {{parsed: {type: string}}} ctx - Extraction context.
 * @returns {Promise<*>} Whatever the selected extractor resolves to.
 */
async function runExtractor(ctx) {
  const { type } = ctx.parsed;
  if (type === "reel" || type === "tv") return extractReel(ctx);
  if (type === "story") return extractStory(ctx);
  if (type === "highlight") return extractHighlight(ctx);
  return extractPost(ctx);
}
2086
/**
 * Picks the URL to fetch for a parsed Instagram link. A story link without a
 * story id maps to the user's stories page; everything else uses the
 * normalized URL.
 *
 * @param {{type: string, username?: string, storyId?: string, normalized: string}} parsed
 * @returns {string} URL to request.
 */
function resolveFetchUrl(parsed) {
  const isBareStory = parsed.type === "story" && parsed.username && !parsed.storyId;
  return isBareStory ? `https://www.instagram.com/stories/${parsed.username}/` : parsed.normalized;
}
2092
/**
 * Builds the embed URL for a reel, post or TV link.
 *
 * @param {{type: string, shortcode?: string}} parsed - Parsed Instagram link.
 * @returns {string|null} Embed URL, or null without a shortcode or for other types.
 */
function resolveEmbedUrl(parsed) {
  if (!parsed.shortcode) return null;
  // Content type -> path segment used by the embed endpoint.
  const segments = /* @__PURE__ */ new Map([
    ["reel", "reel"],
    ["post", "p"],
    ["tv", "tv"]
  ]);
  const segment = segments.get(parsed.type);
  return segment ? `https://www.instagram.com/${segment}/${parsed.shortcode}/embed/` : null;
}
2105
/**
 * Builds the embed URL for a story link that carries both a username and a
 * story id.
 *
 * @param {{type: string, username?: string, storyId?: string}} parsed
 * @returns {string|null} Embed URL, or null when any part is missing.
 */
function resolveStoryEmbedUrl(parsed) {
  const { type, username, storyId } = parsed;
  const complete = type === "story" && username && storyId;
  return complete ? `https://www.instagram.com/stories/${username}/${storyId}/embed/` : null;
}
2109
+
2110
+ // src/core/downloader.ts
2111
+ init_parser();
2112
+
2113
+ // src/core/normalize.ts
2114
+ init_esm_shims();
2115
+ import * as cheerio2 from "cheerio";
2116
+ init_engagement();
2117
+ init_caption();
2118
+ init_parser();
2119
+
2120
+ // src/utils/post-carousel.ts
2121
+ init_esm_shims();
2122
+ init_parser();
2123
/**
 * Scores an image for quality comparison: its area when positive, otherwise
 * its width (0 when the width is missing).
 *
 * @param {{width?: number, height?: number}} item - Media item.
 * @returns {number} Quality score; higher is better.
 */
function imageQualityScore(item) {
  const area = mediaArea(item);
  return area > 0 ? area : item.width ?? 0;
}
2128
/**
 * Reads the encode tag from a media URL's `efg` query parameter, which is
 * decoded as URL-encoded base64 JSON.
 *
 * @param {string} url - Media URL.
 * @returns {string|null} `encode_tag`, `vencode_tag` or `efg_tag` (first one
 *   present); null when the parameter is absent or cannot be decoded.
 */
function encodeTagFromUrl(url) {
  const encoded = url.match(/[?&]efg=([^&]+)/i)?.[1];
  if (!encoded) return null;
  try {
    const jsonText = Buffer.from(decodeURIComponent(encoded), "base64").toString("utf8");
    const payload = JSON.parse(jsonText);
    return payload.encode_tag ?? payload.vencode_tag ?? payload.efg_tag ?? null;
  } catch {
    return null;
  }
}
2139
/**
 * Detects carousel helper images (best-image picks, covers, thumbnails),
 * either from markers in the URL text or from the URL's encode tag.
 *
 * @param {string} url - Image URL.
 * @returns {boolean} True for an auxiliary image.
 */
function isCarouselAuxiliaryImage(url) {
  const markedInUrl = /CAROUSEL_BEST_IMAGE|best_image_urlgen|cover_photo/i.test(url);
  if (markedInUrl) return true;
  const tag = encodeTagFromUrl(url);
  return Boolean(tag && /BEST_IMAGE|COVER|THUMBNAIL/i.test(tag));
}
2145
/**
 * Drops images recognised as carousel auxiliary images; non-image entries
 * always pass through.
 *
 * @param {Array<{type: string, url: string}>} media - Media entries.
 * @returns {Array<{type: string, url: string}>} Filtered copy.
 */
function stripCarouselAuxiliaryImages(media) {
  return media.filter((entry) => !(entry.type === "image" && isCarouselAuxiliaryImage(entry.url)));
}
2148
/**
 * Derives a key identifying the carousel slide behind a media URL, so that
 * different renditions of one slide can be grouped.
 *
 * Precedence: the `ig_cache_key` parameter (`ck:` prefix), then a file-name id
 * pair (`file:` prefix), then a long numeric path id (`id:` prefix), and
 * finally the query-less URL with size tokens removed.
 *
 * @param {string} url - Media URL.
 * @returns {string} Slide key.
 */
function slideKeyFromMediaUrl(url) {
  const cacheKey = url.match(/ig_cache_key=([^&]+)/i)?.[1];
  if (cacheKey) {
    let readable = cacheKey;
    try {
      readable = decodeURIComponent(cacheKey);
    } catch {
      // Malformed percent-encoding: keep the raw value.
    }
    return `ck:${readable}`;
  }
  const [withoutQuery] = url.split("?");
  const base = (withoutQuery ?? url).replace(/\\/g, "");
  const fileId = base.match(/\/(\d{8,12})_(\d{10,})_\d+_n\./i);
  if (fileId) {
    const [, first, second] = fileId;
    return `file:${first}_${second}`;
  }
  const pathId = base.match(/\/(\d{11,})(?:_\d+)?[_/]/);
  if (pathId) return `id:${pathId[1]}`;
  return base.replace(/_s\d+x\d+/gi, "").replace(/p\d+x\d+/gi, "");
}
2164
/**
 * Collapses multiple renditions of the same carousel slide into one entry.
 *
 * Videos are always kept. Every other entry is grouped by its slide key; the
 * group keeps the position of its first member and is represented by the
 * image with the highest quality score in that group.
 *
 * @param {Array<{type: string, url: string}>} media - Media entries in slide order.
 * @returns {Array<object>} De-duplicated slides; the input array itself when
 *   it holds at most one entry.
 */
function dedupePostSlides(media) {
  if (media.length <= 1) return media;

  // Pass 1: best-scoring image per slide key (the earlier one wins a tie).
  const bestByKey = /* @__PURE__ */ new Map();
  media
    .filter((entry) => entry.type === "image")
    .forEach((image) => {
      const key = slideKeyFromMediaUrl(image.url);
      const current = bestByKey.get(key);
      if (!current || imageQualityScore(image) > imageQualityScore(current)) {
        bestByKey.set(key, image);
      }
    });

  // Pass 2: emit entries in order, one per slide key.
  const slides = [];
  const emittedKeys = /* @__PURE__ */ new Set();
  for (const entry of media) {
    if (entry.type === "video") {
      slides.push(entry);
      continue;
    }
    const key = slideKeyFromMediaUrl(entry.url);
    if (emittedKeys.has(key)) continue;
    emittedKeys.add(key);
    slides.push(bestByKey.get(key) ?? entry);
  }
  return slides.length ? slides : media;
}
2187
/**
 * Chooses the media to return for a post.
 *
 * Auxiliary carousel images are removed and images are upgraded; page HTML is
 * passed to the upgrade only when the post has a single image. Slides are
 * then de-duplicated. With more than one slide the slide list is returned;
 * otherwise a single video is returned as is and a single image is replaced
 * by the best upgraded image.
 *
 * @param {Array<object>} media - Candidate media entries.
 * @param {string} [pageHtml=""] - Page HTML used to upgrade a lone image.
 * @returns {Array<object>} Selected media (possibly empty).
 */
function pickPostMedia(media, pageHtml = "") {
  const isImage = (entry) => entry.type === "image";
  const candidates = stripCarouselAuxiliaryImages(media);
  const severalImages = candidates.filter(isImage).length > 1;
  const upgraded = candidates.map((entry) => {
    if (!isImage(entry)) return entry;
    return severalImages ? upgradeMediaItem(entry) : upgradeMediaItem(entry, pageHtml);
  });
  const slides = dedupePostSlides(upgraded);
  if (slides.length > 1) return slides;
  const [only] = slides;
  if (!only) return [];
  return only.type === "video" ? [only] : [pickBestImage(upgraded) ?? only];
}
2203
+
2204
+ // src/core/normalize.ts
2205
/**
 * Selects the final media list for a piece of content.
 *
 * - reel / tv / story / highlight with a video: that video, with a thumbnail
 *   taken from the video itself or from the best image (after upgrade) when
 *   valid;
 * - reel / tv without a video: nothing;
 * - no images at all: the best video if any;
 * - post: delegated to `pickPostMedia`;
 * - anything else: the single largest image by area.
 *
 * @param {Array<object>} media - Candidate media entries.
 * @param {string} contentType - Parsed content type.
 * @param {string} [pageHtml=""] - Page HTML used when upgrading images.
 * @returns {Array<object>} Selected media (possibly empty).
 */
function pickBestMedia(media, contentType, pageHtml = "") {
  const videos = media.filter((entry) => entry.type === "video");
  const images = media.filter((entry) => entry.type === "image");
  const isReelOrTv = contentType === "reel" || contentType === "tv";
  const videoFirst = isReelOrTv || contentType === "highlight" || contentType === "story";
  const bestVideo = pickBestVideo(videos);

  if (bestVideo && videoFirst) {
    const poster = pickBestImage(images);
    let thumbnail;
    if (isValidThumbnailUrl(bestVideo.thumbnail)) {
      thumbnail = bestVideo.thumbnail;
    } else if (poster) {
      const upgradedUrl = upgradeMediaItem(poster, pageHtml).url;
      if (isValidThumbnailUrl(upgradedUrl)) thumbnail = upgradedUrl;
    }
    return [{ ...bestVideo, thumbnail }];
  }

  const videoOnly = bestVideo ? [bestVideo] : [];
  if (isReelOrTv) return videoOnly;
  if (!images.length) return videoOnly;
  if (contentType === "post") return pickPostMedia(media, pageHtml);

  const enriched = images.map((image) => upgradeMediaItem(image, pageHtml));
  const [largest] = [...enriched].sort((a, b) => mediaArea(b) - mediaArea(a));
  const bestImage = largest ?? pickBestImage(enriched);
  return bestImage ? [bestImage] : [];
}
2236
/**
 * Reads the Open Graph description and title from page HTML.
 *
 * @param {string} html - Page HTML.
 * @returns {{ogDescription?: string, ogTitle?: string}} Entity-decoded meta
 *   values ("" when a tag is missing); an empty object for empty HTML.
 */
function extractPageMeta(html) {
  if (!html) return {};
  const $ = cheerio2.load(html);
  const readMeta = (property) => decodeHtmlEntities($(`meta[property="${property}"]`).attr("content") ?? "");
  const ogDescription = readMeta("og:description");
  const ogTitle = readMeta("og:title");
  return { ogDescription, ogTitle };
}
2245
/**
 * Turns a raw extraction result into the normalized public result: validated
 * and selected media, resolved caption, cleaned username, merged engagement
 * data and tags.
 *
 * The input `data` object and its nested `engagement` object are never
 * mutated; all adjustments happen on copies.
 *
 * @param {object|null|undefined} data - Raw extraction result.
 * @param {object} parsed - Parsed URL (`type`, optional `username`).
 * @param {object} [pageMeta={}] - Page metadata (`html`, `ogDescription`,
 *   `ogTitle`, `embedCaption`).
 * @param {string} [pageHtml=""] - Page HTML used when `pageMeta.html` is nullish.
 * @returns {object|null} Normalized result; null when `data` is missing or
 *   holds no valid media and is not flagged private.
 */
function normalizeExtraction(data, parsed, pageMeta = {}, pageHtml = "") {
  if (!data) return null;
  const source = { ...data, media: filterValidMedia(data.media) };
  if (!source.media.length && !source.isPrivate) return null;
  const html = pageMeta.html ?? pageHtml;
  const scrapedCaption = scrapeCaptionFromHtml(html);
  let ogCaption = "";
  let username = source.username;
  // Copy: the hidden-flags below are written onto this object, and the
  // caller's engagement object must stay untouched.
  let engagement = { ...source.engagement };
  if (pageMeta.ogDescription) {
    const parsedDesc = parseInstagramDescription(pageMeta.ogDescription);
    ogCaption = parsedDesc.caption;
    if (parsedDesc.username) username = parsedDesc.username;
    engagement = { ...engagement, ...parsedDesc.engagement };
  }
  let caption = resolveCaptionForContent(parsed.type, {
    embed: pageMeta.embedCaption,
    scraped: scrapedCaption,
    extracted: source.caption,
    og: ogCaption
  });
  if (pageMeta.ogTitle) {
    const fromTitle = parseInstagramTitle(pageMeta.ogTitle);
    if (fromTitle.username && !username) {
      username = fromTitle.username;
    }
    if (fromTitle.caption) {
      caption = parsed.type === "highlight" ? normalizeCaptionText(fromTitle.caption) || caption : resolveCaptionForContent(parsed.type, {
        embed: pageMeta.embedCaption,
        scraped: scrapedCaption,
        extracted: caption,
        og: fromTitle.caption
      });
    }
  }
  if (parsed.type === "story") {
    if (parsed.username) username = parsed.username;
    // A caption that starts like "1,234 likes ..." is really an engagement summary.
    if (/^\d[\d.KMB,\s]*(?:likes?|comments?|views?)/i.test(caption)) {
      const parsedDesc = parseInstagramDescription(caption);
      caption = parsedDesc.caption;
      engagement = { ...engagement, ...parsedDesc.engagement };
    }
    source.media = source.media.filter((m) => !isStoryProfileImage(m.url));
  }
  if (parsed.username && (parsed.type === "story" || parsed.type === "highlight")) {
    username = parsed.username;
  }
  const skipHtmlEnrich = !html && (parsed.type === "story" || parsed.type === "highlight");
  const enrichedMedia = (skipHtmlEnrich ? source.media : applyPageHtmlToMedia(source.media, html)).map(
    (m) => {
      if (m.type !== "image" || m.width && m.height) return m;
      const dims = dimensionsFromImageUrl(m.url);
      if (!dims.width || !dims.height) return m;
      return { ...m, width: dims.width, height: dims.height };
    }
  );
  const media = pickBestMedia(enrichedMedia, parsed.type, html);
  const tags = [
    .../* @__PURE__ */ new Set([
      ...source.tags ?? [],
      ...buildEngagementTags(parsed.type, pageMeta.ogDescription, engagement, html)
    ])
  ];
  if (tags.includes("likes_hidden")) engagement.likesHidden = true;
  if (tags.includes("comments_hidden")) engagement.commentsHidden = true;
  const hasEngagementMetrics = engagement.likes != null || engagement.comments != null || engagement.views != null || engagement.shares != null || Boolean(engagement.raw);
  const hasEngagement = hasEngagementMetrics || engagement.likesHidden || engagement.commentsHidden;
  return {
    ...source,
    media,
    caption: parsed.type === "post" ? normalizePostCaptionText(caption) : normalizeCaptionText(caption),
    // A username may never have been resolved; treat that as empty instead of throwing.
    username: (username ?? "").replace(/^@/, "").trim(),
    engagement: hasEngagement ? engagement : void 0,
    tags: tags.length ? tags : void 0
  };
}
2321
+
2322
+ // src/network/post-media.ts
2323
+ init_esm_shims();
2324
// Default timeout, in milliseconds, for the large-image redirect probe.
var REDIRECT_TIMEOUT_MS = 6e3;

/**
 * Reads image dimensions from a redirect target URL. A `p<width>x<height>`
 * token is preferred; otherwise the generic URL dimension parser is used.
 *
 * @param {string} url - Redirect target URL.
 * @returns {{width?: number, height?: number}} Parsed dimensions.
 */
function dimensionsFromRedirectUrl(url) {
  const sizeToken = url.match(/p(\d+)x(\d+)/i);
  if (!sizeToken) return dimensionsFromImageUrl(url);
  const [, width, height] = sizeToken;
  return { width: parseInt(width, 10), height: parseInt(height, 10) };
}
2332
/**
 * Probes a post's `/media/?size=l` endpoint and returns the image URL it
 * redirects to, without following the redirect.
 *
 * @param {string} shortcode - Post shortcode.
 * @param {number} [timeoutMs=REDIRECT_TIMEOUT_MS] - Abort the request after this many ms.
 * @returns {Promise<{url: string, width?: number, height?: number}|null>}
 *   Redirect target with dimensions parsed from its URL; null when the
 *   response is not a 301/302, the target is not an acceptable image URL, or
 *   the request fails or times out.
 */
async function fetchPostLargeImageUrl(shortcode, timeoutMs = REDIRECT_TIMEOUT_MS) {
  const pageUrl = `https://www.instagram.com/p/${shortcode}/media/?size=l`;
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), timeoutMs);
  try {
    const response = await request(pageUrl, {
      method: "GET",
      headers: buildInstagramPageHeaders(),
      signal: aborter.signal,
      // undici supports this; @types omit maxRedirections on RequestOptions
      maxRedirections: 0
    });
    // Read the body to completion even though only the headers matter.
    await response.body.text();
    const isRedirect = response.statusCode === 301 || response.statusCode === 302;
    if (!isRedirect) return null;
    const { location } = response.headers;
    const target = (Array.isArray(location) ? location[0] : location)?.trim();
    if (!target) return null;
    if (!isValidMediaUrl(target) || isLowResDeliveryUrl(target)) return null;
    const withoutQuery = target.split("?")[0] ?? "";
    if (!/\.(jpe?g|webp|png)/i.test(withoutQuery)) return null;
    return { url: target, ...dimensionsFromRedirectUrl(target) };
  } catch {
    return null;
  } finally {
    clearTimeout(timer);
  }
}
2358
/**
 * Swaps a post's image URL for a larger rendition.
 *
 * When the post has several images only the first image is swapped; with a
 * single image that image is swapped. Width and height become the largest of
 * the item's own value, the large rendition's value and the value parsed from
 * the item's URL.
 *
 * @param {Array<object>} media - Media entries.
 * @param {{url: string, width?: number, height?: number}} large - Larger rendition.
 * @returns {Array<object>} New array with the swap applied, or `media` itself
 *   when `large.url` is not a valid media URL.
 */
function applyLargePostImage(media, large) {
  if (!isValidMediaUrl(large.url)) return media;
  const hasSeveralImages = media.filter((entry) => entry.type === "image").length > 1;
  const largest = (own, fromLarge, fromUrl) => Math.max(own ?? 0, fromLarge ?? 0, fromUrl ?? 0) || own;
  let swappedOne = false;
  return media.map((entry) => {
    const leaveAsIs = entry.type !== "image" || (hasSeveralImages && swappedOne);
    if (leaveAsIs) return entry;
    const parsedDims = dimensionsFromImageUrl(entry.url);
    const width = largest(entry.width, large.width, parsedDims.width);
    const height = largest(entry.height, large.height, parsedDims.height);
    swappedOne = true;
    return { ...entry, url: large.url, width, height };
  });
}
2372
+
2373
+ // src/network/embed-caption.ts
2374
+ init_esm_shims();
2375
+ init_caption_normalize();
2376
+ import * as cheerio3 from "cheerio";
2377
/**
 * Builds the captioned-embed URL for a parsed link. Reels and TV keep their
 * own path segment; every other type uses `p`.
 *
 * @param {{type: string, shortcode?: string}} parsed - Parsed Instagram link.
 * @returns {string|null} Captioned embed URL, or null without a shortcode.
 */
function captionedEmbedUrl(parsed) {
  if (!parsed.shortcode) return null;
  let segment = "p";
  if (parsed.type === "reel" || parsed.type === "tv") segment = parsed.type;
  return `https://www.instagram.com/${segment}/${parsed.shortcode}/embed/captioned/`;
}
2382
/**
 * Extracts the caption text from a captioned-embed page.
 *
 * The text of the first `.Caption` element is taken, a leading username
 * (first `.CaptionUsername`) and a trailing "View all N comments" footer are
 * removed, and whitespace is tidied: posts keep line breaks (at most one
 * blank line in a row), other types are flattened to a single line.
 *
 * @param {string} html - Captioned-embed HTML.
 * @param {string} [contentType="reel"] - Content type controlling the whitespace handling.
 * @returns {string} Normalized caption, or "" when no caption block exists.
 */
function parseCaptionFromCaptionedEmbed(html, contentType = "reel") {
  const $ = cheerio3.load(html);
  const firstText = (selector) => $(selector).first().text().trim();
  const block = firstText(".Caption");
  if (!block) return "";
  const user = firstText(".CaptionUsername");
  const withoutUser = user && block.startsWith(user) ? block.slice(user.length) : block;
  const withoutFooter = withoutUser.replace(/View all [\d,.]+[KMB]?\s+comments?.*$/i, "").trim();
  const tidied = contentType === "post"
    ? withoutFooter.replace(/\n{3,}/g, "\n\n").trim()
    : withoutFooter.replace(/\n+/g, " ").replace(/\s+/g, " ").trim();
  return normalizeCaptionText(tidied);
}
2399
/**
 * Tells whether a content type is one of reel, post or tv.
 *
 * @param {string} type - Parsed content type.
 * @returns {boolean} True for "reel", "post" and "tv".
 */
function contentTypesWithEmbedCaption(type) {
  switch (type) {
    case "reel":
    case "post":
    case "tv":
      return true;
    default:
      return false;
  }
}
2402
+
2403
+ // src/utils/media-id.ts
2404
+ init_esm_shims();
2405
/**
 * Extracts a media primary key from page HTML (or from a single URL).
 *
 * An `ig_cache_key` value is tried first: it is URL-decoded, base64-decoded,
 * and accepted when the result is a run of at least ten digits. Otherwise the
 * first match among a list of markup patterns is used, from most specific
 * (`data-media-id`) to least specific (any quoted `"pk"`).
 *
 * @param {string} html - HTML or URL text to search.
 * @returns {string|null} The media pk, or null when none was found.
 */
function extractMediaPkFromHtml(html) {
  if (!html) return null;
  const cacheKey = html.match(/ig_cache_key=([A-Za-z0-9%_+=]+)/i)?.[1];
  if (cacheKey) {
    try {
      const decoded = Buffer.from(decodeURIComponent(cacheKey), "base64").toString("utf8");
      if (/^\d{10,}$/.test(decoded)) return decoded;
    } catch {
      // Malformed percent-encoding in the cache key: fall through to the
      // markup patterns below instead of failing the whole lookup.
    }
  }
  // The generic `"pk"` pattern is the last resort. A narrower
  // `"pk" ... "media_type": 2` pattern used to follow it but could never be
  // reached, because the generic pattern matches every input it would.
  const patterns = [
    /data-media-id="(\d+)"/,
    /data-media-id\\":\\"(\d+)\\"/,
    /"media_id":"(\d+)"/,
    /"pk":"(\d+)"/
  ];
  for (const pattern of patterns) {
    const found = html.match(pattern);
    if (found?.[1]) return found[1];
  }
  return null;
}
2431
+
2432
+ // src/utils/post-media-pk.ts
2433
+ init_esm_shims();
2434
+
2435
+ // src/utils/shortcode.ts
2436
+ init_esm_shims();
2437
// Base-64 digit alphabet used by shortcodes, in digit order (index = value).
var SHORTCODE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

/**
 * Converts a shortcode into a decimal media pk string by reading it as a
 * base-64 number over `SHORTCODE_ALPHABET`.
 *
 * @param {string} shortcode - Shortcode; surrounding whitespace is ignored.
 * @returns {string|null} Decimal pk, or null for an empty shortcode, one
 *   containing a character outside the alphabet, or one decoding to zero.
 */
function shortcodeToMediaPk(shortcode) {
  const code = shortcode?.trim();
  if (!code) return null;
  try {
    let pk = 0n;
    for (const symbol of code) {
      const digit = SHORTCODE_ALPHABET.indexOf(symbol);
      if (digit < 0) return null;
      pk = pk * 64n + BigInt(digit);
    }
    return pk > 0n ? pk.toString() : null;
  } catch {
    return null;
  }
}
2452
+
2453
+ // src/utils/post-media-pk.ts
2454
/**
 * Lists candidate media pks for a post.
 *
 * When the shortcode decodes to a pk, that pk is the only candidate.
 * Otherwise candidates come from the page HTML and then from each media URL,
 * in that order and without duplicates.
 *
 * @param {string} shortcode - Post shortcode.
 * @param {string} [html=""] - Page HTML.
 * @param {Array<{url: string}>} [media=[]] - Media entries whose URLs are inspected.
 * @returns {string[]} Candidate pks (possibly empty).
 */
function postMediaPkCandidates(shortcode, html = "", media = []) {
  const fromShortcode = shortcodeToMediaPk(shortcode);
  if (fromShortcode) return [fromShortcode];
  // A Set keeps insertion order and drops repeats.
  const ordered = /* @__PURE__ */ new Set();
  const fromHtml = extractMediaPkFromHtml(html);
  if (fromHtml) ordered.add(fromHtml);
  for (const entry of media) {
    const fromUrl = extractMediaPkFromHtml(entry.url);
    if (fromUrl) ordered.add(fromUrl);
  }
  return [...ordered];
}
2477
+
2478
+ // src/utils/post-carousel-detect.ts
2479
+ init_esm_shims();
2480
/**
 * Tells whether page HTML contains any marker associated with a carousel
 * (multi-slide) post.
 *
 * @param {string} html - Page HTML.
 * @returns {boolean} True when at least one carousel marker is present.
 */
function htmlIndicatesCarouselPost(html) {
  if (!html) return false;
  const carouselMarkers = [
    /edge_sidecar_to_children/,
    /"carousel_media"\s*:/,
    /carousel_media_count/,
    /GraphSidecar/,
    /CAROUSEL_ITEM/,
    /media_type["\s]*:["\s]*8\b/,
    /product_type["\s]*:["\s]*carousel/
  ];
  return carouselMarkers.some((marker) => marker.test(html));
}
2491
/**
 * Builds content tags describing how complete a post extraction is.
 *
 * @param {string} parsedType - Parsed content type; only "post" produces tags.
 * @param {number} mediaCount - Number of media entries extracted.
 * @param {string} html - Page HTML checked for carousel markers.
 * @param {boolean} hasSession - Whether a session was available.
 * @returns {string[]} `["carousel"]` for several media entries;
 *   `["partial_carousel"]` (plus `"session_recommended"` without a session)
 *   when the HTML indicates a carousel but at most one entry was extracted;
 *   otherwise an empty array.
 */
function buildPostContentTags(parsedType, mediaCount, html, hasSession) {
  if (parsedType !== "post") return [];
  if (mediaCount > 1) return ["carousel"];
  if (!htmlIndicatesCarouselPost(html)) return [];
  return hasSession ? ["partial_carousel"] : ["partial_carousel", "session_recommended"];
}
2499
/**
 * Merges two tag lists without duplicates, keeping first-seen order.
 *
 * @param {string[]|null|undefined} existing - Tags already on the result.
 * @param {string[]} content - Tags to add.
 * @returns {string[]|undefined} Merged tags, or undefined when there are none.
 */
function mergeResultTags(existing, content) {
  const unique = /* @__PURE__ */ new Set(existing ?? []);
  for (const tag of content) unique.add(tag);
  return unique.size ? [...unique] : void 0;
}
2503
+
2504
+ // src/utils/urls.ts
2505
+ init_esm_shims();
2506
// Hostnames accepted as Instagram: "instagram.com" with an optional "www." prefix.
var IG_HOST = /^(?:www\.)?instagram\.com$/i;

/**
 * Decodes a share slug. Only slugs whose base64-decoded text is exactly
 * `highlight:<digits>` are recognised.
 *
 * @param {string} slug - Slug to decode.
 * @returns {{type: string, id: string}|null} Highlight reference, or null
 *   when the slug does not decode to one.
 */
function decodeShareSlug(slug) {
  let decoded;
  try {
    decoded = Buffer.from(slug, "base64").toString("utf8");
  } catch {
    return null;
  }
  const highlightId = /^highlight:(\d+)$/.exec(decoded)?.[1];
  return highlightId ? { type: "highlight", id: highlightId } : null;
}
2517
/**
 * Builds a canonical `https://www.instagram.com` URL for a path, with exactly
 * one trailing slash.
 *
 * @param {string} pathname - URL path, with or without trailing slashes.
 * @returns {string} Canonical URL.
 */
function buildNormalizedPath(pathname) {
  const trimmed = pathname.replace(/\/+$/, "");
  const withSlash = trimmed.endsWith("/") ? trimmed : `${trimmed}/`;
  return `https://www.instagram.com${withSlash}`;
}
2522
/**
 * Returns the normalized form of an Instagram URL, as produced by
 * `parseInstagramUrl`.
 *
 * @param {string} input - URL or path to normalize.
 * @returns {string} The `normalized` field of the parse result.
 */
function normalizeInstagramUrl(input) {
  const { normalized } = parseInstagramUrl(input);
  return normalized;
}
2525
/**
 * Parses an Instagram URL (or bare path) into its content type and ids.
 *
 * Input without an http(s) scheme is completed: text containing
 * "instagram.com" gets `https://` prepended, anything else is treated as a
 * path on `https://www.instagram.com/`. Recognised shapes are `/s/<slug>`
 * (highlight share), `/reel/<code>`, `/p/<code>`, `/tv/<code>`,
 * `/stories/highlights/<id>`, `/stories/<user>/<id>` and `/stories/<user>`.
 *
 * @param {string} input - URL or path.
 * @returns {object} Always has `type` and `normalized`; depending on the type
 *   also `shortcode`, `username`, `storyId`, `highlightId` and `storyMediaId`.
 *   `type` is "unknown" for unparsable input, foreign hosts and unrecognised paths.
 */
function parseInstagramUrl(input) {
  let raw = input.trim();
  if (!/^https?:\/\//i.test(raw)) {
    const withoutLeadingSlash = raw.replace(/^\//, "");
    raw = raw.includes("instagram.com")
      ? `https://${withoutLeadingSlash}`
      : `https://www.instagram.com/${withoutLeadingSlash}`;
  }

  let url;
  try {
    url = new URL(raw);
  } catch {
    return { type: "unknown", normalized: raw };
  }
  if (!IG_HOST.test(url.hostname)) {
    return { type: "unknown", normalized: raw };
  }

  const storyMediaId = url.searchParams.get("story_media_id") ?? void 0;
  const [head, second, third] = url.pathname.split("/").filter(Boolean);
  const highlightResult = (id) => ({
    type: "highlight",
    highlightId: id,
    storyMediaId,
    normalized: buildNormalizedPath(`/stories/highlights/${id}`)
  });

  if (head === "s" && second) {
    const share = decodeShareSlug(second);
    if (share?.type === "highlight") return highlightResult(share.id);
  }

  if ((head === "reel" || head === "p" || head === "tv") && second) {
    return {
      type: head === "p" ? "post" : head,
      shortcode: second,
      normalized: buildNormalizedPath(`/${head}/${second}`)
    };
  }

  if (head === "stories" && second) {
    if (second === "highlights" && third) return highlightResult(third);
    if (third) {
      return {
        type: "story",
        username: second,
        storyId: third,
        normalized: buildNormalizedPath(`/stories/${second}/${third}`)
      };
    }
    return {
      type: "story",
      username: second,
      normalized: buildNormalizedPath(`/stories/${second}`)
    };
  }

  return { type: "unknown", normalized: buildNormalizedPath(url.pathname || "/") };
}
2599
/**
 * Tells whether the input is a supported Instagram content URL, i.e. whether
 * it parses to a type other than "unknown".
 *
 * Input without an http(s) scheme is treated as a host-first URL and gets
 * `https://` prepended. The scheme test ignores case and surrounding
 * whitespace, so inputs such as `Https://www.instagram.com/p/x/` (typical of
 * auto-capitalising keyboards) or pasted text with a leading space are not
 * given a second scheme.
 *
 * @param {string} input - Candidate URL.
 * @returns {boolean} True for a supported Instagram URL; false otherwise,
 *   including for non-string input.
 */
function isInstagramUrl(input) {
  try {
    const candidate = input.trim();
    const withScheme = /^https?:\/\//i.test(candidate) ? candidate : `https://${candidate}`;
    const normalized = normalizeInstagramUrl(withScheme);
    return parseInstagramUrl(normalized).type !== "unknown";
  } catch {
    return false;
  }
}
2610
+
2611
+ // src/utils/validators.ts
2612
+ init_esm_shims();
2613
/**
 * Validates a user-supplied URL and classifies it.
 *
 * Checks run in order: non-empty string, syntactically valid URL (a missing
 * scheme is filled in with "https://" for this check only), and finally
 * `isInstagramUrl`. The first failing check determines the error message.
 *
 * @param {unknown} input - Candidate URL.
 * @returns {{valid: false, error: string} | {valid: true, type: string, normalized: string}}
 *   A failure record with a message, or the parsed type and normalized form.
 */
function validateUrl(input) {
  const reject = (error) => ({ valid: false, error });

  if (typeof input !== "string" || !input) {
    return reject("URL must be a non-empty string");
  }

  const candidate = input.startsWith("http") ? input : `https://${input}`;
  try {
    // Constructed only for its throwing side effect on malformed input.
    new URL(candidate);
  } catch {
    return reject("Invalid URL format");
  }

  if (!isInstagramUrl(input)) {
    return reject("Not a supported Instagram URL");
  }

  const { type, normalized } = parseInstagramUrl(input);
  return { valid: true, type, normalized };
}
2632
+
2633
+ // src/core/downloader.ts
2634
+ init_logger();
2635
+ var pendingDownloads = /* @__PURE__ */ new Map();
2636
/**
 * Tells whether a story media list contains at least one video entry whose
 * URL passes `isCdnMediaUrl`.
 *
 * @param {Array<{type: string, url: string}>} media - Extracted media entries.
 * @returns {boolean} `true` if any entry is a video with a CDN media URL.
 */
function hasStoryVideo(media) {
  const isCdnVideo = (entry) => entry.type === "video" && isCdnMediaUrl(entry.url);
  return media.some(isCdnVideo);
}
2639
/**
 * Tells whether a reel/TV media list contains at least one video entry whose
 * URL passes `isCdnMediaUrl`.
 *
 * @param {Array<{type: string, url: string}>} media - Extracted media entries.
 * @returns {boolean} `true` if any entry is a video with a CDN media URL.
 */
function hasReelVideo(media) {
  return media.some((candidate) => {
    if (candidate.type !== "video") return false;
    return isCdnMediaUrl(candidate.url);
  });
}
2642
/**
 * Core extraction engine: validates a URL, serves cached results when
 * available, de-duplicates concurrent requests for the same normalized URL
 * through the module-level `pendingDownloads` map, and otherwise runs the
 * multi-stage extraction in `fetchAndExtract`.
 *
 * Every public method resolves to a plain result object with a numeric `code`
 * (200 on success) instead of throwing for expected failures.
 */
var DownloaderCore = class {
  client;
  cache;
  options;
  startTime = Date.now();
  semaphore;

  /**
   * @param {object} [options] - User options merged over the defaults below.
   *   `fastMode: true` forces a 500 ms response budget. Whenever a response
   *   budget is in effect, retries are forced to 0 and `cache` is coerced to
   *   a boolean (anything but `false` becomes `true`).
   */
  constructor(options = {}) {
    const fastMode = options.fastMode === true;
    const responseBudgetMs = fastMode ? 500 : options.responseBudgetMs;
    this.options = {
      cache: true,
      cacheTtlMs: 3e5,
      staleCacheTtlMs: 864e5,
      cacheMaxSize: 500,
      maxConcurrency: 100,
      timeoutMs: 8e3,
      retries: 2,
      userAgentRotation: true,
      ...options,
      responseBudgetMs,
      fastMode
    };
    if (responseBudgetMs) {
      this.options.retries = 0;
      this.options.cache = this.options.cache !== false;
    }
    this.client = new HttpClient({
      timeoutMs: this.options.timeoutMs,
      retries: this.options.retries,
      userAgentRotation: this.options.userAgentRotation
    });
    this.cache = new ResponseCache({
      maxSize: this.options.cacheMaxSize,
      ttlMs: this.options.cacheTtlMs,
      staleTtlMs: this.options.staleCacheTtlMs,
      redis: this.options.redis
    });
    this.semaphore = {
      max: this.options.maxConcurrency ?? 100,
      current: 0,
      queue: []
    };
  }

  /**
   * Takes one concurrency slot, waiting in FIFO order when all
   * `semaphore.max` slots are in use.
   *
   * @returns {Promise<void>} Resolves once the slot has been taken.
   */
  async acquire() {
    if (this.semaphore.current < this.semaphore.max) {
      this.semaphore.current++;
      return;
    }
    await new Promise((resolve) => {
      this.semaphore.queue.push(() => {
        this.semaphore.current++;
        resolve();
      });
    });
  }

  /** Frees one concurrency slot and wakes the oldest queued waiter, if any. */
  release() {
    this.semaphore.current--;
    const next = this.semaphore.queue.shift();
    if (next) next();
  }

  /** @returns {{extractor: string, version: string}} Metadata attached to every result. */
  meta() {
    return { extractor: EXTRACTOR_NAME, version: PACKAGE_VERSION };
  }

  /**
   * Wraps extracted data in a success (code 200) result.
   *
   * @param {object} data - Extraction output; `media`, `caption`, `username`,
   *   `engagement` and `tags` are copied through.
   * @returns {object} Success result.
   */
  success(data) {
    return {
      code: 200,
      meta: this.meta(),
      media: data.media,
      caption: data.caption,
      username: data.username,
      engagement: data.engagement,
      tags: data.tags
    };
  }

  /**
   * Builds an error result.
   *
   * @param {number} code - HTTP-style status code.
   * @param {string} message - Human-readable reason.
   * @param {number} [retryAfterMs] - Suggested delay before retrying.
   * @returns {object} Error result.
   */
  error(code, message, retryAfterMs) {
    return { code, message, meta: this.meta(), retryAfterMs };
  }

  /**
   * Runs a full extraction under the concurrency semaphore and stores
   * successful (code 200) results in the cache unless caching is disabled.
   * The returned promise resolves to a result object; thrown errors are
   * logged and converted into a 500 result.
   *
   * @param {string} cacheKey - Normalized URL used as the cache key.
   * @param {object} parsed - Output of `parseInstagramUrl`.
   * @returns {Promise<object>} Result object.
   */
  startDownloadTask(cacheKey, parsed) {
    return (async () => {
      await this.acquire();
      try {
        const result = await this.fetchAndExtract(parsed);
        if (this.options.cache !== false && result.code === 200) {
          this.cache.set(cacheKey, result);
        }
        return result;
      } catch (err) {
        logger.error("Background extraction failed", err);
        return this.error(500, err instanceof Error ? err.message : "Extraction failed");
      } finally {
        this.release();
      }
    })();
  }

  /**
   * API-only attempt for story/highlight (fits ~500ms budget).
   *
   * Resolves to `null` whenever no fast result is available: no response
   * budget, no ready session cookie, an unsupported URL type, no CDN video in
   * the API answer, or any error raised by the API call.
   *
   * @param {object} parsed - Output of `parseInstagramUrl`.
   * @returns {Promise<object|null>} Success result or `null`.
   */
  async fetchFastExtract(parsed) {
    const budget = this.options.responseBudgetMs;
    if (!budget) return null;
    const sessionCookie = buildSessionCookie(
      this.options.sessionId,
      this.options.cookies
    );
    if (!sessionCookie || !sessionCookieReady(sessionCookie)) return null;
    // Time allowance handed to the API call: budget minus 80 ms, never below 250 ms.
    const apiMs = Math.max(250, budget - 80);
    try {
      if (parsed.type === "story" && parsed.storyId) {
        const fromApi = await fetchMediaInfoByPk(
          parsed.storyId,
          sessionCookie,
          parsed.normalized,
          userIdFromCookie(sessionCookie) ?? void 0,
          apiMs
        );
        const media = filterValidMedia(fromApi?.media ?? []);
        if (!hasStoryVideo(media)) return null;
        const extracted = normalizeExtraction(
          {
            ...fromApi,
            media,
            caption: "",
            username: parsed.username ?? fromApi.username
          },
          parsed,
          {},
          ""
        );
        return extracted?.media.length ? this.success(extracted) : null;
      }
      if (parsed.type === "highlight" && parsed.storyMediaId) {
        // storyMediaId is split on "_" into a media pk and an owner id.
        const pk = parsed.storyMediaId.split("_")[0];
        const ownerId = parsed.storyMediaId.split("_")[1];
        const fromApi = await fetchMediaInfoByPk(
          pk,
          sessionCookie,
          parsed.normalized,
          ownerId,
          apiMs
        );
        const media = filterValidMedia(fromApi?.media ?? []);
        if (!media.some((m) => m.type === "video" && isCdnMediaUrl(m.url))) return null;
        const extracted = normalizeExtraction(
          {
            ...fromApi,
            media,
            caption: fromApi.caption ?? "",
            username: parsed.username ?? fromApi.username
          },
          parsed,
          {},
          ""
        );
        return extracted?.media.length ? this.success(extracted) : null;
      }
    } catch {
      return null;
    }
    return null;
  }

  /**
   * Warm cache in background (full extraction, no response budget).
   *
   * Joins an already running extraction for the same normalized URL when one
   * exists in `pendingDownloads`.
   *
   * @param {string} url - Instagram URL.
   * @returns {Promise<object>} Result of the full extraction, or a 400 result
   *   for an invalid URL.
   */
  prefetch(url) {
    const validation = validateUrl(url);
    if (!validation.valid) {
      return Promise.resolve(this.error(400, validation.error ?? "Invalid URL"));
    }
    const parsed = parseInstagramUrl(url);
    const cacheKey = parsed.normalized;
    let task = pendingDownloads.get(cacheKey);
    if (!task) {
      task = this.startDownloadTask(cacheKey, parsed);
      pendingDownloads.set(cacheKey, task);
      void task.finally(() => pendingDownloads.delete(cacheKey));
    }
    return task;
  }

  /**
   * Resolves media for `url`.
   *
   * Lookup order: fresh in-memory cache, stale cache (returned immediately
   * while a background refresh is started), the asynchronous cache lookup
   * (only when no response budget is set), an in-flight extraction for the
   * same URL, and finally a new extraction.
   *
   * With a response budget, the new extraction is raced against the fast API
   * attempt and a timer. If neither produces a result within the budget, a
   * 503 result with `retryAfterMs` is returned while the extraction keeps
   * running in the background.
   *
   * @param {string} url - Instagram URL.
   * @returns {Promise<object>} Success or error result.
   */
  async download(url) {
    const validation = validateUrl(url);
    if (!validation.valid) {
      return this.error(400, validation.error ?? "Invalid URL");
    }
    const parsed = parseInstagramUrl(url);
    const cacheKey = parsed.normalized;
    const budget = this.options.responseBudgetMs;
    if (this.options.cache !== false) {
      const fresh = this.cache.getFreshSync(cacheKey);
      if (fresh) {
        logger.debug(`Cache hit (sync) for ${cacheKey}`);
        return fresh;
      }
      const stale = this.cache.getStaleSync(cacheKey);
      if (stale) {
        logger.debug(`Stale cache hit for ${cacheKey}`);
        this.ensureBackgroundFetch(cacheKey, parsed);
        return stale;
      }
      if (!budget) {
        const cached = await this.cache.get(cacheKey);
        if (cached) {
          logger.debug(`Cache hit for ${cacheKey}`);
          return cached;
        }
      }
    }
    const inFlight2 = pendingDownloads.get(cacheKey);
    if (inFlight2) {
      return inFlight2;
    }
    const task = this.startDownloadTask(cacheKey, parsed);
    pendingDownloads.set(cacheKey, task);
    void task.finally(() => pendingDownloads.delete(cacheKey));
    if (!budget) {
      return task;
    }
    const fastResult = this.fetchFastExtract(parsed);
    const budgetTimeout = sleep(budget).then(() => null);
    // A null fast result only means "no fast path available". It must not win
    // the race, otherwise the caller gets a 503 immediately even though the
    // full task may still finish inside the budget. Defer to whichever of the
    // full task or the budget timer settles first instead.
    const fastAttempt = fastResult
      .catch((err) => {
        // The fast path is best-effort; a failure here must not fail the download.
        logger.debug("Fast extract failed", err);
        return null;
      })
      .then((fast) => fast ?? Promise.race([task, budgetTimeout]));
    const raced = await Promise.race([fastAttempt, task, budgetTimeout]);
    if (raced) {
      if (raced.code === 200 && this.options.cache !== false) {
        this.cache.set(cacheKey, raced);
      }
      return raced;
    }
    const retryAfterMs = Math.max(200, Math.min(500, budget));
    return this.error(
      503,
      "Response budget exceeded \u2014 fetch still running; retry the same URL immediately",
      retryAfterMs
    );
  }

  /**
   * Starts a background extraction for `cacheKey` unless one is already
   * registered in `pendingDownloads`.
   *
   * @param {string} cacheKey - Normalized URL.
   * @param {object} parsed - Output of `parseInstagramUrl`.
   */
  ensureBackgroundFetch(cacheKey, parsed) {
    if (pendingDownloads.has(cacheKey)) return;
    const task = this.startDownloadTask(cacheKey, parsed);
    pendingDownloads.set(cacheKey, task);
    void task.finally(() => pendingDownloads.delete(cacheKey));
  }

  /**
   * Full extraction pipeline for one parsed URL.
   *
   * Stages, in order: session-authenticated API prefetch (story, highlight,
   * post), page fetch, HTML extraction merged with API data, story session
   * fallback, caption/large-image enrichment, embed fallbacks, reel and post
   * API fallbacks, story embed fallback, normalization and tagging.
   *
   * Never throws: failures are mapped to 404/429/403/504/500 results.
   *
   * @param {object} parsed - Output of `parseInstagramUrl`.
   * @returns {Promise<object>} Success or error result.
   */
  async fetchAndExtract(parsed) {
    const resolvedUrl = resolveFetchUrl(parsed);
    try {
      let sessionCookie = buildSessionCookie(
        this.options.sessionId,
        this.options.cookies
      );
      const sessionTypes = parsed.type === "story" || parsed.type === "highlight";
      const embedUrl = resolveEmbedUrl(parsed);
      let prefetchApiData = null;
      let body = "";
      let statusCode = 200;
      let embedBody = "";
      // Enrich the cookie only when it is not already reported as ready.
      const ensureSession = async (cookie) => {
        if (sessionCookieReady(cookie)) return cookie;
        return enrichSessionCookie(cookie);
      };

      // --- Stage 1: session-authenticated API prefetch -----------------------
      if (sessionCookie && parsed.type === "story" && parsed.storyId) {
        sessionCookie = await ensureSession(sessionCookie);
        const fromApi = await fetchMediaInfoByPk(
          parsed.storyId,
          sessionCookie,
          parsed.normalized,
          userIdFromCookie(sessionCookie) ?? void 0
        );
        const apiMedia = filterValidMedia(fromApi?.media ?? []);
        if (hasStoryVideo(apiMedia)) {
          const extracted2 = normalizeExtraction(
            {
              ...fromApi,
              media: apiMedia,
              caption: fromApi.caption ?? "",
              username: parsed.username ?? fromApi.username
            },
            parsed,
            {},
            ""
          );
          if (extracted2?.media.length) {
            return this.success(extracted2);
          }
        }
        prefetchApiData = fromApi;
      } else if (sessionCookie && parsed.type === "highlight" && parsed.storyMediaId) {
        sessionCookie = await ensureSession(sessionCookie);
        const pk = parsed.storyMediaId.split("_")[0];
        const ownerId = parsed.storyMediaId.split("_")[1];
        // API lookup and page fetch run concurrently.
        const [fromApi, pageRes] = await Promise.all([
          fetchMediaInfoByPk(pk, sessionCookie, parsed.normalized, ownerId),
          this.client.fetchWithCookie(resolvedUrl, sessionCookie)
        ]);
        body = pageRes.body;
        statusCode = pageRes.statusCode;
        const apiMedia = filterValidMedia(fromApi?.media ?? []);
        if (apiMedia.some((m) => m.type === "video" && isCdnMediaUrl(m.url))) {
          const pageMeta2 = extractPageMeta(body);
          const extracted2 = normalizeExtraction(
            {
              ...fromApi,
              media: apiMedia,
              caption: fromApi.caption ?? "",
              username: parsed.username ?? fromApi.username
            },
            parsed,
            { ...pageMeta2, html: body },
            body
          );
          if (extracted2?.media.length) {
            return this.success(extracted2);
          }
        }
        prefetchApiData = fromApi;
      } else if (sessionCookie && parsed.type === "post" && parsed.shortcode) {
        sessionCookie = await ensureSession(sessionCookie);
        const mediaPk = shortcodeToMediaPk(parsed.shortcode);
        const ownerId = userIdFromCookie(sessionCookie) ?? void 0;
        const pagePromise = this.client.fetchWithCookie(resolvedUrl, sessionCookie);
        if (mediaPk) {
          const [fromApi, pageRes] = await Promise.all([
            fetchMediaInfoByPk(mediaPk, sessionCookie, parsed.normalized, ownerId),
            pagePromise
          ]);
          prefetchApiData = fromApi;
          body = pageRes.body;
          statusCode = pageRes.statusCode;
        } else {
          const pageRes = await pagePromise;
          body = pageRes.body;
          statusCode = pageRes.statusCode;
        }
      }

      // --- Stage 2: page fetch when no branch above produced a body ----------
      if (!body) {
        if (sessionCookie && sessionTypes) {
          sessionCookie = await ensureSession(sessionCookie);
          const res = await this.client.fetchWithCookie(resolvedUrl, sessionCookie);
          body = res.body;
          statusCode = res.statusCode;
        } else {
          const res = await this.client.fetch(resolvedUrl);
          body = res.body;
          statusCode = res.statusCode;
        }
      }
      if (statusCode === 404) {
        return this.error(404, "Media not found");
      }
      if (statusCode === 429) {
        return this.error(429, "Rate limited");
      }

      // --- Stage 3: HTML extraction merged with prefetched API data ----------
      const pageMeta = extractPageMeta(body);
      const ctx = { html: body, url: resolvedUrl, parsed, sessionCookie };
      const prefetchedMedia = prefetchApiData ? filterValidMedia(prefetchApiData.media) : [];
      let extracted;
      if (parsed.type === "story" && prefetchedMedia.length && hasStoryVideo(prefetchedMedia)) {
        extracted = {
          ...prefetchApiData,
          media: prefetchedMedia,
          username: parsed.username ?? prefetchApiData.username
        };
      } else {
        extracted = await runExtractor(ctx);
        if (prefetchApiData) {
          extracted = mergeExtracted(extracted, prefetchApiData);
        }
      }
      if (parsed.type === "post" && prefetchApiData) {
        const apiMedia = filterValidMedia(prefetchApiData.media);
        const htmlCount = filterValidMedia(extracted?.media ?? []).length;
        // Use the API media list when it has several items or more than the HTML yielded.
        if (apiMedia.length >= 2 || apiMedia.length > htmlCount) {
          extracted = {
            ...extracted ?? { media: [], caption: "", username: "" },
            ...prefetchApiData,
            media: apiMedia,
            caption: prefetchApiData.caption || extracted?.caption || "",
            username: prefetchApiData.username || extracted?.username || "",
            engagement: prefetchApiData.engagement ?? extracted?.engagement
          };
        }
      }

      // --- Stage 4: story fallback through the session API --------------------
      const validExtracted = filterValidMedia(extracted?.media ?? []);
      if (parsed.type === "story" && sessionCookie && !hasStoryVideo(validExtracted)) {
        logger.debug("Trying story API with session cookie");
        const fromApi = await fetchStoryViaSession(parsed, sessionCookie, body);
        extracted = mergeExtracted(extracted, fromApi);
      }

      // --- Stage 5: caption and large-image enrichment (run concurrently) ----
      let embedCaption = "";
      let captionEmbedHtml = "";
      const captionEmbedUrl = contentTypesWithEmbedCaption(parsed.type) ? captionedEmbedUrl(parsed) : null;
      const enrichmentTasks = [];
      if (captionEmbedUrl) {
        enrichmentTasks.push(
          (async () => {
            try {
              const capRes = await this.client.fetch(captionEmbedUrl, false);
              captionEmbedHtml = capRes.body;
              embedCaption = parseCaptionFromCaptionedEmbed(capRes.body, parsed.type);
            } catch (err) {
              // Caption enrichment is optional; keep going but leave a trace.
              logger.debug("Captioned embed fetch failed", err);
            }
          })()
        );
      }
      if (parsed.type === "post" && parsed.shortcode && imageNeedsHigherResolution(extracted?.media ?? [])) {
        enrichmentTasks.push(
          (async () => {
            const large = await fetchPostLargeImageUrl(parsed.shortcode);
            if (large && extracted?.media.length) {
              extracted = {
                ...extracted,
                media: applyLargePostImage(extracted.media, large)
              };
            }
          })()
        );
      }
      if (enrichmentTasks.length) {
        await Promise.all(enrichmentTasks);
      }

      // --- Stage 6: embed fallbacks -------------------------------------------
      if ((parsed.type === "reel" || parsed.type === "tv") && !hasReelVideo(extracted?.media ?? []) && captionEmbedHtml) {
        extracted = mergeExtracted(extracted, parseEmbedHtml(captionEmbedHtml));
      }
      const postHtml = body;
      const carouselHintEarly = parsed.type === "post" && htmlIndicatesCarouselPost(postHtml);
      const needPostEmbed = parsed.type === "post" && embedUrl && (postNeedsEmbedFetch(extracted?.media ?? []) || carouselHintEarly);
      const needReelEmbed = (parsed.type === "reel" || parsed.type === "tv") && embedUrl && needsVideoEmbedFallback(parsed, extracted);
      if (needPostEmbed || needReelEmbed) {
        logger.debug(`Fetching embed: ${embedUrl}`);
        const embedRes = await this.client.fetch(embedUrl, false);
        embedBody = embedRes.body;
        if (needReelEmbed) {
          const embedData = parseEmbedHtml(embedBody);
          extracted = mergeExtracted(extracted, embedData);
        } else {
          const { parseHtml: parseHtmlLayers } = await Promise.resolve().then(() => (init_parser(), parser_exports));
          const embedParsed = parseEmbedHtml(embedBody) ?? parseHtmlLayers(embedBody, "post");
          extracted = mergeExtracted(extracted, embedParsed);
        }
      }
      // Page HTML plus any embed HTML fetched so far, newline-separated.
      const dimensionHtmlPre = embedBody || captionEmbedHtml ? `${body}\n${embedBody}\n${captionEmbedHtml}` : body;

      // --- Stage 7: reel API fallback (needs a session) -----------------------
      if ((parsed.type === "reel" || parsed.type === "tv") && sessionCookie && !hasReelVideo(extracted?.media ?? [])) {
        const mediaPk = extractMediaPkFromHtml(dimensionHtmlPre);
        if (mediaPk) {
          logger.debug(`Reel API fallback for media pk=${mediaPk}`);
          const fromApi = await fetchMediaInfoByPk(
            mediaPk,
            sessionCookie,
            parsed.normalized
          );
          extracted = mergeExtracted(extracted, fromApi);
        }
      }

      // --- Stage 8: post carousel lookup through the API ----------------------
      if (parsed.type === "post" && sessionCookie && parsed.shortcode) {
        sessionCookie = await ensureSession(sessionCookie);
        const htmlSlideCount = extracted?.media.length ?? 0;
        const carouselHint = htmlIndicatesCarouselPost(dimensionHtmlPre);
        const pkCandidates = postMediaPkCandidates(
          parsed.shortcode,
          dimensionHtmlPre,
          extracted?.media ?? []
        );
        // Keep the candidate whose API answer has the most valid media.
        let bestApi = null;
        for (const mediaPk of pkCandidates) {
          logger.debug(`Post API carousel pk=${mediaPk}`);
          const fromApi = await fetchMediaInfoByPk(
            mediaPk,
            sessionCookie,
            parsed.normalized,
            userIdFromCookie(sessionCookie) ?? void 0
          );
          const apiMedia2 = filterValidMedia(fromApi?.media ?? []);
          if (fromApi && apiMedia2.length > (bestApi?.media.length ?? 0)) {
            bestApi = { ...fromApi, media: apiMedia2 };
          }
        }
        const apiMedia = filterValidMedia(bestApi?.media ?? []);
        const apiImprovesCarousel = apiMedia.length >= 2 || apiMedia.length > htmlSlideCount || carouselHint && apiMedia.length > htmlSlideCount;
        if (bestApi && apiImprovesCarousel) {
          extracted = {
            ...extracted,
            ...bestApi,
            media: apiMedia,
            caption: bestApi.caption || extracted?.caption || "",
            username: bestApi.username || extracted?.username || "",
            engagement: bestApi.engagement ?? extracted?.engagement
          };
        }
      }

      // --- Stage 9: story embed fallback --------------------------------------
      const storyEmbedUrl = resolveStoryEmbedUrl(parsed);
      if (storyEmbedUrl && parsed.type === "story" && !hasStoryVideo(extracted?.media ?? [])) {
        logger.debug(`Fetching story embed: ${storyEmbedUrl}`);
        const storyEmbedRes = await this.client.fetch(storyEmbedUrl, false);
        const storyEmbedData = parseEmbedHtml(storyEmbedRes.body);
        extracted = mergeExtracted(extracted, storyEmbedData);
        if (!extracted?.media.some((m) => m.type === "video")) {
          const { parseHtml: parseHtml2 } = await Promise.resolve().then(() => (init_parser(), parser_exports));
          extracted = mergeExtracted(extracted, parseHtml2(storyEmbedRes.body));
        }
      }

      // --- Stage 10: normalization, tagging and result mapping ---------------
      const dimensionHtml = dimensionHtmlPre;
      extracted = normalizeExtraction(
        extracted,
        parsed,
        { ...pageMeta, html: dimensionHtml, embedCaption },
        dimensionHtml
      );
      if (extracted && parsed.type === "post") {
        const contentTags = buildPostContentTags(
          parsed.type,
          extracted.media.length,
          dimensionHtml,
          Boolean(sessionCookie)
        );
        extracted = {
          ...extracted,
          tags: mergeResultTags(extracted.tags, contentTags)
        };
      }
      if (extracted?.media.length) {
        extracted = {
          ...extracted,
          // Drop the thumbnail from video entries whose thumbnail URL is not valid.
          media: extracted.media.map((m) => {
            if (m.type !== "video" || isValidThumbnailUrl(m.thumbnail)) return m;
            const { thumbnail: _t, ...rest } = m;
            return rest;
          })
        };
      }
      if (extracted?.isPrivate) {
        return this.error(403, "Private account");
      }
      if (!extracted?.media.length) {
        const msg = parsed.type === "story"
          ? sessionCookie
            ? "Story not found or expired (check sessionId and that the story is still active)"
            : "Story media requires sessionId \u2014 Instagram does not expose story CDN URLs to logged-out requests. Pass options.sessionId from a logged-in browser cookie."
          : parsed.type === "reel" || parsed.type === "tv"
            ? sessionCookie
              ? "Reel video not found \u2014 reel may be unavailable or session expired"
              : "Reel video requires INSTAGRAM_SESSION_ID or INSTAGRAM_COOKIES \u2014 Instagram no longer exposes reel MP4 URLs in public HTML"
            : "Media not found";
        return this.error(404, msg);
      }
      return this.success(extracted);
    } catch (err) {
      const code = err?.code ?? err?.statusCode;
      if (code === 429) return this.error(429, "Rate limited");
      if (err instanceof Error && err.name === "AbortError") {
        return this.error(504, "Request timed out");
      }
      logger.error("Extraction failed", err);
      return this.error(500, err instanceof Error ? err.message : "Extraction failed");
    }
  }

  /**
   * Alias of `download`.
   *
   * @param {string} url - Instagram URL.
   * @returns {Promise<object>} Same result as `download(url)`.
   */
  async info(url) {
    return this.download(url);
  }

  /**
   * Asynchronous wrapper around `validateUrl`.
   *
   * @param {string} url - Candidate URL.
   * @returns {Promise<object>} Validation record.
   */
  async validate(url) {
    return validateUrl(url);
  }

  /**
   * Downloads and returns only the media list.
   *
   * @param {string} url - Instagram URL.
   * @returns {Promise<Array|object>} The `media` array on success; the full
   *   error result object when the download did not return code 200.
   */
  async media(url) {
    const result = await this.download(url);
    if (result.code !== 200) return result;
    return result.media;
  }

  /**
   * Downloads several URLs concurrently and records the wall-clock duration
   * of each.
   *
   * @param {string[]} urls - Instagram URLs.
   * @returns {Promise<Array<{url: string, result: object, durationMs: number}>>}
   *   One entry per input URL, in input order.
   */
  async batch(urls) {
    const tasks = urls.map(async (url) => {
      const start = Date.now();
      const result = await this.download(url);
      return { url, result, durationMs: Date.now() - start };
    });
    return Promise.all(tasks);
  }

  /**
   * Reports version, uptime (milliseconds since this instance was created),
   * cache statistics and connection-pool counters.
   *
   * @returns {Promise<object>} Health snapshot with `status: "ok"`.
   */
  async health() {
    const stats = this.cache.getStats();
    const pool = getPoolStats();
    return {
      status: "ok",
      version: PACKAGE_VERSION,
      uptime: Date.now() - this.startTime,
      cache: {
        size: stats.size,
        maxSize: stats.maxSize,
        hitRate: stats.hitRate
      },
      pool: {
        connections: pool.connections,
        pending: this.client.getInFlightCount()
      }
    };
  }

  /** Empties the response cache. */
  clearCache() {
    this.cache.clear();
  }
};
3226
+
3227
+ // src/index.ts
3228
/**
 * Public facade of the package. Every method forwards to a single
 * `DownloaderCore` instance created from the constructor options.
 */
var ultraigdl = class {
  core;

  /** @param {object} [options] - Passed unchanged to `DownloaderCore`. */
  constructor(options) {
    this.core = new DownloaderCore(options);
  }

  /** Forwards to `DownloaderCore#download`. */
  download(url) {
    const { core } = this;
    return core.download(url);
  }

  /** Forwards to `DownloaderCore#info`. */
  info(url) {
    const { core } = this;
    return core.info(url);
  }

  /** Forwards to `DownloaderCore#validate`. */
  validate(url) {
    const { core } = this;
    return core.validate(url);
  }

  /** Forwards to `DownloaderCore#media`. */
  media(url) {
    const { core } = this;
    return core.media(url);
  }

  /** Forwards to `DownloaderCore#batch`. */
  batch(urls) {
    const { core } = this;
    return core.batch(urls);
  }

  /** Forwards to `DownloaderCore#health`. */
  health() {
    const { core } = this;
    return core.health();
  }

  /** Forwards to `DownloaderCore#clearCache`; returns nothing. */
  clearCache() {
    const { core } = this;
    core.clearCache();
  }

  /** Start extraction now so the next `download()` can return within `responseBudgetMs`. */
  prefetch(url) {
    const { core } = this;
    return core.prefetch(url);
  }
};
3259
+ var src_default = ultraigdl;
3260
+ export {
3261
+ DownloaderCore,
3262
+ EXTRACTOR_NAME,
3263
+ PACKAGE_VERSION,
3264
+ src_default as default,
3265
+ isInstagramUrl,
3266
+ parseInstagramUrl,
3267
+ ultraigdl,
3268
+ validateUrl
3269
+ };
3270
+ //# sourceMappingURL=index.js.map