@chilfish/gallery-dl-instagram 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2246 @@
1
+ //#region src/config.ts
2
/** True when `node` is a non-null, non-array object that can hold config keys. */
function isConfigNode(node) {
  return node != null && typeof node === "object" && !Array.isArray(node);
}

/** Own-property test that never consults the prototype chain. */
function hasOwnConfigKey(node, key) {
  return Object.prototype.hasOwnProperty.call(node, key);
}

/**
 * In-memory configuration tree addressed by dot-separated paths.
 *
 * Only own properties of plain (non-array) objects are traversed, so
 * inherited members such as `constructor` or `toString` are never
 * mistaken for configuration values.
 */
var ConfigManager = class {
  /** Root of the configuration tree. */
  data;

  /**
   * @param {object} [data] Initial configuration tree (used as-is, not copied).
   */
  constructor(data = {}) {
    this.data = data;
  }

  /**
   * Read a value at a dot-path like `'extractor.instagram.videos'`.
   *
   * @param {string} path Dot-separated key path.
   * @param {*} [defaultValue] Returned when the path does not exist, passes
   *   through a non-object/array, or resolves to `undefined`.
   * @returns {*} The stored value or `defaultValue`.
   */
  get(path, defaultValue) {
    let node = this.data;
    for (const key of path.split(".")) {
      if (!isConfigNode(node) || !hasOwnConfigKey(node, key)) return defaultValue;
      node = node[key];
    }
    return node === void 0 ? defaultValue : node;
  }

  /**
   * Look `key` up along a hierarchy of nested sections.
   *
   * A value defined at the root wins outright. Otherwise every section
   * reached by walking `cfgPath` is inspected and the deepest (most
   * specific) definition is returned, so e.g. a per-subcategory setting
   * overrides a per-category one, which overrides a global extractor one.
   *
   * @param {string[]} cfgPath Section names from least to most specific.
   * @param {string} key Option name to look up in each section.
   * @param {*} [defaultVal] Returned when no section defines `key`.
   * @returns {*} The resolved value or `defaultVal`.
   */
  interpolate(cfgPath, key, defaultVal) {
    const lookup = (node) =>
      isConfigNode(node) && hasOwnConfigKey(node, key) ? node[key] : void 0;

    let node = this.data;
    const rootValue = lookup(node);
    if (rootValue !== void 0) return rootValue;

    let resolved = defaultVal;
    for (const section of cfgPath) {
      if (!isConfigNode(node) || !hasOwnConfigKey(node, section)) break;
      node = node[section];
      const value = lookup(node);
      if (value !== void 0) resolved = value;
    }
    return resolved;
  }

  /**
   * Mutate the config at a given dot-path, creating intermediate objects
   * as needed. Intermediate values that are not plain objects (including
   * arrays) are replaced by a fresh object.
   *
   * @param {string} path Dot-separated key path.
   * @param {*} value Value to store at the final key.
   * @throws {Error} When any path segment is `__proto__`, which would
   *   modify an object's prototype instead of storing a config value.
   */
  set(path, value) {
    const keys = path.split(".");
    if (keys.includes("__proto__")) {
      throw new Error(`Refusing to set unsafe config path: ${path}`);
    }
    let node = this.data;
    for (const key of keys.slice(0, -1)) {
      let child = hasOwnConfigKey(node, key) ? node[key] : void 0;
      if (!isConfigNode(child)) {
        child = {};
        node[key] = child;
      }
      node = child;
    }
    node[keys[keys.length - 1]] = value;
  }
};
54
+ //#endregion
55
+ //#region src/core/extractor.ts
56
+ /** A no-op logger */
57
// Logger whose four level methods accept anything and do nothing.
const noopLogger = Object.fromEntries(
  ["debug", "info", "warn", "error"].map((level) => [level, () => {}])
);
63
/**
 * Base class for extractors. Subclasses supply `items()` (an async
 * generator of messages), `category`/`subcategory`, and optionally
 * override `_init()` for one-time setup.
 */
var Extractor = class {
  /** Regex pattern to match against URLs */
  static pattern = /^$/;
  /** The input URL */
  url;
  /** Capture groups (without the full match) of the regex match passed in */
  groups;
  config;
  /** HTTP client — public so Job can access for downloads */
  http;
  /** Storage backend — public so Job can access for writes */
  storage;
  /** Logger instance — public so Job can access for reporting */
  log;
  /** Delay range in seconds — random between [min, max] before each request */
  requestInterval = [6, 12];
  _initialized = false;
  /** In-flight `_init()` promise, shared by concurrent `initialize()` callers. */
  _initPromise = null;
  /** `Date.now()` of the last completed request (0 = none yet). */
  _lastRequestTime = 0;

  /**
   * @param {object} opts
   * @param {string} opts.url Input URL.
   * @param {ArrayLike<string>} [opts.match] Regex match; index 0 is dropped.
   * @param {object} opts.config Config object exposing `interpolate()`.
   * @param {object} opts.http HTTP client exposing `request()`.
   * @param {object} opts.storage Storage backend.
   * @param {object} opts.log Logger.
   */
  constructor(opts) {
    this.url = opts.url;
    this.groups = opts.match ? [...opts.match].slice(1) : [];
    this.config = opts.config;
    this.http = opts.http;
    this.storage = opts.storage;
    this.log = opts.log;
  }

  /**
   * One-time async setup (cookies, session, internal state).
   *
   * Safe to call multiple times, including concurrently: callers that
   * arrive while setup is running share the same in-flight promise, and
   * once setup has succeeded further calls return immediately. If
   * `_init()` rejects, the error propagates and a later call retries.
   */
  async initialize() {
    if (this._initialized) return;
    if (this._initPromise) {
      await this._initPromise;
      return;
    }
    const pending = (async () => {
      await this._init();
      this._initialized = true;
    })();
    this._initPromise = pending;
    try {
      await pending;
    } finally {
      // Clear on both success and failure so a failed setup can be retried.
      this._initPromise = null;
    }
  }

  /**
   * Subclass hook for one-time setup.
   */
  async _init() {}

  /** Async iteration: initialize, then delegate to the subclass `items()`. */
  async *[Symbol.asyncIterator]() {
    await this.initialize();
    yield* this.items();
  }

  /**
   * Read a config value using the interpolated hierarchy
   * `extractor` → `category` → `subcategory`.
   */
  _cfg(key, defaultVal) {
    const path = ["extractor", this.category, this.subcategory];
    return this.config.interpolate(path, key, defaultVal);
  }

  /**
   * Rate-limited HTTP request wrapper.
   *
   * @param {string} url Request URL.
   * @param {object} [cfg] Extra request options; may override `url`.
   * @returns {Promise<object>} The HTTP client's response.
   */
  async request(url, cfg = {}) {
    await this._throttle();
    const response = await this.http.request({ url, ...cfg });
    this._lastRequestTime = Date.now();
    return response;
  }

  /**
   * Convenience: request + parse JSON body.
   * Returns `resp.data` untouched when it is already an object, and `{}`
   * when the body cannot be parsed.
   */
  async requestJSON(url, cfg = {}) {
    const resp = await this.request(url, cfg);
    if (typeof resp.data === "object") return resp.data;
    try {
      return JSON.parse(resp.data);
    } catch {
      return {};
    }
  }

  /**
   * Sleep long enough to keep a randomized minimum interval (drawn from
   * `requestInterval`) since the previous request.
   */
  async _throttle() {
    const elapsed = Date.now() - this._lastRequestTime;
    const [min, max] = this.requestInterval;
    const target = min + Math.random() * (max - min);
    const waitMs = Math.max(0, target * 1e3 - elapsed);
    if (waitMs > 0) await new Promise((resolve) => setTimeout(resolve, waitMs));
  }

  /**
   * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
   * Values above 2.5e9 are taken to be milliseconds already.
   *
   * @returns {string} ISO string, or `""` when `ts` is null/undefined.
   */
  parseTimestamp(ts) {
    if (ts == null) return "";
    const asMs = ts > 25e8 ? ts : ts * 1e3;
    return new Date(asMs).toISOString();
  }

  /**
   * Generate a random hex token (used for CSRF).
   *
   * @param {number} [size=16] Number of random bytes (output is 2x chars).
   */
  static generateToken(size = 16) {
    const bytes = new Uint8Array(size);
    if (typeof crypto !== "undefined" && crypto.getRandomValues) {
      crypto.getRandomValues(bytes);
    } else {
      // Fallback for runtimes without Web Crypto.
      for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
    }
    return Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
  }
};
176
+ //#endregion
177
+ //#region src/core/job.ts
178
/**
 * Format a byte count as a short human-readable string (B, KB, MB, GB).
 *
 * Plain bytes are shown without decimals, larger units with one decimal.
 * Values beyond the GB range stay in GB.
 *
 * @param {number} bytes Byte count.
 * @returns {string} e.g. `"512 B"`, `"1.5 KB"`; `"0 B"` for zero,
 *   negative or NaN input.
 */
function formatBytes(bytes) {
  // Also catches NaN and negatives, for which Math.log would yield NaN.
  if (!(bytes > 0)) return "0 B";
  const units = ["B", "KB", "MB", "GB"];
  const exponent = Math.floor(Math.log(bytes) / Math.log(1024));
  // Clamp below as well: fractional byte counts give a negative exponent.
  const i = Math.min(Math.max(exponent, 0), units.length - 1);
  return `${(bytes / 1024 ** i).toFixed(i === 0 ? 0 : 1)} ${units[i]}`;
}
189
// ANSI SGR escape sequences used for terminal styling.
const BOLD = "\u001b[1m";
const DIM = "\u001b[2m";
const CYAN = "\u001b[36m";
const GREEN = "\u001b[32m";
const YELLOW = "\u001b[33m";
const RESET = "\u001b[0m";

/** Wrap `s` in bold. */
function b(s) {
  return BOLD.concat(s, RESET);
}

/** Wrap `s` in dim/faint. */
function dim(s) {
  return DIM.concat(s, RESET);
}

/** Wrap `s` in cyan. */
function c(s) {
  return CYAN.concat(s, RESET);
}

/** Wrap `s` in green. */
function g(s) {
  return GREEN.concat(s, RESET);
}
207
/**
 * Right-pad string `s` with spaces up to `n` characters; strings that are
 * already at least `n` long are returned unchanged.
 */
function pad(s, n) {
  const missing = n - s.length;
  if (missing > 0) {
    return s + " ".repeat(missing);
  }
  return s;
}
210
/**
 * Base job: drives an extractor and routes each yielded message to a
 * handler (`handleDirectory`, `handleUrl`, `handleQueue`) that subclasses
 * provide.
 */
var Job = class {
  extractor;
  /** Status value returned by `run()`; starts at 0. */
  status = 0;

  constructor(extractor) {
    this.extractor = extractor;
  }

  /**
   * Main entry point. Initializes the extractor, iterates it
   * asynchronously and dispatches every yielded message by its `type`.
   * Messages of any other type are ignored.
   *
   * @returns {Promise<number>} The job's final `status`.
   */
  async run() {
    const source = this.extractor;
    source.log.info(`Starting ${source.category}/${source.subcategory} — ${source.url}`);
    await source.initialize();
    for await (const message of source) {
      const kind = message.type;
      if (kind === "directory") {
        await this.handleDirectory(message);
      } else if (kind === "url") {
        await this.handleUrl(message);
      } else if (kind === "queue") {
        await this.handleQueue(message);
      }
    }
    this._report();
    return this.status;
  }

  /** Override in subclasses to print a summary. */
  _report() {}
};
240
/**
 * Job that downloads every `url` message through the extractor's HTTP
 * client and writes it via the extractor's storage backend, keeping an
 * in-memory archive so the same media is not fetched twice.
 */
var DownloadJob = class DownloadJob extends Job {
  /** Base output directory (prepended to all paths). */
  basePath = "";
  /** Current target directory metadata (set by directory messages). */
  _currentDir = {};
  /** In-memory archive: category → Set of archive keys. */
  archive = /* @__PURE__ */ new Map();
  /**
   * Registry of per-category "archive formats" — the key is formed
   * by interpolating this format string over the metadata.
   */
  _archiveFmts = /* @__PURE__ */ new Map();
  _postCount = 0;
  _fileCount = 0;
  _downloadedBytes = 0;
  _skippedCount = 0;

  /** Register the archive-key format string used for `category`. */
  registerArchive(category, format) {
    this._archiveFmts.set(category, format);
  }

  /** Simple `{name}` format-string interpolation; missing values become "". */
  _interp(fmt, meta) {
    return fmt.replace(/\{(\w+)\}/g, (_, key) => {
      const v = meta[key];
      return v == null ? "" : String(v);
    });
  }

  /** Resolve the archive category and key for a metadata record. */
  _archiveKey(meta) {
    const cat = meta.category ?? this.extractor.category;
    const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
    return { cat, key: this._interp(fmt, meta) };
  }

  /** Check whether this media has already been downloaded (and skip). */
  _isArchived(meta) {
    const { cat, key } = this._archiveKey(meta);
    return this.archive.get(cat)?.has(key) ?? false;
  }

  /** Mark a post/media as archived. */
  _archive(meta) {
    const { cat, key } = this._archiveKey(meta);
    let set = this.archive.get(cat);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.archive.set(cat, set);
    }
    set.add(key);
  }

  /** Output directory for `meta`, prefixed with `basePath` when set. */
  _targetDir(meta) {
    const dir = this._buildDirPath(meta);
    return this.basePath ? `${this.basePath}/${dir}` : dir;
  }

  /**
   * Normalize an HTTP response body into something storage can write:
   * bytes where possible, otherwise a string.
   */
  _toWritable(body) {
    if (body instanceof Uint8Array) return body;
    if (body instanceof ArrayBuffer) return new Uint8Array(body);
    if (typeof body === "string") return body;
    // A Buffer that went through JSON has the shape {type:"Buffer", data:[...]};
    // the bytes live in `.data` — passing the wrapper itself to Uint8Array
    // would produce an empty array.
    if (typeof body === "object" && body != null && body.type === "Buffer" && Array.isArray(body.data)) {
      return new Uint8Array(body.data);
    }
    return JSON.stringify(body);
  }

  /** Handle a `directory` message: remember its metadata and create the dir. */
  async handleDirectory(msg) {
    this._currentDir = { ...msg.metadata };
    this._postCount++;
    const dirPath = this._targetDir(msg.metadata);
    await this.extractor.storage.mkdir(dirPath);
    this.extractor.log.info(`#${this._postCount} ${msg.metadata.username ?? "?"}/${msg.metadata.post_shortcode ?? "?"} → ${dirPath}/`);
  }

  /**
   * Handle a `url` message: download and store the file unless archived.
   * Failures are logged and recorded in `status` (bit 4) instead of thrown.
   */
  async handleUrl(msg) {
    const meta = {
      ...this._currentDir,
      ...msg.metadata
    };
    if (this._isArchived(meta)) {
      this._skippedCount++;
      return;
    }
    const filename = this._buildFilename(meta);
    const fullPath = `${this._targetDir(meta)}/${filename}`;
    try {
      const resp = await this.extractor.http.request({
        url: msg.url,
        method: "GET",
        responseType: "arraybuffer"
      });
      const data = this._toWritable(resp.data);
      await this.extractor.storage.write(fullPath, data);
      this._fileCount++;
      const size = data instanceof Uint8Array ? data.byteLength : data.length;
      this._downloadedBytes += size;
      this.extractor.log.info(` └─ ${filename} (${formatBytes(size)})`);
      this._archive(meta);
    } catch (err) {
      this.extractor.log.error(`Failed to download ${filename}: ${String(err)}`);
      this.status |= 4;
    }
  }

  /**
   * Handle a `queue` message: run a child DownloadJob with the extractor
   * class given in `metadata._extractor`, sharing config/http/storage/log,
   * then merge the child's status and archive back into this job.
   */
  async handleQueue(msg) {
    const meta = {
      ...this._currentDir,
      ...msg.metadata
    };
    const cls = meta._extractor;
    // Extractor classes are constructors, i.e. typeof "function".
    if (typeof cls !== "function") return;
    const match = cls.pattern.exec(msg.url);
    if (!match) return;
    const parentExtr = this.extractor;
    const childJob = new DownloadJob(Reflect.construct(cls, [{
      url: msg.url,
      match,
      config: parentExtr.config,
      http: parentExtr.http,
      storage: parentExtr.storage,
      log: parentExtr.log
    }]));
    childJob.basePath = this.basePath;
    childJob._currentDir = meta;
    // The child works on copies so a failing child cannot corrupt our sets.
    for (const [cat, set] of this.archive) childJob.archive.set(cat, new Set(set));
    for (const [cat, fmt] of this._archiveFmts) childJob._archiveFmts.set(cat, fmt);
    const childStatus = await childJob.run();
    this.status |= childStatus;
    for (const [cat, set] of childJob.archive) {
      const mine = this.archive.get(cat);
      if (mine) for (const k of set) mine.add(k);
      else this.archive.set(cat, set);
    }
  }

  /** Log a one-line download summary (plus skipped count, if any). */
  _report() {
    const log = this.extractor.log;
    log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
    if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
  }

  /** Relative directory: `<category>/<username>` (username falls back to "_"). */
  _buildDirPath(meta) {
    return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
  }

  /** File name: `<media_id>[_<num>].<extension>` (defaults: "0", "jpg"). */
  _buildFilename(meta) {
    const mid = meta.media_id ?? "0";
    const ext = meta.extension ?? "jpg";
    return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
  }
};
376
/**
 * Job that downloads nothing: it collects the messages of each post and
 * prints a boxed, colorized summary to stdout.
 */
var PrintJob = class PrintJob extends Job {
  /** Metadata of the post currently being collected. */
  _currentDir = {};
  /** Files collected for the current post. */
  _files = [];
  _postCount = 0;
  _fileCount = 0;
  /** Box width in columns: terminal width capped at 100 (80 if unknown). */
  _width;

  constructor(extractor) {
    super(extractor);
    this._width = Math.min(process.stdout.columns ?? 80, 100);
  }

  /** Start a new post, printing the previous one first. */
  async handleDirectory(msg) {
    if (this._postCount > 0) this._flushPost();
    this._currentDir = { ...msg.metadata };
    this._postCount++;
    this._files = [];
  }

  /** Record one file of the current post. */
  async handleUrl(msg) {
    const meta = {
      ...this._currentDir,
      ...msg.metadata
    };
    this._fileCount++;
    const ext = meta.extension ?? "jpg";
    const mid = meta.media_id ?? "?";
    this._files.push({
      num: meta.num ?? this._files.length + 1,
      filename: `${mid}.${ext}`,
      width: meta.width ?? 0,
      height: meta.height ?? 0,
      videoUrl: meta.video_url ?? null,
      audioUrl: meta.audio_url ?? null
    });
  }

  /**
   * Print what has been collected so far, then run a child PrintJob with
   * the extractor class given in `metadata._extractor` and add its
   * post/file counts and status to this job.
   */
  async handleQueue(msg) {
    if (this._files.length > 0 || this._postCount > 0) this._flushPost();
    this._postCount = 0;
    this._files = [];
    const cls = {
      ...this._currentDir,
      ...msg.metadata
    }._extractor;
    // Extractor classes are constructors, i.e. typeof "function".
    if (typeof cls !== "function") return;
    const match = cls.pattern.exec(msg.url);
    if (!match) return;
    const parentExtr = this.extractor;
    const childJob = new PrintJob(Reflect.construct(cls, [{
      url: msg.url,
      match,
      config: parentExtr.config,
      http: parentExtr.http,
      storage: parentExtr.storage,
      log: parentExtr.log
    }]));
    const childStatus = await childJob.run();
    this.status |= childStatus;
    this._postCount += childJob._postCount;
    this._fileCount += childJob._fileCount;
  }

  /** Print the box for the current post; no-op when no metadata is set. */
  _flushPost() {
    const m = this._currentDir;
    if (Object.keys(m).length === 0) return;
    const w = this._width;
    const labelW = 14;
    const shortcode = m.post_shortcode ?? "?";
    const header = ` Post #${this._postCount}: ${shortcode} `;
    // Clamp: on a narrow terminal (or with a long header) the remaining
    // width is negative and String.prototype.repeat would throw.
    const padTotal = Math.max(0, w - 2 - header.length);
    const padL = Math.floor(padTotal / 2);
    const padR = padTotal - padL;
    process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
    // `color` is either a wrapper function, a raw ANSI prefix, or omitted.
    const row = (label, value, color) => {
      const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${RESET}` : value;
      process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
    };
    const username = m.username ?? "?";
    const fullname = m.fullname ?? "";
    row("Author:", fullname ? `${username} (${fullname})` : username, g);
    row("Date:", m.date ?? m.post_date ?? "?");
    row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
    row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
    row("URL:", m.post_url ?? "?");
    const desc = m.description ?? "";
    if (desc) {
      process.stdout.write(` ${dim("│")}\n`);
      process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
      for (const line of desc.split("\n")) {
        for (const wl of this._wrap(line, w - 8)) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
      }
    }
    const tags = m.tags;
    if (tags && tags.length > 0) {
      process.stdout.write(` ${dim("│")}\n`);
      process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
    }
    const locName = m.location_slug ?? "";
    const locId = m.location_id ?? "";
    if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
    const coauthors = m.coauthors;
    if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((co) => co.full_name ? `${co.username} (${co.full_name})` : co.username).join(", "));
    const pinned = m.pinned;
    if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
    const expires = m.expires;
    if (expires) row("Expires:", expires, YELLOW);
    const hlTitle = m.highlight_title;
    if (hlTitle) row("Highlight:", hlTitle);
    const taggedUser = m.tagged_username ?? "";
    if (taggedUser) {
      const taggedFull = m.tagged_full_name ?? "";
      row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
    }
    if (this._files.length > 0) {
      process.stdout.write(` ${dim("│")}\n`);
      process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
      const maxNumW = String(this._files.length).length;
      const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
      const dimW = Math.min(maxFileW, 40);
      for (const f of this._files) {
        const numStr = `[${String(f.num).padStart(maxNumW)}]`;
        // File names longer than 40 chars are truncated with an ellipsis.
        const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
        const res = f.width ? `${f.width}x${f.height}` : "?x?";
        const badges = [];
        if (f.videoUrl) badges.push("video");
        if (f.audioUrl) badges.push("audio");
        let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
        if (badges.length > 0) line += ` ${YELLOW}(${badges.join("+")})${RESET}`;
        process.stdout.write(`${line}\n`);
      }
    }
    process.stdout.write(` ${dim("└")}${"─".repeat(Math.max(0, w - 2))}${dim("┘")}\n`);
  }

  /**
   * Greedy word-wrap: break `text` into lines of at most `maxLen`
   * characters, preferring to break at a space and hard-cutting words
   * that are longer than a line.
   *
   * @param {string} text Single line of text.
   * @param {number} maxLen Maximum line length; values below 1 are
   *   treated as 1 so the loop always makes progress.
   * @returns {string[]} Wrapped lines.
   */
  _wrap(text, maxLen) {
    const limit = Math.max(1, maxLen);
    if (text.length <= limit) return [text];
    const lines = [];
    let remaining = text;
    while (remaining.length > limit) {
      let cut = limit;
      while (cut > 0 && remaining[cut] !== " ") cut--;
      // No space found within the limit: hard-cut at the limit.
      if (cut === 0) cut = limit;
      lines.push(remaining.slice(0, cut).trimEnd());
      remaining = remaining.slice(cut).trimStart();
    }
    if (remaining) lines.push(remaining);
    return lines;
  }

  /** Print the last pending post followed by the totals. */
  _report() {
    this._flushPost();
    process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
    process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
    process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
    process.stdout.write(`\n`);
  }
};
531
+ //#endregion
532
+ //#region src/message.ts
533
/** Build a `directory` message carrying the given metadata. */
function directory(metadata = {}) {
  const message = { type: "directory", metadata };
  return message;
}
539
/** Build a `url` message for the resource at `u` with the given metadata. */
function url(u, metadata = {}) {
  const message = { type: "url", url: u, metadata };
  return message;
}
546
/** Build a `queue` message pointing at `u` with the given metadata. */
function queue(u, metadata = {}) {
  const message = { type: "queue", url: u, metadata };
  return message;
}
553
+ //#endregion
554
+ //#region src/utils/id-codec.ts
555
+ /**
556
+ * Instagram-style Base64-variant ID ↔ shortcode conversion.
557
+ */
558
// URL-safe Base64 alphabet; a character's position is its digit value.
const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
// Character → digit value, built once for constant-time decoding.
const CHAR_INDEX = {};
[...ALPHABET].forEach((symbol, value) => {
  CHAR_INDEX[symbol] = value;
});
const BASE = BigInt(64);

/**
 * Decode a shortcode into its numeric ID, returned as a decimal string.
 * Characters outside the alphabet count as digit 0.
 */
function idFromShortcode(shortcode) {
  let total = 0n;
  for (const symbol of shortcode) {
    const digit = CHAR_INDEX[symbol] ?? 0;
    total = total * BASE + BigInt(digit);
  }
  return total.toString();
}

/**
 * Encode a numeric ID (anything `BigInt()` accepts) as a shortcode.
 * Zero and negative IDs encode to the empty string.
 */
function shortcodeFromId(postId) {
  let encoded = "";
  for (let rest = BigInt(postId); rest > 0n; rest = rest / BASE) {
    // Digits come out least-significant first, so prepend each one.
    encoded = ALPHABET[Number(rest % BASE)] + encoded;
  }
  return encoded;
}
584
+ //#endregion
585
+ //#region src/utils/text.ts
586
+ /**
587
+ * Text utilities ported from gallery-dl's ``text`` module.
588
+ *
589
+ * All functions are pure and environment-agnostic.
590
+ */
591
+ /** String extraction */
592
+ /**
593
+ * Extract the substring between ``begin`` and ``end`` from ``txt``.
594
+ * Returns the substring or ``null`` if either delimiter is missing.
595
+ */
596
/**
 * Return the text found between the first `begin` and the next `end`
 * after it, or `null` when either delimiter is absent.
 */
function extract(txt, begin, end) {
  const openAt = txt.indexOf(begin);
  if (openAt === -1) return null;
  const from = openAt + begin.length;
  const closeAt = txt.indexOf(end, from);
  return closeAt === -1 ? null : txt.slice(from, closeAt);
}
604
+ /**
605
+ * Shorthand: same as ``extract`` but returns ``default_`` on failure.
606
+ * Mirrors the Python ``extr()`` function.
607
+ */
608
/**
 * Like `extract`, but yields `default_` instead of `null` when either
 * delimiter is missing.
 */
function extr(txt, begin, end, default_ = "") {
  const openAt = txt.indexOf(begin);
  if (openAt < 0) return default_;
  const from = openAt + begin.length;
  const closeAt = txt.indexOf(end, from);
  if (closeAt < 0) return default_;
  return txt.slice(from, closeAt);
}
611
+ /** Unicode / HTML */
612
+ /**
613
+ * Decode ``\\uXXXX`` escape sequences in a string.
614
+ */
615
/**
 * Replace every literal `\uXXXX` sequence (four hex digits) in `text`
 * with the UTF-16 code unit it names.
 */
function parseUnicodeEscapes$1(text) {
  const hasEscape = text.includes("\\u");
  if (hasEscape) {
    return text.replace(/\\u([0-9a-fA-F]{4})/g, (_whole, digits) => {
      const codeUnit = Number.parseInt(digits, 16);
      return String.fromCharCode(codeUnit);
    });
  }
  return text;
}
619
+ /**
620
+ * HTML entity decode.
621
+ *
622
+ * In Node.js we could use a DOM parser, but since this library is
623
+ * environment-agnostic we ship a minimal covering the common cases.
624
+ */
625
// Minimal named/numeric entity table covering the common cases.
// Null-prototype object: a lookup such as HTML_ENTITIES["constructor"]
// must not resolve to an inherited Object.prototype member.
const HTML_ENTITIES = Object.assign(Object.create(null), {
  "amp": "&",
  "lt": "<",
  "gt": ">",
  "quot": "\"",
  "apos": "'",
  "nbsp": "\xA0",
  "#x27": "'",
  "#x2F": "/",
  "#39": "'",
  "#47": "/"
});
const RE_ENTITY = /&([^;]+);/g;

/**
 * Decode HTML entities in `text`.
 *
 * Handles the named entities in `HTML_ENTITIES` plus decimal (`&#65;`)
 * and hexadecimal (`&#x41;`) character references. Anything else —
 * unknown names, or numeric references outside the Unicode range
 * 0..0x10FFFF — is left in the output unchanged.
 *
 * @param {string} text Text possibly containing entities.
 * @returns {string} Decoded text.
 */
function unescape(text) {
  return text.replace(RE_ENTITY, (m, name) => {
    const ch = HTML_ENTITIES[name];
    if (ch !== void 0) return ch;
    if (name.startsWith("#")) {
      const isHex = name[1] === "x" || name[1] === "X";
      const cp = isHex ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
      // String.fromCodePoint throws RangeError outside 0..0x10FFFF.
      if (Number.isInteger(cp) && cp >= 0 && cp <= 0x10FFFF) return String.fromCodePoint(cp);
    }
    return m;
  });
}
649
+ /** URL helpers */
650
+ /**
651
+ * URL-decode a string.
652
+ */
653
/**
 * URL-decode a string, tolerating malformed input.
 *
 * The whole string is decoded in one go when it is well-formed.
 * Otherwise each run of consecutive `%XX` escapes is decoded as a unit
 * (so multi-byte UTF-8 sequences still decode), and a run that is itself
 * invalid falls back to byte-by-byte decoding, leaving undecodable
 * escapes untouched.
 *
 * @param {string} text Percent-encoded text.
 * @returns {string} Decoded text; never throws on malformed escapes.
 */
function unquote(text) {
  try {
    return decodeURIComponent(text);
  } catch {
    return text.replace(/(?:%[0-9a-f]{2})+/gi, (run) => {
      try {
        return decodeURIComponent(run);
      } catch {
        return run.replace(/%[0-9a-f]{2}/gi, (escape) => {
          try {
            return decodeURIComponent(escape);
          } catch {
            return escape;
          }
        });
      }
    });
  }
}
666
+ /**
667
+ * Ensure a URL starts with ``https://`` (or ``http://``).
668
+ */
669
/**
 * Return `url` with an HTTP(S) scheme. URLs already starting with
 * `https://` or `http://` (and empty values) are returned as-is; otherwise
 * leading `/` and `:` characters are dropped and `scheme` is prepended.
 */
function ensureHttpScheme(url, scheme = "https://") {
  if (!url) return url;
  const alreadyQualified = ["https://", "http://"].some((prefix) => url.startsWith(prefix));
  if (alreadyQualified) return url;
  const bare = url.replace(/^[/:]+/, "");
  return scheme + bare;
}
674
+ /**
675
+ * Extract filename + extension from a URL and write into ``meta``.
676
+ */
677
/**
 * Split the last path segment of `url` into name and extension and store
 * them (URL-decoded) on `meta.filename` / `meta.extension`.
 *
 * A trailing part counts as an extension only when the dot is not the
 * first character and the part is at most 16 characters long; otherwise
 * the whole segment becomes the filename and the extension is "".
 * The extension is lower-cased.
 */
function nameExtFromURL(url, meta) {
  const segment = filenameFromURL(url);
  const dotAt = segment.lastIndexOf(".");
  const extLength = segment.length - dotAt - 1;
  const hasExtension = dotAt > 0 && extLength <= 16;
  if (!hasExtension) {
    meta.filename = unquote(segment);
    meta.extension = "";
    return;
  }
  meta.filename = unquote(segment.slice(0, dotAt));
  meta.extension = unquote(segment.slice(dotAt + 1)).toLowerCase();
}
688
+ /**
689
+ * Extract the file-name portion of a URL (before query string).
690
+ */
691
/**
 * Return the last `/`-separated segment of `url`, ignoring everything
 * from the first `?` on. Yields "" when `url` cannot be split.
 */
function filenameFromURL(url) {
  try {
    const [withoutQuery] = url.split("?");
    const segments = withoutQuery.split("/");
    return segments[segments.length - 1] ?? "";
  } catch {
    return "";
  }
}
698
+ /**
699
+ * Parse an integer from a possibly-null value. Returns ``default_`` on failure.
700
+ */
701
/**
 * Coerce `value` to a number: numbers pass through unchanged, anything
 * else is stringified and parsed as a base-10 integer. Null/undefined
 * input and non-finite results yield `default_`.
 */
function parseInt(value, default_ = 0) {
  if (value === null || value === void 0) return default_;
  let parsed;
  if (typeof value === "number") {
    parsed = value;
  } else {
    parsed = Number.parseInt(String(value), 10);
  }
  if (!Number.isFinite(parsed)) return default_;
  return parsed;
}
706
/**
 * Build a finder for `pattern`: the returned function lists every
 * distinct match in a text, in order of first appearance.
 */
function tagRe(pattern) {
  const matcher = new RegExp(pattern, "g");
  return (text) => {
    const found = text.match(matcher);
    if (found === null) return [];
    return Array.from(new Set(found));
  };
}
/** Pre-configured hashtag finder (`#` followed by word characters). */
const findTags = tagRe("#\\w+");
715
+ //#endregion
716
+ //#region src/instagram/api.ts
717
+ const APP_ID = "936619743392459";
718
+ const ASBD_ID = "129477";
719
+ var InstagramRestAPI = class {
720
+ http;
721
+ root;
722
+ getCsrf;
723
+ getWwwClaim;
724
+ setWwwClaim;
725
+ setCsrf;
726
+ /** A ref to the extractor's cursor. */
727
+ getCursor;
728
+ setCursor;
729
+ constructor(opts) {
730
+ this.http = opts.http;
731
+ this.root = opts.root;
732
+ this.getCsrf = () => opts.csrfToken.value;
733
+ this.setCsrf = (v) => {
734
+ opts.csrfToken.value = v;
735
+ };
736
+ this.getWwwClaim = () => opts.wwwClaim.value;
737
+ this.setWwwClaim = (v) => {
738
+ opts.wwwClaim.value = v;
739
+ };
740
+ this.getCursor = () => opts.cursor.value;
741
+ this.setCursor = (v) => {
742
+ opts.cursor.value = v;
743
+ return v;
744
+ };
745
+ }
746
+ /** Public endpoint methods */
747
+ /** Single post by shortcode. */
748
+ async *media(shortcode) {
749
+ const endpoint = `/v1/media/${idFromShortcode(shortcode.length > 28 ? shortcode.slice(0, -28) : shortcode)}/info/`;
750
+ yield* this._pagination(endpoint);
751
+ }
752
+ /** Paginated user feed. */
753
+ userFeed(userId) {
754
+ return this._pagination(`/v1/feed/user/${userId}/`, { count: 30 });
755
+ }
756
+ /** Paginated user reels (POST endpoint). */
757
+ userClips(userId) {
758
+ const data = {
759
+ target_user_id: userId,
760
+ page_size: "50",
761
+ max_id: null,
762
+ include_feed_video: "true"
763
+ };
764
+ return this._paginationPost("/v1/clips/user/", data);
765
+ }
766
+ /** Paginated tagged posts. */
767
+ userTagged(userId) {
768
+ return this._pagination(`/v1/usertags/${userId}/feed/`, { count: 20 });
769
+ }
770
+ /** Paginated saved posts (media wrapper). */
771
+ userSaved() {
772
+ return this._pagination("/v1/feed/saved/posts/", { count: 50 }, true);
773
+ }
774
+ /** Paginated collection. */
775
+ userCollection(collectionId) {
776
+ return this._pagination(`/v1/feed/collection/${collectionId}/posts/`, { count: 50 }, true);
777
+ }
778
+ /** Reels media — batch call, returns full reel objects. */
779
+ async reelsMedia(reelIds) {
780
+ const data = await this._call("/v1/feed/reels_media/", { params: { reel_ids: reelIds } });
781
+ if (data && typeof data === "object") {
782
+ const reels = data.reels_media;
783
+ if (Array.isArray(reels)) return reels;
784
+ }
785
+ throw new Error("Auth required — authenticated cookies needed for reels");
786
+ }
787
+ /** Story tray. */
788
+ async reelsTray() {
789
+ const data = await this._call("/v1/feed/reels_tray/");
790
+ if (data && typeof data === "object") {
791
+ const tray = data.tray;
792
+ if (Array.isArray(tray)) return tray;
793
+ }
794
+ return [];
795
+ }
796
+ /** Highlights list (tray). */
797
+ async highlightsTray(userId) {
798
+ const data = await this._call(`/v1/highlights/${userId}/highlights_tray/`);
799
+ if (data && typeof data === "object") return data.tray ?? [];
800
+ return [];
801
+ }
802
+ /** All highlights' media batched by ``chunkSize``. */
803
+ async *highlightsMedia(userId, chunkSize = 5) {
804
+ const ids = (await this.highlightsTray(userId)).map((hl) => hl.id);
805
+ for (let i = 0; i < ids.length; i += chunkSize) {
806
+ const chunk = ids.slice(i, i + chunkSize);
807
+ yield* await this.reelsMedia(chunk);
808
+ }
809
+ }
810
+ /** Hashtag posts (via sections). */
811
+ async *tagsMedia(tag) {
812
+ for await (const section of this.tagsSections(tag)) {
813
+ const medias = section.layout_content?.medias ?? [];
814
+ for (const m of medias) if (m.media) yield m.media;
815
+ }
816
+ }
817
+ async *tagsSections(tag) {
818
+ yield* this._paginationSections(`/v1/tags/${tag}/sections/`, {
819
+ include_persistent: "0",
820
+ max_id: null,
821
+ page: null,
822
+ surface: "grid",
823
+ tab: "recent"
824
+ });
825
+ }
826
+ /** User by numeric ID. */
827
+ async userById(userId) {
828
+ const data = await this._call(`/v1/users/${userId}/info/`);
829
+ if (data && typeof data === "object") return data.user;
830
+ throw new Error("User not found");
831
+ }
832
+ /** User by username (web_profile_info). */
833
+ async userByName(username) {
834
+ const data = await this._call("/v1/users/web_profile_info/", { params: { username } });
835
+ if (data && typeof data === "object") return data.data;
836
+ throw new Error("User not found");
837
+ }
838
+ /** Search user by username. */
839
+ async userBySearch(username) {
840
+ const data = await this._call("https://www.instagram.com/web/search/topsearch/", { params: { query: username } });
841
+ if (data && typeof data === "object") {
842
+ const users = data.users;
843
+ if (users) {
844
+ const name = username.toLowerCase();
845
+ for (const result of users) if (result.user.username.toLowerCase() === name) return result.user;
846
+ }
847
+ }
848
+ throw new Error("User not found");
849
+ }
850
+ /** Scrape user ID from HTML profile page. */
851
+ async userByWeb(username) {
852
+ const resp = await this.http.request({
853
+ url: `https://www.instagram.com/${username}`,
854
+ headers: {
855
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
856
+ "Accept-Language": "en-US,en;q=0.5",
857
+ "Accept-Encoding": "gzip, deflate, br, zstd",
858
+ "Alt-Used": "www.instagram.com",
859
+ "Connection": "keep-alive",
860
+ "Sec-Fetch-Dest": "document",
861
+ "Sec-Fetch-Mode": "navigate",
862
+ "Sec-Fetch-Site": "none",
863
+ "Priority": "u=0, i"
864
+ }
865
+ });
866
+ const text = typeof resp.data === "string" ? resp.data : "";
867
+ const idx = text.indexOf("\"profile_id\":\"");
868
+ if (idx >= 0) {
869
+ const start = idx + 15;
870
+ const end = text.indexOf("\"", start);
871
+ if (end > start) return { id: text.slice(start, end) };
872
+ }
873
+ throw new Error("User not found");
874
+ }
875
+ /** Resolve screen name via fallback chain: search → info → web. */
876
+ async userByScreenName(screenName) {
877
+ for (const strategy of [
878
+ "search",
879
+ "info",
880
+ "web"
881
+ ]) try {
882
+ if (strategy === "search") return await this.userBySearch(screenName);
883
+ if (strategy === "info") return await this.userByName(screenName);
884
+ if (strategy === "web") {
885
+ const result = await this.userByWeb(screenName);
886
+ return {
887
+ pk: result.id,
888
+ id: result.id,
889
+ username: screenName,
890
+ full_name: ""
891
+ };
892
+ }
893
+ } catch {}
894
+ throw new Error("User not found");
895
+ }
896
+ /** Resolve username/id to numeric user ID string. */
897
+ async userId(screenName, checkPrivate = true) {
898
+ if (screenName.startsWith("id:")) return screenName.slice(3);
899
+ const user = await this.userByScreenName(screenName);
900
+ if (checkPrivate && user.is_private && !user.followed_by_viewer) {}
901
+ return user.id ?? user.pk;
902
+ }
903
+ /** Followers (paginated). */
904
+ async *userFollowers(userId) {
905
+ yield* this._paginationFollowing(`/v1/friendships/${userId}/followers/`, {
906
+ count: 12,
907
+ max_id: null
908
+ });
909
+ }
910
+ /** Following (paginated). */
911
+ async *userFollowing(userId) {
912
+ yield* this._paginationFollowing(`/v1/friendships/${userId}/following/`, {
913
+ count: 12,
914
+ max_id: null
915
+ });
916
+ }
917
+ /** Internal — HTTP call */
918
+ async _call(endpoint, opts = {}) {
919
+ const url = endpoint.startsWith("/") ? `https://www.instagram.com/api${endpoint}` : endpoint;
920
+ const csrf = this.getCsrf();
921
+ const headers = {
922
+ "Accept": "*/*",
923
+ "Cookie": `csrftoken=${csrf}`,
924
+ "X-CSRFToken": csrf,
925
+ "X-IG-App-ID": APP_ID,
926
+ "X-ASBD-ID": ASBD_ID,
927
+ "X-IG-WWW-Claim": this.getWwwClaim(),
928
+ "X-Requested-With": "XMLHttpRequest",
929
+ "Connection": "keep-alive",
930
+ "Referer": `${this.root}/`,
931
+ "Sec-Fetch-Dest": "empty",
932
+ "Sec-Fetch-Mode": "cors",
933
+ "Sec-Fetch-Site": "same-origin"
934
+ };
935
+ const resp = await this.http.request({
936
+ url,
937
+ method: opts.method ?? "GET",
938
+ headers,
939
+ params: opts.params ? Object.fromEntries(Object.entries(opts.params).filter(([, v]) => v != null)) : void 0,
940
+ data: opts.data
941
+ });
942
+ const finalUrl = resp.url;
943
+ if (finalUrl.includes("/accounts/login/")) throw new Error("Instagram redirected to login page — you need a valid sessionid. Export it from your browser (F12 → Application → Cookies → sessionid) and pass --sessionid=<value> or set INSTAGRAM_SESSIONID env var.");
944
+ if (finalUrl.includes("/challenge/")) throw new Error("Instagram redirected to challenge page — account flagged. Log in via browser to resolve the challenge, then export a fresh sessionid.");
945
+ const rawCookie = resp.headers["set-cookie"];
946
+ const csrfCookie = (Array.isArray(rawCookie) ? rawCookie.join("; ") : rawCookie ?? "").split(";").find((c) => c.trim().startsWith("csrftoken="));
947
+ if (csrfCookie) {
948
+ const val = csrfCookie.split("=")[1]?.trim();
949
+ if (val) this.setCsrf(val);
950
+ }
951
+ const claim = resp.headers["x-ig-set-www-claim"];
952
+ if (claim != null) this.setWwwClaim(String(claim));
953
+ return resp.data;
954
+ }
955
+ /** Pagination engines */
956
+ async *_pagination(endpoint, params = {}, media = false) {
957
+ let maxId = this.getCursor();
958
+ const reqParams = { ...params };
959
+ while (true) {
960
+ reqParams.max_id = maxId;
961
+ const data = await this._call(endpoint, { params: reqParams });
962
+ if (data) {
963
+ const items = data.items;
964
+ if (items) for (const item of items) if (media) yield item.media ?? item;
965
+ else yield item;
966
+ if (!data.more_available) {
967
+ this.setCursor(null);
968
+ return;
969
+ }
970
+ maxId = this.setCursor(data.next_max_id);
971
+ } else {
972
+ this.setCursor(null);
973
+ return;
974
+ }
975
+ }
976
+ }
977
+ async *_paginationPost(endpoint, reqData) {
978
+ let maxId = this.getCursor();
979
+ const data = { ...reqData };
980
+ while (true) {
981
+ data.max_id = maxId;
982
+ const resp = await this._call(endpoint, {
983
+ method: "POST",
984
+ data
985
+ });
986
+ if (resp) {
987
+ const items = resp.items;
988
+ if (items) for (const item of items) yield item.media ?? item;
989
+ const info = resp.paging_info;
990
+ if (!info || !info.more_available) {
991
+ this.setCursor(null);
992
+ return;
993
+ }
994
+ maxId = this.setCursor(info.max_id);
995
+ } else {
996
+ this.setCursor(null);
997
+ return;
998
+ }
999
+ }
1000
+ }
1001
+ async *_paginationSections(endpoint, reqData) {
1002
+ let maxId = this.getCursor();
1003
+ let page = null;
1004
+ const data = { ...reqData };
1005
+ while (true) {
1006
+ data.max_id = maxId;
1007
+ data.page = page;
1008
+ const info = await this._call(endpoint, {
1009
+ method: "POST",
1010
+ data
1011
+ });
1012
+ if (info) {
1013
+ const sections = info.sections;
1014
+ if (sections) yield* sections;
1015
+ if (!info.more_available) {
1016
+ this.setCursor(null);
1017
+ return;
1018
+ }
1019
+ page = info.next_page;
1020
+ maxId = this.setCursor(info.next_max_id);
1021
+ } else {
1022
+ this.setCursor(null);
1023
+ return;
1024
+ }
1025
+ }
1026
+ }
1027
+ async *_paginationFollowing(endpoint, params) {
1028
+ let maxId = this._parseIntCursor(this.getCursor());
1029
+ const reqParams = { ...params };
1030
+ while (true) {
1031
+ reqParams.max_id = maxId;
1032
+ const data = await this._call(endpoint, { params: reqParams });
1033
+ if (data) {
1034
+ const users = data.users;
1035
+ if (users) yield* users;
1036
+ const nextMaxId = data.next_max_id;
1037
+ if (nextMaxId == null) {
1038
+ this.setCursor(null);
1039
+ return;
1040
+ }
1041
+ maxId = this._parseIntCursor(String(nextMaxId));
1042
+ this.setCursor(String(maxId));
1043
+ } else {
1044
+ this.setCursor(null);
1045
+ return;
1046
+ }
1047
+ }
1048
+ }
1049
+ _parseIntCursor(v) {
1050
+ if (v == null || v === "") return null;
1051
+ const n = Number(v);
1052
+ return Number.isFinite(n) ? n : null;
1053
+ }
1054
+ };
1055
+ //#endregion
1056
+ //#region src/instagram/parsers.ts
1057
/**
 * Main entry — REST.
 *
 * Converts one REST post object into the flat metadata record used by the
 * extractor pipeline. Objects carrying an `items` array are handed to
 * `parseStoryRest` instead.
 *
 * @param {object} post - Raw REST post (or reel) object.
 * @param {object} cfg - Parser config (`root`, `findTags`, `parseTimestamp`, …).
 * @returns {object} Metadata record with a `_files` array of media entries.
 */
function parsePostRest(post, cfg) {
  if (post.items) return parseStoryRest(post, cfg);
  const { user: owner, caption } = post;
  const date = cfg.parseTimestamp(post.taken_at ?? post.created_at ?? null);
  const data = {
    post_id: post.pk,
    post_shortcode: post.code,
    post_url: `${cfg.root}/p/${post.code}/`,
    likes: post.like_count ?? 0,
    liked: post.has_liked ?? false,
    pinned: extractPinned(post),
    owner_id: owner.pk,
    username: owner.username ?? "",
    fullname: owner.full_name ?? "",
    post_date: date,
    date,
    description: caption ? caption.text : "",
    type: "post",
    count: 0,
    _files: []
  };

  const tags = cfg.findTags(data.description);
  if (tags.length > 0) data.tags = Array.from(new Set(tags)).sort();

  const location = post.location;
  if (location) {
    const slug = location.short_name.replace(/\s+/g, "-").toLowerCase();
    data.location_id = location.pk;
    data.location_slug = slug;
    data.location_url = `${cfg.root}/explore/locations/${location.pk}/${slug}/`;
  }

  if (post.coauthor_producers) {
    data.coauthors = post.coauthor_producers.map((coauthor) => ({
      id: coauthor.pk,
      username: coauthor.username,
      full_name: coauthor.full_name
    }));
  }

  // A carousel contributes one file per child; anything else is its own file.
  let items;
  if (post.carousel_media?.length) {
    data.sidecar_media_id = data.post_id;
    data.sidecar_shortcode = data.post_shortcode;
    items = post.carousel_media;
  } else {
    items = [post];
  }

  const files = data._files;
  for (let index = 0; index < items.length; index += 1) {
    const item = items[index];
    const position = index + 1;
    const media = parseMediaItem(item, post, cfg, position);
    if (media) {
      extractTaggedUsers(item, media);
      files.push(media);
      const sticker = item.story_music_stickers?.[0];
      if (sticker) {
        const stickerAudio = extractAudio(item, data, sticker, cfg);
        if (stickerAudio) {
          stickerAudio.num = position;
          files.push(stickerAudio);
        }
      }
    }
  }

  const musicInfo = post.music_metadata && post.music_metadata.music_info;
  if (musicInfo) {
    const postAudio = extractAudio(post, data, { music_asset_info: musicInfo }, cfg);
    if (postAudio) {
      postAudio.num = items.length;
      files.push(postAudio);
    }
  }

  // A post consisting of exactly one video file is reported as a reel.
  if (files.length === 1 && files[0].video_url) {
    data.type = "reel";
    data.post_url = `${cfg.root}/reel/${post.code}/`;
  }
  if (post.subscription_media_visibility) {
    data.subscription = post.subscription_media_visibility;
  }
  return data;
}
1135
/**
 * Story / highlight.
 *
 * Converts a reel object (one with an `items` array) into a metadata
 * record. A reel with a truthy `expiring_at` is a story; otherwise it is
 * treated as a highlight.
 *
 * For a story without a truthy `seen`, `post.seen` is set to
 * `expiring_at - 86400` on the input object. This happens before the date
 * is computed so the fallback actually reaches `date` / `post_date`.
 *
 * @param {object} post - Raw reel object; `post.seen` may be written.
 * @param {object} cfg - Parser config (`root`, `parseTimestamp`, …).
 * @returns {object} Metadata record with a `_files` array of media entries.
 */
function parseStoryRest(post, cfg) {
  const items = post.items;
  const reelId = String(post.id).split(":").pop() ?? "0";
  const expires = post.expiring_at;
  const isStory = !!expires;
  if (isStory && !post.seen) post.seen = expires - 86400;
  const date = cfg.parseTimestamp(post.taken_at ?? post.created_at ?? post.seen ?? null);
  const data = {
    post_id: reelId,
    post_shortcode: shortcodeFromId(reelId),
    post_url: isStory ? `${cfg.root}/stories/${post.user.username}/` : `${cfg.root}/stories/highlights/${reelId}/`,
    likes: 0,
    liked: false,
    pinned: [],
    owner_id: post.user.pk,
    username: post.user.username ?? "",
    fullname: post.user.full_name ?? "",
    post_date: date,
    date,
    description: "",
    type: isStory ? "story" : "highlight",
    count: 0,
    _files: [],
    expires: expires ? cfg.parseTimestamp(expires) : void 0,
    user: post.user
  };
  if (!isStory && post.title) data.highlight_title = post.title;
  for (let num = 0; num < items.length; num++) {
    const item = items[num];
    const media = parseMediaItem(item, post, cfg, num + 1);
    if (!media) continue;
    extractTaggedUsers(item, media);
    data._files.push(media);
  }
  return data;
}
1173
/**
 * Single media item.
 *
 * Builds the file record for one image or video of a post or reel.
 *
 * @param {object} item - Raw media item; may be mutated (see below).
 * @param {object} parent - Enclosing post, used as a `taken_at` fallback.
 * @param {object} cfg - Parser config (`staticVideo`, `videosDash`, `parseTimestamp`).
 * @param {number} num - 1-based position of the item within its post.
 * @returns {object|null} The media record, or null when the item has no
 *   usable image candidate (missing `image_versions2`, missing or empty
 *   `candidates`).
 */
function parseMediaItem(item, parent, cfg, num) {
  // Every record needs the first image candidate for `display_url`, so an
  // item without one is skipped instead of failing later on `image.url`.
  const image = item?.image_versions2?.candidates?.[0];
  if (!image) return null;

  // With static videos disabled, an item whose original type was an image
  // (1) but is served as another media type is downgraded back to an image.
  if (!cfg.staticVideo && item.original_media_type != null && item.original_media_type === 1 && item.original_media_type !== item.media_type) {
    delete item.video_versions;
    item.original_width = image.width;
    item.original_height = image.height;
  }

  const widthOrig = item.original_width ?? 0;
  const heightOrig = item.original_height ?? 0;
  let video = null;
  let manifest = null;
  let width;
  let height;
  if (item.video_versions?.length) {
    // Pick the version with the largest width × height × type product.
    video = item.video_versions.reduce((best, v) => v.width * v.height * v.type > best.width * best.height * best.type ? v : best);
    if (item.video_dash_manifest && cfg.videosDash) {
      manifest = item.video_dash_manifest;
      width = widthOrig;
      height = heightOrig;
    } else {
      width = video.width;
      height = video.height;
    }
  } else {
    width = image.width;
    height = image.height;
  }

  const media = {
    num,
    date: cfg.parseTimestamp(item.taken_at ?? video?.taken_at ?? parent.taken_at ?? null),
    media_id: item.pk,
    shortcode: item.code ?? shortcodeFromId(item.pk),
    display_url: image.url,
    video_url: video?.url ?? null,
    width,
    width_original: widthOrig,
    height,
    height_original: heightOrig,
    tagged_users: []
  };
  if (manifest != null) media._ytdl_manifest_data = manifest;
  if (item.owner) media.owner = item.owner;
  if (item.reshared_story_media_author) media.author = item.reshared_story_media_author;
  if (item.expiring_at != null) media.expires = cfg.parseTimestamp(item.expiring_at);
  if (item.subscription_media_visibility) media.subscription = item.subscription_media_visibility;
  if (item.audience) media.audience = item.audience;
  return media;
}
1232
/**
 * Tagged users.
 *
 * Rebuilds `dest.tagged_users` from four places on `src`: GraphQL tag
 * edges, REST `usertags.in`, `reel_mentions`, and "mention" bloks
 * stickers. Entries are `{ id, username, full_name }`; when the same id
 * appears more than once only the first entry is kept.
 *
 * @param {object} src - Raw media node or item.
 * @param {object} dest - Media record whose `tagged_users` is replaced.
 */
function extractTaggedUsers(src, dest) {
  const collected = [];
  dest.tagged_users = collected;
  const add = (id, person) => {
    collected.push({ id, username: person.username, full_name: person.full_name });
  };

  for (const edge of src.edge_media_to_tagged_user?.edges || []) {
    const person = edge.node.user;
    add(person.id ?? person.pk, person);
  }
  for (const tag of src.usertags?.in || []) {
    const person = tag.user;
    add(person.pk, person);
  }
  for (const mention of src.reel_mentions || []) {
    const person = mention.user;
    add(person.pk ?? person.id ?? "", person);
  }
  for (const entry of src.story_bloks_stickers || []) {
    const sticker = entry.bloks_sticker;
    if (sticker.bloks_sticker_type !== "mention") continue;
    const mentioned = sticker.sticker_data.ig_mention;
    add(mentioned.account_id, mentioned);
  }

  // A Map keeps insertion order, so the first entry per id survives.
  const firstById = new Map();
  for (const entry of collected) {
    if (!firstById.has(entry.id)) firstById.set(entry.id, entry);
  }
  dest.tagged_users = [...firstById.values()];
}
1281
/**
 * Audio / music extraction.
 *
 * Copies the track description from a music sticker onto `dest`
 * (`audio_title`, `audio_duration`, `audio_timestamps`, `audio_artist`,
 * `audio_user`) and, when a progressive download URL exists, returns a
 * file record for the audio track.
 *
 * @param {object} src - Item or post supplying `taken_at`.
 * @param {object} dest - Post metadata record that receives `audio_*` fields.
 * @param {object} sticker - Object with `music_asset_info` and optionally
 *   `music_consumption_info`.
 * @param {object} cfg - Parser config (`parseTimestamp`).
 * @returns {object|null} Audio file record with `num: 0`, or null when
 *   there is no asset info or no download URL.
 */
function extractAudio(src, dest, sticker, cfg) {
  const asset = sticker.music_asset_info;
  if (!asset) return null;
  const consumption = sticker.music_consumption_info ?? asset;

  const title = asset.title;
  const duration = (asset.duration_in_ms ?? 0) / 1e3;
  const timestamps = asset.highlight_start_times_in_ms;
  const artist = asset.display_artist ?? consumption.display_artist;
  const artistUser = asset.ig_artist ?? consumption.ig_artist;

  dest.audio_title = title;
  dest.audio_duration = duration;
  dest.audio_timestamps = timestamps;
  dest.audio_artist = artist;
  dest.audio_user = artistUser;

  const downloadUrl = asset.progressive_download_url;
  if (!downloadUrl) return null;

  return {
    num: 0,
    date: cfg.parseTimestamp(src.taken_at ?? null),
    media_id: asset.id,
    shortcode: shortcodeFromId(asset.id),
    display_url: asset.cover_artwork_uri ?? "",
    video_url: null,
    audio_url: downloadUrl,
    width: 0,
    width_original: 0,
    height: 0,
    height_original: 0,
    tagged_users: [],
    audio_user: artistUser,
    audio_title: title,
    audio_artist: artist,
    audio_duration: duration,
    audio_timestamps: timestamps
  };
}
1313
/**
 * GraphQL parser.
 *
 * Converts one GraphQL post node into the flat metadata record used by
 * the extractor pipeline. Sidecar posts contribute one file per child
 * edge; every other post contributes itself as a single file.
 *
 * @param {object} post - Raw GraphQL post node.
 * @param {object} cfg - Parser config (`root`, `findTags`, `parseTimestamp`).
 * @returns {object} Metadata record with a `_files` array of media entries.
 */
function parsePostGraphql(post, cfg) {
  const owner = post.owner;
  const date = cfg.parseTimestamp(post.taken_at_timestamp);
  const data = {
    typename: post.__typename ?? "GraphImage",
    likes: post.edge_media_preview_like?.count ?? 0,
    liked: post.viewer_has_liked ?? false,
    pinned: post.pinned_for_users?.map((pinner) => Number(pinner.id)) ?? [],
    owner_id: owner.id ?? owner.pk,
    username: owner.username ?? "",
    fullname: owner.full_name ?? "",
    post_id: post.id,
    post_shortcode: post.shortcode,
    post_url: `${cfg.root}/p/${post.shortcode}/`,
    post_date: date,
    date,
    description: "",
    type: "post",
    count: 0,
    _files: []
  };

  const captionText = post.edge_media_to_caption?.edges?.map((edge) => edge.node.text).join("\n") ?? "";
  data.description = parseUnicodeEscapes(captionText);

  const tags = cfg.findTags(data.description);
  if (tags.length > 0) data.tags = Array.from(new Set(tags)).sort();

  const location = post.location;
  if (location) {
    data.location_id = location.pk;
    data.location_slug = location.short_name;
    data.location_url = `${cfg.root}/explore/locations/${location.pk}/${location.short_name}/`;
  }

  const coauthors = post.coauthor_producers;
  if (coauthors?.length) {
    data.coauthors = coauthors.map((coauthor) => ({
      id: coauthor.id ?? coauthor.pk,
      username: coauthor.username
    }));
  }

  // Builds one file record; `sidecarFields` is spliced in at the position
  // those keys occupy for sidecar children (empty for single posts).
  const addFile = (node, num, shortcode, sidecarFields) => {
    const size = node.dimensions;
    const media = {
      num,
      date: data.date,
      media_id: node.id,
      shortcode,
      display_url: node.display_url,
      video_url: node.video_url ?? null,
      width: size.width,
      height: size.height,
      ...sidecarFields,
      tagged_users: [],
      width_original: size.width,
      height_original: size.height
    };
    extractTaggedUsers(node, media);
    data._files.push(media);
  };

  const sidecar = post.edge_sidecar_to_children;
  if (sidecar?.edges) {
    data.sidecar_media_id = data.post_id;
    data.sidecar_shortcode = data.post_shortcode;
    let position = 0;
    for (const { node } of sidecar.edges) {
      position += 1;
      addFile(node, position, node.shortcode ?? shortcodeFromId(node.id), {
        sidecar_media_id: data.post_id,
        sidecar_shortcode: data.post_shortcode
      });
    }
  } else {
    addFile(post, 1, post.shortcode, {});
  }
  return data;
}
1398
/**
 * Return the list of user ids a post is pinned for: the timeline list if
 * truthy, else the clips-tab list if truthy, else an empty array.
 */
function extractPinned(post) {
  return post.timeline_pinned_user_ids || post.clips_tab_pinned_user_ids || [];
}
1403
/**
 * Replace literal `\uXXXX` sequences (exactly four hex digits) in `text`
 * with the UTF-16 code unit they name. Text without `\u` is returned as is.
 */
function parseUnicodeEscapes(text) {
  const escapePattern = /\\u([0-9a-fA-F]{4})/g;
  const decode = (_match, digits) => String.fromCharCode(Number.parseInt(digits, 16));
  return text.includes("\\u") ? text.replace(escapePattern, decode) : text;
}
1407
+ //#endregion
1408
+ //#region src/instagram/base.ts
1409
/**
 * Minimal mutable box: holds one value in `value` so several objects can
 * share and update the same piece of state.
 */
var Ref = class {
  /** The boxed value. */
  value;
  /** @param {*} initial - Value stored in the box at creation. */
  constructor(initial) {
    this.value = initial;
  }
};
1415
/**
 * Shared base class of the Instagram extractors.
 *
 * Owns the REST API client and the state it shares with it (CSRF token,
 * WWW claim, pagination cursor) and implements the common pipeline in
 * `items()`: fetch posts from the subclass's `posts()`, parse them, and
 * emit one directory message per post followed by one URL message per
 * downloadable file.
 */
var InstagramExtractor = class extends Extractor {
  category = "instagram";
  root = "https://www.instagram.com";
  api;
  csrfToken = new Ref("");
  wwwClaim = new Ref("0");
  cursor = new Ref(null);
  _loggedIn = true;
  _user = null;
  _findTags = findTags;
  _csrfSeed;
  constructor(opts) {
    super(opts);
    this._csrfSeed = opts.csrfToken;
  }
  /** Initialization: seed the CSRF token and create the API client. */
  async _init() {
    this.csrfToken.value = this._csrfSeed || Extractor.generateToken(16);
    this.api = new InstagramRestAPI({
      http: this.http,
      root: this.root,
      csrfToken: this.csrfToken,
      wwwClaim: this.wwwClaim,
      cursor: this.cursor
    });
  }
  /**
   * Request override: rejects redirects to the login / challenge pages
   * and records the `x-ig-set-www-claim` response header when present.
   *
   * @throws {Error} When the final URL is the login or challenge page.
   */
  async request(url, cfg = {}) {
    const response = await super.request(url, cfg);
    // A response without a final URL cannot be a redirect we need to reject.
    const finalUrl = response.url ?? "";
    if (finalUrl.includes("/accounts/login/")) throw new Error("HTTP redirect to login page — cookies expired or invalid");
    if (finalUrl.includes("/challenge/")) throw new Error("HTTP redirect to challenge page — account flagged");
    const claim = response.headers["x-ig-set-www-claim"];
    if (claim != null) this.wwwClaim.value = String(claim);
    return response;
  }
  /** Login: only marks the extractor as logged in. */
  async login() {
    this._loggedIn = true;
  }
  /**
   * Core pipeline.
   *
   * Options read via `_cfg`: `videos`, `previews`, `audio`, `max-posts`,
   * `order-files`, `static-videos`. For each post a directory message is
   * yielded, then its files. Audio and video files are followed by their
   * preview image only when `previews` is a list naming "audio" / "video".
   */
  async *items() {
    await this.login();
    const meta = await this.metadata() ?? {};
    const videos = this._cfg("videos", true);
    const videosDash = videos !== "merged";
    const shouldDownloadVideos = !!videos;
    const previews = this._cfg("previews", false);
    // Array.isArray rather than typeof === "object": a null option value is
    // also an "object" and would crash on `.includes`.
    const previewKinds = Array.isArray(previews) ? previews : [];
    const previewsVid = previewKinds.includes("video");
    const previewsAud = previewKinds.includes("audio");
    const audio = this._cfg("audio", false);
    const maxPosts = this._cfg("max-posts");
    const orderFiles = this._cfg("order-files");
    // Values starting with "r" or "d" request reversed file order.
    const reverse = orderFiles ? ["r", "d"].includes(orderFiles[0]) : false;
    const parserCfg = {
      root: this.root,
      findTags: this._findTags,
      parseTimestamp: this.parseTimestamp.bind(this),
      staticVideo: this._cfg("static-videos", true) ?? true,
      warnVideo: !previews && shouldDownloadVideos,
      warnImage: 1,
      videosDash
    };
    this.log.debug(`cfg: videos=${shouldDownloadVideos} previews=${!!previews} audio=${audio} maxPosts=${maxPosts ?? "∞"} staticVideos=${parserCfg.staticVideo}`);
    let count = 0;
    for await (const post of this.posts()) {
      if (maxPosts != null && count >= maxPosts) break;
      count++;
      // GraphQL nodes are recognised by their `__typename` key.
      const parsed = "__typename" in post ? parsePostGraphql(post, parserCfg) : parsePostRest(post, parserCfg);
      if (this._user) parsed.user = this._user;
      Object.assign(parsed, meta);
      const files = parsed._files;
      parsed.count = files.length;
      yield {
        type: "directory",
        metadata: parsed
      };
      const ordered = reverse ? [...files].reverse() : files;
      for (const file of ordered) {
        const combined = {
          ...parsed,
          ...file
        };
        if (file.audio_url) {
          if (audio) {
            nameExtFromURL(file.audio_url, combined);
            yield url(file.audio_url, combined);
          }
          // The preview image gets a "p"-suffixed media id.
          if (previewsAud) combined.media_id = `${combined.media_id}p`;
          else continue;
        }
        if (file.video_url) {
          if (shouldDownloadVideos) {
            nameExtFromURL(file.video_url, combined);
            yield url(file.video_url, combined);
          }
          if (previewsVid) combined.media_id = `${combined.media_id}p`;
          else continue;
        }
        const imgUrl = file.display_url;
        nameExtFromURL(imgUrl, combined);
        if (combined.extension === "webp" && imgUrl.includes("stp=dst-jpg")) combined.extension = "jpg";
        yield url(imgUrl, combined);
      }
    }
    if (count === 0) this.log.warn("No posts returned — API may have returned empty data (check sessionid or post visibility)");
  }
  /** Subclass hooks */
  /**
   * Extra metadata merged into every parsed post; empty by default.
   * @virtual
   */
  async metadata() {
    return {};
  }
  /**
   * Cursor management: read the `cursor` option. `true` (the default) and
   * any falsy value mean "no stored cursor" and yield null.
   */
  _initCursor() {
    const cursor = this._cfg("cursor", true);
    if (cursor === true) return null;
    if (!cursor) return null;
    return cursor;
  }
  /** Store `cursor` in the shared ref (logging truthy values) and return it. */
  _updateCursor(cursor) {
    if (cursor) this.log.debug(`Cursor: ${cursor}`);
    this.cursor.value = cursor;
    return cursor;
  }
  /**
   * User assignment: remember `user` and, on that same object, replace
   * each GraphQL `edge_*` counter with a flat `count_*` number (0 when the
   * edge is missing).
   */
  _assignUser(user) {
    this._user = user;
    const mappings = [
      ["count_media", "edge_owner_to_timeline_media"],
      ["count_video", "edge_felix_video_timeline"],
      ["count_saved", "edge_saved_media"],
      ["count_mutual", "edge_mutual_followed_by"],
      ["count_follow", "edge_follow"],
      ["count_followed", "edge_followed_by"],
      ["count_collection", "edge_media_collections"]
    ];
    const rec = user;
    for (const [newKey, oldKey] of mappings) try {
      rec[newKey] = rec[oldKey]?.count ?? 0;
      delete rec[oldKey];
    } catch {
      rec[newKey] = 0;
    }
  }
};
1560
+ //#endregion
1561
+ //#region src/instagram/extractors.ts
1562
/** Matches an optional scheme and `www.` followed by `instagram.com`. */
const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/;
/**
 * Append `path` (a RegExp or a regex source string) to `base` and return
 * the combined pattern as a new case-insensitive RegExp.
 */
function re(base, path) {
  const suffix = typeof path !== "string" ? path.source : path;
  return new RegExp(`${base.source}${suffix}`, "i");
}
1567
/**
 * Extractor for a single post, TV video, reel or share link.
 *
 * The pattern captures only the shortcode, so whether the URL is a share
 * link or a reel is derived from the matched text itself. The previous
 * checks on capture groups 2 and 3 could never succeed because the
 * pattern has a single group.
 */
var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtractor {
  static subcategory = "post";
  static pattern = re(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//, /(?:share(?:\/(?:p|tv|reels?))?|(?:[^/?#]+\/)?(?:p|tv|reels?))\/([^/?#]+)/);
  subcategory = InstagramPostExtractor.subcategory;
  /** True when the matched URL is a `/share/…` link that must be resolved. */
  _isShareLink = false;
  constructor(opts) {
    super(opts);
    // match[0] ends right after the shortcode, so anchoring with `$` only
    // inspects the path segments directly in front of it.
    const matched = opts.match[0];
    this._isShareLink = /\/share\/(?:(?:p|tv|reels?)\/)?[^/?#]+$/i.test(matched);
    if (/\/reels?\/[^/?#]+$/i.test(matched)) this.subcategory = "reel";
  }
  /** @returns {InstagramPostExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramPostExtractor.pattern.exec(url);
    if (!match) return null;
    return new InstagramPostExtractor({
      ...opts,
      url,
      match
    });
  }
  /**
   * Yield the media for the post. Share links are requested first and the
   * shortcode is taken from the second-to-last path segment of the final
   * URL; if that is unavailable the captured value is used.
   */
  async *posts() {
    let shortcode = this.groups[0];
    if (!shortcode) return;
    if (this._isShareLink) {
      this.log.info(`Resolving share link: ${this.url}`);
      const response = await this.request(ensureHttpScheme(this.url), { headers: {
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin"
      } });
      const parts = response.url?.split("/");
      shortcode = parts?.[parts.length - 2] ?? shortcode;
    }
    this.log.debug(`Fetching post: ${shortcode}`);
    yield* this.api.media(shortcode);
  }
};
1601
/**
 * Extractor for a profile URL. It downloads nothing itself; it queues one
 * child extractor per category named by the `include` option.
 */
var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
  static subcategory = "user";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
  subcategory = InstagramUserExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /** @returns {InstagramUserExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramUserExtractor.pattern.exec(url);
    return match ? new InstagramUserExtractor({ ...opts, url, match }) : null;
  }
  /**
   * Yield a queue message per requested category. `include` may be "all",
   * a comma-separated string, or a list; unknown names are logged.
   */
  async *items() {
    await this.login();
    const userPath = this.groups[0] ?? "/";
    const base = `${this.root}${userPath}/`;
    const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
    const include = this._cfg("include", ["posts"]);
    let categories;
    if (include === "all") {
      categories = ["posts", "reels", "tagged", "stories", "highlights", "info", "avatar"];
    } else if (typeof include === "string") {
      categories = include.replace(/\s+/g, "").split(",");
    } else {
      categories = include;
    }
    const extractors = {
      info: { cls: InstagramInfoExtractor, url: `${base}info/` },
      avatar: { cls: InstagramAvatarExtractor, url: `${base}avatar/` },
      stories: { cls: InstagramStoriesExtractor, url: storiesUrl },
      highlights: { cls: InstagramHighlightsExtractor, url: `${base}highlights/` },
      posts: { cls: InstagramPostsExtractor, url: `${base}posts/` },
      reels: { cls: InstagramReelsExtractor, url: `${base}reels/` },
      tagged: { cls: InstagramTaggedExtractor, url: `${base}tagged/` }
    };
    for (const cat of categories) {
      const entry = extractors[cat];
      if (!entry) {
        this.log.warn(`Invalid include '${cat}'`);
        continue;
      }
      yield queue(entry.url, { _extractor: entry.cls });
    }
  }
  /** A profile URL has no posts of its own. */
  async *posts() {}
};
1670
/** Extractor for `<user>/posts`: streams the user's feed. */
var InstagramPostsExtractor = class InstagramPostsExtractor extends InstagramExtractor {
  static subcategory = "posts";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/posts/);
  subcategory = InstagramPostsExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /** @returns {InstagramPostsExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramPostsExtractor.pattern.exec(url);
    return match ? new InstagramPostsExtractor({ ...opts, url, match }) : null;
  }
  /** Resolve the captured `/<name>` path to a user id and yield the feed. */
  async *posts() {
    const userPath = this.groups[0] ?? "";
    const screenName = userPath.replace(/^\//, "");
    const userId = await this.api.userId(screenName);
    yield* this.api.userFeed(userId);
  }
};
1692
/** Extractor for `<user>/reels`: streams the user's clips. */
var InstagramReelsExtractor = class InstagramReelsExtractor extends InstagramExtractor {
  static subcategory = "reels";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/reels/);
  subcategory = InstagramReelsExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /** @returns {InstagramReelsExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramReelsExtractor.pattern.exec(url);
    return match ? new InstagramReelsExtractor({ ...opts, url, match }) : null;
  }
  /** Resolve the captured `/<name>` path to a user id and yield the clips. */
  async *posts() {
    const userPath = this.groups[0] ?? "";
    const screenName = userPath.replace(/^\//, "");
    const userId = await this.api.userId(screenName);
    yield* this.api.userClips(userId);
  }
};
1714
/** Extractor for `<user>/tagged`: posts in which the user is tagged. */
var InstagramTaggedExtractor = class InstagramTaggedExtractor extends InstagramExtractor {
  static subcategory = "tagged";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/tagged/);
  subcategory = InstagramTaggedExtractor.subcategory;
  /** Id of the tagged user; filled in by `metadata()`. */
  _taggedUserId = "";
  constructor(opts) {
    super(opts);
  }
  /** @returns {InstagramTaggedExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramTaggedExtractor.pattern.exec(url);
    if (!match) return null;
    return new InstagramTaggedExtractor({
      ...opts,
      url,
      match
    });
  }
  /**
   * Resolve the tagged user and return the `tagged_*` metadata fields.
   *
   * A screen name is resolved once; the id comes from that same record
   * (`id`, falling back to `pk`) rather than from a second lookup of the
   * same name through `api.userId`.
   */
  async metadata() {
    const screenName = (this.groups[0] ?? "").replace(/^\//, "");
    let user;
    if (screenName.startsWith("id:")) {
      this._taggedUserId = screenName.slice(3);
      user = await this.api.userById(this._taggedUserId);
    } else {
      user = await this.api.userByScreenName(screenName);
      this._taggedUserId = user.id ?? user.pk;
    }
    return {
      tagged_owner_id: user.id ?? user.pk,
      tagged_username: user.username,
      tagged_full_name: user.full_name
    };
  }
  /** Yield tagged posts, resolving the user first if that has not happened yet. */
  async *posts() {
    if (!this._taggedUserId) await this.metadata();
    yield* this.api.userTagged(this._taggedUserId);
  }
};
1752
/**
 * Extractor for stories and single highlights.
 *
 * Accepted URL shapes (capture groups in order):
 *   /stories/highlights/<id>            → h1
 *   /stories/<user>[/<media id>]        → user, m1
 *   /s/<base64 id>[?story_media_id=<n>] → h2, m2
 */
var InstagramStoriesExtractor = class InstagramStoriesExtractor extends InstagramExtractor {
  static subcategory = "stories";
  // The short-link alternative is `s/<base64>`; it previously lacked the
  // `s` and therefore only matched a double slash after the host.
  static pattern = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:stories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|s\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/;
  subcategory = InstagramStoriesExtractor.subcategory;
  /** `highlight:<id>` for highlight URLs, null for user stories. */
  highlightId = null;
  /** Media id when the URL points at one specific story item, else null. */
  mediaId = null;
  constructor(opts) {
    super(opts);
    const groups = this.groups;
    const h1 = groups[0];
    const user = groups[1];
    const m1 = groups[2];
    const h2 = groups[3];
    const m2 = groups[4];
    if (user) {
      this.subcategory = "stories";
      this.highlightId = null;
    } else {
      this.subcategory = "highlights";
      if (h1) {
        this.highlightId = `highlight:${h1}`;
      } else {
        // The pattern pins the base64 prefix "aGlnaGxpZ2h0", which decodes
        // to "highlight", so the decoded text already carries the prefix
        // and is used as is. A trailing "/" is not part of the encoded id.
        const encoded = (h2 ?? "").replace(/\/+$/, "");
        this.highlightId = Buffer.from(encoded, "base64").toString("utf-8");
      }
    }
    this.mediaId = m1 ?? m2 ?? null;
  }
  /** @returns {InstagramStoriesExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramStoriesExtractor.pattern.exec(url);
    if (!match) return null;
    return new InstagramStoriesExtractor({
      ...opts,
      url,
      match
    });
  }
  /**
   * Yield the reel(s) for the URL.
   *
   * With a media id, only the first reel is yielded, narrowed to the
   * matching item when one is found. With the `split` option, the first
   * reel is yielded once per item. Otherwise all reels are yielded.
   */
  async *posts() {
    const reelId = this.highlightId ? this.highlightId : await this.api.userId((this.groups[1] ?? "").toString());
    const reels = await this.api.reelsMedia([reelId]);
    if (!reels.length) return;
    if (this.mediaId) {
      const reel = reels[0];
      // mediaId comes from the URL and is a string; compare as strings so
      // a numeric `pk` still matches.
      for (const item of reel.items ?? []) if (String(item.pk) === this.mediaId) {
        reel.items = [item];
        break;
      }
      yield reel;
      return;
    }
    if (this._cfg("split", false)) {
      const reel = reels[0];
      for (const item of reel.items ?? []) {
        const copy = { ...reel };
        copy.items = [item];
        yield copy;
      }
    } else yield* reels;
  }
};
1807
/** Extractor for `<user>/highlights`: streams the user's highlight reels. */
var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
  static subcategory = "highlights";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
  subcategory = InstagramHighlightsExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /** @returns {InstagramHighlightsExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramHighlightsExtractor.pattern.exec(url);
    return match ? new InstagramHighlightsExtractor({ ...opts, url, match }) : null;
  }
  /** Resolve the captured `/<name>` path to a user id and yield the highlights. */
  async *posts() {
    const userPath = this.groups[0] ?? "";
    const screenName = userPath.replace(/^\//, "");
    const userId = await this.api.userId(screenName);
    yield* this.api.highlightsMedia(userId);
  }
};
1829
/** Extractor for `/explore/tags/<tag>`: streams media for a hashtag. */
var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtractor {
  static subcategory = "tag";
  static pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/);
  subcategory = InstagramTagExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /** @returns {InstagramTagExtractor|null} An extractor, or null when `url` does not match. */
  static fromURL(url, opts) {
    const match = InstagramTagExtractor.pattern.exec(url);
    return match ? new InstagramTagExtractor({ ...opts, url, match }) : null;
  }
  /** Metadata carrying the URL-decoded tag name. */
  async metadata() {
    const encodedTag = this.groups[0] ?? "";
    const tag = decodeURIComponent(encodedTag);
    return { tag };
  }
  /** Yield the media for the URL-decoded tag name. */
  async *posts() {
    const encodedTag = this.groups[0] ?? "";
    const tag = decodeURIComponent(encodedTag);
    yield* this.api.tagsMedia(tag);
  }
};
1854
/**
 * Extractor for `<profile>/info` URLs.
 *
 * It emits a single `directory(...)` message built from the user record and
 * produces no posts.
 */
var InstagramInfoExtractor = class InstagramInfoExtractor extends InstagramExtractor {
  static subcategory = "info";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/info/);
  subcategory = InstagramInfoExtractor.subcategory;

  constructor(opts) {
    super(opts);
  }

  /**
   * Factory: returns a new extractor when `url` matches the class pattern,
   * otherwise `null`.
   */
  static fromURL(url, opts) {
    const match = InstagramInfoExtractor.pattern.exec(url);
    return match
      ? new InstagramInfoExtractor({ ...opts, url, match })
      : null;
  }

  /**
   * Look the user up and yield one directory message for it.
   * A captured segment of the form `id:<value>` is resolved with
   * `api.userById`; anything else goes through `api.userByScreenName`.
   */
  async *items() {
    const profileSegment = this.groups[0] ?? "";
    const screenName = profileSegment.replace(/^\//, "");
    const user = screenName.startsWith("id:")
      ? await this.api.userById(screenName.slice(3))
      : await this.api.userByScreenName(screenName);
    yield directory(user);
  }

  /** Intentionally empty: this extractor yields no posts. */
  async *posts() {}
};
1879
/**
 * Extractor for `<profile>/avatar` URLs.
 *
 * It yields one synthetic post-shaped object whose only image candidate is
 * the user's profile picture.
 */
var InstagramAvatarExtractor = class InstagramAvatarExtractor extends InstagramExtractor {
  static subcategory = "avatar";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/);
  subcategory = InstagramAvatarExtractor.subcategory;

  constructor(opts) {
    super(opts);
  }

  /**
   * Factory: returns a new extractor when `url` matches the class pattern,
   * otherwise `null`.
   */
  static fromURL(url, opts) {
    const match = InstagramAvatarExtractor.pattern.exec(url);
    return match
      ? new InstagramAvatarExtractor({ ...opts, url, match })
      : null;
  }

  /**
   * Fetch the user and yield a single pseudo-post for the avatar.
   *
   * Image candidate, in order of preference: `hd_profile_pic_url_info`, the
   * last entry of `hd_profile_pic_versions`, then a zero-sized placeholder
   * around `profile_pic_url`.
   *
   * `pk` is the part of `profile_pic_id` before the first underscore and
   * `code` its shortcode; when that part is missing or empty both become
   * `avatar:<user.pk>`.
   */
  async *posts() {
    const profileSegment = this.groups[0] ?? "";
    const screenName = profileSegment.replace(/^\//, "");
    const user = screenName.startsWith("id:")
      ? await this.api.userById(screenName.slice(3))
      : await this.api.userByScreenName(screenName);

    let avatar = user.hd_profile_pic_url_info;
    if (avatar == null) {
      const versions = user.hd_profile_pic_versions;
      avatar = versions?.[versions.length - 1];
    }
    avatar ??= {
      url: user.profile_pic_url ?? "",
      width: 0,
      height: 0
    };

    const pictureId = user.profile_pic_id?.split("_")[0];
    const pk = pictureId ? pictureId : `avatar:${user.pk}`;
    const code = pictureId ? shortcodeFromId(pictureId) : pk;

    yield {
      pk,
      code,
      user,
      caption: null,
      like_count: 0,
      image_versions2: { candidates: [avatar] }
    };
  }
};
1922
/**
 * Extractor for `<profile>/saved` and `<profile>/saved/all-posts` URLs
 * (optional trailing slash, anchored at the end of the URL).
 *
 * The profile segment is captured by the pattern but not used by `posts()`,
 * which calls `api.userSaved()` without arguments.
 */
var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
  static subcategory = "saved";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
  subcategory = InstagramSavedExtractor.subcategory;

  constructor(opts) {
    super(opts);
  }

  /**
   * Factory: returns a new extractor when `url` matches the class pattern,
   * otherwise `null`.
   */
  static fromURL(url, opts) {
    const match = InstagramSavedExtractor.pattern.exec(url);
    return match
      ? new InstagramSavedExtractor({ ...opts, url, match })
      : null;
  }

  /** Re-yield everything `api.userSaved()` produces. */
  async *posts() {
    yield* this.api.userSaved();
  }
};
1942
+ //#endregion
1943
+ //#region src/sdk.ts
1944
/**
 * High-level entry point: maps a URL to the matching extractor and either
 * streams its messages (`extract`) or runs a `DownloadJob` over it
 * (`download`).
 */
var InstagramSDK = class {
  http;
  storage;
  log;
  config;
  _csrfToken;

  /**
   * @param {object} opts
   * @param {*} opts.http - HTTP client handed to every extractor.
   * @param {*} [opts.storage] - Storage backend; `null` is normalised to `undefined`.
   * @param {*} [opts.log] - Logger; defaults to `noopLogger`.
   * @param {string} [opts.csrfToken] - CSRF token; defaults to the empty string.
   */
  constructor(opts) {
    this.http = opts.http;
    this.storage = opts.storage ?? undefined;
    this.log = opts.log ?? noopLogger;
    // Always starts from an empty configuration.
    this.config = new ConfigManager();
    this._csrfToken = opts.csrfToken ?? "";
  }

  /**
   * Extract messages from an Instagram URL without downloading.
   *
   * Resolves the extractor, awaits its `initialize()`, then re-yields
   * everything the extractor itself iterates.
   *
   * @param {string} url
   * @throws {Error} When no extractor pattern matches `url`.
   */
  async *extract(url) {
    const resolved = this._resolve(url);
    await resolved.initialize();
    yield* resolved;
  }

  /**
   * Download all media from an Instagram URL.
   *
   * Runs a `DownloadJob` over the resolved extractor with `basePath` set to
   * `outputDir`, then reports the job's counters.
   *
   * ```ts
   * const stats = await ig.download('https://www.instagram.com/p/.../', './my-downloads')
   * // → { posts: 1, files: 9, bytes: 4500000 }
   * ```
   *
   * @param {string} url
   * @param {string} [outputDir="./data"]
   * @returns {Promise<{posts: number, files: number, bytes: number}>}
   *   Counters read from the job; each falls back to 0 when nullish.
   * @throws {Error} When no extractor pattern matches `url`.
   */
  async download(url, outputDir = "./data") {
    const extractor = this._resolve(url);
    const job = new DownloadJob(extractor);
    job.basePath = outputDir;
    await job.run();

    const {
      _postCount: posts,
      _fileCount: files,
      _downloadedBytes: bytes
    } = job;
    return {
      posts: posts ?? 0,
      files: files ?? 0,
      bytes: bytes ?? 0
    };
  }

  /**
   * Resolve a URL to an extractor instance via pattern matching.
   *
   * Candidates are tried in the order listed and the first whose `pattern`
   * matches wins, so the order is significant.
   *
   * @param {string} url
   * @returns {object} A freshly constructed extractor.
   * @throws {Error} When no candidate matches.
   */
  _resolve(url) {
    const candidates = [
      InstagramPostExtractor,
      InstagramStoriesExtractor,
      InstagramHighlightsExtractor,
      InstagramTagExtractor,
      InstagramSavedExtractor,
      InstagramPostsExtractor,
      InstagramReelsExtractor,
      InstagramTaggedExtractor,
      InstagramInfoExtractor,
      InstagramAvatarExtractor,
      InstagramUserExtractor
    ];

    for (const Candidate of candidates) {
      const match = Candidate.pattern.exec(url);
      if (!match) continue;
      return new Candidate({
        url,
        match,
        config: this.config,
        http: this.http,
        storage: this.storage,
        log: this.log,
        csrfToken: this._csrfToken
      });
    }

    throw new Error(`No extractor matched URL: ${url}. Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/`);
  }
};
2018
+ //#endregion
2019
// CommonJS export table. Each public name is defined on `exports` as an
// enumerable getter, so reading the property returns the current value of the
// module-local binding. The `get: function() { return X; }` shape is kept
// verbatim for every entry.
Object.defineProperty(exports, "ConfigManager", { enumerable: true, get: function() { return ConfigManager; } });
Object.defineProperty(exports, "DownloadJob", { enumerable: true, get: function() { return DownloadJob; } });
Object.defineProperty(exports, "Extractor", { enumerable: true, get: function() { return Extractor; } });
Object.defineProperty(exports, "InstagramAvatarExtractor", { enumerable: true, get: function() { return InstagramAvatarExtractor; } });
Object.defineProperty(exports, "InstagramExtractor", { enumerable: true, get: function() { return InstagramExtractor; } });
Object.defineProperty(exports, "InstagramHighlightsExtractor", { enumerable: true, get: function() { return InstagramHighlightsExtractor; } });
Object.defineProperty(exports, "InstagramInfoExtractor", { enumerable: true, get: function() { return InstagramInfoExtractor; } });
Object.defineProperty(exports, "InstagramPostExtractor", { enumerable: true, get: function() { return InstagramPostExtractor; } });
Object.defineProperty(exports, "InstagramPostsExtractor", { enumerable: true, get: function() { return InstagramPostsExtractor; } });
Object.defineProperty(exports, "InstagramReelsExtractor", { enumerable: true, get: function() { return InstagramReelsExtractor; } });
Object.defineProperty(exports, "InstagramRestAPI", { enumerable: true, get: function() { return InstagramRestAPI; } });
Object.defineProperty(exports, "InstagramSDK", { enumerable: true, get: function() { return InstagramSDK; } });
Object.defineProperty(exports, "InstagramSavedExtractor", { enumerable: true, get: function() { return InstagramSavedExtractor; } });
Object.defineProperty(exports, "InstagramStoriesExtractor", { enumerable: true, get: function() { return InstagramStoriesExtractor; } });
Object.defineProperty(exports, "InstagramTagExtractor", { enumerable: true, get: function() { return InstagramTagExtractor; } });
Object.defineProperty(exports, "InstagramTaggedExtractor", { enumerable: true, get: function() { return InstagramTaggedExtractor; } });
Object.defineProperty(exports, "InstagramUserExtractor", { enumerable: true, get: function() { return InstagramUserExtractor; } });
Object.defineProperty(exports, "Job", { enumerable: true, get: function() { return Job; } });
Object.defineProperty(exports, "PrintJob", { enumerable: true, get: function() { return PrintJob; } });
Object.defineProperty(exports, "directory", { enumerable: true, get: function() { return directory; } });
Object.defineProperty(exports, "ensureHttpScheme", { enumerable: true, get: function() { return ensureHttpScheme; } });
Object.defineProperty(exports, "extr", { enumerable: true, get: function() { return extr; } });
Object.defineProperty(exports, "extract", { enumerable: true, get: function() { return extract; } });
Object.defineProperty(exports, "extractAudio", { enumerable: true, get: function() { return extractAudio; } });
Object.defineProperty(exports, "extractTaggedUsers", { enumerable: true, get: function() { return extractTaggedUsers; } });
Object.defineProperty(exports, "findTags", { enumerable: true, get: function() { return findTags; } });
Object.defineProperty(exports, "idFromShortcode", { enumerable: true, get: function() { return idFromShortcode; } });
Object.defineProperty(exports, "nameExtFromURL", { enumerable: true, get: function() { return nameExtFromURL; } });
Object.defineProperty(exports, "noopLogger", { enumerable: true, get: function() { return noopLogger; } });
Object.defineProperty(exports, "parseInt", { enumerable: true, get: function() { return parseInt; } });
Object.defineProperty(exports, "parsePostGraphql", { enumerable: true, get: function() { return parsePostGraphql; } });
Object.defineProperty(exports, "parsePostRest", { enumerable: true, get: function() { return parsePostRest; } });
// Exported as `parseUnicodeEscapes`; the local binding carries a `$1` suffix.
Object.defineProperty(exports, "parseUnicodeEscapes", { enumerable: true, get: function() { return parseUnicodeEscapes$1; } });
Object.defineProperty(exports, "queue", { enumerable: true, get: function() { return queue; } });
Object.defineProperty(exports, "shortcodeFromId", { enumerable: true, get: function() { return shortcodeFromId; } });
Object.defineProperty(exports, "unescape", { enumerable: true, get: function() { return unescape; } });
Object.defineProperty(exports, "unquote", { enumerable: true, get: function() { return unquote; } });
Object.defineProperty(exports, "url", { enumerable: true, get: function() { return url; } });