@chilfish/gallery-dl-instagram 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,126 +1,4 @@
1
- //#region core/extractor.ts
2
- /** A no-op logger */
3
- const noopLogger = {
4
- debug: () => {},
5
- info: () => {},
6
- warn: () => {},
7
- error: () => {}
8
- };
9
- var Extractor = class {
10
- /** Regex pattern to match against URLs */
11
- static pattern = /^$/;
12
- /** The input URL */
13
- url;
14
- /** Regex match groups from ``fromURL`` */
15
- groups;
16
- config;
17
- /** HTTP client — public so Job can access for downloads */
18
- http;
19
- /** Storage backend — public so Job can access for writes */
20
- storage;
21
- /** Logger instance — public so Job can access for reporting */
22
- log;
23
- /** Delay range in seconds — random between [min, max] before each request */
24
- requestInterval = [6, 12];
25
- _initialized = false;
26
- constructor(opts) {
27
- this.url = opts.url;
28
- this.groups = opts.match ? [...opts.match].slice(1) : [];
29
- this.config = opts.config;
30
- this.http = opts.http;
31
- this.storage = opts.storage;
32
- this.log = opts.log;
33
- }
34
- /** Initialization */
35
- /**
36
- * One-time async setup (cookies, session, internal state).
37
- * Safe to call multiple times — after the first call it becomes a no-op.
38
- */
39
- async initialize() {
40
- if (this._initialized) return;
41
- await this._init();
42
- this._initialized = true;
43
- this.initialize = async () => {};
44
- }
45
- /**
46
- * Subclass hook for one-time setup.
47
- */
48
- async _init() {}
49
- /** Async iteration */
50
- async *[Symbol.asyncIterator]() {
51
- await this.initialize();
52
- yield* this.items();
53
- }
54
- /** Config helpers */
55
- /**
56
- * Read a config value using the interpolated hierarchy.
57
- */
58
- _cfg(key, defaultVal) {
59
- const path = [
60
- "extractor",
61
- this.category,
62
- this.subcategory
63
- ];
64
- return this.config.interpolate(path, key, defaultVal);
65
- }
66
- /** HTTP */
67
- _lastRequestTime = 0;
68
- /**
69
- * Rate-limited HTTP request wrapper.
70
- */
71
- async request(url, cfg = {}) {
72
- await this._throttle();
73
- const response = await this.http.request({
74
- url,
75
- ...cfg
76
- });
77
- this._lastRequestTime = Date.now();
78
- return response;
79
- }
80
- /**
81
- * Convenience: request + parse JSON body.
82
- */
83
- async requestJSON(url, cfg = {}) {
84
- const resp = await this.request(url, cfg);
85
- if (typeof resp.data === "object") return resp.data;
86
- try {
87
- return JSON.parse(resp.data);
88
- } catch {
89
- return {};
90
- }
91
- }
92
- /** Rate limiting */
93
- /**
94
- * Sleep long enough to keep the minimum interval between requests.
95
- */
96
- async _throttle() {
97
- const elapsed = Date.now() - this._lastRequestTime;
98
- const [min, max] = this.requestInterval;
99
- const target = min + Math.random() * (max - min);
100
- const waitMs = Math.max(0, target * 1e3 - elapsed);
101
- if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
102
- }
103
- /** Utility */
104
- /**
105
- * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
106
- */
107
- parseTimestamp(ts) {
108
- if (ts == null) return "";
109
- const asMs = ts > 25e8 ? ts : ts * 1e3;
110
- return new Date(asMs).toISOString();
111
- }
112
- /**
113
- * Generate a random hex token (used for CSRF).
114
- */
115
- static generateToken(size = 16) {
116
- const bytes = new Uint8Array(size);
117
- if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
118
- else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
119
- return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
120
- }
121
- };
122
- //#endregion
123
- //#region config.ts
1
+ //#region src/config.ts
124
2
  var ConfigManager = class {
125
3
  data;
126
4
  constructor(data = {}) {
@@ -174,7 +52,8 @@ var ConfigManager = class {
174
52
  }
175
53
  };
176
54
  //#endregion
177
- //#region core/job.ts
55
+ //#region src/core/format.ts
56
+ /** Shared ANSI formatting and display utilities. */
178
57
  function formatBytes(bytes) {
179
58
  if (bytes === 0) return "0 B";
180
59
  const units = [
@@ -204,19 +83,20 @@ function c(s) {
204
83
  function g(s) {
205
84
  return `${GREEN}${s}${RESET}`;
206
85
  }
86
+ const _YELLOW = YELLOW;
87
+ const _RESET = RESET;
207
88
  function pad(s, n) {
208
89
  return s.length >= n ? s : s + " ".repeat(n - s.length);
209
90
  }
91
+ //#endregion
92
+ //#region src/core/job.ts
210
93
  var Job = class {
211
94
  extractor;
212
95
  status = 0;
213
96
  constructor(extractor) {
214
97
  this.extractor = extractor;
215
98
  }
216
- /**
217
- * Main entry point. Calls ``extractor[Symbol.asyncIterator]()`` and
218
- * dispatches every yielded message.
219
- */
99
+ /** Main entry point. Dispatches every yielded message. */
220
100
  async run() {
221
101
  this.extractor.log.info(`Starting ${this.extractor.category}/${this.extractor.subcategory} — ${this.extractor.url}`);
222
102
  await this.extractor.initialize();
@@ -237,6 +117,8 @@ var Job = class {
237
117
  /** Override in subclasses to print a summary. */
238
118
  _report() {}
239
119
  };
120
+ //#endregion
121
+ //#region src/core/download-job.ts
240
122
  var DownloadJob = class DownloadJob extends Job {
241
123
  /** Base output directory (prepended to all paths). */
242
124
  basePath = "";
@@ -244,10 +126,6 @@ var DownloadJob = class DownloadJob extends Job {
244
126
  _currentDir = {};
245
127
  /** In-memory archive keyed by archive format. */
246
128
  archive = /* @__PURE__ */ new Map();
247
- /**
248
- * Registry of per-category "archive formats" — the key is formed
249
- * by interpolating this format string over the metadata.
250
- */
251
129
  _archiveFmts = /* @__PURE__ */ new Map();
252
130
  _postCount = 0;
253
131
  _fileCount = 0;
@@ -256,23 +134,18 @@ var DownloadJob = class DownloadJob extends Job {
256
134
  registerArchive(category, format) {
257
135
  this._archiveFmts.set(category, format);
258
136
  }
259
- /** Simple format-string interpolation for archive keys. */
260
137
  _interp(fmt, meta) {
261
138
  return fmt.replace(/\{(\w+)\}/g, (_, key) => {
262
139
  const v = meta[key];
263
140
  return v == null ? "" : String(v);
264
141
  });
265
142
  }
266
- /** Check whether this URL has already been downloaded (and skip). */
267
143
  _isArchived(meta) {
268
144
  const cat = meta.category ?? this.extractor.category;
269
145
  const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
270
146
  const key = this._interp(fmt, meta);
271
- const set = this.archive.get(cat);
272
- if (set && set.has(key)) return true;
273
- return false;
147
+ return !!this.archive.get(cat)?.has(key);
274
148
  }
275
- /** Mark a post/media as archived. */
276
149
  _archive(meta) {
277
150
  const cat = meta.category ?? this.extractor.category;
278
151
  const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
@@ -284,7 +157,6 @@ var DownloadJob = class DownloadJob extends Job {
284
157
  }
285
158
  set.add(key);
286
159
  }
287
- /** Handlers */
288
160
  async handleDirectory(msg) {
289
161
  this._currentDir = { ...msg.metadata };
290
162
  this._postCount++;
@@ -313,7 +185,6 @@ var DownloadJob = class DownloadJob extends Job {
313
185
  if (resp.data instanceof Uint8Array) data = resp.data;
314
186
  else if (resp.data instanceof ArrayBuffer) data = new Uint8Array(resp.data);
315
187
  else if (typeof resp.data === "string") data = resp.data;
316
- else if (typeof resp.data === "object" && resp.data != null && "type" in resp.data && resp.data.type === "Buffer") data = new Uint8Array(resp.data);
317
188
  else data = JSON.stringify(resp.data);
318
189
  await this.extractor.storage.write(fullPath, data);
319
190
  this._fileCount++;
@@ -357,13 +228,11 @@ var DownloadJob = class DownloadJob extends Job {
357
228
  else this.archive.set(cat, set);
358
229
  }
359
230
  }
360
- /** Report */
361
231
  _report() {
362
232
  const log = this.extractor.log;
363
233
  log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
364
234
  if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
365
235
  }
366
- /** Path builders */
367
236
  _buildDirPath(meta) {
368
237
  return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
369
238
  }
@@ -373,166 +242,133 @@ var DownloadJob = class DownloadJob extends Job {
373
242
  return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
374
243
  }
375
244
  };
376
- var PrintJob = class PrintJob extends Job {
377
- _currentDir = {};
378
- _files = [];
379
- _postCount = 0;
380
- _fileCount = 0;
381
- _width;
382
- constructor(extractor) {
383
- super(extractor);
384
- this._width = Math.min(process.stdout.columns ?? 80, 100);
245
+ //#endregion
246
+ //#region src/core/extractor.ts
247
+ /** A no-op logger */
248
+ const noopLogger = {
249
+ debug: () => {},
250
+ info: () => {},
251
+ warn: () => {},
252
+ error: () => {}
253
+ };
254
+ var Extractor = class {
255
+ /** Regex pattern to match against URLs */
256
+ static pattern = /^$/;
257
+ /** The input URL */
258
+ url;
259
+ /** Regex match groups from ``fromURL`` */
260
+ groups;
261
+ config;
262
+ /** HTTP client — public so Job can access for downloads */
263
+ http;
264
+ /** Storage backend — public so Job can access for writes */
265
+ storage;
266
+ /** Logger instance — public so Job can access for reporting */
267
+ log;
268
+ /** Delay range in seconds — random between [min, max] before each request */
269
+ requestInterval = [6, 12];
270
+ _initialized = false;
271
+ constructor(opts) {
272
+ this.url = opts.url;
273
+ this.groups = opts.match ? [...opts.match].slice(1) : [];
274
+ this.config = opts.config;
275
+ this.http = opts.http;
276
+ this.storage = opts.storage;
277
+ this.log = opts.log;
385
278
  }
386
- async handleDirectory(msg) {
387
- if (this._postCount > 0) this._flushPost();
388
- this._currentDir = { ...msg.metadata };
389
- this._postCount++;
390
- this._files = [];
279
+ /** Initialization */
280
+ /**
281
+ * One-time async setup (cookies, session, internal state).
282
+ * Safe to call multiple times — after the first call it becomes a no-op.
283
+ */
284
+ async initialize() {
285
+ if (this._initialized) return;
286
+ await this._init();
287
+ this._initialized = true;
288
+ this.initialize = async () => {};
391
289
  }
392
- async handleUrl(msg) {
393
- const meta = {
394
- ...this._currentDir,
395
- ...msg.metadata
396
- };
397
- this._fileCount++;
398
- const ext = meta.extension ?? "jpg";
399
- const mid = meta.media_id ?? "?";
400
- this._files.push({
401
- num: meta.num ?? this._files.length + 1,
402
- filename: `${mid}.${ext}`,
403
- width: meta.width ?? 0,
404
- height: meta.height ?? 0,
405
- videoUrl: meta.video_url ?? null,
406
- audioUrl: meta.audio_url ?? null
290
+ /**
291
+ * Subclass hook for one-time setup.
292
+ */
293
+ async _init() {}
294
+ /** Async iteration */
295
+ async *[Symbol.asyncIterator]() {
296
+ await this.initialize();
297
+ yield* this.items();
298
+ }
299
+ /** Config helpers */
300
+ /**
301
+ * Read a config value using the interpolated hierarchy.
302
+ */
303
+ _cfg(key, defaultVal) {
304
+ const path = [
305
+ "extractor",
306
+ this.category,
307
+ this.subcategory
308
+ ];
309
+ return this.config.interpolate(path, key, defaultVal);
310
+ }
311
+ /** HTTP */
312
+ _lastRequestTime = 0;
313
+ /**
314
+ * Rate-limited HTTP request wrapper.
315
+ */
316
+ async request(url, cfg = {}) {
317
+ await this._throttle();
318
+ const response = await this.http.request({
319
+ url,
320
+ ...cfg
407
321
  });
322
+ this._lastRequestTime = Date.now();
323
+ return response;
408
324
  }
409
- async handleQueue(msg) {
410
- if (this._files.length > 0 || this._postCount > 0) this._flushPost();
411
- this._postCount = 0;
412
- this._files = [];
413
- const extrClass = {
414
- ...this._currentDir,
415
- ...msg.metadata
416
- }._extractor;
417
- if (!extrClass || typeof extrClass !== "object") return;
418
- const cls = extrClass;
419
- const match = cls.pattern.exec(msg.url);
420
- if (!match) return;
421
- const parentExtr = this.extractor;
422
- const childJob = new PrintJob(Reflect.construct(cls, [{
423
- url: msg.url,
424
- match,
425
- config: parentExtr.config,
426
- http: parentExtr.http,
427
- storage: parentExtr.storage,
428
- log: parentExtr.log
429
- }]));
430
- const childStatus = await childJob.run();
431
- this.status |= childStatus;
432
- this._postCount += childJob._postCount;
433
- this._fileCount += childJob._fileCount;
434
- }
435
- /** Output */
436
- _flushPost() {
437
- const m = this._currentDir;
438
- if (Object.keys(m).length === 0) return;
439
- const w = this._width;
440
- const labelW = 14;
441
- const shortcode = m.post_shortcode ?? "?";
442
- const header = ` Post #${this._postCount}: ${shortcode} `;
443
- const padTotal = w - 2 - header.length;
444
- const padL = Math.floor(padTotal / 2);
445
- const padR = padTotal - padL;
446
- process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
447
- const row = (label, value, color) => {
448
- const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${RESET}` : value;
449
- process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
450
- };
451
- const username = m.username ?? "?";
452
- const fullname = m.fullname ?? "";
453
- row("Author:", fullname ? `${username} (${fullname})` : username, g);
454
- row("Date:", m.date ?? m.post_date ?? "?");
455
- row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
456
- row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
457
- row("URL:", m.post_url ?? "?");
458
- const desc = m.description ?? "";
459
- if (desc) {
460
- process.stdout.write(` ${dim("│")}\n`);
461
- process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
462
- const lines = desc.split("\n");
463
- for (const line of lines) {
464
- const wrapped = this._wrap(line, w - 8);
465
- for (const wl of wrapped) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
466
- }
467
- }
468
- const tags = m.tags;
469
- if (tags && tags.length > 0) {
470
- process.stdout.write(` ${dim("│")}\n`);
471
- process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
472
- }
473
- const locName = m.location_slug ?? "";
474
- const locId = m.location_id ?? "";
475
- if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
476
- const coauthors = m.coauthors;
477
- if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((c) => c.full_name ? `${c.username} (${c.full_name})` : c.username).join(", "));
478
- const pinned = m.pinned;
479
- if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
480
- const expires = m.expires;
481
- if (expires) row("Expires:", expires, YELLOW);
482
- const hlTitle = m.highlight_title;
483
- if (hlTitle) row("Highlight:", hlTitle);
484
- const taggedUser = m.tagged_username ?? "";
485
- if (taggedUser) {
486
- const taggedFull = m.tagged_full_name ?? "";
487
- row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
488
- }
489
- if (this._files.length > 0) {
490
- process.stdout.write(` ${dim("│")}\n`);
491
- process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
492
- const maxNumW = String(this._files.length).length;
493
- const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
494
- const dimW = Math.min(maxFileW, 40);
495
- for (const f of this._files) {
496
- const numStr = `[${String(f.num).padStart(maxNumW)}]`;
497
- const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
498
- const res = f.width ? `${f.width}x${f.height}` : "?x?";
499
- const badges = [];
500
- if (f.videoUrl) badges.push("video");
501
- if (f.audioUrl) badges.push("audio");
502
- let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
503
- if (badges.length > 0) line += ` ${YELLOW}(${badges.join("+")})${RESET}`;
504
- process.stdout.write(`${line}\n`);
505
- }
506
- }
507
- process.stdout.write(` ${dim("└")}${"─".repeat(w - 2)}${dim("┘")}\n`);
508
- }
509
- _wrap(text, maxLen) {
510
- if (text.length <= maxLen) return [text];
511
- const lines = [];
512
- let remaining = text;
513
- while (remaining.length > maxLen) {
514
- let cut = maxLen;
515
- while (cut > 0 && remaining[cut] !== " ") cut--;
516
- if (cut === 0) cut = maxLen;
517
- lines.push(remaining.slice(0, cut).trimEnd());
518
- remaining = remaining.slice(cut).trimStart();
325
+ /**
326
+ * Convenience: request + parse JSON body.
327
+ */
328
+ async requestJSON(url, cfg = {}) {
329
+ const resp = await this.request(url, cfg);
330
+ if (typeof resp.data === "object") return resp.data;
331
+ try {
332
+ return JSON.parse(resp.data);
333
+ } catch {
334
+ return {};
519
335
  }
520
- if (remaining) lines.push(remaining);
521
- return lines;
522
- }
523
- _report() {
524
- this._flushPost();
525
- process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
526
- process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
527
- process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
528
- process.stdout.write(`\n`);
529
336
  }
530
- };
531
- //#endregion
532
- //#region utils/id-codec.ts
533
- /**
534
- * Instagram-style Base64-variant ID ↔ shortcode conversion.
535
- */
337
+ /** Rate limiting */
338
+ /**
339
+ * Sleep long enough to keep the minimum interval between requests.
340
+ */
341
+ async _throttle() {
342
+ const elapsed = Date.now() - this._lastRequestTime;
343
+ const [min, max] = this.requestInterval;
344
+ const target = min + Math.random() * (max - min);
345
+ const waitMs = Math.max(0, target * 1e3 - elapsed);
346
+ if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
347
+ }
348
+ /** Utility */
349
+ /**
350
+ * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
351
+ */
352
+ parseTimestamp(ts) {
353
+ if (ts == null) return "";
354
+ const asMs = ts > 25e8 ? ts : ts * 1e3;
355
+ return new Date(asMs).toISOString();
356
+ }
357
+ /**
358
+ * Generate a random hex token (used for CSRF).
359
+ */
360
+ static generateToken(size = 16) {
361
+ const bytes = new Uint8Array(size);
362
+ if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
363
+ else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
364
+ return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
365
+ }
366
+ };
367
+ //#endregion
368
+ //#region src/utils/id-codec.ts
369
+ /**
370
+ * Instagram-style Base64-variant ID ↔ shortcode conversion.
371
+ */
536
372
  const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
537
373
  /** Pre-built index for O(1) character lookup during decode. */
538
374
  const CHAR_INDEX = {};
@@ -560,7 +396,160 @@ function shortcodeFromId(postId) {
560
396
  return chars.reverse().join("");
561
397
  }
562
398
  //#endregion
563
- //#region instagram/api.ts
399
+ //#region src/message.ts
400
+ function directory(metadata = {}) {
401
+ return {
402
+ type: "directory",
403
+ metadata
404
+ };
405
+ }
406
+ function url(u, metadata = {}) {
407
+ return {
408
+ type: "url",
409
+ url: u,
410
+ metadata
411
+ };
412
+ }
413
+ function queue(u, metadata = {}) {
414
+ return {
415
+ type: "queue",
416
+ url: u,
417
+ metadata
418
+ };
419
+ }
420
+ //#endregion
421
+ //#region src/utils/text.ts
422
+ /**
423
+ * Text utilities ported from gallery-dl's ``text`` module.
424
+ *
425
+ * All functions are pure and environment-agnostic.
426
+ */
427
+ /** String extraction */
428
+ /**
429
+ * Extract the substring between ``begin`` and ``end`` from ``txt``.
430
+ * Returns the substring or ``null`` if either delimiter is missing.
431
+ */
432
+ function extract(txt, begin, end) {
433
+ const first = txt.indexOf(begin);
434
+ if (first < 0) return null;
435
+ const start = first + begin.length;
436
+ const last = txt.indexOf(end, start);
437
+ if (last < 0) return null;
438
+ return txt.slice(start, last);
439
+ }
440
+ /**
441
+ * Shorthand: same as ``extract`` but returns ``default_`` on failure.
442
+ * Mirrors the Python ``extr()`` function.
443
+ */
444
+ function extr(txt, begin, end, default_ = "") {
445
+ return extract(txt, begin, end) ?? default_;
446
+ }
447
+ /** Unicode / HTML */
448
+ /**
449
+ * Decode ``\\uXXXX`` escape sequences in a string.
450
+ */
451
+ function parseUnicodeEscapes$1(text) {
452
+ if (!text.includes("\\u")) return text;
453
+ return text.replace(/\\u([0-9a-fA-F]{4})/g, (_m, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
454
+ }
455
+ /**
456
+ * HTML entity decode.
457
+ *
458
+ * In Node.js we could use a DOM parser, but since this library is
459
+ * environment-agnostic we ship a minimal table covering the common cases.
460
+ */
461
+ const HTML_ENTITIES = {
462
+ "amp": "&",
463
+ "lt": "<",
464
+ "gt": ">",
465
+ "quot": "\"",
466
+ "apos": "'",
467
+ "nbsp": "\xA0",
468
+ "#x27": "'",
469
+ "#x2F": "/",
470
+ "#39": "'",
471
+ "#47": "/"
472
+ };
473
+ const RE_ENTITY = /&([^;]+);/g;
474
+ function unescape(text) {
475
+ return text.replace(RE_ENTITY, (m, name) => {
476
+ const ch = HTML_ENTITIES[name];
477
+ if (ch !== void 0) return ch;
478
+ if (name.startsWith("#")) {
479
+ const cp = name[1] === "x" || name[1] === "X" ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
480
+ if (Number.isSafeInteger(cp)) return String.fromCodePoint(cp);
481
+ }
482
+ return m;
483
+ });
484
+ }
485
+ /** URL helpers */
486
+ /**
487
+ * URL-decode a string.
488
+ */
489
+ function unquote(text) {
490
+ try {
491
+ return decodeURIComponent(text);
492
+ } catch {
493
+ return text.replace(/%[0-9a-f]{2}/gi, (m) => {
494
+ try {
495
+ return decodeURIComponent(m);
496
+ } catch {
497
+ return m;
498
+ }
499
+ });
500
+ }
501
+ }
502
+ /**
503
+ * Ensure a URL starts with ``https://`` (or ``http://``).
504
+ */
505
+ function ensureHttpScheme(url, scheme = "https://") {
506
+ if (!url) return url;
507
+ if (url.startsWith("https://") || url.startsWith("http://")) return url;
508
+ return scheme + url.replace(/^[/:]+/, "");
509
+ }
510
+ /**
511
+ * Extract filename + extension from a URL and write into ``meta``.
512
+ */
513
+ function nameExtFromURL(url, meta) {
514
+ const filename = filenameFromURL(url);
515
+ const dot = filename.lastIndexOf(".");
516
+ if (dot > 0 && filename.length - dot - 1 <= 16) {
517
+ meta.filename = unquote(filename.slice(0, dot));
518
+ meta.extension = unquote(filename.slice(dot + 1)).toLowerCase();
519
+ } else {
520
+ meta.filename = unquote(filename);
521
+ meta.extension = "";
522
+ }
523
+ }
524
+ /**
525
+ * Extract the file-name portion of a URL (before query string).
526
+ */
527
+ function filenameFromURL(url) {
528
+ try {
529
+ return url.split("?")[0].split("/").pop() ?? "";
530
+ } catch {
531
+ return "";
532
+ }
533
+ }
534
+ /**
535
+ * Parse an integer from a possibly-null value. Returns ``default_`` on failure.
536
+ */
537
+ function parseInt(value, default_ = 0) {
538
+ if (value == null) return default_;
539
+ const n = typeof value === "number" ? value : Number.parseInt(String(value), 10);
540
+ return Number.isFinite(n) ? n : default_;
541
+ }
542
+ function tagRe(pattern) {
543
+ const re = new RegExp(pattern, "g");
544
+ return (text) => {
545
+ const matches = text.match(re);
546
+ return matches ? [...new Set(matches)] : [];
547
+ };
548
+ }
549
+ /** Pre-configured hashtag regex. */
550
+ const findTags = tagRe("#\\w+");
551
+ //#endregion
552
+ //#region src/instagram/api.ts
564
553
  const APP_ID = "936619743392459";
565
554
  const ASBD_ID = "129477";
566
555
  var InstagramRestAPI = class {
@@ -900,192 +889,38 @@ var InstagramRestAPI = class {
900
889
  }
901
890
  };
902
891
  //#endregion
903
- //#region message.ts
904
- function directory(metadata = {}) {
905
- return {
906
- type: "directory",
907
- metadata
908
- };
909
- }
910
- function url(u, metadata = {}) {
911
- return {
912
- type: "url",
913
- url: u,
914
- metadata
915
- };
916
- }
917
- function queue(u, metadata = {}) {
918
- return {
919
- type: "queue",
920
- url: u,
921
- metadata
922
- };
923
- }
924
- //#endregion
925
- //#region utils/text.ts
926
- /**
927
- * Text utilities ported from gallery-dl's ``text`` module.
928
- *
929
- * All functions are pure and environment-agnostic.
930
- */
931
- /** String extraction */
932
- /**
933
- * Extract the substring between ``begin`` and ``end`` from ``txt``.
934
- * Returns the substring or ``null`` if either delimiter is missing.
935
- */
936
- function extract(txt, begin, end) {
937
- const first = txt.indexOf(begin);
938
- if (first < 0) return null;
939
- const start = first + begin.length;
940
- const last = txt.indexOf(end, start);
941
- if (last < 0) return null;
942
- return txt.slice(start, last);
943
- }
944
- /**
945
- * Shorthand: same as ``extract`` but returns ``default_`` on failure.
946
- * Mirrors the Python ``extr()`` function.
947
- */
948
- function extr(txt, begin, end, default_ = "") {
949
- return extract(txt, begin, end) ?? default_;
950
- }
951
- /** Unicode / HTML */
952
- /**
953
- * Decode ``\\uXXXX`` escape sequences in a string.
954
- */
955
- function parseUnicodeEscapes$1(text) {
956
- if (!text.includes("\\u")) return text;
957
- return text.replace(/\\u([0-9a-fA-F]{4})/g, (_m, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
958
- }
959
- /**
960
- * HTML entity decode.
961
- *
962
- * In Node.js we could use a DOM parser, but since this library is
963
- * environment-agnostic we ship a minimal covering the common cases.
964
- */
965
- const HTML_ENTITIES = {
966
- "amp": "&",
967
- "lt": "<",
968
- "gt": ">",
969
- "quot": "\"",
970
- "apos": "'",
971
- "nbsp": "\xA0",
972
- "#x27": "'",
973
- "#x2F": "/",
974
- "#39": "'",
975
- "#47": "/"
976
- };
977
- const RE_ENTITY = /&([^;]+);/g;
978
- function unescape(text) {
979
- return text.replace(RE_ENTITY, (m, name) => {
980
- const ch = HTML_ENTITIES[name];
981
- if (ch !== void 0) return ch;
982
- if (name.startsWith("#")) {
983
- const cp = name[1] === "x" || name[1] === "X" ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
984
- if (Number.isSafeInteger(cp)) return String.fromCodePoint(cp);
985
- }
986
- return m;
987
- });
988
- }
989
- /** URL helpers */
990
- /**
991
- * URL-decode a string.
992
- */
993
- function unquote(text) {
994
- try {
995
- return decodeURIComponent(text);
996
- } catch {
997
- return text.replace(/%[0-9a-f]{2}/gi, (m) => {
998
- try {
999
- return decodeURIComponent(m);
1000
- } catch {
1001
- return m;
1002
- }
1003
- });
1004
- }
1005
- }
1006
- /**
1007
- * Ensure a URL starts with ``https://`` (or ``http://``).
1008
- */
1009
- function ensureHttpScheme(url, scheme = "https://") {
1010
- if (!url) return url;
1011
- if (url.startsWith("https://") || url.startsWith("http://")) return url;
1012
- return scheme + url.replace(/^[/:]+/, "");
1013
- }
1014
- /**
1015
- * Extract filename + extension from a URL and write into ``meta``.
1016
- */
1017
- function nameExtFromURL(url, meta) {
1018
- const filename = filenameFromURL(url);
1019
- const dot = filename.lastIndexOf(".");
1020
- if (dot > 0 && filename.length - dot - 1 <= 16) {
1021
- meta.filename = unquote(filename.slice(0, dot));
1022
- meta.extension = unquote(filename.slice(dot + 1)).toLowerCase();
1023
- } else {
1024
- meta.filename = unquote(filename);
1025
- meta.extension = "";
1026
- }
1027
- }
1028
- /**
1029
- * Extract the file-name portion of a URL (before query string).
1030
- */
1031
- function filenameFromURL(url) {
1032
- try {
1033
- return url.split("?")[0].split("/").pop() ?? "";
1034
- } catch {
1035
- return "";
1036
- }
1037
- }
1038
- /**
1039
- * Parse an integer from a possibly-null value. Returns ``default_`` on failure.
1040
- */
1041
- function parseInt(value, default_ = 0) {
1042
- if (value == null) return default_;
1043
- const n = typeof value === "number" ? value : Number.parseInt(String(value), 10);
1044
- return Number.isFinite(n) ? n : default_;
1045
- }
1046
- function tagRe(pattern) {
1047
- const re = new RegExp(pattern, "g");
1048
- return (text) => {
1049
- const matches = text.match(re);
1050
- return matches ? [...new Set(matches)] : [];
1051
- };
1052
- }
1053
- /** Pre-configured hashtag regex. */
1054
- const findTags = tagRe("#\\w+");
1055
- //#endregion
1056
- //#region instagram/parsers.ts
1057
- /** Main entry — REST */
1058
- function parsePostRest(post, cfg) {
1059
- if (post.items) return parseStoryRest(post, cfg);
1060
- const owner = post.user;
1061
- const caption = post.caption;
1062
- const ts = post.taken_at ?? post.created_at;
1063
- const date = cfg.parseTimestamp(ts ?? null);
1064
- const data = {
1065
- post_id: post.pk,
1066
- post_shortcode: post.code,
1067
- post_url: `${cfg.root}/p/${post.code}/`,
1068
- likes: post.like_count ?? 0,
1069
- liked: post.has_liked ?? false,
1070
- pinned: extractPinned(post),
1071
- owner_id: owner.pk,
1072
- username: owner.username ?? "",
1073
- fullname: owner.full_name ?? "",
1074
- post_date: date,
1075
- date,
1076
- description: caption ? caption.text : "",
1077
- type: "post",
1078
- count: 0,
1079
- _files: []
892
+ //#region src/instagram/parsers/rest.ts
893
+ /** Main entry: parse a REST post response. */
894
+ function parsePostRest(post, cfg) {
895
+ if (post.items) return parseStoryRest(post, cfg);
896
+ const owner = post.user;
897
+ const caption = post.caption;
898
+ const ts = post.taken_at ?? post.created_at;
899
+ const date = cfg.parseTimestamp(ts ?? null);
900
+ const data = {
901
+ post_id: post.pk,
902
+ post_shortcode: post.code,
903
+ post_url: `${cfg.root}/p/${post.code}/`,
904
+ likes: post.like_count ?? 0,
905
+ liked: post.has_liked ?? false,
906
+ pinned: extractPinned(post),
907
+ owner_id: owner.pk,
908
+ username: owner.username ?? "",
909
+ fullname: owner.full_name ?? "",
910
+ post_date: date,
911
+ date,
912
+ description: caption ? caption.text : "",
913
+ type: "post",
914
+ count: 0,
915
+ _files: []
1080
916
  };
1081
917
  const tags = cfg.findTags(data.description);
1082
918
  if (tags.length > 0) data.tags = [...new Set(tags)].sort();
1083
919
  if (post.location) {
1084
920
  const loc = post.location;
1085
- const slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
1086
921
  data.location_id = loc.pk;
1087
- data.location_slug = slug;
1088
- data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${slug}/`;
922
+ data.location_slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
923
+ data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${data.location_slug}/`;
1089
924
  }
1090
925
  if (post.coauthor_producers) data.coauthors = post.coauthor_producers.map((u) => ({
1091
926
  id: u.pk,
@@ -1132,7 +967,7 @@ function parsePostRest(post, cfg) {
1132
967
  if (post.subscription_media_visibility) data.subscription = post.subscription_media_visibility;
1133
968
  return data;
1134
969
  }
1135
- /** Story / highlight */
970
+ /** Parse a story or highlight REST response. */
1136
971
  function parseStoryRest(post, cfg) {
1137
972
  const items = post.items;
1138
973
  const reelId = String(post.id).split(":").pop() ?? "0";
@@ -1158,9 +993,8 @@ function parseStoryRest(post, cfg) {
1158
993
  expires: expires ? cfg.parseTimestamp(expires) : void 0,
1159
994
  user: post.user
1160
995
  };
1161
- if (!isStory) {
1162
- if (post.title) data.highlight_title = post.title;
1163
- } else if (!post.seen) post.seen = expires - 86400;
996
+ if (!isStory && post.title) data.highlight_title = post.title;
997
+ else if (!post.seen) post.seen = expires - 86400;
1164
998
  for (let num = 0; num < items.length; num++) {
1165
999
  const item = items[num];
1166
1000
  const media = parseMediaItem(item, post, cfg, num + 1);
@@ -1170,7 +1004,7 @@ function parseStoryRest(post, cfg) {
1170
1004
  }
1171
1005
  return data;
1172
1006
  }
1173
- /** Single media item */
1007
+ /** Parse a single media item (image/video) from a carousel or story. */
1174
1008
  function parseMediaItem(item, parent, cfg, num) {
1175
1009
  let image;
1176
1010
  try {
@@ -1229,7 +1063,7 @@ function parseMediaItem(item, parent, cfg, num) {
1229
1063
  if (itemRec.audience) media.audience = itemRec.audience;
1230
1064
  return media;
1231
1065
  }
1232
- /** Tagged users */
1066
+ /** Extract tagged users from various field formats. */
1233
1067
  function extractTaggedUsers(src, dest) {
1234
1068
  dest.tagged_users = [];
1235
1069
  const edges = src.edge_media_to_tagged_user;
@@ -1272,13 +1106,9 @@ function extractTaggedUsers(src, dest) {
1272
1106
  }
1273
1107
  }
1274
1108
  const seen = /* @__PURE__ */ new Set();
1275
- dest.tagged_users = dest.tagged_users.filter((t) => {
1276
- if (seen.has(t.id)) return false;
1277
- seen.add(t.id);
1278
- return true;
1279
- });
1109
+ dest.tagged_users = dest.tagged_users.filter((t) => seen.has(t.id) ? false : (seen.add(t.id), true));
1280
1110
  }
1281
- /** Audio / music extraction */
1111
+ /** Extract audio/music metadata from a story sticker. */
1282
1112
  function extractAudio(src, dest, sticker, cfg) {
1283
1113
  const info = sticker.music_asset_info;
1284
1114
  if (!info) return null;
@@ -1310,7 +1140,14 @@ function extractAudio(src, dest, sticker, cfg) {
1310
1140
  audio_timestamps: info.highlight_start_times_in_ms
1311
1141
  };
1312
1142
  }
1313
- /** GraphQL parser */
1143
/**
 * Pick the list of user ids for which this post is pinned.
 *
 * The timeline list is preferred; the clips-tab list is only consulted when
 * the timeline field is missing or otherwise falsy. An empty array is truthy,
 * so an empty timeline list is returned as-is.
 *
 * @param {object} post - Raw post object.
 * @returns {Array} The first truthy pin list, or a fresh empty array.
 */
function extractPinned(post) {
  return post.timeline_pinned_user_ids || post.clips_tab_pinned_user_ids || [];
}
1148
+ //#endregion
1149
+ //#region src/instagram/parsers/graphql.ts
1150
+ /** Parse a GraphQL post/edge response. */
1314
1151
  function parsePostGraphql(post, cfg) {
1315
1152
  const typename = post.__typename ?? "GraphImage";
1316
1153
  const owner = post.owner;
@@ -1395,17 +1232,12 @@ function parsePostGraphql(post, cfg) {
1395
1232
  }
1396
1233
  return data;
1397
1234
  }
1398
- function extractPinned(post) {
1399
- if (post.timeline_pinned_user_ids) return post.timeline_pinned_user_ids;
1400
- if (post.clips_tab_pinned_user_ids) return post.clips_tab_pinned_user_ids;
1401
- return [];
1402
- }
1403
1235
  function parseUnicodeEscapes(text) {
1404
1236
  if (!text.includes("\\u")) return text;
1405
1237
  return text.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
1406
1238
  }
1407
1239
  //#endregion
1408
- //#region instagram/base.ts
1240
+ //#region src/instagram/base.ts
1409
1241
  var Ref = class {
1410
1242
  value;
1411
1243
  constructor(v) {
@@ -1558,12 +1390,123 @@ var InstagramExtractor = class extends Extractor {
1558
1390
  }
1559
1391
  };
1560
1392
  //#endregion
1561
- //#region instagram/extractors.ts
1393
+ //#region src/instagram/extractors/helpers.ts
1394
+ /** Shared regex utilities for Instagram extractor URL patterns. */
1562
1395
  const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/;
1563
1396
  function re(base, path) {
1564
1397
  const pathSrc = typeof path === "string" ? path : path.source;
1565
1398
  return new RegExp(base.source + pathSrc, "i");
1566
1399
  }
1400
+ //#endregion
1401
+ //#region src/instagram/extractors/registry.ts
1402
/** Lookup table from subcategory name to the extractor class registered for it. */
const _registry = /* @__PURE__ */ new Map();
/**
 * Store `cls` under `subcategory`, replacing any earlier entry for that name.
 *
 * @param {string} subcategory - Registry key.
 * @param {Function} cls - Extractor class to associate with the key.
 */
function register(subcategory, cls) {
  _registry.set(subcategory, cls);
}
/**
 * Look up the class stored under `subcategory`.
 *
 * @param {string} subcategory - Registry key.
 * @returns {Function|undefined} The stored class, or `undefined` when nothing is registered.
 */
function get(subcategory) {
  return _registry.get(subcategory);
}
1409
+ //#endregion
1410
+ //#region src/instagram/extractors/avatar.ts
1411
/**
 * Extractor for `<user>/avatar` URLs: yields one synthetic post object whose
 * only image candidate is the user's profile picture.
 */
var InstagramAvatarExtractor = class InstagramAvatarExtractor extends InstagramExtractor {
  static subcategory = "avatar";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/);
  subcategory = InstagramAvatarExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /**
   * Create an extractor when `url` matches the avatar pattern.
   *
   * @returns {InstagramAvatarExtractor|null} `null` when the URL does not match.
   */
  static fromURL(url, opts) {
    const match = InstagramAvatarExtractor.pattern.exec(url);
    return match ? new InstagramAvatarExtractor({
      ...opts,
      url,
      match
    }) : null;
  }
  /** Yield a single post-shaped object wrapping the profile picture. */
  async *posts() {
    // The captured group carries a leading slash; an `id:` prefix selects lookup by id.
    const name = (this.groups[0] ?? "").replace(/^\//, "");
    const user = name.startsWith("id:")
      ? await this.api.userById(name.slice(3))
      : await this.api.userByScreenName(name);
    // Preference order: HD info object, last HD version entry, plain profile picture URL.
    const avatar = user.hd_profile_pic_url_info
      ?? user.hd_profile_pic_versions?.[user.hd_profile_pic_versions.length - 1]
      ?? {
        url: user.profile_pic_url ?? "",
        width: 0,
        height: 0
      };
    // Without a usable picture id, both pk and code fall back to `avatar:<user pk>`.
    const pictureId = user.profile_pic_id?.split("_")[0];
    const pk = pictureId || `avatar:${user.pk}`;
    const code = pictureId ? shortcodeFromId(pictureId) : pk;
    yield {
      pk,
      code,
      user,
      caption: null,
      like_count: 0,
      image_versions2: { candidates: [avatar] }
    };
  }
};
register(InstagramAvatarExtractor.subcategory, InstagramAvatarExtractor);
1455
+ //#endregion
1456
+ //#region src/instagram/extractors/highlights.ts
1457
/**
 * Extractor for `<user>/highlights` URLs: streams whatever
 * `api.highlightsMedia` yields for the user id resolved from the URL.
 */
var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
  static subcategory = "highlights";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
  subcategory = InstagramHighlightsExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /**
   * Create an extractor when `url` matches the highlights pattern.
   *
   * @returns {InstagramHighlightsExtractor|null} `null` when the URL does not match.
   */
  static fromURL(url, opts) {
    const match = InstagramHighlightsExtractor.pattern.exec(url);
    return match ? new InstagramHighlightsExtractor({
      ...opts,
      url,
      match
    }) : null;
  }
  /** Resolve the user id for the captured name, then delegate to the API generator. */
  async *posts() {
    // The captured group carries a leading slash that is not part of the name.
    const name = (this.groups[0] ?? "").replace(/^\//, "");
    const userId = await this.api.userId(name);
    yield* this.api.highlightsMedia(userId);
  }
};
register(InstagramHighlightsExtractor.subcategory, InstagramHighlightsExtractor);
1480
+ //#endregion
1481
+ //#region src/instagram/extractors/info.ts
1482
/**
 * Extractor for `<user>/info` URLs: emits one `directory(user)` message for
 * the looked-up user and produces no posts.
 */
var InstagramInfoExtractor = class InstagramInfoExtractor extends InstagramExtractor {
  static subcategory = "info";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/info/);
  subcategory = InstagramInfoExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /**
   * Create an extractor when `url` matches the info pattern.
   *
   * @returns {InstagramInfoExtractor|null} `null` when the URL does not match.
   */
  static fromURL(url, opts) {
    const match = InstagramInfoExtractor.pattern.exec(url);
    return match ? new InstagramInfoExtractor({
      ...opts,
      url,
      match
    }) : null;
  }
  /** Look the user up (by id for an `id:` prefix, otherwise by screen name) and yield it. */
  async *items() {
    // The captured group carries a leading slash that is not part of the name.
    const name = (this.groups[0] ?? "").replace(/^\//, "");
    const user = name.startsWith("id:")
      ? await this.api.userById(name.slice(3))
      : await this.api.userByScreenName(name);
    yield directory(user);
  }
  /** Intentionally empty: this extractor has no posts. */
  async *posts() {}
};
register(InstagramInfoExtractor.subcategory, InstagramInfoExtractor);
1508
+ //#endregion
1509
+ //#region src/instagram/extractors/post.ts
1567
1510
  var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtractor {
1568
1511
  static subcategory = "post";
1569
1512
  static pattern = re(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//, /(?:share(?:\/(?:p|tv|reels?))?|(?:[^/?#]+\/)?(?:p|tv|reels?))\/([^/?#]+)/);
@@ -1598,75 +1541,9 @@ var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtra
1598
1541
  yield* this.api.media(shortcode);
1599
1542
  }
1600
1543
  };
1601
- var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
1602
- static subcategory = "user";
1603
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
1604
- subcategory = InstagramUserExtractor.subcategory;
1605
- constructor(opts) {
1606
- super(opts);
1607
- }
1608
- static fromURL(url, opts) {
1609
- const match = InstagramUserExtractor.pattern.exec(url);
1610
- if (!match) return null;
1611
- return new InstagramUserExtractor({
1612
- ...opts,
1613
- url,
1614
- match
1615
- });
1616
- }
1617
- async *items() {
1618
- await this.login();
1619
- const userPath = this.groups[0] ?? "/";
1620
- const base = `${this.root}${userPath}/`;
1621
- const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
1622
- const include = this._cfg("include", ["posts"]);
1623
- const categories = include === "all" ? [
1624
- "posts",
1625
- "reels",
1626
- "tagged",
1627
- "stories",
1628
- "highlights",
1629
- "info",
1630
- "avatar"
1631
- ] : typeof include === "string" ? include.replace(/\s+/g, "").split(",") : include;
1632
- const extractors = {
1633
- info: {
1634
- cls: InstagramInfoExtractor,
1635
- url: `${base}info/`
1636
- },
1637
- avatar: {
1638
- cls: InstagramAvatarExtractor,
1639
- url: `${base}avatar/`
1640
- },
1641
- stories: {
1642
- cls: InstagramStoriesExtractor,
1643
- url: storiesUrl
1644
- },
1645
- highlights: {
1646
- cls: InstagramHighlightsExtractor,
1647
- url: `${base}highlights/`
1648
- },
1649
- posts: {
1650
- cls: InstagramPostsExtractor,
1651
- url: `${base}posts/`
1652
- },
1653
- reels: {
1654
- cls: InstagramReelsExtractor,
1655
- url: `${base}reels/`
1656
- },
1657
- tagged: {
1658
- cls: InstagramTaggedExtractor,
1659
- url: `${base}tagged/`
1660
- }
1661
- };
1662
- for (const cat of categories) {
1663
- const entry = extractors[cat];
1664
- if (entry) yield queue(entry.url, { _extractor: entry.cls });
1665
- else this.log.warn(`Invalid include '${cat}'`);
1666
- }
1667
- }
1668
- async *posts() {}
1669
- };
1544
+ register(InstagramPostExtractor.subcategory, InstagramPostExtractor);
1545
+ //#endregion
1546
+ //#region src/instagram/extractors/posts-list.ts
1670
1547
  var InstagramPostsExtractor = class InstagramPostsExtractor extends InstagramExtractor {
1671
1548
  static subcategory = "posts";
1672
1549
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/posts/);
@@ -1689,6 +1566,9 @@ var InstagramPostsExtractor = class InstagramPostsExtractor extends InstagramExt
1689
1566
  yield* this.api.userFeed(uid);
1690
1567
  }
1691
1568
  };
1569
+ register(InstagramPostsExtractor.subcategory, InstagramPostsExtractor);
1570
+ //#endregion
1571
+ //#region src/instagram/extractors/reels-list.ts
1692
1572
  var InstagramReelsExtractor = class InstagramReelsExtractor extends InstagramExtractor {
1693
1573
  static subcategory = "reels";
1694
1574
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/reels/);
@@ -1711,44 +1591,32 @@ var InstagramReelsExtractor = class InstagramReelsExtractor extends InstagramExt
1711
1591
  yield* this.api.userClips(uid);
1712
1592
  }
1713
1593
  };
1714
- var InstagramTaggedExtractor = class InstagramTaggedExtractor extends InstagramExtractor {
1715
- static subcategory = "tagged";
1716
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/tagged/);
1717
- subcategory = InstagramTaggedExtractor.subcategory;
1718
- _taggedUserId = "";
1594
+ register(InstagramReelsExtractor.subcategory, InstagramReelsExtractor);
1595
+ //#endregion
1596
+ //#region src/instagram/extractors/saved.ts
1597
+ var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
1598
+ static subcategory = "saved";
1599
+ static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
1600
+ subcategory = InstagramSavedExtractor.subcategory;
1719
1601
  constructor(opts) {
1720
1602
  super(opts);
1721
1603
  }
1722
1604
  static fromURL(url, opts) {
1723
- const match = InstagramTaggedExtractor.pattern.exec(url);
1605
+ const match = InstagramSavedExtractor.pattern.exec(url);
1724
1606
  if (!match) return null;
1725
- return new InstagramTaggedExtractor({
1607
+ return new InstagramSavedExtractor({
1726
1608
  ...opts,
1727
1609
  url,
1728
1610
  match
1729
1611
  });
1730
1612
  }
1731
- async metadata() {
1732
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
1733
- let user;
1734
- if (screenName.startsWith("id:")) {
1735
- this._taggedUserId = screenName.slice(3);
1736
- user = await this.api.userById(screenName.slice(3));
1737
- } else {
1738
- this._taggedUserId = await this.api.userId(screenName);
1739
- user = await this.api.userByScreenName(screenName);
1740
- }
1741
- return {
1742
- tagged_owner_id: user.id ?? user.pk,
1743
- tagged_username: user.username,
1744
- tagged_full_name: user.full_name
1745
- };
1746
- }
1747
1613
  async *posts() {
1748
- if (!this._taggedUserId) await this.metadata();
1749
- yield* this.api.userTagged(this._taggedUserId);
1614
+ yield* this.api.userSaved();
1750
1615
  }
1751
1616
  };
1617
+ register(InstagramSavedExtractor.subcategory, InstagramSavedExtractor);
1618
+ //#endregion
1619
+ //#region src/instagram/extractors/stories.ts
1752
1620
  var InstagramStoriesExtractor = class InstagramStoriesExtractor extends InstagramExtractor {
1753
1621
  static subcategory = "stories";
1754
1622
  static pattern = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:stories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/;
@@ -1804,28 +1672,9 @@ var InstagramStoriesExtractor = class InstagramStoriesExtractor extends Instagra
1804
1672
  } else yield* reels;
1805
1673
  }
1806
1674
  };
1807
- var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
1808
- static subcategory = "highlights";
1809
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
1810
- subcategory = InstagramHighlightsExtractor.subcategory;
1811
- constructor(opts) {
1812
- super(opts);
1813
- }
1814
- static fromURL(url, opts) {
1815
- const match = InstagramHighlightsExtractor.pattern.exec(url);
1816
- if (!match) return null;
1817
- return new InstagramHighlightsExtractor({
1818
- ...opts,
1819
- url,
1820
- match
1821
- });
1822
- }
1823
- async *posts() {
1824
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
1825
- const uid = await this.api.userId(screenName);
1826
- yield* this.api.highlightsMedia(uid);
1827
- }
1828
- };
1675
+ register(InstagramStoriesExtractor.subcategory, InstagramStoriesExtractor);
1676
+ //#endregion
1677
+ //#region src/instagram/extractors/tag.ts
1829
1678
  var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtractor {
1830
1679
  static subcategory = "tag";
1831
1680
  static pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/);
@@ -1851,93 +1700,294 @@ var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtract
1851
1700
  yield* this.api.tagsMedia(decodeURIComponent(tag));
1852
1701
  }
1853
1702
  };
1854
- var InstagramInfoExtractor = class InstagramInfoExtractor extends InstagramExtractor {
1855
- static subcategory = "info";
1856
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/info/);
1857
- subcategory = InstagramInfoExtractor.subcategory;
1703
+ register(InstagramTagExtractor.subcategory, InstagramTagExtractor);
1704
+ //#endregion
1705
+ //#region src/instagram/extractors/tagged.ts
1706
+ var InstagramTaggedExtractor = class InstagramTaggedExtractor extends InstagramExtractor {
1707
+ static subcategory = "tagged";
1708
+ static pattern = re(BASE_RE, /(\/[^/?#]+)\/tagged/);
1709
+ subcategory = InstagramTaggedExtractor.subcategory;
1710
+ _taggedUserId = "";
1858
1711
  constructor(opts) {
1859
1712
  super(opts);
1860
1713
  }
1861
1714
  static fromURL(url, opts) {
1862
- const match = InstagramInfoExtractor.pattern.exec(url);
1715
+ const match = InstagramTaggedExtractor.pattern.exec(url);
1863
1716
  if (!match) return null;
1864
- return new InstagramInfoExtractor({
1717
+ return new InstagramTaggedExtractor({
1865
1718
  ...opts,
1866
1719
  url,
1867
1720
  match
1868
1721
  });
1869
1722
  }
1870
- async *items() {
1723
+ async metadata() {
1871
1724
  const screenName = (this.groups[0] ?? "").replace(/^\//, "");
1872
1725
  let user;
1873
- if (screenName.startsWith("id:")) user = await this.api.userById(screenName.slice(3));
1874
- else user = await this.api.userByScreenName(screenName);
1875
- yield directory(user);
1726
+ if (screenName.startsWith("id:")) {
1727
+ this._taggedUserId = screenName.slice(3);
1728
+ user = await this.api.userById(screenName.slice(3));
1729
+ } else {
1730
+ this._taggedUserId = await this.api.userId(screenName);
1731
+ user = await this.api.userByScreenName(screenName);
1732
+ }
1733
+ return {
1734
+ tagged_owner_id: user.id ?? user.pk,
1735
+ tagged_username: user.username,
1736
+ tagged_full_name: user.full_name
1737
+ };
1738
+ }
1739
+ async *posts() {
1740
+ if (!this._taggedUserId) await this.metadata();
1741
+ yield* this.api.userTagged(this._taggedUserId);
1876
1742
  }
1877
- async *posts() {}
1878
1743
  };
1879
- var InstagramAvatarExtractor = class InstagramAvatarExtractor extends InstagramExtractor {
1880
- static subcategory = "avatar";
1881
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/);
1882
- subcategory = InstagramAvatarExtractor.subcategory;
1744
+ register(InstagramTaggedExtractor.subcategory, InstagramTaggedExtractor);
1745
+ //#endregion
1746
+ //#region src/instagram/extractors/user.ts
1747
+ var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
1748
+ static subcategory = "user";
1749
+ static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
1750
+ subcategory = InstagramUserExtractor.subcategory;
1883
1751
  constructor(opts) {
1884
1752
  super(opts);
1885
1753
  }
1886
1754
  static fromURL(url, opts) {
1887
- const match = InstagramAvatarExtractor.pattern.exec(url);
1755
+ const match = InstagramUserExtractor.pattern.exec(url);
1888
1756
  if (!match) return null;
1889
- return new InstagramAvatarExtractor({
1757
+ return new InstagramUserExtractor({
1890
1758
  ...opts,
1891
1759
  url,
1892
1760
  match
1893
1761
  });
1894
1762
  }
1895
- async *posts() {
1896
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
1897
- let user;
1898
- if (screenName.startsWith("id:")) user = await this.api.userById(screenName.slice(3));
1899
- else user = await this.api.userByScreenName(screenName);
1900
- const avatar = user.hd_profile_pic_url_info ?? user.hd_profile_pic_versions?.[user.hd_profile_pic_versions.length - 1] ?? {
1901
- url: user.profile_pic_url ?? "",
1902
- width: 0,
1903
- height: 0
1763
+ async *items() {
1764
+ await this.login();
1765
+ const userPath = this.groups[0] ?? "/";
1766
+ const base = `${this.root}${userPath}/`;
1767
+ const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
1768
+ const include = this._cfg("include", ["posts"]);
1769
+ const categories = include === "all" ? [
1770
+ "posts",
1771
+ "reels",
1772
+ "tagged",
1773
+ "stories",
1774
+ "highlights",
1775
+ "info",
1776
+ "avatar"
1777
+ ] : typeof include === "string" ? include.replace(/\s+/g, "").split(",") : include;
1778
+ const urls = {
1779
+ info: `${base}info/`,
1780
+ avatar: `${base}avatar/`,
1781
+ stories: storiesUrl,
1782
+ highlights: `${base}highlights/`,
1783
+ posts: `${base}posts/`,
1784
+ reels: `${base}reels/`,
1785
+ tagged: `${base}tagged/`
1904
1786
  };
1905
- let pk = user.profile_pic_id?.split("_")[0];
1906
- let code;
1907
- if (pk) code = shortcodeFromId(pk);
1908
- else {
1909
- pk = `avatar:${user.pk}`;
1910
- code = pk;
1787
+ for (const cat of categories) {
1788
+ const cls = get(cat);
1789
+ const url = urls[cat];
1790
+ if (cls && url) yield queue(url, { _extractor: cls });
1791
+ else this.log.warn(`Invalid include '${cat}'`);
1911
1792
  }
1912
- yield {
1913
- pk,
1914
- code,
1915
- user,
1916
- caption: null,
1917
- like_count: 0,
1918
- image_versions2: { candidates: [avatar] }
1919
- };
1920
1793
  }
1794
+ async *posts() {}
1921
1795
  };
1922
- var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
1923
- static subcategory = "saved";
1924
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
1925
- subcategory = InstagramSavedExtractor.subcategory;
1926
- constructor(opts) {
1927
- super(opts);
1796
+ register(InstagramUserExtractor.subcategory, InstagramUserExtractor);
1797
+ //#endregion
1798
+ //#region src/fetcher.ts
1799
/**
 * Append query parameters to a URL string.
 *
 * Entries whose value is `null` or `undefined` are dropped; every other value
 * is stringified and percent-encoded by `URLSearchParams`. The query is
 * inserted before any `#fragment` so the parameters are actually sent, and no
 * extra separator is added when `base` already ends in `?` or `&`.
 *
 * @param {string} base - URL to extend; may already carry a query and/or fragment.
 * @param {object} [params] - Key/value pairs to append.
 * @returns {string} `base` unchanged when there is nothing to append, otherwise the extended URL.
 */
function buildUrl(base, params) {
  if (!params) return base;
  const query = new URLSearchParams();
  for (const [key, value] of Object.entries(params)) {
    if (value != null) query.append(key, String(value));
  }
  const qs = query.toString();
  if (qs === "") return base;
  // Anything after '#' is never sent to the server, so the query must precede it.
  const hashAt = base.indexOf("#");
  const path = hashAt === -1 ? base : base.slice(0, hashAt);
  const fragment = hashAt === -1 ? "" : base.slice(hashAt);
  let joiner = "?";
  if (path.includes("?")) joiner = /[?&]$/.test(path) ? "" : "&";
  return `${path}${joiner}${qs}${fragment}`;
}
1809
/**
 * Merge two Cookie header strings with append semantics: `a=1` + `b=2` → `a=1; b=2`.
 *
 * When either side is empty or missing the other side is returned as-is, so
 * the result never ends in a dangling `"; "` or a literal `"undefined"`.
 *
 * @param {string} base - Cookies that come first.
 * @param {string} extra - Cookies appended after `base`.
 * @returns {string} The combined header value.
 */
function mergeCookie(base, extra) {
  if (!base) return extra;
  if (!extra) return base;
  return `${base}; ${extra}`;
}
1814
/**
 * Pull the `csrftoken` value out of a Cookie header string.
 *
 * The name must sit at the start of the string or directly after a `;`
 * separator, so cookies that merely end in "csrftoken" are not matched.
 *
 * @param {string} cookies - Cookie header value, e.g. `"sessionid=a; csrftoken=b"`.
 * @returns {string} The token, or `""` when no non-empty `csrftoken` cookie is present.
 */
function extractCsrf(cookies) {
  const hit = cookies.match(/(?:^|;\s*)csrftoken=([^;]+)/);
  if (hit === null) return "";
  return hit[1];
}
1818
/**
 * Convert fetch `Headers` to a plain record keyed by the names the iterator
 * reports.
 *
 * Some runtimes report each `Set-Cookie` header as its own entry during
 * iteration. Plain assignment would keep only the last one, so values that
 * arrive under a name already seen are joined with `", "`.
 *
 * @param {Headers} headers - Response (or request) headers.
 * @returns {Object<string, string>} One string value per header name.
 */
function headersToRecord(headers) {
  const rec = {};
  headers.forEach((value, name) => {
    // Object.hasOwn ignores inherited keys such as "constructor".
    rec[name] = Object.hasOwn(rec, name) ? `${rec[name]}, ${value}` : value;
  });
  return rec;
}
1826
+ /** Read response body according to the requested type. */
1827
+ async function readBody(resp, responseType) {
1828
+ switch (responseType) {
1829
+ case "arraybuffer": {
1830
+ const buf = await resp.arrayBuffer();
1831
+ return Buffer.from(buf);
1832
+ }
1833
+ case "text": return resp.text();
1834
+ default: return resp.json();
1928
1835
  }
1929
- static fromURL(url, opts) {
1930
- const match = InstagramSavedExtractor.pattern.exec(url);
1931
- if (!match) return null;
1932
- return new InstagramSavedExtractor({
1933
- ...opts,
1934
- url,
1935
- match
1936
- });
1836
+ }
1837
/**
 * Serialize a request body value for `fetch`.
 *
 * - `null` / `undefined` → no body.
 * - Strings and `URLSearchParams` pass through unchanged.
 * - Binary and multipart bodies (`ArrayBuffer`, typed arrays / `DataView`,
 *   `Blob`, `FormData`) also pass through, because JSON-encoding them would
 *   send a useless object dump instead of the payload.
 * - Everything else is JSON-encoded.
 *
 * @param {*} data - Body value supplied by the caller.
 * @returns {*} A value usable as the `body` option of `fetch`, or `undefined`.
 */
function serializeBody(data) {
  if (data == null) return void 0;
  if (typeof data === "string" || data instanceof URLSearchParams) return data;
  if (data instanceof ArrayBuffer || ArrayBuffer.isView(data)) return data;
  // Blob and FormData are checked defensively in case the runtime lacks them.
  if (typeof Blob !== "undefined" && data instanceof Blob) return data;
  if (typeof FormData !== "undefined" && data instanceof FormData) return data;
  return JSON.stringify(data);
}
1844
+ const UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
1845
/**
 * Create an HttpClient backed by native ``fetch``.
 *
 * No third-party dependencies; relies on the global ``fetch``, ``Headers``
 * and ``AbortController``.
 *
 * Options:
 * - ``cookie``: static Cookie header value.
 * - ``cookieProvider``: called on every request; its result is used instead
 *   of ``cookie`` unless it is ``null``/``undefined``.
 * - ``userAgent``: sent when the request does not set its own User-Agent.
 * - ``timeout``: default timeout in milliseconds (30000).
 * - ``onResponse``: called with the response headers (as a plain record)
 *   before the body is read.
 *
 * Per request:
 * - A Cookie header already present in ``config.headers`` is appended after
 *   the client-level cookie.
 * - ``Content-Type: application/json`` is added only when the serialized body
 *   is a string and the request did not set a Content-Type.
 * - The built-in timeout applies only when the caller passes no
 *   ``config.signal``; a caller-supplied signal is used as-is.
 * - HTTP error statuses are not thrown; the status is returned to the caller.
 *
 * @example Plain (no cookies)
 * ```ts
 * const http = createFetchHttpClient()
 * ```
 *
 * @example With static cookies (CLI session mode)
 * ```ts
 * const http = createFetchHttpClient({ cookie: 'sessionid=abc; csrftoken=xyz' })
 * ```
 *
 * @example With cookie jar (anonymous session)
 * ```ts
 * const jar = createCookieJar()
 * const http = createFetchHttpClient({
 *   cookieProvider: () => jar.getCookieHeader(),
 *   onResponse: (headers) => jar.setFromResponse(headers),
 * })
 * ```
 *
 * @returns An object with a single async ``request(config)`` method resolving
 *   to ``{ status, data, headers, url }``.
 * @throws {Error} On timeout or redirect-loop failures (original error on
 *   ``cause``); any other failure is rethrown unchanged.
 */
function createFetchHttpClient(opts = {}) {
  const { cookie, cookieProvider, userAgent = UA, timeout = 3e4, onResponse } = opts;
  return { async request(config) {
    const method = config.method ?? "GET";
    const url = buildUrl(config.url, config.params);
    const headers = new Headers(config.headers);
    const reqCookie = cookieProvider?.() ?? cookie;
    if (reqCookie) {
      const existing = headers.get("Cookie");
      headers.set("Cookie", existing ? mergeCookie(reqCookie, existing) : reqCookie);
    }
    if (!headers.has("User-Agent")) headers.set("User-Agent", userAgent);
    const body = serializeBody(config.data);
    if (typeof body === "string" && !headers.has("Content-Type")) headers.set("Content-Type", "application/json");
    let controller = null;
    let timer = null;
    let signal = config.signal ?? null;
    const timeoutMs = config.timeout ?? timeout;
    if (!signal) {
      controller = new AbortController();
      timer = setTimeout(() => controller.abort(), timeoutMs);
      signal = controller.signal;
    }
    try {
      const resp = await fetch(url, {
        method,
        headers,
        body,
        signal
      });
      onResponse?.(headersToRecord(resp.headers));
      const data = await readBody(resp, config.responseType);
      return {
        status: resp.status,
        data,
        headers: headersToRecord(resp.headers),
        url: resp.url
      };
    } catch (err) {
      // Our own controller aborting means the timer fired; report it as a timeout.
      if (controller?.signal.aborted && !config.signal?.aborted) {
        throw new Error(`Request timeout after ${timeoutMs}ms: ${url}`, { cause: err });
      }
      // Node's fetch rejects with a generic "fetch failed" TypeError and puts the
      // real reason ("redirect count exceeded") on err.cause, so inspect both.
      const detail = `${String(err)} ${String(err?.cause ?? "")}`;
      if (/too many redirect|redirect count exceeded/i.test(detail)) {
        throw new Error("Too many redirects — session may be expired or invalid. Export a fresh session from your browser.", { cause: err });
      }
      throw err;
    } finally {
      if (timer) clearTimeout(timer);
    }
  } };
}
1916
+ //#endregion
1917
+ //#region src/sdk.ts
1918
+ var InstagramSDK = class {
1919
+ http;
1920
+ storage;
1921
+ log;
1922
+ config;
1923
+ _csrfToken;
1924
+ constructor(opts = {}) {
1925
+ this.http = opts.http ?? createFetchHttpClient();
1926
+ this.storage = opts.storage ?? void 0;
1927
+ this.log = opts.log ?? noopLogger;
1928
+ this.config = new ConfigManager();
1929
+ this._csrfToken = opts.csrfToken ?? "";
1937
1930
  }
1938
- async *posts() {
1939
- yield* this.api.userSaved();
1931
+ /**
1932
+ * Extract messages from an Instagram URL without downloading.
1933
+ *
1934
+ * Returns an async generator yielding Directory / Url / Queue messages.
1935
+ * Each ``url`` message includes full metadata (post_id, username, dimensions, etc.).
1936
+ */
1937
+ async *extract(url) {
1938
+ const extractor = this._resolve(url);
1939
+ await extractor.initialize();
1940
+ yield* extractor;
1941
+ }
1942
+ /**
1943
+ * Download all media from an Instagram URL.
1944
+ *
1945
+ * Uses the built-in DownloadJob + Storage to save files to disk.
1946
+ * Requires ``storage`` to be set in constructor options.
1947
+ *
1948
+ * ```ts
1949
+ * const stats = await ig.download('https://www.instagram.com/p/.../', './my-downloads')
1950
+ * // → { posts: 1, files: 9, bytes: 4500000 }
1951
+ * ```
1952
+ */
1953
+ async download(url, outputDir = "./data") {
1954
+ const job = new DownloadJob(this._resolve(url));
1955
+ job.basePath = outputDir;
1956
+ await job.run();
1957
+ return {
1958
+ posts: job._postCount ?? 0,
1959
+ files: job._fileCount ?? 0,
1960
+ bytes: job._downloadedBytes ?? 0
1961
+ };
1962
+ }
1963
+ /** Resolve a URL to an Extractor instance via pattern matching. */
1964
+ _resolve(url) {
1965
+ for (const Cls of [
1966
+ InstagramPostExtractor,
1967
+ InstagramStoriesExtractor,
1968
+ InstagramHighlightsExtractor,
1969
+ InstagramTagExtractor,
1970
+ InstagramSavedExtractor,
1971
+ InstagramPostsExtractor,
1972
+ InstagramReelsExtractor,
1973
+ InstagramTaggedExtractor,
1974
+ InstagramInfoExtractor,
1975
+ InstagramAvatarExtractor,
1976
+ InstagramUserExtractor
1977
+ ]) {
1978
+ const match = Cls.pattern.exec(url);
1979
+ if (match) return Reflect.construct(Cls, [{
1980
+ url,
1981
+ match,
1982
+ config: this.config,
1983
+ http: this.http,
1984
+ storage: this.storage,
1985
+ log: this.log,
1986
+ csrfToken: this._csrfToken
1987
+ }]);
1988
+ }
1989
+ throw new Error(`No extractor matched URL: ${url}. Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/`);
1940
1990
  }
1941
1991
  };
1942
1992
  //#endregion
1943
- export { idFromShortcode as A, parseUnicodeEscapes$1 as C, queue as D, directory as E, ConfigManager as F, Extractor as I, noopLogger as L, DownloadJob as M, Job as N, url as O, PrintJob as P, parseInt as S, unquote as T, ensureHttpScheme as _, InstagramPostsExtractor as a, findTags as b, InstagramStoriesExtractor as c, InstagramUserExtractor as d, InstagramExtractor as f, parsePostRest as g, parsePostGraphql as h, InstagramPostExtractor as i, shortcodeFromId as j, InstagramRestAPI as k, InstagramTagExtractor as l, extractTaggedUsers as m, InstagramHighlightsExtractor as n, InstagramReelsExtractor as o, extractAudio as p, InstagramInfoExtractor as r, InstagramSavedExtractor as s, InstagramAvatarExtractor as t, InstagramTaggedExtractor as u, extr as v, unescape as w, nameExtFromURL as x, extract as y };
1993
+ export { directory as A, _YELLOW as B, extract as C, parseUnicodeEscapes$1 as D, parseInt as E, Extractor as F, pad as G, c as H, noopLogger as I, ConfigManager as K, DownloadJob as L, url as M, idFromShortcode as N, unescape as O, shortcodeFromId as P, Job as R, extr as S, nameExtFromURL as T, dim as U, b as V, g as W, extractAudio as _, InstagramTaggedExtractor as a, InstagramRestAPI as b, InstagramSavedExtractor as c, InstagramPostExtractor as d, InstagramInfoExtractor as f, parsePostGraphql as g, InstagramExtractor as h, InstagramUserExtractor as i, queue as j, unquote as k, InstagramReelsExtractor as l, InstagramAvatarExtractor as m, createFetchHttpClient as n, InstagramTagExtractor as o, InstagramHighlightsExtractor as p, extractCsrf as r, InstagramStoriesExtractor as s, InstagramSDK as t, InstagramPostsExtractor as u, extractTaggedUsers as v, findTags as w, ensureHttpScheme as x, parsePostRest as y, _RESET as z };