@chilfish/gallery-dl-instagram 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/dl-ins.mjs CHANGED
@@ -5,8 +5,19 @@ import path, { dirname } from "node:path";
5
5
  import fs from "node:fs";
6
6
  import process$1 from "node:process";
7
7
  import { stripVTControlCharacters } from "node:util";
8
- import axios from "axios";
9
8
  import { access, mkdir, writeFile } from "node:fs/promises";
9
+ //#region \0rolldown/runtime.js
10
+ var __defProp = Object.defineProperty;
11
+ var __exportAll = (all, no_symbols) => {
12
+ let target = {};
13
+ for (var name in all) __defProp(target, name, {
14
+ get: all[name],
15
+ enumerable: true
16
+ });
17
+ if (!no_symbols) __defProp(target, Symbol.toStringTag, { value: "Module" });
18
+ return target;
19
+ };
20
+ //#endregion
10
21
  //#region node_modules/commander/lib/error.js
11
22
  /**
12
23
  * CommanderError class
@@ -2956,422 +2967,156 @@ function useColor() {
2956
2967
  }
2957
2968
  new Command();
2958
2969
  //#endregion
2959
- //#region src/config.ts
2960
- var ConfigManager = class {
2961
- data;
2962
- constructor(data = {}) {
2963
- this.data = data;
2970
+ //#region package.json
2971
+ var version = "0.2.1";
2972
+ //#endregion
2973
+ //#region src/utils/id-codec.ts
2974
+ /**
2975
+ * Instagram-style Base64-variant ID ↔ shortcode conversion.
2976
+ */
2977
+ const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
2978
+ /** Pre-built index for O(1) character lookup during decode. */
2979
+ const CHAR_INDEX = {};
2980
+ for (let i = 0; i < 64; i++) CHAR_INDEX[ALPHABET[i]] = i;
2981
+ const BASE = BigInt(64);
2982
+ /**
2983
+ * Decode an Instagram shortcode into its numeric post ID.
2984
+ */
2985
+ function idFromShortcode(shortcode) {
2986
+ let num = 0n;
2987
+ for (const ch of shortcode) num = num * BASE + BigInt(CHAR_INDEX[ch] ?? 0);
2988
+ return num.toString();
2989
+ }
2990
+ /**
2991
+ * Encode a numeric post ID into an Instagram shortcode.
2992
+ */
2993
+ function shortcodeFromId(postId) {
2994
+ let num = BigInt(postId);
2995
+ const chars = [];
2996
+ while (num > 0n) {
2997
+ const remainder = Number(num % BASE);
2998
+ chars.push(ALPHABET[remainder]);
2999
+ num = num / BASE;
3000
+ }
3001
+ return chars.reverse().join("");
3002
+ }
3003
+ //#endregion
3004
+ //#region src/core/extractor.ts
3005
+ var Extractor = class {
3006
+ /** Regex pattern to match against URLs */
3007
+ static pattern = /^$/;
3008
+ /** The input URL */
3009
+ url;
3010
+ /** Regex match groups from ``fromURL`` */
3011
+ groups;
3012
+ config;
3013
+ /** HTTP client — public so Job can access for downloads */
3014
+ http;
3015
+ /** Storage backend — public so Job can access for writes */
3016
+ storage;
3017
+ /** Logger instance — public so Job can access for reporting */
3018
+ log;
3019
+ /** Delay range in seconds — random between [min, max] before each request */
3020
+ requestInterval = [6, 12];
3021
+ _initialized = false;
3022
+ constructor(opts) {
3023
+ this.url = opts.url;
3024
+ this.groups = opts.match ? [...opts.match].slice(1) : [];
3025
+ this.config = opts.config;
3026
+ this.http = opts.http;
3027
+ this.storage = opts.storage;
3028
+ this.log = opts.log;
2964
3029
  }
3030
+ /** Initialization */
2965
3031
  /**
2966
- * Read a value at a dot-path like ``'extractor.instagram.videos'``.
2967
- * Returns ``undefined`` when the path doesn't exist.
3032
+ * One-time async setup (cookies, session, internal state).
3033
+ * Safe to call multiple times — after the first call it becomes a no-op.
2968
3034
  */
2969
- get(path, defaultValue) {
2970
- const keys = path.split(".");
2971
- let node = this.data;
2972
- for (const key of keys) {
2973
- if (node == null || typeof node !== "object" || Array.isArray(node)) return defaultValue;
2974
- node = node[key];
2975
- }
2976
- if (node === void 0) return defaultValue;
2977
- return node;
3035
+ async initialize() {
3036
+ if (this._initialized) return;
3037
+ await this._init();
3038
+ this._initialized = true;
3039
+ this.initialize = async () => {};
2978
3040
  }
2979
3041
  /**
2980
- * Interpolate a config key through a hierarchy of paths.
3042
+ * Subclass hook for one-time setup.
2981
3043
  */
2982
- interpolate(cfgPath, key, defaultVal) {
2983
- let node = this.data;
2984
- for (let i = 0; i < cfgPath.length; i++) {
2985
- if (node != null && typeof node === "object" && !Array.isArray(node)) {
2986
- const v = node[key];
2987
- if (v !== void 0) return v;
2988
- }
2989
- if (node == null || typeof node !== "object" || Array.isArray(node)) break;
2990
- node = node[cfgPath[i]];
2991
- }
2992
- return defaultVal;
3044
+ async _init() {}
3045
+ /** Async iteration */
3046
+ async *[Symbol.asyncIterator]() {
3047
+ await this.initialize();
3048
+ yield* this.items();
2993
3049
  }
3050
+ /** Config helpers */
2994
3051
  /**
2995
- * Mutate the config at a given dot-path.
3052
+ * Read a config value using the interpolated hierarchy.
2996
3053
  */
2997
- set(path, value) {
2998
- const keys = path.split(".");
2999
- let node = this.data;
3000
- for (let i = 0; i < keys.length - 1; i++) {
3001
- const key = keys[i];
3002
- let child = node[key];
3003
- if (child == null || typeof child !== "object" || Array.isArray(child)) {
3004
- child = {};
3005
- node[key] = child;
3006
- }
3007
- node = child;
3008
- }
3009
- node[keys[keys.length - 1]] = value;
3054
+ _cfg(key, defaultVal) {
3055
+ const path = [
3056
+ "extractor",
3057
+ this.category,
3058
+ this.subcategory
3059
+ ];
3060
+ return this.config.interpolate(path, key, defaultVal);
3010
3061
  }
3011
- };
3012
- //#endregion
3013
- //#region src/core/job.ts
3014
- function formatBytes(bytes) {
3015
- if (bytes === 0) return "0 B";
3016
- const units = [
3017
- "B",
3018
- "KB",
3019
- "MB",
3020
- "GB"
3021
- ];
3022
- const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
3023
- return `${(bytes / 1024 ** i).toFixed(i === 0 ? 0 : 1)} ${units[i]}`;
3024
- }
3025
- const BOLD = "\x1B[1m";
3026
- const DIM = "\x1B[2m";
3027
- const CYAN = "\x1B[36m";
3028
- const GREEN = "\x1B[32m";
3029
- const YELLOW = "\x1B[33m";
3030
- const RESET = "\x1B[0m";
3031
- function b(s) {
3032
- return `${BOLD}${s}${RESET}`;
3033
- }
3034
- function dim(s) {
3035
- return `${DIM}${s}${RESET}`;
3036
- }
3037
- function c(s) {
3038
- return `${CYAN}${s}${RESET}`;
3039
- }
3040
- function g(s) {
3041
- return `${GREEN}${s}${RESET}`;
3042
- }
3043
- function pad(s, n) {
3044
- return s.length >= n ? s : s + " ".repeat(n - s.length);
3045
- }
3046
- var Job = class {
3047
- extractor;
3048
- status = 0;
3049
- constructor(extractor) {
3050
- this.extractor = extractor;
3062
+ /** HTTP */
3063
+ _lastRequestTime = 0;
3064
+ /**
3065
+ * Rate-limited HTTP request wrapper.
3066
+ */
3067
+ async request(url, cfg = {}) {
3068
+ await this._throttle();
3069
+ const response = await this.http.request({
3070
+ url,
3071
+ ...cfg
3072
+ });
3073
+ this._lastRequestTime = Date.now();
3074
+ return response;
3051
3075
  }
3052
3076
  /**
3053
- * Main entry point. Calls ``extractor[Symbol.asyncIterator]()`` and
3054
- * dispatches every yielded message.
3077
+ * Convenience: request + parse JSON body.
3055
3078
  */
3056
- async run() {
3057
- this.extractor.log.info(`Starting ${this.extractor.category}/${this.extractor.subcategory} ${this.extractor.url}`);
3058
- await this.extractor.initialize();
3059
- for await (const msg of this.extractor) switch (msg.type) {
3060
- case "directory":
3061
- await this.handleDirectory(msg);
3062
- break;
3063
- case "url":
3064
- await this.handleUrl(msg);
3065
- break;
3066
- case "queue":
3067
- await this.handleQueue(msg);
3068
- break;
3079
+ async requestJSON(url, cfg = {}) {
3080
+ const resp = await this.request(url, cfg);
3081
+ if (typeof resp.data === "object") return resp.data;
3082
+ try {
3083
+ return JSON.parse(resp.data);
3084
+ } catch {
3085
+ return {};
3069
3086
  }
3070
- this._report();
3071
- return this.status;
3072
3087
  }
3073
- /** Override in subclasses to print a summary. */
3074
- _report() {}
3075
- };
3076
- var DownloadJob = class DownloadJob extends Job {
3077
- /** Base output directory (prepended to all paths). */
3078
- basePath = "";
3079
- /** Current target directory metadata (set by directory messages). */
3080
- _currentDir = {};
3081
- /** In-memory archive keyed by archive format. */
3082
- archive = /* @__PURE__ */ new Map();
3088
+ /** Rate limiting */
3083
3089
  /**
3084
- * Registry of per-category "archive formats" the key is formed
3085
- * by interpolating this format string over the metadata.
3090
+ * Sleep long enough to keep the minimum interval between requests.
3086
3091
  */
3087
- _archiveFmts = /* @__PURE__ */ new Map();
3088
- _postCount = 0;
3089
- _fileCount = 0;
3090
- _downloadedBytes = 0;
3091
- _skippedCount = 0;
3092
- registerArchive(category, format) {
3093
- this._archiveFmts.set(category, format);
3092
+ async _throttle() {
3093
+ const elapsed = Date.now() - this._lastRequestTime;
3094
+ const [min, max] = this.requestInterval;
3095
+ const target = min + Math.random() * (max - min);
3096
+ const waitMs = Math.max(0, target * 1e3 - elapsed);
3097
+ if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
3094
3098
  }
3095
- /** Simple format-string interpolation for archive keys. */
3096
- _interp(fmt, meta) {
3097
- return fmt.replace(/\{(\w+)\}/g, (_, key) => {
3098
- const v = meta[key];
3099
- return v == null ? "" : String(v);
3100
- });
3099
+ /** Utility */
3100
+ /**
3101
+ * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
3102
+ */
3103
+ parseTimestamp(ts) {
3104
+ if (ts == null) return "";
3105
+ const asMs = ts > 25e8 ? ts : ts * 1e3;
3106
+ return new Date(asMs).toISOString();
3101
3107
  }
3102
- /** Check whether this URL has already been downloaded (and skip). */
3103
- _isArchived(meta) {
3104
- const cat = meta.category ?? this.extractor.category;
3105
- const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
3106
- const key = this._interp(fmt, meta);
3107
- const set = this.archive.get(cat);
3108
- if (set && set.has(key)) return true;
3109
- return false;
3110
- }
3111
- /** Mark a post/media as archived. */
3112
- _archive(meta) {
3113
- const cat = meta.category ?? this.extractor.category;
3114
- const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
3115
- const key = this._interp(fmt, meta);
3116
- let set = this.archive.get(cat);
3117
- if (!set) {
3118
- set = /* @__PURE__ */ new Set();
3119
- this.archive.set(cat, set);
3120
- }
3121
- set.add(key);
3122
- }
3123
- /** Handlers */
3124
- async handleDirectory(msg) {
3125
- this._currentDir = { ...msg.metadata };
3126
- this._postCount++;
3127
- const dirPath = this.basePath ? `${this.basePath}/${this._buildDirPath(msg.metadata)}` : this._buildDirPath(msg.metadata);
3128
- await this.extractor.storage.mkdir(dirPath);
3129
- this.extractor.log.info(`#${this._postCount} ${msg.metadata.username ?? "?"}/${msg.metadata.post_shortcode ?? "?"} → ${dirPath}/`);
3130
- }
3131
- async handleUrl(msg) {
3132
- const meta = {
3133
- ...this._currentDir,
3134
- ...msg.metadata
3135
- };
3136
- if (this._isArchived(meta)) {
3137
- this._skippedCount++;
3138
- return;
3139
- }
3140
- const filename = this._buildFilename(meta);
3141
- const fullPath = `${this.basePath ? `${this.basePath}/${this._buildDirPath(meta)}` : this._buildDirPath(meta)}/${filename}`;
3142
- try {
3143
- const resp = await this.extractor.http.request({
3144
- url: msg.url,
3145
- method: "GET",
3146
- responseType: "arraybuffer"
3147
- });
3148
- let data;
3149
- if (resp.data instanceof Uint8Array) data = resp.data;
3150
- else if (resp.data instanceof ArrayBuffer) data = new Uint8Array(resp.data);
3151
- else if (typeof resp.data === "string") data = resp.data;
3152
- else if (typeof resp.data === "object" && resp.data != null && "type" in resp.data && resp.data.type === "Buffer") data = new Uint8Array(resp.data);
3153
- else data = JSON.stringify(resp.data);
3154
- await this.extractor.storage.write(fullPath, data);
3155
- this._fileCount++;
3156
- const size = data instanceof Uint8Array ? data.byteLength : data.length;
3157
- this._downloadedBytes += size;
3158
- this.extractor.log.info(` └─ ${filename} (${formatBytes(size)})`);
3159
- this._archive(meta);
3160
- } catch (err) {
3161
- this.extractor.log.error(`Failed to download ${filename}: ${String(err)}`);
3162
- this.status |= 4;
3163
- }
3164
- }
3165
- async handleQueue(msg) {
3166
- const meta = {
3167
- ...this._currentDir,
3168
- ...msg.metadata
3169
- };
3170
- const extrClass = meta._extractor;
3171
- if (!extrClass || typeof extrClass !== "object") return;
3172
- const cls = extrClass;
3173
- const match = cls.pattern.exec(msg.url);
3174
- if (!match) return;
3175
- const parentExtr = this.extractor;
3176
- const childJob = new DownloadJob(Reflect.construct(cls, [{
3177
- url: msg.url,
3178
- match,
3179
- config: parentExtr.config,
3180
- http: parentExtr.http,
3181
- storage: parentExtr.storage,
3182
- log: parentExtr.log
3183
- }]));
3184
- childJob.basePath = this.basePath;
3185
- childJob._currentDir = meta;
3186
- for (const [cat, set] of this.archive) childJob.archive.set(cat, new Set(set));
3187
- for (const [cat, fmt] of this._archiveFmts) childJob._archiveFmts.set(cat, fmt);
3188
- const childStatus = await childJob.run();
3189
- this.status |= childStatus;
3190
- for (const [cat, set] of childJob.archive) {
3191
- const mine = this.archive.get(cat);
3192
- if (mine) for (const k of set) mine.add(k);
3193
- else this.archive.set(cat, set);
3194
- }
3195
- }
3196
- /** Report */
3197
- _report() {
3198
- const log = this.extractor.log;
3199
- log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
3200
- if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
3201
- }
3202
- /** Path builders */
3203
- _buildDirPath(meta) {
3204
- return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
3205
- }
3206
- _buildFilename(meta) {
3207
- const mid = meta.media_id ?? "0";
3208
- const ext = meta.extension ?? "jpg";
3209
- return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
3210
- }
3211
- };
3212
- var PrintJob = class PrintJob extends Job {
3213
- _currentDir = {};
3214
- _files = [];
3215
- _postCount = 0;
3216
- _fileCount = 0;
3217
- _width;
3218
- constructor(extractor) {
3219
- super(extractor);
3220
- this._width = Math.min(process.stdout.columns ?? 80, 100);
3221
- }
3222
- async handleDirectory(msg) {
3223
- if (this._postCount > 0) this._flushPost();
3224
- this._currentDir = { ...msg.metadata };
3225
- this._postCount++;
3226
- this._files = [];
3227
- }
3228
- async handleUrl(msg) {
3229
- const meta = {
3230
- ...this._currentDir,
3231
- ...msg.metadata
3232
- };
3233
- this._fileCount++;
3234
- const ext = meta.extension ?? "jpg";
3235
- const mid = meta.media_id ?? "?";
3236
- this._files.push({
3237
- num: meta.num ?? this._files.length + 1,
3238
- filename: `${mid}.${ext}`,
3239
- width: meta.width ?? 0,
3240
- height: meta.height ?? 0,
3241
- videoUrl: meta.video_url ?? null,
3242
- audioUrl: meta.audio_url ?? null
3243
- });
3244
- }
3245
- async handleQueue(msg) {
3246
- if (this._files.length > 0 || this._postCount > 0) this._flushPost();
3247
- this._postCount = 0;
3248
- this._files = [];
3249
- const extrClass = {
3250
- ...this._currentDir,
3251
- ...msg.metadata
3252
- }._extractor;
3253
- if (!extrClass || typeof extrClass !== "object") return;
3254
- const cls = extrClass;
3255
- const match = cls.pattern.exec(msg.url);
3256
- if (!match) return;
3257
- const parentExtr = this.extractor;
3258
- const childJob = new PrintJob(Reflect.construct(cls, [{
3259
- url: msg.url,
3260
- match,
3261
- config: parentExtr.config,
3262
- http: parentExtr.http,
3263
- storage: parentExtr.storage,
3264
- log: parentExtr.log
3265
- }]));
3266
- const childStatus = await childJob.run();
3267
- this.status |= childStatus;
3268
- this._postCount += childJob._postCount;
3269
- this._fileCount += childJob._fileCount;
3270
- }
3271
- /** Output */
3272
- _flushPost() {
3273
- const m = this._currentDir;
3274
- if (Object.keys(m).length === 0) return;
3275
- const w = this._width;
3276
- const labelW = 14;
3277
- const shortcode = m.post_shortcode ?? "?";
3278
- const header = ` Post #${this._postCount}: ${shortcode} `;
3279
- const padTotal = w - 2 - header.length;
3280
- const padL = Math.floor(padTotal / 2);
3281
- const padR = padTotal - padL;
3282
- process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
3283
- const row = (label, value, color) => {
3284
- const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${RESET}` : value;
3285
- process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
3286
- };
3287
- const username = m.username ?? "?";
3288
- const fullname = m.fullname ?? "";
3289
- row("Author:", fullname ? `${username} (${fullname})` : username, g);
3290
- row("Date:", m.date ?? m.post_date ?? "?");
3291
- row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
3292
- row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
3293
- row("URL:", m.post_url ?? "?");
3294
- const desc = m.description ?? "";
3295
- if (desc) {
3296
- process.stdout.write(` ${dim("│")}\n`);
3297
- process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
3298
- const lines = desc.split("\n");
3299
- for (const line of lines) {
3300
- const wrapped = this._wrap(line, w - 8);
3301
- for (const wl of wrapped) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
3302
- }
3303
- }
3304
- const tags = m.tags;
3305
- if (tags && tags.length > 0) {
3306
- process.stdout.write(` ${dim("│")}\n`);
3307
- process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
3308
- }
3309
- const locName = m.location_slug ?? "";
3310
- const locId = m.location_id ?? "";
3311
- if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
3312
- const coauthors = m.coauthors;
3313
- if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((c) => c.full_name ? `${c.username} (${c.full_name})` : c.username).join(", "));
3314
- const pinned = m.pinned;
3315
- if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
3316
- const expires = m.expires;
3317
- if (expires) row("Expires:", expires, YELLOW);
3318
- const hlTitle = m.highlight_title;
3319
- if (hlTitle) row("Highlight:", hlTitle);
3320
- const taggedUser = m.tagged_username ?? "";
3321
- if (taggedUser) {
3322
- const taggedFull = m.tagged_full_name ?? "";
3323
- row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
3324
- }
3325
- if (this._files.length > 0) {
3326
- process.stdout.write(` ${dim("│")}\n`);
3327
- process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
3328
- const maxNumW = String(this._files.length).length;
3329
- const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
3330
- const dimW = Math.min(maxFileW, 40);
3331
- for (const f of this._files) {
3332
- const numStr = `[${String(f.num).padStart(maxNumW)}]`;
3333
- const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
3334
- const res = f.width ? `${f.width}x${f.height}` : "?x?";
3335
- const badges = [];
3336
- if (f.videoUrl) badges.push("video");
3337
- if (f.audioUrl) badges.push("audio");
3338
- let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
3339
- if (badges.length > 0) line += ` ${YELLOW}(${badges.join("+")})${RESET}`;
3340
- process.stdout.write(`${line}\n`);
3341
- }
3342
- }
3343
- process.stdout.write(` ${dim("└")}${"─".repeat(w - 2)}${dim("┘")}\n`);
3344
- }
3345
- _wrap(text, maxLen) {
3346
- if (text.length <= maxLen) return [text];
3347
- const lines = [];
3348
- let remaining = text;
3349
- while (remaining.length > maxLen) {
3350
- let cut = maxLen;
3351
- while (cut > 0 && remaining[cut] !== " ") cut--;
3352
- if (cut === 0) cut = maxLen;
3353
- lines.push(remaining.slice(0, cut).trimEnd());
3354
- remaining = remaining.slice(cut).trimStart();
3355
- }
3356
- if (remaining) lines.push(remaining);
3357
- return lines;
3358
- }
3359
- _report() {
3360
- this._flushPost();
3361
- process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
3362
- process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
3363
- process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
3364
- process.stdout.write(`\n`);
3108
+ /**
3109
+ * Generate a random hex token (used for CSRF).
3110
+ */
3111
+ static generateToken(size = 16) {
3112
+ const bytes = new Uint8Array(size);
3113
+ if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
3114
+ else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
3115
+ return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
3365
3116
  }
3366
3117
  };
3367
3118
  //#endregion
3368
3119
  //#region src/message.ts
3369
- function directory(metadata = {}) {
3370
- return {
3371
- type: "directory",
3372
- metadata
3373
- };
3374
- }
3375
3120
  function url(u, metadata = {}) {
3376
3121
  return {
3377
3122
  type: "url",
@@ -3387,41 +3132,10 @@ function queue(u, metadata = {}) {
3387
3132
  };
3388
3133
  }
3389
3134
  //#endregion
3390
- //#region src/utils/id-codec.ts
3391
- /**
3392
- * Instagram-style Base64-variant ID ↔ shortcode conversion.
3393
- */
3394
- const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
3395
- /** Pre-built index for O(1) character lookup during decode. */
3396
- const CHAR_INDEX = {};
3397
- for (let i = 0; i < 64; i++) CHAR_INDEX[ALPHABET[i]] = i;
3398
- const BASE = BigInt(64);
3135
+ //#region src/utils/text.ts
3136
+ /** URL helpers */
3399
3137
  /**
3400
- * Decode an Instagram shortcode into its numeric post ID.
3401
- */
3402
- function idFromShortcode(shortcode) {
3403
- let num = 0n;
3404
- for (const ch of shortcode) num = num * BASE + BigInt(CHAR_INDEX[ch] ?? 0);
3405
- return num.toString();
3406
- }
3407
- /**
3408
- * Encode a numeric post ID into an Instagram shortcode.
3409
- */
3410
- function shortcodeFromId(postId) {
3411
- let num = BigInt(postId);
3412
- const chars = [];
3413
- while (num > 0n) {
3414
- const remainder = Number(num % BASE);
3415
- chars.push(ALPHABET[remainder]);
3416
- num = num / BASE;
3417
- }
3418
- return chars.reverse().join("");
3419
- }
3420
- //#endregion
3421
- //#region src/utils/text.ts
3422
- /** URL helpers */
3423
- /**
3424
- * URL-decode a string.
3138
+ * URL-decode a string.
3425
3139
  */
3426
3140
  function unquote(text) {
3427
3141
  try {
@@ -3478,121 +3192,6 @@ function tagRe(pattern) {
3478
3192
  /** Pre-configured hashtag regex. */
3479
3193
  const findTags = tagRe("#\\w+");
3480
3194
  //#endregion
3481
- //#region src/core/extractor.ts
3482
- var Extractor = class {
3483
- /** Regex pattern to match against URLs */
3484
- static pattern = /^$/;
3485
- /** The input URL */
3486
- url;
3487
- /** Regex match groups from ``fromURL`` */
3488
- groups;
3489
- config;
3490
- /** HTTP client — public so Job can access for downloads */
3491
- http;
3492
- /** Storage backend — public so Job can access for writes */
3493
- storage;
3494
- /** Logger instance — public so Job can access for reporting */
3495
- log;
3496
- /** Delay range in seconds — random between [min, max] before each request */
3497
- requestInterval = [6, 12];
3498
- _initialized = false;
3499
- constructor(opts) {
3500
- this.url = opts.url;
3501
- this.groups = opts.match ? [...opts.match].slice(1) : [];
3502
- this.config = opts.config;
3503
- this.http = opts.http;
3504
- this.storage = opts.storage;
3505
- this.log = opts.log;
3506
- }
3507
- /** Initialization */
3508
- /**
3509
- * One-time async setup (cookies, session, internal state).
3510
- * Safe to call multiple times — after the first call it becomes a no-op.
3511
- */
3512
- async initialize() {
3513
- if (this._initialized) return;
3514
- await this._init();
3515
- this._initialized = true;
3516
- this.initialize = async () => {};
3517
- }
3518
- /**
3519
- * Subclass hook for one-time setup.
3520
- */
3521
- async _init() {}
3522
- /** Async iteration */
3523
- async *[Symbol.asyncIterator]() {
3524
- await this.initialize();
3525
- yield* this.items();
3526
- }
3527
- /** Config helpers */
3528
- /**
3529
- * Read a config value using the interpolated hierarchy.
3530
- */
3531
- _cfg(key, defaultVal) {
3532
- const path = [
3533
- "extractor",
3534
- this.category,
3535
- this.subcategory
3536
- ];
3537
- return this.config.interpolate(path, key, defaultVal);
3538
- }
3539
- /** HTTP */
3540
- _lastRequestTime = 0;
3541
- /**
3542
- * Rate-limited HTTP request wrapper.
3543
- */
3544
- async request(url, cfg = {}) {
3545
- await this._throttle();
3546
- const response = await this.http.request({
3547
- url,
3548
- ...cfg
3549
- });
3550
- this._lastRequestTime = Date.now();
3551
- return response;
3552
- }
3553
- /**
3554
- * Convenience: request + parse JSON body.
3555
- */
3556
- async requestJSON(url, cfg = {}) {
3557
- const resp = await this.request(url, cfg);
3558
- if (typeof resp.data === "object") return resp.data;
3559
- try {
3560
- return JSON.parse(resp.data);
3561
- } catch {
3562
- return {};
3563
- }
3564
- }
3565
- /** Rate limiting */
3566
- /**
3567
- * Sleep long enough to keep the minimum interval between requests.
3568
- */
3569
- async _throttle() {
3570
- const elapsed = Date.now() - this._lastRequestTime;
3571
- const [min, max] = this.requestInterval;
3572
- const target = min + Math.random() * (max - min);
3573
- const waitMs = Math.max(0, target * 1e3 - elapsed);
3574
- if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
3575
- }
3576
- /** Utility */
3577
- /**
3578
- * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
3579
- */
3580
- parseTimestamp(ts) {
3581
- if (ts == null) return "";
3582
- const asMs = ts > 25e8 ? ts : ts * 1e3;
3583
- return new Date(asMs).toISOString();
3584
- }
3585
- /**
3586
- * Generate a random hex token (used for CSRF).
3587
- */
3588
- static generateToken(size = 16) {
3589
- const bytes = new Uint8Array(size);
3590
- if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
3591
- else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
3592
- return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
3593
- }
3594
- };
3595
- //#endregion
3596
3195
  //#region src/instagram/api.ts
3597
3196
  const APP_ID = "936619743392459";
3598
3197
  const ASBD_ID = "129477";
@@ -3933,8 +3532,8 @@ var InstagramRestAPI = class {
3933
3532
  }
3934
3533
  };
3935
3534
  //#endregion
3936
- //#region src/instagram/parsers.ts
3937
- /** Main entry — REST */
3535
+ //#region src/instagram/parsers/rest.ts
3536
+ /** Main entry — parse a REST post response. */
3938
3537
  function parsePostRest(post, cfg) {
3939
3538
  if (post.items) return parseStoryRest(post, cfg);
3940
3539
  const owner = post.user;
@@ -3962,10 +3561,9 @@ function parsePostRest(post, cfg) {
3962
3561
  if (tags.length > 0) data.tags = [...new Set(tags)].sort();
3963
3562
  if (post.location) {
3964
3563
  const loc = post.location;
3965
- const slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
3966
3564
  data.location_id = loc.pk;
3967
- data.location_slug = slug;
3968
- data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${slug}/`;
3565
+ data.location_slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
3566
+ data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${data.location_slug}/`;
3969
3567
  }
3970
3568
  if (post.coauthor_producers) data.coauthors = post.coauthor_producers.map((u) => ({
3971
3569
  id: u.pk,
@@ -4012,7 +3610,7 @@ function parsePostRest(post, cfg) {
4012
3610
  if (post.subscription_media_visibility) data.subscription = post.subscription_media_visibility;
4013
3611
  return data;
4014
3612
  }
4015
- /** Story / highlight */
3613
+ /** Parse a story or highlight REST response. */
4016
3614
  function parseStoryRest(post, cfg) {
4017
3615
  const items = post.items;
4018
3616
  const reelId = String(post.id).split(":").pop() ?? "0";
@@ -4038,9 +3636,8 @@ function parseStoryRest(post, cfg) {
4038
3636
  expires: expires ? cfg.parseTimestamp(expires) : void 0,
4039
3637
  user: post.user
4040
3638
  };
4041
- if (!isStory) {
4042
- if (post.title) data.highlight_title = post.title;
4043
- } else if (!post.seen) post.seen = expires - 86400;
3639
+ if (!isStory && post.title) data.highlight_title = post.title;
3640
+ else if (!post.seen) post.seen = expires - 86400;
4044
3641
  for (let num = 0; num < items.length; num++) {
4045
3642
  const item = items[num];
4046
3643
  const media = parseMediaItem(item, post, cfg, num + 1);
@@ -4050,7 +3647,7 @@ function parseStoryRest(post, cfg) {
4050
3647
  }
4051
3648
  return data;
4052
3649
  }
4053
- /** Single media item */
3650
+ /** Parse a single media item (image/video) from a carousel or story. */
4054
3651
  function parseMediaItem(item, parent, cfg, num) {
4055
3652
  let image;
4056
3653
  try {
@@ -4109,7 +3706,7 @@ function parseMediaItem(item, parent, cfg, num) {
4109
3706
  if (itemRec.audience) media.audience = itemRec.audience;
4110
3707
  return media;
4111
3708
  }
4112
- /** Tagged users */
3709
+ /** Extract tagged users from various field formats. */
4113
3710
  function extractTaggedUsers(src, dest) {
4114
3711
  dest.tagged_users = [];
4115
3712
  const edges = src.edge_media_to_tagged_user;
@@ -4152,13 +3749,9 @@ function extractTaggedUsers(src, dest) {
4152
3749
  }
4153
3750
  }
4154
3751
  const seen = /* @__PURE__ */ new Set();
4155
- dest.tagged_users = dest.tagged_users.filter((t) => {
4156
- if (seen.has(t.id)) return false;
4157
- seen.add(t.id);
4158
- return true;
4159
- });
3752
+ dest.tagged_users = dest.tagged_users.filter((t) => seen.has(t.id) ? false : (seen.add(t.id), true));
4160
3753
  }
4161
- /** Audio / music extraction */
3754
+ /** Extract audio/music metadata from a story sticker. */
4162
3755
  function extractAudio(src, dest, sticker, cfg) {
4163
3756
  const info = sticker.music_asset_info;
4164
3757
  if (!info) return null;
@@ -4190,7 +3783,14 @@ function extractAudio(src, dest, sticker, cfg) {
4190
3783
  audio_timestamps: info.highlight_start_times_in_ms
4191
3784
  };
4192
3785
  }
4193
- /** GraphQL parser */
3786
+ function extractPinned(post) {
3787
+ if (post.timeline_pinned_user_ids) return post.timeline_pinned_user_ids;
3788
+ if (post.clips_tab_pinned_user_ids) return post.clips_tab_pinned_user_ids;
3789
+ return [];
3790
+ }
3791
+ //#endregion
3792
+ //#region src/instagram/parsers/graphql.ts
3793
+ /** Parse a GraphQL post/edge response. */
4194
3794
  function parsePostGraphql(post, cfg) {
4195
3795
  const typename = post.__typename ?? "GraphImage";
4196
3796
  const owner = post.owner;
@@ -4275,11 +3875,6 @@ function parsePostGraphql(post, cfg) {
4275
3875
  }
4276
3876
  return data;
4277
3877
  }
4278
- function extractPinned(post) {
4279
- if (post.timeline_pinned_user_ids) return post.timeline_pinned_user_ids;
4280
- if (post.clips_tab_pinned_user_ids) return post.clips_tab_pinned_user_ids;
4281
- return [];
4282
- }
4283
3878
  function parseUnicodeEscapes(text) {
4284
3879
  if (!text.includes("\\u")) return text;
4285
3880
  return text.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
@@ -4438,12 +4033,49 @@ var InstagramExtractor = class extends Extractor {
4438
4033
  }
4439
4034
  };
4440
4035
  //#endregion
4441
- //#region src/instagram/extractors.ts
4036
+ //#region src/instagram/extractors/helpers.ts
4037
+ /** Shared regex utilities for Instagram extractor URL patterns. */
4442
4038
/** Scheme/host prefix that the Instagram URL patterns built with `re` start from. */
const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/;
/**
 * Build a case-insensitive pattern by appending `path` to `base`.
 *
 * @param {RegExp} base - Prefix pattern; only its `source` is used, its flags are dropped.
 * @param {string|RegExp} path - Suffix, either as regex source text or as a RegExp.
 * @returns {RegExp} A new RegExp with the `i` flag.
 */
function re(base, path) {
  let suffix = path;
  if (typeof suffix !== "string") suffix = suffix.source;
  return new RegExp(`${base.source}${suffix}`, "i");
}
4043
+ //#endregion
4044
+ //#region src/instagram/extractors/registry.ts
4045
/** Lookup table from subcategory name to extractor class. */
const _registry = /* @__PURE__ */ new Map();
/**
 * Record `cls` under `subcategory`, replacing any earlier entry for that name.
 */
function register(subcategory, cls) {
  _registry.set(subcategory, cls);
}
/**
 * Look up the class registered for `subcategory`.
 *
 * @returns The registered class, or `undefined` when nothing was registered.
 */
function get(subcategory) {
  const found = _registry.get(subcategory);
  return found;
}
4052
+ //#endregion
4053
+ //#region src/instagram/extractors/highlights.ts
4054
/**
 * Extractor for `instagram.com/<user>/highlights` URLs.
 *
 * The first capture group holds the user path segment including its leading
 * slash; `posts()` strips that slash, resolves the user ID through the API
 * and streams whatever `api.highlightsMedia` yields.
 */
var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
  static subcategory = "highlights";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
  subcategory = InstagramHighlightsExtractor.subcategory;

  constructor(opts) {
    super(opts);
  }

  /**
   * Create an extractor for `url`, or return `null` when the URL does not
   * match this extractor's pattern.
   */
  static fromURL(url, opts) {
    const match = InstagramHighlightsExtractor.pattern.exec(url);
    return match ? new InstagramHighlightsExtractor({ ...opts, url, match }) : null;
  }

  /** Yield the media of the matched user's highlights. */
  async *posts() {
    const userPath = this.groups[0] ?? "";
    const screenName = userPath.replace(/^\//, "");
    const userId = await this.api.userId(screenName);
    yield* this.api.highlightsMedia(userId);
  }
};
register(InstagramHighlightsExtractor.subcategory, InstagramHighlightsExtractor);
4077
+ //#endregion
4078
+ //#region src/instagram/extractors/post.ts
4447
4079
  var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtractor {
4448
4080
  static subcategory = "post";
4449
4081
  static pattern = re(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//, /(?:share(?:\/(?:p|tv|reels?))?|(?:[^/?#]+\/)?(?:p|tv|reels?))\/([^/?#]+)/);
@@ -4478,184 +4110,59 @@ var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtra
4478
4110
  yield* this.api.media(shortcode);
4479
4111
  }
4480
4112
  };
4481
- var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
4482
- static subcategory = "user";
4483
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
4484
- subcategory = InstagramUserExtractor.subcategory;
4485
- constructor(opts) {
4486
- super(opts);
4487
- }
4488
- static fromURL(url, opts) {
4489
- const match = InstagramUserExtractor.pattern.exec(url);
4490
- if (!match) return null;
4491
- return new InstagramUserExtractor({
4492
- ...opts,
4493
- url,
4494
- match
4495
- });
4496
- }
4497
- async *items() {
4498
- await this.login();
4499
- const userPath = this.groups[0] ?? "/";
4500
- const base = `${this.root}${userPath}/`;
4501
- const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
4502
- const include = this._cfg("include", ["posts"]);
4503
- const categories = include === "all" ? [
4504
- "posts",
4505
- "reels",
4506
- "tagged",
4507
- "stories",
4508
- "highlights",
4509
- "info",
4510
- "avatar"
4511
- ] : typeof include === "string" ? include.replace(/\s+/g, "").split(",") : include;
4512
- const extractors = {
4513
- info: {
4514
- cls: InstagramInfoExtractor,
4515
- url: `${base}info/`
4516
- },
4517
- avatar: {
4518
- cls: InstagramAvatarExtractor,
4519
- url: `${base}avatar/`
4520
- },
4521
- stories: {
4522
- cls: InstagramStoriesExtractor,
4523
- url: storiesUrl
4524
- },
4525
- highlights: {
4526
- cls: InstagramHighlightsExtractor,
4527
- url: `${base}highlights/`
4528
- },
4529
- posts: {
4530
- cls: InstagramPostsExtractor,
4531
- url: `${base}posts/`
4532
- },
4533
- reels: {
4534
- cls: InstagramReelsExtractor,
4535
- url: `${base}reels/`
4536
- },
4537
- tagged: {
4538
- cls: InstagramTaggedExtractor,
4539
- url: `${base}tagged/`
4540
- }
4541
- };
4542
- for (const cat of categories) {
4543
- const entry = extractors[cat];
4544
- if (entry) yield queue(entry.url, { _extractor: entry.cls });
4545
- else this.log.warn(`Invalid include '${cat}'`);
4546
- }
4547
- }
4548
- async *posts() {}
4549
- };
4550
- var InstagramPostsExtractor = class InstagramPostsExtractor extends InstagramExtractor {
4551
- static subcategory = "posts";
4552
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/posts/);
4553
- subcategory = InstagramPostsExtractor.subcategory;
4113
+ register(InstagramPostExtractor.subcategory, InstagramPostExtractor);
4114
+ //#endregion
4115
+ //#region src/instagram/extractors/saved.ts
4116
/**
 * Extractor for `instagram.com/<user>/saved` (optionally `/saved/all-posts`).
 *
 * `posts()` delegates to `api.userSaved()` without passing the matched user
 * segment along.
 */
var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
  static subcategory = "saved";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
  subcategory = InstagramSavedExtractor.subcategory;

  constructor(opts) {
    super(opts);
  }

  /**
   * Create an extractor for `url`, or return `null` when the URL does not
   * match this extractor's pattern.
   */
  static fromURL(url, opts) {
    const match = InstagramSavedExtractor.pattern.exec(url);
    return match ? new InstagramSavedExtractor({ ...opts, url, match }) : null;
  }

  /** Yield everything `api.userSaved()` produces. */
  async *posts() {
    const savedFeed = this.api.userSaved();
    yield* savedFeed;
  }
};
register(InstagramSavedExtractor.subcategory, InstagramSavedExtractor);
4137
+ //#endregion
4138
+ //#region src/instagram/extractors/stories.ts
4139
+ var InstagramStoriesExtractor = class InstagramStoriesExtractor extends InstagramExtractor {
4140
+ static subcategory = "stories";
4141
+ static pattern = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:stories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/;
4142
+ subcategory = InstagramStoriesExtractor.subcategory;
4143
+ highlightId = null;
4144
+ mediaId = null;
4576
4145
  constructor(opts) {
4577
4146
  super(opts);
4147
+ const groups = this.groups;
4148
+ const h1 = groups[0];
4149
+ const user = groups[1];
4150
+ const m1 = groups[2];
4151
+ const h2 = groups[3];
4152
+ const m2 = groups[4];
4153
+ if (user) {
4154
+ this.subcategory = "stories";
4155
+ this.highlightId = null;
4156
+ } else {
4157
+ this.subcategory = "highlights";
4158
+ this.highlightId = h1 ? `highlight:${h1}` : `highlight:${Buffer.from(h2 ?? "", "base64").toString("utf-8")}`;
4159
+ }
4160
+ this.mediaId = m1 ?? m2 ?? null;
4578
4161
  }
4579
4162
  static fromURL(url, opts) {
4580
- const match = InstagramReelsExtractor.pattern.exec(url);
4163
+ const match = InstagramStoriesExtractor.pattern.exec(url);
4581
4164
  if (!match) return null;
4582
- return new InstagramReelsExtractor({
4583
- ...opts,
4584
- url,
4585
- match
4586
- });
4587
- }
4588
- async *posts() {
4589
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
4590
- const uid = await this.api.userId(screenName);
4591
- yield* this.api.userClips(uid);
4592
- }
4593
- };
4594
- var InstagramTaggedExtractor = class InstagramTaggedExtractor extends InstagramExtractor {
4595
- static subcategory = "tagged";
4596
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/tagged/);
4597
- subcategory = InstagramTaggedExtractor.subcategory;
4598
- _taggedUserId = "";
4599
- constructor(opts) {
4600
- super(opts);
4601
- }
4602
- static fromURL(url, opts) {
4603
- const match = InstagramTaggedExtractor.pattern.exec(url);
4604
- if (!match) return null;
4605
- return new InstagramTaggedExtractor({
4606
- ...opts,
4607
- url,
4608
- match
4609
- });
4610
- }
4611
- async metadata() {
4612
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
4613
- let user;
4614
- if (screenName.startsWith("id:")) {
4615
- this._taggedUserId = screenName.slice(3);
4616
- user = await this.api.userById(screenName.slice(3));
4617
- } else {
4618
- this._taggedUserId = await this.api.userId(screenName);
4619
- user = await this.api.userByScreenName(screenName);
4620
- }
4621
- return {
4622
- tagged_owner_id: user.id ?? user.pk,
4623
- tagged_username: user.username,
4624
- tagged_full_name: user.full_name
4625
- };
4626
- }
4627
- async *posts() {
4628
- if (!this._taggedUserId) await this.metadata();
4629
- yield* this.api.userTagged(this._taggedUserId);
4630
- }
4631
- };
4632
- var InstagramStoriesExtractor = class InstagramStoriesExtractor extends InstagramExtractor {
4633
- static subcategory = "stories";
4634
- static pattern = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:stories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/;
4635
- subcategory = InstagramStoriesExtractor.subcategory;
4636
- highlightId = null;
4637
- mediaId = null;
4638
- constructor(opts) {
4639
- super(opts);
4640
- const groups = this.groups;
4641
- const h1 = groups[0];
4642
- const user = groups[1];
4643
- const m1 = groups[2];
4644
- const h2 = groups[3];
4645
- const m2 = groups[4];
4646
- if (user) {
4647
- this.subcategory = "stories";
4648
- this.highlightId = null;
4649
- } else {
4650
- this.subcategory = "highlights";
4651
- this.highlightId = h1 ? `highlight:${h1}` : `highlight:${Buffer.from(h2 ?? "", "base64").toString("utf-8")}`;
4652
- }
4653
- this.mediaId = m1 ?? m2 ?? null;
4654
- }
4655
- static fromURL(url, opts) {
4656
- const match = InstagramStoriesExtractor.pattern.exec(url);
4657
- if (!match) return null;
4658
- return new InstagramStoriesExtractor({
4165
+ return new InstagramStoriesExtractor({
4659
4166
  ...opts,
4660
4167
  url,
4661
4168
  match
@@ -4684,28 +4191,9 @@ var InstagramStoriesExtractor = class InstagramStoriesExtractor extends Instagra
4684
4191
  } else yield* reels;
4685
4192
  }
4686
4193
  };
4687
- var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
4688
- static subcategory = "highlights";
4689
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
4690
- subcategory = InstagramHighlightsExtractor.subcategory;
4691
- constructor(opts) {
4692
- super(opts);
4693
- }
4694
- static fromURL(url, opts) {
4695
- const match = InstagramHighlightsExtractor.pattern.exec(url);
4696
- if (!match) return null;
4697
- return new InstagramHighlightsExtractor({
4698
- ...opts,
4699
- url,
4700
- match
4701
- });
4702
- }
4703
- async *posts() {
4704
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
4705
- const uid = await this.api.userId(screenName);
4706
- yield* this.api.highlightsMedia(uid);
4707
- }
4708
- };
4194
+ register(InstagramStoriesExtractor.subcategory, InstagramStoriesExtractor);
4195
+ //#endregion
4196
+ //#region src/instagram/extractors/tag.ts
4709
4197
  var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtractor {
4710
4198
  static subcategory = "tag";
4711
4199
  static pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/);
@@ -4731,95 +4219,607 @@ var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtract
4731
4219
  yield* this.api.tagsMedia(decodeURIComponent(tag));
4732
4220
  }
4733
4221
  };
4734
- var InstagramInfoExtractor = class InstagramInfoExtractor extends InstagramExtractor {
4735
- static subcategory = "info";
4736
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/info/);
4737
- subcategory = InstagramInfoExtractor.subcategory;
4222
+ register(InstagramTagExtractor.subcategory, InstagramTagExtractor);
4223
+ //#endregion
4224
+ //#region src/instagram/extractors/user.ts
4225
/**
 * Extractor for a bare profile URL (`instagram.com/<user>`).
 *
 * It downloads nothing itself: `items()` fans out into one queue message per
 * requested category, each pointing at the matching sub-extractor class taken
 * from the registry.
 */
var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
  static subcategory = "user";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
  subcategory = InstagramUserExtractor.subcategory;

  constructor(opts) {
    super(opts);
  }

  /**
   * Create an extractor for `url`, or return `null` when the URL does not
   * match this extractor's pattern.
   */
  static fromURL(url, opts) {
    const match = InstagramUserExtractor.pattern.exec(url);
    return match ? new InstagramUserExtractor({ ...opts, url, match }) : null;
  }

  /**
   * Log in, then yield a queue message for every category named by the
   * `include` setting (`"all"`, a comma-separated string, or a list).
   * Categories with no registered class or no URL mapping only produce a
   * warning.
   */
  async *items() {
    await this.login();
    const userPath = this.groups[0] ?? "/";
    const base = `${this.root}${userPath}/`;
    const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
    const include = this._cfg("include", ["posts"]);

    let categories;
    if (include === "all") {
      categories = ["posts", "reels", "tagged", "stories", "highlights", "info", "avatar"];
    } else if (typeof include === "string") {
      categories = include.replace(/\s+/g, "").split(",");
    } else {
      categories = include;
    }

    const urls = {
      info: `${base}info/`,
      avatar: `${base}avatar/`,
      stories: storiesUrl,
      highlights: `${base}highlights/`,
      posts: `${base}posts/`,
      reels: `${base}reels/`,
      tagged: `${base}tagged/`
    };

    for (const category of categories) {
      const cls = get(category);
      const target = urls[category];
      if (!cls || !target) {
        this.log.warn(`Invalid include '${category}'`);
        continue;
      }
      yield queue(target, { _extractor: cls });
    }
  }

  /** A profile URL has no posts of its own; everything goes through `items()`. */
  async *posts() {}
};
register(InstagramUserExtractor.subcategory, InstagramUserExtractor);
4275
+ //#endregion
4276
+ //#region src/config.ts
4277
/**
 * Thin wrapper around a nested plain-object configuration tree.
 */
var ConfigManager = class {
  data;

  /**
   * @param {object} [data={}] - Initial configuration tree (used as-is, not copied).
   */
  constructor(data = {}) {
    this.data = data;
  }

  /**
   * Read a value at a dot-path like ``'extractor.instagram.videos'``.
   *
   * Returns ``defaultValue`` when any segment is missing, when a segment
   * along the way is not a plain object (null, primitive or array), or when
   * the final value is ``undefined``.
   */
  get(path, defaultValue) {
    const keys = path.split(".");
    let node = this.data;
    for (const key of keys) {
      if (node == null || typeof node !== "object" || Array.isArray(node)) return defaultValue;
      node = node[key];
    }
    if (node === void 0) return defaultValue;
    return node;
  }

  /**
   * Interpolate a config key through a hierarchy of paths.
   *
   * Starting at the root and then descending one ``cfgPath`` segment at a
   * time, the first level that defines ``key`` wins (shallowest first). The
   * node reached after the last segment is checked too; previously it was
   * dereferenced but never inspected, so the most specific level was ignored.
   *
   * @param {string[]} cfgPath - Segments to descend through, outermost first.
   * @param {string} key - Setting name to look for at every level.
   * @param {*} defaultVal - Returned when no visited level defines ``key``.
   */
  interpolate(cfgPath, key, defaultVal) {
    let node = this.data;
    // `<=` on purpose: one extra pass inspects the node behind the last segment.
    for (let depth = 0; depth <= cfgPath.length; depth++) {
      if (node == null || typeof node !== "object" || Array.isArray(node)) break;
      const value = node[key];
      if (value !== void 0) return value;
      if (depth < cfgPath.length) node = node[cfgPath[depth]];
    }
    return defaultVal;
  }

  /**
   * Mutate the config at a given dot-path.
   *
   * Intermediate segments that are missing, or that hold something other
   * than a plain object, are replaced by fresh empty objects.
   */
  set(path, value) {
    const keys = path.split(".");
    let node = this.data;
    for (let i = 0; i < keys.length - 1; i++) {
      const key = keys[i];
      let child = node[key];
      if (child == null || typeof child !== "object" || Array.isArray(child)) {
        child = {};
        node[key] = child;
      }
      node = child;
    }
    node[keys[keys.length - 1]] = value;
  }
};
4329
+ //#endregion
4330
+ //#region src/cli/options.ts
4331
/**
 * Attach the options shared by the CLI commands to `cmd`.
 *
 * @param cmd - Command-like object exposing a chainable `option()` method.
 * @returns Whatever the final `option()` call returns (the command itself
 *   for a chainable API).
 */
function addSharedOptions(cmd) {
  // The option processor is called with (value, previousValue). Handing over
  // Number.parseInt directly would treat previousValue as the radix, so a
  // repeated `--max-posts` would be parsed in the wrong base.
  const parseDecimal = (value) => Number.parseInt(value, 10);
  return cmd
    .option("--sessionid <cookie>", "Instagram sessionid cookie value (from browser)", process.env.INSTAGRAM_SESSIONID)
    .option("--cookies <string>", "Full Cookie header string from browser", process.env.INSTAGRAM_COOKIES)
    .option("-o, --output <dir>", "Output directory", "./data")
    .option("--videos <mode>", "Download videos: true, false, or merged (yt-dlp)", "true")
    .option("--previews <types>", "Download only previews: video,audio (comma-separated)")
    .option("--audio", "Download standalone audio tracks", false)
    .option("--max-posts <n>", "Maximum number of posts to download", parseDecimal)
    .option("--cursor <cursor>", "Resume from pagination cursor")
    .option("--order-posts <order>", "Post ordering: asc, desc, id, id_asc, id_desc")
    .option("--order-files <order>", "File ordering: r, d (reverse), or empty for natural order")
    .option("--static-videos", "Download static video cover images instead of actual videos", false)
    .option("--no-static-videos", "Do not force static video covers")
    .option("--api <backend>", "API backend: rest (default) or graphql", "rest")
    .option("-v, --verbose", "Verbose debug output", false)
    .option("--include <list>", "For user: comma-separated sub-extractors (posts,reels,tagged,stories,highlights,info,avatar)", "posts")
    .option("--split", "For stories: split each frame into a separate post", false)
    .option("-i, --info", "Print structured post info to terminal (no download)", false);
}
4334
/**
 * Translate parsed CLI options into a ConfigManager.
 *
 * Only truthy options are copied. When at least one was copied, the
 * collected object is stored under `extractor.instagram`; otherwise the
 * returned config is left untouched.
 *
 * @param {object} opts - Parsed command-line options (camelCase keys).
 * @returns {ConfigManager}
 */
function buildConfig(opts) {
  const config = new ConfigManager();
  const keep = (value) => value;
  const flag = () => true;
  // [CLI option, config key, conversion] in the order keys are written.
  const mapping = [
    ["videos", "videos", keep],
    ["previews", "previews", (value) => value.split(",")],
    ["audio", "audio", flag],
    ["maxPosts", "max-posts", keep],
    ["cursor", "cursor", keep],
    ["orderPosts", "order-posts", keep],
    ["orderFiles", "order-files", keep],
    ["staticVideos", "static-videos", flag],
    ["api", "api", keep],
    ["include", "include", keep],
    ["split", "split", flag]
  ];
  const ig = {};
  for (const [optName, cfgKey, convert] of mapping) {
    const raw = opts[optName];
    if (raw) ig[cfgKey] = convert(raw);
  }
  if (Object.keys(ig).length > 0) config.set("extractor.instagram", ig);
  return config;
}
4351
+ //#endregion
4352
+ //#region src/core/format.ts
4353
+ /** Shared ANSI formatting and display utilities. */
4354
/**
 * Render a byte count as a short human-readable string (`"512 B"`,
 * `"1.5 KB"`, ...), using 1024-based units capped at GB.
 *
 * Zero, negative and NaN inputs are reported as `"0 B"`; values between 0
 * and 1 stay in the `B` unit. Previously those inputs produced a negative or
 * NaN unit index and printed `undefined` as the unit.
 *
 * @param {number} bytes
 * @returns {string}
 */
function formatBytes(bytes) {
  const units = [
    "B",
    "KB",
    "MB",
    "GB"
  ];
  // `!(x > 0)` also catches NaN, which `x <= 0` would let through.
  if (!(bytes > 0)) return "0 B";
  const exponent = bytes < 1 ? 0 : Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
  const digits = exponent === 0 ? 0 : 1;
  return `${(bytes / 1024 ** exponent).toFixed(digits)} ${units[exponent]}`;
}
4365
/* ANSI SGR escape sequences used by the terminal output helpers. */
const BOLD = "\x1B[1m";
const DIM = "\x1B[2m";
const CYAN = "\x1B[36m";
const GREEN = "\x1B[32m";
const YELLOW = "\x1B[33m";
const RESET = "\x1B[0m";
/** Surround `s` with the given escape code and a trailing reset. */
function wrapAnsi(code, s) {
  return `${code}${s}${RESET}`;
}
/** Bold text. */
function b(s) {
  return wrapAnsi(BOLD, s);
}
/** Dimmed text. */
function dim(s) {
  return wrapAnsi(DIM, s);
}
/** Cyan text. */
function c(s) {
  return wrapAnsi(CYAN, s);
}
/** Green text. */
function g(s) {
  return wrapAnsi(GREEN, s);
}
/* Raw codes for callers that assemble the escape sequence themselves. */
const _YELLOW = YELLOW;
const _RESET = RESET;
/**
 * Right-pad `s` with spaces up to `n` characters; strings that are already
 * at least `n` long are returned unchanged.
 */
function pad(s, n) {
  if (s.length >= n) return s;
  const missing = n - s.length;
  return s + " ".repeat(missing);
}
4388
+ //#endregion
4389
+ //#region src/core/job.ts
4390
/**
 * Base class that drives an extractor and routes its messages.
 *
 * Subclasses supply `handleDirectory`, `handleUrl` and `handleQueue`, and may
 * override `_report` to print a summary.
 */
var Job = class {
  extractor;
  /** Accumulated status code returned by `run()`; starts at 0. */
  status = 0;

  constructor(extractor) {
    this.extractor = extractor;
  }

  /**
   * Main entry point: initialise the extractor, dispatch every yielded
   * message by its `type` (unknown types are ignored), call `_report()` and
   * return the status code.
   */
  async run() {
    this.extractor.log.info(`Starting ${this.extractor.category}/${this.extractor.subcategory} — ${this.extractor.url}`);
    await this.extractor.initialize();
    for await (const msg of this.extractor) {
      const kind = msg.type;
      if (kind === "directory") await this.handleDirectory(msg);
      else if (kind === "url") await this.handleUrl(msg);
      else if (kind === "queue") await this.handleQueue(msg);
    }
    this._report();
    return this.status;
  }

  /** Hook for subclasses to print a summary; a no-op here. */
  _report() {}
};
4417
+ //#endregion
4418
+ //#region src/core/download-job.ts
4419
/**
 * Job that downloads every `url` message through the extractor's HTTP client
 * and writes the payload via the extractor's storage backend.
 */
var DownloadJob = class DownloadJob extends Job {
  /** Base output directory (prepended to all paths). */
  basePath = "";
  /** Current target directory metadata (set by directory messages). */
  _currentDir = {};
  /** In-memory archive: category → Set of archive keys already written. */
  archive = /* @__PURE__ */ new Map();
  /** Archive key format per category, e.g. `"{media_id}"`. */
  _archiveFmts = /* @__PURE__ */ new Map();
  _postCount = 0;
  _fileCount = 0;
  _downloadedBytes = 0;
  _skippedCount = 0;

  /** Set the archive key format used for `category`. */
  registerArchive(category, format) {
    this._archiveFmts.set(category, format);
  }

  /** Replace every `{name}` in `fmt` with `meta[name]` (empty string when null/undefined). */
  _interp(fmt, meta) {
    return fmt.replace(/\{(\w+)\}/g, (_, key) => {
      const v = meta[key];
      return v == null ? "" : String(v);
    });
  }

  /** Whether the archive already holds the key derived from `meta`. */
  _isArchived(meta) {
    const cat = meta.category ?? this.extractor.category;
    const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
    const key = this._interp(fmt, meta);
    return !!this.archive.get(cat)?.has(key);
  }

  /** Record the key derived from `meta` in the archive. */
  _archive(meta) {
    const cat = meta.category ?? this.extractor.category;
    const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
    const key = this._interp(fmt, meta);
    let set = this.archive.get(cat);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.archive.set(cat, set);
    }
    set.add(key);
  }

  /** Remember the post metadata and create its target directory. */
  async handleDirectory(msg) {
    this._currentDir = { ...msg.metadata };
    this._postCount++;
    const dirPath = this.basePath ? `${this.basePath}/${this._buildDirPath(msg.metadata)}` : this._buildDirPath(msg.metadata);
    await this.extractor.storage.mkdir(dirPath);
    this.extractor.log.info(`#${this._postCount} ${msg.metadata.username ?? "?"}/${msg.metadata.post_shortcode ?? "?"} → ${dirPath}/`);
  }

  /**
   * Download one file unless it is already archived. Failures are logged and
   * recorded in `status` (bit 4) instead of aborting the job.
   */
  async handleUrl(msg) {
    const meta = {
      ...this._currentDir,
      ...msg.metadata
    };
    if (this._isArchived(meta)) {
      this._skippedCount++;
      return;
    }
    const filename = this._buildFilename(meta);
    const fullPath = `${this.basePath ? `${this.basePath}/${this._buildDirPath(meta)}` : this._buildDirPath(meta)}/${filename}`;
    try {
      const resp = await this.extractor.http.request({
        url: msg.url,
        method: "GET",
        responseType: "arraybuffer"
      });
      // Normalise the payload to bytes or text before handing it to storage.
      let data;
      if (resp.data instanceof Uint8Array) data = resp.data;
      else if (resp.data instanceof ArrayBuffer) data = new Uint8Array(resp.data);
      else if (typeof resp.data === "string") data = resp.data;
      else data = JSON.stringify(resp.data);
      await this.extractor.storage.write(fullPath, data);
      this._fileCount++;
      const size = data instanceof Uint8Array ? data.byteLength : data.length;
      this._downloadedBytes += size;
      this.extractor.log.info(` └─ ${filename} (${formatBytes(size)})`);
      this._archive(meta);
    } catch (err) {
      this.extractor.log.error(`Failed to download ${filename}: ${String(err)}`);
      this.status |= 4;
    }
  }

  /**
   * Run a child job for a queued URL using the extractor class carried in the
   * message metadata (`_extractor`), then merge its status and archive back.
   */
  async handleQueue(msg) {
    const meta = {
      ...this._currentDir,
      ...msg.metadata
    };
    const extrClass = meta._extractor;
    // Extractor classes are constructors, i.e. typeof "function". The former
    // `typeof !== "object"` guard rejected every class, so queued extractors
    // were silently never run.
    if (typeof extrClass !== "function") return;
    const cls = extrClass;
    const match = cls.pattern.exec(msg.url);
    if (!match) return;
    const parentExtr = this.extractor;
    const childJob = new DownloadJob(Reflect.construct(cls, [{
      url: msg.url,
      match,
      config: parentExtr.config,
      http: parentExtr.http,
      storage: parentExtr.storage,
      log: parentExtr.log
    }]));
    childJob.basePath = this.basePath;
    childJob._currentDir = meta;
    // The child starts from a copy of the parent's archive state.
    for (const [cat, set] of this.archive) childJob.archive.set(cat, new Set(set));
    for (const [cat, fmt] of this._archiveFmts) childJob._archiveFmts.set(cat, fmt);
    const childStatus = await childJob.run();
    this.status |= childStatus;
    for (const [cat, set] of childJob.archive) {
      const mine = this.archive.get(cat);
      if (mine) for (const k of set) mine.add(k);
      else this.archive.set(cat, set);
    }
  }

  /** Log post/file/byte totals and, if any, the number of skipped files. */
  _report() {
    const log = this.extractor.log;
    log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
    if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
  }

  /** `<category>/<username>`, with `_` standing in for a missing username. */
  _buildDirPath(meta) {
    return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
  }

  /** `<media_id>[_<num>].<extension>`, defaulting to id `0` and `jpg`. */
  _buildFilename(meta) {
    const mid = meta.media_id ?? "0";
    const ext = meta.extension ?? "jpg";
    return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
  }
};
4759
- var InstagramAvatarExtractor = class InstagramAvatarExtractor extends InstagramExtractor {
4760
- static subcategory = "avatar";
4761
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/);
4762
- subcategory = InstagramAvatarExtractor.subcategory;
4763
- constructor(opts) {
4764
- super(opts);
4542
+ //#endregion
4543
+ //#region src/core/print-job.ts
4544
/**
 * Job that prints a boxed summary of every post to stdout instead of
 * downloading anything.
 */
var PrintJob = class PrintJob extends Job {
  /** Metadata of the post currently being collected. */
  _currentDir = {};
  /** Files collected for the current post. */
  _files = [];
  _postCount = 0;
  _fileCount = 0;
  /** Box width in columns: terminal width (80 when unknown), capped at 100. */
  _width;

  constructor(extractor) {
    super(extractor);
    this._width = Math.min(process.stdout.columns ?? 80, 100);
  }

  /** Print the previous post (if any) and start collecting a new one. */
  async handleDirectory(msg) {
    if (this._postCount > 0) this._flushPost();
    this._currentDir = { ...msg.metadata };
    this._postCount++;
    this._files = [];
  }

  /** Record one file of the current post. */
  async handleUrl(msg) {
    const meta = {
      ...this._currentDir,
      ...msg.metadata
    };
    this._fileCount++;
    const ext = meta.extension ?? "jpg";
    const mid = meta.media_id ?? "?";
    this._files.push({
      num: meta.num ?? this._files.length + 1,
      filename: `${mid}.${ext}`,
      width: meta.width ?? 0,
      height: meta.height ?? 0,
      videoUrl: meta.video_url ?? null,
      audioUrl: meta.audio_url ?? null
    });
  }

  /**
   * Run a child PrintJob for a queued URL using the extractor class carried
   * in the message metadata (`_extractor`) and add its counters to ours.
   */
  async handleQueue(msg) {
    if (this._files.length > 0 || this._postCount > 0) this._flushPost();
    this._postCount = 0;
    this._files = [];
    const extrClass = {
      ...this._currentDir,
      ...msg.metadata
    }._extractor;
    // Extractor classes are constructors, i.e. typeof "function". The former
    // `typeof !== "object"` guard rejected every class, so queued extractors
    // were silently never run.
    if (typeof extrClass !== "function") return;
    const cls = extrClass;
    const match = cls.pattern.exec(msg.url);
    if (!match) return;
    const parentExtr = this.extractor;
    const childJob = new PrintJob(Reflect.construct(cls, [{
      url: msg.url,
      match,
      config: parentExtr.config,
      http: parentExtr.http,
      storage: parentExtr.storage,
      log: parentExtr.log
    }]));
    const childStatus = await childJob.run();
    this.status |= childStatus;
    this._postCount += childJob._postCount;
    this._fileCount += childJob._fileCount;
  }

  /** Print the box for the current post; does nothing when no metadata is set. */
  _flushPost() {
    const m = this._currentDir;
    if (Object.keys(m).length === 0) return;
    const w = this._width;
    const labelW = 14;
    const shortcode = m.post_shortcode ?? "?";
    const header = ` Post #${this._postCount}: ${shortcode} `;
    // Clamp: on a narrow terminal the header can be wider than the box, and
    // String.prototype.repeat throws a RangeError for negative counts.
    const padTotal = Math.max(0, w - 2 - header.length);
    const padL = Math.floor(padTotal / 2);
    const padR = padTotal - padL;
    process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
    // `color` may be a colouring function, a raw escape code, or omitted.
    const row = (label, value, color) => {
      const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${_RESET}` : value;
      process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
    };
    const username = m.username ?? "?";
    const fullname = m.fullname ?? "";
    row("Author:", fullname ? `${username} (${fullname})` : username, g);
    row("Date:", m.date ?? m.post_date ?? "?");
    row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
    row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
    row("URL:", m.post_url ?? "?");
    const desc = m.description ?? "";
    if (desc) {
      process.stdout.write(` ${dim("│")}\n`);
      process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
      for (const line of desc.split("\n")) for (const wl of this._wrap(line, w - 8)) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
    }
    const tags = m.tags;
    if (tags && tags.length > 0) {
      process.stdout.write(` ${dim("│")}\n`);
      process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
    }
    const locName = m.location_slug ?? "";
    const locId = m.location_id ?? "";
    if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
    const coauthors = m.coauthors;
    if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((author) => author.full_name ? `${author.username} (${author.full_name})` : author.username).join(", "));
    const pinned = m.pinned;
    if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
    const expires = m.expires;
    if (expires) row("Expires:", expires, _YELLOW);
    const hlTitle = m.highlight_title;
    if (hlTitle) row("Highlight:", hlTitle);
    const taggedUser = m.tagged_username ?? "";
    if (taggedUser) {
      const taggedFull = m.tagged_full_name ?? "";
      row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
    }
    if (this._files.length > 0) {
      process.stdout.write(` ${dim("│")}\n`);
      process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
      const maxNumW = String(this._files.length).length;
      const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
      const dimW = Math.min(maxFileW, 40);
      for (const f of this._files) {
        const numStr = `[${String(f.num).padStart(maxNumW)}]`;
        // File names longer than 40 characters are cut and marked with "...".
        const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
        const res = f.width ? `${f.width}x${f.height}` : "?x?";
        const badges = [];
        if (f.videoUrl) badges.push("video");
        if (f.audioUrl) badges.push("audio");
        let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
        if (badges.length > 0) line += ` ${_YELLOW}(${badges.join("+")})${_RESET}`;
        process.stdout.write(`${line}\n`);
      }
    }
    process.stdout.write(` ${dim("└")}${"─".repeat(Math.max(0, w - 2))}${dim("┘")}\n`);
  }

  /**
   * Break `text` into lines of at most `maxLen` characters, preferring to cut
   * at a space and hard-cutting when a run has none.
   *
   * `maxLen` is treated as at least 1: with zero or a negative value the loop
   * below would never shorten `remaining` and would not terminate.
   */
  _wrap(text, maxLen) {
    const limit = Math.max(1, maxLen);
    if (text.length <= limit) return [text];
    const lines = [];
    let remaining = text;
    while (remaining.length > limit) {
      let cut = limit;
      while (cut > 0 && remaining[cut] !== " ") cut--;
      if (cut === 0) cut = limit;
      lines.push(remaining.slice(0, cut).trimEnd());
      remaining = remaining.slice(cut).trimStart();
    }
    if (remaining) lines.push(remaining);
    return lines;
  }

  /** Print the last pending post followed by the post/file totals. */
  _report() {
    this._flushPost();
    process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
    process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
    process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
    process.stdout.write(`\n`);
  }
};
4822
4694
  //#endregion
4695
+ //#region src/fetcher.ts
4696
// Namespace-style object for the fetcher module: every property is a getter
// (installed by __exportAll) that resolves to the current module-level binding.
var fetcher_exports = /* @__PURE__ */ __exportAll({
  buildUrl() {
    return buildUrl;
  },
  createFetchHttpClient() {
    return createFetchHttpClient;
  },
  extractCsrf() {
    return extractCsrf;
  },
  headersToRecord() {
    return headersToRecord;
  },
  mergeCookie() {
    return mergeCookie;
  },
  readBody() {
    return readBody;
  },
  serializeBody() {
    return serializeBody;
  }
});
4705
/**
 * Append query parameters to a URL string.
 *
 * Entries whose value is `null` or `undefined` are dropped; every other value
 * is stringified and encoded by `URLSearchParams`. The query is inserted
 * before any `#fragment` in `base`, and no extra separator is added when
 * `base` already ends in `?` or `&`.
 *
 * @param base - URL string, optionally already carrying a query and/or fragment
 * @param params - plain object of query parameters (may be omitted)
 * @returns `base` unchanged when there is nothing to add, otherwise the extended URL
 */
function buildUrl(base, params) {
  if (!params) return base;
  const query = new URLSearchParams();
  for (const [key, value] of Object.entries(params)) {
    if (value != null) query.append(key, String(value));
  }
  const qs = query.toString();
  if (qs === "") return base;
  // Keep the fragment last: text after "#" is never sent to the server, so a
  // query appended behind it would be silently lost.
  const hashAt = base.indexOf("#");
  const head = hashAt === -1 ? base : base.slice(0, hashAt);
  const fragment = hashAt === -1 ? "" : base.slice(hashAt);
  let separator = "&";
  if (!head.includes("?")) separator = "?";
  else if (head.endsWith("?") || head.endsWith("&")) separator = "";
  return `${head}${separator}${qs}${fragment}`;
}
4715
/**
 * Join two Cookie header strings with append semantics: `a=1` + `b=2` → `a=1; b=2`.
 *
 * An empty side is skipped so the result never carries a dangling `"; "`.
 *
 * @param base - cookies that come first (may be empty)
 * @param extra - cookies appended after `base` (may be empty)
 * @returns the combined Cookie header value
 */
function mergeCookie(base, extra) {
  if (!base) return extra;
  if (!extra) return base;
  return `${base}; ${extra}`;
}
4720
/**
 * Extract the `csrftoken` value from a Cookie header string.
 *
 * The name must start the string or follow a `;`, so cookies whose name merely
 * ends in "csrftoken" are not matched.
 *
 * @param cookies - Cookie header value; non-string input yields `""`
 * @returns the token, or `""` when it is absent
 */
function extractCsrf(cookies) {
  // Guard against a missing header instead of throwing on `.match`.
  if (typeof cookies !== "string") return "";
  const found = /(?:^|;\s*)csrftoken=([^;]+)/.exec(cookies);
  return found?.[1] ?? "";
}
4724
/**
 * Convert a fetch `Headers` object into a plain record keyed by lower-case name.
 *
 * `Set-Cookie` is special: a response may carry several, and `Headers`
 * iteration yields each one separately, so a naive `rec[name] = value` keeps
 * only the last cookie. When `Headers.getSetCookie()` is available the
 * `set-cookie` entry is therefore an array with one string per cookie; on
 * runtimes without it the combined string from `headers.get()` is kept.
 *
 * @param headers - fetch `Headers` instance
 * @returns plain object of header values (`set-cookie` may be a string array)
 */
function headersToRecord(headers) {
  const rec = {};
  headers.forEach((value, name) => {
    if (name !== "set-cookie") rec[name] = value;
  });
  if (typeof headers.getSetCookie === "function") {
    const cookies = headers.getSetCookie();
    if (cookies.length > 0) rec["set-cookie"] = cookies;
  } else {
    // Older runtimes: no lossless accessor, fall back to the joined value.
    const combined = headers.get("set-cookie");
    if (combined != null) rec["set-cookie"] = combined;
  }
  return rec;
}
4732
/**
 * Read a fetch response body according to the requested type.
 *
 * - `"arraybuffer"` → the raw bytes (`Buffer` where available, else `Uint8Array`)
 * - `"text"` → the decoded string
 * - anything else → parsed JSON, falling back to the raw text when the body is
 *   not valid JSON (HTML error/login pages, empty bodies). Without the
 *   fallback such responses rejected with a `SyntaxError` and the caller never
 *   saw the status code.
 *
 * @param resp - fetch `Response` whose body has not been consumed yet
 * @param responseType - `"arraybuffer"`, `"text"`, or anything else for JSON
 * @returns the decoded body
 */
async function readBody(resp, responseType) {
  if (responseType === "arraybuffer") {
    const bytes = await resp.arrayBuffer();
    return typeof Buffer === "undefined" ? new Uint8Array(bytes) : Buffer.from(bytes);
  }
  const text = await resp.text();
  if (responseType === "text") return text;
  try {
    return JSON.parse(text);
  } catch {
    // Not JSON: hand back what the server actually sent.
    return text;
  }
}
4743
/**
 * Turn a request payload into something `fetch` accepts as `body`.
 *
 * Strings, `URLSearchParams`, binary data (`ArrayBuffer` and typed-array /
 * `Buffer` views), `Blob` and `FormData` are passed through untouched, because
 * running them through `JSON.stringify` would corrupt them. Everything else is
 * JSON-encoded.
 *
 * @param data - request payload; `null`/`undefined` means "no body"
 * @returns the body to hand to `fetch`, or `undefined` for no body
 */
function serializeBody(data) {
  if (data == null) return void 0;
  if (typeof data === "string" || data instanceof URLSearchParams) return data;
  if (data instanceof ArrayBuffer || ArrayBuffer.isView(data)) return data;
  if (typeof Blob !== "undefined" && data instanceof Blob) return data;
  if (typeof FormData !== "undefined" && data instanceof FormData) return data;
  return JSON.stringify(data);
}
4750
const UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
/**
 * Create an HttpClient backed by the global ``fetch`` (no third-party dependencies).
 *
 * For every request the client:
 * - appends `config.params` to the URL,
 * - sends the cookie from `cookieProvider()` (or the static `cookie`), merged
 *   in front of any `Cookie` header the caller supplied,
 * - adds a default `User-Agent` unless one is already set,
 * - enforces a timeout (`config.timeout`, else `opts.timeout`, default 30 s)
 *   whether or not the caller also passes its own `config.signal`; a timeout
 *   that is not greater than 0 disables the timer.
 *
 * The returned promise resolves for any HTTP status; it rejects on network
 * failure, caller abort, timeout, or when the redirect limit is hit.
 *
 * @param opts - optional settings: `cookie`, `cookieProvider`, `userAgent`,
 *   `timeout` (ms) and `onResponse` (called with the response header record)
 * @returns an object exposing `request(config)` → `{ status, data, headers, url }`
 *
 * @example Plain (no cookies)
 * ```ts
 * const http = createFetchHttpClient()
 * ```
 *
 * @example With static cookies (CLI session mode)
 * ```ts
 * const http = createFetchHttpClient({ cookie: 'sessionid=abc; csrftoken=xyz' })
 * ```
 *
 * @example With cookie jar (anonymous session)
 * ```ts
 * const jar = createCookieJar()
 * const http = createFetchHttpClient({
 *   cookieProvider: () => jar.getCookieHeader(),
 *   onResponse: (headers) => jar.setFromResponse(headers),
 * })
 * ```
 */
function createFetchHttpClient(opts = {}) {
  const { cookie, cookieProvider, userAgent = UA, timeout = 3e4, onResponse } = opts;
  // Node's fetch reports the redirect limit as `TypeError: fetch failed` with
  // the real reason ("redirect count exceeded") on `err.cause`, so both the
  // error and its cause have to be inspected.
  const hitRedirectLimit = (err) => /too many redirect|redirect count exceeded/i.test(`${String(err)} ${String(err?.cause ?? "")}`);
  return {
    async request(config) {
      const method = config.method ?? "GET";
      const url = buildUrl(config.url, config.params);
      const headers = new Headers(config.headers);
      const reqCookie = cookieProvider?.() ?? cookie;
      if (reqCookie) {
        const existing = headers.get("Cookie");
        headers.set("Cookie", existing ? mergeCookie(reqCookie, existing) : reqCookie);
      }
      if (!headers.has("User-Agent")) headers.set("User-Agent", userAgent);
      const body = serializeBody(config.data);
      if (typeof body === "string" && !headers.has("Content-Type")) headers.set("Content-Type", "application/json");
      const timeoutMs = config.timeout ?? timeout;
      const callerSignal = config.signal ?? null;
      // One internal controller carries both abort sources, so the timeout is
      // no longer dropped when the caller brings its own signal.
      const controller = new AbortController();
      let timedOut = false;
      const forwardAbort = () => controller.abort(callerSignal.reason);
      if (callerSignal?.aborted) forwardAbort();
      else callerSignal?.addEventListener("abort", forwardAbort, { once: true });
      const timer = timeoutMs > 0 ? setTimeout(() => {
        timedOut = true;
        controller.abort();
      }, timeoutMs) : null;
      try {
        const resp = await fetch(url, {
          method,
          headers,
          body,
          signal: controller.signal
        });
        const responseHeaders = headersToRecord(resp.headers);
        onResponse?.(responseHeaders);
        // The body is read inside the try block so the timeout covers it too.
        const data = await readBody(resp, config.responseType);
        return {
          status: resp.status,
          data,
          headers: responseHeaders,
          url: resp.url
        };
      } catch (err) {
        if (timedOut && !callerSignal?.aborted) throw new Error(`Request timeout after ${timeoutMs}ms: ${url}`, { cause: err });
        if (hitRedirectLimit(err)) throw new Error("Too many redirects — session may be expired or invalid. Export a fresh session from your browser.", { cause: err });
        throw err;
      } finally {
        if (timer !== null) clearTimeout(timer);
        callerSignal?.removeEventListener("abort", forwardAbort);
      }
    }
  };
}
4822
+ //#endregion
4823
4823
  //#region src/cli/cookies.ts
4824
4824
  function createCookieJar() {
4825
4825
  const cookies = /* @__PURE__ */ new Map();
@@ -4849,125 +4849,43 @@ function createCookieJar() {
4849
4849
  }
4850
4850
  //#endregion
4851
4851
  //#region src/cli/adapter.ts
4852
- /** NodeHttpClient — axios wrapper */
4853
4852
  /**
4854
- * Extract csrftoken value from a Cookie header string.
4853
+ * Create an HttpClient with a static cookie string (sessionid cookie).
4854
+ *
4855
+ * Used by the CLI when ``--cookies`` or ``--sessionid`` is provided.
4855
4856
  */
4856
- function extractCsrfFromCookies(cookies) {
4857
- return cookies.match(/(?:^|;\s*)csrftoken=([^;]+)/)?.[1] ?? "";
4858
- }
4859
- function createHttpClient(sessionId, fullCookies, logger) {
4860
- const instance = axios.create({
4861
- timeout: 3e4,
4862
- maxRedirects: 20,
4863
- validateStatus: () => true,
4864
- headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" }
4865
- });
4866
- const baseCookie = fullCookies || (sessionId ? `sessionid=${sessionId}` : null);
4867
- return { async request(config) {
4868
- const method = config.method ?? "GET";
4869
- logger?.debug(`${method} ${config.url}`);
4870
- const mergedHeaders = {};
4871
- if (baseCookie) mergedHeaders.Cookie = baseCookie;
4872
- if (config.headers) for (const [k, v] of Object.entries(config.headers)) if (k.toLowerCase() === "cookie" && mergedHeaders.Cookie) mergedHeaders.Cookie = `${mergedHeaders.Cookie}; ${v}`;
4873
- else mergedHeaders[k] = v;
4874
- if (mergedHeaders.Cookie) logger?.debug(` Cookie: ${mergedHeaders.Cookie.slice(0, 200)}`);
4875
- try {
4876
- const resp = await instance.request({
4877
- url: config.url,
4878
- method,
4879
- headers: mergedHeaders,
4880
- params: cleanupParams(config.params),
4881
- data: config.data,
4882
- signal: config.signal,
4883
- timeout: config.timeout,
4884
- responseType: config.responseType ?? "json"
4885
- });
4886
- const finalUrl = resp.request?.res?.responseUrl ?? config.url;
4887
- logger?.debug(` ← ${resp.status} ${resp.status >= 400 ? "⚠️" : ""} (${finalUrl.slice(0, 100)})`);
4888
- return {
4889
- status: resp.status,
4890
- data: resp.data,
4891
- headers: resp.headers,
4892
- url: finalUrl
4893
- };
4894
- } catch (err) {
4895
- const msg = String(err);
4896
- if (msg.includes("TOO_MANY_REDIRECTS") || msg.includes("too many redirects")) throw new Error("Too many redirects — sessionid may be expired or invalid. Export a fresh sessionid from your browser.");
4897
- throw err;
4898
- }
4899
- } };
4857
function createHttpClient(sessionId, fullCookies, _logger) {
  // A full Cookie header wins; otherwise fall back to a bare sessionid cookie.
  // With neither, `cookie` stays undefined and no Cookie header is configured.
  let cookie;
  if (fullCookies) {
    cookie = fullCookies;
  } else if (sessionId) {
    cookie = `sessionid=${sessionId}`;
  }
  return createFetchHttpClient({ cookie });
}
4901
- /** WebClient — anonymous cookie-jar HTTP client */
4902
4860
  /**
4903
- * Create an HTTP client with an in-memory cookie jar.
4861
+ * Create an HTTP client backed by an in-memory cookie jar.
4904
4862
  *
4905
- * Use this when you don't have a sessionid — the client first seeds its
4906
- * cookie jar by visiting ``instagram.com``, then uses those anonymous
4907
- * cookies for subsequent API calls. This is how incognito browsing works.
4863
+ * Seeds cookies by visiting instagram.com first, then uses those
4864
+ * anonymous cookies for subsequent API calls (like incognito browsing).
4908
4865
  *
4909
4866
  * Returns the client + the initial CSRF token extracted from cookies.
4910
4867
  */
4911
4868
  async function createWebClient(logger) {
4912
4869
  const jar = createCookieJar();
4913
4870
  logger?.info("Seeding anonymous session (visiting instagram.com)…");
4914
- const seedResp = await axios.get("https://www.instagram.com/", {
4915
- headers: {
4916
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
4917
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
4918
- },
4919
- maxRedirects: 20,
4920
- validateStatus: () => true
4921
- });
4922
- jar.setFromResponse(seedResp.headers);
4923
- logger?.debug(` ← ${seedResp.status} — got ${jar.getCookieHeader().split(";").length} cookies`);
4871
+ const seedResp = await fetch("https://www.instagram.com/", { headers: {
4872
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
4873
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
4874
+ } });
4875
+ const { headersToRecord } = await Promise.resolve().then(() => fetcher_exports);
4876
+ jar.setFromResponse(headersToRecord(seedResp.headers));
4877
+ const cookieCount = jar.getCookieHeader().split(";").length;
4878
+ logger?.debug(` ← ${seedResp.status} — got ${cookieCount} cookies`);
4879
+ const csrfToken = extractCsrf(jar.getCookieHeader());
4924
4880
  return {
4925
- http: { async request(config) {
4926
- const method = config.method ?? "GET";
4927
- logger?.debug(`${method} ${config.url}`);
4928
- const jarCookie = jar.getCookieHeader();
4929
- const mergedHeaders = {};
4930
- if (jarCookie) mergedHeaders.Cookie = jarCookie;
4931
- if (config.headers) for (const [k, v] of Object.entries(config.headers)) if (k.toLowerCase() === "cookie" && mergedHeaders.Cookie) mergedHeaders.Cookie = `${mergedHeaders.Cookie}; ${v}`;
4932
- else mergedHeaders[k] = v;
4933
- try {
4934
- const resp = await axios.request({
4935
- url: config.url,
4936
- method,
4937
- headers: mergedHeaders,
4938
- params: cleanupParams(config.params),
4939
- data: config.data,
4940
- signal: config.signal,
4941
- timeout: config.timeout ?? 3e4,
4942
- maxRedirects: 20,
4943
- validateStatus: () => true,
4944
- responseType: config.responseType ?? "json"
4945
- });
4946
- jar.setFromResponse(resp.headers);
4947
- const finalUrl = resp.request?.res?.responseUrl ?? config.url;
4948
- logger?.debug(` ← ${resp.status} ${resp.status >= 400 ? "⚠️" : ""} (${finalUrl.slice(0, 100)})`);
4949
- return {
4950
- status: resp.status,
4951
- data: resp.data,
4952
- headers: resp.headers,
4953
- url: finalUrl
4954
- };
4955
- } catch (err) {
4956
- const msg = String(err);
4957
- if (msg.includes("TOO_MANY_REDIRECTS") || msg.includes("too many redirects")) throw new Error("Too many redirects — Instagram may be blocking the request. Try again later or use --sessionid.");
4958
- throw err;
4959
- }
4960
- } },
4961
- csrfToken: jar.getCookieHeader().match(/(?:^|;\s*)csrftoken=([^;]+)/)?.[1] ?? ""
4881
+ http: createFetchHttpClient({
4882
+ cookieProvider: () => jar.getCookieHeader(),
4883
+ onResponse: (headers) => jar.setFromResponse(headers)
4884
+ }),
4885
+ csrfToken
4962
4886
  };
4963
4887
  }
4964
- function cleanupParams(params) {
4965
- if (!params) return void 0;
4966
- const cleaned = {};
4967
- for (const [k, v] of Object.entries(params)) if (v != null) cleaned[k] = String(v);
4968
- return cleaned;
4969
- }
4970
- /** NodeLogger — console wrapper */
4888
+ /** Node.js console-based logger. */
4971
4889
  function createLogger(verbose) {
4972
4890
  return {
4973
4891
  debug(message, ...args) {
@@ -5006,39 +4924,7 @@ function createStorage() {
5006
4924
  };
5007
4925
  }
5008
4926
  //#endregion
5009
- //#region src/cli/index.ts
5010
- /**
5011
- * gdl-instagram — CLI entry point.
5012
- *
5013
- * Usage:
5014
- * gdl-instagram <url> [options] ← auto-detect from URL
5015
- * gdl-instagram tag <hashtag> [options]
5016
- * gdl-instagram saved [options]
5017
- *
5018
- * Every option is self-documented via ``--help``.
5019
- */
5020
- function addSharedOptions(cmd) {
5021
- return cmd.option("--sessionid <cookie>", "Instagram sessionid cookie value (from browser)", process.env.INSTAGRAM_SESSIONID).option("--cookies <string>", "Full Cookie header string from browser (DevTools → Network → Request Headers → Cookie)", process.env.INSTAGRAM_COOKIES).option("-o, --output <dir>", "Output directory", "./data").option("--videos <mode>", "Download videos: true, false, or merged (yt-dlp)", "true").option("--previews <types>", "Download only previews: video,audio (comma-separated)").option("--audio", "Download standalone audio tracks", false).option("--max-posts <n>", "Maximum number of posts to download", Number.parseInt).option("--cursor <cursor>", "Resume from pagination cursor (see output of previous run)").option("--order-posts <order>", "Post ordering: asc, desc, id, id_asc, id_desc").option("--order-files <order>", "File ordering: r, d (reverse), or empty for natural order").option("--static-videos", "Download static video cover images instead of actual videos", false).option("--no-static-videos", "Do not force static video covers (download real videos)").option("--api <backend>", "API backend: rest (default) or graphql", "rest").option("-v, --verbose", "Verbose debug output", false).option("--include <list>", "For user: comma-separated sub-extractors (posts,reels,tagged,stories,highlights,info,avatar)", "posts").option("--split", "For stories: split each frame into a separate post", false).option("-i, --info", "Print structured post info to terminal (no download)", false);
5022
- }
5023
- /** Build config from parsed options */
5024
- function buildConfig(opts) {
5025
- const config = new ConfigManager();
5026
- const ig = {};
5027
- if (opts.videos) ig.videos = opts.videos;
5028
- if (opts.previews) ig.previews = opts.previews.split(",");
5029
- if (opts.audio) ig.audio = true;
5030
- if (opts.maxPosts) ig["max-posts"] = opts.maxPosts;
5031
- if (opts.cursor) ig.cursor = opts.cursor;
5032
- if (opts.orderPosts) ig["order-posts"] = opts.orderPosts;
5033
- if (opts.orderFiles) ig["order-files"] = opts.orderFiles;
5034
- if (opts.staticVideos) ig["static-videos"] = true;
5035
- if (opts.api) ig.api = opts.api;
5036
- if (opts.include) ig.include = opts.include;
5037
- if (opts.split) ig.split = true;
5038
- if (Object.keys(ig).length > 0) config.set("extractor.instagram", ig);
5039
- return config;
5040
- }
5041
- /** Auto-detect the right extractor for a URL */
4927
+ //#region src/cli/runner.ts
5042
4928
  function resolveExtractor(url) {
5043
4929
  for (const Cls of [
5044
4930
  InstagramPostExtractor,
@@ -5050,7 +4936,6 @@ function resolveExtractor(url) {
5050
4936
  ]) if (Cls.pattern.test(url)) return Cls;
5051
4937
  throw new Error(`No extractor matched URL: ${url}. Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/`);
5052
4938
  }
5053
- /** Run an extractor */
5054
4939
  async function runExtractor(url, extrClass, opts) {
5055
4940
  const config = buildConfig(opts);
5056
4941
  const log = createLogger(opts.verbose ?? false);
@@ -5058,7 +4943,7 @@ async function runExtractor(url, extrClass, opts) {
5058
4943
  let webCsrf;
5059
4944
  if (opts.cookies) {
5060
4945
  http = createHttpClient(void 0, opts.cookies, log);
5061
- webCsrf = extractCsrfFromCookies(opts.cookies);
4946
+ webCsrf = extractCsrf(opts.cookies);
5062
4947
  } else if (opts.sessionid) http = createHttpClient(opts.sessionid, void 0, log);
5063
4948
  else {
5064
4949
  const wc = await createWebClient(log);
@@ -5107,9 +4992,10 @@ async function runExtractor(url, extrClass, opts) {
5107
4992
  process.exit(1);
5108
4993
  }
5109
4994
  }
5110
- /** Program */
4995
+ //#endregion
4996
+ //#region src/cli/index.ts
5111
4997
  const program = new Command();
5112
- program.name("gdl-instagram").description("Download images and videos from Instagram.\n\nUses gallery-dl's extraction pipeline — supports posts, reels,\nstories, highlights, tagged posts, saved collections, and more.\n\nRequires a sessionid cookie exported from your browser.\nSet via --sessionid or INSTAGRAM_SESSIONID environment variable.").version("0.1.0");
4998
+ program.name("gdl-instagram").description("Download images and videos from Instagram.\n\nUses gallery-dl's extraction pipeline — supports posts, reels,\nstories, highlights, tagged posts, saved collections, and more.\n\nRequires a sessionid cookie exported from your browser.\nSet via --sessionid or INSTAGRAM_SESSIONID environment variable.").version(version);
5113
4999
  addSharedOptions(program.command("dl", { isDefault: true }).argument("[url]", "Instagram URL to download (auto-detects type)").description("Download media from an Instagram URL (auto-detects post/user/stories/…)\n\nExamples:\n gdl-instagram https://www.instagram.com/p/CxAbCdEfGh/\n gdl-instagram https://www.instagram.com/username/ --include=posts,reels\n gdl-instagram https://www.instagram.com/stories/username/").action(async (url, opts) => {
5114
5000
  if (!url) {
5115
5001
  program.help();
@@ -5123,7 +5009,6 @@ addSharedOptions(program.command("tag <hashtag>").description("Download posts fr
5123
5009
  addSharedOptions(program.command("saved").description("Download your saved (bookmarked) posts\n\nRequires authentication via --sessionid.\n\nExamples:\n gdl-instagram saved --sessionid=abc123").action(async (opts) => {
5124
5010
  await runExtractor("https://www.instagram.com/me/saved/", InstagramSavedExtractor, opts);
5125
5011
  }));
5126
- /** parse */
5127
5012
  program.parse();
5128
5013
  //#endregion
5129
5014
  export {};