@chilfish/gallery-dl-instagram 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/dl-ins.mjs CHANGED
@@ -5,8 +5,19 @@ import path, { dirname } from "node:path";
5
5
  import fs from "node:fs";
6
6
  import process$1 from "node:process";
7
7
  import { stripVTControlCharacters } from "node:util";
8
- import axios from "axios";
9
8
  import { access, mkdir, writeFile } from "node:fs/promises";
9
+ //#region \0rolldown/runtime.js
10
+ var __defProp = Object.defineProperty;
11
+ var __exportAll = (all, no_symbols) => {
12
+ let target = {};
13
+ for (var name in all) __defProp(target, name, {
14
+ get: all[name],
15
+ enumerable: true
16
+ });
17
+ if (!no_symbols) __defProp(target, Symbol.toStringTag, { value: "Module" });
18
+ return target;
19
+ };
20
+ //#endregion
10
21
  //#region node_modules/commander/lib/error.js
11
22
  /**
12
23
  * CommanderError class
@@ -2956,422 +2967,156 @@ function useColor() {
2956
2967
  }
2957
2968
  new Command();
2958
2969
  //#endregion
2959
- //#region src/config.ts
2960
- var ConfigManager = class {
2961
- data;
2962
- constructor(data = {}) {
2963
- this.data = data;
2970
+ //#region package.json
2971
+ var version = "0.2.3";
2972
+ //#endregion
2973
+ //#region src/utils/id-codec.ts
2974
+ /**
2975
+ * Instagram-style Base64-variant ID ↔ shortcode conversion.
2976
+ */
2977
+ const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
2978
+ /** Pre-built index for O(1) character lookup during decode. */
2979
+ const CHAR_INDEX = {};
2980
+ for (let i = 0; i < 64; i++) CHAR_INDEX[ALPHABET[i]] = i;
2981
+ const BASE = BigInt(64);
2982
+ /**
2983
+ * Decode an Instagram shortcode into its numeric post ID.
2984
+ */
2985
+ function idFromShortcode(shortcode) {
2986
+ let num = 0n;
2987
+ for (const ch of shortcode) num = num * BASE + BigInt(CHAR_INDEX[ch] ?? 0);
2988
+ return num.toString();
2989
+ }
2990
+ /**
2991
+ * Encode a numeric post ID into an Instagram shortcode.
2992
+ */
2993
+ function shortcodeFromId(postId) {
2994
+ let num = BigInt(postId);
2995
+ const chars = [];
2996
+ while (num > 0n) {
2997
+ const remainder = Number(num % BASE);
2998
+ chars.push(ALPHABET[remainder]);
2999
+ num = num / BASE;
3000
+ }
3001
+ return chars.reverse().join("");
3002
+ }
3003
+ //#endregion
3004
+ //#region src/core/extractor.ts
3005
+ var Extractor = class {
3006
+ /** Regex pattern to match against URLs */
3007
+ static pattern = /^$/;
3008
+ /** The input URL */
3009
+ url;
3010
+ /** Regex match groups from ``fromURL`` */
3011
+ groups;
3012
+ config;
3013
+ /** HTTP client — public so Job can access for downloads */
3014
+ http;
3015
+ /** Storage backend — public so Job can access for writes */
3016
+ storage;
3017
+ /** Logger instance — public so Job can access for reporting */
3018
+ log;
3019
+ /** Delay range in seconds — random between [min, max] before each request */
3020
+ requestInterval = [6, 12];
3021
+ _initialized = false;
3022
+ constructor(opts) {
3023
+ this.url = opts.url;
3024
+ this.groups = opts.match ? [...opts.match].slice(1) : [];
3025
+ this.config = opts.config;
3026
+ this.http = opts.http;
3027
+ this.storage = opts.storage;
3028
+ this.log = opts.log;
2964
3029
  }
3030
+ /** Initialization */
2965
3031
  /**
2966
- * Read a value at a dot-path like ``'extractor.instagram.videos'``.
2967
- * Returns ``undefined`` when the path doesn't exist.
3032
+ * One-time async setup (cookies, session, internal state).
3033
+ * Safe to call multiple times — after the first call it becomes a no-op.
2968
3034
  */
2969
- get(path, defaultValue) {
2970
- const keys = path.split(".");
2971
- let node = this.data;
2972
- for (const key of keys) {
2973
- if (node == null || typeof node !== "object" || Array.isArray(node)) return defaultValue;
2974
- node = node[key];
2975
- }
2976
- if (node === void 0) return defaultValue;
2977
- return node;
3035
+ async initialize() {
3036
+ if (this._initialized) return;
3037
+ await this._init();
3038
+ this._initialized = true;
3039
+ this.initialize = async () => {};
2978
3040
  }
2979
3041
  /**
2980
- * Interpolate a config key through a hierarchy of paths.
3042
+ * Subclass hook for one-time setup.
2981
3043
  */
2982
- interpolate(cfgPath, key, defaultVal) {
2983
- let node = this.data;
2984
- for (let i = 0; i < cfgPath.length; i++) {
2985
- if (node != null && typeof node === "object" && !Array.isArray(node)) {
2986
- const v = node[key];
2987
- if (v !== void 0) return v;
2988
- }
2989
- if (node == null || typeof node !== "object" || Array.isArray(node)) break;
2990
- node = node[cfgPath[i]];
2991
- }
2992
- return defaultVal;
3044
+ async _init() {}
3045
+ /** Async iteration */
3046
+ async *[Symbol.asyncIterator]() {
3047
+ await this.initialize();
3048
+ yield* this.items();
2993
3049
  }
3050
+ /** Config helpers */
2994
3051
  /**
2995
- * Mutate the config at a given dot-path.
3052
+ * Read a config value using the interpolated hierarchy.
2996
3053
  */
2997
- set(path, value) {
2998
- const keys = path.split(".");
2999
- let node = this.data;
3000
- for (let i = 0; i < keys.length - 1; i++) {
3001
- const key = keys[i];
3002
- let child = node[key];
3003
- if (child == null || typeof child !== "object" || Array.isArray(child)) {
3004
- child = {};
3005
- node[key] = child;
3006
- }
3007
- node = child;
3008
- }
3009
- node[keys[keys.length - 1]] = value;
3054
+ _cfg(key, defaultVal) {
3055
+ const path = [
3056
+ "extractor",
3057
+ this.category,
3058
+ this.subcategory
3059
+ ];
3060
+ return this.config.interpolate(path, key, defaultVal);
3010
3061
  }
3011
- };
3012
- //#endregion
3013
- //#region src/core/job.ts
3014
- function formatBytes(bytes) {
3015
- if (bytes === 0) return "0 B";
3016
- const units = [
3017
- "B",
3018
- "KB",
3019
- "MB",
3020
- "GB"
3021
- ];
3022
- const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
3023
- return `${(bytes / 1024 ** i).toFixed(i === 0 ? 0 : 1)} ${units[i]}`;
3024
- }
3025
- const BOLD = "\x1B[1m";
3026
- const DIM = "\x1B[2m";
3027
- const CYAN = "\x1B[36m";
3028
- const GREEN = "\x1B[32m";
3029
- const YELLOW = "\x1B[33m";
3030
- const RESET = "\x1B[0m";
3031
- function b(s) {
3032
- return `${BOLD}${s}${RESET}`;
3033
- }
3034
- function dim(s) {
3035
- return `${DIM}${s}${RESET}`;
3036
- }
3037
- function c(s) {
3038
- return `${CYAN}${s}${RESET}`;
3039
- }
3040
- function g(s) {
3041
- return `${GREEN}${s}${RESET}`;
3042
- }
3043
- function pad(s, n) {
3044
- return s.length >= n ? s : s + " ".repeat(n - s.length);
3045
- }
3046
- var Job = class {
3047
- extractor;
3048
- status = 0;
3049
- constructor(extractor) {
3050
- this.extractor = extractor;
3062
+ /** HTTP */
3063
+ _lastRequestTime = 0;
3064
+ /**
3065
+ * Rate-limited HTTP request wrapper.
3066
+ */
3067
+ async request(url, cfg = {}) {
3068
+ await this._throttle();
3069
+ const response = await this.http.request({
3070
+ url,
3071
+ ...cfg
3072
+ });
3073
+ this._lastRequestTime = Date.now();
3074
+ return response;
3051
3075
  }
3052
3076
  /**
3053
- * Main entry point. Calls ``extractor[Symbol.asyncIterator]()`` and
3054
- * dispatches every yielded message.
3077
+ * Convenience: request + parse JSON body.
3055
3078
  */
3056
- async run() {
3057
- this.extractor.log.info(`Starting ${this.extractor.category}/${this.extractor.subcategory} ${this.extractor.url}`);
3058
- await this.extractor.initialize();
3059
- for await (const msg of this.extractor) switch (msg.type) {
3060
- case "directory":
3061
- await this.handleDirectory(msg);
3062
- break;
3063
- case "url":
3064
- await this.handleUrl(msg);
3065
- break;
3066
- case "queue":
3067
- await this.handleQueue(msg);
3068
- break;
3079
+ async requestJSON(url, cfg = {}) {
3080
+ const resp = await this.request(url, cfg);
3081
+ if (typeof resp.data === "object") return resp.data;
3082
+ try {
3083
+ return JSON.parse(resp.data);
3084
+ } catch {
3085
+ return {};
3069
3086
  }
3070
- this._report();
3071
- return this.status;
3072
3087
  }
3073
- /** Override in subclasses to print a summary. */
3074
- _report() {}
3075
- };
3076
- var DownloadJob = class DownloadJob extends Job {
3077
- /** Base output directory (prepended to all paths). */
3078
- basePath = "";
3079
- /** Current target directory metadata (set by directory messages). */
3080
- _currentDir = {};
3081
- /** In-memory archive keyed by archive format. */
3082
- archive = /* @__PURE__ */ new Map();
3088
+ /** Rate limiting */
3083
3089
  /**
3084
- * Registry of per-category "archive formats" the key is formed
3085
- * by interpolating this format string over the metadata.
3090
+ * Sleep long enough to keep the minimum interval between requests.
3086
3091
  */
3087
- _archiveFmts = /* @__PURE__ */ new Map();
3088
- _postCount = 0;
3089
- _fileCount = 0;
3090
- _downloadedBytes = 0;
3091
- _skippedCount = 0;
3092
- registerArchive(category, format) {
3093
- this._archiveFmts.set(category, format);
3092
+ async _throttle() {
3093
+ const elapsed = Date.now() - this._lastRequestTime;
3094
+ const [min, max] = this.requestInterval;
3095
+ const target = min + Math.random() * (max - min);
3096
+ const waitMs = Math.max(0, target * 1e3 - elapsed);
3097
+ if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
3094
3098
  }
3095
- /** Simple format-string interpolation for archive keys. */
3096
- _interp(fmt, meta) {
3097
- return fmt.replace(/\{(\w+)\}/g, (_, key) => {
3098
- const v = meta[key];
3099
- return v == null ? "" : String(v);
3100
- });
3099
+ /** Utility */
3100
+ /**
3101
+ * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
3102
+ */
3103
+ parseTimestamp(ts) {
3104
+ if (ts == null) return "";
3105
+ const asMs = ts > 25e8 ? ts : ts * 1e3;
3106
+ return new Date(asMs).toISOString();
3101
3107
  }
3102
- /** Check whether this URL has already been downloaded (and skip). */
3103
- _isArchived(meta) {
3104
- const cat = meta.category ?? this.extractor.category;
3105
- const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
3106
- const key = this._interp(fmt, meta);
3107
- const set = this.archive.get(cat);
3108
- if (set && set.has(key)) return true;
3109
- return false;
3110
- }
3111
- /** Mark a post/media as archived. */
3112
- _archive(meta) {
3113
- const cat = meta.category ?? this.extractor.category;
3114
- const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
3115
- const key = this._interp(fmt, meta);
3116
- let set = this.archive.get(cat);
3117
- if (!set) {
3118
- set = /* @__PURE__ */ new Set();
3119
- this.archive.set(cat, set);
3120
- }
3121
- set.add(key);
3122
- }
3123
- /** Handlers */
3124
- async handleDirectory(msg) {
3125
- this._currentDir = { ...msg.metadata };
3126
- this._postCount++;
3127
- const dirPath = this.basePath ? `${this.basePath}/${this._buildDirPath(msg.metadata)}` : this._buildDirPath(msg.metadata);
3128
- await this.extractor.storage.mkdir(dirPath);
3129
- this.extractor.log.info(`#${this._postCount} ${msg.metadata.username ?? "?"}/${msg.metadata.post_shortcode ?? "?"} → ${dirPath}/`);
3130
- }
3131
- async handleUrl(msg) {
3132
- const meta = {
3133
- ...this._currentDir,
3134
- ...msg.metadata
3135
- };
3136
- if (this._isArchived(meta)) {
3137
- this._skippedCount++;
3138
- return;
3139
- }
3140
- const filename = this._buildFilename(meta);
3141
- const fullPath = `${this.basePath ? `${this.basePath}/${this._buildDirPath(meta)}` : this._buildDirPath(meta)}/${filename}`;
3142
- try {
3143
- const resp = await this.extractor.http.request({
3144
- url: msg.url,
3145
- method: "GET",
3146
- responseType: "arraybuffer"
3147
- });
3148
- let data;
3149
- if (resp.data instanceof Uint8Array) data = resp.data;
3150
- else if (resp.data instanceof ArrayBuffer) data = new Uint8Array(resp.data);
3151
- else if (typeof resp.data === "string") data = resp.data;
3152
- else if (typeof resp.data === "object" && resp.data != null && "type" in resp.data && resp.data.type === "Buffer") data = new Uint8Array(resp.data);
3153
- else data = JSON.stringify(resp.data);
3154
- await this.extractor.storage.write(fullPath, data);
3155
- this._fileCount++;
3156
- const size = data instanceof Uint8Array ? data.byteLength : data.length;
3157
- this._downloadedBytes += size;
3158
- this.extractor.log.info(` └─ ${filename} (${formatBytes(size)})`);
3159
- this._archive(meta);
3160
- } catch (err) {
3161
- this.extractor.log.error(`Failed to download ${filename}: ${String(err)}`);
3162
- this.status |= 4;
3163
- }
3164
- }
3165
- async handleQueue(msg) {
3166
- const meta = {
3167
- ...this._currentDir,
3168
- ...msg.metadata
3169
- };
3170
- const extrClass = meta._extractor;
3171
- if (!extrClass || typeof extrClass !== "object") return;
3172
- const cls = extrClass;
3173
- const match = cls.pattern.exec(msg.url);
3174
- if (!match) return;
3175
- const parentExtr = this.extractor;
3176
- const childJob = new DownloadJob(Reflect.construct(cls, [{
3177
- url: msg.url,
3178
- match,
3179
- config: parentExtr.config,
3180
- http: parentExtr.http,
3181
- storage: parentExtr.storage,
3182
- log: parentExtr.log
3183
- }]));
3184
- childJob.basePath = this.basePath;
3185
- childJob._currentDir = meta;
3186
- for (const [cat, set] of this.archive) childJob.archive.set(cat, new Set(set));
3187
- for (const [cat, fmt] of this._archiveFmts) childJob._archiveFmts.set(cat, fmt);
3188
- const childStatus = await childJob.run();
3189
- this.status |= childStatus;
3190
- for (const [cat, set] of childJob.archive) {
3191
- const mine = this.archive.get(cat);
3192
- if (mine) for (const k of set) mine.add(k);
3193
- else this.archive.set(cat, set);
3194
- }
3195
- }
3196
- /** Report */
3197
- _report() {
3198
- const log = this.extractor.log;
3199
- log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
3200
- if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
3201
- }
3202
- /** Path builders */
3203
- _buildDirPath(meta) {
3204
- return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
3205
- }
3206
- _buildFilename(meta) {
3207
- const mid = meta.media_id ?? "0";
3208
- const ext = meta.extension ?? "jpg";
3209
- return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
3210
- }
3211
- };
3212
- var PrintJob = class PrintJob extends Job {
3213
- _currentDir = {};
3214
- _files = [];
3215
- _postCount = 0;
3216
- _fileCount = 0;
3217
- _width;
3218
- constructor(extractor) {
3219
- super(extractor);
3220
- this._width = Math.min(process.stdout.columns ?? 80, 100);
3221
- }
3222
- async handleDirectory(msg) {
3223
- if (this._postCount > 0) this._flushPost();
3224
- this._currentDir = { ...msg.metadata };
3225
- this._postCount++;
3226
- this._files = [];
3227
- }
3228
- async handleUrl(msg) {
3229
- const meta = {
3230
- ...this._currentDir,
3231
- ...msg.metadata
3232
- };
3233
- this._fileCount++;
3234
- const ext = meta.extension ?? "jpg";
3235
- const mid = meta.media_id ?? "?";
3236
- this._files.push({
3237
- num: meta.num ?? this._files.length + 1,
3238
- filename: `${mid}.${ext}`,
3239
- width: meta.width ?? 0,
3240
- height: meta.height ?? 0,
3241
- videoUrl: meta.video_url ?? null,
3242
- audioUrl: meta.audio_url ?? null
3243
- });
3244
- }
3245
- async handleQueue(msg) {
3246
- if (this._files.length > 0 || this._postCount > 0) this._flushPost();
3247
- this._postCount = 0;
3248
- this._files = [];
3249
- const extrClass = {
3250
- ...this._currentDir,
3251
- ...msg.metadata
3252
- }._extractor;
3253
- if (!extrClass || typeof extrClass !== "object") return;
3254
- const cls = extrClass;
3255
- const match = cls.pattern.exec(msg.url);
3256
- if (!match) return;
3257
- const parentExtr = this.extractor;
3258
- const childJob = new PrintJob(Reflect.construct(cls, [{
3259
- url: msg.url,
3260
- match,
3261
- config: parentExtr.config,
3262
- http: parentExtr.http,
3263
- storage: parentExtr.storage,
3264
- log: parentExtr.log
3265
- }]));
3266
- const childStatus = await childJob.run();
3267
- this.status |= childStatus;
3268
- this._postCount += childJob._postCount;
3269
- this._fileCount += childJob._fileCount;
3270
- }
3271
- /** Output */
3272
- _flushPost() {
3273
- const m = this._currentDir;
3274
- if (Object.keys(m).length === 0) return;
3275
- const w = this._width;
3276
- const labelW = 14;
3277
- const shortcode = m.post_shortcode ?? "?";
3278
- const header = ` Post #${this._postCount}: ${shortcode} `;
3279
- const padTotal = w - 2 - header.length;
3280
- const padL = Math.floor(padTotal / 2);
3281
- const padR = padTotal - padL;
3282
- process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
3283
- const row = (label, value, color) => {
3284
- const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${RESET}` : value;
3285
- process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
3286
- };
3287
- const username = m.username ?? "?";
3288
- const fullname = m.fullname ?? "";
3289
- row("Author:", fullname ? `${username} (${fullname})` : username, g);
3290
- row("Date:", m.date ?? m.post_date ?? "?");
3291
- row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
3292
- row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
3293
- row("URL:", m.post_url ?? "?");
3294
- const desc = m.description ?? "";
3295
- if (desc) {
3296
- process.stdout.write(` ${dim("│")}\n`);
3297
- process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
3298
- const lines = desc.split("\n");
3299
- for (const line of lines) {
3300
- const wrapped = this._wrap(line, w - 8);
3301
- for (const wl of wrapped) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
3302
- }
3303
- }
3304
- const tags = m.tags;
3305
- if (tags && tags.length > 0) {
3306
- process.stdout.write(` ${dim("│")}\n`);
3307
- process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
3308
- }
3309
- const locName = m.location_slug ?? "";
3310
- const locId = m.location_id ?? "";
3311
- if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
3312
- const coauthors = m.coauthors;
3313
- if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((c) => c.full_name ? `${c.username} (${c.full_name})` : c.username).join(", "));
3314
- const pinned = m.pinned;
3315
- if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
3316
- const expires = m.expires;
3317
- if (expires) row("Expires:", expires, YELLOW);
3318
- const hlTitle = m.highlight_title;
3319
- if (hlTitle) row("Highlight:", hlTitle);
3320
- const taggedUser = m.tagged_username ?? "";
3321
- if (taggedUser) {
3322
- const taggedFull = m.tagged_full_name ?? "";
3323
- row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
3324
- }
3325
- if (this._files.length > 0) {
3326
- process.stdout.write(` ${dim("│")}\n`);
3327
- process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
3328
- const maxNumW = String(this._files.length).length;
3329
- const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
3330
- const dimW = Math.min(maxFileW, 40);
3331
- for (const f of this._files) {
3332
- const numStr = `[${String(f.num).padStart(maxNumW)}]`;
3333
- const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
3334
- const res = f.width ? `${f.width}x${f.height}` : "?x?";
3335
- const badges = [];
3336
- if (f.videoUrl) badges.push("video");
3337
- if (f.audioUrl) badges.push("audio");
3338
- let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
3339
- if (badges.length > 0) line += ` ${YELLOW}(${badges.join("+")})${RESET}`;
3340
- process.stdout.write(`${line}\n`);
3341
- }
3342
- }
3343
- process.stdout.write(` ${dim("└")}${"─".repeat(w - 2)}${dim("┘")}\n`);
3344
- }
3345
- _wrap(text, maxLen) {
3346
- if (text.length <= maxLen) return [text];
3347
- const lines = [];
3348
- let remaining = text;
3349
- while (remaining.length > maxLen) {
3350
- let cut = maxLen;
3351
- while (cut > 0 && remaining[cut] !== " ") cut--;
3352
- if (cut === 0) cut = maxLen;
3353
- lines.push(remaining.slice(0, cut).trimEnd());
3354
- remaining = remaining.slice(cut).trimStart();
3355
- }
3356
- if (remaining) lines.push(remaining);
3357
- return lines;
3358
- }
3359
- _report() {
3360
- this._flushPost();
3361
- process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
3362
- process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
3363
- process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
3364
- process.stdout.write(`\n`);
3108
+ /**
3109
+ * Generate a random hex token (used for CSRF).
3110
+ */
3111
+ static generateToken(size = 16) {
3112
+ const bytes = new Uint8Array(size);
3113
+ if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
3114
+ else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
3115
+ return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
3365
3116
  }
3366
3117
  };
3367
3118
  //#endregion
3368
3119
  //#region src/message.ts
3369
- function directory(metadata = {}) {
3370
- return {
3371
- type: "directory",
3372
- metadata
3373
- };
3374
- }
3375
3120
  function url(u, metadata = {}) {
3376
3121
  return {
3377
3122
  type: "url",
@@ -3387,41 +3132,10 @@ function queue(u, metadata = {}) {
3387
3132
  };
3388
3133
  }
3389
3134
  //#endregion
3390
- //#region src/utils/id-codec.ts
3391
- /**
3392
- * Instagram-style Base64-variant ID ↔ shortcode conversion.
3393
- */
3394
- const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
3395
- /** Pre-built index for O(1) character lookup during decode. */
3396
- const CHAR_INDEX = {};
3397
- for (let i = 0; i < 64; i++) CHAR_INDEX[ALPHABET[i]] = i;
3398
- const BASE = BigInt(64);
3135
+ //#region src/utils/text.ts
3136
+ /** URL helpers */
3399
3137
  /**
3400
- * Decode an Instagram shortcode into its numeric post ID.
3401
- */
3402
- function idFromShortcode(shortcode) {
3403
- let num = 0n;
3404
- for (const ch of shortcode) num = num * BASE + BigInt(CHAR_INDEX[ch] ?? 0);
3405
- return num.toString();
3406
- }
3407
- /**
3408
- * Encode a numeric post ID into an Instagram shortcode.
3409
- */
3410
- function shortcodeFromId(postId) {
3411
- let num = BigInt(postId);
3412
- const chars = [];
3413
- while (num > 0n) {
3414
- const remainder = Number(num % BASE);
3415
- chars.push(ALPHABET[remainder]);
3416
- num = num / BASE;
3417
- }
3418
- return chars.reverse().join("");
3419
- }
3420
- //#endregion
3421
- //#region src/utils/text.ts
3422
- /** URL helpers */
3423
- /**
3424
- * URL-decode a string.
3138
+ * URL-decode a string.
3425
3139
  */
3426
3140
  function unquote(text) {
3427
3141
  try {
@@ -3478,121 +3192,6 @@ function tagRe(pattern) {
3478
3192
  /** Pre-configured hashtag regex. */
3479
3193
  const findTags = tagRe("#\\w+");
3480
3194
  //#endregion
3481
- //#region src/core/extractor.ts
3482
- var Extractor = class {
3483
- /** Regex pattern to match against URLs */
3484
- static pattern = /^$/;
3485
- /** The input URL */
3486
- url;
3487
- /** Regex match groups from ``fromURL`` */
3488
- groups;
3489
- config;
3490
- /** HTTP client — public so Job can access for downloads */
3491
- http;
3492
- /** Storage backend — public so Job can access for writes */
3493
- storage;
3494
- /** Logger instance — public so Job can access for reporting */
3495
- log;
3496
- /** Delay range in seconds — random between [min, max] before each request */
3497
- requestInterval = [6, 12];
3498
- _initialized = false;
3499
- constructor(opts) {
3500
- this.url = opts.url;
3501
- this.groups = opts.match ? [...opts.match].slice(1) : [];
3502
- this.config = opts.config;
3503
- this.http = opts.http;
3504
- this.storage = opts.storage;
3505
- this.log = opts.log;
3506
- }
3507
- /** Initialization */
3508
- /**
3509
- * One-time async setup (cookies, session, internal state).
3510
- * Safe to call multiple times — after the first call it becomes a no-op.
3511
- */
3512
- async initialize() {
3513
- if (this._initialized) return;
3514
- await this._init();
3515
- this._initialized = true;
3516
- this.initialize = async () => {};
3517
- }
3518
- /**
3519
- * Subclass hook for one-time setup.
3520
- */
3521
- async _init() {}
3522
- /** Async iteration */
3523
- async *[Symbol.asyncIterator]() {
3524
- await this.initialize();
3525
- yield* this.items();
3526
- }
3527
- /** Config helpers */
3528
- /**
3529
- * Read a config value using the interpolated hierarchy.
3530
- */
3531
- _cfg(key, defaultVal) {
3532
- const path = [
3533
- "extractor",
3534
- this.category,
3535
- this.subcategory
3536
- ];
3537
- return this.config.interpolate(path, key, defaultVal);
3538
- }
3539
- /** HTTP */
3540
- _lastRequestTime = 0;
3541
- /**
3542
- * Rate-limited HTTP request wrapper.
3543
- */
3544
- async request(url, cfg = {}) {
3545
- await this._throttle();
3546
- const response = await this.http.request({
3547
- url,
3548
- ...cfg
3549
- });
3550
- this._lastRequestTime = Date.now();
3551
- return response;
3552
- }
3553
- /**
3554
- * Convenience: request + parse JSON body.
3555
- */
3556
- async requestJSON(url, cfg = {}) {
3557
- const resp = await this.request(url, cfg);
3558
- if (typeof resp.data === "object") return resp.data;
3559
- try {
3560
- return JSON.parse(resp.data);
3561
- } catch {
3562
- return {};
3563
- }
3564
- }
3565
- /** Rate limiting */
3566
- /**
3567
- * Sleep long enough to keep the minimum interval between requests.
3568
- */
3569
- async _throttle() {
3570
- const elapsed = Date.now() - this._lastRequestTime;
3571
- const [min, max] = this.requestInterval;
3572
- const target = min + Math.random() * (max - min);
3573
- const waitMs = Math.max(0, target * 1e3 - elapsed);
3574
- if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
3575
- }
3576
- /** Utility */
3577
- /**
3578
- * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
3579
- */
3580
- parseTimestamp(ts) {
3581
- if (ts == null) return "";
3582
- const asMs = ts > 25e8 ? ts : ts * 1e3;
3583
- return new Date(asMs).toISOString();
3584
- }
3585
- /**
3586
- * Generate a random hex token (used for CSRF).
3587
- */
3588
- static generateToken(size = 16) {
3589
- const bytes = new Uint8Array(size);
3590
- if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
3591
- else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
3592
- return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
3593
- }
3594
- };
3595
- //#endregion
3596
3195
  //#region src/instagram/api.ts
3597
3196
  const APP_ID = "936619743392459";
3598
3197
  const ASBD_ID = "129477";
@@ -3933,8 +3532,8 @@ var InstagramRestAPI = class {
3933
3532
  }
3934
3533
  };
3935
3534
  //#endregion
3936
- //#region src/instagram/parsers.ts
3937
- /** Main entry — REST */
3535
+ //#region src/instagram/parsers/rest.ts
3536
+ /** Main entry — parse a REST post response. */
3938
3537
  function parsePostRest(post, cfg) {
3939
3538
  if (post.items) return parseStoryRest(post, cfg);
3940
3539
  const owner = post.user;
@@ -3951,6 +3550,7 @@ function parsePostRest(post, cfg) {
3951
3550
  owner_id: owner.pk,
3952
3551
  username: owner.username ?? "",
3953
3552
  fullname: owner.full_name ?? "",
3553
+ user: owner,
3954
3554
  post_date: date,
3955
3555
  date,
3956
3556
  description: caption ? caption.text : "",
@@ -3962,10 +3562,9 @@ function parsePostRest(post, cfg) {
3962
3562
  if (tags.length > 0) data.tags = [...new Set(tags)].sort();
3963
3563
  if (post.location) {
3964
3564
  const loc = post.location;
3965
- const slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
3966
3565
  data.location_id = loc.pk;
3967
- data.location_slug = slug;
3968
- data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${slug}/`;
3566
+ data.location_slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
3567
+ data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${data.location_slug}/`;
3969
3568
  }
3970
3569
  if (post.coauthor_producers) data.coauthors = post.coauthor_producers.map((u) => ({
3971
3570
  id: u.pk,
@@ -3997,7 +3596,7 @@ function parsePostRest(post, cfg) {
3997
3596
  if (post.music_metadata) {
3998
3597
  const info = post.music_metadata.music_info;
3999
3598
  if (info) {
4000
- const audio = extractAudio(post, data, { music_asset_info: info }, cfg);
3599
+ const audio = extractAudio(post, data, info, cfg);
4001
3600
  if (audio) {
4002
3601
  audio.num = items.length;
4003
3602
  data._files.push(audio);
@@ -4012,7 +3611,7 @@ function parsePostRest(post, cfg) {
4012
3611
  if (post.subscription_media_visibility) data.subscription = post.subscription_media_visibility;
4013
3612
  return data;
4014
3613
  }
4015
- /** Story / highlight */
3614
+ /** Parse a story or highlight REST response. */
4016
3615
  function parseStoryRest(post, cfg) {
4017
3616
  const items = post.items;
4018
3617
  const reelId = String(post.id).split(":").pop() ?? "0";
@@ -4038,9 +3637,8 @@ function parseStoryRest(post, cfg) {
4038
3637
  expires: expires ? cfg.parseTimestamp(expires) : void 0,
4039
3638
  user: post.user
4040
3639
  };
4041
- if (!isStory) {
4042
- if (post.title) data.highlight_title = post.title;
4043
- } else if (!post.seen) post.seen = expires - 86400;
3640
+ if (!isStory && post.title) data.highlight_title = post.title;
3641
+ else if (!post.seen) post.seen = expires - 86400;
4044
3642
  for (let num = 0; num < items.length; num++) {
4045
3643
  const item = items[num];
4046
3644
  const media = parseMediaItem(item, post, cfg, num + 1);
@@ -4050,7 +3648,7 @@ function parseStoryRest(post, cfg) {
4050
3648
  }
4051
3649
  return data;
4052
3650
  }
4053
- /** Single media item */
3651
+ /** Parse a single media item (image/video) from a carousel or story. */
4054
3652
  function parseMediaItem(item, parent, cfg, num) {
4055
3653
  let image;
4056
3654
  try {
@@ -4109,7 +3707,7 @@ function parseMediaItem(item, parent, cfg, num) {
4109
3707
  if (itemRec.audience) media.audience = itemRec.audience;
4110
3708
  return media;
4111
3709
  }
4112
- /** Tagged users */
3710
+ /** Extract tagged users from various field formats. */
4113
3711
  function extractTaggedUsers(src, dest) {
4114
3712
  dest.tagged_users = [];
4115
3713
  const edges = src.edge_media_to_tagged_user;
@@ -4152,22 +3750,23 @@ function extractTaggedUsers(src, dest) {
4152
3750
  }
4153
3751
  }
4154
3752
  const seen = /* @__PURE__ */ new Set();
4155
- dest.tagged_users = dest.tagged_users.filter((t) => {
4156
- if (seen.has(t.id)) return false;
4157
- seen.add(t.id);
4158
- return true;
4159
- });
3753
+ dest.tagged_users = dest.tagged_users.filter((t) => seen.has(t.id) ? false : (seen.add(t.id), true));
4160
3754
  }
4161
- /** Audio / music extraction */
3755
+ /** Extract audio/music metadata from a story sticker. */
4162
3756
  function extractAudio(src, dest, sticker, cfg) {
4163
3757
  const info = sticker.music_asset_info;
4164
3758
  if (!info) return null;
4165
3759
  const cinfo = sticker.music_consumption_info ?? info;
4166
- dest.audio_title = info.title;
3760
+ dest.audio_title = info.title ?? info.sanitized_title;
3761
+ dest.audio_subtitle = info.subtitle;
4167
3762
  dest.audio_duration = (info.duration_in_ms ?? 0) / 1e3;
4168
3763
  dest.audio_timestamps = info.highlight_start_times_in_ms;
4169
3764
  dest.audio_artist = info.display_artist ?? cinfo.display_artist;
4170
3765
  dest.audio_user = info.ig_artist ?? cinfo.ig_artist;
3766
+ dest.audio_has_lyrics = info.has_lyrics;
3767
+ dest.audio_is_explicit = info.is_explicit;
3768
+ dest.audio_cover_artwork_uri = info.cover_artwork_uri;
3769
+ dest.audio_cover_artwork_thumbnail_uri = info.cover_artwork_thumbnail_uri;
4171
3770
  const url = info.progressive_download_url;
4172
3771
  if (!url) return null;
4173
3772
  return {
@@ -4184,13 +3783,25 @@ function extractAudio(src, dest, sticker, cfg) {
4184
3783
  height_original: 0,
4185
3784
  tagged_users: [],
4186
3785
  audio_user: info.ig_artist ?? cinfo.ig_artist,
4187
- audio_title: info.title,
3786
+ audio_title: info.title ?? info.sanitized_title,
3787
+ audio_subtitle: info.subtitle,
4188
3788
  audio_artist: info.display_artist ?? cinfo.display_artist,
4189
3789
  audio_duration: (info.duration_in_ms ?? 0) / 1e3,
4190
- audio_timestamps: info.highlight_start_times_in_ms
3790
+ audio_timestamps: info.highlight_start_times_in_ms,
3791
+ audio_cover_artwork_uri: info.cover_artwork_uri,
3792
+ audio_cover_artwork_thumbnail_uri: info.cover_artwork_thumbnail_uri,
3793
+ audio_has_lyrics: info.has_lyrics,
3794
+ audio_is_explicit: info.is_explicit
4191
3795
  };
4192
3796
  }
4193
- /** GraphQL parser */
3797
/**
 * Pick the list of user IDs that pinned a post.
 *
 * The timeline list is preferred; the clips-tab list is used only when the
 * timeline one is missing/falsy. An empty array is returned when neither exists.
 */
function extractPinned(post) {
	return post.timeline_pinned_user_ids || post.clips_tab_pinned_user_ids || [];
}
3802
+ //#endregion
3803
+ //#region src/instagram/parsers/graphql.ts
3804
+ /** Parse a GraphQL post/edge response. */
4194
3805
  function parsePostGraphql(post, cfg) {
4195
3806
  const typename = post.__typename ?? "GraphImage";
4196
3807
  const owner = post.owner;
@@ -4203,6 +3814,7 @@ function parsePostGraphql(post, cfg) {
4203
3814
  owner_id: owner.id ?? owner.pk,
4204
3815
  username: owner.username ?? "",
4205
3816
  fullname: owner.full_name ?? "",
3817
+ user: owner,
4206
3818
  post_id: post.id,
4207
3819
  post_shortcode: post.shortcode,
4208
3820
  post_url: `${cfg.root}/p/${post.shortcode}/`,
@@ -4275,11 +3887,6 @@ function parsePostGraphql(post, cfg) {
4275
3887
  }
4276
3888
  return data;
4277
3889
  }
4278
- function extractPinned(post) {
4279
- if (post.timeline_pinned_user_ids) return post.timeline_pinned_user_ids;
4280
- if (post.clips_tab_pinned_user_ids) return post.clips_tab_pinned_user_ids;
4281
- return [];
4282
- }
4283
3890
  function parseUnicodeEscapes(text) {
4284
3891
  if (!text.includes("\\u")) return text;
4285
3892
  return text.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
@@ -4438,12 +4045,49 @@ var InstagramExtractor = class extends Extractor {
4438
4045
  }
4439
4046
  };
4440
4047
  //#endregion
4441
- //#region src/instagram/extractors.ts
4048
+ //#region src/instagram/extractors/helpers.ts
4049
+ /** Shared regex utilities for Instagram extractor URL patterns. */
4442
4050
  const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/;
4443
4051
  function re(base, path) {
4444
4052
  const pathSrc = typeof path === "string" ? path : path.source;
4445
4053
  return new RegExp(base.source + pathSrc, "i");
4446
4054
  }
4055
+ //#endregion
4056
+ //#region src/instagram/extractors/registry.ts
4057
/** Lookup table from subcategory name to the extractor class registered for it. */
const _registry = /* @__PURE__ */ new Map();
/**
 * Record `cls` as the extractor for `subcategory`.
 * A later registration under the same name replaces the earlier one.
 */
function register(subcategory, cls) {
	_registry.set(subcategory, cls);
}
/**
 * Look up the extractor class registered under `subcategory`.
 * @returns The registered class, or `undefined` when none was registered.
 */
function get(subcategory) {
	const cls = _registry.get(subcategory);
	return cls;
}
4064
+ //#endregion
4065
+ //#region src/instagram/extractors/highlights.ts
4066
/**
 * Extractor for `instagram.com/<user>/highlights` URLs.
 * Registered in the extractor registry under its subcategory right after the class.
 */
var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
	static subcategory = "highlights";
	/** Capture group 1 is the user path segment, including its leading slash. */
	static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
	subcategory = InstagramHighlightsExtractor.subcategory;
	constructor(opts) {
		super(opts);
	}
	/** Create an extractor for `url`, or return `null` when the URL does not match the pattern. */
	static fromURL(url, opts) {
		const match = InstagramHighlightsExtractor.pattern.exec(url);
		return match ? new InstagramHighlightsExtractor({
			...opts,
			url,
			match
		}) : null;
	}
	/** Look up the user ID for the captured name and stream that user's highlight media. */
	async *posts() {
		const userPath = this.groups[0] ?? "";
		// The captured segment still carries its leading "/"; drop it before the lookup.
		const userId = await this.api.userId(userPath.replace(/^\//, ""));
		yield* this.api.highlightsMedia(userId);
	}
};
register(InstagramHighlightsExtractor.subcategory, InstagramHighlightsExtractor);
4089
+ //#endregion
4090
+ //#region src/instagram/extractors/post.ts
4447
4091
  var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtractor {
4448
4092
  static subcategory = "post";
4449
4093
  static pattern = re(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//, /(?:share(?:\/(?:p|tv|reels?))?|(?:[^/?#]+\/)?(?:p|tv|reels?))\/([^/?#]+)/);
@@ -4478,348 +4122,742 @@ var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtra
4478
4122
  yield* this.api.media(shortcode);
4479
4123
  }
4480
4124
  };
4481
- var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
4482
- static subcategory = "user";
4483
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
4484
- subcategory = InstagramUserExtractor.subcategory;
4125
+ register(InstagramPostExtractor.subcategory, InstagramPostExtractor);
4126
+ //#endregion
4127
+ //#region src/instagram/extractors/saved.ts
4128
/**
 * Extractor for `instagram.com/<user>/saved` (optionally `/saved/all-posts`) URLs.
 * Registered in the extractor registry under its subcategory right after the class.
 */
var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
	static subcategory = "saved";
	/** Capture group 1 is the user path segment; the URL must end after `saved` / `saved/all-posts`. */
	static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
	subcategory = InstagramSavedExtractor.subcategory;
	constructor(opts) {
		super(opts);
	}
	/** Create an extractor for `url`, or return `null` when the URL does not match the pattern. */
	static fromURL(url, opts) {
		const match = InstagramSavedExtractor.pattern.exec(url);
		return match ? new InstagramSavedExtractor({
			...opts,
			url,
			match
		}) : null;
	}
	/** Stream whatever `api.userSaved()` yields; the captured user segment is not consulted here. */
	async *posts() {
		yield* this.api.userSaved();
	}
};
register(InstagramSavedExtractor.subcategory, InstagramSavedExtractor);
4149
+ //#endregion
4150
+ //#region src/instagram/extractors/stories.ts
4151
/**
 * Extractor for story reels and highlight reels.
 *
 * Pattern capture groups, in order:
 *   1. numeric highlight ID      (`/stories/highlights/<id>`)
 *   2. username                  (`/stories/<user>[/<media id>]`)
 *   3. media ID after the username
 *   4. base64 highlight token    (`/s/<token>` share links)
 *   5. media ID from `?story_media_id=`
 *
 * Registered in the extractor registry under "stories" right after the class.
 */
var InstagramStoriesExtractor = class InstagramStoriesExtractor extends InstagramExtractor {
	static subcategory = "stories";
	// Share links have the form `/s/<token>`. The previous pattern only matched a
	// doubled slash (`instagram.com//<token>`); `s?` accepts the real form while
	// still matching what was accepted before.
	static pattern = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:stories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|s?\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/;
	subcategory = InstagramStoriesExtractor.subcategory;
	/** Reel ID of the form `highlight:<id>`, or `null` for a user's story reel. */
	highlightId = null;
	/** ID of a single story item to keep, or `null` for the whole reel. */
	mediaId = null;
	constructor(opts) {
		super(opts);
		const groups = this.groups;
		const numericHighlight = groups[0];
		const user = groups[1];
		const pathMediaId = groups[2];
		const encodedHighlight = groups[3];
		const queryMediaId = groups[4];
		if (user) {
			this.subcategory = "stories";
			this.highlightId = null;
		} else {
			this.subcategory = "highlights";
			if (numericHighlight) this.highlightId = `highlight:${numericHighlight}`;
			else {
				// The token must start with "aGlnaGxpZ2h0", i.e. base64("highlight"), so the
				// decoded text normally already reads "highlight:<id>". Only add the prefix
				// when it is genuinely absent, otherwise the ID becomes "highlight:highlight:<id>".
				const decoded = Buffer.from(encodedHighlight ?? "", "base64").toString("utf-8");
				this.highlightId = decoded.startsWith("highlight:") ? decoded : `highlight:${decoded}`;
			}
		}
		this.mediaId = pathMediaId ?? queryMediaId ?? null;
	}
	/** Create an extractor for `url`, or return `null` when the URL does not match the pattern. */
	static fromURL(url, opts) {
		const match = InstagramStoriesExtractor.pattern.exec(url);
		if (!match) return null;
		return new InstagramStoriesExtractor({
			...opts,
			url,
			match
		});
	}
	/**
	 * Yield the requested reel(s).
	 *
	 * - With a media ID: yields the first reel, narrowed to that item when it is found
	 *   (the reel is yielded unchanged when the item is not present).
	 * - With the `split` option: yields one shallow copy of the first reel per item.
	 * - Otherwise: yields every reel returned by the API.
	 */
	async *posts() {
		const reelId = this.highlightId ? this.highlightId : await this.api.userId((this.groups[1] ?? "").toString());
		const reels = await this.api.reelsMedia([reelId]);
		if (!reels.length) return;
		if (this.mediaId) {
			const reel = reels[0];
			// mediaId comes from the URL and is always a string; normalise pk so a
			// numeric pk in the response still matches.
			const wanted = (reel.items ?? []).find((item) => String(item.pk) === this.mediaId);
			if (wanted) reel.items = [wanted];
			yield reel;
			return;
		}
		if (this._cfg("split", false)) {
			const reel = reels[0];
			for (const item of reel.items ?? []) yield {
				...reel,
				items: [item]
			};
		} else yield* reels;
	}
};
register(InstagramStoriesExtractor.subcategory, InstagramStoriesExtractor);
4207
+ //#endregion
4208
+ //#region src/instagram/extractors/tag.ts
4209
/**
 * Extractor for hashtag pages (`instagram.com/explore/tags/<tag>`).
 * Registered in the extractor registry under its subcategory right after the class.
 */
var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtractor {
	static subcategory = "tag";
	/** Capture group 1 is the (still URL-encoded) tag name. */
	static pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/);
	subcategory = InstagramTagExtractor.subcategory;
	constructor(opts) {
		super(opts);
	}
	/** Create an extractor for `url`, or return `null` when the URL does not match the pattern. */
	static fromURL(url, opts) {
		const match = InstagramTagExtractor.pattern.exec(url);
		return match ? new InstagramTagExtractor({
			...opts,
			url,
			match
		}) : null;
	}
	/** Metadata for this job: the percent-decoded tag name. */
	async metadata() {
		const encodedTag = this.groups[0] ?? "";
		const tag = decodeURIComponent(encodedTag);
		return { tag };
	}
	/** Stream the media the API returns for the percent-decoded tag. */
	async *posts() {
		const encodedTag = this.groups[0] ?? "";
		const tag = decodeURIComponent(encodedTag);
		yield* this.api.tagsMedia(tag);
	}
};
register(InstagramTagExtractor.subcategory, InstagramTagExtractor);
4235
+ //#endregion
4236
+ //#region src/instagram/extractors/user.ts
4237
/**
 * Extractor for a bare profile URL (`instagram.com/<user>`).
 *
 * It downloads nothing itself: `items()` queues one child URL per category named
 * in the `include` option, each tagged with the extractor class looked up in the
 * registry. Registered under its own subcategory right after the class.
 */
var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
	static subcategory = "user";
	/** Capture group 1 is the user path segment; nothing but `/`, `?…` or `#…` may follow it. */
	static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
	subcategory = InstagramUserExtractor.subcategory;
	constructor(opts) {
		super(opts);
	}
	/** Create an extractor for `url`, or return `null` when the URL does not match the pattern. */
	static fromURL(url, opts) {
		const match = InstagramUserExtractor.pattern.exec(url);
		return match ? new InstagramUserExtractor({
			...opts,
			url,
			match
		}) : null;
	}
	/**
	 * Yield a queue message for every requested category.
	 *
	 * `include` may be the string "all", a comma-separated string (whitespace is
	 * stripped), or a list of category names. A category with no URL mapping
	 * or no registered extractor is reported with a warning and skipped.
	 */
	async *items() {
		await this.login();
		const userPath = this.groups[0] ?? "/";
		const base = `${this.root}${userPath}/`;
		// Stories live under /stories/<name>/, so the leading "/" of the captured path is dropped.
		const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
		const include = this._cfg("include", ["posts"]);
		let categories;
		if (include === "all") {
			categories = [
				"posts",
				"reels",
				"tagged",
				"stories",
				"highlights",
				"info",
				"avatar"
			];
		} else if (typeof include === "string") {
			categories = include.replace(/\s+/g, "").split(",");
		} else {
			categories = include;
		}
		const urls = {
			info: `${base}info/`,
			avatar: `${base}avatar/`,
			stories: storiesUrl,
			highlights: `${base}highlights/`,
			posts: `${base}posts/`,
			reels: `${base}reels/`,
			tagged: `${base}tagged/`
		};
		for (const cat of categories) {
			const cls = get(cat);
			const url = urls[cat];
			if (!cls || !url) {
				this.log.warn(`Invalid include '${cat}'`);
				continue;
			}
			yield queue(url, { _extractor: cls });
		}
	}
	/** Intentionally empty: all work is delegated through the queued child extractors. */
	async *posts() {}
};
register(InstagramUserExtractor.subcategory, InstagramUserExtractor);
4287
+ //#endregion
4288
+ //#region src/config.ts
4289
/**
 * Nested configuration store addressed by dot-separated paths.
 *
 * Only a node's own properties are treated as configuration; inherited members
 * such as `constructor` or `toString` are never returned as values.
 */
var ConfigManager = class ConfigManager {
	data;
	constructor(data = {}) {
		this.data = data;
	}
	/** True when `value` is a non-null, non-array object that can hold child keys. */
	static #isBranch(value) {
		return value != null && typeof value === "object" && !Array.isArray(value);
	}
	/**
	 * Read a value at a dot-path like ``'extractor.instagram.videos'``.
	 *
	 * @param path Dot-separated key path.
	 * @param defaultValue Returned when any segment is missing, when an intermediate
	 *   node is not a plain object, or when the stored value is ``undefined``.
	 */
	get(path, defaultValue) {
		let node = this.data;
		for (const key of path.split(".")) {
			if (!ConfigManager.#isBranch(node) || !Object.hasOwn(node, key)) return defaultValue;
			node = node[key];
		}
		return node === void 0 ? defaultValue : node;
	}
	/**
	 * Look `key` up at every level along `cfgPath`, starting at the root.
	 *
	 * The root is checked first, then the node after each path segment, including
	 * the node reached after the final segment. The first level that defines `key`
	 * (with a value other than ``undefined``) wins, so shallower levels take
	 * precedence over deeper ones.
	 *
	 * @param cfgPath List of keys describing the path to descend.
	 * @param key Setting name to find.
	 * @param defaultVal Returned when no level along the path defines `key`.
	 */
	interpolate(cfgPath, key, defaultVal) {
		let node = this.data;
		// `<=` so that the node behind the last segment is inspected as well.
		for (let depth = 0; depth <= cfgPath.length; depth++) {
			if (!ConfigManager.#isBranch(node)) break;
			if (Object.hasOwn(node, key) && node[key] !== void 0) return node[key];
			if (depth === cfgPath.length) break;
			const segment = cfgPath[depth];
			node = Object.hasOwn(node, segment) ? node[segment] : void 0;
		}
		return defaultVal;
	}
	/**
	 * Store `value` at a dot-path, creating intermediate objects as needed.
	 * An intermediate value that is not a plain object is replaced by a new object.
	 *
	 * @throws {TypeError} When the path contains a ``__proto__`` segment, which
	 *   would otherwise write through to ``Object.prototype``.
	 */
	set(path, value) {
		const keys = path.split(".");
		if (keys.includes("__proto__")) throw new TypeError(`Refusing to set unsafe config path '${path}'`);
		const lastKey = keys[keys.length - 1];
		let node = this.data;
		for (const key of keys.slice(0, -1)) {
			let child = node[key];
			if (!ConfigManager.#isBranch(child)) {
				child = {};
				node[key] = child;
			}
			node = child;
		}
		node[lastKey] = value;
	}
};
4687
- var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
4688
- static subcategory = "highlights";
4689
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
4690
- subcategory = InstagramHighlightsExtractor.subcategory;
4691
- constructor(opts) {
4692
- super(opts);
4693
- }
4694
- static fromURL(url, opts) {
4695
- const match = InstagramHighlightsExtractor.pattern.exec(url);
4696
- if (!match) return null;
4697
- return new InstagramHighlightsExtractor({
4698
- ...opts,
4699
- url,
4700
- match
4701
- });
4341
+ //#endregion
4342
+ //#region src/cli/options.ts
4343
/**
 * Attach the CLI options shared by all commands to `cmd`.
 *
 * @param cmd A commander command (anything with a chainable `option()` method).
 * @returns Whatever the final `option()` call returns (the command itself for commander).
 */
function addSharedOptions(cmd) {
	// Commander invokes an option parser as (value, previousValue). Handing it
	// Number.parseInt directly would turn the previous value into the radix when
	// the flag is repeated, so pin the radix to 10 explicitly.
	const parseDecimal = (value) => Number.parseInt(value, 10);
	return cmd
		.option("--sessionid <cookie>", "Instagram sessionid cookie value (from browser)", process.env.INSTAGRAM_SESSIONID)
		.option("--cookies <string>", "Full Cookie header string from browser", process.env.INSTAGRAM_COOKIES)
		.option("-o, --output <dir>", "Output directory", "./data")
		.option("--videos <mode>", "Download videos: true, false, or merged (yt-dlp)", "true")
		.option("--previews <types>", "Download only previews: video,audio (comma-separated)")
		.option("--audio", "Download standalone audio tracks", false)
		.option("--max-posts <n>", "Maximum number of posts to download", parseDecimal)
		.option("--cursor <cursor>", "Resume from pagination cursor")
		.option("--order-posts <order>", "Post ordering: asc, desc, id, id_asc, id_desc")
		.option("--order-files <order>", "File ordering: r, d (reverse), or empty for natural order")
		.option("--static-videos", "Download static video cover images instead of actual videos", false)
		.option("--no-static-videos", "Do not force static video covers")
		.option("--api <backend>", "API backend: rest (default) or graphql", "rest")
		.option("-v, --verbose", "Verbose debug output", false)
		.option("--include <list>", "For user: comma-separated sub-extractors (posts,reels,tagged,stories,highlights,info,avatar)", "posts")
		.option("--split", "For stories: split each frame into a separate post", false)
		.option("-i, --info", "Print structured post info to terminal (no download)", false);
}
4346
/**
 * Translate parsed CLI options into a ConfigManager.
 *
 * Every truthy option is copied into an object that is stored at
 * `extractor.instagram`; when no option is truthy nothing is stored at all.
 *
 * @param opts Parsed CLI options (camelCase keys).
 * @returns A new ConfigManager.
 */
function buildConfig(opts) {
	// [config key, value] pairs in insertion order; a falsy value means "leave unset".
	const settings = [
		["videos", opts.videos],
		["previews", opts.previews && opts.previews.split(",")],
		["audio", opts.audio && true],
		["max-posts", opts.maxPosts],
		["cursor", opts.cursor],
		["order-posts", opts.orderPosts],
		["order-files", opts.orderFiles],
		["static-videos", opts.staticVideos && true],
		["api", opts.api],
		["include", opts.include],
		["split", opts.split && true]
	];
	const ig = {};
	for (const [key, value] of settings) {
		if (value) ig[key] = value;
	}
	const config = new ConfigManager();
	if (Object.keys(ig).length > 0) config.set("extractor.instagram", ig);
	return config;
}
4363
+ //#endregion
4364
+ //#region src/core/format.ts
4365
+ /** Shared ANSI formatting and display utilities. */
4366
/**
 * Format a byte count using binary units (B, KB, MB, GB).
 *
 * Plain bytes are printed without decimals, larger units with one decimal.
 * Values beyond the largest unit stay in GB. Fractions below one byte,
 * negative values and NaN are rendered in bytes / with their sign rather than
 * producing an undefined unit.
 *
 * @param bytes Size in bytes.
 * @returns Text such as "0 B", "1.5 KB" or "5120.0 GB".
 */
function formatBytes(bytes) {
	const units = [
		"B",
		"KB",
		"MB",
		"GB"
	];
	let value = bytes;
	let unitIndex = 0;
	// Repeated division instead of a logarithm: it cannot yield a negative or NaN
	// index, and it lands exact powers of 1024 on the right unit.
	while (Math.abs(value) >= 1024 && unitIndex < units.length - 1) {
		value /= 1024;
		unitIndex++;
	}
	return `${value.toFixed(unitIndex === 0 ? 0 : 1)} ${units[unitIndex]}`;
}
4377
/* ANSI SGR escape sequences used for terminal styling. */
const RESET = "\x1B[0m";
const BOLD = "\x1B[1m";
const DIM = "\x1B[2m";
const GREEN = "\x1B[32m";
const YELLOW = "\x1B[33m";
const CYAN = "\x1B[36m";
/** Surround `s` with the given escape sequence and a trailing reset. */
function _wrapAnsi(code, s) {
	return `${code}${s}${RESET}`;
}
/** Render `s` in bold. */
function b(s) {
	return _wrapAnsi(BOLD, s);
}
/** Render `s` dimmed. */
function dim(s) {
	return _wrapAnsi(DIM, s);
}
/** Render `s` in cyan. */
function c(s) {
	return _wrapAnsi(CYAN, s);
}
/** Render `s` in green. */
function g(s) {
	return _wrapAnsi(GREEN, s);
}
/* Aliases of the raw yellow / reset sequences. */
const _YELLOW = YELLOW;
const _RESET = RESET;
/**
 * Right-pad `s` with spaces up to `n` characters.
 * Strings that are already `n` characters or longer are returned unchanged.
 */
function pad(s, n) {
	const missing = n - s.length;
	if (missing <= 0) return s;
	return s + " ".repeat(missing);
}
4400
+ //#endregion
4401
+ //#region src/core/job.ts
4402
/**
 * Base class that drives an extractor and routes every message it yields to a
 * handler. `handleDirectory`, `handleUrl` and `handleQueue` are not defined
 * here; subclasses are expected to supply them.
 */
var Job = class {
	extractor;
	/** Status code returned by `run()`; starts at 0. */
	status = 0;
	constructor(extractor) {
		this.extractor = extractor;
	}
	/**
	 * Main entry point: log the start, initialize the extractor, dispatch each
	 * yielded message by its `type`, then call `_report()`.
	 * Messages with any other type are ignored.
	 *
	 * @returns The job's `status`.
	 */
	async run() {
		const { extractor } = this;
		extractor.log.info(`Starting ${extractor.category}/${extractor.subcategory} ${extractor.url}`);
		await extractor.initialize();
		for await (const msg of extractor) {
			const kind = msg.type;
			if (kind === "directory") await this.handleDirectory(msg);
			else if (kind === "url") await this.handleUrl(msg);
			else if (kind === "queue") await this.handleQueue(msg);
		}
		this._report();
		return this.status;
	}
	/** Hook for subclasses to print a summary; does nothing by default. */
	_report() {}
};
4709
- var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtractor {
4710
- static subcategory = "tag";
4711
- static pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/);
4712
- subcategory = InstagramTagExtractor.subcategory;
4713
- constructor(opts) {
4714
- super(opts);
4429
+ //#endregion
4430
+ //#region src/core/download-job.ts
4431
/**
 * Job that downloads every URL message to storage and keeps an in-memory
 * archive of what was already written, so repeated files are skipped.
 */
var DownloadJob = class DownloadJob extends Job {
	/** Base output directory (prepended to all paths). */
	basePath = "";
	/** Current target directory metadata (set by directory messages). */
	_currentDir = {};
	/** In-memory archive: category → Set of archive keys. */
	archive = /* @__PURE__ */ new Map();
	/** Archive key format per category; "{media_id}" is used when none is registered. */
	_archiveFmts = /* @__PURE__ */ new Map();
	_postCount = 0;
	_fileCount = 0;
	_downloadedBytes = 0;
	_skippedCount = 0;
	/** Register the archive key format (e.g. "{media_id}") for a category. */
	registerArchive(category, format) {
		this._archiveFmts.set(category, format);
	}
	/** Replace every `{name}` placeholder with `meta[name]`; null/undefined become "". */
	_interp(fmt, meta) {
		return fmt.replace(/\{(\w+)\}/g, (_, key) => {
			const v = meta[key];
			return v == null ? "" : String(v);
		});
	}
	/** Resolve the archive category and key for a file's metadata. */
	_archiveKey(meta) {
		const cat = meta.category ?? this.extractor.category;
		const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
		return [cat, this._interp(fmt, meta)];
	}
	/** True when this file's archive key has already been recorded. */
	_isArchived(meta) {
		const [cat, key] = this._archiveKey(meta);
		return !!this.archive.get(cat)?.has(key);
	}
	/** Record this file's archive key, creating the category set on first use. */
	_archive(meta) {
		const [cat, key] = this._archiveKey(meta);
		let set = this.archive.get(cat);
		if (!set) {
			set = /* @__PURE__ */ new Set();
			this.archive.set(cat, set);
		}
		set.add(key);
	}
	/** Directory for `meta`, prefixed with `basePath` when one is set. */
	_targetDir(meta) {
		const dir = this._buildDirPath(meta);
		return this.basePath ? `${this.basePath}/${dir}` : dir;
	}
	/** Start a new post: remember its metadata, create its directory and log it. */
	async handleDirectory(msg) {
		this._currentDir = { ...msg.metadata };
		this._postCount++;
		const dirPath = this._targetDir(msg.metadata);
		await this.extractor.storage.mkdir(dirPath);
		this.extractor.log.info(`#${this._postCount} ${msg.metadata.username ?? "?"}/${msg.metadata.post_shortcode ?? "?"} → ${dirPath}/`);
	}
	/**
	 * Download one file unless it is already archived.
	 * A failure is logged and sets bit 4 of `status`; it does not abort the job.
	 */
	async handleUrl(msg) {
		const meta = {
			...this._currentDir,
			...msg.metadata
		};
		if (this._isArchived(meta)) {
			this._skippedCount++;
			return;
		}
		const filename = this._buildFilename(meta);
		const fullPath = `${this._targetDir(meta)}/${filename}`;
		try {
			const resp = await this.extractor.http.request({
				url: msg.url,
				method: "GET",
				responseType: "arraybuffer"
			});
			// Normalise the payload to bytes or text before writing.
			let data;
			if (resp.data instanceof Uint8Array) data = resp.data;
			else if (resp.data instanceof ArrayBuffer) data = new Uint8Array(resp.data);
			else if (typeof resp.data === "string") data = resp.data;
			else data = JSON.stringify(resp.data);
			await this.extractor.storage.write(fullPath, data);
			this._fileCount++;
			const size = data instanceof Uint8Array ? data.byteLength : data.length;
			this._downloadedBytes += size;
			this.extractor.log.info(` └─ ${filename} (${formatBytes(size)})`);
			this._archive(meta);
		} catch (err) {
			this.extractor.log.error(`Failed to download ${filename}: ${String(err)}`);
			this.status |= 4;
		}
	}
	/**
	 * Run a child DownloadJob for a queued URL.
	 *
	 * The child extractor class comes from the message's `_extractor` metadata.
	 * The child starts with a copy of this job's archive and formats; afterwards
	 * its status bits and archive entries are merged back into this job.
	 */
	async handleQueue(msg) {
		const meta = {
			...this._currentDir,
			...msg.metadata
		};
		const cls = meta._extractor;
		// Extractor classes are constructors, so `typeof` reports "function".
		// Testing for "object" rejected every class and no child job ever ran.
		if (typeof cls !== "function") return;
		const match = cls.pattern?.exec(msg.url);
		if (!match) return;
		const parentExtr = this.extractor;
		const childJob = new DownloadJob(Reflect.construct(cls, [{
			url: msg.url,
			match,
			config: parentExtr.config,
			http: parentExtr.http,
			storage: parentExtr.storage,
			log: parentExtr.log
		}]));
		childJob.basePath = this.basePath;
		childJob._currentDir = meta;
		for (const [cat, set] of this.archive) childJob.archive.set(cat, new Set(set));
		for (const [cat, fmt] of this._archiveFmts) childJob._archiveFmts.set(cat, fmt);
		const childStatus = await childJob.run();
		this.status |= childStatus;
		for (const [cat, set] of childJob.archive) {
			const mine = this.archive.get(cat);
			if (mine) for (const k of set) mine.add(k);
			else this.archive.set(cat, set);
		}
	}
	/** Log the totals for this job. */
	_report() {
		const log = this.extractor.log;
		log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
		if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
	}
	/** Relative directory: `<category>/<username>`, with "_" when the username is missing. */
	_buildDirPath(meta) {
		return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
	}
	/** File name: `<media_id>[_<num>].<extension>`, defaulting to "0" and "jpg". */
	_buildFilename(meta) {
		const mid = meta.media_id ?? "0";
		const ext = meta.extension ?? "jpg";
		return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
	}
};
4554
+ //#endregion
4555
+ //#region src/core/print-job.ts
4556
+ var PrintJob = class PrintJob extends Job {
4557
+ _currentDir = {};
4558
+ _files = [];
4559
+ _postCount = 0;
4560
+ _fileCount = 0;
4561
+ _width;
4562
+ constructor(extractor) {
4563
+ super(extractor);
4564
+ this._width = Math.min(process.stdout.columns ?? 80, 100);
4756
4565
  }
4757
- async *posts() {}
4758
- };
4759
- var InstagramAvatarExtractor = class InstagramAvatarExtractor extends InstagramExtractor {
4760
- static subcategory = "avatar";
4761
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/);
4762
- subcategory = InstagramAvatarExtractor.subcategory;
4763
- constructor(opts) {
4764
- super(opts);
4566
+ async handleDirectory(msg) {
4567
+ if (this._postCount > 0) this._flushPost();
4568
+ this._currentDir = { ...msg.metadata };
4569
+ this._postCount++;
4570
+ this._files = [];
4765
4571
  }
4766
- static fromURL(url, opts) {
4767
- const match = InstagramAvatarExtractor.pattern.exec(url);
4768
- if (!match) return null;
4769
- return new InstagramAvatarExtractor({
4770
- ...opts,
4771
- url,
4772
- match
4572
+ async handleUrl(msg) {
4573
+ const meta = {
4574
+ ...this._currentDir,
4575
+ ...msg.metadata
4576
+ };
4577
+ this._fileCount++;
4578
+ const ext = meta.extension ?? "jpg";
4579
+ const mid = meta.media_id ?? "?";
4580
+ this._files.push({
4581
+ num: meta.num ?? this._files.length + 1,
4582
+ filename: `${mid}.${ext}`,
4583
+ width: meta.width ?? 0,
4584
+ height: meta.height ?? 0,
4585
+ videoUrl: meta.video_url ?? null,
4586
+ audioUrl: meta.audio_url ?? null,
4587
+ audioTitle: meta.audio_title ?? void 0,
4588
+ audioArtist: meta.audio_artist ?? void 0,
4589
+ audioDuration: meta.audio_duration ?? void 0,
4590
+ audioHasLyrics: meta.audio_has_lyrics ?? void 0,
4591
+ audioIsExplicit: meta.audio_is_explicit ?? void 0,
4592
+ coverArtworkUri: meta.audio_cover_artwork_uri ?? meta.audio_cover_artwork_thumbnail_uri ?? void 0
4773
4593
  });
4774
4594
  }
4775
- async *posts() {
4776
- const screenName = (this.groups[0] ?? "").replace(/^\//, "");
4777
- let user;
4778
- if (screenName.startsWith("id:")) user = await this.api.userById(screenName.slice(3));
4779
- else user = await this.api.userByScreenName(screenName);
4780
- const avatar = user.hd_profile_pic_url_info ?? user.hd_profile_pic_versions?.[user.hd_profile_pic_versions.length - 1] ?? {
4781
- url: user.profile_pic_url ?? "",
4782
- width: 0,
4783
- height: 0
4595
+ async handleQueue(msg) {
4596
+ if (this._files.length > 0 || this._postCount > 0) this._flushPost();
4597
+ this._postCount = 0;
4598
+ this._files = [];
4599
+ const extrClass = {
4600
+ ...this._currentDir,
4601
+ ...msg.metadata
4602
+ }._extractor;
4603
+ if (!extrClass || typeof extrClass !== "object") return;
4604
+ const cls = extrClass;
4605
+ const match = cls.pattern.exec(msg.url);
4606
+ if (!match) return;
4607
+ const parentExtr = this.extractor;
4608
+ const childJob = new PrintJob(Reflect.construct(cls, [{
4609
+ url: msg.url,
4610
+ match,
4611
+ config: parentExtr.config,
4612
+ http: parentExtr.http,
4613
+ storage: parentExtr.storage,
4614
+ log: parentExtr.log
4615
+ }]));
4616
+ const childStatus = await childJob.run();
4617
+ this.status |= childStatus;
4618
+ this._postCount += childJob._postCount;
4619
+ this._fileCount += childJob._fileCount;
4620
+ }
4621
+ _flushPost() {
4622
+ const m = this._currentDir;
4623
+ if (Object.keys(m).length === 0) return;
4624
+ const w = this._width;
4625
+ const labelW = 14;
4626
+ const shortcode = m.post_shortcode ?? "?";
4627
+ const header = ` Post #${this._postCount}: ${shortcode} `;
4628
+ const padTotal = w - 2 - header.length;
4629
+ const padL = Math.floor(padTotal / 2);
4630
+ const padR = padTotal - padL;
4631
+ process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
4632
+ const row = (label, value, color) => {
4633
+ const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${_RESET}` : value;
4634
+ process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
4784
4635
  };
4785
- let pk = user.profile_pic_id?.split("_")[0];
4786
- let code;
4787
- if (pk) code = shortcodeFromId(pk);
4788
- else {
4789
- pk = `avatar:${user.pk}`;
4790
- code = pk;
4636
+ const username = m.username ?? "?";
4637
+ const fullname = m.fullname ?? "";
4638
+ row("Author:", fullname ? `${username} (${fullname})` : username, g);
4639
+ row("Date:", m.date ?? m.post_date ?? "?");
4640
+ row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
4641
+ row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
4642
+ row("URL:", m.post_url ?? "?");
4643
+ const desc = m.description ?? "";
4644
+ if (desc) {
4645
+ process.stdout.write(` ${dim("│")}\n`);
4646
+ process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
4647
+ for (const line of desc.split("\n")) for (const wl of this._wrap(line, w - 8)) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
4791
4648
  }
4792
- yield {
4793
- pk,
4794
- code,
4795
- user,
4796
- caption: null,
4797
- like_count: 0,
4798
- image_versions2: { candidates: [avatar] }
4799
- };
4800
- }
4801
- };
4802
- var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
4803
- static subcategory = "saved";
4804
- static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
4805
- subcategory = InstagramSavedExtractor.subcategory;
4806
- constructor(opts) {
4807
- super(opts);
4649
+ const tags = m.tags;
4650
+ if (tags && tags.length > 0) {
4651
+ process.stdout.write(` ${dim("│")}\n`);
4652
+ process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
4653
+ }
4654
+ const locName = m.location_slug ?? "";
4655
+ const locId = m.location_id ?? "";
4656
+ if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
4657
+ const coauthors = m.coauthors;
4658
+ if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((c) => c.full_name ? `${c.username} (${c.full_name})` : c.username).join(", "));
4659
+ const pinned = m.pinned;
4660
+ if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
4661
+ const expires = m.expires;
4662
+ if (expires) row("Expires:", expires, _YELLOW);
4663
+ const hlTitle = m.highlight_title;
4664
+ if (hlTitle) row("Highlight:", hlTitle);
4665
+ const taggedUser = m.tagged_username ?? "";
4666
+ if (taggedUser) {
4667
+ const taggedFull = m.tagged_full_name ?? "";
4668
+ row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
4669
+ }
4670
+ if (this._files.length > 0) {
4671
+ process.stdout.write(` ${dim("│")}\n`);
4672
+ process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
4673
+ const maxNumW = String(this._files.length).length;
4674
+ const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
4675
+ const dimW = Math.min(maxFileW, 40);
4676
+ for (const f of this._files) {
4677
+ const numStr = `[${String(f.num).padStart(maxNumW)}]`;
4678
+ const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
4679
+ const res = f.width ? `${f.width}x${f.height}` : "?x?";
4680
+ const badges = [];
4681
+ if (f.videoUrl) badges.push("video");
4682
+ if (f.audioUrl) badges.push("audio");
4683
+ let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
4684
+ if (badges.length > 0) line += ` ${_YELLOW}(${badges.join("+")})${_RESET}`;
4685
+ process.stdout.write(`${line}\n`);
4686
+ }
4687
+ }
4688
+ const audioFiles = this._files.filter((f) => f.audioUrl);
4689
+ if (audioFiles.length > 0) {
4690
+ process.stdout.write(` ${dim("│")}\n`);
4691
+ process.stdout.write(` ${dim("│")} ${b("Music:")}\n`);
4692
+ for (const af of audioFiles) {
4693
+ if (af.audioTitle) {
4694
+ const title = af.audioArtist ? `${af.audioTitle} — ${af.audioArtist}` : af.audioTitle;
4695
+ process.stdout.write(` ${dim("│")} ${g("♪")} ${title}\n`);
4696
+ }
4697
+ if (af.audioDuration) {
4698
+ const mins = Math.floor(af.audioDuration / 60);
4699
+ const secs = Math.round(af.audioDuration % 60);
4700
+ const badges = [`${mins}:${String(secs).padStart(2, "0")}`];
4701
+ if (af.audioHasLyrics) badges.push("lyrics");
4702
+ if (af.audioIsExplicit) badges.push(`${_YELLOW}explicit${_RESET}`);
4703
+ process.stdout.write(` ${dim("│")} ${dim(badges.join(" · "))}\n`);
4704
+ }
4705
+ if (af.coverArtworkUri) process.stdout.write(` ${dim("│")} ${dim("Art:")} ${dim(`${af.coverArtworkUri.slice(0, 60)}…`)}\n`);
4706
+ }
4707
+ }
4708
+ process.stdout.write(` ${dim("└")}${"─".repeat(w - 2)}${dim("┘")}\n`);
4808
4709
  }
4809
- static fromURL(url, opts) {
4810
- const match = InstagramSavedExtractor.pattern.exec(url);
4811
- if (!match) return null;
4812
- return new InstagramSavedExtractor({
4813
- ...opts,
4814
- url,
4815
- match
4816
- });
4710
+ _wrap(text, maxLen) {
4711
+ if (text.length <= maxLen) return [text];
4712
+ const lines = [];
4713
+ let remaining = text;
4714
+ while (remaining.length > maxLen) {
4715
+ let cut = maxLen;
4716
+ while (cut > 0 && remaining[cut] !== " ") cut--;
4717
+ if (cut === 0) cut = maxLen;
4718
+ lines.push(remaining.slice(0, cut).trimEnd());
4719
+ remaining = remaining.slice(cut).trimStart();
4720
+ }
4721
+ if (remaining) lines.push(remaining);
4722
+ return lines;
4817
4723
  }
4818
- async *posts() {
4819
- yield* this.api.userSaved();
4724
+ _report() {
4725
+ this._flushPost();
4726
+ process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
4727
+ process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
4728
+ process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
4729
+ process.stdout.write(`\n`);
4820
4730
  }
4821
4731
  };
4822
4732
  //#endregion
4733
+ //#region src/fetcher.ts
4734
+ var fetcher_exports = /* @__PURE__ */ __exportAll({
4735
+ buildUrl: () => buildUrl,
4736
+ createFetchHttpClient: () => createFetchHttpClient,
4737
+ extractCsrf: () => extractCsrf,
4738
+ headersToRecord: () => headersToRecord,
4739
+ mergeCookie: () => mergeCookie,
4740
+ readBody: () => readBody,
4741
+ serializeBody: () => serializeBody
4742
+ });
4743
/**
 * Append query parameters to a URL string.
 *
 * `null`/`undefined` values are dropped; everything else is stringified
 * and encoded by URLSearchParams. The query is inserted before any
 * `#fragment`, and no extra separator is added when the base already ends
 * in `?` or `&`.
 *
 * @param {string} base URL, optionally already carrying a query and/or fragment.
 * @param {Record<string, unknown>} [params] Query parameters to append.
 * @returns {string} `base` unchanged when there is nothing to append.
 */
function buildUrl(base, params) {
  if (!params) return base;

  const query = new URLSearchParams();
  for (const [key, value] of Object.entries(params)) {
    if (value != null) query.append(key, String(value));
  }
  const qs = query.toString();
  if (qs === "") return base;

  // A query string placed after the fragment would become part of the
  // fragment, so split it off and re-attach it at the end.
  const hashAt = base.indexOf("#");
  const head = hashAt === -1 ? base : base.slice(0, hashAt);
  const fragment = hashAt === -1 ? "" : base.slice(hashAt);

  let separator = "&";
  if (!head.includes("?")) separator = "?";
  else if (head.endsWith("?") || head.endsWith("&")) separator = "";

  return `${head}${separator}${qs}${fragment}`;
}
4753
/**
 * Merge two Cookie header strings with append semantics:
 * `a=1` + `b=2` → `a=1; b=2`.
 *
 * An empty or missing side is ignored rather than producing a dangling
 * `"; "` or the literal text "undefined".
 *
 * @param {string} base Cookies that come first.
 * @param {string} extra Cookies appended after `base`.
 * @returns {string} The combined cookie string.
 */
function mergeCookie(base, extra) {
  if (!base) return extra;
  if (!extra) return base;
  return `${base}; ${extra}`;
}
4758
/**
 * Pull the `csrftoken` value out of a Cookie header string.
 *
 * The name must sit at the start of the string or directly after a `;`
 * separator, so cookies whose names merely end in "csrftoken" are ignored.
 *
 * @param {string} cookies Cookie header value.
 * @returns {string} The token, or "" when no csrftoken cookie is present.
 */
function extractCsrf(cookies) {
  const found = cookies.match(/(?:^|;\s*)csrftoken=([^;]+)/);
  if (found === null) return "";
  return found[1];
}
4762
/**
 * Copy a fetch `Headers` object into a plain `{ name: value }` record.
 *
 * NOTE(review): a later entry with the same name overwrites an earlier
 * one. The Fetch standard has Headers iteration yield each Set-Cookie
 * header as its own entry, so only the last Set-Cookie may survive here —
 * confirm against what the cookie jar expects.
 *
 * @param {Headers} headers Response (or request) headers.
 * @returns {Record<string, string>} Plain-object copy of the headers.
 */
function headersToRecord(headers) {
  const record = {};
  const copyEntry = (value, name) => {
    record[name] = value;
  };
  headers.forEach(copyEntry);
  return record;
}
4770
/**
 * Read a fetch response body according to the requested type.
 *
 * - "arraybuffer": the raw bytes as a Node.js Buffer.
 * - "text": the body as a string.
 * - anything else: parsed JSON. If the body is empty or not valid JSON
 *   (for example an HTML error or login page), the raw text is returned
 *   instead of throwing, so callers can still inspect the status code.
 *
 * @param {Response} resp Fetch response whose body has not been consumed.
 * @param {"arraybuffer" | "text" | "json" | undefined} responseType Requested body type.
 * @returns {Promise<Buffer | string | unknown>} The decoded body.
 */
async function readBody(resp, responseType) {
  if (responseType === "arraybuffer") {
    return Buffer.from(await resp.arrayBuffer());
  }
  const text = await resp.text();
  if (responseType === "text") return text;
  try {
    return JSON.parse(text);
  } catch {
    // Deliberate best-effort: a non-JSON body is handed back verbatim.
    return text;
  }
}
4781
/**
 * Turn a request payload into something `fetch` accepts as a body.
 *
 * - `null`/`undefined` → no body.
 * - strings and URLSearchParams → passed through unchanged.
 * - binary payloads (ArrayBuffer, typed arrays / Buffer, Blob, FormData)
 *   → passed through unchanged; JSON-encoding them would corrupt the data.
 * - anything else → JSON text.
 *
 * @param {unknown} data Request payload.
 * @returns {string | URLSearchParams | ArrayBuffer | ArrayBufferView | Blob | FormData | undefined}
 */
function serializeBody(data) {
  if (data == null) return void 0;
  if (typeof data === "string" || data instanceof URLSearchParams) return data;
  if (data instanceof ArrayBuffer || ArrayBuffer.isView(data)) return data;
  // Blob and FormData are checked defensively in case a runtime lacks them.
  if (typeof Blob !== "undefined" && data instanceof Blob) return data;
  if (typeof FormData !== "undefined" && data instanceof FormData) return data;
  return JSON.stringify(data);
}
4788
+ const UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
4789
/**
 * Create a platform-agnostic HttpClient backed by native `fetch`.
 *
 * Zero dependencies — works in Node.js 18+, browsers, Deno, and Edge.
 *
 * Options (all optional):
 * - `cookie`: static Cookie header value sent with every request.
 * - `cookieProvider`: called per request; its result takes precedence over
 *   `cookie` unless it returns null/undefined.
 * - `userAgent`: User-Agent used when the request does not set one.
 * - `timeout`: default timeout in milliseconds (30000).
 * - `onResponse`: called with the response headers (as a plain record)
 *   before the body is read.
 *
 * The returned client's `request(config)` resolves to
 * `{ status, data, headers, url }` for any HTTP status. It throws on
 * timeout, on exceeding the redirect limit, on caller abort, and on
 * network errors. The timeout applies whether or not the caller supplies
 * its own `config.signal`.
 *
 * @example Plain (no cookies)
 * ```ts
 * const http = createFetchHttpClient()
 * ```
 *
 * @example With static cookies (CLI session mode)
 * ```ts
 * const http = createFetchHttpClient({ cookie: 'sessionid=abc; csrftoken=xyz' })
 * ```
 *
 * @example With cookie jar (anonymous session)
 * ```ts
 * const jar = createCookieJar()
 * const http = createFetchHttpClient({
 *   cookieProvider: () => jar.getCookieHeader(),
 *   onResponse: (headers) => jar.setFromResponse(headers),
 * })
 * ```
 */
function createFetchHttpClient(opts = {}) {
  const { cookie, cookieProvider, userAgent = UA, timeout = 3e4, onResponse } = opts;
  return {
    async request(config) {
      const method = config.method ?? "GET";
      const url = buildUrl(config.url, config.params);
      const headers = new Headers(config.headers);

      const reqCookie = cookieProvider?.() ?? cookie;
      if (reqCookie) {
        // Client-level cookies come first; per-request cookies are appended.
        const existing = headers.get("Cookie");
        headers.set("Cookie", existing ? mergeCookie(reqCookie, existing) : reqCookie);
      }
      if (!headers.has("User-Agent")) headers.set("User-Agent", userAgent);

      const body = serializeBody(config.data);
      if (typeof body === "string" && !headers.has("Content-Type")) headers.set("Content-Type", "application/json");

      // One internal controller drives fetch. It is aborted either by the
      // timeout timer or by the caller's signal, so a caller-supplied signal
      // no longer disables the timeout.
      const timeoutMs = config.timeout ?? timeout;
      const callerSignal = config.signal ?? null;
      const controller = new AbortController();
      let timedOut = false;
      const timer = setTimeout(() => {
        timedOut = true;
        controller.abort();
      }, timeoutMs);
      const forwardAbort = () => controller.abort(callerSignal.reason);
      if (callerSignal?.aborted) forwardAbort();
      else callerSignal?.addEventListener("abort", forwardAbort, { once: true });

      try {
        const resp = await fetch(url, {
          method,
          headers,
          body,
          signal: controller.signal
        });
        onResponse?.(headersToRecord(resp.headers));
        const data = await readBody(resp, config.responseType);
        return {
          status: resp.status,
          data,
          headers: headersToRecord(resp.headers),
          url: resp.url
        };
      } catch (err) {
        if (timedOut && !callerSignal?.aborted) {
          throw new Error(`Request timeout after ${timeoutMs}ms: ${url}`, { cause: err });
        }
        // Node's fetch rejects with a generic TypeError("fetch failed") and
        // puts the actual reason ("redirect count exceeded") on `err.cause`,
        // so the cause has to be inspected as well as the error itself.
        const detail = `${String(err)} ${String(err?.cause?.message ?? err?.cause ?? "")}`;
        if (detail.includes("too many redirect") || detail.includes("redirect count exceeded")) {
          throw new Error("Too many redirects — session may be expired or invalid. Export a fresh session from your browser.", { cause: err });
        }
        throw err;
      } finally {
        clearTimeout(timer);
        callerSignal?.removeEventListener("abort", forwardAbort);
      }
    }
  };
}
4860
+ //#endregion
4823
4861
  //#region src/cli/cookies.ts
4824
4862
  function createCookieJar() {
4825
4863
  const cookies = /* @__PURE__ */ new Map();
@@ -4849,125 +4887,43 @@ function createCookieJar() {
4849
4887
  }
4850
4888
  //#endregion
4851
4889
  //#region src/cli/adapter.ts
4852
- /** NodeHttpClient — axios wrapper */
4853
4890
  /**
4854
- * Extract csrftoken value from a Cookie header string.
4891
+ * Create an HttpClient with a static cookie string (sessionid cookie).
4892
+ *
4893
+ * Used by the CLI when ``--cookies`` or ``--sessionid`` is provided.
4855
4894
  */
4856
- function extractCsrfFromCookies(cookies) {
4857
- return cookies.match(/(?:^|;\s*)csrftoken=([^;]+)/)?.[1] ?? "";
4858
- }
4859
- function createHttpClient(sessionId, fullCookies, logger) {
4860
- const instance = axios.create({
4861
- timeout: 3e4,
4862
- maxRedirects: 20,
4863
- validateStatus: () => true,
4864
- headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" }
4865
- });
4866
- const baseCookie = fullCookies || (sessionId ? `sessionid=${sessionId}` : null);
4867
- return { async request(config) {
4868
- const method = config.method ?? "GET";
4869
- logger?.debug(`${method} ${config.url}`);
4870
- const mergedHeaders = {};
4871
- if (baseCookie) mergedHeaders.Cookie = baseCookie;
4872
- if (config.headers) for (const [k, v] of Object.entries(config.headers)) if (k.toLowerCase() === "cookie" && mergedHeaders.Cookie) mergedHeaders.Cookie = `${mergedHeaders.Cookie}; ${v}`;
4873
- else mergedHeaders[k] = v;
4874
- if (mergedHeaders.Cookie) logger?.debug(` Cookie: ${mergedHeaders.Cookie.slice(0, 200)}`);
4875
- try {
4876
- const resp = await instance.request({
4877
- url: config.url,
4878
- method,
4879
- headers: mergedHeaders,
4880
- params: cleanupParams(config.params),
4881
- data: config.data,
4882
- signal: config.signal,
4883
- timeout: config.timeout,
4884
- responseType: config.responseType ?? "json"
4885
- });
4886
- const finalUrl = resp.request?.res?.responseUrl ?? config.url;
4887
- logger?.debug(` ← ${resp.status} ${resp.status >= 400 ? "⚠️" : ""} (${finalUrl.slice(0, 100)})`);
4888
- return {
4889
- status: resp.status,
4890
- data: resp.data,
4891
- headers: resp.headers,
4892
- url: finalUrl
4893
- };
4894
- } catch (err) {
4895
- const msg = String(err);
4896
- if (msg.includes("TOO_MANY_REDIRECTS") || msg.includes("too many redirects")) throw new Error("Too many redirects — sessionid may be expired or invalid. Export a fresh sessionid from your browser.");
4897
- throw err;
4898
- }
4899
- } };
4895
+ function createHttpClient(sessionId, fullCookies, _logger) {
4896
+ return createFetchHttpClient({ cookie: (fullCookies || (sessionId ? `sessionid=${sessionId}` : null)) ?? void 0 });
4900
4897
  }
4901
- /** WebClient — anonymous cookie-jar HTTP client */
4902
4898
  /**
4903
- * Create an HTTP client with an in-memory cookie jar.
4899
+ * Create an HTTP client backed by an in-memory cookie jar.
4904
4900
  *
4905
- * Use this when you don't have a sessionid — the client first seeds its
4906
- * cookie jar by visiting ``instagram.com``, then uses those anonymous
4907
- * cookies for subsequent API calls. This is how incognito browsing works.
4901
+ * Seeds cookies by visiting instagram.com first, then uses those
4902
+ * anonymous cookies for subsequent API calls (like incognito browsing).
4908
4903
  *
4909
4904
  * Returns the client + the initial CSRF token extracted from cookies.
4910
4905
  */
4911
4906
  async function createWebClient(logger) {
4912
4907
  const jar = createCookieJar();
4913
4908
  logger?.info("Seeding anonymous session (visiting instagram.com)…");
4914
- const seedResp = await axios.get("https://www.instagram.com/", {
4915
- headers: {
4916
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
4917
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
4918
- },
4919
- maxRedirects: 20,
4920
- validateStatus: () => true
4921
- });
4922
- jar.setFromResponse(seedResp.headers);
4923
- logger?.debug(` ← ${seedResp.status} — got ${jar.getCookieHeader().split(";").length} cookies`);
4909
+ const seedResp = await fetch("https://www.instagram.com/", { headers: {
4910
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
4911
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
4912
+ } });
4913
+ const { headersToRecord } = await Promise.resolve().then(() => fetcher_exports);
4914
+ jar.setFromResponse(headersToRecord(seedResp.headers));
4915
+ const cookieCount = jar.getCookieHeader().split(";").length;
4916
+ logger?.debug(` ← ${seedResp.status} — got ${cookieCount} cookies`);
4917
+ const csrfToken = extractCsrf(jar.getCookieHeader());
4924
4918
  return {
4925
- http: { async request(config) {
4926
- const method = config.method ?? "GET";
4927
- logger?.debug(`${method} ${config.url}`);
4928
- const jarCookie = jar.getCookieHeader();
4929
- const mergedHeaders = {};
4930
- if (jarCookie) mergedHeaders.Cookie = jarCookie;
4931
- if (config.headers) for (const [k, v] of Object.entries(config.headers)) if (k.toLowerCase() === "cookie" && mergedHeaders.Cookie) mergedHeaders.Cookie = `${mergedHeaders.Cookie}; ${v}`;
4932
- else mergedHeaders[k] = v;
4933
- try {
4934
- const resp = await axios.request({
4935
- url: config.url,
4936
- method,
4937
- headers: mergedHeaders,
4938
- params: cleanupParams(config.params),
4939
- data: config.data,
4940
- signal: config.signal,
4941
- timeout: config.timeout ?? 3e4,
4942
- maxRedirects: 20,
4943
- validateStatus: () => true,
4944
- responseType: config.responseType ?? "json"
4945
- });
4946
- jar.setFromResponse(resp.headers);
4947
- const finalUrl = resp.request?.res?.responseUrl ?? config.url;
4948
- logger?.debug(` ← ${resp.status} ${resp.status >= 400 ? "⚠️" : ""} (${finalUrl.slice(0, 100)})`);
4949
- return {
4950
- status: resp.status,
4951
- data: resp.data,
4952
- headers: resp.headers,
4953
- url: finalUrl
4954
- };
4955
- } catch (err) {
4956
- const msg = String(err);
4957
- if (msg.includes("TOO_MANY_REDIRECTS") || msg.includes("too many redirects")) throw new Error("Too many redirects — Instagram may be blocking the request. Try again later or use --sessionid.");
4958
- throw err;
4959
- }
4960
- } },
4961
- csrfToken: jar.getCookieHeader().match(/(?:^|;\s*)csrftoken=([^;]+)/)?.[1] ?? ""
4919
+ http: createFetchHttpClient({
4920
+ cookieProvider: () => jar.getCookieHeader(),
4921
+ onResponse: (headers) => jar.setFromResponse(headers)
4922
+ }),
4923
+ csrfToken
4962
4924
  };
4963
4925
  }
4964
- function cleanupParams(params) {
4965
- if (!params) return void 0;
4966
- const cleaned = {};
4967
- for (const [k, v] of Object.entries(params)) if (v != null) cleaned[k] = String(v);
4968
- return cleaned;
4969
- }
4970
- /** NodeLogger — console wrapper */
4926
+ /** Node.js console-based logger. */
4971
4927
  function createLogger(verbose) {
4972
4928
  return {
4973
4929
  debug(message, ...args) {
@@ -5006,39 +4962,7 @@ function createStorage() {
5006
4962
  };
5007
4963
  }
5008
4964
  //#endregion
5009
- //#region src/cli/index.ts
5010
- /**
5011
- * gdl-instagram — CLI entry point.
5012
- *
5013
- * Usage:
5014
- * gdl-instagram <url> [options] ← auto-detect from URL
5015
- * gdl-instagram tag <hashtag> [options]
5016
- * gdl-instagram saved [options]
5017
- *
5018
- * Every option is self-documented via ``--help``.
5019
- */
5020
- function addSharedOptions(cmd) {
5021
- return cmd.option("--sessionid <cookie>", "Instagram sessionid cookie value (from browser)", process.env.INSTAGRAM_SESSIONID).option("--cookies <string>", "Full Cookie header string from browser (DevTools → Network → Request Headers → Cookie)", process.env.INSTAGRAM_COOKIES).option("-o, --output <dir>", "Output directory", "./data").option("--videos <mode>", "Download videos: true, false, or merged (yt-dlp)", "true").option("--previews <types>", "Download only previews: video,audio (comma-separated)").option("--audio", "Download standalone audio tracks", false).option("--max-posts <n>", "Maximum number of posts to download", Number.parseInt).option("--cursor <cursor>", "Resume from pagination cursor (see output of previous run)").option("--order-posts <order>", "Post ordering: asc, desc, id, id_asc, id_desc").option("--order-files <order>", "File ordering: r, d (reverse), or empty for natural order").option("--static-videos", "Download static video cover images instead of actual videos", false).option("--no-static-videos", "Do not force static video covers (download real videos)").option("--api <backend>", "API backend: rest (default) or graphql", "rest").option("-v, --verbose", "Verbose debug output", false).option("--include <list>", "For user: comma-separated sub-extractors (posts,reels,tagged,stories,highlights,info,avatar)", "posts").option("--split", "For stories: split each frame into a separate post", false).option("-i, --info", "Print structured post info to terminal (no download)", false);
5022
- }
5023
- /** Build config from parsed options */
5024
- function buildConfig(opts) {
5025
- const config = new ConfigManager();
5026
- const ig = {};
5027
- if (opts.videos) ig.videos = opts.videos;
5028
- if (opts.previews) ig.previews = opts.previews.split(",");
5029
- if (opts.audio) ig.audio = true;
5030
- if (opts.maxPosts) ig["max-posts"] = opts.maxPosts;
5031
- if (opts.cursor) ig.cursor = opts.cursor;
5032
- if (opts.orderPosts) ig["order-posts"] = opts.orderPosts;
5033
- if (opts.orderFiles) ig["order-files"] = opts.orderFiles;
5034
- if (opts.staticVideos) ig["static-videos"] = true;
5035
- if (opts.api) ig.api = opts.api;
5036
- if (opts.include) ig.include = opts.include;
5037
- if (opts.split) ig.split = true;
5038
- if (Object.keys(ig).length > 0) config.set("extractor.instagram", ig);
5039
- return config;
5040
- }
5041
- /** Auto-detect the right extractor for a URL */
4965
+ //#region src/cli/runner.ts
5042
4966
  function resolveExtractor(url) {
5043
4967
  for (const Cls of [
5044
4968
  InstagramPostExtractor,
@@ -5050,7 +4974,6 @@ function resolveExtractor(url) {
5050
4974
  ]) if (Cls.pattern.test(url)) return Cls;
5051
4975
  throw new Error(`No extractor matched URL: ${url}. Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/`);
5052
4976
  }
5053
- /** Run an extractor */
5054
4977
  async function runExtractor(url, extrClass, opts) {
5055
4978
  const config = buildConfig(opts);
5056
4979
  const log = createLogger(opts.verbose ?? false);
@@ -5058,7 +4981,7 @@ async function runExtractor(url, extrClass, opts) {
5058
4981
  let webCsrf;
5059
4982
  if (opts.cookies) {
5060
4983
  http = createHttpClient(void 0, opts.cookies, log);
5061
- webCsrf = extractCsrfFromCookies(opts.cookies);
4984
+ webCsrf = extractCsrf(opts.cookies);
5062
4985
  } else if (opts.sessionid) http = createHttpClient(opts.sessionid, void 0, log);
5063
4986
  else {
5064
4987
  const wc = await createWebClient(log);
@@ -5107,9 +5030,10 @@ async function runExtractor(url, extrClass, opts) {
5107
5030
  process.exit(1);
5108
5031
  }
5109
5032
  }
5110
- /** Program */
5033
+ //#endregion
5034
+ //#region src/cli/index.ts
5111
5035
  const program = new Command();
5112
- program.name("gdl-instagram").description("Download images and videos from Instagram.\n\nUses gallery-dl's extraction pipeline — supports posts, reels,\nstories, highlights, tagged posts, saved collections, and more.\n\nRequires a sessionid cookie exported from your browser.\nSet via --sessionid or INSTAGRAM_SESSIONID environment variable.").version("0.1.0");
5036
+ program.name("gdl-instagram").description("Download images and videos from Instagram.\n\nUses gallery-dl's extraction pipeline — supports posts, reels,\nstories, highlights, tagged posts, saved collections, and more.\n\nRequires a sessionid cookie exported from your browser.\nSet via --sessionid or INSTAGRAM_SESSIONID environment variable.").version(version);
5113
5037
  addSharedOptions(program.command("dl", { isDefault: true }).argument("[url]", "Instagram URL to download (auto-detects type)").description("Download media from an Instagram URL (auto-detects post/user/stories/…)\n\nExamples:\n gdl-instagram https://www.instagram.com/p/CxAbCdEfGh/\n gdl-instagram https://www.instagram.com/username/ --include=posts,reels\n gdl-instagram https://www.instagram.com/stories/username/").action(async (url, opts) => {
5114
5038
  if (!url) {
5115
5039
  program.help();
@@ -5123,7 +5047,6 @@ addSharedOptions(program.command("tag <hashtag>").description("Download posts fr
5123
5047
  addSharedOptions(program.command("saved").description("Download your saved (bookmarked) posts\n\nRequires authentication via --sessionid.\n\nExamples:\n gdl-instagram saved --sessionid=abc123").action(async (opts) => {
5124
5048
  await runExtractor("https://www.instagram.com/me/saved/", InstagramSavedExtractor, opts);
5125
5049
  }));
5126
- /** parse */
5127
5050
  program.parse();
5128
5051
  //#endregion
5129
5052
  export {};