@chilfish/gallery-dl-instagram 0.1.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +340 -0
- package/README.md +134 -0
- package/dist/dl-ins.mjs +5014 -0
- package/dist/index.cjs +192 -0
- package/dist/{sdk-B9fRyc1e.d.mts → index.d.cts} +189 -309
- package/dist/index.d.mts +502 -72
- package/dist/index.mjs +153 -39
- package/dist/node.cjs +41 -0
- package/dist/node.d.cts +47 -0
- package/dist/node.d.mts +47 -0
- package/dist/node.mjs +40 -0
- package/dist/sdk-BClg0Kv2.cjs +2268 -0
- package/dist/{extractors-Byw-2lPL.mjs → sdk-CovBsEps.mjs} +720 -670
- package/dist/sdk-DyZz22bT.d.cts +262 -0
- package/dist/sdk-DyZz22bT.d.mts +262 -0
- package/dist/storage-77hqz5Fi.mjs +24 -0
- package/dist/storage-BwGaT6XO.cjs +24 -0
- package/package.json +26 -26
- package/cli/adapter.ts +0 -284
- package/cli/cookies.ts +0 -59
- package/cli/index.ts +0 -337
- package/config.ts +0 -80
- package/core/extractor.ts +0 -217
- package/core/job.ts +0 -581
- package/dist/adapter-Bt86eL1R.mjs +0 -189
- package/dist/cli/index.d.mts +0 -1
- package/dist/cli/index.mjs +0 -3160
- package/dist/sdk.d.mts +0 -2
- package/dist/sdk.mjs +0 -93
- package/index.ts +0 -159
- package/instagram/api.ts +0 -531
- package/instagram/base.ts +0 -275
- package/instagram/extractors.ts +0 -521
- package/instagram/index.ts +0 -43
- package/instagram/parsers.ts +0 -583
- package/instagram/types.ts +0 -244
- package/message.ts +0 -31
- package/types.ts +0 -115
- package/utils/id-codec.ts +0 -39
- package/utils/text.ts +0 -178
|
@@ -1,126 +1,4 @@
|
|
|
1
|
-
//#region
|
|
2
|
-
/** A no-op logger */
|
|
3
|
-
const noopLogger = {
|
|
4
|
-
debug: () => {},
|
|
5
|
-
info: () => {},
|
|
6
|
-
warn: () => {},
|
|
7
|
-
error: () => {}
|
|
8
|
-
};
|
|
9
|
-
var Extractor = class {
|
|
10
|
-
/** Regex pattern to match against URLs */
|
|
11
|
-
static pattern = /^$/;
|
|
12
|
-
/** The input URL */
|
|
13
|
-
url;
|
|
14
|
-
/** Regex match groups from ``fromURL`` */
|
|
15
|
-
groups;
|
|
16
|
-
config;
|
|
17
|
-
/** HTTP client — public so Job can access for downloads */
|
|
18
|
-
http;
|
|
19
|
-
/** Storage backend — public so Job can access for writes */
|
|
20
|
-
storage;
|
|
21
|
-
/** Logger instance — public so Job can access for reporting */
|
|
22
|
-
log;
|
|
23
|
-
/** Delay range in seconds — random between [min, max] before each request */
|
|
24
|
-
requestInterval = [6, 12];
|
|
25
|
-
_initialized = false;
|
|
26
|
-
constructor(opts) {
|
|
27
|
-
this.url = opts.url;
|
|
28
|
-
this.groups = opts.match ? [...opts.match].slice(1) : [];
|
|
29
|
-
this.config = opts.config;
|
|
30
|
-
this.http = opts.http;
|
|
31
|
-
this.storage = opts.storage;
|
|
32
|
-
this.log = opts.log;
|
|
33
|
-
}
|
|
34
|
-
/** Initialization */
|
|
35
|
-
/**
|
|
36
|
-
* One-time async setup (cookies, session, internal state).
|
|
37
|
-
* Safe to call multiple times — after the first call it becomes a no-op.
|
|
38
|
-
*/
|
|
39
|
-
async initialize() {
|
|
40
|
-
if (this._initialized) return;
|
|
41
|
-
await this._init();
|
|
42
|
-
this._initialized = true;
|
|
43
|
-
this.initialize = async () => {};
|
|
44
|
-
}
|
|
45
|
-
/**
|
|
46
|
-
* Subclass hook for one-time setup.
|
|
47
|
-
*/
|
|
48
|
-
async _init() {}
|
|
49
|
-
/** Async iteration */
|
|
50
|
-
async *[Symbol.asyncIterator]() {
|
|
51
|
-
await this.initialize();
|
|
52
|
-
yield* this.items();
|
|
53
|
-
}
|
|
54
|
-
/** Config helpers */
|
|
55
|
-
/**
|
|
56
|
-
* Read a config value using the interpolated hierarchy.
|
|
57
|
-
*/
|
|
58
|
-
_cfg(key, defaultVal) {
|
|
59
|
-
const path = [
|
|
60
|
-
"extractor",
|
|
61
|
-
this.category,
|
|
62
|
-
this.subcategory
|
|
63
|
-
];
|
|
64
|
-
return this.config.interpolate(path, key, defaultVal);
|
|
65
|
-
}
|
|
66
|
-
/** HTTP */
|
|
67
|
-
_lastRequestTime = 0;
|
|
68
|
-
/**
|
|
69
|
-
* Rate-limited HTTP request wrapper.
|
|
70
|
-
*/
|
|
71
|
-
async request(url, cfg = {}) {
|
|
72
|
-
await this._throttle();
|
|
73
|
-
const response = await this.http.request({
|
|
74
|
-
url,
|
|
75
|
-
...cfg
|
|
76
|
-
});
|
|
77
|
-
this._lastRequestTime = Date.now();
|
|
78
|
-
return response;
|
|
79
|
-
}
|
|
80
|
-
/**
|
|
81
|
-
* Convenience: request + parse JSON body.
|
|
82
|
-
*/
|
|
83
|
-
async requestJSON(url, cfg = {}) {
|
|
84
|
-
const resp = await this.request(url, cfg);
|
|
85
|
-
if (typeof resp.data === "object") return resp.data;
|
|
86
|
-
try {
|
|
87
|
-
return JSON.parse(resp.data);
|
|
88
|
-
} catch {
|
|
89
|
-
return {};
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
/** Rate limiting */
|
|
93
|
-
/**
|
|
94
|
-
* Sleep long enough to keep the minimum interval between requests.
|
|
95
|
-
*/
|
|
96
|
-
async _throttle() {
|
|
97
|
-
const elapsed = Date.now() - this._lastRequestTime;
|
|
98
|
-
const [min, max] = this.requestInterval;
|
|
99
|
-
const target = min + Math.random() * (max - min);
|
|
100
|
-
const waitMs = Math.max(0, target * 1e3 - elapsed);
|
|
101
|
-
if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
|
|
102
|
-
}
|
|
103
|
-
/** Utility */
|
|
104
|
-
/**
|
|
105
|
-
* Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
|
|
106
|
-
*/
|
|
107
|
-
parseTimestamp(ts) {
|
|
108
|
-
if (ts == null) return "";
|
|
109
|
-
const asMs = ts > 25e8 ? ts : ts * 1e3;
|
|
110
|
-
return new Date(asMs).toISOString();
|
|
111
|
-
}
|
|
112
|
-
/**
|
|
113
|
-
* Generate a random hex token (used for CSRF).
|
|
114
|
-
*/
|
|
115
|
-
static generateToken(size = 16) {
|
|
116
|
-
const bytes = new Uint8Array(size);
|
|
117
|
-
if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
|
|
118
|
-
else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
|
|
119
|
-
return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
|
|
120
|
-
}
|
|
121
|
-
};
|
|
122
|
-
//#endregion
|
|
123
|
-
//#region config.ts
|
|
1
|
+
//#region src/config.ts
|
|
124
2
|
var ConfigManager = class {
|
|
125
3
|
data;
|
|
126
4
|
constructor(data = {}) {
|
|
@@ -174,7 +52,8 @@ var ConfigManager = class {
|
|
|
174
52
|
}
|
|
175
53
|
};
|
|
176
54
|
//#endregion
|
|
177
|
-
//#region core/
|
|
55
|
+
//#region src/core/format.ts
|
|
56
|
+
/** Shared ANSI formatting and display utilities. */
|
|
178
57
|
function formatBytes(bytes) {
|
|
179
58
|
if (bytes === 0) return "0 B";
|
|
180
59
|
const units = [
|
|
@@ -204,19 +83,20 @@ function c(s) {
|
|
|
204
83
|
function g(s) {
|
|
205
84
|
return `${GREEN}${s}${RESET}`;
|
|
206
85
|
}
|
|
86
|
+
const _YELLOW = YELLOW;
|
|
87
|
+
const _RESET = RESET;
|
|
207
88
|
function pad(s, n) {
|
|
208
89
|
return s.length >= n ? s : s + " ".repeat(n - s.length);
|
|
209
90
|
}
|
|
91
|
+
//#endregion
|
|
92
|
+
//#region src/core/job.ts
|
|
210
93
|
var Job = class {
|
|
211
94
|
extractor;
|
|
212
95
|
status = 0;
|
|
213
96
|
constructor(extractor) {
|
|
214
97
|
this.extractor = extractor;
|
|
215
98
|
}
|
|
216
|
-
/**
|
|
217
|
-
* Main entry point. Calls ``extractor[Symbol.asyncIterator]()`` and
|
|
218
|
-
* dispatches every yielded message.
|
|
219
|
-
*/
|
|
99
|
+
/** Main entry point. Dispatches every yielded message. */
|
|
220
100
|
async run() {
|
|
221
101
|
this.extractor.log.info(`Starting ${this.extractor.category}/${this.extractor.subcategory} — ${this.extractor.url}`);
|
|
222
102
|
await this.extractor.initialize();
|
|
@@ -237,6 +117,8 @@ var Job = class {
|
|
|
237
117
|
/** Override in subclasses to print a summary. */
|
|
238
118
|
_report() {}
|
|
239
119
|
};
|
|
120
|
+
//#endregion
|
|
121
|
+
//#region src/core/download-job.ts
|
|
240
122
|
var DownloadJob = class DownloadJob extends Job {
|
|
241
123
|
/** Base output directory (prepended to all paths). */
|
|
242
124
|
basePath = "";
|
|
@@ -244,10 +126,6 @@ var DownloadJob = class DownloadJob extends Job {
|
|
|
244
126
|
_currentDir = {};
|
|
245
127
|
/** In-memory archive keyed by archive format. */
|
|
246
128
|
archive = /* @__PURE__ */ new Map();
|
|
247
|
-
/**
|
|
248
|
-
* Registry of per-category "archive formats" — the key is formed
|
|
249
|
-
* by interpolating this format string over the metadata.
|
|
250
|
-
*/
|
|
251
129
|
_archiveFmts = /* @__PURE__ */ new Map();
|
|
252
130
|
_postCount = 0;
|
|
253
131
|
_fileCount = 0;
|
|
@@ -256,23 +134,18 @@ var DownloadJob = class DownloadJob extends Job {
|
|
|
256
134
|
registerArchive(category, format) {
|
|
257
135
|
this._archiveFmts.set(category, format);
|
|
258
136
|
}
|
|
259
|
-
/** Simple format-string interpolation for archive keys. */
|
|
260
137
|
_interp(fmt, meta) {
|
|
261
138
|
return fmt.replace(/\{(\w+)\}/g, (_, key) => {
|
|
262
139
|
const v = meta[key];
|
|
263
140
|
return v == null ? "" : String(v);
|
|
264
141
|
});
|
|
265
142
|
}
|
|
266
|
-
/** Check whether this URL has already been downloaded (and skip). */
|
|
267
143
|
_isArchived(meta) {
|
|
268
144
|
const cat = meta.category ?? this.extractor.category;
|
|
269
145
|
const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
|
|
270
146
|
const key = this._interp(fmt, meta);
|
|
271
|
-
|
|
272
|
-
if (set && set.has(key)) return true;
|
|
273
|
-
return false;
|
|
147
|
+
return !!this.archive.get(cat)?.has(key);
|
|
274
148
|
}
|
|
275
|
-
/** Mark a post/media as archived. */
|
|
276
149
|
_archive(meta) {
|
|
277
150
|
const cat = meta.category ?? this.extractor.category;
|
|
278
151
|
const fmt = this._archiveFmts.get(cat) ?? "{media_id}";
|
|
@@ -284,7 +157,6 @@ var DownloadJob = class DownloadJob extends Job {
|
|
|
284
157
|
}
|
|
285
158
|
set.add(key);
|
|
286
159
|
}
|
|
287
|
-
/** Handlers */
|
|
288
160
|
async handleDirectory(msg) {
|
|
289
161
|
this._currentDir = { ...msg.metadata };
|
|
290
162
|
this._postCount++;
|
|
@@ -313,7 +185,6 @@ var DownloadJob = class DownloadJob extends Job {
|
|
|
313
185
|
if (resp.data instanceof Uint8Array) data = resp.data;
|
|
314
186
|
else if (resp.data instanceof ArrayBuffer) data = new Uint8Array(resp.data);
|
|
315
187
|
else if (typeof resp.data === "string") data = resp.data;
|
|
316
|
-
else if (typeof resp.data === "object" && resp.data != null && "type" in resp.data && resp.data.type === "Buffer") data = new Uint8Array(resp.data);
|
|
317
188
|
else data = JSON.stringify(resp.data);
|
|
318
189
|
await this.extractor.storage.write(fullPath, data);
|
|
319
190
|
this._fileCount++;
|
|
@@ -357,13 +228,11 @@ var DownloadJob = class DownloadJob extends Job {
|
|
|
357
228
|
else this.archive.set(cat, set);
|
|
358
229
|
}
|
|
359
230
|
}
|
|
360
|
-
/** Report */
|
|
361
231
|
_report() {
|
|
362
232
|
const log = this.extractor.log;
|
|
363
233
|
log.info(`Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`);
|
|
364
234
|
if (this._skippedCount > 0) log.info(` ${this._skippedCount} file(s) skipped (already archived)`);
|
|
365
235
|
}
|
|
366
|
-
/** Path builders */
|
|
367
236
|
_buildDirPath(meta) {
|
|
368
237
|
return `${meta.category ?? this.extractor.category}/${meta.username ?? "_"}`;
|
|
369
238
|
}
|
|
@@ -373,166 +242,133 @@ var DownloadJob = class DownloadJob extends Job {
|
|
|
373
242
|
return `${mid}${meta.num ? `_${meta.num}` : ""}.${ext}`;
|
|
374
243
|
}
|
|
375
244
|
};
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
245
|
+
//#endregion
|
|
246
|
+
//#region src/core/extractor.ts
|
|
247
|
+
/** A no-op logger */
|
|
248
|
+
const noopLogger = {
|
|
249
|
+
debug: () => {},
|
|
250
|
+
info: () => {},
|
|
251
|
+
warn: () => {},
|
|
252
|
+
error: () => {}
|
|
253
|
+
};
|
|
254
|
+
var Extractor = class {
|
|
255
|
+
/** Regex pattern to match against URLs */
|
|
256
|
+
static pattern = /^$/;
|
|
257
|
+
/** The input URL */
|
|
258
|
+
url;
|
|
259
|
+
/** Regex match groups from ``fromURL`` */
|
|
260
|
+
groups;
|
|
261
|
+
config;
|
|
262
|
+
/** HTTP client — public so Job can access for downloads */
|
|
263
|
+
http;
|
|
264
|
+
/** Storage backend — public so Job can access for writes */
|
|
265
|
+
storage;
|
|
266
|
+
/** Logger instance — public so Job can access for reporting */
|
|
267
|
+
log;
|
|
268
|
+
/** Delay range in seconds — random between [min, max] before each request */
|
|
269
|
+
requestInterval = [6, 12];
|
|
270
|
+
_initialized = false;
|
|
271
|
+
constructor(opts) {
|
|
272
|
+
this.url = opts.url;
|
|
273
|
+
this.groups = opts.match ? [...opts.match].slice(1) : [];
|
|
274
|
+
this.config = opts.config;
|
|
275
|
+
this.http = opts.http;
|
|
276
|
+
this.storage = opts.storage;
|
|
277
|
+
this.log = opts.log;
|
|
385
278
|
}
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
279
|
+
/** Initialization */
|
|
280
|
+
/**
|
|
281
|
+
* One-time async setup (cookies, session, internal state).
|
|
282
|
+
* Safe to call multiple times — after the first call it becomes a no-op.
|
|
283
|
+
*/
|
|
284
|
+
async initialize() {
|
|
285
|
+
if (this._initialized) return;
|
|
286
|
+
await this._init();
|
|
287
|
+
this._initialized = true;
|
|
288
|
+
this.initialize = async () => {};
|
|
391
289
|
}
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
290
|
+
/**
|
|
291
|
+
* Subclass hook for one-time setup.
|
|
292
|
+
*/
|
|
293
|
+
async _init() {}
|
|
294
|
+
/** Async iteration */
|
|
295
|
+
async *[Symbol.asyncIterator]() {
|
|
296
|
+
await this.initialize();
|
|
297
|
+
yield* this.items();
|
|
298
|
+
}
|
|
299
|
+
/** Config helpers */
|
|
300
|
+
/**
|
|
301
|
+
* Read a config value using the interpolated hierarchy.
|
|
302
|
+
*/
|
|
303
|
+
_cfg(key, defaultVal) {
|
|
304
|
+
const path = [
|
|
305
|
+
"extractor",
|
|
306
|
+
this.category,
|
|
307
|
+
this.subcategory
|
|
308
|
+
];
|
|
309
|
+
return this.config.interpolate(path, key, defaultVal);
|
|
310
|
+
}
|
|
311
|
+
/** HTTP */
|
|
312
|
+
_lastRequestTime = 0;
|
|
313
|
+
/**
|
|
314
|
+
* Rate-limited HTTP request wrapper.
|
|
315
|
+
*/
|
|
316
|
+
async request(url, cfg = {}) {
|
|
317
|
+
await this._throttle();
|
|
318
|
+
const response = await this.http.request({
|
|
319
|
+
url,
|
|
320
|
+
...cfg
|
|
407
321
|
});
|
|
322
|
+
this._lastRequestTime = Date.now();
|
|
323
|
+
return response;
|
|
408
324
|
}
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
const
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
const match = cls.pattern.exec(msg.url);
|
|
420
|
-
if (!match) return;
|
|
421
|
-
const parentExtr = this.extractor;
|
|
422
|
-
const childJob = new PrintJob(Reflect.construct(cls, [{
|
|
423
|
-
url: msg.url,
|
|
424
|
-
match,
|
|
425
|
-
config: parentExtr.config,
|
|
426
|
-
http: parentExtr.http,
|
|
427
|
-
storage: parentExtr.storage,
|
|
428
|
-
log: parentExtr.log
|
|
429
|
-
}]));
|
|
430
|
-
const childStatus = await childJob.run();
|
|
431
|
-
this.status |= childStatus;
|
|
432
|
-
this._postCount += childJob._postCount;
|
|
433
|
-
this._fileCount += childJob._fileCount;
|
|
434
|
-
}
|
|
435
|
-
/** Output */
|
|
436
|
-
_flushPost() {
|
|
437
|
-
const m = this._currentDir;
|
|
438
|
-
if (Object.keys(m).length === 0) return;
|
|
439
|
-
const w = this._width;
|
|
440
|
-
const labelW = 14;
|
|
441
|
-
const shortcode = m.post_shortcode ?? "?";
|
|
442
|
-
const header = ` Post #${this._postCount}: ${shortcode} `;
|
|
443
|
-
const padTotal = w - 2 - header.length;
|
|
444
|
-
const padL = Math.floor(padTotal / 2);
|
|
445
|
-
const padR = padTotal - padL;
|
|
446
|
-
process.stdout.write(`\n${dim("┌")}${"─".repeat(padL)}${b(header)}${"─".repeat(padR)}${dim("┐")}\n`);
|
|
447
|
-
const row = (label, value, color) => {
|
|
448
|
-
const colored = typeof color === "function" ? color(value) : color ? `${color}${value}${RESET}` : value;
|
|
449
|
-
process.stdout.write(` ${dim("│")} ${c(pad(label, labelW))} ${colored}\n`);
|
|
450
|
-
};
|
|
451
|
-
const username = m.username ?? "?";
|
|
452
|
-
const fullname = m.fullname ?? "";
|
|
453
|
-
row("Author:", fullname ? `${username} (${fullname})` : username, g);
|
|
454
|
-
row("Date:", m.date ?? m.post_date ?? "?");
|
|
455
|
-
row("Likes:", `${typeof m.likes === "number" ? m.likes.toLocaleString() : "?"} | Liked: ${m.liked ? "yes" : "no"}`);
|
|
456
|
-
row("Type:", `${m.type ?? "?"} (${this._files.length} files)`);
|
|
457
|
-
row("URL:", m.post_url ?? "?");
|
|
458
|
-
const desc = m.description ?? "";
|
|
459
|
-
if (desc) {
|
|
460
|
-
process.stdout.write(` ${dim("│")}\n`);
|
|
461
|
-
process.stdout.write(` ${dim("│")} ${b("Description:")}\n`);
|
|
462
|
-
const lines = desc.split("\n");
|
|
463
|
-
for (const line of lines) {
|
|
464
|
-
const wrapped = this._wrap(line, w - 8);
|
|
465
|
-
for (const wl of wrapped) process.stdout.write(` ${dim("│")} ${dim(wl)}\n`);
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
const tags = m.tags;
|
|
469
|
-
if (tags && tags.length > 0) {
|
|
470
|
-
process.stdout.write(` ${dim("│")}\n`);
|
|
471
|
-
process.stdout.write(` ${dim("│")} ${b("Tags:")} ${dim(tags.map((t) => `#${t}`).join(" "))}\n`);
|
|
472
|
-
}
|
|
473
|
-
const locName = m.location_slug ?? "";
|
|
474
|
-
const locId = m.location_id ?? "";
|
|
475
|
-
if (locName || locId) row("Location:", locId ? `${locName} (ID: ${locId})` : locName);
|
|
476
|
-
const coauthors = m.coauthors;
|
|
477
|
-
if (coauthors && coauthors.length > 0) row("Co-authors:", coauthors.map((c) => c.full_name ? `${c.username} (${c.full_name})` : c.username).join(", "));
|
|
478
|
-
const pinned = m.pinned;
|
|
479
|
-
if (pinned && pinned.length > 0) row("Pinned:", pinned.join(", "));
|
|
480
|
-
const expires = m.expires;
|
|
481
|
-
if (expires) row("Expires:", expires, YELLOW);
|
|
482
|
-
const hlTitle = m.highlight_title;
|
|
483
|
-
if (hlTitle) row("Highlight:", hlTitle);
|
|
484
|
-
const taggedUser = m.tagged_username ?? "";
|
|
485
|
-
if (taggedUser) {
|
|
486
|
-
const taggedFull = m.tagged_full_name ?? "";
|
|
487
|
-
row("Tagged by:", taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser);
|
|
488
|
-
}
|
|
489
|
-
if (this._files.length > 0) {
|
|
490
|
-
process.stdout.write(` ${dim("│")}\n`);
|
|
491
|
-
process.stdout.write(` ${dim("│")} ${b(`Media (${this._files.length} files):`)}\n`);
|
|
492
|
-
const maxNumW = String(this._files.length).length;
|
|
493
|
-
const maxFileW = Math.max(...this._files.map((f) => f.filename.length));
|
|
494
|
-
const dimW = Math.min(maxFileW, 40);
|
|
495
|
-
for (const f of this._files) {
|
|
496
|
-
const numStr = `[${String(f.num).padStart(maxNumW)}]`;
|
|
497
|
-
const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW);
|
|
498
|
-
const res = f.width ? `${f.width}x${f.height}` : "?x?";
|
|
499
|
-
const badges = [];
|
|
500
|
-
if (f.videoUrl) badges.push("video");
|
|
501
|
-
if (f.audioUrl) badges.push("audio");
|
|
502
|
-
let line = ` ${dim("│")} ${g(numStr)} ${dimStr} ${res}`;
|
|
503
|
-
if (badges.length > 0) line += ` ${YELLOW}(${badges.join("+")})${RESET}`;
|
|
504
|
-
process.stdout.write(`${line}\n`);
|
|
505
|
-
}
|
|
506
|
-
}
|
|
507
|
-
process.stdout.write(` ${dim("└")}${"─".repeat(w - 2)}${dim("┘")}\n`);
|
|
508
|
-
}
|
|
509
|
-
_wrap(text, maxLen) {
|
|
510
|
-
if (text.length <= maxLen) return [text];
|
|
511
|
-
const lines = [];
|
|
512
|
-
let remaining = text;
|
|
513
|
-
while (remaining.length > maxLen) {
|
|
514
|
-
let cut = maxLen;
|
|
515
|
-
while (cut > 0 && remaining[cut] !== " ") cut--;
|
|
516
|
-
if (cut === 0) cut = maxLen;
|
|
517
|
-
lines.push(remaining.slice(0, cut).trimEnd());
|
|
518
|
-
remaining = remaining.slice(cut).trimStart();
|
|
325
|
+
/**
|
|
326
|
+
* Convenience: request + parse JSON body.
|
|
327
|
+
*/
|
|
328
|
+
async requestJSON(url, cfg = {}) {
|
|
329
|
+
const resp = await this.request(url, cfg);
|
|
330
|
+
if (typeof resp.data === "object") return resp.data;
|
|
331
|
+
try {
|
|
332
|
+
return JSON.parse(resp.data);
|
|
333
|
+
} catch {
|
|
334
|
+
return {};
|
|
519
335
|
}
|
|
520
|
-
if (remaining) lines.push(remaining);
|
|
521
|
-
return lines;
|
|
522
|
-
}
|
|
523
|
-
_report() {
|
|
524
|
-
this._flushPost();
|
|
525
|
-
process.stdout.write(`\n${dim("──")} ${b("Summary")} ${dim("───")}\n`);
|
|
526
|
-
process.stdout.write(` Posts: ${g(String(this._postCount))}\n`);
|
|
527
|
-
process.stdout.write(` Files: ${g(String(this._fileCount))}\n`);
|
|
528
|
-
process.stdout.write(`\n`);
|
|
529
336
|
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
337
|
+
/** Rate limiting */
|
|
338
|
+
/**
|
|
339
|
+
* Sleep long enough to keep the minimum interval between requests.
|
|
340
|
+
*/
|
|
341
|
+
async _throttle() {
|
|
342
|
+
const elapsed = Date.now() - this._lastRequestTime;
|
|
343
|
+
const [min, max] = this.requestInterval;
|
|
344
|
+
const target = min + Math.random() * (max - min);
|
|
345
|
+
const waitMs = Math.max(0, target * 1e3 - elapsed);
|
|
346
|
+
if (waitMs > 0) await new Promise((r) => setTimeout(r, waitMs));
|
|
347
|
+
}
|
|
348
|
+
/** Utility */
|
|
349
|
+
/**
|
|
350
|
+
* Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
|
|
351
|
+
*/
|
|
352
|
+
parseTimestamp(ts) {
|
|
353
|
+
if (ts == null) return "";
|
|
354
|
+
const asMs = ts > 25e8 ? ts : ts * 1e3;
|
|
355
|
+
return new Date(asMs).toISOString();
|
|
356
|
+
}
|
|
357
|
+
/**
|
|
358
|
+
* Generate a random hex token (used for CSRF).
|
|
359
|
+
*/
|
|
360
|
+
static generateToken(size = 16) {
|
|
361
|
+
const bytes = new Uint8Array(size);
|
|
362
|
+
if (typeof crypto !== "undefined" && crypto.getRandomValues) crypto.getRandomValues(bytes);
|
|
363
|
+
else for (let i = 0; i < size; i++) bytes[i] = Math.floor(Math.random() * 256);
|
|
364
|
+
return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
|
|
365
|
+
}
|
|
366
|
+
};
|
|
367
|
+
//#endregion
|
|
368
|
+
//#region src/utils/id-codec.ts
|
|
369
|
+
/**
|
|
370
|
+
* Instagram-style Base64-variant ID ↔ shortcode conversion.
|
|
371
|
+
*/
|
|
536
372
|
const ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
|
|
537
373
|
/** Pre-built index for O(1) character lookup during decode. */
|
|
538
374
|
const CHAR_INDEX = {};
|
|
@@ -560,7 +396,160 @@ function shortcodeFromId(postId) {
|
|
|
560
396
|
return chars.reverse().join("");
|
|
561
397
|
}
|
|
562
398
|
//#endregion
|
|
563
|
-
//#region
|
|
399
|
+
//#region src/message.ts
|
|
400
|
+
function directory(metadata = {}) {
|
|
401
|
+
return {
|
|
402
|
+
type: "directory",
|
|
403
|
+
metadata
|
|
404
|
+
};
|
|
405
|
+
}
|
|
406
|
+
function url(u, metadata = {}) {
|
|
407
|
+
return {
|
|
408
|
+
type: "url",
|
|
409
|
+
url: u,
|
|
410
|
+
metadata
|
|
411
|
+
};
|
|
412
|
+
}
|
|
413
|
+
function queue(u, metadata = {}) {
|
|
414
|
+
return {
|
|
415
|
+
type: "queue",
|
|
416
|
+
url: u,
|
|
417
|
+
metadata
|
|
418
|
+
};
|
|
419
|
+
}
|
|
420
|
+
//#endregion
|
|
421
|
+
//#region src/utils/text.ts
|
|
422
|
+
/**
|
|
423
|
+
* Text utilities ported from gallery-dl's ``text`` module.
|
|
424
|
+
*
|
|
425
|
+
* All functions are pure and environment-agnostic.
|
|
426
|
+
*/
|
|
427
|
+
/** String extraction */
|
|
428
|
+
/**
|
|
429
|
+
* Extract the substring between ``begin`` and ``end`` from ``txt``.
|
|
430
|
+
* Returns the substring or ``null`` if either delimiter is missing.
|
|
431
|
+
*/
|
|
432
|
+
function extract(txt, begin, end) {
|
|
433
|
+
const first = txt.indexOf(begin);
|
|
434
|
+
if (first < 0) return null;
|
|
435
|
+
const start = first + begin.length;
|
|
436
|
+
const last = txt.indexOf(end, start);
|
|
437
|
+
if (last < 0) return null;
|
|
438
|
+
return txt.slice(start, last);
|
|
439
|
+
}
|
|
440
|
+
/**
|
|
441
|
+
* Shorthand: same as ``extract`` but returns ``default_`` on failure.
|
|
442
|
+
* Mirrors the Python ``extr()`` function.
|
|
443
|
+
*/
|
|
444
|
+
function extr(txt, begin, end, default_ = "") {
|
|
445
|
+
return extract(txt, begin, end) ?? default_;
|
|
446
|
+
}
|
|
447
|
+
/** Unicode / HTML */
|
|
448
|
+
/**
|
|
449
|
+
* Decode ``\\uXXXX`` escape sequences in a string.
|
|
450
|
+
*/
|
|
451
|
+
function parseUnicodeEscapes$1(text) {
|
|
452
|
+
if (!text.includes("\\u")) return text;
|
|
453
|
+
return text.replace(/\\u([0-9a-fA-F]{4})/g, (_m, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
|
|
454
|
+
}
|
|
455
|
+
/**
|
|
456
|
+
* HTML entity decode.
|
|
457
|
+
*
|
|
458
|
+
* In Node.js we could use a DOM parser, but since this library is
|
|
459
|
+
* environment-agnostic we ship a minimal covering the common cases.
|
|
460
|
+
*/
|
|
461
|
+
const HTML_ENTITIES = {
|
|
462
|
+
"amp": "&",
|
|
463
|
+
"lt": "<",
|
|
464
|
+
"gt": ">",
|
|
465
|
+
"quot": "\"",
|
|
466
|
+
"apos": "'",
|
|
467
|
+
"nbsp": "\xA0",
|
|
468
|
+
"#x27": "'",
|
|
469
|
+
"#x2F": "/",
|
|
470
|
+
"#39": "'",
|
|
471
|
+
"#47": "/"
|
|
472
|
+
};
|
|
473
|
+
const RE_ENTITY = /&([^;]+);/g;
|
|
474
|
+
function unescape(text) {
|
|
475
|
+
return text.replace(RE_ENTITY, (m, name) => {
|
|
476
|
+
const ch = HTML_ENTITIES[name];
|
|
477
|
+
if (ch !== void 0) return ch;
|
|
478
|
+
if (name.startsWith("#")) {
|
|
479
|
+
const cp = name[1] === "x" || name[1] === "X" ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
|
|
480
|
+
if (Number.isSafeInteger(cp)) return String.fromCodePoint(cp);
|
|
481
|
+
}
|
|
482
|
+
return m;
|
|
483
|
+
});
|
|
484
|
+
}
|
|
485
|
+
/** URL helpers */
|
|
486
|
+
/**
|
|
487
|
+
* URL-decode a string.
|
|
488
|
+
*/
|
|
489
|
+
function unquote(text) {
|
|
490
|
+
try {
|
|
491
|
+
return decodeURIComponent(text);
|
|
492
|
+
} catch {
|
|
493
|
+
return text.replace(/%[0-9a-f]{2}/gi, (m) => {
|
|
494
|
+
try {
|
|
495
|
+
return decodeURIComponent(m);
|
|
496
|
+
} catch {
|
|
497
|
+
return m;
|
|
498
|
+
}
|
|
499
|
+
});
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
/**
|
|
503
|
+
* Ensure a URL starts with ``https://`` (or ``http://``).
|
|
504
|
+
*/
|
|
505
|
+
function ensureHttpScheme(url, scheme = "https://") {
|
|
506
|
+
if (!url) return url;
|
|
507
|
+
if (url.startsWith("https://") || url.startsWith("http://")) return url;
|
|
508
|
+
return scheme + url.replace(/^[/:]+/, "");
|
|
509
|
+
}
|
|
510
|
+
/**
|
|
511
|
+
* Extract filename + extension from a URL and write into ``meta``.
|
|
512
|
+
*/
|
|
513
|
+
function nameExtFromURL(url, meta) {
|
|
514
|
+
const filename = filenameFromURL(url);
|
|
515
|
+
const dot = filename.lastIndexOf(".");
|
|
516
|
+
if (dot > 0 && filename.length - dot - 1 <= 16) {
|
|
517
|
+
meta.filename = unquote(filename.slice(0, dot));
|
|
518
|
+
meta.extension = unquote(filename.slice(dot + 1)).toLowerCase();
|
|
519
|
+
} else {
|
|
520
|
+
meta.filename = unquote(filename);
|
|
521
|
+
meta.extension = "";
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
/**
|
|
525
|
+
* Extract the file-name portion of a URL (before query string).
|
|
526
|
+
*/
|
|
527
|
+
function filenameFromURL(url) {
|
|
528
|
+
try {
|
|
529
|
+
return url.split("?")[0].split("/").pop() ?? "";
|
|
530
|
+
} catch {
|
|
531
|
+
return "";
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
/**
|
|
535
|
+
* Parse an integer from a possibly-null value. Returns ``default_`` on failure.
|
|
536
|
+
*/
|
|
537
|
+
function parseInt(value, default_ = 0) {
|
|
538
|
+
if (value == null) return default_;
|
|
539
|
+
const n = typeof value === "number" ? value : Number.parseInt(String(value), 10);
|
|
540
|
+
return Number.isFinite(n) ? n : default_;
|
|
541
|
+
}
|
|
542
|
+
function tagRe(pattern) {
|
|
543
|
+
const re = new RegExp(pattern, "g");
|
|
544
|
+
return (text) => {
|
|
545
|
+
const matches = text.match(re);
|
|
546
|
+
return matches ? [...new Set(matches)] : [];
|
|
547
|
+
};
|
|
548
|
+
}
|
|
549
|
+
/** Pre-configured hashtag regex. */
|
|
550
|
+
const findTags = tagRe("#\\w+");
|
|
551
|
+
//#endregion
|
|
552
|
+
//#region src/instagram/api.ts
|
|
564
553
|
const APP_ID = "936619743392459";
|
|
565
554
|
const ASBD_ID = "129477";
|
|
566
555
|
var InstagramRestAPI = class {
|
|
@@ -900,192 +889,38 @@ var InstagramRestAPI = class {
|
|
|
900
889
|
}
|
|
901
890
|
};
|
|
902
891
|
//#endregion
|
|
903
|
-
//#region
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
* Text utilities ported from gallery-dl's ``text`` module.
|
|
928
|
-
*
|
|
929
|
-
* All functions are pure and environment-agnostic.
|
|
930
|
-
*/
|
|
931
|
-
/** String extraction */
|
|
932
|
-
/**
|
|
933
|
-
* Extract the substring between ``begin`` and ``end`` from ``txt``.
|
|
934
|
-
* Returns the substring or ``null`` if either delimiter is missing.
|
|
935
|
-
*/
|
|
936
|
-
function extract(txt, begin, end) {
|
|
937
|
-
const first = txt.indexOf(begin);
|
|
938
|
-
if (first < 0) return null;
|
|
939
|
-
const start = first + begin.length;
|
|
940
|
-
const last = txt.indexOf(end, start);
|
|
941
|
-
if (last < 0) return null;
|
|
942
|
-
return txt.slice(start, last);
|
|
943
|
-
}
|
|
944
|
-
/**
|
|
945
|
-
* Shorthand: same as ``extract`` but returns ``default_`` on failure.
|
|
946
|
-
* Mirrors the Python ``extr()`` function.
|
|
947
|
-
*/
|
|
948
|
-
function extr(txt, begin, end, default_ = "") {
|
|
949
|
-
return extract(txt, begin, end) ?? default_;
|
|
950
|
-
}
|
|
951
|
-
/** Unicode / HTML */
|
|
952
|
-
/**
|
|
953
|
-
* Decode ``\\uXXXX`` escape sequences in a string.
|
|
954
|
-
*/
|
|
955
|
-
function parseUnicodeEscapes$1(text) {
|
|
956
|
-
if (!text.includes("\\u")) return text;
|
|
957
|
-
return text.replace(/\\u([0-9a-fA-F]{4})/g, (_m, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
|
|
958
|
-
}
|
|
959
|
-
/**
|
|
960
|
-
* HTML entity decode.
|
|
961
|
-
*
|
|
962
|
-
* In Node.js we could use a DOM parser, but since this library is
|
|
963
|
-
* environment-agnostic we ship a minimal covering the common cases.
|
|
964
|
-
*/
|
|
965
|
-
const HTML_ENTITIES = {
|
|
966
|
-
"amp": "&",
|
|
967
|
-
"lt": "<",
|
|
968
|
-
"gt": ">",
|
|
969
|
-
"quot": "\"",
|
|
970
|
-
"apos": "'",
|
|
971
|
-
"nbsp": "\xA0",
|
|
972
|
-
"#x27": "'",
|
|
973
|
-
"#x2F": "/",
|
|
974
|
-
"#39": "'",
|
|
975
|
-
"#47": "/"
|
|
976
|
-
};
|
|
977
|
-
const RE_ENTITY = /&([^;]+);/g;
|
|
978
|
-
function unescape(text) {
|
|
979
|
-
return text.replace(RE_ENTITY, (m, name) => {
|
|
980
|
-
const ch = HTML_ENTITIES[name];
|
|
981
|
-
if (ch !== void 0) return ch;
|
|
982
|
-
if (name.startsWith("#")) {
|
|
983
|
-
const cp = name[1] === "x" || name[1] === "X" ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
|
|
984
|
-
if (Number.isSafeInteger(cp)) return String.fromCodePoint(cp);
|
|
985
|
-
}
|
|
986
|
-
return m;
|
|
987
|
-
});
|
|
988
|
-
}
|
|
989
|
-
/** URL helpers */
|
|
990
|
-
/**
|
|
991
|
-
* URL-decode a string.
|
|
992
|
-
*/
|
|
993
|
-
function unquote(text) {
|
|
994
|
-
try {
|
|
995
|
-
return decodeURIComponent(text);
|
|
996
|
-
} catch {
|
|
997
|
-
return text.replace(/%[0-9a-f]{2}/gi, (m) => {
|
|
998
|
-
try {
|
|
999
|
-
return decodeURIComponent(m);
|
|
1000
|
-
} catch {
|
|
1001
|
-
return m;
|
|
1002
|
-
}
|
|
1003
|
-
});
|
|
1004
|
-
}
|
|
1005
|
-
}
|
|
1006
|
-
/**
 * Ensure a URL starts with ``https://`` (or ``http://``).
 *
 * URLs that already carry an http(s) scheme are returned unchanged; the
 * check is case-insensitive because URL schemes are. Otherwise any leading
 * ``/`` and ``:`` characters (e.g. a protocol-relative ``//host``) are
 * stripped and ``scheme`` is prepended.
 *
 * @param {string} url - URL or scheme-less host/path.
 * @param {string} [scheme="https://"] - Prefix used when none is present.
 * @returns {string} The URL with a scheme; falsy input is returned as-is.
 */
function ensureHttpScheme(url, scheme = "https://") {
  if (!url) return url;
  if (/^https?:\/\//i.test(url)) return url;
  return scheme + url.replace(/^[/:]+/, "");
}
|
|
1014
|
-
/**
 * Derive a file name and extension from a URL and store them on ``meta``.
 *
 * The last path segment is split at its final dot. The part after the dot
 * counts as an extension only when the dot is not the first character and
 * the extension is at most 16 characters long; otherwise the whole segment
 * becomes the file name and the extension is the empty string. Both parts
 * are URL-decoded and the extension is lower-cased.
 *
 * @param {string} url - Source URL.
 * @param {object} meta - Mutated in place: ``filename`` and ``extension`` are set.
 * @returns {void}
 */
function nameExtFromURL(url, meta) {
  const base = filenameFromURL(url);
  const sep = base.lastIndexOf(".");
  const extLength = base.length - sep - 1;
  const hasExtension = sep > 0 && extLength <= 16;
  if (!hasExtension) {
    meta.filename = unquote(base);
    meta.extension = "";
    return;
  }
  meta.filename = unquote(base.slice(0, sep));
  meta.extension = unquote(base.slice(sep + 1)).toLowerCase();
}
|
|
1028
|
-
/**
 * Extract the file-name portion of a URL: the last ``/``-separated segment
 * of everything before the query string or fragment.
 *
 * @param {string} url - URL to inspect.
 * @returns {string} The final path segment (possibly empty); ``""`` when
 *   ``url`` is not a string.
 */
function filenameFromURL(url) {
  try {
    // Cut at the first "?" or "#" so neither query nor fragment leaks into the name.
    return url.split(/[?#]/, 1)[0].split("/").pop() ?? "";
  } catch {
    // Non-string input (e.g. null) has no split(); report "no file name".
    return "";
  }
}
|
|
1038
|
-
/**
 * Parse an integer from a possibly-null value.
 *
 * Numbers are truncated toward zero so numeric and string inputs agree
 * (``1.9`` and ``"1.9"`` both give ``1``); everything else is stringified
 * and parsed in base 10.
 *
 * @param {*} value - Value to convert.
 * @param {number} [default_=0] - Returned when ``value`` is null/undefined
 *   or does not yield a finite number.
 * @returns {number} The parsed integer, or ``default_`` on failure.
 */
function parseInt(value, default_ = 0) {
  if (value == null) return default_;
  const n = typeof value === "number" ? Math.trunc(value) : Number.parseInt(String(value), 10);
  return Number.isFinite(n) ? n : default_;
}
|
|
1046
|
-
/**
 * Build a function that returns every distinct match of ``pattern`` in a
 * text, in order of first appearance.
 *
 * @param {string} pattern - Regular-expression source.
 * @param {string} [flags="g"] - Extra RegExp flags; the global flag is
 *   always applied so that all matches are collected.
 * @returns {(text: string) => string[]} Matcher returning unique matches,
 *   or an empty array when nothing matches.
 */
function tagRe(pattern, flags = "g") {
  const allFlags = flags.includes("g") ? flags : `${flags}g`;
  const re = new RegExp(pattern, allFlags);
  return (text) => {
    const matches = text.match(re);
    return matches ? [...new Set(matches)] : [];
  };
}
/**
 * Pre-configured hashtag matcher.
 *
 * Uses Unicode property classes because JavaScript's ``\w`` is ASCII-only
 * and would drop hashtags written in non-Latin scripts.
 */
const findTags = tagRe("#[\\p{L}\\p{M}\\p{N}_]+", "u");
|
|
1055
|
-
//#endregion
|
|
1056
|
-
//#region instagram/parsers.ts
|
|
1057
|
-
/** Main entry — REST */
|
|
1058
|
-
function parsePostRest(post, cfg) {
|
|
1059
|
-
if (post.items) return parseStoryRest(post, cfg);
|
|
1060
|
-
const owner = post.user;
|
|
1061
|
-
const caption = post.caption;
|
|
1062
|
-
const ts = post.taken_at ?? post.created_at;
|
|
1063
|
-
const date = cfg.parseTimestamp(ts ?? null);
|
|
1064
|
-
const data = {
|
|
1065
|
-
post_id: post.pk,
|
|
1066
|
-
post_shortcode: post.code,
|
|
1067
|
-
post_url: `${cfg.root}/p/${post.code}/`,
|
|
1068
|
-
likes: post.like_count ?? 0,
|
|
1069
|
-
liked: post.has_liked ?? false,
|
|
1070
|
-
pinned: extractPinned(post),
|
|
1071
|
-
owner_id: owner.pk,
|
|
1072
|
-
username: owner.username ?? "",
|
|
1073
|
-
fullname: owner.full_name ?? "",
|
|
1074
|
-
post_date: date,
|
|
1075
|
-
date,
|
|
1076
|
-
description: caption ? caption.text : "",
|
|
1077
|
-
type: "post",
|
|
1078
|
-
count: 0,
|
|
1079
|
-
_files: []
|
|
892
|
+
//#region src/instagram/parsers/rest.ts
|
|
893
|
+
/** Main entry — parse a REST post response. */
|
|
894
|
+
function parsePostRest(post, cfg) {
|
|
895
|
+
if (post.items) return parseStoryRest(post, cfg);
|
|
896
|
+
const owner = post.user;
|
|
897
|
+
const caption = post.caption;
|
|
898
|
+
const ts = post.taken_at ?? post.created_at;
|
|
899
|
+
const date = cfg.parseTimestamp(ts ?? null);
|
|
900
|
+
const data = {
|
|
901
|
+
post_id: post.pk,
|
|
902
|
+
post_shortcode: post.code,
|
|
903
|
+
post_url: `${cfg.root}/p/${post.code}/`,
|
|
904
|
+
likes: post.like_count ?? 0,
|
|
905
|
+
liked: post.has_liked ?? false,
|
|
906
|
+
pinned: extractPinned(post),
|
|
907
|
+
owner_id: owner.pk,
|
|
908
|
+
username: owner.username ?? "",
|
|
909
|
+
fullname: owner.full_name ?? "",
|
|
910
|
+
post_date: date,
|
|
911
|
+
date,
|
|
912
|
+
description: caption ? caption.text : "",
|
|
913
|
+
type: "post",
|
|
914
|
+
count: 0,
|
|
915
|
+
_files: []
|
|
1080
916
|
};
|
|
1081
917
|
const tags = cfg.findTags(data.description);
|
|
1082
918
|
if (tags.length > 0) data.tags = [...new Set(tags)].sort();
|
|
1083
919
|
if (post.location) {
|
|
1084
920
|
const loc = post.location;
|
|
1085
|
-
const slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
|
|
1086
921
|
data.location_id = loc.pk;
|
|
1087
|
-
data.location_slug =
|
|
1088
|
-
data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${
|
|
922
|
+
data.location_slug = loc.short_name.replace(/\s+/g, "-").toLowerCase();
|
|
923
|
+
data.location_url = `${cfg.root}/explore/locations/${loc.pk}/${data.location_slug}/`;
|
|
1089
924
|
}
|
|
1090
925
|
if (post.coauthor_producers) data.coauthors = post.coauthor_producers.map((u) => ({
|
|
1091
926
|
id: u.pk,
|
|
@@ -1132,7 +967,7 @@ function parsePostRest(post, cfg) {
|
|
|
1132
967
|
if (post.subscription_media_visibility) data.subscription = post.subscription_media_visibility;
|
|
1133
968
|
return data;
|
|
1134
969
|
}
|
|
1135
|
-
/**
|
|
970
|
+
/** Parse a story or highlight REST response. */
|
|
1136
971
|
function parseStoryRest(post, cfg) {
|
|
1137
972
|
const items = post.items;
|
|
1138
973
|
const reelId = String(post.id).split(":").pop() ?? "0";
|
|
@@ -1158,9 +993,8 @@ function parseStoryRest(post, cfg) {
|
|
|
1158
993
|
expires: expires ? cfg.parseTimestamp(expires) : void 0,
|
|
1159
994
|
user: post.user
|
|
1160
995
|
};
|
|
1161
|
-
if (!isStory)
|
|
1162
|
-
|
|
1163
|
-
} else if (!post.seen) post.seen = expires - 86400;
|
|
996
|
+
if (!isStory && post.title) data.highlight_title = post.title;
|
|
997
|
+
else if (!post.seen) post.seen = expires - 86400;
|
|
1164
998
|
for (let num = 0; num < items.length; num++) {
|
|
1165
999
|
const item = items[num];
|
|
1166
1000
|
const media = parseMediaItem(item, post, cfg, num + 1);
|
|
@@ -1170,7 +1004,7 @@ function parseStoryRest(post, cfg) {
|
|
|
1170
1004
|
}
|
|
1171
1005
|
return data;
|
|
1172
1006
|
}
|
|
1173
|
-
/**
|
|
1007
|
+
/** Parse a single media item (image/video) from a carousel or story. */
|
|
1174
1008
|
function parseMediaItem(item, parent, cfg, num) {
|
|
1175
1009
|
let image;
|
|
1176
1010
|
try {
|
|
@@ -1229,7 +1063,7 @@ function parseMediaItem(item, parent, cfg, num) {
|
|
|
1229
1063
|
if (itemRec.audience) media.audience = itemRec.audience;
|
|
1230
1064
|
return media;
|
|
1231
1065
|
}
|
|
1232
|
-
/**
|
|
1066
|
+
/** Extract tagged users from various field formats. */
|
|
1233
1067
|
function extractTaggedUsers(src, dest) {
|
|
1234
1068
|
dest.tagged_users = [];
|
|
1235
1069
|
const edges = src.edge_media_to_tagged_user;
|
|
@@ -1272,13 +1106,9 @@ function extractTaggedUsers(src, dest) {
|
|
|
1272
1106
|
}
|
|
1273
1107
|
}
|
|
1274
1108
|
const seen = /* @__PURE__ */ new Set();
|
|
1275
|
-
dest.tagged_users = dest.tagged_users.filter((t) =>
|
|
1276
|
-
if (seen.has(t.id)) return false;
|
|
1277
|
-
seen.add(t.id);
|
|
1278
|
-
return true;
|
|
1279
|
-
});
|
|
1109
|
+
dest.tagged_users = dest.tagged_users.filter((t) => seen.has(t.id) ? false : (seen.add(t.id), true));
|
|
1280
1110
|
}
|
|
1281
|
-
/**
|
|
1111
|
+
/** Extract audio/music metadata from a story sticker. */
|
|
1282
1112
|
function extractAudio(src, dest, sticker, cfg) {
|
|
1283
1113
|
const info = sticker.music_asset_info;
|
|
1284
1114
|
if (!info) return null;
|
|
@@ -1310,7 +1140,14 @@ function extractAudio(src, dest, sticker, cfg) {
|
|
|
1310
1140
|
audio_timestamps: info.highlight_start_times_in_ms
|
|
1311
1141
|
};
|
|
1312
1142
|
}
|
|
1313
|
-
|
|
1143
|
+
/**
 * Pick the pinned-user-id list of a REST post.
 *
 * Prefers ``timeline_pinned_user_ids``, then ``clips_tab_pinned_user_ids``.
 *
 * @param {object} post - REST post object.
 * @returns {Array} The first truthy list of the two, or a new empty array.
 */
function extractPinned(post) {
  return post.timeline_pinned_user_ids || post.clips_tab_pinned_user_ids || [];
}
|
|
1148
|
+
//#endregion
|
|
1149
|
+
//#region src/instagram/parsers/graphql.ts
|
|
1150
|
+
/** Parse a GraphQL post/edge response. */
|
|
1314
1151
|
function parsePostGraphql(post, cfg) {
|
|
1315
1152
|
const typename = post.__typename ?? "GraphImage";
|
|
1316
1153
|
const owner = post.owner;
|
|
@@ -1395,17 +1232,12 @@ function parsePostGraphql(post, cfg) {
|
|
|
1395
1232
|
}
|
|
1396
1233
|
return data;
|
|
1397
1234
|
}
|
|
1398
|
-
/**
 * Return the pinned-user-id list carried by a post.
 *
 * The timeline list takes precedence over the clips-tab list.
 *
 * @param {object} post - Post object to inspect.
 * @returns {Array} The first truthy list found, or a new empty array.
 */
function extractPinned(post) {
  for (const key of ["timeline_pinned_user_ids", "clips_tab_pinned_user_ids"]) {
    const ids = post[key];
    if (ids) return ids;
  }
  return [];
}
|
|
1403
1235
|
/**
 * Decode literal ``\uXXXX`` escapes (four hex digits) into the UTF-16 code
 * units they name.
 *
 * @param {string} text - Input that may contain escape sequences.
 * @returns {string} ``text`` unchanged when it contains no ``\u``,
 *   otherwise the decoded string.
 */
function parseUnicodeEscapes(text) {
  if (text.includes("\\u")) {
    return text.replace(/\\u([0-9a-fA-F]{4})/g, (_match, digits) => {
      const codeUnit = Number.parseInt(digits, 16);
      return String.fromCharCode(codeUnit);
    });
  }
  return text;
}
|
|
1407
1239
|
//#endregion
|
|
1408
|
-
//#region instagram/base.ts
|
|
1240
|
+
//#region src/instagram/base.ts
|
|
1409
1241
|
var Ref = class {
|
|
1410
1242
|
value;
|
|
1411
1243
|
constructor(v) {
|
|
@@ -1558,12 +1390,123 @@ var InstagramExtractor = class extends Extractor {
|
|
|
1558
1390
|
}
|
|
1559
1391
|
};
|
|
1560
1392
|
//#endregion
|
|
1561
|
-
//#region instagram/extractors.ts
|
|
1393
|
+
//#region src/instagram/extractors/helpers.ts
|
|
1394
|
+
/** Shared regex utilities for Instagram extractor URL patterns. */
|
|
1562
1395
|
const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/;
|
|
1563
1396
|
/**
 * Concatenate a base pattern and a path pattern into one case-insensitive
 * regular expression.
 *
 * @param {RegExp} base - Pattern whose source forms the prefix.
 * @param {RegExp|string} path - Pattern (or raw source) appended to it.
 * @returns {RegExp} A new RegExp with only the ``i`` flag set.
 */
function re(base, path) {
  let suffix = path;
  if (typeof path !== "string") suffix = path.source;
  return new RegExp(`${base.source}${suffix}`, "i");
}
|
|
1400
|
+
//#endregion
|
|
1401
|
+
//#region src/instagram/extractors/registry.ts
|
|
1402
|
+
/** Lookup table from subcategory name to the extractor class registered for it. */
const _registry = /* @__PURE__ */ new Map();
/**
 * Register an extractor class under a subcategory name; a later
 * registration under the same name replaces the earlier one.
 *
 * @param {string} subcategory - Registry key.
 * @param {*} cls - Value (extractor class) to store.
 * @returns {void}
 */
function register(subcategory, cls) {
  const table = _registry;
  table.set(subcategory, cls);
}
/**
 * Look up what was registered for a subcategory.
 *
 * @param {string} subcategory - Registry key.
 * @returns {*} The registered value, or ``undefined`` when there is none.
 */
function get(subcategory) {
  const found = _registry.get(subcategory);
  return found;
}
|
|
1409
|
+
//#endregion
|
|
1410
|
+
//#region src/instagram/extractors/avatar.ts
|
|
1411
|
+
/**
 * Extractor for ``instagram.com/<user>/avatar`` URLs.
 *
 * Yields a single synthetic post object whose only image candidate is the
 * user's profile picture.
 */
var InstagramAvatarExtractor = class InstagramAvatarExtractor extends InstagramExtractor {
  static subcategory = "avatar";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/);
  subcategory = InstagramAvatarExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /**
   * Create an extractor when ``url`` matches the avatar pattern.
   *
   * @returns {InstagramAvatarExtractor|null} ``null`` when the URL does not match.
   */
  static fromURL(url, opts) {
    const match = InstagramAvatarExtractor.pattern.exec(url);
    return match ? new InstagramAvatarExtractor({
      ...opts,
      url,
      match
    }) : null;
  }
  /** Resolve the user and yield one post-shaped object wrapping the avatar image. */
  async *posts() {
    // groups[0] is presumably the "/<user>" capture of the pattern — confirm in the base class.
    const screenName = (this.groups[0] ?? "").replace(/^\//, "");
    // An "id:<n>" name is resolved through the by-id lookup instead of by screen name.
    const user = screenName.startsWith("id:") ? await this.api.userById(screenName.slice(3)) : await this.api.userByScreenName(screenName);
    // Preference order: HD info object, last entry of the HD versions list,
    // then the plain profile picture with unknown (0x0) dimensions.
    const versions = user.hd_profile_pic_versions;
    const avatar = user.hd_profile_pic_url_info ?? versions?.[versions.length - 1] ?? {
      url: user.profile_pic_url ?? "",
      width: 0,
      height: 0
    };
    // The part of profile_pic_id before "_" serves as the post key; without
    // it a synthetic "avatar:<user pk>" value doubles as key and shortcode.
    const picId = user.profile_pic_id?.split("_")[0];
    const pk = picId ? picId : `avatar:${user.pk}`;
    const code = picId ? shortcodeFromId(picId) : pk;
    yield {
      pk,
      code,
      user,
      caption: null,
      like_count: 0,
      image_versions2: { candidates: [avatar] }
    };
  }
};
register(InstagramAvatarExtractor.subcategory, InstagramAvatarExtractor);
|
|
1455
|
+
//#endregion
|
|
1456
|
+
//#region src/instagram/extractors/highlights.ts
|
|
1457
|
+
/**
 * Extractor for ``instagram.com/<user>/highlights`` URLs.
 *
 * Delegates to the API's highlights listing for the resolved user id.
 */
var InstagramHighlightsExtractor = class InstagramHighlightsExtractor extends InstagramExtractor {
  static subcategory = "highlights";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/);
  subcategory = InstagramHighlightsExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /**
   * Create an extractor when ``url`` matches the highlights pattern.
   *
   * @returns {InstagramHighlightsExtractor|null} ``null`` when the URL does not match.
   */
  static fromURL(url, opts) {
    const match = InstagramHighlightsExtractor.pattern.exec(url);
    return match ? new InstagramHighlightsExtractor({
      ...opts,
      url,
      match
    }) : null;
  }
  /** Resolve the user id, then yield everything ``api.highlightsMedia`` produces for it. */
  async *posts() {
    // groups[0] is presumably the "/<user>" capture — confirm in the base class.
    const userPath = this.groups[0] ?? "";
    const uid = await this.api.userId(userPath.replace(/^\//, ""));
    yield* this.api.highlightsMedia(uid);
  }
};
register(InstagramHighlightsExtractor.subcategory, InstagramHighlightsExtractor);
|
|
1480
|
+
//#endregion
|
|
1481
|
+
//#region src/instagram/extractors/info.ts
|
|
1482
|
+
/**
 * Extractor for ``instagram.com/<user>/info`` URLs.
 *
 * Produces one ``directory(user)`` message for the resolved user and no posts.
 */
var InstagramInfoExtractor = class InstagramInfoExtractor extends InstagramExtractor {
  static subcategory = "info";
  static pattern = re(BASE_RE, /(\/[^/?#]+)\/info/);
  subcategory = InstagramInfoExtractor.subcategory;
  constructor(opts) {
    super(opts);
  }
  /**
   * Create an extractor when ``url`` matches the info pattern.
   *
   * @returns {InstagramInfoExtractor|null} ``null`` when the URL does not match.
   */
  static fromURL(url, opts) {
    const match = InstagramInfoExtractor.pattern.exec(url);
    return match ? new InstagramInfoExtractor({
      ...opts,
      url,
      match
    }) : null;
  }
  /** Look the user up (by id for ``id:<n>`` names, else by screen name) and yield its directory message. */
  async *items() {
    // groups[0] is presumably the "/<user>" capture — confirm in the base class.
    const screenName = (this.groups[0] ?? "").replace(/^\//, "");
    const user = screenName.startsWith("id:") ? await this.api.userById(screenName.slice(3)) : await this.api.userByScreenName(screenName);
    yield directory(user);
  }
  /** Intentionally empty: this extractor has no posts of its own. */
  async *posts() {}
};
register(InstagramInfoExtractor.subcategory, InstagramInfoExtractor);
|
|
1508
|
+
//#endregion
|
|
1509
|
+
//#region src/instagram/extractors/post.ts
|
|
1567
1510
|
var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtractor {
|
|
1568
1511
|
static subcategory = "post";
|
|
1569
1512
|
static pattern = re(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//, /(?:share(?:\/(?:p|tv|reels?))?|(?:[^/?#]+\/)?(?:p|tv|reels?))\/([^/?#]+)/);
|
|
@@ -1598,75 +1541,9 @@ var InstagramPostExtractor = class InstagramPostExtractor extends InstagramExtra
|
|
|
1598
1541
|
yield* this.api.media(shortcode);
|
|
1599
1542
|
}
|
|
1600
1543
|
};
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
subcategory = InstagramUserExtractor.subcategory;
|
|
1605
|
-
constructor(opts) {
|
|
1606
|
-
super(opts);
|
|
1607
|
-
}
|
|
1608
|
-
static fromURL(url, opts) {
|
|
1609
|
-
const match = InstagramUserExtractor.pattern.exec(url);
|
|
1610
|
-
if (!match) return null;
|
|
1611
|
-
return new InstagramUserExtractor({
|
|
1612
|
-
...opts,
|
|
1613
|
-
url,
|
|
1614
|
-
match
|
|
1615
|
-
});
|
|
1616
|
-
}
|
|
1617
|
-
async *items() {
|
|
1618
|
-
await this.login();
|
|
1619
|
-
const userPath = this.groups[0] ?? "/";
|
|
1620
|
-
const base = `${this.root}${userPath}/`;
|
|
1621
|
-
const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
|
|
1622
|
-
const include = this._cfg("include", ["posts"]);
|
|
1623
|
-
const categories = include === "all" ? [
|
|
1624
|
-
"posts",
|
|
1625
|
-
"reels",
|
|
1626
|
-
"tagged",
|
|
1627
|
-
"stories",
|
|
1628
|
-
"highlights",
|
|
1629
|
-
"info",
|
|
1630
|
-
"avatar"
|
|
1631
|
-
] : typeof include === "string" ? include.replace(/\s+/g, "").split(",") : include;
|
|
1632
|
-
const extractors = {
|
|
1633
|
-
info: {
|
|
1634
|
-
cls: InstagramInfoExtractor,
|
|
1635
|
-
url: `${base}info/`
|
|
1636
|
-
},
|
|
1637
|
-
avatar: {
|
|
1638
|
-
cls: InstagramAvatarExtractor,
|
|
1639
|
-
url: `${base}avatar/`
|
|
1640
|
-
},
|
|
1641
|
-
stories: {
|
|
1642
|
-
cls: InstagramStoriesExtractor,
|
|
1643
|
-
url: storiesUrl
|
|
1644
|
-
},
|
|
1645
|
-
highlights: {
|
|
1646
|
-
cls: InstagramHighlightsExtractor,
|
|
1647
|
-
url: `${base}highlights/`
|
|
1648
|
-
},
|
|
1649
|
-
posts: {
|
|
1650
|
-
cls: InstagramPostsExtractor,
|
|
1651
|
-
url: `${base}posts/`
|
|
1652
|
-
},
|
|
1653
|
-
reels: {
|
|
1654
|
-
cls: InstagramReelsExtractor,
|
|
1655
|
-
url: `${base}reels/`
|
|
1656
|
-
},
|
|
1657
|
-
tagged: {
|
|
1658
|
-
cls: InstagramTaggedExtractor,
|
|
1659
|
-
url: `${base}tagged/`
|
|
1660
|
-
}
|
|
1661
|
-
};
|
|
1662
|
-
for (const cat of categories) {
|
|
1663
|
-
const entry = extractors[cat];
|
|
1664
|
-
if (entry) yield queue(entry.url, { _extractor: entry.cls });
|
|
1665
|
-
else this.log.warn(`Invalid include '${cat}'`);
|
|
1666
|
-
}
|
|
1667
|
-
}
|
|
1668
|
-
async *posts() {}
|
|
1669
|
-
};
|
|
1544
|
+
register(InstagramPostExtractor.subcategory, InstagramPostExtractor);
|
|
1545
|
+
//#endregion
|
|
1546
|
+
//#region src/instagram/extractors/posts-list.ts
|
|
1670
1547
|
var InstagramPostsExtractor = class InstagramPostsExtractor extends InstagramExtractor {
|
|
1671
1548
|
static subcategory = "posts";
|
|
1672
1549
|
static pattern = re(BASE_RE, /(\/[^/?#]+)\/posts/);
|
|
@@ -1689,6 +1566,9 @@ var InstagramPostsExtractor = class InstagramPostsExtractor extends InstagramExt
|
|
|
1689
1566
|
yield* this.api.userFeed(uid);
|
|
1690
1567
|
}
|
|
1691
1568
|
};
|
|
1569
|
+
register(InstagramPostsExtractor.subcategory, InstagramPostsExtractor);
|
|
1570
|
+
//#endregion
|
|
1571
|
+
//#region src/instagram/extractors/reels-list.ts
|
|
1692
1572
|
var InstagramReelsExtractor = class InstagramReelsExtractor extends InstagramExtractor {
|
|
1693
1573
|
static subcategory = "reels";
|
|
1694
1574
|
static pattern = re(BASE_RE, /(\/[^/?#]+)\/reels/);
|
|
@@ -1711,44 +1591,32 @@ var InstagramReelsExtractor = class InstagramReelsExtractor extends InstagramExt
|
|
|
1711
1591
|
yield* this.api.userClips(uid);
|
|
1712
1592
|
}
|
|
1713
1593
|
};
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1594
|
+
register(InstagramReelsExtractor.subcategory, InstagramReelsExtractor);
|
|
1595
|
+
//#endregion
|
|
1596
|
+
//#region src/instagram/extractors/saved.ts
|
|
1597
|
+
var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExtractor {
|
|
1598
|
+
static subcategory = "saved";
|
|
1599
|
+
static pattern = re(BASE_RE, /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/);
|
|
1600
|
+
subcategory = InstagramSavedExtractor.subcategory;
|
|
1719
1601
|
constructor(opts) {
|
|
1720
1602
|
super(opts);
|
|
1721
1603
|
}
|
|
1722
1604
|
static fromURL(url, opts) {
|
|
1723
|
-
const match =
|
|
1605
|
+
const match = InstagramSavedExtractor.pattern.exec(url);
|
|
1724
1606
|
if (!match) return null;
|
|
1725
|
-
return new
|
|
1607
|
+
return new InstagramSavedExtractor({
|
|
1726
1608
|
...opts,
|
|
1727
1609
|
url,
|
|
1728
1610
|
match
|
|
1729
1611
|
});
|
|
1730
1612
|
}
|
|
1731
|
-
async metadata() {
|
|
1732
|
-
const screenName = (this.groups[0] ?? "").replace(/^\//, "");
|
|
1733
|
-
let user;
|
|
1734
|
-
if (screenName.startsWith("id:")) {
|
|
1735
|
-
this._taggedUserId = screenName.slice(3);
|
|
1736
|
-
user = await this.api.userById(screenName.slice(3));
|
|
1737
|
-
} else {
|
|
1738
|
-
this._taggedUserId = await this.api.userId(screenName);
|
|
1739
|
-
user = await this.api.userByScreenName(screenName);
|
|
1740
|
-
}
|
|
1741
|
-
return {
|
|
1742
|
-
tagged_owner_id: user.id ?? user.pk,
|
|
1743
|
-
tagged_username: user.username,
|
|
1744
|
-
tagged_full_name: user.full_name
|
|
1745
|
-
};
|
|
1746
|
-
}
|
|
1747
1613
|
async *posts() {
|
|
1748
|
-
|
|
1749
|
-
yield* this.api.userTagged(this._taggedUserId);
|
|
1614
|
+
yield* this.api.userSaved();
|
|
1750
1615
|
}
|
|
1751
1616
|
};
|
|
1617
|
+
register(InstagramSavedExtractor.subcategory, InstagramSavedExtractor);
|
|
1618
|
+
//#endregion
|
|
1619
|
+
//#region src/instagram/extractors/stories.ts
|
|
1752
1620
|
var InstagramStoriesExtractor = class InstagramStoriesExtractor extends InstagramExtractor {
|
|
1753
1621
|
static subcategory = "stories";
|
|
1754
1622
|
static pattern = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:stories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/;
|
|
@@ -1804,28 +1672,9 @@ var InstagramStoriesExtractor = class InstagramStoriesExtractor extends Instagra
|
|
|
1804
1672
|
} else yield* reels;
|
|
1805
1673
|
}
|
|
1806
1674
|
};
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
subcategory = InstagramHighlightsExtractor.subcategory;
|
|
1811
|
-
constructor(opts) {
|
|
1812
|
-
super(opts);
|
|
1813
|
-
}
|
|
1814
|
-
static fromURL(url, opts) {
|
|
1815
|
-
const match = InstagramHighlightsExtractor.pattern.exec(url);
|
|
1816
|
-
if (!match) return null;
|
|
1817
|
-
return new InstagramHighlightsExtractor({
|
|
1818
|
-
...opts,
|
|
1819
|
-
url,
|
|
1820
|
-
match
|
|
1821
|
-
});
|
|
1822
|
-
}
|
|
1823
|
-
async *posts() {
|
|
1824
|
-
const screenName = (this.groups[0] ?? "").replace(/^\//, "");
|
|
1825
|
-
const uid = await this.api.userId(screenName);
|
|
1826
|
-
yield* this.api.highlightsMedia(uid);
|
|
1827
|
-
}
|
|
1828
|
-
};
|
|
1675
|
+
register(InstagramStoriesExtractor.subcategory, InstagramStoriesExtractor);
|
|
1676
|
+
//#endregion
|
|
1677
|
+
//#region src/instagram/extractors/tag.ts
|
|
1829
1678
|
var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtractor {
|
|
1830
1679
|
static subcategory = "tag";
|
|
1831
1680
|
static pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/);
|
|
@@ -1851,93 +1700,294 @@ var InstagramTagExtractor = class InstagramTagExtractor extends InstagramExtract
|
|
|
1851
1700
|
yield* this.api.tagsMedia(decodeURIComponent(tag));
|
|
1852
1701
|
}
|
|
1853
1702
|
};
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1703
|
+
register(InstagramTagExtractor.subcategory, InstagramTagExtractor);
|
|
1704
|
+
//#endregion
|
|
1705
|
+
//#region src/instagram/extractors/tagged.ts
|
|
1706
|
+
var InstagramTaggedExtractor = class InstagramTaggedExtractor extends InstagramExtractor {
|
|
1707
|
+
static subcategory = "tagged";
|
|
1708
|
+
static pattern = re(BASE_RE, /(\/[^/?#]+)\/tagged/);
|
|
1709
|
+
subcategory = InstagramTaggedExtractor.subcategory;
|
|
1710
|
+
_taggedUserId = "";
|
|
1858
1711
|
constructor(opts) {
|
|
1859
1712
|
super(opts);
|
|
1860
1713
|
}
|
|
1861
1714
|
static fromURL(url, opts) {
|
|
1862
|
-
const match =
|
|
1715
|
+
const match = InstagramTaggedExtractor.pattern.exec(url);
|
|
1863
1716
|
if (!match) return null;
|
|
1864
|
-
return new
|
|
1717
|
+
return new InstagramTaggedExtractor({
|
|
1865
1718
|
...opts,
|
|
1866
1719
|
url,
|
|
1867
1720
|
match
|
|
1868
1721
|
});
|
|
1869
1722
|
}
|
|
1870
|
-
async
|
|
1723
|
+
async metadata() {
|
|
1871
1724
|
const screenName = (this.groups[0] ?? "").replace(/^\//, "");
|
|
1872
1725
|
let user;
|
|
1873
|
-
if (screenName.startsWith("id:"))
|
|
1874
|
-
|
|
1875
|
-
|
|
1726
|
+
if (screenName.startsWith("id:")) {
|
|
1727
|
+
this._taggedUserId = screenName.slice(3);
|
|
1728
|
+
user = await this.api.userById(screenName.slice(3));
|
|
1729
|
+
} else {
|
|
1730
|
+
this._taggedUserId = await this.api.userId(screenName);
|
|
1731
|
+
user = await this.api.userByScreenName(screenName);
|
|
1732
|
+
}
|
|
1733
|
+
return {
|
|
1734
|
+
tagged_owner_id: user.id ?? user.pk,
|
|
1735
|
+
tagged_username: user.username,
|
|
1736
|
+
tagged_full_name: user.full_name
|
|
1737
|
+
};
|
|
1738
|
+
}
|
|
1739
|
+
async *posts() {
|
|
1740
|
+
if (!this._taggedUserId) await this.metadata();
|
|
1741
|
+
yield* this.api.userTagged(this._taggedUserId);
|
|
1876
1742
|
}
|
|
1877
|
-
async *posts() {}
|
|
1878
1743
|
};
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1744
|
+
register(InstagramTaggedExtractor.subcategory, InstagramTaggedExtractor);
|
|
1745
|
+
//#endregion
|
|
1746
|
+
//#region src/instagram/extractors/user.ts
|
|
1747
|
+
var InstagramUserExtractor = class InstagramUserExtractor extends InstagramExtractor {
|
|
1748
|
+
static subcategory = "user";
|
|
1749
|
+
static pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/);
|
|
1750
|
+
subcategory = InstagramUserExtractor.subcategory;
|
|
1883
1751
|
constructor(opts) {
|
|
1884
1752
|
super(opts);
|
|
1885
1753
|
}
|
|
1886
1754
|
static fromURL(url, opts) {
|
|
1887
|
-
const match =
|
|
1755
|
+
const match = InstagramUserExtractor.pattern.exec(url);
|
|
1888
1756
|
if (!match) return null;
|
|
1889
|
-
return new
|
|
1757
|
+
return new InstagramUserExtractor({
|
|
1890
1758
|
...opts,
|
|
1891
1759
|
url,
|
|
1892
1760
|
match
|
|
1893
1761
|
});
|
|
1894
1762
|
}
|
|
1895
|
-
async *
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
const
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1763
|
+
async *items() {
|
|
1764
|
+
await this.login();
|
|
1765
|
+
const userPath = this.groups[0] ?? "/";
|
|
1766
|
+
const base = `${this.root}${userPath}/`;
|
|
1767
|
+
const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`;
|
|
1768
|
+
const include = this._cfg("include", ["posts"]);
|
|
1769
|
+
const categories = include === "all" ? [
|
|
1770
|
+
"posts",
|
|
1771
|
+
"reels",
|
|
1772
|
+
"tagged",
|
|
1773
|
+
"stories",
|
|
1774
|
+
"highlights",
|
|
1775
|
+
"info",
|
|
1776
|
+
"avatar"
|
|
1777
|
+
] : typeof include === "string" ? include.replace(/\s+/g, "").split(",") : include;
|
|
1778
|
+
const urls = {
|
|
1779
|
+
info: `${base}info/`,
|
|
1780
|
+
avatar: `${base}avatar/`,
|
|
1781
|
+
stories: storiesUrl,
|
|
1782
|
+
highlights: `${base}highlights/`,
|
|
1783
|
+
posts: `${base}posts/`,
|
|
1784
|
+
reels: `${base}reels/`,
|
|
1785
|
+
tagged: `${base}tagged/`
|
|
1904
1786
|
};
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
code = pk;
|
|
1787
|
+
for (const cat of categories) {
|
|
1788
|
+
const cls = get(cat);
|
|
1789
|
+
const url = urls[cat];
|
|
1790
|
+
if (cls && url) yield queue(url, { _extractor: cls });
|
|
1791
|
+
else this.log.warn(`Invalid include '${cat}'`);
|
|
1911
1792
|
}
|
|
1912
|
-
yield {
|
|
1913
|
-
pk,
|
|
1914
|
-
code,
|
|
1915
|
-
user,
|
|
1916
|
-
caption: null,
|
|
1917
|
-
like_count: 0,
|
|
1918
|
-
image_versions2: { candidates: [avatar] }
|
|
1919
|
-
};
|
|
1920
1793
|
}
|
|
1794
|
+
async *posts() {}
|
|
1921
1795
|
};
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1796
|
+
register(InstagramUserExtractor.subcategory, InstagramUserExtractor);
|
|
1797
|
+
//#endregion
|
|
1798
|
+
//#region src/fetcher.ts
|
|
1799
|
+
/**
 * Build a URL by appending query parameters to ``base``.
 *
 * Parameters whose value is ``null``/``undefined`` are skipped; the rest are
 * stringified and encoded with ``URLSearchParams``. The query is inserted
 * before any ``#fragment`` of ``base`` and joined with ``?`` or ``&``
 * depending on whether ``base`` already has a query string.
 *
 * @param {string} base - URL to extend.
 * @param {object} [params] - Key/value pairs to append.
 * @returns {string} ``base`` unchanged when there is nothing to append,
 *   otherwise the extended URL.
 */
function buildUrl(base, params) {
  if (!params) return base;
  // Collect pairs in an array: assigning keys onto a plain object would
  // silently drop a parameter named "__proto__".
  const pairs = [];
  for (const [k, v] of Object.entries(params)) if (v != null) pairs.push([k, String(v)]);
  if (pairs.length === 0) return base;
  const qs = new URLSearchParams(pairs).toString();
  // Keep the fragment last; a query placed after "#" would be part of it.
  const hashAt = base.indexOf("#");
  const head = hashAt === -1 ? base : base.slice(0, hashAt);
  const fragment = hashAt === -1 ? "" : base.slice(hashAt);
  let joiner = "?";
  // A head already ending in "?" or "&" needs no further separator.
  if (head.includes("?")) joiner = head.endsWith("?") || head.endsWith("&") ? "" : "&";
  return `${head}${joiner}${qs}${fragment}`;
}
|
|
1809
|
+
/**
 * Merge two Cookie header strings with append semantics:
 * ``a=1`` + ``b=2`` → ``a=1; b=2``.
 *
 * An empty side contributes nothing, so the result never carries a leading
 * or trailing ``"; "``.
 *
 * @param {string} base - Cookies that come first.
 * @param {string} extra - Cookies appended after ``base``.
 * @returns {string} The combined cookie string.
 */
function mergeCookie(base, extra) {
  if (!base) return extra;
  if (!extra) return base;
  return `${base}; ${extra}`;
}
|
|
1814
|
+
/**
 * Extract the ``csrftoken`` value from a Cookie header string.
 *
 * The name must sit at the start of the string or directly after a ``;``
 * separator, so cookies that merely end in "csrftoken" are not picked up.
 *
 * @param {string} cookies - Cookie header value.
 * @returns {string} The token, or ``""`` when the cookie is absent.
 */
function extractCsrf(cookies) {
  const found = cookies.match(/(?:^|;\s*)csrftoken=([^;]+)/);
  if (!found) return "";
  const [, token] = found;
  return token ?? "";
}
|
|
1818
|
+
/**
 * Convert fetch ``Headers`` to a plain record keyed by header name.
 *
 * A name that is visited more than once during iteration has its values
 * joined with ``", "`` instead of letting the last one overwrite the
 * others. This is the same combined form ``Headers.get()`` returns, and it
 * matters for ``Set-Cookie``, which ``Headers`` iterates one value at a time.
 * NOTE(review): confirm that consumers of the record (e.g. a cookie jar fed
 * through ``onResponse``) can split a combined ``set-cookie`` value.
 *
 * @param {Headers} headers - Response or request headers.
 * @returns {Record<string, string>} Header names mapped to their values.
 */
function headersToRecord(headers) {
  const rec = {};
  headers.forEach((v, k) => {
    rec[k] = Object.hasOwn(rec, k) ? `${rec[k]}, ${v}` : v;
  });
  return rec;
}
|
|
1826
|
+
/**
 * Read a response body according to the requested type.
 *
 * @param {Response} resp - Fetch response whose body has not been consumed.
 * @param {string} [responseType] - ``"arraybuffer"`` for raw bytes,
 *   ``"text"`` for a string; any other value parses the body as JSON.
 * @returns {Promise<*>} Bytes (a ``Buffer`` where that global exists,
 *   otherwise a ``Uint8Array``), text, or the parsed JSON value. Rejects
 *   when the body cannot be read or is not valid JSON.
 */
async function readBody(resp, responseType) {
  switch (responseType) {
    case "arraybuffer": {
      const buf = await resp.arrayBuffer();
      // Buffer is not defined in every runtime; it is itself a Uint8Array,
      // so a plain Uint8Array is a compatible stand-in.
      return typeof Buffer === "undefined" ? new Uint8Array(buf) : Buffer.from(buf);
    }
    case "text": return resp.text();
    default: return resp.json();
  }
}
|
|
1837
|
+
/**
 * Serialize a request body value for ``fetch``.
 *
 * Strings and ``URLSearchParams`` pass through untouched, ``null`` and
 * ``undefined`` mean "no body", and everything else is JSON-encoded.
 *
 * @param {*} data - Caller-supplied request payload.
 * @returns {string|URLSearchParams|undefined} A value usable as a fetch body.
 */
function serializeBody(data) {
  const passthrough = typeof data === "string" || data instanceof URLSearchParams;
  if (passthrough) return data;
  return data == null ? void 0 : JSON.stringify(data);
}
|
|
1844
|
+
const UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
|
|
1845
|
+
/**
 * Create an HttpClient backed by the global ``fetch``.
 *
 * Zero dependencies — it only needs the ``fetch``, ``Headers`` and
 * ``AbortController`` globals.
 *
 * Per request the client:
 * - appends ``config.params`` to the URL;
 * - sends the cookie from ``cookieProvider()`` (or the static ``cookie``),
 *   placed before any ``Cookie`` header the caller already set;
 * - fills in ``User-Agent`` and, for string bodies, a JSON ``Content-Type``
 *   unless the caller provided them;
 * - enforces a timeout only when the caller passes no ``signal`` of their
 *   own. A non-finite or non-positive timeout disables the timer rather
 *   than firing at once.
 *
 * Timeout and redirect failures are rethrown with a descriptive message and
 * the underlying error attached as ``cause``.
 *
 * @example Plain (no cookies)
 * ```ts
 * const http = createFetchHttpClient()
 * ```
 *
 * @example With static cookies (CLI session mode)
 * ```ts
 * const http = createFetchHttpClient({ cookie: 'sessionid=abc; csrftoken=xyz' })
 * ```
 *
 * @example With cookie jar (anonymous session)
 * ```ts
 * const jar = createCookieJar()
 * const http = createFetchHttpClient({
 *   cookieProvider: () => jar.getCookieHeader(),
 *   onResponse: (headers) => jar.setFromResponse(headers),
 * })
 * ```
 *
 * @param {object} [opts] - ``cookie``, ``cookieProvider``, ``userAgent``,
 *   ``timeout`` (milliseconds, default 30000) and ``onResponse`` (called
 *   with the response headers as a plain record).
 * @returns {{request: Function}} Client whose ``request(config)`` resolves
 *   to ``{ status, data, headers, url }``.
 */
function createFetchHttpClient(opts = {}) {
  const { cookie, cookieProvider, userAgent = UA, timeout = 3e4, onResponse } = opts;
  return { async request(config) {
    const method = config.method ?? "GET";
    const url = buildUrl(config.url, config.params);
    const headers = new Headers(config.headers);
    // The provider wins over the static cookie whenever it returns a non-nullish value.
    const reqCookie = cookieProvider?.() ?? cookie;
    if (reqCookie) {
      const existing = headers.get("Cookie");
      headers.set("Cookie", existing ? mergeCookie(reqCookie, existing) : reqCookie);
    }
    if (!headers.has("User-Agent")) headers.set("User-Agent", userAgent);
    const body = serializeBody(config.data);
    if (typeof body === "string" && !headers.has("Content-Type")) headers.set("Content-Type", "application/json");
    let controller = null;
    let timer = null;
    let signal = config.signal ?? null;
    const timeoutMs = config.timeout ?? timeout;
    if (!signal) {
      controller = new AbortController();
      signal = controller.signal;
      // 0, negative, NaN or Infinity would make setTimeout fire (almost)
      // immediately; treat those values as "no timeout".
      if (Number.isFinite(timeoutMs) && timeoutMs > 0) timer = setTimeout(() => controller.abort(), timeoutMs);
    }
    try {
      const resp = await fetch(url, {
        method,
        headers,
        body,
        signal
      });
      // Each consumer gets its own record so the hook cannot mutate the one returned below.
      onResponse?.(headersToRecord(resp.headers));
      const data = await readBody(resp, config.responseType);
      return {
        status: resp.status,
        data,
        headers: headersToRecord(resp.headers),
        url: resp.url
      };
    } catch (err) {
      // Only the internal controller can be aborted by the timer above.
      if (controller?.signal.aborted && !config.signal?.aborted) throw new Error(`Request timeout after ${timeoutMs}ms: ${url}`, { cause: err });
      if (String(err).includes("too many redirect")) throw new Error("Too many redirects — session may be expired or invalid. Export a fresh session from your browser.", { cause: err });
      throw err;
    } finally {
      if (timer !== null) clearTimeout(timer);
    }
  } };
}
|
|
1916
|
+
//#endregion
|
|
1917
|
+
//#region src/sdk.ts
|
|
1918
|
+
var InstagramSDK = class { // Facade that maps an Instagram URL to an extractor class, then either streams its messages (extract) or runs a DownloadJob over it (download).
|
|
1919
|
+
http; // HTTP client handed to every extractor: opts.http, or createFetchHttpClient() when omitted
|
|
1920
|
+
storage; // opts.storage, or undefined when none was supplied
|
|
1921
|
+
log; // opts.log, or noopLogger when omitted
|
|
1922
|
+
config; // ConfigManager created fresh for this SDK instance
|
|
1923
|
+
_csrfToken; // opts.csrfToken, or "" when omitted; forwarded to every extractor
|
|
1924
|
+
constructor(opts = {}) { // every option is optional; nullish values fall back to the defaults assigned below
|
|
1925
|
+
this.http = opts.http ?? createFetchHttpClient();
|
|
1926
|
+
this.storage = opts.storage ?? void 0;
|
|
1927
|
+
this.log = opts.log ?? noopLogger;
|
|
1928
|
+
this.config = new ConfigManager();
|
|
1929
|
+
this._csrfToken = opts.csrfToken ?? "";
|
|
1937
1930
|
}
|
|
1938
|
-
|
|
1939
|
-
|
|
1931
|
+
/**
|
|
1932
|
+
* Extract messages from an Instagram URL without downloading.
|
|
1933
|
+
* Resolves the URL to an extractor via _resolve(), awaits its initialize(), then delegates to it with yield*.
|
|
1934
|
+
* Returns an async generator yielding Directory / Url / Queue messages. The body runs lazily, so an unmatched URL rejects on the first next() call rather than when extract() is called.
|
|
1935
|
+
* Each ``url`` message includes full metadata (post_id, username, dimensions, etc.).
|
|
1936
|
+
*/
|
|
1937
|
+
async *extract(url) {
|
|
1938
|
+
const extractor = this._resolve(url);
|
|
1939
|
+
await extractor.initialize();
|
|
1940
|
+
yield* extractor;
|
|
1941
|
+
}
|
|
1942
|
+
/**
|
|
1943
|
+
* Download all media from an Instagram URL.
|
|
1944
|
+
* The URL is resolved with _resolve(), so an unsupported URL makes the returned promise reject with that Error.
|
|
1945
|
+
* Uses the built-in DownloadJob + Storage to save files to disk.
|
|
1946
|
+
* Requires ``storage`` to be set in constructor options. NOTE(review): this method does not check it — confirm where a missing storage is reported.
|
|
1947
|
+
* Sets job.basePath to outputDir (default "./data"), awaits job.run(), then returns the job counters, each falling back to 0 when nullish.
|
|
1948
|
+
* ```ts
|
|
1949
|
+
* const stats = await ig.download('https://www.instagram.com/p/.../', './my-downloads')
|
|
1950
|
+
* // → { posts: 1, files: 9, bytes: 4500000 }
|
|
1951
|
+
* ```
|
|
1952
|
+
*/
|
|
1953
|
+
async download(url, outputDir = "./data") {
|
|
1954
|
+
const job = new DownloadJob(this._resolve(url));
|
|
1955
|
+
job.basePath = outputDir;
|
|
1956
|
+
await job.run();
|
|
1957
|
+
return {
|
|
1958
|
+
posts: job._postCount ?? 0,
|
|
1959
|
+
files: job._fileCount ?? 0,
|
|
1960
|
+
bytes: job._downloadedBytes ?? 0
|
|
1961
|
+
};
|
|
1962
|
+
}
|
|
1963
|
+
/** Resolve a URL to an Extractor instance via pattern matching: classes are tried in the order listed, the first whose static pattern.exec(url) matches is constructed, and an Error is thrown when none match. */
|
|
1964
|
+
_resolve(url) {
|
|
1965
|
+
for (const Cls of [
|
|
1966
|
+
InstagramPostExtractor,
|
|
1967
|
+
InstagramStoriesExtractor,
|
|
1968
|
+
InstagramHighlightsExtractor,
|
|
1969
|
+
InstagramTagExtractor,
|
|
1970
|
+
InstagramSavedExtractor,
|
|
1971
|
+
InstagramPostsExtractor,
|
|
1972
|
+
InstagramReelsExtractor,
|
|
1973
|
+
InstagramTaggedExtractor,
|
|
1974
|
+
InstagramInfoExtractor,
|
|
1975
|
+
InstagramAvatarExtractor,
|
|
1976
|
+
InstagramUserExtractor // tried last — presumably the most general pattern; TODO confirm against the extractor definitions
|
|
1977
|
+
]) {
|
|
1978
|
+
const match = Cls.pattern.exec(url); // NOTE(review): exec() is stateful on regexes with the g or y flag — confirm the patterns use neither
|
|
1979
|
+
if (match) return Reflect.construct(Cls, [{ // same as `new Cls(options)` with this single options object
|
|
1980
|
+
url,
|
|
1981
|
+
match,
|
|
1982
|
+
config: this.config,
|
|
1983
|
+
http: this.http,
|
|
1984
|
+
storage: this.storage,
|
|
1985
|
+
log: this.log,
|
|
1986
|
+
csrfToken: this._csrfToken
|
|
1987
|
+
}]);
|
|
1988
|
+
}
|
|
1989
|
+
throw new Error(`No extractor matched URL: ${url}. Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/`);
|
|
1940
1990
|
}
|
|
1941
1991
|
};
|
|
1942
1992
|
//#endregion
|
|
1943
|
-
export {
|
|
1993
|
+
export { directory as A, _YELLOW as B, extract as C, parseUnicodeEscapes$1 as D, parseInt as E, Extractor as F, pad as G, c as H, noopLogger as I, ConfigManager as K, DownloadJob as L, url as M, idFromShortcode as N, unescape as O, shortcodeFromId as P, Job as R, extr as S, nameExtFromURL as T, dim as U, b as V, g as W, extractAudio as _, InstagramTaggedExtractor as a, InstagramRestAPI as b, InstagramSavedExtractor as c, InstagramPostExtractor as d, InstagramInfoExtractor as f, parsePostGraphql as g, InstagramExtractor as h, InstagramUserExtractor as i, queue as j, unquote as k, InstagramReelsExtractor as l, InstagramAvatarExtractor as m, createFetchHttpClient as n, InstagramTagExtractor as o, InstagramHighlightsExtractor as p, extractCsrf as r, InstagramStoriesExtractor as s, InstagramSDK as t, InstagramPostsExtractor as u, extractTaggedUsers as v, findTags as w, ensureHttpScheme as x, parsePostRest as y, _RESET as z };
|