@chilfish/gallery-dl-instagram 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +340 -0
- package/README.md +134 -0
- package/dist/adapter-CFsiiEpM.cjs +83 -0
- package/dist/adapter-tSleX8Cr.mjs +59 -0
- package/dist/dl-ins.mjs +5129 -0
- package/dist/index.cjs +40 -0
- package/dist/{sdk-B9fRyc1e.d.mts → index.d.cts} +139 -270
- package/dist/index.d.mts +470 -51
- package/dist/index.mjs +2 -40
- package/dist/node.cjs +43 -0
- package/dist/node.d.cts +47 -0
- package/dist/node.d.mts +47 -0
- package/dist/node.mjs +42 -0
- package/dist/{extractors-Byw-2lPL.mjs → sdk-Bn0VCUIT.mjs} +291 -215
- package/dist/sdk-CK9x5wFL.d.cts +259 -0
- package/dist/sdk-CK9x5wFL.d.mts +259 -0
- package/dist/sdk-nzhAxf1O.cjs +2246 -0
- package/dist/storage-77hqz5Fi.mjs +24 -0
- package/dist/storage-BwGaT6XO.cjs +24 -0
- package/package.json +32 -25
- package/cli/adapter.ts +0 -284
- package/cli/cookies.ts +0 -59
- package/cli/index.ts +0 -337
- package/config.ts +0 -80
- package/core/extractor.ts +0 -217
- package/core/job.ts +0 -581
- package/dist/adapter-Bt86eL1R.mjs +0 -189
- package/dist/cli/index.d.mts +0 -1
- package/dist/cli/index.mjs +0 -3160
- package/dist/sdk.d.mts +0 -2
- package/dist/sdk.mjs +0 -93
- package/index.ts +0 -159
- package/instagram/api.ts +0 -531
- package/instagram/base.ts +0 -275
- package/instagram/extractors.ts +0 -521
- package/instagram/index.ts +0 -43
- package/instagram/parsers.ts +0 -583
- package/instagram/types.ts +0 -244
- package/message.ts +0 -31
- package/types.ts +0 -115
- package/utils/id-codec.ts +0 -39
- package/utils/text.ts +0 -178
package/dist/node.d.mts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { b as Storage, n as InstagramSDK, o as Logger, p as HttpClient } from "./sdk-CK9x5wFL.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/node-factory.d.ts
|
|
4
|
+
/**
 * Settings accepted by the Node.js `createSDK` convenience factory.
 *
 * At least one of ``cookies`` or ``http`` has to be supplied.
 */
interface CreateSDKOptions {
	/**
	 * The complete browser Cookie header value
	 * (DevTools → Network → Request Headers → Cookie).
	 * The csrftoken it carries is picked out for the X-CSRFToken header.
	 */
	cookies?: string;
	/**
	 * Caller-supplied HttpClient implementation.
	 * When left out, an axios-based Node.js client is built from ``cookies``.
	 */
	http?: HttpClient;
	/**
	 * Caller-supplied Storage implementation used for file output.
	 * Falls back to the Node.js fs/promises-based storage.
	 */
	storage?: Storage;
	/**
	 * Logger to report through; a silent no-op logger is used when omitted.
	 */
	log?: Logger;
}
|
|
29
|
+
/**
 * Build an SDK instance pre-wired with Node.js defaults.
 *
 * Supplying ``cookies`` makes the factory create an axios-based HttpClient
 * and extract the CSRF token from the cookie string. Supply ``http``
 * instead to plug in a custom adapter.
 *
 * ```ts
 * import { createSDK } from '@chilfish/gallery-dl-instagram/node'
 *
 * // Node.js with cookies
 * const ig = await createSDK({ cookies: 'ds_user_id=...; sessionid=...' })
 *
 * // Custom http adapter (browser / Deno / Edge)
 * const ig = await createSDK({ http: myHttpClient, storage: myStorage })
 * ```
 *
 * @param opts - Factory options; see {@link CreateSDKOptions}.
 * @returns A promise resolving to the configured InstagramSDK.
 */
declare function createSDK(opts?: CreateSDKOptions): Promise<InstagramSDK>;
|
|
46
|
+
//#endregion
|
|
47
|
+
export { type CreateSDKOptions, createSDK };
|
package/dist/node.mjs
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { L as noopLogger, t as InstagramSDK } from "./sdk-Bn0VCUIT.mjs";
|
|
2
|
+
//#region src/node-factory.ts
|
|
3
|
+
/**
 * Build an SDK instance pre-wired with Node.js defaults.
 *
 * Resolution order:
 * 1. ``opts.http`` is set: it is used as-is, ``opts.storage`` is passed
 *    through unchanged and the CSRF token stays empty.
 * 2. ``opts.cookies`` is set: the adapter module is loaded lazily, the CSRF
 *    token is extracted from the cookie string, an HttpClient is created
 *    from the cookies, and the storage module is loaded lazily when no
 *    ``opts.storage`` was supplied.
 * 3. Neither is set: an Error is thrown.
 *
 * ```ts
 * import { createSDK } from '@chilfish/gallery-dl-instagram/node'
 *
 * // Node.js with cookies
 * const ig = await createSDK({ cookies: 'ds_user_id=...; sessionid=...' })
 *
 * // Custom http adapter (browser / Deno / Edge)
 * const ig = await createSDK({ http: myHttpClient, storage: myStorage })
 * ```
 */
async function createSDK(opts = {}) {
	const log = opts.log ?? noopLogger;

	// A caller-provided client wins; nothing else is loaded in that case.
	if (opts.http) {
		return new InstagramSDK({
			http: opts.http,
			storage: opts.storage,
			log,
			csrfToken: ""
		});
	}

	if (!opts.cookies) {
		throw new Error('Either "cookies" or "http" must be provided. Get cookies from browser DevTools → Application → Cookies → instagram.com');
	}

	// Lazy imports keep the Node-only modules out of the custom-adapter path.
	const { createHttpClient, extractCsrfFromCookies } = await import("./adapter-tSleX8Cr.mjs");
	const csrfToken = extractCsrfFromCookies(opts.cookies);
	const http = createHttpClient(void 0, opts.cookies, log);

	let storage = opts.storage;
	if (!storage) {
		const { createStorage } = await import("./storage-77hqz5Fi.mjs");
		storage = createStorage();
	}

	return new InstagramSDK({
		http,
		storage,
		log,
		csrfToken
	});
}
|
|
41
|
+
//#endregion
|
|
42
|
+
export { createSDK };
|
|
@@ -1,4 +1,58 @@
|
|
|
1
|
-
//#region
|
|
1
|
+
//#region src/config.ts
|
|
2
|
+
var ConfigManager = class {
|
|
3
|
+
data;
|
|
4
|
+
constructor(data = {}) {
|
|
5
|
+
this.data = data;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Read a value at a dot-path like ``'extractor.instagram.videos'``.
|
|
9
|
+
* Returns ``undefined`` when the path doesn't exist.
|
|
10
|
+
*/
|
|
11
|
+
get(path, defaultValue) {
|
|
12
|
+
const keys = path.split(".");
|
|
13
|
+
let node = this.data;
|
|
14
|
+
for (const key of keys) {
|
|
15
|
+
if (node == null || typeof node !== "object" || Array.isArray(node)) return defaultValue;
|
|
16
|
+
node = node[key];
|
|
17
|
+
}
|
|
18
|
+
if (node === void 0) return defaultValue;
|
|
19
|
+
return node;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Interpolate a config key through a hierarchy of paths.
|
|
23
|
+
*/
|
|
24
|
+
interpolate(cfgPath, key, defaultVal) {
|
|
25
|
+
let node = this.data;
|
|
26
|
+
for (let i = 0; i < cfgPath.length; i++) {
|
|
27
|
+
if (node != null && typeof node === "object" && !Array.isArray(node)) {
|
|
28
|
+
const v = node[key];
|
|
29
|
+
if (v !== void 0) return v;
|
|
30
|
+
}
|
|
31
|
+
if (node == null || typeof node !== "object" || Array.isArray(node)) break;
|
|
32
|
+
node = node[cfgPath[i]];
|
|
33
|
+
}
|
|
34
|
+
return defaultVal;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Mutate the config at a given dot-path.
|
|
38
|
+
*/
|
|
39
|
+
set(path, value) {
|
|
40
|
+
const keys = path.split(".");
|
|
41
|
+
let node = this.data;
|
|
42
|
+
for (let i = 0; i < keys.length - 1; i++) {
|
|
43
|
+
const key = keys[i];
|
|
44
|
+
let child = node[key];
|
|
45
|
+
if (child == null || typeof child !== "object" || Array.isArray(child)) {
|
|
46
|
+
child = {};
|
|
47
|
+
node[key] = child;
|
|
48
|
+
}
|
|
49
|
+
node = child;
|
|
50
|
+
}
|
|
51
|
+
node[keys[keys.length - 1]] = value;
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
//#endregion
|
|
55
|
+
//#region src/core/extractor.ts
|
|
2
56
|
/** A no-op logger */
|
|
3
57
|
const noopLogger = {
|
|
4
58
|
debug: () => {},
|
|
@@ -120,61 +174,7 @@ var Extractor = class {
|
|
|
120
174
|
}
|
|
121
175
|
};
|
|
122
176
|
//#endregion
|
|
123
|
-
//#region
|
|
124
|
-
var ConfigManager = class {
|
|
125
|
-
data;
|
|
126
|
-
constructor(data = {}) {
|
|
127
|
-
this.data = data;
|
|
128
|
-
}
|
|
129
|
-
/**
|
|
130
|
-
* Read a value at a dot-path like ``'extractor.instagram.videos'``.
|
|
131
|
-
* Returns ``undefined`` when the path doesn't exist.
|
|
132
|
-
*/
|
|
133
|
-
get(path, defaultValue) {
|
|
134
|
-
const keys = path.split(".");
|
|
135
|
-
let node = this.data;
|
|
136
|
-
for (const key of keys) {
|
|
137
|
-
if (node == null || typeof node !== "object" || Array.isArray(node)) return defaultValue;
|
|
138
|
-
node = node[key];
|
|
139
|
-
}
|
|
140
|
-
if (node === void 0) return defaultValue;
|
|
141
|
-
return node;
|
|
142
|
-
}
|
|
143
|
-
/**
|
|
144
|
-
* Interpolate a config key through a hierarchy of paths.
|
|
145
|
-
*/
|
|
146
|
-
interpolate(cfgPath, key, defaultVal) {
|
|
147
|
-
let node = this.data;
|
|
148
|
-
for (let i = 0; i < cfgPath.length; i++) {
|
|
149
|
-
if (node != null && typeof node === "object" && !Array.isArray(node)) {
|
|
150
|
-
const v = node[key];
|
|
151
|
-
if (v !== void 0) return v;
|
|
152
|
-
}
|
|
153
|
-
if (node == null || typeof node !== "object" || Array.isArray(node)) break;
|
|
154
|
-
node = node[cfgPath[i]];
|
|
155
|
-
}
|
|
156
|
-
return defaultVal;
|
|
157
|
-
}
|
|
158
|
-
/**
|
|
159
|
-
* Mutate the config at a given dot-path.
|
|
160
|
-
*/
|
|
161
|
-
set(path, value) {
|
|
162
|
-
const keys = path.split(".");
|
|
163
|
-
let node = this.data;
|
|
164
|
-
for (let i = 0; i < keys.length - 1; i++) {
|
|
165
|
-
const key = keys[i];
|
|
166
|
-
let child = node[key];
|
|
167
|
-
if (child == null || typeof child !== "object" || Array.isArray(child)) {
|
|
168
|
-
child = {};
|
|
169
|
-
node[key] = child;
|
|
170
|
-
}
|
|
171
|
-
node = child;
|
|
172
|
-
}
|
|
173
|
-
node[keys[keys.length - 1]] = value;
|
|
174
|
-
}
|
|
175
|
-
};
|
|
176
|
-
//#endregion
|
|
177
|
-
//#region core/job.ts
|
|
177
|
+
//#region src/core/job.ts
|
|
178
178
|
function formatBytes(bytes) {
|
|
179
179
|
if (bytes === 0) return "0 B";
|
|
180
180
|
const units = [
|
|
@@ -529,7 +529,29 @@ var PrintJob = class PrintJob extends Job {
|
|
|
529
529
|
}
|
|
530
530
|
};
|
|
531
531
|
//#endregion
|
|
532
|
-
//#region
|
|
532
|
+
//#region src/message.ts
|
|
533
|
+
/** Build a ``directory`` message carrying the given metadata. */
function directory(metadata = {}) {
	const message = { type: "directory", metadata };
	return message;
}
|
|
539
|
+
/** Build a ``url`` message for the media URL ``u`` with the given metadata. */
function url(u, metadata = {}) {
	const message = { type: "url", url: u, metadata };
	return message;
}
|
|
546
|
+
/** Build a ``queue`` message for the URL ``u`` with the given metadata. */
function queue(u, metadata = {}) {
	const message = { type: "queue", url: u, metadata };
	return message;
}
|
|
553
|
+
//#endregion
|
|
554
|
+
//#region src/utils/id-codec.ts
|
|
533
555
|
/**
|
|
534
556
|
* Instagram-style Base64-variant ID ↔ shortcode conversion.
|
|
535
557
|
*/
|
|
@@ -560,7 +582,138 @@ function shortcodeFromId(postId) {
|
|
|
560
582
|
return chars.reverse().join("");
|
|
561
583
|
}
|
|
562
584
|
//#endregion
|
|
563
|
-
//#region
|
|
585
|
+
//#region src/utils/text.ts
|
|
586
|
+
/**
|
|
587
|
+
* Text utilities ported from gallery-dl's ``text`` module.
|
|
588
|
+
*
|
|
589
|
+
* All functions are pure and environment-agnostic.
|
|
590
|
+
*/
|
|
591
|
+
/** String extraction */
|
|
592
|
+
/**
 * Return the text of ``txt`` that lies between the first ``begin`` and the
 * next ``end`` following it, or ``null`` when either delimiter is absent.
 */
function extract(txt, begin, end) {
	const openAt = txt.indexOf(begin);
	if (openAt < 0) return null;
	const contentStart = openAt + begin.length;
	// Search for the closing delimiter only after the opening one.
	const closeAt = txt.indexOf(end, contentStart);
	return closeAt < 0 ? null : txt.slice(contentStart, closeAt);
}
|
|
604
|
+
/**
 * Convenience wrapper around ``extract`` that yields ``default_`` instead
 * of ``null`` when a delimiter is missing (counterpart of Python ``extr()``).
 */
function extr(txt, begin, end, default_ = "") {
	const found = extract(txt, begin, end);
	return found ?? default_;
}
|
|
611
|
+
/** Unicode / HTML */
|
|
612
|
+
/**
 * Replace every literal ``\\uXXXX`` sequence (four hex digits) in ``text``
 * with the UTF-16 code unit it names. Text without ``\\u`` is returned as-is.
 */
function parseUnicodeEscapes$1(text) {
	if (text.indexOf("\\u") === -1) return text;
	const toCodeUnit = (_whole, digits) => String.fromCharCode(Number.parseInt(digits, 16));
	return text.replace(/\\u([0-9a-fA-F]{4})/g, toCodeUnit);
}
|
|
619
|
+
/**
 * HTML entity decode.
 *
 * In Node.js we could use a DOM parser, but since this library is
 * environment-agnostic we ship a minimal table covering the common cases,
 * plus generic handling of numeric references (``&#NN;`` / ``&#xHH;``).
 */
const HTML_ENTITIES = {
	"amp": "&",
	"lt": "<",
	"gt": ">",
	"quot": "\"",
	"apos": "'",
	"nbsp": "\xA0",
	"#x27": "'",
	"#x2F": "/",
	"#39": "'",
	"#47": "/"
};
// Entity names are alphanumeric, optionally prefixed with `#`. Restricting
// the name keeps a bare `&` from swallowing a real entity further along
// (e.g. "Tom & Jerry &amp; co").
const RE_ENTITY = /&(#?[0-9a-zA-Z]+);/g;
/** Largest valid Unicode code point; ``String.fromCodePoint`` throws above it. */
const MAX_CODE_POINT = 0x10FFFF;
/**
 * Decode the HTML character references found in ``text``.
 *
 * @param text - Input string, possibly containing ``&name;`` / ``&#NN;`` / ``&#xHH;``.
 * @returns ``text`` with known references decoded; unknown or invalid
 *   references are left exactly as written.
 */
function unescape(text) {
	return text.replace(RE_ENTITY, (m, name) => {
		// Own-property check: names like "constructor" must not resolve to
		// members inherited from Object.prototype.
		if (Object.prototype.hasOwnProperty.call(HTML_ENTITIES, name)) return HTML_ENTITIES[name];
		if (name.startsWith("#")) {
			const isHex = name[1] === "x" || name[1] === "X";
			const cp = isHex ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
			// Out-of-range values would make fromCodePoint throw a RangeError.
			if (Number.isInteger(cp) && cp >= 0 && cp <= MAX_CODE_POINT) return String.fromCodePoint(cp);
		}
		return m;
	});
}
|
|
649
|
+
/** URL helpers */
|
|
650
|
+
/**
 * Percent-decode ``text``.
 *
 * The whole string is decoded in one go when possible. If that fails
 * (malformed escape somewhere), each ``%XX`` escape is decoded on its own
 * and escapes that cannot be decoded individually are kept verbatim.
 */
function unquote(text) {
	const decodeSingle = (escape) => {
		try {
			return decodeURIComponent(escape);
		} catch {
			return escape;
		}
	};
	try {
		return decodeURIComponent(text);
	} catch {
		return text.replace(/%[0-9a-f]{2}/gi, decodeSingle);
	}
}
|
|
666
|
+
/**
 * Make sure ``url`` carries an ``http://`` or ``https://`` prefix.
 *
 * Falsy input is returned untouched. A URL that already has one of the two
 * schemes is returned untouched. Otherwise any leading ``/`` and ``:``
 * characters are stripped and ``scheme`` is prepended.
 */
function ensureHttpScheme(url, scheme = "https://") {
	if (!url) return url;
	const hasScheme = ["https://", "http://"].some((prefix) => url.startsWith(prefix));
	if (hasScheme) return url;
	const bare = url.replace(/^[/:]+/, "");
	return scheme + bare;
}
|
|
674
|
+
/**
 * Derive ``meta.filename`` and ``meta.extension`` from the last path
 * segment of ``url`` (both percent-decoded, extension lower-cased).
 *
 * A suffix counts as an extension only when the dot is not the first
 * character and at most 16 characters follow it; otherwise the whole
 * segment becomes the filename and the extension is the empty string.
 */
function nameExtFromURL(url, meta) {
	const base = filenameFromURL(url);
	const dotAt = base.lastIndexOf(".");
	const suffixLength = base.length - dotAt - 1;
	const hasExtension = dotAt > 0 && suffixLength <= 16;
	meta.filename = unquote(hasExtension ? base.slice(0, dotAt) : base);
	meta.extension = hasExtension ? unquote(base.slice(dotAt + 1)).toLowerCase() : "";
}
|
|
688
|
+
/**
 * Return the final ``/``-separated segment of ``url``, ignoring anything
 * from the first ``?`` onwards. Yields ``""`` if the input cannot be split.
 */
function filenameFromURL(url) {
	try {
		const [beforeQuery] = url.split("?");
		const segments = beforeQuery.split("/");
		const lastSegment = segments.pop();
		return lastSegment ?? "";
	} catch {
		return "";
	}
}
|
|
698
|
+
/**
 * Lenient integer parsing. ``null``/``undefined`` and anything that does
 * not produce a finite number yield ``default_``. Numbers are passed
 * through unchanged; other values are stringified and parsed in base 10.
 */
function parseInt(value, default_ = 0) {
	if (value === null || value === void 0) return default_;
	let parsed;
	if (typeof value === "number") parsed = value;
	else parsed = Number.parseInt(String(value), 10);
	if (!Number.isFinite(parsed)) return default_;
	return parsed;
}
|
|
706
|
+
/**
 * Build a finder that returns the distinct matches of ``pattern`` in a
 * text, in order of first appearance.
 *
 * @param pattern - Regular-expression source.
 * @param flags - RegExp flags; should include ``g`` so that every match is
 *   collected. Defaults to ``"g"``.
 * @returns A function mapping a string to its de-duplicated matches
 *   (empty array when nothing matches).
 */
function tagRe(pattern, flags = "g") {
	const re = new RegExp(pattern, flags);
	return (text) => {
		const matches = text.match(re);
		return matches ? [...new Set(matches)] : [];
	};
}
/**
 * Pre-configured hashtag finder.
 *
 * JavaScript's ``\w`` is ASCII-only, unlike the Unicode-aware ``\w`` of the
 * Python original, so letters and digits are matched through Unicode
 * property escapes to keep non-ASCII hashtags intact.
 */
const findTags = tagRe("#[\\p{L}\\p{N}_]+", "gu");
|
|
715
|
+
//#endregion
|
|
716
|
+
//#region src/instagram/api.ts
|
|
564
717
|
const APP_ID = "936619743392459";
|
|
565
718
|
const ASBD_ID = "129477";
|
|
566
719
|
var InstagramRestAPI = class {
|
|
@@ -900,160 +1053,7 @@ var InstagramRestAPI = class {
|
|
|
900
1053
|
}
|
|
901
1054
|
};
|
|
902
1055
|
//#endregion
|
|
903
|
-
//#region
|
|
904
|
-
function directory(metadata = {}) {
|
|
905
|
-
return {
|
|
906
|
-
type: "directory",
|
|
907
|
-
metadata
|
|
908
|
-
};
|
|
909
|
-
}
|
|
910
|
-
function url(u, metadata = {}) {
|
|
911
|
-
return {
|
|
912
|
-
type: "url",
|
|
913
|
-
url: u,
|
|
914
|
-
metadata
|
|
915
|
-
};
|
|
916
|
-
}
|
|
917
|
-
function queue(u, metadata = {}) {
|
|
918
|
-
return {
|
|
919
|
-
type: "queue",
|
|
920
|
-
url: u,
|
|
921
|
-
metadata
|
|
922
|
-
};
|
|
923
|
-
}
|
|
924
|
-
//#endregion
|
|
925
|
-
//#region utils/text.ts
|
|
926
|
-
/**
|
|
927
|
-
* Text utilities ported from gallery-dl's ``text`` module.
|
|
928
|
-
*
|
|
929
|
-
* All functions are pure and environment-agnostic.
|
|
930
|
-
*/
|
|
931
|
-
/** String extraction */
|
|
932
|
-
/**
|
|
933
|
-
* Extract the substring between ``begin`` and ``end`` from ``txt``.
|
|
934
|
-
* Returns the substring or ``null`` if either delimiter is missing.
|
|
935
|
-
*/
|
|
936
|
-
function extract(txt, begin, end) {
|
|
937
|
-
const first = txt.indexOf(begin);
|
|
938
|
-
if (first < 0) return null;
|
|
939
|
-
const start = first + begin.length;
|
|
940
|
-
const last = txt.indexOf(end, start);
|
|
941
|
-
if (last < 0) return null;
|
|
942
|
-
return txt.slice(start, last);
|
|
943
|
-
}
|
|
944
|
-
/**
|
|
945
|
-
* Shorthand: same as ``extract`` but returns ``default_`` on failure.
|
|
946
|
-
* Mirrors the Python ``extr()`` function.
|
|
947
|
-
*/
|
|
948
|
-
function extr(txt, begin, end, default_ = "") {
|
|
949
|
-
return extract(txt, begin, end) ?? default_;
|
|
950
|
-
}
|
|
951
|
-
/** Unicode / HTML */
|
|
952
|
-
/**
|
|
953
|
-
* Decode ``\\uXXXX`` escape sequences in a string.
|
|
954
|
-
*/
|
|
955
|
-
function parseUnicodeEscapes$1(text) {
|
|
956
|
-
if (!text.includes("\\u")) return text;
|
|
957
|
-
return text.replace(/\\u([0-9a-fA-F]{4})/g, (_m, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
|
|
958
|
-
}
|
|
959
|
-
/**
|
|
960
|
-
* HTML entity decode.
|
|
961
|
-
*
|
|
962
|
-
* In Node.js we could use a DOM parser, but since this library is
|
|
963
|
-
* environment-agnostic we ship a minimal covering the common cases.
|
|
964
|
-
*/
|
|
965
|
-
const HTML_ENTITIES = {
|
|
966
|
-
"amp": "&",
|
|
967
|
-
"lt": "<",
|
|
968
|
-
"gt": ">",
|
|
969
|
-
"quot": "\"",
|
|
970
|
-
"apos": "'",
|
|
971
|
-
"nbsp": "\xA0",
|
|
972
|
-
"#x27": "'",
|
|
973
|
-
"#x2F": "/",
|
|
974
|
-
"#39": "'",
|
|
975
|
-
"#47": "/"
|
|
976
|
-
};
|
|
977
|
-
const RE_ENTITY = /&([^;]+);/g;
|
|
978
|
-
function unescape(text) {
|
|
979
|
-
return text.replace(RE_ENTITY, (m, name) => {
|
|
980
|
-
const ch = HTML_ENTITIES[name];
|
|
981
|
-
if (ch !== void 0) return ch;
|
|
982
|
-
if (name.startsWith("#")) {
|
|
983
|
-
const cp = name[1] === "x" || name[1] === "X" ? Number.parseInt(name.slice(2), 16) : Number.parseInt(name.slice(1), 10);
|
|
984
|
-
if (Number.isSafeInteger(cp)) return String.fromCodePoint(cp);
|
|
985
|
-
}
|
|
986
|
-
return m;
|
|
987
|
-
});
|
|
988
|
-
}
|
|
989
|
-
/** URL helpers */
|
|
990
|
-
/**
|
|
991
|
-
* URL-decode a string.
|
|
992
|
-
*/
|
|
993
|
-
function unquote(text) {
|
|
994
|
-
try {
|
|
995
|
-
return decodeURIComponent(text);
|
|
996
|
-
} catch {
|
|
997
|
-
return text.replace(/%[0-9a-f]{2}/gi, (m) => {
|
|
998
|
-
try {
|
|
999
|
-
return decodeURIComponent(m);
|
|
1000
|
-
} catch {
|
|
1001
|
-
return m;
|
|
1002
|
-
}
|
|
1003
|
-
});
|
|
1004
|
-
}
|
|
1005
|
-
}
|
|
1006
|
-
/**
|
|
1007
|
-
* Ensure a URL starts with ``https://`` (or ``http://``).
|
|
1008
|
-
*/
|
|
1009
|
-
function ensureHttpScheme(url, scheme = "https://") {
|
|
1010
|
-
if (!url) return url;
|
|
1011
|
-
if (url.startsWith("https://") || url.startsWith("http://")) return url;
|
|
1012
|
-
return scheme + url.replace(/^[/:]+/, "");
|
|
1013
|
-
}
|
|
1014
|
-
/**
|
|
1015
|
-
* Extract filename + extension from a URL and write into ``meta``.
|
|
1016
|
-
*/
|
|
1017
|
-
function nameExtFromURL(url, meta) {
|
|
1018
|
-
const filename = filenameFromURL(url);
|
|
1019
|
-
const dot = filename.lastIndexOf(".");
|
|
1020
|
-
if (dot > 0 && filename.length - dot - 1 <= 16) {
|
|
1021
|
-
meta.filename = unquote(filename.slice(0, dot));
|
|
1022
|
-
meta.extension = unquote(filename.slice(dot + 1)).toLowerCase();
|
|
1023
|
-
} else {
|
|
1024
|
-
meta.filename = unquote(filename);
|
|
1025
|
-
meta.extension = "";
|
|
1026
|
-
}
|
|
1027
|
-
}
|
|
1028
|
-
/**
|
|
1029
|
-
* Extract the file-name portion of a URL (before query string).
|
|
1030
|
-
*/
|
|
1031
|
-
function filenameFromURL(url) {
|
|
1032
|
-
try {
|
|
1033
|
-
return url.split("?")[0].split("/").pop() ?? "";
|
|
1034
|
-
} catch {
|
|
1035
|
-
return "";
|
|
1036
|
-
}
|
|
1037
|
-
}
|
|
1038
|
-
/**
|
|
1039
|
-
* Parse an integer from a possibly-null value. Returns ``default_`` on failure.
|
|
1040
|
-
*/
|
|
1041
|
-
function parseInt(value, default_ = 0) {
|
|
1042
|
-
if (value == null) return default_;
|
|
1043
|
-
const n = typeof value === "number" ? value : Number.parseInt(String(value), 10);
|
|
1044
|
-
return Number.isFinite(n) ? n : default_;
|
|
1045
|
-
}
|
|
1046
|
-
function tagRe(pattern) {
|
|
1047
|
-
const re = new RegExp(pattern, "g");
|
|
1048
|
-
return (text) => {
|
|
1049
|
-
const matches = text.match(re);
|
|
1050
|
-
return matches ? [...new Set(matches)] : [];
|
|
1051
|
-
};
|
|
1052
|
-
}
|
|
1053
|
-
/** Pre-configured hashtag regex. */
|
|
1054
|
-
const findTags = tagRe("#\\w+");
|
|
1055
|
-
//#endregion
|
|
1056
|
-
//#region instagram/parsers.ts
|
|
1056
|
+
//#region src/instagram/parsers.ts
|
|
1057
1057
|
/** Main entry — REST */
|
|
1058
1058
|
function parsePostRest(post, cfg) {
|
|
1059
1059
|
if (post.items) return parseStoryRest(post, cfg);
|
|
@@ -1405,7 +1405,7 @@ function parseUnicodeEscapes(text) {
|
|
|
1405
1405
|
return text.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
|
|
1406
1406
|
}
|
|
1407
1407
|
//#endregion
|
|
1408
|
-
//#region instagram/base.ts
|
|
1408
|
+
//#region src/instagram/base.ts
|
|
1409
1409
|
var Ref = class {
|
|
1410
1410
|
value;
|
|
1411
1411
|
constructor(v) {
|
|
@@ -1558,7 +1558,7 @@ var InstagramExtractor = class extends Extractor {
|
|
|
1558
1558
|
}
|
|
1559
1559
|
};
|
|
1560
1560
|
//#endregion
|
|
1561
|
-
//#region instagram/extractors.ts
|
|
1561
|
+
//#region src/instagram/extractors.ts
|
|
1562
1562
|
const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/;
|
|
1563
1563
|
function re(base, path) {
|
|
1564
1564
|
const pathSrc = typeof path === "string" ? path : path.source;
|
|
@@ -1940,4 +1940,80 @@ var InstagramSavedExtractor = class InstagramSavedExtractor extends InstagramExt
|
|
|
1940
1940
|
}
|
|
1941
1941
|
};
|
|
1942
1942
|
//#endregion
|
|
1943
|
-
|
|
1943
|
+
//#region src/sdk.ts
|
|
1944
|
+
/**
 * Programmatic entry point: resolves Instagram URLs to extractors and
 * either streams their messages or downloads the referenced media.
 */
var InstagramSDK = class {
	http;
	storage;
	log;
	config;
	_csrfToken;
	/**
	 * @param opts - ``http`` is used as given; ``storage`` falls back to
	 *   ``undefined``, ``log`` to the no-op logger and ``csrfToken`` to ``""``.
	 *   A fresh, empty ConfigManager is always created.
	 */
	constructor(opts) {
		const { http, storage, log, csrfToken } = opts;
		this.http = http;
		this.storage = storage ?? undefined;
		this.log = log ?? noopLogger;
		this.config = new ConfigManager();
		this._csrfToken = csrfToken ?? "";
	}
	/**
	 * Stream the messages produced for an Instagram URL, without downloading.
	 *
	 * The matching extractor is initialized and then iterated; everything it
	 * yields is passed through unchanged.
	 */
	async *extract(url) {
		const extractor = this._resolve(url);
		await extractor.initialize();
		yield* extractor;
	}
	/**
	 * Run a DownloadJob for an Instagram URL.
	 *
	 * ```ts
	 * const stats = await ig.download('https://www.instagram.com/p/.../', './my-downloads')
	 * // → { posts: 1, files: 9, bytes: 4500000 }
	 * ```
	 *
	 * @param url - Instagram URL to download from.
	 * @param outputDir - Assigned to the job's ``basePath``; defaults to ``"./data"``.
	 * @returns Post, file and byte counters read from the finished job
	 *   (each defaulting to 0 when the job did not set it).
	 */
	async download(url, outputDir = "./data") {
		const extractor = this._resolve(url);
		const job = new DownloadJob(extractor);
		job.basePath = outputDir;
		await job.run();
		const { _postCount, _fileCount, _downloadedBytes } = job;
		return {
			posts: _postCount ?? 0,
			files: _fileCount ?? 0,
			bytes: _downloadedBytes ?? 0
		};
	}
	/**
	 * Pick the first extractor class whose ``pattern`` matches ``url`` and
	 * instantiate it with this SDK's shared services.
	 *
	 * @throws Error when no extractor pattern matches.
	 */
	_resolve(url) {
		// Order matters: candidates are tried top to bottom, first match wins.
		const candidates = [
			InstagramPostExtractor,
			InstagramStoriesExtractor,
			InstagramHighlightsExtractor,
			InstagramTagExtractor,
			InstagramSavedExtractor,
			InstagramPostsExtractor,
			InstagramReelsExtractor,
			InstagramTaggedExtractor,
			InstagramInfoExtractor,
			InstagramAvatarExtractor,
			InstagramUserExtractor
		];
		for (const Cls of candidates) {
			const match = Cls.pattern.exec(url);
			if (!match) continue;
			const context = {
				url,
				match,
				config: this.config,
				http: this.http,
				storage: this.storage,
				log: this.log,
				csrfToken: this._csrfToken
			};
			return Reflect.construct(Cls, [context]);
		}
		throw new Error(`No extractor matched URL: ${url}. Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/`);
	}
};
|
|
2018
|
+
//#endregion
|
|
2019
|
+
export { directory as A, nameExtFromURL as C, unquote as D, unescape as E, PrintJob as F, Extractor as I, noopLogger as L, url as M, DownloadJob as N, idFromShortcode as O, Job as P, ConfigManager as R, findTags as S, parseUnicodeEscapes$1 as T, parsePostRest as _, InstagramPostExtractor as a, extr as b, InstagramSavedExtractor as c, InstagramTaggedExtractor as d, InstagramUserExtractor as f, parsePostGraphql as g, extractTaggedUsers as h, InstagramInfoExtractor as i, queue as j, shortcodeFromId as k, InstagramStoriesExtractor as l, extractAudio as m, InstagramAvatarExtractor as n, InstagramPostsExtractor as o, InstagramExtractor as p, InstagramHighlightsExtractor as r, InstagramReelsExtractor as s, InstagramSDK as t, InstagramTagExtractor as u, InstagramRestAPI as v, parseInt as w, extract as x, ensureHttpScheme as y };
|