@chilfish/gallery-dl-instagram 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/adapter.ts +284 -0
- package/cli/cookies.ts +59 -0
- package/cli/index.ts +337 -0
- package/config.ts +80 -0
- package/core/extractor.ts +217 -0
- package/core/job.ts +581 -0
- package/dist/adapter-Bt86eL1R.mjs +189 -0
- package/dist/cli/index.d.mts +1 -0
- package/dist/cli/index.mjs +3160 -0
- package/dist/extractors-Byw-2lPL.mjs +1943 -0
- package/dist/index.d.mts +187 -0
- package/dist/index.mjs +40 -0
- package/dist/sdk-B9fRyc1e.d.mts +737 -0
- package/dist/sdk.d.mts +2 -0
- package/dist/sdk.mjs +93 -0
- package/index.ts +159 -0
- package/instagram/api.ts +531 -0
- package/instagram/base.ts +275 -0
- package/instagram/extractors.ts +521 -0
- package/instagram/index.ts +43 -0
- package/instagram/parsers.ts +583 -0
- package/instagram/types.ts +244 -0
- package/message.ts +31 -0
- package/package.json +68 -0
- package/types.ts +115 -0
- package/utils/id-codec.ts +39 -0
- package/utils/text.ts +178 -0
package/cli/adapter.ts
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node.js adapter — HttpClient + Storage + Logger implementations.
|
|
3
|
+
*
|
|
4
|
+
* These bind the platform-agnostic SDK interfaces to Node.js primitives:
|
|
5
|
+
* axios for HTTP, fs/promises for file I/O, console for logging.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { AxiosInstance } from 'axios'
|
|
9
|
+
import type { Logger } from '../core/extractor'
|
|
10
|
+
import type { HttpClient, HttpResponse, Storage } from '../types'
|
|
11
|
+
import { access, mkdir, writeFile } from 'node:fs/promises'
|
|
12
|
+
import { dirname } from 'node:path'
|
|
13
|
+
import axios from 'axios'
|
|
14
|
+
import { createCookieJar } from './cookies'
|
|
15
|
+
|
|
16
|
+
/** NodeHttpClient — axios wrapper */
|
|
17
|
+
|
|
18
|
+
/**
 * Pull the `csrftoken` value out of a `Cookie` header string.
 *
 * The cookie must start the string or directly follow a `;` separator
 * (optionally with whitespace), so names that merely end in `csrftoken`
 * are not matched.
 *
 * @param cookies - Raw `Cookie` header value, e.g. `sessionid=…; csrftoken=…`.
 * @returns The token, or an empty string when no non-empty `csrftoken` is present.
 */
export function extractCsrfFromCookies(cookies: string): string {
  const found = /(?:^|;\s*)csrftoken=([^;]+)/.exec(cookies)
  if (found === null)
    return ''
  return found[1] ?? ''
}
|
|
25
|
+
|
|
26
|
+
/**
 * Create an HTTP client backed by a dedicated axios instance.
 *
 * Every request carries a base `Cookie` header built from `fullCookies`
 * (preferred) or from `sessionId`. A per-request `Cookie` header is appended
 * to the base cookie instead of replacing it. Non-2xx responses are returned,
 * not thrown (`validateStatus` always passes), so callers must check `status`.
 *
 * @param sessionId - Value of the `sessionid` cookie; used only when `fullCookies` is empty.
 * @param fullCookies - Complete `Cookie` header string; takes priority over `sessionId`.
 * @param logger - Optional logger receiving one debug line per request and response.
 * @returns The `HttpClient` wrapper.
 * @throws Error with a hint about an expired sessionid when the redirect limit is hit;
 *   any other transport error is rethrown unchanged.
 */
export function createHttpClient(
  sessionId?: string,
  fullCookies?: string,
  logger?: Logger,
): HttpClient {
  const instance: AxiosInstance = axios.create({
    timeout: 30000,
    maxRedirects: 20,
    validateStatus: () => true, // don't throw on non-2xx
    headers: {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    },
  })

  // Build base Cookie string
  // Priority: full cookies string > sessionid-only
  const baseCookie = fullCookies
    || (sessionId ? `sessionid=${sessionId}` : null)

  // Cookie values are credentials (sessionid!) — only cookie names may reach the log.
  const redactCookie = (cookie: string): string =>
    cookie
      .split(';')
      .map(pair => pair.split('=')[0]?.trim() ?? '')
      .filter(name => name.length > 0)
      .map(name => `${name}=<redacted>`)
      .join('; ')

  // Recognise the redirect-limit failure by error code first; the string form
  // of the error depends on how the HTTP library names/wraps it.
  const isRedirectLoop = (err: unknown): boolean => {
    const code = typeof err === 'object' && err !== null
      ? (err as { code?: unknown }).code
      : undefined
    if (code === 'ERR_FR_TOO_MANY_REDIRECTS')
      return true
    const msg = String(err)
    return msg.includes('TOO_MANY_REDIRECTS')
      || msg.includes('too many redirects')
      || msg.includes('Maximum number of redirects exceeded')
  }

  return {
    async request<T = unknown>(config: {
      url: string
      method?: string
      headers?: Record<string, string>
      params?: Record<string, string | number | null | undefined>
      data?: unknown
      signal?: AbortSignal
      timeout?: number
      responseType?: 'arraybuffer' | 'text' | 'json'
    }): Promise<HttpResponse<T>> {
      const method = config.method ?? 'GET'
      logger?.debug(`${method} ${config.url}`)

      // Merge base cookie with per-request headers (Cookie values are joined)
      const mergedHeaders: Record<string, string> = {}
      if (baseCookie) {
        mergedHeaders.Cookie = baseCookie
      }
      if (config.headers) {
        for (const [k, v] of Object.entries(config.headers)) {
          if (k.toLowerCase() === 'cookie' && mergedHeaders.Cookie) {
            // Append instead of overriding: sessionid=xxx + csrftoken=yyy → sessionid=xxx; csrftoken=yyy
            mergedHeaders.Cookie = `${mergedHeaders.Cookie}; ${v}`
          }
          else {
            mergedHeaders[k] = v
          }
        }
      }

      if (mergedHeaders.Cookie) {
        logger?.debug(`  Cookie: ${redactCookie(mergedHeaders.Cookie).slice(0, 200)}`)
      }

      try {
        const resp = await instance.request<T>({
          url: config.url,
          method,
          headers: mergedHeaders,
          params: cleanupParams(config.params),
          data: config.data,
          signal: config.signal,
          timeout: config.timeout,
          responseType: config.responseType ?? 'json',
        })

        // After redirects the underlying Node response exposes the final URL.
        const finalUrl = resp.request?.res?.responseUrl ?? config.url
        logger?.debug(`  ← ${resp.status} ${resp.status >= 400 ? '⚠️' : ''} (${finalUrl.slice(0, 100)})`)

        return {
          status: resp.status,
          data: resp.data,
          headers: resp.headers as Record<string, string>,
          url: finalUrl,
        }
      }
      catch (err) {
        if (isRedirectLoop(err)) {
          throw new Error(
            'Too many redirects — sessionid may be expired or invalid. '
            + 'Export a fresh sessionid from your browser.',
          )
        }
        throw err
      }
    },
  }
}
|
|
116
|
+
|
|
117
|
+
/** WebClient — anonymous cookie-jar HTTP client */
|
|
118
|
+
|
|
119
|
+
/**
 * Create an HTTP client with an in-memory cookie jar.
 *
 * Use this when you don't have a sessionid — the client first seeds its
 * cookie jar by visiting ``instagram.com``, then sends the jar's cookies
 * with every subsequent request and records any `Set-Cookie` headers that
 * come back.
 *
 * NOTE(review): the jar is not scoped by domain, so its cookies are attached
 * to every URL requested through this client — confirm that is acceptable
 * for non-Instagram hosts.
 *
 * @param logger - Optional logger for progress and per-request debug lines.
 * @returns The client plus the CSRF token found in the seeded cookies
 *   (empty string when the seed response set no `csrftoken`).
 * @throws Error with a hint when the redirect limit is hit during a request;
 *   other transport errors (including a failed seed request) propagate unchanged.
 */
export async function createWebClient(
  logger?: Logger,
): Promise<{ http: HttpClient, csrfToken: string }> {
  const jar = createCookieJar()

  // Recognise the redirect-limit failure by error code first; the string form
  // of the error depends on how the HTTP library names/wraps it.
  const isRedirectLoop = (err: unknown): boolean => {
    const code = typeof err === 'object' && err !== null
      ? (err as { code?: unknown }).code
      : undefined
    if (code === 'ERR_FR_TOO_MANY_REDIRECTS')
      return true
    const msg = String(err)
    return msg.includes('TOO_MANY_REDIRECTS')
      || msg.includes('too many redirects')
      || msg.includes('Maximum number of redirects exceeded')
  }

  // Seed the cookie jar by visiting Instagram's homepage
  logger?.info('Seeding anonymous session (visiting instagram.com)…')
  const seedResp = await axios.get('https://www.instagram.com/', {
    headers: {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    },
    // Same limit as regular requests; without it the seed request could hang forever.
    timeout: 30000,
    maxRedirects: 20,
    validateStatus: () => true,
  })
  jar.setFromResponse(seedResp.headers as Record<string, string>)

  const cookieStr = jar.getCookieHeader()
  // ''.split(';') has length 1, so an empty jar must be counted explicitly.
  const cookieCount = cookieStr ? cookieStr.split(';').length : 0
  logger?.debug(`  ← ${seedResp.status} — got ${cookieCount} cookies`)

  // Extract csrftoken from jar
  const csrfToken = extractCsrfFromCookies(cookieStr)

  // Build the wrapper
  const http: HttpClient = {
    async request<T = unknown>(config: {
      url: string
      method?: string
      headers?: Record<string, string>
      params?: Record<string, string | number | null | undefined>
      data?: unknown
      signal?: AbortSignal
      timeout?: number
      responseType?: 'arraybuffer' | 'text' | 'json'
    }): Promise<HttpResponse<T>> {
      const method = config.method ?? 'GET'
      logger?.debug(`${method} ${config.url}`)

      // Merge jar cookies with per-request headers (Cookie values are joined)
      const jarCookie = jar.getCookieHeader()
      const mergedHeaders: Record<string, string> = {}
      if (jarCookie) {
        mergedHeaders.Cookie = jarCookie
      }
      if (config.headers) {
        for (const [k, v] of Object.entries(config.headers)) {
          if (k.toLowerCase() === 'cookie' && mergedHeaders.Cookie) {
            mergedHeaders.Cookie = `${mergedHeaders.Cookie}; ${v}`
          }
          else {
            mergedHeaders[k] = v
          }
        }
      }

      try {
        const resp = await axios.request<T>({
          url: config.url,
          method,
          headers: mergedHeaders,
          params: cleanupParams(config.params),
          data: config.data,
          signal: config.signal,
          timeout: config.timeout ?? 30000,
          maxRedirects: 20,
          validateStatus: () => true,
          responseType: config.responseType ?? 'json',
        })

        // Update jar with response cookies
        jar.setFromResponse(resp.headers as Record<string, string>)

        // After redirects the underlying Node response exposes the final URL.
        const finalUrl = resp.request?.res?.responseUrl ?? config.url
        logger?.debug(`  ← ${resp.status} ${resp.status >= 400 ? '⚠️' : ''} (${finalUrl.slice(0, 100)})`)

        return {
          status: resp.status,
          data: resp.data,
          headers: resp.headers as Record<string, string>,
          url: finalUrl,
        }
      }
      catch (err) {
        if (isRedirectLoop(err)) {
          throw new Error(
            'Too many redirects — Instagram may be blocking the request. Try again later or use --sessionid.',
          )
        }
        throw err
      }
    },
  }

  return { http, csrfToken }
}
|
|
226
|
+
|
|
227
|
+
/**
 * Normalise query parameters for the HTTP layer.
 *
 * Entries whose value is `null` or `undefined` are dropped; every remaining
 * value is converted with `String()`. Falsy-but-present values such as `0`
 * and `''` are kept.
 *
 * @param params - Raw parameter map, or nothing.
 * @returns A string-only map, or `undefined` when no map was given.
 */
function cleanupParams(
  params?: Record<string, string | number | null | undefined>,
): Record<string, string> | undefined {
  if (!params)
    return undefined

  return Object.entries(params).reduce<Record<string, string>>((kept, [key, value]) => {
    if (value !== null && value !== undefined)
      kept[key] = String(value)
    return kept
  }, {})
}
|
|
239
|
+
|
|
240
|
+
/** NodeStorage — fs/promises wrapper */
|
|
241
|
+
|
|
242
|
+
/**
 * Build the filesystem-backed `Storage` implementation.
 *
 * - `exists` resolves to `true` when `access()` succeeds and `false` on any error.
 * - `write` creates the parent directory (recursively) before writing the file.
 * - `mkdir` creates the directory recursively.
 */
export function createStorage(): Storage {
  const ensureDir = async (dir: string): Promise<void> => {
    await mkdir(dir, { recursive: true })
  }

  return {
    async exists(path: string): Promise<boolean> {
      // Any access() failure (missing file, permissions, …) counts as "does not exist".
      return access(path).then(() => true, () => false)
    },

    async write(path: string, data: Uint8Array | string): Promise<void> {
      await ensureDir(dirname(path))
      await writeFile(path, data)
    },

    async mkdir(path: string): Promise<void> {
      await ensureDir(path)
    },
  }
}
|
|
265
|
+
|
|
266
|
+
/** NodeLogger — console wrapper */
|
|
267
|
+
|
|
268
|
+
/**
 * Build a console-backed `Logger`.
 *
 * Each message is prefixed with its level in square brackets and forwarded
 * to the `console` method of the same name. `debug` output is emitted only
 * when `verbose` is true; the other levels are always printed.
 *
 * @param verbose - Enables `debug` output.
 */
export function createLogger(verbose: boolean): Logger {
  type Level = 'debug' | 'info' | 'warn' | 'error'

  // The console method is looked up at call time, exactly like a direct console.<level>() call.
  const emitter = (level: Level, enabled: boolean) =>
    (message: string, ...args: unknown[]): void => {
      if (enabled)
        console[level](`[${level}] ${message}`, ...args)
    }

  return {
    debug: emitter('debug', verbose),
    info: emitter('info', true),
    warn: emitter('warn', true),
    error: emitter('error', true),
  }
}
|
package/cli/cookies.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple in-memory cookie jar for Node.js HTTP clients.
|
|
3
|
+
*
|
|
4
|
+
* Tracks Set-Cookie headers across requests and injects stored cookies
|
|
5
|
+
* into subsequent requests to the same domain. This is enough for
|
|
6
|
+
* Instagram's anonymous session flow: visit homepage → get cookies →
|
|
7
|
+
* use those cookies for API calls.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export interface CookieJar {
  /** Record cookies from a response's Set-Cookie header(s). */
  setFromResponse: (headers: Record<string, string>) => void
  /** Get the Cookie header value for the next request. */
  getCookieHeader: () => string
}

/**
 * Create an in-memory cookie jar keyed by cookie name.
 *
 * Only the name/value pair and the expiry attributes (`Max-Age`, `Expires`)
 * of each `Set-Cookie` header are interpreted; `Domain`, `Path` and the
 * other attributes are ignored, so the jar is not scoped per host.
 *
 * A `Set-Cookie` that is already expired (`Max-Age` <= 0, or `Expires` in
 * the past when no `Max-Age` is given) removes the cookie from the jar —
 * this is how servers delete cookies.
 */
export function createCookieJar(): CookieJar {
  const cookies = new Map<string, string>()
  let cookieString = ''

  /** True when the cookie attributes say the cookie has already expired. */
  const isExpired = (attributes: string[]): boolean => {
    let expiresAt: number | undefined
    for (const attribute of attributes) {
      const sep = attribute.indexOf('=')
      if (sep <= 0)
        continue
      const key = attribute.slice(0, sep).trim().toLowerCase()
      const val = attribute.slice(sep + 1).trim()
      // Max-Age takes precedence over Expires, wherever it appears in the header.
      if (key === 'max-age' && /^-?\d+$/.test(val))
        return Number(val) <= 0
      if (key === 'expires') {
        const parsed = Date.parse(val)
        // Unparseable dates are ignored rather than treated as expired.
        if (!Number.isNaN(parsed))
          expiresAt = parsed
      }
    }
    return expiresAt !== undefined && expiresAt <= Date.now()
  }

  return {
    setFromResponse(headers: Record<string, string>): void {
      // Despite the declared type, HTTP clients deliver Set-Cookie as an array
      // when the response carries several of them.
      const raw: unknown = headers['set-cookie']
      if (!raw)
        return

      const cookieHeaders: unknown[] = Array.isArray(raw) ? raw : [raw]
      for (const header of cookieHeaders) {
        if (typeof header !== 'string')
          continue

        const [nameValue = '', ...attributes] = header.split(';')
        const eqIdx = nameValue.indexOf('=')
        if (eqIdx <= 0)
          continue

        const name = nameValue.slice(0, eqIdx).trim()
        const value = nameValue.slice(eqIdx + 1).trim()
        if (!name)
          continue

        if (isExpired(attributes)) {
          // Expired cookie == deletion request: drop any stored value.
          cookies.delete(name)
          continue
        }

        cookies.set(name, value)
      }

      // Rebuild cookie string
      cookieString = Array.from(cookies.entries())
        .map(([k, v]) => `${k}=${v}`)
        .join('; ')
    },

    getCookieHeader(): string {
      return cookieString
    },
  }
}
|
package/cli/index.ts
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* gdl-instagram — CLI entry point.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* gdl-instagram <url> [options] ← auto-detect from URL
|
|
7
|
+
* gdl-instagram tag <hashtag> [options]
|
|
8
|
+
* gdl-instagram saved [options]
|
|
9
|
+
*
|
|
10
|
+
* Every option is self-documented via ``--help``.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { Command } from 'commander'
|
|
14
|
+
import { ConfigManager } from '../config'
|
|
15
|
+
import { DownloadJob, PrintJob } from '../core/job'
|
|
16
|
+
import {
|
|
17
|
+
InstagramHighlightsExtractor,
|
|
18
|
+
InstagramPostExtractor,
|
|
19
|
+
InstagramSavedExtractor,
|
|
20
|
+
InstagramStoriesExtractor,
|
|
21
|
+
InstagramTagExtractor,
|
|
22
|
+
InstagramUserExtractor,
|
|
23
|
+
} from '../instagram/extractors'
|
|
24
|
+
import { createHttpClient, createLogger, createStorage, createWebClient, extractCsrfFromCookies } from './adapter'
|
|
25
|
+
|
|
26
|
+
/** Shared options — applied to all subcommands */
|
|
27
|
+
|
|
28
|
+
/**
 * Parsed command-line options shared by every subcommand.
 * Field names are the camel-cased forms of the flags registered in `addSharedOptions`.
 */
interface GlobalOptions {
  /** `--sessionid`: sessionid cookie value; defaults to the INSTAGRAM_SESSIONID env var. */
  sessionid?: string
  /** `--cookies`: full Cookie header string; defaults to the INSTAGRAM_COOKIES env var. */
  cookies?: string
  /** `-o, --output`: output directory (default './data'). */
  output?: string
  /** `--videos`: video download mode as a string — 'true', 'false' or 'merged'. */
  videos?: string
  /** `--previews`: comma-separated preview types (video,audio). */
  previews?: string
  /** `--audio`: download standalone audio tracks. */
  audio?: boolean
  /** `--max-posts`: maximum number of posts to download. */
  maxPosts?: number
  /** `--cursor`: pagination cursor to resume from. */
  cursor?: string
  /** `--order-posts`: post ordering (asc, desc, id, id_asc, id_desc). */
  orderPosts?: string
  /** `--order-files`: file ordering (r, d, or empty). */
  orderFiles?: string
  /** `--static-videos` / `--no-static-videos`: download cover images instead of videos. */
  staticVideos?: boolean
  /** `--api`: API backend, 'rest' (default) or 'graphql'. */
  api?: string
  /** `-v, --verbose`: enable debug logging. */
  verbose?: boolean
  /** `--include`: comma-separated sub-extractors for user URLs (default 'posts'). */
  include?: string
  /** `--split`: for stories, split each frame into a separate post. */
  split?: boolean
  /** `-i, --info`: print post info instead of downloading. */
  info?: boolean
}
|
|
46
|
+
|
|
47
|
+
/**
 * Register the options common to all subcommands on `cmd`.
 *
 * `--sessionid` and `--cookies` default to the INSTAGRAM_SESSIONID and
 * INSTAGRAM_COOKIES environment variables respectively.
 *
 * @param cmd - The commander command to extend.
 * @returns The same command, for chaining.
 */
function addSharedOptions(cmd: Command): Command {
  return cmd
    .option(
      '--sessionid <cookie>',
      'Instagram sessionid cookie value (from browser)',
      process.env.INSTAGRAM_SESSIONID,
    )
    .option(
      '--cookies <string>',
      'Full Cookie header string from browser (DevTools → Network → Request Headers → Cookie)',
      process.env.INSTAGRAM_COOKIES,
    )
    .option('-o, --output <dir>', 'Output directory', './data')
    .option(
      '--videos <mode>',
      'Download videos: true, false, or merged (yt-dlp)',
      'true',
    )
    .option(
      '--previews <types>',
      'Download only previews: video,audio (comma-separated)',
    )
    .option('--audio', 'Download standalone audio tracks', false)
    .option(
      '--max-posts <n>',
      'Maximum number of posts to download',
      // commander calls the parser as (value, previousValue). Passing
      // Number.parseInt directly would treat the previous value as the radix
      // when the flag is repeated, so pin the radix explicitly.
      (value: string) => Number.parseInt(value, 10),
    )
    .option(
      '--cursor <cursor>',
      'Resume from pagination cursor (see output of previous run)',
    )
    .option(
      '--order-posts <order>',
      'Post ordering: asc, desc, id, id_asc, id_desc',
    )
    .option(
      '--order-files <order>',
      'File ordering: r, d (reverse), or empty for natural order',
    )
    .option(
      '--static-videos',
      'Download static video cover images instead of actual videos',
      false,
    )
    .option(
      '--no-static-videos',
      'Do not force static video covers (download real videos)',
    )
    .option('--api <backend>', 'API backend: rest (default) or graphql', 'rest')
    .option('-v, --verbose', 'Verbose debug output', false)
    .option(
      '--include <list>',
      'For user: comma-separated sub-extractors (posts,reels,tagged,stories,highlights,info,avatar)',
      'posts',
    )
    .option(
      '--split',
      'For stories: split each frame into a separate post',
      false,
    )
    .option(
      '-i, --info',
      'Print structured post info to terminal (no download)',
      false,
    )
}
|
|
110
|
+
|
|
111
|
+
/**
 * Translate parsed CLI options into a `ConfigManager`.
 *
 * Only options with a truthy value are forwarded; they are stored as one
 * object under the 'extractor.instagram' key. When no option qualifies,
 * nothing is set and the manager is returned as constructed.
 */
function buildConfig(opts: GlobalOptions): ConfigManager {
  const config = new ConfigManager()

  // [config key, value] in the order the keys are written; falsy values are skipped below.
  const candidates: Array<[string, unknown]> = [
    ['videos', opts.videos],
    ['previews', opts.previews ? opts.previews.split(',') : undefined],
    ['audio', opts.audio ? true : undefined],
    ['max-posts', opts.maxPosts],
    ['cursor', opts.cursor],
    ['order-posts', opts.orderPosts],
    ['order-files', opts.orderFiles],
    ['static-videos', opts.staticVideos ? true : undefined],
    ['api', opts.api],
    ['include', opts.include],
    ['split', opts.split ? true : undefined],
  ]

  const ig: Record<string, unknown> = {}
  for (const [key, value] of candidates) {
    if (value)
      ig[key] = value
  }

  if (Object.keys(ig).length > 0) {
    config.set('extractor.instagram', ig)
  }

  return config
}
|
|
147
|
+
|
|
148
|
+
/**
 * Pick the extractor class whose static `pattern` matches `url`.
 *
 * Candidates are tried in a fixed order and the first match wins; the user
 * extractor is tried last.
 *
 * @throws Error listing the supported URL shapes when nothing matches.
 */
function resolveExtractor(url: string): {
  new (opts: any): any
  readonly pattern: RegExp
} {
  const candidates = [
    InstagramPostExtractor,
    InstagramStoriesExtractor,
    InstagramHighlightsExtractor,
    InstagramTagExtractor,
    InstagramSavedExtractor,
    InstagramUserExtractor,
  ]

  const matched = candidates.find(candidate => candidate.pattern.test(url))
  if (matched) {
    return matched
  }

  throw new Error(
    `No extractor matched URL: ${url}. `
    + 'Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/',
  )
}
|
|
172
|
+
|
|
173
|
+
/**
 * Instantiate `extrClass` for `url` and run it as a print job (`--info`)
 * or a download job.
 *
 * Authentication priority: `--cookies` > `--sessionid` > anonymous
 * cookie-jar session seeded from instagram.com.
 *
 * Exits the process with code 1 when the URL does not match the extractor's
 * pattern or when the job throws. A non-zero job status is only logged as a
 * warning.
 *
 * @param url - The Instagram URL to process.
 * @param extrClass - Extractor constructor with a static `pattern`.
 * @param opts - Parsed CLI options.
 */
async function runExtractor(
  url: string,
  extrClass: {
    new (opts: any): any
    readonly pattern: RegExp
  },
  opts: GlobalOptions,
): Promise<void> {
  const config = buildConfig(opts)
  const log = createLogger(opts.verbose ?? false)

  // Validate the URL before building the HTTP client: the anonymous path
  // performs a network round-trip that is pointless for a URL we will reject.
  const match = extrClass.pattern.exec(url)
  if (!match) {
    console.error(`URL did not match expected pattern: ${url}`)
    process.exit(1)
  }

  let http: ReturnType<typeof createHttpClient>
  let webCsrf: string | undefined

  if (opts.cookies) {
    // Full cookie string — highest priority
    http = createHttpClient(undefined, opts.cookies, log)
    webCsrf = extractCsrfFromCookies(opts.cookies)
  }
  else if (opts.sessionid) {
    http = createHttpClient(opts.sessionid, undefined, log)
  }
  else {
    const wc = await createWebClient(log)
    http = wc.http
    webCsrf = wc.csrfToken
  }
  const storage = createStorage()

  // eslint-disable-next-line new-cap
  const extractor = new extrClass({
    url,
    match,
    config,
    http,
    storage,
    log,
    sessionId: opts.sessionid,
    csrfToken: webCsrf,
  }) as import('../core/extractor').Extractor

  let job: PrintJob | DownloadJob
  if (opts.info) {
    job = new PrintJob(extractor)
  }
  else {
    const download = new DownloadJob(extractor)
    download.basePath = opts.output ?? './data'
    job = download
  }

  const start = Date.now()
  try {
    const status = await job.run()
    const elapsed = ((Date.now() - start) / 1000).toFixed(1)

    if (status !== 0) {
      log.warn(`Finished with status ${status} in ${elapsed}s`)
    }
    else if (!opts.info) {
      // Print mode stays quiet on success so only the post info is shown.
      log.info(`Done in ${elapsed}s`)
    }
  }
  catch (err) {
    log.error(String(err))
    process.exit(1)
  }
}
|
|
262
|
+
|
|
263
|
+
/** Program */
|
|
264
|
+
|
|
265
|
+
// Root command: carries the tool name, the top-level help text and the version.
const program = new Command()

// NOTE(review): the description says a sessionid is required, yet runExtractor
// falls back to an anonymous session when neither --cookies nor --sessionid is
// given — confirm which statement is intended.
// NOTE(review): the version string is hard-coded here; keep it in sync with package.json.
program
  .name('gdl-instagram')
  .description(
    'Download images and videos from Instagram.\n\n'
    + 'Uses gallery-dl\'s extraction pipeline — supports posts, reels,\n'
    + 'stories, highlights, tagged posts, saved collections, and more.\n\n'
    + 'Requires a sessionid cookie exported from your browser.\n'
    + 'Set via --sessionid or INSTAGRAM_SESSIONID environment variable.',
  )
  .version('0.1.0')
|
|
277
|
+
|
|
278
|
+
/** Default command — auto-detect extractor from URL */
|
|
279
|
+
|
|
280
|
+
// `dl` is registered as the default command, so `gdl-instagram <url>` works
// without naming a subcommand. The URL is optional only so that a bare
// invocation can show the help text instead of an argument error.
const dlCmd = program
  .command('dl', { isDefault: true })
  .argument('[url]', 'Instagram URL to download (auto-detects type)')
  .description(
    'Download media from an Instagram URL (auto-detects post/user/stories/…)\n\n'
    + 'Examples:\n'
    + '  gdl-instagram https://www.instagram.com/p/CxAbCdEfGh/\n'
    + '  gdl-instagram https://www.instagram.com/username/ --include=posts,reels\n'
    + '  gdl-instagram https://www.instagram.com/stories/username/',
  )
  .action(async (url: string | undefined, opts: GlobalOptions) => {
    if (!url) {
      // No URL given: print the top-level help.
      program.help()
      return
    }
    // resolveExtractor throws when no extractor pattern matches the URL.
    const ExtrClass = resolveExtractor(url)
    await runExtractor(url, ExtrClass, opts)
  })
addSharedOptions(dlCmd)
|
|
299
|
+
|
|
300
|
+
/** tag subcommand */
|
|
301
|
+
|
|
302
|
+
// `tag` subcommand: download posts for a hashtag given either as a bare tag
// (with or without a leading '#') or as a full explore/tags URL.
const tag = program
  .command('tag <hashtag>')
  .description(
    'Download posts from an Instagram hashtag\n\n'
    + 'Examples:\n'
    + '  gdl-instagram tag cats\n'
    + '  gdl-instagram tag https://www.instagram.com/explore/tags/cats/',
  )
  .action(async (hashtag: string, opts: GlobalOptions) => {
    // Accept both raw tags and full URLs. Require an actual http(s) scheme so
    // that tags which merely start with "http" (e.g. "httpcats") stay tags.
    const isUrl = /^https?:\/\//i.test(hashtag)
    // A leading '#' would turn the rest of the URL into a fragment, so drop it.
    const url = isUrl
      ? hashtag
      : `https://www.instagram.com/explore/tags/${hashtag.replace(/^#+/, '')}/`
    await runExtractor(url, InstagramTagExtractor, opts)
  })
addSharedOptions(tag)
|
|
318
|
+
|
|
319
|
+
/** saved subcommand */
|
|
320
|
+
|
|
321
|
+
// `saved` subcommand: takes no positional argument and always runs the saved
// extractor against the fixed '/me/saved/' URL.
const saved = program
  .command('saved')
  .description(
    'Download your saved (bookmarked) posts\n\n'
    + 'Requires authentication via --sessionid.\n\n'
    + 'Examples:\n'
    + '  gdl-instagram saved --sessionid=abc123',
  )
  .action(async (opts: GlobalOptions) => {
    const url = 'https://www.instagram.com/me/saved/'
    await runExtractor(url, InstagramSavedExtractor, opts)
  })
addSharedOptions(saved)
|
|
334
|
+
|
|
335
|
+
/** parse */
|
|
336
|
+
|
|
337
|
+
// All action handlers are async, so parseAsync() is required for their
// rejections (e.g. "No extractor matched URL") to be observed; with parse()
// they would surface as unhandled promise rejections.
program.parseAsync().catch((err: unknown) => {
  console.error(`[error] ${err instanceof Error ? err.message : String(err)}`)
  process.exit(1)
})
|