@chilfish/gallery-dl-instagram 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/index.ts DELETED
@@ -1,337 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * gdl-instagram — CLI entry point.
4
- *
5
- * Usage:
6
- * gdl-instagram <url> [options] ← auto-detect from URL
7
- * gdl-instagram tag <hashtag> [options]
8
- * gdl-instagram saved [options]
9
- *
10
- * Every option is self-documented via ``--help``.
11
- */
12
-
13
- import { Command } from 'commander'
14
- import { ConfigManager } from '../config'
15
- import { DownloadJob, PrintJob } from '../core/job'
16
- import {
17
- InstagramHighlightsExtractor,
18
- InstagramPostExtractor,
19
- InstagramSavedExtractor,
20
- InstagramStoriesExtractor,
21
- InstagramTagExtractor,
22
- InstagramUserExtractor,
23
- } from '../instagram/extractors'
24
- import { createHttpClient, createLogger, createStorage, createWebClient, extractCsrfFromCookies } from './adapter'
25
-
26
- /** Shared options — applied to all subcommands */
27
-
28
/**
 * Parsed command-line options shared by every subcommand.
 *
 * Property names are the camel-cased forms of the flags registered in
 * `addSharedOptions`.
 */
interface GlobalOptions {
  // Authentication

  /** `--sessionid`: Instagram sessionid cookie value. */
  sessionid?: string
  /** `--cookies`: full Cookie header string copied from the browser. */
  cookies?: string

  // Output

  /** `-o, --output`: output directory. */
  output?: string
  /** `-i, --info`: print structured post info instead of downloading. */
  info?: boolean
  /** `-v, --verbose`: verbose debug output. */
  verbose?: boolean

  // Media selection

  /** `--videos`: video download mode (`true`, `false` or `merged`). */
  videos?: string
  /** `--previews`: comma-separated preview types. */
  previews?: string
  /** `--audio`: download standalone audio tracks. */
  audio?: boolean
  /** `--static-videos` / `--no-static-videos`: prefer static video covers. */
  staticVideos?: boolean

  // Pagination and ordering

  /** `--max-posts`: maximum number of posts to download. */
  maxPosts?: number
  /** `--cursor`: pagination cursor to resume from. */
  cursor?: string
  /** `--order-posts`: post ordering. */
  orderPosts?: string
  /** `--order-files`: file ordering. */
  orderFiles?: string

  // Extractor behaviour

  /** `--api`: API backend (`rest` or `graphql`). */
  api?: string
  /** `--include`: comma-separated sub-extractors for user URLs. */
  include?: string
  /** `--split`: split each story frame into a separate post. */
  split?: boolean
}
46
-
47
/**
 * Register the options shared by every subcommand on `cmd`.
 *
 * `--sessionid` and `--cookies` default to the `INSTAGRAM_SESSIONID` and
 * `INSTAGRAM_COOKIES` environment variables respectively.
 *
 * @param cmd - The commander command to extend.
 * @returns The command produced by the chained `.option()` calls.
 */
function addSharedOptions(cmd: Command): Command {
  // commander invokes a custom option processor as `(value, previous)`.
  // Passing `Number.parseInt` directly would therefore use the previous
  // value as the radix, so the number is parsed explicitly in base 10.
  const parseDecimal = (value: string): number => Number.parseInt(value, 10)

  return cmd
    .option(
      '--sessionid <cookie>',
      'Instagram sessionid cookie value (from browser)',
      process.env.INSTAGRAM_SESSIONID,
    )
    .option(
      '--cookies <string>',
      'Full Cookie header string from browser (DevTools → Network → Request Headers → Cookie)',
      process.env.INSTAGRAM_COOKIES,
    )
    .option('-o, --output <dir>', 'Output directory', './data')
    .option(
      '--videos <mode>',
      'Download videos: true, false, or merged (yt-dlp)',
      'true',
    )
    .option(
      '--previews <types>',
      'Download only previews: video,audio (comma-separated)',
    )
    .option('--audio', 'Download standalone audio tracks', false)
    .option('--max-posts <n>', 'Maximum number of posts to download', parseDecimal)
    .option(
      '--cursor <cursor>',
      'Resume from pagination cursor (see output of previous run)',
    )
    .option(
      '--order-posts <order>',
      'Post ordering: asc, desc, id, id_asc, id_desc',
    )
    .option(
      '--order-files <order>',
      'File ordering: r, d (reverse), or empty for natural order',
    )
    .option(
      '--static-videos',
      'Download static video cover images instead of actual videos',
      false,
    )
    .option(
      '--no-static-videos',
      'Do not force static video covers (download real videos)',
    )
    .option('--api <backend>', 'API backend: rest (default) or graphql', 'rest')
    .option('-v, --verbose', 'Verbose debug output', false)
    .option(
      '--include <list>',
      'For user: comma-separated sub-extractors (posts,reels,tagged,stories,highlights,info,avatar)',
      'posts',
    )
    .option(
      '--split',
      'For stories: split each frame into a separate post',
      false,
    )
    .option(
      '-i, --info',
      'Print structured post info to terminal (no download)',
      false,
    )
}
110
-
111
- /** Build config from parsed options */
112
-
113
/**
 * Translate parsed CLI options into a `ConfigManager`.
 *
 * Only options with a truthy value are copied; they are stored together
 * under the `extractor.instagram` path. When no option is set the returned
 * config is left empty.
 *
 * @param opts - Options parsed by commander.
 * @returns A fresh config holding the Instagram extractor settings.
 */
function buildConfig(opts: GlobalOptions): ConfigManager {
  const config = new ConfigManager()

  const ig: Record<string, unknown> = {}

  if (opts.videos)
    ig.videos = opts.videos
  if (opts.previews) {
    // Tolerate spaces and stray commas, e.g. "video, audio" or "video,".
    const previews = opts.previews
      .split(',')
      .map(entry => entry.trim())
      .filter(entry => entry.length > 0)
    if (previews.length > 0)
      ig.previews = previews
  }
  if (opts.audio)
    ig.audio = true
  // A truthiness check also drops 0 and NaN (an unparsable --max-posts).
  if (opts.maxPosts)
    ig['max-posts'] = opts.maxPosts
  if (opts.cursor)
    ig.cursor = opts.cursor
  if (opts.orderPosts)
    ig['order-posts'] = opts.orderPosts
  if (opts.orderFiles)
    ig['order-files'] = opts.orderFiles
  if (opts.staticVideos)
    ig['static-videos'] = true
  if (opts.api)
    ig.api = opts.api
  if (opts.include)
    ig.include = opts.include
  if (opts.split)
    ig.split = true

  if (Object.keys(ig).length > 0) {
    config.set('extractor.instagram', ig)
  }

  return config
}
147
-
148
- /** Auto-detect the right extractor for a URL */
149
-
150
/**
 * Pick the extractor class whose static `pattern` matches `url`.
 *
 * Candidates are tried in the listed order and the first match wins; the
 * user extractor is tried last.
 *
 * @param url - The Instagram URL given on the command line.
 * @returns The matching extractor class.
 * @throws Error when no extractor pattern matches the URL.
 */
function resolveExtractor(url: string): {
  new (opts: any): any
  readonly pattern: RegExp
} {
  const candidates = [
    InstagramPostExtractor,
    InstagramStoriesExtractor,
    InstagramHighlightsExtractor,
    InstagramTagExtractor,
    InstagramSavedExtractor,
    InstagramUserExtractor,
  ]

  const matched = candidates.find(candidate => candidate.pattern.test(url))
  if (matched !== undefined)
    return matched

  throw new Error(
    `No extractor matched URL: ${url}. `
    + 'Supported: /p/, /reel/, /{user}/, /stories/, /highlights/, /explore/tags/, /saved/',
  )
}
172
-
173
- /** Run an extractor */
174
-
175
/**
 * Build an extractor for `url` and run it as a download or print job.
 *
 * The HTTP client is chosen in priority order: a full cookie string, then a
 * bare sessionid, then a web client created by `createWebClient`. With
 * `opts.info` set a `PrintJob` runs instead of a `DownloadJob`.
 *
 * The process exits with status 1 when `url` does not match the extractor's
 * pattern or when the job throws.
 *
 * @param url - The URL to extract from.
 * @param extrClass - Extractor class to instantiate.
 * @param opts - Options parsed by commander.
 */
async function runExtractor(
  url: string,
  extrClass: {
    new (opts: any): any
    readonly pattern: RegExp
  },
  opts: GlobalOptions,
): Promise<void> {
  const config = buildConfig(opts)
  const log = createLogger(opts.verbose ?? false)

  let http: ReturnType<typeof createHttpClient>
  let webCsrf: string | undefined
  if (opts.cookies) {
    // Full cookie string — highest priority
    http = createHttpClient(undefined, opts.cookies, log)
    webCsrf = extractCsrfFromCookies(opts.cookies)
  }
  else if (opts.sessionid) {
    http = createHttpClient(opts.sessionid, undefined, log)
  }
  else {
    const webClient = await createWebClient(log)
    http = webClient.http
    webCsrf = webClient.csrfToken
  }
  const storage = createStorage()

  const match = extrClass.pattern.exec(url)
  if (!match) {
    console.error(`URL did not match expected pattern: ${url}`)
    process.exit(1)
  }

  // eslint-disable-next-line new-cap
  const extractor = new extrClass({
    url,
    match,
    config,
    http,
    storage,
    log,
    sessionId: opts.sessionid,
    csrfToken: webCsrf,
  }) as import('../core/extractor').Extractor

  /** Seconds elapsed since `startedAt`, formatted with one decimal. */
  const secondsSince = (startedAt: number): string =>
    ((Date.now() - startedAt) / 1000).toFixed(1)

  if (!opts.info) {
    const downloadJob = new DownloadJob(extractor)
    downloadJob.basePath = opts.output ?? './data'

    const startedAt = Date.now()
    try {
      const status = await downloadJob.run()
      const elapsed = secondsSince(startedAt)

      if (status === 0) {
        log.info(`Done in ${elapsed}s`)
      }
      else {
        log.warn(`Finished with status ${status} in ${elapsed}s`)
      }
    }
    catch (err) {
      log.error(String(err))
      process.exit(1)
    }
    return
  }

  // Info mode: print only, and stay quiet on success.
  const printJob = new PrintJob(extractor)
  const startedAt = Date.now()
  try {
    const status = await printJob.run()
    const elapsed = secondsSince(startedAt)
    if (status !== 0) {
      log.warn(`Finished with status ${status} in ${elapsed}s`)
    }
  }
  catch (err) {
    log.error(String(err))
    process.exit(1)
  }
}
262
-
263
- /** Program */
264
-
265
// Root command: name, help text and version of the CLI.
const program = new Command()

program.name('gdl-instagram')
program.description(
  [
    'Download images and videos from Instagram.\n\n',
    'Uses gallery-dl\'s extraction pipeline — supports posts, reels,\n',
    'stories, highlights, tagged posts, saved collections, and more.\n\n',
    'Requires a sessionid cookie exported from your browser.\n',
    'Set via --sessionid or INSTAGRAM_SESSIONID environment variable.',
  ].join(''),
)
program.version('0.1.0')
277
-
278
- /** Default command — auto-detect extractor from URL */
279
-
280
// Default `dl` command: picks the extractor from the URL, or shows help
// when no URL is given.
const dlCmd = program.command('dl', { isDefault: true })

dlCmd.argument('[url]', 'Instagram URL to download (auto-detects type)')
dlCmd.description(
  [
    'Download media from an Instagram URL (auto-detects post/user/stories/…)\n\n',
    'Examples:\n',
    ' gdl-instagram https://www.instagram.com/p/CxAbCdEfGh/\n',
    ' gdl-instagram https://www.instagram.com/username/ --include=posts,reels\n',
    ' gdl-instagram https://www.instagram.com/stories/username/',
  ].join(''),
)
dlCmd.action(async (url: string | undefined, opts: GlobalOptions) => {
  if (url) {
    await runExtractor(url, resolveExtractor(url), opts)
    return
  }
  program.help()
})
addSharedOptions(dlCmd)
299
-
300
- /** tag subcommand */
301
-
302
// `tag` subcommand: download posts for a hashtag given either as a bare tag
// (with or without a leading '#') or as a full explore/tags URL.
const tag = program
  .command('tag <hashtag>')
  .description(
    'Download posts from an Instagram hashtag\n\n'
    + 'Examples:\n'
    + ' gdl-instagram tag cats\n'
    + ' gdl-instagram tag https://www.instagram.com/explore/tags/cats/',
  )
  .action(async (hashtag: string, opts: GlobalOptions) => {
    // Only a real http(s):// prefix marks a URL; a plain `startsWith('http')`
    // would also swallow tags such as "httpster".
    const isUrl = /^https?:\/\//i.test(hashtag)
    // Strip leading '#' so "#cats" does not become a URL fragment.
    const url = isUrl
      ? hashtag
      : `https://www.instagram.com/explore/tags/${hashtag.replace(/^#+/, '')}/`
    await runExtractor(url, InstagramTagExtractor, opts)
  })
addSharedOptions(tag)
318
-
319
- /** saved subcommand */
320
-
321
// `saved` subcommand: always runs the saved-posts extractor against the
// fixed `/me/saved/` URL.
const saved = program.command('saved')

saved.description(
  [
    'Download your saved (bookmarked) posts\n\n',
    'Requires authentication via --sessionid.\n\n',
    'Examples:\n',
    ' gdl-instagram saved --sessionid=abc123',
  ].join(''),
)
saved.action(async (opts: GlobalOptions) => {
  await runExtractor(
    'https://www.instagram.com/me/saved/',
    InstagramSavedExtractor,
    opts,
  )
})
addSharedOptions(saved)
334
-
335
- /** parse */
336
-
337
// Every action handler is async, so parse with parseAsync() and report a
// rejected handler (e.g. an unsupported URL) as a plain error message
// instead of leaving an unhandled promise rejection.
program.parseAsync().catch((err: unknown) => {
  console.error(err instanceof Error ? err.message : String(err))
  process.exit(1)
})
package/config.ts DELETED
@@ -1,80 +0,0 @@
1
- /**
2
- * Simple nested config reader.
3
- *
4
- * Mirrors gallery-dl's ``config.interpolate``:
5
- *
6
- * cfgPath like ``['extractor', 'instagram', 'post']``
7
- * looks up: extractor.instagram.post.{key}, extractor.instagram.{key},
8
- * extractor.{key}
9
- */
10
-
11
- import type { Config, ConfigValue } from './types'
12
-
13
/**
 * In-memory nested configuration with dot-path access and hierarchical
 * ("interpolated") lookup.
 */
export class ConfigManager {
  private readonly data: Config

  /**
   * @param data - Initial configuration tree. It is stored by reference, so
   * later `set()` calls mutate the object passed in.
   */
  constructor(data: Config = {}) {
    this.data = data
  }

  /** True for non-null, non-array objects, i.e. nodes that can hold keys. */
  private static isSection(value: unknown): value is Record<string, unknown> {
    return value != null && typeof value === 'object' && !Array.isArray(value)
  }

  /**
   * Read a value at a dot-path like ``'extractor.instagram.videos'``.
   *
   * @param path - Dot-separated key path.
   * @param defaultValue - Returned when the path doesn't exist.
   * @returns The stored value, or `defaultValue` when any segment is missing
   * or an intermediate node is not a plain object.
   */
  get(path: string, defaultValue?: ConfigValue): ConfigValue | undefined {
    let node: unknown = this.data
    for (const key of path.split('.')) {
      if (!ConfigManager.isSection(node)) {
        return defaultValue
      }
      node = node[key]
    }
    if (node === undefined)
      return defaultValue
    return node as ConfigValue
  }

  /**
   * Interpolate a config key through a hierarchy of paths.
   *
   * Lookup order, following gallery-dl's ``config.interpolate``:
   * 1. a key at the root of the config wins outright;
   * 2. otherwise `cfgPath` is walked from the root and the value found at
   *    the deepest level that defines `key` wins, so for
   *    ``['extractor', 'instagram', 'post']`` the precedence is
   *    extractor.instagram.post.{key}, extractor.instagram.{key},
   *    extractor.{key}.
   *
   * @param cfgPath - Path segments from least to most specific.
   * @param key - The option name to look up at each level.
   * @param defaultVal - Returned when no level defines `key`.
   * @returns The most specific value found, or `defaultVal`.
   */
  interpolate(
    cfgPath: readonly string[],
    key: string,
    defaultVal?: ConfigValue,
  ): ConfigValue | undefined {
    const root: unknown = this.data
    if (!ConfigManager.isSection(root))
      return defaultVal

    const rootValue = root[key]
    if (rootValue !== undefined)
      return rootValue as ConfigValue

    let result = defaultVal
    let section = root
    for (const segment of cfgPath) {
      const child = section[segment]
      // Stop at the first missing level; keep what was found so far.
      if (!ConfigManager.isSection(child))
        break
      const value = child[key]
      if (value !== undefined)
        result = value as ConfigValue
      section = child
    }

    return result
  }

  /**
   * Mutate the config at a given dot-path.
   *
   * Missing intermediate nodes are created; intermediate values that are not
   * plain objects (including arrays) are replaced by empty objects.
   *
   * @param path - Dot-separated key path.
   * @param value - Value to store at the final segment.
   */
  set(path: string, value: unknown): void {
    const keys = path.split('.')
    let node: Record<string, unknown> = this.data as Record<string, unknown>
    for (let i = 0; i < keys.length - 1; i++) {
      const key = keys[i]!
      let child = node[key]
      if (!ConfigManager.isSection(child)) {
        child = {}
        node[key] = child
      }
      node = child as Record<string, unknown>
    }
    node[keys[keys.length - 1]!] = value
  }
}
package/core/extractor.ts DELETED
@@ -1,217 +0,0 @@
1
- /**
2
- * Base Extractor abstract class.
3
- *
4
- * Every extractor extends this. The class provides:
5
- * - URL pattern matching via ``fromURL``
6
- * - One-time initialization guarded by ``initialize()``
7
- * - Async-iteration entrypoint ``[Symbol.asyncIterator]`` → ``items()``
8
- * - Rate-limited HTTP requests
9
- * - Timestamp parsing
10
- */
11
-
12
- import type { ConfigManager } from '../config'
13
- import type {
14
- ConfigValue,
15
- HttpClient,
16
- HttpResponse,
17
- MessageIter,
18
- RequestConfig,
19
- Storage,
20
- } from '../types'
21
-
22
/** Everything an `Extractor` needs at construction time. */
export interface ExtractorOptions {
  /** The input URL being extracted. */
  url: string
  /** Result of matching `url` against the extractor's pattern. */
  match: RegExpMatchArray
  /** Configuration source for extractor options. */
  config: ConfigManager
  /** HTTP client used for requests. */
  http: HttpClient
  /** Storage backend. */
  storage: Storage
  /** The logger interface — at minimum a debug/info/warn/error contract */
  log: Logger
}
31
-
32
/** Signature shared by every log level: a message plus optional extra values. */
type LogMethod = (message: string, ...args: unknown[]) => void

/** Minimal four-level logging contract used by extractors. */
export interface Logger {
  debug: LogMethod
  info: LogMethod
  warn: LogMethod
  error: LogMethod
}
38
-
39
- /** A no-op logger */
40
/** Logger whose four methods accept any call and do nothing. */
export const noopLogger: Logger = {
  debug() {},
  info() {},
  warn() {},
  error() {},
}
46
-
47
/**
 * Base class for all extractors.
 *
 * Provides one-time initialization, an async-iteration entry point that
 * delegates to `items()`, rate-limited HTTP helpers, hierarchical config
 * lookup and a few small utilities.
 */
export abstract class Extractor {
  /** Human-readable category (e.g. ``'instagram'``) */
  abstract readonly category: string

  /** Sub-category (e.g. ``'post'``, ``'posts'``, ``'reels'``) */
  abstract readonly subcategory: string

  /** Root URL (e.g. ``'https://www.instagram.com'``) */
  abstract readonly root: string

  /** Regex pattern to match against URLs; the base pattern only matches the empty string */
  static readonly pattern: RegExp = /^$/

  /** The input URL */
  readonly url: string

  /** Capture groups of the URL match (the full match itself is dropped) */
  readonly groups: readonly string[]

  readonly config: ConfigManager
  /** HTTP client — public so Job can access for downloads */
  readonly http: HttpClient
  /** Storage backend — public so Job can access for writes */
  readonly storage: Storage
  /** Logger instance — public so Job can access for reporting */
  readonly log: Logger

  /** Delay range in seconds — random between [min, max] before each request */
  protected requestInterval: [number, number] = [6, 12]

  private _initialized = false

  /**
   * @param opts - URL, regex match and collaborators (config, HTTP client,
   * storage, logger) for this extractor.
   */
  constructor(opts: ExtractorOptions) {
    this.url = opts.url
    this.groups = opts.match ? [...opts.match].slice(1) : []
    this.config = opts.config
    this.http = opts.http
    this.storage = opts.storage
    this.log = opts.log
  }

  /** Initialization */

  /**
   * One-time async setup (cookies, session, internal state).
   * Safe to call multiple times — after the first successful call it becomes
   * a no-op. If `_init()` throws, nothing is marked as done and a later call
   * runs `_init()` again.
   */
  async initialize(): Promise<void> {
    if (this._initialized)
      return
    await this._init()
    this._initialized = true
    // Shadow the method on this instance with a no-op so later calls
    // (including subclass overrides that call super.initialize()) do nothing.
    this.initialize = async () => {}
  }

  /**
   * Subclass hook for one-time setup.
   */
  protected async _init(): Promise<void> {
    // no-op by default
  }

  /** Async iteration */

  /**
   * Iterating the extractor initializes it first, then yields everything
   * produced by `items()`.
   */
  async* [Symbol.asyncIterator](): MessageIter {
    await this.initialize()
    yield* this.items()
  }

  /**
   * The main extraction pipeline. Subclasses *must* implement this.
   */
  abstract items(): MessageIter

  /** Config helpers */

  /**
   * Read a config value using the interpolated hierarchy
   * ``['extractor', category, subcategory]``.
   *
   * @param key - Option name.
   * @param defaultVal - Value returned when the option is not configured.
   */
  protected _cfg(key: string, defaultVal?: ConfigValue): ConfigValue | undefined {
    const path: readonly string[] = [
      'extractor',
      this.category,
      this.subcategory,
    ]
    return this.config.interpolate(path, key, defaultVal)
  }

  /** HTTP */

  /** `Date.now()` at the end of the most recent request attempt (0 = none yet) */
  private _lastRequestTime = 0

  /**
   * Rate-limited HTTP request wrapper.
   *
   * Waits according to `requestInterval`, then delegates to the HTTP client.
   * Properties in `cfg` are spread after `url` and so take precedence over it.
   *
   * @param url - Target URL.
   * @param cfg - Extra request settings passed through to the HTTP client.
   * @returns The HTTP client's response; its rejection propagates unchanged.
   */
  async request(
    url: string,
    cfg: RequestConfig = {},
  ): Promise<HttpResponse<unknown>> {
    await this._throttle()
    try {
      return await this.http.request({ url, ...cfg })
    }
    finally {
      // Record failed attempts too, so a retry after an error is still
      // throttled instead of firing immediately.
      this._lastRequestTime = Date.now()
    }
  }

  /**
   * Convenience: request + parse JSON body.
   *
   * A response body that is already an object (this includes `null`) is
   * returned as-is. Anything else is passed to `JSON.parse`; if that fails
   * an empty object is returned and the failure is logged at debug level.
   */
  async requestJSON(
    url: string,
    cfg: RequestConfig = {},
  ): Promise<unknown> {
    const resp = await this.request(url, cfg)
    if (typeof resp.data === 'object')
      return resp.data
    try {
      return JSON.parse(resp.data as string)
    }
    catch (err) {
      this.log.debug(`Response from ${url} is not valid JSON: ${String(err)}`)
      return {}
    }
  }

  /** Rate limiting */

  /**
   * Sleep long enough to keep the minimum interval between requests.
   *
   * A random target delay within `requestInterval` is drawn on every call;
   * the time already elapsed since the last request is subtracted from it.
   */
  private async _throttle(): Promise<void> {
    const now = Date.now()
    const elapsed = now - this._lastRequestTime
    const [min, max] = this.requestInterval
    // Random delay in seconds, converted to milliseconds below
    const target = min + Math.random() * (max - min)
    const waitMs = Math.max(0, target * 1000 - elapsed)
    if (waitMs > 0) {
      await new Promise(r => setTimeout(r, waitMs))
    }
  }

  /** Utility */

  /**
   * Convert a Unix timestamp (seconds or ms) to an ISO-8601 string.
   *
   * @param ts - Timestamp in seconds or milliseconds.
   * @returns The ISO string, or `''` when `ts` is null/undefined, not a
   * finite number, or outside the range a `Date` can represent.
   */
  parseTimestamp(ts: number | null | undefined): string {
    if (ts == null || !Number.isFinite(ts))
      return ''
    // Values above 2.5e9 are taken to be milliseconds already
    // (2.5e9 seconds would be roughly the year 2049).
    const asMs = ts > 2_500_000_000 ? ts : ts * 1000
    const date = new Date(asMs)
    // toISOString() throws a RangeError on an invalid date.
    if (Number.isNaN(date.getTime()))
      return ''
    return date.toISOString()
  }

  /**
   * Generate a random hex token (used for CSRF).
   *
   * @param size - Number of random bytes; the result has `2 * size` hex digits.
   */
  static generateToken(size = 16): string {
    const bytes = new Uint8Array(size)
    if (typeof crypto !== 'undefined' && crypto.getRandomValues) {
      crypto.getRandomValues(bytes)
    }
    else {
      // Fallback for Node without global crypto
      for (let i = 0; i < size; i++) {
        bytes[i] = Math.floor(Math.random() * 256)
      }
    }
    return Array.from(bytes, b => b.toString(16).padStart(2, '0')).join('')
  }
}