@chilfish/gallery-dl-instagram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,521 @@
1
+ /**
2
+ * Instagram sub-extractors — one class per resource type.
3
+ */
4
+
5
+ import type { ExtractorClass, MessageIter, Metadata } from '../types'
6
+ import type { InstagramExtractorOptions } from './base'
7
+ import type { InstagramPost, InstagramUser } from './types'
8
+ import { directory, queue } from '../message'
9
+ import { shortcodeFromId } from '../utils/id-codec'
10
+ import { ensureHttpScheme } from '../utils/text'
11
+ import { InstagramExtractor } from './base'
12
+
13
/** Scheme-optional, `www.`-optional `instagram.com` prefix shared by the URL patterns below. */
const BASE_RE = /^(?:https?:\/\/)?(?:www\.)?instagram\.com/

/**
 * Concatenate a base pattern and a path pattern into one RegExp.
 *
 * Only the `source` of each RegExp operand is used, so their flags are
 * discarded; the result always carries exactly the `i` flag.
 *
 * @param base - Pattern whose source forms the prefix.
 * @param path - Pattern (or raw regex source string) appended to the prefix.
 * @returns A new case-insensitive RegExp.
 */
function re(base: RegExp, path: RegExp | string): RegExp {
  const suffix = path instanceof RegExp ? path.source : path
  return new RegExp(`${base.source}${suffix}`, 'i')
}
19
+
20
/**
 * Extractor for a single post, IGTV video, reel, or `/share/` link.
 *
 * The instance subcategory is `'reel'` for reel URLs and `'post'` otherwise.
 * Share links carry an opaque id rather than a shortcode, so `posts()` first
 * requests the share URL and reads the shortcode from the final response URL.
 */
export class InstagramPostExtractor extends InstagramExtractor {
  static readonly subcategory = 'post'

  /**
   * Capture groups (1-based, as indexed on the match array):
   *   1 - empty marker, set only for `/share/...` links
   *   2 - empty marker, set only for `/share/reel(s)/...`
   *   3 - empty marker, set only for `/reel(s)/...` and `/<user>/reel(s)/...`
   *   4 - shortcode (or the share id for share links)
   *
   * The empty `()` groups capture `''` when their branch participates in the
   * match and stay `undefined` otherwise, which is how the constructor and
   * `posts()` tell the URL flavours apart.
   */
  static override pattern = re(
    /^(?:https?:\/\/)?(?:www\.)?instagram\.com\//,
    /(?:share()(?:\/(?:p|tv|reels?()))?|(?:[^/?#]+\/)?(?:p|tv|reels?()))\/([^/?#]+)/,
  )

  override readonly subcategory = InstagramPostExtractor.subcategory

  /** True when the URL is a `/share/...` link that must be resolved first. */
  private readonly isShareLink: boolean
  /** Shortcode (or share id) captured from the URL. */
  private readonly urlCode: string | undefined

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
    const match = opts.match
    this.isShareLink = match[1] != null
    this.urlCode = match[4]
    if (match[2] != null || match[3] != null) {
      (this as { subcategory: string }).subcategory = 'reel'
    }
  }

  /**
   * Build an extractor for `url`, or return `null` when the URL does not
   * match {@link InstagramPostExtractor.pattern}.
   */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramPostExtractor | null {
    const match = InstagramPostExtractor.pattern.exec(url)
    if (!match)
      return null
    return new InstagramPostExtractor({ ...opts, url, match })
  }

  /** Yield the media behind the URL's shortcode, resolving share links first. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    let shortcode = this.urlCode
    if (!shortcode)
      return

    if (this.isShareLink) {
      // Share link — follow redirect
      this.log.info(`Resolving share link: ${this.url}`)
      const resp = await this.request(ensureHttpScheme(this.url), {
        headers: {
          'Sec-Fetch-Dest': 'empty',
          'Sec-Fetch-Mode': 'navigate',
          'Sec-Fetch-Site': 'same-origin',
        },
      })
      // For a final URL shaped like `.../p/<shortcode>/` the second-to-last
      // path segment is the shortcode. Keep the captured id when the response
      // exposes no URL or that segment is empty.
      const parts = (resp as { url?: string }).url?.split('/')
      shortcode = parts?.[parts.length - 2] || shortcode
    }

    this.log.debug(`Fetching post: ${shortcode}`)
    yield* this.api.media(shortcode)
  }
}
70
+
71
/**
 * Extractor for a bare profile URL (`instagram.com/<user>`).
 *
 * It downloads nothing itself: `items()` queues one child URL per category
 * selected by the `include` config value, each tagged with the extractor
 * class that should handle it.
 */
export class InstagramUserExtractor extends InstagramExtractor {
  static readonly subcategory = 'user'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/?(?:$|[?#])/)

  override readonly subcategory = InstagramUserExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /**
   * Build an extractor for `url`, or return `null` when the URL does not
   * match {@link InstagramUserExtractor.pattern}.
   */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramUserExtractor | null {
    const match = InstagramUserExtractor.pattern.exec(url)
    if (!match)
      return null
    return new InstagramUserExtractor({ ...opts, url, match })
  }

  /**
   * Queue one child URL per requested category, in the order requested.
   *
   * `include` may be the string `'all'`, a comma-separated string (whitespace
   * is stripped), or an array of category names. Unknown names are reported
   * with a warning and skipped.
   */
  override async* items(): MessageIter {
    await this.login()

    // The captured group keeps its leading slash, e.g. `/someone`.
    const userPath = this.groups[0] ?? '/'
    const base = `${this.root}${userPath}/`
    const storiesUrl = `${this.root}/stories/${userPath.slice(1)}/`

    const include = this._cfg('include', ['posts']) as string[] | string
    let categories: string[]
    if (include === 'all') {
      categories = [
        'posts',
        'reels',
        'tagged',
        'stories',
        'highlights',
        'info',
        'avatar',
      ]
    }
    else if (typeof include === 'string') {
      categories = include.replace(/\s+/g, '').split(',')
    }
    else {
      categories = include
    }

    // A Map instead of an object literal: lookups with user-supplied names
    // such as 'constructor' or 'toString' must not resolve to members
    // inherited from Object.prototype.
    const targets = new Map<string, { cls: ExtractorClass, url: string }>([
      ['info', {
        cls: InstagramInfoExtractor as unknown as ExtractorClass,
        url: `${base}info/`,
      }],
      ['avatar', {
        cls: InstagramAvatarExtractor as unknown as ExtractorClass,
        url: `${base}avatar/`,
      }],
      ['stories', {
        cls: InstagramStoriesExtractor as unknown as ExtractorClass,
        url: storiesUrl,
      }],
      ['highlights', {
        cls: InstagramHighlightsExtractor as unknown as ExtractorClass,
        url: `${base}highlights/`,
      }],
      ['posts', {
        cls: InstagramPostsExtractor as unknown as ExtractorClass,
        url: `${base}posts/`,
      }],
      ['reels', {
        cls: InstagramReelsExtractor as unknown as ExtractorClass,
        url: `${base}reels/`,
      }],
      ['tagged', {
        cls: InstagramTaggedExtractor as unknown as ExtractorClass,
        url: `${base}tagged/`,
      }],
    ])

    for (const cat of categories) {
      const entry = targets.get(cat)
      if (entry) {
        yield queue(entry.url, { _extractor: entry.cls })
      }
      else {
        this.log.warn(`Invalid include '${cat}'`)
      }
    }
  }

  /** Intentionally empty: all work is delegated through `items()`. */
  override async* posts(): AsyncGenerator<InstagramPost> {}
}
161
+
162
/** Extractor for `instagram.com/<user>/posts` URLs. */
export class InstagramPostsExtractor extends InstagramExtractor {
  static readonly subcategory = 'posts'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/posts/)

  override readonly subcategory = InstagramPostsExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /** Return an extractor for `url`, or `null` when the pattern does not match. */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramPostsExtractor | null {
    const match = InstagramPostsExtractor.pattern.exec(url)
    return match
      ? new InstagramPostsExtractor({ ...opts, url, match })
      : null
  }

  /** Resolve the user id for the captured name and yield `api.userFeed` for it. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    // The captured group carries a leading slash; drop it to get the name.
    const path = this.groups[0] ?? ''
    const name = path.startsWith('/') ? path.slice(1) : path
    const userId = await this.api.userId(name)
    yield* this.api.userFeed(userId)
  }
}
188
+
189
/** Extractor for `instagram.com/<user>/reels` URLs. */
export class InstagramReelsExtractor extends InstagramExtractor {
  static readonly subcategory = 'reels'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/reels/)

  override readonly subcategory = InstagramReelsExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /** Return an extractor for `url`, or `null` when the pattern does not match. */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramReelsExtractor | null {
    const match = InstagramReelsExtractor.pattern.exec(url)
    return match
      ? new InstagramReelsExtractor({ ...opts, url, match })
      : null
  }

  /** Resolve the user id for the captured name and yield `api.userClips` for it. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    // The captured group carries a leading slash; drop it to get the name.
    const path = this.groups[0] ?? ''
    const name = path.startsWith('/') ? path.slice(1) : path
    const userId = await this.api.userId(name)
    yield* this.api.userClips(userId)
  }
}
215
+
216
/**
 * Extractor for `instagram.com/<user>/tagged` URLs.
 *
 * The captured name may also be written as `id:<user id>`, in which case the
 * id is used directly instead of being looked up from a screen name.
 */
export class InstagramTaggedExtractor extends InstagramExtractor {
  static readonly subcategory = 'tagged'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/tagged/)

  override readonly subcategory = InstagramTaggedExtractor.subcategory
  /** User id resolved by `metadata()`; empty until that has run. */
  private _taggedUserId = ''

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /** Return an extractor for `url`, or `null` when the pattern does not match. */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramTaggedExtractor | null {
    const match = InstagramTaggedExtractor.pattern.exec(url)
    return match
      ? new InstagramTaggedExtractor({ ...opts, url, match })
      : null
  }

  /**
   * Look up the tagged user, remember their id for `posts()`, and return
   * the `tagged_*` metadata fields describing them.
   */
  override async metadata(): Promise<Record<string, unknown>> {
    const path = this.groups[0] ?? ''
    const screenName = path.startsWith('/') ? path.slice(1) : path

    let user: InstagramUser
    if (!screenName.startsWith('id:')) {
      this._taggedUserId = await this.api.userId(screenName)
      user = await this.api.userByScreenName(screenName)
    }
    else {
      const explicitId = screenName.slice(3)
      this._taggedUserId = explicitId
      user = await this.api.userById(explicitId)
    }

    const { username, full_name } = user
    return {
      tagged_owner_id: user.id ?? user.pk,
      tagged_username: username,
      tagged_full_name: full_name,
    }
  }

  /** Yield `api.userTagged` for the user, running `metadata()` first if needed. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    if (this._taggedUserId === '')
      await this.metadata()
    yield* this.api.userTagged(this._taggedUserId)
  }
}
263
+
264
/**
 * Extractor for stories and single highlights.
 *
 * Handles three URL shapes:
 *   - `/stories/<user>/` with an optional `/<media id>`
 *   - `/stories/highlights/<numeric id>`
 *   - `/s/<base64 token>` with an optional `?story_media_id=<id>`
 *
 * The instance subcategory is `'stories'` for user URLs and `'highlights'`
 * for the two highlight shapes.
 */
export class InstagramStoriesExtractor extends InstagramExtractor {
  static readonly subcategory = 'stories'

  /**
   * Capture groups, in order:
   *   1 - numeric highlight id (`/stories/highlights/<id>`)
   *   2 - username (`/stories/<user>`)
   *   3 - media id following the username
   *   4 - base64 token of a `/s/<token>` short link (`aGlnaGxpZ2h0` is the
   *       base64 encoding of `highlight`)
   *   5 - `story_media_id` query value of a short link
   *
   * Deliberately case-sensitive: base64 tokens must not be matched loosely.
   */
  static override pattern
    = /^(?:https?:\/\/)?(?:www\.)?instagram\.com\/s(?:tories\/(?:highlights\/(\d+)|([^/?#]+)(?:\/(\d+))?)|\/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)/

  override readonly subcategory = InstagramStoriesExtractor.subcategory
  /** Reel id of the form `highlight:<id>`, or `null` for user stories. */
  private highlightId: string | null = null
  /** Id of the single story item requested by the URL, if any. */
  private mediaId: string | null = null

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
    const [h1, user, m1, h2, m2] = this.groups as string[]

    if (!user) {
      (this as { subcategory: string }).subcategory = 'highlights'
      // The short-link token already decodes to a string starting with
      // `highlight`, so it is used as-is; only the numeric form needs the
      // `highlight:` prefix added.
      this.highlightId = h1
        ? `highlight:${h1}`
        : Buffer.from(h2 ?? '', 'base64').toString('utf-8')
    }

    this.mediaId = m1 ?? m2 ?? null
  }

  /**
   * Build an extractor for `url`, or return `null` when the URL does not
   * match {@link InstagramStoriesExtractor.pattern}.
   */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramStoriesExtractor | null {
    const match = InstagramStoriesExtractor.pattern.exec(url)
    if (!match)
      return null
    return new InstagramStoriesExtractor({ ...opts, url, match })
  }

  /**
   * Yield the reel(s) for the highlight or user.
   *
   * - With a media id: yields the first reel, narrowed to that item when found.
   * - With the `split` config enabled: yields one copy of the first reel per item.
   * - Otherwise: yields every reel returned by the API.
   */
  override async* posts(): AsyncGenerator<InstagramPost> {
    const reelId = this.highlightId
      ? this.highlightId
      : await this.api.userId((this.groups[1] ?? '').toString())

    const reels = await this.api.reelsMedia([reelId])
    if (!reels.length)
      return

    if (this.mediaId) {
      const reel = reels[0]!
      for (const item of reel.items ?? []) {
        // Compare as strings so a numeric `pk` still matches the id parsed
        // from the URL.
        if (String(item.pk) === this.mediaId) {
          reel.items = [item]
          break
        }
      }
      // NOTE(review): when no item matches, the whole reel is yielded
      // unchanged — confirm whether that should be reported instead.
      yield reel
      return
    }

    const split = this._cfg('split', false) as boolean
    if (split) {
      const reel = reels[0]!
      for (const item of reel.items ?? []) {
        // Shallow copy so each yielded reel carries exactly one item.
        const copy = { ...reel }
        copy.items = [item]
        yield copy
      }
    }
    else {
      yield* reels
    }
  }
}
341
+
342
/** Extractor for `instagram.com/<user>/highlights` URLs. */
export class InstagramHighlightsExtractor extends InstagramExtractor {
  static readonly subcategory = 'highlights'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/highlights/)

  override readonly subcategory = InstagramHighlightsExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /** Return an extractor for `url`, or `null` when the pattern does not match. */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramHighlightsExtractor | null {
    const match = InstagramHighlightsExtractor.pattern.exec(url)
    return match
      ? new InstagramHighlightsExtractor({ ...opts, url, match })
      : null
  }

  /** Resolve the user id for the captured name and yield `api.highlightsMedia` for it. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    // The captured group carries a leading slash; drop it to get the name.
    const path = this.groups[0] ?? ''
    const name = path.startsWith('/') ? path.slice(1) : path
    const userId = await this.api.userId(name)
    yield* this.api.highlightsMedia(userId)
  }
}
368
+
369
/** Extractor for hashtag pages (`instagram.com/explore/tags/<tag>`). */
export class InstagramTagExtractor extends InstagramExtractor {
  static readonly subcategory = 'tag'
  static override pattern = re(BASE_RE, /\/explore\/tags\/([^/?#]+)/)

  override readonly subcategory = InstagramTagExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /**
   * Build an extractor for `url`, or return `null` when the URL does not
   * match {@link InstagramTagExtractor.pattern}.
   */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramTagExtractor | null {
    const match = InstagramTagExtractor.pattern.exec(url)
    if (!match)
      return null
    return new InstagramTagExtractor({ ...opts, url, match })
  }

  /** Metadata for the run: the percent-decoded tag name under `tag`. */
  override async metadata(): Promise<Record<string, unknown>> {
    return { tag: this.tagName() }
  }

  /** Yield `api.tagsMedia` for the percent-decoded tag name. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    yield* this.api.tagsMedia(this.tagName())
  }

  /**
   * The tag captured from the URL with percent-escapes decoded.
   *
   * The pattern accepts any `%` in the tag segment, and `decodeURIComponent`
   * throws `URIError` on a malformed escape (e.g. a trailing `%`). In that
   * case the raw captured text is used instead of aborting the extractor.
   */
  private tagName(): string {
    const raw = this.groups[0] ?? ''
    try {
      return decodeURIComponent(raw)
    }
    catch {
      return raw
    }
  }
}
399
+
400
/**
 * Extractor for `instagram.com/<user>/info` URLs.
 *
 * Emits a single directory message carrying the user's profile object and
 * no posts. The captured name may also be written as `id:<user id>`.
 */
export class InstagramInfoExtractor extends InstagramExtractor {
  static readonly subcategory = 'info'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/info/)

  override readonly subcategory = InstagramInfoExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /**
   * Build an extractor for `url`, or return `null` when the URL does not
   * match {@link InstagramInfoExtractor.pattern}.
   */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramInfoExtractor | null {
    const match = InstagramInfoExtractor.pattern.exec(url)
    if (!match)
      return null
    return new InstagramInfoExtractor({ ...opts, url, match })
  }

  /** Fetch the user's profile and yield it as one directory message. */
  override async* items(): MessageIter {
    // This override issues API requests itself, so it logs in first, as the
    // `items()` override of InstagramUserExtractor does.
    // NOTE(review): assumes `login()` is safe to call once per extractor
    // instance — confirm against the base class.
    await this.login()

    const screenName = (this.groups[0] ?? '').replace(/^\//, '')
    let user: InstagramUser

    if (screenName.startsWith('id:')) {
      user = await this.api.userById(screenName.slice(3))
    }
    else {
      user = await this.api.userByScreenName(screenName)
    }

    yield directory(user as unknown as Metadata)
  }

  /** Intentionally empty: the only output is the directory message from `items()`. */
  override async* posts(): AsyncGenerator<InstagramPost> {}
}
436
+
437
/**
 * Extractor for `instagram.com/<user>/avatar` URLs.
 *
 * Yields one synthetic post whose only image candidate is the user's
 * profile picture. The captured name may also be written as `id:<user id>`.
 */
export class InstagramAvatarExtractor extends InstagramExtractor {
  static readonly subcategory = 'avatar'
  static override pattern = re(BASE_RE, /(\/[^/?#]+)\/avatar/)

  override readonly subcategory = InstagramAvatarExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /** Return an extractor for `url`, or `null` when the pattern does not match. */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramAvatarExtractor | null {
    const match = InstagramAvatarExtractor.pattern.exec(url)
    return match
      ? new InstagramAvatarExtractor({ ...opts, url, match })
      : null
  }

  /** Fetch the user and yield a post-shaped record wrapping their avatar. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    const path = this.groups[0] ?? ''
    const screenName = path.startsWith('/') ? path.slice(1) : path

    const user: InstagramUser = screenName.startsWith('id:')
      ? await this.api.userById(screenName.slice(3))
      : await this.api.userByScreenName(screenName)

    // Preference order: explicit HD info, then the last listed HD version,
    // then the plain profile picture URL with unknown dimensions.
    const versions = user.hd_profile_pic_versions
    const avatar
      = user.hd_profile_pic_url_info
        ?? versions?.[versions.length - 1]
        ?? { url: user.profile_pic_url ?? '', width: 0, height: 0 }

    // When the picture id has a usable part before its first underscore,
    // that part is the pk and the code is derived from it; otherwise both
    // are the synthetic `avatar:<user pk>` value.
    const picturePk = user.profile_pic_id?.split('_')[0]
    const pk = picturePk || `avatar:${user.pk}`
    const code = picturePk ? shortcodeFromId(picturePk) : pk

    yield {
      pk,
      code,
      user,
      caption: null,
      like_count: 0,
      image_versions2: { candidates: [avatar] },
    } as unknown as InstagramPost
  }
}
494
+
495
/**
 * Extractor for `instagram.com/<user>/saved` URLs, optionally ending in
 * `/all-posts`. The captured user segment is not used; `posts()` calls
 * `api.userSaved()` without arguments.
 */
export class InstagramSavedExtractor extends InstagramExtractor {
  static readonly subcategory = 'saved'
  static override pattern = re(
    BASE_RE,
    /(\/[^/?#]+)\/saved(?:\/all-posts)?\/?$/,
  )

  override readonly subcategory = InstagramSavedExtractor.subcategory

  constructor(opts: InstagramExtractorOptions) {
    super(opts)
  }

  /** Return an extractor for `url`, or `null` when the pattern does not match. */
  static fromURL(
    url: string,
    opts: InstagramExtractorOptions,
  ): InstagramSavedExtractor | null {
    const match = InstagramSavedExtractor.pattern.exec(url)
    return match
      ? new InstagramSavedExtractor({ ...opts, url, match })
      : null
  }

  /** Yield everything produced by `api.userSaved()`. */
  override async* posts(): AsyncGenerator<InstagramPost> {
    yield* this.api.userSaved()
  }
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Instagram module — public API surface.
3
+ */
4
+
5
+ // API client
6
+ export { InstagramRestAPI } from './api'
7
+
8
+ // Base extractor
9
+ export { InstagramExtractor } from './base'
10
+ export type { InstagramExtractorOptions } from './base'
11
+
12
+ // Extractors
13
+ export {
14
+ InstagramAvatarExtractor,
15
+ InstagramHighlightsExtractor,
16
+ InstagramInfoExtractor,
17
+ InstagramPostExtractor,
18
+ InstagramPostsExtractor,
19
+ InstagramReelsExtractor,
20
+ InstagramSavedExtractor,
21
+ InstagramStoriesExtractor,
22
+ InstagramTagExtractor,
23
+ InstagramTaggedExtractor,
24
+ InstagramUserExtractor,
25
+ } from './extractors'
26
+
27
+ // Parsers
28
+ export { extractAudio, extractTaggedUsers, parsePostGraphql, parsePostRest } from './parsers'
29
+
30
+ // Types
31
+ export type {
32
+ Coauthor,
33
+ ImageCandidate,
34
+ InstagramCarouselItem,
35
+ InstagramLocation,
36
+ InstagramPost,
37
+ InstagramUser,
38
+ ParsedMedia,
39
+ ParsedPost,
40
+ ParserConfig,
41
+ TaggedUser,
42
+ VideoVersion,
43
+ } from './types'