@chilfish/gallery-dl-instagram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,531 @@
1
+ /**
2
+ * Instagram REST API client.
3
+ *
4
+ * All endpoint methods return ``AsyncGenerator`` for paginated lists or
5
+ * plain values for single-item endpoints.
6
+ */
7
+
8
+ import type { HttpClient, HttpResponse } from '../types'
9
+ import type {
10
+ InstagramPost,
11
+ InstagramUser,
12
+ } from './types'
13
+ import { idFromShortcode } from '../utils/id-codec'
14
+
15
+ const APP_ID = '936619743392459' // sent by `_call` as the `X-IG-App-ID` request header
16
+ const ASBD_ID = '129477' // sent by `_call` as the `X-ASBD-ID` request header
17
+
18
/**
 * Instagram REST API client.
 *
 * Paginated list endpoints are exposed as async generators; the remaining
 * endpoints resolve to plain values. Every request made through `_call`
 * carries the current CSRF token and `X-IG-WWW-Claim` value and writes any
 * refreshed values back into the shared refs passed to the constructor.
 */
export class InstagramRestAPI {
  private readonly http: HttpClient
  private readonly root: string
  private readonly getCsrf: () => string
  private readonly getWwwClaim: () => string
  private readonly setWwwClaim: (v: string) => void
  private readonly setCsrf: (v: string) => void

  /** Accessors for the shared pagination cursor ref (`opts.cursor`). */
  private getCursor: () => string | null
  private setCursor: (v: string | null) => string | null

  /**
   * @param opts.http      HTTP client used for every request.
   * @param opts.root      Site root; used to build the `Referer` header.
   * @param opts.csrfToken Mutable ref holding the CSRF token; updated from `Set-Cookie`.
   * @param opts.wwwClaim  Mutable ref holding the `X-IG-WWW-Claim` value; updated from responses.
   * @param opts.cursor    Mutable ref holding the pagination cursor; read when a
   *                       paginator starts and rewritten after every page.
   */
  constructor(opts: {
    http: HttpClient
    root: string
    csrfToken: { value: string }
    wwwClaim: { value: string }
    cursor: { value: string | null }
  }) {
    this.http = opts.http
    this.root = opts.root

    // The refs are captured (not copied) so updates are visible to the owner.
    this.getCsrf = () => opts.csrfToken.value
    this.setCsrf = (v) => {
      opts.csrfToken.value = v
    }
    this.getWwwClaim = () => opts.wwwClaim.value
    this.setWwwClaim = (v) => {
      opts.wwwClaim.value = v
    }
    this.getCursor = () => opts.cursor.value
    this.setCursor = (v) => {
      opts.cursor.value = v
      return v
    }
  }

  /** Public endpoint methods */

  /**
   * Single post by shortcode.
   *
   * Shortcodes longer than 28 characters have their last 28 characters
   * stripped before being converted to a numeric media ID.
   */
  async* media(shortcode: string): AsyncGenerator<InstagramPost> {
    const sc = shortcode.length > 28 ? shortcode.slice(0, -28) : shortcode
    const numericId = idFromShortcode(sc)
    yield* this._pagination(`/v1/media/${numericId}/info/`)
  }

  /** Paginated user feed. */
  userFeed(userId: string): AsyncGenerator<InstagramPost> {
    return this._pagination(`/v1/feed/user/${userId}/`, { count: 30 })
  }

  /** Paginated user reels (POST endpoint). */
  userClips(userId: string): AsyncGenerator<InstagramPost> {
    const data: Record<string, unknown> = {
      target_user_id: userId,
      page_size: '50',
      max_id: null,
      include_feed_video: 'true',
    }
    return this._paginationPost('/v1/clips/user/', data)
  }

  /** Paginated tagged posts. */
  userTagged(userId: string): AsyncGenerator<InstagramPost> {
    return this._pagination(`/v1/usertags/${userId}/feed/`, { count: 20 })
  }

  /** Paginated saved posts (each item is unwrapped from its `media` field). */
  userSaved(): AsyncGenerator<InstagramPost> {
    return this._pagination('/v1/feed/saved/posts/', { count: 50 }, true)
  }

  /** Paginated collection (each item is unwrapped from its `media` field). */
  userCollection(collectionId: string): AsyncGenerator<InstagramPost> {
    return this._pagination(
      `/v1/feed/collection/${collectionId}/posts/`,
      { count: 50 },
      true,
    )
  }

  /**
   * Reels media — batch call, returns full reel objects.
   *
   * @throws Error when the response has no `reels_media` array.
   */
  async reelsMedia(reelIds: string[]): Promise<InstagramPost[]> {
    const data = this._asRecord(
      await this._call('/v1/feed/reels_media/', {
        params: { reel_ids: reelIds },
      }),
    )
    const reels = data?.reels_media
    if (Array.isArray(reels))
      return reels as InstagramPost[]
    throw new Error('Auth required — authenticated cookies needed for reels')
  }

  /** Story tray; resolves to an empty list when the response has no `tray` array. */
  async reelsTray(): Promise<InstagramPost[]> {
    const data = this._asRecord(await this._call('/v1/feed/reels_tray/'))
    const tray = data?.tray
    return Array.isArray(tray) ? tray as InstagramPost[] : []
  }

  /** Highlights list (tray); resolves to an empty list when `tray` is missing or not an array. */
  async highlightsTray(userId: string): Promise<{ id: string }[]> {
    const data = this._asRecord(
      await this._call(`/v1/highlights/${userId}/highlights_tray/`),
    )
    const tray = data?.tray
    return Array.isArray(tray) ? tray as { id: string }[] : []
  }

  /**
   * All highlights' media, fetched `chunkSize` highlight IDs per request.
   *
   * @param chunkSize Batch size; values below 1 (or non-finite) fall back to 1
   *                  so the batching loop always advances.
   */
  async* highlightsMedia(
    userId: string,
    chunkSize = 5,
  ): AsyncGenerator<InstagramPost> {
    const step = Number.isFinite(chunkSize) && chunkSize >= 1
      ? Math.floor(chunkSize)
      : 1
    const tray = await this.highlightsTray(userId)
    const ids = tray.map(hl => hl.id)

    for (let i = 0; i < ids.length; i += step) {
      const reels = await this.reelsMedia(ids.slice(i, i + step))
      yield* reels
    }
  }

  /** Hashtag posts: yields every `media` found in each section's `layout_content.medias`. */
  async* tagsMedia(tag: string): AsyncGenerator<InstagramPost> {
    for await (const section of this.tagsSections(tag)) {
      const layout = this._asRecord(section.layout_content)
      const medias = layout?.medias
      if (!Array.isArray(medias))
        continue
      for (const entry of medias as Array<Record<string, unknown> | null>) {
        if (entry?.media)
          yield entry.media as InstagramPost
      }
    }
  }

  /** Paginated raw sections of the "recent" tab for a hashtag. */
  private async* tagsSections(
    tag: string,
  ): AsyncGenerator<Record<string, unknown>> {
    const data: Record<string, unknown> = {
      include_persistent: '0',
      max_id: null,
      page: null,
      surface: 'grid',
      tab: 'recent',
    }
    yield* this._paginationSections(`/v1/tags/${tag}/sections/`, data)
  }

  /**
   * User by numeric ID.
   *
   * @throws Error('User not found') when the response carries no `user` object.
   */
  async userById(userId: string): Promise<InstagramUser> {
    const data = this._asRecord(await this._call(`/v1/users/${userId}/info/`))
    const user = this._asRecord(data?.user)
    if (user)
      return user as unknown as InstagramUser
    throw new Error('User not found')
  }

  /**
   * User by username (web_profile_info).
   *
   * The user is read from `data.user` when the payload has that key; a payload
   * without a `user` key is returned as-is.
   * NOTE(review): assumes the endpoint answers `{ data: { user } }` — confirm.
   *
   * @throws Error('User not found') when no user object is present.
   */
  async userByName(username: string): Promise<InstagramUser> {
    const data = this._asRecord(
      await this._call('/v1/users/web_profile_info/', {
        params: { username },
      }),
    )
    const payload = this._asRecord(data?.data)
    if (payload) {
      if (!('user' in payload))
        return payload as unknown as InstagramUser
      const user = this._asRecord(payload.user)
      if (user)
        return user as unknown as InstagramUser
    }
    throw new Error('User not found')
  }

  /**
   * Search user by username; only an exact, case-insensitive username match is accepted.
   *
   * @throws Error('User not found') when no search result matches.
   */
  async userBySearch(username: string): Promise<InstagramUser> {
    const url = 'https://www.instagram.com/web/search/topsearch/'
    const data = this._asRecord(
      await this._call(url, { params: { query: username } }),
    )
    const users = data?.users
    if (Array.isArray(users)) {
      const wanted = username.toLowerCase()
      for (const result of users as Array<{ user?: InstagramUser } | null>) {
        const candidate = result?.user
        // Skip malformed entries instead of aborting the whole search.
        if (candidate?.username?.toLowerCase() === wanted)
          return candidate
      }
    }
    throw new Error('User not found')
  }

  /**
   * Scrape user ID from the HTML profile page by locating `"profile_id":"<id>"`.
   *
   * @throws Error('User not found') when the marker or a non-empty ID is absent.
   */
  async userByWeb(username: string): Promise<{ id: string }> {
    const resp = await this.http.request({
      url: `https://www.instagram.com/${username}`,
      headers: {
        'Accept':
          'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Alt-Used': 'www.instagram.com',
        'Connection': 'keep-alive',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Priority': 'u=0, i',
      },
    })
    const text = typeof resp.data === 'string' ? resp.data : ''
    const marker = '"profile_id":"'
    const idx = text.indexOf(marker)
    if (idx >= 0) {
      // Derive the offset from the marker so the ID's first character is kept.
      const start = idx + marker.length
      const end = text.indexOf('"', start)
      if (end > start)
        return { id: text.slice(start, end) }
    }
    throw new Error('User not found')
  }

  /**
   * Resolve screen name via fallback chain: search → info → web.
   *
   * A failing strategy (any thrown error) simply hands over to the next one.
   *
   * @throws Error('User not found') when every strategy fails.
   */
  async userByScreenName(screenName: string): Promise<InstagramUser> {
    const strategies: Array<() => Promise<InstagramUser>> = [
      () => this.userBySearch(screenName),
      () => this.userByName(screenName),
      async () => {
        const result = await this.userByWeb(screenName)
        return { pk: result.id, id: result.id, username: screenName, full_name: '' }
      },
    ]

    for (const resolve of strategies) {
      try {
        return await resolve()
      }
      catch {
        // try next strategy
      }
    }
    throw new Error('User not found')
  }

  /**
   * Resolve username/id to numeric user ID string.
   *
   * An `id:<n>` input is returned verbatim (without the prefix) and triggers
   * no request.
   *
   * @param checkPrivate Currently only gates a no-op branch for private,
   *                     unfollowed accounts; resolution proceeds either way.
   * @throws Error('User not found') when the resolved user has neither `id` nor `pk`.
   */
  async userId(screenName: string, checkPrivate = true): Promise<string> {
    if (screenName.startsWith('id:'))
      return screenName.slice(3)

    const user = await this.userByScreenName(screenName)
    if (checkPrivate && user.is_private && !user.followed_by_viewer) {
      // warn, but proceed
    }
    const resolved = user.id ?? user.pk
    if (resolved == null)
      throw new Error('User not found')
    return resolved
  }

  /** Followers (paginated). */
  async* userFollowers(
    userId: string,
  ): AsyncGenerator<InstagramUser> {
    const params: Record<string, unknown> = { count: 12, max_id: null }
    yield* this._paginationFollowing(
      `/v1/friendships/${userId}/followers/`,
      params,
    )
  }

  /** Following (paginated). */
  async* userFollowing(
    userId: string,
  ): AsyncGenerator<InstagramUser> {
    const params: Record<string, unknown> = { count: 12, max_id: null }
    yield* this._paginationFollowing(
      `/v1/friendships/${userId}/following/`,
      params,
    )
  }

  /** Internal — HTTP call */

  /**
   * Perform one API request and return the response body.
   *
   * Endpoints starting with `/` are resolved against
   * `https://www.instagram.com/api`; anything else is used as the full URL.
   * Query params whose value is `null`/`undefined` are dropped.
   *
   * Side effects: stores a `csrftoken` found in `Set-Cookie` and the
   * `x-ig-set-www-claim` header value into the shared refs.
   *
   * @throws Error when the final URL shows a redirect to the login or challenge page.
   */
  private async _call(
    endpoint: string,
    opts: {
      params?: Record<string, unknown>
      method?: string
      data?: Record<string, unknown>
    } = {},
  ): Promise<unknown> {
    const url = endpoint.startsWith('/')
      ? `https://www.instagram.com/api${endpoint}`
      : endpoint

    const csrf = this.getCsrf()
    const headers: Record<string, string> = {
      'Accept': '*/*',
      'Cookie': `csrftoken=${csrf}`,
      'X-CSRFToken': csrf,
      'X-IG-App-ID': APP_ID,
      'X-ASBD-ID': ASBD_ID,
      'X-IG-WWW-Claim': this.getWwwClaim(),
      'X-Requested-With': 'XMLHttpRequest',
      'Connection': 'keep-alive',
      'Referer': `${this.root}/`,
      'Sec-Fetch-Dest': 'empty',
      'Sec-Fetch-Mode': 'cors',
      'Sec-Fetch-Site': 'same-origin',
    }

    const resp: HttpResponse<unknown> = await this.http.request({
      url,
      method: opts.method ?? 'GET',
      headers,
      params: opts.params
        ? Object.fromEntries(
            Object.entries(opts.params).filter(
              ([, v]) => v != null,
            ) as [string, string][],
          )
        : undefined,
      data: opts.data,
    })

    // Check for login / challenge redirects (same logic as InstagramExtractor.request)
    const finalUrl = String(resp.url ?? '')
    if (finalUrl.includes('/accounts/login/')) {
      throw new Error(
        'Instagram redirected to login page — you need a valid sessionid. '
        + 'Export it from your browser (F12 → Application → Cookies → sessionid) '
        + 'and pass --sessionid=<value> or set INSTAGRAM_SESSIONID env var.',
      )
    }
    if (finalUrl.includes('/challenge/')) {
      throw new Error(
        'Instagram redirected to challenge page — account flagged. '
        + 'Log in via browser to resolve the challenge, then export a fresh sessionid.',
      )
    }

    // Track CSRF updates. The pattern accepts the cookie at the start of the
    // header or after a `;`/`,` separator and keeps any `=` inside the value.
    const rawCookie = resp.headers?.['set-cookie']
    const cookieStr = Array.isArray(rawCookie)
      ? rawCookie.join('; ')
      : String(rawCookie ?? '')
    const freshCsrf = /(?:^|[;,])\s*csrftoken=([^;,\s]+)/.exec(cookieStr)?.[1]
    if (freshCsrf)
      this.setCsrf(freshCsrf)

    // Track www-claim
    const claim = resp.headers?.['x-ig-set-www-claim']
    if (claim != null)
      this.setWwwClaim(String(claim))

    return resp.data
  }

  /** Pagination engines */

  /**
   * GET pagination driven by `more_available` / `next_max_id`.
   *
   * Starts from the shared cursor, stores the cursor of each following page,
   * and resets it to `null` when the listing ends. Pagination also ends when
   * the response reports more data but supplies no cursor, since repeating
   * the request would only return the same page again.
   *
   * @param media When true, each item is unwrapped from its `media` field if present.
   */
  private async* _pagination(
    endpoint: string,
    params: Record<string, unknown> = {},
    media = false,
  ): AsyncGenerator<InstagramPost> {
    let maxId: string | null = this.getCursor()
    const reqParams = { ...params }

    while (true) {
      reqParams.max_id = maxId
      const data = this._asRecord(
        await this._call(endpoint, { params: reqParams }),
      )
      if (!data) {
        this.setCursor(null)
        return
      }

      const items = data.items as Array<Record<string, unknown>> | undefined
      if (items) {
        for (const item of items)
          yield (media ? (item.media ?? item) : item) as unknown as InstagramPost
      }

      const next = data.next_max_id as string | null | undefined
      if (!data.more_available || next == null || next === '') {
        this.setCursor(null)
        return
      }
      maxId = this.setCursor(next)
    }
  }

  /**
   * POST pagination driven by `paging_info.more_available` / `paging_info.max_id`.
   *
   * Items are unwrapped from their `media` field when present. Cursor handling
   * mirrors `_pagination`, including the stop on a missing cursor.
   */
  private async* _paginationPost(
    endpoint: string,
    reqData: Record<string, unknown>,
  ): AsyncGenerator<InstagramPost> {
    let maxId: string | null = this.getCursor()
    const data = { ...reqData }

    while (true) {
      data.max_id = maxId
      const resp = this._asRecord(
        await this._call(endpoint, { method: 'POST', data }),
      )
      if (!resp) {
        this.setCursor(null)
        return
      }

      const items = resp.items as Array<Record<string, unknown>> | undefined
      if (items) {
        for (const item of items)
          yield (item.media ?? item) as InstagramPost
      }

      const info = this._asRecord(resp.paging_info)
      const next = info?.max_id as string | null | undefined
      if (!info || !info.more_available || next == null || next === '') {
        this.setCursor(null)
        return
      }
      maxId = this.setCursor(next)
    }
  }

  /**
   * POST pagination for section listings, driven by `more_available`,
   * `next_page` and `next_max_id`.
   *
   * Ends when the response reports more data but provides neither a next page
   * nor a next cursor, because the follow-up request would be identical.
   */
  private async* _paginationSections(
    endpoint: string,
    reqData: Record<string, unknown>,
  ): AsyncGenerator<Record<string, unknown>> {
    let maxId: string | null = this.getCursor()
    let page: string | null = null
    const data = { ...reqData }

    while (true) {
      data.max_id = maxId
      data.page = page
      const info = this._asRecord(
        await this._call(endpoint, { method: 'POST', data }),
      )
      if (!info) {
        this.setCursor(null)
        return
      }

      const sections = info.sections as Record<string, unknown>[] | undefined
      if (sections)
        yield* sections

      const nextPage = info.next_page as string | null
      const nextMaxId = info.next_max_id as string | null
      if (!info.more_available || (nextPage == null && nextMaxId == null)) {
        this.setCursor(null)
        return
      }
      page = nextPage
      maxId = this.setCursor(nextMaxId)
    }
  }

  /**
   * GET pagination for follower/following listings, driven by `next_max_id`.
   *
   * The cursor is stored exactly as received. It is sent as a number only when
   * it is a safe integer; any other value (opaque token, oversized number) is
   * sent unchanged so it is neither discarded nor rounded.
   */
  private async* _paginationFollowing(
    endpoint: string,
    params: Record<string, unknown>,
  ): AsyncGenerator<InstagramUser> {
    let maxId: string | number | null = this._followingCursor(
      this.getCursor() as string | null,
    )
    const reqParams = { ...params }

    while (true) {
      reqParams.max_id = maxId
      const data = this._asRecord(
        await this._call(endpoint, {
          params: reqParams as Record<string, unknown>,
        }),
      )
      if (!data) {
        this.setCursor(null)
        return
      }

      const users = data.users as InstagramUser[] | undefined
      if (users)
        yield* users

      const nextMaxId = data.next_max_id
      if (nextMaxId == null || nextMaxId === '') {
        this.setCursor(null)
        return
      }
      const raw = String(nextMaxId)
      maxId = this._followingCursor(raw)
      this.setCursor(raw)
    }
  }

  /** Convert a stored cursor into a request value: safe integers become numbers, anything else stays a string. */
  private _followingCursor(raw: string | null): string | number | null {
    if (raw == null || raw === '')
      return null
    const parsed = this._parseIntCursor(raw)
    return parsed != null && Number.isSafeInteger(parsed) ? parsed : raw
  }

  /** Parse a cursor string as a finite number; `null` for empty or non-numeric input. */
  private _parseIntCursor(v: string | null): number | null {
    if (v == null || v === '')
      return null
    const n = Number(v)
    return Number.isFinite(n) ? n : null
  }

  /** Narrow an unknown response value to a plain record, or `null` when it is not an object. */
  private _asRecord(value: unknown): Record<string, unknown> | null {
    return value && typeof value === 'object'
      ? value as Record<string, unknown>
      : null
  }
}