@chilfish/gallery-dl-instagram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,583 @@
1
+ /**
2
+ * Instagram post parsers — normalize raw API responses into a uniform
3
+ * ``ParsedPost`` structure.
4
+ */
5
+
6
+ import type {
7
+ ImageCandidate,
8
+ InstagramCarouselItem,
9
+ InstagramPost,
10
+ InstagramUser,
11
+ MusicSticker,
12
+ ParsedMedia,
13
+ ParsedPost,
14
+ ParserConfig,
15
+ VideoVersion,
16
+ } from './types'
17
+ import { shortcodeFromId } from '../utils/id-codec'
18
+
19
+ /** Main entry — REST */
20
+
21
/**
 * Normalize a raw REST API post into a ``ParsedPost``.
 *
 * Objects carrying a top-level ``items`` list are handed to
 * ``parseStoryRest``; everything else is treated as a regular post or
 * carousel.
 *
 * @param post - Raw REST post object.
 * @param cfg - Parser configuration (root URL, timestamp and tag helpers).
 * @returns The normalized post, with one ``_files`` entry per parsed media
 *   item plus any extracted audio tracks.
 */
export function parsePostRest(
  post: InstagramPost,
  cfg: ParserConfig,
): ParsedPost {
  // Story or highlight: recognised by its top-level "items"
  if (post.items)
    return parseStoryRest(post, cfg)

  const { user, caption, code } = post
  const postDate = cfg.parseTimestamp(post.taken_at ?? post.created_at ?? null)

  const result: ParsedPost = {
    post_id: post.pk,
    post_shortcode: code,
    post_url: `${cfg.root}/p/${code}/`,
    likes: post.like_count ?? 0,
    liked: post.has_liked ?? false,
    pinned: extractPinned(post),
    owner_id: user.pk,
    username: user.username ?? '',
    fullname: user.full_name ?? '',
    post_date: postDate,
    date: postDate,
    description: caption ? caption.text : '',
    type: 'post',
    count: 0,
    _files: [],
  }
  // Loosely-typed view of the result, used as the audio metadata sink
  const resultRec = result as unknown as Record<string, unknown>

  // Hashtags found in the caption: unique and sorted
  const hashtags = cfg.findTags(result.description)
  if (hashtags.length > 0)
    result.tags = Array.from(new Set(hashtags)).sort()

  // Location: slug is the short name, whitespace → "-", lower-cased
  const place = post.location
  if (place) {
    const slug = place.short_name.replace(/\s+/g, '-').toLowerCase()
    result.location_id = place.pk
    result.location_slug = slug
    result.location_url = `${cfg.root}/explore/locations/${place.pk}/${slug}/`
  }

  // Coauthors
  if (post.coauthor_producers) {
    result.coauthors = post.coauthor_producers.map(({ pk, username, full_name }) => ({
      id: pk,
      username,
      full_name,
    }))
  }

  // A non-empty carousel supplies the items; otherwise the post is its own item
  const carousel = post.carousel_media
  let items: readonly InstagramCarouselItem[]
  if (carousel?.length) {
    result.sidecar_media_id = result.post_id
    result.sidecar_shortcode = result.post_shortcode
    items = carousel
  }
  else {
    items = [post as unknown as InstagramCarouselItem]
  }

  for (const [index, item] of items.entries()) {
    const position = index + 1
    const media = parseMediaItem(item, post, cfg, position)
    if (!media)
      continue

    const rawItem = item as unknown as Record<string, unknown>
    extractTaggedUsers(rawItem, media)
    result._files.push(media)

    // Only the first story music sticker of an item is considered
    const firstSticker = (rawItem.story_music_stickers as MusicSticker[] | undefined)?.[0]
    if (!firstSticker)
      continue

    const track = extractAudio(rawItem, resultRec, firstSticker, cfg)
    if (track) {
      track.num = position
      result._files.push(track)
    }
  }

  // Music attached to the post itself
  const musicInfo = post.music_metadata?.music_info
  if (musicInfo) {
    const track = extractAudio(
      post as unknown as Record<string, unknown>,
      resultRec,
      { music_asset_info: musicInfo },
      cfg,
    )
    if (track) {
      track.num = items.length
      result._files.push(track)
    }
  }

  // Exactly one file and it is a video → reel type and reel URL
  const isSingleVideo = result._files.length === 1 && result._files[0]!.video_url
  if (isSingleVideo) {
    result.type = 'reel'
    result.post_url = `${cfg.root}/reel/${post.code}/`
  }

  // Subscription
  if (post.subscription_media_visibility)
    result.subscription = post.subscription_media_visibility

  return result
}
142
+
143
+ /** Story / highlight */
144
+
145
/**
 * Normalize a story or highlight reel (a REST object with top-level
 * ``items``) into a ``ParsedPost``.
 *
 * A reel with a truthy ``expiring_at`` is treated as a story; anything else
 * is treated as a highlight.
 *
 * Side effect: for a story without a truthy ``seen`` value, ``post.seen`` is
 * set to ``expiring_at - 86400``. This now happens *before* the date is
 * computed, so the derived value can serve as the date fallback when
 * ``taken_at`` and ``created_at`` are both absent.
 *
 * @param post - Raw reel object; ``post.items`` must be present.
 * @param cfg - Parser configuration.
 * @returns The normalized story/highlight with one ``_files`` entry per
 *   parseable frame.
 */
function parseStoryRest(post: InstagramPost, cfg: ParserConfig): ParsedPost {
  const items = post.items!
  // The reel id is the part after the last ":" of the raw id
  const reelId = String(post.id).split(':').pop() ?? '0'

  const expires = post.expiring_at
  const isStory = !!expires

  // Back-fill "seen" first so the date fallback below can use it.
  // 86400 = seconds per day (presumably the story lifetime — TODO confirm).
  if (expires && !post.seen)
    post.seen = expires - 86400

  const date = cfg.parseTimestamp(
    (post.taken_at ?? post.created_at ?? post.seen) ?? null,
  )

  const data: ParsedPost = {
    post_id: reelId,
    post_shortcode: shortcodeFromId(reelId),
    post_url: isStory
      ? `${cfg.root}/stories/${post.user.username}/`
      : `${cfg.root}/stories/highlights/${reelId}/`,
    likes: 0,
    liked: false,
    pinned: [],
    owner_id: post.user.pk,
    username: post.user.username ?? '',
    fullname: post.user.full_name ?? '',
    post_date: date,
    date,
    description: '',
    type: isStory ? 'story' : 'highlight',
    count: 0,
    _files: [],
    expires: expires ? cfg.parseTimestamp(expires) : undefined,
    user: post.user,
  }

  // Only highlights carry a title
  if (!isStory && post.title)
    data.highlight_title = post.title

  // Parse each frame; frames without a usable image are skipped
  for (let num = 0; num < items.length; num++) {
    const item = items[num]!
    const media = parseMediaItem(item, post, cfg, num + 1)
    if (!media)
      continue

    extractTaggedUsers(item as unknown as Record<string, unknown>, media)
    data._files.push(media)
  }

  return data
}
200
+
201
+ /** Single media item */
202
+
203
/**
 * Convert one raw media item (single post, carousel child, or story frame)
 * into a ``ParsedMedia`` record.
 *
 * Side effect: when ``cfg.staticVideo`` is falsy and the item reports
 * ``original_media_type === 1`` with a different ``media_type``, the item's
 * ``video_versions`` are deleted and its original dimensions are overwritten
 * with those of the first image candidate.
 *
 * @param item - Raw media item.
 * @param parent - The post the item belongs to; supplies the fallback
 *   ``taken_at`` timestamp.
 * @param cfg - Parser configuration.
 * @param num - 1-based position of the item within its post.
 * @returns The parsed media, or ``null`` when the item has no image
 *   candidate (missing ``image_versions2`` or an empty ``candidates`` list).
 */
function parseMediaItem(
  item: InstagramCarouselItem,
  parent: InstagramPost,
  cfg: ParserConfig,
  num: number,
): ParsedMedia | null {
  // Indexing an empty array yields undefined rather than throwing, so guard
  // explicitly instead of relying on try/catch.
  const image: ImageCandidate | undefined = item?.image_versions2?.candidates?.[0]
  if (!image)
    return null

  const itemRec = item as unknown as Record<string, unknown>

  // Static video handling: drop the video and fall back to the image size
  if (
    !cfg.staticVideo
    && item.original_media_type === 1
    && item.original_media_type !== item.media_type
  ) {
    delete itemRec.video_versions
    item.original_width = image.width
    item.original_height = image.height
  }

  const widthOrig = item.original_width ?? 0
  const heightOrig = item.original_height ?? 0

  // Defaults describe an image-only item; overridden below for videos
  let video: VideoVersion | null = null
  let manifest: string | null = null
  let width = image.width
  let height = image.height

  if (item.video_versions?.length) {
    // Pick the version with the largest width * height * type product;
    // ties keep the earlier entry.
    video = item.video_versions.reduce((best, v) =>
      v.width * v.height * v.type > best.width * best.height * best.type
        ? v
        : best,
    )

    if (item.video_dash_manifest && cfg.videosDash) {
      manifest = item.video_dash_manifest
      width = widthOrig
      height = heightOrig
    }
    else {
      width = video.width
      height = video.height
    }
  }

  // Build media object
  const media: ParsedMedia = {
    num,
    // Timestamp precedence: item, then chosen video version, then parent post
    date: cfg.parseTimestamp(
      (itemRec.taken_at as number)
      ?? (video as unknown as Record<string, number> | null)?.taken_at
      ?? parent.taken_at
      ?? null,
    ),
    media_id: item.pk,
    shortcode: item.code ?? shortcodeFromId(item.pk),
    display_url: image.url,
    video_url: video?.url ?? null,
    width,
    width_original: widthOrig,
    height,
    height_original: heightOrig,
    tagged_users: [],
  }

  // Optional fields are only set when the source provides them
  if (manifest != null)
    media._ytdl_manifest_data = manifest
  if (item.owner)
    media.owner = item.owner
  if (item.reshared_story_media_author)
    media.author = item.reshared_story_media_author
  if (item.expiring_at != null)
    media.expires = cfg.parseTimestamp(item.expiring_at)
  if (item.subscription_media_visibility)
    media.subscription = item.subscription_media_visibility
  if (itemRec.audience)
    media.audience = itemRec.audience as string

  return media
}
307
+
308
+ /** Tagged users */
309
+
310
/**
 * Collect every user tagged or mentioned in ``src`` into
 * ``dest.tagged_users`` (any previous content is replaced).
 *
 * Sources are read in this order: ``edge_media_to_tagged_user`` edges,
 * ``usertags.in``, ``reel_mentions``, and ``story_bloks_stickers`` of type
 * ``"mention"``. Entries lacking the expected user object are skipped.
 * The result is de-duplicated by id, keeping the first occurrence; ids are
 * compared by their string form so that ``5`` and ``'5'`` count as the same
 * account.
 *
 * @param src - Raw media object to read tags from.
 * @param dest - Parsed media whose ``tagged_users`` is overwritten.
 */
export function extractTaggedUsers(
  src: Record<string, unknown>,
  dest: ParsedMedia,
): void {
  dest.tagged_users = []

  // edge_media_to_tagged_user (GraphQL style)
  const edges = src.edge_media_to_tagged_user as
    | { edges?: Array<{ node?: { user?: InstagramUser } }> }
    | undefined
  if (edges?.edges) {
    for (const edge of edges.edges) {
      const u = edge?.node?.user
      if (!u)
        continue
      dest.tagged_users.push({
        id: u.id ?? u.pk,
        username: u.username,
        full_name: u.full_name,
      })
    }
  }

  // usertags.in (REST style)
  const usertags = src.usertags as
    | { in?: Array<{ user?: InstagramUser }> }
    | undefined
  if (usertags?.in) {
    for (const tag of usertags.in) {
      const u = tag?.user
      if (!u)
        continue
      dest.tagged_users.push({
        id: u.pk,
        username: u.username,
        full_name: u.full_name,
      })
    }
  }

  // reel_mentions
  const mentions = src.reel_mentions as
    | Array<{ user?: InstagramUser }>
    | undefined
  if (mentions) {
    for (const m of mentions) {
      const u = m?.user
      if (!u)
        continue
      dest.tagged_users.push({
        id: u.pk ?? u.id ?? '',
        username: u.username,
        full_name: u.full_name,
      })
    }
  }

  // story_bloks_stickers
  const bloks = src.story_bloks_stickers as
    | Array<{
      bloks_sticker?: {
        bloks_sticker_type?: string
        sticker_data?: {
          ig_mention?: {
            account_id: string
            username: string
            full_name: string
          }
        }
      }
    }>
    | undefined
  if (bloks) {
    for (const sticker of bloks) {
      const s = sticker?.bloks_sticker
      if (s?.bloks_sticker_type !== 'mention')
        continue
      const m = s.sticker_data?.ig_mention
      if (!m)
        continue
      dest.tagged_users.push({
        id: m.account_id,
        username: m.username,
        full_name: m.full_name,
      })
    }
  }

  // Deduplicate by id; keys are stringified because a Set treats the
  // number 5 and the string '5' as different values.
  const seen = new Set<string>()
  dest.tagged_users = dest.tagged_users.filter((t) => {
    const key = String(t.id)
    if (seen.has(key))
      return false
    seen.add(key)
    return true
  })
}
399
+
400
+ /** Audio / music extraction */
401
+
402
/**
 * Extract audio metadata from a music sticker.
 *
 * When the sticker has ``music_asset_info``, the ``audio_*`` fields are
 * written onto ``dest`` regardless of whether a downloadable URL exists.
 *
 * @param src - Raw object the sticker belongs to; supplies ``taken_at``.
 * @param dest - Record receiving the ``audio_*`` metadata fields.
 * @param sticker - Music sticker to read.
 * @param cfg - Parser configuration.
 * @returns A ``ParsedMedia`` entry for the audio track (with ``num`` 0), or
 *   ``null`` when the sticker has no asset info or no
 *   ``progressive_download_url``.
 */
export function extractAudio(
  src: Record<string, unknown>,
  dest: Record<string, unknown>,
  sticker: MusicSticker,
  cfg: ParserConfig,
): ParsedMedia | null {
  const asset = sticker.music_asset_info
  if (!asset)
    return null

  // Consumption info is only a fallback source for artist/user
  const fallback = sticker.music_consumption_info ?? asset

  const title = asset.title
  const durationSeconds = (asset.duration_in_ms ?? 0) / 1000
  const timestamps = asset.highlight_start_times_in_ms
  const artist = asset.display_artist ?? fallback.display_artist
  const user = asset.ig_artist ?? fallback.ig_artist

  // Metadata is recorded even when no downloadable track exists
  dest.audio_title = title
  dest.audio_duration = durationSeconds
  dest.audio_timestamps = timestamps
  dest.audio_artist = artist
  dest.audio_user = user

  const downloadUrl = asset.progressive_download_url
  if (!downloadUrl)
    return null

  const track: ParsedMedia = {
    num: 0,
    date: cfg.parseTimestamp((src.taken_at as number) ?? null),
    media_id: asset.id,
    shortcode: shortcodeFromId(asset.id),
    display_url: asset.cover_artwork_uri ?? '',
    video_url: null,
    audio_url: downloadUrl,
    width: 0,
    width_original: 0,
    height: 0,
    height_original: 0,
    tagged_users: [],
    audio_user: user as string,
    audio_title: title,
    audio_artist: artist as string,
    audio_duration: durationSeconds,
    audio_timestamps: timestamps,
  }
  return track
}
444
+
445
+ /** GraphQL parser */
446
+
447
/**
 * Normalize a GraphQL-style post node into a ``ParsedPost``.
 *
 * Children of ``edge_sidecar_to_children`` become one ``_files`` entry each;
 * without that edge the post itself is the single media entry.
 *
 * @param post - Raw GraphQL post node.
 * @param cfg - Parser configuration.
 * @returns The normalized post. ``pinned`` holds user ids as strings, and
 *   media without ``dimensions`` are reported as 0×0.
 */
export function parsePostGraphql(
  post: Record<string, unknown>,
  cfg: ParserConfig,
): ParsedPost {
  const typename = (post.__typename as string) ?? 'GraphImage'

  const owner = post.owner as InstagramUser
  // Pass null (not undefined) for a missing timestamp, as elsewhere in this file
  const date = cfg.parseTimestamp(
    (post.taken_at_timestamp as number | undefined) ?? null,
  )

  const pinnedUsers = post.pinned_for_users as
    | Array<{ id: string | number }>
    | undefined

  const data: ParsedPost = {
    typename,
    likes:
      (post.edge_media_preview_like as { count: number } | undefined)?.count
      ?? 0,
    liked: (post.viewer_has_liked ?? false) as boolean,
    // Keep ids as strings so the value really matches the declared string[]
    pinned: pinnedUsers?.map(u => String(u.id)) ?? [],
    owner_id: owner.id ?? owner.pk,
    username: owner.username ?? '',
    fullname: owner.full_name ?? '',
    post_id: post.id as string,
    post_shortcode: post.shortcode as string,
    post_url: `${cfg.root}/p/${post.shortcode}/`,
    post_date: date,
    date,
    description: '',
    type: 'post',
    count: 0,
    _files: [],
  }

  // Description: caption edges joined by newline, \uXXXX escapes decoded
  const captionEdges = post.edge_media_to_caption as
    | { edges: Array<{ node: { text: string } }> }
    | undefined
  data.description
    = captionEdges?.edges?.map(e => e.node.text).join('\n') ?? ''
  data.description = parseUnicodeEscapes(data.description)

  const tags = cfg.findTags(data.description)
  if (tags.length > 0)
    data.tags = [...new Set(tags)].sort()

  // Location
  const location = post.location as InstagramPost['location'] | undefined
  if (location) {
    data.location_id = location.pk
    data.location_slug = location.short_name
    data.location_url = `${cfg.root}/explore/locations/${location.pk}/${location.short_name}/`
  }

  // Coauthors (same shape as the REST parser, including full_name)
  const coauthors = post.coauthor_producers as InstagramUser[] | undefined
  if (coauthors?.length) {
    data.coauthors = coauthors.map(u => ({
      id: u.id ?? u.pk,
      username: u.username,
      full_name: u.full_name,
    }))
  }

  // Fallback size for nodes that carry no "dimensions"
  const noDimensions = { width: 0, height: 0 }

  // Carousel vs single
  const sidecar = post.edge_sidecar_to_children as
    | { edges: Array<{ node: Record<string, unknown> }> }
    | undefined

  if (sidecar?.edges) {
    data.sidecar_media_id = data.post_id
    data.sidecar_shortcode = data.post_shortcode

    let num = 0
    for (const edge of sidecar.edges) {
      num++
      const node = edge.node
      const dimensions
        = (node.dimensions as { width: number, height: number } | undefined)
          ?? noDimensions

      const media: ParsedMedia = {
        num,
        date: data.date,
        media_id: node.id as string,
        shortcode:
          (node.shortcode as string) ?? shortcodeFromId(node.id as string),
        display_url: node.display_url as string,
        video_url: (node.video_url as string) ?? null,
        width: dimensions.width,
        height: dimensions.height,
        sidecar_media_id: data.post_id,
        sidecar_shortcode: data.post_shortcode,
        tagged_users: [],
        width_original: dimensions.width,
        height_original: dimensions.height,
      }
      extractTaggedUsers(node, media)
      data._files.push(media)
    }
  }
  else {
    const dimensions
      = (post.dimensions as { width: number, height: number } | undefined)
        ?? noDimensions
    const media: ParsedMedia = {
      num: 1,
      date: data.date,
      media_id: post.id as string,
      shortcode: post.shortcode as string,
      display_url: post.display_url as string,
      video_url: (post.video_url as string) ?? null,
      width: dimensions.width,
      height: dimensions.height,
      tagged_users: [],
      width_original: dimensions.width,
      height_original: dimensions.height,
    }
    extractTaggedUsers(post, media)
    data._files.push(media)
  }

  return data
}
569
+
570
/**
 * Return the ids of users who pinned the post: the timeline list when
 * truthy, otherwise the clips-tab list, otherwise a new empty array.
 */
function extractPinned(post: InstagramPost): string[] {
  // `||` (not `??`) keeps the original truthiness-based selection
  return post.timeline_pinned_user_ids
    || post.clips_tab_pinned_user_ids
    || []
}
577
+
578
/**
 * Replace every literal ``\uXXXX`` sequence (backslash, "u", four hex
 * digits) in ``text`` with the UTF-16 code unit it names. Text without any
 * ``\u`` is returned as-is.
 */
function parseUnicodeEscapes(text: string): string {
  const escapePattern = /\\u([0-9a-fA-F]{4})/g
  const decode = (_match: string, digits: string): string =>
    String.fromCharCode(Number.parseInt(digits, 16))

  return text.includes('\\u') ? text.replace(escapePattern, decode) : text
}