@chilfish/gallery-dl-instagram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,275 @@
1
+ /**
2
+ * InstagramExtractor — base class shared by all Instagram sub-extractors.
3
+ */
4
+
5
+ import type { ExtractorOptions } from '../core/extractor'
6
+ import type {
7
+ HttpResponse,
8
+ MessageIter,
9
+ Metadata,
10
+ RequestConfig,
11
+ } from '../types'
12
+ import type {
13
+ InstagramPost,
14
+ InstagramUser,
15
+ ParsedPost,
16
+ ParserConfig,
17
+ } from './types'
18
+ import { Extractor } from '../core/extractor'
19
+ import { url } from '../message'
20
+ import { findTags, nameExtFromURL } from '../utils/text'
21
+ import { InstagramRestAPI } from './api'
22
+ import { parsePostGraphql, parsePostRest } from './parsers'
23
+
24
/**
 * Construction options accepted by every Instagram extractor, layered on top
 * of the generic {@link ExtractorOptions}.
 */
export interface InstagramExtractorOptions extends ExtractorOptions {
  /**
   * CSRF token to start with (e.g. taken from an anonymous session's cookie
   * jar). When omitted or empty, the extractor generates a random token
   * during initialization.
   */
  csrfToken?: string
  /**
   * Extra cookies as a name → value map.
   * NOTE(review): not read by the base class in this file — presumably
   * consumed by the HTTP layer; confirm against the caller.
   */
  cookies?: Record<string, string>
  /**
   * NOTE(review): presumably the value of Instagram's `sessionid` cookie used
   * for authenticated requests; not read by the base class in this file.
   */
  sessionId?: string
}
30
+
31
/**
 * Minimal mutable box around a single value.
 *
 * Sharing one `Ref` between objects lets all of them observe later writes to
 * `value`, which a plain primitive passed by value cannot do.
 */
export class Ref<T> {
  /** @param value Initial content of the box; freely reassignable afterwards. */
  constructor(public value: T) {}
}
37
+
38
/**
 * Base class shared by all Instagram sub-extractors.
 *
 * Subclasses implement {@link InstagramExtractor.posts} (and optionally
 * {@link InstagramExtractor.metadata}); this class turns every post into one
 * `directory` message followed by URL messages for its files, honouring the
 * `videos`, `previews`, `audio`, `max-posts`, `order-files` and
 * `static-videos` config keys.
 */
export abstract class InstagramExtractor extends Extractor {
  override readonly category = 'instagram'
  override readonly root = 'https://www.instagram.com'

  /** REST API client. Created in `_init()`, hence the definite-assignment `!`. */
  api!: InstagramRestAPI
  /** CSRF token holder; the same `Ref` instance is handed to the API client. */
  csrfToken: Ref<string> = new Ref('')
  /** Last `x-ig-set-www-claim` response header seen by `request()`; starts at `'0'`. */
  wwwClaim: Ref<string> = new Ref('0')
  /** Pagination cursor holder, written through `_updateCursor()`. */
  cursor: Ref<string | null> = new Ref(null)

  protected _loggedIn = true
  /** User attached to every parsed post once `_assignUser()` has been called. */
  protected _user: InstagramUser | null = null

  private readonly _findTags = findTags
  private readonly _csrfSeed: string | undefined

  /**
   * @param opts Extractor options; `opts.csrfToken`, when present, seeds the
   *   CSRF token used after initialization.
   */
  constructor(opts: InstagramExtractorOptions) {
    super(opts)
    this._csrfSeed = opts.csrfToken
  }

  /** Initialization */

  /**
   * Chooses the CSRF token and builds the API client.
   *
   * `||` (not `??`) is intentional: an empty seed must also fall back to a
   * freshly generated token.
   */
  protected override async _init(): Promise<void> {
    // Use pre-seeded CSRF token if available (from anonymous cookie jar),
    // otherwise generate a random one for sessionid-based auth.
    this.csrfToken.value = this._csrfSeed || Extractor.generateToken(16)

    this.api = new InstagramRestAPI({
      http: this.http,
      root: this.root,
      csrfToken: this.csrfToken,
      wwwClaim: this.wwwClaim,
      cursor: this.cursor,
    })
  }

  /** Request override */

  /**
   * Performs the request through the base class, then inspects the response.
   *
   * Note: the `url` parameter shadows the imported `url()` message factory
   * inside this method only.
   *
   * @param url Request URL.
   * @param cfg Request configuration forwarded unchanged to the base class.
   * @returns The response from the base implementation.
   * @throws Error when the final URL contains `/accounts/login/` or
   *   `/challenge/`.
   */
  override async request(
    url: string,
    cfg: RequestConfig = {},
  ): Promise<HttpResponse<unknown>> {
    const response = await super.request(url, cfg)

    const finalUrl = response.url
    if (finalUrl.includes('/accounts/login/')) {
      throw new Error(
        'HTTP redirect to login page — cookies expired or invalid',
      )
    }
    if (finalUrl.includes('/challenge/')) {
      throw new Error(
        'HTTP redirect to challenge page — account flagged',
      )
    }

    // Remember the most recent claim so it is visible through the shared Ref.
    const claim = response.headers['x-ig-set-www-claim']
    if (claim != null) {
      this.wwwClaim.value = String(claim)
    }

    return response
  }

  /** Login */

  /** Marks the extractor as logged in; performs no request here. */
  async login(): Promise<void> {
    this._loggedIn = true
  }

  /** Core pipeline */

  /**
   * Yields, for every post from `posts()`, one `directory` message followed
   * by URL messages for the post's files.
   *
   * Per file: an audio URL is emitted when `audio` is enabled and a video URL
   * when `videos` is enabled. The file's `display_url` image is emitted for
   * plain images, and for audio/video files only when the matching preview
   * kind is enabled (the `media_id` then gets a `p` suffix).
   *
   * `previews` accepts `true` (treated as all preview kinds), a
   * comma-separated string, or a list containing `'video'` and/or `'audio'`.
   */
  override async* items(): MessageIter {
    await this.login()

    const meta = (await this.metadata()) ?? {}

    const videos = this._cfg('videos', true) as string | boolean
    const videosDash = videos !== 'merged'
    const shouldDownloadVideos = !!videos

    const previews = this._cfg('previews', false) as
      | string
      | boolean
      | string[]
      | null
      | undefined
    let previewsVid = false
    let previewsAud = false
    if (previews === true) {
      // A bare `true` enables every preview kind.
      previewsVid = true
      previewsAud = true
    }
    else if (previews) {
      // Accept "video,audio" as well as ['video', 'audio']; anything else
      // (e.g. a non-array object) selects no preview kind instead of throwing.
      const kinds: readonly string[] = typeof previews === 'string'
        ? previews.split(',').map(kind => kind.trim())
        : Array.isArray(previews) ? previews : []
      previewsVid = kinds.includes('video')
      previewsAud = kinds.includes('audio')
    }

    const audio = this._cfg('audio', false) as boolean
    const maxPosts = this._cfg('max-posts') as number | undefined
    const orderFiles = this._cfg('order-files') as string | undefined
    // Values starting with 'r' or 'd' select reversed file order.
    const reverse = orderFiles
      ? (['r', 'd'] as readonly string[]).includes(orderFiles[0] ?? '')
      : false

    const parserCfg: ParserConfig = {
      root: this.root,
      findTags: this._findTags,
      parseTimestamp: this.parseTimestamp.bind(this),
      staticVideo: (this._cfg('static-videos', true) ?? true) as boolean,
      warnVideo: !previews && shouldDownloadVideos,
      warnImage: 1,
      videosDash,
    }

    this.log.debug(
      `cfg: videos=${shouldDownloadVideos} previews=${!!previews} audio=${audio} maxPosts=${maxPosts ?? '∞'} staticVideos=${parserCfg.staticVideo}`,
    )

    // A non-positive limit means nothing can be emitted; skip `posts()`
    // entirely rather than fetching a post only to throw it away.
    const nothingAllowed = maxPosts != null && maxPosts <= 0

    let count = 0
    if (!nothingAllowed) {
      for await (const post of this.posts()) {
        count++

        const raw = post as unknown as Record<string, unknown>
        // GraphQL payloads carry `__typename`; everything else is REST-shaped.
        const parsed: ParsedPost = '__typename' in raw
          ? parsePostGraphql(raw, parserCfg)
          : parsePostRest(post, parserCfg)

        if (this._user) {
          (parsed as unknown as Record<string, unknown>).user = this._user
        }
        Object.assign(parsed, meta)

        const files = parsed._files
        parsed.count = files.length

        yield { type: 'directory', metadata: parsed as unknown as Metadata }

        const ordered = reverse ? [...files].reverse() : files
        for (const file of ordered) {
          const combined: Record<string, unknown> = { ...parsed, ...file }

          if (file.audio_url) {
            if (audio) {
              nameExtFromURL(file.audio_url, combined)
              yield url(file.audio_url, combined as unknown as Metadata)
            }
            if (!previewsAud)
              continue
            // The preview image gets its own id so it cannot collide with
            // the audio file it belongs to.
            combined.media_id = `${combined.media_id as string}p`
          }

          if (file.video_url) {
            if (shouldDownloadVideos) {
              nameExtFromURL(file.video_url, combined)
              yield url(file.video_url, combined as unknown as Metadata)
            }
            if (!previewsVid)
              continue
            combined.media_id = `${combined.media_id as string}p`
          }

          const imgUrl = file.display_url
          nameExtFromURL(imgUrl, combined)
          // URLs carrying `stp=dst-jpg` are treated as JPEG even when the
          // path ends in `.webp`.
          if (
            combined.extension === 'webp'
            && imgUrl.includes('stp=dst-jpg')
          ) {
            combined.extension = 'jpg'
          }
          yield url(imgUrl, combined as unknown as Metadata)
        }

        // Stop as soon as the limit is reached, so the next post (and
        // possibly the next API page) is never requested.
        if (maxPosts != null && count >= maxPosts)
          break
      }
    }

    if (count === 0 && !nothingAllowed) {
      this.log.warn('No posts returned — API may have returned empty data (check sessionid or post visibility)')
    }
  }

  /** Subclass hooks */

  /**
   * Extra metadata merged into every parsed post; empty by default.
   *
   * @virtual
   */
  async metadata(): Promise<Record<string, unknown>> {
    return {}
  }

  /** Source of posts for `items()`; implemented by each sub-extractor. */
  abstract posts(): AsyncGenerator<InstagramPost>

  /** Cursor management */

  /**
   * Reads the `cursor` config value (default `true`).
   *
   * @returns The configured cursor, or `null` when the value is `true` or
   *   falsy.
   */
  protected _initCursor(): string | null {
    const cursor = this._cfg('cursor', true)
    return cursor === true || !cursor ? null : (cursor as string)
  }

  /**
   * Stores `cursor` in the shared cursor `Ref`, logging non-empty values.
   *
   * @param cursor New cursor, or `null`.
   * @returns The same `cursor` that was passed in.
   */
  protected _updateCursor(cursor: string | null): string | null {
    if (cursor) {
      this.log.debug(`Cursor: ${cursor}`)
    }
    this.cursor.value = cursor
    return cursor
  }

  /** User assignment */

  /**
   * Remembers `user` and flattens its `edge_*` objects into plain `count_*`
   * numbers, mutating `user` in place (the `edge_*` keys are removed).
   *
   * Safe to call more than once with the same object: counts produced by an
   * earlier call are kept instead of being reset to 0.
   *
   * @param user User object to attach to subsequently parsed posts.
   */
  protected _assignUser(user: InstagramUser): void {
    this._user = user

    const mappings: Array<[string, string]> = [
      ['count_media', 'edge_owner_to_timeline_media'],
      ['count_video', 'edge_felix_video_timeline'],
      ['count_saved', 'edge_saved_media'],
      ['count_mutual', 'edge_mutual_followed_by'],
      ['count_follow', 'edge_follow'],
      ['count_followed', 'edge_followed_by'],
      ['count_collection', 'edge_media_collections'],
    ]

    const rec = user as unknown as Record<string, unknown>
    for (const [newKey, oldKey] of mappings) {
      // Already flattened by a previous call: keep the existing count.
      if (!(oldKey in rec) && typeof rec[newKey] === 'number')
        continue

      const edge = rec[oldKey] as { count?: number | null } | null | undefined
      // Missing edge, missing count and null count all collapse to 0.
      rec[newKey] = edge?.count ?? 0
      delete rec[oldKey]
    }
  }
}