@chilfish/gallery-dl-instagram 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/job.ts DELETED
@@ -1,581 +0,0 @@
1
- /**
2
- * Job dispatch system.
3
- *
4
- * ``Job`` is the abstract base — it loops over ``Extractor.items()`` and
5
- * routes each message to ``handleDirectory``, ``handleUrl``, or
6
- * ``handleQueue``.
7
- *
8
- * ``DownloadJob`` is a concrete implementation that downloads files via
9
- * the ``Storage`` abstraction and maintains an in-memory download archive.
10
- */
11
-
12
- import type {
13
- DirectoryMsg,
14
- Metadata,
15
- QueueMsg,
16
- UrlMsg,
17
- } from '../types'
18
- import type { Extractor } from './extractor'
19
-
20
/**
 * Render a byte count as a short human-readable string.
 *
 * Steps are binary (1024-based) over the units B, KB, MB and GB; values of
 * 1024 GB and above stay expressed in GB. Plain bytes are printed without
 * decimals, larger units with one decimal place.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `'0 B'`, `'500 B'`, `'1.5 KB'`.
 *   Zero, negative and NaN inputs all yield `'0 B'`.
 */
function formatBytes(bytes: number): string {
  // `!(bytes > 0)` also catches NaN. A logarithm-based unit lookup would
  // produce a NaN or negative index for these inputs and print "undefined".
  if (!(bytes > 0))
    return '0 B'

  const units = ['B', 'KB', 'MB', 'GB']
  let value = bytes
  let unit = 0
  // Dividing by 1024 step by step avoids the rounding error a ratio of
  // logarithms can introduce at exact unit boundaries, and keeps the index
  // inside the unit table for sub-byte fractions.
  while (value >= 1024 && unit < units.length - 1) {
    value /= 1024
    unit++
  }
  return `${value.toFixed(unit === 0 ? 0 : 1)} ${units[unit]}`
}
28
-
29
- // ── ANSI helpers ──
30
-
31
/** SGR escape sequences used for terminal styling. */
const BOLD = '\x1B[1m'
const DIM = '\x1B[2m'
const CYAN = '\x1B[36m'
const GREEN = '\x1B[32m'
const YELLOW = '\x1B[33m'
const RESET = '\x1B[0m'

/** Prefix `s` with the escape sequence `code` and append the reset sequence. */
function sgr(code: string, s: string): string {
  return code + s + RESET
}

/** Wrap `s` in the bold sequence. */
function b(s: string): string {
  return sgr(BOLD, s)
}

/** Wrap `s` in the dim sequence. */
function dim(s: string): string {
  return sgr(DIM, s)
}

/** Wrap `s` in the cyan sequence. */
function c(s: string): string {
  return sgr(CYAN, s)
}

/** Wrap `s` in the green sequence. */
function g(s: string): string {
  return sgr(GREEN, s)
}
50
-
51
/**
 * Right-pad `s` with spaces until it is `n` characters long.
 *
 * @param s - Text to pad.
 * @param n - Target length; strings already this long (or longer) are
 *   returned unchanged.
 * @returns The padded string.
 */
function pad(s: string, n: number): string {
  return s.padEnd(n)
}
54
-
55
/**
 * Abstract message dispatcher.
 *
 * A job owns one extractor, iterates the messages it yields and forwards
 * each one to the handler matching its ``type``. Subclasses supply the three
 * handlers and may override ``_report`` to print a summary.
 */
export abstract class Job {
  /** Extractor whose messages this job consumes. */
  readonly extractor: Extractor

  /** Status code returned by ``run()``; starts at 0. */
  status = 0

  constructor(extractor: Extractor) {
    this.extractor = extractor
  }

  /**
   * Main entry point: logs a start line, initializes the extractor, then
   * async-iterates it and dispatches every yielded message. Calls
   * ``_report()`` once iteration finishes.
   *
   * @returns The job's ``status`` after all messages were handled.
   */
  async run(): Promise<number> {
    const { extractor } = this
    extractor.log.info(
      `Starting ${extractor.category}/${extractor.subcategory} — ${extractor.url}`,
    )
    await extractor.initialize()

    for await (const message of extractor) {
      // Messages with any other ``type`` are ignored.
      if (message.type === 'directory')
        await this.handleDirectory(message)
      else if (message.type === 'url')
        await this.handleUrl(message)
      else if (message.type === 'queue')
        await this.handleQueue(message)
    }

    this._report()
    return this.status
  }

  /** Hook invoked at the end of ``run()``; the base implementation does nothing. */
  protected _report(): void {}

  /** Handle a ``directory`` message. */
  abstract handleDirectory(msg: DirectoryMsg): Promise<void>

  /** Handle a ``url`` message. */
  abstract handleUrl(msg: UrlMsg): Promise<void>

  /** Handle a ``queue`` message. */
  abstract handleQueue(msg: QueueMsg): Promise<void>
}
98
-
99
/** An in-memory archive: category → Set<archive-key>. */
type ArchiveMap = Map<string, Set<string>>

/**
 * Job that fetches every ``url`` message through the extractor's HTTP
 * client, writes the payload through the extractor's storage, and records
 * an archive key per written file so repeats are skipped.
 *
 * ``queue`` messages spawn a child ``DownloadJob`` that shares the parent's
 * http/storage/log/config and a copy of its archive state.
 */
export class DownloadJob extends Job {
  /** Base output directory (prepended to all paths). */
  basePath: string = ''

  /** Current target directory metadata (set by directory messages). */
  private _currentDir: Metadata = {}

  /** In-memory archive: one key set per category. */
  readonly archive: ArchiveMap = new Map()

  /**
   * Per-category archive format strings. The archive key is formed by
   * interpolating the format over the file's metadata; categories without
   * a registered format use ``{media_id}``.
   */
  private readonly _archiveFmts: Map<string, string> = new Map()

  // ── Stats ──
  private _postCount = 0
  private _fileCount = 0
  private _downloadedBytes = 0
  private _skippedCount = 0

  /** Register the archive key format used for `category`. */
  registerArchive(category: string, format: string): void {
    this._archiveFmts.set(category, format)
  }

  /**
   * Replace every ``{name}`` placeholder in `fmt` with ``String(meta[name])``;
   * null/undefined values become the empty string.
   */
  private _interp(fmt: string, meta: Metadata): string {
    return fmt.replace(/\{(\w+)\}/g, (_, key) => {
      const v = meta[key]
      return v == null ? '' : String(v)
    })
  }

  /** Resolve the archive category and interpolated key for `meta`. */
  private _archiveKey(meta: Metadata): { cat: string, key: string } {
    const cat = (meta.category ?? this.extractor.category) as string
    const fmt = this._archiveFmts.get(cat) ?? '{media_id}'
    return { cat, key: this._interp(fmt, meta) }
  }

  /** Whether the archive already holds the key derived from `meta`. */
  private _isArchived(meta: Metadata): boolean {
    const { cat, key } = this._archiveKey(meta)
    return this.archive.get(cat)?.has(key) ?? false
  }

  /** Record the key derived from `meta` in the archive. */
  private _archive(meta: Metadata): void {
    const { cat, key } = this._archiveKey(meta)
    let keys = this.archive.get(cat)
    if (!keys) {
      keys = new Set()
      this.archive.set(cat, keys)
    }
    keys.add(key)
  }

  /**
   * Normalize an HTTP response body into something ``storage.write`` is
   * called with: binary as ``Uint8Array``, text as-is, anything else as JSON.
   */
  private _toPayload(raw: unknown): Uint8Array | string {
    if (raw instanceof Uint8Array)
      return raw
    if (raw instanceof ArrayBuffer)
      return new Uint8Array(raw)
    if (typeof raw === 'string')
      return raw
    if (
      typeof raw === 'object'
      && raw != null
      && 'type' in raw
      && (raw as { type: unknown }).type === 'Buffer'
    ) {
      // Serialized Buffer shape ``{ type: 'Buffer', data: number[] }``.
      // The bytes live in ``data``; passing the wrapper object itself to
      // ``new Uint8Array`` yields an empty array (it has no ``length``).
      const bytes = (raw as { data?: unknown }).data
      if (Array.isArray(bytes))
        return Uint8Array.from(bytes as number[])
      return new Uint8Array(raw as unknown as ArrayBuffer)
    }
    return JSON.stringify(raw)
  }

  /** Handlers */

  /**
   * Remember the directory metadata for subsequent ``url`` messages, count
   * the post, create the target directory and log it.
   */
  async handleDirectory(msg: DirectoryMsg): Promise<void> {
    this._currentDir = { ...msg.metadata }
    this._postCount++

    // Ensure directory exists
    const relDir = this._buildDirPath(msg.metadata)
    const dirPath = this.basePath ? `${this.basePath}/${relDir}` : relDir
    await this.extractor.storage.mkdir(dirPath)
    this.extractor.log.info(
      `#${this._postCount} ${msg.metadata.username ?? '?'}/${msg.metadata.post_shortcode ?? '?'} → ${dirPath}/`,
    )
  }

  /**
   * Download one file unless its archive key is already known.
   *
   * Failures are logged and OR the value 4 into ``status``; they do not
   * abort the job.
   */
  async handleUrl(msg: UrlMsg): Promise<void> {
    const meta: Metadata = {
      ...this._currentDir,
      ...msg.metadata,
    }

    // Archive check
    if (this._isArchived(meta)) {
      this._skippedCount++
      return
    }

    // Build output path
    const filename = this._buildFilename(meta)
    const relDir = this._buildDirPath(meta)
    const dirPath = this.basePath ? `${this.basePath}/${relDir}` : relDir
    const fullPath = `${dirPath}/${filename}`

    try {
      // Fetch as binary (images, videos are not text)
      const resp = await this.extractor.http.request({
        url: msg.url,
        method: 'GET',
        responseType: 'arraybuffer',
      })

      const data = this._toPayload(resp.data)
      await this.extractor.storage.write(fullPath, data)

      this._fileCount++
      const size = data instanceof Uint8Array ? data.byteLength : data.length
      this._downloadedBytes += size
      this.extractor.log.info(
        ` └─ ${filename} (${formatBytes(size)})`,
      )

      this._archive(meta)
    }
    catch (err) {
      this.extractor.log.error(
        `Failed to download ${filename}: ${String(err)}`,
      )
      this.status |= 4
    }
  }

  /**
   * Run a child ``DownloadJob`` for a queued URL.
   *
   * The extractor class is taken from ``metadata._extractor``; the message
   * is ignored when that is not a constructor or when its ``pattern`` does
   * not match the URL. The child's status is OR-ed into this job's status
   * and its archive entries are merged back afterwards.
   */
  async handleQueue(msg: QueueMsg): Promise<void> {
    const meta = { ...this._currentDir, ...msg.metadata }
    const extrClass = meta._extractor
    // A class reference is a constructor, i.e. ``typeof`` reports
    // 'function'. Checking for 'object' would reject every real class and
    // only admit values that ``Reflect.construct`` cannot construct.
    if (!extrClass || typeof extrClass !== 'function')
      return

    // The _extractor in metadata is a static class reference
    // Use its pattern to match the URL, then construct with parent deps
    const cls = extrClass as unknown as {
      pattern: RegExp
      subcategory: string
      new (opts: Record<string, unknown>): Extractor
    }

    const match = cls.pattern.exec(msg.url)
    if (!match)
      return

    // Construct child extractor with parent's http/storage/log/config
    const parentExtr = this.extractor
    const childExtr = Reflect.construct(cls, [{
      url: msg.url,
      match,
      config: parentExtr.config,
      http: parentExtr.http,
      storage: parentExtr.storage,
      log: parentExtr.log,
    }])

    const childJob = new DownloadJob(childExtr)
    childJob.basePath = this.basePath

    // Propagate current directory metadata
    childJob._currentDir = meta

    // Propagate archive state (key sets are copied, not shared)
    for (const [cat, keys] of this.archive)
      childJob.archive.set(cat, new Set(keys))
    for (const [cat, fmt] of this._archiveFmts)
      childJob._archiveFmts.set(cat, fmt)

    const childStatus = await childJob.run()
    this.status |= childStatus

    // Merge back new archive entries
    for (const [cat, keys] of childJob.archive) {
      const mine = this.archive.get(cat)
      if (mine) {
        for (const k of keys) mine.add(k)
      }
      else {
        this.archive.set(cat, keys)
      }
    }
  }

  /** Report */

  /** Log the post/file/byte totals and, if any, the number of skipped files. */
  protected override _report(): void {
    const log = this.extractor.log
    log.info(
      `Done — ${this._postCount} post(s), ${this._fileCount} file(s) downloaded (${formatBytes(this._downloadedBytes)})`,
    )
    if (this._skippedCount > 0) {
      log.info(` ${this._skippedCount} file(s) skipped (already archived)`)
    }
  }

  /** Path builders */

  /** ``<category>/<username>``; the username falls back to ``_``. */
  private _buildDirPath(meta: Metadata): string {
    const cat = meta.category ?? this.extractor.category
    const user = meta.username ?? '_'
    return `${cat}/${user}`
  }

  /** ``<media_id>[_<num>].<extension>`` with fallbacks ``0`` and ``jpg``. */
  private _buildFilename(meta: Metadata): string {
    const mid = meta.media_id ?? '0'
    const ext = meta.extension ?? 'jpg'
    const num = meta.num ? `_${meta.num}` : ''
    return `${mid}${num}.${ext}`
  }
}
330
-
331
- /** Job that prints structured post info to the terminal instead of downloading. */
332
-
333
/** One media entry collected for the post currently being printed. */
interface _FileLine {
  /** Index shown in brackets in the media list. */
  num: number
  /** File name shown in the media list. */
  filename: string
  /** Pixel width; a falsy value makes the resolution print as ``?x?``. */
  width: number
  /** Pixel height. */
  height: number
  /** Video URL, or null; a non-empty value adds the ``video`` badge. */
  videoUrl: string | null
  /** Audio URL, or null; a non-empty value adds the ``audio`` badge. */
  audioUrl: string | null
}
341
-
342
/**
 * Job that prints structured post info to the terminal instead of
 * downloading. Files are buffered per post and written out as a boxed block
 * when the next post starts, when a queue message arrives, or at the end.
 */
export class PrintJob extends Job {
  /** Metadata of the post currently being collected. */
  private _currentDir: Metadata = {}
  /** Media entries collected for the current post. */
  private _files: _FileLine[] = []
  private _postCount = 0
  private _fileCount = 0
  /** Output width in columns, capped at 100. */
  private _width: number

  constructor(extractor: Extractor) {
    super(extractor)
    // ``columns`` is undefined when stdout is not a TTY and may be reported
    // as 0; a non-positive width would make the box-drawing arithmetic
    // negative, so fall back to 80 in both cases.
    const columns = process.stdout.columns
    this._width = Math.min(columns && columns > 0 ? columns : 80, 100)
  }

  /** Print the previous post (if any) and start collecting a new one. */
  async handleDirectory(msg: DirectoryMsg): Promise<void> {
    if (this._postCount > 0) {
      this._flushPost()
    }
    this._currentDir = { ...msg.metadata }
    this._postCount++
    this._files = []
  }

  /** Buffer one media entry for the current post. */
  async handleUrl(msg: UrlMsg): Promise<void> {
    const meta = { ...this._currentDir, ...msg.metadata }
    this._fileCount++
    const ext = (meta.extension as string) ?? 'jpg'
    const mid = meta.media_id ?? '?'
    this._files.push({
      num: (meta.num as number) ?? this._files.length + 1,
      filename: `${mid}.${ext}`,
      width: (meta.width as number) ?? 0,
      height: (meta.height as number) ?? 0,
      videoUrl: (meta.video_url as string) ?? null,
      audioUrl: (meta.audio_url as string) ?? null,
    })
  }

  /**
   * Print whatever is pending, then run a child ``PrintJob`` for the queued
   * URL and add its post/file counts to this job's counters.
   *
   * The extractor class is taken from ``metadata._extractor``; the message
   * is ignored when that is not a constructor or when its ``pattern`` does
   * not match the URL.
   */
  async handleQueue(msg: QueueMsg): Promise<void> {
    if (this._files.length > 0 || this._postCount > 0) {
      this._flushPost()
    }
    this._postCount = 0
    this._files = []

    const meta = { ...this._currentDir, ...msg.metadata }
    const extrClass = meta._extractor
    // A class reference is a constructor, i.e. ``typeof`` reports
    // 'function'. Checking for 'object' would reject every real class and
    // only admit values that ``Reflect.construct`` cannot construct.
    if (!extrClass || typeof extrClass !== 'function')
      return

    const cls = extrClass as unknown as {
      pattern: RegExp
      subcategory: string
      new (opts: Record<string, unknown>): Extractor
    }

    const match = cls.pattern.exec(msg.url)
    if (!match)
      return

    const parentExtr = this.extractor
    const childExtr = Reflect.construct(cls, [{
      url: msg.url,
      match,
      config: parentExtr.config,
      http: parentExtr.http,
      storage: parentExtr.storage,
      log: parentExtr.log,
    }])

    const childJob = new PrintJob(childExtr)
    const childStatus = await childJob.run()
    this.status |= childStatus

    // Merge stats
    this._postCount += childJob._postCount
    this._fileCount += childJob._fileCount
  }

  /** Output */

  /** Write the boxed summary of the current post; no-op without metadata. */
  private _flushPost(): void {
    const m = this._currentDir
    if (Object.keys(m).length === 0)
      return

    const w = this._width
    const labelW = 14

    // Header
    const shortcode = (m.post_shortcode as string) ?? '?'
    const header = ` Post #${this._postCount}: ${shortcode} `
    // Clamp: a header wider than the box would otherwise hand a negative
    // count to String.prototype.repeat, which throws a RangeError.
    const padTotal = Math.max(0, w - 2 - header.length)
    const padL = Math.floor(padTotal / 2)
    const padR = padTotal - padL
    process.stdout.write(`\n${dim('┌')}${'─'.repeat(padL)}${b(header)}${'─'.repeat(padR)}${dim('┐')}\n`)

    // ``color`` is either a wrapper function or a raw escape sequence.
    const row = (label: string, value: string, color?: ((s: string) => string) | string): void => {
      const colored = typeof color === 'function' ? color(value) : color ? `${color}${value}${RESET}` : value
      process.stdout.write(` ${dim('│')} ${c(pad(label, labelW))} ${colored}\n`)
    }

    // Meta
    const username = (m.username as string) ?? '?'
    const fullname = (m.fullname as string) ?? ''
    const author = fullname ? `${username} (${fullname})` : username
    row('Author:', author, g)

    const date = (m.date as string) ?? (m.post_date as string) ?? '?'
    row('Date:', date)

    const likes = typeof m.likes === 'number' ? m.likes.toLocaleString() : '?'
    const liked = m.liked ? 'yes' : 'no'
    row('Likes:', `${likes} | Liked: ${liked}`)

    const ptype = (m.type as string) ?? '?'
    row('Type:', `${ptype} (${this._files.length} files)`)

    const postUrl = (m.post_url as string) ?? '?'
    row('URL:', postUrl)

    // Description
    const desc = (m.description as string) ?? ''
    if (desc) {
      process.stdout.write(` ${dim('│')}\n`)
      process.stdout.write(` ${dim('│')} ${b('Description:')}\n`)
      for (const line of desc.split('\n')) {
        for (const wl of this._wrap(line, w - 8)) {
          process.stdout.write(` ${dim('│')} ${dim(wl)}\n`)
        }
      }
    }

    // Tags
    const tags = m.tags as string[] | undefined
    if (tags && tags.length > 0) {
      process.stdout.write(` ${dim('│')}\n`)
      process.stdout.write(` ${dim('│')} ${b('Tags:')} ${dim(tags.map(t => `#${t}`).join(' '))}\n`)
    }

    // Location
    const locName = (m.location_slug as string) ?? ''
    const locId = (m.location_id as string) ?? ''
    if (locName || locId) {
      const loc = locId ? `${locName} (ID: ${locId})` : locName
      row('Location:', loc)
    }

    // Coauthors
    const coauthors = m.coauthors as Array<{ username: string, full_name?: string }> | undefined
    if (coauthors && coauthors.length > 0) {
      row('Co-authors:', coauthors.map(co => co.full_name ? `${co.username} (${co.full_name})` : co.username).join(', '))
    }

    // Pinned
    const pinned = m.pinned as string[] | undefined
    if (pinned && pinned.length > 0) {
      row('Pinned:', pinned.join(', '))
    }

    // Expires (stories)
    const expires = m.expires as string | undefined
    if (expires) {
      row('Expires:', expires, YELLOW)
    }

    // Highlight title
    const hlTitle = m.highlight_title as string | undefined
    if (hlTitle) {
      row('Highlight:', hlTitle)
    }

    // Tagged owner
    const taggedUser = (m.tagged_username as string) ?? ''
    if (taggedUser) {
      const taggedFull = (m.tagged_full_name as string) ?? ''
      const display = taggedFull ? `${taggedUser} (${taggedFull})` : taggedUser
      row('Tagged by:', display)
    }

    // Media files
    if (this._files.length > 0) {
      process.stdout.write(` ${dim('│')}\n`)
      process.stdout.write(` ${dim('│')} ${b(`Media (${this._files.length} files):`)}\n`)

      const maxNumW = String(this._files.length).length
      const maxFileW = Math.max(...this._files.map(f => f.filename.length))
      const dimW = Math.min(maxFileW, 40)

      for (const f of this._files) {
        const numStr = `[${String(f.num).padStart(maxNumW)}]`
        // Names longer than 40 characters are cut to 37 plus an ellipsis.
        const dimStr = f.filename.length > 40 ? `${f.filename.slice(0, 37)}...` : pad(f.filename, dimW)
        const res = f.width ? `${f.width}x${f.height}` : '?x?'
        const badges: string[] = []
        if (f.videoUrl)
          badges.push('video')
        if (f.audioUrl)
          badges.push('audio')

        let line = ` ${dim('│')} ${g(numStr)} ${dimStr} ${res}`
        if (badges.length > 0) {
          line += ` ${YELLOW}(${badges.join('+')})${RESET}`
        }
        process.stdout.write(`${line}\n`)
      }
    }

    // Footer
    process.stdout.write(` ${dim('└')}${'─'.repeat(Math.max(0, w - 2))}${dim('┘')}\n`)
  }

  /**
   * Greedy word wrap: break `text` into lines of at most `maxLen`
   * characters, preferring the last space at or before the limit and
   * hard-cutting when a run has no space.
   */
  private _wrap(text: string, maxLen: number): string[] {
    // A limit below 1 would never consume any input and loop forever.
    const limit = Math.max(1, Math.floor(maxLen))
    if (text.length <= limit)
      return [text]
    const lines: string[] = []
    let remaining = text
    while (remaining.length > limit) {
      let cut = limit
      while (cut > 0 && remaining[cut] !== ' ')
        cut--
      if (cut === 0)
        cut = limit
      lines.push(remaining.slice(0, cut).trimEnd())
      remaining = remaining.slice(cut).trimStart()
    }
    if (remaining)
      lines.push(remaining)
    return lines
  }

  /** Print the last pending post followed by the post/file totals. */
  protected override _report(): void {
    // Flush last post
    this._flushPost()

    process.stdout.write(`\n${dim('──')} ${b('Summary')} ${dim('───')}\n`)
    process.stdout.write(` Posts: ${g(String(this._postCount))}\n`)
    process.stdout.write(` Files: ${g(String(this._fileCount))}\n`)
    process.stdout.write(`\n`)
  }
}