@chilfish/gallery-dl-instagram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Instagram API & parsed-post type definitions.
3
+ *
4
+ * These mirror the actual JSON shapes returned by Instagram's internal API
5
+ * (``/api/v1/…``).
6
+ */
7
+
8
+ /** API response types */
9
+
10
/**
 * User object as it appears inside API payloads.
 *
 * Only `pk`, `username` and `full_name` are required; every other field is
 * optional. Each `edge_*` field wraps a single numeric `count`. Profile
 * picture data is available both as plain URL strings and as
 * `ImageCandidate` entries.
 */
+ export interface InstagramUser {
11
+ pk: string
12
+ id?: string
13
+ username: string
14
+ full_name: string
15
+ is_private?: boolean
16
+ profile_pic_url?: string
17
+ profile_pic_url_hd?: string
18
+ hd_profile_pic_url_info?: ImageCandidate
19
+ hd_profile_pic_versions?: ImageCandidate[]
20
+ profile_pic_id?: string
21
+ edge_owner_to_timeline_media?: { count: number }
22
+ edge_felix_video_timeline?: { count: number }
23
+ edge_saved_media?: { count: number }
24
+ edge_mutual_followed_by?: { count: number }
25
+ edge_follow?: { count: number }
26
+ edge_followed_by?: { count: number }
27
+ edge_media_collections?: { count: number }
28
+ followed_by_viewer?: boolean
29
+ }
30
+
31
/**
 * One image entry: a `url` together with its `width` and `height`
 * (units are not stated in this file — presumably pixels; TODO confirm).
 * Used by `image_versions2.candidates` and by the HD profile-picture
 * fields of `InstagramUser`.
 */
+ export interface ImageCandidate {
32
+ url: string
33
+ width: number
34
+ height: number
35
+ }
36
+
37
/**
 * One video entry of `video_versions`: the same `url`/`width`/`height`
 * fields as `ImageCandidate` plus a numeric `type`.
 * NOTE(review): the meaning of the `type` codes is not defined in this file.
 */
+ export interface VideoVersion {
38
+ url: string
39
+ width: number
40
+ height: number
41
+ type: number
42
+ }
43
+
44
/** Location object referenced by `InstagramPost.location`; both fields are required. */
+ export interface InstagramLocation {
45
+ pk: string
46
+ short_name: string
47
+ }
48
+
49
/** Caption wrapper. `InstagramPost.caption` is either this object or `null`. */
+ export interface Caption {
50
+ text: string
51
+ }
52
+
53
/** Element of a `usertags.in` array; wraps the tagged `InstagramUser`. */
+ export interface UserTag {
54
+ user: InstagramUser
55
+ }
56
+
57
/** Element of a `reel_mentions` array; wraps the mentioned `InstagramUser`. */
+ export interface ReelMention {
58
+ user: InstagramUser
59
+ }
60
+
61
/**
 * Element of a `story_bloks_stickers` array.
 *
 * Every nesting level is declared as required: the sticker type string and
 * an `ig_mention` carrying account id, username and full name.
 * NOTE(review): stickers of other `bloks_sticker_type` values may not carry
 * `ig_mention` at runtime — confirm before dereferencing unconditionally.
 */
+ export interface BloksSticker {
62
+ bloks_sticker: {
63
+ bloks_sticker_type: string
64
+ sticker_data: {
65
+ ig_mention: {
66
+ account_id: string
67
+ username: string
68
+ full_name: string
69
+ }
70
+ }
71
+ }
72
+ }
73
+
74
/** Element of a `story_music_stickers` array; both sub-objects are optional. */
+ export interface MusicSticker {
75
+ music_asset_info?: MusicAssetInfo
76
+ music_consumption_info?: MusicConsumptionInfo
77
+ }
78
+
79
/**
 * Description of a music track. Only `id` and `progressive_download_url`
 * are required. Per their `_in_ms` suffix, the duration and highlight
 * start times are expressed in milliseconds.
 */
+ export interface MusicAssetInfo {
80
+ id: string
81
+ title?: string
82
+ display_artist?: string
83
+ ig_artist?: string
84
+ duration_in_ms?: number
85
+ highlight_start_times_in_ms?: number[]
86
+ progressive_download_url: string
87
+ cover_artwork_uri?: string
88
+ }
89
+
90
/**
 * Artist fields attached to a `MusicSticker`. Both fields also exist, with
 * the same names and types, on `MusicAssetInfo`.
 */
+ export interface MusicConsumptionInfo {
91
+ display_artist?: string
92
+ ig_artist?: string
93
+ }
94
+
95
/** Wrapper used by `InstagramPost.music_metadata`; holds an optional `MusicAssetInfo`. */
+ export interface MusicMetadata {
96
+ music_info?: MusicAssetInfo
97
+ }
98
+
99
/**
 * Top-level media item.
 *
 * Required fields: `pk`, `code`, `caption` (nullable), `taken_at`, `user`
 * and `image_versions2`. Child media may be listed under `carousel_media`
 * or under `items`; both are typed as `InstagramCarouselItem[]`.
 * NOTE(review): fields such as `expiring_at`, `seen` and `title` presumably
 * only appear on story/highlight payloads — confirm against the parser.
 */
+ export interface InstagramPost {
100
+ pk: string
101
+ id?: string
102
+ code: string
103
+ caption: Caption | null
104
+ taken_at: number
105
+ created_at?: number
106
+ like_count?: number
107
+ has_liked?: boolean
108
+ user: InstagramUser
109
+ carousel_media?: InstagramCarouselItem[]
110
+ image_versions2: { candidates: ImageCandidate[] }
111
+ video_versions?: VideoVersion[]
112
+ video_dash_manifest?: string
113
+ original_width?: number
114
+ original_height?: number
115
+ media_type?: number
116
+ original_media_type?: number
117
+ location?: InstagramLocation
118
+ coauthor_producers?: InstagramUser[]
119
+ usertags?: { in: UserTag[] }
120
+ reel_mentions?: ReelMention[]
121
+ story_bloks_stickers?: BloksSticker[]
122
+ story_music_stickers?: MusicSticker[]
123
+ music_metadata?: MusicMetadata
124
+ expiring_at?: number
125
+ seen?: number
126
+ items?: InstagramCarouselItem[]
127
+ timeline_pinned_user_ids?: string[]
128
+ clips_tab_pinned_user_ids?: string[]
129
+ subscription_media_visibility?: string
130
+ audience?: string
131
+ title?: string
132
+ pins?: unknown[]
133
+ }
134
+
135
/**
 * Child media item, used for `InstagramPost.carousel_media` and
 * `InstagramPost.items`.
 *
 * Carries the same media fields as `InstagramPost`, but only `pk` and
 * `image_versions2` are required; `code` and `taken_at` are optional here.
 */
+ export interface InstagramCarouselItem {
136
+ pk: string
137
+ id?: string
138
+ code?: string
139
+ taken_at?: number
140
+ image_versions2: { candidates: ImageCandidate[] }
141
+ video_versions?: VideoVersion[]
142
+ video_dash_manifest?: string
143
+ original_width?: number
144
+ original_height?: number
145
+ media_type?: number
146
+ original_media_type?: number
147
+ owner?: InstagramUser
148
+ reshared_story_media_author?: InstagramUser
149
+ expiring_at?: number
150
+ subscription_media_visibility?: string
151
+ audience?: string
152
+ story_music_stickers?: MusicSticker[]
153
+ usertags?: { in: UserTag[] }
154
+ reel_mentions?: ReelMention[]
155
+ story_bloks_stickers?: BloksSticker[]
156
+ }
157
+
158
+ /** Parsed post (normalized output) */
159
+
160
/**
 * Normalized post record.
 *
 * `type` is one of four literal kinds and `_files` holds one `ParsedMedia`
 * per file. Dates are carried as strings (`post_date`, `date`, `expires`).
 * NOTE(review): `count` presumably equals `_files.length` — confirm against
 * the parser that fills this structure.
 */
+ export interface ParsedPost {
161
+ post_id: string
162
+ post_shortcode: string
163
+ post_url: string
164
+ owner_id: string
165
+ username: string
166
+ fullname: string
167
+ post_date: string
168
+ date: string
169
+ description: string
170
+ tags?: string[]
171
+ location_id?: string
172
+ location_slug?: string
173
+ location_url?: string
174
+ likes: number
175
+ liked: boolean
176
+ pinned: string[]
177
+ coauthors?: Coauthor[]
178
+ sidecar_media_id?: string
179
+ sidecar_shortcode?: string
180
+ type: 'post' | 'reel' | 'story' | 'highlight'
181
+ count: number
182
+ _files: ParsedMedia[]
183
+ user?: InstagramUser
184
+ expires?: string
185
+ highlight_title?: string
186
+ tagged_owner_id?: string
187
+ tagged_username?: string
188
+ tagged_full_name?: string
189
+ subscription?: string
190
+ /** For graphql: */
191
+ typename?: string
192
+ }
193
+
194
/**
 * Normalized record for a single media file, stored in `ParsedPost._files`.
 *
 * `video_url` is always present but may be `null`, whereas the `audio_*`
 * fields are optional. Width and height are each carried twice: a plain
 * value and an `_original` value.
 */
+ export interface ParsedMedia {
195
+ num: number
196
+ date: string
197
+ media_id: string
198
+ shortcode: string
199
+ display_url: string
200
+ video_url: string | null
201
+ width: number
202
+ width_original: number
203
+ height: number
204
+ height_original: number
205
+ tagged_users: TaggedUser[]
206
+ owner?: InstagramUser
207
+ author?: InstagramUser
208
+ expires?: string
209
+ subscription?: string
210
+ audience?: string
211
+ audio_url?: string
212
+ audio_user?: string
213
+ audio_title?: string
214
+ audio_artist?: string
215
+ audio_duration?: number
216
+ audio_timestamps?: number[]
217
+ _ytdl_manifest_data?: string
218
+ sidecar_media_id?: string
219
+ sidecar_shortcode?: string
220
+ }
221
+
222
/**
 * Entry of `ParsedPost.coauthors`. Has the same fields as `TaggedUser`,
 * except that `full_name` is optional here.
 */
+ export interface Coauthor {
223
+ id: string
224
+ username: string
225
+ full_name?: string
226
+ }
227
+
228
/** Entry of `ParsedMedia.tagged_users`; all three fields are required. */
+ export interface TaggedUser {
229
+ id: string
230
+ username: string
231
+ full_name: string
232
+ }
233
+
234
+ /** Parser config */
235
+
236
/**
 * Settings and callbacks handed to the parser.
 *
 * `findTags` maps a text to a list of strings and `parseTimestamp` turns a
 * possibly missing numeric timestamp into a string. Note that `warnImage`
 * is a number while `warnVideo` is a boolean.
 * NOTE(review): the exact meaning of the flags is defined by the parser,
 * which is not part of this file.
 */
+ export interface ParserConfig {
237
+ root: string
238
+ findTags: (text: string) => string[]
239
+ parseTimestamp: (ts: number | null | undefined) => string
240
+ staticVideo: boolean
241
+ warnVideo: boolean
242
+ warnImage: number
243
+ videosDash: boolean
244
+ }
package/message.ts ADDED
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Message constructors and types.
3
+ *
4
+ * Simple factory functions so consumers never write raw message objects.
5
+ */
6
+
7
+ import type {
8
+ DirectoryMsg,
9
+ ExtractorClass,
10
+ Metadata,
11
+ QueueMsg,
12
+ UrlMsg,
13
+ } from './types'
14
+
15
+ // Re-export types for convenience
16
+ export type { Message, MessageIter } from './types'
17
+
18
/**
 * Build a `directory` message.
 *
 * @param metadata - Metadata to attach; a fresh empty object when omitted.
 * @returns A message whose `type` tag is `'directory'`.
 */
export function directory(metadata: Metadata = {}): DirectoryMsg {
  const message: DirectoryMsg = { type: 'directory', metadata }
  return message
}
21
+
22
/**
 * Build a `url` message.
 *
 * @param u - URL stored in the message's `url` field.
 * @param metadata - Metadata to attach; a fresh empty object when omitted.
 * @returns A message whose `type` tag is `'url'`.
 */
export function url(u: string, metadata: Metadata = {}): UrlMsg {
  const message: UrlMsg = { type: 'url', url: u, metadata }
  return message
}
25
+
26
/**
 * Build a `queue` message.
 *
 * @param u - URL stored in the message's `url` field.
 * @param metadata - Metadata to attach, optionally carrying an `_extractor`
 *   class reference; a fresh empty object when omitted.
 * @returns A message whose `type` tag is `'queue'`.
 */
export function queue(
  u: string,
  metadata: Metadata & { _extractor?: ExtractorClass } = {},
): QueueMsg {
  const message: QueueMsg = { type: 'queue', url: u, metadata }
  return message
}
package/package.json ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "name": "@chilfish/gallery-dl-instagram",
3
+ "type": "module",
4
+ "version": "0.1.0",
5
+ "description": "Instagram extraction pipeline — platform-agnostic SDK + CLI",
6
+ "license": "GPL-2.0-only",
7
+ "keywords": [
8
+ "instagram",
9
+ "scraper",
10
+ "downloader",
11
+ "gallery-dl",
12
+ "cli"
13
+ ],
14
+ "exports": {
15
+ ".": "./dist/index.mjs",
16
+ "./cli": "./dist/cli/index.mjs",
17
+ "./sdk": "./dist/sdk.mjs",
18
+ "./package.json": "./package.json"
19
+ },
20
+ "types": "./dist/index.d.mts",
21
+ "bin": {
22
+ "gallery-dl-instagram": "./dist/cli/index.mjs"
23
+ },
24
+ "files": [
25
+ "!*.log",
26
+ "!node_modules",
27
+ "cli/",
28
+ "config.ts",
29
+ "core/",
30
+ "dist/",
31
+ "index.ts",
32
+ "instagram/",
33
+ "message.ts",
34
+ "types.ts",
35
+ "utils/"
36
+ ],
37
+ "engines": {
38
+ "node": ">=18"
39
+ },
40
+ "scripts": {
41
+ "build": "tsdown",
42
+ "typecheck": "tsc --noEmit -p tsconfig.json",
43
+ "cli": "bun cli/index.ts",
44
+ "lint": "eslint . --fix",
45
+ "test": "vitest run",
46
+ "test:watch": "vitest",
47
+ "test:coverage": "vitest run --coverage",
48
+ "test:unit": "vitest run tests/unit",
49
+ "test:integration": "vitest run tests/integration"
50
+ },
51
+ "dependencies": {
52
+ "axios": "^1.16.1"
53
+ },
54
+ "devDependencies": {
55
+ "@antfu/eslint-config": "^9.0.0",
56
+ "@types/node": "^25.9.1",
57
+ "commander": "^14.0.3",
58
+ "dotenv": "^17.4.2",
59
+ "eslint": "^10.4.0",
60
+ "lefthook": "^2.1.8",
61
+ "tsdown": "^0.22.0",
62
+ "typescript": "^6.0.3",
63
+ "vitest": "^4.1.7"
64
+ },
65
+ "inlinedDependencies": {
66
+ "commander": "14.0.3"
67
+ }
68
+ }
package/types.ts ADDED
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Shared type definitions for the gallery-dl TypeScript port.
3
+ *
4
+ * Message types form a discriminated union — the `type` tag determines
5
+ * which handler a Job should invoke.
6
+ */
7
+
8
+ /** HTTP abstraction */
9
+
10
/**
 * Per-request options; every field is optional. An `HttpResponse` may carry
 * one of these back in its `request` field.
 */
+ export interface RequestConfig {
11
+ headers?: Record<string, string>
12
+ params?: Record<string, string | number | null | undefined>
13
+ method?: string
14
+ data?: unknown
15
+ timeout?: number
16
+ signal?: AbortSignal
17
+ /** WithCredentials / CORS cookie passthrough for browsers */
18
+ withCredentials?: boolean
19
+ /** For binary downloads — 'arraybuffer' returns raw bytes */
20
+ responseType?: 'arraybuffer' | 'text' | 'json'
21
+ }
22
+
23
/**
 * Result of an `HttpClient.request` call. `T` is the type of the `data`
 * payload and defaults to `unknown`, so callers must narrow it unless they
 * supply a type argument.
 */
+ export interface HttpResponse<T = unknown> {
24
+ status: number
25
+ data: T
26
+ headers: Record<string, string>
27
+ /** Final URL after redirects */
28
+ url: string
29
+ request?: RequestConfig
30
+ }
31
+
32
/**
 * Minimal HTTP client contract: a single generic `request` method that
 * resolves to an `HttpResponse<T>`.
 *
 * The inline config repeats the fields of `RequestConfig` and adds a
 * required `url`. Note that `withCredentials`, which `RequestConfig`
 * declares, is not part of this parameter type.
 */
+ export interface HttpClient {
33
+ request: <T = unknown>(config: {
34
+ url: string
35
+ method?: string
36
+ headers?: Record<string, string>
37
+ params?: Record<string, string | number | null | undefined>
38
+ data?: unknown
39
+ signal?: AbortSignal
40
+ timeout?: number
41
+ responseType?: 'arraybuffer' | 'text' | 'json'
42
+ }) => Promise<HttpResponse<T>>
43
+ }
44
+
45
+ /** Storage abstraction */
46
+
47
/**
 * Asynchronous storage contract with three path-based operations.
 * `write` accepts either raw bytes (`Uint8Array`) or a string.
 * NOTE(review): whether `mkdir` creates missing parent directories is up to
 * the implementation — not specified here.
 */
+ export interface Storage {
48
+ exists: (path: string) => Promise<boolean>
49
+ write: (path: string, data: Uint8Array | string) => Promise<void>
50
+ mkdir: (path: string) => Promise<void>
51
+ }
52
+
53
/**
 * Recursive value type for configuration entries: a string, number,
 * boolean or `null`, or an array / string-keyed object of further
 * `ConfigValue`s.
 */
+ export type ConfigValue
54
+ = | string
55
+ | number
56
+ | boolean
57
+ | null
58
+ | ConfigValue[]
59
+ | { [key: string]: ConfigValue }
60
+
61
/** Configuration object: a string-keyed map of `ConfigValue` entries. */
+ export interface Config { [key: string]: ConfigValue }
62
+
63
+ /** Metadata & messages */
64
+
65
+ /**
66
+ * Flat string-keyed metadata dictionary.
67
+ * In gallery-dl every kwdict is a plain `{string → value}` map.
+ * Values are typed `unknown`, so readers have to narrow them before use.
68
+ */
69
+ export type Metadata = Record<string, unknown>
70
+
71
/** Message variant tagged `'directory'`; carries only metadata. Both fields are read-only. */
+ export interface DirectoryMsg {
72
+ readonly type: 'directory'
73
+ readonly metadata: Metadata
74
+ }
75
+
76
/** Message variant tagged `'url'`; carries a URL plus metadata. All fields are read-only. */
+ export interface UrlMsg {
77
+ readonly type: 'url'
78
+ readonly url: string
79
+ readonly metadata: Metadata
80
+ }
81
+
82
/**
 * Message variant tagged `'queue'`; carries a URL plus metadata. The
 * metadata may additionally hold an `_extractor` class reference.
 */
+ export interface QueueMsg {
83
+ readonly type: 'queue'
84
+ readonly url: string
85
+ readonly metadata: Metadata & {
86
+ readonly _extractor?: ExtractorClass
87
+ }
88
+ }
89
+
90
/** Union of the three message variants, discriminated by their literal `type` field. */
+ export type Message = DirectoryMsg | UrlMsg | QueueMsg
91
+
92
+ /**
93
+ * Async generator that yields Message values.
+ * It returns nothing (`void`) and accepts `unknown` values sent via `next()`.
94
+ */
95
+ export type MessageIter = AsyncGenerator<Message, void, unknown>
96
+
97
+ /** Extractor class reference (for Queue dispatch) */
98
+
99
+ /**
100
+ * Minimal shape that every Extractor class must expose so the Dispatch
101
+ * logic can re-instantiate from a URL.
+ *
+ * Only `pattern` and `subcategory` are declared; no constructor signature
+ * is part of this type. A value of this type can be attached to a
+ * `QueueMsg` through its `metadata._extractor` field.
102
+ */
103
+ export interface ExtractorClass {
104
+ pattern: RegExp
105
+ subcategory: string
106
+ }
107
+
108
/**
 * Instance-side extractor contract: three read-only identification strings,
 * an async `initialize` step, and an async iterator producing a
 * `MessageIter`, which makes instances usable with `for await`.
 * NOTE(review): presumably `initialize` must be awaited before iterating —
 * confirm against the job runner.
 */
+ // Pre-declare the Extractor interface to avoid circular refs
109
+ export interface Extractor {
110
+ readonly category: string
111
+ readonly subcategory: string
112
+ readonly root: string
113
+ initialize: () => Promise<void>
114
+ [Symbol.asyncIterator]: () => MessageIter
115
+ }
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Instagram-style Base64-variant ID ↔ shortcode conversion.
3
+ */
4
+
5
/** The 64 shortcode symbols, ordered by digit value (A = 0 … _ = 63). */
const ALPHABET
  = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

/** Reverse table (symbol → digit value) giving O(1) lookups while decoding. */
const CHAR_INDEX: Record<string, number> = Object.fromEntries(
  Array.from(ALPHABET, (symbol, value): [string, number] => [symbol, value]),
)

/** Radix of the encoding as a bigint (the alphabet size). */
const BASE = BigInt(ALPHABET.length)
15
+
16
/**
 * Convert an Instagram shortcode to the decimal string of its numeric ID.
 *
 * The shortcode is read as a base-64 number, most significant symbol first.
 * Symbols that are not in the alphabet count as digit 0.
 *
 * @param shortcode - Shortcode to decode.
 * @returns The ID in decimal notation (`'0'` for an empty shortcode).
 */
export function idFromShortcode(shortcode: string): string {
  const value = Array.from(shortcode).reduce(
    (acc, symbol) => acc * BASE + BigInt(CHAR_INDEX[symbol] ?? 0),
    0n,
  )
  return value.toString()
}
26
+
27
/**
 * Convert a numeric post ID to its Instagram shortcode.
 *
 * @param postId - ID as a decimal string or a number. It is passed to
 *   `BigInt()`, which throws for non-integer numbers and non-numeric strings.
 * @returns The shortcode, most significant symbol first. IDs that are zero
 *   or negative produce an empty string.
 */
export function shortcodeFromId(postId: string | number): string {
  let code = ''
  // Peel off the least significant digit each round and prepend its symbol.
  for (let rest = BigInt(postId); rest > 0n; rest /= BASE) {
    code = ALPHABET[Number(rest % BASE)]! + code
  }
  return code
}
package/utils/text.ts ADDED
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Text utilities ported from gallery-dl's ``text`` module.
3
+ *
4
+ * All functions are pure and environment-agnostic.
5
+ */
6
+
7
+ /** String extraction */
8
+
9
/**
 * Return the text enclosed by the first ``begin`` and the next ``end``
 * that follows it.
 *
 * @param txt - Text to search.
 * @param begin - Opening delimiter.
 * @param end - Closing delimiter; only looked for after ``begin``.
 * @returns The enclosed text (possibly empty), or ``null`` when either
 *   delimiter cannot be found.
 */
export function extract(
  txt: string,
  begin: string,
  end: string,
): string | null {
  const beginAt = txt.indexOf(begin)
  const contentAt = beginAt + begin.length
  // Without an opening delimiter there is nothing to close.
  const endAt = beginAt < 0 ? -1 : txt.indexOf(end, contentAt)
  return endAt < 0 ? null : txt.slice(contentAt, endAt)
}
27
+
28
/**
 * Variant of ``extract`` that never returns ``null``.
 * Counterpart of the Python ``extr()`` helper.
 *
 * @param txt - Text to search.
 * @param begin - Opening delimiter.
 * @param end - Closing delimiter.
 * @param default_ - Value returned when a delimiter is missing
 *   (empty string unless given).
 * @returns The enclosed text, or ``default_``.
 */
export function extr(
  txt: string,
  begin: string,
  end: string,
  default_ = '',
): string {
  const found = extract(txt, begin, end)
  return found === null ? default_ : found
}
40
+
41
+ /** Unicode / HTML */
42
+
43
/**
 * Replace every ``\\uXXXX`` escape (exactly four hex digits) with the
 * UTF-16 code unit it names. Text without such escapes is returned as-is.
 *
 * @param text - Text that may contain escape sequences.
 * @returns The text with all well-formed escapes decoded.
 */
export function parseUnicodeEscapes(text: string): string {
  return text.includes('\\u')
    ? text.replace(/\\u([0-9a-fA-F]{4})/g, (_match, digits: string) => {
        const codeUnit = Number.parseInt(digits, 16)
        return String.fromCharCode(codeUnit)
      })
    : text
}
52
+
53
/**
 * HTML entity decode.
 *
 * In Node.js we could use a DOM parser, but since this library is
 * environment-agnostic we ship a minimal table covering the common named
 * entities. Numeric entities (``&#39;``, ``&#x2F;`` …) are decoded
 * arithmetically and therefore need no table entry.
 *
 * A ``Map`` is used instead of a plain object so that names such as
 * ``constructor`` cannot resolve to inherited ``Object.prototype`` members.
 */
const HTML_ENTITIES: ReadonlyMap<string, string> = new Map<string, string>([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', '\''],
  ['nbsp', '\u00A0'],
])

/**
 * Matches one well-formed entity: ``&name;``, ``&#123;`` or ``&#x1F;``.
 * The name may not contain ``&``, ``;`` or whitespace, so a stray ampersand
 * ("AT&T") can no longer swallow the real entity that follows it.
 */
const RE_ENTITY = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g

/** Largest value accepted by ``String.fromCodePoint``. */
const MAX_CODE_POINT = 0x10FFFF

/**
 * Decode HTML entities in ``text``.
 *
 * @param text - Text that may contain named or numeric entities.
 * @returns The text with every recognized entity replaced. Unknown names
 *   and numeric entities outside the Unicode range are left untouched.
 */
export function unescape(text: string): string {
  return text.replace(RE_ENTITY, (m, name: string) => {
    if (name.startsWith('#')) {
      const isHex = name[1] === 'x' || name[1] === 'X'
      const cp = isHex
        ? Number.parseInt(name.slice(2), 16)
        : Number.parseInt(name.slice(1), 10)
      // fromCodePoint throws a RangeError above U+10FFFF; keep the raw text instead.
      return cp <= MAX_CODE_POINT ? String.fromCodePoint(cp) : m
    }
    return HTML_ENTITIES.get(name) ?? m // leave unrecognized as-is
  })
}
90
+
91
+ /** URL helpers */
92
+
93
/**
 * Matches the percent-escapes that can make up ONE UTF-8 encoded character:
 * a single ASCII byte, or a 2-, 3- or 4-byte lead followed by the matching
 * number of continuation bytes (``%80``–``%BF``).
 */
const RE_UTF8_ESCAPE
  = /%(?:[0-7][0-9a-f]|[cd][0-9a-f]%[89ab][0-9a-f]|e[0-9a-f](?:%[89ab][0-9a-f]){2}|f[0-7](?:%[89ab][0-9a-f]){3})/gi

/**
 * URL-decode a string.
 *
 * Well-formed input is handled by ``decodeURIComponent`` directly. When the
 * input contains a malformed escape, decoding falls back to a best-effort
 * mode: each escape group that forms a complete UTF-8 character is decoded
 * on its own, so valid multi-byte characters are still recovered, and
 * everything else is left verbatim.
 *
 * @param text - Percent-encoded text.
 * @returns The decoded text; never throws on malformed escapes.
 */
export function unquote(text: string): string {
  try {
    return decodeURIComponent(text)
  }
  catch {
    return text.replace(RE_UTF8_ESCAPE, (group) => {
      try {
        return decodeURIComponent(group)
      }
      catch {
        // Overlong or otherwise invalid encoding: keep the raw escapes.
        return group
      }
    })
  }
}
112
+
113
/**
 * Make sure a URL carries an HTTP(S) scheme.
 *
 * Empty input and URLs already starting with ``https://`` or ``http://``
 * are returned unchanged. Otherwise any leading ``/`` and ``:`` characters
 * are dropped (e.g. from ``//host/path``) and ``scheme`` is prepended.
 *
 * @param url - URL to normalize.
 * @param scheme - Prefix to add when none is present.
 * @returns The URL with a scheme.
 */
export function ensureHttpScheme(url: string, scheme = 'https://'): string {
  if (!url || url.startsWith('https://') || url.startsWith('http://'))
    return url
  const bare = url.replace(/^[/:]+/, '')
  return scheme + bare
}
123
+
124
/**
 * Split the file name of a URL into name and extension and store both in
 * ``meta`` (keys ``filename`` and ``extension``).
 *
 * The name is split at its last dot, provided the dot is not the first
 * character and the extension is at most 16 characters long; otherwise the
 * whole name becomes ``filename`` and ``extension`` is the empty string.
 * Both parts are URL-decoded, and the extension is lower-cased.
 *
 * @param url - URL to take the file name from.
 * @param meta - Object that receives the two keys; mutated in place.
 */
export function nameExtFromURL(
  url: string,
  meta: Record<string, unknown>,
): void {
  const raw = filenameFromURL(url)
  const dotAt = raw.lastIndexOf('.')
  const extLength = raw.length - dotAt - 1
  const splittable = dotAt > 0 && extLength <= 16

  const stem = splittable ? raw.slice(0, dotAt) : raw
  const ext = splittable ? raw.slice(dotAt + 1) : ''

  meta.filename = unquote(stem)
  meta.extension = unquote(ext).toLowerCase()
}
142
+
143
/**
 * Return the last path segment of a URL: everything after the final ``/``
 * in the part that precedes the first ``?``.
 *
 * @param url - URL to inspect.
 * @returns The segment (possibly empty), or ``''`` if string handling throws.
 */
function filenameFromURL(url: string): string {
  try {
    const queryAt = url.indexOf('?')
    const path = queryAt < 0 ? url : url.slice(0, queryAt)
    return path.slice(path.lastIndexOf('/') + 1)
  }
  catch {
    return ''
  }
}
154
+
155
/**
 * Parse an integer from a possibly-null value.
 *
 * Strings are parsed in base 10 with ``Number.parseInt`` (leading digits are
 * accepted, trailing garbage is ignored). Numbers are truncated toward zero
 * so the result is always an integer, matching the string path.
 *
 * @param value - Value to convert.
 * @param default_ - Returned when ``value`` is ``null``/``undefined`` or does
 *   not yield a finite number (``NaN``, ``±Infinity``, unparsable text).
 * @returns The parsed integer, or ``default_`` on failure.
 */
export function parseInt(
  value: string | number | null | undefined,
  default_: number = 0,
): number {
  if (value == null)
    return default_
  const n = typeof value === 'number'
    ? Math.trunc(value) // NaN and ±Infinity pass through and hit the check below
    : Number.parseInt(String(value), 10)
  return Number.isFinite(n) ? n : default_
}
167
+
168
/**
 * Compile ``pattern`` and return a function that lists every distinct
 * match in a text, in order of first appearance (a de-duplicated
 * counterpart of Python's ``re.findall`` for patterns without groups).
 *
 * @param pattern - Regular-expression source.
 * @param flags - RegExp flags. The global flag is always enforced, because
 *   without it only the first match would be reported.
 * @returns A function mapping a text to its unique matches (``[]`` if none).
 */
export function tagRe(pattern: string, flags = 'g'): (text: string) => string[] {
  const re = new RegExp(pattern, flags.includes('g') ? flags : `${flags}g`)
  return (text: string) => {
    const matches = text.match(re)
    return matches ? [...new Set(matches)] : []
  }
}

/**
 * Pre-configured hashtag regex.
 *
 * JavaScript's ``\w`` only covers ASCII, whereas the Python original matches
 * Unicode word characters. Letters and numbers of any script are therefore
 * spelled out via Unicode property escapes so that tags such as ``#café``
 * or ``#日本語`` are captured in full.
 */
export const findTags = tagRe('#[\\p{L}\\p{N}_]+', 'gu')