@chilfish/gallery-dl-instagram 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/adapter.ts +284 -0
- package/cli/cookies.ts +59 -0
- package/cli/index.ts +337 -0
- package/config.ts +80 -0
- package/core/extractor.ts +217 -0
- package/core/job.ts +581 -0
- package/dist/adapter-Bt86eL1R.mjs +189 -0
- package/dist/cli/index.d.mts +1 -0
- package/dist/cli/index.mjs +3160 -0
- package/dist/extractors-Byw-2lPL.mjs +1943 -0
- package/dist/index.d.mts +187 -0
- package/dist/index.mjs +40 -0
- package/dist/sdk-B9fRyc1e.d.mts +737 -0
- package/dist/sdk.d.mts +2 -0
- package/dist/sdk.mjs +93 -0
- package/index.ts +159 -0
- package/instagram/api.ts +531 -0
- package/instagram/base.ts +275 -0
- package/instagram/extractors.ts +521 -0
- package/instagram/index.ts +43 -0
- package/instagram/parsers.ts +583 -0
- package/instagram/types.ts +244 -0
- package/message.ts +31 -0
- package/package.json +68 -0
- package/types.ts +115 -0
- package/utils/id-codec.ts +39 -0
- package/utils/text.ts +178 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Instagram API & parsed-post type definitions.
|
|
3
|
+
*
|
|
4
|
+
* These mirror the actual JSON shapes returned by Instagram's internal API
|
|
5
|
+
* (``/api/v1/…``).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** API response types */
|
|
9
|
+
|
|
10
|
+
/**
 * User object as it appears in Instagram API responses.
 *
 * Only `pk`, `username` and `full_name` are required by this type; every
 * other field is declared optional.
 */
export interface InstagramUser {
  pk: string
  id?: string
  username: string
  full_name: string
  is_private?: boolean
  // Profile picture fields, as plain URLs or as sized candidates.
  profile_pic_url?: string
  profile_pic_url_hd?: string
  hd_profile_pic_url_info?: ImageCandidate
  hd_profile_pic_versions?: ImageCandidate[]
  profile_pic_id?: string
  // Each `edge_*` field wraps a single numeric `count`.
  // NOTE(review): presumably per-profile counters (posts, followers, …) —
  // the exact meaning of each is not visible here; confirm against the API.
  edge_owner_to_timeline_media?: { count: number }
  edge_felix_video_timeline?: { count: number }
  edge_saved_media?: { count: number }
  edge_mutual_followed_by?: { count: number }
  edge_follow?: { count: number }
  edge_followed_by?: { count: number }
  edge_media_collections?: { count: number }
  followed_by_viewer?: boolean
}
|
|
30
|
+
|
|
31
|
+
/**
 * One image rendition: a URL together with its width and height.
 * NOTE(review): dimensions are presumably pixels — confirm.
 */
export interface ImageCandidate {
  url: string
  width: number
  height: number
}
|
|
36
|
+
|
|
37
|
+
/**
 * One video rendition: a URL together with its width and height.
 */
export interface VideoVersion {
  url: string
  width: number
  height: number
  // NOTE(review): numeric code whose meaning is not defined in this file.
  type: number
}
|
|
43
|
+
|
|
44
|
+
/** Location object attached to a post (see `InstagramPost.location`). */
export interface InstagramLocation {
  pk: string
  short_name: string
}
|
|
48
|
+
|
|
49
|
+
/** Caption object of a post; only its `text` is modelled. */
export interface Caption {
  text: string
}
|
|
52
|
+
|
|
53
|
+
/** Entry of a `usertags.in` list; wraps the tagged user. */
export interface UserTag {
  user: InstagramUser
}
|
|
56
|
+
|
|
57
|
+
/** Entry of a `reel_mentions` list; wraps the mentioned user. */
export interface ReelMention {
  user: InstagramUser
}
|
|
60
|
+
|
|
61
|
+
/**
 * Entry of a `story_bloks_stickers` list.
 *
 * Only the `ig_mention` payload (account id, username, full name) is
 * modelled here.
 */
export interface BloksSticker {
  bloks_sticker: {
    // NOTE(review): set of possible values is not defined in this file.
    bloks_sticker_type: string
    sticker_data: {
      ig_mention: {
        account_id: string
        username: string
        full_name: string
      }
    }
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Entry of a `story_music_stickers` list.
 * Both parts are declared optional.
 */
export interface MusicSticker {
  music_asset_info?: MusicAssetInfo
  music_consumption_info?: MusicConsumptionInfo
}
|
|
78
|
+
|
|
79
|
+
/**
 * Description of a music asset.
 * `id` and `progressive_download_url` are required; the rest is optional.
 */
export interface MusicAssetInfo {
  id: string
  title?: string
  display_artist?: string
  ig_artist?: string
  // Unit is milliseconds, per the field names.
  duration_in_ms?: number
  highlight_start_times_in_ms?: number[]
  progressive_download_url: string
  cover_artwork_uri?: string
}
|
|
89
|
+
|
|
90
|
+
/**
 * Artist information attached to a music sticker
 * (see `MusicSticker.music_consumption_info`).
 */
export interface MusicConsumptionInfo {
  display_artist?: string
  ig_artist?: string
}
|
|
94
|
+
|
|
95
|
+
/** Value of a post's `music_metadata` field; optionally carries the music asset. */
export interface MusicMetadata {
  music_info?: MusicAssetInfo
}
|
|
98
|
+
|
|
99
|
+
/**
 * Post object as it appears in Instagram API responses.
 *
 * Required by this type: `pk`, `code`, `caption` (nullable), `taken_at`,
 * `user` and `image_versions2`. Everything else is declared optional.
 *
 * NOTE(review): several fields are repeated verbatim on
 * `InstagramCarouselItem`; keep the two declarations in sync.
 */
export interface InstagramPost {
  pk: string
  id?: string
  code: string
  caption: Caption | null
  // NOTE(review): presumably Unix timestamps — confirm unit (s vs ms).
  taken_at: number
  created_at?: number
  like_count?: number
  has_liked?: boolean
  user: InstagramUser
  carousel_media?: InstagramCarouselItem[]
  // Media payload: image candidates, optional video renditions and manifest.
  image_versions2: { candidates: ImageCandidate[] }
  video_versions?: VideoVersion[]
  video_dash_manifest?: string
  original_width?: number
  original_height?: number
  // NOTE(review): numeric codes; their meaning is not defined in this file.
  media_type?: number
  original_media_type?: number
  location?: InstagramLocation
  // People attached to the post: co-authors, tags, mentions.
  coauthor_producers?: InstagramUser[]
  usertags?: { in: UserTag[] }
  reel_mentions?: ReelMention[]
  story_bloks_stickers?: BloksSticker[]
  story_music_stickers?: MusicSticker[]
  music_metadata?: MusicMetadata
  expiring_at?: number
  seen?: number
  items?: InstagramCarouselItem[]
  timeline_pinned_user_ids?: string[]
  clips_tab_pinned_user_ids?: string[]
  subscription_media_visibility?: string
  audience?: string
  title?: string
  pins?: unknown[]
}
|
|
134
|
+
|
|
135
|
+
/**
 * Single media item, used for `InstagramPost.carousel_media` and
 * `InstagramPost.items`.
 *
 * Only `pk` and `image_versions2` are required by this type.
 */
export interface InstagramCarouselItem {
  pk: string
  id?: string
  code?: string
  // NOTE(review): presumably a Unix timestamp — confirm unit.
  taken_at?: number
  image_versions2: { candidates: ImageCandidate[] }
  video_versions?: VideoVersion[]
  video_dash_manifest?: string
  original_width?: number
  original_height?: number
  // NOTE(review): numeric codes; their meaning is not defined in this file.
  media_type?: number
  original_media_type?: number
  owner?: InstagramUser
  reshared_story_media_author?: InstagramUser
  expiring_at?: number
  subscription_media_visibility?: string
  audience?: string
  story_music_stickers?: MusicSticker[]
  usertags?: { in: UserTag[] }
  reel_mentions?: ReelMention[]
  story_bloks_stickers?: BloksSticker[]
}
|
|
157
|
+
|
|
158
|
+
/** Parsed post (normalized output) */
|
|
159
|
+
|
|
160
|
+
/**
 * Normalized, parsed representation of a post.
 *
 * `type` distinguishes posts, reels, stories and highlights; `_files` holds
 * the parsed media entries belonging to the post.
 */
export interface ParsedPost {
  post_id: string
  post_shortcode: string
  post_url: string
  owner_id: string
  username: string
  fullname: string
  // NOTE(review): dates are carried as strings; the format is produced
  // elsewhere (see `ParserConfig.parseTimestamp`) — confirm.
  post_date: string
  date: string
  description: string
  tags?: string[]
  location_id?: string
  location_slug?: string
  location_url?: string
  likes: number
  liked: boolean
  pinned: string[]
  coauthors?: Coauthor[]
  sidecar_media_id?: string
  sidecar_shortcode?: string
  type: 'post' | 'reel' | 'story' | 'highlight'
  // NOTE(review): presumably the number of entries in `_files` — confirm.
  count: number
  _files: ParsedMedia[]
  user?: InstagramUser
  expires?: string
  highlight_title?: string
  tagged_owner_id?: string
  tagged_username?: string
  tagged_full_name?: string
  subscription?: string
  /** For graphql: */
  typename?: string
}
|
|
193
|
+
|
|
194
|
+
/**
 * Normalized, parsed representation of one media file of a post
 * (an element of `ParsedPost._files`).
 */
export interface ParsedMedia {
  // NOTE(review): presumably the position of this file within its post.
  num: number
  date: string
  media_id: string
  shortcode: string
  display_url: string
  // `null` when there is no video URL for this entry.
  video_url: string | null
  // Two sets of dimensions are kept: plain and `*_original`.
  width: number
  width_original: number
  height: number
  height_original: number
  tagged_users: TaggedUser[]
  owner?: InstagramUser
  author?: InstagramUser
  expires?: string
  subscription?: string
  audience?: string
  // Optional audio details.
  audio_url?: string
  audio_user?: string
  audio_title?: string
  audio_artist?: string
  audio_duration?: number
  audio_timestamps?: number[]
  // NOTE(review): presumably the raw DASH manifest for a downstream
  // downloader — confirm against the parser.
  _ytdl_manifest_data?: string
  sidecar_media_id?: string
  sidecar_shortcode?: string
}
|
|
221
|
+
|
|
222
|
+
/** Co-author entry of a parsed post (see `ParsedPost.coauthors`). */
export interface Coauthor {
  id: string
  username: string
  full_name?: string
}
|
|
227
|
+
|
|
228
|
+
/** Tagged-user entry of a parsed media file (see `ParsedMedia.tagged_users`). */
export interface TaggedUser {
  id: string
  username: string
  full_name: string
}
|
|
233
|
+
|
|
234
|
+
/** Parser config */
|
|
235
|
+
|
|
236
|
+
/**
 * Settings and helper callbacks handed to the parser.
 */
export interface ParserConfig {
  // NOTE(review): presumably the site root URL used to build links — confirm.
  root: string
  /** Returns the tags found in a piece of text. */
  findTags: (text: string) => string[]
  /** Converts a (possibly missing) numeric timestamp into a string. */
  parseTimestamp: (ts: number | null | undefined) => string
  // NOTE(review): the flags below are consumed by the parser; their exact
  // effect is not visible in this file.
  staticVideo: boolean
  warnVideo: boolean
  warnImage: number
  videosDash: boolean
}
|
package/message.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Message constructors and types.
|
|
3
|
+
*
|
|
4
|
+
* Simple factory functions so consumers never write raw message objects.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type {
|
|
8
|
+
DirectoryMsg,
|
|
9
|
+
ExtractorClass,
|
|
10
|
+
Metadata,
|
|
11
|
+
QueueMsg,
|
|
12
|
+
UrlMsg,
|
|
13
|
+
} from './types'
|
|
14
|
+
|
|
15
|
+
// Re-export types for convenience
|
|
16
|
+
export type { Message, MessageIter } from './types'
|
|
17
|
+
|
|
18
|
+
/**
 * Build a `directory` message.
 *
 * @param metadata - Metadata to attach; defaults to a fresh empty object.
 * @returns A message tagged `type: 'directory'` that carries `metadata`.
 */
export function directory(metadata: Metadata = {}): DirectoryMsg {
  const message: DirectoryMsg = { type: 'directory', metadata }
  return message
}
|
|
21
|
+
|
|
22
|
+
/**
 * Build a `url` message.
 *
 * @param u - The URL the message refers to.
 * @param metadata - Metadata to attach; defaults to a fresh empty object.
 * @returns A message tagged `type: 'url'` that carries `u` and `metadata`.
 */
export function url(u: string, metadata: Metadata = {}): UrlMsg {
  const message: UrlMsg = { type: 'url', url: u, metadata }
  return message
}
|
|
25
|
+
|
|
26
|
+
/**
 * Build a `queue` message.
 *
 * @param u - The URL to enqueue.
 * @param metadata - Metadata to attach, optionally naming the extractor
 *   class via `_extractor`; defaults to a fresh empty object.
 * @returns A message tagged `type: 'queue'` that carries `u` and `metadata`.
 */
export function queue(
  u: string,
  metadata: Metadata & { _extractor?: ExtractorClass } = {},
): QueueMsg {
  const message: QueueMsg = { type: 'queue', url: u, metadata }
  return message
}
|
package/package.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@chilfish/gallery-dl-instagram",
|
|
3
|
+
"type": "module",
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"description": "Instagram extraction pipeline — platform-agnostic SDK + CLI",
|
|
6
|
+
"license": "GPL-2.0-only",
|
|
7
|
+
"keywords": [
|
|
8
|
+
"instagram",
|
|
9
|
+
"scraper",
|
|
10
|
+
"downloader",
|
|
11
|
+
"gallery-dl",
|
|
12
|
+
"cli"
|
|
13
|
+
],
|
|
14
|
+
"exports": {
|
|
15
|
+
".": "./dist/index.mjs",
|
|
16
|
+
"./cli": "./dist/cli/index.mjs",
|
|
17
|
+
"./sdk": "./dist/sdk.mjs",
|
|
18
|
+
"./package.json": "./package.json"
|
|
19
|
+
},
|
|
20
|
+
"types": "./dist/index.d.mts",
|
|
21
|
+
"bin": {
|
|
22
|
+
"gallery-dl-instagram": "./dist/cli/index.mjs"
|
|
23
|
+
},
|
|
24
|
+
"files": [
|
|
25
|
+
"!*.log",
|
|
26
|
+
"!node_modules",
|
|
27
|
+
"cli/",
|
|
28
|
+
"config.ts",
|
|
29
|
+
"core/",
|
|
30
|
+
"dist/",
|
|
31
|
+
"index.ts",
|
|
32
|
+
"instagram/",
|
|
33
|
+
"message.ts",
|
|
34
|
+
"types.ts",
|
|
35
|
+
"utils/"
|
|
36
|
+
],
|
|
37
|
+
"engines": {
|
|
38
|
+
"node": ">=18"
|
|
39
|
+
},
|
|
40
|
+
"scripts": {
|
|
41
|
+
"build": "tsdown",
|
|
42
|
+
"typecheck": "tsc --noEmit -p tsconfig.json",
|
|
43
|
+
"cli": "bun cli/index.ts",
|
|
44
|
+
"lint": "eslint . --fix",
|
|
45
|
+
"test": "vitest run",
|
|
46
|
+
"test:watch": "vitest",
|
|
47
|
+
"test:coverage": "vitest run --coverage",
|
|
48
|
+
"test:unit": "vitest run tests/unit",
|
|
49
|
+
"test:integration": "vitest run tests/integration"
|
|
50
|
+
},
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"axios": "^1.16.1"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@antfu/eslint-config": "^9.0.0",
|
|
56
|
+
"@types/node": "^25.9.1",
|
|
57
|
+
"commander": "^14.0.3",
|
|
58
|
+
"dotenv": "^17.4.2",
|
|
59
|
+
"eslint": "^10.4.0",
|
|
60
|
+
"lefthook": "^2.1.8",
|
|
61
|
+
"tsdown": "^0.22.0",
|
|
62
|
+
"typescript": "^6.0.3",
|
|
63
|
+
"vitest": "^4.1.7"
|
|
64
|
+
},
|
|
65
|
+
"inlinedDependencies": {
|
|
66
|
+
"commander": "14.0.3"
|
|
67
|
+
}
|
|
68
|
+
}
|
package/types.ts
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared type definitions for the gallery-dl TypeScript port.
|
|
3
|
+
*
|
|
4
|
+
* Message types form a discriminated union — the `type` tag determines
|
|
5
|
+
* which handler a Job should invoke.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** HTTP abstraction */
|
|
9
|
+
|
|
10
|
+
/**
 * Options describing a single HTTP request. Every field is optional.
 */
export interface RequestConfig {
  headers?: Record<string, string>
  /** Query parameters; values may be `null`/`undefined`. */
  params?: Record<string, string | number | null | undefined>
  method?: string
  /** Request body. */
  data?: unknown
  // NOTE(review): presumably milliseconds — confirm against the adapter.
  timeout?: number
  /** Signal used to abort the request. */
  signal?: AbortSignal
  /** WithCredentials / CORS cookie passthrough for browsers */
  withCredentials?: boolean
  /** For binary downloads — 'arraybuffer' returns raw bytes */
  responseType?: 'arraybuffer' | 'text' | 'json'
}
|
|
22
|
+
|
|
23
|
+
/**
 * Result of an HTTP request.
 *
 * @typeParam T - Type of the response payload in `data` (defaults to `unknown`).
 */
export interface HttpResponse<T = unknown> {
  /** HTTP status code. */
  status: number
  data: T
  headers: Record<string, string>
  /** Final URL after redirects */
  url: string
  /** The request options this response belongs to, when available. */
  request?: RequestConfig
}
|
|
31
|
+
|
|
32
|
+
/**
 * HTTP abstraction: a single generic `request` function that takes a URL plus
 * request options and resolves to an `HttpResponse<T>`.
 *
 * NOTE(review): the inline config repeats the fields of `RequestConfig`
 * (plus `url`) except `withCredentials` — confirm whether that omission is
 * intentional or whether this should be `RequestConfig & { url: string }`.
 */
export interface HttpClient {
  request: <T = unknown>(config: {
    url: string
    method?: string
    headers?: Record<string, string>
    params?: Record<string, string | number | null | undefined>
    data?: unknown
    signal?: AbortSignal
    timeout?: number
    responseType?: 'arraybuffer' | 'text' | 'json'
  }) => Promise<HttpResponse<T>>
}
|
|
44
|
+
|
|
45
|
+
/** Storage abstraction */
|
|
46
|
+
|
|
47
|
+
/**
 * Storage abstraction; all operations are asynchronous and path-based.
 */
export interface Storage {
  /** Resolves to whether something exists at `path`. */
  exists: (path: string) => Promise<boolean>
  /** Writes binary or text `data` to `path`. */
  write: (path: string, data: Uint8Array | string) => Promise<void>
  /** Creates the directory `path`. NOTE(review): recursive or not is up to the implementation — confirm. */
  mkdir: (path: string) => Promise<void>
}
|
|
52
|
+
|
|
53
|
+
/**
 * Recursive, JSON-like configuration value: a primitive, `null`, an array of
 * values, or a string-keyed object of values.
 */
export type ConfigValue
  = | string
    | number
    | boolean
    | null
    | ConfigValue[]
    | { [key: string]: ConfigValue }
|
|
60
|
+
|
|
61
|
+
/** Top-level configuration: a string-keyed map of `ConfigValue`s. */
export interface Config { [key: string]: ConfigValue }
|
|
62
|
+
|
|
63
|
+
/** Metadata & messages */
|
|
64
|
+
|
|
65
|
+
/**
 * Flat string-keyed metadata dictionary.
 * In gallery-dl every kwdict is a plain `{string → value}` map.
 * Values are typed `unknown`, so consumers must narrow before use.
 */
export type Metadata = Record<string, unknown>
|
|
70
|
+
|
|
71
|
+
/** Message tagged `'directory'`; carries metadata only. */
export interface DirectoryMsg {
  readonly type: 'directory'
  readonly metadata: Metadata
}
|
|
75
|
+
|
|
76
|
+
/** Message tagged `'url'`; carries a URL and its metadata. */
export interface UrlMsg {
  readonly type: 'url'
  readonly url: string
  readonly metadata: Metadata
}
|
|
81
|
+
|
|
82
|
+
/**
 * Message tagged `'queue'`; carries a URL and its metadata, which may name
 * the extractor class to use via `_extractor`.
 */
export interface QueueMsg {
  readonly type: 'queue'
  readonly url: string
  readonly metadata: Metadata & {
    readonly _extractor?: ExtractorClass
  }
}
|
|
89
|
+
|
|
90
|
+
/** Union of all message kinds; discriminate on the `type` tag. */
export type Message = DirectoryMsg | UrlMsg | QueueMsg
|
|
91
|
+
|
|
92
|
+
/**
 * Async generator that yields Message values.
 * It has no return value (`void`) and accepts `unknown` via `next()`.
 */
export type MessageIter = AsyncGenerator<Message, void, unknown>
|
|
96
|
+
|
|
97
|
+
/** Extractor class reference (for Queue dispatch) */
|
|
98
|
+
|
|
99
|
+
/**
 * Minimal shape that every Extractor class must expose so the Dispatch
 * logic can re-instantiate from a URL.
 *
 * NOTE(review): no construct signature is declared here, so instantiation
 * itself is not covered by this type.
 */
export interface ExtractorClass {
  /** Regular expression associated with the extractor class. */
  pattern: RegExp
  subcategory: string
}
|
|
107
|
+
|
|
108
|
+
// Pre-declare the Extractor interface to avoid circular refs
/**
 * Instance-side contract of an extractor: identifying read-only fields, an
 * async `initialize` step, and async iteration that produces messages.
 */
export interface Extractor {
  readonly category: string
  readonly subcategory: string
  readonly root: string
  initialize: () => Promise<void>
  /** Makes the extractor usable with `for await … of`, yielding `Message`s. */
  [Symbol.asyncIterator]: () => MessageIter
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Instagram-style Base64-variant ID ↔ shortcode conversion.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** The 64 shortcode digits, in value order (index = digit value). */
const ALPHABET
  = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

/** Pre-built index for O(1) character lookup during decode. */
const CHAR_INDEX = new Map<string, bigint>(
  [...ALPHABET].map((ch, i): [string, bigint] => [ch, BigInt(i)]),
)

/** Numeric base of the encoding (64), as a BigInt for exact arithmetic. */
const BASE = BigInt(ALPHABET.length)

/**
 * Decode an Instagram shortcode into its numeric post ID.
 *
 * The shortcode is read as a base-64 number, most significant digit first.
 * BigInt arithmetic is used so IDs larger than `Number.MAX_SAFE_INTEGER`
 * stay exact.
 *
 * @param shortcode - Shortcode made only of characters from the alphabet.
 * @returns The ID as a decimal string (`'0'` for an empty shortcode).
 * @throws Error if the shortcode contains a character outside the alphabet.
 *   Such characters used to be silently treated as digit 0, which produced a
 *   wrong ID instead of signalling the bad input.
 */
export function idFromShortcode(shortcode: string): string {
  let num = 0n
  for (const ch of shortcode) {
    const digit = CHAR_INDEX.get(ch)
    if (digit === undefined) {
      throw new Error(
        `Invalid character ${JSON.stringify(ch)} in shortcode ${JSON.stringify(shortcode)}`,
      )
    }
    num = num * BASE + digit
  }
  return num.toString()
}

/**
 * Encode a numeric post ID into an Instagram shortcode.
 *
 * @param postId - The ID as a decimal string or a number. Prefer a string:
 *   a `number` above `Number.MAX_SAFE_INTEGER` has already lost precision.
 * @returns The shortcode; an empty string for `0` or negative IDs.
 * @throws SyntaxError / RangeError (from `BigInt`) if `postId` is not an
 *   integer value.
 */
export function shortcodeFromId(postId: string | number): string {
  let num = BigInt(postId)
  const chars: string[] = []
  // Digits come out least significant first; reversed before joining.
  while (num > 0n) {
    const remainder = Number(num % BASE)
    chars.push(ALPHABET[remainder]!)
    num = num / BASE
  }
  return chars.reverse().join('')
}
|
package/utils/text.ts
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text utilities ported from gallery-dl's ``text`` module.
|
|
3
|
+
*
|
|
4
|
+
* All functions are pure and environment-agnostic.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** String extraction */
|
|
8
|
+
|
|
9
|
+
/**
 * Return the text found between the first occurrence of ``begin`` and the
 * next occurrence of ``end`` after it.
 *
 * @param txt - Text to search.
 * @param begin - Opening delimiter.
 * @param end - Closing delimiter, searched for after ``begin``.
 * @returns The enclosed substring, or ``null`` when either delimiter is absent.
 */
export function extract(
  txt: string,
  begin: string,
  end: string,
): string | null {
  const beginAt = txt.indexOf(begin)
  if (beginAt === -1)
    return null

  const contentStart = beginAt + begin.length
  const endAt = txt.indexOf(end, contentStart)
  return endAt === -1 ? null : txt.slice(contentStart, endAt)
}
|
|
27
|
+
|
|
28
|
+
/**
 * Variant of ``extract`` that never returns ``null``: when no match is found
 * the caller-supplied fallback is returned instead.
 * Mirrors the Python ``extr()`` function.
 *
 * @param txt - Text to search.
 * @param begin - Opening delimiter.
 * @param end - Closing delimiter.
 * @param default_ - Value returned when extraction fails (empty string by default).
 */
export function extr(
  txt: string,
  begin: string,
  end: string,
  default_ = '',
): string {
  const found = extract(txt, begin, end)
  if (found === null)
    return default_
  return found
}
|
|
40
|
+
|
|
41
|
+
/** Unicode / HTML */
|
|
42
|
+
|
|
43
|
+
/**
 * Replace every ``\\uXXXX`` escape (exactly four hex digits) with the UTF-16
 * code unit it denotes. Text without ``\\u`` is returned untouched.
 */
export function parseUnicodeEscapes(text: string): string {
  // Turns the captured hex digits into the matching character.
  const toChar = (_whole: string, hexDigits: string): string =>
    String.fromCharCode(Number.parseInt(hexDigits, 16))

  return text.includes('\\u')
    ? text.replace(/\\u([0-9a-fA-F]{4})/g, toChar)
    : text
}
|
|
52
|
+
|
|
53
|
+
/**
 * HTML entity decode.
 *
 * In Node.js we could use a DOM parser, but since this library is
 * environment-agnostic we ship a minimal table covering the common cases.
 */
const HTML_ENTITIES: Record<string, string> = {
  'amp': '&',
  'lt': '<',
  'gt': '>',
  'quot': '"',
  'apos': '\'',
  'nbsp': '\u00A0',
  '#x27': '\'',
  '#x2F': '/',
  '#39': '\'',
  '#47': '/',
}

/** Matches ``&name;``; the capture is everything between ``&`` and the next ``;``. */
const RE_ENTITY = /&([^;]+);/g

/**
 * Decode HTML entities in ``text``.
 *
 * Named entities are resolved through the table above; numeric entities
 * (decimal ``&#123;`` and hexadecimal ``&#x7B;``) are converted to the code
 * point they denote. Anything unrecognized, including numeric entities
 * outside the Unicode range, is left exactly as written.
 *
 * @param text - Text that may contain HTML entities.
 * @returns The text with recognized entities replaced.
 */
export function unescape(text: string): string {
  return text.replace(RE_ENTITY, (m: string, name: string) => {
    // Own-property check: a plain lookup would also find inherited members
    // such as `constructor` or `toString` and splice a function into the text.
    if (Object.prototype.hasOwnProperty.call(HTML_ENTITIES, name))
      return HTML_ENTITIES[name]!
    // Numeric entities like &#123; or &#x7B;
    if (name.startsWith('#')) {
      const cp = name[1] === 'x' || name[1] === 'X'
        ? Number.parseInt(name.slice(2), 16)
        : Number.parseInt(name.slice(1), 10)
      // String.fromCodePoint throws a RangeError outside 0..0x10FFFF.
      if (Number.isInteger(cp) && cp >= 0 && cp <= 0x10FFFF)
        return String.fromCodePoint(cp)
    }
    return m // leave unrecognized as-is
  })
}
|
|
90
|
+
|
|
91
|
+
/** URL helpers */
|
|
92
|
+
|
|
93
|
+
/**
 * URL-decode a string.
 *
 * Well-formed input is decoded in one go. If the input as a whole is
 * malformed, decoding falls back to best effort: each percent-encoded UTF-8
 * sequence (one to four bytes) is decoded on its own, and anything that
 * cannot be decoded is left exactly as written. The function never throws.
 *
 * @param text - Percent-encoded text.
 * @returns The decoded text.
 */
export function unquote(text: string): string {
  try {
    return decodeURIComponent(text)
  }
  catch {
    // Match whole UTF-8-shaped sequences rather than single `%XX` escapes,
    // so multi-byte characters next to a malformed escape are still decoded:
    //   %00-%7F            one byte
    //   %C0-%DF + 1 cont.  two bytes
    //   %E0-%EF + 2 cont.  three bytes
    //   %F0-%F7 + 3 cont.  four bytes
    const utf8Sequence
      = /%(?:[0-7][0-9a-f]|[cd][0-9a-f]%[89ab][0-9a-f]|e[0-9a-f](?:%[89ab][0-9a-f]){2}|f[0-7](?:%[89ab][0-9a-f]){3})/gi
    return text.replace(utf8Sequence, (seq) => {
      try {
        return decodeURIComponent(seq)
      }
      catch {
        // Right shape but not valid UTF-8 (e.g. overlong): keep it verbatim.
        return seq
      }
    })
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Make sure ``url`` carries an HTTP(S) scheme.
 *
 * Empty input and URLs already starting with ``https://`` or ``http://`` are
 * returned unchanged. Otherwise any leading ``/`` and ``:`` characters are
 * dropped and ``scheme`` is prepended.
 *
 * @param url - URL to normalize.
 * @param scheme - Prefix to add when none is present (``https://`` by default).
 */
export function ensureHttpScheme(url: string, scheme = 'https://'): string {
  if (url && !/^https?:\/\//.test(url)) {
    const withoutLeadingSeparators = url.replace(/^[/:]+/, '')
    return scheme + withoutLeadingSeparators
  }
  return url
}
|
|
123
|
+
|
|
124
|
+
/**
 * Derive ``filename`` and ``extension`` from a URL and store both on ``meta``.
 *
 * The last ``.`` in the file name separates name and extension, provided the
 * name part is non-empty and the extension is at most 16 characters long;
 * the extension is lower-cased. Otherwise the whole file name is stored and
 * the extension is the empty string. Stored values are URL-decoded.
 *
 * @param url - URL to take the file name from.
 * @param meta - Object that receives ``filename`` and ``extension``.
 */
export function nameExtFromURL(
  url: string,
  meta: Record<string, unknown>,
): void {
  const base = filenameFromURL(url)
  const dotAt = base.lastIndexOf('.')
  const extLength = base.length - dotAt - 1
  const hasUsableExt = dotAt > 0 && extLength <= 16

  if (!hasUsableExt) {
    meta.filename = unquote(base)
    meta.extension = ''
    return
  }

  meta.filename = unquote(base.slice(0, dotAt))
  meta.extension = unquote(base.slice(dotAt + 1)).toLowerCase()
}
|
|
142
|
+
|
|
143
|
+
/**
 * Return the last path segment of a URL, ignoring everything from the first
 * ``?`` onwards. Yields an empty string if the input cannot be processed.
 */
function filenameFromURL(url: string): string {
  try {
    const queryAt = url.indexOf('?')
    const path = queryAt === -1 ? url : url.slice(0, queryAt)
    // lastIndexOf gives -1 when there is no slash, so the slice starts at 0.
    return path.slice(path.lastIndexOf('/') + 1)
  }
  catch {
    return ''
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Parse an integer from a possibly-null value. Returns ``default_`` on failure.
 *
 * Numbers are truncated toward zero so the result is always an integer;
 * strings are parsed as base-10 (leading digits only, as with
 * ``Number.parseInt``). ``null``, ``undefined``, ``NaN``, infinities and
 * unparsable strings all yield ``default_``.
 *
 * @param value - Value to convert.
 * @param default_ - Fallback returned when no finite integer can be produced.
 * @returns The parsed integer, or ``default_``.
 */
export function parseInt(
  value: string | number | null | undefined,
  default_: number = 0,
): number {
  if (value == null)
    return default_
  const n = typeof value === 'number'
    ? Math.trunc(value) // a fractional number is not an integer result
    : Number.parseInt(String(value), 10)
  return Number.isFinite(n) ? n : default_
}
|
|
167
|
+
|
|
168
|
+
/**
 * Regex helper: compile a pattern once and return a "find all" function.
 *
 * The returned function yields every full match of the pattern in its
 * argument, de-duplicated in order of first appearance, or an empty array
 * when nothing matches.
 *
 * @param pattern - Regular-expression source.
 * @param flags - Regular-expression flags. The global flag is always applied
 *   (it is added when missing) because finding all matches depends on it.
 */
export function tagRe(pattern: string, flags = 'g'): (text: string) => string[] {
  const re = new RegExp(pattern, flags.includes('g') ? flags : `${flags}g`)
  return (text: string) => {
    const matches = text.match(re)
    return matches ? [...new Set(matches)] : []
  }
}

/**
 * Pre-configured hashtag regex.
 *
 * Matches ``#`` followed by letters, numbers or underscores from any script.
 * A plain ``\w`` is ASCII-only in JavaScript and would cut ``#café`` down to
 * ``#caf`` and miss ``#日本語`` entirely.
 */
export const findTags = tagRe('#[\\p{L}\\p{N}_]+', 'u')
|