@postfetch/core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,380 @@
1
+ import {
2
+ asUrl,
3
+ filename,
4
+ number,
5
+ object,
6
+ string,
7
+ type ResolveContext,
8
+ type Net,
9
+ type Json,
10
+ type PostfetchResult,
11
+ type MediaItem,
12
+ } from "./internal";
13
+ import { browserFingerprint, browserUserAgent, instagramAppUserAgent, navigationHeaders } from "./fingerprint";
14
+
15
/** Fallback `x-ig-app-id` header value, used when the fetched page exposes no `DGWWebConfig.appId`. */
const appId = "936619743392459";
16
+
17
/**
 * Builds the request headers sent to the `i.instagram.com` API endpoints
 * (the oEmbed lookup and the media info call).
 *
 * @returns A new header map on every call; the user agent is taken from
 *   `instagramAppUserAgent()`.
 */
function mobileHeaders(): Record<string, string> {
  // The three locale headers always carry the same value.
  const locale = "en_US";
  const headers: Record<string, string> = {
    "accept-language": "en-US",
    "content-length": "0",
    "user-agent": instagramAppUserAgent(),
    "x-fb-client-ip": "True",
    "x-fb-http-engine": "Liger",
    "x-fb-server-cluster": "True",
    "x-ig-app-locale": locale,
    "x-ig-device-locale": locale,
    "x-ig-mapped-locale": locale,
  };
  return headers;
}
30
+
31
/**
 * Builds browser-navigation style headers for requests to `www.instagram.com`
 * (embed page, post page and GraphQL query).
 *
 * @returns A new header map whose language, client-hint and user-agent values
 *   come from a single `browserFingerprint()` call so they stay consistent.
 */
function embedHeaders(): Record<string, string> {
  const { acceptLanguage, secChUa, secChUaPlatform, userAgent } = browserFingerprint();
  const accept =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7";
  const headers: Record<string, string> = {
    Accept: accept,
    "Accept-Language": acceptLanguage,
    "Cache-Control": "max-age=0",
    Dnt: "1",
    Priority: "u=0, i",
    "Sec-Ch-Ua": secChUa,
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": secChUaPlatform,
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": userAgent,
  };
  return headers;
}
51
+
52
/**
 * Resolves an Instagram post URL into downloadable media items.
 *
 * Sources are tried lazily in order: inline page JSON, the mobile API, the
 * embed page, and finally the GraphQL query. A source only wins when its
 * media actually yields at least one item; otherwise the next source is
 * tried. Previously an embed result without usable URLs stopped the chain
 * before GraphQL was attempted.
 *
 * @param input - Resolve context carrying the URL, the network function and
 *   the preferred media width.
 * @returns The archive filename, shortcode id, items and platform tag.
 * @throws Error "Instagram media not found" when no source returned media.
 * @throws Error "Instagram media url not found" when media was returned but
 *   none of it produced a downloadable item.
 */
export async function resolveInstagram(input: ResolveContext): Promise<PostfetchResult> {
  const { net, preferredWidth } = input;
  const code = shortcode(input.url);
  // Thunks keep the fallbacks lazy: later requests are only made when needed.
  const sources: Array<() => Promise<Json | null>> = [
    () => pageMedia(net, code, preferredWidth),
    () => mobileMedia(net, code, preferredWidth),
    () => embedMedia(net, code),
    () => graphqlMedia(net, code),
  ];
  let sawMedia = false;
  for (const load of sources) {
    const media = await load();
    if (!media) {
      continue;
    }
    sawMedia = true;
    const items = mediaItems(media, code, preferredWidth);
    if (items.length > 0) {
      return { archiveFilename: filename(`instagram_${code}.zip`), id: code, items, platform: "instagram" };
    }
  }
  throw new Error(sawMedia ? "Instagram media url not found" : "Instagram media not found");
}
68
+
69
/**
 * Extracts the post shortcode from an Instagram URL.
 *
 * The shortcode is the path segment following `p`, `reel`, `reels` or `tv`;
 * when none of those segments is present the last path segment is used.
 *
 * @param input - URL string; parsed with `asUrl`, which rejects invalid URLs.
 * @returns The shortcode segment.
 * @throws Error "Instagram shortcode not found" when no segment is available.
 */
function shortcode(input: string): string {
  const markers = new Set(["p", "reel", "reels", "tv"]);
  const segments = asUrl(input)
    .pathname.split("/")
    .filter((segment) => segment.length > 0);
  const markerAt = segments.findIndex((segment) => markers.has(segment));
  const candidate = markerAt === -1 ? segments[segments.length - 1] : segments[markerAt + 1];
  if (candidate) {
    return candidate;
  }
  throw new Error("Instagram shortcode not found");
}
78
+
79
/**
 * Loads the public post page and looks for the media object embedded in its
 * inline JSON scripts.
 *
 * @param net - Network function used for the request.
 * @param code - Post shortcode.
 * @param preferredWidth - Width passed to `mediaItems` when validating.
 * @returns The media object when it yields at least one item, else `null`
 *   (also `null` on a non-OK response).
 */
async function pageMedia(net: Net, code: string, preferredWidth: number): Promise<Json | null> {
  const page = await net(`https://www.instagram.com/p/${code}/`, { headers: navigationHeaders() });
  if (!page.ok) {
    return null;
  }
  const found = inlineMedia(await page.text(), code);
  if (!found) {
    return null;
  }
  // Only accept media that actually produces downloadable items.
  const usable = mediaItems(found, code, preferredWidth).length > 0;
  return usable ? found : null;
}
88
+
89
/**
 * Scans every `<script type="application/json">` element in the page and
 * returns the first media object found for the given shortcode.
 *
 * @param html - Raw page markup.
 * @param code - Post shortcode to match.
 * @returns The matching media object, or `null` when no script contains one.
 */
function inlineMedia(html: string, code: string): Json | null {
  const scriptPattern = /<script type="application\/json"[^>]*>(.*?)<\/script>/gs;
  for (const [, payload] of html.matchAll(scriptPattern)) {
    let tree: unknown;
    try {
      tree = JSON.parse(payload);
    } catch {
      // Script bodies that are not valid JSON are skipped.
      continue;
    }
    const hit = searchMedia(tree, code);
    if (hit) {
      return hit;
    }
  }
  return null;
}
104
+
105
/**
 * Depth-first search through a parsed JSON tree for the media object that
 * belongs to `code`.
 *
 * A node matches when its `code` property equals `code` and it carries
 * `video_versions`, `carousel_media` or `image_versions2.candidates`.
 *
 * @param node - Any parsed JSON value.
 * @param code - Post shortcode to match.
 * @returns The first matching object in pre-order, or `null`.
 */
function searchMedia(node: unknown, code: string): Json | null {
  // Returns the first match found below any of the given children.
  const firstHit = (children: Iterable<unknown>): Json | null => {
    for (const child of children) {
      const hit = searchMedia(child, code);
      if (hit) {
        return hit;
      }
    }
    return null;
  };

  if (Array.isArray(node)) {
    return firstHit(node);
  }
  if (!object(node)) {
    return null;
  }
  const images = node.image_versions2;
  const carriesMedia =
    Array.isArray(node.video_versions) ||
    Array.isArray(node.carousel_media) ||
    (object(images) && Array.isArray(images.candidates));
  if (carriesMedia && node.code === code) {
    return node;
  }
  // For parsed JSON objects this visits the same values, in the same order, as `for...in`.
  return firstHit(Object.values(node));
}
133
+
134
/**
 * Resolves media through the mobile API: looks up the numeric media id for
 * the shortcode, then fetches that media's info record.
 *
 * @param net - Network function used for both requests.
 * @param code - Post shortcode.
 * @param preferredWidth - Width passed to `mediaItems` when validating.
 * @returns The media object when it yields at least one item, else `null`.
 */
async function mobileMedia(net: Net, code: string, preferredWidth: number): Promise<Json | null> {
  const id = await mediaId(net, code);
  if (!id) {
    return null;
  }
  const info = await mobileInfo(net, id);
  if (!info) {
    return null;
  }
  return mediaItems(info, code, preferredWidth).length > 0 ? info : null;
}
142
+
143
/**
 * Asks the oEmbed endpoint for the media id of a post.
 *
 * The request is made with a single attempt (no retries).
 *
 * @param net - Network function used for the request.
 * @param code - Post shortcode.
 * @returns The `media_id` string from the payload, or `null` when the
 *   response is not OK, is not JSON, or has no non-empty string id.
 */
async function mediaId(net: Net, code: string): Promise<string | null> {
  const endpoint = new URL("https://i.instagram.com/api/v1/oembed/");
  endpoint.searchParams.set("url", `https://www.instagram.com/p/${code}/`);
  const reply = await net(endpoint.href, { headers: mobileHeaders() }, 1);
  if (!reply.ok) {
    return null;
  }
  const body: unknown = await reply.json().catch(() => null);
  if (!object(body)) {
    return null;
  }
  return string(body.media_id);
}
153
+
154
/**
 * Fetches the media info record for a media id from the mobile API.
 *
 * The request is made with a single attempt (no retries).
 *
 * @param net - Network function used for the request.
 * @param mediaId - Media id as returned by the oEmbed lookup.
 * @returns The first entry of the payload's `items` array when it is an
 *   object, else `null`.
 */
async function mobileInfo(net: Net, mediaId: string): Promise<Json | null> {
  const endpoint = `https://i.instagram.com/api/v1/media/${mediaId}/info/`;
  const reply = await net(endpoint, { headers: mobileHeaders() }, 1);
  if (!reply.ok) {
    return null;
  }
  const body: unknown = await reply.json().catch(() => null);
  if (!object(body) || !Array.isArray(body.items)) {
    return null;
  }
  const head: unknown = body.items[0];
  return object(head) ? head : null;
}
166
+
167
/**
 * Resolves media from the captioned embed page of a post.
 *
 * The page carries an `"init"` payload whose `contextJSON` string holds the
 * media either under `context.media` or in GraphQL shape.
 *
 * @param net - Network function used for the request.
 * @param code - Post shortcode.
 * @returns The embedded media object, or `null` when the response is not OK,
 *   the payload is missing, or any JSON layer is malformed.
 */
async function embedMedia(net: Net, code: string): Promise<Json | null> {
  const response = await net(`https://www.instagram.com/p/${code}/embed/captioned/`, {
    headers: embedHeaders(),
  });
  if (!response.ok) {
    return null;
  }
  const html = await response.text();
  const init = html.match(/"init",\[\],\[(.*?)\]\],/s)?.[1];
  if (!init) {
    return null;
  }
  let context: unknown;
  try {
    const parsed: unknown = JSON.parse(init);
    const contextJson = object(parsed) ? string(parsed.contextJSON) : null;
    if (!contextJson) {
      return null;
    }
    context = JSON.parse(contextJson);
  } catch {
    // The regex capture is best-effort; malformed JSON must not abort the
    // caller's fallback chain, so it is treated as "no media here".
    return null;
  }
  if (!object(context)) {
    return null;
  }
  const embedded = object(context.context) && object(context.context.media) ? context.context.media : null;
  return embedded ?? gqlShortcodeMedia(context);
}
192
+
193
/**
 * Resolves media through the web GraphQL endpoint.
 *
 * Session parameters are scraped from the post page first; the query is then
 * posted as a URL-encoded form.
 *
 * @param net - Network function used for both requests.
 * @param code - Post shortcode.
 * @returns The shortcode media object from the response `data`, or `null`
 *   when parameters are unavailable, the response is not OK, or the payload
 *   has no usable `data`.
 */
async function graphqlMedia(net: Net, code: string): Promise<Json | null> {
  const params = await graphqlParams(net, code);
  if (!params) {
    return null;
  }
  const variables = JSON.stringify({
    fetch_tagged_user_count: null,
    hoisted_comment_id: null,
    hoisted_reply_id: null,
    shortcode: code,
  });
  // Scraped parameters go first so the fixed query fields below override them.
  const form = new URLSearchParams({
    ...params.body,
    doc_id: "8845758582119845",
    fb_api_caller_class: "RelayModern",
    fb_api_req_friendly_name: "PolarisPostActionLoadPostQueryQuery",
    server_timestamps: "true",
    variables,
  });
  const headers = {
    ...embedHeaders(),
    ...params.headers,
    "X-FB-Friendly-Name": "PolarisPostActionLoadPostQueryQuery",
    "content-type": "application/x-www-form-urlencoded",
  };
  const reply = await net("https://www.instagram.com/graphql/query", {
    body: form,
    headers,
    method: "POST",
  });
  if (!reply.ok) {
    return null;
  }
  const payload: unknown = await reply.json().catch(() => null);
  if (!object(payload) || !object(payload.data)) {
    return null;
  }
  return gqlShortcodeMedia(payload.data);
}
228
+
229
/**
 * Scrapes the post page for the session parameters needed by the GraphQL
 * query and assembles the matching headers and form fields.
 *
 * Values missing from the page fall back to hard-coded or random defaults.
 * The LSD token is normalised once, so the `X-FB-LSD` header and the `lsd`
 * form field always carry the same value. Previously a non-string or blank
 * page token produced two different random tokens.
 *
 * @param net - Network function used for the request.
 * @param code - Post shortcode.
 * @returns Extra headers and form fields, or `null` on a non-OK response.
 */
async function graphqlParams(net: Net, code: string): Promise<{ body: Record<string, string>; headers: Record<string, string> } | null> {
  const response = await net(`https://www.instagram.com/p/${code}/`, {
    headers: embedHeaders(),
  });
  if (!response.ok) {
    return null;
  }
  const html = await response.text();
  const site = entryObject("SiteData", html);
  const polaris = entryObject("PolarisSiteData", html);
  const web = entryObject("DGWWebConfig", html);
  const push = entryObject("InstagramWebPushInfo", html);
  // Resolve the token exactly once; it is reused for the header and the body.
  const lsd = string(entryObject("LSD", html)?.token) ?? randomToken();
  const csrf = entryObject("InstagramSecurityConfig", html)?.csrf_token;
  const cookie = [
    csrf && `csrftoken=${csrf}`,
    polaris?.device_id && `ig_did=${polaris.device_id}`,
    polaris?.machine_id && `mid=${polaris.machine_id}`,
    "wd=1280x720",
    "dpr=2",
    "ig_nrcb=1",
  ].filter((value): value is string => typeof value === "string" && value.length > 0).join("; ");
  return {
    headers: {
      "X-CSRFToken": string(csrf) ?? "",
      "X-FB-LSD": lsd,
      "X-Bloks-Version-Id": string(entryObject("WebBloksVersioningID", html)?.versioningID) ?? "",
      cookie,
      "x-asbd-id": "129477",
      "x-ig-app-id": string(web?.appId) ?? appId,
    },
    body: {
      __a: "1",
      __ccg: "EXCELLENT",
      __comet_req: String(queryNumber("__comet_req", html) ?? 7),
      __csr: randomToken(154),
      __d: "www",
      __dyn: randomToken(154),
      __hs: string(site?.haste_session) ?? "20126.HYP:instagram_web_pkg.2.1...0",
      __hsi: string(site?.hsi) ?? "7436540909012459023",
      __req: "b",
      __rev: string(push?.rollout_hash) ?? "1019933358",
      __s: `::${Math.random().toString(36).replace(/\d/g, "").slice(2, 8)}`,
      __spin_b: string(site?.__spin_b) ?? "trunk",
      __spin_r: string(site?.__spin_r) ?? "1019933358",
      __spin_t: String(number(site?.__spin_t) ?? Math.floor(Date.now() / 1000)),
      __user: "0",
      av: "0",
      dpr: "2",
      jazoest: String(queryNumber("jazoest", html) ?? Math.floor(Math.random() * 10000)),
      lsd,
    },
  };
}
283
+
284
/**
 * Picks the shortcode media object out of a GraphQL-shaped container.
 *
 * When `data.gql_data` is an object it is searched instead of `data` itself;
 * `shortcode_media` takes precedence over `xdt_shortcode_media`.
 *
 * @param data - GraphQL response data or embed context.
 * @returns The media object, or `null` when neither key holds an object.
 */
function gqlShortcodeMedia(data: Json): Json | null {
  const container = object(data.gql_data) ? data.gql_data : data;
  const media = container.shortcode_media ?? container.xdt_shortcode_media;
  if (!object(media)) {
    return null;
  }
  return media;
}
290
+
291
/**
 * Converts a media object into downloadable items.
 *
 * Three shapes are tried in order: `edge_sidecar_to_children.edges[].node`,
 * `carousel_media[]`, and finally the media object itself as a single item.
 * Multi-item posts are numbered starting at 1.
 *
 * @param media - Media object from any of the sources.
 * @param code - Post shortcode, used for ids and filenames.
 * @param preferredWidth - Target width for video variant selection.
 * @returns The items of the first shape that produced any; possibly empty.
 */
function mediaItems(media: Json, code: string, preferredWidth: number): MediaItem[] {
  const sidecar = media.edge_sidecar_to_children;
  const edges: unknown[] = object(sidecar) && Array.isArray(sidecar.edges) ? sidecar.edges : [];
  const fromEdges: MediaItem[] = [];
  edges.forEach((edge, position) => {
    if (object(edge) && object(edge.node)) {
      fromEdges.push(...instagramItem(edge.node, code, position + 1, preferredWidth));
    }
  });
  if (fromEdges.length > 0) {
    return fromEdges;
  }

  const slides = Array.isArray(media.carousel_media) ? media.carousel_media.filter(object) : [];
  const fromCarousel: MediaItem[] = [];
  slides.forEach((slide, position) => {
    fromCarousel.push(...instagramItem(slide, code, position + 1, preferredWidth));
  });
  if (fromCarousel.length > 0) {
    return fromCarousel;
  }

  return instagramItem(media, code, null, preferredWidth);
}
309
+
310
/**
 * Builds the item for a single media node: a video when a video URL is
 * available, otherwise an image, otherwise nothing.
 *
 * @param media - Single media node.
 * @param code - Post shortcode, used for the id and filename.
 * @param index - 1-based position within a multi-item post, or `null` for a
 *   single-item post (no filename suffix).
 * @param preferredWidth - Target width for video variant selection.
 * @returns An array with one item, or an empty array when no URL was found.
 */
function instagramItem(media: Json, code: string, index: number | null, preferredWidth: number): MediaItem[] {
  const suffix = index === null ? "" : `_${index}`;
  // Shared item shape; only kind, extension, MIME type and URL vary.
  const build = (kind: "video" | "image", extension: string, mime: string, url: string): MediaItem => ({
    filename: filename(`instagram_${code}${suffix}.${extension}`),
    headers: { "user-agent": browserUserAgent() },
    id: code,
    kind,
    mime,
    platform: "instagram",
    url,
  });

  const videoUrl = selectVersion(media.video_versions, preferredWidth) ?? string(media.video_url);
  if (videoUrl) {
    return [build("video", "mp4", "video/mp4", videoUrl)];
  }
  const imageUrl = selectImage(media);
  if (imageUrl) {
    return [build("image", "jpg", "image/jpeg", imageUrl)];
  }
  return [];
}
337
+
338
/**
 * Picks the image URL for a media node.
 *
 * The URL of the first object in `image_versions2.candidates` is preferred;
 * `display_url` is the fallback.
 *
 * @param media - Single media node.
 * @returns A non-empty URL string, or `null` when neither field provides one.
 */
function selectImage(media: Json): string | null {
  const versions = media.image_versions2;
  if (object(versions) && Array.isArray(versions.candidates)) {
    const lead = versions.candidates.filter(object)[0];
    const leadUrl = lead ? string(lead.url) : null;
    if (leadUrl !== null) {
      return leadUrl;
    }
  }
  return string(media.display_url);
}
345
+
346
/**
 * Chooses the variant whose numeric `width` is closest to `preferredWidth`.
 *
 * Ties keep the earlier variant. When no variant has a numeric width the
 * first object in the list is used.
 *
 * @param value - Candidate list (anything that is not an array yields `null`).
 * @param preferredWidth - Target width.
 * @returns The chosen variant's non-empty `url`, or `null`.
 */
function selectVersion(value: unknown, preferredWidth: number): string | null {
  if (!Array.isArray(value)) {
    return null;
  }
  const versions = value.filter(object);
  let closest: Json | null = null;
  let closestGap = 0;
  for (const candidate of versions) {
    const width = number(candidate.width);
    if (width === null) {
      continue;
    }
    const gap = Math.abs(width - preferredWidth);
    // Strict comparison: an equally close later variant does not replace the current one.
    if (closest === null || gap < closestGap) {
      closest = candidate;
      closestGap = gap;
    }
  }
  const chosen = closest ?? versions[0];
  return chosen ? string(chosen.url) : null;
}
362
+
363
/**
 * Extracts the JSON object attached to a named entry in the page markup,
 * i.e. the `{...}` in a fragment shaped like `["<name>",<anything>,{...},<digits>]`.
 *
 * Fixes a double-escaped pattern: the previous template literal compiled to
 * a literal backslash followed by a character class with no capture group,
 * so the lookup could never return a value.
 *
 * @param name - Entry name; escaped before being placed in the pattern.
 * @param html - Raw page markup.
 * @returns The parsed object, or `null` when the entry is missing, is not
 *   valid JSON, or is not a plain object.
 */
function entryObject(name: string, html: string): Json | null {
  // Escape regex metacharacters so the name is always matched literally.
  const safeName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const raw = html.match(new RegExp(`\\["${safeName}",.*?,({.*?}),\\d+\\]`))?.[1];
  if (!raw) {
    return null;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // The non-greedy capture can cut a nested object short; treat that as "not found".
    return null;
  }
  return object(parsed) ? parsed : null;
}
371
+
372
/**
 * Finds the first `<name>=<digits>` occurrence in the markup and returns the
 * digits as a number.
 *
 * @param name - Parameter name; inserted into the pattern as-is.
 * @param html - Raw page markup.
 * @returns The parsed finite number, or `null` when there is no match or the
 *   value is not finite.
 */
function queryNumber(name: string, html: string): number | null {
  const found = new RegExp(`${name}=(\\d+)`).exec(html);
  const digits = found?.[1];
  if (!digits) {
    return null;
  }
  const value = Number(digits);
  return Number.isFinite(value) ? value : null;
}
377
+
378
/**
 * Generates a random token of lowercase base-36 characters (`0-9`, `a-z`).
 *
 * @param length - Number of characters to produce; defaults to 8.
 * @returns The token; each character is derived from one random byte.
 */
function randomToken(length = 8): string {
  const bytes = crypto.getRandomValues(new Uint8Array(length));
  let token = "";
  for (const byte of bytes) {
    token += (byte % 36).toString(36);
  }
  return token;
}
@@ -0,0 +1,113 @@
1
/** Platforms a URL can be classified as and resolved for. */
export type Platform =
  | "facebook"
  | "instagram"
  | "tiktok"
  | "twitter"
  | "youtube";
2
+
3
/** Broad category of a resolved media item. */
export type MediaKind =
  | "audio"
  | "image"
  | "video";
4
+
5
/** One downloadable piece of media belonging to a resolved post. */
export type MediaItem = {
  /** Suggested file name for the item. */
  filename: string;
  /** Headers attached to the item; presumably meant for the request that downloads `url` (confirm against consumers). */
  headers: HeadersInit;
  /** Identifier of the post the item belongs to. */
  id: string;
  /** Media category. */
  kind: MediaKind;
  /** MIME type of the media. */
  mime: string;
  /** Platform the item was resolved from. */
  platform: Platform;
  /** Location of the media. */
  url: string;
};
14
+
15
/** Outcome of resolving a post URL. */
export type PostfetchResult = {
  /** Suggested file name for an archive containing all items. */
  archiveFilename: string;
  /** Identifier of the resolved post. */
  id: string;
  /** Media items found for the post. */
  items: MediaItem[];
  /** Platform the post was resolved from. */
  platform: Platform;
};
21
+
22
/**
 * Network function handed to resolvers: a `fetch`-like call with an optional
 * attempt count.
 */
export type Net = (
  url: string,
  init?: RequestInit,
  attempts?: number,
) => Promise<Response>;
23
+
24
/** Input passed to every platform resolver. */
export type ResolveContext = {
  /** Network function to use for all requests. */
  net: Net;
  /** Width used when choosing between media variants. */
  preferredWidth: number;
  /** The (trimmed) URL to resolve. */
  url: string;
};
29
+
30
/**
 * Error carrying a numeric status alongside its message.
 *
 * Within this package it is thrown with status 400 for invalid or
 * unsupported input URLs.
 */
export class PostfetchError extends Error {
  /** Numeric status associated with the failure. */
  readonly status: number;

  constructor(status: number, message: string) {
    super(message);
    this.status = status;
    this.name = "PostfetchError";
  }
}
39
+
40
/** A plain object with string keys and unvalidated values, e.g. a parsed JSON object. */
export type Json = { [key: string]: unknown };
41
+
42
/**
 * Wraps a `fetch` implementation with a 30-second per-attempt timeout and
 * retries for network errors and retryable statuses.
 *
 * Changes from the previous version:
 * - A caller-supplied `init.signal` is honoured. It used to be silently
 *   replaced by the internal timeout signal, so callers could not cancel.
 * - The body of a response that is discarded for a retry is cancelled so the
 *   underlying connection is released instead of waiting for garbage collection.
 *
 * @param baseFetch - Fetch implementation to wrap; defaults to the global one.
 * @returns A `Net` function. It resolves with the first non-retryable
 *   response, or with the last response once attempts are exhausted, and
 *   rejects with the last network error (or "request failed").
 */
export function createNet(baseFetch: typeof fetch = globalThis.fetch): Net {
  return async function net(url, init = {}, attempts = 3): Promise<Response> {
    const callerSignal = init.signal ?? null;
    let lastError: unknown;
    for (let attempt = 1; attempt <= attempts; attempt += 1) {
      const controller = new AbortController();
      // Relay a caller abort to this attempt's controller.
      const relayAbort = (): void => controller.abort(callerSignal?.reason);
      if (callerSignal?.aborted) {
        relayAbort();
      } else {
        callerSignal?.addEventListener("abort", relayAbort, { once: true });
      }
      const timeout = setTimeout(() => controller.abort(), 30_000);
      try {
        const response = await baseFetch(url, { ...init, signal: controller.signal });
        if (!retryable(response.status) || attempt === attempts) {
          return response;
        }
        // This response is thrown away; release its connection before waiting.
        await response.body?.cancel().catch(() => undefined);
        await sleep(retryDelay(response, attempt));
      } catch (error) {
        lastError = error;
        // Do not retry after the final attempt or once the caller has cancelled.
        if (attempt === attempts || callerSignal?.aborted) {
          break;
        }
        await sleep(500 * 2 ** (attempt - 1));
      } finally {
        clearTimeout(timeout);
        callerSignal?.removeEventListener("abort", relayAbort);
      }
    }
    throw lastError instanceof Error ? lastError : new Error("request failed");
  };
}
67
+
68
/**
 * Type guard for plain-object-like values.
 *
 * @param value - Any value.
 * @returns `true` for non-null objects that are not arrays.
 */
export function object(value: unknown): value is Json {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
71
+
72
/**
 * Normalises a value to a trimmed, non-empty string.
 *
 * @param value - Any value.
 * @returns The trimmed string, or `null` when the value is not a string or
 *   is empty after trimming.
 */
export function string(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
75
+
76
/**
 * Normalises a value to a finite number.
 *
 * @param value - Any value.
 * @returns The value itself when it is a finite number, otherwise `null`
 *   (numeric strings are not converted).
 */
export function number(value: unknown): number | null {
  if (typeof value !== "number") {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
79
+
80
/**
 * Parses a string into a `URL`.
 *
 * @param value - Candidate URL string.
 * @returns The parsed URL.
 * @throws PostfetchError with status 400 and message "invalid url" when the
 *   string cannot be parsed.
 */
export function asUrl(value: string): URL {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    throw new PostfetchError(400, "invalid url");
  }
  return parsed;
}
87
+
88
/**
 * Sanitises a file name by collapsing every run of characters outside
 * `A-Z a-z 0-9 _ . -` into a single underscore.
 *
 * @param value - Proposed file name.
 * @returns The sanitised name.
 */
export function filename(value: string): string {
  const unsafeRun = /[^A-Za-z0-9_.-]+/g;
  return value.replace(unsafeRun, () => "_");
}
91
+
92
/**
 * Decides whether a response status warrants another attempt.
 *
 * @param status - HTTP status code.
 * @returns `true` for 408, 429 and every status of 500 or above.
 */
function retryable(status: number): boolean {
  if (status >= 500) {
    return true;
  }
  return status === 408 || status === 429;
}
95
+
96
/**
 * Computes how long to wait before retrying a request.
 *
 * A `Retry-After` header (delay in seconds or an HTTP date in the future) is
 * honoured, but capped at 30 seconds so a remote server cannot stall the
 * caller indefinitely; previously the header value was used unbounded.
 * Without a usable header the delay is exponential backoff starting at
 * 500 ms and capped at 10 seconds.
 *
 * @param response - The response that triggered the retry.
 * @param attempt - 1-based number of the attempt that just finished.
 * @returns Delay in milliseconds.
 */
function retryDelay(response: Response, attempt: number): number {
  const maxRetryAfterMs = 30_000;
  const retryAfter = response.headers.get("retry-after");
  if (retryAfter) {
    const seconds = Number(retryAfter);
    if (Number.isFinite(seconds) && seconds >= 0) {
      return Math.min(seconds * 1000, maxRetryAfterMs);
    }
    const time = Date.parse(retryAfter);
    const now = Date.now();
    if (!Number.isNaN(time) && time > now) {
      return Math.min(time - now, maxRetryAfterMs);
    }
  }
  return Math.min(500 * 2 ** (attempt - 1), 10_000);
}
110
+
111
/**
 * Waits for the given time.
 *
 * @param ms - Delay in milliseconds, passed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -0,0 +1,55 @@
1
+ import { asUrl, createNet, PostfetchError, type Platform, type PostfetchResult } from "./internal";
2
+ import { resolveFacebook } from "./facebook";
3
+ import { resolveInstagram } from "./instagram";
4
+ import { resolveTiktok } from "./tiktok";
5
+ import { resolveTwitter } from "./twitter";
6
+ import { resolveYoutube } from "./youtube";
7
+
8
/** Optional settings for `postfetch`. */
export type PostfetchOptions = {
  /** Fetch implementation to use; the global `fetch` is used when omitted. */
  fetch?: typeof fetch;
  /** Width used when choosing between media variants; 720 when omitted. */
  preferredWidth?: number;
};
12
+
13
/**
 * Resolves a post URL into its downloadable media by dispatching to the
 * resolver of the detected platform.
 *
 * @param url - Post URL; surrounding whitespace is ignored.
 * @param options - Optional fetch implementation and preferred width.
 * @returns The resolver's result.
 * @throws PostfetchError with status 400 when the URL is empty, invalid or
 *   not from a supported platform; resolver errors propagate unchanged.
 */
export async function postfetch(url: string, options: PostfetchOptions = {}): Promise<PostfetchResult> {
  const trimmed = url.trim();
  if (trimmed === "") {
    throw new PostfetchError(400, "url is required");
  }
  const { fetch: customFetch, preferredWidth = undefined } = options;
  const context = {
    net: createNet(customFetch ?? globalThis.fetch),
    preferredWidth: preferredWidth ?? 720,
    url: trimmed,
  };
  // One resolver per platform; the record type keeps the table exhaustive.
  const resolvers: Record<Platform, (input: typeof context) => Promise<PostfetchResult>> = {
    facebook: resolveFacebook,
    instagram: resolveInstagram,
    tiktok: resolveTiktok,
    twitter: resolveTwitter,
    youtube: resolveYoutube,
  };
  const platform = detect(trimmed);
  return resolvers[platform](context);
}
36
+
37
/**
 * Classifies a URL by its hostname.
 *
 * Rules are checked in order: TikTok, Instagram, YouTube, Facebook, X/Twitter.
 *
 * NOTE(review): most rules use substring matching on the hostname, so hosts
 * that merely contain a platform domain also match. This looks deliberate
 * (only `x.com` uses exact/suffix matching) but should be confirmed.
 *
 * @param url - URL string; parsed with `asUrl`.
 * @returns The detected platform.
 * @throws PostfetchError with status 400 when the URL is invalid or matches
 *   no supported platform.
 */
export function detect(url: string): Platform {
  const host = asUrl(url).hostname;
  const rules: ReadonlyArray<readonly [Platform, () => boolean]> = [
    ["tiktok", () => host.includes("tiktok.com")],
    ["instagram", () => host.includes("instagram.com")],
    ["youtube", () => host.includes("youtube.com") || host === "youtu.be"],
    ["facebook", () => host.includes("facebook.com") || host === "fb.watch"],
    ["twitter", () => host === "x.com" || host.endsWith(".x.com") || host.includes("twitter.com")],
  ];
  for (const [platform, matches] of rules) {
    if (matches()) {
      return platform;
    }
  }
  throw new PostfetchError(400, "only Facebook, Instagram, TikTok, X and YouTube URLs are supported");
}