@syengup/friday-channel-next 0.1.27 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/http/handlers/files.js +1 -0
- package/dist/src/http/handlers/link-preview.d.ts +9 -0
- package/dist/src/http/handlers/link-preview.js +41 -0
- package/dist/src/http/server.js +5 -0
- package/dist/src/link-preview/og-parse.d.ts +21 -0
- package/dist/src/link-preview/og-parse.js +232 -0
- package/dist/src/link-preview/preview-service.d.ts +31 -0
- package/dist/src/link-preview/preview-service.js +216 -0
- package/dist/src/link-preview/ssrf-guard.d.ts +43 -0
- package/dist/src/link-preview/ssrf-guard.js +223 -0
- package/dist/src/version.js +1 -1
- package/package.json +1 -1
- package/src/http/handlers/files.ts +1 -0
- package/src/http/handlers/link-preview.test.ts +242 -0
- package/src/http/handlers/link-preview.ts +47 -0
- package/src/http/server.ts +6 -0
- package/src/link-preview/og-parse.test.ts +168 -0
- package/src/link-preview/og-parse.ts +249 -0
- package/src/link-preview/preview-service.ts +247 -0
- package/src/link-preview/ssrf-guard.test.ts +234 -0
- package/src/link-preview/ssrf-guard.ts +229 -0
- package/src/version.ts +1 -1
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GET /friday-next/link-preview?url=<percent-encoded http(s) URL>
|
|
3
|
+
*
|
|
4
|
+
* Returns Open Graph metadata for a page link so the app can render a preview card without
|
|
5
|
+
* ever contacting the third-party site itself. Cover images are re-hosted under
|
|
6
|
+
* /friday-next/files/ by the preview service.
|
|
7
|
+
*/
|
|
8
|
+
import type { IncomingMessage, ServerResponse } from "node:http";
|
|
9
|
+
/**
 * Handle a link-preview request and write a JSON response to `res`.
 *
 * @param req Incoming HTTP request; the target page is read from the `url` query parameter.
 * @param res Response that receives the status code and JSON body.
 * @returns A promise resolving to `true`; every path of the shipped implementation ends the
 *          response itself before returning.
 */
export declare function handleLinkPreview(req: IncomingMessage, res: ServerResponse): Promise<boolean>;
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GET /friday-next/link-preview?url=<percent-encoded http(s) URL>
|
|
3
|
+
*
|
|
4
|
+
* Returns Open Graph metadata for a page link so the app can render a preview card without
|
|
5
|
+
* ever contacting the third-party site itself. Cover images are re-hosted under
|
|
6
|
+
* /friday-next/files/ by the preview service.
|
|
7
|
+
*/
|
|
8
|
+
import { getLinkPreview } from "../../link-preview/preview-service.js";
|
|
9
|
+
import { extractBearerToken } from "../middleware/auth.js";
|
|
10
|
+
/** HTTP status returned for each typed link-preview error code. */
const ERROR_STATUS = {
    invalid_url: 400,
    blocked_url: 403,
    no_metadata: 422,
    fetch_failed: 502,
};
/** Set the status, mark the body as JSON, serialize `payload` and end the response. */
function sendLinkPreviewJson(res, statusCode, payload) {
    res.statusCode = statusCode;
    res.setHeader("Content-Type", "application/json");
    res.end(JSON.stringify(payload));
}
/**
 * GET /friday-next/link-preview?url=<percent-encoded http(s) URL>
 *
 * Responds with the JSON-serialized result of `getLinkPreview`: 200 on success, otherwise the
 * status mapped in `ERROR_STATUS`. Non-GET requests get 405, requests without a bearer token
 * get 401, and a missing/blank/unparseable `url` parameter gets 400.
 *
 * @param req Incoming request.
 * @param res Response to write to.
 * @returns Always `true` (the response has been ended by the time the promise resolves).
 */
export async function handleLinkPreview(req, res) {
    if (req.method !== "GET") {
        // A 405 response is expected to advertise the supported methods.
        res.setHeader("Allow", "GET");
        sendLinkPreviewJson(res, 405, { error: "Method Not Allowed" });
        return true;
    }
    // NOTE(review): only the presence of a bearer token is checked here — confirm that token
    // validation happens inside extractBearerToken or earlier in the request pipeline.
    if (!extractBearerToken(req)) {
        sendLinkPreviewJson(res, 401, { ok: false, error: "unauthorized" });
        return true;
    }
    let url;
    try {
        url = new URL(req.url ?? "/", "http://localhost").searchParams.get("url")?.trim();
    }
    catch {
        // A request target that cannot be parsed at all is treated like a missing parameter.
        url = undefined;
    }
    if (!url) {
        sendLinkPreviewJson(res, 400, { ok: false, error: "invalid_url" });
        return true;
    }
    let result;
    try {
        result = await getLinkPreview(url);
    }
    catch {
        // The service is meant to return typed errors; if it rejects anyway, answer with the
        // generic upstream failure instead of leaving the request without a response.
        result = { ok: false, error: "fetch_failed" };
    }
    // Fall back to 500 for an error code this table does not know about, so statusCode is
    // never assigned `undefined`.
    const status = result.ok ? 200 : (ERROR_STATUS[result.error] ?? 500);
    sendLinkPreviewJson(res, status, result);
    return true;
}
|
package/dist/src/http/server.js
CHANGED
|
@@ -18,6 +18,7 @@ import { handleHistorySessions } from "./handlers/history-sessions.js";
|
|
|
18
18
|
import { handleHistoryMessages } from "./handlers/history-messages.js";
|
|
19
19
|
import { handleHistorySetTitle } from "./handlers/history-set-title.js";
|
|
20
20
|
import { handleStatus } from "./handlers/status.js";
|
|
21
|
+
import { handleLinkPreview } from "./handlers/link-preview.js";
|
|
21
22
|
import { handleHealth } from "./handlers/health.js";
|
|
22
23
|
import { handlePluginInfo } from "./handlers/plugin-info.js";
|
|
23
24
|
import { handlePluginUpgrade } from "./handlers/plugin-upgrade.js";
|
|
@@ -85,6 +86,10 @@ async function handleFridayNextRoute(req, res) {
|
|
|
85
86
|
if ((req.method === "PUT" || req.method === "POST") && pathname === "/friday-next/sessions/title") {
|
|
86
87
|
return await handleHistorySetTitle(req, res);
|
|
87
88
|
}
|
|
89
|
+
// Route: GET /friday-next/link-preview?url=... (Open Graph metadata for preview cards)
|
|
90
|
+
if (req.method === "GET" && pathname === "/friday-next/link-preview") {
|
|
91
|
+
return await handleLinkPreview(req, res);
|
|
92
|
+
}
|
|
88
93
|
// Route: GET /friday-next/health?deviceId=...&nodeDeviceId=...&selfHeal=true
|
|
89
94
|
if (req.method === "GET" && pathname === "/friday-next/health") {
|
|
90
95
|
return await handleHealth(req, res);
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Open Graph metadata extraction via regex — no HTML parser dependency.
|
|
3
|
+
*
|
|
4
|
+
* Good enough for the link-preview card use case: og:* meta tags are flat, attribute-ordered
|
|
5
|
+
* variants are handled generically, and pages where this fails simply degrade to "no card".
|
|
6
|
+
*/
|
|
7
|
+
/** Metadata extracted from one HTML document by `parseOpenGraph`; a field is null when no usable value was found. */
export interface OpenGraphResult {
    /** First non-empty of og:title, twitter:title, JSON-LD headline/name, a body headline, then `<title>`. */
    title: string | null;
    /** First non-empty of og:description, twitter:description, JSON-LD description, then `<meta name="description">`. */
    description: string | null;
    /** Cover image URL resolved against the base URL; only http(s) URLs are kept. */
    imageUrl: string | null;
    /** og:site_name, falling back to twitter:site. */
    siteName: string | null;
    /** Favicon URL parsed from `<link rel="...icon...">`, resolved absolute. */
    iconUrl: string | null;
}
|
|
15
|
+
/**
 * Replace numeric (`&#NN;`, `&#xHH;`) and a small built-in set of named character references
 * in `s`. References that are not recognized are left exactly as written.
 */
export declare function decodeHtmlEntities(s: string): string;
|
|
16
|
+
/**
 * Extract preview metadata (title, description, cover image, site name, favicon) from `html`.
 *
 * @param html Page markup; only a leading portion (512 * 1024 characters) is scanned.
 * @param baseUrl URL that relative image and icon references are resolved against.
 */
export declare function parseOpenGraph(html: string, baseUrl: string): OpenGraphResult;
|
|
17
|
+
/**
|
|
18
|
+
* Pick the best `<link rel="...icon...">` href. Prefers a high-res `apple-touch-icon`, then a
|
|
19
|
+
* regular `icon` / `shortcut icon`. Skips `mask-icon` (monochrome SVG). Returns absolute http(s).
|
|
20
|
+
*/
|
|
21
|
+
export declare function parseFaviconUrl(html: string, baseUrl: string): string | null;
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Open Graph metadata extraction via regex — no HTML parser dependency.
|
|
3
|
+
*
|
|
4
|
+
* Good enough for the link-preview card use case: og:* meta tags are flat, attribute-ordered
|
|
5
|
+
* variants are handled generically, and pages where this fails simply degrade to "no card".
|
|
6
|
+
*/
|
|
7
|
+
// Upper bound on how much of the document is scanned. It is compared against `html.length`,
// so despite the name it counts UTF-16 code units of the decoded string, not bytes.
const MAX_PARSE_BYTES = 512 * 1024;
// One whole `<meta ...>` tag, up to the first `>`.
const META_TAG_RE = /<meta\b[^>]*>/gi;
// First `<title>` element; capture group 1 is its raw inner text.
const TITLE_TAG_RE = /<title[^>]*>([\s\S]*?)<\/title>/i;
// One whole `<link ...>` tag, up to the first `>`.
const LINK_TAG_RE = /<link\b[^>]*>/gi;
|
|
11
|
+
/**
 * One attribute inside a tag: group 1 is the name, groups 2/3/4 are the double-quoted,
 * single-quoted or bare value (all undefined when the attribute has no `=value` part).
 */
const TAG_ATTRIBUTE_RE = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+)))?/g;
/**
 * Extract one attribute value from a tag, tolerating single/double/no quotes and any order.
 *
 * Attributes are consumed left to right, so a quoted value is skipped as a unit. This keeps
 * `content="name=foo"` from being mistaken for a `name` attribute, and `data-content` from
 * being mistaken for `content`.
 *
 * @param tag Source text of a single tag, e.g. `<meta property="og:title" content="Hi">`.
 * @param name Attribute name to look up (case-insensitive).
 * @returns The raw (not entity-decoded) value of the first occurrence that carries a value,
 *          possibly the empty string; null when no such attribute exists.
 */
function attributeValue(tag, name) {
    const wanted = name.toLowerCase();
    // Drop the leading `<tagname` so the element name is not scanned as an attribute.
    const attributes = tag.replace(/^<\s*[^\s\/>]+/, "");
    for (const m of attributes.matchAll(TAG_ATTRIBUTE_RE)) {
        if (m[1].toLowerCase() !== wanted)
            continue;
        const value = m[2] ?? m[3] ?? m[4];
        // A valueless occurrence (`<meta content>`) is skipped; a later one may carry a value.
        if (value !== undefined)
            return value;
    }
    return null;
}
|
|
19
|
+
/** Named character references understood by `decodeHtmlEntities`, keyed by lower-case name. */
const NAMED_ENTITIES = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    // Written as an escape so the no-break space is visible in source.
    nbsp: "\u00a0",
    ndash: "–",
    mdash: "—",
    hellip: "…",
    middot: "·",
    copy: "©",
    reg: "®",
    trade: "™",
    lsquo: "‘",
    rsquo: "’",
    ldquo: "“",
    rdquo: "”",
    laquo: "«",
    raquo: "»",
};
/** Largest valid Unicode code point; `String.fromCodePoint` throws a RangeError above it. */
const MAX_CODE_POINT = 0x10ffff;
/**
 * Replace HTML character references in `s` with the characters they stand for.
 *
 * Handles decimal (`&#65;`) and hexadecimal (`&#x41;`) numeric references and the names in
 * `NAMED_ENTITIES` (matched case-insensitively). Anything that is not recognized is returned
 * unchanged. That covers unknown names, numeric references outside the Unicode range and
 * decimal references containing non-decimal digits, so untrusted markup can never make this
 * function throw.
 *
 * @param s Text that may contain character references.
 * @returns The decoded text.
 */
export function decodeHtmlEntities(s) {
    return s.replace(/&(#x?[0-9a-f]+|[a-z]+);/gi, (whole, body) => {
        if (body[0] === "#") {
            const isHex = body[1] === "x" || body[1] === "X";
            const digits = body.slice(isHex ? 2 : 1);
            // The outer pattern admits hex letters for both forms; a decimal reference such as
            // `&#12ab;` is malformed and must not be truncated to `&#12;` by parseInt.
            const wellFormed = isHex ? /^[0-9a-f]+$/i.test(digits) : /^[0-9]+$/.test(digits);
            if (!wellFormed)
                return whole;
            const code = Number.parseInt(digits, isHex ? 16 : 10);
            return code <= MAX_CODE_POINT ? String.fromCodePoint(code) : whole;
        }
        const key = body.toLowerCase();
        // Own-property check: a plain lookup would resolve `&constructor;` through
        // Object.prototype and splice a function's source text into the output.
        return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, key) ? NAMED_ENTITIES[key] : whole;
    });
}
|
|
53
|
+
/**
 * Normalize a raw text fragment for display: decode character references, collapse every
 * whitespace run to a single space and trim the ends.
 *
 * @param raw Raw text, or null/undefined when the source had none.
 * @returns The normalized text, or null when the input is missing or normalizes to nothing.
 */
function cleanText(raw) {
    if (raw === null || raw === undefined) {
        return null;
    }
    const collapsed = decodeHtmlEntities(raw).replace(/\s+/g, " ").trim();
    return collapsed.length > 0 ? collapsed : null;
}
|
|
59
|
+
/**
 * Turn a possibly-relative image reference into an absolute URL string.
 *
 * @param raw Reference as found in the page, or null/empty when there is none.
 * @param baseUrl URL the reference is resolved against.
 * @returns The absolute URL when it parses and uses http or https; otherwise null.
 */
function resolveImageUrl(raw, baseUrl) {
    if (!raw) {
        return null;
    }
    let resolved;
    try {
        resolved = new URL(raw.trim(), baseUrl);
    }
    catch {
        // Unparseable reference or base.
        return null;
    }
    const isWeb = resolved.protocol === "http:" || resolved.protocol === "https:";
    return isWeb ? resolved.toString() : null;
}
|
|
73
|
+
/**
 * Extract preview metadata from an HTML document.
 *
 * Only the first MAX_PARSE_BYTES characters are scanned. `<meta>` tags are bucketed into
 * `og:*`, `twitter:*` and the plain `description`; the first occurrence of each key wins.
 * Each output field then falls back through progressively less specific sources.
 *
 * @param html Page markup.
 * @param baseUrl URL that relative image/icon references are resolved against.
 * @returns Title, description, cover image URL, site name and favicon URL (null when absent).
 */
export function parseOpenGraph(html, baseUrl) {
    const slice = html.length > MAX_PARSE_BYTES ? html.slice(0, MAX_PARSE_BYTES) : html;
    // First occurrence wins per key (matches browser/crawler behavior).
    const og = {};
    const tw = {};
    let metaDescription = null;
    for (const [tag] of slice.matchAll(META_TAG_RE)) {
        const key = (attributeValue(tag, "property") ?? attributeValue(tag, "name"))?.trim().toLowerCase();
        if (!key)
            continue;
        const content = attributeValue(tag, "content");
        if (content == null || !content.trim())
            continue;
        if (key.startsWith("og:")) {
            const ogKey = key.slice(3);
            if (!(ogKey in og))
                og[ogKey] = content;
        }
        else if (key.startsWith("twitter:")) {
            const twKey = key.slice(8);
            if (!(twKey in tw))
                tw[twKey] = content;
        }
        else if (key === "description" && metaDescription == null) {
            metaDescription = content;
        }
    }
    const ld = parseJsonLd(slice);
    const pageTitle = slice.match(TITLE_TAG_RE)?.[1] ?? null;
    // Title chain: standard tags first, then server-rendered body title (h1 / article-title class)
    // BEFORE the generic <title> — many SPA/news shells put a useless <title> in the head while
    // the real headline lives in the body.
    const title = cleanText(og["title"]) ??
        cleanText(tw["title"]) ??
        cleanText(ld.title) ??
        cleanText(parseBodyTitle(slice)) ??
        cleanText(pageTitle);
    const description = cleanText(og["description"]) ??
        cleanText(tw["description"]) ??
        cleanText(ld.description) ??
        cleanText(metaDescription);
    // Meta `content` values are HTML attribute text, so character references must be decoded
    // before the value is treated as a URL; otherwise `?a=1&amp;b=2` keeps a literal `amp;`
    // in its query string. JSON-LD and inline-JSON values are not HTML-escaped and are passed
    // through as they are.
    const metaImage = (raw) => resolveImageUrl(raw == null ? null : decodeHtmlEntities(raw), baseUrl);
    const imageUrl = metaImage(og["image"]) ??
        metaImage(tw["image"]) ??
        resolveImageUrl(ld.image, baseUrl) ??
        resolveImageUrl(parseBodyCoverImage(slice), baseUrl);
    return {
        title,
        description,
        imageUrl,
        siteName: cleanText(og["site_name"] ?? tw["site"] ?? null),
        iconUrl: parseFaviconUrl(slice, baseUrl),
    };
}
|
|
127
|
+
// A `<script type="application/ld+json">` element; capture group 1 is the raw JSON text.
const JSON_LD_RE = /<script[^>]*type\s*=\s*["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
/**
 * Pull title/description/image out of the page's JSON-LD blocks.
 *
 * Blocks are visited in document order and blocks that are not valid JSON are skipped. The
 * first node that yields at least one of the three fields decides the whole result.
 *
 * @param html Page markup.
 * @returns `{ title, description, image }`, each null when not found.
 */
function parseJsonLd(html) {
    for (const [, rawJson] of html.matchAll(JSON_LD_RE)) {
        let parsed;
        try {
            parsed = JSON.parse(rawJson.trim());
        }
        catch {
            continue;
        }
        // The payload is a bare array, an object wrapping an `@graph` array, or one node.
        let candidates;
        if (Array.isArray(parsed)) {
            candidates = parsed;
        }
        else if (isRecord(parsed) && Array.isArray(parsed["@graph"])) {
            candidates = parsed["@graph"];
        }
        else {
            candidates = [parsed];
        }
        for (const candidate of candidates) {
            if (!isRecord(candidate))
                continue;
            const title = asString(candidate.headline) ?? asString(candidate.name);
            const description = asString(candidate.description);
            const image = firstImage(candidate.image) ?? asString(candidate.thumbnailUrl);
            if (!title && !description && !image)
                continue;
            return {
                title: title ?? null,
                description: description ?? null,
                image: image ?? null,
            };
        }
    }
    return { title: null, description: null, image: null };
}
|
|
157
|
+
/** True for any non-null value whose `typeof` is "object" (arrays included). */
function isRecord(v) {
    return typeof v === "object" && v !== null;
}
/** Return `v` unchanged when it is a string with non-whitespace content; otherwise null. */
function asString(v) {
    return typeof v === "string" && v.trim() ? v : null;
}
/**
 * JSON-LD `image` is a string, an array, or an ImageObject `{ url }`.
 *
 * @param v The raw `image` value of a JSON-LD node.
 * @returns The first usable image reference, or null. Blank strings count as "no image" at
 *          every level, so a caller's `?? fallback` still runs for `"image": ""`.
 */
function firstImage(v) {
    if (typeof v === "string")
        return asString(v);
    if (Array.isArray(v)) {
        for (const item of v) {
            const found = firstImage(item);
            if (found)
                return found;
        }
        return null;
    }
    if (isRecord(v))
        return asString(v.url);
    return null;
}
|
|
179
|
+
// Elements carrying one of a fixed list of article-title class names. The list is a whitelist
// on purpose: matching any class that merely contains "title" would also hit things like a
// sidebar "related-titles" block. Capture group 1 is the text directly after the opening tag.
const BODY_TITLE_CLASS_RE = /class\s*=\s*["'][^"']*\b(?:article-title|post-title|entry-title|news-title|content-title|headline|title-text)\b[^"']*["'][^>]*>\s*([^<]{4,200}?)\s*</i;
// First <h1> whose inner markup is 4–200 characters long; capture group 1 is that markup.
const H1_RE = /<h1\b[^>]*>\s*([\s\S]{4,200}?)\s*<\/h1>/i;
/**
 * Headline fallback taken from the document body.
 *
 * @param html Page markup.
 * @returns The first <h1>'s text (tags replaced by spaces, whitespace collapsed, trimmed) when
 *          at least 4 characters remain; otherwise the text of the first element with a known
 *          article-title class; otherwise null.
 */
function parseBodyTitle(html) {
    const h1Inner = H1_RE.exec(html)?.[1];
    if (h1Inner) {
        const headline = h1Inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
        if (headline.length >= 4) {
            return headline;
        }
    }
    const classed = BODY_TITLE_CLASS_RE.exec(html);
    return classed?.[1] ?? null;
}
|
|
193
|
+
// Cover image embedded in inline JSON (e.g. QQ's `"imgUrl":"http:\/\/...cover..."`). The URL may be
// extensionless; the re-host step's magic-byte sniff is the safety net against non-image matches.
const JSON_COVER_RE = /"(?:imgUrl|imageUrl|coverUrl|coverImage|cover|ogImage|thumbnail|picUrl)"\s*:\s*"(https?:(?:\\?\/){2}[^"]+?)"/i;
/**
 * Cover image from inline JSON when no og/twitter/json-ld image is present.
 *
 * The captured text is the inside of a JSON string literal, so it is decoded as one. That
 * turns `\/` into `/` and also resolves escapes such as `\u0026`, which would otherwise stay
 * in the URL as literal backslash sequences. When the fragment is not a valid JSON string,
 * only `\/` is unescaped.
 *
 * @param html Page markup.
 * @returns The decoded URL text, or null when no known cover key is present.
 */
function parseBodyCoverImage(html) {
    const raw = html.match(JSON_COVER_RE)?.[1];
    if (!raw)
        return null;
    if (!raw.includes("\\"))
        return raw; // nothing to unescape
    try {
        // `raw` cannot contain a double quote (the pattern stops at the first one), so wrapping
        // it in quotes produces a self-contained JSON string literal.
        const decoded = JSON.parse(`"${raw}"`);
        if (typeof decoded === "string")
            return decoded;
    }
    catch {
        // Not a well-formed JSON string (e.g. an invalid escape) — use the minimal unescape.
    }
    return raw.replace(/\\\//g, "/"); // unescape JSON `\/`
}
|
|
203
|
+
/** Replace every `<...>` tag with a space, then collapse whitespace runs to one space (no trimming). */
function stripTags(s) {
    const withoutTags = s.replace(/<[^>]*>/g, " ");
    return withoutTags.replace(/\s+/g, " ");
}
|
|
206
|
+
/**
 * Pick the best `<link rel="...icon...">` href. Prefers a high-res `apple-touch-icon`, then a
 * regular `icon` / `shortcut icon`. Skips `mask-icon` (monochrome SVG). Returns absolute http(s).
 *
 * @param html Page markup to scan for `<link>` tags.
 * @param baseUrl URL that relative hrefs are resolved against.
 * @returns The first apple-touch-icon URL if any, else the first other icon URL, else null.
 */
export function parseFaviconUrl(html, baseUrl) {
    let appleTouch = null;
    let regular = null;
    for (const [tag] of html.matchAll(LINK_TAG_RE)) {
        const rel = attributeValue(tag, "rel")?.trim().toLowerCase();
        if (!rel || !rel.includes("icon") || rel.includes("mask-icon"))
            continue;
        const href = attributeValue(tag, "href");
        if (!href)
            continue;
        // href is HTML attribute text: decode character references (`&amp;` in query strings)
        // before treating it as a URL.
        const resolved = resolveImageUrl(decodeHtmlEntities(href), baseUrl);
        if (!resolved)
            continue;
        if (rel.includes("apple-touch-icon")) {
            appleTouch = resolved;
            // The first apple-touch-icon always wins, so the rest of the document is irrelevant.
            break;
        }
        regular ??= resolved;
    }
    return appleTouch ?? regular;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Link-preview orchestration: fetch page → parse Open Graph → re-host the cover image through
|
|
3
|
+
* the gateway's stored files → cache.
|
|
4
|
+
*
|
|
5
|
+
* The cover image is downloaded server-side and served from /friday-next/files/ so the app only
|
|
6
|
+
* ever talks to the trusted gateway host (same rationale as `downloadRemoteMedia` for outbound
|
|
7
|
+
* media). Failures degrade to "no card" on the app side, so every error path returns a typed
|
|
8
|
+
* error instead of throwing.
|
|
9
|
+
*/
|
|
10
|
+
/** Data for one preview card, as returned inside a successful `LinkPreviewResult`. */
export interface LinkPreviewPayload {
    /** The requested URL in normalized (parsed and re-serialized) form; also the cache key. */
    url: string;
    /** URL reported by the page fetch as final; equals `url` when the page could not be fetched. */
    finalUrl: string;
    /** Site name from the page metadata, falling back to the host name of `finalUrl`. */
    siteName: string | null;
    /** Title from the page metadata, falling back to the host name of `finalUrl`. */
    title: string;
    /** Description from the page metadata, or null. */
    description: string | null;
    /** Gateway-relative cover URL ("/friday-next/files/{token}") or null. */
    imageUrl: string | null;
    /** Gateway-relative favicon URL ("/friday-next/files/{token}") or null. */
    iconUrl: string | null;
    /** `Date.now()` timestamp (milliseconds) taken when the preview was built. */
    fetchedAt: number;
}
|
|
22
|
+
/**
 * Typed failure codes returned instead of throwing:
 * - `invalid_url`: the input is not an absolute http/https URL.
 * - `blocked_url`: the page fetch was rejected by the SSRF guard.
 * - `fetch_failed`: the page could not be fetched and no favicon could be retrieved either.
 * - `no_metadata`: the page was fetched but no title could be derived.
 */
export type LinkPreviewError = "invalid_url" | "blocked_url" | "fetch_failed" | "no_metadata";
/** Outcome of `getLinkPreview`: the preview payload on success, otherwise a typed error code. */
export type LinkPreviewResult = {
    ok: true;
    preview: LinkPreviewPayload;
} | {
    ok: false;
    error: LinkPreviewError;
};
|
|
30
|
+
/** Test helper: empties both the result cache and the map of in-flight preview builds. */
export declare function resetLinkPreviewCacheForTest(): void;
|
|
31
|
+
/**
 * Build, or return from cache, the preview for `rawUrl`.
 *
 * Results are cached per normalized URL (successes for 24 hours, failures for 10 minutes),
 * and concurrent calls for the same URL share a single in-flight build.
 *
 * @param rawUrl URL text as received from the client.
 */
export declare function getLinkPreview(rawUrl: string): Promise<LinkPreviewResult>;
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Link-preview orchestration: fetch page → parse Open Graph → re-host the cover image through
|
|
3
|
+
* the gateway's stored files → cache.
|
|
4
|
+
*
|
|
5
|
+
* The cover image is downloaded server-side and served from /friday-next/files/ so the app only
|
|
6
|
+
* ever talks to the trusted gateway host (same rationale as `downloadRemoteMedia` for outbound
|
|
7
|
+
* media). Failures degrade to "no card" on the app side, so every error path returns a typed
|
|
8
|
+
* error instead of throwing.
|
|
9
|
+
*/
|
|
10
|
+
import { createFridayNextLogger } from "../logging.js";
|
|
11
|
+
import { storeFile } from "../http/handlers/files.js";
|
|
12
|
+
import { parseOpenGraph } from "./og-parse.js";
|
|
13
|
+
import { BlockedUrlError, fetchPublicUrl, parseHttpUrl } from "./ssrf-guard.js";
|
|
14
|
+
// `maxBytes` passed to fetchPublicUrl for the page HTML (2 MiB).
const HTML_MAX_BYTES = 2 * 1024 * 1024;
// `timeoutMs` passed to fetchPublicUrl for the page HTML.
const HTML_TIMEOUT_MS = 10_000;
// `maxBytes` passed to fetchPublicUrl for the cover image (8 MiB).
const IMAGE_MAX_BYTES = 8 * 1024 * 1024;
// `timeoutMs` passed to fetchPublicUrl for cover and favicon downloads.
const IMAGE_TIMEOUT_MS = 10_000;
// How long a successful result is served from cache (24 hours).
const SUCCESS_TTL_MS = 24 * 60 * 60 * 1000;
// How long a failed result is served from cache (10 minutes).
const FAILURE_TTL_MS = 10 * 60 * 1000;
// Once the cache holds this many entries, one entry is evicted per insert.
const MAX_CACHE_ENTRIES = 1000;
const logger = createFridayNextLogger("link-preview");
// Completed results keyed by normalized URL; each value is `{ result, cachedAt }`.
const cache = new Map();
// Promises of previews currently being built, keyed by normalized URL, so that concurrent
// requests for the same URL share one build.
const inFlight = new Map();
|
|
24
|
+
/** Test helper: drop every cached result and forget every in-flight build. */
export function resetLinkPreviewCacheForTest() {
    for (const store of [cache, inFlight]) {
        store.clear();
    }
}
|
|
28
|
+
/**
 * Return the preview for `rawUrl`, using the cache and sharing in-flight work.
 *
 * Order of checks: URL validity, then a cached result that is still within its TTL, then a
 * build already running for the same normalized URL, then a fresh build.
 *
 * @param rawUrl URL text as received from the client.
 * @returns The preview result; `{ ok: false, error: "invalid_url" }` when the URL does not parse.
 */
export async function getLinkPreview(rawUrl) {
    const parsed = parseHttpUrl(rawUrl);
    if (!parsed) {
        return { ok: false, error: "invalid_url" };
    }
    const key = parsed.toString();
    const hit = cache.get(key);
    if (hit) {
        const maxAge = hit.result.ok ? SUCCESS_TTL_MS : FAILURE_TTL_MS;
        const age = Date.now() - hit.cachedAt;
        if (age < maxAge) {
            return hit.result;
        }
        // Expired: remove it so the rebuilt entry is inserted fresh.
        cache.delete(key);
    }
    const running = inFlight.get(key);
    if (running) {
        return running;
    }
    const task = (async () => {
        try {
            const result = await buildPreview(key);
            writeCache(key, result);
            return result;
        }
        finally {
            inFlight.delete(key);
        }
    })();
    inFlight.set(key, task);
    return task;
}
|
|
54
|
+
/**
 * Store `result` under `key`, stamped with the current time.
 *
 * When the cache is already at MAX_CACHE_ENTRIES, the entry with the smallest `cachedAt` is
 * removed first.
 */
function writeCache(key, result) {
    if (cache.size >= MAX_CACHE_ENTRIES) {
        let victim = null;
        let victimAt = Infinity;
        cache.forEach((entry, candidate) => {
            if (entry.cachedAt < victimAt) {
                victimAt = entry.cachedAt;
                victim = candidate;
            }
        });
        if (victim) {
            cache.delete(victim);
        }
    }
    cache.set(key, { result, cachedAt: Date.now() });
}
|
|
69
|
+
/**
 * Decode a fetched HTML body to text.
 *
 * Uses the charset declared in the response Content-Type when it is something other than
 * UTF-8 and the runtime has a decoder for it; in every other case decodes as UTF-8.
 */
function decodePageBody(page) {
    const declared = /charset\s*=\s*"?([^\s;"]+)/i.exec(page.contentType ?? "")?.[1]?.toLowerCase();
    if (declared && declared !== "utf-8" && declared !== "utf8") {
        try {
            return new TextDecoder(declared).decode(page.body);
        }
        catch {
            // Unknown label, or a runtime built without this encoding — fall back to UTF-8.
        }
    }
    return page.body.toString("utf8");
}
/**
 * Fetch `pageUrl`, extract its metadata and re-host its favicon and cover image.
 *
 * Never throws for page-level problems: an SSRF rejection becomes `blocked_url`, a parser
 * failure is logged and treated as "no metadata", and an unreachable page becomes
 * `fetch_failed` unless a favicon proves the host is alive.
 *
 * @param pageUrl Normalized absolute URL of the page.
 * @returns A `{ ok: true, preview }` or `{ ok: false, error }` result object.
 */
async function buildPreview(pageUrl) {
    let page;
    try {
        page = await fetchPublicUrl(pageUrl, {
            maxBytes: HTML_MAX_BYTES,
            timeoutMs: HTML_TIMEOUT_MS,
            accept: "text/html,application/xhtml+xml",
            requireContentTypePrefixes: ["text/html", "application/xhtml+xml"],
        });
    }
    catch (err) {
        if (err instanceof BlockedUrlError) {
            logger.warn(`link-preview blocked: ${pageUrl} (${err.reason})`);
            return { ok: false, error: "blocked_url" };
        }
        page = null; // network/timeout — fall through to a favicon-only minimal card
    }
    const finalUrl = page?.finalUrl ?? pageUrl;
    let og = null;
    if (page) {
        try {
            og = parseOpenGraph(decodePageBody(page), finalUrl);
        }
        catch (err) {
            // The markup is untrusted input; a parser exception must degrade to a minimal card
            // rather than reject the whole request.
            logger.warn(`link-preview parse failed for ${finalUrl}: ${String(err)}`);
        }
    }
    const hostname = (() => {
        try {
            return new URL(finalUrl).hostname;
        }
        catch {
            return null;
        }
    })();
    // Favicon: parsed <link rel icon> first, then the conventional /favicon.ico (which is reachable
    // even for pages that block bots, e.g. zhihu → redirects to its CDN icon).
    const iconUrl = await resolveFavicon(og?.iconUrl ?? null, finalUrl);
    // A failed page fetch only yields a (minimal) card when the favicon resolved — that proves the
    // domain is real/reachable (e.g. bot-blocked zhihu). A dead domain (favicon also fails) collapses.
    const reachable = page !== null || iconUrl !== null;
    const title = og?.title ?? hostname;
    if (!reachable || !title) {
        return { ok: false, error: page ? "no_metadata" : "fetch_failed" };
    }
    const imageUrl = og?.imageUrl ? await rehostCoverImage(og.imageUrl) : null;
    return {
        ok: true,
        preview: {
            url: pageUrl,
            finalUrl,
            siteName: og?.siteName ?? hostname,
            title, // guaranteed non-empty by the guard above
            description: og?.description ?? null,
            imageUrl,
            iconUrl,
            fetchedAt: Date.now(),
        },
    };
}
|
|
121
|
+
/**
 * Re-host the site's favicon.
 *
 * Candidates are tried one after another and the first that re-hosts successfully wins: the
 * icon parsed from the page (when there is one), then `/favicon.ico` on the origin of
 * `finalUrl`.
 *
 * @returns Gateway-relative URL of the stored icon, or null when no candidate worked.
 */
async function resolveFavicon(parsedIconUrl, finalUrl) {
    let conventional = null;
    try {
        conventional = new URL("/favicon.ico", finalUrl).toString();
    }
    catch {
        // finalUrl unparseable — there is no conventional location to try
    }
    const candidates = [parsedIconUrl, conventional].filter(Boolean);
    for (const candidate of candidates) {
        const rehosted = await rehostIconImage(candidate);
        if (rehosted) {
            return rehosted;
        }
    }
    return null;
}
|
|
139
|
+
/** `maxBytes` passed to fetchPublicUrl for a favicon download (1 MiB). */
const ICON_MAX_BYTES = 1024 * 1024;
/**
 * Download a favicon (full SSRF checks) and re-publish via stored files. Null on any failure.
 *
 * @param iconUrl Absolute URL of the icon candidate.
 * @returns Gateway-relative URL ("/friday-next/files/{token}") of the stored copy, or null
 *          when the download fails, the bytes are not a recognized image, or storing fails.
 */
async function rehostIconImage(iconUrl) {
    let image;
    try {
        image = await fetchPublicUrl(iconUrl, {
            maxBytes: ICON_MAX_BYTES,
            timeoutMs: IMAGE_TIMEOUT_MS,
            accept: "image/*",
            requireContentTypePrefixes: ["image/"],
        });
    }
    catch {
        return null; // a blocked or failed icon just means this candidate is unusable
    }
    if (!image)
        return null;
    const sniffed = sniffImageType(image.body);
    if (!sniffed)
        return null;
    try {
        const stored = storeFile(image.body, `link-preview-icon.${sniffed.ext}`, sniffed.mime);
        return `/friday-next/files/${encodeURIComponent(stored.urlToken)}`;
    }
    catch (err) {
        // A storage failure is a local problem worth surfacing; log it as the cover path does.
        logger.warn(`link-preview icon store failed for ${iconUrl}: ${String(err)}`);
        return null;
    }
}
|
|
166
|
+
/**
 * Fetch the cover image through the SSRF-guarded fetcher and store a copy under the gateway.
 *
 * @param imageUrl Absolute URL of the cover image.
 * @returns Gateway-relative URL ("/friday-next/files/{token}") of the stored copy, or null
 *          when the download fails, the bytes are not a recognized image, or storing fails.
 */
async function rehostCoverImage(imageUrl) {
    let fetched = null;
    try {
        fetched = await fetchPublicUrl(imageUrl, {
            maxBytes: IMAGE_MAX_BYTES,
            timeoutMs: IMAGE_TIMEOUT_MS,
            accept: "image/*",
            requireContentTypePrefixes: ["image/"],
        });
    }
    catch {
        // A rejected or failed download simply means the card has no cover.
        return null;
    }
    const kind = fetched ? sniffImageType(fetched.body) : null;
    if (!kind) {
        return null;
    }
    try {
        const { urlToken } = storeFile(fetched.body, `link-preview-cover.${kind.ext}`, kind.mime);
        return `/friday-next/files/${encodeURIComponent(urlToken)}`;
    }
    catch (err) {
        logger.warn(`link-preview cover store failed for ${imageUrl}: ${String(err)}`);
        return null;
    }
}
|
|
194
|
+
/** Magic-byte sniff — second line of defense after the Content-Type check. */
|
|
195
|
+
function sniffImageType(buffer) {
|
|
196
|
+
if (buffer.length < 12)
|
|
197
|
+
return null;
|
|
198
|
+
// ICO: 00 00 01 00 (favicons are commonly .ico; iOS ImageIO decodes them).
|
|
199
|
+
if (buffer[0] === 0x00 && buffer[1] === 0x00 && buffer[2] === 0x01 && buffer[3] === 0x00) {
|
|
200
|
+
return { ext: "ico", mime: "image/x-icon" };
|
|
201
|
+
}
|
|
202
|
+
if (buffer[0] === 0x89 && buffer[1] === 0x50 && buffer[2] === 0x4e && buffer[3] === 0x47) {
|
|
203
|
+
return { ext: "png", mime: "image/png" };
|
|
204
|
+
}
|
|
205
|
+
if (buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) {
|
|
206
|
+
return { ext: "jpg", mime: "image/jpeg" };
|
|
207
|
+
}
|
|
208
|
+
if (buffer.subarray(0, 4).toString("latin1") === "GIF8") {
|
|
209
|
+
return { ext: "gif", mime: "image/gif" };
|
|
210
|
+
}
|
|
211
|
+
if (buffer.subarray(0, 4).toString("latin1") === "RIFF" &&
|
|
212
|
+
buffer.subarray(8, 12).toString("latin1") === "WEBP") {
|
|
213
|
+
return { ext: "webp", mime: "image/webp" };
|
|
214
|
+
}
|
|
215
|
+
return null;
|
|
216
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSRF guard + restricted fetch for the link-preview endpoint.
|
|
3
|
+
*
|
|
4
|
+
* Unlike `downloadRemoteMedia` (agent-supplied URLs for outbound media), link-preview fetches
|
|
5
|
+
* URLs that originate from arbitrary message text, so every hop must be validated: protocol,
|
|
6
|
+
* port, hostname literals, and the full set of DNS-resolved addresses. Redirects are followed
|
|
7
|
+
* manually so each target is re-checked before the next request.
|
|
8
|
+
*
|
|
9
|
+
* Known residual risk: DNS rebinding TOCTOU — we validate resolved addresses, then `fetch`
|
|
10
|
+
* resolves again. Closing that gap requires dialing by IP with SNI/Host rewriting, which is
|
|
11
|
+
* disproportionate for preview metadata; accepted at this threat level.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Error type that `fetchPublicUrl` is documented to throw on SSRF rejection, as opposed to
 * the null it returns for ordinary fetch failures.
 */
export declare class BlockedUrlError extends Error {
    /** Cause of the rejection; the preview service includes it in its warning log line. */
    readonly reason: string;
    constructor(reason: string);
}
|
|
17
|
+
/** Parse an absolute http/https URL. Returns null for anything else (caller maps to invalid_url). */
|
|
18
|
+
export declare function parseHttpUrl(raw: string): URL | null;
|
|
19
|
+
/** Synchronous literal checks: port, hostname blocklist, IP-literal hosts. Throws BlockedUrlError. */
|
|
20
|
+
export declare function assertPublicHttpUrl(url: URL): void;
|
|
21
|
+
/** True when the IP (v4 or v6, including ::ffff: mapped v4) is private, loopback, or reserved. */
|
|
22
|
+
export declare function isPrivateAddress(ip: string): boolean;
|
|
23
|
+
/** Resolve the hostname and require every returned address to be public. Throws BlockedUrlError. */
|
|
24
|
+
export declare function assertResolvesPublic(url: URL): Promise<void>;
|
|
25
|
+
/** Per-request limits and expectations for `fetchPublicUrl`. */
export interface FetchPublicUrlOptions {
    /** Size cap for the response body; presumably in bytes — confirm against the implementation. */
    maxBytes: number;
    /** Request timeout in milliseconds. */
    timeoutMs: number;
    /** Sent as the Accept header. */
    accept?: string;
    /** When set, the response Content-Type must start with one of these prefixes. */
    requireContentTypePrefixes?: string[];
}
|
|
33
|
+
/** Successful outcome of `fetchPublicUrl`. */
export interface FetchPublicUrlResult {
    /** URL the body was ultimately served from; presumably the last redirect target — confirm. */
    finalUrl: string;
    /** Content-Type reported for the response. */
    contentType: string;
    /** Response body bytes. */
    body: Buffer;
}
|
|
38
|
+
/**
|
|
39
|
+
* Fetch a public http/https URL with manual redirects (≤5 hops, each hop re-validated) and a
|
|
40
|
+
* streamed size cap (Content-Length is not trusted). Returns null on ordinary failures (non-2xx,
|
|
41
|
+
* oversize, timeout, bad content type, DNS error); throws BlockedUrlError on SSRF rejection.
|
|
42
|
+
*/
|
|
43
|
+
export declare function fetchPublicUrl(rawUrl: string, opts: FetchPublicUrlOptions): Promise<FetchPublicUrlResult | null>;
|