@syengup/friday-channel-next 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +2 -0
- package/dist/src/http/handlers/files.js +1 -0
- package/dist/src/http/handlers/health.d.ts +1 -0
- package/dist/src/http/handlers/health.js +2 -0
- package/dist/src/http/handlers/link-preview.d.ts +9 -0
- package/dist/src/http/handlers/link-preview.js +41 -0
- package/dist/src/http/handlers/messages.d.ts +5 -0
- package/dist/src/http/handlers/messages.js +19 -8
- package/dist/src/http/handlers/plugin-info.d.ts +11 -0
- package/dist/src/http/handlers/plugin-info.js +32 -0
- package/dist/src/http/handlers/plugin-upgrade.d.ts +11 -0
- package/dist/src/http/handlers/plugin-upgrade.js +94 -0
- package/dist/src/http/handlers/sse.js +2 -0
- package/dist/src/http/handlers/status.js +2 -0
- package/dist/src/http/server.js +15 -0
- package/dist/src/link-preview/og-parse.d.ts +21 -0
- package/dist/src/link-preview/og-parse.js +232 -0
- package/dist/src/link-preview/preview-service.d.ts +31 -0
- package/dist/src/link-preview/preview-service.js +216 -0
- package/dist/src/link-preview/ssrf-guard.d.ts +43 -0
- package/dist/src/link-preview/ssrf-guard.js +223 -0
- package/dist/src/plugin-install-info.d.ts +15 -0
- package/dist/src/plugin-install-info.js +87 -0
- package/dist/src/upgrade-runtime.d.ts +39 -0
- package/dist/src/upgrade-runtime.js +27 -0
- package/dist/src/version.d.ts +5 -0
- package/dist/src/version.js +37 -0
- package/index.ts +2 -0
- package/package.json +1 -1
- package/src/http/handlers/files.ts +1 -0
- package/src/http/handlers/health.ts +3 -0
- package/src/http/handlers/link-preview.test.ts +242 -0
- package/src/http/handlers/link-preview.ts +47 -0
- package/src/http/handlers/messages.test.ts +75 -1
- package/src/http/handlers/messages.ts +19 -7
- package/src/http/handlers/plugin-info.ts +51 -0
- package/src/http/handlers/plugin-upgrade.ts +112 -0
- package/src/http/handlers/sse.ts +2 -0
- package/src/http/handlers/status.ts +2 -0
- package/src/http/server.ts +18 -0
- package/src/link-preview/og-parse.test.ts +168 -0
- package/src/link-preview/og-parse.ts +249 -0
- package/src/link-preview/preview-service.ts +247 -0
- package/src/link-preview/ssrf-guard.test.ts +234 -0
- package/src/link-preview/ssrf-guard.ts +229 -0
- package/src/plugin-install-info.test.ts +28 -0
- package/src/plugin-install-info.ts +95 -0
- package/src/upgrade-runtime.ts +69 -0
- package/src/version.ts +41 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { IncomingMessage, ServerResponse } from "node:http";
|
|
2
|
+
import { extractBearerToken } from "../middleware/auth.js";
|
|
3
|
+
import { createFridayNextLogger } from "../../logging.js";
|
|
4
|
+
import { PLUGIN_PACKAGE_NAME, PLUGIN_VERSION } from "../../version.js";
|
|
5
|
+
import { getInstallSource } from "../../plugin-install-info.js";
|
|
6
|
+
import { getUpgradeRuntime } from "../../upgrade-runtime.js";
|
|
7
|
+
|
|
8
|
+
/** Time budget, in milliseconds, handed to the runtime's command runner for the install step. */
const UPGRADE_TIMEOUT_MS = 120_000;
/** Give the 202 response time to flush before the restart kills the process. */
const RESTART_DELAY_MS = 500;

/** Finish `res` with the given status code and `payload` serialized as a JSON body. */
function respondJson(res: ServerResponse, status: number, payload: unknown): void {
  res.statusCode = status;
  res.setHeader("Content-Type", "application/json");
  res.end(JSON.stringify(payload));
}

/**
 * POST /friday-next/plugin/upgrade
 *
 * Runs `openclaw plugins install @syengup/friday-channel-next@latest --force`
 * (registry-aware, updates the install record), responds 202, then triggers a
 * safe gateway restart so the new version loads. Only npm-installed plugins are
 * eligible — dev (load.paths / source==="path") installs return 409 to protect
 * the dev environment from duplicate npm installs.
 *
 * Status codes written by this handler:
 * - 405 when the method is not POST
 * - 401 when `extractBearerToken` yields nothing for the request
 * - 409 when the install source is anything other than "npm"
 * - 500 when the upgrade runtime is missing, the command throws, or it exits non-zero
 * - 202 once the install command has exited with code 0
 *
 * @returns Always `true`: every branch writes a response itself.
 */
export async function handlePluginUpgrade(req: IncomingMessage, res: ServerResponse): Promise<boolean> {
  if (req.method !== "POST") {
    respondJson(res, 405, { error: "Method Not Allowed" });
    return true;
  }
  if (!extractBearerToken(req)) {
    respondJson(res, 401, { error: "Unauthorized: bearer token mismatch" });
    return true;
  }

  const log = createFridayNextLogger("upgrade");

  const installSource = getInstallSource();
  if (installSource !== "npm") {
    respondJson(res, 409, {
      error: "auto-upgrade not available",
      detail: `install source is "${installSource}"; only npm installs can be auto-upgraded`,
      installSource,
    });
    return true;
  }

  const rt = getUpgradeRuntime();
  if (!rt) {
    respondJson(res, 500, { error: "upgrade runtime unavailable" });
    return true;
  }

  const spec = `${PLUGIN_PACKAGE_NAME}@latest`;
  log.info(`Starting plugin upgrade: ${spec} (from ${PLUGIN_VERSION})`);

  let outcome;
  try {
    const command = ["openclaw", "plugins", "install", spec, "--force"];
    outcome = await rt.runCommandWithTimeout(command, UPGRADE_TIMEOUT_MS);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.error(`plugin upgrade command failed to spawn: ${msg}`);
    respondJson(res, 500, { error: "upgrade command failed", detail: msg });
    return true;
  }

  if (outcome.code !== 0) {
    // Only the last 2000 characters of stderr are logged and returned.
    const stderrTail = (outcome.stderr ?? "").slice(-2000);
    log.error(`plugin upgrade exited code=${outcome.code}: ${stderrTail}`);
    respondJson(res, 500, {
      error: "upgrade command exited non-zero",
      code: outcome.code,
      detail: stderrTail,
    });
    return true;
  }

  log.info("Plugin upgrade install succeeded; scheduling gateway restart");

  // Answer before restarting: the caller must see the confirmation before the
  // restart drops the connection. The restart itself runs after a short flush delay.
  respondJson(res, 202, { status: "upgrading", from: PLUGIN_VERSION });

  const restartTimer = setTimeout(() => {
    // A no-op config mutation whose `afterWrite` mode requests the restart.
    void rt
      .mutateConfigFile({
        afterWrite: { mode: "restart", reason: "friday-next 插件自动升级后重启" },
        mutate: () => {},
      })
      .catch((err: unknown) => {
        const msg = err instanceof Error ? err.message : String(err);
        log.error(`gateway restart trigger failed: ${msg}`);
      });
  }, RESTART_DELAY_MS);
  restartTimer.unref?.();

  return true;
}
|
package/src/http/handlers/sse.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { getHostOpenClawConfigSnapshot } from "../../host-config.js";
|
|
|
4
4
|
import { getFridayNextRuntime } from "../../runtime.js";
|
|
5
5
|
import { sseEmitter } from "../../sse/emitter.js";
|
|
6
6
|
import { extractBearerToken } from "../middleware/auth.js";
|
|
7
|
+
import { PLUGIN_VERSION } from "../../version.js";
|
|
7
8
|
|
|
8
9
|
function parseLastEventId(req: IncomingMessage, url: URL): number {
|
|
9
10
|
const query = Number.parseInt(url.searchParams.get("lastEventId") ?? "", 10);
|
|
@@ -56,6 +57,7 @@ export async function handleSseStream(req: IncomingMessage, res: ServerResponse)
|
|
|
56
57
|
deviceId: normalized,
|
|
57
58
|
serverTime: Date.now(),
|
|
58
59
|
lastSeq,
|
|
60
|
+
pluginVersion: PLUGIN_VERSION,
|
|
59
61
|
},
|
|
60
62
|
},
|
|
61
63
|
deviceId,
|
|
@@ -2,6 +2,7 @@ import type { IncomingMessage, ServerResponse } from "node:http";
|
|
|
2
2
|
import { getActiveRunIds } from "../../agent/active-runs.js";
|
|
3
3
|
import { sseEmitter } from "../../sse/emitter.js";
|
|
4
4
|
import { extractBearerToken } from "../middleware/auth.js";
|
|
5
|
+
import { PLUGIN_VERSION } from "../../version.js";
|
|
5
6
|
|
|
6
7
|
export async function handleStatus(req: IncomingMessage, res: ServerResponse): Promise<boolean> {
|
|
7
8
|
if (req.method !== "GET") {
|
|
@@ -24,6 +25,7 @@ export async function handleStatus(req: IncomingMessage, res: ServerResponse): P
|
|
|
24
25
|
ok: true,
|
|
25
26
|
channel: "friday-next",
|
|
26
27
|
version: "v2",
|
|
28
|
+
pluginVersion: PLUGIN_VERSION,
|
|
27
29
|
connections: sseEmitter.getConnectionCount(),
|
|
28
30
|
activeRuns,
|
|
29
31
|
activeRunCount: activeRuns.length,
|
package/src/http/server.ts
CHANGED
|
@@ -20,7 +20,10 @@ import { handleHistorySessions } from "./handlers/history-sessions.js";
|
|
|
20
20
|
import { handleHistoryMessages } from "./handlers/history-messages.js";
|
|
21
21
|
import { handleHistorySetTitle } from "./handlers/history-set-title.js";
|
|
22
22
|
import { handleStatus } from "./handlers/status.js";
|
|
23
|
+
import { handleLinkPreview } from "./handlers/link-preview.js";
|
|
23
24
|
import { handleHealth } from "./handlers/health.js";
|
|
25
|
+
import { handlePluginInfo } from "./handlers/plugin-info.js";
|
|
26
|
+
import { handlePluginUpgrade } from "./handlers/plugin-upgrade.js";
|
|
24
27
|
import { applyCorsHeaders } from "./middleware/cors.js";
|
|
25
28
|
import { resolveFridayNextConfig } from "../config.js";
|
|
26
29
|
import { getHostOpenClawConfigSnapshot } from "../host-config.js";
|
|
@@ -104,11 +107,26 @@ async function handleFridayNextRoute(
|
|
|
104
107
|
return await handleHistorySetTitle(req, res);
|
|
105
108
|
}
|
|
106
109
|
|
|
110
|
+
// Route: GET /friday-next/link-preview?url=... (Open Graph metadata for preview cards)
|
|
111
|
+
if (req.method === "GET" && pathname === "/friday-next/link-preview") {
|
|
112
|
+
return await handleLinkPreview(req, res);
|
|
113
|
+
}
|
|
114
|
+
|
|
107
115
|
// Route: GET /friday-next/health?deviceId=...&nodeDeviceId=...&selfHeal=true
|
|
108
116
|
if (req.method === "GET" && pathname === "/friday-next/health") {
|
|
109
117
|
return await handleHealth(req, res);
|
|
110
118
|
}
|
|
111
119
|
|
|
120
|
+
// Route: GET /friday-next/plugin/info (current/latest version + upgradability)
|
|
121
|
+
if (req.method === "GET" && pathname === "/friday-next/plugin/info") {
|
|
122
|
+
return await handlePluginInfo(req, res);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Route: POST /friday-next/plugin/upgrade (npm install @latest + safe gateway restart)
|
|
126
|
+
if (req.method === "POST" && pathname === "/friday-next/plugin/upgrade") {
|
|
127
|
+
return await handlePluginUpgrade(req, res);
|
|
128
|
+
}
|
|
129
|
+
|
|
112
130
|
// Not found
|
|
113
131
|
return false;
|
|
114
132
|
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { decodeHtmlEntities, parseOpenGraph } from "./og-parse.js";
|
|
3
|
+
|
|
4
|
+
const BASE = "https://example.com/article/42";
|
|
5
|
+
|
|
6
|
+
// Entity fixtures are spelled out as literal references (`&amp;`, `&#20013;`, `&#x27;`) so that each
// assertion really feeds an encoded string to the decoder.
describe("decodeHtmlEntities", () => {
  it("decodes named, decimal, and hex entities", () => {
    // Named references.
    expect(decodeHtmlEntities("Tom &amp; Jerry &mdash; &quot;fun&quot;")).toBe('Tom & Jerry — "fun"');
    // Decimal numeric references (U+4E2D, U+6587).
    expect(decodeHtmlEntities("&#20013;&#25991;")).toBe("中文");
    // Hexadecimal numeric references (U+0027).
    expect(decodeHtmlEntities("&#x27;quoted&#x27;")).toBe("'quoted'");
  });

  it("leaves unknown entities untouched", () => {
    expect(decodeHtmlEntities("&unknownentity; stays")).toBe("&unknownentity; stays");
  });
});
|
|
17
|
+
|
|
18
|
+
// End-to-end cases for parseOpenGraph: standard og tags first, then each fallback source.
describe("parseOpenGraph", () => {
  it("extracts the standard og tags", () => {
    const html = `<html><head>
      <meta property="og:title" content="Hello World" />
      <meta property="og:description" content="A page about things" />
      <meta property="og:image" content="https://cdn.example.com/cover.jpg" />
      <meta property="og:site_name" content="Example" />
    </head><body></body></html>`;
    expect(parseOpenGraph(html, BASE)).toEqual({
      title: "Hello World",
      description: "A page about things",
      imageUrl: "https://cdn.example.com/cover.jpg",
      siteName: "Example",
      iconUrl: null,
    });
  });

  it("handles name= variant, swapped attribute order, and single quotes", () => {
    const html = `
      <meta content="Swapped" property="og:title">
      <meta name='og:description' content='Single quoted'>
    `;
    const result = parseOpenGraph(html, BASE);
    expect(result.title).toBe("Swapped");
    expect(result.description).toBe("Single quoted");
  });

  it("first occurrence wins for duplicate og tags", () => {
    const html = `
      <meta property="og:title" content="First">
      <meta property="og:title" content="Second">
    `;
    expect(parseOpenGraph(html, BASE).title).toBe("First");
  });

  it("falls back to <title> and meta description", () => {
    const html = `<html><head>
      <title> Fallback Title </title>
      <meta name="description" content="Fallback description">
    </head></html>`;
    const result = parseOpenGraph(html, BASE);
    expect(result.title).toBe("Fallback Title");
    expect(result.description).toBe("Fallback description");
  });

  it("decodes entities and collapses whitespace in text fields", () => {
    // The content carries real entity references plus a line break, so both the decoding step and
    // the whitespace collapsing step are exercised.
    const html = `<meta property="og:title" content="Q&amp;A: What&#39;s
      new">`;
    expect(parseOpenGraph(html, BASE).title).toBe("Q&A: What's new");
  });

  it("resolves a relative og:image against the page URL", () => {
    const html = `<meta property="og:image" content="/img/cover.png">`;
    expect(parseOpenGraph(html, BASE).imageUrl).toBe("https://example.com/img/cover.png");
  });

  it("drops non-http og:image values", () => {
    const html = `<meta property="og:image" content="data:image/png;base64,AAAA">`;
    expect(parseOpenGraph(html, BASE).imageUrl).toBeNull();
  });

  it("returns nulls for a page with no usable metadata", () => {
    expect(parseOpenGraph("<html><body>plain</body></html>", BASE)).toEqual({
      title: null,
      description: null,
      imageUrl: null,
      siteName: null,
      iconUrl: null,
    });
  });

  it("extracts and resolves a favicon, preferring apple-touch-icon, skipping mask-icon", () => {
    const html = `<head>
      <link rel="mask-icon" href="/safari.svg" color="#000">
      <link rel="icon" type="image/png" href="/favicon-32.png">
      <link rel="apple-touch-icon" href="https://cdn.example.com/touch.png">
    </head>`;
    expect(parseOpenGraph(html, BASE).iconUrl).toBe("https://cdn.example.com/touch.png");
  });

  it("falls back to a regular icon link and resolves relative hrefs", () => {
    const html = `<link rel="shortcut icon" href="/static/fav.ico">`;
    expect(parseOpenGraph(html, BASE).iconUrl).toBe("https://example.com/static/fav.ico");
  });

  it("returns null icon when only a mask-icon is present", () => {
    expect(parseOpenGraph(`<link rel="mask-icon" href="/m.svg">`, BASE).iconUrl).toBeNull();
  });

  it("falls back to twitter card tags when og is absent", () => {
    const html = `
      <meta name="twitter:title" content="TW Title">
      <meta name="twitter:description" content="TW Desc">
      <meta name="twitter:image" content="https://cdn.example.com/tw.jpg">
    `;
    const r = parseOpenGraph(html, BASE);
    expect(r.title).toBe("TW Title");
    expect(r.description).toBe("TW Desc");
    expect(r.imageUrl).toBe("https://cdn.example.com/tw.jpg");
  });

  it("falls back to JSON-LD (headline/description/image, incl. @graph and ImageObject)", () => {
    const html = `<script type="application/ld+json">
      {"@context":"https://schema.org","@graph":[
        {"@type":"NewsArticle","headline":"LD Headline","description":"LD Desc",
         "image":{"@type":"ImageObject","url":"https://cdn.example.com/ld.jpg"}}
      ]}
    </script>`;
    const r = parseOpenGraph(html, BASE);
    expect(r.title).toBe("LD Headline");
    expect(r.description).toBe("LD Desc");
    expect(r.imageUrl).toBe("https://cdn.example.com/ld.jpg");
  });

  it("prefers a body article-title over a generic <title> (QQ-style SPA shell)", () => {
    const html = `<head><title>搜索资讯页</title></head>
      <body><div class="article-wrapper"><div class="article-title">钉钉"两篇大作文"事件——离职副总裁万字长文</div></div></body>`;
    expect(parseOpenGraph(html, BASE).title).toBe('钉钉"两篇大作文"事件——离职副总裁万字长文');
  });

  it("prefers an <h1> over a generic <title>", () => {
    const html = `<title>Home</title><h1>The Real Headline</h1>`;
    expect(parseOpenGraph(html, BASE).title).toBe("The Real Headline");
  });

  it("extracts a cover image from inline JSON (extensionless, escaped slashes)", () => {
    const html = `<title>搜索资讯页</title>
      <script>window.__INFO__={"imgUrl":"http:\\/\\/qqpublic.qpic.cn\\/qq_public_cover\\/0\\/0-2342_op"}</script>`;
    expect(parseOpenGraph(html, BASE).imageUrl).toBe("http://qqpublic.qpic.cn/qq_public_cover/0/0-2342_op");
  });

  it("standard og tags still win over body/json fallbacks", () => {
    const html = `<meta property="og:title" content="OG Wins">
      <h1>Body H1</h1>
      <div class="article-title">Body Title</div>`;
    expect(parseOpenGraph(html, BASE).title).toBe("OG Wins");
  });

  it("does not throw on malformed or truncated HTML", () => {
    expect(() => parseOpenGraph(`<meta property="og:title" content="Trunc`, BASE)).not.toThrow();
    expect(() => parseOpenGraph("<<<>>><meta<meta>", BASE)).not.toThrow();
  });

  it("ignores empty content values", () => {
    const html = `<meta property="og:title" content="">
      <title>Real Title</title>`;
    // An empty og:title placeholder must not block the fallback chain; the <title> text is expected.
    const result = parseOpenGraph(html, BASE);
    expect(result.title).toBe("Real Title");
  });
});
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Open Graph metadata extraction via regex — no HTML parser dependency.
|
|
3
|
+
*
|
|
4
|
+
* Good enough for the link-preview card use case: og:* meta tags are flat, attribute-ordered
|
|
5
|
+
* variants are handled generically, and pages where this fails simply degrade to "no card".
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Cap on how much of the document is scanned. Despite the name it is compared against
 * `html.length` (UTF-16 code units), not an encoded byte count.
 */
const MAX_PARSE_BYTES = 512 * 1024;

/** Preview-card metadata; each field is `null` when no usable value was found. */
export interface OpenGraphResult {
  /** Page headline, entity-decoded with whitespace collapsed. */
  title: string | null;
  /** Short summary text, entity-decoded with whitespace collapsed. */
  description: string | null;
  /** Absolute http(s) URL of the cover image. */
  imageUrl: string | null;
  /** Site name taken from `og:site_name`, else `twitter:site`. */
  siteName: string | null;
  /** Favicon URL parsed from `<link rel="...icon...">`, resolved absolute. */
  iconUrl: string | null;
}
|
|
18
|
+
|
|
19
|
+
/** Every `<meta ...>` opening tag (global, case-insensitive). */
const META_TAG_RE = /<meta\b[^>]*>/gi;
/** The first `<title>` element; group 1 is its raw inner text. */
const TITLE_TAG_RE = /<title[^>]*>([\s\S]*?)<\/title>/i;
/** Every `<link ...>` opening tag (global, case-insensitive). */
const LINK_TAG_RE = /<link\b[^>]*>/gi;

/**
 * One attribute at a time: group 1 is the attribute name, groups 2/3/4 are the double-quoted,
 * single-quoted, or unquoted value (all undefined for a valueless attribute).
 */
const ATTRIBUTE_RE = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+)))?/g;
/** The leading `<tagname` of a tag, removed so the tag name is not mistaken for an attribute. */
const TAG_OPEN_RE = /^<\s*[^\s>\/]+/;

/**
 * Extract one attribute value from a tag, tolerating single/double/no quotes and any order.
 *
 * Attributes are consumed left to right, so a quoted value is skipped as a whole. Text that merely
 * looks like `name=` inside another attribute's value, or the tail of a hyphenated attribute such
 * as `data-content`, is therefore never mistaken for the requested attribute.
 *
 * @param tag  A single opening tag, e.g. `<meta property="og:title" content="x">`.
 * @param name Attribute name to look up (matched case-insensitively).
 * @returns The value of the first occurrence that carries a value (possibly `""`), or `null` when
 *          the attribute is absent or only appears without a value.
 */
function attributeValue(tag: string, name: string): string | null {
  const wanted = name.toLowerCase();
  const attributes = tag.replace(TAG_OPEN_RE, "");
  for (const m of attributes.matchAll(ATTRIBUTE_RE)) {
    if (m[1].toLowerCase() !== wanted) continue;
    const value: string | undefined = m[2] ?? m[3] ?? m[4];
    // A valueless occurrence (`<meta content>`) does not count; keep looking for one with a value.
    if (value !== undefined) return value;
  }
  return null;
}
|
|
30
|
+
|
|
31
|
+
/** Named character references this module understands, keyed by lower-cased entity name. */
const NAMED_ENTITIES: Record<string, string> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  nbsp: "\u00a0",
  ndash: "–",
  mdash: "—",
  hellip: "…",
  middot: "·",
  copy: "©",
  reg: "®",
  trade: "™",
  lsquo: "‘",
  rsquo: "’",
  ldquo: "“",
  rdquo: "”",
  laquo: "«",
  raquo: "»",
};

/** Largest valid Unicode code point; `String.fromCodePoint` throws a RangeError above it. */
const MAX_CODE_POINT = 0x10ffff;

/**
 * Replace HTML character references in `s` with the characters they denote.
 *
 * Handles hexadecimal (`&#x27;`), decimal (`&#39;`) and the named references listed in
 * `NAMED_ENTITIES` (names are matched case-insensitively). Anything that cannot be decoded is left
 * exactly as written — an unknown name, a malformed number, or a code point beyond U+10FFFF — so
 * this function never throws on hostile input.
 *
 * @param s Text that may contain character references.
 * @returns The text with every decodable reference substituted.
 */
export function decodeHtmlEntities(s: string): string {
  return s.replace(/&(#x?[0-9a-f]+|[a-z]+);/gi, (whole: string, body: string) => {
    if (body.startsWith("#")) {
      const isHex = body[1] === "x" || body[1] === "X";
      const digits = body.slice(isHex ? 2 : 1);
      // The pattern admits hex letters after a bare "#"; a decimal reference must be digits only
      // (parseInt would otherwise silently accept a numeric prefix such as "12" from "12ab").
      if (!isHex && !/^[0-9]+$/.test(digits)) return whole;
      const code = Number.parseInt(digits, isHex ? 16 : 10);
      if (!Number.isInteger(code) || code > MAX_CODE_POINT) return whole;
      return String.fromCodePoint(code);
    }
    const key = body.toLowerCase();
    // Own-property check: a plain object lookup would also resolve inherited names such as
    // "constructor" and splice a function's source text into the output.
    return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, key) ? NAMED_ENTITIES[key] : whole;
  });
}
|
|
66
|
+
|
|
67
|
+
/**
 * Normalise a raw metadata string for display: decode character references, squeeze every run of
 * whitespace to a single space, and trim the ends.
 *
 * @returns The cleaned text, or `null` when the input is nullish or nothing remains after cleaning.
 */
function cleanText(raw: string | null | undefined): string | null {
  if (raw === null || raw === undefined) return null;
  const collapsed = decodeHtmlEntities(raw).replace(/\s+/g, " ").trim();
  return collapsed === "" ? null : collapsed;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Resolve og:image (possibly relative) against the final page URL; only http(s) survives.
 *
 * @param raw     Candidate URL as found in the page; surrounding whitespace is ignored.
 * @param baseUrl Absolute URL of the page, used to resolve relative candidates.
 * @returns The absolute URL string, or `null` when the candidate is missing, blank, unparsable,
 *          or resolves to a scheme other than http/https.
 */
function resolveImageUrl(raw: string | null | undefined, baseUrl: string): string | null {
  const candidate = raw?.trim();
  // A blank candidate must be rejected up front: `new URL("", base)` resolves to the page URL
  // itself, which would otherwise be reported as the image.
  if (!candidate) return null;

  let resolved: URL;
  try {
    resolved = new URL(candidate, baseUrl);
  } catch {
    return null;
  }
  const isWeb = resolved.protocol === "http:" || resolved.protocol === "https:";
  return isWeb ? resolved.toString() : null;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Extract preview-card metadata from an HTML document.
 *
 * Only the first `MAX_PARSE_BYTES` characters are inspected. For each field the sources are tried
 * in a fixed order and the first one producing a usable value wins:
 * - title: `og:title`, `twitter:title`, JSON-LD, body headline, `<title>`
 * - description: `og:description`, `twitter:description`, JSON-LD, `<meta name="description">`
 * - image: `og:image`, `twitter:image`, JSON-LD, cover URL found in inline JSON
 *
 * @param html    Raw page markup.
 * @param baseUrl Absolute page URL used to resolve relative image and icon references.
 * @returns The collected metadata; fields without a usable value are `null`.
 */
export function parseOpenGraph(html: string, baseUrl: string): OpenGraphResult {
  const doc = html.length > MAX_PARSE_BYTES ? html.slice(0, MAX_PARSE_BYTES) : html;

  // For every key the first tag seen is kept (matches browser/crawler behavior).
  const ogTags: Record<string, string> = {};
  const twitterTags: Record<string, string> = {};
  let plainDescription: string | null = null;

  for (const [metaTag] of doc.matchAll(META_TAG_RE)) {
    const rawKey = attributeValue(metaTag, "property") ?? attributeValue(metaTag, "name");
    const key = rawKey?.trim().toLowerCase();
    if (!key) continue;

    // Tags with a missing or blank content are ignored so they cannot shadow a later fallback.
    const value = attributeValue(metaTag, "content");
    if (value == null || value.trim() === "") continue;

    if (key.startsWith("og:")) {
      const field = key.slice("og:".length);
      if (!(field in ogTags)) ogTags[field] = value;
      continue;
    }
    if (key.startsWith("twitter:")) {
      const field = key.slice("twitter:".length);
      if (!(field in twitterTags)) twitterTags[field] = value;
      continue;
    }
    if (key === "description" && plainDescription == null) {
      plainDescription = value;
    }
  }

  const jsonLd = parseJsonLd(doc);
  const titleTagText = doc.match(TITLE_TAG_RE)?.[1] ?? null;

  // The server-rendered body headline (h1 / article-title class) ranks ABOVE the generic <title>:
  // many SPA/news shells ship a useless <title> ("搜索资讯页") while the real headline sits in the
  // body.
  const title =
    cleanText(ogTags["title"]) ??
    cleanText(twitterTags["title"]) ??
    cleanText(jsonLd.title) ??
    cleanText(parseBodyTitle(doc)) ??
    cleanText(titleTagText);

  const description =
    cleanText(ogTags["description"]) ??
    cleanText(twitterTags["description"]) ??
    cleanText(jsonLd.description) ??
    cleanText(plainDescription);

  const imageUrl =
    resolveImageUrl(ogTags["image"] ?? null, baseUrl) ??
    resolveImageUrl(twitterTags["image"] ?? null, baseUrl) ??
    resolveImageUrl(jsonLd.image, baseUrl) ??
    resolveImageUrl(parseBodyCoverImage(doc), baseUrl);

  const siteName = cleanText(ogTags["site_name"] ?? twitterTags["site"] ?? null);
  const iconUrl = parseFaviconUrl(doc, baseUrl);

  return { title, description, imageUrl, siteName, iconUrl };
}
|
|
142
|
+
|
|
143
|
+
/** Every `<script type="application/ld+json">` element; group 1 is the script body. */
const JSON_LD_RE = /<script[^>]*type\s*=\s*["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;

/**
 * Pull title/description/image out of the page's JSON-LD blocks (schema.org Article/NewsArticle/etc.).
 *
 * Blocks are visited in document order and blocks that fail to parse as JSON are skipped. The
 * result comes from the first node that supplies at least one of the three fields; values from
 * different nodes are not combined.
 *
 * @returns The fields of that node (`null` for the ones it lacks), or all `null` when no node qualifies.
 */
function parseJsonLd(html: string): { title: string | null; description: string | null; image: string | null } {
  for (const [, payload] of html.matchAll(JSON_LD_RE)) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(payload.trim());
    } catch {
      continue;
    }

    // The payload is either an array of nodes, a `@graph` wrapper around one, or a single node.
    let candidates: unknown[];
    if (Array.isArray(parsed)) {
      candidates = parsed;
    } else if (isRecord(parsed) && Array.isArray(parsed["@graph"])) {
      candidates = parsed["@graph"] as unknown[];
    } else {
      candidates = [parsed];
    }

    for (const candidate of candidates) {
      if (!isRecord(candidate)) continue;
      const title = asString(candidate.headline) ?? asString(candidate.name);
      const description = asString(candidate.description);
      const image = firstImage(candidate.image) ?? asString(candidate.thumbnailUrl);
      if (!title && !description && !image) continue;
      return { title, description, image };
    }
  }
  return { title: null, description: null, image: null };
}
|
|
172
|
+
|
|
173
|
+
/** Type guard: `v` is a non-null object (arrays included), safe to index by string key. */
function isRecord(v: unknown): v is Record<string, unknown> {
  return typeof v === "object" && v !== null;
}

/** Return `v` unchanged when it is a string with non-whitespace content, otherwise `null`. */
function asString(v: unknown): string | null {
  return typeof v === "string" && v.trim() ? v : null;
}

/**
 * JSON-LD `image` is a string, an array, or an ImageObject `{ url }`.
 *
 * @returns The first non-blank image URL found (arrays are searched depth-first), or `null`.
 *          A blank string counts as "no image", so the caller's `??` fallback still applies.
 */
function firstImage(v: unknown): string | null {
  // Route strings through asString: returning "" here would be non-nullish and would suppress
  // the caller's fallback to other image sources.
  if (typeof v === "string") return asString(v);
  if (Array.isArray(v)) {
    for (const item of v) {
      const found = firstImage(item);
      if (found) return found;
    }
    return null;
  }
  return isRecord(v) ? asString(v.url) : null;
}
|
|
194
|
+
|
|
195
|
+
// Whitelist of class names commonly used for server-rendered article headlines. Matching a fixed
// list (instead of any class containing "title") avoids false positives such as a sidebar
// "related-titles" block. Group 1 is the element's leading text, 4 to 200 characters long.
const BODY_TITLE_CLASS_RE =
  /class\s*=\s*["'][^"']*\b(?:article-title|post-title|entry-title|news-title|content-title|headline|title-text)\b[^"']*["'][^>]*>\s*([^<]{4,200}?)\s*</i;
// First <h1> element; group 1 is its inner markup, 4 to 200 characters long.
const H1_RE = /<h1\b[^>]*>\s*([\s\S]{4,200}?)\s*<\/h1>/i;

/**
 * Headline fallback taken from the rendered body.
 *
 * The first `<h1>` is preferred, provided at least four characters remain once nested tags are
 * stripped. Otherwise the text of the first element carrying a known article-title class is used.
 *
 * @returns The headline text (not entity-decoded), or `null` when neither source matches.
 */
function parseBodyTitle(html: string): string | null {
  const h1Inner = H1_RE.exec(html)?.[1];
  if (h1Inner) {
    const headline = stripTags(h1Inner).trim();
    if (headline.length >= 4) return headline;
  }
  const classMatch = BODY_TITLE_CLASS_RE.exec(html);
  return classMatch?.[1] ?? null;
}
|
|
210
|
+
|
|
211
|
+
// Cover image embedded in inline JSON (e.g. QQ's `"imgUrl":"http:\/\/...cover..."`). The URL may be
// extensionless; the re-host step's magic-byte sniff is the safety net against non-image matches.
const JSON_COVER_RE =
  /"(?:imgUrl|imageUrl|coverUrl|coverImage|cover|ogImage|thumbnail|picUrl)"\s*:\s*"(https?:(?:\\?\/){2}[^"]+?)"/i;

/**
 * Cover image from inline JSON when no og/twitter/json-ld image is present.
 *
 * The captured text is the inside of a JSON string literal, so it is decoded as one. This turns
 * `\/` back into `/` and also resolves escapes such as `\u0026`, which would otherwise stay in the
 * URL as a literal backslash sequence. If the capture is not a valid JSON string body, only `\/`
 * is unescaped.
 *
 * @returns The decoded URL text, or `null` when no known cover key is found.
 */
function parseBodyCoverImage(html: string): string | null {
  const raw = html.match(JSON_COVER_RE)?.[1];
  if (!raw) return null;
  try {
    // The capture cannot contain a double quote (the pattern stops at the first one), so wrapping
    // it in quotes yields a well-delimited literal.
    const decoded: unknown = JSON.parse(`"${raw}"`);
    if (typeof decoded === "string") return decoded;
  } catch {
    // Not a valid JSON string body (e.g. a JS-only escape); fall back to the minimal unescape.
  }
  return raw.replace(/\\\//g, "/"); // unescape JSON `\/`
}
|
|
222
|
+
|
|
223
|
+
/**
 * Replace every `<...>` tag with a space, then squeeze each whitespace run down to one space.
 * The result is not trimmed.
 */
function stripTags(s: string): string {
  const withoutMarkup = s.replace(/<[^>]*>/g, " ");
  return withoutMarkup.replace(/\s+/g, " ");
}
|
|
226
|
+
|
|
227
|
+
/**
 * Choose a favicon from the page's `<link>` tags.
 *
 * A link qualifies when its `rel` contains "icon" but not "mask-icon" (monochrome SVG) and its
 * `href` resolves to an absolute http(s) URL. The first qualifying `apple-touch-icon` (high-res)
 * wins; failing that, the first other qualifying link such as `icon` / `shortcut icon`.
 *
 * @param html    Page markup to scan.
 * @param baseUrl Absolute page URL used to resolve relative hrefs.
 * @returns The chosen absolute icon URL, or `null` when no link qualifies.
 */
export function parseFaviconUrl(html: string, baseUrl: string): string | null {
  let touchIcon: string | null = null;
  let plainIcon: string | null = null;

  for (const [linkTag] of html.matchAll(LINK_TAG_RE)) {
    const rel = attributeValue(linkTag, "rel")?.trim().toLowerCase();
    if (!rel) continue;
    if (!rel.includes("icon") || rel.includes("mask-icon")) continue;

    const href = attributeValue(linkTag, "href");
    const absolute = href ? resolveImageUrl(href, baseUrl) : null;
    if (!absolute) continue;

    // Within each category only the first hit is remembered.
    if (rel.includes("apple-touch-icon")) {
      if (touchIcon === null) touchIcon = absolute;
    } else if (plainIcon === null) {
      plainIcon = absolute;
    }
  }

  return touchIcon ?? plainIcon;
}
|