tgo-wiki 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +255 -0
- package/docs/mcp-usage.md +631 -0
- package/docs/v0-acceptance.md +105 -0
- package/docs/v0-delivery-checklist.md +57 -0
- package/docs/v1-acceptance.md +39 -0
- package/docs/v2-acceptance.md +165 -0
- package/package.json +69 -0
- package/packages/core/src/config/config-loader.ts +109 -0
- package/packages/core/src/config/defaults.ts +74 -0
- package/packages/core/src/config/workspace-resolver.ts +40 -0
- package/packages/core/src/documents/command-document-parser.ts +206 -0
- package/packages/core/src/documents/document-id.ts +26 -0
- package/packages/core/src/documents/document-parser-registry.ts +126 -0
- package/packages/core/src/documents/document-service.ts +656 -0
- package/packages/core/src/documents/document-store.ts +132 -0
- package/packages/core/src/documents/document-types.ts +33 -0
- package/packages/core/src/documents/pdf-text-parser.ts +35 -0
- package/packages/core/src/documents/text-markdown-parser.ts +50 -0
- package/packages/core/src/errors.ts +46 -0
- package/packages/core/src/git/git-service.ts +68 -0
- package/packages/core/src/index.ts +38 -0
- package/packages/core/src/markdown/markdown-scanner.ts +90 -0
- package/packages/core/src/permissions/permission-service.ts +50 -0
- package/packages/core/src/publish/publish-service.ts +142 -0
- package/packages/core/src/result.ts +13 -0
- package/packages/core/src/services/session-workflow-service.ts +493 -0
- package/packages/core/src/services/wiki-service.ts +119 -0
- package/packages/core/src/services/workspace-service.ts +223 -0
- package/packages/core/src/session/session-id.ts +14 -0
- package/packages/core/src/session/session-service.ts +77 -0
- package/packages/core/src/session/session-store.ts +91 -0
- package/packages/core/src/session/session-types.ts +17 -0
- package/packages/core/src/sources/source-id.ts +19 -0
- package/packages/core/src/sources/source-paths.ts +15 -0
- package/packages/core/src/sources/source-service.ts +416 -0
- package/packages/core/src/sources/source-types.ts +77 -0
- package/packages/core/src/sources/source-validator.ts +132 -0
- package/packages/core/src/sources/source-writer.ts +419 -0
- package/packages/core/src/validation/frontmatter-validator.ts +128 -0
- package/packages/core/src/validation/link-validator.ts +55 -0
- package/packages/core/src/validation/path-validator.ts +65 -0
- package/packages/core/src/validation/source-reference-validator.ts +191 -0
- package/packages/core/src/validation/validation-service.ts +106 -0
- package/packages/core/src/vfs/vfs-command-parser.ts +69 -0
- package/packages/core/src/vfs/vfs-service.ts +498 -0
- package/packages/core/src/web/html-to-markdown.ts +144 -0
- package/packages/core/src/web/static-web-fetcher.ts +537 -0
- package/packages/core/src/web/web-id.ts +26 -0
- package/packages/core/src/web/web-ingestion-service.ts +335 -0
- package/packages/core/src/web/web-paths.ts +6 -0
- package/packages/core/src/web/web-types.ts +33 -0
- package/packages/server/src/cli.ts +56 -0
- package/packages/server/src/context.ts +7 -0
- package/packages/server/src/index.ts +2 -0
- package/packages/server/src/mcp-server.ts +111 -0
- package/packages/server/src/schemas/documents.ts +17 -0
- package/packages/server/src/schemas/read.ts +16 -0
- package/packages/server/src/schemas/session.ts +31 -0
- package/packages/server/src/schemas/sources.ts +12 -0
- package/packages/server/src/schemas/web.ts +23 -0
- package/packages/server/src/tools/document-tools.ts +46 -0
- package/packages/server/src/tools/publish-tools.ts +33 -0
- package/packages/server/src/tools/read-tools.ts +52 -0
- package/packages/server/src/tools/response.ts +24 -0
- package/packages/server/src/tools/session-tools.ts +100 -0
- package/packages/server/src/tools/source-tools.ts +32 -0
- package/packages/server/src/tools/web-tools.ts +26 -0
|
@@ -0,0 +1,537 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { lookup } from "node:dns/promises";
|
|
3
|
+
import { request as httpRequest } from "node:http";
|
|
4
|
+
import { request as httpsRequest } from "node:https";
|
|
5
|
+
import { isIP } from "node:net";
|
|
6
|
+
import { Readable } from "node:stream";
|
|
7
|
+
import { WikiError } from "../errors.js";
|
|
8
|
+
import type { StaticWebFetchResult, WebFetchConfig } from "./web-types.js";
|
|
9
|
+
|
|
10
|
+
/** One resolved address for a hostname, in the shape Node's DNS lookup produces. */
type ResolvedHost = {
  /** IP address in textual form. */
  address: string;
  /** Address family number as reported by the resolver (`isIP` yields 4 or 6). */
  family: number;
};

/** Per-request options handed to a {@link WebRequest} implementation. */
type WebRequestOptions = {
  /** Aborts the in-flight request when signalled. */
  signal: AbortSignal;
  /** Request headers to send. */
  headers: Record<string, string>;
  /** Policy-checked resolver used to turn the hostname into the address to connect to. */
  lookupHost: (hostname: string) => Promise<ResolvedHost>;
};

/** Performs a single HTTP(S) request without following redirects. */
type WebRequest = (url: URL, options: WebRequestOptions) => Promise<Response>;

/** Injection points for the transport and the DNS resolver. */
type StaticWebFetchDependencies = {
  request?: WebRequest;
  resolveHost?: (hostname: string) => Promise<ResolvedHost[]>;
};

/** Number of redirects that may be followed before the fetch is rejected. */
const maxRedirects = 5;
|
|
29
|
+
|
|
30
|
+
/**
 * Fetches an HTML document over HTTP(S), following up to `maxRedirects`
 * redirects manually so that every hop is checked against the host policy.
 *
 * @param url - Absolute `http:` or `https:` URL to fetch.
 * @param config - Supplies `requestTimeoutMs`, `maxResponseBytes` and the
 *   optional `allowedPrivateHosts` allow-list.
 * @param dependencies - Optional transport / resolver overrides.
 * @returns The raw bytes, their UTF-8 decoding, a SHA-256 of the bytes and
 *   request metadata (requested URL, final URL, status, content type).
 * @throws WikiError `invalid_url` for unparsable, non-HTTP or disallowed targets;
 *   `fetch_timeout` when the overall timeout elapsed; `response_too_large` when
 *   the body exceeds the byte limit; `fetch_failed` for everything else
 *   (non-2xx status, non-HTML content type, redirect limit, transport errors).
 */
export async function staticWebFetch(
  url: string,
  config: WebFetchConfig,
  dependencies: StaticWebFetchDependencies = {}
): Promise<StaticWebFetchResult> {
  let currentUrl = parseHttpUrl(url);
  // Captured once up front; `currentUrl` is reassigned on redirects.
  const requestedUrl = currentUrl.toString();
  const request = dependencies.request ?? nodeWebRequest;
  const resolveHost = dependencies.resolveHost ?? resolveHostWithDns;
  const controller = new AbortController();
  // A single timer covers all redirect hops and the body download.
  const timeout = setTimeout(() => controller.abort(), config.requestTimeoutMs);
  const hostPolicy = createHostPolicy(config);

  try {
    let response: Response;

    for (let redirectCount = 0; ; redirectCount += 1) {
      assertAllowedUrlHost(currentUrl, hostPolicy);
      response = await request(currentUrl, {
        signal: controller.signal,
        headers: { accept: "text/html,application/xhtml+xml" },
        lookupHost: hostname => resolveAllowedHost(currentUrl, hostname, hostPolicy, resolveHost)
      });

      const redirectTarget = redirectLocation(response, currentUrl);
      if (!redirectTarget) {
        break;
      }

      // The redirect body is never used; release the connection.
      await cancelResponseBody(response);

      if (redirectCount >= maxRedirects) {
        throw new WikiError("fetch_failed", `Web fetch exceeded redirect limit: ${url}`, {
          url,
          maxRedirects
        });
      }

      currentUrl = redirectTarget;
    }

    const contentType = response.headers.get("content-type") ?? "";

    if (!response.ok) {
      await cancelResponseBody(response);
      throw new WikiError("fetch_failed", `Web fetch returned HTTP ${response.status}`, {
        url,
        statusCode: response.status
      });
    }

    if (!isHtmlContentType(contentType)) {
      await cancelResponseBody(response);
      throw new WikiError("fetch_failed", `Web fetch did not return HTML: ${contentType || "unknown"}`, {
        url,
        contentType
      });
    }

    const bytes = await readResponseBytes(response, config.maxResponseBytes);
    const htmlText = new TextDecoder("utf-8", { fatal: false }).decode(bytes);

    return {
      requestedUrl,
      finalUrl: currentUrl.toString(),
      contentType,
      statusCode: response.status,
      htmlBytes: bytes.byteLength,
      htmlContent: bytes,
      htmlText,
      htmlBlobSha256: createHash("sha256").update(bytes).digest("hex")
    };
  } catch (error) {
    if (error instanceof WikiError) {
      throw error;
    }

    // The controller is only ever aborted by the timer above. When the timeout
    // fires while the body is streaming, the failure surfaces as a generic
    // stream/socket error rather than an AbortError, so the signal state is
    // checked as well as the error name.
    const timedOut = controller.signal.aborted || (error instanceof Error && error.name === "AbortError");

    if (timedOut) {
      throw new WikiError("fetch_timeout", `Web fetch timed out: ${url}`, {
        url,
        requestTimeoutMs: config.requestTimeoutMs
      });
    }

    throw new WikiError("fetch_failed", error instanceof Error ? error.message : String(error), { url });
  } finally {
    clearTimeout(timeout);
  }
}
|
|
118
|
+
|
|
119
|
+
/**
 * Default transport: issues one GET with Node's `http`/`https` client and
 * adapts the incoming message to a WHATWG `Response`. Redirects are not followed.
 *
 * Name resolution is delegated to `options.lookupHost`, so the address that
 * was policy-checked is the address the socket connects to.
 *
 * @param url - Target URL; the protocol selects the `http` or `https` client.
 * @param options - Abort signal, request headers and the host resolver.
 * @returns A `Response` whose body streams the incoming message
 *   (no body for 204, 205 and 304).
 * @throws Rejects with an `AbortError`-named error when the signal aborts
 *   before a response arrives, or with the request's transport error.
 */
async function nodeWebRequest(url: URL, options: WebRequestOptions): Promise<Response> {
  return new Promise<Response>((resolve, reject) => {
    let settled = false;
    const resolveOnce = (response: Response) => {
      settled = true;
      resolve(response);
    };
    const rejectOnce = (error: Error) => {
      if (!settled) {
        settled = true;
        reject(error);
      }
    };
    const request = (url.protocol === "https:" ? httpsRequest : httpRequest)(
      url,
      {
        method: "GET",
        headers: options.headers,
        lookup: (hostname, lookupOptions, callback) => {
          options.lookupHost(String(hostname)).then(
            resolved => {
              // With `all: true` (used by net's autoSelectFamily connection
              // path) the callback contract is `(err, addresses[])`, matching
              // dns.lookup; a bare string there makes the connection fail.
              if (lookupOptions.all) {
                callback(null, [{ address: resolved.address, family: resolved.family }]);
                return;
              }

              callback(null, resolved.address, resolved.family);
            },
            error => callback(error instanceof Error ? error : new Error(String(error)), "", 4)
          );
        }
      },
      message => {
        const status = message.statusCode ?? 500;
        const headers = headersFromMessage(message);
        // The Response constructor rejects a body for null-body statuses.
        const body = [204, 205, 304].includes(status)
          ? null
          : (Readable.toWeb(message) as unknown as ReadableStream<Uint8Array>);
        resolveOnce(new Response(body, { status, headers }));
      }
    );

    const abort = () => {
      const error = new Error("Web fetch aborted");
      error.name = "AbortError";
      request.destroy(error);
      rejectOnce(error);
    };

    if (options.signal.aborted) {
      abort();
    } else {
      options.signal.addEventListener("abort", abort, { once: true });
    }

    request.on("error", rejectOnce);
    request.end();
  });
}
|
|
171
|
+
|
|
172
|
+
/**
 * Copies Node-style message headers into a WHATWG `Headers` object.
 * Array values are appended entry by entry, single values are set, and
 * `undefined` values are skipped.
 */
function headersFromMessage(message: { headers: Record<string, string | string[] | undefined> }): Headers {
  const result = new Headers();

  for (const name of Object.keys(message.headers)) {
    const raw = message.headers[name];

    if (Array.isArray(raw)) {
      raw.forEach(item => result.append(name, item));
    } else if (raw !== undefined) {
      result.set(name, raw);
    }
  }

  return result;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Parses `value` as an absolute URL and requires the `http:` or `https:` scheme.
 *
 * @throws WikiError `invalid_url` when the text is not a URL or uses another protocol.
 */
function parseHttpUrl(value: string): URL {
  let parsed: URL;

  try {
    parsed = new URL(value);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    throw new WikiError("invalid_url", `Invalid web URL: ${value}`, {
      url: value,
      reason
    });
  }

  const { protocol } = parsed;
  const isHttpFamily = protocol === "http:" || protocol === "https:";

  if (!isHttpFamily) {
    throw new WikiError("invalid_url", `Unsupported web URL protocol: ${protocol}`, {
      url: value,
      protocol
    });
  }

  return parsed;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Returns the absolute redirect target of `response`, or `null` when the
 * status is not 301/302/303/307/308 or no `location` header is present.
 * A relative `location` is resolved against `currentUrl`.
 */
function redirectLocation(response: Response, currentUrl: URL): URL | null {
  const redirectStatuses = [301, 302, 303, 307, 308];
  const target = redirectStatuses.includes(response.status) ? response.headers.get("location") : null;

  if (!target) {
    return null;
  }

  const absolute = new URL(target, currentUrl);
  // Re-validated so that a redirect to a non-HTTP scheme is rejected.
  return parseHttpUrl(absolute.toString());
}
|
|
225
|
+
|
|
226
|
+
/** Host rules applied to every request target and resolved address. */
type HostPolicy = {
  /** Normalized hostnames exempt from the localhost / private-address checks. */
  allowedPrivateHosts: Set<string>;
};
|
|
229
|
+
|
|
230
|
+
/**
 * Builds the host policy from configuration, normalizing each entry of
 * `allowedPrivateHosts` (an absent list is treated as empty).
 */
function createHostPolicy(config: WebFetchConfig): HostPolicy {
  const configured = config.allowedPrivateHosts ?? [];
  const allowedPrivateHosts = new Set(configured.map(entry => normalizeHost(entry)));

  return { allowedPrivateHosts };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Pre-flight check on the URL's own hostname, before any DNS resolution.
 * Allow-listed hosts pass; otherwise `localhost` names and literal IPs in a
 * disallowed range are rejected.
 *
 * @throws WikiError `invalid_url` when the host is not allowed.
 */
function assertAllowedUrlHost(url: URL, policy: HostPolicy): void {
  const host = normalizeHost(url.hostname);
  const explicitlyAllowed = policy.allowedPrivateHosts.has(host);

  if (explicitlyAllowed) {
    return;
  }

  const isLocalhostName = host === "localhost" || host.endsWith(".localhost");
  if (isLocalhostName) {
    throw disallowedHostError(url, "localhost hostnames are not allowed");
  }

  const literalIp = hostToIpLiteral(host);
  if (!literalIp) {
    return;
  }

  if (isDisallowedIp(literalIp)) {
    throw disallowedHostError(url, "private or reserved IP addresses are not allowed", literalIp);
  }
}
|
|
252
|
+
|
|
253
|
+
/**
 * Resolves `hostname` to the address the socket should connect to, enforcing
 * the host policy on the name and on every resolved address.
 *
 * @param url - URL being fetched; used only for error reporting.
 * @param hostname - Name the transport asked to resolve.
 * @param policy - Allow-list of hosts exempt from the private-address checks.
 * @param resolveHost - Resolver returning all candidate addresses.
 * @returns The literal IP when `hostname` is one, otherwise the first resolved address.
 * @throws WikiError `invalid_url` for disallowed targets, `fetch_failed` when
 *   resolution fails or yields no addresses.
 */
async function resolveAllowedHost(
  url: URL,
  hostname: string,
  policy: HostPolicy,
  resolveHost: (hostname: string) => Promise<ResolvedHost[]>
): Promise<ResolvedHost> {
  const host = normalizeHost(hostname);
  const mustBePublic = !policy.allowedPrivateHosts.has(host);

  if (mustBePublic && (host === "localhost" || host.endsWith(".localhost"))) {
    throw disallowedHostError(url, "localhost hostnames are not allowed");
  }

  const literalIp = hostToIpLiteral(host);
  if (literalIp) {
    if (mustBePublic && isDisallowedIp(literalIp)) {
      throw disallowedHostError(url, "private or reserved IP addresses are not allowed", literalIp);
    }

    return { address: literalIp, family: isIP(literalIp) };
  }

  let candidates: ResolvedHost[];
  try {
    candidates = await resolveHost(host);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    throw new WikiError("fetch_failed", `Could not resolve web URL host: ${host}`, {
      url: url.toString(),
      host,
      reason
    });
  }

  if (candidates.length === 0) {
    throw new WikiError("fetch_failed", `Could not resolve web URL host: ${host}`, {
      url: url.toString(),
      host
    });
  }

  if (mustBePublic) {
    // Every answer is checked, not just the one that will be used.
    const offending = candidates.find(candidate => isDisallowedIp(candidate.address));
    if (offending) {
      throw disallowedHostError(url, "host resolves to a private or reserved IP address", offending.address);
    }
  }

  return candidates[0];
}
|
|
303
|
+
|
|
304
|
+
/**
 * Default resolver: asks `dns.lookup` for all addresses of `hostname`,
 * in the order the resolver returned them (`verbatim`).
 */
async function resolveHostWithDns(hostname: string): Promise<ResolvedHost[]> {
  const lookupOptions = { all: true, verbatim: true } as const;
  return lookup(hostname, lookupOptions);
}
|
|
307
|
+
|
|
308
|
+
/**
 * Canonicalizes a hostname for comparison: trims whitespace, lowercases,
 * removes one pair of enclosing square brackets, then strips trailing dots.
 */
function normalizeHost(value: string): string {
  const lowered = value.trim().toLowerCase();
  const bracketed = lowered.startsWith("[") && lowered.endsWith("]");
  const bare = bracketed ? lowered.slice(1, -1) : lowered;

  let end = bare.length;
  while (end > 0 && bare[end - 1] === ".") {
    end -= 1;
  }

  return bare.slice(0, end);
}
|
|
321
|
+
|
|
322
|
+
/** Returns `host` unchanged when it is an IPv4 or IPv6 literal, otherwise `null`. */
function hostToIpLiteral(host: string): string | null {
  if (isIP(host) !== 0) {
    return host;
  }

  return null;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Builds the `invalid_url` error used for every rejected target.
 *
 * @param url - URL that was rejected.
 * @param reason - Explanation stored in the error details.
 * @param address - Offending IP address, when one is known.
 */
function disallowedHostError(url: URL, reason: string, address?: string): WikiError {
  const target = url.toString();
  const message = `Web URL target is not allowed: ${target}`;

  return new WikiError("invalid_url", message, { url: target, reason, address });
}
|
|
333
|
+
|
|
334
|
+
/**
 * Reports whether `address` falls in a blocked IPv4 or IPv6 range.
 * Text that is not an IP address at all is treated as disallowed.
 */
function isDisallowedIp(address: string): boolean {
  const candidate = normalizeHost(address);

  switch (isIP(candidate)) {
    case 4:
      return isDisallowedIpv4(candidate);
    case 6:
      return isDisallowedIpv6(candidate);
    default:
      // Fail closed on anything unrecognized.
      return true;
  }
}
|
|
348
|
+
|
|
349
|
+
/**
 * Reports whether a dotted-quad IPv4 address lies in one of the blocked
 * CIDR ranges listed below.
 */
function isDisallowedIpv4(address: string): boolean {
  const target = ipv4ToNumber(address);
  const blockedCidrs: Array<[string, number]> = [
    ["0.0.0.0", 8],
    ["10.0.0.0", 8],
    ["100.64.0.0", 10],
    ["127.0.0.0", 8],
    ["169.254.0.0", 16],
    ["172.16.0.0", 12],
    ["192.0.0.0", 24],
    ["192.0.2.0", 24],
    ["192.168.0.0", 16],
    ["198.18.0.0", 15],
    ["198.51.100.0", 24],
    ["203.0.113.0", 24],
    ["224.0.0.0", 4],
    ["240.0.0.0", 4],
    ["255.255.255.255", 32]
  ];

  for (const [network, prefixLength] of blockedCidrs) {
    if (ipv4InRange(target, ipv4ToNumber(network), prefixLength)) {
      return true;
    }
  }

  return false;
}
|
|
371
|
+
|
|
372
|
+
/** Packs a dotted-quad IPv4 address into an unsigned 32-bit number. */
function ipv4ToNumber(address: string): number {
  const [first, second, third, fourth] = address.split(".").map(octet => Number.parseInt(octet, 10));
  // `>>> 0` keeps the top octet unsigned once it reaches bit 31.
  const high = (first << 24) >>> 0;
  const packed = high + (second << 16) + (third << 8) + fourth;

  return packed >>> 0;
}
|
|
376
|
+
|
|
377
|
+
/**
 * Tests whether `value` and `base` share their leading `bits` bits,
 * i.e. whether `value` lies in the CIDR block `base/bits`.
 */
function ipv4InRange(value: number, base: number, bits: number): boolean {
  let mask = 0;
  if (bits !== 0) {
    // A shift by 32 would be a no-op in JS, hence the separate zero case.
    mask = (0xffffffff << (32 - bits)) >>> 0;
  }

  // Equal under the mask exactly when no masked bit differs.
  return ((value ^ base) & mask) === 0;
}
|
|
381
|
+
|
|
382
|
+
/**
 * Reports whether an IPv6 address is blocked. IPv4-mapped addresses are
 * judged by the IPv4 rules of the embedded address; everything else is
 * matched against the prefix list below.
 */
function isDisallowedIpv6(address: string): boolean {
  const target = ipv6ToBigInt(address);

  const embeddedIpv4 = mappedIpv4Address(target);
  if (embeddedIpv4) {
    return isDisallowedIpv4(embeddedIpv4);
  }

  const blockedPrefixes: Array<[string, number]> = [
    ["::", 128],
    ["::", 96],
    ["::1", 128],
    ["64:ff9b::", 96],
    ["64:ff9b:1::", 48],
    ["100::", 64],
    ["2001::", 23],
    ["2001:db8::", 32],
    ["2002::", 16],
    ["fc00::", 7],
    ["fe80::", 10],
    ["ff00::", 8]
  ];

  for (const [network, prefixLength] of blockedPrefixes) {
    if (ipv6InRange(target, ipv6ToBigInt(network), prefixLength)) {
      return true;
    }
  }

  return false;
}
|
|
407
|
+
|
|
408
|
+
/**
 * Extracts the embedded IPv4 address from an IPv4-mapped IPv6 value
 * (`::ffff:0:0/96`) as dotted-quad text, or returns `null` for any other address.
 */
function mappedIpv4Address(value: bigint): string | null {
  const isMapped = ipv6InRange(value, ipv6ToBigInt("::ffff:0:0"), 96);
  if (!isMapped) {
    return null;
  }

  // The IPv4 address occupies the low 32 bits.
  const low32 = Number(value & 0xffffffffn);
  const octets = [24, 16, 8, 0].map(shift => (low32 >>> shift) & 0xff);

  return octets.join(".");
}
|
|
422
|
+
|
|
423
|
+
/**
 * Tests whether the 128-bit `value` lies in the prefix `base/bits`.
 * A zero-length prefix matches everything.
 */
function ipv6InRange(value: bigint, base: bigint, bits: number): boolean {
  if (bits === 0) {
    return true;
  }

  // `bits` ones followed by `128 - bits` zeros.
  const prefixMask = ((1n << BigInt(bits)) - 1n) << BigInt(128 - bits);

  // Equal under the mask exactly when no masked bit differs.
  return ((value ^ base) & prefixMask) === 0n;
}
|
|
431
|
+
|
|
432
|
+
/**
 * Converts a textual IPv6 address into its 128-bit integer value.
 *
 * Handles `::` zero compression and an embedded dotted-quad IPv4 tail
 * (e.g. `::ffff:127.0.0.1`). A zone identifier (`%eth0`) is ignored because it
 * is not part of the address bits, and because a zone name may itself contain
 * `.` and would otherwise be mistaken for an embedded IPv4 address.
 *
 * @param address - IPv6 address already validated as such by the caller.
 */
function ipv6ToBigInt(address: string): bigint {
  const zoneStart = address.indexOf("%");
  const withoutZone = zoneStart === -1 ? address : address.slice(0, zoneStart);
  const normalized = withoutZone.toLowerCase();
  const [head = "", tail = ""] = normalized.split("::");
  const headParts = parseIpv6Parts(head);
  const tailParts = parseIpv6Parts(tail);
  // Number of 16-bit zero groups that `::` stands for.
  const missing = 8 - headParts.length - tailParts.length;
  const parts = normalized.includes("::")
    ? [...headParts, ...Array<number>(Math.max(missing, 0)).fill(0), ...tailParts]
    : headParts;

  return parts.reduce((value, part) => (value << 16n) + BigInt(part), 0n);
}

/**
 * Parses one side of a `::` split into 16-bit group values. A trailing
 * dotted-quad IPv4 address is expanded into its two 16-bit groups.
 *
 * The groups are produced as numbers directly. Formatting the IPv4 halves as
 * decimal text and re-parsing them as hex corrupts the value, which lets
 * `::ffff:127.0.0.1` or `::ffff:10.0.0.1` slip past the private-address filter.
 */
function parseIpv6Parts(value: string): number[] {
  if (value.length === 0) {
    return [];
  }

  const rawParts = value.split(":");
  const last = rawParts.at(-1);
  const embeddedIpv4 = last !== undefined && last.includes(".") ? last : null;

  if (embeddedIpv4 !== null) {
    rawParts.pop();
  }

  const groups = rawParts.map(part => Number.parseInt(part, 16));

  if (embeddedIpv4 !== null) {
    const [a = 0, b = 0, c = 0, d = 0] = embeddedIpv4.split(".").map(octet => Number.parseInt(octet, 10));
    groups.push(((a << 8) | b) & 0xffff, ((c << 8) | d) & 0xffff);
  }

  return groups;
}
|
|
459
|
+
|
|
460
|
+
/**
 * Reports whether a `Content-Type` header value names an HTML media type
 * (`text/html` or `application/xhtml+xml`), ignoring parameters, case and
 * surrounding whitespace.
 */
function isHtmlContentType(value: string): boolean {
  const separator = value.indexOf(";");
  const essence = (separator === -1 ? value : value.slice(0, separator)).trim().toLowerCase();

  return ["text/html", "application/xhtml+xml"].includes(essence);
}
|
|
464
|
+
|
|
465
|
+
/**
 * Cancels the response body stream, if there is one, so the underlying
 * connection can be released. Never rejects: cancellation failures are ignored.
 */
async function cancelResponseBody(response: Response): Promise<void> {
  try {
    const stream = response.body;

    if (stream) {
      await stream.cancel();
    }
  } catch {
    // Best-effort cleanup only; callers should keep the original fetch error.
  }
}
|
|
472
|
+
|
|
473
|
+
/**
 * Reads the whole response body into one byte array while enforcing a size cap.
 *
 * With a streaming body, the download is cancelled as soon as the running
 * total exceeds `maxBytes`; without one, the buffered body is checked after
 * the fact.
 *
 * @param response - Response whose body has not been consumed yet.
 * @param maxBytes - Largest accepted body size in bytes (inclusive).
 * @returns The concatenated body bytes.
 * @throws WikiError `response_too_large` when the body exceeds `maxBytes`.
 */
async function readResponseBytes(response: Response, maxBytes: number): Promise<Uint8Array> {
  const stream = response.body;

  if (!stream) {
    const whole = new Uint8Array(await response.arrayBuffer());
    assertByteLimit(whole.byteLength, maxBytes);
    return whole;
  }

  const reader = stream.getReader();
  const received: Uint8Array[] = [];
  let total = 0;

  try {
    for (let step = await reader.read(); !step.done; step = await reader.read()) {
      const chunk = step.value;

      if (!chunk) {
        continue;
      }

      total += chunk.byteLength;

      if (total > maxBytes) {
        const tooLarge = responseTooLargeError(total, maxBytes);

        try {
          await reader.cancel(tooLarge);
        } catch {
          // Best-effort cleanup only; keep response_too_large as the error.
        }

        throw tooLarge;
      }

      received.push(chunk);
    }
  } finally {
    reader.releaseLock();
  }

  const merged = new Uint8Array(total);
  received.reduce((offset, chunk) => {
    merged.set(chunk, offset);
    return offset + chunk.byteLength;
  }, 0);

  return merged;
}
|
|
525
|
+
|
|
526
|
+
/**
 * Throws `response_too_large` when `observedBytes` is greater than `maxBytes`;
 * otherwise does nothing.
 */
function assertByteLimit(observedBytes: number, maxBytes: number): void {
  const exceeded = observedBytes > maxBytes;

  if (!exceeded) {
    return;
  }

  throw responseTooLargeError(observedBytes, maxBytes);
}
|
|
531
|
+
|
|
532
|
+
/**
 * Builds the `response_too_large` error, recording both the number of bytes
 * seen so far and the configured limit.
 */
function responseTooLargeError(observedBytes: number, maxBytes: number): WikiError {
  const message = `Web response exceeded byte limit: ${observedBytes} > ${maxBytes}`;
  const details = { observedBytes, maxBytes };

  return new WikiError("response_too_large", message, details);
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { assertValidSourceId } from "../sources/source-id.js";
|
|
3
|
+
|
|
4
|
+
/** Facts about a fetched page from which its source id is derived. */
export type WebSourceIdInput = {
  /** URL the content was ultimately served from. */
  finalUrl: string;
  /** SHA-256 of the fetched HTML, as hex text. */
  htmlBlobSha256: string;
  /** Time of the fetch; its UTC calendar date becomes part of the id. */
  fetchedAt: Date;
};

/** Strategy that turns fetch facts into a source id. */
export type WebSourceIdGenerator = (input: WebSourceIdInput) => string;
|
|
11
|
+
|
|
12
|
+
/**
 * Derives the source id for a fetched page: `web_<YYYYMMDD>_<suffix>`, where
 * the date is the UTC fetch date and the suffix is the first 16 hex characters
 * of SHA-256 over the final URL, a NUL separator and the HTML hash.
 *
 * The id is passed through `assertValidSourceId` before being returned.
 */
export function generateWebSourceId(input: WebSourceIdInput): string {
  const { fetchedAt, finalUrl, htmlBlobSha256 } = input;

  const datePart = [
    String(fetchedAt.getUTCFullYear()).padStart(4, "0"),
    String(fetchedAt.getUTCMonth() + 1).padStart(2, "0"),
    String(fetchedAt.getUTCDate()).padStart(2, "0")
  ].join("");

  const hasher = createHash("sha256");
  hasher.update(finalUrl);
  // The NUL keeps the URL and the hash from running together.
  hasher.update("\0");
  hasher.update(htmlBlobSha256);
  const suffix = hasher.digest("hex").slice(0, 16);

  const id = `web_${datePart}_${suffix}`;
  assertValidSourceId(id);
  return id;
}
|