@dwk/microsub 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +92 -0
  3. package/dist/auth.d.ts +53 -0
  4. package/dist/auth.d.ts.map +1 -0
  5. package/dist/auth.js +102 -0
  6. package/dist/auth.js.map +1 -0
  7. package/dist/config.d.ts +102 -0
  8. package/dist/config.d.ts.map +1 -0
  9. package/dist/config.js +64 -0
  10. package/dist/config.js.map +1 -0
  11. package/dist/consumer.d.ts +40 -0
  12. package/dist/consumer.d.ts.map +1 -0
  13. package/dist/consumer.js +87 -0
  14. package/dist/consumer.js.map +1 -0
  15. package/dist/discovery.d.ts +59 -0
  16. package/dist/discovery.d.ts.map +1 -0
  17. package/dist/discovery.js +190 -0
  18. package/dist/discovery.js.map +1 -0
  19. package/dist/fetch.d.ts +28 -0
  20. package/dist/fetch.d.ts.map +1 -0
  21. package/dist/fetch.js +72 -0
  22. package/dist/fetch.js.map +1 -0
  23. package/dist/handler.d.ts +24 -0
  24. package/dist/handler.d.ts.map +1 -0
  25. package/dist/handler.js +434 -0
  26. package/dist/handler.js.map +1 -0
  27. package/dist/hfeed.d.ts +25 -0
  28. package/dist/hfeed.d.ts.map +1 -0
  29. package/dist/hfeed.js +252 -0
  30. package/dist/hfeed.js.map +1 -0
  31. package/dist/index.d.ts +39 -0
  32. package/dist/index.d.ts.map +1 -0
  33. package/dist/index.js +32 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/jf2.d.ts +69 -0
  36. package/dist/jf2.d.ts.map +1 -0
  37. package/dist/jf2.js +295 -0
  38. package/dist/jf2.js.map +1 -0
  39. package/dist/log.d.ts +44 -0
  40. package/dist/log.d.ts.map +1 -0
  41. package/dist/log.js +42 -0
  42. package/dist/log.js.map +1 -0
  43. package/dist/poll.d.ts +22 -0
  44. package/dist/poll.d.ts.map +1 -0
  45. package/dist/poll.js +39 -0
  46. package/dist/poll.js.map +1 -0
  47. package/dist/queue.d.ts +25 -0
  48. package/dist/queue.d.ts.map +1 -0
  49. package/dist/queue.js +13 -0
  50. package/dist/queue.js.map +1 -0
  51. package/dist/replay.d.ts +34 -0
  52. package/dist/replay.d.ts.map +1 -0
  53. package/dist/replay.js +49 -0
  54. package/dist/replay.js.map +1 -0
  55. package/dist/safe-fetch.d.ts +86 -0
  56. package/dist/safe-fetch.d.ts.map +1 -0
  57. package/dist/safe-fetch.js +311 -0
  58. package/dist/safe-fetch.js.map +1 -0
  59. package/dist/store.d.ts +131 -0
  60. package/dist/store.d.ts.map +1 -0
  61. package/dist/store.js +393 -0
  62. package/dist/store.js.map +1 -0
  63. package/dist/xml.d.ts +51 -0
  64. package/dist/xml.d.ts.map +1 -0
  65. package/dist/xml.js +196 -0
  66. package/dist/xml.js.map +1 -0
  67. package/package.json +49 -0
  68. package/src/auth.ts +184 -0
  69. package/src/config.ts +156 -0
  70. package/src/consumer.ts +140 -0
  71. package/src/discovery.ts +270 -0
  72. package/src/fetch.ts +82 -0
  73. package/src/handler.ts +594 -0
  74. package/src/hfeed.ts +287 -0
  75. package/src/index.ts +86 -0
  76. package/src/jf2.ts +394 -0
  77. package/src/log.ts +46 -0
  78. package/src/poll.ts +72 -0
  79. package/src/queue.ts +26 -0
  80. package/src/replay.ts +68 -0
  81. package/src/safe-fetch.ts +346 -0
  82. package/src/store.ts +644 -0
  83. package/src/xml.ts +229 -0
@@ -0,0 +1,346 @@
1
+ /**
2
+ * `@dwk/microsub` — SSRF-safe outbound fetch.
3
+ *
4
+ * A Microsub server fetches user- and feed-supplied URLs: a `follow` discovers
5
+ * a feed at a URL the user typed, polling re-fetches it, and `preview`/`search`
6
+ * fetch an arbitrary URL. Without guardrails any of these could be pointed at
7
+ * the Worker's own network — loopback, the link-local cloud metadata IP
8
+ * (`169.254.169.254`), or RFC 1918 ranges — to exfiltrate credentials or probe
9
+ * internal services. This module is the single choke point every outbound fetch
10
+ * in the package goes through. It:
11
+ *
12
+ * 1. rejects URLs whose host is a private, loopback, link-local, or otherwise
13
+ * non-public address (or a name like `localhost` / `*.internal`),
14
+ * 2. follows redirects manually, re-validating the host on every `Location`
15
+ * hop so a public-looking host cannot 302 to an internal one, and capping
16
+ * the hop count, and
17
+ * 3. bounds the whole operation with a single timeout, so a slow-loris source
18
+ * cannot pin a poll-queue invocation.
19
+ *
20
+ * Host validation is purely syntactic on the URL host — DNS rebinding (a name
21
+ * that resolves to a private IP) is out of scope here, as the Workers runtime
22
+ * does not expose name resolution to user code. See `spec/packages/microsub.md`
23
+ * and `spec/non-functional-requirements.md`.
24
+ *
25
+ * @packageDocumentation
26
+ */
27
+
28
+ import { noopLogger, noopMetrics, type Logger, type Metrics } from "@dwk/log";
29
+ import type { FetchLike } from "./fetch";
30
+ import { MicrosubLogEvent } from "./log";
31
+
32
/** Redirect-hop limit applied when the caller does not supply `maxRedirects`. */
export const DEFAULT_MAX_REDIRECTS = 5;

/**
 * Time budget in milliseconds applied when the caller does not supply
 * `timeoutMs`. It covers the whole fetch, redirect hops included.
 */
export const DEFAULT_TIMEOUT_MS = 10_000;

/** Response statuses treated as redirects whose `Location` may be followed. */
const REDIRECT_STATUSES: ReadonlySet<number> = new Set<number>([
  301, 302, 303, 307, 308,
]);
39
+
40
/**
 * Machine-readable cause carried by an {@link SsrfError}.
 *
 * - `invalid_url` — the URL string could not be parsed.
 * - `disallowed_scheme` — the URL's scheme is neither `http:` nor `https:`.
 * - `blocked_host` — the URL's host is private, reserved, or otherwise blocked.
 * - `too_many_redirects` — the redirect chain exceeded the configured cap.
 */
export type SsrfReason =
  | "invalid_url"
  | "disallowed_scheme"
  | "blocked_host"
  | "too_many_redirects";
46
+
47
/**
 * Error thrown when a request is refused on SSRF grounds: a blocked host, a
 * disallowed scheme, an unparseable URL, or too many redirects.
 *
 * Callers are expected to handle it the same way as a network failure, so a
 * refused attempt is indistinguishable from an unreachable host. Before it
 * propagates, {@link safeFetch} records it through its logger and metrics so
 * the refusal still leaves a signal.
 */
export class SsrfError extends Error {
  /** Machine-readable cause of the refusal. */
  readonly reason: SsrfReason;

  /** Host the refusal relates to, when one is known. */
  readonly host?: string;

  /**
   * @param message Human-readable description of the refusal.
   * @param reason Machine-readable cause.
   * @param host Host the refusal relates to; omitted when none is known.
   */
  constructor(message: string, reason: SsrfReason, host?: string) {
    super(message);
    // Override the inherited "Error" so logs and `err.name` checks identify it.
    this.name = "SsrfError";
    this.reason = reason;
    this.host = host;
  }
}
64
+
65
/**
 * Parse a dotted-decimal IPv4 host (`a.b.c.d`) into its four octets.
 *
 * Each part must be one to three decimal digits and is read as base 10, so a
 * leading zero does not switch to octal. Shorthand, hexadecimal, and integer
 * forms are not accepted.
 *
 * @param host Candidate host string.
 * @returns The four octets, or `null` when `host` is not four dot-separated
 * decimal numbers in the range 0–255.
 */
function parseIPv4(host: string): [number, number, number, number] | null {
  const dotted = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (dotted === null) return null;

  // Capture groups 1..4 are the octets; group 0 is the whole match.
  const octets = dotted
    .slice(1, 5)
    .map((digits) => Number.parseInt(digits, 10));
  if (octets.length !== 4 || octets.some((value) => value > 255)) return null;
  return octets as [number, number, number, number];
}
79
+
80
/**
 * Report whether an IPv4 address lies in a range that must never be fetched.
 *
 * Covered ranges: "this network", loopback, link-local (which contains the
 * cloud metadata IP), the RFC 1918 private blocks, CGNAT, the IETF protocol
 * and benchmark assignments, the three TEST-NET documentation blocks, and
 * everything from multicast upward (multicast, reserved, broadcast).
 *
 * @param octets The address as four octets; only the first three are examined.
 * @returns `true` when the address is in one of the ranges above.
 */
function isPrivateIPv4(octets: [number, number, number, number]): boolean {
  const [first, second, third] = octets;

  // 224.0.0.0/4 multicast + 240.0.0.0/4 reserved + broadcast
  if (first >= 224) return true;

  switch (first) {
    case 0: // 0.0.0.0/8 ("this network", incl. 0.0.0.0)
    case 10: // 10.0.0.0/8 private
    case 127: // 127.0.0.0/8 loopback
      return true;
    case 100: // 100.64.0.0/10 CGNAT
      return second >= 64 && second <= 127;
    case 169: // 169.254.0.0/16 link-local (metadata)
      return second === 254;
    case 172: // 172.16.0.0/12 private
      return second >= 16 && second <= 31;
    case 192:
      if (second === 168) return true; // 192.168.0.0/16 private
      // 192.0.0.0/24 IETF protocol, 192.0.2.0/24 TEST-NET-1
      return second === 0 && (third === 0 || third === 2);
    case 198:
      if (second === 18 || second === 19) return true; // 198.18.0.0/15 benchmark
      return second === 51 && third === 100; // 198.51.100.0/24 TEST-NET-2
    case 203: // 203.0.113.0/24 TEST-NET-3
      return second === 0 && third === 113;
    default:
      return false;
  }
}
103
+
104
/**
 * Parse an IPv6 host, with any surrounding brackets already removed, into its
 * eight 16-bit groups.
 *
 * A trailing dotted-decimal IPv4 literal is first rewritten as two hexadecimal
 * groups, and a single `::` is expanded into the zero groups it stands for.
 *
 * @param host Candidate host string.
 * @returns The eight groups, or `null` when `host` has no `:`, has more than
 * one `::`, contains a group that is not 1–4 hex digits, embeds an invalid
 * IPv4 literal, or does not come to exactly eight groups.
 */
function parseIPv6(host: string): number[] | null {
  if (host.indexOf(":") === -1) return null;

  let text = host;

  // Fold a trailing `a.b.c.d` into the two 16-bit groups it occupies.
  const dottedTail = /(?:^|:)((?:\d{1,3}\.){3}\d{1,3})$/.exec(text)?.[1];
  if (dottedTail !== undefined) {
    const quad = parseIPv4(dottedTail);
    if (quad === null) return null;
    const [o0, o1, o2, o3] = quad;
    const upper = ((o0 << 8) | o1).toString(16);
    const lower = ((o2 << 8) | o3).toString(16);
    text = `${text.slice(0, text.length - dottedTail.length)}${upper}:${lower}`;
  }

  // At most one `::` is allowed; overlapping runs such as `:::` also fail here.
  const gapAt = text.indexOf("::");
  if (gapAt !== text.lastIndexOf("::")) return null;

  const hextet = /^[0-9a-fA-F]{1,4}$/;
  /** Convert a `:`-separated run of hex groups; an empty run is zero groups. */
  const hextets = (segment: string): number[] | null => {
    if (segment === "") return [];
    const tokens = segment.split(":");
    if (!tokens.every((token) => hextet.test(token))) return null;
    return tokens.map((token) => Number.parseInt(token, 16));
  };

  if (gapAt === -1) {
    // No compression: all eight groups must be spelled out.
    const full = hextets(text);
    return full !== null && full.length === 8 ? full : null;
  }

  const [head = "", rest = ""] = text.split("::");
  const before = hextets(head);
  const after = hextets(rest);
  if (before === null || after === null) return null;

  // `::` must stand in for at least one group.
  const zeros = 8 - before.length - after.length;
  if (zeros < 1) return null;
  return [...before, ...new Array<number>(zeros).fill(0), ...after];
}
149
+
150
/**
 * Report whether an IPv6 address, given as eight 16-bit groups, must never be
 * fetched.
 *
 * Blocked: the unspecified address, loopback, link-local, site-local,
 * unique-local, multicast, the documentation prefix, and any address that
 * carries a blocked IPv4 address in its last 32 bits under the `::/96`,
 * `::ffff:0:0/96`, or `64:ff9b::/96` prefixes.
 *
 * @param groups The address as 16-bit groups.
 * @returns `true` when the address falls in one of the blocked categories.
 */
function isPrivateIPv6(groups: number[]): boolean {
  const isZero = (value: number): boolean => value === 0;
  const head = groups[0] ?? 0;
  const low = groups[7] ?? 0;

  if (groups.every(isZero)) return true; // :: unspecified
  if (low === 1 && groups.slice(0, 7).every(isZero)) return true; // ::1 loopback

  // [mask, value] pairs matched against the first group.
  const blockedPrefixes: ReadonlyArray<readonly [number, number]> = [
    [0xffc0, 0xfe80], // fe80::/10 link-local
    [0xffc0, 0xfec0], // fec0::/10 site-local (deprecated)
    [0xfe00, 0xfc00], // fc00::/7 unique local
    [0xff00, 0xff00], // ff00::/8 multicast
  ];
  if (blockedPrefixes.some(([mask, value]) => (head & mask) === value)) {
    return true;
  }
  if (head === 0x2001 && groups[1] === 0x0db8) return true; // 2001:db8::/32 documentation

  // ::a.b.c.d (IPv4-compatible) and ::ffff:a.b.c.d (IPv4-mapped)
  const mapsIPv4 =
    groups.slice(0, 5).every(isZero) &&
    (groups[5] === 0xffff || groups[5] === 0x0000);
  // 64:ff9b::a.b.c.d (NAT64 well-known prefix)
  const isNat64 =
    head === 0x0064 &&
    groups[1] === 0xff9b &&
    groups.slice(2, 6).every(isZero);
  if (!mapsIPv4 && !isNat64) return false;

  // The last two groups hold the embedded IPv4 address; judge it as IPv4.
  const high = groups[6] ?? 0;
  return isPrivateIPv4([high >> 8, high & 0xff, low >> 8, low & 0xff]);
}
189
+
190
/**
 * Report whether a non-IP hostname is one that is never public.
 *
 * The comparison is case-insensitive. Blocked names are exactly `localhost`
 * and any name ending in `.localhost`, `.local`, or `.internal`.
 *
 * @param host Hostname to test.
 * @returns `true` when the hostname must not be fetched.
 */
function isBlockedHostname(host: string): boolean {
  const name = host.toLowerCase();
  if (name === "localhost") return true;
  return [".localhost", ".local", ".internal"].some((suffix) =>
    name.endsWith(suffix),
  );
}
200
+
201
/**
 * Decide whether a URL host is private, loopback, link-local, or otherwise
 * not safe to fetch from inside the Worker's network.
 *
 * Accepts the `URL.hostname` form: an IPv6 host may arrive wrapped in `[...]`,
 * and one trailing dot is ignored. The check is purely syntactic; no name
 * resolution takes place.
 *
 * The function fails closed. A host that can only be an IP literal but cannot
 * be parsed as one is reported as blocked rather than being treated as an
 * ordinary public hostname:
 *
 * - a bracketed host, or one containing `:`, that is not a valid IPv6 address
 *   (for example one carrying a zone id such as `fe80::1%eth0`);
 * - a host whose last label is all digits or a `0x` hex number but which is
 *   not a canonical dotted quad (for example `127.1`, `2130706433`, or
 *   `0x7f.0.0.1`). URL parsers read such hosts as IPv4 addresses.
 *
 * @param hostname Host to test.
 * @returns `true` when the host must not be fetched.
 */
export function isPrivateOrReservedHost(hostname: string): boolean {
  if (hostname === "") return true;

  const bracketed = hostname.startsWith("[") && hostname.endsWith("]");
  const host = (bracketed ? hostname.slice(1, -1) : hostname).replace(
    /\.$/,
    "",
  );
  // A bare "." (or "[]") leaves nothing to validate.
  if (host === "") return true;

  const v4 = parseIPv4(host);
  if (v4 !== null) return isPrivateIPv4(v4);
  const v6 = parseIPv6(host);
  if (v6 !== null) return isPrivateIPv6(v6);

  // Brackets and ":" never occur in a DNS name, so this was meant as an IPv6
  // literal that failed to parse.
  if (bracketed || host.includes(":")) return true;

  // A numeric or 0x-hex final label marks a non-canonical IPv4 spelling whose
  // real address is unknown here; no public DNS name ends in such a label.
  if (/(?:^|\.)(?:\d+|0[xX][0-9a-fA-F]*)$/.test(host)) return true;

  return isBlockedHostname(host);
}
220
+
221
/**
 * Check that `rawUrl` is a public `http(s)` URL that may be fetched, and
 * return it parsed.
 *
 * The checks run in order: the string must parse as a URL, its scheme must be
 * `http:` or `https:`, and its host must not be private or reserved.
 *
 * @param rawUrl URL string to validate.
 * @returns The parsed {@link URL}.
 * @throws {SsrfError} with reason `invalid_url` when the string does not
 * parse, `disallowed_scheme` for any other scheme (e.g. `file:`,
 * `javascript:`), or `blocked_host` for a private/reserved host.
 */
export function assertPublicUrl(rawUrl: string): URL {
  let parsed: URL | undefined;
  try {
    parsed = new URL(rawUrl);
  } catch {
    parsed = undefined;
  }
  if (parsed === undefined) {
    throw new SsrfError(`invalid URL: ${rawUrl}`, "invalid_url");
  }

  const { protocol, hostname } = parsed;

  const isHttp = protocol === "http:" || protocol === "https:";
  if (!isHttp) {
    throw new SsrfError(
      `disallowed scheme: ${protocol}`,
      "disallowed_scheme",
      hostname,
    );
  }

  if (isPrivateOrReservedHost(hostname)) {
    throw new SsrfError(`blocked host: ${hostname}`, "blocked_host", hostname);
  }

  return parsed;
}
250
+
251
/** Optional tunables for {@link safeFetch}; every field has a default. */
export interface SafeFetchOptions {
  /** Redirect-hop cap. Defaults to {@link DEFAULT_MAX_REDIRECTS}. */
  readonly maxRedirects?: number;
  /**
   * Time budget in milliseconds for the whole fetch, redirects included.
   * Defaults to {@link DEFAULT_TIMEOUT_MS}.
   */
  readonly timeoutMs?: number;
  /** Receives a warning for each refused request. Defaults to a no-op logger. */
  readonly logger?: Logger;
  /** Receives a count for each refused request. Defaults to no-op metrics. */
  readonly metrics?: Metrics;
}
258
+
259
/** Outcome of a completed {@link safeFetch}. */
export interface SafeFetchResult {
  /** The final response, obtained after following any redirects. */
  readonly response: Response;
  /** The URL that produced `response`, i.e. the last URL requested. */
  readonly url: string;
}
264
+
265
/**
 * Fetch `rawUrl` through `doFetch` with SSRF guardrails.
 *
 * The initial URL and every redirect target are validated with
 * {@link assertPublicUrl}. Redirects are followed manually
 * (`redirect: "manual"`) for at most `maxRedirects` hops. A single
 * {@link AbortSignal.timeout}, combined with `init.signal` when the caller
 * supplied one, is passed to every hop so it bounds the whole chain.
 *
 * On a cross-origin redirect the credential-bearing headers (`authorization`,
 * `cookie`, `cookie2`, `proxy-authorization`, `set-cookie`) are removed for
 * that hop and every later one.
 *
 * A response with a redirect status but no `Location` header is returned
 * as-is. When a redirect is acted on, the body of the redirect response is
 * cancelled before the target is checked, so a refused redirect does not leave
 * an unread body behind.
 *
 * @param doFetch Fetch implementation used for every hop.
 * @param rawUrl URL to fetch.
 * @param init Request options reused on every hop; `redirect` and `signal`
 * are overridden.
 * @param options Redirect cap, timeout, logger, and metrics overrides.
 * @returns The final response and the URL that produced it.
 * @throws {SsrfError} when a URL (initial or redirect target) cannot be
 * parsed, uses a disallowed scheme, or names a blocked host, or when the
 * redirect cap is exceeded. Each such error is reported to the logger and
 * metrics before it is rethrown. Other failures (network, timeout) propagate
 * as the underlying rejection. Callers treat any throw as "fetch failed".
 */
export async function safeFetch(
  doFetch: FetchLike,
  rawUrl: string,
  init: RequestInit,
  options?: SafeFetchOptions,
): Promise<SafeFetchResult> {
  const maxRedirects = options?.maxRedirects ?? DEFAULT_MAX_REDIRECTS;
  const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const logger = options?.logger ?? noopLogger;
  const metrics = options?.metrics ?? noopMetrics;

  // Created once, outside the loop, so the budget covers all hops together.
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
  const signal =
    init.signal != null
      ? AbortSignal.any([init.signal, timeoutSignal])
      : timeoutSignal;

  try {
    let currentUrl = assertPublicUrl(rawUrl).toString();
    let currentInit: RequestInit = { ...init };
    for (let hop = 0; ; hop++) {
      const response = await doFetch(currentUrl, {
        ...currentInit,
        redirect: "manual",
        signal,
      });

      if (!REDIRECT_STATUSES.has(response.status)) {
        return { response, url: currentUrl };
      }

      const location = response.headers.get("location");
      if (location === null || location === "") {
        // Redirect status without a target: nothing to follow.
        return { response, url: currentUrl };
      }

      // This response is never handed to the caller from here on. Release its
      // body now so that every exit below, including the throws, leaves no
      // unread body attached to the connection.
      await response.body?.cancel().catch(() => undefined);

      if (hop >= maxRedirects) {
        throw new SsrfError(
          `too many redirects (> ${maxRedirects})`,
          "too_many_redirects",
          new URL(currentUrl).host,
        );
      }

      // Resolve a possibly relative Location against the current URL. A
      // malformed value is reported like any other unparseable URL instead of
      // escaping as a bare TypeError that skips the logging below.
      let target: string;
      try {
        target = new URL(location, currentUrl).toString();
      } catch {
        throw new SsrfError(
          `invalid URL: ${location}`,
          "invalid_url",
          new URL(currentUrl).host,
        );
      }
      const next = assertPublicUrl(target);

      if (currentInit.headers && new URL(currentUrl).origin !== next.origin) {
        // Do not carry credentials over to a different origin.
        const headers = new Headers(currentInit.headers as HeadersInit);
        for (const name of [
          "authorization",
          "cookie",
          "cookie2",
          "proxy-authorization",
          "set-cookie",
        ]) {
          headers.delete(name);
        }
        currentInit = { ...currentInit, headers };
      }
      currentUrl = next.toString();
    }
  } catch (err) {
    if (err instanceof SsrfError) {
      // Record the refusal before rethrowing; other errors pass through as-is.
      const fields = { reason: err.reason, host: err.host };
      logger.warn(MicrosubLogEvent.SsrfBlocked, fields);
      metrics.count(MicrosubLogEvent.SsrfBlocked, fields);
    }
    throw err;
  }
}