@dwk/webmention 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +140 -0
  3. package/dist/discovery.d.ts +43 -0
  4. package/dist/discovery.d.ts.map +1 -0
  5. package/dist/discovery.js +128 -0
  6. package/dist/discovery.js.map +1 -0
  7. package/dist/fetch.d.ts +28 -0
  8. package/dist/fetch.d.ts.map +1 -0
  9. package/dist/fetch.js +73 -0
  10. package/dist/fetch.js.map +1 -0
  11. package/dist/html.d.ts +68 -0
  12. package/dist/html.d.ts.map +1 -0
  13. package/dist/html.js +183 -0
  14. package/dist/html.js.map +1 -0
  15. package/dist/inbox.d.ts +41 -0
  16. package/dist/inbox.d.ts.map +1 -0
  17. package/dist/inbox.js +73 -0
  18. package/dist/inbox.js.map +1 -0
  19. package/dist/index.d.ts +96 -0
  20. package/dist/index.d.ts.map +1 -0
  21. package/dist/index.js +161 -0
  22. package/dist/index.js.map +1 -0
  23. package/dist/log.d.ts +42 -0
  24. package/dist/log.d.ts.map +1 -0
  25. package/dist/log.js +40 -0
  26. package/dist/log.js.map +1 -0
  27. package/dist/safe-fetch.d.ts +101 -0
  28. package/dist/safe-fetch.d.ts.map +1 -0
  29. package/dist/safe-fetch.js +348 -0
  30. package/dist/safe-fetch.js.map +1 -0
  31. package/dist/sender.d.ts +43 -0
  32. package/dist/sender.d.ts.map +1 -0
  33. package/dist/sender.js +80 -0
  34. package/dist/sender.js.map +1 -0
  35. package/dist/validate.d.ts +47 -0
  36. package/dist/validate.d.ts.map +1 -0
  37. package/dist/validate.js +76 -0
  38. package/dist/validate.js.map +1 -0
  39. package/dist/verify.d.ts +61 -0
  40. package/dist/verify.d.ts.map +1 -0
  41. package/dist/verify.js +216 -0
  42. package/dist/verify.js.map +1 -0
  43. package/package.json +45 -0
  44. package/src/discovery.ts +167 -0
  45. package/src/fetch.ts +84 -0
  46. package/src/html.ts +206 -0
  47. package/src/inbox.ts +121 -0
  48. package/src/index.ts +297 -0
  49. package/src/log.ts +44 -0
  50. package/src/safe-fetch.ts +405 -0
  51. package/src/sender.ts +131 -0
  52. package/src/validate.ts +116 -0
  53. package/src/verify.ts +294 -0
package/src/fetch.ts ADDED
@@ -0,0 +1,84 @@
1
+ /**
2
+ * `@dwk/webmention` — injectable `fetch` type.
3
+ *
4
+ * Endpoint discovery, source verification, and sending all perform HTTP I/O.
5
+ * They accept a {@link FetchLike} so callers can inject a stub in tests (no
6
+ * network) and so the package never reaches for a global it didn't receive.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+
11
/**
 * The smallest `fetch` shape this package relies on: a URL string plus optional
 * `RequestInit`, resolving to a `Response`. Callers inject an implementation
 * (or a test stub) instead of the package reaching for a global.
 */
export type FetchLike = (input: string, init?: RequestInit) => Promise<Response>;
16
+
17
/**
 * Default upper bound, in bytes, for a fetched document body: 2 MiB.
 *
 * Discovery and verification only scan markup for links, so anything larger is
 * almost certainly hostile or irrelevant, and buffering it risks exhausting the
 * Worker's 128 MB memory limit. See `spec/non-functional-requirements.md`.
 */
export const MAX_BODY_BYTES = 2 * 1024 ** 2;
24
+
25
/**
 * Read a response body as UTF-8 text, refusing bodies larger than `maxBytes`.
 *
 * - A declared `Content-Length` over the cap is rejected up front, and the
 *   unread body is cancelled (best effort) so the connection is not left
 *   holding an unconsumed stream.
 * - Otherwise the stream is read incrementally and cancelled the moment the
 *   running byte count exceeds the cap, so a missing or lying `Content-Length`
 *   cannot force the whole body into memory. Chunks are decoded as they arrive
 *   (streaming `TextDecoder`), so the raw bytes are never buffered twice.
 * - When the response exposes no stream (`body === null`), `response.text()` is
 *   used and the cap is applied to the UTF-8 *byte* length of the text, not its
 *   UTF-16 length, so non-ASCII bodies cannot slip past the cap.
 *
 * @param response - The response whose body should be read.
 * @param maxBytes - Maximum accepted body size in bytes.
 * @returns The decoded text, or `null` when the body is too large or cannot be
 *   read.
 */
export async function readBodyCapped(
  response: Response,
  maxBytes = MAX_BODY_BYTES,
): Promise<string | null> {
  const declared = response.headers.get("content-length");
  if (declared !== null) {
    const length = Number.parseInt(declared, 10);
    if (Number.isFinite(length) && length > maxBytes) {
      try {
        // Release the underlying connection instead of leaving the body unread.
        await response.body?.cancel();
      } catch {
        // Best effort: a locked or already-errored stream cannot be cancelled.
      }
      return null;
    }
  }

  const body = response.body;
  if (body === null) {
    try {
      const text = await response.text();
      // UTF-8 never uses fewer bytes than UTF-16 code units, so this cheap
      // check can reject early without encoding.
      if (text.length > maxBytes) {
        return null;
      }
      return new TextEncoder().encode(text).byteLength > maxBytes ? null : text;
    } catch {
      return null;
    }
  }

  const reader = body.getReader();
  const decoder = new TextDecoder();
  let text = "";
  let total = 0;
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      if (value !== undefined) {
        total += value.byteLength;
        if (total > maxBytes) {
          await reader.cancel();
          return null;
        }
        // `stream: true` keeps a multi-byte sequence split across two chunks
        // intact until its remaining bytes arrive.
        text += decoder.decode(value, { stream: true });
      }
    }
  } catch {
    return null;
  }

  // Flush any bytes the decoder is still holding (an incomplete trailing
  // sequence becomes U+FFFD, exactly as a one-shot decode would produce).
  return text + decoder.decode();
}
package/src/html.ts ADDED
@@ -0,0 +1,206 @@
1
+ /**
2
+ * `@dwk/webmention` — HTML / `Link`-header parsing helpers.
3
+ *
4
+ * Shared by endpoint discovery (sender) and source verification (receiver).
5
+ * `Link`-header parsing is plain string scanning; HTML scanning uses the
6
+ * Workers runtime's streaming `HTMLRewriter` rather than regex tag matching, so
7
+ * it handles comments, attribute quoting, and malformed markup correctly
8
+ * without pulling a parser into the bundle (`HTMLRewriter` is built into the
9
+ * runtime — zero script-size cost; see `spec/non-functional-requirements.md`).
10
+ *
11
+ * Because `HTMLRewriter` is a `workerd` global, the HTML scanners are async and
12
+ * exercised under the Workers test pool, not bare Node.
13
+ *
14
+ * @packageDocumentation
15
+ */
16
+
17
/** One entry of a parsed HTTP `Link` header. */
export interface LinkHeaderEntry {
  /** Target URI exactly as written between `<` and `>`; may be empty. */
  readonly uri: string;
  /** Tokens of the entry's `rel` parameter, in the order they appeared. */
  readonly rels: ReadonlyArray<string>;
}
22
+
23
/**
 * Turn an HTTP `Link` header value into a list of entries.
 *
 * The header may carry several comma-separated links, each with
 * semicolon-separated parameters, e.g.
 * `<https://a.example/webmention>; rel="webmention"`.
 *
 * A link without a usable `rel` parameter is skipped. An empty URI (`<>`) is
 * preserved so the caller can resolve it against the document URL (an endpoint
 * advertised at the page itself).
 *
 * @param value - Raw header value, or `null` when the header is absent.
 * @returns The parsed entries, in header order; empty for a missing or blank
 *   header.
 */
export function parseLinkHeader(value: string | null): LinkHeaderEntry[] {
  if (value === null || value.trim() === "") {
    return [];
  }
  return splitLinks(value).flatMap((segment): LinkHeaderEntry[] => {
    const parsed = /^\s*<([^>]*)>\s*(.*)$/.exec(segment);
    if (parsed === null) {
      return [];
    }
    const [, uri = "", params = ""] = parsed;
    const rels = splitTokens(extractRel(params));
    return rels.length > 0 ? [{ uri, rels }] : [];
  });
}
50
+
51
/**
 * Split a `Link` header on top-level commas.
 *
 * A comma only separates links when it is outside both the `<…>` URI reference
 * and any double-quoted parameter value, so `title="A, B"` or a comma inside
 * the URI does not split an entry.
 *
 * The scanner is a small state machine rather than a depth counter:
 * - inside `"…"`, a backslash escapes the next character (so `\"` stays inside
 *   the value and `\\"` correctly closes it);
 * - inside `<…>`, everything up to the first `>` belongs to the URI;
 * - a stray `>` outside a URI is ordinary text and cannot suppress later splits.
 *
 * @param value - The raw header value.
 * @returns The comma-separated segments, untrimmed; a blank trailing segment is
 *   dropped.
 */
function splitLinks(value: string): string[] {
  const result: string[] = [];
  let inUri = false;
  let inQuotes = false;
  let escaped = false;
  let current = "";
  for (const char of value) {
    if (inQuotes) {
      if (escaped) {
        // The previous backslash consumed this character.
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === '"') {
        inQuotes = false;
      }
    } else if (inUri) {
      if (char === ">") {
        inUri = false;
      }
    } else if (char === '"') {
      inQuotes = true;
    } else if (char === "<") {
      inUri = true;
    } else if (char === ",") {
      result.push(current);
      current = "";
      continue;
    }
    current += char;
  }
  if (current.trim() !== "") {
    result.push(current);
  }
  return result;
}
82
+
83
/**
 * Split a `Link` entry's parameter string on top-level semicolons.
 *
 * A `;` inside a double-quoted value does not split a parameter. Inside quotes
 * a backslash escapes the next character, so `\"` stays inside the value while
 * `\\"` (an escaped backslash followed by the closing quote) ends it — a value
 * that ends in a backslash can therefore no longer hide the parameters after
 * it.
 *
 * @param paramString - Everything after the `<…>` URI of one link entry.
 * @returns The parameter segments, untrimmed; a blank trailing segment is
 *   dropped.
 */
function splitParams(paramString: string): string[] {
  const result: string[] = [];
  let inQuotes = false;
  let escaped = false;
  let current = "";
  for (const char of paramString) {
    if (inQuotes) {
      if (escaped) {
        // The previous backslash consumed this character.
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === '"') {
        inQuotes = false;
      }
      current += char;
    } else if (char === ";") {
      result.push(current);
      current = "";
    } else {
      if (char === '"') {
        inQuotes = true;
      }
      current += char;
    }
  }
  if (current.trim() !== "") {
    result.push(current);
  }
  return result;
}
108
+
109
/**
 * Pull the value of the `rel` parameter out of a `Link` entry's parameters.
 *
 * Each parameter is matched on its own, so text that merely looks like `rel=`
 * inside another parameter's quoted value (e.g. `title="my rel=x"`) is never
 * picked up. The value may be double-quoted, single-quoted, or a bare token;
 * surrounding quotes are stripped. The first `rel` parameter wins.
 *
 * @param paramString - Everything after the `<…>` URI of one link entry.
 * @returns The raw `rel` value, or `null` when no `rel` parameter is present.
 */
function extractRel(paramString: string): string | null {
  const relPattern = /^\s*rel\s*=\s*("([^"]*)"|'([^']*)'|[^;\s]+)\s*$/i;
  for (const candidate of splitParams(paramString)) {
    const captured = relPattern.exec(candidate);
    if (captured === null) {
      continue;
    }
    const [, raw, doubleQuoted, singleQuoted] = captured;
    return doubleQuoted ?? singleQuoted ?? raw ?? null;
  }
  return null;
}
126
+
127
/**
 * Report whether a `Content-Type` value identifies an HTML document.
 *
 * Only the media type's essence (the text before the first `;`) is examined,
 * trimmed and lowercased, so `text/html; charset=utf-8` qualifies while a
 * different type that merely mentions `text/html` in a parameter does not.
 *
 * @param contentType - Raw `Content-Type` header value.
 * @returns `true` for `text/html` and `application/xhtml+xml`.
 */
export function isHtmlContentType(contentType: string): boolean {
  const [mediaType = ""] = contentType.split(";");
  switch (mediaType.trim().toLowerCase()) {
    case "text/html":
    case "application/xhtml+xml":
      return true;
    default:
      return false;
  }
}
138
+
139
/**
 * Report whether a `Content-Type` value identifies a JSON document.
 *
 * Only the media type's essence (the text before the first `;`) is examined,
 * trimmed and lowercased.
 *
 * @param contentType - Raw `Content-Type` header value.
 * @returns `true` for `application/json` and for any essence ending in `+json`
 *   (e.g. `application/activity+json`).
 */
export function isJsonContentType(contentType: string): boolean {
  const [mediaType = ""] = contentType.split(";");
  const essence = mediaType.trim().toLowerCase();
  if (essence === "application/json") {
    return true;
  }
  return essence.endsWith("+json");
}
148
+
149
/**
 * Break a whitespace-separated token list (such as a `rel` value) into its
 * tokens.
 *
 * @param value - The token list, or `null` when the attribute/parameter is
 *   absent.
 * @returns The non-empty tokens in order; empty for `null` or blank input.
 */
export function splitTokens(value: string | null): string[] {
  if (value === null) {
    return [];
  }
  // Every maximal run of non-whitespace characters is one token.
  return [...(value.match(/\S+/g) ?? [])];
}
159
+
160
/**
 * Resolve `uri` against `base` using the platform `URL` parser.
 *
 * @param uri - Absolute or relative reference (may be empty).
 * @param base - Base URL the reference is resolved against.
 * @returns The normalized absolute URL, or `null` when the pair cannot be
 *   parsed.
 */
export function resolveUrl(uri: string, base: string): string | null {
  let resolved: URL;
  try {
    resolved = new URL(uri, base);
  } catch {
    return null;
  }
  return resolved.toString();
}
168
+
169
/** One element reported by {@link scanElements}. */
export interface ScannedElement {
  /** Lowercased tag name, e.g. `"a"`, `"link"`, `"base"`. */
  readonly name: string;
  /**
   * The requested attributes, keyed by name: `null` when the attribute is
   * absent, `""` when it is present but empty.
   */
  readonly attrs: Readonly<Record<string, string | null>>;
}
176
+
177
/**
 * Run `html` through the runtime's streaming `HTMLRewriter` and collect, in
 * document order, every element matching `selector` along with the requested
 * attribute values.
 *
 * Relying on a real tokenizer instead of regex means markup inside comments is
 * never reported (comment contents are treated as text, which satisfies
 * webmention.rocks discovery test 13), attribute quoting is handled properly,
 * and `data-href` can never be confused with `href`.
 *
 * @param html - The document markup to scan.
 * @param selector - CSS selector passed to `HTMLRewriter.on`.
 * @param attrNames - Attribute names to read from each matched element.
 * @returns The matched elements; each requested attribute is `null` when
 *   absent and `""` when present but empty.
 */
export async function scanElements(
  html: string,
  selector: string,
  attrNames: readonly string[],
): Promise<ScannedElement[]> {
  const found: ScannedElement[] = [];
  const pipeline = new HTMLRewriter().on(selector, {
    element(node) {
      const attrs: Record<string, string | null> = {};
      attrNames.forEach((attr) => {
        attrs[attr] = node.getAttribute(attr);
      });
      found.push({ name: node.tagName, attrs });
    },
  });
  // The handlers only fire while the transformed body is being consumed, so
  // read it to the end before returning.
  const transformed = pipeline.transform(new Response(html));
  await transformed.text();
  return found;
}
package/src/inbox.ts ADDED
@@ -0,0 +1,121 @@
1
+ /**
2
+ * `@dwk/webmention` — inbox store.
3
+ *
4
+ * Verified mentions are persisted to an inbox so they can be surfaced on the
5
+ * target resource. The default is a D1-backed store (strongly consistent —
6
+ * never KV, per `spec/non-functional-requirements.md`); when composed into a
7
+ * Solid Pod, a caller can supply an {@link InboxStore} backed by the
8
+ * `@dwk/solid-pod` Durable Object instead. The store keys on the
9
+ * `(source, target)` pair so re-verifying a mention updates it in place and a
10
+ * source that drops the link can be removed. See `spec/packages/webmention.md`.
11
+ *
12
+ * @packageDocumentation
13
+ */
14
+
15
+ import type { D1Database } from "@cloudflare/workers-types";
16
+
17
/** A Webmention whose `source` was confirmed to link to `target`. */
export interface VerifiedMention {
  /** URL of the document containing the link. */
  readonly source: string;
  /** URL being linked to. */
  readonly target: string;
  /** When the link was confirmed, in epoch milliseconds. */
  readonly verifiedAt: number;
}
24
+
25
/** Storage contract for verified mentions, keyed on the `(source, target)` pair. */
export interface InboxStore {
  /** Insert the mention, or update it in place when the pair already exists. */
  store(mention: VerifiedMention): Promise<void>;
  /** Delete the pair (e.g. the source dropped the link); a no-op when absent. */
  remove(source: string, target: string): Promise<void>;
  /** Return mentions newest first, restricted to `target` when one is given. */
  list(target?: string): Promise<VerifiedMention[]>;
}
34
+
35
/** Optional settings accepted by {@link createD1Inbox}. */
export interface D1InboxOptions {
  /** Name of the backing table (created when missing); `webmentions` by default. */
  readonly table?: string;
}
40
+
41
/** Shape of one row as selected from the inbox table (snake_case columns). */
interface MentionRow {
  /** `source` column: URL of the linking document. */
  readonly source: string;
  /** `target` column: URL being linked to. */
  readonly target: string;
  /** `verified_at` column: verification time as an integer. */
  readonly verified_at: number;
}
46
+
47
/**
 * Build a D1-backed {@link InboxStore}.
 *
 * The backing table is created on first use if it does not already exist. The
 * schema step is shared by all operations and remembered once it succeeds; if
 * it fails, the failure is reported to the caller and the next operation tries
 * again, so a transient D1 error does not disable the store permanently.
 *
 * @param db - The D1 database to store mentions in.
 * @param options - Optional settings; `table` overrides the default table name
 *   (`webmentions`).
 * @returns A store whose `store` upserts on `(source, target)`, whose `remove`
 *   deletes that pair, and whose `list` returns rows newest first.
 * @throws Error when the table name is not a plain identifier
 *   (`[A-Za-z_][A-Za-z0-9_]*`).
 */
export function createD1Inbox(
  db: D1Database,
  options?: D1InboxOptions,
): InboxStore {
  const table = options?.table ?? "webmentions";
  // Guard the identifier: it is interpolated into SQL text, so only allow a
  // safe set of characters rather than trusting the caller blindly.
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(table)) {
    throw new Error(`@dwk/webmention: invalid inbox table name "${table}".`);
  }

  const createTable = async (): Promise<void> => {
    await db
      .prepare(
        `CREATE TABLE IF NOT EXISTS ${table} (` +
          `source TEXT NOT NULL, ` +
          `target TEXT NOT NULL, ` +
          `verified_at INTEGER NOT NULL, ` +
          `PRIMARY KEY (source, target))`,
      )
      .run();
  };

  let ready: Promise<void> | null = null;
  const ensureSchema = (): Promise<void> => {
    ready ??= createTable().catch((error: unknown) => {
      // Forget the failed attempt so the next call retries instead of
      // replaying this rejection forever.
      ready = null;
      throw error;
    });
    return ready;
  };

  return {
    async store(mention) {
      await ensureSchema();
      await db
        .prepare(
          `INSERT INTO ${table} (source, target, verified_at) ` +
            `VALUES (?1, ?2, ?3) ` +
            `ON CONFLICT (source, target) ` +
            `DO UPDATE SET verified_at = excluded.verified_at`,
        )
        .bind(mention.source, mention.target, mention.verifiedAt)
        .run();
    },

    async remove(source, target) {
      await ensureSchema();
      await db
        .prepare(`DELETE FROM ${table} WHERE source = ?1 AND target = ?2`)
        .bind(source, target)
        .run();
    },

    async list(target) {
      await ensureSchema();
      const statement =
        target === undefined
          ? db.prepare(
              `SELECT source, target, verified_at FROM ${table} ` +
                `ORDER BY verified_at DESC`,
            )
          : db
              .prepare(
                `SELECT source, target, verified_at FROM ${table} ` +
                  `WHERE target = ?1 ORDER BY verified_at DESC`,
              )
              .bind(target);
      const { results } = await statement.all<MentionRow>();
      // Map snake_case columns onto the camelCase public shape.
      return results.map((row) => ({
        source: row.source,
        target: row.target,
        verifiedAt: row.verified_at,
      }));
    },
  };
}
package/src/index.ts ADDED
@@ -0,0 +1,297 @@
1
+ /**
2
+ * `@dwk/webmention` — Webmention (W3C) receiver + sender.
3
+ *
4
+ * Endpoint package. The receiver validates `source`/`target` synchronously,
5
+ * returns `202 Accepted`, and enqueues the pair for asynchronous link
6
+ * verification; the queue consumer fetches the source, confirms it links to the
7
+ * target, and persists (or removes) the mention in an inbox. The sender
8
+ * discovers a target's Webmention endpoint and notifies it on publish. Cloud
9
+ * specifics (Queue, D1) are confined here; HTML scanning uses the runtime's
10
+ * streaming `HTMLRewriter`, so the parsing/verification helpers are async and
11
+ * exercised under the Workers test pool.
12
+ *
13
+ * @see spec/packages/webmention.md
14
+ * @packageDocumentation
15
+ */
16
+
17
+ import type {
18
+ D1Database,
19
+ ExecutionContext,
20
+ MessageBatch,
21
+ Queue,
22
+ } from "@cloudflare/workers-types";
23
+ import {
24
+ hostFromUrl,
25
+ noopLogger,
26
+ noopMetrics,
27
+ type Logger,
28
+ type Metrics,
29
+ } from "@dwk/log";
30
+ import { createD1Inbox, type InboxStore } from "./inbox";
31
+ import type { FetchLike } from "./fetch";
32
+ import { WebmentionLogEvent } from "./log";
33
+ import { validateWebmentionParams } from "./validate";
34
+ import { verifySource } from "./verify";
35
+
36
+ export {
37
+ validateWebmentionParams,
38
+ type ValidateParams,
39
+ type ValidationResult,
40
+ type WebmentionValidationError,
41
+ } from "./validate";
42
+ export {
43
+ discoverEndpoint,
44
+ findWebmentionEndpoint,
45
+ type DiscoverOptions,
46
+ } from "./discovery";
47
+ export {
48
+ sendWebmention,
49
+ sendWebmentions,
50
+ type SendOptions,
51
+ type SendResult,
52
+ } from "./sender";
53
+ export {
54
+ verifySource,
55
+ sourceLinksTo,
56
+ extractLinks,
57
+ type VerifyOptions,
58
+ type VerifyResult,
59
+ } from "./verify";
60
+ export {
61
+ createD1Inbox,
62
+ type InboxStore,
63
+ type VerifiedMention,
64
+ type D1InboxOptions,
65
+ } from "./inbox";
66
+ export type { FetchLike } from "./fetch";
67
+ export {
68
+ safeFetch,
69
+ assertPublicUrl,
70
+ isPrivateOrReservedHost,
71
+ SsrfError,
72
+ DEFAULT_MAX_REDIRECTS,
73
+ DEFAULT_TIMEOUT_MS,
74
+ type SafeFetchOptions,
75
+ type SafeFetchResult,
76
+ type SsrfReason,
77
+ } from "./safe-fetch";
78
+ export { WebmentionLogEvent } from "./log";
79
+ export type { Logger, Metrics } from "@dwk/log";
80
+
81
/** Payload of one queued verification: check that `source` links to `target`. */
export interface WebmentionJob {
  /** URL of the document claimed to contain the link. */
  readonly source: string;
  /** URL the source is claimed to link to. */
  readonly target: string;
}
86
+
87
/** Cloudflare bindings the Webmention handler and queue consumer read from `env`. */
export interface WebmentionEnv {
  /** Producer side of the queue that carries received mentions to verification. */
  readonly WEBMENTION_QUEUE: Queue<WebmentionJob>;
  /**
   * D1 database used for the default inbox. May be omitted only when an
   * {@link InboxStore} is passed through {@link WebmentionConfig.inbox} (for
   * example a Solid Pod Durable Object-backed inbox).
   */
  readonly WEBMENTION_INBOX?: D1Database;
}
98
+
99
/** Settings accepted by {@link createWebmention}. */
export interface WebmentionConfig {
  /** Base URL of this receiver; every `target` must live under its origin. */
  readonly baseUrl: string;
  /** Extra hostnames under this receiver's control, in addition to `baseUrl`'s. */
  readonly allowedHosts?: ReadonlyArray<string>;
  /**
   * Where verified mentions are persisted. When omitted, a D1 store is built
   * from {@link WebmentionEnv.WEBMENTION_INBOX}; pass one to back the inbox
   * with the `@dwk/solid-pod` Durable Object instead.
   */
  readonly inbox?: InboxStore;
  /** `fetch` implementation used for verification; the global `fetch` by default. */
  readonly fetch?: FetchLike;
  /**
   * Sink for receiver/queue log events; a no-op by default. Supply a real
   * logger (see `@dwk/log`) so SSRF blocks, validation rejections, and
   * poison-message retries are surfaced rather than swallowed.
   */
  readonly logger?: Logger;
  /**
   * Sink for receiver/queue counters; a no-op by default. Supply an adapter
   * (e.g. `analyticsEngineMetrics` from `@dwk/log`, bound to an
   * `AnalyticsEngineDataset`) to chart the same events the logger names —
   * "SSRF blocks/min", "verification success rate", "queue retries by reason".
   */
  readonly metrics?: Metrics;
}
127
+
128
/**
 * Signature of the receiver: a Worker `fetch`-style handler taking the
 * request, the bindings, and the execution context.
 */
export type WebmentionHandler = (
  request: Request,
  env: WebmentionEnv,
  ctx: ExecutionContext,
) => Promise<Response>;
134
+
135
/**
 * Signature of the Queue consumer that verifies received mentions
 * asynchronously, one batch of {@link WebmentionJob}s at a time.
 */
export type WebmentionQueueConsumer = (
  batch: MessageBatch<WebmentionJob>,
  env: WebmentionEnv,
  ctx: ExecutionContext,
) => Promise<void>;
141
+
142
/**
 * Create a plain-text response.
 *
 * @param status - HTTP status code.
 * @param body - Response text, sent as `text/plain; charset=utf-8`.
 */
function textResponse(status: number, body: string): Response {
  const headers = { "content-type": "text/plain; charset=utf-8" };
  return new Response(body, { status, headers });
}
148
+
149
/**
 * Choose the inbox store for a request or batch.
 *
 * An explicitly configured `config.inbox` takes precedence; otherwise a D1
 * inbox is built from the `WEBMENTION_INBOX` binding.
 *
 * @param config - Receiver configuration.
 * @param env - Worker bindings.
 * @returns The inbox store to use.
 * @throws Error when neither `config.inbox` nor `WEBMENTION_INBOX` is present.
 */
function resolveInbox(
  config: WebmentionConfig,
  env: WebmentionEnv,
): InboxStore {
  const { inbox } = config;
  if (inbox !== undefined) {
    return inbox;
  }
  const database = env.WEBMENTION_INBOX;
  if (database === undefined) {
    throw new Error(
      "@dwk/webmention: no inbox configured — provide config.inbox or bind " +
        "WEBMENTION_INBOX (D1).",
    );
  }
  return createD1Inbox(database);
}
164
+
165
/**
 * Narrow a `FormData` entry to a string field.
 *
 * @param value - The result of `FormData.get`.
 * @returns The value when it is a string; `null` for a missing field or a file
 *   upload.
 */
function formValue(value: string | File | null): string | null {
  if (typeof value === "string") {
    return value;
  }
  return null;
}
168
+
169
/**
 * Report whether a `Content-Type` header names
 * `application/x-www-form-urlencoded`, the encoding Webmention §3.1.3 requires.
 *
 * `Request.formData()` would happily parse `multipart/form-data` as well, so
 * the media type's essence is checked explicitly before parsing.
 *
 * @param contentType - Raw header value, or `null` when the header is absent.
 */
function isFormUrlEncoded(contentType: string | null): boolean {
  if (contentType === null) {
    return false;
  }
  const [mediaType = ""] = contentType.split(";");
  return mediaType.trim().toLowerCase() === "application/x-www-form-urlencoded";
}
179
+
180
/**
 * Build the Webmention receiver handler from configuration.
 *
 * The returned handler is mountable under any path prefix. For each request it:
 * 1. answers `405` (with `Allow: POST`) to anything but `POST`;
 * 2. throws if the required `WEBMENTION_QUEUE` binding is missing;
 * 3. answers `400` unless the body is `application/x-www-form-urlencoded` and
 *    can be parsed as a form;
 * 4. validates `source`/`target` synchronously, answering `400` with the
 *    validation error on failure;
 * 5. enqueues the pair for asynchronous verification and answers
 *    `202 Accepted`.
 *
 * Every `400` rejection — including an unreadable body — is reported to the
 * logger and the metrics sink with a `reason` field; an accepted mention is
 * reported with the source and target hosts.
 *
 * @param config - Receiver configuration; `logger` and `metrics` default to
 *   no-ops.
 * @returns The request handler.
 */
export function createWebmention(config: WebmentionConfig): WebmentionHandler {
  const logger = config.logger ?? noopLogger;
  const metrics = config.metrics ?? noopMetrics;
  return async (request, env, _ctx) => {
    if (request.method !== "POST") {
      return new Response("Method Not Allowed", {
        status: 405,
        headers: { allow: "POST" },
      });
    }

    // A missing binding is a deployment error, not a client error: fail loudly.
    if (env.WEBMENTION_QUEUE === undefined) {
      throw new Error(
        "@dwk/webmention: missing required binding WEBMENTION_QUEUE.",
      );
    }

    if (!isFormUrlEncoded(request.headers.get("content-type"))) {
      const fields = { reason: "invalid_content_type" as const };
      logger.warn(WebmentionLogEvent.ReceiveRejected, fields);
      metrics.count(WebmentionLogEvent.ReceiveRejected, fields);
      return textResponse(
        400,
        "invalid_request: Content-Type must be application/x-www-form-urlencoded",
      );
    }

    let form: FormData;
    try {
      form = await request.formData();
    } catch {
      // Report this rejection like the others so it is not silently swallowed.
      const fields = { reason: "invalid_body" as const };
      logger.warn(WebmentionLogEvent.ReceiveRejected, fields);
      metrics.count(WebmentionLogEvent.ReceiveRejected, fields);
      return textResponse(
        400,
        "invalid_request: expected a form-encoded body with source and target",
      );
    }

    const result = validateWebmentionParams({
      source: formValue(form.get("source")),
      target: formValue(form.get("target")),
      baseUrl: config.baseUrl,
      allowedHosts: config.allowedHosts,
    });
    if (!result.ok) {
      const fields = { reason: result.error };
      logger.warn(WebmentionLogEvent.ReceiveRejected, fields);
      metrics.count(WebmentionLogEvent.ReceiveRejected, fields);
      return textResponse(400, result.error);
    }

    await env.WEBMENTION_QUEUE.send({
      source: result.source,
      target: result.target,
    });

    // Only hosts are recorded, never full URLs.
    const fields = {
      sourceHost: hostFromUrl(result.source),
      targetHost: hostFromUrl(result.target),
    };
    logger.info(WebmentionLogEvent.ReceiveAccepted, fields);
    metrics.count(WebmentionLogEvent.ReceiveAccepted, fields);
    return new Response(null, { status: 202 });
  };
}
253
+
254
/**
 * Build the Queue consumer that verifies received mentions.
 *
 * Messages are handled one after another. For each job the source is verified
 * against the target: when it links, the mention is upserted into the inbox
 * with the current time; when it does not, the pair is removed. The message is
 * then acked. If any step throws, the failure is logged and counted (hosts and
 * error name only) and the message is retried.
 *
 * The inbox is resolved once per batch, so a missing inbox configuration fails
 * the whole batch loudly.
 *
 * @param config - Receiver configuration; `logger` and `metrics` default to
 *   no-ops.
 * @returns The queue consumer.
 */
export function createWebmentionQueueConsumer(
  config: WebmentionConfig,
): WebmentionQueueConsumer {
  const logger = config.logger ?? noopLogger;
  const metrics = config.metrics ?? noopMetrics;

  return async (batch, env, _ctx) => {
    const inbox = resolveInbox(config, env);

    // Verify one pair and bring the inbox in line with the outcome.
    const settle = async (source: string, target: string): Promise<void> => {
      const outcome = await verifySource(source, target, {
        fetch: config.fetch,
        logger,
        metrics,
      });
      if (!outcome.links) {
        await inbox.remove(source, target);
        return;
      }
      await inbox.store({ source, target, verifiedAt: Date.now() });
    };

    for (const message of batch.messages) {
      const { source, target } = message.body;
      try {
        await settle(source, target);
        message.ack();
      } catch (err) {
        // A poison message must not retry silently — record why so an operator
        // can tell a transient failure from a wedged one.
        const fields = {
          sourceHost: hostFromUrl(source),
          targetHost: hostFromUrl(target),
          error: err instanceof Error ? err.name : "unknown",
        };
        logger.warn(WebmentionLogEvent.QueueRetry, fields);
        metrics.count(WebmentionLogEvent.QueueRetry, fields);
        message.retry();
      }
    }
  };
}