@dwk/webmention 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +140 -0
  3. package/dist/discovery.d.ts +43 -0
  4. package/dist/discovery.d.ts.map +1 -0
  5. package/dist/discovery.js +128 -0
  6. package/dist/discovery.js.map +1 -0
  7. package/dist/fetch.d.ts +28 -0
  8. package/dist/fetch.d.ts.map +1 -0
  9. package/dist/fetch.js +73 -0
  10. package/dist/fetch.js.map +1 -0
  11. package/dist/html.d.ts +68 -0
  12. package/dist/html.d.ts.map +1 -0
  13. package/dist/html.js +183 -0
  14. package/dist/html.js.map +1 -0
  15. package/dist/inbox.d.ts +41 -0
  16. package/dist/inbox.d.ts.map +1 -0
  17. package/dist/inbox.js +73 -0
  18. package/dist/inbox.js.map +1 -0
  19. package/dist/index.d.ts +96 -0
  20. package/dist/index.d.ts.map +1 -0
  21. package/dist/index.js +161 -0
  22. package/dist/index.js.map +1 -0
  23. package/dist/log.d.ts +42 -0
  24. package/dist/log.d.ts.map +1 -0
  25. package/dist/log.js +40 -0
  26. package/dist/log.js.map +1 -0
  27. package/dist/safe-fetch.d.ts +101 -0
  28. package/dist/safe-fetch.d.ts.map +1 -0
  29. package/dist/safe-fetch.js +348 -0
  30. package/dist/safe-fetch.js.map +1 -0
  31. package/dist/sender.d.ts +43 -0
  32. package/dist/sender.d.ts.map +1 -0
  33. package/dist/sender.js +80 -0
  34. package/dist/sender.js.map +1 -0
  35. package/dist/validate.d.ts +47 -0
  36. package/dist/validate.d.ts.map +1 -0
  37. package/dist/validate.js +76 -0
  38. package/dist/validate.js.map +1 -0
  39. package/dist/verify.d.ts +61 -0
  40. package/dist/verify.d.ts.map +1 -0
  41. package/dist/verify.js +216 -0
  42. package/dist/verify.js.map +1 -0
  43. package/package.json +45 -0
  44. package/src/discovery.ts +167 -0
  45. package/src/fetch.ts +84 -0
  46. package/src/html.ts +206 -0
  47. package/src/inbox.ts +121 -0
  48. package/src/index.ts +297 -0
  49. package/src/log.ts +44 -0
  50. package/src/safe-fetch.ts +405 -0
  51. package/src/sender.ts +131 -0
  52. package/src/validate.ts +116 -0
  53. package/src/verify.ts +294 -0
package/LICENSE ADDED
@@ -0,0 +1,15 @@
1
+ ISC License
2
+
3
+ Copyright (c) 2026 David W. Keith
4
+
5
+ Permission to use, copy, modify, and/or distribute this software for any
6
+ purpose with or without fee is hereby granted, provided that the above
7
+ copyright notice and this permission notice appear in all copies.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,140 @@
1
+ # `@dwk/webmention`
2
+
3
+ > Webmention (W3C) receiver + sender. Endpoint package.
4
+
5
+ Part of the [`@dwk` IndieWeb + Solid cohort](../../README.md). See the
6
+ [package specification](../../spec/packages/webmention.md) for the full
7
+ requirements.
8
+
9
+ The receiver validates `source`/`target` **synchronously**, returns
10
+ `202 Accepted`, and enqueues the pair for **asynchronous** link verification.
11
+ The queue consumer fetches the source, confirms it links to the target, and
12
+ persists (or removes) the mention in an inbox. The sender discovers a target's
13
+ Webmention endpoint and notifies it on publish. Cloudflare specifics (Queue,
14
+ D1) are confined to this package; the parsing and verification logic is pure and
15
+ unit-tests without a Workers runtime.
16
+
17
+ ## Receiver
18
+
19
+ ```ts
20
+ import { createWebmention } from "@dwk/webmention";
21
+
22
+ const webmention = createWebmention({ baseUrl: "https://example.com" });
23
+
24
+ // In your Worker's fetch handler, mount under any path prefix:
25
+ // POST /webmention (application/x-www-form-urlencoded: source, target)
26
+ return webmention(request, env, ctx);
27
+ ```
28
+
29
+ `createWebmention` validates up front — both fields are syntactically valid
30
+ `http(s)` URLs, `source` ≠ `target`, and `target` is a resource under this
31
+ receiver's control (`baseUrl`'s host, plus any `allowedHosts`). Invalid requests
32
+ get `400` with a stable error code in the body; valid ones are enqueued and get
33
+ `202`. Other methods get `405`. The handler **fails loudly** if the required
34
+ `WEBMENTION_QUEUE` binding is missing.
35
+
36
+ ### Async verification (queue consumer)
37
+
38
+ ```ts
39
+ import { createWebmentionQueueConsumer } from "@dwk/webmention";
40
+
41
+ const verify = createWebmentionQueueConsumer({ baseUrl: "https://example.com" });
42
+
43
+ export default {
44
+ fetch: webmention,
45
+ queue: verify, // bound to WEBMENTION_QUEUE
46
+ };
47
+ ```
48
+
49
+ The consumer fetches each `source`, checks for a link to `target`, and **upserts**
50
+ the verified mention into the inbox — or **removes** it when the source no longer
51
+ links. Jobs that throw are retried.
52
+
53
+ ## Sender
54
+
55
+ ```ts
56
+ import { sendWebmentions } from "@dwk/webmention";
57
+
58
+ // On publish, notify each outbound link's target:
59
+ const results = await sendWebmentions(myPostUrl, outboundLinks);
60
+ ```
61
+
62
+ Endpoint discovery follows the spec: the HTTP `Link` header (`rel=webmention`)
63
+ wins, then the first `<link>`/`<a rel="webmention">` in document order, with
64
+ relative URLs resolved against the (post-redirect) document URL — honoring a
65
+ `<base href>` and ignoring endpoints inside HTML comments. The legacy
66
+ `http://webmention.org/` rel is also accepted. The sender refuses to POST to a
67
+ discovered endpoint that is not `http(s)`.
68
+
69
+ ## Bindings (`Env` fragment)
70
+
71
+ | Binding | Type | Required | Purpose |
72
+ | ------------------ | ------------ | -------- | ---------------------------------------- |
73
+ | `WEBMENTION_QUEUE` | `Queue` | yes | Async verification of received mentions. |
74
+ | `WEBMENTION_INBOX` | `D1Database` | yes\* | Default inbox for verified mentions. |
75
+
76
+ \* `WEBMENTION_INBOX` is optional when you pass a custom `inbox` in config — for
77
+ example, to store mentions in the `@dwk/solid-pod` Durable Object when composed
78
+ into a pod.
79
+
80
+ ## Config
81
+
82
+ | Field | Type | Default | Purpose |
83
+ | -------------- | ------------ | -------------- | --------------------------------------------- |
84
+ | `baseUrl` | `string` | — | Receiver origin; `target` must live under it. |
85
+ | `allowedHosts` | `string[]` | `[]` | Extra controlled hostnames. |
86
+ | `inbox` | `InboxStore` | D1 from `env` | Override the default inbox store. |
87
+ | `fetch` | `FetchLike` | global `fetch` | Override `fetch` (verification/discovery). |
88
+ | `logger` | `Logger` | `noopLogger` | Structured logs (see `@dwk/log`). |
89
+ | `metrics` | `Metrics` | `noopMetrics` | Queryable counters (see `@dwk/log`). |
90
+
91
+ ## Observability
92
+
93
+ Logging and metrics are **opt-in and injected** (see [`@dwk/log`](../log)),
94
+ defaulting to no-ops. Both seams share one event vocabulary
95
+ (`WebmentionLogEvent`), so a log line and its counter line up — SSRF blocks (by
96
+ reason), receive accepted/rejected, verification outcomes (by links/status),
97
+ queue retries (by reason), and send outcomes (by delivered/status):
98
+
99
+ ```ts
100
+ import { consoleLogger, analyticsEngineMetrics } from "@dwk/log";
101
+ import { createWebmention } from "@dwk/webmention";
102
+
103
+ const webmention = createWebmention({
104
+ baseUrl: "https://example.com",
105
+ logger: consoleLogger({ minLevel: "info" }),
106
+ // env.WEBMENTION_METRICS is an AnalyticsEngineDataset binding.
107
+ metrics: analyticsEngineMetrics(env.WEBMENTION_METRICS),
108
+ });
109
+ ```
110
+
111
+ Both honor the redaction policy: they emit only sanitized hosts, status, reason codes,
112
+ booleans, and counts — never tokens, bodies, or full URLs.
113
+
114
+ ## Federation handoff (documented config, not core code)
115
+
116
+ To bridge to the fediverse, emit `h-card` / `h-entry` markup on your published
117
+ pages and include [Bridgy Fed](https://fed.brid.gy/) (`https://fed.brid.gy/`) as
118
+ one of the outbound targets you pass to `sendWebmentions`. Bridgy Fed discovers
119
+ your page's Webmention endpoint and handles the ActivityPub translation; no
120
+ special code in this package is required.
121
+
122
+ ## Conformance
123
+
124
+ The discovery and verification logic is unit-tested against the
125
+ [webmention.rocks](https://webmention.rocks/) discovery cases — including exact
126
+ `rel` matching (not naïve substring), endpoints hidden in HTML comments, escaped
127
+ HTML, empty vs. missing `href`, `<base href>` resolution, query-string
128
+ endpoints, and multiple/ordered endpoint advertisements.
129
+
130
+ ## Scope
131
+
132
+ - Verification is **link-level**: the source document must contain an
133
+ `href`/`src` resolving to the target. Full **Microformats2** extraction (author,
134
+ content, mention type) is intentionally out of scope to keep the Worker bundle
135
+ within the runtime budget; the inbox records `source`, `target`, and the
136
+ verification time.
137
+
138
+ ## License
139
+
140
+ [ISC](../../LICENSE)
@@ -0,0 +1,43 @@
1
+ /**
2
+ * `@dwk/webmention` — Webmention endpoint discovery (sender side).
3
+ *
4
+ * Given a target URL, find its declared Webmention endpoint following the W3C
5
+ * discovery algorithm: the HTTP `Link` header (`rel=webmention`) wins, then the
6
+ * first `<link>`/`<a rel="webmention">` in document order, with relative URLs
7
+ * resolved against the (post-redirect) document URL. The legacy
8
+ * `http://webmention.org/` rel value is also accepted. See
9
+ * `spec/packages/webmention.md`.
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+ import { type Logger, type Metrics } from "@dwk/log";
14
+ import { type FetchLike } from "./fetch";
15
+ /**
16
+ * Find the Webmention endpoint declared by a fetched document.
17
+ *
18
+ * Pass the `Link` header value, the response body, and the document URL (used
19
+ * as the base for relative resolution). Returns the absolute endpoint URL or
20
+ * `null` when none is advertised. Async because HTML scanning runs through the
21
+ * runtime's `HTMLRewriter`.
22
+ */
23
+ export declare function findWebmentionEndpoint(linkHeader: string | null, html: string, documentUrl: string): Promise<string | null>;
24
+ /** Options for {@link discoverEndpoint}. */
25
+ export interface DiscoverOptions {
26
+ /** `fetch` implementation to use; defaults to the global `fetch`. */
27
+ readonly fetch?: FetchLike;
28
+ /** Logger passed through to the SSRF-safe fetch; defaults to a no-op. */
29
+ readonly logger?: Logger;
30
+ /** Metrics sink passed through to the SSRF-safe fetch; defaults to a no-op. */
31
+ readonly metrics?: Metrics;
32
+ }
33
+ /**
34
+ * Fetch `target` and discover its Webmention endpoint.
35
+ *
36
+ * Fetches through the SSRF-safe wrapper ({@link safeFetch}): the target host —
37
+ * and every redirect hop — is validated against private/loopback/link-local
38
+ * ranges, redirects are capped, and the request is bounded by a timeout. The
39
+ * endpoint resolves against the final URL. Returns the absolute endpoint URL,
40
+ * or `null` when discovery finds none or the fetch fails or is blocked.
41
+ */
42
+ export declare function discoverEndpoint(target: string, options?: DiscoverOptions): Promise<string | null>;
43
+ //# sourceMappingURL=discovery.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"discovery.d.ts","sourceRoot":"","sources":["../src/discovery.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAA2B,KAAK,MAAM,EAAE,KAAK,OAAO,EAAE,MAAM,UAAU,CAAC;AAQ9E,OAAO,EAAkB,KAAK,SAAS,EAAE,MAAM,SAAS,CAAC;AA4BzD;;;;;;;GAOG;AACH,wBAAsB,sBAAsB,CAC1C,UAAU,EAAE,MAAM,GAAG,IAAI,EACzB,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAyCxB;AAED,4CAA4C;AAC5C,MAAM,WAAW,eAAe;IAC9B,qEAAqE;IACrE,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,CAAC;IAC3B,yEAAyE;IACzE,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,+EAA+E;IAC/E,QAAQ,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;GAQG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAwCxB"}
@@ -0,0 +1,128 @@
1
+ /**
2
+ * `@dwk/webmention` — Webmention endpoint discovery (sender side).
3
+ *
4
+ * Given a target URL, find its declared Webmention endpoint following the W3C
5
+ * discovery algorithm: the HTTP `Link` header (`rel=webmention`) wins, then the
6
+ * first `<link>`/`<a rel="webmention">` in document order, with relative URLs
7
+ * resolved against the (post-redirect) document URL. The legacy
8
+ * `http://webmention.org/` rel value is also accepted. See
9
+ * `spec/packages/webmention.md`.
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+ import { noopLogger, noopMetrics } from "@dwk/log";
14
+ import { isHtmlContentType, parseLinkHeader, resolveUrl, scanElements, splitTokens, } from "./html";
15
+ import { readBodyCapped } from "./fetch";
16
+ import { safeFetch } from "./safe-fetch";
17
// Pre-standard rel values that older pages still use in place of the plain
// `webmention` token. Both are absolute URLs, so a candidate is compared by its
// `URL`-normalized href: `http://webmention.org` and `http://webmention.org/`
// normalize to the same string, whereas a look-alike host such as
// `http://webmention.org.evil.example/` yields a different href and is refused
// (a `startsWith` prefix check would wrongly accept it).
const LEGACY_REL_HREFS = new Set([
    "http://webmention.org/",
    "http://webmention.org/webmention",
]);
/**
 * Decide whether a single `rel` token advertises a Webmention endpoint.
 *
 * Accepts the standard `webmention` token (case-insensitively) and the legacy
 * absolute-URL rel values listed in `LEGACY_REL_HREFS`.
 */
function isWebmentionRel(rel) {
    if (rel.toLowerCase() !== "webmention") {
        try {
            // Normalize through `URL` so equivalent spellings compare equal.
            return LEGACY_REL_HREFS.has(new URL(rel).href);
        }
        catch {
            // Neither the standard token nor an absolute URL.
            return false;
        }
    }
    return true;
}
41
/**
 * Locate the Webmention endpoint advertised by an already-fetched document.
 *
 * The `Link` header is consulted first; only when it yields no webmention rel
 * is the markup scanned for the first `<link>`/`<a>` carrying one. Relative
 * values resolve against `documentUrl`, or against the document's first
 * `<base href>` for endpoints found in the markup. Resolves to the absolute
 * endpoint URL, or `null` when nothing is advertised.
 */
export async function findWebmentionEndpoint(linkHeader, html, documentUrl) {
    // Step 1: the first Link header entry with a webmention rel takes precedence.
    const headerEntry = parseLinkHeader(linkHeader).find((entry) => entry.rels.some(isWebmentionRel));
    if (headerEntry !== undefined) {
        return resolveUrl(headerEntry.uri, documentUrl);
    }
    // Step 2: scan the markup. Nothing to scan means nothing is advertised.
    if (html === "") {
        return null;
    }
    // Elements arrive in document order; commented-out markup is not reported
    // by the scanner, so no separate comment-stripping pass is needed.
    const scanned = await scanElements(html, "base, link, a", ["rel", "href"]);
    // The first <base> with a non-empty href, wherever it sits, sets the base
    // for relative endpoints; an unresolvable one falls back to the document URL.
    const baseElement = scanned.find((el) => el.name === "base" && el.attrs.href);
    const resolutionBase = baseElement === undefined
        ? documentUrl
        : (resolveUrl(baseElement.attrs.href, documentUrl) ?? documentUrl);
    // A candidate must carry a webmention rel AND an `href` attribute. A missing
    // `href` is malformed and skipped (webmention.rocks test 20), while an empty
    // `href=""` is valid and names the document itself (test 15).
    const advertised = scanned.find((el) => el.name !== "base" &&
        splitTokens(el.attrs.rel ?? null).some(isWebmentionRel) &&
        el.attrs.href !== null &&
        el.attrs.href !== undefined);
    return advertised === undefined
        ? null
        : resolveUrl(advertised.attrs.href, resolutionBase);
}
91
/**
 * Fetch `target` and discover its Webmention endpoint.
 *
 * The request goes through {@link safeFetch} with the supplied (or global)
 * `fetch`, logger and metrics; any error thrown by that call — including a
 * blocked or failed fetch — yields `null`. The `Link` header is checked first,
 * and the body is only read (size-capped) when the response is HTML. The
 * endpoint resolves against the URL reported by `safeFetch`.
 *
 * On the paths that never read the body (endpoint found in the header, or a
 * non-HTML response) the body is cancelled so the response does not linger
 * unread.
 *
 * @param target - URL of the page whose endpoint should be discovered.
 * @param options - Optional `fetch`, `logger` and `metrics` overrides.
 * @returns The absolute endpoint URL, or `null` when none is found or the
 *   fetch fails.
 */
export async function discoverEndpoint(target, options) {
    const doFetch = options?.fetch ?? ((input, init) => fetch(input, init));
    const logger = options?.logger ?? noopLogger;
    const metrics = options?.metrics ?? noopMetrics;
    let response;
    let base;
    try {
        const result = await safeFetch(doFetch, target, { method: "GET", headers: { accept: "text/html, */*" } }, { logger, metrics });
        response = result.response;
        base = result.url;
    }
    catch {
        return null;
    }
    // Best-effort release of a body we are not going to read. Failures are
    // ignored on purpose: discovery's result must not depend on the cleanup.
    const discardBody = async () => {
        try {
            await response.body?.cancel();
        }
        catch {
            // Nothing useful to do if the stream is already locked or errored.
        }
    };
    // Header-only pass: an empty `html` argument skips the markup scan.
    const fromHeader = await findWebmentionEndpoint(response.headers.get("link"), "", base);
    if (fromHeader !== null) {
        await discardBody();
        return fromHeader;
    }
    const contentType = response.headers.get("content-type") ?? "";
    if (!isHtmlContentType(contentType)) {
        await discardBody();
        return null;
    }
    // `readBodyCapped` returns null when the body is too large or unreadable.
    const html = await readBodyCapped(response);
    if (html === null) {
        return null;
    }
    return findWebmentionEndpoint(null, html, base);
}
128
+ //# sourceMappingURL=discovery.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"discovery.js","sourceRoot":"","sources":["../src/discovery.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAA6B,MAAM,UAAU,CAAC;AAC9E,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,UAAU,EACV,YAAY,EACZ,WAAW,GACZ,MAAM,QAAQ,CAAC;AAChB,OAAO,EAAE,cAAc,EAAkB,MAAM,SAAS,CAAC;AACzD,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,gFAAgF;AAChF,6EAA6E;AAC7E,+EAA+E;AAC/E,+EAA+E;AAC/E,iFAAiF;AACjF,uEAAuE;AACvE,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,wBAAwB;IACxB,kCAAkC;CACnC,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,GAAW;IAClC,IAAI,GAAG,CAAC,WAAW,EAAE,KAAK,YAAY,EAAE,CAAC;QACvC,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACH,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;IAC3B,CAAC;IAAC,MAAM,CAAC;QACP,yEAAyE;QACzE,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACpC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,UAAyB,EACzB,IAAY,EACZ,WAAmB;IAEnB,6CAA6C;IAC7C,KAAK,MAAM,KAAK,IAAI,eAAe,CAAC,UAAU,CAAC,EAAE,CAAC;QAChD,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;YACrC,OAAO,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IACD,2EAA2E;IAC3E,6EAA6E;IAC7E,+EAA+E;IAC/E,4DAA4D;IAC5D,IAAI,IAAI,KAAK,EAAE,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,eAAe,EAAE,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5E,8EAA8E;IAC9E,IAAI,YAAY,GAAG,WAAW,CAAC;IAC/B,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,YAAY,GAAG,UAAU,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,WAAW,CAAC;YACrE,MAAM;QACR,CAAC;IACH,CAAC;IACD,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,IAAI,GAAG,WAAW,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,IAAI,IAAI,CAAC,CAAC;QAC/C,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;QAC3B,wEAAwE;QACxE,oEAAoE;QACpE,6CAA6C;QAC7C,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS,EAAE,
CAAC;YACxC,SAAS;QACX,CAAC;QACD,OAAO,UAAU,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAYD;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAAc,EACd,OAAyB;IAEzB,MAAM,OAAO,GACX,OAAO,EAAE,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,UAAU,CAAC;IAC7C,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,WAAW,CAAC;IAEhD,IAAI,QAAkB,CAAC;IACvB,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAC5B,OAAO,EACP,MAAM,EACN,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,gBAAgB,EAAE,EAAE,EACxD,EAAE,MAAM,EAAE,OAAO,EAAE,CACpB,CAAC;QACF,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC3B,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC;IACpB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAC7C,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAC5B,EAAE,EACF,IAAI,CACL,CAAC;IACF,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QACxB,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IAC/D,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,EAAE,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC5C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,sBAAsB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;AAClD,CAAC"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * `@dwk/webmention` — injectable `fetch` type.
3
+ *
4
+ * Endpoint discovery, source verification, and sending all perform HTTP I/O.
5
+ * They accept a {@link FetchLike} so callers can inject a stub in tests (no
6
+ * network) and so the package never reaches for a global it didn't receive.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+ /** A minimal, injectable `fetch` signature. */
11
+ export type FetchLike = (input: string, init?: RequestInit) => Promise<Response>;
12
+ /**
13
+ * Default cap on a fetched document body (2 MB). Discovery and verification
14
+ * only need to scan markup for links; a larger body is almost certainly hostile
15
+ * or irrelevant, and buffering it would risk an OOM (the Worker memory limit is
16
+ * 128 MB). See `spec/non-functional-requirements.md`.
17
+ */
18
+ export declare const MAX_BODY_BYTES: number;
19
+ /**
20
+ * Read a response body as text, refusing bodies larger than `maxBytes`.
21
+ *
22
+ * A declared `Content-Length` over the cap is rejected up front; the stream is
23
+ * then read incrementally and aborted the moment the cap is exceeded, so a
24
+ * missing or lying `Content-Length` cannot force the whole body into memory.
25
+ * Returns `null` when the body is too large or cannot be read.
26
+ */
27
+ export declare function readBodyCapped(response: Response, maxBytes?: number): Promise<string | null>;
28
+ //# sourceMappingURL=fetch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../src/fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,+CAA+C;AAC/C,MAAM,MAAM,SAAS,GAAG,CACtB,KAAK,EAAE,MAAM,EACb,IAAI,CAAC,EAAE,WAAW,KACf,OAAO,CAAC,QAAQ,CAAC,CAAC;AAEvB;;;;;GAKG;AACH,eAAO,MAAM,cAAc,QAAkB,CAAC;AAE9C;;;;;;;GAOG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,QAAQ,EAClB,QAAQ,SAAiB,GACxB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAgDxB"}
package/dist/fetch.js ADDED
@@ -0,0 +1,73 @@
1
+ /**
2
+ * `@dwk/webmention` — injectable `fetch` type.
3
+ *
4
+ * Endpoint discovery, source verification, and sending all perform HTTP I/O.
5
+ * They accept a {@link FetchLike} so callers can inject a stub in tests (no
6
+ * network) and so the package never reaches for a global it didn't receive.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
/**
 * Default cap on a fetched document body (2 MB). Discovery and verification
 * only need to scan markup for links; a larger body is almost certainly hostile
 * or irrelevant, and buffering it would risk an OOM (the Worker memory limit is
 * 128 MB). See `spec/non-functional-requirements.md`.
 */
export const MAX_BODY_BYTES = 2 * 1024 * 1024;
/**
 * Read a response body as text, refusing bodies larger than `maxBytes`.
 *
 * A declared `Content-Length` over the cap is rejected up front; the stream is
 * then read incrementally and cancelled the moment the cap is exceeded, so a
 * missing or lying `Content-Length` cannot force the whole body into memory.
 * When the response exposes no stream (`body` is `null` or `undefined`), the
 * text is read in one go and its UTF-8 size is checked against the cap.
 *
 * @param response - The response (or Response-like object) to read.
 * @param maxBytes - Maximum accepted body size in bytes; defaults to
 *   {@link MAX_BODY_BYTES}.
 * @returns The decoded text, or `null` when the body is too large or cannot be
 *   read.
 */
export async function readBodyCapped(response, maxBytes = MAX_BODY_BYTES) {
    const declared = response.headers.get("content-length");
    if (declared !== null) {
        const length = Number.parseInt(declared, 10);
        // An unparseable header is ignored; the streaming cap below still applies.
        if (Number.isFinite(length) && length > maxBytes) {
            return null;
        }
    }
    const body = response.body;
    // `== null` also covers Response-like stubs that omit `body` entirely.
    if (body == null) {
        try {
            const text = await response.text();
            // The cap is in bytes, while `text.length` counts UTF-16 code units.
            // UTF-8 uses between 1 and 3 bytes per code unit, so the two cheap
            // bounds settle most cases and only the ambiguous middle is encoded.
            if (text.length > maxBytes) {
                return null;
            }
            if (text.length * 3 <= maxBytes) {
                return text;
            }
            return new TextEncoder().encode(text).byteLength > maxBytes
                ? null
                : text;
        }
        catch {
            return null;
        }
    }
    const reader = body.getReader();
    const chunks = [];
    let total = 0;
    try {
        for (;;) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }
            if (value !== undefined) {
                total += value.byteLength;
                if (total > maxBytes) {
                    // Stop pulling data as soon as the cap is crossed.
                    await reader.cancel();
                    return null;
                }
                chunks.push(value);
            }
        }
    }
    catch {
        return null;
    }
    // Concatenate the chunks into one buffer and decode it (UTF-8 by default).
    const merged = new Uint8Array(total);
    let offset = 0;
    for (const chunk of chunks) {
        merged.set(chunk, offset);
        offset += chunk.byteLength;
    }
    return new TextDecoder().decode(merged);
}
73
+ //# sourceMappingURL=fetch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.js","sourceRoot":"","sources":["../src/fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAQH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;AAE9C;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAAkB,EAClB,QAAQ,GAAG,cAAc;IAEzB,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IACxD,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC7C,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,QAAQ,EAAE,CAAC;YACjD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;IAC3B,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;IAChC,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,CAAC;QACH,SAAS,CAAC;YACR,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM;YACR,CAAC;YACD,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACxB,KAAK,IAAI,KAAK,CAAC,UAAU,CAAC;gBAC1B,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;oBACrB,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC;oBACtB,OAAO,IAAI,CAAC;gBACd,CAAC;gBACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;IACrC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC;IAC7B,CAAC;IACD,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AAC1C,CAAC"}
package/dist/html.d.ts ADDED
@@ -0,0 +1,68 @@
1
+ /**
2
+ * `@dwk/webmention` — HTML / `Link`-header parsing helpers.
3
+ *
4
+ * Shared by endpoint discovery (sender) and source verification (receiver).
5
+ * `Link`-header parsing is plain string scanning; HTML scanning uses the
6
+ * Workers runtime's streaming `HTMLRewriter` rather than regex tag matching, so
7
+ * it handles comments, attribute quoting, and malformed markup correctly
8
+ * without pulling a parser into the bundle (`HTMLRewriter` is built into the
9
+ * runtime — zero script-size cost; see `spec/non-functional-requirements.md`).
10
+ *
11
+ * Because `HTMLRewriter` is a `workerd` global, the HTML scanners are async and
12
+ * exercised under the Workers test pool, not bare Node.
13
+ *
14
+ * @packageDocumentation
15
+ */
16
+ /** A parsed `Link` header entry: its target URI and `rel` tokens. */
17
+ export interface LinkHeaderEntry {
18
+ readonly uri: string;
19
+ readonly rels: readonly string[];
20
+ }
21
+ /**
22
+ * Parse an HTTP `Link` header into entries. Handles multiple comma-separated
23
+ * links and semicolon-separated parameters, e.g.
24
+ * `<https://a.example/webmention>; rel="webmention"`.
25
+ *
26
+ * Entries with no `rel` parameter are dropped; an empty URI (`<>`) is kept so
27
+ * the caller can resolve it against the document URL (a Webmention endpoint
28
+ * advertised at the page itself).
29
+ */
30
+ export declare function parseLinkHeader(value: string | null): LinkHeaderEntry[];
31
+ /**
32
+ * Whether a `Content-Type` value names an HTML document (`text/html` or
33
+ * `application/xhtml+xml`). Compares the media type's essence — the part before
34
+ * any `;` parameters — case-insensitively, so `text/html; charset=utf-8`
35
+ * matches but an unrelated type carrying `text/html` inside a parameter does
36
+ * not.
37
+ */
38
+ export declare function isHtmlContentType(contentType: string): boolean;
39
+ /**
40
+ * Whether a `Content-Type` value names a JSON document (`application/json` or a
41
+ * `+json`-suffixed type such as `application/activity+json`). Compares the media
42
+ * type's essence — the part before any `;` parameters — case-insensitively.
43
+ */
44
+ export declare function isJsonContentType(contentType: string): boolean;
45
+ /** Split a whitespace-separated token list (e.g. a `rel` value) into tokens. */
46
+ export declare function splitTokens(value: string | null): string[];
47
+ /** Resolve `uri` against `base`, returning a normalized absolute URL or `null`. */
48
+ export declare function resolveUrl(uri: string, base: string): string | null;
49
+ /** An element seen by {@link scanElements}: its (lowercased) tag name and the requested attributes. */
50
+ export interface ScannedElement {
51
+ /** Lowercased tag name, e.g. `"a"`, `"link"`, `"base"`. */
52
+ readonly name: string;
53
+ /** Requested attribute values; `null` when the attribute is absent, `""` when present but empty. */
54
+ readonly attrs: Readonly<Record<string, string | null>>;
55
+ }
56
+ /**
57
+ * Scan `html` with the runtime's streaming `HTMLRewriter`, returning — in
58
+ * document order — every element matching `selector` together with the
59
+ * requested attribute values.
60
+ *
61
+ * Using a real tokenizer (rather than regex) means elements inside comments are
62
+ * never reported (the parser treats comment contents as text, satisfying
63
+ * webmention.rocks discovery test 13), attribute quoting is handled correctly,
64
+ * and a `data-href` attribute is never mistaken for `href`. An absent attribute
65
+ * is reported as `null`; a present-but-empty one (`href=""`) as `""`.
66
+ */
67
+ export declare function scanElements(html: string, selector: string, attrNames: readonly string[]): Promise<ScannedElement[]>;
68
+ //# sourceMappingURL=html.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../src/html.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,qEAAqE;AACrE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,CAAC;CAClC;AAED;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,GAAG,eAAe,EAAE,CAiBvE;AA8ED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAG9D;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAG9D;AAED,gFAAgF;AAChF,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,EAAE,CAQ1D;AAED,mFAAmF;AACnF,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAMnE;AAED,uGAAuG;AACvG,MAAM,WAAW,cAAc;IAC7B,2DAA2D;IAC3D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,oGAAoG;IACpG,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;CACzD;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,YAAY,CAChC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,SAAS,MAAM,EAAE,GAC3B,OAAO,CAAC,cAAc,EAAE,CAAC,CAc3B"}