@dwk/webmention 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +140 -0
- package/dist/discovery.d.ts +43 -0
- package/dist/discovery.d.ts.map +1 -0
- package/dist/discovery.js +128 -0
- package/dist/discovery.js.map +1 -0
- package/dist/fetch.d.ts +28 -0
- package/dist/fetch.d.ts.map +1 -0
- package/dist/fetch.js +73 -0
- package/dist/fetch.js.map +1 -0
- package/dist/html.d.ts +68 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/html.js +183 -0
- package/dist/html.js.map +1 -0
- package/dist/inbox.d.ts +41 -0
- package/dist/inbox.d.ts.map +1 -0
- package/dist/inbox.js +73 -0
- package/dist/inbox.js.map +1 -0
- package/dist/index.d.ts +96 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +161 -0
- package/dist/index.js.map +1 -0
- package/dist/log.d.ts +42 -0
- package/dist/log.d.ts.map +1 -0
- package/dist/log.js +40 -0
- package/dist/log.js.map +1 -0
- package/dist/safe-fetch.d.ts +101 -0
- package/dist/safe-fetch.d.ts.map +1 -0
- package/dist/safe-fetch.js +348 -0
- package/dist/safe-fetch.js.map +1 -0
- package/dist/sender.d.ts +43 -0
- package/dist/sender.d.ts.map +1 -0
- package/dist/sender.js +80 -0
- package/dist/sender.js.map +1 -0
- package/dist/validate.d.ts +47 -0
- package/dist/validate.d.ts.map +1 -0
- package/dist/validate.js +76 -0
- package/dist/validate.js.map +1 -0
- package/dist/verify.d.ts +61 -0
- package/dist/verify.d.ts.map +1 -0
- package/dist/verify.js +216 -0
- package/dist/verify.js.map +1 -0
- package/package.json +45 -0
- package/src/discovery.ts +167 -0
- package/src/fetch.ts +84 -0
- package/src/html.ts +206 -0
- package/src/inbox.ts +121 -0
- package/src/index.ts +297 -0
- package/src/log.ts +44 -0
- package/src/safe-fetch.ts +405 -0
- package/src/sender.ts +131 -0
- package/src/validate.ts +116 -0
- package/src/verify.ts +294 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@dwk/webmention` — synchronous receiver validation.
|
|
3
|
+
*
|
|
4
|
+
* The Webmention receiver MUST validate `source` and `target` up front, before
|
|
5
|
+
* returning `202 Accepted` and before any network work, to reject malformed or
|
|
6
|
+
* foreign requests and prevent queue-exhaustion / spam. This module holds that
|
|
7
|
+
* pure check: plain strings in, a decision out. See `spec/packages/webmention.md`.
|
|
8
|
+
*
|
|
9
|
+
* @packageDocumentation
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Machine-readable reason why a Webmention request failed up-front
 * validation. The codes are stable and locale-independent, so callers can
 * branch on them or map them to their own messages.
 */
export type WebmentionValidationError =
    | "missing_source"
    | "missing_target"
    | "invalid_source"
    | "invalid_target"
    | "source_equals_target"
    | "target_not_supported";
|
|
13
|
+
/** Arguments accepted by {@link validateWebmentionParams}. */
export interface ValidateParams {
    /** Value of the `source` form field; `null` when the field was not sent. */
    readonly source: string | null;
    /** Value of the `target` form field; `null` when the field was not sent. */
    readonly target: string | null;
    /** Base URL of this receiver; a `target` on this URL's host is accepted. */
    readonly baseUrl: string;
    /**
     * Extra hosts, beyond the one in `baseUrl`, that this receiver also
     * controls — for example when a single Worker serves several domains.
     * Entries are compared case-insensitively with the target URL's `host`.
     */
    readonly allowedHosts?: ReadonlyArray<string>;
}
|
|
27
|
+
/**
 * Outcome of {@link validateWebmentionParams}, discriminated on `ok`:
 * success carries the accepted `source` and `target` strings, failure
 * carries a stable error code.
 */
export type ValidationResult =
    | {
        readonly ok: true;
        readonly source: string;
        readonly target: string;
    }
    | {
        readonly ok: false;
        readonly error: WebmentionValidationError;
    };
|
|
36
|
+
/**
 * Synchronously validate the `source`/`target` pair of a received Webmention.
 *
 * The checks run in this order, and the first failure decides the result:
 * 1. both fields are present;
 * 2. both parse as `http(s)` URLs;
 * 3. `source` and `target` are not the same URL;
 * 4. the host of `target` is one this receiver controls (the host of
 *    `baseUrl`, or an entry of `allowedHosts`).
 *
 * No I/O is performed.
 *
 * @param params - The raw form fields plus the receiver's host configuration.
 * @returns `{ ok: true, source, target }` with normalized URLs on success, or
 * `{ ok: false, error }` with a stable {@link WebmentionValidationError}.
 */
export declare function validateWebmentionParams(
    params: ValidateParams,
): ValidationResult;
|
|
47
|
+
//# sourceMappingURL=validate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../src/validate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,2EAA2E;AAC3E,MAAM,MAAM,yBAAyB,GACjC,gBAAgB,GAChB,gBAAgB,GAChB,gBAAgB,GAChB,gBAAgB,GAChB,sBAAsB,GACtB,sBAAsB,CAAC;AAE3B,kDAAkD;AAClD,MAAM,WAAW,cAAc;IAC7B,uDAAuD;IACvD,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,uDAAuD;IACvD,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sEAAsE;IACtE,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,QAAQ,CAAC,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CAC3C;AAED,kDAAkD;AAClD,MAAM,MAAM,gBAAgB,GACxB;IAAE,QAAQ,CAAC,EAAE,EAAE,IAAI,CAAC;IAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACvE;IAAE,QAAQ,CAAC,EAAE,EAAE,KAAK,CAAC;IAAC,QAAQ,CAAC,KAAK,EAAE,yBAAyB,CAAA;CAAE,CAAC;AAetE;;;;;;;;;GASG;AACH,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,cAAc,GACrB,gBAAgB,CAgClB"}
|
package/dist/validate.js
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@dwk/webmention` — synchronous receiver validation.
|
|
3
|
+
*
|
|
4
|
+
* The Webmention receiver MUST validate `source` and `target` up front, before
|
|
5
|
+
* returning `202 Accepted` and before any network work, to reject malformed or
|
|
6
|
+
* foreign requests and prevent queue-exhaustion / spam. This module holds that
|
|
7
|
+
* pure check: plain strings in, a decision out. See `spec/packages/webmention.md`.
|
|
8
|
+
*
|
|
9
|
+
* @packageDocumentation
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Parse `value` as an absolute URL, accepting only the `http:` and `https:`
 * schemes.
 *
 * @param value - Candidate URL string.
 * @returns The parsed `URL`, or `null` when `value` does not parse or uses
 * any other scheme.
 */
function parseHttpUrl(value) {
    try {
        const parsed = new URL(value);
        const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
        return isHttp ? parsed : null;
    }
    catch {
        // `new URL` throws on anything that is not an absolute URL.
        return null;
    }
}
|
|
24
|
+
/**
 * Synchronously validate the `source`/`target` pair of a received Webmention.
 *
 * The checks run in this order, and the first failure decides the result:
 * both fields present, both parse as `http(s)` URLs, `source` differs from
 * `target` (compared in serialized form), and the host of `target` is one
 * this receiver controls (`baseUrl`'s host or an `allowedHosts` entry).
 * No I/O is performed.
 *
 * @returns `{ ok: true, source, target }` with normalized URLs on success, or
 * `{ ok: false, error }` with a stable {@link WebmentionValidationError}.
 */
export function validateWebmentionParams(params) {
    const { source, target, baseUrl, allowedHosts } = params;
    const reject = (error) => ({ ok: false, error });
    // Presence: a missing field and an empty field are treated alike.
    if (source === null || source === "") {
        return reject("missing_source");
    }
    if (target === null || target === "") {
        return reject("missing_target");
    }
    // Syntax: source is judged before target.
    const parsedSource = parseHttpUrl(source);
    if (parsedSource === null) {
        return reject("invalid_source");
    }
    const parsedTarget = parseHttpUrl(target);
    if (parsedTarget === null) {
        return reject("invalid_target");
    }
    // Compare the serialized forms so differently spelled identical URLs match.
    const normalizedSource = parsedSource.toString();
    const normalizedTarget = parsedTarget.toString();
    if (normalizedSource === normalizedTarget) {
        return reject("source_equals_target");
    }
    const controlled = isControlledHost(parsedTarget.host, baseUrl, allowedHosts);
    if (!controlled) {
        return reject("target_not_supported");
    }
    return { ok: true, source: normalizedSource, target: normalizedTarget };
}
|
|
62
|
+
/**
 * Decide whether `host` belongs to this receiver.
 *
 * The accepted hosts are the `host` component of `baseUrl` plus every entry
 * of `allowedHosts`; the comparison is case-insensitive.
 *
 * @param host - Host of the target URL (as reported by `URL.host`).
 * @param baseUrl - The receiver's base URL.
 * @param allowedHosts - Optional additional hosts the receiver controls.
 * @returns `true` when `host` matches one of the accepted hosts.
 */
function isControlledHost(host, baseUrl, allowedHosts) {
    const accepted = [];
    try {
        accepted.push(new URL(baseUrl).host);
    }
    catch {
        // A malformed baseUrl is a configuration error: contribute no host, so
        // targets are rejected rather than silently accepted.
    }
    for (const extra of allowedHosts ?? []) {
        accepted.push(extra);
    }
    const normalized = accepted.map((candidate) => candidate.toLowerCase());
    return normalized.includes(host.toLowerCase());
}
|
|
76
|
+
//# sourceMappingURL=validate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../src/validate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AA+BH,SAAS,YAAY,CAAC,KAAa;IACjC,IAAI,GAAQ,CAAC;IACb,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,wBAAwB,CACtC,MAAsB;IAEtB,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC;IAEzD,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QACrC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAChD,CAAC;IACD,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QACrC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAChD,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;IACvC,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAChD,CAAC;IACD,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;IACvC,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAChD,CAAC;IAED,IAAI,SAAS,CAAC,QAAQ,EAAE,KAAK,SAAS,CAAC,QAAQ,EAAE,EAAE,CAAC;QAClD,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC;IACtD,CAAC;IAED,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC,EAAE,CAAC;QAC7D,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC;IACtD,CAAC;IAED,OAAO;QACL,EAAE,EAAE,IAAI;QACR,MAAM,EAAE,SAAS,CAAC,QAAQ,EAAE;QAC5B,MAAM,EAAE,SAAS,CAAC,QAAQ,EAAE;KAC7B,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,IAAY,EACZ,OAAe,EACf,YAA2C;IAE3C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IACnD,CAAC;IAAC,MAAM,CAAC;QACP,0EAA0E;QAC1E,qEAAqE;IACvE,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,YAAY,IAAI,EAAE,EAAE,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;IACnC,CAAC;IACD,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;AACzC,CAAC"}
|
package/dist/verify.d.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@dwk/webmention` — asynchronous source verification (receiver side).
|
|
3
|
+
*
|
|
4
|
+
* After the receiver has returned `202 Accepted`, a queued worker fetches the
|
|
5
|
+
* `source` and confirms it actually links to `target` (Webmention §3.2.1).
|
|
6
|
+
* Verification is link-level: the source document must contain a link
|
|
7
|
+
* (`href`/`src`) that resolves to the target. Full Microformats2 extraction is
|
|
8
|
+
* intentionally out of scope here — it would pull a parser into the Worker
|
|
9
|
+
* bundle, which the runtime budget rules out. See `spec/packages/webmention.md`.
|
|
10
|
+
*
|
|
11
|
+
* @packageDocumentation
|
|
12
|
+
*/
|
|
13
|
+
import { type Logger, type Metrics } from "@dwk/log";
|
|
14
|
+
import { type FetchLike } from "./fetch";
|
|
15
|
+
/**
 * Collect the absolute URL of every link (`href` and `src`) in an HTML
 * document, resolving relative values against `baseUrl`. All `href` links
 * come first in the result, followed by the `src` links.
 *
 * The function is async because the HTML is scanned with the runtime's
 * `HTMLRewriter`; as a consequence, links inside comments are ignored without
 * a separate stripping pass.
 *
 * @param html - The HTML document text.
 * @param baseUrl - URL against which relative links are resolved.
 * @returns The resolved link URLs.
 */
export declare function extractLinks(
    html: string,
    baseUrl: string,
): Promise<string[]>;
|
|
24
|
+
/**
 * Decide whether a fetched source document (`body`) links to `target`.
 *
 * - HTML: the document must contain an `href`/`src` that resolves to the
 *   target.
 * - Any other content type requires an **exact** match of the target URL
 *   (Webmention §3.2.2), never a loose substring: a JSON body needs a string
 *   value equal to the target, and any other body (e.g. plain text) must
 *   contain the target as a standalone URL token.
 *
 * @param body - Text of the fetched source document.
 * @param target - The target URL the source is expected to mention.
 * @param baseUrl - URL against which relative links in `body` are resolved.
 * @param contentType - The source response's `Content-Type` value.
 * @returns Whether the source links to the target.
 */
export declare function sourceLinksTo(
    body: string,
    target: string,
    baseUrl: string,
    contentType: string,
): Promise<boolean>;
|
|
34
|
+
/** Optional collaborators for {@link verifySource}; every member may be omitted. */
export interface VerifyOptions {
    /** `fetch` implementation to use; the global `fetch` is used when omitted. */
    readonly fetch?: FetchLike;
    /** Receives verification outcomes and failures; a no-op logger is used when omitted. */
    readonly logger?: Logger;
    /** Receives verification-outcome counters; a no-op sink is used when omitted. */
    readonly metrics?: Metrics;
}
|
|
43
|
+
/** What {@link verifySource} learned from fetching and checking a source document. */
export interface VerifyResult {
    /** `true` when the source links to the target. */
    readonly links: boolean;
    /** HTTP status of the source response, or `0` when the fetch threw. */
    readonly status: number;
}
|
|
50
|
+
/**
 * Fetch `source` and check that it links to `target`.
 *
 * The fetch goes through the SSRF-safe wrapper ({@link safeFetch}): the
 * source host and every redirect hop are validated against
 * private/loopback/link-local ranges, redirects are capped, and the request
 * is bounded by a timeout. Relative links are resolved against the final URL.
 *
 * A failed, blocked, or non-2xx fetch yields `{ links: false }`, because a
 * removed or unreachable source no longer endorses the mention.
 *
 * @param source - URL of the document that claims to mention `target`.
 * @param target - URL the source must link to.
 * @param options - Optional `fetch`, logger and metrics overrides.
 * @returns Whether the source links to the target, plus the source's status.
 */
export declare function verifySource(
    source: string,
    target: string,
    options?: VerifyOptions,
): Promise<VerifyResult>;
|
|
61
|
+
//# sourceMappingURL=verify.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"verify.d.ts","sourceRoot":"","sources":["../src/verify.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAIL,KAAK,MAAM,EACX,KAAK,OAAO,EACb,MAAM,UAAU,CAAC;AAOlB,OAAO,EAAkB,KAAK,SAAS,EAAE,MAAM,SAAS,CAAC;AAWzD;;;;;;;GAOG;AACH,wBAAsB,YAAY,CAChC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,MAAM,EAAE,CAAC,CA6BnB;AAqED;;;;;;;;GAQG;AACH,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,OAAO,CAAC,CA4BlB;AAED,wCAAwC;AACxC,MAAM,WAAW,aAAa;IAC5B,qEAAqE;IACrE,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,CAAC;IAC3B,sEAAsE;IACtE,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,2EAA2E;IAC3E,QAAQ,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC;CAC5B;AAyBD,0DAA0D;AAC1D,MAAM,WAAW,YAAY;IAC3B,8CAA8C;IAC9C,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC;IACxB,2DAA2D;IAC3D,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;GASG;AACH,wBAAsB,YAAY,CAChC,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,YAAY,CAAC,CAgDvB"}
|
package/dist/verify.js
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@dwk/webmention` — asynchronous source verification (receiver side).
|
|
3
|
+
*
|
|
4
|
+
* After the receiver has returned `202 Accepted`, a queued worker fetches the
|
|
5
|
+
* `source` and confirms it actually links to `target` (Webmention §3.2.1).
|
|
6
|
+
* Verification is link-level: the source document must contain a link
|
|
7
|
+
* (`href`/`src`) that resolves to the target. Full Microformats2 extraction is
|
|
8
|
+
* intentionally out of scope here — it would pull a parser into the Worker
|
|
9
|
+
* bundle, which the runtime budget rules out. See `spec/packages/webmention.md`.
|
|
10
|
+
*
|
|
11
|
+
* @packageDocumentation
|
|
12
|
+
*/
|
|
13
|
+
import { hostFromUrl, noopLogger, noopMetrics, } from "@dwk/log";
|
|
14
|
+
import { isHtmlContentType, isJsonContentType, resolveUrl, scanElements, } from "./html";
|
|
15
|
+
import { readBodyCapped } from "./fetch";
|
|
16
|
+
import { WebmentionLogEvent } from "./log";
|
|
17
|
+
import { safeFetch } from "./safe-fetch";
|
|
18
|
+
/** Tag names whose `href` attribute can be a link to the target. */
const HREF_TAGS = new Set(["a", "link", "area"]);
/** Tag names whose `src` attribute can be a link to the target. */
const SRC_TAGS = new Set(["img", "video", "audio", "source", "track"]);
/**
 * Selector for every element the link scan looks at: `base` (read for its
 * `href`, which sets the document base) followed by each link-bearing tag,
 * in the sets' insertion order.
 */
const LINK_SELECTOR = ["base", ...HREF_TAGS, ...SRC_TAGS].join(", ");
|
|
23
|
+
/**
 * Collect the absolute URL of every link (`href` and `src`) in an HTML
 * document, resolving relative values against `baseUrl`. All `href` links
 * come first in the result, followed by the `src` links.
 *
 * The function is async because the HTML is scanned with the runtime's
 * `HTMLRewriter`; as a consequence, links inside comments are ignored without
 * a separate stripping pass.
 *
 * @param html - The HTML document text.
 * @param baseUrl - URL against which relative links are resolved.
 * @returns The resolved link URLs; values that fail to resolve are dropped.
 */
export async function extractLinks(html, baseUrl) {
    const scanned = await scanElements(html, LINK_SELECTOR, ["href", "src"]);
    // Only the first <base> carrying an href counts; if that href does not
    // resolve, the caller-supplied base stays in effect.
    let resolveAgainst = baseUrl;
    for (const node of scanned) {
        if (node.name !== "base" || !node.attrs.href) {
            continue;
        }
        resolveAgainst = resolveUrl(node.attrs.href, baseUrl) ?? baseUrl;
        break;
    }
    const found = [];
    // Append, in document order, the resolved `attrName` of each element whose
    // tag is in `tagSet`.
    const gather = (tagSet, attrName) => {
        for (const node of scanned) {
            const raw = tagSet.has(node.name) ? node.attrs[attrName] : null;
            if (raw === null || raw === undefined || raw === "") {
                continue;
            }
            const absolute = resolveUrl(raw, resolveAgainst);
            if (absolute !== null) {
                found.push(absolute);
            }
        }
    };
    // Two passes keep every href link ahead of every src link.
    gather(HREF_TAGS, "href");
    gather(SRC_TAGS, "src");
    return found;
}
|
|
61
|
+
/**
 * Characters that unambiguously continue a URL token: an alphanumeric, or a
 * structural delimiter (path / query / fragment / userinfo). One of these
 * abutting the target means the target is part of a longer URL.
 */
const URL_CORE_CHAR = /[A-Za-z0-9_/\-~%+=&?#@]/;
/**
 * The full RFC 3986 URL character set (unreserved + reserved + `%`). Punctuation
 * such as `.` `,` `;` `)` `]` is valid inside a URL but also routinely trails or
 * wraps one in prose, so on its own it does not prove continuation — only when
 * it is itself followed by a {@link URL_CORE_CHAR} (e.g. the `.` in `…/post.html`).
 */
const URL_CHAR = /[A-Za-z0-9\-._~:/?#[\]@!$&'()*+,;=%]/;
/**
 * Whether `body` contains `target` as a standalone URL token — present, with
 * neither neighbour continuing a URL. This rejects the over-matches a bare
 * substring admits (`…/post` inside `…/posting`, `…/target` inside
 * `…/target/extra`, or the target as a suffix of a longer URL) while still
 * accepting a target trailed by sentence punctuation or wrapped in brackets.
 *
 * @param body - Text to search.
 * @param target - URL to look for; an empty string never matches.
 * @returns `true` when at least one occurrence of `target` stands alone.
 */
function textHasUrlToken(body, target) {
    // An empty needle is not a URL. It would also stall the scan below:
    // `indexOf("", from + 1)` clamps to `body.length`, so `from` stops advancing.
    if (target === "") {
        return false;
    }
    let from = body.indexOf(target);
    while (from !== -1) {
        const end = from + target.length;
        const before = from === 0 ? "" : body[from - 1];
        const after = body[end] ?? "";
        const afterNext = body[end + 1] ?? "";
        // A preceding core URL char makes the target a suffix of a longer URL.
        const beforeContinues = URL_CORE_CHAR.test(before);
        // A following core URL char continues the URL; a punctuation URL char only
        // continues it when itself followed by a core char (so `.html` continues,
        // but a sentence-ending `.` or a wrapping `)` is a boundary).
        const afterContinues = URL_CORE_CHAR.test(after) ||
            (URL_CHAR.test(after) && URL_CORE_CHAR.test(afterNext));
        if (!beforeContinues && !afterContinues) {
            return true;
        }
        // Step by one character so overlapping occurrences are still examined.
        from = body.indexOf(target, from + 1);
    }
    return false;
}
|
|
99
|
+
/**
|
|
100
|
+
* Whether any string value within a parsed JSON value equals `target` exactly.
|
|
101
|
+
* Webmention §3.2.2 requires an exact match of the target URL in a non-HTML
|
|
102
|
+
* source, so a JSON body is walked for a string property value identical to the
|
|
103
|
+
* target rather than substring-scanned.
|
|
104
|
+
*/
|
|
105
|
+
function jsonHasTargetValue(value, target) {
|
|
106
|
+
if (typeof value === "string") {
|
|
107
|
+
return value === target;
|
|
108
|
+
}
|
|
109
|
+
if (Array.isArray(value)) {
|
|
110
|
+
return value.some((item) => jsonHasTargetValue(item, target));
|
|
111
|
+
}
|
|
112
|
+
if (value !== null && typeof value === "object") {
|
|
113
|
+
return Object.values(value).some((item) => jsonHasTargetValue(item, target));
|
|
114
|
+
}
|
|
115
|
+
return false;
|
|
116
|
+
}
|
|
117
|
+
/**
 * Decide whether a fetched source document (`body`) links to `target`.
 *
 * - HTML: the document must contain an `href`/`src` that resolves to the
 *   normalized target.
 * - Any other content type requires an **exact** match of the target URL
 *   (Webmention §3.2.2), never a loose substring: a JSON body needs a string
 *   value equal to the target, and any other body (e.g. plain text) must
 *   contain the target as a standalone URL token. In both cases the target is
 *   tried as given first, then in normalized form when the two differ.
 *
 * @returns `false` when `target` cannot be normalized or the body does not
 * link to it.
 */
export async function sourceLinksTo(body, target, baseUrl, contentType) {
    const canonical = resolveUrl(target, target);
    if (canonical === null) {
        return false;
    }
    if (isHtmlContentType(contentType)) {
        const links = await extractLinks(body, baseUrl);
        return links.includes(canonical);
    }
    // Spellings to look for, in order: the raw target, then its normalized form.
    const spellings = canonical === target ? [target] : [target, canonical];
    if (isJsonContentType(contentType)) {
        let decoded;
        try {
            decoded = JSON.parse(body);
        }
        catch {
            // A body that claims to be JSON but does not parse cannot exact-match.
            return false;
        }
        return spellings.some((spelling) => jsonHasTargetValue(decoded, spelling));
    }
    return spellings.some((spelling) => textHasUrlToken(body, spelling));
}
|
|
150
|
+
/**
 * Report a verification outcome to both the logger and the metrics sink, then
 * hand the result back unchanged. Only the hosts of `source` and `target`
 * (via `hostFromUrl`) are reported, together with the result's `links` and
 * `status`. The counter receives the same fields as the log entry, so a
 * "verification success rate" can be charted from `links`/`status`.
 *
 * @returns `result`, so call sites can `return recordVerifyOutcome(...)`.
 */
function recordVerifyOutcome(logger, metrics, source, target, result) {
    const payload = {
        sourceHost: hostFromUrl(source),
        targetHost: hostFromUrl(target),
        links: result.links,
        status: result.status,
    };
    const event = WebmentionLogEvent.VerifyCompleted;
    logger.info(event, payload);
    metrics.count(event, payload);
    return result;
}
|
|
166
|
+
/**
 * Fetch `source` and check that it links to `target`.
 *
 * The fetch goes through the SSRF-safe wrapper ({@link safeFetch}): the
 * source host and every redirect hop are validated against
 * private/loopback/link-local ranges, redirects are capped, and the request
 * is bounded by a timeout. Relative links are resolved against the final URL.
 *
 * A failed, blocked, or non-2xx fetch yields `{ links: false }`, because a
 * removed or unreachable source no longer endorses the mention. When the
 * fetch itself throws, the status is reported as `0`.
 */
export async function verifySource(source, target, options) {
    const fetchImpl = options?.fetch ?? ((input, init) => fetch(input, init));
    const log = options?.logger ?? noopLogger;
    const meter = options?.metrics ?? noopMetrics;
    // Every outcome after a completed fetch is reported through this helper.
    const finish = (links, status) => recordVerifyOutcome(log, meter, source, target, { links, status });
    let fetched;
    try {
        const { response, url } = await safeFetch(fetchImpl, source, { method: "GET", headers: { accept: "text/html, */*" } }, { logger: log, metrics: meter });
        fetched = { response, url };
    }
    catch (err) {
        // A blocked attempt is already logged as `ssrf.blocked` inside safeFetch;
        // record the verification-level failure too so the outcome isn't silent.
        log.debug(WebmentionLogEvent.VerifyFetchFailed, {
            sourceHost: hostFromUrl(source),
            error: err instanceof Error ? err.name : "unknown",
        });
        return { links: false, status: 0 };
    }
    const { response, url: finalUrl } = fetched;
    if (!response.ok) {
        return finish(false, response.status);
    }
    const contentType = response.headers.get("content-type") ?? "";
    const body = await readBodyCapped(response);
    if (body === null) {
        // Unreadable or oversized body: treat as no longer endorsing the mention.
        return finish(false, response.status);
    }
    return finish(await sourceLinksTo(body, target, finalUrl, contentType), response.status);
}
|
|
216
|
+
//# sourceMappingURL=verify.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"verify.js","sourceRoot":"","sources":["../src/verify.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EACL,WAAW,EACX,UAAU,EACV,WAAW,GAGZ,MAAM,UAAU,CAAC;AAClB,OAAO,EACL,iBAAiB,EACjB,iBAAiB,EACjB,UAAU,EACV,YAAY,GACb,MAAM,QAAQ,CAAC;AAChB,OAAO,EAAE,cAAc,EAAkB,MAAM,SAAS,CAAC;AACzD,OAAO,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,iEAAiE;AACjE,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;AACjD,gEAAgE;AAChE,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;AAEvE,MAAM,aAAa,GAAG,uDAAuD,CAAC;AAE9E;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAAY,EACZ,OAAe;IAEf,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,aAAa,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;IAC1E,8EAA8E;IAC9E,IAAI,YAAY,GAAG,OAAO,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,YAAY,GAAG,UAAU,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,OAAO,CAAC;YAC7D,MAAM;QACR,CAAC;IACH,CAAC;IACD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,CAAC,IAAyB,EAAE,IAAoB,EAAE,EAAE;QAClE,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvB,SAAS;YACX,CAAC;YACD,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC7B,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,EAAE,EAAE,CAAC;gBAC1D,SAAS;YACX,CAAC;YACD,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;YACjD,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gBACtB,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC,CAAC;IACF,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC3B,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IACzB,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,aAAa,GAAG,yBAAyB,CAAC;AAEhD;;;;;GAKG;AACH,MAAM,QAAQ,GAAG,sCAAsC,CAAC;AAExD;;;;;;GAMG;AACH,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,KACE,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAC/B,IAAI,KAAK,CAAC,CAAC,EACX,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,GAAG,CAAC,CAAC,EACrC,C
AAC;QACD,MAAM,MAAM,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACvD,uEAAuE;QACvE,MAAM,eAAe,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnD,2EAA2E;QAC3E,0EAA0E;QAC1E,8DAA8D;QAC9D,MAAM,cAAc,GAClB,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,aAAa,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QAC1D,IAAI,CAAC,eAAe,IAAI,CAAC,cAAc,EAAE,CAAC;YACxC,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;GAKG;AACH,SAAS,kBAAkB,CAAC,KAAc,EAAE,MAAc;IACxD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,KAAK,KAAK,MAAM,CAAC;IAC1B,CAAC;IACD,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;IAChE,CAAC;IACD,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,OAAO,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CACxC,kBAAkB,CAAC,IAAI,EAAE,MAAM,CAAC,CACjC,CAAC;IACJ,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAY,EACZ,MAAc,EACd,OAAe,EACf,WAAmB;IAEnB,MAAM,gBAAgB,GAAG,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpD,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,iBAAiB,CAAC,WAAW,CAAC,EAAE,CAAC;QACnC,OAAO,CAAC,MAAM,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAC7C,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,gBAAgB,CACpC,CAAC;IACJ,CAAC;IACD,IAAI,iBAAiB,CAAC,WAAW,CAAC,EAAE,CAAC;QACnC,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,uEAAuE;YACvE,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,CACL,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;YAClC,CAAC,gBAAgB,KAAK,MAAM;gBAC1B,kBAAkB,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC,CAChD,CAAC;IACJ,CAAC;IACD,OAAO,CACL,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC;QAC7B,CAAC,gBAAgB,KAAK,MAAM,IAAI,eAAe,CAAC,IAAI,EA
AE,gBAAgB,CAAC,CAAC,CACzE,CAAC;AACJ,CAAC;AAYD;;;;GAIG;AACH,SAAS,mBAAmB,CAC1B,MAAc,EACd,OAAgB,EAChB,MAAc,EACd,MAAc,EACd,MAAoB;IAEpB,MAAM,MAAM,GAAG;QACb,UAAU,EAAE,WAAW,CAAC,MAAM,CAAC;QAC/B,UAAU,EAAE,WAAW,CAAC,MAAM,CAAC;QAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM;KACtB,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC;IACxD,OAAO,CAAC,KAAK,CAAC,kBAAkB,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC;IAC1D,OAAO,MAAM,CAAC;AAChB,CAAC;AAUD;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,MAAc,EACd,MAAc,EACd,OAAuB;IAEvB,MAAM,OAAO,GACX,OAAO,EAAE,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,UAAU,CAAC;IAC7C,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,WAAW,CAAC;IAEhD,IAAI,QAAkB,CAAC;IACvB,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAC5B,OAAO,EACP,MAAM,EACN,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,gBAAgB,EAAE,EAAE,EACxD,EAAE,MAAM,EAAE,OAAO,EAAE,CACpB,CAAC;QACF,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC3B,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC;IACpB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,0EAA0E;QAC1E,yEAAyE;QACzE,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,iBAAiB,EAAE;YACjD,UAAU,EAAE,WAAW,CAAC,MAAM,CAAC;YAC/B,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;SACnD,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IACrC,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;YAC1D,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,QAAQ,CAAC,MAAM;SACxB,CAAC,CAAC;IACL,CAAC;IAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IAC/D,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC5C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,0EAA0E;QAC1E,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;YAC1D,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,QAAQ,CAAC,MAAM;SACxB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;QAC1D,KAAK,EAAE,MAAM,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,WAAW,CAAC;QAC3D,MAAM,E
AAE,QAAQ,CAAC,MAAM;KACxB,CAAC,CAAC;AACL,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@dwk/webmention",
|
|
3
|
+
"version": "0.1.0-beta.0",
|
|
4
|
+
"description": "Webmention receiver (async verification queue) and sender.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"webmention",
|
|
7
|
+
"indieweb",
|
|
8
|
+
"w3c",
|
|
9
|
+
"cloudflare-workers"
|
|
10
|
+
],
|
|
11
|
+
"type": "module",
|
|
12
|
+
"license": "ISC",
|
|
13
|
+
"author": "David W. Keith <me@dwk.io>",
|
|
14
|
+
"homepage": "https://github.com/davidwkeith/workers/tree/main/packages/webmention#readme",
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "git+https://github.com/davidwkeith/workers.git",
|
|
18
|
+
"directory": "packages/webmention"
|
|
19
|
+
},
|
|
20
|
+
"sideEffects": false,
|
|
21
|
+
"main": "./dist/index.js",
|
|
22
|
+
"types": "./dist/index.d.ts",
|
|
23
|
+
"exports": {
|
|
24
|
+
".": {
|
|
25
|
+
"types": "./dist/index.d.ts",
|
|
26
|
+
"import": "./dist/index.js"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"files": [
|
|
30
|
+
"dist",
|
|
31
|
+
"src",
|
|
32
|
+
"!src/**/*.test.ts"
|
|
33
|
+
],
|
|
34
|
+
"publishConfig": {
|
|
35
|
+
"access": "public"
|
|
36
|
+
},
|
|
37
|
+
"dependencies": {
|
|
38
|
+
"@dwk/log": "0.1.0-beta.0"
|
|
39
|
+
},
|
|
40
|
+
"scripts": {
|
|
41
|
+
"build": "tsc -p tsconfig.build.json",
|
|
42
|
+
"typecheck": "tsc -p tsconfig.json",
|
|
43
|
+
"clean": "rm -rf dist"
|
|
44
|
+
}
|
|
45
|
+
}
|
package/src/discovery.ts
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@dwk/webmention` — Webmention endpoint discovery (sender side).
|
|
3
|
+
*
|
|
4
|
+
* Given a target URL, find its declared Webmention endpoint following the W3C
|
|
5
|
+
* discovery algorithm: the HTTP `Link` header (`rel=webmention`) wins, then the
|
|
6
|
+
* first `<link>`/`<a rel="webmention">` in document order, with relative URLs
|
|
7
|
+
* resolved against the (post-redirect) document URL. The legacy
|
|
8
|
+
* `http://webmention.org/` rel value is also accepted. See
|
|
9
|
+
* `spec/packages/webmention.md`.
|
|
10
|
+
*
|
|
11
|
+
* @packageDocumentation
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { noopLogger, noopMetrics, type Logger, type Metrics } from "@dwk/log";
|
|
15
|
+
import {
|
|
16
|
+
isHtmlContentType,
|
|
17
|
+
parseLinkHeader,
|
|
18
|
+
resolveUrl,
|
|
19
|
+
scanElements,
|
|
20
|
+
splitTokens,
|
|
21
|
+
} from "./html";
|
|
22
|
+
import { readBodyCapped, type FetchLike } from "./fetch";
|
|
23
|
+
import { safeFetch } from "./safe-fetch";
|
|
24
|
+
|
|
25
|
+
// Rel values that predate the standardized `webmention` token. Both are
// absolute URLs, so a candidate rel is compared by its `URL`-normalized href
// rather than as a raw string. Normalization makes `http://webmention.org` and
// `http://webmention.org/` equal (the trailing slash is commonly omitted),
// while a look-alike host such as `http://webmention.org.evil.example/`
// normalizes to a different href and is rejected — something a bare
// `startsWith` prefix test would let through.
const LEGACY_REL_HREFS = new Set([
  "http://webmention.org/",
  "http://webmention.org/webmention",
]);

/**
 * Decide whether a single rel token designates a Webmention endpoint.
 *
 * @param rel - One rel token (already split from the attribute/parameter).
 * @returns `true` for the `webmention` token (compared case-insensitively) or
 *   for a token whose normalized absolute URL is one of the legacy rel values;
 *   `false` otherwise, including when the token is not a parseable URL.
 */
function isWebmentionRel(rel: string): boolean {
  if (rel.toLowerCase() === "webmention") {
    return true;
  }

  // Anything else can only match as a legacy absolute-URL rel. A token that
  // `URL` cannot parse is neither the standard token nor a legacy value.
  let normalized: string | null;
  try {
    normalized = new URL(rel).href;
  } catch {
    normalized = null;
  }
  return normalized !== null && LEGACY_REL_HREFS.has(normalized);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Locate the Webmention endpoint advertised by an already-fetched document.
 *
 * The HTTP `Link` header is consulted first; only when it carries no
 * webmention entry is the HTML body scanned for the first `<link>` or `<a>`
 * whose `rel` contains a webmention token. Async because the HTML scan runs
 * through the runtime's `HTMLRewriter`.
 *
 * @param linkHeader - Raw `Link` header value, or `null` when absent.
 * @param html - Response body to scan; an empty string skips the HTML scan.
 * @param documentUrl - URL of the document, used as the base for relative
 *   resolution (of header URIs, and of hrefs when there is no `<base href>`).
 * @returns Whatever `resolveUrl` yields for the first advertised endpoint, or
 *   `null` when no endpoint is advertised.
 */
export async function findWebmentionEndpoint(
  linkHeader: string | null,
  html: string,
  documentUrl: string,
): Promise<string | null> {
  // Step 1 — the Link header has priority; entries are tried in header order.
  for (const link of parseLinkHeader(linkHeader)) {
    const advertises = link.rels.some((rel) => isWebmentionRel(rel));
    if (advertises) {
      return resolveUrl(link.uri, documentUrl);
    }
  }

  // Step 2 — otherwise take the first <link>/<a> with a webmention rel, in
  // document order. HTMLRewriter does not report elements inside comments, so
  // a commented-out endpoint is ignored without a separate stripping pass.
  if (html === "") {
    return null;
  }
  const scanned = await scanElements(html, "base, link, a", ["rel", "href"]);

  // Relative hrefs resolve against the first <base href> found anywhere in
  // the document, falling back to the document URL itself.
  let baseUrl = documentUrl;
  for (const node of scanned) {
    if (node.name !== "base" || !node.attrs.href) {
      continue;
    }
    baseUrl = resolveUrl(node.attrs.href, documentUrl) ?? documentUrl;
    break;
  }

  for (const node of scanned) {
    if (node.name === "base") {
      continue;
    }
    const tokens = splitTokens(node.attrs.rel ?? null);
    const target = node.attrs.href;
    // A tag lacking an `href` attribute altogether is malformed: skip it and
    // keep looking (test 20). An empty `href=""` is valid and advertises the
    // document itself as the endpoint (test 15), so only null/undefined is
    // treated as "missing".
    const hasHref = target !== null && target !== undefined;
    if (!hasHref || !tokens.some((rel) => isWebmentionRel(rel))) {
      continue;
    }
    return resolveUrl(target, baseUrl);
  }

  return null;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Optional collaborators accepted by {@link discoverEndpoint}. Every member
 * may be omitted; the defaults are described per member.
 */
export interface DiscoverOptions {
  /**
   * `fetch` implementation used for the discovery request. When omitted, the
   * global `fetch` is used.
   */
  readonly fetch?: FetchLike;
  /**
   * Logger handed to the SSRF-safe fetch wrapper. When omitted, a no-op
   * logger is used.
   */
  readonly logger?: Logger;
  /**
   * Metrics sink handed to the SSRF-safe fetch wrapper. When omitted, a no-op
   * sink is used.
   */
  readonly metrics?: Metrics;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Best-effort release of a response body that will not be read.
 *
 * Leaving a fetched body unread keeps the underlying connection open until the
 * runtime reclaims it; cancelling the stream frees it immediately. Any failure
 * (no body, stream already locked, non-standard `Response`) is ignored because
 * the caller's result does not depend on it.
 */
function discardBody(response: Response): void {
  try {
    void response.body?.cancel().catch(() => undefined);
  } catch {
    // Nothing to release, or the body is not cancellable — safe to ignore.
  }
}

/**
 * Fetch `target` and discover its Webmention endpoint.
 *
 * Fetches through the SSRF-safe wrapper ({@link safeFetch}): the target host —
 * and every redirect hop — is validated against private/loopback/link-local
 * ranges, redirects are capped, and the request is bounded by a timeout. The
 * endpoint resolves against the final (post-redirect) URL.
 *
 * The `Link` header is checked before the body is touched; the body is only
 * read when the header advertises nothing and the response is HTML. On the
 * paths where the body is not needed it is cancelled rather than left
 * dangling.
 *
 * @param target - URL of the document whose endpoint should be discovered.
 * @param options - Optional `fetch`, logger and metrics overrides.
 * @returns The absolute endpoint URL, or `null` when discovery finds none or
 *   the fetch fails or is blocked. Never rejects because of a fetch failure.
 */
export async function discoverEndpoint(
  target: string,
  options?: DiscoverOptions,
): Promise<string | null> {
  const doFetch: FetchLike =
    options?.fetch ?? ((input, init) => fetch(input, init));
  const logger = options?.logger ?? noopLogger;
  const metrics = options?.metrics ?? noopMetrics;

  let response: Response;
  let base: string;
  try {
    const result = await safeFetch(
      doFetch,
      target,
      { method: "GET", headers: { accept: "text/html, */*" } },
      { logger, metrics },
    );
    response = result.response;
    base = result.url;
  } catch {
    // A failed or blocked fetch is reported to the caller as "no endpoint".
    return null;
  }

  // Header-only pass: an empty HTML string makes findWebmentionEndpoint skip
  // the body scan, so the body need not be read if the header suffices.
  const fromHeader = await findWebmentionEndpoint(
    response.headers.get("link"),
    "",
    base,
  );
  if (fromHeader !== null) {
    discardBody(response);
    return fromHeader;
  }

  // Only HTML bodies can carry <link>/<a rel="webmention"> elements.
  const contentType = response.headers.get("content-type") ?? "";
  if (!isHtmlContentType(contentType)) {
    discardBody(response);
    return null;
  }

  const html = await readBodyCapped(response);
  if (html === null) {
    return null;
  }
  return findWebmentionEndpoint(null, html, base);
}
|