@fedify/vocab-runtime 2.0.0-dev.1908 → 2.0.0-dev.85

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/deno.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fedify/vocab-runtime",
3
- "version": "2.0.0-dev.1908+c31cc639",
3
+ "version": "2.0.0-dev.85+a55c8362",
4
4
  "license": "MIT",
5
5
  "exports": {
6
6
  ".": "./src/mod.ts"
@@ -12,6 +12,7 @@
12
12
  "url": "https://hongminhee.org/"
13
13
  },
14
14
  "imports": {
15
+ "@multiformats/base-x": "npm:@multiformats/base-x@^4.0.1",
15
16
  "asn1js": "npm:asn1js@^3.0.6",
16
17
  "byte-encodings": "npm:byte-encodings@^1.0.11",
17
18
  "fetch-mock": "npm:fetch-mock@^12.5.4",
package/dist/mod.cjs CHANGED
@@ -22,6 +22,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
22
22
 
23
23
  //#endregion
24
24
  const __logtape_logtape = __toESM(require("@logtape/logtape"));
25
+ const __opentelemetry_api = __toESM(require("@opentelemetry/api"));
25
26
  const node_process = __toESM(require("node:process"));
26
27
  const node_dns_promises = __toESM(require("node:dns/promises"));
27
28
  const node_net = __toESM(require("node:net"));
@@ -4178,6 +4179,43 @@ const preloadedContexts = {
4178
4179
  };
4179
4180
  var contexts_default = preloadedContexts;
4180
4181
 
4182
+ //#endregion
4183
+ //#region deno.json
4184
+ var name = "@fedify/vocab-runtime";
4185
+ var version = "2.0.0-dev.85+a55c8362";
4186
+ var license = "MIT";
4187
+ var exports$1 = { ".": "./src/mod.ts" };
4188
+ var description = "Runtime library for @fedify/vocab";
4189
+ var author = {
4190
+ "name": "Hong Minhee",
4191
+ "email": "hong@minhee.org",
4192
+ "url": "https://hongminhee.org/"
4193
+ };
4194
+ var imports = {
4195
+ "@multiformats/base-x": "npm:@multiformats/base-x@^4.0.1",
4196
+ "asn1js": "npm:asn1js@^3.0.6",
4197
+ "byte-encodings": "npm:byte-encodings@^1.0.11",
4198
+ "fetch-mock": "npm:fetch-mock@^12.5.4",
4199
+ "multicodec": "npm:multicodec@^3.2.1",
4200
+ "pkijs": "npm:pkijs@^3.2.5"
4201
+ };
4202
+ var exclude = ["dist", "node_modules"];
4203
+ var tasks = {
4204
+ "check": "deno fmt --check && deno lint && deno check src/*.ts",
4205
+ "test": "deno test"
4206
+ };
4207
+ var deno_default = {
4208
+ name,
4209
+ version,
4210
+ license,
4211
+ exports: exports$1,
4212
+ description,
4213
+ author,
4214
+ imports,
4215
+ exclude,
4216
+ tasks
4217
+ };
4218
+
4181
4219
  //#endregion
4182
4220
  //#region src/link.ts
4183
4221
  const parametersNeedLowerCase = ["rel", "type"];
@@ -4360,42 +4398,6 @@ var HttpHeaderLink = class HttpHeaderLink {
4360
4398
  }
4361
4399
  };
4362
4400
 
4363
- //#endregion
4364
- //#region deno.json
4365
- var name = "@fedify/vocab-runtime";
4366
- var version = "2.0.0-dev.1908+c31cc639";
4367
- var license = "MIT";
4368
- var exports$1 = { ".": "./src/mod.ts" };
4369
- var description = "Runtime library for @fedify/vocab";
4370
- var author = {
4371
- "name": "Hong Minhee",
4372
- "email": "hong@minhee.org",
4373
- "url": "https://hongminhee.org/"
4374
- };
4375
- var imports = {
4376
- "asn1js": "npm:asn1js@^3.0.6",
4377
- "byte-encodings": "npm:byte-encodings@^1.0.11",
4378
- "fetch-mock": "npm:fetch-mock@^12.5.4",
4379
- "multicodec": "npm:multicodec@^3.2.1",
4380
- "pkijs": "npm:pkijs@^3.2.5"
4381
- };
4382
- var exclude = ["dist", "node_modules"];
4383
- var tasks = {
4384
- "check": "deno fmt --check && deno lint && deno check src/*.ts",
4385
- "test": "deno test"
4386
- };
4387
- var deno_default = {
4388
- name,
4389
- version,
4390
- license,
4391
- exports: exports$1,
4392
- description,
4393
- author,
4394
- imports,
4395
- exclude,
4396
- tasks
4397
- };
4398
-
4399
4401
  //#endregion
4400
4402
  //#region src/request.ts
4401
4403
  /**
@@ -4582,29 +4584,38 @@ async function getRemoteDocument(url, response, fetch$1) {
4582
4584
  }
4583
4585
  let document;
4584
4586
  if (!jsonLd && (contentType === "text/html" || contentType?.startsWith("text/html;") || contentType === "application/xhtml+xml" || contentType?.startsWith("application/xhtml+xml;"))) {
4585
- const p = /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s>]+))+)\s*\/?>/gi;
4586
- const p2 = /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s>]+))/gi;
4587
+ const MAX_HTML_SIZE = 1024 * 1024;
4587
4588
  const html = await response.text();
4588
- let m;
4589
- const rawAttribs = [];
4590
- while ((m = p.exec(html)) !== null) rawAttribs.push(m[2]);
4591
- for (const rawAttrs of rawAttribs) {
4592
- let m2;
4593
- const attribs = {};
4594
- while ((m2 = p2.exec(rawAttrs)) !== null) {
4595
- const key = m2[1].toLowerCase();
4596
- const value = m2[3] ?? m2[4] ?? m2[5] ?? "";
4597
- attribs[key] = value;
4598
- }
4599
- if (attribs.rel === "alternate" && "type" in attribs && (attribs.type === "application/activity+json" || attribs.type === "application/ld+json" || attribs.type.startsWith("application/ld+json;")) && "href" in attribs && new URL(attribs.href, docUrl).href !== docUrl.href) {
4600
- logger.debug("Found alternate document: {alternateUrl} from {url}", {
4601
- alternateUrl: attribs.href,
4602
- url: documentUrl
4603
- });
4604
- return await fetch$1(new URL(attribs.href, docUrl).href);
4589
+ if (html.length > MAX_HTML_SIZE) {
4590
+ logger.warn("HTML response too large, skipping alternate link discovery: {url}", {
4591
+ url: documentUrl,
4592
+ size: html.length
4593
+ });
4594
+ document = JSON.parse(html);
4595
+ } else {
4596
+ const tagPattern = /<(a|link)\s+([^>]*?)\s*\/?>/gi;
4597
+ const attrPattern = /([a-z][a-z:_-]*)=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
4598
+ let tagMatch;
4599
+ while ((tagMatch = tagPattern.exec(html)) !== null) {
4600
+ const tagContent = tagMatch[2];
4601
+ let attrMatch;
4602
+ const attribs = {};
4603
+ attrPattern.lastIndex = 0;
4604
+ while ((attrMatch = attrPattern.exec(tagContent)) !== null) {
4605
+ const key = attrMatch[1].toLowerCase();
4606
+ const value = attrMatch[2] ?? attrMatch[3] ?? attrMatch[4] ?? "";
4607
+ attribs[key] = value;
4608
+ }
4609
+ if (attribs.rel === "alternate" && "type" in attribs && (attribs.type === "application/activity+json" || attribs.type === "application/ld+json" || attribs.type.startsWith("application/ld+json;")) && "href" in attribs && new URL(attribs.href, docUrl).href !== docUrl.href) {
4610
+ logger.debug("Found alternate document: {alternateUrl} from {url}", {
4611
+ alternateUrl: attribs.href,
4612
+ url: documentUrl
4613
+ });
4614
+ return await fetch$1(new URL(attribs.href, docUrl).href);
4615
+ }
4605
4616
  }
4617
+ document = JSON.parse(html);
4606
4618
  }
4607
- document = JSON.parse(html);
4608
4619
  } else document = await response.json();
4609
4620
  logger.debug("Fetched document: {status} {url} {headers}", {
4610
4621
  status: response.status,
@@ -4635,6 +4646,8 @@ async function getRemoteDocument(url, response, fetch$1) {
4635
4646
  * @since 1.3.0
4636
4647
  */
4637
4648
  function getDocumentLoader({ allowPrivateAddress, skipPreloadedContexts, userAgent } = {}) {
4649
+ const tracerProvider = __opentelemetry_api.trace.getTracerProvider();
4650
+ const tracer = tracerProvider.getTracer(deno_default.name, deno_default.version);
4638
4651
  async function load(url, options) {
4639
4652
  options?.signal?.throwIfAborted();
4640
4653
  if (!skipPreloadedContexts && url in contexts_default) {
@@ -4654,14 +4667,38 @@ function getDocumentLoader({ allowPrivateAddress, skipPreloadedContexts, userAge
4654
4667
  });
4655
4668
  throw error;
4656
4669
  }
4657
- const request = createActivityPubRequest(url, { userAgent });
4658
- logRequest(logger, request);
4659
- const response = await fetch(request, {
4660
- redirect: "manual",
4661
- signal: options?.signal
4670
+ return await tracer.startActiveSpan("activitypub.fetch_document", {
4671
+ kind: __opentelemetry_api.SpanKind.CLIENT,
4672
+ attributes: { "url.full": url }
4673
+ }, async (span) => {
4674
+ try {
4675
+ const request = createActivityPubRequest(url, { userAgent });
4676
+ logRequest(logger, request);
4677
+ const response = await fetch(request, {
4678
+ redirect: "manual",
4679
+ signal: options?.signal
4680
+ });
4681
+ span.setAttribute("http.response.status_code", response.status);
4682
+ if (response.status >= 300 && response.status < 400 && response.headers.has("Location")) {
4683
+ const redirectUrl = response.headers.get("Location");
4684
+ span.setAttribute("http.redirect.url", redirectUrl);
4685
+ return await load(redirectUrl, options);
4686
+ }
4687
+ const result = await getRemoteDocument(url, response, load);
4688
+ span.setAttribute("docloader.document_url", result.documentUrl);
4689
+ if (result.contextUrl != null) span.setAttribute("docloader.context_url", result.contextUrl);
4690
+ return result;
4691
+ } catch (error) {
4692
+ span.recordException(error);
4693
+ span.setStatus({
4694
+ code: __opentelemetry_api.SpanStatusCode.ERROR,
4695
+ message: String(error)
4696
+ });
4697
+ throw error;
4698
+ } finally {
4699
+ span.end();
4700
+ }
4662
4701
  });
4663
- if (response.status >= 300 && response.status < 400 && response.headers.has("Location")) return load(response.headers.get("Location"), options);
4664
- return getRemoteDocument(url, response, load);
4665
4702
  }
4666
4703
  return load;
4667
4704
  }
package/dist/mod.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { getLogger } from "@logtape/logtape";
2
+ import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
2
3
  import process from "node:process";
3
4
  import { lookup } from "node:dns/promises";
4
5
  import { isIP } from "node:net";
@@ -4155,6 +4156,43 @@ const preloadedContexts = {
4155
4156
  };
4156
4157
  var contexts_default = preloadedContexts;
4157
4158
 
4159
+ //#endregion
4160
+ //#region deno.json
4161
+ var name = "@fedify/vocab-runtime";
4162
+ var version = "2.0.0-dev.85+a55c8362";
4163
+ var license = "MIT";
4164
+ var exports = { ".": "./src/mod.ts" };
4165
+ var description = "Runtime library for @fedify/vocab";
4166
+ var author = {
4167
+ "name": "Hong Minhee",
4168
+ "email": "hong@minhee.org",
4169
+ "url": "https://hongminhee.org/"
4170
+ };
4171
+ var imports = {
4172
+ "@multiformats/base-x": "npm:@multiformats/base-x@^4.0.1",
4173
+ "asn1js": "npm:asn1js@^3.0.6",
4174
+ "byte-encodings": "npm:byte-encodings@^1.0.11",
4175
+ "fetch-mock": "npm:fetch-mock@^12.5.4",
4176
+ "multicodec": "npm:multicodec@^3.2.1",
4177
+ "pkijs": "npm:pkijs@^3.2.5"
4178
+ };
4179
+ var exclude = ["dist", "node_modules"];
4180
+ var tasks = {
4181
+ "check": "deno fmt --check && deno lint && deno check src/*.ts",
4182
+ "test": "deno test"
4183
+ };
4184
+ var deno_default = {
4185
+ name,
4186
+ version,
4187
+ license,
4188
+ exports,
4189
+ description,
4190
+ author,
4191
+ imports,
4192
+ exclude,
4193
+ tasks
4194
+ };
4195
+
4158
4196
  //#endregion
4159
4197
  //#region src/link.ts
4160
4198
  const parametersNeedLowerCase = ["rel", "type"];
@@ -4337,42 +4375,6 @@ var HttpHeaderLink = class HttpHeaderLink {
4337
4375
  }
4338
4376
  };
4339
4377
 
4340
- //#endregion
4341
- //#region deno.json
4342
- var name = "@fedify/vocab-runtime";
4343
- var version = "2.0.0-dev.1908+c31cc639";
4344
- var license = "MIT";
4345
- var exports = { ".": "./src/mod.ts" };
4346
- var description = "Runtime library for @fedify/vocab";
4347
- var author = {
4348
- "name": "Hong Minhee",
4349
- "email": "hong@minhee.org",
4350
- "url": "https://hongminhee.org/"
4351
- };
4352
- var imports = {
4353
- "asn1js": "npm:asn1js@^3.0.6",
4354
- "byte-encodings": "npm:byte-encodings@^1.0.11",
4355
- "fetch-mock": "npm:fetch-mock@^12.5.4",
4356
- "multicodec": "npm:multicodec@^3.2.1",
4357
- "pkijs": "npm:pkijs@^3.2.5"
4358
- };
4359
- var exclude = ["dist", "node_modules"];
4360
- var tasks = {
4361
- "check": "deno fmt --check && deno lint && deno check src/*.ts",
4362
- "test": "deno test"
4363
- };
4364
- var deno_default = {
4365
- name,
4366
- version,
4367
- license,
4368
- exports,
4369
- description,
4370
- author,
4371
- imports,
4372
- exclude,
4373
- tasks
4374
- };
4375
-
4376
4378
  //#endregion
4377
4379
  //#region src/request.ts
4378
4380
  /**
@@ -4559,29 +4561,38 @@ async function getRemoteDocument(url, response, fetch$1) {
4559
4561
  }
4560
4562
  let document;
4561
4563
  if (!jsonLd && (contentType === "text/html" || contentType?.startsWith("text/html;") || contentType === "application/xhtml+xml" || contentType?.startsWith("application/xhtml+xml;"))) {
4562
- const p = /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s>]+))+)\s*\/?>/gi;
4563
- const p2 = /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s>]+))/gi;
4564
+ const MAX_HTML_SIZE = 1024 * 1024;
4564
4565
  const html = await response.text();
4565
- let m;
4566
- const rawAttribs = [];
4567
- while ((m = p.exec(html)) !== null) rawAttribs.push(m[2]);
4568
- for (const rawAttrs of rawAttribs) {
4569
- let m2;
4570
- const attribs = {};
4571
- while ((m2 = p2.exec(rawAttrs)) !== null) {
4572
- const key = m2[1].toLowerCase();
4573
- const value = m2[3] ?? m2[4] ?? m2[5] ?? "";
4574
- attribs[key] = value;
4575
- }
4576
- if (attribs.rel === "alternate" && "type" in attribs && (attribs.type === "application/activity+json" || attribs.type === "application/ld+json" || attribs.type.startsWith("application/ld+json;")) && "href" in attribs && new URL(attribs.href, docUrl).href !== docUrl.href) {
4577
- logger.debug("Found alternate document: {alternateUrl} from {url}", {
4578
- alternateUrl: attribs.href,
4579
- url: documentUrl
4580
- });
4581
- return await fetch$1(new URL(attribs.href, docUrl).href);
4566
+ if (html.length > MAX_HTML_SIZE) {
4567
+ logger.warn("HTML response too large, skipping alternate link discovery: {url}", {
4568
+ url: documentUrl,
4569
+ size: html.length
4570
+ });
4571
+ document = JSON.parse(html);
4572
+ } else {
4573
+ const tagPattern = /<(a|link)\s+([^>]*?)\s*\/?>/gi;
4574
+ const attrPattern = /([a-z][a-z:_-]*)=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
4575
+ let tagMatch;
4576
+ while ((tagMatch = tagPattern.exec(html)) !== null) {
4577
+ const tagContent = tagMatch[2];
4578
+ let attrMatch;
4579
+ const attribs = {};
4580
+ attrPattern.lastIndex = 0;
4581
+ while ((attrMatch = attrPattern.exec(tagContent)) !== null) {
4582
+ const key = attrMatch[1].toLowerCase();
4583
+ const value = attrMatch[2] ?? attrMatch[3] ?? attrMatch[4] ?? "";
4584
+ attribs[key] = value;
4585
+ }
4586
+ if (attribs.rel === "alternate" && "type" in attribs && (attribs.type === "application/activity+json" || attribs.type === "application/ld+json" || attribs.type.startsWith("application/ld+json;")) && "href" in attribs && new URL(attribs.href, docUrl).href !== docUrl.href) {
4587
+ logger.debug("Found alternate document: {alternateUrl} from {url}", {
4588
+ alternateUrl: attribs.href,
4589
+ url: documentUrl
4590
+ });
4591
+ return await fetch$1(new URL(attribs.href, docUrl).href);
4592
+ }
4582
4593
  }
4594
+ document = JSON.parse(html);
4583
4595
  }
4584
- document = JSON.parse(html);
4585
4596
  } else document = await response.json();
4586
4597
  logger.debug("Fetched document: {status} {url} {headers}", {
4587
4598
  status: response.status,
@@ -4612,6 +4623,8 @@ async function getRemoteDocument(url, response, fetch$1) {
4612
4623
  * @since 1.3.0
4613
4624
  */
4614
4625
  function getDocumentLoader({ allowPrivateAddress, skipPreloadedContexts, userAgent } = {}) {
4626
+ const tracerProvider = trace.getTracerProvider();
4627
+ const tracer = tracerProvider.getTracer(deno_default.name, deno_default.version);
4615
4628
  async function load(url, options) {
4616
4629
  options?.signal?.throwIfAborted();
4617
4630
  if (!skipPreloadedContexts && url in contexts_default) {
@@ -4631,14 +4644,38 @@ function getDocumentLoader({ allowPrivateAddress, skipPreloadedContexts, userAge
4631
4644
  });
4632
4645
  throw error;
4633
4646
  }
4634
- const request = createActivityPubRequest(url, { userAgent });
4635
- logRequest(logger, request);
4636
- const response = await fetch(request, {
4637
- redirect: "manual",
4638
- signal: options?.signal
4647
+ return await tracer.startActiveSpan("activitypub.fetch_document", {
4648
+ kind: SpanKind.CLIENT,
4649
+ attributes: { "url.full": url }
4650
+ }, async (span) => {
4651
+ try {
4652
+ const request = createActivityPubRequest(url, { userAgent });
4653
+ logRequest(logger, request);
4654
+ const response = await fetch(request, {
4655
+ redirect: "manual",
4656
+ signal: options?.signal
4657
+ });
4658
+ span.setAttribute("http.response.status_code", response.status);
4659
+ if (response.status >= 300 && response.status < 400 && response.headers.has("Location")) {
4660
+ const redirectUrl = response.headers.get("Location");
4661
+ span.setAttribute("http.redirect.url", redirectUrl);
4662
+ return await load(redirectUrl, options);
4663
+ }
4664
+ const result = await getRemoteDocument(url, response, load);
4665
+ span.setAttribute("docloader.document_url", result.documentUrl);
4666
+ if (result.contextUrl != null) span.setAttribute("docloader.context_url", result.contextUrl);
4667
+ return result;
4668
+ } catch (error) {
4669
+ span.recordException(error);
4670
+ span.setStatus({
4671
+ code: SpanStatusCode.ERROR,
4672
+ message: String(error)
4673
+ });
4674
+ throw error;
4675
+ } finally {
4676
+ span.end();
4677
+ }
4639
4678
  });
4640
- if (response.status >= 300 && response.status < 400 && response.headers.has("Location")) return load(response.headers.get("Location"), options);
4641
- return getRemoteDocument(url, response, load);
4642
4679
  }
4643
4680
  return load;
4644
4681
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fedify/vocab-runtime",
3
- "version": "2.0.0-dev.1908+c31cc639",
3
+ "version": "2.0.0-dev.85+a55c8362",
4
4
  "homepage": "https://fedify.dev/",
5
5
  "repository": {
6
6
  "type": "git",
@@ -55,8 +55,9 @@
55
55
  "typescript": "^5.9.3"
56
56
  },
57
57
  "dependencies": {
58
- "@logtape/logtape": "^1.1.1",
58
+ "@logtape/logtape": "^1.3.5",
59
59
  "@multiformats/base-x": "^4.0.1",
60
+ "@opentelemetry/api": "^1.9.0",
60
61
  "asn1js": "^3.0.6",
61
62
  "byte-encodings": "^1.0.11",
62
63
  "multicodec": "^3.2.1",
@@ -1,5 +1,5 @@
1
1
  import fetchMock from "fetch-mock";
2
- import { deepStrictEqual, rejects } from "node:assert";
2
+ import { deepStrictEqual, ok, rejects } from "node:assert";
3
3
  import { test } from "node:test";
4
4
  import preloadedContexts from "./contexts.ts";
5
5
  import { getDocumentLoader } from "./docloader.ts";
@@ -361,5 +361,33 @@ test("getDocumentLoader()", async (t) => {
361
361
  );
362
362
  });
363
363
 
364
+ // Regression test for ReDoS vulnerability (CVE-2025-68475)
365
+ // Malicious HTML payload: <a a="b" a="b" ... (unclosed tag)
366
+ // With the vulnerable regex, this causes catastrophic backtracking
367
+ const maliciousPayload = "<a" + ' a="b"'.repeat(30) + " ";
368
+
369
+ fetchMock.get("https://example.com/redos", {
370
+ body: maliciousPayload,
371
+ headers: { "Content-Type": "text/html; charset=utf-8" },
372
+ });
373
+
374
+ await t.test("ReDoS resistance (CVE-2025-68475)", async () => {
375
+ const start = performance.now();
376
+ // The malicious HTML will fail JSON parsing, but the important thing is
377
+ // that it should complete quickly (not hang due to ReDoS)
378
+ await rejects(
379
+ () => fetchDocumentLoader("https://example.com/redos"),
380
+ SyntaxError,
381
+ );
382
+ const elapsed = performance.now() - start;
383
+
384
+ // Should complete in under 1 second. With the vulnerable regex,
385
+ // this would take 14+ seconds for 30 repetitions.
386
+ ok(
387
+ elapsed < 1000,
388
+ `Potential ReDoS vulnerability detected: ${elapsed}ms (expected < 1000ms)`,
389
+ );
390
+ });
391
+
364
392
  fetchMock.hardReset();
365
393
  });
package/src/docloader.ts CHANGED
@@ -1,4 +1,6 @@
1
1
  import { getLogger } from "@logtape/logtape";
2
+ import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
3
+ import metadata from "../deno.json" with { type: "json" };
2
4
  import preloadedContexts from "./contexts.ts";
3
5
  import { HttpHeaderLink } from "./link.ts";
4
6
  import {
@@ -189,37 +191,55 @@ export async function getRemoteDocument(
189
191
  contentType === "application/xhtml+xml" ||
190
192
  contentType?.startsWith("application/xhtml+xml;"))
191
193
  ) {
192
- const p =
193
- /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s>]+))+)\s*\/?>/ig;
194
- const p2 = /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s>]+))/ig;
194
+ // Security: Limit HTML response size to mitigate ReDoS attacks
195
+ const MAX_HTML_SIZE = 1024 * 1024; // 1MB
195
196
  const html = await response.text();
196
- let m: RegExpExecArray | null;
197
- const rawAttribs: string[] = [];
198
- while ((m = p.exec(html)) !== null) rawAttribs.push(m[2]);
199
- for (const rawAttrs of rawAttribs) {
200
- let m2: RegExpExecArray | null;
201
- const attribs: Record<string, string> = {};
202
- while ((m2 = p2.exec(rawAttrs)) !== null) {
203
- const key = m2[1].toLowerCase();
204
- const value = m2[3] ?? m2[4] ?? m2[5] ?? "";
205
- attribs[key] = value;
206
- }
207
- if (
208
- attribs.rel === "alternate" && "type" in attribs && (
209
- attribs.type === "application/activity+json" ||
210
- attribs.type === "application/ld+json" ||
211
- attribs.type.startsWith("application/ld+json;")
212
- ) && "href" in attribs &&
213
- new URL(attribs.href, docUrl).href !== docUrl.href
214
- ) {
215
- logger.debug(
216
- "Found alternate document: {alternateUrl} from {url}",
217
- { alternateUrl: attribs.href, url: documentUrl },
218
- );
219
- return await fetch(new URL(attribs.href, docUrl).href);
197
+ if (html.length > MAX_HTML_SIZE) {
198
+ logger.warn(
199
+ "HTML response too large, skipping alternate link discovery: {url}",
200
+ { url: documentUrl, size: html.length },
201
+ );
202
+ document = JSON.parse(html);
203
+ } else {
204
+ // Safe regex patterns without nested quantifiers to prevent ReDoS
205
+ // (CVE-2025-68475)
206
+ // Step 1: Extract <a ...> or <link ...> tags
207
+ const tagPattern = /<(a|link)\s+([^>]*?)\s*\/?>/gi;
208
+ // Step 2: Parse attributes
209
+ const attrPattern =
210
+ /([a-z][a-z:_-]*)=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
211
+
212
+ let tagMatch: RegExpExecArray | null;
213
+ while ((tagMatch = tagPattern.exec(html)) !== null) {
214
+ const tagContent = tagMatch[2];
215
+ let attrMatch: RegExpExecArray | null;
216
+ const attribs: Record<string, string> = {};
217
+
218
+ // Reset regex state for attribute parsing
219
+ attrPattern.lastIndex = 0;
220
+ while ((attrMatch = attrPattern.exec(tagContent)) !== null) {
221
+ const key = attrMatch[1].toLowerCase();
222
+ const value = attrMatch[2] ?? attrMatch[3] ?? attrMatch[4] ?? "";
223
+ attribs[key] = value;
224
+ }
225
+
226
+ if (
227
+ attribs.rel === "alternate" && "type" in attribs && (
228
+ attribs.type === "application/activity+json" ||
229
+ attribs.type === "application/ld+json" ||
230
+ attribs.type.startsWith("application/ld+json;")
231
+ ) && "href" in attribs &&
232
+ new URL(attribs.href, docUrl).href !== docUrl.href
233
+ ) {
234
+ logger.debug(
235
+ "Found alternate document: {alternateUrl} from {url}",
236
+ { alternateUrl: attribs.href, url: documentUrl },
237
+ );
238
+ return await fetch(new URL(attribs.href, docUrl).href);
239
+ }
220
240
  }
241
+ document = JSON.parse(html);
221
242
  }
222
- document = JSON.parse(html);
223
243
  } else {
224
244
  document = await response.json();
225
245
  }
@@ -266,6 +286,9 @@ export function getDocumentLoader(
266
286
  { allowPrivateAddress, skipPreloadedContexts, userAgent }:
267
287
  GetDocumentLoaderOptions = {},
268
288
  ): DocumentLoader {
289
+ const tracerProvider = trace.getTracerProvider();
290
+ const tracer = tracerProvider.getTracer(metadata.name, metadata.version);
291
+
269
292
  async function load(
270
293
  url: string,
271
294
  options?: DocumentLoaderOptions,
@@ -289,23 +312,56 @@ export function getDocumentLoader(
289
312
  throw error;
290
313
  }
291
314
  }
292
- const request = createActivityPubRequest(url, { userAgent });
293
- logRequest(logger, request);
294
- const response = await fetch(request, {
295
- // Since Bun has a bug that ignores the `Request.redirect` option,
296
- // to work around it we specify `redirect: "manual"` here too:
297
- // https://github.com/oven-sh/bun/issues/10754
298
- redirect: "manual",
299
- signal: options?.signal,
300
- });
301
- // Follow redirects manually to get the final URL:
302
- if (
303
- response.status >= 300 && response.status < 400 &&
304
- response.headers.has("Location")
305
- ) {
306
- return load(response.headers.get("Location")!, options);
307
- }
308
- return getRemoteDocument(url, response, load);
315
+
316
+ return await tracer.startActiveSpan(
317
+ "activitypub.fetch_document",
318
+ {
319
+ kind: SpanKind.CLIENT,
320
+ attributes: {
321
+ "url.full": url,
322
+ },
323
+ },
324
+ async (span) => {
325
+ try {
326
+ const request = createActivityPubRequest(url, { userAgent });
327
+ logRequest(logger, request);
328
+ const response = await fetch(request, {
329
+ // Since Bun has a bug that ignores the `Request.redirect` option,
330
+ // to work around it we specify `redirect: "manual"` here too:
331
+ // https://github.com/oven-sh/bun/issues/10754
332
+ redirect: "manual",
333
+ signal: options?.signal,
334
+ });
335
+ span.setAttribute("http.response.status_code", response.status);
336
+
337
+ // Follow redirects manually to get the final URL:
338
+ if (
339
+ response.status >= 300 && response.status < 400 &&
340
+ response.headers.has("Location")
341
+ ) {
342
+ const redirectUrl = response.headers.get("Location")!;
343
+ span.setAttribute("http.redirect.url", redirectUrl);
344
+ return await load(redirectUrl, options);
345
+ }
346
+
347
+ const result = await getRemoteDocument(url, response, load);
348
+ span.setAttribute("docloader.document_url", result.documentUrl);
349
+ if (result.contextUrl != null) {
350
+ span.setAttribute("docloader.context_url", result.contextUrl);
351
+ }
352
+ return result;
353
+ } catch (error) {
354
+ span.recordException(error as Error);
355
+ span.setStatus({
356
+ code: SpanStatusCode.ERROR,
357
+ message: String(error),
358
+ });
359
+ throw error;
360
+ } finally {
361
+ span.end();
362
+ }
363
+ },
364
+ );
309
365
  }
310
366
  return load;
311
367
  }