@defai.digital/research-domain 13.4.10 → 13.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,14 @@
9
9
  * - INV-RSH-102: Failed sources don't block
10
10
  */
11
11
  import type { WebFetcherPort } from './types.js';
12
+ /**
13
+ * Error thrown when semaphore acquire times out
14
+ * INV-RSH-103: Semaphore acquire has timeout to prevent deadlocks
15
+ */
16
+ declare class SemaphoreTimeoutError extends Error {
17
+ constructor(timeoutMs: number);
18
+ }
19
+ export { SemaphoreTimeoutError };
12
20
  /**
13
21
  * Create a stub web fetcher
14
22
  */
@@ -1 +1 @@
1
- {"version":3,"file":"web-fetcher.d.ts","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAUH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAsCjD;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,cAAc,CA4BrD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE;IACxC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,cAAc,CAkFjB"}
1
+ {"version":3,"file":"web-fetcher.d.ts","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAUH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAOjD;;;GAGG;AACH,cAAM,qBAAsB,SAAQ,KAAK;gBAC3B,SAAS,EAAE,MAAM;CAI9B;AA8DD,OAAO,EAAE,qBAAqB,EAAE,CAAC;AAEjC;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,cAAc,CA4BrD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE;IACxC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,cAAc,CAkFjB"}
@@ -9,8 +9,24 @@
9
9
  * - INV-RSH-102: Failed sources don't block
10
10
  */
11
11
  import { getErrorMessage } from '@defai.digital/contracts';
12
+ /**
13
+ * Default timeout for semaphore acquire in milliseconds
14
+ */
15
+ const DEFAULT_SEMAPHORE_TIMEOUT_MS = 30000;
16
+ /**
17
+ * Error thrown when semaphore acquire times out
18
+ * INV-RSH-103: Semaphore acquire has timeout to prevent deadlocks
19
+ */
20
+ class SemaphoreTimeoutError extends Error {
21
+ constructor(timeoutMs) {
22
+ super(`Semaphore acquire timed out after ${timeoutMs}ms`);
23
+ this.name = 'SemaphoreTimeoutError';
24
+ }
25
+ }
12
26
  /**
13
27
  * Simple semaphore for limiting concurrent operations
28
+ * INV-RSH-101: Concurrent fetches limited
29
+ * INV-RSH-103: Acquire has timeout to prevent deadlocks
14
30
  */
15
31
  class Semaphore {
16
32
  permits;
@@ -18,14 +34,33 @@ class Semaphore {
18
34
  constructor(permits) {
19
35
  this.permits = permits;
20
36
  }
21
- async acquire() {
37
+ /**
38
+ * Acquire a permit, with timeout to prevent deadlocks
39
+ * INV-RSH-103: Throws SemaphoreTimeoutError if timeout expires
40
+ */
41
+ async acquire(timeoutMs = DEFAULT_SEMAPHORE_TIMEOUT_MS) {
22
42
  if (this.permits > 0) {
23
43
  this.permits--;
24
44
  return;
25
45
  }
26
- // Wait for a permit to become available
27
- await new Promise((resolve) => {
28
- this.waiting.push(resolve);
46
+ // Wait for a permit to become available, with timeout
47
+ return new Promise((resolve, reject) => {
48
+ const waiter = { resolve, reject };
49
+ this.waiting.push(waiter);
50
+ // Set up timeout to prevent deadlock
51
+ const timeoutId = setTimeout(() => {
52
+ const index = this.waiting.indexOf(waiter);
53
+ if (index !== -1) {
54
+ this.waiting.splice(index, 1);
55
+ reject(new SemaphoreTimeoutError(timeoutMs));
56
+ }
57
+ }, timeoutMs);
58
+ // Wrap resolve to clear timeout when permit is acquired
59
+ const originalResolve = waiter.resolve;
60
+ waiter.resolve = () => {
61
+ clearTimeout(timeoutId);
62
+ originalResolve();
63
+ };
29
64
  });
30
65
  // Permit was transferred directly by release(), no need to decrement
31
66
  }
@@ -34,7 +69,7 @@ class Semaphore {
34
69
  if (next) {
35
70
  // Transfer permit directly to waiting acquirer
36
71
  // Don't increment permits - the permit goes straight to the waiter
37
- next();
72
+ next.resolve();
38
73
  }
39
74
  else {
40
75
  // No one waiting, return permit to pool
@@ -42,6 +77,7 @@ class Semaphore {
42
77
  }
43
78
  }
44
79
  }
80
+ export { SemaphoreTimeoutError };
45
81
  /**
46
82
  * Create a stub web fetcher
47
83
  */
@@ -140,6 +176,16 @@ export function createWebFetcher(options) {
140
176
  },
141
177
  };
142
178
  }
179
+ /**
180
+ * Maximum HTML size to process for code extraction
181
+ * INV-RSH-103: Limit input size to prevent ReDoS attacks
182
+ */
183
+ const MAX_HTML_SIZE_FOR_CODE_EXTRACTION = 1_000_000; // 1MB
184
+ /**
185
+ * Maximum number of code blocks to extract
186
+ * INV-RSH-104: Limit code block count to prevent excessive processing
187
+ */
188
+ const MAX_CODE_BLOCKS = 50;
143
189
  /**
144
190
  * Parse HTML content
145
191
  */
@@ -147,13 +193,28 @@ function parseHtml(html, maxLength) {
147
193
  // Extract title
148
194
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
149
195
  const title = titleMatch?.[1]?.trim() ?? '';
150
- // Extract code blocks
196
+ // Extract code blocks with protection against ReDoS
151
197
  const codeBlocks = [];
152
- const codeRegex = /<(pre|code)[^>]*(?:class="[^"]*language-(\w+)[^"]*")?[^>]*>([\s\S]*?)<\/\1>/gi;
198
+ // INV-RSH-103: Limit HTML size before regex to prevent ReDoS
199
+ const safeHtml = html.length > MAX_HTML_SIZE_FOR_CODE_EXTRACTION
200
+ ? html.slice(0, MAX_HTML_SIZE_FOR_CODE_EXTRACTION)
201
+ : html;
202
+ // INV-RSH-103: Use safer regex pattern with bounded attribute matching
203
+ // Pattern limits attribute length and avoids nested quantifiers
204
+ const codeRegex = /<(pre|code)(?:\s+[^>]{0,500})?>([\s\S]{0,10000}?)<\/\1>/gi;
205
+ // Separate pattern for language detection (simpler, applied only to small matches)
206
+ const langRegex = /class="[^"]*\blanguage-(\w+)\b[^"]*"/i;
153
207
  let match;
154
- while ((match = codeRegex.exec(html)) !== null) {
155
- const language = match[2] ?? 'text';
156
- const code = stripHtml(match[3] ?? '').trim();
208
+ let matchCount = 0;
209
+ while ((match = codeRegex.exec(safeHtml)) !== null) {
210
+ // INV-RSH-104: Limit number of code blocks extracted
211
+ if (matchCount >= MAX_CODE_BLOCKS)
212
+ break;
213
+ matchCount++;
214
+ const tagContent = match[0] ?? '';
215
+ const langMatch = langRegex.exec(tagContent);
216
+ const language = langMatch?.[1] ?? 'text';
217
+ const code = stripHtml(match[2] ?? '').trim();
157
218
  if (code.length > 10 && code.length < 5000) {
158
219
  codeBlocks.push({
159
220
  code,
@@ -1 +1 @@
1
- {"version":3,"file":"web-fetcher.js","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AASH,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAG3D;;GAEG;AACH,MAAM,SAAS;IACL,OAAO,CAAS;IAChB,OAAO,GAAmB,EAAE,CAAC;IAErC,YAAY,OAAe;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO;QACT,CAAC;QACD,wCAAwC;QACxC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YAClC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QACH,qEAAqE;IACvE,CAAC;IAED,OAAO;QACL,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,IAAI,EAAE,CAAC;YACT,+CAA+C;YAC/C,mEAAmE;YACnE,IAAI,EAAE,CAAC;QACT,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC;IACH,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,OAAqB;YAC/B,OAAO,CAAC,IAAI,CACV,4DAA4D;gBAC1D,kDAAkD,CACrD,CAAC;YAEF,OAAO;gBACL,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE,2EAA2E;gBACpF,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,SAAS;gBACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,WAAmB;YAC9C,OAAO,CAAC,IAAI,CACV,wDAAwD;gBACtD,kDAAkD,CACrD,CAAC;YAEF,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAIhC;IACC,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,KAAK,CAAC;IACvD,MAAM,SAAS,GACb,OAAO,CAAC,SAAS,IAAI,wEAAwE,CAAC;IAEhG,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,OAAqB;YAC/B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;YAE1B,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,cAAc,CAClC,CAAC;gBAEF,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE;wBACxC,MAAM,EAAE,UAAU,CAAC,MAAM;wBACzB,OAAO,EAAE;4BACP,YAAY,EAAE,SAAS;4BACvB,MAAM,EAAE,4CAA4C;yBACrD;qBACF,CAAC,CAAC;oBAEH,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;wBACjB,OAAO;4BACL,GAAG,EAAE,OAAO,CAAC,GAAG;4BAChB,KAAK,EAAE,EAAE;4BACT,OAAO,EAAE,EAAE;4BACX,UAAU,EAAE,EAAE;4BACd,WAAW,EAAE,SAAS;4BACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;4BACnC,OAAO,EAAE,KAAK;4BACd,KAAK,EAAE,QAAQ,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE;yBACzD,CAAC;oBACJ,CAAC;oBAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACnC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;oBAC1E,MAAM,WAAW,GAAG,oBAAoB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAEtD,OAAO;wBACL,GAAG,EAAE,OAAO,CAAC,GAAG;wBAChB,KAAK;wBACL,OAAO;wBACP,UAAU,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;wBACjD,WAAW;wBACX,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,OAAO,EAAE,IAAI;qBACd,CAAC;gBACJ,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,OAAO;wBACL,GAAG,EAAE,OAAO,CAAC,GAAG;wBAChB,KAAK,EAAE,EAAE;wBACT,OAAO,EAAE,EAAE;wBACX,UAAU,EAAE,EAAE;wBACd,WAAW,EAAE,SAAS;wBACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,eAAe,CAAC,KAAK,EAAE,cAAc,CAAC;qBAC9C,CAAC;gBACJ,CAAC;YACH,CAAC;oBAAS,CAAC;gBACT,SAAS,CAAC,OAAO,EAAE,CAAC;YACtB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,WAAmB;YAC9C,6DAA6D;YAC7D,OAAO,CAAC,IAAI,CACV,iEAAiE;gBAC/D,sDAAsD,CACzD,CAAC;YACF,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAChB,IAAY,EACZ,SAAiB;IAEjB,gBAAgB;IAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAE5C,sBAAsB;IACtB,MAAM,UAAU,GAAkB,EAAE,CAAC;IACrC,MAAM,SAAS,GAAG,+EAA+E,CAAC;IAClG,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;QACpC,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAC9C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;YAC3C,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI;gBACJ,QAAQ;gBACR,MAAM,EAAE,KAAK;aACd,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,OAAO,GAAG,IAAI;QAChB,0BAA0B;SACzB,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC;SAC1C,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC;QACzC,mBAAmB;SAClB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;QACzB,kBAAkB;SACjB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;QACxB,uBAAuB;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,qBAAqB;IACrB,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;QAC/B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,KAAK,CAAC;IAChD,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,GAAW;IACvC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErD,iCAAiC;IACjC,MAAM,eAAe,GAAG;QACtB,iBAAiB;QACjB,uBAAuB;QACvB,YAAY;QACZ,oBAAoB;QACpB,WAAW;QACX,WAAW;QACX,YAAY;QACZ,YAAY;QACZ,QAAQ;QACR,eAAe;QACf,oBAAoB;QACpB,kBAAkB;QAClB,qBAAqB;KACtB,CAAC;IAEF,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,oBAAoB;IACpB,MAAM,gBAAgB,GAAG;QACvB,mBAAmB;QACnB,YAAY;QACZ,QAAQ;QACR,YAAY;QACZ,cAAc;KACf,CAAC;IAEF,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
1
+ {"version":3,"file":"web-fetcher.js","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AASH,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAG3D;;GAEG;AACH,MAAM,4BAA4B,GAAG,KAAK,CAAC;AAE3C;;;GAGG;AACH,MAAM,qBAAsB,SAAQ,KAAK;IACvC,YAAY,SAAiB;QAC3B,KAAK,CAAC,qCAAqC,SAAS,IAAI,CAAC,CAAC;QAC1D,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,SAAS;IACL,OAAO,CAAS;IAChB,OAAO,GAAiE,EAAE,CAAC;IAEnF,YAAY,OAAe;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,OAAO,CAAC,YAAoB,4BAA4B;QAC5D,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO;QACT,CAAC;QAED,sDAAsD;QACtD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,MAAM,MAAM,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YACnC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAE1B,qCAAqC;YACrC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE;gBAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;gBAC3C,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;oBACjB,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;oBAC9B,MAAM,CAAC,IAAI,qBAAqB,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC/C,CAAC;YACH,CAAC,EAAE,SAAS,CAAC,CAAC;YAEd,wDAAwD;YACxD,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC;YACvC,MAAM,CAAC,OAAO,GAAG,GAAG,EAAE;gBACpB,YAAY,CAAC,SAAS,CAAC,CAAC;gBACxB,eAAe,EAAE,CAAC;YACpB,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;QACH,qEAAqE;IACvE,CAAC;IAED,OAAO;QACL,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,IAAI,EAAE,CAAC;YACT,+CAA+C;YAC/C,mEAAmE;YACnE,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC;IACH,CAAC;CACF;AAED,OAAO,EAAE,qBAAqB,EAAE,CAAC;AAEjC;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,OAAqB;YAC/B,OAAO,CAAC,IAAI,CACV,4DAA4D;gBAC1D,kDAAkD,CACrD,CAAC;YAEF,OAAO;gBACL,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE,2EAA2E;gBACpF,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,SAAS;gBACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,WAAmB;YAC9C,OAAO,CAAC,IAAI,CACV,wDAAwD;gBACtD,kDAAkD,CACrD,CAAC;YAEF,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAIhC;IACC,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,KAAK,CAAC;IACvD,MAAM,SAAS,GACb,OAAO,CAAC,SAAS,IAAI,wEAAwE,CAAC;IAEhG,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,OAAqB;YAC/B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;YAE1B,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,cAAc,CAClC,CAAC;gBAEF,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE;wBACxC,MAAM,EAAE,UAAU,CAAC,MAAM;wBACzB,OAAO,EAAE;4BACP,YAAY,EAAE,SAAS;4BACvB,MAAM,EAAE,4CAA4C;yBACrD;qBACF,CAAC,CAAC;oBAEH,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;wBACjB,OAAO;4BACL,GAAG,EAAE,OAAO,CAAC,GAAG;4BAChB,KAAK,EAAE,EAAE;4BACT,OAAO,EAAE,EAAE;4BACX,UAAU,EAAE,EAAE;4BACd,WAAW,EAAE,SAAS;4BACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;4BACnC,OAAO,EAAE,KAAK;4BACd,KAAK,EAAE,QAAQ,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE;yBACzD,CAAC;oBACJ,CAAC;oBAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACnC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;oBAC1E,MAAM,WAAW,GAAG,oBAAoB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAEtD,OAAO;wBACL,GAAG,EAAE,OAAO,CAAC,GAAG;wBAChB,KAAK;wBACL,OAAO;wBACP,UAAU,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;wBACjD,WAAW;wBACX,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,OAAO,EAAE,IAAI;qBACd,CAAC;gBACJ,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,OAAO;wBACL,GAAG,EAAE,OAAO,CAAC,GAAG;wBAChB,KAAK,EAAE,EAAE;wBACT,OAAO,EAAE,EAAE;wBACX,UAAU,EAAE,EAAE;wBACd,WAAW,EAAE,SAAS;wBACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,eAAe,CAAC,KAAK,EAAE,cAAc,CAAC;qBAC9C,CAAC;gBACJ,CAAC;YACH,CAAC;oBAAS,CAAC;gBACT,SAAS,CAAC,OAAO,EAAE,CAAC;YACtB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,WAAmB;YAC9C,6DAA6D;YAC7D,OAAO,CAAC,IAAI,CACV,iEAAiE;gBAC/D,sDAAsD,CACzD,CAAC;YACF,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,iCAAiC,GAAG,SAAS,CAAC,CAAC,MAAM;AAE3D;;;GAGG;AACH,MAAM,eAAe,GAAG,EAAE,CAAC;AAE3B;;GAEG;AACH,SAAS,SAAS,CAChB,IAAY,EACZ,SAAiB;IAEjB,gBAAgB;IAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAE5C,oDAAoD;IACpD,MAAM,UAAU,GAAkB,EAAE,CAAC;IAErC,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,GAAG,iCAAiC;QAC9D,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,iCAAiC,CAAC;QAClD,CAAC,CAAC,IAAI,CAAC;IAET,uEAAuE;IACvE,gEAAgE;IAChE,MAAM,SAAS,GAAG,2DAA2D,CAAC;IAC9E,mFAAmF;IACnF,MAAM,SAAS,GAAG,uCAAuC,CAAC;IAE1D,IAAI,KAAK,CAAC;IACV,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,qDAAqD;QACrD,IAAI,UAAU,IAAI,eAAe;YAAE,MAAM;QACzC,UAAU,EAAE,CAAC;QAEb,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,QAAQ,GAAG,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;QAC1C,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAE9C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;YAC3C,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI;gBACJ,QAAQ;gBACR,MAAM,EAAE,KAAK;aACd,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,OAAO,GAAG,IAAI;QAChB,0BAA0B;SACzB,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC;SAC1C,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC;QACzC,mBAAmB;SAClB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;QACzB,kBAAkB;SACjB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;QACxB,uBAAuB;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,qBAAqB;IACrB,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;QAC/B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,KAAK,CAAC;IAChD,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,GAAW;IACvC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErD,iCAAiC;IACjC,MAAM,eAAe,GAAG;QACtB,iBAAiB;QACjB,uBAAuB;QACvB,YAAY;QACZ,oBAAoB;QACpB,WAAW;QACX,WAAW;QACX,YAAY;QACZ,YAAY;QACZ,QAAQ;QACR,eAAe;QACf,oBAAoB;QACpB,kBAAkB;QAClB,qBAAqB;KACtB,CAAC;IAEF,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,oBAAoB;IACpB,MAAM,gBAAgB,GAAG;QACvB,mBAAmB;QACnB,YAAY;QACZ,QAAQ;QACR,YAAY;QACZ,cAAc;KACf,CAAC;IAEF,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@defai.digital/research-domain",
3
- "version": "13.4.10",
3
+ "version": "13.5.2",
4
4
  "type": "module",
5
5
  "description": "Deep research agent with live documentation fetching and knowledge synthesis",
6
6
  "license": "BUSL-1.1",
@@ -33,7 +33,7 @@
33
33
  "access": "public"
34
34
  },
35
35
  "dependencies": {
36
- "@defai.digital/contracts": "13.4.10"
36
+ "@defai.digital/contracts": "13.5.2"
37
37
  },
38
38
  "devDependencies": {
39
39
  "typescript": "^5.6.3"
@@ -19,25 +19,65 @@ import type {
19
19
  import { getErrorMessage } from '@defai.digital/contracts';
20
20
  import type { WebFetcherPort } from './types.js';
21
21
 
22
+ /**
23
+ * Default timeout for semaphore acquire in milliseconds
24
+ */
25
+ const DEFAULT_SEMAPHORE_TIMEOUT_MS = 30000;
26
+
27
+ /**
28
+ * Error thrown when semaphore acquire times out
29
+ * INV-RSH-103: Semaphore acquire has timeout to prevent deadlocks
30
+ */
31
+ class SemaphoreTimeoutError extends Error {
32
+ constructor(timeoutMs: number) {
33
+ super(`Semaphore acquire timed out after ${timeoutMs}ms`);
34
+ this.name = 'SemaphoreTimeoutError';
35
+ }
36
+ }
37
+
22
38
  /**
23
39
  * Simple semaphore for limiting concurrent operations
40
+ * INV-RSH-101: Concurrent fetches limited
41
+ * INV-RSH-103: Acquire has timeout to prevent deadlocks
24
42
  */
25
43
  class Semaphore {
26
44
  private permits: number;
27
- private waiting: (() => void)[] = [];
45
+ private waiting: Array<{ resolve: () => void; reject: (err: Error) => void }> = [];
28
46
 
29
47
  constructor(permits: number) {
30
48
  this.permits = permits;
31
49
  }
32
50
 
33
- async acquire(): Promise<void> {
51
+ /**
52
+ * Acquire a permit, with timeout to prevent deadlocks
53
+ * INV-RSH-103: Throws SemaphoreTimeoutError if timeout expires
54
+ */
55
+ async acquire(timeoutMs: number = DEFAULT_SEMAPHORE_TIMEOUT_MS): Promise<void> {
34
56
  if (this.permits > 0) {
35
57
  this.permits--;
36
58
  return;
37
59
  }
38
- // Wait for a permit to become available
39
- await new Promise<void>((resolve) => {
40
- this.waiting.push(resolve);
60
+
61
+ // Wait for a permit to become available, with timeout
62
+ return new Promise<void>((resolve, reject) => {
63
+ const waiter = { resolve, reject };
64
+ this.waiting.push(waiter);
65
+
66
+ // Set up timeout to prevent deadlock
67
+ const timeoutId = setTimeout(() => {
68
+ const index = this.waiting.indexOf(waiter);
69
+ if (index !== -1) {
70
+ this.waiting.splice(index, 1);
71
+ reject(new SemaphoreTimeoutError(timeoutMs));
72
+ }
73
+ }, timeoutMs);
74
+
75
+ // Wrap resolve to clear timeout when permit is acquired
76
+ const originalResolve = waiter.resolve;
77
+ waiter.resolve = () => {
78
+ clearTimeout(timeoutId);
79
+ originalResolve();
80
+ };
41
81
  });
42
82
  // Permit was transferred directly by release(), no need to decrement
43
83
  }
@@ -47,7 +87,7 @@ class Semaphore {
47
87
  if (next) {
48
88
  // Transfer permit directly to waiting acquirer
49
89
  // Don't increment permits - the permit goes straight to the waiter
50
- next();
90
+ next.resolve();
51
91
  } else {
52
92
  // No one waiting, return permit to pool
53
93
  this.permits++;
@@ -55,6 +95,8 @@ class Semaphore {
55
95
  }
56
96
  }
57
97
 
98
+ export { SemaphoreTimeoutError };
99
+
58
100
  /**
59
101
  * Create a stub web fetcher
60
102
  */
@@ -179,6 +221,18 @@ export function createWebFetcher(options: {
179
221
  };
180
222
  }
181
223
 
224
+ /**
225
+ * Maximum HTML size to process for code extraction
226
+ * INV-RSH-103: Limit input size to prevent ReDoS attacks
227
+ */
228
+ const MAX_HTML_SIZE_FOR_CODE_EXTRACTION = 1_000_000; // 1MB
229
+
230
+ /**
231
+ * Maximum number of code blocks to extract
232
+ * INV-RSH-104: Limit code block count to prevent excessive processing
233
+ */
234
+ const MAX_CODE_BLOCKS = 50;
235
+
182
236
  /**
183
237
  * Parse HTML content
184
238
  */
@@ -190,14 +244,33 @@ function parseHtml(
190
244
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
191
245
  const title = titleMatch?.[1]?.trim() ?? '';
192
246
 
193
- // Extract code blocks
247
+ // Extract code blocks with protection against ReDoS
194
248
  const codeBlocks: CodeExample[] = [];
195
- const codeRegex = /<(pre|code)[^>]*(?:class="[^"]*language-(\w+)[^"]*")?[^>]*>([\s\S]*?)<\/\1>/gi;
249
+
250
+ // INV-RSH-103: Limit HTML size before regex to prevent ReDoS
251
+ const safeHtml = html.length > MAX_HTML_SIZE_FOR_CODE_EXTRACTION
252
+ ? html.slice(0, MAX_HTML_SIZE_FOR_CODE_EXTRACTION)
253
+ : html;
254
+
255
+ // INV-RSH-103: Use safer regex pattern with bounded attribute matching
256
+ // Pattern limits attribute length and avoids nested quantifiers
257
+ const codeRegex = /<(pre|code)(?:\s+[^>]{0,500})?>([\s\S]{0,10000}?)<\/\1>/gi;
258
+ // Separate pattern for language detection (simpler, applied only to small matches)
259
+ const langRegex = /class="[^"]*\blanguage-(\w+)\b[^"]*"/i;
260
+
196
261
  let match;
262
+ let matchCount = 0;
263
+
264
+ while ((match = codeRegex.exec(safeHtml)) !== null) {
265
+ // INV-RSH-104: Limit number of code blocks extracted
266
+ if (matchCount >= MAX_CODE_BLOCKS) break;
267
+ matchCount++;
268
+
269
+ const tagContent = match[0] ?? '';
270
+ const langMatch = langRegex.exec(tagContent);
271
+ const language = langMatch?.[1] ?? 'text';
272
+ const code = stripHtml(match[2] ?? '').trim();
197
273
 
198
- while ((match = codeRegex.exec(html)) !== null) {
199
- const language = match[2] ?? 'text';
200
- const code = stripHtml(match[3] ?? '').trim();
201
274
  if (code.length > 10 && code.length < 5000) {
202
275
  codeBlocks.push({
203
276
  code,