@defai.digital/research-domain 13.4.11 → 13.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/web-fetcher.d.ts +8 -0
- package/dist/web-fetcher.d.ts.map +1 -1
- package/dist/web-fetcher.js +71 -10
- package/dist/web-fetcher.js.map +1 -1
- package/package.json +2 -2
- package/src/web-fetcher.ts +84 -11
package/dist/web-fetcher.d.ts
CHANGED
|
@@ -9,6 +9,14 @@
|
|
|
9
9
|
* - INV-RSH-102: Failed sources don't block
|
|
10
10
|
*/
|
|
11
11
|
import type { WebFetcherPort } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Error thrown when semaphore acquire times out
|
|
14
|
+
* INV-RSH-103: Semaphore acquire has timeout to prevent deadlocks
|
|
15
|
+
*/
|
|
16
|
+
declare class SemaphoreTimeoutError extends Error {
|
|
17
|
+
constructor(timeoutMs: number);
|
|
18
|
+
}
|
|
19
|
+
export { SemaphoreTimeoutError };
|
|
12
20
|
/**
|
|
13
21
|
* Create a stub web fetcher
|
|
14
22
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web-fetcher.d.ts","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAUH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"web-fetcher.d.ts","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAUH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAOjD;;;GAGG;AACH,cAAM,qBAAsB,SAAQ,KAAK;gBAC3B,SAAS,EAAE,MAAM;CAI9B;AA8DD,OAAO,EAAE,qBAAqB,EAAE,CAAC;AAEjC;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,cAAc,CA4BrD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE;IACxC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,cAAc,CAkFjB"}
|
package/dist/web-fetcher.js
CHANGED
|
@@ -9,8 +9,24 @@
|
|
|
9
9
|
* - INV-RSH-102: Failed sources don't block
|
|
10
10
|
*/
|
|
11
11
|
import { getErrorMessage } from '@defai.digital/contracts';
|
|
12
|
+
/**
|
|
13
|
+
* Default timeout for semaphore acquire in milliseconds
|
|
14
|
+
*/
|
|
15
|
+
const DEFAULT_SEMAPHORE_TIMEOUT_MS = 30000;
|
|
16
|
+
/**
|
|
17
|
+
* Error thrown when semaphore acquire times out
|
|
18
|
+
* INV-RSH-103: Semaphore acquire has timeout to prevent deadlocks
|
|
19
|
+
*/
|
|
20
|
+
class SemaphoreTimeoutError extends Error {
|
|
21
|
+
constructor(timeoutMs) {
|
|
22
|
+
super(`Semaphore acquire timed out after ${timeoutMs}ms`);
|
|
23
|
+
this.name = 'SemaphoreTimeoutError';
|
|
24
|
+
}
|
|
25
|
+
}
|
|
12
26
|
/**
|
|
13
27
|
* Simple semaphore for limiting concurrent operations
|
|
28
|
+
* INV-RSH-101: Concurrent fetches limited
|
|
29
|
+
* INV-RSH-103: Acquire has timeout to prevent deadlocks
|
|
14
30
|
*/
|
|
15
31
|
class Semaphore {
|
|
16
32
|
permits;
|
|
@@ -18,14 +34,33 @@ class Semaphore {
|
|
|
18
34
|
constructor(permits) {
|
|
19
35
|
this.permits = permits;
|
|
20
36
|
}
|
|
21
|
-
|
|
37
|
+
/**
|
|
38
|
+
* Acquire a permit, with timeout to prevent deadlocks
|
|
39
|
+
* INV-RSH-103: Throws SemaphoreTimeoutError if timeout expires
|
|
40
|
+
*/
|
|
41
|
+
async acquire(timeoutMs = DEFAULT_SEMAPHORE_TIMEOUT_MS) {
|
|
22
42
|
if (this.permits > 0) {
|
|
23
43
|
this.permits--;
|
|
24
44
|
return;
|
|
25
45
|
}
|
|
26
|
-
// Wait for a permit to become available
|
|
27
|
-
|
|
28
|
-
|
|
46
|
+
// Wait for a permit to become available, with timeout
|
|
47
|
+
return new Promise((resolve, reject) => {
|
|
48
|
+
const waiter = { resolve, reject };
|
|
49
|
+
this.waiting.push(waiter);
|
|
50
|
+
// Set up timeout to prevent deadlock
|
|
51
|
+
const timeoutId = setTimeout(() => {
|
|
52
|
+
const index = this.waiting.indexOf(waiter);
|
|
53
|
+
if (index !== -1) {
|
|
54
|
+
this.waiting.splice(index, 1);
|
|
55
|
+
reject(new SemaphoreTimeoutError(timeoutMs));
|
|
56
|
+
}
|
|
57
|
+
}, timeoutMs);
|
|
58
|
+
// Wrap resolve to clear timeout when permit is acquired
|
|
59
|
+
const originalResolve = waiter.resolve;
|
|
60
|
+
waiter.resolve = () => {
|
|
61
|
+
clearTimeout(timeoutId);
|
|
62
|
+
originalResolve();
|
|
63
|
+
};
|
|
29
64
|
});
|
|
30
65
|
// Permit was transferred directly by release(), no need to decrement
|
|
31
66
|
}
|
|
@@ -34,7 +69,7 @@ class Semaphore {
|
|
|
34
69
|
if (next) {
|
|
35
70
|
// Transfer permit directly to waiting acquirer
|
|
36
71
|
// Don't increment permits - the permit goes straight to the waiter
|
|
37
|
-
next();
|
|
72
|
+
next.resolve();
|
|
38
73
|
}
|
|
39
74
|
else {
|
|
40
75
|
// No one waiting, return permit to pool
|
|
@@ -42,6 +77,7 @@ class Semaphore {
|
|
|
42
77
|
}
|
|
43
78
|
}
|
|
44
79
|
}
|
|
80
|
+
export { SemaphoreTimeoutError };
|
|
45
81
|
/**
|
|
46
82
|
* Create a stub web fetcher
|
|
47
83
|
*/
|
|
@@ -140,6 +176,16 @@ export function createWebFetcher(options) {
|
|
|
140
176
|
},
|
|
141
177
|
};
|
|
142
178
|
}
|
|
179
|
+
/**
|
|
180
|
+
* Maximum HTML size to process for code extraction
|
|
181
|
+
* INV-RSH-103: Limit input size to prevent ReDoS attacks
|
|
182
|
+
*/
|
|
183
|
+
const MAX_HTML_SIZE_FOR_CODE_EXTRACTION = 1_000_000; // 1MB
|
|
184
|
+
/**
|
|
185
|
+
* Maximum number of code blocks to extract
|
|
186
|
+
* INV-RSH-104: Limit code block count to prevent excessive processing
|
|
187
|
+
*/
|
|
188
|
+
const MAX_CODE_BLOCKS = 50;
|
|
143
189
|
/**
|
|
144
190
|
* Parse HTML content
|
|
145
191
|
*/
|
|
@@ -147,13 +193,28 @@ function parseHtml(html, maxLength) {
|
|
|
147
193
|
// Extract title
|
|
148
194
|
const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
149
195
|
const title = titleMatch?.[1]?.trim() ?? '';
|
|
150
|
-
// Extract code blocks
|
|
196
|
+
// Extract code blocks with protection against ReDoS
|
|
151
197
|
const codeBlocks = [];
|
|
152
|
-
|
|
198
|
+
// INV-RSH-103: Limit HTML size before regex to prevent ReDoS
|
|
199
|
+
const safeHtml = html.length > MAX_HTML_SIZE_FOR_CODE_EXTRACTION
|
|
200
|
+
? html.slice(0, MAX_HTML_SIZE_FOR_CODE_EXTRACTION)
|
|
201
|
+
: html;
|
|
202
|
+
// INV-RSH-103: Use safer regex pattern with bounded attribute matching
|
|
203
|
+
// Pattern limits attribute length and avoids nested quantifiers
|
|
204
|
+
const codeRegex = /<(pre|code)(?:\s+[^>]{0,500})?>([\s\S]{0,10000}?)<\/\1>/gi;
|
|
205
|
+
// Separate pattern for language detection (simpler, applied only to small matches)
|
|
206
|
+
const langRegex = /class="[^"]*\blanguage-(\w+)\b[^"]*"/i;
|
|
153
207
|
let match;
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
208
|
+
let matchCount = 0;
|
|
209
|
+
while ((match = codeRegex.exec(safeHtml)) !== null) {
|
|
210
|
+
// INV-RSH-104: Limit number of code blocks extracted
|
|
211
|
+
if (matchCount >= MAX_CODE_BLOCKS)
|
|
212
|
+
break;
|
|
213
|
+
matchCount++;
|
|
214
|
+
const tagContent = match[0] ?? '';
|
|
215
|
+
const langMatch = langRegex.exec(tagContent);
|
|
216
|
+
const language = langMatch?.[1] ?? 'text';
|
|
217
|
+
const code = stripHtml(match[2] ?? '').trim();
|
|
157
218
|
if (code.length > 10 && code.length < 5000) {
|
|
158
219
|
codeBlocks.push({
|
|
159
220
|
code,
|
package/dist/web-fetcher.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web-fetcher.js","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AASH,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAG3D;;GAEG;AACH,MAAM,SAAS;IACL,OAAO,CAAS;IAChB,OAAO,
|
|
1
|
+
{"version":3,"file":"web-fetcher.js","sourceRoot":"","sources":["../src/web-fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AASH,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAG3D;;GAEG;AACH,MAAM,4BAA4B,GAAG,KAAK,CAAC;AAE3C;;;GAGG;AACH,MAAM,qBAAsB,SAAQ,KAAK;IACvC,YAAY,SAAiB;QAC3B,KAAK,CAAC,qCAAqC,SAAS,IAAI,CAAC,CAAC;QAC1D,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,SAAS;IACL,OAAO,CAAS;IAChB,OAAO,GAAiE,EAAE,CAAC;IAEnF,YAAY,OAAe;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,OAAO,CAAC,YAAoB,4BAA4B;QAC5D,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO;QACT,CAAC;QAED,sDAAsD;QACtD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,MAAM,MAAM,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YACnC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAE1B,qCAAqC;YACrC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE;gBAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;gBAC3C,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;oBACjB,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;oBAC9B,MAAM,CAAC,IAAI,qBAAqB,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC/C,CAAC;YACH,CAAC,EAAE,SAAS,CAAC,CAAC;YAEd,wDAAwD;YACxD,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC;YACvC,MAAM,CAAC,OAAO,GAAG,GAAG,EAAE;gBACpB,YAAY,CAAC,SAAS,CAAC,CAAC;gBACxB,eAAe,EAAE,CAAC;YACpB,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;QACH,qEAAqE;IACvE,CAAC;IAED,OAAO;QACL,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,IAAI,EAAE,CAAC;YACT,+CAA+C;YAC/C,mEAAmE;YACnE,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC;IACH,CAAC;CACF;AAED,OAAO,EAAE,qBAAqB,EAAE,CAAC;AAEjC;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,OAAqB;YAC/B,OAAO,CAAC,IAAI,CACV,4DAA4D;gBAC1D,kDAAkD,CACrD,CAAC;YAEF,OAAO;gBACL,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE,2EAA2E;gBACpF,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,SAAS;gBACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,
WAAmB;YAC9C,OAAO,CAAC,IAAI,CACV,wDAAwD;gBACtD,kDAAkD,CACrD,CAAC;YAEF,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAIhC;IACC,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,KAAK,CAAC;IACvD,MAAM,SAAS,GACb,OAAO,CAAC,SAAS,IAAI,wEAAwE,CAAC;IAEhG,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,OAAqB;YAC/B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;YAE1B,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAC1B,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EACxB,OAAO,CAAC,OAAO,IAAI,cAAc,CAClC,CAAC;gBAEF,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE;wBACxC,MAAM,EAAE,UAAU,CAAC,MAAM;wBACzB,OAAO,EAAE;4BACP,YAAY,EAAE,SAAS;4BACvB,MAAM,EAAE,4CAA4C;yBACrD;qBACF,CAAC,CAAC;oBAEH,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;wBACjB,OAAO;4BACL,GAAG,EAAE,OAAO,CAAC,GAAG;4BAChB,KAAK,EAAE,EAAE;4BACT,OAAO,EAAE,EAAE;4BACX,UAAU,EAAE,EAAE;4BACd,WAAW,EAAE,SAAS;4BACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;4BACnC,OAAO,EAAE,KAAK;4BACd,KAAK,EAAE,QAAQ,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE;yBACzD,CAAC;oBACJ,CAAC;oBAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACnC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;oBAC1E,MAAM,WAAW,GAAG,oBAAoB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAEtD,OAAO;wBACL,GAAG,EAAE,OAAO,CAAC,GAAG;wBAChB,KAAK;wBACL,OAAO;wBACP,UAAU,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;wBACjD,WAAW;wBACX,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,OAAO,EAAE,IAAI;qBACd,CAAC;gBACJ,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,OAAO;wBACL,GAAG,EAAE,OAAO,CAAC,GAAG;wBAChB,KAAK,EAAE,EAAE;wBACT,OAAO,EAAE,EAAE;wBACX,UAAU,EAAE,EAAE;wBACd,WAAW,EAAE,SAAS;wBACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,eAAe,CAAC,KAAK,EAAE,cAAc,CAAC;qBAC9C,CAAC;gBACJ,CAAC;YACH,CAAC;oBAAS,CAAC;gBACT,SAAS,CAAC,OAAO,EAAE,CAAC;YACtB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,WAA
mB;YAC9C,6DAA6D;YAC7D,OAAO,CAAC,IAAI,CACV,iEAAiE;gBAC/D,sDAAsD,CACzD,CAAC;YACF,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,iCAAiC,GAAG,SAAS,CAAC,CAAC,MAAM;AAE3D;;;GAGG;AACH,MAAM,eAAe,GAAG,EAAE,CAAC;AAE3B;;GAEG;AACH,SAAS,SAAS,CAChB,IAAY,EACZ,SAAiB;IAEjB,gBAAgB;IAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAE5C,oDAAoD;IACpD,MAAM,UAAU,GAAkB,EAAE,CAAC;IAErC,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,GAAG,iCAAiC;QAC9D,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,iCAAiC,CAAC;QAClD,CAAC,CAAC,IAAI,CAAC;IAET,uEAAuE;IACvE,gEAAgE;IAChE,MAAM,SAAS,GAAG,2DAA2D,CAAC;IAC9E,mFAAmF;IACnF,MAAM,SAAS,GAAG,uCAAuC,CAAC;IAE1D,IAAI,KAAK,CAAC;IACV,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,qDAAqD;QACrD,IAAI,UAAU,IAAI,eAAe;YAAE,MAAM;QACzC,UAAU,EAAE,CAAC;QAEb,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,QAAQ,GAAG,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;QAC1C,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAE9C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;YAC3C,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI;gBACJ,QAAQ;gBACR,MAAM,EAAE,KAAK;aACd,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,OAAO,GAAG,IAAI;QAChB,0BAA0B;SACzB,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC;SAC1C,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC;QACzC,mBAAmB;SAClB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;QACzB,kBAAkB;SACjB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;QACxB,uBAAuB;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,qBAAqB;IACrB,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;QAC/B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,KAAK,CAAC;IAChD,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AACxC,CAAC;AAED;;GAE
G;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,GAAW;IACvC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErD,iCAAiC;IACjC,MAAM,eAAe,GAAG;QACtB,iBAAiB;QACjB,uBAAuB;QACvB,YAAY;QACZ,oBAAoB;QACpB,WAAW;QACX,WAAW;QACX,YAAY;QACZ,YAAY;QACZ,QAAQ;QACR,eAAe;QACf,oBAAoB;QACpB,kBAAkB;QAClB,qBAAqB;KACtB,CAAC;IAEF,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,oBAAoB;IACpB,MAAM,gBAAgB,GAAG;QACvB,mBAAmB;QACnB,YAAY;QACZ,QAAQ;QACR,YAAY;QACZ,cAAc;KACf,CAAC;IAEF,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@defai.digital/research-domain",
|
|
3
|
-
"version": "13.4.11",
|
|
3
|
+
"version": "13.5.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Deep research agent with live documentation fetching and knowledge synthesis",
|
|
6
6
|
"license": "BUSL-1.1",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"access": "public"
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"@defai.digital/contracts": "13.
|
|
36
|
+
"@defai.digital/contracts": "13.5.2"
|
|
37
37
|
},
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"typescript": "^5.6.3"
|
package/src/web-fetcher.ts
CHANGED
|
@@ -19,25 +19,65 @@ import type {
|
|
|
19
19
|
import { getErrorMessage } from '@defai.digital/contracts';
|
|
20
20
|
import type { WebFetcherPort } from './types.js';
|
|
21
21
|
|
|
22
|
+
/**
|
|
23
|
+
* Default timeout for semaphore acquire in milliseconds
|
|
24
|
+
*/
|
|
25
|
+
const DEFAULT_SEMAPHORE_TIMEOUT_MS = 30000;
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Error thrown when semaphore acquire times out
|
|
29
|
+
* INV-RSH-103: Semaphore acquire has timeout to prevent deadlocks
|
|
30
|
+
*/
|
|
31
|
+
class SemaphoreTimeoutError extends Error {
|
|
32
|
+
constructor(timeoutMs: number) {
|
|
33
|
+
super(`Semaphore acquire timed out after ${timeoutMs}ms`);
|
|
34
|
+
this.name = 'SemaphoreTimeoutError';
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
22
38
|
/**
|
|
23
39
|
* Simple semaphore for limiting concurrent operations
|
|
40
|
+
* INV-RSH-101: Concurrent fetches limited
|
|
41
|
+
* INV-RSH-103: Acquire has timeout to prevent deadlocks
|
|
24
42
|
*/
|
|
25
43
|
class Semaphore {
|
|
26
44
|
private permits: number;
|
|
27
|
-
private waiting: (
|
|
45
|
+
private waiting: Array<{ resolve: () => void; reject: (err: Error) => void }> = [];
|
|
28
46
|
|
|
29
47
|
constructor(permits: number) {
|
|
30
48
|
this.permits = permits;
|
|
31
49
|
}
|
|
32
50
|
|
|
33
|
-
|
|
51
|
+
/**
|
|
52
|
+
* Acquire a permit, with timeout to prevent deadlocks
|
|
53
|
+
* INV-RSH-103: Throws SemaphoreTimeoutError if timeout expires
|
|
54
|
+
*/
|
|
55
|
+
async acquire(timeoutMs: number = DEFAULT_SEMAPHORE_TIMEOUT_MS): Promise<void> {
|
|
34
56
|
if (this.permits > 0) {
|
|
35
57
|
this.permits--;
|
|
36
58
|
return;
|
|
37
59
|
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
60
|
+
|
|
61
|
+
// Wait for a permit to become available, with timeout
|
|
62
|
+
return new Promise<void>((resolve, reject) => {
|
|
63
|
+
const waiter = { resolve, reject };
|
|
64
|
+
this.waiting.push(waiter);
|
|
65
|
+
|
|
66
|
+
// Set up timeout to prevent deadlock
|
|
67
|
+
const timeoutId = setTimeout(() => {
|
|
68
|
+
const index = this.waiting.indexOf(waiter);
|
|
69
|
+
if (index !== -1) {
|
|
70
|
+
this.waiting.splice(index, 1);
|
|
71
|
+
reject(new SemaphoreTimeoutError(timeoutMs));
|
|
72
|
+
}
|
|
73
|
+
}, timeoutMs);
|
|
74
|
+
|
|
75
|
+
// Wrap resolve to clear timeout when permit is acquired
|
|
76
|
+
const originalResolve = waiter.resolve;
|
|
77
|
+
waiter.resolve = () => {
|
|
78
|
+
clearTimeout(timeoutId);
|
|
79
|
+
originalResolve();
|
|
80
|
+
};
|
|
41
81
|
});
|
|
42
82
|
// Permit was transferred directly by release(), no need to decrement
|
|
43
83
|
}
|
|
@@ -47,7 +87,7 @@ class Semaphore {
|
|
|
47
87
|
if (next) {
|
|
48
88
|
// Transfer permit directly to waiting acquirer
|
|
49
89
|
// Don't increment permits - the permit goes straight to the waiter
|
|
50
|
-
next();
|
|
90
|
+
next.resolve();
|
|
51
91
|
} else {
|
|
52
92
|
// No one waiting, return permit to pool
|
|
53
93
|
this.permits++;
|
|
@@ -55,6 +95,8 @@ class Semaphore {
|
|
|
55
95
|
}
|
|
56
96
|
}
|
|
57
97
|
|
|
98
|
+
export { SemaphoreTimeoutError };
|
|
99
|
+
|
|
58
100
|
/**
|
|
59
101
|
* Create a stub web fetcher
|
|
60
102
|
*/
|
|
@@ -179,6 +221,18 @@ export function createWebFetcher(options: {
|
|
|
179
221
|
};
|
|
180
222
|
}
|
|
181
223
|
|
|
224
|
+
/**
|
|
225
|
+
* Maximum HTML size to process for code extraction
|
|
226
|
+
* INV-RSH-103: Limit input size to prevent ReDoS attacks
|
|
227
|
+
*/
|
|
228
|
+
const MAX_HTML_SIZE_FOR_CODE_EXTRACTION = 1_000_000; // 1MB
|
|
229
|
+
|
|
230
|
+
/**
|
|
231
|
+
* Maximum number of code blocks to extract
|
|
232
|
+
* INV-RSH-104: Limit code block count to prevent excessive processing
|
|
233
|
+
*/
|
|
234
|
+
const MAX_CODE_BLOCKS = 50;
|
|
235
|
+
|
|
182
236
|
/**
|
|
183
237
|
* Parse HTML content
|
|
184
238
|
*/
|
|
@@ -190,14 +244,33 @@ function parseHtml(
|
|
|
190
244
|
const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
191
245
|
const title = titleMatch?.[1]?.trim() ?? '';
|
|
192
246
|
|
|
193
|
-
// Extract code blocks
|
|
247
|
+
// Extract code blocks with protection against ReDoS
|
|
194
248
|
const codeBlocks: CodeExample[] = [];
|
|
195
|
-
|
|
249
|
+
|
|
250
|
+
// INV-RSH-103: Limit HTML size before regex to prevent ReDoS
|
|
251
|
+
const safeHtml = html.length > MAX_HTML_SIZE_FOR_CODE_EXTRACTION
|
|
252
|
+
? html.slice(0, MAX_HTML_SIZE_FOR_CODE_EXTRACTION)
|
|
253
|
+
: html;
|
|
254
|
+
|
|
255
|
+
// INV-RSH-103: Use safer regex pattern with bounded attribute matching
|
|
256
|
+
// Pattern limits attribute length and avoids nested quantifiers
|
|
257
|
+
const codeRegex = /<(pre|code)(?:\s+[^>]{0,500})?>([\s\S]{0,10000}?)<\/\1>/gi;
|
|
258
|
+
// Separate pattern for language detection (simpler, applied only to small matches)
|
|
259
|
+
const langRegex = /class="[^"]*\blanguage-(\w+)\b[^"]*"/i;
|
|
260
|
+
|
|
196
261
|
let match;
|
|
262
|
+
let matchCount = 0;
|
|
263
|
+
|
|
264
|
+
while ((match = codeRegex.exec(safeHtml)) !== null) {
|
|
265
|
+
// INV-RSH-104: Limit number of code blocks extracted
|
|
266
|
+
if (matchCount >= MAX_CODE_BLOCKS) break;
|
|
267
|
+
matchCount++;
|
|
268
|
+
|
|
269
|
+
const tagContent = match[0] ?? '';
|
|
270
|
+
const langMatch = langRegex.exec(tagContent);
|
|
271
|
+
const language = langMatch?.[1] ?? 'text';
|
|
272
|
+
const code = stripHtml(match[2] ?? '').trim();
|
|
197
273
|
|
|
198
|
-
while ((match = codeRegex.exec(html)) !== null) {
|
|
199
|
-
const language = match[2] ?? 'text';
|
|
200
|
-
const code = stripHtml(match[3] ?? '').trim();
|
|
201
274
|
if (code.length > 10 && code.length < 5000) {
|
|
202
275
|
codeBlocks.push({
|
|
203
276
|
code,
|