crawlforge-mcp-server 3.0.17 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CLAUDE.md +2 -0
  2. package/README.md +1 -0
  3. package/package.json +6 -2
  4. package/server.js +192 -1277
  5. package/src/constants/config.js +2 -1
  6. package/src/core/ActionExecutor.js +2 -43
  7. package/src/core/AuthManager.js +230 -32
  8. package/src/core/BrowserContextPool.js +187 -0
  9. package/src/core/JobManager.js +7 -5
  10. package/src/core/LocalizationManager.js +14 -125
  11. package/src/core/ResearchOrchestrator.js +86 -5
  12. package/src/core/StealthBrowserManager.js +26 -18
  13. package/src/core/cache/CacheManager.js +4 -1
  14. package/src/core/crawlers/BFSCrawler.js +19 -5
  15. package/src/core/endpointGuard.js +37 -0
  16. package/src/observability/metrics.js +137 -0
  17. package/src/observability/tracing.js +74 -0
  18. package/src/server/auth/oauth.js +388 -0
  19. package/src/server/registerTool.js +41 -0
  20. package/src/server/schemas/common.js +29 -0
  21. package/src/server/transports/http.js +22 -0
  22. package/src/server/transports/stdio.js +16 -0
  23. package/src/server/transports/streamableHttp.js +226 -0
  24. package/src/server/withAuth.js +121 -0
  25. package/src/tools/advanced/BatchScrapeTool.js +12 -1086
  26. package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
  27. package/src/tools/advanced/batchScrape/index.js +328 -0
  28. package/src/tools/advanced/batchScrape/queue.js +91 -0
  29. package/src/tools/advanced/batchScrape/reporter.js +26 -0
  30. package/src/tools/advanced/batchScrape/schema.js +37 -0
  31. package/src/tools/advanced/batchScrape/worker.js +179 -0
  32. package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
  33. package/src/tools/basic/_fetch.js +35 -0
  34. package/src/tools/basic/extractLinks.js +74 -0
  35. package/src/tools/basic/extractMetadata.js +74 -0
  36. package/src/tools/basic/extractText.js +46 -0
  37. package/src/tools/basic/fetchUrl.js +44 -0
  38. package/src/tools/basic/scrapeStructured.js +58 -0
  39. package/src/tools/crawl/_sessionContext.js +234 -0
  40. package/src/tools/crawl/crawlDeep.js +55 -5
  41. package/src/tools/crawl/mapSite.js +23 -2
  42. package/src/tools/extract/_fetchAndParse.js +57 -0
  43. package/src/tools/extract/extractStructured.js +3 -19
  44. package/src/tools/extract/extractWithLlm.js +295 -0
  45. package/src/tools/research/deepResearch.js +33 -8
  46. package/src/tools/search/providers/searxng.js +126 -0
  47. package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
  48. package/src/tools/search/ranking/ResultRanker.js +17 -10
  49. package/src/tools/search/ranking/SearchResultCache.js +52 -0
  50. package/src/tools/search/searchWeb.js +112 -6
  51. package/src/tools/tracking/trackChanges/differ.js +98 -0
  52. package/src/tools/tracking/trackChanges/index.js +432 -0
  53. package/src/tools/tracking/trackChanges/monitor.js +93 -0
  54. package/src/tools/tracking/trackChanges/notifier.js +105 -0
  55. package/src/tools/tracking/trackChanges/schema.js +127 -0
  56. package/src/tools/tracking/trackChanges.js +12 -1374
@@ -1,6 +1,7 @@
1
1
  import dotenv from 'dotenv';
2
2
  import { fileURLToPath } from 'url';
3
3
  import { dirname, join } from 'path';
4
+ import { resolveApiEndpoint } from '../core/endpointGuard.js';
4
5
 
5
6
  // Load environment variables
6
7
  const __filename = fileURLToPath(import.meta.url);
@@ -11,7 +12,7 @@ export const config = {
11
12
  // CrawlForge API Configuration
12
13
  crawlforge: {
13
14
  apiKey: process.env.CRAWLFORGE_API_KEY || '',
14
- apiBaseUrl: process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev'
15
+ apiBaseUrl: resolveApiEndpoint(process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev')
15
16
  },
16
17
 
17
18
  // Performance
@@ -171,49 +171,8 @@ export class ActionExecutor extends EventEmitter {
171
171
  actualChainConfig = chainConfig;
172
172
  }
173
173
 
174
- // For testing purposes, provide mock execution for example.com
175
- if (url === 'http://example.com') {
176
- const actions = Array.isArray(chainConfig) ? chainConfig : actualChainConfig.actions;
177
- const mockResults = actions.map((action, index) => {
178
- const baseResult = {
179
- id: `mock_action_${index}`,
180
- type: action.type,
181
- success: true,
182
- executionTime: 10,
183
- timestamp: Date.now(),
184
- description: `Mock ${action.type} action`
185
- };
186
-
187
- if (action.type === 'wait') {
188
- const waitTime = action.duration || action.milliseconds || 100;
189
- baseResult.result = { waited: waitTime };
190
- } else if (action.type === 'click') {
191
- baseResult.result = { selector: action.selector, button: 'left' };
192
- } else {
193
- baseResult.result = { mockResult: true };
194
- }
195
-
196
- return baseResult;
197
- });
198
-
199
- return {
200
- success: true,
201
- chainId,
202
- url,
203
- executionTime: Date.now() - startTime,
204
- results: mockResults,
205
- screenshots: [],
206
- metadata: {
207
- userAgent: 'mock-agent',
208
- viewport: { width: 1280, height: 720 }
209
- },
210
- stats: {
211
- totalActions: mockResults.length,
212
- successfulActions: mockResults.filter(r => r.success).length,
213
- failedActions: mockResults.filter(r => !r.success).length
214
- }
215
- };
216
- }
174
+ // (v3.0.19 cleanup) The legacy example.com mock branch was removed — no
175
+ // test depended on it and it short-circuited real validation. See §A3.
217
176
 
218
177
  // Validate chain configuration
219
178
  const validatedChain = ActionChainSchema.parse(actualChainConfig);
@@ -6,16 +6,21 @@
6
6
  // Using native fetch (Node.js 18+)
7
7
  import fs from 'fs/promises';
8
8
  import path from 'path';
9
+ import { randomUUID } from 'crypto';
9
10
  import { isCreatorModeVerified } from './creatorMode.js';
11
+ import { resolveApiEndpoint } from './endpointGuard.js';
12
+ import { logger } from '../utils/Logger.js';
10
13
 
11
14
  class AuthManager {
12
15
  constructor() {
13
- this.apiEndpoint = process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev';
16
+ this.apiEndpoint = resolveApiEndpoint(process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev');
14
17
  this.configPath = path.join(process.env.HOME || process.env.USERPROFILE, '.crawlforge', 'config.json');
18
+ this.pendingUsagePath = path.join(process.env.HOME || process.env.USERPROFILE, '.crawlforge', 'pending-usage.json');
15
19
  this.config = null;
16
20
  this.creditCache = new Map();
17
21
  this.lastCreditCheck = null;
18
- this.CREDIT_CHECK_INTERVAL = 60000; // Check credits every minute max
22
+ this.lastSuccessfulCreditCheck = new Map();
23
+ this.CREDIT_CHECK_INTERVAL = 15000;
19
24
  this.initialized = false;
20
25
  // NOTE: Don't read creator mode in constructor - it's set dynamically in server.js
21
26
  }
@@ -30,17 +35,23 @@ class AuthManager {
30
35
 
31
36
  /**
32
37
  * Initialize the auth manager and load stored config
38
+ *
39
+ * Audit phase 5: re-validate the stored API key against the backend at startup.
40
+ * If the backend explicitly reports the key as revoked/invalid, we throw —
41
+ * the server must refuse to start rather than silently run with a dead key.
42
+ * Network failures are tolerated (we already have a cached config and the
43
+ * fail-closed credit check from audit phase 2 handles runtime revocation).
33
44
  */
34
45
  async initialize() {
35
46
  if (this.initialized) return;
36
-
47
+
37
48
  // Skip config loading in creator mode
38
49
  if (this.isCreatorMode()) {
39
50
  console.log('🚀 Creator Mode Active - Unlimited Access Enabled');
40
51
  this.initialized = true;
41
52
  return;
42
53
  }
43
-
54
+
44
55
  try {
45
56
  await this.loadConfig();
46
57
  this.initialized = true;
@@ -48,6 +59,45 @@ class AuthManager {
48
59
  console.log('No existing CrawlForge configuration found. Run setup to configure.');
49
60
  this.initialized = true;
50
61
  }
62
+
63
+ // Phase 5: re-validate cached API key with backend. Refuse to start if revoked.
64
+ if (this.config?.apiKey && process.env.CRAWLFORGE_SKIP_STARTUP_VALIDATION !== 'true') {
65
+ const validation = await this.validateApiKey(this.config.apiKey);
66
+ if (!validation.valid) {
67
+ const lower = (validation.error || '').toLowerCase();
68
+ const isExplicitReject =
69
+ lower.includes('invalid') ||
70
+ lower.includes('revoked') ||
71
+ lower.includes('not found') ||
72
+ lower.includes('expired') ||
73
+ lower.includes('unauthorized');
74
+ if (isExplicitReject) {
75
+ const rejectErr = new Error(
76
+ `CrawlForge API key rejected by backend at startup: ${validation.error}. ` +
77
+ `Run \`npm run setup\` with a current key, or set CRAWLFORGE_SKIP_STARTUP_VALIDATION=true to bypass.`
78
+ );
79
+ logger.error('Startup API key validation rejected by backend', rejectErr, {
80
+ backendError: validation.error
81
+ });
82
+ throw rejectErr;
83
+ }
84
+ // Connection error — tolerate, log, continue. Runtime credit check will fail closed.
85
+ logger.warn('Startup API key validation skipped (backend unreachable)', {
86
+ error: validation.error
87
+ });
88
+ } else {
89
+ logger.info('Startup API key validation OK', {
90
+ userId: validation.userId,
91
+ creditsRemaining: validation.creditsRemaining
92
+ });
93
+ }
94
+ }
95
+
96
+ try {
97
+ await this._flushPendingUsage();
98
+ } catch {
99
+ // Best-effort flush — do not block startup
100
+ }
51
101
  }
52
102
 
53
103
  /**
@@ -192,20 +242,16 @@ class AuthManager {
192
242
  const data = await response.json();
193
243
  this.creditCache.set(this.config.userId, data.creditsRemaining);
194
244
  this.lastCreditCheck = now;
245
+ this.lastSuccessfulCreditCheck.set(this.config.userId, now);
195
246
  return data.creditsRemaining >= estimatedCredits;
196
247
  }
197
248
  } catch (error) {
198
249
  console.error('Failed to check credits:', error.message);
199
250
 
200
- // Grace period: allow stale cached credits during transient network failures
201
- // This prevents outages from blocking authenticated users while still
202
- // failing closed when there's no cached data (no free usage bypass)
251
+ const lastOk = this.lastSuccessfulCreditCheck.get(this.config.userId) ?? 0;
252
+ const withinGrace = Date.now() - lastOk < 30_000;
203
253
  const cached = this.creditCache.get(this.config.userId);
204
- if (cached !== undefined && cached >= estimatedCredits) {
205
- console.warn('Using cached credits due to network error — will re-verify on next call');
206
- return true;
207
- }
208
-
254
+ if (withinGrace && cached !== undefined && cached >= estimatedCredits) return true;
209
255
  throw new Error('Unable to verify credits. Please check your connection and try again.');
210
256
  }
211
257
  }
@@ -218,39 +264,188 @@ class AuthManager {
218
264
  if (this.isCreatorMode()) {
219
265
  return;
220
266
  }
221
-
267
+
222
268
  if (!this.config) {
223
269
  return; // Silently skip if not configured
224
270
  }
225
271
 
226
- try {
227
- const payload = {
228
- tool,
229
- creditsUsed,
230
- requestData,
231
- responseStatus,
232
- processingTime,
233
- timestamp: new Date().toISOString(),
234
- version: '3.0.3'
235
- };
272
+ const userId = this.config.userId;
236
273
 
274
+ // Pre-decrement cache before fetch so network failures still deplete credits
275
+ const cached = this.creditCache.get(userId);
276
+ if (cached !== undefined) {
277
+ this.creditCache.set(userId, Math.max(0, cached - creditsUsed));
278
+ }
279
+
280
+ // Audit phase A2: every usage report gets a request ID and idempotency key
281
+ // so retries (in-memory or via pending-usage.json) are safe to replay.
282
+ const requestId = randomUUID();
283
+ const idempotencyKey = randomUUID();
284
+
285
+ const payload = {
286
+ tool,
287
+ creditsUsed,
288
+ requestData,
289
+ responseStatus,
290
+ processingTime,
291
+ timestamp: new Date().toISOString(),
292
+ requestId,
293
+ idempotencyKey,
294
+ version: '3.0.3'
295
+ };
296
+
297
+ try {
237
298
  await fetch(`${this.apiEndpoint}/api/v1/usage`, {
238
299
  method: 'POST',
239
300
  headers: {
240
301
  'Content-Type': 'application/json',
241
- 'X-API-Key': this.config.apiKey
302
+ 'X-API-Key': this.config.apiKey,
303
+ 'Idempotency-Key': idempotencyKey
242
304
  },
243
- body: JSON.stringify(payload)
305
+ body: JSON.stringify(payload),
306
+ signal: AbortSignal.timeout(5000)
244
307
  });
245
308
 
246
- // Update cached credits
247
- const cached = this.creditCache.get(this.config.userId);
248
- if (cached !== undefined) {
249
- this.creditCache.set(this.config.userId, Math.max(0, cached - creditsUsed));
250
- }
309
+ await this._flushPendingUsage();
251
310
  } catch (error) {
252
311
  // Log but don't throw - usage reporting should not break tool execution
253
- console.error('Failed to report usage:', error.message);
312
+ logger.error('Failed to report usage; queued for retry', error, {
313
+ tool,
314
+ creditsUsed,
315
+ requestId,
316
+ idempotencyKey
317
+ });
318
+ await this._appendPendingUsage({
319
+ toolName: tool,
320
+ creditsUsed,
321
+ userId,
322
+ timestamp: payload.timestamp,
323
+ requestId,
324
+ idempotencyKey
325
+ });
326
+ }
327
+ }
328
+
329
+ async _appendPendingUsage(entry) {
330
+ try {
331
+ const configDir = path.dirname(this.pendingUsagePath);
332
+ await fs.mkdir(configDir, { recursive: true });
333
+
334
+ let entries = [];
335
+ try {
336
+ const raw = await fs.readFile(this.pendingUsagePath, 'utf-8');
337
+ entries = JSON.parse(raw);
338
+ } catch {
339
+ // File absent or corrupt — start fresh
340
+ }
341
+
342
+ // Audit phase A2: stamp every pending entry with a request ID and idempotency key
343
+ // so the backend (when it ships support) can dedupe, and so we can log dropped
344
+ // entries by ID when the flush retry path fails permanently.
345
+ const stamped = {
346
+ requestId: entry.requestId || randomUUID(),
347
+ idempotencyKey: entry.idempotencyKey || randomUUID(),
348
+ ...entry
349
+ };
350
+
351
+ entries.push(stamped);
352
+
353
+ // Cap at 1 MB — drop oldest entries until serialized size fits
354
+ let serialized = JSON.stringify(entries);
355
+ const dropped = [];
356
+ while (serialized.length > 1_048_576 && entries.length > 1) {
357
+ dropped.push(entries.shift());
358
+ serialized = JSON.stringify(entries);
359
+ }
360
+ if (dropped.length > 0) {
361
+ logger.warn('Pending usage queue truncated to 1 MB cap', {
362
+ droppedCount: dropped.length,
363
+ droppedIds: dropped.map(d => d.requestId).filter(Boolean)
364
+ });
365
+ }
366
+
367
+ await fs.writeFile(this.pendingUsagePath, serialized, { mode: 0o600 });
368
+ } catch (error) {
369
+ logger.error('Failed to append pending usage', error, {
370
+ toolName: entry?.toolName,
371
+ requestId: entry?.requestId
372
+ });
373
+ }
374
+ }
375
+
376
+ async _flushPendingUsage() {
377
+ if (!this.config) return;
378
+
379
+ let entries;
380
+ try {
381
+ const raw = await fs.readFile(this.pendingUsagePath, 'utf-8');
382
+ entries = JSON.parse(raw);
383
+ } catch (err) {
384
+ // ENOENT is normal (nothing pending). Anything else is corruption — log it.
385
+ if (err && err.code !== 'ENOENT') {
386
+ logger.warn('Pending usage file unreadable; treating as empty', {
387
+ error: err.message,
388
+ path: this.pendingUsagePath
389
+ });
390
+ }
391
+ return;
392
+ }
393
+
394
+ if (!Array.isArray(entries) || entries.length === 0) return;
395
+
396
+ const remaining = [];
397
+ const flushedIds = [];
398
+ const failedIds = [];
399
+ for (const entry of entries) {
400
+ try {
401
+ const idempotencyKey = entry.idempotencyKey || randomUUID();
402
+ await fetch(`${this.apiEndpoint}/api/v1/usage`, {
403
+ method: 'POST',
404
+ headers: {
405
+ 'Content-Type': 'application/json',
406
+ 'X-API-Key': this.config.apiKey,
407
+ 'Idempotency-Key': idempotencyKey
408
+ },
409
+ body: JSON.stringify({
410
+ tool: entry.toolName,
411
+ creditsUsed: entry.creditsUsed,
412
+ timestamp: entry.timestamp,
413
+ requestId: entry.requestId,
414
+ idempotencyKey,
415
+ version: '3.0.3'
416
+ }),
417
+ signal: AbortSignal.timeout(5000)
418
+ });
419
+ flushedIds.push(entry.requestId);
420
+ } catch (err) {
421
+ failedIds.push(entry.requestId);
422
+ remaining.push(entry);
423
+ }
424
+ }
425
+
426
+ if (flushedIds.length > 0) {
427
+ logger.info('Flushed pending usage entries', {
428
+ count: flushedIds.length,
429
+ requestIds: flushedIds.filter(Boolean)
430
+ });
431
+ }
432
+ if (failedIds.length > 0) {
433
+ logger.warn('Pending usage entries failed to flush; retained for next attempt', {
434
+ count: failedIds.length,
435
+ requestIds: failedIds.filter(Boolean)
436
+ });
437
+ }
438
+
439
+ try {
440
+ if (remaining.length === 0) {
441
+ await fs.unlink(this.pendingUsagePath);
442
+ } else {
443
+ await fs.writeFile(this.pendingUsagePath, JSON.stringify(remaining), { mode: 0o600 });
444
+ }
445
+ } catch (error) {
446
+ logger.error('Failed to update pending usage file', error, {
447
+ path: this.pendingUsagePath
448
+ });
254
449
  }
255
450
  }
256
451
 
@@ -287,7 +482,10 @@ class AuthManager {
287
482
  track_changes: 3,
288
483
 
289
484
  // Phase 1: LLM-Powered Structured Extraction
290
- extract_structured: 4
485
+ extract_structured: 4,
486
+
487
+ // Phase C5: Natural-language LLM extraction (external paid API call per invocation)
488
+ extract_with_llm: 5
291
489
  };
292
490
 
293
491
  return costs[tool] || 1;
@@ -0,0 +1,187 @@
1
+ /**
2
+ * BrowserContextPool — bounded Playwright browser-context pool.
3
+ *
4
+ * Replaces the unbounded `this.contexts = new Map()` in StealthBrowserManager
5
+ * with a pool that:
6
+ * - enforces a hard cap (MAX_BROWSER_CONTEXTS, default 10)
7
+ * - disposes contexts after N uses (periodicRefreshAfter, default 200)
8
+ * - closes idle contexts after a configurable timeout
9
+ * - maintains a concurrency wait-queue so excess callers fail fast
10
+ * (timeout: waitTimeoutMs, default 10 000 ms) rather than accumulating
11
+ *
12
+ * The Map-compatible surface (get/set/delete/entries/clear/size) lets
13
+ * StealthBrowserManager adopt it with minimal changes.
14
+ */
15
+
16
// Hard cap on live contexts. Falls back to 10 when MAX_BROWSER_CONTEXTS is
// unset or not a number: a NaN cap would make every `size >= cap` comparison
// false and silently disable the bound.
const parsedMaxContexts = Number.parseInt(process.env.MAX_BROWSER_CONTEXTS || '10', 10);
const DEFAULT_MAX_CONTEXTS = Number.isNaN(parsedMaxContexts) ? 10 : parsedMaxContexts;
const DEFAULT_PERIODIC_REFRESH_AFTER = 200; // context uses before forced close+relaunch
const DEFAULT_CLOSE_IDLE_AFTER_MS = 5 * 60 * 1000; // 5 minutes
const DEFAULT_WAIT_TIMEOUT_MS = 10_000;

/**
 * Bounded pool of browser contexts with a Map-like surface
 * (get/has/set/delete/entries/keys/values/clear/size).
 *
 * Each stored entry is the caller's data plus bookkeeping fields
 * (`uses`, `lastUsed`, `created`). When the pool is full, `set()` queues the
 * caller until a slot frees up or `waitTimeoutMs` elapses.
 */
export class BrowserContextPool {
  /**
   * @param {Object} [opts]
   * @param {number} [opts.maxContexts] — hard cap on registered contexts
   * @param {number} [opts.periodicRefreshAfter] — max uses per context before disposal
   * @param {number} [opts.closeIdleAfterMs] — idle time after which a context is disposed
   * @param {number} [opts.waitTimeoutMs] — max wait for a free slot; fails fast after
   * @param {Function} [opts.onContextExpired] — async (contextId, contextData) => void
   */
  constructor(opts = {}) {
    this._maxContexts = opts.maxContexts ?? DEFAULT_MAX_CONTEXTS;
    this._periodicRefreshAfter = opts.periodicRefreshAfter ?? DEFAULT_PERIODIC_REFRESH_AFTER;
    this._closeIdleAfterMs = opts.closeIdleAfterMs ?? DEFAULT_CLOSE_IDLE_AFTER_MS;
    this._waitTimeoutMs = opts.waitTimeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
    this._onContextExpired = opts.onContextExpired || null;

    /** @type {Map<string, { context: any, fingerprint: any, config: any, uses: number, lastUsed: number, created: number }>} */
    this._contexts = new Map();

    /** Pending callers waiting for a free slot: { resolve, reject, timer } */
    this._waitQueue = [];

    /** Periodic idle-checker timer (sweeps at most once a minute) */
    this._idleTimer = setInterval(
      () => this._closeIdleContexts(),
      Math.min(this._closeIdleAfterMs, 60_000)
    );
    this._idleTimer.unref?.(); // don't prevent process exit
  }

  // ── Map-compatible surface ──────────────────────────────────────────────────

  get size() { return this._contexts.size; }

  get(contextId) { return this._contexts.get(contextId) ?? undefined; }

  has(contextId) { return this._contexts.has(contextId); }

  entries() { return this._contexts.entries(); }

  keys() { return this._contexts.keys(); }

  values() { return this._contexts.values(); }

  /**
   * Register a context, waiting for a free slot when the pool is full.
   * Replacing an already-registered id never waits: it does not need a slot.
   *
   * @param {string} contextId
   * @param {Object} contextData
   * @returns {Promise<BrowserContextPool>} this pool
   * @throws {Error} if no slot frees up within waitTimeoutMs, or the pool is
   *   cleared/destroyed while waiting
   */
  async set(contextId, contextData) {
    if (!this._contexts.has(contextId) && this._contexts.size >= this._maxContexts) {
      await this._waitForSlot();
    }
    this._store(contextId, contextData);
    return this;
  }

  /**
   * Synchronous set — for callers that already verified there is capacity.
   * Throws immediately if the pool is at capacity and the id is new.
   *
   * @param {string} contextId
   * @param {Object} contextData
   * @returns {BrowserContextPool} this pool
   */
  setSync(contextId, contextData) {
    if (!this._contexts.has(contextId) && this._contexts.size >= this._maxContexts) {
      throw new Error(`BrowserContextPool is at capacity (${this._maxContexts} contexts). Use await pool.set() to wait for a free slot.`);
    }
    this._store(contextId, contextData);
    return this;
  }

  /**
   * Remove an entry WITHOUT closing its context, then hand the freed slot to
   * the oldest waiter.
   * @param {string} contextId
   * @returns {boolean} whether an entry was removed
   */
  delete(contextId) {
    const deleted = this._contexts.delete(contextId);
    if (deleted) this._notifyWaiter();
    return deleted;
  }

  /** Drop every entry (contexts are not closed) and reject pending waiters. */
  clear() {
    this._contexts.clear();
    // Drain any waiters with rejections so they don't hang
    this._rejectWaiters('BrowserContextPool cleared');
  }

  // ── Pool-specific API ───────────────────────────────────────────────────────

  /**
   * Record a use for the given context.
   * Returns true if the context should be closed and re-created (refresh needed).
   * @param {string} contextId
   * @returns {boolean} false for unknown ids
   */
  recordUse(contextId) {
    const entry = this._contexts.get(contextId);
    if (!entry) return false;
    entry.uses++;
    entry.lastUsed = Date.now();
    return entry.uses >= this._periodicRefreshAfter;
  }

  /**
   * Dispose a context (close it + remove from pool), then run the
   * onContextExpired callback. The callback is awaited so that its failure
   * reaches the caller instead of becoming an unhandled rejection.
   * @param {string} contextId
   * @returns {Promise<void>}
   */
  async dispose(contextId) {
    const entry = this._contexts.get(contextId);
    if (!entry) return;
    this._contexts.delete(contextId);
    this._notifyWaiter();
    try {
      await entry.context?.close();
    } catch {
      // ignore close errors — the entry has already left the pool
    }
    await this._onContextExpired?.(contextId, entry);
  }

  /**
   * Close all idle contexts (lastUsed > closeIdleAfterMs ago).
   * Runs from the interval timer, so it must never reject.
   */
  async _closeIdleContexts() {
    const now = Date.now();
    // Snapshot the ids; look each entry up again because the map can change
    // while a previous dispose() is being awaited.
    for (const contextId of Array.from(this._contexts.keys())) {
      const entry = this._contexts.get(contextId);
      if (!entry || now - entry.lastUsed <= this._closeIdleAfterMs) continue;
      try {
        await this.dispose(contextId);
      } catch {
        // onContextExpired failed; the context itself is already removed and
        // closed, and nothing awaits this timer-driven sweep.
      }
    }
  }

  /**
   * Wait until a slot becomes available (or time out).
   * @returns {Promise<void>}
   */
  _waitForSlot() {
    return new Promise((resolve, reject) => {
      const waiter = { resolve, reject, timer: null };
      waiter.timer = setTimeout(() => {
        const idx = this._waitQueue.indexOf(waiter);
        if (idx !== -1) this._waitQueue.splice(idx, 1);
        reject(new Error(`BrowserContextPool: timed out waiting for a free context slot after ${this._waitTimeoutMs}ms`));
      }, this._waitTimeoutMs);

      this._waitQueue.push(waiter);
    });
  }

  /** Notify the oldest pending waiter that a slot is now free. */
  _notifyWaiter() {
    if (this._waitQueue.length === 0) return;
    const { resolve, timer } = this._waitQueue.shift();
    clearTimeout(timer);
    resolve();
  }

  /**
   * Reject every pending waiter and cancel its timeout timer, so no stale
   * timer keeps the event loop alive after the pool is cleared or destroyed.
   * @param {string} message — error message for the rejections
   */
  _rejectWaiters(message) {
    for (const { reject, timer } of this._waitQueue.splice(0)) {
      clearTimeout(timer);
      reject(new Error(message));
    }
  }

  /** Store an entry with fresh bookkeeping fields. */
  _store(contextId, contextData) {
    const now = Date.now();
    this._contexts.set(contextId, {
      ...contextData,
      uses: 0,
      lastUsed: now,
      created: now
    });
  }

  /**
   * Destroy the pool — closes all contexts and clears the idle timer.
   * Waiters are rejected BEFORE disposal: otherwise each disposal would wake a
   * waiter, which would then register a fresh context in a pool that nobody
   * will ever sweep or close again.
   * @returns {Promise<void>} rejects with the first disposal error, after all
   *   contexts have been attempted
   */
  async destroy() {
    clearInterval(this._idleTimer);
    this._rejectWaiters('BrowserContextPool destroyed');

    const failures = [];
    for (const contextId of Array.from(this._contexts.keys())) {
      try {
        await this.dispose(contextId);
      } catch (err) {
        failures.push(err);
      }
    }
    if (failures.length > 0) throw failures[0];
  }
}
186
+
187
+ export default BrowserContextPool;
@@ -573,11 +573,13 @@ export class JobManager extends EventEmitter {
573
573
  * @returns {boolean} Whether job is valid
574
574
  */
575
575
  validateJob(job) {
576
- return job &&
577
- typeof job.id === 'string' &&
578
- typeof job.type === 'string' &&
579
- typeof job.status === 'string' &&
580
- Object.values(this.JOB_STATES).includes(job.status);
576
+ return Boolean(
577
+ job &&
578
+ typeof job.id === 'string' &&
579
+ typeof job.type === 'string' &&
580
+ typeof job.status === 'string' &&
581
+ Object.values(this.JOB_STATES).includes(job.status)
582
+ );
581
583
  }
582
584
 
583
585
  /**