crawlforge-mcp-server 3.0.18 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/server.js +192 -1277
- package/src/core/ActionExecutor.js +2 -43
- package/src/core/AuthManager.js +127 -14
- package/src/core/BrowserContextPool.js +187 -0
- package/src/core/JobManager.js +7 -5
- package/src/core/LocalizationManager.js +14 -125
- package/src/core/StealthBrowserManager.js +26 -18
- package/src/core/cache/CacheManager.js +4 -1
- package/src/core/crawlers/BFSCrawler.js +19 -5
- package/src/observability/metrics.js +137 -0
- package/src/observability/tracing.js +74 -0
- package/src/server/auth/oauth.js +388 -0
- package/src/server/registerTool.js +41 -0
- package/src/server/schemas/common.js +29 -0
- package/src/server/transports/http.js +22 -0
- package/src/server/transports/stdio.js +16 -0
- package/src/server/transports/streamableHttp.js +226 -0
- package/src/server/withAuth.js +121 -0
- package/src/tools/advanced/BatchScrapeTool.js +12 -1086
- package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
- package/src/tools/advanced/batchScrape/index.js +328 -0
- package/src/tools/advanced/batchScrape/queue.js +91 -0
- package/src/tools/advanced/batchScrape/reporter.js +26 -0
- package/src/tools/advanced/batchScrape/schema.js +37 -0
- package/src/tools/advanced/batchScrape/worker.js +179 -0
- package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
- package/src/tools/basic/_fetch.js +35 -0
- package/src/tools/basic/extractLinks.js +74 -0
- package/src/tools/basic/extractMetadata.js +74 -0
- package/src/tools/basic/extractText.js +46 -0
- package/src/tools/basic/fetchUrl.js +44 -0
- package/src/tools/basic/scrapeStructured.js +58 -0
- package/src/tools/crawl/_sessionContext.js +234 -0
- package/src/tools/crawl/crawlDeep.js +55 -5
- package/src/tools/crawl/mapSite.js +23 -2
- package/src/tools/extract/_fetchAndParse.js +57 -0
- package/src/tools/extract/extractStructured.js +3 -19
- package/src/tools/extract/extractWithLlm.js +295 -0
- package/src/tools/search/providers/searxng.js +126 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
- package/src/tools/search/ranking/ResultRanker.js +17 -10
- package/src/tools/search/ranking/SearchResultCache.js +52 -0
- package/src/tools/search/searchWeb.js +112 -6
- package/src/tools/tracking/trackChanges/differ.js +98 -0
- package/src/tools/tracking/trackChanges/index.js +432 -0
- package/src/tools/tracking/trackChanges/monitor.js +93 -0
- package/src/tools/tracking/trackChanges/notifier.js +105 -0
- package/src/tools/tracking/trackChanges/schema.js +127 -0
- package/src/tools/tracking/trackChanges.js +12 -1374
|
@@ -171,49 +171,8 @@ export class ActionExecutor extends EventEmitter {
|
|
|
171
171
|
actualChainConfig = chainConfig;
|
|
172
172
|
}
|
|
173
173
|
|
|
174
|
-
//
|
|
175
|
-
|
|
176
|
-
const actions = Array.isArray(chainConfig) ? chainConfig : actualChainConfig.actions;
|
|
177
|
-
const mockResults = actions.map((action, index) => {
|
|
178
|
-
const baseResult = {
|
|
179
|
-
id: `mock_action_${index}`,
|
|
180
|
-
type: action.type,
|
|
181
|
-
success: true,
|
|
182
|
-
executionTime: 10,
|
|
183
|
-
timestamp: Date.now(),
|
|
184
|
-
description: `Mock ${action.type} action`
|
|
185
|
-
};
|
|
186
|
-
|
|
187
|
-
if (action.type === 'wait') {
|
|
188
|
-
const waitTime = action.duration || action.milliseconds || 100;
|
|
189
|
-
baseResult.result = { waited: waitTime };
|
|
190
|
-
} else if (action.type === 'click') {
|
|
191
|
-
baseResult.result = { selector: action.selector, button: 'left' };
|
|
192
|
-
} else {
|
|
193
|
-
baseResult.result = { mockResult: true };
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
return baseResult;
|
|
197
|
-
});
|
|
198
|
-
|
|
199
|
-
return {
|
|
200
|
-
success: true,
|
|
201
|
-
chainId,
|
|
202
|
-
url,
|
|
203
|
-
executionTime: Date.now() - startTime,
|
|
204
|
-
results: mockResults,
|
|
205
|
-
screenshots: [],
|
|
206
|
-
metadata: {
|
|
207
|
-
userAgent: 'mock-agent',
|
|
208
|
-
viewport: { width: 1280, height: 720 }
|
|
209
|
-
},
|
|
210
|
-
stats: {
|
|
211
|
-
totalActions: mockResults.length,
|
|
212
|
-
successfulActions: mockResults.filter(r => r.success).length,
|
|
213
|
-
failedActions: mockResults.filter(r => !r.success).length
|
|
214
|
-
}
|
|
215
|
-
};
|
|
216
|
-
}
|
|
174
|
+
// (v3.0.19 cleanup) The legacy example.com mock branch was removed — no
|
|
175
|
+
// test depended on it and it short-circuited real validation. See §A3.
|
|
217
176
|
|
|
218
177
|
// Validate chain configuration
|
|
219
178
|
const validatedChain = ActionChainSchema.parse(actualChainConfig);
|
package/src/core/AuthManager.js
CHANGED
|
@@ -6,8 +6,10 @@
|
|
|
6
6
|
// Using native fetch (Node.js 18+)
|
|
7
7
|
import fs from 'fs/promises';
|
|
8
8
|
import path from 'path';
|
|
9
|
+
import { randomUUID } from 'crypto';
|
|
9
10
|
import { isCreatorModeVerified } from './creatorMode.js';
|
|
10
11
|
import { resolveApiEndpoint } from './endpointGuard.js';
|
|
12
|
+
import { logger } from '../utils/Logger.js';
|
|
11
13
|
|
|
12
14
|
class AuthManager {
|
|
13
15
|
constructor() {
|
|
@@ -33,17 +35,23 @@ class AuthManager {
|
|
|
33
35
|
|
|
34
36
|
/**
|
|
35
37
|
* Initialize the auth manager and load stored config
|
|
38
|
+
*
|
|
39
|
+
* Audit phase 5: re-validate the stored API key against the backend at startup.
|
|
40
|
+
* If the backend explicitly reports the key as revoked/invalid, we throw —
|
|
41
|
+
* the server must refuse to start rather than silently run with a dead key.
|
|
42
|
+
* Network failures are tolerated (we already have a cached config and the
|
|
43
|
+
* fail-closed credit check from audit phase 2 handles runtime revocation).
|
|
36
44
|
*/
|
|
37
45
|
async initialize() {
|
|
38
46
|
if (this.initialized) return;
|
|
39
|
-
|
|
47
|
+
|
|
40
48
|
// Skip config loading in creator mode
|
|
41
49
|
if (this.isCreatorMode()) {
|
|
42
50
|
console.log('🚀 Creator Mode Active - Unlimited Access Enabled');
|
|
43
51
|
this.initialized = true;
|
|
44
52
|
return;
|
|
45
53
|
}
|
|
46
|
-
|
|
54
|
+
|
|
47
55
|
try {
|
|
48
56
|
await this.loadConfig();
|
|
49
57
|
this.initialized = true;
|
|
@@ -52,6 +60,39 @@ class AuthManager {
|
|
|
52
60
|
this.initialized = true;
|
|
53
61
|
}
|
|
54
62
|
|
|
63
|
+
// Phase 5: re-validate cached API key with backend. Refuse to start if revoked.
|
|
64
|
+
if (this.config?.apiKey && process.env.CRAWLFORGE_SKIP_STARTUP_VALIDATION !== 'true') {
|
|
65
|
+
const validation = await this.validateApiKey(this.config.apiKey);
|
|
66
|
+
if (!validation.valid) {
|
|
67
|
+
const lower = (validation.error || '').toLowerCase();
|
|
68
|
+
const isExplicitReject =
|
|
69
|
+
lower.includes('invalid') ||
|
|
70
|
+
lower.includes('revoked') ||
|
|
71
|
+
lower.includes('not found') ||
|
|
72
|
+
lower.includes('expired') ||
|
|
73
|
+
lower.includes('unauthorized');
|
|
74
|
+
if (isExplicitReject) {
|
|
75
|
+
const rejectErr = new Error(
|
|
76
|
+
`CrawlForge API key rejected by backend at startup: ${validation.error}. ` +
|
|
77
|
+
`Run \`npm run setup\` with a current key, or set CRAWLFORGE_SKIP_STARTUP_VALIDATION=true to bypass.`
|
|
78
|
+
);
|
|
79
|
+
logger.error('Startup API key validation rejected by backend', rejectErr, {
|
|
80
|
+
backendError: validation.error
|
|
81
|
+
});
|
|
82
|
+
throw rejectErr;
|
|
83
|
+
}
|
|
84
|
+
// Connection error — tolerate, log, continue. Runtime credit check will fail closed.
|
|
85
|
+
logger.warn('Startup API key validation skipped (backend unreachable)', {
|
|
86
|
+
error: validation.error
|
|
87
|
+
});
|
|
88
|
+
} else {
|
|
89
|
+
logger.info('Startup API key validation OK', {
|
|
90
|
+
userId: validation.userId,
|
|
91
|
+
creditsRemaining: validation.creditsRemaining
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
55
96
|
try {
|
|
56
97
|
await this._flushPendingUsage();
|
|
57
98
|
} catch {
|
|
@@ -236,6 +277,11 @@ class AuthManager {
|
|
|
236
277
|
this.creditCache.set(userId, Math.max(0, cached - creditsUsed));
|
|
237
278
|
}
|
|
238
279
|
|
|
280
|
+
// Audit phase A2: every usage report gets a request ID and idempotency key
|
|
281
|
+
// so retries (in-memory or via pending-usage.json) are safe to replay.
|
|
282
|
+
const requestId = randomUUID();
|
|
283
|
+
const idempotencyKey = randomUUID();
|
|
284
|
+
|
|
239
285
|
const payload = {
|
|
240
286
|
tool,
|
|
241
287
|
creditsUsed,
|
|
@@ -243,6 +289,8 @@ class AuthManager {
|
|
|
243
289
|
responseStatus,
|
|
244
290
|
processingTime,
|
|
245
291
|
timestamp: new Date().toISOString(),
|
|
292
|
+
requestId,
|
|
293
|
+
idempotencyKey,
|
|
246
294
|
version: '3.0.3'
|
|
247
295
|
};
|
|
248
296
|
|
|
@@ -251,7 +299,8 @@ class AuthManager {
|
|
|
251
299
|
method: 'POST',
|
|
252
300
|
headers: {
|
|
253
301
|
'Content-Type': 'application/json',
|
|
254
|
-
'X-API-Key': this.config.apiKey
|
|
302
|
+
'X-API-Key': this.config.apiKey,
|
|
303
|
+
'Idempotency-Key': idempotencyKey
|
|
255
304
|
},
|
|
256
305
|
body: JSON.stringify(payload),
|
|
257
306
|
signal: AbortSignal.timeout(5000)
|
|
@@ -260,8 +309,20 @@ class AuthManager {
|
|
|
260
309
|
await this._flushPendingUsage();
|
|
261
310
|
} catch (error) {
|
|
262
311
|
// Log but don't throw - usage reporting should not break tool execution
|
|
263
|
-
|
|
264
|
-
|
|
312
|
+
logger.error('Failed to report usage; queued for retry', error, {
|
|
313
|
+
tool,
|
|
314
|
+
creditsUsed,
|
|
315
|
+
requestId,
|
|
316
|
+
idempotencyKey
|
|
317
|
+
});
|
|
318
|
+
await this._appendPendingUsage({
|
|
319
|
+
toolName: tool,
|
|
320
|
+
creditsUsed,
|
|
321
|
+
userId,
|
|
322
|
+
timestamp: payload.timestamp,
|
|
323
|
+
requestId,
|
|
324
|
+
idempotencyKey
|
|
325
|
+
});
|
|
265
326
|
}
|
|
266
327
|
}
|
|
267
328
|
|
|
@@ -278,18 +339,37 @@ class AuthManager {
|
|
|
278
339
|
// File absent or corrupt — start fresh
|
|
279
340
|
}
|
|
280
341
|
|
|
281
|
-
|
|
342
|
+
// Audit phase A2: stamp every pending entry with a request ID and idempotency key
|
|
343
|
+
// so the backend (when it ships support) can dedupe, and so we can log dropped
|
|
344
|
+
// entries by ID when the flush retry path fails permanently.
|
|
345
|
+
const stamped = {
|
|
346
|
+
requestId: entry.requestId || randomUUID(),
|
|
347
|
+
idempotencyKey: entry.idempotencyKey || randomUUID(),
|
|
348
|
+
...entry
|
|
349
|
+
};
|
|
350
|
+
|
|
351
|
+
entries.push(stamped);
|
|
282
352
|
|
|
283
353
|
// Cap at 1 MB — drop oldest entries until serialized size fits
|
|
284
354
|
let serialized = JSON.stringify(entries);
|
|
355
|
+
const dropped = [];
|
|
285
356
|
while (serialized.length > 1_048_576 && entries.length > 1) {
|
|
286
|
-
entries.shift();
|
|
357
|
+
dropped.push(entries.shift());
|
|
287
358
|
serialized = JSON.stringify(entries);
|
|
288
359
|
}
|
|
360
|
+
if (dropped.length > 0) {
|
|
361
|
+
logger.warn('Pending usage queue truncated to 1 MB cap', {
|
|
362
|
+
droppedCount: dropped.length,
|
|
363
|
+
droppedIds: dropped.map(d => d.requestId).filter(Boolean)
|
|
364
|
+
});
|
|
365
|
+
}
|
|
289
366
|
|
|
290
367
|
await fs.writeFile(this.pendingUsagePath, serialized, { mode: 0o600 });
|
|
291
368
|
} catch (error) {
|
|
292
|
-
|
|
369
|
+
logger.error('Failed to append pending usage', error, {
|
|
370
|
+
toolName: entry?.toolName,
|
|
371
|
+
requestId: entry?.requestId
|
|
372
|
+
});
|
|
293
373
|
}
|
|
294
374
|
}
|
|
295
375
|
|
|
@@ -300,34 +380,62 @@ class AuthManager {
|
|
|
300
380
|
try {
|
|
301
381
|
const raw = await fs.readFile(this.pendingUsagePath, 'utf-8');
|
|
302
382
|
entries = JSON.parse(raw);
|
|
303
|
-
} catch {
|
|
304
|
-
|
|
383
|
+
} catch (err) {
|
|
384
|
+
// ENOENT is normal (nothing pending). Anything else is corruption — log it.
|
|
385
|
+
if (err && err.code !== 'ENOENT') {
|
|
386
|
+
logger.warn('Pending usage file unreadable; treating as empty', {
|
|
387
|
+
error: err.message,
|
|
388
|
+
path: this.pendingUsagePath
|
|
389
|
+
});
|
|
390
|
+
}
|
|
391
|
+
return;
|
|
305
392
|
}
|
|
306
393
|
|
|
307
394
|
if (!Array.isArray(entries) || entries.length === 0) return;
|
|
308
395
|
|
|
309
396
|
const remaining = [];
|
|
397
|
+
const flushedIds = [];
|
|
398
|
+
const failedIds = [];
|
|
310
399
|
for (const entry of entries) {
|
|
311
400
|
try {
|
|
401
|
+
const idempotencyKey = entry.idempotencyKey || randomUUID();
|
|
312
402
|
await fetch(`${this.apiEndpoint}/api/v1/usage`, {
|
|
313
403
|
method: 'POST',
|
|
314
404
|
headers: {
|
|
315
405
|
'Content-Type': 'application/json',
|
|
316
|
-
'X-API-Key': this.config.apiKey
|
|
406
|
+
'X-API-Key': this.config.apiKey,
|
|
407
|
+
'Idempotency-Key': idempotencyKey
|
|
317
408
|
},
|
|
318
409
|
body: JSON.stringify({
|
|
319
410
|
tool: entry.toolName,
|
|
320
411
|
creditsUsed: entry.creditsUsed,
|
|
321
412
|
timestamp: entry.timestamp,
|
|
413
|
+
requestId: entry.requestId,
|
|
414
|
+
idempotencyKey,
|
|
322
415
|
version: '3.0.3'
|
|
323
416
|
}),
|
|
324
417
|
signal: AbortSignal.timeout(5000)
|
|
325
418
|
});
|
|
326
|
-
|
|
419
|
+
flushedIds.push(entry.requestId);
|
|
420
|
+
} catch (err) {
|
|
421
|
+
failedIds.push(entry.requestId);
|
|
327
422
|
remaining.push(entry);
|
|
328
423
|
}
|
|
329
424
|
}
|
|
330
425
|
|
|
426
|
+
if (flushedIds.length > 0) {
|
|
427
|
+
logger.info('Flushed pending usage entries', {
|
|
428
|
+
count: flushedIds.length,
|
|
429
|
+
requestIds: flushedIds.filter(Boolean)
|
|
430
|
+
});
|
|
431
|
+
}
|
|
432
|
+
if (failedIds.length > 0) {
|
|
433
|
+
logger.warn('Pending usage entries failed to flush; retained for next attempt', {
|
|
434
|
+
count: failedIds.length,
|
|
435
|
+
requestIds: failedIds.filter(Boolean)
|
|
436
|
+
});
|
|
437
|
+
}
|
|
438
|
+
|
|
331
439
|
try {
|
|
332
440
|
if (remaining.length === 0) {
|
|
333
441
|
await fs.unlink(this.pendingUsagePath);
|
|
@@ -335,7 +443,9 @@ class AuthManager {
|
|
|
335
443
|
await fs.writeFile(this.pendingUsagePath, JSON.stringify(remaining), { mode: 0o600 });
|
|
336
444
|
}
|
|
337
445
|
} catch (error) {
|
|
338
|
-
|
|
446
|
+
logger.error('Failed to update pending usage file', error, {
|
|
447
|
+
path: this.pendingUsagePath
|
|
448
|
+
});
|
|
339
449
|
}
|
|
340
450
|
}
|
|
341
451
|
|
|
@@ -372,7 +482,10 @@ class AuthManager {
|
|
|
372
482
|
track_changes: 3,
|
|
373
483
|
|
|
374
484
|
// Phase 1: LLM-Powered Structured Extraction
|
|
375
|
-
extract_structured: 4
|
|
485
|
+
extract_structured: 4,
|
|
486
|
+
|
|
487
|
+
// Phase C5: Natural-language LLM extraction (external paid API call per invocation)
|
|
488
|
+
extract_with_llm: 5
|
|
376
489
|
};
|
|
377
490
|
|
|
378
491
|
return costs[tool] || 1;
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrowserContextPool — bounded Playwright browser-context pool.
|
|
3
|
+
*
|
|
4
|
+
* Replaces the unbounded `this.contexts = new Map()` in StealthBrowserManager
|
|
5
|
+
* with a pool that:
|
|
6
|
+
* - enforces a hard cap (MAX_BROWSER_CONTEXTS, default 10)
|
|
7
|
+
* - disposes contexts after N uses (periodicRefreshAfter, default 200)
|
|
8
|
+
* - closes idle contexts after a configurable timeout
|
|
9
|
+
* - maintains a concurrency wait-queue so excess callers fail fast
|
|
10
|
+
* (timeout: waitTimeoutMs, default 10 000 ms) rather than accumulating
|
|
11
|
+
*
|
|
12
|
+
* The Map-compatible surface (get/set/delete/entries/clear/size) lets
|
|
13
|
+
* StealthBrowserManager adopt it with minimal changes.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Turn the raw MAX_BROWSER_CONTEXTS environment value into a usable pool cap.
 *
 * Only a value that parses to a positive integer is honoured. Anything else
 * (unset, empty, non-numeric, zero, negative) falls back to 10: a NaN cap makes
 * every `size >= cap` comparison false, which silently removes the limit, and a
 * cap of zero or less makes the pool permanently full.
 *
 * @param {string | undefined} rawValue - the environment variable as read
 * @returns {number} a positive integer cap
 */
function resolveDefaultMaxContexts(rawValue) {
  const parsed = Number.parseInt(rawValue ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : 10;
}

const DEFAULT_MAX_CONTEXTS = resolveDefaultMaxContexts(process.env.MAX_BROWSER_CONTEXTS);
const DEFAULT_PERIODIC_REFRESH_AFTER = 200; // context uses before forced close+relaunch
const DEFAULT_CLOSE_IDLE_AFTER_MS = 5 * 60 * 1000; // 5 minutes
const DEFAULT_WAIT_TIMEOUT_MS = 10_000;
|
|
20
|
+
|
|
21
|
+
export class BrowserContextPool {
  /**
   * @param {Object} [opts]
   * @param {number} [opts.maxContexts] — hard cap on registered contexts; a value that is
   *   not a positive number falls back to 10
   * @param {number} [opts.periodicRefreshAfter] — max uses per context before disposal
   * @param {number} [opts.closeIdleAfterMs]
   * @param {number} [opts.waitTimeoutMs] — max wait for a free slot; fails fast after
   * @param {Function} [opts.onContextExpired] — async (contextId, contextData) => void
   */
  constructor(opts = {}) {
    // A NaN cap would make every `size >= cap` check false (no limit at all) and a
    // cap <= 0 would make the pool permanently full, so neither is accepted.
    const requestedCap = Number(opts.maxContexts ?? DEFAULT_MAX_CONTEXTS);
    this._maxContexts = requestedCap > 0 ? requestedCap : 10;
    this._periodicRefreshAfter = opts.periodicRefreshAfter ?? DEFAULT_PERIODIC_REFRESH_AFTER;
    this._closeIdleAfterMs = opts.closeIdleAfterMs ?? DEFAULT_CLOSE_IDLE_AFTER_MS;
    this._waitTimeoutMs = opts.waitTimeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
    this._onContextExpired = opts.onContextExpired || null;

    /** @type {Map<string, { context: any, fingerprint: any, config: any, uses: number, lastUsed: number, created: number }>} */
    this._contexts = new Map();

    /** Pending callers waiting for a free slot: { resolve, reject, timer } */
    this._waitQueue = [];

    /** Periodic idle-checker timer */
    this._idleTimer = setInterval(() => {
      // Nobody awaits the sweep, so a failure has to be handled here or it would
      // surface as an unhandled rejection.
      this._closeIdleContexts().catch((err) => {
        process.emitWarning(`BrowserContextPool idle sweep failed: ${err?.message ?? err}`);
      });
    }, Math.min(this._closeIdleAfterMs, 60_000));
    this._idleTimer.unref?.(); // don't prevent process exit
  }

  // ── Map-compatible surface ──────────────────────────────────────────────────

  get size() { return this._contexts.size; }

  get(contextId) { return this._contexts.get(contextId) ?? undefined; }

  has(contextId) { return this._contexts.has(contextId); }

  entries() { return this._contexts.entries(); }

  keys() { return this._contexts.keys(); }

  values() { return this._contexts.values(); }

  /**
   * Register a context, waiting for a free slot when the pool is full.
   *
   * Capacity is re-checked every time the caller is woken: between a slot being
   * freed and this caller resuming, another caller may already have taken it.
   * All waits share one deadline, so the total wait never exceeds waitTimeoutMs.
   * Re-registering an id that is already present never waits, because it does
   * not grow the pool (the previous entry is replaced, not closed).
   *
   * @param {string} contextId
   * @param {Object} contextData
   * @returns {Promise<BrowserContextPool>} this pool
   * @throws {Error} if no slot becomes available within waitTimeoutMs, or the
   *   pool is cleared/destroyed while waiting
   */
  async set(contextId, contextData) {
    const deadline = Date.now() + this._waitTimeoutMs;
    while (!this._contexts.has(contextId) && this._contexts.size >= this._maxContexts) {
      await this._waitForSlot(Math.max(0, deadline - Date.now()));
    }
    return this._insert(contextId, contextData);
  }

  /**
   * Synchronous set — for callers that already verified there is capacity.
   * Throws immediately if the pool is at capacity and the id is not already registered.
   *
   * @param {string} contextId
   * @param {Object} contextData
   * @returns {BrowserContextPool} this pool
   */
  setSync(contextId, contextData) {
    if (!this._contexts.has(contextId) && this._contexts.size >= this._maxContexts) {
      throw new Error(`BrowserContextPool is at capacity (${this._maxContexts} contexts). Use await pool.set() to wait for a free slot.`);
    }
    return this._insert(contextId, contextData);
  }

  /**
   * Remove an entry without closing its context and wake the oldest waiter.
   * @param {string} contextId
   * @returns {boolean} whether an entry was removed
   */
  delete(contextId) {
    const deleted = this._contexts.delete(contextId);
    if (deleted) this._notifyWaiter();
    return deleted;
  }

  /** Drop every entry (contexts are not closed) and reject all pending waiters. */
  clear() {
    this._contexts.clear();
    // Drain any waiters with rejections so they don't hang
    this._rejectWaiters('BrowserContextPool cleared');
  }

  // ── Pool-specific API ───────────────────────────────────────────────────────

  /**
   * Record a use for the given context.
   * Returns true if the context should be closed and re-created (refresh needed).
   * @param {string} contextId
   * @returns {boolean} false for an unknown id
   */
  recordUse(contextId) {
    const entry = this._contexts.get(contextId);
    if (!entry) return false;
    entry.uses++;
    entry.lastUsed = Date.now();
    return entry.uses >= this._periodicRefreshAfter;
  }

  /**
   * Dispose a context (close it + remove from pool), then run onContextExpired.
   *
   * The slot is released before the close completes. Errors from close() are
   * ignored; errors from onContextExpired (thrown or rejected) propagate to the
   * caller.
   *
   * @param {string} contextId
   */
  async dispose(contextId) {
    const entry = this._contexts.get(contextId);
    if (!entry) return;
    this._contexts.delete(contextId);
    this._notifyWaiter();
    try {
      await entry.context?.close();
    } catch {
      // Best effort: the context may already be closed or its browser gone.
    }
    // Awaited so that a rejection from an async callback reaches the caller
    // instead of becoming an unhandled rejection.
    await this._onContextExpired?.(contextId, entry);
  }

  /**
   * Close all idle contexts (lastUsed > closeIdleAfterMs ago).
   * A failing dispose does not stop the sweep; the first failure is rethrown
   * once every idle context has been attempted.
   */
  async _closeIdleContexts() {
    const now = Date.now();
    const failures = [];
    for (const [contextId, entry] of this._contexts.entries()) {
      if (now - entry.lastUsed <= this._closeIdleAfterMs) continue;
      try {
        await this.dispose(contextId);
      } catch (err) {
        failures.push(err);
      }
    }
    if (failures.length > 0) throw failures[0];
  }

  /**
   * Wait until a slot becomes available (or time out).
   * @param {number} [timeoutMs] — how long this particular wait may last
   * @returns {Promise<void>}
   */
  _waitForSlot(timeoutMs = this._waitTimeoutMs) {
    return new Promise((resolve, reject) => {
      const waiter = { resolve, reject, timer: null };
      waiter.timer = setTimeout(() => {
        const idx = this._waitQueue.indexOf(waiter);
        if (idx !== -1) this._waitQueue.splice(idx, 1);
        reject(new Error(`BrowserContextPool: timed out waiting for a free context slot after ${this._waitTimeoutMs}ms`));
      }, timeoutMs);

      this._waitQueue.push(waiter);
    });
  }

  /** Notify the oldest pending waiter that a slot is now free. */
  _notifyWaiter() {
    if (this._waitQueue.length === 0) return;
    const { resolve, timer } = this._waitQueue.shift();
    clearTimeout(timer);
    resolve();
  }

  /**
   * Reject every pending waiter and cancel its timeout, so no timer stays armed
   * (and keeps the event loop alive) for a caller that has already been rejected.
   * @param {string} message — message of the Error each waiter is rejected with
   */
  _rejectWaiters(message) {
    const waiters = this._waitQueue.splice(0);
    for (const { reject, timer } of waiters) {
      clearTimeout(timer);
      reject(new Error(message));
    }
  }

  /** Store an entry with fresh usage counters. */
  _insert(contextId, contextData) {
    const now = Date.now();
    this._contexts.set(contextId, {
      ...contextData,
      uses: 0,
      lastUsed: now,
      created: now
    });
    return this;
  }

  /**
   * Destroy the pool — closes all contexts and clears the idle timer.
   *
   * Waiters are rejected before any context is disposed: disposing frees a slot,
   * and a waiter woken by that would register a new context into a pool that is
   * being torn down, where nothing would ever close it. A failing dispose does
   * not stop the teardown; the first failure is rethrown at the end.
   */
  async destroy() {
    clearInterval(this._idleTimer);
    this._rejectWaiters('BrowserContextPool destroyed');
    const failures = [];
    for (const contextId of Array.from(this._contexts.keys())) {
      try {
        await this.dispose(contextId);
      } catch (err) {
        failures.push(err);
      }
    }
    // Callers that queued while contexts were closing must not be left hanging.
    this._rejectWaiters('BrowserContextPool destroyed');
    if (failures.length > 0) throw failures[0];
  }
}
|
|
186
|
+
|
|
187
|
+
export default BrowserContextPool;
|
package/src/core/JobManager.js
CHANGED
|
@@ -573,11 +573,13 @@ export class JobManager extends EventEmitter {
|
|
|
573
573
|
* @returns {boolean} Whether job is valid
|
|
574
574
|
*/
|
|
575
575
|
validateJob(job) {
|
|
576
|
-
return
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
576
|
+
return Boolean(
|
|
577
|
+
job &&
|
|
578
|
+
typeof job.id === 'string' &&
|
|
579
|
+
typeof job.type === 'string' &&
|
|
580
|
+
typeof job.status === 'string' &&
|
|
581
|
+
Object.values(this.JOB_STATES).includes(job.status)
|
|
582
|
+
);
|
|
581
583
|
}
|
|
582
584
|
|
|
583
585
|
/**
|
|
@@ -63,47 +63,11 @@ const LANGUAGE_MAPPINGS = {
|
|
|
63
63
|
// RTL Languages Configuration
|
|
64
64
|
const RTL_LANGUAGES = new Set(['ar', 'he', 'fa', 'ur', 'ku', 'dv']);
|
|
65
65
|
|
|
66
|
-
//
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
'eu-west': { endpoint: 'proxy-eu-west.example.com', port: 8080 },
|
|
72
|
-
'eu-central': { endpoint: 'proxy-eu-central.example.com', port: 8080 },
|
|
73
|
-
'eu-north': { endpoint: 'proxy-eu-north.example.com', port: 8080 },
|
|
74
|
-
'eu-east': { endpoint: 'proxy-eu-east.example.com', port: 8080 },
|
|
75
|
-
'asia-pacific': { endpoint: 'proxy-asia-pacific.example.com', port: 8080 },
|
|
76
|
-
'middle-east': { endpoint: 'proxy-middle-east.example.com', port: 8080 },
|
|
77
|
-
'south-america': { endpoint: 'proxy-south-america.example.com', port: 8080 },
|
|
78
|
-
'north-america': { endpoint: 'proxy-north-america.example.com', port: 8080 },
|
|
79
|
-
'africa': { endpoint: 'proxy-africa.example.com', port: 8080 }
|
|
80
|
-
},
|
|
81
|
-
fallbackStrategies: {
|
|
82
|
-
'geo-blocked': ['rotate-proxy', 'change-user-agent', 'delay-request'],
|
|
83
|
-
'rate-limited': ['change-proxy', 'exponential-backoff'],
|
|
84
|
-
'detection': ['rotate-fingerprint', 'change-proxy', 'human-delay']
|
|
85
|
-
}
|
|
86
|
-
};
|
|
87
|
-
|
|
88
|
-
// Translation Service Configuration
|
|
89
|
-
const TRANSLATION_SERVICES = {
|
|
90
|
-
google: {
|
|
91
|
-
enabled: process.env.GOOGLE_TRANSLATE_API_KEY ? true : false,
|
|
92
|
-
apiKey: process.env.GOOGLE_TRANSLATE_API_KEY,
|
|
93
|
-
endpoint: 'https://translation.googleapis.com/language/translate/v2'
|
|
94
|
-
},
|
|
95
|
-
azure: {
|
|
96
|
-
enabled: process.env.AZURE_TRANSLATE_KEY ? true : false,
|
|
97
|
-
key: process.env.AZURE_TRANSLATE_KEY,
|
|
98
|
-
region: process.env.AZURE_TRANSLATE_REGION || 'global',
|
|
99
|
-
endpoint: 'https://api.cognitive.microsofttranslator.com/translate'
|
|
100
|
-
},
|
|
101
|
-
libre: {
|
|
102
|
-
enabled: process.env.LIBRE_TRANSLATE_URL ? true : false,
|
|
103
|
-
url: process.env.LIBRE_TRANSLATE_URL,
|
|
104
|
-
apiKey: process.env.LIBRE_TRANSLATE_API_KEY
|
|
105
|
-
}
|
|
106
|
-
};
|
|
66
|
+
// NOTE (v3.0.19 cleanup): PROXY_PROVIDERS and TRANSLATION_SERVICES configs were
|
|
67
|
+
// removed. They pointed at `*.example.com` endpoints and translation services
|
|
68
|
+
// that were never wired up — pure dead code. If/when real proxy rotation or
|
|
69
|
+
// translation lands, configure providers explicitly rather than reviving these
|
|
70
|
+
// placeholders. See IMPROVEMENT_PLAN.md §A3.
|
|
107
71
|
|
|
108
72
|
const LocalizationSchema = z.object({
|
|
109
73
|
countryCode: z.string().length(2).optional(),
|
|
@@ -237,27 +201,21 @@ export class LocalizationManager extends EventEmitter {
|
|
|
237
201
|
try {
|
|
238
202
|
// Pre-populate timezone mappings
|
|
239
203
|
await this.loadTimezoneData();
|
|
240
|
-
|
|
204
|
+
|
|
241
205
|
// Initialize geo-location data
|
|
242
206
|
await this.loadGeoLocationData();
|
|
243
|
-
|
|
244
|
-
// Initialize proxy configurations
|
|
245
|
-
await this.initializeProxySystem();
|
|
246
|
-
|
|
247
|
-
// Initialize translation services
|
|
248
|
-
await this.initializeTranslationServices();
|
|
249
|
-
|
|
207
|
+
|
|
250
208
|
// Load cultural browsing patterns
|
|
251
209
|
await this.loadCulturalPatterns();
|
|
252
|
-
|
|
210
|
+
|
|
253
211
|
// Setup periodic health checks
|
|
254
212
|
this.setupHealthChecks();
|
|
255
|
-
|
|
213
|
+
|
|
256
214
|
this.emit('initialized');
|
|
257
215
|
} catch (error) {
|
|
258
|
-
this.emit('error', {
|
|
259
|
-
type: 'initialization_failed',
|
|
260
|
-
error: error.message
|
|
216
|
+
this.emit('error', {
|
|
217
|
+
type: 'initialization_failed',
|
|
218
|
+
error: error.message
|
|
261
219
|
});
|
|
262
220
|
throw error;
|
|
263
221
|
}
|
|
@@ -958,76 +916,6 @@ export class LocalizationManager extends EventEmitter {
|
|
|
958
916
|
|
|
959
917
|
return null;
|
|
960
918
|
}
|
|
961
|
-
/**
|
|
962
|
-
* Initialize proxy system with regional configurations
|
|
963
|
-
*/
|
|
964
|
-
async initializeProxySystem() {
|
|
965
|
-
try {
|
|
966
|
-
// Load proxy configurations from environment or config
|
|
967
|
-
for (const [region, config] of Object.entries(PROXY_PROVIDERS.regions)) {
|
|
968
|
-
if (process.env[`PROXY_${region.toUpperCase().replace('-', '_')}_ENABLED`] === 'true') {
|
|
969
|
-
this.proxyManager.activeProxies.set(region, {
|
|
970
|
-
...config,
|
|
971
|
-
username: process.env[`PROXY_${region.toUpperCase().replace('-', '_')}_USERNAME`],
|
|
972
|
-
password: process.env[`PROXY_${region.toUpperCase().replace('-', '_')}_PASSWORD`],
|
|
973
|
-
healthScore: 100,
|
|
974
|
-
lastCheck: 0,
|
|
975
|
-
failureCount: 0
|
|
976
|
-
});
|
|
977
|
-
}
|
|
978
|
-
}
|
|
979
|
-
|
|
980
|
-
// Setup proxy health monitoring
|
|
981
|
-
if (this.proxyManager.activeProxies.size > 0) {
|
|
982
|
-
await this.performProxyHealthChecks();
|
|
983
|
-
}
|
|
984
|
-
|
|
985
|
-
} catch (error) {
|
|
986
|
-
console.warn('Failed to initialize proxy system:', error.message);
|
|
987
|
-
}
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
/**
|
|
991
|
-
* Initialize translation services
|
|
992
|
-
*/
|
|
993
|
-
async initializeTranslationServices() {
|
|
994
|
-
try {
|
|
995
|
-
// Google Translate
|
|
996
|
-
if (TRANSLATION_SERVICES.google.enabled) {
|
|
997
|
-
this.translationProviders.set('google', {
|
|
998
|
-
type: 'google',
|
|
999
|
-
apiKey: TRANSLATION_SERVICES.google.apiKey,
|
|
1000
|
-
endpoint: TRANSLATION_SERVICES.google.endpoint,
|
|
1001
|
-
available: true
|
|
1002
|
-
});
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
// Azure Translator
|
|
1006
|
-
if (TRANSLATION_SERVICES.azure.enabled) {
|
|
1007
|
-
this.translationProviders.set('azure', {
|
|
1008
|
-
type: 'azure',
|
|
1009
|
-
key: TRANSLATION_SERVICES.azure.key,
|
|
1010
|
-
region: TRANSLATION_SERVICES.azure.region,
|
|
1011
|
-
endpoint: TRANSLATION_SERVICES.azure.endpoint,
|
|
1012
|
-
available: true
|
|
1013
|
-
});
|
|
1014
|
-
}
|
|
1015
|
-
|
|
1016
|
-
// LibreTranslate
|
|
1017
|
-
if (TRANSLATION_SERVICES.libre.enabled) {
|
|
1018
|
-
this.translationProviders.set('libre', {
|
|
1019
|
-
type: 'libre',
|
|
1020
|
-
url: TRANSLATION_SERVICES.libre.url,
|
|
1021
|
-
apiKey: TRANSLATION_SERVICES.libre.apiKey,
|
|
1022
|
-
available: true
|
|
1023
|
-
});
|
|
1024
|
-
}
|
|
1025
|
-
|
|
1026
|
-
} catch (error) {
|
|
1027
|
-
console.warn('Failed to initialize translation services:', error.message);
|
|
1028
|
-
}
|
|
1029
|
-
}
|
|
1030
|
-
|
|
1031
919
|
/**
|
|
1032
920
|
* Load cultural browsing patterns for different regions
|
|
1033
921
|
*/
|
|
@@ -1612,4 +1500,5 @@ export class LocalizationManager extends EventEmitter {
|
|
|
1612
1500
|
export default LocalizationManager;
|
|
1613
1501
|
|
|
1614
1502
|
// Export constants for external use
|
|
1615
|
-
|
|
1503
|
+
// (PROXY_PROVIDERS / TRANSLATION_SERVICES removed in v3.0.19 — see §A3 of IMPROVEMENT_PLAN.md)
|
|
1504
|
+
export { SUPPORTED_COUNTRIES, RTL_LANGUAGES };
|