@debugg-ai/debugg-ai-mcp 2.4.1 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,8 +15,23 @@ import { tunnelManager } from '../services/ngrok/tunnelManager.js';
15
15
  import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js';
16
16
  import { extractLocalhostPort } from '../utils/urlParser.js';
17
17
  import { getCachedTemplateUuid, getCachedProjectUuid, invalidateTemplateCache, invalidateProjectCache, } from '../utils/handlerCaches.js';
18
+ import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js';
19
+ import { Telemetry, TelemetryEvents } from '../utils/telemetry.js';
18
20
  const logger = new Logger({ module: 'testPageChangesHandler' });
19
21
  const TEMPLATE_NAME = 'app evaluation';
22
+ // Bead kbxy: bounded retry on known transient backend signatures (Pydantic
23
+ // JSON parse errors, 502s, ECONNRESETs). Default 1 retry; env-overridable
24
+ // up to 3 to balance reliability vs quota cost. Conservative: only retries
25
+ // on documented transient patterns (utils/transientErrors.ts).
26
/**
 * Resolve how many times a workflow may be re-run after a transient backend
 * failure.
 *
 * Reads the DEBUGGAI_TRANSIENT_RETRIES environment variable. The value must be
 * a plain non-negative decimal integer (surrounding whitespace and a leading
 * "+" are tolerated). Anything else — unset, empty, negative, fractional, or
 * partially numeric such as "2abc" / "0x3" — falls back to the default of 1.
 * Valid values are capped at 3.
 *
 * @returns {number} Retry budget, an integer in the range 0..3.
 */
function getMaxTransientRetries() {
    const DEFAULT_RETRIES = 1;
    const RETRY_CAP = 3;
    const raw = process.env.DEBUGGAI_TRANSIENT_RETRIES;
    if (raw === undefined) {
        return DEFAULT_RETRIES;
    }
    const text = raw.trim();
    // Validate the WHOLE string: parseInt() stops at the first non-digit, so
    // "2abc" would be read as 2 and "0x3" as 0 (silently disabling retries).
    if (!/^\+?\d+$/.test(text)) {
        return DEFAULT_RETRIES;
    }
    return Math.min(Number(text), RETRY_CAP);
}
20
35
  // Concurrency control — max 2 simultaneous browser checks.
21
36
  // Additional requests queue and run when a slot opens.
22
37
  const MAX_CONCURRENT = 2;
@@ -229,88 +244,126 @@ async function testPageChangesHandlerInner(input, context, rawProgressCallback)
229
244
  if (progressCallback) {
230
245
  await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
231
246
  }
232
- const executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
233
- const executionUuid = executeResponse.executionUuid;
234
- logger.info(`Execution queued: ${executionUuid}`);
235
- // --- Poll ---
236
- // Progress phases:
247
+ // --- Execute + Poll (with bounded retry on transient errors, bead kbxy) ---
248
+ // Progress phases (per attempt):
237
249
  // 1-3: MCP setup (tunnel, template, queue) — already sent above
238
250
  // 4-6: Backend setup (trigger, browser.setup, subworkflow starting)
239
251
  // 7-27: Agent steps (mapped from state.stepsTaken)
240
252
  // 28: Complete
241
253
  const BACKEND_SETUP_END = 6;
242
- let lastStepsTaken = 0;
243
- let observedMaxSteps = MAX_EXEC_STEPS;
244
254
  const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
245
- const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
246
- // Keep the tunnel alive while the workflow is actively running
247
- if (ctx.tunnelId)
248
- touchTunnelById(ctx.tunnelId);
249
- const nodes = exec.nodeExecutions ?? [];
250
- const stepsTaken = Math.max(nodes.filter(n => n.nodeType === 'brain.step').length, exec.state?.stepsTaken ?? 0);
251
- if (stepsTaken !== lastStepsTaken) {
252
- lastStepsTaken = stepsTaken;
253
- logger.info(`Execution status: ${exec.status}, nodes: ${nodes.length}, steps: ${stepsTaken}`);
254
- }
255
- if (!progressCallback)
256
- return;
257
- // Bead 0bq: emit the final "Complete:" progress INSIDE this callback
258
- // when terminal status is detected. pollExecution will return on the
259
- // next line (line 183 in services/workflows.ts), so there's no
260
- // post-pollExecution progress emission that could race the response.
261
- if (TERMINAL_STATUSES.has(exec.status)) {
262
- const terminalOutcome = exec.state?.outcome ?? exec.status;
263
- await progressCallback({
264
- progress: TOTAL_STEPS,
265
- total: TOTAL_STEPS,
266
- message: `Complete: ${terminalOutcome}`,
255
+ const MAX_RETRIES = getMaxTransientRetries();
256
+ let executeResponse;
257
+ let executionUuid = '';
258
+ let finalExecution;
259
+ let attempt = 0;
260
+ while (true) {
261
+ attempt++;
262
+ if (attempt > 1) {
263
+ // Retry path: emit telemetry + progress notification + brief backoff.
264
+ Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
265
+ tool: 'check_app_in_browser',
266
+ attempt,
267
+ reason: transientReasonTag(finalExecution),
268
+ previousExecutionId: executionUuid,
269
+ previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
270
+ previousStateError: finalExecution?.state?.error?.slice(0, 200),
267
271
  });
268
- return;
269
- }
270
- // --- Compute progress number ---
271
- let execProgress;
272
- let message;
273
- if (stepsTaken > 0) {
274
- // Agent is actively stepping — map into slots 7..27
275
- if (stepsTaken > observedMaxSteps)
276
- observedMaxSteps = stepsTaken + 5;
277
- const stepSlots = TOTAL_STEPS - BACKEND_SETUP_END - 1; // 21 slots
278
- execProgress = BACKEND_SETUP_END + Math.max(1, Math.round((stepsTaken / observedMaxSteps) * stepSlots));
279
- execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
280
- // Use state.currentAction for the message (backend sends intent + actionType)
281
- const ca = exec.state?.currentAction;
282
- if (ca?.intent) {
283
- const action = ca.actionType ?? ca.action_type ?? 'working';
284
- message = `Step ${stepsTaken}: [${action}] ${ca.intent}`;
285
- }
286
- else {
287
- message = `Agent evaluating... (step ${stepsTaken})`;
272
+ if (progressCallback) {
273
+ await progressCallback({
274
+ progress: SETUP_STEPS,
275
+ total: TOTAL_STEPS,
276
+ message: `Transient backend error — retrying (attempt ${attempt}/${MAX_RETRIES + 1})...`,
277
+ });
288
278
  }
279
+ await new Promise(r => setTimeout(r, 1000 * (attempt - 1)));
289
280
  }
290
- else {
291
- // No agent steps yet — show backend setup progress from node transitions
292
- const hasSubworkflow = nodes.some(n => n.nodeType === 'subworkflow.run');
293
- const hasBrowserSetup = nodes.some(n => n.nodeType === 'browser.setup');
294
- const browserReady = nodes.some(n => n.nodeType === 'browser.setup' && n.status === 'success');
295
- if (browserReady || hasSubworkflow) {
296
- execProgress = BACKEND_SETUP_END;
297
- message = 'Browser ready, agent starting...';
281
+ executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
282
+ executionUuid = executeResponse.executionUuid;
283
+ logger.info(`Execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);
284
+ // Closure state reset PER ATTEMPT so progress numbers don't double-count
285
+ // across retries.
286
+ let lastStepsTaken = 0;
287
+ let observedMaxSteps = MAX_EXEC_STEPS;
288
+ finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
289
+ // Keep the tunnel alive while the workflow is actively running
290
+ if (ctx.tunnelId)
291
+ touchTunnelById(ctx.tunnelId);
292
+ const nodes = exec.nodeExecutions ?? [];
293
+ const stepsTaken = Math.max(nodes.filter(n => n.nodeType === 'brain.step').length, exec.state?.stepsTaken ?? 0);
294
+ if (stepsTaken !== lastStepsTaken) {
295
+ lastStepsTaken = stepsTaken;
296
+ logger.info(`Execution status: ${exec.status}, nodes: ${nodes.length}, steps: ${stepsTaken}`);
298
297
  }
299
- else if (hasBrowserSetup) {
300
- execProgress = SETUP_STEPS + 2;
301
- message = 'Launching browser...';
298
+ if (!progressCallback)
299
+ return;
300
+ // Bead 0bq: emit the final "Complete:" progress INSIDE this callback
301
+ // when terminal status is detected. pollExecution will return on the
302
+ // next line (line 183 in services/workflows.ts), so there's no
303
+ // post-pollExecution progress emission that could race the response.
304
+ if (TERMINAL_STATUSES.has(exec.status)) {
305
+ const terminalOutcome = exec.state?.outcome ?? exec.status;
306
+ await progressCallback({
307
+ progress: TOTAL_STEPS,
308
+ total: TOTAL_STEPS,
309
+ message: `Complete: ${terminalOutcome}`,
310
+ });
311
+ return;
302
312
  }
303
- else if (nodes.length > 0) {
304
- execProgress = SETUP_STEPS + 1;
305
- message = 'Workflow triggered, preparing...';
313
+ // --- Compute progress number ---
314
+ let execProgress;
315
+ let message;
316
+ if (stepsTaken > 0) {
317
+ // Agent is actively stepping — map into slots 7..27
318
+ if (stepsTaken > observedMaxSteps)
319
+ observedMaxSteps = stepsTaken + 5;
320
+ const stepSlots = TOTAL_STEPS - BACKEND_SETUP_END - 1; // 21 slots
321
+ execProgress = BACKEND_SETUP_END + Math.max(1, Math.round((stepsTaken / observedMaxSteps) * stepSlots));
322
+ execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
323
+ // Use state.currentAction for the message (backend sends intent + actionType)
324
+ const ca = exec.state?.currentAction;
325
+ if (ca?.intent) {
326
+ const action = ca.actionType ?? ca.action_type ?? 'working';
327
+ message = `Step ${stepsTaken}: [${action}] ${ca.intent}`;
328
+ }
329
+ else {
330
+ message = `Agent evaluating... (step ${stepsTaken})`;
331
+ }
306
332
  }
307
333
  else {
308
- execProgress = SETUP_STEPS + 1;
309
- message = 'Waiting for execution to start...';
334
+ // No agent steps yet — show backend setup progress from node transitions
335
+ const hasSubworkflow = nodes.some(n => n.nodeType === 'subworkflow.run');
336
+ const hasBrowserSetup = nodes.some(n => n.nodeType === 'browser.setup');
337
+ const browserReady = nodes.some(n => n.nodeType === 'browser.setup' && n.status === 'success');
338
+ if (browserReady || hasSubworkflow) {
339
+ execProgress = BACKEND_SETUP_END;
340
+ message = 'Browser ready, agent starting...';
341
+ }
342
+ else if (hasBrowserSetup) {
343
+ execProgress = SETUP_STEPS + 2;
344
+ message = 'Launching browser...';
345
+ }
346
+ else if (nodes.length > 0) {
347
+ execProgress = SETUP_STEPS + 1;
348
+ message = 'Workflow triggered, preparing...';
349
+ }
350
+ else {
351
+ execProgress = SETUP_STEPS + 1;
352
+ message = 'Waiting for execution to start...';
353
+ }
310
354
  }
311
- }
312
- await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
313
- }, abortController.signal);
355
+ await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
356
+ }, abortController.signal);
357
+ // Decide retry vs exit: only retry on documented transient signatures
358
+ // AND while we still have budget. Otherwise break and surface whatever
359
+ // result the agent reached.
360
+ if (attempt > MAX_RETRIES)
361
+ break;
362
+ if (!isTransientWorkflowError(finalExecution))
363
+ break;
364
+ logger.warn(`Transient backend error detected (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
365
+ `retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
366
+ }
314
367
  const duration = Date.now() - startTime;
315
368
  // --- Format result ---
316
369
  const outcome = finalExecution.state?.outcome ?? finalExecution.status;
@@ -368,15 +421,41 @@ async function testPageChangesHandlerInner(input, context, rawProgressCallback)
368
421
  reason: sw.error || undefined,
369
422
  };
370
423
  }
424
+ const stepsTaken = finalExecution.state?.stepsTaken ?? subworkflowNode?.outputData?.stepsTaken ?? actionTrace.length;
425
+ const success = finalExecution.state?.success ?? subworkflowNode?.outputData?.success ?? false;
371
426
  const responsePayload = {
372
427
  outcome,
373
- success: finalExecution.state?.success ?? subworkflowNode?.outputData?.success ?? false,
428
+ success,
374
429
  status: finalExecution.status,
375
- stepsTaken: finalExecution.state?.stepsTaken ?? subworkflowNode?.outputData?.stepsTaken ?? actionTrace.length,
430
+ stepsTaken,
431
+ stepsBudget: MAX_EXEC_STEPS, // bead qmdd
432
+ stepsRemaining: Math.max(0, MAX_EXEC_STEPS - (stepsTaken ?? 0)), // bead qmdd
376
433
  targetUrl: originalUrl,
377
434
  executionId: executionUuid,
378
435
  durationMs: finalExecution.durationMs ?? duration,
379
436
  };
437
+ // Bead jqmj: failureCategory disambiguates the three meanings of 'fail':
438
+ // 'agent-error' — workflow/infra failure (Pydantic parse error,
439
+ // backend exception, transport issue). Caller's
440
+ // right move: retry-with-backoff.
441
+ // 'assertion-mismatch' — agent ran the scenario but page state didn't
442
+ // match expectations. Caller's right move: fix
443
+ // code or update the test description.
444
+ // ('page-error' is reserved for v2 — needs a structured signal from
445
+ // backend to distinguish from assertion-mismatch reliably; today's
446
+ // inferrable info is too fragile.)
447
+ // Field is OMITTED on success (no failure to categorize).
448
+ if (!success) {
449
+ // state.error is the AGENT's narrative — it can describe assertion
450
+ // failures ("expected heading to contain Welcome") OR infrastructure
451
+ // failures ("Pydantic JSON parse error"). Without a structured signal,
452
+ // we only count it as 'agent-error' when paired with workflow-level
453
+ // failure (status='failed') or a workflow-level errorMessage.
454
+ // status='failed' or errorMessage set → workflow-level / transport error.
455
+ const hasInfraFailure = finalExecution.status === 'failed'
456
+ || !!finalExecution.errorMessage;
457
+ responsePayload.failureCategory = hasInfraFailure ? 'agent-error' : 'assertion-mismatch';
458
+ }
380
459
  if (actionTrace.length > 0)
381
460
  responsePayload.actionTrace = actionTrace;
382
461
  if (evaluation)
@@ -20,8 +20,20 @@ import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js
20
20
  import { extractLocalhostPort } from '../utils/urlParser.js';
21
21
  import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
22
22
  import { getCachedTemplateUuid, invalidateTemplateCache } from '../utils/handlerCaches.js';
23
+ import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js';
24
+ import { Telemetry, TelemetryEvents } from '../utils/telemetry.js';
23
25
  const logger = new Logger({ module: 'triggerCrawlHandler' });
24
26
  const TEMPLATE_KEYWORD = 'raw crawl';
27
+ // Bead kbo9: same env-driven retry budget as testPageChangesHandler (kbxy).
28
/**
 * Resolve how many times a crawl may be re-run after a transient backend
 * failure.
 *
 * Reads the DEBUGGAI_TRANSIENT_RETRIES environment variable. The value must be
 * a plain non-negative decimal integer (surrounding whitespace and a leading
 * "+" are tolerated). Anything else — unset, empty, negative, fractional, or
 * partially numeric such as "2abc" / "0x3" — falls back to the default of 1.
 * Valid values are capped at 3.
 *
 * @returns {number} Retry budget, an integer in the range 0..3.
 */
function getMaxTransientRetries() {
    const DEFAULT_RETRIES = 1;
    const RETRY_CAP = 3;
    const raw = process.env.DEBUGGAI_TRANSIENT_RETRIES;
    if (raw === undefined) {
        return DEFAULT_RETRIES;
    }
    const text = raw.trim();
    // Validate the WHOLE string: parseInt() stops at the first non-digit, so
    // "2abc" would be read as 2 and "0x3" as 0 (silently disabling retries).
    if (!/^\+?\d+$/.test(text)) {
        return DEFAULT_RETRIES;
    }
    return Math.min(Number(text), RETRY_CAP);
}
25
37
  export async function triggerCrawlHandler(input, context, rawProgressCallback) {
26
38
  const startTime = Date.now();
27
39
  logger.toolStart('trigger_crawl', input);
@@ -151,32 +163,64 @@ export async function triggerCrawlHandler(input, context, rawProgressCallback) {
151
163
  if (progressCallback) {
152
164
  await progressCallback({ progress: 3, total: 4, message: 'Queuing crawl execution...' });
153
165
  }
154
- const executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
155
- const executionUuid = executeResponse.executionUuid;
156
- logger.info(`Crawl execution queued: ${executionUuid}`);
157
- // --- Poll ---
158
- // Bead 0bq: emit the final progress (4/4 "Complete:...") INSIDE onUpdate
159
- // when terminal status detected, so there's no post-resolve emission that
160
- // could race the response and cause stale-progressToken transport tear-down.
166
+ // --- Execute + Poll (with bounded retry on transient errors, bead kbo9) ---
161
167
  const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
162
- const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
163
- if (ctx.tunnelId)
164
- touchTunnelById(ctx.tunnelId);
165
- if (!progressCallback)
166
- return;
167
- const nodeCount = (exec.nodeExecutions ?? []).length;
168
- if (TERMINAL_STATUSES.has(exec.status)) {
168
+ const MAX_RETRIES = getMaxTransientRetries();
169
+ let executeResponse;
170
+ let executionUuid = '';
171
+ let finalExecution;
172
+ let attempt = 0;
173
+ while (true) {
174
+ attempt++;
175
+ if (attempt > 1) {
176
+ Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
177
+ tool: 'trigger_crawl',
178
+ attempt,
179
+ reason: transientReasonTag(finalExecution),
180
+ previousExecutionId: executionUuid,
181
+ previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
182
+ previousStateError: finalExecution?.state?.error?.slice(0, 200),
183
+ });
184
+ if (progressCallback) {
185
+ await progressCallback({
186
+ progress: 3, total: 4,
187
+ message: `Transient backend error — retrying crawl (attempt ${attempt}/${MAX_RETRIES + 1})...`,
188
+ });
189
+ }
190
+ await new Promise(r => setTimeout(r, 1000 * (attempt - 1)));
191
+ }
192
+ executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
193
+ executionUuid = executeResponse.executionUuid;
194
+ logger.info(`Crawl execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);
195
+ // --- Poll ---
196
+ // Bead 0bq: emit the final progress (4/4 "Complete:...") INSIDE onUpdate
197
+ // when terminal status detected, so there's no post-resolve emission that
198
+ // could race the response and cause stale-progressToken transport tear-down.
199
+ finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
200
+ if (ctx.tunnelId)
201
+ touchTunnelById(ctx.tunnelId);
202
+ if (!progressCallback)
203
+ return;
204
+ const nodeCount = (exec.nodeExecutions ?? []).length;
205
+ if (TERMINAL_STATUSES.has(exec.status)) {
206
+ await progressCallback({
207
+ progress: 4, total: 4,
208
+ message: `Crawl ${exec.status} (${nodeCount} nodes)`,
209
+ });
210
+ return;
211
+ }
169
212
  await progressCallback({
170
213
  progress: 4, total: 4,
171
214
  message: `Crawl ${exec.status} (${nodeCount} nodes)`,
172
215
  });
173
- return;
174
- }
175
- await progressCallback({
176
- progress: 4, total: 4,
177
- message: `Crawl ${exec.status} (${nodeCount} nodes)`,
178
- });
179
- }, abortController.signal);
216
+ }, abortController.signal);
217
+ if (attempt > MAX_RETRIES)
218
+ break;
219
+ if (!isTransientWorkflowError(finalExecution))
220
+ break;
221
+ logger.warn(`Transient backend error detected on crawl (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
222
+ `retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
223
+ }
180
224
  const duration = Date.now() - startTime;
181
225
  const nodes = finalExecution.nodeExecutions ?? [];
182
226
  // --- Format response ---
@@ -49,6 +49,17 @@ class TunnelManager {
49
49
  pendingTunnels = new Map();
50
50
  initialized = false;
51
51
  TUNNEL_TIMEOUT_MS = 55 * 60 * 1000;
52
+ /**
53
+ * Bead `3th`: registry-entry freshness window. An entry not touched within
54
+ * this many ms is treated as stale even if its owner PID is alive — defends
55
+ * against PID-reuse (OS reassigns dead-owner's PID to a different process).
56
+ */
57
+ REGISTRY_FRESHNESS_TTL_MS = 30 * 60 * 1000;
58
+ /**
59
+ * Bead `mdp`: prune-on-startup eviction window. Entries older than this OR
60
+ * with dead owner PID get swept out when TunnelManager initializes.
61
+ */
62
+ REGISTRY_PRUNE_THRESHOLD_MS = 60 * 60 * 1000;
52
63
  /**
53
64
  * Backoff schedule (ms) between ngrok.connect() retry attempts. Bead ixh.
54
65
  * Exposed on the class so tests can override with short delays without
@@ -57,6 +68,26 @@ class TunnelManager {
57
68
  connectBackoffMs = [500, 1500];
58
69
  constructor(reg = getDefaultRegistry()) {
59
70
  this.reg = reg;
71
+ // Bead `mdp`: sweep stale entries on startup so the registry doesn't grow
72
+ // unboundedly across MCP processes that exited without stopAllTunnels
73
+ // (SIGKILL / crash). Best-effort — no-op registries don't actually prune.
74
+ try {
75
+ const result = this.reg.prune({ staleAfterMs: this.REGISTRY_PRUNE_THRESHOLD_MS });
76
+ if (result.pruned > 0) {
77
+ logger.info(`Pruned ${result.pruned} stale registry entries on startup (${result.remaining} remaining)`);
78
+ }
79
+ }
80
+ catch (err) {
81
+ logger.warn(`Registry prune-on-startup failed (non-fatal): ${err}`);
82
+ }
83
+ }
84
+ /**
85
+ * Bead `3th`: freshness check used at borrow sites. Returns true if the
86
+ * entry is BOTH owner-alive AND touched recently enough to trust.
87
+ */
88
+ isEntryUsable(entry, nowMs = Date.now()) {
89
+ return (this.reg.isPidAlive(entry.ownerPid) &&
90
+ (nowMs - entry.lastAccessedAt) <= this.REGISTRY_FRESHNESS_TTL_MS);
60
91
  }
61
92
  // ── Public API ──────────────────────────────────────────────────────────────
62
93
  async processUrl(url, authToken, specificTunnelId, keyId, revokeKey) {
@@ -82,11 +113,18 @@ class TunnelManager {
82
113
  if (!existing)
83
114
  return undefined;
84
115
  if (!existing.isOwned) {
85
- // Verify the owning process is still alive
116
+ // Verify the owning process is still alive AND the entry is fresh
117
+ // (lastAccessedAt within REGISTRY_FRESHNESS_TTL_MS — defends against
118
+ // PID-reuse per bead 3th).
86
119
  const entry = this.reg.read()[String(port)];
87
- if (!entry || !this.reg.isPidAlive(entry.ownerPid)) {
120
+ if (!entry || !this.isEntryUsable(entry)) {
88
121
  this.activeTunnels.delete(existing.tunnelId);
89
- logger.info(`Evicted stale borrowed tunnel ${existing.tunnelId} (owner PID ${entry?.ownerPid} dead)`);
122
+ const reason = !entry
123
+ ? 'no registry entry'
124
+ : !this.reg.isPidAlive(entry.ownerPid)
125
+ ? `owner PID ${entry.ownerPid} dead`
126
+ : `entry stale (last accessed ${Math.round((Date.now() - entry.lastAccessedAt) / 1000)}s ago)`;
127
+ logger.info(`Evicted stale borrowed tunnel ${existing.tunnelId} (${reason})`);
90
128
  return undefined;
91
129
  }
92
130
  }
@@ -223,10 +261,12 @@ class TunnelManager {
223
261
  const info = await pending;
224
262
  return { url: info.publicUrl, tunnelId: info.tunnelId, isLocalhost: true };
225
263
  }
226
- // 3. Check cross-process registry — another MCP instance may own a tunnel
264
+ // 3. Check cross-process registry — another MCP instance may own a tunnel.
265
+ // Borrow only if the entry is fresh (PID alive AND touched within
266
+ // REGISTRY_FRESHNESS_TTL_MS — defends against PID-reuse, bead 3th).
227
267
  const registry = this.reg.read();
228
268
  const regEntry = registry[String(port)];
229
- if (regEntry && this.reg.isPidAlive(regEntry.ownerPid)) {
269
+ if (regEntry && this.isEntryUsable(regEntry)) {
230
270
  logger.info(`Borrowing tunnel from PID ${regEntry.ownerPid} for port ${port}: ${regEntry.publicUrl}`);
231
271
  const now = Date.now();
232
272
  const borrowed = {
@@ -293,7 +333,6 @@ class TunnelManager {
293
333
  // (existing "agent died" recovery path)
294
334
  // - Attempt 3: after 1500ms backoff, retry with the already-reset agent
295
335
  // Auth-token errors short-circuit at any attempt — no point looping.
296
- const self = this;
297
336
  // Bead 42g: fault injection + trace. Only active when NODE_ENV !== 'production'
298
337
  // AND DEBUGG_TUNNEL_FAULT_MODE env var is set. Zero overhead when disabled.
299
338
  const faultMode = getFaultModeFromEnv();
@@ -302,7 +341,7 @@ class TunnelManager {
302
341
  trace.emit('createTunnel.start', { port, tunnelId, hasFaultMode: !!faultMode });
303
342
  const connectWithRetry = async () => {
304
343
  const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
305
- const BACKOFF_MS = self.connectBackoffMs; // bead ixh: test-overridable
344
+ const BACKOFF_MS = this.connectBackoffMs; // bead ixh: test-overridable
306
345
  const MAX_ATTEMPTS = BACKOFF_MS.length + 1; // N sleeps between N+1 attempts
307
346
  const connectOpts = {
308
347
  proto: 'http',
@@ -14,7 +14,7 @@ import { join } from 'path';
14
14
  // ── File-backed implementation (production) ───────────────────────────────────
15
15
  const REGISTRY_FILE = join(tmpdir(), 'debugg-ai-tunnels.json');
16
16
  export function createFileRegistry() {
17
- return {
17
+ const store = {
18
18
  read() {
19
19
  try {
20
20
  if (!existsSync(REGISTRY_FILE))
@@ -38,22 +38,29 @@ export function createFileRegistry() {
38
38
  isPidAlive(pid) {
39
39
  return checkPid(pid);
40
40
  },
41
+ prune(opts) {
42
+ return pruneRegistryData(store, opts);
43
+ },
41
44
  };
45
+ return store;
42
46
  }
43
47
  // ── In-memory implementation (tests / injectable) ─────────────────────────────
44
48
  export function createInMemoryRegistry(isPidAliveImpl) {
45
- let store = {};
46
- return {
47
- read: () => ({ ...store }),
48
- write: (data) => { store = { ...data }; },
49
+ let data = {};
50
+ const store = {
51
+ read: () => ({ ...data }),
52
+ write: (next) => { data = { ...next }; },
49
53
  isPidAlive: isPidAliveImpl ?? checkPid,
54
+ prune: (opts) => pruneRegistryData(store, opts),
50
55
  };
56
+ return store;
51
57
  }
52
58
  // ── No-op implementation (tests that don't exercise registry) ─────────────────
53
59
  export const noopRegistry = {
54
60
  read: () => ({}),
55
61
  write: () => { },
56
62
  isPidAlive: () => false,
63
+ prune: () => ({ pruned: 0, remaining: 0 }),
57
64
  };
58
65
  // ── Default selection ─────────────────────────────────────────────────────────
59
66
  /**
@@ -73,3 +80,30 @@ function checkPid(pid) {
73
80
  return false;
74
81
  }
75
82
  }
83
/**
 * Shared prune logic — read, filter, write back. Used by both the file-backed
 * and in-memory implementations so the eviction policy lives in one place.
 *
 * Eviction rule: an entry is dropped when ANY of the following holds:
 *   - it is not an object (e.g. `null` left behind by a corrupted or
 *     hand-edited registry) — previously this threw and aborted the sweep;
 *   - `store.isPidAlive(entry.ownerPid)` is falsy;
 *   - it was last accessed more than `opts.staleAfterMs` ms before "now"
 *     (a missing `lastAccessedAt` counts as stale). The freshness check is
 *     what defends against PID-reuse (bead 3th).
 *
 * The store is written back only when at least one entry was dropped.
 *
 * @param store Registry exposing read(), write(data) and isPidAlive(pid).
 * @param opts  `staleAfterMs` (required) and optional `nowMs` clock override.
 * @returns {{pruned: number, remaining: number}} Counts of dropped and kept entries.
 */
function pruneRegistryData(store, opts) {
    const now = opts.nowMs ?? Date.now();
    const kept = {};
    let pruned = 0;
    for (const [port, entry] of Object.entries(store.read() ?? {})) {
        const wellFormed = entry !== null && typeof entry === 'object';
        const aliveAndFresh = wellFormed &&
            store.isPidAlive(entry.ownerPid) &&
            (now - entry.lastAccessedAt) <= opts.staleAfterMs;
        if (aliveAndFresh) {
            kept[port] = entry;
        }
        else {
            pruned++;
        }
    }
    if (pruned > 0) {
        store.write(kept);
    }
    return { pruned, remaining: Object.keys(kept).length };
}
@@ -1,2 +1 @@
1
1
  export {};
2
- /* eslint-enable */
@@ -1,5 +1,6 @@
1
1
  import { buildTestPageChangesTool, buildValidatedTestPageChangesTool } from './testPageChanges.js';
2
2
  import { buildTriggerCrawlTool, buildValidatedTriggerCrawlTool } from './triggerCrawl.js';
3
+ import { buildProbePageTool, buildValidatedProbePageTool } from './probePage.js';
3
4
  import { buildSearchProjectsTool, buildValidatedSearchProjectsTool } from './searchProjects.js';
4
5
  import { buildSearchEnvironmentsTool, buildValidatedSearchEnvironmentsTool } from './searchEnvironments.js';
5
6
  import { buildSearchExecutionsTool, buildValidatedSearchExecutionsTool } from './searchExecutions.js';
@@ -19,6 +20,7 @@ export function initTools(ctx) {
19
20
  const tools = [
20
21
  buildTestPageChangesTool(ctx),
21
22
  buildTriggerCrawlTool(ctx),
23
+ buildProbePageTool(),
22
24
  buildSearchProjectsTool(),
23
25
  buildSearchEnvironmentsTool(),
24
26
  buildCreateEnvironmentTool(),
@@ -32,6 +34,7 @@ export function initTools(ctx) {
32
34
  const validated = [
33
35
  buildValidatedTestPageChangesTool(ctx),
34
36
  buildValidatedTriggerCrawlTool(ctx),
37
+ buildValidatedProbePageTool(),
35
38
  buildValidatedSearchProjectsTool(),
36
39
  buildValidatedSearchEnvironmentsTool(),
37
40
  buildValidatedCreateEnvironmentTool(),