npm - @debugg-ai/debugg-ai-mcp - Versions diffs - 2.4.0 → 2.5.0 - Mend

@debugg-ai/debugg-ai-mcp 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +70 -2
package/dist/handlers/index.js +1 -0
package/dist/handlers/probePageHandler.js +275 -0
package/dist/handlers/searchEnvironmentsHandler.js +12 -2
package/dist/handlers/testPageChangesHandler.js +149 -70
package/dist/handlers/triggerCrawlHandler.js +65 -21
package/dist/services/ngrok/tunnelManager.js +46 -7
package/dist/services/ngrok/tunnelRegistry.js +39 -5
package/dist/services/ngrok/types.js +0 -1
package/dist/tools/index.js +3 -0
package/dist/tools/probePage.js +89 -0
package/dist/types/index.js +17 -0
package/dist/utils/errors.js +0 -1
package/dist/utils/harSummarizer.js +105 -0
package/dist/utils/projectAnalyzer.js +2 -2
package/dist/utils/telemetry.js +1 -0
package/dist/utils/transientErrors.js +82 -0
package/dist/utils/urlParser.js +1 -1
package/dist/utils/validation.js +1 -1
package/package.json +1 -1

package/dist/handlers/testPageChangesHandler.js CHANGED Viewed

@@ -15,8 +15,23 @@ import { tunnelManager } from '../services/ngrok/tunnelManager.js';
 import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js';
 import { extractLocalhostPort } from '../utils/urlParser.js';
 import { getCachedTemplateUuid, getCachedProjectUuid, invalidateTemplateCache, invalidateProjectCache, } from '../utils/handlerCaches.js';
+import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js';
+import { Telemetry, TelemetryEvents } from '../utils/telemetry.js';
 const logger = new Logger({ module: 'testPageChangesHandler' });
 const TEMPLATE_NAME = 'app evaluation';
+// Bead kbxy: retry budget for known transient backend signatures (Pydantic
+// JSON parse errors, 502s, ECONNRESETs — see utils/transientErrors.js).
+// Returns the number of retries allowed: DEBUGGAI_TRANSIENT_RETRIES if it
+// parses to a non-negative integer (capped at 3, for quota cost), else 1.
+function getMaxTransientRetries() {
+    const raw = process.env.DEBUGGAI_TRANSIENT_RETRIES;
+    if (raw === undefined || raw === '')
+        return 1; // unset or empty → default of one retry
+    const n = parseInt(raw, 10); // lenient: a leading numeric prefix is accepted ('2abc' → 2)
+    if (!Number.isFinite(n) || n < 0)
+        return 1; // non-numeric (NaN) or negative → default of one retry
+    return Math.min(n, 3); // 0 is a valid result: the retry loop then exits after the first attempt
+}
 // Concurrency control — max 2 simultaneous browser checks.
 // Additional requests queue and run when a slot opens.
 const MAX_CONCURRENT = 2;
@@ -229,88 +244,126 @@ async function testPageChangesHandlerInner(input, context, rawProgressCallback)
         if (progressCallback) {
             await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
         }
-        const executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
-        const executionUuid = executeResponse.executionUuid;
-        logger.info(`Execution queued: ${executionUuid}`);
-        // --- Poll ---
-        // Progress phases:
+        // --- Execute + Poll (with bounded retry on transient errors, bead kbxy) ---
+        // Progress phases (per attempt):
         //   1-3:   MCP setup (tunnel, template, queue) — already sent above
         //   4-6:   Backend setup (trigger, browser.setup, subworkflow starting)
         //   7-27:  Agent steps (mapped from state.stepsTaken)
         //   28:    Complete
         const BACKEND_SETUP_END = 6;
-        let lastStepsTaken = 0;
-        let observedMaxSteps = MAX_EXEC_STEPS;
         const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
-        const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
-            // Keep the tunnel alive while the workflow is actively running
-            if (ctx.tunnelId)
-                touchTunnelById(ctx.tunnelId);
-            const nodes = exec.nodeExecutions ?? [];
-            const stepsTaken = Math.max(nodes.filter(n => n.nodeType === 'brain.step').length, exec.state?.stepsTaken ?? 0);
-            if (stepsTaken !== lastStepsTaken) {
-                lastStepsTaken = stepsTaken;
-                logger.info(`Execution status: ${exec.status}, nodes: ${nodes.length}, steps: ${stepsTaken}`);
-            }
-            if (!progressCallback)
-                return;
-            // Bead 0bq: emit the final "Complete:" progress INSIDE this callback
-            // when terminal status is detected. pollExecution will return on the
-            // next line (line 183 in services/workflows.ts), so there's no
-            // post-pollExecution progress emission that could race the response.
-            if (TERMINAL_STATUSES.has(exec.status)) {
-                const terminalOutcome = exec.state?.outcome ?? exec.status;
-                await progressCallback({
-                    progress: TOTAL_STEPS,
-                    total: TOTAL_STEPS,
-                    message: `Complete: ${terminalOutcome}`,
+        const MAX_RETRIES = getMaxTransientRetries();
+        let executeResponse;
+        let executionUuid = '';
+        let finalExecution;
+        let attempt = 0;
+        while (true) {
+            attempt++;
+            if (attempt > 1) {
+                // Retry path — emit telemetry + progress notification + brief backoff.
+                Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
+                    tool: 'check_app_in_browser',
+                    attempt,
+                    reason: transientReasonTag(finalExecution),
+                    previousExecutionId: executionUuid,
+                    previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
+                    previousStateError: finalExecution?.state?.error?.slice(0, 200),
                 });
-                return;
-            }
-            // --- Compute progress number ---
-            let execProgress;
-            let message;
-            if (stepsTaken > 0) {
-                // Agent is actively stepping — map into slots 7..27
-                if (stepsTaken > observedMaxSteps)
-                    observedMaxSteps = stepsTaken + 5;
-                const stepSlots = TOTAL_STEPS - BACKEND_SETUP_END - 1; // 21 slots
-                execProgress = BACKEND_SETUP_END + Math.max(1, Math.round((stepsTaken / observedMaxSteps) * stepSlots));
-                execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
-                // Use state.currentAction for the message (backend sends intent + actionType)
-                const ca = exec.state?.currentAction;
-                if (ca?.intent) {
-                    const action = ca.actionType ?? ca.action_type ?? 'working';
-                    message = `Step ${stepsTaken}: [${action}] ${ca.intent}`;
-                }
-                else {
-                    message = `Agent evaluating... (step ${stepsTaken})`;
+                if (progressCallback) {
+                    await progressCallback({
+                        progress: SETUP_STEPS,
+                        total: TOTAL_STEPS,
+                        message: `Transient backend error — retrying (attempt ${attempt}/${MAX_RETRIES + 1})...`,
+                    });
                 }
+                await new Promise(r => setTimeout(r, 1000 * (attempt - 1)));
             }
-            else {
-                // No agent steps yet — show backend setup progress from node transitions
-                const hasSubworkflow = nodes.some(n => n.nodeType === 'subworkflow.run');
-                const hasBrowserSetup = nodes.some(n => n.nodeType === 'browser.setup');
-                const browserReady = nodes.some(n => n.nodeType === 'browser.setup' && n.status === 'success');
-                if (browserReady || hasSubworkflow) {
-                    execProgress = BACKEND_SETUP_END;
-                    message = 'Browser ready, agent starting...';
+            executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
+            executionUuid = executeResponse.executionUuid;
+            logger.info(`Execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);
+            // Closure state — reset PER ATTEMPT so progress numbers don't double-count
+            // across retries.
+            let lastStepsTaken = 0;
+            let observedMaxSteps = MAX_EXEC_STEPS;
+            finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
+                // Keep the tunnel alive while the workflow is actively running
+                if (ctx.tunnelId)
+                    touchTunnelById(ctx.tunnelId);
+                const nodes = exec.nodeExecutions ?? [];
+                const stepsTaken = Math.max(nodes.filter(n => n.nodeType === 'brain.step').length, exec.state?.stepsTaken ?? 0);
+                if (stepsTaken !== lastStepsTaken) {
+                    lastStepsTaken = stepsTaken;
+                    logger.info(`Execution status: ${exec.status}, nodes: ${nodes.length}, steps: ${stepsTaken}`);
                 }
-                else if (hasBrowserSetup) {
-                    execProgress = SETUP_STEPS + 2;
-                    message = 'Launching browser...';
+                if (!progressCallback)
+                    return;
+                // Bead 0bq: emit the final "Complete:" progress INSIDE this callback
+                // when terminal status is detected. pollExecution will return on the
+                // next line (line 183 in services/workflows.ts), so there's no
+                // post-pollExecution progress emission that could race the response.
+                if (TERMINAL_STATUSES.has(exec.status)) {
+                    const terminalOutcome = exec.state?.outcome ?? exec.status;
+                    await progressCallback({
+                        progress: TOTAL_STEPS,
+                        total: TOTAL_STEPS,
+                        message: `Complete: ${terminalOutcome}`,
+                    });
+                    return;
                 }
-                else if (nodes.length > 0) {
-                    execProgress = SETUP_STEPS + 1;
-                    message = 'Workflow triggered, preparing...';
+                // --- Compute progress number ---
+                let execProgress;
+                let message;
+                if (stepsTaken > 0) {
+                    // Agent is actively stepping — map into slots 7..27
+                    if (stepsTaken > observedMaxSteps)
+                        observedMaxSteps = stepsTaken + 5;
+                    const stepSlots = TOTAL_STEPS - BACKEND_SETUP_END - 1; // 21 slots
+                    execProgress = BACKEND_SETUP_END + Math.max(1, Math.round((stepsTaken / observedMaxSteps) * stepSlots));
+                    execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
+                    // Use state.currentAction for the message (backend sends intent + actionType)
+                    const ca = exec.state?.currentAction;
+                    if (ca?.intent) {
+                        const action = ca.actionType ?? ca.action_type ?? 'working';
+                        message = `Step ${stepsTaken}: [${action}] ${ca.intent}`;
+                    }
+                    else {
+                        message = `Agent evaluating... (step ${stepsTaken})`;
+                    }
                 }
                 else {
-                    execProgress = SETUP_STEPS + 1;
-                    message = 'Waiting for execution to start...';
+                    // No agent steps yet — show backend setup progress from node transitions
+                    const hasSubworkflow = nodes.some(n => n.nodeType === 'subworkflow.run');
+                    const hasBrowserSetup = nodes.some(n => n.nodeType === 'browser.setup');
+                    const browserReady = nodes.some(n => n.nodeType === 'browser.setup' && n.status === 'success');
+                    if (browserReady || hasSubworkflow) {
+                        execProgress = BACKEND_SETUP_END;
+                        message = 'Browser ready, agent starting...';
+                    }
+                    else if (hasBrowserSetup) {
+                        execProgress = SETUP_STEPS + 2;
+                        message = 'Launching browser...';
+                    }
+                    else if (nodes.length > 0) {
+                        execProgress = SETUP_STEPS + 1;
+                        message = 'Workflow triggered, preparing...';
+                    }
+                    else {
+                        execProgress = SETUP_STEPS + 1;
+                        message = 'Waiting for execution to start...';
+                    }
                 }
-            }
-            await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
-        }, abortController.signal);
+                await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
+            }, abortController.signal);
+            // Decide retry vs exit: only retry on documented transient signatures
+            // AND while we still have budget. Otherwise break and surface whatever
+            // result the agent reached.
+            if (attempt > MAX_RETRIES)
+                break;
+            if (!isTransientWorkflowError(finalExecution))
+                break;
+            logger.warn(`Transient backend error detected (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
+                `retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
+        }
         const duration = Date.now() - startTime;
         // --- Format result ---
         const outcome = finalExecution.state?.outcome ?? finalExecution.status;
@@ -368,15 +421,41 @@ async function testPageChangesHandlerInner(input, context, rawProgressCallback)
                 reason: sw.error || undefined,
             };
         }
+        const stepsTaken = finalExecution.state?.stepsTaken ?? subworkflowNode?.outputData?.stepsTaken ?? actionTrace.length;
+        const success = finalExecution.state?.success ?? subworkflowNode?.outputData?.success ?? false;
         const responsePayload = {
             outcome,
-            success: finalExecution.state?.success ?? subworkflowNode?.outputData?.success ?? false,
+            success,
             status: finalExecution.status,
-            stepsTaken: finalExecution.state?.stepsTaken ?? subworkflowNode?.outputData?.stepsTaken ?? actionTrace.length,
+            stepsTaken,
+            stepsBudget: MAX_EXEC_STEPS, // bead qmdd
+            stepsRemaining: Math.max(0, MAX_EXEC_STEPS - (stepsTaken ?? 0)), // bead qmdd
             targetUrl: originalUrl,
             executionId: executionUuid,
             durationMs: finalExecution.durationMs ?? duration,
         };
+        // Bead jqmj: failureCategory disambiguates the three meanings of 'fail':
+        //   'agent-error'        — workflow/infra failure (Pydantic parse error,
+        //                          backend exception, transport issue). Caller's
+        //                          right move: retry-with-backoff.
+        //   'assertion-mismatch' — agent ran the scenario but page state didn't
+        //                          match expectations. Caller's right move: fix
+        //                          code or update the test description.
+        //   ('page-error' is reserved for v2 — needs a structured signal from
+        //   backend to distinguish from assertion-mismatch reliably; today's
+        //   inferrable info is too fragile.)
+        // Field is OMITTED on success (no failure to categorize).
+        if (!success) {
+            // state.error is the AGENT's narrative — it can describe assertion
+            // failures ("expected heading to contain Welcome") OR infrastructure
+            // failures ("Pydantic JSON parse error"). Without a structured signal,
+            // we only count it as 'agent-error' when paired with workflow-level
+            // failure (status='failed') or transient signature.
+            // status='failed' or errorMessage set → workflow-level / transport error.
+            const hasInfraFailure = finalExecution.status === 'failed'
+                || !!finalExecution.errorMessage;
+            responsePayload.failureCategory = hasInfraFailure ? 'agent-error' : 'assertion-mismatch';
+        }
         if (actionTrace.length > 0)
             responsePayload.actionTrace = actionTrace;
         if (evaluation)

package/dist/handlers/triggerCrawlHandler.js CHANGED Viewed

@@ -20,8 +20,20 @@ import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js
 import { extractLocalhostPort } from '../utils/urlParser.js';
 import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
 import { getCachedTemplateUuid, invalidateTemplateCache } from '../utils/handlerCaches.js';
+import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js';
+import { Telemetry, TelemetryEvents } from '../utils/telemetry.js';
 const logger = new Logger({ module: 'triggerCrawlHandler' });
 const TEMPLATE_KEYWORD = 'raw crawl';
+// Bead kbo9: same env-driven retry budget as testPageChangesHandler (kbxy) — default 1, capped at 3.
+function getMaxTransientRetries() {
+    const raw = process.env.DEBUGGAI_TRANSIENT_RETRIES;
+    if (raw === undefined || raw === '')
+        return 1; // unset or empty → default of one retry
+    const n = parseInt(raw, 10); // lenient: a leading numeric prefix is accepted ('2abc' → 2)
+    if (!Number.isFinite(n) || n < 0)
+        return 1; // non-numeric (NaN) or negative → default of one retry
+    return Math.min(n, 3); // 0 is a valid result: the retry loop then exits after the first attempt
+}
 export async function triggerCrawlHandler(input, context, rawProgressCallback) {
     const startTime = Date.now();
     logger.toolStart('trigger_crawl', input);
@@ -151,32 +163,64 @@ export async function triggerCrawlHandler(input, context, rawProgressCallback) {
         if (progressCallback) {
             await progressCallback({ progress: 3, total: 4, message: 'Queuing crawl execution...' });
         }
-        const executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
-        const executionUuid = executeResponse.executionUuid;
-        logger.info(`Crawl execution queued: ${executionUuid}`);
-        // --- Poll ---
-        // Bead 0bq: emit the final progress (4/4 "Complete:...") INSIDE onUpdate
-        // when terminal status detected, so there's no post-resolve emission that
-        // could race the response and cause stale-progressToken transport tear-down.
+        // --- Execute + Poll (with bounded retry on transient errors, bead kbo9) ---
         const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
-        const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
-            if (ctx.tunnelId)
-                touchTunnelById(ctx.tunnelId);
-            if (!progressCallback)
-                return;
-            const nodeCount = (exec.nodeExecutions ?? []).length;
-            if (TERMINAL_STATUSES.has(exec.status)) {
+        const MAX_RETRIES = getMaxTransientRetries();
+        let executeResponse;
+        let executionUuid = '';
+        let finalExecution;
+        let attempt = 0;
+        while (true) {
+            attempt++;
+            if (attempt > 1) {
+                Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
+                    tool: 'trigger_crawl',
+                    attempt,
+                    reason: transientReasonTag(finalExecution),
+                    previousExecutionId: executionUuid,
+                    previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
+                    previousStateError: finalExecution?.state?.error?.slice(0, 200),
+                });
+                if (progressCallback) {
+                    await progressCallback({
+                        progress: 3, total: 4,
+                        message: `Transient backend error — retrying crawl (attempt ${attempt}/${MAX_RETRIES + 1})...`,
+                    });
+                }
+                await new Promise(r => setTimeout(r, 1000 * (attempt - 1)));
+            }
+            executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
+            executionUuid = executeResponse.executionUuid;
+            logger.info(`Crawl execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);
+            // --- Poll ---
+            // Bead 0bq: emit the final progress (4/4 "Complete:...") INSIDE onUpdate
+            // when terminal status detected, so there's no post-resolve emission that
+            // could race the response and cause stale-progressToken transport tear-down.
+            finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
+                if (ctx.tunnelId)
+                    touchTunnelById(ctx.tunnelId);
+                if (!progressCallback)
+                    return;
+                const nodeCount = (exec.nodeExecutions ?? []).length;
+                if (TERMINAL_STATUSES.has(exec.status)) {
+                    await progressCallback({
+                        progress: 4, total: 4,
+                        message: `Crawl ${exec.status} (${nodeCount} nodes)`,
+                    });
+                    return;
+                }
                 await progressCallback({
                     progress: 4, total: 4,
                     message: `Crawl ${exec.status} (${nodeCount} nodes)`,
                 });
-                return;
-            }
-            await progressCallback({
-                progress: 4, total: 4,
-                message: `Crawl ${exec.status} (${nodeCount} nodes)`,
-            });
-        }, abortController.signal);
+            }, abortController.signal);
+            if (attempt > MAX_RETRIES)
+                break;
+            if (!isTransientWorkflowError(finalExecution))
+                break;
+            logger.warn(`Transient backend error detected on crawl (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
+                `retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
+        }
         const duration = Date.now() - startTime;
         const nodes = finalExecution.nodeExecutions ?? [];
         // --- Format response ---

package/dist/services/ngrok/tunnelManager.js CHANGED Viewed

@@ -49,6 +49,17 @@ class TunnelManager {
     pendingTunnels = new Map();
     initialized = false;
     TUNNEL_TIMEOUT_MS = 55 * 60 * 1000;
+    /**
+     * Bead `3th`: registry-entry freshness window. An entry not touched within
+     * this many ms is treated as stale even if its owner PID is alive — defends
+     * against PID-reuse (OS reassigns dead-owner's PID to a different process).
+     */
+    REGISTRY_FRESHNESS_TTL_MS = 30 * 60 * 1000;
+    /**
+     * Bead `mdp`: prune-on-startup eviction window. Entries older than this OR
+     * with dead owner PID get swept out when TunnelManager initializes.
+     */
+    REGISTRY_PRUNE_THRESHOLD_MS = 60 * 60 * 1000;
     /**
      * Backoff schedule (ms) between ngrok.connect() retry attempts. Bead ixh.
      * Exposed on the class so tests can override with short delays without
@@ -57,6 +68,26 @@ class TunnelManager {
     connectBackoffMs = [500, 1500];
     constructor(reg = getDefaultRegistry()) { // reg: registry store to use; falls back to getDefaultRegistry()
         this.reg = reg;
+        // Bead `mdp`: sweep stale entries on startup so the registry doesn't grow
+        // unboundedly across MCP processes that exited without stopAllTunnels
+        // (SIGKILL / crash). Best-effort — no-op registries don't actually prune.
+        try {
+            const result = this.reg.prune({ staleAfterMs: this.REGISTRY_PRUNE_THRESHOLD_MS });
+            if (result.pruned > 0) { // stay quiet when nothing was evicted
+                logger.info(`Pruned ${result.pruned} stale registry entries on startup (${result.remaining} remaining)`);
+            }
+        }
+        catch (err) { // a failed prune is logged and swallowed so construction still succeeds
+            logger.warn(`Registry prune-on-startup failed (non-fatal): ${err}`);
+        }
+    }
+    /**
+     * Bead `3th`: freshness check used at borrow sites. True only if the entry's owner PID is alive
+     * AND nowMs - entry.lastAccessedAt <= REGISTRY_FRESHNESS_TTL_MS (nowMs defaults to Date.now()).
+     */
+    isEntryUsable(entry, nowMs = Date.now()) {
+        return (this.reg.isPidAlive(entry.ownerPid) &&
+            (nowMs - entry.lastAccessedAt) <= this.REGISTRY_FRESHNESS_TTL_MS); // a missing lastAccessedAt gives NaN, which compares false → unusable
     }
     // ── Public API ──────────────────────────────────────────────────────────────
     async processUrl(url, authToken, specificTunnelId, keyId, revokeKey) {
@@ -82,11 +113,18 @@ class TunnelManager {
         if (!existing)
             return undefined;
         if (!existing.isOwned) {
-            // Verify the owning process is still alive
+            // Verify the owning process is still alive AND the entry is fresh
+            // (lastAccessedAt within REGISTRY_FRESHNESS_TTL_MS — defends against
+            // PID-reuse per bead 3th).
             const entry = this.reg.read()[String(port)];
-            if (!entry || !this.reg.isPidAlive(entry.ownerPid)) {
+            if (!entry || !this.isEntryUsable(entry)) {
                 this.activeTunnels.delete(existing.tunnelId);
-                logger.info(`Evicted stale borrowed tunnel ${existing.tunnelId} (owner PID ${entry?.ownerPid} dead)`);
+                const reason = !entry
+                    ? 'no registry entry'
+                    : !this.reg.isPidAlive(entry.ownerPid)
+                        ? `owner PID ${entry.ownerPid} dead`
+                        : `entry stale (last accessed ${Math.round((Date.now() - entry.lastAccessedAt) / 1000)}s ago)`;
+                logger.info(`Evicted stale borrowed tunnel ${existing.tunnelId} (${reason})`);
                 return undefined;
             }
         }
@@ -223,10 +261,12 @@ class TunnelManager {
             const info = await pending;
             return { url: info.publicUrl, tunnelId: info.tunnelId, isLocalhost: true };
         }
-        // 3. Check cross-process registry — another MCP instance may own a tunnel
+        // 3. Check cross-process registry — another MCP instance may own a tunnel.
+        //    Borrow only if the entry is fresh (PID alive AND touched within
+        //    REGISTRY_FRESHNESS_TTL_MS — defends against PID-reuse, bead 3th).
         const registry = this.reg.read();
         const regEntry = registry[String(port)];
-        if (regEntry && this.reg.isPidAlive(regEntry.ownerPid)) {
+        if (regEntry && this.isEntryUsable(regEntry)) {
             logger.info(`Borrowing tunnel from PID ${regEntry.ownerPid} for port ${port}: ${regEntry.publicUrl}`);
             const now = Date.now();
             const borrowed = {
@@ -293,7 +333,6 @@ class TunnelManager {
         //   (existing "agent died" recovery path)
         // - Attempt 3: after 1500ms backoff, retry with the already-reset agent
         // Auth-token errors short-circuit at any attempt — no point looping.
-        const self = this;
         // Bead 42g: fault injection + trace. Only active when NODE_ENV !== 'production'
         // AND DEBUGG_TUNNEL_FAULT_MODE env var is set. Zero overhead when disabled.
         const faultMode = getFaultModeFromEnv();
@@ -302,7 +341,7 @@ class TunnelManager {
         trace.emit('createTunnel.start', { port, tunnelId, hasFaultMode: !!faultMode });
         const connectWithRetry = async () => {
             const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
-            const BACKOFF_MS = self.connectBackoffMs; // bead ixh: test-overridable
+            const BACKOFF_MS = this.connectBackoffMs; // bead ixh: test-overridable
             const MAX_ATTEMPTS = BACKOFF_MS.length + 1; // N sleeps between N+1 attempts
             const connectOpts = {
                 proto: 'http',

package/dist/services/ngrok/tunnelRegistry.js CHANGED Viewed

@@ -14,7 +14,7 @@ import { join } from 'path';
 // ── File-backed implementation (production) ───────────────────────────────────
 const REGISTRY_FILE = join(tmpdir(), 'debugg-ai-tunnels.json');
 export function createFileRegistry() {
-    return {
+    const store = {
         read() {
             try {
                 if (!existsSync(REGISTRY_FILE))
@@ -38,22 +38,29 @@ export function createFileRegistry() {
         isPidAlive(pid) {
             return checkPid(pid);
         },
+        prune(opts) {
+            return pruneRegistryData(store, opts);
+        },
     };
+    return store;
 }
 // ── In-memory implementation (tests / injectable) ─────────────────────────────
 export function createInMemoryRegistry(isPidAliveImpl) {
-    let store = {};
-    return {
-        read: () => ({ ...store }),
-        write: (data) => { store = { ...data }; },
+    let data = {};
+    const store = {
+        read: () => ({ ...data }),
+        write: (next) => { data = { ...next }; },
         isPidAlive: isPidAliveImpl ?? checkPid,
+        prune: (opts) => pruneRegistryData(store, opts),
     };
+    return store;
 }
 // ── No-op implementation (tests that don't exercise registry) ─────────────────
 export const noopRegistry = {
     read: () => ({}), // always reports an empty registry
     write: () => { }, // writes are discarded
     isPidAlive: () => false, // no owner PID is ever reported alive
+    prune: () => ({ pruned: 0, remaining: 0 }), // nothing is stored, so nothing is evicted
 };
 // ── Default selection ─────────────────────────────────────────────────────────
 /**
@@ -73,3 +80,30 @@ function checkPid(pid) {
         return false;
     }
 }
+/**
+ * Shared prune logic — read, filter, write back. Used by both the file-backed
+ * and in-memory implementations so the eviction policy lives in one place.
+ *
+ * Eviction rule: drop entries where EITHER the owner PID is dead OR the entry
+ * hasn't been touched within `opts.staleAfterMs` of `opts.nowMs` (default: Date.now()).
+ * The freshness check defends against PID-reuse (bead 3th). Returns { pruned, remaining } counts.
+ */
+function pruneRegistryData(store, opts) {
+    const now = opts.nowMs ?? Date.now(); // nowMs is optional; callers may pin the clock
+    const data = store.read();
+    const next = {}; // surviving entries, keyed by port like the input
+    let pruned = 0;
+    for (const [port, entry] of Object.entries(data)) {
+        const aliveAndFresh = store.isPidAlive(entry.ownerPid) &&
+            (now - entry.lastAccessedAt) <= opts.staleAfterMs; // a missing lastAccessedAt gives NaN → treated as stale
+        if (aliveAndFresh) {
+            next[port] = entry;
+        }
+        else {
+            pruned++;
+        }
+    }
+    if (pruned > 0) // skip the write entirely when nothing was evicted
+        store.write(next); // NOTE(review): read → write is not atomic here; confirm a concurrent writer's update cannot be lost
+    return { pruned, remaining: Object.keys(next).length };
+}

package/dist/services/ngrok/types.js CHANGED Viewed

@@ -1,2 +1 @@
 export {};
-/* eslint-enable */

package/dist/tools/index.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { buildTestPageChangesTool, buildValidatedTestPageChangesTool } from './testPageChanges.js';
 import { buildTriggerCrawlTool, buildValidatedTriggerCrawlTool } from './triggerCrawl.js';
+import { buildProbePageTool, buildValidatedProbePageTool } from './probePage.js';
 import { buildSearchProjectsTool, buildValidatedSearchProjectsTool } from './searchProjects.js';
 import { buildSearchEnvironmentsTool, buildValidatedSearchEnvironmentsTool } from './searchEnvironments.js';
 import { buildSearchExecutionsTool, buildValidatedSearchExecutionsTool } from './searchExecutions.js';
@@ -19,6 +20,7 @@ export function initTools(ctx) {
     const tools = [
         buildTestPageChangesTool(ctx),
         buildTriggerCrawlTool(ctx),
+        buildProbePageTool(),
         buildSearchProjectsTool(),
         buildSearchEnvironmentsTool(),
         buildCreateEnvironmentTool(),
@@ -32,6 +34,7 @@ export function initTools(ctx) {
     const validated = [
         buildValidatedTestPageChangesTool(ctx),
         buildValidatedTriggerCrawlTool(ctx),
+        buildValidatedProbePageTool(),
         buildValidatedSearchProjectsTool(),
         buildValidatedSearchEnvironmentsTool(),
         buildValidatedCreateEnvironmentTool(),