npm - @debugg-ai/debugg-ai-mcp - Versions diffs - 1.0.36 → 1.0.37 - Mend

@debugg-ai/debugg-ai-mcp 1.0.36 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/handlers/testPageChangesHandler.js +107 -33
package/dist/services/index.js +23 -0
package/dist/services/ngrok/tunnelManager.js +48 -9
package/dist/utils/tunnelContext.js +9 -0
package/package.json +1 -1

package/dist/handlers/testPageChangesHandler.js CHANGED Viewed

@@ -8,10 +8,11 @@ import { Logger } from '../utils/logger.js';
 import { handleExternalServiceError } from '../utils/errors.js';
 import { fetchImageAsBase64, imageContentBlock } from '../utils/imageUtils.js';
 import { DebuggAIServerClient } from '../services/index.js';
-import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, } from '../utils/tunnelContext.js';
+import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
 const logger = new Logger({ module: 'testPageChangesHandler' });
-// Cache the template UUID within a server session to avoid re-fetching
+// Cache the template UUID and project UUID within a server session to avoid re-fetching
 let cachedTemplateUuid = null;
+let cachedProjectUuid = null;
 export async function testPageChangesHandler(input, context, progressCallback) {
     const startTime = Date.now();
     logger.toolStart('check_app_in_browser', input);
@@ -23,11 +24,15 @@ export async function testPageChangesHandler(input, context, progressCallback) {
     const abortController = new AbortController();
     const onStdinClose = () => abortController.abort();
     process.stdin.once('close', onStdinClose);
+    // Progress budget: 3 setup steps + 25 execution steps = 28 total
+    const SETUP_STEPS = 3;
+    const MAX_EXEC_STEPS = 25;
+    const TOTAL_STEPS = SETUP_STEPS + MAX_EXEC_STEPS;
     try {
         // --- Tunnel: reuse existing or provision a fresh one ---
         if (ctx.isLocalhost) {
             if (progressCallback) {
-                await progressCallback({ progress: 1, total: 10, message: 'Provisioning secure tunnel for localhost...' });
+                await progressCallback({ progress: 1, total: TOTAL_STEPS, message: 'Provisioning secure tunnel for localhost...' });
             }
             const reused = findExistingTunnel(ctx);
             if (reused) {
@@ -62,7 +67,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
         }
         // --- Find workflow template ---
         if (progressCallback) {
-            await progressCallback({ progress: 2, total: 10, message: 'Locating evaluation workflow template...' });
+            await progressCallback({ progress: 2, total: TOTAL_STEPS, message: 'Locating evaluation workflow template...' });
         }
         if (!cachedTemplateUuid) {
             const template = await client.workflows.findEvaluationTemplate();
@@ -73,11 +78,30 @@ export async function testPageChangesHandler(input, context, progressCallback) {
             cachedTemplateUuid = template.uuid;
             logger.info(`Using workflow template: ${template.name} (${template.uuid})`);
         }
+        // --- Resolve project UUID (best-effort, non-blocking) ---
+        if (!cachedProjectUuid && config.defaults.repoName) {
+            try {
+                const project = await client.findProjectByRepoName(config.defaults.repoName);
+                if (project) {
+                    cachedProjectUuid = project.uuid;
+                    logger.info(`Resolved project: ${project.name} (${project.uuid})`);
+                }
+                else {
+                    logger.info(`No project found for repo "${config.defaults.repoName}" — proceeding without project_id`);
+                }
+            }
+            catch (err) {
+                logger.warn(`Failed to look up project for repo "${config.defaults.repoName}": ${err}`);
+            }
+        }
         // --- Build context data (targetUrl is the tunnel URL for localhost, original URL otherwise) ---
         const contextData = {
             targetUrl: ctx.targetUrl ?? originalUrl,
             goal: input.description,
         };
+        if (cachedProjectUuid) {
+            contextData.projectId = cachedProjectUuid;
+        }
         // --- Build env (credentials/environment) ---
         const env = {};
         if (input.environmentId)
@@ -92,59 +116,109 @@ export async function testPageChangesHandler(input, context, progressCallback) {
             env.password = input.password;
         // --- Execute ---
         if (progressCallback) {
-            await progressCallback({ progress: 3, total: 10, message: 'Queuing workflow execution...' });
+            await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
         }
         const executeResponse = await client.workflows.executeWorkflow(cachedTemplateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
         const executionUuid = executeResponse.executionUuid;
         logger.info(`Execution queued: ${executionUuid}`);
         // --- Poll ---
-        // nodeExecutions grows as each node completes: trigger → browser.setup → surfer.execute_task → browser.teardown
-        const NODE_PHASE_LABELS = {
-            0: 'Browser agent starting up...',
-            1: 'Browser ready, agent navigating...',
-            2: 'Agent evaluating app...',
-            3: 'Wrapping up...',
-        };
+        // Track execution progress via state.stepsTaken from the API.
+        // Setup is steps 1-3; execution maps stepsTaken into steps 4-27, and step 28 (TOTAL_STEPS) is reserved for the final "Complete" tick.
+        let lastStepsTaken = 0;
         let lastNodeCount = 0;
+        let observedMaxSteps = MAX_EXEC_STEPS;
         const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
+            // Keep the tunnel alive while the workflow is actively running
+            if (ctx.tunnelId)
+                touchTunnelById(ctx.tunnelId);
             const nodeCount = exec.nodeExecutions?.length ?? 0;
-            if (nodeCount !== lastNodeCount || exec.status !== 'pending') {
+            const stepsTaken = exec.state?.stepsTaken ?? 0;
+            if (nodeCount !== lastNodeCount || stepsTaken !== lastStepsTaken || exec.status !== 'pending') {
                 lastNodeCount = nodeCount;
-                logger.info(`Execution status: ${exec.status}, nodes completed: ${nodeCount}`);
+                lastStepsTaken = stepsTaken;
+                logger.info(`Execution status: ${exec.status}, nodes: ${nodeCount}, steps: ${stepsTaken}`);
             }
             if (progressCallback) {
-                // Map 0-4 completed nodes to progress 3-9 (3 reserved for tunnel setup)
-                const progress = Math.min(3 + nodeCount * 2, 9);
-                const message = exec.status === 'running'
-                    ? (NODE_PHASE_LABELS[nodeCount] ?? 'Agent working...')
-                    : exec.status;
-                await progressCallback({ progress, total: 10, message });
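+                // If we see steps > our assumed max, raise the ceiling so stepsTaken / observedMaxSteps stays below 1.
+                // NOTE(review): raising the ceiling lowers that ratio, so reported progress can drop (e.g. 27 at step 25, then 23 at step 26) — confirm whether a monotonic clamp is needed.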
+                if (stepsTaken > observedMaxSteps) {
+                    observedMaxSteps = stepsTaken + 5;
+                }
+                // Map stepsTaken (0..observedMaxSteps) into progress (SETUP_STEPS+1 .. TOTAL_STEPS-1)
+                // Reserve the last tick for the "Complete" message
+                let execProgress;
+                if (stepsTaken > 0) {
+                    execProgress = SETUP_STEPS + Math.round((stepsTaken / observedMaxSteps) * (MAX_EXEC_STEPS - 1));
+                }
+                else {
+                    // No steps yet — show we're past setup but execution is starting
+                    execProgress = SETUP_STEPS + 1;
+                }
+                execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
+                let message;
+                if (exec.status === 'running') {
+                    if (stepsTaken > 0) {
+                        message = `Agent evaluating app... (step ${stepsTaken})`;
+                    }
+                    else if (nodeCount === 0) {
+                        message = 'Browser agent starting up...';
+                    }
+                    else {
+                        message = 'Browser ready, agent navigating...';
+                    }
+                }
+                else {
+                    message = exec.status;
+                }
+                await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
             }
         }, abortController.signal);
         const duration = Date.now() - startTime;
         // --- Format result ---
         const outcome = finalExecution.state?.outcome ?? finalExecution.status;
-        const surferNode = finalExecution.nodeExecutions?.find(n => n.nodeType === 'surfer.execute_task');
-        // Log all node executions to diagnose what the backend returns
-        logger.info('Node executions raw data', {
-            nodeCount: finalExecution.nodeExecutions?.length ?? 0,
-            nodes: finalExecution.nodeExecutions?.map(n => ({
-                nodeId: n.nodeId,
-                nodeType: n.nodeType,
-                status: n.status,
-                outputKeys: n.outputData ? Object.keys(n.outputData) : [],
-                outputData: n.outputData,
-            })),
+        const nodes = finalExecution.nodeExecutions ?? [];
+        // Extract step-by-step action trace from brain.step nodes
+        const brainSteps = nodes
+            .filter(n => n.nodeType === 'brain.step' && n.outputData?.decision)
+            .sort((a, b) => a.executionOrder - b.executionOrder);
+        const actionTrace = brainSteps.map((n, i) => {
+            const d = n.outputData.decision;
+            return {
+                step: i + 1,
+                action: d.actionType ?? d.action_type,
+                intent: d.intent,
+                target: d.target,
+                value: d.value ?? undefined,
+                success: n.outputData.success ?? n.status === 'success',
+                durationMs: n.executionTimeMs,
+            };
         });
+        // Extract evaluation from brain.evaluate node
+        const evalNode = nodes.find(n => n.nodeType === 'brain.evaluate');
+        const evaluation = evalNode?.outputData ? {
+            passed: evalNode.outputData.passed,
+            outcome: evalNode.outputData.outcome,
+            reason: evalNode.outputData.reason,
+            verifications: evalNode.outputData.verifications,
+        } : undefined;
+        // Also check for surfer.execute_task (older workflow graphs)
+        const surferNode = nodes.find(n => n.nodeType === 'surfer.execute_task');
         const responsePayload = {
             outcome,
             success: finalExecution.state?.success ?? false,
             status: finalExecution.status,
-            stepsTaken: finalExecution.state?.stepsTaken ?? surferNode?.outputData?.stepsTaken ?? 0,
+            stepsTaken: finalExecution.state?.stepsTaken ?? actionTrace.length ?? 0,
             targetUrl: originalUrl,
             executionId: executionUuid,
             durationMs: finalExecution.durationMs ?? duration,
         };
+        // The step-by-step action trace — what the browser agent did and why
+        if (actionTrace.length > 0) {
+            responsePayload.actionTrace = actionTrace;
+        }
+        // The final evaluation — pass/fail with reasoning
+        if (evaluation) {
+            responsePayload.evaluation = evaluation;
+        }
         if (finalExecution.state?.error)
             responsePayload.agentError = finalExecution.state.error;
         if (finalExecution.errorMessage)
@@ -160,7 +234,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
         }
         logger.toolComplete('check_app_in_browser', duration);
         if (progressCallback) {
-            await progressCallback({ progress: 10, total: 10, message: `Complete: ${outcome}` });
+            await progressCallback({ progress: TOTAL_STEPS, total: TOTAL_STEPS, message: `Complete: ${outcome}` });
         }
         const content = [
             { type: 'text', text: JSON.stringify(responsePayload, null, 2) },

package/dist/services/index.js CHANGED Viewed

@@ -46,6 +46,29 @@ export class DebuggAIServerClient {
         this.workflows = createWorkflowsService(this.tx);
         this.tunnels = createTunnelsService(this.tx);
     }
+    /**
+     * Look up a project by repo name. Uses ?search= then client-side filters
+     * on repo.name (which is "owner/repo-name" format).
+     * Returns the first match or null.
+     *
+     * Selection order among the search results:
+     *   1. a project whose name or slug equals repoName exactly;
+     *   2. a project whose repo.name equals repoName or ends with "/<repoName>";
+     *   3. otherwise the first search result, even when it satisfied neither rule.
+     * null is returned only when the search yields no results at all.
+     *
+     * @param repoName - Repo name passed as the `search` parameter and used for the client-side matching.
+     * @returns {Promise<object|null>} The selected project object, or null when there are no results.
+     * @throws {Error} When this.tx is not set (client not initialized).
+     */
+    async findProjectByRepoName(repoName) {
+        if (!this.tx)
+            throw new Error('Client not initialized — call init() first');
+        const response = await this.tx.get('api/v1/projects/', { search: repoName });
+        const projects = response?.results ?? []; // a missing response or results field is treated as "no projects"
+        if (projects.length === 0)
+            return null;
+        // Exact match on project name or slug first
+        const exact = projects.find(p => p.name === repoName || p.slug === repoName);
+        if (exact)
+            return exact;
+        // Match on repo.name (owner/repo-name — check if it ends with /repoName)
+        const repoMatch = projects.find(p => p.repo?.name === repoName || p.repo?.name?.endsWith(`/${repoName}`)); // projects without repo or repo.name are skipped by the optional chaining
+        if (repoMatch)
+            return repoMatch;
+        // Fallback to first result from search
+        // NOTE(review): this result matched none of the rules above, so it may be an unrelated project — confirm this is intended.
+        return projects[0];
+    }
     /**
      * Revoke an ngrok API key by its key ID.
      * Call this after workflow execution completes to clean up the short-lived key.

package/dist/services/ngrok/tunnelManager.js CHANGED Viewed

@@ -33,6 +33,13 @@ async function getNgrok() {
     }
     return ngrokModule;
 }
+/**
+ * Reset the cached ngrok module so the next connect() bootstraps a fresh agent.
+ * Called when the last owned tunnel is disconnected and the agent process may have died.
+ * Also called before the single retry that follows a failed ngrok.connect().
+ *
+ * Takes no arguments and returns nothing; its only effect is clearing the
+ * module-level `ngrokModule` variable.
+ */
+function resetNgrokModule() {
+    ngrokModule = null; // presumably makes the next getNgrok() call load the module again — confirm against getNgrok()
+}
 const logger = new Logger({ module: 'tunnelManager' });
 // ── TunnelManager ─────────────────────────────────────────────────────────────
 class TunnelManager {
@@ -149,6 +156,14 @@ class TunnelManager {
         catch (error) {
             logger.warn(`ngrok.disconnect failed for tunnel ${tunnelId} (already cleaned up):`, error);
         }
+        // If no owned tunnels remain, the ngrok agent process may have exited.
+        // Reset module + init state so the next connect() bootstraps a fresh agent.
+        const hasOwnedTunnels = Array.from(this.activeTunnels.values()).some(t => t.isOwned);
+        if (!hasOwnedTunnels) {
+            logger.info('No owned tunnels remain — resetting ngrok module for fresh init on next request');
+            resetNgrokModule();
+            this.initialized = false;
+        }
         if (tunnelInfo.revokeKey) {
             tunnelInfo.revokeKey().catch((err) => logger.warn(`Failed to revoke key for tunnel ${tunnelId}:`, err));
         }
@@ -251,16 +266,40 @@ class TunnelManager {
         else {
             localAddr = inDocker ? `${dockerHost}:${port}` : port;
         }
+        const connectWithRetry = async () => { // Resolves to the URL from ngrok.connect(); on any first-attempt failure it retries exactly once
+            try {
+                const ngrok = await getNgrok();
+                const url = await ngrok.connect({
+                    proto: 'http',
+                    addr: localAddr,
+                    hostname: tunnelDomain,
+                    authtoken: authToken,
+                });
+                if (!url)
+                    throw new Error('ngrok.connect() returned empty URL'); // thrown inside the try, so an empty URL also triggers the retry below
+                return url;
+            }
+            catch (firstError) {
+                // The ngrok agent process may have died after a previous disconnect.
+                // Reset module state and retry once with a fresh agent. firstError is only logged;
+                // the retry is not wrapped in a try, so any error it raises propagates to the caller.
+                logger.warn(`ngrok.connect() failed, retrying with fresh agent: ${firstError}`);
+                resetNgrokModule();
+                this.initialized = false;
+                await this.ensureInitialized(); // presumably re-runs initialization now that the flag is cleared — confirm in ensureInitialized()
+                const ngrok = await getNgrok();
+                const url = await ngrok.connect({ // same options as the first attempt
+                    proto: 'http',
+                    addr: localAddr,
+                    hostname: tunnelDomain,
+                    authtoken: authToken,
+                });
+                if (!url)
+                    throw new Error('ngrok.connect() returned empty URL after retry');
+                return url;
+            }
+        };
         try {
-            const ngrok = await getNgrok();
-            const tunnelUrl = await ngrok.connect({
-                proto: 'http',
-                addr: localAddr,
-                hostname: tunnelDomain,
-                authtoken: authToken,
-            });
-            if (!tunnelUrl)
-                throw new Error('ngrok.connect() returned empty URL');
+            const tunnelUrl = await connectWithRetry();
             const publicUrl = generateTunnelUrl(originalUrl, tunnelId);
             const now = Date.now();
             const tunnelInfo = {

package/dist/utils/tunnelContext.js CHANGED Viewed

@@ -71,6 +71,15 @@ export async function releaseTunnel(ctx) {
         await tunnelManager.stopTunnel(ctx.tunnelId);
     }
 }
+/**
+ * Touch a tunnel's timer by ID to prevent auto-shutoff during active use.
+ * Safe to call with undefined (no-op).
+ *
+ * @param tunnelId - Tunnel ID forwarded to tunnelManager.touchTunnel(); any falsy value is skipped.
+ * @returns {void} Nothing; the result of touchTunnel() is not returned.
+ */
+export function touchTunnelById(tunnelId) {
+    if (tunnelId) { // guard lets callers pass an unset ID without checking first
+        tunnelManager.touchTunnel(tunnelId);
+    }
+}
 // ─── Response sanitization ───────────────────────────────────────────────────
 /**
  * Replace any tunnel URLs in a backend response with the original localhost origin.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@debugg-ai/debugg-ai-mcp",
-  "version": "1.0.36",
+  "version": "1.0.37",
   "description": "Zero-Config, Fully AI-Managed End-to-End Testing for all code gen platforms.",
   "type": "module",
   "bin": {