@debugg-ai/debugg-ai-mcp 1.0.35 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,10 +8,11 @@ import { Logger } from '../utils/logger.js';
8
8
  import { handleExternalServiceError } from '../utils/errors.js';
9
9
  import { fetchImageAsBase64, imageContentBlock } from '../utils/imageUtils.js';
10
10
  import { DebuggAIServerClient } from '../services/index.js';
11
- import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, } from '../utils/tunnelContext.js';
11
+ import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
12
12
  const logger = new Logger({ module: 'testPageChangesHandler' });
13
- // Cache the template UUID within a server session to avoid re-fetching
13
+ // Cache the template UUID and project UUID within a server session to avoid re-fetching
14
14
  let cachedTemplateUuid = null;
15
+ let cachedProjectUuid = null;
15
16
  export async function testPageChangesHandler(input, context, progressCallback) {
16
17
  const startTime = Date.now();
17
18
  logger.toolStart('check_app_in_browser', input);
@@ -23,11 +24,15 @@ export async function testPageChangesHandler(input, context, progressCallback) {
23
24
  const abortController = new AbortController();
24
25
  const onStdinClose = () => abortController.abort();
25
26
  process.stdin.once('close', onStdinClose);
27
+ // Progress budget: 3 setup steps + 25 execution steps = 28 total
28
+ const SETUP_STEPS = 3;
29
+ const MAX_EXEC_STEPS = 25;
30
+ const TOTAL_STEPS = SETUP_STEPS + MAX_EXEC_STEPS;
26
31
  try {
27
32
  // --- Tunnel: reuse existing or provision a fresh one ---
28
33
  if (ctx.isLocalhost) {
29
34
  if (progressCallback) {
30
- await progressCallback({ progress: 1, total: 10, message: 'Provisioning secure tunnel for localhost...' });
35
+ await progressCallback({ progress: 1, total: TOTAL_STEPS, message: 'Provisioning secure tunnel for localhost...' });
31
36
  }
32
37
  const reused = findExistingTunnel(ctx);
33
38
  if (reused) {
@@ -35,16 +40,34 @@ export async function testPageChangesHandler(input, context, progressCallback) {
35
40
  logger.info(`Reusing tunnel: ${ctx.targetUrl} (id: ${ctx.tunnelId})`);
36
41
  }
37
42
  else {
38
- const tunnel = await client.tunnels.provision();
43
+ let tunnel;
44
+ try {
45
+ tunnel = await client.tunnels.provision();
46
+ }
47
+ catch (provisionError) {
48
+ const msg = provisionError instanceof Error ? provisionError.message : String(provisionError);
49
+ throw new Error(`Failed to provision tunnel for ${ctx.originalUrl}. ` +
50
+ `The remote browser needs a secure tunnel to reach your local dev server. ` +
51
+ `Make sure your dev server is running on the specified port and try again. ` +
52
+ `(Detail: ${msg})`);
53
+ }
39
54
  keyId = tunnel.keyId;
40
- // revokeKey is stored on the TunnelInfo and fires when the tunnel auto-stops.
41
- ctx = await ensureTunnel(ctx, tunnel.tunnelKey, tunnel.tunnelId, tunnel.keyId, () => client.revokeNgrokKey(tunnel.keyId));
55
+ try {
56
+ ctx = await ensureTunnel(ctx, tunnel.tunnelKey, tunnel.tunnelId, tunnel.keyId, () => client.revokeNgrokKey(tunnel.keyId));
57
+ }
58
+ catch (tunnelError) {
59
+ const msg = tunnelError instanceof Error ? tunnelError.message : String(tunnelError);
60
+ throw new Error(`Tunnel creation failed for ${ctx.originalUrl}. ` +
61
+ `Could not establish a secure connection between the remote browser and your local port. ` +
62
+ `Verify your dev server is running and the port is accessible. ` +
63
+ `(Detail: ${msg})`);
64
+ }
42
65
  logger.info(`Tunnel ready: ${ctx.targetUrl} (id: ${ctx.tunnelId})`);
43
66
  }
44
67
  }
45
68
  // --- Find workflow template ---
46
69
  if (progressCallback) {
47
- await progressCallback({ progress: 2, total: 10, message: 'Locating evaluation workflow template...' });
70
+ await progressCallback({ progress: 2, total: TOTAL_STEPS, message: 'Locating evaluation workflow template...' });
48
71
  }
49
72
  if (!cachedTemplateUuid) {
50
73
  const template = await client.workflows.findEvaluationTemplate();
@@ -55,11 +78,30 @@ export async function testPageChangesHandler(input, context, progressCallback) {
55
78
  cachedTemplateUuid = template.uuid;
56
79
  logger.info(`Using workflow template: ${template.name} (${template.uuid})`);
57
80
  }
81
+ // --- Resolve project UUID (best-effort: the lookup is awaited, but a failure is only logged and never aborts the run) ---
82
+ if (!cachedProjectUuid && config.defaults.repoName) {
83
+ try {
84
+ const project = await client.findProjectByRepoName(config.defaults.repoName);
85
+ if (project) {
86
+ cachedProjectUuid = project.uuid;
87
+ logger.info(`Resolved project: ${project.name} (${project.uuid})`);
88
+ }
89
+ else {
90
+ logger.info(`No project found for repo "${config.defaults.repoName}" — proceeding without project_id`);
91
+ }
92
+ }
93
+ catch (err) {
94
+ logger.warn(`Failed to look up project for repo "${config.defaults.repoName}": ${err}`);
95
+ }
96
+ }
58
97
  // --- Build context data (targetUrl is the tunnel URL for localhost, original URL otherwise) ---
59
98
  const contextData = {
60
99
  targetUrl: ctx.targetUrl ?? originalUrl,
61
100
  goal: input.description,
62
101
  };
102
+ if (cachedProjectUuid) {
103
+ contextData.projectId = cachedProjectUuid;
104
+ }
63
105
  // --- Build env (credentials/environment) ---
64
106
  const env = {};
65
107
  if (input.environmentId)
@@ -74,59 +116,109 @@ export async function testPageChangesHandler(input, context, progressCallback) {
74
116
  env.password = input.password;
75
117
  // --- Execute ---
76
118
  if (progressCallback) {
77
- await progressCallback({ progress: 3, total: 10, message: 'Queuing workflow execution...' });
119
+ await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
78
120
  }
79
121
  const executeResponse = await client.workflows.executeWorkflow(cachedTemplateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
80
122
  const executionUuid = executeResponse.executionUuid;
81
123
  logger.info(`Execution queued: ${executionUuid}`);
82
124
  // --- Poll ---
83
- // nodeExecutions grows as each node completes: trigger → browser.setup surfer.execute_task → browser.teardown
84
- const NODE_PHASE_LABELS = {
85
- 0: 'Browser agent starting up...',
86
- 1: 'Browser ready, agent navigating...',
87
- 2: 'Agent evaluating app...',
88
- 3: 'Wrapping up...',
89
- };
125
+ // Track execution progress via state.stepsTaken from the API.
126
+ // Setup is steps 1-3; execution maps stepsTaken into steps 4-27, and step 28 is reserved for the final "Complete" update.
127
+ let lastStepsTaken = 0;
90
128
  let lastNodeCount = 0;
129
+ let observedMaxSteps = MAX_EXEC_STEPS;
91
130
  const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
131
+ // Keep the tunnel alive while the workflow is actively running
132
+ if (ctx.tunnelId)
133
+ touchTunnelById(ctx.tunnelId);
92
134
  const nodeCount = exec.nodeExecutions?.length ?? 0;
93
- if (nodeCount !== lastNodeCount || exec.status !== 'pending') {
135
+ const stepsTaken = exec.state?.stepsTaken ?? 0;
136
+ if (nodeCount !== lastNodeCount || stepsTaken !== lastStepsTaken || exec.status !== 'pending') {
94
137
  lastNodeCount = nodeCount;
95
- logger.info(`Execution status: ${exec.status}, nodes completed: ${nodeCount}`);
138
+ lastStepsTaken = stepsTaken;
139
+ logger.info(`Execution status: ${exec.status}, nodes: ${nodeCount}, steps: ${stepsTaken}`);
96
140
  }
97
141
  if (progressCallback) {
98
- // Map 0-4 completed nodes to progress 3-9 (3 reserved for tunnel setup)
99
- const progress = Math.min(3 + nodeCount * 2, 9);
100
- const message = exec.status === 'running'
101
- ? (NODE_PHASE_LABELS[nodeCount] ?? 'Agent working...')
102
- : exec.status;
103
- await progressCallback({ progress, total: 10, message });
142
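+ // If we see steps > our assumed max, raise the ceiling to stepsTaken + 5 so the ratio stays below 1. NOTE(review): raising the ceiling shrinks the ratio, so the reported progress can drop at this point.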
143
+ if (stepsTaken > observedMaxSteps) {
144
+ observedMaxSteps = stepsTaken + 5;
145
+ }
146
+ // Map stepsTaken (0..observedMaxSteps) into progress (SETUP_STEPS+1 .. TOTAL_STEPS-1)
147
+ // Reserve the last tick for the "Complete" message
148
+ let execProgress;
149
+ if (stepsTaken > 0) {
150
+ execProgress = SETUP_STEPS + Math.round((stepsTaken / observedMaxSteps) * (MAX_EXEC_STEPS - 1));
151
+ }
152
+ else {
153
+ // No steps yet — show we're past setup but execution is starting
154
+ execProgress = SETUP_STEPS + 1;
155
+ }
156
+ execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
157
+ let message;
158
+ if (exec.status === 'running') {
159
+ if (stepsTaken > 0) {
160
+ message = `Agent evaluating app... (step ${stepsTaken})`;
161
+ }
162
+ else if (nodeCount === 0) {
163
+ message = 'Browser agent starting up...';
164
+ }
165
+ else {
166
+ message = 'Browser ready, agent navigating...';
167
+ }
168
+ }
169
+ else {
170
+ message = exec.status;
171
+ }
172
+ await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
104
173
  }
105
174
  }, abortController.signal);
106
175
  const duration = Date.now() - startTime;
107
176
  // --- Format result ---
108
177
  const outcome = finalExecution.state?.outcome ?? finalExecution.status;
109
- const surferNode = finalExecution.nodeExecutions?.find(n => n.nodeType === 'surfer.execute_task');
110
- // Log all node executions to diagnose what the backend returns
111
- logger.info('Node executions raw data', {
112
- nodeCount: finalExecution.nodeExecutions?.length ?? 0,
113
- nodes: finalExecution.nodeExecutions?.map(n => ({
114
- nodeId: n.nodeId,
115
- nodeType: n.nodeType,
116
- status: n.status,
117
- outputKeys: n.outputData ? Object.keys(n.outputData) : [],
118
- outputData: n.outputData,
119
- })),
178
+ const nodes = finalExecution.nodeExecutions ?? [];
179
+ // Extract step-by-step action trace from brain.step nodes
180
+ const brainSteps = nodes
181
+ .filter(n => n.nodeType === 'brain.step' && n.outputData?.decision)
182
+ .sort((a, b) => a.executionOrder - b.executionOrder);
183
+ const actionTrace = brainSteps.map((n, i) => {
184
+ const d = n.outputData.decision;
185
+ return {
186
+ step: i + 1,
187
+ action: d.actionType ?? d.action_type,
188
+ intent: d.intent,
189
+ target: d.target,
190
+ value: d.value ?? undefined,
191
+ success: n.outputData.success ?? n.status === 'success',
192
+ durationMs: n.executionTimeMs,
193
+ };
120
194
  });
195
+ // Extract evaluation from brain.evaluate node
196
+ const evalNode = nodes.find(n => n.nodeType === 'brain.evaluate');
197
+ const evaluation = evalNode?.outputData ? {
198
+ passed: evalNode.outputData.passed,
199
+ outcome: evalNode.outputData.outcome,
200
+ reason: evalNode.outputData.reason,
201
+ verifications: evalNode.outputData.verifications,
202
+ } : undefined;
203
+ // Also check for surfer.execute_task (older workflow graphs)
204
+ const surferNode = nodes.find(n => n.nodeType === 'surfer.execute_task');
121
205
  const responsePayload = {
122
206
  outcome,
123
207
  success: finalExecution.state?.success ?? false,
124
208
  status: finalExecution.status,
125
- stepsTaken: finalExecution.state?.stepsTaken ?? surferNode?.outputData?.stepsTaken ?? 0,
209
+ stepsTaken: finalExecution.state?.stepsTaken ?? actionTrace.length ?? 0,
126
210
  targetUrl: originalUrl,
127
211
  executionId: executionUuid,
128
212
  durationMs: finalExecution.durationMs ?? duration,
129
213
  };
214
+ // The step-by-step action trace — what the browser agent did and why
215
+ if (actionTrace.length > 0) {
216
+ responsePayload.actionTrace = actionTrace;
217
+ }
218
+ // The final evaluation — pass/fail with reasoning
219
+ if (evaluation) {
220
+ responsePayload.evaluation = evaluation;
221
+ }
130
222
  if (finalExecution.state?.error)
131
223
  responsePayload.agentError = finalExecution.state.error;
132
224
  if (finalExecution.errorMessage)
@@ -142,7 +234,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
142
234
  }
143
235
  logger.toolComplete('check_app_in_browser', duration);
144
236
  if (progressCallback) {
145
- await progressCallback({ progress: 10, total: 10, message: `Complete: ${outcome}` });
237
+ await progressCallback({ progress: TOTAL_STEPS, total: TOTAL_STEPS, message: `Complete: ${outcome}` });
146
238
  }
147
239
  const content = [
148
240
  { type: 'text', text: JSON.stringify(responsePayload, null, 2) },
@@ -46,6 +46,29 @@ export class DebuggAIServerClient {
46
46
  this.workflows = createWorkflowsService(this.tx);
47
47
  this.tunnels = createTunnelsService(this.tx);
48
48
  }
49
+ /**
50
+ * Look up a project by repo name via this.tx.get on api/v1/projects/ with ?search=repoName.
51
+ * Prefers an exact name/slug match, then a repo.name match (exact, or ending in "/repoName").
52
+ * Otherwise returns the first search result; returns null only when the search has no results. Throws if this.tx is unset.
53
+ */
54
+ async findProjectByRepoName(repoName) {
55
+ if (!this.tx)
56
+ throw new Error('Client not initialized — call init() first');
57
+ const response = await this.tx.get('api/v1/projects/', { search: repoName });
58
+ const projects = response?.results ?? [];
59
+ if (projects.length === 0)
60
+ return null;
61
+ // First preference: a project whose name or slug equals repoName exactly
62
+ const exact = projects.find(p => p.name === repoName || p.slug === repoName);
63
+ if (exact)
64
+ return exact;
65
+ // Second preference: repo.name equals repoName, or ends with "/repoName" (owner/repo-name form)
66
+ const repoMatch = projects.find(p => p.repo?.name === repoName || p.repo?.name?.endsWith(`/${repoName}`));
67
+ if (repoMatch)
68
+ return repoMatch;
69
+ // Last resort: the first search result, even though none of the checks above matched it
70
+ return projects[0];
71
+ }
49
72
  /**
50
73
  * Revoke an ngrok API key by its key ID.
51
74
  * Call this after workflow execution completes to clean up the short-lived key.
@@ -33,6 +33,13 @@ async function getNgrok() {
33
33
  }
34
34
  return ngrokModule;
35
35
  }
36
+ /**
37
+ * Clear the cached ngrokModule so the next connect() bootstraps a fresh agent (presumably getNgrok() reloads it — confirm).
38
+ * Called when no owned tunnels remain after a disconnect, and before the single retry after a failed ngrok.connect().
39
+ */
40
+ function resetNgrokModule() {
41
+ ngrokModule = null;
42
+ }
36
43
  const logger = new Logger({ module: 'tunnelManager' });
37
44
  // ── TunnelManager ─────────────────────────────────────────────────────────────
38
45
  class TunnelManager {
@@ -149,6 +156,14 @@ class TunnelManager {
149
156
  catch (error) {
150
157
  logger.warn(`ngrok.disconnect failed for tunnel ${tunnelId} (already cleaned up):`, error);
151
158
  }
159
+ // If no owned tunnels remain, the ngrok agent process may have exited.
160
+ // Reset module + init state so the next connect() bootstraps a fresh agent.
161
+ const hasOwnedTunnels = Array.from(this.activeTunnels.values()).some(t => t.isOwned);
162
+ if (!hasOwnedTunnels) {
163
+ logger.info('No owned tunnels remain — resetting ngrok module for fresh init on next request');
164
+ resetNgrokModule();
165
+ this.initialized = false;
166
+ }
152
167
  if (tunnelInfo.revokeKey) {
153
168
  tunnelInfo.revokeKey().catch((err) => logger.warn(`Failed to revoke key for tunnel ${tunnelId}:`, err));
154
169
  }
@@ -251,16 +266,40 @@ class TunnelManager {
251
266
  else {
252
267
  localAddr = inDocker ? `${dockerHost}:${port}` : port;
253
268
  }
269
+ const connectWithRetry = async () => {
270
+ try {
271
+ const ngrok = await getNgrok();
272
+ const url = await ngrok.connect({
273
+ proto: 'http',
274
+ addr: localAddr,
275
+ hostname: tunnelDomain,
276
+ authtoken: authToken,
277
+ });
278
+ if (!url)
279
+ throw new Error('ngrok.connect() returned empty URL');
280
+ return url;
281
+ }
282
+ catch (firstError) {
283
+ // Any failure lands here (including the empty-URL error above); the assumed cause is an ngrok agent process that died after a previous disconnect.
284
+ // Reset module state and retry exactly once with a fresh agent — a second failure propagates to the caller.
285
+ logger.warn(`ngrok.connect() failed, retrying with fresh agent: ${firstError}`);
286
+ resetNgrokModule();
287
+ this.initialized = false;
288
+ await this.ensureInitialized();
289
+ const ngrok = await getNgrok();
290
+ const url = await ngrok.connect({
291
+ proto: 'http',
292
+ addr: localAddr,
293
+ hostname: tunnelDomain,
294
+ authtoken: authToken,
295
+ });
296
+ if (!url)
297
+ throw new Error('ngrok.connect() returned empty URL after retry');
298
+ return url;
299
+ }
300
+ };
254
301
  try {
255
- const ngrok = await getNgrok();
256
- const tunnelUrl = await ngrok.connect({
257
- proto: 'http',
258
- addr: localAddr,
259
- hostname: tunnelDomain,
260
- authtoken: authToken,
261
- });
262
- if (!tunnelUrl)
263
- throw new Error('ngrok.connect() returned empty URL');
302
+ const tunnelUrl = await connectWithRetry();
264
303
  const publicUrl = generateTunnelUrl(originalUrl, tunnelId);
265
304
  const now = Date.now();
266
305
  const tunnelInfo = {
@@ -10,7 +10,7 @@ import { testPageChangesHandler } from '../handlers/testPageChangesHandler.js';
10
10
  export const testPageChangesTool = {
11
11
  name: "check_app_in_browser",
12
12
  title: "Run E2E Browser Test",
13
- description: "Give an AI agent eyes on a live website or app. The agent browses it, interacts with it, and tells you whether a given task or check passed. Works on localhost or any URL. Use for visual QA, flow validation, regression checks, or anything that needs a real browser to verify.",
13
+ description: "Give an AI agent eyes on a live website or app. The agent browses it, interacts with it, and tells you whether a given task or check passed. Works on localhost or any URL. Use for visual QA, flow validation, regression checks, or anything that needs a real browser to verify.\n\nLOCALHOST SUPPORT: Pass any localhost URL (e.g. http://localhost:3000) and it Just Works. A secure tunnel is automatically created so the remote browser can reach your local dev server — no manual ngrok setup, no port forwarding, no config. Supports localhost, 127.0.0.1, 0.0.0.0, [::1], and private IPs (192.168.x.x, 10.x.x.x). The tunnel stays alive for 55 minutes and is reused across calls to the same port.",
14
14
  inputSchema: {
15
15
  type: "object",
16
16
  properties: {
@@ -21,7 +21,7 @@ export const testPageChangesTool = {
21
21
  },
22
22
  url: {
23
23
  type: "string",
24
- description: "URL to navigate to. Accepts any URL including localhost (e.g. 'http://localhost:3000', 'https://example.com'). Localhost URLs are automatically tunneled so the remote browser can reach them."
24
+ description: "URL to navigate to. Can be any public URL (https://example.com) OR a localhost/local dev server URL. For localhost URLs (http://localhost:3000, http://127.0.0.1:8080, etc.), a secure tunnel is automatically created so the remote browser can reach your machine — just make sure your dev server is running on that port. No extra setup needed."
25
25
  },
26
26
  environmentId: {
27
27
  type: "string",
@@ -8,7 +8,7 @@ import { normalizeUrl } from '../utils/urlParser.js';
8
8
  */
9
9
  export const TestPageChangesInputSchema = z.object({
10
10
  description: z.string().min(1, 'Description is required'),
11
- url: z.preprocess(normalizeUrl, z.string().url('Must be a valid URL accepts localhost (e.g. "http://localhost:3000") or any public URL')),
11
+ url: z.preprocess(normalizeUrl, z.string().url('Invalid URL. Pass a full URL like "http://localhost:3000" or "https://example.com". Localhost URLs are auto-tunneled to the remote browser — no extra setup needed.')),
12
12
  // Credential/environment resolution
13
13
  environmentId: z.string().uuid().optional(),
14
14
  credentialId: z.string().uuid().optional(),
@@ -71,6 +71,15 @@ export async function releaseTunnel(ctx) {
71
71
  await tunnelManager.stopTunnel(ctx.tunnelId);
72
72
  }
73
73
  }
74
+ /**
75
+ * Forward to tunnelManager.touchTunnel(tunnelId); presumably this resets the tunnel's auto-shutoff timer (touchTunnel is not visible here — confirm).
76
+ * Any falsy tunnelId (undefined, null, empty string) is a no-op.
77
+ */
78
+ export function touchTunnelById(tunnelId) {
79
+ if (tunnelId) {
80
+ tunnelManager.touchTunnel(tunnelId);
81
+ }
82
+ }
74
83
  // ─── Response sanitization ───────────────────────────────────────────────────
75
84
  /**
76
85
  * Replace any tunnel URLs in a backend response with the original localhost origin.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@debugg-ai/debugg-ai-mcp",
3
- "version": "1.0.35",
3
+ "version": "1.0.37",
4
4
  "description": "Zero-Config, Fully AI-Managed End-to-End Testing for all code gen platforms.",
5
5
  "type": "module",
6
6
  "bin": {