@debugg-ai/debugg-ai-mcp 1.0.36 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,10 +8,11 @@ import { Logger } from '../utils/logger.js';
8
8
  import { handleExternalServiceError } from '../utils/errors.js';
9
9
  import { fetchImageAsBase64, imageContentBlock } from '../utils/imageUtils.js';
10
10
  import { DebuggAIServerClient } from '../services/index.js';
11
- import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, } from '../utils/tunnelContext.js';
11
+ import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
12
12
  const logger = new Logger({ module: 'testPageChangesHandler' });
13
- // Cache the template UUID within a server session to avoid re-fetching
13
+ // Cache the template UUID and project UUID within a server session to avoid re-fetching
14
14
  let cachedTemplateUuid = null;
15
+ let cachedProjectUuid = null;
15
16
  export async function testPageChangesHandler(input, context, progressCallback) {
16
17
  const startTime = Date.now();
17
18
  logger.toolStart('check_app_in_browser', input);
@@ -23,11 +24,15 @@ export async function testPageChangesHandler(input, context, progressCallback) {
23
24
  const abortController = new AbortController();
24
25
  const onStdinClose = () => abortController.abort();
25
26
  process.stdin.once('close', onStdinClose);
27
+ // Progress budget: 3 setup steps + 25 execution steps = 28 total
28
+ const SETUP_STEPS = 3;
29
+ const MAX_EXEC_STEPS = 25;
30
+ const TOTAL_STEPS = SETUP_STEPS + MAX_EXEC_STEPS;
26
31
  try {
27
32
  // --- Tunnel: reuse existing or provision a fresh one ---
28
33
  if (ctx.isLocalhost) {
29
34
  if (progressCallback) {
30
- await progressCallback({ progress: 1, total: 10, message: 'Provisioning secure tunnel for localhost...' });
35
+ await progressCallback({ progress: 1, total: TOTAL_STEPS, message: 'Provisioning secure tunnel for localhost...' });
31
36
  }
32
37
  const reused = findExistingTunnel(ctx);
33
38
  if (reused) {
@@ -62,7 +67,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
62
67
  }
63
68
  // --- Find workflow template ---
64
69
  if (progressCallback) {
65
- await progressCallback({ progress: 2, total: 10, message: 'Locating evaluation workflow template...' });
70
+ await progressCallback({ progress: 2, total: TOTAL_STEPS, message: 'Locating evaluation workflow template...' });
66
71
  }
67
72
  if (!cachedTemplateUuid) {
68
73
  const template = await client.workflows.findEvaluationTemplate();
@@ -73,11 +78,30 @@ export async function testPageChangesHandler(input, context, progressCallback) {
73
78
  cachedTemplateUuid = template.uuid;
74
79
  logger.info(`Using workflow template: ${template.name} (${template.uuid})`);
75
80
  }
81
+ // --- Resolve project UUID (best-effort, non-blocking) ---
82
+ if (!cachedProjectUuid && config.defaults.repoName) {
83
+ try {
84
+ const project = await client.findProjectByRepoName(config.defaults.repoName);
85
+ if (project) {
86
+ cachedProjectUuid = project.uuid;
87
+ logger.info(`Resolved project: ${project.name} (${project.uuid})`);
88
+ }
89
+ else {
90
+ logger.info(`No project found for repo "${config.defaults.repoName}" — proceeding without project_id`);
91
+ }
92
+ }
93
+ catch (err) {
94
+ logger.warn(`Failed to look up project for repo "${config.defaults.repoName}": ${err}`);
95
+ }
96
+ }
76
97
  // --- Build context data (targetUrl is the tunnel URL for localhost, original URL otherwise) ---
77
98
  const contextData = {
78
99
  targetUrl: ctx.targetUrl ?? originalUrl,
79
100
  goal: input.description,
80
101
  };
102
+ if (cachedProjectUuid) {
103
+ contextData.projectId = cachedProjectUuid;
104
+ }
81
105
  // --- Build env (credentials/environment) ---
82
106
  const env = {};
83
107
  if (input.environmentId)
@@ -92,59 +116,109 @@ export async function testPageChangesHandler(input, context, progressCallback) {
92
116
  env.password = input.password;
93
117
  // --- Execute ---
94
118
  if (progressCallback) {
95
- await progressCallback({ progress: 3, total: 10, message: 'Queuing workflow execution...' });
119
+ await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
96
120
  }
97
121
  const executeResponse = await client.workflows.executeWorkflow(cachedTemplateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
98
122
  const executionUuid = executeResponse.executionUuid;
99
123
  logger.info(`Execution queued: ${executionUuid}`);
100
124
  // --- Poll ---
101
- // nodeExecutions grows as each node completes: trigger → browser.setup surfer.execute_task → browser.teardown
102
- const NODE_PHASE_LABELS = {
103
- 0: 'Browser agent starting up...',
104
- 1: 'Browser ready, agent navigating...',
105
- 2: 'Agent evaluating app...',
106
- 3: 'Wrapping up...',
107
- };
125
+ // Track execution progress via state.stepsTaken from the API.
126
+ // Setup is steps 1-3; execution maps stepsTaken into progress values capped at 27 (TOTAL_STEPS - 1); step 28 is reserved for the final "Complete" tick.
127
+ let lastStepsTaken = 0;
108
128
  let lastNodeCount = 0;
129
+ let observedMaxSteps = MAX_EXEC_STEPS;
109
130
  const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
131
+ // Keep the tunnel alive while the workflow is actively running
132
+ if (ctx.tunnelId)
133
+ touchTunnelById(ctx.tunnelId);
110
134
  const nodeCount = exec.nodeExecutions?.length ?? 0;
111
- if (nodeCount !== lastNodeCount || exec.status !== 'pending') {
135
+ const stepsTaken = exec.state?.stepsTaken ?? 0;
136
+ if (nodeCount !== lastNodeCount || stepsTaken !== lastStepsTaken || exec.status !== 'pending') {
112
137
  lastNodeCount = nodeCount;
113
- logger.info(`Execution status: ${exec.status}, nodes completed: ${nodeCount}`);
138
+ lastStepsTaken = stepsTaken;
139
+ logger.info(`Execution status: ${exec.status}, nodes: ${nodeCount}, steps: ${stepsTaken}`);
114
140
  }
115
141
  if (progressCallback) {
116
- // Map 0-4 completed nodes to progress 3-9 (3 reserved for tunnel setup)
117
- const progress = Math.min(3 + nodeCount * 2, 9);
118
- const message = exec.status === 'running'
119
- ? (NODE_PHASE_LABELS[nodeCount] ?? 'Agent working...')
120
- : exec.status;
121
- await progressCallback({ progress, total: 10, message });
142
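+ // If we see steps > our assumed max, raise the ceiling (5 steps of headroom) so the ratio below stays under 1. NOTE: this rescales the mapping, so reported progress can drop at the moment the ceiling moves (e.g. 25 steps -> 27, then 26 steps -> 23).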
143
+ if (stepsTaken > observedMaxSteps) {
144
+ observedMaxSteps = stepsTaken + 5;
145
+ }
146
+ // Map stepsTaken (0..observedMaxSteps) into progress (SETUP_STEPS+1 .. TOTAL_STEPS-1)
147
+ // Reserve the last tick for the "Complete" message
148
+ let execProgress;
149
+ if (stepsTaken > 0) {
150
+ execProgress = SETUP_STEPS + Math.round((stepsTaken / observedMaxSteps) * (MAX_EXEC_STEPS - 1));
151
+ }
152
+ else {
153
+ // No steps yet — show we're past setup but execution is starting
154
+ execProgress = SETUP_STEPS + 1;
155
+ }
156
+ execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
157
+ let message;
158
+ if (exec.status === 'running') {
159
+ if (stepsTaken > 0) {
160
+ message = `Agent evaluating app... (step ${stepsTaken})`;
161
+ }
162
+ else if (nodeCount === 0) {
163
+ message = 'Browser agent starting up...';
164
+ }
165
+ else {
166
+ message = 'Browser ready, agent navigating...';
167
+ }
168
+ }
169
+ else {
170
+ message = exec.status;
171
+ }
172
+ await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
122
173
  }
123
174
  }, abortController.signal);
124
175
  const duration = Date.now() - startTime;
125
176
  // --- Format result ---
126
177
  const outcome = finalExecution.state?.outcome ?? finalExecution.status;
127
- const surferNode = finalExecution.nodeExecutions?.find(n => n.nodeType === 'surfer.execute_task');
128
- // Log all node executions to diagnose what the backend returns
129
- logger.info('Node executions raw data', {
130
- nodeCount: finalExecution.nodeExecutions?.length ?? 0,
131
- nodes: finalExecution.nodeExecutions?.map(n => ({
132
- nodeId: n.nodeId,
133
- nodeType: n.nodeType,
134
- status: n.status,
135
- outputKeys: n.outputData ? Object.keys(n.outputData) : [],
136
- outputData: n.outputData,
137
- })),
178
+ const nodes = finalExecution.nodeExecutions ?? [];
179
+ // Extract step-by-step action trace from brain.step nodes
180
+ const brainSteps = nodes
181
+ .filter(n => n.nodeType === 'brain.step' && n.outputData?.decision)
182
+ .sort((a, b) => a.executionOrder - b.executionOrder);
183
+ const actionTrace = brainSteps.map((n, i) => {
184
+ const d = n.outputData.decision;
185
+ return {
186
+ step: i + 1,
187
+ action: d.actionType ?? d.action_type,
188
+ intent: d.intent,
189
+ target: d.target,
190
+ value: d.value ?? undefined,
191
+ success: n.outputData.success ?? n.status === 'success',
192
+ durationMs: n.executionTimeMs,
193
+ };
138
194
  });
195
+ // Extract evaluation from brain.evaluate node
196
+ const evalNode = nodes.find(n => n.nodeType === 'brain.evaluate');
197
+ const evaluation = evalNode?.outputData ? {
198
+ passed: evalNode.outputData.passed,
199
+ outcome: evalNode.outputData.outcome,
200
+ reason: evalNode.outputData.reason,
201
+ verifications: evalNode.outputData.verifications,
202
+ } : undefined;
203
+ // Also check for surfer.execute_task (older workflow graphs)
204
+ const surferNode = nodes.find(n => n.nodeType === 'surfer.execute_task');
139
205
  const responsePayload = {
140
206
  outcome,
141
207
  success: finalExecution.state?.success ?? false,
142
208
  status: finalExecution.status,
143
- stepsTaken: finalExecution.state?.stepsTaken ?? surferNode?.outputData?.stepsTaken ?? 0,
209
+ stepsTaken: finalExecution.state?.stepsTaken ?? actionTrace.length ?? 0,
144
210
  targetUrl: originalUrl,
145
211
  executionId: executionUuid,
146
212
  durationMs: finalExecution.durationMs ?? duration,
147
213
  };
214
+ // The step-by-step action trace — what the browser agent did and why
215
+ if (actionTrace.length > 0) {
216
+ responsePayload.actionTrace = actionTrace;
217
+ }
218
+ // The final evaluation — pass/fail with reasoning
219
+ if (evaluation) {
220
+ responsePayload.evaluation = evaluation;
221
+ }
148
222
  if (finalExecution.state?.error)
149
223
  responsePayload.agentError = finalExecution.state.error;
150
224
  if (finalExecution.errorMessage)
@@ -160,7 +234,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
160
234
  }
161
235
  logger.toolComplete('check_app_in_browser', duration);
162
236
  if (progressCallback) {
163
- await progressCallback({ progress: 10, total: 10, message: `Complete: ${outcome}` });
237
+ await progressCallback({ progress: TOTAL_STEPS, total: TOTAL_STEPS, message: `Complete: ${outcome}` });
164
238
  }
165
239
  const content = [
166
240
  { type: 'text', text: JSON.stringify(responsePayload, null, 2) },
@@ -46,6 +46,29 @@ export class DebuggAIServerClient {
46
46
  this.workflows = createWorkflowsService(this.tx);
47
47
  this.tunnels = createTunnelsService(this.tx);
48
48
  }
49
+ /**
50
+ * Look up a project by repo name. Uses ?search= on api/v1/projects/, then picks a result client-side:
51
+ * an exact project name/slug match first, then a repo.name match (equal to repoName, or ending with "/<repoName>" since repo.name is "owner/repo-name" format).
52
+ * If neither matches, falls back to the first search result; returns null only when the search returns no results. Throws when this.tx is unset.
53
+ */
54
+ async findProjectByRepoName(repoName) {
55
+ if (!this.tx)
56
+ throw new Error('Client not initialized — call init() first');
57
+ const response = await this.tx.get('api/v1/projects/', { search: repoName });
58
+ const projects = response?.results ?? [];
59
+ if (projects.length === 0)
60
+ return null;
61
+ // Exact match on project name or slug first
62
+ const exact = projects.find(p => p.name === repoName || p.slug === repoName);
63
+ if (exact)
64
+ return exact;
65
+ // Match on repo.name (owner/repo-name — accept an exact match or a name that ends with /repoName)
66
+ const repoMatch = projects.find(p => p.repo?.name === repoName || p.repo?.name?.endsWith(`/${repoName}`));
67
+ if (repoMatch)
68
+ return repoMatch;
69
+ // Fallback to first result from search. NOTE(review): this result matched none of the checks above, so it may be an unrelated project — confirm this is intended.
70
+ return projects[0];
71
+ }
49
72
  /**
50
73
  * Revoke an ngrok API key by its key ID.
51
74
  * Call this after workflow execution completes to clean up the short-lived key.
@@ -33,6 +33,13 @@ async function getNgrok() {
33
33
  }
34
34
  return ngrokModule;
35
35
  }
36
+ /**
37
+ * Reset the cached ngrok module (sets ngrokModule to null) so the next connect() bootstraps a fresh agent.
38
+ * Called when the last owned tunnel is disconnected, and before retrying a failed ngrok.connect(), because the agent process may have died.
39
+ */
40
+ function resetNgrokModule() {
41
+ ngrokModule = null;
42
+ }
36
43
  const logger = new Logger({ module: 'tunnelManager' });
37
44
  // ── TunnelManager ─────────────────────────────────────────────────────────────
38
45
  class TunnelManager {
@@ -149,6 +156,14 @@ class TunnelManager {
149
156
  catch (error) {
150
157
  logger.warn(`ngrok.disconnect failed for tunnel ${tunnelId} (already cleaned up):`, error);
151
158
  }
159
+ // If no owned tunnels remain, the ngrok agent process may have exited.
160
+ // Reset module + init state so the next connect() bootstraps a fresh agent.
161
+ const hasOwnedTunnels = Array.from(this.activeTunnels.values()).some(t => t.isOwned);
162
+ if (!hasOwnedTunnels) {
163
+ logger.info('No owned tunnels remain — resetting ngrok module for fresh init on next request');
164
+ resetNgrokModule();
165
+ this.initialized = false;
166
+ }
152
167
  if (tunnelInfo.revokeKey) {
153
168
  tunnelInfo.revokeKey().catch((err) => logger.warn(`Failed to revoke key for tunnel ${tunnelId}:`, err));
154
169
  }
@@ -251,16 +266,40 @@ class TunnelManager {
251
266
  else {
252
267
  localAddr = inDocker ? `${dockerHost}:${port}` : port;
253
268
  }
269
+ const connectWithRetry = async () => {
270
+ try {
271
+ const ngrok = await getNgrok();
272
+ const url = await ngrok.connect({
273
+ proto: 'http',
274
+ addr: localAddr,
275
+ hostname: tunnelDomain,
276
+ authtoken: authToken,
277
+ });
278
+ if (!url)
279
+ throw new Error('ngrok.connect() returned empty URL');
280
+ return url;
281
+ }
282
+ catch (firstError) {
283
+ // Any failure of the first attempt lands here, including the empty-URL error thrown above. The ngrok agent process may have died after a previous disconnect.
284
+ // Reset module state, re-run ensureInitialized(), and retry exactly once with a fresh agent; a failure of the retry is not caught here and propagates out of connectWithRetry.
285
+ logger.warn(`ngrok.connect() failed, retrying with fresh agent: ${firstError}`);
286
+ resetNgrokModule();
287
+ this.initialized = false;
288
+ await this.ensureInitialized();
289
+ const ngrok = await getNgrok();
290
+ const url = await ngrok.connect({
291
+ proto: 'http',
292
+ addr: localAddr,
293
+ hostname: tunnelDomain,
294
+ authtoken: authToken,
295
+ });
296
+ if (!url)
297
+ throw new Error('ngrok.connect() returned empty URL after retry');
298
+ return url;
299
+ }
300
+ };
254
301
  try {
255
- const ngrok = await getNgrok();
256
- const tunnelUrl = await ngrok.connect({
257
- proto: 'http',
258
- addr: localAddr,
259
- hostname: tunnelDomain,
260
- authtoken: authToken,
261
- });
262
- if (!tunnelUrl)
263
- throw new Error('ngrok.connect() returned empty URL');
302
+ const tunnelUrl = await connectWithRetry();
264
303
  const publicUrl = generateTunnelUrl(originalUrl, tunnelId);
265
304
  const now = Date.now();
266
305
  const tunnelInfo = {
@@ -71,6 +71,15 @@ export async function releaseTunnel(ctx) {
71
71
  await tunnelManager.stopTunnel(ctx.tunnelId);
72
72
  }
73
73
  }
74
+ /**
75
+ * Touch a tunnel's timer by ID to prevent auto-shutoff during active use (delegates to tunnelManager.touchTunnel).
76
+ * Safe to call with undefined or any other falsy ID (no-op).
77
+ */
78
+ export function touchTunnelById(tunnelId) {
79
+ if (tunnelId) {
80
+ tunnelManager.touchTunnel(tunnelId);
81
+ }
82
+ }
74
83
  // ─── Response sanitization ───────────────────────────────────────────────────
75
84
  /**
76
85
  * Replace any tunnel URLs in a backend response with the original localhost origin.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@debugg-ai/debugg-ai-mcp",
3
- "version": "1.0.36",
3
+ "version": "1.0.37",
4
4
  "description": "Zero-Config, Fully AI-Managed End-to-End Testing for all code gen platforms.",
5
5
  "type": "module",
6
6
  "bin": {