@debugg-ai/debugg-ai-mcp 1.0.36 → 1.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -8,10 +8,11 @@ import { Logger } from '../utils/logger.js';
|
|
|
8
8
|
import { handleExternalServiceError } from '../utils/errors.js';
|
|
9
9
|
import { fetchImageAsBase64, imageContentBlock } from '../utils/imageUtils.js';
|
|
10
10
|
import { DebuggAIServerClient } from '../services/index.js';
|
|
11
|
-
import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, } from '../utils/tunnelContext.js';
|
|
11
|
+
import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
|
|
12
12
|
const logger = new Logger({ module: 'testPageChangesHandler' });
|
|
13
|
-
// Cache the template UUID within a server session to avoid re-fetching
|
|
13
|
+
// Cache the template UUID and project UUID within a server session to avoid re-fetching
|
|
14
14
|
let cachedTemplateUuid = null;
|
|
15
|
+
let cachedProjectUuid = null;
|
|
15
16
|
export async function testPageChangesHandler(input, context, progressCallback) {
|
|
16
17
|
const startTime = Date.now();
|
|
17
18
|
logger.toolStart('check_app_in_browser', input);
|
|
@@ -23,11 +24,15 @@ export async function testPageChangesHandler(input, context, progressCallback) {
|
|
|
23
24
|
const abortController = new AbortController();
|
|
24
25
|
const onStdinClose = () => abortController.abort();
|
|
25
26
|
process.stdin.once('close', onStdinClose);
|
|
27
|
+
// Progress budget: 3 setup steps + 25 execution steps = 28 total
|
|
28
|
+
const SETUP_STEPS = 3;
|
|
29
|
+
const MAX_EXEC_STEPS = 25;
|
|
30
|
+
const TOTAL_STEPS = SETUP_STEPS + MAX_EXEC_STEPS;
|
|
26
31
|
try {
|
|
27
32
|
// --- Tunnel: reuse existing or provision a fresh one ---
|
|
28
33
|
if (ctx.isLocalhost) {
|
|
29
34
|
if (progressCallback) {
|
|
30
|
-
await progressCallback({ progress: 1, total:
|
|
35
|
+
await progressCallback({ progress: 1, total: TOTAL_STEPS, message: 'Provisioning secure tunnel for localhost...' });
|
|
31
36
|
}
|
|
32
37
|
const reused = findExistingTunnel(ctx);
|
|
33
38
|
if (reused) {
|
|
@@ -62,7 +67,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
|
|
|
62
67
|
}
|
|
63
68
|
// --- Find workflow template ---
|
|
64
69
|
if (progressCallback) {
|
|
65
|
-
await progressCallback({ progress: 2, total:
|
|
70
|
+
await progressCallback({ progress: 2, total: TOTAL_STEPS, message: 'Locating evaluation workflow template...' });
|
|
66
71
|
}
|
|
67
72
|
if (!cachedTemplateUuid) {
|
|
68
73
|
const template = await client.workflows.findEvaluationTemplate();
|
|
@@ -73,11 +78,30 @@ export async function testPageChangesHandler(input, context, progressCallback) {
|
|
|
73
78
|
cachedTemplateUuid = template.uuid;
|
|
74
79
|
logger.info(`Using workflow template: ${template.name} (${template.uuid})`);
|
|
75
80
|
}
|
|
81
|
+
// --- Resolve project UUID (best-effort, non-blocking) ---
|
|
82
|
+
if (!cachedProjectUuid && config.defaults.repoName) {
|
|
83
|
+
try {
|
|
84
|
+
const project = await client.findProjectByRepoName(config.defaults.repoName);
|
|
85
|
+
if (project) {
|
|
86
|
+
cachedProjectUuid = project.uuid;
|
|
87
|
+
logger.info(`Resolved project: ${project.name} (${project.uuid})`);
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
logger.info(`No project found for repo "${config.defaults.repoName}" — proceeding without project_id`);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
catch (err) {
|
|
94
|
+
logger.warn(`Failed to look up project for repo "${config.defaults.repoName}": ${err}`);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
76
97
|
// --- Build context data (targetUrl is the tunnel URL for localhost, original URL otherwise) ---
|
|
77
98
|
const contextData = {
|
|
78
99
|
targetUrl: ctx.targetUrl ?? originalUrl,
|
|
79
100
|
goal: input.description,
|
|
80
101
|
};
|
|
102
|
+
if (cachedProjectUuid) {
|
|
103
|
+
contextData.projectId = cachedProjectUuid;
|
|
104
|
+
}
|
|
81
105
|
// --- Build env (credentials/environment) ---
|
|
82
106
|
const env = {};
|
|
83
107
|
if (input.environmentId)
|
|
@@ -92,59 +116,109 @@ export async function testPageChangesHandler(input, context, progressCallback) {
|
|
|
92
116
|
env.password = input.password;
|
|
93
117
|
// --- Execute ---
|
|
94
118
|
if (progressCallback) {
|
|
95
|
-
await progressCallback({ progress: 3, total:
|
|
119
|
+
await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
|
|
96
120
|
}
|
|
97
121
|
const executeResponse = await client.workflows.executeWorkflow(cachedTemplateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
|
|
98
122
|
const executionUuid = executeResponse.executionUuid;
|
|
99
123
|
logger.info(`Execution queued: ${executionUuid}`);
|
|
100
124
|
// --- Poll ---
|
|
101
|
-
//
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
1: 'Browser ready, agent navigating...',
|
|
105
|
-
2: 'Agent evaluating app...',
|
|
106
|
-
3: 'Wrapping up...',
|
|
107
|
-
};
|
|
125
|
+
// Track execution progress via state.stepsTaken from the API.
|
|
126
|
+
// Setup is steps 1-3; execution maps stepsTaken into steps 4-27 (24 slots), and step 28 is reserved for the final "Complete" update.
|
|
127
|
+
let lastStepsTaken = 0;
|
|
108
128
|
let lastNodeCount = 0;
|
|
129
|
+
let observedMaxSteps = MAX_EXEC_STEPS;
|
|
109
130
|
const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
|
|
131
|
+
// Keep the tunnel alive while the workflow is actively running
|
|
132
|
+
if (ctx.tunnelId)
|
|
133
|
+
touchTunnelById(ctx.tunnelId);
|
|
110
134
|
const nodeCount = exec.nodeExecutions?.length ?? 0;
|
|
111
|
-
|
|
135
|
+
const stepsTaken = exec.state?.stepsTaken ?? 0;
|
|
136
|
+
if (nodeCount !== lastNodeCount || stepsTaken !== lastStepsTaken || exec.status !== 'pending') {
|
|
112
137
|
lastNodeCount = nodeCount;
|
|
113
|
-
|
|
138
|
+
lastStepsTaken = stepsTaken;
|
|
139
|
+
logger.info(`Execution status: ${exec.status}, nodes: ${nodeCount}, steps: ${stepsTaken}`);
|
|
114
140
|
}
|
|
115
141
|
if (progressCallback) {
|
|
116
|
-
//
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
142
|
+
// If we see steps > our assumed max, bump our ceiling so progress never goes backwards
|
|
143
|
+
if (stepsTaken > observedMaxSteps) {
|
|
144
|
+
observedMaxSteps = stepsTaken + 5;
|
|
145
|
+
}
|
|
146
|
+
// Map stepsTaken (0..observedMaxSteps) into progress (SETUP_STEPS+1 .. TOTAL_STEPS-1)
|
|
147
|
+
// Reserve the last tick for the "Complete" message
|
|
148
|
+
let execProgress;
|
|
149
|
+
if (stepsTaken > 0) {
|
|
150
|
+
execProgress = SETUP_STEPS + Math.round((stepsTaken / observedMaxSteps) * (MAX_EXEC_STEPS - 1));
|
|
151
|
+
}
|
|
152
|
+
else {
|
|
153
|
+
// No steps yet — show we're past setup but execution is starting
|
|
154
|
+
execProgress = SETUP_STEPS + 1;
|
|
155
|
+
}
|
|
156
|
+
execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
|
|
157
|
+
let message;
|
|
158
|
+
if (exec.status === 'running') {
|
|
159
|
+
if (stepsTaken > 0) {
|
|
160
|
+
message = `Agent evaluating app... (step ${stepsTaken})`;
|
|
161
|
+
}
|
|
162
|
+
else if (nodeCount === 0) {
|
|
163
|
+
message = 'Browser agent starting up...';
|
|
164
|
+
}
|
|
165
|
+
else {
|
|
166
|
+
message = 'Browser ready, agent navigating...';
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
else {
|
|
170
|
+
message = exec.status;
|
|
171
|
+
}
|
|
172
|
+
await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
|
|
122
173
|
}
|
|
123
174
|
}, abortController.signal);
|
|
124
175
|
const duration = Date.now() - startTime;
|
|
125
176
|
// --- Format result ---
|
|
126
177
|
const outcome = finalExecution.state?.outcome ?? finalExecution.status;
|
|
127
|
-
const
|
|
128
|
-
//
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
178
|
+
const nodes = finalExecution.nodeExecutions ?? [];
|
|
179
|
+
// Extract step-by-step action trace from brain.step nodes
|
|
180
|
+
const brainSteps = nodes
|
|
181
|
+
.filter(n => n.nodeType === 'brain.step' && n.outputData?.decision)
|
|
182
|
+
.sort((a, b) => a.executionOrder - b.executionOrder);
|
|
183
|
+
const actionTrace = brainSteps.map((n, i) => {
|
|
184
|
+
const d = n.outputData.decision;
|
|
185
|
+
return {
|
|
186
|
+
step: i + 1,
|
|
187
|
+
action: d.actionType ?? d.action_type,
|
|
188
|
+
intent: d.intent,
|
|
189
|
+
target: d.target,
|
|
190
|
+
value: d.value ?? undefined,
|
|
191
|
+
success: n.outputData.success ?? n.status === 'success',
|
|
192
|
+
durationMs: n.executionTimeMs,
|
|
193
|
+
};
|
|
138
194
|
});
|
|
195
|
+
// Extract evaluation from brain.evaluate node
|
|
196
|
+
const evalNode = nodes.find(n => n.nodeType === 'brain.evaluate');
|
|
197
|
+
const evaluation = evalNode?.outputData ? {
|
|
198
|
+
passed: evalNode.outputData.passed,
|
|
199
|
+
outcome: evalNode.outputData.outcome,
|
|
200
|
+
reason: evalNode.outputData.reason,
|
|
201
|
+
verifications: evalNode.outputData.verifications,
|
|
202
|
+
} : undefined;
|
|
203
|
+
// Also check for surfer.execute_task (older workflow graphs)
|
|
204
|
+
const surferNode = nodes.find(n => n.nodeType === 'surfer.execute_task');
|
|
139
205
|
const responsePayload = {
|
|
140
206
|
outcome,
|
|
141
207
|
success: finalExecution.state?.success ?? false,
|
|
142
208
|
status: finalExecution.status,
|
|
143
|
-
stepsTaken: finalExecution.state?.stepsTaken ??
|
|
209
|
+
stepsTaken: finalExecution.state?.stepsTaken ?? actionTrace.length ?? 0,
|
|
144
210
|
targetUrl: originalUrl,
|
|
145
211
|
executionId: executionUuid,
|
|
146
212
|
durationMs: finalExecution.durationMs ?? duration,
|
|
147
213
|
};
|
|
214
|
+
// The step-by-step action trace — what the browser agent did and why
|
|
215
|
+
if (actionTrace.length > 0) {
|
|
216
|
+
responsePayload.actionTrace = actionTrace;
|
|
217
|
+
}
|
|
218
|
+
// The final evaluation — pass/fail with reasoning
|
|
219
|
+
if (evaluation) {
|
|
220
|
+
responsePayload.evaluation = evaluation;
|
|
221
|
+
}
|
|
148
222
|
if (finalExecution.state?.error)
|
|
149
223
|
responsePayload.agentError = finalExecution.state.error;
|
|
150
224
|
if (finalExecution.errorMessage)
|
|
@@ -160,7 +234,7 @@ export async function testPageChangesHandler(input, context, progressCallback) {
|
|
|
160
234
|
}
|
|
161
235
|
logger.toolComplete('check_app_in_browser', duration);
|
|
162
236
|
if (progressCallback) {
|
|
163
|
-
await progressCallback({ progress:
|
|
237
|
+
await progressCallback({ progress: TOTAL_STEPS, total: TOTAL_STEPS, message: `Complete: ${outcome}` });
|
|
164
238
|
}
|
|
165
239
|
const content = [
|
|
166
240
|
{ type: 'text', text: JSON.stringify(responsePayload, null, 2) },
|
package/dist/services/index.js
CHANGED
|
@@ -46,6 +46,29 @@ export class DebuggAIServerClient {
|
|
|
46
46
|
this.workflows = createWorkflowsService(this.tx);
|
|
47
47
|
this.tunnels = createTunnelsService(this.tx);
|
|
48
48
|
}
|
|
49
|
+
/**
|
|
50
|
+
* Look up a project by repo name. Uses ?search= then client-side filters
|
|
51
|
+
* on repo.name (which is "owner/repo-name" format).
|
|
52
|
+
* Returns the first match or null.
|
|
53
|
+
*/
|
|
54
|
+
async findProjectByRepoName(repoName) {
|
|
55
|
+
if (!this.tx)
|
|
56
|
+
throw new Error('Client not initialized — call init() first');
|
|
57
|
+
const response = await this.tx.get('api/v1/projects/', { search: repoName });
|
|
58
|
+
const projects = response?.results ?? [];
|
|
59
|
+
if (projects.length === 0)
|
|
60
|
+
return null;
|
|
61
|
+
// Exact match on project name or slug first
|
|
62
|
+
const exact = projects.find(p => p.name === repoName || p.slug === repoName);
|
|
63
|
+
if (exact)
|
|
64
|
+
return exact;
|
|
65
|
+
// Match on repo.name (owner/repo-name — check if it ends with /repoName)
|
|
66
|
+
const repoMatch = projects.find(p => p.repo?.name === repoName || p.repo?.name?.endsWith(`/${repoName}`));
|
|
67
|
+
if (repoMatch)
|
|
68
|
+
return repoMatch;
|
|
69
|
+
// Fallback to first result from search
|
|
70
|
+
return projects[0];
|
|
71
|
+
}
|
|
49
72
|
/**
|
|
50
73
|
* Revoke an ngrok API key by its key ID.
|
|
51
74
|
* Call this after workflow execution completes to clean up the short-lived key.
|
|
@@ -33,6 +33,13 @@ async function getNgrok() {
|
|
|
33
33
|
}
|
|
34
34
|
return ngrokModule;
|
|
35
35
|
}
|
|
36
|
+
/**
 * Forget the cached ngrok module reference by setting `ngrokModule` to null.
 *
 * Callers in this file invoke it when no owned tunnels remain after a
 * disconnect, and again before retrying a failed `ngrok.connect()`. The intent
 * stated by those call sites is that the next connect starts from a fresh
 * agent rather than a stale one (the reload itself happens elsewhere and is
 * not shown here).
 */
function resetNgrokModule() {
    ngrokModule = null;
}
|
|
36
43
|
const logger = new Logger({ module: 'tunnelManager' });
|
|
37
44
|
// ── TunnelManager ─────────────────────────────────────────────────────────────
|
|
38
45
|
class TunnelManager {
|
|
@@ -149,6 +156,14 @@ class TunnelManager {
|
|
|
149
156
|
catch (error) {
|
|
150
157
|
logger.warn(`ngrok.disconnect failed for tunnel ${tunnelId} (already cleaned up):`, error);
|
|
151
158
|
}
|
|
159
|
+
// If no owned tunnels remain, the ngrok agent process may have exited.
|
|
160
|
+
// Reset module + init state so the next connect() bootstraps a fresh agent.
|
|
161
|
+
const hasOwnedTunnels = Array.from(this.activeTunnels.values()).some(t => t.isOwned);
|
|
162
|
+
if (!hasOwnedTunnels) {
|
|
163
|
+
logger.info('No owned tunnels remain — resetting ngrok module for fresh init on next request');
|
|
164
|
+
resetNgrokModule();
|
|
165
|
+
this.initialized = false;
|
|
166
|
+
}
|
|
152
167
|
if (tunnelInfo.revokeKey) {
|
|
153
168
|
tunnelInfo.revokeKey().catch((err) => logger.warn(`Failed to revoke key for tunnel ${tunnelId}:`, err));
|
|
154
169
|
}
|
|
@@ -251,16 +266,40 @@ class TunnelManager {
|
|
|
251
266
|
else {
|
|
252
267
|
localAddr = inDocker ? `${dockerHost}:${port}` : port;
|
|
253
268
|
}
|
|
269
|
+
// Open the ngrok tunnel for `localAddr`, allowing exactly one retry.
// Any failure of the first attempt (including an empty URL) is logged, the
// cached ngrok module and the manager's `initialized` flag are reset,
// initialization is re-run, and the connect is attempted a second time.
// A failure of the second attempt propagates to the caller.
// Resolves with the URL returned by ngrok.connect().
const connectWithRetry = async () => {
    // One connect attempt; `emptyUrlMessage` is the error text used when ngrok yields no URL.
    const attemptConnect = async (emptyUrlMessage) => {
        const agent = await getNgrok();
        const tunnelAddress = await agent.connect({
            proto: 'http',
            addr: localAddr,
            hostname: tunnelDomain,
            authtoken: authToken,
        });
        if (!tunnelAddress) {
            throw new Error(emptyUrlMessage);
        }
        return tunnelAddress;
    };
    try {
        // `return await` keeps a rejection inside this try so the catch below can retry.
        return await attemptConnect('ngrok.connect() returned empty URL');
    }
    catch (firstError) {
        // The ngrok agent process may be gone after an earlier disconnect; start over once.
        logger.warn(`ngrok.connect() failed, retrying with fresh agent: ${firstError}`);
        resetNgrokModule();
        this.initialized = false;
        await this.ensureInitialized();
        return attemptConnect('ngrok.connect() returned empty URL after retry');
    }
};
|
|
254
301
|
try {
|
|
255
|
-
const
|
|
256
|
-
const tunnelUrl = await ngrok.connect({
|
|
257
|
-
proto: 'http',
|
|
258
|
-
addr: localAddr,
|
|
259
|
-
hostname: tunnelDomain,
|
|
260
|
-
authtoken: authToken,
|
|
261
|
-
});
|
|
262
|
-
if (!tunnelUrl)
|
|
263
|
-
throw new Error('ngrok.connect() returned empty URL');
|
|
302
|
+
const tunnelUrl = await connectWithRetry();
|
|
264
303
|
const publicUrl = generateTunnelUrl(originalUrl, tunnelId);
|
|
265
304
|
const now = Date.now();
|
|
266
305
|
const tunnelInfo = {
|
|
@@ -71,6 +71,15 @@ export async function releaseTunnel(ctx) {
|
|
|
71
71
|
await tunnelManager.stopTunnel(ctx.tunnelId);
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
|
+
/**
 * Forward a keep-alive "touch" for the given tunnel to the tunnel manager.
 *
 * Delegates to `tunnelManager.touchTunnel(tunnelId)`; the original comment
 * describes this as refreshing the tunnel's timer so it is not auto-shut-off
 * while in active use. Any falsy id (undefined, null, empty string) is
 * ignored, so callers may pass an optional id without checking it first.
 *
 * @param {string} [tunnelId] - Identifier of the tunnel to keep alive.
 * @returns {void}
 */
export function touchTunnelById(tunnelId) {
    if (!tunnelId) {
        return;
    }
    tunnelManager.touchTunnel(tunnelId);
}
|
|
74
83
|
// ─── Response sanitization ───────────────────────────────────────────────────
|
|
75
84
|
/**
|
|
76
85
|
* Replace any tunnel URLs in a backend response with the original localhost origin.
|