@debugg-ai/debugg-ai-mcp 2.4.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -1
- package/dist/handlers/index.js +1 -0
- package/dist/handlers/probePageHandler.js +275 -0
- package/dist/handlers/searchEnvironmentsHandler.js +12 -2
- package/dist/handlers/testPageChangesHandler.js +149 -70
- package/dist/handlers/triggerCrawlHandler.js +65 -21
- package/dist/services/ngrok/tunnelManager.js +46 -7
- package/dist/services/ngrok/tunnelRegistry.js +39 -5
- package/dist/services/ngrok/types.js +0 -1
- package/dist/tools/index.js +3 -0
- package/dist/tools/probePage.js +89 -0
- package/dist/types/index.js +17 -0
- package/dist/utils/errors.js +0 -1
- package/dist/utils/harSummarizer.js +105 -0
- package/dist/utils/projectAnalyzer.js +2 -2
- package/dist/utils/telemetry.js +1 -0
- package/dist/utils/transientErrors.js +82 -0
- package/dist/utils/urlParser.js +1 -1
- package/dist/utils/validation.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -34,7 +34,7 @@ docker run -i --rm --init -e DEBUGGAI_API_KEY=your_api_key quinnosha/debugg-ai-m
|
|
|
34
34
|
|
|
35
35
|
## Tools
|
|
36
36
|
|
|
37
|
-
The server exposes **
|
|
37
|
+
The server exposes **12** tools grouped into Browser (3), Search (3), Projects (3), and Environments (3). The headline tools are `check_app_in_browser` (full AI agent) and `probe_page` (lightweight no-LLM page probe); the rest manage projects, environments + their credentials, and execution history through a uniform `search_*` + CRUD pattern.
|
|
38
38
|
|
|
39
39
|
### Browser
|
|
40
40
|
|
|
@@ -75,6 +75,26 @@ URLs are short-lived presigned S3 — refetch the parent execution via `search_e
|
|
|
75
75
|
|
|
76
76
|
Fires a server-side browser-agent crawl to populate the project's knowledge graph. Localhost URLs tunnel automatically. Returns `{executionId, status, targetUrl, durationMs, outcome?, crawlSummary?, knowledgeGraph?, browserSession?}` with `knowledgeGraph.imported === true` on successful ingestion. The `browserSession` block (HAR + console-log URLs, same shape as above) is also present on completed crawls.
|
|
77
77
|
|
|
78
|
+
#### `probe_page`
|
|
79
|
+
|
|
80
|
+
**Lightweight no-LLM batch page probe.** Pass 1-20 URLs; each one is navigated to, waited on until it loads, and returned as rendered state — screenshot + page metadata + structured console errors + network summary. No agent loop, no LLM cost, no scenario assertions. Use it for "did I just break /settings?", multi-route smoke after a refactor, CI per-PR sweeps, and quick is-it-up checks where `check_app_in_browser`'s 60-150s agent loop is overkill.
|
|
81
|
+
|
|
82
|
+
| Parameter | Type | Description |
|
|
83
|
+
|-----------|------|-------------|
|
|
84
|
+
| `targets` | array **required** | 1-20 entries: `[{url, waitForSelector?, waitForLoadState?, timeoutMs?}]` |
|
|
85
|
+
| `targets[].url` | string **required** | Public URL or localhost (auto-tunneled) |
|
|
86
|
+
| `targets[].waitForLoadState` | enum | `'load'` (default) / `'domcontentloaded'` / `'networkidle'` |
|
|
87
|
+
| `targets[].waitForSelector` | string | Optional CSS selector to wait for after navigation |
|
|
88
|
+
| `targets[].timeoutMs` | number | Per-URL timeout, 1000-30000 (default 10000) |
|
|
89
|
+
| `includeHtml` | boolean | Return raw HTML in each result (default false) |
|
|
90
|
+
| `captureScreenshots` | boolean | Return one PNG per target (default true) |
|
|
91
|
+
|
|
92
|
+
The whole batch shares a single backend execution + browser session + tunnel — 5 URLs in one call is dramatically faster than 5 parallel single-URL calls. Per-URL `error` field preserves batch resilience: a single failed target doesn't fail the others.
|
|
93
|
+
|
|
94
|
+
**`networkSummary` aggregation key is `origin + pathname`** — refetch loops (`?n=0..4` repeatedly hitting the same endpoint) collapse into a single entry with the count, so `/api/poll` showing up with `count: 47` is the actionable "infinite refetch loop" signal users originally asked for.
|
|
95
|
+
|
|
96
|
+
Performance budget: <10s for 1 URL, <25s for 20. Localhost dead-port returns `LocalServerUnreachable` in <2s without burning a workflow execution.
|
|
97
|
+
|
|
78
98
|
### Search (dual-mode: uuid detail OR filtered list)
|
|
79
99
|
|
|
80
100
|
Each `search_*` tool has two modes. Pass `{uuid}` for a single-record detail response. Pass filter params for a paginated summary list. 404 from the backend surfaces as `isError: true` with `{error: 'NotFound', message, uuid}`.
|
package/dist/handlers/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export * from './testPageChangesHandler.js';
|
|
2
2
|
export * from './triggerCrawlHandler.js';
|
|
3
|
+
export * from './probePageHandler.js';
|
|
3
4
|
export * from './searchProjectsHandler.js';
|
|
4
5
|
export * from './searchEnvironmentsHandler.js';
|
|
5
6
|
export * from './searchExecutionsHandler.js';
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* probePageHandler — lightweight no-LLM batch page probe.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors triggerCrawlHandler's 4-step pattern (find template → execute →
|
|
5
|
+
* poll → format response) but: (a) takes a list of targets and produces a
|
|
6
|
+
* list of results, (b) does no agent steps (zero LLM in critical path),
|
|
7
|
+
* (c) MCP-side aggregates per-target HAR slices into NetworkSummary[].
|
|
8
|
+
*
|
|
9
|
+
* The backend "Page Probe" workflow template runs:
|
|
10
|
+
* browser.setup → loop[targets](page.navigate → page.capture) → done
|
|
11
|
+
*
|
|
12
|
+
* Each page.capture node emits per-iteration outputData with consoleSlice
|
|
13
|
+
* + harSlice windowed to that URL's load span — that's what makes per-URL
|
|
14
|
+
* networkSummary attribution accurate.
|
|
15
|
+
*/
|
|
16
|
+
import { config } from '../config/index.js';
|
|
17
|
+
import { Logger } from '../utils/logger.js';
|
|
18
|
+
import { handleExternalServiceError } from '../utils/errors.js';
|
|
19
|
+
import { imageContentBlock } from '../utils/imageUtils.js';
|
|
20
|
+
import { DebuggAIServerClient } from '../services/index.js';
|
|
21
|
+
import { TunnelProvisionError } from '../services/tunnels.js';
|
|
22
|
+
import { tunnelManager } from '../services/ngrok/tunnelManager.js';
|
|
23
|
+
import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js';
|
|
24
|
+
import { extractLocalhostPort } from '../utils/urlParser.js';
|
|
25
|
+
import { buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
|
|
26
|
+
import { getCachedTemplateUuid, invalidateTemplateCache } from '../utils/handlerCaches.js';
|
|
27
|
+
import { summarizeHar, summarizeConsole } from '../utils/harSummarizer.js';
|
|
28
|
+
const logger = new Logger({ module: 'probePageHandler' });
|
|
29
|
+
const TEMPLATE_KEYWORD = 'page probe';
|
|
30
|
+
/**
 * Handle the `probe_page` tool: probe a batch of URLs in one backend workflow
 * execution and return one result per input target.
 *
 * Flow, as implemented below:
 *   1. For every target, build a tunnel context. Localhost targets get a TCP
 *      pre-flight probe, then either reuse an existing tunnel or provision a
 *      new one followed by a tunnel health probe. Other targets pass through.
 *   2. Resolve the "page probe" workflow template UUID (via the template cache).
 *   3. Execute the workflow with the (possibly tunneled) target URLs.
 *   4. Poll the execution, touching active tunnels and emitting progress.
 *   5. Build results from the `page.capture` node outputs, rewrite tunnel URLs
 *      with sanitizeResponseUrls, and return a text block plus screenshots.
 *
 * @param {object} input - Tool input. Reads `targets` (array of
 *   `{url, waitForSelector?, waitForLoadState?, timeoutMs?}`), `includeHtml`
 *   and `captureScreenshots`.
 * @param {object} context - Tool context; not read by this handler.
 * @param {Function} [rawProgressCallback] - Optional async progress sink called
 *   with `{progress, total, message}`. The first failure disables further
 *   emissions for this request.
 * @returns {Promise<object>} `{content}` on success, or `{content, isError: true}`
 *   when a pre-flight port probe or tunnel health probe fails.
 * @throws The value produced by handleExternalServiceError for any other failure.
 */
export async function probePageHandler(input, context, rawProgressCallback) {
    const startTime = Date.now();
    logger.toolStart('probe_page', input);
    // Bead 0bq: progress circuit-breaker — see testPageChangesHandler for rationale.
    let progressDisabled = false;
    const progressCallback = rawProgressCallback
        ? async (update) => {
            if (progressDisabled) {
                return;
            }
            try {
                await rawProgressCallback(update);
            }
            catch (err) {
                progressDisabled = true;
                logger.warn('Progress emission failed; disabling further emissions for this request', {
                    error: err instanceof Error ? err.message : String(err),
                });
            }
        }
        : undefined;
    const client = new DebuggAIServerClient(config.api.key);
    await client.init();
    const abortController = new AbortController();
    // When stdin closes, abort polling and stop emitting progress.
    const onStdinClose = () => {
        abortController.abort();
        progressDisabled = true;
    };
    process.stdin.once('close', onStdinClose);
    // Per-target tunnel contexts. Index aligns with input.targets[].
    const targetContexts = [];
    // Keys provisioned during this call that are not (yet) attached to a tunnel.
    // A key is removed as soon as ensureTunnel returns a context with a tunnelId;
    // whatever remains in the finally block is orphaned and gets revoked.
    // This is deliberately NOT indexed against targetContexts: that array also
    // holds non-localhost and reused-tunnel contexts, so positions do not match.
    const orphanedKeyIds = new Set();
    // Progress budget: 1 pre-flight + 1 template + 1 execute + N per-target captures + 1 done
    const TOTAL_STEPS = 3 + input.targets.length + 1;
    let progressStep = 0;
    try {
        if (progressCallback) {
            await progressCallback({ progress: ++progressStep, total: TOTAL_STEPS, message: `Pre-flight + tunnel setup (${input.targets.length} target${input.targets.length === 1 ? '' : 's'})...` });
        }
        // ── Per-target pre-flight + tunnel resolution ──────────────────────────
        // Sequential on purpose: a tunnel created for an earlier target can be
        // found by findExistingTunnel for a later one.
        for (const target of input.targets) {
            const ctx = buildContext(target.url);
            if (ctx.isLocalhost) {
                // Pre-flight TCP probe: fail fast if dev server isn't listening.
                const port = extractLocalhostPort(ctx.originalUrl);
                if (typeof port === 'number') {
                    const probe = await probeLocalPort(port);
                    if (!probe.reachable) {
                        const payload = {
                            error: 'LocalServerUnreachable',
                            message: `No server listening on 127.0.0.1:${port}. Start your dev server on that port before running probe_page. Probe result: ${probe.code} (${probe.detail ?? 'no detail'}).`,
                            detail: {
                                port,
                                probeCode: probe.code,
                                probeDetail: probe.detail,
                                elapsedMs: probe.elapsedMs,
                            },
                        };
                        logger.warn(`Pre-flight port probe failed for ${ctx.originalUrl}: ${probe.code} in ${probe.elapsedMs}ms`);
                        return { content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }], isError: true };
                    }
                }
                // Reuse existing tunnel for this port if any; otherwise provision.
                const reused = findExistingTunnel(ctx);
                if (reused) {
                    targetContexts.push(reused);
                }
                else {
                    let tunnel;
                    try {
                        tunnel = await client.tunnels.provisionWithRetry();
                    }
                    catch (provisionError) {
                        const msg = provisionError instanceof Error ? provisionError.message : String(provisionError);
                        const diag = provisionError instanceof TunnelProvisionError ? ` ${provisionError.diagnosticSuffix()}` : '';
                        throw new Error(`Failed to provision tunnel for ${ctx.originalUrl}. ` +
                            `(Detail: ${msg})${diag}`, { cause: provisionError });
                    }
                    // Key acquired but no tunnel yet: treat as orphaned until proven otherwise.
                    orphanedKeyIds.add(tunnel.keyId);
                    let tunneled;
                    try {
                        tunneled = await ensureTunnel(ctx, tunnel.tunnelKey, tunnel.tunnelId, tunnel.keyId, () => client.revokeNgrokKey(tunnel.keyId));
                    }
                    catch (tunnelError) {
                        const msg = tunnelError instanceof Error ? tunnelError.message : String(tunnelError);
                        throw new Error(`Tunnel creation failed for ${ctx.originalUrl}. (Detail: ${msg})`, { cause: tunnelError });
                    }
                    if (tunneled.tunnelId) {
                        // The tunnel owns the key from here on; it must not be revoked in finally.
                        orphanedKeyIds.delete(tunnel.keyId);
                    }
                    // Tunnel health probe: catch the IPv4/IPv6 bind / dead-server case
                    // before committing to a full backend execution.
                    if (tunneled.targetUrl) {
                        const health = await probeTunnelHealth(tunneled.targetUrl);
                        if (!health.healthy) {
                            const payload = {
                                error: 'TunnelTrafficBlocked',
                                message: `Tunnel established but traffic isn't reaching the dev server. ${health.detail ?? ''}`,
                                detail: {
                                    code: health.code,
                                    status: health.status,
                                    ngrokErrorCode: health.ngrokErrorCode,
                                    elapsedMs: health.elapsedMs,
                                },
                            };
                            if (tunneled.tunnelId) {
                                // Fire-and-forget: a failed stop is only logged.
                                tunnelManager.stopTunnel(tunneled.tunnelId).catch((err) => logger.warn(`Failed to stop broken tunnel ${tunneled.tunnelId}: ${err}`));
                            }
                            return { content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }], isError: true };
                        }
                    }
                    targetContexts.push(tunneled);
                }
            }
            else {
                // Public URL — no tunnel needed.
                targetContexts.push(ctx);
            }
        }
        // ── Locate workflow template ───────────────────────────────────────────
        if (progressCallback) {
            await progressCallback({ progress: ++progressStep, total: TOTAL_STEPS, message: 'Locating page-probe workflow template...' });
        }
        const templateUuid = await getCachedTemplateUuid(TEMPLATE_KEYWORD, async (name) => {
            return client.workflows.findTemplateByName(name);
        });
        if (!templateUuid) {
            throw new Error(`Page Probe Workflow Template not found. ` +
                `Ensure the backend has a template matching "${TEMPLATE_KEYWORD}" seeded and accessible.`);
        }
        // ── Build contextData (camelCase; axiosTransport snake_cases on the wire) ──
        const contextData = {
            targets: input.targets.map((t, i) => ({
                // Send the tunneled URL when there is one, otherwise the caller's URL.
                url: targetContexts[i].targetUrl ?? t.url,
                waitForSelector: t.waitForSelector,
                waitForLoadState: t.waitForLoadState,
                timeoutMs: t.timeoutMs,
            })),
            includeHtml: input.includeHtml,
            captureScreenshots: input.captureScreenshots,
        };
        // ── Execute ────────────────────────────────────────────────────────────
        if (progressCallback) {
            await progressCallback({ progress: ++progressStep, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
        }
        const executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData);
        const executionUuid = executeResponse.executionUuid;
        logger.info(`Probe execution queued: ${executionUuid}`);
        // ── Poll ───────────────────────────────────────────────────────────────
        let lastCompleted = -1;
        const finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
            // Keep all active tunnels alive during polling.
            for (const tc of targetContexts) {
                if (tc.tunnelId) {
                    touchTunnelById(tc.tunnelId);
                }
            }
            if (!progressCallback) {
                return;
            }
            const completedNodes = (exec.nodeExecutions ?? []).filter(n => n.nodeType === 'page.capture' && n.status === 'success').length;
            // Only emit when the completed count changes, to avoid duplicate updates.
            if (completedNodes !== lastCompleted) {
                lastCompleted = completedNodes;
                await progressCallback({
                    // Capped below TOTAL_STEPS so polling never reports the final step.
                    progress: Math.min(progressStep + completedNodes, TOTAL_STEPS - 1),
                    total: TOTAL_STEPS,
                    message: `Probed ${completedNodes}/${input.targets.length} target${input.targets.length === 1 ? '' : 's'}...`,
                });
            }
        }, abortController.signal);
        // ── Format response ────────────────────────────────────────────────────
        const duration = Date.now() - startTime;
        const captureNodes = (finalExecution.nodeExecutions ?? [])
            .filter(n => n.nodeType === 'page.capture')
            .sort((a, b) => a.executionOrder - b.executionOrder);
        const results = [];
        const screenshotBlocks = [];
        for (let i = 0; i < input.targets.length; i++) {
            const target = input.targets[i];
            // NOTE(review): capture nodes are matched to targets purely by position
            // after sorting on executionOrder — this assumes the backend emits
            // exactly one page.capture node per target, in input order. Confirm.
            const node = captureNodes[i];
            const data = node?.outputData ?? {};
            const result = {
                url: target.url, // ORIGINAL caller URL — not the tunneled rewrite
                finalUrl: typeof data.finalUrl === 'string' ? data.finalUrl : (typeof data.url === 'string' ? data.url : target.url),
                statusCode: typeof data.statusCode === 'number' ? data.statusCode : 0,
                title: typeof data.title === 'string' ? data.title : null,
                loadTimeMs: typeof data.loadTimeMs === 'number' ? data.loadTimeMs : 0,
                consoleErrors: summarizeConsole(Array.isArray(data.consoleSlice) ? data.consoleSlice : []),
                networkSummary: summarizeHar(Array.isArray(data.harSlice) ? data.harSlice : []),
            };
            if (input.includeHtml && typeof data.html === 'string') {
                result.html = data.html;
            }
            if (typeof data.error === 'string' && data.error) {
                result.error = data.error;
            }
            results.push(result);
            if (input.captureScreenshots && typeof data.screenshotB64 === 'string' && data.screenshotB64) {
                screenshotBlocks.push(imageContentBlock(data.screenshotB64, 'image/png'));
            }
        }
        const responsePayload = {
            executionId: executionUuid,
            durationMs: typeof finalExecution.durationMs === 'number' ? finalExecution.durationMs : duration,
            results,
        };
        if (finalExecution.browserSession) {
            responsePayload.browserSession = finalExecution.browserSession;
        }
        // Sanitize ngrok URLs from the entire payload — agent-authored strings in
        // node outputData (titles, HTML, console messages from the page itself)
        // can occasionally contain the tunnel URL; rewrite to the original
        // localhost origin per tunnel context. For multi-localhost batches we
        // run sanitize once per localhost target since each may have its own
        // tunnel↔origin mapping.
        let sanitizedPayload = responsePayload;
        for (const tc of targetContexts) {
            if (tc.isLocalhost) {
                sanitizedPayload = sanitizeResponseUrls(sanitizedPayload, tc);
            }
        }
        logger.toolComplete('probe_page', duration);
        return {
            content: [
                { type: 'text', text: JSON.stringify(sanitizedPayload, null, 2) },
                ...screenshotBlocks,
            ],
        };
    }
    catch (error) {
        const duration = Date.now() - startTime;
        logger.toolError('probe_page', error, duration);
        // A missing template or a 401 may mean the cached template UUID is stale.
        if (error instanceof Error && (error.message.includes('not found') || error.message.includes('401'))) {
            invalidateTemplateCache();
        }
        throw handleExternalServiceError(error, 'DebuggAI', 'probe_page execution');
    }
    finally {
        process.stdin.removeListener('close', onStdinClose);
        // Tunnels intentionally NOT torn down — reuse pattern (bead vwd) +
        // 55-min idle auto-shutoff. Revoke only orphaned keys (we acquired the
        // key but tunnel creation failed before ensureTunnel completed).
        // NOTE(review): ensureTunnel also receives a revoke callback; whether it
        // invokes it on failure is not visible here. A redundant revoke is only
        // logged as a warning below.
        for (const keyId of orphanedKeyIds) {
            if (!keyId) {
                continue;
            }
            client.revokeNgrokKey(keyId).catch(err => logger.warn(`Failed to revoke unused ngrok key ${keyId}: ${err}`));
        }
    }
}
|
|
@@ -61,6 +61,11 @@ export async function searchEnvironmentsHandler(input, _context) {
|
|
|
61
61
|
const client = new DebuggAIServerClient(config.api.key);
|
|
62
62
|
await client.init();
|
|
63
63
|
// ── Resolve projectUuid ──
|
|
64
|
+
// Bead gb4n: when projectUuid is provided directly (caller skips git
|
|
65
|
+
// auto-resolution), `name` and `repoName` are unknown. OMIT those fields
|
|
66
|
+
// rather than emitting nulls — null fields surprised callers and
|
|
67
|
+
// muddied the contract. If a caller needs them, they fetch via
|
|
68
|
+
// search_projects.
|
|
64
69
|
let projectUuid = input.projectUuid;
|
|
65
70
|
let project = null;
|
|
66
71
|
if (!projectUuid) {
|
|
@@ -73,10 +78,15 @@ export async function searchEnvironmentsHandler(input, _context) {
|
|
|
73
78
|
return noProjectResolved(pagination, `No DebuggAI project found for repo "${repoName}". Pass projectUuid explicitly.`);
|
|
74
79
|
}
|
|
75
80
|
projectUuid = resolved.uuid;
|
|
76
|
-
project = { uuid: resolved.uuid
|
|
81
|
+
project = { uuid: resolved.uuid };
|
|
82
|
+
if (resolved.name)
|
|
83
|
+
project.name = resolved.name;
|
|
84
|
+
const rn = resolved.repo?.name ?? repoName;
|
|
85
|
+
if (rn)
|
|
86
|
+
project.repoName = rn;
|
|
77
87
|
}
|
|
78
88
|
else {
|
|
79
|
-
project = { uuid: projectUuid
|
|
89
|
+
project = { uuid: projectUuid };
|
|
80
90
|
}
|
|
81
91
|
// ── uuid mode ──
|
|
82
92
|
if (input.uuid) {
|
|
@@ -15,8 +15,23 @@ import { tunnelManager } from '../services/ngrok/tunnelManager.js';
|
|
|
15
15
|
import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js';
|
|
16
16
|
import { extractLocalhostPort } from '../utils/urlParser.js';
|
|
17
17
|
import { getCachedTemplateUuid, getCachedProjectUuid, invalidateTemplateCache, invalidateProjectCache, } from '../utils/handlerCaches.js';
|
|
18
|
+
import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js';
|
|
19
|
+
import { Telemetry, TelemetryEvents } from '../utils/telemetry.js';
|
|
18
20
|
const logger = new Logger({ module: 'testPageChangesHandler' });
|
|
19
21
|
const TEMPLATE_NAME = 'app evaluation';
|
|
22
|
+
// Bead kbxy: bounded retry on known transient backend signatures (Pydantic
|
|
23
|
+
// JSON parse errors, 502s, ECONNRESETs). Default 1 retry; env-overridable
|
|
24
|
+
// up to 3 to balance reliability vs quota cost. Conservative: only retries
|
|
25
|
+
// on documented transient patterns (utils/transientErrors.ts).
|
|
26
|
+
/**
 * Read the transient-error retry budget from DEBUGGAI_TRANSIENT_RETRIES.
 *
 * The value must be, after trimming whitespace, entirely a non-negative
 * base-10 integer. Anything else (unset, empty, negative, fractional, or
 * partially numeric such as "2abc") falls back to the default of 1, so a
 * typo cannot silently change the budget. Valid values are capped at 3.
 *
 * @returns {number} Number of retries to allow, in the range 0-3.
 */
function getMaxTransientRetries() {
    const DEFAULT_RETRIES = 1;
    const RETRY_CAP = 3;
    const raw = process.env.DEBUGGAI_TRANSIENT_RETRIES;
    if (raw === undefined) {
        return DEFAULT_RETRIES;
    }
    const trimmed = raw.trim();
    // Digits only: parseInt alone would accept "2abc" as 2 and "2.9" as 2.
    if (!/^\d+$/.test(trimmed)) {
        return DEFAULT_RETRIES;
    }
    return Math.min(Number.parseInt(trimmed, 10), RETRY_CAP);
}
|
|
20
35
|
// Concurrency control — max 2 simultaneous browser checks.
|
|
21
36
|
// Additional requests queue and run when a slot opens.
|
|
22
37
|
const MAX_CONCURRENT = 2;
|
|
@@ -229,88 +244,126 @@ async function testPageChangesHandlerInner(input, context, rawProgressCallback)
|
|
|
229
244
|
if (progressCallback) {
|
|
230
245
|
await progressCallback({ progress: 3, total: TOTAL_STEPS, message: 'Queuing workflow execution...' });
|
|
231
246
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
logger.info(`Execution queued: ${executionUuid}`);
|
|
235
|
-
// --- Poll ---
|
|
236
|
-
// Progress phases:
|
|
247
|
+
// --- Execute + Poll (with bounded retry on transient errors, bead kbxy) ---
|
|
248
|
+
// Progress phases (per attempt):
|
|
237
249
|
// 1-3: MCP setup (tunnel, template, queue) — already sent above
|
|
238
250
|
// 4-6: Backend setup (trigger, browser.setup, subworkflow starting)
|
|
239
251
|
// 7-27: Agent steps (mapped from state.stepsTaken)
|
|
240
252
|
// 28: Complete
|
|
241
253
|
const BACKEND_SETUP_END = 6;
|
|
242
|
-
let lastStepsTaken = 0;
|
|
243
|
-
let observedMaxSteps = MAX_EXEC_STEPS;
|
|
244
254
|
const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
if (TERMINAL_STATUSES.has(exec.status)) {
|
|
262
|
-
const terminalOutcome = exec.state?.outcome ?? exec.status;
|
|
263
|
-
await progressCallback({
|
|
264
|
-
progress: TOTAL_STEPS,
|
|
265
|
-
total: TOTAL_STEPS,
|
|
266
|
-
message: `Complete: ${terminalOutcome}`,
|
|
255
|
+
const MAX_RETRIES = getMaxTransientRetries();
|
|
256
|
+
let executeResponse;
|
|
257
|
+
let executionUuid = '';
|
|
258
|
+
let finalExecution;
|
|
259
|
+
let attempt = 0;
|
|
260
|
+
while (true) {
|
|
261
|
+
attempt++;
|
|
262
|
+
if (attempt > 1) {
|
|
263
|
+
// Retry path — emit telemetry + progress notification + brief backoff.
|
|
264
|
+
Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
|
|
265
|
+
tool: 'check_app_in_browser',
|
|
266
|
+
attempt,
|
|
267
|
+
reason: transientReasonTag(finalExecution),
|
|
268
|
+
previousExecutionId: executionUuid,
|
|
269
|
+
previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
|
|
270
|
+
previousStateError: finalExecution?.state?.error?.slice(0, 200),
|
|
267
271
|
});
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
// Agent is actively stepping — map into slots 7..27
|
|
275
|
-
if (stepsTaken > observedMaxSteps)
|
|
276
|
-
observedMaxSteps = stepsTaken + 5;
|
|
277
|
-
const stepSlots = TOTAL_STEPS - BACKEND_SETUP_END - 1; // 21 slots
|
|
278
|
-
execProgress = BACKEND_SETUP_END + Math.max(1, Math.round((stepsTaken / observedMaxSteps) * stepSlots));
|
|
279
|
-
execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
|
|
280
|
-
// Use state.currentAction for the message (backend sends intent + actionType)
|
|
281
|
-
const ca = exec.state?.currentAction;
|
|
282
|
-
if (ca?.intent) {
|
|
283
|
-
const action = ca.actionType ?? ca.action_type ?? 'working';
|
|
284
|
-
message = `Step ${stepsTaken}: [${action}] ${ca.intent}`;
|
|
285
|
-
}
|
|
286
|
-
else {
|
|
287
|
-
message = `Agent evaluating... (step ${stepsTaken})`;
|
|
272
|
+
if (progressCallback) {
|
|
273
|
+
await progressCallback({
|
|
274
|
+
progress: SETUP_STEPS,
|
|
275
|
+
total: TOTAL_STEPS,
|
|
276
|
+
message: `Transient backend error — retrying (attempt ${attempt}/${MAX_RETRIES + 1})...`,
|
|
277
|
+
});
|
|
288
278
|
}
|
|
279
|
+
await new Promise(r => setTimeout(r, 1000 * (attempt - 1)));
|
|
289
280
|
}
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
281
|
+
executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
|
|
282
|
+
executionUuid = executeResponse.executionUuid;
|
|
283
|
+
logger.info(`Execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);
|
|
284
|
+
// Closure state — reset PER ATTEMPT so progress numbers don't double-count
|
|
285
|
+
// across retries.
|
|
286
|
+
let lastStepsTaken = 0;
|
|
287
|
+
let observedMaxSteps = MAX_EXEC_STEPS;
|
|
288
|
+
finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
|
|
289
|
+
// Keep the tunnel alive while the workflow is actively running
|
|
290
|
+
if (ctx.tunnelId)
|
|
291
|
+
touchTunnelById(ctx.tunnelId);
|
|
292
|
+
const nodes = exec.nodeExecutions ?? [];
|
|
293
|
+
const stepsTaken = Math.max(nodes.filter(n => n.nodeType === 'brain.step').length, exec.state?.stepsTaken ?? 0);
|
|
294
|
+
if (stepsTaken !== lastStepsTaken) {
|
|
295
|
+
lastStepsTaken = stepsTaken;
|
|
296
|
+
logger.info(`Execution status: ${exec.status}, nodes: ${nodes.length}, steps: ${stepsTaken}`);
|
|
298
297
|
}
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
298
|
+
if (!progressCallback)
|
|
299
|
+
return;
|
|
300
|
+
// Bead 0bq: emit the final "Complete:" progress INSIDE this callback
|
|
301
|
+
// when terminal status is detected. pollExecution will return on the
|
|
302
|
+
// next line (line 183 in services/workflows.ts), so there's no
|
|
303
|
+
// post-pollExecution progress emission that could race the response.
|
|
304
|
+
if (TERMINAL_STATUSES.has(exec.status)) {
|
|
305
|
+
const terminalOutcome = exec.state?.outcome ?? exec.status;
|
|
306
|
+
await progressCallback({
|
|
307
|
+
progress: TOTAL_STEPS,
|
|
308
|
+
total: TOTAL_STEPS,
|
|
309
|
+
message: `Complete: ${terminalOutcome}`,
|
|
310
|
+
});
|
|
311
|
+
return;
|
|
302
312
|
}
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
313
|
+
// --- Compute progress number ---
|
|
314
|
+
let execProgress;
|
|
315
|
+
let message;
|
|
316
|
+
if (stepsTaken > 0) {
|
|
317
|
+
// Agent is actively stepping — map into slots 7..27
|
|
318
|
+
if (stepsTaken > observedMaxSteps)
|
|
319
|
+
observedMaxSteps = stepsTaken + 5;
|
|
320
|
+
const stepSlots = TOTAL_STEPS - BACKEND_SETUP_END - 1; // 21 slots
|
|
321
|
+
execProgress = BACKEND_SETUP_END + Math.max(1, Math.round((stepsTaken / observedMaxSteps) * stepSlots));
|
|
322
|
+
execProgress = Math.min(execProgress, TOTAL_STEPS - 1);
|
|
323
|
+
// Use state.currentAction for the message (backend sends intent + actionType)
|
|
324
|
+
const ca = exec.state?.currentAction;
|
|
325
|
+
if (ca?.intent) {
|
|
326
|
+
const action = ca.actionType ?? ca.action_type ?? 'working';
|
|
327
|
+
message = `Step ${stepsTaken}: [${action}] ${ca.intent}`;
|
|
328
|
+
}
|
|
329
|
+
else {
|
|
330
|
+
message = `Agent evaluating... (step ${stepsTaken})`;
|
|
331
|
+
}
|
|
306
332
|
}
|
|
307
333
|
else {
|
|
308
|
-
|
|
309
|
-
|
|
334
|
+
// No agent steps yet — show backend setup progress from node transitions
|
|
335
|
+
const hasSubworkflow = nodes.some(n => n.nodeType === 'subworkflow.run');
|
|
336
|
+
const hasBrowserSetup = nodes.some(n => n.nodeType === 'browser.setup');
|
|
337
|
+
const browserReady = nodes.some(n => n.nodeType === 'browser.setup' && n.status === 'success');
|
|
338
|
+
if (browserReady || hasSubworkflow) {
|
|
339
|
+
execProgress = BACKEND_SETUP_END;
|
|
340
|
+
message = 'Browser ready, agent starting...';
|
|
341
|
+
}
|
|
342
|
+
else if (hasBrowserSetup) {
|
|
343
|
+
execProgress = SETUP_STEPS + 2;
|
|
344
|
+
message = 'Launching browser...';
|
|
345
|
+
}
|
|
346
|
+
else if (nodes.length > 0) {
|
|
347
|
+
execProgress = SETUP_STEPS + 1;
|
|
348
|
+
message = 'Workflow triggered, preparing...';
|
|
349
|
+
}
|
|
350
|
+
else {
|
|
351
|
+
execProgress = SETUP_STEPS + 1;
|
|
352
|
+
message = 'Waiting for execution to start...';
|
|
353
|
+
}
|
|
310
354
|
}
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
355
|
+
await progressCallback({ progress: execProgress, total: TOTAL_STEPS, message });
|
|
356
|
+
}, abortController.signal);
|
|
357
|
+
// Decide retry vs exit: only retry on documented transient signatures
|
|
358
|
+
// AND while we still have budget. Otherwise break and surface whatever
|
|
359
|
+
// result the agent reached.
|
|
360
|
+
if (attempt > MAX_RETRIES)
|
|
361
|
+
break;
|
|
362
|
+
if (!isTransientWorkflowError(finalExecution))
|
|
363
|
+
break;
|
|
364
|
+
logger.warn(`Transient backend error detected (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
|
|
365
|
+
`retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
|
|
366
|
+
}
|
|
314
367
|
const duration = Date.now() - startTime;
|
|
315
368
|
// --- Format result ---
|
|
316
369
|
const outcome = finalExecution.state?.outcome ?? finalExecution.status;
|
|
@@ -368,15 +421,41 @@ async function testPageChangesHandlerInner(input, context, rawProgressCallback)
|
|
|
368
421
|
reason: sw.error || undefined,
|
|
369
422
|
};
|
|
370
423
|
}
|
|
424
|
+
const stepsTaken = finalExecution.state?.stepsTaken ?? subworkflowNode?.outputData?.stepsTaken ?? actionTrace.length;
|
|
425
|
+
const success = finalExecution.state?.success ?? subworkflowNode?.outputData?.success ?? false;
|
|
371
426
|
const responsePayload = {
|
|
372
427
|
outcome,
|
|
373
|
-
success
|
|
428
|
+
success,
|
|
374
429
|
status: finalExecution.status,
|
|
375
|
-
stepsTaken
|
|
430
|
+
stepsTaken,
|
|
431
|
+
stepsBudget: MAX_EXEC_STEPS, // bead qmdd
|
|
432
|
+
stepsRemaining: Math.max(0, MAX_EXEC_STEPS - (stepsTaken ?? 0)), // bead qmdd
|
|
376
433
|
targetUrl: originalUrl,
|
|
377
434
|
executionId: executionUuid,
|
|
378
435
|
durationMs: finalExecution.durationMs ?? duration,
|
|
379
436
|
};
|
|
437
|
+
// Bead jqmj: failureCategory disambiguates the three meanings of 'fail':
|
|
438
|
+
// 'agent-error' — workflow/infra failure (Pydantic parse error,
|
|
439
|
+
// backend exception, transport issue). Caller's
|
|
440
|
+
// right move: retry-with-backoff.
|
|
441
|
+
// 'assertion-mismatch' — agent ran the scenario but page state didn't
|
|
442
|
+
// match expectations. Caller's right move: fix
|
|
443
|
+
// code or update the test description.
|
|
444
|
+
// ('page-error' is reserved for v2 — needs a structured signal from
|
|
445
|
+
// backend to distinguish from assertion-mismatch reliably; today's
|
|
446
|
+
// inferrable info is too fragile.)
|
|
447
|
+
// Field is OMITTED on success (no failure to categorize).
|
|
448
|
+
if (!success) {
|
|
449
|
+
// state.error is the AGENT's narrative — it can describe assertion
|
|
450
|
+
// failures ("expected heading to contain Welcome") OR infrastructure
|
|
451
|
+
// failures ("Pydantic JSON parse error"). Without a structured signal,
|
|
452
|
+
// we only count it as 'agent-error' when paired with workflow-level
|
|
453
|
+
// failure (status='failed') or a non-empty errorMessage.
|
|
454
|
+
// status='failed' or errorMessage set → workflow-level / transport error.
|
|
455
|
+
const hasInfraFailure = finalExecution.status === 'failed'
|
|
456
|
+
|| !!finalExecution.errorMessage;
|
|
457
|
+
responsePayload.failureCategory = hasInfraFailure ? 'agent-error' : 'assertion-mismatch';
|
|
458
|
+
}
|
|
380
459
|
if (actionTrace.length > 0)
|
|
381
460
|
responsePayload.actionTrace = actionTrace;
|
|
382
461
|
if (evaluation)
|