@debugg-ai/debugg-ai-mcp 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -1
- package/dist/services/ngrok/tunnelManager.js +78 -29
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -10,9 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
10
10
|
### Fixed — tunnel provisioning flakiness surfaces as user-facing errors
|
|
11
11
|
|
|
12
12
|
- `check_app_in_browser` / `trigger_crawl` now automatically retry transient tunnel-provision failures (5xx, 408, 429, network errors like ECONNRESET) with exponential backoff (500ms → 1500ms → 3000ms, 3 attempts). Previously a single ngrok/backend blip forced the caller to manually retry the tool call. Bead `7nx`.
|
|
13
|
+
- **ngrok.connect() retry widened from 2 to 3 attempts** with 500ms / 1500ms backoff. A client still hit "Tunnel setup failed" after `7nx` shipped — the failure was in the ngrok-listener-bringup path, not the backend-provision path. Auth errors still fail fast. Bead `ixh`.
|
|
13
14
|
- Tunnel-provision error messages now carry structured diagnostic context — HTTP status, ngrok error code, backend `x-request-id`, retryable flag — so users have something actionable to file bug reports against instead of opaque "Tunnel setup failed". Bead `5wz`.
|
|
14
15
|
- 4xx auth/quota errors (401/403/404) fail fast without retry to avoid loops against a bad API key.
|
|
15
|
-
- New posthog telemetry event `tunnel.provision_retry` fires per retry attempt with outcome, status, and diagnostic fields so flaky rates become measurable.
|
|
16
|
+
- New posthog telemetry event `tunnel.provision_retry` fires per retry attempt with outcome, status, stage (`ngrok_connect` vs backend-provision), and diagnostic fields so flaky rates become measurable.
|
|
16
17
|
|
|
17
18
|
## [2.0.0] - 2026-04-23
|
|
18
19
|
|
|
@@ -48,6 +48,12 @@ class TunnelManager {
|
|
|
48
48
|
pendingTunnels = new Map();
|
|
49
49
|
initialized = false;
|
|
50
50
|
TUNNEL_TIMEOUT_MS = 55 * 60 * 1000;
|
|
51
|
+
/**
|
|
52
|
+
* Backoff schedule (ms) between ngrok.connect() retry attempts. Bead ixh.
|
|
53
|
+
* Exposed on the class so tests can override with short delays without
|
|
54
|
+
* changing the public API or depending on jest fake timers.
|
|
55
|
+
*/
|
|
56
|
+
connectBackoffMs = [500, 1500];
|
|
51
57
|
/**
 * Creates a TunnelManager bound to a registry.
 *
 * @param reg - Registry object stored on `this.reg`. When omitted, the value
 *   returned by `getDefaultRegistry()` is used.
 */
constructor(reg = getDefaultRegistry()) {
|
|
52
58
|
this.reg = reg;
|
|
53
59
|
}
|
|
@@ -266,37 +272,80 @@ class TunnelManager {
|
|
|
266
272
|
else {
|
|
267
273
|
localAddr = inDocker ? `${dockerHost}:${port}` : port;
|
|
268
274
|
}
|
|
275
|
+
// Bead ixh: 3-attempt retry for ngrok.connect transient failures. Previously
|
|
276
|
+
// only retried ONCE (with agent reset), which is insufficient against real
|
|
277
|
+
// ngrok / network flakes (client-reported incident 2026-04-24).
|
|
278
|
+
// - Attempt 1: fresh connect
|
|
279
|
+
// - Attempt 2: reset the ngrok agent module, wait 500ms, then retry
|
|
280
|
+
// (existing "agent died" recovery path)
|
|
281
|
+
// - Attempt 3: after 1500ms backoff, retry with the already-reset agent
|
|
282
|
+
// Auth-token errors short-circuit at any attempt — no point looping.
|
|
283
|
+
const self = this;
|
|
269
284
|
/**
 * Opens the ngrok tunnel, retrying failed `ngrok.connect()` calls.
 *
 * The number of attempts is one more than the number of entries in
 * `self.connectBackoffMs` (each entry is the wait, in ms, before the next
 * attempt). An empty URL from `ngrok.connect()` is treated as a failed attempt.
 *
 * Failure handling per attempt:
 *  - Errors whose message matches the auth pattern are rethrown immediately.
 *  - The final attempt's error is rethrown unchanged.
 *  - After the first failure the ngrok module is reset and the manager is
 *    re-initialized before waiting; later failures only wait.
 *
 * A `TUNNEL_PROVISION_RETRY` telemetry event is captured for every failed
 * attempt and for any success that needed more than one attempt.
 *
 * @returns {Promise<string>} The URL returned by `ngrok.connect()`.
 * @throws The error from the last failed attempt, or the auth-class error.
 */
const connectWithRetry = async () => {
    const pause = (delayMs) => new Promise((resolve) => setTimeout(resolve, delayMs));
    // Read from the instance so tests can substitute a shorter schedule.
    const schedule = self.connectBackoffMs;
    // N waits sit between N + 1 attempts.
    const maxAttempts = schedule.length + 1;
    const tunnelOptions = {
        proto: 'http',
        addr: localAddr,
        hostname: tunnelDomain,
        authtoken: authToken,
    };
    // No `g` flag, so the pattern is stateless and safe to reuse across attempts.
    const authFailurePattern = /authtoken|unauthorized|\b401\b|\b403\b/i;
    // Single place that shapes the telemetry payload for this stage.
    const report = (attempt, outcome, extra = {}) => Telemetry.capture(TelemetryEvents.TUNNEL_PROVISION_RETRY, {
        attempt,
        outcome,
        stage: 'ngrok_connect',
        ...extra,
    });
    let lastFailure;
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
        try {
            const client = await getNgrok();
            const connectedUrl = await client.connect(tunnelOptions);
            if (!connectedUrl) {
                // Thrown inside the try so it is handled like any other failed attempt.
                throw new Error(`ngrok.connect() returned empty URL (attempt ${attempt})`);
            }
            if (attempt > 1) {
                report(attempt, 'success');
            }
            return connectedUrl;
        }
        catch (failure) {
            lastFailure = failure;
            const reason = failure instanceof Error ? failure.message : String(failure);
            // Retrying with the same token cannot succeed, so auth-class errors
            // leave the loop immediately and are handled by the caller.
            if (authFailurePattern.test(reason)) {
                report(attempt, 'giving-up', { reason: 'auth-error' });
                throw failure;
            }
            const outOfAttempts = attempt >= maxAttempts;
            report(attempt, outOfAttempts ? 'giving-up' : 'will-retry');
            if (outOfAttempts) {
                throw failure;
            }
            if (attempt !== 1) {
                // The agent was already reset after the first failure; just wait.
                logger.warn(`ngrok.connect() failed (attempt ${attempt}/${maxAttempts}), will retry: ${reason}`);
            }
            else {
                // First failure: reset the agent and re-initialize before retrying.
                logger.warn(`ngrok.connect() failed (attempt 1/${maxAttempts}), resetting agent: ${reason}`);
                resetNgrokModule();
                this.initialized = false;
                await this.ensureInitialized();
            }
            const waitMs = schedule[attempt - 1] ?? schedule[schedule.length - 1];
            await pause(waitMs);
        }
    }
    // Only reached if the loop body never ran; kept so the function always settles with an error.
    throw lastFailure ?? new Error('connectWithRetry: exhausted attempts without error');
};
|
|
301
350
|
try {
|
|
302
351
|
const tunnelUrl = await connectWithRetry();
|