switchroom 0.13.10 → 0.13.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/bridge/bridge.js +23 -4
- package/telegram-plugin/dist/gateway/gateway.js +51 -74
- package/telegram-plugin/dist/server.js +23 -4
- package/telegram-plugin/gateway/gateway.ts +44 -78
- package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +82 -0
- package/telegram-plugin/model-unavailable.ts +11 -1
- package/telegram-plugin/operator-events.fixtures.json +14 -24
- package/telegram-plugin/operator-events.ts +11 -2
- package/telegram-plugin/session-tail.ts +71 -4
- package/telegram-plugin/subagent-watcher.ts +13 -20
- package/telegram-plugin/tests/fleet-state-watcher.test.ts +0 -1
- package/telegram-plugin/tests/model-unavailable.test.ts +15 -2
- package/telegram-plugin/tests/operator-events-session-tail.test.ts +53 -2
- package/telegram-plugin/tests/operator-events.test.ts +14 -7
- package/telegram-plugin/tests/subagent-handback-decision.test.ts +112 -0
- package/telegram-plugin/tests/subagent-registry-bugs.test.ts +1 -3
- package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher-parent-marker.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +1 -4
- package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher.test.ts +15 -5
- package/telegram-plugin/tests/turn-flush-safety.test.ts +29 -81
- package/telegram-plugin/turn-flush-safety.ts +23 -53
|
@@ -326,7 +326,17 @@ export function resolveModelUnavailableFromOperatorEvent(
|
|
|
326
326
|
return detectModelUnavailable(detail) ?? { kind: 'quota_exhausted', raw: detail }
|
|
327
327
|
}
|
|
328
328
|
if (ev.kind === 'rate-limited') {
|
|
329
|
-
|
|
329
|
+
// A rate-limited / transient overload is NOT "model unavailable" —
|
|
330
|
+
// it is retryable and Claude Code retries it internally. Escalate
|
|
331
|
+
// to the model-unavailable card ONLY if the detail carries a
|
|
332
|
+
// genuine quota signal (a 4xx that slipped past the classifier
|
|
333
|
+
// with usage-limit wording in its body). A bare overload /
|
|
334
|
+
// rate-limit returns null → the caller renders the calm
|
|
335
|
+
// `rate-limited` card, never the scary "⚠️ Model unavailable" one.
|
|
336
|
+
// Returning `{kind:'overload'}` here is what fired a false
|
|
337
|
+
// model-unavailable card on every transient 529.
|
|
338
|
+
const detected = detectModelUnavailable(detail)
|
|
339
|
+
return detected?.kind === 'quota_exhausted' ? detected : null
|
|
330
340
|
}
|
|
331
341
|
if (ev.kind === 'unknown-5xx') {
|
|
332
342
|
return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_comment": "Captured error shapes per OperatorEventKind. Real API keys/IDs have been scrubbed.",
|
|
3
|
-
|
|
4
3
|
"credentials-expired": [
|
|
5
4
|
{
|
|
6
5
|
"_source": "Anthropic API — 401 with authentication_error + expired hint",
|
|
@@ -16,7 +15,6 @@
|
|
|
16
15
|
"message": "OAuth token expired, please re-authenticate to continue"
|
|
17
16
|
}
|
|
18
17
|
],
|
|
19
|
-
|
|
20
18
|
"credentials-invalid": [
|
|
21
19
|
{
|
|
22
20
|
"_source": "Anthropic API — 401 with invalid_api_key",
|
|
@@ -40,7 +38,6 @@
|
|
|
40
38
|
"message": "Invalid API key"
|
|
41
39
|
}
|
|
42
40
|
],
|
|
43
|
-
|
|
44
41
|
"credit-exhausted": [
|
|
45
42
|
{
|
|
46
43
|
"_source": "Anthropic API — 402 credit_balance_too_low",
|
|
@@ -56,23 +53,7 @@
|
|
|
56
53
|
"message": "credit balance insufficient"
|
|
57
54
|
}
|
|
58
55
|
],
|
|
59
|
-
|
|
60
|
-
"quota-exhausted": [
|
|
61
|
-
{
|
|
62
|
-
"_source": "Anthropic API — 529 overloaded_error (Claude Code converts to quota-exhausted)",
|
|
63
|
-
"status": 529,
|
|
64
|
-
"error": {
|
|
65
|
-
"type": "overloaded_error",
|
|
66
|
-
"message": "Overloaded"
|
|
67
|
-
}
|
|
68
|
-
},
|
|
69
|
-
{
|
|
70
|
-
"_source": "Synthetic — set by session-tail after repeated 429 + slot exhaustion",
|
|
71
|
-
"type": "overloaded_error",
|
|
72
|
-
"message": "Service overloaded, usage limits reached"
|
|
73
|
-
}
|
|
74
|
-
],
|
|
75
|
-
|
|
56
|
+
"quota-exhausted": [],
|
|
76
57
|
"rate-limited": [
|
|
77
58
|
{
|
|
78
59
|
"_source": "Anthropic API — 429 rate_limit_error",
|
|
@@ -86,9 +67,21 @@
|
|
|
86
67
|
"_source": "Top-level rate_limit_error",
|
|
87
68
|
"type": "rate_limit_error",
|
|
88
69
|
"message": "rate limit exceeded"
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"_source": "Anthropic API — 529 overloaded_error (transient server capacity → rate-limited, NOT quota)",
|
|
73
|
+
"status": 529,
|
|
74
|
+
"error": {
|
|
75
|
+
"type": "overloaded_error",
|
|
76
|
+
"message": "Overloaded"
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
"_source": "Synthetic — overloaded_error from session-tail (transient → rate-limited, NOT quota)",
|
|
81
|
+
"type": "overloaded_error",
|
|
82
|
+
"message": "Service overloaded, usage limits reached"
|
|
89
83
|
}
|
|
90
84
|
],
|
|
91
|
-
|
|
92
85
|
"agent-crashed": [
|
|
93
86
|
{
|
|
94
87
|
"_source": "Synthetic — emitted by IPC bridge when Claude child exits nonzero",
|
|
@@ -101,7 +94,6 @@
|
|
|
101
94
|
"message": "IPC socket disconnected unexpectedly"
|
|
102
95
|
}
|
|
103
96
|
],
|
|
104
|
-
|
|
105
97
|
"agent-restarted-unexpectedly": [
|
|
106
98
|
{
|
|
107
99
|
"_source": "Synthetic — emitted by gateway boot-banner diff when uptime drops unexpectedly",
|
|
@@ -114,7 +106,6 @@
|
|
|
114
106
|
"message": "systemd unit restarted outside of operator request"
|
|
115
107
|
}
|
|
116
108
|
],
|
|
117
|
-
|
|
118
109
|
"unknown-4xx": [
|
|
119
110
|
{
|
|
120
111
|
"_source": "Novel 4xx not matching any known Anthropic error type",
|
|
@@ -142,7 +133,6 @@
|
|
|
142
133
|
"_value": "something went wrong"
|
|
143
134
|
}
|
|
144
135
|
],
|
|
145
|
-
|
|
146
136
|
"unknown-5xx": [
|
|
147
137
|
{
|
|
148
138
|
"_source": "500 with no recognised type",
|
|
@@ -139,8 +139,17 @@ function classifyInner(raw: unknown): OperatorEventKind {
|
|
|
139
139
|
message.toLowerCase().includes('overloaded_error') ||
|
|
140
140
|
message.toLowerCase().includes('overloaded')
|
|
141
141
|
) {
|
|
142
|
-
// Anthropic overloaded
|
|
143
|
-
|
|
142
|
+
// Anthropic "overloaded" (HTTP 529) is transient SERVER-side
|
|
143
|
+
// capacity pressure — orthogonal to account quota. It is retryable
|
|
144
|
+
// (`x-should-retry: true`) and Claude Code retries it internally.
|
|
145
|
+
// Classifying it `quota-exhausted` fired a false "Model
|
|
146
|
+
// unavailable — quota exhausted" card AND a self-cancelling fleet
|
|
147
|
+
// auto-fallback on every 529 (the active account always probes
|
|
148
|
+
// healthy — nothing is actually exhausted — so the fallback no-ops
|
|
149
|
+
// with "probed healthy / Stale event?"). It is a rate-limit-family
|
|
150
|
+
// transient; failing over to another account does nothing because
|
|
151
|
+
// every account is equally affected.
|
|
152
|
+
return 'rate-limited'
|
|
144
153
|
}
|
|
145
154
|
|
|
146
155
|
// Synthetic kinds (non-Anthropic — set by session-tail or IPC bridge)
|
|
@@ -409,9 +409,37 @@ export function projectSubagentLine(
|
|
|
409
409
|
* Returns null when no actionable error is detected (routine lines).
|
|
410
410
|
* Never throws — delegates to classifyClaudeError's own safety guarantee.
|
|
411
411
|
*/
|
|
412
|
+
/**
|
|
413
|
+
* Extract Claude Code's retry-state annotations from a transcript line.
|
|
414
|
+
* Claude Code writes top-level `retryAttempt` / `maxRetries` on a
|
|
415
|
+
* retried API error (e.g. a 529 it is internally retrying). Used to
|
|
416
|
+
* tell an in-flight retry from an exhausted (terminal) one. Both
|
|
417
|
+
* optional — non-retried errors and older Claude Code versions omit
|
|
418
|
+
* them.
|
|
419
|
+
*/
|
|
420
|
+
function extractRetryState(obj: Record<string, unknown>): {
|
|
421
|
+
retryAttempt: number | null
|
|
422
|
+
maxRetries: number | null
|
|
423
|
+
} {
|
|
424
|
+
return {
|
|
425
|
+
retryAttempt: typeof obj.retryAttempt === 'number' ? obj.retryAttempt : null,
|
|
426
|
+
maxRetries: typeof obj.maxRetries === 'number' ? obj.maxRetries : null,
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
412
430
|
export function detectErrorInTranscriptLine(
|
|
413
431
|
line: string,
|
|
414
|
-
): {
|
|
432
|
+
): {
|
|
433
|
+
kind: OperatorEventKind
|
|
434
|
+
raw: unknown
|
|
435
|
+
detail: string
|
|
436
|
+
/** True for the rate-limit / transient-overload family. */
|
|
437
|
+
transient: boolean
|
|
438
|
+
/** True when the error is final — NOT an in-flight retry. A transient
|
|
439
|
+
* error mid-retry is `transient:true, terminal:false`; the caller
|
|
440
|
+
* suppresses it (no operator card until the failure is terminal). */
|
|
441
|
+
terminal: boolean
|
|
442
|
+
} | null {
|
|
415
443
|
if (!line || line.length > 2 * 1024 * 1024) return null
|
|
416
444
|
let obj: Record<string, unknown>
|
|
417
445
|
try {
|
|
@@ -447,7 +475,16 @@ export function detectErrorInTranscriptLine(
|
|
|
447
475
|
status === 429
|
|
448
476
|
? 'quota-exhausted'
|
|
449
477
|
: classifyClaudeError({ type: errStr, status, message: text })
|
|
450
|
-
|
|
478
|
+
// An `isApiErrorMessage` line is Claude surfacing the failure to the
|
|
479
|
+
// user — terminal by construction (Claude writes this shape only
|
|
480
|
+
// after its own internal retries are exhausted).
|
|
481
|
+
return {
|
|
482
|
+
kind,
|
|
483
|
+
raw: obj,
|
|
484
|
+
detail: text || errStr || 'api error',
|
|
485
|
+
transient: kind === 'rate-limited',
|
|
486
|
+
terminal: true,
|
|
487
|
+
}
|
|
451
488
|
}
|
|
452
489
|
|
|
453
490
|
// Explicit error line types from Claude Code JSONL
|
|
@@ -472,7 +509,23 @@ export function detectErrorInTranscriptLine(
|
|
|
472
509
|
extractDetailMessage(obj) ??
|
|
473
510
|
String(type ?? '')
|
|
474
511
|
|
|
475
|
-
|
|
512
|
+
// Transient = the rate-limit / overload family. For a transient,
|
|
513
|
+
// decide `terminal` from Claude Code's retry annotations: below the
|
|
514
|
+
// cap → still retrying (in-flight); at/above → exhausted. With no
|
|
515
|
+
// retry state, an explicit `type:"api_error"`/`"error"` LINE means
|
|
516
|
+
// Claude surfaced the failure (terminal); an embedded-error object
|
|
517
|
+
// with no retry state is ambiguous → treat as in-flight and suppress
|
|
518
|
+
// (the silence-poke covers a genuinely stuck turn; a false card is
|
|
519
|
+
// the bug we are fixing, a missed ambiguous card costs nothing).
|
|
520
|
+
const transient = kind === 'rate-limited'
|
|
521
|
+
const retry = extractRetryState(obj)
|
|
522
|
+
const terminal = !transient
|
|
523
|
+
? true
|
|
524
|
+
: retry.retryAttempt != null && retry.maxRetries != null
|
|
525
|
+
? retry.retryAttempt >= retry.maxRetries
|
|
526
|
+
: isErrorLine
|
|
527
|
+
|
|
528
|
+
return { kind, raw, detail, transient, terminal }
|
|
476
529
|
}
|
|
477
530
|
|
|
478
531
|
function extractDetailMessage(obj: Record<string, unknown> | null): string | null {
|
|
@@ -514,6 +567,10 @@ export interface TailOperatorEvent {
|
|
|
514
567
|
kind: OperatorEventKind
|
|
515
568
|
detail: string
|
|
516
569
|
raw: unknown
|
|
570
|
+
/** True for the rate-limit / transient-overload family. */
|
|
571
|
+
transient: boolean
|
|
572
|
+
/** True when the failure is final, not an in-flight retry. */
|
|
573
|
+
terminal: boolean
|
|
517
574
|
}
|
|
518
575
|
|
|
519
576
|
export interface SessionTailConfig {
|
|
@@ -665,7 +722,17 @@ export function startSessionTail(config: SessionTailConfig): SessionTailHandle {
|
|
|
665
722
|
try {
|
|
666
723
|
const errEvent = detectErrorInTranscriptLine(line)
|
|
667
724
|
if (errEvent) {
|
|
668
|
-
|
|
725
|
+
// Honest escalation: a transient overload Claude is still
|
|
726
|
+
// retrying (transient && !terminal) posts NO operator
|
|
727
|
+
// card — it almost always resolves on the next retry.
|
|
728
|
+
// Escalate only terminal failures + non-transient errors.
|
|
729
|
+
if (errEvent.terminal || !errEvent.transient) {
|
|
730
|
+
onOperatorEvent(errEvent)
|
|
731
|
+
} else {
|
|
732
|
+
log?.(
|
|
733
|
+
`session-tail: transient overload suppressed (in-flight retry) kind=${errEvent.kind}`,
|
|
734
|
+
)
|
|
735
|
+
}
|
|
669
736
|
}
|
|
670
737
|
} catch (err) {
|
|
671
738
|
log?.(`session-tail: onOperatorEvent threw: ${(err as Error).message}`)
|
|
@@ -146,11 +146,6 @@ export interface SubagentWatcherConfig {
|
|
|
146
146
|
* an agent's home pollutes the watcher with phantom registrations).
|
|
147
147
|
*/
|
|
148
148
|
agentCwd?: string
|
|
149
|
-
/**
|
|
150
|
-
* Send a fresh (non-edit) Telegram message. For stall / completion
|
|
151
|
-
* state-transition notifications.
|
|
152
|
-
*/
|
|
153
|
-
sendNotification: (text: string) => void
|
|
154
149
|
/**
|
|
155
150
|
* How often to re-scan for new subagent dirs (ms). Default 1000.
|
|
156
151
|
*/
|
|
@@ -862,21 +857,19 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
862
857
|
|
|
863
858
|
if (entry.state === 'done' && !entry.completionNotified) {
|
|
864
859
|
entry.completionNotified = true
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
//
|
|
876
|
-
//
|
|
877
|
-
//
|
|
878
|
-
// boot get their `completionNotified=true` shortcut in registerAgent
|
|
879
|
-
// and skip this path entirely — only post-boot transitions fire.
|
|
860
|
+
// Card retired (#1122): the watcher no longer sends a user-facing
|
|
861
|
+
// "✓ Worker done" message. A framework-authored status line is a
|
|
862
|
+
// conversational-pacing anti-pattern, and the heuristic that drove
|
|
863
|
+
// it (silent-stall synthesis) fired on a worker mid-`Bash` as
|
|
864
|
+
// readily as on a finished one. The user-facing handback is the
|
|
865
|
+
// model's own beat-4 reply, woken by Claude Code's native
|
|
866
|
+
// background-task notification. Completion is surfaced here only
|
|
867
|
+
// via the structured `onFinish` callback — emitted before the
|
|
868
|
+
// deferred cleanup runs so the callback always sees a live
|
|
869
|
+
// registry entry. Historical entries that already-completed at
|
|
870
|
+
// boot get their `completionNotified=true` shortcut in
|
|
871
|
+
// registerAgent and skip this path — only post-boot transitions
|
|
872
|
+
// fire.
|
|
880
873
|
if (config.onFinish) {
|
|
881
874
|
try {
|
|
882
875
|
config.onFinish({
|
|
@@ -50,7 +50,6 @@ describe('subagent-watcher: WorkerEntry.lastTool', () => {
|
|
|
50
50
|
const intervals: Array<{ fn: () => void }> = []
|
|
51
51
|
const w = startSubagentWatcher({
|
|
52
52
|
agentDir,
|
|
53
|
-
sendNotification: () => {},
|
|
54
53
|
stallThresholdMs: 60_000,
|
|
55
54
|
rescanMs: 500,
|
|
56
55
|
now: () => Date.now(),
|
|
@@ -247,9 +247,22 @@ describe('resolveModelUnavailableFromOperatorEvent — kind-driven mapping', ()
|
|
|
247
247
|
expect(d?.kind).toBe('quota_exhausted')
|
|
248
248
|
})
|
|
249
249
|
|
|
250
|
-
it('
|
|
250
|
+
it('treats a bare kind=rate-limited as NOT model-unavailable (transient → calm card)', () => {
|
|
251
|
+
// A transient overload / rate-limit is retryable — Claude Code
|
|
252
|
+
// retries it internally. resolveModelUnavailableFromOperatorEvent
|
|
253
|
+
// returns null so the gateway renders the calm `rate-limited` card,
|
|
254
|
+
// never the scary "⚠️ Model unavailable" one. Returning
|
|
255
|
+
// `{kind:'overload'}` here is what fired a false card on every 529.
|
|
251
256
|
const d = resolveModelUnavailableFromOperatorEvent({ kind: 'rate-limited', detail: '' })
|
|
252
|
-
expect(d?.kind).toBe('overload')
|
|
257
|
+
expect(d).toBeNull()
|
|
258
|
+
})
|
|
259
|
+
|
|
260
|
+
it('escalates a kind=rate-limited that carries a genuine quota signal', () => {
|
|
261
|
+
const d = resolveModelUnavailableFromOperatorEvent({
|
|
262
|
+
kind: 'rate-limited',
|
|
263
|
+
detail: "You've hit your limit · resets 8:50am",
|
|
264
|
+
})
|
|
265
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
253
266
|
})
|
|
254
267
|
|
|
255
268
|
it('always treats kind=unknown-5xx as overload', () => {
|
|
@@ -56,13 +56,64 @@ describe('detectErrorInTranscriptLine — error detection', () => {
|
|
|
56
56
|
expect(result!.kind).toBe('credit-exhausted')
|
|
57
57
|
})
|
|
58
58
|
|
|
59
|
-
it('classifies overloaded_error as quota-exhausted', () => {
|
|
59
|
+
it('classifies overloaded_error as rate-limited (transient), NOT quota-exhausted', () => {
|
|
60
|
+
// A 529 "overloaded" is transient Anthropic server-capacity
|
|
61
|
+
// pressure — orthogonal to account quota. Classifying it
|
|
62
|
+
// quota-exhausted fired a false "Model unavailable" card + a
|
|
63
|
+
// self-cancelling fleet auto-fallback on every 529.
|
|
60
64
|
const line = JSON.stringify({
|
|
61
65
|
type: 'api_error',
|
|
62
66
|
error: { type: 'overloaded_error', message: 'Overloaded' },
|
|
63
67
|
})
|
|
64
68
|
const result = detectErrorInTranscriptLine(line)
|
|
65
|
-
expect(result!.kind).toBe('quota-exhausted')
|
|
69
|
+
expect(result!.kind).toBe('rate-limited')
|
|
70
|
+
expect(result!.transient).toBe(true)
|
|
71
|
+
// An explicit `type:"api_error"` line (no retry state) = Claude
|
|
72
|
+
// surfaced the failure → terminal.
|
|
73
|
+
expect(result!.terminal).toBe(true)
|
|
74
|
+
})
|
|
75
|
+
|
|
76
|
+
it('marks an in-flight 529 retry transient + NOT terminal (suppressed)', () => {
|
|
77
|
+
// Real on-disk shape: a 529 Claude Code is internally retrying,
|
|
78
|
+
// annotated with retryAttempt < maxRetries.
|
|
79
|
+
const line = JSON.stringify({
|
|
80
|
+
type: 'system',
|
|
81
|
+
subtype: 'api_error',
|
|
82
|
+
error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
|
|
83
|
+
retryAttempt: 9,
|
|
84
|
+
maxRetries: 10,
|
|
85
|
+
retryInMs: 34479,
|
|
86
|
+
})
|
|
87
|
+
const result = detectErrorInTranscriptLine(line)
|
|
88
|
+
expect(result!.kind).toBe('rate-limited')
|
|
89
|
+
expect(result!.transient).toBe(true)
|
|
90
|
+
// 9 < 10 — still retrying → in-flight → the caller suppresses it.
|
|
91
|
+
expect(result!.terminal).toBe(false)
|
|
92
|
+
})
|
|
93
|
+
|
|
94
|
+
it('marks an exhausted 529 retry terminal (escalates)', () => {
|
|
95
|
+
const line = JSON.stringify({
|
|
96
|
+
type: 'system',
|
|
97
|
+
subtype: 'api_error',
|
|
98
|
+
error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
|
|
99
|
+
retryAttempt: 10,
|
|
100
|
+
maxRetries: 10,
|
|
101
|
+
})
|
|
102
|
+
const result = detectErrorInTranscriptLine(line)
|
|
103
|
+
expect(result!.kind).toBe('rate-limited')
|
|
104
|
+
expect(result!.transient).toBe(true)
|
|
105
|
+
// retries exhausted → terminal → escalates.
|
|
106
|
+
expect(result!.terminal).toBe(true)
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
it('marks non-transient errors terminal (always escalate)', () => {
|
|
110
|
+
const line = JSON.stringify({
|
|
111
|
+
type: 'api_error',
|
|
112
|
+
error: { type: 'authentication_error', message: 'expired' },
|
|
113
|
+
})
|
|
114
|
+
const result = detectErrorInTranscriptLine(line)
|
|
115
|
+
expect(result!.transient).toBe(false)
|
|
116
|
+
expect(result!.terminal).toBe(true)
|
|
66
117
|
})
|
|
67
118
|
|
|
68
119
|
it('returns null for lines without error field', () => {
|
|
@@ -57,13 +57,20 @@ describe('classifyClaudeError — credit-exhausted fixtures', () => {
|
|
|
57
57
|
}
|
|
58
58
|
})
|
|
59
59
|
|
|
60
|
-
describe('classifyClaudeError — quota-exhausted fixtures', () => {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
60
|
+
describe('classifyClaudeError — quota-exhausted', () => {
|
|
61
|
+
// classifyClaudeError is type/code/status-based and intentionally
|
|
62
|
+
// does NOT self-classify quota-exhausted: a genuine subscription
|
|
63
|
+
// usage-limit hit has no reliable Anthropic error TYPE — it is
|
|
64
|
+
// detected from the response TEXT. session-tail's `isApiErrorMessage`
|
|
65
|
+
// 429 branch + the `detectModelUnavailable` text path own quota
|
|
66
|
+
// detection. (`overloaded_error` used to be mapped here — wrongly;
|
|
67
|
+
// a 529 overload is transient server capacity, now `rate-limited`.)
|
|
68
|
+
it('no error TYPE maps to quota-exhausted (the text path owns it)', () => {
|
|
69
|
+
expect(fixtures['quota-exhausted']).toHaveLength(0)
|
|
70
|
+
expect(
|
|
71
|
+
classifyClaudeError({ type: 'overloaded_error', message: 'Overloaded' }),
|
|
72
|
+
).not.toBe('quota-exhausted')
|
|
73
|
+
})
|
|
67
74
|
})
|
|
68
75
|
|
|
69
76
|
describe('classifyClaudeError — rate-limited fixtures', () => {
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression coverage for `decideSubagentHandback` — the gate the
|
|
3
|
+
* gateway's subagent-watcher `onFinish` callback runs to decide whether
|
|
4
|
+
* a finished sub-agent gets a handback turn injected.
|
|
5
|
+
*
|
|
6
|
+
* This is the highest-risk surface of the handback feature (#1650): it
|
|
7
|
+
* injects a fresh turn. Before this suite the decision lived inline in
|
|
8
|
+
* the gateway's `onFinish` closure with no automated test — a refactor
|
|
9
|
+
* that broke the `isBackground` gate would have fired handbacks for
|
|
10
|
+
* foreground sub-agents (double messages) with nothing to catch it.
|
|
11
|
+
* The decision is now a pure function; these cases pin every gate.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, it, expect } from 'vitest'
|
|
15
|
+
import { decideSubagentHandback } from '../gateway/subagent-handback-inbound-builder.js'
|
|
16
|
+
|
|
17
|
+
const FIXED_NOW = 1_700_000_000_000
|
|
18
|
+
|
|
19
|
+
const base = {
|
|
20
|
+
handbackEnvValue: undefined as string | undefined,
|
|
21
|
+
outcome: 'completed' as 'completed' | 'failed' | 'orphan',
|
|
22
|
+
isBackground: true,
|
|
23
|
+
fleetChatId: '777',
|
|
24
|
+
ownerChatId: '999',
|
|
25
|
+
taskDescription: 'Do the thing',
|
|
26
|
+
resultText: 'Done.',
|
|
27
|
+
nowMs: FIXED_NOW,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
describe('decideSubagentHandback', () => {
|
|
31
|
+
it('delivers for a background completed sub-agent', () => {
|
|
32
|
+
const d = decideSubagentHandback({ ...base })
|
|
33
|
+
expect(d.deliver).toBe(true)
|
|
34
|
+
if (d.deliver) {
|
|
35
|
+
expect(d.chatId).toBe('777')
|
|
36
|
+
expect(d.inbound.meta.source).toBe('subagent_handback')
|
|
37
|
+
expect(d.inbound.chatId).toBe('777')
|
|
38
|
+
}
|
|
39
|
+
})
|
|
40
|
+
|
|
41
|
+
it('delivers for a background FAILED sub-agent', () => {
|
|
42
|
+
const d = decideSubagentHandback({ ...base, outcome: 'failed' })
|
|
43
|
+
expect(d.deliver).toBe(true)
|
|
44
|
+
if (d.deliver) expect(d.inbound.meta.outcome).toBe('failed')
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
it('skips a foreground sub-agent (handed back natively in-turn)', () => {
|
|
48
|
+
const d = decideSubagentHandback({ ...base, isBackground: false })
|
|
49
|
+
expect(d).toEqual({ deliver: false, reason: 'foreground' })
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
it("skips an 'orphan' outcome (stale historical-at-boot row)", () => {
|
|
53
|
+
const d = decideSubagentHandback({ ...base, outcome: 'orphan' })
|
|
54
|
+
expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
|
|
55
|
+
})
|
|
56
|
+
|
|
57
|
+
it('skips when the kill-switch is set (SWITCHROOM_SUBAGENT_HANDBACK=0)', () => {
|
|
58
|
+
const d = decideSubagentHandback({ ...base, handbackEnvValue: '0' })
|
|
59
|
+
expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
it('treats any non-"0" env value (incl. undefined) as enabled', () => {
|
|
63
|
+
expect(decideSubagentHandback({ ...base, handbackEnvValue: undefined }).deliver).toBe(true)
|
|
64
|
+
expect(decideSubagentHandback({ ...base, handbackEnvValue: '1' }).deliver).toBe(true)
|
|
65
|
+
expect(decideSubagentHandback({ ...base, handbackEnvValue: '' }).deliver).toBe(true)
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
it('falls back to the owner chat when the fleet entry is gone', () => {
|
|
69
|
+
const d = decideSubagentHandback({ ...base, fleetChatId: '' })
|
|
70
|
+
expect(d.deliver).toBe(true)
|
|
71
|
+
if (d.deliver) {
|
|
72
|
+
expect(d.chatId).toBe('999')
|
|
73
|
+
expect(d.inbound.chatId).toBe('999')
|
|
74
|
+
}
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
it('prefers the fleet chat id over the owner chat when both are present', () => {
|
|
78
|
+
const d = decideSubagentHandback({ ...base, fleetChatId: '777', ownerChatId: '999' })
|
|
79
|
+
expect(d.deliver).toBe(true)
|
|
80
|
+
if (d.deliver) expect(d.chatId).toBe('777')
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('skips when no chat resolves at all', () => {
|
|
84
|
+
const d = decideSubagentHandback({ ...base, fleetChatId: '', ownerChatId: '' })
|
|
85
|
+
expect(d).toEqual({ deliver: false, reason: 'no-chat' })
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
it('gate order: kill-switch wins over every other condition', () => {
|
|
89
|
+
// env-disabled even though it is a deliverable background completion.
|
|
90
|
+
const d = decideSubagentHandback({ ...base, handbackEnvValue: '0', isBackground: true })
|
|
91
|
+
expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
|
|
92
|
+
})
|
|
93
|
+
|
|
94
|
+
it('gate order: outcome filter applies before the foreground check', () => {
|
|
95
|
+
// orphan + foreground — outcome filter is checked first.
|
|
96
|
+
const d = decideSubagentHandback({ ...base, outcome: 'orphan', isBackground: false })
|
|
97
|
+
expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
|
|
98
|
+
})
|
|
99
|
+
|
|
100
|
+
it('carries the task description and result text into the inbound', () => {
|
|
101
|
+
const d = decideSubagentHandback({
|
|
102
|
+
...base,
|
|
103
|
+
taskDescription: 'Migrate the DB',
|
|
104
|
+
resultText: 'Applied 3 migrations, 0 rows dropped.',
|
|
105
|
+
})
|
|
106
|
+
expect(d.deliver).toBe(true)
|
|
107
|
+
if (d.deliver) {
|
|
108
|
+
expect(d.inbound.text).toContain('Migrate the DB')
|
|
109
|
+
expect(d.inbound.text).toContain('Applied 3 migrations')
|
|
110
|
+
}
|
|
111
|
+
})
|
|
112
|
+
})
|
|
@@ -148,7 +148,6 @@ function makeHarnessWithDb(opts: {
|
|
|
148
148
|
} = opts
|
|
149
149
|
|
|
150
150
|
let currentTime = 10_000
|
|
151
|
-
const notifications: string[] = []
|
|
152
151
|
const logs: string[] = []
|
|
153
152
|
|
|
154
153
|
const fileContents: Map<string, Buffer> = new Map()
|
|
@@ -217,7 +216,6 @@ function makeHarnessWithDb(opts: {
|
|
|
217
216
|
|
|
218
217
|
const watcher = startSubagentWatcher({
|
|
219
218
|
agentDir,
|
|
220
|
-
sendNotification: (text) => notifications.push(text),
|
|
221
219
|
stallThresholdMs,
|
|
222
220
|
// Mirror the active-loop threshold for fixtures with toolCount=0;
|
|
223
221
|
// tests that need the silent-synthesis vs active-loop distinction
|
|
@@ -257,7 +255,7 @@ function makeHarnessWithDb(opts: {
|
|
|
257
255
|
if (pollInterval) pollInterval.fn()
|
|
258
256
|
}
|
|
259
257
|
|
|
260
|
-
return { notifications, logs, advance, poll, watcher, now: () => currentTime, mockFs, fileContents }
|
|
258
|
+
return { logs, advance, poll, watcher, now: () => currentTime, mockFs, fileContents }
|
|
261
259
|
}
|
|
262
260
|
|
|
263
261
|
// ─── Bug 1 — ID mismatch: watcher never bumps last_activity_at ───────────────
|
|
@@ -84,7 +84,6 @@ function makeHarness(opts: {
|
|
|
84
84
|
silentSynthesisStallThresholdMs: configStallThresholdMs,
|
|
85
85
|
silentStallTerminalMs: configSilentStallTerminalMs,
|
|
86
86
|
rescanMs: 500,
|
|
87
|
-
sendNotification: () => {},
|
|
88
87
|
onStall: (_id, idleMs) => stallCalls.push({ idleMs }),
|
|
89
88
|
onStallTerminal: (id) => stallTerminalCalls.push({ agentId: id }),
|
|
90
89
|
onFinish: ({ outcome }) => finishCalls.push({ outcome }),
|
|
@@ -86,7 +86,6 @@ describe('subagent-watcher: parent turn-active marker refresh (#501)', () => {
|
|
|
86
86
|
let nextRef = 1
|
|
87
87
|
const watcher = startSubagentWatcher({
|
|
88
88
|
agentDir: opts.agentDir,
|
|
89
|
-
sendNotification: () => { /* noop */ },
|
|
90
89
|
stallThresholdMs: 60_000,
|
|
91
90
|
rescanMs: 500,
|
|
92
91
|
now: () => Date.now(),
|
|
@@ -26,7 +26,6 @@ function subAgentUserMsg(promptText: string) {
|
|
|
26
26
|
// ─── Harness (mirrors subagent-watcher.test.ts pattern) ──────────────────────
|
|
27
27
|
|
|
28
28
|
interface StallHarness {
|
|
29
|
-
notifications: string[]
|
|
30
29
|
stallCalls: Array<{ agentId: string; idleMs: number; description: string }>
|
|
31
30
|
unstallCalls: Array<{ agentId: string; description: string }>
|
|
32
31
|
logs: string[]
|
|
@@ -55,7 +54,6 @@ function makeStallHarness(opts: {
|
|
|
55
54
|
} = opts
|
|
56
55
|
|
|
57
56
|
let currentTime = 1000
|
|
58
|
-
const notifications: string[] = []
|
|
59
57
|
const stallCalls: Array<{ agentId: string; idleMs: number; description: string }> = []
|
|
60
58
|
const unstallCalls: Array<{ agentId: string; description: string }> = []
|
|
61
59
|
const logs: string[] = []
|
|
@@ -139,7 +137,6 @@ function makeStallHarness(opts: {
|
|
|
139
137
|
// silent-synthesis vs active-loop split.
|
|
140
138
|
silentSynthesisStallThresholdMs: silentSynthesisStallThresholdMs ?? stallThresholdMs,
|
|
141
139
|
rescanMs,
|
|
142
|
-
sendNotification: (text) => notifications.push(text),
|
|
143
140
|
onStall: (id, idle, desc) => stallCalls.push({ agentId: id, idleMs: idle, description: desc }),
|
|
144
141
|
onUnstall: (id, desc) => unstallCalls.push({ agentId: id, description: desc }),
|
|
145
142
|
now: () => currentTime,
|
|
@@ -168,7 +165,7 @@ function makeStallHarness(opts: {
|
|
|
168
165
|
}
|
|
169
166
|
}
|
|
170
167
|
|
|
171
|
-
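return { notifications, stallCalls, unstallCalls, logs, advance, watcher, now: () => currentTime, fileContents, jsonlPath }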
|
|
168
|
+
return { stallCalls, unstallCalls, logs, advance, watcher, now: () => currentTime, fileContents, jsonlPath }
|
|
172
169
|
}
|
|
173
170
|
|
|
174
171
|
// ─── Tests ────────────────────────────────────────────────────────────────────
|
|
@@ -127,7 +127,6 @@ function makeHarness(opts: {
|
|
|
127
127
|
silentSynthesisStallThresholdMs: stallThresholdMs,
|
|
128
128
|
silentStallTerminalMs,
|
|
129
129
|
rescanMs,
|
|
130
|
-
sendNotification: () => {},
|
|
131
130
|
onStall: (id, idleMs) => stallCalls.push({ agentId: id, idleMs }),
|
|
132
131
|
onUnstall: (id) => unstallCalls.push({ agentId: id }),
|
|
133
132
|
onStallTerminal: (id, desc) => stallTerminalCalls.push({ agentId: id, description: desc }),
|