switchroom 0.15.45 → 0.16.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +56 -15
- package/dist/auth-broker/index.js +383 -97
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +7 -4
- package/dist/cli/notion-write-pretool.mjs +35 -4
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/switchroom.js +2894 -841
- package/dist/host-control/main.js +2685 -207
- package/dist/vault/approvals/kernel-server.js +7453 -7413
- package/dist/vault/broker/server.js +11428 -11388
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +97 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +0 -19
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +55 -12
- package/telegram-plugin/dist/gateway/gateway.js +2938 -977
- package/telegram-plugin/dist/server.js +55 -12
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1857 -292
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/model-command.ts +115 -4
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-command.test.ts +134 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -30,6 +30,13 @@
|
|
|
30
30
|
* IPC call (cheap). `probeQuota` is only called on state-change (when
|
|
31
31
|
* we're going to send a message anyway) to get fresh numbers for the
|
|
32
32
|
* notification body. On no-change polls, only `listState` is called.
|
|
33
|
+
*
|
|
34
|
+
* #2495 Change 3 — the transition-to-alarm probe is `forceLive` (bypasses
|
|
35
|
+
* the broker's probe-on-open TTL), so the DECISION to alarm is corroborated
|
|
36
|
+
* by a TRUE live probe of the affected account, not a possibly-stale cache
|
|
37
|
+
* read. The re-evaluation with fresh numbers can suppress an alarm whose
|
|
38
|
+
* stale-snapshot transition no longer holds. Steady state stays cheap: a
|
|
39
|
+
* no-change poll never probes. Cost is one live probe per transition edge.
|
|
33
40
|
*/
|
|
34
41
|
|
|
35
42
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
@@ -175,6 +182,51 @@ export type QuotaWatchDecision =
|
|
|
175
182
|
}
|
|
176
183
|
| { kind: "skip"; accountLabel: string; reason: string };
|
|
177
184
|
|
|
185
|
+
/**
 * #2495 BLOCKER fix — the corroboration probe result, as the gateway's
 * runQuotaWatch sees it from `brokerClient.probeQuota(..., forceLive=true)`.
 * Structurally a subset of `ProbeQuotaEntry` (src/auth/broker/client.ts): a
 * `result` discriminated on `ok`, plus a `served` tag the broker stamps to
 * say HOW the result was sourced.
 *
 * The trap this guards: under `forceLive`, when the upstream live probe FAILS
 * and the broker holds a prior snapshot, it returns `cachedSnapshotToResult`
 * — `result.ok === true` but `served === "cache"` (server.ts opProbeQuota).
 * A naive `result.ok` check then treats that stale cache read as a live
 * corroboration, fires the alarm, and stamps the false "Live-probe
 * corroborated (#2495)" footnote. The acceptance criterion is the opposite:
 * an alarm must be backed by a LIVE probe, not a stale cache read.
 */
export type CorroborationProbe = {
  /** Probe outcome, discriminated on `ok`. */
  result: { ok: true } | { ok: false };
  /**
   * How the result was sourced. `"live"` = fresh upstream probe (genuine
   * corroboration). `"cache"` = served from the durable cache (TTL-hit or
   * probe-failure fallback) — NOT corroboration. Absent on legacy responses,
   * which we treat as NOT corroborated (fail-closed: never claim a live
   * corroboration we can't prove).
   */
  served?: "live" | "cache";
};
|
|
211
|
+
|
|
212
|
+
/**
 * #2495 BLOCKER fix — decide whether a forceLive corroboration probe counts
 * as a genuine LIVE corroboration of the alarm.
 *
 * Genuine corroboration requires BOTH `result.ok` AND `served === "live"`.
 * A result that is `ok:true` but `served:"cache"` (the failed-probe
 * cache-fallback) is treated EXACTLY like a probe failure: it is NOT
 * corroboration, so the caller must DEFER — leave watch state untouched and
 * re-evaluate next tick when a true live probe can be obtained. A missing
 * entry (`undefined`) is likewise not corroboration.
 *
 * Pure + total so it can be unit-tested at the seam without standing up the
 * broker or the gateway loop. "Total" includes malformed wire payloads: an
 * entry that lacks `result` altogether is read as NOT corroborated instead
 * of throwing (fail-closed).
 *
 * @param entry The probe entry for the affected account, or `undefined` when
 *   the broker returned none.
 * @returns `true` only for an `ok` result that the broker tagged `"live"`.
 */
export function isLiveCorroboration(entry: CorroborationProbe | undefined): boolean {
  // Optional-chain through `result` too: the payload crosses an IPC boundary,
  // so the static type is a promise, not a guarantee.
  return entry?.result?.ok === true && entry?.served === "live";
}
|
|
229
|
+
|
|
178
230
|
/**
|
|
179
231
|
* Evaluate one account's quota state against its last-notified health.
|
|
180
232
|
*
|
|
@@ -224,7 +276,11 @@ export function evaluateQuotaWatchAccount(args: {
|
|
|
224
276
|
return { kind: "skip", accountLabel: label, reason: "stale-snapshot" };
|
|
225
277
|
}
|
|
226
278
|
|
|
227
|
-
|
|
279
|
+
// #2494 Bug A — classify against THIS tick's clock so the refill
|
|
280
|
+
// normalization uses the same `now` the rest of the decision does (the
|
|
281
|
+
// default `new Date()` would diverge from a frozen test clock / a replayed
|
|
282
|
+
// tick and mis-zero a still-future reset window).
|
|
283
|
+
const currentHealth = classifyHealth(snap, new Date(now));
|
|
228
284
|
|
|
229
285
|
// Unknown (probe failed) or blocked — skip entirely.
|
|
230
286
|
if (currentHealth === "unknown" || currentHealth === "blocked") {
|
|
@@ -324,22 +380,58 @@ export type FleetAllExhaustedDecision =
|
|
|
324
380
|
* cases the trigger-based interactive all-blocked card misses: a quiet period
|
|
325
381
|
* (no agent happens to 429 into the wall) and the consumer/cron paths.
|
|
326
382
|
*
|
|
327
|
-
*
|
|
328
|
-
*
|
|
329
|
-
*
|
|
330
|
-
*
|
|
383
|
+
* Source: the broker's per-account `exhausted` flag (set by mark-exhausted via
|
|
384
|
+
* failover + the consumer sensor). That flag is NOT purely live — `isAccountBlocked`
|
|
385
|
+
* (src/auth/broker/account-eligibility.ts) falls back to the persisted
|
|
386
|
+
* `exhausted_until` mark whenever there is no fresh live snapshot. During a
|
|
387
|
+
* broker-unreachable / probe-timeout blackout, short-lived auto-fallback marks
|
|
388
|
+
* can make `every(a.exhausted)` momentarily true with ZERO live corroboration
|
|
389
|
+
* (#2478, klanker 2026-06-20). So the `entered` alert requires POSITIVE LIVE
|
|
390
|
+
* CORROBORATION: an account counts toward "all exhausted" only when its
|
|
391
|
+
* `exhausted` flag is backed by a FRESH live snapshot (last_quota.capturedAt
|
|
392
|
+
* within `maxStaleMs`). If ANY account's exhaustion rests solely on a
|
|
393
|
+
* stale/absent-probe mark we are
|
|
394
|
+
* probe-blind and return `skip: "probe-blind"` — no false fleet alert. The
|
|
395
|
+
* guarantee is "no false alarm off stale marks during a probe blackout", NOT
|
|
396
|
+
* blanket probe-failure immunity. The `recovered` transition is unguarded so a
|
|
397
|
+
* legitimately-fired alert is never stranded. Requires at least one account; an
|
|
398
|
+
* empty fleet never alerts.
|
|
331
399
|
*/
|
|
332
400
|
export function evaluateFleetAllExhausted(args: {
|
|
333
|
-
accounts: Array<{
|
|
401
|
+
accounts: Array<{
|
|
402
|
+
label: string;
|
|
403
|
+
exhausted: boolean;
|
|
404
|
+
exhausted_until?: number;
|
|
405
|
+
/** Most-recent live probe snapshot, used to corroborate `exhausted`. */
|
|
406
|
+
last_quota?: {
|
|
407
|
+
capturedAt: number;
|
|
408
|
+
overageDisabledReason?: string | null;
|
|
409
|
+
} | null;
|
|
410
|
+
}>;
|
|
334
411
|
prev: QuotaWatchAccountState;
|
|
335
412
|
now: number;
|
|
413
|
+
/** Staleness ceiling for "fresh probe"; 0 disables the gate (legacy callers/tests). */
|
|
414
|
+
tuning?: Pick<QuotaWatchTuning, "maxStaleMs">;
|
|
336
415
|
}): FleetAllExhaustedDecision {
|
|
337
416
|
const { accounts, prev, now } = args;
|
|
417
|
+
const maxStaleMs = args.tuning?.maxStaleMs ?? 0;
|
|
338
418
|
const allExhausted = accounts.length > 0 && accounts.every((a) => a.exhausted);
|
|
339
419
|
// "throttling" doubles as the "currently alerting all-exhausted" marker.
|
|
340
420
|
const wasAlerting = prev.lastNotifiedHealth === "throttling";
|
|
341
421
|
|
|
342
422
|
if (allExhausted && !wasAlerting) {
|
|
423
|
+
// Probe-blind guard (#2478): only fire `entered` if EVERY account's
|
|
424
|
+
// exhaustion is backed by live evidence — a fresh snapshot. An account
|
|
425
|
+
// exhausted solely on a stale/absent mark means we have no live
|
|
426
|
+
// corroboration → skip rather than false-alarm.
|
|
427
|
+
if (maxStaleMs > 0) {
|
|
428
|
+
const allLiveCorroborated = accounts.every((a) =>
|
|
429
|
+
exhaustionLiveCorroborated(a, now, maxStaleMs),
|
|
430
|
+
);
|
|
431
|
+
if (!allLiveCorroborated) {
|
|
432
|
+
return { kind: "skip", reason: "probe-blind" };
|
|
433
|
+
}
|
|
434
|
+
}
|
|
343
435
|
return {
|
|
344
436
|
kind: "notify",
|
|
345
437
|
message: buildAllExhaustedMessage(accounts, now),
|
|
@@ -358,6 +450,42 @@ export function evaluateFleetAllExhausted(args: {
|
|
|
358
450
|
return { kind: "skip", reason: allExhausted ? "still-all-exhausted" : "not-all-exhausted" };
|
|
359
451
|
}
|
|
360
452
|
|
|
453
|
+
/**
 * Is an account's `exhausted` flag backed by live evidence (#2478)?
 *
 * True when the most-recent live probe is FRESH (`capturedAt` within
 * `maxStaleMs`) — that fresh probe is what set/upholds the broker's blocked
 * verdict. False when there is no `last_quota` at all, or the snapshot is
 * stale: the `exhausted` flag then rests solely on a persisted mark with no
 * live backing, which is exactly the probe-blind condition that false-fires
 * the fleet alert.
 *
 * NOTE: `out_of_credits` is NOT treated as corroboration here. Per
 * fix/out-of-credits-serve-block, out_of_credits is INFORMATIONAL — it is
 * not exhaustion in its own right at any util. Corroboration requires a
 * genuinely fresh quota snapshot (real 429 / util-wall path).
 *
 * Mirrors `snapshotFresh` in src/auth/broker/account-eligibility.ts (the
 * serving-side authority); kept as a local check so the decision layer
 * carries no broker dependency.
 *
 * @param account Account record; only `last_quota.capturedAt` is read.
 * @param now Current tick time, in the same epoch-ms units as `capturedAt`.
 * @param maxStaleMs Maximum age of a snapshot that still counts as fresh.
 * @returns `true` iff a snapshot exists, is no older than `maxStaleMs`, and is
 *   not dated further into the future than the skew tolerance.
 */
function exhaustionLiveCorroborated(
  account: {
    last_quota?: { capturedAt: number; overageDisabledReason?: string | null } | null;
  },
  now: number,
  maxStaleMs: number,
): boolean {
  // Mirror `snapshotFresh`'s clock-skew guard: a future-dated `capturedAt`
  // makes `now - capturedAt` negative and would slip past the staleness gate,
  // so a skewed snapshot reads as fresh. Reject snapshots dated more than the
  // broker's 60_000 ms tolerance ahead of `now` (matches the inline literal in
  // `snapshotFresh`, src/auth/broker/account-eligibility.ts).
  const FUTURE_SKEW_TOLERANCE_MS = 60_000;

  const lq = account.last_quota;
  if (!lq) return false;

  const withinStaleness = now - lq.capturedAt <= maxStaleMs;
  const notFutureDated = lq.capturedAt <= now + FUTURE_SKEW_TOLERANCE_MS;
  return withinStaleness && notFutureDated;
}
|
|
488
|
+
|
|
361
489
|
function buildAllExhaustedMessage(
|
|
362
490
|
accounts: Array<{ label: string; exhausted_until?: number }>,
|
|
363
491
|
now: number,
|
|
@@ -420,7 +548,7 @@ function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): strin
|
|
|
420
548
|
`Binding window: ${winLabel}${resetStr}`,
|
|
421
549
|
`${activeNote}${altNote}`,
|
|
422
550
|
``,
|
|
423
|
-
`<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window.
|
|
551
|
+
`<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Live-probe corroborated (#2495).</i>`,
|
|
424
552
|
`<i>Run /auth for full fleet status or /usage for the active account.</i>`,
|
|
425
553
|
]
|
|
426
554
|
.join("\n")
|
|
@@ -23,6 +23,30 @@ import {
|
|
|
23
23
|
getTurnByKey,
|
|
24
24
|
} from './turns-schema.js'
|
|
25
25
|
|
|
26
|
+
// ---------------------------------------------------------------------------
// Concurrency PRAGMAs — applySchema must arm busy_timeout so concurrent
// writers (the subagent-tracker hooks + the gateway watcher) wait-and-retry
// instead of failing with SQLITE_BUSY ("database is locked").
// ---------------------------------------------------------------------------

describe('registry concurrency PRAGMAs', () => {
  it('arms busy_timeout (5000ms) on every opened connection', () => {
    const db = openTurnsDbInMemory()
    try {
      const row = db.prepare('PRAGMA busy_timeout').get() as { timeout: number }
      expect(row.timeout).toBe(5000)
    } finally {
      // Close even when the expectation fails so the handle is not leaked
      // into later tests.
      db.close()
    }
  })

  it('uses WAL journal mode for concurrent readers', () => {
    const db = openTurnsDbInMemory()
    try {
      const row = db.prepare('PRAGMA journal_mode').get() as { journal_mode: string }
      // `:memory:` reports 'memory'; a file DB reports 'wal'. Either way the
      // exec ran without error — the file-path open (openTurnsDb) yields 'wal'.
      expect(['wal', 'memory']).toContain(String(row.journal_mode).toLowerCase())
    } finally {
      db.close()
    }
  })
})
|
|
49
|
+
|
|
26
50
|
// ---------------------------------------------------------------------------
|
|
27
51
|
// Test 1 — empty DB
|
|
28
52
|
// ---------------------------------------------------------------------------
|
|
@@ -172,6 +172,15 @@ const PHASE2_MIGRATIONS = [
|
|
|
172
172
|
function applySchema(db: SqliteDatabase): void {
|
|
173
173
|
db.exec('PRAGMA journal_mode = WAL')
|
|
174
174
|
db.exec('PRAGMA synchronous = NORMAL')
|
|
175
|
+
// Concurrency: multiple writers contend on this registry (the PreToolUse
|
|
176
|
+
// subagent-tracker hook, the gateway's subagent-watcher backfill, the turns
|
|
177
|
+
// writer) — especially when several sub-agents dispatch at once. Without a
|
|
178
|
+
// busy_timeout, bun:sqlite/better-sqlite3 default to 0ms and the second
|
|
179
|
+
// contending write fails IMMEDIATELY with SQLITE_BUSY ("database is locked"),
|
|
180
|
+
// which the watcher swallows → jsonl_agent_id / parent_turn_key left NULL →
|
|
181
|
+
// worker card mis-routes to the operator DM + false silent-stall synthesis.
|
|
182
|
+
// 5s of wait-and-retry serializes the contenders instead of dropping writes.
|
|
183
|
+
db.exec('PRAGMA busy_timeout = 5000')
|
|
175
184
|
db.exec(SCHEMA_SQL)
|
|
176
185
|
// Run migrations. SQLite doesn't support "ADD COLUMN IF NOT EXISTS", so
|
|
177
186
|
// we swallow the "duplicate column" error to stay idempotent on
|
|
@@ -77,6 +77,19 @@ export type RuntimeMetricEvent =
|
|
|
77
77
|
fallback_kind: 'working' | 'thinking'
|
|
78
78
|
silence_ms: number
|
|
79
79
|
}
|
|
80
|
+
/**
|
|
81
|
+
* #2527 — mid-turn liveness floor decision. `decision: 'fire'` when the
|
|
82
|
+
* quiet "still on it" beat was sent; otherwise the machine-readable skip
|
|
83
|
+
* reason for a declined forced ("Status?") poke. `forced` distinguishes
|
|
84
|
+
* the timer beat from a user-asked one.
|
|
85
|
+
*/
|
|
86
|
+
| {
|
|
87
|
+
kind: 'mid_turn_floor'
|
|
88
|
+
key: string
|
|
89
|
+
silence_ms: number
|
|
90
|
+
forced: boolean
|
|
91
|
+
decision: string
|
|
92
|
+
}
|
|
80
93
|
/**
|
|
81
94
|
* #1445 cross-turn pending-async ambient lifecycle. `started` fires
|
|
82
95
|
* when a turn ends with a captured anchor AND a pending Agent/Task/
|
|
@@ -98,7 +98,17 @@ export type SessionEvent =
|
|
|
98
98
|
// the lazily-flushed transcript. The draft-mirror drives off THIS, not
|
|
99
99
|
// the flush-gated `tool_use`, so activity streams deterministically.
|
|
100
100
|
| { kind: 'tool_label'; toolUseId: string; label: string; toolName: string }
|
|
101
|
-
|
|
101
|
+
// `blockIndex` = index of this text block in the assistant message's
|
|
102
|
+
// content[] — load-bearing: it keys the returned Map so callers emit
|
|
103
|
+
// events in source order. `lastInMessage` = true iff no tool_use block
|
|
104
|
+
// follows it in the SAME message. NOTE: `lastInMessage` is a PROJECTION
|
|
105
|
+
// ARTIFACT only — the current reducer-side narrative-dedup gate
|
|
106
|
+
// (narrative-dedup.ts) decides draft-then-send vs working-narration by
|
|
107
|
+
// LOOKAHEAD (the next tool_use / turn_end), NOT by reading this flag. It
|
|
108
|
+
// is retained as a stable projection output (pinned by the kernel test)
|
|
109
|
+
// and reserved for a future staging-skip optimization; do not assume the
|
|
110
|
+
// gate keys on it.
|
|
111
|
+
| { kind: 'text'; text: string; blockIndex: number; lastInMessage: boolean }
|
|
102
112
|
| { kind: 'tool_result'; toolUseId: string; toolName: string | null; isError?: boolean; errorText?: string }
|
|
103
113
|
| { kind: 'turn_end'; durationMs: number }
|
|
104
114
|
// Multi-agent: sub-agent-scoped events. agentId is the sub-agent JSONL
|
|
@@ -106,8 +116,12 @@ export type SessionEvent =
|
|
|
106
116
|
// as parent events; the reducer fans them out to per-sub-agent state.
|
|
107
117
|
| { kind: 'sub_agent_started'; agentId: string; firstPromptText: string; subagentType?: string }
|
|
108
118
|
| { kind: 'sub_agent_tool_use'; agentId: string; toolUseId: string | null; toolName: string; input?: Record<string, unknown>; precomputedLabel?: string }
|
|
109
|
-
|
|
110
|
-
|
|
119
|
+
// Same shared contract as the main-agent `text` kind — see its doc above
|
|
120
|
+
// (including the `lastInMessage` projection-artifact note). The wire-kind
|
|
121
|
+
// stays distinct (the gateway/watcher split is load-bearing) but the
|
|
122
|
+
// payload + `lastInMessage` derivation are identical so ONE shared dedup
|
|
123
|
+
// gate handles both tiers.
|
|
124
|
+
| { kind: 'sub_agent_text'; agentId: string; text: string; blockIndex: number; lastInMessage: boolean }
|
|
111
125
|
| { kind: 'sub_agent_tool_result'; agentId: string; toolUseId: string; isError?: boolean; errorText?: string }
|
|
112
126
|
| { kind: 'sub_agent_turn_end'; agentId: string }
|
|
113
127
|
| { kind: 'sub_agent_nested_spawn'; agentId: string }
|
|
@@ -182,6 +196,49 @@ function extractToolResultErrorText(content: unknown): string {
|
|
|
182
196
|
return ''
|
|
183
197
|
}
|
|
184
198
|
|
|
199
|
+
/**
 * THE single text→narrative projection primitive. Both projectTranscriptLine
 * and projectSubagentLine derive their text events through this helper so
 * main-agent, sub-agent, worker, and every other execution shape inherit
 * identical text-block semantics from ONE place: empty/whitespace blocks are
 * dropped, and each surviving block carries its `blockIndex` plus the
 * `lastInMessage` signal (no tool_use follows it in this message). NOTE:
 * `lastInMessage` is a projection artifact — the reducer-side dedup gate
 * decides SHOW/SUPPRESS by lookahead, not by reading this flag; it is
 * reserved for a future staging-skip optimization.
 *
 * `make` adapts the shared payload into the tier-specific wire kind
 * (`text` vs `sub_agent_text`); the contract — what counts as a text block,
 * how `lastInMessage` is computed — lives here, not in the callers.
 *
 * Returns a `Map<blockIndex, SessionEvent>` keyed by the text block's source
 * index, NOT a flat list. This is the load-bearing design choice: the callers
 * must emit thinking / tool_use / text events in SOURCE ORDER (the reducer
 * pairs a preamble to the immediately-next tool_use), so they iterate
 * `content` once and, at each text position, emit the precomputed event from
 * this map. The kernel owns the contract; the caller owns only the ordering.
 *
 * @param content The assistant message's `content[]` blocks.
 * @param make Builds the tier-specific event from `(text, blockIndex,
 *   lastInMessage)`.
 * @returns Events for the non-empty text blocks, keyed by their index in
 *   `content`. A `text` block whose `text` field is missing or not a string
 *   is treated like an empty block and dropped.
 */
export function projectAssistantTextBlocks(
  content: Array<Record<string, unknown>>,
  make: (text: string, blockIndex: number, lastInMessage: boolean) => SessionEvent,
): Map<number, SessionEvent> {
  const out = new Map<number, SessionEvent>()
  // Precompute the index of the last tool_use so each text block knows
  // whether a tool_use follows it in THIS message (the draft-then-send signal).
  let lastToolUseIdx = -1
  content.forEach((c, i) => {
    if (c.type === 'tool_use') lastToolUseIdx = i
  })
  content.forEach((c, i) => {
    if (c.type !== 'text') return
    // `content` comes from a parsed transcript line, so `text` is only
    // nominally a string: narrow at runtime rather than asserting, otherwise
    // a non-string payload would throw on `.trim()`.
    const text = typeof c.text === 'string' ? c.text : ''
    if (text.trim().length === 0) return // drop empty/whitespace-only blocks
    out.set(i, make(text, i, i > lastToolUseIdx))
  })
  return out
}
|
|
241
|
+
|
|
185
242
|
/**
|
|
186
243
|
* Project a single transcript line into a SessionEvent (or null if it's
|
|
187
244
|
* uninteresting noise). Caller is responsible for the JSON parse — if a
|
|
@@ -218,7 +275,16 @@ export function projectTranscriptLine(line: string): SessionEvent[] {
|
|
|
218
275
|
const content = message?.content as Array<Record<string, unknown>> | undefined
|
|
219
276
|
if (!Array.isArray(content)) return []
|
|
220
277
|
const events: SessionEvent[] = []
|
|
221
|
-
|
|
278
|
+
// Text→narrative projection comes from the ONE shared kernel
|
|
279
|
+
// (projectAssistantTextBlocks): it owns the empty-drop + blockIndex +
|
|
280
|
+
// lastInMessage contract. We emit its events at their source positions
|
|
281
|
+
// so thinking / tool_use / text stay in source order (the reducer pairs
|
|
282
|
+
// a preamble to the immediately-next tool_use).
|
|
283
|
+
const textEvents = projectAssistantTextBlocks(
|
|
284
|
+
content,
|
|
285
|
+
(text, blockIndex, lastInMessage): SessionEvent => ({ kind: 'text', text, blockIndex, lastInMessage }),
|
|
286
|
+
)
|
|
287
|
+
content.forEach((c, i) => {
|
|
222
288
|
const ct = c.type as string | undefined
|
|
223
289
|
if (ct === 'thinking') {
|
|
224
290
|
events.push({ kind: 'thinking' })
|
|
@@ -237,10 +303,10 @@ export function projectTranscriptLine(line: string): SessionEvent[] {
|
|
|
237
303
|
input: input && typeof input === 'object' ? input : undefined,
|
|
238
304
|
})
|
|
239
305
|
} else if (ct === 'text') {
|
|
240
|
-
const
|
|
241
|
-
events.push(
|
|
306
|
+
const ev = textEvents.get(i)
|
|
307
|
+
if (ev != null) events.push(ev)
|
|
242
308
|
}
|
|
243
|
-
}
|
|
309
|
+
})
|
|
244
310
|
return events
|
|
245
311
|
}
|
|
246
312
|
|
|
@@ -357,7 +423,25 @@ export function projectSubagentLine(
|
|
|
357
423
|
const content = message?.content as Array<Record<string, unknown>> | undefined
|
|
358
424
|
if (!Array.isArray(content)) return []
|
|
359
425
|
const events: SessionEvent[] = []
|
|
360
|
-
|
|
426
|
+
// Text→narrative projection comes from the SAME shared kernel as the
|
|
427
|
+
// main agent (projectAssistantTextBlocks): one source for the empty-drop
|
|
428
|
+
// + blockIndex + lastInMessage contract. The `make` adapter only changes
|
|
429
|
+
// the wire kind to `sub_agent_text`. A nested Agent/Task tool_use still
|
|
430
|
+
// counts as a tool_use that follows a preceding text block — handled by
|
|
431
|
+
// the kernel — so a sub-agent preamble before a nested spawn is correctly
|
|
432
|
+
// NOT `lastInMessage`. We emit at source positions so text + tool_use
|
|
433
|
+
// stay in source order (the reducer pairs preamble → next tool_use).
|
|
434
|
+
const textEvents = projectAssistantTextBlocks(
|
|
435
|
+
content,
|
|
436
|
+
(text, blockIndex, lastInMessage): SessionEvent => ({
|
|
437
|
+
kind: 'sub_agent_text',
|
|
438
|
+
agentId,
|
|
439
|
+
text,
|
|
440
|
+
blockIndex,
|
|
441
|
+
lastInMessage,
|
|
442
|
+
}),
|
|
443
|
+
)
|
|
444
|
+
content.forEach((c, i) => {
|
|
361
445
|
const ct = c.type as string | undefined
|
|
362
446
|
if (ct === 'tool_use') {
|
|
363
447
|
const name = (c.name as string | undefined) ?? ''
|
|
@@ -386,10 +470,11 @@ export function projectSubagentLine(
|
|
|
386
470
|
// in the SAME assistant message must be emitted in source order
|
|
387
471
|
// so the reducer consumes the preamble on the immediately-next
|
|
388
472
|
// tool_use and sibling tool_uses fall back to filename/pattern.
|
|
389
|
-
|
|
390
|
-
|
|
473
|
+
// The event itself comes from the shared kernel (textEvents above).
|
|
474
|
+
const ev = textEvents.get(i)
|
|
475
|
+
if (ev != null) events.push(ev)
|
|
391
476
|
}
|
|
392
|
-
}
|
|
477
|
+
})
|
|
393
478
|
// Authoritative early terminal: a background `Agent` worker's JSONL on
|
|
394
479
|
// claude ≥2.1.156 never writes the `system/turn_duration` line below, so
|
|
395
480
|
// the watcher used to only learn the worker finished via the ~5-min
|