switchroom 0.14.92 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +48 -12
- package/dist/cli/switchroom.js +935 -887
- package/dist/cli/ui/index.html +16 -5
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +49 -9
- package/telegram-plugin/gateway/gateway.ts +81 -4
- package/telegram-plugin/gateway/obligation-ledger.ts +47 -8
- package/telegram-plugin/gateway/turn-active-marker.ts +22 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
- package/telegram-plugin/tests/obligation-ledger.test.ts +85 -0
- package/telegram-plugin/tests/turn-active-marker.test.ts +28 -0
package/dist/cli/ui/index.html
CHANGED
|
@@ -440,15 +440,20 @@
|
|
|
440
440
|
return h;
|
|
441
441
|
}
|
|
442
442
|
|
|
443
|
+
// Tolerate a single transient failure on the 10s auto-refresh: one blip
|
|
444
|
+
// (e.g. the web container restarting) shouldn't flash a scary error over a
|
|
445
|
+
// healthy dashboard. Only surface the error after two in a row.
|
|
446
|
+
let agentFetchFails = 0;
|
|
443
447
|
async function fetchAgents() {
|
|
444
448
|
try {
|
|
445
449
|
const res = await fetch(`${API}/api/agents`, { headers: authHeaders() });
|
|
446
450
|
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
|
447
451
|
agents = await res.json();
|
|
448
452
|
render();
|
|
453
|
+
agentFetchFails = 0;
|
|
449
454
|
clearError();
|
|
450
455
|
} catch (err) {
|
|
451
|
-
showError(`Failed to fetch agents: ${err.message}`);
|
|
456
|
+
if (++agentFetchFails >= 2) showError(`Failed to fetch agents: ${err.message}`);
|
|
452
457
|
}
|
|
453
458
|
}
|
|
454
459
|
|
|
@@ -494,12 +499,18 @@
|
|
|
494
499
|
}
|
|
495
500
|
|
|
496
501
|
async function fetchConnections() {
|
|
502
|
+
// Each fetch falls back independently (.catch → default). A single
|
|
503
|
+
// network blip — e.g. one endpoint momentarily unreachable — must NOT
|
|
504
|
+
// reject the whole batch and blank the connected accounts; the others
|
|
505
|
+
// still render. (Previously a bare Promise.all meant any one failure
|
|
506
|
+
// wiped the tab, so a connected Google/Microsoft account "vanished".)
|
|
507
|
+
const safe = (p, fallback) => p.then(r => r.ok ? r.json() : fallback).catch(() => fallback);
|
|
497
508
|
try {
|
|
498
509
|
const [google, microsoft, notion, agents] = await Promise.all([
|
|
499
|
-
fetch(`${API}/api/google-accounts`, { headers: authHeaders() })
|
|
500
|
-
fetch(`${API}/api/microsoft-accounts`, { headers: authHeaders() })
|
|
501
|
-
fetch(`${API}/api/notion-workspace`, { headers: authHeaders() })
|
|
502
|
-
fetch(`${API}/api/agents`, { headers: authHeaders() })
|
|
510
|
+
safe(fetch(`${API}/api/google-accounts`, { headers: authHeaders() }), []),
|
|
511
|
+
safe(fetch(`${API}/api/microsoft-accounts`, { headers: authHeaders() }), []),
|
|
512
|
+
safe(fetch(`${API}/api/notion-workspace`, { headers: authHeaders() }), { configured: false, databases: [] }),
|
|
513
|
+
safe(fetch(`${API}/api/agents`, { headers: authHeaders() }), []),
|
|
503
514
|
]);
|
|
504
515
|
const agentNames = (agents || []).map(a => a.name).sort();
|
|
505
516
|
renderConnections({ google, microsoft, notion, agentNames });
|
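package/package.json
CHANGED
|
[hunks for this file are missing from this extract]
|
package/telegram-plugin/dist/gateway/gateway.js
CHANGED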
|
@@ -47637,18 +47637,21 @@ class ObligationLedger {
|
|
|
47637
47637
|
return best;
|
|
47638
47638
|
}
|
|
47639
47639
|
decideAtIdle(opts) {
|
|
47640
|
-
const
|
|
47640
|
+
const useEligible = opts != null && (opts.graceMs > 0 || opts.backgroundWorkActive === true);
|
|
47641
|
+
const o = useEligible ? this.oldestEligible(opts.now, opts.graceMs, opts.backgroundWorkActive === true, opts.backgroundGraceMs ?? 0) : this.oldest();
|
|
47641
47642
|
if (o === undefined)
|
|
47642
47643
|
return { action: "none" };
|
|
47643
47644
|
if (o.representCount >= this.maxRepresents)
|
|
47644
47645
|
return { action: "escalate", obligation: o };
|
|
47645
47646
|
return { action: "represent", obligation: o };
|
|
47646
47647
|
}
|
|
47647
|
-
oldestEligible(now, graceMs) {
|
|
47648
|
+
oldestEligible(now, graceMs, backgroundWorkActive, backgroundGraceMs) {
|
|
47648
47649
|
let best;
|
|
47649
47650
|
for (const o of this.open.values()) {
|
|
47650
47651
|
if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs)
|
|
47651
47652
|
continue;
|
|
47653
|
+
if (backgroundWorkActive && backgroundGraceMs > 0 && now - o.openedAt < backgroundGraceMs)
|
|
47654
|
+
continue;
|
|
47652
47655
|
if (best === undefined || o.openedAt < best.openedAt)
|
|
47653
47656
|
best = o;
|
|
47654
47657
|
}
|
|
@@ -52957,13 +52960,22 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52957
52960
|
return false;
|
|
52958
52961
|
}
|
|
52959
52962
|
}
|
|
52963
|
+
function readTurnActiveMarkerAgeMs(stateDir, now) {
|
|
52964
|
+
const path = join32(stateDir, TURN_ACTIVE_MARKER_FILE2);
|
|
52965
|
+
try {
|
|
52966
|
+
const st = statSync10(path);
|
|
52967
|
+
return (now ?? Date.now()) - st.mtimeMs;
|
|
52968
|
+
} catch {
|
|
52969
|
+
return null;
|
|
52970
|
+
}
|
|
52971
|
+
}
|
|
52960
52972
|
|
|
52961
52973
|
// ../src/build-info.ts
|
|
52962
|
-
var VERSION = "0.14.92";
|
|
52963
|
-
var COMMIT_SHA = "
|
|
52964
|
-
var COMMIT_DATE = "2026-06-
|
|
52965
|
-
var LATEST_PR =
|
|
52966
|
-
var COMMITS_AHEAD_OF_TAG =
|
|
52974
|
+
var VERSION = "0.15.0";
|
|
52975
|
+
var COMMIT_SHA = "5841c1d5";
|
|
52976
|
+
var COMMIT_DATE = "2026-06-09T23:17:14Z";
|
|
52977
|
+
var LATEST_PR = 2253;
|
|
52978
|
+
var COMMITS_AHEAD_OF_TAG = 0;
|
|
52967
52979
|
|
|
52968
52980
|
// gateway/boot-version.ts
|
|
52969
52981
|
function formatRelativeAgo(iso) {
|
|
@@ -54188,6 +54200,14 @@ var OBLIGATION_ESCALATE_GRACE_MS = (() => {
|
|
|
54188
54200
|
const n = Number(raw);
|
|
54189
54201
|
return Number.isFinite(n) && n >= 0 ? n : 45000;
|
|
54190
54202
|
})();
|
|
54203
|
+
var OBLIGATION_BACKGROUND_WORK_GRACE_MS = (() => {
|
|
54204
|
+
const raw = process.env.SWITCHROOM_OBLIGATION_BACKGROUND_WORK_GRACE_MS;
|
|
54205
|
+
if (raw == null || raw === "")
|
|
54206
|
+
return 1200000;
|
|
54207
|
+
const n = Number(raw);
|
|
54208
|
+
return Number.isFinite(n) && n >= 0 ? n : 1200000;
|
|
54209
|
+
})();
|
|
54210
|
+
var TURN_ACTIVE_MARKER_FRESH_MS = 90000;
|
|
54191
54211
|
var AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW !== "0";
|
|
54192
54212
|
var lastAgentOutputAt = new Map;
|
|
54193
54213
|
var LAST_OUTPUT_MAX_KEYS = 512;
|
|
@@ -55705,6 +55725,13 @@ var inboundSpool = STATIC ? undefined : createInboundSpool({
|
|
|
55705
55725
|
}
|
|
55706
55726
|
});
|
|
55707
55727
|
var pendingInboundBuffer = createPendingInboundBuffer({ spool: inboundSpool });
|
|
55728
|
+
function agentHasInFlightBackgroundWork(now) {
|
|
55729
|
+
if (countRunningWorkers() > 0)
|
|
55730
|
+
return true;
|
|
55731
|
+
const ageMs = readTurnActiveMarkerAgeMs(STATE_DIR, now);
|
|
55732
|
+
return ageMs != null && ageMs < TURN_ACTIVE_MARKER_FRESH_MS;
|
|
55733
|
+
}
|
|
55734
|
+
var lastBgWorkDeferLogMs = 0;
|
|
55708
55735
|
function obligationSweep() {
|
|
55709
55736
|
if (!OBLIGATION_LEDGER_ENABLED)
|
|
55710
55737
|
return;
|
|
@@ -55713,10 +55740,23 @@ function obligationSweep() {
|
|
|
55713
55740
|
if (turnInFlightForGate())
|
|
55714
55741
|
return;
|
|
55715
55742
|
const agent = process.env.SWITCHROOM_AGENT_NAME ?? "";
|
|
55716
|
-
const
|
|
55743
|
+
const now = Date.now();
|
|
55744
|
+
const backgroundWorkActive = OBLIGATION_BACKGROUND_WORK_GRACE_MS > 0 && agentHasInFlightBackgroundWork(now);
|
|
55745
|
+
const decision = obligationLedger.decideAtIdle(OBLIGATION_ESCALATE_GRACE_MS > 0 || backgroundWorkActive ? {
|
|
55746
|
+
now,
|
|
55747
|
+
graceMs: OBLIGATION_ESCALATE_GRACE_MS,
|
|
55748
|
+
backgroundWorkActive,
|
|
55749
|
+
backgroundGraceMs: OBLIGATION_BACKGROUND_WORK_GRACE_MS
|
|
55750
|
+
} : undefined);
|
|
55717
55751
|
const o = decision.obligation;
|
|
55718
|
-
if (decision.action === "none" || o == null)
|
|
55752
|
+
if (decision.action === "none" || o == null) {
|
|
55753
|
+
if (backgroundWorkActive && obligationLedger.hasOpen() && now - lastBgWorkDeferLogMs > 60000) {
|
|
55754
|
+
lastBgWorkDeferLogMs = now;
|
|
55755
|
+
process.stderr.write(`telegram gateway: obligation sweep deferred \u2014 in-flight autonomous sub-agent work ` + `(${obligationLedger.size()} open, bounded ${Math.round(OBLIGATION_BACKGROUND_WORK_GRACE_MS / 60000)}m from receipt)
|
|
55756
|
+
`);
|
|
55757
|
+
}
|
|
55719
55758
|
return;
|
|
55759
|
+
}
|
|
55720
55760
|
if (decision.action === "represent") {
|
|
55721
55761
|
if (pendingInboundBuffer.depth(agent) > 0)
|
|
55722
55762
|
return;
|
|
package/telegram-plugin/gateway/gateway.ts
CHANGED
|
@@ -429,6 +429,7 @@ import {
|
|
|
429
429
|
touchTurnActiveMarker,
|
|
430
430
|
removeTurnActiveMarker,
|
|
431
431
|
sweepStaleTurnActiveMarker,
|
|
432
|
+
readTurnActiveMarkerAgeMs,
|
|
432
433
|
TURN_ACTIVE_MARKER_FILE,
|
|
433
434
|
} from './turn-active-marker.js'
|
|
434
435
|
import {
|
|
@@ -1468,6 +1469,34 @@ const OBLIGATION_ESCALATE_GRACE_MS = (() => {
|
|
|
1468
1469
|
return Number.isFinite(n) && n >= 0 ? n : 45_000
|
|
1469
1470
|
})()
|
|
1470
1471
|
|
|
1472
|
+
// Background-work escalate-grace ceiling. The 45s grace above is far too short
|
|
1473
|
+
// for extended-autonomous sub-agent work: an agent that ack-firsts ("on it")
|
|
1474
|
+
// then delegates to a background worker OR an orphaned foreground sub-agent ends
|
|
1475
|
+
// its FOREGROUND turn in seconds, but the real answer lands minutes later. The
|
|
1476
|
+
// turn-in-flight machine (turn already ended) doesn't see that work, so the
|
|
1477
|
+
// sweep would re-present/escalate a false "⚠️ I may have missed this — re-send"
|
|
1478
|
+
// while the agent is genuinely researching (the gymbro liven-research case,
|
|
1479
|
+
// 2026-06-10). While `agentHasInFlightBackgroundWork()` holds, an open
|
|
1480
|
+
// obligation younger than THIS ceiling (from openedAt) is skipped. Bounded BY
|
|
1481
|
+
// CONSTRUCTION — a hard wall-clock ceiling, so even a stuck/leaked worker can't
|
|
1482
|
+
// suppress escalation forever; the obligation FSM still terminates. This also
|
|
1483
|
+
// preserves the represent budget across a restart that kills the work: with the
|
|
1484
|
+
// false represents suppressed, the hydrated obligation re-presents (resumes the
|
|
1485
|
+
// research) instead of prematurely escalating. Kill switch: =0 → pre-fix
|
|
1486
|
+
// behaviour (no background-work grace).
|
|
1487
|
+
const OBLIGATION_BACKGROUND_WORK_GRACE_MS = (() => {
|
|
1488
|
+
const raw = process.env.SWITCHROOM_OBLIGATION_BACKGROUND_WORK_GRACE_MS
|
|
1489
|
+
if (raw == null || raw === '') return 20 * 60_000 // 20 min — generous for real research, still bounded
|
|
1490
|
+
const n = Number(raw)
|
|
1491
|
+
return Number.isFinite(n) && n >= 0 ? n : 20 * 60_000
|
|
1492
|
+
})()
|
|
1493
|
+
// Marker-freshness window for the orphaned-foreground signal. The turn-active
|
|
1494
|
+
// marker is touched on every foreground tool_use and on foreground sub-agent
|
|
1495
|
+
// JSONL growth, so an mtime younger than this means a sub-agent is touching it
|
|
1496
|
+
// RIGHT NOW; older ⇒ the work stopped (or the marker leaked) ⇒ not active.
|
|
1497
|
+
// Comfortably exceeds the sub-agent poll cadence so it doesn't flap.
|
|
1498
|
+
const TURN_ACTIVE_MARKER_FRESH_MS = 90_000
|
|
1499
|
+
|
|
1471
1500
|
// ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
|
|
1472
1501
|
// Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
|
|
1473
1502
|
// (auto-classify-mid-turn.ts) is the basis for a smarter default using
|
|
@@ -5324,24 +5353,72 @@ const pendingInboundBuffer = createPendingInboundBuffer({ spool: inboundSpool })
|
|
|
5324
5353
|
// disconnect (disconnect-flush.ts), and by the 300s silence-poke watchdog;
|
|
5325
5354
|
// (3) the escalation send settles — bounded BY CONSTRUCTION via withDeadline
|
|
5326
5355
|
// below (grammy has no request timeout, so an unbounded send was the one
|
|
5327
|
-
// way an obligation could get stuck OPEN forever — now closed)
|
|
5356
|
+
// way an obligation could get stuck OPEN forever — now closed);
|
|
5357
|
+
// (4) the background-work grace releases — an obligation skipped because
|
|
5358
|
+
// agentHasInFlightBackgroundWork() is true is bounded by
|
|
5359
|
+
// OBLIGATION_BACKGROUND_WORK_GRACE_MS (a hard wall-clock ceiling from
|
|
5360
|
+
// openedAt). Even a permanently-stuck/leaked worker signal cannot suppress
|
|
5361
|
+
// the act past the ceiling, so this adds NO unbounded liveness dependency:
|
|
5362
|
+
// decideAtIdle ignores the work signal once now ≥ openedAt + ceiling, μ
|
|
5363
|
+
// resumes decreasing, and termination still holds.
|
|
5328
5364
|
// The only residual liveness assumption is the bridge eventually reconnecting /
|
|
5329
5365
|
// the process restarting, which the entire gateway's inbound delivery already
|
|
5330
5366
|
// depends on and which durable spool + boot-replay make self-healing.
|
|
5367
|
+
// True when the agent has in-flight autonomous sub-agent work the turn-in-flight
|
|
5368
|
+
// gate does NOT see: a running background worker (countRunningWorkers — its row
|
|
5369
|
+
// is INSERTed status='running' at dispatch, before the parent turn ends), OR an
|
|
5370
|
+
// orphaned/extended-autonomous FOREGROUND sub-agent that outlived its turn and is
|
|
5371
|
+
// still touching the turn-active marker (#2240; background activity deliberately
|
|
5372
|
+
// does NOT touch the parent marker, so the two signals are complementary).
|
|
5373
|
+
// Used ONLY by the obligation sweep to bound a false escalation during genuine
|
|
5374
|
+
// post-turn work. The caller already established the turn machine is idle (the
|
|
5375
|
+
// `turnInFlightForGate()` early-return), so a fresh marker here means orphaned
|
|
5376
|
+
// sub-agent activity (or a just-ended turn within the freshness window — a
|
|
5377
|
+
// harmless small extra grace, bounded by the ceiling either way).
|
|
5378
|
+
function agentHasInFlightBackgroundWork(now: number): boolean {
|
|
5379
|
+
if (countRunningWorkers() > 0) return true
|
|
5380
|
+
const ageMs = readTurnActiveMarkerAgeMs(STATE_DIR, now)
|
|
5381
|
+
return ageMs != null && ageMs < TURN_ACTIVE_MARKER_FRESH_MS
|
|
5382
|
+
}
|
|
5383
|
+
// Throttle for the background-work defer diagnostic (the 5s sweep would otherwise
|
|
5384
|
+
// log every tick across a multi-minute research window).
|
|
5385
|
+
let lastBgWorkDeferLogMs = 0
|
|
5331
5386
|
function obligationSweep(): void {
|
|
5332
5387
|
if (!OBLIGATION_LEDGER_ENABLED) return
|
|
5333
5388
|
if (!obligationLedger.hasOpen()) return
|
|
5334
5389
|
if (turnInFlightForGate()) return // a turn is running — let it finish/answer
|
|
5335
5390
|
const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
|
|
5391
|
+
const now = Date.now()
|
|
5392
|
+
// Background-work grace: while genuine autonomous sub-agent work is in flight
|
|
5393
|
+
// (a running worker, or an orphaned foreground sub-agent — neither visible to
|
|
5394
|
+
// the turn machine), an obligation younger than the ceiling is NOT re-presented
|
|
5395
|
+
// /escalated. Bounded by OBLIGATION_BACKGROUND_WORK_GRACE_MS so escalation
|
|
5396
|
+
// always eventually fires. =0 disables it.
|
|
5397
|
+
const backgroundWorkActive =
|
|
5398
|
+
OBLIGATION_BACKGROUND_WORK_GRACE_MS > 0 && agentHasInFlightBackgroundWork(now)
|
|
5336
5399
|
// Grace window: skip an obligation whose handling turn ended < grace ago — its
|
|
5337
5400
|
// trailing slow/worker answer may still be landing (over-escalation fix).
|
|
5338
5401
|
const decision = obligationLedger.decideAtIdle(
|
|
5339
|
-
OBLIGATION_ESCALATE_GRACE_MS > 0
|
|
5340
|
-
? {
|
|
5402
|
+
OBLIGATION_ESCALATE_GRACE_MS > 0 || backgroundWorkActive
|
|
5403
|
+
? {
|
|
5404
|
+
now,
|
|
5405
|
+
graceMs: OBLIGATION_ESCALATE_GRACE_MS,
|
|
5406
|
+
backgroundWorkActive,
|
|
5407
|
+
backgroundGraceMs: OBLIGATION_BACKGROUND_WORK_GRACE_MS,
|
|
5408
|
+
}
|
|
5341
5409
|
: undefined,
|
|
5342
5410
|
)
|
|
5343
5411
|
const o = decision.obligation
|
|
5344
|
-
if (decision.action === 'none' || o == null)
|
|
5412
|
+
if (decision.action === 'none' || o == null) {
|
|
5413
|
+
if (backgroundWorkActive && obligationLedger.hasOpen() && now - lastBgWorkDeferLogMs > 60_000) {
|
|
5414
|
+
lastBgWorkDeferLogMs = now
|
|
5415
|
+
process.stderr.write(
|
|
5416
|
+
`telegram gateway: obligation sweep deferred — in-flight autonomous sub-agent work ` +
|
|
5417
|
+
`(${obligationLedger.size()} open, bounded ${Math.round(OBLIGATION_BACKGROUND_WORK_GRACE_MS / 60_000)}m from receipt)\n`,
|
|
5418
|
+
)
|
|
5419
|
+
}
|
|
5420
|
+
return
|
|
5421
|
+
}
|
|
5345
5422
|
if (decision.action === 'represent') {
|
|
5346
5423
|
// Re-present goes through the bridge → buffer. Only the represent path is
|
|
5347
5424
|
// gated on an empty buffer (let the existing drain run first, avoid
|
|
package/telegram-plugin/gateway/obligation-ledger.ts
CHANGED
|
@@ -187,22 +187,61 @@ export class ObligationLedger {
|
|
|
187
187
|
* genuinely-stale one is still acted on while a freshly-ended one waits. Pure
|
|
188
188
|
* (clock injected via opts.now, mirroring the builder convention). With no opts
|
|
189
189
|
* (or graceMs<=0) this is the pre-grace behaviour exactly.
|
|
190
|
+
*
|
|
191
|
+
* BACKGROUND-WORK GRACE (opts.backgroundWorkActive): the 45s `graceMs` above is
|
|
192
|
+
* far too short for extended-autonomous sub-agent work — an agent that
|
|
193
|
+
* ack-firsts ("on it") then delegates to a background worker or an orphaned
|
|
194
|
+
* foreground sub-agent ends its FOREGROUND turn in seconds, but the real answer
|
|
195
|
+
* lands minutes later. The in-flight machine (turn already ended) does not see
|
|
196
|
+
* that work, so the sweep would re-present/escalate a false "did I miss this?
|
|
197
|
+
* re-send" while the agent is genuinely researching (the gymbro liven case,
|
|
198
|
+
* 2026-06-10). When the gateway reports `backgroundWorkActive` (a running worker
|
|
199
|
+
* or a freshly-touched turn-active marker), an obligation younger than
|
|
200
|
+
* `backgroundGraceMs` (measured from openedAt) is SKIPPED. Bounded BY
|
|
201
|
+
* CONSTRUCTION: `backgroundGraceMs` is a hard wall-clock ceiling, so even a
|
|
202
|
+
* pathologically-stuck/leaked worker cannot suppress the escalation forever —
|
|
203
|
+
* once openedAt+backgroundGraceMs passes, the obligation is acted on regardless
|
|
204
|
+
* of work state, and the FSM still terminates.
|
|
190
205
|
*/
|
|
191
|
-
decideAtIdle(opts?: {
|
|
192
|
-
|
|
193
|
-
|
|
206
|
+
decideAtIdle(opts?: {
|
|
207
|
+
now: number
|
|
208
|
+
graceMs: number
|
|
209
|
+
backgroundWorkActive?: boolean
|
|
210
|
+
backgroundGraceMs?: number
|
|
211
|
+
}): LedgerDecision {
|
|
212
|
+
const useEligible = opts != null && (opts.graceMs > 0 || opts.backgroundWorkActive === true)
|
|
213
|
+
const o = useEligible
|
|
214
|
+
? this.oldestEligible(
|
|
215
|
+
opts!.now,
|
|
216
|
+
opts!.graceMs,
|
|
217
|
+
opts!.backgroundWorkActive === true,
|
|
218
|
+
opts!.backgroundGraceMs ?? 0,
|
|
219
|
+
)
|
|
220
|
+
: this.oldest()
|
|
194
221
|
if (o === undefined) return { action: 'none' }
|
|
195
222
|
if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
|
|
196
223
|
return { action: 'represent', obligation: o }
|
|
197
224
|
}
|
|
198
225
|
|
|
199
|
-
/** The oldest open obligation
|
|
200
|
-
*
|
|
201
|
-
*
|
|
202
|
-
|
|
226
|
+
/** The oldest open obligation that is currently ELIGIBLE to act on — i.e. NOT
|
|
227
|
+
* within either grace window:
|
|
228
|
+
* - trailing-answer grace: its handling turn ended < `graceMs` ago (a queued
|
|
229
|
+
* obligation with no lastTurnEndedAt can't have a trailing answer, so it is
|
|
230
|
+
* always eligible on this axis); AND
|
|
231
|
+
* - background-work grace: when `backgroundWorkActive`, it was opened <
|
|
232
|
+
* `backgroundGraceMs` ago (genuine in-flight autonomous work — bounded by
|
|
233
|
+
* the ceiling so a stale/leaked worker can't suppress escalation forever). */
|
|
234
|
+
private oldestEligible(
|
|
235
|
+
now: number,
|
|
236
|
+
graceMs: number,
|
|
237
|
+
backgroundWorkActive: boolean,
|
|
238
|
+
backgroundGraceMs: number,
|
|
239
|
+
): Obligation | undefined {
|
|
203
240
|
let best: Obligation | undefined
|
|
204
241
|
for (const o of this.open.values()) {
|
|
205
|
-
if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs) continue //
|
|
242
|
+
if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs) continue // trailing-answer grace
|
|
243
|
+
if (backgroundWorkActive && backgroundGraceMs > 0 && now - o.openedAt < backgroundGraceMs)
|
|
244
|
+
continue // in-flight autonomous work, bounded by the ceiling
|
|
206
245
|
if (best === undefined || o.openedAt < best.openedAt) best = o
|
|
207
246
|
}
|
|
208
247
|
return best
|
|
package/telegram-plugin/gateway/turn-active-marker.ts
CHANGED
|
@@ -174,3 +174,25 @@ export function sweepStaleTurnActiveMarker(
|
|
|
174
174
|
return false;
|
|
175
175
|
}
|
|
176
176
|
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Age (ms) of the turn-active marker's mtime, or null if the marker is
|
|
180
|
+
* absent/unstattable. The marker is touched on every foreground tool_use AND
|
|
181
|
+
* (via the subagent-watcher, #501) on foreground sub-agent JSONL growth — so a
|
|
182
|
+
* SMALL age means the agent, or an orphaned/extended-autonomous foreground
|
|
183
|
+
* sub-agent that outlived its turn (#2240), is actively working RIGHT NOW, even
|
|
184
|
+
* though the turn-in-flight machine has gone idle. A large age (or null) means
|
|
185
|
+
* the work stopped or the marker leaked. Used by the obligation sweep to avoid a
|
|
186
|
+
* false "did I miss this? re-send" escalation while genuine post-turn work is in
|
|
187
|
+
* flight. Pure read; clock injectable for tests. Never throws — a stat failure
|
|
188
|
+
* is reported as null (treated as "not working").
|
|
189
|
+
*/
|
|
190
|
+
export function readTurnActiveMarkerAgeMs(stateDir: string, now?: number): number | null {
|
|
191
|
+
const path = join(stateDir, TURN_ACTIVE_MARKER_FILE);
|
|
192
|
+
try {
|
|
193
|
+
const st = statSync(path);
|
|
194
|
+
return (now ?? Date.now()) - st.mtimeMs;
|
|
195
|
+
} catch {
|
|
196
|
+
return null; // ENOENT / unstattable → not working
|
|
197
|
+
}
|
|
198
|
+
}
|
|
package/telegram-plugin/tests/obligation-determinism.test.ts
CHANGED
|
@@ -89,7 +89,13 @@ interface Sim {
|
|
|
89
89
|
steps: number;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
function runSchedule(
|
|
92
|
+
function runSchedule(
|
|
93
|
+
msgs: Msg[],
|
|
94
|
+
seed: number,
|
|
95
|
+
graceMs = 0,
|
|
96
|
+
bgGraceMs = 0,
|
|
97
|
+
bgAlwaysActive = false,
|
|
98
|
+
): Sim {
|
|
93
99
|
const PATH = "/state/agent/telegram/obligations.json";
|
|
94
100
|
const store = memStore();
|
|
95
101
|
let ledger = new ObligationLedger(MAX_REPRESENTS, {
|
|
@@ -148,12 +154,23 @@ function runSchedule(msgs: Msg[], seed: number, graceMs = 0): Sim {
|
|
|
148
154
|
threadId: 3,
|
|
149
155
|
messageId: m.msgId,
|
|
150
156
|
text: `msg ${m.id}`,
|
|
151
|
-
|
|
157
|
+
// When the background-work ceiling is exercised it is measured from
|
|
158
|
+
// openedAt against `clock`, so openedAt must live on the same virtual
|
|
159
|
+
// clock (the legacy proofs keep the tiny 1000+steps value — they never
|
|
160
|
+
// read openedAt against `now`).
|
|
161
|
+
openedAt: bgGraceMs > 0 ? clock : 1000 + steps,
|
|
152
162
|
});
|
|
153
163
|
deliverTurn(m.id); // original turn (attempt 0)
|
|
154
164
|
} else if (open) {
|
|
155
165
|
const decision =
|
|
156
|
-
graceMs > 0
|
|
166
|
+
graceMs > 0 || bgGraceMs > 0
|
|
167
|
+
? ledger.decideAtIdle({
|
|
168
|
+
now: clock,
|
|
169
|
+
graceMs,
|
|
170
|
+
backgroundWorkActive: bgGraceMs > 0 && bgAlwaysActive,
|
|
171
|
+
backgroundGraceMs: bgGraceMs,
|
|
172
|
+
})
|
|
173
|
+
: ledger.decideAtIdle();
|
|
157
174
|
if (decision.action === "none") {
|
|
158
175
|
// Every open obligation is within its grace window — the sweep waits.
|
|
159
176
|
// Advance the clock so grace deterministically expires; no livelock.
|
|
@@ -280,6 +297,49 @@ describe("obligation determinism — every inbound reaches a terminal, no silent
|
|
|
280
297
|
}
|
|
281
298
|
});
|
|
282
299
|
|
|
300
|
+
it("holds across 3000 schedules WITH background-work grace PERPETUALLY active (ceiling forces a terminal, never prevents one)", () => {
|
|
301
|
+
// The hardest case for the new bound: the agent appears to be doing
|
|
302
|
+
// autonomous sub-agent work for the ENTIRE run (backgroundWorkActive never
|
|
303
|
+
// clears). The ledger must still drive every obligation to its correct
|
|
304
|
+
// terminal — proving the OBLIGATION_BACKGROUND_WORK_GRACE_MS ceiling makes
|
|
305
|
+
// the suppression bounded BY CONSTRUCTION (no livelock, no silent loss), and
|
|
306
|
+
// that, like the trailing-answer grace, it only DELAYS: the terminal each
|
|
307
|
+
// message reaches is IDENTICAL to the no-grace run. If always-on terminates
|
|
308
|
+
// correctly, every intermittent work pattern does too (strictly less
|
|
309
|
+
// suppression).
|
|
310
|
+
const ANSWER = [0, 1, 2, 3, 99];
|
|
311
|
+
const ESCFAIL = [0, 1, 2, 3, 5];
|
|
312
|
+
const GRACE_MS = 45_000;
|
|
313
|
+
const BG_CEIL_MS = 20 * 60_000; // mirrors OBLIGATION_BACKGROUND_WORK_GRACE_MS default
|
|
314
|
+
for (let seed = 1; seed <= 3000; seed++) {
|
|
315
|
+
const r = rng(seed * 7919);
|
|
316
|
+
const n = 1 + Math.floor(r() * 5);
|
|
317
|
+
const msgs: Msg[] = [];
|
|
318
|
+
for (let i = 0; i < n; i++) {
|
|
319
|
+
const msgId = seed * 100 + i;
|
|
320
|
+
msgs.push({
|
|
321
|
+
id: `c:3#${msgId}`,
|
|
322
|
+
msgId,
|
|
323
|
+
answerOnAttempt: pick(ANSWER, r),
|
|
324
|
+
escalateFailsFor: pick(ESCFAIL, r),
|
|
325
|
+
});
|
|
326
|
+
}
|
|
327
|
+
const { terminals, steps } = runSchedule(msgs, seed * 104729, GRACE_MS, BG_CEIL_MS, true);
|
|
328
|
+
expect(steps).toBeLessThan(10_000); // ceiling forces progress — no bg-grace livelock
|
|
329
|
+
for (const m of msgs) {
|
|
330
|
+
const t = terminals.get(m.id);
|
|
331
|
+
expect(t, `bg seed=${seed} msg=${m.id} answer=${m.answerOnAttempt} escFail=${m.escalateFailsFor}`).toBeDefined();
|
|
332
|
+
if (m.answerOnAttempt <= MAX_REPRESENTS) {
|
|
333
|
+
expect(t).toBe("answered");
|
|
334
|
+
} else if (m.escalateFailsFor < ESCALATE_MAX) {
|
|
335
|
+
expect(t).toBe("escalation-delivered");
|
|
336
|
+
} else {
|
|
337
|
+
expect(t).toBe("escalation-give-up");
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
});
|
|
342
|
+
|
|
283
343
|
it("a delivered-but-unanswered obligation survives a restart and is escalated, not lost", () => {
|
|
284
344
|
// Deterministic single case: model NEVER answers, escalation succeeds first try,
|
|
285
345
|
// with a restart forced mid-life via a seed that triggers the 0.15 branch.
|
|
package/telegram-plugin/tests/obligation-ledger.test.ts
CHANGED
|
@@ -326,3 +326,88 @@ describe("ObligationLedger — escalate-grace window (over-escalation fix)", ()
|
|
|
326
326
|
expect(L.decideAtIdle({ now: 142000, graceMs: 45000 }).action).toBe("escalate");
|
|
327
327
|
});
|
|
328
328
|
});
|
|
329
|
+
|
|
330
|
+
describe("ObligationLedger — background-work grace (extended-autonomous fix, gymbro 2026-06-10)", () => {
|
|
331
|
+
function input(id: string, openedAt: number) {
|
|
332
|
+
return { originTurnId: id, chatId: "-100123", threadId: 3, messageId: Number(id.split("#").pop() ?? 0), text: "research liven", openedAt };
|
|
333
|
+
}
|
|
334
|
+
// 20-min ceiling, mirroring OBLIGATION_BACKGROUND_WORK_GRACE_MS default.
|
|
335
|
+
const CEIL = 20 * 60_000;
|
|
336
|
+
|
|
337
|
+
it("skips an obligation younger than the ceiling while background work is active", () => {
|
|
338
|
+
const L = new ObligationLedger();
|
|
339
|
+
L.openIfAbsent(input("c:3#1", 1000)); // opened at t=1000
|
|
340
|
+
// 5 min later, a worker is running → genuine work in flight → wait.
|
|
341
|
+
expect(
|
|
342
|
+
L.decideAtIdle({ now: 1000 + 5 * 60_000, graceMs: 45000, backgroundWorkActive: true, backgroundGraceMs: CEIL }).action,
|
|
343
|
+
).toBe("none");
|
|
344
|
+
});
|
|
345
|
+
|
|
346
|
+
it("acts once the obligation crosses the ceiling EVEN IF work is still active (bounded — no silent drop)", () => {
|
|
347
|
+
const L = new ObligationLedger();
|
|
348
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
349
|
+
// 20m + 1s after openedAt, still flagged active → ceiling wins → act.
|
|
350
|
+
expect(
|
|
351
|
+
L.decideAtIdle({ now: 1000 + CEIL + 1000, graceMs: 45000, backgroundWorkActive: true, backgroundGraceMs: CEIL }).action,
|
|
352
|
+
).toBe("represent");
|
|
353
|
+
});
|
|
354
|
+
|
|
355
|
+
it("backgroundWorkActive=false → no extra grace (pre-fix behaviour on this axis)", () => {
|
|
356
|
+
const L = new ObligationLedger();
|
|
357
|
+
L.openIfAbsent(input("c:3#1", 1000)); // still-queued, no turn end
|
|
358
|
+
expect(
|
|
359
|
+
L.decideAtIdle({ now: 2000, graceMs: 45000, backgroundWorkActive: false, backgroundGraceMs: CEIL }).action,
|
|
360
|
+
).toBe("represent");
|
|
361
|
+
});
|
|
362
|
+
|
|
363
|
+
it("backgroundGraceMs=0 (kill switch) → work signal ignored, acts immediately", () => {
|
|
364
|
+
const L = new ObligationLedger();
|
|
365
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
366
|
+
expect(
|
|
367
|
+
L.decideAtIdle({ now: 2000, graceMs: 45000, backgroundWorkActive: true, backgroundGraceMs: 0 }).action,
|
|
368
|
+
).toBe("represent");
|
|
369
|
+
});
|
|
370
|
+
|
|
371
|
+
it("composes with the trailing-answer grace: both must clear before acting", () => {
|
|
372
|
+
const L = new ObligationLedger();
|
|
373
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
374
|
+
L.noteTurnEnded("c:3#1", 5000);
|
|
375
|
+
// turn-end grace cleared (60s later) but within bg ceiling + work active → still wait.
|
|
376
|
+
expect(
|
|
377
|
+
L.decideAtIdle({ now: 65000, graceMs: 45000, backgroundWorkActive: true, backgroundGraceMs: CEIL }).action,
|
|
378
|
+
).toBe("none");
|
|
379
|
+
// same instant, work no longer active → trailing grace already clear → act.
|
|
380
|
+
expect(
|
|
381
|
+
L.decideAtIdle({ now: 65000, graceMs: 45000, backgroundWorkActive: false, backgroundGraceMs: CEIL }).action,
|
|
382
|
+
).toBe("represent");
|
|
383
|
+
});
|
|
384
|
+
|
|
385
|
+
it("picks the oldest ELIGIBLE: a young in-work obligation does not block an ancient one past the ceiling", () => {
|
|
386
|
+
const L = new ObligationLedger();
|
|
387
|
+
L.openIfAbsent(input("c:3#old", 1000)); // ancient
|
|
388
|
+
L.openIfAbsent(input("c:3#new", 1000 + CEIL)); // opened CEIL later
|
|
389
|
+
const now = 1000 + CEIL + 5000; // old is past ceiling; new is only 5s old
|
|
390
|
+
const d = L.decideAtIdle({ now, graceMs: 45000, backgroundWorkActive: true, backgroundGraceMs: CEIL });
|
|
391
|
+
expect(d.action).toBe("represent");
|
|
392
|
+
expect(d.obligation?.originTurnId).toBe("c:3#old");
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
it("represent budget is preserved across the work window → resumes (not escalates) after a restart-kill", () => {
|
|
396
|
+
// Models the gymbro case: while the worker runs, the sweep must NOT burn the
|
|
397
|
+
// represent ladder. So after a restart kills the work (work now inactive),
|
|
398
|
+
// a never-represented obligation re-presents (resume) rather than escalates.
|
|
399
|
+
const L = new ObligationLedger(2);
|
|
400
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
401
|
+
// During the work window, every sweep is a no-op (no markRepresented called).
|
|
402
|
+
for (const t of [60_000, 120_000, 300_000]) {
|
|
403
|
+
expect(
|
|
404
|
+
L.decideAtIdle({ now: t, graceMs: 45000, backgroundWorkActive: true, backgroundGraceMs: CEIL }).action,
|
|
405
|
+
).toBe("none");
|
|
406
|
+
}
|
|
407
|
+
expect(L.list()[0].representCount).toBe(0); // budget intact
|
|
408
|
+
// Restart kills the work; obligation hydrated with representCount 0 → resume.
|
|
409
|
+
expect(
|
|
410
|
+
L.decideAtIdle({ now: 360_000, graceMs: 45000, backgroundWorkActive: false, backgroundGraceMs: CEIL }).action,
|
|
411
|
+
).toBe("represent");
|
|
412
|
+
});
|
|
413
|
+
});
|
|
package/telegram-plugin/tests/turn-active-marker.test.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
touchTurnActiveMarker,
|
|
15
15
|
removeTurnActiveMarker,
|
|
16
16
|
sweepStaleTurnActiveMarker,
|
|
17
|
+
readTurnActiveMarkerAgeMs,
|
|
17
18
|
} from '../gateway/turn-active-marker.js'
|
|
18
19
|
|
|
19
20
|
describe('turn-active-marker (#412)', () => {
|
|
@@ -192,4 +193,31 @@ describe('turn-active-marker (#412)', () => {
|
|
|
192
193
|
const mode = statSync(path).mode & 0o777
|
|
193
194
|
expect(mode).toBe(0o600)
|
|
194
195
|
})
|
|
196
|
+
|
|
197
|
+
// readTurnActiveMarkerAgeMs — the orphaned-foreground "agent still working"
|
|
198
|
+
// signal for the obligation sweep (#2240 / gymbro 2026-06-10).
|
|
199
|
+
it('readTurnActiveMarkerAgeMs returns null when the marker is absent', () => {
|
|
200
|
+
expect(readTurnActiveMarkerAgeMs(tmp)).toBeNull()
|
|
201
|
+
})
|
|
202
|
+
|
|
203
|
+
it('readTurnActiveMarkerAgeMs returns a small age for a fresh marker', () => {
|
|
204
|
+
writeTurnActiveMarker(tmp, { turnKey: 'k', chatId: 'c', threadId: null, startedAt: 1 })
|
|
205
|
+
const age = readTurnActiveMarkerAgeMs(tmp)
|
|
206
|
+
expect(age).not.toBeNull()
|
|
207
|
+
// |age| is tiny for a just-written marker. It can be a hair negative when the
|
|
208
|
+
// filesystem mtime resolves slightly ahead of Date.now() — that's fine; what
|
|
209
|
+
// matters for the freshness signal is the small magnitude.
|
|
210
|
+
expect(Math.abs(age!)).toBeLessThan(5_000)
|
|
211
|
+
})
|
|
212
|
+
|
|
213
|
+
it('readTurnActiveMarkerAgeMs reflects a stale (back-dated) mtime against an injected clock', () => {
|
|
214
|
+
const path = join(tmp, TURN_ACTIVE_MARKER_FILE)
|
|
215
|
+
writeTurnActiveMarker(tmp, { turnKey: 'k', chatId: 'c', threadId: null, startedAt: 1 })
|
|
216
|
+
const tenMinAgo = new Date(Date.now() - 10 * 60_000)
|
|
217
|
+
utimesSync(path, tenMinAgo, tenMinAgo)
|
|
218
|
+
const now = tenMinAgo.getTime() + 10 * 60_000
|
|
219
|
+
const age = readTurnActiveMarkerAgeMs(tmp, now)
|
|
220
|
+
expect(age).not.toBeNull()
|
|
221
|
+
expect(Math.abs(age! - 10 * 60_000)).toBeLessThan(50) // ~10 min old
|
|
222
|
+
})
|
|
195
223
|
})
|