switchroom 0.14.91 → 0.14.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +1030 -56
- package/dist/auth-broker/index.js +50 -3
- package/dist/cli/notion-write-pretool.mjs +50 -3
- package/dist/cli/switchroom.js +1239 -906
- package/dist/host-control/main.js +50 -3
- package/dist/vault/approvals/kernel-server.js +51 -4
- package/dist/vault/broker/server.js +51 -4
- package/package.json +1 -1
- package/profiles/_base/cron-session.sh.hbs +77 -0
- package/profiles/_base/start.sh.hbs +13 -0
- package/telegram-plugin/dist/gateway/gateway.js +139 -19
- package/telegram-plugin/gateway/cron-session.ts +45 -0
- package/telegram-plugin/gateway/gateway.ts +133 -10
- package/telegram-plugin/gateway/obligation-ledger.ts +47 -8
- package/telegram-plugin/gateway/turn-active-marker.ts +22 -0
- package/telegram-plugin/tests/cron-session.test.ts +32 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
- package/telegram-plugin/tests/obligation-ledger.test.ts +85 -0
- package/telegram-plugin/tests/turn-active-marker.test.ts +28 -0
|
@@ -174,3 +174,25 @@ export function sweepStaleTurnActiveMarker(
|
|
|
174
174
|
return false;
|
|
175
175
|
}
|
|
176
176
|
}
|
|
177
|
+
|
|
178
|
+
/**
 * Reports how long ago (in ms) the turn-active marker file was last modified.
 *
 * Why it matters: the marker is touched on every foreground tool_use and, via
 * the subagent-watcher (#501), whenever a foreground sub-agent's JSONL grows.
 * A SMALL age therefore signals that the agent — or an orphaned /
 * extended-autonomous foreground sub-agent that outlived its turn (#2240) — is
 * working right now even though the turn-in-flight machine has already gone
 * idle. A large age, or `null`, signals that the work stopped or the marker
 * leaked. The obligation sweep consults this to avoid a false
 * "did I miss this? re-send" escalation while genuine post-turn work is still
 * in flight.
 *
 * Read-only. A stat failure is never propagated: it is reported as `null`,
 * which callers treat as "not working". The result is the plain difference
 * `now - mtime` and is not clamped, so it can come out slightly negative when
 * the file's mtime is ahead of the supplied clock.
 *
 * @param stateDir Directory that holds the turn-active marker file.
 * @param now Clock reading in epoch milliseconds; injectable for tests and
 *   defaulting to `Date.now()` when omitted.
 * @returns The marker's age in milliseconds, or `null` when the marker is
 *   absent or cannot be stat'ed.
 */
export function readTurnActiveMarkerAgeMs(stateDir: string, now?: number): number | null {
  const markerPath = join(stateDir, TURN_ACTIVE_MARKER_FILE);

  let modifiedAtMs: number;
  try {
    modifiedAtMs = statSync(markerPath).mtimeMs;
  } catch {
    // ENOENT / unstattable → not working
    return null;
  }

  // The clock is only sampled once the stat has succeeded.
  const reference = now ?? Date.now();
  return reference - modifiedAtMs;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
CRON_IDENTITY_SUFFIX,
|
|
4
|
+
baseAgent,
|
|
5
|
+
cronIdentity,
|
|
6
|
+
isCronIdentity,
|
|
7
|
+
resolveInjectTarget,
|
|
8
|
+
} from '../gateway/cron-session.js'
|
|
9
|
+
|
|
10
|
+
// Covers the cron-session identity helpers: deriving an agent's cron identity,
// recognising one, mapping it back to the base agent, and choosing the inject
// target from the message metadata.
describe('cron-session identity helpers', () => {
  it('derives and detects the cron identity', () => {
    // Derivation: the cron identity is the agent name plus the shared suffix.
    const derived = cronIdentity('clerk')
    expect(derived).toBe(`clerk${CRON_IDENTITY_SUFFIX}`)

    // Detection: only the suffixed name counts; nullish input is not a cron identity.
    const detectionCases: Array<[string | null | undefined, boolean]> = [
      ['clerk-cron', true],
      ['clerk', false],
      [null, false],
      [undefined, false],
    ]
    for (const [candidate, expected] of detectionCases) {
      expect(isCronIdentity(candidate)).toBe(expected)
    }
  })

  it('round-trips base agent', () => {
    // Stripping a derived identity yields the original agent...
    const viaCron = baseAgent(cronIdentity('marko'))
    expect(viaCron).toBe('marko')
    // ...and a plain agent name passes through untouched.
    const direct = baseAgent('marko')
    expect(direct).toBe('marko')
  })

  it('resolveInjectTarget routes only meta.session=cron to the cron bridge', () => {
    const agent = 'clerk'

    // Only an explicit session of 'cron' selects the cron identity.
    const cronSession = resolveInjectTarget(agent, { session: 'cron', source: 'cron' })
    expect(cronSession).toBe('clerk-cron')

    // A cron *source* alone is not enough — the session decides.
    const mainSession = resolveInjectTarget(agent, { session: 'main', source: 'cron' })
    expect(mainSession).toBe('clerk')
    const noSession = resolveInjectTarget(agent, { source: 'cron' })
    expect(noSession).toBe('clerk')

    // Missing metadata falls back to the base agent.
    const noMeta = resolveInjectTarget(agent, undefined)
    expect(noMeta).toBe('clerk')

    // back-compat: every legacy caller (no session) is unchanged.
    const legacy = resolveInjectTarget(agent, { source: 'telegram' })
    expect(legacy).toBe('clerk')
  })
})
|
|
@@ -89,7 +89,13 @@ interface Sim {
|
|
|
89
89
|
steps: number;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
function runSchedule(
|
|
92
|
+
function runSchedule(
|
|
93
|
+
msgs: Msg[],
|
|
94
|
+
seed: number,
|
|
95
|
+
graceMs = 0,
|
|
96
|
+
bgGraceMs = 0,
|
|
97
|
+
bgAlwaysActive = false,
|
|
98
|
+
): Sim {
|
|
93
99
|
const PATH = "/state/agent/telegram/obligations.json";
|
|
94
100
|
const store = memStore();
|
|
95
101
|
let ledger = new ObligationLedger(MAX_REPRESENTS, {
|
|
@@ -148,12 +154,23 @@ function runSchedule(msgs: Msg[], seed: number, graceMs = 0): Sim {
|
|
|
148
154
|
threadId: 3,
|
|
149
155
|
messageId: m.msgId,
|
|
150
156
|
text: `msg ${m.id}`,
|
|
151
|
-
|
|
157
|
+
// When the background-work ceiling is exercised it is measured from
|
|
158
|
+
// openedAt against `clock`, so openedAt must live on the same virtual
|
|
159
|
+
// clock (the legacy proofs keep the tiny 1000+steps value — they never
|
|
160
|
+
// read openedAt against `now`).
|
|
161
|
+
openedAt: bgGraceMs > 0 ? clock : 1000 + steps,
|
|
152
162
|
});
|
|
153
163
|
deliverTurn(m.id); // original turn (attempt 0)
|
|
154
164
|
} else if (open) {
|
|
155
165
|
const decision =
|
|
156
|
-
graceMs > 0
|
|
166
|
+
graceMs > 0 || bgGraceMs > 0
|
|
167
|
+
? ledger.decideAtIdle({
|
|
168
|
+
now: clock,
|
|
169
|
+
graceMs,
|
|
170
|
+
backgroundWorkActive: bgGraceMs > 0 && bgAlwaysActive,
|
|
171
|
+
backgroundGraceMs: bgGraceMs,
|
|
172
|
+
})
|
|
173
|
+
: ledger.decideAtIdle();
|
|
157
174
|
if (decision.action === "none") {
|
|
158
175
|
// Every open obligation is within its grace window — the sweep waits.
|
|
159
176
|
// Advance the clock so grace deterministically expires; no livelock.
|
|
@@ -280,6 +297,49 @@ describe("obligation determinism — every inbound reaches a terminal, no silent
|
|
|
280
297
|
}
|
|
281
298
|
});
|
|
282
299
|
|
|
300
|
+
it("holds across 3000 schedules WITH background-work grace PERPETUALLY active (ceiling forces a terminal, never prevents one)", () => {
  // Worst case for the background-work bound: the agent looks busy with
  // autonomous sub-agent work for the WHOLE run (backgroundWorkActive never
  // clears). Every obligation must still reach its correct terminal, which
  // shows the OBLIGATION_BACKGROUND_WORK_GRACE_MS ceiling bounds the
  // suppression by construction (no livelock, no silent loss). Like the
  // trailing-answer grace it may only DELAY: the terminal each message reaches
  // must match the no-grace run. If the always-on pattern terminates correctly,
  // any intermittent pattern does too, since it suppresses strictly less.
  const answerChoices = [0, 1, 2, 3, 99];
  const escalateFailChoices = [0, 1, 2, 3, 5];
  const trailingGraceMs = 45_000;
  const bgCeilingMs = 20 * 60_000; // mirrors OBLIGATION_BACKGROUND_WORK_GRACE_MS default

  for (let seed = 1; seed <= 3000; seed++) {
    const next = rng(seed * 7919);
    const count = 1 + Math.floor(next() * 5);

    const msgs: Msg[] = Array.from({ length: count }, (_, i) => {
      const msgId = seed * 100 + i;
      // Draw order is part of the schedule: answer attempt first, then the
      // escalation-failure count.
      const answerOnAttempt = pick(answerChoices, next);
      const escalateFailsFor = pick(escalateFailChoices, next);
      return { id: `c:3#${msgId}`, msgId, answerOnAttempt, escalateFailsFor };
    });

    const sim = runSchedule(msgs, seed * 104729, trailingGraceMs, bgCeilingMs, true);
    expect(sim.steps).toBeLessThan(10_000); // ceiling forces progress — no bg-grace livelock

    for (const m of msgs) {
      const t = sim.terminals.get(m.id);
      expect(t, `bg seed=${seed} msg=${m.id} answer=${m.answerOnAttempt} escFail=${m.escalateFailsFor}`).toBeDefined();

      // Expected terminal depends only on the message's own parameters.
      if (m.answerOnAttempt <= MAX_REPRESENTS) {
        expect(t).toBe("answered");
        continue;
      }
      if (m.escalateFailsFor < ESCALATE_MAX) {
        expect(t).toBe("escalation-delivered");
        continue;
      }
      expect(t).toBe("escalation-give-up");
    }
  }
});
|
|
342
|
+
|
|
283
343
|
it("a delivered-but-unanswered obligation survives a restart and is escalated, not lost", () => {
|
|
284
344
|
// Deterministic single case: model NEVER answers, escalation succeeds first try,
|
|
285
345
|
// with a restart forced mid-life via a seed that triggers the 0.15 branch.
|
|
@@ -326,3 +326,88 @@ describe("ObligationLedger — escalate-grace window (over-escalation fix)", ()
|
|
|
326
326
|
expect(L.decideAtIdle({ now: 142000, graceMs: 45000 }).action).toBe("escalate");
|
|
327
327
|
});
|
|
328
328
|
});
|
|
329
|
+
|
|
330
|
+
// Exercises the background-work grace of ObligationLedger.decideAtIdle: while
// background work is flagged active, an obligation younger than the ceiling is
// left alone; past the ceiling (or with the work flag off, or the ceiling set
// to 0) the sweep acts as usual.
describe("ObligationLedger — background-work grace (extended-autonomous fix, gymbro 2026-06-10)", () => {
  // 20-min ceiling, mirroring OBLIGATION_BACKGROUND_WORK_GRACE_MS default.
  const CEIL = 20 * 60_000;
  // Trailing-answer grace used by every sweep in this suite.
  const TRAILING_GRACE_MS = 45000;

  /** Builds an obligation input; the message id is the part of `id` after the last "#". */
  const input = (id: string, openedAt: number) => ({
    originTurnId: id,
    chatId: "-100123",
    threadId: 3,
    messageId: Number(id.split("#").pop() ?? 0),
    text: "research liven",
    openedAt,
  });

  /** Runs one idle sweep with the suite's trailing grace and the given background-work settings. */
  const sweep = (
    ledger: ObligationLedger,
    now: number,
    backgroundWorkActive: boolean,
    backgroundGraceMs: number = CEIL,
  ) => ledger.decideAtIdle({ now, graceMs: TRAILING_GRACE_MS, backgroundWorkActive, backgroundGraceMs });

  it("skips an obligation younger than the ceiling while background work is active", () => {
    const ledger = new ObligationLedger();
    ledger.openIfAbsent(input("c:3#1", 1000)); // opened at t=1000
    // 5 min later, a worker is running → genuine work in flight → wait.
    const fiveMinLater = 1000 + 5 * 60_000;
    expect(sweep(ledger, fiveMinLater, true).action).toBe("none");
  });

  it("acts once the obligation crosses the ceiling EVEN IF work is still active (bounded — no silent drop)", () => {
    const ledger = new ObligationLedger();
    ledger.openIfAbsent(input("c:3#1", 1000));
    // 20m + 1s after openedAt, still flagged active → ceiling wins → act.
    const pastCeiling = 1000 + CEIL + 1000;
    expect(sweep(ledger, pastCeiling, true).action).toBe("represent");
  });

  it("backgroundWorkActive=false → no extra grace (pre-fix behaviour on this axis)", () => {
    const ledger = new ObligationLedger();
    ledger.openIfAbsent(input("c:3#1", 1000)); // still-queued, no turn end
    expect(sweep(ledger, 2000, false).action).toBe("represent");
  });

  it("backgroundGraceMs=0 (kill switch) → work signal ignored, acts immediately", () => {
    const ledger = new ObligationLedger();
    ledger.openIfAbsent(input("c:3#1", 1000));
    expect(sweep(ledger, 2000, true, 0).action).toBe("represent");
  });

  it("composes with the trailing-answer grace: both must clear before acting", () => {
    const ledger = new ObligationLedger();
    ledger.openIfAbsent(input("c:3#1", 1000));
    ledger.noteTurnEnded("c:3#1", 5000);
    // turn-end grace cleared (60s later) but within bg ceiling + work active → still wait.
    const whileWorking = sweep(ledger, 65000, true);
    expect(whileWorking.action).toBe("none");
    // same instant, work no longer active → trailing grace already clear → act.
    const afterWork = sweep(ledger, 65000, false);
    expect(afterWork.action).toBe("represent");
  });

  it("picks the oldest ELIGIBLE: a young in-work obligation does not block an ancient one past the ceiling", () => {
    const ledger = new ObligationLedger();
    ledger.openIfAbsent(input("c:3#old", 1000)); // ancient
    ledger.openIfAbsent(input("c:3#new", 1000 + CEIL)); // opened CEIL later
    const now = 1000 + CEIL + 5000; // old is past ceiling; new is only 5s old
    const decision = sweep(ledger, now, true);
    expect(decision.action).toBe("represent");
    expect(decision.obligation?.originTurnId).toBe("c:3#old");
  });

  it("represent budget is preserved across the work window → resumes (not escalates) after a restart-kill", () => {
    // Models the gymbro case: while the worker runs, the sweep must NOT burn the
    // represent ladder. So after a restart kills the work (work now inactive),
    // a never-represented obligation re-presents (resume) rather than escalates.
    const ledger = new ObligationLedger(2);
    ledger.openIfAbsent(input("c:3#1", 1000));
    // During the work window, every sweep is a no-op (no markRepresented called).
    const sweepTimes = [60_000, 120_000, 300_000];
    sweepTimes.forEach((t) => {
      expect(sweep(ledger, t, true).action).toBe("none");
    });
    expect(ledger.list()[0].representCount).toBe(0); // budget intact
    // Restart kills the work; obligation hydrated with representCount 0 → resume.
    expect(sweep(ledger, 360_000, false).action).toBe("represent");
  });
});
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
touchTurnActiveMarker,
|
|
15
15
|
removeTurnActiveMarker,
|
|
16
16
|
sweepStaleTurnActiveMarker,
|
|
17
|
+
readTurnActiveMarkerAgeMs,
|
|
17
18
|
} from '../gateway/turn-active-marker.js'
|
|
18
19
|
|
|
19
20
|
describe('turn-active-marker (#412)', () => {
|
|
@@ -192,4 +193,31 @@ describe('turn-active-marker (#412)', () => {
|
|
|
192
193
|
const mode = statSync(path).mode & 0o777
|
|
193
194
|
expect(mode).toBe(0o600)
|
|
194
195
|
})
|
|
196
|
+
|
|
197
|
+
// readTurnActiveMarkerAgeMs — the orphaned-foreground "agent still working"
|
|
198
|
+
// signal for the obligation sweep (#2240 / gymbro 2026-06-10).
|
|
199
|
+
it('readTurnActiveMarkerAgeMs returns null when the marker is absent', () => {
  // Nothing has written a marker into the state dir, so there is no age to report.
  const age = readTurnActiveMarkerAgeMs(tmp)
  expect(age).toBeNull()
})
|
|
202
|
+
|
|
203
|
+
it('readTurnActiveMarkerAgeMs returns a small age for a fresh marker', () => {
  const marker = { turnKey: 'k', chatId: 'c', threadId: null, startedAt: 1 }
  writeTurnActiveMarker(tmp, marker)

  const age = readTurnActiveMarkerAgeMs(tmp)
  expect(age).not.toBeNull()

  // A just-written marker has a tiny |age|. The raw value may dip slightly
  // below zero when the filesystem mtime lands a little ahead of Date.now();
  // that is acceptable — the freshness signal only needs a small magnitude.
  const magnitude = Math.abs(age!)
  expect(magnitude).toBeLessThan(5_000)
})
|
|
212
|
+
|
|
213
|
+
it('readTurnActiveMarkerAgeMs reflects a stale (back-dated) mtime against an injected clock', () => {
  const TEN_MIN_MS = 10 * 60_000
  const markerPath = join(tmp, TURN_ACTIVE_MARKER_FILE)
  writeTurnActiveMarker(tmp, { turnKey: 'k', chatId: 'c', threadId: null, startedAt: 1 })

  // Back-date both atime and mtime by ten minutes.
  const backdated = new Date(Date.now() - TEN_MIN_MS)
  utimesSync(markerPath, backdated, backdated)

  // Inject a clock exactly ten minutes after the back-dated timestamp, so the
  // expectation does not depend on how long the test itself takes.
  const injectedNow = backdated.getTime() + TEN_MIN_MS
  const age = readTurnActiveMarkerAgeMs(tmp, injectedNow)

  expect(age).not.toBeNull()
  const drift = Math.abs(age! - TEN_MIN_MS)
  expect(drift).toBeLessThan(50) // ~10 min old
})
|
|
195
223
|
})
|