switchroom 0.12.16 → 0.12.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +82 -81
- package/dist/auth-broker/index.js +82 -81
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +359 -358
- package/dist/host-control/main.js +101 -100
- package/dist/vault/approvals/kernel-server.js +84 -83
- package/dist/vault/broker/server.js +85 -84
- package/package.json +1 -1
- package/telegram-plugin/bridge/bridge.ts +7 -0
- package/telegram-plugin/dist/bridge/bridge.js +115 -113
- package/telegram-plugin/dist/gateway/gateway.js +461 -287
- package/telegram-plugin/dist/server.js +163 -161
- package/telegram-plugin/gateway/gateway.ts +111 -4
- package/telegram-plugin/gateway/ipc-protocol.ts +9 -0
- package/telegram-plugin/gateway/proactive-compact.ts +84 -0
- package/telegram-plugin/tests/proactive-compact.test.ts +101 -0
|
@@ -228,6 +228,8 @@ import { handleInjectCommand } from './inject-handler.js'
|
|
|
228
228
|
import { type BannerState } from '../slot-banner.js'
|
|
229
229
|
import { refreshBanner } from '../slot-banner-driver.js'
|
|
230
230
|
import { loadConfig as loadSwitchroomConfig } from '../../src/config/loader.js'; import { resolveAgentConfig } from '../../src/config/merge.js'
|
|
231
|
+
import { readTurnUsages } from '../../src/agents/perf.js'
|
|
232
|
+
import { decideProactiveCompact, initialCompactState, type CompactState } from './proactive-compact.js'
|
|
231
233
|
import {
|
|
232
234
|
tryHostdDispatch,
|
|
233
235
|
hostdRequestId,
|
|
@@ -1062,6 +1064,28 @@ const chatAvailableReactions = new Map<string, Set<string> | null>()
|
|
|
1062
1064
|
const chatProbesInFlight = new Set<string>()
|
|
1063
1065
|
const activeTurnStartedAt = new Map<string, number>()
|
|
1064
1066
|
const pendingRestarts = new Map<string, number>() // agentName -> timestamp when restart was requested
|
|
1067
|
+
|
|
1068
|
+
// ─── Proactive context compaction (session.max_context_tokens) ──────────
|
|
1069
|
+
//
|
|
1070
|
+
// Opt-in: when the resolved agent config sets session.max_context_tokens,
|
|
1071
|
+
// we fire `/compact` once the live context-window occupancy of the latest
|
|
1072
|
+
// assistant turn reaches that many tokens. Evaluated ONLY at the
|
|
1073
|
+
// model-idle gate inside purgeReactionTracking (activeTurnStartedAt.size
|
|
1074
|
+
// === 0) — never mid-turn — mirroring the pendingRestarts drain. The
|
|
1075
|
+
// `/compact` verb is allowlisted in src/agents/inject.ts and runs via the
|
|
1076
|
+
// tmux send-keys path (the only path that actually executes the slash
|
|
1077
|
+
// command; inject_inbound would deliver it as literal text).
|
|
1078
|
+
//
|
|
1079
|
+
// `lastSessionActiveFile` is the session-tail's tracked currentFile,
|
|
1080
|
+
// forwarded by the bridge on every session_event — we read occupancy from
|
|
1081
|
+
// exactly that file (never an independent findActiveSessionFile re-scan).
|
|
1082
|
+
let lastSessionActiveFile: string | null = null
|
|
1083
|
+
// Anti-spam state machine lives in ./proactive-compact (pure, unit
|
|
1084
|
+
// tested). `compactDispatching` is a synchronous re-entrancy guard for
|
|
1085
|
+
// the async tmux send — purgeReactionTracking can run several times per
|
|
1086
|
+
// turn and we must not double-dispatch before the first send settles.
|
|
1087
|
+
let compactState: CompactState = initialCompactState()
|
|
1088
|
+
let compactDispatching = false
|
|
1065
1089
|
const activeDraftStreams = new Map<string, DraftStreamHandle>()
|
|
1066
1090
|
const activeDraftParseModes = new Map<string, 'HTML' | 'MarkdownV2' | undefined>()
|
|
1067
1091
|
const suppressPtyPreview = new Set<string>()
|
|
@@ -1233,14 +1257,94 @@ function purgeReactionTracking(key: string): void {
|
|
|
1233
1257
|
// survives us getting killed by our own restart. Fire-and-forget;
|
|
1234
1258
|
// response to the client was already sent when the restart was
|
|
1235
1259
|
// scheduled, so nobody is waiting on this.
|
|
1236
|
-
if (activeTurnStartedAt.size === 0
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1260
|
+
if (activeTurnStartedAt.size === 0) {
|
|
1261
|
+
if (pendingRestarts.size > 0) {
|
|
1262
|
+
for (const [agentName, _timestamp] of pendingRestarts.entries()) {
|
|
1263
|
+
triggerSelfRestart(agentName, 'turn-complete-pending-restart');
|
|
1264
|
+
pendingRestarts.delete(agentName);
|
|
1265
|
+
}
|
|
1266
|
+
} else {
|
|
1267
|
+
// Strictly lower priority than a pending restart: if we just
|
|
1268
|
+
// kicked a restart the process is going away and compacting is
|
|
1269
|
+
// moot, so only evaluate when no restart drained this pass.
|
|
1270
|
+
maybeProactiveCompact();
|
|
1240
1271
|
}
|
|
1241
1272
|
}
|
|
1242
1273
|
}
|
|
1243
1274
|
|
|
1275
|
+
/**
 * Model-idle proactive-compaction check. Called ONLY from the
 * activeTurnStartedAt.size === 0 gate in purgeReactionTracking (never
 * mid-turn). Opt-in via the resolved agent config's
 * session.max_context_tokens; a no-op when unset, so a fresh
 * `switchroom setup` is unchanged.
 *
 * Occupancy = the latest usage-bearing assistant turn's
 * input + cache_read + cache_creation tokens (the prefix the model
 * actually re-read this turn ≈ current window fill), obtained via
 * readTurnUsages(file, 1) on the session-tail's tracked file.
 *
 * Every early exit is best-effort: a missing agent name, unreadable
 * config, missing session file, failed/empty usage read, or a
 * non-numeric occupancy all mean "no proactive compaction this pass" and
 * leave the anti-spam state untouched.
 *
 * Failure handling: if the `/compact` inject itself fails, the state is
 * re-armed (the cooldown already started by the decider is kept), so the
 * check retries after the cooldown instead of staying disarmed — without
 * a successful compaction occupancy would not drop below the re-arm band
 * on its own.
 *
 * Note (accepted, benign): there is a check-to-send race — a new inbound
 * could set activeTurnStartedAt between this idle check and the async
 * tmux send. A `/compact` that lands as a new turn starts is queued in
 * claude's prompt buffer and runs at the next idle prompt (see the
 * FUTURE-GAP note in src/agents/inject.ts); it is not a mid-generation
 * injection. We do not claim size===0 is atomic.
 */
function maybeProactiveCompact(): void {
  // A previous dispatch is still in flight; never stack a second one.
  if (compactDispatching) return;

  const agentName = process.env.SWITCHROOM_AGENT_NAME;
  if (!agentName) return;

  let cap: number | undefined;
  try {
    const cfg = loadSwitchroomConfig();
    // Resolve through the cascade so a fleet-wide
    // `defaults.session.max_context_tokens` applies even when the agent
    // has no explicit per-agent session block (rawAgent → {}).
    const rawAgent = cfg.agents?.[agentName] ?? {};
    const resolved = resolveAgentConfig(cfg.defaults, cfg.profiles, rawAgent);
    cap = resolved.session?.max_context_tokens;
  } catch {
    // Best-effort — config may be unreadable in odd boot states; a
    // failed read just means "no proactive compaction this pass".
    return;
  }
  // Opt-in: unset → native compaction only. `!(cap > 0)` also rejects NaN.
  if (cap == null || !(cap > 0)) return;

  const file = lastSessionActiveFile;
  if (!file) return;

  let occupancy: number;
  try {
    const turns = readTurnUsages(file, 1);
    if (turns.length === 0) return;
    const t = turns[0];
    occupancy = t.input + t.cacheRead + t.cacheCreate;
  } catch {
    // Best-effort, same as the config read: if the session file cannot be
    // read this pass (e.g. mid-rotation), skip rather than throw into the
    // purge path that called us.
    return;
  }
  // A missing/garbled usage field would make this NaN, and NaN compares
  // as "not below cap" — which would fire. Treat it as "no reading".
  if (!Number.isFinite(occupancy)) return;

  const decision = decideProactiveCompact(compactState, occupancy, cap);
  compactState = decision.state;
  if (!decision.fire) return;

  // Set the re-entrancy guard synchronously BEFORE the await so a
  // re-entrant purge pass can't double-dispatch (the decider already
  // disarmed + armed the cooldown in decision.state).
  compactDispatching = true;
  process.stderr.write(
    `telegram gateway: proactive /compact for ${agentName} ` +
      `(occupancy=${occupancy} >= cap=${cap})\n`,
  );
  // Fire-and-forget. The async wrapper still invokes the injector
  // synchronously, and its try/finally guarantees the guard is released
  // even if the injector throws before returning a promise.
  void (async () => {
    try {
      await injectSlashCommandImpl(agentName, '/compact');
    } catch (err: unknown) {
      process.stderr.write(
        `telegram gateway: proactive /compact inject failed for ` +
          `${agentName}: ${err instanceof Error ? err.message : String(err)}\n`,
      );
      // Nothing was compacted, so re-arm; the cooldown set by the decider
      // stays in place and rate-limits the retry.
      compactState = { ...compactState, armed: true };
    } finally {
      compactDispatching = false;
    }
  })();
}
|
|
1347
|
+
|
|
1244
1348
|
function endStatusReaction(chatId: string, threadId: number | undefined, outcome: 'done' | 'error'): void {
|
|
1245
1349
|
const key = statusKey(chatId, threadId)
|
|
1246
1350
|
const ctrl = activeStatusReactions.get(key)
|
|
@@ -2997,6 +3101,9 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2997
3101
|
},
|
|
2998
3102
|
|
|
2999
3103
|
onSessionEvent(_client: IpcClient, msg: SessionEventForward) {
|
|
3104
|
+
// Track the session-tail's attached file for the proactive-
|
|
3105
|
+
// compaction occupancy read (see maybeProactiveCompact).
|
|
3106
|
+
if (msg.activeFile) lastSessionActiveFile = msg.activeFile
|
|
3000
3107
|
const ev = msg.event as unknown as SessionEvent
|
|
3001
3108
|
// Pass the envelope's chatId so non-enqueue events can route to the
|
|
3002
3109
|
// correct card even when the driver's currentChatId is stale.
|
|
@@ -121,6 +121,15 @@ export interface SessionEventForward {
|
|
|
121
121
|
event: Record<string, unknown>;
|
|
122
122
|
chatId: string;
|
|
123
123
|
threadId?: number;
|
|
124
|
+
/**
|
|
125
|
+
* The session-tail's currently-attached JSONL path (its tracked
|
|
126
|
+
* `currentFile`, not an independent re-scan). Forwarded so the
|
|
127
|
+
* gateway's proactive-compaction check reads occupancy from the
|
|
128
|
+
* exact file the tailer is on — avoids the sub-agent-mtime /
|
|
129
|
+
* stale-rotation wrong-file hazard. Absent until the tailer has
|
|
130
|
+
* attached a file.
|
|
131
|
+
*/
|
|
132
|
+
activeFile?: string;
|
|
124
133
|
}
|
|
125
134
|
|
|
126
135
|
export interface PermissionRequestForward {
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
 * Pure decision core for proactive context compaction
 * (`session.max_context_tokens`). Kept side-effect-free so the
 * anti-spam state machine — the part most prone to livelock /
 * double-fire — is unit-testable in isolation. The impure shell
 * (config load, session-file read, tmux `/compact` inject) lives in
 * gateway.ts and calls `decideProactiveCompact` at the model-idle gate.
 *
 * Occupancy fed in by the caller = the latest usage-bearing assistant
 * turn's `input + cache_read + cache_creation` tokens (the prefix the
 * model re-read this turn ≈ live context-window fill). Not cumulative.
 */

/** Hysteresis lower band: re-arm only once occupancy < fraction × cap. */
export const COMPACT_REARM_FRACTION = 0.6;

/**
 * Turn-count re-fire floor: after a fire, skip this many idle
 * evaluations regardless of occupancy. Guards against a slow
 * post-`/compact` JSONL rotation still reading the pre-compact turn and
 * triggering an immediate second compaction.
 */
export const COMPACT_COOLDOWN_TURNS = 3;

export interface CompactState {
  /** False after a fire; re-armed only below the hysteresis band. */
  armed: boolean;
  /** Idle evaluations remaining before re-fire is even considered. */
  cooldownTurns: number;
}

export interface CompactDecision {
  fire: boolean;
  /** Next state — caller must persist this verbatim. */
  state: CompactState;
}

/** Fresh state: armed, with no cooldown pending. */
export function initialCompactState(): CompactState {
  return { armed: true, cooldownTurns: 0 };
}

/**
 * Decide whether to fire `/compact` this idle evaluation, given the
 * current state, the measured occupancy, and the configured cap.
 *
 * Precedence (each returns early):
 *   1. Cooldown floor — burn one turn, never fire (regardless of the
 *      other inputs).
 *   2. Unusable inputs — occupancy that is not a finite number, or a cap
 *      that is NaN / not positive: hold with the state unchanged. Without
 *      this, `NaN < cap` is false and an armed state would fire.
 *   3. Disarmed — re-arm iff occupancy < REARM_FRACTION × cap; never
 *      fire on the arming pass (so we can't arm and fire together).
 *   4. Below cap — hold.
 *   5. Armed and at/above cap — fire, disarm, start the cooldown.
 *
 * Livelock safety: once disarmed, step 3 is the ONLY path back to
 * armed, and it requires occupancy to actually drop below the lower
 * band. If a compaction fails to shrink context, the cap stays
 * exceeded, occupancy never drops below 0.6×cap, and we stay disarmed
 * — i.e. we degrade to "don't fire" rather than firing every turn.
 *
 * @param state - State returned by the previous evaluation (or
 *   `initialCompactState()`); it is never mutated.
 * @param occupancy - Measured context-window occupancy, in tokens.
 * @param cap - Configured token threshold at which to compact.
 * @returns Whether to fire now, plus the next state to persist.
 */
export function decideProactiveCompact(
  state: CompactState,
  occupancy: number,
  cap: number,
): CompactDecision {
  if (state.cooldownTurns > 0) {
    return {
      fire: false,
      state: { armed: state.armed, cooldownTurns: state.cooldownTurns - 1 },
    };
  }

  // `!(cap > 0)` is deliberately not `cap <= 0`: it also rejects NaN.
  if (!Number.isFinite(occupancy) || !(cap > 0)) {
    return { fire: false, state };
  }

  if (!state.armed) {
    const armed = occupancy < cap * COMPACT_REARM_FRACTION;
    return { fire: false, state: { armed, cooldownTurns: 0 } };
  }

  if (occupancy < cap) {
    return { fire: false, state };
  }

  return {
    fire: true,
    state: { armed: false, cooldownTurns: COMPACT_COOLDOWN_TURNS },
  };
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
decideProactiveCompact,
|
|
4
|
+
initialCompactState,
|
|
5
|
+
COMPACT_COOLDOWN_TURNS,
|
|
6
|
+
COMPACT_REARM_FRACTION,
|
|
7
|
+
type CompactState,
|
|
8
|
+
} from '../gateway/proactive-compact.js'
|
|
9
|
+
|
|
10
|
+
const CAP = 190_000
|
|
11
|
+
|
|
12
|
+
// Test driver: feeds each occupancy reading, in order, through
// decideProactiveCompact (threading the returned state forward) and
// collects the indices of the evaluations that fired.
function run(occ: number[], start: CompactState = initialCompactState()) {
  const fires: number[] = []
  let state = start
  for (const [index, reading] of occ.entries()) {
    const decision = decideProactiveCompact(state, reading, CAP)
    if (decision.fire) fires.push(index)
    state = decision.state
  }
  return { state, fires }
}
|
|
24
|
+
|
|
25
|
+
// Behavioural spec for the proactive-compaction state machine. Sequences
// are driven through `run`, which reports the indices that fired.
describe('decideProactiveCompact', () => {
  it('does not fire below the cap', () => {
    const { fires } = run([0, 50_000, 150_000, CAP - 1])
    expect(fires).toEqual([])
  })

  it('fires exactly once when occupancy reaches the cap, then disarms', () => {
    // Stays high after the fire (compaction has not landed yet).
    const { fires } = run([CAP, CAP, CAP])
    expect(fires).toEqual([0])
  })

  it('fires at occupancy strictly above the cap too', () => {
    const { fires } = run([CAP + 25_000])
    expect(fires).toEqual([0])
  })

  it('burns exactly COMPACT_COOLDOWN_TURNS idle evals after a fire before re-considering', () => {
    // Fire at 0, then occupancy stays pegged high. The cooldown must
    // swallow the next COMPACT_COOLDOWN_TURNS evals with no fire, and
    // because it is still above the re-arm band it never re-arms ->
    // never fires again. (Livelock guard.)
    const seq = new Array(1 + COMPACT_COOLDOWN_TURNS + 5).fill(CAP)
    const { fires } = run(seq)
    expect(fires).toEqual([0])

    // The pegged-high sequence alone would also pass with a zero-length
    // cooldown (disarmed + above band never fires). Pin the count
    // directly: feed occupancy BELOW the re-arm band during the cooldown
    // and check each eval burns exactly one turn without re-arming early.
    const fired = decideProactiveCompact(initialCompactState(), CAP, CAP)
    expect(fired.fire).toBe(true)
    expect(fired.state).toEqual({ armed: false, cooldownTurns: COMPACT_COOLDOWN_TURNS })

    let state = fired.state
    for (let remaining = COMPACT_COOLDOWN_TURNS; remaining > 0; remaining--) {
      const d = decideProactiveCompact(state, 0, CAP)
      expect(d.fire).toBe(false)
      expect(d.state).toEqual({ armed: false, cooldownTurns: remaining - 1 })
      state = d.state
    }

    // Only the first post-cooldown eval may re-arm (and it must not fire).
    const rearm = decideProactiveCompact(state, 0, CAP)
    expect(rearm.fire).toBe(false)
    expect(rearm.state).toEqual({ armed: true, cooldownTurns: 0 })
  })

  it('re-arms only after occupancy falls below REARM_FRACTION × cap, never on the arming pass', () => {
    const lowBand = CAP * COMPACT_REARM_FRACTION
    // fire(0) -> cooldown(1..3) -> still high(4) stays disarmed ->
    // drop just below band(5): arms but does NOT fire same pass ->
    // climb back to cap(6): fires again.
    const seq = [
      CAP, // 0 fire
      CAP, // 1 cooldown
      CAP, // 2 cooldown
      CAP, // 3 cooldown
      CAP, // 4 disarmed, above band -> hold
      lowBand - 1, // 5 re-arm, must NOT fire here
      CAP, // 6 armed + at cap -> fire
    ]
    const { fires } = run(seq)
    expect(fires).toEqual([0, 6])
  })

  it('does not re-arm if occupancy only drops to the band but not below it', () => {
    const lowBand = CAP * COMPACT_REARM_FRACTION
    // After cooldown, occupancy sits exactly at the band (not strictly
    // below) forever -> never re-arms -> only the first fire.
    const seq = [CAP, CAP, CAP, CAP, lowBand, lowBand, lowBand, CAP, CAP]
    const { fires } = run(seq)
    expect(fires).toEqual([0])
  })

  it('full healthy cycle: fire, compaction shrinks context, climbs again, fires again', () => {
    const seq = [
      120_000, // below cap
      CAP, // fire (idx 1)
      30_000, // cooldown 1 (post-compact, small)
      35_000, // cooldown 2
      40_000, // cooldown 3
      45_000, // disarmed, below band -> re-arm (no fire)
      90_000, // armed, below cap -> hold
      CAP + 5_000, // fire again (idx 7)
    ]
    const { fires } = run(seq)
    expect(fires).toEqual([1, 7])
  })

  it('never fires twice in immediate succession even with no cooldown left if still disarmed', () => {
    // Construct a state that is past cooldown but disarmed, occupancy
    // pegged at cap: must hold (not fire) until it drops below band.
    const stuck: CompactState = { armed: false, cooldownTurns: 0 }
    const { fires } = run([CAP, CAP, CAP, CAP], stuck)
    expect(fires).toEqual([])
  })
})
|