switchroom 0.5.0 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -121
- package/bin/autoaccept.exp +29 -6
- package/dist/agent-scheduler/index.js +12261 -0
- package/dist/cli/autoaccept-poll.js +10 -0
- package/dist/cli/switchroom.js +27250 -25324
- package/dist/vault/approvals/kernel-server.js +12709 -0
- package/dist/vault/broker/server.js +15724 -0
- package/package.json +4 -3
- package/profiles/_base/start.sh.hbs +133 -0
- package/profiles/_shared/telegram-style.md.hbs +3 -3
- package/profiles/default/CLAUDE.md +3 -3
- package/profiles/default/CLAUDE.md.hbs +2 -2
- package/profiles/default/workspace/CLAUDE.md.hbs +9 -0
- package/skills/docx/VENDORED.md +1 -1
- package/skills/mcp-builder/VENDORED.md +1 -1
- package/skills/pdf/VENDORED.md +1 -1
- package/skills/pptx/VENDORED.md +1 -1
- package/skills/skill-creator/VENDORED.md +1 -1
- package/skills/switchroom-architecture/SKILL.md +8 -7
- package/skills/switchroom-cli/SKILL.md +23 -15
- package/skills/switchroom-health/SKILL.md +7 -7
- package/skills/switchroom-install/SKILL.md +36 -39
- package/skills/switchroom-manage/SKILL.md +4 -4
- package/skills/switchroom-status/SKILL.md +1 -1
- package/skills/webapp-testing/VENDORED.md +1 -1
- package/skills/xlsx/VENDORED.md +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +119 -1
- package/telegram-plugin/admin-commands/index.ts +71 -0
- package/telegram-plugin/ask-user.ts +1 -0
- package/telegram-plugin/card-event-log.ts +138 -0
- package/telegram-plugin/dist/bridge/bridge.js +178 -31
- package/telegram-plugin/dist/foreman/foreman.js +6875 -6526
- package/telegram-plugin/dist/gateway/gateway.js +13862 -11834
- package/telegram-plugin/dist/server.js +202 -40
- package/telegram-plugin/fleet-state.ts +25 -10
- package/telegram-plugin/foreman/foreman.ts +38 -3
- package/telegram-plugin/gateway/approval-callback.ts +126 -0
- package/telegram-plugin/gateway/approval-card.test.ts +90 -0
- package/telegram-plugin/gateway/approval-card.ts +127 -0
- package/telegram-plugin/gateway/approvals-commands.ts +126 -0
- package/telegram-plugin/gateway/boot-card.ts +31 -6
- package/telegram-plugin/gateway/boot-probes.ts +503 -72
- package/telegram-plugin/gateway/gateway.ts +822 -94
- package/telegram-plugin/gateway/ipc-protocol.ts +34 -1
- package/telegram-plugin/gateway/ipc-server.ts +35 -0
- package/telegram-plugin/gateway/startup-mutex.ts +110 -2
- package/telegram-plugin/hooks/hooks.json +19 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +216 -0
- package/telegram-plugin/hooks/tool-label-stop.mjs +63 -0
- package/telegram-plugin/package.json +4 -1
- package/telegram-plugin/plugin-logger.ts +20 -1
- package/telegram-plugin/progress-card-driver.ts +202 -13
- package/telegram-plugin/progress-card.ts +2 -2
- package/telegram-plugin/quota-check.ts +1 -0
- package/telegram-plugin/registry/subagents-schema.ts +37 -0
- package/telegram-plugin/registry/subagents.test.ts +64 -0
- package/telegram-plugin/session-tail.ts +58 -5
- package/telegram-plugin/shared/bot-runtime.ts +48 -2
- package/telegram-plugin/subagent-watcher.ts +139 -7
- package/telegram-plugin/tests/_progress-card-harness.ts +4 -0
- package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +201 -0
- package/telegram-plugin/tests/boot-card-probe-target.test.ts +10 -34
- package/telegram-plugin/tests/boot-card-render.test.ts +6 -5
- package/telegram-plugin/tests/boot-probes.test.ts +558 -0
- package/telegram-plugin/tests/card-event-log.test.ts +145 -0
- package/telegram-plugin/tests/gateway-startup-mutex.test.ts +102 -0
- package/telegram-plugin/tests/ipc-server-validate-inject-inbound.test.ts +134 -0
- package/telegram-plugin/tests/progress-card-delay-842.test.ts +160 -0
- package/telegram-plugin/tests/quota-check.test.ts +37 -1
- package/telegram-plugin/tests/subagent-registry-bugs.test.ts +5 -0
- package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +104 -1
- package/telegram-plugin/tests/subagent-watcher.test.ts +5 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +114 -0
- package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +5 -3
- package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +10 -0
- package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +58 -14
- package/telegram-plugin/tests/welcome-text.test.ts +57 -0
- package/telegram-plugin/tool-label-sidecar.ts +140 -0
- package/telegram-plugin/tool-labels.ts +55 -0
- package/telegram-plugin/two-zone-card.ts +27 -7
- package/telegram-plugin/uat/SETUP.md +160 -0
- package/telegram-plugin/uat/assertions.ts +140 -0
- package/telegram-plugin/uat/driver.ts +174 -0
- package/telegram-plugin/uat/harness.ts +161 -0
- package/telegram-plugin/uat/login.ts +134 -0
- package/telegram-plugin/uat/port-allocator.ts +71 -0
- package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +61 -0
- package/telegram-plugin/welcome-text.ts +44 -2
- package/bin/bridge-watchdog.sh +0 -967
|
@@ -58,7 +58,8 @@ import { handlePtyPartialPure, type PtyHandlerState } from '../pty-partial-handl
|
|
|
58
58
|
import { handleStreamReply } from '../stream-reply-handler.js'
|
|
59
59
|
import { createChatLock } from '../chat-lock.js'
|
|
60
60
|
import { createRetryApiCall } from '../retry-api-call.js'
|
|
61
|
-
import { installTgPostLogger } from '../shared/bot-runtime.js'
|
|
61
|
+
import { installTgPostLogger, withTgPostTags } from '../shared/bot-runtime.js'
|
|
62
|
+
import { emitCardEvent } from '../card-event-log.js'
|
|
62
63
|
import { buildAttachmentPath, assertInsideInbox } from '../attachment-path.js'
|
|
63
64
|
import { createPinManager } from '../progress-card-pin-manager.js'
|
|
64
65
|
import { createPinWatchdog } from '../progress-card-pin-watchdog.js'
|
|
@@ -152,7 +153,7 @@ import {
|
|
|
152
153
|
resetSessionAckText as buildResetSessionAckText,
|
|
153
154
|
TELEGRAM_BASE_COMMANDS,
|
|
154
155
|
TELEGRAM_SWITCHROOM_COMMANDS,
|
|
155
|
-
type AgentMetadata, type AuthSummary,
|
|
156
|
+
type AgentMetadata, type AuthSummary, type StatusProbeRow,
|
|
156
157
|
} from '../welcome-text.js'
|
|
157
158
|
import {
|
|
158
159
|
isContextExhaustionText,
|
|
@@ -229,6 +230,7 @@ import type {
|
|
|
229
230
|
OperatorEventForward,
|
|
230
231
|
PtyPartialForward,
|
|
231
232
|
InboundMessage,
|
|
233
|
+
InjectInboundMessage,
|
|
232
234
|
} from './ipc-protocol.js'
|
|
233
235
|
import { writePidFile, clearPidFile } from './pid-file.js'
|
|
234
236
|
import { acquireStartupLock, releaseStartupLock } from './startup-mutex.js'
|
|
@@ -257,7 +259,7 @@ import { StagingMap } from '../secret-detect/staging.js'
|
|
|
257
259
|
import { maskToken } from '../secret-detect/mask.js'
|
|
258
260
|
import { defaultVaultWrite, defaultVaultList } from '../secret-detect/vault-write.js'
|
|
259
261
|
import { detectSecrets } from '../secret-detect/index.js'
|
|
260
|
-
import {
|
|
262
|
+
import { classifyAdminGate } from '../admin-commands/index.js'
|
|
261
263
|
import {
|
|
262
264
|
startSubagentWatcher,
|
|
263
265
|
type SubagentWatcherHandle,
|
|
@@ -303,6 +305,11 @@ import {
|
|
|
303
305
|
listGrantsViaBroker,
|
|
304
306
|
revokeGrantViaBroker,
|
|
305
307
|
} from '../../src/vault/broker/client.js'
|
|
308
|
+
import {
|
|
309
|
+
approvalRequest,
|
|
310
|
+
approvalConsume,
|
|
311
|
+
approvalRecord,
|
|
312
|
+
} from '../../src/vault/approvals/client.js'
|
|
306
313
|
import {
|
|
307
314
|
openTurnsDb,
|
|
308
315
|
markOrphanedAsRestarted,
|
|
@@ -325,6 +332,71 @@ const APPROVED_DIR = join(STATE_DIR, 'approved')
|
|
|
325
332
|
const ENV_FILE = join(STATE_DIR, '.env')
|
|
326
333
|
const INBOX_DIR = join(STATE_DIR, 'inbox')
|
|
327
334
|
|
|
335
|
+
/**
 * Trigger a restart of the agent + gateway pair.
 *
 * Branches on `SWITCHROOM_RUNTIME`:
 *   - `docker`: send `SIGTERM` to PID 1 (tini) after a brief delay so
 *     in-flight IPC responses flush. tini propagates the signal to its
 *     children, the container exits, and docker compose's
 *     `restart: unless-stopped` policy recreates it. This covers BOTH the
 *     agent process and the gateway plugin. A request whose `targetAgent`
 *     differs from `SWITCHROOM_AGENT_NAME` is refused (logged, returns
 *     false) because another agent's container cannot be restarted from
 *     inside this one. Note the refusal only applies when
 *     `SWITCHROOM_AGENT_NAME` is set.
 *   - else (legacy systemd): detached `systemctl --user restart` of the
 *     two units. The detach is required so the systemctl job survives
 *     this process being SIGTERM'd by systemd itself.
 *
 * @param targetAgent Agent whose units should be restarted. Callers
 *   normally pass this gateway's own agent name.
 * @param reason Free-form tag; only used in the stderr log lines.
 * @param delayMs Grace period before the restart fires. Non-finite or
 *   negative values are treated as 0.
 * @returns `true` when a restart was scheduled; `false` when it was
 *   refused (cross-agent under docker) or the systemd spawn threw.
 */
function triggerSelfRestart(
  targetAgent: string,
  reason: string,
  delayMs = 300,
): boolean {
  const isDocker = process.env.SWITCHROOM_RUNTIME === 'docker'
  const selfAgent = process.env.SWITCHROOM_AGENT_NAME
  // A NaN/negative delay would make `sleep` fail on the systemd path and the
  // `&&` would then skip the restart while we still reported success.
  const effectiveDelayMs = Number.isFinite(delayMs) && delayMs > 0 ? delayMs : 0

  if (isDocker) {
    if (selfAgent && targetAgent !== selfAgent) {
      process.stderr.write(
        `telegram gateway: cross-agent restart not supported under docker (target=${targetAgent}, self=${selfAgent}, reason=${reason})\n`,
      )
      return false
    }
    process.stderr.write(
      `telegram gateway: restart-via-SIGTERM-PID1 agent=${targetAgent} reason=${reason} (docker)\n`,
    )
    // unref() so a pending restart timer never keeps the event loop alive.
    setTimeout(() => {
      try { process.kill(1, 'SIGTERM') } catch (err) {
        process.stderr.write(`telegram gateway: SIGTERM PID 1 failed: ${err}\n`)
      }
    }, effectiveDelayMs).unref()
    return true
  }

  // Legacy systemd path.
  process.stderr.write(
    `telegram gateway: restart-via-systemctl agent=${targetAgent} reason=${reason}\n`,
  )
  try {
    spawn(
      'sh',
      [
        '-c',
        // The delay and agent name travel as positional parameters ($1, $2)
        // instead of being spliced into the script text, so an agent name
        // containing shell metacharacters cannot alter the command.
        'sleep "$1" && systemctl --user restart "switchroom-$2.service" "switchroom-$2-gateway.service"',
        'sh', // becomes $0
        (effectiveDelayMs / 1000).toFixed(2),
        targetAgent,
      ],
      { detached: true, stdio: 'ignore' },
    ).unref()
    return true
  } catch (err) {
    process.stderr.write(`telegram gateway: restart spawn failed for ${targetAgent}: ${err}\n`)
    return false
  }
}
|
|
399
|
+
|
|
328
400
|
/**
|
|
329
401
|
* Format the version string shown in the boot-card ack line. Two shapes
|
|
330
402
|
* matching the deleted greeting card's behavior:
|
|
@@ -380,12 +452,53 @@ try {
|
|
|
380
452
|
}
|
|
381
453
|
}
|
|
382
454
|
|
|
383
|
-
|
|
384
|
-
|
|
455
|
+
// Issue #758: if TELEGRAM_BOT_TOKEN is not set in env (e.g. agent's .env was
|
|
456
|
+
// never written because bot_token in switchroom.yaml is a `vault:` reference),
|
|
457
|
+
// materialize it from the vault at startup. Resolved value is held in
|
|
458
|
+
// process.env only — never written back to disk.
|
|
459
|
+
//
|
|
460
|
+
// The outer try/catch is narrowed (post-#761 review) to ONLY catch the case
|
|
461
|
+
// where the helper module itself fails to load (ERR_MODULE_NOT_FOUND from the
|
|
462
|
+
// dynamic import). Anything else — including throws from inside
|
|
463
|
+
// materializeBotToken that aren't BotTokenMaterializeError — must propagate
|
|
464
|
+
// with its original message so we don't mask real bugs behind the legacy
|
|
465
|
+
// "set in .env" hint.
|
|
466
|
+
type MaterializeMod = typeof import('../../src/telegram/materialize-bot-token.js')
|
|
467
|
+
let materializeMod: MaterializeMod | null = null
|
|
468
|
+
try {
|
|
469
|
+
materializeMod = await import('../../src/telegram/materialize-bot-token.js')
|
|
470
|
+
} catch (err) {
|
|
471
|
+
const code = (err as NodeJS.ErrnoException | undefined)?.code
|
|
472
|
+
if (code === 'ERR_MODULE_NOT_FOUND' || code === 'MODULE_NOT_FOUND') {
|
|
473
|
+
// Module genuinely missing — fall through with materializeMod=null and
|
|
474
|
+
// handle below.
|
|
475
|
+
} else {
|
|
476
|
+
// Programming error, side-effect failure during module init, etc.
|
|
477
|
+
// Propagate the real message rather than masking it.
|
|
478
|
+
throw err
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
let TOKEN: string
|
|
483
|
+
if (materializeMod !== null) {
|
|
484
|
+
const { materializeBotToken, BotTokenMaterializeError } = materializeMod
|
|
485
|
+
try {
|
|
486
|
+
TOKEN = await materializeBotToken({ agentName: process.env.SWITCHROOM_AGENT_NAME })
|
|
487
|
+
} catch (err) {
|
|
488
|
+
if (err instanceof BotTokenMaterializeError) {
|
|
489
|
+
process.stderr.write(`telegram gateway: ${err.message}\n`)
|
|
490
|
+
process.exit(1)
|
|
491
|
+
}
|
|
492
|
+
throw err
|
|
493
|
+
}
|
|
494
|
+
} else if (process.env.TELEGRAM_BOT_TOKEN) {
|
|
495
|
+
TOKEN = process.env.TELEGRAM_BOT_TOKEN
|
|
496
|
+
} else {
|
|
385
497
|
process.stderr.write(
|
|
386
498
|
`telegram gateway: TELEGRAM_BOT_TOKEN required\n` +
|
|
387
499
|
` set in ${ENV_FILE}\n` +
|
|
388
|
-
` format: TELEGRAM_BOT_TOKEN=123456789:AAH...\n
|
|
500
|
+
` format: TELEGRAM_BOT_TOKEN=123456789:AAH...\n` +
|
|
501
|
+
` (token-materialization helper not found)\n`,
|
|
389
502
|
)
|
|
390
503
|
process.exit(1)
|
|
391
504
|
}
|
|
@@ -954,20 +1067,7 @@ function purgeReactionTracking(key: string): void {
|
|
|
954
1067
|
// scheduled, so nobody is waiting on this.
|
|
955
1068
|
if (activeTurnStartedAt.size === 0 && pendingRestarts.size > 0) {
|
|
956
1069
|
for (const [agentName, _timestamp] of pendingRestarts.entries()) {
|
|
957
|
-
|
|
958
|
-
try {
|
|
959
|
-
spawn(
|
|
960
|
-
'sh',
|
|
961
|
-
[
|
|
962
|
-
'-c',
|
|
963
|
-
// Sleep briefly so our stderr flush lands before systemd kills us.
|
|
964
|
-
`sleep 0.3 && systemctl --user restart switchroom-${agentName}.service switchroom-${agentName}-gateway.service`,
|
|
965
|
-
],
|
|
966
|
-
{ detached: true, stdio: 'ignore' },
|
|
967
|
-
).unref();
|
|
968
|
-
} catch (err) {
|
|
969
|
-
process.stderr.write(`telegram gateway: restart spawn failed for ${agentName}: ${err}\n`);
|
|
970
|
-
}
|
|
1070
|
+
triggerSelfRestart(agentName, 'turn-complete-pending-restart');
|
|
971
1071
|
pendingRestarts.delete(agentName);
|
|
972
1072
|
}
|
|
973
1073
|
}
|
|
@@ -1275,6 +1375,17 @@ type PendingVaultOp =
|
|
|
1275
1375
|
expiresLabel?: string // human-readable label for confirmation
|
|
1276
1376
|
description?: string
|
|
1277
1377
|
awaitingCustomDuration?: boolean // true while waiting for text reply
|
|
1378
|
+
/**
|
|
1379
|
+
* Approval-kernel request_id minted at the wizard confirm step
|
|
1380
|
+
* (MIGRATION.md §2, Phase 1 dual-dispatch — audit-only, advisory).
|
|
1381
|
+
* When set, `vg:generate` ALSO consumes + records an `allow_once`
|
|
1382
|
+
* decision on the kernel; `vg:cancel` records a `deny`. Cards in
|
|
1383
|
+
* flight from before this PR landed have it `undefined` and the
|
|
1384
|
+
* legacy `mintGrantViaBroker` runs alone — no kernel write. After
|
|
1385
|
+
* 1-2 releases the legacy-only branch can be removed (#833 Phase 2
|
|
1386
|
+
* is the enforcing flip).
|
|
1387
|
+
*/
|
|
1388
|
+
kernel_request_id?: string
|
|
1278
1389
|
startedAt: number
|
|
1279
1390
|
}
|
|
1280
1391
|
// Issue #228: waiting for confirmation before revoking a grant.
|
|
@@ -1300,12 +1411,165 @@ interface DeferredSecret {
|
|
|
1300
1411
|
* slug if detection didn't fire.
|
|
1301
1412
|
*/
|
|
1302
1413
|
suggested_slug: string
|
|
1414
|
+
/**
|
|
1415
|
+
* Approval-kernel request_id minted alongside the bespoke deferred-secret
|
|
1416
|
+
* card (MIGRATION.md §1, Phase 1 dual-dispatch). When set, the
|
|
1417
|
+
* `vd:unlock` / `vd:cancel` callback handler ALSO records the user's
|
|
1418
|
+
* decision on the kernel side via `approvalConsume` + `approvalRecord`,
|
|
1419
|
+
* so the audit log captures the unlock event.
|
|
1420
|
+
*
|
|
1421
|
+
* `undefined` on cards built before this PR landed (in-flight at deploy
|
|
1422
|
+
* time) — the legacy handler runs alone, no kernel record. After ~1-2
|
|
1423
|
+
* releases the legacy-only branch can be removed (separate cleanup PR).
|
|
1424
|
+
*/
|
|
1425
|
+
kernel_request_id?: string
|
|
1303
1426
|
}
|
|
1304
1427
|
const deferredSecrets = new Map<string, DeferredSecret>()
|
|
1428
|
+
|
|
1429
|
+
/**
 * Open an approval-kernel request for a deferred-secret card
 * (MIGRATION.md §1).
 *
 * Best-effort by design: the function never throws. It resolves to `null`
 * when `SWITCHROOM_AGENT_NAME` is unset, when `approvalRequest` resolves to
 * `null` or to a request that is not in the `pending` state, or when the
 * call throws (the error message is written to stderr). Callers are expected
 * to continue on the legacy-only path in that case so the unlock UX does not
 * depend on kernel availability.
 *
 * The request is filed under this gateway's own agent unit
 * (`switchroom-<agent>.service`) with scope `secret:<slug>` and action
 * `unlock`.
 *
 * @param slug Secret slug used to build the request scope.
 * @param approverSet Approver ids forwarded as `approver_set`.
 * @returns The kernel `request_id`, or `null` when no pending request exists.
 */
async function mintDeferredSecretKernelRequest(
  slug: string,
  approverSet: string[],
): Promise<string | null> {
  const localAgent = process.env.SWITCHROOM_AGENT_NAME
  if (!localAgent) return null

  try {
    const response = await approvalRequest({
      agent_unit: `switchroom-${localAgent}.service`,
      scope: `secret:${slug}`,
      action: 'unlock',
      approver_set: approverSet,
      why: 'Unlock vault to save a deferred secret detected in chat.',
    })
    if (response !== null && response.state === 'pending') {
      return response.request_id
    }
    return null
  } catch (err) {
    const detail = (err as Error).message
    process.stderr.write(
      `[approval-kernel] mintDeferredSecretKernelRequest failed: ${detail}\n`,
    )
    return null
  }
}
|
|
1462
|
+
|
|
1463
|
+
/**
 * Mirror the user's allow/deny choice for a deferred-secret card onto the
 * approval kernel.
 *
 * Best-effort: the function never throws. It does nothing when `request_id`
 * is missing (e.g. a card created without a kernel request), and it skips
 * the record step when `approvalConsume` resolves to `null` or reports the
 * request as not consumed. Any thrown error is logged to stderr and
 * swallowed so the legacy flow is unaffected.
 *
 * @param request_id Kernel request id stored on the card, if any.
 * @param decision Decision to record.
 * @param granted_by_user_id Id of the user who made the decision.
 * @param approverSet Approver ids forwarded as `approver_set`.
 */
async function recordDeferredSecretKernelDecision(
  request_id: string | undefined,
  decision: 'allow_once' | 'deny',
  granted_by_user_id: number,
  approverSet: string[],
): Promise<void> {
  if (!request_id) return

  try {
    const outcome = await approvalConsume(request_id)
    if (outcome !== null && outcome.consumed) {
      // The decision is recorded without a TTL.
      await approvalRecord({
        request_id,
        decision,
        approver_set: approverSet,
        granted_by_user_id,
        ttl_ms: null,
      })
    }
  } catch (err) {
    const detail = (err as Error).message
    process.stderr.write(
      `[approval-kernel] recordDeferredSecretKernelDecision failed: ${detail}\n`,
    )
  }
}
|
|
1305
1492
|
/**
 * Build the lookup key used for entries in `deferredSecrets`:
 * `<chat_id>:<message_id>`.
 */
function deferredKey(chat_id: string, message_id: number): string {
  const separator = ':'
  return chat_id + separator + message_id
}
|
|
1308
1495
|
|
|
1496
|
+
/**
 * Open an approval-kernel request for the confirm step of the
 * `/vault grant` wizard (MIGRATION.md §2, Phase 1 audit-only
 * dual-dispatch; issue #833 tracks the enforcing Phase 2).
 *
 * Best-effort: the function never throws. It resolves to `null` when
 * `SWITCHROOM_AGENT_NAME` is unset, when `approvalRequest` resolves to
 * `null` or to a non-`pending` request, or when the call throws (logged to
 * stderr). The wizard is expected to carry on with the legacy grant path in
 * that case.
 *
 * The request uses scope `vault:grant:<agentSlug>` and action `mint`, filed
 * under this gateway's own agent unit.
 * NOTE(review): the earlier comment here described the scope as mirroring a
 * `vault:secret:<slug>` namespace, while the deferred-secret helper in this
 * file sends `secret:<slug>` — confirm which shape the kernel expects.
 *
 * @param agentSlug Agent the capability token is being minted for.
 * @param approverSet Approver ids forwarded as `approver_set`.
 * @param selectedKeys Keys chosen in the wizard; only the count is reported.
 * @param ttlSeconds Token TTL in seconds, or `null` for no expiry.
 * @returns The kernel `request_id`, or `null` when no pending request exists.
 */
async function mintGrantWizardKernelRequest(
  agentSlug: string,
  approverSet: string[],
  selectedKeys: string[],
  ttlSeconds: number | null,
): Promise<string | null> {
  const localAgent = process.env.SWITCHROOM_AGENT_NAME
  if (!localAgent) return null

  try {
    // Human-readable justification attached to the request.
    const expiryLabel = ttlSeconds === null ? 'no expiry' : `${ttlSeconds}s TTL`
    const why = `Mint capability token for agent "${agentSlug}" — ${selectedKeys.length} key(s), ${expiryLabel}.`

    const response = await approvalRequest({
      agent_unit: `switchroom-${localAgent}.service`,
      scope: `vault:grant:${agentSlug}`,
      action: 'mint',
      approver_set: approverSet,
      why,
    })
    if (response !== null && response.state === 'pending') {
      return response.request_id
    }
    return null
  } catch (err) {
    const detail = (err as Error).message
    process.stderr.write(
      `[approval-kernel] mintGrantWizardKernelRequest failed: ${detail}\n`,
    )
    return null
  }
}
|
|
1541
|
+
|
|
1542
|
+
/**
 * Mirror the user's allow/deny choice for a `/vault grant` wizard card onto
 * the approval kernel.
 *
 * Best-effort: the function never throws. It does nothing when `request_id`
 * is missing, and it skips the record step when `approvalConsume` resolves
 * to `null` or reports the request as not consumed. Any thrown error is
 * logged to stderr and swallowed so the legacy wizard flow is unaffected.
 *
 * @param request_id Kernel request id stored on the wizard state, if any.
 * @param decision Decision to record.
 * @param granted_by_user_id Id of the user who made the decision.
 * @param approverSet Approver ids forwarded as `approver_set`.
 */
async function recordGrantWizardKernelDecision(
  request_id: string | undefined,
  decision: 'allow_once' | 'deny',
  granted_by_user_id: number,
  approverSet: string[],
): Promise<void> {
  if (!request_id) return

  try {
    const outcome = await approvalConsume(request_id)
    if (outcome !== null && outcome.consumed) {
      // The decision is recorded without a TTL.
      await approvalRecord({
        request_id,
        decision,
        approver_set: approverSet,
        granted_by_user_id,
        ttl_ms: null,
      })
    }
  } catch (err) {
    const detail = (err as Error).message
    process.stderr.write(
      `[approval-kernel] recordGrantWizardKernelDecision failed: ${detail}\n`,
    )
  }
}
|
|
1572
|
+
|
|
1309
1573
|
// Channel B context rule — tracks when the gateway has emitted the
|
|
1310
1574
|
// "Paste the browser code here" prompt so that the next inbound message
|
|
1311
1575
|
// in the same chat is treated as auth-flow-sensitive regardless of whether
|
|
@@ -1440,20 +1704,7 @@ const pendingStateReaper = setInterval(() => {
|
|
|
1440
1704
|
`telegram gateway: [restart-drain] forcing agent=${agentName} waited=${waitedSec}s threshold=${Math.round(PENDING_RESTART_DRAIN_CAP_MS / 1000)}s\n`,
|
|
1441
1705
|
)
|
|
1442
1706
|
pendingRestarts.delete(agentName)
|
|
1443
|
-
|
|
1444
|
-
spawn(
|
|
1445
|
-
'sh',
|
|
1446
|
-
[
|
|
1447
|
-
'-c',
|
|
1448
|
-
// The systemctl restart will SIGTERM then SIGKILL after TimeoutStopSec.
|
|
1449
|
-
// The currently-running claude process will get SIGKILL via the unit stop.
|
|
1450
|
-
`sleep 0.1 && systemctl --user restart switchroom-${agentName}.service switchroom-${agentName}-gateway.service`,
|
|
1451
|
-
],
|
|
1452
|
-
{ detached: true, stdio: 'ignore' },
|
|
1453
|
-
).unref()
|
|
1454
|
-
} catch (err) {
|
|
1455
|
-
process.stderr.write(`telegram gateway: [restart-drain] forced restart spawn failed agent=${agentName}: ${err}\n`)
|
|
1456
|
-
}
|
|
1707
|
+
triggerSelfRestart(agentName, 'restart-drain-cap-forced', 100)
|
|
1457
1708
|
}
|
|
1458
1709
|
}
|
|
1459
1710
|
}, 60_000)
|
|
@@ -1887,6 +2138,7 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
1887
2138
|
restartAgeMs: markerAgeMs,
|
|
1888
2139
|
loadAccounts: () => loadAccountsForBootCard(agentSlug),
|
|
1889
2140
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
2141
|
+
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
1890
2142
|
}, ackMsgId).then(handle => {
|
|
1891
2143
|
activeBootCard = handle
|
|
1892
2144
|
}).catch((err: Error) => {
|
|
@@ -2007,27 +2259,19 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2007
2259
|
const turnInFlight = activeTurnStartedAt.size > 0;
|
|
2008
2260
|
|
|
2009
2261
|
if (!turnInFlight) {
|
|
2010
|
-
// No active turn, restart immediately. Cycle both the agent
|
|
2011
|
-
//
|
|
2012
|
-
// propagate. Send the client response FIRST, then
|
|
2013
|
-
//
|
|
2014
|
-
//
|
|
2015
|
-
//
|
|
2262
|
+
// No active turn, restart immediately. Cycle both the agent and
|
|
2263
|
+
// gateway side-by-side so telegram-plugin code changes always
|
|
2264
|
+
// propagate. Send the client response FIRST, then trigger the
|
|
2265
|
+
// restart after a brief delay so the IPC response has flushed
|
|
2266
|
+
// before we get killed. (Under docker the helper SIGTERM's PID 1;
|
|
2267
|
+
// under systemd it spawns a detached `systemctl restart`.)
|
|
2016
2268
|
try {
|
|
2017
2269
|
client.send({
|
|
2018
2270
|
type: 'schedule_restart_result',
|
|
2019
2271
|
success: true,
|
|
2020
2272
|
restartedImmediately: true,
|
|
2021
2273
|
});
|
|
2022
|
-
|
|
2023
|
-
'sh',
|
|
2024
|
-
[
|
|
2025
|
-
'-c',
|
|
2026
|
-
`sleep 0.3 && systemctl --user restart switchroom-${agentName}.service switchroom-${agentName}-gateway.service`,
|
|
2027
|
-
],
|
|
2028
|
-
{ detached: true, stdio: 'ignore' },
|
|
2029
|
-
).unref();
|
|
2030
|
-
process.stderr.write(`telegram gateway: scheduled immediate restart of ${agentName} (agent + gateway)\n`);
|
|
2274
|
+
triggerSelfRestart(agentName, 'schedule-restart-immediate');
|
|
2031
2275
|
} catch (err) {
|
|
2032
2276
|
client.send({
|
|
2033
2277
|
type: 'schedule_restart_result',
|
|
@@ -2090,6 +2334,31 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2090
2334
|
handlePtyPartial(msg.text)
|
|
2091
2335
|
},
|
|
2092
2336
|
|
|
2337
|
+
/**
 * Phase 2 cron-fold-in: relay a synthesized inbound message from the
 * in-agent scheduler to the bridge registered for `msg.agentName`.
 *
 * The wrapped `inbound` envelope is passed to `ipcServer.sendToAgent`
 * unmodified; this handler does no validation of its own.
 *
 * Every call is logged to stderr with the agent name, the `source` and
 * `prompt_key` read from `inbound.meta` (or `unknown` when either is
 * absent or not a string), and the delivery result, so an operator can
 * correlate a transcript turn with the scheduler's record by `prompt_key`.
 */
onInjectInbound(_client: IpcClient, msg: InjectInboundMessage) {
  // Normalise an optional meta field to a loggable string.
  const labelOf = (value: unknown): string =>
    typeof value === 'string' ? value : 'unknown'

  const meta = msg.inbound.meta
  const promptKey = labelOf(meta?.prompt_key)
  const source = labelOf(meta?.source)

  const delivered = ipcServer.sendToAgent(msg.agentName, msg.inbound)
  process.stderr.write(
    `telegram gateway: inject_inbound agent=${msg.agentName} source=${source} prompt_key=${promptKey} delivered=${delivered}\n`,
  )
},
|
|
2361
|
+
|
|
2093
2362
|
log: (msg) => process.stderr.write(`telegram gateway: ipc — ${msg}\n`),
|
|
2094
2363
|
})
|
|
2095
2364
|
|
|
@@ -4702,12 +4971,18 @@ async function handleInbound(
|
|
|
4702
4971
|
// the post-context flow stays seamless.
|
|
4703
4972
|
const dKey = deferredKey(chat_id, msgId ?? 0)
|
|
4704
4973
|
const cachedBranchDetection = detectSecrets(effectiveText).find((d) => d.confidence === 'high' && !d.suppressed)
|
|
4974
|
+
const cachedBranchSlug = cachedBranchDetection?.suggested_slug ?? (isAuthFlowContext ? 'anthropic_oauth_code' : 'secret')
|
|
4975
|
+
const cachedBranchKernelId = await mintDeferredSecretKernelRequest(
|
|
4976
|
+
cachedBranchSlug,
|
|
4977
|
+
loadAccess().allowFrom,
|
|
4978
|
+
)
|
|
4705
4979
|
deferredSecrets.set(dKey, {
|
|
4706
4980
|
chat_id,
|
|
4707
4981
|
original_message_id: msgId ?? 0,
|
|
4708
4982
|
text: effectiveText,
|
|
4709
4983
|
staged_at: Date.now(),
|
|
4710
|
-
suggested_slug:
|
|
4984
|
+
suggested_slug: cachedBranchSlug,
|
|
4985
|
+
kernel_request_id: cachedBranchKernelId ?? undefined,
|
|
4711
4986
|
})
|
|
4712
4987
|
await switchroomReply(
|
|
4713
4988
|
ctx,
|
|
@@ -4748,12 +5023,17 @@ async function handleInbound(
|
|
|
4748
5023
|
highConfDetection?.suggested_slug
|
|
4749
5024
|
?? (isAuthFlowContext ? 'anthropic_oauth_code' : 'secret')
|
|
4750
5025
|
const dKey = deferredKey(chat_id, msgId ?? 0)
|
|
5026
|
+
const noPassKernelId = await mintDeferredSecretKernelRequest(
|
|
5027
|
+
suggestedSlug,
|
|
5028
|
+
loadAccess().allowFrom,
|
|
5029
|
+
)
|
|
4751
5030
|
deferredSecrets.set(dKey, {
|
|
4752
5031
|
chat_id,
|
|
4753
5032
|
original_message_id: msgId ?? 0,
|
|
4754
5033
|
text: effectiveText,
|
|
4755
5034
|
staged_at: Date.now(),
|
|
4756
5035
|
suggested_slug: suggestedSlug,
|
|
5036
|
+
kernel_request_id: noPassKernelId ?? undefined,
|
|
4757
5037
|
})
|
|
4758
5038
|
if (msgId != null) {
|
|
4759
5039
|
try { await bot.api.deleteMessage(chat_id, msgId) } catch {}
|
|
@@ -5336,6 +5616,47 @@ function resolveSystemdRunPath(): string | null {
|
|
|
5336
5616
|
return _systemdRunPath
|
|
5337
5617
|
}
|
|
5338
5618
|
|
|
5619
|
+
/**
 * Report whether `docker` can be used from this process — the
 * pull-images and recreate-containers steps of `switchroom update`
 * need it.
 *
 * Two things must both hold for a `true` result:
 *   1. the socket `/var/run/docker.sock` exists on disk, and
 *   2. `docker --version` runs successfully within 2 seconds.
 *
 * The gateway normally runs inside the agent container, which is built
 * without a docker binary or socket mount, so `false` is the expected
 * answer there.
 *
 * The answer is computed once and memoised in `_dockerReachable` for the
 * lifetime of the process.
 */
let _dockerReachable: boolean | undefined
function isDockerReachable(): boolean {
  if (_dockerReachable === undefined) {
    // Check the socket first: it is a cheap stat, and when the mount is
    // missing there is no point paying for a child process.
    let reachable = existsSync('/var/run/docker.sock')
    if (reachable) {
      try {
        // `--version` only proves the binary is present and runnable; it
        // does not talk to the daemon. The timeout guards against a
        // binary that exists but hangs.
        execSync('docker --version', { stdio: 'ignore', timeout: 2000 })
      } catch {
        reachable = false
      }
    }
    _dockerReachable = reachable
  }
  return _dockerReachable
}

// @internal exported for tests — clears the memoised probe result so the
// next isDockerReachable() call probes again.
export function _resetDockerReachableCache(): void {
  _dockerReachable = undefined
}
|
|
5659
|
+
|
|
5339
5660
|
function spawnSwitchroomDetached(
|
|
5340
5661
|
args: string[],
|
|
5341
5662
|
onFailure?: (info: { code: number; tail: string }) => void,
|
|
@@ -5748,28 +6069,35 @@ async function runSwitchroomCommandFormatted(ctx: Context, args: string[], label
|
|
|
5748
6069
|
}
|
|
5749
6070
|
|
|
5750
6071
|
// ─── Admin-command gating middleware ─────────────────────────────────────
|
|
5751
|
-
// When AGENT_ADMIN=false (default), admin slash commands
|
|
5752
|
-
// /
|
|
5753
|
-
//
|
|
5754
|
-
//
|
|
5755
|
-
// due to isAuthorizedSender) and never reach handleInboundCoalesced.
|
|
6072
|
+
// When AGENT_ADMIN=false (default), admin slash commands (/agents, /logs,
|
|
6073
|
+
// /grant, etc.) must NOT execute locally — this agent isn't admin-flagged
|
|
6074
|
+
// and routing them through Claude burns tokens for no benefit. Reply with a
|
|
6075
|
+
// concise "admin required" warning instead.
|
|
5756
6076
|
//
|
|
5757
|
-
//
|
|
5758
|
-
//
|
|
5759
|
-
//
|
|
6077
|
+
// Special case: `/restart` with no arg, or `/restart <my-agent-name>`, is
|
|
6078
|
+
// allowed to fall through to the local bot.command('restart', …) handler so
|
|
6079
|
+
// every agent can self-restart without admin privilege. `/restart <other>`
|
|
6080
|
+
// is blocked just like any other admin verb.
|
|
5760
6081
|
//
|
|
5761
6082
|
// Invariant: when AGENT_ADMIN=true, this middleware is a no-op — bot.command()
|
|
5762
|
-
// handlers run normally and Claude never sees
|
|
6083
|
+
// handlers run normally for all admin verbs and Claude never sees them.
|
|
5763
6084
|
bot.use(async (ctx, next) => {
|
|
5764
6085
|
if (!AGENT_ADMIN && ctx.message?.text) {
|
|
5765
|
-
const
|
|
5766
|
-
|
|
5767
|
-
|
|
5768
|
-
//
|
|
6086
|
+
const myName = getMyAgentName()
|
|
6087
|
+
const decision = classifyAdminGate(ctx.message.text, myName)
|
|
6088
|
+
if (decision.action === 'block') {
|
|
6089
|
+
// Block admin commands the LLM should never see. Reply with a concise
|
|
6090
|
+
// "admin required" warning instead of forwarding to Claude.
|
|
5769
6091
|
process.stderr.write(
|
|
5770
|
-
`telegram gateway: admin-gate
|
|
6092
|
+
`telegram gateway: admin-gate blocked cmd=/${decision.cmd} agent=${process.env.SWITCHROOM_AGENT_NAME ?? '-'} reason=${decision.reason} (AGENT_ADMIN=false)\n`,
|
|
5771
6093
|
)
|
|
5772
|
-
|
|
6094
|
+
const cmdHtml = escapeHtmlForTg(`/${decision.cmd}`)
|
|
6095
|
+
const nameHtml = escapeHtmlForTg(myName)
|
|
6096
|
+
const text =
|
|
6097
|
+
decision.reason === 'other-agent'
|
|
6098
|
+
? `⚠️ <code>${cmdHtml}</code> targeting another agent is an admin operation — this agent (<code>${nameHtml}</code>) isn't admin-flagged. Run it from an admin agent, or set <code>admin: true</code> for this agent in switchroom.yaml. (Self-restart is allowed: send <code>/restart</code> with no arg.)`
|
|
6099
|
+
: `⚠️ <code>${cmdHtml}</code> is an admin command — this agent (<code>${nameHtml}</code>) isn't admin-flagged. Run it from an admin agent, or set <code>admin: true</code> for this agent in switchroom.yaml.`
|
|
6100
|
+
await switchroomReply(ctx, text, { html: true })
|
|
5773
6101
|
return
|
|
5774
6102
|
}
|
|
5775
6103
|
}
|
|
@@ -5848,7 +6176,7 @@ function buildAgentAudit(agentName: string): AgentAudit | undefined {
|
|
|
5848
6176
|
// to `switchroom agent list --json` and `switchroom auth status --json`.
|
|
5849
6177
|
// Best-effort — any missing piece renders as a placeholder in the text
|
|
5850
6178
|
// templates rather than blocking the reply.
|
|
5851
|
-
function buildAgentMetadata(agentName: string): AgentMetadata {
|
|
6179
|
+
async function buildAgentMetadata(agentName: string): Promise<AgentMetadata> {
|
|
5852
6180
|
type AgentListResp = {
|
|
5853
6181
|
agents: Array<{
|
|
5854
6182
|
name: string; status: string; uptime: string;
|
|
@@ -5885,9 +6213,66 @@ function buildAgentMetadata(agentName: string): AgentMetadata {
|
|
|
5885
6213
|
status: a?.status ?? null,
|
|
5886
6214
|
auth: authSummary,
|
|
5887
6215
|
audit: buildAgentAudit(agentName),
|
|
6216
|
+
live: await buildLiveProbeRows(agentName),
|
|
5888
6217
|
}
|
|
5889
6218
|
}
|
|
5890
6219
|
|
|
6220
|
+
/**
|
|
6221
|
+
* Run the boot-card probe set on demand for `/status`. Same probes,
|
|
6222
|
+
* different rendering contract: `/status` shows every row (silent-when-
|
|
6223
|
+
* healthy is for the boot card; the user explicitly asked for current
|
|
6224
|
+
* state here). Failures are swallowed per-probe via runAllProbes's
|
|
6225
|
+
* Promise.allSettled, and we filter out anything we couldn't render so
|
|
6226
|
+
* the reply doesn't break on a broken probe.
|
|
6227
|
+
*/
|
|
6228
|
+
async function buildLiveProbeRows(agentName: string): Promise<StatusProbeRow[]> {
|
|
6229
|
+
try {
|
|
6230
|
+
const { runAllProbes } = await import('./boot-card.js')
|
|
6231
|
+
const agentDir = resolveAgentDirFromEnv()
|
|
6232
|
+
?? (process.env.TELEGRAM_STATE_DIR
|
|
6233
|
+
? require('path').dirname(process.env.TELEGRAM_STATE_DIR)
|
|
6234
|
+
: '/tmp')
|
|
6235
|
+
const probes = await runAllProbes({
|
|
6236
|
+
agentName,
|
|
6237
|
+
agentSlug: agentName,
|
|
6238
|
+
version: formatBootVersion(),
|
|
6239
|
+
agentDir,
|
|
6240
|
+
gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
|
|
6241
|
+
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
6242
|
+
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
6243
|
+
})
|
|
6244
|
+
const rows: StatusProbeRow[] = []
|
|
6245
|
+
// Render order matches the boot card's PROBE_KEYS so the two
|
|
6246
|
+
// surfaces tell the same story in the same order.
|
|
6247
|
+
const order = ['account', 'agent', 'gateway', 'quota', 'hindsight',
|
|
6248
|
+
'scheduler', 'broker', 'kernel', 'skills'] as const
|
|
6249
|
+
for (const k of order) {
|
|
6250
|
+
const r = probes[k]
|
|
6251
|
+
if (!r) continue
|
|
6252
|
+
rows.push({ status: r.status, label: r.label, detail: r.detail })
|
|
6253
|
+
}
|
|
6254
|
+
return rows
|
|
6255
|
+
} catch (err: unknown) {
|
|
6256
|
+
process.stderr.write(
|
|
6257
|
+
`telegram gateway: /status: probe gathering failed: ${
|
|
6258
|
+
(err as Error)?.message ?? String(err)
|
|
6259
|
+
}\n`,
|
|
6260
|
+
)
|
|
6261
|
+
return []
|
|
6262
|
+
}
|
|
6263
|
+
}
|
|
6264
|
+
|
|
6265
|
+
// RFC B §9: register /approvals list|revoke against the approval kernel.
|
|
6266
|
+
// The kernel's IPC client (`src/vault/approvals/client.ts`) round-trips
|
|
6267
|
+
// through the vault broker — same socket, no new daemon. The isApprover
|
|
6268
|
+
// gate reuses the existing dmCommandGate / allowFrom pattern.
|
|
6269
|
+
{
|
|
6270
|
+
const { registerApprovalsCommands } = await import('./approvals-commands.js')
|
|
6271
|
+
registerApprovalsCommands(bot, {
|
|
6272
|
+
isApprover: ctx => dmCommandGate(ctx) !== null,
|
|
6273
|
+
})
|
|
6274
|
+
}
|
|
6275
|
+
|
|
5891
6276
|
bot.command('start', async ctx => {
|
|
5892
6277
|
// dmCommandGate (#894 backport): silent drop on disabled or
|
|
5893
6278
|
// non-allowlisted senders so the bot doesn't leak its existence.
|
|
@@ -5912,7 +6297,7 @@ bot.command('status', async ctx => {
|
|
|
5912
6297
|
const from = ctx.from!
|
|
5913
6298
|
if (access.allowFrom.includes(senderId)) {
|
|
5914
6299
|
const userTag = from.username ? `@${from.username}` : senderId
|
|
5915
|
-
const meta = buildAgentMetadata(getMyAgentName())
|
|
6300
|
+
const meta = await buildAgentMetadata(getMyAgentName())
|
|
5916
6301
|
await ctx.reply(buildStatusPairedText({ user: userTag, meta }), { parse_mode: 'HTML' })
|
|
5917
6302
|
return
|
|
5918
6303
|
}
|
|
@@ -6123,6 +6508,168 @@ async function handleNewOrResetCommand(ctx: Context, kind: 'new' | 'reset'): Pro
|
|
|
6123
6508
|
bot.command('new', async ctx => handleNewOrResetCommand(ctx, 'new'))
|
|
6124
6509
|
bot.command('reset', async ctx => handleNewOrResetCommand(ctx, 'reset'))
|
|
6125
6510
|
|
|
6511
|
+
// /update — host update from Telegram (#919). Default = dry-run plan
|
|
6512
|
+
// (`switchroom update --check`); explicit `apply` triggers the real
|
|
6513
|
+
// thing via spawnSwitchroomDetached so the gateway can be killed
|
|
6514
|
+
// mid-flight by the recreate-containers step without orphaning the
|
|
6515
|
+
// update. Admin-gated via ADMIN_COMMAND_NAMES.
|
|
6516
|
+
bot.command('update', async ctx => {
|
|
6517
|
+
if (!isAuthorizedSender(ctx)) return
|
|
6518
|
+
const arg = ctx.match?.trim() || ''
|
|
6519
|
+
if (arg === '' || arg === 'check' || arg === '--check') {
|
|
6520
|
+
await runSwitchroomCommand(ctx, ['update', '--check'], 'update --check')
|
|
6521
|
+
await switchroomReply(
|
|
6522
|
+
ctx,
|
|
6523
|
+
'Reply with <code>/update apply</code> to execute, or <code>/update apply --skip-images</code> to skip the image pull.',
|
|
6524
|
+
{ html: true },
|
|
6525
|
+
)
|
|
6526
|
+
return
|
|
6527
|
+
}
|
|
6528
|
+
// Parse `apply` (with optional --skip-images / --rebuild passthrough).
|
|
6529
|
+
// `/update apply` and `/update apply --skip-images` are the supported
|
|
6530
|
+
// forms; everything else surfaces a usage hint.
|
|
6531
|
+
const tokens = arg.split(/\s+/)
|
|
6532
|
+
if (tokens[0] !== 'apply' && tokens[0] !== '--apply') {
|
|
6533
|
+
await switchroomReply(
|
|
6534
|
+
ctx,
|
|
6535
|
+
'Usage: <code>/update</code> (dry-run) or <code>/update apply [--skip-images] [--rebuild]</code>',
|
|
6536
|
+
{ html: true },
|
|
6537
|
+
)
|
|
6538
|
+
return
|
|
6539
|
+
}
|
|
6540
|
+
// Whitelist passthrough flags. Anything outside the allowlist is
|
|
6541
|
+
// refused — operators should not be able to inject arbitrary CLI
|
|
6542
|
+
// args via Telegram (defense in depth even though admin-gated).
|
|
6543
|
+
const ALLOWED_FLAGS = new Set(['--skip-images', '--rebuild'])
|
|
6544
|
+
const passthrough = tokens.slice(1)
|
|
6545
|
+
for (const tok of passthrough) {
|
|
6546
|
+
if (!ALLOWED_FLAGS.has(tok)) {
|
|
6547
|
+
await switchroomReply(
|
|
6548
|
+
ctx,
|
|
6549
|
+
`Refusing to pass unknown flag: <code>${escapeHtmlForTg(tok)}</code>. ` +
|
|
6550
|
+
`Allowed: <code>--skip-images</code>, <code>--rebuild</code>.`,
|
|
6551
|
+
{ html: true },
|
|
6552
|
+
)
|
|
6553
|
+
return
|
|
6554
|
+
}
|
|
6555
|
+
}
|
|
6556
|
+
// Docker reachability guard (#926). The gateway runs INSIDE the agent
|
|
6557
|
+
// container, which has the switchroom CLI baked in but no docker
|
|
6558
|
+
// binary and no /var/run/docker.sock mount. So `switchroom update`'s
|
|
6559
|
+
// pull-images and recreate-containers steps would fail with
|
|
6560
|
+
// "docker: command not found". Without this guard, the operator
|
|
6561
|
+
// sees an opaque "❌ update failed (exit 127)" via
|
|
6562
|
+
// notifyDetachedFailure ~5s after the ack.
|
|
6563
|
+
//
|
|
6564
|
+
// Surface a clean explanation instead, pointing them at the host
|
|
6565
|
+
// CLI as the working path. /update (dry-run) does NOT need docker
|
|
6566
|
+
// and is unaffected — only /update apply.
|
|
6567
|
+
if (!isDockerReachable()) {
|
|
6568
|
+
await switchroomReply(
|
|
6569
|
+
ctx,
|
|
6570
|
+
`❌ <b>/update apply</b> needs docker access from inside the agent ` +
|
|
6571
|
+
`container, but it's not available (no <code>docker</code> binary on ` +
|
|
6572
|
+
`PATH, no <code>/var/run/docker.sock</code> mount).\n\n` +
|
|
6573
|
+
`On docker installs, run <code>switchroom update</code> from the ` +
|
|
6574
|
+
`host shell instead.\n\n` +
|
|
6575
|
+
`<i>Tracked as #926 — host-side update daemon would close this gap.</i>`,
|
|
6576
|
+
{ html: true },
|
|
6577
|
+
)
|
|
6578
|
+
return
|
|
6579
|
+
}
|
|
6580
|
+
// Debounce vs concurrent self-restart commands (/restart, /new, /reset
|
|
6581
|
+
// and other /update calls). Reading + writing the SAME restart marker means
|
|
6582
|
+
// a double-tap of /update apply is rejected, AND a /restart fired
|
|
6583
|
+
// mid-update is rejected (and vice versa). 15s window matches the
|
|
6584
|
+
// /restart handler.
|
|
6585
|
+
const existing = readRestartMarker()
|
|
6586
|
+
if (existing && Date.now() - existing.ts < 15_000) {
|
|
6587
|
+
await switchroomReply(
|
|
6588
|
+
ctx,
|
|
6589
|
+
`⏳ Self-restart already in progress (started ${Math.round(
|
|
6590
|
+
(Date.now() - existing.ts) / 1000,
|
|
6591
|
+
)}s ago) — ignoring duplicate.`,
|
|
6592
|
+
{ html: true },
|
|
6593
|
+
)
|
|
6594
|
+
return
|
|
6595
|
+
}
|
|
6596
|
+
const chatId = String(ctx.chat!.id)
|
|
6597
|
+
const threadId = resolveThreadId(chatId, ctx.message?.message_thread_id)
|
|
6598
|
+
// Send the ack and capture its message_id so the post-restart
|
|
6599
|
+
// greeting card can edit/reply into the same message. Mirrors the
|
|
6600
|
+
// /restart handler (gateway.ts ~6273) so the boot-card lookup
|
|
6601
|
+
// (gateway.ts ~10393) finds chat_id + ack_message_id in the marker.
|
|
6602
|
+
const ackText =
|
|
6603
|
+
`🚀 <b>update started</b> — running ${[
|
|
6604
|
+
'<code>switchroom update</code>',
|
|
6605
|
+
...passthrough.map((t) => `<code>${escapeHtmlForTg(t)}</code>`),
|
|
6606
|
+
].join(' ')}\n` +
|
|
6607
|
+
`\nThe gateway will restart as part of the recreate step; watch ` +
|
|
6608
|
+
`for the post-restart greeting card to confirm completion.`
|
|
6609
|
+
let ackId: number | null = null
|
|
6610
|
+
try {
|
|
6611
|
+
const sent = await lockedBot.api.sendMessage(chatId, ackText, {
|
|
6612
|
+
parse_mode: 'HTML',
|
|
6613
|
+
link_preview_options: { is_disabled: true },
|
|
6614
|
+
...(threadId != null ? { message_thread_id: threadId } : {}),
|
|
6615
|
+
})
|
|
6616
|
+
ackId = sent.message_id
|
|
6617
|
+
if (HISTORY_ENABLED) {
|
|
6618
|
+
try {
|
|
6619
|
+
recordOutbound({
|
|
6620
|
+
chat_id: chatId,
|
|
6621
|
+
thread_id: threadId ?? null,
|
|
6622
|
+
message_ids: [sent.message_id],
|
|
6623
|
+
texts: [`🚀 update started`],
|
|
6624
|
+
attachment_kinds: [],
|
|
6625
|
+
})
|
|
6626
|
+
} catch {}
|
|
6627
|
+
}
|
|
6628
|
+
} catch {}
|
|
6629
|
+
writeRestartMarker({
|
|
6630
|
+
chat_id: chatId,
|
|
6631
|
+
thread_id: threadId ?? null,
|
|
6632
|
+
ack_message_id: ackId,
|
|
6633
|
+
ts: Date.now(),
|
|
6634
|
+
})
|
|
6635
|
+
// Reason banner for the post-restart greeting card. Without this the
|
|
6636
|
+
// banner falls back to whatever the CLI's clean-shutdown marker
|
|
6637
|
+
// stamped — usually 'unknown' or a docker-compose-restart string.
|
|
6638
|
+
stampUserRestartReason('user: /update from chat')
|
|
6639
|
+
// Unpin progress cards + clear active reactions before we die. The
|
|
6640
|
+
// pinned-progress-card surface is the headline feature per CLAUDE.md;
|
|
6641
|
+
// leaving one pinned across the recreate would surprise the operator.
|
|
6642
|
+
await sweepBeforeSelfRestart()
|
|
6643
|
+
spawnSwitchroomDetached(
|
|
6644
|
+
['update', ...passthrough],
|
|
6645
|
+
notifyDetachedFailure(chatId, threadId ?? null, 'update'),
|
|
6646
|
+
)
|
|
6647
|
+
})
|
|
6648
|
+
|
|
6649
|
+
// /upgradestatus — read-only snapshot of where this host stands (#927).
|
|
6650
|
+
// Wraps `switchroom update --status` synchronously and posts the
|
|
6651
|
+
// formatted output. NOT admin-gated: read-only fleet metadata is safe
|
|
6652
|
+
// for any allowFrom user to see, and the answer "is something behind?"
|
|
6653
|
+
// is the missing companion to /update's "trigger an update".
|
|
6654
|
+
// (Telegram slash-commands forbid hyphens, hence /upgradestatus not
|
|
6655
|
+
// /upgrade-status. The /upgrade alias just below redirects.)
|
|
6656
|
+
bot.command('upgradestatus', async ctx => {
|
|
6657
|
+
if (!isAuthorizedSender(ctx)) return
|
|
6658
|
+
await runSwitchroomCommand(ctx, ['update', '--status'], 'update --status')
|
|
6659
|
+
})
|
|
6660
|
+
// Redirect for the hyphenated spelling — Grammy doesn't allow hyphens in command names
|
|
6661
|
+
// (Telegram's slash-command grammar excludes them) but operators are
|
|
6662
|
+
// likely to type /upgrade-status; surface a polite redirect.
|
|
6663
|
+
bot.command('upgrade', async ctx => {
|
|
6664
|
+
if (!isAuthorizedSender(ctx)) return
|
|
6665
|
+
await switchroomReply(
|
|
6666
|
+
ctx,
|
|
6667
|
+
'Did you mean <code>/upgradestatus</code> (no hyphen — Telegram slash-command grammar)? ' +
|
|
6668
|
+
'Or <code>/update</code> to plan, <code>/update apply</code> to execute.',
|
|
6669
|
+
{ html: true },
|
|
6670
|
+
)
|
|
6671
|
+
})
|
|
6672
|
+
|
|
6126
6673
|
// ─── /approve, /deny, /pending ────────────────────────────────────────────
|
|
6127
6674
|
// Slash-command alternatives to the inline-button approval flow (useful for
|
|
6128
6675
|
// desktop-only sessions and power-users). Share pendingPermissions state
|
|
@@ -6961,6 +7508,16 @@ async function handleVaultDeferCallback(ctx: Context, data: string): Promise<voi
|
|
|
6961
7508
|
const cardMessageId = ctx.callbackQuery?.message?.message_id
|
|
6962
7509
|
|
|
6963
7510
|
if (action === 'cancel') {
|
|
7511
|
+
// Kernel-side dual-dispatch (MIGRATION.md §1): record the deny decision
|
|
7512
|
+
// BEFORE the legacy handler clears state, so the audit log captures it
|
|
7513
|
+
// even if the editMessageText below races with another tap. Best-effort
|
|
7514
|
+
// — broker unreachable falls back to legacy-only.
|
|
7515
|
+
await recordDeferredSecretKernelDecision(
|
|
7516
|
+
deferred.kernel_request_id,
|
|
7517
|
+
'deny',
|
|
7518
|
+
ctx.from?.id ?? 0,
|
|
7519
|
+
access.allowFrom,
|
|
7520
|
+
)
|
|
6964
7521
|
deferredSecrets.delete(deferKey)
|
|
6965
7522
|
await ctx.answerCallbackQuery({ text: 'Discarded.' }).catch(() => {})
|
|
6966
7523
|
if (cardMessageId != null) {
|
|
@@ -6974,6 +7531,18 @@ async function handleVaultDeferCallback(ctx: Context, data: string): Promise<voi
|
|
|
6974
7531
|
}
|
|
6975
7532
|
|
|
6976
7533
|
if (action === 'unlock') {
|
|
7534
|
+
// Kernel-side dual-dispatch (MIGRATION.md §1): record the allow_once
|
|
7535
|
+
// decision when the user taps unlock. The actual passphrase capture +
|
|
7536
|
+
// vault write still happens via the legacy path below — the kernel
|
|
7537
|
+
// decision is for audit/state, not secret material (per RFC B). We
|
|
7538
|
+
// record at tap-time rather than after passphrase entry so a kernel
|
|
7539
|
+
// record exists even if the user abandons the passphrase prompt.
|
|
7540
|
+
await recordDeferredSecretKernelDecision(
|
|
7541
|
+
deferred.kernel_request_id,
|
|
7542
|
+
'allow_once',
|
|
7543
|
+
ctx.from?.id ?? 0,
|
|
7544
|
+
access.allowFrom,
|
|
7545
|
+
)
|
|
6977
7546
|
// If a passphrase is already cached we can skip straight to the write.
|
|
6978
7547
|
// Covers the case where the user had unlocked separately between
|
|
6979
7548
|
// detection and tap.
|
|
@@ -7163,12 +7732,43 @@ async function grantWizardConfirm(ctx: Context, chatId: string, state: Extract<P
|
|
|
7163
7732
|
const sent = await switchroomReply(ctx, text, { html: true, reply_markup: kb })
|
|
7164
7733
|
state.wizardMsgId = (sent as unknown as { message_id?: number })?.message_id
|
|
7165
7734
|
}
|
|
7166
|
-
|
|
7735
|
+
// Mint kernel decision row at the confirm step (MIGRATION.md §2,
|
|
7736
|
+
// audit-only Phase 1). We do it here rather than at executeGrantWizard
|
|
7737
|
+
// so a kernel row exists even if the user taps Cancel from the confirm
|
|
7738
|
+
// card — the deny verdict on cancel is then recorded against the same
|
|
7739
|
+
// request_id. If the kernel/broker is unreachable, request_id stays
|
|
7740
|
+
// undefined and the wizard runs legacy-only (no behaviour change).
|
|
7741
|
+
const kernelRequestId = await mintGrantWizardKernelRequest(
|
|
7742
|
+
state.agent!,
|
|
7743
|
+
loadAccess().allowFrom,
|
|
7744
|
+
state.selectedKeys!,
|
|
7745
|
+
state.ttlSeconds ?? null,
|
|
7746
|
+
)
|
|
7747
|
+
pendingVaultOps.set(chatId, {
|
|
7748
|
+
...state,
|
|
7749
|
+
step: 'confirm',
|
|
7750
|
+
expiresLabel,
|
|
7751
|
+
kernel_request_id: kernelRequestId ?? state.kernel_request_id,
|
|
7752
|
+
})
|
|
7167
7753
|
}
|
|
7168
7754
|
|
|
7169
7755
|
/** Execute the grant: call broker mint_grant, write token, reply. */
|
|
7170
7756
|
async function executeGrantWizard(ctx: Context, chatId: string, state: Extract<PendingVaultOp, { kind: 'grant-wizard' }>): Promise<void> {
|
|
7171
7757
|
pendingVaultOps.delete(chatId)
|
|
7758
|
+
// Kernel-side dual-dispatch (MIGRATION.md §2, audit-only Phase 1):
|
|
7759
|
+
// record the allow_once decision when the user taps Generate. The
|
|
7760
|
+
// legacy `mintGrantViaBroker` below still drives the actual grant
|
|
7761
|
+
// mint + token write — the kernel row is informational, not
|
|
7762
|
+
// enforcing, in Phase 1 (issue #833 will flip to enforcing).
|
|
7763
|
+
// We record at tap-time rather than after mint_grant succeeds so a
|
|
7764
|
+
// kernel row exists even if the legacy mint fails (audit captures
|
|
7765
|
+
// intent regardless of downstream outcome).
|
|
7766
|
+
await recordGrantWizardKernelDecision(
|
|
7767
|
+
state.kernel_request_id,
|
|
7768
|
+
'allow_once',
|
|
7769
|
+
ctx.from?.id ?? 0,
|
|
7770
|
+
loadAccess().allowFrom,
|
|
7771
|
+
)
|
|
7172
7772
|
// Defence-in-depth: state.agent flows from callback_data into a path
|
|
7173
7773
|
// join below. A crafted vg:agent:../../etc payload would produce a
|
|
7174
7774
|
// path traversal. Validate against the same regex the rest of the
|
|
@@ -7316,6 +7916,20 @@ async function handleVaultGrantCallback(ctx: Context, data: string): Promise<voi
|
|
|
7316
7916
|
|
|
7317
7917
|
// Cancel at any wizard step
|
|
7318
7918
|
if (data === 'vg:cancel') {
|
|
7919
|
+
// Kernel-side dual-dispatch (MIGRATION.md §2, audit-only Phase 1):
|
|
7920
|
+
// if the user got as far as the confirm step, a kernel request_id
|
|
7921
|
+
// will be on the wizard state — record the deny decision so the
|
|
7922
|
+
// audit log captures the abandonment. No-op if the user cancelled
|
|
7923
|
+
// before the confirm step (or if the kernel was unreachable).
|
|
7924
|
+
const cancelState = pendingVaultOps.get(chatId)
|
|
7925
|
+
if (cancelState && cancelState.kind === 'grant-wizard') {
|
|
7926
|
+
await recordGrantWizardKernelDecision(
|
|
7927
|
+
cancelState.kernel_request_id,
|
|
7928
|
+
'deny',
|
|
7929
|
+
ctx.from?.id ?? 0,
|
|
7930
|
+
loadAccess().allowFrom,
|
|
7931
|
+
)
|
|
7932
|
+
}
|
|
7319
7933
|
pendingVaultOps.delete(chatId)
|
|
7320
7934
|
const msg = ctx.callbackQuery?.message
|
|
7321
7935
|
if (msg && 'text' in msg) {
|
|
@@ -7556,19 +8170,23 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
|
|
|
7556
8170
|
}
|
|
7557
8171
|
case 'restart': {
|
|
7558
8172
|
await ctx.answerCallbackQuery({ text: `Restarting ${agent}…` }).catch(() => {})
|
|
7559
|
-
|
|
7560
|
-
|
|
7561
|
-
encoding: 'utf-8',
|
|
7562
|
-
timeout: 15000,
|
|
7563
|
-
stdio: ['ignore', 'pipe', 'pipe'],
|
|
7564
|
-
})
|
|
8173
|
+
const ok = triggerSelfRestart(agent, 'inline-button-restart')
|
|
8174
|
+
if (ok) {
|
|
7565
8175
|
await ctx.reply(`<b>${agent}</b> restart requested.`, { parse_mode: 'HTML' })
|
|
7566
8176
|
await ctx.editMessageReplyMarkup({ reply_markup: { inline_keyboard: [] } }).catch(() => {})
|
|
7567
|
-
}
|
|
7568
|
-
//
|
|
7569
|
-
//
|
|
7570
|
-
|
|
7571
|
-
|
|
8177
|
+
} else {
|
|
8178
|
+
// Under docker the helper refuses cross-agent restart; surface
|
|
8179
|
+
// a clear message instead of a silent no-op. Service name in
|
|
8180
|
+
// the generated compose is `agent-<name>` (compose.ts:408);
|
|
8181
|
+
// container_name is `switchroom-<name>` (compose.ts:410).
|
|
8182
|
+
// `docker compose restart` takes a SERVICE name, so we point
|
|
8183
|
+
// the operator at the service.
|
|
8184
|
+
const isDocker = process.env.SWITCHROOM_RUNTIME === 'docker'
|
|
8185
|
+
const detail = isDocker
|
|
8186
|
+
? `cross-agent restart is not supported under docker. ` +
|
|
8187
|
+
`Restart from the host: <code>docker compose -p switchroom restart agent-${agent}</code>.`
|
|
8188
|
+
: 'restart trigger failed'
|
|
8189
|
+
await ctx.reply(`<b>Restart failed for ${agent}:</b> ${detail}`, {
|
|
7572
8190
|
parse_mode: 'HTML',
|
|
7573
8191
|
})
|
|
7574
8192
|
}
|
|
@@ -7582,6 +8200,21 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
|
|
|
7582
8200
|
}
|
|
7583
8201
|
case 'logs': {
|
|
7584
8202
|
await ctx.answerCallbackQuery({ text: 'Fetching logs…' }).catch(() => {})
|
|
8203
|
+
// Pick the right log source for the runtime. Under docker, the
|
|
8204
|
+
// gateway is INSIDE the agent container — calling `docker logs`
|
|
8205
|
+
// requires the host's docker socket which is deliberately not
|
|
8206
|
+
// mounted into agent containers. Under systemd, journalctl
|
|
8207
|
+
// works as before. v0.7.2 fixed `case 'restart'` but left this
|
|
8208
|
+
// path systemd-only.
|
|
8209
|
+
const isDocker = process.env.SWITCHROOM_RUNTIME === 'docker'
|
|
8210
|
+
if (isDocker) {
|
|
8211
|
+
await ctx.reply(
|
|
8212
|
+
`<i>Inline log fetch is not available under docker mode (no docker.sock in agent containers). ` +
|
|
8213
|
+
`Run from the host: <code>docker logs --since 30m --tail 30 switchroom-${agent}</code></i>`,
|
|
8214
|
+
{ parse_mode: 'HTML' },
|
|
8215
|
+
)
|
|
8216
|
+
return
|
|
8217
|
+
}
|
|
7585
8218
|
try {
|
|
7586
8219
|
const out = execFileSync(
|
|
7587
8220
|
'journalctl',
|
|
@@ -8231,17 +8864,11 @@ bot.command('permissions', async ctx => {
|
|
|
8231
8864
|
await runSwitchroomCommand(ctx, ['agent', 'permissions', agentName], `permissions ${agentName}`)
|
|
8232
8865
|
})
|
|
8233
8866
|
|
|
8234
|
-
|
|
8235
|
-
|
|
8236
|
-
|
|
8237
|
-
|
|
8238
|
-
|
|
8239
|
-
const threadId = resolveThreadId(chatId, ctx.message?.message_thread_id)
|
|
8240
|
-
spawnSwitchroomDetached(
|
|
8241
|
-
['update'],
|
|
8242
|
-
notifyDetachedFailure(chatId, threadId ?? null, 'update'),
|
|
8243
|
-
)
|
|
8244
|
-
})
|
|
8867
|
+
// Drive-by cleanup (#927): the dead /update handler that lived here
|
|
8868
|
+
// was a pre-#919 stub. Grammy registers in order so the comprehensive
|
|
8869
|
+
// /update handler at line ~6516 (added in #919, hardened in #924,
|
|
8870
|
+
// docker-guarded in #934) fired first and this one never ran.
|
|
8871
|
+
// Removed to avoid future confusion.
|
|
8245
8872
|
|
|
8246
8873
|
bot.command('version', async ctx => {
|
|
8247
8874
|
if (!isAuthorizedSender(ctx)) return
|
|
@@ -8293,6 +8920,16 @@ bot.on('callback_query:data', async ctx => {
|
|
|
8293
8920
|
return
|
|
8294
8921
|
}
|
|
8295
8922
|
|
|
8923
|
+
// RFC B §6.1: apv:<request_id>:<choice>[:<param>] — approval kernel taps.
|
|
8924
|
+
// Routed through the generic kernel handler so any surface that uses
|
|
8925
|
+
// buildApprovalCard inherits consume → record → confirmation UX without
|
|
8926
|
+
// each surface re-implementing it.
|
|
8927
|
+
if (data.startsWith('apv:')) {
|
|
8928
|
+
const { handleApprovalCallback } = await import('./approval-callback.js')
|
|
8929
|
+
await handleApprovalCallback(ctx, data)
|
|
8930
|
+
return
|
|
8931
|
+
}
|
|
8932
|
+
|
|
8296
8933
|
// op:<action>:<encoded-agent> callbacks from operator-events.ts
|
|
8297
8934
|
// renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
|
|
8298
8935
|
// Actions: dismiss, restart, reauth, swap-slot, add-slot, logs.
|
|
@@ -9391,8 +10028,37 @@ if (streamMode === 'checklist') {
|
|
|
9391
10028
|
return { code: 0, description: msg, kind: 'transient' }
|
|
9392
10029
|
}
|
|
9393
10030
|
|
|
10031
|
+
// #842: progress-card first-render gating. Read the per-agent
|
|
10032
|
+
// overrides from switchroom.yaml; fall back to driver defaults
|
|
10033
|
+
// (45000 ms / 0 ms) when absent, unreadable, or not present in the
|
|
10034
|
+
// cascade (defaults → profile → per-agent).
|
|
10035
|
+
let progressCardDelayMs: number | undefined
|
|
10036
|
+
let progressCardDelayMsBackground: number | undefined
|
|
10037
|
+
try {
|
|
10038
|
+
const swConfig = loadSwitchroomConfig()
|
|
10039
|
+
const agentSlugForCfg = process.env.SWITCHROOM_AGENT_NAME
|
|
10040
|
+
const agentCfg = agentSlugForCfg ? swConfig.agents?.[agentSlugForCfg] : undefined
|
|
10041
|
+
const pc = agentCfg?.channels?.telegram?.progress_card
|
|
10042
|
+
if (pc) {
|
|
10043
|
+
if (typeof pc.delay_ms === 'number') progressCardDelayMs = pc.delay_ms
|
|
10044
|
+
if (typeof pc.delay_ms_background === 'number') progressCardDelayMsBackground = pc.delay_ms_background
|
|
10045
|
+
}
|
|
10046
|
+
} catch {
|
|
10047
|
+
// Best-effort — gateway may run in dirs where loadSwitchroomConfig
|
|
10048
|
+
// fails. Driver defaults apply.
|
|
10049
|
+
}
|
|
10050
|
+
|
|
9394
10051
|
progressDriver = createProgressDriver({
|
|
10052
|
+
...(progressCardDelayMs != null ? { initialDelayMs: progressCardDelayMs } : {}),
|
|
10053
|
+
...(progressCardDelayMsBackground != null ? { initialDelayMsBackground: progressCardDelayMsBackground } : {}),
|
|
9395
10054
|
emit: ({ chatId, threadId, turnKey, html, done, isFirstEmit, replyToMessageId, agentId }) => {
|
|
10055
|
+
// Tag the outbound API calls so `tg-post` log lines carry turnKey
|
|
10056
|
+
// (and cardMessageId when known) — lets us audit days-old session
|
|
10057
|
+
// logs for "did the card render?" / "what edit storms hit it?"
|
|
10058
|
+
// without parsing free-form progress-card traces. (#card-audit-log)
|
|
10059
|
+
const knownCardMessageId = pinMgr.pinnedMessageId(turnKey, agentId)
|
|
10060
|
+
const tgPostTags: Record<string, string | number> = { turnKey }
|
|
10061
|
+
if (knownCardMessageId != null) tgPostTags.cardMessageId = knownCardMessageId
|
|
9396
10062
|
const args = {
|
|
9397
10063
|
chat_id: chatId, text: html, done, message_thread_id: threadId,
|
|
9398
10064
|
lane: 'progress', format: 'html', turnKey,
|
|
@@ -9439,7 +10105,7 @@ if (streamMode === 'checklist') {
|
|
|
9439
10105
|
// default in a follow-up PR.
|
|
9440
10106
|
const draftFlagOn = process.env.PROGRESS_CARD_DRAFT_TRANSPORT === '1'
|
|
9441
10107
|
const draftEligible = draftFlagOn && isDmChatId(chatId) && threadId == null
|
|
9442
|
-
handleStreamReply(args, { activeDraftStreams, activeDraftParseModes, suppressPtyPreview }, {
|
|
10108
|
+
withTgPostTags(tgPostTags, () => handleStreamReply(args, { activeDraftStreams, activeDraftParseModes, suppressPtyPreview }, {
|
|
9443
10109
|
// grammy Bot vs local StreamBotApi — see cast pattern above.
|
|
9444
10110
|
bot: lockedBot as never, retry: robustApiCall, markdownToHtml, escapeMarkdownV2, repairEscapedWhitespace,
|
|
9445
10111
|
takeHandoffPrefix: () => '', assertAllowedChat, resolveThreadId, disableLinkPreview: true,
|
|
@@ -9466,7 +10132,7 @@ if (streamMode === 'checklist') {
|
|
|
9466
10132
|
...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
|
|
9467
10133
|
}
|
|
9468
10134
|
: {}),
|
|
9469
|
-
}).then((result) => {
|
|
10135
|
+
})).then((result) => {
|
|
9470
10136
|
// Successful API call — reset the consecutive-4xx counter.
|
|
9471
10137
|
progressDriver?.reportApiSuccess(turnKey)
|
|
9472
10138
|
// #203: progress-card edit is a user-visible signal.
|
|
@@ -9929,7 +10595,10 @@ void (async () => {
|
|
|
9929
10595
|
const cleanMarkerStale = cleanMarker
|
|
9930
10596
|
? !shouldSuppressRecoveryBanner(cleanMarker, nowMs, CLEAN_SHUTDOWN_MAX_AGE_MS)
|
|
9931
10597
|
: false
|
|
9932
|
-
const
|
|
10598
|
+
const supervisor = process.env.SWITCHROOM_RUNTIME === 'docker'
|
|
10599
|
+
? 'docker compose'
|
|
10600
|
+
: 'systemd'
|
|
10601
|
+
const detailParts: string[] = [`gateway crashed and was auto-restarted by ${supervisor}`]
|
|
9933
10602
|
if (cleanMarker?.signal) detailParts.push(`prior signal=${cleanMarker.signal}`)
|
|
9934
10603
|
if (cleanMarkerStale) detailParts.push('clean-shutdown marker stale')
|
|
9935
10604
|
emitGatewayOperatorEvent({
|
|
@@ -9978,6 +10647,7 @@ void (async () => {
|
|
|
9978
10647
|
restartAgeMs: markerAgeMs,
|
|
9979
10648
|
loadAccounts: () => loadAccountsForBootCard(agentSlug),
|
|
9980
10649
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
10650
|
+
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
9981
10651
|
}, ackMsgId)
|
|
9982
10652
|
activeBootCard = handle
|
|
9983
10653
|
} catch (err) {
|
|
@@ -10051,11 +10721,23 @@ void (async () => {
|
|
|
10051
10721
|
// Closes #30 task 4 and the 2026-04-21 lessons-learned loop where
|
|
10052
10722
|
// IPC flaps falsely triggered the gateway's recovery banner.
|
|
10053
10723
|
// SWITCHROOM_RESTART_WATCHDOG_POLL_MS=0 disables it.
|
|
10724
|
+
//
|
|
10725
|
+
// Disabled under SWITCHROOM_RUNTIME=docker — the watchdog reads
|
|
10726
|
+
// systemd's NRestarts counter, which doesn't exist for docker
|
|
10727
|
+
// containers. Reading docker's restart count would require
|
|
10728
|
+
// mounting docker.sock into the agent container (a security
|
|
10729
|
+
// regression we explicitly avoid). Container restart visibility
|
|
10730
|
+
// comes from the boot card + gateway boot logs in docker mode.
|
|
10054
10731
|
const RESTART_WATCHDOG_POLL_MS = Number(
|
|
10055
10732
|
process.env.SWITCHROOM_RESTART_WATCHDOG_POLL_MS ?? 30_000,
|
|
10056
10733
|
)
|
|
10057
10734
|
const watchdogAgentName = process.env.SWITCHROOM_AGENT_NAME
|
|
10058
|
-
|
|
10735
|
+
const watchdogDockerMode = process.env.SWITCHROOM_RUNTIME === 'docker'
|
|
10736
|
+
if (watchdogDockerMode) {
|
|
10737
|
+
process.stderr.write(
|
|
10738
|
+
`telegram gateway: restart-watchdog disabled (SWITCHROOM_RUNTIME=docker; systemd NRestarts unavailable)\n`,
|
|
10739
|
+
)
|
|
10740
|
+
} else if (RESTART_WATCHDOG_POLL_MS > 0 && watchdogAgentName) {
|
|
10059
10741
|
startRestartWatchdog({
|
|
10060
10742
|
agentName: watchdogAgentName,
|
|
10061
10743
|
pollIntervalMs: RESTART_WATCHDOG_POLL_MS,
|
|
@@ -10120,6 +10802,52 @@ void (async () => {
|
|
|
10120
10802
|
onStall: (agentId, idleMs, description) => {
|
|
10121
10803
|
progressDriver?.onSubAgentStall(agentId, idleMs, description)
|
|
10122
10804
|
},
|
|
10805
|
+
// Symmetric to onStall: clear the ⚠ Stalled badge as soon
|
|
10806
|
+
// as the watcher sees JSONL activity return, instead of
|
|
10807
|
+
// waiting on the next render tick to recompute idle ms.
|
|
10808
|
+
onUnstall: (agentId, description) => {
|
|
10809
|
+
progressDriver?.onSubAgentUnstall?.(agentId, description)
|
|
10810
|
+
},
|
|
10811
|
+
// #card-audit-log: symmetric sub_agent_finished surface.
|
|
10812
|
+
// The driver's per-chat shadow knows the parent turnKey and
|
|
10813
|
+
// the registry DB carries the background flag — combine them
|
|
10814
|
+
// into a single audit-log line for retrospective debugging.
|
|
10815
|
+
onFinish: ({ agentId, outcome, toolCount, durationMs }) => {
|
|
10816
|
+
let parentTurnKey = ''
|
|
10817
|
+
let chatId = ''
|
|
10818
|
+
let isBackground = false
|
|
10819
|
+
try {
|
|
10820
|
+
const fleets = progressDriver?.peekAllFleets() ?? []
|
|
10821
|
+
for (const f of fleets) {
|
|
10822
|
+
if (f.fleet.has(agentId)) {
|
|
10823
|
+
parentTurnKey = f.turnKey
|
|
10824
|
+
chatId = f.chatId ?? ''
|
|
10825
|
+
break
|
|
10826
|
+
}
|
|
10827
|
+
}
|
|
10828
|
+
} catch {
|
|
10829
|
+
// peek failures are non-fatal — we still emit the event.
|
|
10830
|
+
}
|
|
10831
|
+
if (turnsDb != null) {
|
|
10832
|
+
try {
|
|
10833
|
+
const row = turnsDb
|
|
10834
|
+
.prepare('SELECT background FROM subagents WHERE jsonl_agent_id = ?')
|
|
10835
|
+
.get(agentId) as { background: number } | undefined
|
|
10836
|
+
if (row != null) isBackground = row.background === 1
|
|
10837
|
+
} catch { /* best-effort */ }
|
|
10838
|
+
}
|
|
10839
|
+
const finalOutcome: 'completed' | 'orphan' | 'background' =
|
|
10840
|
+
isBackground ? 'background' : (outcome === 'completed' ? 'completed' : 'orphan')
|
|
10841
|
+
emitCardEvent({
|
|
10842
|
+
agent: process.env.SWITCHROOM_AGENT_NAME ?? '',
|
|
10843
|
+
chatId,
|
|
10844
|
+
turnKey: parentTurnKey,
|
|
10845
|
+
event: 'finalized',
|
|
10846
|
+
reason: `sub_agent_finished agentId=${agentId} outcome=${finalOutcome} tools=${toolCount}`,
|
|
10847
|
+
subagents: [agentId],
|
|
10848
|
+
durationMs,
|
|
10849
|
+
})
|
|
10850
|
+
},
|
|
10123
10851
|
})
|
|
10124
10852
|
process.stderr.write('telegram gateway: subagent-watcher active\n')
|
|
10125
10853
|
}
|