switchroom 0.14.27 → 0.14.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +20 -4
- package/dist/host-control/main.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/bridge/bridge.ts +15 -0
- package/telegram-plugin/card-format.ts +7 -4
- package/telegram-plugin/dist/bridge/bridge.js +18 -0
- package/telegram-plugin/dist/gateway/gateway.js +2151 -1729
- package/telegram-plugin/dist/server.js +18 -0
- package/telegram-plugin/gateway/gateway.ts +464 -12
- package/telegram-plugin/history.ts +16 -4
- package/telegram-plugin/permission-title.ts +48 -0
- package/telegram-plugin/registry/subagents-schema.ts +35 -0
- package/telegram-plugin/registry/subagents.test.ts +78 -0
- package/telegram-plugin/secret-detect/patterns.ts +8 -0
- package/telegram-plugin/secret-detect/redact.ts +76 -0
- package/telegram-plugin/session-tail.ts +15 -0
- package/telegram-plugin/subagent-watcher.ts +19 -1
- package/telegram-plugin/tests/card-format.test.ts +16 -0
- package/telegram-plugin/tests/gateway-outbound-redact.test.ts +80 -0
- package/telegram-plugin/tests/gateway-request-secret.test.ts +78 -0
- package/telegram-plugin/tests/history.test.ts +59 -0
- package/telegram-plugin/tests/permission-title.test.ts +68 -0
- package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +35 -0
- package/telegram-plugin/tests/secret-detect-sanctum.test.ts +115 -0
- package/telegram-plugin/tests/session-tail.test.ts +43 -0
- package/telegram-plugin/tests/worker-activity-feed.test.ts +15 -0
- package/telegram-plugin/uat/scenarios/jtbd-request-secret-dm.test.ts +101 -0
- package/telegram-plugin/worker-activity-feed.ts +5 -2
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
|
|
28
28
|
import { chmodSync, mkdirSync } from 'fs'
|
|
29
29
|
import { join } from 'path'
|
|
30
|
+
import { redact } from './secret-detect/redact.js'
|
|
30
31
|
|
|
31
32
|
/**
|
|
32
33
|
* `bun:sqlite` is a Bun built-in — Vite/Node loaders can't resolve it
|
|
@@ -300,6 +301,10 @@ export function recordInbound(args: RecordInboundArgs): void {
|
|
|
300
301
|
(chat_id, thread_id, message_id, role, user, user_id, ts, text, attachment_kind, group_id, reply_to_message_id, reply_to_text)
|
|
301
302
|
VALUES (?, ?, ?, 'user', ?, ?, ?, ?, ?, NULL, ?, ?)
|
|
302
303
|
`)
|
|
304
|
+
// Defense-in-depth: never persist a detected secret to the message store.
|
|
305
|
+
// The inbound gate (server.ts handleInbound) already deletes + vaults a
|
|
306
|
+
// high-confidence hit before reaching here, so for caught secrets this is
|
|
307
|
+
// a no-op; it's the backstop for any shape the gate's pattern set misses.
|
|
303
308
|
stmt.run(
|
|
304
309
|
args.chat_id,
|
|
305
310
|
args.thread_id ?? null,
|
|
@@ -307,10 +312,10 @@ export function recordInbound(args: RecordInboundArgs): void {
|
|
|
307
312
|
args.user ?? null,
|
|
308
313
|
args.user_id ?? null,
|
|
309
314
|
args.ts,
|
|
310
|
-
args.text,
|
|
315
|
+
redact(args.text),
|
|
311
316
|
args.attachment_kind ?? null,
|
|
312
317
|
args.reply_to_message_id ?? null,
|
|
313
|
-
args.reply_to_text ?? null,
|
|
318
|
+
args.reply_to_text != null ? redact(args.reply_to_text) : (args.reply_to_text ?? null),
|
|
314
319
|
)
|
|
315
320
|
}
|
|
316
321
|
|
|
@@ -356,9 +361,14 @@ export function recordOutbound(args: RecordOutboundArgs): void {
|
|
|
356
361
|
)
|
|
357
362
|
}
|
|
358
363
|
}) as (...args: unknown[]) => unknown)
|
|
364
|
+
// Outbound redaction: the agent→user direction has no other secret
|
|
365
|
+
// scrub, so this is the chokepoint that keeps an agent-echoed secret out
|
|
366
|
+
// of the message store (e.g. an agent quoting a token it read from a file
|
|
367
|
+
// or a not-yet-vaulted value). Masks the secret bytes in place; the
|
|
368
|
+
// surrounding reply text is preserved.
|
|
359
369
|
const rows: Array<[number, string, string | null]> = args.message_ids.map((id, i) => [
|
|
360
370
|
id,
|
|
361
|
-
args.texts[i] ?? '',
|
|
371
|
+
redact(args.texts[i] ?? ''),
|
|
362
372
|
args.attachment_kinds?.[i] ?? null,
|
|
363
373
|
])
|
|
364
374
|
tx(rows)
|
|
@@ -387,7 +397,9 @@ export function recordEdit(args: RecordEditArgs): void {
|
|
|
387
397
|
SET text = ?
|
|
388
398
|
WHERE chat_id = ? AND message_id = ?
|
|
389
399
|
`)
|
|
390
|
-
|
|
400
|
+
// Same outbound chokepoint as recordOutbound — an edit must not
|
|
401
|
+
// reintroduce a raw secret into the stored row.
|
|
402
|
+
.run(redact(args.text), args.chat_id, args.message_id)
|
|
391
403
|
}
|
|
392
404
|
|
|
393
405
|
export interface RecordReactionArgs {
|
|
@@ -248,6 +248,54 @@ export function describeGrant(
|
|
|
248
248
|
}
|
|
249
249
|
}
|
|
250
250
|
|
|
251
|
+
/**
 * Builds the agent-voiced "verdict received, carrying on" line that follows
 * an answered permission card (allow / deny / always / slash / free-text).
 * It is intended to be posted as its own Telegram message: the card edit and
 * the status reaction are easy to overlook, so this is the legible signal
 * that the tap landed, and it names the work being (re)started.
 *
 * Styled after `formatPermissionCardBody` ("🔐 <b>Gymbro</b> wants to
 * edit: log.md" → "▶️ <b>Gymbro</b> — got it, continuing: edit: log.md").
 *
 * @param opts.agentName Display name; passed through `capFirst` and
 *   HTML-escaped. A null or empty name falls back to the literal "Agent".
 * @param opts.behavior `"allow"` selects the ▶️ wording; any other value
 *   takes the 🚫 (deny) wording.
 * @param opts.action Operator-facing phrase from {@link naturalAction}. It is
 *   trimmed; a blank phrase selects the action-less variant of each message.
 * @param opts.timeoutMinutes Deny path only. When present, the wording says
 *   "no answer in Nm" (the request aged out) instead of a deliberate denial.
 * @returns Markup for `parse_mode: 'HTML'`; the name and the action are the
 *   only interpolated values and both go through `escapeTgHtml`.
 */
export function formatPermissionResumeMessage(opts: {
  agentName: string | null;
  behavior: "allow" | "deny";
  action: string;
  timeoutMinutes?: number;
}): string {
  const { agentName, behavior, timeoutMinutes } = opts;

  // Bold subject: the capitalised agent name, or a generic stand-in.
  let who = `<b>Agent</b>`;
  if (agentName && agentName.length > 0) {
    who = `<b>${escapeTgHtml(capFirst(agentName))}</b>`;
  }

  const phrase = (opts.action ?? "").trim();
  const named = phrase.length > 0;

  if (behavior === "allow") {
    if (!named) {
      return `▶️ ${who} — got it, back to work.`;
    }
    return `▶️ ${who} — got it, continuing: <i>${escapeTgHtml(phrase)}</i>`;
  }

  // Everything below is the deny path. A timeout reads as "nobody answered".
  if (timeoutMinutes != null) {
    if (!named) {
      return `🚫 ${who} — no answer in ${timeoutMinutes}m, continuing without it.`;
    }
    return `🚫 ${who} — no answer in ${timeoutMinutes}m, continuing without it (<i>${escapeTgHtml(phrase)}</i>).`;
  }

  // Deliberate denial by the operator.
  if (!named) {
    return `🚫 ${who} — noted, continuing without it.`;
  }
  return `🚫 ${who} — noted, I won't ${escapeTgHtml(lowerFirst(phrase))}. Continuing without it.`;
}
|
|
298
|
+
|
|
251
299
|
function resolveSkillName(input: Record<string, unknown>): string | null {
|
|
252
300
|
return (
|
|
253
301
|
readString(input, "skill") ??
|
|
@@ -360,6 +360,41 @@ export function getSubagentByJsonlId(db: SqliteDatabase, jsonlAgentId: string):
|
|
|
360
360
|
return row ? mapSubagentRow(row) : null
|
|
361
361
|
}
|
|
362
362
|
|
|
363
|
+
/**
 * Number of background subagent rows whose status is exactly `running`.
 *
 * This is the dispatch-time answer to "is a background worker still in
 * flight". `recordSubagentStart` INSERTs the row with `status='running'` the
 * moment the parent's `Agent` tool_use fires (keyed on the `toolu_…` id),
 * i.e. BEFORE the parent's turn ends. The deferred-done-reaction gate reads
 * this count so the 👍 is held as soon as a worker is dispatched, instead of
 * snapshotting the file-discovery registry, which lags dispatch by a
 * poll/fswatch tick and therefore missed just-dispatched workers (the
 * premature-👍 race).
 *
 * Why `stalled` is deliberately NOT counted:
 *   - `stalled` is not a terminal status. The reaper (`reapStuckRunningRows`)
 *     moves a row to `stalled`, never to `completed`/`failed`, so an orphaned
 *     dispatch (row INSERTed, JSONL never linked, nothing ever terminalised
 *     it) stays `stalled` indefinitely. Counting it would pin the gate above
 *     zero forever (`reaction-defer.ts` `promote()` bails while the count
 *     is > 0).
 *   - A live-but-quiet worker is driven to `completed` by the watcher's
 *     terminal paths (end_turn signal or silent-stall synthesis, both via
 *     `recordSubagentEnd`) long before the 1h reaper TTL, and a stalled row
 *     that genuinely resumes is flipped back to `running` by
 *     `recordSubagentResume`. Excluding `stalled` therefore never releases
 *     the 👍 for a worker that is merely paused rather than dead.
 *
 * @param db Open subagents database handle.
 * @returns The matching row count, or 0 when the query yields no row.
 */
export function countRunningBackgroundSubagents(db: SqliteDatabase): number {
  const sql = "SELECT count(*) AS n FROM subagents WHERE background = 1 AND status = 'running'"
  const result = db.prepare(sql).get() as { n: number } | undefined
  if (result == null) return 0
  return result.n ?? 0
}
|
|
397
|
+
|
|
363
398
|
/**
|
|
364
399
|
* Record that a subagent has reached a terminal state (completed or failed).
|
|
365
400
|
* Sets `ended_at`, `status`, and optionally `result_summary`.
|
|
@@ -28,6 +28,7 @@ import {
|
|
|
28
28
|
bumpSubagentActivity,
|
|
29
29
|
getSubagent,
|
|
30
30
|
reapStuckRunningRows,
|
|
31
|
+
countRunningBackgroundSubagents,
|
|
31
32
|
} from './subagents-schema.js'
|
|
32
33
|
|
|
33
34
|
// ---------------------------------------------------------------------------
|
|
@@ -182,6 +183,83 @@ describe('recordSubagentStart + recordSubagentEnd happy path', () => {
|
|
|
182
183
|
})
|
|
183
184
|
})
|
|
184
185
|
|
|
186
|
+
// ---------------------------------------------------------------------------
// countRunningBackgroundSubagents — dispatch-time gate for the deferred-done
// 👍 reaction. A row counts as "still running" from the instant
// recordSubagentStart inserts it (status='running'), which closes the
// file-discovery registration race that promoted the 👍 prematurely.
// ---------------------------------------------------------------------------

describe('countRunningBackgroundSubagents', () => {
  type Db = ReturnType<typeof openFreshSubagentsDbInMemory>

  // Every case dispatches its worker at the same timestamp; only the id and
  // the background flag vary.
  const dispatch = (db: Db, id: string, background = true): void => {
    recordSubagentStart(db, { id, background, startedAt: 1000 })
  }

  // Assert the gate's current reading.
  const expectRunning = (db: Db, expected: number): void => {
    expect(countRunningBackgroundSubagents(db)).toBe(expected)
  }

  it('counts a background worker the moment it starts (before any terminal)', () => {
    const db = openFreshSubagentsDbInMemory()
    expectRunning(db, 0)
    dispatch(db, 'bg-1')
    expectRunning(db, 1)
    db.close()
  })

  it('does NOT count a stalled worker — stalled is the reaper sink, never terminalised', () => {
    // A `stalled` row is neither terminal nor actively running. The case that
    // motivates excluding it is the orphaned dispatch: the row never links a
    // JSONL, nothing drives it to `completed`, and the reaper parks it in
    // `stalled`. Counting that row would hold the deferred 👍 above zero
    // forever (promote() bails while the count is > 0). Here the row is
    // stalled directly via recordSubagentStall.
    const db = openFreshSubagentsDbInMemory()
    dispatch(db, 'bg-2')
    recordSubagentStall(db, { id: 'bg-2', stalledAt: 1500 })
    expectRunning(db, 0)
    db.close()
  })

  it('a row reaped to stalled does not keep the gate above zero (permanent-👍-hold regression guard)', () => {
    // Orphaned-dispatch wedge: dispatch inserts a `running` row, the JSONL
    // never links, and the reaper transitions it to `stalled`. The gate must
    // then read zero so promote() can fire.
    const db = openFreshSubagentsDbInMemory()
    dispatch(db, 'bg-orphan')
    expectRunning(db, 1)
    const reapOutcome = reapStuckRunningRows(db, { ttlMs: 500, now: 5000 })
    expect(reapOutcome.reaped).toBe(1)
    expect(getSubagent(db, 'bg-orphan')!.status).toBe('stalled')
    expectRunning(db, 0)
    db.close()
  })

  it('re-counts a stalled worker that resumes — recordSubagentResume flips it back to running', () => {
    // A worker that was only paused and resumes activity goes
    // stalled → running via recordSubagentResume, so the gate holds the 👍
    // again. Excluding `stalled` never releases it for a paused worker.
    const db = openFreshSubagentsDbInMemory()
    dispatch(db, 'bg-resume')
    recordSubagentStall(db, { id: 'bg-resume', stalledAt: 1500 })
    expectRunning(db, 0)
    recordSubagentResume(db, { id: 'bg-resume', resumedAt: 2000 })
    expectRunning(db, 1)
    db.close()
  })

  it('drops to zero once the worker reaches a terminal status', () => {
    const db = openFreshSubagentsDbInMemory()
    dispatch(db, 'bg-3')
    expectRunning(db, 1)
    recordSubagentEnd(db, { id: 'bg-3', endedAt: 2000, status: 'completed' })
    expectRunning(db, 0)
    db.close()
  })

  it('ignores foreground subagents — only background workers gate the reaction', () => {
    const db = openFreshSubagentsDbInMemory()
    dispatch(db, 'fg-1', false)
    expectRunning(db, 0)
    db.close()
  })
})
|
|
262
|
+
|
|
185
263
|
// ---------------------------------------------------------------------------
|
|
186
264
|
// Test 4 — start → stall → end
|
|
187
265
|
// ---------------------------------------------------------------------------
|
|
@@ -54,6 +54,14 @@ export const ANCHORED_PATTERNS: PatternDef[] = [
|
|
|
54
54
|
// Telegram bot tokens: with "bot" prefix or bare ID:token.
|
|
55
55
|
{ rule_id: 'telegram_bot_token_prefixed', regex: /\bbot(\d{6,}:[A-Za-z0-9_-]{20,})\b/g, captureIndex: 1, slugHint: 'telegram_bot_token' },
|
|
56
56
|
{ rule_id: 'telegram_bot_token', regex: /\b(\d{6,}:[A-Za-z0-9_-]{20,})\b/g, captureIndex: 1, slugHint: 'telegram_bot_token' },
|
|
57
|
+
// Laravel Sanctum / Coolify personal-access tokens. Shape: `<id>|<token>`
|
|
58
|
+
// where <id> is the integer PK and <token> is `Str::random(40)` — 40 base62
|
|
59
|
+
// chars. The `|` separator is what distinguishes this from a Telegram
|
|
60
|
+
// `id:token` (colon) or a JWT. Length floor 40 (the Sanctum default) keeps
|
|
61
|
+
// this off short pipe-joined chat like `1|foo` or markdown table cells.
|
|
62
|
+
// Incident 2026-06-01: a live `17|<40-char>` Coolify token pasted by a user
|
|
63
|
+
// slipped every existing pattern and persisted in plaintext.
|
|
64
|
+
{ rule_id: 'laravel_sanctum_token', regex: /\b(\d+\|[A-Za-z0-9]{40,})\b/g, captureIndex: 1, slugHint: 'api_token' },
|
|
57
65
|
{ rule_id: 'aws_access_key', regex: /\b(AKIA[0-9A-Z]{16})\b/g, captureIndex: 1, slugHint: 'aws_access_key' },
|
|
58
66
|
{ rule_id: 'jwt', regex: /\b(eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,})\b/g, captureIndex: 1, slugHint: 'jwt' },
|
|
59
67
|
]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `redact(text)` — sanitize text by replacing detected secrets and
|
|
3
|
+
* credential-bearing URL parts with `[REDACTED]` markers, in place.
|
|
4
|
+
*
|
|
5
|
+
* This is the shared mask-in-place chokepoint. Unlike the inbound
|
|
6
|
+
* vault-staging flow (which DELETES the Telegram message and offers to
|
|
7
|
+
* save the secret), `redact()` leaves the surrounding prose intact and
|
|
8
|
+
* only masks the secret byte-ranges — the right behavior when we must
|
|
9
|
+
* keep a record but never store/log/forward the raw value:
|
|
10
|
+
*
|
|
11
|
+
* - INBOUND + OUTBOUND history persistence — `history.ts`
|
|
12
|
+
* `recordInbound` / `recordOutbound` redact before the row hits
|
|
13
|
+
* SQLite, so no detected secret survives in the message store in
|
|
14
|
+
* either direction (defense-in-depth behind the inbound gate, and
|
|
15
|
+
* the only redaction on the agent→user direction).
|
|
16
|
+
* - `switchroom issues record` — `src/issues/store.ts:capDetail`.
|
|
17
|
+
* - `switchroom secret-detect redact --stdin` — bash-callable shim.
|
|
18
|
+
* - hostd — `src/host-control/server.ts`.
|
|
19
|
+
*
|
|
20
|
+
* Detection is delegated to `detectSecrets()` (same patterns, same
|
|
21
|
+
* suppressor, same engine as the inbound Telegram gate) so a pattern
|
|
22
|
+
* added once — e.g. the Laravel/Coolify Sanctum `<id>|<token>` shape —
|
|
23
|
+
* covers detection, inbound interception, and this redactor uniformly.
|
|
24
|
+
*
|
|
25
|
+
* Idempotence: for token-shape detections the marker doesn't re-match.
|
|
26
|
+
* For *structural* detectors (`cli_flag`, `json_secret_field`) a second
|
|
27
|
+
* pass may rewrite the tag, but the bytes stay redacted. Rely on "no
|
|
28
|
+
* detected secret bytes survive any number of passes", not on strict
|
|
29
|
+
* `redact(redact(x)) === redact(x)`.
|
|
30
|
+
*/
|
|
31
|
+
import { detectSecrets, type Detection } from './index.js'
|
|
32
|
+
import { redactUrls } from './url-redact.js'
|
|
33
|
+
|
|
34
|
+
/** Generic marker, used when a detection's rule id carries no informative label. */
export const REDACTED_MARKER = '[REDACTED]'
|
|
35
|
+
|
|
36
|
+
/**
 * Synchronous, fast redactor. Vendored pattern engine only (no async
 * secretlint) so it is safe on hot paths (every stored message).
 *
 * Order matters:
 *   1. URL credentials and sensitive query params are scrubbed first by
 *      `redactUrls` (`https://u:p@host` → `https://***@host`,
 *      `?key=***`).
 *   2. `detectSecrets` then runs over the URL-scrubbed text, and every
 *      detected `[start, end)` range is replaced by a marker.
 *
 * Overlap handling: all offsets refer to the URL-scrubbed string, so they are
 * only valid while that string is untouched. If two detections cover the same
 * or intersecting ranges (two rules hitting one token, or a token nested in a
 * key/value hit), splicing them one after another would apply the second
 * range to an already-resized string — which can leave the tail of a secret
 * behind or swallow neighbouring prose. Ranges are therefore coalesced first
 * and the output is rebuilt in one left-to-right pass. For disjoint
 * detections the result is identical to splicing right-to-left.
 * NOTE(review): whether `detectSecrets` already de-duplicates overlapping
 * hits is not visible from this file; if it does, the merge is a no-op.
 *
 * @param text Text to sanitize. Empty (or otherwise falsy) input is returned
 *   unchanged.
 * @returns The text with URL credentials scrubbed and every detected range
 *   replaced by `[REDACTED]` / `[REDACTED:<rule>]`.
 */
export function redact(text: string): string {
  if (!text) return text

  // Step 1 — URL credentials and known-sensitive query params.
  const urlScrubbed = redactUrls(text)

  // Step 2 — token shape detection over the URL-scrubbed text.
  const hits: Detection[] = detectSecrets(urlScrubbed)
  if (hits.length === 0) return urlScrubbed

  // Earliest start first; on a tie the widest hit wins, so its rule labels
  // the merged span.
  const ordered = [...hits].sort((a, b) => a.start - b.start || b.end - a.end)

  // Coalesce intersecting ranges. Merely adjacent ranges (start === end of
  // the previous one) stay separate, each keeping its own marker.
  const spans: Array<{ start: number; end: number; rule_id: string }> = []
  for (const hit of ordered) {
    const last = spans[spans.length - 1]
    if (last !== undefined && hit.start < last.end) {
      if (hit.end > last.end) last.end = hit.end
      continue
    }
    spans.push({ start: hit.start, end: hit.end, rule_id: hit.rule_id })
  }

  // Rebuild from the untouched source so every offset stays valid.
  let out = ''
  let cursor = 0
  for (const span of spans) {
    out += urlScrubbed.slice(cursor, span.start) + redactedMarker(span.rule_id)
    cursor = span.end
  }
  return out + urlScrubbed.slice(cursor)
}
|
|
64
|
+
|
|
65
|
+
/**
 * Picks the marker text for one detection: `[REDACTED:<rule>]` when the rule
 * id is informative, the bare `[REDACTED]` otherwise. The rule_id is
 * detector-emitted, so it never contains attacker-controlled bytes and is
 * safe to embed verbatim.
 *
 * A single leading `kv_` or `env_` prefix is dropped before the label is
 * used. Labels that are empty, `key_value` or `kv_entropy` after that step
 * collapse to the generic marker.
 *
 * NOTE(review): because the prefix is stripped first, a rule id of exactly
 * `kv_entropy` becomes `entropy` and yields `[REDACTED:entropy]`; the
 * `kv_entropy` comparison only matches an id that still carries that text
 * after stripping (e.g. `env_kv_entropy`). Confirm this is intended.
 *
 * @param ruleId Rule identifier reported by the detector.
 * @returns The marker string to splice over the detected range.
 */
function redactedMarker(ruleId: string): string {
  const label = ruleId.replace(/^(kv|env)_/, '')
  const informative = label !== '' && label !== 'key_value' && label !== 'kv_entropy'
  return informative ? `[REDACTED:${label}]` : REDACTED_MARKER
}
|
|
@@ -390,6 +390,21 @@ export function projectSubagentLine(
|
|
|
390
390
|
events.push({ kind: 'sub_agent_text', agentId, text })
|
|
391
391
|
}
|
|
392
392
|
}
|
|
393
|
+
// Authoritative early terminal: a background `Agent` worker's JSONL on
|
|
394
|
+
// claude ≥2.1.156 never writes the `system/turn_duration` line below, so
|
|
395
|
+
// the watcher used to only learn the worker finished via the ~5-min
|
|
396
|
+
// silent-stall synthesis net — leaving the card stuck "running" and the
|
|
397
|
+
// deferred 👍 held for minutes after the work was actually done. The
|
|
398
|
+
// worker DOES write a final assistant message with
|
|
399
|
+
// `stop_reason: 'end_turn'` (a tool-using turn is `'tool_use'` and keeps
|
|
400
|
+
// going), so treat that as the terminal signal. Emitted AFTER the content
|
|
401
|
+
// events so the final text/preamble still renders; the watcher's turn_end
|
|
402
|
+
// handler is guarded on `state === 'running'`, so a later real
|
|
403
|
+
// turn_duration line is a no-op.
|
|
404
|
+
const stopReason = message?.stop_reason as string | undefined
|
|
405
|
+
if (stopReason === 'end_turn') {
|
|
406
|
+
events.push({ kind: 'sub_agent_turn_end', agentId })
|
|
407
|
+
}
|
|
393
408
|
return events
|
|
394
409
|
}
|
|
395
410
|
|
|
@@ -43,7 +43,7 @@ import { homedir } from 'os'
|
|
|
43
43
|
import { projectSubagentLine, sanitizeCwdToProjectName, detectErrorInTranscriptLine } from './session-tail.js'
|
|
44
44
|
import { sanitiseToolArg } from './fleet-state.js'
|
|
45
45
|
import { escapeHtml, truncate } from './card-format.js'
|
|
46
|
-
import { bumpSubagentActivity, recordSubagentStall, recordSubagentResume, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
|
|
46
|
+
import { bumpSubagentActivity, recordSubagentStall, recordSubagentResume, recordSubagentEnd, reapStuckRunningRows, countRunningBackgroundSubagents } from './registry/subagents-schema.js'
|
|
47
47
|
import { touchTurnActiveMarker } from './gateway/turn-active-marker.js'
|
|
48
48
|
|
|
49
49
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
@@ -377,6 +377,13 @@ export interface SubagentWatcherHandle {
|
|
|
377
377
|
stop(): void
|
|
378
378
|
/** Snapshot of current registry for tests/inspection. */
|
|
379
379
|
getRegistry(): ReadonlyMap<string, WorkerEntry>
|
|
380
|
+
/**
|
|
381
|
+
* Count background workers still in flight, read from the dispatch-time DB
|
|
382
|
+
* (not the file-discovery registry). Returns null when no DB is wired so the
|
|
383
|
+
* caller can fall back to the registry snapshot. Drives the deferred-done
|
|
384
|
+
* reaction gate — see `countRunningBackgroundSubagents`.
|
|
385
|
+
*/
|
|
386
|
+
countRunningBackgroundWorkers(): number | null
|
|
380
387
|
}
|
|
381
388
|
|
|
382
389
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
@@ -1498,5 +1505,16 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
1498
1505
|
getRegistry(): ReadonlyMap<string, WorkerEntry> {
|
|
1499
1506
|
return registry
|
|
1500
1507
|
},
|
|
1508
|
+
|
|
1509
|
+
countRunningBackgroundWorkers(): number | null {
  // Without a DB there is nothing authoritative to read; null tells the
  // caller to fall back to the registry snapshot.
  if (db == null) return null

  let inFlight: number | null
  try {
    inFlight = countRunningBackgroundSubagents(db)
  } catch {
    // A torn/locked DB read must not wedge the reaction gate — report
    // "unknown" (null) so the caller uses the registry snapshot instead.
    inFlight = null
  }
  return inFlight
},
|
|
1501
1519
|
}
|
|
1502
1520
|
}
|
|
@@ -27,6 +27,22 @@ describe('stripMarkdown', () => {
|
|
|
27
27
|
expect(stripMarkdown('2) second')).toBe('second')
|
|
28
28
|
})
|
|
29
29
|
|
|
30
|
+
it('strips leading block markup on EVERY line, not just the string start', () => {
  // Screenshot regression: a worker summary that opens with prose ("Done.")
  // and only then has a `## Summary` heading. Without the `gm` flags the
  // heading marker leaked into the rendered card.
  const headed = stripMarkdown('Done.\n\n## Summary\n\nFixed the bug')
  expect(headed).not.toContain('##')
  for (const kept of ['Done.', 'Summary', 'Fixed the bug']) {
    expect(headed).toContain(kept)
  }

  // Quote and list markers on later lines must go too, leaving their text.
  const mixed = stripMarkdown('intro\n> quoted\n- item')
  expect(mixed).not.toMatch(/(^|\n)\s*[>\-*]/)
  for (const kept of ['quoted', 'item']) {
    expect(mixed).toContain(kept)
  }
})
|
|
45
|
+
|
|
30
46
|
it('reduces a link to its label', () => {
|
|
31
47
|
expect(stripMarkdown('see [the PR](https://x/y) here')).toBe('see the PR here')
|
|
32
48
|
})
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { readFileSync } from 'node:fs'
|
|
3
|
+
|
|
4
|
+
/**
 * Structural test for the outbound secret-scrub (#2044).
 *
 * Outbound (agent→user) text used to have NO redaction: an agent echoing a
 * secret it read from a file/env/not-yet-vaulted value would send the raw
 * bytes to Telegram, log a preview to stderr, and store them in history.
 * These cases pin that `redactOutboundText()` runs at the ENTRY of each
 * agent-free-text tool (reply / stream_reply / edit_message), ahead of the
 * stderr preview, the dedup key, the send, and the history record.
 *
 * Why structural: executeReply/executeStreamReply/executeEditMessage are not
 * exported (same constraint as gateway-secret-detect.test.ts), so the checks
 * read gateway.ts as text and compare the positions of marker strings. The
 * masking itself — that `redact()` covers the Sanctum shape and every
 * provider token — is exercised behaviorally in secret-detect-sanctum.test.ts
 * and the redact() unit tests; what is left to pin here is the wiring + slot.
 */
describe('gateway outbound secret-scrub — structural wiring', () => {
  const gatewayUrl = new URL('../gateway/gateway.ts', import.meta.url)
  const src = readFileSync(gatewayUrl, 'utf8')

  // Asserts that, inside the function opened by `fnHeader`, the scrub call
  // appears after the header and `laterMarker` appears after the scrub call.
  const expectScrubAtEntry = (fnHeader: string, scrubCall: string, laterMarker: string): void => {
    const start = src.indexOf(fnHeader)
    const redactIdx = src.indexOf(scrubCall, start)
    const laterIdx = src.indexOf(laterMarker, start)
    expect(start).toBeGreaterThan(0)
    expect(redactIdx).toBeGreaterThan(start)
    expect(laterIdx).toBeGreaterThan(redactIdx)
  }

  // First 400 characters of the helper, starting at its declaration.
  const helperSnippet = (): { idx: number; body: string } => {
    const idx = src.indexOf('function redactOutboundText(')
    return { idx, body: src.slice(idx, idx + 400) }
  }

  it('imports the shared redactor', () => {
    expect(src).toMatch(/import \{ redact \} from '\.\.\/secret-detect\/redact\.js'/)
  })

  it('defines the redactOutboundText helper backed by redact()', () => {
    const { idx, body } = helperSnippet()
    expect(idx).toBeGreaterThan(0)
    expect(body).toMatch(/redact\(text\)/)
  })

  it('reply: scrubs at entry, before the stderr preview log', () => {
    // mask BEFORE the preview is logged
    expectScrubAtEntry(
      'async function executeReply(',
      `redactOutboundText(text, 'reply')`,
      'reply: invoked chatId=',
    )
  })

  it('stream_reply: scrubs at entry, before the voice scrub + dedup', () => {
    expectScrubAtEntry(
      'async function executeStreamReply(',
      `redactOutboundText(args.text as string, 'stream_reply')`,
      `site: 'stream_reply'`,
    )
  })

  it('edit_message: scrubs at entry, before the voice scrub + send', () => {
    expectScrubAtEntry(
      'async function executeEditMessage(',
      `redactOutboundText(editRawText, 'edit_message')`,
      `site: 'edit_message'`,
    )
  })

  it('turn-flush backstop: scrubs the model terminal prose before send', () => {
    // Turn-flush delivers the model's answer when it skipped
    // reply/stream_reply — arbitrary agent free-text that hits the wire and
    // the stderr preview.
    const redactIdx = src.indexOf(`redactOutboundText(capturedText, 'turn_flush')`)
    const scrubSiteIdx = src.indexOf(`site: 'turn_flush'`)
    expect(redactIdx).toBeGreaterThan(0)
    expect(scrubSiteIdx).toBeGreaterThan(redactIdx) // mask BEFORE the voice scrub + send
  })

  it('does not log the secret value when a mask fires', () => {
    const { body } = helperSnippet()
    // The log line names the site, never the text/masked value.
    expect(body).toMatch(/outbound secret masked site=\$\{site\}/)
    expect(body).not.toMatch(/\$\{text\}|\$\{masked\}/)
  })
})
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { readFileSync } from 'node:fs'
|
|
3
|
+
|
|
4
|
+
/**
 * Structural test for the `request_secret` tool (#2045) — the secure
 * "agent asks the operator to PROVIDE a missing secret" flow. The operator
 * taps [Provide securely], sends the value once, and the gateway deletes it
 * + writes it straight to the vault; the raw value is never recorded,
 * logged, or returned to the agent.
 *
 * Structural because the gateway handlers (handleInbound, executeToolCall,
 * the callback router) aren't exported — same constraint as
 * gateway-secret-detect.test.ts. The vault-write reuse + card UX are
 * exercised end-to-end by the mtcute UAT (jtbd-request-secret-dm).
 *
 * These tests read the source files as text and match on substrings, so
 * each anchor found via indexOf/lastIndexOf is asserted to exist before a
 * window sliced from it is used. Without that, a missing anchor (-1) turns
 * `slice` into an empty or wrong window and negative assertions
 * (`.not.toMatch`) pass vacuously.
 */
describe('request_secret — gateway wiring', () => {
  const gw = readFileSync(new URL('../gateway/gateway.ts', import.meta.url), 'utf8')
  const bridge = readFileSync(new URL('../bridge/bridge.ts', import.meta.url), 'utf8')

  it('declares the MCP tool with required {chat_id,key} and NO value arg', () => {
    const idx = bridge.indexOf(`name: 'request_secret'`)
    expect(idx).toBeGreaterThan(0)
    const schema = bridge.slice(idx, idx + 3000)
    expect(schema).toMatch(/required: \['chat_id', 'key'\]/)
    // The whole point: the agent does NOT supply the value.
    expect(schema).not.toMatch(/\bvalue:\s*\{/)
    // Tells the agent never to ask for a chat paste.
    expect(schema).toMatch(/NEVER ask the user to paste/i)
  })

  it('is allow-listed and dispatched', () => {
    expect(gw).toMatch(/'request_secret',\n\]\)/)
    expect(gw).toMatch(/case 'request_secret':\s*\n\s*return executeRequestSecret\(args\)/)
  })

  it('routes the vsp: callback', () => {
    expect(gw).toMatch(/data\.startsWith\('vsp:'\)/)
    expect(gw).toMatch(/handleSecretRequestCallback\(ctx, data\)/)
  })

  it('captures the provided value BEFORE recordInbound and the broadcast', () => {
    const start = gw.indexOf('async function handleInbound(')
    // Guard the anchor: with start === -1 the searches below would scan the
    // whole file and `captureIdx > start` would hold for any match anywhere.
    expect(start).toBeGreaterThan(0)
    const captureIdx = gw.indexOf('captureProvidedSecret(ctx, chat_id', start)
    // NOTE(review): both lookups start at captureIdx, so they show that a
    // recordInbound / broadcast call follows the capture; they do not rule
    // out an earlier call between `start` and the capture.
    const recordIdx = gw.indexOf('recordInbound(', captureIdx)
    const broadcastIdx = gw.indexOf('ipcServer.broadcast(inboundMsg)', captureIdx)
    expect(captureIdx).toBeGreaterThan(start)
    expect(recordIdx).toBeGreaterThan(captureIdx)
    expect(broadcastIdx).toBeGreaterThan(captureIdx)
  })

  it('the capture deletes the raw message and writes to the vault, then returns', () => {
    const idx = gw.indexOf('async function captureProvidedSecret(')
    expect(idx).toBeGreaterThan(0)
    const body = gw.slice(idx, idx + 3600)
    // delete the raw message (presence only; relative order is not asserted)
    expect(body).toMatch(/deleteSensitiveMessage\(chat_id, msgId/)
    // write via the posture/passphrase helper
    expect(body).toMatch(/writeRequestedSecret\(armed\.key, value, chat_id\)/)
  })

  it('the agent-resume inbound carries the key but NOT the value', () => {
    const idx = gw.indexOf('async function captureProvidedSecret(')
    expect(idx).toBeGreaterThan(0)
    const body = gw.slice(idx, idx + 3600)
    const syntheticIdx = body.indexOf("source: 'secret_provided'")
    expect(syntheticIdx).toBeGreaterThan(0)
    // The synthetic text references vault:<key>, never the raw `value`.
    expect(body).toMatch(/vault:\$\{armed\.key\}/)
    const textIdx = body.lastIndexOf('text:', syntheticIdx)
    const metaIdx = body.indexOf('meta:', syntheticIdx)
    // If no `text:` precedes the synthetic marker, slice(-1, metaIdx) would
    // normally be empty and the negative match below could never fail.
    expect(textIdx).toBeGreaterThan(-1)
    expect(body.slice(textIdx, metaIdx)).not.toMatch(/\$\{value\}/)
  })

  it('dedupes to one open request per (chat,key)', () => {
    const idx = gw.indexOf('async function executeRequestSecret(')
    // Fail on the missing anchor itself rather than on an unrelated regex miss.
    expect(idx).toBeGreaterThan(0)
    const body = gw.slice(idx, idx + 1800)
    expect(body).toMatch(/p\.chat_id === chat_id && p\.key === key/)
  })
})
|
|
@@ -362,3 +362,62 @@ describe('getRecentOutboundCount (backstop dedup helper)', () => {
|
|
|
362
362
|
expect(getRecentOutboundCount('-200', 2)).toBe(1)
|
|
363
363
|
})
|
|
364
364
|
})
|
|
365
|
+
|
|
366
|
+
describe('secret redaction at persistence (both directions)', () => {
  beforeEach(() => initHistory(stateDir, 30))

  // Built by concatenation so the source never holds a contiguous
  // secret-shaped literal (repo Push Protection / no-pii lint).
  const SANCTUM = `19|${'qP4mN7rT2v'.repeat(4)}` // <id>|<40 base62> (Sanctum/Coolify)
  const GH_PAT = `ghp_${'A1b2C3d4E5'.repeat(3)}` // ghp_<30 base62>

  // Reads back the text of the first row returned for the test chat.
  const firstStoredText = () => query({ chat_id: '-100' })[0]!.text as string

  it('masks a user-pasted secret before it is stored (inbound)', () => {
    const inbound = {
      chat_id: '-100',
      thread_id: null,
      message_id: 1,
      user: 'alice',
      user_id: '111',
      ts: 1000,
      text: `the new coolify token is ${SANCTUM}, save it`,
    }
    recordInbound(inbound)

    const stored = firstStoredText()
    expect(stored).not.toContain(SANCTUM)
    expect(stored).toContain('[REDACTED')
    // surrounding prose preserved
    expect(stored).toContain('the new coolify token is')
  })

  it('masks a secret echoed by the agent before it is stored (outbound)', () => {
    const outbound = {
      chat_id: '-100',
      thread_id: null,
      message_ids: [2],
      texts: [`sure — your key is ${GH_PAT}, keep it safe`],
      ts: 2000,
    }
    recordOutbound(outbound)

    const stored = firstStoredText()
    expect(stored).not.toContain(GH_PAT)
    expect(stored).toContain('[REDACTED')
  })

  it('masks a secret introduced by an edit', () => {
    // Store a harmless message first, then edit the secret into it.
    recordOutbound({
      chat_id: '-100',
      thread_id: null,
      message_ids: [3],
      texts: ['placeholder'],
      ts: 3000,
    })
    recordEdit({ chat_id: '-100', message_id: 3, text: `token: ${SANCTUM}` })

    const stored = firstStoredText()
    expect(stored).not.toContain(SANCTUM)
    expect(stored).toContain('[REDACTED')
  })

  it('leaves ordinary prose untouched', () => {
    const inbound = {
      chat_id: '-100',
      thread_id: null,
      message_id: 4,
      user: 'a',
      user_id: '1',
      ts: 4000,
      text: 'hello, how are you?',
    }
    recordInbound(inbound)

    expect(firstStoredText()).toBe('hello, how are you?')
  })
})
|