switchroom 0.8.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -61
- package/bin/timezone-hook.sh +9 -7
- package/dist/agent-scheduler/index.js +285 -45
- package/dist/auth-broker/index.js +13932 -0
- package/dist/cli/drive-write-pretool.mjs +5418 -0
- package/dist/cli/switchroom.js +8890 -5560
- package/dist/host-control/main.js +582 -43
- package/dist/vault/approvals/kernel-server.js +276 -47
- package/dist/vault/broker/server.js +333 -69
- package/examples/minimal.yaml +63 -0
- package/examples/personal-google-workspace-mcp/.env.example +34 -0
- package/examples/personal-google-workspace-mcp/README.md +194 -0
- package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
- package/examples/switchroom.yaml +220 -0
- package/package.json +6 -4
- package/profiles/_base/start.sh.hbs +3 -3
- package/profiles/_shared/agent-self-service.md.hbs +126 -0
- package/profiles/default/CLAUDE.md +10 -0
- package/profiles/default/CLAUDE.md.hbs +16 -0
- package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
- package/skills/buildkite-agent-runtime/SKILL.md +44 -11
- package/skills/buildkite-api/SKILL.md +31 -8
- package/skills/buildkite-cli/SKILL.md +27 -9
- package/skills/buildkite-migration/SKILL.md +22 -9
- package/skills/buildkite-pipelines/SKILL.md +26 -9
- package/skills/buildkite-secure-delivery/SKILL.md +23 -9
- package/skills/buildkite-test-engine/SKILL.md +25 -8
- package/skills/docx/SKILL.md +1 -1
- package/skills/file-bug/SKILL.md +34 -6
- package/skills/humanizer/SKILL.md +15 -0
- package/skills/humanizer-calibrate/SKILL.md +7 -1
- package/skills/mcp-builder/SKILL.md +1 -1
- package/skills/pdf/SKILL.md +1 -1
- package/skills/pptx/SKILL.md +1 -1
- package/skills/skill-creator/SKILL.md +21 -1
- package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
- package/skills/switchroom-cli/SKILL.md +63 -64
- package/skills/switchroom-health/SKILL.md +23 -10
- package/skills/switchroom-install/SKILL.md +3 -3
- package/skills/switchroom-manage/SKILL.md +26 -19
- package/skills/switchroom-runtime/SKILL.md +67 -15
- package/skills/switchroom-status/SKILL.md +26 -1
- package/skills/telegram-test-harness/SKILL.md +3 -0
- package/skills/webapp-testing/SKILL.md +31 -1
- package/skills/xlsx/SKILL.md +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
- package/telegram-plugin/admin-commands/index.ts +9 -5
- package/telegram-plugin/auth-snapshot-format.ts +612 -0
- package/telegram-plugin/auto-fallback-fleet.ts +215 -0
- package/telegram-plugin/auto-fallback.ts +28 -301
- package/telegram-plugin/dist/gateway/gateway.js +17453 -15100
- package/telegram-plugin/fleet-fallback-gate.ts +105 -0
- package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
- package/telegram-plugin/gateway/approval-callback.ts +31 -3
- package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
- package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
- package/telegram-plugin/gateway/auth-command.ts +905 -0
- package/telegram-plugin/gateway/auth-line.ts +123 -0
- package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
- package/telegram-plugin/gateway/boot-card.ts +23 -37
- package/telegram-plugin/gateway/boot-probes.ts +9 -12
- package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
- package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
- package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
- package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
- package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
- package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
- package/telegram-plugin/gateway/gateway.ts +1156 -938
- package/telegram-plugin/gateway/hostd-dispatch.ts +244 -0
- package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
- package/telegram-plugin/gateway/ipc-server.ts +69 -0
- package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
- package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
- package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
- package/telegram-plugin/model-unavailable.ts +28 -12
- package/telegram-plugin/permission-title.ts +56 -0
- package/telegram-plugin/quota-check.ts +19 -41
- package/telegram-plugin/scripts/build.mjs +0 -1
- package/telegram-plugin/shared/bot-runtime.ts +5 -4
- package/telegram-plugin/silence-poke.ts +153 -1
- package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
- package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
- package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
- package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
- package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
- package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
- package/telegram-plugin/tests/boot-probes.test.ts +27 -22
- package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
- package/telegram-plugin/tests/permission-title.test.ts +31 -0
- package/telegram-plugin/tests/quota-check.test.ts +5 -35
- package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
- package/telegram-plugin/tests/silence-poke.test.ts +237 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
- package/telegram-plugin/turn-flush-safety.ts +55 -1
- package/telegram-plugin/uat/SETUP.md +35 -1
- package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
- package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
- package/telegram-plugin/uat/runners/report.ts +150 -0
- package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
- package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
- package/telegram-plugin/uat/runners/scorer.ts +106 -0
- package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
- package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
- package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
- package/telegram-plugin/auth-dashboard.ts +0 -1104
- package/telegram-plugin/auth-slot-parser.ts +0 -497
- package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
- package/telegram-plugin/dist/foreman/foreman.js +0 -31358
- package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
- package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
- package/telegram-plugin/foreman/foreman.ts +0 -1165
- package/telegram-plugin/foreman/setup-flow.ts +0 -345
- package/telegram-plugin/foreman/setup-state.ts +0 -239
- package/telegram-plugin/foreman/state.ts +0 -203
- package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
- package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
- package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
- package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
- package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
- package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
- package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
- package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
- package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
- package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
- package/telegram-plugin/tests/foreman-state.test.ts +0 -164
- package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
- package/telegram-plugin/tests/setup-flow.test.ts +0 -510
- package/telegram-plugin/tests/setup-state.test.ts +0 -146
|
@@ -45,6 +45,22 @@
|
|
|
45
45
|
|
|
46
46
|
export type PokeLevel = 'soft' | 'firm'
|
|
47
47
|
|
|
48
|
+
/**
 * #1292: point-in-time snapshot of one in-flight tool call.
 *
 * Surfaced in the framework-fallback message so the user sees the actual
 * observable ("running Grep \"foo\" for 4m") instead of a generic
 * "still working…" line while the agent is busy inside a tool call.
 *
 * Fields are `readonly`: a snapshot describes a moment that has already
 * passed, so consumers should build a new object rather than edit one.
 */
export interface ToolSnapshot {
  /** Bare tool name as it came off the wire (e.g. "Grep", "Read", "Bash"). */
  readonly name: string
  /**
   * Natural-language descriptor from `toolLabel()` if available (e.g. the
   * query for Grep, basename for Read/Edit/Write, hostname for WebFetch),
   * or null when no useful label could be derived.
   */
  readonly label: string | null
  /** Time since this tool call started, in ms. */
  readonly durationMs: number
}
|
|
63
|
+
|
|
48
64
|
export interface SilencePokeState {
|
|
49
65
|
/** Wall-clock ms of turn start. Silence clock zero-point when no outbound yet. */
|
|
50
66
|
turnStartedAt: number
|
|
@@ -62,6 +78,16 @@ export interface SilencePokeState {
|
|
|
62
78
|
fallbackFired: boolean
|
|
63
79
|
/** Wall-clock ms of last poke fire — used for poke-success latency. */
|
|
64
80
|
lastPokeFiredAt: number | null
|
|
81
|
+
/** #1292: in-flight tool calls keyed by toolUseId. Populated by
|
|
82
|
+
* `noteToolStart` on every parent-agent `tool_use` event the gateway
|
|
83
|
+
* sees and drained by `noteToolEnd` on the matching `tool_result`.
|
|
84
|
+
* Read only inside `tick()` when the 300s fallback fires — at that
|
|
85
|
+
* point we snapshot the entries (sorted by startedAt ascending) and
|
|
86
|
+
* include the longest-running one in the fallback message body.
|
|
87
|
+
* NOTE: presence of in-flight tools does NOT reset the silence
|
|
88
|
+
* clock — the design choice in this module's header is preserved.
|
|
89
|
+
* We only enrich the fallback TEXT, not the timing. */
|
|
90
|
+
inFlightTools: Map<string, { name: string; startedAt: number; label: string | null }>
|
|
65
91
|
}
|
|
66
92
|
|
|
67
93
|
export interface ThresholdsMs {
|
|
@@ -89,9 +115,19 @@ export interface FrameworkFallbackContext {
|
|
|
89
115
|
chatId: string
|
|
90
116
|
threadId: number | null
|
|
91
117
|
/** Picked from lastThinkingAt: 'thinking' if a thinking event landed in
|
|
92
|
-
* the last 30s of silence, else 'working'.
|
|
118
|
+
* the last 30s of silence, else 'working'. Note: 'working' is the
|
|
119
|
+
* default base; when `inFlightTools` is non-empty the fallback text
|
|
120
|
+
* uses the tool-aware wording instead of either 'working' / 'thinking'
|
|
121
|
+
* (see `formatFrameworkFallbackText`). */
|
|
93
122
|
fallbackKind: 'working' | 'thinking'
|
|
94
123
|
silenceMs: number
|
|
124
|
+
/** #1292: snapshot of in-flight tool calls at the moment the fallback
|
|
125
|
+
* fires, sorted by startedAt ascending. Empty when no tools were
|
|
126
|
+
* in flight (e.g. agent genuinely silent, or all tools completed
|
|
127
|
+
* faster than the 300s threshold). The format helper uses entry [0]
|
|
128
|
+
* (longest-running) for the message body and "+ N more" when
|
|
129
|
+
* length > 1. */
|
|
130
|
+
inFlightTools: ToolSnapshot[]
|
|
95
131
|
}
|
|
96
132
|
|
|
97
133
|
export type SilencePokeMetric =
|
|
@@ -141,6 +177,7 @@ export function startTurn(key: string, now: number): void {
|
|
|
141
177
|
lastThinkingAt: null,
|
|
142
178
|
fallbackFired: false,
|
|
143
179
|
lastPokeFiredAt: null,
|
|
180
|
+
inFlightTools: new Map(),
|
|
144
181
|
})
|
|
145
182
|
}
|
|
146
183
|
|
|
@@ -205,6 +242,72 @@ export function noteThinking(key: string, now: number): void {
|
|
|
205
242
|
s.lastThinkingAt = now
|
|
206
243
|
}
|
|
207
244
|
|
|
245
|
+
/**
 * #1292: register a tool call as in flight for the turn identified by `key`.
 *
 * The entry is stored in that turn's `inFlightTools` map under `toolUseId`
 * with `now` as its `startedAt`, so `noteToolEnd` can remove it later and
 * `tick()` can report how long it has been running when the fallback fires.
 *
 * Calling again with the same `toolUseId` replaces the existing entry,
 * `startedAt` included. To refresh only the label, use `noteToolLabel`,
 * which leaves the start time (and therefore the duration) untouched.
 *
 * Does nothing when no state is tracked for `key`.
 */
export function noteToolStart(
  key: string,
  toolUseId: string,
  name: string,
  label: string | null,
  now: number,
): void {
  state.get(key)?.inFlightTools.set(toolUseId, { name, startedAt: now, label })
}
|
|
271
|
+
|
|
272
|
+
/**
 * #1292: mark a tool call as finished by dropping its entry from the
 * turn's `inFlightTools` map.
 *
 * Safe to call for a `toolUseId` that is not tracked, or for a `key` with
 * no state: both cases fall through without touching anything.
 *
 * `_now` is accepted for signature symmetry with `noteToolStart` and is
 * not read.
 */
export function noteToolEnd(
  key: string,
  toolUseId: string,
  _now: number,
): void {
  state.get(key)?.inFlightTools.delete(toolUseId)
}
|
|
288
|
+
|
|
289
|
+
/**
 * #1292: replace the label of a tool call that is already in flight.
 *
 * Meant for labels that become available after the start of the call was
 * recorded. Only `label` is rewritten; `name` and `startedAt` stay as they
 * were, so the reported duration is unaffected.
 *
 * Does nothing when `key` has no state or `toolUseId` is not currently
 * tracked (for example because the tool has already completed).
 */
export function noteToolLabel(
  key: string,
  toolUseId: string,
  label: string,
): void {
  const tracked = state.get(key)?.inFlightTools.get(toolUseId)
  if (tracked != null) {
    tracked.label = label
  }
}
|
|
310
|
+
|
|
208
311
|
/**
|
|
209
312
|
* Drain any armed poke for ANY active turn and return the system-reminder
|
|
210
313
|
* text to append. Returns null if nothing is armed.
|
|
@@ -273,14 +376,52 @@ export function formatPokeText(level: PokeLevel): string {
|
|
|
273
376
|
export function formatFrameworkFallbackText(
  fallbackKind: 'working' | 'thinking',
  silenceMs: number,
  inFlightTools: ToolSnapshot[] = [],
): string {
  // Silence is reported in whole minutes and never as less than 1.
  const silentMinutes = Math.max(1, Math.round(silenceMs / 60_000))
  const silenceNote = `(no update from agent in ${silentMinutes} min)`

  // The caller pre-sorts by startedAt ascending, so the first entry is the
  // longest-running tool call.
  const head = inFlightTools[0]
  if (head === undefined) {
    // Nothing in flight: fall back to the generic wording.
    if (fallbackKind === 'thinking') return `still thinking… ${silenceNote}`
    return `still working… ${silenceNote}`
  }

  // #1292: a tool is visibly running, so name it instead of claiming
  // "still working". Shapes produced (label shown verbatim, clipped):
  //   running Grep <label> for 4m (no update from agent in 5 min)
  //   running Grep <label> + 2 more for 4m (no update from agent in 5 min)
  //   running Grep for 4m (no update from agent in 5 min)     <- no label
  const elapsed = formatDurationShort(head.durationMs)
  const labelPart = head.label ? ` ${truncateLabel(head.label)}` : ''
  const otherCount = inFlightTools.length - 1
  const othersPart = otherCount > 0 ? ` + ${otherCount} more` : ''
  return `running ${head.name}${labelPart}${othersPart} for ${elapsed} ${silenceNote}`
}
|
|
283
405
|
|
|
406
|
+
/**
 * Compact duration rendering for the fallback message.
 *
 * The input is first rounded to the nearest whole second. Anything under
 * 60 s renders as `${s}s`; everything else renders as `${m}m`, rounded to
 * the nearest minute with halves rounding up — "4m" for 4m 29s, "5m" for
 * 4m 30s.
 *
 * Negative inputs clamp to "0s". Non-finite inputs (NaN, ±Infinity) also
 * render as "0s" so a corrupt timestamp can never surface as "NaNm" in a
 * user-visible message.
 *
 * @param ms Duration in milliseconds.
 * @returns The duration as `${s}s` or `${m}m`.
 */
function formatDurationShort(ms: number): string {
  // Guard first: Math.max(0, NaN) is NaN, which would otherwise fall
  // through both branches and come out as "NaNm".
  if (!Number.isFinite(ms)) return '0s'
  const totalSec = Math.max(0, Math.round(ms / 1000))
  if (totalSec < 60) return `${totalSec}s`
  return `${Math.round(totalSec / 60)}m`
}
|
|
415
|
+
|
|
416
|
+
/**
 * Clip a tool label so the fallback message stays readable on narrow
 * (mobile) Telegram layouts.
 *
 * Labels of at most 60 UTF-16 code units are returned unchanged. Longer
 * labels are cut to at most 59 code units and suffixed with "…". If the
 * cut would land between the two halves of a surrogate pair (e.g. inside
 * an emoji), it backs off by one code unit so the result never ends in a
 * lone high surrogate.
 *
 * @param label Label text to clip.
 * @returns The label, possibly shortened and ending in "…".
 */
function truncateLabel(label: string): string {
  const MAX = 60
  if (label.length <= MAX) return label
  let cut = MAX - 1
  // 0xD800–0xDBFF is the high-surrogate range; keeping one without its
  // low-surrogate partner would produce an ill-formed string.
  const lastKept = label.charCodeAt(cut - 1)
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1
  return label.slice(0, cut) + '…'
}
|
|
424
|
+
|
|
284
425
|
/**
|
|
285
426
|
* Internal tick — iterates active states, arms pokes or fires fallback.
|
|
286
427
|
* Exported as __tickForTests so suite can step the clock deterministically.
|
|
@@ -331,6 +472,16 @@ function tick(now: number): void {
|
|
|
331
472
|
const recentThinking = s.lastThinkingAt != null
|
|
332
473
|
&& (now - s.lastThinkingAt) < 30_000
|
|
333
474
|
const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
|
|
475
|
+
// #1292: snapshot in-flight tools at fire time, sorted by
|
|
476
|
+
// startedAt ascending so entry[0] is the longest-running.
|
|
477
|
+
// Pre-computed durations in ms; the formatter just renders.
|
|
478
|
+
const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
|
|
479
|
+
.sort((a, b) => a.startedAt - b.startedAt)
|
|
480
|
+
.map(t => ({
|
|
481
|
+
name: t.name,
|
|
482
|
+
label: t.label,
|
|
483
|
+
durationMs: now - t.startedAt,
|
|
484
|
+
}))
|
|
334
485
|
activeDeps.emitMetric({
|
|
335
486
|
kind: 'silence_fallback_sent',
|
|
336
487
|
key,
|
|
@@ -345,6 +496,7 @@ function tick(now: number): void {
|
|
|
345
496
|
threadId,
|
|
346
497
|
fallbackKind,
|
|
347
498
|
silenceMs: silence,
|
|
499
|
+
inFlightTools,
|
|
348
500
|
})
|
|
349
501
|
if (r != null && typeof (r as Promise<void>).catch === 'function') {
|
|
350
502
|
;(r as Promise<void>).catch((err) => {
|