switchroom 0.8.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +54 -61
  2. package/bin/timezone-hook.sh +9 -7
  3. package/dist/agent-scheduler/index.js +285 -45
  4. package/dist/auth-broker/index.js +13932 -0
  5. package/dist/cli/drive-write-pretool.mjs +5418 -0
  6. package/dist/cli/switchroom.js +8890 -5560
  7. package/dist/host-control/main.js +582 -43
  8. package/dist/vault/approvals/kernel-server.js +276 -47
  9. package/dist/vault/broker/server.js +333 -69
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +6 -4
  16. package/profiles/_base/start.sh.hbs +3 -3
  17. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  18. package/profiles/default/CLAUDE.md +10 -0
  19. package/profiles/default/CLAUDE.md.hbs +16 -0
  20. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  21. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  22. package/skills/buildkite-api/SKILL.md +31 -8
  23. package/skills/buildkite-cli/SKILL.md +27 -9
  24. package/skills/buildkite-migration/SKILL.md +22 -9
  25. package/skills/buildkite-pipelines/SKILL.md +26 -9
  26. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  27. package/skills/buildkite-test-engine/SKILL.md +25 -8
  28. package/skills/docx/SKILL.md +1 -1
  29. package/skills/file-bug/SKILL.md +34 -6
  30. package/skills/humanizer/SKILL.md +15 -0
  31. package/skills/humanizer-calibrate/SKILL.md +7 -1
  32. package/skills/mcp-builder/SKILL.md +1 -1
  33. package/skills/pdf/SKILL.md +1 -1
  34. package/skills/pptx/SKILL.md +1 -1
  35. package/skills/skill-creator/SKILL.md +21 -1
  36. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  37. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  38. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  39. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  40. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  41. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  42. package/skills/switchroom-cli/SKILL.md +63 -64
  43. package/skills/switchroom-health/SKILL.md +23 -10
  44. package/skills/switchroom-install/SKILL.md +3 -3
  45. package/skills/switchroom-manage/SKILL.md +26 -19
  46. package/skills/switchroom-runtime/SKILL.md +67 -15
  47. package/skills/switchroom-status/SKILL.md +26 -1
  48. package/skills/telegram-test-harness/SKILL.md +3 -0
  49. package/skills/webapp-testing/SKILL.md +31 -1
  50. package/skills/xlsx/SKILL.md +1 -1
  51. package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
  52. package/telegram-plugin/admin-commands/index.ts +9 -5
  53. package/telegram-plugin/auth-snapshot-format.ts +612 -0
  54. package/telegram-plugin/auto-fallback-fleet.ts +215 -0
  55. package/telegram-plugin/auto-fallback.ts +28 -301
  56. package/telegram-plugin/dist/gateway/gateway.js +17453 -15100
  57. package/telegram-plugin/fleet-fallback-gate.ts +105 -0
  58. package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
  59. package/telegram-plugin/gateway/approval-callback.ts +31 -3
  60. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  61. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  62. package/telegram-plugin/gateway/auth-command.ts +905 -0
  63. package/telegram-plugin/gateway/auth-line.ts +123 -0
  64. package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
  65. package/telegram-plugin/gateway/boot-card.ts +23 -37
  66. package/telegram-plugin/gateway/boot-probes.ts +9 -12
  67. package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
  68. package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
  69. package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
  70. package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
  71. package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
  72. package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
  73. package/telegram-plugin/gateway/gateway.ts +1156 -938
  74. package/telegram-plugin/gateway/hostd-dispatch.ts +244 -0
  75. package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
  76. package/telegram-plugin/gateway/ipc-server.ts +69 -0
  77. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
  78. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  79. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  80. package/telegram-plugin/model-unavailable.ts +28 -12
  81. package/telegram-plugin/permission-title.ts +56 -0
  82. package/telegram-plugin/quota-check.ts +19 -41
  83. package/telegram-plugin/scripts/build.mjs +0 -1
  84. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  85. package/telegram-plugin/silence-poke.ts +153 -1
  86. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  87. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  88. package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
  89. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  90. package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
  91. package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
  92. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
  93. package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
  94. package/telegram-plugin/tests/boot-probes.test.ts +27 -22
  95. package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
  96. package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
  97. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  98. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  99. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
  100. package/telegram-plugin/tests/silence-poke.test.ts +237 -0
  101. package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
  102. package/telegram-plugin/turn-flush-safety.ts +55 -1
  103. package/telegram-plugin/uat/SETUP.md +35 -1
  104. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  105. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  106. package/telegram-plugin/uat/runners/report.ts +150 -0
  107. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  108. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  109. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  110. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  111. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  112. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
  113. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
  114. package/telegram-plugin/auth-dashboard.ts +0 -1104
  115. package/telegram-plugin/auth-slot-parser.ts +0 -497
  116. package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
  117. package/telegram-plugin/dist/foreman/foreman.js +0 -31358
  118. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  119. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  120. package/telegram-plugin/foreman/foreman.ts +0 -1165
  121. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  122. package/telegram-plugin/foreman/setup-state.ts +0 -239
  123. package/telegram-plugin/foreman/state.ts +0 -203
  124. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  125. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  126. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  127. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  128. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  129. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  130. package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
  131. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  132. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  133. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  134. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  135. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  136. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  137. package/telegram-plugin/tests/setup-state.test.ts +0 -146
@@ -45,6 +45,22 @@
45
45
 
46
46
  export type PokeLevel = 'soft' | 'firm'
47
47
 
/**
 * #1292: point-in-time view of a single tool call that was still running
 * when the 300s framework fallback fired.
 *
 * Carried in the fallback context so the user-facing message can name the
 * thing that is observably happening (e.g. "running Grep \"foo\" for 4m")
 * rather than the generic "still working… no update in 5 min", which is
 * misleading when the agent is visibly busy with tool calls.
 */
export interface ToolSnapshot {
  /** Tool name exactly as it arrived on the wire, e.g. "Grep", "Read", "Bash". */
  name: string
  /**
   * Human-readable descriptor from `toolLabel()` when one is available
   * (the query for Grep, the basename for Read/Edit/Write, the hostname
   * for WebFetch); null when no useful label could be derived.
   */
  label: string | null
  /** Elapsed time since this tool call started, in milliseconds. */
  durationMs: number
}
63
+
48
64
  export interface SilencePokeState {
49
65
  /** Wall-clock ms of turn start. Silence clock zero-point when no outbound yet. */
50
66
  turnStartedAt: number
@@ -62,6 +78,16 @@ export interface SilencePokeState {
62
78
  fallbackFired: boolean
63
79
  /** Wall-clock ms of last poke fire — used for poke-success latency. */
64
80
  lastPokeFiredAt: number | null
81
+ /** #1292: in-flight tool calls keyed by toolUseId. Populated by
82
+ * `noteToolStart` on every parent-agent `tool_use` event the gateway
83
+ * sees and drained by `noteToolEnd` on the matching `tool_result`.
84
+ * Read only inside `tick()` when the 300s fallback fires — at that
85
+ * point we snapshot the entries (sorted by startedAt ascending) and
86
+ * include the longest-running one in the fallback message body.
87
+ * NOTE: presence of in-flight tools does NOT reset the silence
88
+ * clock — the design choice in this module's header is preserved.
89
+ * We only enrich the fallback TEXT, not the timing. */
90
+ inFlightTools: Map<string, { name: string; startedAt: number; label: string | null }>
65
91
  }
66
92
 
67
93
  export interface ThresholdsMs {
@@ -89,9 +115,19 @@ export interface FrameworkFallbackContext {
89
115
  chatId: string
90
116
  threadId: number | null
91
117
  /** Picked from lastThinkingAt: 'thinking' if a thinking event landed in
92
- * the last 30s of silence, else 'working'. */
118
+ * the last 30s of silence, else 'working'. Note: 'working' is the
119
+ * default base; when `inFlightTools` is non-empty the fallback text
120
+ * uses the tool-aware wording instead of either 'working' / 'thinking'
121
+ * (see `formatFrameworkFallbackText`). */
93
122
  fallbackKind: 'working' | 'thinking'
94
123
  silenceMs: number
124
+ /** #1292: snapshot of in-flight tool calls at the moment the fallback
125
+ * fires, sorted by startedAt ascending. Empty when no tools were
126
+ * in flight (e.g. agent genuinely silent, or all tools completed
127
+ * faster than the 300s threshold). The format helper uses entry [0]
128
+ * (longest-running) for the message body and "+ N more" when
129
+ * length > 1. */
130
+ inFlightTools: ToolSnapshot[]
95
131
  }
96
132
 
97
133
  export type SilencePokeMetric =
@@ -141,6 +177,7 @@ export function startTurn(key: string, now: number): void {
141
177
  lastThinkingAt: null,
142
178
  fallbackFired: false,
143
179
  lastPokeFiredAt: null,
180
+ inFlightTools: new Map(),
144
181
  })
145
182
  }
146
183
 
@@ -205,6 +242,72 @@ export function noteThinking(key: string, now: number): void {
205
242
  s.lastThinkingAt = now
206
243
  }
207
244
 
/**
 * #1292: register a tool call as in flight for the turn identified by `key`.
 *
 * The entry is stored in that turn's `inFlightTools` map under `toolUseId`
 * so a later `noteToolEnd` can remove it. The map is only consulted by
 * `tick()` when the 300s fallback fires; it is snapshotted into the
 * fallback context so the user-visible message can name the running tool
 * instead of the generic "still working…" text.
 *
 * Calling this again for an id that is already tracked REPLACES the whole
 * entry, including `startedAt` — the reported duration restarts from `now`.
 * To refresh only the label while keeping the duration intact, use
 * `noteToolLabel`.
 *
 * Does nothing when there is no state for `key` (which is the case when the
 * kill switch is on, because no turn state is created then).
 *
 * @param key        Turn key used to look up the silence-poke state.
 * @param toolUseId  Identifier of the tool call; map key for the entry.
 * @param name       Bare tool name as received.
 * @param label      Descriptor for the call, or null if none is known yet.
 * @param now        Wall-clock ms recorded as the call's start time.
 */
export function noteToolStart(
  key: string,
  toolUseId: string,
  name: string,
  label: string | null,
  now: number,
): void {
  const turn = state.get(key)
  if (turn != null) {
    turn.inFlightTools.set(toolUseId, { name, startedAt: now, label })
  }
}
271
+
/**
 * #1292: mark a tool call as finished by dropping its entry from the
 * turn's `inFlightTools` map.
 *
 * Safe to call for an id that was never tracked (or for a `key` with no
 * state): both cases are silent no-ops. Short-lived tools that start and
 * finish between polls cause no trouble either, because the map is only
 * read inside `tick()` at the 300s fallback boundary.
 *
 * @param key        Turn key used to look up the silence-poke state.
 * @param toolUseId  Identifier of the tool call that completed.
 * @param _now       Unused; kept so the signature mirrors `noteToolStart`.
 */
export function noteToolEnd(
  key: string,
  toolUseId: string,
  _now: number,
): void {
  // Optional chaining covers the "no state for this key" case.
  state.get(key)?.inFlightTools.delete(toolUseId)
}
288
+
/**
 * #1292: attach a late-arriving label to a tool call that is still in
 * flight.
 *
 * The tool-label sidecar (PreToolUse hook, polled every 250ms via
 * `tool-label-sidecar.ts`) may publish a richer label some time after the
 * `tool_use` event was seen. This updates the tracked entry in place —
 * `name` and `startedAt` are untouched — so a fallback message that fires
 * later shows the better label with the correct duration.
 *
 * Does nothing when `key` has no state or `toolUseId` is not tracked
 * (e.g. the tool already completed, or its start was never recorded
 * because it is a Telegram surface).
 *
 * @param key        Turn key used to look up the silence-poke state.
 * @param toolUseId  Identifier of the in-flight tool call.
 * @param label      New descriptor to store on the entry.
 */
export function noteToolLabel(
  key: string,
  toolUseId: string,
  label: string,
): void {
  const tracked = state.get(key)?.inFlightTools.get(toolUseId)
  if (tracked != null) {
    tracked.label = label
  }
}
310
+
208
311
  /**
209
312
  * Drain any armed poke for ANY active turn and return the system-reminder
210
313
  * text to append. Returns null if nothing is armed.
@@ -273,14 +376,52 @@ export function formatPokeText(level: PokeLevel): string {
export function formatFrameworkFallbackText(
  fallbackKind: 'working' | 'thinking',
  silenceMs: number,
  inFlightTools: ToolSnapshot[] = [],
): string {
  // Silence is reported in whole minutes, never less than 1.
  const silentMinutes = Math.max(1, Math.round(silenceMs / 60_000))
  const suffix = `(no update from agent in ${silentMinutes} min)`

  // No tool in flight: fall back to the generic wording chosen by the caller.
  if (inFlightTools.length === 0) {
    if (fallbackKind === 'thinking') {
      return `still thinking… ${suffix}`
    }
    return `still working… ${suffix}`
  }

  // #1292: at least one tool is in flight. The caller pre-sorts by start
  // time ascending, so entry [0] is the longest-running call; name it
  // rather than using the "still working" framing, since the agent is
  // observably doing something. Resulting shapes:
  //   running <name> <label> for <dur> (no update from agent in N min)
  //   running <name> <label> + K more for <dur> (no update from agent in N min)
  //   running <name> for <dur> (no update from agent in N min)   <- null/empty label
  // The label is inserted verbatim (after length clipping); any quoting
  // must already be part of the label.
  const longest = inFlightTools[0]!
  let headline = `running ${longest.name}`
  if (longest.label) {
    headline += ` ${truncateLabel(longest.label)}`
  }
  const othersCount = inFlightTools.length - 1
  if (othersCount > 0) {
    headline += ` + ${othersCount} more`
  }
  return `${headline} for ${formatDurationShort(longest.durationMs)} ${suffix}`
}
283
405
 
/**
 * Compact duration rendering for the fallback message.
 *
 * The input is first rounded to whole seconds (negative values clamp to 0).
 * Below 60 seconds the result is `${s}s`; from 60 seconds up it is whole
 * minutes, `${m}m`. Both steps use `Math.round`, so exact halves round up:
 * 4m 29s renders as "4m", 4m 30s and 4m 45s both render as "5m".
 */
function formatDurationShort(ms: number): string {
  const wholeSeconds = Math.max(0, Math.round(ms / 1000))
  return wholeSeconds < 60
    ? `${wholeSeconds}s`
    : `${Math.round(wholeSeconds / 60)}m`
}
415
+
/**
 * Clip a tool label so the fallback message stays readable on narrow
 * Telegram clients.
 *
 * Labels of up to 60 UTF-16 code units are returned unchanged. Longer
 * labels are cut and terminated with a single "…" so the result is at most
 * 60 code units. The limit is generous because labels are expected to be
 * pre-truncated by `toolLabel()` already.
 *
 * The cut never lands in the middle of a surrogate pair: if the last kept
 * code unit would be a high surrogate (the first half of an astral
 * character such as an emoji), it is dropped too, so the output never
 * contains a dangling half-character.
 *
 * @param label  Label text to clip.
 * @returns The label itself, or a clipped prefix followed by "…".
 */
function truncateLabel(label: string): string {
  const MAX = 60
  if (label.length <= MAX) return label

  // Reserve one code unit for the ellipsis.
  let cut = MAX - 1
  const lastKept = label.charCodeAt(cut - 1)
  // 0xD800–0xDBFF is the high-surrogate range; keeping one as the final
  // unit would split a pair (or preserve an already-lone surrogate).
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    cut -= 1
  }
  return label.slice(0, cut) + '…'
}
424
+
284
425
  /**
285
426
  * Internal tick — iterates active states, arms pokes or fires fallback.
286
427
  * Exported as __tickForTests so suite can step the clock deterministically.
@@ -331,6 +472,16 @@ function tick(now: number): void {
331
472
  const recentThinking = s.lastThinkingAt != null
332
473
  && (now - s.lastThinkingAt) < 30_000
333
474
  const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
475
+ // #1292: snapshot in-flight tools at fire time, sorted by
476
+ // startedAt ascending so entry[0] is the longest-running.
477
+ // Pre-computed durations in ms; the formatter just renders.
478
+ const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
479
+ .sort((a, b) => a.startedAt - b.startedAt)
480
+ .map(t => ({
481
+ name: t.name,
482
+ label: t.label,
483
+ durationMs: now - t.startedAt,
484
+ }))
334
485
  activeDeps.emitMetric({
335
486
  kind: 'silence_fallback_sent',
336
487
  key,
@@ -345,6 +496,7 @@ function tick(now: number): void {
345
496
  threadId,
346
497
  fallbackKind,
347
498
  silenceMs: silence,
499
+ inFlightTools,
348
500
  })
349
501
  if (r != null && typeof (r as Promise<void>).catch === 'function') {
350
502
  ;(r as Promise<void>).catch((err) => {