npm - switchroom - Versions diffs - 0.8.1 → 0.11.0 - Mend

switchroom 0.8.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

package/README.md +54 -61
package/bin/timezone-hook.sh +9 -7
package/dist/agent-scheduler/index.js +285 -45
package/dist/auth-broker/index.js +13932 -0
package/dist/cli/drive-write-pretool.mjs +5418 -0
package/dist/cli/switchroom.js +8890 -5560
package/dist/host-control/main.js +582 -43
package/dist/vault/approvals/kernel-server.js +276 -47
package/dist/vault/broker/server.js +333 -69
package/examples/minimal.yaml +63 -0
package/examples/personal-google-workspace-mcp/.env.example +34 -0
package/examples/personal-google-workspace-mcp/README.md +194 -0
package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
package/examples/switchroom.yaml +220 -0
package/package.json +6 -4
package/profiles/_base/start.sh.hbs +3 -3
package/profiles/_shared/agent-self-service.md.hbs +126 -0
package/profiles/default/CLAUDE.md +10 -0
package/profiles/default/CLAUDE.md.hbs +16 -0
package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
package/skills/buildkite-agent-runtime/SKILL.md +44 -11
package/skills/buildkite-api/SKILL.md +31 -8
package/skills/buildkite-cli/SKILL.md +27 -9
package/skills/buildkite-migration/SKILL.md +22 -9
package/skills/buildkite-pipelines/SKILL.md +26 -9
package/skills/buildkite-secure-delivery/SKILL.md +23 -9
package/skills/buildkite-test-engine/SKILL.md +25 -8
package/skills/docx/SKILL.md +1 -1
package/skills/file-bug/SKILL.md +34 -6
package/skills/humanizer/SKILL.md +15 -0
package/skills/humanizer-calibrate/SKILL.md +7 -1
package/skills/mcp-builder/SKILL.md +1 -1
package/skills/pdf/SKILL.md +1 -1
package/skills/pptx/SKILL.md +1 -1
package/skills/skill-creator/SKILL.md +21 -1
package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
package/skills/switchroom-cli/SKILL.md +63 -64
package/skills/switchroom-health/SKILL.md +23 -10
package/skills/switchroom-install/SKILL.md +3 -3
package/skills/switchroom-manage/SKILL.md +26 -19
package/skills/switchroom-runtime/SKILL.md +67 -15
package/skills/switchroom-status/SKILL.md +26 -1
package/skills/telegram-test-harness/SKILL.md +3 -0
package/skills/webapp-testing/SKILL.md +31 -1
package/skills/xlsx/SKILL.md +1 -1
package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
package/telegram-plugin/admin-commands/index.ts +9 -5
package/telegram-plugin/auth-snapshot-format.ts +612 -0
package/telegram-plugin/auto-fallback-fleet.ts +215 -0
package/telegram-plugin/auto-fallback.ts +28 -301
package/telegram-plugin/dist/gateway/gateway.js +17453 -15100
package/telegram-plugin/fleet-fallback-gate.ts +105 -0
package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
package/telegram-plugin/gateway/approval-callback.ts +31 -3
package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
package/telegram-plugin/gateway/auth-command.ts +905 -0
package/telegram-plugin/gateway/auth-line.ts +123 -0
package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
package/telegram-plugin/gateway/boot-card.ts +23 -37
package/telegram-plugin/gateway/boot-probes.ts +9 -12
package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
package/telegram-plugin/gateway/gateway.ts +1156 -938
package/telegram-plugin/gateway/hostd-dispatch.ts +244 -0
package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
package/telegram-plugin/gateway/ipc-server.ts +69 -0
package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
package/telegram-plugin/model-unavailable.ts +28 -12
package/telegram-plugin/permission-title.ts +56 -0
package/telegram-plugin/quota-check.ts +19 -41
package/telegram-plugin/scripts/build.mjs +0 -1
package/telegram-plugin/shared/bot-runtime.ts +5 -4
package/telegram-plugin/silence-poke.ts +153 -1
package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
package/telegram-plugin/tests/boot-probes.test.ts +27 -22
package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
package/telegram-plugin/tests/permission-title.test.ts +31 -0
package/telegram-plugin/tests/quota-check.test.ts +5 -35
package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
package/telegram-plugin/tests/silence-poke.test.ts +237 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
package/telegram-plugin/turn-flush-safety.ts +55 -1
package/telegram-plugin/uat/SETUP.md +35 -1
package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
package/telegram-plugin/uat/runners/report.ts +150 -0
package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
package/telegram-plugin/uat/runners/scorer.ts +106 -0
package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
package/telegram-plugin/auth-dashboard.ts +0 -1104
package/telegram-plugin/auth-slot-parser.ts +0 -497
package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
package/telegram-plugin/dist/foreman/foreman.js +0 -31358
package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
package/telegram-plugin/foreman/foreman.ts +0 -1165
package/telegram-plugin/foreman/setup-flow.ts +0 -345
package/telegram-plugin/foreman/setup-state.ts +0 -239
package/telegram-plugin/foreman/state.ts +0 -203
package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
package/telegram-plugin/tests/foreman-state.test.ts +0 -164
package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
package/telegram-plugin/tests/setup-flow.test.ts +0 -510
package/telegram-plugin/tests/setup-state.test.ts +0 -146

package/telegram-plugin/silence-poke.ts CHANGED Viewed

@@ -45,6 +45,22 @@
 export type PokeLevel = 'soft' | 'firm'
+/** #1292: snapshot of one in-flight tool call, taken at the moment the
+ * 300s framework fallback fires. It is surfaced in the fallback message
+ * so the user sees the actual observable ("running Grep \"foo\" for 4m")
+ * instead of the dishonest generic
+ * "still working… (no update from agent in 5 min)" when the agent is
+ * clearly busy grinding through tool calls. */
+export interface ToolSnapshot {
+  /** Bare tool name as it came off the wire (e.g. "Grep", "Read", "Bash"). */
+  name: string
+  /** Natural-language descriptor from `toolLabel()` if available (e.g. the
+   * query for Grep, basename for Read/Edit/Write, hostname for WebFetch),
+   * or null when no useful label could be derived. */
+  label: string | null
+  /** Time since this tool call started, in ms, measured when the snapshot
+   * is taken (`now - startedAt` inside `tick()`), not updated afterwards. */
+  durationMs: number
+}
 export interface SilencePokeState {
   /** Wall-clock ms of turn start. Silence clock zero-point when no outbound yet. */
   turnStartedAt: number
@@ -62,6 +78,16 @@ export interface SilencePokeState {
   fallbackFired: boolean
   /** Wall-clock ms of last poke fire — used for poke-success latency. */
   lastPokeFiredAt: number | null
+  /** #1292: in-flight tool calls keyed by toolUseId. Populated by
+   * `noteToolStart` on every parent-agent `tool_use` event the gateway
+   * sees and drained by `noteToolEnd` on the matching `tool_result`.
+   * Read only inside `tick()` when the 300s fallback fires — at that
+   * point we snapshot the entries (sorted by startedAt ascending) and
+   * include the longest-running one in the fallback message body.
+   * NOTE: presence of in-flight tools does NOT reset the silence
+   * clock — the design choice in this module's header is preserved.
+   * We only enrich the fallback TEXT, not the timing. */
+  inFlightTools: Map<string, { name: string; startedAt: number; label: string | null }>
 }
 export interface ThresholdsMs {
@@ -89,9 +115,19 @@ export interface FrameworkFallbackContext {
   chatId: string
   threadId: number | null
   /** Picked from lastThinkingAt: 'thinking' if a thinking event landed in
-   *  the last 30s of silence, else 'working'. */
+   *  the last 30s of silence, else 'working'. Note: 'working' is the
+   *  default base; when `inFlightTools` is non-empty the fallback text
+   *  uses the tool-aware wording instead of either 'working' / 'thinking'
+   *  (see `formatFrameworkFallbackText`). */
   fallbackKind: 'working' | 'thinking'
   silenceMs: number
+  /** #1292: snapshot of in-flight tool calls at the moment the fallback
+   *  fires, sorted by startedAt ascending. Empty when no tools were
+   *  in flight (e.g. agent genuinely silent, or all tools completed
+   *  faster than the 300s threshold). The format helper uses entry [0]
+   *  (longest-running) for the message body and "+ N more" when
+   *  length > 1. */
+  inFlightTools: ToolSnapshot[]
 }
 export type SilencePokeMetric =
@@ -141,6 +177,7 @@ export function startTurn(key: string, now: number): void {
     lastThinkingAt: null,
     fallbackFired: false,
     lastPokeFiredAt: null,
+    inFlightTools: new Map(),
   })
 }
@@ -205,6 +242,72 @@ export function noteThinking(key: string, now: number): void {
   s.lastThinkingAt = now
 }
+/**
+ * #1292: record the start of a tool call. Stored in `inFlightTools` keyed
+ * by `toolUseId` so a later `noteToolEnd` can drain the entry. Read only
+ * by `tick()` when the 300s fallback fires, where we snapshot the map
+ * into the fallback context so the user-visible message can name the
+ * actual observable (e.g. "running Grep \"foo\" for 4m") instead of the
+ * dishonest generic "still working… (no update from agent in 5 min)".
+ *
+ * Safe to call repeatedly, but NOT idempotent: a second call with the
+ * same toolUseId replaces the whole entry, including `startedAt`, so the
+ * reported duration restarts from `now`. To refresh only the label of a
+ * tool that is already tracked, use `noteToolLabel` instead so the
+ * duration stays correct.
+ *
+ * No-op when there is no state for `key` (e.g. the kill switch is on, in
+ * which case the state Map will be empty for this key).
+ *
+ * @param key       Key of the per-turn state entry to update.
+ * @param toolUseId Id of the tool call; used as the key in `inFlightTools`.
+ * @param name      Tool name stored on the entry.
+ * @param label     Descriptor stored on the entry, or null if none yet.
+ * @param now       Timestamp (ms) recorded as the entry's `startedAt`.
+ */
+export function noteToolStart(
+  key: string,
+  toolUseId: string,
+  name: string,
+  label: string | null,
+  now: number,
+): void {
+  const s = state.get(key)
+  if (s == null) return
+  s.inFlightTools.set(toolUseId, { name, startedAt: now, label })
+}
+/**
+ * #1292: record completion of a tool call. Removes the entry from
+ * `inFlightTools`. Idempotent — calling on an unknown toolUseId is a
+ * no-op, as is calling with a `key` that has no state. Sub-second tools
+ * that start and end inside one poll interval are still safe because
+ * the map is only read inside `tick()` at the 300s fallback boundary;
+ * the churn never gets observed.
+ *
+ * @param key       Key of the per-turn state entry to update.
+ * @param toolUseId Id of the tool call whose entry is removed.
+ * @param _now      Currently unused (hence the underscore prefix).
+ */
+export function noteToolEnd(
+  key: string,
+  toolUseId: string,
+  _now: number,
+): void {
+  const s = state.get(key)
+  if (s == null) return
+  s.inFlightTools.delete(toolUseId)
+}
+/**
+ * #1292: late label update for an in-flight tool. The tool-label sidecar
+ * (PreToolUse hook, polled every 250ms via `tool-label-sidecar.ts`) can
+ * publish a richer label some time after the `tool_use` event landed.
+ * When that arrives, refresh the entry in-place so the fallback message
+ * — if it fires later — picks up the better label.
+ *
+ * Only `label` is mutated; `name` and `startedAt` are left untouched, so
+ * the duration reported for the tool is unaffected.
+ *
+ * No-op when there is no state for `key`, or when the toolUseId isn't
+ * tracked (e.g. tool already completed, or the start event was skipped
+ * because the tool is a Telegram surface).
+ *
+ * @param key       Key of the per-turn state entry to update.
+ * @param toolUseId Id of the tracked tool call to relabel.
+ * @param label     Replacement label.
+ */
+export function noteToolLabel(
+  key: string,
+  toolUseId: string,
+  label: string,
+): void {
+  const s = state.get(key)
+  if (s == null) return
+  const entry = s.inFlightTools.get(toolUseId)
+  if (entry == null) return
+  entry.label = label
+}
 /**
  * Drain any armed poke for ANY active turn and return the system-reminder
  * text to append. Returns null if nothing is armed.
@@ -273,14 +376,52 @@ export function formatPokeText(level: PokeLevel): string {
 export function formatFrameworkFallbackText(
   fallbackKind: 'working' | 'thinking',
   silenceMs: number,
+  inFlightTools: ToolSnapshot[] = [],
 ): string {
   const minutes = Math.max(1, Math.round(silenceMs / 60_000))
   const suffix = `(no update from agent in ${minutes} min)`
+  // #1292 case (a): tools in flight. Name the longest-running one
+  // (entry[0] — caller pre-sorts by startedAt ascending) rather than
+  // the "still working" framing #1292 calls out as dishonest. Output
+  // for label `"foo"`, with 2 extra tools, and with a null/empty label:
+  //   running Grep "foo" for 4m (no update from agent in 5 min)
+  //   running Grep "foo" + 2 more for 4m (no update from agent in 5 min)
+  //   running Grep for 4m (no update from agent in 5 min)
+  if (inFlightTools.length > 0) {
+    const longest = inFlightTools[0]!
+    const dur = formatDurationShort(longest.durationMs)
+    const labelTail = longest.label && longest.label.length > 0
+      ? ` ${truncateLabel(longest.label)}`
+      : ''
+    const more = inFlightTools.length > 1
+      ? ` + ${inFlightTools.length - 1} more`
+      : ''
+    return `running ${longest.name}${labelTail}${more} for ${dur} ${suffix}`
+  }
   return fallbackKind === 'thinking'
     ? `still thinking… ${suffix}`
     : `still working… ${suffix}`
 }
+/** Compact m/s rendering for the fallback message. Anything that rounds
+ *  to under a minute reads as `${s}s`, otherwise `${m}m`. Both units are
+ *  rounded to nearest with `Math.round` (halves round up), so 4m 29s
+ *  renders as "4m" and 4m 30s as "5m"; 59.5s already renders as "1m".
+ *  Negative input is clamped and renders as "0s". */
+function formatDurationShort(ms: number): string {
+  const totalSec = Math.max(0, Math.round(ms / 1000))
+  if (totalSec < 60) return `${totalSec}s`
+  const minutes = Math.round(totalSec / 60)
+  return `${minutes}m`
+}
+/** Telegram lines are short on mobile. Clip the label to keep the
+ *  fallback message readable. Labels of up to 60 characters pass through
+ *  unchanged; longer ones are cut to their first 59 characters plus a
+ *  trailing "…" (60 in total). The limit is generous because tool labels
+ *  are pre-truncated by `toolLabel()` already.
+ *  NOTE(review): lengths here are UTF-16 code units, so the cut can land
+ *  between the two halves of a surrogate pair (e.g. an emoji) — confirm
+ *  whether labels can contain such characters. */
+function truncateLabel(label: string): string {
+  const MAX = 60
+  if (label.length <= MAX) return label
+  return label.slice(0, MAX - 1) + '…'
+}
 /**
  * Internal tick — iterates active states, arms pokes or fires fallback.
  * Exported as __tickForTests so suite can step the clock deterministically.
@@ -331,6 +472,16 @@ function tick(now: number): void {
       const recentThinking = s.lastThinkingAt != null
         && (now - s.lastThinkingAt) < 30_000
       const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
+      // #1292: snapshot in-flight tools at fire time, sorted by
+      // startedAt ascending so entry[0] is the longest-running.
+      // Pre-computed durations in ms; the formatter just renders.
+      const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
+        .sort((a, b) => a.startedAt - b.startedAt)
+        .map(t => ({
+          name: t.name,
+          label: t.label,
+          durationMs: now - t.startedAt,
+        }))
       activeDeps.emitMetric({
         kind: 'silence_fallback_sent',
         key,
@@ -345,6 +496,7 @@ function tick(now: number): void {
           threadId,
           fallbackKind,
           silenceMs: silence,
+          inFlightTools,
         })
         if (r != null && typeof (r as Promise<void>).catch === 'function') {
           ;(r as Promise<void>).catch((err) => {