npm - switchroom - Versions diffs - 0.15.45 → 0.16.4 - Mend

switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (149) hide show

package/dist/agent-scheduler/index.js +122 -88
package/dist/auth-broker/index.js +463 -177
package/dist/cli/autoaccept-poll.js +4842 -35
package/dist/cli/drive-write-pretool.mjs +17 -14
package/dist/cli/notion-write-pretool.mjs +117 -86
package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
package/dist/cli/self-improve-stop.mjs +428 -0
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +3158 -1178
package/dist/host-control/main.js +2833 -355
package/dist/vault/approvals/kernel-server.js +7479 -7439
package/dist/vault/broker/server.js +11312 -11272
package/examples/minimal.yaml +1 -0
package/examples/switchroom.yaml +1 -0
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +88 -1
package/profiles/_shared/execution-discipline.md.hbs +18 -0
package/profiles/default/CLAUDE.md.hbs +0 -19
package/telegram-plugin/.claude-plugin/plugin.json +2 -2
package/telegram-plugin/answer-stream-flag.ts +12 -49
package/telegram-plugin/answer-stream.ts +5 -150
package/telegram-plugin/auth-snapshot-format.ts +280 -48
package/telegram-plugin/auto-fallback-fleet.ts +44 -1
package/telegram-plugin/context-exhaustion.ts +12 -0
package/telegram-plugin/demo-mask.ts +154 -0
package/telegram-plugin/dist/bridge/bridge.js +167 -124
package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
package/telegram-plugin/dist/server.js +215 -172
package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
package/telegram-plugin/draft-stream.ts +47 -410
package/telegram-plugin/final-answer-detect.ts +17 -12
package/telegram-plugin/fleet-fallback-resume.ts +131 -0
package/telegram-plugin/format.ts +56 -19
package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
package/telegram-plugin/gateway/auth-command.ts +70 -14
package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
package/telegram-plugin/gateway/current-turn-map.ts +188 -0
package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
package/telegram-plugin/gateway/effort-command.ts +8 -3
package/telegram-plugin/gateway/emission-authority.ts +369 -0
package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
package/telegram-plugin/gateway/gateway.ts +1837 -291
package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
package/telegram-plugin/gateway/represent-guard.ts +72 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
package/telegram-plugin/gateway/status-surface-log.ts +14 -3
package/telegram-plugin/history.ts +33 -11
package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
package/telegram-plugin/issues-card.ts +4 -0
package/telegram-plugin/model-unavailable.ts +124 -0
package/telegram-plugin/narrative-dedup.ts +69 -0
package/telegram-plugin/over-ping-safety-net.ts +70 -4
package/telegram-plugin/package.json +3 -3
package/telegram-plugin/pending-work-progress.ts +12 -0
package/telegram-plugin/permission-rule.ts +32 -5
package/telegram-plugin/permission-title.ts +152 -9
package/telegram-plugin/quota-check.ts +13 -0
package/telegram-plugin/quota-watch.ts +135 -7
package/telegram-plugin/registry/turns-schema.test.ts +24 -0
package/telegram-plugin/registry/turns-schema.ts +9 -0
package/telegram-plugin/runtime-metrics.ts +13 -0
package/telegram-plugin/session-tail.ts +96 -11
package/telegram-plugin/silence-poke.ts +170 -24
package/telegram-plugin/slot-banner-driver.ts +3 -0
package/telegram-plugin/status-no-truncate.ts +44 -0
package/telegram-plugin/status-reactions.ts +20 -3
package/telegram-plugin/stream-controller.ts +4 -23
package/telegram-plugin/stream-reply-handler.ts +6 -24
package/telegram-plugin/streaming-metrics.ts +91 -0
package/telegram-plugin/subagent-watcher.ts +212 -66
package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
package/telegram-plugin/tests/answer-stream.test.ts +2 -411
package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
package/telegram-plugin/tests/demo-mask.test.ts +127 -0
package/telegram-plugin/tests/draft-stream.test.ts +0 -827
package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
package/telegram-plugin/tests/feed-survival.test.ts +526 -0
package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
package/telegram-plugin/tests/history.test.ts +60 -0
package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
package/telegram-plugin/tests/permission-rule.test.ts +17 -0
package/telegram-plugin/tests/permission-title.test.ts +206 -17
package/telegram-plugin/tests/quota-watch.test.ts +252 -9
package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
package/telegram-plugin/tests/represent-guard.test.ts +162 -0
package/telegram-plugin/tests/session-tail.test.ts +147 -3
package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
package/telegram-plugin/tests/telegram-format.test.ts +101 -6
package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
package/telegram-plugin/tests/tool-labels.test.ts +67 -0
package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
package/telegram-plugin/tests/welcome-text.test.ts +32 -3
package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
package/telegram-plugin/tool-activity-summary.ts +375 -58
package/telegram-plugin/turn-liveness-floor.ts +240 -0
package/telegram-plugin/uat/assertions.ts +115 -0
package/telegram-plugin/uat/driver.ts +68 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
package/telegram-plugin/welcome-text.ts +13 -1
package/telegram-plugin/worker-activity-feed.ts +157 -82
package/telegram-plugin/draft-transport.ts +0 -122
package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
package/telegram-plugin/tests/draft-transport.test.ts +0 -211

package/telegram-plugin/gateway/inject-handler.test.ts CHANGED Viewed

@@ -152,12 +152,13 @@ describe('handleInjectCommand — outcome=ok_no_output', () => {
     expect(replies[0].text).toContain('empty capture')
   })
-  it('bare ack with accent=done when expectsOutput=false and no silentNote (/clear)', async () => {
+  it('uses silentNote for /clear (context cleared — fresh slate)', async () => {
     const inject = vi.fn().mockResolvedValue(noOutputResult('/clear'))
     const { deps, replies } = makeDeps({ getArgs: () => '/clear', inject })
     await handleInjectCommand(fakeCtx(), deps)
     expect(replies[0].opts?.accent).toBe('done')
     expect(replies[0].text).toContain('<code>/clear</code>')
+    expect(replies[0].text).toContain('context cleared')
     expect(replies[0].text).not.toContain('empty capture')
     expect(replies[0].text).not.toContain('<pre>')
   })

package/telegram-plugin/gateway/ms365-write-approval.test.ts CHANGED Viewed

@@ -26,7 +26,7 @@ describe("validateMs365Preview", () => {
     toolName: "mcp__ms-365__upload-file-content",
     itemId: "01ABCDEFG",
     itemDisplayName: "Q3-Strategy.docx",
-    accountEmail: "ken@outlook.com",
+    accountEmail: "bob@example.com",
   };
   it("accepts a minimal valid preview", () => {
@@ -93,7 +93,7 @@ describe("buildMs365CardText", () => {
     toolName: "mcp__ms-365__upload-file-content",
     itemId: "01ABCDEFG",
     itemDisplayName: "Q3-Strategy.docx",
-    accountEmail: "ken@outlook.com",
+    accountEmail: "bob@example.com",
   };
   it("includes agent, tool, item, account", () => {
@@ -102,7 +102,7 @@ describe("buildMs365CardText", () => {
     expect(text).toContain("ms-365__upload-file-content");
     expect(text).toContain("Q3-Strategy.docx");
     expect(text).toContain("01ABCDEFG");
-    expect(text).toContain("ken@outlook.com");
+    expect(text).toContain("bob@example.com");
   });
   it("omits ID line for new files", () => {
@@ -183,7 +183,7 @@ function makeMsg(overrides: Partial<RequestMs365ApprovalMessage> = {}): RequestM
       toolName: "mcp__ms-365__upload-file-content",
       itemId: "01ABC",
       itemDisplayName: "Strategy.docx",
-      accountEmail: "ken@outlook.com",
+      accountEmail: "bob@example.com",
     },
     ttlMs: 5 * 60 * 1000,
     ...overrides,

package/telegram-plugin/gateway/represent-guard.ts ADDED Viewed

@@ -0,0 +1,72 @@
+/**
+ * represent-guard.ts — the duplicate-represent guard for the obligation sweep,
+ * extracted from obligationSweep so the "satisfied-but-misdetected obligation
+ * must NOT re-fire" decision (#2472) is EXECUTABLE in a pure unit test.
+ *
+ * The bug (#2472): obligation_represent re-fired for the same origin_turn_id even
+ * after the agent had already answered represent_count=1 with a reply tool call,
+ * producing a second near-identical message. The reply landed but its routing did
+ * not resolve back to the origin, so the ledger's normal close path missed it —
+ * and the represent branch (unlike the escalate branch) had no belt-and-braces
+ * outbound-history check before re-firing.
+ *
+ * This helper is the decision the sweep's represent branch now consults. PURE —
+ * no Telegram, no SQLite; the gateway injects `hasOutboundDeliveredSince` as a
+ * predicate. The single load-bearing subtlety lives here in one testable place:
+ *
+ *   The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
+ *   `openedAt`. On the FIRST represent (`lastRepresentedAt` undefined) the guard
+ *   is a no-op, so the genuine "agent wrote a plain-text answer and never called
+ *   the reply tool" case still re-presents ONCE. Only the SECOND-and-later
+ *   represent is gated — exactly where a reply that landed BETWEEN fires must
+ *   suppress the re-ask. A reply that predates the last represent (e.g. the
+ *   original plain-text answer) does not count, because it is not evidence the
+ *   most recent represent was answered.
+ */
+/** The obligation fields the represent guard inspects. */
+export interface RepresentGuardObligation {
+  readonly originTurnId: string
+  readonly chatId: string
+  readonly threadId?: number
+  /** Wall-clock ms this obligation was most recently re-presented, if ever. */
+  readonly lastRepresentedAt?: number
+}
+export interface RepresentGuardDeps {
+  /** True when history is available to query (else the guard never suppresses). */
+  historyEnabled: boolean
+  /**
+   * Has a genuine assistant reply been delivered to this chat (optionally scoped
+   * to thread) at or after `sinceMs`? Wraps history.hasOutboundDeliveredSince.
+   *
+   * For the represent guard the gateway binds this with a LOW minChars (#2474
+   * follow-up): ANY real reply to the turn — even a terse "Yes — done." — means
+   * the user was answered and the duplicate represent must be suppressed. The
+   * 200-char "substantive" proxy is the ESCALATE branch's concern, not this one;
+   * applying it here left short-but-real replies failing to suppress the duplicate
+   * (the #2472 gap). The underlying query only counts recordOutbound rows, so
+   * typing indicators / progress-card edits are never miscounted as a reply.
+   */
+  hasOutboundDeliveredSince: (chatId: string, sinceMs: number, threadId?: number) => boolean
+}
+/**
+ * Decide whether a represent for `o` should be SUPPRESSED because the agent has
+ * already delivered a reply since the obligation was last re-presented.
+ *
+ * Returns true ⇒ the obligation is satisfied-but-misdetected; the caller closes
+ * it silently and does NOT re-fire. Returns false ⇒ proceed with the represent
+ * (first represent always proceeds; a represent with no reply since the last one
+ * proceeds; an unavailable history proceeds — never suppress on doubt).
+ */
+export function shouldSuppressRepresent(
+  o: RepresentGuardObligation,
+  deps: RepresentGuardDeps,
+): boolean {
+  if (!deps.historyEnabled) return false
+  // First represent: nothing to compare against — let the single re-ask fire so
+  // the genuine plain-text-no-reply case is preserved.
+  if (o.lastRepresentedAt == null) return false
+  return deps.hasOutboundDeliveredSince(o.chatId, o.lastRepresentedAt, o.threadId)
+}

package/telegram-plugin/gateway/status-surface-log.test.ts CHANGED Viewed

@@ -12,6 +12,7 @@ function turn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurn
     sessionThreadId: undefined,
     startedAt: 1_780_000_000_000,
     toolCallCount: 0,
+    labeledToolCount: 0,
     activityMessageId: null,
     activityEverOpened: false,
     activityDrainFailures: 0,
@@ -35,7 +36,7 @@ describe('formatTurnLifecycle', () => {
     const line = formatTurnLifecycle(
       'clear',
       'turn_end',
-      turn({ sessionThreadId: 3, toolCallCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
+      turn({ sessionThreadId: 3, toolCallCount: 5, labeledToolCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
       1_780_000_300_000, // +300s
     )
     expect(line).toContain('turn-lifecycle clear reason=turn_end')
@@ -63,7 +64,7 @@ describe('formatTurnLifecycle', () => {
 describe('detectStatusSurfaceDegraded', () => {
   it('flags a turn that did tool work but never opened the feed due to send failures (the resume-400 signature)', () => {
     const d = detectStatusSurfaceDegraded(
-      turn({ toolCallCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
+      turn({ toolCallCount: 3, labeledToolCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
     )
     expect(d).not.toBeNull()
     expect(d!.reason).toBe('feed-never-opened')
@@ -75,7 +76,7 @@ describe('detectStatusSurfaceDegraded', () => {
     // the sticky activityEverOpened keeps this from false-positiving.
     expect(
       detectStatusSurfaceDegraded(
-        turn({ toolCallCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
+        turn({ toolCallCount: 4, labeledToolCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
       ),
     ).toBeNull()
   })
@@ -83,7 +84,7 @@ describe('detectStatusSurfaceDegraded', () => {
   it('does NOT flag a turn that never attempted a feed send (e.g. ack-first suppression)', () => {
     expect(
       detectStatusSurfaceDegraded(
-        turn({ toolCallCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
+        turn({ toolCallCount: 2, labeledToolCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
       ),
     ).toBeNull()
   })

package/telegram-plugin/gateway/status-surface-log.ts CHANGED Viewed

@@ -30,6 +30,17 @@ export interface StatusSurfaceTurnView {
   sessionThreadId: number | undefined
   startedAt: number
   toolCallCount: number
+  /**
+   * Count of tool_label events that passed the surface-tool guard this turn —
+   * i.e. the number of tool steps shown in the feed (no surface or suppressed tools). This
+   * is the deterministic single source of truth for the `tools=` lifecycle
+   * field and the `✓ N steps` activity-feed total. Incremented in
+   * `case 'tool_label':` AFTER the `isTelegramSurfaceTool` guard so that
+   * reply/stream_reply/edit_message/react are never counted. send_typing and
+   * sync_retain are suppressed at the hook level (computeLabel returns null)
+   * and never arrive as tool_label events, so they are excluded automatically.
+   */
+  labeledToolCount: number
   /** Live activity-feed message id; null until the first send captures it. */
   activityMessageId: number | null
   /**
@@ -67,7 +78,7 @@ export function formatTurnLifecycle(
   return (
     `turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` +
     `chat=${t.sessionChatId} thread=${t.sessionThreadId ?? '-'} ` +
-    `tools=${t.toolCallCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
+    `tools=${t.labeledToolCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
     `feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` +
     `replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`
   )
@@ -89,13 +100,13 @@ export function formatTurnLifecycle(
 export function detectStatusSurfaceDegraded(
   t: StatusSurfaceTurnView,
 ): { reason: string; detail: string } | null {
-  if (t.toolCallCount === 0) return null
+  if (t.labeledToolCount === 0) return null
   if (t.activityEverOpened) return null
   if (t.activityDrainFailures === 0) return null
   return {
     reason: 'feed-never-opened',
     detail:
-      `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} ` +
+      `tools=${t.labeledToolCount} drainFailures=${t.activityDrainFailures} ` +
       `activityMsgId=none — the live activity feed failed every send this turn ` +
       `(card was dark despite tool work)`,
   }

package/telegram-plugin/history.ts CHANGED Viewed

@@ -557,11 +557,26 @@ export function getRecentOutboundCount(
  * SUBSTANTIVE: we never suppress escalation on a bare ack ("on it", "give me a
  * sec") — an agent that acks then ghosts must still escalate. The history schema
  * does not store a done/substantive flag, so we approximate: a row counts only
- * when LENGTH(text) >= 200 (the FINAL_ANSWER_MIN_CHARS constant from
- * final-answer-detect.ts). This is false-negative-safe: a genuine substantive
- * answer that happens to be < 200 chars will still fire an escalation, which is
- * the conservative (safe) outcome. A schema column would be more precise but is
- * disproportionate for this predicate; the reviewer accepted this approach.
+ * when LENGTH(text) >= `minChars` (default 200, the FINAL_ANSWER_MIN_CHARS
+ * constant from final-answer-detect.ts). This is false-negative-safe for the
+ * escalate branch: a genuine substantive answer that happens to be < 200 chars
+ * will still fire an escalation, which is the conservative (safe) outcome. A
+ * schema column would be more precise but is disproportionate for this predicate;
+ * the reviewer accepted this approach.
+ *
+ * `minChars` semantics (decoupled per caller, #2474 follow-up):
+ *   - The ESCALATE branch (Fix 4) keeps the 200 default: it must not stand down an
+ *     escalation on a mere ack, so it still demands a substantive-LENGTH outbound.
+ *   - The duplicate-represent GUARD (#2472) passes a LOW value (1): for that path
+ *     ANY genuine assistant reply to the turn — even a terse "Yes — done." or
+ *     "Merged, all three landed." — means the user was answered, so the duplicate
+ *     represent must be suppressed. The 200-char proxy was borrowed from the
+ *     escalate branch and is WRONG for this guard: with it, a short-but-real reply
+ *     kept bug #2472 alive. A low threshold is safe here because the rows this
+ *     query counts (role='assistant') are ONLY ever written by recordOutbound —
+ *     i.e. real bot→user messages (reply / stream_reply / silent-anchor content /
+ *     command acks). Typing indicators and progress-card edits NEVER call
+ *     recordOutbound, so they cannot be miscounted as "the user was answered".
  *
  * `threadId` semantics:
  *   - undefined → any message in the chat regardless of thread (DMs + supergroups)
@@ -575,16 +590,23 @@ export function hasOutboundDeliveredSince(
   chatId: string,
   sinceMs: number,
   threadId?: number | null,
+  minChars = 200,
 ): boolean {
   try {
     const cutoffSec = Math.floor(sinceMs / 1000)
-    const params: unknown[] = [chatId, cutoffSec]
-    // LENGTH(text) >= 200 scopes to substantive replies only — never suppress
-    // escalation on a mere ack. Mirrors FINAL_ANSWER_MIN_CHARS (200) from
-    // final-answer-detect.ts; the `done` flag is not stored in the history
-    // schema, so length is the closest available proxy.
+    // Clamp to >= 1 so the predicate never counts an empty/whitespace-only row
+    // (a degenerate outbound) as a delivered reply, even if a caller passes 0.
+    const minLen = Math.max(1, Math.floor(minChars))
+    const params: unknown[] = [chatId, cutoffSec, minLen]
+    // LENGTH(text) >= minChars scopes to replies of at least the caller's
+    // threshold. ESCALATE passes the default 200 (substantive-only — never stand
+    // down on a mere ack). The duplicate-represent GUARD passes a low value so a
+    // terse-but-real reply counts (#2472/#2474). The `done` flag is not stored in
+    // the history schema, so length is the closest available proxy; rows here are
+    // only ever recordOutbound writes (real bot→user sends), so progress-card
+    // edits / typing indicators are structurally excluded.
     let sql =
-      "SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= 200"
+      "SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= ?"
     if (threadId !== undefined) {
       if (threadId === null) {
         sql += ' AND thread_id IS NULL'

package/telegram-plugin/hooks/repo-context-pretool.mjs CHANGED Viewed

@@ -297,6 +297,32 @@ async function main() {
   const markerPath = findNearestMarker(targetDir)
   if (markerPath == null) process.exit(0)
+  // Own-agent marker guard: suppress the agent's own CLAUDE.md / AGENTS.md /
+  // AGENT.md so it is never injected as additionalContext. The agent's own
+  // marker is already in the system prompt (baked by start.sh via
+  // --append-system-prompt); re-injecting it wastes ~30KB per session.
+  //
+  // The existing isUnderAgentWorkspace guard only blocks paths under the
+  // agent's workspace/ subdirectory. It misses the agent's start cwd
+  // (/home/.../.switchroom/agents/<name>) because that guard computes against
+  // workspace/, not agentDir itself. This marker-path check closes that gap.
+  //
+  // We do NOT add a "targetDir under startCwd" directory guard because that
+  // would wrongly suppress a legitimate worktree repo the operator has checked
+  // out inside the agent dir (e.g. agentDir/workspace/ repos) — the directory
+  // guard would catch those too. The marker-path equality check is surgical:
+  // only the exact CLAUDE.md / AGENTS.md / AGENT.md at agentDir root is blocked;
+  // any nested repo's marker injects normally.
+  if (agentName) {
+    const startCwd = normalize(
+      process.env.SWITCHROOM_AGENT_START_CWD ??
+        join(home, '.switchroom', 'agents', agentName),
+    )
+    for (const m of MARKER_FILES) {
+      if (markerPath === join(startCwd, m)) process.exit(0)
+    }
+  }
   const state = readSessionState(sessionId)
   // Already-loaded dedup — the load-once-per-repo-per-session invariant.

package/telegram-plugin/hooks/subagent-tracker-posttool.mjs CHANGED Viewed

@@ -313,6 +313,11 @@ function updateRow(dbPath, { id, status, resultSummary, now, asyncLaunch }, done
     setImmediate(() => {
       try {
         const db = new SnapDatabaseSync(snapDbPath)
+        // Concurrency: per-connection busy_timeout so this hook's writes
+        // wait-and-retry instead of failing with SQLITE_BUSY under concurrent
+        // sub-agent dispatch. Set on the real open so BOTH the node:sqlite
+        // (production) and bun:sqlite branches are armed (#2535 review).
+        try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
         const row = db.prepare(SELECT_SQL).get(snapId)
         const isBackground = row != null && row.background === 1
         if (isBackground) {

package/telegram-plugin/hooks/subagent-tracker-pretool.mjs CHANGED Viewed

@@ -184,6 +184,14 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
     setImmediate(() => {
       try {
         const db = new SnapDatabaseSync(snapDbPath)
+        // Concurrency: this hook writes registry.db from a separate process
+        // that contends with the gateway's subagent-watcher + the PostToolUse
+        // hook. Without a busy_timeout, the contending write fails IMMEDIATELY
+        // with SQLITE_BUSY ("database is locked") when several sub-agents
+        // dispatch at once, dropping the row → NULL jsonl_agent_id/parent_turn_key.
+        // Per-connection PRAGMA, set on the real open so BOTH the node:sqlite
+        // (production) and bun:sqlite branches are armed.
+        try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
         db.exec(snapSchemaSql)
         // Migrate older DBs that pre-date jsonl_agent_id.
         const hasJsonlCol = db.prepare(snapMigrateSql).get()

package/telegram-plugin/hooks/tool-label-pretool.mjs CHANGED Viewed

@@ -149,30 +149,42 @@ export function computeLabel(toolName, input) {
       // the progress card path that used to surface this was retired
       // when `progressDriver` was nulled out in #1122 PR3.
       const slug = clip(asText(i.skill), 64)
+      // Empty-slug Skill stays suppressed (degenerate/malformed call): the
+      // liveness feed-open backstops visibility for a tool-less turn, so this
+      // does not need a label. Keeps the #2111 sidecar contract.
       return slug ? `Running skill ${slug}` : null
     }
   }
   // MCP tools.
   if (typeof toolName === 'string' && toolName.startsWith('mcp__')) {
-    // Explicit labels / suppressions for the built-in servers.
+    // Telegram-plugin tools: matched by the key-agnostic regex so renames/forks work.
+    // Strip the `mcp__<server>__` prefix to get just the tool suffix.
+    const TELEGRAM_PREFIX_RE = /^mcp__[^_].*?telegram__/
+    const telegramMatch = TELEGRAM_PREFIX_RE.exec(toolName)
+    if (telegramMatch) {
+      const suffix = toolName.slice(telegramMatch[0].length)
+      // Surface tools (reply, stream_reply, edit_message, react) are the
+      // conversation itself — suppress them from the activity feed entirely.
+      // Mirrors isTelegramSurfaceTool in tool-names.ts.
+      if (
+        suffix === 'reply' ||
+        suffix === 'stream_reply' ||
+        suffix === 'edit_message' ||
+        suffix === 'react'
+      ) return null
+      if (suffix === 'get_recent_messages') return 'Reading chat history'
+      // send_typing and all other surface/control tools: suppress.
+      return null
+    }
+    // Explicit labels / suppressions for the hindsight server.
     switch (toolName) {
-      case 'mcp__switchroom-telegram__reply':
-      case 'mcp__switchroom-telegram__stream_reply':
-        return 'Replying'
-      case 'mcp__switchroom-telegram__react': {
-        const emoji = clip(asText(i.emoji), 8)
-        return emoji ? `Reacting ${emoji}` : 'Reacting'
-      }
-      case 'mcp__switchroom-telegram__get_recent_messages':
-        return 'Reading chat history'
       case 'mcp__hindsight__recall':
       case 'mcp__hindsight__reflect':
         return 'Searching memory'
       case 'mcp__hindsight__retain':
         return 'Saving memory'
       // Explicit suppressions — return null so we don't emit a sidecar line.
-      case 'mcp__switchroom-telegram__send_typing':
       case 'mcp__hindsight__sync_retain':
         return null
     }
@@ -182,13 +194,17 @@ export function computeLabel(toolName, input) {
     // entirely by MCP tools read as pure silence (only a typing dot + the
     // 👀 reaction) — the "I can't see what it's doing" report. Mirror the
     // gateway's describeToolUse: friendly per-server labels, else a
-    // model-authored field, else a humanized tool name. NEVER label
-    // switchroom-telegram surface/control tools (they ARE the conversation).
+    // model-authored field, else a humanized tool name. NEVER label any
+    // Telegram surface/control tools (they ARE the conversation). Use the
+    // same regex predicate as isTelegramSurfaceTool in tool-names.ts so
+    // this works regardless of the plugin's registration key (clerk-telegram,
+    // switchroom-telegram, custom fork, …).
+    const TELEGRAM_SURFACE_RE = /^mcp__[^_].*?telegram__/
+    if (TELEGRAM_SURFACE_RE.test(toolName)) return null
     const m = /^mcp__(.+?)__(.+)$/.exec(toolName)
     if (!m) return null
     const server = m[1].toLowerCase()
     const tool = m[2].toLowerCase()
-    if (server === 'switchroom-telegram') return null
     if (server === 'hindsight') return 'Working with memory'
     if (server === 'google-workspace' || server === 'claude_ai_google_calendar')
       return 'Checking your calendar'
@@ -213,7 +229,15 @@ export function computeLabel(toolName, input) {
     return `Using ${tool.replace(/[-_]+/g, ' ')}`
   }
-  return null
+  // Never-null fallthrough: any unrecognized BUILT-IN tool (no mcp__ prefix,
+  // not matched above) gets a generic label rather than dropping its sidecar
+  // line. A null here was the dark-turn mechanism — if such a tool was a
+  // turn's first/only tool, no tool_label event fired, the activity feed
+  // never opened, and a working turn read as pure silence. Suppressed tools
+  // (Telegram surface tools such as reply/react/send_typing, plus hindsight
+  // sync_retain) return null earlier; surface tools are also suppressed at
+  // the gateway's isTelegramSurfaceTool guard, so this does not resurface them.
+  return 'Working…'
 }
 function main() {

package/telegram-plugin/issues-card.ts CHANGED Viewed

@@ -328,6 +328,10 @@ export function createIssuesCardHandle(
       const sendOpts: Record<string, unknown> = {
         parse_mode: "HTML",
         disable_web_page_preview: true,
+        // Status card, not the user's answer — silence the open ping.
+        // (editMessageText ignores disable_notification, so the shared
+        // edit path below is unaffected.)
+        disable_notification: true,
         ...(opts.threadId != null ? { message_thread_id: opts.threadId } : {}),
       };

package/telegram-plugin/model-unavailable.ts CHANGED Viewed

@@ -84,6 +84,13 @@ export function detectModelUnavailable(
     // resets 8:50am (Australia/Melbourne)".
     'hit your limit',
     'hit the limit',
+    // SESSION-cap wording: "You've hit your session limit · resets 5pm".
+    // A session cap is a quota exhaustion that frees in HOURS (its reset is a
+    // bare time-of-day, see parseResetTime's time-only branch) — recognising
+    // it here is what lets the time-only reset parse fire and keeps a
+    // session-capped account from the +7d weekly bench.
+    'session limit',
+    'session cap',
   ]
   if (quotaSignals.some(s => lower.includes(s))) {
     const resetAt = parseResetTime(sample)
@@ -192,9 +199,126 @@ function parseResetTime(text: string, parseTimeNow: Date = new Date()): Date | u
     if (!Number.isNaN(d.getTime())) return d
   }
+  // "resets 5pm (Australia/Melbourne)" / "resets 8:50am" / "resets 17:00 (UTC)"
+  // SESSION-cap wording: a time of day with NO month/day. This frees in
+  // HOURS, not a week — without this branch it falls through to undefined,
+  // and the 429 inference path then applies resolveExhaustUntil's +7d weekly
+  // floor, benching a session-capped account for a week. Must sit AFTER the
+  // calendar branch so "resets May 3, 11am" never matches here. The leading
+  // negative lookahead `(?!...)` rejects a month name so a date-bearing
+  // string can't fall into this time-only branch.
+  const timeOnly = text.match(
+    /resets?\s+(?:at\s+)?(?!(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\b)(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*(?:\(([^)]+)\))?/i,
+  )
+  if (timeOnly) {
+    const d = resolveNextWallClock(
+      Number(timeOnly[1]),
+      timeOnly[2] ? Number(timeOnly[2]) : 0,
+      timeOnly[3]?.toLowerCase(),
+      timeOnly[4]?.trim(),
+      parseTimeNow,
+    )
+    if (d != null) return d
+  }
+  return undefined
+}
+/**
+ * Resolve a bare wall-clock time ("5pm", "8:50am", "17:00") to the NEXT
+ * occurrence of that time, tz-aware. Returns the soonest future Date (rolls
+ * to a later day when the time has already passed today). Returns
+ * `undefined` on bad input (out-of-range hour/minute or an unknown tz).
+ * When `tz` is omitted the time is interpreted in UTC (best-effort) —
+ * Anthropic's strings normally carry the IANA tz in parens, e.g.
+ * "(Australia/Melbourne)".
+ *
+ * @param hour12or24 - Hour as written: 12-hour when `ampm` is given,
+ *   otherwise taken as a 24-hour value. Must end up within 0..23.
+ * @param minute - Minute of the hour; must be within 0..59.
+ * @param ampm - Compared against the lowercase strings 'am' / 'pm' only:
+ *   'pm' adds 12 to hours below 12, 'am' maps 12 to 0. Any other value
+ *   leaves the hour unchanged.
+ * @param tz - IANA time-zone name, or undefined for UTC.
+ * @param nowDate - The reference "now"; the result is strictly later.
+ * @returns The next matching instant, or `undefined` when the input is
+ *   invalid or no candidate in the scanned days lies in the future.
+ */
+function resolveNextWallClock(
+  hour12or24: number,
+  minute: number,
+  ampm: string | undefined,
+  tz: string | undefined,
+  nowDate: Date,
+): Date | undefined {
+  let hour = hour12or24
+  if (ampm === 'pm' && hour < 12) hour += 12
+  if (ampm === 'am' && hour === 12) hour = 0
+  if (!Number.isFinite(hour) || hour > 23 || hour < 0) return undefined
+  if (!Number.isFinite(minute) || minute > 59 || minute < 0) return undefined
+  const nowMs = nowDate.getTime()
+  // Walk today and the next two days (DST-safe span) and pick the first
+  // occurrence strictly in the future relative to now.
+  const base = new Date(nowMs)
+  for (let dayOffset = 0; dayOffset <= 2; dayOffset++) {
+    // Derive the y/m/d for `dayOffset` days from now IN THE TARGET TZ, so the
+    // wall-clock date we resolve is the tz's calendar date, not the container's.
+    const dateParts = tzDateParts(new Date(nowMs + dayOffset * 86_400_000), tz)
+    if (dateParts == null) return undefined
+    const epoch = wallClockToEpoch(
+      dateParts.year, dateParts.month, dateParts.day, hour, minute, tz,
+    )
+    if (epoch != null && epoch > nowMs) return new Date(epoch)
+  }
+  // Fallback: shouldn't happen. `base` is read nowhere else in this function.
+  void base
   return undefined
 }
+/**
+ * The calendar year/month/day of `d` as seen in `tz` (UTC when `tz` is
+ * omitted or empty).
+ *
+ * `month` is zero-based (0 = January) in both branches: the UTC branch uses
+ * `getUTCMonth()` and the tz branch subtracts 1 from the formatted month.
+ *
+ * @param d - Instant whose calendar date is wanted.
+ * @param tz - IANA time-zone name handed to `Intl.DateTimeFormat`.
+ * @returns The date parts, or `null` when building or using the formatter
+ *   for `tz` throws (e.g. an unknown zone).
+ */
+function tzDateParts(
+  d: Date,
+  tz: string | undefined,
+): { year: number; month: number; day: number } | null {
+  if (!tz) {
+    return { year: d.getUTCFullYear(), month: d.getUTCMonth(), day: d.getUTCDate() }
+  }
+  try {
+    const fmt = new Intl.DateTimeFormat('en-US', {
+      timeZone: tz, year: 'numeric', month: '2-digit', day: '2-digit',
+    })
+    const parts = Object.fromEntries(
+      fmt.formatToParts(d).filter((p) => p.type !== 'literal').map((p) => [p.type, p.value]),
+    )
+    return {
+      year: Number(parts.year),
+      month: Number(parts.month) - 1,
+      day: Number(parts.day),
+    }
+  } catch {
+    return null
+  }
+}
+/**
+ * Convert a wall-clock time in an IANA tz to epoch-ms (null if the tz is
+ * unknown). Resolves the tz's offset via Intl rather than
+ * `new Date(localString)`, which assumes the container TZ.
+ * Mirrors wedge-watchdog.ts's helper of the same name (kept local to keep
+ * this module dependency-free / pure-testable).
+ *
+ * Method: treat the fields as if they were UTC (`asUtc`), format that
+ * instant in `tz`, rebuild the displayed wall clock as a UTC timestamp
+ * (`shown`), and subtract the difference from `asUtc`.
+ *
+ * NOTE(review): the offset is sampled at the instant `asUtc`, not at the
+ * returned instant. If a DST transition falls between the two, the result
+ * may be off by the transition amount — confirm whether a second
+ * correction pass is needed before relying on "correct across DST".
+ *
+ * @param year - Full calendar year.
+ * @param month - Zero-based month, passed straight to `Date.UTC`.
+ * @param day - Day of the month.
+ * @param hour - Hour, 0..23.
+ * @param minute - Minute, 0..59.
+ * @param tz - IANA time-zone name; when omitted or empty the fields are
+ *   interpreted as UTC.
+ * @returns Epoch milliseconds, or `null` when the formatter throws for `tz`.
+ */
+function wallClockToEpoch(
+  year: number, month: number, day: number, hour: number, minute: number, tz: string | undefined,
+): number | null {
+  const asUtc = Date.UTC(year, month, day, hour, minute, 0)
+  if (!tz) return asUtc // no tz given → best-effort UTC
+  try {
+    const fmt = new Intl.DateTimeFormat('en-US', {
+      timeZone: tz, year: 'numeric', month: '2-digit', day: '2-digit',
+      hour: '2-digit', minute: '2-digit', second: '2-digit', hour12: false,
+    })
+    const parts = Object.fromEntries(
+      fmt.formatToParts(new Date(asUtc)).filter((p) => p.type !== 'literal').map((p) => [p.type, p.value]),
+    )
+    const shown = Date.UTC(
+      Number(parts.year), Number(parts.month) - 1, Number(parts.day),
+      Number(parts.hour) % 24, Number(parts.minute), Number(parts.second), // % 24: presumably guards an hour rendered as "24" at midnight — confirm
+    )
+    const offset = shown - asUtc // how far ahead the tz wall clock is of UTC
+    return asUtc - offset
+  } catch {
+    return null // unknown tz
+  }
+}
 function parseRelativeDuration(s: string): number | null {
   // "2h 15m" / "30m" / "45 seconds"
   let total = 0

package/telegram-plugin/narrative-dedup.ts ADDED Viewed

@@ -0,0 +1,69 @@
+/**
+ * Reducer-side narrative dedup gate (the correctness core of the
+ * JSONL-text-narrative primitive).
+ *
+ * A `text` / `sub_agent_text` JSONL block is one of two things:
+ *
+ *   1. DRAFT-THEN-SEND — the model composing its answer just before it
+ *      calls `reply` / `stream_reply` with near-identical text. Surfacing
+ *      it would double-print the answer (once as a transient narrative
+ *      step, once as the canonical reply). It MUST be suppressed.
+ *   2. WORKING NARRATION — the agent's own commentary that is never sent
+ *      to the user ("On it. Let me find the repo…", "Sent. Waiting on the
+ *      build…"). It SHOULD be surfaced as a transient liveness step.
+ *
+ * A projector sees one JSONL line at a time and cannot know whether a
+ * later line is a reply tool_use, so the SHOW/SUPPRESS decision is a
+ * stateful, one-step-deferred decision made reducer-side (the gateway for
+ * the main agent, the subagent-watcher for sub/worker). This module is the
+ * pure, fully-unit-testable kernel of that decision — no I/O, no state of
+ * its own; the caller owns the `pendingNarrative` slot.
+ *
+ * The threshold heuristic deliberately matches the spirit of the #546
+ * outbound dedup (trim + lowercase + whitespace-collapse) so a draft and
+ * its reply compare equal the same way #546 considers them the same
+ * message.
+ */
+/**
+ * Tools whose `input.text` IS the canonical answer surface.
+ * Entries are bare tool names ('reply', 'stream_reply'), with no `mcp__`
+ * server prefix.
+ */
+export const REPLY_TOOLS = new Set(['reply', 'stream_reply'])
+/**
+ * Normalize for prefix comparison. In order: delete every occurrence of the
+ * characters `*`, `_`, backtick, `>`, `#` and `~` (markdown emphasis, heading
+ * and quote marks), collapse each whitespace run to a single space, trim
+ * both ends, and lowercase.
+ *
+ * The listed characters are removed wherever they appear, not only where
+ * they act as markup, so e.g. underscores inside identifiers are dropped
+ * too. Intended to mirror the #546 outbound-dedup normalization so a
+ * markdown-decorated draft and its plain reply compare equal.
+ *
+ * @param s - Raw text block or reply text.
+ * @returns The normalized string (possibly empty).
+ */
+export function normalizeNarrative(s: string): string {
+  return s
+    .replace(/[*_`>#~]/g, '') // markdown emphasis / heading / quote marks
+    .replace(/\s+/g, ' ')
+    .trim()
+    .toLowerCase()
+}
+/**
+ * Longest-common-prefix ratio over the SHORTER of the two normalized strings.
+ *
+ * Both inputs are passed through `normalizeNarrative` first. The shared
+ * prefix is counted by comparing string indices one by one.
+ *
+ * @param a - First text.
+ * @param b - Second text.
+ * @returns A value in [0, 1]: shared-prefix length divided by the shorter
+ *   normalized length. Returns 0 when either normalized string is empty.
+ */
+export function prefixSimilarity(a: string, b: string): number {
+  const x = normalizeNarrative(a)
+  const y = normalizeNarrative(b)
+  if (x.length === 0 || y.length === 0) return 0
+  const n = Math.min(x.length, y.length)
+  let i = 0
+  while (i < n && x[i] === y[i]) i++
+  return i / n
+}
+/**
+ * The single tunable. Longest-common-PREFIX ratio (not Levenshtein) is
+ * deliberate: a draft shares a long head with the sent answer even when the
+ * model trims a trailing sentence before sending. 0.8 over the shorter
+ * string tolerates that trim while rejecting the "Sent. Waiting…" +
+ * different-reply case (short string, near-zero shared prefix). The
+ * comparison in `isDraftOfReply` is inclusive (`>=`), so a ratio of exactly
+ * 0.8 suppresses. Exported so the test pins it — a silent retune breaks a
+ * test.
+ */
+export const DRAFT_SUPPRESS_THRESHOLD = 0.8
+/**
+ * TRUE ⇒ this text block is a draft-then-send of `replyText`; SUPPRESS it.
+ *
+ * @param textBlock - The narrative text block under consideration.
+ * @param replyText - The text passed to the reply tool.
+ * @returns `true` when `prefixSimilarity(textBlock, replyText)` is at least
+ *   `DRAFT_SUPPRESS_THRESHOLD`; `false` otherwise, including when either
+ *   side normalizes to the empty string.
+ */
+export function isDraftOfReply(textBlock: string, replyText: string): boolean {
+  return prefixSimilarity(textBlock, replyText) >= DRAFT_SUPPRESS_THRESHOLD
+}