npm - switchroom - Versions diffs - 0.13.10 → 0.13.12 - Mend

switchroom 0.13.10 → 0.13.12

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (25) hide show

package/telegram-plugin/tests/subagent-watcher.test.ts CHANGED Viewed

@@ -198,7 +198,10 @@ function makeHarness(opts: {
   const watcher = startSubagentWatcher({
     agentDir,
-    sendNotification: (text) => notifications.push(text),
+    // Card retired (#1122): completion surfaces via onFinish, not a
+    // user-facing message. Capture it so the completion assertions still
+    // verify the terminal-transition + de-dup behaviour.
+    onFinish: (info) => notifications.push(`✓ Worker done: ${info.description}`),
     stallThresholdMs,
     // Mirror the active-loop threshold so existing fixtures (which have
     // toolCount=0 and use the simple "advance past N" model) keep
@@ -382,8 +385,13 @@ describe('startSubagentWatcher', () => {
       let nextRef = 1
       const watcher = startSubagentWatcher({
         agentDir: opts.agentDir,
-        sendNotification: (text) => notifications.push(text),
-        ...(opts.onFinish ? { onFinish: opts.onFinish } : {}),
+        // Card retired (#1122): completion surfaces via onFinish. Capture
+        // it for the completion assertions and still delegate to any
+        // test-supplied onFinish.
+        onFinish: (info) => {
+          notifications.push(`✓ Worker done: ${info.description}`)
+          opts.onFinish?.(info)
+        },
         stallThresholdMs: 60_000,
         rescanMs: 500,
         now: () => Date.now(),
@@ -994,7 +1002,8 @@ describe('startSubagentWatcher', () => {
       const watcher = startSubagentWatcher({
         agentDir: opts.agentDir,
         ...(opts.agentCwd !== undefined ? { agentCwd: opts.agentCwd } : {}),
-        sendNotification: (text) => notifications.push(text),
+        // Card retired (#1122): completion surfaces via onFinish.
+        onFinish: (info) => notifications.push(`✓ Worker done: ${info.description}`),
         stallThresholdMs: 60_000,
         rescanMs: 500,
         now: () => Date.now(),
@@ -1133,7 +1142,8 @@ describe('startSubagentWatcher', () => {
       let nextRef = 1
       const watcher = startSubagentWatcher({
         agentDir,
-        sendNotification: (text) => notifications.push(text),
+        // Card retired (#1122): completion surfaces via onFinish.
+        onFinish: (info) => notifications.push(`✓ Worker done: ${info.description}`),
         stallThresholdMs: 60_000,
         rescanMs: 500,
         now: () => Date.now(),

package/telegram-plugin/tests/turn-flush-safety.test.ts CHANGED Viewed

@@ -138,112 +138,60 @@ describe('decideTurnFlush', () => {
     ).toEqual({ kind: 'skip', reason: 'reply-called' })
   })
-  // #1291 — when the model emits a soft-commit reply ("on it, back in a
-  // few") and then composes the real substantive answer in terminal text
-  // only, the pre-#1291 behaviour skipped flush entirely because
-  // replyCalled was true. The fix: track capturedTextLenAtLastReply and
-  // flush the post-reply tail when it meets the substantive threshold.
-  describe('#1291 — post-reply tail flush', () => {
-    it('flushes the post-reply tail when it meets the substantive threshold', () => {
+  // The turn-flush safety net covers exactly one failure mode: a turn that
+  // ended with the model never having said anything. Once the model has
+  // called reply / stream_reply the turn is served — any assistant text it
+  // emits afterwards is its own end-of-turn wrap-up (a closing summary,
+  // narration to itself), NOT a message it chose to send. The framework
+  // must never promote that terminal text into a second Telegram bubble.
+  //
+  // Regression guard for the redundant-follow-up-message fix: this reverts
+  // the #1291 post-reply-tail flush, which posted a duplicate recap on
+  // essentially every turn because the model habitually writes a closing
+  // summary after its final reply. See reference/conversational-pacing.md
+  // — "the framework owns the beat; the model authors the words".
+  describe('reply-called turns never flush trailing terminal text', () => {
+    it('skips even when a long substantive tail follows the reply', () => {
       const decision = decideTurnFlush({
         chatId: '700',
         replyCalled: true,
-        // Index 0 = the captured text BEFORE the reply tool was called
-        // (some thinking-as-text). Indices 1..2 are post-reply.
         capturedText: [
           'thinking out loud before the reply',
-          'Now here is the actual substantive answer the model composed ',
-          'in terminal text only after the interim reply call.',
+          'Answered the Playwright question and acked the calendar ' +
+            'diagnosis is still in flight. Will surface the root cause ' +
+            'when the worker returns.',
         ],
-        capturedTextLenAtLastReply: 1,
-      })
-      expect(decision).toEqual({
-        kind: 'flush',
-        text:
-          'Now here is the actual substantive answer the model composed ' +
-          '\nin terminal text only after the interim reply call.',
-      })
-    })
-    it('skips with reply-called-no-new-text when post-reply tail is below threshold', () => {
-      const decision = decideTurnFlush({
-        chatId: '701',
-        replyCalled: true,
-        capturedText: ['the pre-reply scratch', 'ok.'], // tail = "ok." (3 chars)
-        capturedTextLenAtLastReply: 1,
-      })
-      expect(decision).toEqual({
-        kind: 'skip',
-        reason: 'reply-called-no-new-text',
-      })
-    })
-    it('skips with reply-called when there is no post-reply text at all', () => {
-      const decision = decideTurnFlush({
-        chatId: '702',
-        replyCalled: true,
-        capturedText: ['everything-was-before-the-reply'],
-        capturedTextLenAtLastReply: 1, // tail slice is empty
       })
       expect(decision).toEqual({ kind: 'skip', reason: 'reply-called' })
     })
-    it('post-reply tail honors a silent marker (skip)', () => {
+    it('skips regardless of how many text blocks trail the reply', () => {
       const decision = decideTurnFlush({
-        chatId: '703',
-        replyCalled: true,
-        capturedText: ['real answer pre-reply', 'NO_REPLY'],
-        capturedTextLenAtLastReply: 1,
-        replyCalledTailMinChars: 1, // force the marker check
-      })
-      expect(decision).toEqual({ kind: 'skip', reason: 'silent-marker' })
-    })
-    it('post-reply tail with null chatId still skips (no-inbound-chat)', () => {
-      const decision = decideTurnFlush({
-        chatId: null,
+        chatId: '701',
         replyCalled: true,
         capturedText: [
-          'pre',
-          'this tail would have been substantive enough to flush normally',
+          'a substantive paragraph the model wrote as terminal text',
+          'and another one, each well over any old length threshold',
+          'and a third closing summary block for good measure',
         ],
-        capturedTextLenAtLastReply: 1,
-      })
-      expect(decision).toEqual({ kind: 'skip', reason: 'no-inbound-chat' })
-    })
-    it('preserves pre-#1291 behaviour when capturedTextLenAtLastReply is omitted', () => {
-      // Legacy caller doesn't track the marker — defaults to
-      // capturedText.length, so the tail slice is empty and we skip
-      // with reason 'reply-called' (the original behaviour).
-      const decision = decideTurnFlush({
-        chatId: '704',
-        replyCalled: true,
-        capturedText: ['some answer the model emitted'],
       })
       expect(decision).toEqual({ kind: 'skip', reason: 'reply-called' })
     })
-    it('respects a custom replyCalledTailMinChars threshold', () => {
+    it('skips with reply-called when capturedText is empty', () => {
       const decision = decideTurnFlush({
-        chatId: '705',
+        chatId: '702',
         replyCalled: true,
-        capturedText: ['pre-reply', 'short but substantive in this test'],
-        capturedTextLenAtLastReply: 1,
-        replyCalledTailMinChars: 10,
+        capturedText: [],
       })
-      expect(decision.kind).toBe('flush')
+      expect(decision).toEqual({ kind: 'skip', reason: 'reply-called' })
     })
-    it('feature flag off still wins over post-reply tail flush', () => {
+    it('feature flag off still wins over a reply-called turn', () => {
       const decision = decideTurnFlush({
-        chatId: '706',
+        chatId: '703',
         replyCalled: true,
-        capturedText: [
-          'pre',
-          'a long substantive post-reply tail that would otherwise flush',
-        ],
-        capturedTextLenAtLastReply: 1,
+        capturedText: ['a long substantive tail that pre-fix would flush'],
         flushEnabled: false,
       })
       expect(decision).toEqual({ kind: 'skip', reason: 'flag-disabled' })

package/telegram-plugin/turn-flush-safety.ts CHANGED Viewed

@@ -57,7 +57,6 @@ export type FlushDecision =
 export type FlushSkipReason =
   | 'flag-disabled'
   | 'reply-called'
-  | 'reply-called-no-new-text'
   | 'no-inbound-chat'
   | 'empty-text'
   | 'silent-marker'
@@ -70,35 +69,14 @@ export interface FlushDecisionInput {
    * this turn. */
   replyCalled: boolean
   /** Raw text content blocks accumulated from assistant events across the
-   * turn. Joined + trimmed internally. */
+   * turn. Joined + trimmed internally. Only consulted when `replyCalled`
+   * is false — once the model has called reply / stream_reply the turn is
+   * served and trailing terminal text is dropped (see `decideTurnFlush`). */
   capturedText: string[]
-  /** Snapshot of `capturedText.length` at the moment of the most recent
-   * reply / stream_reply tool call in this turn. Indices `[capturedText
-   * length-at-last-reply, capturedText.length)` are the post-reply tail
-   * — substantive content the model emitted AFTER the reply (e.g. soft
-   * commit "on it, back in a few" followed by the real answer in
-   * terminal text only, the #1291 repro). When the tail meets
-   * `replyCalledTailMinChars` we flush it; otherwise we skip.
-   *
-   * Defaults to `capturedText.length` (treat all captured text as
-   * pre-reply, preserve the pre-#1291 behaviour where any reply tool
-   * call suppressed flush entirely) so callers that don't track the
-   * marker keep the old contract. */
-  capturedTextLenAtLastReply?: number
-  /** Minimum trimmed-tail length to qualify a post-reply tail flush.
-   * Defaults to `REPLY_CALLED_TAIL_MIN_CHARS` (40). Below this we skip
-   * with `reply-called-no-new-text` — typical for trailing markdown
-   * artifacts or a one-word afterthought. */
-  replyCalledTailMinChars?: number
   /** Feature flag — defaults to true. Pass `false` to force skip everywhere. */
   flushEnabled?: boolean
 }
-/** Default minimum trimmed length for the post-reply tail to be flushed
- * as a follow-up message. Below this we treat the tail as noise / artifact
- * and skip silently. */
-export const REPLY_CALLED_TAIL_MIN_CHARS = 40
 /**
  * Pure decision: should the gateway deterministically send the model's
  * captured assistant text at turn_end? Returns `{kind: 'flush', text}` with
@@ -107,39 +85,31 @@ export const REPLY_CALLED_TAIL_MIN_CHARS = 40
  * Ordering of checks is deliberate: cheapest/strongest first so logs
  * attribute a skip to the most specific cause.
  *
- * #1291 — when `replyCalled` is true we no longer suppress unconditionally.
- * The model may have emitted a soft-commit reply ("on it, back in a few")
- * followed by the real substantive answer in terminal text only. Using
- * `capturedTextLenAtLastReply` we isolate the post-reply tail and flush
- * it if it's substantive enough; otherwise we skip with
- * `reply-called-no-new-text` (logged) or `reply-called` (silent, no tail).
+ * The safety net has exactly one job: a turn that ended with the model
+ * having said *nothing* to the user. Once `replyCalled` is true the model
+ * has communicated through the proper channel and the decision is always
+ * `skip` — assistant text emitted after a reply is the model's own
+ * end-of-turn wrap-up (a closing summary, narration to itself), not a
+ * message it chose to send. Promoting that terminal text into a Telegram
+ * message second-guesses an explicit reply and posts a redundant duplicate
+ * on essentially every turn, because the model habitually writes a closing
+ * summary. The framework owns the *beat*; the model authors the *words*
+ * and emits them via reply (`reference/conversational-pacing.md`).
+ *
+ * (This reverts the #1291 post-reply-tail flush. Its intent — catch a
+ * soft-commit reply followed by the real answer in terminal text only —
+ * could not be told apart from the habitual wrap-up by length, so it
+ * misfired constantly. A model that soft-commits and never delivers is a
+ * pacing failure caught by the silence-poke ladder, not papered over here.)
  */
 export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
   const flushEnabled = input.flushEnabled !== false
   if (!flushEnabled) return { kind: 'skip', reason: 'flag-disabled' }
-  if (input.replyCalled) {
-    const tailIdx = input.capturedTextLenAtLastReply ?? input.capturedText.length
-    const tail = input.capturedText.slice(tailIdx).join('\n').trim()
-    const minChars = input.replyCalledTailMinChars ?? REPLY_CALLED_TAIL_MIN_CHARS
-    if (tail.length === 0) {
-      // The reply tool was called and nothing of substance came after —
-      // the turn is fully served by the reply. Skip silently (the gateway
-      // WARN gate excludes this reason from logs).
-      return { kind: 'skip', reason: 'reply-called' }
-    }
-    if (tail.length < minChars) {
-      // Post-reply tail exists but is below the substantive-content
-      // threshold — typically trailing markdown artifacts or a one-word
-      // afterthought. Skip but with a distinct reason so this case IS
-      // logged (auditable for #1291 regressions, vs the silent
-      // 'reply-called' which is the expected steady state).
-      return { kind: 'skip', reason: 'reply-called-no-new-text' }
-    }
-    if (input.chatId == null) return { kind: 'skip', reason: 'no-inbound-chat' }
-    if (isSilentFlushMarker(tail)) return { kind: 'skip', reason: 'silent-marker' }
-    return { kind: 'flush', text: tail }
-  }
+  // The model communicated through the proper channel — trust it. Any
+  // assistant text it emitted as terminal text afterwards is its own
+  // end-of-turn wrap-up, never a second Telegram message.
+  if (input.replyCalled) return { kind: 'skip', reason: 'reply-called' }
   if (input.chatId == null) return { kind: 'skip', reason: 'no-inbound-chat' }
   const joined = input.capturedText.join('\n').trim()