npm - switchroom - Versions diffs - 0.13.25 → 0.13.27 - Mend

switchroom 0.13.25 → 0.13.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/telegram-plugin/tests/subagent-progress-inbound-builder.test.ts ADDED Viewed

@@ -0,0 +1,269 @@
+/**
+ * Pin the InboundMessage shape and decision logic for the synthetic
+ * `subagent_progress` envelope (#1720 — mid-flight progress beat).
+ *
+ * Three load-bearing guarantees:
+ *
+ *   1. The `meta.source` string is `subagent_progress`. The MCP
+ *      channel notification wraps it as
+ *      `<channel source="subagent_progress">`; the parent agent's
+ *      beat-3 hook keys on exactly that tag.
+ *   2. `meta.expiresAt` is set on every envelope (`nowMs + 2 × interval`).
+ *      Stale progress is a lie, so the spool's TTL filter MUST have a
+ *      live numeric value to gate on.
+ *   3. The spool dedup id derived via `meta.subagent_jsonl_id` +
+ *      `meta.bucket_idx` is deterministic — same (jsonl id, bucket idx)
+ *      always collapses to the same `s:progress:...` id, so a re-fire
+ *      within the same window is structurally a no-op.
+ */
+import { describe, it, expect } from 'vitest'
+import {
+  buildSubagentProgressInbound,
+  decideSubagentProgress,
+  isEnvFlagOn,
+  DEFAULT_PROGRESS_INTERVAL_MS,
+  PROGRESS_DESC_MAX,
+  PROGRESS_RESULT_MAX,
+} from '../gateway/subagent-progress-inbound-builder.js'
+import { spoolId } from '../gateway/inbound-spool.js'
+const FIXED_NOW = 1_700_000_000_000 // Fixed epoch-ms "now" passed as nowMs so the ts / messageId / expiresAt assertions are deterministic.
+const INTERVAL_MS = DEFAULT_PROGRESS_INTERVAL_MS // Alias for the builder's exported default; passed as progressIntervalMs in every fixture in this file.
+describe('buildSubagentProgressInbound', () => { // Pins the envelope fields, text capping, and spool-id determinism.
+  it('builds an envelope with the load-bearing meta.source and TTL', () => {
+    const inbound = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '12345',
+        subagentJsonlId: 'jsonl-abc',
+        taskDescription: 'Crawl the repo for dead code',
+        latestSummary: 'scanning packages/server — 14 files in so far',
+        elapsedMs: 7 * 60 * 1000,
+        bucketIdx: 1,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.type).toBe('inbound')
+    expect(inbound.chatId).toBe('12345')
+    expect(inbound.user).toBe('subagent-watcher')
+    expect(inbound.ts).toBe(FIXED_NOW)
+    expect(inbound.messageId).toBe(FIXED_NOW) // messageId is expected to equal nowMs, same as ts
+    expect(inbound.meta.source).toBe('subagent_progress')
+    expect(inbound.meta.subagent_jsonl_id).toBe('jsonl-abc')
+    expect(inbound.meta.bucket_idx).toBe('1') // expected as the string '1' although ctx.bucketIdx is the number 1
+    // TTL: nowMs + 2 × interval. Numeric, parseable, > now.
+    expect(inbound.meta.expiresAt).toBeDefined()
+    const exp = Number(inbound.meta.expiresAt)
+    expect(Number.isFinite(exp)).toBe(true)
+    expect(exp).toBe(FIXED_NOW + 2 * INTERVAL_MS)
+    // Body content
+    expect(inbound.text).toContain('Crawl the repo for dead code')
+    expect(inbound.text).toContain('scanning packages/server')
+    expect(inbound.text).toMatch(/beat 3/)
+  })
+  it('handles an empty latest summary (tool-only worker) without breaking shape', () => {
+    const inbound = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '99',
+        subagentJsonlId: 'jsonl-xyz',
+        taskDescription: 'Run the suite',
+        latestSummary: '',
+        elapsedMs: 6 * 60 * 1000,
+        bucketIdx: 1,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.meta.source).toBe('subagent_progress')
+    expect(inbound.text).toContain('tool-only') // with an empty summary the body is expected to contain the literal 'tool-only'
+  })
+  it('caps an over-long latest summary and description', () => {
+    const inbound = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '99',
+        subagentJsonlId: 'jsonl-xyz',
+        taskDescription: 'D'.repeat(PROGRESS_DESC_MAX + 500),
+        latestSummary: 'L'.repeat(PROGRESS_RESULT_MAX + 5000),
+        elapsedMs: 5 * 60 * 1000,
+        bucketIdx: 1,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.text.length).toBeLessThan(
+      PROGRESS_RESULT_MAX + PROGRESS_DESC_MAX + 800, // both caps plus 800 chars; presumably headroom for the fixed body text — confirm against the builder
+    )
+    expect(inbound.text).toContain('…') // presumably the truncation marker appended by the builder — confirm
+  })
+  // Deterministic spoolId per bucket — the core #1720 guarantee.
+  it('two envelopes with the same (jsonl id, bucket idx) yield IDENTICAL spoolIds', () => {
+    const a = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '12345',
+        subagentJsonlId: 'jsonl-abc',
+        taskDescription: 'x',
+        latestSummary: 'first line',
+        elapsedMs: 7 * 60 * 1000,
+        bucketIdx: 1,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      nowMs: FIXED_NOW,
+    })
+    const b = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '12345',
+        subagentJsonlId: 'jsonl-abc',
+        taskDescription: 'x',
+        latestSummary: 'a DIFFERENT line, also in bucket 1',
+        elapsedMs: 8 * 60 * 1000, // still bucket 1 (8 < 10)
+        bucketIdx: 1,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      // Different nowMs — proves dedup is content-keyed, not ts-keyed.
+      nowMs: FIXED_NOW + 60_000,
+    })
+    expect(spoolId(a)).toBe(spoolId(b))
+    expect(spoolId(a)).toBe('s:progress:jsonl-abc:1') // exact id pinned: 's:progress:' + jsonl id + ':' + bucket idx
+  })
+  it('different bucket idx → different spoolId (envelopes do NOT collapse)', () => {
+    const bucket1 = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '12345',
+        subagentJsonlId: 'jsonl-abc',
+        taskDescription: 'x',
+        latestSummary: 'b1',
+        elapsedMs: 7 * 60 * 1000,
+        bucketIdx: 1,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      nowMs: FIXED_NOW,
+    })
+    const bucket2 = buildSubagentProgressInbound({
+      ctx: {
+        chatId: '12345',
+        subagentJsonlId: 'jsonl-abc',
+        taskDescription: 'x',
+        latestSummary: 'b2',
+        elapsedMs: 12 * 60 * 1000,
+        bucketIdx: 2,
+        progressIntervalMs: INTERVAL_MS,
+      },
+      nowMs: FIXED_NOW + 5 * 60 * 1000,
+    })
+    expect(spoolId(bucket1)).not.toBe(spoolId(bucket2)) // same jsonl id and chat; only the bucket idx (and inputs derived from time) differ
+  })
+})
+describe('isEnvFlagOn — bool env parser', () => { // Table-driven checks: one list of OFF spellings, one list of ON spellings.
+  it('treats unset / empty / 0 / false / no / off as OFF (case-insensitive, trimmed)', () => {
+    for (const v of [undefined, '', '0', 'false', 'FALSE', 'False', 'no', 'NO', 'off', 'OFF', '  0  ', ' false ']) {
+      expect(isEnvFlagOn(v), `value=${JSON.stringify(v)}`).toBe(false) // second expect() argument is the failure message naming the offending value
+    }
+  })
+  it('treats 1 / true / yes / arbitrary non-empty as ON', () => {
+    for (const v of ['1', 'true', 'TRUE', 'yes', 'on', 'enabled', 'kill', 'anything']) {
+      expect(isEnvFlagOn(v), `value=${JSON.stringify(v)}`).toBe(true) // any value outside the OFF list above is expected to count as ON
+    }
+  })
+})
+describe('decideSubagentProgress', () => { // One test per gate; each asserts the deliver flag and, on skip, the reason string.
+  function baseInput(over: Partial<Parameters<typeof decideSubagentProgress>[0]> = {}) { // Default input that is expected to deliver (see first test); `over` replaces individual fields.
+    return {
+      disableEnvValue: undefined,
+      isBackground: true,
+      fleetChatId: '12345',
+      ownerChatId: '999',
+      subagentJsonlId: 'jsonl-abc',
+      taskDescription: 'x',
+      latestSummary: 'hi',
+      elapsedMs: 7 * 60 * 1000,
+      progressIntervalMs: INTERVAL_MS,
+      lastBucketIdx: null,
+      nowMs: FIXED_NOW,
+      ...over, // spread last so caller overrides win over the defaults above
+    } as const
+  }
+  it('delivers when all gates pass; bucketIdx is floor(elapsed/interval)', () => {
+    const d = decideSubagentProgress(baseInput())
+    expect(d.deliver).toBe(true)
+    if (d.deliver) { // guard presumably narrows the result type so the deliver-only fields are accessible — confirm against the builder's return type
+      expect(d.bucketIdx).toBe(1) // the default 7 min elapsed is expected to land in bucket 1
+      expect(d.chatId).toBe('12345')
+      expect(d.inbound.meta.source).toBe('subagent_progress')
+    }
+  })
+  it('kill-switch (SWITCHROOM_DISABLE_SUBAGENT_PROGRESS=1) skips delivery', () => {
+    const d = decideSubagentProgress(baseInput({ disableEnvValue: '1' }))
+    expect(d.deliver).toBe(false)
+    if (!d.deliver) expect(d.reason).toBe('env-disabled')
+  })
+  it('kill-switch treats `false`/`no`/`off`/empty/unset as OFF (not enabled)', () => {
+    for (const v of [undefined, '', '0', 'false', 'FALSE', 'False', 'no', 'NO', 'off', 'OFF', '  0  ', ' false ']) {
+      const d = decideSubagentProgress(baseInput({ disableEnvValue: v }))
+      expect(d.deliver, `value=${JSON.stringify(v)} should NOT be env-disabled`).toBe(true) // kill-switch OFF means the progress beat is still delivered
+    }
+  })
+  it('kill-switch treats `1`/`true`/`yes`/arbitrary non-empty as ON', () => {
+    for (const v of ['1', 'true', 'TRUE', 'yes', 'on', 'enabled', 'kill']) {
+      const d = decideSubagentProgress(baseInput({ disableEnvValue: v }))
+      expect(d.deliver, `value=${JSON.stringify(v)} should be env-disabled`).toBe(false)
+      if (!d.deliver) expect(d.reason).toBe('env-disabled')
+    }
+  })
+  it('foreground sub-agents skip (parent already sees the narrative)', () => {
+    const d = decideSubagentProgress(baseInput({ isBackground: false }))
+    expect(d.deliver).toBe(false)
+    if (!d.deliver) expect(d.reason).toBe('foreground')
+  })
+  it('falls back to owner chat when fleet chat is empty', () => {
+    const d = decideSubagentProgress(baseInput({ fleetChatId: '' }))
+    expect(d.deliver).toBe(true)
+    if (d.deliver) expect(d.chatId).toBe('999') // '999' is baseInput's ownerChatId
+  })
+  it('no-chat when neither fleet nor owner resolves', () => {
+    const d = decideSubagentProgress(baseInput({ fleetChatId: '', ownerChatId: '' }))
+    expect(d.deliver).toBe(false)
+    if (!d.deliver) expect(d.reason).toBe('no-chat')
+  })
+  it('first bucket (idx 0) is suppressed — ambient liveness is plenty for the opening window', () => {
+    const d = decideSubagentProgress(baseInput({ elapsedMs: 60_000 })) // 60 s elapsed is expected to fall in bucket 0
+    expect(d.deliver).toBe(false)
+    if (!d.deliver) expect(d.reason).toBe('first-bucket-suppressed')
+  })
+  it('same bucket as last fired is a no-op (dedup at the decision layer)', () => {
+    const d = decideSubagentProgress(baseInput({ lastBucketIdx: 1 })) // default elapsed (7 min) is bucket 1, matching lastBucketIdx
+    expect(d.deliver).toBe(false)
+    if (!d.deliver) expect(d.reason).toBe('bucket-already-fired')
+  })
+  it('next bucket fires even when prior bucket already fired', () => {
+    const d = decideSubagentProgress(baseInput({
+      elapsedMs: 12 * 60 * 1000, // 12 min elapsed is expected to land in bucket 2
+      lastBucketIdx: 1,
+    }))
+    expect(d.deliver).toBe(true)
+    if (d.deliver) expect(d.bucketIdx).toBe(2)
+  })
+  it('missing-jsonl-id refuses to mint a non-deterministic envelope', () => {
+    const d = decideSubagentProgress(baseInput({ subagentJsonlId: '' }))
+    expect(d.deliver).toBe(false)
+    if (!d.deliver) expect(d.reason).toBe('missing-jsonl-id')
+  })
+})

package/telegram-plugin/uat/scenarios/jtbd-reflective-status-reaction-dm.test.ts ADDED Viewed

@@ -0,0 +1,204 @@
+/**
+ * JTBD scenario — reflective status reaction (#1713).
+ *
+ * Serves the JTBD "know what my agent is actually doing" (see
+ * `reference/know-what-my-agent-is-doing.md`). The status reaction on
+ * the user's inbound is the *primary* ambient liveness signal — the
+ * user reads it as "what is the agent doing right now". When it
+ * collapses straight to 👍 mid-turn, the signal evaporates and the
+ * user is left wondering whether the agent is still working.
+ *
+ * #1713 defect: plain `reply` and `stream_reply done=true` were both
+ * firing the terminal 👍 immediately, regardless of whether the turn
+ * had actually ended (the model often continues with tools after a
+ * mid-turn ack or a stream-finalized answer). The fix makes the
+ * controller reflective:
+ *
+ *   - Working states (🤔, ✍, 👨‍💻, ⚡, 🗜) are bidirectional and may
+ *     re-enter any number of times within a turn.
+ *   - Mid-turn replies (ack or final) are non-events for the reaction.
+ *   - 🔥 / 😱 is non-terminal — recovery to a working state is allowed.
+ *   - Only the `turn_end` IPC event (Stop hook) finalizes to 👍.
+ *   - Rapid state flips coalesce via a 3-5s debounce window.
+ *
+ * This JTBD test asserts the gateway-facing controller contract by
+ * driving the StatusReactionController directly; the gateway wiring
+ * around it is exhaustively covered by gateway integration tests and
+ * the unit suite at `telegram-plugin/tests/status-reactions.test.ts`.
+ * A live-Telegram harness UAT was considered but is poor value for
+ * this contract: real Bot-API reactions only expose the *current*
+ * emoji, not the full transition trail, so the unit-suite assertion
+ * shape is strictly more powerful. Live-harness coverage can be
+ * added as a follow-up once Bot-API reaction-history surfaces.
+ *
+ * Acceptance criteria (1–4 mapped from #1713 issue body; 5 preserves
+ * the F1 guarantee from #553):
+ *   1. Reply does NOT advance reaction to 👍.
+ *   2. Bidirectional transitions: thinking → tool → thinking works.
+ *   3. Only turn_end produces 👍.
+ *   4. Debounce coalesces rapid flips.
+ *   5. finalize() flushes a pending debounced state before 👍.
+ */
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { StatusReactionController } from "../../status-reactions.js";
+function makeEmitter() { // Test double: returns a vi.fn spy emitter plus the ordered list of emojis it has been called with.
+  const calls: string[] = []; // transition trail, in call order; tests compare it with toEqual
+  const emit = vi.fn(async (emoji: string) => {
+    calls.push(emoji);
+  });
+  return { emit, calls };
+}
+async function flush(): Promise<void> { // Awaits 8 already-resolved promises in sequence, yielding to the microtask queue each time.
+  for (let i = 0; i < 8; i++) { // NOTE(review): the count of 8 is not explained in this file — presumably enough hops for the controller's async emit chain; confirm
+    await Promise.resolve();
+  }
+}
+describe("uat-jtbd: reflective status reaction (#1713)", () => {
+  beforeEach(() => {
+    vi.useFakeTimers(); // timers only move when a test calls vi.advanceTimersByTime
+  });
+  afterEach(() => {
+    vi.useRealTimers(); // restore real timers after each test
+  });
+  // ── AC1 ──────────────────────────────────────────────────────────────
+  it("AC1: mid-turn reply does NOT advance reaction to 👍", async () => {
+    // Simulate a turn that:
+    //   1. Receives an inbound (👀).
+    //   2. The model thinks (🤔).
+    //   3. The model sends a reply mid-turn (NON-EVENT — no controller call).
+    //   4. The model continues with a tool call (👨‍💻).
+    //   5. turn_end finalizes (👍).
+    const { emit, calls } = makeEmitter();
+    const ctrl = new StatusReactionController(emit);
+    ctrl.setQueued(); // 👀
+    await flush();
+    ctrl.setThinking(); // → 🤔 (debounced)
+    vi.advanceTimersByTime(3500);
+    await flush();
+    // Mid-turn reply happens here — gateway does NOT call any controller
+    // method. The reaction must remain on 🤔.
+    expect(calls).toEqual(["👀", "🤔"]);
+    // Model continues working post-reply.
+    ctrl.setTool("Bash"); // → 👨‍💻 (debounced)
+    vi.advanceTimersByTime(3500);
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👨‍💻"]);
+    // turn_end is the only terminal trigger.
+    ctrl.finalize("done");
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👨‍💻", "👍"]);
+  });
+  // ── AC2 ──────────────────────────────────────────────────────────────
+  it("AC2: bidirectional transitions — thinking → tool → thinking re-enters", async () => {
+    const { emit, calls } = makeEmitter();
+    const ctrl = new StatusReactionController(emit);
+    ctrl.setQueued();
+    await flush();
+    ctrl.setThinking();
+    vi.advanceTimersByTime(3500);
+    await flush();
+    expect(calls).toEqual(["👀", "🤔"]);
+    ctrl.setTool("Read"); // → 👨‍💻 (coding family)
+    vi.advanceTimersByTime(3500);
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👨‍💻"]);
+    // Back to thinking. The same state may re-enter; controller is
+    // reflective, not a one-way ratchet.
+    ctrl.setThinking();
+    vi.advanceTimersByTime(3500);
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👨‍💻", "🤔"]);
+    // And a non-terminal 😱 mid-turn (e.g. transient 5xx) must permit
+    // recovery to a working state.
+    ctrl.setError();
+    vi.advanceTimersByTime(3500);
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👨‍💻", "🤔", "😱"]);
+    ctrl.setTool("WebFetch"); // recovery → ⚡
+    vi.advanceTimersByTime(3500);
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👨‍💻", "🤔", "😱", "⚡"]);
+  });
+  // ── AC3 ──────────────────────────────────────────────────────────────
+  it("AC3: only turn_end (finalize) produces 👍", async () => {
+    const { emit, calls } = makeEmitter();
+    const ctrl = new StatusReactionController(emit);
+    ctrl.setQueued();
+    await flush();
+    // Many working transitions; never call finalize. The reaction must
+    // not land on 👍.
+    for (let i = 0; i < 5; i++) { // 5 thinking/tool round trips, each given a full 3500 ms window
+      ctrl.setThinking();
+      vi.advanceTimersByTime(3500);
+      await flush();
+      ctrl.setTool("Bash");
+      vi.advanceTimersByTime(3500);
+      await flush();
+    }
+    expect(calls).not.toContain("👍");
+    // turn_end fires → 👍 finally lands.
+    ctrl.finalize("done");
+    await flush();
+    expect(calls[calls.length - 1]).toBe("👍"); // only the final entry is pinned here; the working trail is covered by AC1/AC2
+  });
+  // ── AC4 ──────────────────────────────────────────────────────────────
+  it("AC4: 3500ms debounce coalesces rapid state flips into one emit", async () => {
+    const { emit, calls } = makeEmitter();
+    const ctrl = new StatusReactionController(emit);
+    ctrl.setQueued();
+    await flush();
+    // Rapid flip storm — 10 transitions within ~2s.
+    for (let i = 0; i < 10; i++) {
+      if (i % 2 === 0) ctrl.setThinking();
+      else ctrl.setTool("Bash");
+      vi.advanceTimersByTime(200); // 10 × 200 ms = 2000 ms in total, less than the 3500 ms advanced after the loop
+    }
+    await flush();
+    // Debounce hasn't elapsed — only the queued emoji has landed.
+    expect(calls).toEqual(["👀"]);
+    // After the window closes, only the LAST state lands.
+    vi.advanceTimersByTime(3500);
+    await flush();
+    // Last call was setTool('Bash') on odd index — last index is 9 (odd).
+    expect(calls).toEqual(["👀", "👨‍💻"]);
+  });
+  // ── AC5: terminal flushes any pending debounced state ────────────────
+  it("AC5: finalize() flushes a pending working state before emitting 👍", async () => {
+    // F1 guarantee (#553) — preserved through #1713. If the turn ends
+    // while a non-terminal reaction is mid-debounce, the pending state
+    // must land BEFORE the terminal so the user sees the working
+    // signal rather than a 👀 → 👍 collapse.
+    const { emit, calls } = makeEmitter();
+    const ctrl = new StatusReactionController(emit);
+    ctrl.setQueued();
+    await flush();
+    ctrl.setThinking(); // pending — debounce window open
+    // Turn ends before debounce elapses.
+    ctrl.finalize("done"); // called with no vi.advanceTimersByTime since setThinking()
+    await flush();
+    expect(calls).toEqual(["👀", "🤔", "👍"]);
+  });
+});