npm - switchroom - Versions diffs - 0.14.57 → 0.14.59 - Mend

switchroom 0.14.57 → 0.14.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/agent-scheduler/index.js +1 -1
package/dist/auth-broker/index.js +19 -9
package/dist/cli/notion-write-pretool.mjs +1 -1
package/dist/cli/switchroom.js +29 -70
package/dist/host-control/main.js +1 -1
package/dist/vault/approvals/kernel-server.js +1 -1
package/dist/vault/broker/server.js +1 -1
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +268 -14
package/telegram-plugin/final-answer-detect.ts +34 -0
package/telegram-plugin/gateway/feed-reopen-gate.ts +162 -0
package/telegram-plugin/gateway/gateway.ts +304 -4
package/telegram-plugin/gateway/obligation-ledger.ts +216 -0
package/telegram-plugin/tests/feed-reopen-gate.test.ts +133 -0
package/telegram-plugin/tests/final-answer-detect.test.ts +67 -1
package/telegram-plugin/tests/obligation-ledger.test.ts +167 -0
package/telegram-plugin/tests/tool-activity-summary.test.ts +44 -0
package/telegram-plugin/tool-activity-summary.ts +14 -4

package/telegram-plugin/tests/feed-reopen-gate.test.ts ADDED Viewed

@@ -0,0 +1,133 @@
+import { describe, expect, it } from 'vitest'
+import {
+  decideFeedReopen,
+  shouldReopenFeedAfterAck,
+} from '../gateway/feed-reopen-gate.js'
+/**
+ * Feed-reopen-after-ack — pure decision gate.
+ *
+ * A supergroup agent that ACKS FIRST ("on it, checking Brevo…") then works
+ * had its live activity feed go dark for the real work: the ack reply is
+ * classified as the final answer by isFinalAnswerReply (it pings or is ≥200
+ * chars), setting turn.finalAnswerDelivered=true, and the tool_label handler
+ * then dropped every subsequent label. This predicate decides whether a tool
+ * label arriving after finalAnswerDelivered (the model is still working)
+ * should RE-OPEN the feed.
+ *
+ * ACK-ONLY refinement: finalAnswerDelivered latches true for BOTH a short
+ * pinging ack AND a substantive answer. Reopening after a GENUINE final
+ * answer is harmful — post-answer housekeeping (memory write / TodoWrite /
+ * Bash) would reset finalAnswerDelivered=false and trip the silent-end
+ * re-prompt → duplicate answer. So the gate reopens ONLY when the prior
+ * final was a short ack (finalAnswerSubstantive=false).
+ */
+describe('shouldReopenFeedAfterAck', () => {
+  it('reopens when delivered AND NOT substantive AND enabled (the ack-first fix)', () => {
+    expect(
+      shouldReopenFeedAfterAck({
+        finalAnswerDelivered: true,
+        finalAnswerSubstantive: false,
+        enabled: true,
+      }),
+    ).toBe(true)
+  })
+  it('does NOT reopen when the prior final was SUBSTANTIVE (the new guard)', () => {
+    // A real final answer followed by post-answer housekeeping tool work:
+    // keep the legacy gate (no reopen) so the silent-end re-prompt and the
+    // #2137 drain see the delivered final correctly. This is the harmful
+    // case the refinement closes.
+    expect(
+      shouldReopenFeedAfterAck({
+        finalAnswerDelivered: true,
+        finalAnswerSubstantive: true,
+        enabled: true,
+      }),
+    ).toBe(false)
+  })
+  it('does NOT reopen when the kill switch is off (legacy: drop the label)', () => {
+    expect(
+      shouldReopenFeedAfterAck({
+        finalAnswerDelivered: true,
+        finalAnswerSubstantive: false,
+        enabled: false,
+      }),
+    ).toBe(false)
+  })
+  it('does NOT reopen when the final answer was never delivered (no reopen needed)', () => {
+    // The feed was never gated off — the normal append/drain path applies.
+    expect(
+      shouldReopenFeedAfterAck({
+        finalAnswerDelivered: false,
+        finalAnswerSubstantive: false,
+        enabled: true,
+      }),
+    ).toBe(false)
+    expect(
+      shouldReopenFeedAfterAck({
+        finalAnswerDelivered: false,
+        finalAnswerSubstantive: false,
+        enabled: false,
+      }),
+    ).toBe(false)
+  })
+})
+describe('decideFeedReopen — tool_label branch outcome for a delivered turn', () => {
+  it('tool_label after a SHORT ACK (not substantive, kill switch ON) → reset + render proceeds', () => {
+    // The exact contract the gateway tool_label handler applies: the interim
+    // ack is reclassified — finalAnswerDelivered back to false, a FRESH feed
+    // message (activityMessageId null), last-sent render cleared so the drain
+    // re-sends. dropLabel false → the handler proceeds to append + drain.
+    const outcome = decideFeedReopen({
+      finalAnswerDelivered: true,
+      finalAnswerSubstantive: false,
+      enabled: true,
+    })
+    expect(outcome.dropLabel).toBe(false)
+    expect(outcome.reset).toEqual({
+      finalAnswerDelivered: false,
+      activityMessageId: null,
+      activityLastSentRender: null,
+    })
+  })
+  it('tool_label after a SUBSTANTIVE final → drops the label (the new guard, feed stays gated)', () => {
+    // Genuine final answer + post-answer housekeeping: NO reopen, NO reset.
+    // finalAnswerDelivered stays true so the silent-end re-prompt does not
+    // fire and the #2137 drain proceeds correctly.
+    const outcome = decideFeedReopen({
+      finalAnswerDelivered: true,
+      finalAnswerSubstantive: true,
+      enabled: true,
+    })
+    expect(outcome.dropLabel).toBe(true)
+    expect(outcome.reset).toBeUndefined()
+  })
+  it('kill switch OFF → drops the label (legacy early return, feed stays dark)', () => {
+    const outcome = decideFeedReopen({
+      finalAnswerDelivered: true,
+      finalAnswerSubstantive: false,
+      enabled: false,
+    })
+    expect(outcome.dropLabel).toBe(true)
+    expect(outcome.reset).toBeUndefined()
+  })
+  it('finalAnswerDelivered false → no reopen branch (handler never reaches it)', () => {
+    // The handler only calls decideFeedReopen inside `if (finalAnswerDelivered)`,
+    // but the predicate is total: a false flag yields dropLabel (no reset).
+    const outcome = decideFeedReopen({
+      finalAnswerDelivered: false,
+      finalAnswerSubstantive: false,
+      enabled: true,
+    })
+    expect(outcome.dropLabel).toBe(true)
+    expect(outcome.reset).toBeUndefined()
+  })
+})

package/telegram-plugin/tests/final-answer-detect.test.ts CHANGED Viewed

@@ -16,7 +16,11 @@
  */
 import { describe, it, expect } from 'vitest'
-import { isFinalAnswerReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
+import {
+  isFinalAnswerReply,
+  isSubstantiveFinalReply,
+  FINAL_ANSWER_MIN_CHARS,
+} from '../final-answer-detect.js'
 describe('isFinalAnswerReply — #1664 final-answer classification', () => {
   it('classifies a notification-bearing reply as the final answer', () => {
@@ -87,3 +91,65 @@ describe('isFinalAnswerReply — #1664 final-answer classification', () => {
     expect(FINAL_ANSWER_MIN_CHARS).toBe(200)
   })
 })
+describe('isSubstantiveFinalReply — feed-reopen ACK-ONLY distinction', () => {
+  // isSubstantiveFinalReply is isFinalAnswerReply MINUS the ping-only path.
+  // It tells "genuine final answer" (stream-done or ≥200 chars) apart from
+  // "final only because it pinged" (a short interim ack). The feed-reopen
+  // gate reopens only when finalAnswerDelivered && !substantive, so a real
+  // answer + post-answer housekeeping does NOT spuriously reopen / trip the
+  // silent-end re-prompt.
+  it('stream_reply done=true → substantive (closes the stream = the answer)', () => {
+    expect(
+      isSubstantiveFinalReply({ text: 'ok', disableNotification: true, done: true }),
+    ).toBe(true)
+  })
+  it('a reply at/over the length backstop → substantive', () => {
+    expect(
+      isSubstantiveFinalReply({
+        text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS),
+        disableNotification: true,
+      }),
+    ).toBe(true)
+    // One under the threshold, silent → not substantive.
+    expect(
+      isSubstantiveFinalReply({
+        text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS - 1),
+        disableNotification: true,
+      }),
+    ).toBe(false)
+  })
+  it('a short PINGING reply is final but NOT substantive (the ack case)', () => {
+    // The crux: isFinalAnswerReply says true (it pings), but this is the
+    // ack the feed-reopen gate must treat as reopen-eligible — NOT a real
+    // answer. So isSubstantiveFinalReply must say false.
+    expect(
+      isFinalAnswerReply({ text: 'on it, checking Brevo…', disableNotification: false }),
+    ).toBe(true)
+    expect(
+      isSubstantiveFinalReply({ text: 'on it, checking Brevo…', disableNotification: false }),
+    ).toBe(false)
+  })
+  it('a short SILENT interim reply is neither final nor substantive', () => {
+    expect(
+      isFinalAnswerReply({ text: 'thinking…', disableNotification: true }),
+    ).toBe(false)
+    expect(
+      isSubstantiveFinalReply({ text: 'thinking…', disableNotification: true }),
+    ).toBe(false)
+  })
+  it('a long reply is substantive regardless of the ping flag', () => {
+    const longText = 'x'.repeat(FINAL_ANSWER_MIN_CHARS)
+    expect(
+      isSubstantiveFinalReply({ text: longText, disableNotification: false }),
+    ).toBe(true)
+    expect(
+      isSubstantiveFinalReply({ text: longText, disableNotification: true }),
+    ).toBe(true)
+  })
+})

package/telegram-plugin/tests/obligation-ledger.test.ts ADDED Viewed

@@ -0,0 +1,167 @@
+import { describe, it, expect } from "vitest";
+import {
+  ObligationLedger,
+  buildObligationRepresentInbound,
+  obligationEscalationText,
+  type Obligation,
+} from "../gateway/obligation-ledger.js";
+function input(id: string, openedAt: number, text = "do the thing") {
+  return { originTurnId: id, chatId: "-100123", threadId: 3, messageId: Number(id.split("#").pop() ?? 0), text, openedAt };
+}
+describe("ObligationLedger", () => {
+  it("opens, reports open, and closes by origin id", () => {
+    const L = new ObligationLedger();
+    expect(L.hasOpen()).toBe(false);
+    expect(L.openIfAbsent(input("c:3#715", 1000))).toBe(true);
+    expect(L.hasOpen()).toBe(true);
+    expect(L.isOpen("c:3#715")).toBe(true);
+    expect(L.close("c:3#715")).toBe(true);
+    expect(L.hasOpen()).toBe(false);
+    expect(L.close("c:3#715")).toBe(false); // already closed
+  });
+  it("openIfAbsent is idempotent — buffer-then-enqueue opens once, keeps the first", () => {
+    const L = new ObligationLedger();
+    expect(L.openIfAbsent(input("c:3#715", 1000, "first"))).toBe(true);
+    expect(L.openIfAbsent(input("c:3#715", 2000, "second"))).toBe(false);
+    expect(L.size()).toBe(1);
+    expect(L.list()[0].text).toBe("first");
+    expect(L.list()[0].openedAt).toBe(1000);
+  });
+  it("close(null/undefined) is a safe no-op", () => {
+    const L = new ObligationLedger();
+    L.openIfAbsent(input("c:3#715", 1000));
+    expect(L.close(null)).toBe(false);
+    expect(L.close(undefined)).toBe(false);
+    expect(L.hasOpen()).toBe(true);
+  });
+  it("decideAtIdle returns 'none' when nothing is open", () => {
+    expect(new ObligationLedger().decideAtIdle()).toEqual({ action: "none" });
+  });
+  it("decideAtIdle picks the OLDEST open obligation to re-present", () => {
+    const L = new ObligationLedger();
+    L.openIfAbsent(input("c:3#715", 2000));
+    L.openIfAbsent(input("c:4#690", 1000)); // older
+    const d = L.decideAtIdle();
+    expect(d.action).toBe("represent");
+    expect(d.obligation?.originTurnId).toBe("c:4#690");
+  });
+  it("re-presents up to maxRepresents, then escalates (no infinite loop)", () => {
+    const L = new ObligationLedger(2); // max 2 represents
+    L.openIfAbsent(input("c:3#715", 1000));
+    // represent #1
+    expect(L.decideAtIdle().action).toBe("represent");
+    expect(L.markRepresented("c:3#715")).toBe(1);
+    // represent #2
+    expect(L.decideAtIdle().action).toBe("represent");
+    expect(L.markRepresented("c:3#715")).toBe(2);
+    // now exhausted → escalate
+    const d = L.decideAtIdle();
+    expect(d.action).toBe("escalate");
+    expect(d.obligation?.originTurnId).toBe("c:3#715");
+    // caller closes on escalate → ledger empties (no loop)
+    expect(L.close("c:3#715")).toBe(true);
+    expect(L.decideAtIdle().action).toBe("none");
+  });
+  it("the 715 scenario: open at receipt, NOT closed by an unrelated reply, re-presented", () => {
+    const L = new ObligationLedger();
+    // 713 (video, topic 635) and 715 (Meta report, topic 3) both open
+    L.openIfAbsent(input("c:635#713", 1000, "video swap task"));
+    L.openIfAbsent(input("c:3#715", 1100, "do the Meta report"));
+    // 713 gets a substantive reply resolving to its origin → close 713 only
+    expect(L.close("c:635#713")).toBe(true);
+    // 715 was only verbally deferred (no reply resolved to it) → still OPEN
+    expect(L.isOpen("c:3#715")).toBe(true);
+    // at idle, 715 is re-presented (this is the fix — the drop becomes a re-ask)
+    const d = L.decideAtIdle();
+    expect(d.action).toBe("represent");
+    expect(d.obligation?.originTurnId).toBe("c:3#715");
+  });
+  it("markRepresented on an unknown/closed id is a harmless 0", () => {
+    const L = new ObligationLedger();
+    expect(L.markRepresented("nope")).toBe(0);
+  });
+  describe("resolveCloseTarget — deterministic, holds for any model behavior", () => {
+    it("an echoed origin is authoritative (closes exactly that)", () => {
+      const L = new ObligationLedger();
+      L.openIfAbsent(input("c:635#713", 1000));
+      L.openIfAbsent(input("c:3#715", 1100));
+      // model echoed 713 while live turn is 715 → close 713, NOT the live turn
+      expect(L.resolveCloseTarget("c:635#713", "c:3#715")).toBe("c:635#713");
+    });
+    it("no echo + exactly ONE open → close the live turn (unambiguous)", () => {
+      const L = new ObligationLedger();
+      L.openIfAbsent(input("c:3#715", 1100));
+      expect(L.resolveCloseTarget(undefined, "c:3#715")).toBe("c:3#715");
+    });
+    it("no echo + MULTIPLE open → close NOTHING (never wrong-close/drop)", () => {
+      const L = new ObligationLedger();
+      L.openIfAbsent(input("c:635#713", 1000));
+      L.openIfAbsent(input("c:3#715", 1100));
+      // the marko race: 713's un-echoed reply lands while currentTurn=715.
+      // Closing 715 would silently drop it → resolveCloseTarget refuses.
+      expect(L.resolveCloseTarget(undefined, "c:3#715")).toBeNull();
+      expect(L.isOpen("c:3#715")).toBe(true); // 715 stays open → re-presented
+    });
+    it("no echo + live turn not an open obligation → null", () => {
+      const L = new ObligationLedger();
+      L.openIfAbsent(input("c:3#715", 1100));
+      expect(L.resolveCloseTarget(undefined, "c:9#999")).toBeNull();
+    });
+  });
+});
+describe("buildObligationRepresentInbound", () => {
+  const ob: Obligation = {
+    originTurnId: "-100123:3#715",
+    chatId: "-100123",
+    threadId: 3,
+    messageId: 715,
+    text: "do the Meta report",
+    openedAt: 1000,
+    representCount: 0,
+  };
+  it("carries the original message_id + origin_turn_id so the reply resolves back", () => {
+    const m = buildObligationRepresentInbound(ob, 5000);
+    expect(m.type).toBe("inbound");
+    expect(m.chatId).toBe("-100123");
+    expect(m.threadId).toBe(3);
+    expect(m.messageId).toBe(715); // ORIGINAL id → reply-quote + origin routing
+    expect(m.meta.origin_turn_id).toBe("-100123:3#715");
+    expect(m.meta.source).toBe("obligation_represent"); // synthetic → not tracked, no new obligation
+    expect(m.meta.represent_count).toBe("1");
+    expect(m.text).toContain("do the Meta report");
+    expect(m.text).toMatch(/answer it now|reply tool/i);
+  });
+  it("omits threadId for a DM obligation", () => {
+    const m = buildObligationRepresentInbound({ ...ob, threadId: undefined }, 5000);
+    expect(m.threadId).toBeUndefined();
+  });
+  it("truncates a long original to ~200 chars", () => {
+    const long = "x".repeat(500);
+    const m = buildObligationRepresentInbound({ ...ob, text: long }, 5000);
+    expect(m.text).toContain("…");
+    expect(m.text).not.toContain("x".repeat(201));
+  });
+  it("escalation text names the message and asks to re-send", () => {
+    expect(obligationEscalationText(ob)).toMatch(/missed|not sure/i);
+    expect(obligationEscalationText(ob)).toContain("do the Meta report");
+    expect(obligationEscalationText(ob)).toMatch(/re-?send/i);
+  });
+});

package/telegram-plugin/tests/tool-activity-summary.test.ts CHANGED Viewed

@@ -146,6 +146,33 @@ describe("appendActivityLine + renderActivityFeed — accumulating activity feed
   it("final defaults false (live render keeps the → in-progress newest line)", () => {
     expect(renderActivityFeed(["Reading a.ts"])).toBe("<b>→ Reading a.ts</b>");
   });
+  // liveSuffix (PR1 heartbeat): appended INSIDE the newest in-progress line so a
+  // long single step visibly advances ("→ Pulling Meta data · 18s") even though
+  // the feed is pull-only and no new tool label arrived.
+  describe("liveSuffix (heartbeat)", () => {
+    it("appends the suffix to the newest in-progress line only", () => {
+      expect(renderActivityFeed(["Reading a.ts", "Running a command"], false, " · 18s")).toBe(
+        "<i>✓ Reading a.ts</i>\n<b>→ Running a command · 18s</b>",
+      );
+    });
+    it("single live line gets the suffix", () => {
+      expect(renderActivityFeed(["Pulling Meta data"], false, " · 1m05s")).toBe(
+        "<b>→ Pulling Meta data · 1m05s</b>",
+      );
+    });
+    it("final=true ignores the suffix (a finalized record never ticks)", () => {
+      const out = renderActivityFeed(["Reading a.ts", "Running a command"], true, " · 18s")!;
+      expect(out).not.toContain("·");
+      expect(out).not.toContain("→");
+      expect(out).toBe("<i>✓ Reading a.ts</i>\n<i>✓ Running a command</i>");
+    });
+    it("default empty suffix is byte-identical to no suffix", () => {
+      expect(renderActivityFeed(["Reading a.ts"], false, "")).toBe(
+        renderActivityFeed(["Reading a.ts"]),
+      );
+    });
+  });
 });
 describe("appendActivityLabel — precomputed label feed (tool_label path)", () => {
@@ -223,6 +250,23 @@ describe("renderActivityFeedWithNested — foreground sub-agent nesting (Model A
     );
   });
+  it("liveSuffix (heartbeat) lands on the nested newest in-progress step", () => {
+    const out = renderActivityFeedWithNested(
+      ["Delegating: x"],
+      ["Reading schema.ts", "Looking for foreign keys"],
+      false,
+      " · 22s",
+    )!;
+    expect(out).toContain("   ↳ <b>→ Looking for foreign keys · 22s</b>");
+    expect(out).not.toContain("Reading schema.ts · "); // only the newest line ticks
+  });
+  it("liveSuffix passes through to the flat render when there are no children", () => {
+    expect(renderActivityFeedWithNested(["Reading a.ts"], [], false, " · 9s")).toBe(
+      "<b>→ Reading a.ts · 9s</b>",
+    );
+  });
   // Pins the invariant the gateway's foreground handoff-clear path relies on:
   // on an ack-first turn the parent feed is empty (mirrorLines=[]) and the only
   // content is the foreground sub-agent's nested narrative. The finalized

package/telegram-plugin/tool-activity-summary.ts CHANGED Viewed

@@ -200,7 +200,11 @@ function escapeFeedHtml(s: string): string {
  * `✓ +N earlier…` header when the turn ran longer. Returns null when empty.
  * Callers send the result verbatim — do NOT re-escape or re-wrap it.
  */
-export function renderActivityFeed(lines: string[], final = false): string | null {
+export function renderActivityFeed(
+  lines: string[],
+  final = false,
+  liveSuffix = "",
+): string | null {
   if (lines.length === 0) return null;
   const shown = lines.slice(-MIRROR_MAX_LINES);
   const hidden = lines.length - shown.length;
@@ -210,10 +214,14 @@ export function renderActivityFeed(lines: string[], final = false): string | nul
   // Newest line = in-progress step (bold, →); earlier = done (italic, ✓).
   // `final` (turn complete, feed left as a record): ALL lines render done (✓)
   // so the persisted message doesn't freeze on a misleading "→ in-progress".
+  // `liveSuffix` (heartbeat): appended INSIDE the newest in-progress line only
+  // (e.g. " · 18s") so the feed visibly advances during a long single step that
+  // emits no new tool label — the feed is otherwise pull-only and freezes.
+  // Caller passes framework-generated, HTML-safe text; never final + suffix.
   // Returns ready Telegram HTML — callers must NOT re-escape or re-wrap it.
   shown.forEach((l, i) => {
     const esc = escapeFeedHtml(l);
-    out.push(i === lastIdx && !final ? `<b>→ ${esc}</b>` : `<i>✓ ${esc}</i>`);
+    out.push(i === lastIdx && !final ? `<b>→ ${esc}${liveSuffix}</b>` : `<i>✓ ${esc}</i>`);
   });
   return out.join("\n");
 }
@@ -248,9 +256,10 @@ export function renderActivityFeedWithNested(
   lines: string[],
   childLines: string[],
   final = false,
+  liveSuffix = "",
 ): string | null {
   const children = childLines.map((s) => s.trim()).filter((s) => s.length > 0);
-  if (children.length === 0) return renderActivityFeed(lines, final);
+  if (children.length === 0) return renderActivityFeed(lines, final, liveSuffix);
   const out: string[] = [];
   const shownParent = lines.slice(-MIRROR_MAX_LINES);
@@ -264,12 +273,13 @@ export function renderActivityFeedWithNested(
   const lastChildIdx = shownChild.length - 1;
   // `final`: the nested newest step also renders done (✓) so the left-behind
   // feed reads as completed, not stuck on a "→ in-progress" child step.
+  // `liveSuffix` (heartbeat): appended to the nested newest in-progress step.
   shownChild.forEach((l, i) => {
     const t = l.length > NESTED_LINE_MAX ? l.slice(0, NESTED_LINE_MAX - 1) + "…" : l;
     const esc = escapeFeedHtml(t);
     out.push(
       i === lastChildIdx && !final
-        ? `${NESTED_PREFIX}<b>→ ${esc}</b>`
+        ? `${NESTED_PREFIX}<b>→ ${esc}${liveSuffix}</b>`
         : `${NESTED_PREFIX}<i>${esc}</i>`,
     );
   });