npm - @polderlabs/bizar-plugin - Versions diffs - 0.6.0 → 0.6.2 - Mend

@polderlabs/bizar-plugin 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +1 -1
package/index.ts +123 -16
package/package.json +1 -1
package/src/background-state.ts +41 -0
package/src/background.ts +147 -11
package/src/commands-impl.ts +4 -4
package/src/commands.ts +278 -101
package/src/reasoning-clean.ts +360 -0
package/src/serve.ts +12 -3
package/src/tools/bg-spawn.ts +21 -1
package/tests/attach-handler-bug.test.ts +5 -3
package/tests/background-state.test.ts +1 -1
package/tests/background.test.ts +1 -1
package/tests/block.test.ts +3 -1
package/tests/canonical-key-order.test.ts +11 -7
package/tests/event.test.ts +1 -1
package/tests/fingerprint.test.ts +22 -21
package/tests/http-client.test.ts +5 -3
package/tests/options.test.ts +10 -8
package/tests/settings.test.ts +2 -2
package/tests/stall-think.test.ts +13 -12
package/tests/state.test.ts +2 -1
package/tests/tools/bg-spawn.test.ts +12 -12
package/tests/update-deadlock.test.ts +1 -1

package/src/reasoning-clean.ts ADDED Viewed

@@ -0,0 +1,360 @@
+/**
+ * plugins/bizar/src/reasoning-clean.ts
+ *
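+ * Wraps a provider's `fetch` to strip inline `<think>...</think>` blocks
+ * from `message.content` / `delta.content`. These blocks duplicate the
+ * structured reasoning (`reasoning`, `reasoning_details`, or
+ * `reasoning_content`) that such responses also carry; note that the
+ * wrapper strips them unconditionally for matched providers and does not
+ * itself check that a structured reasoning field is present.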
+ *
+ * Why this exists
+ * ───────────────
+ * Some reasoning models (e.g. MiniMax M3 via OpenRouter) emit their chain
+ * of thought BOTH:
+ *   1. In the structured `reasoning` / `reasoning_details` field, which
+ *      opencode already extracts and renders as a separate "thought"
+ *      chunk, AND
+ *   2. Inlined in `content` as `<think>...</think>` blocks, which opencode would also
+ *      render as plain text — producing the duplicate "Thought: … + the
+ *      same text in the assistant message" the user sees.
+ *
+ * opencode's openrouter-specific SDK does not strip the inline think
+ * blocks from `content`. The opencode-level `interleaved` config that
+ * could solve this only applies to the `@ai-sdk/openai-compatible` SDK.
+ * Wrapping `provider.options.fetch` in the `config` hook is the only
+ * hook surface where the response body can be post-processed.
+ *
+ * Behaviour
+ * ─────────
+ * • Only `POST` requests whose URL ends with `/chat/completions` are
+ *   intercepted. Other requests pass through untouched.
+ * • Non-streaming responses (`Content-Type: application/json`) are parsed,
+ *   mutated, and re-serialised.
+ * • Streaming responses (`Content-Type: text/event-stream`) are piped
+ *   through a `TransformStream` that buffers content across chunks and
+ *   drops anything between a complete `<think>` / `</think>` pair, using a tiny state
+ *   machine so chunks that split a marker mid-stream are handled.
+ * • If parsing or rewriting fails for any reason, the original response
+ *   is forwarded unchanged — this wrapper must never break a chat.
+ */
/** Literal tag that opens an inline chain-of-thought block. */
const THINK_OPEN = "<think>" as const;

/** Literal tag that closes an inline chain-of-thought block. */
const THINK_CLOSE = "</think>" as const;

/** Minimal `fetch`-shaped signature accepted and returned by the wrapper. */
type FetchLike = (
  input: Parameters<typeof fetch>[0],
  init?: RequestInit,
) => Promise<Response>;

/** Options accepted by `wrapFetchForReasoningCleanup`. */
export interface ReasoningCleanOptions {
  /** Optional sink for debug lines; nothing is logged when omitted. */
  debug?: (msg: string) => void;
  /**
   * Provider ids whose responses should be cleaned. When omitted, the
   * default set (openrouter and minimax) is used — the providers known to
   * exhibit the duplicated-think pattern.
   */
  providers?: string[];
}

/** Provider ids cleaned when `ReasoningCleanOptions.providers` is not given. */
const DEFAULT_PROVIDERS = new Set<string>(["openrouter", "minimax"]);
/**
 * Remove every inline `<think>...</think>` block from a plain string.
 * Used for non-streaming responses (or for accumulated streamed content).
 *
 * Whitespace directly after each `</think>` is removed as well, so the
 * cleaned content does not begin with a stray blank line. An opening tag
 * that is never closed is left in place.
 *
 * @param content Raw assistant content, possibly containing think blocks.
 * @returns The content with all complete think blocks removed.
 */
export function stripInlineThinkBlocks(content: string): string {
  // Lazy `[\s\S]*?` lets a block span newlines while stopping at the first
  // closing tag, so neighbouring blocks are removed independently.
  const thinkBlock = /<think>[\s\S]*?<\/think>\s*/g;
  return content.replace(thinkBlock, "");
}
/**
 * Stream-level state machine: feed it the content deltas in order; it
 * yields the content that should be forwarded to the caller, with every
 * `<think>...</think>` block (and the whitespace right after it) removed.
 *
 * Markers may be split across chunks; an ambiguous tail is held back in
 * `pending` (via `keepPartialTail`) until the next chunk disambiguates it.
 */
class ThinkStripper {
  /** Whether the cursor is currently inside a think block. */
  private state: "NORMAL" | "IN_THINK" = "NORMAL";
  // Buffer of characters that may be the start of a marker but are not
  // yet complete. Holds at most max(THINK_OPEN.length, THINK_CLOSE.length)
  // characters from a chunk boundary.
  private pending = "";
  // True from the moment a close tag is consumed until the first
  // non-whitespace character is seen. Kept as state (rather than trimmed
  // once in place) so whitespace that arrives in a *later* chunk than the
  // close tag is dropped too, matching stripInlineThinkBlocks.
  private trimLeading = false;

  /**
   * Consume the next content delta.
   *
   * @param chunk The delta text, in stream order.
   * @returns The part of the stream that is safe to forward now (may be "").
   */
  push(chunk: string): string {
    if (chunk.length === 0) return "";
    let input = this.pending + chunk;
    this.pending = "";
    let out = "";
    while (input.length > 0) {
      if (this.state === "IN_THINK") {
        const closeAt = input.indexOf(THINK_CLOSE);
        if (closeAt === -1) {
          // Still inside the block: discard everything except a possible
          // partial close marker at the tail.
          this.pending = keepPartialTail(input, [THINK_CLOSE]);
          break;
        }
        input = input.slice(closeAt + THINK_CLOSE.length);
        this.state = "NORMAL";
        this.trimLeading = true;
        continue;
      }

      // NORMAL
      if (this.trimLeading) {
        input = input.replace(/^\s+/, "");
        // Only whitespace so far — keep trimming when the next chunk arrives.
        if (input.length === 0) break;
        this.trimLeading = false;
      }
      const openAt = input.indexOf(THINK_OPEN);
      if (openAt === -1) {
        // No open marker; hold back a possible partial one at the tail.
        const tail = keepPartialTail(input, [THINK_OPEN]);
        out += input.slice(0, input.length - tail.length);
        this.pending = tail;
        break;
      }
      out += input.slice(0, openAt);
      input = input.slice(openAt + THINK_OPEN.length);
      this.state = "IN_THINK";
    }
    return out;
  }

  /**
   * End of stream: return whatever was being held back and reset the
   * machine. The held-back tail is emitted rather than swallowed, even if
   * the stream ended inside an unterminated think block (malformed
   * response).
   */
  flush(): string {
    const tail = this.pending;
    this.pending = "";
    this.state = "NORMAL";
    this.trimLeading = false;
    return tail;
  }
}
/**
 * Of a string, return the longest suffix that is a prefix of one of the
 * given markers. Used to defer deciding whether a chunk ends in a real
 * marker until the next chunk arrives.
 *
 * @param input   Text whose tail should be inspected.
 * @param markers Markers that might be starting at the end of `input`.
 * @returns The suffix to hold back, or "" when the tail cannot be the
 *          beginning of any marker (also for empty input / no markers).
 */
function keepPartialTail(input: string, markers: readonly string[]): string {
  if (input.length === 0 || markers.length === 0) return "";
  const longest = Math.max(...markers.map((m) => m.length));
  // Try the longest candidate first so the longest matching suffix wins.
  // The candidate must be taken from the END of the input: slicing from
  // the start of the trailing window would test a prefix of that window
  // instead, which misses real partial markers and can report text that
  // is not at the tail at all.
  for (let len = Math.min(longest, input.length); len > 0; len--) {
    const candidate = input.slice(input.length - len);
    if (markers.some((m) => m.startsWith(candidate))) return candidate;
  }
  return "";
}
/**
 * Work out which configured provider, if any, a URL is addressed to.
 *
 * The provider id may show up in the hostname (e.g. `openrouter.ai`)
 * instead of a path segment, so the whole URL is searched,
 * case-insensitively, for the id in a handful of delimiter contexts.
 *
 * @param url       Request URL.
 * @param providers Provider ids to look for.
 * @returns The first matching provider id exactly as stored in
 *          `providers`, or `null` when none matches.
 */
function targetsProvider(url: string, providers: Set<string>): string | null {
  const haystack = url.toLowerCase();
  for (const provider of providers) {
    const id = provider.toLowerCase();
    const needles = [`/${id}/`, `/${id}?`, `${id}.`, `-${id}.`, `.${id}/`];
    if (needles.some((needle) => haystack.includes(needle))) {
      return provider;
    }
  }
  return null;
}
/**
 * Tell whether a request looks like an OpenAI-compatible chat completions
 * call, judged by its URL and HTTP method (the body is not inspected).
 *
 * @param url  Request URL; its path must end in `/chat/completions`,
 *             optionally followed by a query string.
 * @param init Fetch init; a missing `method` is treated as `POST`.
 * @returns `true` only for a `POST` to a chat completions URL.
 */
function isChatCompletionsRequest(url: string, init?: RequestInit): boolean {
  const hitsChatCompletions = /\/chat\/completions(?:\?|$)/.test(url);
  if (!hitsChatCompletions) return false;
  const verb = init?.method ?? "POST";
  return verb.toUpperCase() === "POST";
}
/**
 * Process one non-streaming JSON response: strip inline think blocks
 * from `choices[*].message.content`.
 *
 * @param text Raw response body.
 * @returns The re-serialised JSON when at least one message was changed;
 *          otherwise the original `text`, byte for byte. That includes
 *          bodies that are not valid JSON or carry no `choices` array —
 *          this function never throws on malformed input.
 */
function cleanNonStreamingJson(text: string): string {
  let data: unknown;
  try {
    data = JSON.parse(text);
  } catch {
    // Not JSON (e.g. an HTML error page): nothing to clean.
    return text;
  }
  const choices: unknown = (data as { choices?: unknown } | null)?.choices;
  if (!Array.isArray(choices)) return text;

  let touched = false;
  for (const choice of choices) {
    const msg = (choice as { message?: { content?: unknown } } | null)?.message;
    if (!msg || typeof msg.content !== "string") continue;
    // Cheap pre-check so untouched messages skip the regex entirely.
    if (!msg.content.includes(THINK_OPEN)) continue;
    const cleaned = stripInlineThinkBlocks(msg.content);
    if (cleaned !== msg.content) {
      msg.content = cleaned;
      touched = true;
    }
  }
  // Re-serialise only when something changed so untouched bodies keep
  // their exact original bytes.
  return touched ? JSON.stringify(data) : text;
}
/**
 * Process one SSE line of the form `data: <payload>`: strip inline think
 * blocks from `choices[*].delta.content`, using a per-choice
 * `ThinkStripper` so markers split across events are still handled.
 *
 * Lines that are not `data:` lines, the `[DONE]` terminator, payloads that
 * are not JSON, and payloads that need no change are returned unchanged
 * (byte for byte). Only a line that was actually modified is re-serialised.
 *
 * @param line      One SSE line without its line terminator.
 * @param strippers State machines keyed by choice index; mutated in place
 *                  and expected to be reused across all lines of a stream.
 * @param debug     Optional logger, called once per rewritten line.
 * @returns The line to forward.
 */
function cleanSseLine(
  line: string,
  strippers: ThinkStripper[],
  debug?: (msg: string) => void,
): string {
  if (!line.startsWith("data:")) return line;
  const payload = line.slice(5).trimStart();
  // The terminator carries no choice to attach text to, so nothing can be
  // flushed here; pending text is flushed on the chunk that carries
  // `finish_reason` instead (see below).
  if (payload === "[DONE]") return line;

  let parsed: unknown;
  try {
    parsed = JSON.parse(payload);
  } catch {
    return line;
  }
  const choices: unknown = (parsed as { choices?: unknown } | null)?.choices;
  if (!Array.isArray(choices)) return line;

  let touched = false;
  for (let i = 0; i < choices.length; i++) {
    const choice = choices[i];
    if (choice === null || typeof choice !== "object") continue;

    // Streams with several choices usually send ONE choice per event and
    // identify it through `index`, so the array position alone would fold
    // every choice into the same state machine.
    const slot: number =
      Number.isInteger(choice.index) && choice.index >= 0 ? choice.index : i;

    const original: unknown = choice.delta?.content;
    const hasContent = typeof original === "string" && original.length > 0;
    const finished = Boolean(choice.finish_reason);
    if (!hasContent && !finished) continue;

    let stripper: ThinkStripper | undefined = strippers[slot];
    if (typeof original === "string" && original.length > 0) {
      if (!stripper) {
        stripper = new ThinkStripper();
        strippers[slot] = stripper;
      }
      const cleaned = stripper.push(original);
      if (cleaned !== original) {
        touched = true;
        if (cleaned.length > 0) {
          choice.delta.content = cleaned;
        } else {
          // Avoid sending an empty content delta — some relays reject them.
          delete choice.delta.content;
        }
      }
    }

    if (finished && stripper) {
      // End of this choice: release text the stripper was holding back at
      // a chunk boundary. This must also run when the final chunk has no
      // content of its own (the usual `delta: {}` + `finish_reason`
      // shape), otherwise that held-back text is lost.
      const flushed = stripper.flush();
      const delta =
        choice.delta !== null && typeof choice.delta === "object" ? choice.delta : undefined;
      if (flushed.length > 0) {
        if (delta) {
          delta.content = (typeof delta.content === "string" ? delta.content : "") + flushed;
        } else {
          choice.delta = { content: flushed };
        }
        touched = true;
      } else if (hasContent && delta && delta.content === undefined) {
        // Content was stripped to nothing on the final chunk: keep the
        // delta well-formed with an explicit empty string.
        delta.content = "";
      }
    }
  }

  if (!touched) return line;
  debug?.(`reasoning-clean: rewrote SSE line ${payload.length}B`);
  return "data: " + JSON.stringify(parsed);
}
/**
 * Transform an SSE response stream: run every line through `cleanSseLine`
 * (which strips inline think blocks from `data:` payloads) and re-emit the
 * bytes.
 *
 * The stream is processed line by line rather than event by event:
 * `cleanSseLine` works on single lines anyway, and this way both `\n` and
 * `\r\n` terminated streams are forwarded as soon as a line is complete.
 * Waiting for a literal `"\n\n"` separator would buffer a CRLF-delimited
 * stream in full until it ends. Line terminators are preserved as received.
 *
 * @param debug Optional logger forwarded to `cleanSseLine`.
 * @returns A byte-to-byte `TransformStream` suitable for `pipeThrough`.
 */
function streamTransformer(debug?: (msg: string) => void): TransformStream<Uint8Array, Uint8Array> {
  const decoder = new TextDecoder("utf-8");
  const encoder = new TextEncoder();
  // One set of per-choice state machines for the whole stream.
  const strippers: ThinkStripper[] = [];
  // Undelivered text after the last line feed seen so far.
  let buffer = "";

  // Clean one line (already split on "\n"), keeping a trailing "\r" intact
  // so CRLF streams stay CRLF.
  const rewriteLine = (rawLine: string): string => {
    const hasCarriageReturn = rawLine.endsWith("\r");
    const body = hasCarriageReturn ? rawLine.slice(0, -1) : rawLine;
    const cleaned = cleanSseLine(body, strippers, debug);
    return hasCarriageReturn ? cleaned + "\r" : cleaned;
  };

  return new TransformStream<Uint8Array, Uint8Array>({
    transform(chunk, controller) {
      // `stream: true` keeps a multi-byte character that straddles two
      // chunks inside the decoder until it is complete.
      buffer += decoder.decode(chunk, { stream: true });
      const lastLineFeed = buffer.lastIndexOf("\n");
      if (lastLineFeed === -1) return; // no complete line yet
      const completeLines = buffer.slice(0, lastLineFeed);
      buffer = buffer.slice(lastLineFeed + 1);
      const rewritten = completeLines.split("\n").map(rewriteLine).join("\n") + "\n";
      controller.enqueue(encoder.encode(rewritten));
    },
    flush(controller) {
      // Emit a final line that was not terminated before the stream ended.
      const tail = buffer + decoder.decode();
      buffer = "";
      if (tail.length > 0) {
        controller.enqueue(encoder.encode(rewriteLine(tail)));
      }
    },
  });
}
/**
 * Wrap a fetch implementation so that chat-completions responses from the
 * listed providers have inline `<think>...</think>` blocks stripped from
 * the content while the structured reasoning fields are left untouched.
 *
 * Requests that are not a `POST` to `/chat/completions`, or whose URL does
 * not match a configured provider, are forwarded to `originalFetch`
 * unchanged. Errors thrown by `originalFetch` are logged and re-thrown.
 * If cleaning a non-streaming body fails, the original, still readable
 * response is returned.
 *
 * @param originalFetch The fetch implementation to delegate to.
 * @param options       Optional debug logger and provider id list.
 * @returns A function with the same signature as `originalFetch`.
 */
export function wrapFetchForReasoningCleanup(
  originalFetch: FetchLike,
  options: ReasoningCleanOptions = {},
): FetchLike {
  const providers = options.providers
    ? new Set(options.providers)
    : DEFAULT_PROVIDERS;
  const debug = options.debug;

  // Anything can be thrown, so do not assume an Error instance.
  const describe = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Init for a response whose body we replaced: same status and headers,
  // minus the headers that described the ORIGINAL body's length/encoding
  // and would now be wrong.
  const rewrittenInit = (response: Response): ResponseInit => {
    const headers = new Headers(response.headers);
    headers.delete("content-length");
    headers.delete("content-encoding");
    return { status: response.status, statusText: response.statusText, headers };
  };

  return async (input, init) => {
    const url =
      typeof input === "string"
        ? input
        : input instanceof URL
          ? input.toString()
          : (input as Request).url;

    // Only chat-completions calls to a known provider are post-processed.
    if (!isChatCompletionsRequest(url, init) || targetsProvider(url, providers) === null) {
      return originalFetch(input, init);
    }

    let response: Response;
    try {
      response = await originalFetch(input, init);
    } catch (err) {
      debug?.(`reasoning-clean: fetch threw, passing through: ${describe(err)}`);
      throw err;
    }

    const contentType = response.headers.get("content-type") ?? "";
    if (contentType.includes("text/event-stream")) {
      const body = response.body;
      if (!body) return response;
      const transformed = body.pipeThrough(streamTransformer(debug));
      return new Response(transformed, rewrittenInit(response));
    }

    // Non-streaming JSON.
    try {
      // Read from a clone: a body can be consumed only once, and the
      // original response is handed back to the caller whenever nothing
      // was changed or cleaning failed, so it must stay unread.
      const text = await response.clone().text();
      const cleaned = cleanNonStreamingJson(text);
      if (cleaned === text) return response;
      return new Response(cleaned, rewrittenInit(response));
    } catch (err) {
      debug?.(`reasoning-clean: clean failed, passing through: ${describe(err)}`);
      return response;
    }
  };
}

package/src/serve.ts CHANGED Viewed

@@ -229,13 +229,19 @@ export class ServeLifecycle {
   /**
    * Graceful stop: SIGTERM, wait up to 5s, then SIGKILL. Idempotent.
+   *
+   * Cross-platform note: Bun's `Subprocess.kill()` without an explicit
+   * signal maps to the platform-appropriate default (`SIGTERM` on
+   * POSIX, `TerminateProcess` on Windows). The forced-kill phase drops
+   * the signal argument for the same reason, so the same code works
+   * on both Windows and Linux/macOS without a platform branch.
    */
   async stop(): Promise<void> {
     const proc = this._proc;
     if (proc === null) return;
     this._intentionalShutdown = true;
     try {
-      proc.kill("SIGTERM");
+      proc.kill();
     } catch {
       // already dead
     }
@@ -243,7 +249,7 @@ export class ServeLifecycle {
       await withTimeout(proc.exited, 5_000);
     } catch {
       try {
-        proc.kill("SIGKILL");
+        proc.kill();
       } catch {
         // ignore
       }
@@ -344,7 +350,10 @@ export class ServeLifecycle {
     const proc = this._proc;
     if (proc !== null) {
       try {
-        proc.kill("SIGKILL");
+        // No signal — as noted on stop(), Bun's default for kill() is
+        // SIGTERM on POSIX and TerminateProcess on Windows, so on POSIX
+        // this is not a forced SIGKILL; pass "SIGKILL" if one is required.
+        proc.kill();
       } catch {
         // ignore
       }

package/src/tools/bg-spawn.ts CHANGED Viewed

@@ -92,6 +92,19 @@ export function createBgSpawnTool(deps: BgSpawnDeps) {
         .positive()
         .optional()
         .describe("Collect-time timeout in ms (1s..30min, default 5min)."),
+      persistent: z
+        .boolean()
+        .optional()
+        .default(false)
+        .describe("When true, auto-restart on terminal failure (up to maxRestarts)."),
+      maxRestarts: z
+        .number()
+        .int()
+        .min(1)
+        .max(10)
+        .optional()
+        .default(3)
+        .describe("Number of auto-restart attempts before giving up."),
     },
     execute: async (rawArgs, ctx) => {
       // 1. Odin-only (MEDIUM-26).
@@ -109,6 +122,8 @@ export function createBgSpawnTool(deps: BgSpawnDeps) {
         prompt: string;
         model?: string;
         timeoutMs?: number;
+        persistent?: boolean;
+        maxRestarts?: number;
       };
       // 2. Validate the model parameter (LOW-34 / §1.4).
@@ -118,7 +133,7 @@ export function createBgSpawnTool(deps: BgSpawnDeps) {
         if (m === null) {
           return {
             output: JSON.stringify({
-              error: `model must be in "providerID/modelID" format (e.g. "minimax/MiniMax-M3"). Omit to use the agent's default.`,
+              error: `model must be in "providerID/modelID" format (e.g. "openrouter/minimax-m3"). Omit to use the agent's default.`,
             }),
           };
         }
@@ -146,10 +161,15 @@ export function createBgSpawnTool(deps: BgSpawnDeps) {
           ? `${modelOverride.providerID}/${modelOverride.modelID}`
           : "agent-default",
         promptPreview: args.prompt.slice(0, 200),
+        prompt: args.prompt, // store full prompt for restart support
         parentAgent: ctx.agent,
         logPath: buildLogPath(deps.worktree, instanceId),
         timeoutMs,
         toolCallCount: 0,
+        // v0.5.5 — persistent auto-restart
+        persistent: args.persistent ?? false,
+        maxRestarts: args.maxRestarts ?? 3,
+        restartCount: 0,
       };
       const addRes = await deps.instanceManager.add(draft);
       if (addRes === "cap_reached") {

package/tests/attach-handler-bug.test.ts CHANGED Viewed

@@ -14,6 +14,8 @@
  */
 import { describe, it, expect, beforeEach } from "bun:test";
+import os from "node:os";
+import path from "node:path";
 // --- Real InstanceManager (the one under test) ----------------------------
@@ -86,14 +88,14 @@ function makeDraft(overrides: Partial<BackgroundState> = {}): BackgroundState {
     agent: "mimir",
     status: "pending",
     startedAt: Date.now(),
-    model: "minimax/MiniMax-M3",
+    model: "openrouter/minimax-m3",
     promptPreview: "test",
     resultPreview: undefined,
     resultMessageIds: [],
     error: undefined,
     parentAgent: "odin",
     parentInstanceId: undefined,
-    logPath: "/tmp/test.log",
+    logPath: path.join(os.tmpdir(), "test.log"),
     timeoutMs: 300_000,
     toolCallCount: 0,
     loopGuardTool: undefined,
@@ -122,7 +124,7 @@ describe("InstanceManager.add — empty sessionId (BUGFIX v0.5.1)", () => {
         warn: () => {},
         error: () => {},
       } as never,
-      serve: { worktree: "/tmp" } as never,
+      serve: { worktree: os.tmpdir() } as never,
       http: {} as never,
       stream: stream as never,
       stallTimeoutMs: 180_000,

package/tests/background-state.test.ts CHANGED Viewed

@@ -40,7 +40,7 @@ function makeState(overrides: Partial<BackgroundState> = {}): BackgroundState {
     agent: "mimir",
     status: "running",
     startedAt: Date.now(),
-    model: "minimax/MiniMax-M3",
+    model: "openrouter/minimax-m3",
     promptPreview: "Do the thing",
     resultPreview: undefined,
     resultMessageIds: [],

package/tests/background.test.ts CHANGED Viewed

@@ -22,7 +22,7 @@ function makeBgState(overrides: Partial<BackgroundState> = {}): BackgroundState
     agent: "mimir",
     status: "pending",
     startedAt: Date.now(),
-    model: "minimax/MiniMax-M3",
+    model: "openrouter/minimax-m3",
     promptPreview: "Do the thing",
     resultPreview: undefined,
     resultMessageIds: [],

package/tests/block.test.ts CHANGED Viewed

@@ -17,6 +17,8 @@
  */
 import { describe, test, expect } from "bun:test";
+import os from "node:os";
+import path from "node:path";
 import { decide } from "../src/loop.js";
 import {
@@ -58,7 +60,7 @@ function emptyState(): SessionState {
 const FP = "fp:read:loop";
 const TOOL = "read";
-const ARGS = { path: "/tmp/example.txt" };
+const ARGS = { path: path.join(os.tmpdir(), "example.txt") };
 const NOW = 1_700_000_500_000;
 // For the block tests we need a window size that is at least as large as

package/tests/canonical-key-order.test.ts CHANGED Viewed

@@ -8,25 +8,29 @@
 import { describe, test, expect } from "bun:test";
 import { fingerprint } from "../src/fingerprint";
+import os from "node:os";
+import path from "node:path";
+const TMP = path.join(os.tmpdir(), "canonical-key-order-test");
 describe("fingerprint — canonical key order", () => {
   test("flat object: same keys/values in different insertion order produce the same fingerprint", () => {
     const a = {
       tool: "read",
-      args: { path: "/tmp/foo.ts", recursive: false, limit: 10 },
+      args: { path: path.join(os.tmpdir(), "foo.ts"), recursive: false, limit: 10 },
     };
     const b = {
       tool: "read",
-      args: { limit: 10, recursive: false, path: "/tmp/foo.ts" },
+      args: { limit: 10, recursive: false, path: path.join(os.tmpdir(), "foo.ts") },
     };
     // Same keys, same values, different insertion order — must match.
-    expect(fingerprint(a.tool, a.args, "/tmp")).toBe(fingerprint(b.tool, b.args, "/tmp"));
+    expect(fingerprint(a.tool, a.args, TMP)).toBe(fingerprint(b.tool, b.args, TMP));
   });
   test("nested objects: different insertion order at both levels also match", () => {
     const a = { tool: "edit", args: { meta: { z: 1, a: 2 }, path: "/x" } };
     const b = { tool: "edit", args: { path: "/x", meta: { a: 2, z: 1 } } };
-    expect(fingerprint(a.tool, a.args, "/tmp")).toBe(fingerprint(b.tool, b.args, "/tmp"));
+    expect(fingerprint(a.tool, a.args, TMP)).toBe(fingerprint(b.tool, b.args, TMP));
   });
   test("deeply nested: three levels of differing key order all resolve to same fingerprint", () => {
@@ -54,18 +58,18 @@ describe("fingerprint — canonical key order", () => {
         },
       },
     };
-    expect(fingerprint(a.tool, a.args, "/tmp")).toBe(fingerprint(b.tool, b.args, "/tmp"));
+    expect(fingerprint(a.tool, a.args, TMP)).toBe(fingerprint(b.tool, b.args, TMP));
   });
   test("array order is preserved (arrays of same values in same order match)", () => {
     const a = { tool: "bash", args: { commands: ["echo a", "echo b"] } };
     const b = { tool: "bash", args: { commands: ["echo a", "echo b"] } };
-    expect(fingerprint(a.tool, a.args, "/tmp")).toBe(fingerprint(b.tool, b.args, "/tmp"));
+    expect(fingerprint(a.tool, a.args, TMP)).toBe(fingerprint(b.tool, b.args, TMP));
   });
   test("array with different order produces different fingerprint", () => {
     const a = { tool: "bash", args: { commands: ["echo a", "echo b"] } };
     const b = { tool: "bash", args: { commands: ["echo b", "echo a"] } };
-    expect(fingerprint(a.tool, a.args, "/tmp")).not.toBe(fingerprint(b.tool, b.args, "/tmp"));
+    expect(fingerprint(a.tool, a.args, TMP)).not.toBe(fingerprint(b.tool, b.args, TMP));
   });
 });

package/tests/event.test.ts CHANGED Viewed

@@ -125,7 +125,7 @@ class MockPlugin {
 // ── Test setup ───────────────────────────────────────────────────────────────
-const TEST_DIR = "/tmp/bizar-event-test";
+const TEST_DIR = path.join(os.tmpdir(), "bizar-event-test");
 const TEST_SESSION = "session-evt-001";
 const TEST_SESSION_2 = "session-evt-002";