npm - baro-ai - Versions diffs - 0.42.1 → 0.43.0 - Mend

baro-ai 0.42.1 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli.mjs +330 -34
package/dist/cli.mjs.map +1 -1
package/dist/run-architect.mjs +80 -3
package/dist/run-architect.mjs.map +1 -1
package/dist/run-planner.mjs +113 -5
package/dist/run-planner.mjs.map +1 -1
package/package.json +1 -1

package/dist/cli.mjs CHANGED Viewed

@@ -10752,6 +10752,163 @@ function extractVerdictJson(text) {
   throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
 }
+// ../baro-orchestrator/src/codex-one-shot.ts
+import { execFile as execFile3 } from "child_process";
+import { promisify as promisify3 } from "util";
+var execFileAsync2 = promisify3(execFile3);
+/**
+ * Run `codex exec --json` once and return the agent's reply text.
+ *
+ * The prompt is passed as the final positional argument. Stdout is read as
+ * newline-delimited JSON: lines that are blank, unparseable, or that parse to
+ * something other than an object are skipped. `turn.completed` usage is
+ * logged to stderr. The `text` of every `item.completed` event whose item is
+ * an `agent_message` is collected, in order, separated by newlines.
+ *
+ * @param {object} opts
+ * @param {string} opts.prompt - Prompt text.
+ * @param {string} [opts.cwd] - Working directory for the child process.
+ * @param {string} [opts.codexBin="codex"] - Executable to run.
+ * @param {string} [opts.model] - Forwarded as `--model` when set.
+ * @param {boolean} [opts.skipGitRepoCheck] - Adds `--skip-git-repo-check`.
+ * @param {boolean} [opts.bypassSandbox] - Anything other than an explicit
+ *   `false` adds `--dangerously-bypass-approvals-and-sandbox`.
+ * @param {number} [opts.timeoutMs=180000] - Child process timeout in ms.
+ * @param {number} [opts.maxBuffer=16777216] - Output buffer cap in bytes.
+ * @returns {Promise<string>} The collected agent message text.
+ * @throws {Error} When the child process fails or no agent message was found.
+ */
+async function runCodexOneShot(opts) {
+  const args = ["exec", "--json"];
+  if (opts.skipGitRepoCheck) args.push("--skip-git-repo-check");
+  // Opt-out, not opt-in: only an explicit `false` leaves the flag off.
+  if (opts.bypassSandbox !== false) {
+    args.push("--dangerously-bypass-approvals-and-sandbox");
+  }
+  if (opts.model) args.push("--model", opts.model);
+  args.push(opts.prompt);
+  const { stdout } = await execFileAsync2(opts.codexBin ?? "codex", args, {
+    cwd: opts.cwd,
+    timeout: opts.timeoutMs ?? 18e4,
+    maxBuffer: opts.maxBuffer ?? 16 * 1024 * 1024
+  });
+  let result = "";
+  for (const rawLine of stdout.split("\n")) {
+    const line = rawLine.trim();
+    if (!line) continue;
+    let event;
+    try {
+      event = JSON.parse(line);
+    } catch {
+      continue;
+    }
+    // Valid JSON that is not an object (`null`, a number, a string) is not an
+    // event envelope; reading `.type` on `null` would throw.
+    if (event === null || typeof event !== "object") continue;
+    if (event.type === "turn.completed") {
+      const usage = event.usage;
+      if (usage) {
+        process.stderr.write(
+          `[codex] usage: in=${usage.input_tokens ?? 0} out=${usage.output_tokens ?? 0}\n`
+        );
+      }
+      continue;
+    }
+    if (event.type !== "item.completed") continue;
+    const item = event.item;
+    if (!item) continue;
+    if (item.type === "agent_message" && typeof item.text === "string") {
+      result = result ? `${result}\n${item.text}` : item.text;
+    }
+  }
+  if (!result.trim()) {
+    throw new Error("runCodexOneShot: codex produced no agent_message");
+  }
+  return result;
+}
+// ../baro-orchestrator/src/participants/critic-codex.ts
+/**
+ * Observer that checks an agent's result text against that agent's acceptance
+ * criteria by asking Codex for a pass/fail verdict (via runCodexOneShot).
+ *
+ * For every evaluated result it delivers a Critique event to all joined
+ * environments. On a "fail" verdict it additionally delivers a corrective
+ * AgentTargetedMessage to the agent, at most `maxEmissionsPerAgent` times per
+ * agent.
+ */
+var CriticCodex = class extends BaseObserver {
+  /** Resolved options; defaults are applied in the constructor. */
+  opts;
+  /** agentId -> number of corrective messages sent so far. */
+  emissions = /* @__PURE__ */ new Map();
+  /** agentId -> number of results accepted for evaluation so far. */
+  turnCount = /* @__PURE__ */ new Map();
+  /** Evaluations currently in flight; awaited by idle(). */
+  pending = /* @__PURE__ */ new Set();
+  /**
+   * @param {object} opts
+   * @param {Map<string, string[]>} opts.targets - agentId -> criteria list.
+   * @param {string} [opts.model] - Passed through to Codex; when unset the
+   *   Critique records "codex-default" as the model used.
+   * @param {number} [opts.maxEmissionsPerAgent=2]
+   * @param {string} [opts.codexBin="codex"]
+   * @param {number} [opts.timeoutMs=60000]
+   */
+  constructor(opts) {
+    super();
+    this.opts = {
+      maxEmissionsPerAgent: opts.maxEmissionsPerAgent ?? 2,
+      model: opts.model,
+      codexBin: opts.codexBin ?? "codex",
+      timeoutMs: opts.timeoutMs ?? 6e4,
+      targets: opts.targets
+    };
+  }
+  /** Resolves once every in-flight evaluation has settled. */
+  async idle() {
+    await Promise.allSettled([...this.pending]);
+  }
+  /**
+   * Evaluate a successful AgentResult for an agent that has criteria.
+   * Ignores other events, error results, empty result text, and agents with
+   * no (or empty) criteria.
+   */
+  async onExternalEvent(_source, event) {
+    if (!AgentResult.is(event)) return;
+    const { agentId, isError, resultText } = event.data;
+    if (isError || !resultText) return;
+    const criteria = this.opts.targets.get(agentId);
+    if (!criteria || criteria.length === 0) return;
+    const turn = (this.turnCount.get(agentId) ?? 0) + 1;
+    this.turnCount.set(agentId, turn);
+    const work = (async () => {
+      const { verdict, reasoning, violatedCriteria } = await this.evaluate(
+        resultText,
+        criteria
+      );
+      const critiqueEvent = Critique.create({
+        agentId,
+        verdict,
+        reasoning,
+        violatedCriteria,
+        turn,
+        modelUsed: this.opts.model ?? "codex-default"
+      });
+      for (const env of this.getEnvironments()) {
+        env.deliverSemanticEvent(this, critiqueEvent);
+      }
+      if (verdict !== "fail") return;
+      const emitted = this.emissions.get(agentId) ?? 0;
+      // Cap corrective messages per agent.
+      if (emitted >= this.opts.maxEmissionsPerAgent) return;
+      this.emissions.set(agentId, emitted + 1);
+      const text = buildCorrectiveMessage(reasoning, violatedCriteria);
+      const msg = AgentTargetedMessage.create({
+        recipientId: agentId,
+        text,
+        metadata: {
+          criticTurn: turn,
+          emissionIndex: emitted + 1
+        }
+      });
+      for (const env of this.getEnvironments()) {
+        env.deliverSemanticEvent(this, msg);
+      }
+    })();
+    this.pending.add(work);
+    // Use try/finally rather than a detached `work.finally(...)`: the promise
+    // returned by `.finally()` rejects along with `work`, and with no handler
+    // attached that surfaces as an unhandled rejection.
+    try {
+      await work;
+    } finally {
+      this.pending.delete(work);
+    }
+  }
+  /**
+   * Ask Codex for a verdict on `resultText` against `criteria`.
+   *
+   * Never throws: any failure (process error, missing or invalid JSON) is
+   * reported as a "fail" verdict whose reasoning carries the error message.
+   * Any `verdict` value other than "pass" is treated as "fail".
+   *
+   * @returns {Promise<{verdict: "pass"|"fail", reasoning: string, violatedCriteria: string[]}>}
+   */
+  async evaluate(resultText, criteria) {
+    const userPrompt = buildEvalPrompt(criteria, resultText);
+    const prompt = `${VERDICT_SYSTEM_PROMPT}\n${userPrompt}`;
+    try {
+      const text = await runCodexOneShot({
+        prompt,
+        // Critic doesn't operate on the worktree — but Codex
+        // still insists on running inside a git repo unless we
+        // skip the check. Pass through skipGitRepoCheck so the
+        // critic can be invoked from anywhere (including baro's
+        // own cwd that may not be the story worktree).
+        cwd: process.cwd(),
+        skipGitRepoCheck: true,
+        bypassSandbox: true,
+        model: this.opts.model,
+        codexBin: this.opts.codexBin,
+        timeoutMs: this.opts.timeoutMs
+      });
+      const verdictJson = extractVerdictJson(text.trim());
+      const parsed = JSON.parse(verdictJson);
+      return {
+        verdict: parsed.verdict === "pass" ? "pass" : "fail",
+        reasoning: parsed.reasoning ?? "",
+        violatedCriteria: Array.isArray(parsed.violated_criteria) ? parsed.violated_criteria : []
+      };
+    } catch (err) {
+      return {
+        verdict: "fail",
+        reasoning: `CriticCodex LLM call failed: ${String(err?.message ?? err)}`,
+        violatedCriteria: ["[critic error \u2014 could not evaluate]"]
+      };
+    }
+  }
+};
 // ../baro-orchestrator/src/planning/openai-runtime.ts
 async function runInferenceRound(_context, _model) {
   throw new Error(
@@ -10927,9 +11084,9 @@ var CriticOpenAI = class extends BaseObserver {
 };
 // ../baro-orchestrator/src/participants/finalizer.ts
-import { execFile as execFile3 } from "child_process";
-import { promisify as promisify3 } from "util";
-var execFileAsync2 = promisify3(execFile3);
+import { execFile as execFile4 } from "child_process";
+import { promisify as promisify4 } from "util";
+var execFileAsync3 = promisify4(execFile4);
 var Finalizer = class extends BaseObserver {
   opts;
   envRef = null;
@@ -11178,7 +11335,7 @@ var Finalizer = class extends BaseObserver {
   async collectCommitsSinceBase() {
     if (!this.baseSha) return [];
     try {
-      const { stdout } = await execFileAsync2(
+      const { stdout } = await execFileAsync3(
         "git",
         ["log", `${this.baseSha}..HEAD`, "--pretty=format:%H%x09%s"],
         { cwd: this.opts.cwd }
@@ -11194,7 +11351,7 @@ var Finalizer = class extends BaseObserver {
   async collectFileStats() {
     if (!this.baseSha) return { created: 0, modified: 0 };
     try {
-      const { stdout } = await execFileAsync2(
+      const { stdout } = await execFileAsync3(
         "git",
         ["diff", "--name-status", this.baseSha, "HEAD"],
         { cwd: this.opts.cwd }
@@ -11213,7 +11370,7 @@ var Finalizer = class extends BaseObserver {
   }
   async detectBranch() {
     try {
-      const { stdout } = await execFileAsync2(
+      const { stdout } = await execFileAsync3(
         "git",
         ["branch", "--show-current"],
         { cwd: this.opts.cwd }
@@ -11225,7 +11382,7 @@ var Finalizer = class extends BaseObserver {
   }
   async detectDefaultBaseBranch() {
     try {
-      const { stdout } = await execFileAsync2(
+      const { stdout } = await execFileAsync3(
         "gh",
         ["repo", "view", "--json", "defaultBranchRef", "--jq", ".defaultBranchRef.name"],
         { cwd: this.opts.cwd }
@@ -11235,7 +11392,7 @@ var Finalizer = class extends BaseObserver {
     } catch {
     }
     try {
-      const { stdout } = await execFileAsync2(
+      const { stdout } = await execFileAsync3(
         "git",
         ["symbolic-ref", "--short", "refs/remotes/origin/HEAD"],
         { cwd: this.opts.cwd }
@@ -11323,7 +11480,7 @@ var Finalizer = class extends BaseObserver {
   }
   async hasGhBinary() {
     try {
-      await execFileAsync2("gh", ["--version"], { cwd: this.opts.cwd });
+      await execFileAsync3("gh", ["--version"], { cwd: this.opts.cwd });
       return true;
     } catch {
       return false;
@@ -11331,7 +11488,7 @@ var Finalizer = class extends BaseObserver {
   }
   async openPr(args) {
     try {
-      const { stdout } = await execFileAsync2(
+      const { stdout } = await execFileAsync3(
         "gh",
         [
           "pr",
@@ -11932,7 +12089,7 @@ var CodexCliParticipant = class _CodexCliParticipant extends BaseObserver {
     this.agentId = agentId;
     this.options = {
       codexBin: "codex",
-      fullAuto: false,
+      bypassSandbox: false,
       skipGitRepoCheck: false,
       ...opts
     };
@@ -12048,7 +12205,9 @@ var CodexCliParticipant = class _CodexCliParticipant extends BaseObserver {
   buildArgs() {
     const args = ["exec", "--json"];
     if (this.options.skipGitRepoCheck) args.push("--skip-git-repo-check");
-    if (this.options.fullAuto) args.push("--full-auto");
+    if (this.options.bypassSandbox) {
+      args.push("--dangerously-bypass-approvals-and-sandbox");
+    }
     if (this.options.model) args.push("--model", this.options.model);
     if (this.options.extraArgs?.length) args.push(...this.options.extraArgs);
     args.push(this.options.prompt);
@@ -12153,7 +12312,7 @@ var CodexStoryAgent = class extends BaseObserver {
       timeoutSecs: 600,
       retryDelayMs: 1500,
       hardTimeoutSecs: 0,
-      fullAuto: true,
+      bypassSandbox: true,
       skipGitRepoCheck: false,
       ...spec
     };
@@ -12275,7 +12434,7 @@ var CodexStoryAgent = class extends BaseObserver {
       cwd: this.spec.cwd,
       prompt: this.spec.prompt,
       model: this.spec.model,
-      fullAuto: this.spec.fullAuto,
+      bypassSandbox: this.spec.bypassSandbox,
       skipGitRepoCheck: this.spec.skipGitRepoCheck
     });
     this.currentCodex = codex;
@@ -13842,9 +14001,9 @@ var StoryFactory = class extends BaseObserver {
 };
 // ../baro-orchestrator/src/participants/surgeon.ts
-import { execFile as execFile4 } from "child_process";
-import { promisify as promisify4 } from "util";
-var execFileAsync3 = promisify4(execFile4);
+import { execFile as execFile5 } from "child_process";
+import { promisify as promisify5 } from "util";
+var execFileAsync4 = promisify5(execFile5);
 var SURGEON_SYSTEM_PROMPT = `You are the Surgeon \u2014 an autonomous planner that adapts a software-project
 DAG when stories fail. Given:
 1. A snapshot of the current PRD (project, story list with dependencies +
@@ -13950,7 +14109,7 @@ var Surgeon = class extends BaseObserver {
     const snap = this.opts.snapshot();
     const prompt = buildSurgeonPrompt(snap, failure);
     try {
-      const { stdout } = await execFileAsync3(
+      const { stdout } = await execFileAsync4(
         this.opts.claudeBin,
         [
           "--print",
@@ -14051,6 +14210,85 @@ function surgeonDeterministicReplan(failure) {
   };
 }
+// ../baro-orchestrator/src/participants/surgeon-codex.ts
+/**
+ * Observer that reacts to failed StoryResult events by delivering a Replan
+ * event to all joined environments.
+ *
+ * The replan comes from Codex (via runCodexOneShot) when `useLlm` is true,
+ * otherwise from surgeonDeterministicReplan. No new evaluation is started
+ * once `maxReplans` replans have been emitted.
+ */
+var SurgeonCodex = class extends BaseObserver {
+  /** Resolved options; defaults are applied in the constructor. */
+  opts;
+  /** Number of Replan events delivered so far. */
+  replansEmitted = 0;
+  /** Evaluations currently in flight; awaited by idle(). */
+  pending = /* @__PURE__ */ new Set();
+  /**
+   * @param {object} opts
+   * @param {Function} opts.snapshot - Called with no arguments to obtain the
+   *   current state handed to buildSurgeonPrompt.
+   * @param {boolean} [opts.useLlm=true]
+   * @param {string} [opts.model] - Passed through to Codex when set.
+   * @param {number} [opts.maxReplans=10]
+   * @param {string} [opts.codexBin="codex"]
+   * @param {number} [opts.timeoutMs=120000]
+   */
+  constructor(opts) {
+    super();
+    this.opts = {
+      useLlm: opts.useLlm ?? true,
+      model: opts.model,
+      maxReplans: opts.maxReplans ?? 10,
+      codexBin: opts.codexBin ?? "codex",
+      timeoutMs: opts.timeoutMs ?? 12e4,
+      snapshot: opts.snapshot
+    };
+  }
+  /** Resolves once every in-flight evaluation has settled. */
+  async idle() {
+    await Promise.allSettled([...this.pending]);
+  }
+  /**
+   * Handle a failed StoryResult. Ignores other events, successful results,
+   * and anything arriving after the replan cap has been reached.
+   */
+  async onExternalEvent(_source, event) {
+    if (!StoryResult.is(event)) return;
+    if (event.data.success) return;
+    if (this.replansEmitted >= this.opts.maxReplans) return;
+    const work = (async () => {
+      const replan = this.opts.useLlm ? await this.evaluateWithLlm(event.data) : surgeonDeterministicReplan(event.data);
+      // A null replan (the LLM chose "abort") emits nothing.
+      if (!replan) return;
+      this.replansEmitted += 1;
+      for (const env of this.getEnvironments()) {
+        env.deliverSemanticEvent(this, Replan.create(replan));
+      }
+    })();
+    this.pending.add(work);
+    // Use try/finally rather than a detached `work.finally(...)`: the promise
+    // returned by `.finally()` rejects along with `work`, and with no handler
+    // attached that surfaces as an unhandled rejection.
+    try {
+      await work;
+    } finally {
+      this.pending.delete(work);
+    }
+  }
+  /**
+   * Ask Codex how to adapt the plan after `failure`.
+   *
+   * @param {object} failure - Data of the failed StoryResult.
+   * @returns {Promise<object|null>} Replan payload, or `null` when the parsed
+   *   action is "abort". If the Codex call or the parsing of its reply throws,
+   *   the deterministic replan is returned with the error appended to `reason`.
+   */
+  async evaluateWithLlm(failure) {
+    const snap = this.opts.snapshot();
+    const userPrompt = buildSurgeonPrompt(snap, failure);
+    const prompt = `${SURGEON_SYSTEM_PROMPT}\n${userPrompt}`;
+    try {
+      const text = await runCodexOneShot({
+        prompt,
+        cwd: process.cwd(),
+        skipGitRepoCheck: true,
+        bypassSandbox: true,
+        model: this.opts.model,
+        codexBin: this.opts.codexBin,
+        timeoutMs: this.opts.timeoutMs
+      });
+      const verdictText = text.trim();
+      if (!verdictText) throw new Error("empty result");
+      const verdictJson = extractJsonObject(verdictText);
+      const parsed = JSON.parse(verdictJson);
+      if (parsed.action === "abort") return null;
+      // Convert the [{id, newDependsOn}] list into an id -> deps map,
+      // dropping malformed entries.
+      const modifiedDeps = {};
+      for (const m of parsed.modifiedDeps ?? []) {
+        if (typeof m.id === "string" && Array.isArray(m.newDependsOn)) {
+          modifiedDeps[m.id] = [...m.newDependsOn];
+        }
+      }
+      return {
+        source: "surgeon",
+        reason: `${parsed.action}: ${parsed.reason ?? ""}`,
+        addedStories: parsed.added ?? [],
+        removedStoryIds: parsed.removed ?? [],
+        modifiedDeps
+      };
+    } catch (err) {
+      const fallback = surgeonDeterministicReplan(failure);
+      return {
+        ...fallback,
+        reason: `${fallback.reason} (codex fallback after error: ${err?.message ?? String(err)})`
+      };
+    }
+  }
+};
 // ../baro-orchestrator/src/participants/surgeon-openai.ts
 function pickModel3(name) {
   switch (name) {
@@ -14171,7 +14409,7 @@ async function orchestrate(config) {
     );
   } else if (llm === "codex") {
     process.stderr.write(
-      "[orchestrate] llm=codex: Story phase shells out to `codex exec --json` (ChatGPT subscription path). Architect / Planner / Critic / Surgeon fall back to Claude in v1 \u2014 codex-* siblings for those phases are a v2 follow-up.\n"
+      "[orchestrate] llm=codex: every LLM phase shells out to `codex exec --json` (ChatGPT subscription path). Architect / Planner / Critic / Surgeon / StoryAgent all running through Codex.\n"
     );
   } else {
     process.stderr.write(
@@ -14216,14 +14454,24 @@ async function orchestrate(config) {
         }))
       };
     };
-    surgeon = llm === "openai" ? new SurgeonOpenAI({
-      snapshot,
-      model: config.surgeonModel ?? "gpt-5.5"
-    }) : new Surgeon({
-      snapshot,
-      useLlm: config.surgeonUseLlm ?? false,
-      model: config.surgeonModel ?? "opus"
-    });
+    if (llm === "openai") {
+      surgeon = new SurgeonOpenAI({
+        snapshot,
+        model: config.surgeonModel ?? "gpt-5.5"
+      });
+    } else if (llm === "codex") {
+      surgeon = new SurgeonCodex({
+        snapshot,
+        useLlm: config.surgeonUseLlm ?? true,
+        model: config.surgeonModel
+      });
+    } else {
+      surgeon = new Surgeon({
+        snapshot,
+        useLlm: config.surgeonUseLlm ?? false,
+        model: config.surgeonModel ?? "opus"
+      });
+    }
     surgeon.join(env);
   }
   let critic = null;
@@ -14232,13 +14480,22 @@ async function orchestrate(config) {
     const targets = new Map(
       prd.userStories.filter((s) => s.acceptance && s.acceptance.length > 0).map((s) => [s.id, s.acceptance])
     );
-    critic = llm === "openai" ? new CriticOpenAI({
-      targets,
-      model: config.criticModel ?? "gpt-5.4-mini"
-    }) : new Critic({
-      targets,
-      model: config.criticModel ?? "haiku"
-    });
+    if (llm === "openai") {
+      critic = new CriticOpenAI({
+        targets,
+        model: config.criticModel ?? "gpt-5.4-mini"
+      });
+    } else if (llm === "codex") {
+      critic = new CriticCodex({
+        targets,
+        model: config.criticModel
+      });
+    } else {
+      critic = new Critic({
+        targets,
+        model: config.criticModel ?? "haiku"
+      });
+    }
     critic.join(env);
   }
   const finalizer = useGit ? new Finalizer({
@@ -14397,6 +14654,10 @@ var BaroEventForwarder = class extends BaseObserver {
       this.handleClaudeResult(event.data);
       return;
     }
+    if (CodexTurnEvent.is(event)) {
+      this.handleCodexTurnEvent(event.data);
+      return;
+    }
     if (AgentState.is(event)) {
       this.handleAgentState(event.data);
       return;
@@ -14481,6 +14742,41 @@ var BaroEventForwarder = class extends BaseObserver {
       output_tokens: outputTokens
     });
   }
+  /**
+   * Translate a Codex turn event into the `token_usage` BaroEvent and add it
+   * to the per-story running tally, so Codex usage is reported in the same
+   * event shape as the other backends.
+   *
+   * Codex emits its usage stats inside `turn.completed` envelopes
+   * (shape: `{type:"turn.completed", usage:{input_tokens,
+   * cached_input_tokens, output_tokens, reasoning_output_tokens}}`).
+   * Only events with `phase === "completed"` that carry a `raw.usage`
+   * object are counted; non-numeric counters count as 0.
+   *
+   * - Input: `usage.input_tokens` is used as-is. `cached_input_tokens`
+   *   is not read here (presumably `input_tokens` already includes the
+   *   cached portion — TODO confirm).
+   * - Output: `usage.output_tokens + usage.reasoning_output_tokens`.
+   *   NOTE(review): if Codex's `output_tokens` already includes
+   *   reasoning tokens this double-counts them — confirm against the
+   *   Codex usage schema.
+   *
+   * @param {object} item - Event data; reads `phase`, `raw` and `agentId`.
+   */
+  handleCodexTurnEvent(item) {
+    if (item.phase !== "completed") return;
+    // `raw` may be absent; treat that the same as "no usage reported".
+    const usage = item.raw?.usage;
+    if (!usage) return;
+    const asCount = (value) => typeof value === "number" ? value : 0;
+    const inputTokens = asCount(usage.input_tokens);
+    const outputTokens = asCount(usage.output_tokens) + asCount(usage.reasoning_output_tokens);
+    const tally = this.tokensByStory.get(item.agentId) ?? {
+      input: 0,
+      output: 0
+    };
+    tally.input += inputTokens;
+    tally.output += outputTokens;
+    this.tokensByStory.set(item.agentId, tally);
+    emit({
+      type: "token_usage",
+      id: item.agentId,
+      input_tokens: inputTokens,
+      output_tokens: outputTokens
+    });
+  }
   handleAgentState(item) {
     if (item.phase === "running" && !this.startedStories.has(item.agentId)) {
       this.startedStories.add(item.agentId);