baro-ai 0.42.1 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -10752,6 +10752,163 @@ function extractVerdictJson(text) {
10752
10752
  throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
10753
10753
  }
10754
10754
 
10755
+ // ../baro-orchestrator/src/codex-one-shot.ts
10756
+ import { execFile as execFile3 } from "child_process";
10757
+ import { promisify as promisify3 } from "util";
10758
+ var execFileAsync2 = promisify3(execFile3);
10759
/**
 * Runs `codex exec --json` once and returns the text of the agent's reply.
 *
 * The child's stdout is read as newline-delimited JSON events:
 *   - `turn.completed` events that carry a `usage` object are logged to stderr;
 *   - `item.completed` events whose item is an `agent_message` with a string
 *     `text` are collected, joined with "\n" when there is more than one;
 *   - every other line (blank, not JSON, or JSON that is not an object) is
 *     skipped.
 *
 * @param {object} opts
 * @param {string} opts.prompt - Prompt, passed as the last positional argument.
 * @param {string} [opts.cwd] - Working directory for the child process.
 * @param {string} [opts.model] - Forwarded as `--model <model>` when truthy.
 * @param {string} [opts.codexBin] - Executable to run; defaults to "codex".
 * @param {boolean} [opts.skipGitRepoCheck] - Adds `--skip-git-repo-check` when truthy.
 * @param {boolean} [opts.bypassSandbox] - Adds
 *   `--dangerously-bypass-approvals-and-sandbox` unless this is exactly `false`
 *   (i.e. omitting the option still bypasses).
 * @param {number} [opts.timeoutMs] - Child timeout in ms; defaults to 180000.
 * @param {number} [opts.maxBuffer] - Max captured stdout bytes; defaults to 16 MiB.
 * @returns {Promise<string>} The collected agent message text.
 * @throws {Error} When no non-blank agent message was produced. Rejections from
 *   the underlying execFile call (spawn failure, timeout, non-zero exit) are
 *   not caught here and propagate to the caller.
 */
async function runCodexOneShot(opts) {
  const args = ["exec", "--json"];
  if (opts.skipGitRepoCheck) args.push("--skip-git-repo-check");
  if (opts.bypassSandbox !== false) {
    args.push("--dangerously-bypass-approvals-and-sandbox");
  }
  if (opts.model) args.push("--model", opts.model);
  args.push(opts.prompt);
  const { stdout } = await execFileAsync2(opts.codexBin ?? "codex", args, {
    cwd: opts.cwd,
    timeout: opts.timeoutMs ?? 18e4,
    maxBuffer: opts.maxBuffer ?? 16 * 1024 * 1024
  });
  let result = "";
  for (const rawLine of stdout.split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      // Not JSON (banner, warning, partial line): ignore and keep scanning.
      continue;
    }
    // JSON.parse also accepts bare `null`, numbers and strings. Reading
    // `.type` off `null` would throw and abort the whole parse, so anything
    // that is not an object is skipped like any other unrecognised line.
    if (event === null || typeof event !== "object") continue;
    if (event.type === "turn.completed") {
      const usage = event.usage;
      if (usage) {
        process.stderr.write(
          `[codex] usage: in=${usage.input_tokens ?? 0} out=${usage.output_tokens ?? 0}\n`
        );
      }
      continue;
    }
    if (event.type !== "item.completed") continue;
    const item = event.item;
    if (!item) continue;
    if (item.type === "agent_message" && typeof item.text === "string") {
      result = result ? `${result}\n${item.text}` : item.text;
    }
  }
  if (!result.trim()) {
    throw new Error("runCodexOneShot: codex produced no agent_message");
  }
  return result;
}
10805
+
10806
+ // ../baro-orchestrator/src/participants/critic-codex.ts
10807
/**
 * Observer that grades agent results against per-agent acceptance criteria by
 * asking Codex (through `runCodexOneShot`) for a pass/fail verdict.
 *
 * For every successful `AgentResult` whose agent has criteria in
 * `opts.targets`, it delivers a `Critique` event to each joined environment.
 * On a "fail" verdict it additionally delivers an `AgentTargetedMessage` with
 * corrective text, at most `maxEmissionsPerAgent` times per agent.
 */
var CriticCodex = class extends BaseObserver {
  /** Options with defaults applied by the constructor. */
  opts;
  /** agentId -> number of corrective messages already sent to that agent. */
  emissions = new Map();
  /** agentId -> number of results accepted for evaluation so far. */
  turnCount = new Map();
  /** Evaluations currently in flight; `idle()` waits on these. */
  pending = new Set();

  /**
   * @param {object} opts
   * @param {Map<string, string[]>} opts.targets - agentId -> acceptance criteria.
   * @param {string} [opts.model] - Codex model; Codex's own default when unset.
   * @param {string} [opts.codexBin] - Executable; defaults to "codex".
   * @param {number} [opts.timeoutMs] - Per-evaluation timeout; defaults to 60000.
   * @param {number} [opts.maxEmissionsPerAgent] - Corrective-message cap; defaults to 2.
   */
  constructor(opts) {
    super();
    this.opts = {
      maxEmissionsPerAgent: opts.maxEmissionsPerAgent ?? 2,
      model: opts.model,
      codexBin: opts.codexBin ?? "codex",
      timeoutMs: opts.timeoutMs ?? 6e4,
      targets: opts.targets
    };
  }

  /** Resolves once every in-flight evaluation has settled (fulfilled or rejected). */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Evaluates one agent result and emits the resulting events.
   *
   * Ignores events that are not `AgentResult`, results flagged as errors,
   * results with empty text, and agents without acceptance criteria.
   *
   * @param {unknown} _source - Unused.
   * @param {object} event - Candidate semantic event.
   * @returns {Promise<void>} Settles when the evaluation and deliveries are done;
   *   rejects if event creation or delivery throws.
   */
  async onExternalEvent(_source, event) {
    if (!AgentResult.is(event)) return;
    const { agentId, isError, resultText } = event.data;
    if (isError || !resultText) return;
    const criteria = this.opts.targets.get(agentId);
    if (!criteria || criteria.length === 0) return;
    const turn = (this.turnCount.get(agentId) ?? 0) + 1;
    this.turnCount.set(agentId, turn);
    const work = (async () => {
      const { verdict, reasoning, violatedCriteria } = await this.evaluate(
        resultText,
        criteria
      );
      const critiqueEvent = Critique.create({
        agentId,
        verdict,
        reasoning,
        violatedCriteria,
        turn,
        modelUsed: this.opts.model ?? "codex-default"
      });
      for (const env of this.getEnvironments()) {
        env.deliverSemanticEvent(this, critiqueEvent);
      }
      if (verdict === "fail") {
        const emitted = this.emissions.get(agentId) ?? 0;
        if (emitted < this.opts.maxEmissionsPerAgent) {
          this.emissions.set(agentId, emitted + 1);
          const text = buildCorrectiveMessage(reasoning, violatedCriteria);
          const msg = AgentTargetedMessage.create({
            recipientId: agentId,
            text,
            metadata: {
              criticTurn: turn,
              emissionIndex: emitted + 1
            }
          });
          for (const env of this.getEnvironments()) {
            env.deliverSemanticEvent(this, msg);
          }
        }
      }
    })();
    this.pending.add(work);
    // try/finally instead of `work.finally(...)`: `.finally()` returns a NEW
    // promise that rejects whenever `work` rejects, and nobody would be
    // handling it, so a failure here would also surface as an unhandled
    // rejection in addition to the error thrown by `await work`.
    try {
      await work;
    } finally {
      this.pending.delete(work);
    }
  }

  /**
   * Asks Codex for a verdict on `resultText` against `criteria`.
   *
   * Never rejects: any failure of the Codex call or of parsing its reply is
   * converted into a "fail" verdict whose reasoning carries the error message.
   *
   * @param {string} resultText - The agent's final output.
   * @param {string[]} criteria - Acceptance criteria to check.
   * @returns {Promise<{verdict: "pass"|"fail", reasoning: string, violatedCriteria: string[]}>}
   */
  async evaluate(resultText, criteria) {
    const userPrompt = buildEvalPrompt(criteria, resultText);
    const prompt = `${VERDICT_SYSTEM_PROMPT}\n\n${userPrompt}`;
    try {
      const text = await runCodexOneShot({
        prompt,
        // Critic doesn't operate on the worktree — but Codex
        // still insists on running inside a git repo unless we
        // skip the check. Pass through skipGitRepoCheck so the
        // critic can be invoked from anywhere (including baro's
        // own cwd that may not be the story worktree).
        cwd: process.cwd(),
        skipGitRepoCheck: true,
        bypassSandbox: true,
        model: this.opts.model,
        codexBin: this.opts.codexBin,
        timeoutMs: this.opts.timeoutMs
      });
      const verdictJson = extractVerdictJson(text.trim());
      const parsed = JSON.parse(verdictJson);
      return {
        // Anything other than an explicit "pass" is treated as a failure.
        verdict: parsed.verdict === "pass" ? "pass" : "fail",
        reasoning: parsed.reasoning ?? "",
        violatedCriteria: Array.isArray(parsed.violated_criteria) ? parsed.violated_criteria : []
      };
    } catch (err) {
      return {
        verdict: "fail",
        reasoning: `CriticCodex LLM call failed: ${String(err?.message ?? err)}`,
        violatedCriteria: ["[critic error \u2014 could not evaluate]"]
      };
    }
  }
};
10911
+
10755
10912
  // ../baro-orchestrator/src/planning/openai-runtime.ts
10756
10913
  async function runInferenceRound(_context, _model) {
10757
10914
  throw new Error(
@@ -10927,9 +11084,9 @@ var CriticOpenAI = class extends BaseObserver {
10927
11084
  };
10928
11085
 
10929
11086
  // ../baro-orchestrator/src/participants/finalizer.ts
10930
- import { execFile as execFile3 } from "child_process";
10931
- import { promisify as promisify3 } from "util";
10932
- var execFileAsync2 = promisify3(execFile3);
11087
+ import { execFile as execFile4 } from "child_process";
11088
+ import { promisify as promisify4 } from "util";
11089
+ var execFileAsync3 = promisify4(execFile4);
10933
11090
  var Finalizer = class extends BaseObserver {
10934
11091
  opts;
10935
11092
  envRef = null;
@@ -11178,7 +11335,7 @@ var Finalizer = class extends BaseObserver {
11178
11335
  async collectCommitsSinceBase() {
11179
11336
  if (!this.baseSha) return [];
11180
11337
  try {
11181
- const { stdout } = await execFileAsync2(
11338
+ const { stdout } = await execFileAsync3(
11182
11339
  "git",
11183
11340
  ["log", `${this.baseSha}..HEAD`, "--pretty=format:%H%x09%s"],
11184
11341
  { cwd: this.opts.cwd }
@@ -11194,7 +11351,7 @@ var Finalizer = class extends BaseObserver {
11194
11351
  async collectFileStats() {
11195
11352
  if (!this.baseSha) return { created: 0, modified: 0 };
11196
11353
  try {
11197
- const { stdout } = await execFileAsync2(
11354
+ const { stdout } = await execFileAsync3(
11198
11355
  "git",
11199
11356
  ["diff", "--name-status", this.baseSha, "HEAD"],
11200
11357
  { cwd: this.opts.cwd }
@@ -11213,7 +11370,7 @@ var Finalizer = class extends BaseObserver {
11213
11370
  }
11214
11371
  async detectBranch() {
11215
11372
  try {
11216
- const { stdout } = await execFileAsync2(
11373
+ const { stdout } = await execFileAsync3(
11217
11374
  "git",
11218
11375
  ["branch", "--show-current"],
11219
11376
  { cwd: this.opts.cwd }
@@ -11225,7 +11382,7 @@ var Finalizer = class extends BaseObserver {
11225
11382
  }
11226
11383
  async detectDefaultBaseBranch() {
11227
11384
  try {
11228
- const { stdout } = await execFileAsync2(
11385
+ const { stdout } = await execFileAsync3(
11229
11386
  "gh",
11230
11387
  ["repo", "view", "--json", "defaultBranchRef", "--jq", ".defaultBranchRef.name"],
11231
11388
  { cwd: this.opts.cwd }
@@ -11235,7 +11392,7 @@ var Finalizer = class extends BaseObserver {
11235
11392
  } catch {
11236
11393
  }
11237
11394
  try {
11238
- const { stdout } = await execFileAsync2(
11395
+ const { stdout } = await execFileAsync3(
11239
11396
  "git",
11240
11397
  ["symbolic-ref", "--short", "refs/remotes/origin/HEAD"],
11241
11398
  { cwd: this.opts.cwd }
@@ -11323,7 +11480,7 @@ var Finalizer = class extends BaseObserver {
11323
11480
  }
11324
11481
  async hasGhBinary() {
11325
11482
  try {
11326
- await execFileAsync2("gh", ["--version"], { cwd: this.opts.cwd });
11483
+ await execFileAsync3("gh", ["--version"], { cwd: this.opts.cwd });
11327
11484
  return true;
11328
11485
  } catch {
11329
11486
  return false;
@@ -11331,7 +11488,7 @@ var Finalizer = class extends BaseObserver {
11331
11488
  }
11332
11489
  async openPr(args) {
11333
11490
  try {
11334
- const { stdout } = await execFileAsync2(
11491
+ const { stdout } = await execFileAsync3(
11335
11492
  "gh",
11336
11493
  [
11337
11494
  "pr",
@@ -11932,7 +12089,7 @@ var CodexCliParticipant = class _CodexCliParticipant extends BaseObserver {
11932
12089
  this.agentId = agentId;
11933
12090
  this.options = {
11934
12091
  codexBin: "codex",
11935
- fullAuto: false,
12092
+ bypassSandbox: false,
11936
12093
  skipGitRepoCheck: false,
11937
12094
  ...opts
11938
12095
  };
@@ -12048,7 +12205,9 @@ var CodexCliParticipant = class _CodexCliParticipant extends BaseObserver {
12048
12205
  buildArgs() {
12049
12206
  const args = ["exec", "--json"];
12050
12207
  if (this.options.skipGitRepoCheck) args.push("--skip-git-repo-check");
12051
- if (this.options.fullAuto) args.push("--full-auto");
12208
+ if (this.options.bypassSandbox) {
12209
+ args.push("--dangerously-bypass-approvals-and-sandbox");
12210
+ }
12052
12211
  if (this.options.model) args.push("--model", this.options.model);
12053
12212
  if (this.options.extraArgs?.length) args.push(...this.options.extraArgs);
12054
12213
  args.push(this.options.prompt);
@@ -12153,7 +12312,7 @@ var CodexStoryAgent = class extends BaseObserver {
12153
12312
  timeoutSecs: 600,
12154
12313
  retryDelayMs: 1500,
12155
12314
  hardTimeoutSecs: 0,
12156
- fullAuto: true,
12315
+ bypassSandbox: true,
12157
12316
  skipGitRepoCheck: false,
12158
12317
  ...spec
12159
12318
  };
@@ -12275,7 +12434,7 @@ var CodexStoryAgent = class extends BaseObserver {
12275
12434
  cwd: this.spec.cwd,
12276
12435
  prompt: this.spec.prompt,
12277
12436
  model: this.spec.model,
12278
- fullAuto: this.spec.fullAuto,
12437
+ bypassSandbox: this.spec.bypassSandbox,
12279
12438
  skipGitRepoCheck: this.spec.skipGitRepoCheck
12280
12439
  });
12281
12440
  this.currentCodex = codex;
@@ -13842,9 +14001,9 @@ var StoryFactory = class extends BaseObserver {
13842
14001
  };
13843
14002
 
13844
14003
  // ../baro-orchestrator/src/participants/surgeon.ts
13845
- import { execFile as execFile4 } from "child_process";
13846
- import { promisify as promisify4 } from "util";
13847
- var execFileAsync3 = promisify4(execFile4);
14004
+ import { execFile as execFile5 } from "child_process";
14005
+ import { promisify as promisify5 } from "util";
14006
+ var execFileAsync4 = promisify5(execFile5);
13848
14007
  var SURGEON_SYSTEM_PROMPT = `You are the Surgeon \u2014 an autonomous planner that adapts a software-project
13849
14008
  DAG when stories fail. Given:
13850
14009
  1. A snapshot of the current PRD (project, story list with dependencies +
@@ -13950,7 +14109,7 @@ var Surgeon = class extends BaseObserver {
13950
14109
  const snap = this.opts.snapshot();
13951
14110
  const prompt = buildSurgeonPrompt(snap, failure);
13952
14111
  try {
13953
- const { stdout } = await execFileAsync3(
14112
+ const { stdout } = await execFileAsync4(
13954
14113
  this.opts.claudeBin,
13955
14114
  [
13956
14115
  "--print",
@@ -14051,6 +14210,85 @@ function surgeonDeterministicReplan(failure) {
14051
14210
  };
14052
14211
  }
14053
14212
 
14213
+ // ../baro-orchestrator/src/participants/surgeon-codex.ts
14214
/**
 * Observer that reacts to failed `StoryResult` events by emitting a `Replan`,
 * produced either by Codex (through `runCodexOneShot`) or by
 * `surgeonDeterministicReplan`.
 *
 * At most `maxReplans` replans are emitted over the observer's lifetime.
 */
var SurgeonCodex = class extends BaseObserver {
  /** Options with defaults applied by the constructor. */
  opts;
  /** Number of `Replan` events emitted so far. */
  replansEmitted = 0;
  /** Replan computations currently in flight; `idle()` waits on these. */
  pending = new Set();

  /**
   * @param {object} opts
   * @param {() => object} opts.snapshot - Returns the current PRD snapshot for the prompt.
   * @param {boolean} [opts.useLlm] - Ask Codex instead of the deterministic planner; defaults to true.
   * @param {string} [opts.model] - Codex model; Codex's own default when unset.
   * @param {number} [opts.maxReplans] - Lifetime cap on emitted replans; defaults to 10.
   * @param {string} [opts.codexBin] - Executable; defaults to "codex".
   * @param {number} [opts.timeoutMs] - Per-call timeout; defaults to 120000.
   */
  constructor(opts) {
    super();
    this.opts = {
      useLlm: opts.useLlm ?? true,
      model: opts.model,
      maxReplans: opts.maxReplans ?? 10,
      codexBin: opts.codexBin ?? "codex",
      timeoutMs: opts.timeoutMs ?? 12e4,
      snapshot: opts.snapshot
    };
  }

  /** Resolves once every in-flight replan computation has settled. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Handles one semantic event; only failed `StoryResult`s trigger work.
   *
   * @param {unknown} _source - Unused.
   * @param {object} event - Candidate semantic event.
   * @returns {Promise<void>} Settles once the replan (if any) has been delivered.
   */
  async onExternalEvent(_source, event) {
    if (!StoryResult.is(event)) return;
    if (event.data.success) return;
    if (this.replansEmitted >= this.opts.maxReplans) return;
    const work = (async () => {
      const replan = this.opts.useLlm
        ? await this.evaluateWithLlm(event.data)
        : surgeonDeterministicReplan(event.data);
      // A null replan means "abort": emit nothing and do not count it.
      if (!replan) return;
      this.replansEmitted += 1;
      for (const env of this.getEnvironments()) {
        env.deliverSemanticEvent(this, Replan.create(replan));
      }
    })();
    this.pending.add(work);
    // try/finally instead of `work.finally(...)`: `.finally()` returns a NEW
    // promise that rejects whenever `work` rejects, and nobody would be
    // handling it, so a failure here would also surface as an unhandled
    // rejection in addition to the error thrown by `await work`.
    try {
      await work;
    } finally {
      this.pending.delete(work);
    }
  }

  /**
   * Asks Codex how to adapt the plan after `failure`.
   *
   * @param {object} failure - `data` payload of the failed `StoryResult`.
   * @returns {Promise<object|null>} A replan description, or `null` when Codex
   *   answered with action "abort". If the Codex call or the parsing of its
   *   reply throws, the deterministic replan is returned instead, with the
   *   error appended to its `reason`.
   */
  async evaluateWithLlm(failure) {
    const snap = this.opts.snapshot();
    const userPrompt = buildSurgeonPrompt(snap, failure);
    const prompt = `${SURGEON_SYSTEM_PROMPT}\n\n${userPrompt}`;
    try {
      const text = await runCodexOneShot({
        prompt,
        cwd: process.cwd(),
        skipGitRepoCheck: true,
        bypassSandbox: true,
        model: this.opts.model,
        codexBin: this.opts.codexBin,
        timeoutMs: this.opts.timeoutMs
      });
      const verdictText = text.trim();
      if (!verdictText) throw new Error("empty result");
      const verdictJson = extractJsonObject(verdictText);
      const parsed = JSON.parse(verdictJson);
      if (parsed.action === "abort") return null;
      // Keep only well-formed dependency rewrites; copy the array so the
      // emitted event does not alias the parsed reply.
      const modifiedDeps = {};
      for (const m of parsed.modifiedDeps ?? []) {
        if (typeof m.id === "string" && Array.isArray(m.newDependsOn)) {
          modifiedDeps[m.id] = [...m.newDependsOn];
        }
      }
      return {
        source: "surgeon",
        reason: `${parsed.action}: ${parsed.reason ?? ""}`,
        // The reply is model-generated: accept these lists only when they
        // really are arrays, the same way `modifiedDeps` is shape-checked
        // above, rather than forwarding an arbitrary value into the event.
        addedStories: Array.isArray(parsed.added) ? parsed.added : [],
        removedStoryIds: Array.isArray(parsed.removed) ? parsed.removed : [],
        modifiedDeps
      };
    } catch (err) {
      const fallback = surgeonDeterministicReplan(failure);
      return {
        ...fallback,
        reason: `${fallback.reason} (codex fallback after error: ${err?.message ?? String(err)})`
      };
    }
  }
};
14291
+
14054
14292
  // ../baro-orchestrator/src/participants/surgeon-openai.ts
14055
14293
  function pickModel3(name) {
14056
14294
  switch (name) {
@@ -14171,7 +14409,7 @@ async function orchestrate(config) {
14171
14409
  );
14172
14410
  } else if (llm === "codex") {
14173
14411
  process.stderr.write(
14174
- "[orchestrate] llm=codex: Story phase shells out to `codex exec --json` (ChatGPT subscription path). Architect / Planner / Critic / Surgeon fall back to Claude in v1 \u2014 codex-* siblings for those phases are a v2 follow-up.\n"
14412
+ "[orchestrate] llm=codex: every LLM phase shells out to `codex exec --json` (ChatGPT subscription path). Architect / Planner / Critic / Surgeon / StoryAgent all running through Codex.\n"
14175
14413
  );
14176
14414
  } else {
14177
14415
  process.stderr.write(
@@ -14216,14 +14454,24 @@ async function orchestrate(config) {
14216
14454
  }))
14217
14455
  };
14218
14456
  };
14219
- surgeon = llm === "openai" ? new SurgeonOpenAI({
14220
- snapshot,
14221
- model: config.surgeonModel ?? "gpt-5.5"
14222
- }) : new Surgeon({
14223
- snapshot,
14224
- useLlm: config.surgeonUseLlm ?? false,
14225
- model: config.surgeonModel ?? "opus"
14226
- });
14457
+ if (llm === "openai") {
14458
+ surgeon = new SurgeonOpenAI({
14459
+ snapshot,
14460
+ model: config.surgeonModel ?? "gpt-5.5"
14461
+ });
14462
+ } else if (llm === "codex") {
14463
+ surgeon = new SurgeonCodex({
14464
+ snapshot,
14465
+ useLlm: config.surgeonUseLlm ?? true,
14466
+ model: config.surgeonModel
14467
+ });
14468
+ } else {
14469
+ surgeon = new Surgeon({
14470
+ snapshot,
14471
+ useLlm: config.surgeonUseLlm ?? false,
14472
+ model: config.surgeonModel ?? "opus"
14473
+ });
14474
+ }
14227
14475
  surgeon.join(env);
14228
14476
  }
14229
14477
  let critic = null;
@@ -14232,13 +14480,22 @@ async function orchestrate(config) {
14232
14480
  const targets = new Map(
14233
14481
  prd.userStories.filter((s) => s.acceptance && s.acceptance.length > 0).map((s) => [s.id, s.acceptance])
14234
14482
  );
14235
- critic = llm === "openai" ? new CriticOpenAI({
14236
- targets,
14237
- model: config.criticModel ?? "gpt-5.4-mini"
14238
- }) : new Critic({
14239
- targets,
14240
- model: config.criticModel ?? "haiku"
14241
- });
14483
+ if (llm === "openai") {
14484
+ critic = new CriticOpenAI({
14485
+ targets,
14486
+ model: config.criticModel ?? "gpt-5.4-mini"
14487
+ });
14488
+ } else if (llm === "codex") {
14489
+ critic = new CriticCodex({
14490
+ targets,
14491
+ model: config.criticModel
14492
+ });
14493
+ } else {
14494
+ critic = new Critic({
14495
+ targets,
14496
+ model: config.criticModel ?? "haiku"
14497
+ });
14498
+ }
14242
14499
  critic.join(env);
14243
14500
  }
14244
14501
  const finalizer = useGit ? new Finalizer({
@@ -14397,6 +14654,10 @@ var BaroEventForwarder = class extends BaseObserver {
14397
14654
  this.handleClaudeResult(event.data);
14398
14655
  return;
14399
14656
  }
14657
+ if (CodexTurnEvent.is(event)) {
14658
+ this.handleCodexTurnEvent(event.data);
14659
+ return;
14660
+ }
14400
14661
  if (AgentState.is(event)) {
14401
14662
  this.handleAgentState(event.data);
14402
14663
  return;
@@ -14481,6 +14742,41 @@ var BaroEventForwarder = class extends BaseObserver {
14481
14742
  output_tokens: outputTokens
14482
14743
  });
14483
14744
  }
14745
+ /**
14746
+ * Codex emits its usage stats inside `turn.completed` envelopes
14747
+ * (shape: `{type:"turn.completed", usage:{input_tokens,
14748
+ * cached_input_tokens, output_tokens, reasoning_output_tokens}}`).
14749
+ * Translate to the same `token_usage` BaroEvent shape Claude uses
14750
+ * so the TUI's existing counter works without backend-specific
14751
+ * branching. `cached_input_tokens` is rolled into `input_tokens`
14752
+ * (Codex reports both — Claude only reports the combined total —
14753
+ * so we surface the same number here for parity). Reasoning
14754
+ * tokens are billed as output tokens by OpenAI so we lump them
14755
+ * with output_tokens.
14756
+ */
14757
+ handleCodexTurnEvent(item) {
14758
+ if (item.phase !== "completed") return;
14759
+ const raw = item.raw;
14760
+ const usage = raw.usage;
14761
+ if (!usage) return;
14762
+ const inputTokens = typeof usage.input_tokens === "number" ? usage.input_tokens : 0;
14763
+ const outputBase = typeof usage.output_tokens === "number" ? usage.output_tokens : 0;
14764
+ const reasoning = typeof usage.reasoning_output_tokens === "number" ? usage.reasoning_output_tokens : 0;
14765
+ const outputTokens = outputBase + reasoning;
14766
+ const tally = this.tokensByStory.get(item.agentId) ?? {
14767
+ input: 0,
14768
+ output: 0
14769
+ };
14770
+ tally.input += inputTokens;
14771
+ tally.output += outputTokens;
14772
+ this.tokensByStory.set(item.agentId, tally);
14773
+ emit({
14774
+ type: "token_usage",
14775
+ id: item.agentId,
14776
+ input_tokens: inputTokens,
14777
+ output_tokens: outputTokens
14778
+ });
14779
+ }
14484
14780
  handleAgentState(item) {
14485
14781
  if (item.phase === "running" && !this.startedStories.has(item.agentId)) {
14486
14782
  this.startedStories.add(item.agentId);