baro-ai 0.43.3 → 0.44.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -6
- package/dist/cli.mjs +37 -9
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -69,7 +69,31 @@ flowchart LR
|
|
|
69
69
|
F --> PR([Pull Request])
|
|
70
70
|
```
|
|
71
71
|
|
|
72
|
-
Every story is one
|
|
72
|
+
Every story is one agent session — a Claude Code or OpenAI Codex CLI subprocess, or a Mozaik-native OpenAI Responses session, depending on `--llm`. The CLI backends inherit auth from whichever CLI you are already signed in to, with no API key plumbing; only `--llm openai` needs `OPENAI_API_KEY`.
|
|
73
|
+
|
|
74
|
+
## Three LLM backends, one DAG
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
baro --llm claude "Your goal" # default — Claude Code on Anthropic Max subscription
|
|
78
|
+
baro --llm codex "Your goal" # OpenAI Codex CLI on ChatGPT Pro/Plus subscription
|
|
79
|
+
baro --llm openai "Your goal" # Mozaik-native OpenAI Responses (per-call API billing)
|
|
80
|
+
baro --llm hybrid "Your goal" # Claude on Architect/Planner/Surgeon, Codex on Story/Critic
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Same orchestration. Same DAG. Same prompts. The only thing that moves is which provider every agent talks to. `--llm hybrid` is the recommended setting for serious runs (the default is still `claude`) — Claude where the upstream plan matters, Codex for the parallel story+critic work that dominates the budget.
|
|
84
|
+
|
|
85
|
+
Each phase has its own override flag if you want to mix it yourself:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
baro --architect-llm claude \
|
|
89
|
+
--planner-llm claude \
|
|
90
|
+
--story-llm codex \
|
|
91
|
+
--critic-llm codex \
|
|
92
|
+
--surgeon-llm claude \
|
|
93
|
+
"Your goal"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Full breakdown at [docs.baro.rs/llm-providers](https://docs.baro.rs/llm-providers) — provider economics, per-phase routing, the side-by-side benchmark across three real tasks: [**I tested Claude Code vs OpenAI Codex in my parallel agent setup. Then I built a hybrid.**](https://jigjoy.ai/blog/claude-code-vs-codex-baro)
|
|
73
97
|
|
|
74
98
|
## Recent real run
|
|
75
99
|
|
|
@@ -82,7 +106,7 @@ Every story is one **Claude Code subprocess** (or one Mozaik-native OpenAI sessi
|
|
|
82
106
|
| **Architect** | One Opus call before planning — emits a `DecisionDocument` that pins every cross-cutting design decision (file paths, schemas, API shapes, library choices) so 30 parallel agents don't each invent their own |
|
|
83
107
|
| **Planner** | Decomposes the goal into a story DAG, with the DecisionDocument already pinned |
|
|
84
108
|
| **Conductor** | State machine that drives the run by reacting to bus events |
|
|
85
|
-
| **StoryAgent** | One Claude Code
|
|
109
|
+
| **StoryAgent** | One CLI subprocess per story (Claude Code / Codex / OpenAI Responses, picked by `--llm` or `--story-llm`); multi-turn loop until story completes |
|
|
86
110
|
| **Critic** | Per-turn evaluator (Haiku). On fail verdict, injects corrective feedback as the agent's next turn |
|
|
87
111
|
| **Sentry** | Flags overlapping Edit/Write tool calls across concurrent stories |
|
|
88
112
|
| **Librarian** | Indexes one agent's Read/Grep findings so siblings don't redo the exploration |
|
|
@@ -96,16 +120,22 @@ Bus is open. CI deployers, Slack notifiers, ticket triggers — all new particip
|
|
|
96
120
|
```bash
|
|
97
121
|
npm install -g baro-ai
|
|
98
122
|
|
|
99
|
-
# Full run (default —
|
|
123
|
+
# Full run (default — Claude on every phase via Claude Code CLI)
|
|
100
124
|
baro "Migrate the hardcoded category data to a backend dictionary"
|
|
101
125
|
|
|
102
126
|
# Trivial goal — skip Architect + Critic + Surgeon, single story
|
|
103
127
|
baro --quick "fix the typo on line 42 of README.md"
|
|
104
128
|
|
|
105
|
-
#
|
|
129
|
+
# Codex everywhere (ChatGPT Pro/Plus subscription, ~3-11× cheaper per run than Claude)
|
|
130
|
+
baro --llm codex "Refactor the database layer"
|
|
131
|
+
|
|
132
|
+
# Per-phase routing — Claude upstream (tight plans), Codex downstream (cheap writes)
|
|
133
|
+
baro --llm hybrid "Add WebSocket support across api and frontend"
|
|
134
|
+
|
|
135
|
+
# Route every phase through GPT-5.5 (Mozaik-native OpenAI API)
|
|
106
136
|
OPENAI_API_KEY=sk-... baro --llm openai "Refactor the database layer"
|
|
107
137
|
|
|
108
|
-
# Limit parallelism (
|
|
138
|
+
# Limit parallelism (plan-tier concurrency caps)
|
|
109
139
|
baro --parallel 3 "Add unit tests for the auth module"
|
|
110
140
|
|
|
111
141
|
# Dry-run first, execute later
|
|
@@ -134,7 +164,11 @@ For a deeper side-by-side on a real refactor, see [baro vs Claude Code `/goal`](
|
|
|
134
164
|
|
|
135
165
|
## Requirements
|
|
136
166
|
|
|
137
|
-
-
|
|
167
|
+
- At least one of:
|
|
168
|
+
- [Claude CLI](https://docs.anthropic.com/en/docs/claude-cli) authenticated (for `--llm claude`, the default)
|
|
169
|
+
- [OpenAI Codex CLI](https://github.com/openai/codex) authenticated (for `--llm codex`)
|
|
170
|
+
- `OPENAI_API_KEY` set (for `--llm openai`)
|
|
171
|
+
- Both Claude CLI **and** Codex CLI authenticated (for `--llm hybrid`)
|
|
138
172
|
- Node.js 20+
|
|
139
173
|
- macOS (arm64/x64), Linux (x64/arm64), Windows (x64)
|
|
140
174
|
- `gh` CLI (optional, for automatic PR creation)
|
package/dist/cli.mjs
CHANGED
|
@@ -14467,17 +14467,26 @@ async function orchestrate(config) {
|
|
|
14467
14467
|
const env = new AgenticEnvironment();
|
|
14468
14468
|
const emitTui = config.emitTuiEvents ?? true;
|
|
14469
14469
|
const llm = config.llm ?? "claude";
|
|
14470
|
-
|
|
14470
|
+
const storyLlm = config.storyLlm ?? llm;
|
|
14471
|
+
const criticLlm = config.criticLlm ?? llm;
|
|
14472
|
+
const surgeonLlm = config.surgeonLlm ?? llm;
|
|
14473
|
+
const isHybrid = (/* @__PURE__ */ new Set([storyLlm, criticLlm, surgeonLlm, llm])).size > 1;
|
|
14474
|
+
if (isHybrid) {
|
|
14471
14475
|
process.stderr.write(
|
|
14472
|
-
|
|
14476
|
+
`[orchestrate] hybrid routing: story=${storyLlm} critic=${criticLlm} surgeon=${surgeonLlm} (default=${llm})
|
|
14477
|
+
`
|
|
14478
|
+
);
|
|
14479
|
+
} else if (llm === "openai") {
|
|
14480
|
+
process.stderr.write(
|
|
14481
|
+
"[orchestrate] llm=openai: Story, Critic, Surgeon all running through Mozaik's native OpenAI runner (gpt-5.x).\n"
|
|
14473
14482
|
);
|
|
14474
14483
|
} else if (llm === "codex") {
|
|
14475
14484
|
process.stderr.write(
|
|
14476
|
-
"[orchestrate] llm=codex:
|
|
14485
|
+
"[orchestrate] llm=codex: Story, Critic, Surgeon all shelling out to `codex exec --json` (ChatGPT subscription path).\n"
|
|
14477
14486
|
);
|
|
14478
14487
|
} else {
|
|
14479
14488
|
process.stderr.write(
|
|
14480
|
-
"[orchestrate] llm=claude:
|
|
14489
|
+
"[orchestrate] llm=claude: Story, Critic, Surgeon all shelling out to the Claude Code CLI.\n"
|
|
14481
14490
|
);
|
|
14482
14491
|
}
|
|
14483
14492
|
if (config.auditLogPath) {
|
|
@@ -14518,12 +14527,12 @@ async function orchestrate(config) {
|
|
|
14518
14527
|
}))
|
|
14519
14528
|
};
|
|
14520
14529
|
};
|
|
14521
|
-
if (
|
|
14530
|
+
if (surgeonLlm === "openai") {
|
|
14522
14531
|
surgeon = new SurgeonOpenAI({
|
|
14523
14532
|
snapshot,
|
|
14524
14533
|
model: config.surgeonModel ?? "gpt-5.5"
|
|
14525
14534
|
});
|
|
14526
|
-
} else if (
|
|
14535
|
+
} else if (surgeonLlm === "codex") {
|
|
14527
14536
|
surgeon = new SurgeonCodex({
|
|
14528
14537
|
snapshot,
|
|
14529
14538
|
useLlm: config.surgeonUseLlm ?? true,
|
|
@@ -14544,12 +14553,12 @@ async function orchestrate(config) {
|
|
|
14544
14553
|
const targets = new Map(
|
|
14545
14554
|
prd.userStories.filter((s) => s.acceptance && s.acceptance.length > 0).map((s) => [s.id, s.acceptance])
|
|
14546
14555
|
);
|
|
14547
|
-
if (
|
|
14556
|
+
if (criticLlm === "openai") {
|
|
14548
14557
|
critic = new CriticOpenAI({
|
|
14549
14558
|
targets,
|
|
14550
14559
|
model: config.criticModel ?? "gpt-5.4-mini"
|
|
14551
14560
|
});
|
|
14552
|
-
} else if (
|
|
14561
|
+
} else if (criticLlm === "codex") {
|
|
14553
14562
|
critic = new CriticCodex({
|
|
14554
14563
|
targets,
|
|
14555
14564
|
model: config.criticModel
|
|
@@ -14631,7 +14640,7 @@ async function orchestrate(config) {
|
|
|
14631
14640
|
conductor.join(env);
|
|
14632
14641
|
const storyFactory = new StoryFactory({
|
|
14633
14642
|
cwd: config.cwd,
|
|
14634
|
-
llm,
|
|
14643
|
+
llm: storyLlm,
|
|
14635
14644
|
openaiModel: config.storyModel ?? "gpt-5.5",
|
|
14636
14645
|
storyModelOverride: config.storyModel
|
|
14637
14646
|
});
|
|
@@ -14983,6 +14992,22 @@ function parseArgs(argv) {
|
|
|
14983
14992
|
args.llm = v;
|
|
14984
14993
|
break;
|
|
14985
14994
|
}
|
|
14995
|
+
case "--story-llm":
|
|
14996
|
+
case "--critic-llm":
|
|
14997
|
+
case "--surgeon-llm": {
|
|
14998
|
+
const v = required(argv, ++i, a);
|
|
14999
|
+
if (v !== "claude" && v !== "openai" && v !== "codex") {
|
|
15000
|
+
process.stderr.write(
|
|
15001
|
+
`[cli] ${a} must be 'claude' | 'openai' | 'codex', got '${v}'
|
|
15002
|
+
`
|
|
15003
|
+
);
|
|
15004
|
+
process.exit(2);
|
|
15005
|
+
}
|
|
15006
|
+
if (a === "--story-llm") args.storyLlm = v;
|
|
15007
|
+
else if (a === "--critic-llm") args.criticLlm = v;
|
|
15008
|
+
else args.surgeonLlm = v;
|
|
15009
|
+
break;
|
|
15010
|
+
}
|
|
14986
15011
|
default:
|
|
14987
15012
|
process.stderr.write(`[cli] unknown flag: ${a}
|
|
14988
15013
|
`);
|
|
@@ -15062,6 +15087,9 @@ async function main() {
|
|
|
15062
15087
|
surgeonModel: args.surgeonModel,
|
|
15063
15088
|
intraLevelDelaySecs: args.intraLevelDelaySecs,
|
|
15064
15089
|
llm: args.llm,
|
|
15090
|
+
storyLlm: args.storyLlm,
|
|
15091
|
+
criticLlm: args.criticLlm,
|
|
15092
|
+
surgeonLlm: args.surgeonLlm,
|
|
15065
15093
|
storyModel: args.storyModel
|
|
15066
15094
|
};
|
|
15067
15095
|
if (args.llm === "openai" && !process.env.OPENAI_API_KEY) {
|