baro-ai 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -18
- package/dist/cli.mjs +54 -15
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -124,18 +124,23 @@ Options:
|
|
|
124
124
|
--resume Resume from existing prd.json (also runs dry-run plans)
|
|
125
125
|
--skip-context Skip CLAUDE.md auto-generation
|
|
126
126
|
--cwd <path> Working directory (default: current)
|
|
127
|
-
--
|
|
128
|
-
|
|
129
|
-
|
|
127
|
+
--no-critic Disable live Critic (default: on). The Critic
|
|
128
|
+
reviews each agent turn against acceptance
|
|
129
|
+
criteria via `claude --model haiku` and injects
|
|
130
|
+
corrective feedback when the turn doesn't pass.
|
|
130
131
|
--critic-model <name> Model for the Critic (default: haiku)
|
|
131
132
|
--no-librarian Disable cross-agent runtime memory (default: on)
|
|
132
133
|
--no-sentry Disable file-touch conflict detector (default: on)
|
|
133
|
-
--
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
134
|
+
--no-surgeon Disable Surgeon (default: on). The Surgeon
|
|
135
|
+
observes terminal story failures and proposes
|
|
136
|
+
replans (split / prereq / rewire) so failed
|
|
137
|
+
work gets done in a different shape rather
|
|
138
|
+
than dropped.
|
|
139
|
+
--no-surgeon-llm Use deterministic Surgeon (skip-only) instead
|
|
140
|
+
of the LLM-driven replanner. The LLM Surgeon
|
|
141
|
+
is on by default; it costs an Opus call per
|
|
142
|
+
terminal failure but produces richer replans.
|
|
143
|
+
--surgeon-model <name> Model for the Surgeon LLM (default: opus)
|
|
139
144
|
-h, --help Print help
|
|
140
145
|
```
|
|
141
146
|
|
|
@@ -150,15 +155,22 @@ react to one another's bus events:
|
|
|
150
155
|
redundant exploration. Measurable token savings on multi-story runs.
|
|
151
156
|
- **Sentry** (default ON) — flags overlapping Edit/Write tool calls
|
|
152
157
|
across concurrent stories.
|
|
153
|
-
- **Critic** (
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
- **Surgeon** (
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
158
|
+
- **Critic** (default ON) — Haiku evaluator reviews each agent turn
|
|
159
|
+
against acceptance criteria; on a fail verdict, an inline corrective
|
|
160
|
+
message lands as the agent's next turn so it self-corrects before
|
|
161
|
+
commit. Disable with `--no-critic`.
|
|
162
|
+
- **Surgeon** (default ON, with LLM) — when a story exhausts its retry
|
|
163
|
+
budget, the Surgeon asks Opus for a richer replan and emits a
|
|
164
|
+
ReplanItem the Conductor applies at the next level boundary. The LLM
|
|
165
|
+
is biased toward keeping the work done — it prefers splitting a
|
|
166
|
+
too-large story into smaller pieces, inserting a prerequisite, or
|
|
167
|
+
rewiring dependencies, over dropping outright. A run is reported as
|
|
168
|
+
successful only when every original story passes; if the Surgeon
|
|
169
|
+
drops a story without replacement, the run terminates with a clear
|
|
170
|
+
"did not complete the goal" verdict instead of a green tick. Disable
|
|
171
|
+
the LLM with `--no-surgeon-llm` to fall back to deterministic
|
|
172
|
+
skip-only behavior, or `--no-surgeon` to remove adaptive replans
|
|
173
|
+
entirely.
|
|
162
174
|
|
|
163
175
|
## Requirements
|
|
164
176
|
|
package/dist/cli.mjs
CHANGED
|
@@ -8673,6 +8673,14 @@ var Conductor = class extends Participant {
|
|
|
8673
8673
|
globalCompleted = [];
|
|
8674
8674
|
/** All stories that have failed terminally (after retries) in this run. */
|
|
8675
8675
|
globalFailed = [];
|
|
8676
|
+
/**
|
|
8677
|
+
* Stories removed from the PRD by a Surgeon replan without a
|
|
8678
|
+
* replacement. These do NOT come back to globalFailed (the failing
|
|
8679
|
+
* story is gone from the PRD and won't be retried) but they DO
|
|
8680
|
+
* count against the run's success verdict — terminateRun(success)
|
|
8681
|
+
* is called with success = true only when this list is empty.
|
|
8682
|
+
*/
|
|
8683
|
+
globalDropped = [];
|
|
8676
8684
|
totalAttempts = 0;
|
|
8677
8685
|
appliedReplans = 0;
|
|
8678
8686
|
currentLevel = null;
|
|
@@ -8761,7 +8769,8 @@ var Conductor = class extends Participant {
|
|
|
8761
8769
|
if (!this.prd) return;
|
|
8762
8770
|
const levels = buildDag(this.prd.userStories, { onlyIncomplete: true });
|
|
8763
8771
|
if (levels.length === 0) {
|
|
8764
|
-
this.
|
|
8772
|
+
const allPassed = this.prd.userStories.every((s) => s.passes) && this.globalDropped.length === 0;
|
|
8773
|
+
this.terminateRun(allPassed, null);
|
|
8765
8774
|
return;
|
|
8766
8775
|
}
|
|
8767
8776
|
const level = levels[0];
|
|
@@ -8898,9 +8907,17 @@ ${prompt}`;
|
|
|
8898
8907
|
replannedThisLevel = true;
|
|
8899
8908
|
if (replan.removedStoryIds.length > 0) {
|
|
8900
8909
|
const removeSet = new Set(replan.removedStoryIds);
|
|
8901
|
-
|
|
8902
|
-
|
|
8903
|
-
this.globalFailed
|
|
8910
|
+
if (replan.addedStories.length > 0) {
|
|
8911
|
+
for (let i = this.globalFailed.length - 1; i >= 0; i--) {
|
|
8912
|
+
if (removeSet.has(this.globalFailed[i])) {
|
|
8913
|
+
this.globalFailed.splice(i, 1);
|
|
8914
|
+
}
|
|
8915
|
+
}
|
|
8916
|
+
} else {
|
|
8917
|
+
for (const id of replan.removedStoryIds) {
|
|
8918
|
+
if (!this.globalDropped.includes(id)) {
|
|
8919
|
+
this.globalDropped.push(id);
|
|
8920
|
+
}
|
|
8904
8921
|
}
|
|
8905
8922
|
}
|
|
8906
8923
|
}
|
|
@@ -8930,15 +8947,17 @@ ${prompt}`;
|
|
|
8930
8947
|
if (this.phase === "done") return;
|
|
8931
8948
|
this.phase = "done";
|
|
8932
8949
|
const totalDurationSecs = Math.round((Date.now() - this.startedAt) / 1e3);
|
|
8950
|
+
const droppedSegment = this.globalDropped.length > 0 ? `, ${this.globalDropped.length} dropped` : "";
|
|
8933
8951
|
this.emit(
|
|
8934
8952
|
new ConductorStateItem(
|
|
8935
8953
|
success ? "done" : "failed",
|
|
8936
|
-
abortReason ?? `${this.globalCompleted.length} passed, ${this.globalFailed.length} failed in ${totalDurationSecs}s`
|
|
8954
|
+
abortReason ?? `${this.globalCompleted.length} passed, ${this.globalFailed.length} failed${droppedSegment} in ${totalDurationSecs}s`
|
|
8937
8955
|
)
|
|
8938
8956
|
);
|
|
8939
8957
|
const summary = {
|
|
8940
8958
|
completedStories: [...this.globalCompleted],
|
|
8941
8959
|
failedStories: [...this.globalFailed],
|
|
8960
|
+
droppedStories: [...this.globalDropped],
|
|
8942
8961
|
totalDurationSecs,
|
|
8943
8962
|
totalAttempts: this.totalAttempts
|
|
8944
8963
|
};
|
|
@@ -9563,18 +9582,38 @@ DAG when stories fail. Given:
|
|
|
9563
9582
|
2. The id, title, description, and FAILURE REASON of the story that just
|
|
9564
9583
|
exhausted its retry budget.
|
|
9565
9584
|
|
|
9566
|
-
Decide ONE of:
|
|
9567
|
-
(a) "
|
|
9568
|
-
|
|
9569
|
-
|
|
9570
|
-
|
|
9571
|
-
|
|
9572
|
-
|
|
9585
|
+
Decide ONE of, in this order of preference:
|
|
9586
|
+
(a) "split" \u2014 replace the failing story with 2-3 smaller stories
|
|
9587
|
+
that together cover its acceptance criteria. Use
|
|
9588
|
+
this whenever the failure looks like the story was
|
|
9589
|
+
too broad \u2014 too many files, too many concerns,
|
|
9590
|
+
too much for one Claude session. Strongly preferred
|
|
9591
|
+
over removal whenever the goal still needs the work.
|
|
9592
|
+
(b) "prereq" \u2014 insert ONE OR MORE new prerequisite stories that
|
|
9593
|
+
the failing story now depends on, then ALSO add a
|
|
9594
|
+
replacement of the failing story (with updated
|
|
9595
|
+
dependsOn) so the original work still gets done.
|
|
9596
|
+
Removing without replacement is NOT prereq.
|
|
9597
|
+
(c) "rewire" \u2014 keep the failing story BUT modifyDeps so it runs
|
|
9598
|
+
in a different order, or change its dependsOn to
|
|
9599
|
+
unblock dependents. Use when the failure was
|
|
9600
|
+
timing-related, not scope-related.
|
|
9601
|
+
(d) "skip" \u2014 last resort. Use ONLY when the story is genuinely
|
|
9602
|
+
infeasible (e.g., asks for a library that doesn't
|
|
9603
|
+
exist, references files that aren't there). When
|
|
9604
|
+
you skip, modifyDeps for any dependents so the
|
|
9605
|
+
rest of the run can still complete.
|
|
9606
|
+
(e) "abort" \u2014 only when the entire run cannot continue.
|
|
9607
|
+
|
|
9608
|
+
Strong bias: the run is only successful when EVERY original goal item
|
|
9609
|
+
gets done. Splitting into smaller stories is almost always better than
|
|
9610
|
+
dropping. Don't drop just because one attempt failed \u2014 propose a
|
|
9611
|
+
different approach.
|
|
9573
9612
|
|
|
9574
9613
|
Respond ONLY with a JSON object \u2014 no prose, no markdown fences \u2014 in
|
|
9575
9614
|
exactly this shape:
|
|
9576
9615
|
|
|
9577
|
-
{"action":"
|
|
9616
|
+
{"action":"split"|"prereq"|"rewire"|"skip"|"abort",
|
|
9578
9617
|
"reason":"\u2026",
|
|
9579
9618
|
"added":[ { "id":"S?","priority":N,"title":"\u2026","description":"\u2026",
|
|
9580
9619
|
"dependsOn":["\u2026"], "acceptance":["\u2026"] } ],
|
|
@@ -9595,9 +9634,9 @@ var Surgeon = class extends Participant {
|
|
|
9595
9634
|
constructor(opts) {
|
|
9596
9635
|
super();
|
|
9597
9636
|
this.opts = {
|
|
9598
|
-
useLlm: opts.useLlm ??
|
|
9637
|
+
useLlm: opts.useLlm ?? true,
|
|
9599
9638
|
model: opts.model ?? "opus",
|
|
9600
|
-
maxReplans: opts.maxReplans ??
|
|
9639
|
+
maxReplans: opts.maxReplans ?? 10,
|
|
9601
9640
|
claudeBin: opts.claudeBin ?? "claude",
|
|
9602
9641
|
timeoutMs: opts.timeoutMs ?? 9e4,
|
|
9603
9642
|
snapshot: opts.snapshot
|