baro-ai 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -124,18 +124,23 @@ Options:
124
124
  --resume Resume from existing prd.json (also runs dry-run plans)
125
125
  --skip-context Skip CLAUDE.md auto-generation
126
126
  --cwd <path> Working directory (default: current)
127
- --with-critic Enable live Critic reviews each agent turn
128
- against acceptance criteria via `claude --model haiku`
129
- and injects corrective feedback (default: off)
127
+ --no-critic Disable live Critic (default: on). The Critic
128
+ reviews each agent turn against acceptance
129
+ criteria via `claude --model haiku` and injects
130
+ corrective feedback when the turn doesn't pass.
130
131
  --critic-model <name> Model for the Critic (default: haiku)
131
132
  --no-librarian Disable cross-agent runtime memory (default: on)
132
133
  --no-sentry Disable file-touch conflict detector (default: on)
133
- --with-surgeon Enable adaptive DAG: drop / replace failing stories
134
- at level boundaries instead of stalling (default: off)
135
- --surgeon-use-llm Use `claude --model …` for richer Surgeon replans
136
- (default: deterministic skip-only)
137
- --surgeon-model <name> Model for Surgeon when --surgeon-use-llm is on
138
- (default: opus)
134
+ --no-surgeon Disable Surgeon (default: on). The Surgeon
135
+ observes terminal story failures and proposes
136
+ replans (split / prereq / rewire) so failed
137
+ work gets done in a different shape rather
138
+ than dropped.
139
+ --no-surgeon-llm Use deterministic Surgeon (skip-only) instead
140
+ of the LLM-driven replanner. The LLM Surgeon
141
+ is on by default; it costs an Opus call per
142
+ terminal failure but produces richer replans.
143
+ --surgeon-model <name> Model for the Surgeon LLM (default: opus)
139
144
  -h, --help Print help
140
145
  ```
141
146
 
@@ -150,15 +155,22 @@ react to one another's bus events:
150
155
  redundant exploration. Measurable token savings on multi-story runs.
151
156
  - **Sentry** (default ON) — flags overlapping Edit/Write tool calls
152
157
  across concurrent stories.
153
- - **Critic** (`--with-critic`, default OFF) — Haiku evaluator reviews
154
- each agent turn against acceptance criteria; on a fail verdict, an
155
- inline corrective message lands as the agent's next turn so it
156
- self-corrects before commit.
157
- - **Surgeon** (`--with-surgeon`, default OFF) — when a story fails its
158
- retry budget, a ReplanItem is emitted on the bus and the Conductor
159
- recomputes the DAG at the next level boundary. The simplest mode just
160
- drops failing stories so dependents unblock; with `--surgeon-use-llm`
161
- Opus proposes splits, prerequisite inserts, or dependency rewires.
158
+ - **Critic** (default ON) — Haiku evaluator reviews each agent turn
159
+ against acceptance criteria; on a fail verdict, an inline corrective
160
+ message lands as the agent's next turn so it self-corrects before
161
+ commit. Disable with `--no-critic`.
162
+ - **Surgeon** (default ON, with LLM) — when a story fails its retry
163
+ budget, the Surgeon asks Opus for a richer replan and emits a
164
+ ReplanItem the Conductor applies at the next level boundary. The LLM
165
+ is biased toward keeping the work done — it prefers splitting a
166
+ too-large story into smaller pieces, inserting a prerequisite, or
167
+ rewiring dependencies, over dropping outright. A run is reported as
168
+ successful only when every original story passes; if the Surgeon
169
+ drops a story without replacement, the run terminates with a clear
170
+ "did not complete the goal" verdict instead of a green tick. Disable
171
+ the LLM with `--no-surgeon-llm` to fall back to deterministic
172
+ skip-only behavior, or `--no-surgeon` to remove adaptive replans
173
+ entirely.
162
174
 
163
175
  ## Requirements
164
176
 
package/dist/cli.mjs CHANGED
@@ -8673,6 +8673,14 @@ var Conductor = class extends Participant {
8673
8673
  globalCompleted = [];
8674
8674
  /** All stories that have failed terminally (after retries) in this run. */
8675
8675
  globalFailed = [];
8676
+ /**
8677
+ * Stories removed from the PRD by a Surgeon replan without a
8678
+ * replacement. These do NOT come back to globalFailed (the failing
8679
+ * story is gone from the PRD and won't be retried) but they DO
8680
+ * count against the run's success verdict — terminateRun(success)
8681
+ * is true only when this list is empty.
8682
+ */
8683
+ globalDropped = [];
8676
8684
  totalAttempts = 0;
8677
8685
  appliedReplans = 0;
8678
8686
  currentLevel = null;
@@ -8761,7 +8769,8 @@ var Conductor = class extends Participant {
8761
8769
  if (!this.prd) return;
8762
8770
  const levels = buildDag(this.prd.userStories, { onlyIncomplete: true });
8763
8771
  if (levels.length === 0) {
8764
- this.terminateRun(this.globalFailed.length === 0, null);
8772
+ const allPassed = this.prd.userStories.every((s) => s.passes) && this.globalDropped.length === 0;
8773
+ this.terminateRun(allPassed, null);
8765
8774
  return;
8766
8775
  }
8767
8776
  const level = levels[0];
@@ -8898,9 +8907,17 @@ ${prompt}`;
8898
8907
  replannedThisLevel = true;
8899
8908
  if (replan.removedStoryIds.length > 0) {
8900
8909
  const removeSet = new Set(replan.removedStoryIds);
8901
- for (let i = this.globalFailed.length - 1; i >= 0; i--) {
8902
- if (removeSet.has(this.globalFailed[i])) {
8903
- this.globalFailed.splice(i, 1);
8910
+ if (replan.addedStories.length > 0) {
8911
+ for (let i = this.globalFailed.length - 1; i >= 0; i--) {
8912
+ if (removeSet.has(this.globalFailed[i])) {
8913
+ this.globalFailed.splice(i, 1);
8914
+ }
8915
+ }
8916
+ } else {
8917
+ for (const id of replan.removedStoryIds) {
8918
+ if (!this.globalDropped.includes(id)) {
8919
+ this.globalDropped.push(id);
8920
+ }
8904
8921
  }
8905
8922
  }
8906
8923
  }
@@ -8930,15 +8947,17 @@ ${prompt}`;
8930
8947
  if (this.phase === "done") return;
8931
8948
  this.phase = "done";
8932
8949
  const totalDurationSecs = Math.round((Date.now() - this.startedAt) / 1e3);
8950
+ const droppedSegment = this.globalDropped.length > 0 ? `, ${this.globalDropped.length} dropped` : "";
8933
8951
  this.emit(
8934
8952
  new ConductorStateItem(
8935
8953
  success ? "done" : "failed",
8936
- abortReason ?? `${this.globalCompleted.length} passed, ${this.globalFailed.length} failed in ${totalDurationSecs}s`
8954
+ abortReason ?? `${this.globalCompleted.length} passed, ${this.globalFailed.length} failed${droppedSegment} in ${totalDurationSecs}s`
8937
8955
  )
8938
8956
  );
8939
8957
  const summary = {
8940
8958
  completedStories: [...this.globalCompleted],
8941
8959
  failedStories: [...this.globalFailed],
8960
+ droppedStories: [...this.globalDropped],
8942
8961
  totalDurationSecs,
8943
8962
  totalAttempts: this.totalAttempts
8944
8963
  };
@@ -9563,18 +9582,38 @@ DAG when stories fail. Given:
9563
9582
  2. The id, title, description, and FAILURE REASON of the story that just
9564
9583
  exhausted its retry budget.
9565
9584
 
9566
- Decide ONE of:
9567
- (a) "skip" \u2014 the failure isn't load-bearing; remove only this story.
9568
- (b) "split" \u2014 replace the failing story with 2-3 smaller stories.
9569
- (c) "prereq" \u2014 insert a NEW story that the failing one depends on,
9570
- AND remove the failing one (it can be re-attempted
9571
- later by re-introducing it manually).
9572
- (d) "abort" \u2014 nothing useful can be salvaged; emit no replan.
9585
+ Decide ONE of, in this order of preference:
9586
+ (a) "split" \u2014 replace the failing story with 2-3 smaller stories
9587
+ that together cover its acceptance criteria. Use
9588
+ this whenever the failure looks like the story was
9589
+ too broad \u2014 too many files, too many concerns,
9590
+ too much for one Claude session. Strongly preferred
9591
+ over removal whenever the goal still needs the work.
9592
+ (b) "prereq" \u2014 insert ONE OR MORE new prerequisite stories that
9593
+ the failing story now depends on, then ALSO add a
9594
+ replacement of the failing story (with updated
9595
+ dependsOn) so the original work still gets done.
9596
+ Removing without replacement is NOT prereq.
9597
+ (c) "rewire" \u2014 keep the failing story BUT modifyDeps so it runs
9598
+ in a different order, or change its dependsOn to
9599
+ unblock dependents. Use when the failure was
9600
+ timing-related, not scope-related.
9601
+ (d) "skip" \u2014 last resort. Use ONLY when the story is genuinely
9602
+ infeasible (e.g., asks for a library that doesn't
9603
+ exist, references files that aren't there). When
9604
+ you skip, modifyDeps for any dependents so the
9605
+ rest of the run can still complete.
9606
+ (e) "abort" \u2014 only when the entire run cannot continue.
9607
+
9608
+ Strong bias: the run is only successful when EVERY original goal item
9609
+ gets done. Splitting into smaller stories is almost always better than
9610
+ dropping. Don't drop just because one attempt failed \u2014 propose a
9611
+ different approach.
9573
9612
 
9574
9613
  Respond ONLY with a JSON object \u2014 no prose, no markdown fences \u2014 in
9575
9614
  exactly this shape:
9576
9615
 
9577
- {"action":"skip"|"split"|"prereq"|"abort",
9616
+ {"action":"split"|"prereq"|"rewire"|"skip"|"abort",
9578
9617
  "reason":"\u2026",
9579
9618
  "added":[ { "id":"S?","priority":N,"title":"\u2026","description":"\u2026",
9580
9619
  "dependsOn":["\u2026"], "acceptance":["\u2026"] } ],
@@ -9595,9 +9634,9 @@ var Surgeon = class extends Participant {
9595
9634
  constructor(opts) {
9596
9635
  super();
9597
9636
  this.opts = {
9598
- useLlm: opts.useLlm ?? false,
9637
+ useLlm: opts.useLlm ?? true,
9599
9638
  model: opts.model ?? "opus",
9600
- maxReplans: opts.maxReplans ?? 3,
9639
+ maxReplans: opts.maxReplans ?? 10,
9601
9640
  claudeBin: opts.claudeBin ?? "claude",
9602
9641
  timeoutMs: opts.timeoutMs ?? 9e4,
9603
9642
  snapshot: opts.snapshot