baro-ai 0.33.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -7672,17 +7672,6 @@ var AgenticEnvironment = class {
7672
7672
  this.isActive = false;
7673
7673
  }
7674
7674
  };
7675
/**
 * Adapter that runs a single inference request through an
 * `OpenAIResponses` runtime and exposes the returned context items as an
 * async iterator.
 */
var OpenAIInferenceRunner = class {
  constructor() {
    this.runtime = new OpenAIResponses();
  }

  /**
   * Performs one inference call and yields every context item of the
   * response, in the order the runtime returned them.
   *
   * @param context Model context forwarded to the inference request.
   * @param model Model forwarded to the inference request.
   * @param signal Accepted for interface compatibility; this method does
   *   not read it, so aborting the signal has no effect on the call.
   */
  async *run(context, model, signal) {
    const request = new InferenceRequest(model, context);
    const { contextItems } = await this.runtime.infer(request);
    for (const contextItem of contextItems) {
      yield contextItem;
    }
  }
};
7686
7675
  var Gpt55 = class {
7687
7676
  constructor() {
7688
7677
  this.specification = {
@@ -8169,7 +8158,7 @@ var ClaudeSystemItem = class extends BusEvent {
8169
8158
  };
8170
8159
  }
8171
8160
  };
8172
- var ClaudeResultItem = class extends BusEvent {
8161
+ var AgentResultItem = class extends BusEvent {
8173
8162
  constructor(agentId, subtype, sessionId, isError, resultText, usage, totalCostUsd, numTurns, durationMs, raw) {
8174
8163
  super();
8175
8164
  this.agentId = agentId;
@@ -8662,7 +8651,7 @@ function mapClaudeEvent(agentId, event) {
8662
8651
  }
8663
8652
  case "result": {
8664
8653
  items.push(
8665
- new ClaudeResultItem(
8654
+ new AgentResultItem(
8666
8655
  agentId,
8667
8656
  typeof event.subtype === "string" ? event.subtype : "unknown",
8668
8657
  sessionId,
@@ -8904,7 +8893,7 @@ var ClaudeCliParticipant = class _ClaudeCliParticipant extends BaroParticipant {
8904
8893
  this.transition("running", "claude init received");
8905
8894
  this.resolveReady();
8906
8895
  }
8907
- if (item instanceof ClaudeResultItem) {
8896
+ if (item instanceof AgentResultItem) {
8908
8897
  this.lastResult = item;
8909
8898
  this.transition(item.isError ? "failed" : "done", `result:${item.subtype}`);
8910
8899
  }
@@ -9038,7 +9027,7 @@ var StoryAgent = class extends BaroParticipant {
9038
9027
  if (event instanceof AgentTargetedMessageItem && event.recipientId === this.spec.id) {
9039
9028
  this.notifyStoryMessage?.();
9040
9029
  }
9041
- if (event instanceof ClaudeResultItem && event.agentId === this.spec.id) {
9030
+ if (event instanceof AgentResultItem && event.agentId === this.spec.id) {
9042
9031
  this.notifyStoryResult?.();
9043
9032
  }
9044
9033
  }
@@ -9734,7 +9723,7 @@ var Critic = class extends BaroParticipant {
9734
9723
  await Promise.allSettled([...this.pending]);
9735
9724
  }
9736
9725
  async onExternalBusEvent(_source, event) {
9737
- if (!(event instanceof ClaudeResultItem)) return;
9726
+ if (!(event instanceof AgentResultItem)) return;
9738
9727
  if (event.isError || !event.resultText) return;
9739
9728
  const criteria = this.opts.targets.get(event.agentId);
9740
9729
  if (!criteria || criteria.length === 0) return;
@@ -9876,6 +9865,58 @@ function extractVerdictJson(text) {
9876
9865
  throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
9877
9866
  }
9878
9867
 
9868
+ // ../baro-orchestrator/src/planning/openai-runtime.ts
9869
// Single runtime instance used by every call to runInferenceRound.
var runtime = new OpenAIResponses();

/**
 * Runs one inference round against the shared runtime.
 *
 * @param context Model context to send.
 * @param model Model to run the request against.
 * @returns {Promise<{items: *, usage: *}>} `items` is the response's
 *   `contextItems`; `usage` is the response's `tokenUsage`, passed through
 *   unchanged.
 */
async function runInferenceRound(context, model) {
  const request = new InferenceRequest(model, context);
  const { contextItems, tokenUsage } = await runtime.infer(request);
  return { items: contextItems, usage: tokenUsage };
}
9877
/**
 * Running tally of token usage over one or more inference rounds.
 *
 * Each `add()` call with a usage object counts as one round; calls with a
 * missing usage object are ignored, so `isEmpty` stays true until a round
 * actually reports usage.
 */
var UsageAccumulator = class {
  input = 0;
  output = 0;
  total = 0;
  cached = 0;
  reasoning = 0;
  rounds = 0;

  /**
   * Folds one round's usage into the totals.
   *
   * @param usage Usage object with optional `inputTokens`, `outputTokens`,
   *   `totalTokens`, `inputTokenDetails.cached_tokens` and
   *   `outputTokenDetails.reasoning_tokens`. Nullish/falsy values are
   *   skipped entirely and do not count as a round.
   */
  add(usage) {
    if (!usage) return;
    const inputTokens = usage.inputTokens ?? 0;
    const outputTokens = usage.outputTokens ?? 0;
    this.rounds += 1;
    this.input += inputTokens;
    this.output += outputTokens;
    // When the frame omits totalTokens, derive it from the parts so the
    // total never reads lower than input + output for that round. An
    // explicitly reported total (including 0) is always used as given.
    this.total += usage.totalTokens ?? (inputTokens + outputTokens);
    this.cached += usage.inputTokenDetails?.cached_tokens ?? 0;
    this.reasoning += usage.outputTokenDetails?.reasoning_tokens ?? 0;
  }

  /** True while no round has reported usage. */
  get isEmpty() {
    return this.rounds === 0;
  }

  /**
   * Plain-object snapshot of the totals with snake_case keys.
   * NOTE(review): intended for embedding in `AgentResultItem.usage` and to
   * line up with the Claude-side usage shape — confirm against that mapper.
   */
  toJSON() {
    return {
      input_tokens: this.input,
      output_tokens: this.output,
      total_tokens: this.total,
      cached_input_tokens: this.cached,
      reasoning_tokens: this.reasoning,
      rounds: this.rounds
    };
  }

  /**
   * One-line human-readable summary for log output. The cached and
   * reasoning segments are included only when their counts are non-zero.
   */
  summary() {
    if (this.isEmpty) return "(no token usage reported)";
    return `${this.total} total tokens (${this.input} in, ${this.output} out${this.cached ? `, ${this.cached} cached` : ""}${this.reasoning ? `, ${this.reasoning} reasoning` : ""}) across ${this.rounds} round(s)`;
  }
};
9919
+
9879
9920
  // ../baro-orchestrator/src/participants/critic-openai.ts
9880
9921
  function pickModel(name) {
9881
9922
  switch (name) {
@@ -9896,7 +9937,6 @@ function pickModel(name) {
9896
9937
  var CriticOpenAI = class extends BaroParticipant {
9897
9938
  opts;
9898
9939
  model;
9899
- runner = new OpenAIInferenceRunner();
9900
9940
  emissions = /* @__PURE__ */ new Map();
9901
9941
  turnCount = /* @__PURE__ */ new Map();
9902
9942
  pending = /* @__PURE__ */ new Set();
@@ -9914,7 +9954,7 @@ var CriticOpenAI = class extends BaroParticipant {
9914
9954
  await Promise.allSettled([...this.pending]);
9915
9955
  }
9916
9956
  async onExternalBusEvent(_source, event) {
9917
- if (!(event instanceof ClaudeResultItem)) return;
9957
+ if (!(event instanceof AgentResultItem)) return;
9918
9958
  if (event.isError || !event.resultText) return;
9919
9959
  const criteria = this.opts.targets.get(event.agentId);
9920
9960
  if (!criteria || criteria.length === 0) return;
@@ -9967,9 +10007,12 @@ var CriticOpenAI = class extends BaroParticipant {
9967
10007
  const userPrompt = buildEvalPrompt(criteria, resultText);
9968
10008
  const context = ModelContext.create("critic").addContextItem(SystemMessageItem.create(VERDICT_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
9969
10009
  try {
10010
+ const round = await runInferenceRound(context, this.model);
10011
+ const usage = new UsageAccumulator();
10012
+ usage.add(round.usage);
9970
10013
  let assistantText = "";
9971
- for await (const item of this.runner.run(context, this.model)) {
9972
- if (item.type === "message" && item.role === "assistant") {
10014
+ for (const item of round.items) {
10015
+ if (item.type === "message") {
9973
10016
  const json = item.toJSON();
9974
10017
  assistantText += json.content?.[0]?.text ?? "";
9975
10018
  }
@@ -9977,6 +10020,8 @@ var CriticOpenAI = class extends BaroParticipant {
9977
10020
  if (!assistantText.trim()) {
9978
10021
  throw new Error("OpenAI returned empty assistant text");
9979
10022
  }
10023
+ process.stderr.write(`[critic-openai] ${usage.summary()}
10024
+ `);
9980
10025
  const verdictJson = extractVerdictJson(assistantText);
9981
10026
  const parsed = JSON.parse(verdictJson);
9982
10027
  return {
@@ -11266,7 +11311,6 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11266
11311
  spec;
11267
11312
  opts;
11268
11313
  model;
11269
- runner = new OpenAIInferenceRunner();
11270
11314
  tools;
11271
11315
  envRef = null;
11272
11316
  currentPhase = "idle";
@@ -11381,23 +11425,27 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11381
11425
  for (let turn = 1; turn <= this.spec.maxTurns; turn++) {
11382
11426
  const turnResult = await this.runOneTurn(context);
11383
11427
  context = turnResult.context;
11428
+ const usageJson = turnResult.usage.isEmpty ? null : turnResult.usage.toJSON();
11384
11429
  this.envRef?.deliverBusEvent(
11385
11430
  this,
11386
- new ClaudeResultItem(
11431
+ new AgentResultItem(
11387
11432
  this.spec.id,
11388
11433
  turnResult.success ? "success" : "error",
11389
11434
  null,
11390
11435
  // session id — not applicable for OpenAI
11391
11436
  !turnResult.success,
11392
11437
  turnResult.assistantText,
11393
- null,
11394
- // usage info — not surfaced this phase
11438
+ usageJson,
11395
11439
  null,
11396
11440
  null,
11397
11441
  null,
11398
11442
  {}
11399
11443
  )
11400
11444
  );
11445
+ process.stderr.write(
11446
+ `[story-openai/${this.spec.id}] turn ${turn}: ${turnResult.usage.summary()}
11447
+ `
11448
+ );
11401
11449
  if (!turnResult.success) {
11402
11450
  this.transition("failed", turnResult.error ?? "turn failed");
11403
11451
  return;
@@ -11428,19 +11476,27 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11428
11476
  let context = initialContext;
11429
11477
  let assistantText = null;
11430
11478
  const perRoundMs = this.opts.perRoundTimeoutSecs * 1e3;
11479
+ const usage = new UsageAccumulator();
11431
11480
  for (let round = 1; round <= this.opts.maxRoundsPerTurn; round++) {
11432
- const ac = new AbortController();
11433
- const timer = setTimeout(() => ac.abort(), perRoundMs);
11434
11481
  const calls = [];
11435
11482
  let sawMessage = false;
11436
11483
  let lastMessageText = null;
11437
11484
  try {
11438
- for await (const item of this.runner.run(context, this.model, ac.signal)) {
11485
+ const roundPromise = runInferenceRound(context, this.model);
11486
+ const timeoutPromise = new Promise(
11487
+ (_, rej) => setTimeout(
11488
+ () => rej(new Error(`round ${round} timed out after ${perRoundMs}ms`)),
11489
+ perRoundMs
11490
+ )
11491
+ );
11492
+ const result = await Promise.race([roundPromise, timeoutPromise]);
11493
+ usage.add(result.usage);
11494
+ for (const item of result.items) {
11439
11495
  if (item.type === "function_call") {
11440
11496
  await this.envRef?.deliverFunctionCall(this, item);
11441
11497
  context = context.addContextItem(item);
11442
11498
  calls.push(item);
11443
- } else if (item.type === "message" && item.role === "assistant") {
11499
+ } else if (item.type === "message") {
11444
11500
  await this.envRef?.deliverModelMessage(this, item);
11445
11501
  context = context.addContextItem(item);
11446
11502
  const json = item.toJSON();
@@ -11452,15 +11508,13 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11452
11508
  }
11453
11509
  }
11454
11510
  } catch (e) {
11455
- clearTimeout(timer);
11456
11511
  return {
11457
11512
  context,
11458
11513
  success: false,
11459
11514
  assistantText,
11515
+ usage,
11460
11516
  error: `inference round ${round} failed: ${e?.message ?? String(e)}`
11461
11517
  };
11462
- } finally {
11463
- clearTimeout(timer);
11464
11518
  }
11465
11519
  for (const call of calls) {
11466
11520
  const tool = this.tools.find((t) => t.name === call.name);
@@ -11473,7 +11527,8 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11473
11527
  return {
11474
11528
  context,
11475
11529
  success: true,
11476
- assistantText: lastMessageText
11530
+ assistantText: lastMessageText,
11531
+ usage
11477
11532
  };
11478
11533
  }
11479
11534
  if (!sawMessage && calls.length === 0) {
@@ -11481,6 +11536,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11481
11536
  context,
11482
11537
  success: false,
11483
11538
  assistantText,
11539
+ usage,
11484
11540
  error: `round ${round} returned no items`
11485
11541
  };
11486
11542
  }
@@ -11490,6 +11546,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11490
11546
  context,
11491
11547
  success: false,
11492
11548
  assistantText,
11549
+ usage,
11493
11550
  error: `exceeded maxRoundsPerTurn=${this.opts.maxRoundsPerTurn}`
11494
11551
  };
11495
11552
  }
@@ -11591,6 +11648,8 @@ var StoryFactory = class extends BaroParticipant {
11591
11648
  if (!this.envRef) return;
11592
11649
  if (this.active.has(req.storyId)) return;
11593
11650
  const llm = this.opts.llm ?? "claude";
11651
+ const claudeModel = this.opts.storyModelOverride ?? req.model;
11652
+ const openaiModel = this.opts.storyModelOverride ?? this.opts.openaiModel ?? "gpt-5.5";
11594
11653
  const agent = llm === "openai" ? new OpenAIStoryAgent(
11595
11654
  {
11596
11655
  id: req.storyId,
@@ -11600,12 +11659,12 @@ var StoryFactory = class extends BaroParticipant {
11600
11659
  retries: req.retries,
11601
11660
  timeoutSecs: req.timeoutSecs
11602
11661
  },
11603
- { model: this.opts.openaiModel ?? "gpt-5.5" }
11662
+ { model: openaiModel }
11604
11663
  ) : new StoryAgent({
11605
11664
  id: req.storyId,
11606
11665
  prompt: req.prompt,
11607
11666
  cwd: this.opts.cwd,
11608
- model: req.model,
11667
+ model: claudeModel,
11609
11668
  retries: req.retries,
11610
11669
  timeoutSecs: req.timeoutSecs
11611
11670
  });
@@ -11850,7 +11909,6 @@ function pickModel3(name) {
11850
11909
  var SurgeonOpenAI = class extends BaroParticipant {
11851
11910
  opts;
11852
11911
  model;
11853
- runner = new OpenAIInferenceRunner();
11854
11912
  replansEmitted = 0;
11855
11913
  pending = /* @__PURE__ */ new Set();
11856
11914
  constructor(opts) {
@@ -11894,9 +11952,12 @@ var SurgeonOpenAI = class extends BaroParticipant {
11894
11952
  const userPrompt = buildSurgeonPrompt(snap, failure);
11895
11953
  const context = ModelContext.create("surgeon").addContextItem(SystemMessageItem.create(SURGEON_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
11896
11954
  try {
11955
+ const round = await runInferenceRound(context, this.model);
11956
+ const usage = new UsageAccumulator();
11957
+ usage.add(round.usage);
11897
11958
  let assistantText = "";
11898
- for await (const item of this.runner.run(context, this.model)) {
11899
- if (item.type === "message" && item.role === "assistant") {
11959
+ for (const item of round.items) {
11960
+ if (item.type === "message") {
11900
11961
  const json = item.toJSON();
11901
11962
  assistantText += json.content?.[0]?.text ?? "";
11902
11963
  }
@@ -11904,6 +11965,8 @@ var SurgeonOpenAI = class extends BaroParticipant {
11904
11965
  if (!assistantText.trim()) {
11905
11966
  throw new Error("OpenAI returned empty assistant text");
11906
11967
  }
11968
+ process.stderr.write(`[surgeon-openai] ${usage.summary()}
11969
+ `);
11907
11970
  const verdictJson = extractJsonObject(assistantText);
11908
11971
  const parsed = JSON.parse(verdictJson);
11909
11972
  if (parsed.action === "abort") return null;
@@ -12082,7 +12145,8 @@ async function orchestrate(config) {
12082
12145
  const storyFactory = new StoryFactory({
12083
12146
  cwd: config.cwd,
12084
12147
  llm,
12085
- openaiModel: "gpt-5.5"
12148
+ openaiModel: config.storyModel ?? "gpt-5.5",
12149
+ storyModelOverride: config.storyModel
12086
12150
  });
12087
12151
  storyFactory.setEnvironment(env);
12088
12152
  storyFactory.join(env);
@@ -12160,7 +12224,7 @@ var BaroEventForwarder = class extends BaroParticipant {
12160
12224
  this.handleStoryResult(event);
12161
12225
  return;
12162
12226
  }
12163
- if (event instanceof ClaudeResultItem) {
12227
+ if (event instanceof AgentResultItem) {
12164
12228
  this.handleClaudeResult(event);
12165
12229
  return;
12166
12230
  }
@@ -12375,6 +12439,9 @@ function parseArgs(argv) {
12375
12439
  10
12376
12440
  );
12377
12441
  break;
12442
+ case "--story-model":
12443
+ args.storyModel = required(argv, ++i, "--story-model");
12444
+ break;
12378
12445
  case "--llm": {
12379
12446
  const v = required(argv, ++i, "--llm");
12380
12447
  if (v !== "claude" && v !== "openai") {
@@ -12463,7 +12530,8 @@ async function main() {
12463
12530
  surgeonUseLlm: args.surgeonUseLlm,
12464
12531
  surgeonModel: args.surgeonModel,
12465
12532
  intraLevelDelaySecs: args.intraLevelDelaySecs,
12466
- llm: args.llm
12533
+ llm: args.llm,
12534
+ storyModel: args.storyModel
12467
12535
  };
12468
12536
  if (args.llm === "openai" && !process.env.OPENAI_API_KEY) {
12469
12537
  process.stderr.write(