baro-ai 0.34.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -7672,17 +7672,6 @@ var AgenticEnvironment = class {
7672
7672
  this.isActive = false;
7673
7673
  }
7674
7674
  };
7675
- var OpenAIInferenceRunner = class {
7676
- constructor() {
7677
- this.runtime = new OpenAIResponses();
7678
- }
7679
- async *run(context, model, signal) {
7680
- const response = await this.runtime.infer(new InferenceRequest(model, context));
7681
- for (const item of response.contextItems) {
7682
- yield item;
7683
- }
7684
- }
7685
- };
7686
7675
  var Gpt55 = class {
7687
7676
  constructor() {
7688
7677
  this.specification = {
@@ -9876,6 +9865,58 @@ function extractVerdictJson(text) {
9876
9865
  throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
9877
9866
  }
9878
9867
 
9868
+ // ../baro-orchestrator/src/planning/openai-runtime.ts
9869
+ var runtime = new OpenAIResponses();
9870
/**
 * Run a single inference round against the shared module-level `runtime`.
 *
 * Builds an `InferenceRequest` from `model` and `context`, awaits
 * `runtime.infer(...)`, and repackages the response.
 *
 * @param context Model context passed through to the request.
 * @param model Model passed through to the request.
 * @returns {Promise<{items: *, usage: *}>} `items` is the response's
 *   `contextItems`; `usage` is the response's `tokenUsage`.
 */
async function runInferenceRound(context, model) {
  const request = new InferenceRequest(model, context);
  const { contextItems, tokenUsage } = await runtime.infer(request);
  return { items: contextItems, usage: tokenUsage };
}
9877
/**
 * Running totals of token usage across one or more inference rounds.
 *
 * Feed each round's usage record to `add()`; read the totals back through
 * `toJSON()` (machine-readable) or `summary()` (one log line).
 */
var UsageAccumulator = class {
  input = 0;
  output = 0;
  total = 0;
  cached = 0;
  reasoning = 0;
  rounds = 0;
  /**
   * Fold one round's usage record into the totals.
   *
   * A falsy `usage` is ignored and does not count as a round. Fields that
   * are missing or not finite numbers contribute 0, so one malformed
   * record cannot turn a counter into a string or NaN. When `totalTokens`
   * is not a finite number, the round's total falls back to
   * input + output so `total` never lags behind its parts.
   *
   * @param usage Usage record with optional `inputTokens`, `outputTokens`,
   *   `totalTokens`, `inputTokenDetails.cached_tokens` and
   *   `outputTokenDetails.reasoning_tokens`.
   */
  add(usage) {
    if (!usage) return;
    const count = (value) => typeof value === "number" && Number.isFinite(value) ? value : 0;
    const roundInput = count(usage.inputTokens);
    const roundOutput = count(usage.outputTokens);
    const hasTotal = typeof usage.totalTokens === "number" && Number.isFinite(usage.totalTokens);
    this.rounds += 1;
    this.input += roundInput;
    this.output += roundOutput;
    this.total += hasTotal ? usage.totalTokens : roundInput + roundOutput;
    this.cached += count(usage.inputTokenDetails?.cached_tokens);
    this.reasoning += count(usage.outputTokenDetails?.reasoning_tokens);
  }
  /** True while no usage record has been added. */
  get isEmpty() {
    return this.rounds === 0;
  }
  /**
   * Plain-object snapshot suitable for embedding in
   * `AgentResultItem.usage` (which is typed `any` to allow per-
   * provider shapes). Keys are snake_case to line up with what the
   * Claude side's stream-json mapper produces from Anthropic
   * usage frames.
   */
  toJSON() {
    return {
      input_tokens: this.input,
      output_tokens: this.output,
      total_tokens: this.total,
      cached_input_tokens: this.cached,
      reasoning_tokens: this.reasoning,
      rounds: this.rounds
    };
  }
  /**
   * One-line summary for the stderr / log path. The cached and reasoning
   * parts are omitted while their counters are zero.
   */
  summary() {
    if (this.isEmpty) return "(no token usage reported)";
    return `${this.total} total tokens (${this.input} in, ${this.output} out${this.cached ? `, ${this.cached} cached` : ""}${this.reasoning ? `, ${this.reasoning} reasoning` : ""}) across ${this.rounds} round(s)`;
  }
};
9919
+
9879
9920
  // ../baro-orchestrator/src/participants/critic-openai.ts
9880
9921
  function pickModel(name) {
9881
9922
  switch (name) {
@@ -9896,7 +9937,6 @@ function pickModel(name) {
9896
9937
  var CriticOpenAI = class extends BaroParticipant {
9897
9938
  opts;
9898
9939
  model;
9899
- runner = new OpenAIInferenceRunner();
9900
9940
  emissions = /* @__PURE__ */ new Map();
9901
9941
  turnCount = /* @__PURE__ */ new Map();
9902
9942
  pending = /* @__PURE__ */ new Set();
@@ -9967,9 +10007,12 @@ var CriticOpenAI = class extends BaroParticipant {
9967
10007
  const userPrompt = buildEvalPrompt(criteria, resultText);
9968
10008
  const context = ModelContext.create("critic").addContextItem(SystemMessageItem.create(VERDICT_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
9969
10009
  try {
10010
+ const round = await runInferenceRound(context, this.model);
10011
+ const usage = new UsageAccumulator();
10012
+ usage.add(round.usage);
9970
10013
  let assistantText = "";
9971
- for await (const item of this.runner.run(context, this.model)) {
9972
- if (item.type === "message" && item.role === "assistant") {
10014
+ for (const item of round.items) {
10015
+ if (item.type === "message") {
9973
10016
  const json = item.toJSON();
9974
10017
  assistantText += json.content?.[0]?.text ?? "";
9975
10018
  }
@@ -9977,6 +10020,8 @@ var CriticOpenAI = class extends BaroParticipant {
9977
10020
  if (!assistantText.trim()) {
9978
10021
  throw new Error("OpenAI returned empty assistant text");
9979
10022
  }
10023
+ process.stderr.write(`[critic-openai] ${usage.summary()}
10024
+ `);
9980
10025
  const verdictJson = extractVerdictJson(assistantText);
9981
10026
  const parsed = JSON.parse(verdictJson);
9982
10027
  return {
@@ -11266,7 +11311,6 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11266
11311
  spec;
11267
11312
  opts;
11268
11313
  model;
11269
- runner = new OpenAIInferenceRunner();
11270
11314
  tools;
11271
11315
  envRef = null;
11272
11316
  currentPhase = "idle";
@@ -11381,6 +11425,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11381
11425
  for (let turn = 1; turn <= this.spec.maxTurns; turn++) {
11382
11426
  const turnResult = await this.runOneTurn(context);
11383
11427
  context = turnResult.context;
11428
+ const usageJson = turnResult.usage.isEmpty ? null : turnResult.usage.toJSON();
11384
11429
  this.envRef?.deliverBusEvent(
11385
11430
  this,
11386
11431
  new AgentResultItem(
@@ -11390,14 +11435,17 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11390
11435
  // session id — not applicable for OpenAI
11391
11436
  !turnResult.success,
11392
11437
  turnResult.assistantText,
11393
- null,
11394
- // usage info — not surfaced this phase
11438
+ usageJson,
11395
11439
  null,
11396
11440
  null,
11397
11441
  null,
11398
11442
  {}
11399
11443
  )
11400
11444
  );
11445
+ process.stderr.write(
11446
+ `[story-openai/${this.spec.id}] turn ${turn}: ${turnResult.usage.summary()}
11447
+ `
11448
+ );
11401
11449
  if (!turnResult.success) {
11402
11450
  this.transition("failed", turnResult.error ?? "turn failed");
11403
11451
  return;
@@ -11428,19 +11476,27 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11428
11476
  let context = initialContext;
11429
11477
  let assistantText = null;
11430
11478
  const perRoundMs = this.opts.perRoundTimeoutSecs * 1e3;
11479
+ const usage = new UsageAccumulator();
11431
11480
  for (let round = 1; round <= this.opts.maxRoundsPerTurn; round++) {
11432
- const ac = new AbortController();
11433
- const timer = setTimeout(() => ac.abort(), perRoundMs);
11434
11481
  const calls = [];
11435
11482
  let sawMessage = false;
11436
11483
  let lastMessageText = null;
11437
11484
  try {
11438
- for await (const item of this.runner.run(context, this.model, ac.signal)) {
11485
+ const roundPromise = runInferenceRound(context, this.model);
11486
+ const timeoutPromise = new Promise(
11487
+ (_, rej) => setTimeout(
11488
+ () => rej(new Error(`round ${round} timed out after ${perRoundMs}ms`)),
11489
+ perRoundMs
11490
+ )
11491
+ );
11492
+ const result = await Promise.race([roundPromise, timeoutPromise]);
11493
+ usage.add(result.usage);
11494
+ for (const item of result.items) {
11439
11495
  if (item.type === "function_call") {
11440
11496
  await this.envRef?.deliverFunctionCall(this, item);
11441
11497
  context = context.addContextItem(item);
11442
11498
  calls.push(item);
11443
- } else if (item.type === "message" && item.role === "assistant") {
11499
+ } else if (item.type === "message") {
11444
11500
  await this.envRef?.deliverModelMessage(this, item);
11445
11501
  context = context.addContextItem(item);
11446
11502
  const json = item.toJSON();
@@ -11452,15 +11508,13 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11452
11508
  }
11453
11509
  }
11454
11510
  } catch (e) {
11455
- clearTimeout(timer);
11456
11511
  return {
11457
11512
  context,
11458
11513
  success: false,
11459
11514
  assistantText,
11515
+ usage,
11460
11516
  error: `inference round ${round} failed: ${e?.message ?? String(e)}`
11461
11517
  };
11462
- } finally {
11463
- clearTimeout(timer);
11464
11518
  }
11465
11519
  for (const call of calls) {
11466
11520
  const tool = this.tools.find((t) => t.name === call.name);
@@ -11473,7 +11527,8 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11473
11527
  return {
11474
11528
  context,
11475
11529
  success: true,
11476
- assistantText: lastMessageText
11530
+ assistantText: lastMessageText,
11531
+ usage
11477
11532
  };
11478
11533
  }
11479
11534
  if (!sawMessage && calls.length === 0) {
@@ -11481,6 +11536,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11481
11536
  context,
11482
11537
  success: false,
11483
11538
  assistantText,
11539
+ usage,
11484
11540
  error: `round ${round} returned no items`
11485
11541
  };
11486
11542
  }
@@ -11490,6 +11546,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
11490
11546
  context,
11491
11547
  success: false,
11492
11548
  assistantText,
11549
+ usage,
11493
11550
  error: `exceeded maxRoundsPerTurn=${this.opts.maxRoundsPerTurn}`
11494
11551
  };
11495
11552
  }
@@ -11852,7 +11909,6 @@ function pickModel3(name) {
11852
11909
  var SurgeonOpenAI = class extends BaroParticipant {
11853
11910
  opts;
11854
11911
  model;
11855
- runner = new OpenAIInferenceRunner();
11856
11912
  replansEmitted = 0;
11857
11913
  pending = /* @__PURE__ */ new Set();
11858
11914
  constructor(opts) {
@@ -11896,9 +11952,12 @@ var SurgeonOpenAI = class extends BaroParticipant {
11896
11952
  const userPrompt = buildSurgeonPrompt(snap, failure);
11897
11953
  const context = ModelContext.create("surgeon").addContextItem(SystemMessageItem.create(SURGEON_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
11898
11954
  try {
11955
+ const round = await runInferenceRound(context, this.model);
11956
+ const usage = new UsageAccumulator();
11957
+ usage.add(round.usage);
11899
11958
  let assistantText = "";
11900
- for await (const item of this.runner.run(context, this.model)) {
11901
- if (item.type === "message" && item.role === "assistant") {
11959
+ for (const item of round.items) {
11960
+ if (item.type === "message") {
11902
11961
  const json = item.toJSON();
11903
11962
  assistantText += json.content?.[0]?.text ?? "";
11904
11963
  }
@@ -11906,6 +11965,8 @@ var SurgeonOpenAI = class extends BaroParticipant {
11906
11965
  if (!assistantText.trim()) {
11907
11966
  throw new Error("OpenAI returned empty assistant text");
11908
11967
  }
11968
+ process.stderr.write(`[surgeon-openai] ${usage.summary()}
11969
+ `);
11909
11970
  const verdictJson = extractJsonObject(assistantText);
11910
11971
  const parsed = JSON.parse(verdictJson);
11911
11972
  if (parsed.action === "abort") return null;