npm - baro-ai - Versions diffs - 0.34.0 → 0.35.0 - Mend

baro-ai 0.34.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/cli.mjs (changed)

@@ -7672,17 +7672,6 @@ var AgenticEnvironment = class {
     this.isActive = false;
   }
 };
-var OpenAIInferenceRunner = class {
-  constructor() {
-    this.runtime = new OpenAIResponses();
-  }
-  async *run(context, model, signal) {
-    const response = await this.runtime.infer(new InferenceRequest(model, context));
-    for (const item of response.contextItems) {
-      yield item;
-    }
-  }
-};
 var Gpt55 = class {
   constructor() {
     this.specification = {
@@ -9876,6 +9865,58 @@ function extractVerdictJson(text) {
   throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
 }
+// ../baro-orchestrator/src/planning/openai-runtime.ts
+var runtime = new OpenAIResponses();
+async function runInferenceRound(context, model) {
+  const response = await runtime.infer(new InferenceRequest(model, context));
+  return {
+    items: response.contextItems,
+    usage: response.tokenUsage
+  };
+}
+var UsageAccumulator = class {
+  input = 0;
+  output = 0;
+  total = 0;
+  cached = 0;
+  reasoning = 0;
+  rounds = 0;
+  add(usage) {
+    if (!usage) return;
+    this.rounds += 1;
+    this.input += usage.inputTokens ?? 0;
+    this.output += usage.outputTokens ?? 0;
+    this.total += usage.totalTokens ?? 0;
+    this.cached += usage.inputTokenDetails?.cached_tokens ?? 0;
+    this.reasoning += usage.outputTokenDetails?.reasoning_tokens ?? 0;
+  }
+  get isEmpty() {
+    return this.rounds === 0;
+  }
+  /**
+   * Plain-object snapshot suitable for embedding in
+   * `AgentResultItem.usage` (which is typed `any` to allow per-
+   * provider shapes). Keys are snake_case to line up with what the
+   * Claude side's stream-json mapper produces from Anthropic
+   * usage frames.
+   */
+  toJSON() {
+    return {
+      input_tokens: this.input,
+      output_tokens: this.output,
+      total_tokens: this.total,
+      cached_input_tokens: this.cached,
+      reasoning_tokens: this.reasoning,
+      rounds: this.rounds
+    };
+  }
+  /** One-line summary for the stderr / log path. */
+  summary() {
+    if (this.isEmpty) return "(no token usage reported)";
+    return `${this.total} total tokens (${this.input} in, ${this.output} out${this.cached ? `, ${this.cached} cached` : ""}${this.reasoning ? `, ${this.reasoning} reasoning` : ""}) across ${this.rounds} round(s)`;
+  }
+};
 // ../baro-orchestrator/src/participants/critic-openai.ts
 function pickModel(name) {
   switch (name) {
@@ -9896,7 +9937,6 @@ function pickModel(name) {
 var CriticOpenAI = class extends BaroParticipant {
   opts;
   model;
-  runner = new OpenAIInferenceRunner();
   emissions = /* @__PURE__ */ new Map();
   turnCount = /* @__PURE__ */ new Map();
   pending = /* @__PURE__ */ new Set();
@@ -9967,9 +10007,12 @@ var CriticOpenAI = class extends BaroParticipant {
     const userPrompt = buildEvalPrompt(criteria, resultText);
     const context = ModelContext.create("critic").addContextItem(SystemMessageItem.create(VERDICT_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
     try {
+      const round = await runInferenceRound(context, this.model);
+      const usage = new UsageAccumulator();
+      usage.add(round.usage);
       let assistantText = "";
-      for await (const item of this.runner.run(context, this.model)) {
-        if (item.type === "message" && item.role === "assistant") {
+      for (const item of round.items) {
+        if (item.type === "message") {
           const json = item.toJSON();
           assistantText += json.content?.[0]?.text ?? "";
         }
@@ -9977,6 +10020,8 @@ var CriticOpenAI = class extends BaroParticipant {
       if (!assistantText.trim()) {
         throw new Error("OpenAI returned empty assistant text");
       }
+      process.stderr.write(`[critic-openai] ${usage.summary()}
+`);
       const verdictJson = extractVerdictJson(assistantText);
       const parsed = JSON.parse(verdictJson);
       return {
@@ -11266,7 +11311,6 @@ var OpenAIStoryAgent = class extends BaroParticipant {
   spec;
   opts;
   model;
-  runner = new OpenAIInferenceRunner();
   tools;
   envRef = null;
   currentPhase = "idle";
@@ -11381,6 +11425,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
     for (let turn = 1; turn <= this.spec.maxTurns; turn++) {
       const turnResult = await this.runOneTurn(context);
       context = turnResult.context;
+      const usageJson = turnResult.usage.isEmpty ? null : turnResult.usage.toJSON();
       this.envRef?.deliverBusEvent(
         this,
         new AgentResultItem(
@@ -11390,14 +11435,17 @@ var OpenAIStoryAgent = class extends BaroParticipant {
           // session id — not applicable for OpenAI
           !turnResult.success,
           turnResult.assistantText,
-          null,
-          // usage info — not surfaced this phase
+          usageJson,
           null,
           null,
           null,
           {}
         )
       );
+      process.stderr.write(
+        `[story-openai/${this.spec.id}] turn ${turn}: ${turnResult.usage.summary()}
+`
+      );
       if (!turnResult.success) {
         this.transition("failed", turnResult.error ?? "turn failed");
         return;
@@ -11428,19 +11476,27 @@ var OpenAIStoryAgent = class extends BaroParticipant {
     let context = initialContext;
     let assistantText = null;
     const perRoundMs = this.opts.perRoundTimeoutSecs * 1e3;
+    const usage = new UsageAccumulator();
     for (let round = 1; round <= this.opts.maxRoundsPerTurn; round++) {
-      const ac = new AbortController();
-      const timer = setTimeout(() => ac.abort(), perRoundMs);
       const calls = [];
       let sawMessage = false;
       let lastMessageText = null;
       try {
-        for await (const item of this.runner.run(context, this.model, ac.signal)) {
+        const roundPromise = runInferenceRound(context, this.model);
+        const timeoutPromise = new Promise(
+          (_, rej) => setTimeout(
+            () => rej(new Error(`round ${round} timed out after ${perRoundMs}ms`)),
+            perRoundMs
+          )
+        );
+        const result = await Promise.race([roundPromise, timeoutPromise]);
+        usage.add(result.usage);
+        for (const item of result.items) {
           if (item.type === "function_call") {
             await this.envRef?.deliverFunctionCall(this, item);
             context = context.addContextItem(item);
             calls.push(item);
-          } else if (item.type === "message" && item.role === "assistant") {
+          } else if (item.type === "message") {
             await this.envRef?.deliverModelMessage(this, item);
             context = context.addContextItem(item);
             const json = item.toJSON();
@@ -11452,15 +11508,13 @@ var OpenAIStoryAgent = class extends BaroParticipant {
           }
         }
       } catch (e) {
-        clearTimeout(timer);
         return {
           context,
           success: false,
           assistantText,
+          usage,
           error: `inference round ${round} failed: ${e?.message ?? String(e)}`
         };
-      } finally {
-        clearTimeout(timer);
       }
       for (const call of calls) {
         const tool = this.tools.find((t) => t.name === call.name);
@@ -11473,7 +11527,8 @@ var OpenAIStoryAgent = class extends BaroParticipant {
         return {
           context,
           success: true,
-          assistantText: lastMessageText
+          assistantText: lastMessageText,
+          usage
         };
       }
       if (!sawMessage && calls.length === 0) {
@@ -11481,6 +11536,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
           context,
           success: false,
           assistantText,
+          usage,
           error: `round ${round} returned no items`
         };
       }
@@ -11490,6 +11546,7 @@ var OpenAIStoryAgent = class extends BaroParticipant {
       context,
       success: false,
       assistantText,
+      usage,
       error: `exceeded maxRoundsPerTurn=${this.opts.maxRoundsPerTurn}`
     };
   }
@@ -11852,7 +11909,6 @@ function pickModel3(name) {
 var SurgeonOpenAI = class extends BaroParticipant {
   opts;
   model;
-  runner = new OpenAIInferenceRunner();
   replansEmitted = 0;
   pending = /* @__PURE__ */ new Set();
   constructor(opts) {
@@ -11896,9 +11952,12 @@ var SurgeonOpenAI = class extends BaroParticipant {
     const userPrompt = buildSurgeonPrompt(snap, failure);
     const context = ModelContext.create("surgeon").addContextItem(SystemMessageItem.create(SURGEON_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
     try {
+      const round = await runInferenceRound(context, this.model);
+      const usage = new UsageAccumulator();
+      usage.add(round.usage);
       let assistantText = "";
-      for await (const item of this.runner.run(context, this.model)) {
-        if (item.type === "message" && item.role === "assistant") {
+      for (const item of round.items) {
+        if (item.type === "message") {
           const json = item.toJSON();
           assistantText += json.content?.[0]?.text ?? "";
         }
@@ -11906,6 +11965,8 @@ var SurgeonOpenAI = class extends BaroParticipant {
       if (!assistantText.trim()) {
         throw new Error("OpenAI returned empty assistant text");
       }
+      process.stderr.write(`[surgeon-openai] ${usage.summary()}
+`);
       const verdictJson = extractJsonObject(assistantText);
       const parsed = JSON.parse(verdictJson);
       if (parsed.action === "abort") return null;