npm - @poncho-ai/harness - Versions diffs - 0.59.3 → 0.59.5 - Mend

@poncho-ai/harness 0.59.3 → 0.59.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/.turbo/turbo-build.log +5 -5
package/CHANGELOG.md +17 -0
package/dist/index.d.ts +11 -0
package/dist/index.js +6 -4
package/package.json +2 -2
package/src/harness.ts +13 -2
package/src/orchestrator/run-conversation-turn.ts +7 -0

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,5 +1,5 @@
-> @poncho-ai/harness@0.59.3 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
+> @poncho-ai/harness@0.59.5 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
 > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
 [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
 CLI tsup v8.5.1
 CLI Target: es2022
 ESM Build start
-ESM dist/index.js            556.92 KB
+ESM dist/index.js            557.00 KB
 ESM dist/isolate-F2PPSUL6.js 53.82 KB
-ESM ⚡️ Build success in 268ms
+ESM ⚡️ Build success in 252ms
 DTS Build start
-DTS ⚡️ Build success in 8081ms
-DTS dist/index.d.ts 101.11 KB
+DTS ⚡️ Build success in 7698ms
+DTS dist/index.d.ts 101.66 KB

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,22 @@
 # @poncho-ai/harness
+## 0.59.5
+### Patch Changes
+- [`d14c390`](https://github.com/cesr/poncho-ai/commit/d14c390ce6830f7446ea7a4e934d2cb76833c455) Thanks [@cesr](https://github.com/cesr)! - `continueFromToolResult` accepts and forwards the per-run `model` override, so approval-checkpoint continuations run on the same model as the checkpointed run instead of re-reading the (possibly concurrently-mutated) agent frontmatter.
+## 0.59.4
+### Patch Changes
+- [`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264) Thanks [@cesr](https://github.com/cesr)! - Add a per-run `model` override to `RunInput` (and forward it from `runConversationTurn`'s opts). The override is captured once at run start and wins over the agent definition's `model.name` for every step of the run.
+  Previously the only way to vary the model per run kind on a shared harness was mutating `parsedAgent.frontmatter.model.name` before each run — but the harness re-reads that field at the start of every step, so a concurrent run's mutation flipped an in-flight run's model mid-turn. Besides being the wrong model, the switch invalidated the run's entire Anthropic prompt cache (caches are per-model), observed in production as the same ~104k-token prefix being cache-written twice back-to-back, once per model. Callers should pass `model` in the run input instead of mutating frontmatter.
+- Updated dependencies [[`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264)]:
+  - @poncho-ai/sdk@1.15.2
 ## 0.59.3
 ### Patch Changes

package/dist/index.d.ts CHANGED Viewed

@@ -1582,6 +1582,11 @@ declare class AgentHarness {
         /** Emit no telemetry for the continuation run (e.g. resuming an
          *  incognito turn after an approval). */
         suppressTelemetry?: boolean;
+        /** Per-run model override for the continuation run — same semantics as
+         *  `RunInput.model`. Forward the model the checkpointed run was using,
+         *  otherwise the continuation falls back to the agent definition's
+         *  (possibly concurrently-mutated) frontmatter model. */
+        model?: string;
     }): AsyncGenerator<AgentEvent>;
     runToCompletion(input: RunInput): Promise<HarnessRunOutput>;
 }
@@ -2334,6 +2339,12 @@ interface RunConversationTurnOpts {
      * built with an OTLP exporter attached.
      */
     suppressTelemetry?: boolean;
+    /**
+     * Forwarded to `RunInput.model`. Per-run model override, captured once at
+     * run start — safe under concurrent runs on a shared harness, unlike
+     * mutating the parsed agent's frontmatter.
+     */
+    model?: string;
     /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
     onEvent?: (event: AgentEvent) => void | Promise<void>;
 }

package/dist/index.js CHANGED Viewed

@@ -10719,7 +10719,7 @@ ${this.skillFingerprint}`;
       }
       return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
     };
-    const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
+    const resolvedModelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
     const contextWindow = agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
     yield pushEvent({
       type: "run:started",
@@ -11086,7 +11086,7 @@ ${textContent}` };
             }
             return [];
           };
-          const modelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
+          const modelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
           if (step === 1) {
             modelLog.item(`${modelName} (provider=${agent.frontmatter.model?.provider ?? "anthropic"})`);
           }
@@ -11921,7 +11921,8 @@ ${this.skillFingerprint}`;
       tenantId: input.tenantId,
       parameters: input.parameters,
       abortSignal: input.abortSignal,
-      suppressTelemetry: input.suppressTelemetry
+      suppressTelemetry: input.suppressTelemetry,
+      model: input.model
     });
   }
   async runToCompletion(input) {
@@ -14323,7 +14324,8 @@ var runConversationTurn = async (opts) => {
         files: opts.files && opts.files.length > 0 ? opts.files : void 0,
         abortSignal: opts.abortSignal,
         disablePromptCache: opts.disablePromptCache,
-        suppressTelemetry: opts.suppressTelemetry
+        suppressTelemetry: opts.suppressTelemetry,
+        model: opts.model
       },
       initialContextTokens: conversation.contextTokens ?? 0,
       initialContextWindow: conversation.contextWindow ?? 0,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@poncho-ai/harness",
-  "version": "0.59.3",
+  "version": "0.59.5",
   "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
   "repository": {
     "type": "git",
@@ -34,7 +34,7 @@
     "mustache": "^4.2.0",
     "yaml": "^2.4.0",
     "zod": "^3.22.0",
-    "@poncho-ai/sdk": "1.15.1"
+    "@poncho-ai/sdk": "1.15.2"
   },
   "peerDependencies": {
     "esbuild": ">=0.17.0",

package/src/harness.ts CHANGED Viewed

@@ -2376,7 +2376,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
       return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
     };
-    const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
+    const resolvedModelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
     const contextWindow =
       agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
@@ -2836,7 +2836,12 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
           return [];
         };
-        const modelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
+        // Per-run override wins over frontmatter. Reading frontmatter here is
+        // what made model selection racy: the field is re-read every step, so
+        // a concurrent run's setHarnessModel-style mutation flipped this
+        // run's model mid-turn (and a model switch drops the whole per-model
+        // Anthropic prompt cache).
+        const modelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
         if (step === 1) {
           modelLog.item(`${modelName} (provider=${agent.frontmatter.model?.provider ?? "anthropic"})`);
         }
@@ -3836,6 +3841,11 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
     /** Emit no telemetry for the continuation run (e.g. resuming an
      *  incognito turn after an approval). */
     suppressTelemetry?: boolean;
+    /** Per-run model override for the continuation run — same semantics as
+     *  `RunInput.model`. Forward the model the checkpointed run was using,
+     *  otherwise the continuation falls back to the agent definition's
+     *  (possibly concurrently-mutated) frontmatter model. */
+    model?: string;
   }): AsyncGenerator<AgentEvent> {
     const messages = [...input.messages];
     const lastMsg = messages[messages.length - 1];
@@ -3895,6 +3905,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
       parameters: input.parameters,
       abortSignal: input.abortSignal,
       suppressTelemetry: input.suppressTelemetry,
+      model: input.model,
     });
   }

package/src/orchestrator/run-conversation-turn.ts CHANGED Viewed

@@ -74,6 +74,12 @@ export interface RunConversationTurnOpts {
    * built with an OTLP exporter attached.
    */
   suppressTelemetry?: boolean;
+  /**
+   * Forwarded to `RunInput.model`. Per-run model override, captured once at
+   * run start — safe under concurrent runs on a shared harness, unlike
+   * mutating the parsed agent's frontmatter.
+   */
+  model?: string;
   /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
   onEvent?: (event: AgentEvent) => void | Promise<void>;
 }
@@ -230,6 +236,7 @@ export const runConversationTurn = async (
         abortSignal: opts.abortSignal,
         disablePromptCache: opts.disablePromptCache,
         suppressTelemetry: opts.suppressTelemetry,
+        model: opts.model,
       },
       initialContextTokens: conversation.contextTokens ?? 0,
       initialContextWindow: conversation.contextWindow ?? 0,