@poncho-ai/harness 0.59.3 → 0.59.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.59.3 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.4 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 556.92 KB
11
+ ESM dist/index.js 556.98 KB
12
12
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
- ESM ⚡️ Build success in 268ms
13
+ ESM ⚡️ Build success in 270ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 8081ms
16
- DTS dist/index.d.ts 101.11 KB
15
+ DTS ⚡️ Build success in 8326ms
16
+ DTS dist/index.d.ts 101.34 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.4
4
+
5
+ ### Patch Changes
6
+
7
+ - [`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264) Thanks [@cesr](https://github.com/cesr)! - Add a per-run `model` override to `RunInput` (and forward it from `runConversationTurn`'s opts). The override is captured once at run start and wins over the agent definition's `model.name` for every step of the run.
8
+
9
+ Previously the only way to vary the model per kind of run on a shared harness was to mutate `parsedAgent.frontmatter.model.name` before each run — but the harness re-reads that field at the start of every step, so a concurrent run's mutation flipped an in-flight run's model mid-turn. Besides making the run use the wrong model, the switch invalidated the run's entire Anthropic prompt cache (caches are per-model); this was observed in production as the same ~104k-token prefix being cache-written twice back-to-back, once per model. Callers should pass `model` in the run input instead of mutating frontmatter.
10
+
11
+ - Updated dependencies [[`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264)]:
12
+ - @poncho-ai/sdk@1.15.2
13
+
3
14
  ## 0.59.3
4
15
 
5
16
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -2334,6 +2334,12 @@ interface RunConversationTurnOpts {
2334
2334
  * built with an OTLP exporter attached.
2335
2335
  */
2336
2336
  suppressTelemetry?: boolean;
2337
+ /**
2338
+ * Forwarded to `RunInput.model`. Per-run model override, captured once at
2339
+ * run start — safe under concurrent runs on a shared harness, unlike
2340
+ * mutating the parsed agent's frontmatter.
2341
+ */
2342
+ model?: string;
2337
2343
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
2338
2344
  onEvent?: (event: AgentEvent) => void | Promise<void>;
2339
2345
  }
package/dist/index.js CHANGED
@@ -10719,7 +10719,7 @@ ${this.skillFingerprint}`;
10719
10719
  }
10720
10720
  return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
10721
10721
  };
10722
- const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
10722
+ const resolvedModelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
10723
10723
  const contextWindow = agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
10724
10724
  yield pushEvent({
10725
10725
  type: "run:started",
@@ -11086,7 +11086,7 @@ ${textContent}` };
11086
11086
  }
11087
11087
  return [];
11088
11088
  };
11089
- const modelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
11089
+ const modelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
11090
11090
  if (step === 1) {
11091
11091
  modelLog.item(`${modelName} (provider=${agent.frontmatter.model?.provider ?? "anthropic"})`);
11092
11092
  }
@@ -14323,7 +14323,8 @@ var runConversationTurn = async (opts) => {
14323
14323
  files: opts.files && opts.files.length > 0 ? opts.files : void 0,
14324
14324
  abortSignal: opts.abortSignal,
14325
14325
  disablePromptCache: opts.disablePromptCache,
14326
- suppressTelemetry: opts.suppressTelemetry
14326
+ suppressTelemetry: opts.suppressTelemetry,
14327
+ model: opts.model
14327
14328
  },
14328
14329
  initialContextTokens: conversation.contextTokens ?? 0,
14329
14330
  initialContextWindow: conversation.contextWindow ?? 0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.59.3",
3
+ "version": "0.59.4",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.15.1"
37
+ "@poncho-ai/sdk": "1.15.2"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
package/src/harness.ts CHANGED
@@ -2376,7 +2376,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2376
2376
  return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
2377
2377
  };
2378
2378
 
2379
- const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
2379
+ const resolvedModelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
2380
2380
  const contextWindow =
2381
2381
  agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
2382
2382
 
@@ -2836,7 +2836,12 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2836
2836
  return [];
2837
2837
  };
2838
2838
 
2839
- const modelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
2839
+ // Per-run override wins over frontmatter. Reading frontmatter here is
2840
+ // what made model selection racy: the field is re-read every step, so
2841
+ // a concurrent run's setHarnessModel-style mutation flipped this
2842
+ // run's model mid-turn (and a model switch drops the whole per-model
2843
+ // Anthropic prompt cache).
2844
+ const modelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
2840
2845
  if (step === 1) {
2841
2846
  modelLog.item(`${modelName} (provider=${agent.frontmatter.model?.provider ?? "anthropic"})`);
2842
2847
  }
@@ -74,6 +74,12 @@ export interface RunConversationTurnOpts {
74
74
  * built with an OTLP exporter attached.
75
75
  */
76
76
  suppressTelemetry?: boolean;
77
+ /**
78
+ * Forwarded to `RunInput.model`. Per-run model override, captured once at
79
+ * run start — safe under concurrent runs on a shared harness, unlike
80
+ * mutating the parsed agent's frontmatter.
81
+ */
82
+ model?: string;
77
83
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
78
84
  onEvent?: (event: AgentEvent) => void | Promise<void>;
79
85
  }
@@ -230,6 +236,7 @@ export const runConversationTurn = async (
230
236
  abortSignal: opts.abortSignal,
231
237
  disablePromptCache: opts.disablePromptCache,
232
238
  suppressTelemetry: opts.suppressTelemetry,
239
+ model: opts.model,
233
240
  },
234
241
  initialContextTokens: conversation.contextTokens ?? 0,
235
242
  initialContextWindow: conversation.contextWindow ?? 0,