@poncho-ai/harness 0.59.3 → 0.59.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.59.3 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.5 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 556.92 KB
11
+ ESM dist/index.js 557.00 KB
12
12
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
- ESM ⚡️ Build success in 268ms
13
+ ESM ⚡️ Build success in 252ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 8081ms
16
- DTS dist/index.d.ts 101.11 KB
15
+ DTS ⚡️ Build success in 7698ms
16
+ DTS dist/index.d.ts 101.66 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.5
4
+
5
+ ### Patch Changes
6
+
7
+ - [`d14c390`](https://github.com/cesr/poncho-ai/commit/d14c390ce6830f7446ea7a4e934d2cb76833c455) Thanks [@cesr](https://github.com/cesr)! - `continueFromToolResult` accepts and forwards the per-run `model` override, so approval-checkpoint continuations run on the same model as the checkpointed run instead of re-reading the (possibly concurrently-mutated) agent frontmatter.
8
+
9
+ ## 0.59.4
10
+
11
+ ### Patch Changes
12
+
13
+ - [`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264) Thanks [@cesr](https://github.com/cesr)! - Add a per-run `model` override to `RunInput` (and forward it from `runConversationTurn`'s opts). The override is captured once at run start and wins over the agent definition's `model.name` for every step of the run.
14
+
15
+ Previously the only way to vary the model per run kind on a shared harness was mutating `parsedAgent.frontmatter.model.name` before each run — but the harness re-reads that field at the start of every step, so a concurrent run's mutation flipped an in-flight run's model mid-turn. Besides being the wrong model, the switch invalidated the run's entire Anthropic prompt cache (caches are per-model), observed in production as the same ~104k-token prefix being cache-written twice back-to-back, once per model. Callers should pass `model` in the run input instead of mutating frontmatter.
16
+
17
+ - Updated dependencies [[`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264)]:
18
+ - @poncho-ai/sdk@1.15.2
19
+
3
20
  ## 0.59.3
4
21
 
5
22
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -1582,6 +1582,11 @@ declare class AgentHarness {
1582
1582
  /** Emit no telemetry for the continuation run (e.g. resuming an
1583
1583
  * incognito turn after an approval). */
1584
1584
  suppressTelemetry?: boolean;
1585
+ /** Per-run model override for the continuation run — same semantics as
1586
+ * `RunInput.model`. Forward the model the checkpointed run was using;
1587
+ * otherwise the continuation falls back to the agent definition's
1588
+ * (possibly concurrently-mutated) frontmatter model. */
1589
+ model?: string;
1585
1590
  }): AsyncGenerator<AgentEvent>;
1586
1591
  runToCompletion(input: RunInput): Promise<HarnessRunOutput>;
1587
1592
  }
@@ -2334,6 +2339,12 @@ interface RunConversationTurnOpts {
2334
2339
  * built with an OTLP exporter attached.
2335
2340
  */
2336
2341
  suppressTelemetry?: boolean;
2342
+ /**
2343
+ * Forwarded to `RunInput.model`. Per-run model override, captured once at
2344
+ * run start — safe under concurrent runs on a shared harness, unlike
2345
+ * mutating the parsed agent's frontmatter.
2346
+ */
2347
+ model?: string;
2337
2348
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
2338
2349
  onEvent?: (event: AgentEvent) => void | Promise<void>;
2339
2350
  }
package/dist/index.js CHANGED
@@ -10719,7 +10719,7 @@ ${this.skillFingerprint}`;
10719
10719
  }
10720
10720
  return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
10721
10721
  };
10722
- const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
10722
+ const resolvedModelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
10723
10723
  const contextWindow = agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
10724
10724
  yield pushEvent({
10725
10725
  type: "run:started",
@@ -11086,7 +11086,7 @@ ${textContent}` };
11086
11086
  }
11087
11087
  return [];
11088
11088
  };
11089
- const modelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
11089
+ const modelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
11090
11090
  if (step === 1) {
11091
11091
  modelLog.item(`${modelName} (provider=${agent.frontmatter.model?.provider ?? "anthropic"})`);
11092
11092
  }
@@ -11921,7 +11921,8 @@ ${this.skillFingerprint}`;
11921
11921
  tenantId: input.tenantId,
11922
11922
  parameters: input.parameters,
11923
11923
  abortSignal: input.abortSignal,
11924
- suppressTelemetry: input.suppressTelemetry
11924
+ suppressTelemetry: input.suppressTelemetry,
11925
+ model: input.model
11925
11926
  });
11926
11927
  }
11927
11928
  async runToCompletion(input) {
@@ -14323,7 +14324,8 @@ var runConversationTurn = async (opts) => {
14323
14324
  files: opts.files && opts.files.length > 0 ? opts.files : void 0,
14324
14325
  abortSignal: opts.abortSignal,
14325
14326
  disablePromptCache: opts.disablePromptCache,
14326
- suppressTelemetry: opts.suppressTelemetry
14327
+ suppressTelemetry: opts.suppressTelemetry,
14328
+ model: opts.model
14327
14329
  },
14328
14330
  initialContextTokens: conversation.contextTokens ?? 0,
14329
14331
  initialContextWindow: conversation.contextWindow ?? 0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.59.3",
3
+ "version": "0.59.5",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.15.1"
37
+ "@poncho-ai/sdk": "1.15.2"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
package/src/harness.ts CHANGED
@@ -2376,7 +2376,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2376
2376
  return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
2377
2377
  };
2378
2378
 
2379
- const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
2379
+ const resolvedModelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
2380
2380
  const contextWindow =
2381
2381
  agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
2382
2382
 
@@ -2836,7 +2836,12 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2836
2836
  return [];
2837
2837
  };
2838
2838
 
2839
- const modelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
2839
+ // Per-run override wins over frontmatter. Reading frontmatter here is
2840
+ // what made model selection racy: the field is re-read every step, so
2841
+ // a concurrent run's setHarnessModel-style mutation flipped this
2842
+ // run's model mid-turn (and a model switch drops the whole per-model
2843
+ // Anthropic prompt cache).
2844
+ const modelName = input.model ?? agent.frontmatter.model?.name ?? "claude-opus-4-5";
2840
2845
  if (step === 1) {
2841
2846
  modelLog.item(`${modelName} (provider=${agent.frontmatter.model?.provider ?? "anthropic"})`);
2842
2847
  }
@@ -3836,6 +3841,11 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3836
3841
  /** Emit no telemetry for the continuation run (e.g. resuming an
3837
3842
  * incognito turn after an approval). */
3838
3843
  suppressTelemetry?: boolean;
3844
+ /** Per-run model override for the continuation run — same semantics as
3845
+ * `RunInput.model`. Forward the model the checkpointed run was using;
3846
+ * otherwise the continuation falls back to the agent definition's
3847
+ * (possibly concurrently-mutated) frontmatter model. */
3848
+ model?: string;
3839
3849
  }): AsyncGenerator<AgentEvent> {
3840
3850
  const messages = [...input.messages];
3841
3851
  const lastMsg = messages[messages.length - 1];
@@ -3895,6 +3905,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3895
3905
  parameters: input.parameters,
3896
3906
  abortSignal: input.abortSignal,
3897
3907
  suppressTelemetry: input.suppressTelemetry,
3908
+ model: input.model,
3898
3909
  });
3899
3910
  }
3900
3911
 
@@ -74,6 +74,12 @@ export interface RunConversationTurnOpts {
74
74
  * built with an OTLP exporter attached.
75
75
  */
76
76
  suppressTelemetry?: boolean;
77
+ /**
78
+ * Forwarded to `RunInput.model`. Per-run model override, captured once at
79
+ * run start — safe under concurrent runs on a shared harness, unlike
80
+ * mutating the parsed agent's frontmatter.
81
+ */
82
+ model?: string;
77
83
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
78
84
  onEvent?: (event: AgentEvent) => void | Promise<void>;
79
85
  }
@@ -230,6 +236,7 @@ export const runConversationTurn = async (
230
236
  abortSignal: opts.abortSignal,
231
237
  disablePromptCache: opts.disablePromptCache,
232
238
  suppressTelemetry: opts.suppressTelemetry,
239
+ model: opts.model,
233
240
  },
234
241
  initialContextTokens: conversation.contextTokens ?? 0,
235
242
  initialContextWindow: conversation.contextWindow ?? 0,