@poncho-ai/sdk 1.15.0 → 1.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/sdk@1.15.0 build /home/runner/work/poncho-ai/poncho-ai/packages/sdk
2
+ > @poncho-ai/sdk@1.15.2 build /home/runner/work/poncho-ai/poncho-ai/packages/sdk
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -8,7 +8,7 @@
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
10
  ESM dist/index.js 17.24 KB
11
- ESM ⚡️ Build success in 19ms
11
+ ESM ⚡️ Build success in 21ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 1324ms
14
- DTS dist/index.d.ts 31.27 KB
13
+ DTS ⚡️ Build success in 1365ms
14
+ DTS dist/index.d.ts 32.36 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,30 @@
1
1
  # @poncho-ai/sdk
2
2
 
3
+ ## 1.15.2
4
+
5
+ ### Patch Changes
6
+
7
+ - [`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264) Thanks [@cesr](https://github.com/cesr)! - Add a per-run `model` override to `RunInput` (and forward it from `runConversationTurn`'s opts). The override is captured once at run start and wins over the agent definition's `model.name` for every step of the run.
8
+
9
+ Previously, the only way to vary the model per kind of run on a shared harness was to mutate `parsedAgent.frontmatter.model.name` before each run — but the harness re-reads that field at the start of every step, so a concurrent run's mutation flipped an in-flight run's model mid-turn. Besides running the wrong model, the affected run also had its entire Anthropic prompt cache invalidated by the switch (caches are per-model), observed in production as the same ~104k-token prefix being cache-written twice back-to-back, once per model. Callers should pass `model` in the run input instead of mutating frontmatter.
10
+
11
+ ## 1.15.1
12
+
13
+ ### Patch Changes
14
+
15
+ - [`299f574`](https://github.com/cesr/poncho-ai/commit/299f574a2f2f0d4873f42bbcffdf604e9cc4c29c) Thanks [@cesr](https://github.com/cesr)! - Mark in-flight assistant drafts with `metadata.incomplete = true`.
16
+
17
+ The orchestrator's per-step draft persist (`persistDraft`) and the
18
+ approval/device checkpoint and continuation writes now stamp the trailing
19
+ assistant message `metadata.incomplete = true`; the three terminal writes
20
+ (normal finalize, cancelled, errored) clear it. This lets a consumer that
21
+ reconciles a persisted snapshot against a live event stream (e.g. a
22
+ WebSocket layer) strip the in-flight draft from the authoritative snapshot
23
+ and rebuild that turn from the event log instead — so the snapshot and the
24
+ replayed events never both carry the in-flight turn, eliminating
25
+ reconnect-time duplication. Additive + backwards-compatible: consumers that
26
+ ignore the flag are unaffected.
27
+
3
28
  ## 1.15.0
4
29
 
5
30
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -680,6 +680,14 @@ interface Message {
680
680
  content: string | string[];
681
681
  }>;
682
682
  isCompactionSummary?: boolean;
683
+ /** True while this assistant message is an in-flight DRAFT (the turn
684
+ * hasn't finished). Set by the orchestrator's per-step draft persist and
685
+ * cleared at finalize. Consumers that reconcile a persisted snapshot with
686
+ * a live event stream (e.g. PonchOS's WS layer) strip `incomplete`
687
+ * messages from the snapshot and rebuild the in-flight turn from the
688
+ * event log instead — so the two never both carry it (no reconnect
689
+ * duplication). */
690
+ incomplete?: boolean;
683
691
  };
684
692
  }
685
693
  /** Extract the text content from a message, regardless of content format. */
@@ -763,6 +771,17 @@ interface RunInput {
763
771
  * will hit the cache before the 5-min TTL expires.
764
772
  */
765
773
  disablePromptCache?: boolean;
774
+ /**
775
+ * Model name override for this run, captured once at run start. Takes
776
+ * precedence over the agent definition's `model.name` for every step of
777
+ * the run. Use this instead of mutating the parsed agent's frontmatter
778
+ * when one harness instance serves runs that need different models
779
+ * (e.g. user turns vs cron jobs) — a frontmatter mutation made while
780
+ * another run is in flight changes that run's model mid-turn, and the
781
+ * model switch invalidates its entire Anthropic prompt cache (caches
782
+ * are per-model).
783
+ */
784
+ model?: string;
766
785
  /** Scope this run to a specific tenant. */
767
786
  tenantId?: string;
768
787
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/sdk",
3
- "version": "1.15.0",
3
+ "version": "1.15.2",
4
4
  "description": "Core types and utilities for building Poncho skills",
5
5
  "repository": {
6
6
  "type": "git",
package/src/index.ts CHANGED
@@ -38,6 +38,14 @@ export interface Message {
38
38
  toolActivity?: string[];
39
39
  sections?: Array<{ type: "text" | "tools"; content: string | string[] }>;
40
40
  isCompactionSummary?: boolean;
41
+ /** True while this assistant message is an in-flight DRAFT (the turn
42
+ * hasn't finished). Set by the orchestrator's per-step draft persist and
43
+ * cleared at finalize. Consumers that reconcile a persisted snapshot with
44
+ * a live event stream (e.g. PonchOS's WS layer) strip `incomplete`
45
+ * messages from the snapshot and rebuild the in-flight turn from the
46
+ * event log instead — so the two never both carry it (no reconnect
47
+ * duplication). */
48
+ incomplete?: boolean;
41
49
  };
42
50
  }
43
51
 
@@ -144,6 +152,17 @@ export interface RunInput {
144
152
  * will hit the cache before the 5-min TTL expires.
145
153
  */
146
154
  disablePromptCache?: boolean;
155
+ /**
156
+ * Model name override for this run, captured once at run start. Takes
157
+ * precedence over the agent definition's `model.name` for every step of
158
+ * the run. Use this instead of mutating the parsed agent's frontmatter
159
+ * when one harness instance serves runs that need different models
160
+ * (e.g. user turns vs cron jobs) — a frontmatter mutation made while
161
+ * another run is in flight changes that run's model mid-turn, and the
162
+ * model switch invalidates its entire Anthropic prompt cache (caches
163
+ * are per-model).
164
+ */
165
+ model?: string;
147
166
  /** Scope this run to a specific tenant. */
148
167
  tenantId?: string;
149
168
  /**