npm - @poncho-ai/sdk - Versions diffs - 1.15.0 → 1.15.2 - Mend

@poncho-ai/sdk 1.15.0 → 1.15.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,5 +1,5 @@
-> @poncho-ai/sdk@1.15.0 build /home/runner/work/poncho-ai/poncho-ai/packages/sdk
+> @poncho-ai/sdk@1.15.2 build /home/runner/work/poncho-ai/poncho-ai/packages/sdk
 > tsup src/index.ts --format esm --dts
 CLI Building entry: src/index.ts
@@ -8,7 +8,7 @@
 CLI Target: es2022
 ESM Build start
 ESM dist/index.js 17.24 KB
-ESM ⚡️ Build success in 19ms
+ESM ⚡️ Build success in 21ms
 DTS Build start
-DTS ⚡️ Build success in 1324ms
-DTS dist/index.d.ts 31.27 KB
+DTS ⚡️ Build success in 1365ms
+DTS dist/index.d.ts 32.36 KB

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,30 @@
 # @poncho-ai/sdk
+## 1.15.2
+### Patch Changes
+- [`3a25676`](https://github.com/cesr/poncho-ai/commit/3a2567666e1bc8d6650818db76d07765c0250264) Thanks [@cesr](https://github.com/cesr)! - Add a per-run `model` override to `RunInput` (and forward it from `runConversationTurn`'s opts). The override is captured once at run start and wins over the agent definition's `model.name` for every step of the run.
+  Previously the only way to vary the model per run kind on a shared harness was mutating `parsedAgent.frontmatter.model.name` before each run — but the harness re-reads that field at the start of every step, so a concurrent run's mutation flipped an in-flight run's model mid-turn. Besides being the wrong model, the switch invalidated the run's entire Anthropic prompt cache (caches are per-model), observed in production as the same ~104k-token prefix being cache-written twice back-to-back, once per model. Callers should pass `model` in the run input instead of mutating frontmatter.
+## 1.15.1
+### Patch Changes
+- [`299f574`](https://github.com/cesr/poncho-ai/commit/299f574a2f2f0d4873f42bbcffdf604e9cc4c29c) Thanks [@cesr](https://github.com/cesr)! - Mark in-flight assistant drafts with `metadata.incomplete = true`.
+  The orchestrator's per-step draft persist (`persistDraft`) and the
+  approval/device checkpoint and continuation writes now stamp the trailing
+  assistant message `metadata.incomplete = true`; the three terminal writes
+  (normal finalize, cancelled, errored) clear it. This lets a consumer that
+  reconciles a persisted snapshot against a live event stream (e.g. a
+  WebSocket layer) strip the in-flight draft from the authoritative snapshot
+  and rebuild that turn from the event log instead — so the snapshot and the
+  replayed events never both carry the in-flight turn, eliminating
+  reconnect-time duplication. Additive + backwards-compatible: consumers that
+  ignore the flag are unaffected.
 ## 1.15.0
 ### Minor Changes

package/dist/index.d.ts CHANGED Viewed

@@ -680,6 +680,14 @@ interface Message {
             content: string | string[];
         }>;
         isCompactionSummary?: boolean;
+        /** True while this assistant message is an in-flight DRAFT (the turn
+         *  hasn't finished). Set by the orchestrator's per-step draft persist and
+         *  cleared at finalize. Consumers that reconcile a persisted snapshot with
+         *  a live event stream (e.g. PonchOS's WS layer) strip `incomplete`
+         *  messages from the snapshot and rebuild the in-flight turn from the
+         *  event log instead — so the two never both carry it (no reconnect
+         *  duplication). */
+        incomplete?: boolean;
     };
 }
 /** Extract the text content from a message, regardless of content format. */
@@ -763,6 +771,17 @@ interface RunInput {
      * will hit the cache before the 5-min TTL expires.
      */
     disablePromptCache?: boolean;
+    /**
+     * Model name override for this run, captured once at run start. Takes
+     * precedence over the agent definition's `model.name` for every step of
+     * the run. Use this instead of mutating the parsed agent's frontmatter
+     * when one harness instance serves runs that need different models
+     * (e.g. user turns vs cron jobs) — a frontmatter mutation made while
+     * another run is in flight changes that run's model mid-turn, and the
+     * model switch invalidates its entire Anthropic prompt cache (caches
+     * are per-model).
+     */
+    model?: string;
     /** Scope this run to a specific tenant. */
     tenantId?: string;
     /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@poncho-ai/sdk",
-  "version": "1.15.0",
+  "version": "1.15.2",
   "description": "Core types and utilities for building Poncho skills",
   "repository": {
     "type": "git",

package/src/index.ts CHANGED Viewed

@@ -38,6 +38,14 @@ export interface Message {
     toolActivity?: string[];
     sections?: Array<{ type: "text" | "tools"; content: string | string[] }>;
     isCompactionSummary?: boolean;
+    /** True while this assistant message is an in-flight DRAFT (the turn
+     *  hasn't finished). Set by the orchestrator's per-step draft persist and
+     *  cleared at finalize. Consumers that reconcile a persisted snapshot with
+     *  a live event stream (e.g. PonchOS's WS layer) strip `incomplete`
+     *  messages from the snapshot and rebuild the in-flight turn from the
+     *  event log instead — so the two never both carry it (no reconnect
+     *  duplication). */
+    incomplete?: boolean;
   };
 }
@@ -144,6 +152,17 @@ export interface RunInput {
    * will hit the cache before the 5-min TTL expires.
    */
   disablePromptCache?: boolean;
+  /**
+   * Model name override for this run, captured once at run start. Takes
+   * precedence over the agent definition's `model.name` for every step of
+   * the run. Use this instead of mutating the parsed agent's frontmatter
+   * when one harness instance serves runs that need different models
+   * (e.g. user turns vs cron jobs) — a frontmatter mutation made while
+   * another run is in flight changes that run's model mid-turn, and the
+   * model switch invalidates its entire Anthropic prompt cache (caches
+   * are per-model).
+   */
+  model?: string;
   /** Scope this run to a specific tenant. */
   tenantId?: string;
   /**