@poncho-ai/sdk 1.11.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/sdk@1.11.0 build /home/runner/work/poncho-ai/poncho-ai/packages/sdk
2
+ > @poncho-ai/sdk@1.13.0 build /home/runner/work/poncho-ai/poncho-ai/packages/sdk
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -10,5 +10,5 @@
10
10
  ESM dist/index.js 17.24 KB
11
11
  ESM ⚡️ Build success in 21ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 1385ms
14
- DTS dist/index.d.ts 29.09 KB
13
+ DTS ⚡️ Build success in 1513ms
14
+ DTS dist/index.d.ts 29.81 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,54 @@
1
1
  # @poncho-ai/sdk
2
2
 
3
+ ## 1.13.0
4
+
5
+ ### Minor Changes
6
+
7
+ - [`773f113`](https://github.com/cesr/poncho-ai/commit/773f11309e2410d6c5e17af0fde17425953105f2) Thanks [@cesr](https://github.com/cesr)! - harness: add a per-run `suppressTelemetry` flag so one harness can serve both telemetry-on and telemetry-off runs.
8
+
9
+ Telemetry was effectively an instance-level property: whether the OTLP exporter is attached is decided at construction, so a host that wanted telemetry-off runs (e.g. incognito) had to build and maintain a _second_ harness instance with no exporter — duplicating all per-harness provisioning (tool registration, subagent manager, etc.) and risking drift between the two.
10
+
11
+ `RunInput.suppressTelemetry` lets a single harness — built once, with the exporter attached — emit nothing for a given run: the `invoke_agent` root span, the `execute_tool` spans, and the AI-SDK spans are all gated on `!input.suppressTelemetry`. Hosts can now keep one harness per user and pass `suppressTelemetry: true` per run instead of routing to a parallel exporter-less instance.
12
+
13
+ ## 1.12.0
14
+
15
+ ### Minor Changes
16
+
17
+ - [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
18
+
19
+ Two related changes to Anthropic prompt caching:
20
+
21
+ **1-hour static system-prompt breakpoint.** The harness now splits the
22
+ assembled system prompt into a static portion (agent body + skill
23
+ context + browser/fs/isolate context — stable across many turns and
24
+ jobs within an hour) and a dynamic tail (memory, todos, time). On
25
+ Anthropic models, these are sent as two `role: "system"` messages with
26
+ `cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
27
+ tail breakpoint on the last user/assistant/tool message is retained.
28
+
29
+ This lets later turns and job runs read ~95% of the system prompt at
30
+ 0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
31
+ has expired — the previous setup only cached for 5 minutes via the
32
+ tail breakpoint. For a given user, separate conversations and both
33
+ interactive and job runs all share the static cache.
34
+
35
+ **Per-run cache kill-switch.** Added `RunInput.disablePromptCache?:
36
+ boolean` (also exposed on `RunConversationTurnOpts.disablePromptCache`,
37
+ forwarded into `runInput`). When set, the harness skips the 5-min tail
38
+ breakpoint for that run. The 1-hour static breakpoint is still
39
+ applied — the run still benefits from reading the shared static cache,
40
+ it just doesn't write a new tail entry that won't be read before the TTL expires.
41
+
42
+ Intended for one-shot programmatic invocations (cron-fired jobs,
43
+ subagent dispatch) where no follow-up turn is coming within the 5-min
44
+ TTL window, so the 1.25× write surcharge would be pure waste.
45
+
46
+ Non-Anthropic providers fall through to the previous single concatenated
47
+ `system:` string with no cache control — those providers auto-cache.
48
+
49
+ Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
50
+ for reuse at the streamText site.
51
+
3
52
  ## 1.11.0
4
53
 
5
54
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -747,8 +747,23 @@ interface RunInput {
747
747
  conversationId?: string;
748
748
  /** When true, ignores PONCHO_MAX_DURATION soft deadline (used for background subagent runs). */
749
749
  disableSoftDeadline?: boolean;
750
+ /**
751
+ * When true, skip the 5-min tail Anthropic prompt-cache breakpoint for this run.
752
+ * Use for one-shot runs with no follow-up turn coming (e.g. cron-fired
753
+ * jobs) — the 1.25× write surcharge is pure waste when no later read
754
+ * will hit the cache before the 5-min TTL expires.
755
+ */
756
+ disablePromptCache?: boolean;
750
757
  /** Scope this run to a specific tenant. */
751
758
  tenantId?: string;
759
+ /**
760
+ * When true, emit no telemetry for this run — no `invoke_agent` /
761
+ * `execute_tool` spans and no AI-SDK spans, even on a harness built with an
762
+ * OTLP exporter attached. Lets a single harness serve both telemetry-on and
763
+ * telemetry-off (e.g. incognito) runs, instead of needing a separate
764
+ * exporter-less harness instance per mode.
765
+ */
766
+ suppressTelemetry?: boolean;
752
767
  }
753
768
  interface TokenUsage {
754
769
  input: number;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/sdk",
3
- "version": "1.11.0",
3
+ "version": "1.13.0",
4
4
  "description": "Core types and utilities for building Poncho skills",
5
5
  "repository": {
6
6
  "type": "git",
package/src/index.ts CHANGED
@@ -128,8 +128,23 @@ export interface RunInput {
128
128
  conversationId?: string;
129
129
  /** When true, ignores PONCHO_MAX_DURATION soft deadline (used for background subagent runs). */
130
130
  disableSoftDeadline?: boolean;
131
+ /**
132
+ * When true, skip the 5-min tail Anthropic prompt-cache breakpoint for this run.
133
+ * Use for one-shot runs with no follow-up turn coming (e.g. cron-fired
134
+ * jobs) — the 1.25× write surcharge is pure waste when no later read
135
+ * will hit the cache before the 5-min TTL expires.
136
+ */
137
+ disablePromptCache?: boolean;
131
138
  /** Scope this run to a specific tenant. */
132
139
  tenantId?: string;
140
+ /**
141
+ * When true, emit no telemetry for this run — no `invoke_agent` /
142
+ * `execute_tool` spans and no AI-SDK spans, even on a harness built with an
143
+ * OTLP exporter attached. Lets a single harness serve both telemetry-on and
144
+ * telemetry-off (e.g. incognito) runs, instead of needing a separate
145
+ * exporter-less harness instance per mode.
146
+ */
147
+ suppressTelemetry?: boolean;
133
148
  }
134
149
 
135
150
  export interface TokenUsage {