@poncho-ai/harness 0.44.0 → 0.46.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.44.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.46.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 516.00 KB
11
+ ESM dist/index.js 525.40 KB
12
12
  ESM dist/isolate-VY35DGLM.js 49.43 KB
13
- ESM ⚡️ Build success in 209ms
13
+ ESM ⚡️ Build success in 214ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 6795ms
16
- DTS dist/index.d.ts 83.51 KB
15
+ DTS ⚡️ Build success in 7043ms
16
+ DTS dist/index.d.ts 85.30 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,92 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.46.0
4
+
5
+ ### Minor Changes
6
+
7
+ - [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
8
+
9
+ Two related changes to Anthropic prompt caching:
10
+
11
+ **1-hour static system-prompt breakpoint.** The harness now splits the
12
+ assembled system prompt into a static portion (agent body + skill
13
+ context + browser/fs/isolate context — stable across many turns and
14
+ jobs within an hour) and a dynamic tail (memory, todos, time). On
15
+ Anthropic models, these are sent as two `role: "system"` messages with
16
+ `cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
17
+ tail breakpoint on the last user/assistant/tool message is retained.
18
+
19
+ This lets later turns and job runs read ~95% of the system prompt at
20
+ 0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
21
+ has expired — the previous setup only cached for 5 minutes via the
22
+ tail breakpoint. Different conversations of the same user, and interactive
23
+ and job runs alike, all share the static cache.
24
+
25
+ **Per-run cache kill-switch.** Added `RunInput.disablePromptCache?:
26
+ boolean` (also exposed on `RunConversationTurnOpts.disablePromptCache`,
27
+ forwarded into `runInput`). When set, the harness skips the 5-min tail
28
+ breakpoint for that run. The 1-hour static breakpoint is still
29
+ applied — the run still benefits from reading the shared static cache,
30
+ but doesn't write a new tail entry that won't be read before the TTL expires.
31
+
32
+ Intended for one-shot programmatic invocations (cron-fired jobs,
33
+ subagent dispatch) where no follow-up turn is coming within the 5-min
34
+ TTL window, so the 1.25× write surcharge would be pure waste.
35
+
36
+ Non-Anthropic providers fall through to the previous single concatenated
37
+ `system:` string with no cache control — those providers auto-cache.
38
+
39
+ Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
40
+ for reuse at the streamText site.
41
+
42
+ ### Patch Changes
43
+
44
+ - Updated dependencies [[`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8)]:
45
+ - @poncho-ai/sdk@1.12.0
46
+
47
+ ## 0.45.0
48
+
49
+ ### Minor Changes
50
+
51
+ - [`1adaae2`](https://github.com/cesr/poncho-ai/commit/1adaae2d4cc55800f01d602f2a7d6ecc65031443) Thanks [@cesr](https://github.com/cesr)! - harness: device-dispatch mode for tools that execute on a connected client
52
+
53
+ Tools can now be marked `dispatch: "device"` on `loadedConfig.tools`. When
54
+ the model calls such a tool the dispatcher pauses the run, emits a new
55
+ `tool:device:required` event, and checkpoints with the new
56
+ `kind: "device"` discriminator on `pendingApprovals` — same plumbing as
57
+ the approval flow, different trigger and different resume payload.
58
+ Consumers (e.g. PonchOS for iOS device tools) drive the external
59
+ execution and feed the result back via `continueFromToolResult`.
60
+
61
+ Approval can be combined: `{access: "approval", dispatch: "device"}`
62
+ yields the approval card first, then on resume falls through to the
63
+ device-required event. The wire vocabulary for approvals
64
+ (`approvalId` etc.) is unchanged; the `pendingApprovals` column /
65
+ field name stays.
66
+
67
+ `ToolAccess` is broadened to accept both the legacy string `"approval"`
68
+ and the new `{access?, dispatch?}` object form. Existing configs keep
69
+ working unchanged.
70
+
71
+ - [`6132601`](https://github.com/cesr/poncho-ai/commit/613260159cdd80fcc02d68aa58ad52d4465bcede) Thanks [@cesr](https://github.com/cesr)! - harness: add `read_subagent` tool for fetching subagent transcripts
72
+
73
+ Parent agents can now read a spawned subagent's conversation directly
74
+ instead of using `message_subagent` to ask it to repeat its work. The
75
+ new tool accepts a `mode` parameter — `"final"` (last assistant message,
76
+ default), `"assistant"` (assistant messages only), or `"full"` (every
77
+ message including tool calls and results) — plus optional `since_index`
78
+ and `max_messages` for paging long transcripts.
79
+
80
+ Access is restricted to direct children: a parent can only read
81
+ transcripts of subagents whose `parentConversationId` matches its own
82
+ conversation. The `SubagentManager` interface gains a corresponding
83
+ `getTranscript` method.
84
+
85
+ ### Patch Changes
86
+
87
+ - Updated dependencies [[`1adaae2`](https://github.com/cesr/poncho-ai/commit/1adaae2d4cc55800f01d602f2a7d6ecc65031443)]:
88
+ - @poncho-ai/sdk@1.11.0
89
+
3
90
  ## 0.44.0
4
91
 
5
92
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -167,6 +167,15 @@ interface Conversation {
167
167
  input: Record<string, unknown>;
168
168
  }>;
169
169
  decision?: "approved" | "denied";
170
+ /**
171
+ * Checkpoint kind discriminator.
172
+ * - "approval" (default for legacy rows): user approve/deny gate.
173
+ * - "device": tool executes on a connected client device (e.g. iOS); the
174
+ * consumer of the harness POSTs a tool result back to resume.
175
+ * Treat `undefined` as "approval" for backward compatibility with rows
176
+ * persisted before this field existed.
177
+ */
178
+ kind?: "approval" | "device";
170
179
  }>;
171
180
  runStatus?: "running" | "idle";
172
181
  ownerId: string;
@@ -450,7 +459,20 @@ interface UploadsConfig {
450
459
  region?: string;
451
460
  endpoint?: string;
452
461
  }
453
- type ToolAccess = boolean | "approval";
462
+ type ToolAccess = boolean | "approval" | {
463
+ access?: "approval";
464
+ dispatch?: "device";
465
+ };
466
+ /**
467
+ * Normalize any ToolAccess value into a {access, dispatch} struct.
468
+ * `boolean` collapses to no special handling — the boolean only encodes
469
+ * enable/disable, not dispatch — callers gate behavior on `dispatch` and
470
+ * `access`.
471
+ */
472
+ declare const normalizeToolAccess: (value: ToolAccess | undefined) => {
473
+ access?: "approval";
474
+ dispatch?: "device";
475
+ };
454
476
  /** @deprecated Use flat tool keys on `tools` instead. Kept for backward compat. */
455
477
  type BuiltInToolToggles = {
456
478
  list_directory?: boolean;
@@ -1101,6 +1123,16 @@ interface SubagentSummary {
1101
1123
  interface SubagentSpawnResult {
1102
1124
  subagentId: string;
1103
1125
  }
1126
+ type SubagentTranscriptMode = "final" | "assistant" | "full";
1127
+ interface SubagentTranscript {
1128
+ subagentId: string;
1129
+ task: string;
1130
+ status: string;
1131
+ totalMessages: number;
1132
+ startIndex: number;
1133
+ messages: Message[];
1134
+ truncated: boolean;
1135
+ }
1104
1136
  interface SubagentManager {
1105
1137
  spawn(opts: {
1106
1138
  task: string;
@@ -1111,6 +1143,13 @@ interface SubagentManager {
1111
1143
  sendMessage(subagentId: string, message: string): Promise<SubagentSpawnResult>;
1112
1144
  stop(subagentId: string): Promise<void>;
1113
1145
  list(parentConversationId: string): Promise<SubagentSummary[]>;
1146
+ getTranscript(opts: {
1147
+ subagentId: string;
1148
+ parentConversationId: string;
1149
+ mode: SubagentTranscriptMode;
1150
+ sinceIndex?: number;
1151
+ maxMessages?: number;
1152
+ }): Promise<SubagentTranscript>;
1114
1153
  }
1115
1154
 
1116
1155
  interface ToolCall {
@@ -1229,6 +1268,8 @@ declare class AgentHarness {
1229
1268
  /** Read-only virtual mounts overlaid on the VFS. Empty by default. */
1230
1269
  private virtualMounts;
1231
1270
  private resolveToolAccess;
1271
+ /** Returns the normalized {access, dispatch} mode for the tool. */
1272
+ private resolveToolMode;
1232
1273
  private isToolEnabled;
1233
1274
  private registerIfMissing;
1234
1275
  /**
@@ -1820,12 +1861,13 @@ declare const executeConversationTurn: ({ harness, runInput, events, initialCont
1820
1861
  onEvent?: (event: AgentEvent, draft: TurnDraftState) => void | Promise<void>;
1821
1862
  }) => Promise<ExecuteTurnResult>;
1822
1863
  declare const normalizeApprovalCheckpoint: (approval: StoredApproval, fallbackMessages: Message[]) => StoredApproval;
1823
- declare const buildApprovalCheckpoints: ({ approvals, runId, checkpointMessages, baseMessageCount, pendingToolCalls, }: {
1864
+ declare const buildApprovalCheckpoints: ({ approvals, runId, checkpointMessages, baseMessageCount, pendingToolCalls, kind, }: {
1824
1865
  approvals: ApprovalEventItem[];
1825
1866
  runId: string;
1826
1867
  checkpointMessages: Message[];
1827
1868
  baseMessageCount: number;
1828
1869
  pendingToolCalls: PendingToolCall[];
1870
+ kind?: "approval" | "device";
1829
1871
  }) => NonNullable<Conversation["pendingApprovals"]>;
1830
1872
  declare const applyTurnMetadata: (conv: Conversation, meta: TurnResultMetadata, opts?: {
1831
1873
  clearContinuation?: boolean;
@@ -1994,6 +2036,12 @@ interface RunConversationTurnOpts {
1994
2036
  parameters?: Record<string, unknown>;
1995
2037
  abortSignal?: AbortSignal;
1996
2038
  tenantId?: string | null;
2039
+ /**
2040
+ * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
2041
+ * turns with no follow-up coming (cron-fired jobs, etc.) so the
2042
+ * harness skips the 5-min tail cache write (the 1h static breakpoint still applies).
2043
+ */
2044
+ disablePromptCache?: boolean;
1997
2045
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
1998
2046
  onEvent?: (event: AgentEvent) => void | Promise<void>;
1999
2047
  }
@@ -2013,4 +2061,4 @@ interface RunConversationTurnResult {
2013
2061
  }
2014
2062
  declare const runConversationTurn: (opts: RunConversationTurnOpts) => Promise<RunConversationTurnResult>;
2015
2063
 
2016
- export { type ActiveConversationRun, type ActiveSubagentRun, type AgentFrontmatter, AgentHarness, type AgentIdentity, type AgentLimitsConfig, type AgentModelConfig, AgentOrchestrator, type ApprovalEventItem, type ArchivedToolResult$1 as ArchivedToolResult, type BashConfig, BashEnvironmentManager, type BashExecutionLimits, type BuiltInToolToggles, CALLBACK_LOCK_STALE_MS, type CompactMessagesOptions, type CompactResult, type CompactionConfig, type ContinuationHooks, type Conversation, type ConversationCreateInit, type ConversationState, type ConversationStatusSnapshot, type ConversationStore, type ConversationSummary, type CreateSkillToolsOptions, type CronJobConfig, DEFAULT_AGENT_DESCRIPTION, DEFAULT_AGENT_NAME, DEFAULT_MAX_STEPS, DEFAULT_MODEL_NAME, DEFAULT_MODEL_PROVIDER, DEFAULT_TEMPERATURE, DEFAULT_TIMEOUT, type DefaultAgentDefinitionOptions, type EventSink, type ExecuteTurnResult, type HarnessOptions, type HarnessRunOutput, type HistorySource, InMemoryConversationStore, InMemoryEngine, InMemoryStateStore, type IsolateBinding, type IsolateConfig, LocalMcpBridge, LocalUploadStore, MAX_CONCURRENT_SUBAGENTS, MAX_CONTINUATION_COUNT, MAX_SUBAGENT_CALLBACK_COUNT, MAX_SUBAGENT_NESTING, type MainMemory, type McpConfig, type MemoryConfig, type MemoryStore, type MessagingChannelConfig, type ModelProviderFactory, type NetworkConfig, OPENAI_CODEX_CLIENT_ID, type OpenAICodexAuthConfig, type OpenAICodexDeviceAuthRequest, type OpenAICodexSession, type OrchestratorHooks, type OrchestratorOptions, type OtlpConfig, type OtlpOption, PONCHO_UPLOAD_SCHEME, type ParsedAgent, type PendingSubagentApproval, type PendingSubagentResult, type PendingToolCall, type PonchoConfig, PonchoFsAdapter, PostgresEngine, type ProviderConfig, type Recurrence, type RecurrenceType, type Reminder, type ReminderCreateInput, type ReminderStatus, type ReminderStore, type RemoteMcpServerConfig, type RunConversationTurnOpts, type RunConversationTurnResult, type RunOutcome, type RunRequest, type 
RuntimeRenderContext, S3UploadStore, STALE_SUBAGENT_THRESHOLD_MS, STORAGE_SCHEMA_VERSION, type SecretsStore, type SkillContextEntry, type SkillMetadata, type SkillSource, SqliteEngine, type StateConfig, type StateProviderName, type StateStore, type StorageConfig, type StorageEngine, type StorageFactoryOptions, type StorageProvider, type StoredApproval, type SubagentManager, type SubagentResult, type SubagentSpawnResult, type SubagentSummary, TOOL_RESULT_ARCHIVE_PARAM, type TelemetryConfig, TelemetryEmitter, type TenantTokenPayload, type ToolAccess, type ToolCall, ToolDispatcher, type ToolExecutionResult, type TurnDraftState, type TurnResultMetadata, type TurnSection, type UploadStore, type UploadsConfig, VFS_SCHEME, VercelBlobUploadStore, type VfsDirEntry, type VfsStat, type VirtualMount, applyTurnMetadata, buildAgentDirectoryName, buildApprovalCheckpoints, buildAssistantMetadata, buildSkillContextWindow, buildToolCompletedText, cloneSections, compactMessages, completeOpenAICodexDeviceAuth, computeNextOccurrence, createBashTool, createConversationStore, createConversationStoreFromEngine, createDefaultTools, createDeleteDirectoryTool, createDeleteTool, createEditTool, createMemoryStore, createMemoryStoreFromEngine, createMemoryTools, createModelProvider, createReminderStore, createReminderStoreFromEngine, createReminderTools, createSearchTools, createSecretsStore, createSkillTools, createStateStore, createStorageEngine, createSubagentTools, createTodoStoreFromEngine, createTurnDraftState, createUploadStore, createWriteTool, decodeFileInputData, defaultAgentDefinition, deleteOpenAICodexSession, deriveUploadKey, ensureAgentIdentity, estimateTokens, estimateTotalTokens, executeConversationTurn, findSafeSplitPoint, flushTurnDraft, generateAgentId, getAgentStoreDirectory, getModelContextWindow, getOpenAICodexAccessToken, getOpenAICodexAuthFilePath, getOpenAICodexRequiredScopes, getPonchoStoreRoot, isMessageArray, jsonSchemaToZod, loadCanonicalHistory, loadPonchoConfig, 
loadRunHistory, loadSkillContext, loadSkillInstructions, loadSkillMetadata, loadVfsSkillMetadata, mergeSkills, normalizeApprovalCheckpoint, normalizeOtlp, normalizeScriptPolicyPath, parseAgentFile, parseAgentMarkdown, parseSkillFrontmatter, ponchoDocsTool, readOpenAICodexSession, readSkillResource, recordStandardTurnEvent, renderAgentPrompt, resolveAgentIdentity, resolveCompactionConfig, resolveEnv, resolveMemoryConfig, resolveRunRequest, resolveSkillDirs, resolveStateConfig, runConversationTurn, slugifyStorageComponent, startOpenAICodexDeviceAuth, verifyTenantToken, withToolResultArchiveParam, writeOpenAICodexSession };
2064
+ export { type ActiveConversationRun, type ActiveSubagentRun, type AgentFrontmatter, AgentHarness, type AgentIdentity, type AgentLimitsConfig, type AgentModelConfig, AgentOrchestrator, type ApprovalEventItem, type ArchivedToolResult$1 as ArchivedToolResult, type BashConfig, BashEnvironmentManager, type BashExecutionLimits, type BuiltInToolToggles, CALLBACK_LOCK_STALE_MS, type CompactMessagesOptions, type CompactResult, type CompactionConfig, type ContinuationHooks, type Conversation, type ConversationCreateInit, type ConversationState, type ConversationStatusSnapshot, type ConversationStore, type ConversationSummary, type CreateSkillToolsOptions, type CronJobConfig, DEFAULT_AGENT_DESCRIPTION, DEFAULT_AGENT_NAME, DEFAULT_MAX_STEPS, DEFAULT_MODEL_NAME, DEFAULT_MODEL_PROVIDER, DEFAULT_TEMPERATURE, DEFAULT_TIMEOUT, type DefaultAgentDefinitionOptions, type EventSink, type ExecuteTurnResult, type HarnessOptions, type HarnessRunOutput, type HistorySource, InMemoryConversationStore, InMemoryEngine, InMemoryStateStore, type IsolateBinding, type IsolateConfig, LocalMcpBridge, LocalUploadStore, MAX_CONCURRENT_SUBAGENTS, MAX_CONTINUATION_COUNT, MAX_SUBAGENT_CALLBACK_COUNT, MAX_SUBAGENT_NESTING, type MainMemory, type McpConfig, type MemoryConfig, type MemoryStore, type MessagingChannelConfig, type ModelProviderFactory, type NetworkConfig, OPENAI_CODEX_CLIENT_ID, type OpenAICodexAuthConfig, type OpenAICodexDeviceAuthRequest, type OpenAICodexSession, type OrchestratorHooks, type OrchestratorOptions, type OtlpConfig, type OtlpOption, PONCHO_UPLOAD_SCHEME, type ParsedAgent, type PendingSubagentApproval, type PendingSubagentResult, type PendingToolCall, type PonchoConfig, PonchoFsAdapter, PostgresEngine, type ProviderConfig, type Recurrence, type RecurrenceType, type Reminder, type ReminderCreateInput, type ReminderStatus, type ReminderStore, type RemoteMcpServerConfig, type RunConversationTurnOpts, type RunConversationTurnResult, type RunOutcome, type RunRequest, type 
RuntimeRenderContext, S3UploadStore, STALE_SUBAGENT_THRESHOLD_MS, STORAGE_SCHEMA_VERSION, type SecretsStore, type SkillContextEntry, type SkillMetadata, type SkillSource, SqliteEngine, type StateConfig, type StateProviderName, type StateStore, type StorageConfig, type StorageEngine, type StorageFactoryOptions, type StorageProvider, type StoredApproval, type SubagentManager, type SubagentResult, type SubagentSpawnResult, type SubagentSummary, type SubagentTranscript, type SubagentTranscriptMode, TOOL_RESULT_ARCHIVE_PARAM, type TelemetryConfig, TelemetryEmitter, type TenantTokenPayload, type ToolAccess, type ToolCall, ToolDispatcher, type ToolExecutionResult, type TurnDraftState, type TurnResultMetadata, type TurnSection, type UploadStore, type UploadsConfig, VFS_SCHEME, VercelBlobUploadStore, type VfsDirEntry, type VfsStat, type VirtualMount, applyTurnMetadata, buildAgentDirectoryName, buildApprovalCheckpoints, buildAssistantMetadata, buildSkillContextWindow, buildToolCompletedText, cloneSections, compactMessages, completeOpenAICodexDeviceAuth, computeNextOccurrence, createBashTool, createConversationStore, createConversationStoreFromEngine, createDefaultTools, createDeleteDirectoryTool, createDeleteTool, createEditTool, createMemoryStore, createMemoryStoreFromEngine, createMemoryTools, createModelProvider, createReminderStore, createReminderStoreFromEngine, createReminderTools, createSearchTools, createSecretsStore, createSkillTools, createStateStore, createStorageEngine, createSubagentTools, createTodoStoreFromEngine, createTurnDraftState, createUploadStore, createWriteTool, decodeFileInputData, defaultAgentDefinition, deleteOpenAICodexSession, deriveUploadKey, ensureAgentIdentity, estimateTokens, estimateTotalTokens, executeConversationTurn, findSafeSplitPoint, flushTurnDraft, generateAgentId, getAgentStoreDirectory, getModelContextWindow, getOpenAICodexAccessToken, getOpenAICodexAuthFilePath, getOpenAICodexRequiredScopes, getPonchoStoreRoot, isMessageArray, 
jsonSchemaToZod, loadCanonicalHistory, loadPonchoConfig, loadRunHistory, loadSkillContext, loadSkillInstructions, loadSkillMetadata, loadVfsSkillMetadata, mergeSkills, normalizeApprovalCheckpoint, normalizeOtlp, normalizeScriptPolicyPath, normalizeToolAccess, parseAgentFile, parseAgentMarkdown, parseSkillFrontmatter, ponchoDocsTool, readOpenAICodexSession, readSkillResource, recordStandardTurnEvent, renderAgentPrompt, resolveAgentIdentity, resolveCompactionConfig, resolveEnv, resolveMemoryConfig, resolveRunRequest, resolveSkillDirs, resolveStateConfig, runConversationTurn, slugifyStorageComponent, startOpenAICodexDeviceAuth, verifyTenantToken, withToolResultArchiveParam, writeOpenAICodexSession };
package/dist/index.js CHANGED
@@ -505,6 +505,13 @@ var compactMessages = async (model, messages, config, options) => {
505
505
  import { access } from "fs/promises";
506
506
  import { resolve as resolve3 } from "path";
507
507
  import { createJiti } from "jiti";
508
+ var normalizeToolAccess = (value) => {
509
+ if (value === "approval") return { access: "approval" };
510
+ if (value && typeof value === "object") {
511
+ return { access: value.access, dispatch: value.dispatch };
512
+ }
513
+ return {};
514
+ };
508
515
  var resolveTtl = (ttl, key) => {
509
516
  if (typeof ttl === "number") {
510
517
  return ttl;
@@ -8256,6 +8263,57 @@ var createSubagentTools = (manager) => [
8256
8263
  }
8257
8264
  return { subagents };
8258
8265
  }
8266
+ }),
8267
+ defineTool11({
8268
+ name: "read_subagent",
8269
+ description: "Fetch the conversation transcript of a subagent you spawned. Use this to inspect a subagent's intermediate reasoning, tool calls, or full output -- instead of asking it to repeat its work via message_subagent.\n\nModes:\n- 'final' (default): just the last assistant message. Cheap.\n- 'assistant': all assistant messages, no tool calls/results.\n- 'full': every message including tool calls and results. Can be large.\n\nUse since_index / max_messages to page through long transcripts. Only works on subagents directly spawned by this conversation.",
8270
+ inputSchema: {
8271
+ type: "object",
8272
+ properties: {
8273
+ subagent_id: {
8274
+ type: "string",
8275
+ description: "The subagent ID (from spawn_subagent or list_subagents)."
8276
+ },
8277
+ mode: {
8278
+ type: "string",
8279
+ enum: ["final", "assistant", "full"],
8280
+ description: "How much of the transcript to return. Defaults to 'final'."
8281
+ },
8282
+ since_index: {
8283
+ type: "number",
8284
+ description: "Skip messages before this index (applied after mode filter)."
8285
+ },
8286
+ max_messages: {
8287
+ type: "number",
8288
+ description: "Cap the number of messages returned."
8289
+ }
8290
+ },
8291
+ required: ["subagent_id"],
8292
+ additionalProperties: false
8293
+ },
8294
+ handler: async (input, context) => {
8295
+ const subagentId = typeof input.subagent_id === "string" ? input.subagent_id : "";
8296
+ if (!subagentId) {
8297
+ return { error: "subagent_id is required" };
8298
+ }
8299
+ const parentConversationId = context.conversationId;
8300
+ if (!parentConversationId) {
8301
+ return { error: "no active conversation" };
8302
+ }
8303
+ const rawMode = typeof input.mode === "string" ? input.mode : "final";
8304
+ const mode = rawMode === "assistant" || rawMode === "full" ? rawMode : "final";
8305
+ try {
8306
+ return await manager.getTranscript({
8307
+ subagentId,
8308
+ parentConversationId,
8309
+ mode,
8310
+ sinceIndex: typeof input.since_index === "number" ? input.since_index : void 0,
8311
+ maxMessages: typeof input.max_messages === "number" ? input.max_messages : void 0
8312
+ });
8313
+ } catch (err) {
8314
+ return { error: err instanceof Error ? err.message : String(err) };
8315
+ }
8316
+ }
8259
8317
  })
8260
8318
  ];
8261
8319
 
@@ -9044,11 +9102,20 @@ var AgentHarness = class _AgentHarness {
9044
9102
  const envOverride = tools.byEnvironment?.[env]?.[toolName];
9045
9103
  if (envOverride !== void 0) return envOverride;
9046
9104
  const flatValue = tools[toolName];
9047
- if (typeof flatValue === "boolean" || flatValue === "approval") return flatValue;
9105
+ if (typeof flatValue === "boolean" || flatValue === "approval" || flatValue !== null && typeof flatValue === "object" && !Array.isArray(flatValue) && // distinguish a ToolAccess object from the nested `defaults` /
9106
+ // `byEnvironment` sibling fields by checking it has only the
9107
+ // expected ToolAccess keys.
9108
+ Object.keys(flatValue).every((k) => k === "access" || k === "dispatch")) {
9109
+ return flatValue;
9110
+ }
9048
9111
  const legacyValue = tools.defaults?.[toolName];
9049
9112
  if (legacyValue !== void 0) return legacyValue;
9050
9113
  return true;
9051
9114
  }
9115
+ /** Returns the normalized {access, dispatch} mode for the tool. */
9116
+ resolveToolMode(toolName) {
9117
+ return normalizeToolAccess(this.resolveToolAccess(toolName));
9118
+ }
9052
9119
  isToolEnabled(name) {
9053
9120
  const access4 = this.resolveToolAccess(name);
9054
9121
  if (access4 === false) return false;
@@ -9536,7 +9603,7 @@ var AgentHarness = class _AgentHarness {
9536
9603
  );
9537
9604
  }
9538
9605
  requiresApprovalForToolCall(toolName, input) {
9539
- if (this.resolveToolAccess(toolName) === "approval") {
9606
+ if (this.resolveToolMode(toolName).access === "approval") {
9540
9607
  return true;
9541
9608
  }
9542
9609
  if (toolName === "run_skill_script") {
@@ -10062,10 +10129,13 @@ var AgentHarness = class _AgentHarness {
10062
10129
  );
10063
10130
  }
10064
10131
  const hasFullToolResults = hasUntruncatedToolResults(messages);
10065
- if (hasFullToolResults) {
10066
- costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
10132
+ const skipTailCache = input.disablePromptCache === true;
10133
+ if (skipTailCache) {
10134
+ costLog.debug(`tail cache breakpoint skipped \u2014 disablePromptCache (run=${runId.slice(0, 12)})`);
10135
+ } else if (hasFullToolResults) {
10136
+ costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
10067
10137
  } else {
10068
- costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
10138
+ costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
10069
10139
  }
10070
10140
  const inputMessageCount = messages.length;
10071
10141
  const events = [];
@@ -10154,11 +10224,11 @@ ${typeStubs}
10154
10224
 
10155
10225
  Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the tool result.`;
10156
10226
  }
10157
- const buildSystemPrompt = async () => {
10227
+ const buildSystemPromptParts = async () => {
10158
10228
  const agentPrompt = renderCurrentAgentPrompt();
10159
10229
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
10160
10230
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
10161
- const promptWithSkills = skillContextWindow ? `${agentPrompt}${developmentContext}
10231
+ const staticPart = skillContextWindow ? `${agentPrompt}${developmentContext}
10162
10232
 
10163
10233
  ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
10164
10234
  const hourlyTime = (() => {
@@ -10170,9 +10240,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
10170
10240
  const timeContext = this.reminderStore ? `
10171
10241
 
10172
10242
  Current UTC time (hour precision): ${hourlyTime}` : "";
10173
- return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
10243
+ const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
10244
+ return { staticPart, dynamicPart };
10174
10245
  };
10175
- let systemPrompt = await buildSystemPrompt();
10246
+ let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10247
+ let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
10176
10248
  let lastPromptFingerprint = `${this.agentFileFingerprint}
10177
10249
  ${this.skillFingerprint}`;
10178
10250
  const pushEvent = (event) => {
@@ -10606,17 +10678,28 @@ ${textContent}` };
10606
10678
  const coreMessages = cachedCoreMessages;
10607
10679
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
10608
10680
  const maxTokens = agent.frontmatter.model?.maxTokens;
10609
- const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
10610
- const cachedMessages = addPromptCacheBreakpoints(
10681
+ const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
10611
10682
  coreMessages,
10612
10683
  modelInstance,
10613
- breakpointIndex
10684
+ hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1
10614
10685
  );
10686
+ const useStaticCache = isAnthropicModel(modelInstance);
10687
+ const finalMessages = useStaticCache ? [
10688
+ {
10689
+ role: "system",
10690
+ content: staticSystemPart,
10691
+ providerOptions: {
10692
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
10693
+ }
10694
+ },
10695
+ ...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
10696
+ ...cachedMessages
10697
+ ] : cachedMessages;
10615
10698
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
10616
10699
  const result = await streamText({
10617
10700
  model: modelInstance,
10618
- system: systemPrompt,
10619
- messages: cachedMessages,
10701
+ ...useStaticCache ? {} : { system: systemPrompt },
10702
+ messages: finalMessages,
10620
10703
  tools,
10621
10704
  temperature,
10622
10705
  abortSignal: input.abortSignal,
@@ -10895,6 +10978,7 @@ ${textContent}` };
10895
10978
  const richToolResults = [];
10896
10979
  const approvedCalls = [];
10897
10980
  const approvalNeeded = [];
10981
+ const deviceNeeded = [];
10898
10982
  for (const call of toolCalls) {
10899
10983
  if (isCancelled()) {
10900
10984
  yield emitCancellation();
@@ -10909,6 +10993,13 @@ ${textContent}` };
10909
10993
  name: runtimeToolName,
10910
10994
  input: call.input
10911
10995
  });
10996
+ } else if (this.resolveToolMode(runtimeToolName).dispatch === "device") {
10997
+ deviceNeeded.push({
10998
+ approvalId: `device_${randomUUID5()}`,
10999
+ id: call.id,
11000
+ name: runtimeToolName,
11001
+ input: call.input
11002
+ });
10912
11003
  } else {
10913
11004
  approvedCalls.push({
10914
11005
  id: call.id,
@@ -10957,6 +11048,46 @@ ${textContent}` };
10957
11048
  });
10958
11049
  return;
10959
11050
  }
11051
+ if (deviceNeeded.length > 0) {
11052
+ for (const dn of deviceNeeded) {
11053
+ yield pushEvent({
11054
+ type: "tool:device:required",
11055
+ tool: dn.name,
11056
+ input: dn.input,
11057
+ requestId: dn.approvalId
11058
+ });
11059
+ }
11060
+ const assistantContent2 = JSON.stringify({
11061
+ text: fullText,
11062
+ tool_calls: toolCalls.map((tc) => ({
11063
+ id: tc.id,
11064
+ name: exposedToolNames.get(tc.name) ?? tc.name,
11065
+ input: tc.input
11066
+ }))
11067
+ });
11068
+ const assistantMsg = {
11069
+ role: "assistant",
11070
+ content: assistantContent2,
11071
+ metadata: { timestamp: now(), id: randomUUID5(), step, runId }
11072
+ };
11073
+ const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
11074
+ yield pushEvent({
11075
+ type: "tool:device:checkpoint",
11076
+ approvals: deviceNeeded.map((dn) => ({
11077
+ approvalId: dn.approvalId,
11078
+ tool: dn.name,
11079
+ toolCallId: dn.id,
11080
+ input: dn.input
11081
+ })),
11082
+ checkpointMessages: deltaMessages,
11083
+ pendingToolCalls: toolCalls.map((tc) => ({
11084
+ id: tc.id,
11085
+ name: exposedToolNames.get(tc.name) ?? tc.name,
11086
+ input: tc.input
11087
+ }))
11088
+ });
11089
+ return;
11090
+ }
10960
11091
  const batchStart = now();
10961
11092
  if (isCancelled()) {
10962
11093
  yield emitCancellation();
@@ -11193,7 +11324,8 @@ ${textContent}` };
11193
11324
  const currentFingerprint = `${this.agentFileFingerprint}
11194
11325
  ${this.skillFingerprint}`;
11195
11326
  if (currentFingerprint !== lastPromptFingerprint) {
11196
- systemPrompt = await buildSystemPrompt();
11327
+ ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11328
+ systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
11197
11329
  lastPromptFingerprint = currentFingerprint;
11198
11330
  }
11199
11331
  }
@@ -11970,7 +12102,8 @@ var buildApprovalCheckpoints = ({
11970
12102
  runId,
11971
12103
  checkpointMessages,
11972
12104
  baseMessageCount,
11973
- pendingToolCalls
12105
+ pendingToolCalls,
12106
+ kind = "approval"
11974
12107
  }) => approvals.map((approval) => ({
11975
12108
  approvalId: approval.approvalId,
11976
12109
  runId,
@@ -11979,7 +12112,8 @@ var buildApprovalCheckpoints = ({
11979
12112
  input: approval.input,
11980
12113
  checkpointMessages,
11981
12114
  baseMessageCount,
11982
- pendingToolCalls
12115
+ pendingToolCalls,
12116
+ kind
11983
12117
  }));
11984
12118
  var applyTurnMetadata = (conv, meta, opts = {}) => {
11985
12119
  const {
@@ -13268,6 +13402,48 @@ ${resultBody}`,
13268
13402
  }
13269
13403
  }
13270
13404
  return results;
13405
+ },
13406
/**
 * Return (a page of) the transcript of a subagent spawned by the calling
 * conversation.
 *
 * opts.subagentId            conversation id of the subagent to read.
 * opts.parentConversationId  id of the caller; must match the subagent's parent.
 * opts.mode                  "final" (last assistant message only),
 *                            "assistant" (all assistant messages), anything
 *                            else returns every message.
 * opts.sinceIndex            optional offset into the mode-filtered list.
 * opts.maxMessages           optional cap on the number of messages returned;
 *                            negative / non-numeric values mean "no cap".
 *
 * Throws when the conversation does not exist, is not a subagent, or was
 * spawned by a different conversation.
 */
getTranscript: async (opts) => {
  const conversation = await this.conversationStore.get(opts.subagentId);
  if (!conversation) {
    throw new Error(`Subagent "${opts.subagentId}" not found.`);
  }
  if (!conversation.parentConversationId) {
    throw new Error(`Conversation "${opts.subagentId}" is not a subagent.`);
  }
  if (conversation.parentConversationId !== opts.parentConversationId) {
    throw new Error(`Subagent "${opts.subagentId}" was not spawned by this conversation.`);
  }
  const all = conversation.messages;
  let filtered;
  if (opts.mode === "final") {
    // Walk backwards so we stop at the most recent assistant message.
    let lastAssistant;
    for (let i = all.length - 1; i >= 0; i--) {
      if (all[i].role === "assistant") {
        lastAssistant = all[i];
        break;
      }
    }
    filtered = lastAssistant ? [lastAssistant] : [];
  } else if (opts.mode === "assistant") {
    filtered = all.filter((m) => m.role === "assistant");
  } else {
    filtered = all;
  }
  // sinceIndex originates from a model-supplied tool argument, so it may be
  // NaN, fractional, negative or past the end. Normalize it to an integer in
  // [0, filtered.length] so the reported startIndex is always a real offset
  // and `startIndex + messages.length` is a valid cursor for the next page.
  const requestedStart = opts.sinceIndex ?? 0;
  const startIndex = Number.isNaN(requestedStart) ? 0 : Math.min(filtered.length, Math.max(0, Math.floor(requestedStart)));
  const sliced = filtered.slice(startIndex);
  const cap = opts.maxMessages !== void 0 && opts.maxMessages >= 0 ? opts.maxMessages : sliced.length;
  const messages = sliced.slice(0, cap);
  // True when messages remain after the returned page.
  const truncated = startIndex + messages.length < filtered.length;
  return {
    subagentId: conversation.conversationId,
    task: conversation.subagentMeta?.task ?? conversation.title,
    status: conversation.subagentMeta?.status ?? "stopped",
    totalMessages: filtered.length,
    startIndex,
    messages,
    truncated
  };
}
13272
13448
  };
13273
13449
  }
@@ -13418,7 +13594,8 @@ var runConversationTurn = async (opts) => {
13418
13594
  ),
13419
13595
  messages: harnessMessages,
13420
13596
  files: opts.files && opts.files.length > 0 ? opts.files : void 0,
13421
- abortSignal: opts.abortSignal
13597
+ abortSignal: opts.abortSignal,
13598
+ disablePromptCache: opts.disablePromptCache
13422
13599
  },
13423
13600
  initialContextTokens: conversation.contextTokens ?? 0,
13424
13601
  initialContextWindow: conversation.contextWindow ?? 0,
@@ -13467,7 +13644,33 @@ var runConversationTurn = async (opts) => {
13467
13644
  input: event.input ?? {},
13468
13645
  checkpointMessages: void 0,
13469
13646
  baseMessageCount: historyMessages.length,
13470
- pendingToolCalls: []
13647
+ pendingToolCalls: [],
13648
+ kind: "approval"
13649
+ }
13650
+ ];
13651
+ conversation.updatedAt = Date.now();
13652
+ await opts.conversationStore.update(conversation);
13653
+ }
13654
+ await persistDraft();
13655
+ }
13656
+ if (event.type === "tool:device:required") {
13657
+ const toolText = `- device dispatch \`${event.tool}\``;
13658
+ draft.toolTimeline.push(toolText);
13659
+ draft.currentTools.push(toolText);
13660
+ const existing = Array.isArray(conversation.pendingApprovals) ? conversation.pendingApprovals : [];
13661
+ if (!existing.some((a) => a.approvalId === event.requestId)) {
13662
+ conversation.pendingApprovals = [
13663
+ ...existing,
13664
+ {
13665
+ approvalId: event.requestId,
13666
+ runId: latestRunId || conversation.runtimeRunId || "",
13667
+ tool: event.tool,
13668
+ toolCallId: void 0,
13669
+ input: event.input ?? {},
13670
+ checkpointMessages: void 0,
13671
+ baseMessageCount: historyMessages.length,
13672
+ pendingToolCalls: [],
13673
+ kind: "device"
13471
13674
  }
13472
13675
  ];
13473
13676
  conversation.updatedAt = Date.now();
@@ -13482,7 +13685,25 @@ var runConversationTurn = async (opts) => {
13482
13685
  runId: latestRunId,
13483
13686
  checkpointMessages: event.checkpointMessages,
13484
13687
  baseMessageCount: historyMessages.length,
13485
- pendingToolCalls: event.pendingToolCalls
13688
+ pendingToolCalls: event.pendingToolCalls,
13689
+ kind: "approval"
13690
+ });
13691
+ conversation._toolResultArchive = opts.harness.getToolResultArchive(
13692
+ opts.conversationId
13693
+ );
13694
+ conversation.updatedAt = Date.now();
13695
+ await opts.conversationStore.update(conversation);
13696
+ checkpointedRun = true;
13697
+ }
13698
+ if (event.type === "tool:device:checkpoint") {
13699
+ conversation.messages = buildMessages();
13700
+ conversation.pendingApprovals = buildApprovalCheckpoints({
13701
+ approvals: event.approvals,
13702
+ runId: latestRunId,
13703
+ checkpointMessages: event.checkpointMessages,
13704
+ baseMessageCount: historyMessages.length,
13705
+ pendingToolCalls: event.pendingToolCalls,
13706
+ kind: "device"
13486
13707
  });
13487
13708
  conversation._toolResultArchive = opts.harness.getToolResultArchive(
13488
13709
  opts.conversationId
@@ -13716,6 +13937,7 @@ export {
13716
13937
  normalizeApprovalCheckpoint,
13717
13938
  normalizeOtlp,
13718
13939
  normalizeScriptPolicyPath,
13940
+ normalizeToolAccess,
13719
13941
  parseAgentFile,
13720
13942
  parseAgentMarkdown,
13721
13943
  parseSkillFrontmatter,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.44.0",
3
+ "version": "0.46.0",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.10.0"
37
+ "@poncho-ai/sdk": "1.12.0"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
package/src/config.ts CHANGED
@@ -37,7 +37,26 @@ export interface UploadsConfig {
37
37
  endpoint?: string;
38
38
  }
39
39
 
40
- export type ToolAccess = boolean | "approval";
40
export type ToolAccess =
  | boolean
  | "approval"
  | { access?: "approval"; dispatch?: "device" };

/**
 * Normalize any ToolAccess value into a `{ access, dispatch }` struct.
 *
 * - `"approval"` becomes `{ access: "approval" }`.
 * - The object form keeps only the recognized literals: `access` survives
 *   only when it is exactly `"approval"` and `dispatch` only when it is
 *   exactly `"device"`. Any other value (e.g. a typo in an untyped config
 *   file) is reported as `undefined` rather than leaking through the
 *   declared return type.
 * - `boolean` and `undefined` collapse to `{}` — the boolean only encodes
 *   enable/disable, not dispatch — callers gate behavior on `dispatch` and
 *   `access`.
 *
 * @param value Raw per-tool access setting, possibly absent.
 * @returns The normalized mode; both fields are optional.
 */
export const normalizeToolAccess = (
  value: ToolAccess | undefined,
): { access?: "approval"; dispatch?: "device" } => {
  if (value === "approval") return { access: "approval" };
  if (value && typeof value === "object") {
    // Widen to unknown so the runtime checks below are not optimized away by
    // the static type: config objects are not validated before reaching here.
    const raw = value as { access?: unknown; dispatch?: unknown };
    return {
      access: raw.access === "approval" ? "approval" : undefined,
      dispatch: raw.dispatch === "device" ? "device" : undefined,
    };
  }
  return {};
};
41
60
 
42
61
  /** @deprecated Use flat tool keys on `tools` instead. Kept for backward compat. */
43
62
  export type BuiltInToolToggles = {
package/src/harness.ts CHANGED
@@ -38,7 +38,7 @@ import { createEditFileTool } from "./vfs/edit-file-tool.js";
38
38
  import { createWriteFileTool } from "./vfs/write-file-tool.js";
39
39
  import { PonchoFsAdapter } from "./vfs/poncho-fs-adapter.js";
40
40
  import { parseAgentFile, parseAgentMarkdown, renderAgentPrompt, type ParsedAgent, type AgentFrontmatter } from "./agent-parser.js";
41
- import { loadPonchoConfig, resolveMemoryConfig, resolveStateConfig, type PonchoConfig, type ToolAccess, type BuiltInToolToggles } from "./config.js";
41
+ import { loadPonchoConfig, normalizeToolAccess, resolveMemoryConfig, resolveStateConfig, type PonchoConfig, type ToolAccess, type BuiltInToolToggles } from "./config.js";
42
42
  import { ponchoDocsTool } from "./default-tools.js";
43
43
  import {
44
44
  createMemoryStore,
@@ -59,7 +59,7 @@ import {
59
59
  mergeSkills,
60
60
  } from "./skill-context.js";
61
61
  import { generateText, streamText, type ModelMessage } from "ai";
62
- import { addPromptCacheBreakpoints } from "./prompt-cache.js";
62
+ import { addPromptCacheBreakpoints, isAnthropicModel } from "./prompt-cache.js";
63
63
  import { jsonSchemaToZod } from "./schema-converter.js";
64
64
  import type { SkillMetadata } from "./skill-context.js";
65
65
  import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
@@ -878,7 +878,17 @@ export class AgentHarness {
878
878
  if (envOverride !== undefined) return envOverride;
879
879
 
880
880
  const flatValue = tools[toolName];
881
- if (typeof flatValue === "boolean" || flatValue === "approval") return flatValue;
881
+ if (
882
+ typeof flatValue === "boolean" ||
883
+ flatValue === "approval" ||
884
+ (flatValue !== null && typeof flatValue === "object" && !Array.isArray(flatValue) &&
885
+ // distinguish a ToolAccess object from the nested `defaults` /
886
+ // `byEnvironment` sibling fields by checking it has only the
887
+ // expected ToolAccess keys.
888
+ Object.keys(flatValue as object).every((k) => k === "access" || k === "dispatch"))
889
+ ) {
890
+ return flatValue as ToolAccess;
891
+ }
882
892
 
883
893
  const legacyValue = tools.defaults?.[toolName as keyof BuiltInToolToggles];
884
894
  if (legacyValue !== undefined) return legacyValue;
@@ -886,6 +896,11 @@ export class AgentHarness {
886
896
  return true;
887
897
  }
888
898
 
899
/**
 * Look up the configured access for `toolName` and hand it back in the
 * normalized `{ access, dispatch }` form produced by `normalizeToolAccess`.
 */
private resolveToolMode(toolName: string): { access?: "approval"; dispatch?: "device" } {
  const configuredAccess = this.resolveToolAccess(toolName);
  return normalizeToolAccess(configuredAccess);
}
903
+
889
904
  private isToolEnabled(name: string): boolean {
890
905
  const access = this.resolveToolAccess(name);
891
906
  if (access === false) return false;
@@ -1470,7 +1485,7 @@ export class AgentHarness {
1470
1485
  toolName: string,
1471
1486
  input: Record<string, unknown>,
1472
1487
  ): boolean {
1473
- if (this.resolveToolAccess(toolName) === "approval") {
1488
+ if (this.resolveToolMode(toolName).access === "approval") {
1474
1489
  return true;
1475
1490
  }
1476
1491
  if (toolName === "run_skill_script") {
@@ -2089,10 +2104,17 @@ export class AgentHarness {
2089
2104
  );
2090
2105
  }
2091
2106
  const hasFullToolResults = hasUntruncatedToolResults(messages);
2092
- if (hasFullToolResults) {
2093
- costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
2107
+ // The 5-min tail breakpoint is skipped only when the caller explicitly
2108
+ // declares no follow-up is coming (jobs, programmatic one-shots). The
2109
+ // 1-hour static breakpoint on the system prompt (Anthropic models only) is always on — it
2110
+ // amortizes across every later turn or job within the hour.
2111
+ const skipTailCache = input.disablePromptCache === true;
2112
+ if (skipTailCache) {
2113
+ costLog.debug(`tail cache breakpoint skipped — disablePromptCache (run=${runId.slice(0, 12)})`);
2114
+ } else if (hasFullToolResults) {
2115
+ costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
2094
2116
  } else {
2095
- costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
2117
+ costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
2096
2118
  }
2097
2119
  const inputMessageCount = messages.length;
2098
2120
  const events: AgentEvent[] = [];
@@ -2195,11 +2217,17 @@ ${typeStubs}
2195
2217
  Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
2196
2218
  }
2197
2219
 
2198
- const buildSystemPrompt = async (): Promise<string> => {
2220
+ // Split the system prompt into a static portion (stable across turns
2221
+ // and jobs within an hour, modulo MCP connect/skill author/memory edit)
2222
+ // and a dynamic tail (memory, todos, time). The static portion gets a
2223
+ // 1-hour Anthropic cache breakpoint downstream; the tail rides the
2224
+ // existing 5-min message-level breakpoint. See the streamText site for
2225
+ // the breakpoint wiring.
2226
+ const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
2199
2227
  const agentPrompt = renderCurrentAgentPrompt();
2200
2228
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
2201
2229
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
2202
- const promptWithSkills = skillContextWindow
2230
+ const staticPart = skillContextWindow
2203
2231
  ? `${agentPrompt}${developmentContext}\n\n${skillContextWindow}${browserContext}${fsContext}${isolateContext}`
2204
2232
  : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
2205
2233
  // Quantize to the hour so the system prompt is stable across runs
@@ -2215,9 +2243,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2215
2243
  const timeContext = this.reminderStore
2216
2244
  ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
2217
2245
  : "";
2218
- return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
2246
+ const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
2247
+ return { staticPart, dynamicPart };
2219
2248
  };
2220
- let systemPrompt = await buildSystemPrompt();
2249
+ let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
2250
+ await buildSystemPromptParts();
2251
+ // Concatenated form for legacy consumers (token estimation, telemetry).
2252
+ let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
2221
2253
  let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
2222
2254
 
2223
2255
  const pushEvent = (event: AgentEvent): AgentEvent => {
@@ -2757,25 +2789,55 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2757
2789
 
2758
2790
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2759
2791
  const maxTokens = agent.frontmatter.model?.maxTokens;
2760
- // Place the breakpoint before any untruncated tool-result so we
2761
- // cache only the stable prefix when prior-run tool results are
2762
- // still full-fidelity. Otherwise cache at the history tail.
2763
- const breakpointIndex = hasFullToolResults
2764
- ? findLastStableCacheIndex(coreMessages)
2765
- : coreMessages.length - 1;
2766
- const cachedMessages = addPromptCacheBreakpoints(
2767
- coreMessages,
2768
- modelInstance,
2769
- breakpointIndex,
2770
- );
2792
+ // Place the tail breakpoint before any untruncated tool-result so
2793
+ // we cache only the stable prefix when prior-run tool results are
2794
+ // still full-fidelity. Otherwise cache at the history tail. When
2795
+ // `skipTailCache` is set (per-run override), don't write the tail
2796
+ // breakpoint at all. The 1-hour static-prefix breakpoint is added
2797
+ // separately when assembling the final messages array.
2798
+ const cachedMessages = skipTailCache
2799
+ ? coreMessages
2800
+ : addPromptCacheBreakpoints(
2801
+ coreMessages,
2802
+ modelInstance,
2803
+ hasFullToolResults
2804
+ ? findLastStableCacheIndex(coreMessages)
2805
+ : coreMessages.length - 1,
2806
+ );
2807
+
2808
+ // Anthropic: split system into two blocks with a 1-hour cache
2809
+ // breakpoint at the boundary between the static portion (agent
2810
+ // body + skills + browser/fs/isolate context — stable across many
2811
+ // turns and jobs) and the dynamic tail (memory, todos, time).
2812
+ // The static block becomes a hot cache that every later turn and
2813
+ // job in the hour reads at 0.1× — much bigger payoff than the
2814
+ // 5-min tail breakpoint, which only survives active back-and-forth.
2815
+ // For non-Anthropic models, fall back to the single concatenated
2816
+ // string via `system:` — those providers auto-cache.
2817
+ const useStaticCache = isAnthropicModel(modelInstance);
2818
+ const finalMessages: ModelMessage[] = useStaticCache
2819
+ ? [
2820
+ {
2821
+ role: "system",
2822
+ content: staticSystemPart,
2823
+ providerOptions: {
2824
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
2825
+ },
2826
+ },
2827
+ ...(dynamicSystemPart.length > 0
2828
+ ? [{ role: "system" as const, content: dynamicSystemPart }]
2829
+ : []),
2830
+ ...cachedMessages,
2831
+ ]
2832
+ : cachedMessages;
2771
2833
 
2772
2834
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2773
2835
 
2774
2836
 
2775
2837
  const result = await streamText({
2776
2838
  model: modelInstance,
2777
- system: systemPrompt,
2778
- messages: cachedMessages,
2839
+ ...(useStaticCache ? {} : { system: systemPrompt }),
2840
+ messages: finalMessages,
2779
2841
  tools,
2780
2842
  temperature,
2781
2843
  abortSignal: input.abortSignal,
@@ -3119,8 +3181,19 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3119
3181
  name: string;
3120
3182
  input: Record<string, unknown>;
3121
3183
  }> = [];
3184
+ const deviceNeeded: Array<{
3185
+ approvalId: string;
3186
+ id: string;
3187
+ name: string;
3188
+ input: Record<string, unknown>;
3189
+ }> = [];
3122
3190
 
3123
- // Phase 1: classify all tool calls
3191
+ // Phase 1: classify all tool calls.
3192
+ // Approval gates run first; device dispatch fires only after approval is
3193
+ // cleared. On a device+approval tool the first dispatch pass yields the
3194
+ // approval, and the post-resume pass (where access is no longer required
3195
+ // because the message stream has the approve decision baked in) sees
3196
+ // dispatch="device" still set and falls into deviceNeeded below.
3124
3197
  for (const call of toolCalls) {
3125
3198
  if (isCancelled()) {
3126
3199
  yield emitCancellation();
@@ -3135,6 +3208,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3135
3208
  name: runtimeToolName,
3136
3209
  input: call.input,
3137
3210
  });
3211
+ } else if (this.resolveToolMode(runtimeToolName).dispatch === "device") {
3212
+ deviceNeeded.push({
3213
+ approvalId: `device_${randomUUID()}`,
3214
+ id: call.id,
3215
+ name: runtimeToolName,
3216
+ input: call.input,
3217
+ });
3138
3218
  } else {
3139
3219
  approvedCalls.push({
3140
3220
  id: call.id,
@@ -3187,6 +3267,52 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3187
3267
  return;
3188
3268
  }
3189
3269
 
3270
+ // Phase 2a': if any tools must dispatch to a connected device, emit
3271
+ // tool:device:required events for each and checkpoint with kind="device".
3272
+ // Consumers (e.g. PonchOS) route the events to the right WS and POST
3273
+ // the resulting tool output back through resumeRunFromCheckpoint.
3274
+ if (deviceNeeded.length > 0) {
3275
+ for (const dn of deviceNeeded) {
3276
+ yield pushEvent({
3277
+ type: "tool:device:required",
3278
+ tool: dn.name,
3279
+ input: dn.input,
3280
+ requestId: dn.approvalId,
3281
+ });
3282
+ }
3283
+
3284
+ const assistantContent = JSON.stringify({
3285
+ text: fullText,
3286
+ tool_calls: toolCalls.map(tc => ({
3287
+ id: tc.id,
3288
+ name: exposedToolNames.get(tc.name) ?? tc.name,
3289
+ input: tc.input,
3290
+ })),
3291
+ });
3292
+ const assistantMsg: Message = {
3293
+ role: "assistant",
3294
+ content: assistantContent,
3295
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
3296
+ };
3297
+ const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
3298
+ yield pushEvent({
3299
+ type: "tool:device:checkpoint",
3300
+ approvals: deviceNeeded.map(dn => ({
3301
+ approvalId: dn.approvalId,
3302
+ tool: dn.name,
3303
+ toolCallId: dn.id,
3304
+ input: dn.input,
3305
+ })),
3306
+ checkpointMessages: deltaMessages,
3307
+ pendingToolCalls: toolCalls.map(tc => ({
3308
+ id: tc.id,
3309
+ name: exposedToolNames.get(tc.name) ?? tc.name,
3310
+ input: tc.input,
3311
+ })),
3312
+ });
3313
+ return;
3314
+ }
3315
+
3190
3316
  // Phase 2b: no approvals needed — execute all auto-approved calls
3191
3317
  const batchStart = now();
3192
3318
  if (isCancelled()) {
@@ -3453,7 +3579,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3453
3579
  agent = this.parsedAgent as ParsedAgent;
3454
3580
  const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
3455
3581
  if (currentFingerprint !== lastPromptFingerprint) {
3456
- systemPrompt = await buildSystemPrompt();
3582
+ ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
3583
+ await buildSystemPromptParts());
3584
+ systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
3457
3585
  lastPromptFingerprint = currentFingerprint;
3458
3586
  }
3459
3587
  }
package/src/index.ts CHANGED
@@ -21,7 +21,7 @@ export * from "./telemetry.js";
21
21
  export * from "./secrets-store.js";
22
22
  export * from "./storage/index.js";
23
23
  export * from "./storage/store-adapters.js";
24
- export { PonchoFsAdapter } from "./vfs/poncho-fs-adapter.js";
24
+ export { PonchoFsAdapter, type VirtualMount } from "./vfs/poncho-fs-adapter.js";
25
25
  export { BashEnvironmentManager } from "./vfs/bash-manager.js";
26
26
  export { createBashTool } from "./vfs/bash-tool.js";
27
27
  export * from "./tenant-token.js";
@@ -1511,6 +1511,52 @@ export class AgentOrchestrator {
1511
1511
  }
1512
1512
  return results;
1513
1513
  },
1514
+
1515
/**
 * Return (a page of) the transcript of a subagent spawned by the calling
 * conversation.
 *
 * `opts.mode` selects the view: "final" is the last assistant message
 * only, "assistant" is every assistant message, and any other value
 * returns every message. `opts.sinceIndex` / `opts.maxMessages` page
 * through the mode-filtered list.
 *
 * @throws Error when the conversation does not exist, is not a subagent,
 *   or was spawned by a different conversation.
 */
getTranscript: async (opts) => {
  const conversation = await this.conversationStore.get(opts.subagentId);
  if (!conversation) {
    throw new Error(`Subagent "${opts.subagentId}" not found.`);
  }
  if (!conversation.parentConversationId) {
    throw new Error(`Conversation "${opts.subagentId}" is not a subagent.`);
  }
  if (conversation.parentConversationId !== opts.parentConversationId) {
    throw new Error(`Subagent "${opts.subagentId}" was not spawned by this conversation.`);
  }

  const all = conversation.messages;
  let filtered: Message[];
  if (opts.mode === "final") {
    // Walk backwards so we stop at the most recent assistant message.
    let lastAssistant: Message | undefined;
    for (let i = all.length - 1; i >= 0; i--) {
      const candidate = all[i];
      if (candidate?.role === "assistant") {
        lastAssistant = candidate;
        break;
      }
    }
    filtered = lastAssistant ? [lastAssistant] : [];
  } else if (opts.mode === "assistant") {
    filtered = all.filter((m) => m.role === "assistant");
  } else {
    filtered = all;
  }

  // sinceIndex originates from a model-supplied tool argument, so it may be
  // NaN, fractional, negative or past the end. Normalize it to an integer in
  // [0, filtered.length] so the reported startIndex is always a real offset
  // and `startIndex + messages.length` is a valid cursor for the next page.
  const requestedStart = opts.sinceIndex ?? 0;
  const startIndex = Number.isNaN(requestedStart)
    ? 0
    : Math.min(filtered.length, Math.max(0, Math.floor(requestedStart)));
  const sliced = filtered.slice(startIndex);
  // Negative or NaN maxMessages means "no cap".
  const cap = opts.maxMessages !== undefined && opts.maxMessages >= 0 ? opts.maxMessages : sliced.length;
  const messages = sliced.slice(0, cap);
  // True when messages remain after the returned page.
  const truncated = startIndex + messages.length < filtered.length;

  return {
    subagentId: conversation.conversationId,
    task: conversation.subagentMeta?.task ?? conversation.title,
    status: conversation.subagentMeta?.status ?? "stopped",
    totalMessages: filtered.length,
    startIndex,
    messages,
    truncated,
  };
},
1514
1560
  };
1515
1561
  }
1516
1562
 
@@ -62,6 +62,12 @@ export interface RunConversationTurnOpts {
62
62
  parameters?: Record<string, unknown>;
63
63
  abortSignal?: AbortSignal;
64
64
  tenantId?: string | null;
65
+ /**
66
+ * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
67
+ * turns with no follow-up coming (cron-fired jobs, etc.) so the
68
+ * harness skips the 5-minute tail cache breakpoint (the 1-hour static system-prompt breakpoint is still written on Anthropic models).
69
+ */
70
+ disablePromptCache?: boolean;
65
71
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
66
72
  onEvent?: (event: AgentEvent) => void | Promise<void>;
67
73
  }
@@ -203,6 +209,7 @@ export const runConversationTurn = async (
203
209
  messages: harnessMessages,
204
210
  files: opts.files && opts.files.length > 0 ? opts.files : undefined,
205
211
  abortSignal: opts.abortSignal,
212
+ disablePromptCache: opts.disablePromptCache,
206
213
  },
207
214
  initialContextTokens: conversation.contextTokens ?? 0,
208
215
  initialContextWindow: conversation.contextWindow ?? 0,
@@ -257,6 +264,34 @@ export const runConversationTurn = async (
257
264
  checkpointMessages: undefined,
258
265
  baseMessageCount: historyMessages.length,
259
266
  pendingToolCalls: [],
267
+ kind: "approval",
268
+ },
269
+ ];
270
+ conversation.updatedAt = Date.now();
271
+ await opts.conversationStore.update(conversation);
272
+ }
273
+ await persistDraft();
274
+ }
275
+ if (event.type === "tool:device:required") {
276
+ const toolText = `- device dispatch \`${event.tool}\``;
277
+ draft.toolTimeline.push(toolText);
278
+ draft.currentTools.push(toolText);
279
+ const existing = Array.isArray(conversation.pendingApprovals)
280
+ ? conversation.pendingApprovals
281
+ : [];
282
+ if (!existing.some((a) => a.approvalId === event.requestId)) {
283
+ conversation.pendingApprovals = [
284
+ ...existing,
285
+ {
286
+ approvalId: event.requestId,
287
+ runId: latestRunId || conversation.runtimeRunId || "",
288
+ tool: event.tool,
289
+ toolCallId: undefined,
290
+ input: (event.input ?? {}) as Record<string, unknown>,
291
+ checkpointMessages: undefined,
292
+ baseMessageCount: historyMessages.length,
293
+ pendingToolCalls: [],
294
+ kind: "device",
260
295
  },
261
296
  ];
262
297
  conversation.updatedAt = Date.now();
@@ -272,6 +307,24 @@ export const runConversationTurn = async (
272
307
  checkpointMessages: event.checkpointMessages,
273
308
  baseMessageCount: historyMessages.length,
274
309
  pendingToolCalls: event.pendingToolCalls,
310
+ kind: "approval",
311
+ });
312
+ conversation._toolResultArchive = opts.harness.getToolResultArchive(
313
+ opts.conversationId,
314
+ );
315
+ conversation.updatedAt = Date.now();
316
+ await opts.conversationStore.update(conversation);
317
+ checkpointedRun = true;
318
+ }
319
+ if (event.type === "tool:device:checkpoint") {
320
+ conversation.messages = buildMessages();
321
+ conversation.pendingApprovals = buildApprovalCheckpoints({
322
+ approvals: event.approvals,
323
+ runId: latestRunId,
324
+ checkpointMessages: event.checkpointMessages,
325
+ baseMessageCount: historyMessages.length,
326
+ pendingToolCalls: event.pendingToolCalls,
327
+ kind: "device",
275
328
  });
276
329
  conversation._toolResultArchive = opts.harness.getToolResultArchive(
277
330
  opts.conversationId,
@@ -304,12 +304,14 @@ export const buildApprovalCheckpoints = ({
304
304
  checkpointMessages,
305
305
  baseMessageCount,
306
306
  pendingToolCalls,
307
+ kind = "approval",
307
308
  }: {
308
309
  approvals: ApprovalEventItem[];
309
310
  runId: string;
310
311
  checkpointMessages: Message[];
311
312
  baseMessageCount: number;
312
313
  pendingToolCalls: PendingToolCall[];
314
+ kind?: "approval" | "device";
313
315
  }): NonNullable<Conversation["pendingApprovals"]> =>
314
316
  approvals.map((approval) => ({
315
317
  approvalId: approval.approvalId,
@@ -320,6 +322,7 @@ export const buildApprovalCheckpoints = ({
320
322
  checkpointMessages,
321
323
  baseMessageCount,
322
324
  pendingToolCalls,
325
+ kind,
323
326
  }));
324
327
 
325
328
  // ── Turn metadata persistence ──
@@ -1,6 +1,6 @@
1
1
  import type { ModelMessage, LanguageModel } from "ai";
2
2
 
3
- function isAnthropicModel(model: LanguageModel): boolean {
3
+ export function isAnthropicModel(model: LanguageModel): boolean {
4
4
  if (typeof model === "string") {
5
5
  return model.includes("anthropic") || model.includes("claude");
6
6
  }
package/src/state.ts CHANGED
@@ -47,6 +47,15 @@ export interface Conversation {
47
47
  baseMessageCount?: number;
48
48
  pendingToolCalls?: Array<{ id: string; name: string; input: Record<string, unknown> }>;
49
49
  decision?: "approved" | "denied";
50
+ /**
51
+ * Checkpoint kind discriminator.
52
+ * - "approval" (default for legacy rows): user approve/deny gate.
53
+ * - "device": tool executes on a connected client device (e.g. iOS); the
54
+ * consumer of the harness POSTs a tool result back to resume.
55
+ * Treat `undefined` as "approval" for backward compatibility with rows
56
+ * persisted before this field existed.
57
+ */
58
+ kind?: "approval" | "device";
50
59
  }>;
51
60
  runStatus?: "running" | "idle";
52
61
  ownerId: string;
@@ -19,6 +19,18 @@ export interface SubagentSpawnResult {
19
19
  subagentId: string;
20
20
  }
21
21
 
22
+ export type SubagentTranscriptMode = "final" | "assistant" | "full";
23
+
24
+ export interface SubagentTranscript {
25
+ subagentId: string;
26
+ task: string;
27
+ status: string;
28
+ totalMessages: number;
29
+ startIndex: number;
30
+ messages: Message[];
31
+ truncated: boolean;
32
+ }
33
+
22
34
  export interface SubagentManager {
23
35
  spawn(opts: {
24
36
  task: string;
@@ -32,4 +44,12 @@ export interface SubagentManager {
32
44
  stop(subagentId: string): Promise<void>;
33
45
 
34
46
  list(parentConversationId: string): Promise<SubagentSummary[]>;
47
+
48
+ getTranscript(opts: {
49
+ subagentId: string;
50
+ parentConversationId: string;
51
+ mode: SubagentTranscriptMode;
52
+ sinceIndex?: number;
53
+ maxMessages?: number;
54
+ }): Promise<SubagentTranscript>;
35
55
  }
@@ -131,4 +131,66 @@ export const createSubagentTools = (
131
131
  return { subagents };
132
132
  },
133
133
  }),
134
+
135
+ defineTool({
136
+ name: "read_subagent",
137
+ description:
138
+ "Fetch the conversation transcript of a subagent you spawned. Use this to inspect a " +
139
+ "subagent's intermediate reasoning, tool calls, or full output -- instead of asking it " +
140
+ "to repeat its work via message_subagent.\n\n" +
141
+ "Modes:\n" +
142
+ "- 'final' (default): just the last assistant message. Cheap.\n" +
143
+ "- 'assistant': all assistant messages, no tool calls/results.\n" +
144
+ "- 'full': every message including tool calls and results. Can be large.\n\n" +
145
+ "Use since_index / max_messages to page through long transcripts. Only works on " +
146
+ "subagents directly spawned by this conversation.",
147
+ inputSchema: {
148
+ type: "object",
149
+ properties: {
150
+ subagent_id: {
151
+ type: "string",
152
+ description: "The subagent ID (from spawn_subagent or list_subagents).",
153
+ },
154
+ mode: {
155
+ type: "string",
156
+ enum: ["final", "assistant", "full"],
157
+ description: "How much of the transcript to return. Defaults to 'final'.",
158
+ },
159
+ since_index: {
160
+ type: "number",
161
+ description: "Skip messages before this index (applied after mode filter).",
162
+ },
163
+ max_messages: {
164
+ type: "number",
165
+ description: "Cap the number of messages returned.",
166
+ },
167
+ },
168
+ required: ["subagent_id"],
169
+ additionalProperties: false,
170
+ },
171
+ handler: async (input: Record<string, unknown>, context: ToolContext) => {
172
+ const subagentId = typeof input.subagent_id === "string" ? input.subagent_id : "";
173
+ if (!subagentId) {
174
+ return { error: "subagent_id is required" };
175
+ }
176
+ const parentConversationId = context.conversationId;
177
+ if (!parentConversationId) {
178
+ return { error: "no active conversation" };
179
+ }
180
+ const rawMode = typeof input.mode === "string" ? input.mode : "final";
181
+ const mode: "final" | "assistant" | "full" =
182
+ rawMode === "assistant" || rawMode === "full" ? rawMode : "final";
183
+ try {
184
+ return await manager.getTranscript({
185
+ subagentId,
186
+ parentConversationId,
187
+ mode,
188
+ sinceIndex: typeof input.since_index === "number" ? input.since_index : undefined,
189
+ maxMessages: typeof input.max_messages === "number" ? input.max_messages : undefined,
190
+ });
191
+ } catch (err) {
192
+ return { error: err instanceof Error ? err.message : String(err) };
193
+ }
194
+ },
195
+ }),
134
196
  ];