@poncho-ai/harness 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.14.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.14.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -7,8 +7,8 @@
7
7
  CLI tsup v8.5.1
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
- ESM dist/index.js 173.62 KB
11
- ESM ⚡️ Build success in 98ms
10
+ ESM dist/index.js 175.01 KB
11
+ ESM ⚡️ Build success in 76ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 5281ms
14
- DTS dist/index.d.ts 20.92 KB
13
+ DTS ⚡️ Build success in 5663ms
14
+ DTS dist/index.d.ts 21.38 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.14.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [`e000b96`](https://github.com/cesr/poncho-ai/commit/e000b96837cbbb8d95c868c91a614f458868c444) Thanks [@cesr](https://github.com/cesr)! - Durable approval checkpoints, email conversation improvements, and web UI fixes
8
+ - Simplify approval system to checkpoint-only (remove legacy blocking approvalHandler)
9
+ - Optimize checkpoint storage with delta messages instead of full history
10
+ - Add sidebar sections for conversations awaiting approval with status indicator
11
+ - Fix nested checkpoint missing baseMessageCount in resumeRunFromCheckpoint
12
+ - Improve email conversation titles (sender email + subject)
13
+ - Remove email threading — each incoming email creates its own conversation
14
+ - Fix streaming after approval to preserve existing messages (liveOnly mode)
15
+ - Preserve newlines in user messages in web UI
16
+
17
+ - Updated dependencies [[`e000b96`](https://github.com/cesr/poncho-ai/commit/e000b96837cbbb8d95c868c91a614f458868c444)]:
18
+ - @poncho-ai/sdk@1.0.2
19
+
3
20
  ## 0.14.0
4
21
 
5
22
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Message, ToolDefinition, RunInput, AgentEvent, RunResult, JsonSchema, ToolContext } from '@poncho-ai/sdk';
1
+ import { Message, ToolDefinition, ToolContext, RunInput, AgentEvent, RunResult, JsonSchema } from '@poncho-ai/sdk';
2
2
  export { ToolDefinition, defineTool } from '@poncho-ai/sdk';
3
3
  import { LanguageModel } from 'ai';
4
4
  import { z } from 'zod';
@@ -84,7 +84,15 @@ interface Conversation {
84
84
  approvalId: string;
85
85
  runId: string;
86
86
  tool: string;
87
+ toolCallId?: string;
87
88
  input: Record<string, unknown>;
89
+ checkpointMessages?: Message[];
90
+ baseMessageCount?: number;
91
+ pendingToolCalls?: Array<{
92
+ id: string;
93
+ name: string;
94
+ input: Record<string, unknown>;
95
+ }>;
88
96
  }>;
89
97
  ownerId: string;
90
98
  tenantId: string | null;
@@ -362,17 +370,33 @@ declare const getModelContextWindow: (modelName: string) => number;
362
370
  */
363
371
  declare const createModelProvider: (provider?: string) => ModelProviderFactory;
364
372
 
373
+ interface ToolCall {
374
+ id: string;
375
+ name: string;
376
+ input: Record<string, unknown>;
377
+ }
378
+ interface ToolExecutionResult {
379
+ callId: string;
380
+ tool: string;
381
+ output?: unknown;
382
+ error?: string;
383
+ }
384
+ declare class ToolDispatcher {
385
+ private readonly tools;
386
+ register(tool: ToolDefinition): void;
387
+ registerMany(tools: ToolDefinition[]): void;
388
+ unregister(name: string): void;
389
+ unregisterMany(names: Iterable<string>): void;
390
+ list(): ToolDefinition[];
391
+ get(name: string): ToolDefinition | undefined;
392
+ execute(call: ToolCall, context: ToolContext): Promise<ToolExecutionResult>;
393
+ executeBatch(calls: ToolCall[], context: ToolContext): Promise<ToolExecutionResult[]>;
394
+ }
395
+
365
396
  interface HarnessOptions {
366
397
  workingDir?: string;
367
398
  environment?: "development" | "staging" | "production";
368
399
  toolDefinitions?: ToolDefinition[];
369
- approvalHandler?: (request: {
370
- tool: string;
371
- input: Record<string, unknown>;
372
- runId: string;
373
- step: number;
374
- approvalId: string;
375
- }) => Promise<boolean> | boolean;
376
400
  modelProvider?: ModelProviderFactory;
377
401
  uploadStore?: UploadStore;
378
402
  }
@@ -388,7 +412,6 @@ declare class AgentHarness {
388
412
  private modelProvider;
389
413
  private readonly modelProviderInjected;
390
414
  private readonly dispatcher;
391
- private readonly approvalHandler?;
392
415
  readonly uploadStore?: UploadStore;
393
416
  private skillContextWindow;
394
417
  private memoryStore?;
@@ -439,6 +462,19 @@ declare class AgentHarness {
439
462
  */
440
463
  runWithTelemetry(input: RunInput): AsyncGenerator<AgentEvent>;
441
464
  run(input: RunInput): AsyncGenerator<AgentEvent>;
465
+ executeTools(calls: ToolCall[], context: ToolContext): Promise<ToolExecutionResult[]>;
466
+ continueFromToolResult(input: {
467
+ messages: Message[];
468
+ toolResults: Array<{
469
+ callId: string;
470
+ toolName: string;
471
+ result?: unknown;
472
+ error?: string;
473
+ }>;
474
+ conversationId?: string;
475
+ parameters?: Record<string, unknown>;
476
+ abortSignal?: AbortSignal;
477
+ }): AsyncGenerator<AgentEvent>;
442
478
  runToCompletion(input: RunInput): Promise<HarnessRunOutput>;
443
479
  }
444
480
 
@@ -563,27 +599,4 @@ declare class TelemetryEmitter {
563
599
  private sendOtlp;
564
600
  }
565
601
 
566
- interface ToolCall {
567
- id: string;
568
- name: string;
569
- input: Record<string, unknown>;
570
- }
571
- interface ToolExecutionResult {
572
- callId: string;
573
- tool: string;
574
- output?: unknown;
575
- error?: string;
576
- }
577
- declare class ToolDispatcher {
578
- private readonly tools;
579
- register(tool: ToolDefinition): void;
580
- registerMany(tools: ToolDefinition[]): void;
581
- unregister(name: string): void;
582
- unregisterMany(names: Iterable<string>): void;
583
- list(): ToolDefinition[];
584
- get(name: string): ToolDefinition | undefined;
585
- execute(call: ToolCall, context: ToolContext): Promise<ToolExecutionResult>;
586
- executeBatch(calls: ToolCall[], context: ToolContext): Promise<ToolExecutionResult[]>;
587
- }
588
-
589
602
  export { type AgentFrontmatter, AgentHarness, type AgentIdentity, type AgentLimitsConfig, type AgentModelConfig, type BuiltInToolToggles, type Conversation, type ConversationState, type ConversationStore, type CronJobConfig, type HarnessOptions, type HarnessRunOutput, InMemoryConversationStore, InMemoryStateStore, LatitudeCapture, type LatitudeCaptureConfig, LocalMcpBridge, LocalUploadStore, type MainMemory, type McpConfig, type MemoryConfig, type MemoryStore, type MessagingChannelConfig, type ModelProviderFactory, PONCHO_UPLOAD_SCHEME, type ParsedAgent, type PonchoConfig, type RemoteMcpServerConfig, type RuntimeRenderContext, S3UploadStore, STORAGE_SCHEMA_VERSION, type SkillContextEntry, type SkillMetadata, type StateConfig, type StateProviderName, type StateStore, type StorageConfig, type TelemetryConfig, TelemetryEmitter, type ToolAccess, type ToolCall, ToolDispatcher, type ToolExecutionResult, type UploadStore, type UploadsConfig, VercelBlobUploadStore, buildAgentDirectoryName, buildSkillContextWindow, createConversationStore, createDefaultTools, createMemoryStore, createMemoryTools, createModelProvider, createSkillTools, createStateStore, createUploadStore, createWriteTool, deriveUploadKey, ensureAgentIdentity, generateAgentId, getAgentStoreDirectory, getModelContextWindow, getPonchoStoreRoot, jsonSchemaToZod, loadPonchoConfig, loadSkillContext, loadSkillInstructions, loadSkillMetadata, normalizeScriptPolicyPath, parseAgentFile, parseAgentMarkdown, readSkillResource, renderAgentPrompt, resolveAgentIdentity, resolveMemoryConfig, resolveSkillDirs, resolveStateConfig, slugifyStorageComponent };
package/dist/index.js CHANGED
@@ -2851,7 +2851,6 @@ var AgentHarness = class {
2851
2851
  modelProvider;
2852
2852
  modelProviderInjected;
2853
2853
  dispatcher = new ToolDispatcher();
2854
- approvalHandler;
2855
2854
  uploadStore;
2856
2855
  skillContextWindow = "";
2857
2856
  memoryStore;
@@ -2925,7 +2924,6 @@ var AgentHarness = class {
2925
2924
  this.environment = options.environment ?? "development";
2926
2925
  this.modelProviderInjected = !!options.modelProvider;
2927
2926
  this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
2928
- this.approvalHandler = options.approvalHandler;
2929
2927
  this.uploadStore = options.uploadStore;
2930
2928
  if (options.toolDefinitions?.length) {
2931
2929
  this.dispatcher.registerMany(options.toolDefinitions);
@@ -3317,6 +3315,7 @@ var AgentHarness = class {
3317
3315
  const platformMaxDurationSec = Number(process.env.PONCHO_MAX_DURATION) || 0;
3318
3316
  const softDeadlineMs = platformMaxDurationSec > 0 ? platformMaxDurationSec * 800 : 0;
3319
3317
  const messages = [...input.messages ?? []];
3318
+ const inputMessageCount = messages.length;
3320
3319
  const events = [];
3321
3320
  const systemPrompt = renderAgentPrompt(agent, {
3322
3321
  parameters: input.parameters,
@@ -3366,41 +3365,43 @@ ${boundedMainMemory.trim()}` : "";
3366
3365
  agentId: agent.frontmatter.id ?? agent.frontmatter.name,
3367
3366
  contextWindow
3368
3367
  });
3369
- if (input.files && input.files.length > 0) {
3370
- const parts = [
3371
- { type: "text", text: input.task }
3372
- ];
3373
- for (const file of input.files) {
3374
- if (this.uploadStore) {
3375
- const buf = Buffer.from(file.data, "base64");
3376
- const key = deriveUploadKey(buf, file.mediaType);
3377
- const ref = await this.uploadStore.put(key, buf, file.mediaType);
3378
- parts.push({
3379
- type: "file",
3380
- data: ref,
3381
- mediaType: file.mediaType,
3382
- filename: file.filename
3383
- });
3384
- } else {
3385
- parts.push({
3386
- type: "file",
3387
- data: file.data,
3388
- mediaType: file.mediaType,
3389
- filename: file.filename
3390
- });
3368
+ if (input.task != null) {
3369
+ if (input.files && input.files.length > 0) {
3370
+ const parts = [
3371
+ { type: "text", text: input.task }
3372
+ ];
3373
+ for (const file of input.files) {
3374
+ if (this.uploadStore) {
3375
+ const buf = Buffer.from(file.data, "base64");
3376
+ const key = deriveUploadKey(buf, file.mediaType);
3377
+ const ref = await this.uploadStore.put(key, buf, file.mediaType);
3378
+ parts.push({
3379
+ type: "file",
3380
+ data: ref,
3381
+ mediaType: file.mediaType,
3382
+ filename: file.filename
3383
+ });
3384
+ } else {
3385
+ parts.push({
3386
+ type: "file",
3387
+ data: file.data,
3388
+ mediaType: file.mediaType,
3389
+ filename: file.filename
3390
+ });
3391
+ }
3391
3392
  }
3393
+ messages.push({
3394
+ role: "user",
3395
+ content: parts,
3396
+ metadata: { timestamp: now(), id: randomUUID3() }
3397
+ });
3398
+ } else {
3399
+ messages.push({
3400
+ role: "user",
3401
+ content: input.task,
3402
+ metadata: { timestamp: now(), id: randomUUID3() }
3403
+ });
3392
3404
  }
3393
- messages.push({
3394
- role: "user",
3395
- content: parts,
3396
- metadata: { timestamp: now(), id: randomUUID3() }
3397
- });
3398
- } else {
3399
- messages.push({
3400
- role: "user",
3401
- content: input.task,
3402
- metadata: { timestamp: now(), id: randomUUID3() }
3403
- });
3404
3405
  }
3405
3406
  let responseText = "";
3406
3407
  let totalInputTokens = 0;
@@ -3843,45 +3844,34 @@ ${textContent}` };
3843
3844
  input: call.input,
3844
3845
  approvalId
3845
3846
  });
3846
- const approved = this.approvalHandler ? await this.approvalHandler({
3847
+ const assistantContent2 = JSON.stringify({
3848
+ text: fullText,
3849
+ tool_calls: toolCalls.map((tc) => ({
3850
+ id: tc.id,
3851
+ name: exposedToolNames.get(tc.name) ?? tc.name,
3852
+ input: tc.input
3853
+ }))
3854
+ });
3855
+ const assistantMsg = {
3856
+ role: "assistant",
3857
+ content: assistantContent2,
3858
+ metadata: { timestamp: now(), id: randomUUID3(), step }
3859
+ };
3860
+ const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
3861
+ yield pushEvent({
3862
+ type: "tool:approval:checkpoint",
3863
+ approvalId,
3847
3864
  tool: runtimeToolName,
3865
+ toolCallId: call.id,
3848
3866
  input: call.input,
3849
- runId,
3850
- step,
3851
- approvalId
3852
- }) : false;
3853
- if (isCancelled()) {
3854
- yield emitCancellation();
3855
- return;
3856
- }
3857
- if (!approved) {
3858
- if (this.insideTelemetryCapture && this.latitudeTelemetry) {
3859
- const deniedSpan = this.latitudeTelemetry.span.tool({
3860
- name: runtimeToolName,
3861
- call: { id: call.id, arguments: call.input }
3862
- });
3863
- deniedSpan.end({ result: { value: "Tool execution denied by approval policy", isError: true } });
3864
- }
3865
- yield pushEvent({
3866
- type: "tool:approval:denied",
3867
- approvalId,
3868
- reason: "No approval handler granted execution"
3869
- });
3870
- yield pushEvent({
3871
- type: "tool:error",
3872
- tool: call.name,
3873
- error: "Tool execution denied by approval policy",
3874
- recoverable: true
3875
- });
3876
- toolResultsForModel.push({
3877
- type: "tool_result",
3878
- tool_use_id: call.id,
3879
- tool_name: runtimeToolName,
3880
- content: "Tool error: Tool execution denied by approval policy"
3881
- });
3882
- continue;
3883
- }
3884
- yield pushEvent({ type: "tool:approval:granted", approvalId });
3867
+ checkpointMessages: deltaMessages,
3868
+ pendingToolCalls: toolCalls.map((tc) => ({
3869
+ id: tc.id,
3870
+ name: exposedToolNames.get(tc.name) ?? tc.name,
3871
+ input: tc.input
3872
+ }))
3873
+ });
3874
+ return;
3885
3875
  }
3886
3876
  approvedCalls.push({
3887
3877
  id: call.id,
@@ -4015,12 +4005,64 @@ ${textContent}` };
4015
4005
  });
4016
4006
  }
4017
4007
  }
4008
+ async executeTools(calls, context) {
4009
+ return this.dispatcher.executeBatch(calls, context);
4010
+ }
4011
+ async *continueFromToolResult(input) {
4012
+ const messages = [...input.messages];
4013
+ const lastMsg = messages[messages.length - 1];
4014
+ if (!lastMsg || lastMsg.role !== "assistant") {
4015
+ throw new Error("continueFromToolResult: last message must be an assistant message with tool calls");
4016
+ }
4017
+ let allToolCalls = [];
4018
+ try {
4019
+ const parsed = JSON.parse(typeof lastMsg.content === "string" ? lastMsg.content : "");
4020
+ allToolCalls = parsed.tool_calls ?? [];
4021
+ } catch {
4022
+ throw new Error("continueFromToolResult: could not parse tool calls from last assistant message");
4023
+ }
4024
+ const providedMap = new Map(
4025
+ input.toolResults.map((r) => [r.callId, r])
4026
+ );
4027
+ const toolResultsForModel = [];
4028
+ for (const tc of allToolCalls) {
4029
+ const provided = providedMap.get(tc.id);
4030
+ if (provided) {
4031
+ toolResultsForModel.push({
4032
+ type: "tool_result",
4033
+ tool_use_id: tc.id,
4034
+ tool_name: provided.toolName,
4035
+ content: provided.error ? `Tool error: ${provided.error}` : JSON.stringify(provided.result ?? null)
4036
+ });
4037
+ } else {
4038
+ toolResultsForModel.push({
4039
+ type: "tool_result",
4040
+ tool_use_id: tc.id,
4041
+ tool_name: tc.name,
4042
+ content: "Tool error: Tool execution deferred (pending approval checkpoint)"
4043
+ });
4044
+ }
4045
+ }
4046
+ messages.push({
4047
+ role: "tool",
4048
+ content: JSON.stringify(toolResultsForModel),
4049
+ metadata: { timestamp: Date.now(), id: randomUUID3() }
4050
+ });
4051
+ yield* this.runWithTelemetry({
4052
+ messages,
4053
+ conversationId: input.conversationId,
4054
+ parameters: input.parameters,
4055
+ abortSignal: input.abortSignal
4056
+ });
4057
+ }
4018
4058
  async runToCompletion(input) {
4019
4059
  const events = [];
4020
4060
  let runId = "";
4021
4061
  let finalResult;
4022
4062
  const messages = [...input.messages ?? []];
4023
- messages.push({ role: "user", content: input.task });
4063
+ if (input.task != null) {
4064
+ messages.push({ role: "user", content: input.task });
4065
+ }
4024
4066
  for await (const event of this.runWithTelemetry(input)) {
4025
4067
  events.push(event);
4026
4068
  if (event.type === "run:started") {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.14.0",
3
+ "version": "0.14.1",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -31,7 +31,7 @@
31
31
  "redis": "^5.10.0",
32
32
  "yaml": "^2.4.0",
33
33
  "zod": "^3.22.0",
34
- "@poncho-ai/sdk": "1.0.1"
34
+ "@poncho-ai/sdk": "1.0.2"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/mustache": "^4.2.6",
package/src/harness.ts CHANGED
@@ -37,20 +37,13 @@ import {
37
37
  matchesSlashPattern,
38
38
  normalizeRelativeScriptPattern,
39
39
  } from "./tool-policy.js";
40
- import { ToolDispatcher } from "./tool-dispatcher.js";
40
+ import { ToolDispatcher, type ToolCall, type ToolExecutionResult } from "./tool-dispatcher.js";
41
41
  import { ensureAgentIdentity } from "./agent-identity.js";
42
42
 
43
43
  export interface HarnessOptions {
44
44
  workingDir?: string;
45
45
  environment?: "development" | "staging" | "production";
46
46
  toolDefinitions?: ToolDefinition[];
47
- approvalHandler?: (request: {
48
- tool: string;
49
- input: Record<string, unknown>;
50
- runId: string;
51
- step: number;
52
- approvalId: string;
53
- }) => Promise<boolean> | boolean;
54
47
  modelProvider?: ModelProviderFactory;
55
48
  uploadStore?: UploadStore;
56
49
  }
@@ -414,7 +407,6 @@ export class AgentHarness {
414
407
  private modelProvider: ModelProviderFactory;
415
408
  private readonly modelProviderInjected: boolean;
416
409
  private readonly dispatcher = new ToolDispatcher();
417
- private readonly approvalHandler?: HarnessOptions["approvalHandler"];
418
410
  readonly uploadStore?: UploadStore;
419
411
  private skillContextWindow = "";
420
412
  private memoryStore?: MemoryStore;
@@ -500,7 +492,6 @@ export class AgentHarness {
500
492
  this.environment = options.environment ?? "development";
501
493
  this.modelProviderInjected = !!options.modelProvider;
502
494
  this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
503
- this.approvalHandler = options.approvalHandler;
504
495
  this.uploadStore = options.uploadStore;
505
496
 
506
497
  if (options.toolDefinitions?.length) {
@@ -963,6 +954,7 @@ export class AgentHarness {
963
954
  ? platformMaxDurationSec * 800
964
955
  : 0;
965
956
  const messages: Message[] = [...(input.messages ?? [])];
957
+ const inputMessageCount = messages.length;
966
958
  const events: AgentEvent[] = [];
967
959
 
968
960
  const systemPrompt = renderAgentPrompt(agent, {
@@ -1024,41 +1016,43 @@ ${boundedMainMemory.trim()}`
1024
1016
  contextWindow,
1025
1017
  });
1026
1018
 
1027
- if (input.files && input.files.length > 0) {
1028
- const parts: ContentPart[] = [
1029
- { type: "text", text: input.task } satisfies TextContentPart,
1030
- ];
1031
- for (const file of input.files) {
1032
- if (this.uploadStore) {
1033
- const buf = Buffer.from(file.data, "base64");
1034
- const key = deriveUploadKey(buf, file.mediaType);
1035
- const ref = await this.uploadStore.put(key, buf, file.mediaType);
1036
- parts.push({
1037
- type: "file",
1038
- data: ref,
1039
- mediaType: file.mediaType,
1040
- filename: file.filename,
1041
- } satisfies FileContentPart);
1042
- } else {
1043
- parts.push({
1044
- type: "file",
1045
- data: file.data,
1046
- mediaType: file.mediaType,
1047
- filename: file.filename,
1048
- } satisfies FileContentPart);
1019
+ if (input.task != null) {
1020
+ if (input.files && input.files.length > 0) {
1021
+ const parts: ContentPart[] = [
1022
+ { type: "text", text: input.task } satisfies TextContentPart,
1023
+ ];
1024
+ for (const file of input.files) {
1025
+ if (this.uploadStore) {
1026
+ const buf = Buffer.from(file.data, "base64");
1027
+ const key = deriveUploadKey(buf, file.mediaType);
1028
+ const ref = await this.uploadStore.put(key, buf, file.mediaType);
1029
+ parts.push({
1030
+ type: "file",
1031
+ data: ref,
1032
+ mediaType: file.mediaType,
1033
+ filename: file.filename,
1034
+ } satisfies FileContentPart);
1035
+ } else {
1036
+ parts.push({
1037
+ type: "file",
1038
+ data: file.data,
1039
+ mediaType: file.mediaType,
1040
+ filename: file.filename,
1041
+ } satisfies FileContentPart);
1042
+ }
1049
1043
  }
1044
+ messages.push({
1045
+ role: "user",
1046
+ content: parts,
1047
+ metadata: { timestamp: now(), id: randomUUID() },
1048
+ });
1049
+ } else {
1050
+ messages.push({
1051
+ role: "user",
1052
+ content: input.task,
1053
+ metadata: { timestamp: now(), id: randomUUID() },
1054
+ });
1050
1055
  }
1051
- messages.push({
1052
- role: "user",
1053
- content: parts,
1054
- metadata: { timestamp: now(), id: randomUUID() },
1055
- });
1056
- } else {
1057
- messages.push({
1058
- role: "user",
1059
- content: input.task,
1060
- metadata: { timestamp: now(), id: randomUUID() },
1061
- });
1062
1056
  }
1063
1057
 
1064
1058
  let responseText = "";
@@ -1597,47 +1591,35 @@ ${boundedMainMemory.trim()}`
1597
1591
  input: call.input,
1598
1592
  approvalId,
1599
1593
  });
1600
- const approved = this.approvalHandler
1601
- ? await this.approvalHandler({
1602
- tool: runtimeToolName,
1603
- input: call.input,
1604
- runId,
1605
- step,
1606
- approvalId,
1607
- })
1608
- : false;
1609
- if (isCancelled()) {
1610
- yield emitCancellation();
1611
- return;
1612
- }
1613
- if (!approved) {
1614
- if (this.insideTelemetryCapture && this.latitudeTelemetry) {
1615
- const deniedSpan = this.latitudeTelemetry.span.tool({
1616
- name: runtimeToolName,
1617
- call: { id: call.id, arguments: call.input },
1618
- });
1619
- deniedSpan.end({ result: { value: "Tool execution denied by approval policy", isError: true } });
1620
- }
1621
- yield pushEvent({
1622
- type: "tool:approval:denied",
1623
- approvalId,
1624
- reason: "No approval handler granted execution",
1625
- });
1626
- yield pushEvent({
1627
- type: "tool:error",
1628
- tool: call.name,
1629
- error: "Tool execution denied by approval policy",
1630
- recoverable: true,
1631
- });
1632
- toolResultsForModel.push({
1633
- type: "tool_result",
1634
- tool_use_id: call.id,
1635
- tool_name: runtimeToolName,
1636
- content: "Tool error: Tool execution denied by approval policy",
1637
- });
1638
- continue;
1639
- }
1640
- yield pushEvent({ type: "tool:approval:granted", approvalId });
1594
+
1595
+ const assistantContent = JSON.stringify({
1596
+ text: fullText,
1597
+ tool_calls: toolCalls.map(tc => ({
1598
+ id: tc.id,
1599
+ name: exposedToolNames.get(tc.name) ?? tc.name,
1600
+ input: tc.input,
1601
+ })),
1602
+ });
1603
+ const assistantMsg: Message = {
1604
+ role: "assistant",
1605
+ content: assistantContent,
1606
+ metadata: { timestamp: now(), id: randomUUID(), step },
1607
+ };
1608
+ const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
1609
+ yield pushEvent({
1610
+ type: "tool:approval:checkpoint",
1611
+ approvalId,
1612
+ tool: runtimeToolName,
1613
+ toolCallId: call.id,
1614
+ input: call.input,
1615
+ checkpointMessages: deltaMessages,
1616
+ pendingToolCalls: toolCalls.map(tc => ({
1617
+ id: tc.id,
1618
+ name: exposedToolNames.get(tc.name) ?? tc.name,
1619
+ input: tc.input,
1620
+ })),
1621
+ });
1622
+ return;
1641
1623
  }
1642
1624
  approvedCalls.push({
1643
1625
  id: call.id,
@@ -1790,12 +1772,87 @@ ${boundedMainMemory.trim()}`
1790
1772
  }
1791
1773
  }
1792
1774
 
1775
+ async executeTools(
1776
+ calls: ToolCall[],
1777
+ context: ToolContext,
1778
+ ): Promise<ToolExecutionResult[]> {
1779
+ return this.dispatcher.executeBatch(calls, context);
1780
+ }
1781
+
1782
+ async *continueFromToolResult(input: {
1783
+ messages: Message[];
1784
+ toolResults: Array<{ callId: string; toolName: string; result?: unknown; error?: string }>;
1785
+ conversationId?: string;
1786
+ parameters?: Record<string, unknown>;
1787
+ abortSignal?: AbortSignal;
1788
+ }): AsyncGenerator<AgentEvent> {
1789
+ const messages = [...input.messages];
1790
+ const lastMsg = messages[messages.length - 1];
1791
+ if (!lastMsg || lastMsg.role !== "assistant") {
1792
+ throw new Error("continueFromToolResult: last message must be an assistant message with tool calls");
1793
+ }
1794
+
1795
+ let allToolCalls: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
1796
+ try {
1797
+ const parsed = JSON.parse(typeof lastMsg.content === "string" ? lastMsg.content : "");
1798
+ allToolCalls = parsed.tool_calls ?? [];
1799
+ } catch {
1800
+ throw new Error("continueFromToolResult: could not parse tool calls from last assistant message");
1801
+ }
1802
+
1803
+ const providedMap = new Map(
1804
+ input.toolResults.map(r => [r.callId, r]),
1805
+ );
1806
+ const toolResultsForModel: Array<{
1807
+ type: "tool_result";
1808
+ tool_use_id: string;
1809
+ tool_name: string;
1810
+ content: string;
1811
+ }> = [];
1812
+
1813
+ for (const tc of allToolCalls) {
1814
+ const provided = providedMap.get(tc.id);
1815
+ if (provided) {
1816
+ toolResultsForModel.push({
1817
+ type: "tool_result",
1818
+ tool_use_id: tc.id,
1819
+ tool_name: provided.toolName,
1820
+ content: provided.error
1821
+ ? `Tool error: ${provided.error}`
1822
+ : JSON.stringify(provided.result ?? null),
1823
+ });
1824
+ } else {
1825
+ toolResultsForModel.push({
1826
+ type: "tool_result",
1827
+ tool_use_id: tc.id,
1828
+ tool_name: tc.name,
1829
+ content: "Tool error: Tool execution deferred (pending approval checkpoint)",
1830
+ });
1831
+ }
1832
+ }
1833
+
1834
+ messages.push({
1835
+ role: "tool",
1836
+ content: JSON.stringify(toolResultsForModel),
1837
+ metadata: { timestamp: Date.now(), id: randomUUID() },
1838
+ });
1839
+
1840
+ yield* this.runWithTelemetry({
1841
+ messages,
1842
+ conversationId: input.conversationId,
1843
+ parameters: input.parameters,
1844
+ abortSignal: input.abortSignal,
1845
+ });
1846
+ }
1847
+
1793
1848
  async runToCompletion(input: RunInput): Promise<HarnessRunOutput> {
1794
1849
  const events: AgentEvent[] = [];
1795
1850
  let runId = "";
1796
1851
  let finalResult: RunResult | undefined;
1797
1852
  const messages: Message[] = [...(input.messages ?? [])];
1798
- messages.push({ role: "user", content: input.task });
1853
+ if (input.task != null) {
1854
+ messages.push({ role: "user", content: input.task });
1855
+ }
1799
1856
 
1800
1857
  for await (const event of this.runWithTelemetry(input)) {
1801
1858
  events.push(event);
package/src/state.ts CHANGED
@@ -30,7 +30,11 @@ export interface Conversation {
30
30
  approvalId: string;
31
31
  runId: string;
32
32
  tool: string;
33
+ toolCallId?: string;
33
34
  input: Record<string, unknown>;
35
+ checkpointMessages?: Message[];
36
+ baseMessageCount?: number;
37
+ pendingToolCalls?: Array<{ id: string; name: string; input: Record<string, unknown> }>;
34
38
  }>;
35
39
  ownerId: string;
36
40
  tenantId: string | null;