@poncho-ai/harness 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +17 -0
- package/dist/index.d.ts +45 -32
- package/dist/index.js +115 -73
- package/package.json +2 -2
- package/src/harness.ts +142 -85
- package/src/state.ts +4 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/harness@0.14.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
2
|
+
> @poncho-ai/harness@0.14.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
3
3
|
> tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
[34mCLI[39m tsup v8.5.1
|
|
8
8
|
[34mCLI[39m Target: es2022
|
|
9
9
|
[34mESM[39m Build start
|
|
10
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
11
|
-
[32mESM[39m ⚡️ Build success in
|
|
10
|
+
[32mESM[39m [1mdist/index.js [22m[32m175.01 KB[39m
|
|
11
|
+
[32mESM[39m ⚡️ Build success in 76ms
|
|
12
12
|
[34mDTS[39m Build start
|
|
13
|
-
[32mDTS[39m ⚡️ Build success in
|
|
14
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[
|
|
13
|
+
[32mDTS[39m ⚡️ Build success in 5663ms
|
|
14
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m21.38 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# @poncho-ai/harness
|
|
2
2
|
|
|
3
|
+
## 0.14.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [`e000b96`](https://github.com/cesr/poncho-ai/commit/e000b96837cbbb8d95c868c91a614f458868c444) Thanks [@cesr](https://github.com/cesr)! - Durable approval checkpoints, email conversation improvements, and web UI fixes
|
|
8
|
+
- Simplify approval system to checkpoint-only (remove legacy blocking approvalHandler)
|
|
9
|
+
- Optimize checkpoint storage with delta messages instead of full history
|
|
10
|
+
- Add sidebar sections for conversations awaiting approval with status indicator
|
|
11
|
+
- Fix nested checkpoint missing baseMessageCount in resumeRunFromCheckpoint
|
|
12
|
+
- Improve email conversation titles (sender email + subject)
|
|
13
|
+
- Remove email threading — each incoming email creates its own conversation
|
|
14
|
+
- Fix streaming after approval to preserve existing messages (liveOnly mode)
|
|
15
|
+
- Preserve newlines in user messages in web UI
|
|
16
|
+
|
|
17
|
+
- Updated dependencies [[`e000b96`](https://github.com/cesr/poncho-ai/commit/e000b96837cbbb8d95c868c91a614f458868c444)]:
|
|
18
|
+
- @poncho-ai/sdk@1.0.2
|
|
19
|
+
|
|
3
20
|
## 0.14.0
|
|
4
21
|
|
|
5
22
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Message, ToolDefinition, RunInput, AgentEvent, RunResult, JsonSchema, ToolContext } from '@poncho-ai/sdk';
|
|
1
|
+
import { Message, ToolDefinition, ToolContext, RunInput, AgentEvent, RunResult, JsonSchema } from '@poncho-ai/sdk';
|
|
2
2
|
export { ToolDefinition, defineTool } from '@poncho-ai/sdk';
|
|
3
3
|
import { LanguageModel } from 'ai';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -84,7 +84,15 @@ interface Conversation {
|
|
|
84
84
|
approvalId: string;
|
|
85
85
|
runId: string;
|
|
86
86
|
tool: string;
|
|
87
|
+
toolCallId?: string;
|
|
87
88
|
input: Record<string, unknown>;
|
|
89
|
+
checkpointMessages?: Message[];
|
|
90
|
+
baseMessageCount?: number;
|
|
91
|
+
pendingToolCalls?: Array<{
|
|
92
|
+
id: string;
|
|
93
|
+
name: string;
|
|
94
|
+
input: Record<string, unknown>;
|
|
95
|
+
}>;
|
|
88
96
|
}>;
|
|
89
97
|
ownerId: string;
|
|
90
98
|
tenantId: string | null;
|
|
@@ -362,17 +370,33 @@ declare const getModelContextWindow: (modelName: string) => number;
|
|
|
362
370
|
*/
|
|
363
371
|
declare const createModelProvider: (provider?: string) => ModelProviderFactory;
|
|
364
372
|
|
|
373
|
+
interface ToolCall {
|
|
374
|
+
id: string;
|
|
375
|
+
name: string;
|
|
376
|
+
input: Record<string, unknown>;
|
|
377
|
+
}
|
|
378
|
+
interface ToolExecutionResult {
|
|
379
|
+
callId: string;
|
|
380
|
+
tool: string;
|
|
381
|
+
output?: unknown;
|
|
382
|
+
error?: string;
|
|
383
|
+
}
|
|
384
|
+
declare class ToolDispatcher {
|
|
385
|
+
private readonly tools;
|
|
386
|
+
register(tool: ToolDefinition): void;
|
|
387
|
+
registerMany(tools: ToolDefinition[]): void;
|
|
388
|
+
unregister(name: string): void;
|
|
389
|
+
unregisterMany(names: Iterable<string>): void;
|
|
390
|
+
list(): ToolDefinition[];
|
|
391
|
+
get(name: string): ToolDefinition | undefined;
|
|
392
|
+
execute(call: ToolCall, context: ToolContext): Promise<ToolExecutionResult>;
|
|
393
|
+
executeBatch(calls: ToolCall[], context: ToolContext): Promise<ToolExecutionResult[]>;
|
|
394
|
+
}
|
|
395
|
+
|
|
365
396
|
interface HarnessOptions {
|
|
366
397
|
workingDir?: string;
|
|
367
398
|
environment?: "development" | "staging" | "production";
|
|
368
399
|
toolDefinitions?: ToolDefinition[];
|
|
369
|
-
approvalHandler?: (request: {
|
|
370
|
-
tool: string;
|
|
371
|
-
input: Record<string, unknown>;
|
|
372
|
-
runId: string;
|
|
373
|
-
step: number;
|
|
374
|
-
approvalId: string;
|
|
375
|
-
}) => Promise<boolean> | boolean;
|
|
376
400
|
modelProvider?: ModelProviderFactory;
|
|
377
401
|
uploadStore?: UploadStore;
|
|
378
402
|
}
|
|
@@ -388,7 +412,6 @@ declare class AgentHarness {
|
|
|
388
412
|
private modelProvider;
|
|
389
413
|
private readonly modelProviderInjected;
|
|
390
414
|
private readonly dispatcher;
|
|
391
|
-
private readonly approvalHandler?;
|
|
392
415
|
readonly uploadStore?: UploadStore;
|
|
393
416
|
private skillContextWindow;
|
|
394
417
|
private memoryStore?;
|
|
@@ -439,6 +462,19 @@ declare class AgentHarness {
|
|
|
439
462
|
*/
|
|
440
463
|
runWithTelemetry(input: RunInput): AsyncGenerator<AgentEvent>;
|
|
441
464
|
run(input: RunInput): AsyncGenerator<AgentEvent>;
|
|
465
|
+
executeTools(calls: ToolCall[], context: ToolContext): Promise<ToolExecutionResult[]>;
|
|
466
|
+
continueFromToolResult(input: {
|
|
467
|
+
messages: Message[];
|
|
468
|
+
toolResults: Array<{
|
|
469
|
+
callId: string;
|
|
470
|
+
toolName: string;
|
|
471
|
+
result?: unknown;
|
|
472
|
+
error?: string;
|
|
473
|
+
}>;
|
|
474
|
+
conversationId?: string;
|
|
475
|
+
parameters?: Record<string, unknown>;
|
|
476
|
+
abortSignal?: AbortSignal;
|
|
477
|
+
}): AsyncGenerator<AgentEvent>;
|
|
442
478
|
runToCompletion(input: RunInput): Promise<HarnessRunOutput>;
|
|
443
479
|
}
|
|
444
480
|
|
|
@@ -563,27 +599,4 @@ declare class TelemetryEmitter {
|
|
|
563
599
|
private sendOtlp;
|
|
564
600
|
}
|
|
565
601
|
|
|
566
|
-
interface ToolCall {
|
|
567
|
-
id: string;
|
|
568
|
-
name: string;
|
|
569
|
-
input: Record<string, unknown>;
|
|
570
|
-
}
|
|
571
|
-
interface ToolExecutionResult {
|
|
572
|
-
callId: string;
|
|
573
|
-
tool: string;
|
|
574
|
-
output?: unknown;
|
|
575
|
-
error?: string;
|
|
576
|
-
}
|
|
577
|
-
declare class ToolDispatcher {
|
|
578
|
-
private readonly tools;
|
|
579
|
-
register(tool: ToolDefinition): void;
|
|
580
|
-
registerMany(tools: ToolDefinition[]): void;
|
|
581
|
-
unregister(name: string): void;
|
|
582
|
-
unregisterMany(names: Iterable<string>): void;
|
|
583
|
-
list(): ToolDefinition[];
|
|
584
|
-
get(name: string): ToolDefinition | undefined;
|
|
585
|
-
execute(call: ToolCall, context: ToolContext): Promise<ToolExecutionResult>;
|
|
586
|
-
executeBatch(calls: ToolCall[], context: ToolContext): Promise<ToolExecutionResult[]>;
|
|
587
|
-
}
|
|
588
|
-
|
|
589
602
|
export { type AgentFrontmatter, AgentHarness, type AgentIdentity, type AgentLimitsConfig, type AgentModelConfig, type BuiltInToolToggles, type Conversation, type ConversationState, type ConversationStore, type CronJobConfig, type HarnessOptions, type HarnessRunOutput, InMemoryConversationStore, InMemoryStateStore, LatitudeCapture, type LatitudeCaptureConfig, LocalMcpBridge, LocalUploadStore, type MainMemory, type McpConfig, type MemoryConfig, type MemoryStore, type MessagingChannelConfig, type ModelProviderFactory, PONCHO_UPLOAD_SCHEME, type ParsedAgent, type PonchoConfig, type RemoteMcpServerConfig, type RuntimeRenderContext, S3UploadStore, STORAGE_SCHEMA_VERSION, type SkillContextEntry, type SkillMetadata, type StateConfig, type StateProviderName, type StateStore, type StorageConfig, type TelemetryConfig, TelemetryEmitter, type ToolAccess, type ToolCall, ToolDispatcher, type ToolExecutionResult, type UploadStore, type UploadsConfig, VercelBlobUploadStore, buildAgentDirectoryName, buildSkillContextWindow, createConversationStore, createDefaultTools, createMemoryStore, createMemoryTools, createModelProvider, createSkillTools, createStateStore, createUploadStore, createWriteTool, deriveUploadKey, ensureAgentIdentity, generateAgentId, getAgentStoreDirectory, getModelContextWindow, getPonchoStoreRoot, jsonSchemaToZod, loadPonchoConfig, loadSkillContext, loadSkillInstructions, loadSkillMetadata, normalizeScriptPolicyPath, parseAgentFile, parseAgentMarkdown, readSkillResource, renderAgentPrompt, resolveAgentIdentity, resolveMemoryConfig, resolveSkillDirs, resolveStateConfig, slugifyStorageComponent };
|
package/dist/index.js
CHANGED
|
@@ -2851,7 +2851,6 @@ var AgentHarness = class {
|
|
|
2851
2851
|
modelProvider;
|
|
2852
2852
|
modelProviderInjected;
|
|
2853
2853
|
dispatcher = new ToolDispatcher();
|
|
2854
|
-
approvalHandler;
|
|
2855
2854
|
uploadStore;
|
|
2856
2855
|
skillContextWindow = "";
|
|
2857
2856
|
memoryStore;
|
|
@@ -2925,7 +2924,6 @@ var AgentHarness = class {
|
|
|
2925
2924
|
this.environment = options.environment ?? "development";
|
|
2926
2925
|
this.modelProviderInjected = !!options.modelProvider;
|
|
2927
2926
|
this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
|
|
2928
|
-
this.approvalHandler = options.approvalHandler;
|
|
2929
2927
|
this.uploadStore = options.uploadStore;
|
|
2930
2928
|
if (options.toolDefinitions?.length) {
|
|
2931
2929
|
this.dispatcher.registerMany(options.toolDefinitions);
|
|
@@ -3317,6 +3315,7 @@ var AgentHarness = class {
|
|
|
3317
3315
|
const platformMaxDurationSec = Number(process.env.PONCHO_MAX_DURATION) || 0;
|
|
3318
3316
|
const softDeadlineMs = platformMaxDurationSec > 0 ? platformMaxDurationSec * 800 : 0;
|
|
3319
3317
|
const messages = [...input.messages ?? []];
|
|
3318
|
+
const inputMessageCount = messages.length;
|
|
3320
3319
|
const events = [];
|
|
3321
3320
|
const systemPrompt = renderAgentPrompt(agent, {
|
|
3322
3321
|
parameters: input.parameters,
|
|
@@ -3366,41 +3365,43 @@ ${boundedMainMemory.trim()}` : "";
|
|
|
3366
3365
|
agentId: agent.frontmatter.id ?? agent.frontmatter.name,
|
|
3367
3366
|
contextWindow
|
|
3368
3367
|
});
|
|
3369
|
-
if (input.
|
|
3370
|
-
|
|
3371
|
-
|
|
3372
|
-
|
|
3373
|
-
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3368
|
+
if (input.task != null) {
|
|
3369
|
+
if (input.files && input.files.length > 0) {
|
|
3370
|
+
const parts = [
|
|
3371
|
+
{ type: "text", text: input.task }
|
|
3372
|
+
];
|
|
3373
|
+
for (const file of input.files) {
|
|
3374
|
+
if (this.uploadStore) {
|
|
3375
|
+
const buf = Buffer.from(file.data, "base64");
|
|
3376
|
+
const key = deriveUploadKey(buf, file.mediaType);
|
|
3377
|
+
const ref = await this.uploadStore.put(key, buf, file.mediaType);
|
|
3378
|
+
parts.push({
|
|
3379
|
+
type: "file",
|
|
3380
|
+
data: ref,
|
|
3381
|
+
mediaType: file.mediaType,
|
|
3382
|
+
filename: file.filename
|
|
3383
|
+
});
|
|
3384
|
+
} else {
|
|
3385
|
+
parts.push({
|
|
3386
|
+
type: "file",
|
|
3387
|
+
data: file.data,
|
|
3388
|
+
mediaType: file.mediaType,
|
|
3389
|
+
filename: file.filename
|
|
3390
|
+
});
|
|
3391
|
+
}
|
|
3391
3392
|
}
|
|
3393
|
+
messages.push({
|
|
3394
|
+
role: "user",
|
|
3395
|
+
content: parts,
|
|
3396
|
+
metadata: { timestamp: now(), id: randomUUID3() }
|
|
3397
|
+
});
|
|
3398
|
+
} else {
|
|
3399
|
+
messages.push({
|
|
3400
|
+
role: "user",
|
|
3401
|
+
content: input.task,
|
|
3402
|
+
metadata: { timestamp: now(), id: randomUUID3() }
|
|
3403
|
+
});
|
|
3392
3404
|
}
|
|
3393
|
-
messages.push({
|
|
3394
|
-
role: "user",
|
|
3395
|
-
content: parts,
|
|
3396
|
-
metadata: { timestamp: now(), id: randomUUID3() }
|
|
3397
|
-
});
|
|
3398
|
-
} else {
|
|
3399
|
-
messages.push({
|
|
3400
|
-
role: "user",
|
|
3401
|
-
content: input.task,
|
|
3402
|
-
metadata: { timestamp: now(), id: randomUUID3() }
|
|
3403
|
-
});
|
|
3404
3405
|
}
|
|
3405
3406
|
let responseText = "";
|
|
3406
3407
|
let totalInputTokens = 0;
|
|
@@ -3843,45 +3844,34 @@ ${textContent}` };
|
|
|
3843
3844
|
input: call.input,
|
|
3844
3845
|
approvalId
|
|
3845
3846
|
});
|
|
3846
|
-
const
|
|
3847
|
+
const assistantContent2 = JSON.stringify({
|
|
3848
|
+
text: fullText,
|
|
3849
|
+
tool_calls: toolCalls.map((tc) => ({
|
|
3850
|
+
id: tc.id,
|
|
3851
|
+
name: exposedToolNames.get(tc.name) ?? tc.name,
|
|
3852
|
+
input: tc.input
|
|
3853
|
+
}))
|
|
3854
|
+
});
|
|
3855
|
+
const assistantMsg = {
|
|
3856
|
+
role: "assistant",
|
|
3857
|
+
content: assistantContent2,
|
|
3858
|
+
metadata: { timestamp: now(), id: randomUUID3(), step }
|
|
3859
|
+
};
|
|
3860
|
+
const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
|
|
3861
|
+
yield pushEvent({
|
|
3862
|
+
type: "tool:approval:checkpoint",
|
|
3863
|
+
approvalId,
|
|
3847
3864
|
tool: runtimeToolName,
|
|
3865
|
+
toolCallId: call.id,
|
|
3848
3866
|
input: call.input,
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
|
|
3855
|
-
|
|
3856
|
-
|
|
3857
|
-
if (!approved) {
|
|
3858
|
-
if (this.insideTelemetryCapture && this.latitudeTelemetry) {
|
|
3859
|
-
const deniedSpan = this.latitudeTelemetry.span.tool({
|
|
3860
|
-
name: runtimeToolName,
|
|
3861
|
-
call: { id: call.id, arguments: call.input }
|
|
3862
|
-
});
|
|
3863
|
-
deniedSpan.end({ result: { value: "Tool execution denied by approval policy", isError: true } });
|
|
3864
|
-
}
|
|
3865
|
-
yield pushEvent({
|
|
3866
|
-
type: "tool:approval:denied",
|
|
3867
|
-
approvalId,
|
|
3868
|
-
reason: "No approval handler granted execution"
|
|
3869
|
-
});
|
|
3870
|
-
yield pushEvent({
|
|
3871
|
-
type: "tool:error",
|
|
3872
|
-
tool: call.name,
|
|
3873
|
-
error: "Tool execution denied by approval policy",
|
|
3874
|
-
recoverable: true
|
|
3875
|
-
});
|
|
3876
|
-
toolResultsForModel.push({
|
|
3877
|
-
type: "tool_result",
|
|
3878
|
-
tool_use_id: call.id,
|
|
3879
|
-
tool_name: runtimeToolName,
|
|
3880
|
-
content: "Tool error: Tool execution denied by approval policy"
|
|
3881
|
-
});
|
|
3882
|
-
continue;
|
|
3883
|
-
}
|
|
3884
|
-
yield pushEvent({ type: "tool:approval:granted", approvalId });
|
|
3867
|
+
checkpointMessages: deltaMessages,
|
|
3868
|
+
pendingToolCalls: toolCalls.map((tc) => ({
|
|
3869
|
+
id: tc.id,
|
|
3870
|
+
name: exposedToolNames.get(tc.name) ?? tc.name,
|
|
3871
|
+
input: tc.input
|
|
3872
|
+
}))
|
|
3873
|
+
});
|
|
3874
|
+
return;
|
|
3885
3875
|
}
|
|
3886
3876
|
approvedCalls.push({
|
|
3887
3877
|
id: call.id,
|
|
@@ -4015,12 +4005,64 @@ ${textContent}` };
|
|
|
4015
4005
|
});
|
|
4016
4006
|
}
|
|
4017
4007
|
}
|
|
4008
|
+
async executeTools(calls, context) {
|
|
4009
|
+
return this.dispatcher.executeBatch(calls, context);
|
|
4010
|
+
}
|
|
4011
|
+
async *continueFromToolResult(input) {
|
|
4012
|
+
const messages = [...input.messages];
|
|
4013
|
+
const lastMsg = messages[messages.length - 1];
|
|
4014
|
+
if (!lastMsg || lastMsg.role !== "assistant") {
|
|
4015
|
+
throw new Error("continueFromToolResult: last message must be an assistant message with tool calls");
|
|
4016
|
+
}
|
|
4017
|
+
let allToolCalls = [];
|
|
4018
|
+
try {
|
|
4019
|
+
const parsed = JSON.parse(typeof lastMsg.content === "string" ? lastMsg.content : "");
|
|
4020
|
+
allToolCalls = parsed.tool_calls ?? [];
|
|
4021
|
+
} catch {
|
|
4022
|
+
throw new Error("continueFromToolResult: could not parse tool calls from last assistant message");
|
|
4023
|
+
}
|
|
4024
|
+
const providedMap = new Map(
|
|
4025
|
+
input.toolResults.map((r) => [r.callId, r])
|
|
4026
|
+
);
|
|
4027
|
+
const toolResultsForModel = [];
|
|
4028
|
+
for (const tc of allToolCalls) {
|
|
4029
|
+
const provided = providedMap.get(tc.id);
|
|
4030
|
+
if (provided) {
|
|
4031
|
+
toolResultsForModel.push({
|
|
4032
|
+
type: "tool_result",
|
|
4033
|
+
tool_use_id: tc.id,
|
|
4034
|
+
tool_name: provided.toolName,
|
|
4035
|
+
content: provided.error ? `Tool error: ${provided.error}` : JSON.stringify(provided.result ?? null)
|
|
4036
|
+
});
|
|
4037
|
+
} else {
|
|
4038
|
+
toolResultsForModel.push({
|
|
4039
|
+
type: "tool_result",
|
|
4040
|
+
tool_use_id: tc.id,
|
|
4041
|
+
tool_name: tc.name,
|
|
4042
|
+
content: "Tool error: Tool execution deferred (pending approval checkpoint)"
|
|
4043
|
+
});
|
|
4044
|
+
}
|
|
4045
|
+
}
|
|
4046
|
+
messages.push({
|
|
4047
|
+
role: "tool",
|
|
4048
|
+
content: JSON.stringify(toolResultsForModel),
|
|
4049
|
+
metadata: { timestamp: Date.now(), id: randomUUID3() }
|
|
4050
|
+
});
|
|
4051
|
+
yield* this.runWithTelemetry({
|
|
4052
|
+
messages,
|
|
4053
|
+
conversationId: input.conversationId,
|
|
4054
|
+
parameters: input.parameters,
|
|
4055
|
+
abortSignal: input.abortSignal
|
|
4056
|
+
});
|
|
4057
|
+
}
|
|
4018
4058
|
async runToCompletion(input) {
|
|
4019
4059
|
const events = [];
|
|
4020
4060
|
let runId = "";
|
|
4021
4061
|
let finalResult;
|
|
4022
4062
|
const messages = [...input.messages ?? []];
|
|
4023
|
-
|
|
4063
|
+
if (input.task != null) {
|
|
4064
|
+
messages.push({ role: "user", content: input.task });
|
|
4065
|
+
}
|
|
4024
4066
|
for await (const event of this.runWithTelemetry(input)) {
|
|
4025
4067
|
events.push(event);
|
|
4026
4068
|
if (event.type === "run:started") {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@poncho-ai/harness",
|
|
3
|
-
"version": "0.14.0",
|
|
3
|
+
"version": "0.14.1",
|
|
4
4
|
"description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
"redis": "^5.10.0",
|
|
32
32
|
"yaml": "^2.4.0",
|
|
33
33
|
"zod": "^3.22.0",
|
|
34
|
-
"@poncho-ai/sdk": "1.0.
|
|
34
|
+
"@poncho-ai/sdk": "1.0.2"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/mustache": "^4.2.6",
|
package/src/harness.ts
CHANGED
|
@@ -37,20 +37,13 @@ import {
|
|
|
37
37
|
matchesSlashPattern,
|
|
38
38
|
normalizeRelativeScriptPattern,
|
|
39
39
|
} from "./tool-policy.js";
|
|
40
|
-
import { ToolDispatcher } from "./tool-dispatcher.js";
|
|
40
|
+
import { ToolDispatcher, type ToolCall, type ToolExecutionResult } from "./tool-dispatcher.js";
|
|
41
41
|
import { ensureAgentIdentity } from "./agent-identity.js";
|
|
42
42
|
|
|
43
43
|
export interface HarnessOptions {
|
|
44
44
|
workingDir?: string;
|
|
45
45
|
environment?: "development" | "staging" | "production";
|
|
46
46
|
toolDefinitions?: ToolDefinition[];
|
|
47
|
-
approvalHandler?: (request: {
|
|
48
|
-
tool: string;
|
|
49
|
-
input: Record<string, unknown>;
|
|
50
|
-
runId: string;
|
|
51
|
-
step: number;
|
|
52
|
-
approvalId: string;
|
|
53
|
-
}) => Promise<boolean> | boolean;
|
|
54
47
|
modelProvider?: ModelProviderFactory;
|
|
55
48
|
uploadStore?: UploadStore;
|
|
56
49
|
}
|
|
@@ -414,7 +407,6 @@ export class AgentHarness {
|
|
|
414
407
|
private modelProvider: ModelProviderFactory;
|
|
415
408
|
private readonly modelProviderInjected: boolean;
|
|
416
409
|
private readonly dispatcher = new ToolDispatcher();
|
|
417
|
-
private readonly approvalHandler?: HarnessOptions["approvalHandler"];
|
|
418
410
|
readonly uploadStore?: UploadStore;
|
|
419
411
|
private skillContextWindow = "";
|
|
420
412
|
private memoryStore?: MemoryStore;
|
|
@@ -500,7 +492,6 @@ export class AgentHarness {
|
|
|
500
492
|
this.environment = options.environment ?? "development";
|
|
501
493
|
this.modelProviderInjected = !!options.modelProvider;
|
|
502
494
|
this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
|
|
503
|
-
this.approvalHandler = options.approvalHandler;
|
|
504
495
|
this.uploadStore = options.uploadStore;
|
|
505
496
|
|
|
506
497
|
if (options.toolDefinitions?.length) {
|
|
@@ -963,6 +954,7 @@ export class AgentHarness {
|
|
|
963
954
|
? platformMaxDurationSec * 800
|
|
964
955
|
: 0;
|
|
965
956
|
const messages: Message[] = [...(input.messages ?? [])];
|
|
957
|
+
const inputMessageCount = messages.length;
|
|
966
958
|
const events: AgentEvent[] = [];
|
|
967
959
|
|
|
968
960
|
const systemPrompt = renderAgentPrompt(agent, {
|
|
@@ -1024,41 +1016,43 @@ ${boundedMainMemory.trim()}`
|
|
|
1024
1016
|
contextWindow,
|
|
1025
1017
|
});
|
|
1026
1018
|
|
|
1027
|
-
if (input.
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1019
|
+
if (input.task != null) {
|
|
1020
|
+
if (input.files && input.files.length > 0) {
|
|
1021
|
+
const parts: ContentPart[] = [
|
|
1022
|
+
{ type: "text", text: input.task } satisfies TextContentPart,
|
|
1023
|
+
];
|
|
1024
|
+
for (const file of input.files) {
|
|
1025
|
+
if (this.uploadStore) {
|
|
1026
|
+
const buf = Buffer.from(file.data, "base64");
|
|
1027
|
+
const key = deriveUploadKey(buf, file.mediaType);
|
|
1028
|
+
const ref = await this.uploadStore.put(key, buf, file.mediaType);
|
|
1029
|
+
parts.push({
|
|
1030
|
+
type: "file",
|
|
1031
|
+
data: ref,
|
|
1032
|
+
mediaType: file.mediaType,
|
|
1033
|
+
filename: file.filename,
|
|
1034
|
+
} satisfies FileContentPart);
|
|
1035
|
+
} else {
|
|
1036
|
+
parts.push({
|
|
1037
|
+
type: "file",
|
|
1038
|
+
data: file.data,
|
|
1039
|
+
mediaType: file.mediaType,
|
|
1040
|
+
filename: file.filename,
|
|
1041
|
+
} satisfies FileContentPart);
|
|
1042
|
+
}
|
|
1049
1043
|
}
|
|
1044
|
+
messages.push({
|
|
1045
|
+
role: "user",
|
|
1046
|
+
content: parts,
|
|
1047
|
+
metadata: { timestamp: now(), id: randomUUID() },
|
|
1048
|
+
});
|
|
1049
|
+
} else {
|
|
1050
|
+
messages.push({
|
|
1051
|
+
role: "user",
|
|
1052
|
+
content: input.task,
|
|
1053
|
+
metadata: { timestamp: now(), id: randomUUID() },
|
|
1054
|
+
});
|
|
1050
1055
|
}
|
|
1051
|
-
messages.push({
|
|
1052
|
-
role: "user",
|
|
1053
|
-
content: parts,
|
|
1054
|
-
metadata: { timestamp: now(), id: randomUUID() },
|
|
1055
|
-
});
|
|
1056
|
-
} else {
|
|
1057
|
-
messages.push({
|
|
1058
|
-
role: "user",
|
|
1059
|
-
content: input.task,
|
|
1060
|
-
metadata: { timestamp: now(), id: randomUUID() },
|
|
1061
|
-
});
|
|
1062
1056
|
}
|
|
1063
1057
|
|
|
1064
1058
|
let responseText = "";
|
|
@@ -1597,47 +1591,35 @@ ${boundedMainMemory.trim()}`
|
|
|
1597
1591
|
input: call.input,
|
|
1598
1592
|
approvalId,
|
|
1599
1593
|
});
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
error: "Tool execution denied by approval policy",
|
|
1630
|
-
recoverable: true,
|
|
1631
|
-
});
|
|
1632
|
-
toolResultsForModel.push({
|
|
1633
|
-
type: "tool_result",
|
|
1634
|
-
tool_use_id: call.id,
|
|
1635
|
-
tool_name: runtimeToolName,
|
|
1636
|
-
content: "Tool error: Tool execution denied by approval policy",
|
|
1637
|
-
});
|
|
1638
|
-
continue;
|
|
1639
|
-
}
|
|
1640
|
-
yield pushEvent({ type: "tool:approval:granted", approvalId });
|
|
1594
|
+
|
|
1595
|
+
const assistantContent = JSON.stringify({
|
|
1596
|
+
text: fullText,
|
|
1597
|
+
tool_calls: toolCalls.map(tc => ({
|
|
1598
|
+
id: tc.id,
|
|
1599
|
+
name: exposedToolNames.get(tc.name) ?? tc.name,
|
|
1600
|
+
input: tc.input,
|
|
1601
|
+
})),
|
|
1602
|
+
});
|
|
1603
|
+
const assistantMsg: Message = {
|
|
1604
|
+
role: "assistant",
|
|
1605
|
+
content: assistantContent,
|
|
1606
|
+
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
1607
|
+
};
|
|
1608
|
+
const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
|
|
1609
|
+
yield pushEvent({
|
|
1610
|
+
type: "tool:approval:checkpoint",
|
|
1611
|
+
approvalId,
|
|
1612
|
+
tool: runtimeToolName,
|
|
1613
|
+
toolCallId: call.id,
|
|
1614
|
+
input: call.input,
|
|
1615
|
+
checkpointMessages: deltaMessages,
|
|
1616
|
+
pendingToolCalls: toolCalls.map(tc => ({
|
|
1617
|
+
id: tc.id,
|
|
1618
|
+
name: exposedToolNames.get(tc.name) ?? tc.name,
|
|
1619
|
+
input: tc.input,
|
|
1620
|
+
})),
|
|
1621
|
+
});
|
|
1622
|
+
return;
|
|
1641
1623
|
}
|
|
1642
1624
|
approvedCalls.push({
|
|
1643
1625
|
id: call.id,
|
|
@@ -1790,12 +1772,87 @@ ${boundedMainMemory.trim()}`
|
|
|
1790
1772
|
}
|
|
1791
1773
|
}
|
|
1792
1774
|
|
|
1775
|
+
async executeTools(
|
|
1776
|
+
calls: ToolCall[],
|
|
1777
|
+
context: ToolContext,
|
|
1778
|
+
): Promise<ToolExecutionResult[]> {
|
|
1779
|
+
return this.dispatcher.executeBatch(calls, context);
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
async *continueFromToolResult(input: {
|
|
1783
|
+
messages: Message[];
|
|
1784
|
+
toolResults: Array<{ callId: string; toolName: string; result?: unknown; error?: string }>;
|
|
1785
|
+
conversationId?: string;
|
|
1786
|
+
parameters?: Record<string, unknown>;
|
|
1787
|
+
abortSignal?: AbortSignal;
|
|
1788
|
+
}): AsyncGenerator<AgentEvent> {
|
|
1789
|
+
const messages = [...input.messages];
|
|
1790
|
+
const lastMsg = messages[messages.length - 1];
|
|
1791
|
+
if (!lastMsg || lastMsg.role !== "assistant") {
|
|
1792
|
+
throw new Error("continueFromToolResult: last message must be an assistant message with tool calls");
|
|
1793
|
+
}
|
|
1794
|
+
|
|
1795
|
+
let allToolCalls: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
|
|
1796
|
+
try {
|
|
1797
|
+
const parsed = JSON.parse(typeof lastMsg.content === "string" ? lastMsg.content : "");
|
|
1798
|
+
allToolCalls = parsed.tool_calls ?? [];
|
|
1799
|
+
} catch {
|
|
1800
|
+
throw new Error("continueFromToolResult: could not parse tool calls from last assistant message");
|
|
1801
|
+
}
|
|
1802
|
+
|
|
1803
|
+
const providedMap = new Map(
|
|
1804
|
+
input.toolResults.map(r => [r.callId, r]),
|
|
1805
|
+
);
|
|
1806
|
+
const toolResultsForModel: Array<{
|
|
1807
|
+
type: "tool_result";
|
|
1808
|
+
tool_use_id: string;
|
|
1809
|
+
tool_name: string;
|
|
1810
|
+
content: string;
|
|
1811
|
+
}> = [];
|
|
1812
|
+
|
|
1813
|
+
for (const tc of allToolCalls) {
|
|
1814
|
+
const provided = providedMap.get(tc.id);
|
|
1815
|
+
if (provided) {
|
|
1816
|
+
toolResultsForModel.push({
|
|
1817
|
+
type: "tool_result",
|
|
1818
|
+
tool_use_id: tc.id,
|
|
1819
|
+
tool_name: provided.toolName,
|
|
1820
|
+
content: provided.error
|
|
1821
|
+
? `Tool error: ${provided.error}`
|
|
1822
|
+
: JSON.stringify(provided.result ?? null),
|
|
1823
|
+
});
|
|
1824
|
+
} else {
|
|
1825
|
+
toolResultsForModel.push({
|
|
1826
|
+
type: "tool_result",
|
|
1827
|
+
tool_use_id: tc.id,
|
|
1828
|
+
tool_name: tc.name,
|
|
1829
|
+
content: "Tool error: Tool execution deferred (pending approval checkpoint)",
|
|
1830
|
+
});
|
|
1831
|
+
}
|
|
1832
|
+
}
|
|
1833
|
+
|
|
1834
|
+
messages.push({
|
|
1835
|
+
role: "tool",
|
|
1836
|
+
content: JSON.stringify(toolResultsForModel),
|
|
1837
|
+
metadata: { timestamp: Date.now(), id: randomUUID() },
|
|
1838
|
+
});
|
|
1839
|
+
|
|
1840
|
+
yield* this.runWithTelemetry({
|
|
1841
|
+
messages,
|
|
1842
|
+
conversationId: input.conversationId,
|
|
1843
|
+
parameters: input.parameters,
|
|
1844
|
+
abortSignal: input.abortSignal,
|
|
1845
|
+
});
|
|
1846
|
+
}
|
|
1847
|
+
|
|
1793
1848
|
async runToCompletion(input: RunInput): Promise<HarnessRunOutput> {
|
|
1794
1849
|
const events: AgentEvent[] = [];
|
|
1795
1850
|
let runId = "";
|
|
1796
1851
|
let finalResult: RunResult | undefined;
|
|
1797
1852
|
const messages: Message[] = [...(input.messages ?? [])];
|
|
1798
|
-
|
|
1853
|
+
if (input.task != null) {
|
|
1854
|
+
messages.push({ role: "user", content: input.task });
|
|
1855
|
+
}
|
|
1799
1856
|
|
|
1800
1857
|
for await (const event of this.runWithTelemetry(input)) {
|
|
1801
1858
|
events.push(event);
|
package/src/state.ts
CHANGED
|
@@ -30,7 +30,11 @@ export interface Conversation {
|
|
|
30
30
|
approvalId: string;
|
|
31
31
|
runId: string;
|
|
32
32
|
tool: string;
|
|
33
|
+
toolCallId?: string;
|
|
33
34
|
input: Record<string, unknown>;
|
|
35
|
+
checkpointMessages?: Message[];
|
|
36
|
+
baseMessageCount?: number;
|
|
37
|
+
pendingToolCalls?: Array<{ id: string; name: string; input: Record<string, unknown> }>;
|
|
34
38
|
}>;
|
|
35
39
|
ownerId: string;
|
|
36
40
|
tenantId: string | null;
|