@bluecopa/harness 0.1.0-snapshot.55 → 0.1.0-snapshot.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/create-agent.ts +34 -15
- package/src/agent/types.ts +22 -5
- package/src/arc/agent-runner.ts +17 -318
- package/src/arc/arc-loop.ts +34 -104
- package/src/arc/message-convert.ts +3 -15
- package/src/arc/profile-builder.ts +2 -17
- package/src/arc/skill-resolver.ts +2 -33
- package/src/arc/types.ts +0 -39
- package/src/interfaces/hooks.ts +1 -2
- package/src/interfaces/tool-provider.ts +2 -0
- package/src/loop/vercel-agent-loop.ts +104 -21
- package/tests/arc/process-profiles.test.ts +5 -7
- package/src/arc/multi-model.ts +0 -70
- package/src/arc/result-pager.ts +0 -77
- package/src/hooks/middleware.ts +0 -95
- package/tests/arc/middleware.test.ts +0 -113
- package/tests/arc/result-paging.test.ts +0 -392
package/src/agent/create-agent.ts
CHANGED
|
@@ -7,12 +7,14 @@ import type { HarnessTelemetry } from '../observability/otel';
|
|
|
7
7
|
import { HookRunner } from '../hooks/hook-runner';
|
|
8
8
|
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
9
|
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
+
export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
|
|
11
|
+
export type { PrepareStepContext, PrepareStepResult } from './types';
|
|
10
12
|
import { SkillManager } from '../skills/skill-manager';
|
|
11
13
|
import { SkillRouter } from '../skills/skill-router';
|
|
12
14
|
import type { SkillSummary } from '../skills/skill-types';
|
|
13
15
|
import { SingleFlightStepExecutor } from './step-executor';
|
|
14
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
|
+
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
17
|
+
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
18
|
export { HookRunner } from '../hooks/hook-runner';
|
|
17
19
|
export { PermissionManager } from '../permissions/permission-manager';
|
|
18
20
|
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
@@ -220,9 +222,21 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
|
|
|
220
222
|
return base;
|
|
221
223
|
}
|
|
222
224
|
|
|
223
|
-
/**
|
|
225
|
+
/** Build the text the LLM sees for a tool result.
|
|
226
|
+
* Success: prefer modelOutput (compact) over raw output.
|
|
227
|
+
* Failure: prefer modelOutput (structured fix guidance) → error → output → generic fallback.
|
|
228
|
+
* This ensures custom tools can feed actionable error feedback to the model via modelOutput
|
|
229
|
+
* so the agent can self-correct instead of stopping with "unknown failure". */
|
|
230
|
+
function resultTextForLLM(result: ToolResult): string {
|
|
231
|
+
if (result.success) return result.modelOutput ?? result.output;
|
|
232
|
+
return result.modelOutput ?? result.error ?? result.output ?? 'unknown failure';
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
|
|
224
236
|
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
|
|
225
|
-
const content = result.success
|
|
237
|
+
const content = result.success
|
|
238
|
+
? resultTextForLLM(result)
|
|
239
|
+
: `ERROR: ${resultTextForLLM(result)}`;
|
|
226
240
|
switch (call.name) {
|
|
227
241
|
case 'Write':
|
|
228
242
|
return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
|
|
@@ -517,6 +531,11 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
517
531
|
? { nextAction: runtime.nextAction }
|
|
518
532
|
: new VercelAgentLoop());
|
|
519
533
|
|
|
534
|
+
/** Read lastUsage from the loop if it's a VercelAgentLoop. */
|
|
535
|
+
function getLoopUsage(): StepUsage | undefined {
|
|
536
|
+
return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
|
|
537
|
+
}
|
|
538
|
+
|
|
520
539
|
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
521
540
|
if (!skillManager || !skillIndexPath) return '';
|
|
522
541
|
|
|
@@ -609,7 +628,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
609
628
|
if (!r.success) {
|
|
610
629
|
recordAgentError(runtime.telemetry);
|
|
611
630
|
}
|
|
612
|
-
const resultText = r.success ? r
|
|
631
|
+
const resultText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
613
632
|
messages.push({
|
|
614
633
|
role: 'tool',
|
|
615
634
|
content: formatToolResultContent(call, r),
|
|
@@ -684,7 +703,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
684
703
|
if (!result.success) {
|
|
685
704
|
recordAgentError(runtime.telemetry);
|
|
686
705
|
}
|
|
687
|
-
const singleResultText = result.success ? result
|
|
706
|
+
const singleResultText = result.success ? resultTextForLLM(result) : `ERROR: ${resultTextForLLM(result)}`;
|
|
688
707
|
messages.push({
|
|
689
708
|
role: 'tool',
|
|
690
709
|
content: formatToolResultContent(action, result),
|
|
@@ -746,7 +765,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
746
765
|
// If no tools → final response
|
|
747
766
|
if (pendingTools.length === 0) {
|
|
748
767
|
messages.push({ role: 'assistant', content: finalText });
|
|
749
|
-
yield { type: 'step_end', step };
|
|
768
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
750
769
|
yield { type: 'done', output: finalText, steps: step };
|
|
751
770
|
return;
|
|
752
771
|
}
|
|
@@ -772,7 +791,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
772
791
|
if (action.type === 'final') {
|
|
773
792
|
yield { type: 'text_delta', text: action.content };
|
|
774
793
|
messages.push({ role: 'assistant', content: action.content });
|
|
775
|
-
yield { type: 'step_end', step };
|
|
794
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
776
795
|
yield { type: 'done', output: action.content, steps: step };
|
|
777
796
|
return;
|
|
778
797
|
}
|
|
@@ -784,7 +803,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
784
803
|
try {
|
|
785
804
|
const r = await executeTool(runtime.toolProvider, call, runtime);
|
|
786
805
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
787
|
-
const rText = r.success ? r
|
|
806
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
788
807
|
messages.push({
|
|
789
808
|
role: 'tool',
|
|
790
809
|
content: formatToolResultContent(call, r),
|
|
@@ -806,7 +825,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
806
825
|
try {
|
|
807
826
|
const r = await executeTool(runtime.toolProvider, action, runtime);
|
|
808
827
|
yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
|
|
809
|
-
const rText = r.success ? r
|
|
828
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
810
829
|
messages.push({
|
|
811
830
|
role: 'tool',
|
|
812
831
|
content: formatToolResultContent(action, r),
|
|
@@ -822,7 +841,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
822
841
|
});
|
|
823
842
|
}
|
|
824
843
|
}
|
|
825
|
-
yield { type: 'step_end', step };
|
|
844
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
826
845
|
continue;
|
|
827
846
|
}
|
|
828
847
|
|
|
@@ -832,7 +851,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
832
851
|
const call = pendingTools[i]!;
|
|
833
852
|
const r = results[i]!;
|
|
834
853
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
835
|
-
const rText = r.success ? r
|
|
854
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
836
855
|
messages.push({
|
|
837
856
|
role: 'tool',
|
|
838
857
|
content: formatToolResultContent(call, r),
|
|
@@ -850,7 +869,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
850
869
|
|
|
851
870
|
if (action.type === 'final') {
|
|
852
871
|
messages.push({ role: 'assistant', content: action.content });
|
|
853
|
-
yield { type: 'step_end', step };
|
|
872
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
854
873
|
yield { type: 'done', output: action.content, steps: step };
|
|
855
874
|
return;
|
|
856
875
|
}
|
|
@@ -874,7 +893,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
874
893
|
const call = calls[i]!;
|
|
875
894
|
const r = results[i]!;
|
|
876
895
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
877
|
-
const rText = r.success ? r
|
|
896
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
878
897
|
messages.push({
|
|
879
898
|
role: 'tool',
|
|
880
899
|
content: formatToolResultContent(call, r),
|
|
@@ -888,7 +907,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
888
907
|
}
|
|
889
908
|
}
|
|
890
909
|
|
|
891
|
-
yield { type: 'step_end', step };
|
|
910
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
892
911
|
}
|
|
893
912
|
|
|
894
913
|
yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
|
package/src/agent/types.ts
CHANGED
|
@@ -2,8 +2,6 @@ export interface ToolCallInfo {
|
|
|
2
2
|
toolCallId: string;
|
|
3
3
|
toolName: string;
|
|
4
4
|
args: Record<string, unknown>;
|
|
5
|
-
/** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
|
|
6
|
-
providerMetadata?: Record<string, unknown>;
|
|
7
5
|
}
|
|
8
6
|
|
|
9
7
|
export interface ToolResultInfo {
|
|
@@ -22,8 +20,6 @@ export interface AgentMessage {
|
|
|
22
20
|
content: string | ContentPart[];
|
|
23
21
|
toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
|
|
24
22
|
toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
|
|
25
|
-
/** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
|
|
26
|
-
providerMetadata?: Record<string, unknown>;
|
|
27
23
|
}
|
|
28
24
|
|
|
29
25
|
/** Extract plain text from content (string or ContentPart[]). */
|
|
@@ -60,15 +56,36 @@ export interface AgentRunResult {
|
|
|
60
56
|
steps: number;
|
|
61
57
|
}
|
|
62
58
|
|
|
59
|
+
/** Token usage breakdown for a single LLM step. */
|
|
60
|
+
export interface StepUsage {
|
|
61
|
+
inputTokens?: number;
|
|
62
|
+
outputTokens?: number;
|
|
63
|
+
cacheReadTokens?: number;
|
|
64
|
+
cacheWriteTokens?: number;
|
|
65
|
+
reasoningTokens?: number;
|
|
66
|
+
}
|
|
67
|
+
|
|
63
68
|
export type AgentStreamEvent =
|
|
64
69
|
| { type: 'text_delta'; text: string }
|
|
65
70
|
| { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
|
|
66
71
|
| { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string; [key: string]: unknown } }
|
|
67
72
|
| { type: 'step_start'; step: number }
|
|
68
|
-
| { type: 'step_end'; step: number }
|
|
73
|
+
| { type: 'step_end'; step: number; usage?: StepUsage }
|
|
69
74
|
| { type: 'done'; output: string; steps: number };
|
|
70
75
|
|
|
71
76
|
export interface AgentLoop {
|
|
72
77
|
nextAction(messages: AgentMessage[]): Promise<AgentAction>;
|
|
73
78
|
streamAction?(messages: AgentMessage[]): AsyncIterable<AgentStreamEvent>;
|
|
74
79
|
}
|
|
80
|
+
|
|
81
|
+
/** Context passed to `prepareStep` before each LLM call. */
|
|
82
|
+
export interface PrepareStepContext {
|
|
83
|
+
stepNumber: number;
|
|
84
|
+
toolCallHistory: string[];
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/** Overrides returned by `prepareStep`. All fields optional — omit to keep defaults. */
|
|
88
|
+
export interface PrepareStepResult {
|
|
89
|
+
model?: string;
|
|
90
|
+
activeTools?: string[];
|
|
91
|
+
}
|