@bluecopa/harness 0.1.0-snapshot.60 → 0.1.0-snapshot.61
This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/create-agent.ts +15 -34
- package/src/agent/types.ts +5 -22
- package/src/arc/agent-runner.ts +333 -36
- package/src/arc/arc-loop.ts +118 -97
- package/src/arc/message-convert.ts +15 -3
- package/src/arc/multi-model.ts +70 -0
- package/src/arc/profile-builder.ts +18 -2
- package/src/arc/result-pager.ts +77 -0
- package/src/arc/skill-resolver.ts +33 -2
- package/src/arc/types.ts +47 -11
- package/src/hooks/middleware.ts +95 -0
- package/src/interfaces/hooks.ts +2 -1
- package/src/interfaces/tool-provider.ts +0 -2
- package/src/loop/vercel-agent-loop.ts +21 -104
- package/tests/arc/middleware.test.ts +113 -0
- package/tests/arc/process-profiles.test.ts +7 -5
- package/tests/arc/result-paging.test.ts +392 -0
package/package.json
CHANGED
package/src/agent/create-agent.ts
CHANGED
|
@@ -7,14 +7,12 @@ import type { HarnessTelemetry } from '../observability/otel';
|
|
|
7
7
|
import { HookRunner } from '../hooks/hook-runner';
|
|
8
8
|
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
9
|
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
-
export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
|
|
11
|
-
export type { PrepareStepContext, PrepareStepResult } from './types';
|
|
12
10
|
import { SkillManager } from '../skills/skill-manager';
|
|
13
11
|
import { SkillRouter } from '../skills/skill-router';
|
|
14
12
|
import type { SkillSummary } from '../skills/skill-types';
|
|
15
13
|
import { SingleFlightStepExecutor } from './step-executor';
|
|
16
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent,
|
|
17
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent,
|
|
14
|
+
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
+
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
18
16
|
export { HookRunner } from '../hooks/hook-runner';
|
|
19
17
|
export { PermissionManager } from '../permissions/permission-manager';
|
|
20
18
|
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
@@ -222,21 +220,9 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
|
|
|
222
220
|
return base;
|
|
223
221
|
}
|
|
224
222
|
|
|
225
|
-
/**
|
|
226
|
-
* Success: prefer modelOutput (compact) over raw output.
|
|
227
|
-
* Failure: prefer modelOutput (structured fix guidance) → error → output → generic fallback.
|
|
228
|
-
* This ensures custom tools can feed actionable error feedback to the model via modelOutput
|
|
229
|
-
* so the agent can self-correct instead of stopping with "unknown failure". */
|
|
230
|
-
function resultTextForLLM(result: ToolResult): string {
|
|
231
|
-
if (result.success) return result.modelOutput ?? result.output;
|
|
232
|
-
return result.modelOutput ?? result.error ?? result.output ?? 'unknown failure';
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
/** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
|
|
223
|
+
/** Format a display-friendly content string for tool results (used in content field). */
|
|
236
224
|
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
|
|
237
|
-
const content = result.success
|
|
238
|
-
? resultTextForLLM(result)
|
|
239
|
-
: `ERROR: ${resultTextForLLM(result)}`;
|
|
225
|
+
const content = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
240
226
|
switch (call.name) {
|
|
241
227
|
case 'Write':
|
|
242
228
|
return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
|
|
@@ -531,11 +517,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
531
517
|
? { nextAction: runtime.nextAction }
|
|
532
518
|
: new VercelAgentLoop());
|
|
533
519
|
|
|
534
|
-
/** Read lastUsage from the loop if it's a VercelAgentLoop. */
|
|
535
|
-
function getLoopUsage(): StepUsage | undefined {
|
|
536
|
-
return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
|
|
537
|
-
}
|
|
538
|
-
|
|
539
520
|
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
540
521
|
if (!skillManager || !skillIndexPath) return '';
|
|
541
522
|
|
|
@@ -628,7 +609,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
628
609
|
if (!r.success) {
|
|
629
610
|
recordAgentError(runtime.telemetry);
|
|
630
611
|
}
|
|
631
|
-
const resultText = r.success ?
|
|
612
|
+
const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
632
613
|
messages.push({
|
|
633
614
|
role: 'tool',
|
|
634
615
|
content: formatToolResultContent(call, r),
|
|
@@ -703,7 +684,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
703
684
|
if (!result.success) {
|
|
704
685
|
recordAgentError(runtime.telemetry);
|
|
705
686
|
}
|
|
706
|
-
const singleResultText = result.success ?
|
|
687
|
+
const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
707
688
|
messages.push({
|
|
708
689
|
role: 'tool',
|
|
709
690
|
content: formatToolResultContent(action, result),
|
|
@@ -765,7 +746,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
765
746
|
// If no tools → final response
|
|
766
747
|
if (pendingTools.length === 0) {
|
|
767
748
|
messages.push({ role: 'assistant', content: finalText });
|
|
768
|
-
|
|
749
|
+
yield { type: 'step_end', step };
|
|
769
750
|
yield { type: 'done', output: finalText, steps: step };
|
|
770
751
|
return;
|
|
771
752
|
}
|
|
@@ -791,7 +772,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
791
772
|
if (action.type === 'final') {
|
|
792
773
|
yield { type: 'text_delta', text: action.content };
|
|
793
774
|
messages.push({ role: 'assistant', content: action.content });
|
|
794
|
-
|
|
775
|
+
yield { type: 'step_end', step };
|
|
795
776
|
yield { type: 'done', output: action.content, steps: step };
|
|
796
777
|
return;
|
|
797
778
|
}
|
|
@@ -803,7 +784,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
803
784
|
try {
|
|
804
785
|
const r = await executeTool(runtime.toolProvider, call, runtime);
|
|
805
786
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
806
|
-
const rText = r.success ?
|
|
787
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
807
788
|
messages.push({
|
|
808
789
|
role: 'tool',
|
|
809
790
|
content: formatToolResultContent(call, r),
|
|
@@ -825,7 +806,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
825
806
|
try {
|
|
826
807
|
const r = await executeTool(runtime.toolProvider, action, runtime);
|
|
827
808
|
yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
|
|
828
|
-
const rText = r.success ?
|
|
809
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
829
810
|
messages.push({
|
|
830
811
|
role: 'tool',
|
|
831
812
|
content: formatToolResultContent(action, r),
|
|
@@ -841,7 +822,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
841
822
|
});
|
|
842
823
|
}
|
|
843
824
|
}
|
|
844
|
-
|
|
825
|
+
yield { type: 'step_end', step };
|
|
845
826
|
continue;
|
|
846
827
|
}
|
|
847
828
|
|
|
@@ -851,7 +832,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
851
832
|
const call = pendingTools[i]!;
|
|
852
833
|
const r = results[i]!;
|
|
853
834
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
854
|
-
const rText = r.success ?
|
|
835
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
855
836
|
messages.push({
|
|
856
837
|
role: 'tool',
|
|
857
838
|
content: formatToolResultContent(call, r),
|
|
@@ -869,7 +850,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
869
850
|
|
|
870
851
|
if (action.type === 'final') {
|
|
871
852
|
messages.push({ role: 'assistant', content: action.content });
|
|
872
|
-
|
|
853
|
+
yield { type: 'step_end', step };
|
|
873
854
|
yield { type: 'done', output: action.content, steps: step };
|
|
874
855
|
return;
|
|
875
856
|
}
|
|
@@ -893,7 +874,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
893
874
|
const call = calls[i]!;
|
|
894
875
|
const r = results[i]!;
|
|
895
876
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
896
|
-
const rText = r.success ?
|
|
877
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
897
878
|
messages.push({
|
|
898
879
|
role: 'tool',
|
|
899
880
|
content: formatToolResultContent(call, r),
|
|
@@ -907,7 +888,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
907
888
|
}
|
|
908
889
|
}
|
|
909
890
|
|
|
910
|
-
|
|
891
|
+
yield { type: 'step_end', step };
|
|
911
892
|
}
|
|
912
893
|
|
|
913
894
|
yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
|
package/src/agent/types.ts
CHANGED
|
@@ -2,6 +2,8 @@ export interface ToolCallInfo {
|
|
|
2
2
|
toolCallId: string;
|
|
3
3
|
toolName: string;
|
|
4
4
|
args: Record<string, unknown>;
|
|
5
|
+
/** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
|
|
6
|
+
providerMetadata?: Record<string, unknown>;
|
|
5
7
|
}
|
|
6
8
|
|
|
7
9
|
export interface ToolResultInfo {
|
|
@@ -20,6 +22,8 @@ export interface AgentMessage {
|
|
|
20
22
|
content: string | ContentPart[];
|
|
21
23
|
toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
|
|
22
24
|
toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
|
|
25
|
+
/** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
|
|
26
|
+
providerMetadata?: Record<string, unknown>;
|
|
23
27
|
}
|
|
24
28
|
|
|
25
29
|
/** Extract plain text from content (string or ContentPart[]). */
|
|
@@ -56,36 +60,15 @@ export interface AgentRunResult {
|
|
|
56
60
|
steps: number;
|
|
57
61
|
}
|
|
58
62
|
|
|
59
|
-
/** Token usage breakdown for a single LLM step. */
|
|
60
|
-
export interface StepUsage {
|
|
61
|
-
inputTokens?: number;
|
|
62
|
-
outputTokens?: number;
|
|
63
|
-
cacheReadTokens?: number;
|
|
64
|
-
cacheWriteTokens?: number;
|
|
65
|
-
reasoningTokens?: number;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
63
|
export type AgentStreamEvent =
|
|
69
64
|
| { type: 'text_delta'; text: string }
|
|
70
65
|
| { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
|
|
71
66
|
| { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string; [key: string]: unknown } }
|
|
72
67
|
| { type: 'step_start'; step: number }
|
|
73
|
-
| { type: 'step_end'; step: number
|
|
68
|
+
| { type: 'step_end'; step: number }
|
|
74
69
|
| { type: 'done'; output: string; steps: number };
|
|
75
70
|
|
|
76
71
|
export interface AgentLoop {
|
|
77
72
|
nextAction(messages: AgentMessage[]): Promise<AgentAction>;
|
|
78
73
|
streamAction?(messages: AgentMessage[]): AsyncIterable<AgentStreamEvent>;
|
|
79
74
|
}
|
|
80
|
-
|
|
81
|
-
/** Context passed to `prepareStep` before each LLM call. */
|
|
82
|
-
export interface PrepareStepContext {
|
|
83
|
-
stepNumber: number;
|
|
84
|
-
toolCallHistory: string[];
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
/** Overrides returned by `prepareStep`. All fields optional — omit to keep defaults. */
|
|
88
|
-
export interface PrepareStepResult {
|
|
89
|
-
model?: string;
|
|
90
|
-
activeTools?: string[];
|
|
91
|
-
}
|