@bluecopa/harness 0.1.0-snapshot.55 → 0.1.0-snapshot.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.55",
3
+ "version": "0.1.0-snapshot.56",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
6
  "scripts": {
@@ -7,12 +7,14 @@ import type { HarnessTelemetry } from '../observability/otel';
7
7
  import { HookRunner } from '../hooks/hook-runner';
8
8
  import { PermissionManager } from '../permissions/permission-manager';
9
9
  import { VercelAgentLoop } from '../loop/vercel-agent-loop';
10
+ export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
11
+ export type { PrepareStepContext, PrepareStepResult } from './types';
10
12
  import { SkillManager } from '../skills/skill-manager';
11
13
  import { SkillRouter } from '../skills/skill-router';
12
14
  import type { SkillSummary } from '../skills/skill-types';
13
15
  import { SingleFlightStepExecutor } from './step-executor';
14
- import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
15
- export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
16
+ import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
17
+ export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
16
18
  export { HookRunner } from '../hooks/hook-runner';
17
19
  export { PermissionManager } from '../permissions/permission-manager';
18
20
  export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
@@ -220,9 +222,21 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
220
222
  return base;
221
223
  }
222
224
 
223
- /** Format a display-friendly content string for tool results (used in content field). */
225
/**
 * Build the text the LLM sees for a tool result.
 *
 * - Success: `modelOutput` (compact summary) when the tool provided one,
 *   otherwise the raw `output`.
 * - Failure: the first non-blank value among `modelOutput` (structured fix
 *   guidance), `error`, and `output`; `'unknown failure'` when none is usable.
 *
 * Callers prepend `ERROR: ` to the failure text. Blank strings are therefore
 * skipped on the failure path: with plain `??` chaining an `error: ''` would be
 * returned as-is, the model would see a bare `ERROR: ` with nothing to act on,
 * and the generic fallback would never be reached.
 *
 * @param result - Tool execution result to render for the model.
 * @returns Text to place in the model-facing tool message (without the `ERROR: ` prefix).
 */
function resultTextForLLM(result: ToolResult): string {
  // Success path is intentionally untouched: an explicitly empty modelOutput is
  // still honoured there.
  if (result.success) return result.modelOutput ?? result.output;

  // Priority order matters: tool-authored guidance beats the raw error, which
  // beats whatever the tool printed.
  const candidates: unknown[] = [result.modelOutput, result.error, result.output];
  const firstUseful = candidates.find(
    (text): text is string => typeof text === 'string' && text.trim().length > 0,
  );
  return firstUseful ?? 'unknown failure';
}
234
+
235
+ /** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
224
236
  function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
225
- const content = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
237
+ const content = result.success
238
+ ? resultTextForLLM(result)
239
+ : `ERROR: ${resultTextForLLM(result)}`;
226
240
  switch (call.name) {
227
241
  case 'Write':
228
242
  return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
@@ -517,6 +531,11 @@ export function createAgent(runtime: AgentRuntime) {
517
531
  ? { nextAction: runtime.nextAction }
518
532
  : new VercelAgentLoop());
519
533
 
534
/**
 * Return the active loop's `lastUsage`.
 * Only a `VercelAgentLoop` exposes that field; for any other loop
 * implementation the result is `undefined`.
 */
function getLoopUsage(): StepUsage | undefined {
  if (!(loop instanceof VercelAgentLoop)) return undefined;
  return loop.lastUsage;
}
538
+
520
539
  async function resolveSkillContext(prompt: string): Promise<string> {
521
540
  if (!skillManager || !skillIndexPath) return '';
522
541
 
@@ -609,7 +628,7 @@ export function createAgent(runtime: AgentRuntime) {
609
628
  if (!r.success) {
610
629
  recordAgentError(runtime.telemetry);
611
630
  }
612
- const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
631
+ const resultText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
613
632
  messages.push({
614
633
  role: 'tool',
615
634
  content: formatToolResultContent(call, r),
@@ -684,7 +703,7 @@ export function createAgent(runtime: AgentRuntime) {
684
703
  if (!result.success) {
685
704
  recordAgentError(runtime.telemetry);
686
705
  }
687
- const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
706
+ const singleResultText = result.success ? resultTextForLLM(result) : `ERROR: ${resultTextForLLM(result)}`;
688
707
  messages.push({
689
708
  role: 'tool',
690
709
  content: formatToolResultContent(action, result),
@@ -746,7 +765,7 @@ export function createAgent(runtime: AgentRuntime) {
746
765
  // If no tools → final response
747
766
  if (pendingTools.length === 0) {
748
767
  messages.push({ role: 'assistant', content: finalText });
749
- yield { type: 'step_end', step };
768
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
750
769
  yield { type: 'done', output: finalText, steps: step };
751
770
  return;
752
771
  }
@@ -772,7 +791,7 @@ export function createAgent(runtime: AgentRuntime) {
772
791
  if (action.type === 'final') {
773
792
  yield { type: 'text_delta', text: action.content };
774
793
  messages.push({ role: 'assistant', content: action.content });
775
- yield { type: 'step_end', step };
794
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
776
795
  yield { type: 'done', output: action.content, steps: step };
777
796
  return;
778
797
  }
@@ -784,7 +803,7 @@ export function createAgent(runtime: AgentRuntime) {
784
803
  try {
785
804
  const r = await executeTool(runtime.toolProvider, call, runtime);
786
805
  yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
787
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
806
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
788
807
  messages.push({
789
808
  role: 'tool',
790
809
  content: formatToolResultContent(call, r),
@@ -806,7 +825,7 @@ export function createAgent(runtime: AgentRuntime) {
806
825
  try {
807
826
  const r = await executeTool(runtime.toolProvider, action, runtime);
808
827
  yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
809
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
828
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
810
829
  messages.push({
811
830
  role: 'tool',
812
831
  content: formatToolResultContent(action, r),
@@ -822,7 +841,7 @@ export function createAgent(runtime: AgentRuntime) {
822
841
  });
823
842
  }
824
843
  }
825
- yield { type: 'step_end', step };
844
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
826
845
  continue;
827
846
  }
828
847
 
@@ -832,7 +851,7 @@ export function createAgent(runtime: AgentRuntime) {
832
851
  const call = pendingTools[i]!;
833
852
  const r = results[i]!;
834
853
  yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
835
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
854
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
836
855
  messages.push({
837
856
  role: 'tool',
838
857
  content: formatToolResultContent(call, r),
@@ -850,7 +869,7 @@ export function createAgent(runtime: AgentRuntime) {
850
869
 
851
870
  if (action.type === 'final') {
852
871
  messages.push({ role: 'assistant', content: action.content });
853
- yield { type: 'step_end', step };
872
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
854
873
  yield { type: 'done', output: action.content, steps: step };
855
874
  return;
856
875
  }
@@ -874,7 +893,7 @@ export function createAgent(runtime: AgentRuntime) {
874
893
  const call = calls[i]!;
875
894
  const r = results[i]!;
876
895
  yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
877
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
896
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
878
897
  messages.push({
879
898
  role: 'tool',
880
899
  content: formatToolResultContent(call, r),
@@ -888,7 +907,7 @@ export function createAgent(runtime: AgentRuntime) {
888
907
  }
889
908
  }
890
909
 
891
- yield { type: 'step_end', step };
910
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
892
911
  }
893
912
 
894
913
  yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
@@ -2,8 +2,6 @@ export interface ToolCallInfo {
2
2
  toolCallId: string;
3
3
  toolName: string;
4
4
  args: Record<string, unknown>;
5
- /** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
6
- providerMetadata?: Record<string, unknown>;
7
5
  }
8
6
 
9
7
  export interface ToolResultInfo {
@@ -22,8 +20,6 @@ export interface AgentMessage {
22
20
  content: string | ContentPart[];
23
21
  toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
24
22
  toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
25
- /** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
26
- providerMetadata?: Record<string, unknown>;
27
23
  }
28
24
 
29
25
  /** Extract plain text from content (string or ContentPart[]). */
@@ -60,15 +56,36 @@ export interface AgentRunResult {
60
56
  steps: number;
61
57
  }
62
58
 
59
/**
 * Token usage breakdown for a single LLM step.
 *
 * Every counter is optional, so consumers must handle each one being absent.
 * Attached to `step_end` stream events through their optional `usage` field.
 */
export interface StepUsage {
  /** Input-token count. NOTE(review): presumably prompt tokens sent to the model — confirm. */
  inputTokens?: number;
  /** Output-token count. NOTE(review): presumably tokens generated by the model — confirm. */
  outputTokens?: number;
  /** Cache-read token count. NOTE(review): exact provider semantics not shown here — confirm. */
  cacheReadTokens?: number;
  /** Cache-write token count. NOTE(review): exact provider semantics not shown here — confirm. */
  cacheWriteTokens?: number;
  /** Reasoning-token count. NOTE(review): exact provider semantics not shown here — confirm. */
  reasoningTokens?: number;
}
67
+
63
68
  export type AgentStreamEvent =
64
69
  | { type: 'text_delta'; text: string }
65
70
  | { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
66
71
  | { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string; [key: string]: unknown } }
67
72
  | { type: 'step_start'; step: number }
68
- | { type: 'step_end'; step: number }
73
+ | { type: 'step_end'; step: number; usage?: StepUsage }
69
74
  | { type: 'done'; output: string; steps: number };
70
75
 
71
76
  export interface AgentLoop {
72
77
  nextAction(messages: AgentMessage[]): Promise<AgentAction>;
73
78
  streamAction?(messages: AgentMessage[]): AsyncIterable<AgentStreamEvent>;
74
79
  }
80
+
81
/**
 * Context passed to `prepareStep` before each LLM call.
 */
export interface PrepareStepContext {
  /** Number of the step about to run. NOTE(review): zero- or one-based is not shown here — confirm. */
  stepNumber: number;
  /** History of tool calls so far. NOTE(review): presumably tool names in call order — confirm. */
  toolCallHistory: string[];
}
86
+
87
/**
 * Overrides returned by `prepareStep`.
 * Every field is optional — leave one out to keep the default for that setting.
 */
export interface PrepareStepResult {
  /** Model override for the upcoming call. NOTE(review): presumably a model identifier — confirm. */
  model?: string;
  /** Tool override for the upcoming call. NOTE(review): presumably the names of tools left enabled — confirm. */
  activeTools?: string[];
}