@bluecopa/harness 0.1.0-snapshot.61 → 0.1.0-snapshot.62

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.61",
3
+ "version": "0.1.0-snapshot.62",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
6
  "scripts": {
@@ -7,12 +7,14 @@ import type { HarnessTelemetry } from '../observability/otel';
7
7
  import { HookRunner } from '../hooks/hook-runner';
8
8
  import { PermissionManager } from '../permissions/permission-manager';
9
9
  import { VercelAgentLoop } from '../loop/vercel-agent-loop';
10
+ export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
11
+ export type { PrepareStepContext, PrepareStepResult } from './types';
10
12
  import { SkillManager } from '../skills/skill-manager';
11
13
  import { SkillRouter } from '../skills/skill-router';
12
14
  import type { SkillSummary } from '../skills/skill-types';
13
15
  import { SingleFlightStepExecutor } from './step-executor';
14
- import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
15
- export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
16
+ import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
17
+ export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
16
18
  export { HookRunner } from '../hooks/hook-runner';
17
19
  export { PermissionManager } from '../permissions/permission-manager';
18
20
  export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
@@ -220,9 +222,21 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
220
222
  return base;
221
223
  }
222
224
 
223
- /** Format a display-friendly content string for tool results (used in content field). */
225
+ /** Build the text the LLM sees for a tool result.
226
+ * Success: prefer modelOutput (compact) over raw output.
227
+ * Failure: prefer modelOutput (structured fix guidance) → error → output → generic fallback.
228
+ * This ensures custom tools can feed actionable error feedback to the model via modelOutput
229
+ * so the agent can self-correct instead of stopping with "unknown failure". */
230
+ function resultTextForLLM(result: ToolResult): string {
231
+ if (result.success) return result.modelOutput ?? result.output;
232
+ return result.modelOutput ?? result.error ?? result.output ?? 'unknown failure';
233
+ }
234
+
235
+ /** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
224
236
  function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
225
- const content = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
237
+ const content = result.success
238
+ ? resultTextForLLM(result)
239
+ : `ERROR: ${resultTextForLLM(result)}`;
226
240
  switch (call.name) {
227
241
  case 'Write':
228
242
  return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
@@ -517,6 +531,11 @@ export function createAgent(runtime: AgentRuntime) {
517
531
  ? { nextAction: runtime.nextAction }
518
532
  : new VercelAgentLoop());
519
533
 
534
+ /** Read lastUsage from the loop if it's a VercelAgentLoop. */
535
+ function getLoopUsage(): StepUsage | undefined {
536
+ return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
537
+ }
538
+
520
539
  async function resolveSkillContext(prompt: string): Promise<string> {
521
540
  if (!skillManager || !skillIndexPath) return '';
522
541
 
@@ -609,7 +628,7 @@ export function createAgent(runtime: AgentRuntime) {
609
628
  if (!r.success) {
610
629
  recordAgentError(runtime.telemetry);
611
630
  }
612
- const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
631
+ const resultText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
613
632
  messages.push({
614
633
  role: 'tool',
615
634
  content: formatToolResultContent(call, r),
@@ -684,7 +703,7 @@ export function createAgent(runtime: AgentRuntime) {
684
703
  if (!result.success) {
685
704
  recordAgentError(runtime.telemetry);
686
705
  }
687
- const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
706
+ const singleResultText = result.success ? resultTextForLLM(result) : `ERROR: ${resultTextForLLM(result)}`;
688
707
  messages.push({
689
708
  role: 'tool',
690
709
  content: formatToolResultContent(action, result),
@@ -746,7 +765,7 @@ export function createAgent(runtime: AgentRuntime) {
746
765
  // If no tools → final response
747
766
  if (pendingTools.length === 0) {
748
767
  messages.push({ role: 'assistant', content: finalText });
749
- yield { type: 'step_end', step };
768
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
750
769
  yield { type: 'done', output: finalText, steps: step };
751
770
  return;
752
771
  }
@@ -772,7 +791,7 @@ export function createAgent(runtime: AgentRuntime) {
772
791
  if (action.type === 'final') {
773
792
  yield { type: 'text_delta', text: action.content };
774
793
  messages.push({ role: 'assistant', content: action.content });
775
- yield { type: 'step_end', step };
794
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
776
795
  yield { type: 'done', output: action.content, steps: step };
777
796
  return;
778
797
  }
@@ -784,7 +803,7 @@ export function createAgent(runtime: AgentRuntime) {
784
803
  try {
785
804
  const r = await executeTool(runtime.toolProvider, call, runtime);
786
805
  yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
787
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
806
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
788
807
  messages.push({
789
808
  role: 'tool',
790
809
  content: formatToolResultContent(call, r),
@@ -806,7 +825,7 @@ export function createAgent(runtime: AgentRuntime) {
806
825
  try {
807
826
  const r = await executeTool(runtime.toolProvider, action, runtime);
808
827
  yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
809
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
828
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
810
829
  messages.push({
811
830
  role: 'tool',
812
831
  content: formatToolResultContent(action, r),
@@ -822,7 +841,7 @@ export function createAgent(runtime: AgentRuntime) {
822
841
  });
823
842
  }
824
843
  }
825
- yield { type: 'step_end', step };
844
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
826
845
  continue;
827
846
  }
828
847
 
@@ -832,7 +851,7 @@ export function createAgent(runtime: AgentRuntime) {
832
851
  const call = pendingTools[i]!;
833
852
  const r = results[i]!;
834
853
  yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
835
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
854
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
836
855
  messages.push({
837
856
  role: 'tool',
838
857
  content: formatToolResultContent(call, r),
@@ -850,7 +869,7 @@ export function createAgent(runtime: AgentRuntime) {
850
869
 
851
870
  if (action.type === 'final') {
852
871
  messages.push({ role: 'assistant', content: action.content });
853
- yield { type: 'step_end', step };
872
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
854
873
  yield { type: 'done', output: action.content, steps: step };
855
874
  return;
856
875
  }
@@ -874,7 +893,7 @@ export function createAgent(runtime: AgentRuntime) {
874
893
  const call = calls[i]!;
875
894
  const r = results[i]!;
876
895
  yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
877
- const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
896
+ const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
878
897
  messages.push({
879
898
  role: 'tool',
880
899
  content: formatToolResultContent(call, r),
@@ -888,7 +907,7 @@ export function createAgent(runtime: AgentRuntime) {
888
907
  }
889
908
  }
890
909
 
891
- yield { type: 'step_end', step };
910
+ { const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
892
911
  }
893
912
 
894
913
  yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
@@ -60,15 +60,36 @@ export interface AgentRunResult {
60
60
  steps: number;
61
61
  }
62
62
 
63
+ /** Token usage breakdown for a single LLM step. */
64
+ export interface StepUsage {
65
+ inputTokens?: number;
66
+ outputTokens?: number;
67
+ cacheReadTokens?: number;
68
+ cacheWriteTokens?: number;
69
+ reasoningTokens?: number;
70
+ }
71
+
63
72
  export type AgentStreamEvent =
64
73
  | { type: 'text_delta'; text: string }
65
74
  | { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
66
75
  | { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string; [key: string]: unknown } }
67
76
  | { type: 'step_start'; step: number }
68
- | { type: 'step_end'; step: number }
77
+ | { type: 'step_end'; step: number; usage?: StepUsage }
69
78
  | { type: 'done'; output: string; steps: number };
70
79
 
71
80
  export interface AgentLoop {
72
81
  nextAction(messages: AgentMessage[]): Promise<AgentAction>;
73
82
  streamAction?(messages: AgentMessage[]): AsyncIterable<AgentStreamEvent>;
74
83
  }
84
+
85
+ /** Context passed to `prepareStep` before each LLM call. */
86
+ export interface PrepareStepContext {
87
+ stepNumber: number;
88
+ toolCallHistory: string[];
89
+ }
90
+
91
+ /** Overrides returned by `prepareStep`. All fields optional — omit to keep defaults. */
92
+ export interface PrepareStepResult {
93
+ model?: string;
94
+ activeTools?: string[];
95
+ }
@@ -1,6 +1,8 @@
1
1
  export interface ToolResult {
2
2
  success: boolean;
3
3
  output: string;
4
+ /** Compact summary for LLM context. When present, sent to the model instead of `output`. */
5
+ modelOutput?: string | undefined;
4
6
  error?: string | undefined;
5
7
  metadata?: Record<string, unknown> | undefined;
6
8
  }
@@ -4,8 +4,7 @@ import type { ModelFactory, ToolChoiceConfig } from '../arc/types';
4
4
  import { resolveToolChoice } from '../arc/types';
5
5
  import { z } from 'zod';
6
6
 
7
- import type { AgentAction, AgentMessage, AgentLoop, AgentStreamEvent, ToolCallAction, ToolBatchAction } from '../agent/types';
8
- import { getTextContent } from '../agent/types';
7
+ import type { AgentAction, AgentMessage, AgentLoop, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, PrepareStepContext, PrepareStepResult } from '../agent/types';
9
8
 
10
9
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
11
10
  type AnyTool = Tool<any, any>;
@@ -161,15 +160,25 @@ function toModelMessages(messages: AgentMessage[]): ModelMessage[] {
161
160
  return out;
162
161
  }
163
162
 
163
+ /** A system prompt block with optional Anthropic cache control. */
164
+ export interface SystemPromptBlock {
165
+ text: string;
166
+ cacheControl?: { type: 'ephemeral' };
167
+ }
168
+
164
169
  export interface VercelAgentLoopConfig {
165
170
  model?: string;
171
+ /** System prompt — string or structured blocks with cache control markers. */
172
+ systemPrompt?: string | SystemPromptBlock[];
166
173
  createModel?: ModelFactory;
167
- systemPrompt?: string;
168
- apiKey?: string;
169
174
  /** Custom tool definitions. If provided, replaces built-in agentTools for LLM calls. */
170
175
  tools?: Record<string, AnyTool>;
171
176
  /** Tool choice for LLM calls. Supports per-turn callbacks. Default: 'auto'. */
172
177
  toolChoice?: ToolChoiceConfig;
178
+ /** Provider options passed to generateText/streamText (e.g. anthropic thinking config). */
179
+ providerOptions?: Record<string, unknown>;
180
+ /** Per-step callback to override model and active tools before each LLM call. */
181
+ prepareStep?: (context: PrepareStepContext) => PrepareStepResult | void;
173
182
  }
174
183
 
175
184
  export class VercelAgentLoop implements AgentLoop {
@@ -181,15 +190,25 @@ export class VercelAgentLoop implements AgentLoop {
181
190
  private readonly tools: Record<string, AnyTool>;
182
191
  private readonly validToolNames: Set<string>;
183
192
  private readonly toolChoiceConfig?: ToolChoiceConfig;
193
+ private readonly providerOptions: Record<string, unknown> | undefined;
194
+ private readonly prepareStep: VercelAgentLoopConfig['prepareStep'];
195
+ /** Track tool names called across steps for prepareStep context. */
196
+ private toolCallHistory: string[] = [];
184
197
  private step = 0;
185
198
 
199
+ /** Last step's token usage — read after nextAction/streamAction completes. */
200
+ public lastUsage: StepUsage | undefined;
201
+
186
202
  constructor(config: VercelAgentLoopConfig = {}) {
187
203
  this.toolChoiceConfig = config.toolChoice;
188
204
  this.model = config.model ?? process.env.HARNESS_MODEL ?? 'claude-sonnet-4-5';
205
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
189
206
  this.createModel = config.createModel ?? defaultAnthropicProvider;
190
207
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
191
208
  this.tools = config.tools ?? builtinTools as any;
192
209
  this.validToolNames = new Set(Object.keys(this.tools));
210
+ this.providerOptions = config.providerOptions;
211
+ this.prepareStep = config.prepareStep;
193
212
  this.systemPrompt =
194
213
  config.systemPrompt ??
195
214
  [
@@ -211,9 +230,63 @@ export class VercelAgentLoop implements AgentLoop {
211
230
  }
212
231
  }
213
232
 
233
+ /** Build the `system` parameter for generateText/streamText. */
234
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
235
+ private buildSystemParam(): any {
236
+ if (typeof this.systemPrompt === 'string') return this.systemPrompt;
237
+ // Structured blocks → AI SDK v6 SystemModelMessage format
238
+ return this.systemPrompt.map(block => ({
239
+ role: 'system' as const,
240
+ content: block.text,
241
+ ...(block.cacheControl
242
+ ? { providerOptions: { anthropic: { cacheControl: block.cacheControl } } }
243
+ : {}),
244
+ }));
245
+ }
246
+
247
+ /** Resolve model + tools for this step via prepareStep callback. */
248
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
249
+ private resolveStep(): { model: string; tools: Record<string, any>; validNames: Set<string> } {
250
+ this.stepNumber++;
251
+ if (!this.prepareStep) {
252
+ return { model: this.model, tools: this.tools, validNames: this.validToolNames };
253
+ }
254
+ const overrides = this.prepareStep({ stepNumber: this.stepNumber, toolCallHistory: this.toolCallHistory });
255
+ if (!overrides) {
256
+ return { model: this.model, tools: this.tools, validNames: this.validToolNames };
257
+ }
258
+ const model = overrides.model ?? this.model;
259
+ let tools: Record<string, AnyTool> = this.tools;
260
+ let validNames = this.validToolNames;
261
+ if (overrides.activeTools) {
262
+ const allowed = new Set(overrides.activeTools);
263
+ tools = Object.fromEntries(Object.entries(this.tools).filter(([k]) => allowed.has(k)));
264
+ validNames = new Set(Object.keys(tools));
265
+ }
266
+ return { model, tools, validNames };
267
+ }
268
+
269
+ /** Extract StepUsage from AI SDK usage object. */
270
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
271
+ private static extractUsage(usage: any): StepUsage | undefined {
272
+ if (!usage) return undefined;
273
+ const u: StepUsage = {};
274
+ if (usage.inputTokens != null) u.inputTokens = usage.inputTokens;
275
+ if (usage.outputTokens != null) u.outputTokens = usage.outputTokens;
276
+ // AI SDK v6 nests cache/reasoning under inputTokenDetails/outputTokenDetails
277
+ const inputDetails = usage.inputTokenDetails ?? usage;
278
+ const outputDetails = usage.outputTokenDetails ?? usage;
279
+ if (inputDetails.cacheReadTokens != null) u.cacheReadTokens = inputDetails.cacheReadTokens;
280
+ if (inputDetails.cacheWriteTokens != null) u.cacheWriteTokens = inputDetails.cacheWriteTokens;
281
+ if (outputDetails.reasoningTokens != null) u.reasoningTokens = outputDetails.reasoningTokens;
282
+ return Object.keys(u).length > 0 ? u : undefined;
283
+ }
284
+
214
285
  async nextAction(messages: AgentMessage[]): Promise<AgentAction> {
215
286
  const currentStep = this.step++;
216
287
 
288
+ const { model, tools, validNames } = this.resolveStep();
289
+
217
290
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
218
291
  const result = await (generateText as any)({
219
292
  model: this.createModel(this.model),
@@ -222,14 +295,18 @@ export class VercelAgentLoop implements AgentLoop {
222
295
  system: this.cachedSystem,
223
296
  messages: toModelMessages(messages),
224
297
  stopWhen: stepCountIs(1),
298
+ ...(this.providerOptions ? { providerOptions: this.providerOptions } : {}),
225
299
  });
226
300
 
301
+ // Capture usage
302
+ this.lastUsage = VercelAgentLoop.extractUsage(result.usage);
303
+
227
304
  // If the model made tool calls, extract them
228
305
  if (result.toolCalls && result.toolCalls.length > 0) {
229
306
  const validCalls: ToolCallAction[] = [];
230
307
  for (const call of result.toolCalls) {
231
308
  const name = call.toolName;
232
- if (this.validToolNames.has(name)) {
309
+ if (validNames.has(name)) {
233
310
  const toolCallId = (call as { toolCallId?: string }).toolCallId;
234
311
  validCalls.push({
235
312
  type: 'tool',
@@ -237,6 +314,7 @@ export class VercelAgentLoop implements AgentLoop {
237
314
  args: (call as { input: Record<string, unknown> }).input,
238
315
  ...(toolCallId != null ? { toolCallId } : {}),
239
316
  });
317
+ this.toolCallHistory.push(name);
240
318
  }
241
319
  }
242
320
 
@@ -261,6 +339,8 @@ export class VercelAgentLoop implements AgentLoop {
261
339
  async *streamAction(messages: AgentMessage[]): AsyncGenerator<AgentStreamEvent> {
262
340
  const currentStep = this.step++;
263
341
 
342
+ const { model, tools, validNames } = this.resolveStep();
343
+
264
344
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
265
345
  const result = (streamText as any)({
266
346
  model: this.createModel(this.model),
@@ -269,6 +349,7 @@ export class VercelAgentLoop implements AgentLoop {
269
349
  system: this.cachedSystem,
270
350
  messages: toModelMessages(messages),
271
351
  stopWhen: stepCountIs(1),
352
+ ...(this.providerOptions ? { providerOptions: this.providerOptions } : {}),
272
353
  });
273
354
 
274
355
  const toolArgs = new Map<string, string>();
@@ -285,14 +366,23 @@ export class VercelAgentLoop implements AgentLoop {
285
366
  }
286
367
  if (part.type === 'tool-call') {
287
368
  const name = part.toolName;
288
- if (this.validToolNames.has(name)) {
369
+ if (validNames.has(name)) {
289
370
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
290
371
  const p = part as any;
291
372
  const args: Record<string, unknown> = p.args ?? p.input ?? {};
292
373
  const toolCallId: string | undefined = p.toolCallId;
293
374
  yield { type: 'tool_start', name, args, ...(toolCallId != null ? { toolCallId } : {}) };
375
+ this.toolCallHistory.push(name);
294
376
  }
295
377
  }
296
378
  }
379
+
380
+ // Capture usage after stream completes
381
+ try {
382
+ const usage = await result.usage;
383
+ this.lastUsage = VercelAgentLoop.extractUsage(usage);
384
+ } catch {
385
+ this.lastUsage = undefined;
386
+ }
297
387
  }
298
388
  }