@bluecopa/harness 0.1.0-snapshot.60 → 0.1.0-snapshot.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.60",
3
+ "version": "0.1.0-snapshot.62",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
6
  "scripts": {
@@ -2,6 +2,8 @@ export interface ToolCallInfo {
2
2
  toolCallId: string;
3
3
  toolName: string;
4
4
  args: Record<string, unknown>;
5
+ /** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
6
+ providerMetadata?: Record<string, unknown>;
5
7
  }
6
8
 
7
9
  export interface ToolResultInfo {
@@ -20,6 +22,8 @@ export interface AgentMessage {
20
22
  content: string | ContentPart[];
21
23
  toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
22
24
  toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
25
+ /** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
26
+ providerMetadata?: Record<string, unknown>;
23
27
  }
24
28
 
25
29
  /** Extract plain text from content (string or ContentPart[]). */
@@ -1,6 +1,7 @@
1
1
  import { randomUUID } from 'node:crypto';
2
- import { generateText, generateObject } from 'ai';
2
+ import { generateText, generateObject, tool as aiTool } from 'ai';
3
3
  import { anthropic as defaultAnthropicProvider } from '@ai-sdk/anthropic';
4
+ import { z } from 'zod';
4
5
  import type { ModelFactory } from './types';
5
6
  import type { AgentMessage, ToolCallAction } from '../agent/types';
6
7
  import { getTextContent } from '../agent/types';
@@ -8,11 +9,13 @@ import type { ToolProvider, ToolResult } from '../interfaces/tool-provider';
8
9
  import type { HookRunner } from '../hooks/hook-runner';
9
10
  import type { PermissionManager } from '../permissions/permission-manager';
10
11
  import type { HarnessTelemetry } from '../observability/otel';
11
- import type { Activity, Process, ProcessEvent, ProcessRequest, ProcessResult, ArcLoopConfig } from './types';
12
+ import type { Activity, Process, ProcessEvent, ProcessRequest, ProcessResult, ArcLoopConfig, ToolChoiceConfig } from './types';
13
+ import { resolveToolChoice } from './types';
14
+ import type { ResultPager } from './result-pager';
12
15
  import type { Episode, EpisodeStore, ModelTier } from './arc-types';
13
16
  import type { ResiliencePolicy, ExecutionContext } from './resilience/types';
14
17
  import { resolveModel, DEFAULT_MODEL_MAP } from './arc-types';
15
- import { toModelMessages } from './message-convert';
18
+ import { toModelMessages, estimateTokens } from './message-convert';
16
19
  import { EpisodeCompressor } from './episode-compressor';
17
20
  import { pickDefined, normalizeTools } from './utils';
18
21
 
@@ -74,12 +77,21 @@ export async function firstEvent<T extends { type: string }>(
74
77
  throw new Error(`Stream ended without '${type}' event`);
75
78
  }
76
79
 
80
+ // ── Constants ──
81
+
82
+ /** Default character threshold above which tool results are paged externally. */
83
+ export const DEFAULT_PAGE_THRESHOLD = 4_000;
84
+
85
+ /** Hard cap on ReadFullResult output — never re-paged, prevents infinite recursion. */
86
+ export const READ_FULL_RESULT_HARD_CAP = 32_000;
87
+
77
88
  // ── Process system prompt ──
78
89
 
79
90
  const PROCESS_SYSTEM_PROMPT = [
80
91
  'You are a focused execution thread within a larger agent system.',
81
92
  'Complete the assigned task using the available tools.',
82
93
  'Be efficient — accomplish the objective with minimal steps.',
94
+ 'If your context includes the user\'s original message or attachment metadata, use that information directly.',
83
95
  'When done, provide a brief summary of what you accomplished.',
84
96
  ].join(' ');
85
97
 
@@ -212,6 +224,23 @@ export interface AgentRunnerConfig {
212
224
 
213
225
  /** Optional resilience policy applied to generateText calls. */
214
226
  resilience?: ResiliencePolicy;
227
+
228
+ /** Tool choice for LLM calls. Supports per-turn callbacks. Default: 'auto'. */
229
+ toolChoice?: ToolChoiceConfig;
230
+
231
+ /** ResultPager for storing large tool results externally. When set, enables context paging. */
232
+ resultPager?: ResultPager;
233
+ /** Character threshold above which tool results are paged. Default: 4000. */
234
+ resultPageThreshold?: number;
235
+ /** Tool names to never page (e.g., ['Read', 'Edit'] — filesystem tools return needed content). */
236
+ pagingExclude?: string[];
237
+ /** Hard cap on tool result length (chars). Intended as the fallback when no resultPager is configured, but applied whenever set (including to paged summaries). Truncates with a note. No default (unlimited). */
238
+ maxToolResultLength?: number;
239
+
240
+ /** Structured facts injected into the system prompt (e.g., from long-term memory). */
241
+ contextFacts?: string[];
242
+ /** Max context tokens before old messages are trimmed. When set, stubs old tool results to keep within budget. */
243
+ maxContextTokens?: number;
215
244
  }
216
245
 
217
246
  export interface AgentRunResult {
@@ -220,8 +249,6 @@ export interface AgentRunResult {
220
249
  steps: number;
221
250
  /** Structured output from generateObject when outputSchema is set. */
222
251
  structuredOutput?: Record<string, unknown>;
223
- /** Token usage accumulated across all steps in this thread. */
224
- usage?: { inputTokens: number; outputTokens: number; cacheReadTokens: number; cacheWriteTokens: number };
225
252
  }
226
253
 
227
254
  export class AgentRunner {
@@ -231,9 +258,36 @@ export class AgentRunner {
231
258
  { role: 'user', content: config.prompt },
232
259
  ];
233
260
 
234
- // AI SDK v6 `system` expects string or Array<{ type: 'text', text }>, not [{ role, content }]
235
- const cachedSystem = config.systemPrompt;
236
- const threadUsage = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 };
261
+ // Build system prompt with optional structured facts
262
+ const systemContent = config.contextFacts?.length
263
+ ? config.systemPrompt + '\n\n## Known Facts\n' + config.contextFacts.map(f => `- ${f}`).join('\n')
264
+ : config.systemPrompt;
265
+
266
+ const cachedSystem = [{
267
+ role: 'system' as const,
268
+ content: systemContent,
269
+ }];
270
+
271
+ // Pre-compute paging config (avoid per-iteration allocation)
272
+ const pageThreshold = config.resultPageThreshold ?? DEFAULT_PAGE_THRESHOLD;
273
+ const pagingExcludeSet = new Set(config.pagingExclude ?? []);
274
+
275
+ // Augment tools with ReadFullResult when paging is enabled
276
+ const effectiveTools = config.resultPager
277
+ ? {
278
+ ...config.tools,
279
+ ReadFullResult: aiTool({
280
+ description: 'Retrieve the full content of a paged tool result. Use when the summary is insufficient and you need the complete data.',
281
+ parameters: z.object({
282
+ ref: z.string().describe('The paged result reference from a previous tool output'),
283
+ lineRange: z.object({
284
+ start: z.number().int().min(1).describe('Start line (1-indexed, inclusive)'),
285
+ end: z.number().int().min(1).describe('End line (1-indexed, inclusive)'),
286
+ }).optional().describe('Optional line range to retrieve. Omit for full content.'),
287
+ }),
288
+ }),
289
+ }
290
+ : config.tools;
237
291
 
238
292
  for (let step = 0; step < config.maxSteps; step++) {
239
293
  config.signal.throwIfAborted();
@@ -244,12 +298,17 @@ export class AgentRunner {
244
298
  }
245
299
  }
246
300
 
301
+ // Context trimming: stub old tool results when context exceeds budget
302
+ if (config.maxContextTokens && step > 0) {
303
+ trimContext(messages, config.maxContextTokens);
304
+ }
305
+
247
306
  const callLLM = async (effectiveSignal: AbortSignal) =>
248
307
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
249
308
  (generateText as any)({
250
309
  model: (config.createModel ?? defaultAnthropicProvider)(config.model),
251
- tools: normalizeTools(config.tools),
252
- toolChoice: 'auto',
310
+ tools: normalizeTools(effectiveTools),
311
+ toolChoice: resolveToolChoice(config.toolChoice, step),
253
312
  messages: toModelMessages(messages),
254
313
  system: cachedSystem,
255
314
  abortSignal: effectiveSignal,
@@ -272,25 +331,40 @@ export class AgentRunner {
272
331
  result = await callLLM(config.signal);
273
332
  }
274
333
 
275
- // Extract token usage from generateText result
276
- try {
277
- const usage = result.usage;
278
- if (usage) {
279
- threadUsage.inputTokens += usage.inputTokens ?? 0;
280
- threadUsage.outputTokens += usage.outputTokens ?? 0;
281
- const details = usage.inputTokenDetails ?? usage;
282
- threadUsage.cacheReadTokens += details.cacheReadTokens ?? 0;
283
- threadUsage.cacheWriteTokens += details.cacheWriteTokens ?? 0;
284
- }
285
- } catch { /* best-effort */ }
286
-
287
334
  const toolCalls: Array<{ toolName: string; input: Record<string, unknown>; toolCallId?: string }> =
288
335
  result.toolCalls ?? [];
289
336
 
290
337
  if (toolCalls.length === 0) {
291
- const text = result.text?.trim() ?? 'Done.';
338
+ const rawText = result.text?.trim() ?? '';
339
+ // Detect empty response (potential billing/auth error — model returned nothing)
340
+ if (!rawText && step === 0) {
341
+ const text = 'ERROR: LLM returned empty response with no tool calls on first step. This may indicate an API billing issue, authentication error, or rate limit.';
342
+ messages.push({ role: 'assistant', content: text });
343
+ return { messages, output: text, steps: step + 1 };
344
+ }
345
+ const text = rawText || 'Done.';
292
346
  messages.push({ role: 'assistant', content: text });
293
347
 
348
+ // RunComplete hook: allow middleware to inspect and optionally continue
349
+ if (config.hookRunner) {
350
+ const decision = await config.hookRunner.run({
351
+ event: 'RunComplete',
352
+ metadata: {
353
+ messages,
354
+ steps: step + 1,
355
+ output: text,
356
+ },
357
+ });
358
+ if (!decision.allow) {
359
+ // Hook wants the agent to keep going — inject reason as user guidance
360
+ messages.push({
361
+ role: 'user',
362
+ content: decision.reason ?? 'Continue — a required post-completion step was not performed.',
363
+ });
364
+ continue; // re-enter the loop for one more LLM step
365
+ }
366
+ }
367
+
294
368
  // Structured output: use generateObject on terminal step when schema is set
295
369
  if (config.outputSchema) {
296
370
  try {
@@ -308,25 +382,39 @@ export class AgentRunner {
308
382
  system: config.systemPrompt,
309
383
  abortSignal: config.signal,
310
384
  });
311
- return { messages, output: text, steps: step + 1, structuredOutput: structured.object, usage: threadUsage };
385
+ return { messages, output: text, steps: step + 1, structuredOutput: structured.object };
312
386
  } catch (err) {
313
387
  console.warn('[agent-runner] generateObject failed, falling back to text:', err instanceof Error ? err.message : err);
314
388
  }
315
389
  }
316
390
 
317
- return { messages, output: text, steps: step + 1, usage: threadUsage };
391
+ return { messages, output: text, steps: step + 1 };
318
392
  }
319
393
 
320
- const toolCallInfos = toolCalls.map(tc => ({
321
- toolCallId: tc.toolCallId ?? randomUUID(),
322
- toolName: tc.toolName,
323
- args: (tc as { input?: Record<string, unknown> }).input ?? {},
324
- }));
394
+ const toolCallInfos = toolCalls.map(tc => {
395
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
396
+ const raw = tc as any;
397
+ const info: import('../agent/types').ToolCallInfo = {
398
+ toolCallId: raw.toolCallId ?? randomUUID(),
399
+ toolName: raw.toolName,
400
+ args: raw.input ?? {},
401
+ };
402
+ // Preserve provider-specific metadata (e.g., Gemini thought signatures)
403
+ if (raw.providerMetadata || raw.experimental_providerMetadata) {
404
+ info.providerMetadata = raw.providerMetadata ?? raw.experimental_providerMetadata;
405
+ }
406
+ return info;
407
+ });
408
+
409
+ // Preserve response-level provider metadata (e.g., Gemini thought signatures)
410
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
411
+ const responseMetadata = (result as any).providerMetadata ?? (result as any).experimental_providerMetadata;
325
412
 
326
413
  messages.push({
327
414
  role: 'assistant',
328
415
  content: toolCalls.map(tc => `${tc.toolName}(${JSON.stringify((tc as { input?: Record<string, unknown> }).input ?? {}).slice(0, 100)})`).join(', '),
329
416
  toolCalls: toolCallInfos,
417
+ ...(responseMetadata ? { providerMetadata: responseMetadata } : {}),
330
418
  });
331
419
 
332
420
  for (const tc of toolCallInfos) {
@@ -337,6 +425,44 @@ export class AgentRunner {
337
425
  toolCallId: tc.toolCallId,
338
426
  };
339
427
 
428
+ // ReadFullResult: retrieve paged tool result content
429
+ if (tc.toolName === 'ReadFullResult' && config.resultPager) {
430
+ const ref = String(tc.args.ref ?? '');
431
+ const content = await config.resultPager.retrieve(ref);
432
+ if (!content) {
433
+ const errorText = 'ERROR: Content expired or not found. Use the summary above.';
434
+ messages.push({
435
+ role: 'tool',
436
+ content: errorText,
437
+ toolResults: [{ toolCallId: tc.toolCallId, toolName: tc.toolName, result: errorText, isError: true }],
438
+ });
439
+ continue;
440
+ }
441
+ let output = content;
442
+ const lr = tc.args.lineRange;
443
+ if (lr && typeof lr === 'object' && 'start' in lr && 'end' in lr) {
444
+ const start = Number(lr.start);
445
+ const end = Number(lr.end);
446
+ if (Number.isFinite(start) && Number.isFinite(end) && start >= 1 && end >= start) {
447
+ const lines = content.split('\n');
448
+ output = lines.slice(start - 1, end).join('\n');
449
+ }
450
+ }
451
+ // Hard cap — ReadFullResult is NEVER re-paged (prevents infinite recursion)
452
+ if (output.length > READ_FULL_RESULT_HARD_CAP) {
453
+ output = output.slice(0, READ_FULL_RESULT_HARD_CAP)
454
+ + `\n\n[Showing first ${READ_FULL_RESULT_HARD_CAP} of ${output.length} chars. Use lineRange for specific sections.]`;
455
+ }
456
+ config.onActivity?.({ type: 'tool_start', name: tc.toolName, args: tc.args, ts: Date.now() });
457
+ config.onActivity?.({ type: 'tool_end', name: tc.toolName, ok: true, ms: 0, preview: output.slice(0, 200), ts: Date.now() });
458
+ messages.push({
459
+ role: 'tool',
460
+ content: output,
461
+ toolResults: [{ toolCallId: tc.toolCallId, toolName: tc.toolName, result: output, isError: false }],
462
+ });
463
+ continue;
464
+ }
465
+
340
466
  // Layer 2: executor-level tool validation (defense-in-depth)
341
467
  if (config.allowedToolNames && !config.allowedToolNames.includes(tc.toolName)) {
342
468
  const resultText = `ERROR: Tool "${tc.toolName}" is not available in this profile.`;
@@ -372,10 +498,12 @@ export class AgentRunner {
372
498
  ...(config.downloadRawFile != null ? { downloadRawFile: config.downloadRawFile } : {}),
373
499
  });
374
500
  } catch (error) {
501
+ const errorMsg = error instanceof Error ? error.message : String(error);
375
502
  toolResult = {
376
503
  success: false,
377
504
  output: '',
378
- error: error instanceof Error ? error.message : String(error),
505
+ // Truncate error messages to prevent leaking long stack traces into context
506
+ error: errorMsg.length > 500 ? errorMsg.slice(0, 500) + '...' : errorMsg,
379
507
  };
380
508
  }
381
509
  const durationMs = Date.now() - start;
@@ -389,10 +517,42 @@ export class AgentRunner {
389
517
  ts: Date.now(),
390
518
  });
391
519
 
392
- const resultText = toolResult.success
520
+ let resultText = toolResult.success
393
521
  ? toolResult.output
394
522
  : `ERROR: ${toolResult.error ?? 'unknown failure'}`;
395
523
 
524
+ // Context paging: store large results externally, keep summary in context
525
+ if (
526
+ config.resultPager &&
527
+ toolResult.success &&
528
+ resultText.length > pageThreshold &&
529
+ !pagingExcludeSet.has(tc.toolName) &&
530
+ tc.toolName !== 'ReadFullResult' // Never re-page ReadFullResult output
531
+ ) {
532
+ try {
533
+ const paged = await config.resultPager.page(resultText, {
534
+ toolName: tc.toolName,
535
+ toolCallId: tc.toolCallId,
536
+ });
537
+ resultText = [
538
+ paged.summary,
539
+ '',
540
+ `[Full result: ${paged.originalLength} chars — call ReadFullResult("${paged.ref}") to retrieve]`,
541
+ ].join('\n');
542
+ } catch {
543
+ // Storage failed — fall back to prefix truncation
544
+ resultText = resultText.slice(0, pageThreshold)
545
+ + `\n\n[Truncated — ${resultText.length} chars total. Storage unavailable.]`;
546
+ }
547
+ }
548
+
549
+ // Hard cap on the final result text: intended as the fallback when no pager is configured, but also applied to paged summaries when set
550
+ if (config.maxToolResultLength && resultText.length > config.maxToolResultLength) {
551
+ const originalLength = resultText.length;
552
+ resultText = resultText.slice(0, config.maxToolResultLength)
553
+ + `\n\n[Truncated — ${originalLength} chars total, showing first ${config.maxToolResultLength}.]`;
554
+ }
555
+
396
556
  messages.push({
397
557
  role: 'tool',
398
558
  content: resultText,
@@ -406,7 +566,51 @@ export class AgentRunner {
406
566
  }
407
567
  }
408
568
 
409
- return { messages, output: 'max steps reached', steps: config.maxSteps, usage: threadUsage };
569
+ // RunComplete hook at maxSteps boundary (e.g., enforce DownloadRawFile even if loop exhausted)
570
+ if (config.hookRunner) {
571
+ const decision = await config.hookRunner.run({
572
+ event: 'RunComplete',
573
+ metadata: { messages, steps: config.maxSteps, output: 'max steps reached' },
574
+ });
575
+ if (!decision.allow) {
576
+ messages.push({ role: 'user', content: decision.reason ?? 'Continue — a required post-completion step was not performed.' });
577
+ // One extra step to satisfy the hook
578
+ const extra = await (generateText as any)({
579
+ model: (config.createModel ?? defaultAnthropicProvider)(config.model),
580
+ tools: normalizeTools(effectiveTools),
581
+ messages: toModelMessages(messages),
582
+ system: cachedSystem,
583
+ abortSignal: config.signal,
584
+ });
585
+ const extraCalls: Array<{ toolName: string; input: Record<string, unknown>; toolCallId?: string }> =
586
+ extra.toolCalls ?? [];
587
+ if (extraCalls.length > 0) {
588
+ const tc = extraCalls[0]!;
589
+ const info: ToolCallInfo = {
590
+ toolCallId: (tc as any).toolCallId ?? randomUUID(),
591
+ toolName: tc.toolName,
592
+ args: tc.input ?? {},
593
+ };
594
+ messages.push({ role: 'assistant', content: '', toolCalls: [info] });
595
+ const toolResult = await executeTool(
596
+ { name: tc.toolName, args: tc.input ?? {} },
597
+ config.toolProvider,
598
+ {
599
+ ...(config.executeToolAction != null ? { executeToolAction: config.executeToolAction } : {}),
600
+ ...(config.hookRunner != null ? { hookRunner: config.hookRunner } : {}),
601
+ ...(config.downloadRawFile != null ? { downloadRawFile: config.downloadRawFile } : {}),
602
+ },
603
+ );
604
+ messages.push({
605
+ role: 'tool',
606
+ content: toolResult.output,
607
+ toolResults: [{ toolCallId: info.toolCallId, toolName: tc.toolName, result: toolResult.output, isError: !toolResult.success }],
608
+ });
609
+ }
610
+ }
611
+ }
612
+
613
+ return { messages, output: 'max steps reached', steps: config.maxSteps };
410
614
  }
411
615
  }
412
616
 
@@ -428,7 +632,10 @@ export interface CreateProcessConfig {
428
632
  /** Custom system prompt for this process (overrides PROCESS_SYSTEM_PROMPT). */
429
633
  processSystemPrompt?: string;
430
634
  /** Async skill instructions to prepend to system prompt (resolved during process startup). */
635
+ /** Async skill instructions to prepend to system prompt (resolved during process startup). */
431
636
  skillPromptPromise?: Promise<string | null>;
637
+ /** Skill reference with optional pre-loaded content and sub-guides. */
638
+ skillRefPromise?: Promise<{ name: string; path: string; content?: string; subGuides?: Record<string, string> } | null>;
432
639
  /** Allowed tool names for executor-level validation (defense-in-depth against hallucinated tool calls). */
433
640
  allowedToolNames?: string[];
434
641
  /** Zod schema for structured output on the terminal step. */
@@ -436,6 +643,23 @@ export interface CreateProcessConfig {
436
643
  outputSchema?: import('zod').ZodObject<any>;
437
644
  /** Few-shot demo messages prepended before context episodes. */
438
645
  demoMessages?: AgentMessage[];
646
+ /** Seed context messages injected into every process (user message, attachments, etc.). */
647
+ processSeedContext?: string | AgentMessage[];
648
+
649
+ /** Tool choice for process LLM calls. Default: 'auto'. */
650
+ toolChoice?: ToolChoiceConfig;
651
+ /** ResultPager for context paging. */
652
+ resultPager?: ResultPager;
653
+ /** Character threshold for paging. Default: 4000. */
654
+ resultPageThreshold?: number;
655
+ /** Tool names to never page. */
656
+ pagingExclude?: string[];
657
+ /** Hard cap on tool result length when no pager is configured. */
658
+ maxToolResultLength?: number;
659
+ /** Structured facts injected into the process system prompt. */
660
+ contextFacts?: string[];
661
+ /** Max context tokens for worker thread trimming. */
662
+ maxContextTokens?: number;
439
663
 
440
664
  // Runtime extras
441
665
  hookRunner?: HookRunner;
@@ -492,14 +716,38 @@ export function createProcess(
492
716
  const seed = [
493
717
  ...(config.demoMessages ?? []),
494
718
  ...(await seedPromise),
719
+ ...normalizeSeedContext(config.processSeedContext),
495
720
  ];
496
721
 
497
722
  // Build system prompt: base + optional skill instructions
498
723
  let systemPrompt = config.processSystemPrompt ?? PROCESS_SYSTEM_PROMPT;
499
- if (config.skillPromptPromise) {
724
+
725
+ // Inject skill + pre-read sub-guides directly into system prompt
726
+ const skillRef = config.skillRefPromise ? await config.skillRefPromise : null;
727
+ if (skillRef) {
728
+ // Build sub-guide content blocks
729
+ const subGuideBlocks = skillRef.subGuides
730
+ ? Object.entries(skillRef.subGuides)
731
+ .map(([file, content]) => `\n### Sub-guide: ${file}\n\n${content}`)
732
+ .join('\n')
733
+ : '';
734
+
735
+ systemPrompt += `\n\n<skill_system>
736
+ **Skill: ${skillRef.name}**
737
+
738
+ Follow these skill instructions precisely. Do NOT use alternative tools or libraries.
739
+
740
+ ${skillRef.content ?? ''}
741
+ ${subGuideBlocks}
742
+ </skill_system>`;
743
+ } else if (config.skillPromptPromise) {
744
+ // Legacy: full content injection (fallback)
500
745
  const skillInstructions = await config.skillPromptPromise;
501
746
  if (skillInstructions) {
502
- systemPrompt += '\n\n## Skill Instructions\n' + skillInstructions;
747
+ systemPrompt += '\n\n<skill_instructions>\n'
748
+ + 'IMPORTANT: Follow the skill instructions below precisely. They contain tested, working patterns.\n\n'
749
+ + skillInstructions
750
+ + '\n</skill_instructions>';
503
751
  }
504
752
  }
505
753
 
@@ -528,6 +776,13 @@ export function createProcess(
528
776
  'downloadRawFile',
529
777
  'allowedToolNames',
530
778
  'outputSchema',
779
+ 'toolChoice',
780
+ 'resultPager',
781
+ 'resultPageThreshold',
782
+ 'pagingExclude',
783
+ 'maxToolResultLength',
784
+ 'contextFacts',
785
+ 'maxContextTokens',
531
786
  ]),
532
787
  }),
533
788
  timeoutPromise(config.processTimeout),
@@ -564,7 +819,6 @@ export function createProcess(
564
819
  success: true,
565
820
  durationMs,
566
821
  resolvedModel: model,
567
- usage: result.usage,
568
822
  };
569
823
 
570
824
  process.result = processResult;
@@ -633,6 +887,41 @@ export function createProcess(
633
887
  return process;
634
888
  }
635
889
 
890
+ // ── Context trimming for worker threads ──
891
+
892
+ const STUB_THRESHOLD = 500;
893
+
894
+ /**
895
+ * Trim conversation context by stubbing large tool results in older messages.
896
+ * Preserves the most recent messages (hot zone) and stubs outputs in the cold zone.
897
+ */
898
+ function trimContext(messages: AgentMessage[], maxTokens: number): void {
899
+ // Estimate current size using the same estimator as ContextWindow
900
+ let totalTokens = 0;
901
+ for (const m of messages) {
902
+ const text = typeof m.content === 'string' ? m.content : '';
903
+ totalTokens += estimateTokens(text);
904
+ }
905
+ if (totalTokens <= maxTokens) return;
906
+
907
+ // Cold zone = oldest 60% of messages (eligible for stubbing); the newest ~40% are the hot zone and are left untouched
908
+ const hotBoundary = Math.floor(messages.length * 0.6);
909
+
910
+ for (let i = 0; i < hotBoundary; i++) {
911
+ const m = messages[i]!;
912
+ if (m.role === 'tool' && typeof m.content === 'string' && m.content.length > STUB_THRESHOLD) {
913
+ const toolName = m.toolResults?.[0]?.toolName ?? 'tool';
914
+ const stubbed = `[${toolName}: output stubbed, ${m.content.length} chars]`;
915
+ m.content = stubbed;
916
+ if (m.toolResults) {
917
+ for (const tr of m.toolResults) {
918
+ tr.result = stubbed;
919
+ }
920
+ }
921
+ }
922
+ }
923
+ }
924
+
636
925
  // ── Helpers ──
637
926
 
638
927
  async function getNextEpisodeIndex(store: EpisodeStore, taskId: string): Promise<number> {
@@ -678,6 +967,14 @@ async function buildSeedMessages(
678
967
  return messages;
679
968
  }
680
969
 
970
+ function normalizeSeedContext(ctx: string | AgentMessage[] | undefined): AgentMessage[] {
971
+ if (!ctx) return [];
972
+ if (typeof ctx === 'string') {
973
+ return [{ role: 'system', content: ctx }];
974
+ }
975
+ return ctx;
976
+ }
977
+
681
978
  function timeoutPromise(ms: number): Promise<never> {
682
979
  return new Promise((_, reject) =>
683
980
  setTimeout(() => reject(new Error(`Process timed out after ${ms}ms`)), ms)