@bluecopa/harness 0.1.0-snapshot.60 → 0.1.0-snapshot.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/types.ts +4 -0
- package/src/arc/agent-runner.ts +333 -36
- package/src/arc/arc-loop.ts +118 -97
- package/src/arc/message-convert.ts +15 -3
- package/src/arc/multi-model.ts +70 -0
- package/src/arc/profile-builder.ts +18 -2
- package/src/arc/result-pager.ts +77 -0
- package/src/arc/skill-resolver.ts +33 -2
- package/src/arc/types.ts +47 -11
- package/src/hooks/middleware.ts +95 -0
- package/src/interfaces/hooks.ts +2 -1
- package/src/loop/vercel-agent-loop.ts +16 -9
- package/tests/arc/middleware.test.ts +113 -0
- package/tests/arc/process-profiles.test.ts +7 -5
- package/tests/arc/result-paging.test.ts +392 -0
package/package.json
CHANGED
package/src/agent/types.ts
CHANGED
|
@@ -2,6 +2,8 @@ export interface ToolCallInfo {
|
|
|
2
2
|
toolCallId: string;
|
|
3
3
|
toolName: string;
|
|
4
4
|
args: Record<string, unknown>;
|
|
5
|
+
/** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
|
|
6
|
+
providerMetadata?: Record<string, unknown>;
|
|
5
7
|
}
|
|
6
8
|
|
|
7
9
|
export interface ToolResultInfo {
|
|
@@ -20,6 +22,8 @@ export interface AgentMessage {
|
|
|
20
22
|
content: string | ContentPart[];
|
|
21
23
|
toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
|
|
22
24
|
toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
|
|
25
|
+
/** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
|
|
26
|
+
providerMetadata?: Record<string, unknown>;
|
|
23
27
|
}
|
|
24
28
|
|
|
25
29
|
/** Extract plain text from content (string or ContentPart[]). */
|
package/src/arc/agent-runner.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
|
-
import { generateText, generateObject } from 'ai';
|
|
2
|
+
import { generateText, generateObject, tool as aiTool } from 'ai';
|
|
3
3
|
import { anthropic as defaultAnthropicProvider } from '@ai-sdk/anthropic';
|
|
4
|
+
import { z } from 'zod';
|
|
4
5
|
import type { ModelFactory } from './types';
|
|
5
6
|
import type { AgentMessage, ToolCallAction } from '../agent/types';
|
|
6
7
|
import { getTextContent } from '../agent/types';
|
|
@@ -8,11 +9,13 @@ import type { ToolProvider, ToolResult } from '../interfaces/tool-provider';
|
|
|
8
9
|
import type { HookRunner } from '../hooks/hook-runner';
|
|
9
10
|
import type { PermissionManager } from '../permissions/permission-manager';
|
|
10
11
|
import type { HarnessTelemetry } from '../observability/otel';
|
|
11
|
-
import type { Activity, Process, ProcessEvent, ProcessRequest, ProcessResult, ArcLoopConfig } from './types';
|
|
12
|
+
import type { Activity, Process, ProcessEvent, ProcessRequest, ProcessResult, ArcLoopConfig, ToolChoiceConfig } from './types';
|
|
13
|
+
import { resolveToolChoice } from './types';
|
|
14
|
+
import type { ResultPager } from './result-pager';
|
|
12
15
|
import type { Episode, EpisodeStore, ModelTier } from './arc-types';
|
|
13
16
|
import type { ResiliencePolicy, ExecutionContext } from './resilience/types';
|
|
14
17
|
import { resolveModel, DEFAULT_MODEL_MAP } from './arc-types';
|
|
15
|
-
import { toModelMessages } from './message-convert';
|
|
18
|
+
import { toModelMessages, estimateTokens } from './message-convert';
|
|
16
19
|
import { EpisodeCompressor } from './episode-compressor';
|
|
17
20
|
import { pickDefined, normalizeTools } from './utils';
|
|
18
21
|
|
|
@@ -74,12 +77,21 @@ export async function firstEvent<T extends { type: string }>(
|
|
|
74
77
|
throw new Error(`Stream ended without '${type}' event`);
|
|
75
78
|
}
|
|
76
79
|
|
|
80
|
+
// ── Constants ──
|
|
81
|
+
|
|
82
|
+
/** Default character threshold above which tool results are paged externally. */
|
|
83
|
+
export const DEFAULT_PAGE_THRESHOLD = 4_000;
|
|
84
|
+
|
|
85
|
+
/** Hard cap on ReadFullResult output — never re-paged, prevents infinite recursion. */
|
|
86
|
+
export const READ_FULL_RESULT_HARD_CAP = 32_000;
|
|
87
|
+
|
|
77
88
|
// ── Process system prompt ──
|
|
78
89
|
|
|
79
90
|
const PROCESS_SYSTEM_PROMPT = [
|
|
80
91
|
'You are a focused execution thread within a larger agent system.',
|
|
81
92
|
'Complete the assigned task using the available tools.',
|
|
82
93
|
'Be efficient — accomplish the objective with minimal steps.',
|
|
94
|
+
'If your context includes the user\'s original message or attachment metadata, use that information directly.',
|
|
83
95
|
'When done, provide a brief summary of what you accomplished.',
|
|
84
96
|
].join(' ');
|
|
85
97
|
|
|
@@ -212,6 +224,23 @@ export interface AgentRunnerConfig {
|
|
|
212
224
|
|
|
213
225
|
/** Optional resilience policy applied to generateText calls. */
|
|
214
226
|
resilience?: ResiliencePolicy;
|
|
227
|
+
|
|
228
|
+
/** Tool choice for LLM calls. Supports per-turn callbacks. Default: 'auto'. */
|
|
229
|
+
toolChoice?: ToolChoiceConfig;
|
|
230
|
+
|
|
231
|
+
/** ResultPager for storing large tool results externally. When set, enables context paging. */
|
|
232
|
+
resultPager?: ResultPager;
|
|
233
|
+
/** Character threshold above which tool results are paged. Default: 4000. */
|
|
234
|
+
resultPageThreshold?: number;
|
|
235
|
+
/** Tool names to never page (e.g., ['Read', 'Edit'] — filesystem tools return needed content). */
|
|
236
|
+
pagingExclude?: string[];
|
|
237
|
+
/** Hard cap on tool result length (chars) when no resultPager is configured. Truncates with a note. No default (unlimited). */
|
|
238
|
+
maxToolResultLength?: number;
|
|
239
|
+
|
|
240
|
+
/** Structured facts injected into the system prompt (e.g., from long-term memory). */
|
|
241
|
+
contextFacts?: string[];
|
|
242
|
+
/** Max context tokens before old messages are trimmed. When set, stubs old tool results to keep within budget. */
|
|
243
|
+
maxContextTokens?: number;
|
|
215
244
|
}
|
|
216
245
|
|
|
217
246
|
export interface AgentRunResult {
|
|
@@ -220,8 +249,6 @@ export interface AgentRunResult {
|
|
|
220
249
|
steps: number;
|
|
221
250
|
/** Structured output from generateObject when outputSchema is set. */
|
|
222
251
|
structuredOutput?: Record<string, unknown>;
|
|
223
|
-
/** Token usage accumulated across all steps in this thread. */
|
|
224
|
-
usage?: { inputTokens: number; outputTokens: number; cacheReadTokens: number; cacheWriteTokens: number };
|
|
225
252
|
}
|
|
226
253
|
|
|
227
254
|
export class AgentRunner {
|
|
@@ -231,9 +258,36 @@ export class AgentRunner {
|
|
|
231
258
|
{ role: 'user', content: config.prompt },
|
|
232
259
|
];
|
|
233
260
|
|
|
234
|
-
//
|
|
235
|
-
const
|
|
236
|
-
|
|
261
|
+
// Build system prompt with optional structured facts
|
|
262
|
+
const systemContent = config.contextFacts?.length
|
|
263
|
+
? config.systemPrompt + '\n\n## Known Facts\n' + config.contextFacts.map(f => `- ${f}`).join('\n')
|
|
264
|
+
: config.systemPrompt;
|
|
265
|
+
|
|
266
|
+
const cachedSystem = [{
|
|
267
|
+
role: 'system' as const,
|
|
268
|
+
content: systemContent,
|
|
269
|
+
}];
|
|
270
|
+
|
|
271
|
+
// Pre-compute paging config (avoid per-iteration allocation)
|
|
272
|
+
const pageThreshold = config.resultPageThreshold ?? DEFAULT_PAGE_THRESHOLD;
|
|
273
|
+
const pagingExcludeSet = new Set(config.pagingExclude ?? []);
|
|
274
|
+
|
|
275
|
+
// Augment tools with ReadFullResult when paging is enabled
|
|
276
|
+
const effectiveTools = config.resultPager
|
|
277
|
+
? {
|
|
278
|
+
...config.tools,
|
|
279
|
+
ReadFullResult: aiTool({
|
|
280
|
+
description: 'Retrieve the full content of a paged tool result. Use when the summary is insufficient and you need the complete data.',
|
|
281
|
+
parameters: z.object({
|
|
282
|
+
ref: z.string().describe('The paged result reference from a previous tool output'),
|
|
283
|
+
lineRange: z.object({
|
|
284
|
+
start: z.number().int().min(1).describe('Start line (1-indexed, inclusive)'),
|
|
285
|
+
end: z.number().int().min(1).describe('End line (1-indexed, inclusive)'),
|
|
286
|
+
}).optional().describe('Optional line range to retrieve. Omit for full content.'),
|
|
287
|
+
}),
|
|
288
|
+
}),
|
|
289
|
+
}
|
|
290
|
+
: config.tools;
|
|
237
291
|
|
|
238
292
|
for (let step = 0; step < config.maxSteps; step++) {
|
|
239
293
|
config.signal.throwIfAborted();
|
|
@@ -244,12 +298,17 @@ export class AgentRunner {
|
|
|
244
298
|
}
|
|
245
299
|
}
|
|
246
300
|
|
|
301
|
+
// Context trimming: stub old tool results when context exceeds budget
|
|
302
|
+
if (config.maxContextTokens && step > 0) {
|
|
303
|
+
trimContext(messages, config.maxContextTokens);
|
|
304
|
+
}
|
|
305
|
+
|
|
247
306
|
const callLLM = async (effectiveSignal: AbortSignal) =>
|
|
248
307
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
249
308
|
(generateText as any)({
|
|
250
309
|
model: (config.createModel ?? defaultAnthropicProvider)(config.model),
|
|
251
|
-
tools: normalizeTools(
|
|
252
|
-
toolChoice:
|
|
310
|
+
tools: normalizeTools(effectiveTools),
|
|
311
|
+
toolChoice: resolveToolChoice(config.toolChoice, step),
|
|
253
312
|
messages: toModelMessages(messages),
|
|
254
313
|
system: cachedSystem,
|
|
255
314
|
abortSignal: effectiveSignal,
|
|
@@ -272,25 +331,40 @@ export class AgentRunner {
|
|
|
272
331
|
result = await callLLM(config.signal);
|
|
273
332
|
}
|
|
274
333
|
|
|
275
|
-
// Extract token usage from generateText result
|
|
276
|
-
try {
|
|
277
|
-
const usage = result.usage;
|
|
278
|
-
if (usage) {
|
|
279
|
-
threadUsage.inputTokens += usage.inputTokens ?? 0;
|
|
280
|
-
threadUsage.outputTokens += usage.outputTokens ?? 0;
|
|
281
|
-
const details = usage.inputTokenDetails ?? usage;
|
|
282
|
-
threadUsage.cacheReadTokens += details.cacheReadTokens ?? 0;
|
|
283
|
-
threadUsage.cacheWriteTokens += details.cacheWriteTokens ?? 0;
|
|
284
|
-
}
|
|
285
|
-
} catch { /* best-effort */ }
|
|
286
|
-
|
|
287
334
|
const toolCalls: Array<{ toolName: string; input: Record<string, unknown>; toolCallId?: string }> =
|
|
288
335
|
result.toolCalls ?? [];
|
|
289
336
|
|
|
290
337
|
if (toolCalls.length === 0) {
|
|
291
|
-
const
|
|
338
|
+
const rawText = result.text?.trim() ?? '';
|
|
339
|
+
// Detect empty response (potential billing/auth error — model returned nothing)
|
|
340
|
+
if (!rawText && step === 0) {
|
|
341
|
+
const text = 'ERROR: LLM returned empty response with no tool calls on first step. This may indicate an API billing issue, authentication error, or rate limit.';
|
|
342
|
+
messages.push({ role: 'assistant', content: text });
|
|
343
|
+
return { messages, output: text, steps: step + 1 };
|
|
344
|
+
}
|
|
345
|
+
const text = rawText || 'Done.';
|
|
292
346
|
messages.push({ role: 'assistant', content: text });
|
|
293
347
|
|
|
348
|
+
// RunComplete hook: allow middleware to inspect and optionally continue
|
|
349
|
+
if (config.hookRunner) {
|
|
350
|
+
const decision = await config.hookRunner.run({
|
|
351
|
+
event: 'RunComplete',
|
|
352
|
+
metadata: {
|
|
353
|
+
messages,
|
|
354
|
+
steps: step + 1,
|
|
355
|
+
output: text,
|
|
356
|
+
},
|
|
357
|
+
});
|
|
358
|
+
if (!decision.allow) {
|
|
359
|
+
// Hook wants the agent to keep going — inject reason as user guidance
|
|
360
|
+
messages.push({
|
|
361
|
+
role: 'user',
|
|
362
|
+
content: decision.reason ?? 'Continue — a required post-completion step was not performed.',
|
|
363
|
+
});
|
|
364
|
+
continue; // re-enter the loop for one more LLM step
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
294
368
|
// Structured output: use generateObject on terminal step when schema is set
|
|
295
369
|
if (config.outputSchema) {
|
|
296
370
|
try {
|
|
@@ -308,25 +382,39 @@ export class AgentRunner {
|
|
|
308
382
|
system: config.systemPrompt,
|
|
309
383
|
abortSignal: config.signal,
|
|
310
384
|
});
|
|
311
|
-
return { messages, output: text, steps: step + 1, structuredOutput: structured.object
|
|
385
|
+
return { messages, output: text, steps: step + 1, structuredOutput: structured.object };
|
|
312
386
|
} catch (err) {
|
|
313
387
|
console.warn('[agent-runner] generateObject failed, falling back to text:', err instanceof Error ? err.message : err);
|
|
314
388
|
}
|
|
315
389
|
}
|
|
316
390
|
|
|
317
|
-
return { messages, output: text, steps: step + 1
|
|
391
|
+
return { messages, output: text, steps: step + 1 };
|
|
318
392
|
}
|
|
319
393
|
|
|
320
|
-
const toolCallInfos = toolCalls.map(tc =>
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
394
|
+
const toolCallInfos = toolCalls.map(tc => {
|
|
395
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
396
|
+
const raw = tc as any;
|
|
397
|
+
const info: import('../agent/types').ToolCallInfo = {
|
|
398
|
+
toolCallId: raw.toolCallId ?? randomUUID(),
|
|
399
|
+
toolName: raw.toolName,
|
|
400
|
+
args: raw.input ?? {},
|
|
401
|
+
};
|
|
402
|
+
// Preserve provider-specific metadata (e.g., Gemini thought signatures)
|
|
403
|
+
if (raw.providerMetadata || raw.experimental_providerMetadata) {
|
|
404
|
+
info.providerMetadata = raw.providerMetadata ?? raw.experimental_providerMetadata;
|
|
405
|
+
}
|
|
406
|
+
return info;
|
|
407
|
+
});
|
|
408
|
+
|
|
409
|
+
// Preserve response-level provider metadata (e.g., Gemini thought signatures)
|
|
410
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
411
|
+
const responseMetadata = (result as any).providerMetadata ?? (result as any).experimental_providerMetadata;
|
|
325
412
|
|
|
326
413
|
messages.push({
|
|
327
414
|
role: 'assistant',
|
|
328
415
|
content: toolCalls.map(tc => `${tc.toolName}(${JSON.stringify((tc as { input?: Record<string, unknown> }).input ?? {}).slice(0, 100)})`).join(', '),
|
|
329
416
|
toolCalls: toolCallInfos,
|
|
417
|
+
...(responseMetadata ? { providerMetadata: responseMetadata } : {}),
|
|
330
418
|
});
|
|
331
419
|
|
|
332
420
|
for (const tc of toolCallInfos) {
|
|
@@ -337,6 +425,44 @@ export class AgentRunner {
|
|
|
337
425
|
toolCallId: tc.toolCallId,
|
|
338
426
|
};
|
|
339
427
|
|
|
428
|
+
// ReadFullResult: retrieve paged tool result content
|
|
429
|
+
if (tc.toolName === 'ReadFullResult' && config.resultPager) {
|
|
430
|
+
const ref = String(tc.args.ref ?? '');
|
|
431
|
+
const content = await config.resultPager.retrieve(ref);
|
|
432
|
+
if (!content) {
|
|
433
|
+
const errorText = 'ERROR: Content expired or not found. Use the summary above.';
|
|
434
|
+
messages.push({
|
|
435
|
+
role: 'tool',
|
|
436
|
+
content: errorText,
|
|
437
|
+
toolResults: [{ toolCallId: tc.toolCallId, toolName: tc.toolName, result: errorText, isError: true }],
|
|
438
|
+
});
|
|
439
|
+
continue;
|
|
440
|
+
}
|
|
441
|
+
let output = content;
|
|
442
|
+
const lr = tc.args.lineRange;
|
|
443
|
+
if (lr && typeof lr === 'object' && 'start' in lr && 'end' in lr) {
|
|
444
|
+
const start = Number(lr.start);
|
|
445
|
+
const end = Number(lr.end);
|
|
446
|
+
if (Number.isFinite(start) && Number.isFinite(end) && start >= 1 && end >= start) {
|
|
447
|
+
const lines = content.split('\n');
|
|
448
|
+
output = lines.slice(start - 1, end).join('\n');
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
// Hard cap — ReadFullResult is NEVER re-paged (prevents infinite recursion)
|
|
452
|
+
if (output.length > READ_FULL_RESULT_HARD_CAP) {
|
|
453
|
+
output = output.slice(0, READ_FULL_RESULT_HARD_CAP)
|
|
454
|
+
+ `\n\n[Showing first ${READ_FULL_RESULT_HARD_CAP} of ${output.length} chars. Use lineRange for specific sections.]`;
|
|
455
|
+
}
|
|
456
|
+
config.onActivity?.({ type: 'tool_start', name: tc.toolName, args: tc.args, ts: Date.now() });
|
|
457
|
+
config.onActivity?.({ type: 'tool_end', name: tc.toolName, ok: true, ms: 0, preview: output.slice(0, 200), ts: Date.now() });
|
|
458
|
+
messages.push({
|
|
459
|
+
role: 'tool',
|
|
460
|
+
content: output,
|
|
461
|
+
toolResults: [{ toolCallId: tc.toolCallId, toolName: tc.toolName, result: output, isError: false }],
|
|
462
|
+
});
|
|
463
|
+
continue;
|
|
464
|
+
}
|
|
465
|
+
|
|
340
466
|
// Layer 2: executor-level tool validation (defense-in-depth)
|
|
341
467
|
if (config.allowedToolNames && !config.allowedToolNames.includes(tc.toolName)) {
|
|
342
468
|
const resultText = `ERROR: Tool "${tc.toolName}" is not available in this profile.`;
|
|
@@ -372,10 +498,12 @@ export class AgentRunner {
|
|
|
372
498
|
...(config.downloadRawFile != null ? { downloadRawFile: config.downloadRawFile } : {}),
|
|
373
499
|
});
|
|
374
500
|
} catch (error) {
|
|
501
|
+
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
375
502
|
toolResult = {
|
|
376
503
|
success: false,
|
|
377
504
|
output: '',
|
|
378
|
-
|
|
505
|
+
// Truncate error messages to prevent leaking long stack traces into context
|
|
506
|
+
error: errorMsg.length > 500 ? errorMsg.slice(0, 500) + '...' : errorMsg,
|
|
379
507
|
};
|
|
380
508
|
}
|
|
381
509
|
const durationMs = Date.now() - start;
|
|
@@ -389,10 +517,42 @@ export class AgentRunner {
|
|
|
389
517
|
ts: Date.now(),
|
|
390
518
|
});
|
|
391
519
|
|
|
392
|
-
|
|
520
|
+
let resultText = toolResult.success
|
|
393
521
|
? toolResult.output
|
|
394
522
|
: `ERROR: ${toolResult.error ?? 'unknown failure'}`;
|
|
395
523
|
|
|
524
|
+
// Context paging: store large results externally, keep summary in context
|
|
525
|
+
if (
|
|
526
|
+
config.resultPager &&
|
|
527
|
+
toolResult.success &&
|
|
528
|
+
resultText.length > pageThreshold &&
|
|
529
|
+
!pagingExcludeSet.has(tc.toolName) &&
|
|
530
|
+
tc.toolName !== 'ReadFullResult' // Never re-page ReadFullResult output
|
|
531
|
+
) {
|
|
532
|
+
try {
|
|
533
|
+
const paged = await config.resultPager.page(resultText, {
|
|
534
|
+
toolName: tc.toolName,
|
|
535
|
+
toolCallId: tc.toolCallId,
|
|
536
|
+
});
|
|
537
|
+
resultText = [
|
|
538
|
+
paged.summary,
|
|
539
|
+
'',
|
|
540
|
+
`[Full result: ${paged.originalLength} chars — call ReadFullResult("${paged.ref}") to retrieve]`,
|
|
541
|
+
].join('\n');
|
|
542
|
+
} catch {
|
|
543
|
+
// Storage failed — fall back to prefix truncation
|
|
544
|
+
resultText = resultText.slice(0, pageThreshold)
|
|
545
|
+
+ `\n\n[Truncated — ${resultText.length} chars total. Storage unavailable.]`;
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
// Fallback hard cap when no pager is configured
|
|
550
|
+
if (config.maxToolResultLength && resultText.length > config.maxToolResultLength) {
|
|
551
|
+
const originalLength = resultText.length;
|
|
552
|
+
resultText = resultText.slice(0, config.maxToolResultLength)
|
|
553
|
+
+ `\n\n[Truncated — ${originalLength} chars total, showing first ${config.maxToolResultLength}.]`;
|
|
554
|
+
}
|
|
555
|
+
|
|
396
556
|
messages.push({
|
|
397
557
|
role: 'tool',
|
|
398
558
|
content: resultText,
|
|
@@ -406,7 +566,51 @@ export class AgentRunner {
|
|
|
406
566
|
}
|
|
407
567
|
}
|
|
408
568
|
|
|
409
|
-
|
|
569
|
+
// RunComplete hook at maxSteps boundary (e.g., enforce DownloadRawFile even if loop exhausted)
|
|
570
|
+
if (config.hookRunner) {
|
|
571
|
+
const decision = await config.hookRunner.run({
|
|
572
|
+
event: 'RunComplete',
|
|
573
|
+
metadata: { messages, steps: config.maxSteps, output: 'max steps reached' },
|
|
574
|
+
});
|
|
575
|
+
if (!decision.allow) {
|
|
576
|
+
messages.push({ role: 'user', content: decision.reason ?? 'Continue — a required post-completion step was not performed.' });
|
|
577
|
+
// One extra step to satisfy the hook
|
|
578
|
+
const extra = await (generateText as any)({
|
|
579
|
+
model: (config.createModel ?? defaultAnthropicProvider)(config.model),
|
|
580
|
+
tools: normalizeTools(effectiveTools),
|
|
581
|
+
messages: toModelMessages(messages),
|
|
582
|
+
system: cachedSystem,
|
|
583
|
+
abortSignal: config.signal,
|
|
584
|
+
});
|
|
585
|
+
const extraCalls: Array<{ toolName: string; input: Record<string, unknown>; toolCallId?: string }> =
|
|
586
|
+
extra.toolCalls ?? [];
|
|
587
|
+
if (extraCalls.length > 0) {
|
|
588
|
+
const tc = extraCalls[0]!;
|
|
589
|
+
const info: ToolCallInfo = {
|
|
590
|
+
toolCallId: (tc as any).toolCallId ?? randomUUID(),
|
|
591
|
+
toolName: tc.toolName,
|
|
592
|
+
args: tc.input ?? {},
|
|
593
|
+
};
|
|
594
|
+
messages.push({ role: 'assistant', content: '', toolCalls: [info] });
|
|
595
|
+
const toolResult = await executeTool(
|
|
596
|
+
{ name: tc.toolName, args: tc.input ?? {} },
|
|
597
|
+
config.toolProvider,
|
|
598
|
+
{
|
|
599
|
+
...(config.executeToolAction != null ? { executeToolAction: config.executeToolAction } : {}),
|
|
600
|
+
...(config.hookRunner != null ? { hookRunner: config.hookRunner } : {}),
|
|
601
|
+
...(config.downloadRawFile != null ? { downloadRawFile: config.downloadRawFile } : {}),
|
|
602
|
+
},
|
|
603
|
+
);
|
|
604
|
+
messages.push({
|
|
605
|
+
role: 'tool',
|
|
606
|
+
content: toolResult.output,
|
|
607
|
+
toolResults: [{ toolCallId: info.toolCallId, toolName: tc.toolName, result: toolResult.output, isError: !toolResult.success }],
|
|
608
|
+
});
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
return { messages, output: 'max steps reached', steps: config.maxSteps };
|
|
410
614
|
}
|
|
411
615
|
}
|
|
412
616
|
|
|
@@ -428,7 +632,10 @@ export interface CreateProcessConfig {
|
|
|
428
632
|
/** Custom system prompt for this process (overrides PROCESS_SYSTEM_PROMPT). */
|
|
429
633
|
processSystemPrompt?: string;
|
|
430
634
|
/** Async skill instructions to prepend to system prompt (resolved during process startup). */
|
|
635
|
+
/** Async skill instructions to prepend to system prompt (resolved during process startup). */
|
|
431
636
|
skillPromptPromise?: Promise<string | null>;
|
|
637
|
+
/** Skill reference with optional pre-loaded content and sub-guides. */
|
|
638
|
+
skillRefPromise?: Promise<{ name: string; path: string; content?: string; subGuides?: Record<string, string> } | null>;
|
|
432
639
|
/** Allowed tool names for executor-level validation (defense-in-depth against hallucinated tool calls). */
|
|
433
640
|
allowedToolNames?: string[];
|
|
434
641
|
/** Zod schema for structured output on the terminal step. */
|
|
@@ -436,6 +643,23 @@ export interface CreateProcessConfig {
|
|
|
436
643
|
outputSchema?: import('zod').ZodObject<any>;
|
|
437
644
|
/** Few-shot demo messages prepended before context episodes. */
|
|
438
645
|
demoMessages?: AgentMessage[];
|
|
646
|
+
/** Seed context messages injected into every process (user message, attachments, etc.). */
|
|
647
|
+
processSeedContext?: string | AgentMessage[];
|
|
648
|
+
|
|
649
|
+
/** Tool choice for process LLM calls. Default: 'auto'. */
|
|
650
|
+
toolChoice?: ToolChoiceConfig;
|
|
651
|
+
/** ResultPager for context paging. */
|
|
652
|
+
resultPager?: ResultPager;
|
|
653
|
+
/** Character threshold for paging. Default: 4000. */
|
|
654
|
+
resultPageThreshold?: number;
|
|
655
|
+
/** Tool names to never page. */
|
|
656
|
+
pagingExclude?: string[];
|
|
657
|
+
/** Hard cap on tool result length when no pager is configured. */
|
|
658
|
+
maxToolResultLength?: number;
|
|
659
|
+
/** Structured facts injected into the process system prompt. */
|
|
660
|
+
contextFacts?: string[];
|
|
661
|
+
/** Max context tokens for worker thread trimming. */
|
|
662
|
+
maxContextTokens?: number;
|
|
439
663
|
|
|
440
664
|
// Runtime extras
|
|
441
665
|
hookRunner?: HookRunner;
|
|
@@ -492,14 +716,38 @@ export function createProcess(
|
|
|
492
716
|
const seed = [
|
|
493
717
|
...(config.demoMessages ?? []),
|
|
494
718
|
...(await seedPromise),
|
|
719
|
+
...normalizeSeedContext(config.processSeedContext),
|
|
495
720
|
];
|
|
496
721
|
|
|
497
722
|
// Build system prompt: base + optional skill instructions
|
|
498
723
|
let systemPrompt = config.processSystemPrompt ?? PROCESS_SYSTEM_PROMPT;
|
|
499
|
-
|
|
724
|
+
|
|
725
|
+
// Inject skill + pre-read sub-guides directly into system prompt
|
|
726
|
+
const skillRef = config.skillRefPromise ? await config.skillRefPromise : null;
|
|
727
|
+
if (skillRef) {
|
|
728
|
+
// Build sub-guide content blocks
|
|
729
|
+
const subGuideBlocks = skillRef.subGuides
|
|
730
|
+
? Object.entries(skillRef.subGuides)
|
|
731
|
+
.map(([file, content]) => `\n### Sub-guide: ${file}\n\n${content}`)
|
|
732
|
+
.join('\n')
|
|
733
|
+
: '';
|
|
734
|
+
|
|
735
|
+
systemPrompt += `\n\n<skill_system>
|
|
736
|
+
**Skill: ${skillRef.name}**
|
|
737
|
+
|
|
738
|
+
Follow these skill instructions precisely. Do NOT use alternative tools or libraries.
|
|
739
|
+
|
|
740
|
+
${skillRef.content ?? ''}
|
|
741
|
+
${subGuideBlocks}
|
|
742
|
+
</skill_system>`;
|
|
743
|
+
} else if (config.skillPromptPromise) {
|
|
744
|
+
// Legacy: full content injection (fallback)
|
|
500
745
|
const skillInstructions = await config.skillPromptPromise;
|
|
501
746
|
if (skillInstructions) {
|
|
502
|
-
systemPrompt += '\n\n
|
|
747
|
+
systemPrompt += '\n\n<skill_instructions>\n'
|
|
748
|
+
+ 'IMPORTANT: Follow the skill instructions below precisely. They contain tested, working patterns.\n\n'
|
|
749
|
+
+ skillInstructions
|
|
750
|
+
+ '\n</skill_instructions>';
|
|
503
751
|
}
|
|
504
752
|
}
|
|
505
753
|
|
|
@@ -528,6 +776,13 @@ export function createProcess(
|
|
|
528
776
|
'downloadRawFile',
|
|
529
777
|
'allowedToolNames',
|
|
530
778
|
'outputSchema',
|
|
779
|
+
'toolChoice',
|
|
780
|
+
'resultPager',
|
|
781
|
+
'resultPageThreshold',
|
|
782
|
+
'pagingExclude',
|
|
783
|
+
'maxToolResultLength',
|
|
784
|
+
'contextFacts',
|
|
785
|
+
'maxContextTokens',
|
|
531
786
|
]),
|
|
532
787
|
}),
|
|
533
788
|
timeoutPromise(config.processTimeout),
|
|
@@ -564,7 +819,6 @@ export function createProcess(
|
|
|
564
819
|
success: true,
|
|
565
820
|
durationMs,
|
|
566
821
|
resolvedModel: model,
|
|
567
|
-
usage: result.usage,
|
|
568
822
|
};
|
|
569
823
|
|
|
570
824
|
process.result = processResult;
|
|
@@ -633,6 +887,41 @@ export function createProcess(
|
|
|
633
887
|
return process;
|
|
634
888
|
}
|
|
635
889
|
|
|
890
|
+
// ── Context trimming for worker threads ──
|
|
891
|
+
|
|
892
|
+
const STUB_THRESHOLD = 500;
|
|
893
|
+
|
|
894
|
+
/**
 * Trim conversation context in place by stubbing large tool results in older messages.
 *
 * Nothing happens while the estimated token total of all message text stays
 * within `maxTokens`. Once the estimate exceeds the budget, every `tool` message
 * in the oldest 60% of the list (the cold zone) whose string content is longer
 * than STUB_THRESHOLD characters is replaced by a short placeholder. The most
 * recent 40% of messages (the hot zone) is never modified.
 *
 * @param messages - Conversation history; mutated in place.
 * @param maxTokens - Estimated-token budget above which stubbing is applied.
 */
function trimContext(messages: AgentMessage[], maxTokens: number): void {
  // Estimate the current size. Text is extracted from both plain-string and
  // ContentPart[] content so multi-part messages are not counted as empty.
  let totalTokens = 0;
  for (const message of messages) {
    totalTokens += estimateTokens(getTextContent(message.content));
  }
  if (totalTokens <= maxTokens) return;

  // Messages before this index form the cold zone (oldest 60%); everything at
  // or after it is the hot zone (newest 40%) and is left untouched.
  const coldBoundary = Math.floor(messages.length * 0.6);

  for (let i = 0; i < coldBoundary; i++) {
    const m = messages[i]!;
    if (m.role === 'tool' && typeof m.content === 'string' && m.content.length > STUB_THRESHOLD) {
      const toolName = m.toolResults?.[0]?.toolName ?? 'tool';
      // The placeholder records the original length so the model can tell data was elided.
      const stubbed = `[${toolName}: output stubbed, ${m.content.length} chars]`;
      m.content = stubbed;
      // Keep the structured results in sync with the stubbed message content.
      if (m.toolResults) {
        for (const tr of m.toolResults) {
          tr.result = stubbed;
        }
      }
    }
  }
}
|
|
924
|
+
|
|
636
925
|
// ── Helpers ──
|
|
637
926
|
|
|
638
927
|
async function getNextEpisodeIndex(store: EpisodeStore, taskId: string): Promise<number> {
|
|
@@ -678,6 +967,14 @@ async function buildSeedMessages(
|
|
|
678
967
|
return messages;
|
|
679
968
|
}
|
|
680
969
|
|
|
970
|
+
/**
 * Coerce optional seed context into a list of messages.
 *
 * - a missing or empty value yields an empty list;
 * - a non-empty string is wrapped in a single `system` message;
 * - an array of messages is returned as-is (same reference).
 */
function normalizeSeedContext(ctx: string | AgentMessage[] | undefined): AgentMessage[] {
  if (typeof ctx === 'string') {
    // An empty string carries no context, so it produces no message.
    return ctx.length > 0 ? [{ role: 'system', content: ctx }] : [];
  }
  return ctx || [];
}
|
|
977
|
+
|
|
681
978
|
function timeoutPromise(ms: number): Promise<never> {
|
|
682
979
|
return new Promise((_, reject) =>
|
|
683
980
|
setTimeout(() => reject(new Error(`Process timed out after ${ms}ms`)), ms)
|