keystone-cli 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +114 -140
  2. package/package.json +6 -3
  3. package/src/cli.ts +54 -369
  4. package/src/commands/init.ts +15 -29
  5. package/src/db/memory-db.test.ts +45 -0
  6. package/src/db/memory-db.ts +47 -21
  7. package/src/db/sqlite-setup.ts +26 -3
  8. package/src/db/workflow-db.ts +12 -5
  9. package/src/parser/config-schema.ts +11 -13
  10. package/src/parser/schema.ts +4 -2
  11. package/src/runner/__test__/llm-mock-setup.ts +173 -0
  12. package/src/runner/__test__/llm-test-setup.ts +271 -0
  13. package/src/runner/engine-executor.test.ts +25 -18
  14. package/src/runner/executors/blueprint-executor.ts +0 -1
  15. package/src/runner/executors/dynamic-executor.ts +11 -6
  16. package/src/runner/executors/engine-executor.ts +5 -1
  17. package/src/runner/executors/llm-executor.ts +502 -1033
  18. package/src/runner/executors/memory-executor.ts +35 -19
  19. package/src/runner/executors/plan-executor.ts +0 -1
  20. package/src/runner/executors/types.ts +4 -4
  21. package/src/runner/llm-adapter.integration.test.ts +151 -0
  22. package/src/runner/llm-adapter.ts +263 -1401
  23. package/src/runner/llm-clarification.test.ts +91 -106
  24. package/src/runner/llm-executor.test.ts +217 -1181
  25. package/src/runner/memoization.test.ts +0 -1
  26. package/src/runner/recovery-security.test.ts +51 -20
  27. package/src/runner/reflexion.test.ts +55 -18
  28. package/src/runner/standard-tools-integration.test.ts +137 -87
  29. package/src/runner/step-executor.test.ts +36 -80
  30. package/src/runner/step-executor.ts +0 -2
  31. package/src/runner/test-harness.ts +3 -29
  32. package/src/runner/tool-integration.test.ts +122 -73
  33. package/src/runner/workflow-runner.ts +92 -35
  34. package/src/runner/workflow-scheduler.ts +11 -1
  35. package/src/runner/workflow-summary.ts +144 -0
  36. package/src/utils/auth-manager.test.ts +10 -520
  37. package/src/utils/auth-manager.ts +3 -756
  38. package/src/utils/config-loader.ts +12 -0
  39. package/src/utils/constants.ts +0 -17
  40. package/src/utils/process-sandbox.ts +15 -3
  41. package/src/runner/llm-adapter-runtime.test.ts +0 -209
  42. package/src/runner/llm-adapter.test.ts +0 -1012
@@ -1,3 +1,6 @@
1
+ import { tool as createTool, jsonSchema, streamText } from 'ai';
2
+ import type { TextPart, ToolCallPart, ToolResultPart } from 'ai';
3
+ import { z } from 'zod';
1
4
  import type { ExpressionContext } from '../../expression/evaluator';
2
5
  import { ExpressionEvaluator } from '../../expression/evaluator';
3
6
  import { parseAgent, resolveAgentPath } from '../../parser/agent-parser';
@@ -9,30 +12,81 @@ import { extractJson } from '../../utils/json-parser';
9
12
  import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
10
13
  import { RedactionBuffer, Redactor } from '../../utils/redactor';
11
14
  import type { WorkflowEvent } from '../events.ts';
12
- import { type LLMAdapter, type LLMMessage, type LLMResponse, getAdapter } from '../llm-adapter';
15
+ import * as llmAdapter from '../llm-adapter';
16
+ import type { LLMMessage, LLMResponse } from '../llm-adapter';
13
17
  import { MCPClient } from '../mcp-client';
14
18
  import type { MCPManager, MCPServerConfig } from '../mcp-manager';
15
19
  import { STANDARD_TOOLS, validateStandardToolSecurity } from '../standard-tools';
16
20
  import type { StepResult } from './types.ts';
17
21
 
22
+ // --- AI SDK Message Types ---
23
+ // These types mirror the AI SDK's CoreMessage structure for type safety
24
+ // without tightly coupling to AI SDK internals that may change between versions.
25
+ // The types are intentionally permissive to handle various AI SDK part types.
26
+
27
+ interface CoreTextPart {
28
+ type: 'text';
29
+ text: string;
30
+ }
31
+
32
+ interface CoreToolCallPart {
33
+ type: 'tool-call';
34
+ toolCallId: string;
35
+ toolName: string;
36
+ args?: unknown;
37
+ input?: unknown;
38
+ }
39
+
40
+ interface CoreToolResultPart {
41
+ type: 'tool-result';
42
+ toolCallId: string;
43
+ toolName: string;
44
+ result: unknown;
45
+ output?: unknown;
46
+ }
47
+
48
+ // Additional AI SDK part types we want to handle gracefully
49
+ interface CoreOtherPart {
50
+ type: string;
51
+ [key: string]: unknown;
52
+ }
53
+
54
+ type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart | CoreOtherPart;
55
+ type CoreMessageContent = string | CoreContentPart[];
56
+
57
+ interface CoreSystemMessage {
58
+ role: 'system';
59
+ content: string;
60
+ }
61
+
62
+ interface CoreUserMessage {
63
+ role: 'user';
64
+ content: string | CoreContentPart[];
65
+ }
66
+
67
+ interface CoreAssistantMessage {
68
+ role: 'assistant';
69
+ content: CoreMessageContent;
70
+ toolCalls?: ToolCallPart[];
71
+ }
72
+
73
+ interface CoreToolMessage {
74
+ role: 'tool';
75
+ content: CoreContentPart[];
76
+ }
77
+
78
+ type CoreMessage = CoreSystemMessage | CoreUserMessage | CoreAssistantMessage | CoreToolMessage;
79
+
18
80
  // Re-export for local use with shorter names
19
- const {
20
- SUMMARY_MESSAGE_NAME,
21
- SUMMARY_MESSAGE_MAX_BYTES,
22
- SUMMARY_INPUT_MESSAGE_MAX_BYTES,
23
- SUMMARY_INPUT_TOTAL_MAX_BYTES,
24
- SUMMARY_MODEL_BY_PROVIDER_TYPE,
25
- THINKING_OPEN_TAG,
26
- THINKING_CLOSE_TAG,
27
- TRANSFER_TOOL_NAME,
28
- CONTEXT_UPDATE_KEY,
29
- } = LLM;
81
+ const { THINKING_OPEN_TAG, THINKING_CLOSE_TAG, TRANSFER_TOOL_NAME, CONTEXT_UPDATE_KEY } = LLM;
30
82
 
31
83
  type LlmEventContext = {
32
84
  runId?: string;
33
85
  workflow?: string;
34
86
  };
35
87
 
88
+ // --- Helper Parser Logic (Kept from original) ---
89
+
36
90
  class ThoughtStreamParser {
37
91
  private buffer = '';
38
92
  private thoughtBuffer = '';
@@ -100,55 +154,6 @@ class ThoughtStreamParser {
100
154
  }
101
155
  }
102
156
 
103
- /**
104
- * Truncate message history to prevent unbounded memory growth.
105
- * Preserves system messages and keeps the most recent messages.
106
- */
107
- function estimateMessageBytes(message: LLMMessage): number {
108
- let size = 0;
109
- if (typeof message.content === 'string') {
110
- size += Buffer.byteLength(message.content, 'utf8');
111
- }
112
- if (message.tool_calls) {
113
- size += Buffer.byteLength(JSON.stringify(message.tool_calls), 'utf8');
114
- }
115
- if (message.reasoning) {
116
- size += Buffer.byteLength(JSON.stringify(message.reasoning), 'utf8');
117
- }
118
- if (message.name) {
119
- size += Buffer.byteLength(message.name, 'utf8');
120
- }
121
- return size;
122
- }
123
-
124
- function truncateStringByBytes(value: string, maxBytes: number): string {
125
- if (maxBytes <= 0) return '';
126
- if (Buffer.byteLength(value, 'utf8') <= maxBytes) return value;
127
-
128
- let low = 0;
129
- let high = value.length;
130
- while (low < high) {
131
- const mid = Math.ceil((low + high) / 2);
132
- const slice = value.slice(0, mid);
133
- if (Buffer.byteLength(slice, 'utf8') <= maxBytes) {
134
- low = mid;
135
- } else {
136
- high = mid - 1;
137
- }
138
- }
139
- return value.slice(0, low);
140
- }
141
-
142
- function truncateToolOutput(content: string, maxBytes: number): string {
143
- const contentBytes = Buffer.byteLength(content, 'utf8');
144
- if (contentBytes <= maxBytes) return content;
145
-
146
- const suffix = '... [truncated output]';
147
- const suffixBytes = Buffer.byteLength(suffix, 'utf8');
148
- const truncated = truncateStringByBytes(content, Math.max(0, maxBytes - suffixBytes));
149
- return `${truncated}${suffix}`;
150
- }
151
-
152
157
  function safeJsonStringify(value: unknown): string {
153
158
  try {
154
159
  return JSON.stringify(value);
@@ -169,252 +174,111 @@ function safeJsonStringify(value: unknown): string {
169
174
  }
170
175
  }
171
176
 
172
- function truncateMessages(
173
- messages: LLMMessage[],
174
- maxHistory: number,
175
- maxBytes: number
176
- ): LLMMessage[] {
177
- if (messages.length === 0) return messages;
178
-
179
- // Keep all system messages
180
- const systemMessages = messages.filter((m) => m.role === 'system');
181
- const nonSystem = messages.filter((m) => m.role !== 'system');
182
-
183
- // Keep most recent non-system messages, accounting for system messages
184
- const nonSystemLimit = Math.max(0, maxHistory - systemMessages.length);
185
- let keep = nonSystem.slice(-nonSystemLimit);
186
-
187
- // Enforce total byte budget with a most-recent tail
188
- if (maxBytes > 0) {
189
- const systemBytes = systemMessages.reduce((total, msg) => total + estimateMessageBytes(msg), 0);
190
- let remaining = maxBytes - systemBytes;
191
- if (remaining <= 0) {
192
- return systemMessages;
177
+ /**
178
+ * Maps Keystone LLMMessage to AI SDK CoreMessage
179
+ */
180
+ function mapToCoreMessages(messages: LLMMessage[]): CoreMessage[] {
181
+ return messages.map((m) => {
182
+ if (m.role === 'user') {
183
+ return { role: 'user', content: m.content || '' };
193
184
  }
194
-
195
- const tail: LLMMessage[] = [];
196
- for (let i = keep.length - 1; i >= 0; i--) {
197
- const msg = keep[i];
198
- const msgBytes = estimateMessageBytes(msg);
199
- if (msgBytes > remaining) break;
200
- tail.push(msg);
201
- remaining -= msgBytes;
185
+ if (m.role === 'assistant') {
186
+ if (m.tool_calls && m.tool_calls.length > 0) {
187
+ const toolCalls: ToolCallPart[] = m.tool_calls.map((tc) => ({
188
+ type: 'tool-call',
189
+ toolCallId: tc.id,
190
+ toolName: tc.function.name,
191
+ input: JSON.parse(tc.function.arguments),
192
+ }));
193
+ return { role: 'assistant', content: m.content || '', toolCalls };
194
+ }
195
+ return { role: 'assistant', content: m.content || '' };
202
196
  }
203
- keep = tail.reverse();
204
- }
205
-
206
- return [...systemMessages, ...keep];
207
- }
208
-
209
- function extractThoughtBlocks(content: string): { content: string; thoughts: string[] } {
210
- if (!content) return { content, thoughts: [] };
211
- const thoughts: string[] = [];
212
- let remaining = content;
213
-
214
- while (true) {
215
- const lower = remaining.toLowerCase();
216
- const openIndex = lower.indexOf(THINKING_OPEN_TAG);
217
- if (openIndex === -1) break;
218
- const closeIndex = lower.indexOf(THINKING_CLOSE_TAG, openIndex + THINKING_OPEN_TAG.length);
219
- if (closeIndex === -1) break;
220
-
221
- const before = remaining.slice(0, openIndex);
222
- const thought = remaining.slice(openIndex + THINKING_OPEN_TAG.length, closeIndex).trim();
223
- const after = remaining.slice(closeIndex + THINKING_CLOSE_TAG.length);
224
- if (thought) {
225
- thoughts.push(thought);
197
+ if (m.role === 'tool') {
198
+ return {
199
+ role: 'tool',
200
+ content: [
201
+ {
202
+ type: 'tool-result',
203
+ toolCallId: m.tool_call_id || '',
204
+ toolName: m.name || '',
205
+ result: m.content || '',
206
+ },
207
+ ],
208
+ };
226
209
  }
227
- remaining = `${before}${after}`;
228
- }
229
-
230
- return { content: remaining, thoughts };
231
- }
232
-
233
- function estimateConversationBytes(messages: LLMMessage[]): number {
234
- return messages.reduce((total, msg) => total + estimateMessageBytes(msg), 0);
235
- }
236
-
237
- function resolveSummaryModel(fullModelString: string, resolvedModel: string): string {
238
- try {
239
- const providerName = ConfigLoader.getProviderForModel(fullModelString);
240
- const config = ConfigLoader.load();
241
- const providerType = config.providers[providerName]?.type;
242
- return SUMMARY_MODEL_BY_PROVIDER_TYPE[providerType] ?? resolvedModel;
243
- } catch {
244
- return resolvedModel;
245
- }
246
- }
247
-
248
- function formatMessageForSummary(message: LLMMessage): string {
249
- const roleLabel = message.name ? `${message.role}(${message.name})` : message.role;
250
- const parts: string[] = [];
251
-
252
- if (typeof message.content === 'string' && message.content.length > 0) {
253
- parts.push(message.content);
254
- }
255
- if (message.tool_calls && message.tool_calls.length > 0) {
256
- parts.push(`tool_calls: ${safeJsonStringify(message.tool_calls)}`);
257
- }
258
- if (message.reasoning?.summary) {
259
- parts.push(`reasoning_summary: ${message.reasoning.summary}`);
260
- }
261
-
262
- const combined = parts.join('\n').trim();
263
- const trimmed = combined ? truncateStringByBytes(combined, SUMMARY_INPUT_MESSAGE_MAX_BYTES) : '';
264
- return `[${roleLabel}]${trimmed ? ` ${trimmed}` : ''}`;
210
+ // Default to system
211
+ return { role: 'system', content: m.content || '' };
212
+ });
265
213
  }
266
214
 
267
- function buildSummaryInput(messages: LLMMessage[]): string {
268
- const lines: string[] = [];
269
- let remaining = SUMMARY_INPUT_TOTAL_MAX_BYTES;
270
-
271
- for (const message of messages) {
272
- const formatted = formatMessageForSummary(message);
273
- const bytes = Buffer.byteLength(formatted, 'utf8');
274
- if (bytes > remaining) {
275
- if (remaining > 0) {
276
- lines.push(truncateStringByBytes(formatted, remaining));
215
+ /**
216
+ * Maps AI SDK CoreMessage to Keystone LLMMessage.
217
+ * Accepts readonly unknown[] to handle AI SDK ResponseMessage[] which varies by SDK version.
218
+ */
219
+ function mapFromCoreMessages(messages: readonly unknown[]): LLMMessage[] {
220
+ const keystoneMessages: LLMMessage[] = [];
221
+ for (const rawMsg of messages) {
222
+ // Type guard for message structure
223
+ const msg = rawMsg as { role: string; content?: unknown };
224
+ if (msg.role === 'assistant') {
225
+ const rawContent = msg.content;
226
+ const contentArray = Array.isArray(rawContent)
227
+ ? rawContent
228
+ : [{ type: 'text', text: String(rawContent || '') }];
229
+ const textPart = contentArray.find(
230
+ (p: { type?: string; text?: string }) => p.type === 'text'
231
+ );
232
+ const keystoneMsg: LLMMessage = {
233
+ role: 'assistant',
234
+ content: textPart?.text || '',
235
+ };
236
+ const toolCalls = contentArray.filter((p: { type?: string }) => p.type === 'tool-call');
237
+ if (toolCalls.length > 0) {
238
+ keystoneMsg.tool_calls = toolCalls.map(
239
+ (tc: { toolCallId?: string; toolName?: string; args?: unknown; input?: unknown }) => ({
240
+ id: tc.toolCallId || '',
241
+ type: 'function' as const,
242
+ function: {
243
+ name: tc.toolName || '',
244
+ arguments:
245
+ typeof tc.args === 'string' ? tc.args : JSON.stringify(tc.args || tc.input || {}),
246
+ },
247
+ })
248
+ );
277
249
  }
278
- break;
279
- }
280
- lines.push(formatted);
281
- remaining -= bytes;
282
- }
283
-
284
- return lines.join('\n');
285
- }
286
-
287
- async function summarizeMessagesIfNeeded(
288
- messages: LLMMessage[],
289
- options: {
290
- maxHistory: number;
291
- maxBytes: number;
292
- adapter: LLMAdapter;
293
- summaryModel: string;
294
- abortSignal?: AbortSignal;
295
- }
296
- ): Promise<{ messages: LLMMessage[]; usage?: LLMResponse['usage']; summarized: boolean }> {
297
- const systemMessages = messages.filter(
298
- (m) => m.role === 'system' && m.name !== SUMMARY_MESSAGE_NAME
299
- );
300
- const summaryMessages = messages.filter(
301
- (m) => m.role === 'system' && m.name === SUMMARY_MESSAGE_NAME
302
- );
303
- const nonSystemMessages = messages.filter((m) => m.role !== 'system');
304
-
305
- const maxNonSystem = Math.max(0, options.maxHistory - systemMessages.length - 1);
306
- const overCount = nonSystemMessages.length > maxNonSystem;
307
- const overBytes = options.maxBytes > 0 && estimateConversationBytes(messages) > options.maxBytes;
308
-
309
- if (!overCount && !overBytes) {
310
- return { messages, summarized: false };
311
- }
312
-
313
- if (maxNonSystem <= 0) {
314
- return {
315
- messages: truncateMessages(messages, options.maxHistory, options.maxBytes),
316
- summarized: false,
317
- };
318
- }
319
-
320
- const systemBytes = systemMessages.reduce((total, msg) => total + estimateMessageBytes(msg), 0);
321
- const availableBytes =
322
- options.maxBytes > 0
323
- ? options.maxBytes - systemBytes - SUMMARY_MESSAGE_MAX_BYTES
324
- : Number.POSITIVE_INFINITY;
325
-
326
- const tail: LLMMessage[] = [];
327
- let tailBytes = 0;
328
- for (let i = nonSystemMessages.length - 1; i >= 0; i--) {
329
- if (tail.length >= maxNonSystem) break;
330
- const msgBytes = estimateMessageBytes(nonSystemMessages[i]);
331
- if (options.maxBytes > 0 && tailBytes + msgBytes > availableBytes) {
332
- break;
333
- }
334
- tail.push(nonSystemMessages[i]);
335
- tailBytes += msgBytes;
336
- }
337
-
338
- const keepCount = tail.length;
339
- const summarizeCount = nonSystemMessages.length - keepCount;
340
- if (summarizeCount <= 0) {
341
- return { messages, summarized: false };
342
- }
343
-
344
- const toSummarize = nonSystemMessages.slice(0, summarizeCount);
345
- const existingSummary = summaryMessages
346
- .map((m) => (typeof m.content === 'string' ? m.content : ''))
347
- .filter((content) => content.trim().length > 0)
348
- .join('\n');
349
- const summaryInput = buildSummaryInput(toSummarize);
350
-
351
- if (!summaryInput.trim() && !existingSummary.trim()) {
352
- return {
353
- messages: truncateMessages(messages, options.maxHistory, options.maxBytes),
354
- summarized: false,
355
- };
356
- }
357
-
358
- const promptParts: string[] = [];
359
- if (existingSummary.trim()) {
360
- promptParts.push(`Existing summary:\n${existingSummary}`);
361
- }
362
- if (summaryInput.trim()) {
363
- promptParts.push(`Messages to summarize:\n${summaryInput}`);
364
- }
365
-
366
- const response = await options.adapter.chat(
367
- [
368
- {
369
- role: 'system',
370
- content:
371
- 'Summarize the conversation history for continued work. Focus on decisions, constraints, outputs, and open questions. Be concise and factual. Use short bullet points.',
372
- },
373
- {
374
- role: 'user',
375
- content: promptParts.join('\n\n'),
376
- },
377
- ],
378
- {
379
- model: options.summaryModel,
380
- signal: options.abortSignal,
250
+ keystoneMessages.push(keystoneMsg);
251
+ } else if (msg.role === 'tool') {
252
+ const rawContent = msg.content;
253
+ const contentArray = Array.isArray(rawContent) ? rawContent : [];
254
+ for (const part of contentArray) {
255
+ const typedPart = part as {
256
+ type?: string;
257
+ toolCallId?: string;
258
+ toolName?: string;
259
+ result?: unknown;
260
+ output?: unknown;
261
+ };
262
+ if (typedPart.type === 'tool-result') {
263
+ keystoneMessages.push({
264
+ role: 'tool',
265
+ tool_call_id: typedPart.toolCallId,
266
+ name: typedPart.toolName,
267
+ content:
268
+ typeof typedPart.result === 'string'
269
+ ? typedPart.result
270
+ : JSON.stringify(typedPart.result || typedPart.output || ''),
271
+ });
272
+ }
273
+ }
274
+ } else if (msg.role === 'user') {
275
+ keystoneMessages.push({ role: 'user', content: String(msg.content || '') });
381
276
  }
382
- );
383
-
384
- const summaryText =
385
- typeof response.message.content === 'string' ? response.message.content.trim() : '';
386
- if (!summaryText) {
387
- throw new Error('Summary model returned empty content');
388
277
  }
389
-
390
- const summaryContent = truncateStringByBytes(
391
- `Context summary:\n${summaryText}`,
392
- SUMMARY_MESSAGE_MAX_BYTES
393
- );
394
-
395
- const summaryMessage: LLMMessage = {
396
- role: 'system',
397
- name: SUMMARY_MESSAGE_NAME,
398
- content: summaryContent,
399
- };
400
-
401
- const combinedMessages = [...systemMessages, summaryMessage, ...tail.reverse()];
402
-
403
- return {
404
- messages: truncateMessages(combinedMessages, options.maxHistory, options.maxBytes),
405
- usage: response.usage,
406
- summarized: true,
407
- };
278
+ return keystoneMessages;
408
279
  }
409
280
 
410
- interface ToolDefinition {
411
- name: string;
412
- description?: string;
413
- parameters: unknown;
414
- source: 'agent' | 'step' | 'mcp' | 'standard' | 'handoff';
415
- execution?: Step;
416
- mcpClient?: MCPClient;
417
- }
281
+ // --- Main Execution Logic ---
418
282
 
419
283
  export async function executeLlmStep(
420
284
  step: LlmStep,
@@ -424,7 +288,6 @@ export async function executeLlmStep(
424
288
  mcpManager?: MCPManager,
425
289
  workflowDir?: string,
426
290
  abortSignal?: AbortSignal,
427
- getAdapterFn?: typeof getAdapter,
428
291
  emitEvent?: (event: WorkflowEvent) => void,
429
292
  eventContext?: LlmEventContext
430
293
  ): Promise<StepResult> {
@@ -442,823 +305,429 @@ export async function executeLlmStep(
442
305
  const prompt = ExpressionEvaluator.evaluateString(step.prompt, context);
443
306
 
444
307
  const fullModelString = provider ? `${provider}:${model}` : model;
445
- const { adapter, resolvedModel } = (getAdapterFn || getAdapter)(fullModelString);
446
-
447
- const buildSystemPrompt = (agent: Agent): string => {
448
- let systemPrompt = ExpressionEvaluator.evaluateString(agent.systemPrompt, context);
449
308
 
450
- // Inject project context if enabled
451
- const projectContext = ContextInjector.getContext(workflowDir || process.cwd(), []);
452
- const contextAddition = ContextInjector.generateSystemPromptAddition(projectContext);
453
- if (contextAddition) {
454
- systemPrompt = `${contextAddition}\n\n${systemPrompt}`;
309
+ // NOTE: getModel is the new AI SDK factory
310
+ const languageModel = await llmAdapter.getModel(fullModelString);
311
+
312
+ // Redaction setup
313
+ const redactor = new Redactor(context.secrets || {}, {
314
+ forcedSecrets: context.secretValues || [],
315
+ });
316
+ const redactionBuffer = new RedactionBuffer(redactor);
317
+ const thoughtStream = step.outputSchema ? null : new ThoughtStreamParser();
318
+ const eventTimestamp = () => new Date().toISOString();
319
+
320
+ const emitThought = (content: string, source: 'thinking' | 'reasoning') => {
321
+ const trimmed = redactor.redact(content.trim());
322
+ if (!trimmed) return;
323
+ logger.info(`šŸ’­ Thought (${source}): ${trimmed}`);
324
+ if (emitEvent && eventContext?.runId && eventContext?.workflow) {
325
+ emitEvent({
326
+ type: 'llm.thought',
327
+ timestamp: eventTimestamp(),
328
+ runId: eventContext.runId,
329
+ workflow: eventContext.workflow,
330
+ stepId: step.id,
331
+ content: trimmed,
332
+ source,
333
+ });
455
334
  }
335
+ };
456
336
 
457
- if (step.outputSchema) {
458
- systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.outputSchema, null, 2)}`;
337
+ const handleStreamChunk = (chunk: string) => {
338
+ const redactedChunk = redactionBuffer.process(chunk);
339
+ if (!thoughtStream) {
340
+ process.stdout.write(redactedChunk);
341
+ return;
342
+ }
343
+ const parsed = thoughtStream.process(redactedChunk);
344
+ if (parsed.output) {
345
+ process.stdout.write(parsed.output);
346
+ }
347
+ for (const thought of parsed.thoughts) {
348
+ emitThought(thought, 'thinking');
459
349
  }
460
- return systemPrompt;
461
350
  };
462
- let systemPrompt = buildSystemPrompt(activeAgent);
463
-
464
- let messages: LLMMessage[] = [];
465
- const maxToolOutputBytes = LIMITS.MAX_TOOL_OUTPUT_BYTES;
466
- const updateSystemPromptMessage = (newPrompt: string) => {
467
- const systemMessage = messages.find(
468
- (message) => message.role === 'system' && message.name !== SUMMARY_MESSAGE_NAME
469
- );
470
- if (systemMessage) {
471
- systemMessage.content = newPrompt;
351
+
352
+ const flushStream = () => {
353
+ const flushed = redactionBuffer.flush();
354
+ if (!thoughtStream) {
355
+ process.stdout.write(flushed);
472
356
  return;
473
357
  }
474
- messages.unshift({ role: 'system', content: newPrompt });
358
+ const parsed = thoughtStream.process(flushed);
359
+ if (parsed.output) {
360
+ process.stdout.write(parsed.output);
361
+ }
362
+ for (const thought of parsed.thoughts) {
363
+ emitThought(thought, 'thinking');
364
+ }
365
+ const final = thoughtStream.flush();
366
+ if (final.output) {
367
+ process.stdout.write(final.output);
368
+ }
369
+ for (const thought of final.thoughts) {
370
+ emitThought(thought, 'thinking');
371
+ }
475
372
  };
476
373
 
477
- // Resume from state if provided
374
+ // State for Agent Handoff Loop
375
+ let currentMessages: LLMMessage[] = [];
376
+ // Initial User Message
377
+ currentMessages.push({ role: 'user', content: prompt });
378
+
379
+ // Handle Resume
478
380
  const stepState =
479
381
  context.steps && typeof context.steps === 'object'
480
382
  ? (context.steps as Record<string, { output?: unknown }>)[step.id]
481
383
  : undefined;
482
- const stepOutput = stepState?.output;
483
- const resumeOutput =
484
- stepOutput && typeof stepOutput === 'object' && 'messages' in stepOutput
485
- ? stepOutput
486
- : context.output;
487
-
384
+ const resumeOutput = (stepState?.output as any)?.messages ? stepState?.output : context.output;
488
385
  if (resumeOutput && typeof resumeOutput === 'object' && 'messages' in resumeOutput) {
489
- messages.push(...(resumeOutput.messages as LLMMessage[]));
490
-
491
- // If we have an answer in inputs, add it as a tool result for the last tool call
492
- const stepInputs = context.inputs?.[step.id] as Record<string, unknown> | undefined;
493
- if (stepInputs && typeof stepInputs === 'object' && '__answer' in stepInputs) {
494
- const answer = stepInputs.__answer;
495
- const lastMessage = messages[messages.length - 1];
496
- const askCall = lastMessage?.tool_calls?.find((tc) => tc.function.name === 'ask');
497
- if (askCall) {
498
- messages.push({
499
- role: 'tool',
500
- tool_call_id: askCall.id,
501
- name: 'ask',
502
- content: truncateToolOutput(String(answer), maxToolOutputBytes),
503
- });
504
- }
505
- }
506
- updateSystemPromptMessage(systemPrompt);
507
- } else {
508
- messages.push({ role: 'system', content: systemPrompt }, { role: 'user', content: prompt });
386
+ const resumedMsgs = resumeOutput.messages as LLMMessage[];
387
+ // Filter out system messages as we rebuild system prompt each turn
388
+ currentMessages = resumedMsgs.filter((m) => m.role !== 'system');
509
389
  }
510
390
 
391
+ // MCP Client tracking for cleanup
511
392
  const localMcpClients: MCPClient[] = [];
512
- const baseTools: ToolDefinition[] = [];
513
393
 
514
394
  try {
515
- const registerBaseTool = (tool: ToolDefinition) => {
516
- baseTools.push(tool);
517
- };
518
-
519
- // 1. Add step tools
520
- if (step.tools) {
521
- for (const tool of step.tools) {
522
- registerBaseTool({
523
- name: tool.name,
524
- description: tool.description,
525
- parameters: tool.parameters || {
526
- type: 'object',
527
- properties: {},
528
- additionalProperties: true,
529
- },
530
- source: 'step',
531
- execution: tool.execution,
532
- });
533
- }
534
- }
395
+ // Agent Handoff Loop: We manually loop here (instead of relying solely on SDK's maxSteps)
396
+ // because Agent Handoffs require dynamically swapping the system prompt and tool set
397
+ // when the LLM calls transfer_to_agent. The SDK's maxSteps only handles tool call
398
+ // round-trips within a single agent context; it cannot swap the entire agent mid-execution.
399
+ while (true) {
400
+ if (abortSignal?.aborted) throw new Error('Step canceled');
401
+
402
+ // Build System Prompt
403
+ let systemPrompt = ExpressionEvaluator.evaluateString(activeAgent.systemPrompt, context);
404
+ const projectContext = ContextInjector.getContext(workflowDir || process.cwd(), []);
405
+ const contextAddition = ContextInjector.generateSystemPromptAddition(projectContext);
406
+ if (contextAddition) {
407
+ systemPrompt = `${contextAddition}\n\n${systemPrompt}`;
408
+ }
409
+ if (step.outputSchema) {
410
+ systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.outputSchema, null, 2)}`;
411
+ }
412
+
413
+ // Tool Registration
414
+ const aiTools: Record<string, any> = {};
415
+ let pendingTransfer: Agent | null = null;
416
+ let requiresSuspend = false;
417
+ let suspendData: any = null;
418
+
419
+ const registerTool = (
420
+ name: string,
421
+ description: string | undefined,
422
+ parameters: any,
423
+ execute: (args: any, context: { toolCallId: string }) => Promise<any>
424
+ ) => {
425
+ // Validate parameters is a valid JSON Schema object
426
+ if (!parameters || typeof parameters !== 'object' || Array.isArray(parameters)) {
427
+ throw new Error(`Invalid parameters for tool ${name}: must be a JSON Schema object.`);
428
+ }
535
429
 
536
- // 2. Add Standard tools
537
- if (step.useStandardTools) {
538
- for (const tool of STANDARD_TOOLS) {
539
- registerBaseTool({
540
- name: tool.name,
541
- description: tool.description,
542
- parameters: tool.parameters || {
543
- type: 'object',
544
- properties: {},
545
- additionalProperties: true,
430
+ // Safety: Ensure additionalProperties is false for object types if not specified
431
+ // This prevents the LLM from hallucinating arguments that are not in the schema
432
+ const safeParameters = { ...parameters };
433
+ if (
434
+ safeParameters.type === 'object' &&
435
+ safeParameters.properties &&
436
+ safeParameters.additionalProperties === undefined
437
+ ) {
438
+ safeParameters.additionalProperties = false;
439
+ }
440
+
441
+ aiTools[name] = (createTool as any)({
442
+ description,
443
+ parameters: jsonSchema(safeParameters),
444
+ execute: async (args: any, { toolCallId }: { toolCallId: string }) => {
445
+ logger.log(
446
+ ` šŸ› ļø Tool Call: ${name}${Object.keys(args).length ? ` ${safeJsonStringify(args)}` : ''}`
447
+ );
448
+ try {
449
+ return await execute(args, { toolCallId });
450
+ } catch (err) {
451
+ const errMsg = err instanceof Error ? err.message : String(err);
452
+ logger.error(` āœ— Tool Error (${name}): ${errMsg}`);
453
+ return { error: errMsg }; // Return as object for AI SDK
454
+ }
546
455
  },
547
- source: 'standard',
548
- execution: tool.execution,
549
456
  });
550
- }
551
- }
552
-
553
- // 3. Add Engine handoff tool
554
- if (step.handoff) {
555
- const toolName = step.handoff.name || 'handoff';
556
- const description =
557
- step.handoff.description || `Delegate to engine ${step.handoff.engine.command}`;
558
- const parameters = step.handoff.inputSchema || {
559
- type: 'object',
560
- properties: {},
561
- additionalProperties: true,
562
457
  };
563
458
 
564
- const handoffStep: Step = {
565
- id: `${step.id}-handoff`,
566
- type: 'engine',
567
- command: step.handoff.engine.command,
568
- args: step.handoff.engine.args,
569
- env: step.handoff.engine.env,
570
- cwd: step.handoff.engine.cwd,
571
- timeout: step.handoff.engine.timeout,
572
- outputSchema: step.handoff.engine.outputSchema,
573
- input: step.handoff.engine.input ?? '${{ args }}',
459
+ const applyContextUpdate = (value: unknown): unknown => {
460
+ if (!value || typeof value !== 'object' || Array.isArray(value)) return value;
461
+ const record = value as Record<string, unknown>;
462
+ if (!(CONTEXT_UPDATE_KEY in record)) return value;
463
+
464
+ const update = record[CONTEXT_UPDATE_KEY] as
465
+ | { env?: Record<string, string>; memory?: Record<string, unknown> }
466
+ | undefined;
467
+ if (update?.env) {
468
+ context.env = context.env || {};
469
+ Object.assign(context.env, update.env);
470
+ }
471
+ if (update?.memory) {
472
+ context.memory = context.memory || {};
473
+ Object.assign(context.memory, update.memory);
474
+ }
475
+ const { [CONTEXT_UPDATE_KEY]: _ignored, ...cleaned } = record;
476
+ return cleaned;
574
477
  };
575
478
 
576
- registerBaseTool({
577
- name: toolName,
578
- description,
579
- parameters,
580
- source: 'handoff',
581
- execution: handoffStep,
582
- });
583
- }
584
-
585
- // 4. Add MCP tools
586
- const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
587
- if (step.useGlobalMcp && mcpManager) {
588
- const globalServers = mcpManager.getGlobalServers();
589
- for (const globalServer of globalServers) {
590
- // Only add if not already explicitly listed
591
- const alreadyListed = mcpServersToConnect.some((s) => {
592
- const name = typeof s === 'string' ? s : s.name;
593
- return name === globalServer.name;
479
+ // 1. Agent Tools
480
+ for (const tool of activeAgent.tools) {
481
+ registerTool(tool.name, tool.description, tool.parameters, async (args) => {
482
+ if (tool.execution) {
483
+ const toolContext = { ...context, args };
484
+ const result = await executeStepFn(tool.execution, toolContext);
485
+ return result.status === 'success'
486
+ ? applyContextUpdate(result.output)
487
+ : `Error: ${result.error}`;
488
+ }
489
+ return `Error: Tool ${tool.name} has no implementation.`;
594
490
  });
595
- if (!alreadyListed) {
596
- mcpServersToConnect.push(globalServer);
491
+ }
492
+
493
+ // 2. Step Tools & Standard Tools
494
+ const extraTools = [...(step.tools || []), ...(step.useStandardTools ? STANDARD_TOOLS : [])];
495
+ for (const tool of extraTools) {
496
+ // Check valid standard tool security
497
+ if (!step.tools?.includes(tool as any)) {
498
+ // It is a standard tool
499
+ // Wrap execution with security check
500
+ registerTool(tool.name, tool.description, tool.parameters, async (args) => {
501
+ validateStandardToolSecurity(tool.name, args, {
502
+ allowOutsideCwd: step.allowOutsideCwd,
503
+ allowInsecure: step.allowInsecure,
504
+ });
505
+ if (tool.execution) {
506
+ const toolContext = { ...context, args };
507
+ const result = await executeStepFn(tool.execution, toolContext);
508
+ return result.status === 'success'
509
+ ? applyContextUpdate(result.output)
510
+ : `Error: ${result.error}`;
511
+ }
512
+ return 'Error: No execution defined';
513
+ });
514
+ } else {
515
+ // Custom step tool
516
+ registerTool(tool.name, tool.description, tool.parameters, async (args) => {
517
+ if (tool.execution) {
518
+ const toolContext = { ...context, args };
519
+ const result = await executeStepFn(tool.execution, toolContext);
520
+ return result.status === 'success'
521
+ ? applyContextUpdate(result.output)
522
+ : `Error: ${result.error}`;
523
+ }
524
+ return 'Error: No execution defined';
525
+ });
597
526
  }
598
527
  }
599
- }
600
528
 
601
- if (mcpServersToConnect.length > 0) {
602
- await Promise.all(
603
- mcpServersToConnect.map(async (server) => {
604
- let client: MCPClient | undefined;
605
- const serverName = typeof server === 'string' ? server : server.name;
529
+ // 3. MCP Tools
530
+ // (Logic to connect MCP servers same as before, simplified for brevity)
531
+ const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
532
+ if (step.useGlobalMcp && mcpManager) {
533
+ const globalServers = mcpManager.getGlobalServers();
534
+ for (const s of globalServers) {
535
+ if (
536
+ !mcpServersToConnect.some(
537
+ (existing) => (typeof existing === 'string' ? existing : existing.name) === s.name
538
+ )
539
+ ) {
540
+ mcpServersToConnect.push(s);
541
+ }
542
+ }
543
+ }
606
544
 
545
+ if (mcpServersToConnect.length > 0) {
546
+ for (const server of mcpServersToConnect) {
607
547
  try {
548
+ let client: MCPClient | undefined;
608
549
  if (mcpManager) {
609
- client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
610
- } else {
611
- // Fallback if no manager (should not happen in normal workflow run)
612
- if (typeof server === 'string') {
613
- logger.error(
614
- ` āœ— Cannot reference global MCP server '${server}' without MCPManager`
615
- );
616
- return;
617
- }
618
- logger.log(` šŸ”Œ Connecting to MCP server: ${server.name}`);
550
+ client = await mcpManager.getClient(server, logger);
551
+ } else if (typeof server !== 'string') {
619
552
  client = await MCPClient.createLocal(
620
- (server as MCPServerConfig).command || 'node',
621
- (server as MCPServerConfig).args || [],
622
- (server as MCPServerConfig).env || {}
553
+ server.command || 'node',
554
+ server.args || [],
555
+ server.env || {}
623
556
  );
624
557
  await client.initialize();
625
558
  localMcpClients.push(client);
626
559
  }
627
560
 
628
561
  if (client) {
629
- const mcpTools = await client.listTools();
630
- for (const tool of mcpTools) {
631
- registerBaseTool({
632
- name: tool.name,
633
- description: tool.description,
634
- parameters: tool.inputSchema,
635
- source: 'mcp',
636
- mcpClient: client,
562
+ const tools = await client.listTools();
563
+ for (const t of tools) {
564
+ registerTool(t.name, t.description, t.inputSchema, async (args) => {
565
+ const res = await client?.callTool(t.name, args);
566
+ // AI SDK expects serializable result. callTool returns useful JSON.
567
+ // We apply context update and return raw object handled by SDK.
568
+ return applyContextUpdate(res);
637
569
  });
638
570
  }
639
571
  }
640
- } catch (error) {
641
- logger.error(
642
- ` āœ— Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
572
+ } catch (e) {
573
+ logger.warn(
574
+ `Failed to connect/list MCP tools for ${typeof server === 'string' ? server : server.name}: ${e}`
643
575
  );
644
- if (!mcpManager && client) {
645
- client.stop();
646
- }
647
576
  }
648
- })
649
- );
650
- }
651
-
652
- const buildToolsForAgent = (agent: Agent) => {
653
- const allTools: ToolDefinition[] = [];
654
- const toolRegistry = new Map<string, string>();
655
- const registerTool = (tool: ToolDefinition) => {
656
- const existing = toolRegistry.get(tool.name);
657
- if (existing) {
658
- throw new Error(
659
- `Duplicate tool name "${tool.name}" from ${tool.source}; already defined by ${existing}. Rename one of them.`
660
- );
661
577
  }
662
- toolRegistry.set(tool.name, tool.source);
663
- allTools.push(tool);
664
- };
665
-
666
- for (const tool of agent.tools) {
667
- registerTool({
668
- name: tool.name,
669
- description: tool.description,
670
- parameters: tool.parameters || {
671
- type: 'object',
672
- properties: {},
673
- additionalProperties: true,
674
- },
675
- source: 'agent',
676
- execution: tool.execution,
677
- });
678
- }
679
-
680
- for (const tool of baseTools) {
681
- registerTool(tool);
682
578
  }
683
579
 
684
- const llmTools = allTools.map((t) => ({
685
- type: 'function' as const,
686
- function: {
687
- name: t.name,
688
- description: t.description,
689
- parameters: t.parameters as Record<string, unknown>,
690
- },
691
- }));
692
-
580
+ // 4. Special Tools: Ask & Transfer
693
581
  if (step.allowClarification) {
694
- if (toolRegistry.has('ask')) {
695
- throw new Error(
696
- 'Tool name "ask" is reserved for clarification. Rename your tool or disable allowClarification.'
697
- );
698
- }
699
- llmTools.push({
700
- type: 'function' as const,
701
- function: {
702
- name: 'ask',
703
- description:
704
- 'Ask the user a clarifying question if the initial request is ambiguous or missing information.',
705
- parameters: {
706
- type: 'object',
707
- properties: {
708
- question: {
709
- type: 'string',
710
- description: 'The question to ask the user',
711
- },
712
- },
713
- required: ['question'],
714
- } as Record<string, unknown>,
582
+ if (aiTools.ask) throw new Error('Tool "ask" is reserved.');
583
+ registerTool(
584
+ 'ask',
585
+ 'Ask the user a clarifying question.',
586
+ {
587
+ type: 'object',
588
+ properties: { question: { type: 'string' } },
589
+ required: ['question'],
715
590
  },
716
- });
591
+ async (args) => {
592
+ if (process.stdin.isTTY) {
593
+ logger.log(`\nšŸ¤” Question from ${activeAgent.name}: ${args.question}`);
594
+ const result = await executeStepFn(
595
+ {
596
+ id: `${step.id}-clarify`,
597
+ type: 'human',
598
+ message: args.question,
599
+ inputType: 'text',
600
+ } as Step,
601
+ context
602
+ );
603
+ return String(result.output);
604
+ }
605
+ requiresSuspend = true;
606
+ suspendData = { question: args.question }; // Will abort loop
607
+ return 'Suspended for user input';
608
+ }
609
+ );
717
610
  }
718
611
 
719
612
  if (step.allowedHandoffs && step.allowedHandoffs.length > 0) {
720
- if (toolRegistry.has(TRANSFER_TOOL_NAME)) {
721
- throw new Error(
722
- `Tool name "${TRANSFER_TOOL_NAME}" is reserved for agent handoffs. Rename your tool or disable allowedHandoffs.`
723
- );
724
- }
725
- llmTools.push({
726
- type: 'function' as const,
727
- function: {
728
- name: TRANSFER_TOOL_NAME,
729
- description: `Transfer control to another agent. Allowed agents: ${step.allowedHandoffs.join(', ')}`,
730
- parameters: {
731
- type: 'object',
732
- properties: {
733
- agent_name: {
734
- type: 'string',
735
- description: 'The name of the agent to transfer to',
736
- },
737
- },
738
- required: ['agent_name'],
739
- } as Record<string, unknown>,
613
+ if (aiTools[TRANSFER_TOOL_NAME])
614
+ throw new Error(`Tool "${TRANSFER_TOOL_NAME}" is reserved.`);
615
+ registerTool(
616
+ TRANSFER_TOOL_NAME,
617
+ `Transfer control to another agent. Allowed: ${step.allowedHandoffs.join(', ')}`,
618
+ {
619
+ type: 'object',
620
+ properties: { agent_name: { type: 'string' } },
621
+ required: ['agent_name'],
740
622
  },
741
- });
742
- }
743
-
744
- return { allTools, llmTools };
745
- };
746
-
747
- let allTools: ToolDefinition[] = [];
748
- let llmTools: {
749
- type: 'function';
750
- function: { name: string; description?: string; parameters: Record<string, unknown> };
751
- }[] = [];
752
-
753
- const refreshToolsForAgent = (agent: Agent) => {
754
- const toolSet = buildToolsForAgent(agent);
755
- allTools = toolSet.allTools;
756
- llmTools = toolSet.llmTools;
757
- };
758
-
759
- refreshToolsForAgent(activeAgent);
760
- const applyContextUpdate = (value: unknown): unknown => {
761
- if (!value || typeof value !== 'object' || Array.isArray(value)) {
762
- return value;
763
- }
764
-
765
- const record = value as Record<string, unknown>;
766
- if (!(CONTEXT_UPDATE_KEY in record)) {
767
- return value;
768
- }
769
-
770
- const update = record[CONTEXT_UPDATE_KEY];
771
- if (update && typeof update === 'object' && !Array.isArray(update)) {
772
- const updateRecord = update as Record<string, unknown>;
773
-
774
- if (
775
- updateRecord.env &&
776
- typeof updateRecord.env === 'object' &&
777
- !Array.isArray(updateRecord.env)
778
- ) {
779
- const envUpdates = updateRecord.env as Record<string, unknown>;
780
- context.env = context.env ?? {};
781
- context.envOverrides = context.envOverrides ?? {};
782
- for (const [key, val] of Object.entries(envUpdates)) {
783
- if (val === undefined) continue;
784
- const stringValue =
785
- typeof val === 'string'
786
- ? val
787
- : (() => {
788
- const json = safeJsonStringify(val);
789
- return typeof json === 'string' ? json : String(val);
790
- })();
791
- context.env[key] = stringValue;
792
- context.envOverrides[key] = stringValue;
623
+ async (args) => {
624
+ if (!step.allowedHandoffs?.includes(args.agent_name))
625
+ return `Error: Agent ${args.agent_name} not allowed.`;
626
+ try {
627
+ const nextAgentPath = resolveAgentPath(args.agent_name, workflowDir);
628
+ const nextAgent = parseAgent(nextAgentPath);
629
+ pendingTransfer = nextAgent;
630
+ return `Transferred to agent ${args.agent_name}.`;
631
+ } catch (e) {
632
+ return `Error resolving agent: ${e}`;
633
+ }
793
634
  }
794
- }
795
-
796
- if (
797
- updateRecord.memory &&
798
- typeof updateRecord.memory === 'object' &&
799
- !Array.isArray(updateRecord.memory)
800
- ) {
801
- context.memory = context.memory ?? {};
802
- Object.assign(context.memory, updateRecord.memory as Record<string, unknown>);
803
- }
635
+ );
804
636
  }
805
637
 
806
- const { [CONTEXT_UPDATE_KEY]: _ignored, ...cleaned } = record;
807
- return cleaned;
808
- };
809
- const applyAgentTransfer = (nextAgent: Agent) => {
810
- activeAgent = nextAgent;
811
- systemPrompt = buildSystemPrompt(activeAgent);
812
- updateSystemPromptMessage(systemPrompt);
813
- refreshToolsForAgent(activeAgent);
814
- };
815
-
816
- // ReAct Loop
817
- let iterations = 0;
818
- const maxIterations = step.maxIterations || 10;
819
- const totalUsage = {
820
- prompt_tokens: 0,
821
- completion_tokens: 0,
822
- total_tokens: 0,
823
- };
824
-
825
- // Create redactor once outside the loop for performance (regex compilation)
826
- const redactor = new Redactor(context.secrets || {}, {
827
- forcedSecrets: context.secretValues || [],
828
- });
829
- const redactionBuffer = new RedactionBuffer(redactor);
830
- const maxHistory = step.maxMessageHistory || LIMITS.MAX_MESSAGE_HISTORY;
831
- const maxConversationBytes = LIMITS.MAX_CONVERSATION_BYTES;
832
- const contextStrategy = step.contextStrategy || 'truncate';
833
- const summaryModel =
834
- contextStrategy === 'summary' || contextStrategy === 'auto'
835
- ? resolveSummaryModel(fullModelString, resolvedModel)
836
- : resolvedModel;
837
- const formatToolContent = (content: string): string =>
838
- truncateToolOutput(content, maxToolOutputBytes);
839
- const eventTimestamp = () => new Date().toISOString();
840
- const emitThought = (content: string, source: 'thinking' | 'reasoning') => {
841
- const trimmed = redactor.redact(content.trim());
842
- if (!trimmed) return;
843
- logger.info(`šŸ’­ Thought (${source}): ${trimmed}`);
844
- if (emitEvent && eventContext?.runId && eventContext?.workflow) {
845
- emitEvent({
846
- type: 'llm.thought',
847
- timestamp: eventTimestamp(),
848
- runId: eventContext.runId,
849
- workflow: eventContext.workflow,
850
- stepId: step.id,
851
- content: trimmed,
852
- source,
853
- });
854
- }
855
- };
856
- const thoughtStream = step.outputSchema ? null : new ThoughtStreamParser();
857
- let streamedThoughts = 0;
858
- const handleStreamChunk = (chunk: string) => {
859
- const redactedChunk = redactionBuffer.process(chunk);
860
- if (!thoughtStream) {
861
- process.stdout.write(redactedChunk);
862
- return;
863
- }
864
- const parsed = thoughtStream.process(redactedChunk);
865
- if (parsed.output) {
866
- process.stdout.write(parsed.output);
867
- }
868
- for (const thought of parsed.thoughts) {
869
- emitThought(thought, 'thinking');
870
- streamedThoughts += 1;
871
- }
872
- };
873
- const flushStream = () => {
874
- const flushed = redactionBuffer.flush();
875
- if (!thoughtStream) {
876
- process.stdout.write(flushed);
877
- return;
878
- }
879
- const parsed = thoughtStream.process(flushed);
880
- if (parsed.output) {
881
- process.stdout.write(parsed.output);
882
- }
883
- for (const thought of parsed.thoughts) {
884
- emitThought(thought, 'thinking');
885
- streamedThoughts += 1;
886
- }
887
- const final = thoughtStream.flush();
888
- if (final.output) {
889
- process.stdout.write(final.output);
890
- }
891
- for (const thought of final.thoughts) {
892
- emitThought(thought, 'thinking');
893
- streamedThoughts += 1;
894
- }
895
- };
896
- const applyContextStrategy = async () => {
897
- if (contextStrategy === 'summary' || contextStrategy === 'auto') {
898
- try {
899
- const result = await summarizeMessagesIfNeeded(messages, {
900
- maxHistory,
901
- maxBytes: maxConversationBytes,
902
- adapter,
903
- summaryModel,
904
- abortSignal,
905
- });
906
- messages = result.messages;
907
- if (result.usage) {
908
- totalUsage.prompt_tokens += result.usage.prompt_tokens;
909
- totalUsage.completion_tokens += result.usage.completion_tokens;
910
- totalUsage.total_tokens += result.usage.total_tokens;
638
+ // Execute Stream
639
+ const result = await streamText({
640
+ model: languageModel,
641
+ system: systemPrompt,
642
+ messages: mapToCoreMessages(currentMessages),
643
+ tools: aiTools,
644
+ toolChoice: 'auto',
645
+ maxSteps: step.maxIterations || 10,
646
+ onChunk: (event: any) => {
647
+ if (event.chunk.type === 'text-delta') {
648
+ handleStreamChunk(event.chunk.text);
911
649
  }
912
- return;
913
- } catch (error) {
914
- logger.warn(
915
- `Context summarization failed: ${error instanceof Error ? error.message : String(error)}`
916
- );
650
+ },
651
+ abortSignal,
652
+ } as any);
653
+
654
+ // Accumulate full text for output
655
+ // Accumulate full text for output
656
+ let fullText = '';
657
+ for await (const part of result.fullStream) {
658
+ if (part.type === 'text-delta') {
659
+ fullText += part.text;
917
660
  }
918
661
  }
919
662
 
920
- messages = truncateMessages(messages, maxHistory, maxConversationBytes);
921
- };
922
-
923
- while (iterations < maxIterations) {
924
- iterations++;
925
- if (abortSignal?.aborted) {
926
- throw new Error('Step canceled');
927
- }
928
- streamedThoughts = 0;
929
-
930
- // Apply context strategy to prevent unbounded growth
931
- await applyContextStrategy();
932
- const truncatedMessages = messages;
933
-
934
- const response = await adapter.chat(truncatedMessages, {
935
- model: resolvedModel,
936
- tools: llmTools.length > 0 ? llmTools : undefined,
937
- onStream: (chunk) => {
938
- if (!step.outputSchema) {
939
- handleStreamChunk(chunk);
940
- }
941
- },
942
- signal: abortSignal,
943
- responseSchema: step.outputSchema,
944
- });
945
-
946
663
  if (!step.outputSchema) {
947
664
  flushStream();
948
665
  }
949
666
 
950
- if (response.usage) {
951
- totalUsage.prompt_tokens += response.usage.prompt_tokens;
952
- totalUsage.completion_tokens += response.usage.completion_tokens;
953
- totalUsage.total_tokens += response.usage.total_tokens;
954
- }
955
-
956
- let { message } = response;
957
- if (typeof message.content === 'string' && message.content.length > 0) {
958
- const extracted = extractThoughtBlocks(message.content);
959
- if (extracted.content !== message.content) {
960
- message = { ...message, content: extracted.content };
961
- }
962
- if (streamedThoughts === 0) {
963
- for (const thought of extracted.thoughts) {
964
- emitThought(thought, 'thinking');
965
- }
966
- }
967
- }
968
- if (message.reasoning?.summary) {
969
- emitThought(message.reasoning.summary, 'reasoning');
970
- }
971
-
972
- messages.push(message);
973
-
974
- // 1. Check for native record_output tool call (forced by Anthropic adapter)
975
- const recordOutputCall = message.tool_calls?.find(
976
- (tc) => tc.function.name === 'record_output'
667
+ // Standardize history reconstruction using result.response
668
+ // AI SDK's result.response.messages contains the assistant/tool messages generated in this call.
669
+ // We merge them with our existing currentMessages to maintain full history across handoffs.
670
+ const response = await result.response;
671
+ const responseMessages = response.messages;
672
+ const newMessages = mapFromCoreMessages(responseMessages);
673
+
674
+ // Merge strategy: Keep all existing messages (user prompts + previous assistant/tool exchanges)
675
+ // and append new messages from this turn, avoiding duplicates by role/content matching
676
+ const existingNonSystem = currentMessages.filter((m) => m.role !== 'system');
677
+ const newNonDuplicate = newMessages.filter(
678
+ (nm) =>
679
+ !existingNonSystem.some(
680
+ (em) =>
681
+ em.role === nm.role &&
682
+ em.content === nm.content &&
683
+ em.tool_call_id === nm.tool_call_id
684
+ )
977
685
  );
978
- if (step.outputSchema && recordOutputCall) {
979
- let output: any;
980
- try {
981
- output =
982
- typeof recordOutputCall.function.arguments === 'string'
983
- ? JSON.parse(recordOutputCall.function.arguments)
984
- : recordOutputCall.function.arguments;
985
- return { status: 'success', output, usage: totalUsage };
986
- } catch (e) {
987
- logger.error(`Failed to parse native structured output: ${e}`);
988
- // Fall through to regular tool execution or retry if needed
989
- }
990
- }
991
-
992
- // 2. Handle direct output if no tool calls
993
- if (!message.tool_calls || message.tool_calls.length === 0) {
994
- let output: any = message.content;
686
+ currentMessages = [...existingNonSystem, ...newNonDuplicate];
995
687
 
996
- // If schema is defined, attempt to parse JSON
997
- if (step.outputSchema) {
998
- if (typeof output === 'string') {
999
- try {
1000
- output = extractJson(output);
1001
- } catch (e) {
1002
- const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
1003
- logger.error(` āš ļø ${errorMessage}. Retrying...`);
1004
-
1005
- messages.push({
1006
- role: 'user',
1007
- content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
1008
- });
1009
- continue;
1010
- }
1011
- }
1012
- }
688
+ const usageObj = await result.usage;
689
+ const totalUsage = {
690
+ prompt_tokens: usageObj?.inputTokens ?? 0,
691
+ completion_tokens: usageObj?.outputTokens ?? 0,
692
+ total_tokens: (usageObj?.inputTokens ?? 0) + (usageObj?.outputTokens ?? 0),
693
+ };
1013
694
 
695
+ if (requiresSuspend) {
1014
696
  return {
1015
- output,
1016
- status: 'success',
697
+ status: 'suspended',
698
+ output: { messages: currentMessages, ...suspendData },
1017
699
  usage: totalUsage,
1018
700
  };
1019
701
  }
1020
702
 
1021
- // 3. Execute tools
1022
- let pendingTransfer: Agent | null = null;
1023
- for (const toolCall of message.tool_calls) {
1024
- if (abortSignal?.aborted) {
1025
- throw new Error('Step canceled');
1026
- }
1027
- const argsStr = toolCall.function.arguments;
1028
- let displayArgs = '';
1029
- try {
1030
- const parsedArgs = JSON.parse(argsStr);
1031
- const keys = Object.keys(parsedArgs);
1032
- if (keys.length > 0) {
1033
- const formatted = JSON.stringify(parsedArgs);
1034
- displayArgs = formatted.length > 100 ? `${formatted.substring(0, 100)}...` : formatted;
1035
- }
1036
- } catch (e) {
1037
- displayArgs = argsStr.length > 100 ? `${argsStr.substring(0, 100)}...` : argsStr;
1038
- }
1039
-
1040
- logger.log(
1041
- ` šŸ› ļø Tool Call: ${toolCall.function.name}${displayArgs ? ` ${displayArgs}` : ''}`
1042
- );
1043
- const toolInfo = allTools.find((t) => t.name === toolCall.function.name);
1044
-
1045
- if (!toolInfo) {
1046
- if (toolCall.function.name === TRANSFER_TOOL_NAME) {
1047
- if (!step.allowedHandoffs || step.allowedHandoffs.length === 0) {
1048
- messages.push({
1049
- role: 'tool',
1050
- tool_call_id: toolCall.id,
1051
- name: TRANSFER_TOOL_NAME,
1052
- content: formatToolContent('Error: Agent handoffs are not enabled for this step.'),
1053
- });
1054
- continue;
1055
- }
1056
-
1057
- let args: { agent_name?: string };
1058
- try {
1059
- args = JSON.parse(toolCall.function.arguments);
1060
- } catch (e) {
1061
- messages.push({
1062
- role: 'tool',
1063
- tool_call_id: toolCall.id,
1064
- name: TRANSFER_TOOL_NAME,
1065
- content: formatToolContent(
1066
- `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`
1067
- ),
1068
- });
1069
- continue;
1070
- }
1071
-
1072
- if (!args.agent_name || typeof args.agent_name !== 'string') {
1073
- messages.push({
1074
- role: 'tool',
1075
- tool_call_id: toolCall.id,
1076
- name: TRANSFER_TOOL_NAME,
1077
- content: formatToolContent('Error: "agent_name" must be a string.'),
1078
- });
1079
- continue;
1080
- }
1081
-
1082
- if (!step.allowedHandoffs.includes(args.agent_name)) {
1083
- messages.push({
1084
- role: 'tool',
1085
- tool_call_id: toolCall.id,
1086
- name: TRANSFER_TOOL_NAME,
1087
- content: formatToolContent(
1088
- `Error: Agent "${args.agent_name}" is not allowed for this step.`
1089
- ),
1090
- });
1091
- continue;
1092
- }
1093
-
1094
- try {
1095
- const nextAgentPath = resolveAgentPath(args.agent_name, workflowDir);
1096
- const nextAgent = parseAgent(nextAgentPath);
1097
- pendingTransfer = nextAgent;
1098
- logger.log(` šŸ” Handoff: ${activeAgent.name} → ${args.agent_name}`);
1099
- messages.push({
1100
- role: 'tool',
1101
- tool_call_id: toolCall.id,
1102
- name: TRANSFER_TOOL_NAME,
1103
- content: formatToolContent(`Transferred to agent ${args.agent_name}.`),
1104
- });
1105
- } catch (error) {
1106
- messages.push({
1107
- role: 'tool',
1108
- tool_call_id: toolCall.id,
1109
- name: TRANSFER_TOOL_NAME,
1110
- content: formatToolContent(
1111
- `Error: ${error instanceof Error ? error.message : String(error)}`
1112
- ),
1113
- });
1114
- }
1115
- continue;
1116
- }
1117
-
1118
- if (toolCall.function.name === 'ask' && step.allowClarification) {
1119
- let args: { question: string };
1120
- try {
1121
- args = JSON.parse(toolCall.function.arguments);
1122
- } catch (e) {
1123
- messages.push({
1124
- role: 'tool',
1125
- tool_call_id: toolCall.id,
1126
- name: 'ask',
1127
- content: formatToolContent(
1128
- `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`
1129
- ),
1130
- });
1131
- continue;
1132
- }
1133
-
1134
- if (process.stdin.isTTY) {
1135
- // In TTY, we can use a human step to get the answer immediately
1136
- logger.log(`\nšŸ¤” Question from ${activeAgent.name}: ${args.question}`);
1137
- const result = await executeStepFn(
1138
- {
1139
- id: `${step.id}-clarify`,
1140
- type: 'human',
1141
- message: args.question,
1142
- inputType: 'text',
1143
- } as Step,
1144
- context
1145
- );
1146
-
1147
- messages.push({
1148
- role: 'tool',
1149
- tool_call_id: toolCall.id,
1150
- name: 'ask',
1151
- content: formatToolContent(String(result.output)),
1152
- });
1153
- continue;
1154
- }
1155
- // In non-TTY, we suspend
1156
- await applyContextStrategy();
1157
- return {
1158
- status: 'suspended',
1159
- output: {
1160
- messages,
1161
- question: args.question,
1162
- },
1163
- usage: totalUsage,
1164
- };
1165
- }
703
+ if (pendingTransfer) {
704
+ activeAgent = pendingTransfer;
705
+ logger.log(` šŸ” Handoff: Switching to agent ${activeAgent.name}`);
706
+ // Loop continues with new agent and updated history
707
+ continue;
708
+ }
1166
709
 
1167
- messages.push({
1168
- role: 'tool',
1169
- tool_call_id: toolCall.id,
1170
- name: toolCall.function.name,
1171
- content: formatToolContent(`Error: Tool ${toolCall.function.name} not found`),
1172
- });
1173
- continue;
1174
- }
710
+ // If no transfer, we are done.
1175
711
 
1176
- let args: Record<string, unknown>;
712
+ // Handle Output Schema parsing if needed
713
+ let output: any = fullText;
714
+ if (step.outputSchema) {
1177
715
  try {
1178
- args = JSON.parse(toolCall.function.arguments);
716
+ output = extractJson(fullText);
1179
717
  } catch (e) {
1180
- messages.push({
1181
- role: 'tool',
1182
- tool_call_id: toolCall.id,
1183
- name: toolCall.function.name,
1184
- content: formatToolContent(
1185
- `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`
1186
- ),
1187
- });
1188
- continue;
1189
- }
1190
-
1191
- if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
1192
- try {
1193
- const result = await toolInfo.mcpClient.callTool(toolInfo.name, args);
1194
- messages.push({
1195
- role: 'tool',
1196
- tool_call_id: toolCall.id,
1197
- name: toolCall.function.name,
1198
- content: formatToolContent(safeJsonStringify(applyContextUpdate(result))),
1199
- });
1200
- } catch (error) {
1201
- messages.push({
1202
- role: 'tool',
1203
- tool_call_id: toolCall.id,
1204
- name: toolCall.function.name,
1205
- content: formatToolContent(
1206
- `Error: ${error instanceof Error ? error.message : String(error)}`
1207
- ),
1208
- });
1209
- }
1210
- } else if (toolInfo.execution) {
1211
- // Security validation for standard tools
1212
- if (toolInfo.source === 'standard') {
1213
- try {
1214
- validateStandardToolSecurity(toolInfo.name, args, {
1215
- allowOutsideCwd: step.allowOutsideCwd,
1216
- allowInsecure: step.allowInsecure,
1217
- });
1218
- } catch (error) {
1219
- messages.push({
1220
- role: 'tool',
1221
- tool_call_id: toolCall.id,
1222
- name: toolCall.function.name,
1223
- content: formatToolContent(
1224
- `Security Error: ${error instanceof Error ? error.message : String(error)}`
1225
- ),
1226
- });
1227
- continue;
1228
- }
1229
- }
1230
-
1231
- // Execute the tool as a step
1232
- const toolContext: ExpressionContext = {
1233
- ...context,
1234
- args, // Use args to pass parameters to tool execution
1235
- };
1236
-
1237
- const result = await executeStepFn(toolInfo.execution, toolContext);
1238
- const toolOutput =
1239
- result.status === 'success'
1240
- ? safeJsonStringify(applyContextUpdate(result.output))
1241
- : `Error: ${result.error}`;
1242
-
1243
- messages.push({
1244
- role: 'tool',
1245
- tool_call_id: toolCall.id,
1246
- name: toolCall.function.name,
1247
- content: formatToolContent(toolOutput),
1248
- });
718
+ logger.error(
719
+ ' āš ļø Failed to parse output as JSON. Retrying not implemented in simple refactor.'
720
+ );
1249
721
  }
1250
722
  }
1251
723
 
1252
- if (pendingTransfer) {
1253
- applyAgentTransfer(pendingTransfer);
1254
- }
1255
-
1256
- await applyContextStrategy();
724
+ return {
725
+ status: 'success',
726
+ output,
727
+ usage: totalUsage,
728
+ };
1257
729
  }
1258
-
1259
- throw new Error('Max ReAct iterations reached');
1260
730
  } finally {
1261
- // Cleanup LOCAL MCP clients only. Shared clients are managed by MCPManager.
1262
731
  for (const client of localMcpClients) {
1263
732
  client.stop();
1264
733
  }