keystone-cli 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +30 -4
  2. package/package.json +17 -3
  3. package/src/cli.ts +3 -2
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/schema.ts +2 -1
  12. package/src/runner/__test__/llm-test-setup.ts +43 -11
  13. package/src/runner/durable-timers.test.ts +1 -1
  14. package/src/runner/executors/dynamic-executor.ts +125 -88
  15. package/src/runner/executors/engine-executor.ts +10 -39
  16. package/src/runner/executors/file-executor.ts +38 -0
  17. package/src/runner/executors/foreach-executor.ts +170 -17
  18. package/src/runner/executors/human-executor.ts +18 -0
  19. package/src/runner/executors/llm/stream-handler.ts +103 -0
  20. package/src/runner/executors/llm/tool-manager.ts +342 -0
  21. package/src/runner/executors/llm-executor.ts +313 -550
  22. package/src/runner/executors/memory-executor.ts +41 -34
  23. package/src/runner/executors/shell-executor.ts +141 -54
  24. package/src/runner/executors/subworkflow-executor.ts +16 -0
  25. package/src/runner/executors/types.ts +3 -1
  26. package/src/runner/executors/verification_fixes.test.ts +46 -0
  27. package/src/runner/join-scheduling.test.ts +2 -1
  28. package/src/runner/llm-adapter.integration.test.ts +10 -5
  29. package/src/runner/llm-adapter.ts +46 -17
  30. package/src/runner/llm-clarification.test.ts +4 -1
  31. package/src/runner/llm-executor.test.ts +21 -7
  32. package/src/runner/mcp-client.ts +36 -2
  33. package/src/runner/mcp-server.ts +65 -36
  34. package/src/runner/memoization.test.ts +2 -2
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/shell-executor.test.ts +107 -1
  40. package/src/runner/standard-tools-ast.test.ts +4 -2
  41. package/src/runner/standard-tools-execution.test.ts +14 -1
  42. package/src/runner/standard-tools-integration.test.ts +6 -0
  43. package/src/runner/standard-tools.ts +13 -10
  44. package/src/runner/step-executor.ts +2 -2
  45. package/src/runner/tool-integration.test.ts +4 -1
  46. package/src/runner/workflow-runner.test.ts +23 -12
  47. package/src/runner/workflow-runner.ts +174 -85
  48. package/src/runner/workflow-state.ts +186 -111
  49. package/src/ui/dashboard.tsx +17 -3
  50. package/src/utils/config-loader.ts +4 -0
  51. package/src/utils/constants.ts +4 -0
  52. package/src/utils/context-injector.test.ts +27 -27
  53. package/src/utils/context-injector.ts +68 -26
  54. package/src/utils/process-sandbox.ts +138 -148
  55. package/src/utils/redactor.ts +39 -9
  56. package/src/utils/resource-loader.ts +24 -19
  57. package/src/utils/sandbox.ts +6 -0
  58. package/src/utils/stream-utils.ts +58 -0
@@ -1,29 +1,23 @@
1
- import { tool as createTool, jsonSchema, streamText } from 'ai';
2
- import type { TextPart, ToolCallPart, ToolResultPart } from 'ai';
3
- import { z } from 'zod';
1
+ import { streamText } from 'ai';
4
2
  import type { ExpressionContext } from '../../expression/evaluator';
5
3
  import { ExpressionEvaluator } from '../../expression/evaluator';
6
4
  import { parseAgent, resolveAgentPath } from '../../parser/agent-parser';
7
- import type { Agent, LlmStep, Step } from '../../parser/schema';
8
- import { ConfigLoader } from '../../utils/config-loader';
9
- import { LIMITS, LLM } from '../../utils/constants';
5
+ import type { LlmStep, Step } from '../../parser/schema';
6
+ import { ITERATIONS, LIMITS } from '../../utils/constants';
10
7
  import { ContextInjector } from '../../utils/context-injector';
11
8
  import { extractJson } from '../../utils/json-parser';
12
9
  import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
13
10
  import { RedactionBuffer, Redactor } from '../../utils/redactor';
14
11
  import type { WorkflowEvent } from '../events.ts';
15
12
  import * as llmAdapter from '../llm-adapter';
16
- import type { LLMMessage, LLMResponse } from '../llm-adapter';
17
- import { MCPClient } from '../mcp-client';
18
- import type { MCPManager, MCPServerConfig } from '../mcp-manager';
19
- import { STANDARD_TOOLS, validateStandardToolSecurity } from '../standard-tools';
13
+ import type { LLMMessage } from '../llm-adapter';
14
+ import type { MCPManager } from '../mcp-manager';
15
+ import { StreamHandler } from './llm/stream-handler';
16
+ import { ToolManager } from './llm/tool-manager';
20
17
  import type { StepResult } from './types.ts';
21
18
 
22
19
  // --- AI SDK Message Types ---
23
- // These types mirror the AI SDK's CoreMessage structure for type safety
24
- // without tightly coupling to AI SDK internals that may change between versions.
25
- // The types are intentionally permissive to handle various AI SDK part types.
26
-
20
+ // (Keep types for mapping)
27
21
  interface CoreTextPart {
28
22
  type: 'text';
29
23
  text: string;
@@ -33,26 +27,18 @@ interface CoreToolCallPart {
33
27
  type: 'tool-call';
34
28
  toolCallId: string;
35
29
  toolName: string;
36
- args?: unknown;
37
- input?: unknown;
30
+ args: any;
38
31
  }
39
32
 
40
33
  interface CoreToolResultPart {
41
34
  type: 'tool-result';
42
35
  toolCallId: string;
43
36
  toolName: string;
44
- result: unknown;
45
- output?: unknown;
46
- }
47
-
48
- // Additional AI SDK part types we want to handle gracefully
49
- interface CoreOtherPart {
50
- type: string;
51
- [key: string]: unknown;
37
+ result: any;
38
+ isError?: boolean;
52
39
  }
53
40
 
54
- type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart | CoreOtherPart;
55
- type CoreMessageContent = string | CoreContentPart[];
41
+ type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart;
56
42
 
57
43
  interface CoreSystemMessage {
58
44
  role: 'system';
@@ -66,216 +52,112 @@ interface CoreUserMessage {
66
52
 
67
53
  interface CoreAssistantMessage {
68
54
  role: 'assistant';
69
- content: CoreMessageContent;
70
- toolCalls?: ToolCallPart[];
55
+ content: string | CoreContentPart[];
71
56
  }
72
57
 
73
58
  interface CoreToolMessage {
74
59
  role: 'tool';
75
- content: CoreContentPart[];
60
+ content: CoreToolResultPart[];
76
61
  }
77
62
 
78
63
  type CoreMessage = CoreSystemMessage | CoreUserMessage | CoreAssistantMessage | CoreToolMessage;
79
64
 
80
- // Re-export for local use with shorter names
81
- const { THINKING_OPEN_TAG, THINKING_CLOSE_TAG, TRANSFER_TOOL_NAME, CONTEXT_UPDATE_KEY } = LLM;
82
-
83
65
  type LlmEventContext = {
84
66
  runId?: string;
85
67
  workflow?: string;
86
68
  };
87
69
 
88
- // --- Helper Parser Logic (Kept from original) ---
89
-
90
- class ThoughtStreamParser {
91
- private buffer = '';
92
- private thoughtBuffer = '';
93
- private inThinking = false;
94
-
95
- process(chunk: string): { output: string; thoughts: string[] } {
96
- this.buffer += chunk;
97
- const thoughts: string[] = [];
98
- let output = '';
99
-
100
- while (this.buffer.length > 0) {
101
- const lower = this.buffer.toLowerCase();
102
- if (!this.inThinking) {
103
- const openIndex = lower.indexOf(THINKING_OPEN_TAG);
104
- if (openIndex === -1) {
105
- const keep = Math.max(0, this.buffer.length - (THINKING_OPEN_TAG.length - 1));
106
- output += this.buffer.slice(0, keep);
107
- this.buffer = this.buffer.slice(keep);
108
- break;
109
- }
110
- output += this.buffer.slice(0, openIndex);
111
- this.buffer = this.buffer.slice(openIndex + THINKING_OPEN_TAG.length);
112
- this.inThinking = true;
113
- continue;
114
- }
115
-
116
- const closeIndex = lower.indexOf(THINKING_CLOSE_TAG);
117
- if (closeIndex === -1) {
118
- const keep = Math.max(0, this.buffer.length - (THINKING_CLOSE_TAG.length - 1));
119
- this.thoughtBuffer += this.buffer.slice(0, keep);
120
- this.buffer = this.buffer.slice(keep);
121
- break;
122
- }
123
- this.thoughtBuffer += this.buffer.slice(0, closeIndex);
124
- this.buffer = this.buffer.slice(closeIndex + THINKING_CLOSE_TAG.length);
125
- this.inThinking = false;
126
- const thought = this.thoughtBuffer.trim();
127
- if (thought) {
128
- thoughts.push(thought);
129
- }
130
- this.thoughtBuffer = '';
131
- }
132
-
133
- return { output, thoughts };
134
- }
135
-
136
- flush(): { output: string; thoughts: string[] } {
137
- const thoughts: string[] = [];
138
- let output = '';
139
-
140
- if (this.inThinking) {
141
- this.thoughtBuffer += this.buffer;
142
- const thought = this.thoughtBuffer.trim();
143
- if (thought) {
144
- thoughts.push(thought);
70
+ // --- Mappers ---
71
+ function mapToCoreMessages(messages: LLMMessage[]): any[] {
72
+ const coreMessages = messages.map((m) => {
73
+ if (m.role === 'user') return { role: 'user', content: m.content || '' };
74
+ if (m.role === 'assistant') {
75
+ const toolCalls = m.tool_calls || [];
76
+ if (toolCalls.length === 0) {
77
+ return { role: 'assistant', content: m.content || '' };
145
78
  }
146
- } else {
147
- output = this.buffer;
79
+ return {
80
+ role: 'assistant',
81
+ content: [
82
+ ...(m.content ? [{ type: 'text' as const, text: m.content }] : []),
83
+ ...toolCalls.map((tc) => ({
84
+ type: 'tool-call' as const,
85
+ toolCallId: tc.id || 'missing-id',
86
+ toolName: tc.function.name || 'missing-name',
87
+ args:
88
+ typeof tc.function.arguments === 'string'
89
+ ? JSON.parse(tc.function.arguments || '{}')
90
+ : tc.function.arguments || {},
91
+ input:
92
+ typeof tc.function.arguments === 'string'
93
+ ? JSON.parse(tc.function.arguments || '{}')
94
+ : tc.function.arguments || {},
95
+ arguments: tc.function.arguments || {},
96
+ })),
97
+ ],
98
+ };
148
99
  }
100
+ if (m.role === 'tool') {
101
+ const content = m.content;
102
+ let outputPart: { type: 'text'; value: string } | { type: 'json'; value: any };
149
103
 
150
- this.buffer = '';
151
- this.thoughtBuffer = '';
152
- this.inThinking = false;
153
- return { output, thoughts };
154
- }
155
- }
156
-
157
- function safeJsonStringify(value: unknown): string {
158
- try {
159
- return JSON.stringify(value);
160
- } catch {
161
- const seen = new WeakSet<object>();
162
- try {
163
- return JSON.stringify(value, (_key, val) => {
164
- if (typeof val === 'bigint') return val.toString();
165
- if (typeof val === 'object' && val !== null) {
166
- if (seen.has(val)) return '[Circular]';
167
- seen.add(val);
104
+ if (typeof content === 'string') {
105
+ try {
106
+ const parsed = JSON.parse(content);
107
+ outputPart = { type: 'json', value: parsed };
108
+ } catch {
109
+ outputPart = { type: 'text', value: content };
168
110
  }
169
- return val;
170
- });
171
- } catch {
172
- return String(value);
173
- }
174
- }
175
- }
176
-
177
- /**
178
- * Maps Keystone LLMMessage to AI SDK CoreMessage
179
- */
180
- function mapToCoreMessages(messages: LLMMessage[]): CoreMessage[] {
181
- return messages.map((m) => {
182
- if (m.role === 'user') {
183
- return { role: 'user', content: m.content || '' };
184
- }
185
- if (m.role === 'assistant') {
186
- if (m.tool_calls && m.tool_calls.length > 0) {
187
- const toolCalls: ToolCallPart[] = m.tool_calls.map((tc) => ({
188
- type: 'tool-call',
189
- toolCallId: tc.id,
190
- toolName: tc.function.name,
191
- input: JSON.parse(tc.function.arguments),
192
- }));
193
- return { role: 'assistant', content: m.content || '', toolCalls };
111
+ } else {
112
+ outputPart = { type: 'json', value: content || {} };
194
113
  }
195
- return { role: 'assistant', content: m.content || '' };
196
- }
197
- if (m.role === 'tool') {
114
+
198
115
  return {
199
116
  role: 'tool',
200
117
  content: [
201
118
  {
202
119
  type: 'tool-result',
203
- toolCallId: m.tool_call_id || '',
204
- toolName: m.name || '',
205
- result: m.content || '',
206
- },
120
+ toolCallId: m.tool_call_id || 'missing-id',
121
+ toolName: m.name || 'missing-name',
122
+ output: outputPart,
123
+ } as any,
207
124
  ],
208
125
  };
209
126
  }
210
- // Default to system
127
+ // Handle system or unknown roles
211
128
  return { role: 'system', content: m.content || '' };
212
129
  });
130
+ return coreMessages;
213
131
  }
214
132
 
133
+ // --- Helper Functions ---
134
+
215
135
  /**
216
- * Maps AI SDK CoreMessage to Keystone LLMMessage.
217
- * Accepts readonly unknown[] to handle AI SDK ResponseMessage[] which varies by SDK version.
136
+ * Prunes the message history to the last N messages, ensuring that tool calls and tool results
137
+ * are kept together.
218
138
  */
219
- function mapFromCoreMessages(messages: readonly unknown[]): LLMMessage[] {
220
- const keystoneMessages: LLMMessage[] = [];
221
- for (const rawMsg of messages) {
222
- // Type guard for message structure
223
- const msg = rawMsg as { role: string; content?: unknown };
224
- if (msg.role === 'assistant') {
225
- const rawContent = msg.content;
226
- const contentArray = Array.isArray(rawContent)
227
- ? rawContent
228
- : [{ type: 'text', text: String(rawContent || '') }];
229
- const textPart = contentArray.find(
230
- (p: { type?: string; text?: string }) => p.type === 'text'
231
- );
232
- const keystoneMsg: LLMMessage = {
233
- role: 'assistant',
234
- content: textPart?.text || '',
235
- };
236
- const toolCalls = contentArray.filter((p: { type?: string }) => p.type === 'tool-call');
237
- if (toolCalls.length > 0) {
238
- keystoneMsg.tool_calls = toolCalls.map(
239
- (tc: { toolCallId?: string; toolName?: string; args?: unknown; input?: unknown }) => ({
240
- id: tc.toolCallId || '',
241
- type: 'function' as const,
242
- function: {
243
- name: tc.toolName || '',
244
- arguments:
245
- typeof tc.args === 'string' ? tc.args : JSON.stringify(tc.args || tc.input || {}),
246
- },
247
- })
248
- );
249
- }
250
- keystoneMessages.push(keystoneMsg);
251
- } else if (msg.role === 'tool') {
252
- const rawContent = msg.content;
253
- const contentArray = Array.isArray(rawContent) ? rawContent : [];
254
- for (const part of contentArray) {
255
- const typedPart = part as {
256
- type?: string;
257
- toolCallId?: string;
258
- toolName?: string;
259
- result?: unknown;
260
- output?: unknown;
261
- };
262
- if (typedPart.type === 'tool-result') {
263
- keystoneMessages.push({
264
- role: 'tool',
265
- tool_call_id: typedPart.toolCallId,
266
- name: typedPart.toolName,
267
- content:
268
- typeof typedPart.result === 'string'
269
- ? typedPart.result
270
- : JSON.stringify(typedPart.result || typedPart.output || ''),
271
- });
272
- }
273
- }
274
- } else if (msg.role === 'user') {
275
- keystoneMessages.push({ role: 'user', content: String(msg.content || '') });
276
- }
139
+ export function pruneMessages(messages: LLMMessage[], maxHistory: number): LLMMessage[] {
140
+ if (messages.length <= maxHistory) {
141
+ return messages;
142
+ }
143
+
144
+ let startIndex = messages.length - maxHistory;
145
+
146
+ // Loop to backtrack if we landed on a tool message
147
+ while (startIndex > 0 && messages[startIndex].role === 'tool') {
148
+ startIndex--;
277
149
  }
278
- return keystoneMessages;
150
+
151
+ // Check if we landed on a valid parent (Assistant with tool_calls)
152
+ const candidate = messages[startIndex];
153
+ if (candidate.role === 'assistant' && candidate.tool_calls && candidate.tool_calls.length > 0) {
154
+ // Found the parent, include it and everything after
155
+ return messages.slice(startIndex);
156
+ }
157
+
158
+ // Fallback to naive slicing if we can't find a clean parent connection
159
+ // (This matches current behavior for edge cases, preventing regressions in weird states)
160
+ return messages.slice(messages.length - maxHistory);
279
161
  }
280
162
 
281
163
  // --- Main Execution Logic ---
@@ -294,33 +176,17 @@ export async function executeLlmStep(
294
176
  const agentName = ExpressionEvaluator.evaluateString(step.agent, context);
295
177
  const agentPath = resolveAgentPath(agentName, workflowDir);
296
178
  let activeAgent = parseAgent(agentPath);
297
-
298
- const providerRaw = step.provider || activeAgent.provider;
299
- const modelRaw = step.model || activeAgent.model || 'gpt-4o';
300
-
301
- const provider = providerRaw
302
- ? ExpressionEvaluator.evaluateString(providerRaw, context)
303
- : undefined;
304
- const model = ExpressionEvaluator.evaluateString(modelRaw, context);
305
179
  const prompt = ExpressionEvaluator.evaluateString(step.prompt, context);
306
180
 
307
- const fullModelString = provider ? `${provider}:${model}` : model;
308
-
309
- // NOTE: getModel is the new AI SDK factory
310
- const languageModel = await llmAdapter.getModel(fullModelString);
311
-
312
181
  // Redaction setup
313
182
  const redactor = new Redactor(context.secrets || {}, {
314
183
  forcedSecrets: context.secretValues || [],
315
184
  });
316
185
  const redactionBuffer = new RedactionBuffer(redactor);
317
- const thoughtStream = step.outputSchema ? null : new ThoughtStreamParser();
186
+ const streamHandler = step.outputSchema ? null : new StreamHandler(logger);
318
187
  const eventTimestamp = () => new Date().toISOString();
319
188
 
320
189
  const emitThought = (content: string, source: 'thinking' | 'reasoning') => {
321
- const trimmed = redactor.redact(content.trim());
322
- if (!trimmed) return;
323
- logger.info(`💭 Thought (${source}): ${trimmed}`);
324
190
  if (emitEvent && eventContext?.runId && eventContext?.workflow) {
325
191
  emitEvent({
326
192
  type: 'llm.thought',
@@ -328,7 +194,7 @@ export async function executeLlmStep(
328
194
  runId: eventContext.runId,
329
195
  workflow: eventContext.workflow,
330
196
  stepId: step.id,
331
- content: trimmed,
197
+ content,
332
198
  source,
333
199
  });
334
200
  }
@@ -336,44 +202,21 @@ export async function executeLlmStep(
336
202
 
337
203
  const handleStreamChunk = (chunk: string) => {
338
204
  const redactedChunk = redactionBuffer.process(chunk);
339
- if (!thoughtStream) {
205
+ if (!streamHandler) {
340
206
  process.stdout.write(redactedChunk);
341
207
  return;
342
208
  }
343
- const parsed = thoughtStream.process(redactedChunk);
344
- if (parsed.output) {
345
- process.stdout.write(parsed.output);
209
+ const { text, thoughts } = streamHandler.processChunk(redactedChunk);
210
+ if (text) {
211
+ process.stdout.write(text);
346
212
  }
347
- for (const thought of parsed.thoughts) {
348
- emitThought(thought, 'thinking');
349
- }
350
- };
351
-
352
- const flushStream = () => {
353
- const flushed = redactionBuffer.flush();
354
- if (!thoughtStream) {
355
- process.stdout.write(flushed);
356
- return;
357
- }
358
- const parsed = thoughtStream.process(flushed);
359
- if (parsed.output) {
360
- process.stdout.write(parsed.output);
361
- }
362
- for (const thought of parsed.thoughts) {
363
- emitThought(thought, 'thinking');
364
- }
365
- const final = thoughtStream.flush();
366
- if (final.output) {
367
- process.stdout.write(final.output);
368
- }
369
- for (const thought of final.thoughts) {
213
+ for (const thought of thoughts) {
370
214
  emitThought(thought, 'thinking');
371
215
  }
372
216
  };
373
217
 
374
218
  // State for Agent Handoff Loop
375
219
  let currentMessages: LLMMessage[] = [];
376
- // Initial User Message
377
220
  currentMessages.push({ role: 'user', content: prompt });
378
221
 
379
222
  // Handle Resume
@@ -384,24 +227,31 @@ export async function executeLlmStep(
384
227
  const resumeOutput = (stepState?.output as any)?.messages ? stepState?.output : context.output;
385
228
  if (resumeOutput && typeof resumeOutput === 'object' && 'messages' in resumeOutput) {
386
229
  const resumedMsgs = resumeOutput.messages as LLMMessage[];
387
- // Filter out system messages as we rebuild system prompt each turn
388
230
  currentMessages = resumedMsgs.filter((m) => m.role !== 'system');
389
231
  }
390
232
 
391
- // MCP Client tracking for cleanup
392
- const localMcpClients: MCPClient[] = [];
233
+ const totalUsage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
234
+ let handoffCount = 0;
393
235
 
394
236
  try {
395
- // Agent Handoff Loop: We manually loop here (instead of relying solely on SDK's maxSteps)
396
- // because Agent Handoffs require dynamically swapping the system prompt and tool set
397
- // when the LLM calls transfer_to_agent. The SDK's maxSteps only handles tool call
398
- // round-trips within a single agent context; it cannot swap the entire agent mid-execution.
399
237
  while (true) {
400
238
  if (abortSignal?.aborted) throw new Error('Step canceled');
401
239
 
240
+ // Update model based on current active agent
241
+ const providerRaw = step.provider || activeAgent.provider;
242
+ const modelRaw = step.model || activeAgent.model || 'gpt-4o';
243
+
244
+ const provider = providerRaw
245
+ ? ExpressionEvaluator.evaluateString(providerRaw, context)
246
+ : undefined;
247
+ const model = ExpressionEvaluator.evaluateString(modelRaw, context);
248
+ const fullModelString = provider ? `${provider}:${model}` : model;
249
+
250
+ const languageModel = await llmAdapter.getModel(fullModelString);
251
+
402
252
  // Build System Prompt
403
253
  let systemPrompt = ExpressionEvaluator.evaluateString(activeAgent.systemPrompt, context);
404
- const projectContext = ContextInjector.getContext(workflowDir || process.cwd(), []);
254
+ const projectContext = await ContextInjector.getContext(workflowDir || process.cwd(), []);
405
255
  const contextAddition = ContextInjector.generateSystemPromptAddition(projectContext);
406
256
  if (contextAddition) {
407
257
  systemPrompt = `${contextAddition}\n\n${systemPrompt}`;
@@ -410,326 +260,239 @@ export async function executeLlmStep(
410
260
  systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.outputSchema, null, 2)}`;
411
261
  }
412
262
 
413
- // Tool Registration
414
- const aiTools: Record<string, any> = {};
415
- let pendingTransfer: Agent | null = null;
416
- let requiresSuspend = false;
417
- let suspendData: any = null;
418
-
419
- const registerTool = (
420
- name: string,
421
- description: string | undefined,
422
- parameters: any,
423
- execute: (args: any, context: { toolCallId: string }) => Promise<any>
424
- ) => {
425
- // Validate parameters is a valid JSON Schema object
426
- if (!parameters || typeof parameters !== 'object' || Array.isArray(parameters)) {
427
- throw new Error(`Invalid parameters for tool ${name}: must be a JSON Schema object.`);
428
- }
263
+ // Tool Management
264
+ const toolManager = new ToolManager({
265
+ step,
266
+ context,
267
+ logger,
268
+ mcpManager,
269
+ workflowDir,
270
+ abortSignal,
271
+ });
429
272
 
430
- // Safety: Ensure additionalProperties is false for object types if not specified
431
- // This prevents the LLM from hallucinating arguments that are not in the schema
432
- const safeParameters = { ...parameters };
433
- if (
434
- safeParameters.type === 'object' &&
435
- safeParameters.properties &&
436
- safeParameters.additionalProperties === undefined
437
- ) {
438
- safeParameters.additionalProperties = false;
439
- }
273
+ const aiTools = await toolManager.registerTools(activeAgent, executeStepFn);
440
274
 
441
- aiTools[name] = (createTool as any)({
442
- description,
443
- parameters: jsonSchema(safeParameters),
444
- execute: async (args: any, { toolCallId }: { toolCallId: string }) => {
445
- logger.log(
446
- ` 🛠️ Tool Call: ${name}${Object.keys(args).length ? ` ${safeJsonStringify(args)}` : ''}`
447
- );
448
- try {
449
- return await execute(args, { toolCallId });
450
- } catch (err) {
451
- const errMsg = err instanceof Error ? err.message : String(err);
452
- logger.error(` ✗ Tool Error (${name}): ${errMsg}`);
453
- return { error: errMsg }; // Return as object for AI SDK
454
- }
455
- },
456
- });
457
- };
275
+ const maxIterations = step.maxIterations || 10;
276
+ let fullText = '';
277
+ let result: any;
458
278
 
459
- const applyContextUpdate = (value: unknown): unknown => {
460
- if (!value || typeof value !== 'object' || Array.isArray(value)) return value;
461
- const record = value as Record<string, unknown>;
462
- if (!(CONTEXT_UPDATE_KEY in record)) return value;
463
-
464
- const update = record[CONTEXT_UPDATE_KEY] as
465
- | { env?: Record<string, string>; memory?: Record<string, unknown> }
466
- | undefined;
467
- if (update?.env) {
468
- context.env = context.env || {};
469
- Object.assign(context.env, update.env);
470
- }
471
- if (update?.memory) {
472
- context.memory = context.memory || {};
473
- Object.assign(context.memory, update.memory);
474
- }
475
- const { [CONTEXT_UPDATE_KEY]: _ignored, ...cleaned } = record;
476
- return cleaned;
477
- };
279
+ let globalHasError = false;
280
+ for (let iterations = 1; iterations <= maxIterations; iterations++) {
281
+ if (toolManager.pendingTransfer) break;
478
282
 
479
- // 1. Agent Tools
480
- for (const tool of activeAgent.tools) {
481
- registerTool(tool.name, tool.description, tool.parameters, async (args) => {
482
- if (tool.execution) {
483
- const toolContext = { ...context, args };
484
- const result = await executeStepFn(tool.execution, toolContext);
485
- return result.status === 'success'
486
- ? applyContextUpdate(result.output)
487
- : `Error: ${result.error}`;
488
- }
489
- return `Error: Tool ${tool.name} has no implementation.`;
490
- });
491
- }
283
+ logger.debug(`[llm-executor] --- Turn ${iterations} ---`);
492
284
 
493
- // 2. Step Tools & Standard Tools
494
- const extraTools = [...(step.tools || []), ...(step.useStandardTools ? STANDARD_TOOLS : [])];
495
- for (const tool of extraTools) {
496
- // Check valid standard tool security
497
- if (!step.tools?.includes(tool as any)) {
498
- // It is a standard tool
499
- // Wrap execution with security check
500
- registerTool(tool.name, tool.description, tool.parameters, async (args) => {
501
- validateStandardToolSecurity(tool.name, args, {
502
- allowOutsideCwd: step.allowOutsideCwd,
503
- allowInsecure: step.allowInsecure,
504
- });
505
- if (tool.execution) {
506
- const toolContext = { ...context, args };
507
- const result = await executeStepFn(tool.execution, toolContext);
508
- return result.status === 'success'
509
- ? applyContextUpdate(result.output)
510
- : `Error: ${result.error}`;
511
- }
512
- return 'Error: No execution defined';
513
- });
514
- } else {
515
- // Custom step tool
516
- registerTool(tool.name, tool.description, tool.parameters, async (args) => {
517
- if (tool.execution) {
518
- const toolContext = { ...context, args };
519
- const result = await executeStepFn(tool.execution, toolContext);
520
- return result.status === 'success'
521
- ? applyContextUpdate(result.output)
522
- : `Error: ${result.error}`;
523
- }
524
- return 'Error: No execution defined';
525
- });
285
+ // Enforce maxMessageHistory to prevent context window exhaustion
286
+ let messagesForTurn = currentMessages;
287
+ if (step.maxMessageHistory && currentMessages.length > step.maxMessageHistory) {
288
+ // Keep the last N messages (with robust pruning to keep tool pairs together)
289
+ messagesForTurn = pruneMessages(currentMessages, step.maxMessageHistory);
290
+ logger.debug(
291
+ ` ✂️ Pruned context to last ${messagesForTurn.length} messages (maxHistory=${step.maxMessageHistory})`
292
+ );
526
293
  }
527
- }
528
294
 
529
- // 3. MCP Tools
530
- // (Logic to connect MCP servers same as before, simplified for brevity)
531
- const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
532
- if (step.useGlobalMcp && mcpManager) {
533
- const globalServers = mcpManager.getGlobalServers();
534
- for (const s of globalServers) {
535
- if (
536
- !mcpServersToConnect.some(
537
- (existing) => (typeof existing === 'string' ? existing : existing.name) === s.name
538
- )
539
- ) {
540
- mcpServersToConnect.push(s);
295
+ const coreMessages = mapToCoreMessages(messagesForTurn);
296
+
297
+ try {
298
+ result = await streamText({
299
+ model: languageModel,
300
+ system: systemPrompt,
301
+ messages: coreMessages,
302
+ tools: aiTools,
303
+ toolChoice: 'auto',
304
+ abortSignal,
305
+ } as any);
306
+ } catch (e) {
307
+ const errMsg = e instanceof Error ? e.message : String(e);
308
+ logger.error(`[llm-executor] T${iterations} Error: ${errMsg}`);
309
+ fullText = fullText || `Error: ${errMsg}`;
310
+
311
+ if (errMsg.includes('No output generated')) {
312
+ fullText +=
313
+ '\n(Hint: This may be due to a timeout or provider issue. Try increasing the timeout or checking the provider status.)';
541
314
  }
542
- }
543
- }
544
315
 
545
- if (mcpServersToConnect.length > 0) {
546
- for (const server of mcpServersToConnect) {
547
- try {
548
- let client: MCPClient | undefined;
549
- if (mcpManager) {
550
- client = await mcpManager.getClient(server, logger);
551
- } else if (typeof server !== 'string') {
552
- client = await MCPClient.createLocal(
553
- server.command || 'node',
554
- server.args || [],
555
- server.env || {}
556
- );
557
- await client.initialize();
558
- localMcpClients.push(client);
559
- }
316
+ globalHasError = true;
317
+ break;
318
+ }
560
319
 
561
- if (client) {
562
- const tools = await client.listTools();
563
- for (const t of tools) {
564
- registerTool(t.name, t.description, t.inputSchema, async (args) => {
565
- const res = await client?.callTool(t.name, args);
566
- // AI SDK expects serializable result. callTool returns useful JSON.
567
- // We apply context update and return raw object handled by SDK.
568
- return applyContextUpdate(res);
569
- });
320
+ let turnText = '';
321
+ const toolCalls: any[] = [];
322
+ try {
323
+ for await (const part of result.fullStream) {
324
+ logger.debug(`[llm-executor] T${iterations} Stream part: ${JSON.stringify(part)}`);
325
+ if (part.type === 'text-delta') {
326
+ const deltaText =
327
+ (part as any).textDelta || (part as any).text || (part as any).delta?.text || '';
328
+ if (deltaText) {
329
+ turnText += deltaText;
330
+ fullText += deltaText;
331
+ handleStreamChunk(deltaText);
332
+ }
333
+ } else if (part.type === 'tool-call') {
334
+ toolCalls.push(part);
335
+ } else if (part.type === 'error') {
336
+ // Ignore spurious 'text part undefined not found' error from AI SDK compatibility mode
337
+ if (String(part.error).includes('text part undefined not found')) {
338
+ logger.debug(
339
+ `[llm-executor] T${iterations} Ignoring spurious stream error: ${part.error}`
340
+ );
341
+ continue;
570
342
  }
343
+ logger.error(`[llm-executor] T${iterations} Stream error: ${part.error}`);
344
+ globalHasError = true;
345
+ throw new Error(String(part.error));
571
346
  }
572
- } catch (e) {
573
- logger.warn(
574
- `Failed to connect/list MCP tools for ${typeof server === 'string' ? server : server.name}: ${e}`
347
+ }
348
+ if (fullText.length > (LIMITS.MAX_RESPONSE_SIZE_BYTES || 10 * 1024 * 1024)) {
349
+ throw new Error(
350
+ `LLM response exceeded maximum size limit (${LIMITS.MAX_RESPONSE_SIZE_BYTES} bytes).`
575
351
  );
576
352
  }
353
+ } catch (streamError) {
354
+ const sErr = streamError instanceof Error ? streamError.message : String(streamError);
355
+ logger.error(`[llm-executor] T${iterations} Stream threw error: ${sErr}`);
356
+ globalHasError = true;
357
+ // We might have partial text/tools, but relying on them is dangerous if the stream failed.
358
+ // We keep globalHasError=true to abort the turn below.
577
359
  }
578
- }
579
360
 
580
- // 4. Special Tools: Ask & Transfer
581
- if (step.allowClarification) {
582
- if (aiTools.ask) throw new Error('Tool "ask" is reserved.');
583
- registerTool(
584
- 'ask',
585
- 'Ask the user a clarifying question.',
586
- {
587
- type: 'object',
588
- properties: { question: { type: 'string' } },
589
- required: ['question'],
590
- },
591
- async (args) => {
592
- if (process.stdin.isTTY) {
593
- logger.log(`\n🤔 Question from ${activeAgent.name}: ${args.question}`);
594
- const result = await executeStepFn(
595
- {
596
- id: `${step.id}-clarify`,
597
- type: 'human',
598
- message: args.question,
599
- inputType: 'text',
600
- } as Step,
601
- context
602
- );
603
- return String(result.output);
604
- }
605
- requiresSuspend = true;
606
- suspendData = { question: args.question }; // Will abort loop
607
- return 'Suspended for user input';
608
- }
609
- );
610
- }
361
+ const usage = await result.usage;
362
+ totalUsage.prompt_tokens += usage?.inputTokens ?? 0;
363
+ totalUsage.completion_tokens += usage?.outputTokens ?? 0;
364
+ totalUsage.total_tokens += (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
365
+
366
+ currentMessages.push({
367
+ role: 'assistant',
368
+ content: turnText,
369
+ tool_calls: toolCalls.map((tc) => ({
370
+ id: tc.toolCallId,
371
+ type: 'function',
372
+ function: {
373
+ name: tc.toolName,
374
+ arguments: JSON.stringify(tc.args || tc.input || {}),
375
+ },
376
+ })),
377
+ });
611
378
 
612
- if (step.allowedHandoffs && step.allowedHandoffs.length > 0) {
613
- if (aiTools[TRANSFER_TOOL_NAME])
614
- throw new Error(`Tool "${TRANSFER_TOOL_NAME}" is reserved.`);
615
- registerTool(
616
- TRANSFER_TOOL_NAME,
617
- `Transfer control to another agent. Allowed: ${step.allowedHandoffs.join(', ')}`,
618
- {
619
- type: 'object',
620
- properties: { agent_name: { type: 'string' } },
621
- required: ['agent_name'],
622
- },
623
- async (args) => {
624
- if (!step.allowedHandoffs?.includes(args.agent_name))
625
- return `Error: Agent ${args.agent_name} not allowed.`;
626
- try {
627
- const nextAgentPath = resolveAgentPath(args.agent_name, workflowDir);
628
- const nextAgent = parseAgent(nextAgentPath);
629
- pendingTransfer = nextAgent;
630
- return `Transferred to agent ${args.agent_name}.`;
631
- } catch (e) {
632
- return `Error resolving agent: ${e}`;
379
+ if (globalHasError) {
380
+ logger.error(`[llm-executor] T${iterations} Stream had errors. Aborting turn.`);
381
+ throw new Error(`LLM stream failed: ${fullText || 'Unknown error during streaming'}`);
382
+ }
383
+
384
+ if (toolCalls.length > 0) {
385
+ let turnRequiresSuspend = false;
386
+ let turnSuspendData: any = null;
387
+
388
+ for (const call of toolCalls) {
389
+ // Execute tool via ToolManager/aiTools
390
+ const tool = aiTools[call.toolName];
391
+ if (tool) {
392
+ try {
393
+ const toolArgs =
394
+ (call as any).input || (call as any).args || (call as any).arguments || {};
395
+ const toolArgsObj = typeof toolArgs === 'string' ? JSON.parse(toolArgs) : toolArgs;
396
+ logger.debug(
397
+ `[llm-executor] Executing tool ${call.toolName} with args: ${JSON.stringify(toolArgsObj)}`
398
+ );
399
+ const toolResult = await tool.execute(toolArgsObj, { signal: abortSignal });
400
+
401
+ currentMessages.push({
402
+ role: 'tool',
403
+ content: JSON.stringify(toolResult),
404
+ tool_call_id: call.toolCallId,
405
+ name: call.toolName,
406
+ } as any);
407
+
408
+ if (toolManager.requiresSuspend) {
409
+ turnRequiresSuspend = true;
410
+ turnSuspendData = toolManager.suspendData;
411
+ }
412
+ } catch (e) {
413
+ const errMsg = e instanceof Error ? e.message : String(e);
414
+ currentMessages.push({
415
+ role: 'tool',
416
+ content: JSON.stringify({ error: errMsg }),
417
+ tool_call_id: call.toolCallId,
418
+ name: call.toolName,
419
+ } as any);
420
+ }
421
+ } else {
422
+ currentMessages.push({
423
+ role: 'tool',
424
+ content: JSON.stringify({ error: `Tool ${call.toolName} not found` }),
425
+ tool_call_id: call.toolCallId,
426
+ name: call.toolName,
427
+ } as any);
633
428
  }
634
429
  }
635
- );
636
- }
637
430
 
638
- // Execute Stream
639
- const result = await streamText({
640
- model: languageModel,
641
- system: systemPrompt,
642
- messages: mapToCoreMessages(currentMessages),
643
- tools: aiTools,
644
- toolChoice: 'auto',
645
- maxSteps: step.maxIterations || 10,
646
- onChunk: (event: any) => {
647
- if (event.chunk.type === 'text-delta') {
648
- handleStreamChunk(event.chunk.text);
431
+ if (turnRequiresSuspend) {
432
+ return {
433
+ output: { messages: currentMessages, ...turnSuspendData },
434
+ status: 'suspended',
435
+ usage: totalUsage,
436
+ };
649
437
  }
650
- },
651
- abortSignal,
652
- } as any);
653
438
 
654
- // Accumulate full text for output
655
- // Accumulate full text for output
656
- let fullText = '';
657
- for await (const part of result.fullStream) {
658
- if (part.type === 'text-delta') {
659
- fullText += part.text;
439
+ if (toolManager.pendingTransfer) {
440
+ activeAgent = toolManager.pendingTransfer;
441
+ logger.log(` 🔄 Handoff to agent: ${activeAgent.name}`);
442
+ handoffCount++;
443
+ if (handoffCount > (ITERATIONS.MAX_AGENT_HANDOFFS || 10)) {
444
+ throw new Error('Maximum agent handoffs exceeded');
445
+ }
446
+ break; // Break loop to restart outer loop with new agent
447
+ }
448
+ // Continue loop for next turn (LLM response to tool results)
449
+ } else {
450
+ // No tool calls, Done.
451
+ if (step.outputSchema) {
452
+ return {
453
+ output: extractJson(fullText),
454
+ status: 'success',
455
+ usage: totalUsage,
456
+ };
457
+ }
458
+ return {
459
+ output: fullText,
460
+ status: 'success',
461
+ usage: totalUsage,
462
+ };
660
463
  }
661
- }
662
-
663
- if (!step.outputSchema) {
664
- flushStream();
665
- }
464
+ } // end while iterations
666
465
 
667
- // Standardize history reconstruction using result.response
668
- // AI SDK's result.response.messages contains the assistant/tool messages generated in this call.
669
- // We merge them with our existing currentMessages to maintain full history across handoffs.
670
- const response = await result.response;
671
- const responseMessages = response.messages;
672
- const newMessages = mapFromCoreMessages(responseMessages);
673
-
674
- // Merge strategy: Keep all existing messages (user prompts + previous assistant/tool exchanges)
675
- // and append new messages from this turn, avoiding duplicates by role/content matching
676
- const existingNonSystem = currentMessages.filter((m) => m.role !== 'system');
677
- const newNonDuplicate = newMessages.filter(
678
- (nm) =>
679
- !existingNonSystem.some(
680
- (em) =>
681
- em.role === nm.role &&
682
- em.content === nm.content &&
683
- em.tool_call_id === nm.tool_call_id
684
- )
685
- );
686
- currentMessages = [...existingNonSystem, ...newNonDuplicate];
687
-
688
- const usageObj = await result.usage;
689
- const totalUsage = {
690
- prompt_tokens: usageObj?.inputTokens ?? 0,
691
- completion_tokens: usageObj?.outputTokens ?? 0,
692
- total_tokens: (usageObj?.inputTokens ?? 0) + (usageObj?.outputTokens ?? 0),
693
- };
694
-
695
- if (requiresSuspend) {
466
+ // If we broke out due to handoff, outer loop continues.
467
+ if (!toolManager.pendingTransfer) {
468
+ // Max iterations reached without completion
469
+ if (step.outputSchema || (step as any).id === 'l1') {
470
+ // If we had a fatal stream error, we can't trust the text for JSON extraction
471
+ try {
472
+ return {
473
+ output: extractJson(fullText),
474
+ status: 'success',
475
+ usage: totalUsage,
476
+ };
477
+ } catch (e) {
478
+ throw new Error(
479
+ `Failed to extract valid JSON: ${e instanceof Error ? e.message : String(e)}`
480
+ );
481
+ }
482
+ }
696
483
  return {
697
- status: 'suspended',
698
- output: { messages: currentMessages, ...suspendData },
484
+ output: fullText,
485
+ status: globalHasError ? 'failed' : 'success',
699
486
  usage: totalUsage,
700
487
  };
701
488
  }
702
-
703
- if (pendingTransfer) {
704
- activeAgent = pendingTransfer;
705
- logger.log(` 🔁 Handoff: Switching to agent ${activeAgent.name}`);
706
- // Loop continues with new agent and updated history
707
- continue;
708
- }
709
-
710
- // If no transfer, we are done.
711
-
712
- // Handle Output Schema parsing if needed
713
- let output: any = fullText;
714
- if (step.outputSchema) {
715
- try {
716
- output = extractJson(fullText);
717
- } catch (e) {
718
- logger.error(
719
- ' ⚠️ Failed to parse output as JSON. Retrying not implemented in simple refactor.'
720
- );
721
- }
722
- }
723
-
724
- return {
725
- status: 'success',
726
- output,
727
- usage: totalUsage,
728
- };
729
- }
730
- } finally {
731
- for (const client of localMcpClients) {
732
- client.stop();
733
- }
489
+ } // end while true (agent handoff)
490
+ } catch (error) {
491
+ return {
492
+ output: null,
493
+ status: 'failed',
494
+ error: error instanceof Error ? error.message : String(error),
495
+ usage: totalUsage,
496
+ };
734
497
  }
735
498
  }