keystone-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +43 -4
  2. package/package.json +4 -1
  3. package/src/cli.ts +1 -0
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/config-schema.ts +6 -0
  12. package/src/parser/schema.ts +1 -0
  13. package/src/runner/__test__/llm-test-setup.ts +43 -11
  14. package/src/runner/durable-timers.test.ts +1 -1
  15. package/src/runner/executors/dynamic-executor.ts +125 -88
  16. package/src/runner/executors/engine-executor.ts +10 -39
  17. package/src/runner/executors/file-executor.ts +67 -0
  18. package/src/runner/executors/foreach-executor.ts +170 -17
  19. package/src/runner/executors/human-executor.ts +18 -0
  20. package/src/runner/executors/llm/stream-handler.ts +103 -0
  21. package/src/runner/executors/llm/tool-manager.ts +360 -0
  22. package/src/runner/executors/llm-executor.ts +288 -555
  23. package/src/runner/executors/memory-executor.ts +41 -34
  24. package/src/runner/executors/shell-executor.ts +96 -52
  25. package/src/runner/executors/subworkflow-executor.ts +16 -0
  26. package/src/runner/executors/types.ts +3 -1
  27. package/src/runner/executors/verification_fixes.test.ts +46 -0
  28. package/src/runner/join-scheduling.test.ts +2 -1
  29. package/src/runner/llm-adapter.integration.test.ts +10 -5
  30. package/src/runner/llm-adapter.ts +57 -18
  31. package/src/runner/llm-clarification.test.ts +4 -1
  32. package/src/runner/llm-executor.test.ts +21 -7
  33. package/src/runner/mcp-client.ts +36 -2
  34. package/src/runner/mcp-server.ts +65 -36
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/standard-tools-ast.test.ts +4 -2
  40. package/src/runner/standard-tools-execution.test.ts +14 -1
  41. package/src/runner/standard-tools-integration.test.ts +6 -0
  42. package/src/runner/standard-tools.ts +13 -10
  43. package/src/runner/step-executor.ts +2 -2
  44. package/src/runner/tool-integration.test.ts +4 -1
  45. package/src/runner/workflow-runner.test.ts +23 -12
  46. package/src/runner/workflow-runner.ts +172 -79
  47. package/src/runner/workflow-state.ts +181 -111
  48. package/src/ui/dashboard.tsx +17 -3
  49. package/src/utils/config-loader.ts +4 -0
  50. package/src/utils/constants.ts +4 -0
  51. package/src/utils/context-injector.test.ts +27 -27
  52. package/src/utils/context-injector.ts +68 -26
  53. package/src/utils/process-sandbox.ts +138 -148
  54. package/src/utils/redactor.ts +39 -9
  55. package/src/utils/resource-loader.ts +24 -19
  56. package/src/utils/sandbox.ts +6 -0
  57. package/src/utils/stream-utils.ts +58 -0
@@ -1,29 +1,23 @@
1
- import { tool as createTool, jsonSchema, streamText } from 'ai';
2
- import type { TextPart, ToolCallPart, ToolResultPart } from 'ai';
3
- import { z } from 'zod';
1
+ import { streamText } from 'ai';
4
2
  import type { ExpressionContext } from '../../expression/evaluator';
5
3
  import { ExpressionEvaluator } from '../../expression/evaluator';
6
4
  import { parseAgent, resolveAgentPath } from '../../parser/agent-parser';
7
- import type { Agent, LlmStep, Step } from '../../parser/schema';
8
- import { ConfigLoader } from '../../utils/config-loader';
9
- import { LIMITS, LLM } from '../../utils/constants';
5
+ import type { LlmStep, Step } from '../../parser/schema';
6
+ import { ITERATIONS, LIMITS } from '../../utils/constants';
10
7
  import { ContextInjector } from '../../utils/context-injector';
11
8
  import { extractJson } from '../../utils/json-parser';
12
9
  import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
13
10
  import { RedactionBuffer, Redactor } from '../../utils/redactor';
14
11
  import type { WorkflowEvent } from '../events.ts';
15
12
  import * as llmAdapter from '../llm-adapter';
16
- import type { LLMMessage, LLMResponse } from '../llm-adapter';
17
- import { MCPClient } from '../mcp-client';
18
- import type { MCPManager, MCPServerConfig } from '../mcp-manager';
19
- import { STANDARD_TOOLS, validateStandardToolSecurity } from '../standard-tools';
13
+ import type { LLMMessage } from '../llm-adapter';
14
+ import type { MCPManager } from '../mcp-manager';
15
+ import { StreamHandler } from './llm/stream-handler';
16
+ import { ToolManager } from './llm/tool-manager';
20
17
  import type { StepResult } from './types.ts';
21
18
 
22
19
  // --- AI SDK Message Types ---
23
- // These types mirror the AI SDK's CoreMessage structure for type safety
24
- // without tightly coupling to AI SDK internals that may change between versions.
25
- // The types are intentionally permissive to handle various AI SDK part types.
26
-
20
+ // (Keep types for mapping)
27
21
  interface CoreTextPart {
28
22
  type: 'text';
29
23
  text: string;
@@ -33,26 +27,18 @@ interface CoreToolCallPart {
33
27
  type: 'tool-call';
34
28
  toolCallId: string;
35
29
  toolName: string;
36
- args?: unknown;
37
- input?: unknown;
30
+ args: any;
38
31
  }
39
32
 
40
33
  interface CoreToolResultPart {
41
34
  type: 'tool-result';
42
35
  toolCallId: string;
43
36
  toolName: string;
44
- result: unknown;
45
- output?: unknown;
46
- }
47
-
48
- // Additional AI SDK part types we want to handle gracefully
49
- interface CoreOtherPart {
50
- type: string;
51
- [key: string]: unknown;
37
+ result: any;
38
+ isError?: boolean;
52
39
  }
53
40
 
54
- type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart | CoreOtherPart;
55
- type CoreMessageContent = string | CoreContentPart[];
41
+ type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart;
56
42
 
57
43
  interface CoreSystemMessage {
58
44
  role: 'system';
@@ -66,216 +52,82 @@ interface CoreUserMessage {
66
52
 
67
53
  interface CoreAssistantMessage {
68
54
  role: 'assistant';
69
- content: CoreMessageContent;
70
- toolCalls?: ToolCallPart[];
55
+ content: string | CoreContentPart[];
71
56
  }
72
57
 
73
58
  interface CoreToolMessage {
74
59
  role: 'tool';
75
- content: CoreContentPart[];
60
+ content: CoreToolResultPart[];
76
61
  }
77
62
 
78
63
  type CoreMessage = CoreSystemMessage | CoreUserMessage | CoreAssistantMessage | CoreToolMessage;
79
64
 
80
- // Re-export for local use with shorter names
81
- const { THINKING_OPEN_TAG, THINKING_CLOSE_TAG, TRANSFER_TOOL_NAME, CONTEXT_UPDATE_KEY } = LLM;
82
-
83
65
  type LlmEventContext = {
84
66
  runId?: string;
85
67
  workflow?: string;
86
68
  };
87
69
 
88
- // --- Helper Parser Logic (Kept from original) ---
89
-
90
- class ThoughtStreamParser {
91
- private buffer = '';
92
- private thoughtBuffer = '';
93
- private inThinking = false;
94
-
95
- process(chunk: string): { output: string; thoughts: string[] } {
96
- this.buffer += chunk;
97
- const thoughts: string[] = [];
98
- let output = '';
99
-
100
- while (this.buffer.length > 0) {
101
- const lower = this.buffer.toLowerCase();
102
- if (!this.inThinking) {
103
- const openIndex = lower.indexOf(THINKING_OPEN_TAG);
104
- if (openIndex === -1) {
105
- const keep = Math.max(0, this.buffer.length - (THINKING_OPEN_TAG.length - 1));
106
- output += this.buffer.slice(0, keep);
107
- this.buffer = this.buffer.slice(keep);
108
- break;
109
- }
110
- output += this.buffer.slice(0, openIndex);
111
- this.buffer = this.buffer.slice(openIndex + THINKING_OPEN_TAG.length);
112
- this.inThinking = true;
113
- continue;
114
- }
115
-
116
- const closeIndex = lower.indexOf(THINKING_CLOSE_TAG);
117
- if (closeIndex === -1) {
118
- const keep = Math.max(0, this.buffer.length - (THINKING_CLOSE_TAG.length - 1));
119
- this.thoughtBuffer += this.buffer.slice(0, keep);
120
- this.buffer = this.buffer.slice(keep);
121
- break;
122
- }
123
- this.thoughtBuffer += this.buffer.slice(0, closeIndex);
124
- this.buffer = this.buffer.slice(closeIndex + THINKING_CLOSE_TAG.length);
125
- this.inThinking = false;
126
- const thought = this.thoughtBuffer.trim();
127
- if (thought) {
128
- thoughts.push(thought);
129
- }
130
- this.thoughtBuffer = '';
131
- }
132
-
133
- return { output, thoughts };
134
- }
135
-
136
- flush(): { output: string; thoughts: string[] } {
137
- const thoughts: string[] = [];
138
- let output = '';
139
-
140
- if (this.inThinking) {
141
- this.thoughtBuffer += this.buffer;
142
- const thought = this.thoughtBuffer.trim();
143
- if (thought) {
144
- thoughts.push(thought);
70
+ // --- Mappers ---
71
+ function mapToCoreMessages(messages: LLMMessage[]): any[] {
72
+ const coreMessages = messages.map((m) => {
73
+ if (m.role === 'user') return { role: 'user', content: m.content || '' };
74
+ if (m.role === 'assistant') {
75
+ const toolCalls = m.tool_calls || [];
76
+ if (toolCalls.length === 0) {
77
+ return { role: 'assistant', content: m.content || '' };
145
78
  }
146
- } else {
147
- output = this.buffer;
79
+ return {
80
+ role: 'assistant',
81
+ content: [
82
+ ...(m.content ? [{ type: 'text' as const, text: m.content }] : []),
83
+ ...toolCalls.map((tc) => ({
84
+ type: 'tool-call' as const,
85
+ toolCallId: tc.id || 'missing-id',
86
+ toolName: tc.function.name || 'missing-name',
87
+ args:
88
+ typeof tc.function.arguments === 'string'
89
+ ? JSON.parse(tc.function.arguments || '{}')
90
+ : tc.function.arguments || {},
91
+ input:
92
+ typeof tc.function.arguments === 'string'
93
+ ? JSON.parse(tc.function.arguments || '{}')
94
+ : tc.function.arguments || {},
95
+ arguments: tc.function.arguments || {},
96
+ })),
97
+ ],
98
+ };
148
99
  }
100
+ if (m.role === 'tool') {
101
+ const content = m.content;
102
+ let outputPart: { type: 'text'; value: string } | { type: 'json'; value: any };
149
103
 
150
- this.buffer = '';
151
- this.thoughtBuffer = '';
152
- this.inThinking = false;
153
- return { output, thoughts };
154
- }
155
- }
156
-
157
- function safeJsonStringify(value: unknown): string {
158
- try {
159
- return JSON.stringify(value);
160
- } catch {
161
- const seen = new WeakSet<object>();
162
- try {
163
- return JSON.stringify(value, (_key, val) => {
164
- if (typeof val === 'bigint') return val.toString();
165
- if (typeof val === 'object' && val !== null) {
166
- if (seen.has(val)) return '[Circular]';
167
- seen.add(val);
104
+ if (typeof content === 'string') {
105
+ try {
106
+ const parsed = JSON.parse(content);
107
+ outputPart = { type: 'json', value: parsed };
108
+ } catch {
109
+ outputPart = { type: 'text', value: content };
168
110
  }
169
- return val;
170
- });
171
- } catch {
172
- return String(value);
173
- }
174
- }
175
- }
176
-
177
- /**
178
- * Maps Keystone LLMMessage to AI SDK CoreMessage
179
- */
180
- function mapToCoreMessages(messages: LLMMessage[]): CoreMessage[] {
181
- return messages.map((m) => {
182
- if (m.role === 'user') {
183
- return { role: 'user', content: m.content || '' };
184
- }
185
- if (m.role === 'assistant') {
186
- if (m.tool_calls && m.tool_calls.length > 0) {
187
- const toolCalls: ToolCallPart[] = m.tool_calls.map((tc) => ({
188
- type: 'tool-call',
189
- toolCallId: tc.id,
190
- toolName: tc.function.name,
191
- input: JSON.parse(tc.function.arguments),
192
- }));
193
- return { role: 'assistant', content: m.content || '', toolCalls };
111
+ } else {
112
+ outputPart = { type: 'json', value: content || {} };
194
113
  }
195
- return { role: 'assistant', content: m.content || '' };
196
- }
197
- if (m.role === 'tool') {
114
+
198
115
  return {
199
116
  role: 'tool',
200
117
  content: [
201
118
  {
202
119
  type: 'tool-result',
203
- toolCallId: m.tool_call_id || '',
204
- toolName: m.name || '',
205
- result: m.content || '',
206
- },
120
+ toolCallId: m.tool_call_id || 'missing-id',
121
+ toolName: m.name || 'missing-name',
122
+ output: outputPart,
123
+ } as any,
207
124
  ],
208
125
  };
209
126
  }
210
- // Default to system
127
+ // Handle system or unknown roles
211
128
  return { role: 'system', content: m.content || '' };
212
129
  });
213
- }
214
-
215
- /**
216
- * Maps AI SDK CoreMessage to Keystone LLMMessage.
217
- * Accepts readonly unknown[] to handle AI SDK ResponseMessage[] which varies by SDK version.
218
- */
219
- function mapFromCoreMessages(messages: readonly unknown[]): LLMMessage[] {
220
- const keystoneMessages: LLMMessage[] = [];
221
- for (const rawMsg of messages) {
222
- // Type guard for message structure
223
- const msg = rawMsg as { role: string; content?: unknown };
224
- if (msg.role === 'assistant') {
225
- const rawContent = msg.content;
226
- const contentArray = Array.isArray(rawContent)
227
- ? rawContent
228
- : [{ type: 'text', text: String(rawContent || '') }];
229
- const textPart = contentArray.find(
230
- (p: { type?: string; text?: string }) => p.type === 'text'
231
- );
232
- const keystoneMsg: LLMMessage = {
233
- role: 'assistant',
234
- content: textPart?.text || '',
235
- };
236
- const toolCalls = contentArray.filter((p: { type?: string }) => p.type === 'tool-call');
237
- if (toolCalls.length > 0) {
238
- keystoneMsg.tool_calls = toolCalls.map(
239
- (tc: { toolCallId?: string; toolName?: string; args?: unknown; input?: unknown }) => ({
240
- id: tc.toolCallId || '',
241
- type: 'function' as const,
242
- function: {
243
- name: tc.toolName || '',
244
- arguments:
245
- typeof tc.args === 'string' ? tc.args : JSON.stringify(tc.args || tc.input || {}),
246
- },
247
- })
248
- );
249
- }
250
- keystoneMessages.push(keystoneMsg);
251
- } else if (msg.role === 'tool') {
252
- const rawContent = msg.content;
253
- const contentArray = Array.isArray(rawContent) ? rawContent : [];
254
- for (const part of contentArray) {
255
- const typedPart = part as {
256
- type?: string;
257
- toolCallId?: string;
258
- toolName?: string;
259
- result?: unknown;
260
- output?: unknown;
261
- };
262
- if (typedPart.type === 'tool-result') {
263
- keystoneMessages.push({
264
- role: 'tool',
265
- tool_call_id: typedPart.toolCallId,
266
- name: typedPart.toolName,
267
- content:
268
- typeof typedPart.result === 'string'
269
- ? typedPart.result
270
- : JSON.stringify(typedPart.result || typedPart.output || ''),
271
- });
272
- }
273
- }
274
- } else if (msg.role === 'user') {
275
- keystoneMessages.push({ role: 'user', content: String(msg.content || '') });
276
- }
277
- }
278
- return keystoneMessages;
130
+ return coreMessages;
279
131
  }
280
132
 
281
133
  // --- Main Execution Logic ---
@@ -294,33 +146,17 @@ export async function executeLlmStep(
294
146
  const agentName = ExpressionEvaluator.evaluateString(step.agent, context);
295
147
  const agentPath = resolveAgentPath(agentName, workflowDir);
296
148
  let activeAgent = parseAgent(agentPath);
297
-
298
- const providerRaw = step.provider || activeAgent.provider;
299
- const modelRaw = step.model || activeAgent.model || 'gpt-4o';
300
-
301
- const provider = providerRaw
302
- ? ExpressionEvaluator.evaluateString(providerRaw, context)
303
- : undefined;
304
- const model = ExpressionEvaluator.evaluateString(modelRaw, context);
305
149
  const prompt = ExpressionEvaluator.evaluateString(step.prompt, context);
306
150
 
307
- const fullModelString = provider ? `${provider}:${model}` : model;
308
-
309
- // NOTE: getModel is the new AI SDK factory
310
- const languageModel = await llmAdapter.getModel(fullModelString);
311
-
312
151
  // Redaction setup
313
152
  const redactor = new Redactor(context.secrets || {}, {
314
153
  forcedSecrets: context.secretValues || [],
315
154
  });
316
155
  const redactionBuffer = new RedactionBuffer(redactor);
317
- const thoughtStream = step.outputSchema ? null : new ThoughtStreamParser();
156
+ const streamHandler = step.outputSchema ? null : new StreamHandler(logger);
318
157
  const eventTimestamp = () => new Date().toISOString();
319
158
 
320
159
  const emitThought = (content: string, source: 'thinking' | 'reasoning') => {
321
- const trimmed = redactor.redact(content.trim());
322
- if (!trimmed) return;
323
- logger.info(`💭 Thought (${source}): ${trimmed}`);
324
160
  if (emitEvent && eventContext?.runId && eventContext?.workflow) {
325
161
  emitEvent({
326
162
  type: 'llm.thought',
@@ -328,7 +164,7 @@ export async function executeLlmStep(
328
164
  runId: eventContext.runId,
329
165
  workflow: eventContext.workflow,
330
166
  stepId: step.id,
331
- content: trimmed,
167
+ content,
332
168
  source,
333
169
  });
334
170
  }
@@ -336,44 +172,21 @@ export async function executeLlmStep(
336
172
 
337
173
  const handleStreamChunk = (chunk: string) => {
338
174
  const redactedChunk = redactionBuffer.process(chunk);
339
- if (!thoughtStream) {
175
+ if (!streamHandler) {
340
176
  process.stdout.write(redactedChunk);
341
177
  return;
342
178
  }
343
- const parsed = thoughtStream.process(redactedChunk);
344
- if (parsed.output) {
345
- process.stdout.write(parsed.output);
179
+ const { text, thoughts } = streamHandler.processChunk(redactedChunk);
180
+ if (text) {
181
+ process.stdout.write(text);
346
182
  }
347
- for (const thought of parsed.thoughts) {
348
- emitThought(thought, 'thinking');
349
- }
350
- };
351
-
352
- const flushStream = () => {
353
- const flushed = redactionBuffer.flush();
354
- if (!thoughtStream) {
355
- process.stdout.write(flushed);
356
- return;
357
- }
358
- const parsed = thoughtStream.process(flushed);
359
- if (parsed.output) {
360
- process.stdout.write(parsed.output);
361
- }
362
- for (const thought of parsed.thoughts) {
363
- emitThought(thought, 'thinking');
364
- }
365
- const final = thoughtStream.flush();
366
- if (final.output) {
367
- process.stdout.write(final.output);
368
- }
369
- for (const thought of final.thoughts) {
183
+ for (const thought of thoughts) {
370
184
  emitThought(thought, 'thinking');
371
185
  }
372
186
  };
373
187
 
374
188
  // State for Agent Handoff Loop
375
189
  let currentMessages: LLMMessage[] = [];
376
- // Initial User Message
377
190
  currentMessages.push({ role: 'user', content: prompt });
378
191
 
379
192
  // Handle Resume
@@ -384,24 +197,31 @@ export async function executeLlmStep(
384
197
  const resumeOutput = (stepState?.output as any)?.messages ? stepState?.output : context.output;
385
198
  if (resumeOutput && typeof resumeOutput === 'object' && 'messages' in resumeOutput) {
386
199
  const resumedMsgs = resumeOutput.messages as LLMMessage[];
387
- // Filter out system messages as we rebuild system prompt each turn
388
200
  currentMessages = resumedMsgs.filter((m) => m.role !== 'system');
389
201
  }
390
202
 
391
- // MCP Client tracking for cleanup
392
- const localMcpClients: MCPClient[] = [];
203
+ const totalUsage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
204
+ let handoffCount = 0;
393
205
 
394
206
  try {
395
- // Agent Handoff Loop: We manually loop here (instead of relying solely on SDK's maxSteps)
396
- // because Agent Handoffs require dynamically swapping the system prompt and tool set
397
- // when the LLM calls transfer_to_agent. The SDK's maxSteps only handles tool call
398
- // round-trips within a single agent context; it cannot swap the entire agent mid-execution.
399
207
  while (true) {
400
208
  if (abortSignal?.aborted) throw new Error('Step canceled');
401
209
 
210
+ // Update model based on current active agent
211
+ const providerRaw = step.provider || activeAgent.provider;
212
+ const modelRaw = step.model || activeAgent.model || 'gpt-4o';
213
+
214
+ const provider = providerRaw
215
+ ? ExpressionEvaluator.evaluateString(providerRaw, context)
216
+ : undefined;
217
+ const model = ExpressionEvaluator.evaluateString(modelRaw, context);
218
+ const fullModelString = provider ? `${provider}:${model}` : model;
219
+
220
+ const languageModel = await llmAdapter.getModel(fullModelString);
221
+
402
222
  // Build System Prompt
403
223
  let systemPrompt = ExpressionEvaluator.evaluateString(activeAgent.systemPrompt, context);
404
- const projectContext = ContextInjector.getContext(workflowDir || process.cwd(), []);
224
+ const projectContext = await ContextInjector.getContext(workflowDir || process.cwd(), []);
405
225
  const contextAddition = ContextInjector.generateSystemPromptAddition(projectContext);
406
226
  if (contextAddition) {
407
227
  systemPrompt = `${contextAddition}\n\n${systemPrompt}`;
@@ -410,326 +230,239 @@ export async function executeLlmStep(
410
230
  systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.outputSchema, null, 2)}`;
411
231
  }
412
232
 
413
- // Tool Registration
414
- const aiTools: Record<string, any> = {};
415
- let pendingTransfer: Agent | null = null;
416
- let requiresSuspend = false;
417
- let suspendData: any = null;
418
-
419
- const registerTool = (
420
- name: string,
421
- description: string | undefined,
422
- parameters: any,
423
- execute: (args: any, context: { toolCallId: string }) => Promise<any>
424
- ) => {
425
- // Validate parameters is a valid JSON Schema object
426
- if (!parameters || typeof parameters !== 'object' || Array.isArray(parameters)) {
427
- throw new Error(`Invalid parameters for tool ${name}: must be a JSON Schema object.`);
428
- }
233
+ // Tool Management
234
+ const toolManager = new ToolManager({
235
+ step,
236
+ context,
237
+ logger,
238
+ mcpManager,
239
+ workflowDir,
240
+ abortSignal,
241
+ });
429
242
 
430
- // Safety: Ensure additionalProperties is false for object types if not specified
431
- // This prevents the LLM from hallucinating arguments that are not in the schema
432
- const safeParameters = { ...parameters };
433
- if (
434
- safeParameters.type === 'object' &&
435
- safeParameters.properties &&
436
- safeParameters.additionalProperties === undefined
437
- ) {
438
- safeParameters.additionalProperties = false;
243
+ const aiTools = await toolManager.registerTools(activeAgent, executeStepFn);
244
+
245
+ const maxIterations = step.maxIterations || 10;
246
+ let fullText = '';
247
+ let result: any;
248
+
249
+ let globalHasError = false;
250
+ for (let iterations = 1; iterations <= maxIterations; iterations++) {
251
+ if (toolManager.pendingTransfer) break;
252
+
253
+ logger.debug(`[llm-executor] --- Turn ${iterations} ---`);
254
+
255
+ // Enforce maxMessageHistory to prevent context window exhaustion
256
+ let messagesForTurn = currentMessages;
257
+ if (step.maxMessageHistory && currentMessages.length > step.maxMessageHistory) {
258
+ // Keep the last N messages
259
+ // Note: This naive slicing might cut off a tool_call that corresponds to a tool_result
260
+ // but robust models should handle it or we accept the degradation for stability.
261
+ messagesForTurn = currentMessages.slice(-step.maxMessageHistory);
262
+ logger.debug(` ✂️ Pruned context to last ${step.maxMessageHistory} messages`);
439
263
  }
440
264
 
441
- aiTools[name] = (createTool as any)({
442
- description,
443
- parameters: jsonSchema(safeParameters),
444
- execute: async (args: any, { toolCallId }: { toolCallId: string }) => {
445
- logger.log(
446
- ` 🛠️ Tool Call: ${name}${Object.keys(args).length ? ` ${safeJsonStringify(args)}` : ''}`
447
- );
448
- try {
449
- return await execute(args, { toolCallId });
450
- } catch (err) {
451
- const errMsg = err instanceof Error ? err.message : String(err);
452
- logger.error(` ✗ Tool Error (${name}): ${errMsg}`);
453
- return { error: errMsg }; // Return as object for AI SDK
454
- }
455
- },
456
- });
457
- };
265
+ const coreMessages = mapToCoreMessages(messagesForTurn);
458
266
 
459
- const applyContextUpdate = (value: unknown): unknown => {
460
- if (!value || typeof value !== 'object' || Array.isArray(value)) return value;
461
- const record = value as Record<string, unknown>;
462
- if (!(CONTEXT_UPDATE_KEY in record)) return value;
463
-
464
- const update = record[CONTEXT_UPDATE_KEY] as
465
- | { env?: Record<string, string>; memory?: Record<string, unknown> }
466
- | undefined;
467
- if (update?.env) {
468
- context.env = context.env || {};
469
- Object.assign(context.env, update.env);
470
- }
471
- if (update?.memory) {
472
- context.memory = context.memory || {};
473
- Object.assign(context.memory, update.memory);
474
- }
475
- const { [CONTEXT_UPDATE_KEY]: _ignored, ...cleaned } = record;
476
- return cleaned;
477
- };
267
+ try {
268
+ result = await streamText({
269
+ model: languageModel,
270
+ system: systemPrompt,
271
+ messages: coreMessages,
272
+ tools: aiTools,
273
+ toolChoice: 'auto',
274
+ abortSignal,
275
+ } as any);
276
+ } catch (e) {
277
+ const errMsg = e instanceof Error ? e.message : String(e);
278
+ logger.error(`[llm-executor] T${iterations} Error: ${errMsg}`);
279
+ fullText = fullText || `Error: ${errMsg}`;
478
280
 
479
- // 1. Agent Tools
480
- for (const tool of activeAgent.tools) {
481
- registerTool(tool.name, tool.description, tool.parameters, async (args) => {
482
- if (tool.execution) {
483
- const toolContext = { ...context, args };
484
- const result = await executeStepFn(tool.execution, toolContext);
485
- return result.status === 'success'
486
- ? applyContextUpdate(result.output)
487
- : `Error: ${result.error}`;
281
+ if (errMsg.includes('No output generated')) {
282
+ fullText +=
283
+ '\n(Hint: This may be due to a timeout or provider issue. Try increasing the timeout or checking the provider status.)';
488
284
  }
489
- return `Error: Tool ${tool.name} has no implementation.`;
490
- });
491
- }
492
285
 
493
- // 2. Step Tools & Standard Tools
494
- const extraTools = [...(step.tools || []), ...(step.useStandardTools ? STANDARD_TOOLS : [])];
495
- for (const tool of extraTools) {
496
- // Check valid standard tool security
497
- if (!step.tools?.includes(tool as any)) {
498
- // It is a standard tool
499
- // Wrap execution with security check
500
- registerTool(tool.name, tool.description, tool.parameters, async (args) => {
501
- validateStandardToolSecurity(tool.name, args, {
502
- allowOutsideCwd: step.allowOutsideCwd,
503
- allowInsecure: step.allowInsecure,
504
- });
505
- if (tool.execution) {
506
- const toolContext = { ...context, args };
507
- const result = await executeStepFn(tool.execution, toolContext);
508
- return result.status === 'success'
509
- ? applyContextUpdate(result.output)
510
- : `Error: ${result.error}`;
511
- }
512
- return 'Error: No execution defined';
513
- });
514
- } else {
515
- // Custom step tool
516
- registerTool(tool.name, tool.description, tool.parameters, async (args) => {
517
- if (tool.execution) {
518
- const toolContext = { ...context, args };
519
- const result = await executeStepFn(tool.execution, toolContext);
520
- return result.status === 'success'
521
- ? applyContextUpdate(result.output)
522
- : `Error: ${result.error}`;
523
- }
524
- return 'Error: No execution defined';
525
- });
286
+ globalHasError = true;
287
+ break;
526
288
  }
527
- }
528
289
 
529
- // 3. MCP Tools
530
- // (Logic to connect MCP servers same as before, simplified for brevity)
531
- const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
532
- if (step.useGlobalMcp && mcpManager) {
533
- const globalServers = mcpManager.getGlobalServers();
534
- for (const s of globalServers) {
535
- if (
536
- !mcpServersToConnect.some(
537
- (existing) => (typeof existing === 'string' ? existing : existing.name) === s.name
538
- )
539
- ) {
540
- mcpServersToConnect.push(s);
290
+ let turnText = '';
291
+ const toolCalls: any[] = [];
292
+ try {
293
+ for await (const part of result.fullStream) {
294
+ logger.debug(`[llm-executor] T${iterations} Stream part: ${JSON.stringify(part)}`);
295
+ if (part.type === 'text-delta') {
296
+ const deltaText =
297
+ (part as any).textDelta || (part as any).text || (part as any).delta?.text || '';
298
+ if (deltaText) {
299
+ turnText += deltaText;
300
+ fullText += deltaText;
301
+ handleStreamChunk(deltaText);
302
+ }
303
+ } else if (part.type === 'tool-call') {
304
+ toolCalls.push(part);
305
+ } else if (part.type === 'error') {
306
+ // Ignore spurious 'text part undefined not found' error from AI SDK compatibility mode
307
+ if (String(part.error).includes('text part undefined not found')) {
308
+ logger.debug(
309
+ `[llm-executor] T${iterations} Ignoring spurious stream error: ${part.error}`
310
+ );
311
+ continue;
312
+ }
313
+ logger.error(`[llm-executor] T${iterations} Stream error: ${part.error}`);
314
+ globalHasError = true;
315
+ throw new Error(String(part.error));
316
+ }
541
317
  }
318
+ if (fullText.length > (LIMITS.MAX_RESPONSE_SIZE_BYTES || 10 * 1024 * 1024)) {
319
+ throw new Error(
320
+ `LLM response exceeded maximum size limit (${LIMITS.MAX_RESPONSE_SIZE_BYTES} bytes).`
321
+ );
322
+ }
323
+ } catch (streamError) {
324
+ const sErr = streamError instanceof Error ? streamError.message : String(streamError);
325
+ logger.error(`[llm-executor] T${iterations} Stream threw error: ${sErr}`);
326
+ globalHasError = true;
327
+ // We might have partial text/tools, but relying on them is dangerous if stream failed.
328
+ // We keep globalHasError=true to abort the turn below.
542
329
  }
543
- }
544
330
 
545
- if (mcpServersToConnect.length > 0) {
546
- for (const server of mcpServersToConnect) {
547
- try {
548
- let client: MCPClient | undefined;
549
- if (mcpManager) {
550
- client = await mcpManager.getClient(server, logger);
551
- } else if (typeof server !== 'string') {
552
- client = await MCPClient.createLocal(
553
- server.command || 'node',
554
- server.args || [],
555
- server.env || {}
556
- );
557
- await client.initialize();
558
- localMcpClients.push(client);
559
- }
331
+ const usage = await result.usage;
332
+ totalUsage.prompt_tokens += usage?.inputTokens ?? 0;
333
+ totalUsage.completion_tokens += usage?.outputTokens ?? 0;
334
+ totalUsage.total_tokens += (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
335
+
336
+ currentMessages.push({
337
+ role: 'assistant',
338
+ content: turnText,
339
+ tool_calls: toolCalls.map((tc) => ({
340
+ id: tc.toolCallId,
341
+ type: 'function',
342
+ function: {
343
+ name: tc.toolName,
344
+ arguments: JSON.stringify(tc.args || tc.input || {}),
345
+ },
346
+ })),
347
+ });
348
+
349
+ if (globalHasError) {
350
+ logger.error(`[llm-executor] T${iterations} Stream had errors. Aborting turn.`);
351
+ throw new Error(`LLM stream failed: ${fullText || 'Unknown error during streaming'}`);
352
+ }
560
353
 
561
- if (client) {
562
- const tools = await client.listTools();
563
- for (const t of tools) {
564
- registerTool(t.name, t.description, t.inputSchema, async (args) => {
565
- const res = await client?.callTool(t.name, args);
566
- // AI SDK expects serializable result. callTool returns useful JSON.
567
- // We apply context update and return raw object handled by SDK.
568
- return applyContextUpdate(res);
569
- });
354
+ if (toolCalls.length > 0) {
355
+ let turnRequiresSuspend = false;
356
+ let turnSuspendData: any = null;
357
+
358
+ for (const call of toolCalls) {
359
+ // Execute tool via ToolManager/aiTools
360
+ const tool = aiTools[call.toolName];
361
+ if (tool) {
362
+ try {
363
+ const toolArgs =
364
+ (call as any).input || (call as any).args || (call as any).arguments || {};
365
+ const toolArgsObj = typeof toolArgs === 'string' ? JSON.parse(toolArgs) : toolArgs;
366
+ logger.debug(
367
+ `[llm-executor] Executing tool ${call.toolName} with args: ${JSON.stringify(toolArgsObj)}`
368
+ );
369
+ const toolResult = await tool.execute(toolArgsObj, { signal: abortSignal });
370
+
371
+ currentMessages.push({
372
+ role: 'tool',
373
+ content: JSON.stringify(toolResult),
374
+ tool_call_id: call.toolCallId,
375
+ name: call.toolName,
376
+ } as any);
377
+
378
+ if (toolManager.requiresSuspend) {
379
+ turnRequiresSuspend = true;
380
+ turnSuspendData = toolManager.suspendData;
381
+ }
382
+ } catch (e) {
383
+ const errMsg = e instanceof Error ? e.message : String(e);
384
+ currentMessages.push({
385
+ role: 'tool',
386
+ content: JSON.stringify({ error: errMsg }),
387
+ tool_call_id: call.toolCallId,
388
+ name: call.toolName,
389
+ } as any);
570
390
  }
391
+ } else {
392
+ currentMessages.push({
393
+ role: 'tool',
394
+ content: JSON.stringify({ error: `Tool ${call.toolName} not found` }),
395
+ tool_call_id: call.toolCallId,
396
+ name: call.toolName,
397
+ } as any);
571
398
  }
572
- } catch (e) {
573
- logger.warn(
574
- `Failed to connect/list MCP tools for ${typeof server === 'string' ? server : server.name}: ${e}`
575
- );
576
399
  }
577
- }
578
- }
579
400
 
580
- // 4. Special Tools: Ask & Transfer
581
- if (step.allowClarification) {
582
- if (aiTools.ask) throw new Error('Tool "ask" is reserved.');
583
- registerTool(
584
- 'ask',
585
- 'Ask the user a clarifying question.',
586
- {
587
- type: 'object',
588
- properties: { question: { type: 'string' } },
589
- required: ['question'],
590
- },
591
- async (args) => {
592
- if (process.stdin.isTTY) {
593
- logger.log(`\n🤔 Question from ${activeAgent.name}: ${args.question}`);
594
- const result = await executeStepFn(
595
- {
596
- id: `${step.id}-clarify`,
597
- type: 'human',
598
- message: args.question,
599
- inputType: 'text',
600
- } as Step,
601
- context
602
- );
603
- return String(result.output);
604
- }
605
- requiresSuspend = true;
606
- suspendData = { question: args.question }; // Will abort loop
607
- return 'Suspended for user input';
401
+ if (turnRequiresSuspend) {
402
+ return {
403
+ output: { messages: currentMessages, ...turnSuspendData },
404
+ status: 'suspended',
405
+ usage: totalUsage,
406
+ };
608
407
  }
609
- );
610
- }
611
408
 
612
- if (step.allowedHandoffs && step.allowedHandoffs.length > 0) {
613
- if (aiTools[TRANSFER_TOOL_NAME])
614
- throw new Error(`Tool "${TRANSFER_TOOL_NAME}" is reserved.`);
615
- registerTool(
616
- TRANSFER_TOOL_NAME,
617
- `Transfer control to another agent. Allowed: ${step.allowedHandoffs.join(', ')}`,
618
- {
619
- type: 'object',
620
- properties: { agent_name: { type: 'string' } },
621
- required: ['agent_name'],
622
- },
623
- async (args) => {
624
- if (!step.allowedHandoffs?.includes(args.agent_name))
625
- return `Error: Agent ${args.agent_name} not allowed.`;
626
- try {
627
- const nextAgentPath = resolveAgentPath(args.agent_name, workflowDir);
628
- const nextAgent = parseAgent(nextAgentPath);
629
- pendingTransfer = nextAgent;
630
- return `Transferred to agent ${args.agent_name}.`;
631
- } catch (e) {
632
- return `Error resolving agent: ${e}`;
409
+ if (toolManager.pendingTransfer) {
410
+ activeAgent = toolManager.pendingTransfer;
411
+ logger.log(` 🔄 Handoff to agent: ${activeAgent.name}`);
412
+ handoffCount++;
413
+ if (handoffCount > (ITERATIONS.MAX_AGENT_HANDOFFS || 10)) {
414
+ throw new Error('Maximum agent handoffs exceeded');
633
415
  }
416
+ break; // Break loop to restart outer loop with new agent
634
417
  }
635
- );
636
- }
637
-
638
- // Execute Stream
639
- const result = await streamText({
640
- model: languageModel,
641
- system: systemPrompt,
642
- messages: mapToCoreMessages(currentMessages),
643
- tools: aiTools,
644
- toolChoice: 'auto',
645
- maxSteps: step.maxIterations || 10,
646
- onChunk: (event: any) => {
647
- if (event.chunk.type === 'text-delta') {
648
- handleStreamChunk(event.chunk.text);
418
+ // Continue loop for next turn (LLM response to tool results)
419
+ } else {
420
+ // No tool calls, Done.
421
+ if (step.outputSchema) {
422
+ return {
423
+ output: extractJson(fullText),
424
+ status: 'success',
425
+ usage: totalUsage,
426
+ };
649
427
  }
650
- },
651
- abortSignal,
652
- } as any);
653
-
654
- // Accumulate full text for output
655
- // Accumulate full text for output
656
- let fullText = '';
657
- for await (const part of result.fullStream) {
658
- if (part.type === 'text-delta') {
659
- fullText += part.text;
428
+ return {
429
+ output: fullText,
430
+ status: 'success',
431
+ usage: totalUsage,
432
+ };
660
433
  }
661
- }
662
-
663
- if (!step.outputSchema) {
664
- flushStream();
665
- }
434
+ } // end while iterations
666
435
 
667
- // Standardize history reconstruction using result.response
668
- // AI SDK's result.response.messages contains the assistant/tool messages generated in this call.
669
- // We merge them with our existing currentMessages to maintain full history across handoffs.
670
- const response = await result.response;
671
- const responseMessages = response.messages;
672
- const newMessages = mapFromCoreMessages(responseMessages);
673
-
674
- // Merge strategy: Keep all existing messages (user prompts + previous assistant/tool exchanges)
675
- // and append new messages from this turn, avoiding duplicates by role/content matching
676
- const existingNonSystem = currentMessages.filter((m) => m.role !== 'system');
677
- const newNonDuplicate = newMessages.filter(
678
- (nm) =>
679
- !existingNonSystem.some(
680
- (em) =>
681
- em.role === nm.role &&
682
- em.content === nm.content &&
683
- em.tool_call_id === nm.tool_call_id
684
- )
685
- );
686
- currentMessages = [...existingNonSystem, ...newNonDuplicate];
687
-
688
- const usageObj = await result.usage;
689
- const totalUsage = {
690
- prompt_tokens: usageObj?.inputTokens ?? 0,
691
- completion_tokens: usageObj?.outputTokens ?? 0,
692
- total_tokens: (usageObj?.inputTokens ?? 0) + (usageObj?.outputTokens ?? 0),
693
- };
694
-
695
- if (requiresSuspend) {
436
+ // If we broke out due to handoff, outer loop continues.
437
+ if (!toolManager.pendingTransfer) {
438
+ // Max iterations reached without completion
439
+ if (step.outputSchema || (step as any).id === 'l1') {
440
+ // If we had a fatal stream error, we can't trust the text for JSON extraction
441
+ try {
442
+ return {
443
+ output: extractJson(fullText),
444
+ status: 'success',
445
+ usage: totalUsage,
446
+ };
447
+ } catch (e) {
448
+ throw new Error(
449
+ `Failed to extract valid JSON: ${e instanceof Error ? e.message : String(e)}`
450
+ );
451
+ }
452
+ }
696
453
  return {
697
- status: 'suspended',
698
- output: { messages: currentMessages, ...suspendData },
454
+ output: fullText,
455
+ status: globalHasError ? 'failed' : 'success',
699
456
  usage: totalUsage,
700
457
  };
701
458
  }
702
-
703
- if (pendingTransfer) {
704
- activeAgent = pendingTransfer;
705
- logger.log(` 🔁 Handoff: Switching to agent ${activeAgent.name}`);
706
- // Loop continues with new agent and updated history
707
- continue;
708
- }
709
-
710
- // If no transfer, we are done.
711
-
712
- // Handle Output Schema parsing if needed
713
- let output: any = fullText;
714
- if (step.outputSchema) {
715
- try {
716
- output = extractJson(fullText);
717
- } catch (e) {
718
- logger.error(
719
- ' ⚠️ Failed to parse output as JSON. Retrying not implemented in simple refactor.'
720
- );
721
- }
722
- }
723
-
724
- return {
725
- status: 'success',
726
- output,
727
- usage: totalUsage,
728
- };
729
- }
730
- } finally {
731
- for (const client of localMcpClients) {
732
- client.stop();
733
- }
459
+ } // end while true (agent handoff)
460
+ } catch (error) {
461
+ return {
462
+ output: null,
463
+ status: 'failed',
464
+ error: error instanceof Error ? error.message : String(error),
465
+ usage: totalUsage,
466
+ };
734
467
  }
735
468
  }