keystone-cli 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/README.md +276 -32
  2. package/package.json +8 -4
  3. package/src/cli.ts +350 -416
  4. package/src/commands/doc.ts +31 -0
  5. package/src/commands/event.ts +29 -0
  6. package/src/commands/graph.ts +37 -0
  7. package/src/commands/index.ts +14 -0
  8. package/src/commands/init.ts +185 -0
  9. package/src/commands/run.ts +124 -0
  10. package/src/commands/schema.ts +40 -0
  11. package/src/commands/utils.ts +78 -0
  12. package/src/commands/validate.ts +111 -0
  13. package/src/db/workflow-db.test.ts +314 -0
  14. package/src/db/workflow-db.ts +810 -210
  15. package/src/expression/evaluator-audit.test.ts +4 -2
  16. package/src/expression/evaluator.test.ts +14 -1
  17. package/src/expression/evaluator.ts +166 -19
  18. package/src/parser/config-schema.ts +18 -0
  19. package/src/parser/schema.ts +153 -22
  20. package/src/parser/test-schema.ts +6 -6
  21. package/src/parser/workflow-parser.test.ts +24 -0
  22. package/src/parser/workflow-parser.ts +65 -3
  23. package/src/runner/auto-heal.test.ts +5 -6
  24. package/src/runner/blueprint-executor.test.ts +2 -2
  25. package/src/runner/debug-repl.test.ts +5 -8
  26. package/src/runner/debug-repl.ts +59 -16
  27. package/src/runner/durable-timers.test.ts +11 -2
  28. package/src/runner/engine-executor.test.ts +1 -1
  29. package/src/runner/events.ts +57 -0
  30. package/src/runner/executors/artifact-executor.ts +166 -0
  31. package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
  32. package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
  33. package/src/runner/executors/file-executor.test.ts +48 -0
  34. package/src/runner/executors/file-executor.ts +324 -0
  35. package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
  36. package/src/runner/executors/human-executor.ts +144 -0
  37. package/src/runner/executors/join-executor.ts +75 -0
  38. package/src/runner/executors/llm-executor.ts +1266 -0
  39. package/src/runner/executors/memory-executor.ts +71 -0
  40. package/src/runner/executors/plan-executor.ts +104 -0
  41. package/src/runner/executors/request-executor.ts +265 -0
  42. package/src/runner/executors/script-executor.ts +43 -0
  43. package/src/runner/executors/shell-executor.ts +403 -0
  44. package/src/runner/executors/subworkflow-executor.ts +114 -0
  45. package/src/runner/executors/types.ts +69 -0
  46. package/src/runner/executors/wait-executor.ts +59 -0
  47. package/src/runner/join-scheduling.test.ts +197 -0
  48. package/src/runner/llm-adapter-runtime.test.ts +209 -0
  49. package/src/runner/llm-adapter.test.ts +419 -24
  50. package/src/runner/llm-adapter.ts +130 -26
  51. package/src/runner/llm-clarification.test.ts +2 -1
  52. package/src/runner/llm-executor.test.ts +532 -17
  53. package/src/runner/mcp-client-audit.test.ts +1 -2
  54. package/src/runner/mcp-client.ts +136 -46
  55. package/src/runner/mcp-manager.test.ts +4 -0
  56. package/src/runner/mcp-server.test.ts +58 -0
  57. package/src/runner/mcp-server.ts +26 -0
  58. package/src/runner/memoization.test.ts +190 -0
  59. package/src/runner/optimization-runner.ts +4 -9
  60. package/src/runner/quality-gate.test.ts +69 -0
  61. package/src/runner/reflexion.test.ts +6 -17
  62. package/src/runner/resource-pool.ts +102 -14
  63. package/src/runner/services/context-builder.ts +144 -0
  64. package/src/runner/services/secret-manager.ts +105 -0
  65. package/src/runner/services/workflow-validator.ts +131 -0
  66. package/src/runner/shell-executor.test.ts +28 -4
  67. package/src/runner/standard-tools-ast.test.ts +196 -0
  68. package/src/runner/standard-tools-execution.test.ts +27 -0
  69. package/src/runner/standard-tools-integration.test.ts +6 -10
  70. package/src/runner/standard-tools.ts +339 -102
  71. package/src/runner/step-executor.test.ts +216 -4
  72. package/src/runner/step-executor.ts +69 -941
  73. package/src/runner/stream-utils.ts +7 -3
  74. package/src/runner/test-harness.ts +20 -1
  75. package/src/runner/timeout.test.ts +10 -0
  76. package/src/runner/timeout.ts +11 -2
  77. package/src/runner/tool-integration.test.ts +1 -1
  78. package/src/runner/wait-step.test.ts +102 -0
  79. package/src/runner/workflow-runner.test.ts +208 -15
  80. package/src/runner/workflow-runner.ts +890 -818
  81. package/src/runner/workflow-scheduler.ts +75 -0
  82. package/src/runner/workflow-state.ts +269 -0
  83. package/src/runner/workflow-subflows.test.ts +13 -12
  84. package/src/scripts/generate-schemas.ts +16 -0
  85. package/src/templates/agents/explore.md +1 -0
  86. package/src/templates/agents/general.md +1 -0
  87. package/src/templates/agents/handoff-router.md +14 -0
  88. package/src/templates/agents/handoff-specialist.md +15 -0
  89. package/src/templates/agents/keystone-architect.md +13 -44
  90. package/src/templates/agents/my-agent.md +1 -0
  91. package/src/templates/agents/software-engineer.md +1 -0
  92. package/src/templates/agents/summarizer.md +1 -0
  93. package/src/templates/agents/test-agent.md +1 -0
  94. package/src/templates/agents/tester.md +1 -0
  95. package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
  96. package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
  97. package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
  98. package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
  99. package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
  100. package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
  101. package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
  102. package/src/templates/control-flow/idempotency-example.yaml +30 -0
  103. package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
  104. package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
  105. package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
  106. package/src/templates/features/artifact-example.yaml +39 -0
  107. package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
  108. package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
  109. package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
  110. package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
  111. package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
  112. package/src/templates/features/script-example.yaml +27 -0
  113. package/src/templates/patterns/agent-handoff.yaml +53 -0
  114. package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
  115. package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
  116. package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
  117. package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
  118. package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
  119. package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
  120. package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
  121. package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
  122. package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
  123. package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
  124. package/src/templates/scaffolding/review-loop.yaml +97 -0
  125. package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
  126. package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
  127. package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
  128. package/src/templates/testing/invalid.yaml +6 -0
  129. package/src/ui/dashboard.tsx +191 -33
  130. package/src/utils/auth-manager.test.ts +337 -0
  131. package/src/utils/auth-manager.ts +157 -61
  132. package/src/utils/blueprint-utils.ts +4 -6
  133. package/src/utils/config-loader.test.ts +2 -0
  134. package/src/utils/config-loader.ts +12 -3
  135. package/src/utils/constants.ts +76 -0
  136. package/src/utils/container.ts +63 -0
  137. package/src/utils/context-injector.test.ts +200 -0
  138. package/src/utils/context-injector.ts +244 -0
  139. package/src/utils/doc-generator.ts +85 -0
  140. package/src/utils/env-filter.ts +45 -0
  141. package/src/utils/json-parser.test.ts +12 -0
  142. package/src/utils/json-parser.ts +30 -5
  143. package/src/utils/logger.ts +12 -1
  144. package/src/utils/mermaid.ts +4 -0
  145. package/src/utils/paths.ts +52 -1
  146. package/src/utils/process-sandbox-worker.test.ts +46 -0
  147. package/src/utils/process-sandbox.ts +227 -14
  148. package/src/utils/redactor.test.ts +11 -6
  149. package/src/utils/redactor.ts +25 -9
  150. package/src/utils/sandbox.ts +3 -0
  151. package/src/runner/llm-executor.ts +0 -638
  152. package/src/runner/shell-executor.ts +0 -366
  153. package/src/templates/invalid.yaml +0 -5
@@ -0,0 +1,1266 @@
1
+ import type { ExpressionContext } from '../../expression/evaluator';
2
+ import { ExpressionEvaluator } from '../../expression/evaluator';
3
+ import { parseAgent, resolveAgentPath } from '../../parser/agent-parser';
4
+ import type { Agent, LlmStep, Step } from '../../parser/schema';
5
+ import { ConfigLoader } from '../../utils/config-loader';
6
+ import { LIMITS, LLM } from '../../utils/constants';
7
+ import { ContextInjector } from '../../utils/context-injector';
8
+ import { extractJson } from '../../utils/json-parser';
9
+ import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
10
+ import { RedactionBuffer, Redactor } from '../../utils/redactor';
11
+ import type { WorkflowEvent } from '../events.ts';
12
+ import { type LLMAdapter, type LLMMessage, type LLMResponse, getAdapter } from '../llm-adapter';
13
+ import { MCPClient } from '../mcp-client';
14
+ import type { MCPManager, MCPServerConfig } from '../mcp-manager';
15
+ import { STANDARD_TOOLS, validateStandardToolSecurity } from '../standard-tools';
16
+ import type { StepResult } from './types.ts';
17
+
18
// Re-export for local use with shorter names
const {
  SUMMARY_MESSAGE_NAME,
  SUMMARY_MESSAGE_MAX_BYTES,
  SUMMARY_INPUT_MESSAGE_MAX_BYTES,
  SUMMARY_INPUT_TOTAL_MAX_BYTES,
  SUMMARY_MODEL_BY_PROVIDER_TYPE,
  THINKING_OPEN_TAG,
  THINKING_CLOSE_TAG,
  TRANSFER_TOOL_NAME,
  CONTEXT_UPDATE_KEY,
} = LLM;

// Identifiers attached to emitted workflow events so consumers can correlate
// an event with the run and workflow that produced it.
type LlmEventContext = {
  runId?: string;
  workflow?: string;
};
35
+
36
/**
 * Incremental parser that splits a streamed completion into user-visible
 * output and "thinking" blocks delimited by THINKING_OPEN_TAG / THINKING_CLOSE_TAG.
 * Feed chunks via process(); call flush() once the stream ends to drain
 * whatever is still buffered.
 *
 * Tag detection searches a lowercased copy of the buffer, so matching is
 * case-insensitive (assumes the tag constants themselves are lowercase —
 * TODO confirm against utils/constants).
 */
class ThoughtStreamParser {
  private buffer = ''; // unprocessed stream data, kept in original case
  private thoughtBuffer = ''; // text accumulated inside an open thinking block
  private inThinking = false; // are we between an open and a close tag?

  // Consume one chunk. Returns output that is safe to surface now plus any
  // thoughts completed by this chunk.
  process(chunk: string): { output: string; thoughts: string[] } {
    this.buffer += chunk;
    const thoughts: string[] = [];
    let output = '';

    while (this.buffer.length > 0) {
      const lower = this.buffer.toLowerCase();
      if (!this.inThinking) {
        const openIndex = lower.indexOf(THINKING_OPEN_TAG);
        if (openIndex === -1) {
          // No open tag found: release everything except the last
          // (tag length - 1) chars, which could be the start of a tag split
          // across chunk boundaries.
          const keep = Math.max(0, this.buffer.length - (THINKING_OPEN_TAG.length - 1));
          output += this.buffer.slice(0, keep);
          this.buffer = this.buffer.slice(keep);
          break;
        }
        // Emit text before the tag, drop the tag itself, enter thinking mode.
        output += this.buffer.slice(0, openIndex);
        this.buffer = this.buffer.slice(openIndex + THINKING_OPEN_TAG.length);
        this.inThinking = true;
        continue;
      }

      const closeIndex = lower.indexOf(THINKING_CLOSE_TAG);
      if (closeIndex === -1) {
        // Close tag not seen yet: buffer the thought text, holding back a
        // possible partial close tag at the end of the buffer.
        const keep = Math.max(0, this.buffer.length - (THINKING_CLOSE_TAG.length - 1));
        this.thoughtBuffer += this.buffer.slice(0, keep);
        this.buffer = this.buffer.slice(keep);
        break;
      }
      // Complete thinking block: record the trimmed thought (if non-empty).
      this.thoughtBuffer += this.buffer.slice(0, closeIndex);
      this.buffer = this.buffer.slice(closeIndex + THINKING_CLOSE_TAG.length);
      this.inThinking = false;
      const thought = this.thoughtBuffer.trim();
      if (thought) {
        thoughts.push(thought);
      }
      this.thoughtBuffer = '';
    }

    return { output, thoughts };
  }

  // Drain remaining buffered data at end of stream. An unterminated thinking
  // block is emitted as a thought; otherwise leftovers become plain output.
  // Resets all state so the parser can be reused.
  flush(): { output: string; thoughts: string[] } {
    const thoughts: string[] = [];
    let output = '';

    if (this.inThinking) {
      this.thoughtBuffer += this.buffer;
      const thought = this.thoughtBuffer.trim();
      if (thought) {
        thoughts.push(thought);
      }
    } else {
      output = this.buffer;
    }

    this.buffer = '';
    this.thoughtBuffer = '';
    this.inThinking = false;
    return { output, thoughts };
  }
}
102
+
103
+ /**
104
+ * Truncate message history to prevent unbounded memory growth.
105
+ * Preserves system messages and keeps the most recent messages.
106
+ */
107
+ function estimateMessageBytes(message: LLMMessage): number {
108
+ let size = 0;
109
+ if (typeof message.content === 'string') {
110
+ size += Buffer.byteLength(message.content, 'utf8');
111
+ }
112
+ if (message.tool_calls) {
113
+ size += Buffer.byteLength(JSON.stringify(message.tool_calls), 'utf8');
114
+ }
115
+ if (message.reasoning) {
116
+ size += Buffer.byteLength(JSON.stringify(message.reasoning), 'utf8');
117
+ }
118
+ if (message.name) {
119
+ size += Buffer.byteLength(message.name, 'utf8');
120
+ }
121
+ return size;
122
+ }
123
+
124
+ function truncateStringByBytes(value: string, maxBytes: number): string {
125
+ if (maxBytes <= 0) return '';
126
+ if (Buffer.byteLength(value, 'utf8') <= maxBytes) return value;
127
+
128
+ let low = 0;
129
+ let high = value.length;
130
+ while (low < high) {
131
+ const mid = Math.ceil((low + high) / 2);
132
+ const slice = value.slice(0, mid);
133
+ if (Buffer.byteLength(slice, 'utf8') <= maxBytes) {
134
+ low = mid;
135
+ } else {
136
+ high = mid - 1;
137
+ }
138
+ }
139
+ return value.slice(0, low);
140
+ }
141
+
142
+ function truncateToolOutput(content: string, maxBytes: number): string {
143
+ const contentBytes = Buffer.byteLength(content, 'utf8');
144
+ if (contentBytes <= maxBytes) return content;
145
+
146
+ const suffix = '... [truncated output]';
147
+ const suffixBytes = Buffer.byteLength(suffix, 'utf8');
148
+ const truncated = truncateStringByBytes(content, Math.max(0, maxBytes - suffixBytes));
149
+ return `${truncated}${suffix}`;
150
+ }
151
+
152
+ function safeJsonStringify(value: unknown): string {
153
+ try {
154
+ return JSON.stringify(value);
155
+ } catch {
156
+ const seen = new WeakSet<object>();
157
+ try {
158
+ return JSON.stringify(value, (_key, val) => {
159
+ if (typeof val === 'bigint') return val.toString();
160
+ if (typeof val === 'object' && val !== null) {
161
+ if (seen.has(val)) return '[Circular]';
162
+ seen.add(val);
163
+ }
164
+ return val;
165
+ });
166
+ } catch {
167
+ return String(value);
168
+ }
169
+ }
170
+ }
171
+
172
+ function truncateMessages(
173
+ messages: LLMMessage[],
174
+ maxHistory: number,
175
+ maxBytes: number
176
+ ): LLMMessage[] {
177
+ if (messages.length === 0) return messages;
178
+
179
+ // Keep all system messages
180
+ const systemMessages = messages.filter((m) => m.role === 'system');
181
+ const nonSystem = messages.filter((m) => m.role !== 'system');
182
+
183
+ // Keep most recent non-system messages, accounting for system messages
184
+ const nonSystemLimit = Math.max(0, maxHistory - systemMessages.length);
185
+ let keep = nonSystem.slice(-nonSystemLimit);
186
+
187
+ // Enforce total byte budget with a most-recent tail
188
+ if (maxBytes > 0) {
189
+ const systemBytes = systemMessages.reduce((total, msg) => total + estimateMessageBytes(msg), 0);
190
+ let remaining = maxBytes - systemBytes;
191
+ if (remaining <= 0) {
192
+ return systemMessages;
193
+ }
194
+
195
+ const tail: LLMMessage[] = [];
196
+ for (let i = keep.length - 1; i >= 0; i--) {
197
+ const msg = keep[i];
198
+ const msgBytes = estimateMessageBytes(msg);
199
+ if (msgBytes > remaining) break;
200
+ tail.push(msg);
201
+ remaining -= msgBytes;
202
+ }
203
+ keep = tail.reverse();
204
+ }
205
+
206
+ return [...systemMessages, ...keep];
207
+ }
208
+
209
+ function extractThoughtBlocks(content: string): { content: string; thoughts: string[] } {
210
+ if (!content) return { content, thoughts: [] };
211
+ const thoughts: string[] = [];
212
+ let remaining = content;
213
+
214
+ while (true) {
215
+ const lower = remaining.toLowerCase();
216
+ const openIndex = lower.indexOf(THINKING_OPEN_TAG);
217
+ if (openIndex === -1) break;
218
+ const closeIndex = lower.indexOf(THINKING_CLOSE_TAG, openIndex + THINKING_OPEN_TAG.length);
219
+ if (closeIndex === -1) break;
220
+
221
+ const before = remaining.slice(0, openIndex);
222
+ const thought = remaining.slice(openIndex + THINKING_OPEN_TAG.length, closeIndex).trim();
223
+ const after = remaining.slice(closeIndex + THINKING_CLOSE_TAG.length);
224
+ if (thought) {
225
+ thoughts.push(thought);
226
+ }
227
+ remaining = `${before}${after}`;
228
+ }
229
+
230
+ return { content: remaining, thoughts };
231
+ }
232
+
233
+ function estimateConversationBytes(messages: LLMMessage[]): number {
234
+ return messages.reduce((total, msg) => total + estimateMessageBytes(msg), 0);
235
+ }
236
+
237
+ function resolveSummaryModel(fullModelString: string, resolvedModel: string): string {
238
+ try {
239
+ const providerName = ConfigLoader.getProviderForModel(fullModelString);
240
+ const config = ConfigLoader.load();
241
+ const providerType = config.providers[providerName]?.type;
242
+ return SUMMARY_MODEL_BY_PROVIDER_TYPE[providerType] ?? resolvedModel;
243
+ } catch {
244
+ return resolvedModel;
245
+ }
246
+ }
247
+
248
+ function formatMessageForSummary(message: LLMMessage): string {
249
+ const roleLabel = message.name ? `${message.role}(${message.name})` : message.role;
250
+ const parts: string[] = [];
251
+
252
+ if (typeof message.content === 'string' && message.content.length > 0) {
253
+ parts.push(message.content);
254
+ }
255
+ if (message.tool_calls && message.tool_calls.length > 0) {
256
+ parts.push(`tool_calls: ${safeJsonStringify(message.tool_calls)}`);
257
+ }
258
+ if (message.reasoning?.summary) {
259
+ parts.push(`reasoning_summary: ${message.reasoning.summary}`);
260
+ }
261
+
262
+ const combined = parts.join('\n').trim();
263
+ const trimmed = combined ? truncateStringByBytes(combined, SUMMARY_INPUT_MESSAGE_MAX_BYTES) : '';
264
+ return `[${roleLabel}]${trimmed ? ` ${trimmed}` : ''}`;
265
+ }
266
+
267
+ function buildSummaryInput(messages: LLMMessage[]): string {
268
+ const lines: string[] = [];
269
+ let remaining = SUMMARY_INPUT_TOTAL_MAX_BYTES;
270
+
271
+ for (const message of messages) {
272
+ const formatted = formatMessageForSummary(message);
273
+ const bytes = Buffer.byteLength(formatted, 'utf8');
274
+ if (bytes > remaining) {
275
+ if (remaining > 0) {
276
+ lines.push(truncateStringByBytes(formatted, remaining));
277
+ }
278
+ break;
279
+ }
280
+ lines.push(formatted);
281
+ remaining -= bytes;
282
+ }
283
+
284
+ return lines.join('\n');
285
+ }
286
+
287
/**
 * Compress conversation history when it exceeds the message-count or byte
 * budget: older non-system messages are summarized via an extra LLM call and
 * replaced by a single system "summary" message (tagged SUMMARY_MESSAGE_NAME),
 * while the most recent messages are kept verbatim.
 *
 * Returns the (possibly) rewritten message list, the token usage of the
 * summarization call if one was made, and whether summarization happened.
 * Falls back to plain truncateMessages() when summarizing is impossible
 * (no headroom for non-system messages, or nothing to summarize).
 *
 * @throws Error when the summary model returns empty content.
 */
async function summarizeMessagesIfNeeded(
  messages: LLMMessage[],
  options: {
    maxHistory: number;
    maxBytes: number;
    adapter: LLMAdapter;
    summaryModel: string;
    abortSignal?: AbortSignal;
  }
): Promise<{ messages: LLMMessage[]; usage?: LLMResponse['usage']; summarized: boolean }> {
  // Partition: real system prompts, previous summary messages, and the rest.
  const systemMessages = messages.filter(
    (m) => m.role === 'system' && m.name !== SUMMARY_MESSAGE_NAME
  );
  const summaryMessages = messages.filter(
    (m) => m.role === 'system' && m.name === SUMMARY_MESSAGE_NAME
  );
  const nonSystemMessages = messages.filter((m) => m.role !== 'system');

  // Reserve one slot for the summary message we may insert (hence the -1).
  const maxNonSystem = Math.max(0, options.maxHistory - systemMessages.length - 1);
  const overCount = nonSystemMessages.length > maxNonSystem;
  const overBytes = options.maxBytes > 0 && estimateConversationBytes(messages) > options.maxBytes;

  // Within both budgets: nothing to do.
  if (!overCount && !overBytes) {
    return { messages, summarized: false };
  }

  // No room for any non-system message at all — summarizing cannot help;
  // fall back to hard truncation.
  if (maxNonSystem <= 0) {
    return {
      messages: truncateMessages(messages, options.maxHistory, options.maxBytes),
      summarized: false,
    };
  }

  // Byte budget available for the kept tail, after charging system messages
  // and the (max-size) summary message we plan to add.
  const systemBytes = systemMessages.reduce((total, msg) => total + estimateMessageBytes(msg), 0);
  const availableBytes =
    options.maxBytes > 0
      ? options.maxBytes - systemBytes - SUMMARY_MESSAGE_MAX_BYTES
      : Number.POSITIVE_INFINITY;

  // Build the kept tail newest-first (reversed back into order at the end).
  const tail: LLMMessage[] = [];
  let tailBytes = 0;
  for (let i = nonSystemMessages.length - 1; i >= 0; i--) {
    if (tail.length >= maxNonSystem) break;
    const msgBytes = estimateMessageBytes(nonSystemMessages[i]);
    if (options.maxBytes > 0 && tailBytes + msgBytes > availableBytes) {
      break;
    }
    tail.push(nonSystemMessages[i]);
    tailBytes += msgBytes;
  }

  const keepCount = tail.length;
  const summarizeCount = nonSystemMessages.length - keepCount;
  if (summarizeCount <= 0) {
    // Everything fits in the tail; no older messages to fold into a summary.
    return { messages, summarized: false };
  }

  // Everything older than the tail gets summarized, folded together with any
  // summary text produced by previous rounds.
  const toSummarize = nonSystemMessages.slice(0, summarizeCount);
  const existingSummary = summaryMessages
    .map((m) => (typeof m.content === 'string' ? m.content : ''))
    .filter((content) => content.trim().length > 0)
    .join('\n');
  const summaryInput = buildSummaryInput(toSummarize);

  // Nothing usable to summarize — fall back to hard truncation.
  if (!summaryInput.trim() && !existingSummary.trim()) {
    return {
      messages: truncateMessages(messages, options.maxHistory, options.maxBytes),
      summarized: false,
    };
  }

  const promptParts: string[] = [];
  if (existingSummary.trim()) {
    promptParts.push(`Existing summary:\n${existingSummary}`);
  }
  if (summaryInput.trim()) {
    promptParts.push(`Messages to summarize:\n${summaryInput}`);
  }

  // One extra LLM round-trip to produce the rolled-up summary.
  const response = await options.adapter.chat(
    [
      {
        role: 'system',
        content:
          'Summarize the conversation history for continued work. Focus on decisions, constraints, outputs, and open questions. Be concise and factual. Use short bullet points.',
      },
      {
        role: 'user',
        content: promptParts.join('\n\n'),
      },
    ],
    {
      model: options.summaryModel,
      signal: options.abortSignal,
    }
  );

  const summaryText =
    typeof response.message.content === 'string' ? response.message.content.trim() : '';
  if (!summaryText) {
    throw new Error('Summary model returned empty content');
  }

  // Cap the stored summary so it can never itself blow the byte budget.
  const summaryContent = truncateStringByBytes(
    `Context summary:\n${summaryText}`,
    SUMMARY_MESSAGE_MAX_BYTES
  );

  const summaryMessage: LLMMessage = {
    role: 'system',
    name: SUMMARY_MESSAGE_NAME,
    content: summaryContent,
  };

  // tail was collected newest-first; reverse() restores chronological order.
  const combinedMessages = [...systemMessages, summaryMessage, ...tail.reverse()];

  // Final truncation pass as a safety net against estimate drift.
  return {
    messages: truncateMessages(combinedMessages, options.maxHistory, options.maxBytes),
    usage: response.usage,
    summarized: true,
  };
}
409
+
410
// A tool exposed to the model during an LLM step, normalized from any of the
// sources that can contribute tools (agent file, step config, MCP server,
// built-in standard tools, or an engine handoff).
interface ToolDefinition {
  name: string;
  // Human-readable description forwarded to the model.
  description?: string;
  // JSON-schema-like parameter definition, forwarded to the model verbatim.
  parameters: unknown;
  // Where the tool came from; used for duplicate-name error reporting.
  source: 'agent' | 'step' | 'mcp' | 'standard' | 'handoff';
  // Workflow step executed when the model calls a non-MCP tool.
  execution?: Step;
  // Connected client used to invoke the tool when source === 'mcp'.
  mcpClient?: MCPClient;
}
418
+
419
+ export async function executeLlmStep(
420
+ step: LlmStep,
421
+ context: ExpressionContext,
422
+ executeStepFn: (step: Step, context: ExpressionContext) => Promise<StepResult>,
423
+ logger: Logger = new ConsoleLogger(),
424
+ mcpManager?: MCPManager,
425
+ workflowDir?: string,
426
+ abortSignal?: AbortSignal,
427
+ getAdapterFn?: typeof getAdapter,
428
+ emitEvent?: (event: WorkflowEvent) => void,
429
+ eventContext?: LlmEventContext
430
+ ): Promise<StepResult> {
431
+ const agentName = ExpressionEvaluator.evaluateString(step.agent, context);
432
+ const agentPath = resolveAgentPath(agentName, workflowDir);
433
+ let activeAgent = parseAgent(agentPath);
434
+
435
+ const providerRaw = step.provider || activeAgent.provider;
436
+ const modelRaw = step.model || activeAgent.model || 'gpt-4o';
437
+
438
+ const provider = providerRaw
439
+ ? ExpressionEvaluator.evaluateString(providerRaw, context)
440
+ : undefined;
441
+ const model = ExpressionEvaluator.evaluateString(modelRaw, context);
442
+ const prompt = ExpressionEvaluator.evaluateString(step.prompt, context);
443
+
444
+ const fullModelString = provider ? `${provider}:${model}` : model;
445
+ const { adapter, resolvedModel } = (getAdapterFn || getAdapter)(fullModelString);
446
+
447
+ const buildSystemPrompt = (agent: Agent): string => {
448
+ let systemPrompt = ExpressionEvaluator.evaluateString(agent.systemPrompt, context);
449
+
450
+ // Inject project context if enabled
451
+ const projectContext = ContextInjector.getContext(workflowDir || process.cwd(), []);
452
+ const contextAddition = ContextInjector.generateSystemPromptAddition(projectContext);
453
+ if (contextAddition) {
454
+ systemPrompt = `${contextAddition}\n\n${systemPrompt}`;
455
+ }
456
+
457
+ if (step.outputSchema) {
458
+ systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.outputSchema, null, 2)}`;
459
+ }
460
+ return systemPrompt;
461
+ };
462
+ let systemPrompt = buildSystemPrompt(activeAgent);
463
+
464
+ let messages: LLMMessage[] = [];
465
+ const maxToolOutputBytes = LIMITS.MAX_TOOL_OUTPUT_BYTES;
466
+ const updateSystemPromptMessage = (newPrompt: string) => {
467
+ const systemMessage = messages.find(
468
+ (message) => message.role === 'system' && message.name !== SUMMARY_MESSAGE_NAME
469
+ );
470
+ if (systemMessage) {
471
+ systemMessage.content = newPrompt;
472
+ return;
473
+ }
474
+ messages.unshift({ role: 'system', content: newPrompt });
475
+ };
476
+
477
+ // Resume from state if provided
478
+ const stepState =
479
+ context.steps && typeof context.steps === 'object'
480
+ ? (context.steps as Record<string, { output?: unknown }>)[step.id]
481
+ : undefined;
482
+ const stepOutput = stepState?.output;
483
+ const resumeOutput =
484
+ stepOutput && typeof stepOutput === 'object' && 'messages' in stepOutput
485
+ ? stepOutput
486
+ : context.output;
487
+
488
+ if (resumeOutput && typeof resumeOutput === 'object' && 'messages' in resumeOutput) {
489
+ messages.push(...(resumeOutput.messages as LLMMessage[]));
490
+
491
+ // If we have an answer in inputs, add it as a tool result for the last tool call
492
+ const stepInputs = context.inputs?.[step.id] as Record<string, unknown> | undefined;
493
+ if (stepInputs && typeof stepInputs === 'object' && '__answer' in stepInputs) {
494
+ const answer = stepInputs.__answer;
495
+ const lastMessage = messages[messages.length - 1];
496
+ const askCall = lastMessage?.tool_calls?.find((tc) => tc.function.name === 'ask');
497
+ if (askCall) {
498
+ messages.push({
499
+ role: 'tool',
500
+ tool_call_id: askCall.id,
501
+ name: 'ask',
502
+ content: truncateToolOutput(String(answer), maxToolOutputBytes),
503
+ });
504
+ }
505
+ }
506
+ updateSystemPromptMessage(systemPrompt);
507
+ } else {
508
+ messages.push({ role: 'system', content: systemPrompt }, { role: 'user', content: prompt });
509
+ }
510
+
511
+ const localMcpClients: MCPClient[] = [];
512
+ const baseTools: ToolDefinition[] = [];
513
+
514
+ try {
515
+ const registerBaseTool = (tool: ToolDefinition) => {
516
+ baseTools.push(tool);
517
+ };
518
+
519
+ // 1. Add step tools
520
+ if (step.tools) {
521
+ for (const tool of step.tools) {
522
+ registerBaseTool({
523
+ name: tool.name,
524
+ description: tool.description,
525
+ parameters: tool.parameters || {
526
+ type: 'object',
527
+ properties: {},
528
+ additionalProperties: true,
529
+ },
530
+ source: 'step',
531
+ execution: tool.execution,
532
+ });
533
+ }
534
+ }
535
+
536
+ // 2. Add Standard tools
537
+ if (step.useStandardTools) {
538
+ for (const tool of STANDARD_TOOLS) {
539
+ registerBaseTool({
540
+ name: tool.name,
541
+ description: tool.description,
542
+ parameters: tool.parameters || {
543
+ type: 'object',
544
+ properties: {},
545
+ additionalProperties: true,
546
+ },
547
+ source: 'standard',
548
+ execution: tool.execution,
549
+ });
550
+ }
551
+ }
552
+
553
+ // 3. Add Engine handoff tool
554
+ if (step.handoff) {
555
+ const toolName = step.handoff.name || 'handoff';
556
+ const description =
557
+ step.handoff.description || `Delegate to engine ${step.handoff.engine.command}`;
558
+ const parameters = step.handoff.inputSchema || {
559
+ type: 'object',
560
+ properties: {},
561
+ additionalProperties: true,
562
+ };
563
+
564
+ const handoffStep: Step = {
565
+ id: `${step.id}-handoff`,
566
+ type: 'engine',
567
+ command: step.handoff.engine.command,
568
+ args: step.handoff.engine.args,
569
+ env: step.handoff.engine.env,
570
+ cwd: step.handoff.engine.cwd,
571
+ timeout: step.handoff.engine.timeout,
572
+ outputSchema: step.handoff.engine.outputSchema,
573
+ input: step.handoff.engine.input ?? '${{ args }}',
574
+ };
575
+
576
+ registerBaseTool({
577
+ name: toolName,
578
+ description,
579
+ parameters,
580
+ source: 'handoff',
581
+ execution: handoffStep,
582
+ });
583
+ }
584
+
585
+ // 4. Add MCP tools
586
+ const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
587
+ if (step.useGlobalMcp && mcpManager) {
588
+ const globalServers = mcpManager.getGlobalServers();
589
+ for (const globalServer of globalServers) {
590
+ // Only add if not already explicitly listed
591
+ const alreadyListed = mcpServersToConnect.some((s) => {
592
+ const name = typeof s === 'string' ? s : s.name;
593
+ return name === globalServer.name;
594
+ });
595
+ if (!alreadyListed) {
596
+ mcpServersToConnect.push(globalServer);
597
+ }
598
+ }
599
+ }
600
+
601
+ if (mcpServersToConnect.length > 0) {
602
+ await Promise.all(
603
+ mcpServersToConnect.map(async (server) => {
604
+ let client: MCPClient | undefined;
605
+ const serverName = typeof server === 'string' ? server : server.name;
606
+
607
+ try {
608
+ if (mcpManager) {
609
+ client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
610
+ } else {
611
+ // Fallback if no manager (should not happen in normal workflow run)
612
+ if (typeof server === 'string') {
613
+ logger.error(
614
+ ` ✗ Cannot reference global MCP server '${server}' without MCPManager`
615
+ );
616
+ return;
617
+ }
618
+ logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
619
+ client = await MCPClient.createLocal(
620
+ (server as MCPServerConfig).command || 'node',
621
+ (server as MCPServerConfig).args || [],
622
+ (server as MCPServerConfig).env || {}
623
+ );
624
+ await client.initialize();
625
+ localMcpClients.push(client);
626
+ }
627
+
628
+ if (client) {
629
+ const mcpTools = await client.listTools();
630
+ for (const tool of mcpTools) {
631
+ registerBaseTool({
632
+ name: tool.name,
633
+ description: tool.description,
634
+ parameters: tool.inputSchema,
635
+ source: 'mcp',
636
+ mcpClient: client,
637
+ });
638
+ }
639
+ }
640
+ } catch (error) {
641
+ logger.error(
642
+ ` ✗ Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
643
+ );
644
+ if (!mcpManager && client) {
645
+ client.stop();
646
+ }
647
+ }
648
+ })
649
+ );
650
+ }
651
+
652
+ const buildToolsForAgent = (agent: Agent) => {
653
+ const allTools: ToolDefinition[] = [];
654
+ const toolRegistry = new Map<string, string>();
655
+ const registerTool = (tool: ToolDefinition) => {
656
+ const existing = toolRegistry.get(tool.name);
657
+ if (existing) {
658
+ throw new Error(
659
+ `Duplicate tool name "${tool.name}" from ${tool.source}; already defined by ${existing}. Rename one of them.`
660
+ );
661
+ }
662
+ toolRegistry.set(tool.name, tool.source);
663
+ allTools.push(tool);
664
+ };
665
+
666
+ for (const tool of agent.tools) {
667
+ registerTool({
668
+ name: tool.name,
669
+ description: tool.description,
670
+ parameters: tool.parameters || {
671
+ type: 'object',
672
+ properties: {},
673
+ additionalProperties: true,
674
+ },
675
+ source: 'agent',
676
+ execution: tool.execution,
677
+ });
678
+ }
679
+
680
+ for (const tool of baseTools) {
681
+ registerTool(tool);
682
+ }
683
+
684
+ const llmTools = allTools.map((t) => ({
685
+ type: 'function' as const,
686
+ function: {
687
+ name: t.name,
688
+ description: t.description,
689
+ parameters: t.parameters as Record<string, unknown>,
690
+ },
691
+ }));
692
+
693
+ if (step.allowClarification) {
694
+ if (toolRegistry.has('ask')) {
695
+ throw new Error(
696
+ 'Tool name "ask" is reserved for clarification. Rename your tool or disable allowClarification.'
697
+ );
698
+ }
699
+ llmTools.push({
700
+ type: 'function' as const,
701
+ function: {
702
+ name: 'ask',
703
+ description:
704
+ 'Ask the user a clarifying question if the initial request is ambiguous or missing information.',
705
+ parameters: {
706
+ type: 'object',
707
+ properties: {
708
+ question: {
709
+ type: 'string',
710
+ description: 'The question to ask the user',
711
+ },
712
+ },
713
+ required: ['question'],
714
+ } as Record<string, unknown>,
715
+ },
716
+ });
717
+ }
718
+
719
+ if (step.allowedHandoffs && step.allowedHandoffs.length > 0) {
720
+ if (toolRegistry.has(TRANSFER_TOOL_NAME)) {
721
+ throw new Error(
722
+ `Tool name "${TRANSFER_TOOL_NAME}" is reserved for agent handoffs. Rename your tool or disable allowedHandoffs.`
723
+ );
724
+ }
725
+ llmTools.push({
726
+ type: 'function' as const,
727
+ function: {
728
+ name: TRANSFER_TOOL_NAME,
729
+ description: `Transfer control to another agent. Allowed agents: ${step.allowedHandoffs.join(', ')}`,
730
+ parameters: {
731
+ type: 'object',
732
+ properties: {
733
+ agent_name: {
734
+ type: 'string',
735
+ description: 'The name of the agent to transfer to',
736
+ },
737
+ },
738
+ required: ['agent_name'],
739
+ } as Record<string, unknown>,
740
+ },
741
+ });
742
+ }
743
+
744
+ return { allTools, llmTools };
745
+ };
746
+
747
// Tool sets for the currently active agent. Both are rebuilt on every agent
// handoff via refreshToolsForAgent so the LLM only sees the active agent's tools.
let allTools: ToolDefinition[] = [];
let llmTools: {
  type: 'function';
  function: { name: string; description?: string; parameters: Record<string, unknown> };
}[] = [];

// Recompute both tool views for the given agent (used at start and on handoff).
const refreshToolsForAgent = (agent: Agent) => {
  const toolSet = buildToolsForAgent(agent);
  allTools = toolSet.allTools;
  llmTools = toolSet.llmTools;
};

// Prime the tool sets for the initial agent before the ReAct loop starts.
refreshToolsForAgent(activeAgent);
760
+ const applyContextUpdate = (value: unknown): unknown => {
761
+ if (!value || typeof value !== 'object' || Array.isArray(value)) {
762
+ return value;
763
+ }
764
+
765
+ const record = value as Record<string, unknown>;
766
+ if (!(CONTEXT_UPDATE_KEY in record)) {
767
+ return value;
768
+ }
769
+
770
+ const update = record[CONTEXT_UPDATE_KEY];
771
+ if (update && typeof update === 'object' && !Array.isArray(update)) {
772
+ const updateRecord = update as Record<string, unknown>;
773
+
774
+ if (
775
+ updateRecord.env &&
776
+ typeof updateRecord.env === 'object' &&
777
+ !Array.isArray(updateRecord.env)
778
+ ) {
779
+ const envUpdates = updateRecord.env as Record<string, unknown>;
780
+ context.env = context.env ?? {};
781
+ context.envOverrides = context.envOverrides ?? {};
782
+ for (const [key, val] of Object.entries(envUpdates)) {
783
+ if (val === undefined) continue;
784
+ const stringValue =
785
+ typeof val === 'string'
786
+ ? val
787
+ : (() => {
788
+ const json = safeJsonStringify(val);
789
+ return typeof json === 'string' ? json : String(val);
790
+ })();
791
+ context.env[key] = stringValue;
792
+ context.envOverrides[key] = stringValue;
793
+ }
794
+ }
795
+
796
+ if (
797
+ updateRecord.memory &&
798
+ typeof updateRecord.memory === 'object' &&
799
+ !Array.isArray(updateRecord.memory)
800
+ ) {
801
+ context.memory = context.memory ?? {};
802
+ Object.assign(context.memory, updateRecord.memory as Record<string, unknown>);
803
+ }
804
+ }
805
+
806
+ const { [CONTEXT_UPDATE_KEY]: _ignored, ...cleaned } = record;
807
+ return cleaned;
808
+ };
809
// Switch the active agent mid-conversation (handoff): rebuild the system
// prompt for the new agent, swap it into the message history, and refresh the
// tool set to the new agent's tools. Order matters: prompt before tools.
const applyAgentTransfer = (nextAgent: Agent) => {
  activeAgent = nextAgent;
  systemPrompt = buildSystemPrompt(activeAgent);
  updateSystemPromptMessage(systemPrompt);
  refreshToolsForAgent(activeAgent);
};
815
+
816
// ReAct Loop
let iterations = 0;
const maxIterations = step.maxIterations || 10;
// Token usage accumulated across every LLM call in this step (including
// summarization calls).
const totalUsage = {
  prompt_tokens: 0,
  completion_tokens: 0,
  total_tokens: 0,
};

// Create redactor once outside the loop for performance (regex compilation)
const redactor = new Redactor(context.secrets || {}, {
  forcedSecrets: context.secretValues || [],
});
const redactionBuffer = new RedactionBuffer(redactor);
// Conversation-growth limits and the strategy for staying under them.
const maxHistory = step.maxMessageHistory || LIMITS.MAX_MESSAGE_HISTORY;
const maxConversationBytes = LIMITS.MAX_CONVERSATION_BYTES;
const contextStrategy = step.contextStrategy || 'truncate';
// Model used for history summarization; falls back to the step's own model
// when summarization is not in play.
const summaryModel =
  contextStrategy === 'summary' || contextStrategy === 'auto'
    ? resolveSummaryModel(fullModelString, resolvedModel)
    : resolvedModel;
// Cap tool output size before it re-enters the conversation.
const formatToolContent = (content: string): string =>
  truncateToolOutput(content, maxToolOutputBytes);
const eventTimestamp = () => new Date().toISOString();
840
+ const emitThought = (content: string, source: 'thinking' | 'reasoning') => {
841
+ const trimmed = redactor.redact(content.trim());
842
+ if (!trimmed) return;
843
+ logger.info(`💭 Thought (${source}): ${trimmed}`);
844
+ if (emitEvent && eventContext?.runId && eventContext?.workflow) {
845
+ emitEvent({
846
+ type: 'llm.thought',
847
+ timestamp: eventTimestamp(),
848
+ runId: eventContext.runId,
849
+ workflow: eventContext.workflow,
850
+ stepId: step.id,
851
+ content: trimmed,
852
+ source,
853
+ });
854
+ }
855
+ };
856
+ const thoughtStream = step.outputSchema ? null : new ThoughtStreamParser();
857
+ let streamedThoughts = 0;
858
+ const handleStreamChunk = (chunk: string) => {
859
+ const redactedChunk = redactionBuffer.process(chunk);
860
+ if (!thoughtStream) {
861
+ process.stdout.write(redactedChunk);
862
+ return;
863
+ }
864
+ const parsed = thoughtStream.process(redactedChunk);
865
+ if (parsed.output) {
866
+ process.stdout.write(parsed.output);
867
+ }
868
+ for (const thought of parsed.thoughts) {
869
+ emitThought(thought, 'thinking');
870
+ streamedThoughts += 1;
871
+ }
872
+ };
873
+ const flushStream = () => {
874
+ const flushed = redactionBuffer.flush();
875
+ if (!thoughtStream) {
876
+ process.stdout.write(flushed);
877
+ return;
878
+ }
879
+ const parsed = thoughtStream.process(flushed);
880
+ if (parsed.output) {
881
+ process.stdout.write(parsed.output);
882
+ }
883
+ for (const thought of parsed.thoughts) {
884
+ emitThought(thought, 'thinking');
885
+ streamedThoughts += 1;
886
+ }
887
+ const final = thoughtStream.flush();
888
+ if (final.output) {
889
+ process.stdout.write(final.output);
890
+ }
891
+ for (const thought of final.thoughts) {
892
+ emitThought(thought, 'thinking');
893
+ streamedThoughts += 1;
894
+ }
895
+ };
896
+ const applyContextStrategy = async () => {
897
+ if (contextStrategy === 'summary' || contextStrategy === 'auto') {
898
+ try {
899
+ const result = await summarizeMessagesIfNeeded(messages, {
900
+ maxHistory,
901
+ maxBytes: maxConversationBytes,
902
+ adapter,
903
+ summaryModel,
904
+ abortSignal,
905
+ });
906
+ messages = result.messages;
907
+ if (result.usage) {
908
+ totalUsage.prompt_tokens += result.usage.prompt_tokens;
909
+ totalUsage.completion_tokens += result.usage.completion_tokens;
910
+ totalUsage.total_tokens += result.usage.total_tokens;
911
+ }
912
+ return;
913
+ } catch (error) {
914
+ logger.warn(
915
+ `Context summarization failed: ${error instanceof Error ? error.message : String(error)}`
916
+ );
917
+ }
918
+ }
919
+
920
+ messages = truncateMessages(messages, maxHistory, maxConversationBytes);
921
+ };
922
+
923
// Main ReAct loop: call the model, then either (1) return a native structured
// output, (2) return direct content when there are no tool calls, or (3)
// execute every requested tool call and iterate. Exits by return, by
// cancellation, or by exhausting maxIterations.
while (iterations < maxIterations) {
  iterations++;
  if (abortSignal?.aborted) {
    throw new Error('Step canceled');
  }
  streamedThoughts = 0;

  // Apply context strategy to prevent unbounded growth
  await applyContextStrategy();
  const truncatedMessages = messages;

  const response = await adapter.chat(truncatedMessages, {
    model: resolvedModel,
    tools: llmTools.length > 0 ? llmTools : undefined,
    // Streaming is only surfaced when there is no output schema; schema runs
    // produce structured output that is not streamed to stdout.
    onStream: (chunk) => {
      if (!step.outputSchema) {
        handleStreamChunk(chunk);
      }
    },
    signal: abortSignal,
    responseSchema: step.outputSchema,
  });

  if (!step.outputSchema) {
    flushStream();
  }

  if (response.usage) {
    totalUsage.prompt_tokens += response.usage.prompt_tokens;
    totalUsage.completion_tokens += response.usage.completion_tokens;
    totalUsage.total_tokens += response.usage.total_tokens;
  }

  let { message } = response;
  // Strip thinking blocks out of the final content; only emit them here if
  // none were already emitted during streaming (avoids duplicates).
  if (typeof message.content === 'string' && message.content.length > 0) {
    const extracted = extractThoughtBlocks(message.content);
    if (extracted.content !== message.content) {
      message = { ...message, content: extracted.content };
    }
    if (streamedThoughts === 0) {
      for (const thought of extracted.thoughts) {
        emitThought(thought, 'thinking');
      }
    }
  }
  if (message.reasoning?.summary) {
    emitThought(message.reasoning.summary, 'reasoning');
  }

  messages.push(message);

  // 1. Check for native record_output tool call (forced by Anthropic adapter)
  const recordOutputCall = message.tool_calls?.find(
    (tc) => tc.function.name === 'record_output'
  );
  if (step.outputSchema && recordOutputCall) {
    let output: any;
    try {
      output =
        typeof recordOutputCall.function.arguments === 'string'
          ? JSON.parse(recordOutputCall.function.arguments)
          : recordOutputCall.function.arguments;
      return { status: 'success', output, usage: totalUsage };
    } catch (e) {
      logger.error(`Failed to parse native structured output: ${e}`);
      // Fall through to regular tool execution or retry if needed
    }
  }

  // 2. Handle direct output if no tool calls
  if (!message.tool_calls || message.tool_calls.length === 0) {
    let output: any = message.content;

    // If schema is defined, attempt to parse JSON
    if (step.outputSchema) {
      if (typeof output === 'string') {
        try {
          output = extractJson(output);
        } catch (e) {
          // Feed the parse error back to the model and retry the iteration.
          const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
          logger.error(` ⚠️ ${errorMessage}. Retrying...`);

          messages.push({
            role: 'user',
            content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
          });
          continue;
        }
      }
    }

    return {
      output,
      status: 'success',
      usage: totalUsage,
    };
  }

  // 3. Execute tools
  // A handoff is deferred until every tool call in this message has run, so
  // all tool results are recorded against the agent that requested them.
  let pendingTransfer: Agent | null = null;
  for (const toolCall of message.tool_calls) {
    if (abortSignal?.aborted) {
      throw new Error('Step canceled');
    }
    // Build a short (<=100 char) preview of the arguments for logging only.
    const argsStr = toolCall.function.arguments;
    let displayArgs = '';
    try {
      const parsedArgs = JSON.parse(argsStr);
      const keys = Object.keys(parsedArgs);
      if (keys.length > 0) {
        const formatted = JSON.stringify(parsedArgs);
        displayArgs = formatted.length > 100 ? `${formatted.substring(0, 100)}...` : formatted;
      }
    } catch (e) {
      displayArgs = argsStr.length > 100 ? `${argsStr.substring(0, 100)}...` : argsStr;
    }

    logger.log(
      ` 🛠️ Tool Call: ${toolCall.function.name}${displayArgs ? ` ${displayArgs}` : ''}`
    );
    const toolInfo = allTools.find((t) => t.name === toolCall.function.name);

    if (!toolInfo) {
      // Unregistered name: may still be one of the reserved runtime tools
      // (handoff transfer or clarification "ask").
      if (toolCall.function.name === TRANSFER_TOOL_NAME) {
        if (!step.allowedHandoffs || step.allowedHandoffs.length === 0) {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: TRANSFER_TOOL_NAME,
            content: formatToolContent('Error: Agent handoffs are not enabled for this step.'),
          });
          continue;
        }

        let args: { agent_name?: string };
        try {
          args = JSON.parse(toolCall.function.arguments);
        } catch (e) {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: TRANSFER_TOOL_NAME,
            content: formatToolContent(
              `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`
            ),
          });
          continue;
        }

        if (!args.agent_name || typeof args.agent_name !== 'string') {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: TRANSFER_TOOL_NAME,
            content: formatToolContent('Error: "agent_name" must be a string.'),
          });
          continue;
        }

        if (!step.allowedHandoffs.includes(args.agent_name)) {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: TRANSFER_TOOL_NAME,
            content: formatToolContent(
              `Error: Agent "${args.agent_name}" is not allowed for this step.`
            ),
          });
          continue;
        }

        try {
          // Resolve and parse the target agent now so a bad reference surfaces
          // as a tool error instead of failing the step after the transfer.
          const nextAgentPath = resolveAgentPath(args.agent_name, workflowDir);
          const nextAgent = parseAgent(nextAgentPath);
          pendingTransfer = nextAgent;
          logger.log(` 🔁 Handoff: ${activeAgent.name} → ${args.agent_name}`);
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: TRANSFER_TOOL_NAME,
            content: formatToolContent(`Transferred to agent ${args.agent_name}.`),
          });
        } catch (error) {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: TRANSFER_TOOL_NAME,
            content: formatToolContent(
              `Error: ${error instanceof Error ? error.message : String(error)}`
            ),
          });
        }
        continue;
      }

      if (toolCall.function.name === 'ask' && step.allowClarification) {
        let args: { question: string };
        try {
          args = JSON.parse(toolCall.function.arguments);
        } catch (e) {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: 'ask',
            content: formatToolContent(
              `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`
            ),
          });
          continue;
        }

        if (process.stdin.isTTY) {
          // In TTY, we can use a human step to get the answer immediately
          logger.log(`\n🤔 Question from ${activeAgent.name}: ${args.question}`);
          const result = await executeStepFn(
            {
              id: `${step.id}-clarify`,
              type: 'human',
              message: args.question,
              inputType: 'text',
            } as Step,
            context
          );

          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: 'ask',
            content: formatToolContent(String(result.output)),
          });
          continue;
        }
        // In non-TTY, we suspend
        // (history is compacted first so the persisted state stays bounded).
        await applyContextStrategy();
        return {
          status: 'suspended',
          output: {
            messages,
            question: args.question,
          },
          usage: totalUsage,
        };
      }

      messages.push({
        role: 'tool',
        tool_call_id: toolCall.id,
        name: toolCall.function.name,
        content: formatToolContent(`Error: Tool ${toolCall.function.name} not found`),
      });
      continue;
    }

    let args: Record<string, unknown>;
    try {
      args = JSON.parse(toolCall.function.arguments);
    } catch (e) {
      messages.push({
        role: 'tool',
        tool_call_id: toolCall.id,
        name: toolCall.function.name,
        content: formatToolContent(
          `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`
        ),
      });
      continue;
    }

    if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
      // MCP-backed tool: delegate to the server; result may carry a context
      // update envelope, which applyContextUpdate strips and applies.
      try {
        const result = await toolInfo.mcpClient.callTool(toolInfo.name, args);
        messages.push({
          role: 'tool',
          tool_call_id: toolCall.id,
          name: toolCall.function.name,
          content: formatToolContent(safeJsonStringify(applyContextUpdate(result))),
        });
      } catch (error) {
        messages.push({
          role: 'tool',
          tool_call_id: toolCall.id,
          name: toolCall.function.name,
          content: formatToolContent(
            `Error: ${error instanceof Error ? error.message : String(error)}`
          ),
        });
      }
    } else if (toolInfo.execution) {
      // Security validation for standard tools
      if (toolInfo.source === 'standard') {
        try {
          validateStandardToolSecurity(toolInfo.name, args, {
            allowOutsideCwd: step.allowOutsideCwd,
            allowInsecure: step.allowInsecure,
          });
        } catch (error) {
          messages.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            name: toolCall.function.name,
            content: formatToolContent(
              `Security Error: ${error instanceof Error ? error.message : String(error)}`
            ),
          });
          continue;
        }
      }

      // Execute the tool as a step
      const toolContext: ExpressionContext = {
        ...context,
        args, // Use args to pass parameters to tool execution
      };

      const result = await executeStepFn(toolInfo.execution, toolContext);
      const toolOutput =
        result.status === 'success'
          ? safeJsonStringify(applyContextUpdate(result.output))
          : `Error: ${result.error}`;

      messages.push({
        role: 'tool',
        tool_call_id: toolCall.id,
        name: toolCall.function.name,
        content: formatToolContent(toolOutput),
      });
    }
  }

  // Apply a handoff requested during this batch of tool calls.
  if (pendingTransfer) {
    applyAgentTransfer(pendingTransfer);
  }

  await applyContextStrategy();
}

throw new Error('Max ReAct iterations reached');
1260
} finally {
  // Cleanup LOCAL MCP clients only. Shared clients are managed by MCPManager.
  // NOTE(review): a local client whose listTools failed may have been stopped
  // already in the connection catch block — assumes stop() is idempotent.
  for (const client of localMcpClients) {
    client.stop();
  }
}
}