keystone-cli 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/README.md +288 -24
  2. package/package.json +8 -4
  3. package/src/cli.ts +538 -419
  4. package/src/commands/doc.ts +31 -0
  5. package/src/commands/event.ts +29 -0
  6. package/src/commands/graph.ts +37 -0
  7. package/src/commands/index.ts +14 -0
  8. package/src/commands/init.ts +185 -0
  9. package/src/commands/run.ts +124 -0
  10. package/src/commands/schema.ts +40 -0
  11. package/src/commands/utils.ts +78 -0
  12. package/src/commands/validate.ts +111 -0
  13. package/src/db/memory-db.ts +50 -2
  14. package/src/db/workflow-db.test.ts +314 -0
  15. package/src/db/workflow-db.ts +810 -210
  16. package/src/expression/evaluator-audit.test.ts +4 -2
  17. package/src/expression/evaluator.test.ts +14 -1
  18. package/src/expression/evaluator.ts +166 -19
  19. package/src/parser/config-schema.ts +18 -0
  20. package/src/parser/schema.ts +153 -22
  21. package/src/parser/test-schema.ts +6 -6
  22. package/src/parser/workflow-parser.test.ts +24 -0
  23. package/src/parser/workflow-parser.ts +65 -3
  24. package/src/runner/auto-heal.test.ts +5 -6
  25. package/src/runner/blueprint-executor.test.ts +2 -2
  26. package/src/runner/debug-repl.test.ts +5 -8
  27. package/src/runner/debug-repl.ts +59 -16
  28. package/src/runner/durable-timers.test.ts +11 -2
  29. package/src/runner/engine-executor.test.ts +1 -1
  30. package/src/runner/events.ts +57 -0
  31. package/src/runner/executors/artifact-executor.ts +166 -0
  32. package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
  33. package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
  34. package/src/runner/executors/file-executor.test.ts +48 -0
  35. package/src/runner/executors/file-executor.ts +324 -0
  36. package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
  37. package/src/runner/executors/human-executor.ts +144 -0
  38. package/src/runner/executors/join-executor.ts +75 -0
  39. package/src/runner/executors/llm-executor.ts +1266 -0
  40. package/src/runner/executors/memory-executor.ts +71 -0
  41. package/src/runner/executors/plan-executor.ts +104 -0
  42. package/src/runner/executors/request-executor.ts +265 -0
  43. package/src/runner/executors/script-executor.ts +43 -0
  44. package/src/runner/executors/shell-executor.ts +403 -0
  45. package/src/runner/executors/subworkflow-executor.ts +114 -0
  46. package/src/runner/executors/types.ts +69 -0
  47. package/src/runner/executors/wait-executor.ts +59 -0
  48. package/src/runner/join-scheduling.test.ts +197 -0
  49. package/src/runner/llm-adapter-runtime.test.ts +209 -0
  50. package/src/runner/llm-adapter.test.ts +419 -24
  51. package/src/runner/llm-adapter.ts +414 -17
  52. package/src/runner/llm-clarification.test.ts +2 -1
  53. package/src/runner/llm-executor.test.ts +532 -17
  54. package/src/runner/mcp-client-audit.test.ts +1 -2
  55. package/src/runner/mcp-client.ts +136 -46
  56. package/src/runner/mcp-manager.test.ts +4 -0
  57. package/src/runner/mcp-server.test.ts +58 -0
  58. package/src/runner/mcp-server.ts +26 -0
  59. package/src/runner/memoization.test.ts +190 -0
  60. package/src/runner/optimization-runner.ts +4 -9
  61. package/src/runner/quality-gate.test.ts +69 -0
  62. package/src/runner/reflexion.test.ts +6 -17
  63. package/src/runner/resource-pool.ts +102 -14
  64. package/src/runner/services/context-builder.ts +144 -0
  65. package/src/runner/services/secret-manager.ts +105 -0
  66. package/src/runner/services/workflow-validator.ts +131 -0
  67. package/src/runner/shell-executor.test.ts +28 -4
  68. package/src/runner/standard-tools-ast.test.ts +196 -0
  69. package/src/runner/standard-tools-execution.test.ts +27 -0
  70. package/src/runner/standard-tools-integration.test.ts +6 -10
  71. package/src/runner/standard-tools.ts +339 -102
  72. package/src/runner/step-executor.test.ts +216 -4
  73. package/src/runner/step-executor.ts +69 -941
  74. package/src/runner/stream-utils.ts +7 -3
  75. package/src/runner/test-harness.ts +20 -1
  76. package/src/runner/timeout.test.ts +10 -0
  77. package/src/runner/timeout.ts +11 -2
  78. package/src/runner/tool-integration.test.ts +1 -1
  79. package/src/runner/wait-step.test.ts +102 -0
  80. package/src/runner/workflow-runner.test.ts +208 -15
  81. package/src/runner/workflow-runner.ts +890 -818
  82. package/src/runner/workflow-scheduler.ts +75 -0
  83. package/src/runner/workflow-state.ts +269 -0
  84. package/src/runner/workflow-subflows.test.ts +13 -12
  85. package/src/scripts/generate-schemas.ts +16 -0
  86. package/src/templates/agents/explore.md +1 -0
  87. package/src/templates/agents/general.md +1 -0
  88. package/src/templates/agents/handoff-router.md +14 -0
  89. package/src/templates/agents/handoff-specialist.md +15 -0
  90. package/src/templates/agents/keystone-architect.md +13 -44
  91. package/src/templates/agents/my-agent.md +1 -0
  92. package/src/templates/agents/software-engineer.md +1 -0
  93. package/src/templates/agents/summarizer.md +1 -0
  94. package/src/templates/agents/test-agent.md +1 -0
  95. package/src/templates/agents/tester.md +1 -0
  96. package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
  97. package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
  98. package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
  99. package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
  100. package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
  101. package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
  102. package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
  103. package/src/templates/control-flow/idempotency-example.yaml +30 -0
  104. package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
  105. package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
  106. package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
  107. package/src/templates/features/artifact-example.yaml +39 -0
  108. package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
  109. package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
  110. package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
  111. package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
  112. package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
  113. package/src/templates/features/script-example.yaml +27 -0
  114. package/src/templates/patterns/agent-handoff.yaml +53 -0
  115. package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
  116. package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
  117. package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
  118. package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
  119. package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
  120. package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
  121. package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
  122. package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
  123. package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
  124. package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
  125. package/src/templates/scaffolding/review-loop.yaml +97 -0
  126. package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
  127. package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
  128. package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
  129. package/src/templates/testing/invalid.yaml +6 -0
  130. package/src/ui/dashboard.tsx +191 -33
  131. package/src/utils/auth-manager.test.ts +337 -0
  132. package/src/utils/auth-manager.ts +157 -61
  133. package/src/utils/blueprint-utils.ts +4 -6
  134. package/src/utils/config-loader.test.ts +2 -0
  135. package/src/utils/config-loader.ts +12 -3
  136. package/src/utils/constants.ts +76 -0
  137. package/src/utils/container.ts +63 -0
  138. package/src/utils/context-injector.test.ts +200 -0
  139. package/src/utils/context-injector.ts +244 -0
  140. package/src/utils/doc-generator.ts +85 -0
  141. package/src/utils/env-filter.ts +45 -0
  142. package/src/utils/json-parser.test.ts +12 -0
  143. package/src/utils/json-parser.ts +30 -5
  144. package/src/utils/logger.ts +12 -1
  145. package/src/utils/mermaid.ts +4 -0
  146. package/src/utils/paths.ts +52 -1
  147. package/src/utils/process-sandbox-worker.test.ts +46 -0
  148. package/src/utils/process-sandbox.ts +227 -14
  149. package/src/utils/redactor.test.ts +11 -6
  150. package/src/utils/redactor.ts +25 -9
  151. package/src/utils/sandbox.ts +3 -0
  152. package/src/utils/workflow-registry.test.ts +2 -2
  153. package/src/runner/llm-executor.ts +0 -638
  154. package/src/runner/shell-executor.ts +0 -366
  155. package/src/templates/invalid.yaml +0 -5
@@ -5,12 +5,14 @@ import { mkdirSync, writeFileSync } from 'node:fs';
5
5
  import { join } from 'node:path';
6
6
  import { Readable, Writable } from 'node:stream';
7
7
  import type { ExpressionContext } from '../expression/evaluator';
8
+ import { ExpressionEvaluator } from '../expression/evaluator';
9
+ import { parseAgent } from '../parser/agent-parser';
8
10
  import type { LlmStep, Step } from '../parser/schema';
11
+ import { ConsoleLogger, type Logger } from '../utils/logger';
12
+ import { executeLlmStep } from './executors/llm-executor.ts';
9
13
  import type { LLMAdapter, LLMMessage, LLMResponse, LLMTool } from './llm-adapter';
10
- import { executeLlmStep } from './llm-executor';
11
14
  import type { MCPServerConfig } from './mcp-manager';
12
15
  import type { StepResult } from './step-executor';
13
- import type { Logger } from './workflow-runner';
14
16
 
15
17
  // Mock adapters
16
18
  // Instead of mutating prototypes (which causes cross-test contamination),
@@ -187,6 +189,25 @@ tools:
187
189
  ---
188
190
  You are a test agent.`;
189
191
  writeFileSync(join(agentsDir, 'test-agent.md'), agentContent);
192
+
193
+ const handoffTargetContent = `---
194
+ name: handoff-target
195
+ model: gpt-4
196
+ tools:
197
+ - name: specialist-tool
198
+ execution:
199
+ type: shell
200
+ run: echo "specialist"
201
+ ---
202
+ You are the specialist for \${{ inputs.topic }}.`;
203
+ writeFileSync(join(agentsDir, 'handoff-target.md'), handoffTargetContent);
204
+
205
+ const contextAgentContent = `---
206
+ name: context-agent
207
+ model: gpt-4
208
+ ---
209
+ You are a context-aware agent.`;
210
+ writeFileSync(join(agentsDir, 'context-agent.md'), contextAgentContent);
190
211
  });
191
212
 
192
213
  afterAll(() => {
@@ -230,7 +251,7 @@ You are a test agent.`;
230
251
  };
231
252
  const context: ExpressionContext = { inputs: {}, steps: {} };
232
253
 
233
- const executeStepFn = async (s: Step) => {
254
+ const executeStepFn = async (s: any) => {
234
255
  if (s.type === 'shell') {
235
256
  return { status: 'success' as const, output: { stdout: 'tool result' } };
236
257
  }
@@ -262,7 +283,7 @@ You are a test agent.`;
262
283
  };
263
284
  const context: ExpressionContext = { inputs: {}, steps: {} };
264
285
 
265
- const executeStepFn = async (s: Step) => {
286
+ const executeStepFn = async (s: any) => {
266
287
  if (s.type === 'shell') {
267
288
  return { status: 'success' as const, output: { stdout: 'tool result' } };
268
289
  }
@@ -273,6 +294,8 @@ You are a test agent.`;
273
294
  log: mock(() => {}),
274
295
  error: mock(() => {}),
275
296
  warn: mock(() => {}),
297
+ info: mock(() => {}),
298
+ debug: mock(() => {}),
276
299
  };
277
300
 
278
301
  await executeLlmStep(
@@ -325,6 +348,63 @@ You are a test agent.`;
325
348
  expect(result.output).toEqual({ foo: 'bar' });
326
349
  });
327
350
 
351
+ it('should accept native structured output tool calls when responseSchema is provided', async () => {
352
+ const outputSchema = {
353
+ type: 'object',
354
+ properties: {
355
+ foo: { type: 'string' },
356
+ },
357
+ required: ['foo'],
358
+ };
359
+ let receivedSchema: unknown;
360
+
361
+ const chatMock = mock(async (_messages, options) => {
362
+ receivedSchema = options?.responseSchema;
363
+ return {
364
+ message: {
365
+ role: 'assistant',
366
+ content: null,
367
+ tool_calls: [
368
+ {
369
+ id: 'call-1',
370
+ type: 'function',
371
+ function: { name: 'record_output', arguments: '{"foo":"bar"}' },
372
+ },
373
+ ],
374
+ },
375
+ };
376
+ }) as unknown as LLMAdapter['chat'];
377
+ const getAdapter = createMockGetAdapter(chatMock);
378
+
379
+ const step: LlmStep = {
380
+ id: 'l1',
381
+ type: 'llm',
382
+ agent: 'test-agent',
383
+ prompt: 'give me json',
384
+ needs: [],
385
+ maxIterations: 5,
386
+ outputSchema,
387
+ };
388
+ const context: ExpressionContext = { inputs: {}, steps: {} };
389
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
390
+
391
+ const result = await executeLlmStep(
392
+ step,
393
+ context,
394
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
395
+ undefined,
396
+ undefined,
397
+ undefined,
398
+ undefined,
399
+ getAdapter
400
+ );
401
+
402
+ expect(receivedSchema).toEqual(outputSchema);
403
+ expect(result.status).toBe('success');
404
+ expect(result.output).toEqual({ foo: 'bar' });
405
+ expect(executeStepFn).not.toHaveBeenCalled();
406
+ });
407
+
328
408
  it('should retry if LLM output fails schema validation', async () => {
329
409
  const step: LlmStep = {
330
410
  id: 'l1',
@@ -468,7 +548,7 @@ You are a test agent.`;
468
548
  context,
469
549
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
470
550
  console,
471
- mcpManager as unknown as { getClient: () => Promise<unknown> },
551
+ mcpManager as any,
472
552
  undefined,
473
553
  undefined,
474
554
  mockGetAdapter
@@ -527,7 +607,7 @@ You are a test agent.`;
527
607
  context,
528
608
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
529
609
  undefined,
530
- mcpManager as unknown as { getClient: () => Promise<unknown> },
610
+ mcpManager as any,
531
611
  undefined,
532
612
  undefined,
533
613
  getAdapter
@@ -570,10 +650,7 @@ You are a test agent.`;
570
650
  context,
571
651
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
572
652
  console,
573
- manager as unknown as {
574
- getClient: () => Promise<unknown>;
575
- getGlobalServers: () => unknown[];
576
- },
653
+ manager as any,
577
654
  undefined,
578
655
  undefined,
579
656
  getAdapter
@@ -603,7 +680,8 @@ You are a test agent.`;
603
680
  };
604
681
  const context: ExpressionContext = { inputs: {}, steps: {} };
605
682
  let toolExecuted = false;
606
- const executeStepFn = async (s: Step) => {
683
+
684
+ const executeStepFn = async (s: any) => {
607
685
  if (s.id === 'adhoc-step') {
608
686
  toolExecuted = true;
609
687
  return { status: 'success' as const, output: { stdout: 'adhoc result' } };
@@ -691,7 +769,7 @@ You are a test agent.`;
691
769
  getAdapter
692
770
  );
693
771
 
694
- expect(capturedStep?.type).toBe('engine');
772
+ expect((capturedStep as any)?.type).toBe('engine');
695
773
  expect(chatCount).toBe(2);
696
774
  });
697
775
 
@@ -728,6 +806,194 @@ You are a test agent.`;
728
806
  consoleSpy.mockRestore();
729
807
  });
730
808
 
809
+ it('should summarize messages when history is too long', async () => {
810
+ let summaryAttempted = false;
811
+ const chatMock = mock(async (messages: LLMMessage[]) => {
812
+ if (messages.find((m) => m.name === 'context_summary')) {
813
+ summaryAttempted = true;
814
+ }
815
+ return { message: { role: 'assistant', content: 'Resuming' } };
816
+ }) as unknown as LLMAdapter['chat'];
817
+
818
+ const getAdapter = (modelString: string) => {
819
+ const mockAdapter: LLMAdapter = {
820
+ chat: async (messages, options) => {
821
+ if (messages[0].role === 'system' && messages[0].content?.includes('Summarize')) {
822
+ return { message: { role: 'assistant', content: 'Summary text' } };
823
+ }
824
+ return chatMock(messages, options);
825
+ },
826
+ };
827
+ return { adapter: mockAdapter, resolvedModel: 'gpt-4' };
828
+ };
829
+
830
+ const step: LlmStep = {
831
+ id: 'l1',
832
+ type: 'llm',
833
+ agent: 'test-agent',
834
+ prompt: 'continue',
835
+ needs: [],
836
+ maxIterations: 1,
837
+ maxMessageHistory: 4, // Allow at least one non-system message before summarization
838
+ contextStrategy: 'summary',
839
+ };
840
+
841
+ const context: ExpressionContext = {
842
+ inputs: {},
843
+ steps: {
844
+ l1: {
845
+ output: {
846
+ messages: [
847
+ { role: 'user', content: 'm1' },
848
+ { role: 'assistant', content: 'm2' },
849
+ { role: 'user', content: 'm3' },
850
+ ],
851
+ },
852
+ },
853
+ },
854
+ };
855
+
856
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
857
+
858
+ await executeLlmStep(
859
+ step,
860
+ context,
861
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
862
+ undefined,
863
+ undefined,
864
+ undefined,
865
+ undefined,
866
+ getAdapter
867
+ );
868
+
869
+ expect(summaryAttempted).toBe(true);
870
+ });
871
+
872
+ it('should fall back to truncation if summarization fails', async () => {
873
+ const logger: Logger = {
874
+ log: mock(() => {}),
875
+ error: mock(() => {}),
876
+ warn: mock(() => {}),
877
+ info: mock(() => {}),
878
+ debug: mock(() => {}),
879
+ };
880
+
881
+ const getAdapter = (modelString: string) => {
882
+ const mockAdapter: LLMAdapter = {
883
+ chat: async (messages) => {
884
+ if (messages[0].role === 'system' && messages[0].content?.includes('Summarize')) {
885
+ throw new Error('Summary failed');
886
+ }
887
+ return { message: { role: 'assistant', content: 'Truncated response' } };
888
+ },
889
+ };
890
+ return { adapter: mockAdapter, resolvedModel: 'gpt-4' };
891
+ };
892
+
893
+ const step: LlmStep = {
894
+ id: 'l1',
895
+ type: 'llm',
896
+ agent: 'test-agent',
897
+ prompt: 'continue',
898
+ needs: [],
899
+ maxIterations: 1,
900
+ maxMessageHistory: 4,
901
+ contextStrategy: 'summary',
902
+ };
903
+
904
+ const context: ExpressionContext = {
905
+ inputs: {},
906
+ steps: {
907
+ l1: {
908
+ output: {
909
+ messages: [
910
+ { role: 'user', content: 'm1' },
911
+ { role: 'assistant', content: 'm2' },
912
+ { role: 'user', content: 'm3' },
913
+ ],
914
+ },
915
+ },
916
+ },
917
+ };
918
+
919
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
920
+
921
+ await executeLlmStep(
922
+ step,
923
+ context,
924
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
925
+ logger,
926
+ undefined,
927
+ undefined,
928
+ undefined,
929
+ getAdapter
930
+ );
931
+
932
+ expect(logger.warn).toHaveBeenCalledWith(
933
+ expect.stringContaining('Context summarization failed')
934
+ );
935
+ });
936
+
937
+ it('should extract thought blocks and emit thought events', async () => {
938
+ const logger: Logger = {
939
+ log: mock(() => {}),
940
+ error: mock(() => {}),
941
+ warn: mock(() => {}),
942
+ info: mock(() => {}),
943
+ debug: mock(() => {}),
944
+ };
945
+
946
+ const emitEvent = mock(() => {});
947
+ const eventContext = { runId: 'run-1', workflow: 'wf-1' };
948
+
949
+ const chatMock = mock(async () => {
950
+ return {
951
+ message: {
952
+ role: 'assistant',
953
+ content: '<thinking>I should do X</thinking>Final answer',
954
+ },
955
+ };
956
+ }) as unknown as LLMAdapter['chat'];
957
+
958
+ const getAdapter = () => ({
959
+ adapter: { chat: chatMock },
960
+ resolvedModel: 'gpt-4',
961
+ });
962
+
963
+ const step: LlmStep = {
964
+ id: 'l1',
965
+ type: 'llm',
966
+ agent: 'test-agent',
967
+ prompt: 'hello',
968
+ needs: [],
969
+ maxIterations: 10,
970
+ };
971
+
972
+ await executeLlmStep(
973
+ step,
974
+ { inputs: {}, steps: {} },
975
+ mock(async () => ({ status: 'success' as const, output: 'ok' })) as any,
976
+ logger,
977
+ undefined,
978
+ undefined,
979
+ undefined,
980
+ getAdapter as any,
981
+ emitEvent,
982
+ eventContext
983
+ );
984
+
985
+ expect(logger.info).toHaveBeenCalledWith(
986
+ expect.stringContaining('Thought (thinking): I should do X')
987
+ );
988
+ expect(emitEvent).toHaveBeenCalledWith(
989
+ expect.objectContaining({
990
+ type: 'llm.thought',
991
+ content: 'I should do X',
992
+ source: 'thinking',
993
+ })
994
+ );
995
+ });
996
+
731
997
  it('should not add global MCP server if already explicitly listed', async () => {
732
998
  const mockClient = createMockMcpClient();
733
999
  const manager = createMockMcpManager({
@@ -757,10 +1023,7 @@ You are a test agent.`;
757
1023
  context,
758
1024
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
759
1025
  console,
760
- manager as unknown as {
761
- getClient: () => Promise<unknown>;
762
- getGlobalServers: () => unknown[];
763
- },
1026
+ manager as any,
764
1027
  undefined,
765
1028
  undefined,
766
1029
  getAdapter
@@ -788,7 +1051,6 @@ You are a test agent.`;
788
1051
 
789
1052
  let capturedPrompt = '';
790
1053
  const chatMock = mock(async (messages: LLMMessage[]) => {
791
- // console.log('MESSAGES:', JSON.stringify(messages, null, 2));
792
1054
  capturedPrompt = messages.find((m) => m.role === 'user')?.content || '';
793
1055
  return { message: { role: 'assistant', content: 'Response' } };
794
1056
  }) as unknown as LLMAdapter['chat'];
@@ -810,4 +1072,257 @@ You are a test agent.`;
810
1072
  expect(capturedPrompt).toContain('"key": "value"');
811
1073
  expect(capturedPrompt).not.toContain('[object Object]');
812
1074
  });
1075
+
1076
+ it('should evaluate expressions in agent system prompts', async () => {
1077
+ const step: LlmStep = {
1078
+ id: 'l1',
1079
+ type: 'llm',
1080
+ agent: 'handoff-target',
1081
+ prompt: 'hello',
1082
+ needs: [],
1083
+ maxIterations: 3,
1084
+ };
1085
+ const context: ExpressionContext = { inputs: { topic: 'payments' }, steps: {} };
1086
+ let capturedSystem = '';
1087
+
1088
+ const chatMock = mock(async (messages: LLMMessage[]) => {
1089
+ const systemMessages = messages.filter((m) => m.role === 'system');
1090
+ capturedSystem =
1091
+ (systemMessages.find((m) => typeof m.content === 'string')?.content as string) || '';
1092
+ return { message: { role: 'assistant', content: 'ok' } };
1093
+ }) as unknown as LLMAdapter['chat'];
1094
+ const getAdapter = createMockGetAdapter(chatMock);
1095
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
1096
+
1097
+ const result = await executeLlmStep(
1098
+ step,
1099
+ context,
1100
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
1101
+ undefined,
1102
+ undefined,
1103
+ undefined,
1104
+ undefined,
1105
+ getAdapter
1106
+ );
1107
+
1108
+ expect(result.status).toBe('success');
1109
+ expect(capturedSystem).toContain('payments');
1110
+ expect(capturedSystem).not.toContain('${{');
1111
+ });
1112
+
1113
+ it('should handle streaming chunks with thoughts', async () => {
1114
+ const step = {
1115
+ id: 'l-stream',
1116
+ type: 'llm' as const,
1117
+ agent: 'test-agent',
1118
+ prompt: 'stream this',
1119
+ needs: [],
1120
+ maxIterations: 1,
1121
+ };
1122
+
1123
+ // We can't easily add 'stream' to LlmStep without changing schema,
1124
+ // but we can mock the adapter to stream if onStream is provided.
1125
+
1126
+ const chatMock = mock(async (messages: LLMMessage[], options: any) => {
1127
+ if (options.onStream) {
1128
+ options.onStream('<thinking>thought</thinking>done');
1129
+ }
1130
+ return { message: { role: 'assistant', content: '<thinking>thought</thinking>done' } };
1131
+ }) as unknown as LLMAdapter['chat'];
1132
+
1133
+ const adapter = {
1134
+ chat: chatMock,
1135
+ } as any;
1136
+
1137
+ const context: ExpressionContext = { inputs: {}, steps: {} };
1138
+ spyOn(process.stdout, 'write').mockImplementation(() => true);
1139
+
1140
+ const emitThought = mock(() => {});
1141
+
1142
+ await executeLlmStep(
1143
+ step as any,
1144
+ context,
1145
+ mock(async () => ({ status: 'success' as const, output: 'ok' })) as any,
1146
+ new ConsoleLogger(),
1147
+ undefined,
1148
+ undefined,
1149
+ undefined,
1150
+ () => ({ adapter, resolvedModel: 'test-model' }),
1151
+ emitThought,
1152
+ { runId: 'test-run', workflow: 'test-wf' }
1153
+ );
1154
+
1155
+ expect(emitThought).toHaveBeenCalled();
1156
+ });
1157
+
1158
+ it('should transfer to allowed agent and swap system prompt/tools', async () => {
1159
+ let callCount = 0;
1160
+ let sawTransferTool = false;
1161
+ let sawOriginalTool = false;
1162
+ let sawTargetToolAfter = false;
1163
+ let sawOriginalToolAfter = false;
1164
+ let sawTargetPrompt = false;
1165
+
1166
+ const chatMock = mock(async (messages: LLMMessage[], options: { tools?: LLMTool[] }) => {
1167
+ callCount++;
1168
+ const toolNames = options.tools?.map((t) => t.function.name) || [];
1169
+
1170
+ if (callCount === 1) {
1171
+ sawTransferTool = toolNames.includes('transfer_to_agent');
1172
+ sawOriginalTool = toolNames.includes('test-tool');
1173
+ return {
1174
+ message: {
1175
+ role: 'assistant',
1176
+ content: null,
1177
+ tool_calls: [
1178
+ {
1179
+ id: 'call-transfer',
1180
+ type: 'function',
1181
+ function: {
1182
+ name: 'transfer_to_agent',
1183
+ arguments: '{"agent_name":"handoff-target"}',
1184
+ },
1185
+ },
1186
+ ],
1187
+ },
1188
+ };
1189
+ }
1190
+
1191
+ const systemMessages = messages.filter((m) => m.role === 'system');
1192
+ sawTargetPrompt = systemMessages.some(
1193
+ (m) => typeof m.content === 'string' && m.content.includes('specialist for billing')
1194
+ );
1195
+ sawTargetToolAfter = toolNames.includes('specialist-tool');
1196
+ sawOriginalToolAfter = toolNames.includes('test-tool');
1197
+
1198
+ return {
1199
+ message: { role: 'assistant', content: 'done' },
1200
+ };
1201
+ }) as unknown as LLMAdapter['chat'];
1202
+ const getAdapter = createMockGetAdapter(chatMock);
1203
+
1204
+ const step: LlmStep = {
1205
+ id: 'l1',
1206
+ type: 'llm',
1207
+ agent: 'test-agent',
1208
+ prompt: 'handoff',
1209
+ needs: [],
1210
+ maxIterations: 4,
1211
+ allowedHandoffs: ['handoff-target'],
1212
+ };
1213
+ const context: ExpressionContext = { inputs: { topic: 'billing' }, steps: {} };
1214
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
1215
+
1216
+ const result = await executeLlmStep(
1217
+ step,
1218
+ context,
1219
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
1220
+ undefined,
1221
+ undefined,
1222
+ undefined,
1223
+ undefined,
1224
+ getAdapter
1225
+ );
1226
+
1227
+ expect(result.status).toBe('success');
1228
+ expect(sawTransferTool).toBe(true);
1229
+ expect(sawOriginalTool).toBe(true);
1230
+ expect(sawTargetToolAfter).toBe(true);
1231
+ expect(sawOriginalToolAfter).toBe(false);
1232
+ expect(sawTargetPrompt).toBe(true);
1233
+ });
1234
+
1235
+ it('should apply context updates from tool output', async () => {
1236
+ const step: LlmStep = {
1237
+ id: 'l1',
1238
+ type: 'llm',
1239
+ agent: 'context-agent',
1240
+ prompt: 'update context',
1241
+ needs: [],
1242
+ maxIterations: 4,
1243
+ tools: [
1244
+ {
1245
+ name: 'update-context',
1246
+ execution: {
1247
+ id: 'update-step',
1248
+ type: 'shell',
1249
+ run: 'echo update',
1250
+ },
1251
+ },
1252
+ {
1253
+ name: 'read-context',
1254
+ execution: {
1255
+ id: 'read-step',
1256
+ type: 'shell',
1257
+ run: 'echo read',
1258
+ },
1259
+ },
1260
+ ],
1261
+ };
1262
+ const context: ExpressionContext = { inputs: {}, steps: {} };
1263
+ let sawEnvUpdate = false;
1264
+ let sawMemoryUpdate = false;
1265
+
1266
+ const executeStepFn = async (_step: any, toolContext: ExpressionContext) => {
1267
+ if (_step.id === 'update-step') {
1268
+ return {
1269
+ status: 'success' as const,
1270
+ output: {
1271
+ __keystone_context: {
1272
+ env: { USER_ID: '123' },
1273
+ memory: { user: 'Ada' },
1274
+ },
1275
+ ok: true,
1276
+ },
1277
+ };
1278
+ }
1279
+ if (_step.id === 'read-step') {
1280
+ sawEnvUpdate = toolContext.env?.USER_ID === '123';
1281
+ sawMemoryUpdate = toolContext.memory?.user === 'Ada';
1282
+ return { status: 'success' as const, output: { seen: true } };
1283
+ }
1284
+ return { status: 'success' as const, output: 'ok' };
1285
+ };
1286
+
1287
+ let callCount = 0;
1288
+ const chatMock = mock(async () => {
1289
+ callCount++;
1290
+ if (callCount === 1) {
1291
+ return {
1292
+ message: {
1293
+ role: 'assistant',
1294
+ content: null,
1295
+ tool_calls: [
1296
+ {
1297
+ id: 'call-update',
1298
+ type: 'function',
1299
+ function: { name: 'update-context', arguments: '{}' },
1300
+ },
1301
+ {
1302
+ id: 'call-read',
1303
+ type: 'function',
1304
+ function: { name: 'read-context', arguments: '{}' },
1305
+ },
1306
+ ],
1307
+ },
1308
+ };
1309
+ }
1310
+ return { message: { role: 'assistant', content: 'done' } };
1311
+ }) as unknown as LLMAdapter['chat'];
1312
+ const getAdapter = createMockGetAdapter(chatMock);
1313
+
1314
+ await executeLlmStep(
1315
+ step,
1316
+ context,
1317
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
1318
+ undefined,
1319
+ undefined,
1320
+ undefined,
1321
+ undefined,
1322
+ getAdapter
1323
+ );
1324
+
1325
+ expect(sawEnvUpdate).toBe(true);
1326
+ expect(sawMemoryUpdate).toBe(true);
1327
+ });
813
1328
  });
@@ -17,8 +17,7 @@ describe('MCPClient Audit Fixes', () => {
17
17
  }),
18
18
  kill: () => {},
19
19
  on: () => {},
20
- // biome-ignore lint/suspicious/noExplicitAny: Mocking complex object
21
- } as any);
20
+ } as unknown as child_process.ChildProcess);
22
21
  });
23
22
 
24
23
  afterEach(() => {