keystone-cli 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +288 -24
- package/package.json +8 -4
- package/src/cli.ts +538 -419
- package/src/commands/doc.ts +31 -0
- package/src/commands/event.ts +29 -0
- package/src/commands/graph.ts +37 -0
- package/src/commands/index.ts +14 -0
- package/src/commands/init.ts +185 -0
- package/src/commands/run.ts +124 -0
- package/src/commands/schema.ts +40 -0
- package/src/commands/utils.ts +78 -0
- package/src/commands/validate.ts +111 -0
- package/src/db/memory-db.ts +50 -2
- package/src/db/workflow-db.test.ts +314 -0
- package/src/db/workflow-db.ts +810 -210
- package/src/expression/evaluator-audit.test.ts +4 -2
- package/src/expression/evaluator.test.ts +14 -1
- package/src/expression/evaluator.ts +166 -19
- package/src/parser/config-schema.ts +18 -0
- package/src/parser/schema.ts +153 -22
- package/src/parser/test-schema.ts +6 -6
- package/src/parser/workflow-parser.test.ts +24 -0
- package/src/parser/workflow-parser.ts +65 -3
- package/src/runner/auto-heal.test.ts +5 -6
- package/src/runner/blueprint-executor.test.ts +2 -2
- package/src/runner/debug-repl.test.ts +5 -8
- package/src/runner/debug-repl.ts +59 -16
- package/src/runner/durable-timers.test.ts +11 -2
- package/src/runner/engine-executor.test.ts +1 -1
- package/src/runner/events.ts +57 -0
- package/src/runner/executors/artifact-executor.ts +166 -0
- package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
- package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
- package/src/runner/executors/file-executor.test.ts +48 -0
- package/src/runner/executors/file-executor.ts +324 -0
- package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
- package/src/runner/executors/human-executor.ts +144 -0
- package/src/runner/executors/join-executor.ts +75 -0
- package/src/runner/executors/llm-executor.ts +1266 -0
- package/src/runner/executors/memory-executor.ts +71 -0
- package/src/runner/executors/plan-executor.ts +104 -0
- package/src/runner/executors/request-executor.ts +265 -0
- package/src/runner/executors/script-executor.ts +43 -0
- package/src/runner/executors/shell-executor.ts +403 -0
- package/src/runner/executors/subworkflow-executor.ts +114 -0
- package/src/runner/executors/types.ts +69 -0
- package/src/runner/executors/wait-executor.ts +59 -0
- package/src/runner/join-scheduling.test.ts +197 -0
- package/src/runner/llm-adapter-runtime.test.ts +209 -0
- package/src/runner/llm-adapter.test.ts +419 -24
- package/src/runner/llm-adapter.ts +414 -17
- package/src/runner/llm-clarification.test.ts +2 -1
- package/src/runner/llm-executor.test.ts +532 -17
- package/src/runner/mcp-client-audit.test.ts +1 -2
- package/src/runner/mcp-client.ts +136 -46
- package/src/runner/mcp-manager.test.ts +4 -0
- package/src/runner/mcp-server.test.ts +58 -0
- package/src/runner/mcp-server.ts +26 -0
- package/src/runner/memoization.test.ts +190 -0
- package/src/runner/optimization-runner.ts +4 -9
- package/src/runner/quality-gate.test.ts +69 -0
- package/src/runner/reflexion.test.ts +6 -17
- package/src/runner/resource-pool.ts +102 -14
- package/src/runner/services/context-builder.ts +144 -0
- package/src/runner/services/secret-manager.ts +105 -0
- package/src/runner/services/workflow-validator.ts +131 -0
- package/src/runner/shell-executor.test.ts +28 -4
- package/src/runner/standard-tools-ast.test.ts +196 -0
- package/src/runner/standard-tools-execution.test.ts +27 -0
- package/src/runner/standard-tools-integration.test.ts +6 -10
- package/src/runner/standard-tools.ts +339 -102
- package/src/runner/step-executor.test.ts +216 -4
- package/src/runner/step-executor.ts +69 -941
- package/src/runner/stream-utils.ts +7 -3
- package/src/runner/test-harness.ts +20 -1
- package/src/runner/timeout.test.ts +10 -0
- package/src/runner/timeout.ts +11 -2
- package/src/runner/tool-integration.test.ts +1 -1
- package/src/runner/wait-step.test.ts +102 -0
- package/src/runner/workflow-runner.test.ts +208 -15
- package/src/runner/workflow-runner.ts +890 -818
- package/src/runner/workflow-scheduler.ts +75 -0
- package/src/runner/workflow-state.ts +269 -0
- package/src/runner/workflow-subflows.test.ts +13 -12
- package/src/scripts/generate-schemas.ts +16 -0
- package/src/templates/agents/explore.md +1 -0
- package/src/templates/agents/general.md +1 -0
- package/src/templates/agents/handoff-router.md +14 -0
- package/src/templates/agents/handoff-specialist.md +15 -0
- package/src/templates/agents/keystone-architect.md +13 -44
- package/src/templates/agents/my-agent.md +1 -0
- package/src/templates/agents/software-engineer.md +1 -0
- package/src/templates/agents/summarizer.md +1 -0
- package/src/templates/agents/test-agent.md +1 -0
- package/src/templates/agents/tester.md +1 -0
- package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
- package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
- package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
- package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
- package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
- package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
- package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
- package/src/templates/control-flow/idempotency-example.yaml +30 -0
- package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
- package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
- package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
- package/src/templates/features/artifact-example.yaml +39 -0
- package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
- package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
- package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
- package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
- package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
- package/src/templates/features/script-example.yaml +27 -0
- package/src/templates/patterns/agent-handoff.yaml +53 -0
- package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
- package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
- package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
- package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
- package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
- package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
- package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
- package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
- package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
- package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
- package/src/templates/scaffolding/review-loop.yaml +97 -0
- package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
- package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
- package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
- package/src/templates/testing/invalid.yaml +6 -0
- package/src/ui/dashboard.tsx +191 -33
- package/src/utils/auth-manager.test.ts +337 -0
- package/src/utils/auth-manager.ts +157 -61
- package/src/utils/blueprint-utils.ts +4 -6
- package/src/utils/config-loader.test.ts +2 -0
- package/src/utils/config-loader.ts +12 -3
- package/src/utils/constants.ts +76 -0
- package/src/utils/container.ts +63 -0
- package/src/utils/context-injector.test.ts +200 -0
- package/src/utils/context-injector.ts +244 -0
- package/src/utils/doc-generator.ts +85 -0
- package/src/utils/env-filter.ts +45 -0
- package/src/utils/json-parser.test.ts +12 -0
- package/src/utils/json-parser.ts +30 -5
- package/src/utils/logger.ts +12 -1
- package/src/utils/mermaid.ts +4 -0
- package/src/utils/paths.ts +52 -1
- package/src/utils/process-sandbox-worker.test.ts +46 -0
- package/src/utils/process-sandbox.ts +227 -14
- package/src/utils/redactor.test.ts +11 -6
- package/src/utils/redactor.ts +25 -9
- package/src/utils/sandbox.ts +3 -0
- package/src/utils/workflow-registry.test.ts +2 -2
- package/src/runner/llm-executor.ts +0 -638
- package/src/runner/shell-executor.ts +0 -366
- package/src/templates/invalid.yaml +0 -5
|
@@ -5,12 +5,14 @@ import { mkdirSync, writeFileSync } from 'node:fs';
|
|
|
5
5
|
import { join } from 'node:path';
|
|
6
6
|
import { Readable, Writable } from 'node:stream';
|
|
7
7
|
import type { ExpressionContext } from '../expression/evaluator';
|
|
8
|
+
import { ExpressionEvaluator } from '../expression/evaluator';
|
|
9
|
+
import { parseAgent } from '../parser/agent-parser';
|
|
8
10
|
import type { LlmStep, Step } from '../parser/schema';
|
|
11
|
+
import { ConsoleLogger, type Logger } from '../utils/logger';
|
|
12
|
+
import { executeLlmStep } from './executors/llm-executor.ts';
|
|
9
13
|
import type { LLMAdapter, LLMMessage, LLMResponse, LLMTool } from './llm-adapter';
|
|
10
|
-
import { executeLlmStep } from './llm-executor';
|
|
11
14
|
import type { MCPServerConfig } from './mcp-manager';
|
|
12
15
|
import type { StepResult } from './step-executor';
|
|
13
|
-
import type { Logger } from './workflow-runner';
|
|
14
16
|
|
|
15
17
|
// Mock adapters
|
|
16
18
|
// Instead of mutating prototypes (which causes cross-test contamination),
|
|
@@ -187,6 +189,25 @@ tools:
|
|
|
187
189
|
---
|
|
188
190
|
You are a test agent.`;
|
|
189
191
|
writeFileSync(join(agentsDir, 'test-agent.md'), agentContent);
|
|
192
|
+
|
|
193
|
+
const handoffTargetContent = `---
|
|
194
|
+
name: handoff-target
|
|
195
|
+
model: gpt-4
|
|
196
|
+
tools:
|
|
197
|
+
- name: specialist-tool
|
|
198
|
+
execution:
|
|
199
|
+
type: shell
|
|
200
|
+
run: echo "specialist"
|
|
201
|
+
---
|
|
202
|
+
You are the specialist for \${{ inputs.topic }}.`;
|
|
203
|
+
writeFileSync(join(agentsDir, 'handoff-target.md'), handoffTargetContent);
|
|
204
|
+
|
|
205
|
+
const contextAgentContent = `---
|
|
206
|
+
name: context-agent
|
|
207
|
+
model: gpt-4
|
|
208
|
+
---
|
|
209
|
+
You are a context-aware agent.`;
|
|
210
|
+
writeFileSync(join(agentsDir, 'context-agent.md'), contextAgentContent);
|
|
190
211
|
});
|
|
191
212
|
|
|
192
213
|
afterAll(() => {
|
|
@@ -230,7 +251,7 @@ You are a test agent.`;
|
|
|
230
251
|
};
|
|
231
252
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
232
253
|
|
|
233
|
-
const executeStepFn = async (s:
|
|
254
|
+
const executeStepFn = async (s: any) => {
|
|
234
255
|
if (s.type === 'shell') {
|
|
235
256
|
return { status: 'success' as const, output: { stdout: 'tool result' } };
|
|
236
257
|
}
|
|
@@ -262,7 +283,7 @@ You are a test agent.`;
|
|
|
262
283
|
};
|
|
263
284
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
264
285
|
|
|
265
|
-
const executeStepFn = async (s:
|
|
286
|
+
const executeStepFn = async (s: any) => {
|
|
266
287
|
if (s.type === 'shell') {
|
|
267
288
|
return { status: 'success' as const, output: { stdout: 'tool result' } };
|
|
268
289
|
}
|
|
@@ -273,6 +294,8 @@ You are a test agent.`;
|
|
|
273
294
|
log: mock(() => {}),
|
|
274
295
|
error: mock(() => {}),
|
|
275
296
|
warn: mock(() => {}),
|
|
297
|
+
info: mock(() => {}),
|
|
298
|
+
debug: mock(() => {}),
|
|
276
299
|
};
|
|
277
300
|
|
|
278
301
|
await executeLlmStep(
|
|
@@ -325,6 +348,63 @@ You are a test agent.`;
|
|
|
325
348
|
expect(result.output).toEqual({ foo: 'bar' });
|
|
326
349
|
});
|
|
327
350
|
|
|
351
|
+
it('should accept native structured output tool calls when responseSchema is provided', async () => {
|
|
352
|
+
const outputSchema = {
|
|
353
|
+
type: 'object',
|
|
354
|
+
properties: {
|
|
355
|
+
foo: { type: 'string' },
|
|
356
|
+
},
|
|
357
|
+
required: ['foo'],
|
|
358
|
+
};
|
|
359
|
+
let receivedSchema: unknown;
|
|
360
|
+
|
|
361
|
+
const chatMock = mock(async (_messages, options) => {
|
|
362
|
+
receivedSchema = options?.responseSchema;
|
|
363
|
+
return {
|
|
364
|
+
message: {
|
|
365
|
+
role: 'assistant',
|
|
366
|
+
content: null,
|
|
367
|
+
tool_calls: [
|
|
368
|
+
{
|
|
369
|
+
id: 'call-1',
|
|
370
|
+
type: 'function',
|
|
371
|
+
function: { name: 'record_output', arguments: '{"foo":"bar"}' },
|
|
372
|
+
},
|
|
373
|
+
],
|
|
374
|
+
},
|
|
375
|
+
};
|
|
376
|
+
}) as unknown as LLMAdapter['chat'];
|
|
377
|
+
const getAdapter = createMockGetAdapter(chatMock);
|
|
378
|
+
|
|
379
|
+
const step: LlmStep = {
|
|
380
|
+
id: 'l1',
|
|
381
|
+
type: 'llm',
|
|
382
|
+
agent: 'test-agent',
|
|
383
|
+
prompt: 'give me json',
|
|
384
|
+
needs: [],
|
|
385
|
+
maxIterations: 5,
|
|
386
|
+
outputSchema,
|
|
387
|
+
};
|
|
388
|
+
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
389
|
+
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
390
|
+
|
|
391
|
+
const result = await executeLlmStep(
|
|
392
|
+
step,
|
|
393
|
+
context,
|
|
394
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
395
|
+
undefined,
|
|
396
|
+
undefined,
|
|
397
|
+
undefined,
|
|
398
|
+
undefined,
|
|
399
|
+
getAdapter
|
|
400
|
+
);
|
|
401
|
+
|
|
402
|
+
expect(receivedSchema).toEqual(outputSchema);
|
|
403
|
+
expect(result.status).toBe('success');
|
|
404
|
+
expect(result.output).toEqual({ foo: 'bar' });
|
|
405
|
+
expect(executeStepFn).not.toHaveBeenCalled();
|
|
406
|
+
});
|
|
407
|
+
|
|
328
408
|
it('should retry if LLM output fails schema validation', async () => {
|
|
329
409
|
const step: LlmStep = {
|
|
330
410
|
id: 'l1',
|
|
@@ -468,7 +548,7 @@ You are a test agent.`;
|
|
|
468
548
|
context,
|
|
469
549
|
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
470
550
|
console,
|
|
471
|
-
mcpManager as
|
|
551
|
+
mcpManager as any,
|
|
472
552
|
undefined,
|
|
473
553
|
undefined,
|
|
474
554
|
mockGetAdapter
|
|
@@ -527,7 +607,7 @@ You are a test agent.`;
|
|
|
527
607
|
context,
|
|
528
608
|
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
529
609
|
undefined,
|
|
530
|
-
mcpManager as
|
|
610
|
+
mcpManager as any,
|
|
531
611
|
undefined,
|
|
532
612
|
undefined,
|
|
533
613
|
getAdapter
|
|
@@ -570,10 +650,7 @@ You are a test agent.`;
|
|
|
570
650
|
context,
|
|
571
651
|
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
572
652
|
console,
|
|
573
|
-
manager as
|
|
574
|
-
getClient: () => Promise<unknown>;
|
|
575
|
-
getGlobalServers: () => unknown[];
|
|
576
|
-
},
|
|
653
|
+
manager as any,
|
|
577
654
|
undefined,
|
|
578
655
|
undefined,
|
|
579
656
|
getAdapter
|
|
@@ -603,7 +680,8 @@ You are a test agent.`;
|
|
|
603
680
|
};
|
|
604
681
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
605
682
|
let toolExecuted = false;
|
|
606
|
-
|
|
683
|
+
|
|
684
|
+
const executeStepFn = async (s: any) => {
|
|
607
685
|
if (s.id === 'adhoc-step') {
|
|
608
686
|
toolExecuted = true;
|
|
609
687
|
return { status: 'success' as const, output: { stdout: 'adhoc result' } };
|
|
@@ -691,7 +769,7 @@ You are a test agent.`;
|
|
|
691
769
|
getAdapter
|
|
692
770
|
);
|
|
693
771
|
|
|
694
|
-
expect(capturedStep?.type).toBe('engine');
|
|
772
|
+
expect((capturedStep as any)?.type).toBe('engine');
|
|
695
773
|
expect(chatCount).toBe(2);
|
|
696
774
|
});
|
|
697
775
|
|
|
@@ -728,6 +806,194 @@ You are a test agent.`;
|
|
|
728
806
|
consoleSpy.mockRestore();
|
|
729
807
|
});
|
|
730
808
|
|
|
809
|
+
it('should summarize messages when history is too long', async () => {
|
|
810
|
+
let summaryAttempted = false;
|
|
811
|
+
const chatMock = mock(async (messages: LLMMessage[]) => {
|
|
812
|
+
if (messages.find((m) => m.name === 'context_summary')) {
|
|
813
|
+
summaryAttempted = true;
|
|
814
|
+
}
|
|
815
|
+
return { message: { role: 'assistant', content: 'Resuming' } };
|
|
816
|
+
}) as unknown as LLMAdapter['chat'];
|
|
817
|
+
|
|
818
|
+
const getAdapter = (modelString: string) => {
|
|
819
|
+
const mockAdapter: LLMAdapter = {
|
|
820
|
+
chat: async (messages, options) => {
|
|
821
|
+
if (messages[0].role === 'system' && messages[0].content?.includes('Summarize')) {
|
|
822
|
+
return { message: { role: 'assistant', content: 'Summary text' } };
|
|
823
|
+
}
|
|
824
|
+
return chatMock(messages, options);
|
|
825
|
+
},
|
|
826
|
+
};
|
|
827
|
+
return { adapter: mockAdapter, resolvedModel: 'gpt-4' };
|
|
828
|
+
};
|
|
829
|
+
|
|
830
|
+
const step: LlmStep = {
|
|
831
|
+
id: 'l1',
|
|
832
|
+
type: 'llm',
|
|
833
|
+
agent: 'test-agent',
|
|
834
|
+
prompt: 'continue',
|
|
835
|
+
needs: [],
|
|
836
|
+
maxIterations: 1,
|
|
837
|
+
maxMessageHistory: 4, // Allow at least one non-system message before summarization
|
|
838
|
+
contextStrategy: 'summary',
|
|
839
|
+
};
|
|
840
|
+
|
|
841
|
+
const context: ExpressionContext = {
|
|
842
|
+
inputs: {},
|
|
843
|
+
steps: {
|
|
844
|
+
l1: {
|
|
845
|
+
output: {
|
|
846
|
+
messages: [
|
|
847
|
+
{ role: 'user', content: 'm1' },
|
|
848
|
+
{ role: 'assistant', content: 'm2' },
|
|
849
|
+
{ role: 'user', content: 'm3' },
|
|
850
|
+
],
|
|
851
|
+
},
|
|
852
|
+
},
|
|
853
|
+
},
|
|
854
|
+
};
|
|
855
|
+
|
|
856
|
+
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
857
|
+
|
|
858
|
+
await executeLlmStep(
|
|
859
|
+
step,
|
|
860
|
+
context,
|
|
861
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
862
|
+
undefined,
|
|
863
|
+
undefined,
|
|
864
|
+
undefined,
|
|
865
|
+
undefined,
|
|
866
|
+
getAdapter
|
|
867
|
+
);
|
|
868
|
+
|
|
869
|
+
expect(summaryAttempted).toBe(true);
|
|
870
|
+
});
|
|
871
|
+
|
|
872
|
+
it('should fall back to truncation if summarization fails', async () => {
|
|
873
|
+
const logger: Logger = {
|
|
874
|
+
log: mock(() => {}),
|
|
875
|
+
error: mock(() => {}),
|
|
876
|
+
warn: mock(() => {}),
|
|
877
|
+
info: mock(() => {}),
|
|
878
|
+
debug: mock(() => {}),
|
|
879
|
+
};
|
|
880
|
+
|
|
881
|
+
const getAdapter = (modelString: string) => {
|
|
882
|
+
const mockAdapter: LLMAdapter = {
|
|
883
|
+
chat: async (messages) => {
|
|
884
|
+
if (messages[0].role === 'system' && messages[0].content?.includes('Summarize')) {
|
|
885
|
+
throw new Error('Summary failed');
|
|
886
|
+
}
|
|
887
|
+
return { message: { role: 'assistant', content: 'Truncated response' } };
|
|
888
|
+
},
|
|
889
|
+
};
|
|
890
|
+
return { adapter: mockAdapter, resolvedModel: 'gpt-4' };
|
|
891
|
+
};
|
|
892
|
+
|
|
893
|
+
const step: LlmStep = {
|
|
894
|
+
id: 'l1',
|
|
895
|
+
type: 'llm',
|
|
896
|
+
agent: 'test-agent',
|
|
897
|
+
prompt: 'continue',
|
|
898
|
+
needs: [],
|
|
899
|
+
maxIterations: 1,
|
|
900
|
+
maxMessageHistory: 4,
|
|
901
|
+
contextStrategy: 'summary',
|
|
902
|
+
};
|
|
903
|
+
|
|
904
|
+
const context: ExpressionContext = {
|
|
905
|
+
inputs: {},
|
|
906
|
+
steps: {
|
|
907
|
+
l1: {
|
|
908
|
+
output: {
|
|
909
|
+
messages: [
|
|
910
|
+
{ role: 'user', content: 'm1' },
|
|
911
|
+
{ role: 'assistant', content: 'm2' },
|
|
912
|
+
{ role: 'user', content: 'm3' },
|
|
913
|
+
],
|
|
914
|
+
},
|
|
915
|
+
},
|
|
916
|
+
},
|
|
917
|
+
};
|
|
918
|
+
|
|
919
|
+
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
920
|
+
|
|
921
|
+
await executeLlmStep(
|
|
922
|
+
step,
|
|
923
|
+
context,
|
|
924
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
925
|
+
logger,
|
|
926
|
+
undefined,
|
|
927
|
+
undefined,
|
|
928
|
+
undefined,
|
|
929
|
+
getAdapter
|
|
930
|
+
);
|
|
931
|
+
|
|
932
|
+
expect(logger.warn).toHaveBeenCalledWith(
|
|
933
|
+
expect.stringContaining('Context summarization failed')
|
|
934
|
+
);
|
|
935
|
+
});
|
|
936
|
+
|
|
937
|
+
it('should extract thought blocks and emit thought events', async () => {
|
|
938
|
+
const logger: Logger = {
|
|
939
|
+
log: mock(() => {}),
|
|
940
|
+
error: mock(() => {}),
|
|
941
|
+
warn: mock(() => {}),
|
|
942
|
+
info: mock(() => {}),
|
|
943
|
+
debug: mock(() => {}),
|
|
944
|
+
};
|
|
945
|
+
|
|
946
|
+
const emitEvent = mock(() => {});
|
|
947
|
+
const eventContext = { runId: 'run-1', workflow: 'wf-1' };
|
|
948
|
+
|
|
949
|
+
const chatMock = mock(async () => {
|
|
950
|
+
return {
|
|
951
|
+
message: {
|
|
952
|
+
role: 'assistant',
|
|
953
|
+
content: '<thinking>I should do X</thinking>Final answer',
|
|
954
|
+
},
|
|
955
|
+
};
|
|
956
|
+
}) as unknown as LLMAdapter['chat'];
|
|
957
|
+
|
|
958
|
+
const getAdapter = () => ({
|
|
959
|
+
adapter: { chat: chatMock },
|
|
960
|
+
resolvedModel: 'gpt-4',
|
|
961
|
+
});
|
|
962
|
+
|
|
963
|
+
const step: LlmStep = {
|
|
964
|
+
id: 'l1',
|
|
965
|
+
type: 'llm',
|
|
966
|
+
agent: 'test-agent',
|
|
967
|
+
prompt: 'hello',
|
|
968
|
+
needs: [],
|
|
969
|
+
maxIterations: 10,
|
|
970
|
+
};
|
|
971
|
+
|
|
972
|
+
await executeLlmStep(
|
|
973
|
+
step,
|
|
974
|
+
{ inputs: {}, steps: {} },
|
|
975
|
+
mock(async () => ({ status: 'success' as const, output: 'ok' })) as any,
|
|
976
|
+
logger,
|
|
977
|
+
undefined,
|
|
978
|
+
undefined,
|
|
979
|
+
undefined,
|
|
980
|
+
getAdapter as any,
|
|
981
|
+
emitEvent,
|
|
982
|
+
eventContext
|
|
983
|
+
);
|
|
984
|
+
|
|
985
|
+
expect(logger.info).toHaveBeenCalledWith(
|
|
986
|
+
expect.stringContaining('Thought (thinking): I should do X')
|
|
987
|
+
);
|
|
988
|
+
expect(emitEvent).toHaveBeenCalledWith(
|
|
989
|
+
expect.objectContaining({
|
|
990
|
+
type: 'llm.thought',
|
|
991
|
+
content: 'I should do X',
|
|
992
|
+
source: 'thinking',
|
|
993
|
+
})
|
|
994
|
+
);
|
|
995
|
+
});
|
|
996
|
+
|
|
731
997
|
it('should not add global MCP server if already explicitly listed', async () => {
|
|
732
998
|
const mockClient = createMockMcpClient();
|
|
733
999
|
const manager = createMockMcpManager({
|
|
@@ -757,10 +1023,7 @@ You are a test agent.`;
|
|
|
757
1023
|
context,
|
|
758
1024
|
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
759
1025
|
console,
|
|
760
|
-
manager as
|
|
761
|
-
getClient: () => Promise<unknown>;
|
|
762
|
-
getGlobalServers: () => unknown[];
|
|
763
|
-
},
|
|
1026
|
+
manager as any,
|
|
764
1027
|
undefined,
|
|
765
1028
|
undefined,
|
|
766
1029
|
getAdapter
|
|
@@ -788,7 +1051,6 @@ You are a test agent.`;
|
|
|
788
1051
|
|
|
789
1052
|
let capturedPrompt = '';
|
|
790
1053
|
const chatMock = mock(async (messages: LLMMessage[]) => {
|
|
791
|
-
// console.log('MESSAGES:', JSON.stringify(messages, null, 2));
|
|
792
1054
|
capturedPrompt = messages.find((m) => m.role === 'user')?.content || '';
|
|
793
1055
|
return { message: { role: 'assistant', content: 'Response' } };
|
|
794
1056
|
}) as unknown as LLMAdapter['chat'];
|
|
@@ -810,4 +1072,257 @@ You are a test agent.`;
|
|
|
810
1072
|
expect(capturedPrompt).toContain('"key": "value"');
|
|
811
1073
|
expect(capturedPrompt).not.toContain('[object Object]');
|
|
812
1074
|
});
|
|
1075
|
+
|
|
1076
|
+
it('should evaluate expressions in agent system prompts', async () => {
|
|
1077
|
+
const step: LlmStep = {
|
|
1078
|
+
id: 'l1',
|
|
1079
|
+
type: 'llm',
|
|
1080
|
+
agent: 'handoff-target',
|
|
1081
|
+
prompt: 'hello',
|
|
1082
|
+
needs: [],
|
|
1083
|
+
maxIterations: 3,
|
|
1084
|
+
};
|
|
1085
|
+
const context: ExpressionContext = { inputs: { topic: 'payments' }, steps: {} };
|
|
1086
|
+
let capturedSystem = '';
|
|
1087
|
+
|
|
1088
|
+
const chatMock = mock(async (messages: LLMMessage[]) => {
|
|
1089
|
+
const systemMessages = messages.filter((m) => m.role === 'system');
|
|
1090
|
+
capturedSystem =
|
|
1091
|
+
(systemMessages.find((m) => typeof m.content === 'string')?.content as string) || '';
|
|
1092
|
+
return { message: { role: 'assistant', content: 'ok' } };
|
|
1093
|
+
}) as unknown as LLMAdapter['chat'];
|
|
1094
|
+
const getAdapter = createMockGetAdapter(chatMock);
|
|
1095
|
+
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
1096
|
+
|
|
1097
|
+
const result = await executeLlmStep(
|
|
1098
|
+
step,
|
|
1099
|
+
context,
|
|
1100
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
1101
|
+
undefined,
|
|
1102
|
+
undefined,
|
|
1103
|
+
undefined,
|
|
1104
|
+
undefined,
|
|
1105
|
+
getAdapter
|
|
1106
|
+
);
|
|
1107
|
+
|
|
1108
|
+
expect(result.status).toBe('success');
|
|
1109
|
+
expect(capturedSystem).toContain('payments');
|
|
1110
|
+
expect(capturedSystem).not.toContain('${{');
|
|
1111
|
+
});
|
|
1112
|
+
|
|
1113
|
+
it('should handle streaming chunks with thoughts', async () => {
|
|
1114
|
+
const step = {
|
|
1115
|
+
id: 'l-stream',
|
|
1116
|
+
type: 'llm' as const,
|
|
1117
|
+
agent: 'test-agent',
|
|
1118
|
+
prompt: 'stream this',
|
|
1119
|
+
needs: [],
|
|
1120
|
+
maxIterations: 1,
|
|
1121
|
+
};
|
|
1122
|
+
|
|
1123
|
+
// We can't easily add 'stream' to LlmStep without changing schema,
|
|
1124
|
+
// but we can mock the adapter to stream if onStream is provided.
|
|
1125
|
+
|
|
1126
|
+
const chatMock = mock(async (messages: LLMMessage[], options: any) => {
|
|
1127
|
+
if (options.onStream) {
|
|
1128
|
+
options.onStream('<thinking>thought</thinking>done');
|
|
1129
|
+
}
|
|
1130
|
+
return { message: { role: 'assistant', content: '<thinking>thought</thinking>done' } };
|
|
1131
|
+
}) as unknown as LLMAdapter['chat'];
|
|
1132
|
+
|
|
1133
|
+
const adapter = {
|
|
1134
|
+
chat: chatMock,
|
|
1135
|
+
} as any;
|
|
1136
|
+
|
|
1137
|
+
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
1138
|
+
spyOn(process.stdout, 'write').mockImplementation(() => true);
|
|
1139
|
+
|
|
1140
|
+
const emitThought = mock(() => {});
|
|
1141
|
+
|
|
1142
|
+
await executeLlmStep(
|
|
1143
|
+
step as any,
|
|
1144
|
+
context,
|
|
1145
|
+
mock(async () => ({ status: 'success' as const, output: 'ok' })) as any,
|
|
1146
|
+
new ConsoleLogger(),
|
|
1147
|
+
undefined,
|
|
1148
|
+
undefined,
|
|
1149
|
+
undefined,
|
|
1150
|
+
() => ({ adapter, resolvedModel: 'test-model' }),
|
|
1151
|
+
emitThought,
|
|
1152
|
+
{ runId: 'test-run', workflow: 'test-wf' }
|
|
1153
|
+
);
|
|
1154
|
+
|
|
1155
|
+
expect(emitThought).toHaveBeenCalled();
|
|
1156
|
+
});
|
|
1157
|
+
|
|
1158
|
+
it('should transfer to allowed agent and swap system prompt/tools', async () => {
|
|
1159
|
+
let callCount = 0;
|
|
1160
|
+
let sawTransferTool = false;
|
|
1161
|
+
let sawOriginalTool = false;
|
|
1162
|
+
let sawTargetToolAfter = false;
|
|
1163
|
+
let sawOriginalToolAfter = false;
|
|
1164
|
+
let sawTargetPrompt = false;
|
|
1165
|
+
|
|
1166
|
+
const chatMock = mock(async (messages: LLMMessage[], options: { tools?: LLMTool[] }) => {
|
|
1167
|
+
callCount++;
|
|
1168
|
+
const toolNames = options.tools?.map((t) => t.function.name) || [];
|
|
1169
|
+
|
|
1170
|
+
if (callCount === 1) {
|
|
1171
|
+
sawTransferTool = toolNames.includes('transfer_to_agent');
|
|
1172
|
+
sawOriginalTool = toolNames.includes('test-tool');
|
|
1173
|
+
return {
|
|
1174
|
+
message: {
|
|
1175
|
+
role: 'assistant',
|
|
1176
|
+
content: null,
|
|
1177
|
+
tool_calls: [
|
|
1178
|
+
{
|
|
1179
|
+
id: 'call-transfer',
|
|
1180
|
+
type: 'function',
|
|
1181
|
+
function: {
|
|
1182
|
+
name: 'transfer_to_agent',
|
|
1183
|
+
arguments: '{"agent_name":"handoff-target"}',
|
|
1184
|
+
},
|
|
1185
|
+
},
|
|
1186
|
+
],
|
|
1187
|
+
},
|
|
1188
|
+
};
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
const systemMessages = messages.filter((m) => m.role === 'system');
|
|
1192
|
+
sawTargetPrompt = systemMessages.some(
|
|
1193
|
+
(m) => typeof m.content === 'string' && m.content.includes('specialist for billing')
|
|
1194
|
+
);
|
|
1195
|
+
sawTargetToolAfter = toolNames.includes('specialist-tool');
|
|
1196
|
+
sawOriginalToolAfter = toolNames.includes('test-tool');
|
|
1197
|
+
|
|
1198
|
+
return {
|
|
1199
|
+
message: { role: 'assistant', content: 'done' },
|
|
1200
|
+
};
|
|
1201
|
+
}) as unknown as LLMAdapter['chat'];
|
|
1202
|
+
const getAdapter = createMockGetAdapter(chatMock);
|
|
1203
|
+
|
|
1204
|
+
const step: LlmStep = {
|
|
1205
|
+
id: 'l1',
|
|
1206
|
+
type: 'llm',
|
|
1207
|
+
agent: 'test-agent',
|
|
1208
|
+
prompt: 'handoff',
|
|
1209
|
+
needs: [],
|
|
1210
|
+
maxIterations: 4,
|
|
1211
|
+
allowedHandoffs: ['handoff-target'],
|
|
1212
|
+
};
|
|
1213
|
+
const context: ExpressionContext = { inputs: { topic: 'billing' }, steps: {} };
|
|
1214
|
+
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
1215
|
+
|
|
1216
|
+
const result = await executeLlmStep(
|
|
1217
|
+
step,
|
|
1218
|
+
context,
|
|
1219
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
1220
|
+
undefined,
|
|
1221
|
+
undefined,
|
|
1222
|
+
undefined,
|
|
1223
|
+
undefined,
|
|
1224
|
+
getAdapter
|
|
1225
|
+
);
|
|
1226
|
+
|
|
1227
|
+
expect(result.status).toBe('success');
|
|
1228
|
+
expect(sawTransferTool).toBe(true);
|
|
1229
|
+
expect(sawOriginalTool).toBe(true);
|
|
1230
|
+
expect(sawTargetToolAfter).toBe(true);
|
|
1231
|
+
expect(sawOriginalToolAfter).toBe(false);
|
|
1232
|
+
expect(sawTargetPrompt).toBe(true);
|
|
1233
|
+
});
|
|
1234
|
+
|
|
1235
|
+
it('should apply context updates from tool output', async () => {
|
|
1236
|
+
const step: LlmStep = {
|
|
1237
|
+
id: 'l1',
|
|
1238
|
+
type: 'llm',
|
|
1239
|
+
agent: 'context-agent',
|
|
1240
|
+
prompt: 'update context',
|
|
1241
|
+
needs: [],
|
|
1242
|
+
maxIterations: 4,
|
|
1243
|
+
tools: [
|
|
1244
|
+
{
|
|
1245
|
+
name: 'update-context',
|
|
1246
|
+
execution: {
|
|
1247
|
+
id: 'update-step',
|
|
1248
|
+
type: 'shell',
|
|
1249
|
+
run: 'echo update',
|
|
1250
|
+
},
|
|
1251
|
+
},
|
|
1252
|
+
{
|
|
1253
|
+
name: 'read-context',
|
|
1254
|
+
execution: {
|
|
1255
|
+
id: 'read-step',
|
|
1256
|
+
type: 'shell',
|
|
1257
|
+
run: 'echo read',
|
|
1258
|
+
},
|
|
1259
|
+
},
|
|
1260
|
+
],
|
|
1261
|
+
};
|
|
1262
|
+
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
1263
|
+
let sawEnvUpdate = false;
|
|
1264
|
+
let sawMemoryUpdate = false;
|
|
1265
|
+
|
|
1266
|
+
const executeStepFn = async (_step: any, toolContext: ExpressionContext) => {
|
|
1267
|
+
if (_step.id === 'update-step') {
|
|
1268
|
+
return {
|
|
1269
|
+
status: 'success' as const,
|
|
1270
|
+
output: {
|
|
1271
|
+
__keystone_context: {
|
|
1272
|
+
env: { USER_ID: '123' },
|
|
1273
|
+
memory: { user: 'Ada' },
|
|
1274
|
+
},
|
|
1275
|
+
ok: true,
|
|
1276
|
+
},
|
|
1277
|
+
};
|
|
1278
|
+
}
|
|
1279
|
+
if (_step.id === 'read-step') {
|
|
1280
|
+
sawEnvUpdate = toolContext.env?.USER_ID === '123';
|
|
1281
|
+
sawMemoryUpdate = toolContext.memory?.user === 'Ada';
|
|
1282
|
+
return { status: 'success' as const, output: { seen: true } };
|
|
1283
|
+
}
|
|
1284
|
+
return { status: 'success' as const, output: 'ok' };
|
|
1285
|
+
};
|
|
1286
|
+
|
|
1287
|
+
let callCount = 0;
|
|
1288
|
+
const chatMock = mock(async () => {
|
|
1289
|
+
callCount++;
|
|
1290
|
+
if (callCount === 1) {
|
|
1291
|
+
return {
|
|
1292
|
+
message: {
|
|
1293
|
+
role: 'assistant',
|
|
1294
|
+
content: null,
|
|
1295
|
+
tool_calls: [
|
|
1296
|
+
{
|
|
1297
|
+
id: 'call-update',
|
|
1298
|
+
type: 'function',
|
|
1299
|
+
function: { name: 'update-context', arguments: '{}' },
|
|
1300
|
+
},
|
|
1301
|
+
{
|
|
1302
|
+
id: 'call-read',
|
|
1303
|
+
type: 'function',
|
|
1304
|
+
function: { name: 'read-context', arguments: '{}' },
|
|
1305
|
+
},
|
|
1306
|
+
],
|
|
1307
|
+
},
|
|
1308
|
+
};
|
|
1309
|
+
}
|
|
1310
|
+
return { message: { role: 'assistant', content: 'done' } };
|
|
1311
|
+
}) as unknown as LLMAdapter['chat'];
|
|
1312
|
+
const getAdapter = createMockGetAdapter(chatMock);
|
|
1313
|
+
|
|
1314
|
+
await executeLlmStep(
|
|
1315
|
+
step,
|
|
1316
|
+
context,
|
|
1317
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
1318
|
+
undefined,
|
|
1319
|
+
undefined,
|
|
1320
|
+
undefined,
|
|
1321
|
+
undefined,
|
|
1322
|
+
getAdapter
|
|
1323
|
+
);
|
|
1324
|
+
|
|
1325
|
+
expect(sawEnvUpdate).toBe(true);
|
|
1326
|
+
expect(sawMemoryUpdate).toBe(true);
|
|
1327
|
+
});
|
|
813
1328
|
});
|