keystone-cli 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -138
- package/package.json +6 -3
- package/src/cli.ts +54 -369
- package/src/commands/init.ts +19 -27
- package/src/db/dynamic-state-manager.test.ts +319 -0
- package/src/db/dynamic-state-manager.ts +411 -0
- package/src/db/memory-db.test.ts +45 -0
- package/src/db/memory-db.ts +47 -21
- package/src/db/sqlite-setup.ts +26 -3
- package/src/db/workflow-db.ts +76 -5
- package/src/parser/config-schema.ts +11 -13
- package/src/parser/schema.ts +37 -2
- package/src/parser/workflow-parser.test.ts +3 -4
- package/src/parser/workflow-parser.ts +3 -62
- package/src/runner/__test__/llm-mock-setup.ts +173 -0
- package/src/runner/__test__/llm-test-setup.ts +271 -0
- package/src/runner/engine-executor.test.ts +25 -18
- package/src/runner/executors/blueprint-executor.ts +0 -1
- package/src/runner/executors/dynamic-executor.test.ts +613 -0
- package/src/runner/executors/dynamic-executor.ts +723 -0
- package/src/runner/executors/dynamic-types.ts +69 -0
- package/src/runner/executors/engine-executor.ts +5 -1
- package/src/runner/executors/llm-executor.ts +502 -1033
- package/src/runner/executors/memory-executor.ts +35 -19
- package/src/runner/executors/plan-executor.ts +0 -1
- package/src/runner/executors/types.ts +4 -4
- package/src/runner/llm-adapter.integration.test.ts +151 -0
- package/src/runner/llm-adapter.ts +263 -1401
- package/src/runner/llm-clarification.test.ts +91 -106
- package/src/runner/llm-executor.test.ts +217 -1181
- package/src/runner/memoization.test.ts +0 -1
- package/src/runner/recovery-security.test.ts +51 -20
- package/src/runner/reflexion.test.ts +55 -18
- package/src/runner/standard-tools-integration.test.ts +137 -87
- package/src/runner/step-executor.test.ts +36 -80
- package/src/runner/step-executor.ts +20 -2
- package/src/runner/test-harness.ts +3 -29
- package/src/runner/tool-integration.test.ts +122 -73
- package/src/runner/workflow-runner.ts +92 -35
- package/src/runner/workflow-scheduler.ts +11 -1
- package/src/runner/workflow-summary.ts +144 -0
- package/src/templates/dynamic-demo.yaml +31 -0
- package/src/templates/scaffolding/decompose-problem.yaml +1 -1
- package/src/templates/scaffolding/dynamic-decompose.yaml +39 -0
- package/src/utils/auth-manager.test.ts +10 -520
- package/src/utils/auth-manager.ts +3 -756
- package/src/utils/config-loader.ts +12 -0
- package/src/utils/constants.ts +0 -17
- package/src/utils/process-sandbox.ts +15 -3
- package/src/utils/topo-sort.ts +47 -0
- package/src/runner/llm-adapter-runtime.test.ts +0 -209
- package/src/runner/llm-adapter.test.ts +0 -1012
|
@@ -6,7 +6,6 @@ import { ExpressionEvaluator } from '../expression/evaluator';
|
|
|
6
6
|
import type { Workflow } from '../parser/schema';
|
|
7
7
|
import { container } from '../utils/container';
|
|
8
8
|
import { ConsoleLogger } from '../utils/logger';
|
|
9
|
-
import { getAdapter } from './llm-adapter';
|
|
10
9
|
import { WorkflowRunner } from './workflow-runner';
|
|
11
10
|
|
|
12
11
|
describe('Workflow Memoization (Auto-Hashing)', () => {
|
|
@@ -1,13 +1,46 @@
|
|
|
1
|
-
|
|
1
|
+
// Import shared mock setup FIRST (mock.module is in preload, these are the mock references)
|
|
2
|
+
import {
|
|
3
|
+
createUnifiedMockModel,
|
|
4
|
+
mockGetEmbeddingModel,
|
|
5
|
+
mockGetModel,
|
|
6
|
+
resetLlmMocks,
|
|
7
|
+
setCurrentChatFn,
|
|
8
|
+
setupLlmMocks,
|
|
9
|
+
} from './__test__/llm-test-setup';
|
|
10
|
+
|
|
11
|
+
import { ConfigLoader } from '../utils/config-loader';
|
|
12
|
+
|
|
13
|
+
import { beforeEach, describe, expect, jest, mock, test } from 'bun:test';
|
|
2
14
|
import type { Step, Workflow } from '../parser/schema';
|
|
3
|
-
|
|
15
|
+
|
|
16
|
+
// Note: mock.module() for llm-adapter is now handled by the preload file
|
|
17
|
+
// We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
|
|
18
|
+
// Instead, we use a mock model that the real ai SDK calls.
|
|
4
19
|
|
|
5
20
|
describe('WorkflowRunner Recovery Security', () => {
|
|
6
21
|
beforeEach(() => {
|
|
7
22
|
jest.restoreAllMocks();
|
|
23
|
+
ConfigLoader.clear();
|
|
24
|
+
setupLlmMocks();
|
|
25
|
+
resetLlmMocks();
|
|
26
|
+
mockGetModel.mockResolvedValue(createUnifiedMockModel());
|
|
8
27
|
});
|
|
9
28
|
|
|
10
29
|
test('should NOT allow reflexion to overwrite critical step properties', async () => {
|
|
30
|
+
// Dynamic import to ensure mocks are applied
|
|
31
|
+
const { WorkflowRunner } = await import('./workflow-runner');
|
|
32
|
+
|
|
33
|
+
setCurrentChatFn(async () => ({
|
|
34
|
+
message: {
|
|
35
|
+
role: 'assistant',
|
|
36
|
+
content: JSON.stringify({
|
|
37
|
+
run: 'echo "fixed"',
|
|
38
|
+
type: 'script', // ATTEMPT TO CHANGE TYPE
|
|
39
|
+
id: 'malicious-id', // ATTEMPT TO CHANGE ID
|
|
40
|
+
}),
|
|
41
|
+
},
|
|
42
|
+
}));
|
|
43
|
+
|
|
11
44
|
const workflow: Workflow = {
|
|
12
45
|
name: 'reflexion-security-test',
|
|
13
46
|
steps: [
|
|
@@ -22,27 +55,11 @@ describe('WorkflowRunner Recovery Security', () => {
|
|
|
22
55
|
],
|
|
23
56
|
};
|
|
24
57
|
|
|
25
|
-
const mockGetAdapter = () => ({
|
|
26
|
-
adapter: {
|
|
27
|
-
chat: async () => ({
|
|
28
|
-
message: {
|
|
29
|
-
content: JSON.stringify({
|
|
30
|
-
run: 'echo "fixed"',
|
|
31
|
-
type: 'script', // ATTEMPT TO CHANGE TYPE
|
|
32
|
-
id: 'malicious-id', // ATTEMPT TO CHANGE ID
|
|
33
|
-
}),
|
|
34
|
-
},
|
|
35
|
-
}),
|
|
36
|
-
} as any,
|
|
37
|
-
resolvedModel: 'mock-model',
|
|
38
|
-
});
|
|
39
|
-
|
|
40
58
|
const spy = jest.fn();
|
|
41
59
|
|
|
42
60
|
const runner = new WorkflowRunner(workflow, {
|
|
43
|
-
logger: { log: () => {}, error: () => {}, warn: () => {}, debug: () => {} },
|
|
61
|
+
logger: { log: () => {}, error: () => {}, warn: () => {}, debug: () => {}, info: () => {} },
|
|
44
62
|
dbPath: ':memory:',
|
|
45
|
-
getAdapter: mockGetAdapter,
|
|
46
63
|
executeStep: spy as any,
|
|
47
64
|
});
|
|
48
65
|
|
|
@@ -71,6 +88,9 @@ describe('WorkflowRunner Recovery Security', () => {
|
|
|
71
88
|
});
|
|
72
89
|
|
|
73
90
|
test('should NOT allow auto_heal to overwrite critical step properties', async () => {
|
|
91
|
+
// Dynamic import to ensure mocks are applied
|
|
92
|
+
const { WorkflowRunner } = await import('./workflow-runner');
|
|
93
|
+
|
|
74
94
|
const workflow: Workflow = {
|
|
75
95
|
name: 'autoheal-security-test',
|
|
76
96
|
steps: [
|
|
@@ -88,7 +108,7 @@ describe('WorkflowRunner Recovery Security', () => {
|
|
|
88
108
|
|
|
89
109
|
const spy = jest.fn();
|
|
90
110
|
const runner = new WorkflowRunner(workflow, {
|
|
91
|
-
logger: { log: () => {}, error: () => {}, warn: () => {}, debug: () => {} },
|
|
111
|
+
logger: { log: () => {}, error: () => {}, warn: () => {}, debug: () => {}, info: () => {} },
|
|
92
112
|
dbPath: ':memory:',
|
|
93
113
|
executeStep: spy as any,
|
|
94
114
|
});
|
|
@@ -96,6 +116,17 @@ describe('WorkflowRunner Recovery Security', () => {
|
|
|
96
116
|
const db = (runner as any).db;
|
|
97
117
|
await db.createRun(runner.runId, workflow.name, {});
|
|
98
118
|
|
|
119
|
+
setCurrentChatFn(async () => ({
|
|
120
|
+
message: {
|
|
121
|
+
role: 'assistant',
|
|
122
|
+
content: JSON.stringify({
|
|
123
|
+
run: 'echo "fixed"',
|
|
124
|
+
type: 'script',
|
|
125
|
+
id: 'malicious-id',
|
|
126
|
+
}),
|
|
127
|
+
},
|
|
128
|
+
}));
|
|
129
|
+
|
|
99
130
|
spy.mockImplementation(async (step: any) => {
|
|
100
131
|
if (step.run === 'exit 1') {
|
|
101
132
|
return { status: 'failed', output: null, error: 'Command failed' };
|
|
@@ -1,14 +1,63 @@
|
|
|
1
|
-
|
|
1
|
+
// Import shared mock setup FIRST (mock.module is in preload, these are the mock references)
|
|
2
|
+
import {
|
|
3
|
+
createUnifiedMockModel,
|
|
4
|
+
mockGetModel,
|
|
5
|
+
resetLlmMocks,
|
|
6
|
+
setCurrentChatFn,
|
|
7
|
+
setupLlmMocks,
|
|
8
|
+
} from './__test__/llm-test-setup';
|
|
9
|
+
|
|
10
|
+
import { beforeAll, beforeEach, describe, expect, jest, mock, test } from 'bun:test';
|
|
2
11
|
import type { Step, Workflow } from '../parser/schema';
|
|
3
|
-
import
|
|
4
|
-
|
|
12
|
+
import { ConfigLoader } from '../utils/config-loader';
|
|
13
|
+
|
|
14
|
+
// Note: mock.module() for llm-adapter is now handled by the preload file
|
|
15
|
+
// We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
|
|
16
|
+
// Instead, we use a mock model that the real ai SDK calls.
|
|
17
|
+
|
|
18
|
+
// Dynamic import holder
|
|
19
|
+
let WorkflowRunner: any;
|
|
5
20
|
|
|
6
21
|
describe('WorkflowRunner Reflexion', () => {
|
|
22
|
+
beforeAll(async () => {
|
|
23
|
+
// Set up config
|
|
24
|
+
ConfigLoader.setConfig({
|
|
25
|
+
providers: {
|
|
26
|
+
openai: { type: 'openai', package: '@ai-sdk/openai', api_key_env: 'OPENAI_API_KEY' },
|
|
27
|
+
},
|
|
28
|
+
default_provider: 'openai',
|
|
29
|
+
model_mappings: {},
|
|
30
|
+
storage: { retention_days: 30, redact_secrets_at_rest: true },
|
|
31
|
+
mcp_servers: {},
|
|
32
|
+
engines: { allowlist: {}, denylist: [] },
|
|
33
|
+
concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
|
|
34
|
+
expression: { strict: false },
|
|
35
|
+
} as any);
|
|
36
|
+
|
|
37
|
+
mockGetModel.mockResolvedValue(createUnifiedMockModel());
|
|
38
|
+
setupLlmMocks();
|
|
39
|
+
|
|
40
|
+
setCurrentChatFn(async () => ({
|
|
41
|
+
message: { role: 'assistant', content: JSON.stringify({ run: 'echo "fixed"' }) },
|
|
42
|
+
}));
|
|
43
|
+
|
|
44
|
+
// Import after mocks
|
|
45
|
+
const module = await import('./workflow-runner');
|
|
46
|
+
WorkflowRunner = module.WorkflowRunner;
|
|
47
|
+
});
|
|
48
|
+
|
|
7
49
|
beforeEach(() => {
|
|
50
|
+
ConfigLoader.clear();
|
|
8
51
|
jest.restoreAllMocks();
|
|
52
|
+
setupLlmMocks();
|
|
53
|
+
setupLlmMocks();
|
|
54
|
+
resetLlmMocks();
|
|
55
|
+
setCurrentChatFn(async () => ({
|
|
56
|
+
message: { role: 'assistant', content: JSON.stringify({ run: 'echo "fixed"' }) },
|
|
57
|
+
}));
|
|
9
58
|
});
|
|
10
59
|
|
|
11
|
-
test('should attempt to self-correct a failing step using
|
|
60
|
+
test('should attempt to self-correct a failing step using reflexion', async () => {
|
|
12
61
|
const workflow: Workflow = {
|
|
13
62
|
name: 'reflexion-test',
|
|
14
63
|
steps: [
|
|
@@ -24,30 +73,18 @@ describe('WorkflowRunner Reflexion', () => {
|
|
|
24
73
|
],
|
|
25
74
|
};
|
|
26
75
|
|
|
27
|
-
const mockGetAdapter = () => ({
|
|
28
|
-
adapter: {
|
|
29
|
-
chat: async () => ({
|
|
30
|
-
message: {
|
|
31
|
-
content: JSON.stringify({ run: 'echo "fixed"' }),
|
|
32
|
-
},
|
|
33
|
-
}),
|
|
34
|
-
} as any,
|
|
35
|
-
resolvedModel: 'mock-model',
|
|
36
|
-
});
|
|
37
|
-
|
|
38
76
|
const spy = jest.fn();
|
|
39
77
|
|
|
40
78
|
const runner = new WorkflowRunner(workflow, {
|
|
41
|
-
logger: { log: () => {}, error: () => {}, warn: () => {} },
|
|
79
|
+
logger: { log: () => {}, error: () => {}, warn: () => {}, debug: () => {}, info: () => {} },
|
|
42
80
|
dbPath: ':memory:',
|
|
43
|
-
getAdapter: mockGetAdapter,
|
|
44
81
|
executeStep: spy as any,
|
|
45
82
|
});
|
|
46
83
|
|
|
47
84
|
const db = (runner as any).db;
|
|
48
85
|
await db.createRun(runner.runId, workflow.name, {});
|
|
49
86
|
|
|
50
|
-
// First call fails, Reflexion logic kicks in (calling mocked
|
|
87
|
+
// First call fails, Reflexion logic kicks in (calling mocked generateText),
|
|
51
88
|
// then it retries with corrected command.
|
|
52
89
|
spy.mockImplementation(async (step: any) => {
|
|
53
90
|
if (step.run === 'exit 1') {
|
|
@@ -1,88 +1,147 @@
|
|
|
1
|
-
|
|
1
|
+
// Import shared mock setup FIRST (mock.module is in preload, these are the mock references)
|
|
2
|
+
import {
|
|
3
|
+
type MockLLMResponse,
|
|
4
|
+
createUnifiedMockModel,
|
|
5
|
+
mockGetModel,
|
|
6
|
+
resetLlmMocks,
|
|
7
|
+
setCurrentChatFn,
|
|
8
|
+
setupLlmMocks,
|
|
9
|
+
} from './__test__/llm-test-setup';
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
afterAll,
|
|
13
|
+
afterEach,
|
|
14
|
+
beforeAll,
|
|
15
|
+
beforeEach,
|
|
16
|
+
describe,
|
|
17
|
+
expect,
|
|
18
|
+
it,
|
|
19
|
+
mock,
|
|
20
|
+
spyOn,
|
|
21
|
+
} from 'bun:test';
|
|
2
22
|
import { existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
|
|
3
23
|
import { join } from 'node:path';
|
|
4
24
|
import type { ExpressionContext } from '../expression/evaluator';
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
7
|
-
import
|
|
25
|
+
import * as agentParser from '../parser/agent-parser';
|
|
26
|
+
import type { Agent, LlmStep, Step } from '../parser/schema';
|
|
27
|
+
import { ConfigLoader } from '../utils/config-loader';
|
|
8
28
|
import type { StepResult } from './step-executor';
|
|
9
29
|
|
|
30
|
+
// Note: mock.module() is now handled by the preload file
|
|
31
|
+
|
|
32
|
+
// Dynamic import holder
|
|
33
|
+
let executeLlmStep: any;
|
|
34
|
+
|
|
35
|
+
// Local chat function wrapper for test-specific overrides
|
|
36
|
+
let currentChatFn: (messages: any[], options?: any) => Promise<MockLLMResponse>;
|
|
37
|
+
|
|
10
38
|
describe('Standard Tools Integration', () => {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
39
|
+
// Test fixtures
|
|
40
|
+
const testDir = join(process.cwd(), '.e2e-tmp', 'standard-tools-test');
|
|
41
|
+
let resolveAgentPathSpy: ReturnType<typeof spyOn>;
|
|
42
|
+
let parseAgentSpy: ReturnType<typeof spyOn>;
|
|
43
|
+
|
|
44
|
+
beforeAll(async () => {
|
|
45
|
+
// Setup config before importing the executor
|
|
46
|
+
ConfigLoader.setConfig({
|
|
47
|
+
default_provider: 'test-provider',
|
|
48
|
+
providers: {
|
|
49
|
+
'test-provider': {
|
|
50
|
+
type: 'openai',
|
|
51
|
+
package: '@ai-sdk/openai',
|
|
52
|
+
},
|
|
53
|
+
},
|
|
54
|
+
model_mappings: {},
|
|
55
|
+
} as any);
|
|
17
56
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
57
|
+
// Ensure the mock model is set up
|
|
58
|
+
setupLlmMocks();
|
|
59
|
+
|
|
60
|
+
// Dynamic import AFTER mocks are set up
|
|
61
|
+
const module = await import('./executors/llm-executor.ts');
|
|
62
|
+
executeLlmStep = module.executeLlmStep;
|
|
63
|
+
|
|
64
|
+
// Create test directory
|
|
65
|
+
if (!existsSync(testDir)) {
|
|
66
|
+
mkdirSync(testDir, { recursive: true });
|
|
23
67
|
}
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
68
|
+
writeFileSync(join(testDir, 'test.txt'), 'hello world');
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
beforeEach(() => {
|
|
72
|
+
ConfigLoader.clear();
|
|
73
|
+
// Setup mocks for each test
|
|
74
|
+
setupLlmMocks();
|
|
75
|
+
|
|
76
|
+
// Mock the agent parser to avoid needing actual agent files
|
|
77
|
+
resolveAgentPathSpy = spyOn(agentParser, 'resolveAgentPath').mockReturnValue(
|
|
78
|
+
'tool-test-agent.md'
|
|
33
79
|
);
|
|
80
|
+
parseAgentSpy = spyOn(agentParser, 'parseAgent').mockReturnValue({
|
|
81
|
+
name: 'tool-test-agent',
|
|
82
|
+
systemPrompt: 'Test agent for standard tools',
|
|
83
|
+
tools: [],
|
|
84
|
+
model: 'gpt-4o',
|
|
85
|
+
} as unknown as Agent);
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
afterEach(() => {
|
|
89
|
+
resolveAgentPathSpy?.mockRestore();
|
|
90
|
+
parseAgentSpy?.mockRestore();
|
|
91
|
+
resetLlmMocks();
|
|
34
92
|
});
|
|
35
93
|
|
|
36
94
|
afterAll(() => {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if (existsSync(agentPath)) {
|
|
40
|
-
rmSync(agentPath);
|
|
41
|
-
}
|
|
95
|
+
rmSync(testDir, { recursive: true, force: true });
|
|
96
|
+
ConfigLoader.clear();
|
|
42
97
|
});
|
|
43
98
|
|
|
44
99
|
it('should inject standard tools when useStandardTools is true', async () => {
|
|
45
|
-
let capturedTools:
|
|
100
|
+
let capturedTools: any[] = [];
|
|
101
|
+
let callCount = 0;
|
|
46
102
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
103
|
+
currentChatFn = async (messages, options) => {
|
|
104
|
+
callCount++;
|
|
105
|
+
capturedTools = options?.tools || [];
|
|
106
|
+
|
|
107
|
+
if (callCount === 1) {
|
|
108
|
+
return {
|
|
109
|
+
message: {
|
|
110
|
+
role: 'assistant',
|
|
111
|
+
content: 'I will read the file',
|
|
112
|
+
tool_calls: [
|
|
113
|
+
{
|
|
114
|
+
id: 'c1',
|
|
115
|
+
type: 'function',
|
|
116
|
+
function: { name: 'read_file', arguments: '{"path":"test.txt"}' },
|
|
60
117
|
},
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
}
|
|
64
|
-
|
|
118
|
+
],
|
|
119
|
+
},
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
return {
|
|
124
|
+
message: { role: 'assistant', content: 'the file contents are hello world' },
|
|
125
|
+
usage: { prompt_tokens: 20, completion_tokens: 10, total_tokens: 30 },
|
|
65
126
|
};
|
|
66
|
-
}
|
|
67
|
-
|
|
127
|
+
};
|
|
128
|
+
setCurrentChatFn(currentChatFn as any);
|
|
68
129
|
|
|
69
130
|
const step: LlmStep = {
|
|
70
131
|
id: 'l1',
|
|
71
132
|
type: 'llm',
|
|
72
|
-
agent: 'test-agent',
|
|
73
|
-
needs: [],
|
|
133
|
+
agent: 'tool-test-agent',
|
|
74
134
|
prompt: 'read test.txt',
|
|
75
135
|
useStandardTools: true,
|
|
76
|
-
|
|
136
|
+
needs: [],
|
|
137
|
+
maxIterations: 3,
|
|
77
138
|
};
|
|
78
139
|
|
|
79
140
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
80
|
-
const executeStepFn = mock(async (
|
|
81
|
-
return { status: 'success', output: '
|
|
141
|
+
const executeStepFn = mock(async (step: Step) => {
|
|
142
|
+
return { status: 'success' as const, output: 'hello world' };
|
|
82
143
|
});
|
|
83
144
|
|
|
84
|
-
// We catch the "Max iterations reached" error because we set maxIterations to 1
|
|
85
|
-
// but we can still check if tools were injected and the tool call was made.
|
|
86
145
|
try {
|
|
87
146
|
await executeLlmStep(
|
|
88
147
|
step,
|
|
@@ -91,14 +150,13 @@ System prompt`,
|
|
|
91
150
|
undefined,
|
|
92
151
|
undefined,
|
|
93
152
|
undefined,
|
|
94
|
-
undefined
|
|
95
|
-
getAdapter
|
|
153
|
+
undefined
|
|
96
154
|
);
|
|
97
155
|
} catch (e) {
|
|
98
156
|
if ((e as Error).message !== 'Max ReAct iterations reached') throw e;
|
|
99
157
|
}
|
|
100
158
|
|
|
101
|
-
expect(capturedTools.some((t) => t.function.name === 'read_file')).toBe(true);
|
|
159
|
+
expect(capturedTools.some((t: any) => t.function.name === 'read_file')).toBe(true);
|
|
102
160
|
expect(executeStepFn).toHaveBeenCalled();
|
|
103
161
|
const toolStep = executeStepFn.mock.calls[0][0] as Step;
|
|
104
162
|
expect(toolStep.type).toBe('file');
|
|
@@ -119,20 +177,8 @@ System prompt`,
|
|
|
119
177
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
120
178
|
const executeStepFn = mock(async () => ({ status: 'success', output: '' }));
|
|
121
179
|
|
|
122
|
-
//
|
|
123
|
-
|
|
124
|
-
// Actually, in llm-executor.ts, it pushes a "Security Error" message if check fails and continues loop.
|
|
125
|
-
|
|
126
|
-
let securityErrorMessage = '';
|
|
127
|
-
const chatMock = mock(async (messages) => {
|
|
128
|
-
const lastMessage = messages[messages.length - 1];
|
|
129
|
-
if (lastMessage.role === 'tool') {
|
|
130
|
-
securityErrorMessage = lastMessage.content;
|
|
131
|
-
return {
|
|
132
|
-
message: { role: 'assistant', content: 'stop' },
|
|
133
|
-
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
|
|
134
|
-
};
|
|
135
|
-
}
|
|
180
|
+
// Mock makes a tool call to run_command which should be rejected
|
|
181
|
+
currentChatFn = async () => {
|
|
136
182
|
return {
|
|
137
183
|
message: {
|
|
138
184
|
role: 'assistant',
|
|
@@ -145,21 +191,25 @@ System prompt`,
|
|
|
145
191
|
],
|
|
146
192
|
},
|
|
147
193
|
};
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
194
|
+
};
|
|
195
|
+
setCurrentChatFn(currentChatFn as any);
|
|
196
|
+
|
|
197
|
+
// May throw max iterations or complete
|
|
198
|
+
try {
|
|
199
|
+
await executeLlmStep(
|
|
200
|
+
step,
|
|
201
|
+
context,
|
|
202
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
203
|
+
undefined,
|
|
204
|
+
undefined,
|
|
205
|
+
undefined,
|
|
206
|
+
undefined
|
|
207
|
+
);
|
|
208
|
+
} catch (e) {
|
|
209
|
+
// Expected to hit max iterations
|
|
210
|
+
}
|
|
161
211
|
|
|
162
|
-
|
|
212
|
+
// The key assertion: executeStepFn should NOT have been called for the risky command
|
|
163
213
|
expect(executeStepFn).not.toHaveBeenCalled();
|
|
164
214
|
});
|
|
165
215
|
});
|
|
@@ -31,7 +31,7 @@ import type {
|
|
|
31
31
|
import { ConfigLoader } from '../utils/config-loader';
|
|
32
32
|
import type { SafeSandbox } from '../utils/sandbox';
|
|
33
33
|
import type { executeLlmStep } from './executors/llm-executor.ts';
|
|
34
|
-
|
|
34
|
+
// Note: Memory tests use module mocking for getEmbeddingModel
|
|
35
35
|
import { executeStep } from './step-executor';
|
|
36
36
|
|
|
37
37
|
interface StepOutput {
|
|
@@ -564,95 +564,55 @@ describe('step-executor', () => {
|
|
|
564
564
|
search: mock(() => Promise.resolve([{ content: 'found', similarity: 0.9 }])),
|
|
565
565
|
};
|
|
566
566
|
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
},
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo' };
|
|
580
|
-
const result = await executeStep(step, context, undefined, {
|
|
581
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
567
|
+
// Set up config with embedding_model for memory tests
|
|
568
|
+
beforeEach(() => {
|
|
569
|
+
ConfigLoader.setConfig({
|
|
570
|
+
default_provider: 'openai',
|
|
571
|
+
providers: {},
|
|
572
|
+
model_mappings: {},
|
|
573
|
+
embedding_model: 'text-embedding-3-small',
|
|
574
|
+
storage: { retention_days: 30, redact_secrets_at_rest: true },
|
|
575
|
+
mcp_servers: {},
|
|
576
|
+
engines: { allowlist: {}, denylist: [] },
|
|
577
|
+
concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
|
|
578
|
+
expression: { strict: false },
|
|
582
579
|
});
|
|
583
|
-
|
|
584
|
-
|
|
580
|
+
mockMemoryDb.store.mockReset();
|
|
581
|
+
mockMemoryDb.search.mockReset();
|
|
582
|
+
mockMemoryDb.store.mockResolvedValue('mem-id');
|
|
583
|
+
mockMemoryDb.search.mockResolvedValue([{ content: 'found', similarity: 0.9 }]);
|
|
585
584
|
});
|
|
586
585
|
|
|
587
|
-
it('should fail if
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
599
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
586
|
+
it('should fail if no embedding model is configured', async () => {
|
|
587
|
+
ConfigLoader.setConfig({
|
|
588
|
+
default_provider: 'openai',
|
|
589
|
+
providers: {},
|
|
590
|
+
model_mappings: {},
|
|
591
|
+
// No embedding_model set
|
|
592
|
+
storage: { retention_days: 30, redact_secrets_at_rest: true },
|
|
593
|
+
mcp_servers: {},
|
|
594
|
+
engines: { allowlist: {}, denylist: [] },
|
|
595
|
+
concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
|
|
596
|
+
expression: { strict: false },
|
|
600
597
|
});
|
|
601
|
-
expect(result.status).toBe('failed');
|
|
602
|
-
expect(result.error).toContain('does not support embeddings');
|
|
603
|
-
});
|
|
604
|
-
|
|
605
|
-
it('should fail for non-local embedding models', async () => {
|
|
606
|
-
// @ts-ignore
|
|
607
|
-
const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo', model: 'openai' };
|
|
608
598
|
// @ts-ignore
|
|
599
|
+
const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo', needs: [] };
|
|
609
600
|
const result = await executeStep(step, context, undefined, {
|
|
610
601
|
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
611
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
612
602
|
});
|
|
613
603
|
expect(result.status).toBe('failed');
|
|
614
|
-
expect(result.error).toContain('
|
|
604
|
+
expect(result.error).toContain('No embedding model configured');
|
|
615
605
|
});
|
|
616
606
|
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
id: 'm1',
|
|
621
|
-
type: 'memory',
|
|
622
|
-
op: 'store',
|
|
623
|
-
text: 'foo',
|
|
624
|
-
metadata: { source: 'test' },
|
|
625
|
-
};
|
|
626
|
-
// @ts-ignore
|
|
627
|
-
const result = await executeStep(step, context, undefined, {
|
|
628
|
-
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
629
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
630
|
-
});
|
|
631
|
-
expect(result.status).toBe('success');
|
|
632
|
-
expect(result.output).toEqual({ id: 'mem-id', status: 'stored' });
|
|
633
|
-
expect(mockMemoryDb.store).toHaveBeenCalledWith('foo', [0.1, 0.2, 0.3], { source: 'test' });
|
|
634
|
-
});
|
|
635
|
-
|
|
636
|
-
it('should search memory', async () => {
|
|
637
|
-
// @ts-ignore
|
|
638
|
-
const step = { id: 'm1', type: 'memory', op: 'search', query: 'foo', limit: 5 };
|
|
639
|
-
// @ts-ignore
|
|
640
|
-
const result = await executeStep(step, context, undefined, {
|
|
641
|
-
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
642
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
643
|
-
});
|
|
644
|
-
expect(result.status).toBe('success');
|
|
645
|
-
expect(result.output).toEqual([{ content: 'found', similarity: 0.9 }]);
|
|
646
|
-
expect(mockMemoryDb.search).toHaveBeenCalledWith([0.1, 0.2, 0.3], 5);
|
|
647
|
-
});
|
|
607
|
+
// Note: Full integration tests for memory store/search require mocking the AI SDK
|
|
608
|
+
// The implementation uses getEmbeddingModel() + embed() from 'ai' package
|
|
609
|
+
// These tests verify the error handling logic
|
|
648
610
|
|
|
649
611
|
it('should fail store if text is missing', async () => {
|
|
650
612
|
// @ts-ignore
|
|
651
|
-
const step = { id: 'm1', type: 'memory', op: 'store' };
|
|
652
|
-
// @ts-ignore
|
|
613
|
+
const step = { id: 'm1', type: 'memory', op: 'store', needs: [] };
|
|
653
614
|
const result = await executeStep(step, context, undefined, {
|
|
654
615
|
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
655
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
656
616
|
});
|
|
657
617
|
expect(result.status).toBe('failed');
|
|
658
618
|
expect(result.error).toBe('Text is required for memory store operation');
|
|
@@ -660,11 +620,9 @@ describe('step-executor', () => {
|
|
|
660
620
|
|
|
661
621
|
it('should fail search if query is missing', async () => {
|
|
662
622
|
// @ts-ignore
|
|
663
|
-
const step = { id: 'm1', type: 'memory', op: 'search' };
|
|
664
|
-
// @ts-ignore
|
|
623
|
+
const step = { id: 'm1', type: 'memory', op: 'search', needs: [] };
|
|
665
624
|
const result = await executeStep(step, context, undefined, {
|
|
666
625
|
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
667
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
668
626
|
});
|
|
669
627
|
expect(result.status).toBe('failed');
|
|
670
628
|
expect(result.error).toBe('Query is required for memory search operation');
|
|
@@ -672,11 +630,9 @@ describe('step-executor', () => {
|
|
|
672
630
|
|
|
673
631
|
it('should fail for unknown memory operation', async () => {
|
|
674
632
|
// @ts-ignore
|
|
675
|
-
const step = { id: 'm1', type: 'memory', op: 'unknown', text: 'foo' };
|
|
676
|
-
// @ts-ignore
|
|
633
|
+
const step = { id: 'm1', type: 'memory', op: 'unknown', text: 'foo', needs: [] };
|
|
677
634
|
const result = await executeStep(step, context, undefined, {
|
|
678
635
|
memoryDb: mockMemoryDb as unknown as MemoryDb,
|
|
679
|
-
getAdapter: mockGetAdapter as unknown as typeof getAdapter,
|
|
680
636
|
});
|
|
681
637
|
expect(result.status).toBe('failed');
|
|
682
638
|
expect(result.error).toContain('Unknown memory operation');
|