keystone-cli 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/package.json +1 -1
- package/src/cli.ts +233 -21
- package/src/db/memory-db.ts +6 -0
- package/src/db/sqlite-setup.test.ts +47 -0
- package/src/db/workflow-db.ts +6 -0
- package/src/expression/evaluator.ts +2 -0
- package/src/parser/schema.ts +3 -0
- package/src/runner/debug-repl.test.ts +240 -6
- package/src/runner/llm-adapter.test.ts +10 -4
- package/src/runner/llm-executor.ts +39 -3
- package/src/runner/shell-executor.ts +40 -12
- package/src/runner/standard-tools-integration.test.ts +147 -0
- package/src/runner/standard-tools.test.ts +69 -0
- package/src/runner/standard-tools.ts +270 -0
- package/src/runner/step-executor.test.ts +194 -1
- package/src/runner/step-executor.ts +46 -15
- package/src/runner/stream-utils.test.ts +113 -7
- package/src/runner/stream-utils.ts +4 -4
- package/src/runner/workflow-runner.ts +14 -20
- package/src/templates/agents/keystone-architect.md +16 -2
- package/src/templates/agents/software-engineer.md +17 -0
- package/src/templates/memory-service.yaml +54 -0
- package/src/templates/robust-automation.yaml +44 -0
- package/src/templates/scaffold-feature.yaml +1 -0
|
@@ -1,19 +1,27 @@
|
|
|
1
|
-
import { describe, expect, test } from 'bun:test';
|
|
1
|
+
import { describe, expect, mock, spyOn, test } from 'bun:test';
|
|
2
|
+
import * as cp from 'node:child_process';
|
|
3
|
+
import * as fs from 'node:fs';
|
|
2
4
|
import { PassThrough } from 'node:stream';
|
|
3
5
|
import type { ExpressionContext } from '../expression/evaluator.ts';
|
|
4
6
|
import type { Step } from '../parser/schema.ts';
|
|
7
|
+
import type { Logger } from '../utils/logger.ts';
|
|
5
8
|
import { DebugRepl } from './debug-repl.ts';
|
|
6
9
|
|
|
7
10
|
describe('DebugRepl', () => {
|
|
8
11
|
const mockContext: ExpressionContext = { inputs: { foo: 'bar' } };
|
|
9
|
-
//
|
|
10
|
-
const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as
|
|
12
|
+
// mock step typing
|
|
13
|
+
const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as unknown as Step;
|
|
11
14
|
const mockError = new Error('Test Error');
|
|
12
15
|
|
|
13
16
|
test('should resolve with "skip" when user types "skip"', async () => {
|
|
14
17
|
const input = new PassThrough();
|
|
15
18
|
const output = new PassThrough();
|
|
16
|
-
const mockLogger
|
|
19
|
+
const mockLogger: Logger = {
|
|
20
|
+
log: mock(() => {}),
|
|
21
|
+
error: mock(() => {}),
|
|
22
|
+
warn: mock(() => {}),
|
|
23
|
+
info: mock(() => {}),
|
|
24
|
+
};
|
|
17
25
|
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
18
26
|
|
|
19
27
|
const promise = repl.start();
|
|
@@ -30,7 +38,12 @@ describe('DebugRepl', () => {
|
|
|
30
38
|
test('should resolve with "retry" when user types "retry"', async () => {
|
|
31
39
|
const input = new PassThrough();
|
|
32
40
|
const output = new PassThrough();
|
|
33
|
-
const mockLogger
|
|
41
|
+
const mockLogger: Logger = {
|
|
42
|
+
log: mock(() => {}),
|
|
43
|
+
error: mock(() => {}),
|
|
44
|
+
warn: mock(() => {}),
|
|
45
|
+
info: mock(() => {}),
|
|
46
|
+
};
|
|
34
47
|
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
35
48
|
|
|
36
49
|
const promise = repl.start();
|
|
@@ -48,7 +61,12 @@ describe('DebugRepl', () => {
|
|
|
48
61
|
test('should resolve with "continue_failure" when user types "exit"', async () => {
|
|
49
62
|
const input = new PassThrough();
|
|
50
63
|
const output = new PassThrough();
|
|
51
|
-
const mockLogger
|
|
64
|
+
const mockLogger: Logger = {
|
|
65
|
+
log: mock(() => {}),
|
|
66
|
+
error: mock(() => {}),
|
|
67
|
+
warn: mock(() => {}),
|
|
68
|
+
info: mock(() => {}),
|
|
69
|
+
};
|
|
52
70
|
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
53
71
|
|
|
54
72
|
const promise = repl.start();
|
|
@@ -60,6 +78,137 @@ describe('DebugRepl', () => {
|
|
|
60
78
|
expect(result).toEqual({ type: 'continue_failure' });
|
|
61
79
|
});
|
|
62
80
|
|
|
81
|
+
test('should handle "context" command', async () => {
|
|
82
|
+
const input = new PassThrough();
|
|
83
|
+
const output = new PassThrough();
|
|
84
|
+
const mockLogger: Logger = {
|
|
85
|
+
log: mock(() => {}),
|
|
86
|
+
error: mock(() => {}),
|
|
87
|
+
warn: mock(() => {}),
|
|
88
|
+
info: mock(() => {}),
|
|
89
|
+
};
|
|
90
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
91
|
+
|
|
92
|
+
repl.start();
|
|
93
|
+
|
|
94
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
95
|
+
input.write('context\n');
|
|
96
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
97
|
+
|
|
98
|
+
expect(mockLogger.log).toHaveBeenCalled();
|
|
99
|
+
// biome-ignore lint/suspicious/noExplicitAny: accessing mock property
|
|
100
|
+
const lastCall = (mockLogger.log as unknown as any).mock.calls.find((call: any[]) =>
|
|
101
|
+
String(call[0]).includes('foo')
|
|
102
|
+
);
|
|
103
|
+
expect(lastCall?.[0]).toContain('bar');
|
|
104
|
+
input.write('exit\n');
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
test('should handle "eval" command', async () => {
|
|
108
|
+
const input = new PassThrough();
|
|
109
|
+
const output = new PassThrough();
|
|
110
|
+
const mockLogger: Logger = {
|
|
111
|
+
log: mock(() => {}),
|
|
112
|
+
error: mock(() => {}),
|
|
113
|
+
warn: mock(() => {}),
|
|
114
|
+
info: mock(() => {}),
|
|
115
|
+
};
|
|
116
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
117
|
+
|
|
118
|
+
repl.start();
|
|
119
|
+
|
|
120
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
121
|
+
input.write('eval inputs.foo\n');
|
|
122
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
123
|
+
|
|
124
|
+
expect(mockLogger.log).toHaveBeenCalledWith('bar');
|
|
125
|
+
input.write('exit\n');
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
test('should handle "eval" command with error', async () => {
|
|
129
|
+
const input = new PassThrough();
|
|
130
|
+
const output = new PassThrough();
|
|
131
|
+
const mockLogger: Logger = {
|
|
132
|
+
log: mock(() => {}),
|
|
133
|
+
error: mock(() => {}),
|
|
134
|
+
warn: mock(() => {}),
|
|
135
|
+
info: mock(() => {}),
|
|
136
|
+
};
|
|
137
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
138
|
+
|
|
139
|
+
repl.start();
|
|
140
|
+
|
|
141
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
142
|
+
input.write('eval nonExistent.bar\n');
|
|
143
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
144
|
+
|
|
145
|
+
expect(mockLogger.error).toHaveBeenCalled();
|
|
146
|
+
input.write('exit\n');
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
test('should handle "eval" command without arguments', async () => {
|
|
150
|
+
const input = new PassThrough();
|
|
151
|
+
const output = new PassThrough();
|
|
152
|
+
const mockLogger: Logger = {
|
|
153
|
+
log: mock(() => {}),
|
|
154
|
+
error: mock(() => {}),
|
|
155
|
+
warn: mock(() => {}),
|
|
156
|
+
info: mock(() => {}),
|
|
157
|
+
};
|
|
158
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
159
|
+
|
|
160
|
+
repl.start();
|
|
161
|
+
|
|
162
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
163
|
+
input.write('eval\n');
|
|
164
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
165
|
+
|
|
166
|
+
expect(mockLogger.log).toHaveBeenCalledWith('Usage: eval <expression>');
|
|
167
|
+
input.write('exit\n');
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test('should handle unknown command', async () => {
|
|
171
|
+
const input = new PassThrough();
|
|
172
|
+
const output = new PassThrough();
|
|
173
|
+
const mockLogger: Logger = {
|
|
174
|
+
log: mock(() => {}),
|
|
175
|
+
error: mock(() => {}),
|
|
176
|
+
warn: mock(() => {}),
|
|
177
|
+
info: mock(() => {}),
|
|
178
|
+
};
|
|
179
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
180
|
+
|
|
181
|
+
repl.start();
|
|
182
|
+
|
|
183
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
184
|
+
input.write('unknown_cmd\n');
|
|
185
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
186
|
+
|
|
187
|
+
expect(mockLogger.log).toHaveBeenCalledWith('Unknown command: unknown_cmd');
|
|
188
|
+
input.write('exit\n');
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
test('should handle empty input', async () => {
|
|
192
|
+
const input = new PassThrough();
|
|
193
|
+
const output = new PassThrough();
|
|
194
|
+
const mockLogger: Logger = {
|
|
195
|
+
log: mock(() => {}),
|
|
196
|
+
error: mock(() => {}),
|
|
197
|
+
warn: mock(() => {}),
|
|
198
|
+
info: mock(() => {}),
|
|
199
|
+
};
|
|
200
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
201
|
+
|
|
202
|
+
repl.start();
|
|
203
|
+
|
|
204
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
205
|
+
input.write('\n');
|
|
206
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
207
|
+
|
|
208
|
+
expect(mockLogger.log).not.toHaveBeenCalledWith('Unknown command: ');
|
|
209
|
+
input.write('exit\n');
|
|
210
|
+
});
|
|
211
|
+
|
|
63
212
|
test('should parse shell commands correctly', () => {
|
|
64
213
|
// We import the function dynamically to test it, or we assume it's exported
|
|
65
214
|
const { parseShellCommand } = require('./debug-repl.ts');
|
|
@@ -71,4 +220,89 @@ describe('DebugRepl', () => {
|
|
|
71
220
|
expect(parseShellCommand('editor -a -b -c')).toEqual(['editor', '-a', '-b', '-c']);
|
|
72
221
|
expect(parseShellCommand(' spaced command ')).toEqual(['spaced', 'command']);
|
|
73
222
|
});
|
|
223
|
+
|
|
224
|
+
test('should handle "edit" command and update step', async () => {
|
|
225
|
+
const input = new PassThrough();
|
|
226
|
+
const output = new PassThrough();
|
|
227
|
+
const mockLogger: Logger = {
|
|
228
|
+
log: mock(() => {}),
|
|
229
|
+
error: mock(() => {}),
|
|
230
|
+
warn: mock(() => {}),
|
|
231
|
+
info: mock(() => {}),
|
|
232
|
+
};
|
|
233
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
234
|
+
|
|
235
|
+
const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
|
|
236
|
+
// biome-ignore lint/suspicious/noExplicitAny: mocking child_process
|
|
237
|
+
() => ({ error: null, status: 0 }) as any
|
|
238
|
+
);
|
|
239
|
+
const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
|
|
240
|
+
const updatedStep = { ...mockStep, run: 'echo "fixed"' };
|
|
241
|
+
const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation((() =>
|
|
242
|
+
JSON.stringify(updatedStep)) as unknown as typeof fs.readFileSync);
|
|
243
|
+
const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
|
|
244
|
+
const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
|
|
245
|
+
|
|
246
|
+
try {
|
|
247
|
+
repl.start();
|
|
248
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
249
|
+
input.write('edit\n');
|
|
250
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
251
|
+
|
|
252
|
+
expect(mockLogger.log).toHaveBeenCalledWith(
|
|
253
|
+
expect.stringContaining('Step definition updated')
|
|
254
|
+
);
|
|
255
|
+
|
|
256
|
+
input.write('retry\n');
|
|
257
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
258
|
+
} finally {
|
|
259
|
+
spySpawnSync.mockRestore();
|
|
260
|
+
spyWriteFileSync.mockRestore();
|
|
261
|
+
spyReadFileSync.mockRestore();
|
|
262
|
+
spyExistsSync.mockRestore();
|
|
263
|
+
spyUnlinkSync.mockRestore();
|
|
264
|
+
}
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
test('should handle "edit" command with parse error', async () => {
|
|
268
|
+
const input = new PassThrough();
|
|
269
|
+
const output = new PassThrough();
|
|
270
|
+
const mockLogger: Logger = {
|
|
271
|
+
log: mock(() => {}),
|
|
272
|
+
error: mock(() => {}),
|
|
273
|
+
warn: mock(() => {}),
|
|
274
|
+
info: mock(() => {}),
|
|
275
|
+
};
|
|
276
|
+
const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
|
|
277
|
+
|
|
278
|
+
const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
|
|
279
|
+
// biome-ignore lint/suspicious/noExplicitAny: mocking child_process
|
|
280
|
+
() => ({ error: null, status: 0 }) as any
|
|
281
|
+
);
|
|
282
|
+
const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
|
|
283
|
+
const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation(
|
|
284
|
+
(() => 'invalid json') as unknown as typeof fs.readFileSync
|
|
285
|
+
);
|
|
286
|
+
const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
|
|
287
|
+
const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
|
|
288
|
+
|
|
289
|
+
try {
|
|
290
|
+
repl.start();
|
|
291
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
292
|
+
input.write('edit\n');
|
|
293
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
294
|
+
|
|
295
|
+
expect(mockLogger.error).toHaveBeenCalledWith(
|
|
296
|
+
expect.stringContaining('Failed to parse JSON')
|
|
297
|
+
);
|
|
298
|
+
input.write('exit\n');
|
|
299
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
300
|
+
} finally {
|
|
301
|
+
spySpawnSync.mockRestore();
|
|
302
|
+
spyWriteFileSync.mockRestore();
|
|
303
|
+
spyReadFileSync.mockRestore();
|
|
304
|
+
spyExistsSync.mockRestore();
|
|
305
|
+
spyUnlinkSync.mockRestore();
|
|
306
|
+
}
|
|
307
|
+
});
|
|
74
308
|
});
|
|
@@ -105,7 +105,9 @@ describe('AnthropicAdapter', () => {
|
|
|
105
105
|
// @ts-ignore
|
|
106
106
|
const fetchMock = global.fetch as MockFetch;
|
|
107
107
|
// @ts-ignore
|
|
108
|
-
|
|
108
|
+
// @ts-ignore
|
|
109
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock fetch init
|
|
110
|
+
const [url, init] = fetchMock.mock.calls[0] as [string, any];
|
|
109
111
|
|
|
110
112
|
expect(url).toBe('https://api.anthropic.com/v1/messages');
|
|
111
113
|
expect(init.headers['x-api-key']).toBe('fake-anthropic-key');
|
|
@@ -179,7 +181,8 @@ describe('AnthropicAdapter', () => {
|
|
|
179
181
|
]);
|
|
180
182
|
|
|
181
183
|
// @ts-ignore
|
|
182
|
-
|
|
184
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock fetch init
|
|
185
|
+
const init = global.fetch.mock.calls[0][1] as any;
|
|
183
186
|
const body = JSON.parse(init.body);
|
|
184
187
|
expect(body.messages[0].role).toBe('assistant');
|
|
185
188
|
expect(body.messages[0].content).toHaveLength(2);
|
|
@@ -208,7 +211,8 @@ describe('AnthropicAdapter', () => {
|
|
|
208
211
|
]);
|
|
209
212
|
|
|
210
213
|
// @ts-ignore
|
|
211
|
-
|
|
214
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock fetch init
|
|
215
|
+
const init = global.fetch.mock.calls[0][1] as any;
|
|
212
216
|
const body = JSON.parse(init.body);
|
|
213
217
|
expect(body.messages[0].role).toBe('user');
|
|
214
218
|
expect(body.messages[0].content[0]).toEqual({
|
|
@@ -255,7 +259,9 @@ describe('CopilotAdapter', () => {
|
|
|
255
259
|
// @ts-ignore
|
|
256
260
|
const fetchMock = global.fetch as MockFetch;
|
|
257
261
|
// @ts-ignore
|
|
258
|
-
|
|
262
|
+
// @ts-ignore
|
|
263
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock fetch init
|
|
264
|
+
const [url, init] = fetchMock.mock.calls[0] as [string, any];
|
|
259
265
|
expect(url).toBe('https://api.githubcopilot.com/chat/completions');
|
|
260
266
|
expect(init.headers.Authorization).toBe('Bearer mock-token');
|
|
261
267
|
spy.mockRestore();
|
|
@@ -9,13 +9,14 @@ import { RedactionBuffer, Redactor } from '../utils/redactor';
|
|
|
9
9
|
import { type LLMMessage, getAdapter } from './llm-adapter';
|
|
10
10
|
import { MCPClient } from './mcp-client';
|
|
11
11
|
import type { MCPManager, MCPServerConfig } from './mcp-manager';
|
|
12
|
+
import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
|
|
12
13
|
import type { StepResult } from './step-executor';
|
|
13
14
|
|
|
14
15
|
interface ToolDefinition {
|
|
15
16
|
name: string;
|
|
16
17
|
description?: string;
|
|
17
18
|
parameters: unknown;
|
|
18
|
-
source: 'agent' | 'step' | 'mcp';
|
|
19
|
+
source: 'agent' | 'step' | 'mcp' | 'standard';
|
|
19
20
|
execution?: Step;
|
|
20
21
|
mcpClient?: MCPClient;
|
|
21
22
|
}
|
|
@@ -105,7 +106,24 @@ export async function executeLlmStep(
|
|
|
105
106
|
}
|
|
106
107
|
}
|
|
107
108
|
|
|
108
|
-
// 3. Add
|
|
109
|
+
// 3. Add Standard tools
|
|
110
|
+
if (step.useStandardTools) {
|
|
111
|
+
for (const tool of STANDARD_TOOLS) {
|
|
112
|
+
allTools.push({
|
|
113
|
+
name: tool.name,
|
|
114
|
+
description: tool.description,
|
|
115
|
+
parameters: tool.parameters || {
|
|
116
|
+
type: 'object',
|
|
117
|
+
properties: {},
|
|
118
|
+
additionalProperties: true,
|
|
119
|
+
},
|
|
120
|
+
source: 'standard',
|
|
121
|
+
execution: tool.execution,
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// 4. Add MCP tools
|
|
109
127
|
const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
|
|
110
128
|
if (step.useGlobalMcp && mcpManager) {
|
|
111
129
|
const globalServers = mcpManager.getGlobalServers();
|
|
@@ -374,10 +392,28 @@ export async function executeLlmStep(
|
|
|
374
392
|
});
|
|
375
393
|
}
|
|
376
394
|
} else if (toolInfo.execution) {
|
|
395
|
+
// Security validation for standard tools
|
|
396
|
+
if (toolInfo.source === 'standard') {
|
|
397
|
+
try {
|
|
398
|
+
validateStandardToolSecurity(toolInfo.name, args, {
|
|
399
|
+
allowOutsideCwd: step.allowOutsideCwd,
|
|
400
|
+
allowInsecure: step.allowInsecure,
|
|
401
|
+
});
|
|
402
|
+
} catch (error) {
|
|
403
|
+
messages.push({
|
|
404
|
+
role: 'tool',
|
|
405
|
+
tool_call_id: toolCall.id,
|
|
406
|
+
name: toolCall.function.name,
|
|
407
|
+
content: `Security Error: ${error instanceof Error ? error.message : String(error)}`,
|
|
408
|
+
});
|
|
409
|
+
continue;
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
|
|
377
413
|
// Execute the tool as a step
|
|
378
414
|
const toolContext: ExpressionContext = {
|
|
379
415
|
...context,
|
|
380
|
-
|
|
416
|
+
args, // Use args to pass parameters to tool execution
|
|
381
417
|
};
|
|
382
418
|
|
|
383
419
|
const result = await executeStepFn(toolInfo.execution, toolContext);
|
|
@@ -136,14 +136,11 @@ export async function executeShell(
|
|
|
136
136
|
const cwd = step.dir ? ExpressionEvaluator.evaluateString(step.dir, context) : undefined;
|
|
137
137
|
const mergedEnv = Object.keys(env).length > 0 ? { ...Bun.env, ...env } : Bun.env;
|
|
138
138
|
|
|
139
|
-
//
|
|
140
|
-
|
|
141
|
-
// This completely eliminates shell injection risks for simple commands.
|
|
142
|
-
const isSimpleCommand = /^[a-zA-Z0-9_\-./]+(?: [a-zA-Z0-9_\-./]+)*$/.test(command);
|
|
139
|
+
// Shell metacharacters that require a real shell
|
|
140
|
+
const hasShellMetas = /[|&;<>`$!]/.test(command);
|
|
143
141
|
|
|
144
142
|
// Common shell builtins that must run in a shell
|
|
145
|
-
const
|
|
146
|
-
const cmd = splitArgs[0];
|
|
143
|
+
const firstWord = command.trim().split(/\s+/)[0];
|
|
147
144
|
const isBuiltin = [
|
|
148
145
|
'exit',
|
|
149
146
|
'cd',
|
|
@@ -155,19 +152,50 @@ export async function executeShell(
|
|
|
155
152
|
'unalias',
|
|
156
153
|
'eval',
|
|
157
154
|
'set',
|
|
158
|
-
|
|
155
|
+
'true',
|
|
156
|
+
'false',
|
|
157
|
+
].includes(firstWord);
|
|
158
|
+
|
|
159
|
+
const canUseSpawn = !hasShellMetas && !isBuiltin;
|
|
159
160
|
|
|
160
161
|
try {
|
|
161
162
|
let stdoutString = '';
|
|
162
163
|
let stderrString = '';
|
|
163
164
|
let exitCode = 0;
|
|
164
165
|
|
|
165
|
-
if (
|
|
166
|
-
//
|
|
167
|
-
const args =
|
|
168
|
-
|
|
166
|
+
if (canUseSpawn) {
|
|
167
|
+
// Robust splitting that handles single and double quotes
|
|
168
|
+
const args: string[] = [];
|
|
169
|
+
let current = '';
|
|
170
|
+
let inQuote = false;
|
|
171
|
+
let quoteChar = '';
|
|
172
|
+
|
|
173
|
+
for (let i = 0; i < command.length; i++) {
|
|
174
|
+
const char = command[i];
|
|
175
|
+
if ((char === "'" || char === '"') && (i === 0 || command[i - 1] !== '\\')) {
|
|
176
|
+
if (inQuote && char === quoteChar) {
|
|
177
|
+
inQuote = false;
|
|
178
|
+
quoteChar = '';
|
|
179
|
+
} else if (!inQuote) {
|
|
180
|
+
inQuote = true;
|
|
181
|
+
quoteChar = char;
|
|
182
|
+
} else {
|
|
183
|
+
current += char;
|
|
184
|
+
}
|
|
185
|
+
} else if (/\s/.test(char) && !inQuote) {
|
|
186
|
+
if (current) {
|
|
187
|
+
args.push(current);
|
|
188
|
+
current = '';
|
|
189
|
+
}
|
|
190
|
+
} else {
|
|
191
|
+
current += char;
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
if (current) args.push(current);
|
|
195
|
+
|
|
196
|
+
if (args.length === 0) throw new Error('Empty command');
|
|
169
197
|
|
|
170
|
-
const proc = Bun.spawn(
|
|
198
|
+
const proc = Bun.spawn(args, {
|
|
171
199
|
cwd,
|
|
172
200
|
env: mergedEnv,
|
|
173
201
|
stdout: 'pipe',
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { afterAll, beforeAll, describe, expect, it, mock, spyOn } from 'bun:test';
|
|
2
|
+
import type { ExpressionContext } from '../expression/evaluator';
|
|
3
|
+
import type { LlmStep, Step } from '../parser/schema';
|
|
4
|
+
import { ConsoleLogger } from '../utils/logger';
|
|
5
|
+
import { OpenAIAdapter } from './llm-adapter';
|
|
6
|
+
import { executeLlmStep } from './llm-executor';
|
|
7
|
+
|
|
8
|
+
describe('Standard Tools Integration', () => {
|
|
9
|
+
const originalOpenAIChat = OpenAIAdapter.prototype.chat;
|
|
10
|
+
|
|
11
|
+
beforeAll(() => {
|
|
12
|
+
// Mocking OpenAI Adapter
|
|
13
|
+
});
|
|
14
|
+
|
|
15
|
+
afterAll(() => {
|
|
16
|
+
OpenAIAdapter.prototype.chat = originalOpenAIChat;
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it('should inject standard tools when useStandardTools is true', async () => {
|
|
20
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
21
|
+
let capturedTools: any[] = [];
|
|
22
|
+
|
|
23
|
+
OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
|
|
24
|
+
capturedTools = options.tools || [];
|
|
25
|
+
return {
|
|
26
|
+
message: {
|
|
27
|
+
role: 'assistant',
|
|
28
|
+
content: 'I will read the file',
|
|
29
|
+
tool_calls: [
|
|
30
|
+
{
|
|
31
|
+
id: 'call_1',
|
|
32
|
+
type: 'function',
|
|
33
|
+
function: {
|
|
34
|
+
name: 'read_file',
|
|
35
|
+
arguments: JSON.stringify({ path: 'test.txt' }),
|
|
36
|
+
},
|
|
37
|
+
},
|
|
38
|
+
],
|
|
39
|
+
},
|
|
40
|
+
usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
|
|
41
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
42
|
+
} as any;
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
const step: LlmStep = {
|
|
46
|
+
id: 'l1',
|
|
47
|
+
type: 'llm',
|
|
48
|
+
agent: 'test-agent',
|
|
49
|
+
needs: [],
|
|
50
|
+
prompt: 'read test.txt',
|
|
51
|
+
useStandardTools: true,
|
|
52
|
+
maxIterations: 1,
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
56
|
+
const executeStepFn = mock(async (s: Step) => {
|
|
57
|
+
return { status: 'success', output: 'file content' };
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
// We catch the "Max iterations reached" error because we set maxIterations to 1
|
|
61
|
+
// but we can still check if tools were injected and the tool call was made.
|
|
62
|
+
try {
|
|
63
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
64
|
+
await executeLlmStep(step, context, executeStepFn as any);
|
|
65
|
+
} catch (e) {
|
|
66
|
+
if ((e as Error).message !== 'Max ReAct iterations reached') throw e;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
expect(capturedTools.some((t) => t.function.name === 'read_file')).toBe(true);
|
|
70
|
+
expect(executeStepFn).toHaveBeenCalled();
|
|
71
|
+
const toolStep = executeStepFn.mock.calls[0][0] as Step;
|
|
72
|
+
expect(toolStep.type).toBe('file');
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
it('should block risky standard tools without allowInsecure', async () => {
|
|
76
|
+
OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
|
|
77
|
+
return {
|
|
78
|
+
message: {
|
|
79
|
+
role: 'assistant',
|
|
80
|
+
content: 'I will run a command',
|
|
81
|
+
tool_calls: [
|
|
82
|
+
{
|
|
83
|
+
id: 'call_2',
|
|
84
|
+
type: 'function',
|
|
85
|
+
function: {
|
|
86
|
+
name: 'run_command',
|
|
87
|
+
arguments: JSON.stringify({ command: 'rm -rf /' }),
|
|
88
|
+
},
|
|
89
|
+
},
|
|
90
|
+
],
|
|
91
|
+
},
|
|
92
|
+
usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
|
|
93
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
94
|
+
} as any;
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
const step: LlmStep = {
|
|
98
|
+
id: 'l1',
|
|
99
|
+
type: 'llm',
|
|
100
|
+
agent: 'test-agent',
|
|
101
|
+
needs: [],
|
|
102
|
+
prompt: 'run risky command',
|
|
103
|
+
useStandardTools: true,
|
|
104
|
+
allowInsecure: false, // Explicitly false
|
|
105
|
+
maxIterations: 2,
|
|
106
|
+
};
|
|
107
|
+
|
|
108
|
+
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
109
|
+
const executeStepFn = mock(async () => ({ status: 'success', output: '' }));
|
|
110
|
+
|
|
111
|
+
// The execution should not throw, but it should return a tool error message to the LLM
|
|
112
|
+
// However, in our mock, we want to see if executeStepFn was called.
|
|
113
|
+
// Actually, in llm-executor.ts, it pushes a "Security Error" message if check fails and continues loop.
|
|
114
|
+
|
|
115
|
+
let securityErrorMessage = '';
|
|
116
|
+
OpenAIAdapter.prototype.chat = mock(async (messages) => {
|
|
117
|
+
const lastMessage = messages[messages.length - 1];
|
|
118
|
+
if (lastMessage.role === 'tool') {
|
|
119
|
+
securityErrorMessage = lastMessage.content;
|
|
120
|
+
return {
|
|
121
|
+
message: { role: 'assistant', content: 'stop' },
|
|
122
|
+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
|
|
123
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
124
|
+
} as any;
|
|
125
|
+
}
|
|
126
|
+
return {
|
|
127
|
+
message: {
|
|
128
|
+
role: 'assistant',
|
|
129
|
+
tool_calls: [
|
|
130
|
+
{
|
|
131
|
+
id: 'c2',
|
|
132
|
+
type: 'function',
|
|
133
|
+
function: { name: 'run_command', arguments: '{"command":"rm -rf /"}' },
|
|
134
|
+
},
|
|
135
|
+
],
|
|
136
|
+
},
|
|
137
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
138
|
+
} as any;
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
// biome-ignore lint/suspicious/noExplicitAny: mock
|
|
142
|
+
await executeLlmStep(step, context, executeStepFn as any);
|
|
143
|
+
|
|
144
|
+
expect(securityErrorMessage).toContain('Security Error');
|
|
145
|
+
expect(executeStepFn).not.toHaveBeenCalled();
|
|
146
|
+
});
|
|
147
|
+
});
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { describe, expect, it } from 'bun:test';
|
|
2
|
+
import * as fs from 'node:fs';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
|
|
5
|
+
|
|
6
|
+
describe('Standard Tools Security', () => {
|
|
7
|
+
const options = { allowOutsideCwd: false, allowInsecure: false };
|
|
8
|
+
|
|
9
|
+
it('should allow paths within CWD', () => {
|
|
10
|
+
expect(() => {
|
|
11
|
+
validateStandardToolSecurity('read_file', { path: 'src/cli.ts' }, options);
|
|
12
|
+
}).not.toThrow();
|
|
13
|
+
expect(() => {
|
|
14
|
+
validateStandardToolSecurity('search_files', { pattern: '**/*.ts', dir: 'src' }, options);
|
|
15
|
+
}).not.toThrow();
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
it('should block paths outside CWD by default', () => {
|
|
19
|
+
expect(() => {
|
|
20
|
+
validateStandardToolSecurity('read_file', { path: '../../etc/passwd' }, options);
|
|
21
|
+
}).toThrow(/Access denied/);
|
|
22
|
+
expect(() => {
|
|
23
|
+
validateStandardToolSecurity('read_file_lines', { path: '../../etc/passwd' }, options);
|
|
24
|
+
}).toThrow(/Access denied/);
|
|
25
|
+
expect(() => {
|
|
26
|
+
validateStandardToolSecurity('search_files', { pattern: '*', dir: '/etc' }, options);
|
|
27
|
+
}).toThrow(/Access denied/);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it('should allow paths outside CWD if allowOutsideCwd is true', () => {
|
|
31
|
+
expect(() => {
|
|
32
|
+
validateStandardToolSecurity(
|
|
33
|
+
'read_file',
|
|
34
|
+
{ path: '../../etc/passwd' },
|
|
35
|
+
{ allowOutsideCwd: true }
|
|
36
|
+
);
|
|
37
|
+
}).not.toThrow();
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
it('should block risky commands by default', () => {
|
|
41
|
+
expect(() => {
|
|
42
|
+
validateStandardToolSecurity('run_command', { command: 'ls; rm -rf /' }, options);
|
|
43
|
+
}).toThrow(/Security Error/);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
it('should allow risky commands if allowInsecure is true', () => {
|
|
47
|
+
expect(() => {
|
|
48
|
+
validateStandardToolSecurity(
|
|
49
|
+
'run_command',
|
|
50
|
+
{ command: 'ls; rm -rf /' },
|
|
51
|
+
{ allowInsecure: true }
|
|
52
|
+
);
|
|
53
|
+
}).not.toThrow();
|
|
54
|
+
});
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
describe('Standard Tools Definition', () => {
|
|
58
|
+
it('should have read_file tool', () => {
|
|
59
|
+
const readTool = STANDARD_TOOLS.find((t) => t.name === 'read_file');
|
|
60
|
+
expect(readTool).toBeDefined();
|
|
61
|
+
expect(readTool?.execution?.type).toBe('file');
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it('should have list_files tool with script execution', () => {
|
|
65
|
+
const listTool = STANDARD_TOOLS.find((t) => t.name === 'list_files');
|
|
66
|
+
expect(listTool).toBeDefined();
|
|
67
|
+
expect(listTool?.execution?.type).toBe('script');
|
|
68
|
+
});
|
|
69
|
+
});
|