keystone-cli 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,27 @@
1
- import { describe, expect, test } from 'bun:test';
1
+ import { describe, expect, mock, spyOn, test } from 'bun:test';
2
+ import * as cp from 'node:child_process';
3
+ import * as fs from 'node:fs';
2
4
  import { PassThrough } from 'node:stream';
3
5
  import type { ExpressionContext } from '../expression/evaluator.ts';
4
6
  import type { Step } from '../parser/schema.ts';
7
+ import type { Logger } from '../utils/logger.ts';
5
8
  import { DebugRepl } from './debug-repl.ts';
6
9
 
7
10
  describe('DebugRepl', () => {
8
11
  const mockContext: ExpressionContext = { inputs: { foo: 'bar' } };
9
- // biome-ignore lint/suspicious/noExplicitAny: mock step typing
10
- const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as any;
12
+ // mock step typing
13
+ const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as unknown as Step;
11
14
  const mockError = new Error('Test Error');
12
15
 
13
16
  test('should resolve with "skip" when user types "skip"', async () => {
14
17
  const input = new PassThrough();
15
18
  const output = new PassThrough();
16
- const mockLogger = { log: () => {}, error: () => {}, warn: () => {} };
19
+ const mockLogger: Logger = {
20
+ log: mock(() => {}),
21
+ error: mock(() => {}),
22
+ warn: mock(() => {}),
23
+ info: mock(() => {}),
24
+ };
17
25
  const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
18
26
 
19
27
  const promise = repl.start();
@@ -30,7 +38,12 @@ describe('DebugRepl', () => {
30
38
  test('should resolve with "retry" when user types "retry"', async () => {
31
39
  const input = new PassThrough();
32
40
  const output = new PassThrough();
33
- const mockLogger = { log: () => {}, error: () => {}, warn: () => {} };
41
+ const mockLogger: Logger = {
42
+ log: mock(() => {}),
43
+ error: mock(() => {}),
44
+ warn: mock(() => {}),
45
+ info: mock(() => {}),
46
+ };
34
47
  const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
35
48
 
36
49
  const promise = repl.start();
@@ -48,7 +61,12 @@ describe('DebugRepl', () => {
48
61
  test('should resolve with "continue_failure" when user types "exit"', async () => {
49
62
  const input = new PassThrough();
50
63
  const output = new PassThrough();
51
- const mockLogger = { log: () => {}, error: () => {}, warn: () => {} };
64
+ const mockLogger: Logger = {
65
+ log: mock(() => {}),
66
+ error: mock(() => {}),
67
+ warn: mock(() => {}),
68
+ info: mock(() => {}),
69
+ };
52
70
  const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
53
71
 
54
72
  const promise = repl.start();
@@ -60,6 +78,137 @@ describe('DebugRepl', () => {
60
78
  expect(result).toEqual({ type: 'continue_failure' });
61
79
  });
62
80
 
81
+ test('should handle "context" command', async () => {
82
+ const input = new PassThrough();
83
+ const output = new PassThrough();
84
+ const mockLogger: Logger = {
85
+ log: mock(() => {}),
86
+ error: mock(() => {}),
87
+ warn: mock(() => {}),
88
+ info: mock(() => {}),
89
+ };
90
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
91
+
92
+ repl.start();
93
+
94
+ await new Promise((r) => setTimeout(r, 10));
95
+ input.write('context\n');
96
+ await new Promise((r) => setTimeout(r, 10));
97
+
98
+ expect(mockLogger.log).toHaveBeenCalled();
99
+ // biome-ignore lint/suspicious/noExplicitAny: accessing mock property
100
+ const lastCall = (mockLogger.log as unknown as any).mock.calls.find((call: any[]) =>
101
+ String(call[0]).includes('foo')
102
+ );
103
+ expect(lastCall?.[0]).toContain('bar');
104
+ input.write('exit\n');
105
+ });
106
+
107
+ test('should handle "eval" command', async () => {
108
+ const input = new PassThrough();
109
+ const output = new PassThrough();
110
+ const mockLogger: Logger = {
111
+ log: mock(() => {}),
112
+ error: mock(() => {}),
113
+ warn: mock(() => {}),
114
+ info: mock(() => {}),
115
+ };
116
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
117
+
118
+ repl.start();
119
+
120
+ await new Promise((r) => setTimeout(r, 10));
121
+ input.write('eval inputs.foo\n');
122
+ await new Promise((r) => setTimeout(r, 10));
123
+
124
+ expect(mockLogger.log).toHaveBeenCalledWith('bar');
125
+ input.write('exit\n');
126
+ });
127
+
128
+ test('should handle "eval" command with error', async () => {
129
+ const input = new PassThrough();
130
+ const output = new PassThrough();
131
+ const mockLogger: Logger = {
132
+ log: mock(() => {}),
133
+ error: mock(() => {}),
134
+ warn: mock(() => {}),
135
+ info: mock(() => {}),
136
+ };
137
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
138
+
139
+ repl.start();
140
+
141
+ await new Promise((r) => setTimeout(r, 10));
142
+ input.write('eval nonExistent.bar\n');
143
+ await new Promise((r) => setTimeout(r, 10));
144
+
145
+ expect(mockLogger.error).toHaveBeenCalled();
146
+ input.write('exit\n');
147
+ });
148
+
149
+ test('should handle "eval" command without arguments', async () => {
150
+ const input = new PassThrough();
151
+ const output = new PassThrough();
152
+ const mockLogger: Logger = {
153
+ log: mock(() => {}),
154
+ error: mock(() => {}),
155
+ warn: mock(() => {}),
156
+ info: mock(() => {}),
157
+ };
158
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
159
+
160
+ repl.start();
161
+
162
+ await new Promise((r) => setTimeout(r, 10));
163
+ input.write('eval\n');
164
+ await new Promise((r) => setTimeout(r, 10));
165
+
166
+ expect(mockLogger.log).toHaveBeenCalledWith('Usage: eval <expression>');
167
+ input.write('exit\n');
168
+ });
169
+
170
+ test('should handle unknown command', async () => {
171
+ const input = new PassThrough();
172
+ const output = new PassThrough();
173
+ const mockLogger: Logger = {
174
+ log: mock(() => {}),
175
+ error: mock(() => {}),
176
+ warn: mock(() => {}),
177
+ info: mock(() => {}),
178
+ };
179
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
180
+
181
+ repl.start();
182
+
183
+ await new Promise((r) => setTimeout(r, 10));
184
+ input.write('unknown_cmd\n');
185
+ await new Promise((r) => setTimeout(r, 10));
186
+
187
+ expect(mockLogger.log).toHaveBeenCalledWith('Unknown command: unknown_cmd');
188
+ input.write('exit\n');
189
+ });
190
+
191
+ test('should handle empty input', async () => {
192
+ const input = new PassThrough();
193
+ const output = new PassThrough();
194
+ const mockLogger: Logger = {
195
+ log: mock(() => {}),
196
+ error: mock(() => {}),
197
+ warn: mock(() => {}),
198
+ info: mock(() => {}),
199
+ };
200
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
201
+
202
+ repl.start();
203
+
204
+ await new Promise((r) => setTimeout(r, 10));
205
+ input.write('\n');
206
+ await new Promise((r) => setTimeout(r, 10));
207
+
208
+ expect(mockLogger.log).not.toHaveBeenCalledWith('Unknown command: ');
209
+ input.write('exit\n');
210
+ });
211
+
63
212
  test('should parse shell commands correctly', () => {
64
213
  // We import the function dynamically to test it, or we assume it's exported
65
214
  const { parseShellCommand } = require('./debug-repl.ts');
@@ -71,4 +220,89 @@ describe('DebugRepl', () => {
71
220
  expect(parseShellCommand('editor -a -b -c')).toEqual(['editor', '-a', '-b', '-c']);
72
221
  expect(parseShellCommand(' spaced command ')).toEqual(['spaced', 'command']);
73
222
  });
223
+
224
+ test('should handle "edit" command and update step', async () => {
225
+ const input = new PassThrough();
226
+ const output = new PassThrough();
227
+ const mockLogger: Logger = {
228
+ log: mock(() => {}),
229
+ error: mock(() => {}),
230
+ warn: mock(() => {}),
231
+ info: mock(() => {}),
232
+ };
233
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
234
+
235
+ const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
236
+ // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
237
+ () => ({ error: null, status: 0 }) as any
238
+ );
239
+ const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
240
+ const updatedStep = { ...mockStep, run: 'echo "fixed"' };
241
+ const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation((() =>
242
+ JSON.stringify(updatedStep)) as unknown as typeof fs.readFileSync);
243
+ const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
244
+ const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
245
+
246
+ try {
247
+ repl.start();
248
+ await new Promise((r) => setTimeout(r, 50));
249
+ input.write('edit\n');
250
+ await new Promise((r) => setTimeout(r, 50));
251
+
252
+ expect(mockLogger.log).toHaveBeenCalledWith(
253
+ expect.stringContaining('Step definition updated')
254
+ );
255
+
256
+ input.write('retry\n');
257
+ await new Promise((r) => setTimeout(r, 50));
258
+ } finally {
259
+ spySpawnSync.mockRestore();
260
+ spyWriteFileSync.mockRestore();
261
+ spyReadFileSync.mockRestore();
262
+ spyExistsSync.mockRestore();
263
+ spyUnlinkSync.mockRestore();
264
+ }
265
+ });
266
+
267
+ test('should handle "edit" command with parse error', async () => {
268
+ const input = new PassThrough();
269
+ const output = new PassThrough();
270
+ const mockLogger: Logger = {
271
+ log: mock(() => {}),
272
+ error: mock(() => {}),
273
+ warn: mock(() => {}),
274
+ info: mock(() => {}),
275
+ };
276
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
277
+
278
+ const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
279
+ // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
280
+ () => ({ error: null, status: 0 }) as any
281
+ );
282
+ const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
283
+ const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation(
284
+ (() => 'invalid json') as unknown as typeof fs.readFileSync
285
+ );
286
+ const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
287
+ const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
288
+
289
+ try {
290
+ repl.start();
291
+ await new Promise((r) => setTimeout(r, 50));
292
+ input.write('edit\n');
293
+ await new Promise((r) => setTimeout(r, 50));
294
+
295
+ expect(mockLogger.error).toHaveBeenCalledWith(
296
+ expect.stringContaining('Failed to parse JSON')
297
+ );
298
+ input.write('exit\n');
299
+ await new Promise((r) => setTimeout(r, 50));
300
+ } finally {
301
+ spySpawnSync.mockRestore();
302
+ spyWriteFileSync.mockRestore();
303
+ spyReadFileSync.mockRestore();
304
+ spyExistsSync.mockRestore();
305
+ spyUnlinkSync.mockRestore();
306
+ }
307
+ });
74
308
  });
@@ -105,7 +105,9 @@ describe('AnthropicAdapter', () => {
105
105
  // @ts-ignore
106
106
  const fetchMock = global.fetch as MockFetch;
107
107
  // @ts-ignore
108
- const [url, init] = fetchMock.mock.calls[0];
108
+ // @ts-ignore
109
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
110
+ const [url, init] = fetchMock.mock.calls[0] as [string, any];
109
111
 
110
112
  expect(url).toBe('https://api.anthropic.com/v1/messages');
111
113
  expect(init.headers['x-api-key']).toBe('fake-anthropic-key');
@@ -179,7 +181,8 @@ describe('AnthropicAdapter', () => {
179
181
  ]);
180
182
 
181
183
  // @ts-ignore
182
- const init = global.fetch.mock.calls[0][1];
184
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
185
+ const init = global.fetch.mock.calls[0][1] as any;
183
186
  const body = JSON.parse(init.body);
184
187
  expect(body.messages[0].role).toBe('assistant');
185
188
  expect(body.messages[0].content).toHaveLength(2);
@@ -208,7 +211,8 @@ describe('AnthropicAdapter', () => {
208
211
  ]);
209
212
 
210
213
  // @ts-ignore
211
- const init = global.fetch.mock.calls[0][1];
214
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
215
+ const init = global.fetch.mock.calls[0][1] as any;
212
216
  const body = JSON.parse(init.body);
213
217
  expect(body.messages[0].role).toBe('user');
214
218
  expect(body.messages[0].content[0]).toEqual({
@@ -255,7 +259,9 @@ describe('CopilotAdapter', () => {
255
259
  // @ts-ignore
256
260
  const fetchMock = global.fetch as MockFetch;
257
261
  // @ts-ignore
258
- const [url, init] = fetchMock.mock.calls[0];
262
+ // @ts-ignore
263
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
264
+ const [url, init] = fetchMock.mock.calls[0] as [string, any];
259
265
  expect(url).toBe('https://api.githubcopilot.com/chat/completions');
260
266
  expect(init.headers.Authorization).toBe('Bearer mock-token');
261
267
  spy.mockRestore();
@@ -9,13 +9,14 @@ import { RedactionBuffer, Redactor } from '../utils/redactor';
9
9
  import { type LLMMessage, getAdapter } from './llm-adapter';
10
10
  import { MCPClient } from './mcp-client';
11
11
  import type { MCPManager, MCPServerConfig } from './mcp-manager';
12
+ import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
12
13
  import type { StepResult } from './step-executor';
13
14
 
14
15
  interface ToolDefinition {
15
16
  name: string;
16
17
  description?: string;
17
18
  parameters: unknown;
18
- source: 'agent' | 'step' | 'mcp';
19
+ source: 'agent' | 'step' | 'mcp' | 'standard';
19
20
  execution?: Step;
20
21
  mcpClient?: MCPClient;
21
22
  }
@@ -105,7 +106,24 @@ export async function executeLlmStep(
105
106
  }
106
107
  }
107
108
 
108
- // 3. Add MCP tools
109
+ // 3. Add Standard tools
110
+ if (step.useStandardTools) {
111
+ for (const tool of STANDARD_TOOLS) {
112
+ allTools.push({
113
+ name: tool.name,
114
+ description: tool.description,
115
+ parameters: tool.parameters || {
116
+ type: 'object',
117
+ properties: {},
118
+ additionalProperties: true,
119
+ },
120
+ source: 'standard',
121
+ execution: tool.execution,
122
+ });
123
+ }
124
+ }
125
+
126
+ // 4. Add MCP tools
109
127
  const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
110
128
  if (step.useGlobalMcp && mcpManager) {
111
129
  const globalServers = mcpManager.getGlobalServers();
@@ -374,10 +392,28 @@ export async function executeLlmStep(
374
392
  });
375
393
  }
376
394
  } else if (toolInfo.execution) {
395
+ // Security validation for standard tools
396
+ if (toolInfo.source === 'standard') {
397
+ try {
398
+ validateStandardToolSecurity(toolInfo.name, args, {
399
+ allowOutsideCwd: step.allowOutsideCwd,
400
+ allowInsecure: step.allowInsecure,
401
+ });
402
+ } catch (error) {
403
+ messages.push({
404
+ role: 'tool',
405
+ tool_call_id: toolCall.id,
406
+ name: toolCall.function.name,
407
+ content: `Security Error: ${error instanceof Error ? error.message : String(error)}`,
408
+ });
409
+ continue;
410
+ }
411
+ }
412
+
377
413
  // Execute the tool as a step
378
414
  const toolContext: ExpressionContext = {
379
415
  ...context,
380
- item: args, // Use item to pass args to tool execution
416
+ args, // Use args to pass parameters to tool execution
381
417
  };
382
418
 
383
419
  const result = await executeStepFn(toolInfo.execution, toolContext);
@@ -136,14 +136,11 @@ export async function executeShell(
136
136
  const cwd = step.dir ? ExpressionEvaluator.evaluateString(step.dir, context) : undefined;
137
137
  const mergedEnv = Object.keys(env).length > 0 ? { ...Bun.env, ...env } : Bun.env;
138
138
 
139
- // Safe Fast Path: If command contains only safe characters (alphanumeric, -, _, ., /) and spaces,
140
- // we can split it and execute directly without a shell.
141
- // This completely eliminates shell injection risks for simple commands.
142
- const isSimpleCommand = /^[a-zA-Z0-9_\-./]+(?: [a-zA-Z0-9_\-./]+)*$/.test(command);
139
+ // Shell metacharacters that require a real shell
140
+ const hasShellMetas = /[|&;<>`$!]/.test(command);
143
141
 
144
142
  // Common shell builtins that must run in a shell
145
- const splitArgs = command.split(/\s+/);
146
- const cmd = splitArgs[0];
143
+ const firstWord = command.trim().split(/\s+/)[0];
147
144
  const isBuiltin = [
148
145
  'exit',
149
146
  'cd',
@@ -155,19 +152,50 @@ export async function executeShell(
155
152
  'unalias',
156
153
  'eval',
157
154
  'set',
158
- ].includes(cmd);
155
+ 'true',
156
+ 'false',
157
+ ].includes(firstWord);
158
+
159
+ const canUseSpawn = !hasShellMetas && !isBuiltin;
159
160
 
160
161
  try {
161
162
  let stdoutString = '';
162
163
  let stderrString = '';
163
164
  let exitCode = 0;
164
165
 
165
- if (isSimpleCommand && !isBuiltin) {
166
- // split by spaces
167
- const args = splitArgs.slice(1);
168
- if (!cmd) throw new Error('Empty command');
166
+ if (canUseSpawn) {
167
+ // Robust splitting that handles single and double quotes
168
+ const args: string[] = [];
169
+ let current = '';
170
+ let inQuote = false;
171
+ let quoteChar = '';
172
+
173
+ for (let i = 0; i < command.length; i++) {
174
+ const char = command[i];
175
+ if ((char === "'" || char === '"') && (i === 0 || command[i - 1] !== '\\')) {
176
+ if (inQuote && char === quoteChar) {
177
+ inQuote = false;
178
+ quoteChar = '';
179
+ } else if (!inQuote) {
180
+ inQuote = true;
181
+ quoteChar = char;
182
+ } else {
183
+ current += char;
184
+ }
185
+ } else if (/\s/.test(char) && !inQuote) {
186
+ if (current) {
187
+ args.push(current);
188
+ current = '';
189
+ }
190
+ } else {
191
+ current += char;
192
+ }
193
+ }
194
+ if (current) args.push(current);
195
+
196
+ if (args.length === 0) throw new Error('Empty command');
169
197
 
170
- const proc = Bun.spawn([cmd, ...args], {
198
+ const proc = Bun.spawn(args, {
171
199
  cwd,
172
200
  env: mergedEnv,
173
201
  stdout: 'pipe',
@@ -0,0 +1,147 @@
1
+ import { afterAll, beforeAll, describe, expect, it, mock, spyOn } from 'bun:test';
2
+ import type { ExpressionContext } from '../expression/evaluator';
3
+ import type { LlmStep, Step } from '../parser/schema';
4
+ import { ConsoleLogger } from '../utils/logger';
5
+ import { OpenAIAdapter } from './llm-adapter';
6
+ import { executeLlmStep } from './llm-executor';
7
+
8
+ describe('Standard Tools Integration', () => {
9
+ const originalOpenAIChat = OpenAIAdapter.prototype.chat;
10
+
11
+ beforeAll(() => {
12
+ // Mocking OpenAI Adapter
13
+ });
14
+
15
+ afterAll(() => {
16
+ OpenAIAdapter.prototype.chat = originalOpenAIChat;
17
+ });
18
+
19
+ it('should inject standard tools when useStandardTools is true', async () => {
20
+ // biome-ignore lint/suspicious/noExplicitAny: mock
21
+ let capturedTools: any[] = [];
22
+
23
+ OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
24
+ capturedTools = options.tools || [];
25
+ return {
26
+ message: {
27
+ role: 'assistant',
28
+ content: 'I will read the file',
29
+ tool_calls: [
30
+ {
31
+ id: 'call_1',
32
+ type: 'function',
33
+ function: {
34
+ name: 'read_file',
35
+ arguments: JSON.stringify({ path: 'test.txt' }),
36
+ },
37
+ },
38
+ ],
39
+ },
40
+ usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
41
+ // biome-ignore lint/suspicious/noExplicitAny: mock
42
+ } as any;
43
+ });
44
+
45
+ const step: LlmStep = {
46
+ id: 'l1',
47
+ type: 'llm',
48
+ agent: 'test-agent',
49
+ needs: [],
50
+ prompt: 'read test.txt',
51
+ useStandardTools: true,
52
+ maxIterations: 1,
53
+ };
54
+
55
+ const context: ExpressionContext = { inputs: {}, steps: {} };
56
+ const executeStepFn = mock(async (s: Step) => {
57
+ return { status: 'success', output: 'file content' };
58
+ });
59
+
60
+ // We catch the "Max iterations reached" error because we set maxIterations to 1
61
+ // but we can still check if tools were injected and the tool call was made.
62
+ try {
63
+ // biome-ignore lint/suspicious/noExplicitAny: mock
64
+ await executeLlmStep(step, context, executeStepFn as any);
65
+ } catch (e) {
66
+ if ((e as Error).message !== 'Max ReAct iterations reached') throw e;
67
+ }
68
+
69
+ expect(capturedTools.some((t) => t.function.name === 'read_file')).toBe(true);
70
+ expect(executeStepFn).toHaveBeenCalled();
71
+ const toolStep = executeStepFn.mock.calls[0][0] as Step;
72
+ expect(toolStep.type).toBe('file');
73
+ });
74
+
75
+ it('should block risky standard tools without allowInsecure', async () => {
76
+ OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
77
+ return {
78
+ message: {
79
+ role: 'assistant',
80
+ content: 'I will run a command',
81
+ tool_calls: [
82
+ {
83
+ id: 'call_2',
84
+ type: 'function',
85
+ function: {
86
+ name: 'run_command',
87
+ arguments: JSON.stringify({ command: 'rm -rf /' }),
88
+ },
89
+ },
90
+ ],
91
+ },
92
+ usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
93
+ // biome-ignore lint/suspicious/noExplicitAny: mock
94
+ } as any;
95
+ });
96
+
97
+ const step: LlmStep = {
98
+ id: 'l1',
99
+ type: 'llm',
100
+ agent: 'test-agent',
101
+ needs: [],
102
+ prompt: 'run risky command',
103
+ useStandardTools: true,
104
+ allowInsecure: false, // Explicitly false
105
+ maxIterations: 2,
106
+ };
107
+
108
+ const context: ExpressionContext = { inputs: {}, steps: {} };
109
+ const executeStepFn = mock(async () => ({ status: 'success', output: '' }));
110
+
111
+ // The execution should not throw, but it should return a tool error message to the LLM
112
+ // However, in our mock, we want to see if executeStepFn was called.
113
+ // Actually, in llm-executor.ts, it pushes a "Security Error" message if check fails and continues loop.
114
+
115
+ let securityErrorMessage = '';
116
+ OpenAIAdapter.prototype.chat = mock(async (messages) => {
117
+ const lastMessage = messages[messages.length - 1];
118
+ if (lastMessage.role === 'tool') {
119
+ securityErrorMessage = lastMessage.content;
120
+ return {
121
+ message: { role: 'assistant', content: 'stop' },
122
+ usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
123
+ // biome-ignore lint/suspicious/noExplicitAny: mock
124
+ } as any;
125
+ }
126
+ return {
127
+ message: {
128
+ role: 'assistant',
129
+ tool_calls: [
130
+ {
131
+ id: 'c2',
132
+ type: 'function',
133
+ function: { name: 'run_command', arguments: '{"command":"rm -rf /"}' },
134
+ },
135
+ ],
136
+ },
137
+ // biome-ignore lint/suspicious/noExplicitAny: mock
138
+ } as any;
139
+ });
140
+
141
+ // biome-ignore lint/suspicious/noExplicitAny: mock
142
+ await executeLlmStep(step, context, executeStepFn as any);
143
+
144
+ expect(securityErrorMessage).toContain('Security Error');
145
+ expect(executeStepFn).not.toHaveBeenCalled();
146
+ });
147
+ });
@@ -0,0 +1,69 @@
1
+ import { describe, expect, it } from 'bun:test';
2
+ import * as fs from 'node:fs';
3
+ import * as path from 'node:path';
4
+ import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
5
+
6
+ describe('Standard Tools Security', () => {
7
+ const options = { allowOutsideCwd: false, allowInsecure: false };
8
+
9
+ it('should allow paths within CWD', () => {
10
+ expect(() => {
11
+ validateStandardToolSecurity('read_file', { path: 'src/cli.ts' }, options);
12
+ }).not.toThrow();
13
+ expect(() => {
14
+ validateStandardToolSecurity('search_files', { pattern: '**/*.ts', dir: 'src' }, options);
15
+ }).not.toThrow();
16
+ });
17
+
18
+ it('should block paths outside CWD by default', () => {
19
+ expect(() => {
20
+ validateStandardToolSecurity('read_file', { path: '../../etc/passwd' }, options);
21
+ }).toThrow(/Access denied/);
22
+ expect(() => {
23
+ validateStandardToolSecurity('read_file_lines', { path: '../../etc/passwd' }, options);
24
+ }).toThrow(/Access denied/);
25
+ expect(() => {
26
+ validateStandardToolSecurity('search_files', { pattern: '*', dir: '/etc' }, options);
27
+ }).toThrow(/Access denied/);
28
+ });
29
+
30
+ it('should allow paths outside CWD if allowOutsideCwd is true', () => {
31
+ expect(() => {
32
+ validateStandardToolSecurity(
33
+ 'read_file',
34
+ { path: '../../etc/passwd' },
35
+ { allowOutsideCwd: true }
36
+ );
37
+ }).not.toThrow();
38
+ });
39
+
40
+ it('should block risky commands by default', () => {
41
+ expect(() => {
42
+ validateStandardToolSecurity('run_command', { command: 'ls; rm -rf /' }, options);
43
+ }).toThrow(/Security Error/);
44
+ });
45
+
46
+ it('should allow risky commands if allowInsecure is true', () => {
47
+ expect(() => {
48
+ validateStandardToolSecurity(
49
+ 'run_command',
50
+ { command: 'ls; rm -rf /' },
51
+ { allowInsecure: true }
52
+ );
53
+ }).not.toThrow();
54
+ });
55
+ });
56
+
57
+ describe('Standard Tools Definition', () => {
58
+ it('should have read_file tool', () => {
59
+ const readTool = STANDARD_TOOLS.find((t) => t.name === 'read_file');
60
+ expect(readTool).toBeDefined();
61
+ expect(readTool?.execution?.type).toBe('file');
62
+ });
63
+
64
+ it('should have list_files tool with script execution', () => {
65
+ const listTool = STANDARD_TOOLS.find((t) => t.name === 'list_files');
66
+ expect(listTool).toBeDefined();
67
+ expect(listTool?.execution?.type).toBe('script');
68
+ });
69
+ });