keystone-cli 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -260,6 +260,23 @@ finally:
260
260
  type: shell
261
261
  run: echo "Workflow finished"
262
262
 
263
+ ### Expression Syntax
264
+
265
+ Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated using a safe AST parser.
266
+
267
+ - `${{ inputs.name }}`: Access workflow inputs.
268
+ - `${{ steps.id.output }}`: Access the raw output of a previous step.
269
+ - `${{ steps.id.outputs.field }}`: Access specific fields if the output is an object.
270
+ - `${{ steps.id.status }}`: Get the execution status of a step (`'success'`, `'failed'`, etc.).
271
+ - `${{ item }}`: Access the current item in a `foreach` loop.
272
+ - `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
273
+ - `${{ secrets.NAME }}`: Access redacted secrets.
274
+ - `${{ env.NAME }}`: Access environment variables.
275
+
276
+ Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
277
+
278
+ ---
279
+
263
280
  outputs:
264
281
  slack_message: ${{ steps.notify.output }}
265
282
  ```
@@ -274,8 +291,11 @@ Keystone supports several specialized step types:
274
291
  - `llm`: Prompt an agent and get structured or unstructured responses. Supports `schema` (JSON Schema) for structured output.
275
292
  - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
276
293
  - `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
294
+ - `allowInsecure`: Boolean (default `false`). Set to `true` to allow standard tools to run commands that contain risky shell characters.
295
+ - `allowOutsideCwd`: Boolean (default `false`). Set to `true` to allow standard tools to access files outside of the current working directory.
277
296
  - `request`: Make HTTP requests (GET, POST, etc.).
278
297
  - `file`: Read, write, or append to files.
298
+ - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
279
299
  - `human`: Pause execution for manual confirmation or text input.
280
300
  - `inputType: confirm`: Simple Enter-to-continue prompt.
281
301
  - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
@@ -352,6 +372,8 @@ You are a technical communications expert. Your goal is to take technical output
352
372
 
353
373
  Agents can be equipped with tools, which are essentially workflow steps they can choose to execute. You can define tools in the agent definition, or directly in an LLM step within a workflow.
354
374
 
375
+ Tool arguments are passed to the tool's execution step via the `args` variable.
376
+
355
377
  **`.keystone/workflows/agents/developer.md`**
356
378
  ```markdown
357
379
  ---
@@ -363,6 +385,18 @@ tools:
363
385
  id: list-files-tool
364
386
  type: shell
365
387
  run: ls -F
388
+ - name: read_file
389
+ description: Read a specific file
390
+ parameters:
391
+ type: object
392
+ properties:
393
+ path: { type: string }
394
+ required: [path]
395
+ execution:
396
+ id: read-file-tool
397
+ type: file
398
+ op: read
399
+ path: ${{ args.path }}
366
400
  ---
367
401
  You are a software developer. You can use tools to explore the codebase.
368
402
  ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "keystone-cli",
3
- "version": "0.6.1",
3
+ "version": "0.7.0",
4
4
  "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
5
5
  "type": "module",
6
6
  "bin": {
@@ -29,6 +29,7 @@ export interface ExpressionContext {
29
29
  secrets?: Record<string, string>;
30
30
  steps?: Record<string, { output?: unknown; outputs?: Record<string, unknown>; status?: string }>;
31
31
  item?: unknown;
32
+ args?: unknown;
32
33
  index?: number;
33
34
  env?: Record<string, string>;
34
35
  output?: unknown;
@@ -295,6 +296,7 @@ export class ExpressionEvaluator {
295
296
  secrets: context.secrets || {},
296
297
  steps: context.steps || {},
297
298
  item: context.item,
299
+ args: context.args,
298
300
  index: context.index,
299
301
  env: context.env || {},
300
302
  stdout: contextAsRecord.stdout, // For transform expressions
@@ -95,6 +95,9 @@ const LlmStepSchema = BaseStepSchema.extend({
95
95
  ])
96
96
  )
97
97
  .optional(),
98
+ useStandardTools: z.boolean().optional(),
99
+ allowOutsideCwd: z.boolean().optional(),
100
+ allowInsecure: z.boolean().optional(),
98
101
  });
99
102
 
100
103
  const WorkflowStepSchema = BaseStepSchema.extend({
@@ -9,13 +9,14 @@ import { RedactionBuffer, Redactor } from '../utils/redactor';
9
9
  import { type LLMMessage, getAdapter } from './llm-adapter';
10
10
  import { MCPClient } from './mcp-client';
11
11
  import type { MCPManager, MCPServerConfig } from './mcp-manager';
12
+ import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
12
13
  import type { StepResult } from './step-executor';
13
14
 
14
15
  interface ToolDefinition {
15
16
  name: string;
16
17
  description?: string;
17
18
  parameters: unknown;
18
- source: 'agent' | 'step' | 'mcp';
19
+ source: 'agent' | 'step' | 'mcp' | 'standard';
19
20
  execution?: Step;
20
21
  mcpClient?: MCPClient;
21
22
  }
@@ -105,7 +106,24 @@ export async function executeLlmStep(
105
106
  }
106
107
  }
107
108
 
108
- // 3. Add MCP tools
109
+ // 3. Add Standard tools
110
+ if (step.useStandardTools) {
111
+ for (const tool of STANDARD_TOOLS) {
112
+ allTools.push({
113
+ name: tool.name,
114
+ description: tool.description,
115
+ parameters: tool.parameters || {
116
+ type: 'object',
117
+ properties: {},
118
+ additionalProperties: true,
119
+ },
120
+ source: 'standard',
121
+ execution: tool.execution,
122
+ });
123
+ }
124
+ }
125
+
126
+ // 4. Add MCP tools
109
127
  const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
110
128
  if (step.useGlobalMcp && mcpManager) {
111
129
  const globalServers = mcpManager.getGlobalServers();
@@ -374,10 +392,28 @@ export async function executeLlmStep(
374
392
  });
375
393
  }
376
394
  } else if (toolInfo.execution) {
395
+ // Security validation for standard tools
396
+ if (toolInfo.source === 'standard') {
397
+ try {
398
+ validateStandardToolSecurity(toolInfo.name, args, {
399
+ allowOutsideCwd: step.allowOutsideCwd,
400
+ allowInsecure: step.allowInsecure,
401
+ });
402
+ } catch (error) {
403
+ messages.push({
404
+ role: 'tool',
405
+ tool_call_id: toolCall.id,
406
+ name: toolCall.function.name,
407
+ content: `Security Error: ${error instanceof Error ? error.message : String(error)}`,
408
+ });
409
+ continue;
410
+ }
411
+ }
412
+
377
413
  // Execute the tool as a step
378
414
  const toolContext: ExpressionContext = {
379
415
  ...context,
380
- item: args, // Use item to pass args to tool execution
416
+ args, // Use args to pass parameters to tool execution
381
417
  };
382
418
 
383
419
  const result = await executeStepFn(toolInfo.execution, toolContext);
@@ -136,14 +136,11 @@ export async function executeShell(
136
136
  const cwd = step.dir ? ExpressionEvaluator.evaluateString(step.dir, context) : undefined;
137
137
  const mergedEnv = Object.keys(env).length > 0 ? { ...Bun.env, ...env } : Bun.env;
138
138
 
139
- // Safe Fast Path: If command contains only safe characters (alphanumeric, -, _, ., /) and spaces,
140
- // we can split it and execute directly without a shell.
141
- // This completely eliminates shell injection risks for simple commands.
142
- const isSimpleCommand = /^[a-zA-Z0-9_\-./]+(?: [a-zA-Z0-9_\-./]+)*$/.test(command);
139
+ // Shell metacharacters that require a real shell
140
+ const hasShellMetas = /[|&;<>`$!]/.test(command);
143
141
 
144
142
  // Common shell builtins that must run in a shell
145
- const splitArgs = command.split(/\s+/);
146
- const cmd = splitArgs[0];
143
+ const firstWord = command.trim().split(/\s+/)[0];
147
144
  const isBuiltin = [
148
145
  'exit',
149
146
  'cd',
@@ -155,19 +152,50 @@ export async function executeShell(
155
152
  'unalias',
156
153
  'eval',
157
154
  'set',
158
- ].includes(cmd);
155
+ 'true',
156
+ 'false',
157
+ ].includes(firstWord);
158
+
159
+ const canUseSpawn = !hasShellMetas && !isBuiltin;
159
160
 
160
161
  try {
161
162
  let stdoutString = '';
162
163
  let stderrString = '';
163
164
  let exitCode = 0;
164
165
 
165
- if (isSimpleCommand && !isBuiltin) {
166
- // split by spaces
167
- const args = splitArgs.slice(1);
168
- if (!cmd) throw new Error('Empty command');
166
+ if (canUseSpawn) {
167
+ // Robust splitting that handles single and double quotes
168
+ const args: string[] = [];
169
+ let current = '';
170
+ let inQuote = false;
171
+ let quoteChar = '';
172
+
173
+ for (let i = 0; i < command.length; i++) {
174
+ const char = command[i];
175
+ if ((char === "'" || char === '"') && (i === 0 || command[i - 1] !== '\\')) {
176
+ if (inQuote && char === quoteChar) {
177
+ inQuote = false;
178
+ quoteChar = '';
179
+ } else if (!inQuote) {
180
+ inQuote = true;
181
+ quoteChar = char;
182
+ } else {
183
+ current += char;
184
+ }
185
+ } else if (/\s/.test(char) && !inQuote) {
186
+ if (current) {
187
+ args.push(current);
188
+ current = '';
189
+ }
190
+ } else {
191
+ current += char;
192
+ }
193
+ }
194
+ if (current) args.push(current);
195
+
196
+ if (args.length === 0) throw new Error('Empty command');
169
197
 
170
- const proc = Bun.spawn([cmd, ...args], {
198
+ const proc = Bun.spawn(args, {
171
199
  cwd,
172
200
  env: mergedEnv,
173
201
  stdout: 'pipe',
@@ -0,0 +1,147 @@
1
+ import { afterAll, beforeAll, describe, expect, it, mock, spyOn } from 'bun:test';
2
+ import type { ExpressionContext } from '../expression/evaluator';
3
+ import type { LlmStep, Step } from '../parser/schema';
4
+ import { ConsoleLogger } from '../utils/logger';
5
+ import { OpenAIAdapter } from './llm-adapter';
6
+ import { executeLlmStep } from './llm-executor';
7
+
8
+ describe('Standard Tools Integration', () => {
9
+ const originalOpenAIChat = OpenAIAdapter.prototype.chat;
10
+
11
+ beforeAll(() => {
12
+ // Mocking OpenAI Adapter
13
+ });
14
+
15
+ afterAll(() => {
16
+ OpenAIAdapter.prototype.chat = originalOpenAIChat;
17
+ });
18
+
19
+ it('should inject standard tools when useStandardTools is true', async () => {
20
+ // biome-ignore lint/suspicious/noExplicitAny: mock
21
+ let capturedTools: any[] = [];
22
+
23
+ OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
24
+ capturedTools = options.tools || [];
25
+ return {
26
+ message: {
27
+ role: 'assistant',
28
+ content: 'I will read the file',
29
+ tool_calls: [
30
+ {
31
+ id: 'call_1',
32
+ type: 'function',
33
+ function: {
34
+ name: 'read_file',
35
+ arguments: JSON.stringify({ path: 'test.txt' }),
36
+ },
37
+ },
38
+ ],
39
+ },
40
+ usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
41
+ // biome-ignore lint/suspicious/noExplicitAny: mock
42
+ } as any;
43
+ });
44
+
45
+ const step: LlmStep = {
46
+ id: 'l1',
47
+ type: 'llm',
48
+ agent: 'test-agent',
49
+ needs: [],
50
+ prompt: 'read test.txt',
51
+ useStandardTools: true,
52
+ maxIterations: 1,
53
+ };
54
+
55
+ const context: ExpressionContext = { inputs: {}, steps: {} };
56
+ const executeStepFn = mock(async (s: Step) => {
57
+ return { status: 'success', output: 'file content' };
58
+ });
59
+
60
+ // We catch the "Max iterations reached" error because we set maxIterations to 1
61
+ // but we can still check if tools were injected and the tool call was made.
62
+ try {
63
+ // biome-ignore lint/suspicious/noExplicitAny: mock
64
+ await executeLlmStep(step, context, executeStepFn as any);
65
+ } catch (e) {
66
+ if ((e as Error).message !== 'Max ReAct iterations reached') throw e;
67
+ }
68
+
69
+ expect(capturedTools.some((t) => t.function.name === 'read_file')).toBe(true);
70
+ expect(executeStepFn).toHaveBeenCalled();
71
+ const toolStep = executeStepFn.mock.calls[0][0] as Step;
72
+ expect(toolStep.type).toBe('file');
73
+ });
74
+
75
+ it('should block risky standard tools without allowInsecure', async () => {
76
+ OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
77
+ return {
78
+ message: {
79
+ role: 'assistant',
80
+ content: 'I will run a command',
81
+ tool_calls: [
82
+ {
83
+ id: 'call_2',
84
+ type: 'function',
85
+ function: {
86
+ name: 'run_command',
87
+ arguments: JSON.stringify({ command: 'rm -rf /' }),
88
+ },
89
+ },
90
+ ],
91
+ },
92
+ usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
93
+ // biome-ignore lint/suspicious/noExplicitAny: mock
94
+ } as any;
95
+ });
96
+
97
+ const step: LlmStep = {
98
+ id: 'l1',
99
+ type: 'llm',
100
+ agent: 'test-agent',
101
+ needs: [],
102
+ prompt: 'run risky command',
103
+ useStandardTools: true,
104
+ allowInsecure: false, // Explicitly false
105
+ maxIterations: 2,
106
+ };
107
+
108
+ const context: ExpressionContext = { inputs: {}, steps: {} };
109
+ const executeStepFn = mock(async () => ({ status: 'success', output: '' }));
110
+
111
+ // The execution should not throw, but it should return a tool error message to the LLM
112
+ // However, in our mock, we want to see if executeStepFn was called.
113
+ // Actually, in llm-executor.ts, it pushes a "Security Error" message if check fails and continues loop.
114
+
115
+ let securityErrorMessage = '';
116
+ OpenAIAdapter.prototype.chat = mock(async (messages) => {
117
+ const lastMessage = messages[messages.length - 1];
118
+ if (lastMessage.role === 'tool') {
119
+ securityErrorMessage = lastMessage.content;
120
+ return {
121
+ message: { role: 'assistant', content: 'stop' },
122
+ usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
123
+ // biome-ignore lint/suspicious/noExplicitAny: mock
124
+ } as any;
125
+ }
126
+ return {
127
+ message: {
128
+ role: 'assistant',
129
+ tool_calls: [
130
+ {
131
+ id: 'c2',
132
+ type: 'function',
133
+ function: { name: 'run_command', arguments: '{"command":"rm -rf /"}' },
134
+ },
135
+ ],
136
+ },
137
+ // biome-ignore lint/suspicious/noExplicitAny: mock
138
+ } as any;
139
+ });
140
+
141
+ // biome-ignore lint/suspicious/noExplicitAny: mock
142
+ await executeLlmStep(step, context, executeStepFn as any);
143
+
144
+ expect(securityErrorMessage).toContain('Security Error');
145
+ expect(executeStepFn).not.toHaveBeenCalled();
146
+ });
147
+ });
@@ -0,0 +1,69 @@
1
+ import { describe, expect, it } from 'bun:test';
2
+ import * as fs from 'node:fs';
3
+ import * as path from 'node:path';
4
+ import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
5
+
6
+ describe('Standard Tools Security', () => {
7
+ const options = { allowOutsideCwd: false, allowInsecure: false };
8
+
9
+ it('should allow paths within CWD', () => {
10
+ expect(() => {
11
+ validateStandardToolSecurity('read_file', { path: 'src/cli.ts' }, options);
12
+ }).not.toThrow();
13
+ expect(() => {
14
+ validateStandardToolSecurity('search_files', { pattern: '**/*.ts', dir: 'src' }, options);
15
+ }).not.toThrow();
16
+ });
17
+
18
+ it('should block paths outside CWD by default', () => {
19
+ expect(() => {
20
+ validateStandardToolSecurity('read_file', { path: '../../etc/passwd' }, options);
21
+ }).toThrow(/Access denied/);
22
+ expect(() => {
23
+ validateStandardToolSecurity('read_file_lines', { path: '../../etc/passwd' }, options);
24
+ }).toThrow(/Access denied/);
25
+ expect(() => {
26
+ validateStandardToolSecurity('search_files', { pattern: '*', dir: '/etc' }, options);
27
+ }).toThrow(/Access denied/);
28
+ });
29
+
30
+ it('should allow paths outside CWD if allowOutsideCwd is true', () => {
31
+ expect(() => {
32
+ validateStandardToolSecurity(
33
+ 'read_file',
34
+ { path: '../../etc/passwd' },
35
+ { allowOutsideCwd: true }
36
+ );
37
+ }).not.toThrow();
38
+ });
39
+
40
+ it('should block risky commands by default', () => {
41
+ expect(() => {
42
+ validateStandardToolSecurity('run_command', { command: 'ls; rm -rf /' }, options);
43
+ }).toThrow(/Security Error/);
44
+ });
45
+
46
+ it('should allow risky commands if allowInsecure is true', () => {
47
+ expect(() => {
48
+ validateStandardToolSecurity(
49
+ 'run_command',
50
+ { command: 'ls; rm -rf /' },
51
+ { allowInsecure: true }
52
+ );
53
+ }).not.toThrow();
54
+ });
55
+ });
56
+
57
+ describe('Standard Tools Definition', () => {
58
+ it('should have read_file tool', () => {
59
+ const readTool = STANDARD_TOOLS.find((t) => t.name === 'read_file');
60
+ expect(readTool).toBeDefined();
61
+ expect(readTool?.execution?.type).toBe('file');
62
+ });
63
+
64
+ it('should have list_files tool with script execution', () => {
65
+ const listTool = STANDARD_TOOLS.find((t) => t.name === 'list_files');
66
+ expect(listTool).toBeDefined();
67
+ expect(listTool?.execution?.type).toBe('script');
68
+ });
69
+ });
@@ -0,0 +1,270 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import { ExpressionEvaluator } from '../expression/evaluator';
4
+ import type { AgentTool, Step } from '../parser/schema';
5
+ import { detectShellInjectionRisk } from './shell-executor';
6
+
7
+ export const STANDARD_TOOLS: AgentTool[] = [
8
+ {
9
+ name: 'read_file',
10
+ description: 'Read the contents of a file',
11
+ parameters: {
12
+ type: 'object',
13
+ properties: {
14
+ path: { type: 'string', description: 'Path to the file to read' },
15
+ },
16
+ required: ['path'],
17
+ },
18
+ execution: {
19
+ id: 'std_read_file',
20
+ type: 'file',
21
+ op: 'read',
22
+ path: '${{ args.path }}',
23
+ },
24
+ },
25
+ {
26
+ name: 'read_file_lines',
27
+ description: 'Read a specific range of lines from a file',
28
+ parameters: {
29
+ type: 'object',
30
+ properties: {
31
+ path: { type: 'string', description: 'Path to the file to read' },
32
+ start: { type: 'number', description: 'Starting line number (1-indexed)', default: 1 },
33
+ count: { type: 'number', description: 'Number of lines to read', default: 100 },
34
+ },
35
+ required: ['path'],
36
+ },
37
+ execution: {
38
+ id: 'std_read_file_lines',
39
+ type: 'script',
40
+ run: `
41
+ const fs = require('node:fs');
42
+ const path = require('node:path');
43
+ const filePath = args.path;
44
+ const start = args.start || 1;
45
+ const count = args.count || 100;
46
+
47
+ if (!fs.existsSync(filePath)) {
48
+ throw new Error('File not found: ' + filePath);
49
+ }
50
+
51
+ const content = fs.readFileSync(filePath, 'utf8');
52
+ const lines = content.split('\\n');
53
+ return lines.slice(start - 1, start - 1 + count).join('\\n');
54
+ `,
55
+ allowInsecure: true,
56
+ },
57
+ },
58
+ {
59
+ name: 'write_file',
60
+ description: 'Write or overwrite a file with content',
61
+ parameters: {
62
+ type: 'object',
63
+ properties: {
64
+ path: { type: 'string', description: 'Path to the file to write' },
65
+ content: { type: 'string', description: 'Content to write to the file' },
66
+ },
67
+ required: ['path', 'content'],
68
+ },
69
+ execution: {
70
+ id: 'std_write_file',
71
+ type: 'file',
72
+ op: 'write',
73
+ path: '${{ args.path }}',
74
+ content: '${{ args.content }}',
75
+ },
76
+ },
77
+ {
78
+ name: 'list_files',
79
+ description: 'List files in a directory',
80
+ parameters: {
81
+ type: 'object',
82
+ properties: {
83
+ path: {
84
+ type: 'string',
85
+ description: 'Directory path (defaults to current directory)',
86
+ default: '.',
87
+ },
88
+ },
89
+ },
90
+ execution: {
91
+ id: 'std_list_files',
92
+ type: 'script',
93
+ run: `
94
+ const fs = require('node:fs');
95
+ const path = require('node:path');
96
+ const dir = args.path || '.';
97
+ if (fs.existsSync(dir)) {
98
+ const files = fs.readdirSync(dir, { withFileTypes: true });
99
+ return files.map(f => ({
100
+ name: f.name,
101
+ type: f.isDirectory() ? 'directory' : 'file',
102
+ size: f.isFile() ? fs.statSync(path.join(dir, f.name)).size : undefined
103
+ }));
104
+ }
105
+ throw new Error('Directory not found: ' + dir);
106
+ `,
107
+ allowInsecure: true,
108
+ },
109
+ },
110
+ {
111
+ name: 'search_files',
112
+ description: 'Search for files by pattern (glob)',
113
+ parameters: {
114
+ type: 'object',
115
+ properties: {
116
+ pattern: { type: 'string', description: 'Glob pattern (e.g. **/*.ts)' },
117
+ dir: { type: 'string', description: 'Directory to search in', default: '.' },
118
+ },
119
+ required: ['pattern'],
120
+ },
121
+ execution: {
122
+ id: 'std_search_files',
123
+ type: 'script',
124
+ run: `
125
+ const fs = require('node:fs');
126
+ const path = require('node:path');
127
+ const { globSync } = require('glob');
128
+ const dir = args.dir || '.';
129
+ const pattern = args.pattern;
130
+ try {
131
+ return globSync(pattern, { cwd: dir, nodir: true });
132
+ } catch (e) {
133
+ throw new Error('Search failed: ' + e.message);
134
+ }
135
+ `,
136
+ allowInsecure: true,
137
+ },
138
+ },
139
+ {
140
+ name: 'search_content',
141
+ description: 'Search for a string or regex within files',
142
+ parameters: {
143
+ type: 'object',
144
+ properties: {
145
+ query: { type: 'string', description: 'String or regex to search for' },
146
+ pattern: {
147
+ type: 'string',
148
+ description: 'Glob pattern of files to search in',
149
+ default: '**/*',
150
+ },
151
+ dir: { type: 'string', description: 'Directory to search in', default: '.' },
152
+ },
153
+ required: ['query'],
154
+ },
155
+ execution: {
156
+ id: 'std_search_content',
157
+ type: 'script',
158
+ run: `
159
+ const fs = require('node:fs');
160
+ const path = require('node:path');
161
+ const { globSync } = require('glob');
162
+ const dir = args.dir || '.';
163
+ const pattern = args.pattern || '**/*';
164
+ const query = args.query;
165
+ if (query.length > 500) {
166
+ throw new Error('Search query exceeds maximum length of 500 characters');
167
+ }
168
+ const isRegex = query.startsWith('/') && query.endsWith('/');
169
+ let regex;
170
+ try {
171
+ regex = isRegex ? new RegExp(query.slice(1, -1)) : new RegExp(query.replace(/[.*+?^$\\{}()|[\\]\\\\]/g, '\\\\$&'), 'i');
172
+ } catch (e) {
173
+ throw new Error('Invalid regular expression: ' + e.message);
174
+ }
175
+
176
+ const files = globSync(pattern, { cwd: dir, nodir: true });
177
+ const results = [];
178
+ for (const file of files) {
179
+ const fullPath = path.join(dir, file);
180
+ const content = fs.readFileSync(fullPath, 'utf8');
181
+ const lines = content.split('\\n');
182
+ for (let i = 0; i < lines.length; i++) {
183
+ if (regex.test(lines[i])) {
184
+ results.push({
185
+ file,
186
+ line: i + 1,
187
+ content: lines[i].trim()
188
+ });
189
+ }
190
+ if (results.length > 100) break; // Limit results
191
+ }
192
+ if (results.length > 100) break;
193
+ }
194
+ return results;
195
+ `,
196
+ allowInsecure: true,
197
+ },
198
+ },
199
+ {
200
+ name: 'run_command',
201
+ description: 'Run a shell command',
202
+ parameters: {
203
+ type: 'object',
204
+ properties: {
205
+ command: { type: 'string', description: 'The shell command to run' },
206
+ dir: { type: 'string', description: 'Working directory for the command' },
207
+ },
208
+ required: ['command'],
209
+ },
210
+ execution: {
211
+ id: 'std_run_command',
212
+ type: 'shell',
213
+ run: '${{ args.command }}',
214
+ dir: '${{ args.dir }}',
215
+ },
216
+ },
217
+ ];
218
+
219
+ /**
220
+ * Validate that a tool call is safe to execute based on the LLM step's security flags.
221
+ */
222
+ export function validateStandardToolSecurity(
223
+ toolName: string,
224
+ // biome-ignore lint/suspicious/noExplicitAny: arguments can be any shape
225
+ args: any,
226
+ options: { allowOutsideCwd?: boolean; allowInsecure?: boolean }
227
+ ): void {
228
+ // 1. Check path traversal for file tools
229
+ if (
230
+ [
231
+ 'read_file',
232
+ 'read_file_lines',
233
+ 'write_file',
234
+ 'list_files',
235
+ 'search_files',
236
+ 'search_content',
237
+ ].includes(toolName)
238
+ ) {
239
+ const rawPath = args.path || args.dir || '.';
240
+ const cwd = process.cwd();
241
+ const resolvedPath = path.resolve(cwd, rawPath);
242
+ const realCwd = fs.realpathSync(cwd);
243
+
244
+ const isWithin = (target: string) => {
245
+ // Find the first existing ancestor to resolve the real path correctly
246
+ let current = target;
247
+ while (current !== path.dirname(current) && !fs.existsSync(current)) {
248
+ current = path.dirname(current);
249
+ }
250
+ const realTarget = fs.existsSync(current) ? fs.realpathSync(current) : current;
251
+ const relativePath = path.relative(realCwd, realTarget);
252
+ return !(relativePath.startsWith('..') || path.isAbsolute(relativePath));
253
+ };
254
+
255
+ if (!options.allowOutsideCwd && !isWithin(resolvedPath)) {
256
+ throw new Error(
257
+ `Access denied: Path '${rawPath}' resolves outside the working directory. Use 'allowOutsideCwd: true' to override.`
258
+ );
259
+ }
260
+ }
261
+
262
+ // 2. Check shell risk for run_command
263
+ if (toolName === 'run_command' && !options.allowInsecure) {
264
+ if (detectShellInjectionRisk(args.command)) {
265
+ throw new Error(
266
+ `Security Error: Command contains risky shell characters. Use 'allowInsecure: true' on the llm step to execute this.`
267
+ );
268
+ }
269
+ }
270
+ }
@@ -402,7 +402,13 @@ async function executeRequestStep(
402
402
  output: {
403
403
  status: response.status,
404
404
  statusText: response.statusText,
405
- headers: Object.fromEntries(response.headers as unknown as Iterable<[string, string]>),
405
+ headers: (() => {
406
+ const h: Record<string, string> = {};
407
+ response.headers.forEach((v, k) => {
408
+ h[k] = v;
409
+ });
410
+ return h;
411
+ })(),
406
412
  data: responseData,
407
413
  },
408
414
  status: response.ok ? 'success' : 'failed',
@@ -435,7 +441,11 @@ async function executeHumanStep(
435
441
  return {
436
442
  output:
437
443
  step.inputType === 'confirm'
438
- ? answer === true || answer === 'true' || answer === 'yes' || answer === 'y'
444
+ ? answer === true ||
445
+ (typeof answer === 'string' &&
446
+ (answer.toLowerCase() === 'true' ||
447
+ answer.toLowerCase() === 'yes' ||
448
+ answer.toLowerCase() === 'y'))
439
449
  : answer,
440
450
  status: 'success',
441
451
  };
@@ -19,9 +19,9 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
19
19
  - **eval**: (Optional) Configuration for prompt optimization `{ scorer: 'llm'|'script', agent, prompt, run }`.
20
20
  - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
21
21
  - **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }` (Set `allowInsecure: true` to bypass risky command checks)
22
- - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
22
+ - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, useStandardTools, allowOutsideCwd, allowInsecure, mcpServers }`
23
23
  - **workflow**: `{ id, type: 'workflow', path, inputs }`
24
- - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
24
+ - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content, allowOutsideCwd }`
25
25
  - **request**: `{ id, type: 'request', url, method, body, headers }`
26
26
  - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns a boolean but automatically falls back to text if the input is not yes/no)
27
27
  - **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
@@ -31,6 +31,17 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
31
31
  - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
32
32
  - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
33
33
 
34
+ ## Standard Tools
35
+ When `useStandardTools: true` is set on an `llm` step, the agent has access to:
36
+ - `read_file(path)`: Read file contents.
37
+ - `read_file_lines(path, start, count)`: Read a specific range of lines.
38
+ - `write_file(path, content)`: Write/overwrite file.
39
+ - `list_files(path)`: List directory contents.
40
+ - `search_files(pattern, dir)`: Search for files by pattern (glob).
41
+ - `search_content(query, pattern, dir)`: Search for text within files.
42
+ - `run_command(command, dir)`: Run shell commands (restricted by `allowInsecure`).
43
+ - **Path Gating**: File and search tools are restricted to the current working directory (CWD) by default. Use `allowOutsideCwd: true` to bypass.
44
+
34
45
  ## Agent Schema (.md)
35
46
  Markdown files with YAML frontmatter:
36
47
  - **name**: Agent name.
@@ -45,6 +56,9 @@ Markdown files with YAML frontmatter:
45
56
  - `${{ steps.id.output }}`
46
57
  - `${{ steps.id.status }}` (e.g., `'pending'`, `'running'`, `'success'`, `'failed'`, `'skipped'`)
47
58
  - `${{ args.paramName }}` (used inside agent tools)
59
+ - `${{ item }}` (current item in a `foreach` loop)
60
+ - `${{ secrets.NAME }}` (access redacted secrets)
61
+ - `${{ env.NAME }}` (access environment variables)
48
62
  - Standard JS-like expressions: `${{ steps.count.output > 0 ? 'yes' : 'no' }}`
49
63
 
50
64
  # Guidelines
@@ -0,0 +1,17 @@
1
+ ---
2
+ name: software-engineer
3
+ description: "Expert at writing and debugging code"
4
+ model: gpt-4o
5
+ ---
6
+
7
+ # Role
8
+ You are a Software Engineer. Your goal is to implement, refactor, and debug code based on user specifications.
9
+
10
+ # Guidelines
11
+ - Use `list_files` or `search_files` to understand the project structure.
12
+ - Use `search_content` to find where specific code or dependencies are located.
13
+ - Use `read_file` to examine code, or `read_file_lines` for large files.
14
+ - Use `write_file` to implement new features or fixes.
15
+ - Use `run_command` only when necessary for testing or building (e.g., `npm test`, `bun run build`).
16
+ - Be concise and follow best practices for the language you are writing in.
17
+ - Always verify your changes if possible by running tests.
@@ -0,0 +1,54 @@
1
+ name: memory-service
2
+ description: "Demonstrate long-term memory capabilities"
3
+
4
+ steps:
5
+ # Store information in memory
6
+ - id: remember_facts
7
+ type: memory
8
+ op: store
9
+ text: "Keystone CLI was initialized on 2025-01-01 by the engineering team."
10
+ metadata:
11
+ type: "fact"
12
+ confidence: 1.0
13
+
14
+ - id: remember_preference
15
+ type: memory
16
+ op: store
17
+ text: "The user prefers TypeScript over JavaScript for all projects."
18
+ metadata:
19
+ type: "preference"
20
+ confidence: 0.9
21
+ needs: [remember_facts]
22
+
23
+ # Search for information
24
+ - id: recall_preference
25
+ type: memory
26
+ op: search
27
+ query: "What language does the user like?"
28
+ limit: 1
29
+ needs: [remember_preference]
30
+
31
+ # Use recalled information in an LLM step
32
+ - id: confirm_memory
33
+ type: llm
34
+ agent: general
35
+ needs: [recall_preference]
36
+ prompt: |
37
+ Based on this memory:
38
+ ${{ steps.recall_preference.output[0].content }}
39
+
40
+ What programming language should I use? Answer in one word.
41
+ schema:
42
+ type: object
43
+ properties:
44
+ language:
45
+ type: string
46
+ required: [language]
47
+
48
+ - id: summary
49
+ type: shell
50
+ needs: [confirm_memory]
51
+ run: |
52
+ echo "Memory Service Demo Complete"
53
+ echo "Recalled: ${{ steps.recall_preference.output[0].content }}"
54
+ echo "Decision: ${{ steps.confirm_memory.output.language }}"
@@ -0,0 +1,44 @@
1
+ name: robust-automation
2
+ description: "Demonstrate auto-healing and reflexion features"
3
+
4
+ steps:
5
+ # Demonstration of auto-healing
6
+ # This step attempts to run a broken command, but the agent should fix it
7
+ - id: auto_heal_demo
8
+ type: shell
9
+ run: |
10
+ # This command has a typo and should fail
11
+ ech "Hello World"
12
+ auto_heal:
13
+ agent: software-engineer
14
+ maxAttempts: 2
15
+ model: gpt-4o
16
+
17
+ # Demonstration of reflexion (self-correction)
18
+ # This step asks for JSON but provides a prompt that might lead to text
19
+ # Reflexion should catch the schema validation error and retry
20
+ - id: reflexion_demo
21
+ type: llm
22
+ agent: general
23
+ needs: [auto_heal_demo]
24
+ prompt: |
25
+ Generate a list of 3 random colors. Just list them.
26
+ schema:
27
+ type: object
28
+ properties:
29
+ colors:
30
+ type: array
31
+ items:
32
+ type: string
33
+ required: [colors]
34
+ reflexion:
35
+ limit: 3
36
+ hint: "Ensure the output is valid JSON matching the schema."
37
+
38
+ - id: summary
39
+ type: shell
40
+ needs: [reflexion_demo]
41
+ run: |
42
+ echo "Robust automation demo complete."
43
+ echo "Healed Command Output: ${{ steps.auto_heal_demo.output.stdout }}"
44
+ echo "Reflexion Output: ${{ steps.reflexion_demo.output }}"
@@ -12,6 +12,7 @@ steps:
12
12
  agent: keystone-architect
13
13
  needs: [get_requirements]
14
14
  allowClarification: true
15
+ useStandardTools: true
15
16
  prompt: |
16
17
  The user wants to build the following:
17
18
  <user_requirements>