npm - keystone-cli - Versions diffs - 0.6.1 → 0.7.0 - Mend

keystone-cli 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +34 -0
package/package.json +1 -1
package/src/expression/evaluator.ts +2 -0
package/src/parser/schema.ts +3 -0
package/src/runner/llm-executor.ts +39 -3
package/src/runner/shell-executor.ts +40 -12
package/src/runner/standard-tools-integration.test.ts +147 -0
package/src/runner/standard-tools.test.ts +69 -0
package/src/runner/standard-tools.ts +270 -0
package/src/runner/step-executor.ts +12 -2
package/src/templates/agents/keystone-architect.md +16 -2
package/src/templates/agents/software-engineer.md +17 -0
package/src/templates/memory-service.yaml +54 -0
package/src/templates/robust-automation.yaml +44 -0
package/src/templates/scaffold-feature.yaml +1 -0

package/README.md CHANGED Viewed

@@ -260,6 +260,23 @@ finally:
     type: shell
     run: echo "Workflow finished"
+### Expression Syntax
+Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated using a safe AST parser.
+- `${{ inputs.name }}`: Access workflow inputs.
+- `${{ steps.id.output }}`: Access the raw output of a previous step.
+- `${{ steps.id.outputs.field }}`: Access specific fields if the output is an object.
+- `${{ steps.id.status }}`: Get the execution status of a step (`'success'`, `'failed'`, etc.).
+- `${{ item }}`: Access the current item in a `foreach` loop.
+- `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
+- `${{ secrets.NAME }}`: Access redacted secrets.
+- `${{ env.NAME }}`: Access environment variables.
+Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
+---
 outputs:
   slack_message: ${{ steps.notify.output }}
 ```
@@ -274,8 +291,11 @@ Keystone supports several specialized step types:
 - `llm`: Prompt an agent and get structured or unstructured responses. Supports `schema` (JSON Schema) for structured output.
   - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
   - `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
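+  - `useStandardTools`: Boolean (default `false`). Set to `true` to give the agent the built-in file, search, and shell tools.
+  - `allowInsecure`: Boolean (default `false`). Set to `true` to let the standard `run_command` tool run commands that the shell-risk check would otherwise reject.
+  - `allowOutsideCwd`: Boolean (default `false`). Set to `true` to allow the standard file tools to access paths outside of the current working directory.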
 - `request`: Make HTTP requests (GET, POST, etc.).
 - `file`: Read, write, or append to files.
+  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
 - `human`: Pause execution for manual confirmation or text input.
   - `inputType: confirm`: Simple Enter-to-continue prompt.
   - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
@@ -352,6 +372,8 @@ You are a technical communications expert. Your goal is to take technical output
 Agents can be equipped with tools, which are essentially workflow steps they can choose to execute. You can define tools in the agent definition, or directly in an LLM step within a workflow.
+Tool arguments are passed to the tool's execution step via the `args` variable.
 **`.keystone/workflows/agents/developer.md`**
 ```markdown
 ---
@@ -363,6 +385,18 @@ tools:
       id: list-files-tool
       type: shell
       run: ls -F
+  - name: read_file
+    description: Read a specific file
+    parameters:
+      type: object
+      properties:
+        path: { type: string }
+      required: [path]
+    execution:
+      id: read-file-tool
+      type: file
+      op: read
+      path: ${{ args.path }}
 ---
 You are a software developer. You can use tools to explore the codebase.
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "0.6.1",
+  "version": "0.7.0",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {

package/src/expression/evaluator.ts CHANGED Viewed

@@ -29,6 +29,7 @@ export interface ExpressionContext {
   secrets?: Record<string, string>;
   steps?: Record<string, { output?: unknown; outputs?: Record<string, unknown>; status?: string }>;
   item?: unknown;
+  args?: unknown;
   index?: number;
   env?: Record<string, string>;
   output?: unknown;
@@ -295,6 +296,7 @@ export class ExpressionEvaluator {
           secrets: context.secrets || {},
           steps: context.steps || {},
           item: context.item,
+          args: context.args,
           index: context.index,
           env: context.env || {},
           stdout: contextAsRecord.stdout, // For transform expressions

package/src/parser/schema.ts CHANGED Viewed

@@ -95,6 +95,9 @@ const LlmStepSchema = BaseStepSchema.extend({
       ])
     )
     .optional(),
+  useStandardTools: z.boolean().optional(),
+  allowOutsideCwd: z.boolean().optional(),
+  allowInsecure: z.boolean().optional(),
 });
 const WorkflowStepSchema = BaseStepSchema.extend({

package/src/runner/llm-executor.ts CHANGED Viewed

@@ -9,13 +9,14 @@ import { RedactionBuffer, Redactor } from '../utils/redactor';
 import { type LLMMessage, getAdapter } from './llm-adapter';
 import { MCPClient } from './mcp-client';
 import type { MCPManager, MCPServerConfig } from './mcp-manager';
+import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
 import type { StepResult } from './step-executor';
 interface ToolDefinition {
   name: string;
   description?: string;
   parameters: unknown;
-  source: 'agent' | 'step' | 'mcp';
+  source: 'agent' | 'step' | 'mcp' | 'standard';
   execution?: Step;
   mcpClient?: MCPClient;
 }
@@ -105,7 +106,24 @@ export async function executeLlmStep(
       }
     }
-    // 3. Add MCP tools
+    // 3. Add Standard tools
+    if (step.useStandardTools) {
+      for (const tool of STANDARD_TOOLS) {
+        allTools.push({
+          name: tool.name,
+          description: tool.description,
+          parameters: tool.parameters || {
+            type: 'object',
+            properties: {},
+            additionalProperties: true,
+          },
+          source: 'standard',
+          execution: tool.execution,
+        });
+      }
+    }
+    // 4. Add MCP tools
     const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
     if (step.useGlobalMcp && mcpManager) {
       const globalServers = mcpManager.getGlobalServers();
@@ -374,10 +392,28 @@ export async function executeLlmStep(
             });
           }
         } else if (toolInfo.execution) {
+          // Security validation for standard tools
+          if (toolInfo.source === 'standard') {
+            try {
+              validateStandardToolSecurity(toolInfo.name, args, {
+                allowOutsideCwd: step.allowOutsideCwd,
+                allowInsecure: step.allowInsecure,
+              });
+            } catch (error) {
+              messages.push({
+                role: 'tool',
+                tool_call_id: toolCall.id,
+                name: toolCall.function.name,
+                content: `Security Error: ${error instanceof Error ? error.message : String(error)}`,
+              });
+              continue;
+            }
+          }
           // Execute the tool as a step
           const toolContext: ExpressionContext = {
             ...context,
-            item: args, // Use item to pass args to tool execution
+            args, // Use args to pass parameters to tool execution
           };
           const result = await executeStepFn(toolInfo.execution, toolContext);

package/src/runner/shell-executor.ts CHANGED Viewed

@@ -136,14 +136,11 @@ export async function executeShell(
   const cwd = step.dir ? ExpressionEvaluator.evaluateString(step.dir, context) : undefined;
   const mergedEnv = Object.keys(env).length > 0 ? { ...Bun.env, ...env } : Bun.env;
-  // Safe Fast Path: If command contains only safe characters (alphanumeric, -, _, ., /) and spaces,
-  // we can split it and execute directly without a shell.
-  // This completely eliminates shell injection risks for simple commands.
-  const isSimpleCommand = /^[a-zA-Z0-9_\-./]+(?: [a-zA-Z0-9_\-./]+)*$/.test(command);
+  // Shell metacharacters that require a real shell
+  const hasShellMetas = /[|&;<>`$!]/.test(command);
   // Common shell builtins that must run in a shell
-  const splitArgs = command.split(/\s+/);
-  const cmd = splitArgs[0];
+  const firstWord = command.trim().split(/\s+/)[0];
   const isBuiltin = [
     'exit',
     'cd',
@@ -155,19 +152,50 @@ export async function executeShell(
     'unalias',
     'eval',
     'set',
-  ].includes(cmd);
+    'true',
+    'false',
+  ].includes(firstWord);
+  const canUseSpawn = !hasShellMetas && !isBuiltin;
   try {
     let stdoutString = '';
     let stderrString = '';
     let exitCode = 0;
-    if (isSimpleCommand && !isBuiltin) {
-      // split by spaces
-      const args = splitArgs.slice(1);
-      if (!cmd) throw new Error('Empty command');
+    if (canUseSpawn) {
+      // Robust splitting that handles single and double quotes
+      const args: string[] = [];
+      let current = '';
+      let inQuote = false;
+      let quoteChar = '';
+      for (let i = 0; i < command.length; i++) {
+        const char = command[i];
+        if ((char === "'" || char === '"') && (i === 0 || command[i - 1] !== '\\')) {
+          if (inQuote && char === quoteChar) {
+            inQuote = false;
+            quoteChar = '';
+          } else if (!inQuote) {
+            inQuote = true;
+            quoteChar = char;
+          } else {
+            current += char;
+          }
+        } else if (/\s/.test(char) && !inQuote) {
+          if (current) {
+            args.push(current);
+            current = '';
+          }
+        } else {
+          current += char;
+        }
+      }
+      if (current) args.push(current);
+      if (args.length === 0) throw new Error('Empty command');
-      const proc = Bun.spawn([cmd, ...args], {
+      const proc = Bun.spawn(args, {
         cwd,
         env: mergedEnv,
         stdout: 'pipe',

package/src/runner/standard-tools-integration.test.ts ADDED Viewed

@@ -0,0 +1,147 @@
+import { afterAll, beforeAll, describe, expect, it, mock, spyOn } from 'bun:test';
+import type { ExpressionContext } from '../expression/evaluator';
+import type { LlmStep, Step } from '../parser/schema';
+import { ConsoleLogger } from '../utils/logger';
+import { OpenAIAdapter } from './llm-adapter';
+import { executeLlmStep } from './llm-executor';
+describe('Standard Tools Integration', () => {
+  // Each test replaces OpenAIAdapter.prototype.chat with its own mock; keep the real
+  // implementation so it can be restored once the suite has finished.
+  const originalOpenAIChat = OpenAIAdapter.prototype.chat;
+  afterAll(() => {
+    OpenAIAdapter.prototype.chat = originalOpenAIChat;
+  });
+  it('should inject standard tools when useStandardTools is true', async () => {
+    // biome-ignore lint/suspicious/noExplicitAny: mock
+    let capturedTools: any[] = [];
+    // The mocked model always answers with a read_file tool call and records the tools it was offered.
+    OpenAIAdapter.prototype.chat = mock(async (messages, options) => {
+      capturedTools = options.tools || [];
+      return {
+        message: {
+          role: 'assistant',
+          content: 'I will read the file',
+          tool_calls: [
+            {
+              id: 'call_1',
+              type: 'function',
+              function: {
+                name: 'read_file',
+                arguments: JSON.stringify({ path: 'test.txt' }),
+              },
+            },
+          ],
+        },
+        usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
+        // biome-ignore lint/suspicious/noExplicitAny: mock
+      } as any;
+    });
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      needs: [],
+      prompt: 'read test.txt',
+      useStandardTools: true,
+      maxIterations: 1,
+    };
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+    const executeStepFn = mock(async (s: Step) => {
+      return { status: 'success', output: 'file content' };
+    });
+    // We catch the "Max iterations reached" error because we set maxIterations to 1
+    // but we can still check if tools were injected and the tool call was made.
+    try {
+      // biome-ignore lint/suspicious/noExplicitAny: mock
+      await executeLlmStep(step, context, executeStepFn as any);
+    } catch (e) {
+      if ((e as Error).message !== 'Max ReAct iterations reached') throw e;
+    }
+    expect(capturedTools.some((t) => t.function.name === 'read_file')).toBe(true);
+    expect(executeStepFn).toHaveBeenCalled();
+    const toolStep = executeStepFn.mock.calls[0][0] as Step;
+    expect(toolStep.type).toBe('file');
+  });
+  it('should block risky standard tools without allowInsecure', async () => {
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      needs: [],
+      prompt: 'run risky command',
+      useStandardTools: true,
+      allowInsecure: false, // Explicitly false
+      maxIterations: 2,
+    };
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+    const executeStepFn = mock(async () => ({ status: 'success', output: '' }));
+    // A rejected tool call does not throw: the executor answers the model with a `tool`
+    // message whose content starts with "Security Error". The mock first requests the risky
+    // command, then captures that tool message and ends the conversation.
+    let securityErrorMessage = '';
+    OpenAIAdapter.prototype.chat = mock(async (messages) => {
+      const lastMessage = messages[messages.length - 1];
+      if (lastMessage.role === 'tool') {
+        securityErrorMessage = lastMessage.content;
+        return {
+          message: { role: 'assistant', content: 'stop' },
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+          // biome-ignore lint/suspicious/noExplicitAny: mock
+        } as any;
+      }
+      return {
+        message: {
+          role: 'assistant',
+          tool_calls: [
+            {
+              id: 'c2',
+              type: 'function',
+              function: { name: 'run_command', arguments: '{"command":"rm -rf /"}' },
+            },
+          ],
+        },
+        // biome-ignore lint/suspicious/noExplicitAny: mock
+      } as any;
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: mock
+    await executeLlmStep(step, context, executeStepFn as any);
+    expect(securityErrorMessage).toContain('Security Error');
+    expect(executeStepFn).not.toHaveBeenCalled();
+  });
+});

package/src/runner/standard-tools.test.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import { describe, expect, it } from 'bun:test';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { STANDARD_TOOLS, validateStandardToolSecurity } from './standard-tools';
+// Unit tests for validateStandardToolSecurity: path gating for file tools and
+// shell-risk gating for run_command, with and without the override flags.
+describe('Standard Tools Security', () => {
+  // Both overrides off: the restrictive configuration.
+  const options = { allowOutsideCwd: false, allowInsecure: false };
+  it('should allow paths within CWD', () => {
+    expect(() => {
+      validateStandardToolSecurity('read_file', { path: 'src/cli.ts' }, options);
+    }).not.toThrow();
+    expect(() => {
+      validateStandardToolSecurity('search_files', { pattern: '**/*.ts', dir: 'src' }, options);
+    }).not.toThrow();
+  });
+  it('should block paths outside CWD by default', () => {
+    // Relative traversal via `path`
+    expect(() => {
+      validateStandardToolSecurity('read_file', { path: '../../etc/passwd' }, options);
+    }).toThrow(/Access denied/);
+    expect(() => {
+      validateStandardToolSecurity('read_file_lines', { path: '../../etc/passwd' }, options);
+    }).toThrow(/Access denied/);
+    // Absolute location supplied via `dir`
+    expect(() => {
+      validateStandardToolSecurity('search_files', { pattern: '*', dir: '/etc' }, options);
+    }).toThrow(/Access denied/);
+  });
+  it('should allow paths outside CWD if allowOutsideCwd is true', () => {
+    expect(() => {
+      validateStandardToolSecurity(
+        'read_file',
+        { path: '../../etc/passwd' },
+        { allowOutsideCwd: true }
+      );
+    }).not.toThrow();
+  });
+  it('should block risky commands by default', () => {
+    expect(() => {
+      validateStandardToolSecurity('run_command', { command: 'ls; rm -rf /' }, options);
+    }).toThrow(/Security Error/);
+  });
+  it('should allow risky commands if allowInsecure is true', () => {
+    expect(() => {
+      validateStandardToolSecurity(
+        'run_command',
+        { command: 'ls; rm -rf /' },
+        { allowInsecure: true }
+      );
+    }).not.toThrow();
+  });
+});
+// Spot-checks the STANDARD_TOOLS catalogue: the named tool exists and its execution
+// step has the expected step type.
+describe('Standard Tools Definition', () => {
+  it('should have read_file tool', () => {
+    const readTool = STANDARD_TOOLS.find((t) => t.name === 'read_file');
+    expect(readTool).toBeDefined();
+    expect(readTool?.execution?.type).toBe('file');
+  });
+  it('should have list_files tool with script execution', () => {
+    const listTool = STANDARD_TOOLS.find((t) => t.name === 'list_files');
+    expect(listTool).toBeDefined();
+    expect(listTool?.execution?.type).toBe('script');
+  });
+});

package/src/runner/standard-tools.ts ADDED Viewed

@@ -0,0 +1,270 @@
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { ExpressionEvaluator } from '../expression/evaluator';
+import type { AgentTool, Step } from '../parser/schema';
+import { detectShellInjectionRisk } from './shell-executor';
+/**
+ * Catalogue of built-in agent tools.
+ *
+ * Each entry pairs a JSON-Schema description of the tool's parameters with the workflow
+ * step (`execution`) that carries out the call. The call's arguments are read through
+ * `args`: as `${{ args.x }}` expressions in file/shell steps and as the `args` variable
+ * inside script bodies.
+ */
+export const STANDARD_TOOLS: AgentTool[] = [
+  // Whole-file read, delegated to a `file` step.
+  {
+    name: 'read_file',
+    description: 'Read the contents of a file',
+    parameters: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'Path to the file to read' },
+      },
+      required: ['path'],
+    },
+    execution: {
+      id: 'std_read_file',
+      type: 'file',
+      op: 'read',
+      path: '${{ args.path }}',
+    },
+  },
+  // Line-window read: returns `count` lines starting at the 1-indexed line `start`.
+  {
+    name: 'read_file_lines',
+    description: 'Read a specific range of lines from a file',
+    parameters: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'Path to the file to read' },
+        start: { type: 'number', description: 'Starting line number (1-indexed)', default: 1 },
+        count: { type: 'number', description: 'Number of lines to read', default: 100 },
+      },
+      required: ['path'],
+    },
+    execution: {
+      id: 'std_read_file_lines',
+      type: 'script',
+      run: `
+        const fs = require('node:fs');
+        const path = require('node:path');
+        const filePath = args.path;
+        const start = args.start || 1;
+        const count = args.count || 100;
+        if (!fs.existsSync(filePath)) {
+          throw new Error('File not found: ' + filePath);
+        }
+        const content = fs.readFileSync(filePath, 'utf8');
+        const lines = content.split('\\n');
+        return lines.slice(start - 1, start - 1 + count).join('\\n');
+      `,
+      // NOTE(review): presumably needed for the script step to be allowed to run; confirm against the script executor.
+      allowInsecure: true,
+    },
+  },
+  // Write/overwrite, delegated to a `file` step.
+  {
+    name: 'write_file',
+    description: 'Write or overwrite a file with content',
+    parameters: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'Path to the file to write' },
+        content: { type: 'string', description: 'Content to write to the file' },
+      },
+      required: ['path', 'content'],
+    },
+    execution: {
+      id: 'std_write_file',
+      type: 'file',
+      op: 'write',
+      path: '${{ args.path }}',
+      content: '${{ args.content }}',
+    },
+  },
+  // Non-recursive directory listing; `size` is only populated for regular files.
+  {
+    name: 'list_files',
+    description: 'List files in a directory',
+    parameters: {
+      type: 'object',
+      properties: {
+        path: {
+          type: 'string',
+          description: 'Directory path (defaults to current directory)',
+          default: '.',
+        },
+      },
+    },
+    execution: {
+      id: 'std_list_files',
+      type: 'script',
+      run: `
+        const fs = require('node:fs');
+        const path = require('node:path');
+        const dir = args.path || '.';
+        if (fs.existsSync(dir)) {
+          const files = fs.readdirSync(dir, { withFileTypes: true });
+          return files.map(f => ({
+            name: f.name,
+            type: f.isDirectory() ? 'directory' : 'file',
+            size: f.isFile() ? fs.statSync(path.join(dir, f.name)).size : undefined
+          }));
+        }
+        throw new Error('Directory not found: ' + dir);
+      `,
+      allowInsecure: true,
+    },
+  },
+  // Glob search for file names (directories excluded via `nodir`).
+  {
+    name: 'search_files',
+    description: 'Search for files by pattern (glob)',
+    parameters: {
+      type: 'object',
+      properties: {
+        pattern: { type: 'string', description: 'Glob pattern (e.g. **/*.ts)' },
+        dir: { type: 'string', description: 'Directory to search in', default: '.' },
+      },
+      required: ['pattern'],
+    },
+    execution: {
+      id: 'std_search_files',
+      type: 'script',
+      run: `
+        const fs = require('node:fs');
+        const path = require('node:path');
+        const { globSync } = require('glob');
+        const dir = args.dir || '.';
+        const pattern = args.pattern;
+        try {
+          return globSync(pattern, { cwd: dir, nodir: true });
+        } catch (e) {
+          throw new Error('Search failed: ' + e.message);
+        }
+      `,
+      allowInsecure: true,
+    },
+  },
+  // Line-by-line content search. A query wrapped in slashes (`/.../`) is used as a regex
+  // as-is; any other query is escaped and matched case-insensitively. The scan stops once
+  // more than 100 matches have been collected.
+  {
+    name: 'search_content',
+    description: 'Search for a string or regex within files',
+    parameters: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'String or regex to search for' },
+        pattern: {
+          type: 'string',
+          description: 'Glob pattern of files to search in',
+          default: '**/*',
+        },
+        dir: { type: 'string', description: 'Directory to search in', default: '.' },
+      },
+      required: ['query'],
+    },
+    execution: {
+      id: 'std_search_content',
+      type: 'script',
+      run: `
+        const fs = require('node:fs');
+        const path = require('node:path');
+        const { globSync } = require('glob');
+        const dir = args.dir || '.';
+        const pattern = args.pattern || '**/*';
+        const query = args.query;
+        if (query.length > 500) {
+          throw new Error('Search query exceeds maximum length of 500 characters');
+        }
+        const isRegex = query.startsWith('/') && query.endsWith('/');
+        let regex;
+        try {
+          regex = isRegex ? new RegExp(query.slice(1, -1)) : new RegExp(query.replace(/[.*+?^$\\{}()|[\\]\\\\]/g, '\\\\$&'), 'i');
+        } catch (e) {
+          throw new Error('Invalid regular expression: ' + e.message);
+        }
+        const files = globSync(pattern, { cwd: dir, nodir: true });
+        const results = [];
+        for (const file of files) {
+          const fullPath = path.join(dir, file);
+          const content = fs.readFileSync(fullPath, 'utf8');
+          const lines = content.split('\\n');
+          for (let i = 0; i < lines.length; i++) {
+            if (regex.test(lines[i])) {
+              results.push({
+                file,
+                line: i + 1,
+                content: lines[i].trim()
+              });
+            }
+            if (results.length > 100) break; // Limit results
+          }
+          if (results.length > 100) break;
+        }
+        return results;
+      `,
+      allowInsecure: true,
+    },
+  },
+  // Shell execution. validateStandardToolSecurity screens `command` with
+  // detectShellInjectionRisk unless the llm step sets allowInsecure.
+  {
+    name: 'run_command',
+    description: 'Run a shell command',
+    parameters: {
+      type: 'object',
+      properties: {
+        command: { type: 'string', description: 'The shell command to run' },
+        dir: { type: 'string', description: 'Working directory for the command' },
+      },
+      required: ['command'],
+    },
+    execution: {
+      id: 'std_run_command',
+      type: 'shell',
+      run: '${{ args.command }}',
+      dir: '${{ args.dir }}',
+    },
+  },
+];
+/**
+ * Validate that a standard tool call is safe to execute based on the LLM step's security flags.
+ *
+ * File tools (`read_file`, `read_file_lines`, `write_file`, `list_files`, `search_files`,
+ * `search_content`): the target is `args.path`, else `args.dir`, else `.`. It is resolved
+ * against `process.cwd()` and rejected when it lies outside the working directory, unless
+ * `options.allowOutsideCwd` is set.
+ *
+ * `run_command`: rejected when `detectShellInjectionRisk` flags `args.command`, unless
+ * `options.allowInsecure` is set.
+ *
+ * @param toolName - Name of the standard tool being called.
+ * @param args - Arguments supplied by the model; `null`/`undefined` is treated as no arguments.
+ * @param options - Security flags taken from the llm step.
+ * @throws Error whose message starts with `Access denied` (path check) or `Security Error` (command check).
+ */
+export function validateStandardToolSecurity(
+  toolName: string,
+  // biome-ignore lint/suspicious/noExplicitAny: arguments can be any shape
+  args: any,
+  options: { allowOutsideCwd?: boolean; allowInsecure?: boolean }
+): void {
+  // The model controls the arguments, so tolerate a missing payload instead of failing
+  // with a TypeError on property access.
+  const safeArgs = args ?? {};
+  // 1. Check path traversal for file tools
+  if (
+    [
+      'read_file',
+      'read_file_lines',
+      'write_file',
+      'list_files',
+      'search_files',
+      'search_content',
+    ].includes(toolName)
+  ) {
+    const rawPath = safeArgs.path || safeArgs.dir || '.';
+    const cwd = process.cwd();
+    const resolvedPath = path.resolve(cwd, rawPath);
+    const realCwd = fs.realpathSync(cwd);
+    const isWithin = (target: string) => {
+      // Find the first existing ancestor to resolve the real path correctly
+      // (the target itself may not exist yet, e.g. a file about to be written).
+      let current = target;
+      while (current !== path.dirname(current) && !fs.existsSync(current)) {
+        current = path.dirname(current);
+      }
+      const realTarget = fs.existsSync(current) ? fs.realpathSync(current) : current;
+      const relativePath = path.relative(realCwd, realTarget);
+      // Only a leading `..` path segment leaves the directory; an in-tree name that merely
+      // begins with two dots (e.g. `..cache`) must not be rejected.
+      const escapesCwd = relativePath === '..' || relativePath.startsWith(`..${path.sep}`);
+      // An absolute result means there is no relative route at all (e.g. another drive on Windows).
+      return !(escapesCwd || path.isAbsolute(relativePath));
+    };
+    if (!options.allowOutsideCwd && !isWithin(resolvedPath)) {
+      throw new Error(
+        `Access denied: Path '${rawPath}' resolves outside the working directory. Use 'allowOutsideCwd: true' to override.`
+      );
+    }
+  }
+  // 2. Check shell risk for run_command
+  if (toolName === 'run_command' && !options.allowInsecure) {
+    if (detectShellInjectionRisk(safeArgs.command)) {
+      throw new Error(
+        `Security Error: Command contains risky shell characters. Use 'allowInsecure: true' on the llm step to execute this.`
+      );
+    }
+  }
+}

package/src/runner/step-executor.ts CHANGED Viewed

@@ -402,7 +402,13 @@ async function executeRequestStep(
     output: {
       status: response.status,
       statusText: response.statusText,
-      headers: Object.fromEntries(response.headers as unknown as Iterable<[string, string]>),
+      headers: (() => {
+        const h: Record<string, string> = {};
+        response.headers.forEach((v, k) => {
+          h[k] = v;
+        });
+        return h;
+      })(),
       data: responseData,
     },
     status: response.ok ? 'success' : 'failed',
@@ -435,7 +441,11 @@ async function executeHumanStep(
     return {
       output:
         step.inputType === 'confirm'
-          ? answer === true || answer === 'true' || answer === 'yes' || answer === 'y'
+          ? answer === true ||
+            (typeof answer === 'string' &&
+              (answer.toLowerCase() === 'true' ||
+                answer.toLowerCase() === 'yes' ||
+                answer.toLowerCase() === 'y'))
           : answer,
       status: 'success',
     };

package/src/templates/agents/keystone-architect.md CHANGED Viewed

@@ -19,9 +19,9 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
 - **eval**: (Optional) Configuration for prompt optimization `{ scorer: 'llm'|'script', agent, prompt, run }`.
 - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
   - **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }` (Set `allowInsecure: true` to bypass risky command checks)
-  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
+  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, useStandardTools, allowOutsideCwd, allowInsecure, mcpServers }`
   - **workflow**: `{ id, type: 'workflow', path, inputs }`
-  - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
+  - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content, allowOutsideCwd }`
   - **request**: `{ id, type: 'request', url, method, body, headers }`
   - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns boolean but automatically falls back to text if input is not yes/no)
   - **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
@@ -31,6 +31,17 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
 - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
 - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
+## Standard Tools
+When `useStandardTools: true` is set on an `llm` step, the agent has access to:
+- `read_file(path)`: Read file contents.
+- `read_file_lines(path, start, count)`: Read a specific range of lines.
+- `write_file(path, content)`: Write/overwrite file.
+- `list_files(path)`: List directory contents.
+- `search_files(pattern, dir)`: Search for files by pattern (glob).
+- `search_content(query, pattern, dir)`: Search for text within files.
+- `run_command(command, dir)`: Run shell commands (restricted by `allowInsecure`).
+- **Path Gating**: Restricted to CWD by default. Use `allowOutsideCwd: true` to bypass.
 ## Agent Schema (.md)
 Markdown files with YAML frontmatter:
 - **name**: Agent name.
@@ -45,6 +56,9 @@ Markdown files with YAML frontmatter:
 - `${{ steps.id.output }}`
 - `${{ steps.id.status }}` (e.g., `'pending'`, `'running'`, `'success'`, `'failed'`, `'skipped'`)
 - `${{ args.paramName }}` (used inside agent tools)
+- `${{ item }}` (current item in a `foreach` loop)
+- `${{ secrets.NAME }}` (access redacted secrets)
+- `${{ env.NAME }}` (access environment variables)
 - Standard JS-like expressions: `${{ steps.count.output > 0 ? 'yes' : 'no' }}`
 # Guidelines

package/src/templates/agents/software-engineer.md ADDED Viewed

@@ -0,0 +1,17 @@
+---
+name: software-engineer
+description: "Expert at writing and debugging code"
+model: gpt-4o
+---
+# Role
+You are a Software Engineer. Your goal is to implement, refactor, and debug code based on user specifications.
+# Guidelines
+- Use `list_files` or `search_files` to understand the project structure.
+- Use `search_content` to find where specific code or dependencies are located.
+- Use `read_file` to examine code, or `read_file_lines` for large files.
+- Use `write_file` to implement new features or fixes.
+- Use `run_command` only when necessary for testing or building (e.g., `npm test`, `bun run build`).
+- Be concise and follow best practices for the language you are writing in.
+- Always verify your changes if possible by running tests.

package/src/templates/memory-service.yaml ADDED Viewed

@@ -0,0 +1,54 @@
+name: memory-service
+description: "Demonstrate long-term memory capabilities"
+steps:
+  # Store information in memory
+  - id: remember_facts
+    type: memory
+    op: store
+    text: "Keystone CLI was initialized on 2025-01-01 by the engineering team."
+    metadata:
+      type: "fact"
+      confidence: 1.0
+  - id: remember_preference
+    type: memory
+    op: store
+    text: "The user prefers TypeScript over JavaScript for all projects."
+    metadata:
+      type: "preference"
+      confidence: 0.9
+    needs: [remember_facts]
+  # Search for information
+  - id: recall_preference
+    type: memory
+    op: search
+    query: "What language does the user like?"
+    limit: 1
+    needs: [remember_preference]
+  # Use recalled information in an LLM step
+  - id: confirm_memory
+    type: llm
+    agent: general
+    needs: [recall_preference]
+    prompt: |
+      Based on this memory:
+      ${{ steps.recall_preference.output[0].content }}
+      What programming language should I use? Answer in one word.
+    schema:
+      type: object
+      properties:
+        language:
+          type: string
+      required: [language]
+  - id: summary
+    type: shell
+    needs: [confirm_memory]
+    run: |
+      echo "Memory Service Demo Complete"
+      echo "Recalled: ${{ steps.recall_preference.output[0].content }}"
+      echo "Decision: ${{ steps.confirm_memory.output.language }}"

package/src/templates/robust-automation.yaml ADDED Viewed

@@ -0,0 +1,44 @@
+name: robust-automation
+description: "Demonstrate auto-healing and reflexion features"
+steps:
+  # Demonstration of auto-healing
+  # This step attempts to run a broken command, but the agent should fix it
+  - id: auto_heal_demo
+    type: shell
+    run: |
+      # This command has a typo and should fail
+      ech "Hello World"
+    auto_heal:
+      agent: software-engineer
+      maxAttempts: 2
+      model: gpt-4o
+  # Demonstration of reflexion (self-correction)
+  # This step asks for JSON but provides a prompt that might lead to text
+  # Reflexion should catch the schema validation error and retry
+  - id: reflexion_demo
+    type: llm
+    agent: general
+    needs: [auto_heal_demo]
+    prompt: |
+      Generate a list of 3 random colors. Just list them.
+    schema:
+      type: object
+      properties:
+        colors:
+          type: array
+          items:
+            type: string
+      required: [colors]
+    reflexion:
+      limit: 3
+      hint: "Ensure the output is valid JSON matching the schema."
+  - id: summary
+    type: shell
+    needs: [reflexion_demo]
+    run: |
+      echo "Robust automation demo complete."
+      echo "Healed Command Output: ${{ steps.auto_heal_demo.output.stdout }}"
+      echo "Reflexion Output: ${{ steps.reflexion_demo.output }}"

package/src/templates/scaffold-feature.yaml CHANGED Viewed

@@ -12,6 +12,7 @@ steps:
     agent: keystone-architect
     needs: [get_requirements]
     allowClarification: true
+    useStandardTools: true
     prompt: |
       The user wants to build the following:
       <user_requirements>