npm - keystone-cli - Versions diffs - 0.6.0 → 0.7.0 - Mend

keystone-cli 0.6.0 → 0.7.0

This diff compares the contents of two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (25)

package/README.md +34 -0
package/package.json +1 -1
package/src/cli.ts +233 -21
package/src/db/memory-db.ts +6 -0
package/src/db/sqlite-setup.test.ts +47 -0
package/src/db/workflow-db.ts +6 -0
package/src/expression/evaluator.ts +2 -0
package/src/parser/schema.ts +3 -0
package/src/runner/debug-repl.test.ts +240 -6
package/src/runner/llm-adapter.test.ts +10 -4
package/src/runner/llm-executor.ts +39 -3
package/src/runner/shell-executor.ts +40 -12
package/src/runner/standard-tools-integration.test.ts +147 -0
package/src/runner/standard-tools.test.ts +69 -0
package/src/runner/standard-tools.ts +270 -0
package/src/runner/step-executor.test.ts +194 -1
package/src/runner/step-executor.ts +46 -15
package/src/runner/stream-utils.test.ts +113 -7
package/src/runner/stream-utils.ts +4 -4
package/src/runner/workflow-runner.ts +14 -20
package/src/templates/agents/keystone-architect.md +16 -2
package/src/templates/agents/software-engineer.md +17 -0
package/src/templates/memory-service.yaml +54 -0
package/src/templates/robust-automation.yaml +44 -0
package/src/templates/scaffold-feature.yaml +1 -0

package/README.md CHANGED

@@ -260,6 +260,23 @@ finally:
     type: shell
     run: echo "Workflow finished"
+### Expression Syntax
+Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated using a safe AST parser.
+- `${{ inputs.name }}`: Access workflow inputs.
+- `${{ steps.id.output }}`: Access the raw output of a previous step.
+- `${{ steps.id.outputs.field }}`: Access specific fields if the output is an object.
+- `${{ steps.id.status }}`: Get the execution status of a step (`'success'`, `'failed'`, etc.).
+- `${{ item }}`: Access the current item in a `foreach` loop.
+- `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
+- `${{ secrets.NAME }}`: Access redacted secrets.
+- `${{ env.NAME }}`: Access environment variables.
+Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
+---
 outputs:
   slack_message: ${{ steps.notify.output }}
 ```
@@ -274,8 +291,11 @@ Keystone supports several specialized step types:
 - `llm`: Prompt an agent and get structured or unstructured responses. Supports `schema` (JSON Schema) for structured output.
   - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
   - `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
+  - `allowInsecure`: Boolean (default `false`). Set `true` to allow risky tool execution.
+  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow tools to access files outside of the current working directory.
 - `request`: Make HTTP requests (GET, POST, etc.).
 - `file`: Read, write, or append to files.
+  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
 - `human`: Pause execution for manual confirmation or text input.
   - `inputType: confirm`: Simple Enter-to-continue prompt.
   - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
@@ -352,6 +372,8 @@ You are a technical communications expert. Your goal is to take technical output
 Agents can be equipped with tools, which are essentially workflow steps they can choose to execute. You can define tools in the agent definition, or directly in an LLM step within a workflow.
+Tool arguments are passed to the tool's execution step via the `args` variable.
 **`.keystone/workflows/agents/developer.md`**
 ```markdown
 ---
@@ -363,6 +385,18 @@ tools:
       id: list-files-tool
       type: shell
       run: ls -F
+  - name: read_file
+    description: Read a specific file
+    parameters:
+      type: object
+      properties:
+        path: { type: string }
+      required: [path]
+    execution:
+      id: read-file-tool
+      type: file
+      op: read
+      path: ${{ args.path }}
 ---
 You are a software developer. You can use tools to explore the codebase.
 ```

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "0.6.0",
+  "version": "0.7.0",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {

package/src/cli.ts CHANGED

@@ -9,7 +9,7 @@ import architectAgent from './templates/agents/keystone-architect.md' with { typ
 // Default templates
 import scaffoldWorkflow from './templates/scaffold-feature.yaml' with { type: 'text' };
-import { WorkflowDb } from './db/workflow-db.ts';
+import { WorkflowDb, type WorkflowRun } from './db/workflow-db.ts';
 import { WorkflowParser } from './parser/workflow-parser.ts';
 import { ConfigLoader } from './utils/config-loader.ts';
 import { ConsoleLogger } from './utils/logger.ts';
@@ -279,7 +279,79 @@ program
     }
   });
-// ... (optimize command remains here) ...
+// ===== keystone workflows =====
+program
+  .command('workflows')
+  .description('List available workflows')
+  .action(() => {
+    const workflows = WorkflowRegistry.listWorkflows();
+    if (workflows.length === 0) {
+      console.log('No workflows found. Run "keystone init" to seed default workflows.');
+      return;
+    }
+    console.log('\n🏛️  Available Workflows:');
+    for (const w of workflows) {
+      console.log(`\n  ${w.name}`);
+      if (w.description) {
+        console.log(`    ${w.description}`);
+      }
+    }
+    console.log('');
+  });
+// ===== keystone optimize =====
+program
+  .command('optimize')
+  .description('Optimize a specific step in a workflow using iterative evaluation')
+  .argument('<workflow>', 'Workflow name or path to workflow file')
+  .requiredOption('-t, --target <step_id>', 'Target step ID to optimize')
+  .option('-n, --iterations <number>', 'Number of optimization iterations', '5')
+  .option('-i, --input <key=value...>', 'Input values for evaluation')
+  .action(async (workflowPath, options) => {
+    try {
+      const { OptimizationRunner } = await import('./runner/optimization-runner.ts');
+      const resolvedPath = WorkflowRegistry.resolvePath(workflowPath);
+      const workflow = WorkflowParser.loadWorkflow(resolvedPath);
+      // Parse inputs
+      const inputs: Record<string, unknown> = {};
+      if (options.input) {
+        for (const pair of options.input) {
+          const index = pair.indexOf('=');
+          if (index > 0) {
+            const key = pair.slice(0, index);
+            const value = pair.slice(index + 1);
+            try {
+              inputs[key] = JSON.parse(value);
+            } catch {
+              inputs[key] = value;
+            }
+          }
+        }
+      }
+      const runner = new OptimizationRunner(workflow, {
+        workflowPath: resolvedPath,
+        targetStepId: options.target,
+        iterations: Number.parseInt(options.iterations, 10),
+        inputs,
+      });
+      console.log('🏛️  Keystone Prompt Optimization');
+      const { bestPrompt, bestScore } = await runner.optimize();
+      console.log('\n✨ Optimization Complete!');
+      console.log(`🏆 Best Score: ${bestScore}/100`);
+      console.log('\nBest Prompt/Command:');
+      console.log(''.padEnd(80, '-'));
+      console.log(bestPrompt);
+      console.log(''.padEnd(80, '-'));
+    } catch (error) {
+      console.error('✗ Optimization failed:', error instanceof Error ? error.message : error);
+      process.exit(1);
+    }
+  });
 // ===== keystone resume =====
 program
@@ -347,40 +419,180 @@ program
     }
   });
-// ... (other commands) ...
-// ===== keystone maintenance =====
+// ===== keystone history =====
 program
-  .command('maintenance')
-  .description('Perform database maintenance (prune old runs and vacuum)')
-  .option('--days <days>', 'Delete runs older than this many days', '30')
+  .command('history')
+  .description('Show recent workflow runs')
+  .option('-l, --limit <number>', 'Limit the number of runs to show', '50')
   .action(async (options) => {
     try {
-      const days = Number.parseInt(options.days, 10);
-      if (Number.isNaN(days) || days < 0) {
-        console.error('✗ Invalid days value. Must be a positive number.');
-        process.exit(1);
+      const db = new WorkflowDb();
+      const limit = Number.parseInt(options.limit, 10);
+      const runs = await db.listRuns(limit);
+      db.close();
+      if (runs.length === 0) {
+        console.log('No workflow runs found.');
+        return;
       }
-      console.log('🧹 Starting maintenance...');
+      console.log('\n🏛️  Workflow Run History:');
+      console.log(''.padEnd(100, '-'));
+      console.log(
+        `${'ID'.padEnd(10)} ${'Workflow'.padEnd(25)} ${'Status'.padEnd(15)} ${'Started At'}`
+      );
+      console.log(''.padEnd(100, '-'));
+      for (const run of runs) {
+        const id = run.id.slice(0, 8);
+        const status = run.status;
+        const color =
+          status === 'success' ? '\x1b[32m' : status === 'failed' ? '\x1b[31m' : '\x1b[33m';
+        const reset = '\x1b[0m';
+        console.log(
+          `${id.padEnd(10)} ${run.workflow_name.padEnd(25)} ${color}${status.padEnd(
+            15
+          )}${reset} ${new Date(run.started_at).toLocaleString()}`
+        );
+      }
+      console.log('');
+    } catch (error) {
+      console.error('✗ Failed to list runs:', error instanceof Error ? error.message : error);
+      process.exit(1);
+    }
+  });
+// ===== keystone logs =====
+program
+  .command('logs')
+  .description('Show logs for a specific workflow run')
+  .argument('<run_id>', 'Run ID to show logs for')
+  .option('-v, --verbose', 'Show detailed step outputs')
+  .action(async (runId, options) => {
+    try {
       const db = new WorkflowDb();
+      const run = await db.getRun(runId);
-      console.log(`   Pruning runs older than ${days} days...`);
-      const deleted = await db.pruneRuns(days);
-      console.log(`   ✓ Deleted ${deleted} run(s)`);
+      if (!run) {
+        // Try searching by short ID
+        const allRuns = await db.listRuns(200);
+        const matching = allRuns.find((r) => r.id.startsWith(runId));
+        if (matching) {
+          const detailedRun = await db.getRun(matching.id);
+          if (detailedRun) {
+            await showRunLogs(detailedRun, db, !!options.verbose);
+            db.close();
+            return;
+          }
+        }
-      console.log('   Vacuuming database (reclaiming space)...');
-      await db.vacuum();
-      console.log('   ✓ Vacuum complete');
+        console.error(`✗ Run not found: ${runId}`);
+        db.close();
+        process.exit(1);
+      }
+      await showRunLogs(run, db, !!options.verbose);
       db.close();
-      console.log('\n✨ Maintenance completed successfully!');
     } catch (error) {
-      console.error('✗ Maintenance failed:', error instanceof Error ? error.message : error);
+      console.error('✗ Failed to show logs:', error instanceof Error ? error.message : error);
       process.exit(1);
     }
   });
+async function showRunLogs(run: WorkflowRun, db: WorkflowDb, verbose: boolean) {
+  console.log(`\n🏛️  Run: ${run.workflow_name} (${run.id})`);
+  console.log(`   Status: ${run.status}`);
+  console.log(`   Started: ${new Date(run.started_at).toLocaleString()}`);
+  if (run.completed_at) {
+    console.log(`   Completed: ${new Date(run.completed_at).toLocaleString()}`);
+  }
+  const steps = await db.getStepsByRun(run.id);
+  console.log(`\nSteps (${steps.length}):`);
+  console.log(''.padEnd(100, '-'));
+  for (const step of steps) {
+    const statusColor =
+      step.status === 'success' ? '\x1b[32m' : step.status === 'failed' ? '\x1b[31m' : '\x1b[33m';
+    const reset = '\x1b[0m';
+    let label = step.step_id;
+    if (step.iteration_index !== null) {
+      label += ` [${step.iteration_index}]`;
+    }
+    console.log(`${statusColor}${step.status.toUpperCase().padEnd(10)}${reset} ${label}`);
+    if (step.error) {
+      console.log(`           \x1b[31mError: ${step.error}\x1b[0m`);
+    }
+    if (verbose && step.output) {
+      try {
+        const output = JSON.parse(step.output);
+        console.log(
+          `           Output: ${JSON.stringify(output, null, 2).replace(/\n/g, '\n           ')}`
+        );
+      } catch {
+        console.log(`           Output: ${step.output}`);
+      }
+    }
+  }
+  if (run.outputs) {
+    console.log('\nFinal Outputs:');
+    try {
+      const parsed = JSON.parse(run.outputs);
+      console.log(JSON.stringify(parsed, null, 2));
+    } catch {
+      console.log(run.outputs);
+    }
+  }
+  if (run.error) {
+    console.log(`\n\x1b[31mWorkflow Error:\x1b[0m ${run.error}`);
+  }
+}
+// ===== keystone prune / maintenance =====
+async function performMaintenance(days: number) {
+  try {
+    console.log(`🧹 Starting maintenance (pruning runs older than ${days} days)...`);
+    const db = new WorkflowDb();
+    const count = await db.pruneRuns(days);
+    console.log(`   ✓ Pruned ${count} old run(s)`);
+    console.log('   Vacuuming database (reclaiming space)...');
+    await db.vacuum();
+    console.log('   ✓ Vacuum complete');
+    db.close();
+    console.log('\n✨ Maintenance completed successfully!');
+  } catch (error) {
+    console.error('✗ Maintenance failed:', error instanceof Error ? error.message : error);
+    process.exit(1);
+  }
+}
+program
+  .command('prune')
+  .description('Delete old workflow runs from the database (alias for maintenance)')
+  .option('--days <number>', 'Days to keep', '30')
+  .action(async (options) => {
+    const days = Number.parseInt(options.days, 10);
+    await performMaintenance(days);
+  });
+program
+  .command('maintenance')
+  .description('Perform database maintenance (prune old runs and vacuum)')
+  .option('--days <days>', 'Delete runs older than this many days', '30')
+  .action(async (options) => {
+    const days = Number.parseInt(options.days, 10);
+    await performMaintenance(days);
+  });
 // ===== keystone ui =====
 program
   .command('ui')

package/src/db/memory-db.ts CHANGED

@@ -1,5 +1,7 @@
 import type { Database } from 'bun:sqlite';
 import { randomUUID } from 'node:crypto';
+import { existsSync, mkdirSync } from 'node:fs';
+import { dirname } from 'node:path';
 import * as sqliteVec from 'sqlite-vec';
 import './sqlite-setup.ts';
@@ -22,6 +24,10 @@ export class MemoryDb {
       this.db = cached.db;
     } else {
       const { Database } = require('bun:sqlite');
+      const dir = dirname(dbPath);
+      if (!existsSync(dir)) {
+        mkdirSync(dir, { recursive: true });
+      }
       this.db = new Database(dbPath, { create: true });
       // Load sqlite-vec extension

package/src/db/sqlite-setup.test.ts ADDED

@@ -0,0 +1,47 @@
+import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test';
+import type { Logger } from '../utils/logger';
+import { setupSqlite } from './sqlite-setup';
+describe('setupSqlite', () => {
+  const originalPlatform = process.platform;
+  afterEach(() => {
+    Object.defineProperty(process, 'platform', {
+      value: originalPlatform,
+    });
+  });
+  it('does nothing on non-darwin platforms', () => {
+    Object.defineProperty(process, 'platform', { value: 'linux' });
+    const logger: Logger = {
+      log: mock(() => {}),
+      warn: mock(() => {}),
+      error: mock(() => {}),
+      info: mock(() => {}),
+    };
+    setupSqlite(logger);
+    expect(logger.log).not.toHaveBeenCalled();
+    expect(logger.warn).not.toHaveBeenCalled();
+  });
+  it('logs warning if no custom sqlite found on darwin', () => {
+    Object.defineProperty(process, 'platform', { value: 'darwin' });
+    const logger: Logger = {
+      log: mock(() => {}),
+      warn: mock(() => {}),
+      error: mock(() => {}),
+      info: mock(() => {}),
+    };
+    // Mock Bun.spawnSync for brew
+    const spawnSpy = spyOn(Bun, 'spawnSync').mockImplementation(
+      () => ({ success: false }) as unknown as ReturnType<typeof Bun.spawnSync>
+    );
+    try {
+      setupSqlite(logger);
+    } finally {
+      spawnSpy.mockRestore();
+    }
+  });
+});

package/src/db/workflow-db.ts CHANGED

@@ -1,4 +1,6 @@
 import { Database } from 'bun:sqlite';
+import { existsSync, mkdirSync } from 'node:fs';
+import { dirname } from 'node:path';
 import './sqlite-setup.ts';
 import {
   StepStatus as StepStatusConst,
@@ -40,6 +42,10 @@ export class WorkflowDb {
   private db: Database;
   constructor(public readonly dbPath = '.keystone/state.db') {
+    const dir = dirname(dbPath);
+    if (!existsSync(dir)) {
+      mkdirSync(dir, { recursive: true });
+    }
     this.db = new Database(dbPath, { create: true });
     this.db.exec('PRAGMA journal_mode = WAL;'); // Write-ahead logging
     this.db.exec('PRAGMA foreign_keys = ON;'); // Enable foreign key enforcement

package/src/expression/evaluator.ts CHANGED

@@ -29,6 +29,7 @@ export interface ExpressionContext {
   secrets?: Record<string, string>;
   steps?: Record<string, { output?: unknown; outputs?: Record<string, unknown>; status?: string }>;
   item?: unknown;
+  args?: unknown;
   index?: number;
   env?: Record<string, string>;
   output?: unknown;
@@ -295,6 +296,7 @@ export class ExpressionEvaluator {
           secrets: context.secrets || {},
           steps: context.steps || {},
           item: context.item,
+          args: context.args,
           index: context.index,
           env: context.env || {},
           stdout: contextAsRecord.stdout, // For transform expressions

package/src/parser/schema.ts CHANGED

@@ -95,6 +95,9 @@ const LlmStepSchema = BaseStepSchema.extend({
       ])
     )
     .optional(),
+  useStandardTools: z.boolean().optional(),
+  allowOutsideCwd: z.boolean().optional(),
+  allowInsecure: z.boolean().optional(),
 });
 const WorkflowStepSchema = BaseStepSchema.extend({