npm - keystone-cli - Versions diffs - 2.1.0 → 2.1.2 - Mend

keystone-cli 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/package.json +1 -1
package/src/cli.ts +2 -2
package/src/parser/schema.ts +1 -1
package/src/runner/executors/file-executor.ts +9 -38
package/src/runner/executors/llm/tool-manager.ts +0 -18
package/src/runner/executors/llm-executor.ts +35 -5
package/src/runner/executors/shell-executor.ts +45 -2
package/src/runner/memoization.test.ts +2 -2
package/src/runner/shell-executor.test.ts +107 -1
package/src/runner/workflow-state.ts +14 -9
package/src/utils/resource-loader.ts +2 -1
package/src/templates/state.db +0 -0
package/src/templates/state.db-shm +0 -0
package/src/templates/state.db-wal +0 -0
package/src/templates/workflow.db +0 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "2.1.0",
+  "version": "2.1.2",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {

package/src/cli.ts CHANGED Viewed

@@ -1522,7 +1522,7 @@ _keystone() {
       case $words[1] in
         run)
           _arguments \\
-            '(-i --input)'{-i,--input}'[Input values]:key=value' \\
+            '(-i --input)'{-i,--input}'[Input values]:key-value pair:_files' \
             ':workflow:__keystone_workflows'
           ;;
         graph)
@@ -1536,7 +1536,7 @@ _keystone() {
           ;;
         resume)
           _arguments \\
-            '(-i --input)'{-i,--input}'[Input values]:key=value' \\
+            '(-i --input)'{-i,--input}'[Input values]:key-value pair:_files' \
             ':run_id:__keystone_runs'
           ;;
         rerun)

package/src/parser/schema.ts CHANGED Viewed

@@ -148,7 +148,7 @@ export const BaseStepSchema = z.object({
 const ShellStepSchema = BaseStepSchema.extend({
   type: z.literal('shell'),
   run: z.string().optional(),
-  args: z.array(z.string()).optional(),
+  args: z.array(z.string()).min(1).optional(),
   dir: z.string().optional(),
   env: z.record(z.string()).optional(),
   allowOutsideCwd: z.boolean().optional(),

package/src/runner/executors/file-executor.ts CHANGED Viewed

@@ -1,4 +1,6 @@
+import * as child_process from 'node:child_process';
 import * as fs from 'node:fs';
+import * as os from 'node:os';
 import * as path from 'node:path';
 import type { ExpressionContext } from '../../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../../expression/evaluator.ts';
@@ -115,60 +117,29 @@ export function parseUnifiedDiff(patch: string): UnifiedDiff {
 export function applyUnifiedDiff(content: string, patch: string, targetPath: string): string {
   // Try using system `patch` command first as it's more robust
   try {
-    const { spawnSync } = require('node:child_process');
-    // Check if patch is available (quick check)
-    // We assume standard unix `patch` or compatible.
-    // writing content to temp file and patch to temp file?
-    // actually, we can pipe to stdin.
-    // echo content | patch -o output
-    // But patch usually works on files.
-    // Since we are operating on in-memory strings (content), using `patch` binary requires tmp files.
-    // This might be slow for many small files.
-    // BUT the robustness is worth it.
-    const fs = require('node:fs');
-    const os = require('node:os');
-    const path = require('node:path');
     // Create temp dir
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'keystone-patch-'));
     const tmpSrc = path.join(tmpDir, 'source');
     const tmpPatch = path.join(tmpDir, 'changes.patch');
+    const tmpResult = path.join(tmpDir, 'result');
     try {
       fs.writeFileSync(tmpSrc, content);
       fs.writeFileSync(tmpPatch, patch);
-      // Run patch: patch -p1 -i changes.patch -o output (if headers have paths)
-      // Or just patch tmpSrc < changes.patch?
-      // Unified diffs usually expect paths.
-      // If we force it...
-      // `patch` utility is tricky with paths.
-      // LLM generated diffs might have /dev/null or a/b paths.
-      // Let's try `git apply` if inside a git repo?
-      // No, we might not be in a git repo.
-      // Let's stick to the JS Custom Parser BUT make it more lenient/robust as per user request?
-      // User said: "rely on the system's patch or git apply"
-      // Let's try `patch -u -l --fuzz=2 -i patchfile srcfile -o outfile`
-      const result = spawnSync(
+      // Try `patch -u -l --fuzz=2 -i patchfile srcfile -o outfile`
+      const result = child_process.spawnSync(
         'patch',
-        ['-u', '-l', '--fuzz=2', '-i', tmpPatch, tmpSrc, '-o', path.join(tmpDir, 'result')],
+        ['-u', '-l', '--fuzz=2', '-i', tmpPatch, tmpSrc, '-o', tmpResult],
         {
           encoding: 'utf-8',
           stdio: 'pipe',
         }
       );
-      if (result.status === 0 && fs.existsSync(path.join(tmpDir, 'result'))) {
-        return fs.readFileSync(path.join(tmpDir, 'result'), 'utf-8');
+      if (result.status === 0 && fs.existsSync(tmpResult)) {
+        return fs.readFileSync(tmpResult, 'utf-8');
       }
-    } catch (e) {
-      // ignore
     } finally {
       // cleanup
       try {
@@ -176,7 +147,7 @@ export function applyUnifiedDiff(content: string, patch: string, targetPath: str
       } catch {}
     }
   } catch (e) {
-    // ignore
+    // Ignore errors and fallback to JS implementation
   }
   // Fallback to JS implementation

package/src/runner/executors/llm/tool-manager.ts CHANGED Viewed

@@ -73,24 +73,6 @@ export class ToolManager {
     // 2. Step Tools & Standard Tools
     const standardToolsRecord = STANDARD_TOOLS as any; // Handle index signature issue
-    const extraTools = [
-      ...(step.tools || []),
-      ...(step.useStandardTools ? Object.values(standardToolsRecord) : []),
-    ];
-    // Logic to merge standard tools correctly:
-    // If useStandardTools is true, we want all standard tools.
-    // But the loop above iterates over step.tools (definitions) + values?
-    // In original code: const extraTools = [...(step.tools || []), ...(step.useStandardTools ? STANDARD_TOOLS : [])];
-    // Wait, STANDARD_TOOLS is an object, not array.
-    // Original code issue: `step.useStandardTools ? STANDARD_TOOLS : []` -> if STANDARD_TOOLS is object, iterate?
-    // In original code: `for (const tool of extraTools)`
-    // If STANDARD_TOOLS is object, it is NOT iterable.
-    // The original code probably relied on `STANDARD_TOOLS` being iterable or `Object.values` was intended?
-    // Actually, `STANDARD_TOOLS` import in `llm-executor` might be different?
-    // No, strictly it probably failed if `useStandardTools` was true unless `STANDARD_TOOLS` is array-like.
-    // Let's assume `STANDARD_TOOLS` is a Record.
-    // I will iterate properly.
     const toolsToRegister: any[] = [...(step.tools || [])];
     if (step.useStandardTools === true) {

package/src/runner/executors/llm-executor.ts CHANGED Viewed

@@ -130,6 +130,36 @@ function mapToCoreMessages(messages: LLMMessage[]): any[] {
   return coreMessages;
 }
+// --- Helper Functions ---
+/**
+ * Prunes the message history to the last N messages, ensuring that tool calls and tool results
+ * are kept together.
+ */
+export function pruneMessages(messages: LLMMessage[], maxHistory: number): LLMMessage[] {
+  if (messages.length <= maxHistory) {
+    return messages;
+  }
+  let startIndex = messages.length - maxHistory;
+  // Loop to backtrack if we landed on a tool message
+  while (startIndex > 0 && messages[startIndex].role === 'tool') {
+    startIndex--;
+  }
+  // Check if we landed on a valid parent (Assistant with tool_calls)
+  const candidate = messages[startIndex];
+  if (candidate.role === 'assistant' && candidate.tool_calls && candidate.tool_calls.length > 0) {
+    // Found the parent, include it and everything after
+    return messages.slice(startIndex);
+  }
+  // Fallback to naive slicing if we can't find a clean parent connection
+  // (This matches current behavior for edge cases, preventing regressions in weird states)
+  return messages.slice(messages.length - maxHistory);
+}
 // --- Main Execution Logic ---
 export async function executeLlmStep(
@@ -255,11 +285,11 @@ export async function executeLlmStep(
         // Enforce maxMessageHistory to preventing context window exhaustion
         let messagesForTurn = currentMessages;
         if (step.maxMessageHistory && currentMessages.length > step.maxMessageHistory) {
-          // Keep the last N messages
-          // Note: This naive slicing might cut off a tool_call that corresponds to a tool_result
-          // but robust models should handle it or we accept the degradation for stability.
-          messagesForTurn = currentMessages.slice(-step.maxMessageHistory);
-          logger.debug(`  ✂️ Pruned context to last ${step.maxMessageHistory} messages`);
+          // Keep the last N messages (with robust pruning to keep tool pairs together)
+          messagesForTurn = pruneMessages(currentMessages, step.maxMessageHistory);
+          logger.debug(
+            `  ✂️ Pruned context to last ${messagesForTurn.length} messages (maxHistory=${step.maxMessageHistory})`
+          );
         }
         const coreMessages = mapToCoreMessages(messagesForTurn);

package/src/runner/executors/shell-executor.ts CHANGED Viewed

@@ -44,6 +44,9 @@ export async function executeShellStep(
   abortSignal?: AbortSignal
 ): Promise<StepResult> {
   if (step.args) {
+    if (step.args.length === 0) {
+      throw new Error('Shell step args must contain at least one element');
+    }
     // args are inherently safe from shell injection as they skip the shell
     // and pass the array directly to the OS via Bun.spawn.
@@ -56,7 +59,15 @@ export async function executeShellStep(
       };
     }
-    const result = await executeShellArgs(step.args, context, logger, abortSignal, step.dir);
+    const result = await executeShellArgs(
+      step.args,
+      context,
+      logger,
+      abortSignal,
+      step.dir,
+      step.env,
+      step.allowOutsideCwd
+    );
     return formatShellResult(result, logger);
   }
@@ -417,11 +428,43 @@ export async function executeShellArgs(
   context: ExpressionContext,
   logger: Logger = new ConsoleLogger(),
   abortSignal?: AbortSignal,
-  dir?: string
+  dir?: string,
+  stepEnv?: Record<string, string>,
+  allowOutsideCwd?: boolean
 ): Promise<ShellResult> {
+  if (argsTemplates.length === 0) {
+    throw new Error('Shell args must contain at least one element');
+  }
   const args = argsTemplates.map((t) => ExpressionEvaluator.evaluateString(t, context));
   const cwd = dir ? ExpressionEvaluator.evaluateString(dir, context) : undefined;
+  if (cwd) {
+    PathResolver.assertWithinCwd(cwd, allowOutsideCwd, 'Directory');
+  }
+  // Security Check: Enforce Denylist for direct args execution
+  const config = ConfigLoader.load();
+  if (config.engines?.denylist && config.engines.denylist.length > 0) {
+    const firstArg = args[0];
+    if (firstArg) {
+      let bin = firstArg;
+      if (bin.includes('/')) {
+        const parts = bin.split(/[/\\]/);
+        bin = parts[parts.length - 1];
+      }
+      if (config.engines.denylist.includes(bin)) {
+        throw new Error(
+          `Security Error: Command "${bin}" is in the denylist and cannot be executed.`
+        );
+      }
+    }
+  }
   const env: Record<string, string> = context.env ? { ...context.env } : {};
+  if (stepEnv) {
+    for (const [key, value] of Object.entries(stepEnv)) {
+      env[key] = ExpressionEvaluator.evaluateString(value, context);
+    }
+  }
   const hostEnv = filterSensitiveEnv(Bun.env);
   const mergedEnv = { ...hostEnv, ...env };
   const maxOutputBytes = LIMITS.MAX_PROCESS_OUTPUT_BYTES;

package/src/runner/memoization.test.ts CHANGED Viewed

@@ -86,13 +86,13 @@ describe('Workflow Memoization (Auto-Hashing)', () => {
     // We can check if `executeLlmStep` was called.
     let called = false;
-    const trackingExecute = async (s: any, c: any) => {
+    // Match signature of executeLlmStep (at least the required args)
+    const trackingExecute = async (s: any, c: any, _execFn: any, ..._args: any[]) => {
       called = true;
       return mockExecuteLlmStep(s, c);
     };
     // Override the executor for runner2 to track calls
-    // @ts-ignore - hacking private property or constructor option
     // Actually we passed it in constructor option.
     const runner2Tracked = new WorkflowRunner(workflow, {
       dbPath,

package/src/runner/shell-executor.test.ts CHANGED Viewed

@@ -1,9 +1,17 @@
 import { describe, expect, it } from 'bun:test';
+import { realpathSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { basename, resolve as resolvePath, sep } from 'node:path';
 import type { ExpressionContext } from '../expression/evaluator';
+import { ConfigSchema } from '../parser/config-schema';
 import type { ShellStep } from '../parser/schema';
-import { escapeShellArg, executeShell } from './executors/shell-executor.ts';
+import { ConfigLoader } from '../utils/config-loader';
+import { ConsoleLogger } from '../utils/logger';
+import { escapeShellArg, executeShell, executeShellStep } from './executors/shell-executor.ts';
 describe('shell-executor', () => {
+  const logger = new ConsoleLogger();
   describe('escapeShellArg', () => {
     it('should wrap in single quotes', () => {
       expect(escapeShellArg('hello')).toBe("'hello'");
@@ -174,4 +182,102 @@ describe('shell-executor', () => {
       expect(result.stdout.trim()).toBe('match');
     });
   });
+  describe('executeShellStep (args)', () => {
+    const context: ExpressionContext = {
+      inputs: {},
+      steps: {},
+      env: {},
+    };
+    it('should reject empty args', async () => {
+      const step: ShellStep = {
+        id: 'test',
+        type: 'shell',
+        needs: [],
+        args: [],
+      };
+      await expect(executeShellStep(step, context, logger)).rejects.toThrow(
+        /args must contain at least one element/
+      );
+    });
+    it('should apply step env for args execution', async () => {
+      const bunPath = process.execPath;
+      const step: ShellStep = {
+        id: 'test',
+        type: 'shell',
+        needs: [],
+        args: [bunPath, '-e', 'console.log(process.env.TEST_VAR ?? "")'],
+        env: { TEST_VAR: 'args-env' },
+      };
+      const result = await executeShellStep(step, context, logger);
+      expect(result.output?.stdout?.trim()).toBe('args-env');
+    });
+    it('should enforce denylist for args execution', async () => {
+      const bunPath = process.execPath;
+      const denied = basename(bunPath);
+      ConfigLoader.setConfig(
+        ConfigSchema.parse({
+          engines: { denylist: [denied] },
+        })
+      );
+      try {
+        const step: ShellStep = {
+          id: 'test',
+          type: 'shell',
+          needs: [],
+          args: [bunPath, '-e', 'console.log("nope")'],
+        };
+        await expect(executeShellStep(step, context, logger)).rejects.toThrow(/denylist/);
+      } finally {
+        ConfigLoader.clear();
+      }
+    });
+    it('should enforce allowOutsideCwd for args execution', async () => {
+      const bunPath = process.execPath;
+      const cwd = resolvePath(process.cwd());
+      let outsideDir = resolvePath(tmpdir());
+      if (outsideDir.startsWith(`${cwd}${sep}`)) {
+        const parent = resolvePath(cwd, '..');
+        if (parent !== cwd) {
+          outsideDir = parent;
+        }
+      }
+      if (outsideDir === cwd) {
+        return;
+      }
+      const step: ShellStep = {
+        id: 'test',
+        type: 'shell',
+        needs: [],
+        args: [bunPath, '-e', 'console.log(process.cwd())'],
+        dir: outsideDir,
+      };
+      await expect(executeShellStep(step, context, logger)).rejects.toThrow(
+        /outside the project directory/
+      );
+      const allowedStep: ShellStep = {
+        ...step,
+        allowOutsideCwd: true,
+      };
+      const result = await executeShellStep(allowedStep, context, logger);
+      const resolvedOutput = realpathSync(resolvePath(result.output?.stdout?.trim() || ''));
+      const resolvedOutside = realpathSync(outsideDir);
+      expect(resolvedOutput).toBe(resolvedOutside);
+    });
+  });
 });

package/src/runner/workflow-state.ts CHANGED Viewed

@@ -295,18 +295,23 @@ export class WorkflowState {
             );
           }
           const mappedOutputs = isLargeDataset ? {} : ForeachExecutor.aggregateOutputs(outputs);
+          // If the DB says the parent is RUNNING/PENDING but we have all items successfully completed,
+          // trust the derived status to prevent re-execution.
+          let finalStatus = mainExec.status as StepStatusType;
+          if (
+            allSuccess &&
+            hasAllItems &&
+            finalStatus !== StepStatus.SUCCESS &&
+            finalStatus !== StepStatus.SKIPPED
+          ) {
+            finalStatus = StepStatus.SUCCESS;
+          }
           this.stepContexts.set(stepId, {
             output: isLargeDataset ? [] : outputs,
             outputs: mappedOutputs,
-            status: mainExec.status as StepStatusType, // Trust the main status mostly? Or recompute?
-            // If main status says STARTED but we have all items success, maybe we should trust our recomputation?
-            // The original code sets status based on items.
-            // But if mainExec exists and has a status, that's authoritative for the "Parent".
-            // HOWEVER, if we are resuming, we might want to check if it matches reality.
-            // Let's stick to original logic:
-            // if (allSuccess && hasAllItems) status = SUCCESS...
-            // But wait, if main status is FAILED, using FAILED is correct.
-            // Let's mostly use the derived status for consistency in "incomplete" resumes.
+            status: finalStatus,
             items,
             foreachItems: persistedItems,
           } as ForeachStepContext);

package/src/utils/resource-loader.ts CHANGED Viewed

@@ -1,12 +1,13 @@
 import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
 import * as path from 'node:path';
 import { join } from 'node:path';
-import { bundleAssets } from './assets.macro.ts' with { type: 'macro' };
 // These are bundled at build-time (macro). If macros are unavailable at runtime,
 // fall back to an empty set so local filesystem reads still work.
 const EMBEDDED_ASSETS = (() => {
   try {
+    // Dynamic import to avoid Bun's security restrictions in node_modules
+    const { bundleAssets } = require('./assets.macro.ts');
     return bundleAssets();
   } catch (e) {
     return {};

package/src/templates/state.db DELETED Viewed

Binary file

package/src/templates/state.db-shm DELETED Viewed

Binary file

package/src/templates/state.db-wal DELETED Viewed

Binary file

package/src/templates/workflow.db DELETED Viewed

File without changes