keystone-cli 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "keystone-cli",
3
- "version": "2.1.0",
3
+ "version": "2.1.2",
4
4
  "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -1522,7 +1522,7 @@ _keystone() {
1522
1522
  case $words[1] in
1523
1523
  run)
1524
1524
  _arguments \\
1525
- '(-i --input)'{-i,--input}'[Input values]:key=value' \\
1525
+ '(-i --input)'{-i,--input}'[Input values]:key-value pair:_files' \
1526
1526
  ':workflow:__keystone_workflows'
1527
1527
  ;;
1528
1528
  graph)
@@ -1536,7 +1536,7 @@ _keystone() {
1536
1536
  ;;
1537
1537
  resume)
1538
1538
  _arguments \\
1539
- '(-i --input)'{-i,--input}'[Input values]:key=value' \\
1539
+ '(-i --input)'{-i,--input}'[Input values]:key-value pair:_files' \
1540
1540
  ':run_id:__keystone_runs'
1541
1541
  ;;
1542
1542
  rerun)
@@ -148,7 +148,7 @@ export const BaseStepSchema = z.object({
148
148
  const ShellStepSchema = BaseStepSchema.extend({
149
149
  type: z.literal('shell'),
150
150
  run: z.string().optional(),
151
- args: z.array(z.string()).optional(),
151
+ args: z.array(z.string()).min(1).optional(),
152
152
  dir: z.string().optional(),
153
153
  env: z.record(z.string()).optional(),
154
154
  allowOutsideCwd: z.boolean().optional(),
@@ -1,4 +1,6 @@
1
+ import * as child_process from 'node:child_process';
1
2
  import * as fs from 'node:fs';
3
+ import * as os from 'node:os';
2
4
  import * as path from 'node:path';
3
5
  import type { ExpressionContext } from '../../expression/evaluator.ts';
4
6
  import { ExpressionEvaluator } from '../../expression/evaluator.ts';
@@ -115,60 +117,29 @@ export function parseUnifiedDiff(patch: string): UnifiedDiff {
115
117
  export function applyUnifiedDiff(content: string, patch: string, targetPath: string): string {
116
118
  // Try using system `patch` command first as it's more robust
117
119
  try {
118
- const { spawnSync } = require('node:child_process');
119
-
120
- // Check if patch is available (quick check)
121
- // We assume standard unix `patch` or compatible.
122
- // writing content to temp file and patch to temp file?
123
- // actually, we can pipe to stdin.
124
- // echo content | patch -o output
125
- // But patch usually works on files.
126
-
127
- // Since we are operating on in-memory strings (content), using `patch` binary requires tmp files.
128
- // This might be slow for many small files.
129
- // BUT the robustness is worth it.
130
-
131
- const fs = require('node:fs');
132
- const os = require('node:os');
133
- const path = require('node:path');
134
-
135
120
  // Create temp dir
136
121
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'keystone-patch-'));
137
122
  const tmpSrc = path.join(tmpDir, 'source');
138
123
  const tmpPatch = path.join(tmpDir, 'changes.patch');
124
+ const tmpResult = path.join(tmpDir, 'result');
139
125
 
140
126
  try {
141
127
  fs.writeFileSync(tmpSrc, content);
142
128
  fs.writeFileSync(tmpPatch, patch);
143
129
 
144
- // Run patch: patch -p1 -i changes.patch -o output (if headers have paths)
145
- // Or just patch tmpSrc < changes.patch?
146
- // Unified diffs usually expect paths.
147
- // If we force it...
148
- // `patch` utility is tricky with paths.
149
- // LLM generated diffs might have /dev/null or a/b paths.
150
-
151
- // Let's try `git apply` if inside a git repo?
152
- // No, we might not be in a git repo.
153
-
154
- // Let's stick to the JS Custom Parser BUT make it more lenient/robust as per user request?
155
- // User said: "rely on the system's patch or git apply"
156
-
157
- // Let's try `patch -u -l --fuzz=2 -i patchfile srcfile -o outfile`
158
- const result = spawnSync(
130
+ // Try `patch -u -l --fuzz=2 -i patchfile srcfile -o outfile`
131
+ const result = child_process.spawnSync(
159
132
  'patch',
160
- ['-u', '-l', '--fuzz=2', '-i', tmpPatch, tmpSrc, '-o', path.join(tmpDir, 'result')],
133
+ ['-u', '-l', '--fuzz=2', '-i', tmpPatch, tmpSrc, '-o', tmpResult],
161
134
  {
162
135
  encoding: 'utf-8',
163
136
  stdio: 'pipe',
164
137
  }
165
138
  );
166
139
 
167
- if (result.status === 0 && fs.existsSync(path.join(tmpDir, 'result'))) {
168
- return fs.readFileSync(path.join(tmpDir, 'result'), 'utf-8');
140
+ if (result.status === 0 && fs.existsSync(tmpResult)) {
141
+ return fs.readFileSync(tmpResult, 'utf-8');
169
142
  }
170
- } catch (e) {
171
- // ignore
172
143
  } finally {
173
144
  // cleanup
174
145
  try {
@@ -176,7 +147,7 @@ export function applyUnifiedDiff(content: string, patch: string, targetPath: str
176
147
  } catch {}
177
148
  }
178
149
  } catch (e) {
179
- // ignore
150
+ // Ignore errors and fallback to JS implementation
180
151
  }
181
152
 
182
153
  // Fallback to JS implementation
@@ -73,24 +73,6 @@ export class ToolManager {
73
73
 
74
74
  // 2. Step Tools & Standard Tools
75
75
  const standardToolsRecord = STANDARD_TOOLS as any; // Handle index signature issue
76
- const extraTools = [
77
- ...(step.tools || []),
78
- ...(step.useStandardTools ? Object.values(standardToolsRecord) : []),
79
- ];
80
-
81
- // Logic to merge standard tools correctly:
82
- // If useStandardTools is true, we want all standard tools.
83
- // But the loop above iterates over step.tools (definitions) + values?
84
- // In original code: const extraTools = [...(step.tools || []), ...(step.useStandardTools ? STANDARD_TOOLS : [])];
85
- // Wait, STANDARD_TOOLS is an object, not array.
86
- // Original code issue: `step.useStandardTools ? STANDARD_TOOLS : []` -> if STANDARD_TOOLS is object, iterate?
87
- // In original code: `for (const tool of extraTools)`
88
- // If STANDARD_TOOLS is object, it is NOT iterable.
89
- // The original code probably relied on `STANDARD_TOOLS` being iterable or `Object.values` was intended?
90
- // Actually, `STANDARD_TOOLS` import in `llm-executor` might be different?
91
- // No, strictly it probably failed if `useStandardTools` was true unless `STANDARD_TOOLS` is array-like.
92
- // Let's assume `STANDARD_TOOLS` is a Record.
93
- // I will iterate properly.
94
76
 
95
77
  const toolsToRegister: any[] = [...(step.tools || [])];
96
78
  if (step.useStandardTools === true) {
@@ -130,6 +130,36 @@ function mapToCoreMessages(messages: LLMMessage[]): any[] {
130
130
  return coreMessages;
131
131
  }
132
132
 
133
+ // --- Helper Functions ---
134
+
135
/**
 * Prunes the message history to (roughly) the last `maxHistory` messages while keeping
 * tool results together with the assistant message that issued the tool calls.
 *
 * If the naive cut point lands on a `tool` message, the cut is moved backwards until it
 * reaches a non-tool message. When that message is an assistant message carrying
 * `tool_calls`, it is kept as well, so the result may be longer than `maxHistory`.
 * Otherwise the function falls back to plain "last N" slicing.
 *
 * @param messages - Full conversation history, oldest first. Never mutated.
 * @param maxHistory - Maximum number of trailing messages to keep. Fractional values are
 *   floored. Zero or negative values keep nothing. NaN is treated as "no usable limit".
 * @returns The original array when no pruning is needed, otherwise a new, shorter array.
 */
export function pruneMessages(messages: LLMMessage[], maxHistory: number): LLMMessage[] {
  // NaN cannot express a limit; leave the history untouched instead of indexing with NaN.
  if (Number.isNaN(maxHistory)) {
    return messages;
  }

  // A fractional limit would yield a fractional (and therefore undefined) array index below.
  const limit = Math.floor(maxHistory);

  if (messages.length <= limit) {
    return messages;
  }

  // Zero or negative limits keep nothing. Without this guard the start index would point
  // past the end of the array and reading `.role` of `undefined` would throw.
  if (limit <= 0) {
    return [];
  }

  const naiveStart = messages.length - limit;
  let startIndex = naiveStart;

  // Backtrack while the cut point sits on a tool result, looking for its parent message.
  while (startIndex > 0 && messages[startIndex]?.role === 'tool') {
    startIndex--;
  }

  // Only extend the window when we actually found the assistant message with the tool calls.
  const candidate = messages[startIndex];
  if (candidate?.role === 'assistant' && candidate.tool_calls && candidate.tool_calls.length > 0) {
    return messages.slice(startIndex);
  }

  // No clean parent connection: fall back to naive slicing, matching the previous
  // behaviour for unusual histories (e.g. orphaned tool results).
  return messages.slice(naiveStart);
}
162
+
133
163
  // --- Main Execution Logic ---
134
164
 
135
165
  export async function executeLlmStep(
@@ -255,11 +285,11 @@ export async function executeLlmStep(
255
285
  // Enforce maxMessageHistory to prevent context window exhaustion
256
286
  let messagesForTurn = currentMessages;
257
287
  if (step.maxMessageHistory && currentMessages.length > step.maxMessageHistory) {
258
- // Keep the last N messages
259
- // Note: This naive slicing might cut off a tool_call that corresponds to a tool_result
260
- // but robust models should handle it or we accept the degradation for stability.
261
- messagesForTurn = currentMessages.slice(-step.maxMessageHistory);
262
- logger.debug(` ✂️ Pruned context to last ${step.maxMessageHistory} messages`);
288
+ // Keep the last N messages (with robust pruning to keep tool pairs together)
289
+ messagesForTurn = pruneMessages(currentMessages, step.maxMessageHistory);
290
+ logger.debug(
291
+ ` ✂️ Pruned context to last ${messagesForTurn.length} messages (maxHistory=${step.maxMessageHistory})`
292
+ );
263
293
  }
264
294
 
265
295
  const coreMessages = mapToCoreMessages(messagesForTurn);
@@ -44,6 +44,9 @@ export async function executeShellStep(
44
44
  abortSignal?: AbortSignal
45
45
  ): Promise<StepResult> {
46
46
  if (step.args) {
47
+ if (step.args.length === 0) {
48
+ throw new Error('Shell step args must contain at least one element');
49
+ }
47
50
  // args are inherently safe from shell injection as they skip the shell
48
51
  // and pass the array directly to the OS via Bun.spawn.
49
52
 
@@ -56,7 +59,15 @@ export async function executeShellStep(
56
59
  };
57
60
  }
58
61
 
59
- const result = await executeShellArgs(step.args, context, logger, abortSignal, step.dir);
62
+ const result = await executeShellArgs(
63
+ step.args,
64
+ context,
65
+ logger,
66
+ abortSignal,
67
+ step.dir,
68
+ step.env,
69
+ step.allowOutsideCwd
70
+ );
60
71
  return formatShellResult(result, logger);
61
72
  }
62
73
 
@@ -417,11 +428,43 @@ export async function executeShellArgs(
417
428
  context: ExpressionContext,
418
429
  logger: Logger = new ConsoleLogger(),
419
430
  abortSignal?: AbortSignal,
420
- dir?: string
431
+ dir?: string,
432
+ stepEnv?: Record<string, string>,
433
+ allowOutsideCwd?: boolean
421
434
  ): Promise<ShellResult> {
435
+ if (argsTemplates.length === 0) {
436
+ throw new Error('Shell args must contain at least one element');
437
+ }
422
438
  const args = argsTemplates.map((t) => ExpressionEvaluator.evaluateString(t, context));
423
439
  const cwd = dir ? ExpressionEvaluator.evaluateString(dir, context) : undefined;
440
+ if (cwd) {
441
+ PathResolver.assertWithinCwd(cwd, allowOutsideCwd, 'Directory');
442
+ }
443
+
444
+ // Security Check: Enforce Denylist for direct args execution
445
+ const config = ConfigLoader.load();
446
+ if (config.engines?.denylist && config.engines.denylist.length > 0) {
447
+ const firstArg = args[0];
448
+ if (firstArg) {
449
+ let bin = firstArg;
450
+ if (bin.includes('/')) {
451
+ const parts = bin.split(/[/\\]/);
452
+ bin = parts[parts.length - 1];
453
+ }
454
+ if (config.engines.denylist.includes(bin)) {
455
+ throw new Error(
456
+ `Security Error: Command "${bin}" is in the denylist and cannot be executed.`
457
+ );
458
+ }
459
+ }
460
+ }
461
+
424
462
  const env: Record<string, string> = context.env ? { ...context.env } : {};
463
+ if (stepEnv) {
464
+ for (const [key, value] of Object.entries(stepEnv)) {
465
+ env[key] = ExpressionEvaluator.evaluateString(value, context);
466
+ }
467
+ }
425
468
  const hostEnv = filterSensitiveEnv(Bun.env);
426
469
  const mergedEnv = { ...hostEnv, ...env };
427
470
  const maxOutputBytes = LIMITS.MAX_PROCESS_OUTPUT_BYTES;
@@ -86,13 +86,13 @@ describe('Workflow Memoization (Auto-Hashing)', () => {
86
86
 
87
87
  // We can check if `executeLlmStep` was called.
88
88
  let called = false;
89
- const trackingExecute = async (s: any, c: any) => {
89
+ // Match signature of executeLlmStep (at least the required args)
90
+ const trackingExecute = async (s: any, c: any, _execFn: any, ..._args: any[]) => {
90
91
  called = true;
91
92
  return mockExecuteLlmStep(s, c);
92
93
  };
93
94
 
94
95
  // Override the executor for runner2 to track calls
95
- // @ts-ignore - hacking private property or constructor option
96
96
  // Actually we passed it in constructor option.
97
97
  const runner2Tracked = new WorkflowRunner(workflow, {
98
98
  dbPath,
@@ -1,9 +1,17 @@
1
1
  import { describe, expect, it } from 'bun:test';
2
+ import { realpathSync } from 'node:fs';
3
+ import { tmpdir } from 'node:os';
4
+ import { basename, resolve as resolvePath, sep } from 'node:path';
2
5
  import type { ExpressionContext } from '../expression/evaluator';
6
+ import { ConfigSchema } from '../parser/config-schema';
3
7
  import type { ShellStep } from '../parser/schema';
4
- import { escapeShellArg, executeShell } from './executors/shell-executor.ts';
8
+ import { ConfigLoader } from '../utils/config-loader';
9
+ import { ConsoleLogger } from '../utils/logger';
10
+ import { escapeShellArg, executeShell, executeShellStep } from './executors/shell-executor.ts';
5
11
 
6
12
  describe('shell-executor', () => {
13
+ const logger = new ConsoleLogger();
14
+
7
15
  describe('escapeShellArg', () => {
8
16
  it('should wrap in single quotes', () => {
9
17
  expect(escapeShellArg('hello')).toBe("'hello'");
@@ -174,4 +182,102 @@ describe('shell-executor', () => {
174
182
  expect(result.stdout.trim()).toBe('match');
175
183
  });
176
184
  });
185
+
186
+ describe('executeShellStep (args)', () => {
187
+ const context: ExpressionContext = {
188
+ inputs: {},
189
+ steps: {},
190
+ env: {},
191
+ };
192
+
193
+ it('should reject empty args', async () => {
194
+ const step: ShellStep = {
195
+ id: 'test',
196
+ type: 'shell',
197
+ needs: [],
198
+ args: [],
199
+ };
200
+
201
+ await expect(executeShellStep(step, context, logger)).rejects.toThrow(
202
+ /args must contain at least one element/
203
+ );
204
+ });
205
+
206
+ it('should apply step env for args execution', async () => {
207
+ const bunPath = process.execPath;
208
+ const step: ShellStep = {
209
+ id: 'test',
210
+ type: 'shell',
211
+ needs: [],
212
+ args: [bunPath, '-e', 'console.log(process.env.TEST_VAR ?? "")'],
213
+ env: { TEST_VAR: 'args-env' },
214
+ };
215
+
216
+ const result = await executeShellStep(step, context, logger);
217
+ expect(result.output?.stdout?.trim()).toBe('args-env');
218
+ });
219
+
220
+ it('should enforce denylist for args execution', async () => {
221
+ const bunPath = process.execPath;
222
+ const denied = basename(bunPath);
223
+
224
+ ConfigLoader.setConfig(
225
+ ConfigSchema.parse({
226
+ engines: { denylist: [denied] },
227
+ })
228
+ );
229
+
230
+ try {
231
+ const step: ShellStep = {
232
+ id: 'test',
233
+ type: 'shell',
234
+ needs: [],
235
+ args: [bunPath, '-e', 'console.log("nope")'],
236
+ };
237
+
238
+ await expect(executeShellStep(step, context, logger)).rejects.toThrow(/denylist/);
239
+ } finally {
240
+ ConfigLoader.clear();
241
+ }
242
+ });
243
+
244
+ it('should enforce allowOutsideCwd for args execution', async () => {
245
+ const bunPath = process.execPath;
246
+ const cwd = resolvePath(process.cwd());
247
+ let outsideDir = resolvePath(tmpdir());
248
+
249
+ if (outsideDir.startsWith(`${cwd}${sep}`)) {
250
+ const parent = resolvePath(cwd, '..');
251
+ if (parent !== cwd) {
252
+ outsideDir = parent;
253
+ }
254
+ }
255
+
256
+ if (outsideDir === cwd) {
257
+ return;
258
+ }
259
+
260
+ const step: ShellStep = {
261
+ id: 'test',
262
+ type: 'shell',
263
+ needs: [],
264
+ args: [bunPath, '-e', 'console.log(process.cwd())'],
265
+ dir: outsideDir,
266
+ };
267
+
268
+ await expect(executeShellStep(step, context, logger)).rejects.toThrow(
269
+ /outside the project directory/
270
+ );
271
+
272
+ const allowedStep: ShellStep = {
273
+ ...step,
274
+ allowOutsideCwd: true,
275
+ };
276
+
277
+ const result = await executeShellStep(allowedStep, context, logger);
278
+ const resolvedOutput = realpathSync(resolvePath(result.output?.stdout?.trim() || ''));
279
+ const resolvedOutside = realpathSync(outsideDir);
280
+ expect(resolvedOutput).toBe(resolvedOutside);
281
+ });
282
+ });
177
283
  });
@@ -295,18 +295,23 @@ export class WorkflowState {
295
295
  );
296
296
  }
297
297
  const mappedOutputs = isLargeDataset ? {} : ForeachExecutor.aggregateOutputs(outputs);
298
+
299
+ // If the DB says the parent is RUNNING/PENDING but we have all items successfully completed,
300
+ // trust the derived status to prevent re-execution.
301
+ let finalStatus = mainExec.status as StepStatusType;
302
+ if (
303
+ allSuccess &&
304
+ hasAllItems &&
305
+ finalStatus !== StepStatus.SUCCESS &&
306
+ finalStatus !== StepStatus.SKIPPED
307
+ ) {
308
+ finalStatus = StepStatus.SUCCESS;
309
+ }
310
+
298
311
  this.stepContexts.set(stepId, {
299
312
  output: isLargeDataset ? [] : outputs,
300
313
  outputs: mappedOutputs,
301
- status: mainExec.status as StepStatusType, // Trust the main status mostly? Or recompute?
302
- // If main status says STARTED but we have all items success, maybe we should trust our recomputation?
303
- // The original code sets status based on items.
304
- // But if mainExec exists and has a status, that's authoritative for the "Parent".
305
- // HOWEVER, if we are resuming, we might want to check if it matches reality.
306
- // Let's stick to original logic:
307
- // if (allSuccess && hasAllItems) status = SUCCESS...
308
- // But wait, if main status is FAILED, using FAILED is correct.
309
- // Let's mostly use the derived status for consistency in "incomplete" resumes.
314
+ status: finalStatus,
310
315
  items,
311
316
  foreachItems: persistedItems,
312
317
  } as ForeachStepContext);
@@ -1,12 +1,13 @@
1
1
  import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
2
2
  import * as path from 'node:path';
3
3
  import { join } from 'node:path';
4
- import { bundleAssets } from './assets.macro.ts' with { type: 'macro' };
5
4
 
6
5
  // These are bundled at build-time (macro). If macros are unavailable at runtime,
7
6
  // fall back to an empty set so local filesystem reads still work.
8
7
  const EMBEDDED_ASSETS = (() => {
9
8
  try {
9
+ // Dynamic import to avoid Bun's security restrictions in node_modules
10
+ const { bundleAssets } = require('./assets.macro.ts');
10
11
  return bundleAssets();
11
12
  } catch (e) {
12
13
  return {};
Binary file
Binary file
Binary file
File without changes