keystone-cli 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +30 -4
  2. package/package.json +17 -3
  3. package/src/cli.ts +3 -2
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/schema.ts +2 -1
  12. package/src/runner/__test__/llm-test-setup.ts +43 -11
  13. package/src/runner/durable-timers.test.ts +1 -1
  14. package/src/runner/executors/dynamic-executor.ts +125 -88
  15. package/src/runner/executors/engine-executor.ts +10 -39
  16. package/src/runner/executors/file-executor.ts +38 -0
  17. package/src/runner/executors/foreach-executor.ts +170 -17
  18. package/src/runner/executors/human-executor.ts +18 -0
  19. package/src/runner/executors/llm/stream-handler.ts +103 -0
  20. package/src/runner/executors/llm/tool-manager.ts +342 -0
  21. package/src/runner/executors/llm-executor.ts +313 -550
  22. package/src/runner/executors/memory-executor.ts +41 -34
  23. package/src/runner/executors/shell-executor.ts +141 -54
  24. package/src/runner/executors/subworkflow-executor.ts +16 -0
  25. package/src/runner/executors/types.ts +3 -1
  26. package/src/runner/executors/verification_fixes.test.ts +46 -0
  27. package/src/runner/join-scheduling.test.ts +2 -1
  28. package/src/runner/llm-adapter.integration.test.ts +10 -5
  29. package/src/runner/llm-adapter.ts +46 -17
  30. package/src/runner/llm-clarification.test.ts +4 -1
  31. package/src/runner/llm-executor.test.ts +21 -7
  32. package/src/runner/mcp-client.ts +36 -2
  33. package/src/runner/mcp-server.ts +65 -36
  34. package/src/runner/memoization.test.ts +2 -2
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/shell-executor.test.ts +107 -1
  40. package/src/runner/standard-tools-ast.test.ts +4 -2
  41. package/src/runner/standard-tools-execution.test.ts +14 -1
  42. package/src/runner/standard-tools-integration.test.ts +6 -0
  43. package/src/runner/standard-tools.ts +13 -10
  44. package/src/runner/step-executor.ts +2 -2
  45. package/src/runner/tool-integration.test.ts +4 -1
  46. package/src/runner/workflow-runner.test.ts +23 -12
  47. package/src/runner/workflow-runner.ts +174 -85
  48. package/src/runner/workflow-state.ts +186 -111
  49. package/src/ui/dashboard.tsx +17 -3
  50. package/src/utils/config-loader.ts +4 -0
  51. package/src/utils/constants.ts +4 -0
  52. package/src/utils/context-injector.test.ts +27 -27
  53. package/src/utils/context-injector.ts +68 -26
  54. package/src/utils/process-sandbox.ts +138 -148
  55. package/src/utils/redactor.ts +39 -9
  56. package/src/utils/resource-loader.ts +24 -19
  57. package/src/utils/sandbox.ts +6 -0
  58. package/src/utils/stream-utils.ts +58 -0
@@ -43,45 +43,52 @@ export async function executeMemoryStep(
43
43
 
44
44
  const memoryDb = memoryDbFromOptions || new MemoryDb('.keystone/memory.db', dimension);
45
45
 
46
- // Helper to get embedding using AI SDK
47
- const getEmbedding = async (text: string): Promise<number[]> => {
48
- const model = await getEmbeddingModel(modelName);
49
- const result = await embed({
50
- model,
51
- value: text,
52
- abortSignal,
53
- });
54
- return result.embedding;
55
- };
46
+ try {
47
+ // Helper to get embedding using AI SDK
48
+ const getEmbedding = async (text: string): Promise<number[]> => {
49
+ const model = await getEmbeddingModel(modelName);
50
+ const result = await embed({
51
+ model,
52
+ value: text,
53
+ abortSignal,
54
+ });
55
+ return result.embedding;
56
+ };
56
57
 
57
- switch (step.op) {
58
- case 'store': {
59
- const text = ExpressionEvaluator.evaluateString(step.text || '', context);
60
- if (!text) throw new Error('Text is required for memory store operation');
58
+ switch (step.op) {
59
+ case 'store': {
60
+ const text = ExpressionEvaluator.evaluateString(step.text || '', context);
61
+ if (!text) throw new Error('Text is required for memory store operation');
61
62
 
62
- const embedding = await getEmbedding(text);
63
- const metadata = step.metadata || {};
64
- const id = await memoryDb.store(text, embedding, metadata as Record<string, unknown>);
63
+ const embedding = await getEmbedding(text);
64
+ const metadata = step.metadata || {};
65
+ const id = await memoryDb.store(text, embedding, metadata as Record<string, unknown>);
65
66
 
66
- return {
67
- output: { id, status: 'stored' },
68
- status: 'success',
69
- };
70
- }
71
- case 'search': {
72
- const query = ExpressionEvaluator.evaluateString(step.query || '', context);
73
- if (!query) throw new Error('Query is required for memory search operation');
67
+ return {
68
+ output: { id, status: 'stored' },
69
+ status: 'success',
70
+ };
71
+ }
72
+ case 'search': {
73
+ const query = ExpressionEvaluator.evaluateString(step.query || '', context);
74
+ if (!query) throw new Error('Query is required for memory search operation');
74
75
 
75
- const embedding = await getEmbedding(query);
76
- const limit = step.limit || 5;
77
- const results = await memoryDb.search(embedding, limit);
76
+ const embedding = await getEmbedding(query);
77
+ const limit = step.limit || 5;
78
+ const results = await memoryDb.search(embedding, limit);
78
79
 
79
- return {
80
- output: results,
81
- status: 'success',
82
- };
80
+ return {
81
+ output: results,
82
+ status: 'success',
83
+ };
84
+ }
85
+ default:
86
+ throw new Error(`Unknown memory operation: ${(step as any).op}`);
87
+ }
88
+ } finally {
89
+ // Only close if we created it ourselves
90
+ if (!memoryDbFromOptions) {
91
+ memoryDb.close();
83
92
  }
84
- default:
85
- throw new Error(`Unknown memory operation: ${(step as any).op}`);
86
93
  }
87
94
  }
@@ -26,6 +26,7 @@
26
26
  import type { ExpressionContext } from '../../expression/evaluator.ts';
27
27
  import { ExpressionEvaluator } from '../../expression/evaluator.ts';
28
28
  import type { ShellStep } from '../../parser/schema.ts';
29
+ import { ConfigLoader } from '../../utils/config-loader.ts';
29
30
  import { LIMITS } from '../../utils/constants.ts';
30
31
  import { filterSensitiveEnv } from '../../utils/env-filter.ts';
31
32
  import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
@@ -43,6 +44,9 @@ export async function executeShellStep(
43
44
  abortSignal?: AbortSignal
44
45
  ): Promise<StepResult> {
45
46
  if (step.args) {
47
+ if (step.args.length === 0) {
48
+ throw new Error('Shell step args must contain at least one element');
49
+ }
46
50
  // args are inherently safe from shell injection as they skip the shell
47
51
  // and pass the array directly to the OS via Bun.spawn.
48
52
 
@@ -55,7 +59,15 @@ export async function executeShellStep(
55
59
  };
56
60
  }
57
61
 
58
- const result = await executeShellArgs(step.args, context, logger, abortSignal, step.dir);
62
+ const result = await executeShellArgs(
63
+ step.args,
64
+ context,
65
+ logger,
66
+ abortSignal,
67
+ step.dir,
68
+ step.env,
69
+ step.allowOutsideCwd
70
+ );
59
71
  return formatShellResult(result, logger);
60
72
  }
61
73
 
@@ -146,6 +158,15 @@ function formatShellResult(result: ShellResult, logger: Logger): StepResult {
146
158
  */
147
159
  export function escapeShellArg(arg: unknown): string {
148
160
  const value = arg === null || arg === undefined ? '' : String(arg);
161
+
162
+ // Windows escaping (cmd.exe)
163
+ if (process.platform === 'win32') {
164
+ // Replace " with "" and wrap in double quotes
165
+ // This is the standard way to escape arguments for CRT-based programs in cmd
166
+ return `"${value.replace(/"/g, '""')}"`;
167
+ }
168
+
169
+ // POSIX escaping (sh)
149
170
  // Replace single quotes with '\'' (end quote, escaped quote, start quote)
150
171
  return `'${value.replace(/'/g, "'\\''")}'`;
151
172
  }
@@ -158,7 +179,7 @@ export interface ShellResult {
158
179
  stderrTruncated?: boolean;
159
180
  }
160
181
 
161
- const TRUNCATED_SUFFIX = '... [truncated output]';
182
+ import { TRUNCATED_SUFFIX, createOutputLimiter } from '../../utils/stream-utils.ts';
162
183
 
163
184
  async function readStreamWithLimit(
164
185
  stream: ReadableStream<Uint8Array> | null | undefined,
@@ -173,65 +194,40 @@ async function readStreamWithLimit(
173
194
  }
174
195
 
175
196
  const reader = stream.getReader();
176
- const decoder = new TextDecoder();
177
- let text = '';
178
- let bytesRead = 0;
197
+ const limiter = createOutputLimiter(maxBytes);
179
198
 
180
199
  while (true) {
181
200
  const { value, done } = await reader.read();
182
201
  if (done) break;
183
202
  if (!value) continue;
184
203
 
185
- if (bytesRead + value.byteLength > maxBytes) {
186
- const allowed = maxBytes - bytesRead;
187
- if (allowed > 0) {
188
- text += decoder.decode(value.slice(0, allowed), { stream: true });
189
- }
190
- text += decoder.decode();
204
+ limiter.append(Buffer.from(value));
205
+
206
+ if (limiter.truncated) {
191
207
  try {
192
208
  await reader.cancel();
193
209
  } catch {}
194
- return { text: `${text}${TRUNCATED_SUFFIX}`, truncated: true };
210
+ break;
195
211
  }
196
-
197
- bytesRead += value.byteLength;
198
- text += decoder.decode(value, { stream: true });
199
212
  }
200
213
 
201
- text += decoder.decode();
202
- return { text, truncated: false };
214
+ return { text: limiter.finalize(), truncated: limiter.truncated };
203
215
  }
204
216
 
205
217
  // Whitelist of allowed characters for secure shell command execution
206
- // Allows: Alphanumeric, space, and common safe punctuation (_ . / : @ , + - = ' " ! ~)
207
- // Blocks: Newlines (\n, \r), Pipes, redirects, subshells, variables ($), etc.
208
- const SAFE_SHELL_CHARS = /^[a-zA-Z0-9 _./:@,+=~'"!-]+$/;
218
+ // Allows: Alphanumeric, space, and common safe punctuation (_ . / : @ , + - = ~ " ')
219
+ // Blocks: Newlines, pipes, redirects, subshells, variables ($), backslashes, etc.
220
+ const SAFE_SHELL_CHARS = /^[a-zA-Z0-9 _./:@,+=~"'-]+$/;
209
221
 
210
222
  export function detectShellInjectionRisk(rawCommand: string): boolean {
211
- // We scan the command to handle single quotes correctly.
212
- // Characters inside single quotes are considered escaped/literal and safe from shell injection.
213
- let inSingleQuote = false;
214
-
215
- for (let i = 0; i < rawCommand.length; i++) {
216
- const char = rawCommand[i];
217
-
218
- if (char === "'") {
219
- inSingleQuote = !inSingleQuote;
220
- continue;
221
- }
222
-
223
- // Outside single quotes, we enforce the strict whitelist
224
- if (!inSingleQuote) {
225
- if (!SAFE_SHELL_CHARS.test(char)) {
226
- return true;
227
- }
228
- }
229
- // Inside single quotes, everything is treated as a literal string by the shell,
230
- // so we don't need to block special characters.
231
- }
232
-
233
- // If we ended with an unclosed single quote, it's a syntax risk
234
- return inSingleQuote;
223
+ // We can safely ignore anything inside single quotes because our escape()
224
+ // function (which is the recommended way to interpolate) uses single quotes
225
+ // and correctly escapes nested single quotes as '\''.
226
+ // This regex matches '...' including correctly escaped internal single quotes.
227
+ const quotedRegex = /'([^']|'\\'')*'/g;
228
+ const stripped = rawCommand.replace(quotedRegex, "'QUOTED_STR'");
229
+
230
+ return !SAFE_SHELL_CHARS.test(stripped);
235
231
  }
236
232
 
237
233
  /**
@@ -256,13 +252,59 @@ export async function executeShell(
256
252
  if (!step.allowInsecure) {
257
253
  if (detectShellInjectionRisk(command)) {
258
254
  throw new Error(
259
- `Security Error: Command execution blocked.\nCommand: "${command.substring(0, 100)}${
255
+ `Security Error: Command execution blocked to prevent potential shell injection.\nCommand: "${command.substring(0, 100)}${
260
256
  command.length > 100 ? '...' : ''
261
- }"\nReason: Contains characters not in the strict whitelist (alphanumeric, whitespace, and _./:@,+=~-).\nThis protects against shell injection attacks.\nFix: either simplify your command or set 'allowInsecure: true' in your step definition if you trust the input.`
257
+ }"\nReason: Contains characters not in the strict whitelist (alphanumeric, whitespace, and _./:@,+=~-).\nThis protects against chaining malicious commands (e.g. '; rm -rf /'). It does NOT evaluate if the command itself is destructive.\nFix: either simplify your command or set 'allowInsecure: true' in your step definition if you trust the input.`
258
+ );
259
+ }
260
+
261
+ // Additional Check: Prevent Directory Traversal in Binary Path
262
+ // Even if it passes the whitelist, we don't want to allow 'cat ../../../etc/passwd'
263
+ // or executing '../../../../bin/malice'.
264
+ // We check for '..' characters which might indicate directory traversal.
265
+ if (command.includes('..') && (command.includes('/') || command.includes('\\'))) {
266
+ throw new Error(
267
+ `Security Error: Command blocked due to potential directory traversal ('..').\nCommand: "${command.substring(0, 100)}"\nTo allow relative paths outside the current directory, set 'allowInsecure: true'.`
262
268
  );
263
269
  }
264
270
  }
265
271
 
272
+ // Security Check: Enforce Denylist (e.g. rm, mkfs, etc.)
273
+ // We check this even if allowInsecure is true, because these are explicitly banned by policy.
274
+ const config = ConfigLoader.load();
275
+ if (config.engines?.denylist && config.engines.denylist.length > 0) {
276
+ // Robust parsing to get the command binary
277
+ // This handles:
278
+ // 1. Chained commands (e.g. "echo foo; rm -rf /")
279
+ // 2. Pre-command modifiers (e.g. "watch rm") - though difficult to do perfectly without a full shell parser,
280
+ // we can check for common dangerous patterns or just strictly check tokens.
281
+ //
282
+ // Strategy: Tokenize by shell delimiters (;, |, &, &&, ||, ``, $()) and check the first word of each segment.
283
+
284
+ // Split by command separators
285
+ const segments = command.split(/[;|&]|\$\(|\`|\r?\n/);
286
+
287
+ for (const segment of segments) {
288
+ if (!segment.trim()) continue;
289
+
290
+ // Get the first token of the segment
291
+ const tokens = segment.trim().split(/\s+/);
292
+ let bin = tokens[0];
293
+
294
+ // Handle path prefixes (e.g. /bin/rm -> rm)
295
+ if (bin.includes('/')) {
296
+ const parts = bin.split(/[/\\]/);
297
+ bin = parts[parts.length - 1];
298
+ }
299
+
300
+ if (config.engines.denylist.includes(bin)) {
301
+ throw new Error(
302
+ `Security Error: Command "${bin}" is in the denylist and cannot be executed.`
303
+ );
304
+ }
305
+ }
306
+ }
307
+
266
308
  // Evaluate environment variables
267
309
  const env: Record<string, string> = context.env ? { ...context.env } : {};
268
310
  if (step.env) {
@@ -300,10 +342,14 @@ export async function executeShell(
300
342
  let stderrTruncated = false;
301
343
  const maxOutputBytes = LIMITS.MAX_PROCESS_OUTPUT_BYTES;
302
344
 
303
- // Use 'sh -c' for everything to ensure consistent argument parsing
345
+ // Use 'sh -c' (POSIX) or 'cmd.exe /d /s /c' (Windows)
304
346
  // Security is guaranteed by the strict whitelist check above for allowInsecure: false
305
347
  // which prevents injection of metacharacters, quotes, escapes, etc.
306
- const proc = Bun.spawn(['sh', '-c', command], {
348
+ const isWindows = process.platform === 'win32';
349
+ const shellCommand = isWindows ? 'cmd.exe' : 'sh';
350
+ const shellArgs = isWindows ? ['/d', '/s', '/c'] : ['-c'];
351
+
352
+ const proc = Bun.spawn([shellCommand, ...shellArgs, command], {
307
353
  cwd: cwd || process.cwd(),
308
354
  env: mergedEnv,
309
355
  stdout: 'pipe',
@@ -323,9 +369,15 @@ export async function executeShell(
323
369
  const stdoutPromise = readStreamWithLimit(proc.stdout, maxOutputBytes);
324
370
  const stderrPromise = readStreamWithLimit(proc.stderr, maxOutputBytes);
325
371
 
326
- // Wait for exit
327
- exitCode = await proc.exited;
328
- const [stdoutResult, stderrResult] = await Promise.all([stdoutPromise, stderrPromise]);
372
+ // Wait for exit and streams simultaneously to prevent deadlocks
373
+ // (If the pipe fills up, the process blocks on write. If we await exit first, we never drain the pipe -> Deadlock)
374
+ const [exitResult, stdoutResult, stderrResult] = await Promise.all([
375
+ proc.exited,
376
+ stdoutPromise,
377
+ stderrPromise,
378
+ ]);
379
+
380
+ exitCode = exitResult;
329
381
 
330
382
  stdoutString = stdoutResult.text;
331
383
  stderrString = stderrResult.text;
@@ -376,11 +428,43 @@ export async function executeShellArgs(
376
428
  context: ExpressionContext,
377
429
  logger: Logger = new ConsoleLogger(),
378
430
  abortSignal?: AbortSignal,
379
- dir?: string
431
+ dir?: string,
432
+ stepEnv?: Record<string, string>,
433
+ allowOutsideCwd?: boolean
380
434
  ): Promise<ShellResult> {
435
+ if (argsTemplates.length === 0) {
436
+ throw new Error('Shell args must contain at least one element');
437
+ }
381
438
  const args = argsTemplates.map((t) => ExpressionEvaluator.evaluateString(t, context));
382
439
  const cwd = dir ? ExpressionEvaluator.evaluateString(dir, context) : undefined;
440
+ if (cwd) {
441
+ PathResolver.assertWithinCwd(cwd, allowOutsideCwd, 'Directory');
442
+ }
443
+
444
+ // Security Check: Enforce Denylist for direct args execution
445
+ const config = ConfigLoader.load();
446
+ if (config.engines?.denylist && config.engines.denylist.length > 0) {
447
+ const firstArg = args[0];
448
+ if (firstArg) {
449
+ let bin = firstArg;
450
+ if (bin.includes('/')) {
451
+ const parts = bin.split(/[/\\]/);
452
+ bin = parts[parts.length - 1];
453
+ }
454
+ if (config.engines.denylist.includes(bin)) {
455
+ throw new Error(
456
+ `Security Error: Command "${bin}" is in the denylist and cannot be executed.`
457
+ );
458
+ }
459
+ }
460
+ }
461
+
383
462
  const env: Record<string, string> = context.env ? { ...context.env } : {};
463
+ if (stepEnv) {
464
+ for (const [key, value] of Object.entries(stepEnv)) {
465
+ env[key] = ExpressionEvaluator.evaluateString(value, context);
466
+ }
467
+ }
384
468
  const hostEnv = filterSensitiveEnv(Bun.env);
385
469
  const mergedEnv = { ...hostEnv, ...env };
386
470
  const maxOutputBytes = LIMITS.MAX_PROCESS_OUTPUT_BYTES;
@@ -406,8 +490,11 @@ export async function executeShellArgs(
406
490
  const stdoutPromise = readStreamWithLimit(proc.stdout, maxOutputBytes);
407
491
  const stderrPromise = readStreamWithLimit(proc.stderr, maxOutputBytes);
408
492
 
409
- const exitCode = await proc.exited;
410
- const [stdoutResult, stderrResult] = await Promise.all([stdoutPromise, stderrPromise]);
493
+ const [exitCode, stdoutResult, stderrResult] = await Promise.all([
494
+ proc.exited,
495
+ stdoutPromise,
496
+ stderrPromise,
497
+ ]);
411
498
 
412
499
  if (abortSignal) {
413
500
  abortSignal.removeEventListener('abort', abortHandler);
@@ -35,6 +35,8 @@ export async function executeSubWorkflow(
35
35
  parentDepth: number;
36
36
  parentOptions: any;
37
37
  abortSignal?: AbortSignal;
38
+ stepExecutionId?: string;
39
+ parentDb?: any; // WorkflowDb
38
40
  }
39
41
  ): Promise<StepResult> {
40
42
  if (options.abortSignal?.aborted) {
@@ -57,6 +59,7 @@ export async function executeSubWorkflow(
57
59
  ...options.parentOptions,
58
60
  inputs,
59
61
  dbPath: options.parentDbPath,
62
+ db: options.parentDb, // Reuse existing DB connection
60
63
  logger: options.parentLogger,
61
64
  mcpManager: options.parentMcpManager,
62
65
  workflowDir: subWorkflowDir,
@@ -64,6 +67,19 @@ export async function executeSubWorkflow(
64
67
  signal: options.abortSignal,
65
68
  });
66
69
 
70
+ // Track sub-workflow run ID in parent step metadata for rollback safety
71
+ if (options.stepExecutionId && options.parentDb) {
72
+ try {
73
+ await options.parentDb.updateStepMetadata(options.stepExecutionId, {
74
+ __subRunId: subRunner.runId,
75
+ });
76
+ } catch (error) {
77
+ options.parentLogger.warn(
78
+ `Failed to store sub-workflow run ID in metadata: ${error instanceof Error ? error.message : String(error)}`
79
+ );
80
+ }
81
+ }
82
+
67
83
  try {
68
84
  const output = await subRunner.run();
69
85
 
@@ -46,7 +46,8 @@ export interface StepExecutorOptions {
46
46
  executeWorkflowFn?: (
47
47
  step: WorkflowStep,
48
48
  context: ExpressionContext,
49
- abortSignal?: AbortSignal
49
+ abortSignal?: AbortSignal,
50
+ stepExecutionId?: string
50
51
  ) => Promise<StepResult>;
51
52
  mcpManager?: MCPManager;
52
53
  db?: WorkflowDb;
@@ -62,6 +63,7 @@ export interface StepExecutorOptions {
62
63
  debug?: boolean;
63
64
  allowInsecure?: boolean;
64
65
  emitEvent?: (event: WorkflowEvent) => void;
66
+ depth?: number;
65
67
 
66
68
  executeStep?: (step: Step, context: ExpressionContext) => Promise<StepResult>; // To avoid circular dependency
67
69
  executeLlmStep?: typeof executeLlmStep;
@@ -0,0 +1,46 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+ import { validateRemoteUrl } from '../mcp-client';
3
+ import { executeShell } from './shell-executor';
4
+
5
+ describe('Verification Fixes', () => {
6
+ describe('SSRF Protection (mcp-client)', () => {
7
+ test('validateRemoteUrl should throw on 127.0.0.1', async () => {
8
+ expect(validateRemoteUrl('https://127.0.0.1')).rejects.toThrow('SSRF Protection');
9
+ });
10
+
11
+ test('validateRemoteUrl should throw on localhost', async () => {
12
+ expect(validateRemoteUrl('https://localhost')).rejects.toThrow('SSRF Protection');
13
+ });
14
+
15
+ test('validateRemoteUrl should throw on metadata IP', async () => {
16
+ expect(validateRemoteUrl('https://169.254.169.254')).rejects.toThrow('SSRF Protection');
17
+ });
18
+ });
19
+
20
+ describe('Shell Path Traversal (shell-executor)', () => {
21
+ const mockContext = { env: {}, steps: {}, inputs: {}, envOverrides: {}, secrets: {} };
22
+
23
+ test('should block command with ".." and "/" in secure mode', async () => {
24
+ const step = {
25
+ id: 'test',
26
+ type: 'shell' as const,
27
+ run: 'cat ../secret.txt',
28
+ allowInsecure: false,
29
+ };
30
+ // It should throw BEFORE spawning
31
+ // The error message I added was "Directory Traversal" or similar
32
+ // Let's check the implementation: "Command blocked due to potential directory traversal"
33
+ await expect(executeShell(step, mockContext)).rejects.toThrow('Command blocked');
34
+ });
35
+
36
+ test('should block absolute path with ".." in secure mode', async () => {
37
+ const step = {
38
+ id: 'test',
39
+ type: 'shell' as const,
40
+ run: '/bin/ls ../',
41
+ allowInsecure: false,
42
+ };
43
+ await expect(executeShell(step, mockContext)).rejects.toThrow('Command blocked');
44
+ });
45
+ });
46
+ });
@@ -123,7 +123,7 @@ describe('Join Scheduling & Resume', () => {
123
123
  });
124
124
 
125
125
  it('should resume and retry a step that previously exhausted retries', async () => {
126
- const dbPath = 'test-resume-retry.db';
126
+ const dbPath = `test-resume-retry-${Date.now()}.db`;
127
127
  if (existsSync(dbPath)) rmSync(dbPath);
128
128
 
129
129
  const counterFile = `/tmp/keystone-test-resume-${Date.now()}.txt`;
@@ -177,6 +177,7 @@ describe('Join Scheduling & Resume', () => {
177
177
  // Verify it failed twice (initial + 1 retry)
178
178
  let val = await Bun.file(counterFile).text();
179
179
  expect(val.trim()).toBe('2');
180
+ await runner1.stop();
180
181
 
181
182
  // Now resume. It should try again (Run 3) and succeed.
182
183
  const runner2 = new WorkflowRunner(workflow, {
@@ -54,10 +54,13 @@ describe('LLM Adapter (AI SDK)', () => {
54
54
  model_mappings: {},
55
55
  } as any);
56
56
 
57
- // With shared setupLlmMocks, we expect 'mock' provider
57
+ // Mock the provider to return a callable function that returns a mock model
58
+ const mockProvider = (modelId: string) => mockLanguageModel;
59
+ spyOn(DynamicProviderRegistry, 'getProvider').mockResolvedValue(() => mockProvider);
60
+
58
61
  const model = (await getModel('model-name')) as any;
59
- expect(model.modelId).toBe('mock-model');
60
- expect(model.provider).toBe('mock');
62
+ expect(model.modelId).toBe('test-model');
63
+ expect(model.provider).toBe('test-provider');
61
64
  });
62
65
 
63
66
  it('should handle auth token retrieval for standard providers', async () => {
@@ -73,11 +76,13 @@ describe('LLM Adapter (AI SDK)', () => {
73
76
  model_mappings: {},
74
77
  } as any);
75
78
 
79
+ // Mock the provider to return a callable function
80
+ const mockProvider = (modelId: string) => mockLanguageModel;
81
+ spyOn(DynamicProviderRegistry, 'getProvider').mockResolvedValue(() => mockProvider);
76
82
  spyOn(ConfigLoader, 'getSecret').mockReturnValue('fake-token');
77
83
 
78
84
  const model = (await getModel('gpt-4')) as any;
79
- // With global mock, we mostly check it didn't throw and loaded the 'mock' provider
80
- expect(model.provider).toBe('mock');
85
+ expect(model.provider).toBe('test-provider');
81
86
  expect(ConfigLoader.getSecret).toHaveBeenCalledWith('OPENAI_API_KEY');
82
87
  });
83
88
  });
@@ -72,23 +72,30 @@ export type { LanguageModel, EmbeddingModel } from 'ai';
72
72
 
73
73
  const userRequire = createRequire(join(process.cwd(), 'package.json'));
74
74
 
75
- // Lazy-loaded global require to avoid blocking import time
75
+ // Lazy-loaded global require
76
76
  let globalRequire: NodeRequire | undefined;
77
- let globalRequireResolved = false;
78
-
79
- function getGlobalRequire(): NodeRequire | undefined {
80
- if (globalRequireResolved) {
77
+ let globalRequirePromise: Promise<NodeRequire | undefined> | null = null;
78
+
79
+ async function getGlobalRequire(): Promise<NodeRequire | undefined> {
80
+ if (globalRequire) return globalRequire;
81
+ if (globalRequirePromise) return globalRequirePromise;
82
+
83
+ globalRequirePromise = (async () => {
84
+ try {
85
+ const { exec } = await import('node:child_process');
86
+ const { promisify } = await import('node:util');
87
+ const execAsync = promisify(exec);
88
+
89
+ const { stdout } = await execAsync('npm root -g', { encoding: 'utf-8', timeout: 5000 });
90
+ const globalRoot = stdout.trim();
91
+ globalRequire = createRequire(join(globalRoot, 'package.json'));
92
+ } catch {
93
+ // Global npm root not found or command failed
94
+ }
81
95
  return globalRequire;
82
- }
83
- globalRequireResolved = true;
84
- try {
85
- const globalRoot = execSync('npm root -g', { encoding: 'utf-8' }).trim();
86
- globalRequire = createRequire(join(globalRoot, 'package.json'));
87
- } catch {
88
- // Global npm root not found - this is expected in some environments (e.g., containers, CI)
89
- // Global package resolution will be disabled silently
90
- }
91
- return globalRequire;
96
+ })();
97
+
98
+ return globalRequirePromise;
92
99
  }
93
100
 
94
101
  // Compatibility types for Keystone
@@ -157,14 +164,15 @@ export class DynamicProviderRegistry {
157
164
  let pkg: any;
158
165
  try {
159
166
  // Try local project first
160
- pkg = await import(config.package);
167
+ const localPath = userRequire.resolve(config.package);
168
+ pkg = await import(localPath);
161
169
  } catch {
162
170
  try {
163
171
  const pkgPath = userRequire.resolve(config.package);
164
172
  pkg = await import(pkgPath);
165
173
  } catch {
166
174
  // Try global if local resolution fails
167
- const globalReq = getGlobalRequire();
175
+ const globalReq = await getGlobalRequire();
168
176
  if (globalReq) {
169
177
  try {
170
178
  const globalPkgPath = globalReq.resolve(config.package);
@@ -218,8 +226,22 @@ export class DynamicProviderRegistry {
218
226
  return pkg.default;
219
227
  }
220
228
 
229
+ // Check for standard generic export names
230
+ if (typeof pkg.createProvider === 'function') {
231
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg.createProvider);
232
+ return pkg.createProvider;
233
+ }
234
+ if (typeof pkg.provider === 'function') {
235
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg.provider);
236
+ return pkg.provider;
237
+ }
238
+
221
239
  const firstFn = Object.values(pkg).find((v) => typeof v === 'function');
222
240
  if (firstFn) {
241
+ // Warn about loose resolution only if we really had to fall back this far
242
+ new ConsoleLogger().warn(
243
+ `[Keystone] Warning: Provider '${providerName}' resolution fell back to the first exported function found in '${config.package}'. This may be unstable.`
244
+ );
223
245
  DynamicProviderRegistry.loadedProviders.set(providerName, firstFn as any);
224
246
  return firstFn as any;
225
247
  }
@@ -307,6 +329,13 @@ export async function getModel(model: string): Promise<LanguageModel> {
307
329
 
308
330
  // AI SDK convention: provider(modelId)
309
331
  if (typeof provider === 'function') {
332
+ // Prefer explicit .chat() or .chatModel() if available to ensure correct protocol (Chat vs Completion)
333
+ if (typeof (provider as any).chat === 'function') {
334
+ return (provider as any).chat(resolvedModel);
335
+ }
336
+ if (typeof (provider as any).chatModel === 'function') {
337
+ return (provider as any).chatModel(resolvedModel);
338
+ }
310
339
  return (provider as any)(resolvedModel);
311
340
  }
312
341
 
@@ -14,6 +14,7 @@ import * as agentParser from '../parser/agent-parser';
14
14
  import type { Config } from '../parser/config-schema';
15
15
  import type { Agent, LlmStep, Step } from '../parser/schema';
16
16
  import { ConfigLoader } from '../utils/config-loader';
17
+ import * as llmAdapter from './llm-adapter';
17
18
  import type { LLMMessage } from './llm-adapter';
18
19
  import type { StepResult } from './step-executor';
19
20
 
@@ -28,10 +29,11 @@ let currentChatFn: (messages: any[], options?: any) => Promise<MockLLMResponse>;
28
29
  describe('LLM Clarification', () => {
29
30
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
30
31
  let parseAgentSpy: ReturnType<typeof spyOn>;
32
+ let getModelSpy: ReturnType<typeof spyOn>;
31
33
 
32
34
  beforeAll(async () => {
33
35
  setupLlmMocks();
34
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
36
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
35
37
  const module = await import('./executors/llm-executor.ts');
36
38
  executeLlmStep = module.executeLlmStep;
37
39
  });
@@ -64,6 +66,7 @@ describe('LLM Clarification', () => {
64
66
  ConfigLoader.clear();
65
67
  resolveAgentPathSpy.mockRestore();
66
68
  parseAgentSpy.mockRestore();
69
+ getModelSpy?.mockClear();
67
70
  resetLlmMocks();
68
71
  });
69
72