keystone-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +43 -4
  2. package/package.json +4 -1
  3. package/src/cli.ts +1 -0
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/config-schema.ts +6 -0
  12. package/src/parser/schema.ts +1 -0
  13. package/src/runner/__test__/llm-test-setup.ts +43 -11
  14. package/src/runner/durable-timers.test.ts +1 -1
  15. package/src/runner/executors/dynamic-executor.ts +125 -88
  16. package/src/runner/executors/engine-executor.ts +10 -39
  17. package/src/runner/executors/file-executor.ts +67 -0
  18. package/src/runner/executors/foreach-executor.ts +170 -17
  19. package/src/runner/executors/human-executor.ts +18 -0
  20. package/src/runner/executors/llm/stream-handler.ts +103 -0
  21. package/src/runner/executors/llm/tool-manager.ts +360 -0
  22. package/src/runner/executors/llm-executor.ts +288 -555
  23. package/src/runner/executors/memory-executor.ts +41 -34
  24. package/src/runner/executors/shell-executor.ts +96 -52
  25. package/src/runner/executors/subworkflow-executor.ts +16 -0
  26. package/src/runner/executors/types.ts +3 -1
  27. package/src/runner/executors/verification_fixes.test.ts +46 -0
  28. package/src/runner/join-scheduling.test.ts +2 -1
  29. package/src/runner/llm-adapter.integration.test.ts +10 -5
  30. package/src/runner/llm-adapter.ts +57 -18
  31. package/src/runner/llm-clarification.test.ts +4 -1
  32. package/src/runner/llm-executor.test.ts +21 -7
  33. package/src/runner/mcp-client.ts +36 -2
  34. package/src/runner/mcp-server.ts +65 -36
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/standard-tools-ast.test.ts +4 -2
  40. package/src/runner/standard-tools-execution.test.ts +14 -1
  41. package/src/runner/standard-tools-integration.test.ts +6 -0
  42. package/src/runner/standard-tools.ts +13 -10
  43. package/src/runner/step-executor.ts +2 -2
  44. package/src/runner/tool-integration.test.ts +4 -1
  45. package/src/runner/workflow-runner.test.ts +23 -12
  46. package/src/runner/workflow-runner.ts +172 -79
  47. package/src/runner/workflow-state.ts +181 -111
  48. package/src/ui/dashboard.tsx +17 -3
  49. package/src/utils/config-loader.ts +4 -0
  50. package/src/utils/constants.ts +4 -0
  51. package/src/utils/context-injector.test.ts +27 -27
  52. package/src/utils/context-injector.ts +68 -26
  53. package/src/utils/process-sandbox.ts +138 -148
  54. package/src/utils/redactor.ts +39 -9
  55. package/src/utils/resource-loader.ts +24 -19
  56. package/src/utils/sandbox.ts +6 -0
  57. package/src/utils/stream-utils.ts +58 -0
@@ -43,45 +43,52 @@ export async function executeMemoryStep(
43
43
 
44
44
  const memoryDb = memoryDbFromOptions || new MemoryDb('.keystone/memory.db', dimension);
45
45
 
46
- // Helper to get embedding using AI SDK
47
- const getEmbedding = async (text: string): Promise<number[]> => {
48
- const model = await getEmbeddingModel(modelName);
49
- const result = await embed({
50
- model,
51
- value: text,
52
- abortSignal,
53
- });
54
- return result.embedding;
55
- };
46
+ try {
47
+ // Helper to get embedding using AI SDK
48
+ const getEmbedding = async (text: string): Promise<number[]> => {
49
+ const model = await getEmbeddingModel(modelName);
50
+ const result = await embed({
51
+ model,
52
+ value: text,
53
+ abortSignal,
54
+ });
55
+ return result.embedding;
56
+ };
56
57
 
57
- switch (step.op) {
58
- case 'store': {
59
- const text = ExpressionEvaluator.evaluateString(step.text || '', context);
60
- if (!text) throw new Error('Text is required for memory store operation');
58
+ switch (step.op) {
59
+ case 'store': {
60
+ const text = ExpressionEvaluator.evaluateString(step.text || '', context);
61
+ if (!text) throw new Error('Text is required for memory store operation');
61
62
 
62
- const embedding = await getEmbedding(text);
63
- const metadata = step.metadata || {};
64
- const id = await memoryDb.store(text, embedding, metadata as Record<string, unknown>);
63
+ const embedding = await getEmbedding(text);
64
+ const metadata = step.metadata || {};
65
+ const id = await memoryDb.store(text, embedding, metadata as Record<string, unknown>);
65
66
 
66
- return {
67
- output: { id, status: 'stored' },
68
- status: 'success',
69
- };
70
- }
71
- case 'search': {
72
- const query = ExpressionEvaluator.evaluateString(step.query || '', context);
73
- if (!query) throw new Error('Query is required for memory search operation');
67
+ return {
68
+ output: { id, status: 'stored' },
69
+ status: 'success',
70
+ };
71
+ }
72
+ case 'search': {
73
+ const query = ExpressionEvaluator.evaluateString(step.query || '', context);
74
+ if (!query) throw new Error('Query is required for memory search operation');
74
75
 
75
- const embedding = await getEmbedding(query);
76
- const limit = step.limit || 5;
77
- const results = await memoryDb.search(embedding, limit);
76
+ const embedding = await getEmbedding(query);
77
+ const limit = step.limit || 5;
78
+ const results = await memoryDb.search(embedding, limit);
78
79
 
79
- return {
80
- output: results,
81
- status: 'success',
82
- };
80
+ return {
81
+ output: results,
82
+ status: 'success',
83
+ };
84
+ }
85
+ default:
86
+ throw new Error(`Unknown memory operation: ${(step as any).op}`);
87
+ }
88
+ } finally {
89
+ // Only close if we created it ourselves
90
+ if (!memoryDbFromOptions) {
91
+ memoryDb.close();
83
92
  }
84
- default:
85
- throw new Error(`Unknown memory operation: ${(step as any).op}`);
86
93
  }
87
94
  }
@@ -26,6 +26,7 @@
26
26
  import type { ExpressionContext } from '../../expression/evaluator.ts';
27
27
  import { ExpressionEvaluator } from '../../expression/evaluator.ts';
28
28
  import type { ShellStep } from '../../parser/schema.ts';
29
+ import { ConfigLoader } from '../../utils/config-loader.ts';
29
30
  import { LIMITS } from '../../utils/constants.ts';
30
31
  import { filterSensitiveEnv } from '../../utils/env-filter.ts';
31
32
  import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
@@ -146,6 +147,15 @@ function formatShellResult(result: ShellResult, logger: Logger): StepResult {
146
147
  */
147
148
  export function escapeShellArg(arg: unknown): string {
148
149
  const value = arg === null || arg === undefined ? '' : String(arg);
150
+
151
+ // Windows escaping (cmd.exe)
152
+ if (process.platform === 'win32') {
153
+ // Replace " with "" and wrap in double quotes
154
+ // This is the standard way to escape arguments for CRT-based programs in cmd
155
+ return `"${value.replace(/"/g, '""')}"`;
156
+ }
157
+
158
+ // POSIX escaping (sh)
149
159
  // Replace single quotes with '\'' (end quote, escaped quote, start quote)
150
160
  return `'${value.replace(/'/g, "'\\''")}'`;
151
161
  }
@@ -158,7 +168,7 @@ export interface ShellResult {
158
168
  stderrTruncated?: boolean;
159
169
  }
160
170
 
161
- const TRUNCATED_SUFFIX = '... [truncated output]';
171
+ import { TRUNCATED_SUFFIX, createOutputLimiter } from '../../utils/stream-utils.ts';
162
172
 
163
173
  async function readStreamWithLimit(
164
174
  stream: ReadableStream<Uint8Array> | null | undefined,
@@ -173,65 +183,40 @@ async function readStreamWithLimit(
173
183
  }
174
184
 
175
185
  const reader = stream.getReader();
176
- const decoder = new TextDecoder();
177
- let text = '';
178
- let bytesRead = 0;
186
+ const limiter = createOutputLimiter(maxBytes);
179
187
 
180
188
  while (true) {
181
189
  const { value, done } = await reader.read();
182
190
  if (done) break;
183
191
  if (!value) continue;
184
192
 
185
- if (bytesRead + value.byteLength > maxBytes) {
186
- const allowed = maxBytes - bytesRead;
187
- if (allowed > 0) {
188
- text += decoder.decode(value.slice(0, allowed), { stream: true });
189
- }
190
- text += decoder.decode();
193
+ limiter.append(Buffer.from(value));
194
+
195
+ if (limiter.truncated) {
191
196
  try {
192
197
  await reader.cancel();
193
198
  } catch {}
194
- return { text: `${text}${TRUNCATED_SUFFIX}`, truncated: true };
199
+ break;
195
200
  }
196
-
197
- bytesRead += value.byteLength;
198
- text += decoder.decode(value, { stream: true });
199
201
  }
200
202
 
201
- text += decoder.decode();
202
- return { text, truncated: false };
203
+ return { text: limiter.finalize(), truncated: limiter.truncated };
203
204
  }
204
205
 
205
206
  // Whitelist of allowed characters for secure shell command execution
206
- // Allows: Alphanumeric, space, and common safe punctuation (_ . / : @ , + - = ' " ! ~)
207
- // Blocks: Newlines (\n, \r), Pipes, redirects, subshells, variables ($), etc.
208
- const SAFE_SHELL_CHARS = /^[a-zA-Z0-9 _./:@,+=~'"!-]+$/;
207
+ // Allows: Alphanumeric, space, and common safe punctuation (_ . / : @ , + - = ~ ' ")
208
+ // Blocks: Newlines, Pipes, redirects, subshells, variables, backslashes, etc.
209
+ const SAFE_SHELL_CHARS = /^[a-zA-Z0-9 _./:@,+=~"'-]+$/;
209
210
 
210
211
  export function detectShellInjectionRisk(rawCommand: string): boolean {
211
- // We scan the command to handle single quotes correctly.
212
- // Characters inside single quotes are considered escaped/literal and safe from shell injection.
213
- let inSingleQuote = false;
214
-
215
- for (let i = 0; i < rawCommand.length; i++) {
216
- const char = rawCommand[i];
217
-
218
- if (char === "'") {
219
- inSingleQuote = !inSingleQuote;
220
- continue;
221
- }
222
-
223
- // Outside single quotes, we enforce the strict whitelist
224
- if (!inSingleQuote) {
225
- if (!SAFE_SHELL_CHARS.test(char)) {
226
- return true;
227
- }
228
- }
229
- // Inside single quotes, everything is treated as a literal string by the shell,
230
- // so we don't need to block special characters.
231
- }
232
-
233
- // If we ended with an unclosed single quote, it's a syntax risk
234
- return inSingleQuote;
212
+ // We can safely ignore anything inside single quotes because our escape()
213
+ // function (which is the recommended way to interpolate) uses single quotes
214
+ // and correctly escapes nested single quotes as '\''.
215
+ // This regex matches '...' including correctly escaped internal single quotes.
216
+ const quotedRegex = /'([^']|'\\'')*'/g;
217
+ const stripped = rawCommand.replace(quotedRegex, "'QUOTED_STR'");
218
+
219
+ return !SAFE_SHELL_CHARS.test(stripped);
235
220
  }
236
221
 
237
222
  /**
@@ -256,13 +241,59 @@ export async function executeShell(
256
241
  if (!step.allowInsecure) {
257
242
  if (detectShellInjectionRisk(command)) {
258
243
  throw new Error(
259
- `Security Error: Command execution blocked.\nCommand: "${command.substring(0, 100)}${
244
+ `Security Error: Command execution blocked to prevent potential shell injection.\nCommand: "${command.substring(0, 100)}${
260
245
  command.length > 100 ? '...' : ''
261
- }"\nReason: Contains characters not in the strict whitelist (alphanumeric, whitespace, and _./:@,+=~-).\nThis protects against shell injection attacks.\nFix: either simplify your command or set 'allowInsecure: true' in your step definition if you trust the input.`
246
+ }"\nReason: Contains characters not in the strict whitelist (alphanumeric, whitespace, and _./:@,+=~-).\nThis protects against chaining malicious commands (e.g. '; rm -rf /'). It does NOT evaluate if the command itself is destructive.\nFix: either simplify your command or set 'allowInsecure: true' in your step definition if you trust the input.`
247
+ );
248
+ }
249
+
250
+ // Additional Check: Prevent Directory Traversal in Binary Path
251
+ // Even if it passes the whitelist, we don't want to allow 'cat ../../../etc/passwd'
252
+ // or executing '../../../../bin/malice'.
253
+ // We check for '..' characters which might indicate directory traversal.
254
+ if (command.includes('..') && (command.includes('/') || command.includes('\\'))) {
255
+ throw new Error(
256
+ `Security Error: Command blocked due to potential directory traversal ('..').\nCommand: "${command.substring(0, 100)}"\nTo allow relative paths outside the current directory, set 'allowInsecure: true'.`
262
257
  );
263
258
  }
264
259
  }
265
260
 
261
+ // Security Check: Enforce Denylist (e.g. rm, mkfs, etc.)
262
+ // We check this even if allowInsecure is true, because these are explicitly banned by policy.
263
+ const config = ConfigLoader.load();
264
+ if (config.engines?.denylist && config.engines.denylist.length > 0) {
265
+ // Robust parsing to get the command binary
266
+ // This handles:
267
+ // 1. Chained commands (e.g. "echo foo; rm -rf /")
268
+ // 2. Pre-command modifiers (e.g. "watch rm") - though difficult to do perfectly without a full shell parser,
269
+ // we can check for common dangerous patterns or just strictly check tokens.
270
+ //
271
+ // Strategy: Tokenize by shell delimiters (;, |, &, &&, ||, ``, $()) and check the first word of each segment.
272
+
273
+ // Split by command separators
274
+ const segments = command.split(/[;|&]|\$\(|\`|\r?\n/);
275
+
276
+ for (const segment of segments) {
277
+ if (!segment.trim()) continue;
278
+
279
+ // Get the first token of the segment
280
+ const tokens = segment.trim().split(/\s+/);
281
+ let bin = tokens[0];
282
+
283
+ // Handle path prefixes (e.g. /bin/rm -> rm)
284
+ if (bin.includes('/')) {
285
+ const parts = bin.split(/[/\\]/);
286
+ bin = parts[parts.length - 1];
287
+ }
288
+
289
+ if (config.engines.denylist.includes(bin)) {
290
+ throw new Error(
291
+ `Security Error: Command "${bin}" is in the denylist and cannot be executed.`
292
+ );
293
+ }
294
+ }
295
+ }
296
+
266
297
  // Evaluate environment variables
267
298
  const env: Record<string, string> = context.env ? { ...context.env } : {};
268
299
  if (step.env) {
@@ -300,10 +331,14 @@ export async function executeShell(
300
331
  let stderrTruncated = false;
301
332
  const maxOutputBytes = LIMITS.MAX_PROCESS_OUTPUT_BYTES;
302
333
 
303
- // Use 'sh -c' for everything to ensure consistent argument parsing
334
+ // Use 'sh -c' (POSIX) or 'cmd.exe /d /s /c' (Windows)
304
335
  // Security is guaranteed by the strict whitelist check above for allowInsecure: false
305
336
  // which rejects shell metacharacters, escapes, etc. outside single-quoted strings (quote characters themselves are allowed).
306
- const proc = Bun.spawn(['sh', '-c', command], {
337
+ const isWindows = process.platform === 'win32';
338
+ const shellCommand = isWindows ? 'cmd.exe' : 'sh';
339
+ const shellArgs = isWindows ? ['/d', '/s', '/c'] : ['-c'];
340
+
341
+ const proc = Bun.spawn([shellCommand, ...shellArgs, command], {
307
342
  cwd: cwd || process.cwd(),
308
343
  env: mergedEnv,
309
344
  stdout: 'pipe',
@@ -323,9 +358,15 @@ export async function executeShell(
323
358
  const stdoutPromise = readStreamWithLimit(proc.stdout, maxOutputBytes);
324
359
  const stderrPromise = readStreamWithLimit(proc.stderr, maxOutputBytes);
325
360
 
326
- // Wait for exit
327
- exitCode = await proc.exited;
328
- const [stdoutResult, stderrResult] = await Promise.all([stdoutPromise, stderrPromise]);
361
+ // Wait for exit and streams simultaneously to prevent deadlocks
362
+ // (If the pipe fills up, the process blocks on write. If we await exit first, we never drain the pipe -> Deadlock)
363
+ const [exitResult, stdoutResult, stderrResult] = await Promise.all([
364
+ proc.exited,
365
+ stdoutPromise,
366
+ stderrPromise,
367
+ ]);
368
+
369
+ exitCode = exitResult;
329
370
 
330
371
  stdoutString = stdoutResult.text;
331
372
  stderrString = stderrResult.text;
@@ -406,8 +447,11 @@ export async function executeShellArgs(
406
447
  const stdoutPromise = readStreamWithLimit(proc.stdout, maxOutputBytes);
407
448
  const stderrPromise = readStreamWithLimit(proc.stderr, maxOutputBytes);
408
449
 
409
- const exitCode = await proc.exited;
410
- const [stdoutResult, stderrResult] = await Promise.all([stdoutPromise, stderrPromise]);
450
+ const [exitCode, stdoutResult, stderrResult] = await Promise.all([
451
+ proc.exited,
452
+ stdoutPromise,
453
+ stderrPromise,
454
+ ]);
411
455
 
412
456
  if (abortSignal) {
413
457
  abortSignal.removeEventListener('abort', abortHandler);
@@ -35,6 +35,8 @@ export async function executeSubWorkflow(
35
35
  parentDepth: number;
36
36
  parentOptions: any;
37
37
  abortSignal?: AbortSignal;
38
+ stepExecutionId?: string;
39
+ parentDb?: any; // WorkflowDb
38
40
  }
39
41
  ): Promise<StepResult> {
40
42
  if (options.abortSignal?.aborted) {
@@ -57,6 +59,7 @@ export async function executeSubWorkflow(
57
59
  ...options.parentOptions,
58
60
  inputs,
59
61
  dbPath: options.parentDbPath,
62
+ db: options.parentDb, // Reuse existing DB connection
60
63
  logger: options.parentLogger,
61
64
  mcpManager: options.parentMcpManager,
62
65
  workflowDir: subWorkflowDir,
@@ -64,6 +67,19 @@ export async function executeSubWorkflow(
64
67
  signal: options.abortSignal,
65
68
  });
66
69
 
70
+ // Track sub-workflow run ID in parent step metadata for rollback safety
71
+ if (options.stepExecutionId && options.parentDb) {
72
+ try {
73
+ await options.parentDb.updateStepMetadata(options.stepExecutionId, {
74
+ __subRunId: subRunner.runId,
75
+ });
76
+ } catch (error) {
77
+ options.parentLogger.warn(
78
+ `Failed to store sub-workflow run ID in metadata: ${error instanceof Error ? error.message : String(error)}`
79
+ );
80
+ }
81
+ }
82
+
67
83
  try {
68
84
  const output = await subRunner.run();
69
85
 
@@ -46,7 +46,8 @@ export interface StepExecutorOptions {
46
46
  executeWorkflowFn?: (
47
47
  step: WorkflowStep,
48
48
  context: ExpressionContext,
49
- abortSignal?: AbortSignal
49
+ abortSignal?: AbortSignal,
50
+ stepExecutionId?: string
50
51
  ) => Promise<StepResult>;
51
52
  mcpManager?: MCPManager;
52
53
  db?: WorkflowDb;
@@ -62,6 +63,7 @@ export interface StepExecutorOptions {
62
63
  debug?: boolean;
63
64
  allowInsecure?: boolean;
64
65
  emitEvent?: (event: WorkflowEvent) => void;
66
+ depth?: number;
65
67
 
66
68
  executeStep?: (step: Step, context: ExpressionContext) => Promise<StepResult>; // To avoid circular dependency
67
69
  executeLlmStep?: typeof executeLlmStep;
@@ -0,0 +1,46 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+ import { validateRemoteUrl } from '../mcp-client';
3
+ import { executeShell } from './shell-executor';
4
+
5
+ describe('Verification Fixes', () => {
6
+ describe('SSRF Protection (mcp-client)', () => {
7
+ test('validateRemoteUrl should throw on 127.0.0.1', async () => {
8
+ expect(validateRemoteUrl('https://127.0.0.1')).rejects.toThrow('SSRF Protection');
9
+ });
10
+
11
+ test('validateRemoteUrl should throw on localhost', async () => {
12
+ expect(validateRemoteUrl('https://localhost')).rejects.toThrow('SSRF Protection');
13
+ });
14
+
15
+ test('validateRemoteUrl should throw on metadata IP', async () => {
16
+ expect(validateRemoteUrl('https://169.254.169.254')).rejects.toThrow('SSRF Protection');
17
+ });
18
+ });
19
+
20
+ describe('Shell Path Traversal (shell-executor)', () => {
21
+ const mockContext = { env: {}, steps: {}, inputs: {}, envOverrides: {}, secrets: {} };
22
+
23
+ test('should block command with ".." and "/" in secure mode', async () => {
24
+ const step = {
25
+ id: 'test',
26
+ type: 'shell' as const,
27
+ run: 'cat ../secret.txt',
28
+ allowInsecure: false,
29
+ };
30
+ // It should throw BEFORE spawning
31
+ // The error message I added was "Directory Traversal" or similar
32
+ // Let's check the implementation: "Command blocked due to potential directory traversal"
33
+ await expect(executeShell(step, mockContext)).rejects.toThrow('Command blocked');
34
+ });
35
+
36
+ test('should block absolute path with ".." in secure mode', async () => {
37
+ const step = {
38
+ id: 'test',
39
+ type: 'shell' as const,
40
+ run: '/bin/ls ../',
41
+ allowInsecure: false,
42
+ };
43
+ await expect(executeShell(step, mockContext)).rejects.toThrow('Command blocked');
44
+ });
45
+ });
46
+ });
@@ -123,7 +123,7 @@ describe('Join Scheduling & Resume', () => {
123
123
  });
124
124
 
125
125
  it('should resume and retry a step that previously exhausted retries', async () => {
126
- const dbPath = 'test-resume-retry.db';
126
+ const dbPath = `test-resume-retry-${Date.now()}.db`;
127
127
  if (existsSync(dbPath)) rmSync(dbPath);
128
128
 
129
129
  const counterFile = `/tmp/keystone-test-resume-${Date.now()}.txt`;
@@ -177,6 +177,7 @@ describe('Join Scheduling & Resume', () => {
177
177
  // Verify it failed twice (initial + 1 retry)
178
178
  let val = await Bun.file(counterFile).text();
179
179
  expect(val.trim()).toBe('2');
180
+ await runner1.stop();
180
181
 
181
182
  // Now resume. It should try again (Run 3) and succeed.
182
183
  const runner2 = new WorkflowRunner(workflow, {
@@ -54,10 +54,13 @@ describe('LLM Adapter (AI SDK)', () => {
54
54
  model_mappings: {},
55
55
  } as any);
56
56
 
57
- // With shared setupLlmMocks, we expect 'mock' provider
57
+ // Mock the provider to return a callable function that returns a mock model
58
+ const mockProvider = (modelId: string) => mockLanguageModel;
59
+ spyOn(DynamicProviderRegistry, 'getProvider').mockResolvedValue(() => mockProvider);
60
+
58
61
  const model = (await getModel('model-name')) as any;
59
- expect(model.modelId).toBe('mock-model');
60
- expect(model.provider).toBe('mock');
62
+ expect(model.modelId).toBe('test-model');
63
+ expect(model.provider).toBe('test-provider');
61
64
  });
62
65
 
63
66
  it('should handle auth token retrieval for standard providers', async () => {
@@ -73,11 +76,13 @@ describe('LLM Adapter (AI SDK)', () => {
73
76
  model_mappings: {},
74
77
  } as any);
75
78
 
79
+ // Mock the provider to return a callable function
80
+ const mockProvider = (modelId: string) => mockLanguageModel;
81
+ spyOn(DynamicProviderRegistry, 'getProvider').mockResolvedValue(() => mockProvider);
76
82
  spyOn(ConfigLoader, 'getSecret').mockReturnValue('fake-token');
77
83
 
78
84
  const model = (await getModel('gpt-4')) as any;
79
- // With global mock, we mostly check it didn't throw and loaded the 'mock' provider
80
- expect(model.provider).toBe('mock');
85
+ expect(model.provider).toBe('test-provider');
81
86
  expect(ConfigLoader.getSecret).toHaveBeenCalledWith('OPENAI_API_KEY');
82
87
  });
83
88
  });
@@ -55,7 +55,7 @@ class LocalEmbeddingModel {
55
55
  return Array.from(output.data) as number[];
56
56
  })
57
57
  );
58
- return { embeddings };
58
+ return { embeddings, warnings: [] };
59
59
  }
60
60
 
61
61
  /**
@@ -72,23 +72,30 @@ export type { LanguageModel, EmbeddingModel } from 'ai';
72
72
 
73
73
  const userRequire = createRequire(join(process.cwd(), 'package.json'));
74
74
 
75
- // Lazy-loaded global require to avoid blocking import time
75
+ // Lazy-loaded global require
76
76
  let globalRequire: NodeRequire | undefined;
77
- let globalRequireResolved = false;
78
-
79
- function getGlobalRequire(): NodeRequire | undefined {
80
- if (globalRequireResolved) {
77
+ let globalRequirePromise: Promise<NodeRequire | undefined> | null = null;
78
+
79
+ async function getGlobalRequire(): Promise<NodeRequire | undefined> {
80
+ if (globalRequire) return globalRequire;
81
+ if (globalRequirePromise) return globalRequirePromise;
82
+
83
+ globalRequirePromise = (async () => {
84
+ try {
85
+ const { exec } = await import('node:child_process');
86
+ const { promisify } = await import('node:util');
87
+ const execAsync = promisify(exec);
88
+
89
+ const { stdout } = await execAsync('npm root -g', { encoding: 'utf-8', timeout: 5000 });
90
+ const globalRoot = stdout.trim();
91
+ globalRequire = createRequire(join(globalRoot, 'package.json'));
92
+ } catch {
93
+ // Global npm root not found or command failed
94
+ }
81
95
  return globalRequire;
82
- }
83
- globalRequireResolved = true;
84
- try {
85
- const globalRoot = execSync('npm root -g', { encoding: 'utf-8' }).trim();
86
- globalRequire = createRequire(join(globalRoot, 'package.json'));
87
- } catch {
88
- // Global npm root not found - this is expected in some environments (e.g., containers, CI)
89
- // Global package resolution will be disabled silently
90
- }
91
- return globalRequire;
96
+ })();
97
+
98
+ return globalRequirePromise;
92
99
  }
93
100
 
94
101
  // Compatibility types for Keystone
@@ -157,14 +164,15 @@ export class DynamicProviderRegistry {
157
164
  let pkg: any;
158
165
  try {
159
166
  // Try local project first
160
- pkg = await import(config.package);
167
+ const localPath = userRequire.resolve(config.package);
168
+ pkg = await import(localPath);
161
169
  } catch {
162
170
  try {
163
171
  const pkgPath = userRequire.resolve(config.package);
164
172
  pkg = await import(pkgPath);
165
173
  } catch {
166
174
  // Try global if local resolution fails
167
- const globalReq = getGlobalRequire();
175
+ const globalReq = await getGlobalRequire();
168
176
  if (globalReq) {
169
177
  try {
170
178
  const globalPkgPath = globalReq.resolve(config.package);
@@ -218,8 +226,22 @@ export class DynamicProviderRegistry {
218
226
  return pkg.default;
219
227
  }
220
228
 
229
+ // Check for standard generic export names
230
+ if (typeof pkg.createProvider === 'function') {
231
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg.createProvider);
232
+ return pkg.createProvider;
233
+ }
234
+ if (typeof pkg.provider === 'function') {
235
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg.provider);
236
+ return pkg.provider;
237
+ }
238
+
221
239
  const firstFn = Object.values(pkg).find((v) => typeof v === 'function');
222
240
  if (firstFn) {
241
+ // Warn about loose resolution only if we really had to fall back this far
242
+ new ConsoleLogger().warn(
243
+ `[Keystone] Warning: Provider '${providerName}' resolution fell back to the first exported function found in '${config.package}'. This may be unstable.`
244
+ );
223
245
  DynamicProviderRegistry.loadedProviders.set(providerName, firstFn as any);
224
246
  return firstFn as any;
225
247
  }
@@ -298,10 +320,22 @@ async function prepareProvider(
298
320
  }
299
321
 
300
322
  export async function getModel(model: string): Promise<LanguageModel> {
323
+ const configValues = ConfigLoader.load();
324
+ if (configValues.logging?.suppress_ai_sdk_warnings) {
325
+ process.env.AI_SDK_LOG_WARNINGS = 'false';
326
+ }
327
+
301
328
  const { provider, resolvedModel } = await prepareProvider(model);
302
329
 
303
330
  // AI SDK convention: provider(modelId)
304
331
  if (typeof provider === 'function') {
332
+ // Prefer explicit .chat() or .chatModel() if available to ensure correct protocol (Chat vs Completion)
333
+ if (typeof (provider as any).chat === 'function') {
334
+ return (provider as any).chat(resolvedModel);
335
+ }
336
+ if (typeof (provider as any).chatModel === 'function') {
337
+ return (provider as any).chatModel(resolvedModel);
338
+ }
305
339
  return (provider as any)(resolvedModel);
306
340
  }
307
341
 
@@ -321,6 +355,11 @@ export async function getModel(model: string): Promise<LanguageModel> {
321
355
  }
322
356
 
323
357
  export async function getEmbeddingModel(model: string): Promise<EmbeddingModel> {
358
+ const configValues = ConfigLoader.load();
359
+ if (configValues.logging?.suppress_ai_sdk_warnings) {
360
+ process.env.AI_SDK_LOG_WARNINGS = 'false';
361
+ }
362
+
324
363
  // 1. Check for local fallback
325
364
  if (model === 'local' || model === 'keystone-local') {
326
365
  return new LocalEmbeddingModel();
@@ -14,6 +14,7 @@ import * as agentParser from '../parser/agent-parser';
14
14
  import type { Config } from '../parser/config-schema';
15
15
  import type { Agent, LlmStep, Step } from '../parser/schema';
16
16
  import { ConfigLoader } from '../utils/config-loader';
17
+ import * as llmAdapter from './llm-adapter';
17
18
  import type { LLMMessage } from './llm-adapter';
18
19
  import type { StepResult } from './step-executor';
19
20
 
@@ -28,10 +29,11 @@ let currentChatFn: (messages: any[], options?: any) => Promise<MockLLMResponse>;
28
29
  describe('LLM Clarification', () => {
29
30
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
30
31
  let parseAgentSpy: ReturnType<typeof spyOn>;
32
+ let getModelSpy: ReturnType<typeof spyOn>;
31
33
 
32
34
  beforeAll(async () => {
33
35
  setupLlmMocks();
34
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
36
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
35
37
  const module = await import('./executors/llm-executor.ts');
36
38
  executeLlmStep = module.executeLlmStep;
37
39
  });
@@ -64,6 +66,7 @@ describe('LLM Clarification', () => {
64
66
  ConfigLoader.clear();
65
67
  resolveAgentPathSpy.mockRestore();
66
68
  parseAgentSpy.mockRestore();
69
+ getModelSpy?.mockClear();
67
70
  resetLlmMocks();
68
71
  });
69
72