keystone-cli 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +114 -140
  2. package/package.json +6 -3
  3. package/src/cli.ts +54 -369
  4. package/src/commands/init.ts +15 -29
  5. package/src/db/memory-db.test.ts +45 -0
  6. package/src/db/memory-db.ts +47 -21
  7. package/src/db/sqlite-setup.ts +26 -3
  8. package/src/db/workflow-db.ts +12 -5
  9. package/src/parser/config-schema.ts +11 -13
  10. package/src/parser/schema.ts +4 -2
  11. package/src/runner/__test__/llm-mock-setup.ts +173 -0
  12. package/src/runner/__test__/llm-test-setup.ts +271 -0
  13. package/src/runner/engine-executor.test.ts +25 -18
  14. package/src/runner/executors/blueprint-executor.ts +0 -1
  15. package/src/runner/executors/dynamic-executor.ts +11 -6
  16. package/src/runner/executors/engine-executor.ts +5 -1
  17. package/src/runner/executors/llm-executor.ts +502 -1033
  18. package/src/runner/executors/memory-executor.ts +35 -19
  19. package/src/runner/executors/plan-executor.ts +0 -1
  20. package/src/runner/executors/types.ts +4 -4
  21. package/src/runner/llm-adapter.integration.test.ts +151 -0
  22. package/src/runner/llm-adapter.ts +263 -1401
  23. package/src/runner/llm-clarification.test.ts +91 -106
  24. package/src/runner/llm-executor.test.ts +217 -1181
  25. package/src/runner/memoization.test.ts +0 -1
  26. package/src/runner/recovery-security.test.ts +51 -20
  27. package/src/runner/reflexion.test.ts +55 -18
  28. package/src/runner/standard-tools-integration.test.ts +137 -87
  29. package/src/runner/step-executor.test.ts +36 -80
  30. package/src/runner/step-executor.ts +0 -2
  31. package/src/runner/test-harness.ts +3 -29
  32. package/src/runner/tool-integration.test.ts +122 -73
  33. package/src/runner/workflow-runner.ts +92 -35
  34. package/src/runner/workflow-scheduler.ts +11 -1
  35. package/src/runner/workflow-summary.ts +144 -0
  36. package/src/utils/auth-manager.test.ts +10 -520
  37. package/src/utils/auth-manager.ts +3 -756
  38. package/src/utils/config-loader.ts +12 -0
  39. package/src/utils/constants.ts +0 -17
  40. package/src/utils/process-sandbox.ts +15 -3
  41. package/src/runner/llm-adapter-runtime.test.ts +0 -209
  42. package/src/runner/llm-adapter.test.ts +0 -1012
@@ -0,0 +1,271 @@
1
+ /**
2
+ * LLM Test Setup - Helper for setting up LLM mocks
3
+ *
4
+ * This file provides helpers that mock the AI SDK provider packages with mock.module
5
+ * (see setupLlmMocks), so tests opt in to mocking rather than having it applied globally.
6
+ */
7
+ import { mock, spyOn } from 'bun:test';
8
+ import * as llmAdapter from '../llm-adapter';
9
+
10
// Module-level singleton mocks. Because they are created once at import time,
// every test file that imports this helper receives the same instances.
export const mockGetModel = mock();
export const mockGetEmbeddingModel = mock();
export const mockResetProviderRegistry = mock();
export const mockDynamicProviderRegistry = {
  getProvider: mock(),
};
15
+
16
/** A single tool invocation requested by a mock reply. */
interface MockToolCall {
  id: string;
  type: 'function';
  /** `arguments` is a string; createUnifiedMockModel JSON-parses it when it is one. */
  function: { name: string; arguments: string };
}

/** The message part of a mock reply. */
interface MockMessage {
  role: string;
  content?: string | null;
  tool_calls?: MockToolCall[];
}

/** Token counts optionally attached to a mock reply. */
interface MockUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

/** Shape that a test-supplied chat function resolves with. */
export interface MockLLMResponse {
  message: MockMessage;
  usage?: MockUsage;
}
29
+
30
/** Signature of the handler that answers calls made through the mock model. */
type MockChatFn = (messages: any[], options?: any) => Promise<MockLLMResponse>;

// Module-wide handler used by the mock model; starts out as a canned assistant
// reply and is swapped by tests through setCurrentChatFn.
let _currentChatFn: MockChatFn = async () => {
  return { message: { role: 'assistant', content: 'Default mock response' } };
};

/**
 * Installs `fn` as the handler used for subsequent mock-model calls.
 */
export function setCurrentChatFn(fn: typeof _currentChatFn) {
  _currentChatFn = fn;
}

/**
 * Returns the handler that is currently installed.
 */
export function getCurrentChatFn() {
  return _currentChatFn;
}
42
+
43
/**
 * Converts AI SDK prompt messages into the flat chat messages handed to the
 * current chat function.
 */
function toMockChatMessages(prompt: any[]): any[] {
  return prompt.flatMap((m: any) => {
    let content = m.content || '';
    if (Array.isArray(m.content)) {
      // A message carrying tool results expands to one `tool` message per
      // result; any other parts of that message are not forwarded.
      const toolResults = m.content.filter((p: any) => p && p.type === 'tool-result');
      if (toolResults.length > 0) {
        return toolResults.map((tr: any) => ({
          role: 'tool',
          tool_call_id: tr.toolCallId,
          content: JSON.stringify(tr.result),
        }));
      }
      // Otherwise join the text parts. When there is no text, the raw parts
      // array is kept and serialized to JSON below.
      const textParts = m.content
        .filter((p: any) => p && p.type === 'text')
        .map((p: any) => p.text)
        .join('');
      if (textParts) content = textParts;
    }
    return [
      {
        role: m.role,
        content: typeof content === 'string' ? content : JSON.stringify(content),
      },
    ];
  });
}

/**
 * Maps the tool definitions found on the call options (`options.tools`, else
 * `options.mode.tools`) to `{ type: 'function', function: {...} }` entries.
 * Returns undefined when the options carry no tools.
 */
function toMockChatTools(options: any): any[] | undefined {
  return (options.tools || options.mode?.tools)?.map((t: any) => ({
    type: 'function',
    function: {
      name: t.name,
      description: t.description,
      parameters: t.parameters || t.inputSchema,
    },
  }));
}

/** Tool-call arguments may be a JSON string or an already-parsed value. */
function parseMockToolArguments(raw: any): any {
  return typeof raw === 'string' ? JSON.parse(raw) : raw;
}

/**
 * Translates the call options shared by doGenerate and doStream and forwards
 * them to the currently installed chat function.
 */
async function requestMockChat(options: any) {
  const messages = toMockChatMessages(options.prompt || options.input || []);
  const tools = toMockChatTools(options);
  return _currentChatFn(messages, { tools });
}

/**
 * Creates a unified mock model that simulates AI SDK LanguageModel behavior.
 *
 * - `doGenerate` resolves with the chat function's reply as text plus optional
 *   tool calls. A single space stands in for the text when the reply has none.
 * - `doStream` resolves with a ReadableStream that emits an optional
 *   `text-delta`, one part per tool call, then a `finish` part.
 * - `doEmbed` returns the fixed vector `[0.1, 0.2, 0.3]` for every input value.
 *
 * Usage numbers are constants (`promptTokens: 10`, `completionTokens: 5`).
 *
 * @returns An object exposing `doGenerate`, `doStream` and `doEmbed`.
 */
export function createUnifiedMockModel() {
  return {
    specificationVersion: 'v2',
    provider: 'mock',
    modelId: 'mock-model',
    doGenerate: async (options: any) => {
      const response = await requestMockChat(options);

      const toolCalls = response.message.tool_calls?.map((tc: any) => ({
        type: 'tool-call' as const,
        toolCallId: tc.id,
        toolName: tc.function.name,
        args: parseMockToolArguments(tc.function.arguments),
      }));

      // An empty tool_calls array is reported the same way as no tool calls.
      const finalToolCalls = toolCalls && toolCalls.length > 0 ? toolCalls : undefined;
      // Falls back to a single space, so a text part is always present.
      const text = response.message.content || ' ';

      // Internal AI SDK v6.0.3+ seems to expect 'content' on the result object
      // during generateText processing, even if not in the official v2 spec.
      const content: any[] = [{ type: 'text', text }];
      for (const tc of finalToolCalls ?? []) {
        content.push({
          type: 'tool-call',
          toolCallId: tc.toolCallId,
          toolName: tc.toolName,
          args: tc.args,
          input: JSON.stringify(tc.args), // Add required input field
        });
      }

      return {
        text,
        content,
        toolCalls: finalToolCalls,
        finishReason: finalToolCalls ? 'tool-calls' : 'stop',
        usage: { promptTokens: 10, completionTokens: 5 },
        rawResponse: { headers: {} },
        responseMessages: [
          {
            role: 'assistant',
            content,
          },
        ],
      } as any;
    },
    doStream: async (options: any) => {
      const response = await requestMockChat(options);

      const stream = new ReadableStream({
        async start(controller) {
          if (response.message.content) {
            controller.enqueue({
              type: 'text-delta',
              delta: response.message.content,
              text: response.message.content,
            });
          }

          // Arguments are parsed inside the stream, so malformed JSON surfaces
          // as a stream error rather than a doStream rejection.
          const toolCalls = response.message.tool_calls?.map((tc: any) => ({
            type: 'tool-call' as const,
            toolCallId: tc.id,
            toolName: tc.function.name,
            args: parseMockToolArguments(tc.function.arguments),
            id: tc.id,
            name: tc.function.name,
            input:
              typeof tc.function.arguments === 'string'
                ? tc.function.arguments
                : JSON.stringify(tc.function.arguments),
          }));

          for (const tc of toolCalls ?? []) {
            controller.enqueue(tc);
          }

          controller.enqueue({
            type: 'finish',
            finishReason: toolCalls?.length ? 'tool-calls' : 'stop',
            usage: { promptTokens: 10, completionTokens: 5 },
          });

          controller.close();
        },
      });

      return { stream, rawResponse: { headers: {} } };
    },
    doEmbed: async (options: any) => {
      return {
        embeddings: options.values.map(() => [0.1, 0.2, 0.3]),
        usage: { tokens: 5 },
      };
    },
  };
}
227
+
228
+ /**
229
+ * Sets up the LLM mocks by mocking the provider packages.
230
+ * This allows llm-adapter to run its real logic but return mock models.
231
+ */
232
+
233
+ import { resetProviderRegistry } from '../llm-adapter';
234
+
235
/**
 * Sets up the LLM mocks by replacing the `@ai-sdk/openai`, `@ai-sdk/anthropic`
 * and `@ai-sdk/google` modules with a stub whose exports all resolve to the
 * unified mock model. llm-adapter keeps running its real logic but receives
 * mock models. Also restores the default chat function.
 */
export function setupLlmMocks() {
  // Start from a clean slate: reset the provider registry (so a previously
  // resolved provider is not reused) and restore the default reply.
  resetLlmMocks();

  // Provider factory (e.g. createOpenAI) returns a provider instance: a
  // function from model id to model. The arguments are ignored by the mock.
  const mockProviderInstance = (_modelId: string) => createUnifiedMockModel();
  const mockProviderFactory = (_options?: unknown) => mockProviderInstance;

  // One stub module serves all three packages, so it carries every export name.
  // NOTE(review): in the real packages `openai` / `anthropic` / `google` are
  // provider instances (called with a model id). Here they map to the factory,
  // so calling e.g. `openai('id')` yields the instance function, not a model.
  // Confirm that llm-adapter only goes through the `create*` factories.
  const mockProviderModule = {
    openai: mockProviderFactory,
    createOpenAI: mockProviderFactory,
    anthropic: mockProviderFactory,
    createAnthropic: mockProviderFactory,
    google: mockProviderFactory,
    createGoogleGenerativeAI: mockProviderFactory,
    default: mockProviderFactory,
  };

  // Mock the provider packages
  for (const pkg of ['@ai-sdk/openai', '@ai-sdk/anthropic', '@ai-sdk/google']) {
    mock.module(pkg, () => mockProviderModule);
  }
}

/**
 * Resets all mocks to default state. Call in afterEach.
 *
 * Resets the provider registry and reinstalls the default chat function.
 * Module mocks installed by setupLlmMocks are left in place.
 */
export function resetLlmMocks() {
  resetProviderRegistry();
  _currentChatFn = async () => ({
    message: { role: 'assistant', content: 'Default mock response' },
  });
}
@@ -73,14 +73,21 @@ describe('engine-executor', () => {
73
73
  });
74
74
 
75
75
  it('should reject if PATH is not in env for non-absolute command', async () => {
76
- const step = createStep({
77
- cwd: '/tmp',
78
- env: { HOME: '/home' },
79
- });
76
+ const originalPath = process.env.PATH;
77
+ process.env.PATH = undefined;
80
78
 
81
- await expect(
82
- executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
83
- ).rejects.toThrow('requires env.PATH');
79
+ try {
80
+ const step = createStep({
81
+ cwd: '/tmp',
82
+ env: { HOME: '/home' },
83
+ });
84
+
85
+ await expect(
86
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
87
+ ).rejects.toThrow('requires env.PATH');
88
+ } finally {
89
+ process.env.PATH = originalPath;
90
+ }
84
91
  });
85
92
 
86
93
  it('should reject if command is denied', async () => {
@@ -90,7 +97,7 @@ describe('engine-executor', () => {
90
97
  denylist: ['rm', 'dd'],
91
98
  allowlist: {},
92
99
  },
93
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
100
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
94
101
 
95
102
  try {
96
103
  const step = createStep({
@@ -117,7 +124,7 @@ describe('engine-executor', () => {
117
124
  python: { command: 'python3', version: '3.11', versionArgs: [], args: [] },
118
125
  },
119
126
  },
120
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
127
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
121
128
 
122
129
  try {
123
130
  const step = createStep({
@@ -143,7 +150,7 @@ describe('engine-executor', () => {
143
150
  echo: { command: 'echo', version: '', versionArgs: [], args: [] },
144
151
  },
145
152
  },
146
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
153
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
147
154
 
148
155
  try {
149
156
  mkdirSync(tempDir, { recursive: true });
@@ -175,7 +182,7 @@ describe('engine-executor', () => {
175
182
  echo: { command: 'echo', version: '999.0.0', versionArgs: [], args: [] },
176
183
  },
177
184
  },
178
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
185
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
179
186
 
180
187
  try {
181
188
  mkdirSync(tempDir, { recursive: true });
@@ -207,7 +214,7 @@ describe('engine-executor', () => {
207
214
  echo: { command: 'echo', version: '', versionArgs: [], args: [] },
208
215
  },
209
216
  },
210
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
217
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
211
218
 
212
219
  try {
213
220
  mkdirSync(tempDir, { recursive: true });
@@ -241,7 +248,7 @@ describe('engine-executor', () => {
241
248
  sh: { command: 'sh', version: '', versionArgs: [], args: [] },
242
249
  },
243
250
  },
244
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
251
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
245
252
 
246
253
  try {
247
254
  mkdirSync(tempDir, { recursive: true });
@@ -274,7 +281,7 @@ describe('engine-executor', () => {
274
281
  sh: { command: 'sh', version: '', versionArgs: [], args: [] },
275
282
  },
276
283
  },
277
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
284
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
278
285
 
279
286
  try {
280
287
  mkdirSync(tempDir, { recursive: true });
@@ -310,7 +317,7 @@ describe('engine-executor', () => {
310
317
  echo: { command: 'echo', version: '', versionArgs: [], args: [] },
311
318
  },
312
319
  },
313
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
320
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
314
321
 
315
322
  try {
316
323
  mkdirSync(tempDir, { recursive: true });
@@ -343,7 +350,7 @@ describe('engine-executor', () => {
343
350
  echo: { command: 'echo', version: '', versionArgs: [], args: [] },
344
351
  },
345
352
  },
346
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
353
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
347
354
 
348
355
  try {
349
356
  mkdirSync(tempDir, { recursive: true });
@@ -382,7 +389,7 @@ describe('engine-executor', () => {
382
389
  echo: { command: 'echo', version: '', versionArgs: [], args: [] },
383
390
  },
384
391
  },
385
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
392
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
386
393
 
387
394
  try {
388
395
  mkdirSync(tempDir, { recursive: true });
@@ -414,7 +421,7 @@ describe('engine-executor', () => {
414
421
  echo: { command: 'echo', version: 'test', versionArgs: ['test'], args: [] },
415
422
  },
416
423
  },
417
- } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
424
+ } as unknown as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
418
425
 
419
426
  try {
420
427
  mkdirSync(tempDir, { recursive: true });
@@ -129,7 +129,6 @@ export async function executeBlueprintStep(
129
129
  mcpManager,
130
130
  workflowDir,
131
131
  abortSignal,
132
- undefined,
133
132
  emitEvent,
134
133
  workflowName ? { runId, workflow: workflowName } : undefined
135
134
  );
@@ -9,7 +9,7 @@ import { DynamicStateManager } from '../../db/dynamic-state-manager.ts';
9
9
  import type { WorkflowDb } from '../../db/workflow-db.ts';
10
10
  import type { ExpressionContext } from '../../expression/evaluator.ts';
11
11
  import { ExpressionEvaluator } from '../../expression/evaluator.ts';
12
- import type { DynamicStep, LlmStep, Step } from '../../parser/schema.ts';
12
+ import type { DynamicStep, HumanStep, LlmStep, Step } from '../../parser/schema.ts';
13
13
  import type { Logger } from '../../utils/logger.ts';
14
14
  import { topologicalSort } from '../../utils/topo-sort.ts';
15
15
  import type { WorkflowEvent } from '../events.ts';
@@ -173,7 +173,7 @@ function convertToExecutableStep(
173
173
  ...baseProps,
174
174
  type: 'file' as const,
175
175
  path: generated.path || '',
176
- op: (generated.op as any) || (generated.inputs?.op as any) || 'read',
176
+ op: generated.op || (generated.inputs?.op as 'read' | 'write' | 'append') || 'read',
177
177
  content: generated.content || (generated.inputs?.content as string),
178
178
  };
179
179
 
@@ -307,7 +307,7 @@ async function initializeState(
307
307
  startedAt: dbState.startedAt,
308
308
  completedAt: dbState.completedAt,
309
309
  error: dbState.error,
310
- replanCount: (dbState as any).replanCount || 0,
310
+ replanCount: dbState.replanCount || 0,
311
311
  };
312
312
  } else {
313
313
  dbState = await stateManager.create({ runId, stepId: step.id, workflowId: state.workflowId });
@@ -379,7 +379,6 @@ async function handlePlanningPhase(
379
379
  mcpManager,
380
380
  workflowDir,
381
381
  abortSignal,
382
- undefined,
383
382
  emitEvent,
384
383
  workflowName && runId ? { runId, workflow: workflowName } : undefined
385
384
  );
@@ -428,7 +427,13 @@ async function handleConfirmationPhase(
428
427
  const planJson = JSON.stringify(state.generatedPlan, null, 2);
429
428
  const message = `Please review and confirm the generated plan:\n\n${planJson}\n\nType 'yes' to confirm or provide a modified JSON plan:`;
430
429
 
431
- const humanStep: any = { id: `${step.id}_confirm`, type: 'human', message, inputType: 'text' };
430
+ const humanStep: HumanStep = {
431
+ id: `${step.id}_confirm`,
432
+ type: 'human',
433
+ message,
434
+ inputType: 'text',
435
+ needs: [],
436
+ };
432
437
  const confirmResult = await (options.executeHumanStep || executeHumanStep)(
433
438
  humanStep,
434
439
  context,
@@ -677,7 +682,7 @@ async function handleExecutionError(
677
682
  dbState: DynamicStepState | null,
678
683
  stateManager: DynamicStateManager | null,
679
684
  saveState: ((stepId: string, state: DynamicStepState) => Promise<void>) | undefined,
680
- error: any
685
+ error: unknown
681
686
  ): Promise<StepResult> {
682
687
  state.status = 'failed';
683
688
  state.error = error instanceof Error ? error.message : String(error);
@@ -8,6 +8,7 @@ import { ExpressionEvaluator } from '../../expression/evaluator';
8
8
  import type { EngineStep } from '../../parser/schema';
9
9
  import { ConfigLoader } from '../../utils/config-loader';
10
10
  import { LIMITS } from '../../utils/constants';
11
+ import { filterSensitiveEnv } from '../../utils/env-filter';
11
12
  import { extractJson } from '../../utils/json-parser';
12
13
  import { ConsoleLogger, type Logger } from '../../utils/logger';
13
14
  import type { StepResult } from './types.ts';
@@ -294,7 +295,10 @@ export async function executeEngineStep(
294
295
  // This means args are passed directly to the process without shell interpretation.
295
296
  // Combined with the allowlist and version check, this is secure against injection.
296
297
 
297
- const env: Record<string, string> = {};
298
+ // Inherit safe host environment variables
299
+ const hostEnv = filterSensitiveEnv(process.env);
300
+ const env: Record<string, string> = { ...hostEnv };
301
+
298
302
  for (const [key, value] of Object.entries(step.env || {})) {
299
303
  env[key] = ExpressionEvaluator.evaluateString(value, context);
300
304
  }