keystone-cli 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +55 -8
  2. package/package.json +8 -17
  3. package/src/cli.ts +219 -166
  4. package/src/db/memory-db.test.ts +54 -0
  5. package/src/db/memory-db.ts +128 -0
  6. package/src/db/sqlite-setup.test.ts +47 -0
  7. package/src/db/sqlite-setup.ts +49 -0
  8. package/src/db/workflow-db.test.ts +41 -10
  9. package/src/db/workflow-db.ts +90 -28
  10. package/src/expression/evaluator.test.ts +19 -0
  11. package/src/expression/evaluator.ts +134 -39
  12. package/src/parser/schema.ts +41 -0
  13. package/src/runner/audit-verification.test.ts +23 -0
  14. package/src/runner/auto-heal.test.ts +64 -0
  15. package/src/runner/debug-repl.test.ts +308 -0
  16. package/src/runner/debug-repl.ts +225 -0
  17. package/src/runner/foreach-executor.ts +327 -0
  18. package/src/runner/llm-adapter.test.ts +37 -18
  19. package/src/runner/llm-adapter.ts +90 -112
  20. package/src/runner/llm-executor.test.ts +47 -6
  21. package/src/runner/llm-executor.ts +18 -3
  22. package/src/runner/mcp-client.audit.test.ts +69 -0
  23. package/src/runner/mcp-client.test.ts +12 -3
  24. package/src/runner/mcp-client.ts +199 -19
  25. package/src/runner/mcp-manager.ts +19 -8
  26. package/src/runner/mcp-server.test.ts +8 -5
  27. package/src/runner/mcp-server.ts +31 -17
  28. package/src/runner/optimization-runner.ts +305 -0
  29. package/src/runner/reflexion.test.ts +87 -0
  30. package/src/runner/shell-executor.test.ts +12 -0
  31. package/src/runner/shell-executor.ts +9 -6
  32. package/src/runner/step-executor.test.ts +240 -2
  33. package/src/runner/step-executor.ts +183 -68
  34. package/src/runner/stream-utils.test.ts +171 -0
  35. package/src/runner/stream-utils.ts +186 -0
  36. package/src/runner/workflow-runner.test.ts +4 -4
  37. package/src/runner/workflow-runner.ts +438 -259
  38. package/src/templates/agents/keystone-architect.md +6 -4
  39. package/src/templates/full-feature-demo.yaml +4 -4
  40. package/src/types/assets.d.ts +14 -0
  41. package/src/types/status.ts +1 -1
  42. package/src/ui/dashboard.tsx +38 -26
  43. package/src/utils/auth-manager.ts +3 -1
  44. package/src/utils/logger.test.ts +76 -0
  45. package/src/utils/logger.ts +39 -0
  46. package/src/utils/prompt.ts +75 -0
  47. package/src/utils/redactor.test.ts +86 -4
  48. package/src/utils/redactor.ts +48 -13
@@ -0,0 +1,327 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import type { WorkflowDb } from '../db/workflow-db.ts';
3
+ import { type ExpressionContext, ExpressionEvaluator } from '../expression/evaluator.ts';
4
+ import type { Step } from '../parser/schema.ts';
5
+ import { StepStatus, WorkflowStatus } from '../types/status.ts';
6
+ import type { Logger } from '../utils/logger.ts';
7
+ import { WorkflowSuspendedError } from './step-executor.ts';
8
+ import type { ForeachStepContext, StepContext } from './workflow-runner.ts';
9
+
10
+ export type ExecuteStepCallback = (
11
+ step: Step,
12
+ context: ExpressionContext,
13
+ stepExecId: string
14
+ ) => Promise<StepContext>;
15
+
16
+ export class ForeachExecutor {
17
+ private static readonly MEMORY_WARNING_THRESHOLD = 1000;
18
+ private hasWarnedMemory = false;
19
+
20
+ constructor(
21
+ private db: WorkflowDb,
22
+ private logger: Logger,
23
+ private executeStepFn: ExecuteStepCallback
24
+ ) {}
25
+
26
+ /**
27
+ * Aggregate outputs from multiple iterations of a foreach step
28
+ */
29
+ public static aggregateOutputs(outputs: unknown[]): Record<string, unknown> {
30
+ const parentOutputs: Record<string, unknown> = {};
31
+
32
+ const validOutputs = outputs.filter((o) => o !== undefined);
33
+ if (validOutputs.length === 0) return parentOutputs;
34
+
35
+ // We can only aggregate objects, and we assume all outputs have similar shape
36
+ const firstOutput = validOutputs[0];
37
+ if (typeof firstOutput !== 'object' || firstOutput === null) {
38
+ return parentOutputs;
39
+ }
40
+
41
+ // Collect all keys from all outputs
42
+ const keys = new Set<string>();
43
+ for (const output of validOutputs) {
44
+ if (typeof output === 'object' && output !== null) {
45
+ for (const key of Object.keys(output)) {
46
+ keys.add(key);
47
+ }
48
+ }
49
+ }
50
+
51
+ // For each key, create an array of values
52
+ for (const key of keys) {
53
+ parentOutputs[key] = outputs.map((output) => {
54
+ if (typeof output === 'object' && output !== null) {
55
+ return (output as Record<string, unknown>)[key];
56
+ }
57
+ return undefined;
58
+ });
59
+ }
60
+
61
+ return parentOutputs;
62
+ }
63
+
64
+ /**
65
+ * Execute a step with foreach logic
66
+ */
67
+ async execute(
68
+ step: Step,
69
+ baseContext: ExpressionContext,
70
+ runId: string,
71
+ existingContext?: ForeachStepContext
72
+ ): Promise<ForeachStepContext> {
73
+ if (!step.foreach) {
74
+ throw new Error('Step is not a foreach step');
75
+ }
76
+
77
+ const items = ExpressionEvaluator.evaluate(step.foreach, baseContext);
78
+ if (!Array.isArray(items)) {
79
+ throw new Error(`foreach expression must evaluate to an array: ${step.foreach}`);
80
+ }
81
+
82
+ this.logger.log(` ⤷ Executing step ${step.id} for ${items.length} items`);
83
+
84
+ if (items.length > ForeachExecutor.MEMORY_WARNING_THRESHOLD && !this.hasWarnedMemory) {
85
+ this.logger.warn(
86
+ ` ⚠️ Warning: Large foreach loop detected (${items.length} items). This may consume significant memory and lead to instability.`
87
+ );
88
+ this.hasWarnedMemory = true;
89
+ }
90
+
91
+ // Evaluate concurrency
92
+ let concurrencyLimit = items.length;
93
+ if (step.concurrency !== undefined) {
94
+ if (typeof step.concurrency === 'string') {
95
+ concurrencyLimit = Number(ExpressionEvaluator.evaluate(step.concurrency, baseContext));
96
+ if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
97
+ throw new Error(
98
+ `concurrency must evaluate to a positive integer, got: ${concurrencyLimit}`
99
+ );
100
+ }
101
+ } else {
102
+ concurrencyLimit = step.concurrency;
103
+ if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
104
+ throw new Error(`concurrency must be a positive integer, got: ${concurrencyLimit}`);
105
+ }
106
+ }
107
+ }
108
+
109
+ // Create parent step record in DB
110
+ const parentStepExecId = randomUUID();
111
+ await this.db.createStep(parentStepExecId, runId, step.id);
112
+ await this.db.startStep(parentStepExecId);
113
+
114
+ // Persist the foreach items
115
+ await this.db.completeStep(parentStepExecId, StepStatus.PENDING, { __foreachItems: items });
116
+
117
+ try {
118
+ // Initialize results array
119
+ const itemResults: StepContext[] = existingContext?.items || new Array(items.length);
120
+ const shouldCheckDb = !!existingContext;
121
+
122
+ // Ensure array is correct length
123
+ if (itemResults.length !== items.length) {
124
+ itemResults.length = items.length;
125
+ }
126
+
127
+ // Worker pool implementation
128
+ let currentIndex = 0;
129
+ let aborted = false;
130
+ const workers = new Array(Math.min(concurrencyLimit, items.length))
131
+ .fill(null)
132
+ .map(async () => {
133
+ const nextIndex = () => {
134
+ if (aborted) return null;
135
+ if (currentIndex >= items.length) return null;
136
+ const i = currentIndex;
137
+ currentIndex += 1;
138
+ return i;
139
+ };
140
+
141
+ while (true) {
142
+ const i = nextIndex();
143
+ if (i === null) break;
144
+
145
+ if (aborted) break;
146
+
147
+ const item = items[i];
148
+
149
+ // Skip if already successful or skipped
150
+ if (
151
+ itemResults[i] &&
152
+ (itemResults[i].status === StepStatus.SUCCESS ||
153
+ itemResults[i].status === StepStatus.SKIPPED)
154
+ ) {
155
+ continue;
156
+ }
157
+
158
+ // Build item-specific context
159
+ const itemContext = {
160
+ ...baseContext,
161
+ item,
162
+ index: i,
163
+ };
164
+
165
+ // Check DB again for robustness (resume flows only)
166
+ const existingExec = shouldCheckDb
167
+ ? await this.db.getStepByIteration(runId, step.id, i)
168
+ : undefined;
169
+ if (
170
+ existingExec &&
171
+ (existingExec.status === StepStatus.SUCCESS ||
172
+ existingExec.status === StepStatus.SKIPPED)
173
+ ) {
174
+ let output: unknown = null;
175
+ let itemStatus = existingExec.status as
176
+ | typeof StepStatus.SUCCESS
177
+ | typeof StepStatus.SKIPPED
178
+ | typeof StepStatus.FAILED;
179
+
180
+ try {
181
+ output = existingExec.output ? JSON.parse(existingExec.output) : null;
182
+ } catch (error) {
183
+ this.logger.warn(
184
+ `Failed to parse output for step ${step.id} iteration ${i}: ${error}`
185
+ );
186
+ output = { error: 'Failed to parse output' };
187
+ itemStatus = StepStatus.FAILED;
188
+ aborted = true; // Fail fast if we find corrupted data
189
+ try {
190
+ await this.db.completeStep(
191
+ existingExec.id,
192
+ StepStatus.FAILED,
193
+ output,
194
+ 'Failed to parse output'
195
+ );
196
+ } catch (dbError) {
197
+ this.logger.warn(
198
+ `Failed to update DB for corrupted output on step ${step.id} iteration ${i}: ${dbError}`
199
+ );
200
+ }
201
+ }
202
+ itemResults[i] = {
203
+ output,
204
+ outputs:
205
+ typeof output === 'object' && output !== null && !Array.isArray(output)
206
+ ? (output as Record<string, unknown>)
207
+ : {},
208
+ status: itemStatus,
209
+ } as StepContext;
210
+ continue;
211
+ }
212
+
213
+ if (aborted) break;
214
+
215
+ const stepExecId = randomUUID();
216
+ await this.db.createStep(stepExecId, runId, step.id, i);
217
+
218
+ // Execute and store result
219
+ try {
220
+ if (aborted) break;
221
+ this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
222
+ itemResults[i] = await this.executeStepFn(step, itemContext, stepExecId);
223
+ if (
224
+ itemResults[i].status === StepStatus.FAILED ||
225
+ itemResults[i].status === StepStatus.SUSPENDED
226
+ ) {
227
+ aborted = true;
228
+ }
229
+ } catch (error) {
230
+ aborted = true;
231
+ throw error;
232
+ }
233
+ }
234
+ });
235
+
236
+ const workerResults = await Promise.allSettled(workers);
237
+
238
+ // Check if any worker rejected (this would be due to an unexpected throw)
239
+ const firstError = workerResults.find((r) => r.status === 'rejected') as
240
+ | PromiseRejectedResult
241
+ | undefined;
242
+ if (firstError) {
243
+ throw firstError.reason;
244
+ }
245
+
246
+ // Aggregate results
247
+ const outputs = itemResults.map((r) => r?.output);
248
+ const allSuccess = itemResults.every((r) => r?.status === StepStatus.SUCCESS);
249
+ const anyFailed = itemResults.some((r) => r?.status === StepStatus.FAILED);
250
+ const anySuspended = itemResults.some((r) => r?.status === StepStatus.SUSPENDED);
251
+
252
+ // Aggregate usage
253
+ const aggregatedUsage = itemResults.reduce(
254
+ (acc, r) => {
255
+ if (r?.usage) {
256
+ acc.prompt_tokens += r.usage.prompt_tokens;
257
+ acc.completion_tokens += r.usage.completion_tokens;
258
+ acc.total_tokens += r.usage.total_tokens;
259
+ }
260
+ return acc;
261
+ },
262
+ { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
263
+ );
264
+
265
+ // Map child properties
266
+ const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
267
+
268
+ // Determine final status
269
+ let finalStatus: (typeof StepStatus)[keyof typeof StepStatus] = StepStatus.FAILED;
270
+ if (allSuccess) {
271
+ finalStatus = StepStatus.SUCCESS;
272
+ } else if (anyFailed) {
273
+ finalStatus = StepStatus.FAILED;
274
+ } else if (anySuspended) {
275
+ finalStatus = StepStatus.SUSPENDED;
276
+ }
277
+
278
+ const aggregatedContext: ForeachStepContext = {
279
+ output: outputs,
280
+ outputs: mappedOutputs,
281
+ status: finalStatus,
282
+ items: itemResults,
283
+ usage: aggregatedUsage,
284
+ };
285
+
286
+ const persistedContext = {
287
+ ...aggregatedContext,
288
+ __foreachItems: items,
289
+ };
290
+
291
+ // Update parent step record
292
+ await this.db.completeStep(
293
+ parentStepExecId,
294
+ finalStatus,
295
+ persistedContext,
296
+ finalStatus === StepStatus.FAILED ? 'One or more iterations failed' : undefined
297
+ );
298
+
299
+ if (finalStatus === StepStatus.SUSPENDED) {
300
+ const suspendedItem = itemResults.find((r) => r.status === StepStatus.SUSPENDED);
301
+ throw new WorkflowSuspendedError(
302
+ suspendedItem?.error || 'Iteration suspended',
303
+ step.id,
304
+ 'text'
305
+ );
306
+ }
307
+
308
+ if (finalStatus === StepStatus.FAILED) {
309
+ throw new Error(`Step ${step.id} failed: one or more iterations failed`);
310
+ }
311
+
312
+ return aggregatedContext;
313
+ } catch (error) {
314
+ if (error instanceof WorkflowSuspendedError) {
315
+ throw error;
316
+ }
317
+ // Mark parent step as failed (if not already handled)
318
+ const errorMsg = error instanceof Error ? error.message : String(error);
319
+ try {
320
+ await this.db.completeStep(parentStepExecId, StepStatus.FAILED, null, errorMsg);
321
+ } catch (dbError) {
322
+ this.logger.error(`Failed to update DB on foreach error: ${dbError}`);
323
+ }
324
+ throw error;
325
+ }
326
+ }
327
+ }
@@ -105,7 +105,9 @@ describe('AnthropicAdapter', () => {
105
105
  // @ts-ignore
106
106
  const fetchMock = global.fetch as MockFetch;
107
107
  // @ts-ignore
108
- const [url, init] = fetchMock.mock.calls[0];
108
+ // @ts-ignore
109
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
110
+ const [url, init] = fetchMock.mock.calls[0] as [string, any];
109
111
 
110
112
  expect(url).toBe('https://api.anthropic.com/v1/messages');
111
113
  expect(init.headers['x-api-key']).toBe('fake-anthropic-key');
@@ -179,7 +181,8 @@ describe('AnthropicAdapter', () => {
179
181
  ]);
180
182
 
181
183
  // @ts-ignore
182
- const init = global.fetch.mock.calls[0][1];
184
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
185
+ const init = global.fetch.mock.calls[0][1] as any;
183
186
  const body = JSON.parse(init.body);
184
187
  expect(body.messages[0].role).toBe('assistant');
185
188
  expect(body.messages[0].content).toHaveLength(2);
@@ -208,7 +211,8 @@ describe('AnthropicAdapter', () => {
208
211
  ]);
209
212
 
210
213
  // @ts-ignore
211
- const init = global.fetch.mock.calls[0][1];
214
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
215
+ const init = global.fetch.mock.calls[0][1] as any;
212
216
  const body = JSON.parse(init.body);
213
217
  expect(body.messages[0].role).toBe('user');
214
218
  expect(body.messages[0].content[0]).toEqual({
@@ -255,7 +259,9 @@ describe('CopilotAdapter', () => {
255
259
  // @ts-ignore
256
260
  const fetchMock = global.fetch as MockFetch;
257
261
  // @ts-ignore
258
- const [url, init] = fetchMock.mock.calls[0];
262
+ // @ts-ignore
263
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
264
+ const [url, init] = fetchMock.mock.calls[0] as [string, any];
259
265
  expect(url).toBe('https://api.githubcopilot.com/chat/completions');
260
266
  expect(init.headers.Authorization).toBe('Bearer mock-token');
261
267
  spy.mockRestore();
@@ -272,33 +278,41 @@ describe('CopilotAdapter', () => {
272
278
 
273
279
  describe('getAdapter', () => {
274
280
  beforeEach(() => {
275
- spyOn(ConfigLoader, 'getProviderForModel').mockImplementation((model: string) => {
276
- if (model.startsWith('claude')) return 'anthropic';
277
- if (model.startsWith('gpt')) return 'openai';
278
- if (model.startsWith('copilot')) return 'copilot';
279
- return 'openai';
280
- });
281
- // @ts-ignore
282
- spyOn(ConfigLoader, 'load').mockReturnValue({
281
+ // Setup a clean config for each test
282
+ ConfigLoader.setConfig({
283
+ default_provider: 'openai',
283
284
  providers: {
284
285
  openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
285
286
  anthropic: { type: 'anthropic', api_key_env: 'ANTHROPIC_API_KEY' },
286
287
  copilot: { type: 'copilot' },
287
288
  },
289
+ model_mappings: {
290
+ 'claude-*': 'anthropic',
291
+ 'gpt-*': 'openai',
292
+ 'copilot:*': 'copilot',
293
+ },
294
+ storage: { retention_days: 30 },
295
+ workflows_directory: 'workflows',
296
+ mcp_servers: {},
288
297
  });
289
298
  });
290
299
 
291
300
  afterEach(() => {
292
- mock.restore();
301
+ ConfigLoader.clear();
293
302
  });
294
303
 
295
304
  it('should return OpenAIAdapter for gpt models', () => {
305
+ // ConfigLoader.getProviderForModel logic will handle this
296
306
  const { adapter, resolvedModel } = getAdapter('gpt-4');
297
307
  expect(adapter).toBeInstanceOf(OpenAIAdapter);
298
308
  expect(resolvedModel).toBe('gpt-4');
299
309
  });
300
310
 
301
311
  it('should return AnthropicAdapter for claude models', () => {
312
+ // Explicit mapping in our mock config above covers this if ConfigLoader logic works
313
+ // Or we rely on model name prefix if ConfigLoader has that default logic
314
+ // Let's ensure the mapping exists if we removed the spy
315
+ // ConfigLoader.getProviderForModel uses: explicit mapping OR default provider
302
316
  const { adapter, resolvedModel } = getAdapter('claude-3');
303
317
  expect(adapter).toBeInstanceOf(AnthropicAdapter);
304
318
  expect(resolvedModel).toBe('claude-3');
@@ -311,11 +325,16 @@ describe('getAdapter', () => {
311
325
  });
312
326
 
313
327
  it('should throw error for unknown provider', () => {
314
- // @ts-ignore
315
- ConfigLoader.getProviderForModel.mockReturnValue('unknown');
316
- // @ts-ignore
317
- ConfigLoader.load.mockReturnValue({ providers: {} });
328
+ // Set config with empty providers to force error
329
+ ConfigLoader.setConfig({
330
+ default_provider: 'unknown',
331
+ providers: {}, // No providers configured
332
+ model_mappings: {},
333
+ storage: { retention_days: 30 },
334
+ workflows_directory: 'workflows',
335
+ mcp_servers: {},
336
+ });
318
337
 
319
- expect(() => getAdapter('unknown-model')).toThrow(/Provider configuration not found/);
338
+ expect(() => getAdapter('unknown-model')).toThrow();
320
339
  });
321
340
  });