keystone-cli 2.0.0 → 2.1.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (57):
  1. package/README.md +43 -4
  2. package/package.json +4 -1
  3. package/src/cli.ts +1 -0
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/config-schema.ts +6 -0
  12. package/src/parser/schema.ts +1 -0
  13. package/src/runner/__test__/llm-test-setup.ts +43 -11
  14. package/src/runner/durable-timers.test.ts +1 -1
  15. package/src/runner/executors/dynamic-executor.ts +125 -88
  16. package/src/runner/executors/engine-executor.ts +10 -39
  17. package/src/runner/executors/file-executor.ts +67 -0
  18. package/src/runner/executors/foreach-executor.ts +170 -17
  19. package/src/runner/executors/human-executor.ts +18 -0
  20. package/src/runner/executors/llm/stream-handler.ts +103 -0
  21. package/src/runner/executors/llm/tool-manager.ts +360 -0
  22. package/src/runner/executors/llm-executor.ts +288 -555
  23. package/src/runner/executors/memory-executor.ts +41 -34
  24. package/src/runner/executors/shell-executor.ts +96 -52
  25. package/src/runner/executors/subworkflow-executor.ts +16 -0
  26. package/src/runner/executors/types.ts +3 -1
  27. package/src/runner/executors/verification_fixes.test.ts +46 -0
  28. package/src/runner/join-scheduling.test.ts +2 -1
  29. package/src/runner/llm-adapter.integration.test.ts +10 -5
  30. package/src/runner/llm-adapter.ts +57 -18
  31. package/src/runner/llm-clarification.test.ts +4 -1
  32. package/src/runner/llm-executor.test.ts +21 -7
  33. package/src/runner/mcp-client.ts +36 -2
  34. package/src/runner/mcp-server.ts +65 -36
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/standard-tools-ast.test.ts +4 -2
  40. package/src/runner/standard-tools-execution.test.ts +14 -1
  41. package/src/runner/standard-tools-integration.test.ts +6 -0
  42. package/src/runner/standard-tools.ts +13 -10
  43. package/src/runner/step-executor.ts +2 -2
  44. package/src/runner/tool-integration.test.ts +4 -1
  45. package/src/runner/workflow-runner.test.ts +23 -12
  46. package/src/runner/workflow-runner.ts +172 -79
  47. package/src/runner/workflow-state.ts +181 -111
  48. package/src/ui/dashboard.tsx +17 -3
  49. package/src/utils/config-loader.ts +4 -0
  50. package/src/utils/constants.ts +4 -0
  51. package/src/utils/context-injector.test.ts +27 -27
  52. package/src/utils/context-injector.ts +68 -26
  53. package/src/utils/process-sandbox.ts +138 -148
  54. package/src/utils/redactor.ts +39 -9
  55. package/src/utils/resource-loader.ts +24 -19
  56. package/src/utils/sandbox.ts +6 -0
  57. package/src/utils/stream-utils.ts +58 -0
@@ -38,6 +38,7 @@ export interface StepExecution {
38
38
  completed_at: string | null;
39
39
  retry_count: number;
40
40
  usage: string | null; // JSON
41
+ metadata: string | null; // JSON
41
42
  }
42
43
 
43
44
  export interface IdempotencyRecord {
@@ -84,6 +85,19 @@ export interface ThoughtEvent {
84
85
  created_at: string;
85
86
  }
86
87
 
88
+ export interface StepBatchUpdate {
89
+ type: 'start' | 'complete';
90
+ id: string;
91
+ data: {
92
+ status?: StepStatusType;
93
+ output?: unknown;
94
+ error?: string;
95
+ usage?: unknown;
96
+ startedAt?: string;
97
+ completedAt?: string;
98
+ };
99
+ }
100
+
87
101
  /**
88
102
  * Base error class for database operations
89
103
  */
@@ -117,10 +131,13 @@ export class WorkflowDb {
117
131
  private createStepStmt!: Statement;
118
132
  private startStepStmt!: Statement;
119
133
  private completeStepStmt!: Statement;
134
+ private updateStepMetadataStmt!: Statement;
120
135
  private incrementRetryStmt!: Statement;
121
136
  private getStepByIterationStmt!: Statement;
122
137
  private getMainStepStmt!: Statement;
123
138
  private getStepIterationsStmt!: Statement;
139
+ private getStepIterationsMetadataStmt!: Statement;
140
+ private countStepIterationsStmt!: Statement;
124
141
  private getStepsByRunStmt!: Statement;
125
142
  private getSuccessfulRunsStmt!: Statement;
126
143
  private getLastRunStmt!: Statement;
@@ -159,6 +176,7 @@ export class WorkflowDb {
159
176
  private clearTimersByRunStmt!: Statement;
160
177
  private clearAllTimersStmt!: Statement;
161
178
  private clearAllStepCacheStmt!: Statement;
179
+ private getSuspendedStepsForEventStmt!: Statement;
162
180
  private isClosed = false;
163
181
 
164
182
  constructor(public readonly dbPath = PathResolver.resolveDbPath()) {
@@ -206,8 +224,8 @@ export class WorkflowDb {
206
224
  AND status IN ('success', 'failed', 'canceled')
207
225
  `);
208
226
  this.createStepStmt = this.db.prepare(`
209
- INSERT INTO step_executions (id, run_id, step_id, iteration_index, status, retry_count)
210
- VALUES (?, ?, ?, ?, ?, ?)
227
+ INSERT INTO step_executions (id, run_id, step_id, iteration_index, status, retry_count, metadata)
228
+ VALUES (?, ?, ?, ?, ?, ?, ?)
211
229
  `);
212
230
  this.startStepStmt = this.db.prepare(`
213
231
  UPDATE step_executions
@@ -219,6 +237,11 @@ export class WorkflowDb {
219
237
  SET status = ?, output = ?, error = ?, completed_at = ?, usage = ?
220
238
  WHERE id = ?
221
239
  `);
240
+ this.updateStepMetadataStmt = this.db.prepare(`
241
+ UPDATE step_executions
242
+ SET metadata = ?
243
+ WHERE id = ?
244
+ `);
222
245
  this.incrementRetryStmt = this.db.prepare(`
223
246
  UPDATE step_executions
224
247
  SET retry_count = retry_count + 1
@@ -241,6 +264,16 @@ export class WorkflowDb {
241
264
  WHERE run_id = ? AND step_id = ? AND iteration_index IS NOT NULL
242
265
  ORDER BY iteration_index ASC
243
266
  `);
267
+ this.getStepIterationsMetadataStmt = this.db.prepare(`
268
+ SELECT id, run_id, step_id, status, error, usage, started_at, completed_at, iteration_index, CASE WHEN output IS NOT NULL THEN '{"truncated":true}' ELSE NULL END as output
269
+ FROM step_executions
270
+ WHERE run_id = ? AND step_id = ? AND iteration_index IS NOT NULL
271
+ ORDER BY iteration_index ASC
272
+ `);
273
+ this.countStepIterationsStmt = this.db.prepare(`
274
+ SELECT count(*) as count FROM step_executions
275
+ WHERE run_id = ? AND step_id = ? AND iteration_index IS NOT NULL
276
+ `);
244
277
  this.getStepsByRunStmt = this.db.prepare(`
245
278
  SELECT * FROM step_executions
246
279
  WHERE run_id = ?
@@ -405,6 +438,14 @@ export class WorkflowDb {
405
438
  this.clearTimersByRunStmt = this.db.prepare('DELETE FROM durable_timers WHERE run_id = ?');
406
439
  this.clearAllTimersStmt = this.db.prepare('DELETE FROM durable_timers');
407
440
  this.clearAllStepCacheStmt = this.db.prepare('DELETE FROM step_cache');
441
+ // PERFORMANCE NOTE: This uses a LIKE query on the 'output' column, which is not indexed for text search.
442
+ // If the number of suspended steps grows very large, this will become a performance bottleneck.
443
+ // Consider adding a dedicated 'wait_event' column if this becomes a scalability issue.
444
+ this.getSuspendedStepsForEventStmt = this.db.prepare(`
445
+ SELECT run_id FROM step_executions
446
+ WHERE status = 'suspended'
447
+ AND output LIKE ?
448
+ `);
408
449
  }
409
450
 
410
451
  /**
@@ -420,13 +461,57 @@ export class WorkflowDb {
420
461
  * Batch create multiple step executions in a single transaction.
421
462
  */
422
463
  public async batchCreateSteps(
423
- steps: Array<{ id: string; runId: string; stepId: string; iterationIndex: number | null }>
464
+ steps: Array<{
465
+ id: string;
466
+ runId: string;
467
+ stepId: string;
468
+ iterationIndex: number | null;
469
+ metadata?: Record<string, unknown>;
470
+ }>
424
471
  ): Promise<void> {
425
472
  if (steps.length === 0) return;
426
473
  await this.withRetry(() => {
427
474
  this.db.transaction(() => {
428
475
  for (const s of steps) {
429
- this.createStepStmt.run(s.id, s.runId, s.stepId, s.iterationIndex, 'pending', 0);
476
+ this.createStepStmt.run(
477
+ s.id,
478
+ s.runId,
479
+ s.stepId,
480
+ s.iterationIndex,
481
+ 'pending',
482
+ 0,
483
+ s.metadata ? JSON.stringify(s.metadata) : null
484
+ );
485
+ }
486
+ })();
487
+ });
488
+ }
489
+
490
+ /**
491
+ * Batch update step status (start or complete) in a single transaction.
492
+ * This reduces database contention for high-concurrency scenarios.
493
+ */
494
+ public async batchUpdateSteps(updates: StepBatchUpdate[]): Promise<void> {
495
+ if (updates.length === 0) return;
496
+ await this.withRetry(() => {
497
+ this.db.transaction(() => {
498
+ for (const update of updates) {
499
+ if (update.type === 'start') {
500
+ this.startStepStmt.run(
501
+ update.data.status || 'running',
502
+ update.data.startedAt || new Date().toISOString(),
503
+ update.id
504
+ );
505
+ } else if (update.type === 'complete') {
506
+ this.completeStepStmt.run(
507
+ update.data.status || 'success',
508
+ update.data.output ? JSON.stringify(update.data.output) : null,
509
+ update.data.error || null,
510
+ update.data.completedAt || new Date().toISOString(),
511
+ update.data.usage ? JSON.stringify(update.data.usage) : null,
512
+ update.id
513
+ );
514
+ }
430
515
  }
431
516
  })();
432
517
  });
@@ -620,6 +705,28 @@ export class WorkflowDb {
620
705
  PRAGMA user_version = 6;
621
706
  `);
622
707
  }
708
+
709
+ // Version 7: Add composite index for step retrieval optimization
710
+ if (version < 7) {
711
+ this.db.exec(`
712
+ CREATE INDEX IF NOT EXISTS idx_steps_run_ordered ON step_executions(run_id, started_at, iteration_index);
713
+ PRAGMA user_version = 7;
714
+ `);
715
+ }
716
+
717
+ // Version 8: Add metadata column to step_executions
718
+ if (version < 8) {
719
+ const hasMetadata = this.db
720
+ .query(
721
+ "SELECT count(*) as count FROM pragma_table_info('step_executions') WHERE name='metadata'"
722
+ )
723
+ .get() as { count: number };
724
+
725
+ if (hasMetadata.count === 0) {
726
+ this.db.exec('ALTER TABLE step_executions ADD COLUMN metadata TEXT;');
727
+ }
728
+ this.db.exec('PRAGMA user_version = 8;');
729
+ }
623
730
  }
624
731
 
625
732
  private initSchema(): void {
@@ -656,6 +763,7 @@ export class WorkflowDb {
656
763
  CREATE INDEX IF NOT EXISTS idx_steps_run ON step_executions(run_id);
657
764
  CREATE INDEX IF NOT EXISTS idx_steps_status ON step_executions(status);
658
765
  CREATE INDEX IF NOT EXISTS idx_steps_iteration ON step_executions(run_id, step_id, iteration_index);
766
+ CREATE INDEX IF NOT EXISTS idx_steps_run_ordered ON step_executions(run_id, started_at, iteration_index);
659
767
 
660
768
  CREATE TABLE IF NOT EXISTS idempotency_records (
661
769
  idempotency_key TEXT PRIMARY KEY,
@@ -791,10 +899,32 @@ export class WorkflowDb {
791
899
  const cutoffDate = new Date();
792
900
  cutoffDate.setDate(cutoffDate.getDate() - days);
793
901
  const cutoffIso = cutoffDate.toISOString();
902
+ let totalDeleted = 0;
903
+ const BATCH_SIZE = 1000;
794
904
 
795
- const result = this.pruneRunsStmt.run(cutoffIso);
905
+ // Prepare ad-hoc statement for batched deletion
906
+ // We use IN (SELECT ... LIMIT) because strict DELETE LIMIT is not standard SQL
907
+ const stmt = this.db.prepare(`
908
+ DELETE FROM workflow_runs
909
+ WHERE id IN (
910
+ SELECT id FROM workflow_runs
911
+ WHERE started_at < ?
912
+ AND status IN ('success', 'failed', 'canceled')
913
+ LIMIT ?
914
+ )
915
+ `);
796
916
 
797
- return result.changes;
917
+ try {
918
+ while (true) {
919
+ const result = stmt.run(cutoffIso, BATCH_SIZE);
920
+ if (result.changes === 0) break;
921
+ totalDeleted += result.changes;
922
+ }
923
+ } finally {
924
+ stmt.finalize();
925
+ }
926
+
927
+ return totalDeleted;
798
928
  });
799
929
  }
800
930
 
@@ -810,10 +940,19 @@ export class WorkflowDb {
810
940
  id: string,
811
941
  runId: string,
812
942
  stepId: string,
813
- iterationIndex: number | null = null
943
+ iterationIndex: number | null = null,
944
+ metadata?: Record<string, unknown>
814
945
  ): Promise<void> {
815
946
  await this.withRetry(() => {
816
- this.createStepStmt.run(id, runId, stepId, iterationIndex, 'pending', 0);
947
+ this.createStepStmt.run(
948
+ id,
949
+ runId,
950
+ stepId,
951
+ iterationIndex,
952
+ 'pending',
953
+ 0,
954
+ metadata ? JSON.stringify(metadata) : null
955
+ );
817
956
  });
818
957
  }
819
958
 
@@ -823,6 +962,12 @@ export class WorkflowDb {
823
962
  });
824
963
  }
825
964
 
965
+ async updateStepMetadata(id: string, metadata: Record<string, unknown>): Promise<void> {
966
+ await this.withRetry(() => {
967
+ this.updateStepMetadataStmt.run(JSON.stringify(metadata), id);
968
+ });
969
+ }
970
+
826
971
  async completeStep(
827
972
  id: string,
828
973
  status: StepStatus,
@@ -874,12 +1019,29 @@ export class WorkflowDb {
874
1019
  /**
875
1020
  * Get all iterations for a step
876
1021
  */
877
- public async getStepIterations(runId: string, stepId: string): Promise<StepExecution[]> {
1022
+ public async getStepIterations(
1023
+ runId: string,
1024
+ stepId: string,
1025
+ options?: { includeOutput?: boolean }
1026
+ ): Promise<StepExecution[]> {
878
1027
  return this.withRetry(() => {
1028
+ if (options?.includeOutput === false) {
1029
+ return this.getStepIterationsMetadataStmt.all(runId, stepId) as StepExecution[];
1030
+ }
879
1031
  return this.getStepIterationsStmt.all(runId, stepId) as StepExecution[];
880
1032
  });
881
1033
  }
882
1034
 
1035
+ /**
1036
+ * Count iterations for a step
1037
+ */
1038
+ public async countStepIterations(runId: string, stepId: string): Promise<number> {
1039
+ return this.withRetry(() => {
1040
+ const result = this.countStepIterationsStmt.get(runId, stepId) as { count: number };
1041
+ return result?.count || 0;
1042
+ });
1043
+ }
1044
+
883
1045
  /**
884
1046
  * Get all step executions for a workflow run
885
1047
  * @note Synchronous method - wrapped in sync retry logic
@@ -1486,4 +1648,14 @@ export class WorkflowDb {
1486
1648
  return this.listThoughtEventsStmt.all(limit) as ThoughtEvent[];
1487
1649
  });
1488
1650
  }
1651
+
1652
+ async getSuspendedStepsForEvent(eventName: string): Promise<string[]> {
1653
+ return this.withRetry(() => {
1654
+ // Look for runs suspended with { "event": eventName } in output
1655
+ // We use LIKE for a simple containment check since output is JSON
1656
+ const pattern = `%"event":"${eventName}"%`;
1657
+ const results = this.getSuspendedStepsForEventStmt.all(pattern) as { run_id: string }[];
1658
+ return results.map((r) => r.run_id);
1659
+ });
1660
+ }
1489
1661
  }
@@ -552,7 +552,10 @@ export class ExpressionEvaluator {
552
552
  case '+':
553
553
  // Support both string concatenation and numeric addition
554
554
  if (typeof left === 'string' || typeof right === 'string') {
555
- return String(left ?? '') + String(right ?? '');
555
+ // Handle null/undefined as empty strings for concatenation to match template behavior
556
+ const leftStr = left === null || left === undefined ? '' : String(left);
557
+ const rightStr = right === null || right === undefined ? '' : String(right);
558
+ return leftStr + rightStr;
556
559
  }
557
560
  return Number(left) + Number(right);
558
561
  case '-':
@@ -116,6 +116,12 @@ export const ConfigSchema = z.object({
116
116
  .optional(),
117
117
  })
118
118
  .optional(),
119
+ logging: z
120
+ .object({
121
+ suppress_security_warning: z.boolean().default(false),
122
+ suppress_ai_sdk_warnings: z.boolean().default(false),
123
+ })
124
+ .default({}),
119
125
  });
120
126
 
121
127
  export type Config = z.infer<typeof ConfigSchema>;
@@ -140,6 +140,7 @@ export const BaseStepSchema = z.object({
140
140
  outputRetries: z.number().int().min(0).optional(), // Max retries for output validation failures
141
141
  repairStrategy: z.enum(['reask', 'repair', 'hybrid']).optional(), // Strategy for output repair
142
142
  compensate: z.lazy(() => StepSchema).optional(), // Compensation step to run on rollback
143
+ failFast: z.boolean().optional(), // Stop iteration on first failure (default true)
143
144
  });
144
145
 
145
146
  // ===== Step Type Schemas =====
@@ -5,8 +5,12 @@
5
5
  * tests to opt-in to mocking rather than having it applied globally.
6
6
  */
7
7
  import { mock, spyOn } from 'bun:test';
8
+ import { ConfigLoader } from '../../utils/config-loader';
8
9
  import * as llmAdapter from '../llm-adapter';
9
10
 
11
+ // Disable AI SDK warnings for cleaner test output
12
+ (global as any).AI_SDK_LOG_WARNINGS = false;
13
+
10
14
  // Create singleton mock functions that all test files share
11
15
  export const mockGetModel = mock();
12
16
  export const mockGetEmbeddingModel = mock();
@@ -98,7 +102,7 @@ export function createUnifiedMockModel() {
98
102
  }));
99
103
 
100
104
  const finalToolCalls = toolCalls && toolCalls.length > 0 ? toolCalls : undefined;
101
- const text = response.message.content || ' ';
105
+ const text = response.message.content || '';
102
106
 
103
107
  // Internal AI SDK v6.0.3+ seems to expect 'content' on the result object
104
108
  // during generateText processing, even if not in the official v2 spec.
@@ -112,9 +116,8 @@ export function createUnifiedMockModel() {
112
116
  type: 'tool-call',
113
117
  toolCallId: tc.toolCallId,
114
118
  toolName: tc.toolName,
115
- args: tc.args,
116
- input: JSON.stringify(tc.args), // Add required input field
117
- });
119
+ input: tc.args || (tc as any).input || {},
120
+ } as any);
118
121
  }
119
122
  }
120
123
 
@@ -174,28 +177,34 @@ export function createUnifiedMockModel() {
174
177
 
175
178
  const stream = new ReadableStream({
176
179
  async start(controller) {
177
- if (response.message.content) {
180
+ if (response.message.content !== undefined && response.message.content !== null) {
178
181
  controller.enqueue({
179
182
  type: 'text-delta',
183
+ index: 0,
184
+ textDelta: response.message.content,
180
185
  delta: response.message.content,
181
- text: response.message.content,
182
- });
186
+ } as any);
183
187
  }
184
188
 
185
189
  const toolCalls = response.message.tool_calls?.map((tc: any) => ({
186
- type: 'tool-call' as const,
190
+ type: 'tool-call',
187
191
  toolCallId: tc.id,
188
192
  toolName: tc.function.name,
189
193
  args:
190
194
  typeof tc.function.arguments === 'string'
191
195
  ? JSON.parse(tc.function.arguments)
192
196
  : tc.function.arguments,
197
+ input:
198
+ typeof tc.function.arguments === 'string'
199
+ ? JSON.parse(tc.function.arguments)
200
+ : tc.function.arguments,
193
201
  id: tc.id,
194
202
  name: tc.function.name,
195
- input:
203
+ delta: JSON.stringify(
196
204
  typeof tc.function.arguments === 'string'
197
- ? tc.function.arguments
198
- : JSON.stringify(tc.function.arguments),
205
+ ? JSON.parse(tc.function.arguments)
206
+ : tc.function.arguments
207
+ ),
199
208
  }));
200
209
 
201
210
  if (toolCalls?.length) {
@@ -204,6 +213,7 @@ export function createUnifiedMockModel() {
204
213
  }
205
214
  }
206
215
 
216
+ // Finish event
207
217
  controller.enqueue({
208
218
  type: 'finish',
209
219
  finishReason: toolCalls?.length ? 'tool-calls' : 'stop',
@@ -235,6 +245,28 @@ import { resetProviderRegistry } from '../llm-adapter';
235
245
  export function setupLlmMocks() {
236
246
  resetProviderRegistry(); // Clear cache to ensure new mock is used
237
247
 
248
+ // Set a default mock configuration for tests to avoid interference from local config.yaml
249
+ ConfigLoader.setConfig({
250
+ default_provider: 'openai',
251
+ providers: {
252
+ openai: {
253
+ type: 'openai',
254
+ package: '@ai-sdk/openai',
255
+ },
256
+ anthropic: {
257
+ type: 'anthropic',
258
+ package: '@ai-sdk/anthropic',
259
+ },
260
+ },
261
+ model_mappings: {
262
+ 'claude-*': 'anthropic',
263
+ },
264
+ engines: {
265
+ allowlist: {},
266
+ denylist: [],
267
+ },
268
+ } as any);
269
+
238
270
  // Provider factory (e.g. createOpenAI) returns a Provider Instance function
239
271
  const mockProviderInstance = (modelId: string) => createUnifiedMockModel();
240
272
  const mockProviderFactory = (options?: any) => mockProviderInstance;
@@ -7,7 +7,7 @@ import { WorkflowSuspendedError, WorkflowWaitingError } from './step-executor';
7
7
  import { WorkflowRunner } from './workflow-runner';
8
8
 
9
9
  describe('Durable Timers Integration', () => {
10
- const dbPath = 'test-timers.db';
10
+ const dbPath = `test-timers-${randomUUID()}.db`;
11
11
  let db: WorkflowDb;
12
12
 
13
13
  beforeAll(() => {