keystone-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +43 -4
  2. package/package.json +4 -1
  3. package/src/cli.ts +1 -0
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/config-schema.ts +6 -0
  12. package/src/parser/schema.ts +1 -0
  13. package/src/runner/__test__/llm-test-setup.ts +43 -11
  14. package/src/runner/durable-timers.test.ts +1 -1
  15. package/src/runner/executors/dynamic-executor.ts +125 -88
  16. package/src/runner/executors/engine-executor.ts +10 -39
  17. package/src/runner/executors/file-executor.ts +67 -0
  18. package/src/runner/executors/foreach-executor.ts +170 -17
  19. package/src/runner/executors/human-executor.ts +18 -0
  20. package/src/runner/executors/llm/stream-handler.ts +103 -0
  21. package/src/runner/executors/llm/tool-manager.ts +360 -0
  22. package/src/runner/executors/llm-executor.ts +288 -555
  23. package/src/runner/executors/memory-executor.ts +41 -34
  24. package/src/runner/executors/shell-executor.ts +96 -52
  25. package/src/runner/executors/subworkflow-executor.ts +16 -0
  26. package/src/runner/executors/types.ts +3 -1
  27. package/src/runner/executors/verification_fixes.test.ts +46 -0
  28. package/src/runner/join-scheduling.test.ts +2 -1
  29. package/src/runner/llm-adapter.integration.test.ts +10 -5
  30. package/src/runner/llm-adapter.ts +57 -18
  31. package/src/runner/llm-clarification.test.ts +4 -1
  32. package/src/runner/llm-executor.test.ts +21 -7
  33. package/src/runner/mcp-client.ts +36 -2
  34. package/src/runner/mcp-server.ts +65 -36
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/standard-tools-ast.test.ts +4 -2
  40. package/src/runner/standard-tools-execution.test.ts +14 -1
  41. package/src/runner/standard-tools-integration.test.ts +6 -0
  42. package/src/runner/standard-tools.ts +13 -10
  43. package/src/runner/step-executor.ts +2 -2
  44. package/src/runner/tool-integration.test.ts +4 -1
  45. package/src/runner/workflow-runner.test.ts +23 -12
  46. package/src/runner/workflow-runner.ts +172 -79
  47. package/src/runner/workflow-state.ts +181 -111
  48. package/src/ui/dashboard.tsx +17 -3
  49. package/src/utils/config-loader.ts +4 -0
  50. package/src/utils/constants.ts +4 -0
  51. package/src/utils/context-injector.test.ts +27 -27
  52. package/src/utils/context-injector.ts +68 -26
  53. package/src/utils/process-sandbox.ts +138 -148
  54. package/src/utils/redactor.ts +39 -9
  55. package/src/utils/resource-loader.ts +24 -19
  56. package/src/utils/sandbox.ts +6 -0
  57. package/src/utils/stream-utils.ts +58 -0
@@ -1,8 +1,9 @@
1
1
  import { randomUUID } from 'node:crypto';
2
- import type { WorkflowDb } from '../../db/workflow-db.ts';
2
+ import type { StepBatchUpdate, StepExecution, WorkflowDb } from '../../db/workflow-db.ts';
3
3
  import { type ExpressionContext, ExpressionEvaluator } from '../../expression/evaluator.ts';
4
4
  import type { Step } from '../../parser/schema.ts';
5
5
  import { StepStatus, type StepStatusType, WorkflowStatus } from '../../types/status.ts';
6
+ import { ConfigLoader } from '../../utils/config-loader.ts';
6
7
  import { LIMITS } from '../../utils/constants.ts';
7
8
  import type { Logger } from '../../utils/logger.ts';
8
9
  import type { ResourcePoolManager } from '../resource-pool.ts';
@@ -12,7 +13,9 @@ import { WorkflowSuspendedError } from './types.ts';
12
13
  export type ExecuteStepCallback = (
13
14
  step: Step,
14
15
  context: ExpressionContext,
15
- stepExecId: string
16
+ stepExecId: string,
17
+ idempotencyContext?: any,
18
+ options?: { skipStatusUpdates?: boolean }
16
19
  ) => Promise<StepContext>;
17
20
 
18
21
  export class ForeachExecutor {
@@ -26,6 +29,26 @@ export class ForeachExecutor {
26
29
  private resourcePool?: ResourcePoolManager
27
30
  ) {}
28
31
 
32
+ private writeQueue: StepBatchUpdate[] = [];
33
+ private flushPromise: Promise<void> = Promise.resolve();
34
+
35
+ private async flushWriteQueue() {
36
+ if (this.writeQueue.length === 0) return;
37
+ const updates = this.writeQueue.splice(0); // Take all
38
+
39
+ // Chain flush operations to ensure sequential execution
40
+ this.flushPromise = this.flushPromise.then(async () => {
41
+ try {
42
+ await this.db.batchUpdateSteps(updates);
43
+ } catch (e) {
44
+ this.logger.error(`Failed to flush batch updates for foreach executor: ${e}`);
45
+ // If critical persistence fails, we should probably stop?
46
+ // Or retry? batchUpdateSteps uses withRetry.
47
+ }
48
+ });
49
+ await this.flushPromise;
50
+ }
51
+
29
52
  /**
30
53
  * Aggregate outputs from multiple iterations of a foreach step
31
54
  */
@@ -113,7 +136,8 @@ export class ForeachExecutor {
113
136
 
114
137
  // Evaluate concurrency
115
138
  // Default to a safe limit (50) to prevent resource exhaustion/DoS, unless explicitly overridden.
116
- const DEFAULT_MAX_CONCURRENCY = 50;
139
+ const config = ConfigLoader.load();
140
+ const DEFAULT_MAX_CONCURRENCY = config.concurrency?.default ?? 50;
117
141
  let concurrencyLimit = Math.min(items.length, DEFAULT_MAX_CONCURRENCY);
118
142
 
119
143
  if (step.concurrency !== undefined) {
@@ -162,11 +186,17 @@ export class ForeachExecutor {
162
186
 
163
187
  // Optimization: Fetch all existing iterations in one query
164
188
  // This avoids N queries in the loop
165
- const existingIterations = new Map<number, any>();
189
+ const existingIterations = new Map<number, StepExecution>();
166
190
  if (shouldCheckDb) {
167
191
  try {
168
- // Use getStepIterations(runId, stepId) for optimized fetch
169
- const iterations = await this.db.getStepIterations(runId, step.id);
192
+ // Check count first to decide if we should load outputs
193
+ const count = await this.db.countStepIterations(runId, step.id);
194
+ const isLarge = count > 500; // Same threshold as LARGE_DATASET_THRESHOLD
195
+
196
+ // optimized fetch
197
+ const iterations = await this.db.getStepIterations(runId, step.id, {
198
+ includeOutput: !isLarge,
199
+ });
170
200
  for (const s of iterations) {
171
201
  if (typeof s.iteration_index === 'number') {
172
202
  existingIterations.set(s.iteration_index, s);
@@ -224,6 +254,15 @@ export class ForeachExecutor {
224
254
  } as StepContext;
225
255
  continue;
226
256
  }
257
+ if (existingExec) {
258
+ // It exists but is not successful (e.g. failed/running/pending).
259
+ // We need to register its ID so we can retry/resume it if needed.
260
+ // If the policy is to Retry, we might reuse the ID or validly continue.
261
+ // For now, let's reuse the ID ensuring iterationIds has it.
262
+ if (existingExec.id) {
263
+ iterationIds.set(i, existingExec.id);
264
+ }
265
+ }
227
266
  }
228
267
 
229
268
  // Needs execution
@@ -237,7 +276,15 @@ export class ForeachExecutor {
237
276
  await this.db.batchCreateSteps(toCreate);
238
277
  }
239
278
 
279
+ // Start the flusher loop
280
+ const flushInterval = setInterval(() => {
281
+ this.flushWriteQueue();
282
+ }, 100);
283
+
240
284
  // Worker pool implementation
285
+ const LARGE_DATASET_THRESHOLD = 500;
286
+ const isLargeDataset = items.length > LARGE_DATASET_THRESHOLD;
287
+
241
288
  let currentIndex = 0;
242
289
  let aborted = false;
243
290
  const workers = new Array(Math.min(concurrencyLimit, items.length))
@@ -292,16 +339,78 @@ export class ForeachExecutor {
292
339
  release = await this.resourcePool.acquire(poolName, { signal: this.abortSignal });
293
340
  }
294
341
 
295
- this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
296
- itemResults[i] = await this.executeStepFn(step, itemContext, stepExecId);
342
+ this.logger.debug(` ⤷ [${i + 1}/${items.length}] Processing iteration...`);
343
+
344
+ // Queue START event
345
+ this.writeQueue.push({
346
+ type: 'start',
347
+ id: stepExecId,
348
+ data: { status: StepStatus.RUNNING, startedAt: new Date().toISOString() },
349
+ });
350
+
351
+ // Execute step with skipStatusUpdates
352
+ const result = await this.executeStepFn(step, itemContext, stepExecId, undefined, {
353
+ skipStatusUpdates: true,
354
+ });
355
+
356
+ // Memory Optimization: If large dataset, don't store the full output in memory if possible.
357
+ if (isLargeDataset) {
358
+ // Keep a lightweight record
359
+ itemResults[i] = {
360
+ status: result.status,
361
+ output: {
362
+ _truncated: true,
363
+ _warning: 'Output dropped for memory optimization',
364
+ },
365
+ outputs: {},
366
+ error: result.error,
367
+ };
368
+ if (result.usage) itemResults[i].usage = result.usage;
369
+
370
+ // Explicitly clear the large result object to help GC
371
+ if (result.output) {
372
+ result.output = null;
373
+ }
374
+ } else {
375
+ itemResults[i] = result;
376
+ }
377
+
378
+ // Queue COMPLETE event
379
+ this.writeQueue.push({
380
+ type: 'complete',
381
+ id: stepExecId,
382
+ data: {
383
+ status: result.status,
384
+ output: result.output,
385
+ error: result.error,
386
+ usage: result.usage,
387
+ completedAt: new Date().toISOString(),
388
+ },
389
+ });
297
390
 
298
391
  // Track result size to prevent memory exhaustion
299
- if (itemResults[i]?.output !== undefined) {
392
+ if (!isLargeDataset && itemResults[i]?.output !== undefined) {
300
393
  try {
301
- estimatedResultsBytes += JSON.stringify(itemResults[i].output).length;
394
+ const output = itemResults[i].output;
395
+ // Approximate size of this item only, to avoid O(n^2) behavior
396
+ let itemSize = 0;
397
+ if (typeof output === 'string') {
398
+ itemSize = output.length;
399
+ } else if (output === null) {
400
+ itemSize = 4;
401
+ } else if (typeof output === 'object') {
402
+ // We use a simple heuristic for object size here.
403
+ // If it's already a very tight limit, we could use JSON.stringify(output).length
404
+ // but even that could be slow for many large objects.
405
+ // For now, let's use a very safe heuristic or a quick JSON.stringify.
406
+ itemSize = JSON.stringify(output).length;
407
+ } else {
408
+ itemSize = String(output).length;
409
+ }
410
+
411
+ estimatedResultsBytes += itemSize;
302
412
  } catch {
303
- // If serialization fails, estimate based on type
304
- estimatedResultsBytes += 1024;
413
+ estimatedResultsBytes += 1024; // Fallback estimate
305
414
  }
306
415
  if (estimatedResultsBytes > LIMITS.MAX_FOREACH_RESULTS_BYTES) {
307
416
  throw new Error(
@@ -315,13 +424,27 @@ export class ForeachExecutor {
315
424
  itemResults[i].status === StepStatus.FAILED ||
316
425
  itemResults[i].status === StepStatus.SUSPENDED
317
426
  ) {
318
- aborted = true;
427
+ if (step.failFast !== false) {
428
+ aborted = true;
429
+ }
319
430
  }
320
431
  } finally {
321
432
  release?.();
322
433
  }
323
434
  } catch (error) {
324
435
  if (error instanceof WorkflowSuspendedError) {
436
+ // If suspended, we need to mark the item as suspended in DB so resumption works
437
+ this.writeQueue.push({
438
+ type: 'complete',
439
+ id: stepExecId,
440
+ data: {
441
+ status: StepStatus.SUSPENDED,
442
+ error: error.message,
443
+ completedAt: new Date().toISOString(),
444
+ },
445
+ });
446
+ await this.flushWriteQueue();
447
+
325
448
  itemResults[i] = {
326
449
  status: StepStatus.SUSPENDED,
327
450
  output: null,
@@ -331,13 +454,32 @@ export class ForeachExecutor {
331
454
  aborted = true;
332
455
  return;
333
456
  }
334
- aborted = true;
457
+ // For other errors, queue failure
458
+ this.writeQueue.push({
459
+ type: 'complete',
460
+ id: stepExecId,
461
+ data: {
462
+ status: StepStatus.FAILED,
463
+ error: error instanceof Error ? error.message : String(error),
464
+ completedAt: new Date().toISOString(),
465
+ },
466
+ });
467
+
468
+ if (step.failFast !== false) {
469
+ aborted = true;
470
+ }
335
471
  throw error;
336
472
  }
337
473
  }
338
474
  });
339
475
 
340
- const workerResults = await Promise.allSettled(workers);
476
+ let workerResults: PromiseSettledResult<void>[];
477
+ try {
478
+ workerResults = await Promise.allSettled(workers);
479
+ } finally {
480
+ clearInterval(flushInterval);
481
+ await this.flushWriteQueue();
482
+ }
341
483
 
342
484
  // Check if any worker rejected (this would be due to an unexpected throw)
343
485
  const firstError = workerResults.find((r) => r.status === 'rejected') as
@@ -351,7 +493,17 @@ export class ForeachExecutor {
351
493
 
352
494
  // Aggregate results
353
495
  const outputs = itemResults.map((r) => r?.output);
354
- const allSuccess = itemResults.every((r) => r?.status === StepStatus.SUCCESS);
496
+
497
+ // If large dataset, warn that outputs are truncated in memory
498
+ if (isLargeDataset) {
499
+ this.logger.warn(
500
+ ' ⚠️ Optimized memory usage for large foreach loop. Aggregated outputs in context will be empty.'
501
+ );
502
+ }
503
+
504
+ const allSuccess = itemResults.every(
505
+ (r) => r?.status === StepStatus.SUCCESS || r?.status === StepStatus.SKIPPED
506
+ );
355
507
  const anyFailed = itemResults.some((r) => r?.status === StepStatus.FAILED);
356
508
  const anySuspended = itemResults.some((r) => r?.status === StepStatus.SUSPENDED);
357
509
 
@@ -369,7 +521,8 @@ export class ForeachExecutor {
369
521
  );
370
522
 
371
523
  // Map child properties
372
- const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
524
+ // Optimization: Skip aggregation if large dataset to avoid OOM
525
+ const mappedOutputs = isLargeDataset ? {} : ForeachExecutor.aggregateOutputs(outputs);
373
526
 
374
527
  // Determine final status
375
528
  let finalStatus: (typeof StepStatus)[keyof typeof StepStatus] = StepStatus.FAILED;
@@ -5,6 +5,9 @@ import type { HumanStep, SleepStep } from '../../parser/schema.ts';
5
5
  import type { Logger } from '../../utils/logger.ts';
6
6
  import { type StepResult, WorkflowSuspendedError, WorkflowWaitingError } from './types.ts';
7
7
 
8
+ // Global lock to ensure only one human input prompt is active at any time
9
+ let terminalLock: Promise<void> = Promise.resolve();
10
+
8
11
  /**
9
12
  * Execute a human input step
10
13
  */
@@ -35,12 +38,25 @@ export async function executeHumanStep(
35
38
  throw new WorkflowSuspendedError(message, step.id, inputType);
36
39
  }
37
40
 
41
+ // Acquire terminal lock to prevent overlapping readline sessions
42
+ const myTurn = terminalLock.then(() => {});
43
+ terminalLock = myTurn.then(async () => {
44
+ // Settle time before starting a new prompt to clear any trailing input/echo
45
+ await new Promise((r) => setTimeout(r, 150));
46
+ });
47
+
48
+ await myTurn;
49
+
38
50
  const rl = readlinePromises.createInterface({
39
51
  input: process.stdin,
40
52
  output: process.stdout,
41
53
  });
42
54
 
43
55
  try {
56
+ // Clear visual clutter
57
+ process.stdout.write(
58
+ '\n--------------------------------------------------------------------------------\n'
59
+ );
44
60
  const prompt = inputType === 'confirm' ? `${message} [Y/n] ` : `${message} `;
45
61
  const answer = await rl.question(prompt);
46
62
 
@@ -61,6 +77,8 @@ export async function executeHumanStep(
61
77
  return { status: 'success', output: answer };
62
78
  } finally {
63
79
  rl.close();
80
+ // Wait for rl to fully release stdin
81
+ await new Promise((r) => setTimeout(r, 200));
64
82
  }
65
83
  }
66
84
 
@@ -0,0 +1,103 @@
1
+ import { LLM } from '../../../utils/constants';
2
+ import type { Logger } from '../../../utils/logger';
3
+
4
+ const { THINKING_OPEN_TAG, THINKING_CLOSE_TAG } = LLM;
5
+
6
+ export class ThoughtStreamParser {
7
+ private buffer = '';
8
+ private thoughtBuffer = '';
9
+ private inThinking = false;
10
+
11
+ process(chunk: string): { output: string; thoughts: string[] } {
12
+ this.buffer += chunk;
13
+ const thoughts: string[] = [];
14
+ let output = '';
15
+
16
+ while (this.buffer.length > 0) {
17
+ const lower = this.buffer.toLowerCase();
18
+ if (!this.inThinking) {
19
+ const openIndex = lower.indexOf(THINKING_OPEN_TAG);
20
+ if (openIndex === -1) {
21
+ const keep = Math.max(0, this.buffer.length - (THINKING_OPEN_TAG.length - 1));
22
+ output += this.buffer.slice(0, keep);
23
+ this.buffer = this.buffer.slice(keep);
24
+ break;
25
+ }
26
+ output += this.buffer.slice(0, openIndex);
27
+ this.buffer = this.buffer.slice(openIndex + THINKING_OPEN_TAG.length);
28
+ this.inThinking = true;
29
+ continue;
30
+ }
31
+
32
+ const closeIndex = lower.indexOf(THINKING_CLOSE_TAG);
33
+ if (closeIndex === -1) {
34
+ const keep = Math.max(0, this.buffer.length - (THINKING_CLOSE_TAG.length - 1));
35
+ this.thoughtBuffer += this.buffer.slice(0, keep);
36
+ this.buffer = this.buffer.slice(keep);
37
+ break;
38
+ }
39
+ this.thoughtBuffer += this.buffer.slice(0, closeIndex);
40
+ this.buffer = this.buffer.slice(closeIndex + THINKING_CLOSE_TAG.length);
41
+ this.inThinking = false;
42
+ const thought = this.thoughtBuffer.trim();
43
+ if (thought) {
44
+ thoughts.push(thought);
45
+ }
46
+ this.thoughtBuffer = '';
47
+ }
48
+
49
+ return { output, thoughts };
50
+ }
51
+
52
+ flush(): { output: string; thoughts: string[] } {
53
+ const thoughts: string[] = [];
54
+ let output = '';
55
+
56
+ if (this.inThinking) {
57
+ this.thoughtBuffer += this.buffer;
58
+ const thought = this.thoughtBuffer.trim();
59
+ if (thought) {
60
+ thoughts.push(thought);
61
+ }
62
+ } else {
63
+ output = this.buffer;
64
+ }
65
+
66
+ this.buffer = '';
67
+ this.thoughtBuffer = '';
68
+ this.inThinking = false;
69
+ return { output, thoughts };
70
+ }
71
+ }
72
+
73
+ export class StreamHandler {
74
+ private parser = new ThoughtStreamParser();
75
+
76
+ constructor(private logger: Logger) {}
77
+
78
+ processChunk(chunk: string): { text: string; thoughts: string[] } {
79
+ const { output, thoughts } = this.parser.process(chunk);
80
+
81
+ if (thoughts.length > 0) {
82
+ for (const t of thoughts) {
83
+ this.logger.info(` 💭 ${t}`);
84
+ }
85
+ }
86
+
87
+ // We might want to stream output to logger or just accumulate it
88
+ // The executor typically accumulates full text.
89
+ // For now, just return parsed parts.
90
+
91
+ return { text: output, thoughts };
92
+ }
93
+
94
+ flush(): { text: string; thoughts: string[] } {
95
+ const { output, thoughts } = this.parser.flush();
96
+ if (thoughts.length > 0) {
97
+ for (const t of thoughts) {
98
+ this.logger.info(` 💭 ${t}`);
99
+ }
100
+ }
101
+ return { text: output, thoughts };
102
+ }
103
+ }