keystone-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +43 -4
  2. package/package.json +4 -1
  3. package/src/cli.ts +1 -0
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/config-schema.ts +6 -0
  12. package/src/parser/schema.ts +1 -0
  13. package/src/runner/__test__/llm-test-setup.ts +43 -11
  14. package/src/runner/durable-timers.test.ts +1 -1
  15. package/src/runner/executors/dynamic-executor.ts +125 -88
  16. package/src/runner/executors/engine-executor.ts +10 -39
  17. package/src/runner/executors/file-executor.ts +67 -0
  18. package/src/runner/executors/foreach-executor.ts +170 -17
  19. package/src/runner/executors/human-executor.ts +18 -0
  20. package/src/runner/executors/llm/stream-handler.ts +103 -0
  21. package/src/runner/executors/llm/tool-manager.ts +360 -0
  22. package/src/runner/executors/llm-executor.ts +288 -555
  23. package/src/runner/executors/memory-executor.ts +41 -34
  24. package/src/runner/executors/shell-executor.ts +96 -52
  25. package/src/runner/executors/subworkflow-executor.ts +16 -0
  26. package/src/runner/executors/types.ts +3 -1
  27. package/src/runner/executors/verification_fixes.test.ts +46 -0
  28. package/src/runner/join-scheduling.test.ts +2 -1
  29. package/src/runner/llm-adapter.integration.test.ts +10 -5
  30. package/src/runner/llm-adapter.ts +57 -18
  31. package/src/runner/llm-clarification.test.ts +4 -1
  32. package/src/runner/llm-executor.test.ts +21 -7
  33. package/src/runner/mcp-client.ts +36 -2
  34. package/src/runner/mcp-server.ts +65 -36
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/standard-tools-ast.test.ts +4 -2
  40. package/src/runner/standard-tools-execution.test.ts +14 -1
  41. package/src/runner/standard-tools-integration.test.ts +6 -0
  42. package/src/runner/standard-tools.ts +13 -10
  43. package/src/runner/step-executor.ts +2 -2
  44. package/src/runner/tool-integration.test.ts +4 -1
  45. package/src/runner/workflow-runner.test.ts +23 -12
  46. package/src/runner/workflow-runner.ts +172 -79
  47. package/src/runner/workflow-state.ts +181 -111
  48. package/src/ui/dashboard.tsx +17 -3
  49. package/src/utils/config-loader.ts +4 -0
  50. package/src/utils/constants.ts +4 -0
  51. package/src/utils/context-injector.test.ts +27 -27
  52. package/src/utils/context-injector.ts +68 -26
  53. package/src/utils/process-sandbox.ts +138 -148
  54. package/src/utils/redactor.ts +39 -9
  55. package/src/utils/resource-loader.ts +24 -19
  56. package/src/utils/sandbox.ts +6 -0
  57. package/src/utils/stream-utils.ts +58 -0
@@ -1,8 +1,9 @@
1
1
  import { randomUUID } from 'node:crypto';
2
- import type { WorkflowDb } from '../../db/workflow-db.ts';
2
+ import type { StepBatchUpdate, StepExecution, WorkflowDb } from '../../db/workflow-db.ts';
3
3
  import { type ExpressionContext, ExpressionEvaluator } from '../../expression/evaluator.ts';
4
4
  import type { Step } from '../../parser/schema.ts';
5
5
  import { StepStatus, type StepStatusType, WorkflowStatus } from '../../types/status.ts';
6
+ import { ConfigLoader } from '../../utils/config-loader.ts';
6
7
  import { LIMITS } from '../../utils/constants.ts';
7
8
  import type { Logger } from '../../utils/logger.ts';
8
9
  import type { ResourcePoolManager } from '../resource-pool.ts';
@@ -12,7 +13,9 @@ import { WorkflowSuspendedError } from './types.ts';
12
13
  export type ExecuteStepCallback = (
13
14
  step: Step,
14
15
  context: ExpressionContext,
15
- stepExecId: string
16
+ stepExecId: string,
17
+ idempotencyContext?: any,
18
+ options?: { skipStatusUpdates?: boolean }
16
19
  ) => Promise<StepContext>;
17
20
 
18
21
  export class ForeachExecutor {
@@ -26,6 +29,26 @@ export class ForeachExecutor {
26
29
  private resourcePool?: ResourcePoolManager
27
30
  ) {}
28
31
 
32
+ private writeQueue: StepBatchUpdate[] = [];
33
+ private flushPromise: Promise<void> = Promise.resolve();
34
+
35
+ private async flushWriteQueue() {
36
+ if (this.writeQueue.length === 0) return;
37
+ const updates = this.writeQueue.splice(0); // Take all
38
+
39
+ // Chain flush operations to ensure sequential execution
40
+ this.flushPromise = this.flushPromise.then(async () => {
41
+ try {
42
+ await this.db.batchUpdateSteps(updates);
43
+ } catch (e) {
44
+ this.logger.error(`Failed to flush batch updates for foreach executor: ${e}`);
45
+ // If critical persistence fails, we should probably stop?
46
+ // Or retry? batchUpdateSteps uses withRetry.
47
+ }
48
+ });
49
+ await this.flushPromise;
50
+ }
51
+
29
52
  /**
30
53
  * Aggregate outputs from multiple iterations of a foreach step
31
54
  */
@@ -113,7 +136,8 @@ export class ForeachExecutor {
113
136
 
114
137
  // Evaluate concurrency
115
138
  // Default to a safe limit (50) to prevent resource exhaustion/DoS, unless explicitly overridden.
116
- const DEFAULT_MAX_CONCURRENCY = 50;
139
+ const config = ConfigLoader.load();
140
+ const DEFAULT_MAX_CONCURRENCY = config.concurrency?.default ?? 50;
117
141
  let concurrencyLimit = Math.min(items.length, DEFAULT_MAX_CONCURRENCY);
118
142
 
119
143
  if (step.concurrency !== undefined) {
@@ -162,11 +186,17 @@ export class ForeachExecutor {
162
186
 
163
187
  // Optimization: Fetch all existing iterations in one query
164
188
  // This avoids N queries in the loop
165
- const existingIterations = new Map<number, any>();
189
+ const existingIterations = new Map<number, StepExecution>();
166
190
  if (shouldCheckDb) {
167
191
  try {
168
- // Use getStepIterations(runId, stepId) for optimized fetch
169
- const iterations = await this.db.getStepIterations(runId, step.id);
192
+ // Check count first to decide if we should load outputs
193
+ const count = await this.db.countStepIterations(runId, step.id);
194
+ const isLarge = count > 500; // Same threshold as LARGE_DATASET_THRESHOLD
195
+
196
+ // optimized fetch
197
+ const iterations = await this.db.getStepIterations(runId, step.id, {
198
+ includeOutput: !isLarge,
199
+ });
170
200
  for (const s of iterations) {
171
201
  if (typeof s.iteration_index === 'number') {
172
202
  existingIterations.set(s.iteration_index, s);
@@ -224,6 +254,15 @@ export class ForeachExecutor {
224
254
  } as StepContext;
225
255
  continue;
226
256
  }
257
+ if (existingExec) {
258
+ // It exists but is not successful (e.g. failed/running/pending).
259
+ // We need to register its ID so we can retry/resume it if needed.
260
+ // If the policy is to Retry, we might reuse the ID or validly continue.
261
+ // For now, let's reuse the ID ensuring iterationIds has it.
262
+ if (existingExec.id) {
263
+ iterationIds.set(i, existingExec.id);
264
+ }
265
+ }
227
266
  }
228
267
 
229
268
  // Needs execution
@@ -237,7 +276,15 @@ export class ForeachExecutor {
237
276
  await this.db.batchCreateSteps(toCreate);
238
277
  }
239
278
 
279
+ // Start the flusher loop
280
+ const flushInterval = setInterval(() => {
281
+ this.flushWriteQueue();
282
+ }, 100);
283
+
240
284
  // Worker pool implementation
285
+ const LARGE_DATASET_THRESHOLD = 500;
286
+ const isLargeDataset = items.length > LARGE_DATASET_THRESHOLD;
287
+
241
288
  let currentIndex = 0;
242
289
  let aborted = false;
243
290
  const workers = new Array(Math.min(concurrencyLimit, items.length))
@@ -292,16 +339,78 @@ export class ForeachExecutor {
292
339
  release = await this.resourcePool.acquire(poolName, { signal: this.abortSignal });
293
340
  }
294
341
 
295
- this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
296
- itemResults[i] = await this.executeStepFn(step, itemContext, stepExecId);
342
+ this.logger.debug(` ⤷ [${i + 1}/${items.length}] Processing iteration...`);
343
+
344
+ // Queue START event
345
+ this.writeQueue.push({
346
+ type: 'start',
347
+ id: stepExecId,
348
+ data: { status: StepStatus.RUNNING, startedAt: new Date().toISOString() },
349
+ });
350
+
351
+ // Execute step with skipStatusUpdates
352
+ const result = await this.executeStepFn(step, itemContext, stepExecId, undefined, {
353
+ skipStatusUpdates: true,
354
+ });
355
+
356
+ // Memory Optimization: If large dataset, don't store the full output in memory if possible.
357
+ if (isLargeDataset) {
358
+ // Keep a lightweight record
359
+ itemResults[i] = {
360
+ status: result.status,
361
+ output: {
362
+ _truncated: true,
363
+ _warning: 'Output dropped for memory optimization',
364
+ },
365
+ outputs: {},
366
+ error: result.error,
367
+ };
368
+ if (result.usage) itemResults[i].usage = result.usage;
369
+
370
+ // Explicitly clear the large result object to help GC
371
+ if (result.output) {
372
+ result.output = null;
373
+ }
374
+ } else {
375
+ itemResults[i] = result;
376
+ }
377
+
378
+ // Queue COMPLETE event
379
+ this.writeQueue.push({
380
+ type: 'complete',
381
+ id: stepExecId,
382
+ data: {
383
+ status: result.status,
384
+ output: result.output,
385
+ error: result.error,
386
+ usage: result.usage,
387
+ completedAt: new Date().toISOString(),
388
+ },
389
+ });
297
390
 
298
391
  // Track result size to prevent memory exhaustion
299
- if (itemResults[i]?.output !== undefined) {
392
+ if (!isLargeDataset && itemResults[i]?.output !== undefined) {
300
393
  try {
301
- estimatedResultsBytes += JSON.stringify(itemResults[i].output).length;
394
+ const output = itemResults[i].output;
395
+ // Approximate size of this item only, to avoid O(n^2) behavior
396
+ let itemSize = 0;
397
+ if (typeof output === 'string') {
398
+ itemSize = output.length;
399
+ } else if (output === null) {
400
+ itemSize = 4;
401
+ } else if (typeof output === 'object') {
402
+ // We use a simple heuristic for object size here.
403
+ // If it's already a very tight limit, we could use JSON.stringify(output).length
404
+ // but even that could be slow for many large objects.
405
+ // For now, let's use a very safe heuristic or a quick JSON.stringify.
406
+ itemSize = JSON.stringify(output).length;
407
+ } else {
408
+ itemSize = String(output).length;
409
+ }
410
+
411
+ estimatedResultsBytes += itemSize;
302
412
  } catch {
303
- // If serialization fails, estimate based on type
304
- estimatedResultsBytes += 1024;
413
+ estimatedResultsBytes += 1024; // Fallback estimate
305
414
  }
306
415
  if (estimatedResultsBytes > LIMITS.MAX_FOREACH_RESULTS_BYTES) {
307
416
  throw new Error(
@@ -315,13 +424,27 @@ export class ForeachExecutor {
315
424
  itemResults[i].status === StepStatus.FAILED ||
316
425
  itemResults[i].status === StepStatus.SUSPENDED
317
426
  ) {
318
- aborted = true;
427
+ if (step.failFast !== false) {
428
+ aborted = true;
429
+ }
319
430
  }
320
431
  } finally {
321
432
  release?.();
322
433
  }
323
434
  } catch (error) {
324
435
  if (error instanceof WorkflowSuspendedError) {
436
+ // If suspended, we need to mark the item as suspended in DB so resumption works
437
+ this.writeQueue.push({
438
+ type: 'complete',
439
+ id: stepExecId,
440
+ data: {
441
+ status: StepStatus.SUSPENDED,
442
+ error: error.message,
443
+ completedAt: new Date().toISOString(),
444
+ },
445
+ });
446
+ await this.flushWriteQueue();
447
+
325
448
  itemResults[i] = {
326
449
  status: StepStatus.SUSPENDED,
327
450
  output: null,
@@ -331,13 +454,32 @@ export class ForeachExecutor {
331
454
  aborted = true;
332
455
  return;
333
456
  }
334
- aborted = true;
457
+ // For other errors, queue failure
458
+ this.writeQueue.push({
459
+ type: 'complete',
460
+ id: stepExecId,
461
+ data: {
462
+ status: StepStatus.FAILED,
463
+ error: error instanceof Error ? error.message : String(error),
464
+ completedAt: new Date().toISOString(),
465
+ },
466
+ });
467
+
468
+ if (step.failFast !== false) {
469
+ aborted = true;
470
+ }
335
471
  throw error;
336
472
  }
337
473
  }
338
474
  });
339
475
 
340
- const workerResults = await Promise.allSettled(workers);
476
+ let workerResults: PromiseSettledResult<void>[];
477
+ try {
478
+ workerResults = await Promise.allSettled(workers);
479
+ } finally {
480
+ clearInterval(flushInterval);
481
+ await this.flushWriteQueue();
482
+ }
341
483
 
342
484
  // Check if any worker rejected (this would be due to an unexpected throw)
343
485
  const firstError = workerResults.find((r) => r.status === 'rejected') as
@@ -351,7 +493,17 @@ export class ForeachExecutor {
351
493
 
352
494
  // Aggregate results
353
495
  const outputs = itemResults.map((r) => r?.output);
354
- const allSuccess = itemResults.every((r) => r?.status === StepStatus.SUCCESS);
496
+
497
+ // If large dataset, warn that outputs are truncated in memory
498
+ if (isLargeDataset) {
499
+ this.logger.warn(
500
+ ' ⚠️ Optimized memory usage for large foreach loop. Aggregated outputs in context will be empty.'
501
+ );
502
+ }
503
+
504
+ const allSuccess = itemResults.every(
505
+ (r) => r?.status === StepStatus.SUCCESS || r?.status === StepStatus.SKIPPED
506
+ );
355
507
  const anyFailed = itemResults.some((r) => r?.status === StepStatus.FAILED);
356
508
  const anySuspended = itemResults.some((r) => r?.status === StepStatus.SUSPENDED);
357
509
 
@@ -369,7 +521,8 @@ export class ForeachExecutor {
369
521
  );
370
522
 
371
523
  // Map child properties
372
- const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
524
+ // Optimization: Skip aggregation if large dataset to avoid OOM
525
+ const mappedOutputs = isLargeDataset ? {} : ForeachExecutor.aggregateOutputs(outputs);
373
526
 
374
527
  // Determine final status
375
528
  let finalStatus: (typeof StepStatus)[keyof typeof StepStatus] = StepStatus.FAILED;
@@ -5,6 +5,9 @@ import type { HumanStep, SleepStep } from '../../parser/schema.ts';
5
5
  import type { Logger } from '../../utils/logger.ts';
6
6
  import { type StepResult, WorkflowSuspendedError, WorkflowWaitingError } from './types.ts';
7
7
 
8
+ // Global lock to ensure only one human input prompt is active at any time
9
+ let terminalLock: Promise<void> = Promise.resolve();
10
+
8
11
  /**
9
12
  * Execute a human input step
10
13
  */
@@ -35,12 +38,25 @@ export async function executeHumanStep(
35
38
  throw new WorkflowSuspendedError(message, step.id, inputType);
36
39
  }
37
40
 
41
+ // Acquire terminal lock to prevent overlapping readline sessions
42
+ const myTurn = terminalLock.then(() => {});
43
+ terminalLock = myTurn.then(async () => {
44
+ // Settle time before starting a new prompt to clear any trailing input/echo
45
+ await new Promise((r) => setTimeout(r, 150));
46
+ });
47
+
48
+ await myTurn;
49
+
38
50
  const rl = readlinePromises.createInterface({
39
51
  input: process.stdin,
40
52
  output: process.stdout,
41
53
  });
42
54
 
43
55
  try {
56
+ // Clear visual clutter
57
+ process.stdout.write(
58
+ '\n--------------------------------------------------------------------------------\n'
59
+ );
44
60
  const prompt = inputType === 'confirm' ? `${message} [Y/n] ` : `${message} `;
45
61
  const answer = await rl.question(prompt);
46
62
 
@@ -61,6 +77,8 @@ export async function executeHumanStep(
61
77
  return { status: 'success', output: answer };
62
78
  } finally {
63
79
  rl.close();
80
+ // Wait for rl to fully release stdin
81
+ await new Promise((r) => setTimeout(r, 200));
64
82
  }
65
83
  }
66
84
 
@@ -0,0 +1,103 @@
1
+ import { LLM } from '../../../utils/constants';
2
+ import type { Logger } from '../../../utils/logger';
3
+
4
+ const { THINKING_OPEN_TAG, THINKING_CLOSE_TAG } = LLM;
5
+
6
+ export class ThoughtStreamParser {
7
+ private buffer = '';
8
+ private thoughtBuffer = '';
9
+ private inThinking = false;
10
+
11
+ process(chunk: string): { output: string; thoughts: string[] } {
12
+ this.buffer += chunk;
13
+ const thoughts: string[] = [];
14
+ let output = '';
15
+
16
+ while (this.buffer.length > 0) {
17
+ const lower = this.buffer.toLowerCase();
18
+ if (!this.inThinking) {
19
+ const openIndex = lower.indexOf(THINKING_OPEN_TAG);
20
+ if (openIndex === -1) {
21
+ const keep = Math.max(0, this.buffer.length - (THINKING_OPEN_TAG.length - 1));
22
+ output += this.buffer.slice(0, keep);
23
+ this.buffer = this.buffer.slice(keep);
24
+ break;
25
+ }
26
+ output += this.buffer.slice(0, openIndex);
27
+ this.buffer = this.buffer.slice(openIndex + THINKING_OPEN_TAG.length);
28
+ this.inThinking = true;
29
+ continue;
30
+ }
31
+
32
+ const closeIndex = lower.indexOf(THINKING_CLOSE_TAG);
33
+ if (closeIndex === -1) {
34
+ const keep = Math.max(0, this.buffer.length - (THINKING_CLOSE_TAG.length - 1));
35
+ this.thoughtBuffer += this.buffer.slice(0, keep);
36
+ this.buffer = this.buffer.slice(keep);
37
+ break;
38
+ }
39
+ this.thoughtBuffer += this.buffer.slice(0, closeIndex);
40
+ this.buffer = this.buffer.slice(closeIndex + THINKING_CLOSE_TAG.length);
41
+ this.inThinking = false;
42
+ const thought = this.thoughtBuffer.trim();
43
+ if (thought) {
44
+ thoughts.push(thought);
45
+ }
46
+ this.thoughtBuffer = '';
47
+ }
48
+
49
+ return { output, thoughts };
50
+ }
51
+
52
+ flush(): { output: string; thoughts: string[] } {
53
+ const thoughts: string[] = [];
54
+ let output = '';
55
+
56
+ if (this.inThinking) {
57
+ this.thoughtBuffer += this.buffer;
58
+ const thought = this.thoughtBuffer.trim();
59
+ if (thought) {
60
+ thoughts.push(thought);
61
+ }
62
+ } else {
63
+ output = this.buffer;
64
+ }
65
+
66
+ this.buffer = '';
67
+ this.thoughtBuffer = '';
68
+ this.inThinking = false;
69
+ return { output, thoughts };
70
+ }
71
+ }
72
+
73
+ export class StreamHandler {
74
+ private parser = new ThoughtStreamParser();
75
+
76
+ constructor(private logger: Logger) {}
77
+
78
+ processChunk(chunk: string): { text: string; thoughts: string[] } {
79
+ const { output, thoughts } = this.parser.process(chunk);
80
+
81
+ if (thoughts.length > 0) {
82
+ for (const t of thoughts) {
83
+ this.logger.info(` 💭 ${t}`);
84
+ }
85
+ }
86
+
87
+ // We might want to stream output to logger or just accumulate it
88
+ // The executor typically accumulates full text.
89
+ // For now, just return parsed parts.
90
+
91
+ return { text: output, thoughts };
92
+ }
93
+
94
+ flush(): { text: string; thoughts: string[] } {
95
+ const { output, thoughts } = this.parser.flush();
96
+ if (thoughts.length > 0) {
97
+ for (const t of thoughts) {
98
+ this.logger.info(` 💭 ${t}`);
99
+ }
100
+ }
101
+ return { text: output, thoughts };
102
+ }
103
+ }