keystone-cli 0.7.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +486 -54
  2. package/package.json +8 -2
  3. package/src/__fixtures__/index.ts +100 -0
  4. package/src/cli.ts +841 -91
  5. package/src/db/memory-db.ts +35 -1
  6. package/src/db/workflow-db.test.ts +24 -0
  7. package/src/db/workflow-db.ts +484 -14
  8. package/src/expression/evaluator.ts +68 -4
  9. package/src/parser/agent-parser.ts +6 -3
  10. package/src/parser/config-schema.ts +38 -2
  11. package/src/parser/schema.ts +192 -7
  12. package/src/parser/test-schema.ts +29 -0
  13. package/src/parser/workflow-parser.test.ts +54 -0
  14. package/src/parser/workflow-parser.ts +153 -7
  15. package/src/runner/aggregate-error.test.ts +57 -0
  16. package/src/runner/aggregate-error.ts +46 -0
  17. package/src/runner/audit-verification.test.ts +2 -2
  18. package/src/runner/auto-heal.test.ts +1 -1
  19. package/src/runner/blueprint-executor.test.ts +63 -0
  20. package/src/runner/blueprint-executor.ts +157 -0
  21. package/src/runner/concurrency-limit.test.ts +82 -0
  22. package/src/runner/debug-repl.ts +18 -3
  23. package/src/runner/durable-timers.test.ts +200 -0
  24. package/src/runner/engine-executor.test.ts +464 -0
  25. package/src/runner/engine-executor.ts +491 -0
  26. package/src/runner/foreach-executor.ts +30 -12
  27. package/src/runner/llm-adapter.test.ts +282 -5
  28. package/src/runner/llm-adapter.ts +581 -8
  29. package/src/runner/llm-clarification.test.ts +79 -21
  30. package/src/runner/llm-errors.ts +83 -0
  31. package/src/runner/llm-executor.test.ts +258 -219
  32. package/src/runner/llm-executor.ts +226 -29
  33. package/src/runner/mcp-client.ts +70 -3
  34. package/src/runner/mcp-manager.test.ts +52 -52
  35. package/src/runner/mcp-manager.ts +12 -5
  36. package/src/runner/mcp-server.test.ts +117 -78
  37. package/src/runner/mcp-server.ts +13 -4
  38. package/src/runner/optimization-runner.ts +48 -31
  39. package/src/runner/reflexion.test.ts +1 -1
  40. package/src/runner/resource-pool.test.ts +113 -0
  41. package/src/runner/resource-pool.ts +164 -0
  42. package/src/runner/shell-executor.ts +130 -32
  43. package/src/runner/standard-tools-execution.test.ts +39 -0
  44. package/src/runner/standard-tools-integration.test.ts +36 -36
  45. package/src/runner/standard-tools.test.ts +18 -0
  46. package/src/runner/standard-tools.ts +174 -93
  47. package/src/runner/step-executor.test.ts +176 -16
  48. package/src/runner/step-executor.ts +534 -83
  49. package/src/runner/stream-utils.test.ts +14 -0
  50. package/src/runner/subflow-outputs.test.ts +103 -0
  51. package/src/runner/test-harness.ts +161 -0
  52. package/src/runner/tool-integration.test.ts +73 -79
  53. package/src/runner/workflow-runner.test.ts +549 -15
  54. package/src/runner/workflow-runner.ts +1448 -79
  55. package/src/runner/workflow-subflows.test.ts +255 -0
  56. package/src/templates/agents/keystone-architect.md +17 -12
  57. package/src/templates/agents/tester.md +21 -0
  58. package/src/templates/child-rollback.yaml +11 -0
  59. package/src/templates/decompose-implement.yaml +53 -0
  60. package/src/templates/decompose-problem.yaml +159 -0
  61. package/src/templates/decompose-research.yaml +52 -0
  62. package/src/templates/decompose-review.yaml +51 -0
  63. package/src/templates/dev.yaml +134 -0
  64. package/src/templates/engine-example.yaml +33 -0
  65. package/src/templates/fan-out-fan-in.yaml +61 -0
  66. package/src/templates/memory-service.yaml +1 -1
  67. package/src/templates/parent-rollback.yaml +16 -0
  68. package/src/templates/robust-automation.yaml +1 -1
  69. package/src/templates/scaffold-feature.yaml +29 -27
  70. package/src/templates/scaffold-generate.yaml +41 -0
  71. package/src/templates/scaffold-plan.yaml +53 -0
  72. package/src/types/status.ts +3 -0
  73. package/src/ui/dashboard.tsx +4 -3
  74. package/src/utils/assets.macro.ts +36 -0
  75. package/src/utils/auth-manager.ts +585 -8
  76. package/src/utils/blueprint-utils.test.ts +49 -0
  77. package/src/utils/blueprint-utils.ts +80 -0
  78. package/src/utils/circuit-breaker.test.ts +177 -0
  79. package/src/utils/circuit-breaker.ts +160 -0
  80. package/src/utils/config-loader.test.ts +100 -13
  81. package/src/utils/config-loader.ts +44 -17
  82. package/src/utils/constants.ts +62 -0
  83. package/src/utils/error-renderer.test.ts +267 -0
  84. package/src/utils/error-renderer.ts +320 -0
  85. package/src/utils/json-parser.test.ts +4 -0
  86. package/src/utils/json-parser.ts +18 -1
  87. package/src/utils/mermaid.ts +4 -0
  88. package/src/utils/paths.test.ts +46 -0
  89. package/src/utils/paths.ts +70 -0
  90. package/src/utils/process-sandbox.test.ts +128 -0
  91. package/src/utils/process-sandbox.ts +293 -0
  92. package/src/utils/rate-limiter.test.ts +143 -0
  93. package/src/utils/rate-limiter.ts +221 -0
  94. package/src/utils/redactor.test.ts +23 -15
  95. package/src/utils/redactor.ts +65 -25
  96. package/src/utils/resource-loader.test.ts +54 -0
  97. package/src/utils/resource-loader.ts +158 -0
  98. package/src/utils/sandbox.test.ts +69 -4
  99. package/src/utils/sandbox.ts +69 -6
  100. package/src/utils/schema-validator.ts +65 -0
  101. package/src/utils/workflow-registry.test.ts +57 -0
  102. package/src/utils/workflow-registry.ts +45 -25
  103. /package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
  104. /package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0
@@ -3,6 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator.ts';
3
3
  import { ExpressionEvaluator } from '../expression/evaluator.ts';
4
4
  // Removed synchronous file I/O imports - using Bun's async file API instead
5
5
  import type {
6
+ BlueprintStep,
7
+ EngineStep,
6
8
  FileStep,
7
9
  HumanStep,
8
10
  MemoryStep,
@@ -14,13 +16,17 @@ import type {
14
16
  WorkflowStep,
15
17
  } from '../parser/schema.ts';
16
18
  import { ConsoleLogger, type Logger } from '../utils/logger.ts';
19
+ import { executeBlueprintStep } from './blueprint-executor.ts';
20
+ import { executeEngineStep } from './engine-executor.ts';
17
21
  import { getAdapter } from './llm-adapter.ts';
18
22
  import { detectShellInjectionRisk, executeShell } from './shell-executor.ts';
19
23
 
20
24
  import * as fs from 'node:fs';
25
+ import { createRequire } from 'node:module';
21
26
  import * as os from 'node:os';
22
27
  import * as path from 'node:path';
23
28
  import * as readline from 'node:readline/promises';
29
+ import { LIMITS, TIMEOUTS } from '../utils/constants.ts';
24
30
  import { SafeSandbox } from '../utils/sandbox.ts';
25
31
  import { executeLlmStep } from './llm-executor.ts';
26
32
  import { validateRemoteUrl } from './mcp-client.ts';
@@ -37,9 +43,20 @@ export class WorkflowSuspendedError extends Error {
37
43
  }
38
44
  }
39
45
 
46
+ export class WorkflowWaitingError extends Error {
47
+ constructor(
48
+ public readonly message: string,
49
+ public readonly stepId: string,
50
+ public readonly wakeAt?: string
51
+ ) {
52
+ super(message);
53
+ this.name = 'WorkflowWaitingError';
54
+ }
55
+ }
56
+
40
57
  export interface StepResult {
41
58
  output: unknown;
42
- status: 'success' | 'failed' | 'suspended';
59
+ status: 'success' | 'failed' | 'suspended' | 'skipped' | 'waiting';
43
60
  error?: string;
44
61
  usage?: {
45
62
  prompt_tokens: number;
@@ -57,11 +74,100 @@ export interface StepExecutorOptions {
57
74
  memoryDb?: MemoryDb;
58
75
  workflowDir?: string;
59
76
  dryRun?: boolean;
77
+ abortSignal?: AbortSignal;
78
+ runId?: string;
79
+ stepExecutionId?: string;
80
+ artifactRoot?: string;
81
+ redactForStorage?: (value: unknown) => unknown;
82
+ debug?: boolean;
83
+ allowInsecure?: boolean;
60
84
  // Dependency injection for testing
61
85
  getAdapter?: typeof getAdapter;
86
+ executeStep?: typeof executeStep;
87
+ executeLlmStep?: typeof executeLlmStep;
62
88
  sandbox?: typeof SafeSandbox;
63
89
  }
64
90
 
91
+ import type { JoinStep } from '../parser/schema.ts';
92
+
93
+ /**
94
+ * Execute a join step
95
+ */
96
+ async function executeJoinStep(
97
+ step: JoinStep,
98
+ context: ExpressionContext,
99
+ _logger: Logger
100
+ ): Promise<StepResult> {
101
+ // Join step logic:
102
+ // It aggregates outputs from its 'needs'.
103
+ // Since the runner ensures dependencies are met (or processed),
104
+ // we just need to collect the results from context.steps.
105
+
106
+ const inputs: Record<string, unknown> = {};
107
+ const statusMap: Record<string, string> = {};
108
+ const realStatusMap: Record<string, 'success' | 'failed'> = {}; // Status considering allowFailure errors
109
+ const errors: string[] = [];
110
+
111
+ for (const depId of step.needs) {
112
+ const depContext = context.steps?.[depId];
113
+ if (depContext) {
114
+ inputs[depId] = depContext.output;
115
+ if (depContext.status) {
116
+ statusMap[depId] = depContext.status;
117
+ }
118
+
119
+ // Determine effective status:
120
+ // If status is success but error exists (allowFailure), treat as failed for the join condition
121
+ const isRealSuccess = depContext.status === 'success' && !depContext.error;
122
+ realStatusMap[depId] = isRealSuccess ? 'success' : 'failed';
123
+
124
+ if (depContext.error) {
125
+ errors.push(`Dependency ${depId} failed: ${depContext.error}`);
126
+ }
127
+ }
128
+ }
129
+
130
+ // Validate condition
131
+ const condition = step.condition;
132
+ const total = step.needs.length;
133
+ // Use realStatusMap to count successes/failures
134
+ const successCount = Object.values(realStatusMap).filter((s) => s === 'success').length;
135
+
136
+ // Note: We use the strict success count.
137
+ // If a step was skipped, it's neither success nor failed in this binary map?
138
+ // Skipped steps usually mean "not run".
139
+ // If we want skipped steps to count as success? Probably not.
140
+ // Let's check skipped.
141
+
142
+ let passed = false;
143
+
144
+ if (condition === 'all') {
145
+ passed = successCount === total;
146
+ } else if (condition === 'any') {
147
+ passed = successCount > 0;
148
+ } else if (typeof condition === 'number') {
149
+ passed = successCount >= condition;
150
+ }
151
+
152
+ // NOTE: True "any" or "quorum" (partial completion) requires Runner support to schedule the join
153
+ // before all dependencies are done. Currently, the runner waits for ALL dependencies.
154
+ // So this logic works for 'all' or 'any' (if others failed but allowFailure was true).
155
+ // Use allowFailure on branches to support "best effort" joins with the current runner.
156
+
157
+ if (!passed) {
158
+ return {
159
+ output: { inputs, status: statusMap },
160
+ status: 'failed',
161
+ error: `Join condition '${condition}' not met. Success: ${successCount}/${total}. Errors: ${errors.join('; ')}`,
162
+ };
163
+ }
164
+
165
+ return {
166
+ output: { inputs, status: statusMap },
167
+ status: 'success',
168
+ };
169
+ }
170
+
65
171
  /**
66
172
  * Execute a single step based on its type
67
173
  */
@@ -77,36 +183,62 @@ export async function executeStep(
77
183
  memoryDb,
78
184
  workflowDir,
79
185
  dryRun,
186
+ abortSignal,
187
+ runId,
188
+ stepExecutionId,
189
+ artifactRoot,
190
+ redactForStorage,
80
191
  getAdapter: injectedGetAdapter,
192
+ executeStep: injectedExecuteStep,
193
+ executeLlmStep: injectedExecuteLlmStep,
81
194
  sandbox: injectedSandbox,
82
195
  } = options;
83
196
 
84
197
  try {
198
+ if (abortSignal?.aborted) {
199
+ throw new Error('Step canceled');
200
+ }
201
+ if (dryRun && step.type !== 'shell') {
202
+ logger.log(`[DRY RUN] Skipping ${step.type} step: ${step.id}`);
203
+ return {
204
+ output: null,
205
+ status: 'skipped',
206
+ };
207
+ }
208
+
85
209
  let result: StepResult;
86
210
  switch (step.type) {
87
211
  case 'shell':
88
- result = await executeShellStep(step, context, logger, dryRun);
212
+ result = await executeShellStep(step, context, logger, dryRun, abortSignal);
89
213
  break;
90
214
  case 'file':
91
215
  result = await executeFileStep(step, context, logger, dryRun);
92
216
  break;
93
217
  case 'request':
94
- result = await executeRequestStep(step, context, logger);
218
+ result = await executeRequestStep(step, context, logger, abortSignal);
95
219
  break;
96
220
  case 'human':
97
- result = await executeHumanStep(step, context, logger);
221
+ result = await executeHumanStep(step, context, logger, abortSignal);
98
222
  break;
99
223
  case 'sleep':
100
- result = await executeSleepStep(step, context, logger);
224
+ result = await executeSleepStep(step, context, logger, abortSignal);
101
225
  break;
102
226
  case 'llm':
103
- result = await executeLlmStep(
227
+ result = await (injectedExecuteLlmStep || executeLlmStep)(
104
228
  step,
105
229
  context,
106
- (s, c) => executeStep(s, c, logger, options),
230
+ (s, c) => {
231
+ const exec = injectedExecuteStep || executeStep;
232
+ return exec(s, c, logger, {
233
+ ...options,
234
+ stepExecutionId: undefined,
235
+ });
236
+ },
107
237
  logger,
108
238
  mcpManager,
109
- workflowDir
239
+ workflowDir,
240
+ abortSignal,
241
+ injectedGetAdapter
110
242
  );
111
243
  break;
112
244
  case 'memory':
@@ -119,7 +251,41 @@ export async function executeStep(
119
251
  result = await executeWorkflowFn(step, context);
120
252
  break;
121
253
  case 'script':
122
- result = await executeScriptStep(step, context, logger, injectedSandbox);
254
+ result = await executeScriptStep(step, context, logger, injectedSandbox, abortSignal);
255
+ break;
256
+ case 'engine':
257
+ result = await executeEngineStepWrapper(step, context, logger, {
258
+ abortSignal,
259
+ runId,
260
+ stepExecutionId,
261
+ artifactRoot,
262
+ redactForStorage,
263
+ });
264
+ break;
265
+ case 'blueprint':
266
+ result = await executeBlueprintStep(
267
+ step,
268
+ context,
269
+ (s, c) => executeStep(s, c, logger, options),
270
+ logger,
271
+ {
272
+ mcpManager,
273
+ workflowDir,
274
+ abortSignal,
275
+ runId,
276
+ artifactRoot,
277
+ }
278
+ );
279
+ break;
280
+ case 'join':
281
+ // Join is handled by the runner logic for aggregation, but we need a placeholder here
282
+ // or logic to aggregate results from dependencies.
283
+ // Actually, for 'all', 'any', 'quorum', the step *itself* should process the inputs.
284
+ // By the time executeStep is called, dependencies are met (for 'all').
285
+ // But for 'any', the runner must schedule it early.
286
+ // Assuming the runner handles scheduling, here we just return the aggregated output.
287
+ // We will assume 'context.steps' contains the dependency outputs.
288
+ result = await executeJoinStep(step, context, logger);
123
289
  break;
124
290
  default:
125
291
  throw new Error(`Unknown step type: ${(step as Step).type}`);
@@ -167,8 +333,12 @@ async function executeShellStep(
167
333
  step: ShellStep,
168
334
  context: ExpressionContext,
169
335
  logger: Logger,
170
- dryRun?: boolean
336
+ dryRun?: boolean,
337
+ abortSignal?: AbortSignal
171
338
  ): Promise<StepResult> {
339
+ if (abortSignal?.aborted) {
340
+ throw new Error('Step canceled');
341
+ }
172
342
  if (dryRun) {
173
343
  const command = ExpressionEvaluator.evaluateString(step.run, context);
174
344
  logger.log(`[DRY RUN] Would execute shell command: ${command}`);
@@ -187,7 +357,7 @@ async function executeShellStep(
187
357
  );
188
358
  }
189
359
 
190
- const result = await executeShell(step, context, logger);
360
+ const result = await executeShell(step, context, logger, abortSignal);
191
361
 
192
362
  if (result.stdout) {
193
363
  logger.log(result.stdout.trim());
@@ -199,6 +369,8 @@ async function executeShellStep(
199
369
  stdout: result.stdout,
200
370
  stderr: result.stderr,
201
371
  exitCode: result.exitCode,
372
+ stdoutTruncated: result.stdoutTruncated,
373
+ stderrTruncated: result.stderrTruncated,
202
374
  },
203
375
  status: 'failed',
204
376
  error: `Shell command exited with code ${result.exitCode}: ${result.stderr}`,
@@ -210,11 +382,76 @@ async function executeShellStep(
210
382
  stdout: result.stdout,
211
383
  stderr: result.stderr,
212
384
  exitCode: result.exitCode,
385
+ stdoutTruncated: result.stdoutTruncated,
386
+ stderrTruncated: result.stderrTruncated,
213
387
  },
214
388
  status: 'success',
215
389
  };
216
390
  }
217
391
 
392
+ async function executeEngineStepWrapper(
393
+ step: EngineStep,
394
+ context: ExpressionContext,
395
+ logger: Logger,
396
+ options: {
397
+ abortSignal?: AbortSignal;
398
+ runId?: string;
399
+ stepExecutionId?: string;
400
+ artifactRoot?: string;
401
+ redactForStorage?: (value: unknown) => unknown;
402
+ }
403
+ ): Promise<StepResult> {
404
+ const engineResult = await executeEngineStep(step, context, {
405
+ logger,
406
+ abortSignal: options.abortSignal,
407
+ runId: options.runId,
408
+ stepExecutionId: options.stepExecutionId,
409
+ artifactRoot: options.artifactRoot,
410
+ redactForStorage: options.redactForStorage,
411
+ });
412
+
413
+ const output = {
414
+ summary: engineResult.summary ?? null,
415
+ stdout: engineResult.stdout,
416
+ stderr: engineResult.stderr,
417
+ exitCode: engineResult.exitCode,
418
+ stdoutTruncated: engineResult.stdoutTruncated,
419
+ stderrTruncated: engineResult.stderrTruncated,
420
+ summarySource: engineResult.summarySource,
421
+ summaryFormat: engineResult.summaryFormat,
422
+ artifactPath: engineResult.artifactPath,
423
+ };
424
+
425
+ if (engineResult.exitCode !== 0) {
426
+ return {
427
+ output,
428
+ status: 'failed',
429
+ error: `Engine exited with code ${engineResult.exitCode}: ${engineResult.stderr}`,
430
+ };
431
+ }
432
+
433
+ if (engineResult.summaryError) {
434
+ return {
435
+ output,
436
+ status: 'failed',
437
+ error: `Engine summary parse failed: ${engineResult.summaryError}`,
438
+ };
439
+ }
440
+
441
+ if (engineResult.summary === null) {
442
+ return {
443
+ output,
444
+ status: 'failed',
445
+ error: `Engine step "${step.id}" did not produce a structured summary`,
446
+ };
447
+ }
448
+
449
+ return {
450
+ output,
451
+ status: 'success',
452
+ };
453
+ }
454
+
218
455
  /**
219
456
  * Execute a file step (read, write, append)
220
457
  */
@@ -281,6 +518,12 @@ async function executeFileStep(
281
518
  if (!(await file.exists())) {
282
519
  throw new Error(`File not found: ${targetPath}`);
283
520
  }
521
+ const stat = fs.statSync(targetPath);
522
+ if (stat.size > LIMITS.MAX_FILE_READ_BYTES) {
523
+ throw new Error(
524
+ `File exceeds maximum read size of ${LIMITS.MAX_FILE_READ_BYTES} bytes: ${targetPath}`
525
+ );
526
+ }
284
527
  const content = await file.text();
285
528
  return {
286
529
  output: content,
@@ -289,7 +532,7 @@ async function executeFileStep(
289
532
  }
290
533
 
291
534
  case 'write': {
292
- if (!step.content) {
535
+ if (step.content === undefined) {
293
536
  throw new Error('Content is required for write operation');
294
537
  }
295
538
  const content = ExpressionEvaluator.evaluateString(step.content, context);
@@ -308,7 +551,7 @@ async function executeFileStep(
308
551
  }
309
552
 
310
553
  case 'append': {
311
- if (!step.content) {
554
+ if (step.content === undefined) {
312
555
  throw new Error('Content is required for append operation');
313
556
  }
314
557
  const content = ExpressionEvaluator.evaluateString(step.content, context);
@@ -319,7 +562,7 @@ async function executeFileStep(
319
562
  fs.mkdirSync(dir, { recursive: true });
320
563
  }
321
564
 
322
- fs.appendFileSync(targetPath, content);
565
+ await fs.promises.appendFile(targetPath, content);
323
566
 
324
567
  return {
325
568
  output: { path: targetPath, bytes: content.length },
@@ -332,94 +575,246 @@ async function executeFileStep(
332
575
  }
333
576
  }
334
577
 
578
+ async function readResponseTextWithLimit(
579
+ response: Response,
580
+ maxBytes: number
581
+ ): Promise<{ text: string; truncated: boolean }> {
582
+ if (!response.body) {
583
+ return { text: '', truncated: false };
584
+ }
585
+
586
+ const reader = response.body.getReader();
587
+ const decoder = new TextDecoder();
588
+ let text = '';
589
+ let bytesRead = 0;
590
+
591
+ while (true) {
592
+ const { value, done } = await reader.read();
593
+ if (done) break;
594
+ if (!value) continue;
595
+
596
+ if (bytesRead + value.byteLength > maxBytes) {
597
+ const allowed = maxBytes - bytesRead;
598
+ if (allowed > 0) {
599
+ text += decoder.decode(value.slice(0, allowed), { stream: true });
600
+ }
601
+ text += decoder.decode();
602
+ try {
603
+ await reader.cancel();
604
+ } catch {}
605
+ return { text, truncated: true };
606
+ }
607
+
608
+ bytesRead += value.byteLength;
609
+ text += decoder.decode(value, { stream: true });
610
+ }
611
+
612
+ text += decoder.decode();
613
+ return { text, truncated: false };
614
+ }
615
+
335
616
  /**
336
617
  * Execute an HTTP request step
337
618
  */
338
619
  async function executeRequestStep(
339
620
  step: RequestStep,
340
621
  context: ExpressionContext,
341
- _logger: Logger
622
+ _logger: Logger,
623
+ abortSignal?: AbortSignal
342
624
  ): Promise<StepResult> {
625
+ if (abortSignal?.aborted) {
626
+ throw new Error('Step canceled');
627
+ }
343
628
  const url = ExpressionEvaluator.evaluateString(step.url, context);
629
+ const requestTimeoutMs = step.timeout ?? TIMEOUTS.DEFAULT_HTTP_TIMEOUT_MS;
630
+ const controller = new AbortController();
631
+ const onAbort = () => controller.abort(new Error('Step canceled'));
632
+ if (abortSignal) {
633
+ if (abortSignal.aborted) {
634
+ onAbort();
635
+ } else {
636
+ abortSignal.addEventListener('abort', onAbort, { once: true });
637
+ }
638
+ }
639
+ const timeoutId = setTimeout(() => {
640
+ controller.abort(new Error(`Request timed out after ${requestTimeoutMs}ms`));
641
+ }, requestTimeoutMs);
344
642
 
345
- // Validate URL to prevent SSRF
346
- await validateRemoteUrl(url);
643
+ try {
644
+ // Validate URL to prevent SSRF
645
+ await validateRemoteUrl(url, { allowInsecure: step.allowInsecure });
347
646
 
348
- // Evaluate headers
349
- const headers: Record<string, string> = {};
350
- if (step.headers) {
351
- for (const [key, value] of Object.entries(step.headers)) {
352
- headers[key] = ExpressionEvaluator.evaluateString(value, context);
647
+ // Evaluate headers
648
+ const headers: Record<string, string> = {};
649
+ if (step.headers) {
650
+ for (const [key, value] of Object.entries(step.headers)) {
651
+ headers[key] = ExpressionEvaluator.evaluateString(value, context);
652
+ }
353
653
  }
354
- }
355
654
 
356
- // Evaluate body
357
- let body: string | undefined;
358
- if (step.body) {
359
- const evaluatedBody = ExpressionEvaluator.evaluateObject(step.body, context);
655
+ // Evaluate body
656
+ let body: string | undefined;
657
+ if (step.body !== undefined) {
658
+ const evaluatedBody = ExpressionEvaluator.evaluateObject(step.body, context);
360
659
 
361
- const contentType = Object.entries(headers).find(
362
- ([k]) => k.toLowerCase() === 'content-type'
363
- )?.[1];
660
+ const contentType = Object.entries(headers).find(
661
+ ([k]) => k.toLowerCase() === 'content-type'
662
+ )?.[1];
364
663
 
365
- if (contentType?.includes('application/x-www-form-urlencoded')) {
366
- if (typeof evaluatedBody === 'object' && evaluatedBody !== null) {
367
- const params = new URLSearchParams();
368
- for (const [key, value] of Object.entries(evaluatedBody)) {
369
- params.append(key, String(value));
664
+ if (contentType?.includes('application/x-www-form-urlencoded')) {
665
+ if (typeof evaluatedBody === 'object' && evaluatedBody !== null) {
666
+ const params = new URLSearchParams();
667
+ for (const [key, value] of Object.entries(evaluatedBody)) {
668
+ params.append(key, String(value));
669
+ }
670
+ body = params.toString();
671
+ } else {
672
+ body = String(evaluatedBody);
370
673
  }
371
- body = params.toString();
372
674
  } else {
373
- body = String(evaluatedBody);
675
+ // Default to JSON if not form-encoded and not already a string
676
+ body = typeof evaluatedBody === 'string' ? evaluatedBody : JSON.stringify(evaluatedBody);
677
+
678
+ // Auto-set Content-Type to application/json if not already set and body is an object
679
+ if (!contentType && typeof evaluatedBody === 'object' && evaluatedBody !== null) {
680
+ headers['Content-Type'] = 'application/json';
681
+ }
374
682
  }
375
- } else {
376
- // Default to JSON if not form-encoded and not already a string
377
- body = typeof evaluatedBody === 'string' ? evaluatedBody : JSON.stringify(evaluatedBody);
683
+ }
684
+
685
+ const maxRedirects = 5;
686
+ let response: Response | undefined;
687
+ let currentUrl = url;
688
+ let currentMethod = step.method;
689
+ let currentBody = body;
690
+ const currentHeaders: Record<string, string> = { ...headers };
691
+ const safeCrossOriginHeaders = new Set(['accept', 'accept-language', 'user-agent']);
692
+ const removeHeader = (name: string) => {
693
+ const target = name.toLowerCase();
694
+ for (const key of Object.keys(currentHeaders)) {
695
+ if (key.toLowerCase() === target) {
696
+ delete currentHeaders[key];
697
+ }
698
+ }
699
+ };
700
+ const stripCrossOriginHeaders = () => {
701
+ for (const key of Object.keys(currentHeaders)) {
702
+ if (!safeCrossOriginHeaders.has(key.toLowerCase())) {
703
+ delete currentHeaders[key];
704
+ }
705
+ }
706
+ };
707
+
708
+ for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
709
+ response = await fetch(currentUrl, {
710
+ method: currentMethod,
711
+ headers: currentHeaders,
712
+ body: currentBody,
713
+ redirect: 'manual',
714
+ signal: controller.signal,
715
+ });
716
+
717
+ if (response.status >= 300 && response.status < 400) {
718
+ const location = response.headers.get('location');
719
+ if (!location) {
720
+ break;
721
+ }
722
+ if (redirectCount >= maxRedirects) {
723
+ throw new Error(`Request exceeded maximum redirects (${maxRedirects})`);
724
+ }
725
+
726
+ const nextUrl = new URL(location, currentUrl).href;
727
+ await validateRemoteUrl(nextUrl, { allowInsecure: step.allowInsecure });
728
+
729
+ let nextMethod = currentMethod;
730
+ let nextBody = currentBody;
731
+ if (
732
+ response.status === 303 ||
733
+ ((response.status === 301 || response.status === 302) &&
734
+ currentMethod !== 'GET' &&
735
+ currentMethod !== 'HEAD')
736
+ ) {
737
+ nextMethod = 'GET';
738
+ nextBody = undefined;
739
+ removeHeader('content-type');
740
+ }
378
741
 
379
- // Auto-set Content-Type to application/json if not already set and body is an object
380
- if (!contentType && typeof evaluatedBody === 'object' && evaluatedBody !== null) {
381
- headers['Content-Type'] = 'application/json';
742
+ const fromOrigin = new URL(currentUrl).origin;
743
+ const toOrigin = new URL(nextUrl).origin;
744
+ if (fromOrigin !== toOrigin) {
745
+ removeHeader('authorization');
746
+ removeHeader('proxy-authorization');
747
+ removeHeader('cookie');
748
+ if (!step.allowInsecure) {
749
+ if (nextMethod !== 'GET' && nextMethod !== 'HEAD') {
750
+ throw new Error(
751
+ `Cross-origin redirect blocked for ${nextMethod} request. Set allowInsecure to true to override.`
752
+ );
753
+ }
754
+ stripCrossOriginHeaders();
755
+ }
756
+ }
757
+
758
+ currentMethod = nextMethod;
759
+ currentBody = nextBody;
760
+ currentUrl = nextUrl;
761
+ continue;
382
762
  }
763
+
764
+ break;
383
765
  }
384
- }
385
766
 
386
- const response = await fetch(url, {
387
- method: step.method,
388
- headers,
389
- body,
390
- });
767
+ if (!response) {
768
+ throw new Error('Request failed: No response received');
769
+ }
391
770
 
392
- const responseText = await response.text();
393
- let responseData: unknown;
771
+ const maxResponseBytes = LIMITS.MAX_HTTP_RESPONSE_BYTES;
772
+ const { text: responseText, truncated } = await readResponseTextWithLimit(
773
+ response,
774
+ maxResponseBytes
775
+ );
776
+ let responseData: unknown;
394
777
 
395
- try {
396
- responseData = JSON.parse(responseText);
397
- } catch {
398
- responseData = responseText;
399
- }
778
+ try {
779
+ responseData = JSON.parse(responseText);
780
+ } catch {
781
+ responseData = responseText;
782
+ }
400
783
 
401
- return {
402
- output: {
403
- status: response.status,
404
- statusText: response.statusText,
405
- headers: (() => {
406
- const h: Record<string, string> = {};
407
- response.headers.forEach((v, k) => {
408
- h[k] = v;
409
- });
410
- return h;
411
- })(),
412
- data: responseData,
413
- },
414
- status: response.ok ? 'success' : 'failed',
415
- error: response.ok
416
- ? undefined
417
- : `HTTP ${response.status}: ${response.statusText}${
418
- responseText
419
- ? `\nResponse Body: ${responseText.substring(0, 500)}${responseText.length > 500 ? '...' : ''}`
420
- : ''
421
- }`,
422
- };
784
+ return {
785
+ output: {
786
+ status: response.status,
787
+ statusText: response.statusText,
788
+ headers: (() => {
789
+ const h: Record<string, string> = {};
790
+ response.headers.forEach((v, k) => {
791
+ h[k] = v;
792
+ });
793
+ return h;
794
+ })(),
795
+ data: responseData,
796
+ truncated,
797
+ maxBytes: maxResponseBytes,
798
+ },
799
+ status: response.ok ? 'success' : 'failed',
800
+ error: response.ok
801
+ ? undefined
802
+ : `HTTP ${response.status}: ${response.statusText}${
803
+ responseText
804
+ ? `\nResponse Body: ${responseText.substring(0, 500)}${responseText.length > 500 ? '...' : ''}${
805
+ truncated ? ' [truncated]' : ''
806
+ }`
807
+ : truncated
808
+ ? '\nResponse Body: [truncated]'
809
+ : ''
810
+ }`,
811
+ };
812
+ } finally {
813
+ clearTimeout(timeoutId);
814
+ if (abortSignal) {
815
+ abortSignal.removeEventListener('abort', onAbort);
816
+ }
817
+ }
423
818
  }
424
819
 
425
820
  /**
@@ -428,8 +823,12 @@ async function executeRequestStep(
428
823
  async function executeHumanStep(
429
824
  step: HumanStep,
430
825
  context: ExpressionContext,
431
- logger: Logger
826
+ logger: Logger,
827
+ abortSignal?: AbortSignal
432
828
  ): Promise<StepResult> {
829
+ if (abortSignal?.aborted) {
830
+ throw new Error('Step canceled');
831
+ }
433
832
  const message = ExpressionEvaluator.evaluateString(step.message, context);
434
833
 
435
834
  // Check if we have a resume answer
@@ -510,8 +909,12 @@ async function executeHumanStep(
510
909
  async function executeSleepStep(
511
910
  step: SleepStep,
512
911
  context: ExpressionContext,
513
- _logger: Logger
912
+ _logger: Logger,
913
+ abortSignal?: AbortSignal
514
914
  ): Promise<StepResult> {
915
+ if (abortSignal?.aborted) {
916
+ throw new Error('Step canceled');
917
+ }
515
918
  const evaluated = ExpressionEvaluator.evaluate(step.duration.toString(), context);
516
919
  const duration = Number(evaluated);
517
920
 
@@ -519,7 +922,39 @@ async function executeSleepStep(
519
922
  throw new Error(`Invalid sleep duration: ${evaluated}`);
520
923
  }
521
924
 
522
- await new Promise((resolve) => setTimeout(resolve, duration));
925
+ // For durable sleeps, return waiting status with wake time
926
+ // Threshold: 60s (60000ms) - only durably wait if requested AND long enough
927
+ if (step.durable && duration >= 60000) {
928
+ const wakeAt = new Date(Date.now() + duration).toISOString();
929
+ return {
930
+ output: { durable: true, wakeAt, durationMs: duration },
931
+ status: 'waiting',
932
+ };
933
+ }
934
+
935
+ await new Promise((resolve, reject) => {
936
+ const onAbort = () => {
937
+ clearTimeout(timeoutId);
938
+ reject(new Error('Step canceled'));
939
+ };
940
+ const cleanup = () => {
941
+ if (abortSignal) {
942
+ abortSignal.removeEventListener('abort', onAbort);
943
+ }
944
+ };
945
+ const timeoutId = setTimeout(() => {
946
+ cleanup();
947
+ resolve(undefined);
948
+ }, duration);
949
+ if (abortSignal) {
950
+ if (abortSignal.aborted) {
951
+ onAbort();
952
+ cleanup();
953
+ return;
954
+ }
955
+ abortSignal.addEventListener('abort', onAbort, { once: true });
956
+ }
957
+ });
523
958
 
524
959
  return {
525
960
  output: { slept: duration },
@@ -533,9 +968,13 @@ async function executeScriptStep(
533
968
  step: ScriptStep,
534
969
  context: ExpressionContext,
535
970
  _logger: Logger,
536
- sandbox = SafeSandbox
971
+ sandbox = SafeSandbox,
972
+ abortSignal?: AbortSignal
537
973
  ): Promise<StepResult> {
538
974
  try {
975
+ if (abortSignal?.aborted) {
976
+ throw new Error('Step canceled');
977
+ }
539
978
  if (!step.allowInsecure) {
540
979
  throw new Error(
541
980
  'Script execution is disabled by default because Bun uses an insecure VM sandbox. ' +
@@ -543,6 +982,8 @@ async function executeScriptStep(
543
982
  );
544
983
  }
545
984
 
985
+ const requireFn = createRequire(import.meta.url);
986
+
546
987
  const result = await sandbox.execute(
547
988
  step.run,
548
989
  {
@@ -550,9 +991,13 @@ async function executeScriptStep(
550
991
  secrets: context.secrets,
551
992
  steps: context.steps,
552
993
  env: context.env,
994
+ // biome-ignore lint/suspicious/noExplicitAny: args is dynamic
995
+ args: (context as any).args,
996
+ require: requireFn,
553
997
  },
554
998
  {
555
999
  timeout: step.timeout,
1000
+ logger: _logger,
556
1001
  }
557
1002
  );
558
1003
 
@@ -584,7 +1029,13 @@ async function executeMemoryStep(
584
1029
  }
585
1030
 
586
1031
  try {
587
- const { adapter, resolvedModel } = getAdapterFn(step.model || 'local');
1032
+ const requestedModel = step.model || 'local';
1033
+ if (requestedModel !== 'local' && !requestedModel.startsWith('local:')) {
1034
+ throw new Error(
1035
+ 'Memory steps only support local embeddings. Use model: local (or local:<model>) or omit the model.'
1036
+ );
1037
+ }
1038
+ const { adapter, resolvedModel } = getAdapterFn(requestedModel);
588
1039
  if (!adapter.embed) {
589
1040
  throw new Error(`Provider for model ${step.model || 'local'} does not support embeddings`);
590
1041
  }