keystone-cli 0.6.0 → 0.6.1

This diff compares the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "keystone-cli",
3
- "version": "0.6.0",
3
+ "version": "0.6.1",
4
4
  "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -9,7 +9,7 @@ import architectAgent from './templates/agents/keystone-architect.md' with { typ
9
9
  // Default templates
10
10
  import scaffoldWorkflow from './templates/scaffold-feature.yaml' with { type: 'text' };
11
11
 
12
- import { WorkflowDb } from './db/workflow-db.ts';
12
+ import { WorkflowDb, type WorkflowRun } from './db/workflow-db.ts';
13
13
  import { WorkflowParser } from './parser/workflow-parser.ts';
14
14
  import { ConfigLoader } from './utils/config-loader.ts';
15
15
  import { ConsoleLogger } from './utils/logger.ts';
@@ -279,7 +279,79 @@ program
279
279
  }
280
280
  });
281
281
 
282
- // ... (optimize command remains here) ...
282
+ // ===== keystone workflows =====
283
+ program
284
+ .command('workflows')
285
+ .description('List available workflows')
286
+ .action(() => {
287
+ const workflows = WorkflowRegistry.listWorkflows();
288
+ if (workflows.length === 0) {
289
+ console.log('No workflows found. Run "keystone init" to seed default workflows.');
290
+ return;
291
+ }
292
+
293
+ console.log('\nšŸ›ļø Available Workflows:');
294
+ for (const w of workflows) {
295
+ console.log(`\n ${w.name}`);
296
+ if (w.description) {
297
+ console.log(` ${w.description}`);
298
+ }
299
+ }
300
+ console.log('');
301
+ });
302
+
303
+ // ===== keystone optimize =====
304
+ program
305
+ .command('optimize')
306
+ .description('Optimize a specific step in a workflow using iterative evaluation')
307
+ .argument('<workflow>', 'Workflow name or path to workflow file')
308
+ .requiredOption('-t, --target <step_id>', 'Target step ID to optimize')
309
+ .option('-n, --iterations <number>', 'Number of optimization iterations', '5')
310
+ .option('-i, --input <key=value...>', 'Input values for evaluation')
311
+ .action(async (workflowPath, options) => {
312
+ try {
313
+ const { OptimizationRunner } = await import('./runner/optimization-runner.ts');
314
+ const resolvedPath = WorkflowRegistry.resolvePath(workflowPath);
315
+ const workflow = WorkflowParser.loadWorkflow(resolvedPath);
316
+
317
+ // Parse inputs
318
+ const inputs: Record<string, unknown> = {};
319
+ if (options.input) {
320
+ for (const pair of options.input) {
321
+ const index = pair.indexOf('=');
322
+ if (index > 0) {
323
+ const key = pair.slice(0, index);
324
+ const value = pair.slice(index + 1);
325
+ try {
326
+ inputs[key] = JSON.parse(value);
327
+ } catch {
328
+ inputs[key] = value;
329
+ }
330
+ }
331
+ }
332
+ }
333
+
334
+ const runner = new OptimizationRunner(workflow, {
335
+ workflowPath: resolvedPath,
336
+ targetStepId: options.target,
337
+ iterations: Number.parseInt(options.iterations, 10),
338
+ inputs,
339
+ });
340
+
341
+ console.log('šŸ›ļø Keystone Prompt Optimization');
342
+ const { bestPrompt, bestScore } = await runner.optimize();
343
+
344
+ console.log('\n✨ Optimization Complete!');
345
+ console.log(`🏆 Best Score: ${bestScore}/100`);
346
+ console.log('\nBest Prompt/Command:');
347
+ console.log(''.padEnd(80, '-'));
348
+ console.log(bestPrompt);
349
+ console.log(''.padEnd(80, '-'));
350
+ } catch (error) {
351
+ console.error('✗ Optimization failed:', error instanceof Error ? error.message : error);
352
+ process.exit(1);
353
+ }
354
+ });
283
355
 
284
356
  // ===== keystone resume =====
285
357
  program
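
A note on the new `optimize` command's `-i, --input <key=value...>` flag added above: each pair is split on the first `=`, and the value is parsed as JSON where possible, falling back to a plain string. A minimal standalone sketch of that behaviour (the `parseInputPairs` helper is illustrative, not part of the package):

    // Mirrors the input-parsing loop in the `optimize` action above:
    // JSON values (numbers, booleans, objects) are decoded, anything else
    // stays a string, and pairs without a key before '=' are ignored.
    function parseInputPairs(pairs: string[]): Record<string, unknown> {
      const inputs: Record<string, unknown> = {};
      for (const pair of pairs) {
        const index = pair.indexOf('=');
        if (index <= 0) continue; // malformed pair: no key before '='
        const key = pair.slice(0, index);
        const value = pair.slice(index + 1);
        try {
          inputs[key] = JSON.parse(value);
        } catch {
          inputs[key] = value;
        }
      }
      return inputs;
    }

    // parseInputPairs(['retries=3', 'name=draft', 'opts={"strict":true}'])
    //   -> { retries: 3, name: 'draft', opts: { strict: true } }
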
@@ -347,40 +419,180 @@ program
347
419
  }
348
420
  });
349
421
 
350
- // ... (other commands) ...
351
-
352
- // ===== keystone maintenance =====
422
+ // ===== keystone history =====
353
423
  program
354
- .command('maintenance')
355
- .description('Perform database maintenance (prune old runs and vacuum)')
356
- .option('--days <days>', 'Delete runs older than this many days', '30')
424
+ .command('history')
425
+ .description('Show recent workflow runs')
426
+ .option('-l, --limit <number>', 'Limit the number of runs to show', '50')
357
427
  .action(async (options) => {
358
428
  try {
359
- const days = Number.parseInt(options.days, 10);
360
- if (Number.isNaN(days) || days < 0) {
361
- console.error('✗ Invalid days value. Must be a positive number.');
362
- process.exit(1);
429
+ const db = new WorkflowDb();
430
+ const limit = Number.parseInt(options.limit, 10);
431
+ const runs = await db.listRuns(limit);
432
+ db.close();
433
+
434
+ if (runs.length === 0) {
435
+ console.log('No workflow runs found.');
436
+ return;
363
437
  }
364
438
 
365
- console.log('🧹 Starting maintenance...');
439
+ console.log('\nšŸ›ļø Workflow Run History:');
440
+ console.log(''.padEnd(100, '-'));
441
+ console.log(
442
+ `${'ID'.padEnd(10)} ${'Workflow'.padEnd(25)} ${'Status'.padEnd(15)} ${'Started At'}`
443
+ );
444
+ console.log(''.padEnd(100, '-'));
445
+
446
+ for (const run of runs) {
447
+ const id = run.id.slice(0, 8);
448
+ const status = run.status;
449
+ const color =
450
+ status === 'success' ? '\x1b[32m' : status === 'failed' ? '\x1b[31m' : '\x1b[33m';
451
+ const reset = '\x1b[0m';
452
+
453
+ console.log(
454
+ `${id.padEnd(10)} ${run.workflow_name.padEnd(25)} ${color}${status.padEnd(
455
+ 15
456
+ )}${reset} ${new Date(run.started_at).toLocaleString()}`
457
+ );
458
+ }
459
+ console.log('');
460
+ } catch (error) {
461
+ console.error('✗ Failed to list runs:', error instanceof Error ? error.message : error);
462
+ process.exit(1);
463
+ }
464
+ });
465
+
466
+ // ===== keystone logs =====
467
+ program
468
+ .command('logs')
469
+ .description('Show logs for a specific workflow run')
470
+ .argument('<run_id>', 'Run ID to show logs for')
471
+ .option('-v, --verbose', 'Show detailed step outputs')
472
+ .action(async (runId, options) => {
473
+ try {
366
474
  const db = new WorkflowDb();
475
+ const run = await db.getRun(runId);
367
476
 
368
- console.log(` Pruning runs older than ${days} days...`);
369
- const deleted = await db.pruneRuns(days);
370
- console.log(` ✓ Deleted ${deleted} run(s)`);
477
+ if (!run) {
478
+ // Try searching by short ID
479
+ const allRuns = await db.listRuns(200);
480
+ const matching = allRuns.find((r) => r.id.startsWith(runId));
481
+ if (matching) {
482
+ const detailedRun = await db.getRun(matching.id);
483
+ if (detailedRun) {
484
+ await showRunLogs(detailedRun, db, !!options.verbose);
485
+ db.close();
486
+ return;
487
+ }
488
+ }
371
489
 
372
- console.log(' Vacuuming database (reclaiming space)...');
373
- await db.vacuum();
374
- console.log(' ✓ Vacuum complete');
490
+ console.error(`✗ Run not found: ${runId}`);
491
+ db.close();
492
+ process.exit(1);
493
+ }
375
494
 
495
+ await showRunLogs(run, db, !!options.verbose);
376
496
  db.close();
377
- console.log('\n✨ Maintenance completed successfully!');
378
497
  } catch (error) {
379
- console.error('✗ Maintenance failed:', error instanceof Error ? error.message : error);
498
+ console.error('✗ Failed to show logs:', error instanceof Error ? error.message : error);
380
499
  process.exit(1);
381
500
  }
382
501
  });
383
502
 
503
+ async function showRunLogs(run: WorkflowRun, db: WorkflowDb, verbose: boolean) {
504
+ console.log(`\nšŸ›ļø Run: ${run.workflow_name} (${run.id})`);
505
+ console.log(` Status: ${run.status}`);
506
+ console.log(` Started: ${new Date(run.started_at).toLocaleString()}`);
507
+ if (run.completed_at) {
508
+ console.log(` Completed: ${new Date(run.completed_at).toLocaleString()}`);
509
+ }
510
+
511
+ const steps = await db.getStepsByRun(run.id);
512
+ console.log(`\nSteps (${steps.length}):`);
513
+ console.log(''.padEnd(100, '-'));
514
+
515
+ for (const step of steps) {
516
+ const statusColor =
517
+ step.status === 'success' ? '\x1b[32m' : step.status === 'failed' ? '\x1b[31m' : '\x1b[33m';
518
+ const reset = '\x1b[0m';
519
+
520
+ let label = step.step_id;
521
+ if (step.iteration_index !== null) {
522
+ label += ` [${step.iteration_index}]`;
523
+ }
524
+
525
+ console.log(`${statusColor}${step.status.toUpperCase().padEnd(10)}${reset} ${label}`);
526
+
527
+ if (step.error) {
528
+ console.log(` \x1b[31mError: ${step.error}\x1b[0m`);
529
+ }
530
+
531
+ if (verbose && step.output) {
532
+ try {
533
+ const output = JSON.parse(step.output);
534
+ console.log(
535
+ ` Output: ${JSON.stringify(output, null, 2).replace(/\n/g, '\n ')}`
536
+ );
537
+ } catch {
538
+ console.log(` Output: ${step.output}`);
539
+ }
540
+ }
541
+ }
542
+
543
+ if (run.outputs) {
544
+ console.log('\nFinal Outputs:');
545
+ try {
546
+ const parsed = JSON.parse(run.outputs);
547
+ console.log(JSON.stringify(parsed, null, 2));
548
+ } catch {
549
+ console.log(run.outputs);
550
+ }
551
+ }
552
+
553
+ if (run.error) {
554
+ console.log(`\n\x1b[31mWorkflow Error:\x1b[0m ${run.error}`);
555
+ }
556
+ }
557
+
558
+ // ===== keystone prune / maintenance =====
559
+ async function performMaintenance(days: number) {
560
+ try {
561
+ console.log(`🧹 Starting maintenance (pruning runs older than ${days} days)...`);
562
+ const db = new WorkflowDb();
563
+ const count = await db.pruneRuns(days);
564
+ console.log(` ✓ Pruned ${count} old run(s)`);
565
+
566
+ console.log(' Vacuuming database (reclaiming space)...');
567
+ await db.vacuum();
568
+ console.log(' ✓ Vacuum complete');
569
+
570
+ db.close();
571
+ console.log('\n✨ Maintenance completed successfully!');
572
+ } catch (error) {
573
+ console.error('✗ Maintenance failed:', error instanceof Error ? error.message : error);
574
+ process.exit(1);
575
+ }
576
+ }
577
+
578
+ program
579
+ .command('prune')
580
+ .description('Delete old workflow runs from the database (alias for maintenance)')
581
+ .option('--days <number>', 'Days to keep', '30')
582
+ .action(async (options) => {
583
+ const days = Number.parseInt(options.days, 10);
584
+ await performMaintenance(days);
585
+ });
586
+
587
+ program
588
+ .command('maintenance')
589
+ .description('Perform database maintenance (prune old runs and vacuum)')
590
+ .option('--days <days>', 'Delete runs older than this many days', '30')
591
+ .action(async (options) => {
592
+ const days = Number.parseInt(options.days, 10);
593
+ await performMaintenance(days);
594
+ });
595
+
384
596
  // ===== keystone ui =====
385
597
  program
386
598
  .command('ui')
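
The new `logs` command above accepts either a full run ID or the 8-character prefix printed by `history`: it tries an exact `getRun` first, then falls back to a prefix match over the 200 most recent runs. A small sketch of that lookup, assuming the `WorkflowDb` API shown in this diff (the `resolveRun` helper itself is illustrative):

    import { WorkflowDb, type WorkflowRun } from './db/workflow-db.ts';

    // Exact match first, then match a truncated ID against recent runs,
    // exactly as the `logs` action above does before calling showRunLogs().
    async function resolveRun(db: WorkflowDb, runId: string): Promise<WorkflowRun | null> {
      const exact = await db.getRun(runId);
      if (exact) return exact;

      const recent = await db.listRuns(200); // `history` prints only 8-char IDs
      const match = recent.find((r) => r.id.startsWith(runId));
      return match ? (await db.getRun(match.id)) ?? null : null;
    }
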
@@ -1,5 +1,7 @@
1
1
  import type { Database } from 'bun:sqlite';
2
2
  import { randomUUID } from 'node:crypto';
3
+ import { existsSync, mkdirSync } from 'node:fs';
4
+ import { dirname } from 'node:path';
3
5
  import * as sqliteVec from 'sqlite-vec';
4
6
  import './sqlite-setup.ts';
5
7
 
@@ -22,6 +24,10 @@ export class MemoryDb {
22
24
  this.db = cached.db;
23
25
  } else {
24
26
  const { Database } = require('bun:sqlite');
27
+ const dir = dirname(dbPath);
28
+ if (!existsSync(dir)) {
29
+ mkdirSync(dir, { recursive: true });
30
+ }
25
31
  this.db = new Database(dbPath, { create: true });
26
32
 
27
33
  // Load sqlite-vec extension
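
Both `MemoryDb` (above) and `WorkflowDb` (below) now create the parent directory of their SQLite file before opening it, presumably so a first run without an existing `.keystone/` directory no longer fails. The guard, reduced to a self-contained sketch:

    import { Database } from 'bun:sqlite';
    import { existsSync, mkdirSync } from 'node:fs';
    import { dirname } from 'node:path';

    // Ensure the directory that will hold the SQLite file exists before
    // bun:sqlite opens it; { create: true } creates the file, not missing
    // directories.
    function openDatabase(dbPath: string): Database {
      const dir = dirname(dbPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true }); // e.g. creates .keystone/ for the default path
      }
      return new Database(dbPath, { create: true });
    }
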
@@ -0,0 +1,47 @@
1
+ import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test';
2
+ import type { Logger } from '../utils/logger';
3
+ import { setupSqlite } from './sqlite-setup';
4
+
5
+ describe('setupSqlite', () => {
6
+ const originalPlatform = process.platform;
7
+
8
+ afterEach(() => {
9
+ Object.defineProperty(process, 'platform', {
10
+ value: originalPlatform,
11
+ });
12
+ });
13
+
14
+ it('does nothing on non-darwin platforms', () => {
15
+ Object.defineProperty(process, 'platform', { value: 'linux' });
16
+ const logger: Logger = {
17
+ log: mock(() => {}),
18
+ warn: mock(() => {}),
19
+ error: mock(() => {}),
20
+ info: mock(() => {}),
21
+ };
22
+ setupSqlite(logger);
23
+ expect(logger.log).not.toHaveBeenCalled();
24
+ expect(logger.warn).not.toHaveBeenCalled();
25
+ });
26
+
27
+ it('logs warning if no custom sqlite found on darwin', () => {
28
+ Object.defineProperty(process, 'platform', { value: 'darwin' });
29
+ const logger: Logger = {
30
+ log: mock(() => {}),
31
+ warn: mock(() => {}),
32
+ error: mock(() => {}),
33
+ info: mock(() => {}),
34
+ };
35
+
36
+ // Mock Bun.spawnSync for brew
37
+ const spawnSpy = spyOn(Bun, 'spawnSync').mockImplementation(
38
+ () => ({ success: false }) as unknown as ReturnType<typeof Bun.spawnSync>
39
+ );
40
+
41
+ try {
42
+ setupSqlite(logger);
43
+ } finally {
44
+ spawnSpy.mockRestore();
45
+ }
46
+ });
47
+ });
@@ -1,4 +1,6 @@
1
1
  import { Database } from 'bun:sqlite';
2
+ import { existsSync, mkdirSync } from 'node:fs';
3
+ import { dirname } from 'node:path';
2
4
  import './sqlite-setup.ts';
3
5
  import {
4
6
  StepStatus as StepStatusConst,
@@ -40,6 +42,10 @@ export class WorkflowDb {
40
42
  private db: Database;
41
43
 
42
44
  constructor(public readonly dbPath = '.keystone/state.db') {
45
+ const dir = dirname(dbPath);
46
+ if (!existsSync(dir)) {
47
+ mkdirSync(dir, { recursive: true });
48
+ }
43
49
  this.db = new Database(dbPath, { create: true });
44
50
  this.db.exec('PRAGMA journal_mode = WAL;'); // Write-ahead logging
45
51
  this.db.exec('PRAGMA foreign_keys = ON;'); // Enable foreign key enforcement
@@ -1,19 +1,27 @@
1
- import { describe, expect, test } from 'bun:test';
1
+ import { describe, expect, mock, spyOn, test } from 'bun:test';
2
+ import * as cp from 'node:child_process';
3
+ import * as fs from 'node:fs';
2
4
  import { PassThrough } from 'node:stream';
3
5
  import type { ExpressionContext } from '../expression/evaluator.ts';
4
6
  import type { Step } from '../parser/schema.ts';
7
+ import type { Logger } from '../utils/logger.ts';
5
8
  import { DebugRepl } from './debug-repl.ts';
6
9
 
7
10
  describe('DebugRepl', () => {
8
11
  const mockContext: ExpressionContext = { inputs: { foo: 'bar' } };
9
- // biome-ignore lint/suspicious/noExplicitAny: mock step typing
10
- const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as any;
12
+ // mock step typing
13
+ const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as unknown as Step;
11
14
  const mockError = new Error('Test Error');
12
15
 
13
16
  test('should resolve with "skip" when user types "skip"', async () => {
14
17
  const input = new PassThrough();
15
18
  const output = new PassThrough();
16
- const mockLogger = { log: () => {}, error: () => {}, warn: () => {} };
19
+ const mockLogger: Logger = {
20
+ log: mock(() => {}),
21
+ error: mock(() => {}),
22
+ warn: mock(() => {}),
23
+ info: mock(() => {}),
24
+ };
17
25
  const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
18
26
 
19
27
  const promise = repl.start();
@@ -30,7 +38,12 @@ describe('DebugRepl', () => {
30
38
  test('should resolve with "retry" when user types "retry"', async () => {
31
39
  const input = new PassThrough();
32
40
  const output = new PassThrough();
33
- const mockLogger = { log: () => {}, error: () => {}, warn: () => {} };
41
+ const mockLogger: Logger = {
42
+ log: mock(() => {}),
43
+ error: mock(() => {}),
44
+ warn: mock(() => {}),
45
+ info: mock(() => {}),
46
+ };
34
47
  const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
35
48
 
36
49
  const promise = repl.start();
@@ -48,7 +61,12 @@ describe('DebugRepl', () => {
48
61
  test('should resolve with "continue_failure" when user types "exit"', async () => {
49
62
  const input = new PassThrough();
50
63
  const output = new PassThrough();
51
- const mockLogger = { log: () => {}, error: () => {}, warn: () => {} };
64
+ const mockLogger: Logger = {
65
+ log: mock(() => {}),
66
+ error: mock(() => {}),
67
+ warn: mock(() => {}),
68
+ info: mock(() => {}),
69
+ };
52
70
  const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
53
71
 
54
72
  const promise = repl.start();
@@ -60,6 +78,137 @@ describe('DebugRepl', () => {
60
78
  expect(result).toEqual({ type: 'continue_failure' });
61
79
  });
62
80
 
81
+ test('should handle "context" command', async () => {
82
+ const input = new PassThrough();
83
+ const output = new PassThrough();
84
+ const mockLogger: Logger = {
85
+ log: mock(() => {}),
86
+ error: mock(() => {}),
87
+ warn: mock(() => {}),
88
+ info: mock(() => {}),
89
+ };
90
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
91
+
92
+ repl.start();
93
+
94
+ await new Promise((r) => setTimeout(r, 10));
95
+ input.write('context\n');
96
+ await new Promise((r) => setTimeout(r, 10));
97
+
98
+ expect(mockLogger.log).toHaveBeenCalled();
99
+ // biome-ignore lint/suspicious/noExplicitAny: accessing mock property
100
+ const lastCall = (mockLogger.log as unknown as any).mock.calls.find((call: any[]) =>
101
+ String(call[0]).includes('foo')
102
+ );
103
+ expect(lastCall?.[0]).toContain('bar');
104
+ input.write('exit\n');
105
+ });
106
+
107
+ test('should handle "eval" command', async () => {
108
+ const input = new PassThrough();
109
+ const output = new PassThrough();
110
+ const mockLogger: Logger = {
111
+ log: mock(() => {}),
112
+ error: mock(() => {}),
113
+ warn: mock(() => {}),
114
+ info: mock(() => {}),
115
+ };
116
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
117
+
118
+ repl.start();
119
+
120
+ await new Promise((r) => setTimeout(r, 10));
121
+ input.write('eval inputs.foo\n');
122
+ await new Promise((r) => setTimeout(r, 10));
123
+
124
+ expect(mockLogger.log).toHaveBeenCalledWith('bar');
125
+ input.write('exit\n');
126
+ });
127
+
128
+ test('should handle "eval" command with error', async () => {
129
+ const input = new PassThrough();
130
+ const output = new PassThrough();
131
+ const mockLogger: Logger = {
132
+ log: mock(() => {}),
133
+ error: mock(() => {}),
134
+ warn: mock(() => {}),
135
+ info: mock(() => {}),
136
+ };
137
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
138
+
139
+ repl.start();
140
+
141
+ await new Promise((r) => setTimeout(r, 10));
142
+ input.write('eval nonExistent.bar\n');
143
+ await new Promise((r) => setTimeout(r, 10));
144
+
145
+ expect(mockLogger.error).toHaveBeenCalled();
146
+ input.write('exit\n');
147
+ });
148
+
149
+ test('should handle "eval" command without arguments', async () => {
150
+ const input = new PassThrough();
151
+ const output = new PassThrough();
152
+ const mockLogger: Logger = {
153
+ log: mock(() => {}),
154
+ error: mock(() => {}),
155
+ warn: mock(() => {}),
156
+ info: mock(() => {}),
157
+ };
158
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
159
+
160
+ repl.start();
161
+
162
+ await new Promise((r) => setTimeout(r, 10));
163
+ input.write('eval\n');
164
+ await new Promise((r) => setTimeout(r, 10));
165
+
166
+ expect(mockLogger.log).toHaveBeenCalledWith('Usage: eval <expression>');
167
+ input.write('exit\n');
168
+ });
169
+
170
+ test('should handle unknown command', async () => {
171
+ const input = new PassThrough();
172
+ const output = new PassThrough();
173
+ const mockLogger: Logger = {
174
+ log: mock(() => {}),
175
+ error: mock(() => {}),
176
+ warn: mock(() => {}),
177
+ info: mock(() => {}),
178
+ };
179
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
180
+
181
+ repl.start();
182
+
183
+ await new Promise((r) => setTimeout(r, 10));
184
+ input.write('unknown_cmd\n');
185
+ await new Promise((r) => setTimeout(r, 10));
186
+
187
+ expect(mockLogger.log).toHaveBeenCalledWith('Unknown command: unknown_cmd');
188
+ input.write('exit\n');
189
+ });
190
+
191
+ test('should handle empty input', async () => {
192
+ const input = new PassThrough();
193
+ const output = new PassThrough();
194
+ const mockLogger: Logger = {
195
+ log: mock(() => {}),
196
+ error: mock(() => {}),
197
+ warn: mock(() => {}),
198
+ info: mock(() => {}),
199
+ };
200
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
201
+
202
+ repl.start();
203
+
204
+ await new Promise((r) => setTimeout(r, 10));
205
+ input.write('\n');
206
+ await new Promise((r) => setTimeout(r, 10));
207
+
208
+ expect(mockLogger.log).not.toHaveBeenCalledWith('Unknown command: ');
209
+ input.write('exit\n');
210
+ });
211
+
63
212
  test('should parse shell commands correctly', () => {
64
213
  // We import the function dynamically to test it, or we assume it's exported
65
214
  const { parseShellCommand } = require('./debug-repl.ts');
@@ -71,4 +220,89 @@ describe('DebugRepl', () => {
71
220
  expect(parseShellCommand('editor -a -b -c')).toEqual(['editor', '-a', '-b', '-c']);
72
221
  expect(parseShellCommand(' spaced command ')).toEqual(['spaced', 'command']);
73
222
  });
223
+
224
+ test('should handle "edit" command and update step', async () => {
225
+ const input = new PassThrough();
226
+ const output = new PassThrough();
227
+ const mockLogger: Logger = {
228
+ log: mock(() => {}),
229
+ error: mock(() => {}),
230
+ warn: mock(() => {}),
231
+ info: mock(() => {}),
232
+ };
233
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
234
+
235
+ const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
236
+ // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
237
+ () => ({ error: null, status: 0 }) as any
238
+ );
239
+ const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
240
+ const updatedStep = { ...mockStep, run: 'echo "fixed"' };
241
+ const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation((() =>
242
+ JSON.stringify(updatedStep)) as unknown as typeof fs.readFileSync);
243
+ const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
244
+ const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
245
+
246
+ try {
247
+ repl.start();
248
+ await new Promise((r) => setTimeout(r, 50));
249
+ input.write('edit\n');
250
+ await new Promise((r) => setTimeout(r, 50));
251
+
252
+ expect(mockLogger.log).toHaveBeenCalledWith(
253
+ expect.stringContaining('Step definition updated')
254
+ );
255
+
256
+ input.write('retry\n');
257
+ await new Promise((r) => setTimeout(r, 50));
258
+ } finally {
259
+ spySpawnSync.mockRestore();
260
+ spyWriteFileSync.mockRestore();
261
+ spyReadFileSync.mockRestore();
262
+ spyExistsSync.mockRestore();
263
+ spyUnlinkSync.mockRestore();
264
+ }
265
+ });
266
+
267
+ test('should handle "edit" command with parse error', async () => {
268
+ const input = new PassThrough();
269
+ const output = new PassThrough();
270
+ const mockLogger: Logger = {
271
+ log: mock(() => {}),
272
+ error: mock(() => {}),
273
+ warn: mock(() => {}),
274
+ info: mock(() => {}),
275
+ };
276
+ const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
277
+
278
+ const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
279
+ // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
280
+ () => ({ error: null, status: 0 }) as any
281
+ );
282
+ const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
283
+ const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation(
284
+ (() => 'invalid json') as unknown as typeof fs.readFileSync
285
+ );
286
+ const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
287
+ const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
288
+
289
+ try {
290
+ repl.start();
291
+ await new Promise((r) => setTimeout(r, 50));
292
+ input.write('edit\n');
293
+ await new Promise((r) => setTimeout(r, 50));
294
+
295
+ expect(mockLogger.error).toHaveBeenCalledWith(
296
+ expect.stringContaining('Failed to parse JSON')
297
+ );
298
+ input.write('exit\n');
299
+ await new Promise((r) => setTimeout(r, 50));
300
+ } finally {
301
+ spySpawnSync.mockRestore();
302
+ spyWriteFileSync.mockRestore();
303
+ spyReadFileSync.mockRestore();
304
+ spyExistsSync.mockRestore();
305
+ spyUnlinkSync.mockRestore();
306
+ }
307
+ });
74
308
  });
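
The DebugRepl tests above replace the old ad-hoc `{ log, error, warn }` object with a fully typed `Logger` whose methods are `bun:test` mocks, which both satisfies the interface's `info` member and enables assertions such as `toHaveBeenCalledWith`. The same four-line mock recurs in every test; a tiny factory (illustrative only, not part of the package) would express it once:

    import { mock } from 'bun:test';
    import type { Logger } from '../utils/logger.ts';

    // Build a Logger whose methods are inspectable bun:test mocks.
    function makeMockLogger(): Logger {
      return {
        log: mock(() => {}),
        warn: mock(() => {}),
        error: mock(() => {}),
        info: mock(() => {}),
      };
    }
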
@@ -105,7 +105,9 @@ describe('AnthropicAdapter', () => {
105
105
  // @ts-ignore
106
106
  const fetchMock = global.fetch as MockFetch;
107
107
  // @ts-ignore
108
- const [url, init] = fetchMock.mock.calls[0];
108
+ // @ts-ignore
109
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
110
+ const [url, init] = fetchMock.mock.calls[0] as [string, any];
109
111
 
110
112
  expect(url).toBe('https://api.anthropic.com/v1/messages');
111
113
  expect(init.headers['x-api-key']).toBe('fake-anthropic-key');
@@ -179,7 +181,8 @@ describe('AnthropicAdapter', () => {
179
181
  ]);
180
182
 
181
183
  // @ts-ignore
182
- const init = global.fetch.mock.calls[0][1];
184
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
185
+ const init = global.fetch.mock.calls[0][1] as any;
183
186
  const body = JSON.parse(init.body);
184
187
  expect(body.messages[0].role).toBe('assistant');
185
188
  expect(body.messages[0].content).toHaveLength(2);
@@ -208,7 +211,8 @@ describe('AnthropicAdapter', () => {
208
211
  ]);
209
212
 
210
213
  // @ts-ignore
211
- const init = global.fetch.mock.calls[0][1];
214
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
215
+ const init = global.fetch.mock.calls[0][1] as any;
212
216
  const body = JSON.parse(init.body);
213
217
  expect(body.messages[0].role).toBe('user');
214
218
  expect(body.messages[0].content[0]).toEqual({
@@ -255,7 +259,9 @@ describe('CopilotAdapter', () => {
255
259
  // @ts-ignore
256
260
  const fetchMock = global.fetch as MockFetch;
257
261
  // @ts-ignore
258
- const [url, init] = fetchMock.mock.calls[0];
262
+ // @ts-ignore
263
+ // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
264
+ const [url, init] = fetchMock.mock.calls[0] as [string, any];
259
265
  expect(url).toBe('https://api.githubcopilot.com/chat/completions');
260
266
  expect(init.headers.Authorization).toBe('Bearer mock-token');
261
267
  spy.mockRestore();
@@ -13,6 +13,7 @@ import * as dns from 'node:dns/promises';
13
13
  import { mkdirSync, rmSync } from 'node:fs';
14
14
  import { tmpdir } from 'node:os';
15
15
  import { join } from 'node:path';
16
+ import type { MemoryDb } from '../db/memory-db';
16
17
  import type { ExpressionContext } from '../expression/evaluator';
17
18
  import type {
18
19
  FileStep,
@@ -22,6 +23,8 @@ import type {
22
23
  SleepStep,
23
24
  WorkflowStep,
24
25
  } from '../parser/schema';
26
+ import type { SafeSandbox } from '../utils/sandbox';
27
+ import type { getAdapter } from './llm-adapter';
25
28
  import { executeStep } from './step-executor';
26
29
 
27
30
  // Mock executeLlmStep
@@ -227,6 +230,196 @@ describe('step-executor', () => {
227
230
  }
228
231
  }
229
232
  });
233
+
234
+ it('should block path traversal outside cwd by default', async () => {
235
+ const outsidePath = join(process.cwd(), '..', 'outside.txt');
236
+ const step: FileStep = {
237
+ id: 'f1',
238
+ type: 'file',
239
+ needs: [],
240
+ op: 'read',
241
+ path: outsidePath,
242
+ };
243
+
244
+ const result = await executeStep(step, context);
245
+ expect(result.status).toBe('failed');
246
+ expect(result.error).toContain('Access denied');
247
+ });
248
+
249
+ it('should block path traversal with .. inside path resolving outside', async () => {
250
+ const outsidePath = 'foo/../../passwd';
251
+ const step: FileStep = {
252
+ id: 'f1',
253
+ type: 'file',
254
+ needs: [],
255
+ op: 'read',
256
+ path: outsidePath,
257
+ };
258
+
259
+ const result = await executeStep(step, context);
260
+ expect(result.status).toBe('failed');
261
+ expect(result.error).toContain('Access denied');
262
+ });
263
+ });
264
+
265
+ describe('script', () => {
266
+ const mockSandbox = {
267
+ execute: mock((code) => {
268
+ if (code === 'fail') throw new Error('Script failed');
269
+ return Promise.resolve('script-result');
270
+ }),
271
+ };
272
+
273
+ it('should fail if allowInsecure is not set', async () => {
274
+ // @ts-ignore
275
+ const step = {
276
+ id: 's1',
277
+ type: 'script',
278
+ run: 'console.log("hello")',
279
+ };
280
+ const result = await executeStep(step, context, undefined, {
281
+ sandbox: mockSandbox as unknown as typeof SafeSandbox,
282
+ });
283
+ expect(result.status).toBe('failed');
284
+ expect(result.error).toContain('Script execution is disabled by default');
285
+ });
286
+
287
+ it('should execute script if allowInsecure is true', async () => {
288
+ // @ts-ignore
289
+ const step = {
290
+ id: 's1',
291
+ type: 'script',
292
+ run: 'console.log("hello")',
293
+ allowInsecure: true,
294
+ };
295
+ const result = await executeStep(step, context, undefined, {
296
+ sandbox: mockSandbox as unknown as typeof SafeSandbox,
297
+ });
298
+ expect(result.status).toBe('success');
299
+ expect(result.output).toBe('script-result');
300
+ });
301
+
302
+ it('should handle script failure', async () => {
303
+ // @ts-ignore
304
+ const step = {
305
+ id: 's1',
306
+ type: 'script',
307
+ run: 'fail',
308
+ allowInsecure: true,
309
+ };
310
+ const result = await executeStep(step, context, undefined, {
311
+ sandbox: mockSandbox as unknown as typeof SafeSandbox,
312
+ });
313
+ expect(result.status).toBe('failed');
314
+ expect(result.error).toBe('Script failed');
315
+ });
316
+ });
317
+
318
+ describe('memory', () => {
319
+ const mockMemoryDb = {
320
+ store: mock(() => Promise.resolve('mem-id')),
321
+ search: mock(() => Promise.resolve([{ content: 'found', similarity: 0.9 }])),
322
+ };
323
+
324
+ const mockGetAdapter = mock((model) => {
325
+ if (model === 'no-embed') return { adapter: {}, resolvedModel: model };
326
+ return {
327
+ adapter: {
328
+ embed: mock((text) => Promise.resolve([0.1, 0.2, 0.3])),
329
+ },
330
+ resolvedModel: model,
331
+ };
332
+ });
333
+
334
+ it('should fail if memoryDb is not provided', async () => {
335
+ // @ts-ignore
336
+ const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo' };
337
+ const result = await executeStep(step, context, undefined, {
338
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
339
+ });
340
+ expect(result.status).toBe('failed');
341
+ expect(result.error).toBe('Memory database not initialized');
342
+ });
343
+
344
+ it('should fail if adapter does not support embedding', async () => {
345
+ // @ts-ignore
346
+ const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo', model: 'no-embed' };
347
+ // @ts-ignore
348
+ const result = await executeStep(step, context, undefined, {
349
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
350
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
351
+ });
352
+ expect(result.status).toBe('failed');
353
+ expect(result.error).toContain('does not support embeddings');
354
+ });
355
+
356
+ it('should store memory', async () => {
357
+ // @ts-ignore
358
+ const step = {
359
+ id: 'm1',
360
+ type: 'memory',
361
+ op: 'store',
362
+ text: 'foo',
363
+ metadata: { source: 'test' },
364
+ };
365
+ // @ts-ignore
366
+ const result = await executeStep(step, context, undefined, {
367
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
368
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
369
+ });
370
+ expect(result.status).toBe('success');
371
+ expect(result.output).toEqual({ id: 'mem-id', status: 'stored' });
372
+ expect(mockMemoryDb.store).toHaveBeenCalledWith('foo', [0.1, 0.2, 0.3], { source: 'test' });
373
+ });
374
+
375
+ it('should search memory', async () => {
376
+ // @ts-ignore
377
+ const step = { id: 'm1', type: 'memory', op: 'search', query: 'foo', limit: 5 };
378
+ // @ts-ignore
379
+ const result = await executeStep(step, context, undefined, {
380
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
381
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
382
+ });
383
+ expect(result.status).toBe('success');
384
+ expect(result.output).toEqual([{ content: 'found', similarity: 0.9 }]);
385
+ expect(mockMemoryDb.search).toHaveBeenCalledWith([0.1, 0.2, 0.3], 5);
386
+ });
387
+
388
+ it('should fail store if text is missing', async () => {
389
+ // @ts-ignore
390
+ const step = { id: 'm1', type: 'memory', op: 'store' };
391
+ // @ts-ignore
392
+ const result = await executeStep(step, context, undefined, {
393
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
394
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
395
+ });
396
+ expect(result.status).toBe('failed');
397
+ expect(result.error).toBe('Text is required for memory store operation');
398
+ });
399
+
400
+ it('should fail search if query is missing', async () => {
401
+ // @ts-ignore
402
+ const step = { id: 'm1', type: 'memory', op: 'search' };
403
+ // @ts-ignore
404
+ const result = await executeStep(step, context, undefined, {
405
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
406
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
407
+ });
408
+ expect(result.status).toBe('failed');
409
+ expect(result.error).toBe('Query is required for memory search operation');
410
+ });
411
+
412
+ it('should fail for unknown memory operation', async () => {
413
+ // @ts-ignore
414
+ const step = { id: 'm1', type: 'memory', op: 'unknown', text: 'foo' };
415
+ // @ts-ignore
416
+ const result = await executeStep(step, context, undefined, {
417
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
418
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
419
+ });
420
+ expect(result.status).toBe('failed');
421
+ expect(result.error).toContain('Unknown memory operation');
422
+ });
230
423
  });
231
424
 
232
425
  describe('sleep', () => {
@@ -517,7 +710,7 @@ describe('step-executor', () => {
517
710
  );
518
711
 
519
712
  // @ts-ignore
520
- const result = await executeStep(step, context, undefined, executeWorkflowFn);
713
+ const result = await executeStep(step, context, undefined, { executeWorkflowFn });
521
714
  expect(result.status).toBe('success');
522
715
  expect(result.output).toBe('child-output');
523
716
  expect(executeWorkflowFn).toHaveBeenCalled();
@@ -48,6 +48,20 @@ export interface StepResult {
48
48
  };
49
49
  }
50
50
 
51
+ /**
52
+ * Execute a single step based on its type
53
+ */
54
+ export interface StepExecutorOptions {
55
+ executeWorkflowFn?: (step: WorkflowStep, context: ExpressionContext) => Promise<StepResult>;
56
+ mcpManager?: MCPManager;
57
+ memoryDb?: MemoryDb;
58
+ workflowDir?: string;
59
+ dryRun?: boolean;
60
+ // Dependency injection for testing
61
+ getAdapter?: typeof getAdapter;
62
+ sandbox?: typeof SafeSandbox;
63
+ }
64
+
51
65
  /**
52
66
  * Execute a single step based on its type
53
67
  */
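
The interface above replaces the long tail of optional positional parameters that `executeStep` took in 0.6.0 with a single options object, which also gives tests a place to inject `getAdapter` and `sandbox`. A sketch of the call-site migration (import paths assume a module sitting next to `step-executor.ts`; the `runStep` wrapper is illustrative):

    import type { ExpressionContext } from '../expression/evaluator.ts';
    import type { Step } from '../parser/schema.ts';
    import { executeStep, type StepExecutorOptions, type StepResult } from './step-executor.ts';

    // 0.6.0: executeStep(step, context, logger, executeWorkflowFn, mcpManager,
    //                    memoryDb, workflowDir, dryRun)
    // 0.6.1: executeStep(step, context, logger, { ...named options })
    async function runStep(
      step: Step,
      context: ExpressionContext,
      options: StepExecutorOptions = {}
    ): Promise<StepResult> {
      // Passing undefined for the logger keeps the ConsoleLogger default,
      // mirroring how the updated tests in this diff call executeStep.
      return executeStep(step, context, undefined, options);
    }
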
@@ -55,12 +69,18 @@ export async function executeStep(
55
69
  step: Step,
56
70
  context: ExpressionContext,
57
71
  logger: Logger = new ConsoleLogger(),
58
- executeWorkflowFn?: (step: WorkflowStep, context: ExpressionContext) => Promise<StepResult>,
59
- mcpManager?: MCPManager,
60
- memoryDb?: MemoryDb,
61
- workflowDir?: string,
62
- dryRun?: boolean
72
+ options: StepExecutorOptions = {}
63
73
  ): Promise<StepResult> {
74
+ const {
75
+ executeWorkflowFn,
76
+ mcpManager,
77
+ memoryDb,
78
+ workflowDir,
79
+ dryRun,
80
+ getAdapter: injectedGetAdapter,
81
+ sandbox: injectedSandbox,
82
+ } = options;
83
+
64
84
  try {
65
85
  let result: StepResult;
66
86
  switch (step.type) {
@@ -83,15 +103,14 @@ export async function executeStep(
83
103
  result = await executeLlmStep(
84
104
  step,
85
105
  context,
86
- (s, c) =>
87
- executeStep(s, c, logger, executeWorkflowFn, mcpManager, memoryDb, workflowDir, dryRun),
106
+ (s, c) => executeStep(s, c, logger, options),
88
107
  logger,
89
108
  mcpManager,
90
109
  workflowDir
91
110
  );
92
111
  break;
93
112
  case 'memory':
94
- result = await executeMemoryStep(step, context, logger, memoryDb);
113
+ result = await executeMemoryStep(step, context, logger, memoryDb, injectedGetAdapter);
95
114
  break;
96
115
  case 'workflow':
97
116
  if (!executeWorkflowFn) {
@@ -100,7 +119,7 @@ export async function executeStep(
100
119
  result = await executeWorkflowFn(step, context);
101
120
  break;
102
121
  case 'script':
103
- result = await executeScriptStep(step, context, logger);
122
+ result = await executeScriptStep(step, context, logger, injectedSandbox);
104
123
  break;
105
124
  default:
106
125
  throw new Error(`Unknown step type: ${(step as Step).type}`);
@@ -383,7 +402,7 @@ async function executeRequestStep(
383
402
  output: {
384
403
  status: response.status,
385
404
  statusText: response.statusText,
386
- headers: Object.fromEntries(response.headers.entries()),
405
+ headers: Object.fromEntries(response.headers as unknown as Iterable<[string, string]>),
387
406
  data: responseData,
388
407
  },
389
408
  status: response.ok ? 'success' : 'failed',
@@ -503,7 +522,8 @@ async function executeSleepStep(
503
522
  async function executeScriptStep(
504
523
  step: ScriptStep,
505
524
  context: ExpressionContext,
506
- _logger: Logger
525
+ _logger: Logger,
526
+ sandbox = SafeSandbox
507
527
  ): Promise<StepResult> {
508
528
  try {
509
529
  if (!step.allowInsecure) {
@@ -513,7 +533,7 @@ async function executeScriptStep(
513
533
  );
514
534
  }
515
535
 
516
- const result = await SafeSandbox.execute(
536
+ const result = await sandbox.execute(
517
537
  step.run,
518
538
  {
519
539
  inputs: context.inputs,
@@ -546,14 +566,15 @@ async function executeMemoryStep(
546
566
  step: MemoryStep,
547
567
  context: ExpressionContext,
548
568
  logger: Logger,
549
- memoryDb?: MemoryDb
569
+ memoryDb?: MemoryDb,
570
+ getAdapterFn = getAdapter
550
571
  ): Promise<StepResult> {
551
572
  if (!memoryDb) {
552
573
  throw new Error('Memory database not initialized');
553
574
  }
554
575
 
555
576
  try {
556
- const { adapter, resolvedModel } = getAdapter(step.model || 'local');
577
+ const { adapter, resolvedModel } = getAdapterFn(step.model || 'local');
557
578
  if (!adapter.embed) {
558
579
  throw new Error(`Provider for model ${step.model || 'local'} does not support embeddings`);
559
580
  }
@@ -4,16 +4,24 @@ import { processOpenAIStream } from './stream-utils';
4
4
  const encoder = new TextEncoder();
5
5
 
6
6
  function responseFromChunks(chunks: string[]): Response {
7
- const stream = new ReadableStream({
8
- start(controller) {
9
- for (const chunk of chunks) {
10
- controller.enqueue(encoder.encode(chunk));
7
+ let index = 0;
8
+ const reader = {
9
+ async read(): Promise<{ done: boolean; value?: Uint8Array }> {
10
+ if (index >= chunks.length) {
11
+ return { done: true, value: undefined };
11
12
  }
12
- controller.close();
13
+ const value = encoder.encode(chunks[index]);
14
+ index += 1;
15
+ return { done: false, value };
13
16
  },
14
- });
17
+ async cancel(): Promise<void> {},
18
+ };
15
19
 
16
- return new Response(stream);
20
+ return {
21
+ body: {
22
+ getReader: () => reader,
23
+ },
24
+ } as Response;
17
25
  }
18
26
 
19
27
  describe('processOpenAIStream', () => {
@@ -61,5 +69,103 @@ describe('processOpenAIStream', () => {
61
69
 
62
70
  expect(result.message.content).toBe('ok');
63
71
  expect(logger.warn).toHaveBeenCalledTimes(1);
72
+ expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
73
+ });
74
+
75
+ it('throws error when buffer size is exceeded', async () => {
76
+ const response = responseFromChunks(['a'.repeat(1024 * 1024 + 1)]);
77
+ await expect(processOpenAIStream(response)).rejects.toThrow(
78
+ 'LLM stream line exceed maximum size'
79
+ );
80
+ });
81
+
82
+ it('throws error when response size limit is exceeded', async () => {
83
+ const response = responseFromChunks([
84
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
85
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}\n`,
86
+ ]);
87
+ await expect(processOpenAIStream(response)).rejects.toThrow(
88
+ 'LLM response exceeds maximum size'
89
+ );
90
+ });
91
+
92
+ it('throws error when tool call arguments size limit is exceeded', async () => {
93
+ const response = responseFromChunks([
94
+ `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(600 * 1024)}"}}]}}]}\n`,
95
+ `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(500 * 1024)}"}}]}}]}\n`,
96
+ ]);
97
+ await expect(processOpenAIStream(response)).rejects.toThrow(
98
+ 'LLM tool call arguments exceed maximum size'
99
+ );
100
+ });
101
+
102
+ it('handles and logs generic errors during chunk processing', async () => {
103
+ const logger = {
104
+ log: mock(() => {}),
105
+ error: mock(() => {}),
106
+ warn: mock(() => {}),
107
+ info: mock(() => {}),
108
+ };
109
+ // Mocking JSON.parse to throw a non-SyntaxError
110
+ const originalParse = JSON.parse;
111
+ JSON.parse = (str: string) => {
112
+ if (str === '{"trigger_error":true}') throw new Error('Generic error');
113
+ return originalParse(str);
114
+ };
115
+
116
+ try {
117
+ const response = responseFromChunks(['data: {"trigger_error":true}\n']);
118
+ await processOpenAIStream(response, { logger });
119
+ expect(logger.warn).toHaveBeenCalledTimes(1);
120
+ expect(logger.warn.mock.calls[0][0]).toContain(
121
+ 'Error processing chunk: Error: Generic error'
122
+ );
123
+ } finally {
124
+ JSON.parse = originalParse;
125
+ }
126
+ });
127
+
128
+ it('handles errors in the final line processing', async () => {
129
+ const logger = {
130
+ log: mock(() => {}),
131
+ error: mock(() => {}),
132
+ warn: mock(() => {}),
133
+ info: mock(() => {}),
134
+ };
135
+ const response = responseFromChunks(['data: {bad json}']); // No newline, triggers buffer processing
136
+
137
+ await processOpenAIStream(response, { logger });
138
+
139
+ expect(logger.warn).toHaveBeenCalledTimes(1);
140
+ expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
141
+ });
142
+
143
+ it('throws size limit error in final line processing', async () => {
144
+ const response = responseFromChunks([
145
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
146
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}`,
147
+ ]);
148
+ // The first line is ok, the second line is in the final buffer and exceeds size
149
+ await expect(processOpenAIStream(response)).rejects.toThrow(
150
+ 'LLM response exceeds maximum size'
151
+ );
152
+ });
153
+
154
+ it('bubbles up reader cancel errors', async () => {
155
+ const reader = {
156
+ read: async () => {
157
+ throw new Error('Read error');
158
+ },
159
+ cancel: async () => {
160
+ throw new Error('Cancel error');
161
+ },
162
+ };
163
+ const response = {
164
+ body: {
165
+ getReader: () => reader,
166
+ },
167
+ } as unknown as Response;
168
+
169
+ await expect(processOpenAIStream(response)).rejects.toThrow('Read error');
64
170
  });
65
171
  });
@@ -67,7 +67,7 @@ export async function processOpenAIStream(
67
67
  const toolCall = tc as ToolCallDelta;
68
68
  if (!toolCalls[toolCall.index]) {
69
69
  toolCalls[toolCall.index] = {
70
- id: toolCall.id,
70
+ id: toolCall.id || '',
71
71
  type: 'function',
72
72
  function: { name: '', arguments: '' },
73
73
  };
@@ -93,7 +93,7 @@ export async function processOpenAIStream(
93
93
  const activeLogger = options?.logger || new ConsoleLogger();
94
94
 
95
95
  // Rethrow size limit errors so they bubble up
96
- if (String(e).toLowerCase().includes('exceed maximum size')) {
96
+ if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
97
97
  throw e;
98
98
  }
99
99
 
@@ -137,7 +137,7 @@ export async function processOpenAIStream(
137
137
  const toolCall = tc as ToolCallDelta;
138
138
  if (!toolCalls[toolCall.index]) {
139
139
  toolCalls[toolCall.index] = {
140
- id: toolCall.id,
140
+ id: toolCall.id || '',
141
141
  type: 'function',
142
142
  function: { name: '', arguments: '' },
143
143
  };
@@ -161,7 +161,7 @@ export async function processOpenAIStream(
161
161
  }
162
162
  }
163
163
  } catch (e) {
164
- if (String(e).toLowerCase().includes('exceed maximum size')) {
164
+ if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
165
165
  throw e;
166
166
  }
167
167
  const activeLogger = options?.logger || new ConsoleLogger();
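
Both catch blocks in `processOpenAIStream` now use the same stricter rethrow test: the error must be a real `Error` and its message must mention "maximum size", so messages phrased either "exceed maximum size" or "exceeds maximum size" (both appear in the new tests) bubble up instead of being swallowed as malformed-chunk warnings. The predicate, pulled out as a sketch:

    // Only genuine Error instances whose message mentions "maximum size" are
    // treated as size-limit violations and re-thrown; anything else is logged
    // via the active logger and streaming continues.
    function isSizeLimitError(e: unknown): e is Error {
      return e instanceof Error && e.message.toLowerCase().includes('maximum size');
    }

    // isSizeLimitError(new Error('LLM response exceeds maximum size')) -> true
    // isSizeLimitError('exceed maximum size') -> false (plain strings no longer match)
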
@@ -630,16 +630,13 @@ export class WorkflowRunner {
630
630
  }
631
631
 
632
632
  const operation = async () => {
633
- const result = await executeStep(
634
- stepToExecute,
635
- context,
636
- this.logger,
637
- this.executeSubWorkflow.bind(this),
638
- this.mcpManager,
639
- this.memoryDb,
640
- this.options.workflowDir,
641
- this.options.dryRun
642
- );
633
+ const result = await executeStep(stepToExecute, context, this.logger, {
634
+ executeWorkflowFn: this.executeSubWorkflow.bind(this),
635
+ mcpManager: this.mcpManager,
636
+ memoryDb: this.memoryDb,
637
+ workflowDir: this.options.workflowDir,
638
+ dryRun: this.options.dryRun,
639
+ });
643
640
  if (result.status === 'failed') {
644
641
  throw new Error(result.error || 'Step failed');
645
642
  }
@@ -868,16 +865,13 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
868
865
 
869
866
  // Execute the agent step
870
867
  // We use a fresh context but share secrets/env
871
- const result = await executeStep(
872
- agentStep,
873
- context,
874
- this.logger,
875
- this.executeSubWorkflow.bind(this),
876
- this.mcpManager,
877
- this.memoryDb,
878
- this.options.workflowDir,
879
- this.options.dryRun
880
- );
868
+ const result = await executeStep(agentStep, context, this.logger, {
869
+ executeWorkflowFn: this.executeSubWorkflow.bind(this),
870
+ mcpManager: this.mcpManager,
871
+ memoryDb: this.memoryDb,
872
+ workflowDir: this.options.workflowDir,
873
+ dryRun: this.options.dryRun,
874
+ });
881
875
 
882
876
  if (result.status !== 'success' || !result.output) {
883
877
  throw new Error(`Healer agent failed: ${result.error || 'No output'}`);