keystone-cli 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.ts +233 -21
- package/src/db/memory-db.ts +6 -0
- package/src/db/sqlite-setup.test.ts +47 -0
- package/src/db/workflow-db.ts +6 -0
- package/src/runner/debug-repl.test.ts +240 -6
- package/src/runner/llm-adapter.test.ts +10 -4
- package/src/runner/step-executor.test.ts +194 -1
- package/src/runner/step-executor.ts +35 -14
- package/src/runner/stream-utils.test.ts +113 -7
- package/src/runner/stream-utils.ts +4 -4
- package/src/runner/workflow-runner.ts +14 -20
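The headline additions in 0.6.1 are the new CLI subcommands registered in cli.ts: workflows, optimize, history, logs, prune, and maintenance. A hedged usage sketch, inferred only from the .command/.option declarations in the diff below (the workflow name and run ID are placeholders):

    keystone workflows
    keystone optimize <workflow> --target <step_id> --iterations 5 --input key=value
    keystone history --limit 20
    keystone logs <run_id> --verbose        # a short run-ID prefix is also accepted
    keystone prune --days 30                # alias for: keystone maintenance --days 30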
package/package.json
CHANGED
package/src/cli.ts
CHANGED
@@ -9,7 +9,7 @@ import architectAgent from './templates/agents/keystone-architect.md' with { type: 'text' };
 // Default templates
 import scaffoldWorkflow from './templates/scaffold-feature.yaml' with { type: 'text' };
 
-import { WorkflowDb } from './db/workflow-db.ts';
+import { WorkflowDb, type WorkflowRun } from './db/workflow-db.ts';
 import { WorkflowParser } from './parser/workflow-parser.ts';
 import { ConfigLoader } from './utils/config-loader.ts';
 import { ConsoleLogger } from './utils/logger.ts';
@@ -279,7 +279,79 @@ program
     }
   });
 
-//
+// ===== keystone workflows =====
+program
+  .command('workflows')
+  .description('List available workflows')
+  .action(() => {
+    const workflows = WorkflowRegistry.listWorkflows();
+    if (workflows.length === 0) {
+      console.log('No workflows found. Run "keystone init" to seed default workflows.');
+      return;
+    }
+
+    console.log('\n🗝️ Available Workflows:');
+    for (const w of workflows) {
+      console.log(`\n  ${w.name}`);
+      if (w.description) {
+        console.log(`    ${w.description}`);
+      }
+    }
+    console.log('');
+  });
+
+// ===== keystone optimize =====
+program
+  .command('optimize')
+  .description('Optimize a specific step in a workflow using iterative evaluation')
+  .argument('<workflow>', 'Workflow name or path to workflow file')
+  .requiredOption('-t, --target <step_id>', 'Target step ID to optimize')
+  .option('-n, --iterations <number>', 'Number of optimization iterations', '5')
+  .option('-i, --input <key=value...>', 'Input values for evaluation')
+  .action(async (workflowPath, options) => {
+    try {
+      const { OptimizationRunner } = await import('./runner/optimization-runner.ts');
+      const resolvedPath = WorkflowRegistry.resolvePath(workflowPath);
+      const workflow = WorkflowParser.loadWorkflow(resolvedPath);
+
+      // Parse inputs
+      const inputs: Record<string, unknown> = {};
+      if (options.input) {
+        for (const pair of options.input) {
+          const index = pair.indexOf('=');
+          if (index > 0) {
+            const key = pair.slice(0, index);
+            const value = pair.slice(index + 1);
+            try {
+              inputs[key] = JSON.parse(value);
+            } catch {
+              inputs[key] = value;
+            }
+          }
+        }
+      }
+
+      const runner = new OptimizationRunner(workflow, {
+        workflowPath: resolvedPath,
+        targetStepId: options.target,
+        iterations: Number.parseInt(options.iterations, 10),
+        inputs,
+      });
+
+      console.log('🗝️ Keystone Prompt Optimization');
+      const { bestPrompt, bestScore } = await runner.optimize();
+
+      console.log('\n✨ Optimization Complete!');
+      console.log(`📊 Best Score: ${bestScore}/100`);
+      console.log('\nBest Prompt/Command:');
+      console.log(''.padEnd(80, '-'));
+      console.log(bestPrompt);
+      console.log(''.padEnd(80, '-'));
+    } catch (error) {
+      console.error('❌ Optimization failed:', error instanceof Error ? error.message : error);
+      process.exit(1);
+    }
+  });
 
 // ===== keystone resume =====
 program
@@ -347,40 +419,180 @@ program
     }
   });
 
-//
-
-// ===== keystone maintenance =====
+// ===== keystone history =====
 program
-  .command('
-  .description('
-  .option('--
+  .command('history')
+  .description('Show recent workflow runs')
+  .option('-l, --limit <number>', 'Limit the number of runs to show', '50')
   .action(async (options) => {
     try {
-      const
-
-
-
+      const db = new WorkflowDb();
+      const limit = Number.parseInt(options.limit, 10);
+      const runs = await db.listRuns(limit);
+      db.close();
+
+      if (runs.length === 0) {
+        console.log('No workflow runs found.');
+        return;
       }
 
-      console.log('
+      console.log('\n🗝️ Workflow Run History:');
+      console.log(''.padEnd(100, '-'));
+      console.log(
+        `${'ID'.padEnd(10)} ${'Workflow'.padEnd(25)} ${'Status'.padEnd(15)} ${'Started At'}`
+      );
+      console.log(''.padEnd(100, '-'));
+
+      for (const run of runs) {
+        const id = run.id.slice(0, 8);
+        const status = run.status;
+        const color =
+          status === 'success' ? '\x1b[32m' : status === 'failed' ? '\x1b[31m' : '\x1b[33m';
+        const reset = '\x1b[0m';
+
+        console.log(
+          `${id.padEnd(10)} ${run.workflow_name.padEnd(25)} ${color}${status.padEnd(
+            15
+          )}${reset} ${new Date(run.started_at).toLocaleString()}`
+        );
+      }
+      console.log('');
+    } catch (error) {
+      console.error('❌ Failed to list runs:', error instanceof Error ? error.message : error);
+      process.exit(1);
+    }
+  });
+
+// ===== keystone logs =====
+program
+  .command('logs')
+  .description('Show logs for a specific workflow run')
+  .argument('<run_id>', 'Run ID to show logs for')
+  .option('-v, --verbose', 'Show detailed step outputs')
+  .action(async (runId, options) => {
+    try {
       const db = new WorkflowDb();
+      const run = await db.getRun(runId);
 
-
-
-
+      if (!run) {
+        // Try searching by short ID
+        const allRuns = await db.listRuns(200);
+        const matching = allRuns.find((r) => r.id.startsWith(runId));
+        if (matching) {
+          const detailedRun = await db.getRun(matching.id);
+          if (detailedRun) {
+            await showRunLogs(detailedRun, db, !!options.verbose);
+            db.close();
+            return;
+          }
+        }
 
-
-
-
+        console.error(`❌ Run not found: ${runId}`);
+        db.close();
+        process.exit(1);
+      }
 
+      await showRunLogs(run, db, !!options.verbose);
       db.close();
-      console.log('\n✨ Maintenance completed successfully!');
     } catch (error) {
-      console.error('❌
+      console.error('❌ Failed to show logs:', error instanceof Error ? error.message : error);
       process.exit(1);
     }
  });
 
+async function showRunLogs(run: WorkflowRun, db: WorkflowDb, verbose: boolean) {
+  console.log(`\n🗝️ Run: ${run.workflow_name} (${run.id})`);
+  console.log(`  Status: ${run.status}`);
+  console.log(`  Started: ${new Date(run.started_at).toLocaleString()}`);
+  if (run.completed_at) {
+    console.log(`  Completed: ${new Date(run.completed_at).toLocaleString()}`);
+  }
+
+  const steps = await db.getStepsByRun(run.id);
+  console.log(`\nSteps (${steps.length}):`);
+  console.log(''.padEnd(100, '-'));
+
+  for (const step of steps) {
+    const statusColor =
+      step.status === 'success' ? '\x1b[32m' : step.status === 'failed' ? '\x1b[31m' : '\x1b[33m';
+    const reset = '\x1b[0m';
+
+    let label = step.step_id;
+    if (step.iteration_index !== null) {
+      label += ` [${step.iteration_index}]`;
+    }
+
+    console.log(`${statusColor}${step.status.toUpperCase().padEnd(10)}${reset} ${label}`);
+
+    if (step.error) {
+      console.log(`  \x1b[31mError: ${step.error}\x1b[0m`);
+    }
+
+    if (verbose && step.output) {
+      try {
+        const output = JSON.parse(step.output);
+        console.log(
+          `  Output: ${JSON.stringify(output, null, 2).replace(/\n/g, '\n  ')}`
+        );
+      } catch {
+        console.log(`  Output: ${step.output}`);
+      }
+    }
+  }
+
+  if (run.outputs) {
+    console.log('\nFinal Outputs:');
+    try {
+      const parsed = JSON.parse(run.outputs);
+      console.log(JSON.stringify(parsed, null, 2));
+    } catch {
+      console.log(run.outputs);
+    }
+  }
+
+  if (run.error) {
+    console.log(`\n\x1b[31mWorkflow Error:\x1b[0m ${run.error}`);
+  }
+}
+
+// ===== keystone prune / maintenance =====
+async function performMaintenance(days: number) {
+  try {
+    console.log(`🧹 Starting maintenance (pruning runs older than ${days} days)...`);
+    const db = new WorkflowDb();
+    const count = await db.pruneRuns(days);
+    console.log(`  ✓ Pruned ${count} old run(s)`);
+
+    console.log('  Vacuuming database (reclaiming space)...');
+    await db.vacuum();
+    console.log('  ✓ Vacuum complete');
+
+    db.close();
+    console.log('\n✨ Maintenance completed successfully!');
+  } catch (error) {
+    console.error('❌ Maintenance failed:', error instanceof Error ? error.message : error);
+    process.exit(1);
+  }
+}
+
+program
+  .command('prune')
+  .description('Delete old workflow runs from the database (alias for maintenance)')
+  .option('--days <number>', 'Days to keep', '30')
+  .action(async (options) => {
+    const days = Number.parseInt(options.days, 10);
+    await performMaintenance(days);
+  });
+
+program
+  .command('maintenance')
+  .description('Perform database maintenance (prune old runs and vacuum)')
+  .option('--days <days>', 'Delete runs older than this many days', '30')
+  .action(async (options) => {
+    const days = Number.parseInt(options.days, 10);
+    await performMaintenance(days);
+  });
+
 // ===== keystone ui =====
 program
   .command('ui')
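The optimize command parses repeated -i key=value pairs by attempting JSON.parse on the value and falling back to the raw string, so numbers, booleans, and JSON objects keep their types while bare strings still work. The same logic as a standalone TypeScript sketch; the helper name parseInputPairs is ours, not part of the package:

    // Hypothetical helper mirroring the inline parsing in the optimize action.
    function parseInputPairs(pairs: string[]): Record<string, unknown> {
      const inputs: Record<string, unknown> = {};
      for (const pair of pairs) {
        const index = pair.indexOf('=');
        if (index <= 0) continue; // ignore pairs without a key
        const key = pair.slice(0, index);
        const value = pair.slice(index + 1);
        try {
          inputs[key] = JSON.parse(value); // 'count=3' yields the number 3
        } catch {
          inputs[key] = value; // 'topic=auth' stays a plain string
        }
      }
      return inputs;
    }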
package/src/db/memory-db.ts
CHANGED
@@ -1,5 +1,7 @@
 import type { Database } from 'bun:sqlite';
 import { randomUUID } from 'node:crypto';
+import { existsSync, mkdirSync } from 'node:fs';
+import { dirname } from 'node:path';
 import * as sqliteVec from 'sqlite-vec';
 import './sqlite-setup.ts';
 
@@ -22,6 +24,10 @@ export class MemoryDb {
       this.db = cached.db;
     } else {
       const { Database } = require('bun:sqlite');
+      const dir = dirname(dbPath);
+      if (!existsSync(dir)) {
+        mkdirSync(dir, { recursive: true });
+      }
       this.db = new Database(dbPath, { create: true });
 
       // Load sqlite-vec extension
package/src/db/sqlite-setup.test.ts
ADDED
@@ -0,0 +1,47 @@
+import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test';
+import type { Logger } from '../utils/logger';
+import { setupSqlite } from './sqlite-setup';
+
+describe('setupSqlite', () => {
+  const originalPlatform = process.platform;
+
+  afterEach(() => {
+    Object.defineProperty(process, 'platform', {
+      value: originalPlatform,
+    });
+  });
+
+  it('does nothing on non-darwin platforms', () => {
+    Object.defineProperty(process, 'platform', { value: 'linux' });
+    const logger: Logger = {
+      log: mock(() => {}),
+      warn: mock(() => {}),
+      error: mock(() => {}),
+      info: mock(() => {}),
+    };
+    setupSqlite(logger);
+    expect(logger.log).not.toHaveBeenCalled();
+    expect(logger.warn).not.toHaveBeenCalled();
+  });
+
+  it('logs warning if no custom sqlite found on darwin', () => {
+    Object.defineProperty(process, 'platform', { value: 'darwin' });
+    const logger: Logger = {
+      log: mock(() => {}),
+      warn: mock(() => {}),
+      error: mock(() => {}),
+      info: mock(() => {}),
+    };
+
+    // Mock Bun.spawnSync for brew
+    const spawnSpy = spyOn(Bun, 'spawnSync').mockImplementation(
+      () => ({ success: false }) as unknown as ReturnType<typeof Bun.spawnSync>
+    );
+
+    try {
+      setupSqlite(logger);
+    } finally {
+      spawnSpy.mockRestore();
+    }
+  });
+});
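These tests flip process.platform with Object.defineProperty and restore it in afterEach. If more suites need the trick, it could be factored into a helper; a sketch under our own naming (withPlatform is not in the package):

    // Run a callback with process.platform temporarily overridden.
    function withPlatform(platform: NodeJS.Platform, fn: () => void): void {
      const original = process.platform;
      Object.defineProperty(process, 'platform', { value: platform });
      try {
        fn();
      } finally {
        Object.defineProperty(process, 'platform', { value: original });
      }
    }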
package/src/db/workflow-db.ts
CHANGED
@@ -1,4 +1,6 @@
 import { Database } from 'bun:sqlite';
+import { existsSync, mkdirSync } from 'node:fs';
+import { dirname } from 'node:path';
 import './sqlite-setup.ts';
 import {
   StepStatus as StepStatusConst,
@@ -40,6 +42,10 @@ export class WorkflowDb {
   private db: Database;
 
   constructor(public readonly dbPath = '.keystone/state.db') {
+    const dir = dirname(dbPath);
+    if (!existsSync(dir)) {
+      mkdirSync(dir, { recursive: true });
+    }
     this.db = new Database(dbPath, { create: true });
     this.db.exec('PRAGMA journal_mode = WAL;'); // Write-ahead logging
     this.db.exec('PRAGMA foreign_keys = ON;'); // Enable foreign key enforcement
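MemoryDb and WorkflowDb now both create the parent directory of their SQLite file before opening it, so a fresh checkout no longer fails when .keystone/ does not yet exist. The shared pattern as a sketch (ensureParentDir is our name; the package inlines these lines in each constructor):

    import { existsSync, mkdirSync } from 'node:fs';
    import { dirname } from 'node:path';

    // Create the parent directory so `new Database(dbPath, { create: true })` can succeed.
    function ensureParentDir(dbPath: string): void {
      const dir = dirname(dbPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }
    }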
package/src/runner/debug-repl.test.ts
CHANGED
@@ -1,19 +1,27 @@
-import { describe, expect, test } from 'bun:test';
+import { describe, expect, mock, spyOn, test } from 'bun:test';
+import * as cp from 'node:child_process';
+import * as fs from 'node:fs';
 import { PassThrough } from 'node:stream';
 import type { ExpressionContext } from '../expression/evaluator.ts';
 import type { Step } from '../parser/schema.ts';
+import type { Logger } from '../utils/logger.ts';
 import { DebugRepl } from './debug-repl.ts';
 
 describe('DebugRepl', () => {
   const mockContext: ExpressionContext = { inputs: { foo: 'bar' } };
-  //
-  const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as
+  // mock step typing
+  const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as unknown as Step;
   const mockError = new Error('Test Error');
 
   test('should resolve with "skip" when user types "skip"', async () => {
     const input = new PassThrough();
     const output = new PassThrough();
-    const mockLogger
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
     const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
 
     const promise = repl.start();
@@ -30,7 +38,12 @@ describe('DebugRepl', () => {
   test('should resolve with "retry" when user types "retry"', async () => {
     const input = new PassThrough();
     const output = new PassThrough();
-    const mockLogger
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
     const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
 
     const promise = repl.start();
@@ -48,7 +61,12 @@ describe('DebugRepl', () => {
   test('should resolve with "continue_failure" when user types "exit"', async () => {
     const input = new PassThrough();
     const output = new PassThrough();
-    const mockLogger
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
     const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
 
     const promise = repl.start();
@@ -60,6 +78,137 @@ describe('DebugRepl', () => {
     expect(result).toEqual({ type: 'continue_failure' });
   });
 
+  test('should handle "context" command', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    repl.start();
+
+    await new Promise((r) => setTimeout(r, 10));
+    input.write('context\n');
+    await new Promise((r) => setTimeout(r, 10));
+
+    expect(mockLogger.log).toHaveBeenCalled();
+    // biome-ignore lint/suspicious/noExplicitAny: accessing mock property
+    const lastCall = (mockLogger.log as unknown as any).mock.calls.find((call: any[]) =>
+      String(call[0]).includes('foo')
+    );
+    expect(lastCall?.[0]).toContain('bar');
+    input.write('exit\n');
+  });
+
+  test('should handle "eval" command', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    repl.start();
+
+    await new Promise((r) => setTimeout(r, 10));
+    input.write('eval inputs.foo\n');
+    await new Promise((r) => setTimeout(r, 10));
+
+    expect(mockLogger.log).toHaveBeenCalledWith('bar');
+    input.write('exit\n');
+  });
+
+  test('should handle "eval" command with error', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    repl.start();
+
+    await new Promise((r) => setTimeout(r, 10));
+    input.write('eval nonExistent.bar\n');
+    await new Promise((r) => setTimeout(r, 10));
+
+    expect(mockLogger.error).toHaveBeenCalled();
+    input.write('exit\n');
+  });
+
+  test('should handle "eval" command without arguments', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    repl.start();
+
+    await new Promise((r) => setTimeout(r, 10));
+    input.write('eval\n');
+    await new Promise((r) => setTimeout(r, 10));
+
+    expect(mockLogger.log).toHaveBeenCalledWith('Usage: eval <expression>');
+    input.write('exit\n');
+  });
+
+  test('should handle unknown command', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    repl.start();
+
+    await new Promise((r) => setTimeout(r, 10));
+    input.write('unknown_cmd\n');
+    await new Promise((r) => setTimeout(r, 10));
+
+    expect(mockLogger.log).toHaveBeenCalledWith('Unknown command: unknown_cmd');
+    input.write('exit\n');
+  });
+
+  test('should handle empty input', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    repl.start();
+
+    await new Promise((r) => setTimeout(r, 10));
+    input.write('\n');
+    await new Promise((r) => setTimeout(r, 10));
+
+    expect(mockLogger.log).not.toHaveBeenCalledWith('Unknown command: ');
+    input.write('exit\n');
+  });
+
   test('should parse shell commands correctly', () => {
     // We import the function dynamically to test it, or we assume it's exported
     const { parseShellCommand } = require('./debug-repl.ts');
@@ -71,4 +220,89 @@ describe('DebugRepl', () => {
     expect(parseShellCommand('editor -a -b -c')).toEqual(['editor', '-a', '-b', '-c']);
     expect(parseShellCommand(' spaced command ')).toEqual(['spaced', 'command']);
   });
+
+  test('should handle "edit" command and update step', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
+      // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
+      () => ({ error: null, status: 0 }) as any
+    );
+    const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
+    const updatedStep = { ...mockStep, run: 'echo "fixed"' };
+    const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation((() =>
+      JSON.stringify(updatedStep)) as unknown as typeof fs.readFileSync);
+    const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
+    const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
+
+    try {
+      repl.start();
+      await new Promise((r) => setTimeout(r, 50));
+      input.write('edit\n');
+      await new Promise((r) => setTimeout(r, 50));
+
+      expect(mockLogger.log).toHaveBeenCalledWith(
+        expect.stringContaining('Step definition updated')
+      );
+
+      input.write('retry\n');
+      await new Promise((r) => setTimeout(r, 50));
+    } finally {
+      spySpawnSync.mockRestore();
+      spyWriteFileSync.mockRestore();
+      spyReadFileSync.mockRestore();
+      spyExistsSync.mockRestore();
+      spyUnlinkSync.mockRestore();
+    }
+  });
+
+  test('should handle "edit" command with parse error', async () => {
+    const input = new PassThrough();
+    const output = new PassThrough();
+    const mockLogger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const repl = new DebugRepl(mockContext, mockStep, mockError, mockLogger, input, output);
+
+    const spySpawnSync = spyOn(cp, 'spawnSync').mockImplementation(
+      // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
+      () => ({ error: null, status: 0 }) as any
+    );
+    const spyWriteFileSync = spyOn(fs, 'writeFileSync').mockImplementation(() => {});
+    const spyReadFileSync = spyOn(fs, 'readFileSync').mockImplementation(
+      (() => 'invalid json') as unknown as typeof fs.readFileSync
+    );
+    const spyExistsSync = spyOn(fs, 'existsSync').mockImplementation(() => true);
+    const spyUnlinkSync = spyOn(fs, 'unlinkSync').mockImplementation(() => {});
+
+    try {
+      repl.start();
+      await new Promise((r) => setTimeout(r, 50));
+      input.write('edit\n');
+      await new Promise((r) => setTimeout(r, 50));
+
+      expect(mockLogger.error).toHaveBeenCalledWith(
+        expect.stringContaining('Failed to parse JSON')
+      );
+      input.write('exit\n');
+      await new Promise((r) => setTimeout(r, 50));
+    } finally {
+      spySpawnSync.mockRestore();
+      spyWriteFileSync.mockRestore();
+      spyReadFileSync.mockRestore();
+      spyExistsSync.mockRestore();
+      spyUnlinkSync.mockRestore();
+    }
+  });
 });
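Every DebugRepl test hand-builds the same four-method Logger mock. A small factory would remove the duplication; a sketch (makeMockLogger is not part of the package):

    import { mock } from 'bun:test';
    import type { Logger } from '../utils/logger.ts';

    // One mock per method so each test can assert calls independently.
    function makeMockLogger(): Logger {
      return {
        log: mock(() => {}),
        error: mock(() => {}),
        warn: mock(() => {}),
        info: mock(() => {}),
      };
    }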
package/src/runner/llm-adapter.test.ts
CHANGED
@@ -105,7 +105,9 @@ describe('AnthropicAdapter', () => {
     // @ts-ignore
     const fetchMock = global.fetch as MockFetch;
     // @ts-ignore
-
+    // @ts-ignore
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const [url, init] = fetchMock.mock.calls[0] as [string, any];
 
     expect(url).toBe('https://api.anthropic.com/v1/messages');
     expect(init.headers['x-api-key']).toBe('fake-anthropic-key');
@@ -179,7 +181,8 @@ describe('AnthropicAdapter', () => {
     ]);
 
     // @ts-ignore
-
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const init = global.fetch.mock.calls[0][1] as any;
     const body = JSON.parse(init.body);
     expect(body.messages[0].role).toBe('assistant');
     expect(body.messages[0].content).toHaveLength(2);
@@ -208,7 +211,8 @@ describe('AnthropicAdapter', () => {
     ]);
 
     // @ts-ignore
-
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const init = global.fetch.mock.calls[0][1] as any;
     const body = JSON.parse(init.body);
     expect(body.messages[0].role).toBe('user');
     expect(body.messages[0].content[0]).toEqual({
@@ -255,7 +259,9 @@ describe('CopilotAdapter', () => {
     // @ts-ignore
     const fetchMock = global.fetch as MockFetch;
     // @ts-ignore
-
+    // @ts-ignore
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const [url, init] = fetchMock.mock.calls[0] as [string, any];
     expect(url).toBe('https://api.githubcopilot.com/chat/completions');
     expect(init.headers.Authorization).toBe('Bearer mock-token');
     spy.mockRestore();
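The recurring @ts-ignore/biome-ignore pairs exist because these tests reach into the mocked global.fetch call list. A typed accessor could centralize that escape hatch; a sketch under our own naming (firstFetchCall is not in the package):

    // Pull the URL and RequestInit out of the first recorded fetch call.
    function firstFetchCall(): [string, RequestInit] {
      // biome-ignore lint/suspicious/noExplicitAny: reaching into the mock
      const calls = (global.fetch as any).mock.calls;
      return calls[0] as [string, RequestInit];
    }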
package/src/runner/step-executor.test.ts
CHANGED
@@ -13,6 +13,7 @@ import * as dns from 'node:dns/promises';
 import { mkdirSync, rmSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
+import type { MemoryDb } from '../db/memory-db';
 import type { ExpressionContext } from '../expression/evaluator';
 import type {
   FileStep,
@@ -22,6 +23,8 @@ import type {
   SleepStep,
   WorkflowStep,
 } from '../parser/schema';
+import type { SafeSandbox } from '../utils/sandbox';
+import type { getAdapter } from './llm-adapter';
 import { executeStep } from './step-executor';
 
 // Mock executeLlmStep
@@ -227,6 +230,196 @@ describe('step-executor', () => {
        }
      }
    });
+
+    it('should block path traversal outside cwd by default', async () => {
+      const outsidePath = join(process.cwd(), '..', 'outside.txt');
+      const step: FileStep = {
+        id: 'f1',
+        type: 'file',
+        needs: [],
+        op: 'read',
+        path: outsidePath,
+      };
+
+      const result = await executeStep(step, context);
+      expect(result.status).toBe('failed');
+      expect(result.error).toContain('Access denied');
+    });
+
+    it('should block path traversal with .. inside path resolving outside', async () => {
+      const outsidePath = 'foo/../../passwd';
+      const step: FileStep = {
+        id: 'f1',
+        type: 'file',
+        needs: [],
+        op: 'read',
+        path: outsidePath,
+      };
+
+      const result = await executeStep(step, context);
+      expect(result.status).toBe('failed');
+      expect(result.error).toContain('Access denied');
+    });
+  });
+
+  describe('script', () => {
+    const mockSandbox = {
+      execute: mock((code) => {
+        if (code === 'fail') throw new Error('Script failed');
+        return Promise.resolve('script-result');
+      }),
+    };
+
+    it('should fail if allowInsecure is not set', async () => {
+      // @ts-ignore
+      const step = {
+        id: 's1',
+        type: 'script',
+        run: 'console.log("hello")',
+      };
+      const result = await executeStep(step, context, undefined, {
+        sandbox: mockSandbox as unknown as typeof SafeSandbox,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toContain('Script execution is disabled by default');
+    });
+
+    it('should execute script if allowInsecure is true', async () => {
+      // @ts-ignore
+      const step = {
+        id: 's1',
+        type: 'script',
+        run: 'console.log("hello")',
+        allowInsecure: true,
+      };
+      const result = await executeStep(step, context, undefined, {
+        sandbox: mockSandbox as unknown as typeof SafeSandbox,
+      });
+      expect(result.status).toBe('success');
+      expect(result.output).toBe('script-result');
+    });
+
+    it('should handle script failure', async () => {
+      // @ts-ignore
+      const step = {
+        id: 's1',
+        type: 'script',
+        run: 'fail',
+        allowInsecure: true,
+      };
+      const result = await executeStep(step, context, undefined, {
+        sandbox: mockSandbox as unknown as typeof SafeSandbox,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toBe('Script failed');
+    });
+  });
+
+  describe('memory', () => {
+    const mockMemoryDb = {
+      store: mock(() => Promise.resolve('mem-id')),
+      search: mock(() => Promise.resolve([{ content: 'found', similarity: 0.9 }])),
+    };
+
+    const mockGetAdapter = mock((model) => {
+      if (model === 'no-embed') return { adapter: {}, resolvedModel: model };
+      return {
+        adapter: {
+          embed: mock((text) => Promise.resolve([0.1, 0.2, 0.3])),
+        },
+        resolvedModel: model,
+      };
+    });
+
+    it('should fail if memoryDb is not provided', async () => {
+      // @ts-ignore
+      const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo' };
+      const result = await executeStep(step, context, undefined, {
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toBe('Memory database not initialized');
+    });
+
+    it('should fail if adapter does not support embedding', async () => {
+      // @ts-ignore
+      const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo', model: 'no-embed' };
+      // @ts-ignore
+      const result = await executeStep(step, context, undefined, {
+        memoryDb: mockMemoryDb as unknown as MemoryDb,
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toContain('does not support embeddings');
+    });
+
+    it('should store memory', async () => {
+      // @ts-ignore
+      const step = {
+        id: 'm1',
+        type: 'memory',
+        op: 'store',
+        text: 'foo',
+        metadata: { source: 'test' },
+      };
+      // @ts-ignore
+      const result = await executeStep(step, context, undefined, {
+        memoryDb: mockMemoryDb as unknown as MemoryDb,
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('success');
+      expect(result.output).toEqual({ id: 'mem-id', status: 'stored' });
+      expect(mockMemoryDb.store).toHaveBeenCalledWith('foo', [0.1, 0.2, 0.3], { source: 'test' });
+    });
+
+    it('should search memory', async () => {
+      // @ts-ignore
+      const step = { id: 'm1', type: 'memory', op: 'search', query: 'foo', limit: 5 };
+      // @ts-ignore
+      const result = await executeStep(step, context, undefined, {
+        memoryDb: mockMemoryDb as unknown as MemoryDb,
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('success');
+      expect(result.output).toEqual([{ content: 'found', similarity: 0.9 }]);
+      expect(mockMemoryDb.search).toHaveBeenCalledWith([0.1, 0.2, 0.3], 5);
+    });
+
+    it('should fail store if text is missing', async () => {
+      // @ts-ignore
+      const step = { id: 'm1', type: 'memory', op: 'store' };
+      // @ts-ignore
+      const result = await executeStep(step, context, undefined, {
+        memoryDb: mockMemoryDb as unknown as MemoryDb,
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toBe('Text is required for memory store operation');
+    });
+
+    it('should fail search if query is missing', async () => {
+      // @ts-ignore
+      const step = { id: 'm1', type: 'memory', op: 'search' };
+      // @ts-ignore
+      const result = await executeStep(step, context, undefined, {
+        memoryDb: mockMemoryDb as unknown as MemoryDb,
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toBe('Query is required for memory search operation');
+    });
+
+    it('should fail for unknown memory operation', async () => {
+      // @ts-ignore
+      const step = { id: 'm1', type: 'memory', op: 'unknown', text: 'foo' };
+      // @ts-ignore
+      const result = await executeStep(step, context, undefined, {
+        memoryDb: mockMemoryDb as unknown as MemoryDb,
+        getAdapter: mockGetAdapter as unknown as typeof getAdapter,
+      });
+      expect(result.status).toBe('failed');
+      expect(result.error).toContain('Unknown memory operation');
+    });
   });
 
   describe('sleep', () => {
@@ -517,7 +710,7 @@ describe('step-executor', () => {
     );
 
     // @ts-ignore
-    const result = await executeStep(step, context, undefined, executeWorkflowFn);
+    const result = await executeStep(step, context, undefined, { executeWorkflowFn });
     expect(result.status).toBe('success');
     expect(result.output).toBe('child-output');
     expect(executeWorkflowFn).toHaveBeenCalled();
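The new suites drive executeStep through its options bag rather than patching modules, injecting a fake sandbox and a fake getAdapter. Any caller can use the same seam; a minimal sketch, assuming step and context are in scope and reusing the cast style from the tests:

    // A stub sandbox that returns a canned result instead of running code.
    const stubSandbox = {
      execute: async (_code: string) => 'stubbed-result',
    };

    const result = await executeStep(step, context, undefined, {
      sandbox: stubSandbox as unknown as typeof SafeSandbox,
    });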
package/src/runner/step-executor.ts
CHANGED
@@ -48,6 +48,20 @@ export interface StepResult {
   };
 }
 
+/**
+ * Execute a single step based on its type
+ */
+export interface StepExecutorOptions {
+  executeWorkflowFn?: (step: WorkflowStep, context: ExpressionContext) => Promise<StepResult>;
+  mcpManager?: MCPManager;
+  memoryDb?: MemoryDb;
+  workflowDir?: string;
+  dryRun?: boolean;
+  // Dependency injection for testing
+  getAdapter?: typeof getAdapter;
+  sandbox?: typeof SafeSandbox;
+}
+
 /**
  * Execute a single step based on its type
  */
@@ -55,12 +69,18 @@ export async function executeStep(
   step: Step,
   context: ExpressionContext,
   logger: Logger = new ConsoleLogger(),
-
-  mcpManager?: MCPManager,
-  memoryDb?: MemoryDb,
-  workflowDir?: string,
-  dryRun?: boolean
+  options: StepExecutorOptions = {}
 ): Promise<StepResult> {
+  const {
+    executeWorkflowFn,
+    mcpManager,
+    memoryDb,
+    workflowDir,
+    dryRun,
+    getAdapter: injectedGetAdapter,
+    sandbox: injectedSandbox,
+  } = options;
+
   try {
     let result: StepResult;
     switch (step.type) {
@@ -83,15 +103,14 @@ export async function executeStep(
         result = await executeLlmStep(
           step,
           context,
-          (s, c) =>
-            executeStep(s, c, logger, executeWorkflowFn, mcpManager, memoryDb, workflowDir, dryRun),
+          (s, c) => executeStep(s, c, logger, options),
           logger,
           mcpManager,
           workflowDir
         );
         break;
       case 'memory':
-        result = await executeMemoryStep(step, context, logger, memoryDb);
+        result = await executeMemoryStep(step, context, logger, memoryDb, injectedGetAdapter);
         break;
       case 'workflow':
         if (!executeWorkflowFn) {
@@ -100,7 +119,7 @@ export async function executeStep(
         result = await executeWorkflowFn(step, context);
         break;
       case 'script':
-        result = await executeScriptStep(step, context, logger);
+        result = await executeScriptStep(step, context, logger, injectedSandbox);
         break;
       default:
         throw new Error(`Unknown step type: ${(step as Step).type}`);
@@ -383,7 +402,7 @@ async function executeRequestStep(
       output: {
         status: response.status,
         statusText: response.statusText,
-        headers: Object.fromEntries(response.headers
+        headers: Object.fromEntries(response.headers as unknown as Iterable<[string, string]>),
         data: responseData,
       },
       status: response.ok ? 'success' : 'failed',
@@ -503,7 +522,8 @@ async function executeSleepStep(
 async function executeScriptStep(
   step: ScriptStep,
   context: ExpressionContext,
-  _logger: Logger
+  _logger: Logger,
+  sandbox = SafeSandbox
 ): Promise<StepResult> {
   try {
     if (!step.allowInsecure) {
@@ -513,7 +533,7 @@ async function executeScriptStep(
       );
     }
 
-    const result = await
+    const result = await sandbox.execute(
       step.run,
       {
         inputs: context.inputs,
@@ -546,14 +566,15 @@ async function executeMemoryStep(
   step: MemoryStep,
   context: ExpressionContext,
   logger: Logger,
-  memoryDb?: MemoryDb
+  memoryDb?: MemoryDb,
+  getAdapterFn = getAdapter
 ): Promise<StepResult> {
   if (!memoryDb) {
     throw new Error('Memory database not initialized');
   }
 
   try {
-    const { adapter, resolvedModel } =
+    const { adapter, resolvedModel } = getAdapterFn(step.model || 'local');
     if (!adapter.embed) {
       throw new Error(`Provider for model ${step.model || 'local'} does not support embeddings`);
     }
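Callers migrate from the old positional signature to the StepExecutorOptions bag in one mechanical step; an illustrative before/after, assuming the same locals are in scope:

    // Before (0.6.0): positional, order-sensitive.
    // await executeStep(step, context, logger,
    //   executeWorkflowFn, mcpManager, memoryDb, workflowDir, dryRun);

    // After (0.6.1): one named options object.
    await executeStep(step, context, logger, {
      executeWorkflowFn,
      mcpManager,
      memoryDb,
      workflowDir,
      dryRun,
    });

The recursive call inside executeLlmStep now just forwards the same options object, which keeps nested steps on the same configuration.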
package/src/runner/stream-utils.test.ts
CHANGED
@@ -4,16 +4,24 @@ import { processOpenAIStream } from './stream-utils';
 const encoder = new TextEncoder();
 
 function responseFromChunks(chunks: string[]): Response {
-
-
-
-
+  let index = 0;
+  const reader = {
+    async read(): Promise<{ done: boolean; value?: Uint8Array }> {
+      if (index >= chunks.length) {
+        return { done: true, value: undefined };
       }
-
+      const value = encoder.encode(chunks[index]);
+      index += 1;
+      return { done: false, value };
     },
-
+    async cancel(): Promise<void> {},
+  };
 
-  return
+  return {
+    body: {
+      getReader: () => reader,
+    },
+  } as Response;
 }
 
 describe('processOpenAIStream', () => {
@@ -61,5 +69,103 @@ describe('processOpenAIStream', () => {
 
     expect(result.message.content).toBe('ok');
     expect(logger.warn).toHaveBeenCalledTimes(1);
+    expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
+  });
+
+  it('throws error when buffer size is exceeded', async () => {
+    const response = responseFromChunks(['a'.repeat(1024 * 1024 + 1)]);
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM stream line exceed maximum size'
+    );
+  });
+
+  it('throws error when response size limit is exceeded', async () => {
+    const response = responseFromChunks([
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}\n`,
+    ]);
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM response exceeds maximum size'
+    );
+  });
+
+  it('throws error when tool call arguments size limit is exceeded', async () => {
+    const response = responseFromChunks([
+      `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(600 * 1024)}"}}]}}]}\n`,
+      `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(500 * 1024)}"}}]}}]}\n`,
+    ]);
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM tool call arguments exceed maximum size'
+    );
+  });
+
+  it('handles and logs generic errors during chunk processing', async () => {
+    const logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    // Mocking JSON.parse to throw a non-SyntaxError
+    const originalParse = JSON.parse;
+    JSON.parse = (str: string) => {
+      if (str === '{"trigger_error":true}') throw new Error('Generic error');
+      return originalParse(str);
+    };
+
+    try {
+      const response = responseFromChunks(['data: {"trigger_error":true}\n']);
+      await processOpenAIStream(response, { logger });
+      expect(logger.warn).toHaveBeenCalledTimes(1);
+      expect(logger.warn.mock.calls[0][0]).toContain(
+        'Error processing chunk: Error: Generic error'
+      );
+    } finally {
+      JSON.parse = originalParse;
+    }
+  });
+
+  it('handles errors in the final line processing', async () => {
+    const logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const response = responseFromChunks(['data: {bad json}']); // No newline, triggers buffer processing
+
+    await processOpenAIStream(response, { logger });
+
+    expect(logger.warn).toHaveBeenCalledTimes(1);
+    expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
+  });
+
+  it('throws size limit error in final line processing', async () => {
+    const response = responseFromChunks([
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}`,
+    ]);
+    // The first line is ok, the second line is in the final buffer and exceeds size
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM response exceeds maximum size'
+    );
+  });
+
+  it('bubbles up reader cancel errors', async () => {
+    const reader = {
+      read: async () => {
+        throw new Error('Read error');
+      },
+      cancel: async () => {
+        throw new Error('Cancel error');
+      },
+    };
+    const response = {
+      body: {
+        getReader: () => reader,
+      },
+    } as unknown as Response;
+
+    await expect(processOpenAIStream(response)).rejects.toThrow('Read error');
   });
 });
package/src/runner/stream-utils.ts
CHANGED
@@ -67,7 +67,7 @@ export async function processOpenAIStream(
       const toolCall = tc as ToolCallDelta;
       if (!toolCalls[toolCall.index]) {
         toolCalls[toolCall.index] = {
-          id: toolCall.id,
+          id: toolCall.id || '',
           type: 'function',
           function: { name: '', arguments: '' },
         };
@@ -93,7 +93,7 @@ export async function processOpenAIStream(
     const activeLogger = options?.logger || new ConsoleLogger();
 
     // Rethrow size limit errors so they bubble up
-    if (
+    if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
       throw e;
     }
 
@@ -137,7 +137,7 @@ export async function processOpenAIStream(
       const toolCall = tc as ToolCallDelta;
       if (!toolCalls[toolCall.index]) {
         toolCalls[toolCall.index] = {
-          id: toolCall.id,
+          id: toolCall.id || '',
           type: 'function',
           function: { name: '', arguments: '' },
         };
@@ -161,7 +161,7 @@ export async function processOpenAIStream(
       }
     }
   } catch (e) {
-    if (
+    if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
      throw e;
    }
    const activeLogger = options?.logger || new ConsoleLogger();
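Only size-limit errors are rethrown from processOpenAIStream; malformed chunks are logged through the injected logger and skipped. Driving the parser by hand is easy with the test file's responseFromChunks helper; a sketch assuming that helper is in scope:

    const response = responseFromChunks([
      'data: {"choices":[{"delta":{"content":"hel"}}]}\n',
      'data: {"choices":[{"delta":{"content":"lo"}}]}\n',
    ]);
    const result = await processOpenAIStream(response);
    console.log(result.message.content); // "hello"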
package/src/runner/workflow-runner.ts
CHANGED
@@ -630,16 +630,13 @@ export class WorkflowRunner {
     }
 
     const operation = async () => {
-      const result = await executeStep(
-
-
-        this.
-        this.
-        this.
-
-        this.options.workflowDir,
-        this.options.dryRun
-      );
+      const result = await executeStep(stepToExecute, context, this.logger, {
+        executeWorkflowFn: this.executeSubWorkflow.bind(this),
+        mcpManager: this.mcpManager,
+        memoryDb: this.memoryDb,
+        workflowDir: this.options.workflowDir,
+        dryRun: this.options.dryRun,
+      });
       if (result.status === 'failed') {
         throw new Error(result.error || 'Step failed');
       }
@@ -868,16 +865,13 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
 
     // Execute the agent step
     // We use a fresh context but share secrets/env
-    const result = await executeStep(
-
-
-      this.
-      this.
-      this.
-
-      this.options.workflowDir,
-      this.options.dryRun
-    );
+    const result = await executeStep(agentStep, context, this.logger, {
+      executeWorkflowFn: this.executeSubWorkflow.bind(this),
+      mcpManager: this.mcpManager,
+      memoryDb: this.memoryDb,
+      workflowDir: this.options.workflowDir,
+      dryRun: this.options.dryRun,
+    });
 
     if (result.status !== 'success' || !result.output) {
       throw new Error(`Healer agent failed: ${result.error || 'No output'}`);