keystone-cli 0.8.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +486 -54
  2. package/package.json +8 -2
  3. package/src/__fixtures__/index.ts +100 -0
  4. package/src/cli.ts +809 -90
  5. package/src/db/memory-db.ts +35 -1
  6. package/src/db/workflow-db.test.ts +24 -0
  7. package/src/db/workflow-db.ts +469 -14
  8. package/src/expression/evaluator.ts +68 -4
  9. package/src/parser/agent-parser.ts +6 -3
  10. package/src/parser/config-schema.ts +38 -2
  11. package/src/parser/schema.ts +192 -7
  12. package/src/parser/test-schema.ts +29 -0
  13. package/src/parser/workflow-parser.test.ts +54 -0
  14. package/src/parser/workflow-parser.ts +153 -7
  15. package/src/runner/aggregate-error.test.ts +57 -0
  16. package/src/runner/aggregate-error.ts +46 -0
  17. package/src/runner/audit-verification.test.ts +2 -2
  18. package/src/runner/auto-heal.test.ts +1 -1
  19. package/src/runner/blueprint-executor.test.ts +63 -0
  20. package/src/runner/blueprint-executor.ts +157 -0
  21. package/src/runner/concurrency-limit.test.ts +82 -0
  22. package/src/runner/debug-repl.ts +18 -3
  23. package/src/runner/durable-timers.test.ts +200 -0
  24. package/src/runner/engine-executor.test.ts +464 -0
  25. package/src/runner/engine-executor.ts +491 -0
  26. package/src/runner/foreach-executor.ts +30 -12
  27. package/src/runner/llm-adapter.test.ts +282 -5
  28. package/src/runner/llm-adapter.ts +581 -8
  29. package/src/runner/llm-clarification.test.ts +79 -21
  30. package/src/runner/llm-errors.ts +83 -0
  31. package/src/runner/llm-executor.test.ts +258 -219
  32. package/src/runner/llm-executor.ts +226 -29
  33. package/src/runner/mcp-client.ts +70 -3
  34. package/src/runner/mcp-manager.test.ts +52 -52
  35. package/src/runner/mcp-manager.ts +12 -5
  36. package/src/runner/mcp-server.test.ts +117 -78
  37. package/src/runner/mcp-server.ts +13 -4
  38. package/src/runner/optimization-runner.ts +48 -31
  39. package/src/runner/reflexion.test.ts +1 -1
  40. package/src/runner/resource-pool.test.ts +113 -0
  41. package/src/runner/resource-pool.ts +164 -0
  42. package/src/runner/shell-executor.ts +130 -32
  43. package/src/runner/standard-tools-integration.test.ts +36 -36
  44. package/src/runner/standard-tools.test.ts +18 -0
  45. package/src/runner/standard-tools.ts +110 -37
  46. package/src/runner/step-executor.test.ts +176 -16
  47. package/src/runner/step-executor.ts +530 -86
  48. package/src/runner/stream-utils.test.ts +14 -0
  49. package/src/runner/subflow-outputs.test.ts +103 -0
  50. package/src/runner/test-harness.ts +161 -0
  51. package/src/runner/tool-integration.test.ts +73 -79
  52. package/src/runner/workflow-runner.test.ts +492 -15
  53. package/src/runner/workflow-runner.ts +1438 -79
  54. package/src/runner/workflow-subflows.test.ts +255 -0
  55. package/src/templates/agents/keystone-architect.md +17 -12
  56. package/src/templates/agents/tester.md +21 -0
  57. package/src/templates/child-rollback.yaml +11 -0
  58. package/src/templates/decompose-implement.yaml +53 -0
  59. package/src/templates/decompose-problem.yaml +159 -0
  60. package/src/templates/decompose-research.yaml +52 -0
  61. package/src/templates/decompose-review.yaml +51 -0
  62. package/src/templates/dev.yaml +134 -0
  63. package/src/templates/engine-example.yaml +33 -0
  64. package/src/templates/fan-out-fan-in.yaml +61 -0
  65. package/src/templates/memory-service.yaml +1 -1
  66. package/src/templates/parent-rollback.yaml +16 -0
  67. package/src/templates/robust-automation.yaml +1 -1
  68. package/src/templates/scaffold-feature.yaml +29 -27
  69. package/src/templates/scaffold-generate.yaml +41 -0
  70. package/src/templates/scaffold-plan.yaml +53 -0
  71. package/src/types/status.ts +3 -0
  72. package/src/ui/dashboard.tsx +4 -3
  73. package/src/utils/assets.macro.ts +36 -0
  74. package/src/utils/auth-manager.ts +585 -8
  75. package/src/utils/blueprint-utils.test.ts +49 -0
  76. package/src/utils/blueprint-utils.ts +80 -0
  77. package/src/utils/circuit-breaker.test.ts +177 -0
  78. package/src/utils/circuit-breaker.ts +160 -0
  79. package/src/utils/config-loader.test.ts +100 -13
  80. package/src/utils/config-loader.ts +44 -17
  81. package/src/utils/constants.ts +62 -0
  82. package/src/utils/error-renderer.test.ts +267 -0
  83. package/src/utils/error-renderer.ts +320 -0
  84. package/src/utils/json-parser.test.ts +4 -0
  85. package/src/utils/json-parser.ts +18 -1
  86. package/src/utils/mermaid.ts +4 -0
  87. package/src/utils/paths.test.ts +46 -0
  88. package/src/utils/paths.ts +70 -0
  89. package/src/utils/process-sandbox.test.ts +128 -0
  90. package/src/utils/process-sandbox.ts +293 -0
  91. package/src/utils/rate-limiter.test.ts +143 -0
  92. package/src/utils/rate-limiter.ts +221 -0
  93. package/src/utils/redactor.test.ts +23 -15
  94. package/src/utils/redactor.ts +65 -25
  95. package/src/utils/resource-loader.test.ts +54 -0
  96. package/src/utils/resource-loader.ts +158 -0
  97. package/src/utils/sandbox.test.ts +69 -4
  98. package/src/utils/sandbox.ts +69 -6
  99. package/src/utils/schema-validator.ts +65 -0
  100. package/src/utils/workflow-registry.test.ts +57 -0
  101. package/src/utils/workflow-registry.ts +45 -25
  102. /package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
  103. /package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0
@@ -13,9 +13,11 @@ import * as dns from 'node:dns/promises';
13
13
  import { mkdirSync, rmSync } from 'node:fs';
14
14
  import { tmpdir } from 'node:os';
15
15
  import { join } from 'node:path';
16
+ import * as readlinePromises from 'node:readline/promises';
16
17
  import type { MemoryDb } from '../db/memory-db';
17
18
  import type { ExpressionContext } from '../expression/evaluator';
18
19
  import type {
20
+ EngineStep,
19
21
  FileStep,
20
22
  HumanStep,
21
23
  RequestStep,
@@ -23,18 +25,12 @@ import type {
23
25
  SleepStep,
24
26
  WorkflowStep,
25
27
  } from '../parser/schema';
28
+ import { ConfigLoader } from '../utils/config-loader';
26
29
  import type { SafeSandbox } from '../utils/sandbox';
27
30
  import type { getAdapter } from './llm-adapter';
31
+ import type { executeLlmStep } from './llm-executor';
28
32
  import { executeStep } from './step-executor';
29
33
 
30
- // Mock executeLlmStep
31
- mock.module('./llm-executor', () => ({
32
- // @ts-ignore
33
- executeLlmStep: mock((_step, _context, _callback) => {
34
- return Promise.resolve({ status: 'success', output: 'llm-output' });
35
- }),
36
- }));
37
-
38
34
  interface StepOutput {
39
35
  stdout: string;
40
36
  stderr: string;
@@ -46,16 +42,11 @@ interface RequestOutput {
46
42
  data: unknown;
47
43
  }
48
44
 
49
- // Mock node:readline/promises
50
45
  const mockRl = {
51
46
  question: mock(() => Promise.resolve('')),
52
47
  close: mock(() => {}),
53
48
  };
54
49
 
55
- mock.module('node:readline/promises', () => ({
56
- createInterface: mock(() => mockRl),
57
- }));
58
-
59
50
  describe('step-executor', () => {
60
51
  let context: ExpressionContext;
61
52
 
@@ -84,6 +75,10 @@ describe('step-executor', () => {
84
75
  };
85
76
  });
86
77
 
78
+ afterEach(() => {
79
+ ConfigLoader.clear();
80
+ });
81
+
87
82
  describe('shell', () => {
88
83
  it('should execute shell command', async () => {
89
84
  const step: ShellStep = {
@@ -313,6 +308,97 @@ describe('step-executor', () => {
313
308
  expect(result.status).toBe('failed');
314
309
  expect(result.error).toBe('Script failed');
315
310
  });
311
+
312
+ it('should pass logger to sandbox execution', async () => {
313
+ const logger = { log: mock(() => {}) };
314
+ // @ts-ignore
315
+ const step = {
316
+ id: 's1',
317
+ type: 'script',
318
+ run: 'return 1',
319
+ allowInsecure: true,
320
+ };
321
+
322
+ const mockSandbox = {
323
+ execute: async (
324
+ _code: string,
325
+ _context: Record<string, unknown>,
326
+ options?: { logger?: unknown }
327
+ ) => {
328
+ expect(options?.logger).toBe(logger);
329
+ return 'ok';
330
+ },
331
+ };
332
+
333
+ const result = await executeStep(
334
+ step,
335
+ context,
336
+ logger as unknown as Parameters<typeof executeStep>[2],
337
+ {
338
+ sandbox: mockSandbox as unknown as Parameters<typeof executeStep>[3]['sandbox'],
339
+ }
340
+ );
341
+
342
+ expect(result.status).toBe('success');
343
+ });
344
+ });
345
+
346
+ describe('engine', () => {
347
+ const artifactRoot = join(tempDir, 'engine-artifacts');
348
+
349
+ const setEngineConfig = (
350
+ allowlist: Record<string, { command: string; version: string; versionArgs?: string[] }>
351
+ ) => {
352
+ ConfigLoader.setConfig({
353
+ default_provider: 'openai',
354
+ providers: {},
355
+ model_mappings: {},
356
+ storage: { retention_days: 30, redact_secrets_at_rest: true },
357
+ mcp_servers: {},
358
+ engines: { allowlist, denylist: [] },
359
+ concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
360
+ });
361
+ };
362
+
363
+ it('should execute engine command and parse summary', async () => {
364
+ const version = (Bun.version || process.versions?.bun || '') as string;
365
+ setEngineConfig({ bun: { command: 'bun', version } });
366
+
367
+ const step: EngineStep = {
368
+ id: 'e1',
369
+ type: 'engine',
370
+ command: 'bun',
371
+ args: ['-e', 'console.log(JSON.stringify({ ok: true }))'],
372
+ env: { PATH: process.env.PATH || '' },
373
+ cwd: process.cwd(),
374
+ };
375
+
376
+ const result = await executeStep(step, context, undefined, { artifactRoot });
377
+ expect(result.status).toBe('success');
378
+ const output = result.output as { summary: { ok: boolean }; artifactPath?: string };
379
+ expect(output.summary).toEqual({ ok: true });
380
+ expect(output.artifactPath).toBeTruthy();
381
+
382
+ const artifactText = await Bun.file(output.artifactPath as string).text();
383
+ expect(artifactText).toContain('"ok": true');
384
+ });
385
+
386
+ it('should fail when engine command is not allowlisted', async () => {
387
+ setEngineConfig({});
388
+
389
+ const step: EngineStep = {
390
+ id: 'e1',
391
+ type: 'engine',
392
+ command: 'bun',
393
+ args: ['-e', 'console.log(JSON.stringify({ ok: true }))'],
394
+ env: { PATH: process.env.PATH || '' },
395
+ cwd: process.cwd(),
396
+ };
397
+
398
+ const result = await executeStep(step, context, undefined, { artifactRoot });
399
+ expect(result.status).toBe('failed');
400
+ expect(result.error).toContain('allowlist');
401
+ });
316
402
  });
317
403
 
318
404
  describe('memory', () => {
@@ -322,7 +408,7 @@ describe('step-executor', () => {
322
408
  };
323
409
 
324
410
  const mockGetAdapter = mock((model) => {
325
- if (model === 'no-embed') return { adapter: {}, resolvedModel: model };
411
+ if (model === 'local:no-embed') return { adapter: {}, resolvedModel: model };
326
412
  return {
327
413
  adapter: {
328
414
  embed: mock((text) => Promise.resolve([0.1, 0.2, 0.3])),
@@ -343,7 +429,13 @@ describe('step-executor', () => {
343
429
 
344
430
  it('should fail if adapter does not support embedding', async () => {
345
431
  // @ts-ignore
346
- const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo', model: 'no-embed' };
432
+ const step = {
433
+ id: 'm1',
434
+ type: 'memory',
435
+ op: 'store',
436
+ text: 'foo',
437
+ model: 'local:no-embed',
438
+ };
347
439
  // @ts-ignore
348
440
  const result = await executeStep(step, context, undefined, {
349
441
  memoryDb: mockMemoryDb as unknown as MemoryDb,
@@ -353,6 +445,18 @@ describe('step-executor', () => {
353
445
  expect(result.error).toContain('does not support embeddings');
354
446
  });
355
447
 
448
+ it('should fail for non-local embedding models', async () => {
449
+ // @ts-ignore
450
+ const step = { id: 'm1', type: 'memory', op: 'store', text: 'foo', model: 'openai' };
451
+ // @ts-ignore
452
+ const result = await executeStep(step, context, undefined, {
453
+ memoryDb: mockMemoryDb as unknown as MemoryDb,
454
+ getAdapter: mockGetAdapter as unknown as typeof getAdapter,
455
+ });
456
+ expect(result.status).toBe('failed');
457
+ expect(result.error).toContain('only support local embeddings');
458
+ });
459
+
356
460
  it('should store memory', async () => {
357
461
  // @ts-ignore
358
462
  const step = {
@@ -585,17 +689,67 @@ describe('step-executor', () => {
585
689
  expect(result.error).toContain('HTTP 400: Bad Request');
586
690
  expect(result.error).toContain('Response Body: {"error": "bad request details"}');
587
691
  });
692
+
693
+ it('should drop auth headers on cross-origin redirects', async () => {
694
+ // @ts-ignore
695
+ global.fetch
696
+ .mockResolvedValueOnce(
697
+ new Response('', {
698
+ status: 302,
699
+ headers: { Location: 'https://other.example.com/next' },
700
+ })
701
+ )
702
+ .mockResolvedValueOnce(new Response('ok'));
703
+
704
+ const step: RequestStep = {
705
+ id: 'req-redirect',
706
+ type: 'request',
707
+ needs: [],
708
+ url: 'https://api.example.com/start',
709
+ method: 'GET',
710
+ headers: { Authorization: 'Bearer token' },
711
+ };
712
+
713
+ const result = await executeStep(step, context);
714
+ expect(result.status).toBe('success');
715
+
716
+ // @ts-ignore
717
+ const secondCall = global.fetch.mock.calls[1][1];
718
+ expect(secondCall.headers.Authorization).toBeUndefined();
719
+ });
720
+
721
+ it('should allow insecure request when allowInsecure is true', async () => {
722
+ // @ts-ignore
723
+ global.fetch.mockResolvedValue(new Response('ok'));
724
+
725
+ const step: RequestStep = {
726
+ id: 'req-insecure',
727
+ type: 'request',
728
+ needs: [],
729
+ url: 'http://localhost/test',
730
+ method: 'GET',
731
+ allowInsecure: true,
732
+ };
733
+
734
+ const result = await executeStep(step, context);
735
+ expect(result.status).toBe('success');
736
+ });
588
737
  });
589
738
 
590
739
  describe('human', () => {
591
740
  const originalIsTTY = process.stdin.isTTY;
741
+ let createInterfaceSpy: ReturnType<typeof spyOn>;
592
742
 
593
743
  beforeEach(() => {
594
744
  process.stdin.isTTY = true;
745
+ createInterfaceSpy = spyOn(readlinePromises, 'createInterface').mockReturnValue(
746
+ mockRl as unknown as ReturnType<typeof readlinePromises.createInterface>
747
+ );
595
748
  });
596
749
 
597
750
  afterEach(() => {
598
751
  process.stdin.isTTY = originalIsTTY;
752
+ createInterfaceSpy.mockRestore();
599
753
  });
600
754
 
601
755
  it('should handle human confirmation', async () => {
@@ -737,7 +891,13 @@ describe('step-executor', () => {
737
891
  type: 'llm',
738
892
  prompt: 'hello',
739
893
  };
740
- const result = await executeStep(step, context);
894
+ const executeLlmStepMock = mock(async () => ({
895
+ status: 'success',
896
+ output: 'llm-output',
897
+ })) as unknown as typeof executeLlmStep;
898
+ const result = await executeStep(step, context, undefined, {
899
+ executeLlmStep: executeLlmStepMock,
900
+ });
741
901
  expect(result.status).toBe('success');
742
902
  expect(result.output).toBe('llm-output');
743
903
  });