keystone-cli 0.8.0 → 1.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +486 -54
  2. package/package.json +8 -2
  3. package/src/__fixtures__/index.ts +100 -0
  4. package/src/cli.ts +809 -90
  5. package/src/db/memory-db.ts +35 -1
  6. package/src/db/workflow-db.test.ts +24 -0
  7. package/src/db/workflow-db.ts +469 -14
  8. package/src/expression/evaluator.ts +68 -4
  9. package/src/parser/agent-parser.ts +6 -3
  10. package/src/parser/config-schema.ts +38 -2
  11. package/src/parser/schema.ts +192 -7
  12. package/src/parser/test-schema.ts +29 -0
  13. package/src/parser/workflow-parser.test.ts +54 -0
  14. package/src/parser/workflow-parser.ts +153 -7
  15. package/src/runner/aggregate-error.test.ts +57 -0
  16. package/src/runner/aggregate-error.ts +46 -0
  17. package/src/runner/audit-verification.test.ts +2 -2
  18. package/src/runner/auto-heal.test.ts +1 -1
  19. package/src/runner/blueprint-executor.test.ts +63 -0
  20. package/src/runner/blueprint-executor.ts +157 -0
  21. package/src/runner/concurrency-limit.test.ts +82 -0
  22. package/src/runner/debug-repl.ts +18 -3
  23. package/src/runner/durable-timers.test.ts +200 -0
  24. package/src/runner/engine-executor.test.ts +464 -0
  25. package/src/runner/engine-executor.ts +491 -0
  26. package/src/runner/foreach-executor.ts +30 -12
  27. package/src/runner/llm-adapter.test.ts +282 -5
  28. package/src/runner/llm-adapter.ts +581 -8
  29. package/src/runner/llm-clarification.test.ts +79 -21
  30. package/src/runner/llm-errors.ts +83 -0
  31. package/src/runner/llm-executor.test.ts +258 -219
  32. package/src/runner/llm-executor.ts +226 -29
  33. package/src/runner/mcp-client.ts +70 -3
  34. package/src/runner/mcp-manager.test.ts +52 -52
  35. package/src/runner/mcp-manager.ts +12 -5
  36. package/src/runner/mcp-server.test.ts +117 -78
  37. package/src/runner/mcp-server.ts +13 -4
  38. package/src/runner/optimization-runner.ts +48 -31
  39. package/src/runner/reflexion.test.ts +1 -1
  40. package/src/runner/resource-pool.test.ts +113 -0
  41. package/src/runner/resource-pool.ts +164 -0
  42. package/src/runner/shell-executor.ts +130 -32
  43. package/src/runner/standard-tools-integration.test.ts +36 -36
  44. package/src/runner/standard-tools.test.ts +18 -0
  45. package/src/runner/standard-tools.ts +110 -37
  46. package/src/runner/step-executor.test.ts +176 -16
  47. package/src/runner/step-executor.ts +530 -86
  48. package/src/runner/stream-utils.test.ts +14 -0
  49. package/src/runner/subflow-outputs.test.ts +103 -0
  50. package/src/runner/test-harness.ts +161 -0
  51. package/src/runner/tool-integration.test.ts +73 -79
  52. package/src/runner/workflow-runner.test.ts +492 -15
  53. package/src/runner/workflow-runner.ts +1438 -79
  54. package/src/runner/workflow-subflows.test.ts +255 -0
  55. package/src/templates/agents/keystone-architect.md +17 -12
  56. package/src/templates/agents/tester.md +21 -0
  57. package/src/templates/child-rollback.yaml +11 -0
  58. package/src/templates/decompose-implement.yaml +53 -0
  59. package/src/templates/decompose-problem.yaml +159 -0
  60. package/src/templates/decompose-research.yaml +52 -0
  61. package/src/templates/decompose-review.yaml +51 -0
  62. package/src/templates/dev.yaml +134 -0
  63. package/src/templates/engine-example.yaml +33 -0
  64. package/src/templates/fan-out-fan-in.yaml +61 -0
  65. package/src/templates/memory-service.yaml +1 -1
  66. package/src/templates/parent-rollback.yaml +16 -0
  67. package/src/templates/robust-automation.yaml +1 -1
  68. package/src/templates/scaffold-feature.yaml +29 -27
  69. package/src/templates/scaffold-generate.yaml +41 -0
  70. package/src/templates/scaffold-plan.yaml +53 -0
  71. package/src/types/status.ts +3 -0
  72. package/src/ui/dashboard.tsx +4 -3
  73. package/src/utils/assets.macro.ts +36 -0
  74. package/src/utils/auth-manager.ts +585 -8
  75. package/src/utils/blueprint-utils.test.ts +49 -0
  76. package/src/utils/blueprint-utils.ts +80 -0
  77. package/src/utils/circuit-breaker.test.ts +177 -0
  78. package/src/utils/circuit-breaker.ts +160 -0
  79. package/src/utils/config-loader.test.ts +100 -13
  80. package/src/utils/config-loader.ts +44 -17
  81. package/src/utils/constants.ts +62 -0
  82. package/src/utils/error-renderer.test.ts +267 -0
  83. package/src/utils/error-renderer.ts +320 -0
  84. package/src/utils/json-parser.test.ts +4 -0
  85. package/src/utils/json-parser.ts +18 -1
  86. package/src/utils/mermaid.ts +4 -0
  87. package/src/utils/paths.test.ts +46 -0
  88. package/src/utils/paths.ts +70 -0
  89. package/src/utils/process-sandbox.test.ts +128 -0
  90. package/src/utils/process-sandbox.ts +293 -0
  91. package/src/utils/rate-limiter.test.ts +143 -0
  92. package/src/utils/rate-limiter.ts +221 -0
  93. package/src/utils/redactor.test.ts +23 -15
  94. package/src/utils/redactor.ts +65 -25
  95. package/src/utils/resource-loader.test.ts +54 -0
  96. package/src/utils/resource-loader.ts +158 -0
  97. package/src/utils/sandbox.test.ts +69 -4
  98. package/src/utils/sandbox.ts +69 -6
  99. package/src/utils/schema-validator.ts +65 -0
  100. package/src/utils/workflow-registry.test.ts +57 -0
  101. package/src/utils/workflow-registry.ts +45 -25
  102. /package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
  103. /package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0
@@ -47,6 +47,21 @@ export class DebugRepl {
47
47
  rl.prompt();
48
48
 
49
49
  return new Promise((resolve) => {
50
+ let resolved = false;
51
+ const resolveOnce = (action: DebugAction) => {
52
+ if (resolved) return;
53
+ resolved = true;
54
+ resolve(action);
55
+ };
56
+
57
+ rl.on('close', () => {
58
+ resolveOnce({ type: 'continue_failure' });
59
+ });
60
+
61
+ rl.on('SIGINT', () => {
62
+ rl.close();
63
+ });
64
+
50
65
  rl.on('line', (line) => {
51
66
  const trimmed = line.trim();
52
67
  const [cmd, ...args] = trimmed.split(' ');
@@ -59,19 +74,19 @@ export class DebugRepl {
59
74
  break;
60
75
 
61
76
  case 'retry':
77
+ resolveOnce({ type: 'retry', modifiedStep: this.step });
62
78
  rl.close();
63
- resolve({ type: 'retry', modifiedStep: this.step });
64
79
  break;
65
80
 
66
81
  case 'skip':
82
+ resolveOnce({ type: 'skip' });
67
83
  rl.close();
68
- resolve({ type: 'skip' });
69
84
  break;
70
85
 
71
86
  case 'exit':
72
87
  case 'quit':
88
+ resolveOnce({ type: 'continue_failure' });
73
89
  rl.close();
74
- resolve({ type: 'continue_failure' });
75
90
  break;
76
91
 
77
92
  case 'edit': {
@@ -0,0 +1,200 @@
1
+ import { afterAll, beforeAll, describe, expect, it, mock } from 'bun:test';
2
+ import { randomUUID } from 'node:crypto';
3
+ import { WorkflowDb } from '../db/workflow-db';
4
+ import type { Workflow } from '../parser/schema';
5
+ import { StepStatus, WorkflowStatus } from '../types/status';
6
+ import { WorkflowSuspendedError, WorkflowWaitingError } from './step-executor';
7
+ import { WorkflowRunner } from './workflow-runner';
8
+
9
+ describe('Durable Timers Integration', () => {
10
+ const dbPath = 'test-timers.db';
11
+ let db: WorkflowDb;
12
+
13
+ beforeAll(() => {
14
+ db = new WorkflowDb(dbPath);
15
+ });
16
+
17
+ afterAll(() => {
18
+ db.close();
19
+ try {
20
+ const { rmSync } = require('node:fs');
21
+ rmSync(dbPath);
22
+ } catch {}
23
+ });
24
+
25
+ const sleepWorkflow: Workflow = {
26
+ name: 'sleep-test',
27
+ steps: [
28
+ {
29
+ id: 'wait',
30
+ type: 'sleep',
31
+ duration: 120000, // 2 minutes
32
+ durable: true,
33
+ needs: [],
34
+ },
35
+ ],
36
+ };
37
+
38
+ const humanWorkflow: Workflow = {
39
+ name: 'human-test',
40
+ steps: [
41
+ {
42
+ id: 'approve',
43
+ type: 'human',
44
+ message: 'Approve?',
45
+ needs: [],
46
+ },
47
+ ],
48
+ };
49
+
50
+ it('should suspend a durable sleep step and create a timer', async () => {
51
+ const runner = new WorkflowRunner(sleepWorkflow, { dbPath });
52
+ const runId = runner.runId;
53
+
54
+ try {
55
+ await runner.run();
56
+ } catch (error) {
57
+ if (!(error instanceof WorkflowWaitingError)) {
58
+ throw error;
59
+ }
60
+ expect(error.stepId).toBe('wait');
61
+ }
62
+
63
+ const run = await db.getRun(runId);
64
+ expect(run?.status).toBe(WorkflowStatus.PAUSED);
65
+
66
+ const steps = await db.getStepsByRun(runId);
67
+ expect(steps[0].status).toBe(StepStatus.WAITING);
68
+
69
+ const timer = await db.getTimerByStep(runId, 'wait');
70
+ expect(timer).toBeDefined();
71
+ expect(timer?.timer_type).toBe('sleep');
72
+ expect(timer?.wake_at).not.toBeNull();
73
+
74
+ if (!timer?.wake_at) {
75
+ throw new Error('Expected timer wake_at to be set');
76
+ }
77
+ const wakeAt = new Date(timer.wake_at);
78
+ expect(wakeAt.getTime()).toBeGreaterThan(Date.now());
79
+ });
80
+
81
+ it('should persist human waits without scheduling', async () => {
82
+ const originalIsTTY = process.stdin.isTTY;
83
+ process.stdin.isTTY = false; // Ensure human step suspends instead of waiting for input
84
+
85
+ const runner = new WorkflowRunner(humanWorkflow, { dbPath });
86
+ const runId = runner.runId;
87
+
88
+ try {
89
+ await runner.run();
90
+ } catch (error) {
91
+ if (!(error instanceof WorkflowSuspendedError)) {
92
+ throw error;
93
+ }
94
+ expect(error.stepId).toBe('approve');
95
+ } finally {
96
+ process.stdin.isTTY = originalIsTTY;
97
+ }
98
+
99
+ const run = await db.getRun(runId);
100
+ expect(run?.status).toBe(WorkflowStatus.PAUSED);
101
+
102
+ const steps = await db.getStepsByRun(runId);
103
+ expect(steps[0].status).toBe(StepStatus.SUSPENDED);
104
+
105
+ const timer = await db.getTimerByStep(runId, 'approve');
106
+ expect(timer).toBeDefined();
107
+ expect(timer?.timer_type).toBe('human');
108
+ expect(timer?.wake_at).toBeNull();
109
+
110
+ const pending = await db.getPendingTimers();
111
+ expect(pending.find((t) => t.step_id === 'approve')).toBeUndefined();
112
+ });
113
+
114
+ it('should resume a waiting run if the timer has NOT elapsed', async () => {
115
+ const runner = new WorkflowRunner(sleepWorkflow, { dbPath });
116
+ const runId = runner.runId;
117
+
118
+ // Start it once to get it waiting
119
+ try {
120
+ await runner.run();
121
+ } catch {}
122
+
123
+ // Now try to resume with a new runner instance
124
+ const resumeRunner = new WorkflowRunner(sleepWorkflow, {
125
+ dbPath,
126
+ resumeRunId: runId,
127
+ });
128
+
129
+ try {
130
+ await resumeRunner.run();
131
+ } catch (error) {
132
+ if (!(error instanceof WorkflowWaitingError)) {
133
+ throw error;
134
+ }
135
+ expect(error.stepId).toBe('wait');
136
+ }
137
+
138
+ const steps = await db.getStepsByRun(runId);
139
+ expect(steps[0].status).toBe(StepStatus.WAITING);
140
+ });
141
+
142
+ it('should NOT create duplicate timers on resume', async () => {
143
+ const runner = new WorkflowRunner(sleepWorkflow, { dbPath });
144
+ const runId = runner.runId;
145
+ try {
146
+ await runner.run();
147
+ } catch {}
148
+
149
+ const timersBefore = await db.listTimers(runId);
150
+ expect(timersBefore).toHaveLength(1);
151
+
152
+ const resumeRunner = new WorkflowRunner(sleepWorkflow, { dbPath, resumeRunId: runId });
153
+ try {
154
+ await resumeRunner.run();
155
+ } catch {}
156
+
157
+ const timersAfter = await db.listTimers(runId);
158
+ // After fix, it should NOT create a new timer if one is already pending
159
+ expect(timersAfter).toHaveLength(1);
160
+ });
161
+
162
+ it('should resume and COMPLETE a waiting run if the timer has elapsed', async () => {
163
+ const runner = new WorkflowRunner(sleepWorkflow, { dbPath });
164
+ const runId = runner.runId;
165
+
166
+ try {
167
+ await runner.run();
168
+ } catch {}
169
+
170
+ const timer = await db.getTimerByStep(runId, 'wait');
171
+ expect(timer).toBeDefined();
172
+ if (!timer) {
173
+ throw new Error('Expected timer to be created');
174
+ }
175
+
176
+ // Manually backdate the timer in the DB to simulate elapsed time
177
+ const pastDate = new Date(Date.now() - 1000).toISOString();
178
+ const { Database } = require('bun:sqlite');
179
+ const sqlite = new Database(dbPath);
180
+ sqlite.prepare('UPDATE durable_timers SET wake_at = ? WHERE id = ?').run(pastDate, timer.id);
181
+ sqlite.close();
182
+
183
+ const resumeRunner = new WorkflowRunner(sleepWorkflow, {
184
+ dbPath,
185
+ resumeRunId: runId,
186
+ });
187
+
188
+ const outputs = await resumeRunner.run();
189
+ expect(outputs).toBeDefined();
190
+
191
+ const run = await db.getRun(runId);
192
+ expect(run?.status).toBe(WorkflowStatus.SUCCESS);
193
+
194
+ const steps = await db.getStepsByRun(runId);
195
+ expect(steps[0].status).toBe(StepStatus.SUCCESS);
196
+
197
+ const finalTimer = await db.getTimer(timer.id);
198
+ expect(finalTimer?.completed_at).not.toBeNull();
199
+ });
200
+ });
@@ -0,0 +1,464 @@
1
+ import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test';
2
+ import { mkdirSync, rmSync } from 'node:fs';
3
+ import { tmpdir } from 'node:os';
4
+ import { join } from 'node:path';
5
+ import type { EngineStep } from '../parser/schema';
6
+ import { executeEngineStep } from './engine-executor';
7
+
8
+ // Helper to create a minimal valid EngineStep for testing
9
+ const createStep = (overrides: Partial<EngineStep>): EngineStep =>
10
+ ({
11
+ id: 'test',
12
+ type: 'engine',
13
+ command: 'echo',
14
+ args: [],
15
+ cwd: '/tmp',
16
+ env: { PATH: '/usr/bin' },
17
+ needs: [],
18
+ ...overrides,
19
+ }) as EngineStep;
20
+
21
+ describe('engine-executor', () => {
22
+ const tempDir = join(tmpdir(), `engine-test-${Date.now()}`);
23
+
24
+ afterEach(() => {
25
+ try {
26
+ rmSync(tempDir, { recursive: true, force: true });
27
+ } catch {
28
+ // Ignore cleanup errors
29
+ }
30
+ });
31
+
32
+ describe('executeEngineStep', () => {
33
+ it('should reject if aborted before execution', async () => {
34
+ const controller = new AbortController();
35
+ controller.abort();
36
+
37
+ const step = createStep({
38
+ cwd: '.',
39
+ env: { PATH: '/usr/bin' },
40
+ });
41
+
42
+ await expect(
43
+ executeEngineStep(
44
+ step,
45
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
46
+ {
47
+ abortSignal: controller.signal,
48
+ }
49
+ )
50
+ ).rejects.toThrow('Step canceled');
51
+ });
52
+
53
+ it('should reject if cwd is not provided', async () => {
54
+ const step = createStep({
55
+ cwd: '',
56
+ env: { PATH: '/usr/bin' },
57
+ });
58
+
59
+ await expect(
60
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
61
+ ).rejects.toThrow('requires an explicit cwd');
62
+ });
63
+
64
+ it('should reject if env is not provided', async () => {
65
+ const step = createStep({
66
+ cwd: '/tmp',
67
+ env: undefined as unknown as Record<string, string>,
68
+ });
69
+
70
+ await expect(
71
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
72
+ ).rejects.toThrow('requires an explicit env');
73
+ });
74
+
75
+ it('should reject if PATH is not in env for non-absolute command', async () => {
76
+ const step = createStep({
77
+ cwd: '/tmp',
78
+ env: { HOME: '/home' },
79
+ });
80
+
81
+ await expect(
82
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
83
+ ).rejects.toThrow('requires env.PATH');
84
+ });
85
+
86
+ it('should reject if command is denied', async () => {
87
+ const ConfigLoader = await import('../utils/config-loader');
88
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
89
+ engines: {
90
+ denylist: ['rm', 'dd'],
91
+ allowlist: {},
92
+ },
93
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
94
+
95
+ try {
96
+ const step = createStep({
97
+ command: 'rm',
98
+ args: ['-rf', '/'],
99
+ cwd: '/tmp',
100
+ env: { PATH: '/usr/bin' },
101
+ });
102
+
103
+ await expect(
104
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
105
+ ).rejects.toThrow('denied by engines.denylist');
106
+ } finally {
107
+ loadSpy.mockRestore();
108
+ }
109
+ });
110
+
111
+ it('should reject if command is not in allowlist', async () => {
112
+ const ConfigLoader = await import('../utils/config-loader');
113
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
114
+ engines: {
115
+ denylist: [],
116
+ allowlist: {
117
+ python: { command: 'python3', version: '3.11', versionArgs: [], args: [] },
118
+ },
119
+ },
120
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
121
+
122
+ try {
123
+ const step = createStep({
124
+ command: 'node',
125
+ cwd: '/tmp',
126
+ env: { PATH: '/usr/bin' },
127
+ });
128
+
129
+ await expect(
130
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
131
+ ).rejects.toThrow('not in the allowlist');
132
+ } finally {
133
+ loadSpy.mockRestore();
134
+ }
135
+ });
136
+
137
+ it('should match allowlist by basename', async () => {
138
+ const ConfigLoader = await import('../utils/config-loader');
139
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
140
+ engines: {
141
+ denylist: [],
142
+ allowlist: {
143
+ echo: { command: 'echo', version: '', versionArgs: [], args: [] },
144
+ },
145
+ },
146
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
147
+
148
+ try {
149
+ mkdirSync(tempDir, { recursive: true });
150
+ const step = createStep({
151
+ command: '/bin/echo',
152
+ args: ['hello'],
153
+ cwd: tempDir,
154
+ env: { PATH: '/bin:/usr/bin' },
155
+ });
156
+
157
+ const result = await executeEngineStep(
158
+ step,
159
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
160
+ { artifactRoot: tempDir }
161
+ );
162
+ expect(result.exitCode).toBe(0);
163
+ expect(result.stdout).toContain('hello');
164
+ } finally {
165
+ loadSpy.mockRestore();
166
+ }
167
+ });
168
+
169
+ it('should reject on version mismatch', async () => {
170
+ const ConfigLoader = await import('../utils/config-loader');
171
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
172
+ engines: {
173
+ denylist: [],
174
+ allowlist: {
175
+ echo: { command: 'echo', version: '999.0.0', versionArgs: [], args: [] },
176
+ },
177
+ },
178
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
179
+
180
+ try {
181
+ mkdirSync(tempDir, { recursive: true });
182
+ const step = createStep({
183
+ command: '/bin/echo',
184
+ args: ['hello'],
185
+ cwd: tempDir,
186
+ env: { PATH: '/bin:/usr/bin' },
187
+ });
188
+
189
+ await expect(
190
+ executeEngineStep(
191
+ step,
192
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
193
+ { artifactRoot: tempDir }
194
+ )
195
+ ).rejects.toThrow('version mismatch');
196
+ } finally {
197
+ loadSpy.mockRestore();
198
+ }
199
+ });
200
+
201
+ it('should parse JSON summary from stdout', async () => {
202
+ const ConfigLoader = await import('../utils/config-loader');
203
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
204
+ engines: {
205
+ denylist: [],
206
+ allowlist: {
207
+ echo: { command: 'echo', version: '', versionArgs: [], args: [] },
208
+ },
209
+ },
210
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
211
+
212
+ try {
213
+ mkdirSync(tempDir, { recursive: true });
214
+ const step = createStep({
215
+ command: '/bin/echo',
216
+ args: ['{"result": "success"}'],
217
+ cwd: tempDir,
218
+ env: { PATH: '/bin:/usr/bin' },
219
+ });
220
+
221
+ const result = await executeEngineStep(
222
+ step,
223
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
224
+ { artifactRoot: tempDir }
225
+ );
226
+
227
+ expect(result.summary).toEqual({ result: 'success' });
228
+ expect(result.summarySource).toBe('stdout');
229
+ expect(result.summaryFormat).toBe('json');
230
+ } finally {
231
+ loadSpy.mockRestore();
232
+ }
233
+ });
234
+
235
+ it('should parse YAML summary from stdout', async () => {
236
+ const ConfigLoader = await import('../utils/config-loader');
237
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
238
+ engines: {
239
+ denylist: [],
240
+ allowlist: {
241
+ sh: { command: 'sh', version: '', versionArgs: [], args: [] },
242
+ },
243
+ },
244
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
245
+
246
+ try {
247
+ mkdirSync(tempDir, { recursive: true });
248
+ const step = createStep({
249
+ command: '/bin/sh',
250
+ args: ['-c', 'echo "result: success"; echo "count: 42"'],
251
+ cwd: tempDir,
252
+ env: { PATH: '/bin:/usr/bin' },
253
+ });
254
+
255
+ const result = await executeEngineStep(
256
+ step,
257
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
258
+ { artifactRoot: tempDir }
259
+ );
260
+
261
+ expect(result.summary).toEqual({ result: 'success', count: 42 });
262
+ expect(result.summaryFormat).toBe('yaml');
263
+ } finally {
264
+ loadSpy.mockRestore();
265
+ }
266
+ });
267
+
268
+ it('should handle summary file over stdout', async () => {
269
+ const ConfigLoader = await import('../utils/config-loader');
270
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
271
+ engines: {
272
+ denylist: [],
273
+ allowlist: {
274
+ sh: { command: 'sh', version: '', versionArgs: [], args: [] },
275
+ },
276
+ },
277
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
278
+
279
+ try {
280
+ mkdirSync(tempDir, { recursive: true });
281
+ const step = createStep({
282
+ command: '/bin/sh',
283
+ args: [
284
+ '-c',
285
+ 'echo \'{"from": "file"}\' > $KEYSTONE_ENGINE_SUMMARY_PATH && echo \'{"from": "stdout"}\'',
286
+ ],
287
+ cwd: tempDir,
288
+ env: { PATH: '/bin:/usr/bin' },
289
+ });
290
+
291
+ const result = await executeEngineStep(
292
+ step,
293
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
294
+ { artifactRoot: tempDir }
295
+ );
296
+
297
+ expect(result.summary).toEqual({ from: 'file' });
298
+ expect(result.summarySource).toBe('file');
299
+ } finally {
300
+ loadSpy.mockRestore();
301
+ }
302
+ });
303
+
304
+ it('should handle invalid summary gracefully', async () => {
305
+ const ConfigLoader = await import('../utils/config-loader');
306
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
307
+ engines: {
308
+ denylist: [],
309
+ allowlist: {
310
+ echo: { command: 'echo', version: '', versionArgs: [], args: [] },
311
+ },
312
+ },
313
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
314
+
315
+ try {
316
+ mkdirSync(tempDir, { recursive: true });
317
+ const step = createStep({
318
+ command: '/bin/echo',
319
+ args: ['not valid json or yaml :: %%'],
320
+ cwd: tempDir,
321
+ env: { PATH: '/bin:/usr/bin' },
322
+ });
323
+
324
+ const result = await executeEngineStep(
325
+ step,
326
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
327
+ { artifactRoot: tempDir }
328
+ );
329
+
330
+ expect(result.summary).toBeNull();
331
+ expect(result.summaryError).toBeDefined();
332
+ } finally {
333
+ loadSpy.mockRestore();
334
+ }
335
+ });
336
+
337
+ it('should apply redactForStorage to summary', async () => {
338
+ const ConfigLoader = await import('../utils/config-loader');
339
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
340
+ engines: {
341
+ denylist: [],
342
+ allowlist: {
343
+ echo: { command: 'echo', version: '', versionArgs: [], args: [] },
344
+ },
345
+ },
346
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
347
+
348
+ try {
349
+ mkdirSync(tempDir, { recursive: true });
350
+ const step = createStep({
351
+ command: '/bin/echo',
352
+ args: ['{"secret": "password123"}'],
353
+ cwd: tempDir,
354
+ env: { PATH: '/bin:/usr/bin' },
355
+ });
356
+
357
+ const redactForStorage = mock((value: unknown) => {
358
+ if (typeof value === 'object' && value !== null) {
359
+ return { ...(value as object), secret: '[REDACTED]' };
360
+ }
361
+ return value;
362
+ });
363
+
364
+ await executeEngineStep(
365
+ step,
366
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
367
+ { artifactRoot: tempDir, redactForStorage }
368
+ );
369
+
370
+ expect(redactForStorage).toHaveBeenCalled();
371
+ } finally {
372
+ loadSpy.mockRestore();
373
+ }
374
+ });
375
+
376
+ it('should evaluate expressions in command args', async () => {
377
+ const ConfigLoader = await import('../utils/config-loader');
378
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
379
+ engines: {
380
+ denylist: [],
381
+ allowlist: {
382
+ echo: { command: 'echo', version: '', versionArgs: [], args: [] },
383
+ },
384
+ },
385
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
386
+
387
+ try {
388
+ mkdirSync(tempDir, { recursive: true });
389
+ const step = createStep({
390
+ command: '/bin/echo',
391
+ args: ['${{ inputs.message }}'],
392
+ cwd: tempDir,
393
+ env: { PATH: '/bin:/usr/bin' },
394
+ });
395
+
396
+ const result = await executeEngineStep(
397
+ step,
398
+ { inputs: { message: 'Hello, World!' }, secrets: {}, env: {}, steps: {} },
399
+ { artifactRoot: tempDir }
400
+ );
401
+
402
+ expect(result.stdout).toContain('Hello, World!');
403
+ } finally {
404
+ loadSpy.mockRestore();
405
+ }
406
+ });
407
+
408
+ it('should use custom versionArgs from allowlist', async () => {
409
+ const ConfigLoader = await import('../utils/config-loader');
410
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
411
+ engines: {
412
+ denylist: [],
413
+ allowlist: {
414
+ echo: { command: 'echo', version: 'test', versionArgs: ['test'], args: [] },
415
+ },
416
+ },
417
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
418
+
419
+ try {
420
+ mkdirSync(tempDir, { recursive: true });
421
+ const step = createStep({
422
+ command: '/bin/echo',
423
+ args: ['hello'],
424
+ cwd: tempDir,
425
+ env: { PATH: '/bin:/usr/bin' },
426
+ });
427
+
428
+ const result = await executeEngineStep(
429
+ step,
430
+ { inputs: {}, secrets: {}, env: {}, steps: {} },
431
+ { artifactRoot: tempDir }
432
+ );
433
+
434
+ expect(result.exitCode).toBe(0);
435
+ } finally {
436
+ loadSpy.mockRestore();
437
+ }
438
+ });
439
+
440
+ it('should handle wildcard patterns in denylist', async () => {
441
+ const ConfigLoader = await import('../utils/config-loader');
442
+ const loadSpy = spyOn(ConfigLoader.ConfigLoader, 'load').mockReturnValue({
443
+ engines: {
444
+ denylist: ['rm*', '*/rm'],
445
+ allowlist: {},
446
+ },
447
+ } as ReturnType<typeof ConfigLoader.ConfigLoader.load>);
448
+
449
+ try {
450
+ const step = createStep({
451
+ command: 'rmdir',
452
+ cwd: '/tmp',
453
+ env: { PATH: '/usr/bin' },
454
+ });
455
+
456
+ await expect(
457
+ executeEngineStep(step, { inputs: {}, secrets: {}, env: {}, steps: {} })
458
+ ).rejects.toThrow('denied by engines.denylist');
459
+ } finally {
460
+ loadSpy.mockRestore();
461
+ }
462
+ });
463
+ });
464
+ });