keystone-cli 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +55 -8
  2. package/package.json +8 -17
  3. package/src/cli.ts +219 -166
  4. package/src/db/memory-db.test.ts +54 -0
  5. package/src/db/memory-db.ts +128 -0
  6. package/src/db/sqlite-setup.test.ts +47 -0
  7. package/src/db/sqlite-setup.ts +49 -0
  8. package/src/db/workflow-db.test.ts +41 -10
  9. package/src/db/workflow-db.ts +90 -28
  10. package/src/expression/evaluator.test.ts +19 -0
  11. package/src/expression/evaluator.ts +134 -39
  12. package/src/parser/schema.ts +41 -0
  13. package/src/runner/audit-verification.test.ts +23 -0
  14. package/src/runner/auto-heal.test.ts +64 -0
  15. package/src/runner/debug-repl.test.ts +308 -0
  16. package/src/runner/debug-repl.ts +225 -0
  17. package/src/runner/foreach-executor.ts +327 -0
  18. package/src/runner/llm-adapter.test.ts +37 -18
  19. package/src/runner/llm-adapter.ts +90 -112
  20. package/src/runner/llm-executor.test.ts +47 -6
  21. package/src/runner/llm-executor.ts +18 -3
  22. package/src/runner/mcp-client.audit.test.ts +69 -0
  23. package/src/runner/mcp-client.test.ts +12 -3
  24. package/src/runner/mcp-client.ts +199 -19
  25. package/src/runner/mcp-manager.ts +19 -8
  26. package/src/runner/mcp-server.test.ts +8 -5
  27. package/src/runner/mcp-server.ts +31 -17
  28. package/src/runner/optimization-runner.ts +305 -0
  29. package/src/runner/reflexion.test.ts +87 -0
  30. package/src/runner/shell-executor.test.ts +12 -0
  31. package/src/runner/shell-executor.ts +9 -6
  32. package/src/runner/step-executor.test.ts +240 -2
  33. package/src/runner/step-executor.ts +183 -68
  34. package/src/runner/stream-utils.test.ts +171 -0
  35. package/src/runner/stream-utils.ts +186 -0
  36. package/src/runner/workflow-runner.test.ts +4 -4
  37. package/src/runner/workflow-runner.ts +438 -259
  38. package/src/templates/agents/keystone-architect.md +6 -4
  39. package/src/templates/full-feature-demo.yaml +4 -4
  40. package/src/types/assets.d.ts +14 -0
  41. package/src/types/status.ts +1 -1
  42. package/src/ui/dashboard.tsx +38 -26
  43. package/src/utils/auth-manager.ts +3 -1
  44. package/src/utils/logger.test.ts +76 -0
  45. package/src/utils/logger.ts +39 -0
  46. package/src/utils/prompt.ts +75 -0
  47. package/src/utils/redactor.test.ts +86 -4
  48. package/src/utils/redactor.ts +48 -13
@@ -0,0 +1,308 @@
1
+ import { describe, expect, mock, spyOn, test } from 'bun:test';
2
+ import * as cp from 'node:child_process';
3
+ import * as fs from 'node:fs';
4
+ import { PassThrough } from 'node:stream';
5
+ import type { ExpressionContext } from '../expression/evaluator.ts';
6
+ import type { Step } from '../parser/schema.ts';
7
+ import type { Logger } from '../utils/logger.ts';
8
+ import { DebugRepl } from './debug-repl.ts';
9
+
10
describe('DebugRepl', () => {
  const mockContext: ExpressionContext = { inputs: { foo: 'bar' } };
  // Partial fixture, cast to Step for typing purposes.
  const mockStep: Step = { id: 'test-step', type: 'shell', run: 'echo "fail"' } as unknown as Step;
  const mockError = new Error('Test Error');

  /** Gives readline time to consume pending input before the test asserts. */
  const settle = (ms = 10): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));

  /** Builds a REPL wired to in-memory streams and a fully mocked logger. */
  function createSession() {
    const input = new PassThrough();
    const output = new PassThrough();
    const logger = {
      log: mock((..._args: unknown[]) => {}),
      error: mock((..._args: unknown[]) => {}),
      warn: mock((..._args: unknown[]) => {}),
      info: mock((..._args: unknown[]) => {}),
    };
    const repl = new DebugRepl(mockContext, mockStep, mockError, logger, input, output);
    return { input, logger, repl };
  }

  /**
   * Starts a session, types a single command and waits for it to be processed.
   * `done` is the promise returned by `start()`; every test awaits it so that no
   * session is left dangling and no rejection goes unnoticed.
   */
  async function sendCommand(command: string, ms = 10) {
    const session = createSession();
    const done = session.repl.start();
    await settle(ms);
    session.input.write(`${command}\n`);
    await settle(ms);
    return { ...session, done };
  }

  /** Types "exit" and verifies the session resolves as a failure continuation. */
  async function exitSession(session: {
    input: PassThrough;
    done: Promise<unknown>;
  }): Promise<void> {
    session.input.write('exit\n');
    expect(await session.done).toEqual({ type: 'continue_failure' });
  }

  /**
   * Replaces the editor round trip (write temp file -> spawn editor -> read back)
   * with stubs so that "edit" yields `editedContent`. Returns a function that
   * restores every stub; call it in a `finally` block.
   */
  function stubEditorRoundTrip(editedContent: string): () => void {
    const spies = [
      spyOn(cp, 'spawnSync').mockImplementation(
        // biome-ignore lint/suspicious/noExplicitAny: mocking child_process
        () => ({ error: null, status: 0 }) as any
      ),
      spyOn(fs, 'writeFileSync').mockImplementation(() => {}),
      spyOn(fs, 'readFileSync').mockImplementation(
        (() => editedContent) as unknown as typeof fs.readFileSync
      ),
      spyOn(fs, 'existsSync').mockImplementation(() => true),
      spyOn(fs, 'unlinkSync').mockImplementation(() => {}),
    ];
    return () => {
      for (const spy of spies) {
        spy.mockRestore();
      }
    };
  }

  test('should resolve with "skip" when user types "skip"', async () => {
    const { done } = await sendCommand('skip');
    expect(await done).toEqual({ type: 'skip' });
  });

  test('should resolve with "retry" when user types "retry"', async () => {
    const { done } = await sendCommand('retry');
    const result = await done;
    expect(result.type).toBe('retry');
    if (result.type === 'retry') {
      expect(result.modifiedStep).toBe(mockStep);
    }
  });

  test('should resolve with "continue_failure" when user types "exit"', async () => {
    const { done } = await sendCommand('exit');
    expect(await done).toEqual({ type: 'continue_failure' });
  });

  test('should handle "context" command', async () => {
    const session = await sendCommand('context');

    expect(session.logger.log).toHaveBeenCalled();
    const contextDump = session.logger.log.mock.calls
      .map((call) => String(call[0]))
      .find((message) => message.includes('foo'));
    expect(contextDump).toContain('bar');

    await exitSession(session);
  });

  test('should handle "eval" command', async () => {
    const session = await sendCommand('eval inputs.foo');
    expect(session.logger.log).toHaveBeenCalledWith('bar');
    await exitSession(session);
  });

  test('should handle "eval" command with error', async () => {
    const session = await sendCommand('eval nonExistent.bar');
    expect(session.logger.error).toHaveBeenCalled();
    await exitSession(session);
  });

  test('should handle "eval" command without arguments', async () => {
    const session = await sendCommand('eval');
    expect(session.logger.log).toHaveBeenCalledWith('Usage: eval <expression>');
    await exitSession(session);
  });

  test('should handle unknown command', async () => {
    const session = await sendCommand('unknown_cmd');
    expect(session.logger.log).toHaveBeenCalledWith('Unknown command: unknown_cmd');
    await exitSession(session);
  });

  test('should handle empty input', async () => {
    const session = await sendCommand('');
    expect(session.logger.log).not.toHaveBeenCalledWith('Unknown command: ');
    await exitSession(session);
  });

  test('should parse shell commands correctly', async () => {
    // Loaded lazily so the module-level import list does not need to change.
    const { parseShellCommand } = await import('./debug-repl.ts');

    expect(parseShellCommand('code')).toEqual(['code']);
    expect(parseShellCommand('code --wait')).toEqual(['code', '--wait']);
    expect(parseShellCommand('code --wait "some file"')).toEqual(['code', '--wait', 'some file']);
    expect(parseShellCommand("vim 'my file'")).toEqual(['vim', 'my file']);
    expect(parseShellCommand('editor -a -b -c')).toEqual(['editor', '-a', '-b', '-c']);
    expect(parseShellCommand(' spaced command ')).toEqual(['spaced', 'command']);
  });

  test('should handle "edit" command and update step', async () => {
    const updatedStep = { ...mockStep, run: 'echo "fixed"' };
    const restore = stubEditorRoundTrip(JSON.stringify(updatedStep));

    try {
      const session = await sendCommand('edit', 50);

      expect(session.logger.log).toHaveBeenCalledWith(
        expect.stringContaining('Step definition updated')
      );

      session.input.write('retry\n');
      // The retry must hand back the edited definition, not the original fixture.
      expect(await session.done).toEqual({ type: 'retry', modifiedStep: updatedStep });
    } finally {
      restore();
    }
  });

  test('should handle "edit" command with parse error', async () => {
    const restore = stubEditorRoundTrip('invalid json');

    try {
      const session = await sendCommand('edit', 50);

      expect(session.logger.error).toHaveBeenCalledWith(
        expect.stringContaining('Failed to parse JSON')
      );

      await exitSession(session);
    } finally {
      restore();
    }
  });
});
@@ -0,0 +1,225 @@
1
+ import { spawnSync } from 'node:child_process';
2
+ import * as fs from 'node:fs';
3
+ import * as os from 'node:os';
4
+ import * as path from 'node:path';
5
+ import * as readline from 'node:readline';
6
+ import { stripVTControlCharacters } from 'node:util';
7
+ import { type ExpressionContext, ExpressionEvaluator } from '../expression/evaluator.ts';
8
+ import type { Step } from '../parser/schema.ts';
9
+ import { extractJson } from '../utils/json-parser.ts';
10
+
11
+ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
12
+
13
/**
 * What the debug session asks its caller to do once the user leaves the REPL.
 */
export type DebugAction =
  /** Default behavior: exit debug mode and let the step fail. */
  | { type: 'continue_failure' }
  /** Skip the failed step and proceed. */
  | { type: 'skip' }
  /** Re-run the step; `modifiedStep` optionally carries the definition to run. */
  | { type: 'retry'; modifiedStep?: Step };
17
+
18
/**
 * Interactive prompt presented when a workflow step fails.
 *
 * The user can inspect the expression context, evaluate expressions against it,
 * edit the step definition in an external editor, and finally choose how the
 * caller should proceed (retry, skip, or let the failure stand).
 */
export class DebugRepl {
  /**
   * @param context Expression context exposed to the `context` and `eval` commands.
   * @param step The failed step; replaced in memory by a successful `edit`.
   * @param error The failure that triggered the debug session.
   * @param logger Sink for all REPL messages.
   * @param inputStream Stream the commands are read from.
   * @param outputStream Stream the readline prompt is written to.
   */
  constructor(
    private context: ExpressionContext,
    private step: Step,
    private error: unknown,
    private logger: Logger = new ConsoleLogger(),
    private inputStream: NodeJS.ReadableStream = process.stdin,
    private outputStream: NodeJS.WritableStream = process.stdout
  ) {}

  /**
   * Prints the failure and the command list, then reads commands until the user
   * picks a terminal one (`retry`, `skip`, `exit`/`quit`).
   *
   * @returns The action the caller should take. If the input stream closes
   *   before a terminal command is entered (EOF, Ctrl+D), the session resolves
   *   with `continue_failure` instead of never settling.
   */
  public async start(): Promise<DebugAction> {
    this.logger.error(`\n❌ Step '${this.step.id}' failed.`);
    this.logger.error(
      ` Error: ${this.error instanceof Error ? this.error.message : String(this.error)}`
    );
    this.logger.log('\nEntering Debug Mode. Available commands:');
    this.logger.log(' > context (view current inputs/outputs involved in this step)');
    this.logger.log(' > retry (re-run step, optionally with edited definition)');
    this.logger.log(' > edit (edit the step definition in your $EDITOR)');
    this.logger.log(' > skip (skip this step and proceed)');
    this.logger.log(' > eval <code> (run JS expression against context)');
    this.logger.log(' > exit (resume failure/exit)');

    const rl = readline.createInterface({
      input: this.inputStream,
      output: this.outputStream,
      prompt: 'debug> ',
    });

    rl.prompt();

    return new Promise<DebugAction>((resolve) => {
      let settled = false;

      // Single exit point: the first action wins, later calls are ignored.
      const finish = (action: DebugAction): void => {
        if (settled) {
          return;
        }
        settled = true;
        resolve(action);
      };

      // Without this, a closed input stream would leave the promise pending forever.
      rl.on('close', () => finish({ type: 'continue_failure' }));

      rl.on('line', (line) => {
        // Lines that were already buffered when the session ended are dropped.
        if (settled) {
          return;
        }

        const trimmed = line.trim();
        // Split on the first whitespace only, so the argument text (e.g. an
        // expression containing spaces) is passed through untouched.
        const separator = trimmed.search(/\s/);
        const cmd = separator === -1 ? trimmed : trimmed.slice(0, separator);
        const argStr = separator === -1 ? '' : trimmed.slice(separator + 1).trim();

        switch (cmd) {
          case 'context':
            this.logger.log(this.formatValue(this.context));
            break;

          case 'retry':
            finish({ type: 'retry', modifiedStep: this.step });
            rl.close();
            return;

          case 'skip':
            finish({ type: 'skip' });
            rl.close();
            return;

          case 'exit':
          case 'quit':
            finish({ type: 'continue_failure' });
            rl.close();
            return;

          case 'edit': {
            try {
              const newStep = this.editStep(this.step);
              if (newStep) {
                this.step = newStep;
                this.logger.log('✓ Step definition updated in memory. Type "retry" to run it.');
              } else {
                this.logger.log('No changes made.');
              }
            } catch (e) {
              this.logger.error(`Error editing step: ${e}`);
            }
            break;
          }

          case 'eval':
            try {
              if (!argStr) {
                this.logger.log('Usage: eval <expression>');
              } else {
                const result = ExpressionEvaluator.evaluateExpression(argStr, this.context);
                this.logger.log(this.formatValue(result));
              }
            } catch (e) {
              this.logger.error(`Eval error: ${e instanceof Error ? e.message : String(e)}`);
            }
            break;

          case '':
            break;

          default:
            this.logger.log(`Unknown command: ${cmd}`);
            break;
        }

        // Only non-terminal commands reach this point.
        rl.prompt();
      });
    });
  }

  /**
   * Renders a value for display: objects as indented JSON, everything else via
   * `String()`. Falls back to `String()` when the value cannot be serialized
   * (for example because it contains a cycle), so printing never throws.
   */
  private formatValue(value: unknown): string {
    if (typeof value === 'object' && value !== null) {
      try {
        return JSON.stringify(value, null, 2) ?? String(value);
      } catch {
        return String(value);
      }
    }
    return String(value);
  }

  /**
   * Writes the step as JSON to a temp file, opens it in `$EDITOR`, and parses
   * the result.
   *
   * @param step The step definition to edit.
   * @returns The edited step, or `null` when the edited content is not valid
   *   JSON or lacks a string `id`/`type`.
   * @throws If the temp file cannot be written or read, or the editor cannot
   *   be spawned.
   */
  private editStep(step: Step): Step | null {
    // `||` (not `??`) on purpose: an empty EDITOR is treated like an unset one.
    const editorEnv = process.env.EDITOR || 'vim';
    // Only word characters, dot, slash, dash and whitespace are accepted, which
    // excludes shell metacharacters such as ; | & ` $.
    const safeEditor = /^[\w./\s-]+$/.test(editorEnv) ? editorEnv : 'vi';
    if (safeEditor !== editorEnv) {
      this.logger.warn(
        `Warning: $EDITOR value "${editorEnv}" contains unsafe characters. Falling back to "vi".`
      );
    }
    // Sanitize step ID to prevent path traversal
    const sanitizedId = step.id.replace(/[^a-zA-Z0-9_-]/g, '_');
    const tempFile = path.join(os.tmpdir(), `keystone-step-${sanitizedId}-${Date.now()}.json`);

    // The temp dir is shared: create the file exclusively ('wx' refuses to
    // follow or overwrite an existing path) and readable by the owner only.
    fs.writeFileSync(tempFile, JSON.stringify(step, null, 2), { mode: 0o600, flag: 'wx' });

    try {
      // Parse editor string into command and args (e.g. "code --wait", "subl -w").
      // A whitespace-only value yields no tokens; fall back to "vi" in that case.
      const [editorCmd = 'vi', ...editorArgs] = parseShellCommand(safeEditor);

      // stdio: 'inherit' lets the editor take over the terminal. No shell is
      // involved, so the editor string is never interpreted by one.
      const result = spawnSync(editorCmd, [...editorArgs, tempFile], {
        stdio: 'inherit',
      });

      if (result.error) {
        throw result.error;
      }

      const content = fs.readFileSync(tempFile, 'utf-8');

      let parsed: unknown;
      try {
        parsed = JSON.parse(content);
      } catch {
        this.logger.error('Failed to parse JSON from editor. Changes discarded.');
        return null;
      }

      // Basic validation: must be an object with a non-empty string id and type.
      const candidate =
        typeof parsed === 'object' && parsed !== null
          ? (parsed as { id?: unknown; type?: unknown })
          : undefined;
      const hasIdentity =
        typeof candidate?.id === 'string' &&
        candidate.id.length > 0 &&
        typeof candidate.type === 'string' &&
        candidate.type.length > 0;
      if (!hasIdentity) {
        this.logger.error('Invalid step definition: missing id or type');
        return null;
      }

      // Only id/type are verified here; the rest of the shape is trusted as-is.
      return parsed as Step;
    } finally {
      if (fs.existsSync(tempFile)) {
        fs.unlinkSync(tempFile);
      }
    }
  }
}
176
+
177
/**
 * Splits a command string into arguments, respecting single and double quotes.
 *
 * - Arguments are separated by any run of whitespace (spaces, tabs, newlines).
 * - Text inside matching quotes is taken literally, including whitespace; the
 *   quotes themselves are removed. Quoted and unquoted parts that touch are
 *   joined into one argument (`a"b c"d` -> `ab cd`).
 * - An explicitly quoted empty string (`""` or `''`) yields an empty argument.
 * - An unterminated quote runs to the end of the input.
 * - Backslashes are not treated as escapes.
 *
 * Example: 'code --wait' -> ['code', '--wait']
 * Example: 'my-editor "some arg"' -> ['my-editor', 'some arg']
 *
 * @param command The raw command string.
 * @returns The parsed arguments; empty when `command` has no tokens.
 */
export function parseShellCommand(command: string): string[] {
  const whitespace = /\s/;
  const args: string[] = [];
  let currentArg = '';
  // True once the current argument has started, even if it is still empty;
  // this is what lets `""` produce an (empty) argument.
  let hasToken = false;
  // The quote character we are currently inside, or null when unquoted.
  let quote: '"' | "'" | null = null;

  for (const char of command) {
    if (quote !== null) {
      if (char === quote) {
        quote = null;
      } else {
        currentArg += char;
      }
    } else if (char === '"' || char === "'") {
      quote = char;
      hasToken = true;
    } else if (whitespace.test(char)) {
      if (hasToken) {
        args.push(currentArg);
        currentArg = '';
        hasToken = false;
      }
    } else {
      currentArg += char;
      hasToken = true;
    }
  }

  if (hasToken) {
    args.push(currentArg);
  }

  return args;
}