keystone-cli 0.5.1 → 0.6.1

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (48)
  1. package/README.md +55 -8
  2. package/package.json +8 -17
  3. package/src/cli.ts +219 -166
  4. package/src/db/memory-db.test.ts +54 -0
  5. package/src/db/memory-db.ts +128 -0
  6. package/src/db/sqlite-setup.test.ts +47 -0
  7. package/src/db/sqlite-setup.ts +49 -0
  8. package/src/db/workflow-db.test.ts +41 -10
  9. package/src/db/workflow-db.ts +90 -28
  10. package/src/expression/evaluator.test.ts +19 -0
  11. package/src/expression/evaluator.ts +134 -39
  12. package/src/parser/schema.ts +41 -0
  13. package/src/runner/audit-verification.test.ts +23 -0
  14. package/src/runner/auto-heal.test.ts +64 -0
  15. package/src/runner/debug-repl.test.ts +308 -0
  16. package/src/runner/debug-repl.ts +225 -0
  17. package/src/runner/foreach-executor.ts +327 -0
  18. package/src/runner/llm-adapter.test.ts +37 -18
  19. package/src/runner/llm-adapter.ts +90 -112
  20. package/src/runner/llm-executor.test.ts +47 -6
  21. package/src/runner/llm-executor.ts +18 -3
  22. package/src/runner/mcp-client.audit.test.ts +69 -0
  23. package/src/runner/mcp-client.test.ts +12 -3
  24. package/src/runner/mcp-client.ts +199 -19
  25. package/src/runner/mcp-manager.ts +19 -8
  26. package/src/runner/mcp-server.test.ts +8 -5
  27. package/src/runner/mcp-server.ts +31 -17
  28. package/src/runner/optimization-runner.ts +305 -0
  29. package/src/runner/reflexion.test.ts +87 -0
  30. package/src/runner/shell-executor.test.ts +12 -0
  31. package/src/runner/shell-executor.ts +9 -6
  32. package/src/runner/step-executor.test.ts +240 -2
  33. package/src/runner/step-executor.ts +183 -68
  34. package/src/runner/stream-utils.test.ts +171 -0
  35. package/src/runner/stream-utils.ts +186 -0
  36. package/src/runner/workflow-runner.test.ts +4 -4
  37. package/src/runner/workflow-runner.ts +438 -259
  38. package/src/templates/agents/keystone-architect.md +6 -4
  39. package/src/templates/full-feature-demo.yaml +4 -4
  40. package/src/types/assets.d.ts +14 -0
  41. package/src/types/status.ts +1 -1
  42. package/src/ui/dashboard.tsx +38 -26
  43. package/src/utils/auth-manager.ts +3 -1
  44. package/src/utils/logger.test.ts +76 -0
  45. package/src/utils/logger.ts +39 -0
  46. package/src/utils/prompt.ts +75 -0
  47. package/src/utils/redactor.test.ts +86 -4
  48. package/src/utils/redactor.ts +48 -13
package/src/runner/stream-utils.test.ts
@@ -0,0 +1,171 @@
+ import { describe, expect, it, mock } from 'bun:test';
+ import { processOpenAIStream } from './stream-utils';
+
+ const encoder = new TextEncoder();
+
+ // Builds a minimal Response whose body reader yields the given chunks in order.
+ function responseFromChunks(chunks: string[]): Response {
+   let index = 0;
+   const reader = {
+     async read(): Promise<{ done: boolean; value?: Uint8Array }> {
+       if (index >= chunks.length) {
+         return { done: true, value: undefined };
+       }
+       const value = encoder.encode(chunks[index]);
+       index += 1;
+       return { done: false, value };
+     },
+     async cancel(): Promise<void> {},
+   };
+
+   return {
+     body: {
+       getReader: () => reader,
+     },
+   } as Response;
+ }
+
+ describe('processOpenAIStream', () => {
+   it('accumulates content and tool calls across chunks', async () => {
+     const onStream = mock(() => {});
+     const response = responseFromChunks([
+       'data: {"choices":[{"delta":{"content":"hello "}}]}\n',
+       'data: {"choices":[{"delta":{"content":"world","tool_calls":[{"index":0,"id":"call_1","function":{"name":"my_tool","arguments":"{\\"arg\\":"}}]}}]}\n',
+       'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"1}"}}]}}]}\n',
+       'data: [DONE]\n',
+     ]);
+
+     const result = await processOpenAIStream(response, { onStream });
+
+     expect(result.message.content).toBe('hello world');
+     expect(onStream).toHaveBeenCalledTimes(2);
+     expect(result.message.tool_calls?.[0]?.function?.name).toBe('my_tool');
+     expect(result.message.tool_calls?.[0]?.function?.arguments).toBe('{"arg":1}');
+   });
+
+   it('parses a final line without a newline', async () => {
+     const onStream = mock(() => {});
+     const response = responseFromChunks(['data: {"choices":[{"delta":{"content":"tail"}}]}']);
+
+     const result = await processOpenAIStream(response, { onStream });
+
+     expect(result.message.content).toBe('tail');
+     expect(onStream).toHaveBeenCalledTimes(1);
+   });
+
+   it('logs malformed JSON and continues processing', async () => {
+     const logger = {
+       log: mock(() => {}),
+       error: mock(() => {}),
+       warn: mock(() => {}),
+       info: mock(() => {}),
+     };
+     const response = responseFromChunks([
+       'data: {bad json}\n',
+       'data: {"choices":[{"delta":{"content":"ok"}}]}\n',
+       'data: [DONE]\n',
+     ]);
+
+     const result = await processOpenAIStream(response, { logger });
+
+     expect(result.message.content).toBe('ok');
+     expect(logger.warn).toHaveBeenCalledTimes(1);
+     expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
+   });
+
+   it('throws error when buffer size is exceeded', async () => {
+     const response = responseFromChunks(['a'.repeat(1024 * 1024 + 1)]);
+     await expect(processOpenAIStream(response)).rejects.toThrow(
+       'LLM stream line exceed maximum size'
+     );
+   });
+
+   it('throws error when response size limit is exceeded', async () => {
+     const response = responseFromChunks([
+       `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
+       `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}\n`,
+     ]);
+     await expect(processOpenAIStream(response)).rejects.toThrow(
+       'LLM response exceeds maximum size'
+     );
+   });
+
+   it('throws error when tool call arguments size limit is exceeded', async () => {
+     const response = responseFromChunks([
+       `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(600 * 1024)}"}}]}}]}\n`,
+       `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(500 * 1024)}"}}]}}]}\n`,
+     ]);
+     await expect(processOpenAIStream(response)).rejects.toThrow(
+       'LLM tool call arguments exceed maximum size'
+     );
+   });
+
+   it('handles and logs generic errors during chunk processing', async () => {
+     const logger = {
+       log: mock(() => {}),
+       error: mock(() => {}),
+       warn: mock(() => {}),
+       info: mock(() => {}),
+     };
+     // Mock JSON.parse to throw a non-SyntaxError for one specific payload
+     const originalParse = JSON.parse;
+     JSON.parse = (str: string) => {
+       if (str === '{"trigger_error":true}') throw new Error('Generic error');
+       return originalParse(str);
+     };
+
+     try {
+       const response = responseFromChunks(['data: {"trigger_error":true}\n']);
+       await processOpenAIStream(response, { logger });
+       expect(logger.warn).toHaveBeenCalledTimes(1);
+       expect(logger.warn.mock.calls[0][0]).toContain(
+         'Error processing chunk: Error: Generic error'
+       );
+     } finally {
+       JSON.parse = originalParse;
+     }
+   });
+
+   it('handles errors in the final line processing', async () => {
+     const logger = {
+       log: mock(() => {}),
+       error: mock(() => {}),
+       warn: mock(() => {}),
+       info: mock(() => {}),
+     };
+     const response = responseFromChunks(['data: {bad json}']); // No newline, triggers final-buffer processing
+
+     await processOpenAIStream(response, { logger });
+
+     expect(logger.warn).toHaveBeenCalledTimes(1);
+     expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
+   });
+
+   it('throws size limit error in final line processing', async () => {
+     const response = responseFromChunks([
+       `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
+       `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}`,
+     ]);
+     // The first line is fine; the second sits in the final buffer and pushes the total over the limit
+     await expect(processOpenAIStream(response)).rejects.toThrow(
+       'LLM response exceeds maximum size'
+     );
+   });
+
+   it('bubbles up reader cancel errors', async () => {
+     const reader = {
+       read: async () => {
+         throw new Error('Read error');
+       },
+       cancel: async () => {
+         throw new Error('Cancel error');
+       },
+     };
+     const response = {
+       body: {
+         getReader: () => reader,
+       },
+     } as unknown as Response;
+
+     await expect(processOpenAIStream(response)).rejects.toThrow('Read error');
+   });
+ });
package/src/runner/stream-utils.ts
@@ -0,0 +1,186 @@
+ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
+ import type { LLMResponse, LLMToolCall } from './llm-adapter.ts';
+
+ // Maximum response size to prevent memory exhaustion (1MB)
+ const MAX_RESPONSE_SIZE = 1024 * 1024;
+ const MAX_BUFFER_SIZE = MAX_RESPONSE_SIZE;
+
+ type ToolCallDelta = {
+   index: number;
+   id?: string;
+   function?: {
+     name?: string;
+     arguments?: string;
+   };
+ };
+
+ export async function processOpenAIStream(
+   response: Response,
+   options?: { onStream?: (chunk: string) => void; logger?: Logger },
+   streamLabel = 'OpenAI'
+ ): Promise<LLMResponse> {
+   if (!response.body) throw new Error('Response body is null');
+   const reader = response.body.getReader();
+   const decoder = new TextDecoder();
+   let fullContent = '';
+   const toolCalls: LLMToolCall[] = [];
+   let buffer = '';
+
+   try {
+     while (true) {
+       const { done, value } = await reader.read();
+       if (done) break;
+
+       const chunk = decoder.decode(value, { stream: true });
+       buffer += chunk;
+       if (buffer.length > MAX_BUFFER_SIZE) {
+         throw new Error(`LLM stream line exceed maximum size of ${MAX_BUFFER_SIZE} bytes`);
+       }
+       const lines = buffer.split('\n');
+       // Keep the last partial line in the buffer
+       buffer = lines.pop() || '';
+
+       for (const line of lines) {
+         const trimmedLine = line.trim();
+         if (trimmedLine === '' || trimmedLine === 'data: [DONE]') continue;
+         if (!trimmedLine.startsWith('data: ')) continue;
+
+         try {
+           const data = JSON.parse(trimmedLine.slice(6));
+
+           // Copilot responses generally match OpenAI's structure, but some
+           // proxies may omit 'choices' or return a null delta.
+           const delta = data.choices?.[0]?.delta;
+           if (!delta) continue;
+
+           if (delta.content) {
+             if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+               throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+             }
+             fullContent += delta.content;
+             options?.onStream?.(delta.content);
+           }
+
+           if (delta.tool_calls) {
+             for (const tc of delta.tool_calls) {
+               const toolCall = tc as ToolCallDelta;
+               if (!toolCalls[toolCall.index]) {
+                 toolCalls[toolCall.index] = {
+                   id: toolCall.id || '',
+                   type: 'function',
+                   function: { name: '', arguments: '' },
+                 };
+               }
+               const existing = toolCalls[toolCall.index];
+               if (toolCall.function?.name) existing.function.name += toolCall.function.name;
+               if (toolCall.function?.arguments) {
+                 if (
+                   fullContent.length +
+                     toolCalls.reduce((acc, t) => acc + (t?.function?.arguments?.length || 0), 0) +
+                     toolCall.function.arguments.length >
+                   MAX_RESPONSE_SIZE
+                 ) {
+                   throw new Error(
+                     `LLM tool call arguments exceed maximum size of ${MAX_RESPONSE_SIZE} bytes`
+                   );
+                 }
+                 existing.function.arguments += toolCall.function.arguments;
+               }
+             }
+           }
+         } catch (e) {
+           const activeLogger = options?.logger || new ConsoleLogger();
+
+           // Rethrow size limit errors so they bubble up
+           if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
+             throw e;
+           }
+
+           if (e instanceof SyntaxError) {
+             activeLogger.warn(
+               `[${streamLabel} Stream] Malformed JSON line: ${line.slice(0, 80)}...`
+             );
+           } else {
+             activeLogger.warn(`[${streamLabel} Stream] Error processing chunk: ${e}`);
+           }
+         }
+       }
+     }
+   } catch (error) {
+     try {
+       await reader.cancel();
+     } catch {
+       // Ignore cancel errors while bubbling up the original issue.
+     }
+     throw error;
+   }
+
+   // Final check for any remaining data in the buffer (in case of no final newline)
+   if (buffer.trim()) {
+     const trimmedLine = buffer.trim();
+     if (trimmedLine.startsWith('data: ') && trimmedLine !== 'data: [DONE]') {
+       try {
+         const data = JSON.parse(trimmedLine.slice(6));
+         const delta = data.choices?.[0]?.delta;
+         if (delta) {
+           if (delta.content) {
+             if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+               throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+             }
+             fullContent += delta.content;
+             options?.onStream?.(delta.content);
+           }
+           if (delta.tool_calls) {
+             // Tool calls in the very last chunk are unlikely but possible
+             for (const tc of delta.tool_calls) {
+               const toolCall = tc as ToolCallDelta;
+               if (!toolCalls[toolCall.index]) {
+                 toolCalls[toolCall.index] = {
+                   id: toolCall.id || '',
+                   type: 'function',
+                   function: { name: '', arguments: '' },
+                 };
+               }
+               const existing = toolCalls[toolCall.index];
+               if (toolCall.function?.name) existing.function.name += toolCall.function.name;
+               if (toolCall.function?.arguments) {
+                 if (
+                   fullContent.length +
+                     toolCalls.reduce((acc, t) => acc + (t?.function?.arguments?.length || 0), 0) +
+                     toolCall.function.arguments.length >
+                   MAX_RESPONSE_SIZE
+                 ) {
+                   throw new Error(
+                     `LLM tool call arguments exceed maximum size of ${MAX_RESPONSE_SIZE} bytes`
+                   );
+                 }
+                 existing.function.arguments += toolCall.function.arguments;
+               }
+             }
+           }
+         }
+       } catch (e) {
+         if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
+           throw e;
+         }
+         const activeLogger = options?.logger || new ConsoleLogger();
+         if (e instanceof SyntaxError) {
+           activeLogger.warn(
+             `[${streamLabel} Stream] Malformed JSON line: ${trimmedLine.slice(0, 80)}...`
+           );
+         } else {
+           activeLogger.warn(`[${streamLabel} Stream] Error processing final line: ${e}`);
+         }
+       }
+     }
+   }
+
+   return {
+     message: {
+       role: 'assistant',
+       content: fullContent || null,
+       tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
+     },
+   };
+ }
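
For orientation, here is a minimal usage sketch of the new `processOpenAIStream` export. This is not code from the package: the endpoint URL, model name, and `OPENAI_API_KEY` environment variable are illustrative assumptions; any OpenAI-compatible streaming chat-completions response should work.

```ts
import { processOpenAIStream } from './stream-utils';

// Hypothetical caller: stream a chat completion and echo tokens as they arrive.
// The endpoint, model, and env var below are assumptions, not package defaults.
const response = await fetch('https://api.openai.com/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'gpt-4o-mini',
    stream: true,
    messages: [{ role: 'user', content: 'Say hello' }],
  }),
});

const result = await processOpenAIStream(response, {
  onStream: (chunk) => process.stdout.write(chunk),
});

console.log('\ncontent:', result.message.content);
console.log('tool_calls:', result.message.tool_calls ?? 'none');
```

The third parameter, `streamLabel`, only affects the prefix of warning logs, so callers hitting a compatible non-OpenAI endpoint can pass their own label.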
package/src/runner/workflow-runner.test.ts
@@ -457,10 +457,10 @@ describe('WorkflowRunner', () => {
 
      // Check DB status - parent should be 'paused' and step should be 'suspended'
      const db = new WorkflowDb(resumeDbPath);
-     const run = db.getRun(runId);
+     const run = await db.getRun(runId);
      expect(run?.status).toBe('paused');
 
-     const steps = db.getStepsByRun(runId);
+     const steps = await db.getStepsByRun(runId);
      const parentStep = steps.find(
        (s: { step_id: string; iteration_index: number | null }) =>
          s.step_id === 'process' && s.iteration_index === null
@@ -481,8 +481,8 @@ describe('WorkflowRunner', () => {
      expect(outputs.results).toEqual(['ok', 'ok']);
 
      const finalDb = new WorkflowDb(resumeDbPath);
-     const finalRun = finalDb.getRun(runId);
-     expect(finalRun?.status).toBe('completed');
+     const finalRun = await finalDb.getRun(runId);
+     expect(finalRun?.status).toBe('success');
      finalDb.close();
 
      if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
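
These hunks track two API changes in this release: `WorkflowDb` reads (`getRun`, `getStepsByRun`) became async, and the terminal run status was renamed from 'completed' to 'success'. A minimal sketch of the updated call pattern, with `dbPath` and `runId` as hypothetical placeholders:

```ts
import { WorkflowDb } from './db/workflow-db';

// Hypothetical consumer of the 0.6.x API; dbPath and runId are placeholders.
const dbPath = './keystone.db';
const runId = 'run-123';

const db = new WorkflowDb(dbPath);
const run = await db.getRun(runId);            // synchronous in 0.5.x, now awaited
if (run?.status === 'success') {               // terminal status was 'completed' in 0.5.x
  const steps = await db.getStepsByRun(runId); // also awaited now
  console.log(`Run finished with ${steps.length} step records`);
}
db.close();
```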