keystone-cli 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,16 +4,24 @@ import { processOpenAIStream } from './stream-utils';
4
4
  const encoder = new TextEncoder();
5
5
 
6
6
  function responseFromChunks(chunks: string[]): Response {
7
- const stream = new ReadableStream({
8
- start(controller) {
9
- for (const chunk of chunks) {
10
- controller.enqueue(encoder.encode(chunk));
7
+ let index = 0;
8
+ const reader = {
9
+ async read(): Promise<{ done: boolean; value?: Uint8Array }> {
10
+ if (index >= chunks.length) {
11
+ return { done: true, value: undefined };
11
12
  }
12
- controller.close();
13
+ const value = encoder.encode(chunks[index]);
14
+ index += 1;
15
+ return { done: false, value };
13
16
  },
14
- });
17
+ async cancel(): Promise<void> {},
18
+ };
15
19
 
16
- return new Response(stream);
20
+ return {
21
+ body: {
22
+ getReader: () => reader,
23
+ },
24
+ } as Response;
17
25
  }
18
26
 
19
27
  describe('processOpenAIStream', () => {
@@ -61,5 +69,103 @@ describe('processOpenAIStream', () => {
61
69
 
62
70
  expect(result.message.content).toBe('ok');
63
71
  expect(logger.warn).toHaveBeenCalledTimes(1);
72
+ expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
73
+ });
74
+
75
+ it('throws error when buffer size is exceeded', async () => {
76
+ const response = responseFromChunks(['a'.repeat(1024 * 1024 + 1)]);
77
+ await expect(processOpenAIStream(response)).rejects.toThrow(
78
+ 'LLM stream line exceed maximum size'
79
+ );
80
+ });
81
+
82
+ it('throws error when response size limit is exceeded', async () => {
83
+ const response = responseFromChunks([
84
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
85
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}\n`,
86
+ ]);
87
+ await expect(processOpenAIStream(response)).rejects.toThrow(
88
+ 'LLM response exceeds maximum size'
89
+ );
90
+ });
91
+
92
+ it('throws error when tool call arguments size limit is exceeded', async () => {
93
+ const response = responseFromChunks([
94
+ `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(600 * 1024)}"}}]}}]}\n`,
95
+ `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(500 * 1024)}"}}]}}]}\n`,
96
+ ]);
97
+ await expect(processOpenAIStream(response)).rejects.toThrow(
98
+ 'LLM tool call arguments exceed maximum size'
99
+ );
100
+ });
101
+
102
+ it('handles and logs generic errors during chunk processing', async () => {
103
+ const logger = {
104
+ log: mock(() => {}),
105
+ error: mock(() => {}),
106
+ warn: mock(() => {}),
107
+ info: mock(() => {}),
108
+ };
109
+ // Mocking JSON.parse to throw a non-SyntaxError
110
+ const originalParse = JSON.parse;
111
+ JSON.parse = (str: string) => {
112
+ if (str === '{"trigger_error":true}') throw new Error('Generic error');
113
+ return originalParse(str);
114
+ };
115
+
116
+ try {
117
+ const response = responseFromChunks(['data: {"trigger_error":true}\n']);
118
+ await processOpenAIStream(response, { logger });
119
+ expect(logger.warn).toHaveBeenCalledTimes(1);
120
+ expect(logger.warn.mock.calls[0][0]).toContain(
121
+ 'Error processing chunk: Error: Generic error'
122
+ );
123
+ } finally {
124
+ JSON.parse = originalParse;
125
+ }
126
+ });
127
+
128
+ it('handles errors in the final line processing', async () => {
129
+ const logger = {
130
+ log: mock(() => {}),
131
+ error: mock(() => {}),
132
+ warn: mock(() => {}),
133
+ info: mock(() => {}),
134
+ };
135
+ const response = responseFromChunks(['data: {bad json}']); // No newline, triggers buffer processing
136
+
137
+ await processOpenAIStream(response, { logger });
138
+
139
+ expect(logger.warn).toHaveBeenCalledTimes(1);
140
+ expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
141
+ });
142
+
143
+ it('throws size limit error in final line processing', async () => {
144
+ const response = responseFromChunks([
145
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
146
+ `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}`,
147
+ ]);
148
+ // The first line is ok, the second line is in the final buffer and exceeds size
149
+ await expect(processOpenAIStream(response)).rejects.toThrow(
150
+ 'LLM response exceeds maximum size'
151
+ );
152
+ });
153
+
154
+ it('bubbles up reader cancel errors', async () => {
155
+ const reader = {
156
+ read: async () => {
157
+ throw new Error('Read error');
158
+ },
159
+ cancel: async () => {
160
+ throw new Error('Cancel error');
161
+ },
162
+ };
163
+ const response = {
164
+ body: {
165
+ getReader: () => reader,
166
+ },
167
+ } as unknown as Response;
168
+
169
+ await expect(processOpenAIStream(response)).rejects.toThrow('Read error');
64
170
  });
65
171
  });
@@ -67,7 +67,7 @@ export async function processOpenAIStream(
67
67
  const toolCall = tc as ToolCallDelta;
68
68
  if (!toolCalls[toolCall.index]) {
69
69
  toolCalls[toolCall.index] = {
70
- id: toolCall.id,
70
+ id: toolCall.id || '',
71
71
  type: 'function',
72
72
  function: { name: '', arguments: '' },
73
73
  };
@@ -93,7 +93,7 @@ export async function processOpenAIStream(
93
93
  const activeLogger = options?.logger || new ConsoleLogger();
94
94
 
95
95
  // Rethrow size limit errors so they bubble up
96
- if (String(e).toLowerCase().includes('exceed maximum size')) {
96
+ if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
97
97
  throw e;
98
98
  }
99
99
 
@@ -137,7 +137,7 @@ export async function processOpenAIStream(
137
137
  const toolCall = tc as ToolCallDelta;
138
138
  if (!toolCalls[toolCall.index]) {
139
139
  toolCalls[toolCall.index] = {
140
- id: toolCall.id,
140
+ id: toolCall.id || '',
141
141
  type: 'function',
142
142
  function: { name: '', arguments: '' },
143
143
  };
@@ -161,7 +161,7 @@ export async function processOpenAIStream(
161
161
  }
162
162
  }
163
163
  } catch (e) {
164
- if (String(e).toLowerCase().includes('exceed maximum size')) {
164
+ if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
165
165
  throw e;
166
166
  }
167
167
  const activeLogger = options?.logger || new ConsoleLogger();
@@ -630,16 +630,13 @@ export class WorkflowRunner {
630
630
  }
631
631
 
632
632
  const operation = async () => {
633
- const result = await executeStep(
634
- stepToExecute,
635
- context,
636
- this.logger,
637
- this.executeSubWorkflow.bind(this),
638
- this.mcpManager,
639
- this.memoryDb,
640
- this.options.workflowDir,
641
- this.options.dryRun
642
- );
633
+ const result = await executeStep(stepToExecute, context, this.logger, {
634
+ executeWorkflowFn: this.executeSubWorkflow.bind(this),
635
+ mcpManager: this.mcpManager,
636
+ memoryDb: this.memoryDb,
637
+ workflowDir: this.options.workflowDir,
638
+ dryRun: this.options.dryRun,
639
+ });
643
640
  if (result.status === 'failed') {
644
641
  throw new Error(result.error || 'Step failed');
645
642
  }
@@ -868,16 +865,13 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
868
865
 
869
866
  // Execute the agent step
870
867
  // We use a fresh context but share secrets/env
871
- const result = await executeStep(
872
- agentStep,
873
- context,
874
- this.logger,
875
- this.executeSubWorkflow.bind(this),
876
- this.mcpManager,
877
- this.memoryDb,
878
- this.options.workflowDir,
879
- this.options.dryRun
880
- );
868
+ const result = await executeStep(agentStep, context, this.logger, {
869
+ executeWorkflowFn: this.executeSubWorkflow.bind(this),
870
+ mcpManager: this.mcpManager,
871
+ memoryDb: this.memoryDb,
872
+ workflowDir: this.options.workflowDir,
873
+ dryRun: this.options.dryRun,
874
+ });
881
875
 
882
876
  if (result.status !== 'success' || !result.output) {
883
877
  throw new Error(`Healer agent failed: ${result.error || 'No output'}`);
@@ -19,9 +19,9 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
19
19
  - **eval**: (Optional) Configuration for prompt optimization `{ scorer: 'llm'|'script', agent, prompt, run }`.
20
20
  - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
21
21
  - **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }` (Set `allowInsecure: true` to bypass risky command checks)
22
- - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
22
+ - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, useStandardTools, allowOutsideCwd, allowInsecure, mcpServers }`
23
23
  - **workflow**: `{ id, type: 'workflow', path, inputs }`
24
- - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
24
+ - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content, allowOutsideCwd }`
25
25
  - **request**: `{ id, type: 'request', url, method, body, headers }`
26
26
  - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns a boolean but automatically falls back to text if the input is not yes/no)
27
27
  - **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
@@ -31,6 +31,17 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
31
31
  - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
32
32
  - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
33
33
 
34
+ ## Standard Tools
35
+ When `useStandardTools: true` is set on an `llm` step, the agent has access to:
36
+ - `read_file(path)`: Read file contents.
37
+ - `read_file_lines(path, start, count)`: Read a specific range of lines.
38
+ - `write_file(path, content)`: Write/overwrite file.
39
+ - `list_files(path)`: List directory contents.
40
+ - `search_files(pattern, dir)`: Search for files by pattern (glob).
41
+ - `search_content(query, pattern, dir)`: Search for text within files.
42
+ - `run_command(command, dir)`: Run shell commands (restricted by `allowInsecure`).
43
+ - **Path Gating**: Restricted to CWD by default. Use `allowOutsideCwd: true` to bypass.
44
+
34
45
  ## Agent Schema (.md)
35
46
  Markdown files with YAML frontmatter:
36
47
  - **name**: Agent name.
@@ -45,6 +56,9 @@ Markdown files with YAML frontmatter:
45
56
  - `${{ steps.id.output }}`
46
57
  - `${{ steps.id.status }}` (e.g., `'pending'`, `'running'`, `'success'`, `'failed'`, `'skipped'`)
47
58
  - `${{ args.paramName }}` (used inside agent tools)
59
+ - `${{ item }}` (current item in a `foreach` loop)
60
+ - `${{ secrets.NAME }}` (access redacted secrets)
61
+ - `${{ env.NAME }}` (access environment variables)
48
62
  - Standard JS-like expressions: `${{ steps.count > 0 ? 'yes' : 'no' }}`
49
63
 
50
64
  # Guidelines
@@ -0,0 +1,17 @@
1
+ ---
2
+ name: software-engineer
3
+ description: "Expert at writing and debugging code"
4
+ model: gpt-4o
5
+ ---
6
+
7
+ # Role
8
+ You are a Software Engineer. Your goal is to implement, refactor, and debug code based on user specifications.
9
+
10
+ # Guidelines
11
+ - Use `list_files` or `search_files` to understand the project structure.
12
+ - Use `search_content` to find where specific code or dependencies are located.
13
+ - Use `read_file` to examine code, or `read_file_lines` for large files.
14
+ - Use `write_file` to implement new features or fixes.
15
+ - Use `run_command` only when necessary for testing or building (e.g., `npm test`, `bun run build`).
16
+ - Be concise and follow best practices for the language you are writing in.
17
+ - Always verify your changes if possible by running tests.
@@ -0,0 +1,54 @@
1
+ name: memory-service
2
+ description: "Demonstrate long-term memory capabilities"
3
+
4
+ steps:
5
+ # Store information in memory
6
+ - id: remember_facts
7
+ type: memory
8
+ op: store
9
+ text: "Keystone CLI was initialized on 2025-01-01 by the engineering team."
10
+ metadata:
11
+ type: "fact"
12
+ confidence: 1.0
13
+
14
+ - id: remember_preference
15
+ type: memory
16
+ op: store
17
+ text: "The user prefers TypeScript over JavaScript for all projects."
18
+ metadata:
19
+ type: "preference"
20
+ confidence: 0.9
21
+ needs: [remember_facts]
22
+
23
+ # Search for information
24
+ - id: recall_preference
25
+ type: memory
26
+ op: search
27
+ query: "What language does the user like?"
28
+ limit: 1
29
+ needs: [remember_preference]
30
+
31
+ # Use recalled information in an LLM step
32
+ - id: confirm_memory
33
+ type: llm
34
+ agent: general
35
+ needs: [recall_preference]
36
+ prompt: |
37
+ Based on this memory:
38
+ ${{ steps.recall_preference.output[0].content }}
39
+
40
+ What programming language should I use? Answer in one word.
41
+ schema:
42
+ type: object
43
+ properties:
44
+ language:
45
+ type: string
46
+ required: [language]
47
+
48
+ - id: summary
49
+ type: shell
50
+ needs: [confirm_memory]
51
+ run: |
52
+ echo "Memory Service Demo Complete"
53
+ echo "Recalled: ${{ steps.recall_preference.output[0].content }}"
54
+ echo "Decision: ${{ steps.confirm_memory.output.language }}"
@@ -0,0 +1,44 @@
1
+ name: robust-automation
2
+ description: "Demonstrate auto-healing and reflexion features"
3
+
4
+ steps:
5
+ # Demonstration of auto-healing
6
+ # This step attempts to run a broken command, but the agent should fix it
7
+ - id: auto_heal_demo
8
+ type: shell
9
+ run: |
10
+ # This command has a typo and should fail
11
+ ech "Hello World"
12
+ auto_heal:
13
+ agent: software-engineer
14
+ maxAttempts: 2
15
+ model: gpt-4o
16
+
17
+ # Demonstration of reflexion (self-correction)
18
+ # This step asks for JSON but provides a prompt that might lead to text
19
+ # Reflexion should catch the schema validation error and retry
20
+ - id: reflexion_demo
21
+ type: llm
22
+ agent: general
23
+ needs: [auto_heal_demo]
24
+ prompt: |
25
+ Generate a list of 3 random colors. Just list them.
26
+ schema:
27
+ type: object
28
+ properties:
29
+ colors:
30
+ type: array
31
+ items:
32
+ type: string
33
+ required: [colors]
34
+ reflexion:
35
+ limit: 3
36
+ hint: "Ensure the output is valid JSON matching the schema."
37
+
38
+ - id: summary
39
+ type: shell
40
+ needs: [reflexion_demo]
41
+ run: |
42
+ echo "Robust automation demo complete."
43
+ echo "Healed Command Output: ${{ steps.auto_heal_demo.output.stdout }}"
44
+ echo "Reflexion Output: ${{ steps.reflexion_demo.output }}"
@@ -12,6 +12,7 @@ steps:
12
12
  agent: keystone-architect
13
13
  needs: [get_requirements]
14
14
  allowClarification: true
15
+ useStandardTools: true
15
16
  prompt: |
16
17
  The user wants to build the following:
17
18
  <user_requirements>