keystone-cli 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/package.json +1 -1
- package/src/cli.ts +233 -21
- package/src/db/memory-db.ts +6 -0
- package/src/db/sqlite-setup.test.ts +47 -0
- package/src/db/workflow-db.ts +6 -0
- package/src/expression/evaluator.ts +2 -0
- package/src/parser/schema.ts +3 -0
- package/src/runner/debug-repl.test.ts +240 -6
- package/src/runner/llm-adapter.test.ts +10 -4
- package/src/runner/llm-executor.ts +39 -3
- package/src/runner/shell-executor.ts +40 -12
- package/src/runner/standard-tools-integration.test.ts +147 -0
- package/src/runner/standard-tools.test.ts +69 -0
- package/src/runner/standard-tools.ts +270 -0
- package/src/runner/step-executor.test.ts +194 -1
- package/src/runner/step-executor.ts +46 -15
- package/src/runner/stream-utils.test.ts +113 -7
- package/src/runner/stream-utils.ts +4 -4
- package/src/runner/workflow-runner.ts +14 -20
- package/src/templates/agents/keystone-architect.md +16 -2
- package/src/templates/agents/software-engineer.md +17 -0
- package/src/templates/memory-service.yaml +54 -0
- package/src/templates/robust-automation.yaml +44 -0
- package/src/templates/scaffold-feature.yaml +1 -0
package/src/runner/stream-utils.test.ts:

```diff
@@ -4,16 +4,24 @@ import { processOpenAIStream } from './stream-utils';
 const encoder = new TextEncoder();
 
 function responseFromChunks(chunks: string[]): Response {
-
-
-
-
+  let index = 0;
+  const reader = {
+    async read(): Promise<{ done: boolean; value?: Uint8Array }> {
+      if (index >= chunks.length) {
+        return { done: true, value: undefined };
       }
-
+      const value = encoder.encode(chunks[index]);
+      index += 1;
+      return { done: false, value };
     },
-
+    async cancel(): Promise<void> {},
+  };
 
-  return
+  return {
+    body: {
+      getReader: () => reader,
+    },
+  } as Response;
 }
 
 describe('processOpenAIStream', () => {
@@ -61,5 +69,103 @@ describe('processOpenAIStream', () => {
 
     expect(result.message.content).toBe('ok');
     expect(logger.warn).toHaveBeenCalledTimes(1);
+    expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
+  });
+
+  it('throws error when buffer size is exceeded', async () => {
+    const response = responseFromChunks(['a'.repeat(1024 * 1024 + 1)]);
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM stream line exceed maximum size'
+    );
+  });
+
+  it('throws error when response size limit is exceeded', async () => {
+    const response = responseFromChunks([
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}\n`,
+    ]);
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM response exceeds maximum size'
+    );
+  });
+
+  it('throws error when tool call arguments size limit is exceeded', async () => {
+    const response = responseFromChunks([
+      `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(600 * 1024)}"}}]}}]}\n`,
+      `data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"${'a'.repeat(500 * 1024)}"}}]}}]}\n`,
+    ]);
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM tool call arguments exceed maximum size'
+    );
+  });
+
+  it('handles and logs generic errors during chunk processing', async () => {
+    const logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    // Mocking JSON.parse to throw a non-SyntaxError
+    const originalParse = JSON.parse;
+    JSON.parse = (str: string) => {
+      if (str === '{"trigger_error":true}') throw new Error('Generic error');
+      return originalParse(str);
+    };
+
+    try {
+      const response = responseFromChunks(['data: {"trigger_error":true}\n']);
+      await processOpenAIStream(response, { logger });
+      expect(logger.warn).toHaveBeenCalledTimes(1);
+      expect(logger.warn.mock.calls[0][0]).toContain(
+        'Error processing chunk: Error: Generic error'
+      );
+    } finally {
+      JSON.parse = originalParse;
+    }
+  });
+
+  it('handles errors in the final line processing', async () => {
+    const logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const response = responseFromChunks(['data: {bad json}']); // No newline, triggers buffer processing
+
+    await processOpenAIStream(response, { logger });
+
+    expect(logger.warn).toHaveBeenCalledTimes(1);
+    expect(logger.warn.mock.calls[0][0]).toContain('Malformed JSON line');
+  });
+
+  it('throws size limit error in final line processing', async () => {
+    const response = responseFromChunks([
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(600 * 1024)}"}}]}\n`,
+      `data: {"choices":[{"delta":{"content":"${'a'.repeat(500 * 1024)}"}}]}`,
+    ]);
+    // The first line is ok, the second line is in the final buffer and exceeds size
+    await expect(processOpenAIStream(response)).rejects.toThrow(
+      'LLM response exceeds maximum size'
+    );
+  });
+
+  it('bubbles up reader cancel errors', async () => {
+    const reader = {
+      read: async () => {
+        throw new Error('Read error');
+      },
+      cancel: async () => {
+        throw new Error('Cancel error');
+      },
+    };
+    const response = {
+      body: {
+        getReader: () => reader,
+      },
+    } as unknown as Response;
+
+    await expect(processOpenAIStream(response)).rejects.toThrow('Read error');
   });
 });
```
package/src/runner/stream-utils.ts:

```diff
@@ -67,7 +67,7 @@ export async function processOpenAIStream(
       const toolCall = tc as ToolCallDelta;
       if (!toolCalls[toolCall.index]) {
         toolCalls[toolCall.index] = {
-          id: toolCall.id,
+          id: toolCall.id || '',
           type: 'function',
           function: { name: '', arguments: '' },
         };
@@ -93,7 +93,7 @@ export async function processOpenAIStream(
     const activeLogger = options?.logger || new ConsoleLogger();
 
     // Rethrow size limit errors so they bubble up
-    if (
+    if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
       throw e;
     }
 
@@ -137,7 +137,7 @@ export async function processOpenAIStream(
       const toolCall = tc as ToolCallDelta;
      if (!toolCalls[toolCall.index]) {
         toolCalls[toolCall.index] = {
-          id: toolCall.id,
+          id: toolCall.id || '',
           type: 'function',
           function: { name: '', arguments: '' },
         };
@@ -161,7 +161,7 @@ export async function processOpenAIStream(
      }
    }
  } catch (e) {
-    if (
+    if (e instanceof Error && e.message.toLowerCase().includes('maximum size')) {
      throw e;
    }
    const activeLogger = options?.logger || new ConsoleLogger();
```
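With these guards, oversized stream lines, responses, and tool-call argument buffers now abort with a thrown error, while malformed JSON chunks are only logged and skipped. A minimal consumption sketch, assuming nothing beyond the `{ logger }` option and `result.message` shape exercised by the tests above (the endpoint and request body are illustrative):

```ts
// Sketch only: options bag and result shape inferred from the tests above.
import { processOpenAIStream } from './stream-utils';

async function chat(url: string, body: string): Promise<string | undefined> {
  const response = await fetch(url, { method: 'POST', body });
  // Malformed JSON lines surface via logger.warn and are skipped; any
  // "maximum size" error is rethrown to this caller instead of swallowed.
  const result = await processOpenAIStream(response, { logger: console });
  return result.message.content;
}
```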
package/src/runner/workflow-runner.ts:

```diff
@@ -630,16 +630,13 @@ export class WorkflowRunner {
     }
 
     const operation = async () => {
-      const result = await executeStep(
-
-
-        this.
-        this.
-        this.
-
-        this.options.workflowDir,
-        this.options.dryRun
-      );
+      const result = await executeStep(stepToExecute, context, this.logger, {
+        executeWorkflowFn: this.executeSubWorkflow.bind(this),
+        mcpManager: this.mcpManager,
+        memoryDb: this.memoryDb,
+        workflowDir: this.options.workflowDir,
+        dryRun: this.options.dryRun,
+      });
       if (result.status === 'failed') {
         throw new Error(result.error || 'Step failed');
       }
@@ -868,16 +865,13 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
 
     // Execute the agent step
     // We use a fresh context but share secrets/env
-    const result = await executeStep(
-
-
-      this.
-      this.
-      this.
-
-      this.options.workflowDir,
-      this.options.dryRun
-    );
+    const result = await executeStep(agentStep, context, this.logger, {
+      executeWorkflowFn: this.executeSubWorkflow.bind(this),
+      mcpManager: this.mcpManager,
+      memoryDb: this.memoryDb,
+      workflowDir: this.options.workflowDir,
+      dryRun: this.options.dryRun,
+    });
 
     if (result.status !== 'success' || !result.output) {
       throw new Error(`Healer agent failed: ${result.error || 'No output'}`);
```
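Both call sites now thread dependencies through a single named options bag instead of a long positional argument list. A rough TypeScript sketch of the signature implied by the two calls above; only the option names and the `status`/`output`/`error` result fields are confirmed by the diff, every type here is an assumption, and the real definition lives in package/src/runner/step-executor.ts:

```ts
// Hypothetical reconstruction from the call sites above; types are guesses.
interface ExecuteStepOptions {
  executeWorkflowFn?: (path: string, inputs?: unknown) => Promise<unknown>;
  mcpManager?: unknown;
  memoryDb?: unknown;
  workflowDir?: string;
  dryRun?: boolean;
}

declare function executeStep(
  step: unknown,
  context: unknown,
  logger: unknown,
  options: ExecuteStepOptions
): Promise<{ status: string; output?: unknown; error?: string }>;
```

A named bag keeps optional dependencies self-describing and lets new ones (such as `memoryDb` in this release) be added without touching every call site.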
package/src/templates/agents/keystone-architect.md:

```diff
@@ -19,9 +19,9 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
 - **eval**: (Optional) Configuration for prompt optimization `{ scorer: 'llm'|'script', agent, prompt, run }`.
 - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
   - **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }` (Set `allowInsecure: true` to bypass risky command checks)
-  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
+  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, useStandardTools, allowOutsideCwd, allowInsecure, mcpServers }`
   - **workflow**: `{ id, type: 'workflow', path, inputs }`
-  - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
+  - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content, allowOutsideCwd }`
   - **request**: `{ id, type: 'request', url, method, body, headers }`
   - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns boolean but automatically fallbacks to text if input is not yes/no)
   - **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
@@ -31,6 +31,17 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
 - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
 - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
 
+## Standard Tools
+When `useStandardTools: true` is set on an `llm` step, the agent has access to:
+- `read_file(path)`: Read file contents.
+- `read_file_lines(path, start, count)`: Read a specific range of lines.
+- `write_file(path, content)`: Write/overwrite file.
+- `list_files(path)`: List directory contents.
+- `search_files(pattern, dir)`: Search for files by pattern (glob).
+- `search_content(query, pattern, dir)`: Search for text within files.
+- `run_command(command, dir)`: Run shell commands (restricted by `allowInsecure`).
+- **Path Gating**: Restricted to CWD by default. Use `allowOutsideCwd: true` to bypass.
+
 ## Agent Schema (.md)
 Markdown files with YAML frontmatter:
 - **name**: Agent name.
```
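For reference, a minimal sketch of an `llm` step that opts into the standard tools; the field names come from the schema and tool list above, while the step id, agent, and prompt are purely illustrative:

```yaml
# Illustrative only: field names taken from the llm step schema above.
- id: fix_failing_test
  type: llm
  agent: software-engineer
  useStandardTools: true
  allowOutsideCwd: false  # default path gating: tools stay inside the CWD
  prompt: |
    Locate the failing unit test, read the relevant source files, and fix the bug.
```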
package/src/templates/agents/keystone-architect.md (continued):

```diff
@@ -45,6 +56,9 @@ Markdown files with YAML frontmatter:
 - `${{ steps.id.output }}`
 - `${{ steps.id.status }}` (e.g., `'pending'`, `'running'`, `'success'`, `'failed'`, `'skipped'`)
 - `${{ args.paramName }}` (used inside agent tools)
+- `${{ item }}` (current item in a `foreach` loop)
+- `${{ secrets.NAME }}` (access redacted secrets)
+- `${{ env.NAME }}` (access environment variables)
 - Standard JS-like expressions: `${{ steps.count > 0 ? 'yes' : 'no' }}`
 
 # Guidelines
```
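A small sketch showing the newly documented expression forms in context; the `foreach` step shape, step ids, and URL are assumptions for illustration:

```yaml
# Illustrative only: exercises the item, secrets, and env expressions.
- id: deploy_each
  type: shell
  foreach: ${{ steps.list_targets.output }}
  run: |
    echo "Deploying ${{ item }} as ${{ env.USER }}"
    curl -s -H "Authorization: Bearer ${{ secrets.DEPLOY_TOKEN }}" \
      "https://deploy.example.com/${{ item }}"
```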
package/src/templates/agents/software-engineer.md (new file):

```diff
@@ -0,0 +1,17 @@
+---
+name: software-engineer
+description: "Expert at writing and debugging code"
+model: gpt-4o
+---
+
+# Role
+You are a Software Engineer. Your goal is to implement, refactor, and debug code based on user specifications.
+
+# Guidelines
+- Use `list_files` or `search_files` to understand the project structure.
+- Use `search_content` to find where specific code or dependencies are located.
+- Use `read_file` to examine code, or `read_file_lines` for large files.
+- Use `write_file` to implement new features or fixes.
+- Use `run_command` only when necessary for testing or building (e.g., `npm test`, `bun run build`).
+- Be concise and follow best practices for the language you are writing in.
+- Always verify your changes if possible by running tests.
```
package/src/templates/memory-service.yaml (new file):

```diff
@@ -0,0 +1,54 @@
+name: memory-service
+description: "Demonstrate long-term memory capabilities"
+
+steps:
+  # Store information in memory
+  - id: remember_facts
+    type: memory
+    op: store
+    text: "Keystone CLI was initialized on 2025-01-01 by the engineering team."
+    metadata:
+      type: "fact"
+      confidence: 1.0
+
+  - id: remember_preference
+    type: memory
+    op: store
+    text: "The user prefers TypeScript over JavaScript for all projects."
+    metadata:
+      type: "preference"
+      confidence: 0.9
+    needs: [remember_facts]
+
+  # Search for information
+  - id: recall_preference
+    type: memory
+    op: search
+    query: "What language does the user like?"
+    limit: 1
+    needs: [remember_preference]
+
+  # Use recalled information in an LLM step
+  - id: confirm_memory
+    type: llm
+    agent: general
+    needs: [recall_preference]
+    prompt: |
+      Based on this memory:
+      ${{ steps.recall_preference.output[0].content }}
+
+      What programming language should I use? Answer in one word.
+    schema:
+      type: object
+      properties:
+        language:
+          type: string
+      required: [language]
+
+  - id: summary
+    type: shell
+    needs: [confirm_memory]
+    run: |
+      echo "Memory Service Demo Complete"
+      echo "Recalled: ${{ steps.recall_preference.output[0].content }}"
+      echo "Decision: ${{ steps.confirm_memory.output.language }}"
```
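The template dereferences `${{ steps.recall_preference.output[0].content }}`, which implies that `op: search` yields an array of memory records. A presumed shape, not confirmed by this diff:

```yaml
# Presumed result of the recall_preference search step above.
- content: "The user prefers TypeScript over JavaScript for all projects."
  metadata:
    type: "preference"
    confidence: 0.9
```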
package/src/templates/robust-automation.yaml (new file):

```diff
@@ -0,0 +1,44 @@
+name: robust-automation
+description: "Demonstrate auto-healing and reflexion features"
+
+steps:
+  # Demonstration of auto-healing
+  # This step attempts to run a broken command, but the agent should fix it
+  - id: auto_heal_demo
+    type: shell
+    run: |
+      # This command has a typo and should fail
+      ech "Hello World"
+    auto_heal:
+      agent: software-engineer
+      maxAttempts: 2
+      model: gpt-4o
+
+  # Demonstration of reflexion (self-correction)
+  # This step asks for JSON but provides a prompt that might lead to text
+  # Reflexion should catch the schema validation error and retry
+  - id: reflexion_demo
+    type: llm
+    agent: general
+    needs: [auto_heal_demo]
+    prompt: |
+      Generate a list of 3 random colors. Just list them.
+    schema:
+      type: object
+      properties:
+        colors:
+          type: array
+          items:
+            type: string
+      required: [colors]
+    reflexion:
+      limit: 3
+      hint: "Ensure the output is valid JSON matching the schema."
+
+  - id: summary
+    type: shell
+    needs: [reflexion_demo]
+    run: |
+      echo "Robust automation demo complete."
+      echo "Healed Command Output: ${{ steps.auto_heal_demo.output.stdout }}"
+      echo "Reflexion Output: ${{ steps.reflexion_demo.output }}"
```