npm - keystone-cli - Versions diffs - 0.3.2 → 0.4.0 - Mend

keystone-cli 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +18 -1
package/package.json +1 -1
package/src/db/workflow-db.ts +26 -7
package/src/expression/evaluator.ts +1 -0
package/src/parser/agent-parser.test.ts +8 -5
package/src/parser/schema.ts +8 -2
package/src/runner/audit-verification.test.ts +106 -0
package/src/runner/llm-adapter.ts +196 -4
package/src/runner/llm-clarification.test.ts +182 -0
package/src/runner/llm-executor.ts +118 -26
package/src/runner/mcp-manager.ts +4 -1
package/src/runner/mcp-server.test.ts +115 -1
package/src/runner/mcp-server.ts +161 -4
package/src/runner/shell-executor.ts +1 -1
package/src/runner/step-executor.test.ts +33 -10
package/src/runner/step-executor.ts +110 -14
package/src/runner/workflow-runner.test.ts +132 -0
package/src/runner/workflow-runner.ts +118 -23
package/src/templates/agents/keystone-architect.md +13 -6
package/src/ui/dashboard.tsx +32 -4
package/src/utils/auth-manager.test.ts +31 -0
package/src/utils/auth-manager.ts +21 -5
package/src/utils/json-parser.test.ts +35 -0
package/src/utils/json-parser.ts +95 -0
package/src/utils/mermaid.ts +12 -0
package/src/utils/sandbox.test.ts +12 -4
package/src/utils/sandbox.ts +69 -49

package/src/runner/workflow-runner.ts CHANGED Viewed

@@ -25,7 +25,7 @@ class RedactingLogger implements Logger {
   constructor(
     private inner: Logger,
     private redactor: Redactor
-  ) { }
+  ) {}
   log(msg: string): void {
     this.inner.log(this.redactor.redact(msg));
@@ -48,12 +48,20 @@ export interface RunOptions {
   mcpManager?: MCPManager;
   preventExit?: boolean; // Defaults to false
   workflowDir?: string;
+  resumeInputs?: Record<string, unknown>;
+  dryRun?: boolean;
 }
 export interface StepContext { // per-step record kept by the runner: result, status, and optional diagnostics
   output?: unknown; // raw step result, stored as returned by the step
   outputs?: Record<string, unknown>; // object form of the result for keyed access
   status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
+  error?: string; // optional message; the runner reads it as the suspension message when a step suspends
+  usage?: { // optional token accounting; foreach steps sum these three fields across their iterations
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
 }
 // Type for foreach results - wraps array to ensure JSON serialization preserves all properties
@@ -97,7 +105,7 @@ export class WorkflowRunner {
       // Resume existing run
       this.runId = options.resumeRunId;
       this.resumeRunId = options.resumeRunId;
-      this.inputs = {}; // Will be loaded from DB in restoreState
+      this.inputs = options.resumeInputs || {}; // Start with resume inputs, will be merged with DB inputs in restoreState
     } else {
       // Start new run
       this.inputs = options.inputs || {};
@@ -131,8 +139,10 @@ export class WorkflowRunner {
     }
     // Restore inputs from the previous run to ensure consistency
+    // Merge with any resumeInputs provided (e.g. answers to human steps)
     try {
-      this.inputs = JSON.parse(run.inputs);
+      const storedInputs = JSON.parse(run.inputs);
+      this.inputs = { ...storedInputs, ...this.inputs };
     } catch (error) {
       throw new Error(
         `Failed to parse inputs from run: ${error instanceof Error ? error.message : String(error)}`
@@ -224,23 +234,33 @@ export class WorkflowRunner {
           items.length === expectedCount &&
           !Array.from({ length: expectedCount }).some((_, i) => !items[i]);
+        // Determine overall status based on iterations
+        let status: StepContext['status'] = 'success';
+        if (allSuccess && hasAllItems) {
+          status = 'success';
+        } else if (items.some((item) => item?.status === 'suspended')) {
+          status = 'suspended';
+        } else {
+          status = 'failed';
+        }
         // Always restore what we have to allow partial expression evaluation
         const mappedOutputs = this.aggregateOutputs(outputs);
         this.stepContexts.set(stepId, {
           output: outputs,
           outputs: mappedOutputs,
-          status: allSuccess && hasAllItems ? 'success' : 'failed',
+          status,
           items,
         } as ForeachStepContext);
         // Only mark as fully completed if all iterations completed successfully AND we have all items
-        if (allSuccess && hasAllItems) {
+        if (status === 'success') {
           completedStepIds.add(stepId);
         }
       } else {
         // Single execution step
         const exec = stepExecutions[0];
-        if (exec.status === 'success' || exec.status === 'skipped') {
+        if (exec.status === 'success' || exec.status === 'skipped' || exec.status === 'suspended') {
           const output = exec.output ? JSON.parse(exec.output) : null;
           this.stepContexts.set(stepId, {
             output,
@@ -248,9 +268,11 @@ export class WorkflowRunner {
               typeof output === 'object' && output !== null && !Array.isArray(output)
                 ? (output as Record<string, unknown>)
                 : {},
-            status: exec.status as 'success' | 'skipped',
+            status: exec.status as StepContext['status'],
           });
-          completedStepIds.add(stepId);
+          if (exec.status !== 'suspended') {
+            completedStepIds.add(stepId);
+          }
         }
       }
     }
@@ -274,7 +296,7 @@ export class WorkflowRunner {
         );
         this.logger.log('✓ Run status updated to failed');
       } catch (error) {
-        this.logger.error('Error during cleanup:', error);
+        this.logger.error(`Error during cleanup: ${error}`);
       }
       // Only exit if not embedded
@@ -424,7 +446,7 @@ export class WorkflowRunner {
           output: ctx.output,
           outputs: ctx.outputs,
           status: ctx.status,
-          items: ctx.items, // Allows ${{ steps.id.items[0] }} or ${{ steps.id.items.every(...) }}
+          items: ctx.items,
         };
       } else {
         stepsContext[stepId] = {
@@ -442,6 +464,9 @@ export class WorkflowRunner {
       item,
       index,
       env: this.workflow.env,
+      output: item
+        ? undefined
+        : this.stepContexts.get(this.workflow.steps.find((s) => !s.foreach)?.id || '')?.output,
     };
   }
@@ -487,7 +512,8 @@ export class WorkflowRunner {
         this.logger,
         this.executeSubWorkflow.bind(this),
         this.mcpManager,
-        this.options.workflowDir
+        this.options.workflowDir,
+        this.options.dryRun
       );
       if (result.status === 'failed') {
         throw new Error(result.error || 'Step failed');
@@ -509,11 +535,23 @@ export class WorkflowRunner {
       });
       if (result.status === 'suspended') {
-        await this.db.completeStep(stepExecId, 'pending', null, 'Waiting for human input');
+        await this.db.completeStep(
+          stepExecId,
+          'suspended',
+          result.output,
+          'Waiting for interaction',
+          result.usage
+        );
         return result;
       }
-      await this.db.completeStep(stepExecId, result.status, result.output, result.error);
+      await this.db.completeStep(
+        stepExecId,
+        result.status,
+        result.output,
+        result.error,
+        result.usage
+      );
       // Ensure outputs is always an object for consistent access
       let outputs: Record<string, unknown>;
@@ -533,6 +571,7 @@ export class WorkflowRunner {
         output: result.output,
         outputs,
         status: result.status,
+        usage: result.usage,
       };
     } catch (error) {
       const errorMsg = error instanceof Error ? error.message : String(error);
@@ -666,17 +705,40 @@ export class WorkflowRunner {
         // 3. ${{ steps.id.items.every(s => s.status == 'success') }} -> works via items array
         const outputs = itemResults.map((r) => r.output);
         const allSuccess = itemResults.every((r) => r.status === 'success');
+        const anySuspended = itemResults.some((r) => r.status === 'suspended');
+        // Aggregate usage from all items
+        const aggregatedUsage = itemResults.reduce(
+          (acc, r) => {
+            if (r.usage) {
+              acc.prompt_tokens += r.usage.prompt_tokens;
+              acc.completion_tokens += r.usage.completion_tokens;
+              acc.total_tokens += r.usage.total_tokens;
+            }
+            return acc;
+          },
+          { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
+        );
         // Map child properties for easier access
         // If outputs are [{ id: 1 }, { id: 2 }], then outputs.id = [1, 2]
         const mappedOutputs = this.aggregateOutputs(outputs);
+        // Determine final status
+        let finalStatus: StepContext['status'] = 'failed';
+        if (allSuccess) {
+          finalStatus = 'success';
+        } else if (anySuspended) {
+          finalStatus = 'suspended';
+        }
         // Use proper object structure that serializes correctly
         const aggregatedContext: ForeachStepContext = {
           output: outputs,
           outputs: mappedOutputs,
-          status: allSuccess ? 'success' : 'failed',
+          status: finalStatus,
           items: itemResults,
+          usage: aggregatedUsage,
         };
         this.stepContexts.set(step.id, aggregatedContext);
@@ -684,15 +746,29 @@ export class WorkflowRunner {
         // Update parent step record with aggregated status
         await this.db.completeStep(
           parentStepExecId,
-          allSuccess ? 'success' : 'failed',
+          finalStatus,
           aggregatedContext,
-          allSuccess ? undefined : 'One or more iterations failed'
+          finalStatus === 'failed' ? 'One or more iterations failed' : undefined
         );
-        if (!allSuccess) {
+        if (finalStatus === 'suspended') {
+          // If any iteration suspended, the whole step is suspended
+          // We assume for now that only human steps can suspend, and we'll use the first one's input type
+          const suspendedItem = itemResults.find((r) => r.status === 'suspended');
+          throw new WorkflowSuspendedError(
+            suspendedItem?.error || 'Iteration suspended',
+            step.id,
+            'text'
+          );
+        }
+        if (finalStatus === 'failed') {
           throw new Error(`Step ${step.id} failed: one or more iterations failed`);
         }
       } catch (error) {
+        if (error instanceof WorkflowSuspendedError) {
+          throw error;
+        }
         // Mark parent step as failed
         const errorMsg = error instanceof Error ? error.message : String(error);
         await this.db.completeStep(parentStepExecId, 'failed', null, errorMsg);
@@ -709,7 +785,7 @@ export class WorkflowRunner {
       this.stepContexts.set(step.id, result);
       if (result.status === 'suspended') {
-        const inputType = step.type === 'human' ? step.inputType : 'confirm';
+        const inputType = step.type === 'human' ? step.inputType : 'text';
         throw new WorkflowSuspendedError(result.error || 'Workflow suspended', step.id, inputType);
       }
@@ -779,13 +855,13 @@ export class WorkflowRunner {
       await this.restoreState();
     }
-    const isResume = this.stepContexts.size > 0;
+    const isResume = !!this.resumeRunId || this.stepContexts.size > 0;
     this.logger.log(`\n🏛️  ${isResume ? 'Resuming' : 'Running'} workflow: ${this.workflow.name}`);
     this.logger.log(`Run ID: ${this.runId}`);
     this.logger.log(
       '\n⚠️  Security Warning: Only run workflows from trusted sources.\n' +
-      '   Workflows can execute arbitrary shell commands and access your environment.\n'
+        '   Workflows can execute arbitrary shell commands and access your environment.\n'
     );
     // Apply defaults and validate inputs
@@ -803,7 +879,13 @@ export class WorkflowRunner {
       const stepMap = new Map(this.workflow.steps.map((s) => [s.id, s]));
       // Initialize completedSteps with already completed steps (for resume)
-      const completedSteps = new Set<string>(this.stepContexts.keys());
+      // Only include steps that were successful or skipped, so failed steps are retried
+      const completedSteps = new Set<string>();
+      for (const [id, ctx] of this.stepContexts.entries()) {
+        if (ctx.status === 'success' || ctx.status === 'skipped') {
+          completedSteps.add(id);
+        }
+      }
       // Filter out already completed steps from execution order
       const remainingSteps = executionOrder.filter((stepId) => !completedSteps.has(stepId));
@@ -826,7 +908,20 @@ export class WorkflowRunner {
       const totalSteps = executionOrder.length;
       const stepIndices = new Map(executionOrder.map((id, index) => [id, index + 1]));
-      // Execute steps in parallel where possible (respecting dependencies)
+      // Evaluate global concurrency limit
+      let globalConcurrencyLimit = remainingSteps.length;
+      if (this.workflow.concurrency !== undefined) {
+        const baseContext = this.buildContext();
+        if (typeof this.workflow.concurrency === 'string') {
+          globalConcurrencyLimit = Number(
+            ExpressionEvaluator.evaluate(this.workflow.concurrency, baseContext)
+          );
+        } else {
+          globalConcurrencyLimit = this.workflow.concurrency;
+        }
+      }
+      // Execute steps in parallel where possible (respecting dependencies and global concurrency)
       const pendingSteps = new Set(remainingSteps);
       const runningPromises = new Map<string, Promise<void>>();
@@ -840,7 +935,7 @@ export class WorkflowRunner {
             }
             const dependenciesMet = step.needs.every((dep: string) => completedSteps.has(dep));
-            if (dependenciesMet) {
+            if (dependenciesMet && runningPromises.size < globalConcurrencyLimit) {
               pendingSteps.delete(stepId);
               // Start execution

package/src/templates/agents/keystone-architect.md CHANGED Viewed

@@ -11,26 +11,31 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
 ## Workflow Schema (.yaml)
 - **name**: Unique identifier for the workflow.
+- **description**: (Optional) Description of the workflow.
 - **inputs**: Map of `{ type: string, default: any, description: string }` under the `inputs` key.
 - **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
+- **env**: (Optional) Map of workflow-level environment variables.
+- **concurrency**: (Optional) Global concurrency limit for the workflow (number or expression).
 - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
   - **shell**: `{ id, type: 'shell', run, dir, env, transform }`
-  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, mcpServers }`
+  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
   - **workflow**: `{ id, type: 'workflow', path, inputs }`
   - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
   - **request**: `{ id, type: 'request', url, method, body, headers }`
   - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns boolean but automatically falls back to text if input is not yes/no)
-  - **sleep**: `{ id, type: 'sleep', duration }`
-  - **script**: `{ id, type: 'script', run }` (Executes JS in a secure sandbox)
-- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`, `transform`.
+  - **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
+  - **script**: `{ id, type: 'script', run, allowInsecure }` (Executes JS in a secure sandbox; set allowInsecure to true to allow fallback to insecure VM)
+- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `timeout` (ms), `retry`, `foreach`, `concurrency`, `transform`.
 - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
 - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
 ## Agent Schema (.md)
 Markdown files with YAML frontmatter:
 - **name**: Agent name.
+- **description**: (Optional) Agent description.
+- **provider**: (Optional) Provider name.
 - **model**: (Optional) e.g., `gpt-4o`, `claude-sonnet-4.5`.
-- **tools**: Array of `{ name, parameters, execution }` where `execution` is a standard Step object.
+- **tools**: Array of `{ name, description, parameters, execution }` where `execution` is a standard Step object and `parameters` is a JSON Schema.
 - **Body**: The Markdown body is the `systemPrompt`.
 ## Expression Syntax
@@ -43,9 +48,11 @@ Markdown files with YAML frontmatter:
 # Guidelines
 - **User Interaction**: Use `human` steps when user input or approval is needed.
 - **Error Handling**: Use `retry` for flaky operations and `finally` for cleanup (e.g., removing temp files).
+- **Timeouts**: Set `timeout` on steps that might hang or take too long.
 - **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
 - **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
-- **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems.
+- **Clarification**: Enable `allowClarification` in `llm` steps if the agent should be able to ask the user for missing info.
+- **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems. `mcpServers` can be a list of server names or configuration objects `{ name, command, args, env }`.
 # Output Instructions
 When asked to design a feature:

package/src/ui/dashboard.tsx CHANGED Viewed

@@ -7,6 +7,7 @@ interface Run {
   workflow_name: string;
   status: string;
   started_at: string;
+  total_tokens?: number;
 }
 const Dashboard = () => {
@@ -16,8 +17,27 @@ const Dashboard = () => {
   const fetchData = useCallback(() => {
     const db = new WorkflowDb();
     try {
-      const recentRuns = db.listRuns(10);
-      setRuns(recentRuns);
+      const recentRuns = db.listRuns(10) as (Run & { outputs: string | null })[];
+      const runsWithUsage = recentRuns.map((run) => {
+        let total_tokens = 0;
+        try {
+          // Get steps to aggregate tokens if not in outputs (future-proofing)
+          const steps = db.getStepsByRun(run.id);
+          total_tokens = steps.reduce((sum, s) => {
+            if (s.usage) {
+              try {
+                const u = JSON.parse(s.usage);
+                return sum + (u.total_tokens || 0);
+              } catch (e) {
+                return sum;
+              }
+            }
+            return sum;
+          }, 0);
+        } catch (e) {}
+        return { ...run, total_tokens };
+      });
+      setRuns(runsWithUsage);
     } catch (error) {
       console.error('Failed to fetch runs:', error);
     } finally {
@@ -71,11 +91,16 @@ const Dashboard = () => {
               STATUS
             </Text>
           </Box>
-          <Box>
+          <Box width={15}>
             <Text bold color="cyan">
               STARTED
             </Text>
           </Box>
+          <Box>
+            <Text bold color="cyan">
+              TOKENS
+            </Text>
+          </Box>
         </Box>
         <Box marginBottom={1}>
@@ -100,8 +125,11 @@ const Dashboard = () => {
                   {getStatusIcon(run.status)} {run.status.toUpperCase()}
                 </Text>
               </Box>
+              <Box width={15}>
+                <Text color="gray">{new Date(run.started_at).toLocaleTimeString()}</Text>
+              </Box>
               <Box>
-                <Text color="gray">{new Date(run.started_at).toLocaleString()}</Text>
+                <Text color="yellow">{run.total_tokens || 0}</Text>
               </Box>
             </Box>
           ))

package/src/utils/auth-manager.test.ts CHANGED Viewed

@@ -234,5 +234,36 @@ describe('AuthManager', () => {
         'The device code has expired'
       );
     });
+    it('pollGitHubDeviceLogin should timeout after 15 minutes', async () => {
+      // Mock fetch to always return authorization_pending
+      // @ts-ignore
+      global.fetch = mock(() =>
+        Promise.resolve(
+          new Response(
+            JSON.stringify({
+              error: 'authorization_pending',
+            }),
+            { status: 200 }
+          )
+        )
+      );
+      // Mock Date.now to simulate time passing
+      let now = Date.now();
+      const dateSpy = spyOn(Date, 'now').mockImplementation(() => {
+        const current = now;
+        now += 1000 * 60 * 16; // Advance 16 minutes on each call to trigger timeout immediately
+        return current;
+      });
+      try {
+        await expect(AuthManager.pollGitHubDeviceLogin('dev_code')).rejects.toThrow(
+          'Device login timed out'
+        );
+      } finally {
+        dateSpy.mockRestore();
+      }
+    });
   });
 });

package/src/utils/auth-manager.ts CHANGED Viewed

@@ -88,7 +88,12 @@ export class AuthManager {
     }>;
   }
-  static async pollGitHubDeviceLogin(deviceCode: string): Promise<string> {
+  static async pollGitHubDeviceLogin(
+    deviceCode: string,
+    intervalSeconds = 5,
+    expiresInSeconds = 900
+  ): Promise<string> {
+    let currentInterval = intervalSeconds;
     const poll = async (): Promise<string> => {
       const response = await fetch('https://github.com/login/oauth/access_token', {
         method: 'POST',
@@ -121,16 +126,27 @@ export class AuthManager {
         return ''; // Continue polling
       }
+      if (data.error === 'slow_down') {
+        // According to GitHub docs, "slow_down" means wait 5 seconds more
+        currentInterval += 5;
+        return '';
+      }
       throw new Error(data.error_description || data.error || 'Failed to get access token');
     };
-    // Poll every 5 seconds (GitHub's default interval is usually 5)
-    // In a real implementation, we should use the interval from initGitHubDeviceLogin
-    while (true) {
+    // Use interval and expiration from parameters
+    const startTime = Date.now();
+    const timeout = expiresInSeconds * 1000;
+    while (Date.now() - startTime < timeout) {
       const token = await poll();
       if (token) return token;
-      await new Promise((resolve) => setTimeout(resolve, 5000));
+      // Convert seconds to milliseconds
+      await new Promise((resolve) => setTimeout(resolve, currentInterval * 1000));
     }
+    throw new Error('Device login timed out');
   }
   static async getCopilotToken(): Promise<string | undefined> {

package/src/utils/json-parser.test.ts ADDED Viewed

@@ -0,0 +1,35 @@
+import { describe, expect, it } from 'bun:test';
+import { extractJson } from './json-parser';
+describe('json-parser', () => { // exercises extractJson: fenced blocks, inline JSON, nesting, braces inside strings, array roots, failure
+  it('should extract JSON from markdown code blocks', () => {
+    const text = 'Here is the data:\n```json\n{"foo": "bar"}\n```\nHope that helps!';
+    expect(extractJson(text)).toEqual({ foo: 'bar' }); // prose around the fence is ignored
+  });
+  it('should extract JSON without markdown wrappers', () => {
+    const text = 'The result is {"key": "value"} and it works.';
+    expect(extractJson(text)).toEqual({ key: 'value' }); // no fence present, so the balanced-delimiter scan is used
+  });
+  it('should handle nested structures with balanced braces', () => {
+    const text =
+      'Conversational preamble... {"outer": {"inner": [1, 2, 3]}, "active": true} conversational postscript.';
+    expect(extractJson(text)).toEqual({ outer: { inner: [1, 2, 3] }, active: true });
+  });
+  it('should handle strings with escaped braces', () => { // the closing brace here sits inside a string value (it is not backslash-escaped)
+    const text = 'Data: {"msg": "found a } brace", "id": 1}';
+    expect(extractJson(text)).toEqual({ msg: 'found a } brace', id: 1 }); // the in-string brace must not end the object early
+  });
+  it('should handle array root objects', () => {
+    const text = 'List: [{"id": 1}, {"id": 2}]';
+    expect(extractJson(text)).toEqual([{ id: 1 }, { id: 2 }]); // scan starts at the bracket because it precedes the first brace
+  });
+  it('should throw if no JSON is found', () => {
+    const text = 'Hello world, no JSON here!';
+    expect(() => extractJson(text)).toThrow(/Failed to extract valid JSON/); // non-empty input with nothing parseable throws
+  });
+});

package/src/utils/json-parser.ts ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * Robustly extract JSON from a string that may contain other text or Markdown blocks.
+ *
+ * Three strategies are tried in order, and the first successful parse wins:
+ *   1. The body of each fenced Markdown code block (optionally tagged "json").
+ *   2. The span from the first "{" or "[" to the point where that delimiter pair balances.
+ *   3. The whole input, trimmed.
+ *
+ * @param text - Raw text, e.g. an LLM response, that may wrap JSON in prose or code fences.
+ * @returns The parsed value, or null when text is empty (or otherwise falsy).
+ * @throws Error when text is non-empty and none of the strategies yields valid JSON; the
+ *   message includes the first 100 characters of the input.
+ */
+export function extractJson(text: string): unknown {
+  if (!text) return null; // empty input short-circuits to null instead of throwing
+  // 1. Try to extract from Markdown code blocks first (fence tag is optional and matched case-insensitively)
+  const markdownRegex = /```(?:json)?\s*([\s\S]*?)\s*```/gi;
+  const blocks: string[] = [];
+  let match = markdownRegex.exec(text);
+  while (match !== null) { // the global flag makes each exec() call resume after the previous match
+    blocks.push(match[1].trim()); // capture group 1 is the fence body
+    match = markdownRegex.exec(text);
+  }
+  if (blocks.length > 0) {
+    // If there are multiple blocks, try to parse them. Use the first one that is valid JSON.
+    for (const block of blocks) {
+      try {
+        return JSON.parse(block);
+      } catch (e) {
+        // Continue to next block; if none parses, fall through to strategy 2
+      }
+    }
+  }
+  // 2. Fallback: Find the first occurrence of { or [ and try to find its balanced closing counterpart
+  const firstBrace = text.indexOf('{');
+  const firstBracket = text.indexOf('[');
+  // Start from whichever comes first (-1 means neither delimiter occurs)
+  let startIndex = -1;
+  if (firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket)) {
+    startIndex = firstBrace;
+  } else if (firstBracket !== -1) {
+    startIndex = firstBracket;
+  }
+  if (startIndex !== -1) {
+    const stopper = text[startIndex] === '{' ? '}' : ']';
+    const opener = text[startIndex];
+    // Simple balanced delimiter matching that skips over double-quoted strings and escaped characters
+    let depth = 0;
+    let inString = false;
+    let escaped = false;
+    for (let i = startIndex; i < text.length; i++) {
+      const char = text[i];
+      if (escaped) {
+        escaped = false; // the character after a backslash is consumed without being interpreted
+        continue;
+      }
+      if (char === '\\') {
+        escaped = true; // checked before the inString test, so a backslash outside a string also skips the next character
+        continue;
+      }
+      if (char === '"') {
+        inString = !inString;
+        continue;
+      }
+      if (!inString) {
+        if (char === opener) { // only the starting delimiter pair is counted; the other kind is left to JSON.parse
+          depth++;
+        } else if (char === stopper) {
+          depth--;
+          if (depth === 0) {
+            const potentialJson = text.substring(startIndex, i + 1);
+            try {
+              return JSON.parse(potentialJson);
+            } catch (e) {
+              // Not valid JSON. Nothing is returned here and the scan simply continues.
+              // NOTE(review): startIndex is never advanced, so any later candidate still begins
+              // at the first opener; a leading non-JSON delimiter pair likely defeats this fallback.
+            }
+          }
+        }
+      }
+    }
+  }
+  // 3. Last ditch effort: Try parsing the whole thing as is (after trimming)
+  try {
+    return JSON.parse(text.trim());
+  } catch (e) {
+    throw new Error(
+      `Failed to extract valid JSON from LLM response. Content: ${text.substring(0, 100)}...`
+    );
+  }
+}

package/src/utils/mermaid.ts CHANGED Viewed

@@ -29,6 +29,15 @@ export function generateMermaidGraph(workflow: Workflow): string {
       case 'shell':
         style = ':::shell';
         break;
+      case 'file':
+        style = ':::file';
+        break;
+      case 'request':
+        style = ':::request';
+        break;
+      case 'workflow':
+        style = ':::workflow';
+        break;
       default:
         style = ':::default';
     }
@@ -54,6 +63,9 @@ export function generateMermaidGraph(workflow: Workflow): string {
     '  classDef human fill:#fff3e0,stroke:#e65100,stroke-width:2px,stroke-dasharray: 5 5;'
   );
   lines.push('  classDef shell fill:#f3e5f5,stroke:#4a148c,stroke-width:1px;');
+  lines.push('  classDef file fill:#e8f5e9,stroke:#2e7d32,stroke-width:1px;');
+  lines.push('  classDef request fill:#fffde7,stroke:#fbc02d,stroke-width:1px;');
+  lines.push('  classDef workflow fill:#fce4ec,stroke:#c2185b,stroke-width:2px;');
   lines.push('  classDef default fill:#fff,stroke:#333,stroke-width:1px;');
   return lines.join('\n');