keystone-cli 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@ class RedactingLogger implements Logger {
25
25
  constructor(
26
26
  private inner: Logger,
27
27
  private redactor: Redactor
28
- ) { }
28
+ ) {}
29
29
 
30
30
  log(msg: string): void {
31
31
  this.inner.log(this.redactor.redact(msg));
@@ -48,12 +48,20 @@ export interface RunOptions {
48
48
  mcpManager?: MCPManager;
49
49
  preventExit?: boolean; // Defaults to false
50
50
  workflowDir?: string;
51
+ resumeInputs?: Record<string, unknown>;
52
+ dryRun?: boolean;
51
53
  }
52
54
 
53
55
  export interface StepContext {
54
56
  output?: unknown;
55
57
  outputs?: Record<string, unknown>;
56
58
  status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
59
+ error?: string;
60
+ usage?: {
61
+ prompt_tokens: number;
62
+ completion_tokens: number;
63
+ total_tokens: number;
64
+ };
57
65
  }
58
66
 
59
67
  // Type for foreach results - wraps array to ensure JSON serialization preserves all properties
@@ -97,7 +105,7 @@ export class WorkflowRunner {
97
105
  // Resume existing run
98
106
  this.runId = options.resumeRunId;
99
107
  this.resumeRunId = options.resumeRunId;
100
- this.inputs = {}; // Will be loaded from DB in restoreState
108
+ this.inputs = options.resumeInputs || {}; // Start with resume inputs, will be merged with DB inputs in restoreState
101
109
  } else {
102
110
  // Start new run
103
111
  this.inputs = options.inputs || {};
@@ -131,8 +139,10 @@ export class WorkflowRunner {
131
139
  }
132
140
 
133
141
  // Restore inputs from the previous run to ensure consistency
142
+ // Merge with any resumeInputs provided (e.g. answers to human steps)
134
143
  try {
135
- this.inputs = JSON.parse(run.inputs);
144
+ const storedInputs = JSON.parse(run.inputs);
145
+ this.inputs = { ...storedInputs, ...this.inputs };
136
146
  } catch (error) {
137
147
  throw new Error(
138
148
  `Failed to parse inputs from run: ${error instanceof Error ? error.message : String(error)}`
@@ -224,23 +234,33 @@ export class WorkflowRunner {
224
234
  items.length === expectedCount &&
225
235
  !Array.from({ length: expectedCount }).some((_, i) => !items[i]);
226
236
 
237
+ // Determine overall status based on iterations
238
+ let status: StepContext['status'] = 'success';
239
+ if (allSuccess && hasAllItems) {
240
+ status = 'success';
241
+ } else if (items.some((item) => item?.status === 'suspended')) {
242
+ status = 'suspended';
243
+ } else {
244
+ status = 'failed';
245
+ }
246
+
227
247
  // Always restore what we have to allow partial expression evaluation
228
248
  const mappedOutputs = this.aggregateOutputs(outputs);
229
249
  this.stepContexts.set(stepId, {
230
250
  output: outputs,
231
251
  outputs: mappedOutputs,
232
- status: allSuccess && hasAllItems ? 'success' : 'failed',
252
+ status,
233
253
  items,
234
254
  } as ForeachStepContext);
235
255
 
236
256
  // Only mark as fully completed if all iterations completed successfully AND we have all items
237
- if (allSuccess && hasAllItems) {
257
+ if (status === 'success') {
238
258
  completedStepIds.add(stepId);
239
259
  }
240
260
  } else {
241
261
  // Single execution step
242
262
  const exec = stepExecutions[0];
243
- if (exec.status === 'success' || exec.status === 'skipped') {
263
+ if (exec.status === 'success' || exec.status === 'skipped' || exec.status === 'suspended') {
244
264
  const output = exec.output ? JSON.parse(exec.output) : null;
245
265
  this.stepContexts.set(stepId, {
246
266
  output,
@@ -248,9 +268,11 @@ export class WorkflowRunner {
248
268
  typeof output === 'object' && output !== null && !Array.isArray(output)
249
269
  ? (output as Record<string, unknown>)
250
270
  : {},
251
- status: exec.status as 'success' | 'skipped',
271
+ status: exec.status as StepContext['status'],
252
272
  });
253
- completedStepIds.add(stepId);
273
+ if (exec.status !== 'suspended') {
274
+ completedStepIds.add(stepId);
275
+ }
254
276
  }
255
277
  }
256
278
  }
@@ -274,7 +296,7 @@ export class WorkflowRunner {
274
296
  );
275
297
  this.logger.log('✓ Run status updated to failed');
276
298
  } catch (error) {
277
- this.logger.error('Error during cleanup:', error);
299
+ this.logger.error(`Error during cleanup: ${error}`);
278
300
  }
279
301
 
280
302
  // Only exit if not embedded
@@ -424,7 +446,7 @@ export class WorkflowRunner {
424
446
  output: ctx.output,
425
447
  outputs: ctx.outputs,
426
448
  status: ctx.status,
427
- items: ctx.items, // Allows ${{ steps.id.items[0] }} or ${{ steps.id.items.every(...) }}
449
+ items: ctx.items,
428
450
  };
429
451
  } else {
430
452
  stepsContext[stepId] = {
@@ -442,6 +464,9 @@ export class WorkflowRunner {
442
464
  item,
443
465
  index,
444
466
  env: this.workflow.env,
467
+ output: item
468
+ ? undefined
469
+ : this.stepContexts.get(this.workflow.steps.find((s) => !s.foreach)?.id || '')?.output,
445
470
  };
446
471
  }
447
472
 
@@ -487,7 +512,8 @@ export class WorkflowRunner {
487
512
  this.logger,
488
513
  this.executeSubWorkflow.bind(this),
489
514
  this.mcpManager,
490
- this.options.workflowDir
515
+ this.options.workflowDir,
516
+ this.options.dryRun
491
517
  );
492
518
  if (result.status === 'failed') {
493
519
  throw new Error(result.error || 'Step failed');
@@ -509,11 +535,23 @@ export class WorkflowRunner {
509
535
  });
510
536
 
511
537
  if (result.status === 'suspended') {
512
- await this.db.completeStep(stepExecId, 'pending', null, 'Waiting for human input');
538
+ await this.db.completeStep(
539
+ stepExecId,
540
+ 'suspended',
541
+ result.output,
542
+ 'Waiting for interaction',
543
+ result.usage
544
+ );
513
545
  return result;
514
546
  }
515
547
 
516
- await this.db.completeStep(stepExecId, result.status, result.output, result.error);
548
+ await this.db.completeStep(
549
+ stepExecId,
550
+ result.status,
551
+ result.output,
552
+ result.error,
553
+ result.usage
554
+ );
517
555
 
518
556
  // Ensure outputs is always an object for consistent access
519
557
  let outputs: Record<string, unknown>;
@@ -533,6 +571,7 @@ export class WorkflowRunner {
533
571
  output: result.output,
534
572
  outputs,
535
573
  status: result.status,
574
+ usage: result.usage,
536
575
  };
537
576
  } catch (error) {
538
577
  const errorMsg = error instanceof Error ? error.message : String(error);
@@ -666,17 +705,40 @@ export class WorkflowRunner {
666
705
  // 3. ${{ steps.id.items.every(s => s.status == 'success') }} -> works via items array
667
706
  const outputs = itemResults.map((r) => r.output);
668
707
  const allSuccess = itemResults.every((r) => r.status === 'success');
708
+ const anySuspended = itemResults.some((r) => r.status === 'suspended');
709
+
710
+ // Aggregate usage from all items
711
+ const aggregatedUsage = itemResults.reduce(
712
+ (acc, r) => {
713
+ if (r.usage) {
714
+ acc.prompt_tokens += r.usage.prompt_tokens;
715
+ acc.completion_tokens += r.usage.completion_tokens;
716
+ acc.total_tokens += r.usage.total_tokens;
717
+ }
718
+ return acc;
719
+ },
720
+ { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
721
+ );
669
722
 
670
723
  // Map child properties for easier access
671
724
  // If outputs are [{ id: 1 }, { id: 2 }], then outputs.id = [1, 2]
672
725
  const mappedOutputs = this.aggregateOutputs(outputs);
673
726
 
727
+ // Determine final status
728
+ let finalStatus: StepContext['status'] = 'failed';
729
+ if (allSuccess) {
730
+ finalStatus = 'success';
731
+ } else if (anySuspended) {
732
+ finalStatus = 'suspended';
733
+ }
734
+
674
735
  // Use proper object structure that serializes correctly
675
736
  const aggregatedContext: ForeachStepContext = {
676
737
  output: outputs,
677
738
  outputs: mappedOutputs,
678
- status: allSuccess ? 'success' : 'failed',
739
+ status: finalStatus,
679
740
  items: itemResults,
741
+ usage: aggregatedUsage,
680
742
  };
681
743
 
682
744
  this.stepContexts.set(step.id, aggregatedContext);
@@ -684,15 +746,29 @@ export class WorkflowRunner {
684
746
  // Update parent step record with aggregated status
685
747
  await this.db.completeStep(
686
748
  parentStepExecId,
687
- allSuccess ? 'success' : 'failed',
749
+ finalStatus,
688
750
  aggregatedContext,
689
- allSuccess ? undefined : 'One or more iterations failed'
751
+ finalStatus === 'failed' ? 'One or more iterations failed' : undefined
690
752
  );
691
753
 
692
- if (!allSuccess) {
754
+ if (finalStatus === 'suspended') {
755
+ // If any iteration suspended, the whole step is suspended
756
+ // We assume for now that only human steps can suspend, and we'll use the first one's input type
757
+ const suspendedItem = itemResults.find((r) => r.status === 'suspended');
758
+ throw new WorkflowSuspendedError(
759
+ suspendedItem?.error || 'Iteration suspended',
760
+ step.id,
761
+ 'text'
762
+ );
763
+ }
764
+
765
+ if (finalStatus === 'failed') {
693
766
  throw new Error(`Step ${step.id} failed: one or more iterations failed`);
694
767
  }
695
768
  } catch (error) {
769
+ if (error instanceof WorkflowSuspendedError) {
770
+ throw error;
771
+ }
696
772
  // Mark parent step as failed
697
773
  const errorMsg = error instanceof Error ? error.message : String(error);
698
774
  await this.db.completeStep(parentStepExecId, 'failed', null, errorMsg);
@@ -709,7 +785,7 @@ export class WorkflowRunner {
709
785
  this.stepContexts.set(step.id, result);
710
786
 
711
787
  if (result.status === 'suspended') {
712
- const inputType = step.type === 'human' ? step.inputType : 'confirm';
788
+ const inputType = step.type === 'human' ? step.inputType : 'text';
713
789
  throw new WorkflowSuspendedError(result.error || 'Workflow suspended', step.id, inputType);
714
790
  }
715
791
 
@@ -779,13 +855,13 @@ export class WorkflowRunner {
779
855
  await this.restoreState();
780
856
  }
781
857
 
782
- const isResume = this.stepContexts.size > 0;
858
+ const isResume = !!this.resumeRunId || this.stepContexts.size > 0;
783
859
 
784
860
  this.logger.log(`\n🏛️ ${isResume ? 'Resuming' : 'Running'} workflow: ${this.workflow.name}`);
785
861
  this.logger.log(`Run ID: ${this.runId}`);
786
862
  this.logger.log(
787
863
  '\n⚠️ Security Warning: Only run workflows from trusted sources.\n' +
788
- ' Workflows can execute arbitrary shell commands and access your environment.\n'
864
+ ' Workflows can execute arbitrary shell commands and access your environment.\n'
789
865
  );
790
866
 
791
867
  // Apply defaults and validate inputs
@@ -803,7 +879,13 @@ export class WorkflowRunner {
803
879
  const stepMap = new Map(this.workflow.steps.map((s) => [s.id, s]));
804
880
 
805
881
  // Initialize completedSteps with already completed steps (for resume)
806
- const completedSteps = new Set<string>(this.stepContexts.keys());
882
+ // Only include steps that were successful or skipped, so failed steps are retried
883
+ const completedSteps = new Set<string>();
884
+ for (const [id, ctx] of this.stepContexts.entries()) {
885
+ if (ctx.status === 'success' || ctx.status === 'skipped') {
886
+ completedSteps.add(id);
887
+ }
888
+ }
807
889
 
808
890
  // Filter out already completed steps from execution order
809
891
  const remainingSteps = executionOrder.filter((stepId) => !completedSteps.has(stepId));
@@ -826,7 +908,20 @@ export class WorkflowRunner {
826
908
  const totalSteps = executionOrder.length;
827
909
  const stepIndices = new Map(executionOrder.map((id, index) => [id, index + 1]));
828
910
 
829
- // Execute steps in parallel where possible (respecting dependencies)
911
+ // Evaluate global concurrency limit
912
+ let globalConcurrencyLimit = remainingSteps.length;
913
+ if (this.workflow.concurrency !== undefined) {
914
+ const baseContext = this.buildContext();
915
+ if (typeof this.workflow.concurrency === 'string') {
916
+ globalConcurrencyLimit = Number(
917
+ ExpressionEvaluator.evaluate(this.workflow.concurrency, baseContext)
918
+ );
919
+ } else {
920
+ globalConcurrencyLimit = this.workflow.concurrency;
921
+ }
922
+ }
923
+
924
+ // Execute steps in parallel where possible (respecting dependencies and global concurrency)
830
925
  const pendingSteps = new Set(remainingSteps);
831
926
  const runningPromises = new Map<string, Promise<void>>();
832
927
 
@@ -840,7 +935,7 @@ export class WorkflowRunner {
840
935
  }
841
936
  const dependenciesMet = step.needs.every((dep: string) => completedSteps.has(dep));
842
937
 
843
- if (dependenciesMet) {
938
+ if (dependenciesMet && runningPromises.size < globalConcurrencyLimit) {
844
939
  pendingSteps.delete(stepId);
845
940
 
846
941
  // Start execution
@@ -11,26 +11,31 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
11
11
 
12
12
  ## Workflow Schema (.yaml)
13
13
  - **name**: Unique identifier for the workflow.
14
+ - **description**: (Optional) Description of the workflow.
14
15
  - **inputs**: Map of `{ type: string, default: any, description: string }` under the `inputs` key.
15
16
  - **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
17
+ - **env**: (Optional) Map of workflow-level environment variables.
18
+ - **concurrency**: (Optional) Global concurrency limit for the workflow (number or expression).
16
19
  - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
17
20
  - **shell**: `{ id, type: 'shell', run, dir, env, transform }`
18
- - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, mcpServers }`
21
+ - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
19
22
  - **workflow**: `{ id, type: 'workflow', path, inputs }`
20
23
  - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
21
24
  - **request**: `{ id, type: 'request', url, method, body, headers }`
22
25
  - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns a boolean but automatically falls back to text if the input is not yes/no)
23
- - **sleep**: `{ id, type: 'sleep', duration }`
24
- - **script**: `{ id, type: 'script', run }` (Executes JS in a secure sandbox)
25
- - **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`, `transform`.
26
+ - **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
27
+ - **script**: `{ id, type: 'script', run, allowInsecure }` (Executes JS in a secure sandbox; set allowInsecure to true to allow fallback to insecure VM)
28
+ - **Common Step Fields**: `needs` (array of IDs), `if` (expression), `timeout` (ms), `retry`, `foreach`, `concurrency`, `transform`.
26
29
  - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
27
30
  - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
28
31
 
29
32
  ## Agent Schema (.md)
30
33
  Markdown files with YAML frontmatter:
31
34
  - **name**: Agent name.
35
+ - **description**: (Optional) Agent description.
36
+ - **provider**: (Optional) Provider name.
32
37
  - **model**: (Optional) e.g., `gpt-4o`, `claude-sonnet-4.5`.
33
- - **tools**: Array of `{ name, parameters, execution }` where `execution` is a standard Step object.
38
+ - **tools**: Array of `{ name, description, parameters, execution }` where `execution` is a standard Step object and `parameters` is a JSON Schema.
34
39
  - **Body**: The Markdown body is the `systemPrompt`.
35
40
 
36
41
  ## Expression Syntax
@@ -43,9 +48,11 @@ Markdown files with YAML frontmatter:
43
48
  # Guidelines
44
49
  - **User Interaction**: Use `human` steps when user input or approval is needed.
45
50
  - **Error Handling**: Use `retry` for flaky operations and `finally` for cleanup (e.g., removing temp files).
51
+ - **Timeouts**: Set `timeout` on steps that might hang or take too long.
46
52
  - **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
47
53
  - **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
48
- - **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems.
54
+ - **Clarification**: Enable `allowClarification` in `llm` steps if the agent should be able to ask the user for missing info.
55
+ - **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems. `mcpServers` can be a list of server names or configuration objects `{ name, command, args, env }`.
49
56
 
50
57
  # Output Instructions
51
58
  When asked to design a feature:
@@ -7,6 +7,7 @@ interface Run {
7
7
  workflow_name: string;
8
8
  status: string;
9
9
  started_at: string;
10
+ total_tokens?: number;
10
11
  }
11
12
 
12
13
  const Dashboard = () => {
@@ -16,8 +17,27 @@ const Dashboard = () => {
16
17
  const fetchData = useCallback(() => {
17
18
  const db = new WorkflowDb();
18
19
  try {
19
- const recentRuns = db.listRuns(10);
20
- setRuns(recentRuns);
20
+ const recentRuns = db.listRuns(10) as (Run & { outputs: string | null })[];
21
+ const runsWithUsage = recentRuns.map((run) => {
22
+ let total_tokens = 0;
23
+ try {
24
+ // Get steps to aggregate tokens if not in outputs (future-proofing)
25
+ const steps = db.getStepsByRun(run.id);
26
+ total_tokens = steps.reduce((sum, s) => {
27
+ if (s.usage) {
28
+ try {
29
+ const u = JSON.parse(s.usage);
30
+ return sum + (u.total_tokens || 0);
31
+ } catch (e) {
32
+ return sum;
33
+ }
34
+ }
35
+ return sum;
36
+ }, 0);
37
+ } catch (e) {}
38
+ return { ...run, total_tokens };
39
+ });
40
+ setRuns(runsWithUsage);
21
41
  } catch (error) {
22
42
  console.error('Failed to fetch runs:', error);
23
43
  } finally {
@@ -71,11 +91,16 @@ const Dashboard = () => {
71
91
  STATUS
72
92
  </Text>
73
93
  </Box>
74
- <Box>
94
+ <Box width={15}>
75
95
  <Text bold color="cyan">
76
96
  STARTED
77
97
  </Text>
78
98
  </Box>
99
+ <Box>
100
+ <Text bold color="cyan">
101
+ TOKENS
102
+ </Text>
103
+ </Box>
79
104
  </Box>
80
105
 
81
106
  <Box marginBottom={1}>
@@ -100,8 +125,11 @@ const Dashboard = () => {
100
125
  {getStatusIcon(run.status)} {run.status.toUpperCase()}
101
126
  </Text>
102
127
  </Box>
128
+ <Box width={15}>
129
+ <Text color="gray">{new Date(run.started_at).toLocaleTimeString()}</Text>
130
+ </Box>
103
131
  <Box>
104
- <Text color="gray">{new Date(run.started_at).toLocaleString()}</Text>
132
+ <Text color="yellow">{run.total_tokens || 0}</Text>
105
133
  </Box>
106
134
  </Box>
107
135
  ))
@@ -234,5 +234,36 @@ describe('AuthManager', () => {
234
234
  'The device code has expired'
235
235
  );
236
236
  });
237
+
238
+ it('pollGitHubDeviceLogin should timeout after 15 minutes', async () => {
239
+ // Mock fetch to always return authorization_pending
240
+ // @ts-ignore
241
+ global.fetch = mock(() =>
242
+ Promise.resolve(
243
+ new Response(
244
+ JSON.stringify({
245
+ error: 'authorization_pending',
246
+ }),
247
+ { status: 200 }
248
+ )
249
+ )
250
+ );
251
+
252
+ // Mock Date.now to simulate time passing
253
+ let now = Date.now();
254
+ const dateSpy = spyOn(Date, 'now').mockImplementation(() => {
255
+ const current = now;
256
+ now += 1000 * 60 * 16; // Advance 16 minutes on each call to trigger timeout immediately
257
+ return current;
258
+ });
259
+
260
+ try {
261
+ await expect(AuthManager.pollGitHubDeviceLogin('dev_code')).rejects.toThrow(
262
+ 'Device login timed out'
263
+ );
264
+ } finally {
265
+ dateSpy.mockRestore();
266
+ }
267
+ });
237
268
  });
238
269
  });
@@ -88,7 +88,12 @@ export class AuthManager {
88
88
  }>;
89
89
  }
90
90
 
91
- static async pollGitHubDeviceLogin(deviceCode: string): Promise<string> {
91
+ static async pollGitHubDeviceLogin(
92
+ deviceCode: string,
93
+ intervalSeconds = 5,
94
+ expiresInSeconds = 900
95
+ ): Promise<string> {
96
+ let currentInterval = intervalSeconds;
92
97
  const poll = async (): Promise<string> => {
93
98
  const response = await fetch('https://github.com/login/oauth/access_token', {
94
99
  method: 'POST',
@@ -121,16 +126,27 @@ export class AuthManager {
121
126
  return ''; // Continue polling
122
127
  }
123
128
 
129
+ if (data.error === 'slow_down') {
130
+ // According to GitHub docs, "slow_down" means wait 5 seconds more
131
+ currentInterval += 5;
132
+ return '';
133
+ }
134
+
124
135
  throw new Error(data.error_description || data.error || 'Failed to get access token');
125
136
  };
126
137
 
127
- // Poll every 5 seconds (GitHub's default interval is usually 5)
128
- // In a real implementation, we should use the interval from initGitHubDeviceLogin
129
- while (true) {
138
+ // Use interval and expiration from parameters
139
+ const startTime = Date.now();
140
+ const timeout = expiresInSeconds * 1000;
141
+
142
+ while (Date.now() - startTime < timeout) {
130
143
  const token = await poll();
131
144
  if (token) return token;
132
- await new Promise((resolve) => setTimeout(resolve, 5000));
145
+ // Convert seconds to milliseconds
146
+ await new Promise((resolve) => setTimeout(resolve, currentInterval * 1000));
133
147
  }
148
+
149
+ throw new Error('Device login timed out');
134
150
  }
135
151
 
136
152
  static async getCopilotToken(): Promise<string | undefined> {
@@ -0,0 +1,35 @@
1
import { describe, expect, it } from 'bun:test';
import { extractJson } from './json-parser';

// Behavioural tests for extractJson: fenced Markdown blocks, JSON embedded in
// prose, string-aware brace matching, and the failure / empty-input contracts.
describe('json-parser', () => {
  it('should extract JSON from markdown code blocks', () => {
    const text = 'Here is the data:\n```json\n{"foo": "bar"}\n```\nHope that helps!';
    expect(extractJson(text)).toEqual({ foo: 'bar' });
  });

  it('should extract JSON without markdown wrappers', () => {
    const text = 'The result is {"key": "value"} and it works.';
    expect(extractJson(text)).toEqual({ key: 'value' });
  });

  it('should handle nested structures with balanced braces', () => {
    const text =
      'Conversational preamble... {"outer": {"inner": [1, 2, 3]}, "active": true} conversational postscript.';
    expect(extractJson(text)).toEqual({ outer: { inner: [1, 2, 3] }, active: true });
  });

  // The brace below sits inside a string value (it is not escaped); the
  // matcher must not count it as the end of the object.
  it('should ignore braces inside string values', () => {
    const text = 'Data: {"msg": "found a } brace", "id": 1}';
    expect(extractJson(text)).toEqual({ msg: 'found a } brace', id: 1 });
  });

  // Escaped quotes must not terminate the string early, otherwise the brace
  // that follows them would be counted as structural.
  it('should handle escaped quotes inside string values', () => {
    const text = 'Data: {"msg": "say \\"hi\\" }", "id": 1}';
    expect(extractJson(text)).toEqual({ msg: 'say "hi" }', id: 1 });
  });

  it('should handle array root objects', () => {
    const text = 'List: [{"id": 1}, {"id": 2}]';
    expect(extractJson(text)).toEqual([{ id: 1 }, { id: 2 }]);
  });

  it('should return null for empty input', () => {
    expect(extractJson('')).toBeNull();
  });

  it('should throw if no JSON is found', () => {
    const text = 'Hello world, no JSON here!';
    expect(() => extractJson(text)).toThrow(/Failed to extract valid JSON/);
  });
});
@@ -0,0 +1,95 @@
1
/** Outcome of a parse attempt; needed because `null` is itself a valid JSON value. */
type ParseAttempt = { ok: true; value: unknown } | { ok: false };

/** Parse `candidate` as JSON without throwing. */
function tryParseJson(candidate: string): ParseAttempt {
  try {
    return { ok: true, value: JSON.parse(candidate) };
  } catch {
    return { ok: false };
  }
}

/** Index of the first `{` or `[` at or after `from`, or -1 when there is none. */
function findNextOpener(text: string, from: number): number {
  const brace = text.indexOf('{', from);
  const bracket = text.indexOf('[', from);
  if (brace === -1) return bracket;
  if (bracket === -1) return brace;
  return Math.min(brace, bracket);
}

/**
 * Given the index of an opening `{` or `[`, return the index of the closer that
 * balances it, or -1 if the text ends first. Only the opener's own bracket kind
 * is counted, and anything inside a double-quoted string is ignored.
 */
function findBalancedEnd(text: string, start: number): number {
  const opener = text[start];
  const closer = opener === '{' ? '}' : ']';

  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const char = text[i];

    if (inString) {
      // Backslash escapes only have meaning inside a string literal.
      if (escaped) {
        escaped = false;
      } else if (char === '\\') {
        escaped = true;
      } else if (char === '"') {
        inString = false;
      }
      continue;
    }

    if (char === '"') {
      inString = true;
    } else if (char === opener) {
      depth++;
    } else if (char === closer) {
      depth--;
      if (depth === 0) return i;
    }
  }

  return -1;
}

/**
 * Robustly extract JSON from a string that may contain other text or Markdown blocks.
 *
 * Strategies, in order:
 *   1. The first fenced Markdown code block (with or without a `json` tag) whose body parses.
 *   2. The balanced `{...}` / `[...]` span starting at the first opener in the text.
 *   3. The whole trimmed text.
 *   4. Balanced spans starting at each later opener, so a non-JSON span such as
 *      `{placeholder}` or `[note]` earlier in the text does not hide real JSON after it.
 *
 * @param text Raw text, e.g. an LLM response.
 * @returns The parsed JSON value, or `null` when `text` is empty.
 * @throws Error when no strategy yields valid JSON.
 */
export function extractJson(text: string): unknown {
  if (!text) return null;

  // 1. Fenced Markdown code blocks take priority; use the first one that is valid JSON.
  const fencePattern = /```(?:json)?\s*([\s\S]*?)\s*```/gi;
  for (const match of text.matchAll(fencePattern)) {
    const fenced = tryParseJson((match[1] ?? '').trim());
    if (fenced.ok) return fenced.value;
  }

  // 2-4. Walk every opener. The whole-text attempt is slotted in right after the
  // first candidate so inputs that already parsed before keep the same result.
  // Restarting at `start + 1` (not after the failed span) also finds JSON nested
  // inside an invalid outer span; worst case is quadratic, which is acceptable
  // for response-sized text.
  let wholeTextTried = false;
  for (
    let start = findNextOpener(text, 0);
    start !== -1;
    start = findNextOpener(text, start + 1)
  ) {
    const end = findBalancedEnd(text, start);
    if (end !== -1) {
      const candidate = tryParseJson(text.substring(start, end + 1));
      if (candidate.ok) return candidate.value;
    }

    if (!wholeTextTried) {
      wholeTextTried = true;
      const whole = tryParseJson(text.trim());
      if (whole.ok) return whole.value;
    }
  }

  // No opener at all: the text may still be a bare JSON primitive or string.
  if (!wholeTextTried) {
    const whole = tryParseJson(text.trim());
    if (whole.ok) return whole.value;
  }

  throw new Error(
    `Failed to extract valid JSON from LLM response. Content: ${text.substring(0, 100)}...`
  );
}
@@ -29,6 +29,15 @@ export function generateMermaidGraph(workflow: Workflow): string {
29
29
  case 'shell':
30
30
  style = ':::shell';
31
31
  break;
32
+ case 'file':
33
+ style = ':::file';
34
+ break;
35
+ case 'request':
36
+ style = ':::request';
37
+ break;
38
+ case 'workflow':
39
+ style = ':::workflow';
40
+ break;
32
41
  default:
33
42
  style = ':::default';
34
43
  }
@@ -54,6 +63,9 @@ export function generateMermaidGraph(workflow: Workflow): string {
54
63
  ' classDef human fill:#fff3e0,stroke:#e65100,stroke-width:2px,stroke-dasharray: 5 5;'
55
64
  );
56
65
  lines.push(' classDef shell fill:#f3e5f5,stroke:#4a148c,stroke-width:1px;');
66
+ lines.push(' classDef file fill:#e8f5e9,stroke:#2e7d32,stroke-width:1px;');
67
+ lines.push(' classDef request fill:#fffde7,stroke:#fbc02d,stroke-width:1px;');
68
+ lines.push(' classDef workflow fill:#fce4ec,stroke:#c2185b,stroke-width:2px;');
57
69
  lines.push(' classDef default fill:#fff,stroke:#333,stroke-width:1px;');
58
70
 
59
71
  return lines.join('\n');