keystone-cli 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -1
- package/package.json +1 -1
- package/src/db/workflow-db.ts +26 -7
- package/src/expression/evaluator.ts +1 -0
- package/src/parser/agent-parser.test.ts +8 -5
- package/src/parser/schema.ts +8 -2
- package/src/runner/audit-verification.test.ts +106 -0
- package/src/runner/llm-adapter.ts +196 -4
- package/src/runner/llm-clarification.test.ts +182 -0
- package/src/runner/llm-executor.ts +118 -26
- package/src/runner/mcp-manager.ts +4 -1
- package/src/runner/mcp-server.test.ts +115 -1
- package/src/runner/mcp-server.ts +161 -4
- package/src/runner/shell-executor.ts +1 -1
- package/src/runner/step-executor.test.ts +33 -10
- package/src/runner/step-executor.ts +110 -14
- package/src/runner/workflow-runner.test.ts +132 -0
- package/src/runner/workflow-runner.ts +118 -23
- package/src/templates/agents/keystone-architect.md +13 -6
- package/src/ui/dashboard.tsx +32 -4
- package/src/utils/auth-manager.test.ts +31 -0
- package/src/utils/auth-manager.ts +21 -5
- package/src/utils/json-parser.test.ts +35 -0
- package/src/utils/json-parser.ts +95 -0
- package/src/utils/mermaid.ts +12 -0
- package/src/utils/sandbox.test.ts +12 -4
- package/src/utils/sandbox.ts +69 -49
|
@@ -25,7 +25,7 @@ class RedactingLogger implements Logger {
|
|
|
25
25
|
constructor(
|
|
26
26
|
private inner: Logger,
|
|
27
27
|
private redactor: Redactor
|
|
28
|
-
) {
|
|
28
|
+
) {}
|
|
29
29
|
|
|
30
30
|
log(msg: string): void {
|
|
31
31
|
this.inner.log(this.redactor.redact(msg));
|
|
@@ -48,12 +48,20 @@ export interface RunOptions {
|
|
|
48
48
|
mcpManager?: MCPManager;
|
|
49
49
|
preventExit?: boolean; // Defaults to false
|
|
50
50
|
workflowDir?: string;
|
|
51
|
+
resumeInputs?: Record<string, unknown>;
|
|
52
|
+
dryRun?: boolean;
|
|
51
53
|
}
|
|
52
54
|
|
|
53
55
|
export interface StepContext {
|
|
54
56
|
output?: unknown;
|
|
55
57
|
outputs?: Record<string, unknown>;
|
|
56
58
|
status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
|
|
59
|
+
error?: string;
|
|
60
|
+
usage?: {
|
|
61
|
+
prompt_tokens: number;
|
|
62
|
+
completion_tokens: number;
|
|
63
|
+
total_tokens: number;
|
|
64
|
+
};
|
|
57
65
|
}
|
|
58
66
|
|
|
59
67
|
// Type for foreach results - wraps array to ensure JSON serialization preserves all properties
|
|
@@ -97,7 +105,7 @@ export class WorkflowRunner {
|
|
|
97
105
|
// Resume existing run
|
|
98
106
|
this.runId = options.resumeRunId;
|
|
99
107
|
this.resumeRunId = options.resumeRunId;
|
|
100
|
-
this.inputs = {}; //
|
|
108
|
+
this.inputs = options.resumeInputs || {}; // Start with resume inputs, will be merged with DB inputs in restoreState
|
|
101
109
|
} else {
|
|
102
110
|
// Start new run
|
|
103
111
|
this.inputs = options.inputs || {};
|
|
@@ -131,8 +139,10 @@ export class WorkflowRunner {
|
|
|
131
139
|
}
|
|
132
140
|
|
|
133
141
|
// Restore inputs from the previous run to ensure consistency
|
|
142
|
+
// Merge with any resumeInputs provided (e.g. answers to human steps)
|
|
134
143
|
try {
|
|
135
|
-
|
|
144
|
+
const storedInputs = JSON.parse(run.inputs);
|
|
145
|
+
this.inputs = { ...storedInputs, ...this.inputs };
|
|
136
146
|
} catch (error) {
|
|
137
147
|
throw new Error(
|
|
138
148
|
`Failed to parse inputs from run: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -224,23 +234,33 @@ export class WorkflowRunner {
|
|
|
224
234
|
items.length === expectedCount &&
|
|
225
235
|
!Array.from({ length: expectedCount }).some((_, i) => !items[i]);
|
|
226
236
|
|
|
237
|
+
// Determine overall status based on iterations
|
|
238
|
+
let status: StepContext['status'] = 'success';
|
|
239
|
+
if (allSuccess && hasAllItems) {
|
|
240
|
+
status = 'success';
|
|
241
|
+
} else if (items.some((item) => item?.status === 'suspended')) {
|
|
242
|
+
status = 'suspended';
|
|
243
|
+
} else {
|
|
244
|
+
status = 'failed';
|
|
245
|
+
}
|
|
246
|
+
|
|
227
247
|
// Always restore what we have to allow partial expression evaluation
|
|
228
248
|
const mappedOutputs = this.aggregateOutputs(outputs);
|
|
229
249
|
this.stepContexts.set(stepId, {
|
|
230
250
|
output: outputs,
|
|
231
251
|
outputs: mappedOutputs,
|
|
232
|
-
status
|
|
252
|
+
status,
|
|
233
253
|
items,
|
|
234
254
|
} as ForeachStepContext);
|
|
235
255
|
|
|
236
256
|
// Only mark as fully completed if all iterations completed successfully AND we have all items
|
|
237
|
-
if (
|
|
257
|
+
if (status === 'success') {
|
|
238
258
|
completedStepIds.add(stepId);
|
|
239
259
|
}
|
|
240
260
|
} else {
|
|
241
261
|
// Single execution step
|
|
242
262
|
const exec = stepExecutions[0];
|
|
243
|
-
if (exec.status === 'success' || exec.status === 'skipped') {
|
|
263
|
+
if (exec.status === 'success' || exec.status === 'skipped' || exec.status === 'suspended') {
|
|
244
264
|
const output = exec.output ? JSON.parse(exec.output) : null;
|
|
245
265
|
this.stepContexts.set(stepId, {
|
|
246
266
|
output,
|
|
@@ -248,9 +268,11 @@ export class WorkflowRunner {
|
|
|
248
268
|
typeof output === 'object' && output !== null && !Array.isArray(output)
|
|
249
269
|
? (output as Record<string, unknown>)
|
|
250
270
|
: {},
|
|
251
|
-
status: exec.status as '
|
|
271
|
+
status: exec.status as StepContext['status'],
|
|
252
272
|
});
|
|
253
|
-
|
|
273
|
+
if (exec.status !== 'suspended') {
|
|
274
|
+
completedStepIds.add(stepId);
|
|
275
|
+
}
|
|
254
276
|
}
|
|
255
277
|
}
|
|
256
278
|
}
|
|
@@ -274,7 +296,7 @@ export class WorkflowRunner {
|
|
|
274
296
|
);
|
|
275
297
|
this.logger.log('✓ Run status updated to failed');
|
|
276
298
|
} catch (error) {
|
|
277
|
-
this.logger.error(
|
|
299
|
+
this.logger.error(`Error during cleanup: ${error}`);
|
|
278
300
|
}
|
|
279
301
|
|
|
280
302
|
// Only exit if not embedded
|
|
@@ -424,7 +446,7 @@ export class WorkflowRunner {
|
|
|
424
446
|
output: ctx.output,
|
|
425
447
|
outputs: ctx.outputs,
|
|
426
448
|
status: ctx.status,
|
|
427
|
-
items: ctx.items,
|
|
449
|
+
items: ctx.items,
|
|
428
450
|
};
|
|
429
451
|
} else {
|
|
430
452
|
stepsContext[stepId] = {
|
|
@@ -442,6 +464,9 @@ export class WorkflowRunner {
|
|
|
442
464
|
item,
|
|
443
465
|
index,
|
|
444
466
|
env: this.workflow.env,
|
|
467
|
+
output: item
|
|
468
|
+
? undefined
|
|
469
|
+
: this.stepContexts.get(this.workflow.steps.find((s) => !s.foreach)?.id || '')?.output,
|
|
445
470
|
};
|
|
446
471
|
}
|
|
447
472
|
|
|
@@ -487,7 +512,8 @@ export class WorkflowRunner {
|
|
|
487
512
|
this.logger,
|
|
488
513
|
this.executeSubWorkflow.bind(this),
|
|
489
514
|
this.mcpManager,
|
|
490
|
-
this.options.workflowDir
|
|
515
|
+
this.options.workflowDir,
|
|
516
|
+
this.options.dryRun
|
|
491
517
|
);
|
|
492
518
|
if (result.status === 'failed') {
|
|
493
519
|
throw new Error(result.error || 'Step failed');
|
|
@@ -509,11 +535,23 @@ export class WorkflowRunner {
|
|
|
509
535
|
});
|
|
510
536
|
|
|
511
537
|
if (result.status === 'suspended') {
|
|
512
|
-
await this.db.completeStep(
|
|
538
|
+
await this.db.completeStep(
|
|
539
|
+
stepExecId,
|
|
540
|
+
'suspended',
|
|
541
|
+
result.output,
|
|
542
|
+
'Waiting for interaction',
|
|
543
|
+
result.usage
|
|
544
|
+
);
|
|
513
545
|
return result;
|
|
514
546
|
}
|
|
515
547
|
|
|
516
|
-
await this.db.completeStep(
|
|
548
|
+
await this.db.completeStep(
|
|
549
|
+
stepExecId,
|
|
550
|
+
result.status,
|
|
551
|
+
result.output,
|
|
552
|
+
result.error,
|
|
553
|
+
result.usage
|
|
554
|
+
);
|
|
517
555
|
|
|
518
556
|
// Ensure outputs is always an object for consistent access
|
|
519
557
|
let outputs: Record<string, unknown>;
|
|
@@ -533,6 +571,7 @@ export class WorkflowRunner {
|
|
|
533
571
|
output: result.output,
|
|
534
572
|
outputs,
|
|
535
573
|
status: result.status,
|
|
574
|
+
usage: result.usage,
|
|
536
575
|
};
|
|
537
576
|
} catch (error) {
|
|
538
577
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
@@ -666,17 +705,40 @@ export class WorkflowRunner {
|
|
|
666
705
|
// 3. ${{ steps.id.items.every(s => s.status == 'success') }} -> works via items array
|
|
667
706
|
const outputs = itemResults.map((r) => r.output);
|
|
668
707
|
const allSuccess = itemResults.every((r) => r.status === 'success');
|
|
708
|
+
const anySuspended = itemResults.some((r) => r.status === 'suspended');
|
|
709
|
+
|
|
710
|
+
// Aggregate usage from all items
|
|
711
|
+
const aggregatedUsage = itemResults.reduce(
|
|
712
|
+
(acc, r) => {
|
|
713
|
+
if (r.usage) {
|
|
714
|
+
acc.prompt_tokens += r.usage.prompt_tokens;
|
|
715
|
+
acc.completion_tokens += r.usage.completion_tokens;
|
|
716
|
+
acc.total_tokens += r.usage.total_tokens;
|
|
717
|
+
}
|
|
718
|
+
return acc;
|
|
719
|
+
},
|
|
720
|
+
{ prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
|
|
721
|
+
);
|
|
669
722
|
|
|
670
723
|
// Map child properties for easier access
|
|
671
724
|
// If outputs are [{ id: 1 }, { id: 2 }], then outputs.id = [1, 2]
|
|
672
725
|
const mappedOutputs = this.aggregateOutputs(outputs);
|
|
673
726
|
|
|
727
|
+
// Determine final status
|
|
728
|
+
let finalStatus: StepContext['status'] = 'failed';
|
|
729
|
+
if (allSuccess) {
|
|
730
|
+
finalStatus = 'success';
|
|
731
|
+
} else if (anySuspended) {
|
|
732
|
+
finalStatus = 'suspended';
|
|
733
|
+
}
|
|
734
|
+
|
|
674
735
|
// Use proper object structure that serializes correctly
|
|
675
736
|
const aggregatedContext: ForeachStepContext = {
|
|
676
737
|
output: outputs,
|
|
677
738
|
outputs: mappedOutputs,
|
|
678
|
-
status:
|
|
739
|
+
status: finalStatus,
|
|
679
740
|
items: itemResults,
|
|
741
|
+
usage: aggregatedUsage,
|
|
680
742
|
};
|
|
681
743
|
|
|
682
744
|
this.stepContexts.set(step.id, aggregatedContext);
|
|
@@ -684,15 +746,29 @@ export class WorkflowRunner {
|
|
|
684
746
|
// Update parent step record with aggregated status
|
|
685
747
|
await this.db.completeStep(
|
|
686
748
|
parentStepExecId,
|
|
687
|
-
|
|
749
|
+
finalStatus,
|
|
688
750
|
aggregatedContext,
|
|
689
|
-
|
|
751
|
+
finalStatus === 'failed' ? 'One or more iterations failed' : undefined
|
|
690
752
|
);
|
|
691
753
|
|
|
692
|
-
if (
|
|
754
|
+
if (finalStatus === 'suspended') {
|
|
755
|
+
// If any iteration suspended, the whole step is suspended
|
|
756
|
+
// We assume for now that only human steps can suspend, and we'll use the first one's input type
|
|
757
|
+
const suspendedItem = itemResults.find((r) => r.status === 'suspended');
|
|
758
|
+
throw new WorkflowSuspendedError(
|
|
759
|
+
suspendedItem?.error || 'Iteration suspended',
|
|
760
|
+
step.id,
|
|
761
|
+
'text'
|
|
762
|
+
);
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
if (finalStatus === 'failed') {
|
|
693
766
|
throw new Error(`Step ${step.id} failed: one or more iterations failed`);
|
|
694
767
|
}
|
|
695
768
|
} catch (error) {
|
|
769
|
+
if (error instanceof WorkflowSuspendedError) {
|
|
770
|
+
throw error;
|
|
771
|
+
}
|
|
696
772
|
// Mark parent step as failed
|
|
697
773
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
698
774
|
await this.db.completeStep(parentStepExecId, 'failed', null, errorMsg);
|
|
@@ -709,7 +785,7 @@ export class WorkflowRunner {
|
|
|
709
785
|
this.stepContexts.set(step.id, result);
|
|
710
786
|
|
|
711
787
|
if (result.status === 'suspended') {
|
|
712
|
-
const inputType = step.type === 'human' ? step.inputType : '
|
|
788
|
+
const inputType = step.type === 'human' ? step.inputType : 'text';
|
|
713
789
|
throw new WorkflowSuspendedError(result.error || 'Workflow suspended', step.id, inputType);
|
|
714
790
|
}
|
|
715
791
|
|
|
@@ -779,13 +855,13 @@ export class WorkflowRunner {
|
|
|
779
855
|
await this.restoreState();
|
|
780
856
|
}
|
|
781
857
|
|
|
782
|
-
const isResume = this.stepContexts.size > 0;
|
|
858
|
+
const isResume = !!this.resumeRunId || this.stepContexts.size > 0;
|
|
783
859
|
|
|
784
860
|
this.logger.log(`\n🏛️ ${isResume ? 'Resuming' : 'Running'} workflow: ${this.workflow.name}`);
|
|
785
861
|
this.logger.log(`Run ID: ${this.runId}`);
|
|
786
862
|
this.logger.log(
|
|
787
863
|
'\n⚠️ Security Warning: Only run workflows from trusted sources.\n' +
|
|
788
|
-
|
|
864
|
+
' Workflows can execute arbitrary shell commands and access your environment.\n'
|
|
789
865
|
);
|
|
790
866
|
|
|
791
867
|
// Apply defaults and validate inputs
|
|
@@ -803,7 +879,13 @@ export class WorkflowRunner {
|
|
|
803
879
|
const stepMap = new Map(this.workflow.steps.map((s) => [s.id, s]));
|
|
804
880
|
|
|
805
881
|
// Initialize completedSteps with already completed steps (for resume)
|
|
806
|
-
|
|
882
|
+
// Only include steps that were successful or skipped, so failed steps are retried
|
|
883
|
+
const completedSteps = new Set<string>();
|
|
884
|
+
for (const [id, ctx] of this.stepContexts.entries()) {
|
|
885
|
+
if (ctx.status === 'success' || ctx.status === 'skipped') {
|
|
886
|
+
completedSteps.add(id);
|
|
887
|
+
}
|
|
888
|
+
}
|
|
807
889
|
|
|
808
890
|
// Filter out already completed steps from execution order
|
|
809
891
|
const remainingSteps = executionOrder.filter((stepId) => !completedSteps.has(stepId));
|
|
@@ -826,7 +908,20 @@ export class WorkflowRunner {
|
|
|
826
908
|
const totalSteps = executionOrder.length;
|
|
827
909
|
const stepIndices = new Map(executionOrder.map((id, index) => [id, index + 1]));
|
|
828
910
|
|
|
829
|
-
//
|
|
911
|
+
// Evaluate global concurrency limit
|
|
912
|
+
let globalConcurrencyLimit = remainingSteps.length;
|
|
913
|
+
if (this.workflow.concurrency !== undefined) {
|
|
914
|
+
const baseContext = this.buildContext();
|
|
915
|
+
if (typeof this.workflow.concurrency === 'string') {
|
|
916
|
+
globalConcurrencyLimit = Number(
|
|
917
|
+
ExpressionEvaluator.evaluate(this.workflow.concurrency, baseContext)
|
|
918
|
+
);
|
|
919
|
+
} else {
|
|
920
|
+
globalConcurrencyLimit = this.workflow.concurrency;
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
// Execute steps in parallel where possible (respecting dependencies and global concurrency)
|
|
830
925
|
const pendingSteps = new Set(remainingSteps);
|
|
831
926
|
const runningPromises = new Map<string, Promise<void>>();
|
|
832
927
|
|
|
@@ -840,7 +935,7 @@ export class WorkflowRunner {
|
|
|
840
935
|
}
|
|
841
936
|
const dependenciesMet = step.needs.every((dep: string) => completedSteps.has(dep));
|
|
842
937
|
|
|
843
|
-
if (dependenciesMet) {
|
|
938
|
+
if (dependenciesMet && runningPromises.size < globalConcurrencyLimit) {
|
|
844
939
|
pendingSteps.delete(stepId);
|
|
845
940
|
|
|
846
941
|
// Start execution
|
|
@@ -11,26 +11,31 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
|
|
|
11
11
|
|
|
12
12
|
## Workflow Schema (.yaml)
|
|
13
13
|
- **name**: Unique identifier for the workflow.
|
|
14
|
+
- **description**: (Optional) Description of the workflow.
|
|
14
15
|
- **inputs**: Map of `{ type: string, default: any, description: string }` under the `inputs` key.
|
|
15
16
|
- **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
|
|
17
|
+
- **env**: (Optional) Map of workflow-level environment variables.
|
|
18
|
+
- **concurrency**: (Optional) Global concurrency limit for the workflow (number or expression).
|
|
16
19
|
- **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
|
|
17
20
|
- **shell**: `{ id, type: 'shell', run, dir, env, transform }`
|
|
18
|
-
- **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, mcpServers }`
|
|
21
|
+
- **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, allowClarification, mcpServers }`
|
|
19
22
|
- **workflow**: `{ id, type: 'workflow', path, inputs }`
|
|
20
23
|
- **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
|
|
21
24
|
- **request**: `{ id, type: 'request', url, method, body, headers }`
|
|
22
25
|
    - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns a boolean but automatically falls back to text if the input is not yes/no)
|
|
23
|
-
- **sleep**: `{ id, type: 'sleep', duration }`
|
|
24
|
-
- **script**: `{ id, type: 'script', run }` (Executes JS in a secure sandbox)
|
|
25
|
-
- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`, `transform`.
|
|
26
|
+
- **sleep**: `{ id, type: 'sleep', duration }` (duration can be a number or expression string)
|
|
27
|
+
- **script**: `{ id, type: 'script', run, allowInsecure }` (Executes JS in a secure sandbox; set allowInsecure to true to allow fallback to insecure VM)
|
|
28
|
+
- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `timeout` (ms), `retry`, `foreach`, `concurrency`, `transform`.
|
|
26
29
|
- **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
|
|
27
30
|
- **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
|
|
28
31
|
|
|
29
32
|
## Agent Schema (.md)
|
|
30
33
|
Markdown files with YAML frontmatter:
|
|
31
34
|
- **name**: Agent name.
|
|
35
|
+
- **description**: (Optional) Agent description.
|
|
36
|
+
- **provider**: (Optional) Provider name.
|
|
32
37
|
- **model**: (Optional) e.g., `gpt-4o`, `claude-sonnet-4.5`.
|
|
33
|
-
- **tools**: Array of `{ name, parameters, execution }` where `execution` is a standard Step object.
|
|
38
|
+
- **tools**: Array of `{ name, description, parameters, execution }` where `execution` is a standard Step object and `parameters` is a JSON Schema.
|
|
34
39
|
- **Body**: The Markdown body is the `systemPrompt`.
|
|
35
40
|
|
|
36
41
|
## Expression Syntax
|
|
@@ -43,9 +48,11 @@ Markdown files with YAML frontmatter:
|
|
|
43
48
|
# Guidelines
|
|
44
49
|
- **User Interaction**: Use `human` steps when user input or approval is needed.
|
|
45
50
|
- **Error Handling**: Use `retry` for flaky operations and `finally` for cleanup (e.g., removing temp files).
|
|
51
|
+
- **Timeouts**: Set `timeout` on steps that might hang or take too long.
|
|
46
52
|
- **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
|
|
47
53
|
- **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
|
|
48
|
-
- **
|
|
54
|
+
- **Clarification**: Enable `allowClarification` in `llm` steps if the agent should be able to ask the user for missing info.
|
|
55
|
+
- **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems. `mcpServers` can be a list of server names or configuration objects `{ name, command, args, env }`.
|
|
49
56
|
|
|
50
57
|
# Output Instructions
|
|
51
58
|
When asked to design a feature:
|
package/src/ui/dashboard.tsx
CHANGED
|
@@ -7,6 +7,7 @@ interface Run {
|
|
|
7
7
|
workflow_name: string;
|
|
8
8
|
status: string;
|
|
9
9
|
started_at: string;
|
|
10
|
+
total_tokens?: number;
|
|
10
11
|
}
|
|
11
12
|
|
|
12
13
|
const Dashboard = () => {
|
|
@@ -16,8 +17,27 @@ const Dashboard = () => {
|
|
|
16
17
|
const fetchData = useCallback(() => {
|
|
17
18
|
const db = new WorkflowDb();
|
|
18
19
|
try {
|
|
19
|
-
const recentRuns = db.listRuns(10);
|
|
20
|
-
|
|
20
|
+
const recentRuns = db.listRuns(10) as (Run & { outputs: string | null })[];
|
|
21
|
+
const runsWithUsage = recentRuns.map((run) => {
|
|
22
|
+
let total_tokens = 0;
|
|
23
|
+
try {
|
|
24
|
+
// Get steps to aggregate tokens if not in outputs (future-proofing)
|
|
25
|
+
const steps = db.getStepsByRun(run.id);
|
|
26
|
+
total_tokens = steps.reduce((sum, s) => {
|
|
27
|
+
if (s.usage) {
|
|
28
|
+
try {
|
|
29
|
+
const u = JSON.parse(s.usage);
|
|
30
|
+
return sum + (u.total_tokens || 0);
|
|
31
|
+
} catch (e) {
|
|
32
|
+
return sum;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
return sum;
|
|
36
|
+
}, 0);
|
|
37
|
+
} catch (e) {}
|
|
38
|
+
return { ...run, total_tokens };
|
|
39
|
+
});
|
|
40
|
+
setRuns(runsWithUsage);
|
|
21
41
|
} catch (error) {
|
|
22
42
|
console.error('Failed to fetch runs:', error);
|
|
23
43
|
} finally {
|
|
@@ -71,11 +91,16 @@ const Dashboard = () => {
|
|
|
71
91
|
STATUS
|
|
72
92
|
</Text>
|
|
73
93
|
</Box>
|
|
74
|
-
<Box>
|
|
94
|
+
<Box width={15}>
|
|
75
95
|
<Text bold color="cyan">
|
|
76
96
|
STARTED
|
|
77
97
|
</Text>
|
|
78
98
|
</Box>
|
|
99
|
+
<Box>
|
|
100
|
+
<Text bold color="cyan">
|
|
101
|
+
TOKENS
|
|
102
|
+
</Text>
|
|
103
|
+
</Box>
|
|
79
104
|
</Box>
|
|
80
105
|
|
|
81
106
|
<Box marginBottom={1}>
|
|
@@ -100,8 +125,11 @@ const Dashboard = () => {
|
|
|
100
125
|
{getStatusIcon(run.status)} {run.status.toUpperCase()}
|
|
101
126
|
</Text>
|
|
102
127
|
</Box>
|
|
128
|
+
<Box width={15}>
|
|
129
|
+
<Text color="gray">{new Date(run.started_at).toLocaleTimeString()}</Text>
|
|
130
|
+
</Box>
|
|
103
131
|
<Box>
|
|
104
|
-
<Text color="
|
|
132
|
+
<Text color="yellow">{run.total_tokens || 0}</Text>
|
|
105
133
|
</Box>
|
|
106
134
|
</Box>
|
|
107
135
|
))
|
|
@@ -234,5 +234,36 @@ describe('AuthManager', () => {
|
|
|
234
234
|
'The device code has expired'
|
|
235
235
|
);
|
|
236
236
|
});
|
|
237
|
+
|
|
238
|
+
it('pollGitHubDeviceLogin should timeout after 15 minutes', async () => {
|
|
239
|
+
// Mock fetch to always return authorization_pending
|
|
240
|
+
// @ts-ignore
|
|
241
|
+
global.fetch = mock(() =>
|
|
242
|
+
Promise.resolve(
|
|
243
|
+
new Response(
|
|
244
|
+
JSON.stringify({
|
|
245
|
+
error: 'authorization_pending',
|
|
246
|
+
}),
|
|
247
|
+
{ status: 200 }
|
|
248
|
+
)
|
|
249
|
+
)
|
|
250
|
+
);
|
|
251
|
+
|
|
252
|
+
// Mock Date.now to simulate time passing
|
|
253
|
+
let now = Date.now();
|
|
254
|
+
const dateSpy = spyOn(Date, 'now').mockImplementation(() => {
|
|
255
|
+
const current = now;
|
|
256
|
+
now += 1000 * 60 * 16; // Advance 16 minutes on each call to trigger timeout immediately
|
|
257
|
+
return current;
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
try {
|
|
261
|
+
await expect(AuthManager.pollGitHubDeviceLogin('dev_code')).rejects.toThrow(
|
|
262
|
+
'Device login timed out'
|
|
263
|
+
);
|
|
264
|
+
} finally {
|
|
265
|
+
dateSpy.mockRestore();
|
|
266
|
+
}
|
|
267
|
+
});
|
|
237
268
|
});
|
|
238
269
|
});
|
|
@@ -88,7 +88,12 @@ export class AuthManager {
|
|
|
88
88
|
}>;
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
-
static async pollGitHubDeviceLogin(
|
|
91
|
+
static async pollGitHubDeviceLogin(
|
|
92
|
+
deviceCode: string,
|
|
93
|
+
intervalSeconds = 5,
|
|
94
|
+
expiresInSeconds = 900
|
|
95
|
+
): Promise<string> {
|
|
96
|
+
let currentInterval = intervalSeconds;
|
|
92
97
|
const poll = async (): Promise<string> => {
|
|
93
98
|
const response = await fetch('https://github.com/login/oauth/access_token', {
|
|
94
99
|
method: 'POST',
|
|
@@ -121,16 +126,27 @@ export class AuthManager {
|
|
|
121
126
|
return ''; // Continue polling
|
|
122
127
|
}
|
|
123
128
|
|
|
129
|
+
if (data.error === 'slow_down') {
|
|
130
|
+
// According to GitHub docs, "slow_down" means wait 5 seconds more
|
|
131
|
+
currentInterval += 5;
|
|
132
|
+
return '';
|
|
133
|
+
}
|
|
134
|
+
|
|
124
135
|
throw new Error(data.error_description || data.error || 'Failed to get access token');
|
|
125
136
|
};
|
|
126
137
|
|
|
127
|
-
//
|
|
128
|
-
|
|
129
|
-
|
|
138
|
+
// Use interval and expiration from parameters
|
|
139
|
+
const startTime = Date.now();
|
|
140
|
+
const timeout = expiresInSeconds * 1000;
|
|
141
|
+
|
|
142
|
+
while (Date.now() - startTime < timeout) {
|
|
130
143
|
const token = await poll();
|
|
131
144
|
if (token) return token;
|
|
132
|
-
|
|
145
|
+
// Convert seconds to milliseconds
|
|
146
|
+
await new Promise((resolve) => setTimeout(resolve, currentInterval * 1000));
|
|
133
147
|
}
|
|
148
|
+
|
|
149
|
+
throw new Error('Device login timed out');
|
|
134
150
|
}
|
|
135
151
|
|
|
136
152
|
static async getCopilotToken(): Promise<string | undefined> {
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { describe, expect, it } from 'bun:test';
|
|
2
|
+
import { extractJson } from './json-parser';
|
|
3
|
+
|
|
4
|
+
// Behavioural tests for extractJson: fenced Markdown blocks, bare JSON embedded in
// prose, string/escape-aware brace matching, and the failure path.
describe('json-parser', () => {
  it('should extract JSON from markdown code blocks', () => {
    const text = 'Here is the data:\n```json\n{"foo": "bar"}\n```\nHope that helps!';
    expect(extractJson(text)).toEqual({ foo: 'bar' });
  });

  it('should fall back to a later code block when the first is not JSON', () => {
    const text = 'Log:\n```\nnot json\n```\nResult:\n```json\n{"ok": true}\n```';
    expect(extractJson(text)).toEqual({ ok: true });
  });

  it('should extract JSON without markdown wrappers', () => {
    const text = 'The result is {"key": "value"} and it works.';
    expect(extractJson(text)).toEqual({ key: 'value' });
  });

  it('should handle nested structures with balanced braces', () => {
    const text =
      'Conversational preamble... {"outer": {"inner": [1, 2, 3]}, "active": true} conversational postscript.';
    expect(extractJson(text)).toEqual({ outer: { inner: [1, 2, 3] }, active: true });
  });

  // The brace here is NOT escaped; it is simply inside a string value and must not
  // be counted by the brace matcher.
  it('should ignore braces inside string values', () => {
    const text = 'Data: {"msg": "found a } brace", "id": 1}';
    expect(extractJson(text)).toEqual({ msg: 'found a } brace', id: 1 });
  });

  // Exercises the backslash-escape branch: an escaped quote must not end the string,
  // otherwise the following "}" would be treated as the closing brace.
  it('should handle escaped quotes inside strings', () => {
    const text = 'Data: {"msg": "say \\"hi\\" }", "id": 2}';
    expect(extractJson(text)).toEqual({ msg: 'say "hi" }', id: 2 });
  });

  it('should handle array root objects', () => {
    const text = 'List: [{"id": 1}, {"id": 2}]';
    expect(extractJson(text)).toEqual([{ id: 1 }, { id: 2 }]);
  });

  it('should return null for empty input', () => {
    expect(extractJson('')).toBeNull();
  });

  it('should throw if no JSON is found', () => {
    const text = 'Hello world, no JSON here!';
    expect(() => extractJson(text)).toThrow(/Failed to extract valid JSON/);
  });
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/** Outcome of one `JSON.parse` attempt. `value` may legitimately be `null`. */
type ParseAttempt = { ok: true; value: unknown } | { ok: false };

/**
 * Upper bound on how many `{` / `[` start positions the fallback scan will try.
 * Each attempt may walk the rest of the text, so the bound keeps the failure path
 * from going quadratic on large responses that contain many braces but no JSON.
 */
const MAX_JSON_CANDIDATES = 64;

/** Parse `candidate` as JSON, reporting failure as a value instead of throwing. */
function tryParseJson(candidate: string): ParseAttempt {
  try {
    const value: unknown = JSON.parse(candidate);
    return { ok: true, value };
  } catch {
    return { ok: false };
  }
}

/** Index of the first `{` or `[` at or after `from`, or -1 when there is none. */
function nextOpenerIndex(text: string, from: number): number {
  for (let i = from; i < text.length; i++) {
    const ch = text[i];
    if (ch === '{' || ch === '[') return i;
  }
  return -1;
}

/**
 * Starting at the opener at `start`, return the index of the closer that brings the
 * nesting depth back to zero, or -1 if the text ends first.
 *
 * Only the opener's own bracket type is counted. Characters inside double-quoted
 * strings are ignored, and a backslash skips the character that follows it.
 */
function findBalancedEnd(text: string, start: number): number {
  const opener = text[start];
  const closer = opener === '{' ? '}' : ']';
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (escaped) {
      escaped = false;
      continue;
    }
    if (ch === '\\') {
      escaped = true;
      continue;
    }
    if (ch === '"') {
      inString = !inString;
      continue;
    }
    if (inString) continue;

    if (ch === opener) {
      depth++;
    } else if (ch === closer) {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/** Try to parse the balanced `{...}` / `[...]` span that begins at `start`. */
function parseBalancedAt(text: string, start: number): ParseAttempt {
  const end = findBalancedEnd(text, start);
  return end === -1 ? { ok: false } : tryParseJson(text.substring(start, end + 1));
}

/**
 * Robustly extract JSON from a string that may contain other text or Markdown blocks.
 *
 * Strategies, in order (the first that yields valid JSON wins):
 *   1. The contents of each fenced Markdown code block.
 *   2. The balanced span starting at the first `{` or `[` in the text.
 *   3. The whole trimmed text (covers bare primitives such as `42` or `"str"`).
 *   4. Balanced spans starting at later `{` / `[` positions, so a stray bracket in
 *      the surrounding prose (e.g. `See [note]: {"a": 1}`) no longer hides the payload.
 *
 * @param text Raw text, typically an LLM response.
 * @returns The parsed JSON value, or `null` when `text` is empty.
 * @throws Error when no strategy produces valid JSON.
 */
export function extractJson(text: string): unknown {
  if (!text) return null;

  // 1. Fenced Markdown code blocks, in document order.
  for (const match of text.matchAll(/```(?:json)?\s*([\s\S]*?)\s*```/gi)) {
    const attempt = tryParseJson((match[1] ?? '').trim());
    if (attempt.ok) return attempt.value;
  }

  // 2. The first `{` / `[` and its balanced closing counterpart.
  let start = nextOpenerIndex(text, 0);
  if (start !== -1) {
    const attempt = parseBalancedAt(text, start);
    if (attempt.ok) return attempt.value;
  }

  // 3. The whole thing as is (after trimming).
  const whole = tryParseJson(text.trim());
  if (whole.ok) return whole.value;

  // 4. Later start positions. This runs only after the strategies above have failed,
  //    so every input that parsed before still returns the same value.
  for (let tried = 1; start !== -1 && tried < MAX_JSON_CANDIDATES; tried++) {
    start = nextOpenerIndex(text, start + 1);
    if (start === -1) break;
    const attempt = parseBalancedAt(text, start);
    if (attempt.ok) return attempt.value;
  }

  throw new Error(
    `Failed to extract valid JSON from LLM response. Content: ${text.substring(0, 100)}...`
  );
}
|
package/src/utils/mermaid.ts
CHANGED
|
@@ -29,6 +29,15 @@ export function generateMermaidGraph(workflow: Workflow): string {
|
|
|
29
29
|
case 'shell':
|
|
30
30
|
style = ':::shell';
|
|
31
31
|
break;
|
|
32
|
+
case 'file':
|
|
33
|
+
style = ':::file';
|
|
34
|
+
break;
|
|
35
|
+
case 'request':
|
|
36
|
+
style = ':::request';
|
|
37
|
+
break;
|
|
38
|
+
case 'workflow':
|
|
39
|
+
style = ':::workflow';
|
|
40
|
+
break;
|
|
32
41
|
default:
|
|
33
42
|
style = ':::default';
|
|
34
43
|
}
|
|
@@ -54,6 +63,9 @@ export function generateMermaidGraph(workflow: Workflow): string {
|
|
|
54
63
|
' classDef human fill:#fff3e0,stroke:#e65100,stroke-width:2px,stroke-dasharray: 5 5;'
|
|
55
64
|
);
|
|
56
65
|
lines.push(' classDef shell fill:#f3e5f5,stroke:#4a148c,stroke-width:1px;');
|
|
66
|
+
lines.push(' classDef file fill:#e8f5e9,stroke:#2e7d32,stroke-width:1px;');
|
|
67
|
+
lines.push(' classDef request fill:#fffde7,stroke:#fbc02d,stroke-width:1px;');
|
|
68
|
+
lines.push(' classDef workflow fill:#fce4ec,stroke:#c2185b,stroke-width:2px;');
|
|
57
69
|
lines.push(' classDef default fill:#fff,stroke:#333,stroke-width:1px;');
|
|
58
70
|
|
|
59
71
|
return lines.join('\n');
|