keystone-cli 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/package.json +7 -2
- package/src/cli.ts +64 -10
- package/src/parser/schema.ts +19 -9
- package/src/runner/mcp-server.test.ts +22 -15
- package/src/runner/mcp-server.ts +21 -4
- package/src/runner/step-executor.test.ts +49 -6
- package/src/runner/step-executor.ts +51 -3
- package/src/runner/workflow-runner.ts +56 -22
- package/src/templates/agents/keystone-architect.md +12 -3
- package/src/templates/full-feature-demo.yaml +5 -0
- package/src/utils/mermaid.test.ts +18 -42
- package/src/utils/mermaid.ts +154 -20
- package/src/utils/redactor.test.ts +6 -0
- package/src/utils/redactor.ts +10 -1
- package/src/utils/sandbox.test.ts +29 -0
- package/src/utils/sandbox.ts +61 -0
package/README.md
CHANGED
|
@@ -136,8 +136,8 @@ mcp_servers:
|
|
|
136
136
|
github:
|
|
137
137
|
command: npx
|
|
138
138
|
args: ["-y", "@modelcontextprotocol/server-github"]
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
env:
|
|
140
|
+
GITHUB_PERSONAL_ACCESS_TOKEN: "your-github-pat" # Or omit if GITHUB_TOKEN is in your .env
|
|
141
141
|
|
|
142
142
|
storage:
|
|
143
143
|
|
|
@@ -265,6 +265,7 @@ Keystone supports several specialized step types:
|
|
|
265
265
|
- `inputType: confirm`: Simple Enter-to-continue prompt.
|
|
266
266
|
- `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
|
|
267
267
|
- `workflow`: Trigger another workflow as a sub-step.
|
|
268
|
+
- `script`: Run arbitrary JavaScript in a secure sandbox (`isolated-vm` with fallback to `node:vm`).
|
|
268
269
|
- `sleep`: Pause execution for a specified duration.
|
|
269
270
|
|
|
270
271
|
All steps support common features like `needs` (dependencies), `if` (conditionals), `retry`, `timeout`, `foreach` (parallel iteration), and `transform` (post-process output using expressions).
|
|
@@ -327,7 +328,7 @@ You are a software developer. You can use tools to explore the codebase.
|
|
|
327
328
|
Keystone can itself act as an MCP server, allowing other agents (like Claude Desktop or GitHub Copilot) to discover and run your workflows as tools.
|
|
328
329
|
|
|
329
330
|
```bash
|
|
330
|
-
keystone mcp
|
|
331
|
+
keystone mcp start
|
|
331
332
|
```
|
|
332
333
|
|
|
333
334
|
> **Note:** Workflow execution via the Keystone MCP server is synchronous. This provides a better experience for agents as they receive the final results directly, though it means the connection remains open for the duration of the workflow run.
|
|
@@ -396,7 +397,8 @@ In these examples, the agent will have access to all tools provided by the MCP s
|
|
|
396
397
|
| `auth login [provider]` | Login to an authentication provider (github, openai, anthropic) |
|
|
397
398
|
| `auth logout [provider]` | Logout and clear authentication tokens |
|
|
398
399
|
| `ui` | Open the interactive TUI dashboard |
|
|
399
|
-
| `mcp` | Start the Keystone MCP server |
|
|
400
|
+
| `mcp start` | Start the Keystone MCP server |
|
|
401
|
+
| `mcp login <server>` | Login to a remote MCP server |
|
|
400
402
|
| `completion [shell]` | Generate shell completion script (zsh, bash) |
|
|
401
403
|
| `prune [--days N]` | Cleanup old run data from the database |
|
|
402
404
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "keystone-cli",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -38,9 +38,11 @@
|
|
|
38
38
|
"@jsep-plugin/object": "^1.2.2",
|
|
39
39
|
"@types/react": "^19.2.7",
|
|
40
40
|
"commander": "^12.1.0",
|
|
41
|
+
"dagre": "^0.8.5",
|
|
41
42
|
"ink": "^6.5.1",
|
|
42
43
|
"ink-select-input": "3.1.2",
|
|
43
44
|
"ink-spinner": "^5.0.0",
|
|
45
|
+
"isolated-vm": "^6.0.2",
|
|
44
46
|
"js-yaml": "^4.1.0",
|
|
45
47
|
"jsep": "^1.4.0",
|
|
46
48
|
"react": "^19.2.3",
|
|
@@ -48,7 +50,10 @@
|
|
|
48
50
|
},
|
|
49
51
|
"devDependencies": {
|
|
50
52
|
"@biomejs/biome": "^1.9.4",
|
|
51
|
-
"@types/
|
|
53
|
+
"@types/bun": "^1.3.5",
|
|
54
|
+
"@types/dagre": "^0.7.53",
|
|
55
|
+
"@types/js-yaml": "^4.0.9",
|
|
56
|
+
"@types/node": "^25.0.3"
|
|
52
57
|
},
|
|
53
58
|
"engines": {
|
|
54
59
|
"bun": ">=1.0.0"
|
package/src/cli.ts
CHANGED
|
@@ -12,7 +12,7 @@ import scaffoldWorkflow from './templates/scaffold-feature.yaml' with { type: 't
|
|
|
12
12
|
import { WorkflowDb } from './db/workflow-db.ts';
|
|
13
13
|
import { WorkflowParser } from './parser/workflow-parser.ts';
|
|
14
14
|
import { ConfigLoader } from './utils/config-loader.ts';
|
|
15
|
-
import { generateMermaidGraph, renderMermaidAsAscii } from './utils/mermaid.ts';
|
|
15
|
+
import { generateMermaidGraph, renderWorkflowAsAscii } from './utils/mermaid.ts';
|
|
16
16
|
import { WorkflowRegistry } from './utils/workflow-registry.ts';
|
|
17
17
|
|
|
18
18
|
import pkg from '../package.json' with { type: 'json' };
|
|
@@ -204,12 +204,11 @@ program
|
|
|
204
204
|
try {
|
|
205
205
|
const resolvedPath = WorkflowRegistry.resolvePath(workflowPath);
|
|
206
206
|
const workflow = WorkflowParser.loadWorkflow(resolvedPath);
|
|
207
|
-
const
|
|
208
|
-
|
|
209
|
-
const ascii = await renderMermaidAsAscii(mermaid);
|
|
207
|
+
const ascii = renderWorkflowAsAscii(workflow);
|
|
210
208
|
if (ascii) {
|
|
211
209
|
console.log(`\n${ascii}\n`);
|
|
212
210
|
} else {
|
|
211
|
+
const mermaid = generateMermaidGraph(workflow);
|
|
213
212
|
console.log('\n```mermaid');
|
|
214
213
|
console.log(mermaid);
|
|
215
214
|
console.log('```\n');
|
|
@@ -614,11 +613,15 @@ const auth = program.command('auth').description('Authentication management');
|
|
|
614
613
|
auth
|
|
615
614
|
.command('login')
|
|
616
615
|
.description('Login to an authentication provider')
|
|
617
|
-
.
|
|
616
|
+
.argument('[provider]', 'Authentication provider', 'github')
|
|
617
|
+
.option(
|
|
618
|
+
'-p, --provider <provider>',
|
|
619
|
+
'Authentication provider (deprecated, use positional argument)'
|
|
620
|
+
)
|
|
618
621
|
.option('-t, --token <token>', 'Personal Access Token (if not using interactive mode)')
|
|
619
|
-
.action(async (options) => {
|
|
622
|
+
.action(async (providerArg, options) => {
|
|
620
623
|
const { AuthManager } = await import('./utils/auth-manager.ts');
|
|
621
|
-
const provider = options.provider.toLowerCase();
|
|
624
|
+
const provider = (options.provider || providerArg).toLowerCase();
|
|
622
625
|
|
|
623
626
|
if (provider === 'github') {
|
|
624
627
|
let token = options.token;
|
|
@@ -675,6 +678,31 @@ auth
|
|
|
675
678
|
console.error('✗ No token provided.');
|
|
676
679
|
process.exit(1);
|
|
677
680
|
}
|
|
681
|
+
} else if (provider === 'openai' || provider === 'anthropic') {
|
|
682
|
+
let key = options.token; // Use --token if provided as the API key
|
|
683
|
+
|
|
684
|
+
if (!key) {
|
|
685
|
+
console.log(`\n🔑 Login to ${provider.toUpperCase()}`);
|
|
686
|
+
console.log(` Please provide your ${provider.toUpperCase()} API key.\n`);
|
|
687
|
+
const prompt = 'API Key: ';
|
|
688
|
+
process.stdout.write(prompt);
|
|
689
|
+
for await (const line of console) {
|
|
690
|
+
key = line.trim();
|
|
691
|
+
break;
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
if (key) {
|
|
696
|
+
if (provider === 'openai') {
|
|
697
|
+
AuthManager.save({ openai_api_key: key });
|
|
698
|
+
} else {
|
|
699
|
+
AuthManager.save({ anthropic_api_key: key });
|
|
700
|
+
}
|
|
701
|
+
console.log(`\n✓ Successfully saved ${provider.toUpperCase()} API key.`);
|
|
702
|
+
} else {
|
|
703
|
+
console.error('✗ No API key provided.');
|
|
704
|
+
process.exit(1);
|
|
705
|
+
}
|
|
678
706
|
} else {
|
|
679
707
|
console.error(`✗ Unsupported provider: ${provider}`);
|
|
680
708
|
process.exit(1);
|
|
@@ -702,13 +730,33 @@ auth
|
|
|
702
730
|
}
|
|
703
731
|
} else if (provider) {
|
|
704
732
|
console.log(
|
|
705
|
-
` ⊘ Not logged into GitHub. Run "keystone auth login
|
|
733
|
+
` ⊘ Not logged into GitHub. Run "keystone auth login github" to authenticate.`
|
|
734
|
+
);
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
if (!provider || provider === 'openai') {
|
|
739
|
+
if (auth.openai_api_key) {
|
|
740
|
+
console.log(' ✓ OpenAI API key configured');
|
|
741
|
+
} else if (provider) {
|
|
742
|
+
console.log(
|
|
743
|
+
` ⊘ OpenAI API key not configured. Run "keystone auth login openai" to authenticate.`
|
|
744
|
+
);
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
if (!provider || provider === 'anthropic') {
|
|
749
|
+
if (auth.anthropic_api_key) {
|
|
750
|
+
console.log(' ✓ Anthropic API key configured');
|
|
751
|
+
} else if (provider) {
|
|
752
|
+
console.log(
|
|
753
|
+
` ⊘ Anthropic API key not configured. Run "keystone auth login anthropic" to authenticate.`
|
|
706
754
|
);
|
|
707
755
|
}
|
|
708
756
|
}
|
|
709
757
|
|
|
710
|
-
if (!auth.github_token && !provider) {
|
|
711
|
-
console.log(' ⊘
|
|
758
|
+
if (!auth.github_token && !auth.openai_api_key && !auth.anthropic_api_key && !provider) {
|
|
759
|
+
console.log(' ⊘ No providers configured. Run "keystone auth login" to authenticate.');
|
|
712
760
|
}
|
|
713
761
|
});
|
|
714
762
|
|
|
@@ -731,6 +779,12 @@ auth
|
|
|
731
779
|
copilot_expires_at: undefined,
|
|
732
780
|
});
|
|
733
781
|
console.log('✓ Successfully logged out of GitHub.');
|
|
782
|
+
} else if (provider === 'openai') {
|
|
783
|
+
AuthManager.save({ openai_api_key: undefined });
|
|
784
|
+
console.log('✓ Successfully cleared OpenAI API key.');
|
|
785
|
+
} else if (provider === 'anthropic') {
|
|
786
|
+
AuthManager.save({ anthropic_api_key: undefined });
|
|
787
|
+
console.log('✓ Successfully cleared Anthropic API key.');
|
|
734
788
|
} else {
|
|
735
789
|
console.error(`✗ Unknown provider: ${provider}`);
|
|
736
790
|
process.exit(1);
|
package/src/parser/schema.ts
CHANGED
|
@@ -105,17 +105,26 @@ const SleepStepSchema = BaseStepSchema.extend({
|
|
|
105
105
|
duration: z.union([z.number().int().positive(), z.string()]),
|
|
106
106
|
});
|
|
107
107
|
|
|
108
|
+
const ScriptStepSchema = BaseStepSchema.extend({
|
|
109
|
+
type: z.literal('script'),
|
|
110
|
+
run: z.string(),
|
|
111
|
+
});
|
|
112
|
+
|
|
108
113
|
// ===== Discriminated Union for Steps =====
|
|
109
114
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
115
|
+
// biome-ignore lint/suspicious/noExplicitAny: Recursive Zod type
|
|
116
|
+
export const StepSchema: z.ZodType<any> = z.lazy(() =>
|
|
117
|
+
z.discriminatedUnion('type', [
|
|
118
|
+
ShellStepSchema,
|
|
119
|
+
LlmStepSchema,
|
|
120
|
+
WorkflowStepSchema,
|
|
121
|
+
FileStepSchema,
|
|
122
|
+
RequestStepSchema,
|
|
123
|
+
HumanStepSchema,
|
|
124
|
+
SleepStepSchema,
|
|
125
|
+
ScriptStepSchema,
|
|
126
|
+
])
|
|
127
|
+
);
|
|
119
128
|
|
|
120
129
|
// ===== Workflow Schema =====
|
|
121
130
|
|
|
@@ -152,6 +161,7 @@ export type FileStep = z.infer<typeof FileStepSchema>;
|
|
|
152
161
|
export type RequestStep = z.infer<typeof RequestStepSchema>;
|
|
153
162
|
export type HumanStep = z.infer<typeof HumanStepSchema>;
|
|
154
163
|
export type SleepStep = z.infer<typeof SleepStepSchema>;
|
|
164
|
+
export type ScriptStep = z.infer<typeof ScriptStepSchema>;
|
|
155
165
|
export type Workflow = z.infer<typeof WorkflowSchema>;
|
|
156
166
|
export type AgentTool = z.infer<typeof AgentToolSchema>;
|
|
157
167
|
export type Agent = z.infer<typeof AgentSchema>;
|
|
@@ -28,7 +28,7 @@ describe('MCPServer', () => {
|
|
|
28
28
|
method: 'initialize',
|
|
29
29
|
});
|
|
30
30
|
|
|
31
|
-
expect(response
|
|
31
|
+
expect(response?.result?.serverInfo?.name).toBe('keystone-mcp');
|
|
32
32
|
});
|
|
33
33
|
|
|
34
34
|
it('should list tools', async () => {
|
|
@@ -38,9 +38,9 @@ describe('MCPServer', () => {
|
|
|
38
38
|
method: 'tools/list',
|
|
39
39
|
});
|
|
40
40
|
|
|
41
|
-
expect(response
|
|
41
|
+
expect(response?.result?.tools).toHaveLength(5);
|
|
42
42
|
// @ts-ignore
|
|
43
|
-
expect(response
|
|
43
|
+
expect(response?.result?.tools?.map((t) => t.name)).toContain('run_workflow');
|
|
44
44
|
});
|
|
45
45
|
|
|
46
46
|
it('should call list_workflows tool', async () => {
|
|
@@ -55,7 +55,7 @@ describe('MCPServer', () => {
|
|
|
55
55
|
params: { name: 'list_workflows', arguments: {} },
|
|
56
56
|
});
|
|
57
57
|
|
|
58
|
-
expect(response
|
|
58
|
+
expect(response?.result?.content?.[0]?.text).toContain('test-wf');
|
|
59
59
|
});
|
|
60
60
|
|
|
61
61
|
it('should call run_workflow tool successfully', async () => {
|
|
@@ -104,8 +104,8 @@ describe('MCPServer', () => {
|
|
|
104
104
|
},
|
|
105
105
|
});
|
|
106
106
|
|
|
107
|
-
expect(response
|
|
108
|
-
expect(response
|
|
107
|
+
expect(response?.result?.isError).toBe(true);
|
|
108
|
+
expect(response?.result?.content?.[0]?.text).toContain('Workflow failed');
|
|
109
109
|
});
|
|
110
110
|
|
|
111
111
|
it('should handle workflow suspension in run_workflow', async () => {
|
|
@@ -130,7 +130,7 @@ describe('MCPServer', () => {
|
|
|
130
130
|
},
|
|
131
131
|
});
|
|
132
132
|
|
|
133
|
-
const result = JSON.parse(response
|
|
133
|
+
const result = JSON.parse(response?.result?.content?.[0]?.text);
|
|
134
134
|
expect(result.status).toBe('paused');
|
|
135
135
|
expect(result.run_id).toBe('run123');
|
|
136
136
|
expect(result.message).toBe('Input needed');
|
|
@@ -187,7 +187,7 @@ describe('MCPServer', () => {
|
|
|
187
187
|
params: { name: 'get_run_logs', arguments: { run_id: runId } },
|
|
188
188
|
});
|
|
189
189
|
|
|
190
|
-
const summary = JSON.parse(response
|
|
190
|
+
const summary = JSON.parse(response?.result?.content?.[0]?.text);
|
|
191
191
|
expect(summary.workflow).toBe('test-wf');
|
|
192
192
|
expect(summary.steps).toHaveLength(1);
|
|
193
193
|
expect(summary.steps[0].step).toBe('s1');
|
|
@@ -202,7 +202,7 @@ describe('MCPServer', () => {
|
|
|
202
202
|
params: { name: 'unknown_tool', arguments: {} },
|
|
203
203
|
});
|
|
204
204
|
|
|
205
|
-
expect(response
|
|
205
|
+
expect(response?.error?.message).toContain('Unknown tool');
|
|
206
206
|
});
|
|
207
207
|
|
|
208
208
|
it('should handle unknown method', async () => {
|
|
@@ -212,14 +212,21 @@ describe('MCPServer', () => {
|
|
|
212
212
|
method: 'unknown_method',
|
|
213
213
|
});
|
|
214
214
|
|
|
215
|
-
expect(response
|
|
215
|
+
expect(response?.error?.message).toContain('Method not found');
|
|
216
216
|
});
|
|
217
217
|
|
|
218
218
|
it('should start and handle messages from stdin', async () => {
|
|
219
|
-
const
|
|
219
|
+
const { PassThrough } = await import('node:stream');
|
|
220
|
+
const input = new PassThrough();
|
|
221
|
+
const outputStream = new PassThrough();
|
|
222
|
+
|
|
223
|
+
// Create a new server for this test to use the streams
|
|
224
|
+
const testServer = new MCPServer(db, input, outputStream);
|
|
225
|
+
|
|
226
|
+
const writeSpy = spyOn(outputStream, 'write').mockImplementation(() => true);
|
|
220
227
|
const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
|
|
221
228
|
|
|
222
|
-
const startPromise =
|
|
229
|
+
const startPromise = testServer.start();
|
|
223
230
|
|
|
224
231
|
// Simulate stdin data
|
|
225
232
|
const message = {
|
|
@@ -227,16 +234,16 @@ describe('MCPServer', () => {
|
|
|
227
234
|
id: 9,
|
|
228
235
|
method: 'initialize',
|
|
229
236
|
};
|
|
230
|
-
|
|
237
|
+
input.write(`${JSON.stringify(message)}\n`);
|
|
231
238
|
|
|
232
239
|
// Wait for async processing
|
|
233
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
240
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
234
241
|
|
|
235
242
|
expect(writeSpy).toHaveBeenCalled();
|
|
236
243
|
const output = JSON.parse(writeSpy.mock.calls[0][0] as string);
|
|
237
244
|
expect(output.id).toBe(9);
|
|
238
245
|
|
|
239
|
-
|
|
246
|
+
input.end();
|
|
240
247
|
await startPromise;
|
|
241
248
|
|
|
242
249
|
writeSpy.mockRestore();
|
package/src/runner/mcp-server.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as readline from 'node:readline';
|
|
2
|
+
import type { Readable, Writable } from 'node:stream';
|
|
2
3
|
import pkg from '../../package.json' with { type: 'json' };
|
|
3
4
|
import { WorkflowDb } from '../db/workflow-db';
|
|
4
5
|
import { WorkflowParser } from '../parser/workflow-parser';
|
|
@@ -16,14 +17,18 @@ interface MCPMessage {
|
|
|
16
17
|
|
|
17
18
|
export class MCPServer {
|
|
18
19
|
private db: WorkflowDb;
|
|
20
|
+
private input: Readable;
|
|
21
|
+
private output: Writable;
|
|
19
22
|
|
|
20
|
-
constructor(db?: WorkflowDb) {
|
|
23
|
+
constructor(db?: WorkflowDb, input: Readable = process.stdin, output: Writable = process.stdout) {
|
|
21
24
|
this.db = db || new WorkflowDb();
|
|
25
|
+
this.input = input;
|
|
26
|
+
this.output = output;
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
async start() {
|
|
25
30
|
const rl = readline.createInterface({
|
|
26
|
-
input: process.stdin,
|
|
31
|
+
input: this.input,
|
|
27
32
|
terminal: false,
|
|
28
33
|
});
|
|
29
34
|
|
|
@@ -35,7 +40,7 @@ export class MCPServer {
|
|
|
35
40
|
const message = JSON.parse(line) as MCPMessage;
|
|
36
41
|
const response = await this.handleMessage(message);
|
|
37
42
|
if (response) {
|
|
38
|
-
|
|
43
|
+
this.output.write(`${JSON.stringify(response)}\n`);
|
|
39
44
|
}
|
|
40
45
|
} catch (error) {
|
|
41
46
|
console.error('Error handling MCP message:', error);
|
|
@@ -46,6 +51,11 @@ export class MCPServer {
|
|
|
46
51
|
this.stop();
|
|
47
52
|
resolve();
|
|
48
53
|
});
|
|
54
|
+
|
|
55
|
+
// Handle stream errors
|
|
56
|
+
this.input.on('error', (err: Error) => {
|
|
57
|
+
console.error('stdin error:', err);
|
|
58
|
+
});
|
|
49
59
|
});
|
|
50
60
|
}
|
|
51
61
|
|
|
@@ -333,7 +343,14 @@ export class MCPServer {
|
|
|
333
343
|
}
|
|
334
344
|
|
|
335
345
|
// Fulfill the step in the DB
|
|
336
|
-
|
|
346
|
+
let output: unknown = input;
|
|
347
|
+
const lowerInput = input.trim().toLowerCase();
|
|
348
|
+
if (lowerInput === 'confirm' || lowerInput === 'y' || lowerInput === 'yes' || lowerInput === '') {
|
|
349
|
+
output = true;
|
|
350
|
+
} else if (lowerInput === 'n' || lowerInput === 'no') {
|
|
351
|
+
output = false;
|
|
352
|
+
}
|
|
353
|
+
|
|
337
354
|
await this.db.completeStep(pendingStep.id, 'success', output);
|
|
338
355
|
|
|
339
356
|
// Resume the workflow
|
|
@@ -34,7 +34,7 @@ interface RequestOutput {
|
|
|
34
34
|
// Mock node:readline/promises
|
|
35
35
|
const mockRl = {
|
|
36
36
|
question: mock(() => Promise.resolve('')),
|
|
37
|
-
close: mock(() => {}),
|
|
37
|
+
close: mock(() => { }),
|
|
38
38
|
};
|
|
39
39
|
|
|
40
40
|
mock.module('node:readline/promises', () => ({
|
|
@@ -49,13 +49,13 @@ describe('step-executor', () => {
|
|
|
49
49
|
beforeAll(() => {
|
|
50
50
|
try {
|
|
51
51
|
mkdirSync(tempDir, { recursive: true });
|
|
52
|
-
} catch (e) {}
|
|
52
|
+
} catch (e) { }
|
|
53
53
|
});
|
|
54
54
|
|
|
55
55
|
afterAll(() => {
|
|
56
56
|
try {
|
|
57
57
|
rmSync(tempDir, { recursive: true, force: true });
|
|
58
|
-
} catch (e) {}
|
|
58
|
+
} catch (e) { }
|
|
59
59
|
});
|
|
60
60
|
|
|
61
61
|
beforeEach(() => {
|
|
@@ -330,7 +330,7 @@ describe('step-executor', () => {
|
|
|
330
330
|
};
|
|
331
331
|
|
|
332
332
|
// @ts-ignore
|
|
333
|
-
const result = await executeStep(step, context, { log: () => {} });
|
|
333
|
+
const result = await executeStep(step, context, { log: () => { } });
|
|
334
334
|
expect(result.status).toBe('success');
|
|
335
335
|
expect(result.output).toBe(true);
|
|
336
336
|
expect(mockRl.question).toHaveBeenCalled();
|
|
@@ -347,11 +347,54 @@ describe('step-executor', () => {
|
|
|
347
347
|
};
|
|
348
348
|
|
|
349
349
|
// @ts-ignore
|
|
350
|
-
const result = await executeStep(step, context, { log: () => {} });
|
|
350
|
+
const result = await executeStep(step, context, { log: () => { } });
|
|
351
351
|
expect(result.status).toBe('success');
|
|
352
352
|
expect(result.output).toBe('user response');
|
|
353
353
|
});
|
|
354
354
|
|
|
355
|
+
it('should handle human confirmation (yes/no/empty)', async () => {
|
|
356
|
+
const step: HumanStep = {
|
|
357
|
+
id: 'h1',
|
|
358
|
+
type: 'human',
|
|
359
|
+
message: 'Proceed?',
|
|
360
|
+
inputType: 'confirm',
|
|
361
|
+
};
|
|
362
|
+
|
|
363
|
+
// Test 'yes'
|
|
364
|
+
mockRl.question.mockResolvedValue('yes');
|
|
365
|
+
// @ts-ignore
|
|
366
|
+
let result = await executeStep(step, context, { log: () => { } });
|
|
367
|
+
expect(result.output).toBe(true);
|
|
368
|
+
|
|
369
|
+
// Test 'no'
|
|
370
|
+
mockRl.question.mockResolvedValue('no');
|
|
371
|
+
// @ts-ignore
|
|
372
|
+
result = await executeStep(step, context, { log: () => { } });
|
|
373
|
+
expect(result.output).toBe(false);
|
|
374
|
+
|
|
375
|
+
// Test empty string (default to true)
|
|
376
|
+
mockRl.question.mockResolvedValue('');
|
|
377
|
+
// @ts-ignore
|
|
378
|
+
result = await executeStep(step, context, { log: () => { } });
|
|
379
|
+
expect(result.output).toBe(true);
|
|
380
|
+
});
|
|
381
|
+
|
|
382
|
+
it('should fallback to text in confirm mode', async () => {
|
|
383
|
+
mockRl.question.mockResolvedValue('some custom response');
|
|
384
|
+
|
|
385
|
+
const step: HumanStep = {
|
|
386
|
+
id: 'h1',
|
|
387
|
+
type: 'human',
|
|
388
|
+
message: 'Proceed?',
|
|
389
|
+
inputType: 'confirm',
|
|
390
|
+
};
|
|
391
|
+
|
|
392
|
+
// @ts-ignore
|
|
393
|
+
const result = await executeStep(step, context, { log: () => { } });
|
|
394
|
+
expect(result.status).toBe('success');
|
|
395
|
+
expect(result.output).toBe('some custom response');
|
|
396
|
+
});
|
|
397
|
+
|
|
355
398
|
it('should suspend if not a TTY', async () => {
|
|
356
399
|
process.stdin.isTTY = false;
|
|
357
400
|
|
|
@@ -363,7 +406,7 @@ describe('step-executor', () => {
|
|
|
363
406
|
};
|
|
364
407
|
|
|
365
408
|
// @ts-ignore
|
|
366
|
-
const result = await executeStep(step, context, { log: () => {} });
|
|
409
|
+
const result = await executeStep(step, context, { log: () => { } });
|
|
367
410
|
expect(result.status).toBe('suspended');
|
|
368
411
|
expect(result.error).toBe('Proceed?');
|
|
369
412
|
});
|
|
@@ -5,6 +5,7 @@ import type {
|
|
|
5
5
|
FileStep,
|
|
6
6
|
HumanStep,
|
|
7
7
|
RequestStep,
|
|
8
|
+
ScriptStep,
|
|
8
9
|
ShellStep,
|
|
9
10
|
SleepStep,
|
|
10
11
|
Step,
|
|
@@ -14,6 +15,7 @@ import { executeShell } from './shell-executor.ts';
|
|
|
14
15
|
import type { Logger } from './workflow-runner.ts';
|
|
15
16
|
|
|
16
17
|
import * as readline from 'node:readline/promises';
|
|
18
|
+
import { SafeSandbox } from '../utils/sandbox.ts';
|
|
17
19
|
import { executeLlmStep } from './llm-executor.ts';
|
|
18
20
|
import type { MCPManager } from './mcp-manager.ts';
|
|
19
21
|
|
|
@@ -79,6 +81,9 @@ export async function executeStep(
|
|
|
79
81
|
}
|
|
80
82
|
result = await executeWorkflowFn(step, context);
|
|
81
83
|
break;
|
|
84
|
+
case 'script':
|
|
85
|
+
result = await executeScriptStep(step, context, logger);
|
|
86
|
+
break;
|
|
82
87
|
default:
|
|
83
88
|
throw new Error(`Unknown step type: ${(step as Step).type}`);
|
|
84
89
|
}
|
|
@@ -324,10 +329,25 @@ async function executeHumanStep(
|
|
|
324
329
|
try {
|
|
325
330
|
if (step.inputType === 'confirm') {
|
|
326
331
|
logger.log(`\n❓ ${message}`);
|
|
327
|
-
const answer = await rl.question('
|
|
328
|
-
|
|
332
|
+
const answer = (await rl.question('Response (Y/n/text): ')).trim();
|
|
333
|
+
|
|
334
|
+
const lowerAnswer = answer.toLowerCase();
|
|
335
|
+
if (lowerAnswer === '' || lowerAnswer === 'y' || lowerAnswer === 'yes') {
|
|
336
|
+
return {
|
|
337
|
+
output: true,
|
|
338
|
+
status: 'success',
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
if (lowerAnswer === 'n' || lowerAnswer === 'no') {
|
|
342
|
+
return {
|
|
343
|
+
output: false,
|
|
344
|
+
status: 'success',
|
|
345
|
+
};
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// Fallback to text if it's not a clear yes/no
|
|
329
349
|
return {
|
|
330
|
-
output:
|
|
350
|
+
output: answer,
|
|
331
351
|
status: 'success',
|
|
332
352
|
};
|
|
333
353
|
}
|
|
@@ -367,3 +387,31 @@ async function executeSleepStep(
|
|
|
367
387
|
status: 'success',
|
|
368
388
|
};
|
|
369
389
|
}
|
|
390
|
+
/**
|
|
391
|
+
* Execute a script step in a safe sandbox
|
|
392
|
+
*/
|
|
393
|
+
async function executeScriptStep(
|
|
394
|
+
step: ScriptStep,
|
|
395
|
+
context: ExpressionContext,
|
|
396
|
+
_logger: Logger
|
|
397
|
+
): Promise<StepResult> {
|
|
398
|
+
try {
|
|
399
|
+
const result = await SafeSandbox.execute(step.run, {
|
|
400
|
+
inputs: context.inputs,
|
|
401
|
+
secrets: context.secrets,
|
|
402
|
+
steps: context.steps,
|
|
403
|
+
env: context.env,
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
return {
|
|
407
|
+
output: result,
|
|
408
|
+
status: 'success',
|
|
409
|
+
};
|
|
410
|
+
} catch (error) {
|
|
411
|
+
return {
|
|
412
|
+
output: null,
|
|
413
|
+
status: 'failed',
|
|
414
|
+
error: error instanceof Error ? error.message : String(error),
|
|
415
|
+
};
|
|
416
|
+
}
|
|
417
|
+
}
|
|
@@ -25,7 +25,7 @@ class RedactingLogger implements Logger {
|
|
|
25
25
|
constructor(
|
|
26
26
|
private inner: Logger,
|
|
27
27
|
private redactor: Redactor
|
|
28
|
-
) {}
|
|
28
|
+
) { }
|
|
29
29
|
|
|
30
30
|
log(msg: string): void {
|
|
31
31
|
this.inner.log(this.redactor.redact(msg));
|
|
@@ -53,7 +53,7 @@ export interface RunOptions {
|
|
|
53
53
|
export interface StepContext {
|
|
54
54
|
output?: unknown;
|
|
55
55
|
outputs?: Record<string, unknown>;
|
|
56
|
-
status: 'success' | 'failed' | 'skipped';
|
|
56
|
+
status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
// Type for foreach results - wraps array to ensure JSON serialization preserves all properties
|
|
@@ -196,7 +196,7 @@ export class WorkflowRunner {
|
|
|
196
196
|
items[exec.iteration_index] = {
|
|
197
197
|
output: null,
|
|
198
198
|
outputs: {},
|
|
199
|
-
status: exec.status as 'failed' | '
|
|
199
|
+
status: exec.status as 'failed' | 'pending' | 'success' | 'skipped' | 'suspended',
|
|
200
200
|
};
|
|
201
201
|
}
|
|
202
202
|
}
|
|
@@ -305,9 +305,37 @@ export class WorkflowRunner {
|
|
|
305
305
|
private loadSecrets(): Record<string, string> {
|
|
306
306
|
const secrets: Record<string, string> = {};
|
|
307
307
|
|
|
308
|
+
// Common non-secret environment variables to exclude from redaction
|
|
309
|
+
const blocklist = new Set([
|
|
310
|
+
'USER',
|
|
311
|
+
'PATH',
|
|
312
|
+
'SHELL',
|
|
313
|
+
'HOME',
|
|
314
|
+
'PWD',
|
|
315
|
+
'LOGNAME',
|
|
316
|
+
'LANG',
|
|
317
|
+
'TERM',
|
|
318
|
+
'EDITOR',
|
|
319
|
+
'VISUAL',
|
|
320
|
+
'_',
|
|
321
|
+
'SHLVL',
|
|
322
|
+
'LC_ALL',
|
|
323
|
+
'OLDPWD',
|
|
324
|
+
'DISPLAY',
|
|
325
|
+
'TMPDIR',
|
|
326
|
+
'SSH_AUTH_SOCK',
|
|
327
|
+
'XPC_FLAGS',
|
|
328
|
+
'XPC_SERVICE_NAME',
|
|
329
|
+
'ITERM_SESSION_ID',
|
|
330
|
+
'ITERM_PROFILE',
|
|
331
|
+
'TERM_PROGRAM',
|
|
332
|
+
'TERM_PROGRAM_VERSION',
|
|
333
|
+
'COLORTERM',
|
|
334
|
+
]);
|
|
335
|
+
|
|
308
336
|
// Bun automatically loads .env file
|
|
309
337
|
for (const [key, value] of Object.entries(Bun.env)) {
|
|
310
|
-
if (value) {
|
|
338
|
+
if (value && !blocklist.has(key)) {
|
|
311
339
|
secrets[key] = value;
|
|
312
340
|
}
|
|
313
341
|
}
|
|
@@ -485,11 +513,7 @@ export class WorkflowRunner {
|
|
|
485
513
|
return result;
|
|
486
514
|
}
|
|
487
515
|
|
|
488
|
-
|
|
489
|
-
const redactedOutput = this.redactor.redactValue(result.output);
|
|
490
|
-
const redactedError = result.error ? this.redactor.redact(result.error) : undefined;
|
|
491
|
-
|
|
492
|
-
await this.db.completeStep(stepExecId, result.status, redactedOutput, redactedError);
|
|
516
|
+
await this.db.completeStep(stepExecId, result.status, result.output, result.error);
|
|
493
517
|
|
|
494
518
|
// Ensure outputs is always an object for consistent access
|
|
495
519
|
let outputs: Record<string, unknown>;
|
|
@@ -621,6 +645,7 @@ export class WorkflowRunner {
|
|
|
621
645
|
|
|
622
646
|
// Execute and store result at correct index
|
|
623
647
|
try {
|
|
648
|
+
this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
|
|
624
649
|
itemResults[i] = await this.executeStepInternal(step, itemContext, stepExecId);
|
|
625
650
|
if (itemResults[i].status === 'failed') {
|
|
626
651
|
aborted = true;
|
|
@@ -760,7 +785,7 @@ export class WorkflowRunner {
|
|
|
760
785
|
this.logger.log(`Run ID: ${this.runId}`);
|
|
761
786
|
this.logger.log(
|
|
762
787
|
'\n⚠️ Security Warning: Only run workflows from trusted sources.\n' +
|
|
763
|
-
|
|
788
|
+
' Workflows can execute arbitrary shell commands and access your environment.\n'
|
|
764
789
|
);
|
|
765
790
|
|
|
766
791
|
// Apply defaults and validate inputs
|
|
@@ -787,8 +812,7 @@ export class WorkflowRunner {
|
|
|
787
812
|
this.logger.log('All steps already completed. Nothing to resume.\n');
|
|
788
813
|
// Evaluate outputs from completed state
|
|
789
814
|
const outputs = this.evaluateOutputs();
|
|
790
|
-
|
|
791
|
-
await this.db.updateRunStatus(this.runId, 'completed', redactedOutputs);
|
|
815
|
+
await this.db.updateRunStatus(this.runId, 'completed', outputs);
|
|
792
816
|
this.logger.log('✨ Workflow already completed!\n');
|
|
793
817
|
return outputs;
|
|
794
818
|
}
|
|
@@ -799,6 +823,9 @@ export class WorkflowRunner {
|
|
|
799
823
|
|
|
800
824
|
this.logger.log(`Execution order: ${executionOrder.join(' → ')}\n`);
|
|
801
825
|
|
|
826
|
+
const totalSteps = executionOrder.length;
|
|
827
|
+
const stepIndices = new Map(executionOrder.map((id, index) => [id, index + 1]));
|
|
828
|
+
|
|
802
829
|
// Execute steps in parallel where possible (respecting dependencies)
|
|
803
830
|
const pendingSteps = new Set(remainingSteps);
|
|
804
831
|
const runningPromises = new Map<string, Promise<void>>();
|
|
@@ -811,18 +838,21 @@ export class WorkflowRunner {
|
|
|
811
838
|
if (!step) {
|
|
812
839
|
throw new Error(`Step ${stepId} not found in workflow`);
|
|
813
840
|
}
|
|
814
|
-
const dependenciesMet = step.needs.every((dep) => completedSteps.has(dep));
|
|
841
|
+
const dependenciesMet = step.needs.every((dep: string) => completedSteps.has(dep));
|
|
815
842
|
|
|
816
843
|
if (dependenciesMet) {
|
|
817
844
|
pendingSteps.delete(stepId);
|
|
818
845
|
|
|
819
846
|
// Start execution
|
|
820
|
-
|
|
847
|
+
const stepIndex = stepIndices.get(stepId);
|
|
848
|
+
this.logger.log(
|
|
849
|
+
`[${stepIndex}/${totalSteps}] ▶ Executing step: ${step.id} (${step.type})`
|
|
850
|
+
);
|
|
821
851
|
const promise = this.executeStepWithForeach(step)
|
|
822
852
|
.then(() => {
|
|
823
853
|
completedSteps.add(stepId);
|
|
824
854
|
runningPromises.delete(stepId);
|
|
825
|
-
this.logger.log(`
|
|
855
|
+
this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
|
|
826
856
|
})
|
|
827
857
|
.catch((err) => {
|
|
828
858
|
runningPromises.delete(stepId);
|
|
@@ -857,11 +887,8 @@ export class WorkflowRunner {
|
|
|
857
887
|
// Evaluate outputs
|
|
858
888
|
const outputs = this.evaluateOutputs();
|
|
859
889
|
|
|
860
|
-
// Redact secrets from outputs before storing
|
|
861
|
-
const redactedOutputs = this.redactor.redactValue(outputs) as Record<string, unknown>;
|
|
862
|
-
|
|
863
890
|
// Mark run as complete
|
|
864
|
-
await this.db.updateRunStatus(this.runId, 'completed', redactedOutputs);
|
|
891
|
+
await this.db.updateRunStatus(this.runId, 'completed', outputs);
|
|
865
892
|
|
|
866
893
|
this.logger.log('✨ Workflow completed successfully!\n');
|
|
867
894
|
|
|
@@ -900,6 +927,8 @@ export class WorkflowRunner {
|
|
|
900
927
|
const completedFinallySteps = new Set<string>();
|
|
901
928
|
const pendingFinallySteps = new Set(this.workflow.finally.map((s) => s.id));
|
|
902
929
|
const runningPromises = new Map<string, Promise<void>>();
|
|
930
|
+
const totalFinallySteps = this.workflow.finally.length;
|
|
931
|
+
const finallyStepIndices = new Map(this.workflow.finally.map((s, index) => [s.id, index + 1]));
|
|
903
932
|
|
|
904
933
|
try {
|
|
905
934
|
while (pendingFinallySteps.size > 0 || runningPromises.size > 0) {
|
|
@@ -909,18 +938,23 @@ export class WorkflowRunner {
|
|
|
909
938
|
|
|
910
939
|
// Dependencies can be from main steps (already in this.stepContexts) or previous finally steps
|
|
911
940
|
const dependenciesMet = step.needs.every(
|
|
912
|
-
(dep) => this.stepContexts.has(dep) || completedFinallySteps.has(dep)
|
|
941
|
+
(dep: string) => this.stepContexts.has(dep) || completedFinallySteps.has(dep)
|
|
913
942
|
);
|
|
914
943
|
|
|
915
944
|
if (dependenciesMet) {
|
|
916
945
|
pendingFinallySteps.delete(stepId);
|
|
917
946
|
|
|
918
|
-
|
|
947
|
+
const finallyStepIndex = finallyStepIndices.get(stepId);
|
|
948
|
+
this.logger.log(
|
|
949
|
+
`[${finallyStepIndex}/${totalFinallySteps}] ▶ Executing finally step: ${step.id} (${step.type})`
|
|
950
|
+
);
|
|
919
951
|
const promise = this.executeStepWithForeach(step)
|
|
920
952
|
.then(() => {
|
|
921
953
|
completedFinallySteps.add(stepId);
|
|
922
954
|
runningPromises.delete(stepId);
|
|
923
|
-
this.logger.log(
|
|
955
|
+
this.logger.log(
|
|
956
|
+
`[${finallyStepIndex}/${totalFinallySteps}] ✓ Finally step ${step.id} completed\n`
|
|
957
|
+
);
|
|
924
958
|
})
|
|
925
959
|
.catch((err) => {
|
|
926
960
|
runningPromises.delete(stepId);
|
|
@@ -15,13 +15,15 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
|
|
|
15
15
|
- **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
|
|
16
16
|
- **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
|
|
17
17
|
- **shell**: `{ id, type: 'shell', run, dir, env, transform }`
|
|
18
|
-
- **llm**: `{ id, type: 'llm', agent, prompt, schema }`
|
|
18
|
+
- **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, mcpServers }`
|
|
19
19
|
- **workflow**: `{ id, type: 'workflow', path, inputs }`
|
|
20
20
|
- **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
|
|
21
21
|
- **request**: `{ id, type: 'request', url, method, body, headers }`
|
|
22
|
-
- **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }`
|
|
22
|
+
- **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns a boolean but automatically falls back to text if the input is not yes/no)
|
|
23
23
|
- **sleep**: `{ id, type: 'sleep', duration }`
|
|
24
|
-
- **
|
|
24
|
+
- **script**: `{ id, type: 'script', run }` (Executes JS in a secure sandbox)
|
|
25
|
+
- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`, `transform`.
|
|
26
|
+
- **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
|
|
25
27
|
- **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
|
|
26
28
|
|
|
27
29
|
## Agent Schema (.md)
|
|
@@ -38,6 +40,13 @@ Markdown files with YAML frontmatter:
|
|
|
38
40
|
- `${{ args.paramName }}` (used inside agent tools)
|
|
39
41
|
- Standard JS-like expressions: `${{ steps.count > 0 ? 'yes' : 'no' }}`
|
|
40
42
|
|
|
43
|
+
# Guidelines
|
|
44
|
+
- **User Interaction**: Use `human` steps when user input or approval is needed.
|
|
45
|
+
- **Error Handling**: Use `retry` for flaky operations and `finally` for cleanup (e.g., removing temp files).
|
|
46
|
+
- **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
|
|
47
|
+
- **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
|
|
48
|
+
- **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems.
|
|
49
|
+
|
|
41
50
|
# Output Instructions
|
|
42
51
|
When asked to design a feature:
|
|
43
52
|
1. Provide the necessary Keystone files (Workflows and Agents).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { describe, expect, it
|
|
1
|
+
import { describe, expect, it } from 'bun:test';
|
|
2
2
|
import type { Workflow } from '../parser/schema';
|
|
3
|
-
import { generateMermaidGraph,
|
|
3
|
+
import { generateMermaidGraph, renderWorkflowAsAscii } from './mermaid';
|
|
4
4
|
|
|
5
5
|
describe('mermaid', () => {
|
|
6
6
|
it('should generate a mermaid graph from a workflow', () => {
|
|
@@ -16,7 +16,7 @@ describe('mermaid', () => {
|
|
|
16
16
|
const graph = generateMermaidGraph(workflow);
|
|
17
17
|
expect(graph).toContain('graph TD');
|
|
18
18
|
expect(graph).toContain('s1["s1\\n(shell)"]:::shell');
|
|
19
|
-
expect(graph).toContain('s2["s2\\n🤖 my-agent"]:::ai');
|
|
19
|
+
expect(graph).toContain('s2["s2\\n🤖 my-agent\\n(llm)"]:::ai');
|
|
20
20
|
expect(graph).toContain('s3["s3\\n(human)\\n❓ Conditional"]:::human');
|
|
21
21
|
expect(graph).toContain('s1 --> s2');
|
|
22
22
|
expect(graph).toContain('s2 --> s3');
|
|
@@ -31,45 +31,21 @@ describe('mermaid', () => {
|
|
|
31
31
|
expect(graph).toContain('(📚 Loop)');
|
|
32
32
|
});
|
|
33
33
|
|
|
34
|
-
it('should render
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
)
|
|
43
|
-
);
|
|
44
|
-
|
|
45
|
-
const result = await renderMermaidAsAscii('graph TD\n A --> B');
|
|
46
|
-
expect(result).toBe('ascii graph');
|
|
47
|
-
|
|
48
|
-
global.fetch = originalFetch;
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
it('should return null if API returns error', async () => {
|
|
52
|
-
const fetchSpy = spyOn(global, 'fetch').mockResolvedValue(
|
|
53
|
-
new Response('Error', { status: 500 })
|
|
54
|
-
);
|
|
55
|
-
const result = await renderMermaidAsAscii('graph TD; A-->B');
|
|
56
|
-
expect(result).toBeNull();
|
|
57
|
-
fetchSpy.mockRestore();
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
it('should return null if API returns failure message', async () => {
|
|
61
|
-
const fetchSpy = spyOn(global, 'fetch').mockResolvedValue(
|
|
62
|
-
new Response('Failed to render diagram', { status: 200 })
|
|
63
|
-
);
|
|
64
|
-
const result = await renderMermaidAsAscii('graph TD; A-->B');
|
|
65
|
-
expect(result).toBeNull();
|
|
66
|
-
fetchSpy.mockRestore();
|
|
67
|
-
});
|
|
34
|
+
it('should render workflow as ascii', () => {
|
|
35
|
+
const workflow: Workflow = {
|
|
36
|
+
name: 'test',
|
|
37
|
+
steps: [
|
|
38
|
+
{ id: 's1', type: 'shell', run: 'echo 1', needs: [] },
|
|
39
|
+
{ id: 's2', type: 'llm', agent: 'my-agent', prompt: 'hi', needs: ['s1'] },
|
|
40
|
+
],
|
|
41
|
+
} as unknown as Workflow;
|
|
68
42
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
expect(
|
|
73
|
-
|
|
43
|
+
const ascii = renderWorkflowAsAscii(workflow);
|
|
44
|
+
expect(ascii).toBeDefined();
|
|
45
|
+
expect(ascii).toContain('s1');
|
|
46
|
+
expect(ascii).toContain('s2 (AI: my-agent)');
|
|
47
|
+
expect(ascii).toContain('|');
|
|
48
|
+
expect(ascii).toContain('-');
|
|
49
|
+
expect(ascii).toContain('>');
|
|
74
50
|
});
|
|
75
51
|
});
|
package/src/utils/mermaid.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import dagre from 'dagre';
|
|
1
2
|
import type { Workflow } from '../parser/schema';
|
|
2
3
|
|
|
3
4
|
export function generateMermaidGraph(workflow: Workflow): string {
|
|
@@ -12,7 +13,7 @@ export function generateMermaidGraph(workflow: Workflow): string {
|
|
|
12
13
|
let label = `${step.id}\\n(${step.type})`;
|
|
13
14
|
|
|
14
15
|
// Add specific details based on type
|
|
15
|
-
if (step.type === 'llm') label = `${step.id}\\n🤖 ${step.agent}`;
|
|
16
|
+
if (step.type === 'llm') label = `${step.id}\\n🤖 ${step.agent}\\n(${step.type})`;
|
|
16
17
|
if (step.foreach) label += '\\n(📚 Loop)';
|
|
17
18
|
if (step.if) label += '\\n❓ Conditional';
|
|
18
19
|
|
|
@@ -59,29 +60,162 @@ export function generateMermaidGraph(workflow: Workflow): string {
|
|
|
59
60
|
}
|
|
60
61
|
|
|
61
62
|
/**
 * Retained stub for the former Mermaid-string ASCII renderer.
 *
 * ASCII output is now produced from the workflow object itself (see
 * `renderWorkflowAsAscii`), so this function ignores its argument and
 * always resolves to `null`.
 *
 * @deprecated Use `renderWorkflowAsAscii(workflow)` instead.
 * @param _mermaid - Mermaid graph source; unused.
 * @returns Always `null`.
 */
export async function renderMermaidAsAscii(_mermaid: string): Promise<string | null> {
  // We no longer use the mermaid string for ASCII, we use the workflow object directly.
  return null;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Renders a workflow's step graph as ASCII art, using dagre for layout.
 *
 * Steps are laid out left-to-right. Each step is drawn as a `+---+` box
 * containing its label (`id (type)`, or `id (AI: agent)` for `llm` steps,
 * prefixed with `IF ` / `LOOP ` when the step has `if` / `foreach`), and each
 * `needs` relation is drawn as a line ending in an arrowhead at the dependent
 * step.
 *
 * `needs` entries that do not name a step in `workflow.steps` are ignored:
 * adding an edge for them would make the graph library create a node with no
 * label object, which has no layout coordinates to draw from.
 *
 * @param workflow - The workflow whose `steps` should be drawn.
 * @returns The drawing as newline-joined rows with trailing spaces trimmed;
 *   an empty string when the workflow has no steps.
 */
export function renderWorkflowAsAscii(workflow: Workflow): string {
  if (workflow.steps.length === 0) return '';

  const g = new dagre.graphlib.Graph();
  g.setGraph({ rankdir: 'LR', nodesep: 2, edgesep: 1, ranksep: 4 });
  g.setDefaultEdgeLabel(() => ({}));

  // Sizes are in character cells: one canvas column/row per layout unit.
  const nodeWidth = 24;
  const nodeHeight = 3;

  const knownStepIds = new Set(workflow.steps.map((s) => s.id));

  for (const step of workflow.steps) {
    let label = `${step.id} (${step.type})`;
    if (step.type === 'llm') label = `${step.id} (AI: ${step.agent})`;

    if (step.if) label = `IF ${label}`;
    if (step.foreach) label = `LOOP ${label}`;

    // Widen the box when the label would not fit with 2 cells of padding per side.
    const width = Math.max(nodeWidth, label.length + 4);
    g.setNode(step.id, { label, width, height: nodeHeight });

    for (const need of step.needs ?? []) {
      // Skip dangling references; see the function comment.
      if (knownStepIds.has(need)) {
        g.setEdge(need, step.id);
      }
    }
  }

  dagre.layout(g);

  // Bounding box of everything that will be drawn (node rectangles and edge paths).
  let minX = Number.POSITIVE_INFINITY;
  let minY = Number.POSITIVE_INFINITY;
  let maxX = Number.NEGATIVE_INFINITY;
  let maxY = Number.NEGATIVE_INFINITY;

  for (const v of g.nodes()) {
    const node = g.node(v);
    minX = Math.min(minX, node.x - node.width / 2);
    minY = Math.min(minY, node.y - node.height / 2);
    maxX = Math.max(maxX, node.x + node.width / 2);
    maxY = Math.max(maxY, node.y + node.height / 2);
  }

  for (const e of g.edges()) {
    const edge = g.edge(e);
    for (const p of edge.points) {
      minX = Math.min(minX, p.x);
      minY = Math.min(minY, p.y);
      maxX = Math.max(maxX, p.x);
      maxY = Math.max(maxY, p.y);
    }
  }

  // Extra cells leave a margin around the drawing.
  const canvasWidth = Math.ceil(maxX - minX) + 10;
  const canvasHeight = Math.ceil(maxY - minY) + 4;
  const canvas = Array.from({ length: canvasHeight }, () =>
    new Array<string>(canvasWidth).fill(' ')
  );

  // Translate layout coordinates so the bounding box starts inside the margin.
  const offsetX = Math.floor(-minX) + 2;
  const offsetY = Math.floor(-minY) + 1;

  // Helper to draw at layout coordinates; writes outside the canvas are dropped.
  const draw = (x: number, y: number, char: string) => {
    const ix = Math.floor(x) + offsetX;
    const iy = Math.floor(y) + offsetY;
    if (iy >= 0 && iy < canvas.length && ix >= 0 && ix < canvas[0].length) {
      canvas[iy][ix] = char;
    }
  };

  const drawText = (x: number, y: number, text: string) => {
    const startX = Math.floor(x);
    const startY = Math.floor(y);
    for (let i = 0; i < text.length; i++) {
      draw(startX + i, startY, text[i]);
    }
  };

  // Draw Nodes
  for (const v of g.nodes()) {
    const node = g.node(v);
    const x = node.x - node.width / 2;
    const y = node.y - node.height / 2;
    const w = node.width;
    const h = node.height;

    const startX = Math.floor(x);
    const startY = Math.floor(y);
    const endX = startX + Math.floor(w) - 1;
    const endY = startY + Math.floor(h) - 1;

    for (let i = startX; i <= endX; i++) {
      draw(i, startY, '-');
      draw(i, endY, '-');
    }
    for (let i = startY; i <= endY; i++) {
      draw(startX, i, '|');
      draw(endX, i, '|');
    }
    // Corners last so they overwrite the border runs.
    draw(startX, startY, '+');
    draw(endX, startY, '+');
    draw(startX, endY, '+');
    draw(endX, endY, '+');

    const labelX = x + Math.floor((w - (node.label?.length || 0)) / 2);
    const labelY = y + Math.floor(h / 2);
    drawText(labelX, labelY, node.label || '');
  }

  // Draw Edges (after nodes, so edge characters win where they touch a border)
  for (const e of g.edges()) {
    const edge = g.edge(e);
    const points = edge.points;

    for (let i = 0; i < points.length - 1; i++) {
      const p1 = points[i];
      const p2 = points[i + 1];

      const x1 = Math.floor(p1.x);
      const y1 = Math.floor(p1.y);
      const x2 = Math.floor(p2.x);
      const y2 = Math.floor(p2.y);

      if (x1 === x2) {
        for (let y = Math.min(y1, y2); y <= Math.max(y1, y2); y++) draw(x1, y, '|');
      } else if (y1 === y2) {
        for (let x = Math.min(x1, x2); x <= Math.max(x1, x2); x++) draw(x, y1, '-');
      } else {
        // Diagonal segment: draw it as an elbow, horizontal first, then vertical.
        const xStep = x2 > x1 ? 1 : -1;
        const yStep = y2 > y1 ? 1 : -1;

        for (let x = x1; x !== x2; x += xStep) {
          draw(x, y1, '-');
        }
        draw(x2, y1, '+');

        for (let y = y1 + yStep; y !== y2; y += yStep) {
          draw(x2, y, '|');
        }
      }
    }

    // Arrowhead at the end of the path, pointing along the final segment.
    if (points.length >= 2) {
      const lastPoint = points[points.length - 1];
      const prevPoint = points[points.length - 2];
      if (lastPoint.x > prevPoint.x) draw(lastPoint.x, lastPoint.y, '>');
      else if (lastPoint.x < prevPoint.x) draw(lastPoint.x, lastPoint.y, '<');
      else if (lastPoint.y > prevPoint.y) draw(lastPoint.x, lastPoint.y, 'v');
      else if (lastPoint.y < prevPoint.y) draw(lastPoint.x, lastPoint.y, '^');
    }
  }

  return canvas.map((row) => row.join('').trimEnd()).join('\n');
}
|
|
@@ -63,4 +63,10 @@ describe('Redactor', () => {
|
|
|
63
63
|
const text = 'a and 12 are safe, but abc is a secret';
|
|
64
64
|
expect(shortRedactor.redact(text)).toBe('a and 12 are safe, but ***REDACTED*** is a secret');
|
|
65
65
|
});
|
|
66
|
+
|
|
67
|
+
it('should not redact substrings of larger words when using alphanumeric secrets', () => {
|
|
68
|
+
const wordRedactor = new Redactor({ USER: 'mark' });
|
|
69
|
+
const text = 'mark went to the marketplace';
|
|
70
|
+
expect(wordRedactor.redact(text)).toBe('***REDACTED*** went to the marketplace');
|
|
71
|
+
});
|
|
66
72
|
});
|
package/src/utils/redactor.ts
CHANGED
|
@@ -30,7 +30,16 @@ export class Redactor {
|
|
|
30
30
|
// Use a global replace to handle multiple occurrences
|
|
31
31
|
// Escape special regex characters in the secret
|
|
32
32
|
const escaped = secret.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
33
|
-
|
|
33
|
+
|
|
34
|
+
// Use word boundaries if the secret starts/ends with an alphanumeric character
|
|
35
|
+
// to avoid partial matches (e.g. redacting 'mark' in 'marketplace')
|
|
36
|
+
const startBoundary = /^\w/.test(secret) ? '\\b' : '';
|
|
37
|
+
const endBoundary = /\w$/.test(secret) ? '\\b' : '';
|
|
38
|
+
|
|
39
|
+
redacted = redacted.replace(
|
|
40
|
+
new RegExp(`${startBoundary}${escaped}${endBoundary}`, 'g'),
|
|
41
|
+
'***REDACTED***'
|
|
42
|
+
);
|
|
34
43
|
}
|
|
35
44
|
return redacted;
|
|
36
45
|
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { describe, expect, it } from 'bun:test';
|
|
2
|
+
import { SafeSandbox } from './sandbox';
|
|
3
|
+
|
|
4
|
+
describe('SafeSandbox', () => {
  // The script's completion value is what execute() resolves with.
  it('should execute basic arithmetic', async () => {
    const sum = await SafeSandbox.execute('1 + 2');
    expect(sum).toBe(3);
  });

  // Entries of the context object are visible to the script as globals.
  it('should have access to context variables', async () => {
    const vars = { a: 10, b: 20 };
    const total = await SafeSandbox.execute('a + b', vars);
    expect(total).toBe(30);
  });

  // Host globals such as `process` must not be visible inside the sandbox.
  it('should not have access to Node.js globals', async () => {
    const kind = await SafeSandbox.execute('typeof process');
    expect(kind).toBe('undefined');
  });

  // Non-primitive results have to make it back to the caller as well.
  it('should handle object results', async () => {
    const point = await SafeSandbox.execute('({ x: 1, y: 2 })');
    expect(point).toEqual({ x: 1, y: 2 });
  });

  // A script that never finishes must be cut off by the timeout option.
  it('should respect timeouts', async () => {
    const pending = SafeSandbox.execute('while(true) {}', {}, { timeout: 100 });
    await expect(pending).rejects.toThrow();
  });
});
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import * as vm from 'node:vm';
|
|
2
|
+
|
|
3
|
+
/** Options accepted by `SafeSandbox.execute`. */
export interface SandboxOptions {
  /** Execution time limit in milliseconds. Falsy values (including 0) use the 5000 ms default. */
  timeout?: number;
  /**
   * Memory limit handed to the isolated-vm `Isolate` (megabytes, per the
   * isolated-vm docs). Falsy values use the default of 128. The `node:vm`
   * fallback does not apply a memory limit.
   */
  memoryLimit?: number;
}

export class SafeSandbox {
  private static readonly DEFAULT_TIMEOUT_MS = 5000;
  private static readonly DEFAULT_MEMORY_LIMIT = 128;

  /**
   * Execute a script in a sandbox and resolve with its completion value.
   *
   * `isolated-vm` is used when it can be loaded and an isolate can be prepared
   * for the script; otherwise the script runs in a fresh `node:vm` context.
   * The fallback is only taken *before* the script has started. Once the
   * script runs inside the isolate, its own exception or timeout is reported
   * to the caller instead of the script being silently executed a second time.
   *
   * NOTE(security): Node's documentation states that `node:vm` is not a
   * security mechanism, so the fallback path must not be relied on to contain
   * untrusted code.
   *
   * @param code - JavaScript source to evaluate.
   * @param context - Values exposed to the script as globals. The object
   *   itself is never handed to the script (copied for both back ends).
   * @param options - Time and memory limits.
   * @returns The script's completion value. On the isolated-vm path the value
   *   is copied out of the isolate, so it must be copyable.
   * @throws Whatever the script throws, or a timeout error when it runs too long.
   */
  static async execute(
    code: string,
    context: Record<string, unknown> = {},
    options: SandboxOptions = {}
  ): Promise<unknown> {
    const timeout = options.timeout || SafeSandbox.DEFAULT_TIMEOUT_MS;

    const prepared = await SafeSandbox.prepareIsolate(code, context, options);
    if (prepared) {
      try {
        // `copy: true` lets plain-object results cross the isolate boundary.
        return await prepared.script.run(prepared.context, { timeout, copy: true });
      } finally {
        try {
          prepared.isolate.dispose();
        } catch {
          // A failing dispose must not mask the script's own result or error.
        }
      }
    }

    // Fallback implementation using node:vm (built-in)
    return vm.runInNewContext(
      code,
      { ...context },
      {
        timeout,
        displayErrors: true,
      }
    );
  }

  /**
   * Load isolated-vm, create an isolate, inject `context` and compile `code`.
   *
   * Resolves to `null` when any of those steps fails, so the caller can fall
   * back to `node:vm`; the script has not run at that point. An isolate that
   * was created before the failure is disposed.
   */
  private static async prepareIsolate(
    code: string,
    context: Record<string, unknown>,
    options: SandboxOptions
  ) {
    try {
      // Try to use isolated-vm if available (dynamic import)
      // Note: This will likely fail on Bun as it expects V8 host symbols
      const ivm = await import('isolated-vm').then((m) => m.default || m).catch(() => null);
      if (!ivm || typeof ivm.Isolate !== 'function') {
        return null;
      }

      const isolate = new ivm.Isolate({
        memoryLimit: options.memoryLimit || SafeSandbox.DEFAULT_MEMORY_LIMIT,
      });
      try {
        const contextInstance = await isolate.createContext();
        const jail = contextInstance.global;

        // Set up global context
        await jail.set('global', jail.derefInto());

        // Inject context variables
        for (const [key, value] of Object.entries(context)) {
          // Only copy non-undefined values
          if (value !== undefined) {
            await jail.set(key, new ivm.ExternalCopy(value).copyInto());
          }
        }

        const script = await isolate.compileScript(code);
        return { isolate, context: contextInstance, script };
      } catch (setupError) {
        isolate.dispose();
        throw setupError;
      }
    } catch {
      // isolated-vm could not be loaded or prepared; caller falls back to node:vm.
      return null;
    }
  }
}
|