keystone-cli 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -136,8 +136,8 @@ mcp_servers:
136
136
  github:
137
137
  command: npx
138
138
  args: ["-y", "@modelcontextprotocol/server-github"]
139
- env:
140
- GITHUB_PERSONAL_ACCESS_TOKEN: "your-github-pat" # Or omit if GITHUB_TOKEN is in your .env
139
+ env:
140
+ GITHUB_PERSONAL_ACCESS_TOKEN: "your-github-pat" # Or omit if GITHUB_TOKEN is in your .env
141
141
 
142
142
  storage:
143
143
 
@@ -265,6 +265,7 @@ Keystone supports several specialized step types:
265
265
  - `inputType: confirm`: Simple Enter-to-continue prompt.
266
266
  - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
267
267
  - `workflow`: Trigger another workflow as a sub-step.
268
+ - `script`: Run arbitrary JavaScript in a sandbox (`isolated-vm`, falling back to `node:vm`; note that `node:vm` on its own is not a security boundary).
268
269
  - `sleep`: Pause execution for a specified duration.
269
270
 
270
271
  All steps support common features like `needs` (dependencies), `if` (conditionals), `retry`, `timeout`, `foreach` (parallel iteration), and `transform` (post-process output using expressions).
@@ -327,7 +328,7 @@ You are a software developer. You can use tools to explore the codebase.
327
328
  Keystone can itself act as an MCP server, allowing other agents (like Claude Desktop or GitHub Copilot) to discover and run your workflows as tools.
328
329
 
329
330
  ```bash
330
- keystone mcp
331
+ keystone mcp start
331
332
  ```
332
333
 
333
334
  > **Note:** Workflow execution via the Keystone MCP server is synchronous. This provides a better experience for agents as they receive the final results directly, though it means the connection remains open for the duration of the workflow run.
@@ -396,7 +397,8 @@ In these examples, the agent will have access to all tools provided by the MCP s
396
397
  | `auth login [provider]` | Login to an authentication provider (github, openai, anthropic) |
397
398
  | `auth logout [provider]` | Logout and clear authentication tokens |
398
399
  | `ui` | Open the interactive TUI dashboard |
399
- | `mcp` | Start the Keystone MCP server |
400
+ | `mcp start` | Start the Keystone MCP server |
401
+ | `mcp login <server>` | Login to a remote MCP server |
400
402
  | `completion [shell]` | Generate shell completion script (zsh, bash) |
401
403
  | `prune [--days N]` | Cleanup old run data from the database |
402
404
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "keystone-cli",
3
- "version": "0.3.0",
3
+ "version": "0.3.2",
4
4
  "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
5
5
  "type": "module",
6
6
  "bin": {
@@ -38,9 +38,11 @@
38
38
  "@jsep-plugin/object": "^1.2.2",
39
39
  "@types/react": "^19.2.7",
40
40
  "commander": "^12.1.0",
41
+ "dagre": "^0.8.5",
41
42
  "ink": "^6.5.1",
42
43
  "ink-select-input": "3.1.2",
43
44
  "ink-spinner": "^5.0.0",
45
+ "isolated-vm": "^6.0.2",
44
46
  "js-yaml": "^4.1.0",
45
47
  "jsep": "^1.4.0",
46
48
  "react": "^19.2.3",
@@ -48,7 +50,10 @@
48
50
  },
49
51
  "devDependencies": {
50
52
  "@biomejs/biome": "^1.9.4",
51
- "@types/js-yaml": "^4.0.9"
53
+ "@types/bun": "^1.3.5",
54
+ "@types/dagre": "^0.7.53",
55
+ "@types/js-yaml": "^4.0.9",
56
+ "@types/node": "^25.0.3"
52
57
  },
53
58
  "engines": {
54
59
  "bun": ">=1.0.0"
package/src/cli.ts CHANGED
@@ -12,7 +12,7 @@ import scaffoldWorkflow from './templates/scaffold-feature.yaml' with { type: 't
12
12
  import { WorkflowDb } from './db/workflow-db.ts';
13
13
  import { WorkflowParser } from './parser/workflow-parser.ts';
14
14
  import { ConfigLoader } from './utils/config-loader.ts';
15
- import { generateMermaidGraph, renderMermaidAsAscii } from './utils/mermaid.ts';
15
+ import { generateMermaidGraph, renderWorkflowAsAscii } from './utils/mermaid.ts';
16
16
  import { WorkflowRegistry } from './utils/workflow-registry.ts';
17
17
 
18
18
  import pkg from '../package.json' with { type: 'json' };
@@ -204,12 +204,11 @@ program
204
204
  try {
205
205
  const resolvedPath = WorkflowRegistry.resolvePath(workflowPath);
206
206
  const workflow = WorkflowParser.loadWorkflow(resolvedPath);
207
- const mermaid = generateMermaidGraph(workflow);
208
-
209
- const ascii = await renderMermaidAsAscii(mermaid);
207
+ const ascii = renderWorkflowAsAscii(workflow);
210
208
  if (ascii) {
211
209
  console.log(`\n${ascii}\n`);
212
210
  } else {
211
+ const mermaid = generateMermaidGraph(workflow);
213
212
  console.log('\n```mermaid');
214
213
  console.log(mermaid);
215
214
  console.log('```\n');
@@ -614,11 +613,15 @@ const auth = program.command('auth').description('Authentication management');
614
613
  auth
615
614
  .command('login')
616
615
  .description('Login to an authentication provider')
617
- .option('-p, --provider <provider>', 'Authentication provider', 'github')
616
+ .argument('[provider]', 'Authentication provider', 'github')
617
+ .option(
618
+ '-p, --provider <provider>',
619
+ 'Authentication provider (deprecated, use positional argument)'
620
+ )
618
621
  .option('-t, --token <token>', 'Personal Access Token (if not using interactive mode)')
619
- .action(async (options) => {
622
+ .action(async (providerArg, options) => {
620
623
  const { AuthManager } = await import('./utils/auth-manager.ts');
621
- const provider = options.provider.toLowerCase();
624
+ const provider = (options.provider || providerArg).toLowerCase();
622
625
 
623
626
  if (provider === 'github') {
624
627
  let token = options.token;
@@ -675,6 +678,31 @@ auth
675
678
  console.error('✗ No token provided.');
676
679
  process.exit(1);
677
680
  }
681
+ } else if (provider === 'openai' || provider === 'anthropic') {
682
+ let key = options.token; // Use --token if provided as the API key
683
+
684
+ if (!key) {
685
+ console.log(`\n🔑 Login to ${provider.toUpperCase()}`);
686
+ console.log(` Please provide your ${provider.toUpperCase()} API key.\n`);
687
+ const prompt = 'API Key: ';
688
+ process.stdout.write(prompt);
689
+ for await (const line of console) {
690
+ key = line.trim();
691
+ break;
692
+ }
693
+ }
694
+
695
+ if (key) {
696
+ if (provider === 'openai') {
697
+ AuthManager.save({ openai_api_key: key });
698
+ } else {
699
+ AuthManager.save({ anthropic_api_key: key });
700
+ }
701
+ console.log(`\n✓ Successfully saved ${provider.toUpperCase()} API key.`);
702
+ } else {
703
+ console.error('✗ No API key provided.');
704
+ process.exit(1);
705
+ }
678
706
  } else {
679
707
  console.error(`✗ Unsupported provider: ${provider}`);
680
708
  process.exit(1);
@@ -702,13 +730,33 @@ auth
702
730
  }
703
731
  } else if (provider) {
704
732
  console.log(
705
- ` ⊘ Not logged into GitHub. Run "keystone auth login --provider github" to authenticate.`
733
+ ` ⊘ Not logged into GitHub. Run "keystone auth login github" to authenticate.`
734
+ );
735
+ }
736
+ }
737
+
738
+ if (!provider || provider === 'openai') {
739
+ if (auth.openai_api_key) {
740
+ console.log(' ✓ OpenAI API key configured');
741
+ } else if (provider) {
742
+ console.log(
743
+ ` ⊘ OpenAI API key not configured. Run "keystone auth login openai" to authenticate.`
744
+ );
745
+ }
746
+ }
747
+
748
+ if (!provider || provider === 'anthropic') {
749
+ if (auth.anthropic_api_key) {
750
+ console.log(' ✓ Anthropic API key configured');
751
+ } else if (provider) {
752
+ console.log(
753
+ ` ⊘ Anthropic API key not configured. Run "keystone auth login anthropic" to authenticate.`
706
754
  );
707
755
  }
708
756
  }
709
757
 
710
- if (!auth.github_token && !provider) {
711
- console.log(' ⊘ Not logged in. Run "keystone auth login" to authenticate.');
758
+ if (!auth.github_token && !auth.openai_api_key && !auth.anthropic_api_key && !provider) {
759
+ console.log(' ⊘ No providers configured. Run "keystone auth login" to authenticate.');
712
760
  }
713
761
  });
714
762
 
@@ -731,6 +779,12 @@ auth
731
779
  copilot_expires_at: undefined,
732
780
  });
733
781
  console.log('✓ Successfully logged out of GitHub.');
782
+ } else if (provider === 'openai') {
783
+ AuthManager.save({ openai_api_key: undefined });
784
+ console.log('✓ Successfully cleared OpenAI API key.');
785
+ } else if (provider === 'anthropic') {
786
+ AuthManager.save({ anthropic_api_key: undefined });
787
+ console.log('✓ Successfully cleared Anthropic API key.');
734
788
  } else {
735
789
  console.error(`✗ Unknown provider: ${provider}`);
736
790
  process.exit(1);
@@ -105,17 +105,26 @@ const SleepStepSchema = BaseStepSchema.extend({
105
105
  duration: z.union([z.number().int().positive(), z.string()]),
106
106
  });
107
107
 
108
+ const ScriptStepSchema = BaseStepSchema.extend({
109
+ type: z.literal('script'),
110
+ run: z.string(),
111
+ });
112
+
108
113
  // ===== Discriminated Union for Steps =====
109
114
 
110
- export const StepSchema = z.discriminatedUnion('type', [
111
- ShellStepSchema,
112
- LlmStepSchema,
113
- WorkflowStepSchema,
114
- FileStepSchema,
115
- RequestStepSchema,
116
- HumanStepSchema,
117
- SleepStepSchema,
118
- ]);
115
+ // biome-ignore lint/suspicious/noExplicitAny: Recursive Zod type
116
+ export const StepSchema: z.ZodType<any> = z.lazy(() =>
117
+ z.discriminatedUnion('type', [
118
+ ShellStepSchema,
119
+ LlmStepSchema,
120
+ WorkflowStepSchema,
121
+ FileStepSchema,
122
+ RequestStepSchema,
123
+ HumanStepSchema,
124
+ SleepStepSchema,
125
+ ScriptStepSchema,
126
+ ])
127
+ );
119
128
 
120
129
  // ===== Workflow Schema =====
121
130
 
@@ -152,6 +161,7 @@ export type FileStep = z.infer<typeof FileStepSchema>;
152
161
  export type RequestStep = z.infer<typeof RequestStepSchema>;
153
162
  export type HumanStep = z.infer<typeof HumanStepSchema>;
154
163
  export type SleepStep = z.infer<typeof SleepStepSchema>;
164
+ export type ScriptStep = z.infer<typeof ScriptStepSchema>;
155
165
  export type Workflow = z.infer<typeof WorkflowSchema>;
156
166
  export type AgentTool = z.infer<typeof AgentToolSchema>;
157
167
  export type Agent = z.infer<typeof AgentSchema>;
@@ -28,7 +28,7 @@ describe('MCPServer', () => {
28
28
  method: 'initialize',
29
29
  });
30
30
 
31
- expect(response.result.serverInfo.name).toBe('keystone-mcp');
31
+ expect(response?.result?.serverInfo?.name).toBe('keystone-mcp');
32
32
  });
33
33
 
34
34
  it('should list tools', async () => {
@@ -38,9 +38,9 @@ describe('MCPServer', () => {
38
38
  method: 'tools/list',
39
39
  });
40
40
 
41
- expect(response.result.tools).toHaveLength(5);
41
+ expect(response?.result?.tools).toHaveLength(5);
42
42
  // @ts-ignore
43
- expect(response.result.tools.map((t) => t.name)).toContain('run_workflow');
43
+ expect(response?.result?.tools?.map((t) => t.name)).toContain('run_workflow');
44
44
  });
45
45
 
46
46
  it('should call list_workflows tool', async () => {
@@ -55,7 +55,7 @@ describe('MCPServer', () => {
55
55
  params: { name: 'list_workflows', arguments: {} },
56
56
  });
57
57
 
58
- expect(response.result.content[0].text).toContain('test-wf');
58
+ expect(response?.result?.content?.[0]?.text).toContain('test-wf');
59
59
  });
60
60
 
61
61
  it('should call run_workflow tool successfully', async () => {
@@ -104,8 +104,8 @@ describe('MCPServer', () => {
104
104
  },
105
105
  });
106
106
 
107
- expect(response.result.isError).toBe(true);
108
- expect(response.result.content[0].text).toContain('Workflow failed');
107
+ expect(response?.result?.isError).toBe(true);
108
+ expect(response?.result?.content?.[0]?.text).toContain('Workflow failed');
109
109
  });
110
110
 
111
111
  it('should handle workflow suspension in run_workflow', async () => {
@@ -130,7 +130,7 @@ describe('MCPServer', () => {
130
130
  },
131
131
  });
132
132
 
133
- const result = JSON.parse(response.result.content[0].text);
133
+ const result = JSON.parse(response?.result?.content?.[0]?.text);
134
134
  expect(result.status).toBe('paused');
135
135
  expect(result.run_id).toBe('run123');
136
136
  expect(result.message).toBe('Input needed');
@@ -187,7 +187,7 @@ describe('MCPServer', () => {
187
187
  params: { name: 'get_run_logs', arguments: { run_id: runId } },
188
188
  });
189
189
 
190
- const summary = JSON.parse(response.result.content[0].text);
190
+ const summary = JSON.parse(response?.result?.content?.[0]?.text);
191
191
  expect(summary.workflow).toBe('test-wf');
192
192
  expect(summary.steps).toHaveLength(1);
193
193
  expect(summary.steps[0].step).toBe('s1');
@@ -202,7 +202,7 @@ describe('MCPServer', () => {
202
202
  params: { name: 'unknown_tool', arguments: {} },
203
203
  });
204
204
 
205
- expect(response.error.message).toContain('Unknown tool');
205
+ expect(response?.error?.message).toContain('Unknown tool');
206
206
  });
207
207
 
208
208
  it('should handle unknown method', async () => {
@@ -212,14 +212,21 @@ describe('MCPServer', () => {
212
212
  method: 'unknown_method',
213
213
  });
214
214
 
215
- expect(response.error.message).toContain('Method not found');
215
+ expect(response?.error?.message).toContain('Method not found');
216
216
  });
217
217
 
218
218
  it('should start and handle messages from stdin', async () => {
219
- const writeSpy = spyOn(process.stdout, 'write').mockImplementation(() => true);
219
+ const { PassThrough } = await import('node:stream');
220
+ const input = new PassThrough();
221
+ const outputStream = new PassThrough();
222
+
223
+ // Create a new server for this test to use the streams
224
+ const testServer = new MCPServer(db, input, outputStream);
225
+
226
+ const writeSpy = spyOn(outputStream, 'write').mockImplementation(() => true);
220
227
  const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
221
228
 
222
- const startPromise = server.start();
229
+ const startPromise = testServer.start();
223
230
 
224
231
  // Simulate stdin data
225
232
  const message = {
@@ -227,16 +234,16 @@ describe('MCPServer', () => {
227
234
  id: 9,
228
235
  method: 'initialize',
229
236
  };
230
- process.stdin.emit('data', Buffer.from(`${JSON.stringify(message)}\n`));
237
+ input.write(`${JSON.stringify(message)}\n`);
231
238
 
232
239
  // Wait for async processing
233
- await new Promise((resolve) => setTimeout(resolve, 50));
240
+ await new Promise((resolve) => setTimeout(resolve, 100));
234
241
 
235
242
  expect(writeSpy).toHaveBeenCalled();
236
243
  const output = JSON.parse(writeSpy.mock.calls[0][0] as string);
237
244
  expect(output.id).toBe(9);
238
245
 
239
- process.stdin.emit('close');
246
+ input.end();
240
247
  await startPromise;
241
248
 
242
249
  writeSpy.mockRestore();
@@ -1,4 +1,5 @@
1
1
  import * as readline from 'node:readline';
2
+ import type { Readable, Writable } from 'node:stream';
2
3
  import pkg from '../../package.json' with { type: 'json' };
3
4
  import { WorkflowDb } from '../db/workflow-db';
4
5
  import { WorkflowParser } from '../parser/workflow-parser';
@@ -16,14 +17,18 @@ interface MCPMessage {
16
17
 
17
18
  export class MCPServer {
18
19
  private db: WorkflowDb;
20
+ private input: Readable;
21
+ private output: Writable;
19
22
 
20
- constructor(db?: WorkflowDb) {
23
+ constructor(db?: WorkflowDb, input: Readable = process.stdin, output: Writable = process.stdout) {
21
24
  this.db = db || new WorkflowDb();
25
+ this.input = input;
26
+ this.output = output;
22
27
  }
23
28
 
24
29
  async start() {
25
30
  const rl = readline.createInterface({
26
- input: process.stdin,
31
+ input: this.input,
27
32
  terminal: false,
28
33
  });
29
34
 
@@ -35,7 +40,7 @@ export class MCPServer {
35
40
  const message = JSON.parse(line) as MCPMessage;
36
41
  const response = await this.handleMessage(message);
37
42
  if (response) {
38
- process.stdout.write(`${JSON.stringify(response)}\n`);
43
+ this.output.write(`${JSON.stringify(response)}\n`);
39
44
  }
40
45
  } catch (error) {
41
46
  console.error('Error handling MCP message:', error);
@@ -46,6 +51,11 @@ export class MCPServer {
46
51
  this.stop();
47
52
  resolve();
48
53
  });
54
+
55
+ // Handle stream errors
56
+ this.input.on('error', (err: Error) => {
57
+ console.error('stdin error:', err);
58
+ });
49
59
  });
50
60
  }
51
61
 
@@ -333,7 +343,14 @@ export class MCPServer {
333
343
  }
334
344
 
335
345
  // Fulfill the step in the DB
336
- const output = input === 'confirm' ? true : input;
346
+ let output: unknown = input;
347
+ const lowerInput = input.trim().toLowerCase();
348
+ if (lowerInput === 'confirm' || lowerInput === 'y' || lowerInput === 'yes' || lowerInput === '') {
349
+ output = true;
350
+ } else if (lowerInput === 'n' || lowerInput === 'no') {
351
+ output = false;
352
+ }
353
+
337
354
  await this.db.completeStep(pendingStep.id, 'success', output);
338
355
 
339
356
  // Resume the workflow
@@ -34,7 +34,7 @@ interface RequestOutput {
34
34
  // Mock node:readline/promises
35
35
  const mockRl = {
36
36
  question: mock(() => Promise.resolve('')),
37
- close: mock(() => {}),
37
+ close: mock(() => { }),
38
38
  };
39
39
 
40
40
  mock.module('node:readline/promises', () => ({
@@ -49,13 +49,13 @@ describe('step-executor', () => {
49
49
  beforeAll(() => {
50
50
  try {
51
51
  mkdirSync(tempDir, { recursive: true });
52
- } catch (e) {}
52
+ } catch (e) { }
53
53
  });
54
54
 
55
55
  afterAll(() => {
56
56
  try {
57
57
  rmSync(tempDir, { recursive: true, force: true });
58
- } catch (e) {}
58
+ } catch (e) { }
59
59
  });
60
60
 
61
61
  beforeEach(() => {
@@ -330,7 +330,7 @@ describe('step-executor', () => {
330
330
  };
331
331
 
332
332
  // @ts-ignore
333
- const result = await executeStep(step, context, { log: () => {} });
333
+ const result = await executeStep(step, context, { log: () => { } });
334
334
  expect(result.status).toBe('success');
335
335
  expect(result.output).toBe(true);
336
336
  expect(mockRl.question).toHaveBeenCalled();
@@ -347,11 +347,54 @@ describe('step-executor', () => {
347
347
  };
348
348
 
349
349
  // @ts-ignore
350
- const result = await executeStep(step, context, { log: () => {} });
350
+ const result = await executeStep(step, context, { log: () => { } });
351
351
  expect(result.status).toBe('success');
352
352
  expect(result.output).toBe('user response');
353
353
  });
354
354
 
355
+ it('should handle human confirmation (yes/no/empty)', async () => {
356
+ const step: HumanStep = {
357
+ id: 'h1',
358
+ type: 'human',
359
+ message: 'Proceed?',
360
+ inputType: 'confirm',
361
+ };
362
+
363
+ // Test 'yes'
364
+ mockRl.question.mockResolvedValue('yes');
365
+ // @ts-ignore
366
+ let result = await executeStep(step, context, { log: () => { } });
367
+ expect(result.output).toBe(true);
368
+
369
+ // Test 'no'
370
+ mockRl.question.mockResolvedValue('no');
371
+ // @ts-ignore
372
+ result = await executeStep(step, context, { log: () => { } });
373
+ expect(result.output).toBe(false);
374
+
375
+ // Test empty string (default to true)
376
+ mockRl.question.mockResolvedValue('');
377
+ // @ts-ignore
378
+ result = await executeStep(step, context, { log: () => { } });
379
+ expect(result.output).toBe(true);
380
+ });
381
+
382
+ it('should fallback to text in confirm mode', async () => {
383
+ mockRl.question.mockResolvedValue('some custom response');
384
+
385
+ const step: HumanStep = {
386
+ id: 'h1',
387
+ type: 'human',
388
+ message: 'Proceed?',
389
+ inputType: 'confirm',
390
+ };
391
+
392
+ // @ts-ignore
393
+ const result = await executeStep(step, context, { log: () => { } });
394
+ expect(result.status).toBe('success');
395
+ expect(result.output).toBe('some custom response');
396
+ });
397
+
355
398
  it('should suspend if not a TTY', async () => {
356
399
  process.stdin.isTTY = false;
357
400
 
@@ -363,7 +406,7 @@ describe('step-executor', () => {
363
406
  };
364
407
 
365
408
  // @ts-ignore
366
- const result = await executeStep(step, context, { log: () => {} });
409
+ const result = await executeStep(step, context, { log: () => { } });
367
410
  expect(result.status).toBe('suspended');
368
411
  expect(result.error).toBe('Proceed?');
369
412
  });
@@ -5,6 +5,7 @@ import type {
5
5
  FileStep,
6
6
  HumanStep,
7
7
  RequestStep,
8
+ ScriptStep,
8
9
  ShellStep,
9
10
  SleepStep,
10
11
  Step,
@@ -14,6 +15,7 @@ import { executeShell } from './shell-executor.ts';
14
15
  import type { Logger } from './workflow-runner.ts';
15
16
 
16
17
  import * as readline from 'node:readline/promises';
18
+ import { SafeSandbox } from '../utils/sandbox.ts';
17
19
  import { executeLlmStep } from './llm-executor.ts';
18
20
  import type { MCPManager } from './mcp-manager.ts';
19
21
 
@@ -79,6 +81,9 @@ export async function executeStep(
79
81
  }
80
82
  result = await executeWorkflowFn(step, context);
81
83
  break;
84
+ case 'script':
85
+ result = await executeScriptStep(step, context, logger);
86
+ break;
82
87
  default:
83
88
  throw new Error(`Unknown step type: ${(step as Step).type}`);
84
89
  }
@@ -324,10 +329,25 @@ async function executeHumanStep(
324
329
  try {
325
330
  if (step.inputType === 'confirm') {
326
331
  logger.log(`\n❓ ${message}`);
327
- const answer = await rl.question('Confirm? (Y/n): ');
328
- const isConfirmed = answer.toLowerCase() !== 'n';
332
+ const answer = (await rl.question('Response (Y/n/text): ')).trim();
333
+
334
+ const lowerAnswer = answer.toLowerCase();
335
+ if (lowerAnswer === '' || lowerAnswer === 'y' || lowerAnswer === 'yes') {
336
+ return {
337
+ output: true,
338
+ status: 'success',
339
+ };
340
+ }
341
+ if (lowerAnswer === 'n' || lowerAnswer === 'no') {
342
+ return {
343
+ output: false,
344
+ status: 'success',
345
+ };
346
+ }
347
+
348
+ // Fallback to text if it's not a clear yes/no
329
349
  return {
330
- output: isConfirmed,
350
+ output: answer,
331
351
  status: 'success',
332
352
  };
333
353
  }
@@ -367,3 +387,31 @@ async function executeSleepStep(
367
387
  status: 'success',
368
388
  };
369
389
  }
390
+ /**
391
+ * Execute a script step in a safe sandbox
392
+ */
393
+ async function executeScriptStep(
394
+ step: ScriptStep,
395
+ context: ExpressionContext,
396
+ _logger: Logger
397
+ ): Promise<StepResult> {
398
+ try {
399
+ const result = await SafeSandbox.execute(step.run, {
400
+ inputs: context.inputs,
401
+ secrets: context.secrets,
402
+ steps: context.steps,
403
+ env: context.env,
404
+ });
405
+
406
+ return {
407
+ output: result,
408
+ status: 'success',
409
+ };
410
+ } catch (error) {
411
+ return {
412
+ output: null,
413
+ status: 'failed',
414
+ error: error instanceof Error ? error.message : String(error),
415
+ };
416
+ }
417
+ }
@@ -25,7 +25,7 @@ class RedactingLogger implements Logger {
25
25
  constructor(
26
26
  private inner: Logger,
27
27
  private redactor: Redactor
28
- ) {}
28
+ ) { }
29
29
 
30
30
  log(msg: string): void {
31
31
  this.inner.log(this.redactor.redact(msg));
@@ -53,7 +53,7 @@ export interface RunOptions {
53
53
  export interface StepContext {
54
54
  output?: unknown;
55
55
  outputs?: Record<string, unknown>;
56
- status: 'success' | 'failed' | 'skipped';
56
+ status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
57
57
  }
58
58
 
59
59
  // Type for foreach results - wraps array to ensure JSON serialization preserves all properties
@@ -196,7 +196,7 @@ export class WorkflowRunner {
196
196
  items[exec.iteration_index] = {
197
197
  output: null,
198
198
  outputs: {},
199
- status: exec.status as 'failed' | 'running' | 'pending',
199
+ status: exec.status as 'failed' | 'pending' | 'success' | 'skipped' | 'suspended',
200
200
  };
201
201
  }
202
202
  }
@@ -305,9 +305,37 @@ export class WorkflowRunner {
305
305
  private loadSecrets(): Record<string, string> {
306
306
  const secrets: Record<string, string> = {};
307
307
 
308
+ // Common non-secret environment variables to exclude from redaction
309
+ const blocklist = new Set([
310
+ 'USER',
311
+ 'PATH',
312
+ 'SHELL',
313
+ 'HOME',
314
+ 'PWD',
315
+ 'LOGNAME',
316
+ 'LANG',
317
+ 'TERM',
318
+ 'EDITOR',
319
+ 'VISUAL',
320
+ '_',
321
+ 'SHLVL',
322
+ 'LC_ALL',
323
+ 'OLDPWD',
324
+ 'DISPLAY',
325
+ 'TMPDIR',
326
+ 'SSH_AUTH_SOCK',
327
+ 'XPC_FLAGS',
328
+ 'XPC_SERVICE_NAME',
329
+ 'ITERM_SESSION_ID',
330
+ 'ITERM_PROFILE',
331
+ 'TERM_PROGRAM',
332
+ 'TERM_PROGRAM_VERSION',
333
+ 'COLORTERM',
334
+ ]);
335
+
308
336
  // Bun automatically loads .env file
309
337
  for (const [key, value] of Object.entries(Bun.env)) {
310
- if (value) {
338
+ if (value && !blocklist.has(key)) {
311
339
  secrets[key] = value;
312
340
  }
313
341
  }
@@ -485,11 +513,7 @@ export class WorkflowRunner {
485
513
  return result;
486
514
  }
487
515
 
488
- // Redact secrets from output and error before storing
489
- const redactedOutput = this.redactor.redactValue(result.output);
490
- const redactedError = result.error ? this.redactor.redact(result.error) : undefined;
491
-
492
- await this.db.completeStep(stepExecId, result.status, redactedOutput, redactedError);
516
+ await this.db.completeStep(stepExecId, result.status, result.output, result.error);
493
517
 
494
518
  // Ensure outputs is always an object for consistent access
495
519
  let outputs: Record<string, unknown>;
@@ -621,6 +645,7 @@ export class WorkflowRunner {
621
645
 
622
646
  // Execute and store result at correct index
623
647
  try {
648
+ this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
624
649
  itemResults[i] = await this.executeStepInternal(step, itemContext, stepExecId);
625
650
  if (itemResults[i].status === 'failed') {
626
651
  aborted = true;
@@ -760,7 +785,7 @@ export class WorkflowRunner {
760
785
  this.logger.log(`Run ID: ${this.runId}`);
761
786
  this.logger.log(
762
787
  '\n⚠️ Security Warning: Only run workflows from trusted sources.\n' +
763
- ' Workflows can execute arbitrary shell commands and access your environment.\n'
788
+ ' Workflows can execute arbitrary shell commands and access your environment.\n'
764
789
  );
765
790
 
766
791
  // Apply defaults and validate inputs
@@ -787,8 +812,7 @@ export class WorkflowRunner {
787
812
  this.logger.log('All steps already completed. Nothing to resume.\n');
788
813
  // Evaluate outputs from completed state
789
814
  const outputs = this.evaluateOutputs();
790
- const redactedOutputs = this.redactor.redactValue(outputs) as Record<string, unknown>;
791
- await this.db.updateRunStatus(this.runId, 'completed', redactedOutputs);
815
+ await this.db.updateRunStatus(this.runId, 'completed', outputs);
792
816
  this.logger.log('✨ Workflow already completed!\n');
793
817
  return outputs;
794
818
  }
@@ -799,6 +823,9 @@ export class WorkflowRunner {
799
823
 
800
824
  this.logger.log(`Execution order: ${executionOrder.join(' → ')}\n`);
801
825
 
826
+ const totalSteps = executionOrder.length;
827
+ const stepIndices = new Map(executionOrder.map((id, index) => [id, index + 1]));
828
+
802
829
  // Execute steps in parallel where possible (respecting dependencies)
803
830
  const pendingSteps = new Set(remainingSteps);
804
831
  const runningPromises = new Map<string, Promise<void>>();
@@ -811,18 +838,21 @@ export class WorkflowRunner {
811
838
  if (!step) {
812
839
  throw new Error(`Step ${stepId} not found in workflow`);
813
840
  }
814
- const dependenciesMet = step.needs.every((dep) => completedSteps.has(dep));
841
+ const dependenciesMet = step.needs.every((dep: string) => completedSteps.has(dep));
815
842
 
816
843
  if (dependenciesMet) {
817
844
  pendingSteps.delete(stepId);
818
845
 
819
846
  // Start execution
820
- this.logger.log(`▶ Executing step: ${step.id} (${step.type})`);
847
+ const stepIndex = stepIndices.get(stepId);
848
+ this.logger.log(
849
+ `[${stepIndex}/${totalSteps}] ▶ Executing step: ${step.id} (${step.type})`
850
+ );
821
851
  const promise = this.executeStepWithForeach(step)
822
852
  .then(() => {
823
853
  completedSteps.add(stepId);
824
854
  runningPromises.delete(stepId);
825
- this.logger.log(` ✓ Step ${step.id} completed\n`);
855
+ this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
826
856
  })
827
857
  .catch((err) => {
828
858
  runningPromises.delete(stepId);
@@ -857,11 +887,8 @@ export class WorkflowRunner {
857
887
  // Evaluate outputs
858
888
  const outputs = this.evaluateOutputs();
859
889
 
860
- // Redact secrets from outputs before storing
861
- const redactedOutputs = this.redactor.redactValue(outputs) as Record<string, unknown>;
862
-
863
890
  // Mark run as complete
864
- await this.db.updateRunStatus(this.runId, 'completed', redactedOutputs);
891
+ await this.db.updateRunStatus(this.runId, 'completed', outputs);
865
892
 
866
893
  this.logger.log('✨ Workflow completed successfully!\n');
867
894
 
@@ -900,6 +927,8 @@ export class WorkflowRunner {
900
927
  const completedFinallySteps = new Set<string>();
901
928
  const pendingFinallySteps = new Set(this.workflow.finally.map((s) => s.id));
902
929
  const runningPromises = new Map<string, Promise<void>>();
930
+ const totalFinallySteps = this.workflow.finally.length;
931
+ const finallyStepIndices = new Map(this.workflow.finally.map((s, index) => [s.id, index + 1]));
903
932
 
904
933
  try {
905
934
  while (pendingFinallySteps.size > 0 || runningPromises.size > 0) {
@@ -909,18 +938,23 @@ export class WorkflowRunner {
909
938
 
910
939
  // Dependencies can be from main steps (already in this.stepContexts) or previous finally steps
911
940
  const dependenciesMet = step.needs.every(
912
- (dep) => this.stepContexts.has(dep) || completedFinallySteps.has(dep)
941
+ (dep: string) => this.stepContexts.has(dep) || completedFinallySteps.has(dep)
913
942
  );
914
943
 
915
944
  if (dependenciesMet) {
916
945
  pendingFinallySteps.delete(stepId);
917
946
 
918
- this.logger.log(`▶ Executing finally step: ${step.id} (${step.type})`);
947
+ const finallyStepIndex = finallyStepIndices.get(stepId);
948
+ this.logger.log(
949
+ `[${finallyStepIndex}/${totalFinallySteps}] ▶ Executing finally step: ${step.id} (${step.type})`
950
+ );
919
951
  const promise = this.executeStepWithForeach(step)
920
952
  .then(() => {
921
953
  completedFinallySteps.add(stepId);
922
954
  runningPromises.delete(stepId);
923
- this.logger.log(` ✓ Finally step ${step.id} completed\n`);
955
+ this.logger.log(
956
+ `[${finallyStepIndex}/${totalFinallySteps}] ✓ Finally step ${step.id} completed\n`
957
+ );
924
958
  })
925
959
  .catch((err) => {
926
960
  runningPromises.delete(stepId);
@@ -15,13 +15,15 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
15
15
  - **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
16
16
  - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
17
17
  - **shell**: `{ id, type: 'shell', run, dir, env, transform }`
18
- - **llm**: `{ id, type: 'llm', agent, prompt, schema }`
18
+ - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, mcpServers }`
19
19
  - **workflow**: `{ id, type: 'workflow', path, inputs }`
20
20
  - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
21
21
  - **request**: `{ id, type: 'request', url, method, body, headers }`
22
- - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }`
22
+ - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns a boolean but automatically falls back to text if the input is not yes/no)
23
23
  - **sleep**: `{ id, type: 'sleep', duration }`
24
- - **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`.
24
+ - **script**: `{ id, type: 'script', run }` (Executes JS in a secure sandbox)
25
+ - **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`, `transform`.
26
+ - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
25
27
  - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
26
28
 
27
29
  ## Agent Schema (.md)
@@ -38,6 +40,13 @@ Markdown files with YAML frontmatter:
38
40
  - `${{ args.paramName }}` (used inside agent tools)
39
41
  - Standard JS-like expressions: `${{ steps.count > 0 ? 'yes' : 'no' }}`
40
42
 
43
+ # Guidelines
44
+ - **User Interaction**: Use `human` steps when user input or approval is needed.
45
+ - **Error Handling**: Use `retry` for flaky operations and `finally` for cleanup (e.g., removing temp files).
46
+ - **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
47
+ - **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
48
+ - **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems.
49
+
41
50
  # Output Instructions
42
51
  When asked to design a feature:
43
52
  1. Provide the necessary Keystone files (Workflows and Agents).
@@ -62,3 +62,8 @@ steps:
62
62
  type: sleep
63
63
  duration: 100
64
64
  needs: [api_test]
65
+
66
+ finally:
67
+ - id: cleanup
68
+ type: shell
69
+ run: rm /tmp/keystone-test.txt
@@ -1,6 +1,6 @@
1
- import { describe, expect, it, mock, spyOn } from 'bun:test';
1
+ import { describe, expect, it } from 'bun:test';
2
2
  import type { Workflow } from '../parser/schema';
3
- import { generateMermaidGraph, renderMermaidAsAscii } from './mermaid';
3
+ import { generateMermaidGraph, renderWorkflowAsAscii } from './mermaid';
4
4
 
5
5
  describe('mermaid', () => {
6
6
  it('should generate a mermaid graph from a workflow', () => {
@@ -16,7 +16,7 @@ describe('mermaid', () => {
16
16
  const graph = generateMermaidGraph(workflow);
17
17
  expect(graph).toContain('graph TD');
18
18
  expect(graph).toContain('s1["s1\\n(shell)"]:::shell');
19
- expect(graph).toContain('s2["s2\\n🤖 my-agent"]:::ai');
19
+ expect(graph).toContain('s2["s2\\n🤖 my-agent\\n(llm)"]:::ai');
20
20
  expect(graph).toContain('s3["s3\\n(human)\\n❓ Conditional"]:::human');
21
21
  expect(graph).toContain('s1 --> s2');
22
22
  expect(graph).toContain('s2 --> s3');
@@ -31,45 +31,21 @@ describe('mermaid', () => {
31
31
  expect(graph).toContain('(📚 Loop)');
32
32
  });
33
33
 
34
- it('should render mermaid as ascii', async () => {
35
- const originalFetch = global.fetch;
36
- // @ts-ignore
37
- global.fetch = mock(() =>
38
- Promise.resolve(
39
- new Response('ascii graph', {
40
- status: 200,
41
- })
42
- )
43
- );
44
-
45
- const result = await renderMermaidAsAscii('graph TD\n A --> B');
46
- expect(result).toBe('ascii graph');
47
-
48
- global.fetch = originalFetch;
49
- });
50
-
51
- it('should return null if API returns error', async () => {
52
- const fetchSpy = spyOn(global, 'fetch').mockResolvedValue(
53
- new Response('Error', { status: 500 })
54
- );
55
- const result = await renderMermaidAsAscii('graph TD; A-->B');
56
- expect(result).toBeNull();
57
- fetchSpy.mockRestore();
58
- });
59
-
60
- it('should return null if API returns failure message', async () => {
61
- const fetchSpy = spyOn(global, 'fetch').mockResolvedValue(
62
- new Response('Failed to render diagram', { status: 200 })
63
- );
64
- const result = await renderMermaidAsAscii('graph TD; A-->B');
65
- expect(result).toBeNull();
66
- fetchSpy.mockRestore();
67
- });
34
+ it('should render workflow as ascii', () => {
35
+ const workflow: Workflow = {
36
+ name: 'test',
37
+ steps: [
38
+ { id: 's1', type: 'shell', run: 'echo 1', needs: [] },
39
+ { id: 's2', type: 'llm', agent: 'my-agent', prompt: 'hi', needs: ['s1'] },
40
+ ],
41
+ } as unknown as Workflow;
68
42
 
69
- it('should return null if fetch throws', async () => {
70
- const fetchSpy = spyOn(global, 'fetch').mockRejectedValue(new Error('Network error'));
71
- const result = await renderMermaidAsAscii('graph TD; A-->B');
72
- expect(result).toBeNull();
73
- fetchSpy.mockRestore();
43
+ const ascii = renderWorkflowAsAscii(workflow);
44
+ expect(ascii).toBeDefined();
45
+ expect(ascii).toContain('s1');
46
+ expect(ascii).toContain('s2 (AI: my-agent)');
47
+ expect(ascii).toContain('|');
48
+ expect(ascii).toContain('-');
49
+ expect(ascii).toContain('>');
74
50
  });
75
51
  });
@@ -1,3 +1,4 @@
1
+ import dagre from 'dagre';
1
2
  import type { Workflow } from '../parser/schema';
2
3
 
3
4
  export function generateMermaidGraph(workflow: Workflow): string {
@@ -12,7 +13,7 @@ export function generateMermaidGraph(workflow: Workflow): string {
12
13
  let label = `${step.id}\\n(${step.type})`;
13
14
 
14
15
  // Add specific details based on type
15
- if (step.type === 'llm') label = `${step.id}\\n🤖 ${step.agent}`;
16
+ if (step.type === 'llm') label = `${step.id}\\n🤖 ${step.agent}\\n(${step.type})`;
16
17
  if (step.foreach) label += '\\n(📚 Loop)';
17
18
  if (step.if) label += '\\n❓ Conditional';
18
19
 
@@ -59,29 +60,162 @@ export function generateMermaidGraph(workflow: Workflow): string {
59
60
  }
60
61
 
61
62
  /**
62
- * Renders a Mermaid graph as ASCII using mermaid-ascii.art
63
+ * Renders a workflow as a local ASCII graph using dagre for layout.
63
64
  */
64
- export async function renderMermaidAsAscii(mermaid: string): Promise<string | null> {
65
- try {
66
- const response = await fetch('https://mermaid-ascii.art', {
67
- method: 'POST',
68
- headers: {
69
- 'Content-Type': 'application/x-www-form-urlencoded',
70
- },
71
- body: `mermaid=${encodeURIComponent(mermaid)}`,
72
- });
73
-
74
- if (!response.ok) {
75
- return null;
65
+ export async function renderMermaidAsAscii(_mermaid: string): Promise<string | null> {
66
+ // We no longer use the mermaid string for ASCII, we use the workflow object directly.
67
+ return null;
68
+ }
69
+
70
+ export function renderWorkflowAsAscii(workflow: Workflow): string {
71
+ const g = new dagre.graphlib.Graph();
72
+ g.setGraph({ rankdir: 'LR', nodesep: 2, edgesep: 1, ranksep: 4 });
73
+ g.setDefaultEdgeLabel(() => ({}));
74
+
75
+ const nodeWidth = 24;
76
+ const nodeHeight = 3;
77
+
78
+ for (const step of workflow.steps) {
79
+ let label = `${step.id} (${step.type})`;
80
+ if (step.type === 'llm') label = `${step.id} (AI: ${step.agent})`;
81
+
82
+ if (step.if) label = `IF ${label}`;
83
+ if (step.foreach) label = `LOOP ${label}`;
84
+
85
+ const width = Math.max(nodeWidth, label.length + 4);
86
+ g.setNode(step.id, { label, width, height: nodeHeight });
87
+
88
+ if (step.needs) {
89
+ for (const need of step.needs) {
90
+ g.setEdge(need, step.id);
91
+ }
76
92
  }
93
+ }
94
+
95
+ dagre.layout(g);
96
+
97
+ // Canvas dimensions
98
+ let minX = Number.POSITIVE_INFINITY;
99
+ let minY = Number.POSITIVE_INFINITY;
100
+ let maxX = Number.NEGATIVE_INFINITY;
101
+ let maxY = Number.NEGATIVE_INFINITY;
102
+
103
+ for (const v of g.nodes()) {
104
+ const node = g.node(v);
105
+ minX = Math.min(minX, node.x - node.width / 2);
106
+ minY = Math.min(minY, node.y - node.height / 2);
107
+ maxX = Math.max(maxX, node.x + node.width / 2);
108
+ maxY = Math.max(maxY, node.y + node.height / 2);
109
+ }
77
110
 
78
- const ascii = await response.text();
79
- if (ascii.includes('Failed to render diagram')) {
80
- return null;
111
+ for (const e of g.edges()) {
112
+ const edge = g.edge(e);
113
+ for (const p of edge.points) {
114
+ minX = Math.min(minX, p.x);
115
+ minY = Math.min(minY, p.y);
116
+ maxX = Math.max(maxX, p.x);
117
+ maxY = Math.max(maxY, p.y);
81
118
  }
119
+ }
120
+
121
+ const canvasWidth = Math.ceil(maxX - minX) + 10;
122
+ const canvasHeight = Math.ceil(maxY - minY) + 4;
123
+ const canvas = Array.from({ length: canvasHeight }, () => Array(canvasWidth).fill(' '));
82
124
 
83
- return ascii;
84
- } catch {
85
- return null;
125
+ const offsetX = Math.floor(-minX) + 2;
126
+ const offsetY = Math.floor(-minY) + 1;
127
+
128
+ // Helper to draw at coordinates
129
+ const draw = (x: number, y: number, char: string) => {
130
+ const ix = Math.floor(x) + offsetX;
131
+ const iy = Math.floor(y) + offsetY;
132
+ if (iy >= 0 && iy < canvas.length && ix >= 0 && ix < canvas[0].length) {
133
+ canvas[iy][ix] = char;
134
+ }
135
+ };
136
+
137
+ const drawText = (x: number, y: number, text: string) => {
138
+ const startX = Math.floor(x);
139
+ const startY = Math.floor(y);
140
+ for (let i = 0; i < text.length; i++) {
141
+ draw(startX + i, startY, text[i]);
142
+ }
143
+ };
144
+
145
+ // Draw Nodes
146
+ for (const v of g.nodes()) {
147
+ const node = g.node(v);
148
+ const x = node.x - node.width / 2;
149
+ const y = node.y - node.height / 2;
150
+ const w = node.width;
151
+ const h = node.height;
152
+
153
+ const startX = Math.floor(x);
154
+ const startY = Math.floor(y);
155
+ const endX = startX + Math.floor(w) - 1;
156
+ const endY = startY + Math.floor(h) - 1;
157
+
158
+ for (let i = startX; i <= endX; i++) {
159
+ draw(i, startY, '-');
160
+ draw(i, endY, '-');
161
+ }
162
+ for (let i = startY; i <= endY; i++) {
163
+ draw(startX, i, '|');
164
+ draw(endX, i, '|');
165
+ }
166
+ draw(startX, startY, '+');
167
+ draw(endX, startY, '+');
168
+ draw(startX, endY, '+');
169
+ draw(endX, endY, '+');
170
+
171
+ const labelX = x + Math.floor((w - (node.label?.length || 0)) / 2);
172
+ const labelY = y + Math.floor(h / 2);
173
+ drawText(labelX, labelY, node.label || '');
174
+ }
175
+
176
+ // Draw Edges
177
+ for (const e of g.edges()) {
178
+ const edge = g.edge(e);
179
+ const points = edge.points;
180
+
181
+ for (let i = 0; i < points.length - 1; i++) {
182
+ const p1 = points[i];
183
+ const p2 = points[i + 1];
184
+
185
+ const x1 = Math.floor(p1.x);
186
+ const y1 = Math.floor(p1.y);
187
+ const x2 = Math.floor(p2.x);
188
+ const y2 = Math.floor(p2.y);
189
+
190
+ if (x1 === x2) {
191
+ for (let y = Math.min(y1, y2); y <= Math.max(y1, y2); y++) draw(x1, y, '|');
192
+ } else if (y1 === y2) {
193
+ for (let x = Math.min(x1, x2); x <= Math.max(x1, x2); x++) draw(x, y1, '-');
194
+ } else {
195
+ const xStep = x2 > x1 ? 1 : -1;
196
+ const yStep = y2 > y1 ? 1 : -1;
197
+
198
+ if (x1 !== x2) {
199
+ for (let x = x1; x !== x2; x += xStep) {
200
+ draw(x, y1, '-');
201
+ }
202
+ draw(x2, y1, '+');
203
+ }
204
+ if (y1 !== y2) {
205
+ for (let y = y1 + yStep; y !== y2; y += yStep) {
206
+ draw(x2, y, '|');
207
+ }
208
+ }
209
+ }
210
+ }
211
+
212
+ const lastPoint = points[points.length - 1];
213
+ const prevPoint = points[points.length - 2];
214
+ if (lastPoint.x > prevPoint.x) draw(lastPoint.x, lastPoint.y, '>');
215
+ else if (lastPoint.x < prevPoint.x) draw(lastPoint.x, lastPoint.y, '<');
216
+ else if (lastPoint.y > prevPoint.y) draw(lastPoint.x, lastPoint.y, 'v');
217
+ else if (lastPoint.y < prevPoint.y) draw(lastPoint.x, lastPoint.y, '^');
86
218
  }
219
+
220
+ return canvas.map((row) => row.join('').trimEnd()).join('\n');
87
221
  }
@@ -63,4 +63,10 @@ describe('Redactor', () => {
63
63
  const text = 'a and 12 are safe, but abc is a secret';
64
64
  expect(shortRedactor.redact(text)).toBe('a and 12 are safe, but ***REDACTED*** is a secret');
65
65
  });
66
+
67
+ it('should not redact substrings of larger words when using alphanumeric secrets', () => {
68
+ const wordRedactor = new Redactor({ USER: 'mark' });
69
+ const text = 'mark went to the marketplace';
70
+ expect(wordRedactor.redact(text)).toBe('***REDACTED*** went to the marketplace');
71
+ });
66
72
  });
@@ -30,7 +30,16 @@ export class Redactor {
30
30
  // Use a global replace to handle multiple occurrences
31
31
  // Escape special regex characters in the secret
32
32
  const escaped = secret.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
33
- redacted = redacted.replace(new RegExp(escaped, 'g'), '***REDACTED***');
33
+
34
+ // Use word boundaries if the secret starts/ends with an alphanumeric character
35
+ // to avoid partial matches (e.g. redacting 'mark' in 'marketplace')
36
+ const startBoundary = /^\w/.test(secret) ? '\\b' : '';
37
+ const endBoundary = /\w$/.test(secret) ? '\\b' : '';
38
+
39
+ redacted = redacted.replace(
40
+ new RegExp(`${startBoundary}${escaped}${endBoundary}`, 'g'),
41
+ '***REDACTED***'
42
+ );
34
43
  }
35
44
  return redacted;
36
45
  }
@@ -0,0 +1,29 @@
1
+ import { describe, expect, it } from 'bun:test';
2
+ import { SafeSandbox } from './sandbox';
3
+
4
+ describe('SafeSandbox', () => {
5
+ it('should execute basic arithmetic', async () => {
6
+ const result = await SafeSandbox.execute('1 + 2');
7
+ expect(result).toBe(3);
8
+ });
9
+
10
+ it('should have access to context variables', async () => {
11
+ const result = await SafeSandbox.execute('a + b', { a: 10, b: 20 });
12
+ expect(result).toBe(30);
13
+ });
14
+
15
+ it('should not have access to Node.js globals', async () => {
16
+ const result = await SafeSandbox.execute('typeof process');
17
+ expect(result).toBe('undefined');
18
+ });
19
+
20
+ it('should handle object results', async () => {
21
+ const result = await SafeSandbox.execute('({ x: 1, y: 2 })');
22
+ expect(result).toEqual({ x: 1, y: 2 });
23
+ });
24
+
25
+ it('should respect timeouts', async () => {
26
+ const promise = SafeSandbox.execute('while(true) {}', {}, { timeout: 100 });
27
+ await expect(promise).rejects.toThrow();
28
+ });
29
+ });
@@ -0,0 +1,61 @@
1
+ import * as vm from 'node:vm';
2
+
3
+ export interface SandboxOptions {
4
+ timeout?: number;
5
+ memoryLimit?: number;
6
+ }
7
+
8
+ export class SafeSandbox {
9
+ /**
10
+ * Execute a script in a sandbox: isolated-vm when it loads, otherwise node:vm (note: node:vm is not a security boundary)
11
+ */
12
+ static async execute(
13
+ code: string,
14
+ context: Record<string, unknown> = {},
15
+ options: SandboxOptions = {}
16
+ ): Promise<unknown> {
17
+ try {
18
+ // Try to use isolated-vm if available (dynamic import)
19
+ // Note: This will likely fail on Bun as it expects V8 host symbols
20
+ const ivm = await import('isolated-vm').then((m) => m.default || m).catch(() => null);
21
+
22
+ if (ivm && typeof ivm.Isolate === 'function') {
23
+ const isolate = new ivm.Isolate({ memoryLimit: options.memoryLimit || 128 });
24
+ try {
25
+ const contextInstance = await isolate.createContext();
26
+ const jail = contextInstance.global;
27
+
28
+ // Set up global context
29
+ await jail.set('global', jail.derefInto());
30
+
31
+ // Inject context variables
32
+ for (const [key, value] of Object.entries(context)) {
33
+ // Only copy non-undefined values
34
+ if (value !== undefined) {
35
+ await jail.set(key, new ivm.ExternalCopy(value).copyInto());
36
+ }
37
+ }
38
+
39
+ const script = await isolate.compileScript(code);
40
+ const result = await script.run(contextInstance, { timeout: options.timeout || 5000 });
41
+
42
+ if (result && typeof result === 'object' && result instanceof ivm.Reference) {
43
+ return await result.copy();
44
+ }
45
+ return result;
46
+ } finally {
47
+ isolate.dispose();
48
+ }
49
+ }
50
+ } catch (e) {
51
+ // Fallback to node:vm if isolated-vm fails to load or run
52
+ }
53
+
54
+ // Fallback implementation using node:vm (built-in)
55
+ const sandbox = { ...context };
56
+ return vm.runInNewContext(code, sandbox, {
57
+ timeout: options.timeout || 5000,
58
+ displayErrors: true,
59
+ });
60
+ }
61
+ }