keystone-cli 0.3.2 → 0.4.1

package/README.md CHANGED
@@ -259,6 +259,7 @@ Keystone supports several specialized step types:
 
 - `shell`: Run arbitrary shell commands.
 - `llm`: Prompt an agent and get structured or unstructured responses. Supports `schema` (JSON Schema) for structured output.
+  - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
 - `request`: Make HTTP requests (GET, POST, etc.).
 - `file`: Read, write, or append to files.
 - `human`: Pause execution for manual confirmation or text input.
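As a sketch of how the new flag is meant to be used: only `allowClarification` itself is confirmed by the schema changes later in this diff; the `prompt` and `schema` field names are assumptions based on the README's description of `llm` steps.

```ts
// Hypothetical llm step exercising the new allowClarification flag.
// `prompt` and `schema` are assumed field names; allowClarification comes
// from LlmStepSchema further down in this diff.
const triageStep = {
  id: 'triage',
  type: 'llm' as const,
  prompt: 'Summarize the incident and propose a severity level.',
  schema: { type: 'object', properties: { severity: { type: 'string' } } },
  allowClarification: true, // may ask the user questions, or suspend the run
};
```

This presumably pairs with the new `suspended` value added to `StepStatus` below: a step waiting on clarification with no human available can be parked rather than failed.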
@@ -331,7 +332,23 @@ Keystone can itself act as an MCP server, allowing other agents (like Claude Des
 keystone mcp start
 ```
 
-> **Note:** Workflow execution via the Keystone MCP server is synchronous. This provides a better experience for agents as they receive the final results directly, though it means the connection remains open for the duration of the workflow run.
+#### Sync vs Async Execution
+
+The MCP server provides two modes for running workflows:
+
+| Tool | Mode | Use Case |
+|------|------|----------|
+| `run_workflow` | **Sync** | Short workflows. Blocks until completion, returns outputs directly. |
+| `start_workflow` | **Async** | Long workflows. Returns immediately with a `run_id`; use `get_run_status` to poll. |
+
+**Example: Async execution pattern**
+```
+1. Agent calls start_workflow → { run_id: "abc", status: "running" }
+2. Agent polls get_run_status → { status: "running" }
+3. Agent polls get_run_status → { status: "completed", outputs: {...} }
+```
+
+The async pattern is ideal for LLM-heavy workflows that may take minutes to complete.
 
 #### Global MCP Servers
 Define shared MCP servers in `.keystone/config.yaml` to reuse them across different workflows. Keystone ensures that multiple steps using the same global server will share a single running process.
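A minimal sketch of that polling loop from an agent's side, assuming a generic `callTool(name, args)` helper from whatever MCP client library is in use; the tool names and status values come from the table above, while the argument names and poll interval are invented:

```ts
// Sketch only: `callTool` stands in for an MCP client call; its real
// signature depends on the client library. Tool names match the table above.
type CallTool = (name: string, args: Record<string, unknown>) => Promise<any>;

async function runWorkflowAsync(callTool: CallTool, workflow: string) {
  // start_workflow returns immediately with a run identifier.
  const { run_id } = await callTool('start_workflow', { workflow });

  // Poll get_run_status until the run settles.
  for (;;) {
    const run = await callTool('get_run_status', { run_id });
    if (run.status === 'completed') return run.outputs;
    if (run.status === 'failed') throw new Error(`run ${run_id} failed`);
    await new Promise((r) => setTimeout(r, 2000)); // invented poll interval
  }
}
```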
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "0.3.2",
+  "version": "0.4.1",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {
@@ -1,7 +1,7 @@
 import { Database } from 'bun:sqlite';
 
 export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'paused';
-export type StepStatus = 'pending' | 'running' | 'success' | 'failed' | 'skipped';
+export type StepStatus = 'pending' | 'running' | 'success' | 'failed' | 'skipped' | 'suspended';
 
 export interface WorkflowRun {
   id: string;
@@ -25,18 +25,35 @@ export interface StepExecution {
   started_at: string | null;
   completed_at: string | null;
   retry_count: number;
+  usage: string | null; // JSON
 }
 
 export class WorkflowDb {
   private db: Database;
 
-  constructor(dbPath = '.keystone/state.db') {
+  constructor(public readonly dbPath = '.keystone/state.db') {
     this.db = new Database(dbPath, { create: true });
     this.db.exec('PRAGMA journal_mode = WAL;'); // Write-ahead logging
     this.db.exec('PRAGMA foreign_keys = ON;'); // Enable foreign key enforcement
     this.initSchema();
   }
 
+  /**
+   * Type guard to check if an error is a SQLite busy error
+   */
+  private isSQLiteBusyError(error: unknown): boolean {
+    if (typeof error === 'object' && error !== null) {
+      const err = error as { code?: string | number; message?: string };
+      return (
+        err.code === 'SQLITE_BUSY' ||
+        err.code === 5 ||
+        (typeof err.message === 'string' &&
+          (err.message.includes('SQLITE_BUSY') || err.message.includes('database is locked')))
+      );
+    }
+    return false;
+  }
+
   /**
    * Retry wrapper for SQLite operations that may encounter SQLITE_BUSY errors
    * during high concurrency scenarios (e.g., foreach loops)
@@ -49,9 +66,8 @@ export class WorkflowDb {
        return operation();
      } catch (error) {
        // Check if this is a SQLITE_BUSY error
-        const errorMsg = error instanceof Error ? error.message : String(error);
-        if (errorMsg.includes('SQLITE_BUSY') || errorMsg.includes('database is locked')) {
-          lastError = error instanceof Error ? error : new Error(errorMsg);
+        if (this.isSQLiteBusyError(error)) {
+          lastError = error instanceof Error ? error : new Error(String(error));
          // Exponential backoff: 10ms, 20ms, 40ms, 80ms, 160ms
          const delayMs = 10 * 2 ** attempt;
          await Bun.sleep(delayMs);
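From these fragments, the surrounding retry wrapper plausibly looks like the sketch below. The signature and the five-attempt cap are inferences from the backoff comment (10ms through 160ms is five delays), not code from the package:

```ts
// Illustrative reconstruction, not the package's exact code. The 5-attempt
// cap is inferred from the "10ms, 20ms, 40ms, 80ms, 160ms" comment.
async function withRetry<T>(
  operation: () => T,
  isBusy: (e: unknown) => boolean, // stands in for this.isSQLiteBusyError
  maxAttempts = 5
): Promise<T> {
  let lastError: Error | undefined;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return operation();
    } catch (error) {
      if (!isBusy(error)) throw error; // non-busy errors propagate immediately
      lastError = error instanceof Error ? error : new Error(String(error));
      await Bun.sleep(10 * 2 ** attempt); // exponential backoff between attempts
    }
  }
  throw lastError ?? new Error('withRetry: attempts exhausted');
}
```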
@@ -89,6 +105,7 @@ export class WorkflowDb {
       started_at TEXT,
       completed_at TEXT,
       retry_count INTEGER DEFAULT 0,
+      usage TEXT,
       FOREIGN KEY (run_id) REFERENCES workflow_runs(id) ON DELETE CASCADE
     );
 
@@ -204,12 +221,13 @@ export class WorkflowDb {
     id: string,
     status: StepStatus,
     output?: unknown,
-    error?: string
+    error?: string,
+    usage?: unknown
   ): Promise<void> {
     await this.withRetry(() => {
       const stmt = this.db.prepare(`
         UPDATE step_executions
-        SET status = ?, output = ?, error = ?, completed_at = ?
+        SET status = ?, output = ?, error = ?, completed_at = ?, usage = ?
         WHERE id = ?
       `);
       stmt.run(
@@ -217,6 +235,7 @@ export class WorkflowDb {
         output ? JSON.stringify(output) : null,
         error || null,
         new Date().toISOString(),
+        usage ? JSON.stringify(usage) : null,
         id
       );
     });
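Putting the `usage` changes together, a call site could persist token counts like this sketch; the id and payload values are invented, while the parameter order and the `{ prompt_tokens, completion_tokens, total_tokens }` shape are taken from this diff:

```ts
// Hypothetical call site persisting token usage alongside a step's result.
// The usage shape mirrors the adapters' `usage` field seen later in this diff.
const db = new WorkflowDb(); // opens .keystone/state.db by default
await db.updateStepStatus(
  'step-exec-123',          // step execution id (illustrative)
  'success',
  { answer: 42 },           // output, stored as JSON
  undefined,                // no error
  { prompt_tokens: 812, completion_tokens: 64, total_tokens: 876 }
);
console.log(db.dbPath);     // now accessible via `public readonly dbPath`
```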
@@ -31,6 +31,7 @@ export interface ExpressionContext {
   item?: unknown;
   index?: number;
   env?: Record<string, string>;
+  output?: unknown;
 }
 
 type ASTNode = jsep.Expression;
@@ -1,16 +1,19 @@
-import { afterAll, describe, expect, it, spyOn } from 'bun:test';
+import { afterAll, beforeAll, describe, expect, it, spyOn } from 'bun:test';
 import { mkdirSync, rmSync, writeFileSync } from 'node:fs';
 import * as os from 'node:os';
 import { join } from 'node:path';
 import { parseAgent, resolveAgentPath } from './agent-parser';
 
 describe('agent-parser', () => {
-  const tempDir = join(process.cwd(), 'temp-test-agents');
+  // Use a unique temp directory with random suffix to prevent parallel test collisions
+  const tempDir = join(
+    process.cwd(),
+    `temp-test-agents-${Date.now()}-${Math.random().toString(36).slice(2)}`
+  );
 
-  // Setup temp directory
-  try {
+  beforeAll(() => {
     mkdirSync(tempDir, { recursive: true });
-  } catch (e) {}
+  });
 
   afterAll(() => {
     try {
@@ -21,7 +21,7 @@ const RetrySchema = z.object({
 const BaseStepSchema = z.object({
   id: z.string(),
   type: z.string(),
-  needs: z.array(z.string()).default([]),
+  needs: z.array(z.string()).optional().default([]),
   if: z.string().optional(),
   timeout: z.number().int().positive().optional(),
   retry: RetrySchema.optional(),
@@ -58,15 +58,19 @@ const LlmStepSchema = BaseStepSchema.extend({
   tools: z.array(AgentToolSchema).optional(),
   maxIterations: z.number().int().positive().default(10),
   useGlobalMcp: z.boolean().optional(),
+  allowClarification: z.boolean().optional(),
   mcpServers: z
     .array(
       z.union([
         z.string(),
         z.object({
           name: z.string(),
-          command: z.string(),
+          type: z.enum(['local', 'remote']).optional(),
+          command: z.string().optional(),
           args: z.array(z.string()).optional(),
           env: z.record(z.string()).optional(),
+          url: z.string().optional(),
+          headers: z.record(z.string()).optional(),
         }),
       ])
     )
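With the widened object shape, `mcpServers` entries can now describe either a locally spawned server or a remote one. A hedged sketch of the two forms, where only the Zod shape above is confirmed and all names and values are invented:

```ts
// Both entries satisfy the mcpServers object shape above (values illustrative).
const localServer = {
  name: 'filesystem',
  type: 'local' as const,
  command: 'bunx',
  args: ['@modelcontextprotocol/server-filesystem', '/tmp'],
  env: { LOG_LEVEL: 'info' },
};

const remoteServer = {
  name: 'search-api',
  type: 'remote' as const,
  url: 'https://mcp.example.com/sse',
  headers: { Authorization: 'Bearer <token>' },
};
```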
@@ -108,6 +112,7 @@ const SleepStepSchema = BaseStepSchema.extend({
 const ScriptStepSchema = BaseStepSchema.extend({
   type: z.literal('script'),
   run: z.string(),
+  allowInsecure: z.boolean().optional().default(false),
 });
 
 // ===== Discriminated Union for Steps =====
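Since `allowInsecure` defaults to `false`, a script step has to opt into the insecure path explicitly. A sketch of such a step object; the connection to `SafeSandbox`'s `allowInsecureFallback` option is inferred from the test file below, not confirmed by this hunk:

```ts
// Hypothetical script step; allowInsecure presumably feeds SafeSandbox's
// allowInsecureFallback option (an inference, not shown in this diff).
const computeStep = {
  id: 'compute',
  type: 'script' as const,
  run: '1 + 1',
  allowInsecure: true, // permit fallback when isolated-vm is unavailable
};
```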
@@ -134,6 +139,7 @@ export const WorkflowSchema = z.object({
   inputs: z.record(InputSchema).optional(),
   outputs: z.record(z.string()).optional(),
   env: z.record(z.string()).optional(),
+  concurrency: z.union([z.number().int().positive(), z.string()]).optional(),
   steps: z.array(StepSchema),
   finally: z.array(StepSchema).optional(),
 });
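A minimal workflow object exercising the new field, sketched from the schema alone; whether the string form is meant to hold a runtime-resolved expression is an assumption:

```ts
// Validates against WorkflowSchema above; the expression-string form is a guess.
const workflow = {
  concurrency: 4, // or a string, e.g. an expression resolved at runtime
  steps: [{ id: 'sum', type: 'script' as const, run: '1 + 1' }],
};
```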
@@ -0,0 +1,106 @@
+import { describe, expect, it, mock, spyOn } from 'bun:test';
+import { Redactor } from '../utils/redactor';
+import { SafeSandbox } from '../utils/sandbox';
+import { MCPManager } from './mcp-manager';
+
+// Type for accessing private methods in tests
+type MCPManagerPrivate = {
+  getServerKey(config: {
+    name: string;
+    type?: 'local' | 'remote';
+    command?: string;
+    args?: string[];
+    url?: string;
+  }): string;
+};
+
+describe('Audit Fixes Verification', () => {
+  describe('Secret Redaction', () => {
+    it('should redact secrets in text', () => {
+      const secrets = { MY_SECRET: 'super-secret-value' };
+      const redactor = new Redactor(secrets);
+
+      const input = 'This contains super-secret-value in the text.';
+      const result = redactor.redact(input);
+
+      expect(result).toContain('***REDACTED***');
+      expect(result).not.toContain('super-secret-value');
+    });
+
+    it('should handle partial matches correctly', () => {
+      const secrets = { MY_SECRET: 'abc123' };
+      const redactor = new Redactor(secrets);
+
+      const input = 'The value abc123 should be redacted.';
+      const result = redactor.redact(input);
+
+      expect(result).toContain('***REDACTED***');
+      expect(result).not.toContain('abc123');
+    });
+  });
+
+  describe('Sandbox Security', () => {
+    it('should throw by default if isolated-vm is missing and insecure fallback is disabled', async () => {
+      const code = '1 + 1';
+      expect(SafeSandbox.execute(code, {}, { allowInsecureFallback: false })).rejects.toThrow(
+        /secure sandbox failed/
+      );
+    });
+
+    it('should allow execution if allowInsecureFallback is true', async () => {
+      const code = '1 + 1';
+      const result = await SafeSandbox.execute(code, {}, { allowInsecureFallback: true });
+      expect(result).toBe(2);
+    });
+  });
+
+  describe('MCP Client Uniqueness', () => {
+    it('should generate unique keys for different ad-hoc configs with same name', async () => {
+      const manager = new MCPManager();
+
+      const config1 = {
+        name: 'test-server',
+        type: 'local' as const,
+        command: 'echo',
+        args: ['hello'],
+      };
+
+      const config2 = {
+        name: 'test-server',
+        type: 'local' as const,
+        command: 'echo',
+        args: ['world'],
+      };
+
+      const key1 = (manager as unknown as MCPManagerPrivate).getServerKey(config1);
+      const key2 = (manager as unknown as MCPManagerPrivate).getServerKey(config2);
+
+      expect(key1).not.toBe(key2);
+      expect(key1).toContain('hello');
+      expect(key2).toContain('world');
+    });
+
+    it('should generate unique keys for remote servers', async () => {
+      const manager = new MCPManager();
+
+      const config1 = {
+        name: 'remote-server',
+        type: 'remote' as const,
+        url: 'https://api1.example.com',
+      };
+
+      const config2 = {
+        name: 'remote-server',
+        type: 'remote' as const,
+        url: 'https://api2.example.com',
+      };
+
+      const key1 = (manager as unknown as MCPManagerPrivate).getServerKey(config1);
+      const key2 = (manager as unknown as MCPManagerPrivate).getServerKey(config2);
+
+      expect(key1).not.toBe(key2);
+      expect(key1).toContain('api1');
+      expect(key2).toContain('api2');
+    });
+  });
+});
@@ -39,7 +39,11 @@ export interface LLMTool {
 export interface LLMAdapter {
   chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse>;
 }
 
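The presence of `onStream` is what toggles streaming in each adapter (note the `stream: isStreaming` request bodies below), and each text delta is forwarded to the callback as it arrives. A usage sketch, with the adapter construction assumed since its constructor is not part of this diff:

```ts
// Sketch: stream a completion to stdout. The OpenAIAdapter constructor
// arguments are assumed; the chat() signature matches the interface above.
const adapter: LLMAdapter = new OpenAIAdapter(/* apiKey, baseUrl? */);

const response = await adapter.chat(
  [{ role: 'user', content: 'Explain WAL mode in SQLite in one paragraph.' }],
  {
    model: 'gpt-4o',
    onStream: (chunk) => process.stdout.write(chunk), // called per text delta
  }
);
// The assembled message is still returned once the stream completes.
console.log('\n---\n', response.message.content);
```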
@@ -58,8 +62,14 @@ export class OpenAIAdapter implements LLMAdapter {
 
   async chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse> {
+    const isStreaming = !!options?.onStream;
+
     const response = await fetch(`${this.baseUrl}/chat/completions`, {
       method: 'POST',
       headers: {
@@ -70,6 +80,7 @@ export class OpenAIAdapter implements LLMAdapter {
         model: options?.model || 'gpt-4o',
         messages,
         tools: options?.tools,
+        stream: isStreaming,
       }),
     });
 
@@ -78,6 +89,62 @@ export class OpenAIAdapter implements LLMAdapter {
       throw new Error(`OpenAI API error: ${response.status} ${response.statusText} - ${error}`);
     }
 
+    if (isStreaming) {
+      if (!response.body) throw new Error('Response body is null');
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let fullContent = '';
+      const toolCalls: LLMToolCall[] = [];
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        const chunk = decoder.decode(value);
+        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+
+        for (const line of lines) {
+          if (line.includes('[DONE]')) continue;
+          if (!line.startsWith('data: ')) continue;
+
+          try {
+            const data = JSON.parse(line.slice(6));
+            const delta = data.choices[0].delta;
+
+            if (delta.content) {
+              fullContent += delta.content;
+              options.onStream?.(delta.content);
+            }
+
+            if (delta.tool_calls) {
+              for (const tc of delta.tool_calls) {
+                if (!toolCalls[tc.index]) {
+                  toolCalls[tc.index] = {
+                    id: tc.id,
+                    type: 'function',
+                    function: { name: '', arguments: '' },
+                  };
+                }
+                const existing = toolCalls[tc.index];
+                if (tc.function?.name) existing.function.name += tc.function.name;
+                if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
+              }
+            }
+          } catch (e) {
+            // Ignore parse errors for incomplete chunks
+          }
+        }
+      }
+
+      return {
+        message: {
+          role: 'assistant',
+          content: fullContent || null,
+          tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
+        },
+      };
+    }
+
     const data = (await response.json()) as {
       choices: { message: LLMMessage }[];
       usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
@@ -104,8 +171,13 @@ export class AnthropicAdapter implements LLMAdapter {
 
   async chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse> {
+    const isStreaming = !!options?.onStream;
     const system = messages.find((m) => m.role === 'system')?.content || undefined;
 
     // Anthropic requires alternating user/assistant roles.
@@ -201,6 +273,7 @@ export class AnthropicAdapter implements LLMAdapter {
         messages: anthropicMessages,
         tools: anthropicTools,
         max_tokens: 4096,
+        stream: isStreaming,
       }),
     });
 
@@ -209,6 +282,61 @@ export class AnthropicAdapter implements LLMAdapter {
       throw new Error(`Anthropic API error: ${response.status} ${response.statusText} - ${error}`);
     }
 
+    if (isStreaming) {
+      if (!response.body) throw new Error('Response body is null');
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let fullContent = '';
+      const toolCalls: { id: string; name: string; inputString: string }[] = [];
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        const chunk = decoder.decode(value);
+        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+
+        for (const line of lines) {
+          if (!line.startsWith('data: ')) continue;
+
+          try {
+            const data = JSON.parse(line.slice(6));
+            if (data.type === 'content_block_delta' && data.delta?.text) {
+              fullContent += data.delta.text;
+              options.onStream?.(data.delta.text);
+            }
+
+            if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
+              toolCalls.push({
+                id: data.content_block.id,
+                name: data.content_block.name,
+                inputString: '',
+              });
+            }
+
+            if (data.type === 'tool_use_delta' && data.delta?.partial_json) {
+              const lastTool = toolCalls[toolCalls.length - 1];
+              if (lastTool) lastTool.inputString += data.delta.partial_json;
+            }
+          } catch (e) {
+            // Ignore parse errors
+          }
+        }
+      }
+
+      return {
+        message: {
+          role: 'assistant',
+          content: fullContent || null,
+          tool_calls: toolCalls.map((tc) => ({
+            id: tc.id,
+            type: 'function',
+            function: { name: tc.name, arguments: tc.inputString },
+          })),
+        },
+      };
+    }
+
     const data = (await response.json()) as {
       content: {
         type: 'text' | 'tool_use';
@@ -256,8 +384,13 @@ export class CopilotAdapter implements LLMAdapter {
 
   async chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse> {
+    const isStreaming = !!options?.onStream;
     const token = await AuthManager.getCopilotToken();
     if (!token) {
       throw new Error('GitHub Copilot token not found. Please run "keystone auth login" first.');
@@ -276,6 +409,7 @@ export class CopilotAdapter implements LLMAdapter {
         model: options?.model || 'gpt-4o',
         messages,
         tools: options?.tools,
+        stream: isStreaming,
       }),
     });
 
@@ -284,6 +418,64 @@ export class CopilotAdapter implements LLMAdapter {
       throw new Error(`Copilot API error: ${response.status} ${response.statusText} - ${error}`);
     }
 
+    if (isStreaming) {
+      // Use the same streaming logic as OpenAIAdapter since Copilot uses OpenAI API
+      if (!response.body) throw new Error('Response body is null');
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let fullContent = '';
+      const toolCalls: LLMToolCall[] = [];
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        const chunk = decoder.decode(value);
+        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+
+        for (const line of lines) {
+          if (line.includes('[DONE]')) continue;
+          if (!line.startsWith('data: ')) continue;
+
+          try {
+            const data = JSON.parse(line.slice(6));
+            if (!data.choices?.[0]?.delta) continue;
+            const delta = data.choices[0].delta;
+
+            if (delta.content) {
+              fullContent += delta.content;
+              options.onStream?.(delta.content);
+            }
+
+            if (delta.tool_calls) {
+              for (const tc of delta.tool_calls) {
+                if (!toolCalls[tc.index]) {
+                  toolCalls[tc.index] = {
+                    id: tc.id,
+                    type: 'function',
+                    function: { name: '', arguments: '' },
+                  };
+                }
+                const existing = toolCalls[tc.index];
+                if (tc.function?.name) existing.function.name += tc.function.name;
+                if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
+              }
+            }
+          } catch (e) {
+            // Ignore parse errors
+          }
+        }
+      }
+
+      return {
+        message: {
+          role: 'assistant',
+          content: fullContent || null,
+          tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
+        },
+      };
+    }
+
     const data = (await response.json()) as {
       choices: { message: LLMMessage }[];
       usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };