keystone-cli 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ import { afterAll, beforeAll, describe, expect, test } from 'bun:test';
2
+ import { AUTO_LOAD_SECRET_PREFIXES } from '../../utils/env-constants';
3
+ import { SecretManager } from '../services/secret-manager';
4
+ import { detectShellInjectionRisk } from './shell-executor';
5
+
6
/**
 * Regression tests for the security hardening in this release:
 *  - detectShellInjectionRisk must flag newlines and unquoted shell metacharacters.
 *  - SecretManager.loadSecrets must only auto-load env vars whose names start
 *    with one of the AUTO_LOAD_SECRET_PREFIXES entries.
 */
describe('Security Fixes', () => {
  describe('ShellExecutor Command Injection', () => {
    test('detectShellInjectionRisk should block newlines', () => {
      // Regular space should be allowed
      expect(detectShellInjectionRisk('echo hello')).toBe(false);

      // Newline characters should be detected as risk
      expect(detectShellInjectionRisk('echo hello\n')).toBe(true);
      expect(detectShellInjectionRisk('echo hello\r')).toBe(true);
      expect(detectShellInjectionRisk('echo hello\nrm -rf /')).toBe(true);

      // Standard allowed characters should still pass
      expect(detectShellInjectionRisk('curl -X POST https://example.com/api')).toBe(false);
      expect(detectShellInjectionRisk('file_name-v1.0+beta.txt')).toBe(false);
    });

    test('detectShellInjectionRisk should correctly handle quotes', () => {
      // Content inside single quotes is considered safe (literal)
      expect(detectShellInjectionRisk("echo 'safe string with ; and |'")).toBe(false);

      // But unsafe chars outside quotes must be caught
      expect(detectShellInjectionRisk("echo 'safe'; rm -rf /")).toBe(true);
    });
  });

  describe('SecretManager Auto-loading', () => {
    // Snapshot of the environment taken when the suite is defined, so the
    // variables injected by the tests below can be rolled back afterwards.
    const originalEnv = { ...Bun.env };

    afterAll(() => {
      // Drop everything currently set (including the test-only variables)...
      for (const key of Object.keys(Bun.env)) {
        delete Bun.env[key];
      }
      // ...then put the snapshot back. Compare against undefined instead of
      // relying on truthiness so variables that were set to an empty string
      // are restored as well rather than silently disappearing.
      for (const [key, value] of Object.entries(originalEnv)) {
        if (value !== undefined) Bun.env[key] = value;
      }
    });

    test('should only load secrets with allowed prefixes', () => {
      // Setup test env vars
      Bun.env.KEYSTONE_TEST_SECRET = 'secret-value-1';
      Bun.env.GITHUB_TOKEN = 'gh-token-123';
      Bun.env.MY_RANDOM_TOKEN = 'unsafe-token-should-not-load';
      Bun.env.SOME_API_KEY = 'unsafe-api-key';

      const secretManager = new SecretManager();
      const secrets = secretManager.loadSecrets();

      expect(secrets.KEYSTONE_TEST_SECRET).toBe('secret-value-1');
      expect(secrets.GITHUB_TOKEN).toBe('gh-token-123');

      // Should NOT load these even though they contain "TOKEN" or "KEY"
      expect(secrets.MY_RANDOM_TOKEN).toBeUndefined();
      expect(secrets.SOME_API_KEY).toBeUndefined();
    });

    test('env constants should match implementation', () => {
      // Quick sanity check that our test logic matches the constants
      expect(AUTO_LOAD_SECRET_PREFIXES).toContain('KEYSTONE_');
      expect(AUTO_LOAD_SECRET_PREFIXES).toContain('GITHUB_');
    });
  });
});
@@ -43,6 +43,9 @@ export async function executeShellStep(
43
43
  abortSignal?: AbortSignal
44
44
  ): Promise<StepResult> {
45
45
  if (step.args) {
46
+ // args are inherently safe from shell injection as they skip the shell
47
+ // and pass the array directly to the OS via Bun.spawn.
48
+
46
49
  const command = step.args.map((a) => ExpressionEvaluator.evaluateString(a, context)).join(' ');
47
50
  if (dryRun) {
48
51
  logger.log(`[DRY RUN] Would execute: ${command}`);
@@ -200,13 +203,35 @@ async function readStreamWithLimit(
200
203
  }
201
204
 
202
205
  // Whitelist of allowed characters for secure shell command execution
203
- // Allows: Alphanumeric, whitespace, and common safe punctuation (_ . / : @ , + - = ' " !)
204
- // Blocks: Backslashes, pipes, redirects, subshells, variables ($), etc.
205
- const SAFE_SHELL_CHARS = /^[a-zA-Z0-9\s_./:@,+=~'"!-]+$/;
206
+ // Allows: Alphanumeric, space, and common safe punctuation (_ . / : @ , + - = ' " ! ~)
207
+ // Blocks: Newlines (\n, \r), Pipes, redirects, subshells, variables ($), etc.
208
+ const SAFE_SHELL_CHARS = /^[a-zA-Z0-9 _./:@,+=~'"!-]+$/;
206
209
 
207
210
  export function detectShellInjectionRisk(rawCommand: string): boolean {
208
- // If the command contains any character NOT in the whitelist, it's considered risky
209
- return !SAFE_SHELL_CHARS.test(rawCommand);
211
+ // We scan the command to handle single quotes correctly.
212
+ // Characters inside single quotes are considered escaped/literal and safe from shell injection.
213
+ let inSingleQuote = false;
214
+
215
+ for (let i = 0; i < rawCommand.length; i++) {
216
+ const char = rawCommand[i];
217
+
218
+ if (char === "'") {
219
+ inSingleQuote = !inSingleQuote;
220
+ continue;
221
+ }
222
+
223
+ // Outside single quotes, we enforce the strict whitelist
224
+ if (!inSingleQuote) {
225
+ if (!SAFE_SHELL_CHARS.test(char)) {
226
+ return true;
227
+ }
228
+ }
229
+ // Inside single quotes, everything is treated as a literal string by the shell,
230
+ // so we don't need to block special characters.
231
+ }
232
+
233
+ // If we ended with an unclosed single quote, it's a syntax risk
234
+ return inSingleQuote;
210
235
  }
211
236
 
212
237
  /**
@@ -0,0 +1,83 @@
1
+ import { afterEach, describe, expect, it } from 'bun:test';
2
+ import { existsSync, rmSync } from 'node:fs';
3
+ import { MemoryDb } from '../db/memory-db';
4
+ import { WorkflowDb } from '../db/workflow-db';
5
+ import type { Workflow } from '../parser/schema';
6
+ import { container } from '../utils/container';
7
+ import { ConsoleLogger } from '../utils/logger';
8
+ import { WorkflowRunner } from './workflow-runner';
9
+
10
/**
 * Verifies that memoization of a shell step defined via `args` is keyed on the
 * evaluated arguments: identical inputs hit the cache, different inputs do not.
 */
describe('Workflow Memoization Leak (Args Check)', () => {
  const dbPath = 'test-memoization-leak.db';

  container.register('logger', new ConsoleLogger());
  container.register('db', new WorkflowDb(dbPath));
  container.register('memoryDb', new MemoryDb());

  // Remove the on-disk database after each test, if one was created.
  afterEach(() => {
    if (!existsSync(dbPath)) return;
    rmSync(dbPath);
  });

  it('should NOT collide for shell steps with same command but different args', async () => {
    const echoStep = {
      id: 's1',
      type: 'shell',
      args: ['echo', '${{ inputs.arg }}'],
      allowInsecure: true,
      memoize: true,
      needs: [],
    };

    const workflow: Workflow = {
      name: 'memoize-args-wf',
      inputs: {
        arg: { type: 'string' },
      },
      steps: [echoStep],
      outputs: {
        out: '${{ steps.s1.output.stdout.trim() }}',
      },
    } as unknown as Workflow;

    // Counts how many times step "s1" actually reaches the real executor.
    let s1Executions = 0;
    const countingExecuteStep = async (step: any, context: any, logger: any, options: any) => {
      if (step.id === 's1') s1Executions++;
      const { executeStep: realExecuteStep } = await import('./step-executor');
      return realExecuteStep(step, context, logger, options);
    };

    // Builds a runner over the shared database for the given input value.
    const createRunner = (arg: string) =>
      new WorkflowRunner(workflow, {
        dbPath,
        inputs: { arg },
        executeStep: countingExecuteStep,
      });

    // Run 1: arg=A
    const firstRunner = createRunner('A');
    const firstOutputs = await firstRunner.run();
    expect(firstOutputs.out).toBe('A');
    expect(s1Executions).toBe(1);

    // Run 2: arg=A -> Cache Hit
    const secondRunner = createRunner('A');
    s1Executions = 0;
    const secondOutputs = await secondRunner.run();
    expect(secondOutputs.out).toBe('A');
    expect(s1Executions).toBe(0); // Memoized

    // Run 3: arg=B -> Execute (Must not collide with A)
    const thirdRunner = createRunner('B');
    s1Executions = 0;
    const thirdOutputs = await thirdRunner.run();
    expect(thirdOutputs.out).toBe('B');
    expect(s1Executions).toBe(1); // Should execute because args are different!
  });
});
@@ -0,0 +1,132 @@
1
+ import { beforeEach, describe, expect, jest, test } from 'bun:test';
2
+ import type { Step, Workflow } from '../parser/schema';
3
+ import { WorkflowRunner } from './workflow-runner';
4
+
5
/**
 * Checks that the recovery paths (reflexion and auto_heal) may replace a failed
 * step's `run` command but cannot overwrite its `type` or `id`.
 */
describe('WorkflowRunner Recovery Security', () => {
  // Logger stub that discards every message.
  const createSilentLogger = () => {
    const noop = () => {};
    return { log: noop, error: noop, warn: noop, debug: noop };
  };

  // Fresh copy of the hostile "fix" that tries to change protected fields.
  const createHostilePatch = () => ({
    run: 'echo "fixed"',
    type: 'script', // ATTEMPT TO CHANGE TYPE
    id: 'malicious-id', // ATTEMPT TO CHANGE ID
  });

  beforeEach(() => {
    jest.restoreAllMocks();
  });

  test('should NOT allow reflexion to overwrite critical step properties', async () => {
    const failingStep = {
      id: 'fail-step',
      type: 'shell',
      run: 'exit 1',
      reflexion: {
        limit: 2,
      },
    } as Step;

    const workflow: Workflow = {
      name: 'reflexion-security-test',
      steps: [failingStep],
    };

    // Adapter whose chat reply is the hostile patch serialized as JSON.
    const getAdapterStub = () => ({
      adapter: {
        chat: async () => ({
          message: {
            content: JSON.stringify(createHostilePatch()),
          },
        }),
      } as any,
      resolvedModel: 'mock-model',
    });

    const executeStepSpy = jest.fn();

    const runner = new WorkflowRunner(workflow, {
      logger: createSilentLogger(),
      dbPath: ':memory:',
      getAdapter: getAdapterStub,
      executeStep: executeStepSpy as any,
    });

    const runDb = (runner as any).db;
    await runDb.createRun(runner.runId, workflow.name, {});

    // The original command fails; anything else is treated as the fixed retry.
    executeStepSpy.mockImplementation(async (step: any) =>
      step.run === 'exit 1'
        ? { status: 'failed', output: null, error: 'Command failed' }
        : { status: 'success', output: 'fixed' }
    );

    await (runner as any).executeStepWithForeach(workflow.steps[0]);

    // Expectations:
    // 1. First execution (fails)
    // 2. Reflexion happens
    // 3. Second execution (retry)
    expect(executeStepSpy).toHaveBeenCalledTimes(2);

    const retriedStep = executeStepSpy.mock.calls[1][0] as any;
    expect(retriedStep.run).toBe('echo "fixed"');
    expect(retriedStep.type).toBe('shell'); // Should still be shell
    expect(retriedStep.id).toBe('fail-step'); // Should still be fail-step
  });

  test('should NOT allow auto_heal to overwrite critical step properties', async () => {
    const failingStep = {
      id: 'fail-step',
      type: 'shell',
      run: 'exit 1',
      auto_heal: {
        maxAttempts: 1,
        agent: 'healer',
      },
    } as Step;

    const workflow: Workflow = {
      name: 'autoheal-security-test',
      steps: [failingStep],
    };

    const executeStepSpy = jest.fn();
    const runner = new WorkflowRunner(workflow, {
      logger: createSilentLogger(),
      dbPath: ':memory:',
      executeStep: executeStepSpy as any,
    });

    const runDb = (runner as any).db;
    await runDb.createRun(runner.runId, workflow.name, {});

    executeStepSpy.mockImplementation(async (step: any) => {
      const isOriginalCommand = step.run === 'exit 1';
      if (isOriginalCommand) {
        return { status: 'failed', output: null, error: 'Command failed' };
      }

      const isHealedRetry = step.id === 'fail-step' && step.run === 'echo "fixed"';
      if (isHealedRetry) {
        return { status: 'success', output: 'fixed' };
      }

      // This is the healer agent call itself
      const isHealerCall = step.id === 'fail-step-healer';
      if (isHealerCall) {
        return {
          status: 'success',
          output: createHostilePatch(),
        };
      }

      return { status: 'failed', error: 'Unexpected step' };
    });

    await (runner as any).executeStepWithForeach(workflow.steps[0]);

    // 1. Initial fail
    // 2. Healer call
    // 3. Retry
    expect(executeStepSpy).toHaveBeenCalledTimes(3);

    const retriedStep = executeStepSpy.mock.calls[2][0] as any;
    expect(retriedStep.run).toBe('echo "fixed"');
    expect(retriedStep.type).toBe('shell');
    expect(retriedStep.id).toBe('fail-step');
  });
});
@@ -113,21 +113,124 @@ export class ContextBuilder {
113
113
  }
114
114
  }
115
115
  return stripUndefined({
116
- run: ExpressionEvaluator.evaluateString((step as any).run, context),
116
+ run: ExpressionEvaluator.evaluateString(step.run, context),
117
+ args: step.args?.map((arg: string) => ExpressionEvaluator.evaluateString(arg, context)),
118
+ dir: step.dir ? ExpressionEvaluator.evaluateString(step.dir, context) : undefined,
117
119
  env,
120
+ allowInsecure: step.allowInsecure,
118
121
  });
119
122
  }
120
- case 'file': {
123
+ case 'file':
121
124
  return stripUndefined({
122
- path: ExpressionEvaluator.evaluateString((step as any).path, context),
123
- content: (step as any).content
124
- ? ExpressionEvaluator.evaluateString((step as any).content, context)
125
+ path: ExpressionEvaluator.evaluateString(step.path, context),
126
+ content:
127
+ step.content !== undefined
128
+ ? ExpressionEvaluator.evaluateString(step.content as string, context)
129
+ : undefined,
130
+ op: step.op,
131
+ allowOutsideCwd: step.allowOutsideCwd,
132
+ });
133
+ case 'artifact':
134
+ return stripUndefined({
135
+ op: step.op,
136
+ name: ExpressionEvaluator.evaluateString(step.name, context),
137
+ paths: step.paths?.map((p: string) => ExpressionEvaluator.evaluateString(p, context)),
138
+ path: step.path
139
+ ? ExpressionEvaluator.evaluateString(step.path as string, context)
125
140
  : undefined,
126
- op: (step as any).op,
141
+ allowOutsideCwd: step.allowOutsideCwd,
142
+ });
143
+ case 'request': {
144
+ let headers: Record<string, string> | undefined;
145
+ if (step.headers) {
146
+ headers = {};
147
+ for (const [key, value] of Object.entries(step.headers)) {
148
+ headers[key] = ExpressionEvaluator.evaluateString(value as string, context);
149
+ }
150
+ }
151
+ return stripUndefined({
152
+ url: ExpressionEvaluator.evaluateString(step.url, context),
153
+ method: step.method,
154
+ headers,
155
+ body:
156
+ step.body !== undefined
157
+ ? ExpressionEvaluator.evaluateObject(step.body, context)
158
+ : undefined,
159
+ allowInsecure: step.allowInsecure,
160
+ });
161
+ }
162
+ case 'human':
163
+ return stripUndefined({
164
+ message: ExpressionEvaluator.evaluateString(step.message, context),
165
+ inputType: step.inputType,
127
166
  });
167
+ case 'sleep': {
168
+ const evaluated = ExpressionEvaluator.evaluate(step.duration.toString(), context);
169
+ return { duration: Number(evaluated) };
128
170
  }
171
+ case 'llm':
172
+ return stripUndefined({
173
+ agent: ExpressionEvaluator.evaluateString(step.agent, context),
174
+ provider: step.provider
175
+ ? ExpressionEvaluator.evaluateString(step.provider, context)
176
+ : undefined,
177
+ model: step.model ? ExpressionEvaluator.evaluateString(step.model, context) : undefined,
178
+ prompt: ExpressionEvaluator.evaluateString(step.prompt, context),
179
+ tools: step.tools,
180
+ maxIterations: step.maxIterations,
181
+ useGlobalMcp: step.useGlobalMcp,
182
+ allowClarification: step.allowClarification,
183
+ mcpServers: step.mcpServers,
184
+ useStandardTools: step.useStandardTools,
185
+ allowOutsideCwd: step.allowOutsideCwd,
186
+ allowInsecure: step.allowInsecure,
187
+ });
188
+ case 'workflow':
189
+ return stripUndefined({
190
+ path: step.path,
191
+ inputs: step.inputs
192
+ ? ExpressionEvaluator.evaluateObject(step.inputs, context)
193
+ : undefined,
194
+ });
195
+ case 'script':
196
+ return stripUndefined({
197
+ run: step.run,
198
+ allowInsecure: step.allowInsecure,
199
+ });
200
+ case 'engine': {
201
+ const env: Record<string, string> = {};
202
+ for (const [key, value] of Object.entries(step.env || {})) {
203
+ env[key] = ExpressionEvaluator.evaluateString(value as string, context);
204
+ }
205
+ return stripUndefined({
206
+ command: ExpressionEvaluator.evaluateString(step.command, context),
207
+ args: step.args?.map((arg: string) => ExpressionEvaluator.evaluateString(arg, context)),
208
+ input:
209
+ step.input !== undefined
210
+ ? ExpressionEvaluator.evaluateObject(step.input, context)
211
+ : undefined,
212
+ env,
213
+ cwd: step.cwd ? ExpressionEvaluator.evaluateString(step.cwd, context) : undefined,
214
+ });
215
+ }
216
+ case 'memory':
217
+ return stripUndefined({
218
+ op: step.op,
219
+ query: step.query ? ExpressionEvaluator.evaluateString(step.query, context) : undefined,
220
+ text: step.text ? ExpressionEvaluator.evaluateString(step.text, context) : undefined,
221
+ model: step.model,
222
+ metadata: step.metadata
223
+ ? ExpressionEvaluator.evaluateObject(step.metadata, context)
224
+ : undefined,
225
+ limit: step.limit,
226
+ });
227
+ case 'wait':
228
+ return stripUndefined({
229
+ event: ExpressionEvaluator.evaluateString(step.event, context),
230
+ oneShot: step.oneShot,
231
+ });
129
232
  default: {
130
- // For most steps, we just pass through properties which might contain expressions
233
+ // For fallback, pass through properties which might contain expressions
131
234
  const inputs: Record<string, unknown> = {};
132
235
  for (const [key, value] of Object.entries(step)) {
133
236
  if (key === 'id' || key === 'type' || key === 'if' || key === 'foreach') continue;
@@ -1,4 +1,5 @@
1
- import { RedactionBuffer, Redactor } from '../../utils/redactor';
1
+ import { AUTO_LOAD_SECRET_PREFIXES } from '../../utils/env-constants.ts';
2
+ import { RedactionBuffer, Redactor } from '../../utils/redactor.ts';
2
3
 
3
4
  export class SecretManager {
4
5
  private secretValues: string[] = [];
@@ -49,12 +50,17 @@ export class SecretManager {
49
50
  }
50
51
  }
51
52
 
52
- // Include pattern-matched secrets from Bun.env (safe-ish way to get common secrets)
53
- const secretPatterns = [/token/i, /key/i, /secret/i, /password/i, /auth/i, /api/i];
53
+ // Include prefix-matched secrets from Bun.env (only keys starting with an AUTO_LOAD_SECRET_PREFIXES entry)
54
54
  for (const [key, value] of Object.entries(Bun.env)) {
55
- if (value && secretPatterns.some((p) => p.test(key))) {
56
- // Skip common system non-secret variables that might match patterns
57
- if (safeSystemVars.includes(key)) continue;
55
+ if (!value) continue;
56
+
57
+ // Skip common system non-secret variables
58
+ if (safeSystemVars.includes(key)) continue;
59
+
60
+ // Check against allowed prefixes
61
+ const isSecret = AUTO_LOAD_SECRET_PREFIXES.some((prefix) => key.startsWith(prefix));
62
+
63
+ if (isSecret) {
58
64
  secrets[key] = value;
59
65
  }
60
66
  }
@@ -8,6 +8,7 @@ import { executeArtifactStep } from './executors/artifact-executor.ts';
8
8
  import { executeBlueprintStep } from './executors/blueprint-executor.ts';
9
9
  import { executeEngineStepWrapper } from './executors/engine-executor.ts';
10
10
  import { executeFileStep } from './executors/file-executor.ts';
11
+ import { executeGitStep } from './executors/git-executor.ts';
11
12
  import { executeHumanStep, executeSleepStep } from './executors/human-executor.ts';
12
13
  import { executeJoinStep } from './executors/join-executor.ts';
13
14
  import { executeLlmStep } from './executors/llm-executor.ts';
@@ -167,6 +168,9 @@ export async function executeStep(
167
168
  case 'join':
168
169
  result = await executeJoinStep(step, context, logger, abortSignal);
169
170
  break;
171
+ case 'git':
172
+ result = await executeGitStep(step, context, logger, abortSignal);
173
+ break;
170
174
  default:
171
175
  throw new Error(`Unknown step type: ${(step as Step).type}`);
172
176
  }