keystone-cli 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -8
- package/package.json +5 -3
- package/src/cli.ts +33 -192
- package/src/db/memory-db.test.ts +54 -0
- package/src/db/memory-db.ts +122 -0
- package/src/db/sqlite-setup.ts +49 -0
- package/src/db/workflow-db.test.ts +41 -10
- package/src/db/workflow-db.ts +84 -28
- package/src/expression/evaluator.test.ts +19 -0
- package/src/expression/evaluator.ts +134 -39
- package/src/parser/schema.ts +41 -0
- package/src/runner/audit-verification.test.ts +23 -0
- package/src/runner/auto-heal.test.ts +64 -0
- package/src/runner/debug-repl.test.ts +74 -0
- package/src/runner/debug-repl.ts +225 -0
- package/src/runner/foreach-executor.ts +327 -0
- package/src/runner/llm-adapter.test.ts +27 -14
- package/src/runner/llm-adapter.ts +90 -112
- package/src/runner/llm-executor.test.ts +91 -6
- package/src/runner/llm-executor.ts +26 -6
- package/src/runner/mcp-client.audit.test.ts +69 -0
- package/src/runner/mcp-client.test.ts +12 -3
- package/src/runner/mcp-client.ts +199 -19
- package/src/runner/mcp-manager.ts +19 -8
- package/src/runner/mcp-server.test.ts +8 -5
- package/src/runner/mcp-server.ts +31 -17
- package/src/runner/optimization-runner.ts +305 -0
- package/src/runner/reflexion.test.ts +87 -0
- package/src/runner/shell-executor.test.ts +12 -0
- package/src/runner/shell-executor.ts +9 -6
- package/src/runner/step-executor.test.ts +46 -1
- package/src/runner/step-executor.ts +154 -60
- package/src/runner/stream-utils.test.ts +65 -0
- package/src/runner/stream-utils.ts +186 -0
- package/src/runner/workflow-runner.test.ts +4 -4
- package/src/runner/workflow-runner.ts +436 -251
- package/src/templates/agents/keystone-architect.md +6 -4
- package/src/templates/full-feature-demo.yaml +4 -4
- package/src/types/assets.d.ts +14 -0
- package/src/types/status.ts +1 -1
- package/src/ui/dashboard.tsx +38 -26
- package/src/utils/auth-manager.ts +3 -1
- package/src/utils/logger.test.ts +76 -0
- package/src/utils/logger.ts +39 -0
- package/src/utils/prompt.ts +75 -0
- package/src/utils/redactor.test.ts +86 -4
- package/src/utils/redactor.ts +48 -13
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { dirname } from 'node:path';
|
|
3
|
+
import { stringify } from 'yaml';
|
|
4
|
+
import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
|
|
5
|
+
import type { LlmStep, Step, Workflow } from '../parser/schema';
|
|
6
|
+
import { extractJson } from '../utils/json-parser';
|
|
7
|
+
import { getAdapter } from './llm-adapter';
|
|
8
|
+
import { executeLlmStep } from './llm-executor';
|
|
9
|
+
import { WorkflowRunner } from './workflow-runner';
|
|
10
|
+
|
|
11
|
+
/**
 * Settings accepted by `OptimizationRunner`.
 */
export interface OptimizationOptions {
  /** Path of the workflow file; its directory is used as the workflow directory for runs. */
  workflowPath: string;

  /** `id` of the step whose prompt (LLM step) or `run` command (shell step) is tuned. */
  targetStepId: string;

  /** Inputs handed to every workflow run; the runner substitutes `{}` when omitted. */
  inputs?: Record<string, unknown>;

  /** Number of optimization rounds; the runner substitutes 5 when omitted. */
  iterations?: number;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Extracts a numeric score from an evaluator result.
 *
 * - A finite number is returned as-is; NaN/Infinity are rejected.
 * - For a string, the first run of digits (with an optional decimal part) is parsed,
 *   so "Score: 87.5/100" yields 87.5.
 * - Anything else yields `undefined` so the caller can pick its own fallback.
 */
function parseScore(value: unknown): number | undefined {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value === 'string') {
    const match = value.match(/\d+(?:\.\d+)?/);
    return match ? Number.parseFloat(match[0]) : undefined;
  }
  return undefined;
}

/**
 * Iteratively tunes the prompt of an LLM step (or the `run` command of a shell step).
 *
 * Each iteration runs the whole workflow with a candidate text, scores the outputs with
 * the workflow's `eval` configuration (a sandboxed script or an LLM judge), remembers the
 * best-scoring text, and asks an LLM to propose the next candidate.
 */
export class OptimizationRunner {
  private workflow: Workflow;
  private workflowPath: string;
  private targetStepId: string;
  private iterations: number;
  private inputs: Record<string, unknown>;

  /**
   * @param workflow Parsed workflow to optimize; it is never mutated (each run uses a copy).
   * @param options  Target step, workflow location, inputs and iteration count.
   */
  constructor(workflow: Workflow, options: OptimizationOptions) {
    this.workflow = workflow;
    this.workflowPath = options.workflowPath;
    this.targetStepId = options.targetStepId;
    // `||` is deliberate: 0 as well as `undefined` falls back to the default of 5.
    this.iterations = options.iterations || 5;
    this.inputs = options.inputs ?? {};
  }

  /**
   * Runs the optimization loop and reports the best candidate found.
   *
   * @returns The best-scoring prompt/command and its score. `bestScore` stays -1 if no
   *          iteration ran.
   * @throws Error when the workflow has no `eval` section, when the target step is missing
   *         or is not an `llm`/`shell` step, or when that step has no text to optimize.
   *         Errors thrown by the workflow run or the evaluator propagate unchanged.
   */
  public async optimize(): Promise<{ bestPrompt: string; bestScore: number }> {
    if (!this.workflow.eval) {
      throw new Error('Workflow is missing "eval" configuration');
    }

    const targetStep = this.workflow.steps.find((s) => s.id === this.targetStepId);
    if (!targetStep || (targetStep.type !== 'llm' && targetStep.type !== 'shell')) {
      throw new Error(`Target step "${this.targetStepId}" not found or is not an LLM/Shell step`);
    }

    const initialPrompt: unknown =
      targetStep.type === 'llm'
        ? (targetStep as LlmStep).prompt
        : // biome-ignore lint/suspicious/noExplicitAny: generic step access
          (targetStep as any).run;
    // Fail with a clear message instead of a TypeError on `.substring` below.
    if (typeof initialPrompt !== 'string') {
      throw new Error(`Target step "${this.targetStepId}" has no prompt/run text to optimize`);
    }

    console.log(`\nOptimizing step: ${this.targetStepId} (${targetStep.type})`);
    console.log(`Iterations: ${this.iterations}`);

    let bestPrompt = initialPrompt;
    let bestScore = -1;
    let currentPrompt = initialPrompt;

    for (let i = 1; i <= this.iterations; i++) {
      console.log(`\n--- Iteration ${i}/${this.iterations} ---`);
      console.log(
        `Current Prompt: ${currentPrompt.substring(0, 100)}${currentPrompt.length > 100 ? '...' : ''}`
      );

      // 1. Run the full workflow with the candidate text swapped in. A JSON round-trip
      //    gives a deep copy, so the original workflow object is left untouched.
      const modifiedWorkflow = JSON.parse(JSON.stringify(this.workflow));
      const modifiedTargetStep = modifiedWorkflow.steps.find(
        (s: { id: string }) => s.id === this.targetStepId
      );
      if (modifiedTargetStep.type === 'llm') {
        modifiedTargetStep.prompt = currentPrompt;
      } else {
        modifiedTargetStep.run = currentPrompt;
      }

      const runner = new WorkflowRunner(modifiedWorkflow, {
        inputs: this.inputs,
        workflowDir: dirname(this.workflowPath),
      });
      const outputs = await runner.run();

      // 2. Score the outputs.
      const score = await this.evaluate(outputs);
      console.log(`Score: ${score}/100`);

      if (score > bestScore) {
        bestScore = score;
        bestPrompt = currentPrompt;
        console.log(`✨ New best score: ${bestScore}`);
      }

      // 3. Ask for the next candidate, except after the final iteration.
      if (i < this.iterations) {
        currentPrompt = await this.suggestNextPrompt(currentPrompt, score, outputs);
      }
    }

    await this.saveBestPrompt(bestPrompt);
    return { bestPrompt, bestScore };
  }

  /**
   * Builds the minimal expression context shared by the evaluator and the meta-optimizer:
   * workflow inputs, no step results, process environment as secrets, and workflow env.
   */
  private buildContext() {
    return {
      inputs: this.inputs,
      steps: {},
      // biome-ignore lint/suspicious/noExplicitAny: environment access
      secrets: Bun.env as any,
      env: this.workflow.env,
    };
  }

  /**
   * Scores workflow outputs according to the workflow's `eval` configuration.
   *
   * With `scorer === 'script'` the `run` script executes in `SafeSandbox` (5 s timeout)
   * with the outputs exposed as both `outputs` and `output`. Otherwise an LLM step using
   * `eval.agent` / `eval.prompt` is asked for a `{ score }` object.
   *
   * @returns The parsed score, or 0 when evaluation is not configured, the script fails,
   *          or no number can be extracted from the result.
   */
  private async evaluate(outputs: Record<string, unknown>): Promise<number> {
    const { eval: evalConfig } = this.workflow;
    if (!evalConfig) return 0;

    if (evalConfig.scorer === 'script') {
      const { SafeSandbox } = await import('../utils/sandbox');
      try {
        const result: unknown = await SafeSandbox.execute(
          evalConfig.run || 'echo 0',
          { ...this.buildContext(), outputs, output: outputs },
          { timeout: 5000 }
        );
        // The result may be a `{ stdout }` object, a bare number, or a string.
        const raw =
          typeof result === 'object' && result !== null && 'stdout' in result
            ? (result as { stdout: unknown }).stdout
            : result;
        return parseScore(raw) ?? 0;
      } catch (e) {
        console.error('Eval script failed:', e);
      }
      return 0;
    }

    // LLM scorer
    if (!evalConfig.agent || !evalConfig.prompt) {
      console.warn('Skipping LLM evaluation: agent or prompt missing');
      return 0;
    }

    const evalStep: LlmStep = {
      id: 'evaluator',
      type: 'llm',
      agent: evalConfig.agent,
      prompt: `${evalConfig.prompt}\n\nOutputs to evaluate:\n${JSON.stringify(outputs, null, 2)}`,
      needs: [],
      maxIterations: 10,
      schema: {
        type: 'object',
        properties: {
          score: { type: 'number', minimum: 0, maximum: 100 },
        },
        required: ['score'],
      },
    };

    const result = await executeLlmStep(
      evalStep,
      // biome-ignore lint/suspicious/noExplicitAny: context typing
      this.buildContext() as any,
      async () => {
        throw new Error('Tools not supported in eval');
      },
      console,
      undefined,
      dirname(this.workflowPath) // Same workflowDir the meta-optimizer call passes
    );

    if (result.status === 'success' && result.output && typeof result.output === 'object') {
      return parseScore((result.output as { score?: unknown }).score) ?? 0;
    }

    // Fall back to the first number in plain-text output when no object was produced.
    if (typeof result.output === 'string') {
      return parseScore(result.output) ?? 0;
    }

    return 0;
  }

  /**
   * Asks the `general` agent for a refined version of the current prompt.
   *
   * @param currentPrompt Candidate that was just evaluated.
   * @param lastScore     Score that candidate received.
   * @param lastOutputs   Workflow outputs that candidate produced.
   * @returns The trimmed suggestion, or `currentPrompt` when the call fails or yields no
   *          usable text. With `TEST_OPTIMIZER` set, a thrown failure returns the current
   *          prompt with `!` appended instead.
   */
  private async suggestNextPrompt(
    currentPrompt: string,
    lastScore: number,
    lastOutputs: Record<string, unknown>
  ): Promise<string> {
    const metaPrompt = [
      'You are an expert prompt engineer. Your task is to optimize a system prompt to get a higher score.',
      'Current Prompt:',
      '"""',
      currentPrompt,
      '"""',
      '',
      `Last Score: ${lastScore}/100`,
      '',
      'Last Outputs:',
      JSON.stringify(lastOutputs, null, 2),
      '',
      'Evaluation Criteria:',
      `${this.workflow.eval?.prompt || this.workflow.eval?.run}`,
      '',
      'Suggest a slightly modified version of the prompt that might improve the score.',
      'Maintain the same core instructions but refine the phrasing, add constraints, or clarify expectations.',
      'Return ONLY the new prompt text.',
    ].join('\n');

    const metaStep: LlmStep = {
      id: 'optimizer',
      type: 'llm',
      agent: 'general', // Or a specialized "optimizer" agent if available
      needs: [],
      maxIterations: 10,
      prompt: metaPrompt,
    };

    try {
      const result = await executeLlmStep(
        metaStep,
        // biome-ignore lint/suspicious/noExplicitAny: context typing
        this.buildContext() as any,
        async () => {
          throw new Error('Tools not supported in meta-opt');
        },
        console,
        undefined,
        dirname(this.workflowPath) // Pass workflowDir to resolve agent
      );
      if (result.status === 'success' && typeof result.output === 'string') {
        const suggestion = result.output.trim();
        // An empty suggestion would blank out the step; keep the current prompt instead.
        if (suggestion) return suggestion;
      }
    } catch (e) {
      console.warn(` ⚠️ Meta-optimizer failed: ${e instanceof Error ? e.message : String(e)}`);
      // Dummy mutation for testing purposes when the env var is set.
      if (Bun.env.TEST_OPTIMIZER) {
        return `${currentPrompt}!`;
      }
    }

    return currentPrompt; // Fallback to current
  }

  /**
   * Prints the best prompt/command and tells the user where to apply it.
   *
   * Nothing is written to disk: automatic replacement in arbitrary files is risky
   * without robust parsing, so this only logs.
   */
  private async saveBestPrompt(prompt: string): Promise<void> {
    console.log(`\n💾 Saving best prompt to ${this.workflowPath}`);

    const targetStep = this.workflow.steps.find((s) => s.id === this.targetStepId);

    console.log(`--- BEST PROMPT/RUN ---\n${prompt}\n-----------------------`);

    if (targetStep?.type === 'llm') {
      try {
        // Resolved inside the try so a failed lookup is reported as a warning
        // rather than thrown after the optimization has finished.
        const agentPath = resolveAgentPath(
          (targetStep as LlmStep).agent,
          dirname(this.workflowPath)
        );
        console.log(
          `To apply this optimization, update the 'systemPrompt' or instruction in: ${agentPath}`
        );
      } catch (e) {
        console.warn(`Could not locate agent file: ${e}`);
      }
    } else {
      console.log(
        `To apply this optimization, update the 'run' command for step '${this.targetStepId}' in ${this.workflowPath}`
      );
    }
  }
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, jest, mock, test } from 'bun:test';
|
|
2
|
+
import type { Step, Workflow } from '../parser/schema';
|
|
3
|
+
import * as StepExecutor from './step-executor';
|
|
4
|
+
import { WorkflowRunner } from './workflow-runner';
|
|
5
|
+
|
|
6
|
+
// Mock the LLM Adapter
|
|
7
|
+
|
|
8
|
+
describe('WorkflowRunner Reflexion', () => {
  beforeEach(() => {
    jest.restoreAllMocks();
  });

  // A shell step that fails with `exit 1` should be retried once with the command the
  // (mocked) LLM adapter proposes.
  test('should attempt to self-correct a failing step using reflexion', async () => {
    const workflow: Workflow = {
      name: 'reflexion-test',
      steps: [
        {
          id: 'fail-step',
          type: 'shell',
          run: 'exit 1',
          reflexion: {
            limit: 2,
            hint: 'fix it',
          },
        } as Step,
      ],
    };

    // Stand-in adapter whose chat reply always proposes the corrected command.
    const mockGetAdapter = () => ({
      adapter: {
        chat: async () => ({
          message: {
            content: JSON.stringify({ run: 'echo "fixed"' }),
          },
        }),
        // biome-ignore lint/suspicious/noExplicitAny: mock adapter
      } as any,
      resolvedModel: 'mock-model',
    });

    const runner = new WorkflowRunner(workflow, {
      logger: { log: () => {}, error: () => {}, warn: () => {} },
      dbPath: ':memory:',
      getAdapter: mockGetAdapter,
    });

    // biome-ignore lint/suspicious/noExplicitAny: Accessing private property for testing
    const db = (runner as any).db;
    await db.createRun(runner.getRunId(), workflow.name, {});

    const spy = jest.spyOn(StepExecutor, 'executeStep');

    // try/finally restores the spy even when an expectation throws, so a failure here
    // cannot leak the mocked executeStep into other tests.
    try {
      // The original command fails; the corrected command from the mock succeeds.
      spy.mockImplementation(async (step, _context) => {
        // biome-ignore lint/suspicious/noExplicitAny: Accessing run property dynamically
        const command = (step as any).run;

        if (command === 'exit 1') {
          return { status: 'failed', output: null, error: 'Command failed' };
        }
        if (command === 'echo "fixed"') {
          return { status: 'success', output: 'fixed' };
        }
        return { status: 'failed', output: null, error: 'Unknown step' };
      });

      // biome-ignore lint/suspicious/noExplicitAny: Accessing private property for testing
      await (runner as any).executeStepWithForeach(workflow.steps[0]);

      // Expected sequence:
      // 1. First execution (fails)
      // 2. Reflexion happens (internal, not executeStep)
      // 3. Second execution (retry with new command)
      expect(spy).toHaveBeenCalledTimes(2);

      // Verify the second call had the corrected command
      // biome-ignore lint/suspicious/noExplicitAny: mock call args typing
      const secondCallArg = spy.mock.calls[1][0] as any;
      expect(secondCallArg.run).toBe('echo "fixed"');
    } finally {
      spy.mockRestore();
    }
  });
});
|
|
@@ -137,5 +137,17 @@ describe('shell-executor', () => {
|
|
|
137
137
|
|
|
138
138
|
await expect(executeShell(step, context)).rejects.toThrow(/Security Error/);
|
|
139
139
|
});
|
|
140
|
+
it('should allow flow control with semicolons', async () => {
  // A one-line `if …; then …; fi` uses semicolons purely as shell syntax.
  const flowControlStep: ShellStep = {
    id: 'test',
    type: 'shell',
    needs: [],
    run: 'if [ "1" = "1" ]; then echo "match"; fi',
  };

  const { exitCode, stdout } = await executeShell(flowControlStep, context);

  expect(exitCode).toBe(0);
  expect(stdout.trim()).toBe('match');
});
|
|
140
152
|
});
|
|
141
153
|
});
|
|
@@ -29,7 +29,7 @@ import { $ } from 'bun';
|
|
|
29
29
|
import type { ExpressionContext } from '../expression/evaluator.ts';
|
|
30
30
|
import { ExpressionEvaluator } from '../expression/evaluator.ts';
|
|
31
31
|
import type { ShellStep } from '../parser/schema.ts';
|
|
32
|
-
import type
|
|
32
|
+
import { ConsoleLogger, type Logger } from '../utils/logger.ts';
|
|
33
33
|
|
|
34
34
|
/**
|
|
35
35
|
* Escape a shell argument for safe use in shell commands
|
|
@@ -41,7 +41,7 @@ import type { Logger } from './workflow-runner.ts';
|
|
|
41
41
|
* - id: safe_echo
|
|
42
42
|
* type: shell
|
|
43
43
|
* # Use this pattern to safely interpolate user inputs:
|
|
44
|
-
* run: echo ${{ inputs.message }} # Safe:
|
|
44
|
+
* run: echo ${{ escape(inputs.message) }} # Safe: explicitly escaped
|
|
45
45
|
* # Avoid patterns like: sh -c "echo $USER_INPUT" where USER_INPUT is raw
|
|
46
46
|
* ```
|
|
47
47
|
*/
|
|
@@ -63,7 +63,7 @@ export interface ShellResult {
|
|
|
63
63
|
// Pre-compiled dangerous patterns for performance
|
|
64
64
|
// These patterns are designed to detect likely injection attempts while minimizing false positives
|
|
65
65
|
const DANGEROUS_PATTERNS: RegExp[] = [
|
|
66
|
-
/;\s
|
|
66
|
+
/;\s*(?:rm|chmod|chown|mkfs|dd)\b/, // Command chaining with destructive commands
|
|
67
67
|
/\|\s*(?:sh|bash|zsh|ksh|dash|csh|python|python[23]?|node|ruby|perl|php|lua)\b/, // Piping to shell/interpreter (download-and-execute pattern)
|
|
68
68
|
/\|\s*(?:sudo|su)\b/, // Piping to privilege escalation
|
|
69
69
|
/&&\s*(?:rm|chmod|chown|mkfs|dd)\b/, // AND chaining with destructive commands
|
|
@@ -98,9 +98,12 @@ const DANGEROUS_PATTERNS: RegExp[] = [
|
|
|
98
98
|
/\d*>&-\s*/, // Closing file descriptors
|
|
99
99
|
];
|
|
100
100
|
|
|
101
|
+
// Every entry of DANGEROUS_PATTERNS is joined into one alternation so that a command is
// checked with a single `test` call instead of one call per pattern (n patterns × m input).
// NOTE(review): `source` carries no flags, so this assumes no listed pattern relies on
// regex flags — confirm against the full pattern list.
const COMBINED_DANGEROUS_PATTERN = new RegExp(
  DANGEROUS_PATTERNS.map(({ source }) => source).join('|')
);

/**
 * Reports whether a shell command matches any of the known dangerous patterns.
 *
 * @param command Command string to inspect.
 * @returns `true` when at least one pattern matches somewhere in `command`.
 */
export function detectShellInjectionRisk(command: string): boolean {
  const matchesDangerousPattern = COMBINED_DANGEROUS_PATTERN.test(command);
  return matchesDangerousPattern;
}
|
|
105
108
|
|
|
106
109
|
/**
|
|
@@ -109,7 +112,7 @@ export function detectShellInjectionRisk(command: string): boolean {
|
|
|
109
112
|
export async function executeShell(
|
|
110
113
|
step: ShellStep,
|
|
111
114
|
context: ExpressionContext,
|
|
112
|
-
logger: Logger =
|
|
115
|
+
logger: Logger = new ConsoleLogger()
|
|
113
116
|
): Promise<ShellResult> {
|
|
114
117
|
// Evaluate the command string
|
|
115
118
|
const command = ExpressionEvaluator.evaluateString(step.run, context);
|
|
@@ -1,5 +1,17 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
afterAll,
|
|
3
|
+
afterEach,
|
|
4
|
+
beforeAll,
|
|
5
|
+
beforeEach,
|
|
6
|
+
describe,
|
|
7
|
+
expect,
|
|
8
|
+
it,
|
|
9
|
+
mock,
|
|
10
|
+
spyOn,
|
|
11
|
+
} from 'bun:test';
|
|
12
|
+
import * as dns from 'node:dns/promises';
|
|
2
13
|
import { mkdirSync, rmSync } from 'node:fs';
|
|
14
|
+
import { tmpdir } from 'node:os';
|
|
3
15
|
import { join } from 'node:path';
|
|
4
16
|
import type { ExpressionContext } from '../expression/evaluator';
|
|
5
17
|
import type {
|
|
@@ -187,6 +199,34 @@ describe('step-executor', () => {
|
|
|
187
199
|
expect(result.status).toBe('failed');
|
|
188
200
|
expect(result.error).toContain('Unknown file operation');
|
|
189
201
|
});
|
|
202
|
+
|
|
203
|
+
it('should allow file paths outside cwd when allowOutsideCwd is true', async () => {
  // Write into the OS temp directory (assumed to lie outside the test's cwd) using a
  // timestamped name to avoid clashing with earlier runs.
  const targetPath = join(tmpdir(), `keystone-test-${Date.now()}.txt`);

  const step: FileStep = {
    id: 'w-outside',
    type: 'file',
    needs: [],
    op: 'write',
    path: targetPath,
    content: 'outside',
    allowOutsideCwd: true,
  };

  try {
    const { status } = await executeStep(step, context);
    expect(status).toBe('success');

    // Read the file back to confirm the write really happened at the outside path.
    const written = await Bun.file(targetPath).text();
    expect(written).toBe('outside');
  } finally {
    // Best-effort cleanup; a missing file must not mask the test's own result.
    try {
      rmSync(targetPath);
    } catch {
      // Ignore cleanup errors
    }
  }
});
|
|
190
230
|
});
|
|
191
231
|
|
|
192
232
|
describe('sleep', () => {
|
|
@@ -207,14 +247,19 @@ describe('step-executor', () => {
|
|
|
207
247
|
|
|
208
248
|
describe('request', () => {
|
|
209
249
|
const originalFetch = global.fetch;
|
|
250
|
+
let lookupSpy: ReturnType<typeof spyOn>;
|
|
210
251
|
|
|
211
252
|
beforeEach(() => {
|
|
212
253
|
// @ts-ignore
|
|
213
254
|
global.fetch = mock();
|
|
255
|
+
lookupSpy = spyOn(dns, 'lookup').mockResolvedValue([
|
|
256
|
+
{ address: '93.184.216.34', family: 4 },
|
|
257
|
+
] as unknown as Awaited<ReturnType<typeof dns.lookup>>);
|
|
214
258
|
});
|
|
215
259
|
|
|
216
260
|
afterEach(() => {
|
|
217
261
|
global.fetch = originalFetch;
|
|
262
|
+
lookupSpy.mockRestore();
|
|
218
263
|
});
|
|
219
264
|
|
|
220
265
|
it('should perform an HTTP request', async () => {
|