keystone-cli 0.7.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +486 -54
- package/package.json +8 -2
- package/src/__fixtures__/index.ts +100 -0
- package/src/cli.ts +841 -91
- package/src/db/memory-db.ts +35 -1
- package/src/db/workflow-db.test.ts +24 -0
- package/src/db/workflow-db.ts +484 -14
- package/src/expression/evaluator.ts +68 -4
- package/src/parser/agent-parser.ts +6 -3
- package/src/parser/config-schema.ts +38 -2
- package/src/parser/schema.ts +192 -7
- package/src/parser/test-schema.ts +29 -0
- package/src/parser/workflow-parser.test.ts +54 -0
- package/src/parser/workflow-parser.ts +153 -7
- package/src/runner/aggregate-error.test.ts +57 -0
- package/src/runner/aggregate-error.ts +46 -0
- package/src/runner/audit-verification.test.ts +2 -2
- package/src/runner/auto-heal.test.ts +1 -1
- package/src/runner/blueprint-executor.test.ts +63 -0
- package/src/runner/blueprint-executor.ts +157 -0
- package/src/runner/concurrency-limit.test.ts +82 -0
- package/src/runner/debug-repl.ts +18 -3
- package/src/runner/durable-timers.test.ts +200 -0
- package/src/runner/engine-executor.test.ts +464 -0
- package/src/runner/engine-executor.ts +491 -0
- package/src/runner/foreach-executor.ts +30 -12
- package/src/runner/llm-adapter.test.ts +282 -5
- package/src/runner/llm-adapter.ts +581 -8
- package/src/runner/llm-clarification.test.ts +79 -21
- package/src/runner/llm-errors.ts +83 -0
- package/src/runner/llm-executor.test.ts +258 -219
- package/src/runner/llm-executor.ts +226 -29
- package/src/runner/mcp-client.ts +70 -3
- package/src/runner/mcp-manager.test.ts +52 -52
- package/src/runner/mcp-manager.ts +12 -5
- package/src/runner/mcp-server.test.ts +117 -78
- package/src/runner/mcp-server.ts +13 -4
- package/src/runner/optimization-runner.ts +48 -31
- package/src/runner/reflexion.test.ts +1 -1
- package/src/runner/resource-pool.test.ts +113 -0
- package/src/runner/resource-pool.ts +164 -0
- package/src/runner/shell-executor.ts +130 -32
- package/src/runner/standard-tools-execution.test.ts +39 -0
- package/src/runner/standard-tools-integration.test.ts +36 -36
- package/src/runner/standard-tools.test.ts +18 -0
- package/src/runner/standard-tools.ts +174 -93
- package/src/runner/step-executor.test.ts +176 -16
- package/src/runner/step-executor.ts +534 -83
- package/src/runner/stream-utils.test.ts +14 -0
- package/src/runner/subflow-outputs.test.ts +103 -0
- package/src/runner/test-harness.ts +161 -0
- package/src/runner/tool-integration.test.ts +73 -79
- package/src/runner/workflow-runner.test.ts +549 -15
- package/src/runner/workflow-runner.ts +1448 -79
- package/src/runner/workflow-subflows.test.ts +255 -0
- package/src/templates/agents/keystone-architect.md +17 -12
- package/src/templates/agents/tester.md +21 -0
- package/src/templates/child-rollback.yaml +11 -0
- package/src/templates/decompose-implement.yaml +53 -0
- package/src/templates/decompose-problem.yaml +159 -0
- package/src/templates/decompose-research.yaml +52 -0
- package/src/templates/decompose-review.yaml +51 -0
- package/src/templates/dev.yaml +134 -0
- package/src/templates/engine-example.yaml +33 -0
- package/src/templates/fan-out-fan-in.yaml +61 -0
- package/src/templates/memory-service.yaml +1 -1
- package/src/templates/parent-rollback.yaml +16 -0
- package/src/templates/robust-automation.yaml +1 -1
- package/src/templates/scaffold-feature.yaml +29 -27
- package/src/templates/scaffold-generate.yaml +41 -0
- package/src/templates/scaffold-plan.yaml +53 -0
- package/src/types/status.ts +3 -0
- package/src/ui/dashboard.tsx +4 -3
- package/src/utils/assets.macro.ts +36 -0
- package/src/utils/auth-manager.ts +585 -8
- package/src/utils/blueprint-utils.test.ts +49 -0
- package/src/utils/blueprint-utils.ts +80 -0
- package/src/utils/circuit-breaker.test.ts +177 -0
- package/src/utils/circuit-breaker.ts +160 -0
- package/src/utils/config-loader.test.ts +100 -13
- package/src/utils/config-loader.ts +44 -17
- package/src/utils/constants.ts +62 -0
- package/src/utils/error-renderer.test.ts +267 -0
- package/src/utils/error-renderer.ts +320 -0
- package/src/utils/json-parser.test.ts +4 -0
- package/src/utils/json-parser.ts +18 -1
- package/src/utils/mermaid.ts +4 -0
- package/src/utils/paths.test.ts +46 -0
- package/src/utils/paths.ts +70 -0
- package/src/utils/process-sandbox.test.ts +128 -0
- package/src/utils/process-sandbox.ts +293 -0
- package/src/utils/rate-limiter.test.ts +143 -0
- package/src/utils/rate-limiter.ts +221 -0
- package/src/utils/redactor.test.ts +23 -15
- package/src/utils/redactor.ts +65 -25
- package/src/utils/resource-loader.test.ts +54 -0
- package/src/utils/resource-loader.ts +158 -0
- package/src/utils/sandbox.test.ts +69 -4
- package/src/utils/sandbox.ts +69 -6
- package/src/utils/schema-validator.ts +65 -0
- package/src/utils/workflow-registry.test.ts +57 -0
- package/src/utils/workflow-registry.ts +45 -25
- /package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
- /package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import { afterAll, describe, expect, it } from 'bun:test';
|
|
2
|
+
import { existsSync, rmSync } from 'node:fs';
|
|
3
|
+
import { WorkflowDb } from '../db/workflow-db';
|
|
4
|
+
import type { Workflow } from '../parser/schema';
|
|
5
|
+
import type { Logger } from '../utils/logger';
|
|
6
|
+
import { WorkflowRunner } from './workflow-runner';
|
|
7
|
+
|
|
8
|
+
describe('WorkflowRunner - Subflows & Compensations', () => {
|
|
9
|
+
const dbPath = ':memory:';
|
|
10
|
+
|
|
11
|
+
it('should execute parallel branches and join with "all" condition', async () => {
|
|
12
|
+
const workflow: Workflow = {
|
|
13
|
+
name: 'fan-out-in-all',
|
|
14
|
+
steps: [
|
|
15
|
+
{ id: 'branch1', type: 'shell', run: 'echo "b1"', needs: [] },
|
|
16
|
+
{ id: 'branch2', type: 'shell', run: 'echo "b2"', needs: [] },
|
|
17
|
+
{
|
|
18
|
+
id: 'join',
|
|
19
|
+
type: 'join',
|
|
20
|
+
target: 'steps',
|
|
21
|
+
condition: 'all',
|
|
22
|
+
needs: ['branch1', 'branch2'],
|
|
23
|
+
},
|
|
24
|
+
],
|
|
25
|
+
outputs: {
|
|
26
|
+
b1: '${{ steps.join.output.inputs.branch1.stdout.trim() }}',
|
|
27
|
+
b2: '${{ steps.join.output.inputs.branch2.stdout.trim() }}',
|
|
28
|
+
},
|
|
29
|
+
} as unknown as Workflow;
|
|
30
|
+
|
|
31
|
+
const runner = new WorkflowRunner(workflow, { dbPath });
|
|
32
|
+
const outputs = await runner.run();
|
|
33
|
+
expect(outputs.b1).toBe('b1');
|
|
34
|
+
expect(outputs.b2).toBe('b2');
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it('should fail join step if condition "all" is not met (due to allowFailure error)', async () => {
|
|
38
|
+
// branch2 exits non-zero but sets allowFailure. A join with condition "all" must still fail, because a tolerated failure is not a "real" success.
|
|
39
|
+
const workflow: Workflow = {
|
|
40
|
+
name: 'fan-out-in-fail',
|
|
41
|
+
steps: [
|
|
42
|
+
{ id: 'branch1', type: 'shell', run: 'echo "b1"', needs: [] },
|
|
43
|
+
{ id: 'branch2', type: 'shell', run: 'exit 1', needs: [], allowFailure: true },
|
|
44
|
+
{
|
|
45
|
+
id: 'join',
|
|
46
|
+
type: 'join',
|
|
47
|
+
condition: 'all',
|
|
48
|
+
needs: ['branch1', 'branch2'],
|
|
49
|
+
},
|
|
50
|
+
],
|
|
51
|
+
} as unknown as Workflow;
|
|
52
|
+
|
|
53
|
+
const runner = new WorkflowRunner(workflow, { dbPath });
|
|
54
|
+
await expect(runner.run()).rejects.toThrow(/Join condition 'all' not met/);
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
it('should pass join step with "any" condition if one branch succeeds', async () => {
|
|
58
|
+
const workflow: Workflow = {
|
|
59
|
+
name: 'fan-out-in-any',
|
|
60
|
+
steps: [
|
|
61
|
+
{ id: 'branch1', type: 'shell', run: 'echo "b1"', needs: [] },
|
|
62
|
+
{ id: 'branch2', type: 'shell', run: 'exit 1', needs: [], allowFailure: true },
|
|
63
|
+
{
|
|
64
|
+
id: 'join',
|
|
65
|
+
type: 'join',
|
|
66
|
+
condition: 'any',
|
|
67
|
+
needs: ['branch1', 'branch2'],
|
|
68
|
+
},
|
|
69
|
+
],
|
|
70
|
+
outputs: {
|
|
71
|
+
status1: '${{ steps.join.output.status.branch1 }}',
|
|
72
|
+
status2: '${{ steps.join.output.status.branch2 }}',
|
|
73
|
+
},
|
|
74
|
+
} as unknown as Workflow;
|
|
75
|
+
|
|
76
|
+
const runner = new WorkflowRunner(workflow, { dbPath });
|
|
77
|
+
const outputs = await runner.run();
|
|
78
|
+
expect(outputs.status1).toBe('success');
|
|
79
|
+
// status2 may be undefined or 'success' depending on how branch2 races the "any" join; accept either so the test stays deterministic
|
|
80
|
+
const status2 = typeof outputs.status2 === 'string' ? outputs.status2 : undefined;
|
|
81
|
+
expect(status2 === undefined || status2 === 'success').toBe(true);
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
it('should register and execute compensations on failure', async () => {
|
|
85
|
+
const compDbPath = 'test-compensation.db';
|
|
86
|
+
if (existsSync(compDbPath)) rmSync(compDbPath);
|
|
87
|
+
|
|
88
|
+
const workflow: Workflow = {
|
|
89
|
+
name: 'comp-wf',
|
|
90
|
+
steps: [
|
|
91
|
+
{
|
|
92
|
+
id: 'step1',
|
|
93
|
+
type: 'shell',
|
|
94
|
+
run: 'echo "step1"',
|
|
95
|
+
needs: [],
|
|
96
|
+
compensate: {
|
|
97
|
+
id: 'undo1',
|
|
98
|
+
type: 'shell',
|
|
99
|
+
run: 'echo "undoing step1"',
|
|
100
|
+
},
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
id: 'step2',
|
|
104
|
+
type: 'shell',
|
|
105
|
+
run: 'echo "step2"',
|
|
106
|
+
needs: ['step1'],
|
|
107
|
+
compensate: {
|
|
108
|
+
id: 'undo2',
|
|
109
|
+
type: 'shell',
|
|
110
|
+
run: 'echo "undoing step2"',
|
|
111
|
+
},
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
id: 'fail',
|
|
115
|
+
type: 'shell',
|
|
116
|
+
run: 'exit 1',
|
|
117
|
+
needs: ['step2'],
|
|
118
|
+
},
|
|
119
|
+
],
|
|
120
|
+
} as unknown as Workflow;
|
|
121
|
+
|
|
122
|
+
const logs: string[] = [];
|
|
123
|
+
const logger: Logger = {
|
|
124
|
+
log: (msg: string) => logs.push(msg),
|
|
125
|
+
error: (msg: string) => logs.push(msg),
|
|
126
|
+
warn: (msg: string) => logs.push(`WARN: ${msg}`),
|
|
127
|
+
info: (msg: string) => logs.push(`INFO: ${msg}`),
|
|
128
|
+
debug: (msg: string) => logs.push(`DEBUG: ${msg}`),
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
const runner = new WorkflowRunner(workflow, { dbPath: compDbPath, logger });
|
|
132
|
+
|
|
133
|
+
try {
|
|
134
|
+
await runner.run();
|
|
135
|
+
} catch (e) {
|
|
136
|
+
// Expected failure
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Verify compensations ran in reverse order
|
|
140
|
+
const undo2Index = logs.findIndex((l) => l.includes('undoing step2'));
|
|
141
|
+
const undo1Index = logs.findIndex((l) => l.includes('undoing step1'));
|
|
142
|
+
|
|
143
|
+
if (undo2Index === -1 || undo1Index === -1 || undo2Index >= undo1Index) {
|
|
144
|
+
console.log('--- COMPENSATION LOGS ---');
|
|
145
|
+
console.log(logs.filter((l) => l.includes('undoing') || l.includes('rollback')).join('\n'));
|
|
146
|
+
console.log('--- END ---');
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
expect(undo2Index).toBeGreaterThan(-1);
|
|
150
|
+
expect(undo1Index).toBeGreaterThan(-1);
|
|
151
|
+
expect(undo2Index).toBeLessThan(undo1Index); // undo2 before undo1
|
|
152
|
+
|
|
153
|
+
// Verify DB records
|
|
154
|
+
const db = new WorkflowDb(compDbPath);
|
|
155
|
+
const runId = runner.runId;
|
|
156
|
+
const comps = await db.getAllCompensations(runId);
|
|
157
|
+
expect(comps.length).toBe(2);
|
|
158
|
+
db.close();
|
|
159
|
+
|
|
160
|
+
if (existsSync(compDbPath)) rmSync(compDbPath);
|
|
161
|
+
});
|
|
162
|
+
it('should execute join step early if condition is "any" and one branch finishes', async () => {
|
|
163
|
+
// Timing-sensitive: "slow" sleeps briefly so "fast" can satisfy the "any" join first; the log-order assertions below check that after_join starts before slow completes
|
|
164
|
+
const workflow: Workflow = {
|
|
165
|
+
name: 'early-join',
|
|
166
|
+
steps: [
|
|
167
|
+
{ id: 'slow', type: 'shell', run: 'sleep 0.1 && echo "slow"', needs: [] },
|
|
168
|
+
{ id: 'fast', type: 'shell', run: 'echo "fast"', needs: [] },
|
|
169
|
+
{
|
|
170
|
+
id: 'early_join',
|
|
171
|
+
type: 'join',
|
|
172
|
+
condition: 'any',
|
|
173
|
+
needs: ['slow', 'fast'],
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
id: 'after_join',
|
|
177
|
+
type: 'shell',
|
|
178
|
+
run: 'echo "after_join"',
|
|
179
|
+
needs: ['early_join'],
|
|
180
|
+
},
|
|
181
|
+
],
|
|
182
|
+
outputs: {
|
|
183
|
+
order: '${{ steps }}',
|
|
184
|
+
},
|
|
185
|
+
} as unknown as Workflow;
|
|
186
|
+
|
|
187
|
+
const logs: string[] = [];
|
|
188
|
+
const logger: Logger = {
|
|
189
|
+
log: (msg: string) => logs.push(msg),
|
|
190
|
+
error: (msg: string) => logs.push(msg),
|
|
191
|
+
warn: () => {},
|
|
192
|
+
info: () => {},
|
|
193
|
+
debug: () => {},
|
|
194
|
+
};
|
|
195
|
+
|
|
196
|
+
const runner = new WorkflowRunner(workflow, { dbPath, logger });
|
|
197
|
+
await runner.run();
|
|
198
|
+
|
|
199
|
+
// Verify after_join started BEFORE slow finished
|
|
200
|
+
const afterJoinStart = logs.findIndex((l) => l.includes('Executing step: after_join'));
|
|
201
|
+
const slowFinished = logs.findIndex((l) => l.includes('Step slow completed'));
|
|
202
|
+
|
|
203
|
+
expect(afterJoinStart).toBeGreaterThan(-1);
|
|
204
|
+
expect(slowFinished).toBeGreaterThan(-1);
|
|
205
|
+
expect(afterJoinStart).toBeLessThan(slowFinished);
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
it('should execute top-level workflow compensation on failure', async () => {
|
|
209
|
+
const wfCompDbPath = 'test-wf-compensation.db';
|
|
210
|
+
if (existsSync(wfCompDbPath)) rmSync(wfCompDbPath);
|
|
211
|
+
|
|
212
|
+
const workflow: Workflow = {
|
|
213
|
+
name: 'wf-comp',
|
|
214
|
+
compensate: {
|
|
215
|
+
id: 'wf-undo',
|
|
216
|
+
type: 'shell',
|
|
217
|
+
run: 'echo "undoing workflow"',
|
|
218
|
+
},
|
|
219
|
+
steps: [
|
|
220
|
+
{
|
|
221
|
+
id: 'step1',
|
|
222
|
+
type: 'shell',
|
|
223
|
+
run: 'exit 1',
|
|
224
|
+
needs: [],
|
|
225
|
+
},
|
|
226
|
+
],
|
|
227
|
+
} as unknown as Workflow;
|
|
228
|
+
|
|
229
|
+
const logs: string[] = [];
|
|
230
|
+
const logger: Logger = {
|
|
231
|
+
log: (msg: string) => logs.push(msg),
|
|
232
|
+
error: (msg: string) => logs.push(msg),
|
|
233
|
+
warn: () => {},
|
|
234
|
+
info: () => {},
|
|
235
|
+
debug: () => {},
|
|
236
|
+
};
|
|
237
|
+
|
|
238
|
+
const runner = new WorkflowRunner(workflow, { dbPath: wfCompDbPath, logger });
|
|
239
|
+
try {
|
|
240
|
+
await runner.run();
|
|
241
|
+
} catch (e) {
|
|
242
|
+
// Expected failure
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
const wfUndoIndex = logs.findIndex((l) => l.includes('undoing workflow'));
|
|
246
|
+
if (wfUndoIndex === -1) {
|
|
247
|
+
console.log('--- WF COMP LOGS ---');
|
|
248
|
+
console.log(logs.join('\n'));
|
|
249
|
+
console.log('--- END ---');
|
|
250
|
+
}
|
|
251
|
+
expect(wfUndoIndex).toBeGreaterThan(-1);
|
|
252
|
+
|
|
253
|
+
if (existsSync(wfCompDbPath)) rmSync(wfCompDbPath);
|
|
254
|
+
});
|
|
255
|
+
});
|
|
@@ -14,20 +14,25 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
|
|
|
14
14
|
- **description**: (Optional) Description of the workflow.
|
|
15
15
|
- **inputs**: Map of `{ type: 'string'|'number'|'boolean'|'array'|'object', default: any, description: string }` under the `inputs` key.
|
|
16
16
|
- **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
|
|
17
|
+
- **outputSchema**: (Optional) JSON Schema for final workflow outputs.
|
|
17
18
|
- **env**: (Optional) Map of workflow-level environment variables.
|
|
18
|
-
- **concurrency**: (Optional) Global concurrency limit for the workflow
|
|
19
|
-
- **
|
|
19
|
+
- **concurrency**: (Optional) Global concurrency limit for the workflow.
|
|
20
|
+
- **pools**: (Optional) Map of resource pools `{ pool_name: limit }`.
|
|
21
|
+
- **compensate**: (Optional) Workflow-level compensation step.
|
|
22
|
+
- **eval**: (Optional) Configuration for prompt optimization `{ scorer: 'llm'|'script', agent, prompt, run, allowInsecure, allowSecrets }`.
|
|
20
23
|
- **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
|
|
21
|
-
- **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }`
|
|
22
|
-
- **llm**: `{ id, type: 'llm', agent, prompt,
|
|
23
|
-
- **workflow**: `{ id, type: 'workflow', path, inputs }`
|
|
24
|
+
- **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }`
|
|
25
|
+
- **llm**: `{ id, type: 'llm', agent, prompt, outputSchema, provider, model, tools, maxIterations, maxMessageHistory, useGlobalMcp, allowClarification, useStandardTools, allowOutsideCwd, allowInsecure, mcpServers, handoff }`
|
|
26
|
+
- **workflow**: `{ id, type: 'workflow', path, inputs, outputMapping }`
|
|
24
27
|
- **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content, allowOutsideCwd }`
|
|
25
|
-
- **request**: `{ id, type: 'request', url, method, body, headers }`
|
|
26
|
-
- **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }`
|
|
27
|
-
- **sleep**: `{ id, type: 'sleep', duration }` (
|
|
28
|
-
- **script**: `{ id, type: 'script', run, allowInsecure }`
|
|
28
|
+
- **request**: `{ id, type: 'request', url, method, body, headers, allowInsecure }`
|
|
29
|
+
- **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }`
|
|
30
|
+
- **sleep**: `{ id, type: 'sleep', duration, durable }` (use `durable: true` for sleeps >= 60s)
|
|
31
|
+
- **script**: `{ id, type: 'script', run, allowInsecure }`
|
|
32
|
+
- **engine**: `{ id, type: 'engine', command, args, input, env, cwd, outputSchema }`
|
|
29
33
|
- **memory**: `{ id, type: 'memory', op: 'search'|'store', query, text, model, metadata, limit }`
|
|
30
|
-
- **
|
|
34
|
+
- **join**: `{ id, type: 'join', target: 'steps'|'branches', condition: 'all'|'any'|number }`
|
|
35
|
+
- **Common Step Fields**: `needs` (array), `if` (expr), `timeout` (ms), `retry` (`{ count, backoff, baseDelay }`), `auto_heal`, `reflexion`, `learn`, `foreach`, `concurrency`, `pool`, `compensate`, `transform`, `inputSchema`, `outputSchema`, `outputRetries`, `repairStrategy`.
|
|
31
36
|
- **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
|
|
32
37
|
- **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
|
|
33
38
|
|
|
@@ -68,8 +73,8 @@ Markdown files with YAML frontmatter:
|
|
|
68
73
|
- **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
|
|
69
74
|
- **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
|
|
70
75
|
- **Clarification**: Enable `allowClarification` in `llm` steps if the agent should be able to ask the user for missing info.
|
|
71
|
-
- **Discovery**: Use `mcpServers` in `llm` steps
|
|
72
|
-
- Local: `{ name, command, args, env, timeout }`
|
|
76
|
+
- **Discovery**: Use `mcpServers` in `llm` steps. `mcpServers` can be a list of server names or configuration objects:
|
|
77
|
+
- Local: `{ name, type: 'local', command, args, env, timeout }`
|
|
73
78
|
- Remote: `{ name, type: 'remote', url, headers, timeout }`
|
|
74
79
|
|
|
75
80
|
# Seeking Clarification
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tester
|
|
3
|
+
description: "Expert at writing and running tests for Keystone CLI"
|
|
4
|
+
model: gpt-4o
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Role
|
|
8
|
+
You are the Keystone Tester. Your goal is to ensure the reliability and correctness of the Keystone CLI by writing comprehensive tests and verifying that changes do not introduce regressions.
|
|
9
|
+
|
|
10
|
+
# Guidelines
|
|
11
|
+
- Use `run_command` to execute tests (e.g., `bun test`, `bun test <file>`).
|
|
12
|
+
- Use `list_files` and `read_file` to examine existing tests for patterns.
|
|
13
|
+
- When a test fails, analyze the output to identify the cause.
|
|
14
|
+
- Use `write_file` to create new test files or update existing ones.
|
|
15
|
+
- When a change affects workflow-level functionality, verify it with the `keystone test` command.
|
|
16
|
+
- Follow the project's testing conventions (using `bun:test` for unit tests and `TestHarness` for workflow tests).
|
|
17
|
+
|
|
18
|
+
# Knowledge Base
|
|
19
|
+
- **Unit Tests**: Located in `src/**/*.test.ts`. Use `bun test` to run.
|
|
20
|
+
- **Workflow Tests**: Located in `.keystone/tests/`. Use `keystone test` to run.
|
|
21
|
+
- **Test Harness**: Use `src/runner/test-harness.ts` for deterministic workflow testing.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
name: nested-rollback-child
|
|
2
|
+
description: Child workflow with a side effect and compensation
|
|
3
|
+
|
|
4
|
+
steps:
|
|
5
|
+
- id: child_action
|
|
6
|
+
type: shell
|
|
7
|
+
run: echo "Child action executed"
|
|
8
|
+
compensate:
|
|
9
|
+
id: undo_child_action
|
|
10
|
+
type: shell
|
|
11
|
+
run: echo "Undoing child action..."
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
name: decompose-implement
|
|
2
|
+
description: "Implementation task sub-workflow for a decomposed problem"
|
|
3
|
+
|
|
4
|
+
inputs:
|
|
5
|
+
problem: { type: string }
|
|
6
|
+
context: { type: string, default: "" }
|
|
7
|
+
constraints: { type: string, default: "" }
|
|
8
|
+
task: { type: object }
|
|
9
|
+
research: { type: object, default: {} }
|
|
10
|
+
|
|
11
|
+
outputs:
|
|
12
|
+
summary: ${{ steps.implement.output.summary }}
|
|
13
|
+
files_changed: ${{ steps.implement.output.files_changed }}
|
|
14
|
+
open_questions: ${{ steps.implement.output.open_questions }}
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- id: implement
|
|
18
|
+
type: llm
|
|
19
|
+
agent: software-engineer
|
|
20
|
+
allowClarification: true
|
|
21
|
+
useStandardTools: true
|
|
22
|
+
prompt: |
|
|
23
|
+
You are implementing a task for a larger problem.
|
|
24
|
+
|
|
25
|
+
Problem:
|
|
26
|
+
${{ inputs.problem }}
|
|
27
|
+
|
|
28
|
+
Context:
|
|
29
|
+
${{ inputs.context }}
|
|
30
|
+
|
|
31
|
+
Constraints:
|
|
32
|
+
${{ inputs.constraints }}
|
|
33
|
+
|
|
34
|
+
Task:
|
|
35
|
+
${{ inputs.task }}
|
|
36
|
+
|
|
37
|
+
Research findings:
|
|
38
|
+
${{ inputs.research }}
|
|
39
|
+
|
|
40
|
+
If code changes are needed, use available tools to make them.
|
|
41
|
+
Return only the structured JSON required by the schema.
|
|
42
|
+
outputSchema:
|
|
43
|
+
type: object
|
|
44
|
+
properties:
|
|
45
|
+
summary:
|
|
46
|
+
type: string
|
|
47
|
+
files_changed:
|
|
48
|
+
type: array
|
|
49
|
+
items: { type: string }
|
|
50
|
+
open_questions:
|
|
51
|
+
type: array
|
|
52
|
+
items: { type: string }
|
|
53
|
+
required: [summary]
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
name: decompose-problem
|
|
2
|
+
description: "Decompose a complex problem into research, implementation, and review sub-workflows"
|
|
3
|
+
|
|
4
|
+
inputs:
|
|
5
|
+
problem: { type: string }
|
|
6
|
+
context: { type: string, default: "" }
|
|
7
|
+
constraints: { type: string, default: "" }
|
|
8
|
+
max_parallel: { type: number, default: 3 }
|
|
9
|
+
|
|
10
|
+
outputs:
|
|
11
|
+
plan: ${{ steps.plan.output }}
|
|
12
|
+
research: ${{ steps.research.output }}
|
|
13
|
+
implementation: ${{ steps.implementation.output }}
|
|
14
|
+
review: ${{ steps.review.output }}
|
|
15
|
+
summary: ${{ steps.summary.output }}
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- id: plan
|
|
19
|
+
type: llm
|
|
20
|
+
agent: general
|
|
21
|
+
allowClarification: true
|
|
22
|
+
prompt: |
|
|
23
|
+
You are a planner. Decompose the problem into research, implementation,
|
|
24
|
+
and review tasks. Keep tasks small, specific, and measurable.
|
|
25
|
+
|
|
26
|
+
Problem:
|
|
27
|
+
${{ inputs.problem }}
|
|
28
|
+
|
|
29
|
+
Context:
|
|
30
|
+
${{ inputs.context }}
|
|
31
|
+
|
|
32
|
+
Constraints:
|
|
33
|
+
${{ inputs.constraints }}
|
|
34
|
+
|
|
35
|
+
Return empty arrays if a category is not needed.
|
|
36
|
+
Return only the structured JSON required by the schema.
|
|
37
|
+
outputSchema:
|
|
38
|
+
type: object
|
|
39
|
+
properties:
|
|
40
|
+
research:
|
|
41
|
+
type: array
|
|
42
|
+
items:
|
|
43
|
+
type: object
|
|
44
|
+
properties:
|
|
45
|
+
id: { type: string }
|
|
46
|
+
title: { type: string }
|
|
47
|
+
details: { type: string }
|
|
48
|
+
acceptance:
|
|
49
|
+
type: array
|
|
50
|
+
items: { type: string }
|
|
51
|
+
required: [id, title, details]
|
|
52
|
+
implementation:
|
|
53
|
+
type: array
|
|
54
|
+
items:
|
|
55
|
+
type: object
|
|
56
|
+
properties:
|
|
57
|
+
id: { type: string }
|
|
58
|
+
title: { type: string }
|
|
59
|
+
details: { type: string }
|
|
60
|
+
acceptance:
|
|
61
|
+
type: array
|
|
62
|
+
items: { type: string }
|
|
63
|
+
required: [id, title, details]
|
|
64
|
+
review:
|
|
65
|
+
type: array
|
|
66
|
+
items:
|
|
67
|
+
type: object
|
|
68
|
+
properties:
|
|
69
|
+
id: { type: string }
|
|
70
|
+
title: { type: string }
|
|
71
|
+
details: { type: string }
|
|
72
|
+
acceptance:
|
|
73
|
+
type: array
|
|
74
|
+
items: { type: string }
|
|
75
|
+
required: [id, title, details]
|
|
76
|
+
success_criteria:
|
|
77
|
+
type: array
|
|
78
|
+
items: { type: string }
|
|
79
|
+
notes:
|
|
80
|
+
type: string
|
|
81
|
+
required: [research, implementation, review]
|
|
82
|
+
|
|
83
|
+
- id: approve_plan
|
|
84
|
+
type: human
|
|
85
|
+
needs: [plan]
|
|
86
|
+
inputType: confirm
|
|
87
|
+
message: "Approve the plan and run sub-workflows?"
|
|
88
|
+
|
|
89
|
+
- id: research
|
|
90
|
+
type: workflow
|
|
91
|
+
needs: [approve_plan]
|
|
92
|
+
foreach: ${{ steps.plan.output.research }}
|
|
93
|
+
concurrency: ${{ inputs.max_parallel }}
|
|
94
|
+
path: decompose-research
|
|
95
|
+
inputs:
|
|
96
|
+
problem: ${{ inputs.problem }}
|
|
97
|
+
context: ${{ inputs.context }}
|
|
98
|
+
constraints: ${{ inputs.constraints }}
|
|
99
|
+
task: ${{ item }}
|
|
100
|
+
|
|
101
|
+
- id: implementation
|
|
102
|
+
type: workflow
|
|
103
|
+
needs: [research]
|
|
104
|
+
foreach: ${{ steps.plan.output.implementation }}
|
|
105
|
+
concurrency: ${{ inputs.max_parallel }}
|
|
106
|
+
path: decompose-implement
|
|
107
|
+
inputs:
|
|
108
|
+
problem: ${{ inputs.problem }}
|
|
109
|
+
context: ${{ inputs.context }}
|
|
110
|
+
constraints: ${{ inputs.constraints }}
|
|
111
|
+
research: ${{ steps.research.outputs }}
|
|
112
|
+
task: ${{ item }}
|
|
113
|
+
|
|
114
|
+
- id: review
|
|
115
|
+
type: workflow
|
|
116
|
+
needs: [implementation]
|
|
117
|
+
foreach: ${{ steps.plan.output.review }}
|
|
118
|
+
concurrency: ${{ inputs.max_parallel }}
|
|
119
|
+
path: decompose-review
|
|
120
|
+
inputs:
|
|
121
|
+
problem: ${{ inputs.problem }}
|
|
122
|
+
context: ${{ inputs.context }}
|
|
123
|
+
constraints: ${{ inputs.constraints }}
|
|
124
|
+
implementation: ${{ steps.implementation.outputs }}
|
|
125
|
+
task: ${{ item }}
|
|
126
|
+
|
|
127
|
+
- id: summary
|
|
128
|
+
type: llm
|
|
129
|
+
agent: summarizer
|
|
130
|
+
needs: [review]
|
|
131
|
+
prompt: |
|
|
132
|
+
Summarize the results of the workflow.
|
|
133
|
+
|
|
134
|
+
Problem:
|
|
135
|
+
${{ inputs.problem }}
|
|
136
|
+
|
|
137
|
+
Plan:
|
|
138
|
+
${{ steps.plan.output }}
|
|
139
|
+
|
|
140
|
+
Research results:
|
|
141
|
+
${{ steps.research.outputs }}
|
|
142
|
+
|
|
143
|
+
Implementation results:
|
|
144
|
+
${{ steps.implementation.outputs }}
|
|
145
|
+
|
|
146
|
+
Review results:
|
|
147
|
+
${{ steps.review.outputs }}
|
|
148
|
+
outputSchema:
|
|
149
|
+
type: object
|
|
150
|
+
properties:
|
|
151
|
+
summary:
|
|
152
|
+
type: string
|
|
153
|
+
open_questions:
|
|
154
|
+
type: array
|
|
155
|
+
items: { type: string }
|
|
156
|
+
risks:
|
|
157
|
+
type: array
|
|
158
|
+
items: { type: string }
|
|
159
|
+
required: [summary]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
name: decompose-research
|
|
2
|
+
description: "Research task sub-workflow for a decomposed problem"
|
|
3
|
+
|
|
4
|
+
inputs:
|
|
5
|
+
problem: { type: string }
|
|
6
|
+
context: { type: string, default: "" }
|
|
7
|
+
constraints: { type: string, default: "" }
|
|
8
|
+
task: { type: object }
|
|
9
|
+
|
|
10
|
+
outputs:
|
|
11
|
+
summary: ${{ steps.research.output.summary }}
|
|
12
|
+
findings: ${{ steps.research.output.findings }}
|
|
13
|
+
assumptions: ${{ steps.research.output.assumptions }}
|
|
14
|
+
open_questions: ${{ steps.research.output.open_questions }}
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- id: research
|
|
18
|
+
type: llm
|
|
19
|
+
agent: explore
|
|
20
|
+
useStandardTools: true
|
|
21
|
+
prompt: |
|
|
22
|
+
You are researching a task for a larger problem.
|
|
23
|
+
|
|
24
|
+
Problem:
|
|
25
|
+
${{ inputs.problem }}
|
|
26
|
+
|
|
27
|
+
Context:
|
|
28
|
+
${{ inputs.context }}
|
|
29
|
+
|
|
30
|
+
Constraints:
|
|
31
|
+
${{ inputs.constraints }}
|
|
32
|
+
|
|
33
|
+
Task:
|
|
34
|
+
${{ inputs.task }}
|
|
35
|
+
|
|
36
|
+
Provide concise, actionable research.
|
|
37
|
+
Return only the structured JSON required by the schema.
|
|
38
|
+
outputSchema:
|
|
39
|
+
type: object
|
|
40
|
+
properties:
|
|
41
|
+
summary:
|
|
42
|
+
type: string
|
|
43
|
+
findings:
|
|
44
|
+
type: array
|
|
45
|
+
items: { type: string }
|
|
46
|
+
assumptions:
|
|
47
|
+
type: array
|
|
48
|
+
items: { type: string }
|
|
49
|
+
open_questions:
|
|
50
|
+
type: array
|
|
51
|
+
items: { type: string }
|
|
52
|
+
required: [summary]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
name: decompose-review
|
|
2
|
+
description: "Review task sub-workflow for a decomposed problem"
|
|
3
|
+
|
|
4
|
+
inputs:
|
|
5
|
+
problem: { type: string }
|
|
6
|
+
context: { type: string, default: "" }
|
|
7
|
+
constraints: { type: string, default: "" }
|
|
8
|
+
task: { type: object }
|
|
9
|
+
implementation: { type: object, default: {} }
|
|
10
|
+
|
|
11
|
+
outputs:
|
|
12
|
+
approved: ${{ steps.review.output.approved }}
|
|
13
|
+
issues: ${{ steps.review.output.issues }}
|
|
14
|
+
suggestions: ${{ steps.review.output.suggestions }}
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- id: review
|
|
18
|
+
type: llm
|
|
19
|
+
agent: general
|
|
20
|
+
prompt: |
|
|
21
|
+
Review the implementation results for the task.
|
|
22
|
+
|
|
23
|
+
Problem:
|
|
24
|
+
${{ inputs.problem }}
|
|
25
|
+
|
|
26
|
+
Context:
|
|
27
|
+
${{ inputs.context }}
|
|
28
|
+
|
|
29
|
+
Constraints:
|
|
30
|
+
${{ inputs.constraints }}
|
|
31
|
+
|
|
32
|
+
Task:
|
|
33
|
+
${{ inputs.task }}
|
|
34
|
+
|
|
35
|
+
Implementation results:
|
|
36
|
+
${{ inputs.implementation }}
|
|
37
|
+
|
|
38
|
+
Identify issues, risks, and missing tests. Be direct and specific.
|
|
39
|
+
Return only the structured JSON required by the schema.
|
|
40
|
+
outputSchema:
|
|
41
|
+
type: object
|
|
42
|
+
properties:
|
|
43
|
+
approved:
|
|
44
|
+
type: boolean
|
|
45
|
+
issues:
|
|
46
|
+
type: array
|
|
47
|
+
items: { type: string }
|
|
48
|
+
suggestions:
|
|
49
|
+
type: array
|
|
50
|
+
items: { type: string }
|
|
51
|
+
required: [approved]
|