keystone-cli 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/README.md +276 -32
  2. package/package.json +8 -4
  3. package/src/cli.ts +350 -416
  4. package/src/commands/doc.ts +31 -0
  5. package/src/commands/event.ts +29 -0
  6. package/src/commands/graph.ts +37 -0
  7. package/src/commands/index.ts +14 -0
  8. package/src/commands/init.ts +185 -0
  9. package/src/commands/run.ts +124 -0
  10. package/src/commands/schema.ts +40 -0
  11. package/src/commands/utils.ts +78 -0
  12. package/src/commands/validate.ts +111 -0
  13. package/src/db/workflow-db.test.ts +314 -0
  14. package/src/db/workflow-db.ts +810 -210
  15. package/src/expression/evaluator-audit.test.ts +4 -2
  16. package/src/expression/evaluator.test.ts +14 -1
  17. package/src/expression/evaluator.ts +166 -19
  18. package/src/parser/config-schema.ts +18 -0
  19. package/src/parser/schema.ts +153 -22
  20. package/src/parser/test-schema.ts +6 -6
  21. package/src/parser/workflow-parser.test.ts +24 -0
  22. package/src/parser/workflow-parser.ts +65 -3
  23. package/src/runner/auto-heal.test.ts +5 -6
  24. package/src/runner/blueprint-executor.test.ts +2 -2
  25. package/src/runner/debug-repl.test.ts +5 -8
  26. package/src/runner/debug-repl.ts +59 -16
  27. package/src/runner/durable-timers.test.ts +11 -2
  28. package/src/runner/engine-executor.test.ts +1 -1
  29. package/src/runner/events.ts +57 -0
  30. package/src/runner/executors/artifact-executor.ts +166 -0
  31. package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
  32. package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
  33. package/src/runner/executors/file-executor.test.ts +48 -0
  34. package/src/runner/executors/file-executor.ts +324 -0
  35. package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
  36. package/src/runner/executors/human-executor.ts +144 -0
  37. package/src/runner/executors/join-executor.ts +75 -0
  38. package/src/runner/executors/llm-executor.ts +1266 -0
  39. package/src/runner/executors/memory-executor.ts +71 -0
  40. package/src/runner/executors/plan-executor.ts +104 -0
  41. package/src/runner/executors/request-executor.ts +265 -0
  42. package/src/runner/executors/script-executor.ts +43 -0
  43. package/src/runner/executors/shell-executor.ts +403 -0
  44. package/src/runner/executors/subworkflow-executor.ts +114 -0
  45. package/src/runner/executors/types.ts +69 -0
  46. package/src/runner/executors/wait-executor.ts +59 -0
  47. package/src/runner/join-scheduling.test.ts +197 -0
  48. package/src/runner/llm-adapter-runtime.test.ts +209 -0
  49. package/src/runner/llm-adapter.test.ts +419 -24
  50. package/src/runner/llm-adapter.ts +130 -26
  51. package/src/runner/llm-clarification.test.ts +2 -1
  52. package/src/runner/llm-executor.test.ts +532 -17
  53. package/src/runner/mcp-client-audit.test.ts +1 -2
  54. package/src/runner/mcp-client.ts +136 -46
  55. package/src/runner/mcp-manager.test.ts +4 -0
  56. package/src/runner/mcp-server.test.ts +58 -0
  57. package/src/runner/mcp-server.ts +26 -0
  58. package/src/runner/memoization.test.ts +190 -0
  59. package/src/runner/optimization-runner.ts +4 -9
  60. package/src/runner/quality-gate.test.ts +69 -0
  61. package/src/runner/reflexion.test.ts +6 -17
  62. package/src/runner/resource-pool.ts +102 -14
  63. package/src/runner/services/context-builder.ts +144 -0
  64. package/src/runner/services/secret-manager.ts +105 -0
  65. package/src/runner/services/workflow-validator.ts +131 -0
  66. package/src/runner/shell-executor.test.ts +28 -4
  67. package/src/runner/standard-tools-ast.test.ts +196 -0
  68. package/src/runner/standard-tools-execution.test.ts +27 -0
  69. package/src/runner/standard-tools-integration.test.ts +6 -10
  70. package/src/runner/standard-tools.ts +339 -102
  71. package/src/runner/step-executor.test.ts +216 -4
  72. package/src/runner/step-executor.ts +69 -941
  73. package/src/runner/stream-utils.ts +7 -3
  74. package/src/runner/test-harness.ts +20 -1
  75. package/src/runner/timeout.test.ts +10 -0
  76. package/src/runner/timeout.ts +11 -2
  77. package/src/runner/tool-integration.test.ts +1 -1
  78. package/src/runner/wait-step.test.ts +102 -0
  79. package/src/runner/workflow-runner.test.ts +208 -15
  80. package/src/runner/workflow-runner.ts +890 -818
  81. package/src/runner/workflow-scheduler.ts +75 -0
  82. package/src/runner/workflow-state.ts +269 -0
  83. package/src/runner/workflow-subflows.test.ts +13 -12
  84. package/src/scripts/generate-schemas.ts +16 -0
  85. package/src/templates/agents/explore.md +1 -0
  86. package/src/templates/agents/general.md +1 -0
  87. package/src/templates/agents/handoff-router.md +14 -0
  88. package/src/templates/agents/handoff-specialist.md +15 -0
  89. package/src/templates/agents/keystone-architect.md +13 -44
  90. package/src/templates/agents/my-agent.md +1 -0
  91. package/src/templates/agents/software-engineer.md +1 -0
  92. package/src/templates/agents/summarizer.md +1 -0
  93. package/src/templates/agents/test-agent.md +1 -0
  94. package/src/templates/agents/tester.md +1 -0
  95. package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
  96. package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
  97. package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
  98. package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
  99. package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
  100. package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
  101. package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
  102. package/src/templates/control-flow/idempotency-example.yaml +30 -0
  103. package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
  104. package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
  105. package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
  106. package/src/templates/features/artifact-example.yaml +39 -0
  107. package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
  108. package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
  109. package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
  110. package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
  111. package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
  112. package/src/templates/features/script-example.yaml +27 -0
  113. package/src/templates/patterns/agent-handoff.yaml +53 -0
  114. package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
  115. package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
  116. package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
  117. package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
  118. package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
  119. package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
  120. package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
  121. package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
  122. package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
  123. package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
  124. package/src/templates/scaffolding/review-loop.yaml +97 -0
  125. package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
  126. package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
  127. package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
  128. package/src/templates/testing/invalid.yaml +6 -0
  129. package/src/ui/dashboard.tsx +191 -33
  130. package/src/utils/auth-manager.test.ts +337 -0
  131. package/src/utils/auth-manager.ts +157 -61
  132. package/src/utils/blueprint-utils.ts +4 -6
  133. package/src/utils/config-loader.test.ts +2 -0
  134. package/src/utils/config-loader.ts +12 -3
  135. package/src/utils/constants.ts +76 -0
  136. package/src/utils/container.ts +63 -0
  137. package/src/utils/context-injector.test.ts +200 -0
  138. package/src/utils/context-injector.ts +244 -0
  139. package/src/utils/doc-generator.ts +85 -0
  140. package/src/utils/env-filter.ts +45 -0
  141. package/src/utils/json-parser.test.ts +12 -0
  142. package/src/utils/json-parser.ts +30 -5
  143. package/src/utils/logger.ts +12 -1
  144. package/src/utils/mermaid.ts +4 -0
  145. package/src/utils/paths.ts +52 -1
  146. package/src/utils/process-sandbox-worker.test.ts +46 -0
  147. package/src/utils/process-sandbox.ts +227 -14
  148. package/src/utils/redactor.test.ts +11 -6
  149. package/src/utils/redactor.ts +25 -9
  150. package/src/utils/sandbox.ts +3 -0
  151. package/src/runner/llm-executor.ts +0 -638
  152. package/src/runner/shell-executor.ts +0 -366
  153. package/src/templates/invalid.yaml +0 -5
@@ -0,0 +1,75 @@
1
+ import type { JoinStep, Step, Workflow } from '../parser/schema.ts';
2
+ import { WorkflowParser } from '../parser/workflow-parser.ts';
3
+
4
/**
 * Tracks which workflow steps are still waiting to run and decides which of
 * them may start next, based on their `needs` dependencies.
 *
 * The scheduler holds three pieces of bookkeeping:
 *  - the execution order returned by `WorkflowParser.topologicalSort`,
 *  - the set of steps that have not been started yet (`pendingSteps`),
 *  - the set of steps that are considered done (`completedSteps`).
 */
export class WorkflowScheduler {
  private executionOrder: string[];
  private pendingSteps: Set<string>;
  private completedSteps: Set<string>;
  private stepMap: Map<string, Step>;

  /**
   * @param workflow Workflow definition whose steps are scheduled.
   * @param alreadyCompleted Step ids that are already done (for example from a
   *   restored run); they are copied into the completed set and never
   *   reported as pending.
   */
  constructor(
    private readonly workflow: Workflow,
    alreadyCompleted: Set<string> = new Set()
  ) {
    this.executionOrder = WorkflowParser.topologicalSort(workflow);

    // Index step definitions by id for lookups in getRunnableSteps().
    const byId = new Map<string, Step>();
    for (const definition of workflow.steps) {
      byId.set(definition.id, definition);
    }
    this.stepMap = byId;

    // Copy so later mutations do not leak back into the caller's set.
    this.completedSteps = new Set(alreadyCompleted);

    // Everything in execution order that is not already done is pending.
    const pending = new Set<string>();
    for (const id of this.executionOrder) {
      if (!this.completedSteps.has(id)) {
        pending.add(id);
      }
    }
    this.pendingSteps = pending;
  }

  /** Returns the step ids in the order computed at construction time. */
  public getExecutionOrder(): string[] {
    return this.executionOrder;
  }

  /** Number of steps that have not been started or completed yet. */
  public getPendingCount(): number {
    return this.pendingSteps.size;
  }

  /** True once no pending steps remain. */
  public isComplete(): boolean {
    return this.pendingSteps.size === 0;
  }

  /** Records a step as completed and removes it from the pending set. */
  public markStepComplete(stepId: string): void {
    this.completedSteps.add(stepId);
    this.pendingSteps.delete(stepId);
  }

  /**
   * Collects pending steps whose dependencies are all completed, stopping as
   * soon as `runningCount` plus the number collected reaches
   * `globalConcurrencyLimit`.
   *
   * The returned steps are NOT removed from the pending set; see startStep().
   */
  public getRunnableSteps(runningCount: number, globalConcurrencyLimit: number): Step[] {
    const ready: Step[] = [];

    for (const pendingId of this.pendingSteps) {
      const atCapacity = runningCount + ready.length >= globalConcurrencyLimit;
      if (atCapacity) {
        break;
      }

      const candidate = this.stepMap.get(pendingId);
      if (candidate && this.isStepReady(candidate)) {
        ready.push(candidate);
      }
    }

    return ready;
  }

  /** Removes a step from the pending set without marking it completed. */
  public startStep(stepId: string): void {
    this.pendingSteps.delete(stepId);
  }

  /**
   * A step is ready when every id in its `needs` list is in the completed
   * set. A missing or empty `needs` list means the step is ready immediately.
   * Join steps follow the same rule.
   */
  private isStepReady(step: Step): boolean {
    const dependencies = (step.type === 'join' ? (step as JoinStep).needs : step.needs) ?? [];
    for (const dependency of dependencies) {
      if (!this.completedSteps.has(dependency)) {
        return false;
      }
    }
    return true;
  }
}
@@ -0,0 +1,269 @@
1
+ import type { WorkflowDb } from '../db/workflow-db.ts';
2
+ import type { ExpressionContext } from '../expression/evaluator.ts';
3
+ import { ExpressionEvaluator } from '../expression/evaluator.ts';
4
+ import type { Workflow } from '../parser/schema.ts';
5
+ import { WorkflowParser } from '../parser/workflow-parser.ts';
6
+ import type { StepStatusType } from '../types/status.ts';
7
+ import { StepStatus, WorkflowStatus } from '../types/status.ts';
8
+ import type { Logger } from '../utils/logger.ts';
9
+ import { ForeachExecutor } from './executors/foreach-executor.ts';
10
+
11
+ export interface StepContext {
12
+ output?: unknown;
13
+ outputs?: Record<string, unknown>;
14
+ status: StepStatusType;
15
+ error?: string;
16
+ usage?: {
17
+ prompt_tokens: number;
18
+ completion_tokens: number;
19
+ total_tokens: number;
20
+ };
21
+ }
22
+
23
+ export interface ForeachStepContext extends StepContext {
24
+ items: StepContext[];
25
+ foreachItems?: unknown[];
26
+ }
27
+
28
+ export class WorkflowState {
29
+ private stepContexts: Map<string, StepContext | ForeachStepContext> = new Map();
30
+
31
+ constructor(
32
+ private readonly runId: string,
33
+ private readonly workflow: Workflow,
34
+ private readonly db: WorkflowDb,
35
+ private readonly inputs: Record<string, unknown>,
36
+ private readonly secrets: Record<string, string>,
37
+ private readonly logger: Logger
38
+ ) {}
39
+
40
+ public get(stepId: string): StepContext | ForeachStepContext | undefined {
41
+ return this.stepContexts.get(stepId);
42
+ }
43
+
44
+ public set(stepId: string, context: StepContext | ForeachStepContext): void {
45
+ this.stepContexts.set(stepId, context);
46
+ }
47
+
48
+ public has(stepId: string): boolean {
49
+ return this.stepContexts.has(stepId);
50
+ }
51
+
52
+ public entries() {
53
+ return this.stepContexts.entries();
54
+ }
55
+
56
+ public get size(): number {
57
+ return this.stepContexts.size;
58
+ }
59
+
60
+ public getCompletedStepIds(): Set<string> {
61
+ const completed = new Set<string>();
62
+ for (const [stepId, context] of this.stepContexts.entries()) {
63
+ if (context.status === StepStatus.SUCCESS || context.status === StepStatus.SKIPPED) {
64
+ completed.add(stepId);
65
+ }
66
+ }
67
+ return completed;
68
+ }
69
+
70
+ public buildContext(item?: unknown, index?: number): ExpressionContext {
71
+ const stepsContext: Record<string, any> = {};
72
+
73
+ for (const [stepId, ctx] of this.stepContexts.entries()) {
74
+ stepsContext[stepId] = {
75
+ output: ctx.output,
76
+ outputs: ctx.outputs,
77
+ status: ctx.status,
78
+ error: ctx.error,
79
+ ...('items' in ctx ? { items: (ctx as ForeachStepContext).items } : {}),
80
+ };
81
+ }
82
+
83
+ return {
84
+ inputs: this.inputs,
85
+ secrets: this.secrets,
86
+ steps: stepsContext,
87
+ item,
88
+ index,
89
+ env: process.env as Record<string, string>,
90
+ };
91
+ }
92
+
93
+ public async restore(): Promise<void> {
94
+ const run = await this.db.getRun(this.runId);
95
+ if (!run) {
96
+ throw new Error(`Run ${this.runId} not found`);
97
+ }
98
+
99
+ // Restore inputs if they exist
100
+ if (run.inputs && run.inputs !== 'null' && run.inputs !== '') {
101
+ try {
102
+ const storedInputs = JSON.parse(run.inputs);
103
+ // Merge stored inputs, provided inputs to constructor have precedence
104
+ Object.assign(this.inputs, { ...storedInputs, ...this.inputs });
105
+ } catch (e) {
106
+ this.logger.error(`Failed to parse persisted inputs for run ${this.runId}`);
107
+ }
108
+ }
109
+
110
+ // Load all step executions for this run
111
+ const steps = await this.db.getStepsByRun(this.runId);
112
+
113
+ // Group steps by step_id
114
+ const stepExecutionsByStepId = new Map<string, typeof steps>();
115
+ for (const step of steps) {
116
+ if (!stepExecutionsByStepId.has(step.step_id)) {
117
+ stepExecutionsByStepId.set(step.step_id, []);
118
+ }
119
+ stepExecutionsByStepId.get(step.step_id)?.push(step);
120
+ }
121
+
122
+ const executionOrder = WorkflowParser.topologicalSort(this.workflow);
123
+
124
+ for (const stepId of executionOrder) {
125
+ const stepExecutions = stepExecutionsByStepId.get(stepId);
126
+ if (!stepExecutions || stepExecutions.length === 0) continue;
127
+
128
+ const stepDef = this.workflow.steps.find((s) => s.id === stepId);
129
+ if (!stepDef) continue;
130
+
131
+ const isForeach = !!stepDef.foreach;
132
+
133
+ if (isForeach) {
134
+ const items: StepContext[] = [];
135
+ const outputs: unknown[] = [];
136
+ let allSuccess = true;
137
+
138
+ const sortedExecs = [...stepExecutions].sort((a, b) => {
139
+ // Sort by iteration_index asc, then by created_at desc (newest first)
140
+ if ((a.iteration_index ?? 0) !== (b.iteration_index ?? 0)) {
141
+ return (a.iteration_index ?? 0) - (b.iteration_index ?? 0);
142
+ }
143
+ // If started_at is available, use it (newest first).
144
+ // Fallback to stable sort if nothing else.
145
+ if (a.started_at && b.started_at) {
146
+ return new Date(b.started_at).getTime() - new Date(a.started_at).getTime();
147
+ }
148
+ if (a.step_id && b.step_id) return 0; // Stability
149
+ return 0;
150
+ });
151
+
152
+ // Dedup by iteration_index, keeping the first (newest)
153
+ const uniqueExecs: typeof steps = [];
154
+ const seenIndices = new Set<number>();
155
+ for (const ex of sortedExecs) {
156
+ const idx = ex.iteration_index ?? 0;
157
+ if (!seenIndices.has(idx)) {
158
+ seenIndices.add(idx);
159
+ uniqueExecs.push(ex);
160
+ }
161
+ }
162
+
163
+ for (const exec of uniqueExecs) {
164
+ if (exec.iteration_index === null) continue;
165
+
166
+ let output: unknown = null;
167
+ if (exec.output) {
168
+ try {
169
+ output = JSON.parse(exec.output);
170
+ } catch (e) {
171
+ /* ignore */
172
+ }
173
+ }
174
+
175
+ items[exec.iteration_index] = {
176
+ output,
177
+ outputs:
178
+ typeof output === 'object' && output !== null && !Array.isArray(output)
179
+ ? (output as any)
180
+ : {},
181
+ status: exec.status as StepStatusType,
182
+ error: exec.error || undefined,
183
+ };
184
+ outputs[exec.iteration_index] = output;
185
+ if (exec.status !== StepStatus.SUCCESS && exec.status !== StepStatus.SKIPPED) {
186
+ allSuccess = false;
187
+ }
188
+ }
189
+
190
+ // deterministic resume support
191
+ let expectedCount = -1;
192
+ let persistedItems: unknown[] | undefined;
193
+ const parentExec = stepExecutions.find((e) => e.iteration_index === null);
194
+ if (parentExec?.output) {
195
+ try {
196
+ const parsed = JSON.parse(parentExec.output);
197
+ if (parsed.__foreachItems && Array.isArray(parsed.__foreachItems)) {
198
+ persistedItems = parsed.__foreachItems;
199
+ expectedCount = parsed.__foreachItems.length;
200
+ }
201
+ } catch {
202
+ /* ignore */
203
+ }
204
+ }
205
+
206
+ if (expectedCount === -1 && stepDef.foreach) {
207
+ try {
208
+ const baseContext = this.buildContext();
209
+ const foreachItems = ExpressionEvaluator.evaluate(stepDef.foreach, baseContext);
210
+ if (Array.isArray(foreachItems)) expectedCount = foreachItems.length;
211
+ } catch {
212
+ allSuccess = false;
213
+ }
214
+ }
215
+
216
+ const hasAllItems =
217
+ expectedCount !== -1 &&
218
+ items.length === expectedCount &&
219
+ !Array.from({ length: expectedCount }).some((_, i) => !items[i]);
220
+
221
+ let status: StepStatusType = StepStatus.SUCCESS;
222
+ if (allSuccess && hasAllItems) {
223
+ status = StepStatus.SUCCESS;
224
+ } else if (items.some((i) => i?.status === StepStatus.SUSPENDED)) {
225
+ status = StepStatus.SUSPENDED;
226
+ } else {
227
+ status = StepStatus.FAILED;
228
+ }
229
+
230
+ const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
231
+ this.stepContexts.set(stepId, {
232
+ output: outputs,
233
+ outputs: mappedOutputs,
234
+ status,
235
+ items,
236
+ foreachItems: persistedItems,
237
+ } as ForeachStepContext);
238
+ } else {
239
+ // Fix: Sort by started_at desc (newest first) to avoid restoring stale retries
240
+ const sorted = [...stepExecutions].sort((a, b) => {
241
+ if (a.started_at && b.started_at) {
242
+ return new Date(b.started_at).getTime() - new Date(a.started_at).getTime();
243
+ }
244
+ return 0;
245
+ });
246
+ const exec = sorted[0];
247
+ let output: unknown = null;
248
+ if (exec.output) {
249
+ try {
250
+ output = JSON.parse(exec.output);
251
+ } catch (e) {
252
+ /* ignore */
253
+ }
254
+ }
255
+
256
+ this.stepContexts.set(stepId, {
257
+ output,
258
+ outputs:
259
+ typeof output === 'object' && output !== null && !Array.isArray(output)
260
+ ? (output as any)
261
+ : {},
262
+ status: exec.status as StepStatusType,
263
+ error: exec.error || undefined,
264
+ });
265
+ }
266
+ }
267
+ this.logger.log(`✓ Restored state: ${this.stepContexts.size} step(s) hydrated`);
268
+ }
269
+ }
@@ -141,9 +141,6 @@ describe('WorkflowRunner - Subflows & Compensations', () => {
141
141
  const undo1Index = logs.findIndex((l) => l.includes('undoing step1'));
142
142
 
143
143
  if (undo2Index === -1 || undo1Index === -1 || undo2Index >= undo1Index) {
144
- console.log('--- COMPENSATION LOGS ---');
145
- console.log(logs.filter((l) => l.includes('undoing') || l.includes('rollback')).join('\n'));
146
- console.log('--- END ---');
147
144
  }
148
145
 
149
146
  expect(undo2Index).toBeGreaterThan(-1);
@@ -159,12 +156,19 @@ describe('WorkflowRunner - Subflows & Compensations', () => {
159
156
 
160
157
  if (existsSync(compDbPath)) rmSync(compDbPath);
161
158
  });
162
- it('should execute join step early if condition is "any" and one branch finishes', async () => {
163
- // This is hard to test deterministically without timing, but we can verify it executes
159
+ it('should NOT execute join step early if condition is "any" (must wait for all dependencies to finish)', async () => {
160
+ // New behavior: Join waits for all dependencies to finish (success or failure)
161
+ // before evaluating the condition. This prevents missing inputs.
164
162
  const workflow: Workflow = {
165
- name: 'early-join',
163
+ name: 'delayed-join',
166
164
  steps: [
167
- { id: 'slow', type: 'shell', run: 'sleep 0.1 && echo "slow"', needs: [] },
165
+ {
166
+ id: 'slow',
167
+ type: 'shell',
168
+ run: 'sleep 0.1 && echo "slow"',
169
+ allowInsecure: true,
170
+ needs: [],
171
+ },
168
172
  { id: 'fast', type: 'shell', run: 'echo "fast"', needs: [] },
169
173
  {
170
174
  id: 'early_join',
@@ -196,13 +200,13 @@ describe('WorkflowRunner - Subflows & Compensations', () => {
196
200
  const runner = new WorkflowRunner(workflow, { dbPath, logger });
197
201
  await runner.run();
198
202
 
199
- // Verify after_join started BEFORE slow finished
203
+ // Verify after_join started AFTER slow finished
200
204
  const afterJoinStart = logs.findIndex((l) => l.includes('Executing step: after_join'));
201
205
  const slowFinished = logs.findIndex((l) => l.includes('Step slow completed'));
202
206
 
203
207
  expect(afterJoinStart).toBeGreaterThan(-1);
204
208
  expect(slowFinished).toBeGreaterThan(-1);
205
- expect(afterJoinStart).toBeLessThan(slowFinished);
209
+ expect(afterJoinStart).toBeGreaterThan(slowFinished);
206
210
  });
207
211
 
208
212
  it('should execute top-level workflow compensation on failure', async () => {
@@ -244,9 +248,6 @@ describe('WorkflowRunner - Subflows & Compensations', () => {
244
248
 
245
249
  const wfUndoIndex = logs.findIndex((l) => l.includes('undoing workflow'));
246
250
  if (wfUndoIndex === -1) {
247
- console.log('--- WF COMP LOGS ---');
248
- console.log(logs.join('\n'));
249
- console.log('--- END ---');
250
251
  }
251
252
  expect(wfUndoIndex).toBeGreaterThan(-1);
252
253
 
@@ -0,0 +1,16 @@
1
/**
 * Generates JSON Schema files for workflows and agent frontmatter from the
 * Zod schemas and writes them to `<cwd>/schemas`.
 */
import { mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { AgentSchema, WorkflowSchema } from '../parser/schema';

const schemasDir = join(process.cwd(), 'schemas');

// writeFileSync does not create parent directories; make sure the target
// directory exists so the script also works on a fresh checkout.
mkdirSync(schemasDir, { recursive: true });

// Workflow Schema
const workflowJson = zodToJsonSchema(WorkflowSchema, 'keystone-workflow');
writeFileSync(join(schemasDir, 'workflow.json'), JSON.stringify(workflowJson, null, 2));

// Agent Schema
// We omit systemPrompt because it comes from the markdown body, not the frontmatter
const agentFrontmatterSchema = AgentSchema.omit({ systemPrompt: true });
const agentJson = zodToJsonSchema(agentFrontmatterSchema, 'keystone-agent');
writeFileSync(join(schemasDir, 'agent.json'), JSON.stringify(agentJson, null, 2));
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: explore
3
4
  description: Agent for exploring and understanding codebases
4
5
  model: claude-sonnet-4.5
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: general
3
4
  description: "A general-purpose assistant for various tasks"
4
5
  model: gpt-4o
@@ -0,0 +1,14 @@
1
+ ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
3
+ name: handoff-router
4
+ description: "Routes work to specialists when needed."
5
+ model: gpt-4o
6
+ ---
7
+
8
+ # Role
9
+ You are a router agent.
10
+
11
+ # Instructions
12
+ - Always call `remember_context` with the current user and topic.
13
+ - If you need deeper expertise, call `transfer_to_agent` with `handoff-specialist`.
14
+ - Provide a concise final response after any handoff.
@@ -0,0 +1,15 @@
1
+ ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
3
+ name: handoff-specialist
4
+ description: "Specialist agent for complex topics."
5
+ model: gpt-4o
6
+ ---
7
+
8
+ # Role
9
+ You are a specialist for ${{ inputs.topic }}.
10
+
11
+ # Context
12
+ If available, address ${{ memory.user }} and confirm the focus is ${{ memory.topic }}.
13
+
14
+ # Output
15
+ Provide concise, expert guidance tailored to the topic.
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: keystone-architect
3
4
  description: "Expert at designing Keystone workflows and agents"
4
5
  model: gpt-4o
@@ -9,52 +10,19 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
9
10
 
10
11
  # Knowledge Base
11
12
 
12
- ## Workflow Schema (.yaml)
13
- - **name**: Unique identifier for the workflow.
14
- - **description**: (Optional) Description of the workflow.
15
- - **inputs**: Map of `{ type: 'string'|'number'|'boolean'|'array'|'object', default: any, description?: string }` under the `inputs` key.
16
- - **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
17
- - **outputSchema**: (Optional) JSON Schema for final workflow outputs.
18
- - **env**: (Optional) Map of workflow-level environment variables.
19
- - **concurrency**: (Optional) Global concurrency limit for the workflow.
20
- - **pools**: (Optional) Map of resource pools `{ pool_name: limit }`.
21
- - **compensate**: (Optional) Workflow-level compensation step.
22
- - **eval**: (Optional) Configuration for prompt optimization `{ scorer: 'llm'|'script', agent, prompt, run, allowInsecure, allowSecrets }`.
23
- - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
24
- - **shell**: `{ id, type: 'shell', run, dir, env, allowInsecure, transform }`
25
- - **llm**: `{ id, type: 'llm', agent, prompt, outputSchema, provider, model, tools, maxIterations, maxMessageHistory, useGlobalMcp, allowClarification, useStandardTools, allowOutsideCwd, allowInsecure, mcpServers, handoff }`
26
- - **workflow**: `{ id, type: 'workflow', path, inputs, outputMapping }`
27
- - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content, allowOutsideCwd }`
28
- - **request**: `{ id, type: 'request', url, method, body, headers, allowInsecure }`
29
- - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }`
30
- - **sleep**: `{ id, type: 'sleep', duration, durable }` (use `durable: true` for sleeps >= 60s)
31
- - **script**: `{ id, type: 'script', run, allowInsecure }`
32
- - **engine**: `{ id, type: 'engine', command, args, input, env, cwd, outputSchema }`
33
- - **memory**: `{ id, type: 'memory', op: 'search'|'store', query, text, model, metadata, limit }`
34
- - **join**: `{ id, type: 'join', target: 'steps'|'branches', condition: 'all'|'any'|number }`
35
- - **Common Step Fields**: `needs` (array), `if` (expr), `timeout` (ms), `retry` (`{ count, backoff, baseDelay }`), `auto_heal`, `reflexion`, `learn`, `foreach`, `concurrency`, `pool`, `compensate`, `transform`, `inputSchema`, `outputSchema`, `outputRetries`, `repairStrategy`.
36
- - **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
37
- - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
13
+ ## 📖 Source of Truth
14
+ You MUST consult the latest schemas before designing any workflow or agent. Use your `fetch` tool (or `request` step) to read:
15
+ - **Workflow Schema**: [https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json](https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json)
16
+ - **Agent Schema**: [https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json](https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json)
38
17
 
39
- ## Standard Tools
40
- When `useStandardTools: true` is set on an `llm` step, the agent has access to:
41
- - `read_file(path)`: Read file contents.
42
- - `read_file_lines(path, start, count)`: Read a specific range of lines.
43
- - `write_file(path, content)`: Write/overwrite file.
44
- - `list_files(path)`: List directory contents.
45
- - `search_files(pattern, dir)`: Search for files by pattern (glob).
46
- - `search_content(query, pattern, dir)`: Search for text within files.
47
- - `run_command(command, dir)`: Run shell commands (restricted by `allowInsecure`).
48
- - **Path Gating**: Restricted to CWD by default. Use `allowOutsideCwd: true` to bypass.
49
18
 
50
- ## Agent Schema (.md)
51
- Markdown files with YAML frontmatter:
52
- - **name**: Agent name.
53
- - **description**: (Optional) Agent description.
54
- - **provider**: (Optional) Provider name.
55
- - **model**: (Optional) e.g., `gpt-4o`, `claude-sonnet-4.5`.
56
- - **tools**: Array of `{ name, description, parameters, execution }` where `execution` is a standard Step object and `parameters` is a JSON Schema.
57
- - **Body**: The Markdown body is the `systemPrompt`.
19
+ If you are running in the Keystone CLI repository, you can also use `read_file` on `schemas/workflow.json` and `schemas/agent.json`.
20
+
21
+ ## Guidelines
22
+ 1. **Always Consult Schema**: Do not rely on your internal training data for Keystone schema fields. Fetch or read the JSON schemas above to ensure you are using the latest properties and types.
23
+ 2. **Schema-Driven Design**: For every step type (shell, llm, request, etc.), check the `workflow.json` schema to see available fields, defaults, and requirements.
24
+ 3. **Tool Awareness**: Check the `STANDARD_TOOLS` array in the codebase (or consult your available tools) to see what built-in capabilities you can leverage.
25
+
58
26
 
59
27
  ## Expression Syntax
60
28
  - `${{ inputs.name }}`
@@ -64,6 +32,7 @@ Markdown files with YAML frontmatter:
64
32
  - `${{ item }}` (current item in a `foreach` loop)
65
33
  - `${{ secrets.NAME }}` (access redacted secrets)
66
34
  - `${{ env.NAME }}` (access environment variables)
35
+ - `${{ memory.key }}` (tool-driven memory updates)
67
36
  - Standard JS-like expressions: `${{ steps.count > 0 ? 'yes' : 'no' }}`
68
37
 
69
38
  # Guidelines
@@ -1,3 +1,4 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: my-agent
3
4
  ---
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: software-engineer
3
4
  description: "Expert at writing and debugging code"
4
5
  model: gpt-4o
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: summarizer
3
4
  description: "Summarizes text content"
4
5
  model: gpt-4o
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: test-agent
3
4
  model: gpt-4
4
5
  tools:
@@ -1,4 +1,5 @@
1
1
  ---
2
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/agent.json
2
3
  name: tester
3
4
  description: "Expert at writing and running tests for Keystone CLI"
4
5
  model: gpt-4o
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: basic-inputs
2
3
  description: "A simple workflow that greets a user with optional repetition"
3
4
  inputs:
@@ -13,6 +14,7 @@ inputs:
13
14
  steps:
14
15
  - id: hello
15
16
  type: shell
17
+ allowInsecure: true
16
18
  run: |
17
19
  for i in $(seq 1 ${{ inputs.count }}); do
18
20
  echo "Hello, ${{ escape(inputs.user_name) }}! (Attempt $i)"
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: basic-shell
2
3
  description: "A simple example workflow demonstrating basic features"
3
4
 
@@ -17,4 +18,4 @@ steps:
17
18
  - id: print_message
18
19
  type: shell
19
20
  needs: [create_message]
20
- run: echo "Generated message - ${{ steps.create_message.output }}"
21
+ run: echo "Generated message - ${{ escape(steps.create_message.output) }}"
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: full-feature-demo
2
3
  description: "A comprehensive workflow demonstrating multiple feature types"
3
4
 
@@ -41,6 +42,7 @@ steps:
41
42
  - id: count_files
42
43
  type: shell
43
44
  needs: [write_file]
45
+ allowInsecure: true
44
46
  run: ls ./tmp/keystone-*.txt | wc -l
45
47
  transform: parseInt(stdout.trim())
46
48
 
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: stop-watch
2
3
  description: "A simple stopwatch workflow"
3
4
  steps:
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: nested-rollback-child
2
3
  description: Child workflow with a side effect and compensation
3
4
 
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: cleanup-finally
2
3
  description: "Test the finally block"
3
4
 
@@ -1,3 +1,4 @@
1
+ $schema: https://raw.githubusercontent.com/mhingston/keystone-cli/main/schemas/workflow.json
1
2
  name: fan-out-fan-in-example
2
3
  description: Demonstrates dynamic join conditions and nested compensations
3
4
 
@@ -17,6 +18,7 @@ steps:
17
18
 
18
19
  - id: parallel_1
19
20
  type: shell
21
+ allowInsecure: true
20
22
  run: sleep 2 && echo "Parallel 1 done"
21
23
  needs: [prepare]
22
24
  compensate:
@@ -26,6 +28,7 @@ steps:
26
28
 
27
29
  - id: parallel_2
28
30
  type: shell
31
+ allowInsecure: true
29
32
  run: |
30
33
  echo "Parallel 2 failing intentionally..."
31
34
  exit 1