keystone-cli 0.7.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +486 -54
  2. package/package.json +8 -2
  3. package/src/__fixtures__/index.ts +100 -0
  4. package/src/cli.ts +841 -91
  5. package/src/db/memory-db.ts +35 -1
  6. package/src/db/workflow-db.test.ts +24 -0
  7. package/src/db/workflow-db.ts +484 -14
  8. package/src/expression/evaluator.ts +68 -4
  9. package/src/parser/agent-parser.ts +6 -3
  10. package/src/parser/config-schema.ts +38 -2
  11. package/src/parser/schema.ts +192 -7
  12. package/src/parser/test-schema.ts +29 -0
  13. package/src/parser/workflow-parser.test.ts +54 -0
  14. package/src/parser/workflow-parser.ts +153 -7
  15. package/src/runner/aggregate-error.test.ts +57 -0
  16. package/src/runner/aggregate-error.ts +46 -0
  17. package/src/runner/audit-verification.test.ts +2 -2
  18. package/src/runner/auto-heal.test.ts +1 -1
  19. package/src/runner/blueprint-executor.test.ts +63 -0
  20. package/src/runner/blueprint-executor.ts +157 -0
  21. package/src/runner/concurrency-limit.test.ts +82 -0
  22. package/src/runner/debug-repl.ts +18 -3
  23. package/src/runner/durable-timers.test.ts +200 -0
  24. package/src/runner/engine-executor.test.ts +464 -0
  25. package/src/runner/engine-executor.ts +491 -0
  26. package/src/runner/foreach-executor.ts +30 -12
  27. package/src/runner/llm-adapter.test.ts +282 -5
  28. package/src/runner/llm-adapter.ts +581 -8
  29. package/src/runner/llm-clarification.test.ts +79 -21
  30. package/src/runner/llm-errors.ts +83 -0
  31. package/src/runner/llm-executor.test.ts +258 -219
  32. package/src/runner/llm-executor.ts +226 -29
  33. package/src/runner/mcp-client.ts +70 -3
  34. package/src/runner/mcp-manager.test.ts +52 -52
  35. package/src/runner/mcp-manager.ts +12 -5
  36. package/src/runner/mcp-server.test.ts +117 -78
  37. package/src/runner/mcp-server.ts +13 -4
  38. package/src/runner/optimization-runner.ts +48 -31
  39. package/src/runner/reflexion.test.ts +1 -1
  40. package/src/runner/resource-pool.test.ts +113 -0
  41. package/src/runner/resource-pool.ts +164 -0
  42. package/src/runner/shell-executor.ts +130 -32
  43. package/src/runner/standard-tools-execution.test.ts +39 -0
  44. package/src/runner/standard-tools-integration.test.ts +36 -36
  45. package/src/runner/standard-tools.test.ts +18 -0
  46. package/src/runner/standard-tools.ts +174 -93
  47. package/src/runner/step-executor.test.ts +176 -16
  48. package/src/runner/step-executor.ts +534 -83
  49. package/src/runner/stream-utils.test.ts +14 -0
  50. package/src/runner/subflow-outputs.test.ts +103 -0
  51. package/src/runner/test-harness.ts +161 -0
  52. package/src/runner/tool-integration.test.ts +73 -79
  53. package/src/runner/workflow-runner.test.ts +549 -15
  54. package/src/runner/workflow-runner.ts +1448 -79
  55. package/src/runner/workflow-subflows.test.ts +255 -0
  56. package/src/templates/agents/keystone-architect.md +17 -12
  57. package/src/templates/agents/tester.md +21 -0
  58. package/src/templates/child-rollback.yaml +11 -0
  59. package/src/templates/decompose-implement.yaml +53 -0
  60. package/src/templates/decompose-problem.yaml +159 -0
  61. package/src/templates/decompose-research.yaml +52 -0
  62. package/src/templates/decompose-review.yaml +51 -0
  63. package/src/templates/dev.yaml +134 -0
  64. package/src/templates/engine-example.yaml +33 -0
  65. package/src/templates/fan-out-fan-in.yaml +61 -0
  66. package/src/templates/memory-service.yaml +1 -1
  67. package/src/templates/parent-rollback.yaml +16 -0
  68. package/src/templates/robust-automation.yaml +1 -1
  69. package/src/templates/scaffold-feature.yaml +29 -27
  70. package/src/templates/scaffold-generate.yaml +41 -0
  71. package/src/templates/scaffold-plan.yaml +53 -0
  72. package/src/types/status.ts +3 -0
  73. package/src/ui/dashboard.tsx +4 -3
  74. package/src/utils/assets.macro.ts +36 -0
  75. package/src/utils/auth-manager.ts +585 -8
  76. package/src/utils/blueprint-utils.test.ts +49 -0
  77. package/src/utils/blueprint-utils.ts +80 -0
  78. package/src/utils/circuit-breaker.test.ts +177 -0
  79. package/src/utils/circuit-breaker.ts +160 -0
  80. package/src/utils/config-loader.test.ts +100 -13
  81. package/src/utils/config-loader.ts +44 -17
  82. package/src/utils/constants.ts +62 -0
  83. package/src/utils/error-renderer.test.ts +267 -0
  84. package/src/utils/error-renderer.ts +320 -0
  85. package/src/utils/json-parser.test.ts +4 -0
  86. package/src/utils/json-parser.ts +18 -1
  87. package/src/utils/mermaid.ts +4 -0
  88. package/src/utils/paths.test.ts +46 -0
  89. package/src/utils/paths.ts +70 -0
  90. package/src/utils/process-sandbox.test.ts +128 -0
  91. package/src/utils/process-sandbox.ts +293 -0
  92. package/src/utils/rate-limiter.test.ts +143 -0
  93. package/src/utils/rate-limiter.ts +221 -0
  94. package/src/utils/redactor.test.ts +23 -15
  95. package/src/utils/redactor.ts +65 -25
  96. package/src/utils/resource-loader.test.ts +54 -0
  97. package/src/utils/resource-loader.ts +158 -0
  98. package/src/utils/sandbox.test.ts +69 -4
  99. package/src/utils/sandbox.ts +69 -6
  100. package/src/utils/schema-validator.ts +65 -0
  101. package/src/utils/workflow-registry.test.ts +57 -0
  102. package/src/utils/workflow-registry.ts +45 -25
  103. /package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
  104. /package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0
package/README.md CHANGED
@@ -14,6 +14,26 @@ Keystone allows you to define complex automation workflows using a simple YAML s
14
14
 
15
15
  ---
16
16
 
17
+ ## 📚 Table of Contents
18
+
19
+ - [Features](#features)
20
+ - [Installation](#installation)
21
+ - [Quick Start](#quick-start)
22
+ - [Bundled Workflows](#bundled-workflows)
23
+ - [Configuration](#configuration)
24
+ - [Workflow Example](#workflow-example)
25
+ - [Expression Syntax](#expression-syntax)
26
+ - [Step Types](#step-types)
27
+ - [Advanced Features](#advanced-features)
28
+ - [Agent Definitions](#agent-definitions)
29
+ - [CLI Commands](#cli-commands)
30
+ - [Security](#security)
31
+ - [Architecture](#architecture)
32
+ - [Project Structure](#project-structure)
33
+ - [License](#license)
34
+
35
+ ---
36
+
17
37
  ## ✨ Features
18
38
 
19
39
  - ⚡ **Local-First:** Built on Bun with a local SQLite database for state management.
@@ -25,8 +45,8 @@ Keystone allows you to define complex automation workflows using a simple YAML s
25
45
  - 🛠️ **Extensible:** Support for shell, file, HTTP request, LLM, and sub-workflow steps.
26
46
  - 🔌 **MCP Support:** Integrated Model Context Protocol server.
27
47
  - 🛡️ **Secret Redaction:** Automatically redacts environment variables and secrets from logs and outputs.
28
- - 🧠 **Semantic Memory:** Store and retrieve step outputs using vector embeddings/RAG.
29
- - 🎯 **Prompt Optimization:** Automatically optimize prompts using iterative evaluation (DSPy-style).
48
+ - 🧠 **Semantic Memory:** Store/search text with vector embeddings (and auto-index via `learn`).
49
+ - 🎯 **Prompt Optimization:** Iteratively optimize prompts via `keystone optimize` + workflow `eval`.
30
50
 
31
51
  ---
32
52
 
@@ -74,7 +94,7 @@ source <(keystone completion bash)
74
94
  ```bash
75
95
  keystone init
76
96
  ```
77
- This creates the `.keystone/` directory for configuration and seeds `.keystone/workflows/` with default automation files and agents (like `scaffold-feature` and `keystone-architect`).
97
+ This creates the `.keystone/` directory for configuration and seeds `.keystone/workflows/` plus `.keystone/workflows/agents/` with bundled workflows and agents (see "Bundled Workflows" below).
78
98
 
79
99
  ### 2. Configure your Environment
80
100
  Add your API keys to the generated `.env` file:
@@ -86,11 +106,19 @@ Alternatively, you can use the built-in authentication management:
86
106
  ```bash
87
107
  keystone auth login openai
88
108
  keystone auth login anthropic
109
+ keystone auth login anthropic-claude
110
+ keystone auth login openai-chatgpt
111
+ keystone auth login gemini
112
+ keystone auth login github
89
113
  ```
114
+ Use `anthropic-claude` for Claude Pro/Max subscriptions (OAuth) instead of an API key.
115
+ Use `openai-chatgpt` for ChatGPT Plus/Pro subscriptions (OAuth) instead of an API key.
116
+ Use `gemini` (alias `google-gemini`) for Google Gemini subscriptions (OAuth) instead of an API key.
117
+ Use `github` to authenticate GitHub Copilot via the GitHub device flow.
90
118
 
91
119
  ### 3. Run a Workflow
92
120
  ```bash
93
- keystone run basic-shell
121
+ keystone run scaffold-feature
94
122
  ```
95
123
  Keystone automatically looks in `.keystone/workflows/` (locally and in your home directory) for `.yaml` or `.yml` files.
96
124
 
@@ -101,9 +129,39 @@ keystone ui
101
129
 
102
130
  ---
103
131
 
132
+ ## 🧰 Bundled Workflows
133
+
134
+ `keystone init` seeds these workflows under `.keystone/workflows/` (and the agents they rely on under `.keystone/workflows/agents/`):
135
+
136
+ - `scaffold-feature`: Interactive workflow scaffolder. Prompts for requirements, plans the files, generates their contents, and writes them.
137
+ - `scaffold-plan`: Generates a file plan from `requirements` input.
138
+ - `scaffold-generate`: Generates file contents from `requirements` plus a `files` plan.
139
+ - `decompose-problem`: Decomposes a problem into research/implementation/review tasks, waits for approval, runs sub-workflows, and summarizes.
140
+ - `decompose-research`: Runs a single research task (`task`) with optional `context`/`constraints`.
141
+ - `decompose-implement`: Runs a single implementation task (`task`) with optional `research` findings.
142
+ - `decompose-review`: Reviews a single implementation task (`task`) with optional `implementation` results.
143
+ - `dev`: Self-bootstrapping DevMode workflow for an interactive plan/implement/verify loop.
144
+
145
+ Example runs:
146
+ ```bash
147
+ keystone run scaffold-feature
148
+ keystone run decompose-problem -i problem="Add caching to the API" -i context="Node/Bun service"
149
+ ```
150
+
151
+ The sub-workflows are used by the top-level workflows, but can be run directly if you want just one phase.
152
+
153
+ ---
154
+
104
155
  ## ⚙️ Configuration
105
156
 
106
- Keystone uses a local configuration file at `.keystone/config.yaml` to manage model providers and model mappings.
157
+ Keystone loads configuration from the project's `.keystone/config.yaml` (and from user-level config; run `keystone config show` to see the search order) to manage model providers and model mappings.
158
+
159
+ Search order (highest precedence first):
160
+ - `KEYSTONE_CONFIG`
161
+ - `.keystone/config.yaml` or `.keystone/config.yml`
162
+ - `$XDG_CONFIG_HOME/keystone/config.yaml` or `~/.config/keystone/config.yaml` (and `.yml`)
163
+
164
+ Global state (when enabled) is stored at `$XDG_DATA_HOME/keystone/state.db` or `~/.local/share/keystone/state.db`.
107
165
 
108
166
  ```yaml
109
167
  default_provider: openai
@@ -114,11 +172,23 @@ providers:
114
172
  base_url: https://api.openai.com/v1
115
173
  api_key_env: OPENAI_API_KEY
116
174
  default_model: gpt-4o
175
+ openai-chatgpt:
176
+ type: openai-chatgpt
177
+ base_url: https://api.openai.com/v1
178
+ default_model: gpt-5-codex
117
179
  anthropic:
118
180
  type: anthropic
119
181
  base_url: https://api.anthropic.com/v1
120
182
  api_key_env: ANTHROPIC_API_KEY
121
183
  default_model: claude-3-5-sonnet-20240620
184
+ anthropic-claude:
185
+ type: anthropic-claude
186
+ base_url: https://api.anthropic.com/v1
187
+ default_model: claude-3-5-sonnet-20240620
188
+ google-gemini:
189
+ type: google-gemini
190
+ base_url: https://cloudcode-pa.googleapis.com
191
+ default_model: gemini-3-pro-high
122
192
  groq:
123
193
  type: openai
124
194
  base_url: https://api.groq.com/openai/v1
@@ -126,8 +196,11 @@ providers:
126
196
  default_model: llama-3.3-70b-versatile
127
197
 
128
198
  model_mappings:
199
+ "gpt-5*": openai-chatgpt
129
200
  "gpt-*": openai
201
+ "claude-4*": anthropic-claude
130
202
  "claude-*": anthropic
203
+ "gemini-*": google-gemini
131
204
  "o1-*": openai
132
205
  "llama-*": groq
133
206
 
@@ -141,11 +214,21 @@ mcp_servers:
141
214
  env:
142
215
  GITHUB_PERSONAL_ACCESS_TOKEN: "your-github-pat" # Or omit if GITHUB_TOKEN is in your .env
143
216
 
144
- storage:
217
+ engines:
218
+ allowlist:
219
+ codex:
220
+ command: codex
221
+ version: "1.2.3"
222
+ versionArgs: ["--version"]
223
+ denylist: ["bash", "sh"]
145
224
 
225
+ storage:
146
226
  retention_days: 30
227
+ redact_secrets_at_rest: true
147
228
  ```
148
229
 
230
+ `storage.retention_days` sets the default window used by `keystone maintenance` / `keystone prune`. `storage.redact_secrets_at_rest` controls whether secret inputs and known secrets are redacted before storing run data (default `true`).
231
+
149
232
  ### Model & Provider Resolution
150
233
 
151
234
  Keystone resolves which provider to use for a model in the following order:
@@ -196,6 +279,36 @@ providers:
196
279
 
197
280
  Authentication tokens for Copilot are managed automatically after the initial login.
198
281
 
282
+ ### OpenAI ChatGPT Plus/Pro (OAuth)
283
+
284
+ Keystone supports using your ChatGPT Plus/Pro subscription (OAuth) instead of an API key:
285
+
286
+ ```bash
287
+ keystone auth login openai-chatgpt
288
+ ```
289
+
290
+ Then map models to the `openai-chatgpt` provider in your config.
291
+
292
+ ### Anthropic Claude Pro/Max (OAuth)
293
+
294
+ Keystone supports using your Claude Pro/Max subscription (OAuth) instead of an API key:
295
+
296
+ ```bash
297
+ keystone auth login anthropic-claude
298
+ ```
299
+
300
+ Then map models to the `anthropic-claude` provider in your config. This flow uses the Claude web auth code and refreshes tokens automatically.
301
+
302
+ ### Google Gemini (OAuth)
303
+
304
+ Keystone supports using your Google Gemini subscription (OAuth) instead of an API key:
305
+
306
+ ```bash
307
+ keystone auth login gemini
308
+ ```
309
+
310
+ Then map models to the `google-gemini` provider in your config.
311
+
199
312
  ### API Key Management
200
313
 
201
314
  For other providers, you can either store API keys in a `.env` file in your project root:
@@ -260,6 +373,10 @@ finally:
260
373
  type: shell
261
374
  run: echo "Workflow finished"
262
375
 
376
+ outputs:
377
+ slack_message: ${{ steps.notify.output }}
378
+ ```
379
+
263
380
  ### Expression Syntax
264
381
 
265
382
  Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated using a safe AST parser.
@@ -271,15 +388,12 @@ Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated usin
271
388
  - `${{ item }}`: Access the current item in a `foreach` loop.
272
389
  - `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
273
390
  - `${{ secrets.NAME }}`: Access redacted secrets.
274
- - `${{ env.NAME }}`: Access environment variables.
275
-
276
- Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
391
+ - `${{ env.NAME }}`: Access environment variables (process env merged with workflow-level `env`).
392
+ Workflow-level `env` is evaluated per step; if an expression cannot be resolved yet, the variable is skipped with a warning.
277
393
 
278
- ---
394
+ Inputs support `values` for enums and `secret: true` for sensitive values (redacted in logs and at rest by default; resumptions may require re-entry).
279
395
 
280
- outputs:
281
- slack_message: ${{ steps.notify.output }}
282
- ```
396
+ Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
283
397
 
284
398
  ---
285
399
 
@@ -287,27 +401,148 @@ outputs:
287
401
 
288
402
  Keystone supports several specialized step types:
289
403
 
404
+ - Any step can optionally define `inputSchema` and/or `outputSchema` (JSON Schema) to validate evaluated inputs before execution and outputs after completion.
405
+
290
406
  - `shell`: Run arbitrary shell commands.
291
- - `llm`: Prompt an agent and get structured or unstructured responses. Supports `schema` (JSON Schema) for structured output.
407
+ - `llm`: Prompt an agent and get structured or unstructured responses. Supports `outputSchema` (JSON Schema) for structured output.
292
408
  - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
293
409
  - `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
294
410
  - `allowInsecure`: Boolean (default `false`). Set `true` to allow risky tool execution.
295
411
  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow tools to access files outside of the current working directory.
412
+ - `handoff`: Optional engine tool definition that lets the LLM delegate work to an allowlisted external CLI with structured inputs.
296
413
  - `request`: Make HTTP requests (GET, POST, etc.).
414
+ - `allowInsecure`: Boolean (default `false`). If `true`, skips SSRF protections and allows non-HTTPS/local URLs.
415
+ - Cross-origin redirects are blocked for non-GET/HEAD requests unless `allowInsecure: true`; on cross-origin redirects, non-essential headers are stripped.
297
416
  - `file`: Read, write, or append to files.
298
417
  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
299
418
  - `human`: Pause execution for manual confirmation or text input.
300
419
  - `inputType: confirm`: Simple Enter-to-continue prompt.
301
420
  - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
302
421
  - `workflow`: Trigger another workflow as a sub-step.
303
- - `script`: Run arbitrary JavaScript in a sandbox. On Bun, uses `node:vm` (since `isolated-vm` requires V8).
304
- - ⚠️ **Security Note:** The `node:vm` sandbox is not secure against malicious code. Only run scripts from trusted sources.
422
+ - `outputMapping`: Map sub-workflow outputs to step outputs.
423
+ ```yaml
424
+ outputMapping:
425
+ final_result: result_from_subflow
426
+ status: state
427
+ ```
428
+ - `join`: Aggregate outputs from dependencies and enforce a completion condition.
429
+ - `target`: `'steps'` (default) or `'branches'` (for foreach).
430
+ - `condition`: `'all'` (default), `'any'`, or a number.
431
+ - `blueprint`: Generate a structured system blueprint with an agent (persisted as an artifact).
432
+ - `script`: Run JavaScript in a sandboxed subprocess. Requires `allowInsecure: true`.
305
433
  - `sleep`: Pause execution for a specified duration.
434
+ - `durable`: Boolean (default `false`). If `true` and duration >= 60s, the wait is persisted and can resume after restarts.
306
435
  - `memory`: Store or retrieve information from the semantic memory vector database.
436
+ - `engine`: Run an allowlisted external CLI and capture a structured summary.
437
+ - `env` and `cwd` are required and must be explicit.
438
+ - `input` is sent to stdin (objects/arrays are JSON-encoded).
439
+ - Summary is parsed from stdout or a file at `KEYSTONE_ENGINE_SUMMARY_PATH` and stored as an artifact.
440
+
441
+ ### Human Steps in Non-Interactive Mode
442
+ If stdin is not a TTY (CI, piped input), `human` steps suspend. Resume by passing an input named after the step id whose value is a JSON object containing `__answer`:
443
+
444
+ ```bash
445
+ keystone run my-workflow --resume -i approve='{"__answer":true}'
446
+ keystone resume <run_id> -i ask='{"__answer":"hello"}'
447
+ ```
448
+
449
+ Human steps remain suspended until they receive an answer; the scheduler only resumes sleep timers.
450
+
451
+ ### Durable Sleeps and Scheduler
452
+ For long waits, set `durable: true` on `sleep` steps (>=60s) to persist across restarts:
307
453
 
308
- All steps support common features like `needs` (dependencies), `if` (conditionals), `retry`, `timeout`, `foreach` (parallel iteration), `concurrency` (max parallel items for foreach), `transform` (post-process output using expressions), `learn` (auto-index for few-shot), and `reflexion` (self-correction loop).
454
+ ```yaml
455
+ - id: wait_for_window
456
+ type: sleep
457
+ duration: 900000 # 15 minutes
458
+ durable: true
459
+ ```
309
460
 
310
- Workflows also support a top-level `concurrency` field to limit how many steps can run in parallel across the entire workflow. This must be a positive integer.
461
+ Run the scheduler to resume runs when timers elapse:
462
+
463
+ ```bash
464
+ keystone scheduler --interval 30
465
+ ```
466
+
467
+ All steps support common features:
468
+ - `needs`: Array of step IDs this step depends on.
469
+ - `if`: Conditional expression.
470
+ - `retry`: `{ count, backoff: 'linear'|'exponential', baseDelay }`.
471
+ - `timeout`: Maximum execution time in milliseconds.
472
+ - `foreach`: Iterate over an array in parallel.
473
+ - `concurrency`: Limit parallel items for `foreach` (must be a positive integer).
474
+ - `pool`: Assign step to a resource pool.
475
+ - `compensate`: Step to run if the workflow rolls back.
476
+ - `transform`: Post-process output using expressions.
477
+ - `learn`: Auto-index into semantic memory for few-shot examples.
478
+ - `reflexion`: Self-correction loop.
479
+ - `auto_heal`: LLM-powered automatic error recovery (alias: `autoHeal`).
480
+ - `inputSchema` / `outputSchema`: JSON Schema validation.
481
+ - `outputRetries`: Max retries for output validation failures.
482
+ - `repairStrategy`: Strategy for output repair (`reask`, `repair`, `hybrid`).
483
+
484
+ Workflows also support a top-level `concurrency` field to limit how many steps can run in parallel across the entire workflow. This must resolve to a positive integer (number or expression).
485
+
486
+ ### Engine Steps
487
+ Engine steps run allowlisted external CLIs and capture a structured summary for safe chaining.
488
+
489
+ **Configuration (`.keystone/config.yaml`)**
490
+ ```yaml
491
+ engines:
492
+ allowlist:
493
+ codex:
494
+ command: codex
495
+ version: "1.2.3"
496
+ versionArgs: ["--version"]
497
+ ```
498
+
499
+ **Workflow example**
500
+ ```yaml
501
+ - id: run_engine
502
+ type: engine
503
+ command: codex
504
+ args: ["run"]
505
+ cwd: .
506
+ env:
507
+ PATH: ${{ env.PATH }}
508
+ input:
509
+ task: "Summarize the repository"
510
+ outputSchema:
511
+ type: object
512
+ properties:
513
+ summary: { type: string }
514
+ required: [summary]
515
+ ```
516
+
517
+ The engine can optionally write a summary file to `KEYSTONE_ENGINE_SUMMARY_PATH`. Otherwise, Keystone attempts to parse JSON/YAML from stdout and stores the summary as an artifact.
518
+
519
+ ### LLM Handoff to Engine
520
+ Use `handoff` to expose an engine tool to the LLM with structured inputs:
521
+
522
+ ```yaml
523
+ - id: delegate
524
+ type: llm
525
+ agent: planner
526
+ prompt: "Decide what to run and delegate to the engine."
527
+ handoff:
528
+ name: run_engine
529
+ inputSchema:
530
+ type: object
531
+ properties:
532
+ task: { type: string }
533
+ required: [task]
534
+ engine:
535
+ command: codex
536
+ args: ["run"]
537
+ cwd: .
538
+ env:
539
+ PATH: ${{ env.PATH }}
540
+ outputSchema:
541
+ type: object
542
+ properties:
543
+ summary: { type: string }
544
+ required: [summary]
545
+ ```
311
546
 
312
547
  ### Self-Healing Steps
313
548
  Steps can be configured to automatically recover from failures using an LLM agent.
@@ -337,6 +572,7 @@ When a step fails, the specified agent is invoked with the error details. The ag
337
572
  foreach: ${{ steps.list_files.output }}
338
573
  concurrency: 5 # Process 5 files at a time (must be a positive integer)
339
574
  run: echo "Processing ${{ item }}"
575
+ ```
340
576
 
341
577
  #### Example: Script Step
342
578
  ```yaml
@@ -344,11 +580,139 @@ When a step fails, the specified agent is invoked with the error details. The ag
344
580
  type: script
345
581
  allowInsecure: true
346
582
  run: |
347
- const data = context.steps.fetch_data.output;
583
+ const data = steps.fetch_data.output;
348
584
  return data.map(i => i.value * 2).reduce((a, b) => a + b, 0);
349
585
  ```
586
+
587
+ ---
588
+
589
+ ## 🔧 Advanced Features
590
+
591
+ ### Idempotency Keys
592
+
593
+ Make retries and resume operations safe for side-effecting steps by specifying an `idempotencyKey`. When a key matches a previous successful execution, the cached result is returned instead of re-executing the step.
594
+
595
+ ```yaml
596
+ - id: charge_customer
597
+ type: request
598
+ url: https://api.stripe.com/charge
599
+ body: { amount: 100, customer: "${{ inputs.customer_id }}" }
600
+ # Expression that evaluates to a unique key for this operation
601
+ idempotencyKey: '"charge-" + inputs.customer_id + "-" + inputs.order_id'
602
+ # Optional: dedupe across runs and expire after a TTL
603
+ idempotencyScope: global
604
+ idempotencyTtlSeconds: 86400
605
+ ```
606
+
607
+ If a key is already in-flight, the step fails with an in-flight error to avoid duplicate side effects. To bypass deduplication for a run, use `keystone run --no-dedup`.
608
+
609
+ Manage idempotency records via CLI:
610
+ - `keystone dedup list` - View all idempotency records
611
+ - `keystone dedup clear <run_id>` - Clear records for a specific run
612
+ - `keystone dedup clear --all` - Clear all records
613
+ - `keystone dedup prune` - Remove expired records
614
+
615
+ ### AllowFailure Pattern
616
+
617
+ Enable fail-forward steps that continue workflow execution even when they fail. Useful for agentic exploration where some attempts may naturally fail.
618
+
619
+ ```yaml
620
+ - id: try_approach_a
621
+ type: llm
622
+ agent: explorer
623
+ prompt: "Try approach A to solve the problem"
624
+ allowFailure: true # Workflow continues if this fails
625
+
626
+ - id: analyze_results
627
+ type: llm
628
+ agent: analyst
629
+ prompt: |
630
+ Approach A status: ${{ steps.try_approach_a.status }}
631
+ Error (if any): ${{ steps.try_approach_a.error }}
632
+ Output: ${{ steps.try_approach_a.output }}
350
633
  ```
351
634
 
635
+ The step's `status` will be `'success'` even when it fails internally, but the `error` field will contain the failure details.
636
+
637
+ ### Global Errors Block
638
+
639
+ Define workflow-level error handling that runs when a step exhausts retries. Access failure context via `last_failed_step`.
640
+
641
+ ```yaml
642
+ name: resilient-workflow
643
+ steps:
644
+ - id: critical_step
645
+ type: shell
646
+ run: exit 1
647
+ retry: { count: 2, backoff: exponential }
648
+
649
+ errors:
650
+ - id: analyze_failure
651
+ type: llm
652
+ agent: debugger
653
+ prompt: |
654
+ Step ${{ last_failed_step.id }} failed with:
655
+ Error: ${{ last_failed_step.error }}
656
+ Suggest remediation steps.
657
+ ```
658
+
659
+ The `errors` block runs after all retries and `auto_heal` attempts are exhausted, and before the `finally` block.
660
+
661
+ ### Input Enums and Secrets
662
+
663
+ Constrain input values and mark sensitive data for automatic redaction.
664
+
665
+ ```yaml
666
+ inputs:
667
+ environment:
668
+ type: string
669
+ values: [dev, staging, prod] # Only these values allowed
670
+ default: dev
671
+ api_key:
672
+ type: string
673
+ secret: true # Redacted in logs and at rest
674
+ ```
675
+
676
+ Schema validation errors include path-level details and are surfaced before/after step execution.
677
+
678
+ ### Resource Pools
679
+
680
+ Manage concurrency for external resources (like APIs or databases) across a workflow using `pools`.
681
+
682
+ ```yaml
683
+ name: rate-limited-workflow
684
+ pools:
685
+ api_pool: 2 # Limit to 2 concurrent steps using this pool
686
+
687
+ steps:
688
+ - id: step1
689
+ type: request
690
+ url: ...
691
+ pool: api_pool
692
+
693
+ - id: step2
694
+ type: request
695
+ url: ...
696
+ pool: api_pool
697
+ ```
698
+
699
+ ### Compensations (Rollback)
700
+
701
+ Define "undo" actions for steps that have side effects. Compensations run in reverse order (LIFO) if a workflow fails or is cancelled.
702
+
703
+ ```yaml
704
+ - id: create_user
705
+ type: request
706
+ url: https://api.example.com/users
707
+ compensate:
708
+ id: delete_user
709
+ type: request
710
+ url: https://api.example.com/users/${{ steps.create_user.outputs.id }}
711
+ method: DELETE
712
+ ```
713
+
714
+ You can also define a workflow-level `compensate` step to handle overall cleanup.
715
+
352
716
  ---
353
717
 
354
718
  ## 🤖 Agent Definitions
@@ -380,7 +744,34 @@ Keystone comes with a set of **Standard Tools** that can be enabled for any agen
380
744
  - `list_files`: List files in a directory (arguments: `path`)
381
745
  - `search_files`: Search for files by glob pattern (arguments: `pattern`, `dir`)
382
746
  - `search_content`: Search for string or regex within files (arguments: `query`, `dir`, `pattern`)
383
- - `run_command`: Run a shell command (arguments: `command`, `dir`). Requires `allowInsecure: true` on the step unless whitelisted.
747
+ - `run_command`: Run a shell command (arguments: `command`, `dir`). Risky commands require `allowInsecure: true` on the LLM step.
748
+
749
+ #### Standard Tool Examples
750
+
751
+ Agents can use these tools to interact with their environment. Here is how they appear when used by an agent:
752
+
753
+ **Read File:**
754
+ ```yaml
755
+ - name: read_file
756
+ arguments:
757
+ path: "src/utils/logger.ts"
758
+ ```
759
+
760
+ **Write File:**
761
+ ```yaml
762
+ - name: write_file
763
+ arguments:
764
+ path: "new_file.txt"
765
+ content: "Hello from Keystone!"
766
+ ```
767
+
768
+ **Run Command:**
769
+ ```yaml
770
+ - name: run_command
771
+ arguments:
772
+ command: "ls -la"
773
+ dir: "."
774
+ ```
384
775
 
385
776
  Tool arguments are passed to the tool's execution step via the `args` variable.
386
777
 
@@ -487,54 +878,95 @@ In these examples, the agent will have access to all tools provided by the MCP s
487
878
  | Command | Description |
488
879
  | :--- | :--- |
489
880
  | `init` | Initialize a new Keystone project |
490
- | `run <workflow>` | Execute a workflow (use `-i key=val` for inputs, `--dry-run` to test, `--debug` for REPL) |
491
- | `optimize <workflow>` | Optimize a specific step in a workflow (requires --target) |
492
- | `resume <run_id>` | Resume a failed or paused workflow |
881
+ | `run <workflow>` | Execute a workflow (use `-i key=val` for inputs, `--resume` to auto-resume, `--dry-run`, `--debug`, `--no-dedup`, `--explain`) |
882
+ | `resume <run_id>` | Resume a failed/paused/crashed workflow by ID (use `-i key=val` to answer human steps) |
493
883
  | `validate [path]` | Check workflow files for errors |
494
884
  | `workflows` | List available workflows |
495
885
  | `history` | Show recent workflow runs |
496
886
  | `logs <run_id>` | View logs, outputs, and errors for a specific run (`-v` for full output) |
497
887
  | `graph <workflow>` | Generate a Mermaid diagram of the workflow |
498
- | `config` | Show current configuration and providers |
888
+ | `test [path]` | Run workflow tests with fixtures and snapshots |
889
+ | `optimize <workflow>` | Optimize a specific step in a workflow (requires `--target` and workflow `eval`) |
890
+ | `compile` | Compile a project into a single executable with embedded assets |
891
+ | `dev <task>` | Run the self-bootstrapping DevMode workflow |
892
+ | `manifest` | Show embedded assets manifest |
893
+ | `config show` | Show current configuration and discovery paths (alias: `list`) |
499
894
  | `auth status [provider]` | Show authentication status |
500
- | `auth login [provider]` | Login to an authentication provider (github, openai, anthropic) |
895
+ | `auth login [provider]` | Login to an authentication provider (github, openai, anthropic, openai-chatgpt, anthropic-claude, gemini/google-gemini) |
501
896
  | `auth logout [provider]` | Logout and clear authentication tokens |
502
897
  | `ui` | Open the interactive TUI dashboard |
503
898
  | `mcp start` | Start the Keystone MCP server |
504
899
  | `mcp login <server>` | Login to a remote MCP server |
900
+ | `scheduler` | Run the durable timer scheduler to resume sleep timers |
901
+ | `timers list` | List durable timers |
902
+ | `timers clear` | Clear durable timers by run ID or `--all` |
903
+ | `dedup list [run_id]` | List idempotency records (optionally filter by run) |
904
+ | `dedup clear <target>` | Clear idempotency records by run ID or `--all` |
905
+ | `dedup prune` | Remove expired idempotency records |
505
906
  | `completion [shell]` | Generate shell completion script (zsh, bash) |
506
907
  | `maintenance [--days N]` | Perform database maintenance (prune old runs and vacuum) |
908
+ | `prune [--days N]` | Alias for `maintenance` |
507
909
 
508
910
  ---
509
-
510
- ## 🛡️ Security
511
-
512
- ### Shell Execution
513
- By default, Keystone analyzes shell commands for potentially dangerous patterns (like shell injection, `rm -rf`, piped commands). If a risk is detected:
514
- - In interactive mode, the user is prompted for confirmation.
515
- - In non-interactive mode, the step is suspended or failed.
516
-
517
- You can bypass this check if you trust the command:
518
- ```yaml
519
- - id: deploy
520
- type: shell
521
- run: ./deploy.sh ${{ inputs.env }}
522
- allowInsecure: true
523
- ```
524
-
525
- ### Expression Safety
526
- Expressions `${{ }}` are evaluated using a safe AST parser (`jsep`) which:
527
- - Prevents arbitrary code execution (no `eval` or `Function`).
528
- - Whitelists safe global objects (`Math`, `JSON`, `Date`, etc.).
529
- - Blocks access to sensitive properties (`constructor`, `__proto__`).
530
- - Enforces a maximum template length to prevent ReDoS attacks.
531
-
532
- ### Script Sandboxing
533
- The `script` step uses Node.js `vm` module. While it provides isolation for variables, it is **not a security boundary** for malicious code. Only run scripts from trusted sources.
534
-
535
- ---
536
-
537
- ## 📂 Project Structure
911
+
912
+ Input keys passed via `-i key=val` must be alphanumeric/underscore and cannot be `__proto__`, `constructor`, or `prototype`.
913
+
914
+ ### Dry Run
915
+ `keystone run --dry-run` prints shell commands without executing them and skips non-shell steps (including human prompts). Outputs from skipped steps are empty, so conditional branches may differ from a real run.
916
+
917
+ ## 🛡️ Security
918
+
919
+ ### Shell Execution
920
+ Keystone blocks shell commands that match common injection/destructive patterns (like `rm -rf /` or pipes to shells). To run them, set `allowInsecure: true` on the step. Prefer `${{ escape(...) }}` when interpolating user input.
921
+
922
+ You can bypass this check if you trust the command:
923
+ ```yaml
924
+ - id: deploy
925
+ type: shell
926
+ run: ./deploy.sh ${{ inputs.env }}
927
+ allowInsecure: true
928
+ ```
929
+
930
+ ### Expression Safety
931
+ Expressions `${{ }}` are evaluated using a safe AST parser (`jsep`) which:
932
+ - Prevents arbitrary code execution (no `eval` or `Function`).
933
+ - Whitelists safe global objects (`Math`, `JSON`, `Date`, etc.).
934
+ - Blocks access to sensitive properties (`constructor`, `__proto__`).
935
+ - Enforces a maximum template length to prevent ReDoS attacks.
936
+
937
+ ### Script Sandboxing
938
+ Script steps are disabled by default; set `allowInsecure: true` to run them. When enabled, they run in a separate subprocess by default. This reduces risk but is **not a security boundary** for malicious code.
939
+
940
+ ### HTTP Requests
941
+ Request steps enforce SSRF protections and require HTTPS by default. Cross-origin redirects are blocked for non-GET/HEAD requests unless `allowInsecure: true` is set, and non-essential headers are stripped on cross-origin redirects.
942
+
943
+ ---
944
+
945
+ ## 🏗️ Architecture
946
+
947
+ ```mermaid
948
+ graph TD
949
+ CLI[CLI Entry Point] --> WR[WorkflowRunner]
950
+ CLI --> MCP[MCP Server]
951
+ WR --> SE[Step Executor]
952
+ WR --> FE[ForeachExecutor]
953
+ WR --> DB[(WorkflowDb)]
954
+ SE --> LLM[LLM Executor]
955
+ SE --> Shell[Shell Executor]
956
+ SE --> File[File Operations]
957
+ SE --> HTTP[HTTP Requests]
958
+ SE --> Human[Human Input]
959
+ LLM --> Adapters[LLM Adapters]
960
+ Adapters --> OpenAI
961
+ Adapters --> Anthropic
962
+ Adapters --> Copilot
963
+ Adapters --> ChatGPT
964
+ LLM --> MCPClient[MCP Client]
965
+ WR --> Eval[Expression Evaluator]
966
+ WR --> Pool[Resource Pool Manager]
967
+ ```
968
+
969
+ ## 📂 Project Structure
538
970
 
539
971
  - `src/db/`: SQLite persistence layer.
540
972
  - `src/runner/`: The core execution engine, handles parallelization and retries.