keystone-cli 1.0.3 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/README.md +276 -32
  2. package/package.json +8 -4
  3. package/src/cli.ts +350 -416
  4. package/src/commands/doc.ts +31 -0
  5. package/src/commands/event.ts +29 -0
  6. package/src/commands/graph.ts +37 -0
  7. package/src/commands/index.ts +14 -0
  8. package/src/commands/init.ts +185 -0
  9. package/src/commands/run.ts +124 -0
  10. package/src/commands/schema.ts +40 -0
  11. package/src/commands/utils.ts +78 -0
  12. package/src/commands/validate.ts +111 -0
  13. package/src/db/workflow-db.test.ts +314 -0
  14. package/src/db/workflow-db.ts +810 -210
  15. package/src/expression/evaluator-audit.test.ts +4 -2
  16. package/src/expression/evaluator.test.ts +14 -1
  17. package/src/expression/evaluator.ts +166 -19
  18. package/src/parser/config-schema.ts +18 -0
  19. package/src/parser/schema.ts +153 -22
  20. package/src/parser/test-schema.ts +6 -6
  21. package/src/parser/workflow-parser.test.ts +24 -0
  22. package/src/parser/workflow-parser.ts +65 -3
  23. package/src/runner/auto-heal.test.ts +5 -6
  24. package/src/runner/blueprint-executor.test.ts +2 -2
  25. package/src/runner/debug-repl.test.ts +5 -8
  26. package/src/runner/debug-repl.ts +59 -16
  27. package/src/runner/durable-timers.test.ts +11 -2
  28. package/src/runner/engine-executor.test.ts +1 -1
  29. package/src/runner/events.ts +57 -0
  30. package/src/runner/executors/artifact-executor.ts +166 -0
  31. package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
  32. package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
  33. package/src/runner/executors/file-executor.test.ts +48 -0
  34. package/src/runner/executors/file-executor.ts +324 -0
  35. package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
  36. package/src/runner/executors/human-executor.ts +144 -0
  37. package/src/runner/executors/join-executor.ts +75 -0
  38. package/src/runner/executors/llm-executor.ts +1266 -0
  39. package/src/runner/executors/memory-executor.ts +71 -0
  40. package/src/runner/executors/plan-executor.ts +104 -0
  41. package/src/runner/executors/request-executor.ts +265 -0
  42. package/src/runner/executors/script-executor.ts +43 -0
  43. package/src/runner/executors/shell-executor.ts +403 -0
  44. package/src/runner/executors/subworkflow-executor.ts +114 -0
  45. package/src/runner/executors/types.ts +69 -0
  46. package/src/runner/executors/wait-executor.ts +59 -0
  47. package/src/runner/join-scheduling.test.ts +197 -0
  48. package/src/runner/llm-adapter-runtime.test.ts +209 -0
  49. package/src/runner/llm-adapter.test.ts +419 -24
  50. package/src/runner/llm-adapter.ts +130 -26
  51. package/src/runner/llm-clarification.test.ts +2 -1
  52. package/src/runner/llm-executor.test.ts +532 -17
  53. package/src/runner/mcp-client-audit.test.ts +1 -2
  54. package/src/runner/mcp-client.ts +136 -46
  55. package/src/runner/mcp-manager.test.ts +4 -0
  56. package/src/runner/mcp-server.test.ts +58 -0
  57. package/src/runner/mcp-server.ts +26 -0
  58. package/src/runner/memoization.test.ts +190 -0
  59. package/src/runner/optimization-runner.ts +4 -9
  60. package/src/runner/quality-gate.test.ts +69 -0
  61. package/src/runner/reflexion.test.ts +6 -17
  62. package/src/runner/resource-pool.ts +102 -14
  63. package/src/runner/services/context-builder.ts +144 -0
  64. package/src/runner/services/secret-manager.ts +105 -0
  65. package/src/runner/services/workflow-validator.ts +131 -0
  66. package/src/runner/shell-executor.test.ts +28 -4
  67. package/src/runner/standard-tools-ast.test.ts +196 -0
  68. package/src/runner/standard-tools-execution.test.ts +27 -0
  69. package/src/runner/standard-tools-integration.test.ts +6 -10
  70. package/src/runner/standard-tools.ts +339 -102
  71. package/src/runner/step-executor.test.ts +216 -4
  72. package/src/runner/step-executor.ts +69 -941
  73. package/src/runner/stream-utils.ts +7 -3
  74. package/src/runner/test-harness.ts +20 -1
  75. package/src/runner/timeout.test.ts +10 -0
  76. package/src/runner/timeout.ts +11 -2
  77. package/src/runner/tool-integration.test.ts +1 -1
  78. package/src/runner/wait-step.test.ts +102 -0
  79. package/src/runner/workflow-runner.test.ts +208 -15
  80. package/src/runner/workflow-runner.ts +890 -818
  81. package/src/runner/workflow-scheduler.ts +75 -0
  82. package/src/runner/workflow-state.ts +269 -0
  83. package/src/runner/workflow-subflows.test.ts +13 -12
  84. package/src/scripts/generate-schemas.ts +16 -0
  85. package/src/templates/agents/explore.md +1 -0
  86. package/src/templates/agents/general.md +1 -0
  87. package/src/templates/agents/handoff-router.md +14 -0
  88. package/src/templates/agents/handoff-specialist.md +15 -0
  89. package/src/templates/agents/keystone-architect.md +13 -44
  90. package/src/templates/agents/my-agent.md +1 -0
  91. package/src/templates/agents/software-engineer.md +1 -0
  92. package/src/templates/agents/summarizer.md +1 -0
  93. package/src/templates/agents/test-agent.md +1 -0
  94. package/src/templates/agents/tester.md +1 -0
  95. package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
  96. package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +4 -1
  97. package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
  98. package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
  99. package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
  100. package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
  101. package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
  102. package/src/templates/control-flow/idempotency-example.yaml +30 -0
  103. package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
  104. package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
  105. package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
  106. package/src/templates/features/artifact-example.yaml +40 -0
  107. package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
  108. package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
  109. package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
  110. package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
  111. package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
  112. package/src/templates/features/script-example.yaml +28 -0
  113. package/src/templates/patterns/agent-handoff.yaml +53 -0
  114. package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
  115. package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
  116. package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +2 -1
  117. package/src/templates/patterns/composition-parent.yaml +18 -0
  118. package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
  119. package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
  120. package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
  121. package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
  122. package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
  123. package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
  124. package/src/templates/scaffolding/review-loop.yaml +97 -0
  125. package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
  126. package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
  127. package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
  128. package/src/templates/testing/invalid.yaml +6 -0
  129. package/src/ui/dashboard.tsx +191 -33
  130. package/src/utils/auth-manager.test.ts +337 -0
  131. package/src/utils/auth-manager.ts +157 -61
  132. package/src/utils/blueprint-utils.ts +4 -6
  133. package/src/utils/config-loader.test.ts +2 -0
  134. package/src/utils/config-loader.ts +12 -3
  135. package/src/utils/constants.ts +76 -0
  136. package/src/utils/container.ts +63 -0
  137. package/src/utils/context-injector.test.ts +200 -0
  138. package/src/utils/context-injector.ts +244 -0
  139. package/src/utils/doc-generator.ts +85 -0
  140. package/src/utils/env-filter.ts +45 -0
  141. package/src/utils/json-parser.test.ts +12 -0
  142. package/src/utils/json-parser.ts +30 -5
  143. package/src/utils/logger.ts +12 -1
  144. package/src/utils/mermaid.ts +4 -0
  145. package/src/utils/paths.ts +52 -1
  146. package/src/utils/process-sandbox-worker.test.ts +46 -0
  147. package/src/utils/process-sandbox.ts +227 -14
  148. package/src/utils/redactor.test.ts +11 -6
  149. package/src/utils/redactor.ts +25 -9
  150. package/src/utils/sandbox.ts +3 -0
  151. package/src/runner/llm-executor.ts +0 -638
  152. package/src/runner/shell-executor.ts +0 -366
  153. package/src/templates/composition-parent.yaml +0 -14
  154. package/src/templates/invalid.yaml +0 -5
package/README.md CHANGED
@@ -47,6 +47,7 @@ Keystone allows you to define complex automation workflows using a simple YAML s
47
47
  - 🛡️ **Secret Redaction:** Automatically redacts environment variables and secrets from logs and outputs.
48
48
  - 🧠 **Semantic Memory:** Store/search text with vector embeddings (and auto-index via `learn`).
49
49
  - 🎯 **Prompt Optimization:** Iteratively optimize prompts via `keystone optimize` + workflow `eval`.
50
+ - 📖 **Documentation Generator:** Automatically generate Markdown documentation from your workflow definitions.
50
51
 
51
52
  ---
52
53
 
@@ -137,6 +138,10 @@ Top-level workflows:
137
138
  - `scaffold-feature`: Interactive workflow scaffolder. Prompts for requirements, plans files, generates content, and writes them.
138
139
  - `decompose-problem`: Decomposes a problem into research/implementation/review tasks, waits for approval, runs sub-workflows, and summarizes.
139
140
  - `dev`: Self-bootstrapping DevMode workflow for an interactive plan/implement/verify loop.
141
+ - `agent-handoff`: Demonstrates agent handoffs and tool-driven context updates.
142
+ - `script-example`: Demonstrates sandboxed JavaScript execution.
143
+ - `artifact-example`: Demonstrates artifact upload and download between steps.
144
+ - `idempotency-example`: Demonstrates safe retries for side-effecting steps.
140
145
 
141
146
  Sub-workflows:
142
147
  - `scaffold-plan`: Generates a file plan from `requirements` input.
@@ -144,11 +149,13 @@ Sub-workflows:
144
149
  - `decompose-research`: Runs a single research task (`task`) with optional `context`/`constraints`.
145
150
  - `decompose-implement`: Runs a single implementation task (`task`) with optional `research` findings.
146
151
  - `decompose-review`: Reviews a single implementation task (`task`) with optional `implementation` results.
152
+ - `review-loop`: Reusable generate → critique → refine loop with a quality gate.
147
153
 
148
154
  Example runs:
149
155
  ```bash
150
156
  keystone run scaffold-feature
151
157
  keystone run decompose-problem -i problem="Add caching to the API" -i context="Node/Bun service"
158
+ keystone run agent-handoff -i topic="billing" -i user="Ada"
152
159
  ```
153
160
 
154
161
  Sub-workflows are used by the top-level workflows, but can be run directly if you want just one phase.
@@ -164,7 +171,7 @@ Search order (highest precedence first):
164
171
  - `.keystone/config.yaml` or `.keystone/config.yml`
165
172
  - `$XDG_CONFIG_HOME/keystone/config.yaml` or `~/.config/keystone/config.yaml` (and `.yml`)
166
173
 
167
- Global state (when enabled) is stored at `$XDG_DATA_HOME/keystone/state.db` or `~/.local/share/keystone/state.db`.
174
+ State is stored at `.keystone/state.db` by default (project-local).
168
175
 
169
176
  ```yaml
170
177
  default_provider: openai
@@ -228,10 +235,36 @@ engines:
228
235
  storage:
229
236
  retention_days: 30
230
237
  redact_secrets_at_rest: true
238
+
239
+ expression:
240
+ strict: false
231
241
  ```
232
242
 
233
243
  `storage.retention_days` sets the default window used by `keystone maintenance` / `keystone prune`. `storage.redact_secrets_at_rest` controls whether secret inputs and known secrets are redacted before storing run data (default `true`).
234
244
 
245
+ ### Context Injection (Opt-in)
246
+
247
+ Keystone can automatically inject project context files (`README.md`, `AGENTS.md`, `.cursor/rules`, `.claude/rules`) into LLM system prompts. This helps agents understand your project's conventions and guidelines.
248
+
249
+ ```yaml
250
+ features:
251
+ context_injection:
252
+ enabled: true # Opt-in feature (default: false)
253
+ search_depth: 3 # How many directories up to search (default: 3)
254
+ sources: # Which context sources to include
255
+ - readme # README.md files
256
+ - agents_md # AGENTS.md files
257
+ - cursor_rules # .cursor/rules or .claude/rules
258
+ ```
259
+
260
+ When enabled, Keystone will:
261
+ 1. Search from the workflow directory up to the project root
262
+ 2. Find the nearest `README.md` and `AGENTS.md` files
263
+ 3. Parse rules from `.cursor/rules` or `.claude/rules` directories
264
+ 4. Prepend this context to the LLM system prompt
265
+
266
+ Context is cached for 1 minute to avoid redundant file reads.
267
+
235
268
  ### Model & Provider Resolution
236
269
 
237
270
  Keystone resolves which provider to use for a model in the following order:
@@ -390,14 +423,21 @@ Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated usin
390
423
  - `${{ steps.id.status }}`: Get the execution status of a step (`'success'`, `'failed'`, etc.).
391
424
  - `${{ item }}`: Access the current item in a `foreach` loop.
392
425
  - `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
393
- - `${{ secrets.NAME }}`: Access redacted secrets.
426
+ - `${{ secrets.NAME }}`: Access secret values (redacted in logs and at rest).
394
427
  - `${{ env.NAME }}`: Access environment variables (process env merged with workflow-level `env`).
395
428
  Workflow-level `env` is evaluated per step; if an expression cannot be resolved yet, the variable is skipped with a warning.
429
+ - `${{ memory.key }}`: Access mutable workflow memory (populated by tools via `__keystone_context`).
396
430
 
397
431
  Inputs support `values` for enums and `secret: true` for sensitive values (redacted in logs and at rest by default; resumptions may require re-entry).
398
432
 
399
433
  Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
400
434
 
435
+ Strict expression mode can be enabled in `.keystone/config.yaml` to fail fast on malformed `${{ }}` expressions:
436
+ ```yaml
437
+ expression:
438
+ strict: true
439
+ ```
440
+
401
441
  ---
402
442
 
403
443
  ## 🏗️ Step Types
@@ -409,15 +449,30 @@ Keystone supports several specialized step types:
409
449
  - `shell`: Run arbitrary shell commands.
410
450
  - `llm`: Prompt an agent and get structured or unstructured responses. Supports `outputSchema` (JSON Schema) for structured output.
411
451
  - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
452
+ - `allowedHandoffs`: Optional list of agent names that can be transferred to via `transfer_to_agent`.
412
453
  - `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
454
+ - `maxMessageHistory`: Number (default `50`). Max messages to retain in history before truncation/summary.
455
+ - `contextStrategy`: `'truncate'|'summary'|'auto'` (default `truncate`). Controls how history is reduced when limits are exceeded; when summarizing, older history is condensed into a system message.
456
+ - `qualityGate`: Optional reviewer config `{ agent, prompt?, provider?, model?, maxAttempts? }`. If review fails, the step is refined and re-run.
413
457
  - `allowInsecure`: Boolean (default `false`). Set `true` to allow risky tool execution.
414
458
  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow tools to access files outside of the current working directory.
415
459
  - `handoff`: Optional engine tool definition that lets the LLM delegate work to an allowlisted external CLI with structured inputs.
460
+ - `plan`: Create a dynamic task list for orchestration.
461
+ - `goal`: Required planning goal (string).
462
+ - `context` / `constraints`: Optional strings to guide the plan.
463
+ - `prompt`: Optional override of the planning prompt.
464
+ - Plan steps accept the same LLM options as `llm`, including tools, handoffs, and `allowedHandoffs`.
416
465
  - `request`: Make HTTP requests (GET, POST, etc.).
417
466
  - `allowInsecure`: Boolean (default `false`). If `true`, skips SSRF protections and allows non-HTTPS/local URLs.
418
467
  - Cross-origin redirects are blocked for non-GET/HEAD requests unless `allowInsecure: true`; on cross-origin redirects, non-essential headers are stripped.
419
- - `file`: Read, write, or append to files.
468
+ - `file`: Read, write, append, or patch files.
420
469
  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
470
+ - `op: patch`: Apply a unified diff or search/replace blocks via `content`.
471
+ - Search/replace blocks use `<<<<<<< SEARCH`, `=======`, `>>>>>>> REPLACE`; each SEARCH section must match the file content exactly once.
472
+ - `artifact`: Upload or download files as named artifacts.
473
+ - `op: upload`: Requires `name` and `paths` (glob patterns).
474
+ - `op: download`: Requires `name` and `path` (destination directory).
475
+ - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow paths outside of the current working directory.
421
476
  - `human`: Pause execution for manual confirmation or text input.
422
477
  - `inputType: confirm`: Simple Enter-to-continue prompt.
423
478
  - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
@@ -429,13 +484,36 @@ Keystone supports several specialized step types:
429
484
  status: state
430
485
  ```
431
486
  - `join`: Aggregate outputs from dependencies and enforce a completion condition.
432
- - `target`: `'steps'` (default) or `'branches'` (for foreach).
433
487
  - `condition`: `'all'` (default), `'any'`, or a number.
488
+ - `target`: Reserved for future use; currently ignored.
434
489
  - `blueprint`: Generate a structured system blueprint with an agent (persisted as an artifact).
435
490
  - `script`: Run JavaScript in a sandboxed subprocess. Requires `allowInsecure: true`.
436
- - `sleep`: Pause execution for a specified duration.
491
+ - `sleep`: Pause execution for a specified duration or until a timestamp.
492
+ - `duration`: Milliseconds (number or expression).
493
+ - `until`: Date/time string (evaluated), parsed by `Date`.
437
494
  - `durable`: Boolean (default `false`). If `true` and duration >= 60s, the wait is persisted and can resume after restarts.
495
+ - `wait`: Pause execution until an event is triggered.
496
+ - `event`: Event name (string or expression).
497
+ - `oneShot`: Boolean (default `true`). If `true`, consumes the event after it fires.
438
498
  - `memory`: Store or retrieve information from the semantic memory vector database.
499
+ - `op: store`: Store text with metadata.
500
+ - `op: search`: Search for similar text using vector embeddings.
501
+ - `text` / `query`: The content to store or search for.
502
+ - `metadata`: Optional object for filtering or additional context.
503
+ - `limit`: Number of results to return (default `5`).
504
+ ```yaml
505
+ - id: remember_preference
506
+ type: memory
507
+ op: store
508
+ text: "User prefers dark mode"
509
+ metadata: { user: "alice" }
510
+
511
+ - id: recall_preference
512
+ type: memory
513
+ op: search
514
+ query: "What is the user's preference?"
515
+ limit: 1
516
+ ```
439
517
  - `engine`: Run an allowlisted external CLI and capture a structured summary.
440
518
  - `env` and `cwd` are required and must be explicit.
441
519
  - `input` is sent to stdin (objects/arrays are JSON-encoded).
@@ -471,15 +549,18 @@ All steps support common features:
471
549
  - `needs`: Array of step IDs this step depends on.
472
550
  - `if`: Conditional expression.
473
551
  - `retry`: `{ count, backoff: 'linear'|'exponential', baseDelay }`.
474
- - `timeout`: Maximum execution time in milliseconds.
552
+ - `timeout`: Maximum execution time in milliseconds (best-effort; supported steps receive an abort signal).
475
553
  - `foreach`: Iterate over an array in parallel.
476
554
  - `concurrency`: Limit parallel items for `foreach` (must be a positive integer).
555
+ - `strategy.matrix`: Experimental parser-time expansion into `foreach` (prefer explicit `foreach` for now).
477
556
  - `pool`: Assign step to a resource pool.
557
+ - `breakpoint`: Pause before executing the step when running with `--debug`.
478
558
  - `compensate`: Step to run if the workflow rolls back.
479
559
  - `transform`: Post-process output using expressions.
480
560
  - `learn`: Auto-index for few-shot.
481
561
  - `reflexion`: Self-correction loop.
482
562
  - `auto_heal`: LLM-powered automatic error recovery.
563
+ - `memoize`: Cache step outputs across runs (`memoizeTtlSeconds` controls expiry).
483
564
  - `inputSchema` / `outputSchema`: JSON Schema validation.
484
565
  - `outputRetries`: Max retries for output validation failures.
485
566
  - `repairStrategy`: Strategy for output repair (`reask`, `repair`, `hybrid`).
@@ -547,6 +628,39 @@ Use `handoff` to expose an engine tool to the LLM with structured inputs:
547
628
  required: [summary]
548
629
  ```
549
630
 
631
+ ### Agent Handoffs (Swarm-Style)
632
+ Allow the LLM to switch to a specialist agent mid-step by defining `allowedHandoffs`. This injects a standard tool `transfer_to_agent({ agent_name })` and swaps the system prompt + tool set while preserving conversation history.
633
+
634
+ ```yaml
635
+ - id: route
636
+ type: llm
637
+ agent: handoff-router
638
+ prompt: "Route the task, then answer."
639
+ allowedHandoffs: [handoff-specialist]
640
+ ```
641
+
642
+ Agent prompts can use `${{ }}` expressions (evaluated against the workflow context) for dynamic system prompts.
643
+
644
+ ```markdown
645
+ ---
646
+ name: handoff-specialist
647
+ ---
648
+ You are the specialist for ${{ inputs.topic }}.
649
+ ```
650
+
651
+ ### Tool-Driven Context Updates
652
+ Tools can return `__keystone_context` to update workflow memory/env immediately. These values become available to subsequent tool calls and steps via `${{ memory.* }}` and `${{ env.* }}`.
653
+
654
+ ```json
655
+ {
656
+ "__keystone_context": {
657
+ "memory": { "user": "Ada" },
658
+ "env": { "CURRENT_TOPIC": "billing" }
659
+ },
660
+ "stored": true
661
+ }
662
+ ```
663
+
550
664
  ### Self-Healing Steps
551
665
  Steps can be configured to automatically recover from failures using an LLM agent.
552
666
 
@@ -566,6 +680,8 @@ When a step fails, the specified agent is invoked with the error details. The ag
566
680
  ```yaml
567
681
  - id: list_files
568
682
  type: shell
683
+ # Globbing (*) requires allowInsecure: true
684
+ allowInsecure: true
569
685
  run: ls *.txt
570
686
  # Post-process stdout into an array of filenames
571
687
  transform: ${{ stdout.trim().split('\n') }}
@@ -577,6 +693,24 @@ When a step fails, the specified agent is invoked with the error details. The ag
577
693
  run: echo "Processing ${{ item }}"
578
694
  ```
579
695
 
696
+ #### Example: Matrix Strategy (manual foreach)
697
+ Until `strategy.matrix` is wired end-to-end, use explicit `foreach` with an array expression:
698
+
699
+ ```yaml
700
+ - id: test_matrix
701
+ type: shell
702
+ foreach: ${{ [
703
+ { node: 18, os: "ubuntu" },
704
+ { node: 18, os: "macos" },
705
+ { node: 20, os: "ubuntu" },
706
+ { node: 20, os: "macos" },
707
+ { node: 22, os: "ubuntu" },
708
+ { node: 22, os: "macos" }
709
+ ] }}
710
+ allowInsecure: true # Required for '=' in arguments
711
+ run: echo "node=${{ item.node }} os=${{ item.os }}"
712
+ ```
713
+
580
714
  #### Example: Script Step
581
715
  ```yaml
582
716
  - id: calculate
@@ -637,6 +771,51 @@ Enable fail-forward steps that continue workflow execution even when they fail.
637
771
 
638
772
  The step's `status` will be `'success'` even when it fails internally, but the `error` field will contain the failure details.
639
773
 
774
+ ### Breakpoints
775
+
776
+ Pause before executing a step when running with `--debug`. In non-TTY environments, the workflow is paused until resumed in a TTY.
777
+
778
+ ```yaml
779
+ - id: inspect_context
780
+ type: shell
781
+ breakpoint: true
782
+ run: echo "Inspecting before execution"
783
+ ```
784
+
785
+ ### Artifacts
786
+
787
+ Upload and download files between steps without hardcoded artifact paths.
788
+
789
+ ```yaml
790
+ - id: build
791
+ type: shell
792
+ run: bun build
793
+
794
+ - id: upload_build
795
+ type: artifact
796
+ op: upload
797
+ name: build
798
+ paths: ["dist/**"]
799
+
800
+ - id: download_build
801
+ type: artifact
802
+ op: download
803
+ name: build
804
+ path: ./tmp/build
805
+ ```
806
+
807
+ Upload outputs include `artifactPath` and `files` for downstream references.
808
+
809
+ ### Structured Events
810
+
811
+ Emit NDJSON events for step and workflow lifecycle updates:
812
+
813
+ ```bash
814
+ keystone run workflow.yaml --events
815
+ ```
816
+
817
+ Events include `workflow.start`, `step.start`, `step.end`, and `workflow.complete`.
818
+
640
819
  ### Global Errors Block
641
820
 
642
821
  Define workflow-level error handling that runs when a step exhausts retries. Access failure context via `last_failed_step`.
@@ -699,6 +878,27 @@ steps:
699
878
  pool: api_pool
700
879
  ```
701
880
 
881
+ ### Automated Testing
882
+
883
+ Run workflow tests with fixtures and snapshots. Keystone includes a **Safe Mode** that blocks side-effecting steps (shell, request, file writes) by default during tests unless explicitly allowed or mocked.
884
+
885
+ ```yaml
886
+ name: my-test
887
+ workflow: my-workflow
888
+ options:
889
+ allowSideEffects: false # Default
890
+ fixture:
891
+ inputs: { name: "test" }
892
+ mocks:
893
+ - step: write_file
894
+ response: { success: true }
895
+ ```
896
+
897
+ Run tests via CLI:
898
+ ```bash
899
+ keystone test .keystone/tests/
900
+ ```
901
+
702
902
  ### Compensations (Rollback)
703
903
 
704
904
  Define "undo" actions for steps that have side effects. Compensations run in reverse order (LIFO) if a workflow fails or is cancelled.
@@ -744,10 +944,14 @@ Keystone comes with a set of **Standard Tools** that can be enabled for any agen
744
944
  - `read_file`: Read the contents of a file (arguments: `path`)
745
945
  - `read_file_lines`: Read a specific range of lines from a file (arguments: `path`, `start`, `count`)
746
946
  - `write_file`: Write or overwrite a file (arguments: `path`, `content`)
947
+ - `append_file`: Append content to a file, creating it if it doesn't exist (arguments: `path`, `content`)
747
948
  - `list_files`: List files in a directory (arguments: `path`)
748
949
  - `search_files`: Search for files by glob pattern (arguments: `pattern`, `dir`)
749
950
  - `search_content`: Search for string or regex within files (arguments: `query`, `dir`, `pattern`)
750
951
  - `run_command`: Run a shell command (arguments: `command`, `dir`). Risky commands require `allowInsecure: true` on the LLM step.
952
+ - `ast_grep_search`: Search for structural code patterns using AST matching (arguments: `pattern`, `language`, `paths`). More precise than regex for code refactoring.
953
+ - `ast_grep_replace`: Replace structural code patterns using AST-aware rewriting (arguments: `pattern`, `rewrite`, `language`, `paths`). Safer than regex for code refactoring.
954
+ - `fetch`: Fetch content from a URL via GET request (arguments: `url`).
751
955
 
752
956
  #### Standard Tool Examples
753
957
 
@@ -776,6 +980,25 @@ Agents can use these tools to interact with their environment. Here is how they
776
980
  dir: "."
777
981
  ```
778
982
 
983
+ **AST-Grep Search (find all console.log calls):**
984
+ ```yaml
985
+ - name: ast_grep_search
986
+ arguments:
987
+ pattern: "console.log($A)"
988
+ language: "typescript"
989
+ paths: ["src/"]
990
+ ```
991
+
992
+ **AST-Grep Replace (refactor console.log to logger.info):**
993
+ ```yaml
994
+ - name: ast_grep_replace
995
+ arguments:
996
+ pattern: "console.log($A)"
997
+ rewrite: "logger.info($A)"
998
+ language: "typescript"
999
+ paths: ["src/"]
1000
+ ```
1001
+
779
1002
  Tool arguments are passed to the tool's execution step via the `args` variable.
780
1003
 
781
1004
  **`.keystone/workflows/agents/developer.md`**
@@ -823,11 +1046,14 @@ The MCP server provides two modes for running workflows:
823
1046
  ```
824
1047
  1. Agent calls start_workflow → { run_id: "abc", status: "running" }
825
1048
  2. Agent polls get_run_status → { status: "running" }
826
- 3. Agent polls get_run_status → { status: "completed", outputs: {...} }
1049
+ 3. Agent polls get_run_status → { status: "success", outputs: {...} }
827
1050
  ```
828
1051
 
829
1052
  The async pattern is ideal for LLM-heavy workflows that may take minutes to complete.
830
1053
 
1054
+ When an async run pauses for a human step, the MCP server emits a notification:
1055
+ `notifications/keystone.human_input` with the run ID, step ID, input type, and instructions.
1056
+
831
1057
  #### Global MCP Servers
832
1058
  Define shared MCP servers in `.keystone/config.yaml` to reuse them across different workflows. Keystone ensures that multiple steps using the same global server will share a single running process.
833
1059
 
@@ -881,13 +1107,18 @@ In these examples, the agent will have access to all tools provided by the MCP s
881
1107
  | Command | Description |
882
1108
  | :--- | :--- |
883
1109
  | `init` | Initialize a new Keystone project |
884
- | `run <workflow>` | Execute a workflow (use `-i key=val`, `--resume` to auto-resume, `--dry-run`, `--debug`, `--no-dedup`, `--explain`) |
885
- | `resume <run_id>` | Resume a failed/paused/crashed workflow by ID (use `-i key=val` to answer human steps) |
1110
+ | `schema` | Generate JSON Schema for workflow and agent definitions (`-o` for output dir) |
1111
+ | `run <workflow>` | Execute a workflow (use `-i key=val`, `--resume` to auto-resume, `--dry-run`, `--debug`, `--no-dedup`, `--explain`, `--events`) |
1112
+ | `watch <workflow>` | Watch a workflow and re-run on changes (`--debug`, `--events`, `--debounce`) |
1113
+ | `resume <run_id>` | Resume a failed/paused/crashed workflow by ID (use `-i key=val` to answer human steps, `--events` for NDJSON) |
1114
+ | `rerun <workflow>` | Rerun a workflow from a specific step (use `--from <step_id>` and optional `--run <run_id>`, `--events`) |
886
1115
  | `validate [path]` | Check workflow files for errors |
1116
+ | `lint [path]` | Alias for `validate` |
887
1117
  | `workflows` | List available workflows |
888
1118
  | `history` | Show recent workflow runs |
889
1119
  | `logs <run_id>` | View logs, outputs, and errors for a specific run (`-v` for full output) |
890
1120
  | `graph <workflow>` | Generate a Mermaid diagram of the workflow |
1121
+ | `doc <workflow>` | Generate Markdown documentation for a workflow |
891
1122
  | `test [path]` | Run workflow tests with fixtures and snapshots |
892
1123
  | `optimize <workflow>` | Optimize a specific step in a workflow (requires `--target` and workflow `eval`) |
893
1124
  | `compile` | Compile a project into a single executable with embedded assets |
@@ -901,6 +1132,7 @@ In these examples, the agent will have access to all tools provided by the MCP s
901
1132
  | `mcp start` | Start the Keystone MCP server |
902
1133
  | `mcp login <server>` | Login to a remote MCP server |
903
1134
  | `scheduler` | Run the durable timer scheduler to resume sleep timers |
1135
+ | `event <name> [data]` | Trigger an event to resume `wait` steps (data can be JSON) |
904
1136
  | `timers list` | List durable timers |
905
1137
  | `timers clear` | Clear durable timers by run ID or `--all` |
906
1138
  | `dedup list [run_id]` | List idempotency records (optionally filter by run) |
@@ -912,6 +1144,14 @@ In these examples, the agent will have access to all tools provided by the MCP s
912
1144
 
913
1145
  ---
914
1146
 
1147
+ ### Watch Mode
1148
+
1149
+ Use `keystone watch` to re-run a workflow when the workflow file or its input files change:
1150
+
1151
+ ```bash
1152
+ keystone watch workflow.yaml
1153
+ ```
1154
+
915
1155
  ### Compile
916
1156
  `keystone compile -o ./keystone-app` emits the executable plus a `keystone-runtime/` directory next to it.
917
1157
  Ship both together if you use memory/embeddings (the runtime folder includes native deps like ONNX Runtime,
@@ -959,30 +1199,35 @@ Request steps enforce SSRF protections and require HTTPS by default. Cross-origi
959
1199
  ```mermaid
960
1200
  graph TD
961
1201
  CLI[CLI Entry Point] --> WR[WorkflowRunner]
962
- CLI --> MCP[MCP Server]
963
- WR --> SE[Step Executor]
964
- WR --> FE[ForeachExecutor]
965
- WR --> DB[(WorkflowDb)]
966
- SE --> LLM[LLM Executor]
967
- SE --> Shell[Shell Executor]
968
- SE --> File[File Operations]
969
- SE --> HTTP[HTTP Requests]
970
- SE --> Human[Human Input]
971
- SE --> Engine[Engine Executor]
972
- SE --> Script[Script Step]
973
- SE --> Sleep[Sleep Step]
974
- SE --> Memory[Memory operations]
975
- SE --> Workflow[Sub-workflows]
1202
+ CLI --> MCPServer[MCP Server]
1203
+
1204
+ subgraph "Core Orchestration"
1205
+ WR --> Scheduler[WorkflowScheduler]
1206
+ WR --> State[WorkflowState]
1207
+ WR --> Pool[Resource Pool Manager]
1208
+ WR --> Eval[Expression Evaluator]
1209
+ end
1210
+
1211
+ WR --> EX[Step Executor]
1212
+ WR --> FE[Foreach Executor]
1213
+ WR --> Workflow[Sub-workflows]
1214
+
1215
+ State --> DB[(WorkflowDb)]
1216
+ Scheduler --> Parser[WorkflowParser]
1217
+
1218
+ EX --> LLM[LLM Executor]
1219
+ EX --> Shell[Shell Executor]
1220
+ EX --> File[File Operations]
1221
+ EX --> HTTP[HTTP Requests]
1222
+ EX --> Human[Human Input]
1223
+ EX --> Engine[Engine Executor]
1224
+ EX --> Script[Script Step]
1225
+ EX --> Sleep[Sleep Step]
1226
+ EX --> Memory[Memory Operations]
1227
+
976
1228
  LLM --> Adapters[LLM Adapters]
977
- Adapters --> OpenAI
978
- Adapters --> Anthropic
979
- Adapters --> Gemini
980
- Adapters --> Copilot
981
- Adapters --> ChatGPT
982
- Adapters --> Local
1229
+ Adapters --> Providers[OpenAI, Anthropic, Gemini, Copilot, etc.]
983
1230
  LLM --> MCPClient[MCP Client]
984
- WR --> Eval[Expression Evaluator]
985
- WR --> Pool[Resource Pool Manager]
986
1231
  ```
987
1232
 
988
1233
  ## 📂 Project Structure
@@ -996,7 +1241,6 @@ graph TD
996
1241
  - `src/ui/`: Ink-powered TUI dashboard.
997
1242
  - `src/utils/`: Shared utilities (auth, redaction, config loading).
998
1243
  - `src/types/`: Core type definitions.
999
- - `src/e2e-tests/`: End-to-end test suite.
1000
1244
  - `.keystone/workflows/`: Your YAML workflow definitions.
1001
1245
 
1002
1246
  ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "keystone-cli",
3
- "version": "1.0.3",
3
+ "version": "1.1.1",
4
4
  "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
5
5
  "type": "module",
6
6
  "bin": {
@@ -11,7 +11,8 @@
11
11
  "test": "bun test",
12
12
  "lint": "biome check .",
13
13
  "lint:fix": "biome check --write .",
14
- "format": "biome format --write ."
14
+ "format": "biome format --write .",
15
+ "schema:generate": "bun run src/scripts/generate-schemas.ts"
15
16
  },
16
17
  "keywords": ["workflow", "orchestrator", "agentic", "automation", "bun"],
17
18
  "author": "Mark Hingston",
@@ -23,6 +24,8 @@
23
24
  "homepage": "https://github.com/mhingston/keystone-cli#readme",
24
25
  "files": ["src", "README.md", "LICENSE", "logo.png"],
25
26
  "dependencies": {
27
+ "@ast-grep/cli": "^0.40.3",
28
+ "@ast-grep/napi": "^0.40.3",
26
29
  "@jsep-plugin/arrow": "^1.0.6",
27
30
  "@jsep-plugin/object": "^1.2.2",
28
31
  "@types/react": "^19.0.0",
@@ -30,15 +33,16 @@
30
33
  "ajv": "^8.12.0",
31
34
  "commander": "^12.1.0",
32
35
  "dagre": "^0.8.5",
36
+ "glob": "^10.4.5",
33
37
  "ink": "^6.5.1",
34
38
  "ink-select-input": "3.1.2",
35
39
  "ink-spinner": "^5.0.0",
36
40
  "js-yaml": "^4.1.0",
37
41
  "jsep": "^1.4.0",
38
- "glob": "^10.4.5",
39
42
  "react": "^19.0.0",
40
43
  "sqlite-vec": "0.1.6",
41
- "zod": "^3.23.8"
44
+ "zod": "^3.23.8",
45
+ "zod-to-json-schema": "^3.25.1"
42
46
  },
43
47
  "optionalDependencies": {
44
48
  "re2": "^1.21.4"