npm - keystone-cli - Versions diffs - 1.0.3 → 1.1.1 - Mend

keystone-cli 1.0.3 → 1.1.1

This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (154)

package/README.md +276 -32
package/package.json +8 -4
package/src/cli.ts +350 -416
package/src/commands/doc.ts +31 -0
package/src/commands/event.ts +29 -0
package/src/commands/graph.ts +37 -0
package/src/commands/index.ts +14 -0
package/src/commands/init.ts +185 -0
package/src/commands/run.ts +124 -0
package/src/commands/schema.ts +40 -0
package/src/commands/utils.ts +78 -0
package/src/commands/validate.ts +111 -0
package/src/db/workflow-db.test.ts +314 -0
package/src/db/workflow-db.ts +810 -210
package/src/expression/evaluator-audit.test.ts +4 -2
package/src/expression/evaluator.test.ts +14 -1
package/src/expression/evaluator.ts +166 -19
package/src/parser/config-schema.ts +18 -0
package/src/parser/schema.ts +153 -22
package/src/parser/test-schema.ts +6 -6
package/src/parser/workflow-parser.test.ts +24 -0
package/src/parser/workflow-parser.ts +65 -3
package/src/runner/auto-heal.test.ts +5 -6
package/src/runner/blueprint-executor.test.ts +2 -2
package/src/runner/debug-repl.test.ts +5 -8
package/src/runner/debug-repl.ts +59 -16
package/src/runner/durable-timers.test.ts +11 -2
package/src/runner/engine-executor.test.ts +1 -1
package/src/runner/events.ts +57 -0
package/src/runner/executors/artifact-executor.ts +166 -0
package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
package/src/runner/executors/file-executor.test.ts +48 -0
package/src/runner/executors/file-executor.ts +324 -0
package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
package/src/runner/executors/human-executor.ts +144 -0
package/src/runner/executors/join-executor.ts +75 -0
package/src/runner/executors/llm-executor.ts +1266 -0
package/src/runner/executors/memory-executor.ts +71 -0
package/src/runner/executors/plan-executor.ts +104 -0
package/src/runner/executors/request-executor.ts +265 -0
package/src/runner/executors/script-executor.ts +43 -0
package/src/runner/executors/shell-executor.ts +403 -0
package/src/runner/executors/subworkflow-executor.ts +114 -0
package/src/runner/executors/types.ts +69 -0
package/src/runner/executors/wait-executor.ts +59 -0
package/src/runner/join-scheduling.test.ts +197 -0
package/src/runner/llm-adapter-runtime.test.ts +209 -0
package/src/runner/llm-adapter.test.ts +419 -24
package/src/runner/llm-adapter.ts +130 -26
package/src/runner/llm-clarification.test.ts +2 -1
package/src/runner/llm-executor.test.ts +532 -17
package/src/runner/mcp-client-audit.test.ts +1 -2
package/src/runner/mcp-client.ts +136 -46
package/src/runner/mcp-manager.test.ts +4 -0
package/src/runner/mcp-server.test.ts +58 -0
package/src/runner/mcp-server.ts +26 -0
package/src/runner/memoization.test.ts +190 -0
package/src/runner/optimization-runner.ts +4 -9
package/src/runner/quality-gate.test.ts +69 -0
package/src/runner/reflexion.test.ts +6 -17
package/src/runner/resource-pool.ts +102 -14
package/src/runner/services/context-builder.ts +144 -0
package/src/runner/services/secret-manager.ts +105 -0
package/src/runner/services/workflow-validator.ts +131 -0
package/src/runner/shell-executor.test.ts +28 -4
package/src/runner/standard-tools-ast.test.ts +196 -0
package/src/runner/standard-tools-execution.test.ts +27 -0
package/src/runner/standard-tools-integration.test.ts +6 -10
package/src/runner/standard-tools.ts +339 -102
package/src/runner/step-executor.test.ts +216 -4
package/src/runner/step-executor.ts +69 -941
package/src/runner/stream-utils.ts +7 -3
package/src/runner/test-harness.ts +20 -1
package/src/runner/timeout.test.ts +10 -0
package/src/runner/timeout.ts +11 -2
package/src/runner/tool-integration.test.ts +1 -1
package/src/runner/wait-step.test.ts +102 -0
package/src/runner/workflow-runner.test.ts +208 -15
package/src/runner/workflow-runner.ts +890 -818
package/src/runner/workflow-scheduler.ts +75 -0
package/src/runner/workflow-state.ts +269 -0
package/src/runner/workflow-subflows.test.ts +13 -12
package/src/scripts/generate-schemas.ts +16 -0
package/src/templates/agents/explore.md +1 -0
package/src/templates/agents/general.md +1 -0
package/src/templates/agents/handoff-router.md +14 -0
package/src/templates/agents/handoff-specialist.md +15 -0
package/src/templates/agents/keystone-architect.md +13 -44
package/src/templates/agents/my-agent.md +1 -0
package/src/templates/agents/software-engineer.md +1 -0
package/src/templates/agents/summarizer.md +1 -0
package/src/templates/agents/test-agent.md +1 -0
package/src/templates/agents/tester.md +1 -0
package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +4 -1
package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
package/src/templates/control-flow/idempotency-example.yaml +30 -0
package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
package/src/templates/features/artifact-example.yaml +40 -0
package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
package/src/templates/features/script-example.yaml +28 -0
package/src/templates/patterns/agent-handoff.yaml +53 -0
package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +2 -1
package/src/templates/patterns/composition-parent.yaml +18 -0
package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
package/src/templates/scaffolding/review-loop.yaml +97 -0
package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
package/src/templates/testing/invalid.yaml +6 -0
package/src/ui/dashboard.tsx +191 -33
package/src/utils/auth-manager.test.ts +337 -0
package/src/utils/auth-manager.ts +157 -61
package/src/utils/blueprint-utils.ts +4 -6
package/src/utils/config-loader.test.ts +2 -0
package/src/utils/config-loader.ts +12 -3
package/src/utils/constants.ts +76 -0
package/src/utils/container.ts +63 -0
package/src/utils/context-injector.test.ts +200 -0
package/src/utils/context-injector.ts +244 -0
package/src/utils/doc-generator.ts +85 -0
package/src/utils/env-filter.ts +45 -0
package/src/utils/json-parser.test.ts +12 -0
package/src/utils/json-parser.ts +30 -5
package/src/utils/logger.ts +12 -1
package/src/utils/mermaid.ts +4 -0
package/src/utils/paths.ts +52 -1
package/src/utils/process-sandbox-worker.test.ts +46 -0
package/src/utils/process-sandbox.ts +227 -14
package/src/utils/redactor.test.ts +11 -6
package/src/utils/redactor.ts +25 -9
package/src/utils/sandbox.ts +3 -0
package/src/runner/llm-executor.ts +0 -638
package/src/runner/shell-executor.ts +0 -366
package/src/templates/composition-parent.yaml +0 -14
package/src/templates/invalid.yaml +0 -5

package/README.md CHANGED

@@ -47,6 +47,7 @@ Keystone allows you to define complex automation workflows using a simple YAML s
 - 🛡️ **Secret Redaction:** Automatically redacts environment variables and secrets from logs and outputs.
 - 🧠 **Semantic Memory:** Store/search text with vector embeddings (and auto-index via `learn`).
 - 🎯 **Prompt Optimization:** Iteratively optimize prompts via `keystone optimize` + workflow `eval`.
+- 📖 **Documentation Generator:** Automatically generate Markdown documentation from your workflow definitions.
 ---
@@ -137,6 +138,10 @@ Top-level workflows:
 - `scaffold-feature`: Interactive workflow scaffolder. Prompts for requirements, plans files, generates content, and writes them.
 - `decompose-problem`: Decomposes a problem into research/implementation/review tasks, waits for approval, runs sub-workflows, and summarizes.
 - `dev`: Self-bootstrapping DevMode workflow for an interactive plan/implement/verify loop.
+- `agent-handoff`: Demonstrates agent handoffs and tool-driven context updates.
+- `script-example`: Demonstrates sandboxed JavaScript execution.
+- `artifact-example`: Demonstrates artifact upload and download between steps.
+- `idempotency-example`: Demonstrates safe retries for side-effecting steps.
 Sub-workflows:
 - `scaffold-plan`: Generates a file plan from `requirements` input.
@@ -144,11 +149,13 @@ Sub-workflows:
 - `decompose-research`: Runs a single research task (`task`) with optional `context`/`constraints`.
 - `decompose-implement`: Runs a single implementation task (`task`) with optional `research` findings.
 - `decompose-review`: Reviews a single implementation task (`task`) with optional `implementation` results.
+- `review-loop`: Reusable generate → critique → refine loop with a quality gate.
 Example runs:
 ```bash
 keystone run scaffold-feature
 keystone run decompose-problem -i problem="Add caching to the API" -i context="Node/Bun service"
+keystone run agent-handoff -i topic="billing" -i user="Ada"
 ```
 Sub-workflows are used by the top-level workflows, but can be run directly if you want just one phase.
@@ -164,7 +171,7 @@ Search order (highest precedence first):
 - `.keystone/config.yaml` or `.keystone/config.yml`
 - `$XDG_CONFIG_HOME/keystone/config.yaml` or `~/.config/keystone/config.yaml` (and `.yml`)
-Global state (when enabled) is stored at `$XDG_DATA_HOME/keystone/state.db` or `~/.local/share/keystone/state.db`.
+State is stored at `.keystone/state.db` by default (project-local).
 ```yaml
 default_provider: openai
@@ -228,10 +235,36 @@ engines:
 storage:
   retention_days: 30
   redact_secrets_at_rest: true
+expression:
+  strict: false
 ```
 `storage.retention_days` sets the default window used by `keystone maintenance` / `keystone prune`. `storage.redact_secrets_at_rest` controls whether secret inputs and known secrets are redacted before storing run data (default `true`).
+### Context Injection (Opt-in)
+Keystone can automatically inject project context files (`README.md`, `AGENTS.md`, `.cursor/rules`, `.claude/rules`) into LLM system prompts. This helps agents understand your project's conventions and guidelines.
+```yaml
+features:
+  context_injection:
+    enabled: true              # Opt-in feature (default: false)
+    search_depth: 3            # How many directories up to search (default: 3)
+    sources:                   # Which context sources to include
+      - readme                 # README.md files
+      - agents_md              # AGENTS.md files
+      - cursor_rules           # .cursor/rules or .claude/rules
+```
+When enabled, Keystone will:
+1. Search from the workflow directory up to the project root
+2. Find the nearest `README.md` and `AGENTS.md` files
+3. Parse rules from `.cursor/rules` or `.claude/rules` directories
+4. Prepend this context to the LLM system prompt
+Context is cached for 1 minute to avoid redundant file reads.
 ### Model & Provider Resolution
 Keystone resolves which provider to use for a model in the following order:
@@ -390,14 +423,21 @@ Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated usin
 - `${{ steps.id.status }}`: Get the execution status of a step (`'success'`, `'failed'`, etc.).
 - `${{ item }}`: Access the current item in a `foreach` loop.
 - `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
-- `${{ secrets.NAME }}`: Access redacted secrets.
+- `${{ secrets.NAME }}`: Access secret values (redacted in logs and at rest).
 - `${{ env.NAME }}`: Access environment variables (process env merged with workflow-level `env`).
   Workflow-level `env` is evaluated per step; if an expression cannot be resolved yet, the variable is skipped with a warning.
+- `${{ memory.key }}`: Access mutable workflow memory (populated by tools via `__keystone_context`).
 Inputs support `values` for enums and `secret: true` for sensitive values (redacted in logs and at rest by default; resumptions may require re-entry).
 Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
+Strict expression mode can be enabled in `.keystone/config.yaml` to fail fast on malformed `${{ }}`:
+```yaml
+expression:
+  strict: true
+```
 ---
 ## 🏗️ Step Types
@@ -409,15 +449,30 @@ Keystone supports several specialized step types:
 - `shell`: Run arbitrary shell commands.
 - `llm`: Prompt an agent and get structured or unstructured responses. Supports `outputSchema` (JSON Schema) for structured output.
   - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
+  - `allowedHandoffs`: Optional list of agent names that can be transferred to via `transfer_to_agent`.
   - `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
+  - `maxMessageHistory`: Number (default `50`). Max messages to retain in history before truncation/summary.
+  - `contextStrategy`: `'truncate'|'summary'|'auto'` (default `'truncate'`). Controls how message history is reduced when limits are exceeded; summarization condenses older history into a system message.
+  - `qualityGate`: Optional reviewer config `{ agent, prompt?, provider?, model?, maxAttempts? }`. If review fails, the step is refined and re-run.
   - `allowInsecure`: Boolean (default `false`). Set `true` to allow risky tool execution.
   - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow tools to access files outside of the current working directory.
   - `handoff`: Optional engine tool definition that lets the LLM delegate work to an allowlisted external CLI with structured inputs.
+- `plan`: Create a dynamic task list for orchestration.
+  - `goal`: Required planning goal (string).
+  - `context` / `constraints`: Optional strings to guide the plan.
+  - `prompt`: Optional override of the planning prompt.
+  - Plan steps accept the same LLM options as `llm`, including tools, handoffs, and `allowedHandoffs`.
 - `request`: Make HTTP requests (GET, POST, etc.).
   - `allowInsecure`: Boolean (default `false`). If `true`, skips SSRF protections and allows non-HTTPS/local URLs.
   - Cross-origin redirects are blocked for non-GET/HEAD requests unless `allowInsecure: true`; on cross-origin redirects, non-essential headers are stripped.
-- `file`: Read, write, or append to files.
+- `file`: Read, write, append, or patch files.
   - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
+  - `op: patch`: Apply a unified diff or search/replace blocks via `content`.
+    - Search/replace blocks use `<<<<<<< SEARCH`, `=======`, `>>>>>>> REPLACE` and must match exactly once.
+- `artifact`: Upload or download files as named artifacts.
+  - `op: upload`: Requires `name` and `paths` (glob patterns).
+  - `op: download`: Requires `name` and `path` (destination directory).
+  - `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow paths outside of the current working directory.
 - `human`: Pause execution for manual confirmation or text input.
   - `inputType: confirm`: Simple Enter-to-continue prompt.
   - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
@@ -429,13 +484,36 @@ Keystone supports several specialized step types:
       status: state
     ```
 - `join`: Aggregate outputs from dependencies and enforce a completion condition.
-  - `target`: `'steps'` (default) or `'branches'` (for foreach).
   - `condition`: `'all'` (default), `'any'`, or a number.
+  - `target`: Reserved for future use; currently ignored.
 - `blueprint`: Generate a structured system blueprint with an agent (persisted as an artifact).
 - `script`: Run JavaScript in a sandboxed subprocess. Requires `allowInsecure: true`.
-- `sleep`: Pause execution for a specified duration.
+- `sleep`: Pause execution for a specified duration or until a timestamp.
+  - `duration`: Milliseconds (number or expression).
+  - `until`: Date/time string (expressions are evaluated first), then parsed by JavaScript's `Date`.
   - `durable`: Boolean (default `false`). If `true` and duration >= 60s, the wait is persisted and can resume after restarts.
+- `wait`: Pause execution until an event is triggered.
+  - `event`: Event name (string or expression).
+  - `oneShot`: Boolean (default `true`). If `true`, consumes the event after it fires.
 - `memory`: Store or retrieve information from the semantic memory vector database.
+  - `op: store`: Store text with metadata.
+  - `op: search`: Search for similar text using vector embeddings.
+  - `text` / `query`: The content to store or search for.
+  - `metadata`: Optional object for filtering or additional context.
+  - `limit`: Number of results to return (default `5`).
+  ```yaml
+  - id: remember_preference
+    type: memory
+    op: store
+    text: "User prefers dark mode"
+    metadata: { user: "alice" }
+  - id: recall_preference
+    type: memory
+    op: search
+    query: "What is the user's preference?"
+    limit: 1
+  ```
 - `engine`: Run an allowlisted external CLI and capture a structured summary.
   - `env` and `cwd` are required and must be explicit.
   - `input` is sent to stdin (objects/arrays are JSON-encoded).
@@ -471,15 +549,18 @@ All steps support common features:
 - `needs`: Array of step IDs this step depends on.
 - `if`: Conditional expression.
 - `retry`: `{ count, backoff: 'linear'|'exponential', baseDelay }`.
-- `timeout`: Maximum execution time in milliseconds.
+- `timeout`: Maximum execution time in milliseconds (best-effort; supported steps receive an abort signal).
 - `foreach`: Iterate over an array in parallel.
 - `concurrency`: Limit parallel items for `foreach` (must be a positive integer).
+- `strategy.matrix`: Experimental parser-time expansion into `foreach` (prefer explicit `foreach` for now).
 - `pool`: Assign step to a resource pool.
+- `breakpoint`: Pause before executing the step when running with `--debug`.
 - `compensate`: Step to run if the workflow rolls back.
 - `transform`: Post-process output using expressions.
 - `learn`: Auto-index for few-shot.
 - `reflexion`: Self-correction loop.
 - `auto_heal`: LLM-powered automatic error recovery.
+- `memoize`: Cache step outputs across runs (`memoizeTtlSeconds` controls expiry).
 - `inputSchema` / `outputSchema`: JSON Schema validation.
 - `outputRetries`: Max retries for output validation failures.
 - `repairStrategy`: Strategy for output repair (`reask`, `repair`, `hybrid`).
@@ -547,6 +628,39 @@ Use `handoff` to expose an engine tool to the LLM with structured inputs:
         required: [summary]
 ```
+### Agent Handoffs (Swarm-Style)
+Allow the LLM to switch to a specialist agent mid-step by defining `allowedHandoffs`. This injects a standard tool `transfer_to_agent({ agent_name })` and swaps the system prompt + tool set while preserving conversation history.
+```yaml
+- id: route
+  type: llm
+  agent: handoff-router
+  prompt: "Route the task, then answer."
+  allowedHandoffs: [handoff-specialist]
+```
+Agent prompts can use `${{ }}` expressions (evaluated against the workflow context) for dynamic system prompts.
+```markdown
+---
+name: handoff-specialist
+---
+You are the specialist for ${{ inputs.topic }}.
+```
+### Tool-Driven Context Updates
+Tools can return `__keystone_context` to update workflow memory/env immediately. These values become available to subsequent tool calls and steps via `${{ memory.* }}` and `${{ env.* }}`.
+```json
+{
+  "__keystone_context": {
+    "memory": { "user": "Ada" },
+    "env": { "CURRENT_TOPIC": "billing" }
+  },
+  "stored": true
+}
+```
 ### Self-Healing Steps
 Steps can be configured to automatically recover from failures using an LLM agent.
@@ -566,6 +680,8 @@ When a step fails, the specified agent is invoked with the error details. The ag
 ```yaml
 - id: list_files
   type: shell
+  # Globbing (*) requires allowInsecure: true
+  allowInsecure: true
   run: ls *.txt
   # Post-process stdout into an array of filenames
   transform: ${{ stdout.trim().split('\n') }}
@@ -577,6 +693,24 @@ When a step fails, the specified agent is invoked with the error details. The ag
   run: echo "Processing ${{ item }}"
 ```
+#### Example: Matrix Strategy (manual foreach)
+Until `strategy.matrix` is wired end-to-end, use explicit `foreach` with an array expression:
+```yaml
+- id: test_matrix
+  type: shell
+  foreach: ${{ [
+    { node: 18, os: "ubuntu" },
+    { node: 18, os: "macos" },
+    { node: 20, os: "ubuntu" },
+    { node: 20, os: "macos" },
+    { node: 22, os: "ubuntu" },
+    { node: 22, os: "macos" }
+  ] }}
+  allowInsecure: true # Required for '=' in arguments
+  run: echo "node=${{ item.node }} os=${{ item.os }}"
+```
 #### Example: Script Step
 ```yaml
 - id: calculate
@@ -637,6 +771,51 @@ Enable fail-forward steps that continue workflow execution even when they fail.
 The step's `status` will be `'success'` even when it fails internally, but the `error` field will contain the failure details.
+### Breakpoints
+Pause before executing a step when running with `--debug`. In non-TTY environments, the workflow is paused until resumed in a TTY.
+```yaml
+- id: inspect_context
+  type: shell
+  breakpoint: true
+  run: echo "Inspecting before execution"
+```
+### Artifacts
+Upload and download files between steps without hardcoded artifact paths.
+```yaml
+- id: build
+  type: shell
+  run: bun build
+- id: upload_build
+  type: artifact
+  op: upload
+  name: build
+  paths: ["dist/**"]
+- id: download_build
+  type: artifact
+  op: download
+  name: build
+  path: ./tmp/build
+```
+Upload outputs include `artifactPath` and `files` for downstream references.
+### Structured Events
+Emit NDJSON events for step and workflow lifecycle updates:
+```bash
+keystone run workflow.yaml --events
+```
+Events include `workflow.start`, `step.start`, `step.end`, and `workflow.complete`.
 ### Global Errors Block
 Define workflow-level error handling that runs when a step exhausts retries. Access failure context via `last_failed_step`.
@@ -699,6 +878,27 @@ steps:
     pool: api_pool
 ```
+### Automated Testing
+Run workflow tests with fixtures and snapshots. Keystone includes a **Safe Mode** that blocks side-effecting steps (shell, request, file writes) by default during tests unless explicitly allowed or mocked.
+```yaml
+name: my-test
+workflow: my-workflow
+options:
+  allowSideEffects: false # Default
+fixture:
+  inputs: { name: "test" }
+  mocks:
+    - step: write_file
+      response: { success: true }
+```
+Run tests via CLI:
+```bash
+keystone test .keystone/tests/
+```
 ### Compensations (Rollback)
 Define "undo" actions for steps that have side effects. Compensations run in reverse order (LIFO) if a workflow fails or is cancelled.
@@ -744,10 +944,14 @@ Keystone comes with a set of **Standard Tools** that can be enabled for any agen
 - `read_file`: Read the contents of a file (arguments: `path`)
 - `read_file_lines`: Read a specific range of lines from a file (arguments: `path`, `start`, `count`)
 - `write_file`: Write or overwrite a file (arguments: `path`, `content`)
+- `append_file`: Append content to a file, creating it if it doesn't exist (arguments: `path`, `content`)
 - `list_files`: List files in a directory (arguments: `path`)
 - `search_files`: Search for files by glob pattern (arguments: `pattern`, `dir`)
 - `search_content`: Search for string or regex within files (arguments: `query`, `dir`, `pattern`)
 - `run_command`: Run a shell command (arguments: `command`, `dir`). Risky commands require `allowInsecure: true` on the LLM step.
+- `ast_grep_search`: Search for structural code patterns using AST matching (arguments: `pattern`, `language`, `paths`). More precise than regex for code refactoring.
+- `ast_grep_replace`: Replace structural code patterns using AST-aware rewriting (arguments: `pattern`, `rewrite`, `language`, `paths`). Safer than regex for code refactoring.
+- `fetch`: Fetch content from a URL via GET request (arguments: `url`).
 #### Standard Tool Examples
@@ -776,6 +980,25 @@ Agents can use these tools to interact with their environment. Here is how they
     dir: "."
 ```
+**AST-Grep Search (find all console.log calls):**
+```yaml
+- name: ast_grep_search
+  arguments:
+    pattern: "console.log($A)"
+    language: "typescript"
+    paths: ["src/"]
+```
+**AST-Grep Replace (refactor console.log to logger.info):**
+```yaml
+- name: ast_grep_replace
+  arguments:
+    pattern: "console.log($A)"
+    rewrite: "logger.info($A)"
+    language: "typescript"
+    paths: ["src/"]
+```
 Tool arguments are passed to the tool's execution step via the `args` variable.
 **`.keystone/workflows/agents/developer.md`**
@@ -823,11 +1046,14 @@ The MCP server provides two modes for running workflows:
 ```
 1. Agent calls start_workflow → { run_id: "abc", status: "running" }
 2. Agent polls get_run_status → { status: "running" }
-3. Agent polls get_run_status → { status: "completed", outputs: {...} }
+3. Agent polls get_run_status → { status: "success", outputs: {...} }
 ```
 The async pattern is ideal for LLM-heavy workflows that may take minutes to complete.
+When an async run pauses for a human step, the MCP server emits a notification:
+`notifications/keystone.human_input` with the run ID, step ID, input type, and instructions.
 #### Global MCP Servers
 Define shared MCP servers in `.keystone/config.yaml` to reuse them across different workflows. Keystone ensures that multiple steps using the same global server will share a single running process.
@@ -881,13 +1107,18 @@ In these examples, the agent will have access to all tools provided by the MCP s
 | Command | Description |
 | :--- | :--- |
 | `init` | Initialize a new Keystone project |
-| `run <workflow>` | Execute a workflow (use `-i key=val`, `--resume` to auto-resume, `--dry-run`, `--debug`, `--no-dedup`, `--explain`) |
-| `resume <run_id>` | Resume a failed/paused/crashed workflow by ID (use `-i key=val` to answer human steps) |
+| `schema` | Generate JSON Schema for workflow and agent definitions (`-o` for output dir) |
+| `run <workflow>` | Execute a workflow (use `-i key=val`, `--resume` to auto-resume, `--dry-run`, `--debug`, `--no-dedup`, `--explain`, `--events`) |
+| `watch <workflow>` | Watch a workflow and re-run on changes (`--debug`, `--events`, `--debounce`) |
+| `resume <run_id>` | Resume a failed/paused/crashed workflow by ID (use `-i key=val` to answer human steps, `--events` for NDJSON) |
+| `rerun <workflow>` | Rerun a workflow from a specific step (use `--from <step_id>` and optional `--run <run_id>`, `--events`) |
 | `validate [path]` | Check workflow files for errors |
+| `lint [path]` | Alias for `validate` |
 | `workflows` | List available workflows |
 | `history` | Show recent workflow runs |
 | `logs <run_id>` | View logs, outputs, and errors for a specific run (`-v` for full output) |
 | `graph <workflow>` | Generate a Mermaid diagram of the workflow |
+| `doc <workflow>` | Generate Markdown documentation for a workflow |
 | `test [path]` | Run workflow tests with fixtures and snapshots |
 | `optimize <workflow>` | Optimize a specific step in a workflow (requires --target and workflow `eval`) |
 | `compile` | Compile a project into a single executable with embedded assets |
@@ -901,6 +1132,7 @@ In these examples, the agent will have access to all tools provided by the MCP s
 | `mcp start` | Start the Keystone MCP server |
 | `mcp login <server>` | Login to a remote MCP server |
 | `scheduler` | Run the durable timer scheduler to resume sleep timers |
+| `event <name> [data]` | Trigger an event to resume `wait` steps (data can be JSON) |
 | `timers list` | List durable timers |
 | `timers clear` | Clear durable timers by run ID or `--all` |
 | `dedup list [run_id]` | List idempotency records (optionally filter by run) |
@@ -912,6 +1144,14 @@ In these examples, the agent will have access to all tools provided by the MCP s
 ---
+### Watch Mode
+Use `keystone watch` to re-run a workflow when the workflow file or its input files change:
+```bash
+keystone watch workflow.yaml
+```
 ### Compile
 `keystone compile -o ./keystone-app` emits the executable plus a `keystone-runtime/` directory next to it.
 Ship both together if you use memory/embeddings (the runtime folder includes native deps like ONNX Runtime,
@@ -959,30 +1199,35 @@ Request steps enforce SSRF protections and require HTTPS by default. Cross-origi
 ```mermaid
 graph TD
     CLI[CLI Entry Point] --> WR[WorkflowRunner]
-    CLI --> MCP[MCP Server]
-    WR --> SE[Step Executor]
-    WR --> FE[ForeachExecutor]
-    WR --> DB[(WorkflowDb)]
-    SE --> LLM[LLM Executor]
-    SE --> Shell[Shell Executor]
-    SE --> File[File Operations]
-    SE --> HTTP[HTTP Requests]
-    SE --> Human[Human Input]
-    SE --> Engine[Engine Executor]
-    SE --> Script[Script Step]
-    SE --> Sleep[Sleep Step]
-    SE --> Memory[Memory operations]
-    SE --> Workflow[Sub-workflows]
+    CLI --> MCPServer[MCP Server]
+    subgraph "Core Orchestration"
+        WR --> Scheduler[WorkflowScheduler]
+        WR --> State[WorkflowState]
+        WR --> Pool[Resource Pool Manager]
+        WR --> Eval[Expression Evaluator]
+    end
+    WR --> EX[Step Executor]
+    WR --> FE[Foreach Executor]
+    WR --> Workflow[Sub-workflows]
+    State --> DB[(WorkflowDb)]
+    Scheduler --> Parser[WorkflowParser]
+    EX --> LLM[LLM Executor]
+    EX --> Shell[Shell Executor]
+    EX --> File[File Operations]
+    EX --> HTTP[HTTP Requests]
+    EX --> Human[Human Input]
+    EX --> Engine[Engine Executor]
+    EX --> Script[Script Step]
+    EX --> Sleep[Sleep Step]
+    EX --> Memory[Memory operations]
     LLM --> Adapters[LLM Adapters]
-    Adapters --> OpenAI
-    Adapters --> Anthropic
-    Adapters --> Gemini
-    Adapters --> Copilot
-    Adapters --> ChatGPT
-    Adapters --> Local
+    Adapters --> Providers[OpenAI, Anthropic, Gemini, Copilot, etc.]
     LLM --> MCPClient[MCP Client]
-    WR --> Eval[Expression Evaluator]
-    WR --> Pool[Resource Pool Manager]
 ```
 ## 📂 Project Structure
@@ -996,7 +1241,6 @@ graph TD
 - `src/ui/`: Ink-powered TUI dashboard.
 - `src/utils/`: Shared utilities (auth, redaction, config loading).
 - `src/types/`: Core type definitions.
-- `src/e2e-tests/`: End-to-end test suite.
 - `.keystone/workflows/`: Your YAML workflow definitions.
 ---

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "1.0.3",
+  "version": "1.1.1",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {
@@ -11,7 +11,8 @@
     "test": "bun test",
     "lint": "biome check .",
     "lint:fix": "biome check --write .",
-    "format": "biome format --write ."
+    "format": "biome format --write .",
+    "schema:generate": "bun run src/scripts/generate-schemas.ts"
   },
   "keywords": ["workflow", "orchestrator", "agentic", "automation", "bun"],
   "author": "Mark Hingston",
@@ -23,6 +24,8 @@
   "homepage": "https://github.com/mhingston/keystone-cli#readme",
   "files": ["src", "README.md", "LICENSE", "logo.png"],
   "dependencies": {
+    "@ast-grep/cli": "^0.40.3",
+    "@ast-grep/napi": "^0.40.3",
     "@jsep-plugin/arrow": "^1.0.6",
     "@jsep-plugin/object": "^1.2.2",
     "@types/react": "^19.0.0",
@@ -30,15 +33,16 @@
     "ajv": "^8.12.0",
     "commander": "^12.1.0",
     "dagre": "^0.8.5",
+    "glob": "^10.4.5",
     "ink": "^6.5.1",
     "ink-select-input": "3.1.2",
     "ink-spinner": "^5.0.0",
     "js-yaml": "^4.1.0",
     "jsep": "^1.4.0",
-    "glob": "^10.4.5",
     "react": "^19.0.0",
     "sqlite-vec": "0.1.6",
-    "zod": "^3.23.8"
+    "zod": "^3.23.8",
+    "zod-to-json-schema": "^3.25.1"
   },
   "optionalDependencies": {
     "re2": "^1.21.4"