keystone-cli 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +276 -32
- package/package.json +8 -4
- package/src/cli.ts +350 -416
- package/src/commands/doc.ts +31 -0
- package/src/commands/event.ts +29 -0
- package/src/commands/graph.ts +37 -0
- package/src/commands/index.ts +14 -0
- package/src/commands/init.ts +185 -0
- package/src/commands/run.ts +124 -0
- package/src/commands/schema.ts +40 -0
- package/src/commands/utils.ts +78 -0
- package/src/commands/validate.ts +111 -0
- package/src/db/workflow-db.test.ts +314 -0
- package/src/db/workflow-db.ts +810 -210
- package/src/expression/evaluator-audit.test.ts +4 -2
- package/src/expression/evaluator.test.ts +14 -1
- package/src/expression/evaluator.ts +166 -19
- package/src/parser/config-schema.ts +18 -0
- package/src/parser/schema.ts +153 -22
- package/src/parser/test-schema.ts +6 -6
- package/src/parser/workflow-parser.test.ts +24 -0
- package/src/parser/workflow-parser.ts +65 -3
- package/src/runner/auto-heal.test.ts +5 -6
- package/src/runner/blueprint-executor.test.ts +2 -2
- package/src/runner/debug-repl.test.ts +5 -8
- package/src/runner/debug-repl.ts +59 -16
- package/src/runner/durable-timers.test.ts +11 -2
- package/src/runner/engine-executor.test.ts +1 -1
- package/src/runner/events.ts +57 -0
- package/src/runner/executors/artifact-executor.ts +166 -0
- package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
- package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
- package/src/runner/executors/file-executor.test.ts +48 -0
- package/src/runner/executors/file-executor.ts +324 -0
- package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
- package/src/runner/executors/human-executor.ts +144 -0
- package/src/runner/executors/join-executor.ts +75 -0
- package/src/runner/executors/llm-executor.ts +1266 -0
- package/src/runner/executors/memory-executor.ts +71 -0
- package/src/runner/executors/plan-executor.ts +104 -0
- package/src/runner/executors/request-executor.ts +265 -0
- package/src/runner/executors/script-executor.ts +43 -0
- package/src/runner/executors/shell-executor.ts +403 -0
- package/src/runner/executors/subworkflow-executor.ts +114 -0
- package/src/runner/executors/types.ts +69 -0
- package/src/runner/executors/wait-executor.ts +59 -0
- package/src/runner/join-scheduling.test.ts +197 -0
- package/src/runner/llm-adapter-runtime.test.ts +209 -0
- package/src/runner/llm-adapter.test.ts +419 -24
- package/src/runner/llm-adapter.ts +130 -26
- package/src/runner/llm-clarification.test.ts +2 -1
- package/src/runner/llm-executor.test.ts +532 -17
- package/src/runner/mcp-client-audit.test.ts +1 -2
- package/src/runner/mcp-client.ts +136 -46
- package/src/runner/mcp-manager.test.ts +4 -0
- package/src/runner/mcp-server.test.ts +58 -0
- package/src/runner/mcp-server.ts +26 -0
- package/src/runner/memoization.test.ts +190 -0
- package/src/runner/optimization-runner.ts +4 -9
- package/src/runner/quality-gate.test.ts +69 -0
- package/src/runner/reflexion.test.ts +6 -17
- package/src/runner/resource-pool.ts +102 -14
- package/src/runner/services/context-builder.ts +144 -0
- package/src/runner/services/secret-manager.ts +105 -0
- package/src/runner/services/workflow-validator.ts +131 -0
- package/src/runner/shell-executor.test.ts +28 -4
- package/src/runner/standard-tools-ast.test.ts +196 -0
- package/src/runner/standard-tools-execution.test.ts +27 -0
- package/src/runner/standard-tools-integration.test.ts +6 -10
- package/src/runner/standard-tools.ts +339 -102
- package/src/runner/step-executor.test.ts +216 -4
- package/src/runner/step-executor.ts +69 -941
- package/src/runner/stream-utils.ts +7 -3
- package/src/runner/test-harness.ts +20 -1
- package/src/runner/timeout.test.ts +10 -0
- package/src/runner/timeout.ts +11 -2
- package/src/runner/tool-integration.test.ts +1 -1
- package/src/runner/wait-step.test.ts +102 -0
- package/src/runner/workflow-runner.test.ts +208 -15
- package/src/runner/workflow-runner.ts +890 -818
- package/src/runner/workflow-scheduler.ts +75 -0
- package/src/runner/workflow-state.ts +269 -0
- package/src/runner/workflow-subflows.test.ts +13 -12
- package/src/scripts/generate-schemas.ts +16 -0
- package/src/templates/agents/explore.md +1 -0
- package/src/templates/agents/general.md +1 -0
- package/src/templates/agents/handoff-router.md +14 -0
- package/src/templates/agents/handoff-specialist.md +15 -0
- package/src/templates/agents/keystone-architect.md +13 -44
- package/src/templates/agents/my-agent.md +1 -0
- package/src/templates/agents/software-engineer.md +1 -0
- package/src/templates/agents/summarizer.md +1 -0
- package/src/templates/agents/test-agent.md +1 -0
- package/src/templates/agents/tester.md +1 -0
- package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
- package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
- package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
- package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
- package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
- package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
- package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
- package/src/templates/control-flow/idempotency-example.yaml +30 -0
- package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
- package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
- package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
- package/src/templates/features/artifact-example.yaml +39 -0
- package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
- package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
- package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
- package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
- package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
- package/src/templates/features/script-example.yaml +27 -0
- package/src/templates/patterns/agent-handoff.yaml +53 -0
- package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
- package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
- package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
- package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
- package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
- package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
- package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
- package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
- package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
- package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
- package/src/templates/scaffolding/review-loop.yaml +97 -0
- package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
- package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
- package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
- package/src/templates/testing/invalid.yaml +6 -0
- package/src/ui/dashboard.tsx +191 -33
- package/src/utils/auth-manager.test.ts +337 -0
- package/src/utils/auth-manager.ts +157 -61
- package/src/utils/blueprint-utils.ts +4 -6
- package/src/utils/config-loader.test.ts +2 -0
- package/src/utils/config-loader.ts +12 -3
- package/src/utils/constants.ts +76 -0
- package/src/utils/container.ts +63 -0
- package/src/utils/context-injector.test.ts +200 -0
- package/src/utils/context-injector.ts +244 -0
- package/src/utils/doc-generator.ts +85 -0
- package/src/utils/env-filter.ts +45 -0
- package/src/utils/json-parser.test.ts +12 -0
- package/src/utils/json-parser.ts +30 -5
- package/src/utils/logger.ts +12 -1
- package/src/utils/mermaid.ts +4 -0
- package/src/utils/paths.ts +52 -1
- package/src/utils/process-sandbox-worker.test.ts +46 -0
- package/src/utils/process-sandbox.ts +227 -14
- package/src/utils/redactor.test.ts +11 -6
- package/src/utils/redactor.ts +25 -9
- package/src/utils/sandbox.ts +3 -0
- package/src/runner/llm-executor.ts +0 -638
- package/src/runner/shell-executor.ts +0 -366
- package/src/templates/invalid.yaml +0 -5
package/README.md
CHANGED
|
@@ -47,6 +47,7 @@ Keystone allows you to define complex automation workflows using a simple YAML s
|
|
|
47
47
|
- 🛡️ **Secret Redaction:** Automatically redacts environment variables and secrets from logs and outputs.
|
|
48
48
|
- 🧠 **Semantic Memory:** Store/search text with vector embeddings (and auto-index via `learn`).
|
|
49
49
|
- 🎯 **Prompt Optimization:** Iteratively optimize prompts via `keystone optimize` + workflow `eval`.
|
|
50
|
+
- 📖 **Documentation Generator:** Automatically generate Markdown documentation from your workflow definitions.
|
|
50
51
|
|
|
51
52
|
---
|
|
52
53
|
|
|
@@ -137,6 +138,10 @@ Top-level workflows:
|
|
|
137
138
|
- `scaffold-feature`: Interactive workflow scaffolder. Prompts for requirements, plans files, generates content, and writes them.
|
|
138
139
|
- `decompose-problem`: Decomposes a problem into research/implementation/review tasks, waits for approval, runs sub-workflows, and summarizes.
|
|
139
140
|
- `dev`: Self-bootstrapping DevMode workflow for an interactive plan/implement/verify loop.
|
|
141
|
+
- `agent-handoff`: Demonstrates agent handoffs and tool-driven context updates.
|
|
142
|
+
- `script-example`: Demonstrates sandboxed JavaScript execution.
|
|
143
|
+
- `artifact-example`: Demonstrates artifact upload and download between steps.
|
|
144
|
+
- `idempotency-example`: Demonstrates safe retries for side-effecting steps.
|
|
140
145
|
|
|
141
146
|
Sub-workflows:
|
|
142
147
|
- `scaffold-plan`: Generates a file plan from `requirements` input.
|
|
@@ -144,11 +149,13 @@ Sub-workflows:
|
|
|
144
149
|
- `decompose-research`: Runs a single research task (`task`) with optional `context`/`constraints`.
|
|
145
150
|
- `decompose-implement`: Runs a single implementation task (`task`) with optional `research` findings.
|
|
146
151
|
- `decompose-review`: Reviews a single implementation task (`task`) with optional `implementation` results.
|
|
152
|
+
- `review-loop`: Reusable generate → critique → refine loop with a quality gate.
|
|
147
153
|
|
|
148
154
|
Example runs:
|
|
149
155
|
```bash
|
|
150
156
|
keystone run scaffold-feature
|
|
151
157
|
keystone run decompose-problem -i problem="Add caching to the API" -i context="Node/Bun service"
|
|
158
|
+
keystone run agent-handoff -i topic="billing" -i user="Ada"
|
|
152
159
|
```
|
|
153
160
|
|
|
154
161
|
Sub-workflows are used by the top-level workflows, but can be run directly if you want just one phase.
|
|
@@ -164,7 +171,7 @@ Search order (highest precedence first):
|
|
|
164
171
|
- `.keystone/config.yaml` or `.keystone/config.yml`
|
|
165
172
|
- `$XDG_CONFIG_HOME/keystone/config.yaml` or `~/.config/keystone/config.yaml` (and `.yml`)
|
|
166
173
|
|
|
167
|
-
|
|
174
|
+
State is stored at `.keystone/state.db` by default (project-local).
|
|
168
175
|
|
|
169
176
|
```yaml
|
|
170
177
|
default_provider: openai
|
|
@@ -228,10 +235,36 @@ engines:
|
|
|
228
235
|
storage:
|
|
229
236
|
retention_days: 30
|
|
230
237
|
redact_secrets_at_rest: true
|
|
238
|
+
|
|
239
|
+
expression:
|
|
240
|
+
strict: false
|
|
231
241
|
```
|
|
232
242
|
|
|
233
243
|
`storage.retention_days` sets the default window used by `keystone maintenance` / `keystone prune`. `storage.redact_secrets_at_rest` controls whether secret inputs and known secrets are redacted before storing run data (default `true`).
|
|
234
244
|
|
|
245
|
+
### Context Injection (Opt-in)
|
|
246
|
+
|
|
247
|
+
Keystone can automatically inject project context files (`README.md`, `AGENTS.md`, `.cursor/rules`, `.claude/rules`) into LLM system prompts. This helps agents understand your project's conventions and guidelines.
|
|
248
|
+
|
|
249
|
+
```yaml
|
|
250
|
+
features:
|
|
251
|
+
context_injection:
|
|
252
|
+
enabled: true # Opt-in feature (default: false)
|
|
253
|
+
search_depth: 3 # How many directories up to search (default: 3)
|
|
254
|
+
sources: # Which context sources to include
|
|
255
|
+
- readme # README.md files
|
|
256
|
+
- agents_md # AGENTS.md files
|
|
257
|
+
- cursor_rules # .cursor/rules or .claude/rules
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
When enabled, Keystone will:
|
|
261
|
+
1. Search from the workflow directory up to the project root
|
|
262
|
+
2. Find the nearest `README.md` and `AGENTS.md` files
|
|
263
|
+
3. Parse rules from `.cursor/rules` or `.claude/rules` directories
|
|
264
|
+
4. Prepend this context to the LLM system prompt
|
|
265
|
+
|
|
266
|
+
Context is cached for 1 minute to avoid redundant file reads.
|
|
267
|
+
|
|
235
268
|
### Model & Provider Resolution
|
|
236
269
|
|
|
237
270
|
Keystone resolves which provider to use for a model in the following order:
|
|
@@ -390,14 +423,21 @@ Keystone uses `${{ }}` syntax for dynamic values. Expressions are evaluated usin
|
|
|
390
423
|
- `${{ steps.id.status }}`: Get the execution status of a step (`'success'`, `'failed'`, etc.).
|
|
391
424
|
- `${{ item }}`: Access the current item in a `foreach` loop.
|
|
392
425
|
- `${{ args.name }}`: Access tool arguments (available ONLY inside agent tool execution steps).
|
|
393
|
-
- `${{ secrets.NAME }}`: Access redacted
|
|
426
|
+
- `${{ secrets.NAME }}`: Access secret values (redacted in logs and at rest).
|
|
394
427
|
- `${{ env.NAME }}`: Access environment variables (process env merged with workflow-level `env`).
|
|
395
428
|
Workflow-level `env` is evaluated per step; if an expression cannot be resolved yet, the variable is skipped with a warning.
|
|
429
|
+
- `${{ memory.key }}`: Access mutable workflow memory (populated by tools via `__keystone_context`).
|
|
396
430
|
|
|
397
431
|
Inputs support `values` for enums and `secret: true` for sensitive values (redacted in logs and at rest by default; resumptions may require re-entry).
|
|
398
432
|
|
|
399
433
|
Standard JavaScript-like expressions are supported: `${{ steps.build.status == 'success' ? '🚀' : '❌' }}`.
|
|
400
434
|
|
|
435
|
+
Strict expression mode can be enabled in `.keystone/config.yaml` to fail fast on malformed `${{ }}` expressions:
|
|
436
|
+
```yaml
|
|
437
|
+
expression:
|
|
438
|
+
strict: true
|
|
439
|
+
```
|
|
440
|
+
|
|
401
441
|
---
|
|
402
442
|
|
|
403
443
|
## 🏗️ Step Types
|
|
@@ -409,15 +449,30 @@ Keystone supports several specialized step types:
|
|
|
409
449
|
- `shell`: Run arbitrary shell commands.
|
|
410
450
|
- `llm`: Prompt an agent and get structured or unstructured responses. Supports `outputSchema` (JSON Schema) for structured output.
|
|
411
451
|
- `allowClarification`: Boolean (default `false`). If `true`, the LLM may ask the user clarifying questions; if no human is available, the workflow is suspended.
|
|
452
|
+
- `allowedHandoffs`: Optional list of agent names that can be transferred to via `transfer_to_agent`.
|
|
412
453
|
- `maxIterations`: Number (default `10`). Maximum number of tool-calling loops allowed for the agent.
|
|
454
|
+
- `maxMessageHistory`: Number (default `50`). Max messages to retain in history before truncation/summary.
|
|
455
|
+
- `contextStrategy`: `'truncate'|'summary'|'auto'` (default `truncate`). Controls how message history is reduced when limits are exceeded; summarization condenses older history into a system message.
|
|
456
|
+
- `qualityGate`: Optional reviewer config `{ agent, prompt?, provider?, model?, maxAttempts? }`. If review fails, the step is refined and re-run.
|
|
413
457
|
- `allowInsecure`: Boolean (default `false`). Set `true` to allow risky tool execution.
|
|
414
458
|
- `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow tools to access files outside of the current working directory.
|
|
415
459
|
- `handoff`: Optional engine tool definition that lets the LLM delegate work to an allowlisted external CLI with structured inputs.
|
|
460
|
+
- `plan`: Create a dynamic task list for orchestration.
|
|
461
|
+
- `goal`: Required planning goal (string).
|
|
462
|
+
- `context` / `constraints`: Optional strings to guide the plan.
|
|
463
|
+
- `prompt`: Optional override of the planning prompt.
|
|
464
|
+
- Plan steps accept the same LLM options as `llm`, including tools, handoffs, and `allowedHandoffs`.
|
|
416
465
|
- `request`: Make HTTP requests (GET, POST, etc.).
|
|
417
466
|
- `allowInsecure`: Boolean (default `false`). If `true`, skips SSRF protections and allows non-HTTPS/local URLs.
|
|
418
467
|
- Cross-origin redirects are blocked for non-GET/HEAD requests unless `allowInsecure: true`; on cross-origin redirects, non-essential headers are stripped.
|
|
419
|
-
- `file`: Read, write, or
|
|
468
|
+
- `file`: Read, write, append, or patch files.
|
|
420
469
|
- `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
|
|
470
|
+
- `op: patch`: Apply a unified diff or search/replace blocks via `content`.
|
|
471
|
+
- Search/replace blocks use `<<<<<<< SEARCH`, `=======`, `>>>>>>> REPLACE` and must match exactly once.
|
|
472
|
+
- `artifact`: Upload or download files as named artifacts.
|
|
473
|
+
- `op: upload`: Requires `name` and `paths` (glob patterns).
|
|
474
|
+
- `op: download`: Requires `name` and `path` (destination directory).
|
|
475
|
+
- `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow paths outside of the current working directory.
|
|
421
476
|
- `human`: Pause execution for manual confirmation or text input.
|
|
422
477
|
- `inputType: confirm`: Simple Enter-to-continue prompt.
|
|
423
478
|
- `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
|
|
@@ -429,13 +484,36 @@ Keystone supports several specialized step types:
|
|
|
429
484
|
status: state
|
|
430
485
|
```
|
|
431
486
|
- `join`: Aggregate outputs from dependencies and enforce a completion condition.
|
|
432
|
-
- `target`: `'steps'` (default) or `'branches'` (for foreach).
|
|
433
487
|
- `condition`: `'all'` (default), `'any'`, or a number.
|
|
488
|
+
- `target`: Reserved for future use; currently ignored.
|
|
434
489
|
- `blueprint`: Generate a structured system blueprint with an agent (persisted as an artifact).
|
|
435
490
|
- `script`: Run JavaScript in a sandboxed subprocess. Requires `allowInsecure: true`.
|
|
436
|
-
- `sleep`: Pause execution for a specified duration.
|
|
491
|
+
- `sleep`: Pause execution for a specified duration or until a timestamp.
|
|
492
|
+
- `duration`: Milliseconds (number or expression).
|
|
493
|
+
- `until`: Date/time string (evaluated), parsed by `Date`.
|
|
437
494
|
- `durable`: Boolean (default `false`). If `true` and the duration is at least 60 seconds (`60000` ms), the wait is persisted and can resume after restarts.
|
|
495
|
+
- `wait`: Pause execution until an event is triggered.
|
|
496
|
+
- `event`: Event name (string or expression).
|
|
497
|
+
- `oneShot`: Boolean (default `true`). If `true`, consumes the event after it fires.
|
|
438
498
|
- `memory`: Store or retrieve information from the semantic memory vector database.
|
|
499
|
+
- `op: store`: Store text with metadata.
|
|
500
|
+
- `op: search`: Search for similar text using vector embeddings.
|
|
501
|
+
- `text` / `query`: The content to store or search for.
|
|
502
|
+
- `metadata`: Optional object for filtering or additional context.
|
|
503
|
+
- `limit`: Number of results to return (default `5`).
|
|
504
|
+
```yaml
|
|
505
|
+
- id: remember_preference
|
|
506
|
+
type: memory
|
|
507
|
+
op: store
|
|
508
|
+
text: "User prefers dark mode"
|
|
509
|
+
metadata: { user: "alice" }
|
|
510
|
+
|
|
511
|
+
- id: recall_preference
|
|
512
|
+
type: memory
|
|
513
|
+
op: search
|
|
514
|
+
query: "What is the user's preference?"
|
|
515
|
+
limit: 1
|
|
516
|
+
```
|
|
439
517
|
- `engine`: Run an allowlisted external CLI and capture a structured summary.
|
|
440
518
|
- `env` and `cwd` are required and must be explicit.
|
|
441
519
|
- `input` is sent to stdin (objects/arrays are JSON-encoded).
|
|
@@ -471,15 +549,18 @@ All steps support common features:
|
|
|
471
549
|
- `needs`: Array of step IDs this step depends on.
|
|
472
550
|
- `if`: Conditional expression.
|
|
473
551
|
- `retry`: `{ count, backoff: 'linear'|'exponential', baseDelay }`.
|
|
474
|
-
- `timeout`: Maximum execution time in milliseconds.
|
|
552
|
+
- `timeout`: Maximum execution time in milliseconds (best-effort; supported steps receive an abort signal).
|
|
475
553
|
- `foreach`: Iterate over an array in parallel.
|
|
476
554
|
- `concurrency`: Limit parallel items for `foreach` (must be a positive integer).
|
|
555
|
+
- `strategy.matrix`: Experimental parser-time expansion into `foreach` (prefer explicit `foreach` for now).
|
|
477
556
|
- `pool`: Assign step to a resource pool.
|
|
557
|
+
- `breakpoint`: Pause before executing the step when running with `--debug`.
|
|
478
558
|
- `compensate`: Step to run if the workflow rolls back.
|
|
479
559
|
- `transform`: Post-process output using expressions.
|
|
480
560
|
- `learn`: Auto-index for few-shot.
|
|
481
561
|
- `reflexion`: Self-correction loop.
|
|
482
562
|
- `auto_heal`: LLM-powered automatic error recovery.
|
|
563
|
+
- `memoize`: Cache step outputs across runs (`memoizeTtlSeconds` controls expiry).
|
|
483
564
|
- `inputSchema` / `outputSchema`: JSON Schema validation.
|
|
484
565
|
- `outputRetries`: Max retries for output validation failures.
|
|
485
566
|
- `repairStrategy`: Strategy for output repair (`reask`, `repair`, `hybrid`).
|
|
@@ -547,6 +628,39 @@ Use `handoff` to expose an engine tool to the LLM with structured inputs:
|
|
|
547
628
|
required: [summary]
|
|
548
629
|
```
|
|
549
630
|
|
|
631
|
+
### Agent Handoffs (Swarm-Style)
|
|
632
|
+
Allow the LLM to switch to a specialist agent mid-step by defining `allowedHandoffs`. This injects a standard tool `transfer_to_agent({ agent_name })` and swaps the system prompt + tool set while preserving conversation history.
|
|
633
|
+
|
|
634
|
+
```yaml
|
|
635
|
+
- id: route
|
|
636
|
+
type: llm
|
|
637
|
+
agent: handoff-router
|
|
638
|
+
prompt: "Route the task, then answer."
|
|
639
|
+
allowedHandoffs: [handoff-specialist]
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
Agent prompts can use `${{ }}` expressions (evaluated against the workflow context) for dynamic system prompts.
|
|
643
|
+
|
|
644
|
+
```markdown
|
|
645
|
+
---
|
|
646
|
+
name: handoff-specialist
|
|
647
|
+
---
|
|
648
|
+
You are the specialist for ${{ inputs.topic }}.
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
### Tool-Driven Context Updates
|
|
652
|
+
Tools can return `__keystone_context` to update workflow memory/env immediately. These values become available to subsequent tool calls and steps via `${{ memory.* }}` and `${{ env.* }}`.
|
|
653
|
+
|
|
654
|
+
```json
|
|
655
|
+
{
|
|
656
|
+
"__keystone_context": {
|
|
657
|
+
"memory": { "user": "Ada" },
|
|
658
|
+
"env": { "CURRENT_TOPIC": "billing" }
|
|
659
|
+
},
|
|
660
|
+
"stored": true
|
|
661
|
+
}
|
|
662
|
+
```
|
|
663
|
+
|
|
550
664
|
### Self-Healing Steps
|
|
551
665
|
Steps can be configured to automatically recover from failures using an LLM agent.
|
|
552
666
|
|
|
@@ -566,6 +680,8 @@ When a step fails, the specified agent is invoked with the error details. The ag
|
|
|
566
680
|
```yaml
|
|
567
681
|
- id: list_files
|
|
568
682
|
type: shell
|
|
683
|
+
# Globbing (*) requires allowInsecure: true
|
|
684
|
+
allowInsecure: true
|
|
569
685
|
run: ls *.txt
|
|
570
686
|
# Post-process stdout into an array of filenames
|
|
571
687
|
transform: ${{ stdout.trim().split('\n') }}
|
|
@@ -577,6 +693,24 @@ When a step fails, the specified agent is invoked with the error details. The ag
|
|
|
577
693
|
run: echo "Processing ${{ item }}"
|
|
578
694
|
```
|
|
579
695
|
|
|
696
|
+
#### Example: Matrix Strategy (manual foreach)
|
|
697
|
+
Until `strategy.matrix` is wired end-to-end, use explicit `foreach` with an array expression:
|
|
698
|
+
|
|
699
|
+
```yaml
|
|
700
|
+
- id: test_matrix
|
|
701
|
+
type: shell
|
|
702
|
+
foreach: ${{ [
|
|
703
|
+
{ node: 18, os: "ubuntu" },
|
|
704
|
+
{ node: 18, os: "macos" },
|
|
705
|
+
{ node: 20, os: "ubuntu" },
|
|
706
|
+
{ node: 20, os: "macos" },
|
|
707
|
+
{ node: 22, os: "ubuntu" },
|
|
708
|
+
{ node: 22, os: "macos" }
|
|
709
|
+
] }}
|
|
710
|
+
allowInsecure: true # Required for '=' in arguments
|
|
711
|
+
run: echo "node=${{ item.node }} os=${{ item.os }}"
|
|
712
|
+
```
|
|
713
|
+
|
|
580
714
|
#### Example: Script Step
|
|
581
715
|
```yaml
|
|
582
716
|
- id: calculate
|
|
@@ -637,6 +771,51 @@ Enable fail-forward steps that continue workflow execution even when they fail.
|
|
|
637
771
|
|
|
638
772
|
The step's `status` will be `'success'` even when it fails internally, but the `error` field will contain the failure details.
|
|
639
773
|
|
|
774
|
+
### Breakpoints
|
|
775
|
+
|
|
776
|
+
Pause before executing a step when running with `--debug`. In non-TTY environments, the workflow is paused until resumed in a TTY.
|
|
777
|
+
|
|
778
|
+
```yaml
|
|
779
|
+
- id: inspect_context
|
|
780
|
+
type: shell
|
|
781
|
+
breakpoint: true
|
|
782
|
+
run: echo "Inspecting before execution"
|
|
783
|
+
```
|
|
784
|
+
|
|
785
|
+
### Artifacts
|
|
786
|
+
|
|
787
|
+
Upload and download files between steps without hardcoded artifact paths.
|
|
788
|
+
|
|
789
|
+
```yaml
|
|
790
|
+
- id: build
|
|
791
|
+
type: shell
|
|
792
|
+
run: bun build
|
|
793
|
+
|
|
794
|
+
- id: upload_build
|
|
795
|
+
type: artifact
|
|
796
|
+
op: upload
|
|
797
|
+
name: build
|
|
798
|
+
paths: ["dist/**"]
|
|
799
|
+
|
|
800
|
+
- id: download_build
|
|
801
|
+
type: artifact
|
|
802
|
+
op: download
|
|
803
|
+
name: build
|
|
804
|
+
path: ./tmp/build
|
|
805
|
+
```
|
|
806
|
+
|
|
807
|
+
Upload outputs include `artifactPath` and `files` for downstream references.
|
|
808
|
+
|
|
809
|
+
### Structured Events
|
|
810
|
+
|
|
811
|
+
Emit NDJSON events for step and workflow lifecycle updates:
|
|
812
|
+
|
|
813
|
+
```bash
|
|
814
|
+
keystone run workflow.yaml --events
|
|
815
|
+
```
|
|
816
|
+
|
|
817
|
+
Events include `workflow.start`, `step.start`, `step.end`, and `workflow.complete`.
|
|
818
|
+
|
|
640
819
|
### Global Errors Block
|
|
641
820
|
|
|
642
821
|
Define workflow-level error handling that runs when a step exhausts retries. Access failure context via `last_failed_step`.
|
|
@@ -699,6 +878,27 @@ steps:
|
|
|
699
878
|
pool: api_pool
|
|
700
879
|
```
|
|
701
880
|
|
|
881
|
+
### Automated Testing
|
|
882
|
+
|
|
883
|
+
Run workflow tests with fixtures and snapshots. Keystone includes a **Safe Mode** that blocks side-effecting steps (shell, request, file writes) by default during tests unless explicitly allowed or mocked.
|
|
884
|
+
|
|
885
|
+
```yaml
|
|
886
|
+
name: my-test
|
|
887
|
+
workflow: my-workflow
|
|
888
|
+
options:
|
|
889
|
+
allowSideEffects: false # Default
|
|
890
|
+
fixture:
|
|
891
|
+
inputs: { name: "test" }
|
|
892
|
+
mocks:
|
|
893
|
+
- step: write_file
|
|
894
|
+
response: { success: true }
|
|
895
|
+
```
|
|
896
|
+
|
|
897
|
+
Run tests via CLI:
|
|
898
|
+
```bash
|
|
899
|
+
keystone test .keystone/tests/
|
|
900
|
+
```
|
|
901
|
+
|
|
702
902
|
### Compensations (Rollback)
|
|
703
903
|
|
|
704
904
|
Define "undo" actions for steps that have side effects. Compensations run in reverse order (LIFO) if a workflow fails or is cancelled.
|
|
@@ -744,10 +944,14 @@ Keystone comes with a set of **Standard Tools** that can be enabled for any agen
|
|
|
744
944
|
- `read_file`: Read the contents of a file (arguments: `path`)
|
|
745
945
|
- `read_file_lines`: Read a specific range of lines from a file (arguments: `path`, `start`, `count`)
|
|
746
946
|
- `write_file`: Write or overwrite a file (arguments: `path`, `content`)
|
|
947
|
+
- `append_file`: Append content to a file, creating it if it doesn't exist (arguments: `path`, `content`)
|
|
747
948
|
- `list_files`: List files in a directory (arguments: `path`)
|
|
748
949
|
- `search_files`: Search for files by glob pattern (arguments: `pattern`, `dir`)
|
|
749
950
|
- `search_content`: Search for string or regex within files (arguments: `query`, `dir`, `pattern`)
|
|
750
951
|
- `run_command`: Run a shell command (arguments: `command`, `dir`). Risky commands require `allowInsecure: true` on the LLM step.
|
|
952
|
+
- `ast_grep_search`: Search for structural code patterns using AST matching (arguments: `pattern`, `language`, `paths`). More precise than regex for code refactoring.
|
|
953
|
+
- `ast_grep_replace`: Replace structural code patterns using AST-aware rewriting (arguments: `pattern`, `rewrite`, `language`, `paths`). Safer than regex for code refactoring.
|
|
954
|
+
- `fetch`: Fetch content from a URL via GET request (arguments: `url`).
|
|
751
955
|
|
|
752
956
|
#### Standard Tool Examples
|
|
753
957
|
|
|
@@ -776,6 +980,25 @@ Agents can use these tools to interact with their environment. Here is how they
|
|
|
776
980
|
dir: "."
|
|
777
981
|
```
|
|
778
982
|
|
|
983
|
+
**AST-Grep Search (find all console.log calls):**
|
|
984
|
+
```yaml
|
|
985
|
+
- name: ast_grep_search
|
|
986
|
+
arguments:
|
|
987
|
+
pattern: "console.log($A)"
|
|
988
|
+
language: "typescript"
|
|
989
|
+
paths: ["src/"]
|
|
990
|
+
```
|
|
991
|
+
|
|
992
|
+
**AST-Grep Replace (refactor console.log to logger.info):**
|
|
993
|
+
```yaml
|
|
994
|
+
- name: ast_grep_replace
|
|
995
|
+
arguments:
|
|
996
|
+
pattern: "console.log($A)"
|
|
997
|
+
rewrite: "logger.info($A)"
|
|
998
|
+
language: "typescript"
|
|
999
|
+
paths: ["src/"]
|
|
1000
|
+
```
|
|
1001
|
+
|
|
779
1002
|
Tool arguments are passed to the tool's execution step via the `args` variable.
|
|
780
1003
|
|
|
781
1004
|
**`.keystone/workflows/agents/developer.md`**
|
|
@@ -823,11 +1046,14 @@ The MCP server provides two modes for running workflows:
|
|
|
823
1046
|
```
|
|
824
1047
|
1. Agent calls start_workflow → { run_id: "abc", status: "running" }
|
|
825
1048
|
2. Agent polls get_run_status → { status: "running" }
|
|
826
|
-
3. Agent polls get_run_status → { status: "
|
|
1049
|
+
3. Agent polls get_run_status → { status: "success", outputs: {...} }
|
|
827
1050
|
```
|
|
828
1051
|
|
|
829
1052
|
The async pattern is ideal for LLM-heavy workflows that may take minutes to complete.
|
|
830
1053
|
|
|
1054
|
+
When an async run pauses for a human step, the MCP server emits a notification:
|
|
1055
|
+
`notifications/keystone.human_input` with the run ID, step ID, input type, and instructions.
|
|
1056
|
+
|
|
831
1057
|
#### Global MCP Servers
|
|
832
1058
|
Define shared MCP servers in `.keystone/config.yaml` to reuse them across different workflows. Keystone ensures that multiple steps using the same global server will share a single running process.
|
|
833
1059
|
|
|
@@ -881,13 +1107,18 @@ In these examples, the agent will have access to all tools provided by the MCP s
|
|
|
881
1107
|
| Command | Description |
|
|
882
1108
|
| :--- | :--- |
|
|
883
1109
|
| `init` | Initialize a new Keystone project |
|
|
884
|
-
| `
|
|
885
|
-
| `
|
|
1110
|
+
| `schema` | Generate JSON Schema for workflow and agent definitions (`-o` for output dir) |
|
|
1111
|
+
| `run <workflow>` | Execute a workflow (use `-i key=val`, `--resume` to auto-resume, `--dry-run`, `--debug`, `--no-dedup`, `--explain`, `--events`) |
|
|
1112
|
+
| `watch <workflow>` | Watch a workflow and re-run on changes (`--debug`, `--events`, `--debounce`) |
|
|
1113
|
+
| `resume <run_id>` | Resume a failed/paused/crashed workflow by ID (use `-i key=val` to answer human steps, `--events` for NDJSON) |
|
|
1114
|
+
| `rerun <workflow>` | Rerun a workflow from a specific step (use `--from <step_id>` and optional `--run <run_id>`, `--events`) |
|
|
886
1115
|
| `validate [path]` | Check workflow files for errors |
|
|
1116
|
+
| `lint [path]` | Alias for `validate` |
|
|
887
1117
|
| `workflows` | List available workflows |
|
|
888
1118
|
| `history` | Show recent workflow runs |
|
|
889
1119
|
| `logs <run_id>` | View logs, outputs, and errors for a specific run (`-v` for full output) |
|
|
890
1120
|
| `graph <workflow>` | Generate a Mermaid diagram of the workflow |
|
|
1121
|
+
| `doc <workflow>` | Generate Markdown documentation for a workflow |
|
|
891
1122
|
| `test [path]` | Run workflow tests with fixtures and snapshots |
|
|
892
1123
|
| `optimize <workflow>` | Optimize a specific step in a workflow (requires `--target` and workflow `eval`) |
|
|
893
1124
|
| `compile` | Compile a project into a single executable with embedded assets |
|
|
@@ -901,6 +1132,7 @@ In these examples, the agent will have access to all tools provided by the MCP s
|
|
|
901
1132
|
| `mcp start` | Start the Keystone MCP server |
|
|
902
1133
|
| `mcp login <server>` | Login to a remote MCP server |
|
|
903
1134
|
| `scheduler` | Run the durable timer scheduler to resume sleep timers |
|
|
1135
|
+
| `event <name> [data]` | Trigger an event to resume `wait` steps (data can be JSON) |
|
|
904
1136
|
| `timers list` | List durable timers |
|
|
905
1137
|
| `timers clear` | Clear durable timers by run ID or `--all` |
|
|
906
1138
|
| `dedup list [run_id]` | List idempotency records (optionally filter by run) |
|
|
@@ -912,6 +1144,14 @@ In these examples, the agent will have access to all tools provided by the MCP s
|
|
|
912
1144
|
|
|
913
1145
|
---
|
|
914
1146
|
|
|
1147
|
+
### Watch Mode
|
|
1148
|
+
|
|
1149
|
+
Use `keystone watch` to re-run a workflow when the workflow file or its input files change:
|
|
1150
|
+
|
|
1151
|
+
```bash
|
|
1152
|
+
keystone watch workflow.yaml
|
|
1153
|
+
```
|
|
1154
|
+
|
|
915
1155
|
### Compile
|
|
916
1156
|
`keystone compile -o ./keystone-app` emits the executable plus a `keystone-runtime/` directory next to it.
|
|
917
1157
|
Ship both together if you use memory/embeddings (the runtime folder includes native deps like ONNX Runtime,
|
|
@@ -959,30 +1199,35 @@ Request steps enforce SSRF protections and require HTTPS by default. Cross-origi
|
|
|
959
1199
|
```mermaid
|
|
960
1200
|
graph TD
|
|
961
1201
|
CLI[CLI Entry Point] --> WR[WorkflowRunner]
|
|
962
|
-
CLI -->
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
1202
|
+
CLI --> MCPServer[MCP Server]
|
|
1203
|
+
|
|
1204
|
+
subgraph "Core Orchestration"
|
|
1205
|
+
WR --> Scheduler[WorkflowScheduler]
|
|
1206
|
+
WR --> State[WorkflowState]
|
|
1207
|
+
WR --> Pool[Resource Pool Manager]
|
|
1208
|
+
WR --> Eval[Expression Evaluator]
|
|
1209
|
+
end
|
|
1210
|
+
|
|
1211
|
+
WR --> EX[Step Executor]
|
|
1212
|
+
WR --> FE[Foreach Executor]
|
|
1213
|
+
WR --> Workflow[Sub-workflows]
|
|
1214
|
+
|
|
1215
|
+
State --> DB[(WorkflowDb)]
|
|
1216
|
+
Scheduler --> Parser[WorkflowParser]
|
|
1217
|
+
|
|
1218
|
+
EX --> LLM[LLM Executor]
|
|
1219
|
+
EX --> Shell[Shell Executor]
|
|
1220
|
+
EX --> File[File Operations]
|
|
1221
|
+
EX --> HTTP[HTTP Requests]
|
|
1222
|
+
EX --> Human[Human Input]
|
|
1223
|
+
EX --> Engine[Engine Executor]
|
|
1224
|
+
EX --> Script[Script Step]
|
|
1225
|
+
EX --> Sleep[Sleep Step]
|
|
1226
|
+
EX --> Memory[Memory Operations]
|
|
1227
|
+
|
|
976
1228
|
LLM --> Adapters[LLM Adapters]
|
|
977
|
-
Adapters --> OpenAI
|
|
978
|
-
Adapters --> Anthropic
|
|
979
|
-
Adapters --> Gemini
|
|
980
|
-
Adapters --> Copilot
|
|
981
|
-
Adapters --> ChatGPT
|
|
982
|
-
Adapters --> Local
|
|
1229
|
+
Adapters --> Providers[OpenAI, Anthropic, Gemini, Copilot, etc.]
|
|
983
1230
|
LLM --> MCPClient[MCP Client]
|
|
984
|
-
WR --> Eval[Expression Evaluator]
|
|
985
|
-
WR --> Pool[Resource Pool Manager]
|
|
986
1231
|
```
|
|
987
1232
|
|
|
988
1233
|
## 📂 Project Structure
|
|
@@ -996,7 +1241,6 @@ graph TD
|
|
|
996
1241
|
- `src/ui/`: Ink-powered TUI dashboard.
|
|
997
1242
|
- `src/utils/`: Shared utilities (auth, redaction, config loading).
|
|
998
1243
|
- `src/types/`: Core type definitions.
|
|
999
|
-
- `src/e2e-tests/`: End-to-end test suite.
|
|
1000
1244
|
- `.keystone/workflows/`: Your YAML workflow definitions.
|
|
1001
1245
|
|
|
1002
1246
|
---
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "keystone-cli",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
"test": "bun test",
|
|
12
12
|
"lint": "biome check .",
|
|
13
13
|
"lint:fix": "biome check --write .",
|
|
14
|
-
"format": "biome format --write ."
|
|
14
|
+
"format": "biome format --write .",
|
|
15
|
+
"schema:generate": "bun run src/scripts/generate-schemas.ts"
|
|
15
16
|
},
|
|
16
17
|
"keywords": ["workflow", "orchestrator", "agentic", "automation", "bun"],
|
|
17
18
|
"author": "Mark Hingston",
|
|
@@ -23,6 +24,8 @@
|
|
|
23
24
|
"homepage": "https://github.com/mhingston/keystone-cli#readme",
|
|
24
25
|
"files": ["src", "README.md", "LICENSE", "logo.png"],
|
|
25
26
|
"dependencies": {
|
|
27
|
+
"@ast-grep/cli": "^0.40.3",
|
|
28
|
+
"@ast-grep/napi": "^0.40.3",
|
|
26
29
|
"@jsep-plugin/arrow": "^1.0.6",
|
|
27
30
|
"@jsep-plugin/object": "^1.2.2",
|
|
28
31
|
"@types/react": "^19.0.0",
|
|
@@ -30,15 +33,16 @@
|
|
|
30
33
|
"ajv": "^8.12.0",
|
|
31
34
|
"commander": "^12.1.0",
|
|
32
35
|
"dagre": "^0.8.5",
|
|
36
|
+
"glob": "^10.4.5",
|
|
33
37
|
"ink": "^6.5.1",
|
|
34
38
|
"ink-select-input": "3.1.2",
|
|
35
39
|
"ink-spinner": "^5.0.0",
|
|
36
40
|
"js-yaml": "^4.1.0",
|
|
37
41
|
"jsep": "^1.4.0",
|
|
38
|
-
"glob": "^10.4.5",
|
|
39
42
|
"react": "^19.0.0",
|
|
40
43
|
"sqlite-vec": "0.1.6",
|
|
41
|
-
"zod": "^3.23.8"
|
|
44
|
+
"zod": "^3.23.8",
|
|
45
|
+
"zod-to-json-schema": "^3.25.1"
|
|
42
46
|
},
|
|
43
47
|
"optionalDependencies": {
|
|
44
48
|
"re2": "^1.21.4"
|