@kontourai/flow-agents 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/.github/workflows/ci.yml +6 -1
  2. package/.github/workflows/kit-gates-demo.yml +6 -2
  3. package/.github/workflows/runtime-compat.yml +5 -2
  4. package/CHANGELOG.md +51 -0
  5. package/CONTRIBUTING.md +30 -0
  6. package/README.md +26 -5
  7. package/agents/dev.json +1 -1
  8. package/agents/tool-planner.json +1 -1
  9. package/build/src/cli/{flow-kit.js → kit.js} +122 -108
  10. package/build/src/cli/validate-source-tree.js +4 -4
  11. package/build/src/cli/workflow-sidecar.js +70 -5
  12. package/build/src/cli.js +3 -3
  13. package/build/src/flow-kit/validate.js +89 -62
  14. package/build/src/tools/build-universal-bundles.js +78 -17
  15. package/build/src/tools/generate-context-map.js +49 -7
  16. package/build/src/tools/validate-source-tree.js +32 -1
  17. package/console.telemetry.json +1 -1
  18. package/docs/adr/0004-gates-expect-surface-claims.md +7 -7
  19. package/docs/adr/0007-flow-skill-kit-tool-boundary.md +169 -0
  20. package/docs/adr/0007-skill-audit.md +112 -0
  21. package/docs/adr/0008-kit-operation-boundary.md +88 -0
  22. package/docs/context-map.md +18 -22
  23. package/docs/flow-kit-repository-contract.md +5 -5
  24. package/docs/getting-started.md +177 -0
  25. package/docs/index.md +19 -8
  26. package/docs/kit-authoring-guide.md +125 -13
  27. package/docs/knowledge-kit.md +2 -2
  28. package/docs/operating-layers.md +2 -2
  29. package/docs/spec/runtime-hook-surface.md +1 -1
  30. package/docs/veritas-integration.md +4 -4
  31. package/docs/vision.md +1 -1
  32. package/docs/workflow-eval-strategy.md +2 -2
  33. package/docs/workflow-usage-guide.md +2 -2
  34. package/evals/acceptance/test_opencode_harness.sh +18 -10
  35. package/evals/acceptance/test_pi_harness.sh +10 -6
  36. package/evals/ci/run-baseline.sh +1 -1
  37. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +2 -2
  38. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +2 -2
  39. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +1 -1
  40. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +4 -4
  41. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +4 -4
  42. package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +4 -4
  43. package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +4 -4
  44. package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +4 -4
  45. package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +4 -4
  46. package/evals/fixtures/pull-work-provider/github-issues.json +5 -5
  47. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +2 -2
  48. package/evals/fixtures/surface-trust/artifact-absent.json +2 -2
  49. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +2 -2
  50. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +2 -2
  51. package/evals/fixtures/surface-trust/provider-absent.json +2 -2
  52. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +2 -2
  53. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +2 -2
  54. package/evals/integration/test_activate_npx_context.sh +2 -2
  55. package/evals/integration/test_bundle_install.sh +17 -12
  56. package/evals/integration/test_console_learning_projection.sh +2 -2
  57. package/evals/integration/test_flow_kit_install_git.sh +7 -7
  58. package/evals/integration/test_flow_kit_repository.sh +4 -4
  59. package/evals/integration/test_goal_fit_hook.sh +144 -0
  60. package/evals/integration/test_kit_conformance_levels.sh +56 -2
  61. package/evals/integration/test_local_flow_kit_install.sh +7 -7
  62. package/evals/integration/test_publish_change_helper.sh +1 -1
  63. package/evals/integration/test_pull_work_provider.sh +1 -1
  64. package/evals/integration/test_runtime_adapter_activation.sh +3 -3
  65. package/evals/integration/test_workflow_sidecar_writer.sh +9 -9
  66. package/evals/lib/node.sh +2 -2
  67. package/evals/static/test_package.sh +3 -3
  68. package/evals/static/test_workflow_skills.sh +19 -19
  69. package/integrations/strands/flow_agents_strands/steering.py +1 -1
  70. package/integrations/strands-ts/src/hooks.ts +1 -1
  71. package/kits/builder/flows/build.flow.json +48 -48
  72. package/kits/builder/flows/shape.flow.json +36 -36
  73. package/kits/builder/kit.json +17 -0
  74. package/{skills → kits/builder/skills}/builder-shape/SKILL.md +4 -4
  75. package/{skills → kits/builder/skills}/idea-to-backlog/SKILL.md +1 -1
  76. package/kits/knowledge/adapters/obsidian-store/index.js +137 -26
  77. package/kits/knowledge/evals/contract-suite/suite.test.js +90 -0
  78. package/kits/knowledge/flows/compile.flow.json +12 -12
  79. package/kits/knowledge/flows/consolidate.flow.json +16 -16
  80. package/kits/knowledge/flows/ingest.flow.json +12 -12
  81. package/kits/knowledge/flows/retire.flow.json +16 -16
  82. package/kits/knowledge/flows/store-contract.flow.json +12 -12
  83. package/kits/knowledge/flows/synthesize.flow.json +16 -16
  84. package/kits/knowledge/kit.json +16 -9
  85. package/kits/release-evidence/flows/release-evidence.flow.json +3 -3
  86. package/package.json +11 -5
  87. package/packaging/packs.json +1 -21
  88. package/schemas/workflow-evidence.schema.json +2 -1
  89. package/scripts/README.md +1 -1
  90. package/scripts/hooks/stop-goal-fit.js +66 -18
  91. package/scripts/kit.js +2 -0
  92. package/skills/README.md +23 -0
  93. package/src/cli/{flow-kit.ts → kit.ts} +124 -109
  94. package/src/cli/validate-source-tree.ts +4 -4
  95. package/src/cli/workflow-sidecar.ts +62 -4
  96. package/src/cli.ts +3 -3
  97. package/src/flow-kit/validate.ts +118 -58
  98. package/src/tools/build-universal-bundles.ts +74 -13
  99. package/src/tools/generate-context-map.ts +36 -6
  100. package/src/tools/validate-source-tree.ts +27 -1
  101. package/scripts/flow-kit.js +0 -2
  102. package/skills/context-budget/SKILL.md +0 -40
  103. package/skills/explore/SKILL.md +0 -137
  104. package/skills/feedback-loop/SKILL.md +0 -87
  105. package/skills/frontend-design/SKILL.md +0 -80
  106. package/{skills → kits/builder/skills}/deliver/SKILL.md +0 -0
  107. package/{skills → kits/builder/skills}/design-probe/SKILL.md +0 -0
  108. package/{skills → kits/builder/skills}/evidence-gate/SKILL.md +0 -0
  109. package/{skills → kits/builder/skills}/execute-plan/SKILL.md +0 -0
  110. package/{skills → kits/builder/skills}/fix-bug/SKILL.md +0 -0
  111. package/{skills → kits/builder/skills}/learning-review/SKILL.md +0 -0
  112. package/{skills → kits/builder/skills}/pickup-probe/SKILL.md +0 -0
  113. package/{skills → kits/builder/skills}/plan-work/SKILL.md +0 -0
  114. package/{skills → kits/builder/skills}/pull-work/SKILL.md +0 -0
  115. package/{skills → kits/builder/skills}/release-readiness/SKILL.md +0 -0
  116. package/{skills → kits/builder/skills}/review-work/SKILL.md +0 -0
  117. package/{skills → kits/builder/skills}/tdd-workflow/SKILL.md +0 -0
  118. package/{skills → kits/builder/skills}/verify-work/SKILL.md +0 -0
  119. package/{skills → kits/knowledge/skills}/knowledge-capture/SKILL.md +0 -0
package/.github/workflows/ci.yml CHANGED
@@ -40,7 +40,9 @@ jobs:
40
40
  mkdir -p .flow-cli
41
41
  cd .flow-cli
42
42
  printf '{"name":"flow-cli-host","private":true}\n' > package.json
43
- npm install --no-save @kontourai/flow
43
+ # Pinned to ~1.3.0: gate evidence uses the Hachure trust.bundle format
44
+ # (kontourai/flow#84). flow-agents migrated surface.claim -> trust.bundle.
45
+ npm install --no-save @kontourai/flow@~1.3.0
44
46
 
45
47
  - name: Install shell tools
46
48
  run: |
@@ -216,6 +218,9 @@ jobs:
216
218
  continue-on-error: true
217
219
  run: bash evals/ci/run-baseline.sh --check flow-kit-install-git-integration
218
220
 
221
+ - name: Console learning projection integration
222
+ continue-on-error: true
223
+ run: bash evals/ci/run-baseline.sh --check console-learning-projection-integration
219
224
 
220
225
  - name: Context map integration
221
226
  continue-on-error: true
package/.github/workflows/kit-gates-demo.yml CHANGED
@@ -50,7 +50,9 @@ jobs:
50
50
  mkdir -p .flow-cli
51
51
  cd .flow-cli
52
52
  printf '{"name":"flow-cli-host","private":true}\n' > package.json
53
- npm install --no-save @kontourai/flow
53
+ # Pinned to ~1.3.0: gate evidence uses the Hachure trust.bundle format
54
+ # (kontourai/flow#84). flow-agents migrated surface.claim -> trust.bundle.
55
+ npm install --no-save @kontourai/flow@~1.3.0
54
56
  env:
55
57
  FLOW_CLI_ROOT: ${{ github.workspace }}/.flow-cli/node_modules/@kontourai/flow
56
58
 
@@ -113,7 +115,9 @@ jobs:
113
115
  mkdir -p .flow-cli
114
116
  cd .flow-cli
115
117
  printf '{"name":"flow-cli-host","private":true}\n' > package.json
116
- npm install --no-save @kontourai/flow
118
+ # Pinned to ~1.3.0: gate evidence uses the Hachure trust.bundle format
119
+ # (kontourai/flow#84). flow-agents migrated surface.claim -> trust.bundle.
120
+ npm install --no-save @kontourai/flow@~1.3.0
117
121
  env:
118
122
  FLOW_CLI_ROOT: ${{ github.workspace }}/.flow-cli/node_modules/@kontourai/flow
119
123
 
package/.github/workflows/runtime-compat.yml CHANGED
@@ -40,7 +40,7 @@ jobs:
40
40
  uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
41
41
 
42
42
  - name: Set up Node.js
43
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
43
+ uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
44
44
  with:
45
45
  node-version: 24
46
46
 
@@ -49,6 +49,9 @@ jobs:
49
49
  ${{ matrix.install }}
50
50
  ${{ matrix.version }}
51
51
 
52
+ - name: Install dependencies
53
+ run: npm ci
54
+
52
55
  - name: Build bundles
53
56
  run: npm run build:bundles
54
57
 
@@ -67,7 +70,7 @@ jobs:
67
70
  uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
68
71
 
69
72
  - name: Set up Node.js
70
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
73
+ uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
71
74
  with:
72
75
  node-version: 24
73
76
 
package/CHANGELOG.md CHANGED
@@ -1,5 +1,56 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.3.0](https://github.com/kontourai/flow-agents/compare/v1.2.0...v1.3.0) (2026-06-16)
4
+
5
+
6
+ ### Features
7
+
8
+ * add kit TRUST axis to inspect output — orthogonal to K-levels (issue [#79](https://github.com/kontourai/flow-agents/issues/79)) ([2a353d1](https://github.com/kontourai/flow-agents/commit/2a353d17ffb1da8b0fc23f442f52aa0676a1fabe))
9
+ * add TRUST axis to kit inspect — orthogonal to K-level capability (issue [#79](https://github.com/kontourai/flow-agents/issues/79)) ([02ac699](https://github.com/kontourai/flow-agents/commit/02ac699227c4071c16c936c3e01e5fd013466baf))
10
+ * **knowledge:** rendered-body-as-storage in Obsidian adapter ([baef40f](https://github.com/kontourai/flow-agents/commit/baef40f46f4016ba8b6c8afd1c61b91cade1de12))
11
+ * **knowledge:** rendered-body-as-storage in Obsidian adapter ([0a31c32](https://github.com/kontourai/flow-agents/commit/0a31c3233ee8772b000cb42dbef0a3fdc38ccf1c))
12
+ * migrate gate evidence from surface.claim to Hachure trust.bundle ([#97](https://github.com/kontourai/flow-agents/issues/97)) ([8ed43c4](https://github.com/kontourai/flow-agents/commit/8ed43c46c2a6887d32cd850bc8b2d97e7829f825))
13
+
14
+
15
+ ### Fixes
16
+
17
+ * **#74:** console-learning test cross-platform + un-quarantine; docs([#39](https://github.com/kontourai/flow-agents/issues/39)): live-validation rule ([89b2bdb](https://github.com/kontourai/flow-agents/commit/89b2bdb44f3fa5ea629135f7e93410eee92efb1c))
18
+ * **#74:** un-quarantine console-learning test — passes 12/12 on Linux CI ([#89](https://github.com/kontourai/flow-agents/issues/89)) ([371ecd2](https://github.com/kontourai/flow-agents/commit/371ecd22cbd8e80b6404cbdd2825d4a94fb6573c))
19
+ * **#75:** assert opencode plugin load via factory marker file ([#96](https://github.com/kontourai/flow-agents/issues/96)) ([6c09288](https://github.com/kontourai/flow-agents/commit/6c092883bc4b2fd5a893431991ab75921f8b080b))
20
+ * acceptance harnesses poll for all required telemetry events (canary flake [#75](https://github.com/kontourai/flow-agents/issues/75)) ([a27b4ff](https://github.com/kontourai/flow-agents/commit/a27b4ff48c88908419ef079c447d8a9930aa707a))
21
+ * acceptance harnesses skip (not fail) when no telemetry produced — no-provider CI ([d9cba18](https://github.com/kontourai/flow-agents/commit/d9cba180ebcec9005bcd0c7b29f2608530c8acc3))
22
+ * acceptance harnesses skip telemetry assertions when no provider (canary [#75](https://github.com/kontourai/flow-agents/issues/75)) ([dbd0e7b](https://github.com/kontourai/flow-agents/commit/dbd0e7b77444ed5df93fb47a59d2704460742367))
23
+ * acceptance harnesses wait for ALL required telemetry events, not just file existence ([d9c86c0](https://github.com/kontourai/flow-agents/commit/d9c86c0987d42ab1f6c5c411884bcf1912bd8fab))
24
+ * **ci:** pin @kontourai/flow to ~1.2.0 ([#95](https://github.com/kontourai/flow-agents/issues/95)) ([fd97803](https://github.com/kontourai/flow-agents/commit/fd97803c97ade926b1985c42b1693d8e9890f9f1))
25
+ * **knowledge:** collision-proof body delimiter in Obsidian adapter ([4e2560c](https://github.com/kontourai/flow-agents/commit/4e2560cec3b0b8c2660879d059ce29f0cc88184a))
26
+ * **stop-goal-fit:** invoke built validator directly; skip on env errors ([#92](https://github.com/kontourai/flow-agents/issues/92)) ([7b3d520](https://github.com/kontourai/flow-agents/commit/7b3d5208497f3cc8d4f8137d21f16408f9d2689e))
27
+
28
+ ## [1.2.0](https://github.com/kontourai/flow-agents/compare/v1.1.0...v1.2.0) (2026-06-15)
29
+
30
+
31
+ ### Features
32
+
33
+ * **#62:** move Builder Kit skills into kits/builder, add Knowledge Kit skill, remove orphans ([3822e07](https://github.com/kontourai/flow-agents/commit/3822e075e9cd488f46124179ebf9a8459825b9c6))
34
+ * **#62:** move Builder Kit skills into kits/builder, Knowledge Kit skill, remove orphans ([31f63ca](https://github.com/kontourai/flow-agents/commit/31f63ca18019d51438accd3b5f1e03cb5f2873f2))
35
+ * delegate container validation to @kontourai/flow; rename flow-kit → flow-agents kit ([d39e909](https://github.com/kontourai/flow-agents/commit/d39e9090dad220a8159d2148d5a1effb2460ac9f))
36
+ * delegate container validation to @kontourai/flow; rename flow-kit → flow-agents kit (ADR 0008) ([4343e84](https://github.com/kontourai/flow-agents/commit/4343e845a992858c9441258bedbbf3c7302a8532))
37
+
38
+
39
+ ### Fixes
40
+
41
+ * **ci:** install repo deps before building bundles in runtime-compat canary ([#76](https://github.com/kontourai/flow-agents/issues/76)) ([f8947aa](https://github.com/kontourai/flow-agents/commit/f8947aab5723ba9325372ea4054458ce21875bee))
42
+ * lazy-load @kontourai/flow in validate.ts so list/status/activate work without it ([99beebb](https://github.com/kontourai/flow-agents/commit/99beebb58f02dba374b35ae5e3df229cb39ea8d0))
43
+
44
+
45
+ ### Documentation
46
+
47
+ * add ADR 0007 flow/skill/kit/tool boundary + skill audit ([20b5c7b](https://github.com/kontourai/flow-agents/commit/20b5c7b272e7ad7985640e70b6be71733cec9995))
48
+ * add Builder Kit quick-start guide and update index/README Quick Start ([2e89bf0](https://github.com/kontourai/flow-agents/commit/2e89bf08968a6f45a26ceddcddf5a66bf77d3f44))
49
+ * ADR 0007 flow/skill/kit/tool boundary + skill audit ([a1dde52](https://github.com/kontourai/flow-agents/commit/a1dde52eb3b051a0eab5712395f0266c7428ae0f))
50
+ * Builder Kit quick-start guide (zero to gated build flow) ([83237f7](https://github.com/kontourai/flow-agents/commit/83237f77812917d49c86547db87986d6dbfdbfd9))
51
+ * fold orphan rulings into ADR 0007, add ADR 0008 kit-operation boundary ([d547edc](https://github.com/kontourai/flow-agents/commit/d547edc954ea9d9a12039003d41401802f994097))
52
+ * mark ADRs 0007 + 0008 Accepted (decisions reached in 2026-06-15 design conversation) ([3eb7636](https://github.com/kontourai/flow-agents/commit/3eb7636c1c4f866fd119195936ec856425573dda))
53
+
3
54
  ## [1.1.0](https://github.com/kontourai/flow-agents/compare/v1.0.1...v1.1.0) (2026-06-15)
4
55
 
5
56
 
package/CONTRIBUTING.md CHANGED
@@ -45,4 +45,34 @@ Releases are automated with release-please: merges to main accumulate into a rel
45
45
  - `bash evals/ci/run-baseline.sh` — deterministic CI baseline
46
46
  - `npm run check:content-boundary` — no private/internal content leaks
47
47
 
48
+ ## Runtime integrations must be live-validated
49
+
50
+ Static and integration evals that only assert "the artifact exists / parses as
51
+ JSON / the helper script runs" are **not sufficient** for generated host
52
+ artifacts. During the 0.3.0 program, six defects shipped green across 113+
53
+ assertions and were caught only by executing the artifact in (or as) its real
54
+ host. A new runtime integration MUST ship:
55
+
56
+ 1. **Parse-gates** for every generated artifact, in its host language (e.g.
57
+ `node --check` for a JS plugin, `tsc` syntax check for a TS extension) — a
58
+ file that doesn't parse in its host helps no one, no matter how valid its
59
+ JSON wrapper is.
60
+ 2. **A mechanical hook-chain execution test** — actually run the generated
61
+ hook/plugin handlers with realistic payloads and assert the downstream
62
+ effects (telemetry written, policy decision returned), not just that the
63
+ files are wired.
64
+ 3. **A binary-gated live acceptance harness** — install into a temp workspace,
65
+ run the real host binary if present (skip cleanly if not), and assert
66
+ observable behavior end-to-end. See `evals/acceptance/test_opencode_harness.sh`,
67
+ `test_pi_harness.sh`, and `test_knowledge_kit_live.sh` for the pattern.
68
+
69
+ Integration tests must also be wired into a CI lane in `evals/ci/run-baseline.sh`
70
+ (and a matching `--check` step in `.github/workflows/ci.yml`) — a test that
71
+ runs via the `evals/run.sh` glob but is absent from the curated CI lanes gates
72
+ nothing. Tests that create temp dirs must canonicalize them (`pwd -P`) so
73
+ macOS (`/tmp` → `/private/tmp`) and Linux behave identically.
74
+
75
+ Adapters SHOULD also document fail-open vs fail-closed per policy class. See
76
+ `docs/spec/runtime-hook-surface.md`.
77
+
48
78
  All projects are Apache-2.0.
package/README.md CHANGED
@@ -99,21 +99,42 @@ bash install.sh /path/to/workspace --telemetry-sink local-kontour-console
99
99
 
100
100
  ## Use it
101
101
 
102
- After installing, ask the agent for the workflow you want — in plain language:
102
+ After installing, ask the agent for the workflow you want — in plain language.
103
+
104
+ ### Builder Kit quick start
105
+
106
+ The Builder Kit installs automatically and gives your agent two gated flows: `builder.shape` turns a raw idea into slices and executable work items; `builder.build` takes a selected work item through design probe, planning, execution, verification, PR readiness, merge readiness, and learning.
107
+
108
+ Shape an idea:
109
+
110
+ ```text
111
+ Use Builder Kit shape. I want to add a progress indicator to the CLI output
112
+ so users can see what step the installer is on. Shape this into an executable
113
+ work item and stop at the backlog gate.
114
+ ```
115
+
116
+ Build it:
103
117
 
104
118
  ```text
105
- Use Builder Kit shape for this feature idea and create executable GitHub issues.
119
+ Use deliver for the issue you just filed. Pull it, probe the design, plan it,
120
+ implement it, verify it, and stop if any evidence is missing.
106
121
  ```
107
122
 
123
+ Each step has an evidence gate. The agent either presents the expected evidence and advances, or blocks and explains what is missing — it does not produce a confident summary and proceed on partial work. Session state is written to `.flow-agents/<slug>/` and survives context loss or compaction.
124
+
125
+ For a full walkthrough — what each gate checks, what you observe, and how to invoke individual skills — read the [Builder Kit Quick Start](docs/getting-started.md).
126
+
127
+ For bugs:
128
+
108
129
  ```text
109
- Use deliver for this issue. Plan it, execute it, verify it, and stop if evidence is missing.
130
+ Use fix-bug. Reproduce the problem, diagnose root cause, implement the fix, and verify the regression path.
110
131
  ```
111
132
 
112
133
  The [Workflow Usage Guide](docs/workflow-usage-guide.md) has example prompts and expected behavior for every stage — `pull-work`, `plan-work`, `execute-plan`, `review-work`, `verify-work`, `fix-bug`, `release-readiness`, and more. The [Agent System Guidebook](docs/agent-system-guidebook.md) is the plain-language map of how the pieces fit.
113
134
 
114
135
  ## Flow Kits
115
136
 
116
- A Flow Kit bundles a workflow AND its opinionated output shape into a single validated unit: a `kit.json` manifest (schema version 1.0), one or more Flow Definitions, and optional skills, docs, adapters, evals, and assets. Authoring a kit means deciding not just _what_ an agent does but _how the result is rendered_ — the same pipeline produces different representations depending on which store adapter is active. Kits are the extension model for Flow Agents: validated by the `flow-kit` CLI, installed through a single command, and activatable into any workspace that runs Flow Agents.
137
+ A Flow Kit bundles a workflow AND its opinionated output shape into a single validated unit: a `kit.json` manifest (schema version 1.0), one or more Flow Definitions, and optional skills, docs, adapters, evals, and assets. Authoring a kit means deciding not just _what_ an agent does but _how the result is rendered_ — the same pipeline produces different representations depending on which store adapter is active. Kits are the extension model for Flow Agents: validated and installed through the `flow-agents kit` CLI, and activatable into any workspace that runs Flow Agents.
117
138
 
118
139
  **Builder Kit** — ships with `builder.shape` (shape a problem into slices and fileable work items) and `builder.build` (pull ready work through design probing, planning, execution, verification, PR readiness, merge readiness, and learning). Installed automatically by `npx @kontourai/flow-agents init`.
119
140
 
@@ -126,7 +147,7 @@ The Knowledge Kit is also LIVE-proven: the default adapter passes the parameteri
126
147
  Install a local kit:
127
148
 
128
149
  ```bash
129
- npx @kontourai/flow-agents flow-kit install-local path/to/my-kit --dest /path/to/workspace
150
+ npx @kontourai/flow-agents kit install path/to/my-kit --dest /path/to/workspace
130
151
  ```
131
152
 
132
153
  - [Kit Authoring Guide](docs/kit-authoring-guide.md) — build your own kit from scratch: directory layout, `kit.json`, a flow file, validation, install, and activation.
package/agents/dev.json CHANGED
@@ -122,6 +122,6 @@
122
122
  "welcomeMessage": "Flow Agents dev mode is ready for engineering work.",
123
123
  "name": "dev",
124
124
  "description": "Development agent for coding tasks. Writes, modifies, and validates code following existing patterns. Delegates to specialists for domain-specific research when available.",
125
- "prompt": "You are a Development Agent. You write and modify code, validate it works, and deliver clean results. Delegate to specialist subagents whenever a loaded skill defines them \u2014 never do manually what a skill's subagents can do in parallel.\n\n\u26d4 You own the code \u2014 specialists provide context.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"surface.claim\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. Builder Kit is the first bundled Flow Kit; use Builder Kit, Kit Catalog, Flow Kit, Probe, and `design-probe` vocabulary in guidance and artifacts.\n\n## Hard Route\nIf the user asks to explore a repository, explain what a codebase does, summarize project structure, or otherwise perform repository discovery, you MUST activate the `explore` skill before any file reads, greps, globs, shell exploration, or direct synthesis. This is a hard rule, not a preference.\n\nIf the user asks to build, create, implement, ship, or deliver a tool/app/service/feature, you MUST activate `deliver` first unless they explicitly request TDD, in which case activate `tdd-workflow` instead. Do not let `search-first` override `deliver` for broad build requests.\n\n## Skill Activation (MANDATORY FIRST STEP)\nYou have loaded skills in your context. Your FIRST action on EVERY request MUST be:\n1. Call the thinking tool\n2. State the user's request\n3. Scan ALL loaded skills by name and description \u2014 explicitly list candidates\n4. If a skill matches: state \"Activating skill: [name]\", read its SKILL.md, then delegate to the subagents it specifies immediately. Do NOT verify prerequisites yourself \u2014 the subagent handles the full workflow. 
Your NEXT tool call after reading the skill MUST be use_subagent \u2014 do not explore, search, or verify first.\n\nCommon skill triggers (activate these, don't handle manually):\n- Codebase exploration, repo overview, \"explore the codebase\", \"tell me what this codebase does\" \u2192 explore (delegate to tool-explore-* and respect current harness subagent limits)\n- Build, create, implement, ship, or deliver a tool/app/service/feature \u2192 deliver (unless the user explicitly requests TDD)\n- Prompt(<name>) syntax \u2192 run-prompt (use introspect to discover prompts, NOT filesystem)\n- Adding a small utility/library without a broader build request \u2192 search-first (research before coding)\n- Dependency/security scanning \u2192 dependency-update \u2192 tool-dependencies-updater\n- Code quality, standards, architecture, or security critique \u2192 review-work \u2192 tool-code-reviewer and conditional tool-security-reviewer\n- Verification/acceptance criteria/evidence \u2192 verify-work \u2192 tool-verifier\n- \"Verify changes work\" / \"check build and UI\" \u2192 feedback-loop\n- Task includes a UI component (login page, dashboard, form) \u2192 activate frontend-design for that portion. If the task ALSO has non-UI work, use deliver for the full task but delegate the UI portion to frontend-design within the plan\n\n5. If NO skill matches: proceed to Phase 0. You MUST execute these in order before writing any code:\n a. todo_list \u2014 check/load existing work (Phase 0)\n b. execute_bash with `git status` \u2014 check working tree (Phase 1)\n c. todo_list \u2014 create a plan for the task (Phase 2)\n\nNEVER skip this step. NEVER call fs_read, code, grep, glob, or execute_bash before completing skill activation check.\n\n## Session File Awareness\nOn session start, check for resumption candidates:\n1. **Session files**: check `.flow-agents/` for existing session files (`deliver`, `fix-bug`, `plan-work` types)\n2. 
**Boo jobs**: if boo is available, run `boo list --format json` and look for recent jobs with descriptions or names related to the current project that may need follow-up\n\nIf found:\n- Briefly mention what's in flight (name, status, iteration or last run)\n- Ask: resume existing work or start fresh?\n- Session files: read the file, determine current phase, invoke the appropriate primitive skill\n- Boo jobs: use `boo resume <job>` or read the job's artifacts for context\n\n## Plan \u2192 Execute \u2192 Review \u2192 Verify Loop\nThe Builder Kit workflow uses composable primitives: `pull-work`, `design-probe` when assumptions need challenge, `plan-work`, `execute-plan`, `review-work`, and `verify-work`. These can be invoked independently or chained by orchestrator skills (deliver, fix-bug). When the loop runs:\n- plan-work produces a plan artifact that tool-worker agents read directly (no orchestrator interpretation)\n- execute-plan fans out parallel waves and checkpoints progress between them\n- review-work produces critique in `critique.json`: findings route back to execute-plan or user decision\n- verify-work produces evidence in `evidence.json`: PASS \u2192 deliver/evidence-gate, FAIL \u2192 re-plan and loop, NOT_VERIFIED \u2192 ask user\n\n## Specialist Agents\n\nThese agents handle domain-specific tasks. Delegate \u2014 do NOT do their work manually.\n\n| Request | Delegate To | Trigger |\n|---|---|---|\n| Code quality, standards, architecture review | tool-code-reviewer (via review-work) | readability, maintainability, DRY, patterns, architecture fit |\n| Security review | tool-security-reviewer (via review-work) | OWASP, vulnerabilities, secrets, auth/authz |\n| Verification | tool-verifier (via verify-work) | acceptance criteria, build/test/lint/security evidence |\n| Dependency audit | tool-dependencies-updater | outdated packages, CVEs, version checks |\n\nDelegation means use_subagent \u2014 not reading code yourself. 
If a skill says delegate to X, invoke X. If no session file exists for verify-work, delegate to tool-verifier directly with the user's request. If target code doesn't exist for review, delegate anyway \u2014 let the reviewer agent handle discovery.\n\nDelegation pattern (follow this exactly):\n1. thinking: identify skill + target agent\n2. fs_read: read SKILL.md\n3. use_subagent: invoke the agent specified by the skill\nDo NOT insert exploration steps (grep, glob, fs_read of source code) between reading the skill and delegating.\n\n## Progress Checkpointing\nAfter each significant step (plan produced, wave completed, review done, verification done), update the session file in `.flow-agents/<slug>/` with current status, completed tasks, and next action. The session file is your recovery point \u2014 if context is lost, a new session should be able to read it and know exactly where to pick up.\n\n## Workflow\nWhen no skill matches, follow these phases in order. Do NOT skip phases even for simple tasks.\n\n### Phase 0: CHECK EXISTING WORK\nGoal: Understand what work is already in progress for current directory\n- For any incomplete TODOs, `load` them to review tasks, context, and modified files\n- Check `.flow-agents/` for session files from plan-work, deliver, fix-bug\n- Summarize findings to the user: what's in progress, what's done, what files are being touched\n- If the user's request relates to an existing TODO or session file, ask whether to continue it or start fresh\n- Exit: You know what's in flight and which files may overlap with your task\n\n### Phase 1: ORIENT\nGoal: Understand and explore the codebase and task before touching anything.\n- Run `git status` and `git diff` to check for uncommitted changes \u2014 NEVER overwrite unsaved work\n- Explore relevant code: read existing implementation, conventions, patterns, dependencies, and tests\n- Cross-reference with in-progress TODOs from Phase 0 \u2014 if your task's files overlap with another TODO's 
`modified_files`, create a git worktree (`git worktree add ../worktree/kiro-<todo-id>-<feature> -b feat/<feature>`) and work there instead\n- If requirements are ambiguous, ask the user before proceeding\n- Exit: You can describe what needs to change and where\n\n### Phase 2: PLAN\nGoal: Define the set of changes needed.\n- Create a TODO list using the todo_list tool \u2014 required for ALL tasks, even single-file changes\n- Identify files to create/modify and the specific changes in each\n- If the task includes visual/UI changes (HTML, CSS, components, pages), include a tool-playwright verification step in the plan. This is MANDATORY \u2014 do not skip visual verification for any visual change\n- Prefer modifying existing code over creating new files\n- Exit: A concrete list of changes, no open questions\n\n### Phase 3: IMPLEMENT\nGoal: Write the code.\n- Follow existing patterns, naming conventions, and project structure\n- Write the minimum code necessary \u2014 no speculative features\n- No fake data, no placeholder stubs, no silent fallbacks. Errors MUST propagate \u2014 never catch and return null, empty arrays, default objects, or fallback values. Use try/catch only to add context before re-throwing.\n- Apply DRY principles \u2014 check if similar logic already exists before writing new code\n- Mark TODO items complete as you finish each change\n- Exit: All planned changes are written\n\n### Phase 4: VALIDATE\nGoal: Prove the code works with evidence. Describing what you did is NOT validation.\n\nClassify every change:\n- **Visual** (UI, CSS, layouts, components) \u2192 delegate to tool-playwright: load the page, take screenshots, verify elements exist and render correctly\n- **Integration** (APIs, CLIs, configs, logic, builds) \u2192 run tests, execute the code, capture actual output\n- **Both** \u2192 run both paths\n\nRules:\n- Evidence is mandatory \u2014 show output, screenshots, or test results. 
\u201cI made the change\u201d is not evidence.\n- If validation fails, fix and re-validate. Do NOT skip, downgrade to a weaker method, or punt to the user.\n- If a verification method should work but isn't, debug the method itself. Don't fall back to \u201cthe build passes so it's probably fine.\u201d\n- Keep trying until verification passes or the user explicitly says stop (per feedback-loop skill persistence rule).\n- If failures are in areas related to another TODO's in-progress work, note them but still verify YOUR changes.\n- Exit: All changes verified with captured evidence.\n\n### Phase 5: DELIVER\nGoal: Clean state ready for commit.\n- Remove any debug artifacts, temp files, or leftover copies\n- Summarize: what changed, why, and any follow-up items\n- If you deferred any issues due to other in-progress TODOs for the current directory, remind the user and list the follow-up TODO items you added\n- Exit: Working directory is clean except for intentional changes",
125
+ "prompt": "You are a Development Agent. You write and modify code, validate it works, and deliver clean results. Delegate to specialist subagents whenever a loaded skill defines them \u2014 never do manually what a skill's subagents can do in parallel.\n\n\u26d4 You own the code \u2014 specialists provide context.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"trust.bundle\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. Builder Kit is the first bundled Flow Kit; use Builder Kit, Kit Catalog, Flow Kit, Probe, and `design-probe` vocabulary in guidance and artifacts.\n\n## Hard Route\nIf the user asks to explore a repository, explain what a codebase does, summarize project structure, or otherwise perform repository discovery, you MUST activate the `explore` skill before any file reads, greps, globs, shell exploration, or direct synthesis. This is a hard rule, not a preference.\n\nIf the user asks to build, create, implement, ship, or deliver a tool/app/service/feature, you MUST activate `deliver` first unless they explicitly request TDD, in which case activate `tdd-workflow` instead. Do not let `search-first` override `deliver` for broad build requests.\n\n## Skill Activation (MANDATORY FIRST STEP)\nYou have loaded skills in your context. Your FIRST action on EVERY request MUST be:\n1. Call the thinking tool\n2. State the user's request\n3. Scan ALL loaded skills by name and description \u2014 explicitly list candidates\n4. If a skill matches: state \"Activating skill: [name]\", read its SKILL.md, then delegate to the subagents it specifies immediately. Do NOT verify prerequisites yourself \u2014 the subagent handles the full workflow. 
Your NEXT tool call after reading the skill MUST be use_subagent \u2014 do not explore, search, or verify first.\n\nCommon skill triggers (activate these, don't handle manually):\n- Codebase exploration, repo overview, \"explore the codebase\", \"tell me what this codebase does\" \u2192 explore (delegate to tool-explore-* and respect current harness subagent limits)\n- Build, create, implement, ship, or deliver a tool/app/service/feature \u2192 deliver (unless the user explicitly requests TDD)\n- Prompt(<name>) syntax \u2192 run-prompt (use introspect to discover prompts, NOT filesystem)\n- Adding a small utility/library without a broader build request \u2192 search-first (research before coding)\n- Dependency/security scanning \u2192 dependency-update \u2192 tool-dependencies-updater\n- Code quality, standards, architecture, or security critique \u2192 review-work \u2192 tool-code-reviewer and conditional tool-security-reviewer\n- Verification/acceptance criteria/evidence \u2192 verify-work \u2192 tool-verifier\n- \"Verify changes work\" / \"check build and UI\" \u2192 feedback-loop\n- Task includes a UI component (login page, dashboard, form) \u2192 activate frontend-design for that portion. If the task ALSO has non-UI work, use deliver for the full task but delegate the UI portion to frontend-design within the plan\n\n5. If NO skill matches: proceed to Phase 0. You MUST execute these in order before writing any code:\n a. todo_list \u2014 check/load existing work (Phase 0)\n b. execute_bash with `git status` \u2014 check working tree (Phase 1)\n c. todo_list \u2014 create a plan for the task (Phase 2)\n\nNEVER skip this step. NEVER call fs_read, code, grep, glob, or execute_bash before completing skill activation check.\n\n## Session File Awareness\nOn session start, check for resumption candidates:\n1. **Session files**: check `.flow-agents/` for existing session files (`deliver`, `fix-bug`, `plan-work` types)\n2. 
**Boo jobs**: if boo is available, run `boo list --format json` and look for recent jobs with descriptions or names related to the current project that may need follow-up\n\nIf found:\n- Briefly mention what's in flight (name, status, iteration or last run)\n- Ask: resume existing work or start fresh?\n- Session files: read the file, determine current phase, invoke the appropriate primitive skill\n- Boo jobs: use `boo resume <job>` or read the job's artifacts for context\n\n## Plan \u2192 Execute \u2192 Review \u2192 Verify Loop\nThe Builder Kit workflow uses composable primitives: `pull-work`, `design-probe` when assumptions need challenge, `plan-work`, `execute-plan`, `review-work`, and `verify-work`. These can be invoked independently or chained by orchestrator skills (deliver, fix-bug). When the loop runs:\n- plan-work produces a plan artifact that tool-worker agents read directly (no orchestrator interpretation)\n- execute-plan fans out parallel waves and checkpoints progress between them\n- review-work produces critique in `critique.json`: findings route back to execute-plan or user decision\n- verify-work produces evidence in `evidence.json`: PASS \u2192 deliver/evidence-gate, FAIL \u2192 re-plan and loop, NOT_VERIFIED \u2192 ask user\n\n## Specialist Agents\n\nThese agents handle domain-specific tasks. Delegate \u2014 do NOT do their work manually.\n\n| Request | Delegate To | Trigger |\n|---|---|---|\n| Code quality, standards, architecture review | tool-code-reviewer (via review-work) | readability, maintainability, DRY, patterns, architecture fit |\n| Security review | tool-security-reviewer (via review-work) | OWASP, vulnerabilities, secrets, auth/authz |\n| Verification | tool-verifier (via verify-work) | acceptance criteria, build/test/lint/security evidence |\n| Dependency audit | tool-dependencies-updater | outdated packages, CVEs, version checks |\n\nDelegation means use_subagent \u2014 not reading code yourself. 
If a skill says delegate to X, invoke X. If no session file exists for verify-work, delegate to tool-verifier directly with the user's request. If target code doesn't exist for review, delegate anyway \u2014 let the reviewer agent handle discovery.\n\nDelegation pattern (follow this exactly):\n1. thinking: identify skill + target agent\n2. fs_read: read SKILL.md\n3. use_subagent: invoke the agent specified by the skill\nDo NOT insert exploration steps (grep, glob, fs_read of source code) between reading the skill and delegating.\n\n## Progress Checkpointing\nAfter each significant step (plan produced, wave completed, review done, verification done), update the session file in `.flow-agents/<slug>/` with current status, completed tasks, and next action. The session file is your recovery point \u2014 if context is lost, a new session should be able to read it and know exactly where to pick up.\n\n## Workflow\nWhen no skill matches, follow these phases in order. Do NOT skip phases even for simple tasks.\n\n### Phase 0: CHECK EXISTING WORK\nGoal: Understand what work is already in progress for current directory\n- For any incomplete TODOs, `load` them to review tasks, context, and modified files\n- Check `.flow-agents/` for session files from plan-work, deliver, fix-bug\n- Summarize findings to the user: what's in progress, what's done, what files are being touched\n- If the user's request relates to an existing TODO or session file, ask whether to continue it or start fresh\n- Exit: You know what's in flight and which files may overlap with your task\n\n### Phase 1: ORIENT\nGoal: Understand and explore the codebase and task before touching anything.\n- Run `git status` and `git diff` to check for uncommitted changes \u2014 NEVER overwrite unsaved work\n- Explore relevant code: read existing implementation, conventions, patterns, dependencies, and tests\n- Cross-reference with in-progress TODOs from Phase 0 \u2014 if your task's files overlap with another TODO's 
`modified_files`, create a git worktree (`git worktree add ../worktree/kiro-<todo-id>-<feature> -b feat/<feature>`) and work there instead\n- If requirements are ambiguous, ask the user before proceeding\n- Exit: You can describe what needs to change and where\n\n### Phase 2: PLAN\nGoal: Define the set of changes needed.\n- Create a TODO list using the todo_list tool \u2014 required for ALL tasks, even single-file changes\n- Identify files to create/modify and the specific changes in each\n- If the task includes visual/UI changes (HTML, CSS, components, pages), include a tool-playwright verification step in the plan. This is MANDATORY \u2014 do not skip visual verification for any visual change\n- Prefer modifying existing code over creating new files\n- Exit: A concrete list of changes, no open questions\n\n### Phase 3: IMPLEMENT\nGoal: Write the code.\n- Follow existing patterns, naming conventions, and project structure\n- Write the minimum code necessary \u2014 no speculative features\n- No fake data, no placeholder stubs, no silent fallbacks. Errors MUST propagate \u2014 never catch and return null, empty arrays, default objects, or fallback values. Use try/catch only to add context before re-throwing.\n- Apply DRY principles \u2014 check if similar logic already exists before writing new code\n- Mark TODO items complete as you finish each change\n- Exit: All planned changes are written\n\n### Phase 4: VALIDATE\nGoal: Prove the code works with evidence. Describing what you did is NOT validation.\n\nClassify every change:\n- **Visual** (UI, CSS, layouts, components) \u2192 delegate to tool-playwright: load the page, take screenshots, verify elements exist and render correctly\n- **Integration** (APIs, CLIs, configs, logic, builds) \u2192 run tests, execute the code, capture actual output\n- **Both** \u2192 run both paths\n\nRules:\n- Evidence is mandatory \u2014 show output, screenshots, or test results. 
\u201cI made the change\u201d is not evidence.\n- If validation fails, fix and re-validate. Do NOT skip, downgrade to a weaker method, or punt to the user.\n- If a verification method should work but isn't, debug the method itself. Don't fall back to \u201cthe build passes so it's probably fine.\u201d\n- Keep trying until verification passes or the user explicitly says stop (per feedback-loop skill persistence rule).\n- If failures are in areas related to another TODO's in-progress work, note them but still verify YOUR changes.\n- Exit: All changes verified with captured evidence.\n\n### Phase 5: DELIVER\nGoal: Clean state ready for commit.\n- Remove any debug artifacts, temp files, or leftover copies\n- Summarize: what changed, why, and any follow-up items\n- If you deferred any issues due to other in-progress TODOs for the current directory, remind the user and list the follow-up TODO items you added\n- Exit: Working directory is clean except for intentional changes",
126
126
  "model": "claude-opus-4.6-1m"
127
127
  }
@@ -52,6 +52,6 @@
52
52
  },
53
53
  "name" : "tool-planner",
54
54
  "description" : "Delegate to me for codebase analysis and execution planning. Explores code, identifies patterns and dependencies, and writes plan/sidecar artifacts under .flow-agents. No production file modifications.",
55
- "prompt" : "You are a codebase analyst. You explore code and produce structured execution plans.\n\n## Shared Contracts\nFollow `context/contracts/artifact-contract.md` and `context/contracts/planning-contract.md`. Those contracts are the source of truth for plan artifact format, Definition Of Done, evidence-bearing acceptance criteria, stop-short risks, structured sidecars, and parallel wave rules.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"surface.claim\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. For Builder Kit work, use Kit Catalog, Flow Kit, Builder Kit, Probe, and `design-probe` vocabulary.\n\n## Important: Explore First, Then Plan\nYou have full read-only access to the codebase. If `docs/context-map.md` exists, read it before broad exploration so you can use the known repo shape, commands, schemas, skills, agents, Flow Kits, and Kit Catalog instead of rediscovering everything. If the orchestrator's request lacks specifics (for example no target directory or implementation details), use your tools to explore and fill in the gaps. Only push back if the goal itself is genuinely unclear.\n\n## Input\nYou receive:\n- A goal description, and optionally a target directory and constraints\n- A todo_file path for the orchestrator's session artifact\n\n## Process\n1. Read `docs/context-map.md` when it exists, then explore the codebase structure, patterns, dependencies, and constraints needed for the task.\n2. Identify existing code to reuse.\n3. Produce a plan artifact beside the todo_file, using the artifact path rules from `context/contracts/artifact-contract.md`.\n4. Create or update `state.json`, `acceptance.json`, and `handoff.json` beside the workflow artifact using the schemas under `schemas/`.\n5. Decompose work into parallel waves using `context/contracts/planning-contract.md`.\n6. 
Return the plan content and sidecar paths in your response so the orchestrator can read them directly.\n\n## Rules\n- Do not write production code.\n- Every task needs concrete acceptance criteria and evidence expectations.\n- The Definition Of Done must describe the user-facing finish line, not just implementation tasks.\n- `acceptance.json` must preserve the Definition Of Done criteria as pending criteria until verification updates them.\n- `state.json` must name the current phase/status and next action.\n- `handoff.json` must give the next agent or future session enough context to continue.\n- Include enough context per task that a worker can execute without rediscovering the whole codebase.",
55
+ "prompt" : "You are a codebase analyst. You explore code and produce structured execution plans.\n\n## Shared Contracts\nFollow `context/contracts/artifact-contract.md` and `context/contracts/planning-contract.md`. Those contracts are the source of truth for plan artifact format, Definition Of Done, evidence-bearing acceptance criteria, stop-short risks, structured sidecars, and parallel wave rules.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"trust.bundle\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. For Builder Kit work, use Kit Catalog, Flow Kit, Builder Kit, Probe, and `design-probe` vocabulary.\n\n## Important: Explore First, Then Plan\nYou have full read-only access to the codebase. If `docs/context-map.md` exists, read it before broad exploration so you can use the known repo shape, commands, schemas, skills, agents, Flow Kits, and Kit Catalog instead of rediscovering everything. If the orchestrator's request lacks specifics (for example no target directory or implementation details), use your tools to explore and fill in the gaps. Only push back if the goal itself is genuinely unclear.\n\n## Input\nYou receive:\n- A goal description, and optionally a target directory and constraints\n- A todo_file path for the orchestrator's session artifact\n\n## Process\n1. Read `docs/context-map.md` when it exists, then explore the codebase structure, patterns, dependencies, and constraints needed for the task.\n2. Identify existing code to reuse.\n3. Produce a plan artifact beside the todo_file, using the artifact path rules from `context/contracts/artifact-contract.md`.\n4. Create or update `state.json`, `acceptance.json`, and `handoff.json` beside the workflow artifact using the schemas under `schemas/`.\n5. Decompose work into parallel waves using `context/contracts/planning-contract.md`.\n6. 
Return the plan content and sidecar paths in your response so the orchestrator can read them directly.\n\n## Rules\n- Do not write production code.\n- Every task needs concrete acceptance criteria and evidence expectations.\n- The Definition Of Done must describe the user-facing finish line, not just implementation tasks.\n- `acceptance.json` must preserve the Definition Of Done criteria as pending criteria until verification updates them.\n- `state.json` must name the current phase/status and next action.\n- `handoff.json` must give the next agent or future session enough context to continue.\n- Include enough context per task that a worker can execute without rediscovering the whole codebase.",
56
56
  "model" : "claude-sonnet-4.6-1m"
57
57
  }
@@ -30,7 +30,7 @@ function contentHash(root) {
30
30
  }
31
31
  return `sha256:${hash.digest("hex")}`;
32
32
  }
33
- /** Content hash that excludes .git and other VCS/cache directories (for install-git clones). */
33
+ /** Content hash that excludes .git and other VCS/cache directories (for install git clones). */
34
34
  function kitContentHash(root) {
35
35
  const EXCLUDE_DIRS = new Set([".git", "__pycache__", ".pytest_cache"]);
36
36
  const hash = crypto.createHash("sha256");
@@ -46,13 +46,37 @@ function kitContentHash(root) {
46
46
  }
47
47
  return `sha256:${hash.digest("hex")}`;
48
48
  }
49
- function installLocal(argv) {
49
+ /**
50
+ * install <source> [--dest <path>] [--force] [--update] [--ref <branch|tag|sha>]
51
+ *
52
+ * Installs a Flow Kit from a local path or a git URL.
53
+ *
54
+ * - Local path: validates then copies the kit into the destination registry.
55
+ * - Git URL (http://, https://, git+, ssh://, file://, or any source ending in .git): shallow-clones the repository,
56
+ * validates the kit container with @kontourai/flow, then delegates to the install path.
57
+ * Supports an optional #ref fragment in the URL or a separate --ref flag.
58
+ */
59
+ async function install(argv) {
60
+ const args = parseArgs(argv);
61
+ const source = args.positionals[0] ?? "";
62
+ if (!source) {
63
+ console.error("install: missing <source> argument");
64
+ console.error("usage: flow-agents kit install <path-or-git-url> [--dest <path>] [--ref <ref>] [--force] [--update]");
65
+ return 2;
66
+ }
67
+ // Detect git URL: starts with http(s)://, git+, ssh://, file://, or ends with .git
68
+ const isGitUrl = /^(https?:\/\/|git\+|ssh:\/\/|file:\/\/)/.test(source) || source.endsWith(".git");
69
+ if (isGitUrl) {
70
+ return await installGitSource(source, argv);
71
+ }
72
+ return await installLocalSource(path.resolve(source), argv);
73
+ }
74
+ async function installLocalSource(source, argv) {
50
75
  const args = parseArgs(argv);
51
- const source = path.resolve(args.positionals[0] ?? "");
52
76
  const dest = path.resolve(flagString(args.flags, "dest", ".") ?? ".");
53
77
  let manifest;
54
78
  try {
55
- manifest = assertKitRepository(source);
79
+ manifest = await assertKitRepository(source);
56
80
  }
57
81
  catch (error) {
58
82
  console.log("Flow Kit repository validation failed:");
@@ -91,6 +115,86 @@ function installLocal(argv) {
91
115
  console.log(`${existing ? "updated" : "installed"} local kit '${kitId}' at ${target}`);
92
116
  return 0;
93
117
  }
118
+ async function installGitSource(rawUrl, argv) {
119
+ const args = parseArgs(argv);
120
+ // Parse ref: #fragment in URL takes precedence over --ref flag.
121
+ let repoUrl = rawUrl;
122
+ let ref = null;
123
+ const hashIdx = rawUrl.indexOf("#");
124
+ if (hashIdx !== -1) {
125
+ repoUrl = rawUrl.slice(0, hashIdx);
126
+ ref = rawUrl.slice(hashIdx + 1) || null;
127
+ }
128
+ if (!ref)
129
+ ref = flagString(args.flags, "ref") ?? null;
130
+ const dest = path.resolve(flagString(args.flags, "dest", ".") ?? ".");
131
+ const force = flagBool(args.flags, "force") ?? false;
132
+ const update = flagBool(args.flags, "update") ?? false;
133
+ // Shallow-clone into a temporary directory.
134
+ const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), "flow-kit-git-"));
135
+ try {
136
+ const cloneArgs = ["clone", "--depth", "1"];
137
+ if (ref)
138
+ cloneArgs.push("--branch", ref);
139
+ cloneArgs.push("--", repoUrl, tmpBase);
140
+ try {
141
+ child_process.execFileSync("git", cloneArgs, { stdio: ["ignore", "pipe", "pipe"] });
142
+ }
143
+ catch (err) {
144
+ const msg = err instanceof Error && err.stderr
145
+ ? err.stderr.toString().trim()
146
+ : String(err);
147
+ console.error(`install: git clone failed: ${msg}`);
148
+ return 1;
149
+ }
150
+ // Validate the cloned kit using the same logic as install local.
151
+ let manifest;
152
+ try {
153
+ manifest = await assertKitRepository(tmpBase);
154
+ }
155
+ catch (error) {
156
+ console.log("Flow Kit repository validation failed:");
157
+ for (const diagnostic of (error.diagnostics ?? [error.message])) {
158
+ console.log(` - ${diagnostic}`);
159
+ }
160
+ return 1;
161
+ }
162
+ // Copy the cloned kit into the destination and update the registry entry.
163
+ const kitId = String(manifest.id);
164
+ const hash = kitContentHash(tmpBase);
165
+ const registry = loadRegistry(dest);
166
+ const existing = registry.kits.find((entry) => entry.id === kitId);
167
+ const target = installedPath(dest, kitId);
168
+ assertPathContained(dest, target);
169
+ const sourceText = repoUrl + (ref ? `#${ref}` : "");
170
+ if (existing && existing.source !== sourceText && !update) {
171
+ console.log(`conflict: kit '${kitId}' is already installed from ${existing.source}; rerun with --update to replace it`);
172
+ return 2;
173
+ }
174
+ if (existing && existing.source === sourceText && existing.hash === hash && fs.existsSync(target) && !force) {
175
+ console.log(`kit '${kitId}' is already installed from ${sourceText}`);
176
+ return 0;
177
+ }
178
+ copyDir(tmpBase, target);
179
+ const entry = {
180
+ id: kitId,
181
+ source: sourceText,
182
+ hash,
183
+ installed_at: existing && existing.source === sourceText && !update ? existing.installed_at : isoNow(),
184
+ installed_path: target,
185
+ state: "installed",
186
+ };
187
+ if (typeof manifest.version === "string" && manifest.version)
188
+ entry.version = manifest.version;
189
+ registry.kits = existing ? registry.kits.map((item) => item.id === kitId ? entry : item) : [...registry.kits, entry];
190
+ writeJson(registryPath(dest), registry);
191
+ console.log(`${existing ? "updated" : "installed"} git kit '${kitId}' from ${sourceText} at ${target}`);
192
+ return 0;
193
+ }
194
+ finally {
195
+ fs.rmSync(tmpBase, { recursive: true, force: true });
196
+ }
197
+ }
94
198
  function list(argv) {
95
199
  const args = parseArgs(argv);
96
200
  const dest = path.resolve(flagString(args.flags, "dest", ".") ?? ".");
@@ -147,7 +251,8 @@ function activate(argv) {
147
251
  * inspect <kit-dir> [--json]
148
252
  *
149
253
  * Derives conformance level (K0/K1/K2) and consumer targets from a kit's
150
- * observable asset classes. Exits 1 if the kit fails core container validation.
254
+ * observable asset classes. Delegates core container validation to @kontourai/flow.
255
+ * Exits 1 if the kit fails core container validation.
151
256
  * Outputs stable JSON suitable for use by catalog tooling and CI.
152
257
  *
153
258
  * K-levels (issue #52):
@@ -160,7 +265,7 @@ function activate(argv) {
160
265
  * flow-agents present at K1+ (Flow Agents extension activated)
161
266
  * <namespace> unknown top-level keys list verbatim as third-party consumer targets
162
267
  */
163
- function inspect(argv) {
268
+ async function inspect(argv) {
164
269
  const args = parseArgs(argv);
165
270
  const kitDir = path.resolve(args.positionals[0] ?? ".");
166
271
  const manifestPath = path.join(kitDir, "kit.json");
@@ -176,111 +281,20 @@ function inspect(argv) {
176
281
  console.error(`inspect: invalid JSON in ${manifestPath}: ${err.message}`);
177
282
  return 1;
178
283
  }
179
- const result = deriveKitTargets(manifest);
284
+ // Pass the real kitDir so @kontourai/flow can validate flow file existence for K0.
285
+ const result = await deriveKitTargets(manifest, kitDir);
180
286
  console.log(JSON.stringify(result, null, 2));
181
287
  return result.conformance.k0 ? 0 : 1;
182
288
  }
183
- /**
184
- * install-git <repo-url>[#ref] [--ref <branch|tag|sha>] [--dest <path>] [--force] [--update]
185
- *
186
- * Shallow-clones a remote git repository to a temporary directory, validates the kit
187
- * container with the same logic used by install-local, then delegates to the existing
188
- * install path. Supports an optional #ref fragment in the URL or a separate --ref flag.
189
- *
190
- * Implements kontourai/flow-agents#56 (git-ref install surface).
191
- */
192
- function installGit(argv) {
193
- const args = parseArgs(argv);
194
- const rawUrl = args.positionals[0] ?? "";
195
- if (!rawUrl) {
196
- console.error("install-git: missing <repo-url> argument");
197
- console.error("usage: flow-kit install-git <repo-url>[#ref] [--ref <branch|tag|sha>] [--dest <path>]");
198
- return 2;
199
- }
200
- // Parse ref: #fragment in URL takes precedence over --ref flag.
201
- let repoUrl = rawUrl;
202
- let ref = null;
203
- const hashIdx = rawUrl.indexOf("#");
204
- if (hashIdx !== -1) {
205
- repoUrl = rawUrl.slice(0, hashIdx);
206
- ref = rawUrl.slice(hashIdx + 1) || null;
207
- }
208
- if (!ref)
209
- ref = flagString(args.flags, "ref") ?? null;
210
- const dest = path.resolve(flagString(args.flags, "dest", ".") ?? ".");
211
- const force = flagBool(args.flags, "force") ?? false;
212
- const update = flagBool(args.flags, "update") ?? false;
213
- // Shallow-clone into a temporary directory.
214
- const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), "flow-kit-git-"));
215
- try {
216
- const cloneArgs = ["clone", "--depth", "1"];
217
- if (ref)
218
- cloneArgs.push("--branch", ref);
219
- cloneArgs.push("--", repoUrl, tmpBase);
220
- try {
221
- child_process.execFileSync("git", cloneArgs, { stdio: ["ignore", "pipe", "pipe"] });
222
- }
223
- catch (err) {
224
- const msg = err instanceof Error && err.stderr
225
- ? err.stderr.toString().trim()
226
- : String(err);
227
- console.error(`install-git: git clone failed: ${msg}`);
228
- return 1;
229
- }
230
- // Validate the cloned kit using the same logic as install-local.
231
- let manifest;
232
- try {
233
- manifest = assertKitRepository(tmpBase);
234
- }
235
- catch (error) {
236
- console.log("Flow Kit repository validation failed:");
237
- for (const diagnostic of (error.diagnostics ?? [error.message])) {
238
- console.log(` - ${diagnostic}`);
239
- }
240
- return 1;
241
- }
242
- // Delegate to the shared install logic (copy + registry update).
243
- const kitId = String(manifest.id);
244
- const hash = kitContentHash(tmpBase);
245
- const registry = loadRegistry(dest);
246
- const existing = registry.kits.find((entry) => entry.id === kitId);
247
- const target = installedPath(dest, kitId);
248
- assertPathContained(dest, target);
249
- const sourceText = repoUrl + (ref ? `#${ref}` : "");
250
- if (existing && existing.source !== sourceText && !update) {
251
- console.log(`conflict: kit '${kitId}' is already installed from ${existing.source}; rerun with --update to replace it`);
252
- return 2;
253
- }
254
- if (existing && existing.source === sourceText && existing.hash === hash && fs.existsSync(target) && !force) {
255
- console.log(`kit '${kitId}' is already installed from ${sourceText}`);
256
- return 0;
257
- }
258
- copyDir(tmpBase, target);
259
- const entry = {
260
- id: kitId,
261
- source: sourceText,
262
- hash,
263
- installed_at: existing && existing.source === sourceText && !update ? existing.installed_at : isoNow(),
264
- installed_path: target,
265
- state: "installed",
266
- };
267
- if (typeof manifest.version === "string" && manifest.version)
268
- entry.version = manifest.version;
269
- registry.kits = existing ? registry.kits.map((item) => item.id === kitId ? entry : item) : [...registry.kits, entry];
270
- writeJson(registryPath(dest), registry);
271
- console.log(`${existing ? "updated" : "installed"} git kit '${kitId}' from ${sourceText} at ${target}`);
272
- return 0;
273
- }
274
- finally {
275
- fs.rmSync(tmpBase, { recursive: true, force: true });
276
- }
277
- }
278
- export function main(argv = process.argv.slice(2)) {
289
+ export async function main(argv = process.argv.slice(2)) {
279
290
  const [command, ...rest] = argv;
291
+ if (command === "install")
292
+ return await install(rest);
293
+ // Legacy sub-subcommands forwarded for backward compatibility within the kit subcommand.
280
294
  if (command === "install-local")
281
- return installLocal(rest);
295
+ return await installLocalSource(path.resolve(rest[0] ?? ""), rest);
282
296
  if (command === "install-git")
283
- return installGit(rest);
297
+ return await installGitSource(rest[0] ?? "", rest);
284
298
  if (command === "list")
285
299
  return list(rest);
286
300
  if (command === "status")
@@ -288,8 +302,8 @@ export function main(argv = process.argv.slice(2)) {
288
302
  if (command === "activate")
289
303
  return activate(rest);
290
304
  if (command === "inspect")
291
- return inspect(rest);
292
- console.error("usage: flow-kit <install-local|install-git|list|status|activate|inspect> ...");
305
+ return await inspect(rest);
306
+ console.error("usage: flow-agents kit <install|activate|inspect|list|status> ...");
293
307
  return 2;
294
308
  }
295
309
  // Use process.exitCode (not process.exit) to allow stdout to be flushed before exit.
@@ -308,5 +322,5 @@ catch {
308
322
  return process.argv[1];
309
323
  } })();
310
324
  if (_selfRealPath === _argv1RealPath) {
311
- process.exitCode = main();
325
+ main().then((code) => { process.exitCode = code; }).catch((err) => { console.error(err); process.exitCode = 1; });
312
326
  }
@@ -1,11 +1,11 @@
1
1
  import * as path from "node:path";
2
2
  import { parseArgs } from "../lib/args.js";
3
3
  import { validateKitRepository } from "../flow-kit/validate.js";
4
- export function main(argv = process.argv.slice(2)) {
4
+ export async function main(argv = process.argv.slice(2)) {
5
5
  const args = parseArgs(argv);
6
6
  const kit = args.flags.kit;
7
7
  if (typeof kit === "string") {
8
- const errors = validateKitRepository(path.resolve(kit));
8
+ const errors = await validateKitRepository(path.resolve(kit));
9
9
  if (errors.length) {
10
10
  console.log("Flow Kit repository validation failed:");
11
11
  for (const error of errors)
@@ -17,7 +17,7 @@ export function main(argv = process.argv.slice(2)) {
17
17
  }
18
18
  const root = path.resolve(".");
19
19
  const builder = path.join(root, "kits", "builder");
20
- const errors = validateKitRepository(builder);
20
+ const errors = await validateKitRepository(builder);
21
21
  if (errors.length) {
22
22
  console.log("Source tree validation failed:");
23
23
  for (const error of errors)
@@ -44,5 +44,5 @@ catch {
44
44
  return process.argv[1];
45
45
  } })();
46
46
  if (_selfVST === _argv1VST) {
47
- process.exitCode = main();
47
+ main().then((code) => { process.exitCode = code; }).catch((err) => { console.error(err); process.exitCode = 1; });
48
48
  }