@kontourai/flow-agents 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +6 -1
- package/.github/workflows/kit-gates-demo.yml +6 -2
- package/CHANGELOG.md +33 -0
- package/CONTRIBUTING.md +30 -0
- package/agents/dev.json +1 -1
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/console-learning-projection.d.ts +1 -0
- package/build/src/cli/effective-backlog-settings.d.ts +1 -0
- package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
- package/build/src/cli/init.d.ts +17 -0
- package/build/src/cli/kit.d.ts +1 -0
- package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
- package/build/src/cli/publish-change-helper.d.ts +1 -0
- package/build/src/cli/pull-work-provider.d.ts +1 -0
- package/build/src/cli/runtime-adapter.d.ts +1 -0
- package/build/src/cli/telemetry-doctor.d.ts +1 -0
- package/build/src/cli/usage-feedback.d.ts +1 -0
- package/build/src/cli/utterance-check.d.ts +1 -0
- package/build/src/cli/validate-hook-influence.d.ts +1 -0
- package/build/src/cli/validate-source-tree.d.ts +1 -0
- package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
- package/build/src/cli/veritas-governance.d.ts +1 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
- package/build/src/cli/workflow-sidecar.d.ts +32 -0
- package/build/src/cli/workflow-sidecar.js +119 -22
- package/build/src/cli.d.ts +2 -0
- package/build/src/flow-kit/validate.d.ts +81 -0
- package/build/src/flow-kit/validate.js +32 -1
- package/build/src/index.d.ts +5 -0
- package/build/src/index.js +36 -0
- package/build/src/lib/args.d.ts +8 -0
- package/build/src/lib/fs.d.ts +7 -0
- package/build/src/lib/workflow-learning-projection.d.ts +132 -0
- package/build/src/runtime-adapters.d.ts +18 -0
- package/build/src/tools/build-universal-bundles.d.ts +2 -0
- package/build/src/tools/build-universal-bundles.js +14 -0
- package/build/src/tools/common.d.ts +9 -0
- package/build/src/tools/filter-installed-packs.d.ts +2 -0
- package/build/src/tools/generate-context-map.d.ts +2 -0
- package/build/src/tools/validate-package.d.ts +2 -0
- package/build/src/tools/validate-source-tree.d.ts +2 -0
- package/console.telemetry.json +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +7 -7
- package/docs/developer-architecture.md +14 -0
- package/docs/kit-authoring-guide.md +99 -6
- package/docs/operating-layers.md +2 -2
- package/docs/spec/runtime-hook-surface.md +16 -1
- package/docs/veritas-integration.md +4 -4
- package/docs/workflow-eval-strategy.md +2 -2
- package/docs/workflow-usage-guide.md +1 -1
- package/evals/acceptance/test_opencode_harness.sh +18 -10
- package/evals/acceptance/test_pi_harness.sh +10 -6
- package/evals/ci/run-baseline.sh +1 -1
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +4 -4
- package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +4 -4
- package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +4 -4
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +4 -4
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +4 -4
- package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +4 -4
- package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +2 -2
- package/evals/fixtures/surface-trust/artifact-absent.json +2 -2
- package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +2 -2
- package/evals/fixtures/surface-trust/missing-authority-trust-report.json +2 -2
- package/evals/fixtures/surface-trust/provider-absent.json +2 -2
- package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +2 -2
- package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +2 -2
- package/evals/integration/test_console_learning_projection.sh +1 -1
- package/evals/integration/test_goal_fit_hook.sh +144 -0
- package/evals/integration/test_hook_category_behaviors.sh +14 -0
- package/evals/integration/test_kit_conformance_levels.sh +55 -1
- package/evals/integration/test_workflow_sidecar_writer.sh +9 -9
- package/evals/run.sh +2 -0
- package/evals/static/test_library_exports.sh +85 -0
- package/evals/static/test_package.sh +3 -3
- package/evals/static/test_universal_bundles.sh +15 -0
- package/evals/static/test_workflow_skills.sh +4 -4
- package/kits/builder/flows/build.flow.json +48 -48
- package/kits/builder/flows/shape.flow.json +36 -36
- package/kits/knowledge/adapters/obsidian-store/index.js +137 -26
- package/kits/knowledge/evals/contract-suite/suite.test.js +90 -0
- package/kits/knowledge/flows/compile.flow.json +12 -12
- package/kits/knowledge/flows/consolidate.flow.json +16 -16
- package/kits/knowledge/flows/ingest.flow.json +12 -12
- package/kits/knowledge/flows/retire.flow.json +16 -16
- package/kits/knowledge/flows/store-contract.flow.json +12 -12
- package/kits/knowledge/flows/synthesize.flow.json +16 -16
- package/kits/release-evidence/flows/release-evidence.flow.json +3 -3
- package/package.json +14 -2
- package/schemas/workflow-evidence.schema.json +2 -1
- package/scripts/hooks/stop-goal-fit.js +66 -18
- package/src/cli/workflow-sidecar.ts +101 -21
- package/src/flow-kit/validate.ts +55 -1
- package/src/index.ts +53 -0
- package/src/tools/build-universal-bundles.ts +14 -0
- package/tsconfig.json +1 -0
package/.github/workflows/ci.yml
CHANGED
|
@@ -40,7 +40,9 @@ jobs:
|
|
|
40
40
|
mkdir -p .flow-cli
|
|
41
41
|
cd .flow-cli
|
|
42
42
|
printf '{"name":"flow-cli-host","private":true}\n' > package.json
|
|
43
|
-
|
|
43
|
+
# Pinned to ~1.3.0: gate evidence uses the Hachure trust.bundle format
|
|
44
|
+
# (kontourai/flow#84). flow-agents migrated surface.claim -> trust.bundle.
|
|
45
|
+
npm install --no-save @kontourai/flow@~1.3.0
|
|
44
46
|
|
|
45
47
|
- name: Install shell tools
|
|
46
48
|
run: |
|
|
@@ -216,6 +218,9 @@ jobs:
|
|
|
216
218
|
continue-on-error: true
|
|
217
219
|
run: bash evals/ci/run-baseline.sh --check flow-kit-install-git-integration
|
|
218
220
|
|
|
221
|
+
- name: Console learning projection integration
|
|
222
|
+
continue-on-error: true
|
|
223
|
+
run: bash evals/ci/run-baseline.sh --check console-learning-projection-integration
|
|
219
224
|
|
|
220
225
|
- name: Context map integration
|
|
221
226
|
continue-on-error: true
|
|
package/.github/workflows/kit-gates-demo.yml
CHANGED
|
@@ -50,7 +50,9 @@ jobs:
|
|
|
50
50
|
mkdir -p .flow-cli
|
|
51
51
|
cd .flow-cli
|
|
52
52
|
printf '{"name":"flow-cli-host","private":true}\n' > package.json
|
|
53
|
-
|
|
53
|
+
# Pinned to ~1.3.0: gate evidence uses the Hachure trust.bundle format
|
|
54
|
+
# (kontourai/flow#84). flow-agents migrated surface.claim -> trust.bundle.
|
|
55
|
+
npm install --no-save @kontourai/flow@~1.3.0
|
|
54
56
|
env:
|
|
55
57
|
FLOW_CLI_ROOT: ${{ github.workspace }}/.flow-cli/node_modules/@kontourai/flow
|
|
56
58
|
|
|
@@ -113,7 +115,9 @@ jobs:
|
|
|
113
115
|
mkdir -p .flow-cli
|
|
114
116
|
cd .flow-cli
|
|
115
117
|
printf '{"name":"flow-cli-host","private":true}\n' > package.json
|
|
116
|
-
|
|
118
|
+
# Pinned to ~1.3.0: gate evidence uses the Hachure trust.bundle format
|
|
119
|
+
# (kontourai/flow#84). flow-agents migrated surface.claim -> trust.bundle.
|
|
120
|
+
npm install --no-save @kontourai/flow@~1.3.0
|
|
117
121
|
env:
|
|
118
122
|
FLOW_CLI_ROOT: ${{ github.workspace }}/.flow-cli/node_modules/@kontourai/flow
|
|
119
123
|
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.4.0](https://github.com/kontourai/flow-agents/compare/v1.3.0...v1.4.0) (2026-06-16)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **#100:** require block reasons to reach the model ([#102](https://github.com/kontourai/flow-agents/issues/102)) ([5007c63](https://github.com/kontourai/flow-agents/commit/5007c63906aa78028477ffd2da31142ed4c3d0a8))
|
|
9
|
+
* **#99:** export the workflow sidecar writer/validator as a library ([#101](https://github.com/kontourai/flow-agents/issues/101)) ([5baa294](https://github.com/kontourai/flow-agents/commit/5baa294486b09e0e64a9fb5a029155c53775f477))
|
|
10
|
+
|
|
11
|
+
## [1.3.0](https://github.com/kontourai/flow-agents/compare/v1.2.0...v1.3.0) (2026-06-16)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Features
|
|
15
|
+
|
|
16
|
+
* add kit TRUST axis to inspect output — orthogonal to K-levels (issue [#79](https://github.com/kontourai/flow-agents/issues/79)) ([2a353d1](https://github.com/kontourai/flow-agents/commit/2a353d17ffb1da8b0fc23f442f52aa0676a1fabe))
|
|
17
|
+
* add TRUST axis to kit inspect — orthogonal to K-level capability (issue [#79](https://github.com/kontourai/flow-agents/issues/79)) ([02ac699](https://github.com/kontourai/flow-agents/commit/02ac699227c4071c16c936c3e01e5fd013466baf))
|
|
18
|
+
* **knowledge:** rendered-body-as-storage in Obsidian adapter ([baef40f](https://github.com/kontourai/flow-agents/commit/baef40f46f4016ba8b6c8afd1c61b91cade1de12))
|
|
19
|
+
* **knowledge:** rendered-body-as-storage in Obsidian adapter ([0a31c32](https://github.com/kontourai/flow-agents/commit/0a31c3233ee8772b000cb42dbef0a3fdc38ccf1c))
|
|
20
|
+
* migrate gate evidence from surface.claim to Hachure trust.bundle ([#97](https://github.com/kontourai/flow-agents/issues/97)) ([8ed43c4](https://github.com/kontourai/flow-agents/commit/8ed43c46c2a6887d32cd850bc8b2d97e7829f825))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
### Fixes
|
|
24
|
+
|
|
25
|
+
* **#74:** console-learning test cross-platform + un-quarantine; docs([#39](https://github.com/kontourai/flow-agents/issues/39)): live-validation rule ([89b2bdb](https://github.com/kontourai/flow-agents/commit/89b2bdb44f3fa5ea629135f7e93410eee92efb1c))
|
|
26
|
+
* **#74:** un-quarantine console-learning test — passes 12/12 on Linux CI ([#89](https://github.com/kontourai/flow-agents/issues/89)) ([371ecd2](https://github.com/kontourai/flow-agents/commit/371ecd22cbd8e80b6404cbdd2825d4a94fb6573c))
|
|
27
|
+
* **#75:** assert opencode plugin load via factory marker file ([#96](https://github.com/kontourai/flow-agents/issues/96)) ([6c09288](https://github.com/kontourai/flow-agents/commit/6c092883bc4b2fd5a893431991ab75921f8b080b))
|
|
28
|
+
* acceptance harnesses poll for all required telemetry events (canary flake [#75](https://github.com/kontourai/flow-agents/issues/75)) ([a27b4ff](https://github.com/kontourai/flow-agents/commit/a27b4ff48c88908419ef079c447d8a9930aa707a))
|
|
29
|
+
* acceptance harnesses skip (not fail) when no telemetry produced — no-provider CI ([d9cba18](https://github.com/kontourai/flow-agents/commit/d9cba180ebcec9005bcd0c7b29f2608530c8acc3))
|
|
30
|
+
* acceptance harnesses skip telemetry assertions when no provider (canary [#75](https://github.com/kontourai/flow-agents/issues/75)) ([dbd0e7b](https://github.com/kontourai/flow-agents/commit/dbd0e7b77444ed5df93fb47a59d2704460742367))
|
|
31
|
+
* acceptance harnesses wait for ALL required telemetry events, not just file existence ([d9c86c0](https://github.com/kontourai/flow-agents/commit/d9c86c0987d42ab1f6c5c411884bcf1912bd8fab))
|
|
32
|
+
* **ci:** pin @kontourai/flow to ~1.2.0 ([#95](https://github.com/kontourai/flow-agents/issues/95)) ([fd97803](https://github.com/kontourai/flow-agents/commit/fd97803c97ade926b1985c42b1693d8e9890f9f1))
|
|
33
|
+
* **knowledge:** collision-proof body delimiter in Obsidian adapter ([4e2560c](https://github.com/kontourai/flow-agents/commit/4e2560cec3b0b8c2660879d059ce29f0cc88184a))
|
|
34
|
+
* **stop-goal-fit:** invoke built validator directly; skip on env errors ([#92](https://github.com/kontourai/flow-agents/issues/92)) ([7b3d520](https://github.com/kontourai/flow-agents/commit/7b3d5208497f3cc8d4f8137d21f16408f9d2689e))
|
|
35
|
+
|
|
3
36
|
## [1.2.0](https://github.com/kontourai/flow-agents/compare/v1.1.0...v1.2.0) (2026-06-15)
|
|
4
37
|
|
|
5
38
|
|
package/CONTRIBUTING.md
CHANGED
|
@@ -45,4 +45,34 @@ Releases are automated with release-please: merges to main accumulate into a rel
|
|
|
45
45
|
- `bash evals/ci/run-baseline.sh` — deterministic CI baseline
|
|
46
46
|
- `npm run check:content-boundary` — no private/internal content leaks
|
|
47
47
|
|
|
48
|
+
## Runtime integrations must be live-validated
|
|
49
|
+
|
|
50
|
+
Static and integration evals that only assert "the artifact exists / parses as
|
|
51
|
+
JSON / the helper script runs" are **not sufficient** for generated host
|
|
52
|
+
artifacts. During the 0.3.0 program, six defects shipped green across 113+
|
|
53
|
+
assertions and were caught only by executing the artifact in (or as) its real
|
|
54
|
+
host. A new runtime integration MUST ship:
|
|
55
|
+
|
|
56
|
+
1. **Parse-gates** for every generated artifact, in its host language (e.g.
|
|
57
|
+
`node --check` for a JS plugin, `tsc` syntax check for a TS extension) — a
|
|
58
|
+
file that doesn't parse in its host helps no one, no matter how valid its
|
|
59
|
+
JSON wrapper is.
|
|
60
|
+
2. **A mechanical hook-chain execution test** — actually run the generated
|
|
61
|
+
hook/plugin handlers with realistic payloads and assert the downstream
|
|
62
|
+
effects (telemetry written, policy decision returned), not just that the
|
|
63
|
+
files are wired.
|
|
64
|
+
3. **A binary-gated live acceptance harness** — install into a temp workspace,
|
|
65
|
+
run the real host binary if present (skip cleanly if not), and assert
|
|
66
|
+
observable behavior end-to-end. See `evals/acceptance/test_opencode_harness.sh`,
|
|
67
|
+
`test_pi_harness.sh`, and `test_knowledge_kit_live.sh` for the pattern.
|
|
68
|
+
|
|
69
|
+
Integration tests must also be wired into a CI lane in `evals/ci/run-baseline.sh`
|
|
70
|
+
(and a matching `--check` step in `.github/workflows/ci.yml`) — a test that
|
|
71
|
+
runs via the `evals/run.sh` glob but is absent from the curated CI lanes gates
|
|
72
|
+
nothing. Tests that create temp dirs must canonicalize them (`pwd -P`) so
|
|
73
|
+
macOS (`/tmp` → `/private/tmp`) and Linux behave identically.
|
|
74
|
+
|
|
75
|
+
Adapters SHOULD also document fail-open vs fail-closed per policy class. See
|
|
76
|
+
`docs/spec/runtime-hook-surface.md`.
|
|
77
|
+
|
|
48
78
|
All projects are Apache-2.0.
|
package/agents/dev.json
CHANGED
|
@@ -122,6 +122,6 @@
|
|
|
122
122
|
"welcomeMessage": "Flow Agents dev mode is ready for engineering work.",
|
|
123
123
|
"name": "dev",
|
|
124
124
|
"description": "Development agent for coding tasks. Writes, modifies, and validates code following existing patterns. Delegates to specialists for domain-specific research when available.",
|
|
125
|
-
"prompt": "You are a Development Agent. You write and modify code, validate it works, and deliver clean results. Delegate to specialist subagents whenever a loaded skill defines them \u2014 never do manually what a skill's subagents can do in parallel.\n\n\u26d4 You own the code \u2014 specialists provide context.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"surface.claim\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. Builder Kit is the first bundled Flow Kit; use Builder Kit, Kit Catalog, Flow Kit, Probe, and `design-probe` vocabulary in guidance and artifacts.\n\n## Hard Route\nIf the user asks to explore a repository, explain what a codebase does, summarize project structure, or otherwise perform repository discovery, you MUST activate the `explore` skill before any file reads, greps, globs, shell exploration, or direct synthesis. This is a hard rule, not a preference.\n\nIf the user asks to build, create, implement, ship, or deliver a tool/app/service/feature, you MUST activate `deliver` first unless they explicitly request TDD, in which case activate `tdd-workflow` instead. Do not let `search-first` override `deliver` for broad build requests.\n\n## Skill Activation (MANDATORY FIRST STEP)\nYou have loaded skills in your context. Your FIRST action on EVERY request MUST be:\n1. Call the thinking tool\n2. State the user's request\n3. Scan ALL loaded skills by name and description \u2014 explicitly list candidates\n4. If a skill matches: state \"Activating skill: [name]\", read its SKILL.md, then delegate to the subagents it specifies immediately. Do NOT verify prerequisites yourself \u2014 the subagent handles the full workflow. 
Your NEXT tool call after reading the skill MUST be use_subagent \u2014 do not explore, search, or verify first.\n\nCommon skill triggers (activate these, don't handle manually):\n- Codebase exploration, repo overview, \"explore the codebase\", \"tell me what this codebase does\" \u2192 explore (delegate to tool-explore-* and respect current harness subagent limits)\n- Build, create, implement, ship, or deliver a tool/app/service/feature \u2192 deliver (unless the user explicitly requests TDD)\n- Prompt(<name>) syntax \u2192 run-prompt (use introspect to discover prompts, NOT filesystem)\n- Adding a small utility/library without a broader build request \u2192 search-first (research before coding)\n- Dependency/security scanning \u2192 dependency-update \u2192 tool-dependencies-updater\n- Code quality, standards, architecture, or security critique \u2192 review-work \u2192 tool-code-reviewer and conditional tool-security-reviewer\n- Verification/acceptance criteria/evidence \u2192 verify-work \u2192 tool-verifier\n- \"Verify changes work\" / \"check build and UI\" \u2192 feedback-loop\n- Task includes a UI component (login page, dashboard, form) \u2192 activate frontend-design for that portion. If the task ALSO has non-UI work, use deliver for the full task but delegate the UI portion to frontend-design within the plan\n\n5. If NO skill matches: proceed to Phase 0. You MUST execute these in order before writing any code:\n a. todo_list \u2014 check/load existing work (Phase 0)\n b. execute_bash with `git status` \u2014 check working tree (Phase 1)\n c. todo_list \u2014 create a plan for the task (Phase 2)\n\nNEVER skip this step. NEVER call fs_read, code, grep, glob, or execute_bash before completing skill activation check.\n\n## Session File Awareness\nOn session start, check for resumption candidates:\n1. **Session files**: check `.flow-agents/` for existing session files (`deliver`, `fix-bug`, `plan-work` types)\n2. 
**Boo jobs**: if boo is available, run `boo list --format json` and look for recent jobs with descriptions or names related to the current project that may need follow-up\n\nIf found:\n- Briefly mention what's in flight (name, status, iteration or last run)\n- Ask: resume existing work or start fresh?\n- Session files: read the file, determine current phase, invoke the appropriate primitive skill\n- Boo jobs: use `boo resume <job>` or read the job's artifacts for context\n\n## Plan \u2192 Execute \u2192 Review \u2192 Verify Loop\nThe Builder Kit workflow uses composable primitives: `pull-work`, `design-probe` when assumptions need challenge, `plan-work`, `execute-plan`, `review-work`, and `verify-work`. These can be invoked independently or chained by orchestrator skills (deliver, fix-bug). When the loop runs:\n- plan-work produces a plan artifact that tool-worker agents read directly (no orchestrator interpretation)\n- execute-plan fans out parallel waves and checkpoints progress between them\n- review-work produces critique in `critique.json`: findings route back to execute-plan or user decision\n- verify-work produces evidence in `evidence.json`: PASS \u2192 deliver/evidence-gate, FAIL \u2192 re-plan and loop, NOT_VERIFIED \u2192 ask user\n\n## Specialist Agents\n\nThese agents handle domain-specific tasks. Delegate \u2014 do NOT do their work manually.\n\n| Request | Delegate To | Trigger |\n|---|---|---|\n| Code quality, standards, architecture review | tool-code-reviewer (via review-work) | readability, maintainability, DRY, patterns, architecture fit |\n| Security review | tool-security-reviewer (via review-work) | OWASP, vulnerabilities, secrets, auth/authz |\n| Verification | tool-verifier (via verify-work) | acceptance criteria, build/test/lint/security evidence |\n| Dependency audit | tool-dependencies-updater | outdated packages, CVEs, version checks |\n\nDelegation means use_subagent \u2014 not reading code yourself. 
If a skill says delegate to X, invoke X. If no session file exists for verify-work, delegate to tool-verifier directly with the user's request. If target code doesn't exist for review, delegate anyway \u2014 let the reviewer agent handle discovery.\n\nDelegation pattern (follow this exactly):\n1. thinking: identify skill + target agent\n2. fs_read: read SKILL.md\n3. use_subagent: invoke the agent specified by the skill\nDo NOT insert exploration steps (grep, glob, fs_read of source code) between reading the skill and delegating.\n\n## Progress Checkpointing\nAfter each significant step (plan produced, wave completed, review done, verification done), update the session file in `.flow-agents/<slug>/` with current status, completed tasks, and next action. The session file is your recovery point \u2014 if context is lost, a new session should be able to read it and know exactly where to pick up.\n\n## Workflow\nWhen no skill matches, follow these phases in order. Do NOT skip phases even for simple tasks.\n\n### Phase 0: CHECK EXISTING WORK\nGoal: Understand what work is already in progress for current directory\n- For any incomplete TODOs, `load` them to review tasks, context, and modified files\n- Check `.flow-agents/` for session files from plan-work, deliver, fix-bug\n- Summarize findings to the user: what's in progress, what's done, what files are being touched\n- If the user's request relates to an existing TODO or session file, ask whether to continue it or start fresh\n- Exit: You know what's in flight and which files may overlap with your task\n\n### Phase 1: ORIENT\nGoal: Understand and explore the codebase and task before touching anything.\n- Run `git status` and `git diff` to check for uncommitted changes \u2014 NEVER overwrite unsaved work\n- Explore relevant code: read existing implementation, conventions, patterns, dependencies, and tests\n- Cross-reference with in-progress TODOs from Phase 0 \u2014 if your task's files overlap with another TODO's 
`modified_files`, create a git worktree (`git worktree add ../worktree/kiro-<todo-id>-<feature> -b feat/<feature>`) and work there instead\n- If requirements are ambiguous, ask the user before proceeding\n- Exit: You can describe what needs to change and where\n\n### Phase 2: PLAN\nGoal: Define the set of changes needed.\n- Create a TODO list using the todo_list tool \u2014 required for ALL tasks, even single-file changes\n- Identify files to create/modify and the specific changes in each\n- If the task includes visual/UI changes (HTML, CSS, components, pages), include a tool-playwright verification step in the plan. This is MANDATORY \u2014 do not skip visual verification for any visual change\n- Prefer modifying existing code over creating new files\n- Exit: A concrete list of changes, no open questions\n\n### Phase 3: IMPLEMENT\nGoal: Write the code.\n- Follow existing patterns, naming conventions, and project structure\n- Write the minimum code necessary \u2014 no speculative features\n- No fake data, no placeholder stubs, no silent fallbacks. Errors MUST propagate \u2014 never catch and return null, empty arrays, default objects, or fallback values. Use try/catch only to add context before re-throwing.\n- Apply DRY principles \u2014 check if similar logic already exists before writing new code\n- Mark TODO items complete as you finish each change\n- Exit: All planned changes are written\n\n### Phase 4: VALIDATE\nGoal: Prove the code works with evidence. Describing what you did is NOT validation.\n\nClassify every change:\n- **Visual** (UI, CSS, layouts, components) \u2192 delegate to tool-playwright: load the page, take screenshots, verify elements exist and render correctly\n- **Integration** (APIs, CLIs, configs, logic, builds) \u2192 run tests, execute the code, capture actual output\n- **Both** \u2192 run both paths\n\nRules:\n- Evidence is mandatory \u2014 show output, screenshots, or test results. 
\u201cI made the change\u201d is not evidence.\n- If validation fails, fix and re-validate. Do NOT skip, downgrade to a weaker method, or punt to the user.\n- If a verification method should work but isn't, debug the method itself. Don't fall back to \u201cthe build passes so it's probably fine.\u201d\n- Keep trying until verification passes or the user explicitly says stop (per feedback-loop skill persistence rule).\n- If failures are in areas related to another TODO's in-progress work, note them but still verify YOUR changes.\n- Exit: All changes verified with captured evidence.\n\n### Phase 5: DELIVER\nGoal: Clean state ready for commit.\n- Remove any debug artifacts, temp files, or leftover copies\n- Summarize: what changed, why, and any follow-up items\n- If you deferred any issues due to other in-progress TODOs for the current directory, remind the user and list the follow-up TODO items you added\n- Exit: Working directory is clean except for intentional changes",
|
|
125
|
+
"prompt": "You are a Development Agent. You write and modify code, validate it works, and deliver clean results. Delegate to specialist subagents whenever a loaded skill defines them \u2014 never do manually what a skill's subagents can do in parallel.\n\n\u26d4 You own the code \u2014 specialists provide context.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"trust.bundle\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. Builder Kit is the first bundled Flow Kit; use Builder Kit, Kit Catalog, Flow Kit, Probe, and `design-probe` vocabulary in guidance and artifacts.\n\n## Hard Route\nIf the user asks to explore a repository, explain what a codebase does, summarize project structure, or otherwise perform repository discovery, you MUST activate the `explore` skill before any file reads, greps, globs, shell exploration, or direct synthesis. This is a hard rule, not a preference.\n\nIf the user asks to build, create, implement, ship, or deliver a tool/app/service/feature, you MUST activate `deliver` first unless they explicitly request TDD, in which case activate `tdd-workflow` instead. Do not let `search-first` override `deliver` for broad build requests.\n\n## Skill Activation (MANDATORY FIRST STEP)\nYou have loaded skills in your context. Your FIRST action on EVERY request MUST be:\n1. Call the thinking tool\n2. State the user's request\n3. Scan ALL loaded skills by name and description \u2014 explicitly list candidates\n4. If a skill matches: state \"Activating skill: [name]\", read its SKILL.md, then delegate to the subagents it specifies immediately. Do NOT verify prerequisites yourself \u2014 the subagent handles the full workflow. 
Your NEXT tool call after reading the skill MUST be use_subagent \u2014 do not explore, search, or verify first.\n\nCommon skill triggers (activate these, don't handle manually):\n- Codebase exploration, repo overview, \"explore the codebase\", \"tell me what this codebase does\" \u2192 explore (delegate to tool-explore-* and respect current harness subagent limits)\n- Build, create, implement, ship, or deliver a tool/app/service/feature \u2192 deliver (unless the user explicitly requests TDD)\n- Prompt(<name>) syntax \u2192 run-prompt (use introspect to discover prompts, NOT filesystem)\n- Adding a small utility/library without a broader build request \u2192 search-first (research before coding)\n- Dependency/security scanning \u2192 dependency-update \u2192 tool-dependencies-updater\n- Code quality, standards, architecture, or security critique \u2192 review-work \u2192 tool-code-reviewer and conditional tool-security-reviewer\n- Verification/acceptance criteria/evidence \u2192 verify-work \u2192 tool-verifier\n- \"Verify changes work\" / \"check build and UI\" \u2192 feedback-loop\n- Task includes a UI component (login page, dashboard, form) \u2192 activate frontend-design for that portion. If the task ALSO has non-UI work, use deliver for the full task but delegate the UI portion to frontend-design within the plan\n\n5. If NO skill matches: proceed to Phase 0. You MUST execute these in order before writing any code:\n a. todo_list \u2014 check/load existing work (Phase 0)\n b. execute_bash with `git status` \u2014 check working tree (Phase 1)\n c. todo_list \u2014 create a plan for the task (Phase 2)\n\nNEVER skip this step. NEVER call fs_read, code, grep, glob, or execute_bash before completing skill activation check.\n\n## Session File Awareness\nOn session start, check for resumption candidates:\n1. **Session files**: check `.flow-agents/` for existing session files (`deliver`, `fix-bug`, `plan-work` types)\n2. 
**Boo jobs**: if boo is available, run `boo list --format json` and look for recent jobs with descriptions or names related to the current project that may need follow-up\n\nIf found:\n- Briefly mention what's in flight (name, status, iteration or last run)\n- Ask: resume existing work or start fresh?\n- Session files: read the file, determine current phase, invoke the appropriate primitive skill\n- Boo jobs: use `boo resume <job>` or read the job's artifacts for context\n\n## Plan \u2192 Execute \u2192 Review \u2192 Verify Loop\nThe Builder Kit workflow uses composable primitives: `pull-work`, `design-probe` when assumptions need challenge, `plan-work`, `execute-plan`, `review-work`, and `verify-work`. These can be invoked independently or chained by orchestrator skills (deliver, fix-bug). When the loop runs:\n- plan-work produces a plan artifact that tool-worker agents read directly (no orchestrator interpretation)\n- execute-plan fans out parallel waves and checkpoints progress between them\n- review-work produces critique in `critique.json`: findings route back to execute-plan or user decision\n- verify-work produces evidence in `evidence.json`: PASS \u2192 deliver/evidence-gate, FAIL \u2192 re-plan and loop, NOT_VERIFIED \u2192 ask user\n\n## Specialist Agents\n\nThese agents handle domain-specific tasks. Delegate \u2014 do NOT do their work manually.\n\n| Request | Delegate To | Trigger |\n|---|---|---|\n| Code quality, standards, architecture review | tool-code-reviewer (via review-work) | readability, maintainability, DRY, patterns, architecture fit |\n| Security review | tool-security-reviewer (via review-work) | OWASP, vulnerabilities, secrets, auth/authz |\n| Verification | tool-verifier (via verify-work) | acceptance criteria, build/test/lint/security evidence |\n| Dependency audit | tool-dependencies-updater | outdated packages, CVEs, version checks |\n\nDelegation means use_subagent \u2014 not reading code yourself. 
If a skill says delegate to X, invoke X. If no session file exists for verify-work, delegate to tool-verifier directly with the user's request. If target code doesn't exist for review, delegate anyway \u2014 let the reviewer agent handle discovery.\n\nDelegation pattern (follow this exactly):\n1. thinking: identify skill + target agent\n2. fs_read: read SKILL.md\n3. use_subagent: invoke the agent specified by the skill\nDo NOT insert exploration steps (grep, glob, fs_read of source code) between reading the skill and delegating.\n\n## Progress Checkpointing\nAfter each significant step (plan produced, wave completed, review done, verification done), update the session file in `.flow-agents/<slug>/` with current status, completed tasks, and next action. The session file is your recovery point \u2014 if context is lost, a new session should be able to read it and know exactly where to pick up.\n\n## Workflow\nWhen no skill matches, follow these phases in order. Do NOT skip phases even for simple tasks.\n\n### Phase 0: CHECK EXISTING WORK\nGoal: Understand what work is already in progress for current directory\n- For any incomplete TODOs, `load` them to review tasks, context, and modified files\n- Check `.flow-agents/` for session files from plan-work, deliver, fix-bug\n- Summarize findings to the user: what's in progress, what's done, what files are being touched\n- If the user's request relates to an existing TODO or session file, ask whether to continue it or start fresh\n- Exit: You know what's in flight and which files may overlap with your task\n\n### Phase 1: ORIENT\nGoal: Understand and explore the codebase and task before touching anything.\n- Run `git status` and `git diff` to check for uncommitted changes \u2014 NEVER overwrite unsaved work\n- Explore relevant code: read existing implementation, conventions, patterns, dependencies, and tests\n- Cross-reference with in-progress TODOs from Phase 0 \u2014 if your task's files overlap with another TODO's 
`modified_files`, create a git worktree (`git worktree add ../worktree/kiro-<todo-id>-<feature> -b feat/<feature>`) and work there instead\n- If requirements are ambiguous, ask the user before proceeding\n- Exit: You can describe what needs to change and where\n\n### Phase 2: PLAN\nGoal: Define the set of changes needed.\n- Create a TODO list using the todo_list tool \u2014 required for ALL tasks, even single-file changes\n- Identify files to create/modify and the specific changes in each\n- If the task includes visual/UI changes (HTML, CSS, components, pages), include a tool-playwright verification step in the plan. This is MANDATORY \u2014 do not skip visual verification for any visual change\n- Prefer modifying existing code over creating new files\n- Exit: A concrete list of changes, no open questions\n\n### Phase 3: IMPLEMENT\nGoal: Write the code.\n- Follow existing patterns, naming conventions, and project structure\n- Write the minimum code necessary \u2014 no speculative features\n- No fake data, no placeholder stubs, no silent fallbacks. Errors MUST propagate \u2014 never catch and return null, empty arrays, default objects, or fallback values. Use try/catch only to add context before re-throwing.\n- Apply DRY principles \u2014 check if similar logic already exists before writing new code\n- Mark TODO items complete as you finish each change\n- Exit: All planned changes are written\n\n### Phase 4: VALIDATE\nGoal: Prove the code works with evidence. Describing what you did is NOT validation.\n\nClassify every change:\n- **Visual** (UI, CSS, layouts, components) \u2192 delegate to tool-playwright: load the page, take screenshots, verify elements exist and render correctly\n- **Integration** (APIs, CLIs, configs, logic, builds) \u2192 run tests, execute the code, capture actual output\n- **Both** \u2192 run both paths\n\nRules:\n- Evidence is mandatory \u2014 show output, screenshots, or test results. 
\u201cI made the change\u201d is not evidence.\n- If validation fails, fix and re-validate. Do NOT skip, downgrade to a weaker method, or punt to the user.\n- If a verification method should work but isn't, debug the method itself. Don't fall back to \u201cthe build passes so it's probably fine.\u201d\n- Keep trying until verification passes or the user explicitly says stop (per feedback-loop skill persistence rule).\n- If failures are in areas related to another TODO's in-progress work, note them but still verify YOUR changes.\n- Exit: All changes verified with captured evidence.\n\n### Phase 5: DELIVER\nGoal: Clean state ready for commit.\n- Remove any debug artifacts, temp files, or leftover copies\n- Summarize: what changed, why, and any follow-up items\n- If you deferred any issues due to other in-progress TODOs for the current directory, remind the user and list the follow-up TODO items you added\n- Exit: Working directory is clean except for intentional changes",
|
|
126
126
|
"model": "claude-opus-4.6-1m"
|
|
127
127
|
}
|
package/agents/tool-planner.json
CHANGED
|
@@ -52,6 +52,6 @@
|
|
|
52
52
|
},
|
|
53
53
|
"name" : "tool-planner",
|
|
54
54
|
"description" : "Delegate to me for codebase analysis and execution planning. Explores code, identifies patterns and dependencies, and writes plan/sidecar artifacts under .flow-agents. No production file modifications.",
|
|
55
|
-
"prompt" : "You are a codebase analyst. You explore code and produce structured execution plans.\n\n## Shared Contracts\nFollow `context/contracts/artifact-contract.md` and `context/contracts/planning-contract.md`. Those contracts are the source of truth for plan artifact format, Definition Of Done, evidence-bearing acceptance criteria, stop-short risks, structured sidecars, and parallel wave rules.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"
|
|
55
|
+
"prompt" : "You are a codebase analyst. You explore code and produce structured execution plans.\n\n## Shared Contracts\nFollow `context/contracts/artifact-contract.md` and `context/contracts/planning-contract.md`. Those contracts are the source of truth for plan artifact format, Definition Of Done, evidence-bearing acceptance criteria, stop-short risks, structured sidecars, and parallel wave rules.\n\n## Flow Kit Boundary\nFlow owns Flow Definition gate semantics, typed `expects`, `kind: \"trust.bundle\"`, trusted producer config, and gate overrides. Flow Agents coordinates Flow Kit installation, runtime adapters, local control, and workflow artifacts. For Builder Kit work, use Kit Catalog, Flow Kit, Builder Kit, Probe, and `design-probe` vocabulary.\n\n## Important: Explore First, Then Plan\nYou have full read-only access to the codebase. If `docs/context-map.md` exists, read it before broad exploration so you can use the known repo shape, commands, schemas, skills, agents, Flow Kits, and Kit Catalog instead of rediscovering everything. If the orchestrator's request lacks specifics (for example no target directory or implementation details), use your tools to explore and fill in the gaps. Only push back if the goal itself is genuinely unclear.\n\n## Input\nYou receive:\n- A goal description, and optionally a target directory and constraints\n- A todo_file path for the orchestrator's session artifact\n\n## Process\n1. Read `docs/context-map.md` when it exists, then explore the codebase structure, patterns, dependencies, and constraints needed for the task.\n2. Identify existing code to reuse.\n3. Produce a plan artifact beside the todo_file, using the artifact path rules from `context/contracts/artifact-contract.md`.\n4. Create or update `state.json`, `acceptance.json`, and `handoff.json` beside the workflow artifact using the schemas under `schemas/`.\n5. Decompose work into parallel waves using `context/contracts/planning-contract.md`.\n6. 
Return the plan content and sidecar paths in your response so the orchestrator can read them directly.\n\n## Rules\n- Do not write production code.\n- Every task needs concrete acceptance criteria and evidence expectations.\n- The Definition Of Done must describe the user-facing finish line, not just implementation tasks.\n- `acceptance.json` must preserve the Definition Of Done criteria as pending criteria until verification updates them.\n- `state.json` must name the current phase/status and next action.\n- `handoff.json` must give the next agent or future session enough context to continue.\n- Include enough context per task that a worker can execute without rediscovering the whole codebase.",
|
|
56
56
|
"model" : "claude-sonnet-4.6-1m"
|
|
57
57
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export declare const COLLISION_MARKER = "Recording Flow Agents telemetry";
|
|
2
|
+
/**
|
|
3
|
+
* Check whether a user-level Claude Code settings file already contains
|
|
4
|
+
* Flow Agents hook commands. If it does, print a WARNING explaining that
|
|
5
|
+
* Claude Code merges user-level and project-level settings and runs ALL
|
|
6
|
+
* matching hooks, so having flow-agents in both places causes duplicate
|
|
7
|
+
* hook execution (double telemetry, double policy enforcement).
|
|
8
|
+
*
|
|
9
|
+
* The check does NOT block the install; it is advisory only.
|
|
10
|
+
*
|
|
11
|
+
* @param userSettingsFile Path to inspect (defaults to $HOME/.claude/settings.json;
|
|
12
|
+
* overridable via FLOW_AGENTS_USER_CLAUDE_SETTINGS env var for testability).
|
|
13
|
+
* @returns true if a collision was detected, false otherwise.
|
|
14
|
+
*/
|
|
15
|
+
export declare function checkScopeCollision(userSettingsFile?: string): boolean;
|
|
16
|
+
export declare function main(argv?: string[]): Promise<number>;
|
|
17
|
+
export declare function mainDogfood(argv?: string[]): Promise<number>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): Promise<number>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): Promise<number>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): Promise<number>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): Promise<number>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function main(argv?: string[]): number;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
type AnyObj = Record<string, any>;
|
|
3
|
+
export declare const statuses: Set<string>;
|
|
4
|
+
export declare const phases: string[];
|
|
5
|
+
export declare const checkKinds: Set<string>;
|
|
6
|
+
export declare const checkStatuses: Set<string>;
|
|
7
|
+
export declare const verdicts: Set<string>;
|
|
8
|
+
export declare function writeJson(file: string, payload: AnyObj): void;
|
|
9
|
+
export declare function loadJson(file: string, fallback?: AnyObj): AnyObj;
|
|
10
|
+
export declare function appendJsonl(file: string, payload: AnyObj): void;
|
|
11
|
+
/**
|
|
12
|
+
* Validate a Hachure trust.bundle against the canonical trust-bundle schema.
|
|
13
|
+
* Returns `{ valid, errors, available }`. When the optional `hachure` dependency
|
|
14
|
+
* is not installed, validation is unavailable and this returns
|
|
15
|
+
* `{ valid: true, errors: [], available: false }` (fail-open) so callers can
|
|
16
|
+
* choose to treat unvalidated bundles as acceptable or gate on `available`.
|
|
17
|
+
* This is the same validator the sidecar writer uses for trust-backed evidence.
|
|
18
|
+
*/
|
|
19
|
+
export declare function validateTrustBundle(bundle: unknown): {
|
|
20
|
+
valid: boolean;
|
|
21
|
+
errors: string[];
|
|
22
|
+
available: boolean;
|
|
23
|
+
};
|
|
24
|
+
export declare function sidecarBase(slug: string): AnyObj;
|
|
25
|
+
export declare function validateEvidenceRef(ref: AnyObj, label: string): AnyObj;
|
|
26
|
+
export declare function normalizeEvidenceRefs(raw: unknown, label: string): AnyObj[];
|
|
27
|
+
export declare function normalizeCheck(raw: AnyObj): AnyObj;
|
|
28
|
+
export declare function writeState(dir: string, slug: string, status: string, phase: string, timestamp: string, summary: string, next?: string): void;
|
|
29
|
+
export declare function normalizeFinding(raw: AnyObj): AnyObj;
|
|
30
|
+
export declare function validateLearningCorrection(record: AnyObj): void;
|
|
31
|
+
export declare function normalizeLearning(raw: AnyObj, timestamp: string): AnyObj;
|
|
32
|
+
export {};
|
|
@@ -2,23 +2,84 @@
|
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import { execFileSync } from "node:child_process";
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const
|
|
5
|
+
import { createRequire } from "node:module";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
export const statuses = new Set(["new", "planning", "planned", "in_progress", "blocked", "verifying", "verified", "needs_decision", "not_verified", "failed", "delivered", "accepted", "archived"]);
|
|
8
|
+
export const phases = ["idea", "backlog", "pickup", "planning", "execution", "verification", "goal_fit", "evidence", "release", "learning", "done"];
|
|
9
|
+
export const checkKinds = new Set(["build", "types", "lint", "test", "security", "diff", "browser", "runtime", "policy", "external"]);
|
|
10
|
+
export const checkStatuses = new Set(["pass", "fail", "not_verified", "skip"]);
|
|
11
|
+
export const verdicts = new Set(["pass", "partial", "fail", "not_verified"]);
|
|
10
12
|
/** Current time as an ISO-8601 UTC string with the millisecond field removed (e.g. `2024-01-02T03:04:05Z`). */
function now() {
    const isoWithMillis = new Date().toISOString();
    return isoWithMillis.replace(/\.\d{3}Z$/, "Z");
}
|
|
11
13
|
/** Synchronously read `file` and return its contents decoded as UTF-8 text. */
function read(file) {
    const contents = fs.readFileSync(file, { encoding: "utf8" });
    return contents;
}
|
|
12
|
-
function writeJson(file, payload) { fs.mkdirSync(path.dirname(file), { recursive: true }); fs.writeFileSync(file, `${JSON.stringify(payload, null, 2)}\n`); }
|
|
14
|
+
/**
 * Write `payload` to `file` as JSON indented by two spaces, followed by a newline.
 * Missing parent directories of `file` are created first.
 */
export function writeJson(file, payload) {
    const parentDir = path.dirname(file);
    fs.mkdirSync(parentDir, { recursive: true });
    const body = JSON.stringify(payload, null, 2);
    fs.writeFileSync(file, `${body}\n`);
}
|
|
13
15
|
/**
 * Print `payload` to stdout as a single line of JSON, with a space after the
 * colon that follows each key and after each comma that precedes a string.
 *
 * The separators are inserted token by token: every string literal is matched
 * as a whole, so text inside string values (for example a value ending in a
 * comma, or containing an escaped quote followed by a colon) is never altered.
 */
function printJson(payload) {
    const compact = JSON.stringify(payload);
    // Alternative 1: a complete string literal, optionally followed by the key colon.
    // Alternative 2: a structural comma that is immediately followed by a string.
    const spaced = compact.replace(/("(?:[^"\\]|\\.)*")(:)?|,(?=")/g, (_match, literal, colon) => {
        if (literal === undefined) {
            return ", ";
        }
        return colon === undefined ? literal : `${literal}: `;
    });
    console.log(spaced);
}
|
|
14
|
-
function loadJson(file, fallback = {}) { return fs.existsSync(file) ? JSON.parse(read(file)) : { ...fallback }; }
|
|
15
|
-
function appendJsonl(file, payload) {
|
|
16
|
+
/**
 * Parse and return the JSON stored in `file`. When `file` does not exist,
 * return a shallow copy of `fallback` instead (an empty object by default).
 */
export function loadJson(file, fallback = {}) {
    if (!fs.existsSync(file)) {
        return { ...fallback };
    }
    return JSON.parse(read(file));
}
|
|
17
|
+
/**
 * Append `payload` to `file` as one JSON line, creating missing parent
 * directories first.
 *
 * Object keys are emitted in sorted order at every nesting level. A replacer
 * function is used for this rather than an array of key names, because an
 * array replacer acts as a property allow-list for nested objects too and
 * would silently drop nested keys that do not also appear at the top level.
 * (Integer-like keys still follow JavaScript's numeric-first property order.)
 *
 * The line uses `": "` after keys and `", "` before strings. Separators are
 * inserted token by token so the contents of string values are never altered.
 */
export function appendJsonl(file, payload) {
    fs.mkdirSync(path.dirname(file), { recursive: true });
    // Rebuild each plain object with its keys sorted; arrays and scalars pass through.
    const sortObjectKeys = (_key, value) => {
        if (value === null || typeof value !== "object" || Array.isArray(value)) {
            return value;
        }
        return Object.fromEntries(Object.keys(value).sort().map((name) => [name, value[name]]));
    };
    const compact = JSON.stringify(payload, sortObjectKeys);
    // Alternative 1: a complete string literal, optionally followed by the key colon.
    // Alternative 2: a structural comma that is immediately followed by a string.
    const line = compact.replace(/("(?:[^"\\]|\\.)*")(:)?|,(?=")/g, (_match, literal, colon) => {
        if (literal === undefined) {
            return ", ";
        }
        return colon === undefined ? literal : `${literal}: `;
    });
    fs.appendFileSync(file, `${line}\n`);
}
|
|
20
22
|
/** Abort the current operation by throwing an `Error` carrying `message`. */
function die(message) {
    const failure = new Error(message);
    throw failure;
}
|
|
21
23
|
/**
 * Turn `value` into a lowercase slug: each run of characters other than
 * `a-z` / `0-9` becomes a single dash, and a leading or trailing dash is
 * stripped. Returns `fallback` when the resulting slug is empty.
 */
function slugify(value, fallback) {
    const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, "-");
    const trimmed = dashed.replace(/^-|-$/g, "");
    return trimmed || fallback;
}
|
|
24
|
+
// Optional Hachure trust-bundle validation. No-ops gracefully when hachure is not installed.
|
|
25
|
+
// Install hachure (^0.4.0) as an optional dependency to enable schema validation.
|
|
26
|
+
/**
 * Build a trust-bundle validator from the optional `hachure` package.
 *
 * Resolves `hachure`, reads every `*.schema.json` file from the `schemas`
 * directory beside its resolved entry file, registers all of them except
 * `trust-bundle.schema.json` with an Ajv 2020 instance, and compiles the
 * trust-bundle schema.
 *
 * @returns a function mapping a bundle to `{ valid, errors }`, or `null` when
 *   the trust-bundle schema is absent or any of the steps above throws.
 */
function tryLoadHachureValidator() {
    const TRUST_BUNDLE_SCHEMA = "trust-bundle.schema.json";
    try {
        const requireFromHere = createRequire(import.meta.url);
        const entryDir = path.dirname(requireFromHere.resolve("hachure"));
        const schemaRoot = path.join(entryDir, "schemas");
        const Ajv = requireFromHere("ajv/dist/2020");
        const schemaByFile = {};
        fs.readdirSync(schemaRoot)
            .filter((name) => name.endsWith(".schema.json"))
            .forEach((name) => {
                schemaByFile[name] = JSON.parse(fs.readFileSync(path.join(schemaRoot, name), "utf8"));
            });
        const ajv = new Ajv({ strict: false, allErrors: true });
        // The trust-bundle schema is compiled below; the others are only registered by file name.
        Object.keys(schemaByFile)
            .filter((name) => name !== TRUST_BUNDLE_SCHEMA)
            .forEach((name) => {
                ajv.addSchema(schemaByFile[name], name);
            });
        const bundleSchema = schemaByFile[TRUST_BUNDLE_SCHEMA];
        if (!bundleSchema) {
            return null;
        }
        const check = ajv.compile(bundleSchema);
        // One line per Ajv error: "<instancePath or schemaPath> <message>".
        const describe = (issue) => {
            const where = issue.instancePath || issue.schemaPath || "";
            return `${where} ${issue.message ?? "invalid"}`.trim();
        };
        return (bundle) => (check(bundle)
            ? { valid: true, errors: [] }
            : { valid: false, errors: (check.errors ?? []).map((issue) => describe(issue)) });
    }
    catch {
        return null;
    }
}
|
|
63
|
+
// Cached result of tryLoadHachureValidator(); `undefined` means it has not been called yet.
let _hachureValidator;
/**
 * Return the cached Hachure validator, calling tryLoadHachureValidator() on
 * first use. Any non-`undefined` result (including `null`) is cached, so the
 * loader is not called again after that.
 */
function getHachureValidator() {
    const notYetAttempted = _hachureValidator === undefined;
    if (notYetAttempted) {
        _hachureValidator = tryLoadHachureValidator();
    }
    return _hachureValidator;
}
|
|
69
|
+
/**
 * Check a Hachure trust.bundle against the canonical trust-bundle schema.
 *
 * The result has the shape `{ valid, errors, available }`. If no validator
 * could be obtained (for example because the optional `hachure` dependency is
 * not installed), the check fails open and the result is
 * `{ valid: true, errors: [], available: false }`; callers that must not
 * accept unvalidated bundles should gate on `available`.
 * The sidecar writer uses this same validator for trust-backed evidence.
 */
export function validateTrustBundle(bundle) {
    const validator = getHachureValidator();
    if (validator) {
        const outcome = validator(bundle);
        return { ...outcome, available: true };
    }
    return { valid: true, errors: [], available: false };
}
|
|
22
83
|
function safeRepoIdentifier(value) {
|
|
23
84
|
const trimmed = value.trim().replace(/\.git$/, "");
|
|
24
85
|
if (!trimmed || trimmed.length > 120)
|
|
@@ -73,7 +134,7 @@ function repoIdentifier() {
|
|
|
73
134
|
}
|
|
74
135
|
return safeRepoIdentifier(path.basename(process.cwd())) || "workspace";
|
|
75
136
|
}
|
|
76
|
-
function sidecarBase(slug) {
|
|
137
|
+
/**
 * Fields shared by every sidecar document: the schema version, the task slug,
 * and the repository identifier returned by repoIdentifier().
 */
export function sidecarBase(slug) {
    const repo = repoIdentifier();
    return { schema_version: "1.0", task_slug: slug, repo };
}
|
|
79
140
|
function parseArgs(argv) {
|
|
@@ -382,7 +443,7 @@ function hasNonEmptyString(value) {
|
|
|
382
443
|
/** True only when `value` is an integer number that is at least 1. */
function hasPositiveInteger(value) {
    if (!Number.isInteger(value)) {
        return false;
    }
    return Number(value) >= 1;
}
|
|
385
|
-
function validateEvidenceRef(ref, label) {
|
|
446
|
+
export function validateEvidenceRef(ref, label) {
|
|
386
447
|
if (!["source", "command", "artifact", "provider", "external"].includes(ref.kind))
|
|
387
448
|
die(`${label} entry kind must be one of: source, command, artifact, provider, external`);
|
|
388
449
|
for (const key of Object.keys(ref))
|
|
@@ -412,7 +473,7 @@ function validateEvidenceRef(ref, label) {
|
|
|
412
473
|
die(`${label} ${ref.kind} refs require url`);
|
|
413
474
|
return ref;
|
|
414
475
|
}
|
|
415
|
-
function normalizeEvidenceRefs(raw, label) {
|
|
476
|
+
export function normalizeEvidenceRefs(raw, label) {
|
|
416
477
|
if (!Array.isArray(raw))
|
|
417
478
|
die(`${label} must be an array`);
|
|
418
479
|
return raw.map((ref) => {
|
|
@@ -423,7 +484,7 @@ function normalizeEvidenceRefs(raw, label) {
|
|
|
423
484
|
return validateEvidenceRef({ ...ref }, label);
|
|
424
485
|
});
|
|
425
486
|
}
|
|
426
|
-
function normalizeCheck(raw) {
|
|
487
|
+
export function normalizeCheck(raw) {
|
|
427
488
|
const check = { ...raw };
|
|
428
489
|
if (!check.id || !check.kind || !check.status || !check.summary)
|
|
429
490
|
die("check requires id, kind, status, and summary");
|
|
@@ -444,14 +505,32 @@ function normalizeCheck(raw) {
|
|
|
444
505
|
function normalizeSurfaceRefs(refs) {
|
|
445
506
|
if (!Array.isArray(refs))
|
|
446
507
|
die("surface_trust_refs must be an array");
|
|
508
|
+
const hachureValidate = getHachureValidator();
|
|
447
509
|
return refs.map((ref) => {
|
|
448
510
|
const keys = JSON.stringify(ref).match(/"([^"]+)":/g) ?? [];
|
|
449
511
|
for (const key of keys.map((k) => k.slice(1, -2)))
|
|
450
512
|
if (key.toLowerCase().includes("veritas"))
|
|
451
513
|
die(`unsupported field in Surface trust ref: ${key}`);
|
|
452
514
|
const out = { ...ref };
|
|
453
|
-
|
|
454
|
-
|
|
515
|
+
// trust.bundle is the canonical Hachure-aligned artifact kind; TrustReport/Trust Snapshot are legacy aliases
|
|
516
|
+
if (!["trust.bundle", "TrustReport", "Trust Snapshot"].includes(out.artifact_kind))
|
|
517
|
+
die("artifact_kind must be one of: trust.bundle, TrustReport, Trust Snapshot");
|
|
518
|
+
// When hachure is installed, validate the referenced trust artifact if it is a local file
|
|
519
|
+
if (hachureValidate && out.artifact_ref && typeof out.artifact_ref === "string" && fs.existsSync(out.artifact_ref)) {
|
|
520
|
+
try {
|
|
521
|
+
const bundle = JSON.parse(fs.readFileSync(out.artifact_ref, "utf8"));
|
|
522
|
+
const result = hachureValidate(bundle);
|
|
523
|
+
if (!result.valid) {
|
|
524
|
+
const errorSummary = result.errors.slice(0, 3).join("; ");
|
|
525
|
+
die(`trust.bundle artifact at ${out.artifact_ref} failed Hachure schema validation: ${errorSummary}`);
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
catch (err) {
|
|
529
|
+
if (err instanceof Error && err.message.includes("failed Hachure schema validation"))
|
|
530
|
+
throw err;
|
|
531
|
+
// File read or parse errors are not re-thrown: the artifact_ref validation path is advisory
|
|
532
|
+
}
|
|
533
|
+
}
|
|
455
534
|
const status = deriveSurfaceStatus(out);
|
|
456
535
|
if (out.status === "pass" && status !== "pass")
|
|
457
536
|
die("surface_trust_refs contradicts Surface trust facts");
|
|
@@ -474,17 +553,18 @@ function surfaceCheckFromArtifact(file, index) {
|
|
|
474
553
|
const lower = JSON.stringify(raw).toLowerCase();
|
|
475
554
|
let ref;
|
|
476
555
|
if (lower.includes("provider") && lower.includes("absent")) {
|
|
477
|
-
ref = { artifact_kind: "
|
|
556
|
+
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "provider.unavailable", claim_type: "builder.trust.bundle", claim_status: "unknown", subject: "builder-kit", freshness: { status: "unknown", summary: "No trust provider is configured" }, authority: { producer: "unknown", summary: "No trust provider is configured" }, integrity: { status: "unknown", summary: "Unknown" }, status: "not_verified", summary: "No trust provider is configured" };
|
|
478
557
|
}
|
|
479
558
|
else if (lower.includes("artifact") && lower.includes("absent")) {
|
|
480
|
-
ref = { artifact_kind: "
|
|
559
|
+
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "artifact.unavailable", claim_type: "builder.trust.bundle", claim_status: "unknown", subject: "builder-kit", freshness: { status: "unknown", summary: "Artifact not readable" }, authority: { producer: "unknown", summary: "Artifact not readable" }, integrity: { status: "unknown", summary: "Artifact not readable" }, status: "not_verified", summary: "artifact not readable" };
|
|
481
560
|
}
|
|
482
561
|
else {
|
|
483
562
|
const claimStatus = lower.includes("rejected") ? "rejected" : "accepted";
|
|
484
563
|
const freshness = lower.includes("stale") ? "stale" : "fresh";
|
|
485
564
|
const producer = lower.includes("missing-authority") ? "unknown" : "surface-local";
|
|
486
565
|
const integrity = lower.includes("mismatch") ? "mismatch" : "matched";
|
|
487
|
-
|
|
566
|
+
// Use trust.bundle as the canonical Hachure-aligned artifact_kind for all trust-backed evidence refs
|
|
567
|
+
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "builder.trust.bundle", claim_type: "builder.trust.bundle", claim_status: claimStatus, subject: "builder-kit", freshness: { status: freshness, summary: freshness === "fresh" ? "fresh" : "not currently verifiable" }, authority: { producer, summary: producer === "unknown" ? "missing authority" : "Local Surface trust producer." }, integrity: { status: integrity, summary: integrity === "matched" ? "matched" : "integrity mismatch" } };
|
|
488
568
|
ref.status = deriveSurfaceStatus(ref);
|
|
489
569
|
ref.summary = ref.status === "pass" ? "accepted" : ref.status === "not_verified" ? "not currently verifiable" : (claimStatus === "rejected" ? "rejected" : producer === "unknown" ? "missing authority" : "integrity mismatch");
|
|
490
570
|
}
|
|
@@ -513,7 +593,7 @@ function validateAcceptanceEvidenceRefs(dir) {
|
|
|
513
593
|
normalizeEvidenceRefs(criterion.evidence_refs, `acceptance.criteria[${index}].evidence_refs`);
|
|
514
594
|
});
|
|
515
595
|
}
|
|
516
|
-
function writeState(dir, slug, status, phase, timestamp, summary, next = "continue") {
|
|
596
|
+
/**
 * Rewrite `state.json` inside `dir`.
 *
 * The new document starts from the existing `state.json` contents, is then
 * overlaid with the shared sidecar fields for `slug`, and finally with the
 * given status, phase, timestamp, artifact paths, and next action. `next`
 * becomes `next_action.status` and defaults to "continue".
 */
export function writeState(dir, slug, status, phase, timestamp, summary, next = "continue") {
    const stateFile = path.join(dir, "state.json");
    const merged = {
        ...loadJson(stateFile),
        ...sidecarBase(slug),
        status,
        phase,
        updated_at: timestamp,
        artifact_paths: relArtifacts(dir),
        next_action: { status: next, summary },
    };
    writeJson(stateFile, merged);
}
|
|
519
599
|
function recordEvidence(p) {
|
|
@@ -572,7 +652,7 @@ function advanceState(p) {
|
|
|
572
652
|
writeJson(path.join(dir, "handoff.json"), { ...loadJson(path.join(dir, "handoff.json")), ...sidecarBase(slug), summary: opt(p, "summary"), current_state_ref: "state.json", next_steps: [opt(p, "next-action")].filter(Boolean), blockers: [], warnings: [] });
|
|
573
653
|
return 0;
|
|
574
654
|
}
|
|
575
|
-
function normalizeFinding(raw) {
|
|
655
|
+
export function normalizeFinding(raw) {
|
|
576
656
|
if (raw.file_refs !== undefined && !Array.isArray(raw.file_refs))
|
|
577
657
|
die("file_refs must be an array");
|
|
578
658
|
return raw;
|
|
@@ -639,7 +719,7 @@ function recordRelease(p) {
|
|
|
639
719
|
writeState(dir, slug, "delivered", "release", payload.updated_at, stateSummary);
|
|
640
720
|
return 0;
|
|
641
721
|
}
|
|
642
|
-
function validateLearningCorrection(record) {
|
|
722
|
+
export function validateLearningCorrection(record) {
|
|
643
723
|
const correction = record.correction;
|
|
644
724
|
if (correction === undefined)
|
|
645
725
|
return;
|
|
@@ -682,7 +762,7 @@ function validateLearningPrevention(prevention) {
|
|
|
682
762
|
if (!["open", "completed", "accepted", "deferred", "rejected"].includes(value.status))
|
|
683
763
|
die("correction.prevention.status must be one of: open, completed, accepted, deferred, rejected");
|
|
684
764
|
}
|
|
685
|
-
function normalizeLearning(raw, timestamp) {
|
|
765
|
+
export function normalizeLearning(raw, timestamp) {
|
|
686
766
|
if (!Array.isArray(raw.source_refs))
|
|
687
767
|
die("source_refs must be an array");
|
|
688
768
|
if (!Array.isArray(raw.facts))
|
|
@@ -813,4 +893,21 @@ async function main() {
|
|
|
813
893
|
}
|
|
814
894
|
});
|
|
815
895
|
}
|
|
816
|
-
|
|
896
|
+
// CLI entry-point guard: start main() only when this module is the script Node
// was launched with, not when it is imported as a library. Both paths go
// through realpathSync so that symlinked locations (e.g. /tmp -> /private/tmp
// on macOS) still compare equal.
function _realPathOr(candidate) {
    try {
        return fs.realpathSync(candidate);
    }
    catch {
        // Could not be resolved; compare the unresolved value instead.
        return candidate;
    }
}
const _selfRealPath = _realPathOr(fileURLToPath(import.meta.url));
const _argv1RealPath = _realPathOr(process.argv[1]);
if (_selfRealPath === _argv1RealPath) {
    main()
        .then((code) => process.exit(code))
        .catch((error) => {
            const detail = error instanceof Error ? error.message : String(error);
            console.error(detail);
            process.exit(1);
        });
}
|