@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,61 @@
1
+ {
2
+ "id": "knowledge.glossary-sync",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "collect", "next": "extract" },
6
+ { "id": "extract", "next": "diff-gate" },
7
+ { "id": "diff-gate", "next": "propose-gate" },
8
+ { "id": "propose-gate", "next": "done" },
9
+ { "id": "done", "next": null }
10
+ ],
11
+ "gates": {
12
+ "collect-gate": {
13
+ "step": "collect",
14
+ "expects": [
15
+ {
16
+ "id": "glossary-sources-resolved",
17
+ "kind": "trust.bundle",
18
+ "required": true,
19
+ "description": "The configurable glossary source list has been resolved to concrete canonical docs. Each source is a record id or a { category, prefix } selector; a source id that does not exist fails the gate (the source list is evidence — a typo must not pass silently). Auditing is opt-in: an empty source list resolves to nothing and the flow does nothing.",
20
+ "bundle_claim": {
21
+ "claimType": "knowledge.glossary-sync.collect",
22
+ "subjectType": "artifact",
23
+ "accepted_statuses": ["trusted", "accepted"]
24
+ }
25
+ }
26
+ ]
27
+ },
28
+ "diff-gate": {
29
+ "step": "diff-gate",
30
+ "expects": [
31
+ {
32
+ "id": "entries-classified-cite-source",
33
+ "kind": "trust.bundle",
34
+ "required": true,
35
+ "description": "Every term→definition entry extracted from a canonical doc (via the pluggable term extractor) is classified against the existing concept records and cites its evidence: the canonical source doc id + title, the extracted term and definition, and (for an outdated entry) the existing concept's drifted body. An entry is classified 'gap' when no concept captures the term, 'outdated' when a concept exists but its body has drifted from the canonical definition (whitespace-insensitive), or 'current' when they match. No entry is emitted without the source doc it came from. This step NEVER mutates a record — it only classifies.",
36
+ "bundle_claim": {
37
+ "claimType": "knowledge.glossary-sync.diff",
38
+ "subjectType": "artifact",
39
+ "accepted_statuses": ["trusted", "accepted"]
40
+ }
41
+ }
42
+ ]
43
+ },
44
+ "propose-gate": {
45
+ "step": "propose-gate",
46
+ "expects": [
47
+ {
48
+ "id": "gaps-and-drift-routed-through-existing-ops",
49
+ "kind": "trust.bundle",
50
+ "required": true,
51
+ "description": "Read-only by default: the flow returns the classification plan (gaps / outdated / current) and mutates nothing. When apply mode is enabled, every gap and outdated entry is enacted through the EXISTING concept-record ops with the canonical source doc as the proposer (it is the evidence for the definition): a gap is store.create(concept) then propose+apply; an outdated concept is store.propose + store.apply with the canonical definition as new_body. No new mutation path is forked — the operator's adopt/refresh decision rides the same gated propose→apply lineage every other concept mutation uses.",
52
+ "bundle_claim": {
53
+ "claimType": "knowledge.glossary-sync.propose",
54
+ "subjectType": "artifact",
55
+ "accepted_statuses": ["trusted", "accepted"]
56
+ }
57
+ }
58
+ ]
59
+ }
60
+ }
61
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "id": "knowledge.hygiene-review",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "orchestrate", "next": "review-gate" },
6
+ { "id": "review-gate", "next": "done" },
7
+ { "id": "done", "next": null }
8
+ ],
9
+ "gates": {
10
+ "orchestrate-gate": {
11
+ "step": "orchestrate",
12
+ "expects": [
13
+ {
14
+ "id": "sub-flows-run-via-existing-methods",
15
+ "kind": "trust.bundle",
16
+ "required": true,
17
+ "description": "Each opted-in hygiene audit was run through its EXISTING flow-runner method and EXISTING gates — knowledge.audit-freshness, knowledge.detect-contradictions, knowledge.glossary-sync, knowledge.canonicalize-category — and its findings collected. This orchestrator reimplements NO detection logic and forks NO new gate: the freshness/contradiction/canonicalize flags arrive already vouched by their own flag-/propose-gates, and glossary entries arrive already classified by glossary-sync's diff-gate. Each audit is opt-in (mirroring the flows): an audit whose config block is omitted is skipped and surfaced as skipped, so an empty review does nothing. This step is read-only by default — like the flows it runs, it mutates no record of its own.",
18
+ "bundle_claim": {
19
+ "claimType": "knowledge.hygiene-review.orchestrate",
20
+ "subjectType": "artifact",
21
+ "accepted_statuses": ["trusted", "accepted"]
22
+ }
23
+ }
24
+ ]
25
+ },
26
+ "review-gate": {
27
+ "step": "review-gate",
28
+ "expects": [
29
+ {
30
+ "id": "proposals-route-through-existing-gates",
31
+ "kind": "trust.bundle",
32
+ "required": true,
33
+ "description": "The unified review presents every collected proposal as one of three operator decisions — adopt / retire / merge — and each proposal cites its sourceFlow plus the underlying evidence the originating flow's gate already guaranteed (no proposal is synthesized here). hygiene-review forks NO propose→approve gate of its own: the operator enacts each decision by routing it back through the source flow's EXISTING gated op — retire via knowledge.retire; adopt via a fresh capture/compile (audit-freshness refresh) or via glossary-sync apply (store.create→propose→apply for a gap, store.propose→apply for drift, canonical doc as proposer); merge via an update/recategorize. The ONLY mutation path this orchestrator can trigger is glossary apply, which it delegates verbatim to knowledge.glossary-sync's own propose→apply lineage — it never writes the store directly. Read-only by default (consume-never-fork).",
34
+ "bundle_claim": {
35
+ "claimType": "knowledge.hygiene-review.review",
36
+ "subjectType": "artifact",
37
+ "accepted_statuses": ["trusted", "accepted"]
38
+ }
39
+ }
40
+ ]
41
+ }
42
+ }
43
+ }
@@ -34,6 +34,31 @@
34
34
  "id": "knowledge.retire",
35
35
  "path": "flows/retire.flow.json",
36
36
  "description": "Retire implemented or obsolete records from the working set via gated lifecycle: identify → propose-retirement → evidence-gate → apply-or-reject. Evidence required: retirement rationale + implementedByRef (when targeting 'implemented' status) or supersededByRef (optional, for 'retired'). Rejection leaves record status byte-identical. Retired records remain fully queryable with provenance via includeRetired flag."
37
+ },
38
+ {
39
+ "id": "knowledge.audit-freshness",
40
+ "path": "flows/audit-freshness.flow.json",
41
+ "description": "Hygiene audit (optional, configurable): flag records past per-category staleness thresholds and propose archive/refresh for each. Read-only: collect → measure → flag-gate. Thresholds are per-category (dot-hierarchy longest-prefix, with optional default); a category with no threshold is skipped (opt-in). Each flag cites last-mutation (max of updated_at and latest mutation_log entry) + the threshold that fired + computed age. Forks no mutation path — the operator routes each flag through knowledge.retire (archive) or a fresh capture/compile (refresh)."
42
+ },
43
+ {
44
+ "id": "knowledge.canonicalize-category",
45
+ "path": "flows/canonicalize-category.flow.json",
46
+ "description": "Hygiene audit (optional, configurable): audit category sprawl and propose flattening/retirement. Read-only: survey → assess → propose-gate. Flags three sprawl kinds (each independently toggleable): orphan-prefix (an intermediate prefix node holding no record directly, or a deep path carrying a single record → flatten), too-many-leaves (a parent fanning out past the configured leaf budget → regroup), implemented-active (a still-active record carrying an operator-supplied implemented-marker tag → retire). Every finding cites its evidence (the metric that fired + offending category/record ids). Forks no mutation path — the operator routes each finding through knowledge.retire (retire) or an update/recategorize (flatten/regroup)."
47
+ },
48
+ {
49
+ "id": "knowledge.glossary-sync",
50
+ "path": "flows/glossary-sync.flow.json",
51
+ "description": "Hygiene sync (optional, configurable): keep the glossary (concept records) in sync with the canonical docs that define those terms. Read-only by default: collect → extract → diff-gate → propose-gate. Surveys a configurable glossary source list (record ids and/or category selectors — opt-in), extracts term→definition entries via a pluggable extractor, and classifies each against existing concepts: gap (no concept) → propose a canonical definition; outdated (concept body drifted from the canonical, whitespace-insensitive) → propose the update; current → no-op. Each entry cites its source doc. With apply=true it enacts the plan through the existing concept-record ops (create → propose → apply for gaps; propose → apply for drift) with the canonical doc as proposer — forks no mutation path."
52
+ },
53
+ {
54
+ "id": "knowledge.detect-contradictions",
55
+ "path": "flows/detect-contradictions.flow.json",
56
+ "description": "Hygiene audit (optional, configurable): compare compiled records within a category and flag conflicting assertions with BOTH record ids. Read-only: collect → compare → flag-gate. Reuses the similarity adapter to scope comparisons to records about the same thing, then a pluggable contradiction fn (default: opposing-polarity heuristic) judges each similar pair. Auditing is opt-in per category. Each flag cites both record ids + shared category + the reason that fired. Forks no mutation path — the operator routes each flag through knowledge.retire (drop the stale assertion) or a fresh capture/compile/consolidate (reconcile)."
57
+ },
58
+ {
59
+ "id": "knowledge.hygiene-review",
60
+ "path": "flows/hygiene-review.flow.json",
61
+ "description": "Hygiene orchestrator (#106 #5, closes the issue): a THIN, opt-in orchestrator over the four hygiene flows (audit-freshness, detect-contradictions, glossary-sync, canonicalize-category). orchestrate → review-gate. Runs each opted-in audit via its EXISTING flow-runner method + gates (reimplements no detection), folds their findings into one operator-facing review of proposed actions normalized to adopt / retire / merge, each citing its sourceFlow + the evidence its origin flow's gate already vouched. Read-only by default; forks NO propose→approve gate — the operator enacts each decision through the source flow's existing gated op (retire via knowledge.retire; adopt via capture/compile or glossary-sync apply; merge via update/recategorize). The only mutation it can trigger is glossary apply, delegated verbatim to knowledge.glossary-sync's propose→apply lineage (consume-never-fork)."
37
62
  }
38
63
  ],
39
64
  "docs": [
@@ -55,7 +80,7 @@
55
80
  {
56
81
  "id": "knowledge.flow-runner",
57
82
  "path": "adapters/flow-runner/index.js",
58
- "description": "Executable flow logic: capture(rawText, meta) → classified raw record; compile(rawIds[]) → compiled record with provenance links; synthesize(conceptId | topicSelector, options) → concept summary proposal with mutation gate; consolidate(snapshotId | topicSelector, options) → decision snapshot consolidation with supersede-not-delete; retire(recordId, options) → gated status lifecycle transition (active→implemented→retired) with working-set exclusion. Emits canonical telemetry events at gate points."
83
+ "description": "Executable flow logic: capture(rawText, meta) → classified raw record; compile(rawIds[]) → compiled record with provenance links; synthesize(conceptId | topicSelector, options) → concept summary proposal with mutation gate; consolidate(snapshotId | topicSelector, options) → decision snapshot consolidation with supersede-not-delete; retire(recordId, options) → gated status lifecycle transition (active→implemented→retired) with working-set exclusion; auditFreshness(options) → read-only hygiene audit returning flags (last-mutation + threshold cited) proposing archive/refresh for records past per-category staleness thresholds; canonicalizeCategory(options) → read-only hygiene audit returning category-sprawl findings (orphan-prefix/too-many-leaves/implemented-active, each citing its metric + offending ids) proposing flatten/regroup/retire; glossarySync(options) → read-only-by-default glossary hygiene that extracts term→definition entries from a configurable list of canonical docs and classifies each against existing concepts (gap/outdated/current), optionally enacting gaps+drift through the existing concept-record create→propose→apply ops; detectContradictions(options) → read-only hygiene audit comparing compiled records within a category (similarity adapter scopes the comparison; pluggable contradiction fn judges each similar pair) returning flags that cite BOTH conflicting record ids + reason. Emits canonical telemetry events at gate points."
59
84
  },
60
85
  {
61
86
  "id": "knowledge.similarity-vector",
@@ -103,6 +128,31 @@
103
128
  "id": "knowledge.entity-cards-suite",
104
129
  "path": "evals/entities/suite.test.js",
105
130
  "description": "Eval cases for person/entity cards (issue #48): AC1-AC4 — entity extraction from Attendees lines, exact-match resolution, possible-duplicate detection, merge via propose/apply/reject (union aliases+backlinks, supersede duplicate), Obsidian people/ folder rendering, and extended contract suite (person type validity) on both adapters."
131
+ },
132
+ {
133
+ "id": "knowledge.audit-freshness-suite",
134
+ "path": "evals/audit-freshness/suite.test.js",
135
+ "description": "Eval cases for audit-freshness (#106 hygiene #1): per-category threshold resolution (dot-hierarchy longest-prefix + default + opt-in skip); each flag cites last-mutation + threshold + age; boundary (age == threshold not flagged, age > threshold flagged); last-mutation derived from max(updated_at, latest mutation_log entry); retired records excluded; read-only invariant (no record mutated, no flag without evidence); proposed-action resolution; gate telemetry."
136
+ },
137
+ {
138
+ "id": "knowledge.canonicalize-category-suite",
139
+ "path": "evals/canonicalize-category/suite.test.js",
140
+ "description": "Eval cases for canonicalize-category (#106 hygiene #4): orphan-prefix detection (empty intermediate node + single-record deep path → flatten); too-many-leaves fan-out budget (boundary: count == budget not flagged, > budget flagged → regroup, lists the leaves); implemented-active (status:active + implemented-marker tag → retire, case-insensitive markers); every finding cites its metric + evidence + offending ids; each check is independently toggleable + opt-in (disabled check / empty markers → no findings); retired records excluded; read-only invariant (no record mutated); gate telemetry (survey-gate + propose-gate); module-level export."
141
+ },
142
+ {
143
+ "id": "knowledge.glossary-sync-suite",
144
+ "path": "evals/glossary-sync/suite.test.js",
145
+ "description": "Eval cases for glossary-sync (#106 hygiene #3): default term extraction from canonical docs (bold/colon glossary lines); classification — gap (no concept), outdated (concept body drifted, whitespace-insensitive), current (matches); case/space-insensitive term matching within the resolved concept category; configurable source list (record id + category selector, opt-in empty list, unknown source rejected); pluggable term extractor; read-only-by-default invariant (no record mutated without apply); apply mode enacts gaps via create→propose→apply and drift via propose→apply with the canonical doc as proposer (consume-never-fork, proposes-link + mutation-log evidence); gate telemetry."
146
+ },
147
+ {
148
+ "id": "knowledge.detect-contradictions-suite",
149
+ "path": "evals/detect-contradictions/suite.test.js",
150
+ "description": "Eval cases for detect-contradictions (#106 hygiene #2): default opposing-polarity heuristic flags affirm-vs-negate over a shared subject; agreeing records not flagged; every flag cites BOTH record ids + category + reason; similarity scoping (an empty similarity detector yields no flags); pluggable contradiction fn overrides the default verdict; opt-in category scoping; cross-category pairs never formed; each unordered pair compared at most once; retired compiled records excluded; read-only invariant; gate telemetry; module-level export."
151
+ },
152
+ {
153
+ "id": "knowledge.hygiene-review-suite",
154
+ "path": "evals/hygiene-review/suite.test.js",
155
+ "description": "Eval cases for hygiene-review (#106 hygiene #5, closes the issue): opt-in orchestration (omitted audit block is skipped; empty review does nothing); each opted-in audit runs via its existing flow-runner method and its findings are collected verbatim (no detection reimplemented); proposals are normalized to adopt/retire/merge and each cites its sourceFlow + evidence; thin-orchestrator/consume-never-fork invariants — read-only by default mutates no record, no new gate is forked, the only mutation (glossary apply:true) is delegated to glossarySync's own gated propose→apply (verified via proposes-link + mutation-log on the resulting concept); sub-flow telemetry is folded in plus hygiene-review's own orchestrate-gate + review-gate; module-level export delegates to the runner."
106
156
  }
107
157
  ],
108
158
  "skills": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kontourai/flow-agents",
3
- "version": "1.4.0",
3
+ "version": "2.0.0",
4
4
  "description": "Flow Agents — a Kontour product that applies Flow and Veritas discipline as a portable process layer inside the agent tools you already use: Claude Code, Codex, Kiro, opencode, pi, and GitHub Actions — with framework adapters (AWS Strands preview) on the same policy-engine contract.",
5
5
  "keywords": [
6
6
  "agents",
@@ -44,7 +44,6 @@
44
44
  "flow-agents-console-learning-projection": "build/src/cli.js",
45
45
  "flow-agents-context-map": "build/src/cli.js",
46
46
  "flow-agents-effective-backlog-settings": "build/src/cli.js",
47
- "flow-agents-filter-installed-packs": "build/src/cli.js",
48
47
  "flow-agents-fixture-retirement-audit": "build/src/cli.js",
49
48
  "flow-agents-promote-workflow-artifact": "build/src/cli.js",
50
49
  "flow-agents-publish-change": "build/src/cli.js",
@@ -105,7 +104,6 @@
105
104
  "context-map": "npm run build --silent && node build/src/cli.js context-map",
106
105
  "context-map:check": "npm run build --silent && node build/src/cli.js context-map --check",
107
106
  "build:bundles": "npm run build --silent && node build/src/cli.js build-bundles",
108
- "filter:packs": "npm run build --silent && node build/src/cli.js filter-installed-packs",
109
107
  "validate:package": "npm run build --silent && node build/src/cli.js validate-package",
110
108
  "workflow:sidecar": "npm run build --silent && node build/src/cli/workflow-sidecar.js",
111
109
  "workflow:validate-artifacts": "npm run build --silent && node build/src/cli/validate-workflow-artifacts.js",
@@ -126,6 +124,7 @@
126
124
  "eval": "bash evals/run.sh",
127
125
  "eval:static": "bash evals/run.sh static",
128
126
  "eval:integration": "bash evals/run.sh integration",
127
+ "trust-reconcile-verify": "npm run build && npm run eval:static",
129
128
  "eval:acceptance": "bash evals/run.sh acceptance",
130
129
  "eval:llm": "bash evals/run.sh llm",
131
130
  "eval:llm:codex": "bash evals/run.sh llm dev --runtime codex",
@@ -145,6 +144,7 @@
145
144
  "@kontourai/flow": "~1.3.0"
146
145
  },
147
146
  "optionalDependencies": {
148
- "hachure": "^0.4.0"
147
+ "hachure": "^0.4.0",
148
+ "@kontourai/surface": "^1.2.0"
149
149
  }
150
150
  }
@@ -84,12 +84,20 @@ Full payload/decision schema is documented in `docs/spec/runtime-hook-surface.md
84
84
  | `stop-goal-fit--allow-clean-cwd.json` | stop-goal-fit | stop | L1 | No warnings in clean workspace |
85
85
  | `stop-goal-fit--warn-active-delivery.json` | stop-goal-fit | stop | L1 | Warnings for active delivery without DOD/GoalFit |
86
86
  | `stop-goal-fit--block-strict-mode.json` | stop-goal-fit | stop | L2 | Exit 2 with FLOW_AGENTS_GOAL_FIT_STRICT=true |
87
+ | `stop-goal-fit--block-mode.json` | stop-goal-fit | stop | L2 | Exit 2 with FLOW_AGENTS_GOAL_FIT_MODE=block |
88
+ | `stop-goal-fit--off-mode.json` | stop-goal-fit | stop | L1 | Silent (exit 0, no stderr) with FLOW_AGENTS_GOAL_FIT_MODE=off |
87
89
  | `workflow-steering--allow-no-state.json` | workflow-steering | userPromptSubmit | L1 | Pass-through when no active workflow state |
88
90
  | `workflow-steering--inject-active-state.json` | workflow-steering | userPromptSubmit | L1 | Injects STATE hint for blocked task |
89
91
  | `workflow-steering--inject-subagent-steering.json` | workflow-steering | postToolUse | L1 | Injects EXECUTION COMPLETE hint after tool-worker |
92
+ | `workflow-steering--reground-active-prompt.json` | workflow-steering | userPromptSubmit | L1 | Re-grounds an ordinary in_progress task (not just flagged states) |
93
+ | `workflow-steering--reground-session-start.json` | workflow-steering | sessionStart | L1 | Re-grounds the active goal on SessionStart (survives compaction/resume) |
90
94
 
91
95
  Fixtures with `workspace_setup` create a temporary directory with the listed files before invoking the adapter, and clean it up afterward. The `cwd` field in those payloads is replaced with the temp directory path at runtime.
92
96
 
97
+ ### Goal-fit enforcement mode
98
+
99
+ `stop-goal-fit` enforcement is controlled by `FLOW_AGENTS_GOAL_FIT_MODE` (`block` | `warn` | `off`); the legacy `FLOW_AGENTS_GOAL_FIT_STRICT=true` is honored as an alias for `block`. The canonical engine default is `warn`, so the conformance contract stays warning-by-default. Shipped L2 runtime configs (Claude Code, Codex) set `block` by default — overridable per operator via the env var — so the installed product enforces, while the engine default and these fixtures remain `warn`. In `block` mode, the same goal-fit gap is refused at most `FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS` (default 3) consecutive times and is then released, to avoid trapping the agent on an unsatisfiable goal.
100
+
93
101
  ---
94
102
 
95
103
  ## How to declare conformance
@@ -102,8 +110,8 @@ engine_contract_version: "1.0"
102
110
  runner_version: "run-conformance.js"
103
111
  test_date: 2026-06-11
104
112
  verdict: PASS
105
- fixture_count: 12
106
- fixtures_passed: 12
113
+ fixture_count: 18
114
+ fixtures_passed: 18
107
115
  gaps: [] # List any declared gaps here
108
116
  ```
109
117
 
@@ -0,0 +1,29 @@
1
+ {
2
+ "description": "evidence-capture records a command execution to the append-only log and passes through (exit 0, echoes input) — it is non-blocking by contract",
3
+ "policy_class": "evidence-capture",
4
+ "canonical_event": "postToolUse",
5
+ "conformance_level": "L2",
6
+ "hook_id": "evidence-capture",
7
+ "hook_script": "evidence-capture.js",
8
+ "payload": {
9
+ "hook_event_name": "PostToolUse",
10
+ "tool_name": "Bash",
11
+ "cwd": "__TEMP_WORKSPACE__",
12
+ "tool_input": {
13
+ "command": "npm test"
14
+ },
15
+ "tool_response": {
16
+ "exitCode": 0,
17
+ "stdout": "ok"
18
+ }
19
+ },
20
+ "workspace_setup": {
21
+ "AGENTS.md": "# Test Repo\n",
22
+ ".flow-agents/cap-task/state.json": "{\"schema_version\":\"1.0\",\"task_slug\":\"cap-task\",\"status\":\"in_progress\",\"phase\":\"verification\",\"updated_at\":\"2026-06-23T00:00:00Z\"}"
23
+ },
24
+ "expected": {
25
+ "exit_code": 0,
26
+ "stdout_echoes_input": true,
27
+ "stderr_is_empty": true
28
+ }
29
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "description": "stop-goal-fit blocks (exit 2) when the canonical Hachure trust.bundle carries a high-impact claim Surface marked `disputed` \u2014 even on a terminal/delivered task; trust.bundle is the sole verification artifact (4c bundle-only). ADR 0010 Phase 2: the gate enforces on the bundle the producers emit, not only bespoke sidecars.",
3
+ "policy_class": "stop-goal-fit",
4
+ "canonical_event": "stop",
5
+ "conformance_level": "L2",
6
+ "hook_id": "stop-goal-fit",
7
+ "hook_script": "stop-goal-fit.js",
8
+ "payload": {
9
+ "hook_event_name": "Stop",
10
+ "cwd": "__TEMP_WORKSPACE__"
11
+ },
12
+ "workspace_setup": {
13
+ "AGENTS.md": "# Test Repo\n",
14
+ ".flow-agents/false-bundle/false-bundle--deliver.md": "# False Bundle\n\nbranch: main\nstatus: delivered\ntype: deliver\n\n## Definition Of Done\n- [x] tests pass\n\n## Goal Fit Gate\n- [x] acceptance verified\n\n### Verdict: PASS\n",
15
+ ".flow-agents/false-bundle/state.json": "{\"schema_version\":\"1.0\",\"task_slug\":\"false-bundle\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-23T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}",
16
+ ".flow-agents/false-bundle/trust.bundle": "{\"schemaVersion\":3,\"source\":\"flow-agents/workflow-sidecar\",\"claims\":[{\"id\":\"c1\",\"subjectId\":\"false-bundle/unit-tests\",\"claimType\":\"workflow.check.command\",\"fieldOrBehavior\":\"unit tests\",\"value\":\"fail\",\"impactLevel\":\"high\",\"status\":\"disputed\",\"createdAt\":\"2026-06-23T00:00:00Z\",\"updatedAt\":\"2026-06-23T00:00:00Z\"}],\"evidence\":[],\"policies\":[],\"events\":[]}"
17
+ },
18
+ "env": {
19
+ "FLOW_AGENTS_GOAL_FIT_MODE": "block",
20
+ "FLOW_AGENTS_GOAL_FIT_BACKSTOP": "skip"
21
+ },
22
+ "expected": {
23
+ "exit_code": 2,
24
+ "stderr_contains": [
25
+ "trust.bundle claim disputed",
26
+ "caught false-completion"
27
+ ]
28
+ }
29
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "description": "stop-goal-fit blocks (exit 2) when the trust.bundle claims a command check passed but the deterministic capture log (command-log.jsonl) recorded that same command as FAIL \u2014 a caught false-completion (bundle-only; 4c)",
3
+ "policy_class": "stop-goal-fit",
4
+ "canonical_event": "stop",
5
+ "conformance_level": "L2",
6
+ "hook_id": "stop-goal-fit",
7
+ "hook_script": "stop-goal-fit.js",
8
+ "payload": {
9
+ "hook_event_name": "Stop",
10
+ "cwd": "__TEMP_WORKSPACE__"
11
+ },
12
+ "workspace_setup": {
13
+ "AGENTS.md": "# Test Repo\n",
14
+ ".flow-agents/false-pass/false-pass--deliver.md": "# False Pass\n\nbranch: main\nstatus: delivered\ntype: deliver\n\n## Definition Of Done\n- [x] tests pass\n\n## Goal Fit Gate\n- [x] acceptance verified\n\n### Verdict: PASS\n",
15
+ ".flow-agents/false-pass/state.json": "{\"schema_version\":\"1.0\",\"task_slug\":\"false-pass\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-23T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}",
16
+ ".flow-agents/false-pass/command-log.jsonl": "{\"command\":\"npm test\",\"observedResult\":\"fail\",\"exitCode\":1,\"capturedAt\":\"2026-06-23T00:00:00Z\",\"source\":\"postToolUse-capture\"}\n",
17
+ ".flow-agents/false-pass/trust.bundle": "{\"schemaVersion\": 3, \"source\": \"flow-agents/workflow-sidecar\", \"claims\": [{\"id\": \"c1\", \"subjectId\": \"false-pass/unit-tests\", \"claimType\": \"workflow.check.command\", \"fieldOrBehavior\": \"unit tests\", \"value\": \"pass\", \"impactLevel\": \"high\", \"status\": \"verified\", \"createdAt\": \"2026-06-23T00:00:00Z\", \"updatedAt\": \"2026-06-23T00:00:00Z\"}], \"evidence\": [{\"id\": \"ev:c1\", \"claimId\": \"c1\", \"evidenceType\": \"test_output\", \"method\": \"validation\", \"sourceRef\": \"false-pass/command-log.jsonl\", \"excerptOrSummary\": \"unit tests\", \"observedAt\": \"2026-06-23T00:00:00Z\", \"collectedBy\": \"flow-agents/workflow-sidecar\", \"passing\": true, \"execution\": {\"runner\": \"bash\", \"label\": \"npm test\", \"isError\": false}}], \"policies\": [], \"events\": [{\"id\": \"evt:c1\", \"claimId\": \"c1\", \"status\": \"verified\", \"actor\": \"flow-agents/workflow-sidecar\", \"method\": \"validation\", \"evidenceIds\": [\"ev:c1\"], \"createdAt\": \"2026-06-23T00:00:00Z\", \"verifiedAt\": \"2026-06-23T00:00:00Z\"}]}"
18
+ },
19
+ "env": {
20
+ "FLOW_AGENTS_GOAL_FIT_MODE": "block",
21
+ "FLOW_AGENTS_GOAL_FIT_BACKSTOP": "skip"
22
+ },
23
+ "expected": {
24
+ "exit_code": 2,
25
+ "stderr_contains": [
26
+ "caught false-completion",
27
+ "npm test"
28
+ ]
29
+ }
30
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "description": "stop-goal-fit blocks (exit 2) when FLOW_AGENTS_GOAL_FIT_MODE=block for an active delivery artifact missing DOD and Goal Fit Gate",
3
+ "policy_class": "stop-goal-fit",
4
+ "canonical_event": "stop",
5
+ "conformance_level": "L2",
6
+ "hook_id": "stop-goal-fit",
7
+ "hook_script": "stop-goal-fit.js",
8
+ "payload": {
9
+ "hook_event_name": "Stop",
10
+ "cwd": "__TEMP_WORKSPACE__"
11
+ },
12
+ "workspace_setup": {
13
+ "AGENTS.md": "# Test Repo\n",
14
+ ".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
15
+ },
16
+ "env": {
17
+ "FLOW_AGENTS_GOAL_FIT_MODE": "block"
18
+ },
19
+ "expected": {
20
+ "exit_code": 2,
21
+ "stderr_contains": ["status:executing"]
22
+ }
23
+ }
@@ -0,0 +1,24 @@
1
+ {
2
+ "description": "stop-goal-fit stays silent (exit 0, no stderr) when FLOW_AGENTS_GOAL_FIT_MODE=off even for an active incomplete delivery artifact",
3
+ "policy_class": "stop-goal-fit",
4
+ "canonical_event": "stop",
5
+ "conformance_level": "L1",
6
+ "hook_id": "stop-goal-fit",
7
+ "hook_script": "stop-goal-fit.js",
8
+ "payload": {
9
+ "hook_event_name": "Stop",
10
+ "cwd": "__TEMP_WORKSPACE__"
11
+ },
12
+ "workspace_setup": {
13
+ "AGENTS.md": "# Test Repo\n",
14
+ ".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
15
+ },
16
+ "env": {
17
+ "FLOW_AGENTS_GOAL_FIT_MODE": "off"
18
+ },
19
+ "expected": {
20
+ "exit_code": 0,
21
+ "stdout_echoes_input": true,
22
+ "stderr_is_empty": true
23
+ }
24
+ }
@@ -1,5 +1,5 @@
1
1
  {
2
- "description": "stop-goal-fit warns (exit 0, stderr has warnings) for an active delivery artifact missing DOD and Goal Fit Gate",
2
+ "description": "stop-goal-fit warns (exit 0, stderr has warnings) for an active delivery artifact; validates that the warn-mode status signal fires without Builder heading checks",
3
3
  "policy_class": "stop-goal-fit",
4
4
  "canonical_event": "stop",
5
5
  "conformance_level": "L1",
@@ -13,9 +13,12 @@
13
13
  "AGENTS.md": "# Test Repo\n",
14
14
  ".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
15
15
  },
16
+ "env": {
17
+ "FLOW_AGENTS_GOAL_FIT_MODE": "warn"
18
+ },
16
19
  "expected": {
17
20
  "exit_code": 0,
18
21
  "stdout_echoes_input": true,
19
- "stderr_contains": ["status:executing", "Definition Of Done", "Goal Fit Gate"]
22
+ "stderr_contains": ["status:executing"]
20
23
  }
21
24
  }
@@ -0,0 +1,23 @@
1
+ {
2
+ "description": "stop-goal-fit emits a `NOT_VERIFIED —` warning (exit 2 in block mode) when a delivery session has neither trust.bundle nor state.json",
3
+ "policy_class": "stop-goal-fit",
4
+ "canonical_event": "stop",
5
+ "conformance_level": "L2",
6
+ "hook_id": "stop-goal-fit",
7
+ "hook_script": "stop-goal-fit.js",
8
+ "payload": {
9
+ "hook_event_name": "Stop",
10
+ "cwd": "__TEMP_WORKSPACE__"
11
+ },
12
+ "workspace_setup": {
13
+ "AGENTS.md": "# Test Repo\n",
14
+ ".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork in progress — no sidecars yet.\n"
15
+ },
16
+ "env": {
17
+ "FLOW_AGENTS_GOAL_FIT_MODE": "block"
18
+ },
19
+ "expected": {
20
+ "exit_code": 2,
21
+ "stderr_contains": ["NOT_VERIFIED —", "trust.bundle"]
22
+ }
23
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "description": "workflow-steering re-grounds an ordinary in_progress task (not just flagged states) at UserPromptSubmit so an in-flight goal survives long gaps and compaction",
3
+ "policy_class": "workflow-steering",
4
+ "canonical_event": "userPromptSubmit",
5
+ "conformance_level": "L1",
6
+ "hook_id": "workflow-steering",
7
+ "hook_script": "workflow-steering.js",
8
+ "payload": {
9
+ "hook_event_name": "UserPromptSubmit",
10
+ "cwd": "__TEMP_WORKSPACE__",
11
+ "prompt": "continue"
12
+ },
13
+ "workspace_setup": {
14
+ "AGENTS.md": "# Test Repo\n",
15
+ ".flow-agents/build-feature/state.json": {
16
+ "task_slug": "build-feature",
17
+ "status": "in_progress",
18
+ "phase": "execution",
19
+ "next_action": {
20
+ "summary": "Wire the auth middleware and add tests.",
21
+ "status": "in_progress",
22
+ "target_phase": "verification"
23
+ }
24
+ }
25
+ },
26
+ "expected": {
27
+ "exit_code": 0,
28
+ "stdout_contains": ["WORKFLOW STATE:", "STATE:", "build-feature", "in_progress"]
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "description": "workflow-steering re-grounds the active goal on SessionStart (which fires after context compaction and on resume), so the goal/phase/next-step survive context loss",
3
+ "policy_class": "workflow-steering",
4
+ "canonical_event": "sessionStart",
5
+ "conformance_level": "L1",
6
+ "hook_id": "workflow-steering",
7
+ "hook_script": "workflow-steering.js",
8
+ "payload": {
9
+ "hook_event_name": "SessionStart",
10
+ "cwd": "__TEMP_WORKSPACE__",
11
+ "source": "compact"
12
+ },
13
+ "workspace_setup": {
14
+ "AGENTS.md": "# Test Repo\n",
15
+ ".flow-agents/build-feature/state.json": {
16
+ "task_slug": "build-feature",
17
+ "status": "in_progress",
18
+ "phase": "execution",
19
+ "next_action": {
20
+ "summary": "Wire the auth middleware and add tests.",
21
+ "status": "in_progress",
22
+ "target_phase": "verification"
23
+ }
24
+ }
25
+ },
26
+ "expected": {
27
+ "exit_code": 0,
28
+ "stdout_contains": ["WORKFLOW STATE:", "STATE:", "build-feature"]
29
+ }
30
+ }
@@ -35,7 +35,7 @@ const LEVEL_ORDER = ['L0', 'L1', 'L2'];
35
35
  const LEVEL_POLICY_CLASSES = {
36
36
  L0: new Set([]), // L0: telemetry only — no policy fixtures required
37
37
  L1: new Set(['workflow-steering', 'stop-goal-fit']),
38
- L2: new Set(['workflow-steering', 'stop-goal-fit', 'quality-gate', 'config-protection']),
38
+ L2: new Set(['workflow-steering', 'stop-goal-fit', 'quality-gate', 'config-protection', 'evidence-capture']),
39
39
  };
40
40
 
41
41
  // -----------------------------------------------------------------------
package/scripts/README.md CHANGED
@@ -9,7 +9,6 @@ These files are stable launchers for TypeScript code compiled under `build/src/`
9
9
  | Wrapper | Compiled implementation |
10
10
  | --- | --- |
11
11
  | `build-universal-bundles.js` | `build/src/tools/build-universal-bundles.js` |
12
- | `filter-installed-packs.js` | `build/src/tools/filter-installed-packs.js` |
13
12
  | `generate-context-map.js` | `build/src/tools/generate-context-map.js` |
14
13
  | `kit.js` | `build/src/cli/kit.js` |
15
14
  | `pull-work-provider.js` | `build/src/cli/pull-work-provider.js` |
@@ -53,6 +52,7 @@ renamed, or changes category, update the table and the validator together.
53
52
  | `codex-telemetry-hook.js` | telemetry shim | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_telemetry.sh` | Captures Codex hook telemetry and fails open. |
54
53
  | `run-hook.js` | hook runner | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_goal_fit_hook.sh`, `evals/integration/test_workflow_steering_hook.sh` | Applies profile/disable flags, traversal checks, and hook execution. |
55
54
  | `config-protection.js` | policy hook | `evals/integration/test_hook_category_behaviors.sh` | Blocks unsafe runtime config edits. |
55
+ | `evidence-capture.js` | policy hook | `evals/integration/test_evidence_capture_hook.sh` | Deterministically captures command executions to `.flow-agents/<slug>/command-log.jsonl` so evidence is machine-recorded, not model-claimed (cross-referenced by stop-goal-fit). |
56
56
  | `governance-audit.sh` | policy hook | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_telemetry.sh` | Emits governance/Veritas audit context when configured. |
57
57
  | `opencode-hook-adapter.js` | runtime adapter | `evals/integration/test_bundle_install.sh` | Translates opencode plugin events into the shared hook runner contract. |
58
58
  | `opencode-telemetry-hook.js` | telemetry shim | `evals/integration/test_bundle_install.sh` | Captures opencode plugin telemetry and fails open. |
@@ -69,6 +69,7 @@ renamed, or changes category, update the table and the validator together.
69
69
  | `desktop-notify.sh` | local notification helper | `evals/integration/test_hook_category_behaviors.sh` | Optional local desktop notification helper. |
70
70
  | `lib/audit-transport.sh` | shared hook library | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_telemetry.sh` | Shared audit event transport functions. |
71
71
  | `lib/hook-flags.js` | shared hook library | `evals/integration/test_hook_category_behaviors.sh` | Shared profile/disable flag parsing. |
72
+ | `lib/liveness-read.js` | shared hook library | `evals/integration/test_session_resume_roundtrip.sh` | Shared liveness event reader + freshness check (`readLivenessEvents`, `freshHolders`); consumed by the reground hook and `workflow-sidecar liveness status`. |
72
73
  | `lib/patterns.sh` | shared hook library | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_telemetry.sh` | Shared shell pattern constants. |
73
74
  | `lib/resolve-formatter.js` | shared hook library | `evals/integration/test_hook_category_behaviors.sh` | Shared formatter resolution helper. |
74
75
 
@@ -1,3 +1,2 @@
1
1
  #!/usr/bin/env node
2
- // Supports FLOW_AGENTS_PACKS through the TypeScript bundle builder.
3
2
  import("../build/src/tools/build-universal-bundles.js").then(({ main }) => process.exit(main()));