@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,396 @@
1
+ /**
2
+ * Knowledge Kit — Hygiene-Review Eval Suite (#106 hygiene #5, closes the issue)
3
+ *
4
+ * knowledge.hygiene-review is a THIN ORCHESTRATOR over the four hygiene flows
5
+ * (#106 #1–#4): audit-freshness, detect-contradictions, glossary-sync,
6
+ * canonicalize-category. It runs each opted-in audit through its EXISTING
7
+ * flow-runner method + EXISTING gates, reimplements NO detection logic, and
8
+ * folds the findings into one operator-facing review of proposed actions
9
+ * normalized to adopt / retire / merge. It forks NO new propose→approve gate:
10
+ * read-only by default, and the only mutation it can trigger (glossary
11
+ * apply:true) is delegated verbatim to glossarySync's own gated propose→apply
12
+ * lineage (consume-never-fork).
13
+ *
14
+ * Covers:
15
+ * - opt-in orchestration: an omitted audit block is skipped (surfaced as
16
+ * skipped); an empty review runs nothing and proposes nothing.
17
+ * - each opted-in audit runs via its existing method and its findings are
18
+ * collected verbatim — the orchestrator detects nothing of its own.
19
+ * - normalization to adopt / retire / merge, each proposal citing its
20
+ * sourceFlow + the evidence its origin flow's gate already vouched.
21
+ * - thin-orchestrator / consume-never-fork invariants:
22
+ * * read-only by default mutates no record;
23
+ * * the only mutation (glossary apply:true) is delegated to glossarySync —
24
+ * proven by the resulting concept's "proposes" link + propose/apply
25
+ * mutation_log entries (the same lineage glossary-sync's own suite checks).
26
+ * - sub-flow telemetry is folded in, plus hygiene-review's own
27
+ * orchestrate-gate + review-gate.
28
+ * - module-level hygieneReview export delegates to the runner.
29
+ *
30
+ * Run:
31
+ * node --test kits/knowledge/evals/hygiene-review/suite.test.js
32
+ */
33
+
34
+ import { test, describe, before, after } from "node:test";
35
+ import assert from "node:assert/strict";
36
+ import * as fs from "node:fs";
37
+ import * as path from "node:path";
38
+ import * as os from "node:os";
39
+ import { fileURLToPath } from "node:url";
40
+
41
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
42
+ const KIT_ROOT = path.resolve(__dirname, "../..");
43
+
44
+ const adapterPath = path.join(KIT_ROOT, "adapters/default-store/index.js");
45
+ const runnerPath = path.join(KIT_ROOT, "adapters/flow-runner/index.js");
46
+
47
+ const { DefaultKnowledgeStore } = await import(adapterPath);
48
+ const { KnowledgeFlowRunner, hygieneReview } = await import(runnerPath);
49
+
50
+ // ---------------------------------------------------------------------------
51
+ // Helpers
52
+ // ---------------------------------------------------------------------------
53
+
54
+ function makeTempDir() {
55
+ return fs.mkdtempSync(path.join(os.tmpdir(), "knowledge-hygiene-review-"));
56
+ }
57
+
58
+ function makeStore(dir) {
59
+ return new DefaultKnowledgeStore({ storeRoot: dir });
60
+ }
61
+
62
+ function makeRunner(store, dir) {
63
+ return new KnowledgeFlowRunner({
64
+ store,
65
+ workspace: dir,
66
+ agent: "hygiene-review-test-runner",
67
+ sessionId: "hygiene-review-session-001",
68
+ });
69
+ }
70
+
71
+ function readTelemetryEvents(dir) {
72
+ const sinkPath = path.join(dir, ".telemetry", "full.jsonl");
73
+ if (!fs.existsSync(sinkPath)) return [];
74
+ return fs.readFileSync(sinkPath, "utf8")
75
+ .trim()
76
+ .split("\n")
77
+ .filter(Boolean)
78
+ .map((line) => JSON.parse(line));
79
+ }
80
+
81
+ function recordBytes(dir, id) {
82
+ return fs.readFileSync(path.join(dir, "records", `${id}.md`), "utf8");
83
+ }
84
+
85
+ const DAY_MS = 86_400_000;
86
+ const NOW = "2026-06-25T00:00:00.000Z";
87
+
88
+ function daysAgo(days) {
89
+ return new Date(Date.parse(NOW) - days * DAY_MS).toISOString();
90
+ }
91
+
92
+ /** Create a record via store.create, then rewrite the created_at/updated_at lines of records/<id>.md so the record appears `days` days older than NOW. */
93
+ async function createAtAge(store, dir, { id, type, title, category, days }) {
94
+ await store.create({
95
+ id, type, title, body: `Body of ${title}`, category,
96
+ provenance: { agent: "fixture" },
97
+ });
98
+ const recPath = path.join(dir, "records", `${id}.md`);
99
+ const at = daysAgo(days);
100
+ const text = fs.readFileSync(recPath, "utf8")
101
+ .replace(/created_at: .*/, `created_at: ${at}`)
102
+ .replace(/updated_at: .*/, `updated_at: ${at}`);
103
+ fs.writeFileSync(recPath, text, "utf8");
104
+ return id;
105
+ }
106
+
107
+ // A canonical glossary doc → drives a glossary GAP (no concept for the term).
108
+ const GLOSSARY_BODY = [
109
+ "# Hygiene Glossary",
110
+ "",
111
+ "**Backpressure** — A mechanism to slow producers when consumers lag.",
112
+ ].join("\n");
113
+
114
+ // Detector stubs. alwaysContradict fires on every pair and allSimilar returns
115
+ // every candidate id, so the detect-contradictions sub-flow runs deterministically
116
+ // (the orchestrator passes them straight through to the existing method; no detection lives here).
117
+ const alwaysContradict = () => ({ reason: "fixture: forced contradiction" });
118
+ const allSimilar = async (record, candidates) => candidates.map((c) => c.id);
119
+
120
+ // ---------------------------------------------------------------------------
121
+ // Suite
122
+ // ---------------------------------------------------------------------------
123
+
124
+ describe("Knowledge Kit Hygiene-Review Suite (#106 #5)", () => {
125
+ let dir;
126
+ let store;
127
+ let runner;
128
+ let glossaryDocId;
129
+
130
+ before(async () => {
131
+ dir = makeTempDir();
132
+ store = makeStore(dir);
133
+ runner = makeRunner(store, dir);
134
+
135
+ // #1 audit-freshness fixtures: one stale (archive), one stale (refresh).
136
+ await createAtAge(store, dir, {
137
+ id: "stale-archive", type: "raw", title: "Stale → archive",
138
+ category: "radar.signals", days: 30,
139
+ });
140
+ await createAtAge(store, dir, {
141
+ id: "stale-refresh", type: "compiled", title: "Stale → refresh",
142
+ category: "ops.decisions", days: 400,
143
+ });
144
+
145
+ // #2 detect-contradictions fixtures: two compiled records in one category.
146
+ await store.create({
147
+ type: "compiled", id: "contra-a", title: "Service uses REST",
148
+ body: "We use REST.", category: "arch.api", provenance: { agent: "fixture" },
149
+ });
150
+ await store.create({
151
+ type: "compiled", id: "contra-b", title: "Service uses gRPC",
152
+ body: "We do not use REST.", category: "arch.api", provenance: { agent: "fixture" },
153
+ });
154
+
155
+ // #3 glossary-sync fixture: a canonical doc whose term has no concept (gap).
156
+ glossaryDocId = await store.create({
157
+ type: "compiled", title: "Hygiene Glossary (canonical)",
158
+ body: GLOSSARY_BODY, category: "eng.glossary", provenance: { agent: "fixture" },
159
+ });
160
+
161
+ // #4 canonicalize-category fixture: an active record tagged implemented.
162
+ await store.create({
163
+ type: "compiled", id: "impl-active", title: "Implemented but active",
164
+ body: "Shipped already.", category: "work.items", tags: ["implemented"],
165
+ provenance: { agent: "fixture" },
166
+ });
167
+ });
168
+
169
+ after(() => {
170
+ if (dir) fs.rmSync(dir, { recursive: true, force: true });
171
+ });
172
+
173
+ // A full opt-in config exercising all four sub-flows (read-only).
174
+ const fullConfig = () => ({
175
+ freshness: {
176
+ now: NOW,
177
+ thresholds: { "radar.signals": 7, "ops.decisions": 365 },
178
+ actions: { "radar.signals": "archive" },
179
+ defaultAction: "refresh",
180
+ },
181
+ contradictions: {
182
+ categories: ["arch.api"],
183
+ similarityDetector: allSimilar,
184
+ contradictionDetector: alwaysContradict,
185
+ },
186
+ glossary: { sources: [glossaryDocId] },
187
+ canonicalize: { implementedMarkers: ["implemented"] },
188
+ });
189
+
190
+ test("opt-in: omitted audit blocks are skipped; an empty review does nothing", async () => {
191
+ const empty = await runner.hygieneReview({});
192
+ assert.deepEqual(
193
+ empty.ranFlows, [],
194
+ "no flow runs when no audit block is provided"
195
+ );
196
+ assert.deepEqual(
197
+ empty.skippedFlows.sort(),
198
+ [
199
+ "knowledge.audit-freshness",
200
+ "knowledge.canonicalize-category",
201
+ "knowledge.detect-contradictions",
202
+ "knowledge.glossary-sync",
203
+ ],
204
+ "every audit is surfaced as skipped"
205
+ );
206
+ assert.equal(empty.proposals.length, 0, "an empty review proposes nothing");
207
+ assert.deepEqual(empty.summary, { total: 0, adopt: 0, retire: 0, merge: 0 });
208
+ });
209
+
210
+ test("runs only the opted-in audits", async () => {
211
+ const result = await runner.hygieneReview({
212
+ freshness: fullConfig().freshness,
213
+ });
214
+ assert.deepEqual(result.ranFlows, ["knowledge.audit-freshness"]);
215
+ assert.ok(result.skippedFlows.includes("knowledge.detect-contradictions"));
216
+ assert.ok(result.skippedFlows.includes("knowledge.glossary-sync"));
217
+ assert.ok(result.skippedFlows.includes("knowledge.canonicalize-category"));
218
+ });
219
+
220
+ test("orchestrates all four flows and collects every finding verbatim", async () => {
221
+ const result = await runner.hygieneReview(fullConfig());
222
+
223
+ assert.deepEqual(
224
+ result.ranFlows.sort(),
225
+ [
226
+ "knowledge.audit-freshness",
227
+ "knowledge.canonicalize-category",
228
+ "knowledge.detect-contradictions",
229
+ "knowledge.glossary-sync",
230
+ ],
231
+ "all four hygiene flows ran"
232
+ );
233
+ assert.deepEqual(result.skippedFlows, [], "nothing skipped in a full review");
234
+
235
+ // Each sub-audit's own result object is carried through untouched.
236
+ assert.ok(result.audits.freshness.flags.length >= 2, "freshness flags collected");
237
+ assert.ok(result.audits.contradictions.flags.length >= 1, "contradiction flags collected");
238
+ assert.ok(result.audits.glossary.gaps.length >= 1, "glossary gaps collected");
239
+ assert.ok(result.audits.canonicalize.findings.length >= 1, "canonicalize findings collected");
240
+
241
+ // Every proposal traces back to one of the four flows — none synthesized here.
242
+ const sources = new Set(result.proposals.map((p) => p.sourceFlow));
243
+ assert.ok(sources.has("knowledge.audit-freshness"));
244
+ assert.ok(sources.has("knowledge.detect-contradictions"));
245
+ assert.ok(sources.has("knowledge.glossary-sync"));
246
+ assert.ok(sources.has("knowledge.canonicalize-category"));
247
+ });
248
+
249
+ test("normalizes proposals to adopt / retire / merge, each citing source + evidence", async () => {
250
+ const result = await runner.hygieneReview(fullConfig());
251
+
252
+ for (const p of result.proposals) {
253
+ assert.ok(
254
+ ["adopt", "retire", "merge"].includes(p.decision),
255
+ `decision ${p.decision} is one of adopt/retire/merge`
256
+ );
257
+ assert.ok(p.sourceFlow, "proposal cites its source flow");
258
+ assert.ok(p.route, "proposal names the existing gated op it routes through");
259
+ assert.ok(Array.isArray(p.recordIds) && p.recordIds.length >= 1, "proposal cites record ids");
260
+ assert.ok(p.evidence && typeof p.evidence === "object", "proposal carries evidence");
261
+ }
262
+
263
+ // audit-freshness archive → retire; refresh → adopt.
264
+ const archive = result.proposals.find(
265
+ (p) => p.sourceFlow === "knowledge.audit-freshness" && p.recordIds.includes("stale-archive")
266
+ );
267
+ assert.equal(archive.decision, "retire", "an archive flag normalizes to retire");
268
+ assert.equal(archive.route, "knowledge.retire");
269
+ const refresh = result.proposals.find(
270
+ (p) => p.sourceFlow === "knowledge.audit-freshness" && p.recordIds.includes("stale-refresh")
271
+ );
272
+ assert.equal(refresh.decision, "adopt", "a refresh flag normalizes to adopt");
273
+
274
+ // contradiction → retire, citing BOTH ids.
275
+ const contra = result.proposals.find((p) => p.sourceFlow === "knowledge.detect-contradictions");
276
+ assert.equal(contra.decision, "retire");
277
+ assert.deepEqual(contra.recordIds.sort(), ["contra-a", "contra-b"], "cites both record ids");
278
+ assert.match(contra.evidence.reason, /contradiction/);
279
+
280
+ // glossary gap → adopt.
281
+ const gap = result.proposals.find((p) => p.sourceFlow === "knowledge.glossary-sync");
282
+ assert.equal(gap.decision, "adopt");
283
+ assert.equal(gap.evidence.term, "Backpressure");
284
+
285
+ // canonicalize maps proposedAction "retire" → retire, and flatten/regroup → merge.
286
+ const canonProps = result.proposals.filter((p) => p.sourceFlow === "knowledge.canonicalize-category");
287
+ const implActive = canonProps.find((p) => p.evidence.kind === "implemented-active");
288
+ assert.equal(implActive.decision, "retire", "implemented-active normalizes to retire");
289
+ assert.ok(implActive.recordIds.includes("impl-active"));
290
+ const flatten = canonProps.find((p) => p.proposedAction === "flatten" || p.proposedAction === "regroup");
291
+ if (flatten) {
292
+ assert.equal(flatten.decision, "merge", "flatten/regroup normalizes to merge");
293
+ }
294
+
295
+ // summary tallies match.
296
+ const tally = { total: 0, adopt: 0, retire: 0, merge: 0 };
297
+ for (const p of result.proposals) { tally.total += 1; tally[p.decision] += 1; }
298
+ assert.deepEqual(result.summary, tally, "summary tallies the proposals");
299
+ });
300
+
301
+ test("read-only by default: orchestration mutates no record (no forked gate)", async () => {
302
+ const ids = fs.readdirSync(path.join(dir, "records")).map((f) => f.replace(/\.md$/, ""));
303
+ const before = {};
304
+ for (const id of ids) before[id] = recordBytes(dir, id);
305
+
306
+ const result = await runner.hygieneReview(fullConfig());
307
+
308
+ const idsAfter = fs.readdirSync(path.join(dir, "records")).map((f) => f.replace(/\.md$/, ""));
309
+ assert.deepEqual(idsAfter.sort(), ids.sort(), "no record created by a read-only review");
310
+ for (const id of ids) {
311
+ assert.equal(recordBytes(dir, id), before[id], `record ${id} byte-identical after review`);
312
+ }
313
+ // No glossary apply happened (apply not requested).
314
+ assert.equal(result.audits.glossary.applied.length, 0, "nothing applied in read-only mode");
315
+ });
316
+
317
+ test("the only mutation (glossary apply) is delegated to glossarySync's gated propose→apply", async () => {
318
+ const adir = makeTempDir();
319
+ const astore = makeStore(adir);
320
+ const arunner = makeRunner(astore, adir);
321
+ try {
322
+ const docId = await astore.create({
323
+ type: "compiled", title: "Apply glossary", category: "app.glossary",
324
+ body: "**Idempotency** — An op applied many times with the same effect.",
325
+ provenance: { agent: "fixture" },
326
+ });
327
+ const result = await arunner.hygieneReview({
328
+ glossary: { sources: [docId], apply: true },
329
+ });
330
+
331
+ // The orchestrator did not write the store itself — it delegated.
332
+ assert.equal(result.audits.glossary.applied.length, 1, "the gap was applied via glossarySync");
333
+ const { conceptId, action } = result.audits.glossary.applied[0];
334
+ assert.equal(action, "create");
335
+
336
+ // Prove the EXISTING gated propose→apply lineage was used (consume-never-fork):
337
+ // the canonical doc proposes the concept, and the concept's mutation_log
338
+ // carries propose + apply — identical to glossary-sync's own suite.
339
+ const { forward } = await astore.getLinks(docId);
340
+ assert.ok(
341
+ forward.some((l) => l.target_id === conceptId && l.kind === "proposes"),
342
+ "canonical doc proposes the concept (existing gated lineage, not a forked gate)"
343
+ );
344
+ const concept = await astore.get(conceptId);
345
+ const ops = (concept.mutation_log || []).map((e) => e.op);
346
+ assert.ok(ops.includes("propose") && ops.includes("apply"), "gated propose→apply path used");
347
+ } finally {
348
+ fs.rmSync(adir, { recursive: true, force: true });
349
+ }
350
+ });
351
+
352
+ test("telemetry: sub-flow gates are folded in plus hygiene-review's own gates", async () => {
353
+ const tdir = makeTempDir();
354
+ const tstore = makeStore(tdir);
355
+ const trunner = makeRunner(tstore, tdir);
356
+ try {
357
+ await createAtAge(tstore, tdir, {
358
+ id: "t-stale", type: "raw", title: "Telemetry stale",
359
+ category: "radar.signals", days: 30,
360
+ });
361
+ const result = await trunner.hygieneReview({
362
+ freshness: { now: NOW, thresholds: { "radar.signals": 7 } },
363
+ });
364
+
365
+ // Returned events include both the sub-flow's gates and our own.
366
+ const blob = JSON.stringify(result.telemetryEvents);
367
+ assert.ok(blob.includes("knowledge.audit-freshness"), "sub-flow telemetry folded in");
368
+ assert.ok(blob.includes("knowledge.hygiene-review"), "orchestrator emits its own gate telemetry");
369
+ assert.ok(blob.includes("orchestrate-gate"), "orchestrate-gate emitted");
370
+ assert.ok(blob.includes("review-gate"), "review-gate emitted");
371
+
372
+ // And our own gates are persisted to the sink.
373
+ const persisted = readTelemetryEvents(tdir);
374
+ const reviewEvents = persisted.filter((e) =>
375
+ JSON.stringify(e).includes("knowledge.hygiene-review")
376
+ );
377
+ assert.ok(reviewEvents.length > 0, "hygiene-review telemetry persisted to the sink");
378
+ } finally {
379
+ fs.rmSync(tdir, { recursive: true, force: true });
380
+ }
381
+ });
382
+
383
+ test("module-level hygieneReview export delegates to the runner", async () => {
384
+ const result = await hygieneReview({
385
+ store,
386
+ workspace: dir,
387
+ agent: "hygiene-review-test-runner",
388
+ freshness: { now: NOW, thresholds: { "radar.signals": 7 }, actions: { "radar.signals": "archive" } },
389
+ });
390
+ assert.deepEqual(result.ranFlows, ["knowledge.audit-freshness"]);
391
+ assert.ok(
392
+ result.proposals.some((p) => p.recordIds.includes("stale-archive") && p.decision === "retire"),
393
+ "the module-level export produces the same normalized proposals"
394
+ );
395
+ });
396
+ });
@@ -1171,3 +1171,148 @@ describe("store.retire — direct op tests", () => {
1171
1171
  }
1172
1172
  });
1173
1173
  });
1174
+
1175
+ // ---------------------------------------------------------------------------
1176
+ // #106 — close-proposal: applying a retirement proposal auto-closes the spent
1177
+ // proposal artifact (no dangling active twin; double-prefixed twin never spawns)
1178
+ // ---------------------------------------------------------------------------
1179
+
1180
+ describe("#106 — close-proposal: apply auto-retires the spent proposal artifact", () => {
1181
+ // Helper: list non-retired records whose title marks them a retirement-proposal
1182
+ // artifact (these are the dangling "Retirement proposal: …" records #106 is about).
1183
+ function activeProposalArtifacts(store) {
1184
+ return store
1185
+ ._allRecords()
1186
+ .filter(
1187
+ (r) =>
1188
+ (r.status || "active") !== "retired" &&
1189
+ typeof r.title === "string" &&
1190
+ r.title.startsWith("Retirement proposal")
1191
+ );
1192
+ }
1193
+
1194
+ test("after apply, the proposal artifact is retired and leaves no active twin; the change persisted", async () => {
1195
+ const dir = makeTempDir();
1196
+ try {
1197
+ const { store, compiledId1 } = await buildFixture(dir);
1198
+ const runner = makeRunner(store, dir);
1199
+
1200
+ const result = await runner.retire(compiledId1, {
1201
+ targetStatus: "retired",
1202
+ rationale: "Superseded by the versioning policy.",
1203
+ decision: "apply",
1204
+ });
1205
+
1206
+ // The applied change persisted: the target record is retired.
1207
+ const target = await store.get(compiledId1);
1208
+ assert.equal(target.status, "retired", "target record was retired (change persisted)");
1209
+
1210
+ // The spent proposal artifact is auto-closed (retired), not left active.
1211
+ const artifact = await store.get(result.proposerId);
1212
+ assert.ok(artifact, "proposal artifact still exists (closed, not deleted)");
1213
+ assert.equal(
1214
+ artifact.status,
1215
+ "retired",
1216
+ "spent proposal artifact is auto-retired after apply (#106)"
1217
+ );
1218
+ assert.equal(result.proposalClosed, true, "retire() reports the artifact was closed");
1219
+
1220
+ // No dangling active "Retirement proposal: …" twin remains.
1221
+ assert.equal(
1222
+ activeProposalArtifacts(store).length,
1223
+ 0,
1224
+ "no active proposal-artifact twin remains after apply (#106)"
1225
+ );
1226
+ } finally {
1227
+ fs.rmSync(dir, { recursive: true, force: true });
1228
+ }
1229
+ });
1230
+
1231
+ test("a hygiene sweep over active records no longer spawns a double-prefixed twin", async () => {
1232
+ const dir = makeTempDir();
1233
+ try {
1234
+ const { store, compiledId1 } = await buildFixture(dir);
1235
+ const runner = makeRunner(store, dir);
1236
+
1237
+ // Retire a record (apply) — historically this left an active proposal artifact behind.
1238
+ await runner.retire(compiledId1, {
1239
+ targetStatus: "retired",
1240
+ rationale: "Superseded.",
1241
+ decision: "apply",
1242
+ });
1243
+
1244
+ // Simulate a hygiene sweep: retire every still-active proposal artifact.
1245
+ // With the fix there are none, so the sweep is a no-op and no
1246
+ // "Retirement proposal: Retirement proposal: …" twin can be born.
1247
+ const before = activeProposalArtifacts(store);
1248
+ for (const artifact of before) {
1249
+ await runner.retire(artifact.id, {
1250
+ targetStatus: "retired",
1251
+ rationale: "Cleaning up dangling proposal artifact.",
1252
+ decision: "apply",
1253
+ });
1254
+ }
1255
+
1256
+ const doublePrefixed = store
1257
+ ._allRecords()
1258
+ .filter(
1259
+ (r) =>
1260
+ typeof r.title === "string" &&
1261
+ r.title.startsWith("Retirement proposal: Retirement proposal")
1262
+ );
1263
+ assert.equal(
1264
+ doublePrefixed.length,
1265
+ 0,
1266
+ "no double-prefixed 'Retirement proposal: Retirement proposal: …' twin exists (#106)"
1267
+ );
1268
+ assert.equal(
1269
+ activeProposalArtifacts(store).length,
1270
+ 0,
1271
+ "no active proposal artifacts remain after the sweep (#106)"
1272
+ );
1273
+ } finally {
1274
+ fs.rmSync(dir, { recursive: true, force: true });
1275
+ }
1276
+ });
1277
+
1278
+ test("reject is unchanged — the proposal artifact stays active (proposal not spent)", async () => {
1279
+ const dir = makeTempDir();
1280
+ try {
1281
+ const { store, compiledId1 } = await buildFixture(dir);
1282
+ const runner = makeRunner(store, dir);
1283
+
1284
+ const before = await store.get(compiledId1);
1285
+
1286
+ const result = await runner.retire(compiledId1, {
1287
+ targetStatus: "retired",
1288
+ rationale: "Proposing retirement.",
1289
+ decision: "reject",
1290
+ rejectReason: "Not ready to retire this yet.",
1291
+ });
1292
+
1293
+ // Target status is unchanged (rejection does not mutate it).
1294
+ const after = await store.get(compiledId1);
1295
+ assert.equal(
1296
+ after.status || "active",
1297
+ before.status || "active",
1298
+ "reject leaves target status unchanged"
1299
+ );
1300
+
1301
+ // The proposal artifact remains active — the proposal was declined, not
1302
+ // spent, so it is NOT auto-closed (close happens only on apply).
1303
+ const artifact = await store.get(result.proposerId);
1304
+ assert.equal(
1305
+ artifact.status || "active",
1306
+ "active",
1307
+ "reject leaves the proposal artifact active (not closed)"
1308
+ );
1309
+ assert.equal(
1310
+ result.proposalClosed,
1311
+ false,
1312
+ "retire(reject) does not report a closed artifact"
1313
+ );
1314
+ } finally {
1315
+ fs.rmSync(dir, { recursive: true, force: true });
1316
+ }
1317
+ });
1318
+ });
@@ -0,0 +1,44 @@
1
+ {
2
+ "id": "knowledge.audit-freshness",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "collect", "next": "measure" },
6
+ { "id": "measure", "next": "flag-gate" },
7
+ { "id": "flag-gate", "next": "done" },
8
+ { "id": "done", "next": null }
9
+ ],
10
+ "gates": {
11
+ "collect-gate": {
12
+ "step": "collect",
13
+ "expects": [
14
+ {
15
+ "id": "working-set-collected",
16
+ "kind": "trust.bundle",
17
+ "required": true,
18
+ "description": "The working set to audit has been collected: the configured record types are listed (retired records are already excluded — retired is terminal, so there is nothing to flag there). The per-category staleness thresholds and any default threshold are surfaced for the measure step. This is a read-only collection — no record is mutated.",
19
+ "bundle_claim": {
20
+ "claimType": "knowledge.audit-freshness.collect",
21
+ "subjectType": "artifact",
22
+ "accepted_statuses": ["trusted", "accepted"]
23
+ }
24
+ }
25
+ ]
26
+ },
27
+ "flag-gate": {
28
+ "step": "flag-gate",
29
+ "expects": [
30
+ {
31
+ "id": "freshness-flags-cite-evidence",
32
+ "kind": "trust.bundle",
33
+ "required": true,
34
+ "description": "Every freshness flag cites its evidence: the record's last-mutation timestamp (the most recent of updated_at and the latest mutation_log entry), the staleness threshold that fired (matched category key + threshold in days), and the computed age in days. A flag is emitted only when the record's age strictly exceeds its resolved threshold; categories with no configured threshold (and no default) are skipped (auditing is opt-in). Each flag proposes an action (archive or refresh) for the operator to route through the existing gated flows (knowledge.retire to archive; a fresh capture/compile to refresh). This step NEVER mutates a record — it only proposes.",
35
+ "bundle_claim": {
36
+ "claimType": "knowledge.audit-freshness.flags",
37
+ "subjectType": "artifact",
38
+ "accepted_statuses": ["trusted", "accepted"]
39
+ }
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,44 @@
1
+ {
2
+ "id": "knowledge.canonicalize-category",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "survey", "next": "assess" },
6
+ { "id": "assess", "next": "propose-gate" },
7
+ { "id": "propose-gate", "next": "done" },
8
+ { "id": "done", "next": null }
9
+ ],
10
+ "gates": {
11
+ "survey-gate": {
12
+ "step": "survey",
13
+ "expects": [
14
+ {
15
+ "id": "category-tree-surveyed",
16
+ "kind": "trust.bundle",
17
+ "required": true,
18
+ "description": "The category hierarchy of the working set has been surveyed: the configured record types are listed and their categories expanded into a prefix tree (retired records are already excluded — retired is terminal, so it is not sprawl to flatten). The enabled checks (orphan-prefix, leaf fan-out budget, implemented-marker tags) are surfaced for the assess step. This is a read-only survey — no record is mutated.",
19
+ "bundle_claim": {
20
+ "claimType": "knowledge.canonicalize-category.survey",
21
+ "subjectType": "artifact",
22
+ "accepted_statuses": ["trusted", "accepted"]
23
+ }
24
+ }
25
+ ]
26
+ },
27
+ "propose-gate": {
28
+ "step": "propose-gate",
29
+ "expects": [
30
+ {
31
+ "id": "sprawl-findings-cite-evidence",
32
+ "kind": "trust.bundle",
33
+ "required": true,
34
+ "description": "Every category-sprawl finding cites its evidence: its kind (orphan-prefix | too-many-leaves | implemented-active), the offending category, the affected record ids, the metric that fired, and an evidence object explaining it (e.g. directRecordCount/subtreeRecordCount for an orphan prefix; leafCount vs the configured budget + the leaf list for too-many-leaves; the matched implemented markers + status for implemented-active). A finding is emitted only when its check is enabled and its condition holds; a disabled check (or an empty implemented-marker list) contributes nothing (the audit is optional + configurable). Each finding proposes an action (flatten | regroup | retire) for the operator to route through the existing gated flows (knowledge.retire to retire; an update/recategorize to flatten or regroup). This step NEVER mutates a record — it only proposes.",
35
+ "bundle_claim": {
36
+ "claimType": "knowledge.canonicalize-category.findings",
37
+ "subjectType": "artifact",
38
+ "accepted_statuses": ["trusted", "accepted"]
39
+ }
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,44 @@
1
+ {
2
+ "id": "knowledge.detect-contradictions",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "collect", "next": "compare" },
6
+ { "id": "compare", "next": "flag-gate" },
7
+ { "id": "flag-gate", "next": "done" },
8
+ { "id": "done", "next": null }
9
+ ],
10
+ "gates": {
11
+ "collect-gate": {
12
+ "step": "collect",
13
+ "expects": [
14
+ {
15
+ "id": "comparison-set-collected",
16
+ "kind": "trust.bundle",
17
+ "required": true,
18
+ "description": "The comparison set has been collected per category in scope: compiled records are listed by category (retired records are already excluded — retired is terminal, so there is nothing to compare there). Only the categories with a configured threshold (or a default) are audited; a category with no configured policy is skipped (contradiction detection is opt-in). The comparison is scoped to similar records within a category via the similarity adapter — contradictions only matter between records about the same thing. This is a read-only collection — no record is mutated.",
19
+ "bundle_claim": {
20
+ "claimType": "knowledge.detect-contradictions.collect",
21
+ "subjectType": "artifact",
22
+ "accepted_statuses": ["trusted", "accepted"]
23
+ }
24
+ }
25
+ ]
26
+ },
27
+ "flag-gate": {
28
+ "step": "flag-gate",
29
+ "expects": [
30
+ {
31
+ "id": "contradiction-flags-cite-both-ids",
32
+ "kind": "trust.bundle",
33
+ "required": true,
34
+ "description": "Every contradiction flag cites its evidence: BOTH conflicting record ids (recordIdA + recordIdB), their shared category, and the reason returned by the contradiction fn that fired. A pair is flagged only when (1) the similarity adapter deems the two records similar (same subject) AND (2) the pluggable contradiction fn reports a conflict between their assertions. The contradiction fn is configurable per call; the default heuristic detects opposing polarity assertions over a shared subject. This step NEVER mutates a record — it only proposes. The operator routes each flag through an existing gated flow (knowledge.retire to drop the stale assertion, or a fresh capture/compile/consolidate to reconcile).",
35
+ "bundle_claim": {
36
+ "claimType": "knowledge.detect-contradictions.flags",
37
+ "subjectType": "artifact",
38
+ "accepted_statuses": ["trusted", "accepted"]
39
+ }
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ }