@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,368 @@
1
+ /**
2
+ * Knowledge Kit — Audit-Freshness Eval Suite (#106 hygiene #1)
3
+ *
4
+ * knowledge.audit-freshness flags records past per-category staleness
5
+ * thresholds and proposes archive/refresh — each flag citing its evidence
6
+ * (last-mutation + the threshold that fired). The audit is READ-ONLY,
7
+ * OPTIONAL, and CONFIGURABLE (per-category thresholds).
8
+ *
9
+ * Covers:
10
+ * - per-category threshold resolution: dot-hierarchy longest-prefix wins
11
+ * over a shorter prefix; an explicit default catches unmatched categories;
12
+ * a category with no threshold (and no default) is skipped (auditing is opt-in per category).
13
+ * - boundary: age == threshold is NOT flagged; age > threshold IS flagged.
14
+ * - every flag cites lastMutationAt + thresholdDays + matchedThresholdKey
15
+ * + ageDays (the evidence guarantee).
16
+ * - last-mutation is the max of updated_at and the latest mutation_log entry.
17
+ * - retired records are never flagged (terminal, excluded from working set).
18
+ * - read-only invariant: no record is mutated by the audit.
19
+ * - proposed-action resolution (default + per-category override).
20
+ * - gate telemetry (collect-gate + flag-gate) is emitted.
21
+ *
22
+ * Run:
23
+ * node --test kits/knowledge/evals/audit-freshness/suite.test.js
24
+ */
25
+
26
+ import { test, describe, before, after } from "node:test";
27
+ import assert from "node:assert/strict";
28
+ import * as fs from "node:fs";
29
+ import * as path from "node:path";
30
+ import * as os from "node:os";
31
+ import { fileURLToPath } from "node:url";
32
+
33
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
34
+ const KIT_ROOT = path.resolve(__dirname, "../..");
35
+
36
+ const adapterPath = path.join(KIT_ROOT, "adapters/default-store/index.js");
37
+ const runnerPath = path.join(KIT_ROOT, "adapters/flow-runner/index.js");
38
+
39
+ const { DefaultKnowledgeStore } = await import(adapterPath);
40
+ const { KnowledgeFlowRunner, auditFreshness } = await import(runnerPath);
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Helpers
44
+ // ---------------------------------------------------------------------------
45
+
46
/**
 * Creates a uniquely named scratch directory under the OS temp dir.
 *
 * @returns {string} Path of the newly created directory.
 */
function makeTempDir() {
  const prefix = path.join(os.tmpdir(), "knowledge-audit-freshness-");
  return fs.mkdtempSync(prefix);
}
49
+
50
/**
 * Builds a DefaultKnowledgeStore rooted at the given directory.
 *
 * @param {string} dir - Directory passed to the store as `storeRoot`.
 * @returns {DefaultKnowledgeStore} The new store instance.
 */
function makeStore(dir) {
  const options = { storeRoot: dir };
  return new DefaultKnowledgeStore(options);
}
53
+
54
/**
 * Builds a KnowledgeFlowRunner bound to the given store and workspace, using
 * the fixed agent and session identifiers this suite runs under.
 *
 * @param {object} store - Store instance handed to the runner.
 * @param {string} dir - Directory handed to the runner as `workspace`.
 * @returns {KnowledgeFlowRunner} The new runner instance.
 */
function makeRunner(store, dir) {
  const runnerOptions = {
    store,
    workspace: dir,
    agent: "audit-freshness-test-runner",
    sessionId: "audit-freshness-session-001",
  };
  return new KnowledgeFlowRunner(runnerOptions);
}
62
+
63
/**
 * Reads the JSON-lines telemetry sink at `<dir>/.telemetry/full.jsonl`.
 *
 * @param {string} dir - Directory that contains the `.telemetry` folder.
 * @returns {object[]} One parsed object per non-empty line, in file order;
 *   an empty array when the sink file does not exist.
 */
function readTelemetryEvents(dir) {
  const sinkPath = path.join(dir, ".telemetry", "full.jsonl");
  if (!fs.existsSync(sinkPath)) {
    return [];
  }

  const events = [];
  const contents = fs.readFileSync(sinkPath, "utf8").trim();
  for (const line of contents.split("\n")) {
    // Blank lines carry no event; skip them rather than feed "" to JSON.parse.
    if (line) {
      events.push(JSON.parse(line));
    }
  }
  return events;
}
72
+
73
// Milliseconds in one day (24h * 60m * 60s * 1000ms).
const DAY_MS = 24 * 60 * 60 * 1000;

// Fixed reference instant: every age in this suite is measured back from here,
// so results never depend on the wall clock.
const NOW = "2026-06-25T00:00:00.000Z";

/**
 * Returns the ISO-8601 timestamp lying `days` whole days before NOW.
 *
 * @param {number} days - Number of days to step back from NOW.
 * @returns {string} The resulting instant as produced by `Date#toISOString`.
 */
function daysAgo(days) {
  const offsetMs = days * DAY_MS;
  const instant = new Date(Date.parse(NOW) - offsetMs);
  return instant.toISOString();
}
81
+
82
/**
 * Create a record through the store, then rewrite its markdown file so that
 * created_at and updated_at both read `days` days before NOW. The store stamps
 * the current time on create, which the suite must not depend on; patching the
 * file keeps the audit under test black-box while controlling its input (age).
 *
 * @param {object} store - Object exposing `create(record)`; its result is awaited.
 * @param {string} dir - Store root; the record file is read from and written
 *   back to `<dir>/records/<id>.md`.
 * @param {object} spec - Fixture description.
 * @param {string} spec.id - Record id (also the markdown file's base name).
 * @param {string} spec.type - Record type forwarded to the store.
 * @param {string} spec.title - Record title; the body is derived from it.
 * @param {string} spec.category - Record category forwarded to the store.
 * @param {number} spec.days - Age, in days before NOW, to stamp on the record.
 * @param {string} [spec.mutationLog] - When set, the empty `mutation_log: []`
 *   entry is replaced by a single update entry at this instant.
 * @returns {Promise<string>} Resolves to the record id once the file is patched.
 * @throws {Error} If `mutationLog` is given but the record file contains no
 *   `mutation_log: []` placeholder to replace.
 */
async function createAtAge(store, dir, { id, type, title, category, days, mutationLog }) {
  const recPath = path.join(dir, "records", `${id}.md`);

  // Let the store write the record file, then patch the timestamps in place.
  await store.create({
    id,
    type,
    title,
    body: `Body of ${title}`,
    category,
    provenance: { agent: "fixture" },
  });

  const at = daysAgo(days);
  let patched = fs
    .readFileSync(recPath, "utf8")
    .replace(/created_at: .*/, `created_at: ${at}`)
    .replace(/updated_at: .*/, `updated_at: ${at}`);

  if (mutationLog) {
    const emptyLog = /mutation_log: \[\]/;
    // Fail loudly instead of silently skipping the injection: a fixture that
    // lacks the requested log entry would make the calling test lie.
    if (!emptyLog.test(patched)) {
      throw new Error(`createAtAge: record ${id} has no empty mutation_log to patch`);
    }
    // Inject a mutation_log with a single entry at the given instant.
    // NOTE(review): confirm this snippet's whitespace matches the store's
    // front-matter format.
    patched = patched.replace(
      emptyLog,
      `mutation_log:\n - op: update\n at: ${mutationLog}\n agent: fixture`
    );
  }

  fs.writeFileSync(recPath, patched, "utf8");
  return id;
}
110
+
111
+ // ---------------------------------------------------------------------------
112
+ // Suite
113
+ // ---------------------------------------------------------------------------
114
+
115
/**
 * Audit-freshness suite.
 *
 * A shared store/runner pair is seeded once in `before` with five records of
 * known age and reused by the tests that only read. Tests that need their own
 * fixtures (boundary, mutation log, retirement, telemetry) build a private
 * temp dir and remove it in a `finally` block.
 */
describe("Knowledge Kit Audit-Freshness Suite (#106)", () => {
  let dir;
  let store;
  let runner;

  before(async () => {
    dir = makeTempDir();
    store = makeStore(dir);
    runner = makeRunner(store, dir);

    // radar.signals: a 14-day record (stale at 7) + a 3-day record (fresh).
    await createAtAge(store, dir, {
      id: "radar-stale", type: "raw", title: "Radar: weak signal",
      category: "radar.signals", days: 14,
    });
    await createAtAge(store, dir, {
      id: "radar-fresh", type: "raw", title: "Radar: hot signal",
      category: "radar.signals", days: 3,
    });
    // ops.decisions: 200-day record — fresh against a 365-day threshold.
    await createAtAge(store, dir, {
      id: "decision-young", type: "compiled", title: "Decision: use REST",
      category: "ops.decisions", days: 200,
    });
    // ops.decisions: 400-day record — stale against the 365-day threshold.
    await createAtAge(store, dir, {
      id: "decision-old", type: "compiled", title: "Decision: legacy SOAP",
      category: "ops.decisions", days: 400,
    });
    // misc: 9999-day record but no threshold configured → opt-out, never flagged.
    await createAtAge(store, dir, {
      id: "misc-ancient", type: "raw", title: "Misc: ancient scratch",
      category: "misc.scratch", days: 9999,
    });
  });

  after(() => {
    if (dir) fs.rmSync(dir, { recursive: true, force: true });
  });

  // Baseline audit config; returns a fresh object on every call so no test can
  // leak mutations into another.
  const config = () => ({
    now: NOW,
    thresholds: {
      "radar.signals": 7,
      "ops.decisions": 365,
    },
    // no default → misc.* is opt-out
  });

  test("flags only records strictly past their per-category threshold", async () => {
    const result = await runner.auditFreshness(config());
    const flaggedIds = result.flags.map((f) => f.recordId).sort();
    assert.deepEqual(
      flaggedIds,
      ["decision-old", "radar-stale"],
      "exactly the two records past threshold are flagged"
    );
  });

  test("opt-out: category with no threshold (and no default) is skipped, never flagged", async () => {
    const result = await runner.auditFreshness(config());
    assert.ok(
      !result.flags.some((f) => f.recordId === "misc-ancient"),
      "misc.scratch has no threshold → not flagged despite being 9999 days old"
    );
    assert.equal(result.skipped, 1, "the one misc.* record is counted as skipped");
    assert.equal(result.audited, 4, "the four threshold-resolvable records are audited");
  });

  test("every flag cites its evidence: last-mutation + threshold + matched key + age", async () => {
    const result = await runner.auditFreshness(config());
    for (const flag of result.flags) {
      assert.ok(flag.lastMutationAt, `flag for ${flag.recordId} cites lastMutationAt`);
      assert.ok(!Number.isNaN(Date.parse(flag.lastMutationAt)), "lastMutationAt is a valid timestamp");
      assert.equal(typeof flag.thresholdDays, "number", "flag cites thresholdDays");
      assert.equal(typeof flag.ageDays, "number", "flag cites ageDays");
      assert.ok(flag.ageDays > flag.thresholdDays, "ageDays strictly exceeds thresholdDays");
      assert.ok(flag.matchedThresholdKey, "flag cites the matched threshold key");
    }
    const radar = result.flags.find((f) => f.recordId === "radar-stale");
    assert.equal(radar.thresholdDays, 7);
    assert.equal(radar.matchedThresholdKey, "radar.signals");
    assert.equal(radar.ageDays, 14);
  });

  test("longest-prefix wins: radar.signals threshold beats a shorter radar threshold", async () => {
    // Add a coarser radar threshold of 30 days; radar.signals (7) must still win.
    const result = await runner.auditFreshness({
      now: NOW,
      thresholds: { radar: 30, "radar.signals": 7, "ops.decisions": 365 },
    });
    const radar = result.flags.find((f) => f.recordId === "radar-stale");
    assert.ok(radar, "radar-stale (14d) is flagged under the 7-day radar.signals threshold");
    assert.equal(radar.matchedThresholdKey, "radar.signals");
    assert.equal(radar.thresholdDays, 7);
  });

  test("default threshold catches otherwise-unmatched categories", async () => {
    const result = await runner.auditFreshness({
      now: NOW,
      thresholds: { "radar.signals": 7 },
      defaultThresholdDays: 365,
    });
    const misc = result.flags.find((f) => f.recordId === "misc-ancient");
    assert.ok(misc, "misc.scratch (9999d) is now flagged under the default threshold");
    assert.equal(misc.matchedThresholdKey, "*", "default match is surfaced as '*'");
    assert.equal(misc.thresholdDays, 365);
  });

  test("boundary: age == threshold is not flagged; age > threshold is flagged", async () => {
    const bdir = makeTempDir();
    const bstore = makeStore(bdir);
    const brunner = makeRunner(bstore, bdir);
    try {
      await createAtAge(bstore, bdir, {
        id: "exactly-7", type: "raw", title: "Exactly at threshold",
        category: "radar.signals", days: 7,
      });
      await createAtAge(bstore, bdir, {
        id: "just-over-7", type: "raw", title: "One day past threshold",
        category: "radar.signals", days: 8,
      });
      const result = await brunner.auditFreshness({
        now: NOW,
        thresholds: { "radar.signals": 7 },
      });
      const ids = result.flags.map((f) => f.recordId);
      assert.ok(!ids.includes("exactly-7"), "age == threshold is NOT flagged");
      assert.ok(ids.includes("just-over-7"), "age > threshold IS flagged");
    } finally {
      fs.rmSync(bdir, { recursive: true, force: true });
    }
  });

  test("last-mutation is the max of updated_at and the latest mutation_log entry", async () => {
    const mdir = makeTempDir();
    const mstore = makeStore(mdir);
    const mrunner = makeRunner(mstore, mdir);
    try {
      // updated_at is 100 days old, but a mutation_log entry is only 2 days old —
      // the record is FRESH (last mutation = 2 days ago, under the 7-day threshold).
      await createAtAge(mstore, mdir, {
        id: "log-newer", type: "raw", title: "Stale stamp, fresh log",
        category: "radar.signals", days: 100, mutationLog: daysAgo(2),
      });
      const result = await mrunner.auditFreshness({
        now: NOW,
        thresholds: { "radar.signals": 7 },
      });
      assert.ok(
        !result.flags.some((f) => f.recordId === "log-newer"),
        "the newer mutation_log entry makes the record fresh — not flagged"
      );
    } finally {
      fs.rmSync(mdir, { recursive: true, force: true });
    }
  });

  test("retired records are never flagged (terminal, excluded from working set)", async () => {
    const rdir = makeTempDir();
    const rstore = makeStore(rdir);
    const rrunner = makeRunner(rstore, rdir);
    try {
      await createAtAge(rstore, rdir, {
        id: "old-but-retired", type: "compiled", title: "Ancient, already retired",
        category: "ops.decisions", days: 5000,
      });
      // Retire it via the store op (active → retired).
      await rstore.retire("old-but-retired", "retired", {
        agent: "fixture",
        rationale: "Superseded long ago.",
      });
      const result = await rrunner.auditFreshness({
        now: NOW,
        thresholds: { "ops.decisions": 365 },
      });
      assert.ok(
        !result.flags.some((f) => f.recordId === "old-but-retired"),
        "a retired record is never flagged regardless of age"
      );
    } finally {
      fs.rmSync(rdir, { recursive: true, force: true });
    }
  });

  test("read-only invariant: the audit mutates no record", async () => {
    // Locals are deliberately not called `before` / `after`: those names are
    // the node:test hooks imported at the top of the file.
    const readRecord = (id) => fs.readFileSync(path.join(dir, "records", `${id}.md`), "utf8");
    const snapshots = {};
    for (const id of ["radar-stale", "decision-old", "misc-ancient"]) {
      snapshots[id] = readRecord(id);
    }
    await runner.auditFreshness(config());
    for (const id of Object.keys(snapshots)) {
      const current = readRecord(id);
      assert.equal(current, snapshots[id], `record ${id} is byte-identical after the audit`);
    }
  });

  test("proposed action: default refresh, per-category override honoured", async () => {
    const result = await runner.auditFreshness({
      now: NOW,
      thresholds: { "radar.signals": 7, "ops.decisions": 365 },
      actions: { "radar.signals": "archive" },
      defaultAction: "refresh",
    });
    const radar = result.flags.find((f) => f.recordId === "radar-stale");
    const decision = result.flags.find((f) => f.recordId === "decision-old");
    assert.equal(radar.proposedAction, "archive", "per-category action override applies");
    assert.equal(decision.proposedAction, "refresh", "unmatched category falls back to defaultAction");
  });

  test("gate telemetry: collect-gate and flag-gate events are emitted", async () => {
    const tdir = makeTempDir();
    const tstore = makeStore(tdir);
    const trunner = makeRunner(tstore, tdir);
    try {
      await createAtAge(tstore, tdir, {
        id: "t-stale", type: "raw", title: "Telemetry stale",
        category: "radar.signals", days: 30,
      });
      const result = await trunner.auditFreshness({
        now: NOW,
        thresholds: { "radar.signals": 7 },
      });
      // Returned events: only the count is asserted here.
      assert.ok(
        result.telemetryEvents.length >= 4,
        "collect-gate + flag-gate produce at least 4 in/out events"
      );
      // Persisted events reference the audit flow.
      const persisted = readTelemetryEvents(tdir);
      const auditEvents = persisted.filter((e) =>
        JSON.stringify(e).includes("knowledge.audit-freshness")
      );
      assert.ok(auditEvents.length > 0, "audit flow telemetry is persisted to the sink");
    } finally {
      fs.rmSync(tdir, { recursive: true, force: true });
    }
  });

  test("module-level auditFreshness export delegates to the runner", async () => {
    const result = await auditFreshness({
      store,
      workspace: dir,
      agent: "audit-freshness-test-runner",
      now: NOW,
      thresholds: { "radar.signals": 7, "ops.decisions": 365 },
    });
    const flaggedIds = result.flags.map((f) => f.recordId).sort();
    assert.deepEqual(flaggedIds, ["decision-old", "radar-stale"]);
  });
});