cap-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. package/.claude-plugin/README.md +26 -0
  2. package/.claude-plugin/marketplace.json +24 -0
  3. package/.claude-plugin/plugin.json +24 -0
  4. package/LICENSE +21 -0
  5. package/README.ja-JP.md +834 -0
  6. package/README.ko-KR.md +823 -0
  7. package/README.md +806 -0
  8. package/README.pt-BR.md +452 -0
  9. package/README.zh-CN.md +800 -0
  10. package/agents/cap-architect.md +269 -0
  11. package/agents/cap-brainstormer.md +207 -0
  12. package/agents/cap-curator.md +276 -0
  13. package/agents/cap-debugger.md +365 -0
  14. package/agents/cap-designer.md +246 -0
  15. package/agents/cap-historian.md +464 -0
  16. package/agents/cap-migrator.md +291 -0
  17. package/agents/cap-prototyper.md +197 -0
  18. package/agents/cap-validator.md +308 -0
  19. package/bin/install.js +5433 -0
  20. package/cap/bin/cap-tools.cjs +853 -0
  21. package/cap/bin/lib/arc-scanner.cjs +344 -0
  22. package/cap/bin/lib/cap-affinity-engine.cjs +862 -0
  23. package/cap/bin/lib/cap-anchor.cjs +228 -0
  24. package/cap/bin/lib/cap-annotation-writer.cjs +340 -0
  25. package/cap/bin/lib/cap-checkpoint.cjs +434 -0
  26. package/cap/bin/lib/cap-cluster-detect.cjs +945 -0
  27. package/cap/bin/lib/cap-cluster-display.cjs +52 -0
  28. package/cap/bin/lib/cap-cluster-format.cjs +245 -0
  29. package/cap/bin/lib/cap-cluster-helpers.cjs +295 -0
  30. package/cap/bin/lib/cap-cluster-io.cjs +212 -0
  31. package/cap/bin/lib/cap-completeness.cjs +540 -0
  32. package/cap/bin/lib/cap-deps.cjs +583 -0
  33. package/cap/bin/lib/cap-design-families.cjs +332 -0
  34. package/cap/bin/lib/cap-design.cjs +966 -0
  35. package/cap/bin/lib/cap-divergence-detector.cjs +400 -0
  36. package/cap/bin/lib/cap-doctor.cjs +752 -0
  37. package/cap/bin/lib/cap-feature-map-internals.cjs +19 -0
  38. package/cap/bin/lib/cap-feature-map-migrate.cjs +335 -0
  39. package/cap/bin/lib/cap-feature-map-monorepo.cjs +885 -0
  40. package/cap/bin/lib/cap-feature-map-shard.cjs +315 -0
  41. package/cap/bin/lib/cap-feature-map.cjs +1943 -0
  42. package/cap/bin/lib/cap-fitness-score.cjs +1075 -0
  43. package/cap/bin/lib/cap-impact-analysis.cjs +652 -0
  44. package/cap/bin/lib/cap-learn-review.cjs +1072 -0
  45. package/cap/bin/lib/cap-learning-signals.cjs +627 -0
  46. package/cap/bin/lib/cap-loader.cjs +227 -0
  47. package/cap/bin/lib/cap-logger.cjs +57 -0
  48. package/cap/bin/lib/cap-memory-bridge.cjs +764 -0
  49. package/cap/bin/lib/cap-memory-confidence.cjs +452 -0
  50. package/cap/bin/lib/cap-memory-dir.cjs +987 -0
  51. package/cap/bin/lib/cap-memory-engine.cjs +698 -0
  52. package/cap/bin/lib/cap-memory-extends.cjs +398 -0
  53. package/cap/bin/lib/cap-memory-graph.cjs +790 -0
  54. package/cap/bin/lib/cap-memory-migrate.cjs +2015 -0
  55. package/cap/bin/lib/cap-memory-pin.cjs +183 -0
  56. package/cap/bin/lib/cap-memory-platform.cjs +490 -0
  57. package/cap/bin/lib/cap-memory-prune.cjs +707 -0
  58. package/cap/bin/lib/cap-memory-schema.cjs +812 -0
  59. package/cap/bin/lib/cap-migrate-tags.cjs +309 -0
  60. package/cap/bin/lib/cap-migrate.cjs +540 -0
  61. package/cap/bin/lib/cap-pattern-apply.cjs +1203 -0
  62. package/cap/bin/lib/cap-pattern-pipeline.cjs +1034 -0
  63. package/cap/bin/lib/cap-plugin-manifest.cjs +80 -0
  64. package/cap/bin/lib/cap-realtime-affinity.cjs +399 -0
  65. package/cap/bin/lib/cap-reconcile.cjs +570 -0
  66. package/cap/bin/lib/cap-research-gate.cjs +218 -0
  67. package/cap/bin/lib/cap-scope-filter.cjs +402 -0
  68. package/cap/bin/lib/cap-semantic-pipeline.cjs +1038 -0
  69. package/cap/bin/lib/cap-session-extract.cjs +987 -0
  70. package/cap/bin/lib/cap-session.cjs +445 -0
  71. package/cap/bin/lib/cap-snapshot-linkage.cjs +963 -0
  72. package/cap/bin/lib/cap-stack-docs.cjs +646 -0
  73. package/cap/bin/lib/cap-tag-observer.cjs +371 -0
  74. package/cap/bin/lib/cap-tag-scanner.cjs +1766 -0
  75. package/cap/bin/lib/cap-telemetry.cjs +466 -0
  76. package/cap/bin/lib/cap-test-audit.cjs +1438 -0
  77. package/cap/bin/lib/cap-thread-migrator.cjs +307 -0
  78. package/cap/bin/lib/cap-thread-synthesis.cjs +545 -0
  79. package/cap/bin/lib/cap-thread-tracker.cjs +519 -0
  80. package/cap/bin/lib/cap-trace.cjs +399 -0
  81. package/cap/bin/lib/cap-trust-mode.cjs +336 -0
  82. package/cap/bin/lib/cap-ui-design-editor.cjs +642 -0
  83. package/cap/bin/lib/cap-ui-mind-map.cjs +712 -0
  84. package/cap/bin/lib/cap-ui-thread-nav.cjs +693 -0
  85. package/cap/bin/lib/cap-ui.cjs +1245 -0
  86. package/cap/bin/lib/cap-upgrade.cjs +1028 -0
  87. package/cap/bin/lib/cli/arg-helpers.cjs +49 -0
  88. package/cap/bin/lib/cli/frontmatter-router.cjs +31 -0
  89. package/cap/bin/lib/cli/init-router.cjs +68 -0
  90. package/cap/bin/lib/cli/phase-router.cjs +102 -0
  91. package/cap/bin/lib/cli/state-router.cjs +61 -0
  92. package/cap/bin/lib/cli/template-router.cjs +37 -0
  93. package/cap/bin/lib/cli/uat-router.cjs +29 -0
  94. package/cap/bin/lib/cli/validation-router.cjs +26 -0
  95. package/cap/bin/lib/cli/verification-router.cjs +31 -0
  96. package/cap/bin/lib/cli/workstream-router.cjs +39 -0
  97. package/cap/bin/lib/commands.cjs +961 -0
  98. package/cap/bin/lib/config.cjs +467 -0
  99. package/cap/bin/lib/convention-reader.cjs +258 -0
  100. package/cap/bin/lib/core.cjs +1241 -0
  101. package/cap/bin/lib/feature-aggregator.cjs +423 -0
  102. package/cap/bin/lib/frontmatter.cjs +337 -0
  103. package/cap/bin/lib/init.cjs +1443 -0
  104. package/cap/bin/lib/manifest-generator.cjs +383 -0
  105. package/cap/bin/lib/milestone.cjs +253 -0
  106. package/cap/bin/lib/model-profiles.cjs +69 -0
  107. package/cap/bin/lib/monorepo-context.cjs +226 -0
  108. package/cap/bin/lib/monorepo-migrator.cjs +509 -0
  109. package/cap/bin/lib/phase.cjs +889 -0
  110. package/cap/bin/lib/profile-output.cjs +989 -0
  111. package/cap/bin/lib/profile-pipeline.cjs +540 -0
  112. package/cap/bin/lib/roadmap.cjs +330 -0
  113. package/cap/bin/lib/security.cjs +394 -0
  114. package/cap/bin/lib/session-manager.cjs +292 -0
  115. package/cap/bin/lib/skeleton-generator.cjs +179 -0
  116. package/cap/bin/lib/state.cjs +1032 -0
  117. package/cap/bin/lib/template.cjs +231 -0
  118. package/cap/bin/lib/test-detector.cjs +62 -0
  119. package/cap/bin/lib/uat.cjs +283 -0
  120. package/cap/bin/lib/verify.cjs +889 -0
  121. package/cap/bin/lib/workspace-detector.cjs +371 -0
  122. package/cap/bin/lib/workstream.cjs +492 -0
  123. package/cap/commands/gsd/workstreams.md +63 -0
  124. package/cap/references/arc-standard.md +315 -0
  125. package/cap/references/cap-agent-architecture.md +101 -0
  126. package/cap/references/cap-gitignore-template +9 -0
  127. package/cap/references/cap-zero-deps.md +158 -0
  128. package/cap/references/checkpoints.md +778 -0
  129. package/cap/references/continuation-format.md +249 -0
  130. package/cap/references/contract-test-templates.md +312 -0
  131. package/cap/references/feature-map-template.md +25 -0
  132. package/cap/references/git-integration.md +295 -0
  133. package/cap/references/git-planning-commit.md +38 -0
  134. package/cap/references/model-profiles.md +174 -0
  135. package/cap/references/phase-numbering.md +126 -0
  136. package/cap/references/planning-config.md +202 -0
  137. package/cap/references/property-test-templates.md +316 -0
  138. package/cap/references/security-test-templates.md +347 -0
  139. package/cap/references/session-template.json +8 -0
  140. package/cap/references/tdd.md +263 -0
  141. package/cap/references/user-profiling.md +681 -0
  142. package/cap/references/verification-patterns.md +612 -0
  143. package/cap/templates/UAT.md +265 -0
  144. package/cap/templates/claude-md.md +175 -0
  145. package/cap/templates/codebase/architecture.md +255 -0
  146. package/cap/templates/codebase/concerns.md +310 -0
  147. package/cap/templates/codebase/conventions.md +307 -0
  148. package/cap/templates/codebase/integrations.md +280 -0
  149. package/cap/templates/codebase/stack.md +186 -0
  150. package/cap/templates/codebase/structure.md +285 -0
  151. package/cap/templates/codebase/testing.md +480 -0
  152. package/cap/templates/config.json +44 -0
  153. package/cap/templates/context.md +352 -0
  154. package/cap/templates/continue-here.md +78 -0
  155. package/cap/templates/copilot-instructions.md +7 -0
  156. package/cap/templates/debug-subagent-prompt.md +91 -0
  157. package/cap/templates/discussion-log.md +63 -0
  158. package/cap/templates/milestone-archive.md +123 -0
  159. package/cap/templates/milestone.md +115 -0
  160. package/cap/templates/phase-prompt.md +610 -0
  161. package/cap/templates/planner-subagent-prompt.md +117 -0
  162. package/cap/templates/project.md +186 -0
  163. package/cap/templates/requirements.md +231 -0
  164. package/cap/templates/research-project/ARCHITECTURE.md +204 -0
  165. package/cap/templates/research-project/FEATURES.md +147 -0
  166. package/cap/templates/research-project/PITFALLS.md +200 -0
  167. package/cap/templates/research-project/STACK.md +120 -0
  168. package/cap/templates/research-project/SUMMARY.md +170 -0
  169. package/cap/templates/research.md +552 -0
  170. package/cap/templates/roadmap.md +202 -0
  171. package/cap/templates/state.md +176 -0
  172. package/cap/templates/summary.md +364 -0
  173. package/cap/templates/user-preferences.md +498 -0
  174. package/cap/templates/verification-report.md +322 -0
  175. package/cap/workflows/add-phase.md +112 -0
  176. package/cap/workflows/add-tests.md +351 -0
  177. package/cap/workflows/add-todo.md +158 -0
  178. package/cap/workflows/audit-milestone.md +340 -0
  179. package/cap/workflows/audit-uat.md +109 -0
  180. package/cap/workflows/autonomous.md +891 -0
  181. package/cap/workflows/check-todos.md +177 -0
  182. package/cap/workflows/cleanup.md +152 -0
  183. package/cap/workflows/complete-milestone.md +767 -0
  184. package/cap/workflows/diagnose-issues.md +231 -0
  185. package/cap/workflows/discovery-phase.md +289 -0
  186. package/cap/workflows/discuss-phase-assumptions.md +653 -0
  187. package/cap/workflows/discuss-phase.md +1049 -0
  188. package/cap/workflows/do.md +104 -0
  189. package/cap/workflows/execute-phase.md +846 -0
  190. package/cap/workflows/execute-plan.md +514 -0
  191. package/cap/workflows/fast.md +105 -0
  192. package/cap/workflows/forensics.md +265 -0
  193. package/cap/workflows/health.md +181 -0
  194. package/cap/workflows/help.md +660 -0
  195. package/cap/workflows/insert-phase.md +130 -0
  196. package/cap/workflows/list-phase-assumptions.md +178 -0
  197. package/cap/workflows/list-workspaces.md +56 -0
  198. package/cap/workflows/manager.md +362 -0
  199. package/cap/workflows/map-codebase.md +377 -0
  200. package/cap/workflows/milestone-summary.md +223 -0
  201. package/cap/workflows/new-milestone.md +486 -0
  202. package/cap/workflows/new-project.md +1250 -0
  203. package/cap/workflows/new-workspace.md +237 -0
  204. package/cap/workflows/next.md +97 -0
  205. package/cap/workflows/node-repair.md +92 -0
  206. package/cap/workflows/note.md +156 -0
  207. package/cap/workflows/pause-work.md +176 -0
  208. package/cap/workflows/plan-milestone-gaps.md +273 -0
  209. package/cap/workflows/plan-phase.md +857 -0
  210. package/cap/workflows/plant-seed.md +169 -0
  211. package/cap/workflows/pr-branch.md +129 -0
  212. package/cap/workflows/profile-user.md +449 -0
  213. package/cap/workflows/progress.md +507 -0
  214. package/cap/workflows/quick.md +757 -0
  215. package/cap/workflows/remove-phase.md +155 -0
  216. package/cap/workflows/remove-workspace.md +90 -0
  217. package/cap/workflows/research-phase.md +82 -0
  218. package/cap/workflows/resume-project.md +326 -0
  219. package/cap/workflows/review.md +228 -0
  220. package/cap/workflows/session-report.md +146 -0
  221. package/cap/workflows/settings.md +283 -0
  222. package/cap/workflows/ship.md +228 -0
  223. package/cap/workflows/stats.md +60 -0
  224. package/cap/workflows/transition.md +671 -0
  225. package/cap/workflows/ui-phase.md +298 -0
  226. package/cap/workflows/ui-review.md +161 -0
  227. package/cap/workflows/update.md +323 -0
  228. package/cap/workflows/validate-phase.md +170 -0
  229. package/cap/workflows/verify-phase.md +254 -0
  230. package/cap/workflows/verify-work.md +637 -0
  231. package/commands/cap/annotate.md +165 -0
  232. package/commands/cap/brainstorm.md +393 -0
  233. package/commands/cap/checkpoint.md +106 -0
  234. package/commands/cap/completeness.md +94 -0
  235. package/commands/cap/continue.md +72 -0
  236. package/commands/cap/debug.md +588 -0
  237. package/commands/cap/deps.md +169 -0
  238. package/commands/cap/design.md +479 -0
  239. package/commands/cap/init.md +354 -0
  240. package/commands/cap/iterate.md +249 -0
  241. package/commands/cap/learn.md +459 -0
  242. package/commands/cap/memory.md +275 -0
  243. package/commands/cap/migrate-feature-map.md +91 -0
  244. package/commands/cap/migrate-memory.md +108 -0
  245. package/commands/cap/migrate-tags.md +91 -0
  246. package/commands/cap/migrate.md +131 -0
  247. package/commands/cap/prototype.md +510 -0
  248. package/commands/cap/reconcile.md +121 -0
  249. package/commands/cap/review.md +360 -0
  250. package/commands/cap/save.md +72 -0
  251. package/commands/cap/scan.md +404 -0
  252. package/commands/cap/start.md +356 -0
  253. package/commands/cap/status.md +118 -0
  254. package/commands/cap/test-audit.md +262 -0
  255. package/commands/cap/test.md +394 -0
  256. package/commands/cap/trace.md +133 -0
  257. package/commands/cap/ui.md +167 -0
  258. package/hooks/dist/cap-check-update.js +115 -0
  259. package/hooks/dist/cap-context-monitor.js +185 -0
  260. package/hooks/dist/cap-learn-review-hook.js +114 -0
  261. package/hooks/dist/cap-learning-hook.js +192 -0
  262. package/hooks/dist/cap-memory.js +299 -0
  263. package/hooks/dist/cap-prompt-guard.js +97 -0
  264. package/hooks/dist/cap-statusline.js +157 -0
  265. package/hooks/dist/cap-tag-observer.js +115 -0
  266. package/hooks/dist/cap-version-check.js +112 -0
  267. package/hooks/dist/cap-workflow-guard.js +175 -0
  268. package/hooks/hooks.json +55 -0
  269. package/package.json +58 -0
  270. package/scripts/base64-scan.sh +262 -0
  271. package/scripts/build-hooks.js +93 -0
  272. package/scripts/cap-removal-checklist.md +202 -0
  273. package/scripts/prompt-injection-scan.sh +199 -0
  274. package/scripts/run-tests.cjs +181 -0
  275. package/scripts/secret-scan.sh +227 -0
@@ -0,0 +1,1034 @@
1
+ // @cap-context CAP F-071 Extract Patterns via Heuristics and LLM — pure-compute pipeline that turns
2
+ // @cap-history(sessions:2, edits:17, since:2026-05-05, learned:2026-05-06) Frequently modified — 2 sessions, 17 edits
3
+ // raw F-070 learning signals into actionable P-NNN patterns. Stage 1 is deterministic
4
+ // (TF-IDF / RegEx clustering / frequency); Stage 2 is the LLM stage triggered when a
5
+ // candidate hits the threshold (≥ 3 similar overrides OR ≥ 1 regret). All LLM-bound
6
+ // payload is counts + hashes only — no raw signal records, no user text, no paths.
7
+ // @cap-decision(F-071/D1) LLM call mechanism — Host-LLM via Skill-Briefing pattern. The pipeline writes
8
+ // an aggregate briefing to .cap/learning/queue/P-NNN.md; the /cap:learn skill instructs
9
+ // the outer agent (Claude running the session) to read the briefing and write the result
10
+ // to .cap/learning/patterns/P-NNN.json. There is NO HTTPS client, NO API key, NO SDK
11
+ // dependency. This mirrors how /cap:prototype hands a task to cap-prototyper.
12
+ // @cap-decision(F-071/D2) Trigger — manual via /cap:learn skill. NOT auto on /cap:scan, NOT on Stop-Hook.
13
+ // Auto-triggering would burn through the user's LLM budget without consent.
14
+ // @cap-decision(F-071/D3) LLM input shape — Counts + Hashes only. No FEATURE-MAP context, no
15
+ // tag-description text, no raw signal records. The strict path. The briefing schema is:
16
+ // { candidateId, signalType, count, byFeature: [{featureId, count}], topContextHashes:
17
+ // [{hash, count}] }. Anything beyond this MUST go through hashContext first or be denied.
18
+ // @cap-decision(F-071/D4) TF-IDF tokens are tuples, not free text — `${signalType}|${featureId}|${
19
+ // targetFileHash || decisionId}`. The privacy boundary already hashed the path, so the
20
+ // token-string is hash-clean by construction. Documents are sessions (groupBy sessionId).
21
+ // TF · IDF ranks within-session; absolute count provides the AC-2 threshold path
22
+ // (count >= 3 override / >= 1 regret) regardless of TF-IDF rank.
23
+ // @cap-decision(F-071/D5) P-NNN allocation is compute-on-read from filenames. AC-6 demands "sequential,
24
+ // never renumbered" — gaps are fine; allocator returns max(existing IDs) + 1, scanning
25
+ // .cap/learning/patterns/P-*.json AND .cap/learning/queue/P-*.md (queue burns IDs too,
26
+ // because a deferred candidate retains its assigned ID across sessions). No .next-id
27
+ // file: that drifts when developers manually delete a pattern file or move things around.
28
+ // @cap-decision(F-071/D7) "Similar overrides" means the same (signalType, featureId, contextKey) tuple
29
+ // — i.e. SAME feature AND SAME target file (or decisionId for regret). 3 overrides spread
30
+ // across 3 different featureIds do NOT trigger Stage 2; 3 edits across 3 different files
31
+ // of the same feature do NOT trigger Stage 2. STRICT match.
32
+ // Why: early-phase self-learning needs cluster cohesion — Stage 2's LLM can only distill
33
+ // a meaningful L2/L3 pattern from semantically similar records. Loose (featureId-only)
34
+ // matching would produce heterogeneous clusters that the LLM cannot synthesise honestly,
35
+ // and would burn the 3-call budget on low-signal candidates. F-074 unlearn would then
36
+ // auto-retract them, wasting the budget round-trip. F-072 fitness scoring + F-074 will
37
+ // surface coverage gaps over time; if strict turns out to be too narrow, loose-mode is
38
+ // an additive future change (a parallel candidate class), not a refactor.
39
+ // Confirmed by user before ship — see PIN-2 in the F-071 test-audit report.
40
+ // @cap-constraint Zero external dependencies: node:fs, node:path only. We re-use cap-telemetry.cjs for
41
+ // hashContext (privacy primitive) and readBudget / getLlmUsage (budget primitive), and
42
+ // cap-learning-signals.cjs#getSignals as the SOLE input source. We never read JSONL
43
+ // files directly; the F-070 query API is the contract.
44
+ // @cap-risk(F-071/AC-3) PRIVACY BOUNDARY — every place that constructs an LLM-bound briefing payload
45
+ // carries this tag. The briefing must contain ONLY hex hashes and integer counts. Any
46
+ // future contributor adding a `description`, `summary`, `path`, or `signalRaw` field
47
+ // violates AC-3. Tests perform byte-level needle-search on the briefing markdown.
48
+ // @cap-risk(F-071/AC-4) BUDGET BOUNDARY — promotion to Stage 2 must be gated by readBudget +
49
+ // getLlmUsage. A regression that bypasses the gate would silently burn through the
50
+ // user's wallet. The gate is in promoteCandidates(); tests pre-load recordLlmCall
51
+ // entries and assert overflow lands in the queue with deferred:budget.
52
+
53
'use strict';

// @cap-feature(feature:F-071, primary:true) Pattern Pipeline — heuristic Stage 1 + LLM-briefing Stage 2.

const fs = require('node:fs');
const path = require('node:path');

// Sibling modules (zero external deps per @cap-constraint): telemetry supplies the
// privacy/budget primitives; learning-signals is the sole F-070 input contract.
const telemetry = require('./cap-telemetry.cjs');
const learningSignals = require('./cap-learning-signals.cjs');

// -----------------------------------------------------------------------------
// Constants — kept top-of-file so tests and downstream consumers (F-072/F-073)
// reference exactly one place.
// -----------------------------------------------------------------------------

// Directory layout under the project root: .cap/learning/{candidates,patterns,queue}.
const CAP_DIR = '.cap';
const LEARNING_DIR = 'learning';
const CANDIDATES_DIR = 'candidates';
const PATTERNS_DIR = 'patterns';
const QUEUE_DIR = 'queue';

// AC-2: thresholds. Centralised so a future tuning lives in one place and the
// adversarial tests can verify exact behaviour.
const THRESHOLD_OVERRIDE_COUNT = 3;
const THRESHOLD_REGRET_COUNT = 1;

// AC-1: TF-IDF top-K within each session. K=5 covers the high-signal head;
// anything below is noise or single-occurrence.
const TFIDF_TOP_K_PER_SESSION = 5;

// Length cap for any string field that might land in a briefing or pattern record.
// Mirrors cap-telemetry.cjs#ID_MAX so a hostile caller cannot smuggle a prompt
// through e.g. a manipulated featureId or contextHash field.
const ID_MAX = 200;

// P-NNN ID format: 'P-' prefix plus a zero-padded 3-digit sequence (e.g. 'P-007').
const PATTERN_ID_PREFIX = 'P-';
const PATTERN_ID_PAD = 3;
91
+
92
+ /**
93
+ * @typedef {Object} HeuristicCandidate
94
+ * @property {string} candidateId - Stable hash of the (signalType + featureId + contextKey) tuple. Used as the briefing dedup key.
95
+ * @property {'override'|'memory-ref'|'regret'} signalType
96
+ * @property {string|null} featureId - Most-frequent featureId across the records that produced this candidate.
97
+ * @property {number} count - Total record count contributing to this candidate.
98
+ * @property {number} score - Maximum TF-IDF score for this candidate's token across all sessions.
99
+ * Separate from `count`: F-072 (fitness) and F-073 (review) can sort by either depending on what
100
+ * they need. Magnitude (TF-IDF) reveals "rare-but-concentrated" patterns; count reveals "loud"
101
+ * patterns. The orchestrator default-sorts by count for stable strong-cluster-first ordering.
102
+ * @property {Array<{featureId: string|null, count: number}>} byFeature - Per-feature breakdown, sorted descending by count.
103
+ * @property {Array<{hash: string, count: number}>} topContextHashes - Top-N context hashes that produced this candidate, sorted descending by count.
104
+ * @property {{kind:'L1', target:string, from:number, to:number, rationale:string}} suggestion - Heuristic-only L1 proposal — Stage 2 may upgrade this to L2/L3.
105
+ */
106
+
107
+ /**
108
+ * @typedef {Object} PatternRecord
109
+ * @property {string} id - 'P-NNN'.
110
+ * @property {string} createdAt - ISO timestamp.
111
+ * @property {'L1'|'L2'|'L3'} level
112
+ * @property {string|null} featureRef - Feature ID this pattern targets (e.g. 'F-070').
113
+ * @property {'heuristic'|'llm'} source - Whether this was promoted via Stage 2 (llm) or persisted heuristic-only (heuristic).
114
+ * @property {boolean} degraded - True when LLM stage was unavailable and the heuristic-only suggestion is final.
115
+ * @property {number} confidence - 0..1.
116
+ * @property {Object} suggestion - Shape depends on `level` (L1: parameter tweak, L2: rule, L3: prompt-template patch).
117
+ * @property {{candidateId:string, signalType:string, count:number, topContextHashes:Array<{hash:string,count:number}>}} evidence
118
+ */
119
+
120
+ // -----------------------------------------------------------------------------
121
+ // Internal helpers — directory + IO
122
+ // -----------------------------------------------------------------------------
123
+
124
/**
 * Best-effort recursive mkdir. `fs.mkdirSync` with `recursive: true` is already a
 * no-op when the directory exists, so the previous `existsSync` pre-check was
 * redundant and opened a TOCTOU window — it has been removed. IO failures are
 * swallowed by design: public-boundary callers treat directory creation as
 * best-effort, and the next write will surface persistent IO problems.
 * @param {string} dir - Directory path to ensure exists.
 */
function ensureDir(dir) {
  try {
    fs.mkdirSync(dir, { recursive: true });
  } catch (_e) {
    // Swallowed: the next write will surface persistent IO problems.
  }
}
131
+
132
/**
 * Absolute path of the learning root: `<projectRoot>/.cap/learning`.
 * @param {string} projectRoot
 * @returns {string}
 */
function learningRoot(projectRoot) {
  const capRoot = path.join(projectRoot, CAP_DIR);
  return path.join(capRoot, LEARNING_DIR);
}
135
+
136
/**
 * Stage-1 candidates directory under the learning root.
 * @param {string} projectRoot
 * @returns {string}
 */
function candidatesDir(projectRoot) {
  const root = learningRoot(projectRoot);
  return path.join(root, CANDIDATES_DIR);
}
139
+
140
/**
 * Persisted P-NNN pattern directory under the learning root.
 * @param {string} projectRoot
 * @returns {string}
 */
function patternsDir(projectRoot) {
  const root = learningRoot(projectRoot);
  return path.join(root, PATTERNS_DIR);
}
143
+
144
/**
 * Stage-2 briefing queue directory under the learning root.
 * @param {string} projectRoot
 * @returns {string}
 */
function queueDir(projectRoot) {
  const root = learningRoot(projectRoot);
  return path.join(root, QUEUE_DIR);
}
147
+
148
+ // -----------------------------------------------------------------------------
149
+ // Read-side wiring for F-074 applied-state — closes the V5 self-learning loop.
150
+ //
151
+ // @cap-decision(F-071/D9) Read .cap/learning/applied-state.json directly with a tiny inline helper
152
+ // instead of `require('./cap-pattern-apply.cjs')`. cap-pattern-apply already requires
153
+ // cap-pattern-pipeline, so importing it here would create a circular dependency.
154
+ // Schema is owned by F-074 and documented at cap-pattern-apply#readAppliedState
155
+ // (F-074/D2): { version:1, l1:{ '<featureId>/<KEY>': value }, l2:[], l3:[] }.
156
+ // -----------------------------------------------------------------------------
157
+
158
// Path of the F-074 applied-state file, relative to the project root; read by readAppliedL1.
const APPLIED_STATE_RELATIVE = path.join(CAP_DIR, LEARNING_DIR, 'applied-state.json');
159
+
160
/**
 * Look up the L1 override value for a given featureId+key. Returns `null` when the file is
 * missing, malformed, the key is absent, or the value fails the validator. Pure read, never
 * throws.
 *
 * @cap-risk(F-071/D9) The applied-state file is hand-editable. A user (or a buggy pattern)
 *              could stuff a string, NaN, or negative number into the L1 map. The validator
 *              is the trust boundary — anything that fails it falls back to the constant
 *              default. The strict integer check exists so a malformed file cannot weaken
 *              promotion gates (e.g. `to: -1` would otherwise allow every cluster through).
 *
 * @param {string} projectRoot
 * @param {string} featureId - 'F-070' style; null/non-string returns null.
 * @param {string} key - Sub-key, e.g. 'threshold'. Combined as `${featureId}/${key}` per F-074/D2.
 * @param {(v: unknown) => boolean} validator - True when the value is acceptable. Mandatory.
 * @returns {*} The validated value, or null.
 */
function readAppliedL1(projectRoot, featureId, key, validator) {
  const nonEmptyString = (s) => typeof s === 'string' && s.length > 0;
  if (!nonEmptyString(projectRoot)) return null;
  if (!nonEmptyString(featureId)) return null;
  if (!nonEmptyString(key)) return null;
  if (typeof validator !== 'function') return null;

  // Read + parse in one guarded step: a missing file and a malformed file are
  // equivalent from the caller's point of view — both fall back to defaults.
  let state;
  try {
    const filePath = path.join(projectRoot, APPLIED_STATE_RELATIVE);
    state = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  } catch (_e) {
    return null;
  }

  const l1Map = state && state.l1;
  if (!l1Map || typeof l1Map !== 'object' || Array.isArray(l1Map)) return null;

  const candidate = l1Map[`${featureId}/${key}`];
  if (candidate === undefined) return null;
  return validator(candidate) ? candidate : null;
}
200
+
201
/**
 * Strict positive-integer validator for threshold values. Rejects strings, floats, NaN,
 * Infinity, negatives, and zero. A threshold of 0 would mean "every cluster promotes
 * immediately", which is semantically broken — refuse it at the boundary.
 * Note: `Number.isInteger` already implies a finite number, so a single call covers the
 * type, NaN, and Infinity cases.
 * @param {unknown} v
 * @returns {boolean}
 */
function isPositiveIntegerThreshold(v) {
  if (!Number.isInteger(v)) return false;
  return v > 0;
}
211
+
212
/**
 * Effective promotion threshold for a (signalType, featureId) pair, honouring any L1
 * override applied via F-074. Falls back to the module constant when no valid override
 * is applicable.
 *
 * Lookup precedence:
 *   1. applied-state.json#l1[`${featureId}/threshold`] — when projectRoot + featureId provided
 *   2. THRESHOLD_REGRET_COUNT (regret) / THRESHOLD_OVERRIDE_COUNT (override, anything else)
 *
 * memory-ref candidates never promote; callers gate them out before reaching here.
 *
 * @param {string|null|undefined} projectRoot
 * @param {string} signalType
 * @param {string|null|undefined} featureId
 * @returns {number}
 */
function getEffectiveThreshold(projectRoot, signalType, featureId) {
  const applied = readAppliedL1(projectRoot, featureId, 'threshold', isPositiveIntegerThreshold);
  if (applied !== null) return applied;
  return signalType === 'regret' ? THRESHOLD_REGRET_COUNT : THRESHOLD_OVERRIDE_COUNT;
}
232
+
233
/**
 * Cap a string at ID_MAX characters. Non-strings and empty strings yield null.
 * @param {*} v
 * @returns {string|null}
 */
function capId(v) {
  const usable = typeof v === 'string' && v.length > 0;
  return usable ? v.slice(0, ID_MAX) : null;
}
240
+
241
+ // -----------------------------------------------------------------------------
242
+ // TF-IDF tokenizer — operates on hash-tuples, NOT free text.
243
+ //
244
+ // @cap-decision(F-071/D4) Tokens are tuples like `${signalType}|${featureId}|${contextKey}`. Documents
245
+ // are sessions. The privacy boundary in F-070 already hashed paths and decision
246
+ // fields, so token-strings are hash-clean by construction. This is the unusual bit:
247
+ // standard TF-IDF runs on word tokens; we run it on structured hash-tuples. The same
248
+ // math still applies (TF · IDF ranks token rarity within a session), just over a
249
+ // different alphabet.
250
+ // -----------------------------------------------------------------------------
251
+
252
/**
 * Build a stable tuple-token from a signal record. The contextKey is the part that
 * distinguishes different "instances" of the same problem within the same featureId —
 * overrides use targetFileHash, regrets use decisionId, memory-refs use memoryFileHash,
 * each falling back to contextHash (and finally 'unknown').
 *
 * @param {object} record
 * @returns {string} `${signalType}|${featureId}|${contextKey}`
 */
function buildToken(record) {
  const rec = record || {};
  const signalType = capId(rec.signalType) || 'unknown';
  const featureId = capId(rec.featureId) || 'unassigned';

  let contextKey;
  switch (signalType) {
    case 'override':
      contextKey = capId(rec.targetFileHash) || capId(rec.contextHash) || capId(rec.subType) || 'unknown';
      break;
    case 'regret':
      contextKey = capId(rec.decisionId) || capId(rec.contextHash) || 'unknown';
      break;
    default:
      // memory-ref (and any unrecognised type falls through to the same shape).
      contextKey = capId(rec.memoryFileHash) || capId(rec.contextHash) || 'unknown';
      break;
  }

  return [signalType, featureId, contextKey].join('|');
}
275
+
276
/**
 * Group records by sessionId. Records without a sessionId go into the `__no-session__`
 * bucket so they still contribute to global counts — but their TF-IDF treats the bucket
 * as a single synthetic session, which is the safe default (under-counts rather than
 * over-promotes).
 *
 * @param {Array<object>} records
 * @returns {Map<string, Array<object>>}
 */
function groupBySession(records) {
  const buckets = new Map();
  for (const record of records || []) {
    const hasSession =
      record && typeof record.sessionId === 'string' && record.sessionId.length > 0;
    const bucketKey = hasSession ? record.sessionId : '__no-session__';
    const bucket = buckets.get(bucketKey);
    if (bucket) {
      bucket.push(record);
    } else {
      buckets.set(bucketKey, [record]);
    }
  }
  return buckets;
}
295
+
296
/**
 * Compute TF-IDF scores for tokens within each session. Returns a flat array of
 * { token, sessionId, tfidf, count } entries — one per (token × session) pair.
 *
 * TF  = count of token in session.
 * IDF = log(totalSessions / sessionsContainingToken), floored at a small epsilon so a
 *       single-session corpus (IDF = log(1) = 0) still ranks by raw frequency.
 *
 * @param {Array<object>} records
 * @returns {{ tokenScores: Array<{token:string, sessionId:string, tfidf:number, count:number}>, sessionsByToken: Map<string,Set<string>>, recordsByToken: Map<string, Array<object>> }}
 */
function computeTfIdf(records) {
  const sessions = groupBySession(records);
  const totalSessions = Math.max(1, sessions.size);

  /** @type {Map<string, Set<string>>} token -> sessions containing it */
  const sessionsByToken = new Map();
  /** @type {Map<string, Array<object>>} token -> source records */
  const recordsByToken = new Map();
  /** @type {Map<string, Map<string, number>>} sessionId -> (token -> frequency) */
  const perSessionCounts = new Map();

  // Pass 1: tokenise every record and accumulate per-session frequencies plus the
  // token -> session / token -> record inverted indexes.
  for (const [sessionId, sessionRecords] of sessions) {
    const freq = new Map();
    for (const record of sessionRecords) {
      const token = buildToken(record);
      freq.set(token, (freq.get(token) || 0) + 1);

      let tokenSessions = sessionsByToken.get(token);
      if (!tokenSessions) {
        tokenSessions = new Set();
        sessionsByToken.set(token, tokenSessions);
      }
      tokenSessions.add(sessionId);

      let tokenRecords = recordsByToken.get(token);
      if (!tokenRecords) {
        tokenRecords = [];
        recordsByToken.set(token, tokenRecords);
      }
      tokenRecords.push(record);
    }
    perSessionCounts.set(sessionId, freq);
  }

  // Pass 2: turn the frequency tables into scored entries.
  const tokenScores = [];
  for (const [sessionId, freq] of perSessionCounts) {
    for (const [token, tf] of freq) {
      const docFreq = sessionsByToken.get(token).size;
      // IDF with a small floor so single-session corpora still rank.
      const idf = Math.max(0.01, Math.log(totalSessions / Math.max(1, docFreq)));
      tokenScores.push({ token, sessionId, tfidf: tf * idf, count: tf });
    }
  }

  return { tokenScores, sessionsByToken, recordsByToken };
}
343
+
344
/**
 * Select the top-K tokens of every session by TF-IDF and union them into one flat Set —
 * a token that reaches top-K in ANY session is selected. Downstream code attaches global
 * counts and applies the AC-2 threshold or the absolute-count fallback.
 *
 * @param {Array<{token:string, sessionId:string, tfidf:number}>} tokenScores
 * @param {number} k
 * @returns {Set<string>}
 */
function topKTokensPerSession(tokenScores, k) {
  /** @type {Map<string, Array<{token:string, tfidf:number}>>} */
  const perSession = new Map();
  for (const { token, sessionId, tfidf } of tokenScores) {
    const entry = { token, tfidf };
    const list = perSession.get(sessionId);
    if (list) {
      list.push(entry);
    } else {
      perSession.set(sessionId, [entry]);
    }
  }

  const winners = new Set();
  for (const list of perSession.values()) {
    // Stable sort: ties keep insertion order, matching the deterministic Map iteration upstream.
    list.sort((a, b) => b.tfidf - a.tfidf);
    for (const entry of list.slice(0, k)) {
      winners.add(entry.token);
    }
  }
  return winners;
}
370
+
371
+ // -----------------------------------------------------------------------------
372
+ // Heuristic stage — Stage 1
373
+ // -----------------------------------------------------------------------------
374
+
375
+ // @cap-todo(ac:F-071/AC-1) Stage-1 deterministic heuristic engine: TF-IDF + RegEx-Cluster + Frequency
376
+ // on signal records. Writes per-candidate JSON to .cap/learning/candidates/.
377
/**
 * Run Stage 1 — the deterministic heuristic engine — over all signals across the three F-070
 * collectors. Returns a list of HeuristicCandidate objects sorted by descending count (the
 * TF-IDF magnitude is persisted on each candidate as `score` for consumers that prefer it),
 * and writes one `.cap/learning/candidates/<candidateId>.json` per candidate.
 *
 * Pure compute over the F-070 query API — never reads JSONL files directly. AC-7 budget reading is
 * NOT performed here; that's the orchestrator's job (Step 4 of /cap:learn).
 *
 * @param {string} projectRoot
 * @param {Object} [options]
 * @param {string} [options.sessionId] - Optional filter — only consider records from this session.
 * @param {number} [options.topK] - Override TFIDF_TOP_K_PER_SESSION (mostly for tests).
 * @param {boolean} [options.persist] - When false, candidates are returned but not written to disk. Default true.
 * @returns {{ candidates: HeuristicCandidate[], errors: string[] }}
 */
function runHeuristicStage(projectRoot, options) {
  const opts = options || {};
  const errors = [];
  if (typeof projectRoot !== 'string' || projectRoot.length === 0) {
    return { candidates: [], errors: ['projectRoot is required'] };
  }
  const persist = opts.persist !== false;
  const topK = typeof opts.topK === 'number' && opts.topK > 0 ? opts.topK : TFIDF_TOP_K_PER_SESSION;

  // Collect all three signal types via the F-070 query API. The range filter is honoured iff
  // sessionId is supplied — otherwise we operate on the full corpus. AC-1 doesn't restrict
  // the range; consumers wanting a window pass sessionId or a future range.
  const range = opts.sessionId ? { sessionId: opts.sessionId } : undefined;
  let overrides = [];
  let memoryRefs = [];
  let regrets = [];
  // Each collector failure is recorded but non-fatal: a partial corpus still yields candidates.
  try {
    overrides = learningSignals.getSignals(projectRoot, 'override', range) || [];
  } catch (e) {
    errors.push(`getSignals(override) failed: ${e && e.message ? e.message : 'unknown'}`);
  }
  try {
    memoryRefs = learningSignals.getSignals(projectRoot, 'memory-ref', range) || [];
  } catch (e) {
    errors.push(`getSignals(memory-ref) failed: ${e && e.message ? e.message : 'unknown'}`);
  }
  try {
    regrets = learningSignals.getSignals(projectRoot, 'regret', range) || [];
  } catch (e) {
    errors.push(`getSignals(regret) failed: ${e && e.message ? e.message : 'unknown'}`);
  }

  const allRecords = [...overrides, ...memoryRefs, ...regrets];
  if (allRecords.length === 0) {
    return { candidates: [], errors };
  }

  // TF-IDF on the union — but we then walk each token and inspect its records' actual signalType.
  // That keeps memory-ref counts visible alongside override / regret counts in the same ranking.
  const { tokenScores, recordsByToken } = computeTfIdf(allRecords);
  const topTokens = topKTokensPerSession(tokenScores, topK);

  // Map<token, maxTfidf> — used by candidate() to populate the persisted `score` field separately
  // from the record `count`. We keep both because F-072 (fitness) and F-073 (review) may want to
  // sort by either; pre-computing the per-token max keeps candidate() pure.
  // @cap-decision(F-071/D6) `score` (TF-IDF magnitude) and `count` (record count) are persisted as
  //                         separate fields. Splitting was a PIN-decision before ship — F-072 will pick.
  /** @type {Map<string, number>} */
  const maxTfidfByToken = new Map();
  for (const s of tokenScores) {
    const cur = maxTfidfByToken.get(s.token) || 0;
    if (s.tfidf > cur) maxTfidfByToken.set(s.token, s.tfidf);
  }

  // ALSO include any token whose absolute count meets the AC-2 threshold, even if it didn't make
  // it into the per-session top-K. This is the "frequency" arm of AC-1's heuristic engine.
  // @cap-todo(ac:F-071/AC-1) Frequency-analysis arm: tokens with count >= threshold are considered
  //                          regardless of TF-IDF rank.
  // @cap-decision(F-071/D9) Effective threshold respects per-featureId L1 overrides from F-074
  //                         applied-state.json. The token's first record carries the featureId; if a user
  //                         applied P-NNN that proposed `F-070/threshold: 4`, the F-070 cluster needs 4
  //                         records (not 3) to reach the frequency arm.
  for (const [token, recs] of recordsByToken.entries()) {
    const recsArr = recs;
    // All records behind one token share signalType/featureId (both are encoded in the token),
    // so the first record is representative.
    const sigType = (recsArr[0] && recsArr[0].signalType) || 'unknown';
    const featureIdForToken = recsArr[0] && capId(recsArr[0].featureId);
    const requiredCount = getEffectiveThreshold(projectRoot, sigType, featureIdForToken);
    if (recsArr.length >= requiredCount) topTokens.add(token);
  }

  /** @type {HeuristicCandidate[]} */
  const candidates = [];
  for (const token of topTokens) {
    const recs = recordsByToken.get(token) || [];
    if (recs.length === 0) continue;

    // RegEx-cluster arm: group regret tokens by decisionId family. The token already encodes
    // featureId, so a "family" is simply (signalType + featureId) — same family already shares
    // a candidate. The clustering effect is implicit in the tuple-token construction.
    // @cap-todo(ac:F-071/AC-1) RegEx-Cluster arm — the `signalType|featureId|contextKey` tuple IS
    //                          the cluster key. Tokens are members of the same cluster iff they share
    //                          the (signalType, featureId) prefix; the contextKey distinguishes
    //                          instances within the cluster.

    candidate(candidates, token, recs, maxTfidfByToken.get(token) || 0, projectRoot);
  }

  // @cap-decision(F-071/D9) Post-collection effective-threshold filter. The TF-IDF arm could still
  //                         bubble up a "rare-but-concentrated" cluster whose count is below an applied
  //                         threshold; in the V5 loop the user has explicitly said "I don't want F-X
  //                         candidates until 4 records accumulate", so we drop them here instead of
  //                         surfacing them in the review board where they'd just produce noise. Stage 2
  //                         promotion (`checkThreshold`) is also threshold-aware as defense-in-depth.
  const filtered = candidates.filter((c) => {
    if (c.signalType === 'memory-ref') return true; // memory-ref carries positive signal — never filtered.
    const required = getEffectiveThreshold(projectRoot, c.signalType, c.featureId);
    return Number(c.count) >= required;
  });

  // Sort by count descending so the orchestrator processes the loudest clusters first.
  // F-072 / F-073 may resort by score (TF-IDF magnitude) when "rare-but-concentrated" matters more
  // than "loud" — both fields are persisted on the candidate.
  filtered.sort((a, b) => b.count - a.count);
  // Copy the filtered/sorted result back into `candidates` in place (it is const-bound).
  candidates.length = 0;
  for (const c of filtered) candidates.push(c);

  if (persist && candidates.length > 0) {
    ensureDir(candidatesDir(projectRoot));
    for (const c of candidates) {
      try {
        const fp = path.join(candidatesDir(projectRoot), `${c.candidateId}.json`);
        fs.writeFileSync(fp, JSON.stringify(c, null, 2) + '\n', 'utf8');
      } catch (e) {
        // Persist failures are surfaced in `errors` but do not drop the in-memory candidate.
        errors.push(`persist candidate ${c.candidateId} failed: ${e && e.message ? e.message : 'unknown'}`);
      }
    }
  }

  return { candidates, errors };
}
512
+
513
/**
 * Build a HeuristicCandidate from a token and its contributing records, pushing the result onto
 * the accumulator. Internal helper for runHeuristicStage.
 *
 * @param {HeuristicCandidate[]} acc
 * @param {string} token
 * @param {Array<object>} recs - Non-empty (caller skips empty record lists).
 * @param {number} tfidfScore - Maximum TF-IDF score for this token across all sessions.
 * @param {string} [projectRoot] - Forwarded to buildHeuristicSuggestion so the L1 `from` reflects
 *                                 any applied F-074 threshold override; absent => fallback to constants. (F-071/D9)
 */
function candidate(acc, token, recs, tfidfScore, projectRoot) {
  const signalType = recs[0].signalType;

  // Per-feature record tally, reported descending by count.
  /** @type {Map<string|null, number>} */
  const perFeature = new Map();
  for (const rec of recs) {
    const fid = capId(rec.featureId);
    perFeature.set(fid, (perFeature.get(fid) || 0) + 1);
  }
  const byFeature = [...perFeature.entries()]
    .map(([featureId, count]) => ({ featureId, count }))
    .sort((a, b) => b.count - a.count);

  // Top context hashes — contextHash is the F-070 dedup key; tally occurrences, keep the top 5.
  // @cap-risk(F-071/AC-3) Only the contextHash hex string is taken — never the targetFile,
  //                       never the decisionId, never any free-text field. The privacy gate
  //                       in F-070 already hashed those at the source.
  /** @type {Map<string, number>} */
  const perHash = new Map();
  for (const rec of recs) {
    const h = capId(rec.contextHash);
    if (!h) continue;
    perHash.set(h, (perHash.get(h) || 0) + 1);
  }
  const topContextHashes = [...perHash.entries()]
    .map(([hash, count]) => ({ hash, count }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 5);

  // candidateId = stable hash of the token. Re-using telemetry.hashContext keeps the hash function
  // identical to the F-070 / F-061 privacy gate — single source of truth.
  const candidateId = telemetry.hashContext(token);
  const dominantFeature = byFeature[0] && byFeature[0].featureId;

  // Heuristic-only L1 suggestion — a parameter tweak applicable WITHOUT an LLM call. This is the
  // "graceful degradation" payload (AC-5): if Stage 2 is skipped, this still ships.
  const suggestion = buildHeuristicSuggestion(signalType, recs, dominantFeature, projectRoot);

  acc.push({
    candidateId,
    signalType,
    featureId: dominantFeature,
    count: recs.length,
    // @cap-decision(F-071/D6) TF-IDF magnitude — persisted separately from `count`.
    score: tfidfScore,
    byFeature,
    topContextHashes,
    suggestion,
  });
}
576
+
577
// @cap-risk(F-071/AC-1) L1 oscillation: each run raises threshold by `to = recs.length + 1`. Two
//                       consecutive runs on a 4-record cluster with threshold 3 propose 4, then on a
//                       4-record cluster with threshold 4 propose 5, … unbounded climb. The dampener
//                       lives in F-072 (fitness scoring): a low-fitness pattern is auto-retracted by
//                       F-074, breaking the loop. If F-072 is removed or skipped, this heuristic
//                       becomes unstable. Do not loosen `to = recs.length + 1` without F-072 in place.
/**
 * Build a heuristic-only L1 suggestion matching the L1 example shape from the F-071 brief:
 * { kind:'L1', target, from, to, rationale }.
 *
 * @param {string} signalType
 * @param {Array<object>} recs
 * @param {string|null} featureId
 * @param {string} [projectRoot] - When provided, `from` reflects the effective threshold (any
 *                                 applied F-074 override), not just the constant default. (F-071/D9)
 * @returns {{kind:'L1', target:string, from:number, to:number, rationale:string}}
 */
function buildHeuristicSuggestion(signalType, recs, featureId, projectRoot) {
  // Propose raising the AC-2 threshold so the same cluster wouldn't promote next time:
  // `from` anchors at the current effective threshold, `to` is count + 1 so the cluster
  // must grow further before re-triggering.
  const target = featureId ? `${featureId}/threshold` : 'F-071/threshold';
  const from = getEffectiveThreshold(projectRoot, signalType, featureId);
  const to = recs.length + 1;
  // @cap-risk(F-071/AC-3) The rationale is a pure-structural string — count + featureId.
  //                       No raw paths, no decision text. Safe to persist.
  const where = featureId ? ` on ${featureId}` : '';
  const rationale = `Cluster of ${recs.length} ${signalType} signals${where} would not have triggered if threshold had been ${to}.`;
  return { kind: 'L1', target, from, to, rationale };
}
608
+
609
+ // -----------------------------------------------------------------------------
610
+ // Threshold check — AC-2
611
+ // -----------------------------------------------------------------------------
612
+
613
// @cap-todo(ac:F-071/AC-2) Stage-2 trigger: candidate hits threshold (>=3 similar overrides OR >=1 regret).
/**
 * Decide whether a candidate qualifies for Stage 2. Memory-ref candidates never trigger Stage 2 —
 * memory-ref tells you a memory is *valuable*, not that something is *wrong*; promoting it would
 * waste the LLM budget on positive-signal data.
 *
 * Override candidates additionally must share `featureId` across all records (the candidate token
 * already encodes featureId, so this is implicit when the candidate was built from a single token).
 *
 * @cap-decision(F-071/D9) Optional `projectRoot` consults applied-state.json for a per-featureId
 *                         override. Backwards-compatible: when projectRoot is omitted, behaviour falls
 *                         through to the module constants exactly as before, so existing callers
 *                         (and the AC-2 unit tests) keep working unchanged.
 *
 * @param {HeuristicCandidate} candidate
 * @param {string} [projectRoot]
 * @returns {boolean}
 */
function checkThreshold(candidate, projectRoot) {
  if (!candidate || typeof candidate !== 'object') return false;
  // Allowlist: only override / regret may promote. memory-ref (and anything else) falls out here.
  const { signalType } = candidate;
  if (signalType !== 'override' && signalType !== 'regret') return false;
  const required = getEffectiveThreshold(projectRoot, signalType, candidate.featureId);
  return Number(candidate.count) >= required;
}
638
+
639
+ // -----------------------------------------------------------------------------
640
+ // P-NNN allocation — compute-on-read from filenames
641
+ // -----------------------------------------------------------------------------
642
+
643
// @cap-todo(ac:F-071/AC-6) P-NNN allocation: sequential, never renumbered. Compute-on-read.
/**
 * Allocate the next P-NNN id by scanning .cap/learning/patterns/P-*.json AND
 * .cap/learning/queue/P-*.md filenames. Returns 'P-001' when no files exist.
 *
 * AC-6 contract: "sequential, never renumbered" — gaps are fine. We return max(existing IDs) + 1.
 * If P-005 exists in the queue and P-001/P-002 in patterns, next is P-006. Pattern files and queue
 * files share the ID namespace because a deferred candidate retains its assigned ID across sessions.
 *
 * @param {string} projectRoot
 * @returns {string} 'P-NNN'
 */
function allocatePatternId(projectRoot) {
  let highest = 0;
  for (const id of listExistingPatternIds(projectRoot)) {
    const n = parsePatternId(id);
    if (n != null && n > highest) highest = n;
  }
  return formatPatternId(highest + 1);
}
664
+
665
/**
 * List every P-NNN id present in patterns/ (json) or queue/ (md), de-duplicated across
 * the two directories.
 *
 * @param {string} projectRoot
 * @returns {string[]}
 */
function listExistingPatternIds(projectRoot) {
  const found = new Set();

  // Scan one directory for `<P-NNN><suffix>` filenames; missing/unreadable dirs are ignored.
  const collect = (dir, suffix) => {
    if (!fs.existsSync(dir)) return;
    let names;
    try {
      names = fs.readdirSync(dir);
    } catch (_e) {
      return;
    }
    for (const name of names) {
      if (!name.endsWith(suffix)) continue;
      const stem = name.slice(0, name.length - suffix.length);
      if (/^P-\d+$/.test(stem)) found.add(stem);
    }
  };

  collect(patternsDir(projectRoot), '.json');
  collect(queueDir(projectRoot), '.md');
  return [...found];
}
691
+
692
/**
 * Parse a 'P-NNN' id into its numeric part. Returns null for anything that is not a
 * strict P-<digits> string (including null/undefined input).
 *
 * @param {string|null|undefined} id
 * @returns {number|null}
 */
function parsePatternId(id) {
  const match = /^P-(\d+)$/.exec(id || '');
  if (!match) return null;
  return Number.parseInt(match[1], 10);
}
696
+
697
/**
 * Format a number as a zero-padded pattern id using the module-level prefix/pad constants.
 *
 * @param {number} n
 * @returns {string} e.g. 'P-001'
 */
function formatPatternId(n) {
  const digits = String(n).padStart(PATTERN_ID_PAD, '0');
  return PATTERN_ID_PREFIX + digits;
}
700
+
701
+ // -----------------------------------------------------------------------------
702
+ // Briefing builder — Stage 2 input (counts + hashes only)
703
+ // -----------------------------------------------------------------------------
704
+
705
// @cap-todo(ac:F-071/AC-3) PRIVACY-CRITICAL — LLM input is counts + hashes only. Constructs the
//                          structured aggregate { candidateId, signalType, count, byFeature,
//                          topContextHashes } and writes it to .cap/learning/queue/P-NNN.md as the
//                          briefing the outer agent will read.
// @cap-risk(F-071/AC-3) This is THE place where LLM-bound payload is constructed. Any new field
//                       added here MUST be a count or a hex hash. No paths, no decision text,
//                       no record verbatim, no targetFile string. The adversarial test injects
//                       SECRET_NEEDLE values into every input field and asserts zero needle bytes
//                       in the briefing markdown.
/**
 * Build a briefing for Stage 2 and persist it to .cap/learning/queue/P-NNN.md.
 *
 * The briefing is the ONLY artifact the outer agent (LLM) reads. It MUST contain only counts and
 * hex hashes — never raw paths, decision text, or record verbatim. The structured payload is also
 * returned for testing and for the orchestrator to forward to the agent.
 *
 * Returns null (no write) on invalid input, and null on write failure — note that a write failure
 * after id allocation leaves a gap in the P-NNN sequence, which AC-6 explicitly permits.
 *
 * @param {HeuristicCandidate} candidate
 * @param {string} projectRoot
 * @param {Object} [options]
 * @param {string} [options.id] - Pre-allocated P-NNN id (optional; allocated if omitted).
 * @param {boolean} [options.deferred] - When true, the briefing carries a `deferred: budget` marker.
 * @returns {{ id: string, briefingPath: string, payload: object }|null}
 */
function buildBriefing(candidate, projectRoot, options) {
  if (!candidate || typeof candidate !== 'object') return null;
  if (typeof projectRoot !== 'string' || projectRoot.length === 0) return null;

  const opts = options || {};
  const id = opts.id || allocatePatternId(projectRoot);
  const deferred = opts.deferred === true;

  // @cap-risk(F-071/AC-3) Build the payload from STRUCTURED COUNTS + HEX HASHES only.
  //                       Validate every hash is hex via /^[0-9a-f]+$/ — anything else is dropped
  //                       defensively. This guards against a bug upstream (e.g. a future contributor
  //                       passing the raw path through here by mistake).
  // @cap-risk(F-071/AC-3) featureId is structured metadata, but the briefing enforces strict shape
  //                       /^F-\d{3,}$/ — anything else collapses to null. A future contributor who
  //                       tries to smuggle text via a hand-crafted featureId (e.g. by writing the
  //                       record with a non-conforming string) will see the field disappear from
  //                       the briefing rather than leak. The featureId-as-smuggle-channel attack is
  //                       proven impossible in tests (cap-pattern-pipeline-adversarial.test.cjs).
  const safeFeature = (s) => {
    const v = capId(s);
    if (v == null) return null;
    return /^F-\d{3,}$/.test(v) ? v : null;
  };
  // Lowercase hex up to 64 chars (sha-256 length); anything longer or non-hex is rejected.
  const isHexHash = (h) => typeof h === 'string' && /^[0-9a-f]+$/.test(h) && h.length <= 64;

  // Non-conforming featureIds survive as { featureId: null, count } rows (rendered "(unassigned)").
  const byFeature = (Array.isArray(candidate.byFeature) ? candidate.byFeature : [])
    .map((row) => ({ featureId: safeFeature(row && row.featureId), count: Math.max(0, Number(row && row.count) || 0) }))
    .filter((row) => Number.isFinite(row.count));
  const topContextHashes = (Array.isArray(candidate.topContextHashes) ? candidate.topContextHashes : [])
    .filter((row) => row && isHexHash(row.hash))
    .map((row) => ({ hash: row.hash, count: Math.max(0, Number(row.count) || 0) }));

  const payload = {
    // Non-hex candidateIds are re-hashed rather than passed through verbatim.
    candidateId: typeof candidate.candidateId === 'string' && /^[0-9a-f]+$/.test(candidate.candidateId)
      ? candidate.candidateId
      : telemetry.hashContext(String(candidate.candidateId || 'unknown')),
    signalType: candidate.signalType === 'override' || candidate.signalType === 'regret'
      ? candidate.signalType
      : 'unknown',
    count: Math.max(0, Number(candidate.count) || 0),
    byFeature,
    topContextHashes,
  };

  ensureDir(queueDir(projectRoot));
  const briefingPath = path.join(queueDir(projectRoot), `${id}.md`);

  // Markdown body — pure counts + hashes. The frontmatter carries the deferred marker (AC-4).
  const md = renderBriefingMarkdown(id, payload, deferred);
  try {
    fs.writeFileSync(briefingPath, md, 'utf8');
  } catch (_e) {
    return null;
  }

  return { id, briefingPath, payload };
}
785
+
786
/**
 * Render the briefing markdown: YAML-ish frontmatter followed by Aggregate / By Feature /
 * Top Context Hashes / Task sections. The payload is the only source of content.
 *
 * @param {string} id
 * @param {object} payload
 * @param {boolean} deferred
 * @returns {string}
 */
function renderBriefingMarkdown(id, payload, deferred) {
  const out = [];
  const emit = (...chunk) => out.push(...chunk);

  // Frontmatter — carries the AC-4 deferred marker when set.
  emit('---');
  emit(`id: ${id}`);
  emit(`signalType: ${payload.signalType}`);
  emit(`count: ${payload.count}`);
  emit(`candidateId: ${payload.candidateId}`);
  if (deferred) emit('deferred: budget');
  emit('---', '');

  emit(`# Pattern Briefing ${id}`, '');
  emit('Counts + hashes only. No raw signals, no user text, no file paths. (F-071/AC-3)', '');

  emit('## Aggregate', '');
  emit(`- signalType: \`${payload.signalType}\``);
  emit(`- count: ${payload.count}`);
  emit(`- candidateId: \`${payload.candidateId}\``, '');

  emit('## By Feature', '');
  if (payload.byFeature.length === 0) {
    emit('_(none)_');
  } else {
    for (const { featureId, count } of payload.byFeature) {
      emit(`- \`${featureId == null ? '(unassigned)' : featureId}\` — ${count}`);
    }
  }
  emit('', '## Top Context Hashes', '');

  if (payload.topContextHashes.length === 0) {
    emit('_(none)_');
  } else {
    for (const { hash, count } of payload.topContextHashes) {
      emit(`- \`${hash}\` — ${count}`);
    }
  }
  emit('', '## Task', '');

  emit('Choose ONE of L1 / L2 / L3 and write the result to');
  emit(`\`.cap/learning/patterns/${id}.json\` matching the documented schema.`);
  emit('');
  return out.join('\n');
}
841
+
842
+ // -----------------------------------------------------------------------------
843
+ // Pattern persistence — write/read P-NNN.json
844
+ // -----------------------------------------------------------------------------
845
+
846
+ // @cap-todo(ac:F-071/AC-5) Graceful degradation — when LLM stage cannot run, persist the heuristic
847
+ // L1 suggestion with degraded:true. Marked via markDegraded() helper.
848
+ // @cap-todo(ac:F-071/AC-6) PatternRecord schema persisted here: id, level, featureRef, source,
849
+ // degraded, confidence, suggestion, evidence.
850
+ /**
851
+ * Persist a PatternRecord to .cap/learning/patterns/P-NNN.json. Lazy-creates the directory.
852
+ *
853
+ * @param {string} projectRoot
854
+ * @param {PatternRecord} pattern
855
+ * @returns {boolean}
856
+ */
857
+ function recordPatternSuggestion(projectRoot, pattern) {
858
+ if (typeof projectRoot !== 'string' || projectRoot.length === 0) return false;
859
+ if (!pattern || typeof pattern !== 'object') return false;
860
+ if (typeof pattern.id !== 'string' || !/^P-\d+$/.test(pattern.id)) return false;
861
+
862
+ ensureDir(patternsDir(projectRoot));
863
+ const fp = path.join(patternsDir(projectRoot), `${pattern.id}.json`);
864
+ try {
865
+ fs.writeFileSync(fp, JSON.stringify(pattern, null, 2) + '\n', 'utf8');
866
+ return true;
867
+ } catch (_e) {
868
+ return false;
869
+ }
870
+ }
871
+
872
/**
 * Persist a heuristic-only PatternRecord (degraded path). Helper used by the orchestrator's
 * AC-5 fallback when an outer agent doesn't process the briefing in this session.
 *
 * @cap-decision(F-071/D8) Clobber protection: if `patterns/<id>.json` already exists with
 * `source !== 'heuristic'` (i.e. an LLM stage actually produced a pattern for this id), the
 * degraded fallback MUST NOT overwrite it. Returns `{ written: false, reason: 'llm-pattern-exists' }`
 * so the orchestrator knows to log instead of silently clobbering. Without this guard, a slow
 * Stage-2 LLM result followed by a Step-5 fallback in the same session could silently lose the
 * higher-quality LLM pattern. Foot-gun for F-072/F-073 wirers — closed pre-ship per Stage-2 review.
 * @cap-risk(F-071/AC-5) Two heuristic-only runs over the same id WILL overwrite (latest-wins is the
 * intended degraded contract). The guard only blocks heuristic-over-llm clobber, not heuristic-
 * over-heuristic refresh.
 *
 * @param {string} projectRoot
 * @param {string} id - 'P-NNN'
 * @param {HeuristicCandidate} candidate
 * @returns {boolean | { written: boolean, reason?: string, prior?: { source: string, level: string } }}
 *          - `true` when the degraded record was written (back-compat with prior boolean callers).
 *          - `false` when the candidate was nullish or the write itself failed.
 *          - `{ written: false, reason: 'llm-pattern-exists', prior }` when an LLM pattern was preserved.
 */
function markDegraded(projectRoot, id, candidate) {
  if (!candidate) return false;

  // Clobber-protection: read any existing pattern at this id and refuse to overwrite an LLM record.
  try {
    const existingPath = path.join(patternsDir(projectRoot), `${id}.json`);
    if (fs.existsSync(existingPath)) {
      const existing = JSON.parse(fs.readFileSync(existingPath, 'utf8'));
      // A record with any non-'heuristic' source is treated as LLM-produced and preserved.
      if (existing && existing.source && existing.source !== 'heuristic') {
        return {
          written: false,
          reason: 'llm-pattern-exists',
          prior: { source: existing.source, level: existing.level },
        };
      }
    }
  } catch (_e) {
    // Read failure → fall through to write (latest-wins for malformed prior records).
  }

  /** @type {PatternRecord} */
  const pattern = {
    id,
    createdAt: new Date().toISOString(),
    level: 'L1',
    featureRef: candidate.featureId || null,
    source: 'heuristic',
    degraded: true,
    // Fixed 0.5 confidence for every heuristic-only pattern — the degraded path has no
    // per-candidate confidence model.
    confidence: 0.5,
    suggestion: candidate.suggestion,
    evidence: {
      candidateId: candidate.candidateId,
      signalType: candidate.signalType,
      count: candidate.count,
      topContextHashes: candidate.topContextHashes || [],
    },
  };
  // Delegates validation (P-NNN id shape) and the actual write; returns its boolean result.
  return recordPatternSuggestion(projectRoot, pattern);
}
933
+
934
/**
 * List all persisted PatternRecords from `.cap/learning/patterns/P-*.json`. Tolerant to a
 * missing directory and to malformed files — both are skipped, never thrown.
 *
 * @param {string} projectRoot
 * @returns {Array<PatternRecord>}
 */
function listPatterns(projectRoot) {
  const dir = patternsDir(projectRoot);
  if (!fs.existsSync(dir)) return [];

  let names;
  try {
    names = fs.readdirSync(dir);
  } catch (_e) {
    return [];
  }

  const patterns = [];
  for (const name of names) {
    if (!/^P-\d+\.json$/.test(name)) continue;
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(dir, name), 'utf8'));
      if (parsed && typeof parsed === 'object') patterns.push(parsed);
    } catch (_e) {
      // Skip — malformed pattern files must not crash listing.
    }
  }

  // Stable deterministic order: ascending numeric id.
  patterns.sort((a, b) => (parsePatternId(a.id) || 0) - (parsePatternId(b.id) || 0));
  return patterns;
}
970
+
971
+ // -----------------------------------------------------------------------------
972
+ // Budget gate — AC-4 / AC-7
973
+ // -----------------------------------------------------------------------------
974
+
975
// @cap-todo(ac:F-071/AC-4) Budget hard-limit: 3 LLM calls per session by default. Overflow lands in
//                          .cap/learning/queue/ with deferred:budget. Re-uses readBudget +
//                          getLlmUsage from cap-telemetry.cjs — single source of truth.
// @cap-todo(ac:F-071/AC-7) Budget override from .cap/learning/config.json#llmBudgetPerSession.
//                          Honoured automatically because we delegate to telemetry.readBudget().
// @cap-risk(F-071/AC-4) The budget gate is THE reason we can ship Stage 2. A regression that
//                       bypasses readBudget / getLlmUsage would burn through the user's wallet
//                       silently. Every promotion path in this module routes through this function.
/**
 * Compute the remaining LLM-call budget for a session. `remaining` is 0 when the session is at
 * or over the budget cap. With no sessionId, usage is reported as 0 (full budget remaining).
 *
 * @param {string} projectRoot
 * @param {string|null} sessionId
 * @returns {{ budget: number, used: number, remaining: number, source: 'config'|'default' }}
 */
function getSessionBudgetState(projectRoot, sessionId) {
  const { budget, source } = telemetry.readBudget(projectRoot);
  let used = 0;
  if (sessionId) {
    try {
      const calls = telemetry.getLlmUsage(projectRoot, { sessionId }) || [];
      used = calls.length;
    } catch (_e) {
      // Usage query failure → treat the session as unspent rather than blocking Stage 2.
      used = 0;
    }
  }
  return { budget, used, remaining: Math.max(0, budget - used), source };
}
1005
+
1006
+ // -----------------------------------------------------------------------------
1007
+ // Exports — keep this list minimal. F-072 / F-073 should consume only these.
1008
+ // -----------------------------------------------------------------------------
1009
+
1010
// Single CommonJS export assignment — keeps the module's public surface in one greppable place.
module.exports = {
  // constants — exported for tests
  CAP_DIR,
  LEARNING_DIR,
  CANDIDATES_DIR,
  PATTERNS_DIR,
  QUEUE_DIR,
  THRESHOLD_OVERRIDE_COUNT,
  THRESHOLD_REGRET_COUNT,
  TFIDF_TOP_K_PER_SESSION,
  // public API — the only functions F-072 / F-073 should depend on
  runHeuristicStage,
  checkThreshold,
  allocatePatternId,
  buildBriefing,
  recordPatternSuggestion,
  markDegraded,
  listPatterns,
  getSessionBudgetState,
  getEffectiveThreshold,
  // path helpers — exported for tests / consumers; kept private from public docs
  candidatesDir,
  patternsDir,
  queueDir,
};