cap-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. package/.claude-plugin/README.md +26 -0
  2. package/.claude-plugin/marketplace.json +24 -0
  3. package/.claude-plugin/plugin.json +24 -0
  4. package/LICENSE +21 -0
  5. package/README.ja-JP.md +834 -0
  6. package/README.ko-KR.md +823 -0
  7. package/README.md +806 -0
  8. package/README.pt-BR.md +452 -0
  9. package/README.zh-CN.md +800 -0
  10. package/agents/cap-architect.md +269 -0
  11. package/agents/cap-brainstormer.md +207 -0
  12. package/agents/cap-curator.md +276 -0
  13. package/agents/cap-debugger.md +365 -0
  14. package/agents/cap-designer.md +246 -0
  15. package/agents/cap-historian.md +464 -0
  16. package/agents/cap-migrator.md +291 -0
  17. package/agents/cap-prototyper.md +197 -0
  18. package/agents/cap-validator.md +308 -0
  19. package/bin/install.js +5433 -0
  20. package/cap/bin/cap-tools.cjs +853 -0
  21. package/cap/bin/lib/arc-scanner.cjs +344 -0
  22. package/cap/bin/lib/cap-affinity-engine.cjs +862 -0
  23. package/cap/bin/lib/cap-anchor.cjs +228 -0
  24. package/cap/bin/lib/cap-annotation-writer.cjs +340 -0
  25. package/cap/bin/lib/cap-checkpoint.cjs +434 -0
  26. package/cap/bin/lib/cap-cluster-detect.cjs +945 -0
  27. package/cap/bin/lib/cap-cluster-display.cjs +52 -0
  28. package/cap/bin/lib/cap-cluster-format.cjs +245 -0
  29. package/cap/bin/lib/cap-cluster-helpers.cjs +295 -0
  30. package/cap/bin/lib/cap-cluster-io.cjs +212 -0
  31. package/cap/bin/lib/cap-completeness.cjs +540 -0
  32. package/cap/bin/lib/cap-deps.cjs +583 -0
  33. package/cap/bin/lib/cap-design-families.cjs +332 -0
  34. package/cap/bin/lib/cap-design.cjs +966 -0
  35. package/cap/bin/lib/cap-divergence-detector.cjs +400 -0
  36. package/cap/bin/lib/cap-doctor.cjs +752 -0
  37. package/cap/bin/lib/cap-feature-map-internals.cjs +19 -0
  38. package/cap/bin/lib/cap-feature-map-migrate.cjs +335 -0
  39. package/cap/bin/lib/cap-feature-map-monorepo.cjs +885 -0
  40. package/cap/bin/lib/cap-feature-map-shard.cjs +315 -0
  41. package/cap/bin/lib/cap-feature-map.cjs +1943 -0
  42. package/cap/bin/lib/cap-fitness-score.cjs +1075 -0
  43. package/cap/bin/lib/cap-impact-analysis.cjs +652 -0
  44. package/cap/bin/lib/cap-learn-review.cjs +1072 -0
  45. package/cap/bin/lib/cap-learning-signals.cjs +627 -0
  46. package/cap/bin/lib/cap-loader.cjs +227 -0
  47. package/cap/bin/lib/cap-logger.cjs +57 -0
  48. package/cap/bin/lib/cap-memory-bridge.cjs +764 -0
  49. package/cap/bin/lib/cap-memory-confidence.cjs +452 -0
  50. package/cap/bin/lib/cap-memory-dir.cjs +987 -0
  51. package/cap/bin/lib/cap-memory-engine.cjs +698 -0
  52. package/cap/bin/lib/cap-memory-extends.cjs +398 -0
  53. package/cap/bin/lib/cap-memory-graph.cjs +790 -0
  54. package/cap/bin/lib/cap-memory-migrate.cjs +2015 -0
  55. package/cap/bin/lib/cap-memory-pin.cjs +183 -0
  56. package/cap/bin/lib/cap-memory-platform.cjs +490 -0
  57. package/cap/bin/lib/cap-memory-prune.cjs +707 -0
  58. package/cap/bin/lib/cap-memory-schema.cjs +812 -0
  59. package/cap/bin/lib/cap-migrate-tags.cjs +309 -0
  60. package/cap/bin/lib/cap-migrate.cjs +540 -0
  61. package/cap/bin/lib/cap-pattern-apply.cjs +1203 -0
  62. package/cap/bin/lib/cap-pattern-pipeline.cjs +1034 -0
  63. package/cap/bin/lib/cap-plugin-manifest.cjs +80 -0
  64. package/cap/bin/lib/cap-realtime-affinity.cjs +399 -0
  65. package/cap/bin/lib/cap-reconcile.cjs +570 -0
  66. package/cap/bin/lib/cap-research-gate.cjs +218 -0
  67. package/cap/bin/lib/cap-scope-filter.cjs +402 -0
  68. package/cap/bin/lib/cap-semantic-pipeline.cjs +1038 -0
  69. package/cap/bin/lib/cap-session-extract.cjs +987 -0
  70. package/cap/bin/lib/cap-session.cjs +445 -0
  71. package/cap/bin/lib/cap-snapshot-linkage.cjs +963 -0
  72. package/cap/bin/lib/cap-stack-docs.cjs +646 -0
  73. package/cap/bin/lib/cap-tag-observer.cjs +371 -0
  74. package/cap/bin/lib/cap-tag-scanner.cjs +1766 -0
  75. package/cap/bin/lib/cap-telemetry.cjs +466 -0
  76. package/cap/bin/lib/cap-test-audit.cjs +1438 -0
  77. package/cap/bin/lib/cap-thread-migrator.cjs +307 -0
  78. package/cap/bin/lib/cap-thread-synthesis.cjs +545 -0
  79. package/cap/bin/lib/cap-thread-tracker.cjs +519 -0
  80. package/cap/bin/lib/cap-trace.cjs +399 -0
  81. package/cap/bin/lib/cap-trust-mode.cjs +336 -0
  82. package/cap/bin/lib/cap-ui-design-editor.cjs +642 -0
  83. package/cap/bin/lib/cap-ui-mind-map.cjs +712 -0
  84. package/cap/bin/lib/cap-ui-thread-nav.cjs +693 -0
  85. package/cap/bin/lib/cap-ui.cjs +1245 -0
  86. package/cap/bin/lib/cap-upgrade.cjs +1028 -0
  87. package/cap/bin/lib/cli/arg-helpers.cjs +49 -0
  88. package/cap/bin/lib/cli/frontmatter-router.cjs +31 -0
  89. package/cap/bin/lib/cli/init-router.cjs +68 -0
  90. package/cap/bin/lib/cli/phase-router.cjs +102 -0
  91. package/cap/bin/lib/cli/state-router.cjs +61 -0
  92. package/cap/bin/lib/cli/template-router.cjs +37 -0
  93. package/cap/bin/lib/cli/uat-router.cjs +29 -0
  94. package/cap/bin/lib/cli/validation-router.cjs +26 -0
  95. package/cap/bin/lib/cli/verification-router.cjs +31 -0
  96. package/cap/bin/lib/cli/workstream-router.cjs +39 -0
  97. package/cap/bin/lib/commands.cjs +961 -0
  98. package/cap/bin/lib/config.cjs +467 -0
  99. package/cap/bin/lib/convention-reader.cjs +258 -0
  100. package/cap/bin/lib/core.cjs +1241 -0
  101. package/cap/bin/lib/feature-aggregator.cjs +423 -0
  102. package/cap/bin/lib/frontmatter.cjs +337 -0
  103. package/cap/bin/lib/init.cjs +1443 -0
  104. package/cap/bin/lib/manifest-generator.cjs +383 -0
  105. package/cap/bin/lib/milestone.cjs +253 -0
  106. package/cap/bin/lib/model-profiles.cjs +69 -0
  107. package/cap/bin/lib/monorepo-context.cjs +226 -0
  108. package/cap/bin/lib/monorepo-migrator.cjs +509 -0
  109. package/cap/bin/lib/phase.cjs +889 -0
  110. package/cap/bin/lib/profile-output.cjs +989 -0
  111. package/cap/bin/lib/profile-pipeline.cjs +540 -0
  112. package/cap/bin/lib/roadmap.cjs +330 -0
  113. package/cap/bin/lib/security.cjs +394 -0
  114. package/cap/bin/lib/session-manager.cjs +292 -0
  115. package/cap/bin/lib/skeleton-generator.cjs +179 -0
  116. package/cap/bin/lib/state.cjs +1032 -0
  117. package/cap/bin/lib/template.cjs +231 -0
  118. package/cap/bin/lib/test-detector.cjs +62 -0
  119. package/cap/bin/lib/uat.cjs +283 -0
  120. package/cap/bin/lib/verify.cjs +889 -0
  121. package/cap/bin/lib/workspace-detector.cjs +371 -0
  122. package/cap/bin/lib/workstream.cjs +492 -0
  123. package/cap/commands/gsd/workstreams.md +63 -0
  124. package/cap/references/arc-standard.md +315 -0
  125. package/cap/references/cap-agent-architecture.md +101 -0
  126. package/cap/references/cap-gitignore-template +9 -0
  127. package/cap/references/cap-zero-deps.md +158 -0
  128. package/cap/references/checkpoints.md +778 -0
  129. package/cap/references/continuation-format.md +249 -0
  130. package/cap/references/contract-test-templates.md +312 -0
  131. package/cap/references/feature-map-template.md +25 -0
  132. package/cap/references/git-integration.md +295 -0
  133. package/cap/references/git-planning-commit.md +38 -0
  134. package/cap/references/model-profiles.md +174 -0
  135. package/cap/references/phase-numbering.md +126 -0
  136. package/cap/references/planning-config.md +202 -0
  137. package/cap/references/property-test-templates.md +316 -0
  138. package/cap/references/security-test-templates.md +347 -0
  139. package/cap/references/session-template.json +8 -0
  140. package/cap/references/tdd.md +263 -0
  141. package/cap/references/user-profiling.md +681 -0
  142. package/cap/references/verification-patterns.md +612 -0
  143. package/cap/templates/UAT.md +265 -0
  144. package/cap/templates/claude-md.md +175 -0
  145. package/cap/templates/codebase/architecture.md +255 -0
  146. package/cap/templates/codebase/concerns.md +310 -0
  147. package/cap/templates/codebase/conventions.md +307 -0
  148. package/cap/templates/codebase/integrations.md +280 -0
  149. package/cap/templates/codebase/stack.md +186 -0
  150. package/cap/templates/codebase/structure.md +285 -0
  151. package/cap/templates/codebase/testing.md +480 -0
  152. package/cap/templates/config.json +44 -0
  153. package/cap/templates/context.md +352 -0
  154. package/cap/templates/continue-here.md +78 -0
  155. package/cap/templates/copilot-instructions.md +7 -0
  156. package/cap/templates/debug-subagent-prompt.md +91 -0
  157. package/cap/templates/discussion-log.md +63 -0
  158. package/cap/templates/milestone-archive.md +123 -0
  159. package/cap/templates/milestone.md +115 -0
  160. package/cap/templates/phase-prompt.md +610 -0
  161. package/cap/templates/planner-subagent-prompt.md +117 -0
  162. package/cap/templates/project.md +186 -0
  163. package/cap/templates/requirements.md +231 -0
  164. package/cap/templates/research-project/ARCHITECTURE.md +204 -0
  165. package/cap/templates/research-project/FEATURES.md +147 -0
  166. package/cap/templates/research-project/PITFALLS.md +200 -0
  167. package/cap/templates/research-project/STACK.md +120 -0
  168. package/cap/templates/research-project/SUMMARY.md +170 -0
  169. package/cap/templates/research.md +552 -0
  170. package/cap/templates/roadmap.md +202 -0
  171. package/cap/templates/state.md +176 -0
  172. package/cap/templates/summary.md +364 -0
  173. package/cap/templates/user-preferences.md +498 -0
  174. package/cap/templates/verification-report.md +322 -0
  175. package/cap/workflows/add-phase.md +112 -0
  176. package/cap/workflows/add-tests.md +351 -0
  177. package/cap/workflows/add-todo.md +158 -0
  178. package/cap/workflows/audit-milestone.md +340 -0
  179. package/cap/workflows/audit-uat.md +109 -0
  180. package/cap/workflows/autonomous.md +891 -0
  181. package/cap/workflows/check-todos.md +177 -0
  182. package/cap/workflows/cleanup.md +152 -0
  183. package/cap/workflows/complete-milestone.md +767 -0
  184. package/cap/workflows/diagnose-issues.md +231 -0
  185. package/cap/workflows/discovery-phase.md +289 -0
  186. package/cap/workflows/discuss-phase-assumptions.md +653 -0
  187. package/cap/workflows/discuss-phase.md +1049 -0
  188. package/cap/workflows/do.md +104 -0
  189. package/cap/workflows/execute-phase.md +846 -0
  190. package/cap/workflows/execute-plan.md +514 -0
  191. package/cap/workflows/fast.md +105 -0
  192. package/cap/workflows/forensics.md +265 -0
  193. package/cap/workflows/health.md +181 -0
  194. package/cap/workflows/help.md +660 -0
  195. package/cap/workflows/insert-phase.md +130 -0
  196. package/cap/workflows/list-phase-assumptions.md +178 -0
  197. package/cap/workflows/list-workspaces.md +56 -0
  198. package/cap/workflows/manager.md +362 -0
  199. package/cap/workflows/map-codebase.md +377 -0
  200. package/cap/workflows/milestone-summary.md +223 -0
  201. package/cap/workflows/new-milestone.md +486 -0
  202. package/cap/workflows/new-project.md +1250 -0
  203. package/cap/workflows/new-workspace.md +237 -0
  204. package/cap/workflows/next.md +97 -0
  205. package/cap/workflows/node-repair.md +92 -0
  206. package/cap/workflows/note.md +156 -0
  207. package/cap/workflows/pause-work.md +176 -0
  208. package/cap/workflows/plan-milestone-gaps.md +273 -0
  209. package/cap/workflows/plan-phase.md +857 -0
  210. package/cap/workflows/plant-seed.md +169 -0
  211. package/cap/workflows/pr-branch.md +129 -0
  212. package/cap/workflows/profile-user.md +449 -0
  213. package/cap/workflows/progress.md +507 -0
  214. package/cap/workflows/quick.md +757 -0
  215. package/cap/workflows/remove-phase.md +155 -0
  216. package/cap/workflows/remove-workspace.md +90 -0
  217. package/cap/workflows/research-phase.md +82 -0
  218. package/cap/workflows/resume-project.md +326 -0
  219. package/cap/workflows/review.md +228 -0
  220. package/cap/workflows/session-report.md +146 -0
  221. package/cap/workflows/settings.md +283 -0
  222. package/cap/workflows/ship.md +228 -0
  223. package/cap/workflows/stats.md +60 -0
  224. package/cap/workflows/transition.md +671 -0
  225. package/cap/workflows/ui-phase.md +298 -0
  226. package/cap/workflows/ui-review.md +161 -0
  227. package/cap/workflows/update.md +323 -0
  228. package/cap/workflows/validate-phase.md +170 -0
  229. package/cap/workflows/verify-phase.md +254 -0
  230. package/cap/workflows/verify-work.md +637 -0
  231. package/commands/cap/annotate.md +165 -0
  232. package/commands/cap/brainstorm.md +393 -0
  233. package/commands/cap/checkpoint.md +106 -0
  234. package/commands/cap/completeness.md +94 -0
  235. package/commands/cap/continue.md +72 -0
  236. package/commands/cap/debug.md +588 -0
  237. package/commands/cap/deps.md +169 -0
  238. package/commands/cap/design.md +479 -0
  239. package/commands/cap/init.md +354 -0
  240. package/commands/cap/iterate.md +249 -0
  241. package/commands/cap/learn.md +459 -0
  242. package/commands/cap/memory.md +275 -0
  243. package/commands/cap/migrate-feature-map.md +91 -0
  244. package/commands/cap/migrate-memory.md +108 -0
  245. package/commands/cap/migrate-tags.md +91 -0
  246. package/commands/cap/migrate.md +131 -0
  247. package/commands/cap/prototype.md +510 -0
  248. package/commands/cap/reconcile.md +121 -0
  249. package/commands/cap/review.md +360 -0
  250. package/commands/cap/save.md +72 -0
  251. package/commands/cap/scan.md +404 -0
  252. package/commands/cap/start.md +356 -0
  253. package/commands/cap/status.md +118 -0
  254. package/commands/cap/test-audit.md +262 -0
  255. package/commands/cap/test.md +394 -0
  256. package/commands/cap/trace.md +133 -0
  257. package/commands/cap/ui.md +167 -0
  258. package/hooks/dist/cap-check-update.js +115 -0
  259. package/hooks/dist/cap-context-monitor.js +185 -0
  260. package/hooks/dist/cap-learn-review-hook.js +114 -0
  261. package/hooks/dist/cap-learning-hook.js +192 -0
  262. package/hooks/dist/cap-memory.js +299 -0
  263. package/hooks/dist/cap-prompt-guard.js +97 -0
  264. package/hooks/dist/cap-statusline.js +157 -0
  265. package/hooks/dist/cap-tag-observer.js +115 -0
  266. package/hooks/dist/cap-version-check.js +112 -0
  267. package/hooks/dist/cap-workflow-guard.js +175 -0
  268. package/hooks/hooks.json +55 -0
  269. package/package.json +58 -0
  270. package/scripts/base64-scan.sh +262 -0
  271. package/scripts/build-hooks.js +93 -0
  272. package/scripts/cap-removal-checklist.md +202 -0
  273. package/scripts/prompt-injection-scan.sh +199 -0
  274. package/scripts/run-tests.cjs +181 -0
  275. package/scripts/secret-scan.sh +227 -0
@@ -0,0 +1,1038 @@
1
+ // @cap-feature(feature:F-037) Semantic Analysis Pipeline — 3-stage pipeline for computing thread similarity via TF-IDF, concept taxonomy, and graph propagation
2
+ // @cap-decision Pure logic module with zero I/O and zero dependencies. All functions accept data as input and return numeric scores.
3
+ // @cap-decision Three-stage architecture: Stage 1 (text signals) provides lexical similarity, Stage 2 (concept signals) provides semantic similarity via taxonomy, Stage 3 (graph propagation) discovers transitive connections.
4
+ // @cap-decision Weights within Stage 1 are TF-IDF=0.5, N-gram=0.2, Jaccard=0.1; Stage 2 concept vector=0.2. These sum to 1.0 and represent the full pipeline blend.
5
+
6
+ 'use strict';
7
+
8
+ // --- Types ---
9
+
10
+ /**
11
+ * @typedef {Object} Thread
12
+ * @property {string} id - Thread ID (thr-XXXX)
13
+ * @property {string} problemStatement - Problem being explored
14
+ * @property {string} solutionShape - Solution direction
15
+ * @property {string[]} boundaryDecisions - Key decisions
16
+ * @property {string[]} featureIds - Associated feature IDs
17
+ * @property {string[]} keywords - Problem-space keywords
18
+ */
19
+
20
+ /**
21
+ * @typedef {Object} MemoryGraph
22
+ * @property {Object<string, GraphNode>} nodes
23
+ * @property {GraphEdge[]} edges
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} GraphNode
28
+ * @property {string} type
29
+ * @property {string} id
30
+ * @property {string} label
31
+ * @property {boolean} active
32
+ * @property {Object} metadata
33
+ */
34
+
35
+ /**
36
+ * @typedef {Object} GraphEdge
37
+ * @property {string} source
38
+ * @property {string} target
39
+ * @property {string} type
40
+ * @property {boolean} active
41
+ * @property {Object} metadata
42
+ */
43
+
44
+ /**
45
+ * @typedef {Object} Corpus
46
+ * @property {Map<string, number>} docFrequency - term -> number of docs containing it
47
+ * @property {number} docCount - total documents in corpus
48
+ */
49
+
50
+ /**
51
+ * @typedef {Object<string, number>} SparseVector
52
+ * Map of term -> TF-IDF weight
53
+ */
54
+
55
+ /**
56
+ * @typedef {Object} CooccurrenceEntry
57
+ * @property {number} count - Times this concept pair co-occurred
58
+ * @property {string[]} threads - Thread IDs where co-occurrence was observed
59
+ */
60
+
61
+ /**
62
+ * @typedef {Object<string, CooccurrenceEntry>} CooccurrenceMatrix
63
+ * Key format: "conceptA|conceptB" (alphabetically ordered)
64
+ */
65
+
66
+ /**
67
+ * @typedef {Object} Stage1Result
68
+ * @property {number} tfidf - TF-IDF cosine similarity (weight 0.5)
69
+ * @property {number} ngram - Trigram overlap (weight 0.2)
70
+ * @property {number} jaccard - Keyword Jaccard (weight 0.1)
71
+ * @property {number} combined - Weighted combination
72
+ */
73
+
74
+ /**
75
+ * @typedef {Object} Stage2Result
76
+ * @property {number} conceptSim - Concept vector cosine similarity
77
+ * @property {number} combined - Weighted combination (weight 0.2)
78
+ */
79
+
80
+ /**
81
+ * @typedef {Object} PipelineResult
82
+ * @property {Stage1Result} stage1 - Text signal scores
83
+ * @property {Stage2Result} stage2 - Concept signal scores
84
+ * @property {Object<string, number>} stage3 - Propagated scores keyed by thread-pair ID
85
+ * @property {number} finalScore - Full pipeline score (0.0-1.0)
86
+ */
87
+
88
+ /**
89
+ * @typedef {Object} PipelineContext
90
+ * @property {Thread[]} allThreads - All threads for corpus building
91
+ * @property {MemoryGraph} [graph] - Memory graph for Stage 3
92
+ * @property {Object<string, string[]>} [taxonomy] - Optional taxonomy override
93
+ * @property {Object} [propagationOptions] - { iterations: number, damping: number }
94
+ */
95
+
96
+ // --- Stop Words ---
97
+
98
// English stop words excluded during tokenization: articles, auxiliaries and
// modals, conjunctions, determiners/quantifiers, prepositions, adverbs of
// place/time, interrogatives, and pronouns.
// NOTE(review): entries shorter than 3 characters ('a', 'an', 'is', 'i', ...)
// can never survive tokenize()'s `w.length >= 3` filter, so they are
// effectively redundant here — harmless, but dead weight.
/** @type {Set<string>} */
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
  'should', 'may', 'might', 'shall', 'can', 'need', 'must', 'ought',
  'and', 'but', 'or', 'nor', 'not', 'so', 'yet', 'both', 'either',
  'neither', 'each', 'every', 'all', 'any', 'few', 'more', 'most',
  'other', 'some', 'such', 'no', 'only', 'own', 'same', 'than',
  'too', 'very', 'just', 'because', 'as', 'until', 'while', 'of',
  'at', 'by', 'for', 'with', 'about', 'against', 'between', 'through',
  'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up',
  'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again',
  'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why',
  'how', 'what', 'which', 'who', 'whom', 'this', 'that', 'these',
  'those', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'you',
  'your', 'yours', 'he', 'him', 'his', 'she', 'her', 'hers', 'it',
  'its', 'they', 'them', 'their', 'theirs', 'also', 'into', 'if',
]);
116
+
117
// --- Stage 1 Pipeline Weights ---
// @cap-decision Stage 1 weights: TF-IDF dominates at 0.5 because term frequency is the strongest lexical signal. N-gram at 0.2 handles typos/morphology. Jaccard at 0.1 is a simple fallback. Remaining 0.2 goes to Stage 2 concept similarity.

// The four weights below sum to 1.0 and define the full pipeline blend
// (see computeStage1, which applies the three Stage 1 weights).
const STAGE1_WEIGHT_TFIDF = 0.5;   // TF-IDF cosine similarity (AC-1)
const STAGE1_WEIGHT_NGRAM = 0.2;   // character trigram overlap (AC-2)
const STAGE1_WEIGHT_JACCARD = 0.1; // keyword Jaccard (AC-3)
const STAGE2_WEIGHT_CONCEPT = 0.2; // Stage 2 concept-vector cosine
124
+
125
+ // ============================================================================
126
+ // Stage 1: Text Signals
127
+ // ============================================================================
128
+
129
+ // --- Tokenization ---
130
+
131
/**
 * Tokenize text into lowercase terms, filtering stop words and short tokens.
 * Hyphens are kept, so compound keywords like "feature-flag" survive intact.
 * @param {string} text - Raw text input
 * @returns {string[]} Array of tokens (may contain duplicates for TF counting)
 */
// @cap-todo(ac:F-037/AC-1) Tokenizer shared by TF-IDF and Jaccard stages
function tokenize(text) {
  if (typeof text !== 'string' || text.length === 0) return [];

  // Lowercase, replace everything except alphanumerics/whitespace/hyphen
  // with spaces, then split on whitespace runs.
  const rawWords = text
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, ' ')
    .split(/\s+/);

  const tokens = [];
  for (const word of rawWords) {
    // Drop short fragments (also removes the empty strings split can emit)
    // and common English stop words.
    if (word.length >= 3 && !STOP_WORDS.has(word)) {
      tokens.push(word);
    }
  }
  return tokens;
}
145
+
146
+ /**
147
+ * Extract the full searchable text from a thread object.
148
+ * Concatenates problemStatement, solutionShape, and boundaryDecisions.
149
+ * @param {Thread} thread
150
+ * @returns {string}
151
+ */
152
+ function getThreadText(thread) {
153
+ const parts = [];
154
+ if (thread.problemStatement) parts.push(thread.problemStatement);
155
+ if (thread.solutionShape) parts.push(thread.solutionShape);
156
+ if (Array.isArray(thread.boundaryDecisions)) {
157
+ parts.push(thread.boundaryDecisions.join(' '));
158
+ }
159
+ return parts.join(' ');
160
+ }
161
+
162
+ // --- TF-IDF (AC-1) ---
163
+
164
+ /**
165
+ * Build a corpus from an array of threads for IDF computation.
166
+ * @param {Thread[]} threads - All threads in the system
167
+ * @returns {Corpus} Corpus with document frequency map and document count
168
+ */
169
+ // @cap-todo(ac:F-037/AC-1) Build corpus from all thread texts for IDF calculation
170
+ function buildCorpus(threads) {
171
+ /** @type {Map<string, number>} */
172
+ const docFrequency = new Map();
173
+ let docCount = 0;
174
+
175
+ for (const thread of threads) {
176
+ const text = getThreadText(thread);
177
+ const tokens = tokenize(text);
178
+ // Deduplicate tokens per document for DF counting
179
+ const uniqueTerms = new Set(tokens);
180
+ for (const term of uniqueTerms) {
181
+ docFrequency.set(term, (docFrequency.get(term) || 0) + 1);
182
+ }
183
+ docCount++;
184
+ }
185
+
186
+ return { docFrequency, docCount };
187
+ }
188
+
189
/**
 * Compute a TF-IDF vector for a given text against a corpus.
 * TF(term, doc) = frequency / total terms in doc
 * IDF(term, corpus) = log(N / (1 + docs containing term))
 * Only strictly positive weights are stored, so terms appearing in most
 * documents (idf <= 0) are dropped from the sparse vector.
 * @param {string} text - Text to vectorize
 * @param {Corpus} corpus - Pre-built corpus
 * @returns {Map<string, number>} Sparse TF-IDF vector
 */
// @cap-todo(ac:F-037/AC-1) TF-IDF vector computation: TF * IDF with +1 smoothing on IDF denominator
function computeTfIdfVector(text, corpus) {
  /** @type {Map<string, number>} */
  const vector = new Map();

  const tokens = tokenize(text);
  const totalTerms = tokens.length;
  if (totalTerms === 0) return vector;

  // Raw term frequencies for this document.
  /** @type {Map<string, number>} */
  const termCounts = new Map();
  for (const token of tokens) {
    termCounts.set(token, (termCounts.get(token) || 0) + 1);
  }

  const docCount = corpus.docCount;
  for (const [term, count] of termCounts) {
    const df = corpus.docFrequency.get(term) || 0;
    // +1 in the denominator avoids division by zero for unknown terms.
    const idf = Math.log(docCount / (1 + df));
    const weight = (count / totalTerms) * idf;
    if (weight > 0) {
      vector.set(term, weight);
    }
  }

  return vector;
}
228
+
229
/**
 * Compute cosine similarity between two sparse vectors.
 * cosine = dot(A, B) / (|A| * |B|)
 * @param {Map<string, number>} vecA - First sparse vector
 * @param {Map<string, number>} vecB - Second sparse vector
 * @returns {number} Cosine similarity (0.0-1.0)
 */
function cosineSimilarity(vecA, vecB) {
  if (vecA.size === 0 || vecB.size === 0) return 0;

  // The dot product only involves shared terms, so walk the smaller map
  // and probe the larger one.
  const [small, large] = vecA.size <= vecB.size ? [vecA, vecB] : [vecB, vecA];
  let dot = 0;
  for (const [term, weight] of small) {
    const counterpart = large.get(term);
    if (counterpart !== undefined) {
      dot += weight * counterpart;
    }
  }

  // Euclidean norm of a sparse vector's values.
  const norm = (vec) => {
    let sumSquares = 0;
    for (const value of vec.values()) {
      sumSquares += value * value;
    }
    return Math.sqrt(sumSquares);
  };

  const magnitude = norm(vecA) * norm(vecB);
  if (magnitude === 0) return 0;

  // clamp01 (defined elsewhere in this module) keeps the result in [0, 1].
  return clamp01(dot / magnitude);
}
267
+
268
/**
 * Compute TF-IDF cosine similarity between two threads.
 * Vectorizes each thread's full text against the shared corpus and takes
 * the cosine of the two sparse vectors.
 * @param {Thread} threadA
 * @param {Thread} threadB
 * @param {Corpus} corpus - Pre-built corpus from all threads
 * @returns {number} Cosine similarity (0.0-1.0)
 */
// @cap-todo(ac:F-037/AC-1) TF-IDF cosine similarity with weight 0.5 in the pipeline blend
function tfidfSimilarity(threadA, threadB, corpus) {
  return cosineSimilarity(
    computeTfIdfVector(getThreadText(threadA), corpus),
    computeTfIdfVector(getThreadText(threadB), corpus)
  );
}
283
+
284
+ // --- Character N-Gram Overlap (AC-2) ---
285
+
286
/**
 * Extract character trigrams from text.
 * "session" -> Set(["ses", "ess", "ssi", "sio", "ion"])
 * @param {string} text - Input text
 * @returns {Set<string>} Set of character trigrams
 */
// @cap-todo(ac:F-037/AC-2) Trigram extraction for typo-resilient matching
function extractTrigrams(text) {
  /** @type {Set<string>} */
  const trigrams = new Set();
  if (typeof text !== 'string' || text.length === 0) return trigrams;

  // Strip everything but alphanumerics so punctuation and spacing cannot
  // split or pollute the grams.
  const flat = text.toLowerCase().replace(/[^a-z0-9]/g, '');
  for (let i = 0; i + 3 <= flat.length; i++) {
    trigrams.add(flat.slice(i, i + 3));
  }
  return trigrams;
}
305
+
306
/**
 * Compute trigram-based Jaccard similarity between two texts.
 * Catches morphological variants and typos: "authenticate" <-> "authentication".
 * @param {string} textA
 * @param {string} textB
 * @returns {number} Similarity score (0.0-1.0)
 */
// @cap-todo(ac:F-037/AC-2) Character N-Gram overlap with weight 0.2 for typo-resilient matching
function trigramSimilarity(textA, textB) {
  const gramsA = extractTrigrams(textA);
  const gramsB = extractTrigrams(textB);

  // Either side empty (which also covers both empty) means no overlap signal.
  if (gramsA.size === 0 || gramsB.size === 0) return 0;

  let shared = 0;
  gramsA.forEach((gram) => {
    if (gramsB.has(gram)) shared += 1;
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B| (inclusion-exclusion, avoids building a set).
  const union = gramsA.size + gramsB.size - shared;
  if (union === 0) return 0;

  return clamp01(shared / union);
}
331
+
332
+ // --- Jaccard Keywords (AC-3) ---
333
+
334
/**
 * Compute Jaccard similarity over keyword sets from two threads.
 * Uses thread.keywords arrays directly; comparison is case-insensitive.
 * @param {string[]} keywordsA - Keywords from thread A
 * @param {string[]} keywordsB - Keywords from thread B
 * @returns {number} Jaccard similarity (0.0-1.0)
 */
// @cap-todo(ac:F-037/AC-3) Jaccard keyword similarity with weight 0.1 as simple fallback signal
function jaccardKeywordSimilarity(keywordsA, keywordsB) {
  const toLowerSet = (list) => new Set((list || []).map((k) => k.toLowerCase()));
  const setA = toLowerSet(keywordsA);
  const setB = toLowerSet(keywordsB);

  // Two keyword-less threads are "no signal", not "identical".
  if (setA.size === 0 && setB.size === 0) return 0;

  let shared = 0;
  for (const keyword of setA) {
    if (setB.has(keyword)) shared += 1;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B| (inclusion-exclusion).
  const union = setA.size + setB.size - shared;
  if (union === 0) return 0;

  return clamp01(shared / union);
}
358
+
359
+ // --- Stage 1 Combined ---
360
+
361
/**
 * Compute all Stage 1 text signals for a thread pair.
 * Blends TF-IDF (0.5), trigram overlap (0.2), and keyword Jaccard (0.1)
 * into a single combined score.
 * @param {Thread} threadA
 * @param {Thread} threadB
 * @param {Corpus} corpus - Pre-built corpus
 * @returns {Stage1Result}
 */
function computeStage1(threadA, threadB, corpus) {
  const textA = getThreadText(threadA);
  const textB = getThreadText(threadB);

  const tfidf = tfidfSimilarity(threadA, threadB, corpus);
  const ngram = trigramSimilarity(textA, textB);
  const jaccard = jaccardKeywordSimilarity(threadA.keywords, threadB.keywords);

  return {
    tfidf,
    ngram,
    jaccard,
    combined:
      tfidf * STAGE1_WEIGHT_TFIDF +
      ngram * STAGE1_WEIGHT_NGRAM +
      jaccard * STAGE1_WEIGHT_JACCARD,
  };
}
383
+
384
+ // ============================================================================
385
+ // Stage 2: Concept Signals
386
+ // ============================================================================
387
+
388
+ // --- Seed Taxonomy (AC-4) ---
389
+ // @cap-todo(ac:F-037/AC-4) Embedded seed taxonomy of 25 universal software development concepts, no external config
390
+ // @cap-decision Taxonomy concepts chosen for breadth across typical software projects. Keywords are lowercase stems/fragments that trigger concept association.
391
+
392
+ const SEED_TAXONOMY = {
393
+ 'authentication': ['auth', 'login', 'logout', 'session', 'token', 'jwt', 'oauth', 'sso', 'password', 'credential'],
394
+ 'authorization': ['permission', 'role', 'access', 'policy', 'rbac', 'rls', 'acl', 'grant'],
395
+ 'database': ['sql', 'query', 'table', 'column', 'migration', 'schema', 'index', 'foreign', 'constraint'],
396
+ 'api': ['endpoint', 'route', 'request', 'response', 'rest', 'graphql', 'middleware', 'handler'],
397
+ 'testing': ['test', 'assert', 'mock', 'stub', 'coverage', 'vitest', 'jest', 'spec'],
398
+ 'caching': ['cache', 'redis', 'ttl', 'invalidate', 'stale', 'refresh', 'memoize'],
399
+ 'deployment': ['deploy', 'pipeline', 'docker', 'container', 'kubernetes', 'staging', 'production'],
400
+ 'ui-frontend': ['component', 'render', 'react', 'vue', 'svelte', 'tailwind', 'css', 'layout', 'responsive'],
401
+ 'state-management': ['state', 'store', 'reducer', 'context', 'redux', 'zustand', 'signal'],
402
+ 'file-io': ['file', 'read', 'write', 'stream', 'buffer', 'upload', 'download', 'storage'],
403
+ 'error-handling': ['error', 'exception', 'catch', 'throw', 'retry', 'fallback', 'timeout'],
404
+ 'configuration': ['config', 'env', 'environment', 'setting', 'option', 'flag', 'feature-flag'],
405
+ 'logging': ['log', 'debug', 'trace', 'monitor', 'observability', 'metric', 'alert'],
406
+ 'security': ['encrypt', 'hash', 'csrf', 'xss', 'injection', 'sanitize', 'vulnerability', 'secure'],
407
+ 'performance': ['optimize', 'latency', 'throughput', 'benchmark', 'profile', 'memory', 'cpu'],
408
+ 'data-validation': ['validate', 'schema', 'zod', 'type', 'check', 'constraint', 'format'],
409
+ 'messaging': ['queue', 'event', 'publish', 'subscribe', 'webhook', 'notification', 'email'],
410
+ 'search': ['search', 'index', 'filter', 'sort', 'paginate', 'fulltext'],
411
+ 'version-control': ['git', 'branch', 'commit', 'merge', 'rebase', 'diff', 'conflict'],
412
+ 'documentation': ['docs', 'readme', 'comment', 'jsdoc', 'markdown', 'changelog'],
413
+ 'build-tooling': ['build', 'bundle', 'compile', 'transpile', 'webpack', 'esbuild', 'vite'],
414
+ 'networking': ['http', 'socket', 'websocket', 'fetch', 'cors', 'proxy', 'ssl', 'tls'],
415
+ 'serialization': ['json', 'parse', 'stringify', 'serialize', 'deserialize', 'encode', 'decode'],
416
+ 'concurrency': ['async', 'await', 'promise', 'parallel', 'worker', 'thread', 'mutex', 'lock'],
417
+ 'migration': ['migrate', 'upgrade', 'backward', 'compatible', 'version', 'legacy', 'deprecate'],
418
+ };
419
+
420
// Taxonomy concept names in sorted order, so a concept's vector index is
// stable across runs regardless of object-literal insertion order.
const CONCEPT_NAMES = [...Object.keys(SEED_TAXONOMY)].sort();
422
+
423
// --- Co-occurrence Matrix (AC-5) ---

/**
 * Build a co-occurrence matrix from observed thread data.
 * Tracks which concept pairs appear together across threads.
 * @param {Thread[]} threads - All threads to analyze
 * @param {Object<string, string[]>} [taxonomy] - Taxonomy to use (defaults to SEED_TAXONOMY)
 * @returns {CooccurrenceMatrix} Matrix keyed by "conceptA|conceptB"
 */
// @cap-todo(ac:F-037/AC-5) Co-occurrence matrix auto-learns from observed thread data
function buildCooccurrenceMatrix(threads, taxonomy) {
  const tax = taxonomy || SEED_TAXONOMY;
  /** @type {CooccurrenceMatrix} */
  const matrix = {};

  for (const thread of threads) {
    const text = getThreadText(thread);
    const tokens = new Set(tokenize(text));
    // Lowercase once per thread. The previous textContainsKeyword path
    // re-lowercased the full text for every keyword of every concept.
    const lowerText = text.toLowerCase();

    // Identify which concepts are present in this thread.
    // .some() short-circuits on the first matching keyword; we only need
    // presence, not the full list of hits.
    const presentConcepts = [];
    for (const [concept, keywords] of Object.entries(tax)) {
      const present = keywords.some(
        (kw) => tokens.has(kw) || lowerText.indexOf(kw.toLowerCase()) !== -1
      );
      if (present) {
        presentConcepts.push(concept);
      }
    }

    // Record co-occurrences for every unordered pair of present concepts.
    for (let i = 0; i < presentConcepts.length; i++) {
      for (let j = i + 1; j < presentConcepts.length; j++) {
        const key = makeCooccurrenceKey(presentConcepts[i], presentConcepts[j]);
        if (!matrix[key]) {
          matrix[key] = { count: 0, threads: [] };
        }
        matrix[key].count++;
        matrix[key].threads.push(thread.id);
      }
    }
  }

  return matrix;
}
466
+
467
/**
 * Case-insensitive substring test for a keyword inside free text.
 * Deliberately matches substrings of larger words (e.g. "index" inside
 * "indexing"), which is how taxonomy keywords are intended to behave.
 * @param {string} text
 * @param {string} keyword
 * @returns {boolean}
 */
function textContainsKeyword(text, keyword) {
  return text.toLowerCase().includes(keyword.toLowerCase());
}
477
+
478
/**
 * Create a stable co-occurrence key from two concept names.
 * Sorting guarantees makeCooccurrenceKey(a, b) === makeCooccurrenceKey(b, a).
 * @param {string} conceptA
 * @param {string} conceptB
 * @returns {string}
 */
function makeCooccurrenceKey(conceptA, conceptB) {
  // Default Array#sort compares strings by UTF-16 code units, which is the
  // same ordering the `<` operator uses on strings.
  return [conceptA, conceptB].sort().join('|');
}
490
+
491
/**
 * Get confirmed concept pairs that have co-occurred at or above a threshold.
 * @param {CooccurrenceMatrix} matrix
 * @param {number} [threshold=5] - Minimum co-occurrence count
 * @returns {Array<{key: string, count: number, concepts: [string, string]}>}
 *   Sorted by count, descending.
 */
// @cap-todo(ac:F-037/AC-5) Confirmed pairs override seed weights at >= 5 co-occurrences
function getConfirmedPairs(matrix, threshold) {
  const minCount = typeof threshold === 'number' ? threshold : 5;

  return Object.entries(matrix)
    .filter(([, entry]) => entry.count >= minCount)
    .map(([key, entry]) => {
      // Keys are produced by makeCooccurrenceKey as "conceptA|conceptB".
      const [conceptA, conceptB] = key.split('|');
      return { key, count: entry.count, concepts: [conceptA, conceptB] };
    })
    .sort((a, b) => b.count - a.count);
}
511
+
512
// --- Concept Vector Projection (AC-6) ---

/**
 * Project thread text into concept space using the taxonomy.
 * For each concept, score = matched keyword count / total keyword count,
 * so concepts with longer keyword lists are not favored.
 * @param {string} text - Thread text
 * @param {Object<string, string[]>} [taxonomy] - Taxonomy to use
 * @returns {Map<string, number>} Concept vector (concept name -> score)
 */
// @cap-todo(ac:F-037/AC-6) Concept vector similarity via concept space projection + cosine distance
function projectToConcepts(text, taxonomy) {
  const tax = taxonomy || SEED_TAXONOMY;
  /** @type {Map<string, number>} */
  const vector = new Map();

  // Non-string or empty input yields an empty vector.
  if (typeof text !== 'string' || text.length === 0) return vector;

  const haystack = text.toLowerCase();

  for (const [concept, keywords] of Object.entries(tax)) {
    if (keywords.length === 0) continue;

    // Case-insensitive substring matching, consistent with textContainsKeyword.
    // Taxonomy keywords are assumed to already be lowercase.
    const matched = keywords.filter((kw) => haystack.includes(kw)).length;
    if (matched > 0) {
      vector.set(concept, matched / keywords.length);
    }
  }

  return vector;
}
548
+
549
/**
 * Apply co-occurrence boost to concept vectors.
 * For each confirmed pair where exactly one concept is present, the absent
 * concept receives a small score proportional to the pair's co-occurrence
 * strength and the present concept's score.
 * @param {Map<string, number>} vector - Original concept vector
 * @param {CooccurrenceMatrix} matrix - Co-occurrence data
 * @param {number} [threshold=5] - Minimum co-occurrences to trigger boost
 * @returns {Map<string, number>} Boosted concept vector
 */
// @cap-decision Co-occurrence boost adds 0.1 * (count/maxCount) to the weaker concept in a confirmed pair. This is a gentle nudge, not an override, to preserve the seed taxonomy signal.
function applyCooccurrenceBoost(vector, matrix, threshold) {
  const confirmedPairs = getConfirmedPairs(matrix, threshold);
  if (confirmedPairs.length === 0) return vector;

  const result = new Map(vector);
  // getConfirmedPairs sorts by count descending, so the first entry holds
  // the maximum count used to scale the nudge.
  const topCount = confirmedPairs[0].count;

  for (const pair of confirmedPairs) {
    const [a, b] = pair.concepts;
    const scoreOfA = result.get(a) || 0;
    const scoreOfB = result.get(b) || 0;

    // Nothing to boost when neither concept appears in the vector.
    if (scoreOfA === 0 && scoreOfB === 0) continue;

    const nudge = 0.1 * (pair.count / topCount);
    if (scoreOfA > 0 && scoreOfB === 0) {
      result.set(b, nudge * scoreOfA);
    } else if (scoreOfB > 0 && scoreOfA === 0) {
      result.set(a, nudge * scoreOfB);
    }
    // Both present: they already co-occur, so no boost is applied.
  }

  return result;
}
587
+
588
/**
 * Compute concept vector similarity between two threads.
 * Projects both threads into concept space, applies the co-occurrence boost
 * when a matrix is supplied, then computes cosine similarity.
 * @param {Thread} threadA
 * @param {Thread} threadB
 * @param {Object<string, string[]>} [taxonomy]
 * @param {CooccurrenceMatrix} [cooccurrenceMatrix]
 * @returns {number} Concept similarity (0.0-1.0)
 */
function conceptVectorSimilarity(threadA, threadB, taxonomy, cooccurrenceMatrix) {
  const activeTaxonomy = taxonomy || SEED_TAXONOMY;
  const project = (thread) => projectToConcepts(getThreadText(thread), activeTaxonomy);
  // Without a co-occurrence matrix the boost step is the identity.
  const boost = cooccurrenceMatrix
    ? (vec) => applyCooccurrenceBoost(vec, cooccurrenceMatrix)
    : (vec) => vec;
  return cosineSimilarity(boost(project(threadA)), boost(project(threadB)));
}
614
+
615
// --- Stage 2 Combined ---

/**
 * Compute all Stage 2 concept signals for a thread pair.
 * @param {Thread} threadA
 * @param {Thread} threadB
 * @param {Thread[]} allThreads - All threads for co-occurrence matrix
 * @param {Object<string, string[]>} [taxonomy]
 * @returns {Stage2Result}
 */
function computeStage2(threadA, threadB, allThreads, taxonomy) {
  const activeTaxonomy = taxonomy || SEED_TAXONOMY;
  // The co-occurrence matrix is rebuilt on every call; batch callers
  // (runPipelineBatch) build it once up front instead.
  const matrix = buildCooccurrenceMatrix(allThreads, activeTaxonomy);
  const conceptSim = conceptVectorSimilarity(threadA, threadB, activeTaxonomy, matrix);
  const combined = conceptSim * STAGE2_WEIGHT_CONCEPT;
  return { conceptSim, combined };
}
635
+
636
// ============================================================================
// Stage 3: Graph Propagation
// ============================================================================

// @cap-todo(ac:F-037/AC-7) Iterative relaxation propagates affinity scores through memory graph edges

/**
 * Find the graph node ID for a thread by its thread ID.
 * Returns the first matching thread node in node-entry order, or null.
 * @param {MemoryGraph} graph
 * @param {string} threadId
 * @returns {string|null}
 */
function findThreadNodeId(graph, threadId) {
  const entry = Object.entries(graph.nodes || {}).find(
    ([, node]) => node.type === 'thread' && node.metadata && node.metadata.threadId === threadId
  );
  return entry ? entry[0] : null;
}
656
+
657
/**
 * Get all active neighbor node IDs for a given node.
 * Edge weight comes from metadata.compositeScore when it is a number,
 * otherwise defaults to 1.0. Parallel edges keep only the strongest weight.
 * @param {MemoryGraph} graph
 * @param {string} nodeId
 * @returns {Map<string, number>} neighborId -> edge weight
 */
function getWeightedNeighbors(graph, nodeId) {
  /** @type {Map<string, number>} */
  const neighbors = new Map();

  for (const edge of (graph.edges || [])) {
    if (!edge.active) continue;

    // The edge touches this node from either end; the other end is the neighbor.
    const other =
      edge.source === nodeId ? edge.target
      : edge.target === nodeId ? edge.source
      : null;
    if (!other) continue;

    const hasScore = edge.metadata && typeof edge.metadata.compositeScore === 'number';
    const weight = hasScore ? edge.metadata.compositeScore : 1.0;

    if (weight > (neighbors.get(other) || 0)) {
      neighbors.set(other, weight);
    }
  }

  return neighbors;
}
685
+
686
/**
 * Propagate affinity scores through the memory graph using iterative relaxation.
 *
 * Algorithm:
 *   1. Initialize scores from direct pairwise similarities (initialScores).
 *   2. For each iteration, and for each pair key in initialScores:
 *      a. Collect scores between each endpoint's graph neighbors and the
 *         opposite endpoint, weighted by edge strength.
 *      b. New score = damping * neighborContribution + (1 - damping) * initialScore.
 *   3. Return the final relaxed scores.
 *
 * This strengthens connections between threads that share many intermediaries
 * and weakens false connections that lack graph support. Note the blend is
 * always anchored to initialScore (not the previous iteration's value), so
 * scores cannot drift arbitrarily far from the direct signal.
 *
 * NOTE(review): pair keys are built by splitting on '|' — this assumes thread
 * IDs never contain '|' (see makePairKey); confirm upstream ID format.
 *
 * @param {MemoryGraph} graph - The memory graph with nodes and weighted edges
 * @param {Object<string, number>} initialScores - Keyed by "threadIdA|threadIdB", values 0.0-1.0
 * @param {Object} [options]
 * @param {number} [options.iterations=5] - Number of relaxation iterations (3-5 recommended)
 * @param {number} [options.damping=0.7] - Damping factor (0.0-1.0). Higher = more propagation influence.
 * @returns {Object<string, number>} Propagated scores keyed the same as initialScores
 */
// @cap-todo(ac:F-037/AC-7) Graph propagation: 3-5 iterations, damping 0.7
function propagateScores(graph, initialScores, options) {
  const iterations = (options && typeof options.iterations === 'number') ? options.iterations : 5;
  const damping = (options && typeof options.damping === 'number') ? options.damping : 0.7;

  // Without a graph or initial scores there is nothing to propagate;
  // return a defensive copy so callers never share the input object.
  if (!graph || !graph.nodes || !initialScores) {
    return { ...(initialScores || {}) };
  }

  // Build a lookup of thread ID -> graph node ID (thread nodes only).
  /** @type {Map<string, string>} threadId -> nodeId */
  const threadToNode = new Map();
  for (const [nodeId, node] of Object.entries(graph.nodes)) {
    if (node.type === 'thread' && node.metadata && node.metadata.threadId) {
      threadToNode.set(node.metadata.threadId, nodeId);
    }
  }

  // Precompute weighted adjacency for all thread nodes once, outside the
  // relaxation loop.
  /** @type {Map<string, Map<string, number>>} nodeId -> Map(neighborNodeId -> weight) */
  const adjacency = new Map();
  for (const nodeId of threadToNode.values()) {
    adjacency.set(nodeId, getWeightedNeighbors(graph, nodeId));
  }

  // Reverse lookup: nodeId -> threadId. Neighbors that are not thread nodes
  // will miss this map and be skipped below.
  /** @type {Map<string, string>} */
  const nodeToThread = new Map();
  for (const [tid, nid] of threadToNode) {
    nodeToThread.set(nid, tid);
  }

  // Current scores — start from initial (copied; iterations replace it wholesale).
  let currentScores = { ...initialScores };

  // Iterative relaxation: each pass reads currentScores and writes nextScores,
  // so updates within a pass never see each other (Jacobi-style update).
  for (let iter = 0; iter < iterations; iter++) {
    const nextScores = {};

    for (const [pairKey, initialScore] of Object.entries(initialScores)) {
      const [tidA, tidB] = pairKey.split('|');
      const nodeA = threadToNode.get(tidA);
      const nodeB = threadToNode.get(tidB);

      // Pairs whose threads are not in the graph keep their initial score.
      if (!nodeA || !nodeB) {
        nextScores[pairKey] = initialScore;
        continue;
      }

      // Compute neighbor contribution: average of scores between
      // nodeA's neighbors and nodeB, and nodeB's neighbors and nodeA
      const neighborsA = adjacency.get(nodeA) || new Map();
      const neighborsB = adjacency.get(nodeB) || new Map();

      let neighborSum = 0;
      let neighborCount = 0;

      // Contribution from A's neighbors toward B. Neighbor pairs with no
      // known score are skipped entirely (not counted as zero).
      for (const [neighborNodeId, edgeWeight] of neighborsA) {
        const neighborThreadId = nodeToThread.get(neighborNodeId);
        if (!neighborThreadId) continue;
        // Look up score between this neighbor and threadB
        const key1 = makePairKey(neighborThreadId, tidB);
        const score = currentScores[key1];
        if (score !== undefined) {
          neighborSum += score * edgeWeight;
          neighborCount++;
        }
      }

      // Contribution from B's neighbors toward A
      for (const [neighborNodeId, edgeWeight] of neighborsB) {
        const neighborThreadId = nodeToThread.get(neighborNodeId);
        if (!neighborThreadId) continue;
        const key1 = makePairKey(neighborThreadId, tidA);
        const score = currentScores[key1];
        if (score !== undefined) {
          neighborSum += score * edgeWeight;
          neighborCount++;
        }
      }

      const neighborContribution = neighborCount > 0 ? neighborSum / neighborCount : 0;

      // Relaxation formula: blend of neighbor signal and the ORIGINAL score
      // (anchoring to initialScore keeps the result from drifting).
      nextScores[pairKey] = clamp01(
        damping * neighborContribution + (1 - damping) * initialScore
      );
    }

    currentScores = nextScores;
  }

  return currentScores;
}
801
+
802
/**
 * Create a stable pair key from two thread IDs (alphabetically ordered),
 * so makePairKey(a, b) === makePairKey(b, a).
 * @param {string} tidA
 * @param {string} tidB
 * @returns {string}
 */
function makePairKey(tidA, tidB) {
  const [low, high] = tidA <= tidB ? [tidA, tidB] : [tidB, tidA];
  return `${low}|${high}`;
}
811
+
812
// ============================================================================
// Full Pipeline
// ============================================================================

/**
 * Run the complete 3-stage semantic analysis pipeline for a thread pair.
 *
 * Stage 1: Text signals (TF-IDF 0.5 + N-gram 0.2 + Jaccard 0.1)
 * Stage 2: Concept signals (concept vector similarity 0.2)
 * Stage 3: Graph propagation (optional, refines scores via transitive connections)
 *
 * @param {Thread} threadA - First thread
 * @param {Thread} threadB - Second thread
 * @param {PipelineContext} context - All threads, optional graph, taxonomy overrides
 * @returns {PipelineResult}
 */
// @cap-todo(ac:F-037/AC-8) Pure logic pipeline — no I/O, all data passed as arguments
function runPipeline(threadA, threadB, context) {
  const ctx = context || {};
  const allThreads = ctx.allThreads || [threadA, threadB];
  const taxonomy = ctx.taxonomy || SEED_TAXONOMY;
  const graph = ctx.graph || null;
  const propagationOptions = ctx.propagationOptions || { iterations: 5, damping: 0.7 };

  // Stages 1 + 2: direct text and concept signals.
  const corpus = buildCorpus(allThreads);
  const stage1 = computeStage1(threadA, threadB, corpus);
  const stage2 = computeStage2(threadA, threadB, allThreads, taxonomy);

  // Pre-propagation score (stages 1 + 2).
  const directScore = clamp01(stage1.combined + stage2.combined);

  let stage3 = {};
  let finalScore = directScore;

  // Stage 3 runs only when a non-empty graph is supplied.
  if (graph && graph.nodes && Object.keys(graph.nodes).length > 0) {
    const pairKey = makePairKey(threadA.id, threadB.id);
    const seeds = { [pairKey]: directScore };

    // Seed propagation with existing affinity-edge scores so the relaxation
    // can leverage the full graph, not just the pair under analysis.
    for (const edge of (graph.edges || [])) {
      const edgeScore = edge.metadata && edge.metadata.compositeScore;
      if (!edge.active || edge.type !== 'affinity' || typeof edgeScore !== 'number') continue;

      const src = graph.nodes[edge.source];
      const dst = graph.nodes[edge.target];
      if (!src || src.type !== 'thread' || !dst || dst.type !== 'thread') continue;

      const srcTid = src.metadata && src.metadata.threadId;
      const dstTid = dst.metadata && dst.metadata.threadId;
      if (!srcTid || !dstTid) continue;

      const key = makePairKey(srcTid, dstTid);
      if (key !== pairKey && seeds[key] === undefined) {
        seeds[key] = edgeScore;
      }
    }

    stage3 = propagateScores(graph, seeds, propagationOptions);
    finalScore = clamp01(stage3[pairKey] === undefined ? directScore : stage3[pairKey]);
  }

  return { stage1, stage2, stage3, finalScore };
}
885
+
886
/**
 * Run the pipeline for all unique thread pairs.
 * Builds the corpus and co-occurrence matrix once, then computes Stage 1 and
 * Stage 2 per pair, and finally runs a single batch graph propagation.
 * Returns a Map keyed by "threadIdA|threadIdB" -> PipelineResult.
 *
 * NOTE(review): Stage 1 is inlined here rather than calling computeStage1 —
 * presumably to reuse the shared corpus; confirm it stays in sync with
 * computeStage1's weighting.
 *
 * @param {Thread[]} threads - All threads
 * @param {PipelineContext} context
 * @returns {Map<string, PipelineResult>}
 */
function runPipelineBatch(threads, context) {
  const results = new Map();
  const allThreads = (context && context.allThreads) || threads;
  const corpus = buildCorpus(allThreads);
  const taxonomy = (context && context.taxonomy) || SEED_TAXONOMY;
  const matrix = buildCooccurrenceMatrix(allThreads, taxonomy);
  const graph = (context && context.graph) || null;
  const propagationOptions = (context && context.propagationOptions) || { iterations: 5, damping: 0.7 };

  // Compute direct (pre-propagation) scores for all unique pairs.
  /** @type {Object<string, number>} */
  const directScores = {};

  for (let i = 0; i < threads.length; i++) {
    for (let j = i + 1; j < threads.length; j++) {
      const a = threads[i];
      const b = threads[j];
      const pairKey = makePairKey(a.id, b.id);

      // Stage 1: text signals (TF-IDF + trigram + keyword Jaccard).
      const tfidf = tfidfSimilarity(a, b, corpus);
      const textA = getThreadText(a);
      const textB = getThreadText(b);
      const ngram = trigramSimilarity(textA, textB);
      const jaccard = jaccardKeywordSimilarity(a.keywords, b.keywords);
      const stage1Combined = (tfidf * STAGE1_WEIGHT_TFIDF)
        + (ngram * STAGE1_WEIGHT_NGRAM)
        + (jaccard * STAGE1_WEIGHT_JACCARD);

      const stage1 = { tfidf, ngram, jaccard, combined: stage1Combined };

      // Stage 2: concept signals, reusing the shared co-occurrence matrix.
      const conceptSim = conceptVectorSimilarity(a, b, taxonomy, matrix);
      const stage2 = { conceptSim, combined: conceptSim * STAGE2_WEIGHT_CONCEPT };

      const directScore = clamp01(stage1.combined + stage2.combined);
      directScores[pairKey] = directScore;

      results.set(pairKey, {
        stage1,
        stage2,
        stage3: {},
        finalScore: directScore,
      });
    }
  }

  // Stage 3: batch graph propagation (only when a non-empty graph is supplied).
  if (graph && graph.nodes && Object.keys(graph.nodes).length > 0) {
    // Seed with existing affinity-edge scores for pairs not computed above,
    // so propagation can leverage the full graph.
    for (const edge of (graph.edges || [])) {
      if (!edge.active || edge.type !== 'affinity') continue;
      if (!edge.metadata || typeof edge.metadata.compositeScore !== 'number') continue;

      const sourceNode = graph.nodes[edge.source];
      const targetNode = graph.nodes[edge.target];
      if (!sourceNode || !targetNode) continue;
      if (sourceNode.type !== 'thread' || targetNode.type !== 'thread') continue;

      const sTid = sourceNode.metadata && sourceNode.metadata.threadId;
      const tTid = targetNode.metadata && targetNode.metadata.threadId;
      if (!sTid || !tTid) continue;

      const existingKey = makePairKey(sTid, tTid);
      // Freshly computed direct scores take precedence over stored edge scores.
      if (directScores[existingKey] === undefined) {
        directScores[existingKey] = edge.metadata.compositeScore;
      }
    }

    const propagated = propagateScores(graph, directScores, propagationOptions);

    // Update results with propagated scores. Note every result's stage3
    // references the SAME propagated object (all pair scores), not a
    // per-pair slice — callers should treat it as read-only.
    for (const [pairKey, result] of results) {
      result.stage3 = propagated;
      if (propagated[pairKey] !== undefined) {
        result.finalScore = clamp01(propagated[pairKey]);
      }
    }
  }

  return results;
}
975
+
976
// ============================================================================
// Utility
// ============================================================================

/**
 * Clamp a number to [0.0, 1.0].
 * NaN passes through unchanged, exactly as in the Math.max/Math.min form.
 * @param {number} n
 * @returns {number}
 */
function clamp01(n) {
  if (n <= 0) return 0;
  return n > 1 ? 1 : n;
}
988
+
989
// ============================================================================
// Module Exports
// ============================================================================

// @cap-decision Exporting internal helpers with _ prefix for testing, matching cap-affinity-engine.cjs convention.
// NOTE(review): _-prefixed exports exist for tests (per the decision above);
// treat them as non-public surface.

module.exports = {
  // --- Full Pipeline ---
  runPipeline,
  runPipelineBatch,

  // --- Stage 1: Text Signals ---
  computeStage1,
  tfidfSimilarity,
  trigramSimilarity,
  jaccardKeywordSimilarity,

  // --- Stage 2: Concept Signals ---
  computeStage2,
  conceptVectorSimilarity,
  buildCooccurrenceMatrix,
  getConfirmedPairs,
  projectToConcepts,

  // --- Stage 3: Graph Propagation ---
  propagateScores,

  // --- Constants ---
  SEED_TAXONOMY,
  CONCEPT_NAMES,
  STAGE1_WEIGHT_TFIDF,
  STAGE1_WEIGHT_NGRAM,
  STAGE1_WEIGHT_JACCARD,
  STAGE2_WEIGHT_CONCEPT,

  // --- Internals (for testing) ---
  _tokenize: tokenize,
  _getThreadText: getThreadText,
  _buildCorpus: buildCorpus,
  _computeTfIdfVector: computeTfIdfVector,
  _cosineSimilarity: cosineSimilarity,
  _extractTrigrams: extractTrigrams,
  _makeCooccurrenceKey: makeCooccurrenceKey,
  _applyCooccurrenceBoost: applyCooccurrenceBoost,
  _findThreadNodeId: findThreadNodeId,
  _getWeightedNeighbors: getWeightedNeighbors,
  _makePairKey: makePairKey,
  _clamp01: clamp01,
  _textContainsKeyword: textContainsKeyword,
};