@unerr-ai/unerr 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (363)
  1. package/README.md +6 -0
  2. package/dist/cli.js +37236 -35793
  3. package/package.json +1 -1
  4. package/dist/behaviors/agent-llm-bridge.js +0 -166
  5. package/dist/behaviors/architecture-guard.js +0 -256
  6. package/dist/behaviors/auto-doc.js +0 -247
  7. package/dist/behaviors/cascade-guard.js +0 -289
  8. package/dist/behaviors/change-narrative.js +0 -270
  9. package/dist/behaviors/convention-drift.js +0 -290
  10. package/dist/behaviors/framework.js +0 -235
  11. package/dist/behaviors/guard-formatter.js +0 -44
  12. package/dist/behaviors/incomplete-work.js +0 -270
  13. package/dist/behaviors/loop-breaker.js +0 -300
  14. package/dist/behaviors/session-continuity.js +0 -208
  15. package/dist/commands/branches.js +0 -97
  16. package/dist/commands/check-commit.js +0 -225
  17. package/dist/commands/compress-output.js +0 -64
  18. package/dist/commands/config-verify.js +0 -243
  19. package/dist/commands/daemon.js +0 -905
  20. package/dist/commands/dashboard.js +0 -52
  21. package/dist/commands/debug.js +0 -200
  22. package/dist/commands/enrich.js +0 -184
  23. package/dist/commands/exec.js +0 -233
  24. package/dist/commands/gain.js +0 -156
  25. package/dist/commands/hook.js +0 -88
  26. package/dist/commands/index.js +0 -88
  27. package/dist/commands/init.js +0 -74
  28. package/dist/commands/install.js +0 -505
  29. package/dist/commands/learn.js +0 -116
  30. package/dist/commands/manifest.js +0 -193
  31. package/dist/commands/rewind.js +0 -103
  32. package/dist/commands/serve.js +0 -19
  33. package/dist/commands/setup-wizard.js +0 -414
  34. package/dist/commands/skills.js +0 -64
  35. package/dist/commands/stats.js +0 -20
  36. package/dist/commands/status.js +0 -654
  37. package/dist/commands/timeline.js +0 -139
  38. package/dist/commands/uninstall.js +0 -230
  39. package/dist/components/App.js +0 -109
  40. package/dist/components/Banner.js +0 -12
  41. package/dist/components/ConfirmPrompt.js +0 -25
  42. package/dist/components/DriftSummary.js +0 -23
  43. package/dist/components/GradeBadge.js +0 -15
  44. package/dist/components/HealthCard.js +0 -18
  45. package/dist/components/InkSpinner.js +0 -22
  46. package/dist/components/InputBox.js +0 -17
  47. package/dist/components/KeyValue.js +0 -13
  48. package/dist/components/MessageList.js +0 -14
  49. package/dist/components/ProgressBar.js +0 -26
  50. package/dist/components/Section.js +0 -16
  51. package/dist/components/SessionSummaryCard.js +0 -73
  52. package/dist/components/StartupDisplay.js +0 -24
  53. package/dist/components/StatusDashboard.js +0 -57
  54. package/dist/components/StatusLine.js +0 -8
  55. package/dist/components/StepLine.js +0 -22
  56. package/dist/components/Theme.js +0 -20
  57. package/dist/components/ToolProgress.js +0 -8
  58. package/dist/components/ViolationList.js +0 -21
  59. package/dist/components/render.js +0 -13
  60. package/dist/config/agent-registry.js +0 -237
  61. package/dist/config/claude-settings-hooks.js +0 -304
  62. package/dist/config/hook-installer.js +0 -65
  63. package/dist/config/instruction-writer.js +0 -388
  64. package/dist/config/mcp-config-writer.js +0 -266
  65. package/dist/config/settings.js +0 -174
  66. package/dist/config/tool-detector.js +0 -42
  67. package/dist/config/value-surfacing.js +0 -119
  68. package/dist/core/context-assembly.js +0 -108
  69. package/dist/core/conversation.js +0 -33
  70. package/dist/core/local-chat-provider.js +0 -475
  71. package/dist/core/provider-factory.js +0 -55
  72. package/dist/core/providers.js +0 -90
  73. package/dist/core/query-engine.js +0 -174
  74. package/dist/daemon/api.js +0 -312
  75. package/dist/daemon/autostart.js +0 -119
  76. package/dist/daemon/bootstrap.js +0 -39
  77. package/dist/daemon/client.js +0 -164
  78. package/dist/daemon/detect-ci.js +0 -81
  79. package/dist/daemon/platform-linux.js +0 -146
  80. package/dist/daemon/platform-macos.js +0 -134
  81. package/dist/daemon/platform-windows.js +0 -116
  82. package/dist/daemon/process-manager.js +0 -299
  83. package/dist/daemon/protocol.js +0 -23
  84. package/dist/daemon/registry.js +0 -270
  85. package/dist/daemon/settings-schema.js +0 -72
  86. package/dist/daemon/system-health.js +0 -134
  87. package/dist/daemon/version-checker.js +0 -262
  88. package/dist/daemon/warm-start.js +0 -223
  89. package/dist/entrypoints/cli.js +0 -1043
  90. package/dist/entrypoints/daemon.js +0 -380
  91. package/dist/entrypoints/repl.js +0 -147
  92. package/dist/hooks/adapters/claude-code.js +0 -90
  93. package/dist/hooks/adapters/cline.js +0 -100
  94. package/dist/hooks/adapters/cursor.js +0 -98
  95. package/dist/hooks/hook-dedup.js +0 -79
  96. package/dist/hooks/hook-runner.js +0 -113
  97. package/dist/hooks/navigation-hooks.js +0 -175
  98. package/dist/hooks/prompt-hooks.js +0 -63
  99. package/dist/hooks/shell-hooks.js +0 -47
  100. package/dist/ignore.js +0 -111
  101. package/dist/intelligence/approach-suggester.js +0 -61
  102. package/dist/intelligence/ast-extractor.js +0 -2615
  103. package/dist/intelligence/ast-worker.js +0 -34
  104. package/dist/intelligence/background-indexer.js +0 -121
  105. package/dist/intelligence/blast-radius.js +0 -200
  106. package/dist/intelligence/community-detection.js +0 -691
  107. package/dist/intelligence/community-detector.js +0 -184
  108. package/dist/intelligence/computation-scheduler.js +0 -75
  109. package/dist/intelligence/confidence-propagation.js +0 -47
  110. package/dist/intelligence/convention-detector.js +0 -242
  111. package/dist/intelligence/convention-learner.js +0 -205
  112. package/dist/intelligence/convention-matcher.js +0 -205
  113. package/dist/intelligence/cozo-schema.js +0 -376
  114. package/dist/intelligence/decision-point-detector.js +0 -90
  115. package/dist/intelligence/deep-dive-tools.js +0 -586
  116. package/dist/intelligence/durability-scorer.js +0 -84
  117. package/dist/intelligence/exploration-cost.js +0 -204
  118. package/dist/intelligence/exploration-pattern-tracker.js +0 -61
  119. package/dist/intelligence/fact-generator.js +0 -322
  120. package/dist/intelligence/facts-schema.js +0 -90
  121. package/dist/intelligence/file-intelligence.js +0 -59
  122. package/dist/intelligence/graph-holder.js +0 -220
  123. package/dist/intelligence/graph-temporal-joiner.js +0 -238
  124. package/dist/intelligence/health-grade.js +0 -423
  125. package/dist/intelligence/health-grader.js +0 -200
  126. package/dist/intelligence/health-map-data.js +0 -259
  127. package/dist/intelligence/import-symbols.js +0 -136
  128. package/dist/intelligence/incremental-indexer.js +0 -658
  129. package/dist/intelligence/indexer/centrality.js +0 -62
  130. package/dist/intelligence/indexer/cfg-context.js +0 -95
  131. package/dist/intelligence/indexer/confidence.js +0 -34
  132. package/dist/intelligence/indexer/cross-file-resolver.js +0 -104
  133. package/dist/intelligence/indexer/edge-repair.js +0 -89
  134. package/dist/intelligence/indexer/entity-key.js +0 -17
  135. package/dist/intelligence/indexer/export-map.js +0 -132
  136. package/dist/intelligence/indexer/git-cochange.js +0 -128
  137. package/dist/intelligence/indexer/graph-patch.js +0 -147
  138. package/dist/intelligence/indexer/incremental.js +0 -78
  139. package/dist/intelligence/indexer/ingest.js +0 -160
  140. package/dist/intelligence/indexer/language-detect.js +0 -226
  141. package/dist/intelligence/indexer/metadata.js +0 -63
  142. package/dist/intelligence/indexer/mutation-tracker.js +0 -79
  143. package/dist/intelligence/indexer/orchestrator.js +0 -155
  144. package/dist/intelligence/indexer/plugin-interface.js +0 -31
  145. package/dist/intelligence/indexer/plugins/csharp.js +0 -440
  146. package/dist/intelligence/indexer/plugins/go.js +0 -335
  147. package/dist/intelligence/indexer/plugins/java.js +0 -370
  148. package/dist/intelligence/indexer/plugins/python.js +0 -358
  149. package/dist/intelligence/indexer/plugins/regex-fallback.js +0 -82
  150. package/dist/intelligence/indexer/plugins/ruby.js +0 -290
  151. package/dist/intelligence/indexer/plugins/rust.js +0 -484
  152. package/dist/intelligence/indexer/plugins/tier2-generic.js +0 -310
  153. package/dist/intelligence/indexer/plugins/typescript.js +0 -456
  154. package/dist/intelligence/indexer/resource-monitor.js +0 -93
  155. package/dist/intelligence/indexer/scip/decoder.js +0 -253
  156. package/dist/intelligence/indexer/scip/detector.js +0 -232
  157. package/dist/intelligence/indexer/scip/downloader.js +0 -427
  158. package/dist/intelligence/indexer/scip/fallback.js +0 -34
  159. package/dist/intelligence/indexer/scip/merger.js +0 -109
  160. package/dist/intelligence/indexer/scip/orchestrator.js +0 -433
  161. package/dist/intelligence/indexer/scip/runner.js +0 -98
  162. package/dist/intelligence/indexer/snapshot.js +0 -66
  163. package/dist/intelligence/indexer/test-detector.js +0 -196
  164. package/dist/intelligence/indexer/watch-integration.js +0 -61
  165. package/dist/intelligence/indexer/worker.js +0 -85
  166. package/dist/intelligence/local-convention-detector.js +0 -437
  167. package/dist/intelligence/local-embeddings.js +0 -190
  168. package/dist/intelligence/local-graph.js +0 -1946
  169. package/dist/intelligence/local-indexer.js +0 -1575
  170. package/dist/intelligence/local-llm.js +0 -163
  171. package/dist/intelligence/local-rule-generator.js +0 -154
  172. package/dist/intelligence/local-snapshot.js +0 -213
  173. package/dist/intelligence/negative-knowledge.js +0 -103
  174. package/dist/intelligence/persistent-db.js +0 -85
  175. package/dist/intelligence/query-router.js +0 -2556
  176. package/dist/intelligence/risk-classifier.js +0 -116
  177. package/dist/intelligence/rule-evaluator.js +0 -380
  178. package/dist/intelligence/rule-generator.js +0 -49
  179. package/dist/intelligence/search-index.js +0 -173
  180. package/dist/intelligence/semantic/docstring-extractor.js +0 -67
  181. package/dist/intelligence/semantic/embedding-store.js +0 -52
  182. package/dist/intelligence/semantic/enrichment-orchestrator.js +0 -48
  183. package/dist/intelligence/semantic/git-message-miner.js +0 -114
  184. package/dist/intelligence/semantic/identifier-tokenizer.js +0 -51
  185. package/dist/intelligence/semantic/node2vec-embeddings.js +0 -71
  186. package/dist/intelligence/semantic/node2vec-walks.js +0 -103
  187. package/dist/intelligence/semantic/path-domain-inference.js +0 -112
  188. package/dist/intelligence/semantic/similarity-engine.js +0 -60
  189. package/dist/intelligence/semantic/tfidf-vectors.js +0 -88
  190. package/dist/intelligence/session-brief-builder.js +0 -159
  191. package/dist/intelligence/session-context.js +0 -221
  192. package/dist/intelligence/session-health-monitor.js +0 -211
  193. package/dist/intelligence/session-narrative.js +0 -197
  194. package/dist/intelligence/session-pattern-analyzer.js +0 -218
  195. package/dist/intelligence/signal-scorer.js +0 -390
  196. package/dist/intelligence/signal-show-store.js +0 -182
  197. package/dist/intelligence/smart-truncate.js +0 -158
  198. package/dist/intelligence/subgraph-cache.js +0 -88
  199. package/dist/intelligence/temporal-facts.js +0 -494
  200. package/dist/intelligence/token-estimator.js +0 -100
  201. package/dist/intelligence/tool-injector.js +0 -87
  202. package/dist/intelligence/tree-sitter-loader.js +0 -71
  203. package/dist/intelligence/worker-pool.js +0 -116
  204. package/dist/proxy/arg-validator.js +0 -79
  205. package/dist/proxy/auto-bootstrap.js +0 -167
  206. package/dist/proxy/bridge.js +0 -147
  207. package/dist/proxy/budget-enforcer.js +0 -70
  208. package/dist/proxy/compression-quality-monitor.js +0 -160
  209. package/dist/proxy/compression-stats.js +0 -51
  210. package/dist/proxy/context-rot-detector.js +0 -137
  211. package/dist/proxy/drift-detector.js +0 -139
  212. package/dist/proxy/efficiency-tracker.js +0 -79
  213. package/dist/proxy/fact-ranking.js +0 -154
  214. package/dist/proxy/format-encoder.js +0 -266
  215. package/dist/proxy/http-transport.js +0 -90
  216. package/dist/proxy/lifecycle-actor.js +0 -55
  217. package/dist/proxy/lifecycle-machine.js +0 -187
  218. package/dist/proxy/log-tailer.js +0 -265
  219. package/dist/proxy/model-pricing.js +0 -98
  220. package/dist/proxy/network-firewall.js +0 -141
  221. package/dist/proxy/nudge-state.js +0 -93
  222. package/dist/proxy/output-compressor.js +0 -185
  223. package/dist/proxy/pid-lock.js +0 -291
  224. package/dist/proxy/proxy-context.js +0 -11
  225. package/dist/proxy/proxy.js +0 -2633
  226. package/dist/proxy/response-enrichment.js +0 -32
  227. package/dist/proxy/response-envelope.js +0 -313
  228. package/dist/proxy/session-dedup.js +0 -82
  229. package/dist/proxy/session-legend.js +0 -30
  230. package/dist/proxy/session-persistence.js +0 -210
  231. package/dist/proxy/session-resume.js +0 -94
  232. package/dist/proxy/session-stats.js +0 -513
  233. package/dist/proxy/shell-classifier.js +0 -1346
  234. package/dist/proxy/shell-compression-log.js +0 -93
  235. package/dist/proxy/shell-compressor.js +0 -390
  236. package/dist/proxy/shell-graph-boost.js +0 -202
  237. package/dist/proxy/shell-monitor-map.js +0 -18
  238. package/dist/proxy/shell-stats.js +0 -54
  239. package/dist/proxy/shell-strategies/cloud.js +0 -215
  240. package/dist/proxy/shell-strategies/diff.js +0 -159
  241. package/dist/proxy/shell-strategies/error-diagnostic.js +0 -796
  242. package/dist/proxy/shell-strategies/filter-dsl.js +0 -358
  243. package/dist/proxy/shell-strategies/git-status.js +0 -177
  244. package/dist/proxy/shell-strategies/key-value.js +0 -193
  245. package/dist/proxy/shell-strategies/log-text.js +0 -154
  246. package/dist/proxy/shell-strategies/omni.js +0 -188
  247. package/dist/proxy/shell-strategies/progress.js +0 -55
  248. package/dist/proxy/shell-strategies/redact.js +0 -76
  249. package/dist/proxy/shell-strategies/structured.js +0 -241
  250. package/dist/proxy/shell-strategies/tabular.js +0 -243
  251. package/dist/proxy/shell-strategies/test-results-types.js +0 -13
  252. package/dist/proxy/shell-strategies/test-results.js +0 -784
  253. package/dist/proxy/shell-strategies/tree-paths.js +0 -144
  254. package/dist/proxy/shell-strategies/yaml.js +0 -182
  255. package/dist/proxy/shell-tee.js +0 -111
  256. package/dist/proxy/signal-dedup.js +0 -171
  257. package/dist/proxy/startup-renderer.js +0 -158
  258. package/dist/proxy/task-token-display.js +0 -38
  259. package/dist/proxy/token-counter.js +0 -61
  260. package/dist/proxy/tool-clusters.js +0 -273
  261. package/dist/proxy/tool-definitions.js +0 -525
  262. package/dist/proxy/transport-mux.js +0 -229
  263. package/dist/proxy/wire-cap.js +0 -268
  264. package/dist/rules/developer.mozilla.org.json +0 -9
  265. package/dist/rules/github.com.json +0 -21
  266. package/dist/schemas/api/skills.js +0 -19
  267. package/dist/schemas/common/errors.js +0 -7
  268. package/dist/schemas/common/headers.js +0 -5
  269. package/dist/schemas/entities/edge.js +0 -25
  270. package/dist/schemas/entities/entity.js +0 -22
  271. package/dist/schemas/entities/rule.js +0 -18
  272. package/dist/schemas/index.js +0 -14
  273. package/dist/server/event-bus.js +0 -59
  274. package/dist/server/http.js +0 -156
  275. package/dist/server/middleware.js +0 -70
  276. package/dist/server/routes/drift.js +0 -97
  277. package/dist/server/routes/intelligence.js +0 -1217
  278. package/dist/server/routes/reasoning-quality.js +0 -444
  279. package/dist/server/routes/session.js +0 -86
  280. package/dist/server/routes/stream.js +0 -120
  281. package/dist/server/routes/system.js +0 -73
  282. package/dist/server/routes/temporal.js +0 -170
  283. package/dist/server/routes/timeline.js +0 -232
  284. package/dist/server/routes/token-flow.js +0 -403
  285. package/dist/skills/effectiveness-tracker.js +0 -93
  286. package/dist/skills/local-pack.js +0 -380
  287. package/dist/skills/resolver.js +0 -495
  288. package/dist/state-detector.js +0 -83
  289. package/dist/timeline/intent-detector.js +0 -263
  290. package/dist/timeline/loop-miner.js +0 -140
  291. package/dist/timeline/open-threads.js +0 -49
  292. package/dist/timeline/signal-reinforcer.js +0 -62
  293. package/dist/timeline/timeline-bootstrap.js +0 -151
  294. package/dist/timeline/timeline-store.js +0 -618
  295. package/dist/tools/coding/bash.js +0 -49
  296. package/dist/tools/coding/file-edit.js +0 -72
  297. package/dist/tools/coding/file-outline.js +0 -227
  298. package/dist/tools/coding/file-read-protocol.js +0 -425
  299. package/dist/tools/coding/file-read.js +0 -35
  300. package/dist/tools/coding/file-write.js +0 -43
  301. package/dist/tools/coding/glob-tool.js +0 -109
  302. package/dist/tools/coding/grep.js +0 -162
  303. package/dist/tools/coding/index.js +0 -27
  304. package/dist/tools/intelligence/index.js +0 -269
  305. package/dist/tools/intelligence/record-fact.js +0 -48
  306. package/dist/tools/intelligence/timeline-markers.js +0 -130
  307. package/dist/tools/registry.js +0 -47
  308. package/dist/tools/types.js +0 -8
  309. package/dist/tracking/auto-snapshot-triggers.js +0 -246
  310. package/dist/tracking/branch-context.js +0 -115
  311. package/dist/tracking/branch-snapshot.js +0 -217
  312. package/dist/tracking/causal-bridge.js +0 -317
  313. package/dist/tracking/circuit-breaker.js +0 -147
  314. package/dist/tracking/commit-watcher.js +0 -114
  315. package/dist/tracking/context-ledger.js +0 -119
  316. package/dist/tracking/correction-detector.js +0 -324
  317. package/dist/tracking/drift-tracker.js +0 -874
  318. package/dist/tracking/durability-tracker.js +0 -94
  319. package/dist/tracking/entity-rewind.js +0 -200
  320. package/dist/tracking/file-hash-state.js +0 -114
  321. package/dist/tracking/git-attribution.js +0 -132
  322. package/dist/tracking/git-trailers.js +0 -171
  323. package/dist/tracking/intelligence-counter.js +0 -46
  324. package/dist/tracking/intent-correlator.js +0 -202
  325. package/dist/tracking/intent-encoder.js +0 -52
  326. package/dist/tracking/intent-token-tracker.js +0 -159
  327. package/dist/tracking/ledger-archiver.js +0 -94
  328. package/dist/tracking/ledger-chains.js +0 -245
  329. package/dist/tracking/metrics-store.js +0 -361
  330. package/dist/tracking/native-watcher.js +0 -131
  331. package/dist/tracking/offline-rewind.js +0 -295
  332. package/dist/tracking/pending-violations.js +0 -74
  333. package/dist/tracking/persistence-effectiveness.js +0 -167
  334. package/dist/tracking/prompt-durability.js +0 -202
  335. package/dist/tracking/quality-signals.js +0 -213
  336. package/dist/tracking/redactor.js +0 -73
  337. package/dist/tracking/rewind-engine.js +0 -161
  338. package/dist/tracking/session-history.js +0 -128
  339. package/dist/tracking/session-receipt.js +0 -88
  340. package/dist/tracking/session-summary-writer.js +0 -157
  341. package/dist/tracking/shadow-ledger.js +0 -321
  342. package/dist/tracking/stash-manager.js +0 -258
  343. package/dist/tracking/timeline-fork.js +0 -213
  344. package/dist/tracking/timeline.js +0 -69
  345. package/dist/tracking/token-flow.js +0 -276
  346. package/dist/tracking/turn-segmenter.js +0 -122
  347. package/dist/tracking/weekly-accumulator.js +0 -179
  348. package/dist/tracking/working-snapshots.js +0 -188
  349. package/dist/tracking/workspace-manifest.js +0 -176
  350. package/dist/transport/http.js +0 -102
  351. package/dist/utils/counterfactual.js +0 -65
  352. package/dist/utils/deep-link.js +0 -34
  353. package/dist/utils/detect.js +0 -193
  354. package/dist/utils/exec.js +0 -73
  355. package/dist/utils/file-logger.js +0 -87
  356. package/dist/utils/format-error.js +0 -29
  357. package/dist/utils/git.js +0 -181
  358. package/dist/utils/log.js +0 -57
  359. package/dist/utils/logger.js +0 -35
  360. package/dist/utils/mcp-content-json.js +0 -8
  361. package/dist/utils/session-logger.js +0 -154
  362. package/dist/utils/startup-log.js +0 -512
  363. package/dist/utils/ui.js +0 -56
@@ -1,173 +0,0 @@
1
- /**
2
- * Local search index using CozoDB relations.
3
- *
4
- * Tokenizes entity names (camelCase, snake_case, PascalCase) and stores
5
- * as search_tokens relation for fast local text search.
6
- *
7
- * Sprint 6.6: IDF-weighted scoring — rare tokens rank higher than common ones.
8
- * Pre-computes IDF weights during buildSearchIndex() for O(1) lookup at query time.
9
- */
10
- /**
11
- * Tokenize an entity name into searchable tokens.
12
- * Handles camelCase, PascalCase, snake_case, and kebab-case.
13
- */
14
- export function tokenize(name) {
15
- // Split on non-alphanumeric, then split camelCase
16
- const parts = name
17
- .replace(/([a-z])([A-Z])/g, "$1 $2")
18
- .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
19
- .split(/[^a-zA-Z0-9]+/)
20
- .filter((p) => p.length > 0)
21
- .map((p) => p.toLowerCase());
22
- return [...new Set(parts)];
23
- }
24
- /**
25
- * Build search index from entities already loaded in CozoDB.
26
- * Reads all entities, tokenizes names, populates search_tokens,
27
- * and computes IDF weights in token_doc_frequency.
28
- */
29
- export async function buildSearchIndex(db) {
30
- // Read all entities
31
- let result;
32
- try {
33
- result = await db.run("?[key, name] := *entities{key, name}");
34
- }
35
- catch (err) {
36
- process.stderr.write(`[unerr:search-index] Failed to read entities: ${err instanceof Error ? err.message : JSON.stringify(err)}\n`);
37
- return; // Cannot build search index without entities
38
- }
39
- if (!result?.rows)
40
- return;
41
- const totalEntities = result.rows.length;
42
- process.stderr.write(`[unerr:search-index] Building index for ${totalEntities} entities\n`);
43
- // Track document frequency: how many entities contain each token
44
- const tokenDocCount = new Map();
45
- for (const row of result.rows) {
46
- const [key, name] = row;
47
- const tokens = tokenize(name);
48
- for (const token of tokens) {
49
- tokenDocCount.set(token, (tokenDocCount.get(token) ?? 0) + 1);
50
- try {
51
- await db.run("?[token, entity_key] <- [[$token, $key]] :put search_tokens { token, entity_key }", { token, key });
52
- }
53
- catch {
54
- // Duplicate — ignore
55
- }
56
- }
57
- }
58
- // Store document frequencies + pre-computed IDF weights
59
- let idfErrors = 0;
60
- for (const [token, docCount] of tokenDocCount) {
61
- const idf = totalEntities > 0 ? Math.log(totalEntities / Math.max(docCount, 1)) : 0;
62
- try {
63
- await db.run("?[token, doc_count, idf] <- [[$token, $dc, $idf]] :put token_doc_frequency { token => doc_count, idf }", { token, dc: docCount, idf });
64
- }
65
- catch {
66
- idfErrors++;
67
- }
68
- }
69
- process.stderr.write(`[unerr:search-index] Done: ${tokenDocCount.size} tokens indexed, ${idfErrors} IDF errors\n`);
70
- }
71
- /**
72
- * Incrementally update search index for specific entities.
73
- * Removes old tokens for given keys, re-tokenizes, and updates IDF weights.
74
- */
75
- export async function updateSearchIndexIncremental(db, changedKeys, deletedKeys) {
76
- if (changedKeys.size === 0 && deletedKeys.size === 0)
77
- return;
78
- const allKeys = new Set([...changedKeys, ...deletedKeys]);
79
- // Remove old tokens for all affected keys
80
- for (const key of allKeys) {
81
- try {
82
- await db.run("?[token, entity_key] := *search_tokens{token, entity_key}, entity_key = $key :rm search_tokens { token, entity_key }", { key });
83
- }
84
- catch {
85
- /* safe */
86
- }
87
- }
88
- // Re-tokenize changed entities (not deleted ones)
89
- if (changedKeys.size === 0)
90
- return;
91
- // Fetch names for changed keys
92
- const keyRows = [...changedKeys]
93
- .map((k) => `["${k.replace(/"/g, '\\"')}"]`)
94
- .join(", ");
95
- let result;
96
- try {
97
- result = await db.run(`
98
- keys[k] <- [${keyRows}]
99
- ?[key, name] := keys[k], *entities{key: k, name}, key = k
100
- `);
101
- }
102
- catch {
103
- return;
104
- }
105
- if (!result?.rows)
106
- return;
107
- // Insert new tokens
108
- for (const row of result.rows) {
109
- const [key, name] = row;
110
- const tokens = tokenize(name);
111
- for (const token of tokens) {
112
- try {
113
- await db.run("?[token, entity_key] <- [[$token, $key]] :put search_tokens { token, entity_key }", { token, key });
114
- }
115
- catch {
116
- /* safe */
117
- }
118
- }
119
- }
120
- // Recompute IDF for all tokens (lightweight — just counts + math)
121
- try {
122
- const totalResult = await db.run("?[count(key)] := *entities{key}");
123
- const totalEntities = totalResult.rows?.[0]?.[0] ?? 1;
124
- const tokenResult = await db.run("?[token, count(entity_key)] := *search_tokens{token, entity_key}");
125
- if (tokenResult.rows) {
126
- for (const row of tokenResult.rows) {
127
- const [token, docCount] = row;
128
- const idf = Math.log(totalEntities / Math.max(docCount, 1));
129
- try {
130
- await db.run("?[token, doc_count, idf] <- [[$token, $dc, $idf]] :put token_doc_frequency { token => doc_count, idf }", { token, dc: docCount, idf });
131
- }
132
- catch {
133
- /* safe */
134
- }
135
- }
136
- }
137
- }
138
- catch {
139
- /* IDF update failed — search still works, just with stale weights */
140
- }
141
- }
142
- /**
143
- * Search local entities by query string.
144
- * Tokenizes query, finds matching entities via token intersection,
145
- * ranks by IDF-weighted score (rare tokens contribute more to score).
146
- */
147
- export async function searchLocal(db, query, limit = 20) {
148
- const queryTokens = tokenize(query);
149
- if (queryTokens.length === 0)
150
- return [];
151
- // Find entities that match ANY token, sum IDF weights per entity
152
- const tokenRows = queryTokens.map((t) => `["${t}"]`).join(", ");
153
- let result;
154
- try {
155
- result = await db.run(`
156
- tokens[t] <- [${tokenRows}]
157
- matched[ek, sum(w)] := tokens[t], *search_tokens[t, ek], *token_doc_frequency[t, _, w]
158
- ?[ek, score, name, kind, fp] := matched[ek, score],
159
- *entities{key: ek, kind, name, file_path: fp}
160
- :order -score
161
- :limit ${limit}
162
- `);
163
- }
164
- catch {
165
- return [];
166
- }
167
- if (!result?.rows)
168
- return [];
169
- return result.rows.map((row) => {
170
- const [key, score, name, kind, file_path] = row;
171
- return { key, name, kind, file_path, score };
172
- });
173
- }
@@ -1,67 +0,0 @@
1
- /**
2
- * Docstring/Comment Extractor — extracts documentation annotations from entities.
3
- *
4
- * Reads JSDoc, Python docstrings, Go doc comments, etc. from the source
5
- * and associates them with the closest entity declaration.
6
- */
7
- /**
8
- * Extract doc comment text from raw source at a specific line.
9
- * Looks backward from the entity start line for comment blocks.
10
- */
11
- export function extractDocComment(source, entityStartLine) {
12
- const lines = source.split("\n");
13
- const lineIdx = entityStartLine - 1;
14
- if (lineIdx <= 0 || lineIdx >= lines.length)
15
- return null;
16
- const commentLines = [];
17
- let i = lineIdx - 1;
18
- while (i >= 0) {
19
- const line = lines[i]?.trim();
20
- if (line === undefined)
21
- break;
22
- if (line.startsWith("*") || line.startsWith("//") || line.startsWith("#")) {
23
- commentLines.unshift(line);
24
- i--;
25
- }
26
- else if (line === "/**" || line === "*/") {
27
- commentLines.unshift(line);
28
- i--;
29
- }
30
- else if (line.startsWith("/*")) {
31
- commentLines.unshift(line);
32
- break;
33
- }
34
- else if (line === '"""' || line === "'''") {
35
- commentLines.unshift(line);
36
- break;
37
- }
38
- else {
39
- break;
40
- }
41
- }
42
- if (commentLines.length === 0)
43
- return null;
44
- const text = commentLines
45
- .join("\n")
46
- .replace(/^\/\*\*?\s*|\s*\*\/$/g, "")
47
- .replace(/^\s*\*\s?/gm, "")
48
- .replace(/^\/\/\s?/gm, "")
49
- .replace(/^#\s?/gm, "")
50
- .replace(/^"""|'''|"""|'''/gm, "")
51
- .trim();
52
- return text.length > 0 ? text.slice(0, 500) : null;
53
- }
54
- /**
55
- * Extract JSDoc tags (@param, @returns, @deprecated, etc.)
56
- */
57
- export function extractDocTags(docstring) {
58
- const tagPattern = /@(\w+)/g;
59
- const tags = [];
60
- let match;
61
- match = tagPattern.exec(docstring);
62
- while (match !== null) {
63
- tags.push(match[1]);
64
- match = tagPattern.exec(docstring);
65
- }
66
- return [...new Set(tags)];
67
- }
@@ -1,52 +0,0 @@
1
- /**
2
- * CozoDB Embedding Store — stores Float32Array embeddings inline.
3
- *
4
- * Each entity gets a 128-dim embedding (512 bytes) stored as a
5
- * base64-encoded string in CozoDB. Provides cosine similarity
6
- * queries in <5ms.
7
- */
8
- /**
9
- * Encode a Float32Array to a base64 string for CozoDB storage.
10
- */
11
- export function encodeEmbedding(embedding) {
12
- const buffer = Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
13
- return buffer.toString("base64");
14
- }
15
- /**
16
- * Decode a base64 string back to Float32Array.
17
- */
18
- export function decodeEmbedding(encoded) {
19
- const buffer = Buffer.from(encoded, "base64");
20
- return new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4);
21
- }
22
- /**
23
- * Compute cosine similarity between two embeddings.
24
- */
25
- export function cosineSimilarity(a, b) {
26
- if (a.length !== b.length)
27
- return 0;
28
- let dot = 0;
29
- let normA = 0;
30
- let normB = 0;
31
- for (let i = 0; i < a.length; i++) {
32
- dot += a[i] * b[i];
33
- normA += a[i] * a[i];
34
- normB += b[i] * b[i];
35
- }
36
- const denom = Math.sqrt(normA) * Math.sqrt(normB);
37
- return denom > 0 ? dot / denom : 0;
38
- }
39
- /**
40
- * Find the top-K most similar entities to a query embedding.
41
- */
42
- export function findSimilar(query, allEmbeddings, topK = 10, excludeKey) {
43
- const results = [];
44
- for (const [key, embedding] of allEmbeddings) {
45
- if (key === excludeKey)
46
- continue;
47
- const sim = cosineSimilarity(query, embedding);
48
- results.push({ entityKey: key, similarity: sim });
49
- }
50
- results.sort((a, b) => b.similarity - a.similarity);
51
- return results.slice(0, topK);
52
- }
@@ -1,48 +0,0 @@
1
- /**
2
- * Semantic Enrichment Orchestrator — runs after Phase 1 indexing.
3
- *
4
- * Pipeline: tokenize → TF-IDF → Node2Vec → git mine → combine → store.
5
- * Runs in background, non-blocking to MCP proxy startup.
6
- */
7
- import { createModuleLogger } from "../../utils/logger.js";
8
- import { tokenizeIdentifier } from "./identifier-tokenizer.js";
9
- import { trainEmbeddings } from "./node2vec-embeddings.js";
10
- import { buildAdjacencyFromEdges, generateWalks } from "./node2vec-walks.js";
11
- import { getPrimaryDomain } from "./path-domain-inference.js";
12
- import { buildCorpus, computeTfIdfVector } from "./tfidf-vectors.js";
13
- const log = createModuleLogger("semantic-enrichment");
14
- /**
15
- * Run the full semantic enrichment pipeline.
16
- */
17
- export async function runEnrichment(input) {
18
- const start = performance.now();
19
- const { entities, edges } = input;
20
- if (entities.length === 0) {
21
- return { embeddings: new Map(), entityCount: 0, durationMs: 0 };
22
- }
23
- log.info(`Starting semantic enrichment for ${entities.length} entities`);
24
- const identifiers = entities.map((e) => e.name);
25
- const corpus = buildCorpus(identifiers);
26
- const adjacency = buildAdjacencyFromEdges(edges);
27
- const walks = generateWalks(adjacency, {
28
- seed: 42,
29
- walksPerNode: 5,
30
- walkLength: 20,
31
- });
32
- const structuralEmbeddings = trainEmbeddings(walks);
33
- const embeddings = new Map();
34
- for (const entity of entities) {
35
- const lexical = computeTfIdfVector(entity.name, corpus);
36
- const structural = structuralEmbeddings.get(entity.key) ?? new Float32Array(64);
37
- const domain = getPrimaryDomain(entity.file_path);
38
- embeddings.set(entity.key, {
39
- lexical,
40
- structural,
41
- domain,
42
- keywords: tokenizeIdentifier(entity.name),
43
- });
44
- }
45
- const durationMs = performance.now() - start;
46
- log.info(`Semantic enrichment complete: ${entities.length} entities in ${Math.round(durationMs)}ms`);
47
- return { embeddings, entityCount: entities.length, durationMs };
48
- }
@@ -1,114 +0,0 @@
1
- /**
2
- * Git Commit Message Miner — maps commit messages to entities via blame.
3
- *
4
- * Extracts semantic context from git history:
5
- * - Which commits touched which entities (via file + line intersection)
6
- * - Commit message keywords → entity semantic labels
7
- */
8
- import { gitQuery } from "../../utils/exec.js";
9
/**
 * Get recent commits that touched a specific file.
 *
 * Runs `git log` with the format `%H|%s|%ci` (hash, subject, committer date)
 * and parses each output line into a commit record.
 *
 * @param {string} filePath - Path passed to `git log` after `--`.
 * @param {string} cwd - Working directory handed to `gitQuery`.
 * @param {number} [maxCount=20] - Value used for `--max-count`.
 * @returns {Promise<Array<{ hash: string, message: string, date: string }>>}
 *   Commits in the order git printed them; empty when git produced no output.
 */
export async function getFileCommits(filePath, cwd, maxCount = 20) {
    const output = await gitQuery(["log", `--max-count=${maxCount}`, "--format=%H|%s|%ci", "--", filePath], cwd);
    if (!output)
        return [];
    const commits = [];
    for (const line of output.split("\n")) {
        if (!line)
            continue;
        // A commit subject may itself contain "|". The hash (before the first
        // pipe) and the date (after the last pipe) never do, so everything in
        // between is the subject. A plain split("|") would truncate the
        // subject and report a subject fragment as the date.
        const firstPipe = line.indexOf("|");
        const lastPipe = line.lastIndexOf("|");
        let hash = line;
        let message = "";
        let date = "";
        if (firstPipe !== -1) {
            hash = line.slice(0, firstPipe);
            if (lastPipe > firstPipe) {
                message = line.slice(firstPipe + 1, lastPipe);
                date = line.slice(lastPipe + 1);
            }
            else {
                // Only one separator present: treat the remainder as the subject.
                message = line.slice(firstPipe + 1);
            }
        }
        if (hash.length > 0) {
            commits.push({ hash, message, date });
        }
    }
    return commits;
}
25
/**
 * Derive the most frequent meaningful words from a list of commit messages.
 *
 * Each message is lower-cased, every character other than a-z, 0-9 or
 * whitespace is replaced by a space, and the remaining words are counted.
 * Words of two characters or fewer and common English stop words are ignored.
 *
 * @param {string[]} messages - Commit messages to scan.
 * @returns {string[]} Up to 20 words, most frequent first; ties keep the
 *   order in which the words were first seen.
 */
export function extractKeywords(messages) {
    const stopWords = new Set([
        "a", "an", "the",
        "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did",
        "will", "would", "could", "should", "may", "might", "shall", "can", "need", "dare", "ought",
        "and", "or", "but", "not", "no", "nor",
        "for", "to", "in", "on", "at", "by", "from", "with", "of",
        "it", "its", "this", "that",
    ]);
    const counts = new Map();
    for (const message of messages) {
        const cleaned = message.toLowerCase().replace(/[^a-z0-9\s]/g, " ");
        for (const token of cleaned.split(/\s+/)) {
            if (token.length <= 2 || stopWords.has(token)) {
                continue;
            }
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
    }
    const ranked = [...counts.entries()];
    ranked.sort((left, right) => right[1] - left[1]);
    return ranked.slice(0, 20).map((entry) => entry[0]);
}
93
/**
 * Collect git-derived context for every entity.
 *
 * Commits are looked up per file via `getFileCommits` and cached, so entities
 * that share a `file_path` trigger only one lookup. Lookups run one at a
 * time, in entity order.
 *
 * @param {Iterable<object>} entities - Each element's `key` and `file_path` are read.
 * @param {string} cwd - Working directory forwarded to `getFileCommits`.
 * @returns {Promise<Map<*, object>>} Map from entity key to
 *   `{ entityKey, commitMessages, keywords }`, where `commitMessages` holds at
 *   most the first five messages and `keywords` comes from `extractKeywords`
 *   over all messages for the file.
 */
export async function mineEntityContext(entities, cwd) {
    const commitsByFile = new Map();
    const contexts = new Map();
    for (const entity of entities) {
        const filePath = entity.file_path;
        let commits = commitsByFile.get(filePath);
        if (!commits) {
            commits = await getFileCommits(filePath, cwd);
            commitsByFile.set(filePath, commits);
        }
        const messages = commits.map(({ message }) => message);
        const context = {
            entityKey: entity.key,
            commitMessages: messages.slice(0, 5),
            keywords: extractKeywords(messages),
        };
        contexts.set(entity.key, context);
    }
    return contexts;
}
@@ -1,51 +0,0 @@
1
/**
 * Identifier Tokenizer — breaks a code identifier into lower-case word tokens.
 *
 * Word boundaries are: the separators `-`, `_`, `.`, `/` and `\`; a lower-case
 * letter or digit followed by an upper-case letter (camelCase); and the end of
 * an upper-case run that is followed by a capitalised word
 * (e.g. HTMLParser → ["html", "parser"]).
 *
 * Tokens that consist of a single digit are dropped.
 *
 * @param {string} name - Identifier to split; falsy or empty input yields [].
 * @returns {string[]} Lower-cased tokens in their original order.
 */
export function tokenizeIdentifier(name) {
    if (!name || name.length === 0) {
        return [];
    }
    // Turn each kind of boundary into a space, one pass per rule.
    let spaced = name.replace(/[-_./\\]/g, " ");
    spaced = spaced.replace(/([a-z\d])([A-Z])/g, "$1 $2");
    spaced = spaced.replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
    return spaced
        .split(/\s+/)
        .filter(Boolean)
        .map((piece) => piece.toLowerCase())
        // Keep multi-character tokens and single non-digit characters only.
        .filter((token) => token.length > 1 || (token.length === 1 && !/\d/.test(token)));
}
27
/**
 * Split a file path into semantic tokens.
 *
 * The path is cut on `/` and `\`; for every non-empty segment the final
 * `.suffix` (if any) is removed and the remainder is passed through
 * `tokenizeIdentifier`. Tokens are returned in path order.
 *
 * @param {string} filePath - Path using forward or backward slashes.
 * @returns {string[]} Tokens from all segments, concatenated.
 */
export function tokenizeFilePath(filePath) {
    return filePath
        .split(/[/\\]/)
        .filter(Boolean)
        .flatMap((segment) => tokenizeIdentifier(segment.replace(/\.[^.]+$/, "")));
}
39
/**
 * Count how often each token occurs across a collection of identifiers.
 *
 * Every identifier is split with `tokenizeIdentifier`; each resulting token
 * increments its counter, so a token repeated inside one identifier is counted
 * once per occurrence.
 *
 * @param {Iterable<string>} identifiers - Identifiers to tokenize.
 * @returns {Map<string, number>} Token → occurrence count, in first-seen order.
 */
export function buildTokenFrequency(identifiers) {
    const counts = new Map();
    const bump = (token) => {
        const previous = counts.get(token) ?? 0;
        counts.set(token, previous + 1);
    };
    for (const identifier of identifiers) {
        for (const token of tokenizeIdentifier(identifier)) {
            bump(token);
        }
    }
    return counts;
}
@@ -1,71 +0,0 @@
1
- /**
2
- * Node2Vec Embedding Trainer — Skip-gram on walk sequences.
3
- *
4
- * Converts random walks into 64-dimensional structural embeddings.
5
- * Uses a simplified Skip-gram model: for each (center, context) pair
6
- * in the walk window, accumulate co-occurrence → reduce to fixed dims.
7
- *
8
- * Performance: <2s for 1K entities (10 walks × 40 steps).
9
- */
10
/** Length of every structural embedding vector. */
const STRUCTURAL_DIM = 64;
/**
 * Build structural embeddings from random-walk sequences.
 *
 * Two phases:
 *  1. For every position in every walk, count how often each other node
 *     appears within `windowSize` steps on either side (co-occurrence).
 *  2. For every node, fold its co-occurrence counts into a fixed-size vector
 *     by hashing each neighbour's index to one of `STRUCTURAL_DIM` slots,
 *     then pass the vector to `normalize`.
 *
 * @param {Array<Array<*>>} walks - Walks; each walk is a sequence of node keys.
 * @param {number} [windowSize=5] - Context radius around each walk position.
 * @returns {Map<*, Float32Array>} Node key → vector of length `STRUCTURAL_DIM`,
 *   in the order nodes were first encountered in the walks.
 */
export function trainEmbeddings(walks, windowSize = 5) {
    // Phase 1: symmetric window co-occurrence counts.
    const cooccurrence = new Map();
    for (const walk of walks) {
        const lastPos = walk.length - 1;
        for (let pos = 0; pos <= lastPos; pos++) {
            const center = walk[pos];
            let counts = cooccurrence.get(center);
            if (counts === undefined) {
                counts = new Map();
                cooccurrence.set(center, counts);
            }
            const lo = Math.max(0, pos - windowSize);
            const hi = Math.min(lastPos, pos + windowSize);
            for (let ctx = lo; ctx <= hi; ctx++) {
                if (ctx !== pos) {
                    const neighbor = walk[ctx];
                    counts.set(neighbor, (counts.get(neighbor) ?? 0) + 1);
                }
            }
        }
    }

    // Assign each node a stable index: its first-seen position.
    const nodeIndex = new Map();
    for (const node of cooccurrence.keys()) {
        nodeIndex.set(node, nodeIndex.size);
    }

    // Phase 2: hash neighbour indices into slots and accumulate the counts.
    const embeddings = new Map();
    for (const [node, neighbors] of cooccurrence) {
        const vec = new Float32Array(STRUCTURAL_DIM);
        for (const [neighbor, weight] of neighbors) {
            const neighborIdx = nodeIndex.get(neighbor) ?? 0;
            // Multiplicative hash of the neighbour index, reduced to a slot.
            const slot = Math.abs((neighborIdx * 2654435761) % STRUCTURAL_DIM);
            vec[slot] += weight;
        }
        normalize(vec);
        embeddings.set(node, vec);
    }
    return embeddings;
}
50
/**
 * Concatenate a lexical vector and a structural vector into one 128-dim
 * Float32Array.
 *
 * The lexical values are written starting at index 0 and the structural values
 * starting at index 64. The structural copy happens second, so it overwrites
 * any lexical values that reach index 64 or beyond.
 *
 * @param {ArrayLike<number>} lexical - Written at offset 0.
 * @param {ArrayLike<number>} structural - Written at offset 64.
 * @returns {Float32Array} New 128-element vector; untouched slots are 0.
 */
export function combineEmbeddings(lexical, structural) {
    const totalDim = 128;
    const structuralOffset = 64;
    const merged = new Float32Array(totalDim);
    merged.set(lexical, 0);
    merged.set(structural, structuralOffset);
    return merged;
}
59
/**
 * Scale a vector in place to unit Euclidean length.
 *
 * A vector whose length is not greater than zero (e.g. all zeros) is left
 * untouched, which avoids dividing by zero.
 *
 * @param {Float32Array} vector - Modified in place.
 * @returns {void}
 */
function normalize(vector) {
    let sumOfSquares = 0;
    for (const component of vector) {
        sumOfSquares += component * component;
    }
    const magnitude = Math.sqrt(sumOfSquares);
    if (!(magnitude > 0)) {
        return;
    }
    for (let i = 0; i < vector.length; i++) {
        vector[i] /= magnitude;
    }
}
71
- export { STRUCTURAL_DIM };
@@ -1,103 +0,0 @@
1
- /**
2
- * Node2Vec Random Walk Generator — seeded walks on the entity graph.
3
- *
4
- * Produces random walk sequences that capture graph structure.
5
- * Uses biased random walks (Node2Vec with p/q parameters) to balance
6
- * between BFS-like (local) and DFS-like (exploration) behavior.
7
- *
8
- * Seeded for deterministic, reproducible walks.
9
- */
10
/**
 * Walk settings used by generateWalks for any option the caller omits.
 */
const DEFAULT_CONFIG = {
    // Seed handed to the PRNG so walks are repeatable.
    seed: 42,
    // Return parameter: a step back to the previous node is weighted 1 / p.
    p: 1,
    // In-out parameter: a step to a node that is not adjacent to the previous
    // node is weighted 1 / q.
    q: 1,
    // Number of walks started from every node.
    walksPerNode: 10,
    // Maximum number of nodes in a single walk, start node included.
    walkLength: 40,
};
17
/**
 * Seeded PRNG (xorshift32). Deterministic given the same seed.
 *
 * @param {number} seed - Coerced to a 32-bit integer; a seed of 0 is replaced
 *   by 1 because an all-zero state would never change.
 * @returns {() => number} Generator producing floats in the half-open range
 *   [0, 1).
 */
function createRng(seed) {
    let state = seed | 0;
    if (state === 0)
        state = 1;
    return () => {
        state ^= state << 13;
        // The right shift must be unsigned (>>>). A sign-propagating >> makes
        // this step non-invertible, so a non-zero state can collapse to 0 and
        // the generator would then return 0 forever.
        state ^= state >>> 17;
        state ^= state << 5;
        // Divide by 2^32 rather than 2^32 - 1 so the result is always < 1.
        // Callers use Math.floor(rng() * length) as an array index, and a
        // result of exactly 1 would index one past the end.
        return (state >>> 0) / 0x100000000;
    };
}
31
/**
 * Generate random walks from a graph.
 *
 * For each of `walksPerNode` rounds, one walk is started from every key of
 * `adjacency` (in Map order). The first step picks a neighbour uniformly.
 * Every later step picks a neighbour with these weights, relative to the
 * previously visited node `prev`:
 *   - the neighbour is `prev` itself: 1 / p
 *   - the neighbour is also adjacent to `prev`: 1
 *   - otherwise: 1 / q
 * A walk ends early when the current node has no neighbours.
 *
 * @param {Map<*, Array<*>>} adjacency - Node key → list of neighbour keys.
 * @param {object} [config] - Optional overrides for `walkLength`,
 *   `walksPerNode`, `p`, `q` and `seed`; missing fields come from
 *   `DEFAULT_CONFIG`.
 * @returns {Array<Array<*>>} Walks; each walk is a sequence of node keys
 *   starting with its start node and holding at most `walkLength` keys.
 */
export function generateWalks(adjacency, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    const rng = createRng(cfg.seed);
    const nodes = [...adjacency.keys()];
    const walks = [];
    // Neighbour Sets are built lazily and reused, so the "is this neighbour
    // adjacent to prev?" test is a constant-time lookup instead of a linear
    // scan of prev's neighbour list for every candidate.
    const neighborSets = new Map();
    const neighborSetOf = (node) => {
        let cached = neighborSets.get(node);
        if (cached === undefined) {
            cached = new Set(adjacency.get(node) ?? []);
            neighborSets.set(node, cached);
        }
        return cached;
    };
    for (let w = 0; w < cfg.walksPerNode; w++) {
        for (const startNode of nodes) {
            const walk = [startNode];
            let current = startNode;
            let prev = null;
            for (let step = 1; step < cfg.walkLength; step++) {
                const neighbors = adjacency.get(current) ?? [];
                if (neighbors.length === 0)
                    break;
                let next;
                if (prev === null) {
                    // Uniform first step. The index is clamped so that a
                    // generator returning exactly 1 cannot select one past
                    // the end and push `undefined` into the walk.
                    const idx = Math.min(Math.floor(rng() * neighbors.length), neighbors.length - 1);
                    next = neighbors[idx];
                }
                else {
                    const prevNeighbors = neighborSetOf(prev);
                    const weights = [];
                    for (const neighbor of neighbors) {
                        if (neighbor === prev) {
                            weights.push(1 / cfg.p);
                        }
                        else if (prevNeighbors.has(neighbor)) {
                            weights.push(1);
                        }
                        else {
                            weights.push(1 / cfg.q);
                        }
                    }
                    // Roulette-wheel selection over the weights.
                    const totalWeight = weights.reduce((a, b) => a + b, 0);
                    let r = rng() * totalWeight;
                    next = neighbors[0];
                    for (let i = 0; i < weights.length; i++) {
                        r -= weights[i];
                        if (r <= 0) {
                            next = neighbors[i];
                            break;
                        }
                    }
                }
                walk.push(next);
                prev = current;
                current = next;
            }
            walks.push(walk);
        }
    }
    return walks;
}
89
/**
 * Turn a list of edges into an undirected adjacency list.
 *
 * Each edge adds `to_key` to the neighbours of `from_key` and `from_key` to
 * the neighbours of `to_key`. Repeated edges are not de-duplicated, and a
 * self-loop adds the node to its own list twice.
 *
 * @param {Iterable<object>} edges - Each element's `from_key` and `to_key` are read.
 * @returns {Map<*, Array<*>>} Node key → neighbour keys, with nodes in
 *   first-seen order.
 */
export function buildAdjacencyFromEdges(edges) {
    const adjacency = new Map();
    // Fetch the neighbour list for a node, creating it on first sight.
    const listFor = (key) => {
        let list = adjacency.get(key);
        if (list === undefined) {
            list = [];
            adjacency.set(key, list);
        }
        return list;
    };
    for (const edge of edges) {
        const source = edge.from_key;
        const target = edge.to_key;
        const sourceList = listFor(source);
        const targetList = listFor(target);
        sourceList.push(target);
        targetList.push(source);
    }
    return adjacency;
}