@unerr-ai/unerr 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (363) hide show
  1. package/README.md +36 -45
  2. package/dist/cli.js +37443 -36022
  3. package/package.json +2 -1
  4. package/dist/behaviors/agent-llm-bridge.js +0 -166
  5. package/dist/behaviors/architecture-guard.js +0 -256
  6. package/dist/behaviors/auto-doc.js +0 -247
  7. package/dist/behaviors/cascade-guard.js +0 -289
  8. package/dist/behaviors/change-narrative.js +0 -270
  9. package/dist/behaviors/convention-drift.js +0 -290
  10. package/dist/behaviors/framework.js +0 -235
  11. package/dist/behaviors/guard-formatter.js +0 -44
  12. package/dist/behaviors/incomplete-work.js +0 -270
  13. package/dist/behaviors/loop-breaker.js +0 -300
  14. package/dist/behaviors/session-continuity.js +0 -208
  15. package/dist/commands/branches.js +0 -97
  16. package/dist/commands/check-commit.js +0 -225
  17. package/dist/commands/compress-output.js +0 -64
  18. package/dist/commands/config-verify.js +0 -243
  19. package/dist/commands/daemon.js +0 -905
  20. package/dist/commands/dashboard.js +0 -52
  21. package/dist/commands/debug.js +0 -200
  22. package/dist/commands/enrich.js +0 -184
  23. package/dist/commands/exec.js +0 -233
  24. package/dist/commands/gain.js +0 -156
  25. package/dist/commands/hook.js +0 -88
  26. package/dist/commands/index.js +0 -88
  27. package/dist/commands/init.js +0 -74
  28. package/dist/commands/install.js +0 -505
  29. package/dist/commands/learn.js +0 -116
  30. package/dist/commands/manifest.js +0 -193
  31. package/dist/commands/rewind.js +0 -103
  32. package/dist/commands/serve.js +0 -19
  33. package/dist/commands/setup-wizard.js +0 -414
  34. package/dist/commands/skills.js +0 -64
  35. package/dist/commands/stats.js +0 -20
  36. package/dist/commands/status.js +0 -654
  37. package/dist/commands/timeline.js +0 -139
  38. package/dist/commands/uninstall.js +0 -230
  39. package/dist/components/App.js +0 -109
  40. package/dist/components/Banner.js +0 -12
  41. package/dist/components/ConfirmPrompt.js +0 -25
  42. package/dist/components/DriftSummary.js +0 -23
  43. package/dist/components/GradeBadge.js +0 -15
  44. package/dist/components/HealthCard.js +0 -18
  45. package/dist/components/InkSpinner.js +0 -22
  46. package/dist/components/InputBox.js +0 -17
  47. package/dist/components/KeyValue.js +0 -13
  48. package/dist/components/MessageList.js +0 -14
  49. package/dist/components/ProgressBar.js +0 -26
  50. package/dist/components/Section.js +0 -16
  51. package/dist/components/SessionSummaryCard.js +0 -73
  52. package/dist/components/StartupDisplay.js +0 -24
  53. package/dist/components/StatusDashboard.js +0 -57
  54. package/dist/components/StatusLine.js +0 -8
  55. package/dist/components/StepLine.js +0 -22
  56. package/dist/components/Theme.js +0 -20
  57. package/dist/components/ToolProgress.js +0 -8
  58. package/dist/components/ViolationList.js +0 -21
  59. package/dist/components/render.js +0 -13
  60. package/dist/config/agent-registry.js +0 -237
  61. package/dist/config/claude-settings-hooks.js +0 -304
  62. package/dist/config/hook-installer.js +0 -65
  63. package/dist/config/instruction-writer.js +0 -388
  64. package/dist/config/mcp-config-writer.js +0 -266
  65. package/dist/config/settings.js +0 -174
  66. package/dist/config/tool-detector.js +0 -42
  67. package/dist/config/value-surfacing.js +0 -119
  68. package/dist/core/context-assembly.js +0 -108
  69. package/dist/core/conversation.js +0 -33
  70. package/dist/core/local-chat-provider.js +0 -475
  71. package/dist/core/provider-factory.js +0 -55
  72. package/dist/core/providers.js +0 -90
  73. package/dist/core/query-engine.js +0 -174
  74. package/dist/daemon/api.js +0 -312
  75. package/dist/daemon/autostart.js +0 -119
  76. package/dist/daemon/bootstrap.js +0 -39
  77. package/dist/daemon/client.js +0 -164
  78. package/dist/daemon/detect-ci.js +0 -81
  79. package/dist/daemon/platform-linux.js +0 -146
  80. package/dist/daemon/platform-macos.js +0 -134
  81. package/dist/daemon/platform-windows.js +0 -116
  82. package/dist/daemon/process-manager.js +0 -299
  83. package/dist/daemon/protocol.js +0 -23
  84. package/dist/daemon/registry.js +0 -270
  85. package/dist/daemon/settings-schema.js +0 -72
  86. package/dist/daemon/system-health.js +0 -134
  87. package/dist/daemon/version-checker.js +0 -262
  88. package/dist/daemon/warm-start.js +0 -223
  89. package/dist/entrypoints/cli.js +0 -1043
  90. package/dist/entrypoints/daemon.js +0 -380
  91. package/dist/entrypoints/repl.js +0 -147
  92. package/dist/hooks/adapters/claude-code.js +0 -90
  93. package/dist/hooks/adapters/cline.js +0 -100
  94. package/dist/hooks/adapters/cursor.js +0 -98
  95. package/dist/hooks/hook-dedup.js +0 -79
  96. package/dist/hooks/hook-runner.js +0 -113
  97. package/dist/hooks/navigation-hooks.js +0 -175
  98. package/dist/hooks/prompt-hooks.js +0 -63
  99. package/dist/hooks/shell-hooks.js +0 -47
  100. package/dist/ignore.js +0 -111
  101. package/dist/intelligence/approach-suggester.js +0 -61
  102. package/dist/intelligence/ast-extractor.js +0 -2615
  103. package/dist/intelligence/ast-worker.js +0 -34
  104. package/dist/intelligence/background-indexer.js +0 -121
  105. package/dist/intelligence/blast-radius.js +0 -200
  106. package/dist/intelligence/community-detection.js +0 -691
  107. package/dist/intelligence/community-detector.js +0 -184
  108. package/dist/intelligence/computation-scheduler.js +0 -75
  109. package/dist/intelligence/confidence-propagation.js +0 -47
  110. package/dist/intelligence/convention-detector.js +0 -242
  111. package/dist/intelligence/convention-learner.js +0 -205
  112. package/dist/intelligence/convention-matcher.js +0 -205
  113. package/dist/intelligence/cozo-schema.js +0 -376
  114. package/dist/intelligence/decision-point-detector.js +0 -90
  115. package/dist/intelligence/deep-dive-tools.js +0 -586
  116. package/dist/intelligence/durability-scorer.js +0 -84
  117. package/dist/intelligence/exploration-cost.js +0 -204
  118. package/dist/intelligence/exploration-pattern-tracker.js +0 -61
  119. package/dist/intelligence/fact-generator.js +0 -322
  120. package/dist/intelligence/facts-schema.js +0 -90
  121. package/dist/intelligence/file-intelligence.js +0 -59
  122. package/dist/intelligence/graph-holder.js +0 -220
  123. package/dist/intelligence/graph-temporal-joiner.js +0 -238
  124. package/dist/intelligence/health-grade.js +0 -423
  125. package/dist/intelligence/health-grader.js +0 -200
  126. package/dist/intelligence/health-map-data.js +0 -259
  127. package/dist/intelligence/import-symbols.js +0 -136
  128. package/dist/intelligence/incremental-indexer.js +0 -658
  129. package/dist/intelligence/indexer/centrality.js +0 -62
  130. package/dist/intelligence/indexer/cfg-context.js +0 -95
  131. package/dist/intelligence/indexer/confidence.js +0 -34
  132. package/dist/intelligence/indexer/cross-file-resolver.js +0 -104
  133. package/dist/intelligence/indexer/edge-repair.js +0 -89
  134. package/dist/intelligence/indexer/entity-key.js +0 -17
  135. package/dist/intelligence/indexer/export-map.js +0 -132
  136. package/dist/intelligence/indexer/git-cochange.js +0 -128
  137. package/dist/intelligence/indexer/graph-patch.js +0 -147
  138. package/dist/intelligence/indexer/incremental.js +0 -78
  139. package/dist/intelligence/indexer/ingest.js +0 -160
  140. package/dist/intelligence/indexer/language-detect.js +0 -226
  141. package/dist/intelligence/indexer/metadata.js +0 -63
  142. package/dist/intelligence/indexer/mutation-tracker.js +0 -79
  143. package/dist/intelligence/indexer/orchestrator.js +0 -155
  144. package/dist/intelligence/indexer/plugin-interface.js +0 -31
  145. package/dist/intelligence/indexer/plugins/csharp.js +0 -440
  146. package/dist/intelligence/indexer/plugins/go.js +0 -335
  147. package/dist/intelligence/indexer/plugins/java.js +0 -370
  148. package/dist/intelligence/indexer/plugins/python.js +0 -358
  149. package/dist/intelligence/indexer/plugins/regex-fallback.js +0 -82
  150. package/dist/intelligence/indexer/plugins/ruby.js +0 -290
  151. package/dist/intelligence/indexer/plugins/rust.js +0 -484
  152. package/dist/intelligence/indexer/plugins/tier2-generic.js +0 -310
  153. package/dist/intelligence/indexer/plugins/typescript.js +0 -456
  154. package/dist/intelligence/indexer/resource-monitor.js +0 -93
  155. package/dist/intelligence/indexer/scip/decoder.js +0 -253
  156. package/dist/intelligence/indexer/scip/detector.js +0 -232
  157. package/dist/intelligence/indexer/scip/downloader.js +0 -427
  158. package/dist/intelligence/indexer/scip/fallback.js +0 -34
  159. package/dist/intelligence/indexer/scip/merger.js +0 -109
  160. package/dist/intelligence/indexer/scip/orchestrator.js +0 -433
  161. package/dist/intelligence/indexer/scip/runner.js +0 -98
  162. package/dist/intelligence/indexer/snapshot.js +0 -66
  163. package/dist/intelligence/indexer/test-detector.js +0 -196
  164. package/dist/intelligence/indexer/watch-integration.js +0 -61
  165. package/dist/intelligence/indexer/worker.js +0 -85
  166. package/dist/intelligence/local-convention-detector.js +0 -437
  167. package/dist/intelligence/local-embeddings.js +0 -190
  168. package/dist/intelligence/local-graph.js +0 -1946
  169. package/dist/intelligence/local-indexer.js +0 -1575
  170. package/dist/intelligence/local-llm.js +0 -163
  171. package/dist/intelligence/local-rule-generator.js +0 -154
  172. package/dist/intelligence/local-snapshot.js +0 -213
  173. package/dist/intelligence/negative-knowledge.js +0 -103
  174. package/dist/intelligence/persistent-db.js +0 -85
  175. package/dist/intelligence/query-router.js +0 -2556
  176. package/dist/intelligence/risk-classifier.js +0 -116
  177. package/dist/intelligence/rule-evaluator.js +0 -380
  178. package/dist/intelligence/rule-generator.js +0 -49
  179. package/dist/intelligence/search-index.js +0 -173
  180. package/dist/intelligence/semantic/docstring-extractor.js +0 -67
  181. package/dist/intelligence/semantic/embedding-store.js +0 -52
  182. package/dist/intelligence/semantic/enrichment-orchestrator.js +0 -48
  183. package/dist/intelligence/semantic/git-message-miner.js +0 -114
  184. package/dist/intelligence/semantic/identifier-tokenizer.js +0 -51
  185. package/dist/intelligence/semantic/node2vec-embeddings.js +0 -71
  186. package/dist/intelligence/semantic/node2vec-walks.js +0 -103
  187. package/dist/intelligence/semantic/path-domain-inference.js +0 -112
  188. package/dist/intelligence/semantic/similarity-engine.js +0 -60
  189. package/dist/intelligence/semantic/tfidf-vectors.js +0 -88
  190. package/dist/intelligence/session-brief-builder.js +0 -159
  191. package/dist/intelligence/session-context.js +0 -221
  192. package/dist/intelligence/session-health-monitor.js +0 -211
  193. package/dist/intelligence/session-narrative.js +0 -197
  194. package/dist/intelligence/session-pattern-analyzer.js +0 -218
  195. package/dist/intelligence/signal-scorer.js +0 -390
  196. package/dist/intelligence/signal-show-store.js +0 -182
  197. package/dist/intelligence/smart-truncate.js +0 -158
  198. package/dist/intelligence/subgraph-cache.js +0 -88
  199. package/dist/intelligence/temporal-facts.js +0 -494
  200. package/dist/intelligence/token-estimator.js +0 -100
  201. package/dist/intelligence/tool-injector.js +0 -87
  202. package/dist/intelligence/tree-sitter-loader.js +0 -71
  203. package/dist/intelligence/worker-pool.js +0 -116
  204. package/dist/proxy/arg-validator.js +0 -79
  205. package/dist/proxy/auto-bootstrap.js +0 -167
  206. package/dist/proxy/bridge.js +0 -147
  207. package/dist/proxy/budget-enforcer.js +0 -70
  208. package/dist/proxy/compression-quality-monitor.js +0 -160
  209. package/dist/proxy/compression-stats.js +0 -51
  210. package/dist/proxy/context-rot-detector.js +0 -137
  211. package/dist/proxy/drift-detector.js +0 -139
  212. package/dist/proxy/efficiency-tracker.js +0 -79
  213. package/dist/proxy/fact-ranking.js +0 -154
  214. package/dist/proxy/format-encoder.js +0 -266
  215. package/dist/proxy/http-transport.js +0 -90
  216. package/dist/proxy/lifecycle-actor.js +0 -55
  217. package/dist/proxy/lifecycle-machine.js +0 -187
  218. package/dist/proxy/log-tailer.js +0 -265
  219. package/dist/proxy/model-pricing.js +0 -98
  220. package/dist/proxy/network-firewall.js +0 -141
  221. package/dist/proxy/nudge-state.js +0 -93
  222. package/dist/proxy/output-compressor.js +0 -185
  223. package/dist/proxy/pid-lock.js +0 -291
  224. package/dist/proxy/proxy-context.js +0 -11
  225. package/dist/proxy/proxy.js +0 -2633
  226. package/dist/proxy/response-enrichment.js +0 -32
  227. package/dist/proxy/response-envelope.js +0 -313
  228. package/dist/proxy/session-dedup.js +0 -82
  229. package/dist/proxy/session-legend.js +0 -30
  230. package/dist/proxy/session-persistence.js +0 -210
  231. package/dist/proxy/session-resume.js +0 -94
  232. package/dist/proxy/session-stats.js +0 -513
  233. package/dist/proxy/shell-classifier.js +0 -1346
  234. package/dist/proxy/shell-compression-log.js +0 -93
  235. package/dist/proxy/shell-compressor.js +0 -390
  236. package/dist/proxy/shell-graph-boost.js +0 -202
  237. package/dist/proxy/shell-monitor-map.js +0 -18
  238. package/dist/proxy/shell-stats.js +0 -54
  239. package/dist/proxy/shell-strategies/cloud.js +0 -215
  240. package/dist/proxy/shell-strategies/diff.js +0 -159
  241. package/dist/proxy/shell-strategies/error-diagnostic.js +0 -796
  242. package/dist/proxy/shell-strategies/filter-dsl.js +0 -358
  243. package/dist/proxy/shell-strategies/git-status.js +0 -177
  244. package/dist/proxy/shell-strategies/key-value.js +0 -193
  245. package/dist/proxy/shell-strategies/log-text.js +0 -154
  246. package/dist/proxy/shell-strategies/omni.js +0 -188
  247. package/dist/proxy/shell-strategies/progress.js +0 -55
  248. package/dist/proxy/shell-strategies/redact.js +0 -76
  249. package/dist/proxy/shell-strategies/structured.js +0 -241
  250. package/dist/proxy/shell-strategies/tabular.js +0 -243
  251. package/dist/proxy/shell-strategies/test-results-types.js +0 -13
  252. package/dist/proxy/shell-strategies/test-results.js +0 -784
  253. package/dist/proxy/shell-strategies/tree-paths.js +0 -144
  254. package/dist/proxy/shell-strategies/yaml.js +0 -182
  255. package/dist/proxy/shell-tee.js +0 -111
  256. package/dist/proxy/signal-dedup.js +0 -171
  257. package/dist/proxy/startup-renderer.js +0 -158
  258. package/dist/proxy/task-token-display.js +0 -38
  259. package/dist/proxy/token-counter.js +0 -61
  260. package/dist/proxy/tool-clusters.js +0 -273
  261. package/dist/proxy/tool-definitions.js +0 -525
  262. package/dist/proxy/transport-mux.js +0 -229
  263. package/dist/proxy/wire-cap.js +0 -268
  264. package/dist/rules/developer.mozilla.org.json +0 -9
  265. package/dist/rules/github.com.json +0 -21
  266. package/dist/schemas/api/skills.js +0 -19
  267. package/dist/schemas/common/errors.js +0 -7
  268. package/dist/schemas/common/headers.js +0 -5
  269. package/dist/schemas/entities/edge.js +0 -25
  270. package/dist/schemas/entities/entity.js +0 -22
  271. package/dist/schemas/entities/rule.js +0 -18
  272. package/dist/schemas/index.js +0 -14
  273. package/dist/server/event-bus.js +0 -59
  274. package/dist/server/http.js +0 -156
  275. package/dist/server/middleware.js +0 -70
  276. package/dist/server/routes/drift.js +0 -97
  277. package/dist/server/routes/intelligence.js +0 -1217
  278. package/dist/server/routes/reasoning-quality.js +0 -444
  279. package/dist/server/routes/session.js +0 -86
  280. package/dist/server/routes/stream.js +0 -120
  281. package/dist/server/routes/system.js +0 -73
  282. package/dist/server/routes/temporal.js +0 -170
  283. package/dist/server/routes/timeline.js +0 -232
  284. package/dist/server/routes/token-flow.js +0 -403
  285. package/dist/skills/effectiveness-tracker.js +0 -93
  286. package/dist/skills/local-pack.js +0 -380
  287. package/dist/skills/resolver.js +0 -495
  288. package/dist/state-detector.js +0 -83
  289. package/dist/timeline/intent-detector.js +0 -263
  290. package/dist/timeline/loop-miner.js +0 -140
  291. package/dist/timeline/open-threads.js +0 -49
  292. package/dist/timeline/signal-reinforcer.js +0 -62
  293. package/dist/timeline/timeline-bootstrap.js +0 -151
  294. package/dist/timeline/timeline-store.js +0 -618
  295. package/dist/tools/coding/bash.js +0 -49
  296. package/dist/tools/coding/file-edit.js +0 -72
  297. package/dist/tools/coding/file-outline.js +0 -227
  298. package/dist/tools/coding/file-read-protocol.js +0 -425
  299. package/dist/tools/coding/file-read.js +0 -35
  300. package/dist/tools/coding/file-write.js +0 -43
  301. package/dist/tools/coding/glob-tool.js +0 -109
  302. package/dist/tools/coding/grep.js +0 -162
  303. package/dist/tools/coding/index.js +0 -27
  304. package/dist/tools/intelligence/index.js +0 -269
  305. package/dist/tools/intelligence/record-fact.js +0 -48
  306. package/dist/tools/intelligence/timeline-markers.js +0 -130
  307. package/dist/tools/registry.js +0 -47
  308. package/dist/tools/types.js +0 -8
  309. package/dist/tracking/auto-snapshot-triggers.js +0 -246
  310. package/dist/tracking/branch-context.js +0 -115
  311. package/dist/tracking/branch-snapshot.js +0 -217
  312. package/dist/tracking/causal-bridge.js +0 -317
  313. package/dist/tracking/circuit-breaker.js +0 -147
  314. package/dist/tracking/commit-watcher.js +0 -114
  315. package/dist/tracking/context-ledger.js +0 -119
  316. package/dist/tracking/correction-detector.js +0 -324
  317. package/dist/tracking/drift-tracker.js +0 -874
  318. package/dist/tracking/durability-tracker.js +0 -94
  319. package/dist/tracking/entity-rewind.js +0 -200
  320. package/dist/tracking/file-hash-state.js +0 -114
  321. package/dist/tracking/git-attribution.js +0 -132
  322. package/dist/tracking/git-trailers.js +0 -171
  323. package/dist/tracking/intelligence-counter.js +0 -46
  324. package/dist/tracking/intent-correlator.js +0 -202
  325. package/dist/tracking/intent-encoder.js +0 -52
  326. package/dist/tracking/intent-token-tracker.js +0 -159
  327. package/dist/tracking/ledger-archiver.js +0 -94
  328. package/dist/tracking/ledger-chains.js +0 -245
  329. package/dist/tracking/metrics-store.js +0 -361
  330. package/dist/tracking/native-watcher.js +0 -131
  331. package/dist/tracking/offline-rewind.js +0 -295
  332. package/dist/tracking/pending-violations.js +0 -74
  333. package/dist/tracking/persistence-effectiveness.js +0 -167
  334. package/dist/tracking/prompt-durability.js +0 -202
  335. package/dist/tracking/quality-signals.js +0 -213
  336. package/dist/tracking/redactor.js +0 -73
  337. package/dist/tracking/rewind-engine.js +0 -161
  338. package/dist/tracking/session-history.js +0 -128
  339. package/dist/tracking/session-receipt.js +0 -88
  340. package/dist/tracking/session-summary-writer.js +0 -157
  341. package/dist/tracking/shadow-ledger.js +0 -321
  342. package/dist/tracking/stash-manager.js +0 -258
  343. package/dist/tracking/timeline-fork.js +0 -213
  344. package/dist/tracking/timeline.js +0 -69
  345. package/dist/tracking/token-flow.js +0 -276
  346. package/dist/tracking/turn-segmenter.js +0 -122
  347. package/dist/tracking/weekly-accumulator.js +0 -179
  348. package/dist/tracking/working-snapshots.js +0 -188
  349. package/dist/tracking/workspace-manifest.js +0 -176
  350. package/dist/transport/http.js +0 -102
  351. package/dist/utils/counterfactual.js +0 -65
  352. package/dist/utils/deep-link.js +0 -34
  353. package/dist/utils/detect.js +0 -193
  354. package/dist/utils/exec.js +0 -73
  355. package/dist/utils/file-logger.js +0 -87
  356. package/dist/utils/format-error.js +0 -29
  357. package/dist/utils/git.js +0 -181
  358. package/dist/utils/log.js +0 -57
  359. package/dist/utils/logger.js +0 -35
  360. package/dist/utils/mcp-content-json.js +0 -8
  361. package/dist/utils/session-logger.js +0 -154
  362. package/dist/utils/startup-log.js +0 -512
  363. package/dist/utils/ui.js +0 -56
@@ -1,1575 +0,0 @@
1
- /**
2
- * Local Indexing Pipeline — Sprint L2.1
3
- *
4
- * Walks the project directory, extracts entities and edges using tree-sitter
5
- * AST analysis (with regex fallback), populates CozoDB, triggers community
6
- * detection, and builds the search index. Builds the graph locally
7
- * for Local Mode operation.
8
- *
9
- * Pipeline phases:
10
- * 1. Discover source files (respecting exclusion patterns)
11
- * 2. Extract entities per file (tree-sitter AST)
12
- * 3. Extract edges per file (imports, calls, extends, implements)
13
- * 4. Cross-file edge resolution (match import refs to entity keys)
14
- * 5. Compute derived fields (fan_in, fan_out, risk_level)
15
- * 6. Populate CozoDB (entities, edges, file_index)
16
- * 7. Community detection (Louvain via graphology)
17
- * 8-10. Convention detection + rule generation, build search index, persist local snapshot
18
- *
19
- * All logging to stderr. Never touches stdout.
20
- */
21
- import { readFileSync, readdirSync, statSync } from "node:fs";
22
- import { basename, extname, join, relative } from "node:path";
23
- import { formatUnknownError } from "../utils/format-error.js";
24
- import { entityKey, extractEdgesAsync, extractEntitiesAsync, } from "./ast-extractor.js";
25
- import { detectCascadedCommunities } from "./community-detection.js";
26
- import { computeCoChangeEdges } from "./indexer/git-cochange.js";
27
- import { enrichWithScip } from "./indexer/scip/orchestrator.js";
28
- import { isTestFile } from "./indexer/test-detector.js";
29
- import { detectLocalConventions } from "./local-convention-detector.js";
30
- import { generateLocalRules } from "./local-rule-generator.js";
31
- import { persistLocalSnapshot } from "./local-snapshot.js";
32
- import { buildSearchIndex, tokenize } from "./search-index.js";
33
- // ── Configuration ────────────────────────────────────────────────
34
/**
 * Extensions of files that are indexed as source code.
 * This list has to stay aligned with detectLanguage in ast-extractor.
 */
const INDEXABLE_EXTENSIONS = new Set([
    // TypeScript / JavaScript
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    // Python, Go, Java, Rust
    ".py", ".go", ".java", ".rs",
    // C / C++ sources and headers
    ".c", ".h", ".cpp", ".cc", ".cxx", ".hpp",
]);
53
/** Directory basenames that the file walk never descends into. */
const EXCLUDED_DIRS = new Set([
    "node_modules", "dist", "build", "out",
    // version-control metadata
    ".git", ".hg", ".svn",
    "coverage", "__pycache__", ".mypy_cache", ".pytest_cache",
    "vendor", "target",
    ".next", ".nuxt", ".output",
    ".unerr", ".cache", ".turbo", ".parcel-cache",
]);
76
/**
 * Path-prefix exclusions (relative to projectRoot). Used for paths that should
 * be excluded only when they match a specific location, not as a bare basename.
 * Example: `.claude/worktrees/` (Claude Code agent sandboxes) — using bare
 * `worktrees` would over-match any user-named folder. Path-prefix is precise.
 *
 * Entries are written with forward slashes and without a trailing slash.
 */
const EXCLUDED_PATH_PREFIXES = [
    ".claude/worktrees",
    ".cursor/worktrees",
    ".idea/worktrees",
];
/**
 * Report whether a project-relative path is an excluded prefix or lies beneath one.
 *
 * Backslashes are treated as path separators, so a path produced by
 * `path.relative` on Windows (e.g. `.claude\worktrees\x.ts`) matches the
 * forward-slash prefixes above. A prefix only matches on a whole path
 * segment: `.claude/worktrees-old` is NOT excluded.
 *
 * @param {string} relPath - Path relative to the project root.
 * @returns {boolean} true when the path equals an excluded prefix or is nested inside it.
 */
function isExcludedPath(relPath) {
    // Normalise separators once so the comparison is platform-independent.
    const normalized = relPath.replace(/\\/g, "/");
    return EXCLUDED_PATH_PREFIXES.some((prefix) => normalized === prefix || normalized.startsWith(`${prefix}/`));
}
94
/** Largest file that will be indexed: 1 MiB, expressed in bytes. */
const MAX_FILE_SIZE = 1024 * 1024;
96
/**
 * Extensions of document and configuration files. These are made discoverable
 * through search, but no code entities are extracted from them.
 */
const DOCUMENT_EXTENSIONS = new Set([
    // prose and lightweight markup
    ".md", ".mdx", ".txt", ".rst", ".adoc", ".org",
    // structured config / data
    ".yaml", ".yml", ".toml", ".json", ".xml",
    // infrastructure definitions
    ".tf", ".tfvars", ".hcl",
    // API schemas and queries
    ".proto", ".graphql", ".gql", ".sql",
    // shell scripts
    ".sh", ".bash", ".zsh",
    // web markup, styles and component files
    ".html", ".css", ".scss", ".sass", ".less", ".vue", ".svelte", ".astro",
    // templates
    ".j2", ".jinja2", ".tmpl", ".hbs", ".ejs", ".pug",
    // data-model / IDL files
    ".prisma", ".avsc", ".thrift", ".smithy",
    // build files
    ".cmake", ".bazel", ".bzl", ".mk",
    // tabular data
    ".csv", ".tsv",
    // settings files
    ".ini", ".cfg", ".conf", ".properties", ".env.example", ".editorconfig", ".dockerfile",
    // TeX
    ".tex", ".latex",
    // linter / formatter rc files
    ".eslintrc", ".prettierrc", ".stylelintrc", ".pylintrc", ".flake8",
]);
158
/**
 * Exact file names of well-known config/build files that have no
 * standard extension to match on.
 */
const DOCUMENT_NAMES = new Set([
    "Dockerfile", "Makefile", "Procfile", "Vagrantfile", "Jenkinsfile", "Gemfile", "Rakefile",
    ".gitignore", ".dockerignore",
    ".prettierrc", ".editorconfig", ".eslintrc", ".stylelintrc",
    "biome.json", "tslint.json",
    ".gitlab-ci.yml",
]);
177
/** CI/CD directories that are walked even though their names begin with ".". */
const CI_CD_DIRS = new Set([
    ".github",
    ".circleci",
]);
/** Upper bound, in bytes, on how much of a document file is read for token extraction (8 KiB). */
const DOC_READ_LIMIT = 8 * 1024;
181
/**
 * Logger that writes `[unerr] `-prefixed, newline-terminated lines to stderr.
 *
 * - `info(msg)` always writes and returns the result of `process.stderr.write`.
 * - `verbose(msg, verbose)` writes only when `verbose` is truthy and returns nothing.
 */
const log = {
    info(msg) {
        return process.stderr.write(`[unerr] ${msg}\n`);
    },
    verbose(msg, verbose) {
        if (!verbose) {
            return;
        }
        process.stderr.write(`[unerr] ${msg}\n`);
    },
};
189
- // ── Main Pipeline ────────────────────────────────────────────────
190
- /**
191
- * Index a local project: walk files, extract entities + edges, populate CozoDB.
192
- *
193
- * @param projectRoot - Absolute path to the project root (where .git lives)
194
- * @param graphStore - CozoGraphStore instance (schema already initialized)
195
- * @param repoId - Repository ID for entity key generation
196
- * @param opts - Optional verbose/callback settings
197
- */
198
- export async function indexLocalProject(projectRoot, graphStore, repoId, opts) {
199
- const startTime = Date.now();
200
- const progress = opts?.onProgress;
201
- // Phase 0: Mark reindex start. We DON'T clear upfront — queries remain valid against
202
- // stale-but-present data during the ~8s reindex window. After populate completes,
203
- // Phase 6.2 removes orphaned entities not seen in this index run.
204
- const indexedEntityKeys = new Set();
205
- // Phase 1: Discover source files
206
- progress?.({
207
- processed: 0,
208
- total: 0,
209
- phase: "discovering",
210
- currentFile: null,
211
- });
212
- const files = discoverSourceFiles(projectRoot);
213
- log.info(`Indexing project... (${files.length} files found)`);
214
- // Phase 2+3: Extract entities and edges per file
215
- const allEntities = [];
216
- const allRawEdges = [];
217
- const fileEntityMap = new Map();
218
- let filesProcessed = 0;
219
- for (const absPath of files) {
220
- const relPath = relative(projectRoot, absPath);
221
- progress?.({
222
- processed: filesProcessed,
223
- total: files.length,
224
- phase: "extracting",
225
- currentFile: relPath,
226
- });
227
- let content;
228
- try {
229
- content = readFileSync(absPath, "utf-8");
230
- }
231
- catch {
232
- filesProcessed++;
233
- continue;
234
- }
235
- // Extract entities
236
- const entities = await extractEntitiesAsync(content, relPath);
237
- fileEntityMap.set(relPath, entities);
238
- // Convert to CompactEntity with entity keys.
239
- // is_test now comes exclusively from the AST extractor — TS/JS test-
240
- // primitive callbacks (it/describe/test/...) are extracted as entities
241
- // with is_test=true, Rust cfg(test) blocks set it via plugin. Top-level
242
- // helpers (seedEntities, MockEntity, createTestDb) in test files stay
243
- // is_test=false. The old fileIsTest fallback was lossy — it conflated
244
- // "lives in a test file" with "is a test", polluting test-coverage
245
- // results with fixtures/helpers.
246
- for (const entity of entities) {
247
- const key = entityKey(repoId, relPath, entity.kind, entity.name, entity.signature);
248
- allEntities.push({
249
- key,
250
- kind: entity.kind,
251
- name: entity.name,
252
- file_path: relPath,
253
- start_line: entity.line_start,
254
- end_line: entity.line_end,
255
- signature: entity.signature,
256
- body: "", // Skip body storage for indexing performance
257
- fan_in: 0,
258
- fan_out: 0,
259
- risk_level: "normal",
260
- community: -1,
261
- is_test: entity.is_test ?? false,
262
- parent_class: entity.parent_class,
263
- });
264
- }
265
- // Extract edges (tag with source file for cross-file resolution)
266
- const edges = await extractEdgesAsync(content, relPath, entities);
267
- for (const edge of edges) {
268
- allRawEdges.push({ ...edge, source_file: relPath });
269
- }
270
- filesProcessed++;
271
- log.verbose(`${relPath}: ${entities.length} entities, ${edges.length} edges`, opts?.verbose);
272
- opts?.onFileIndexed?.(relPath, entities.length, edges.length);
273
- }
274
- // Phase 4: Cross-file edge resolution
275
- progress?.({
276
- processed: filesProcessed,
277
- total: files.length,
278
- phase: "resolving",
279
- currentFile: null,
280
- });
281
- const entityByName = buildEntityNameIndex(allEntities);
282
- const { edges: resolvedEdges, fileImportEdges } = resolveEdges(allRawEdges, entityByName, allEntities, repoId, fileEntityMap);
283
- // Phase 4.5: SCIP enrichment (inline — adds cross-file edges tree-sitter missed)
284
- progress?.({
285
- processed: filesProcessed,
286
- total: files.length,
287
- phase: "scip",
288
- currentFile: null,
289
- });
290
- const relativeFiles = files.map((f) => relative(projectRoot, f));
291
- const scipResult = await enrichWithScip(relativeFiles, projectRoot, resolvedEdges.map((e) => ({
292
- from_key: e.from_key,
293
- to_key: e.to_key,
294
- type: e.type,
295
- file_path: "",
296
- line: 0,
297
- })), allEntities.map((e) => ({
298
- key: e.key,
299
- name: e.name,
300
- file_path: e.file_path,
301
- })));
302
- if (scipResult.mergeResult) {
303
- log.info(`SCIP: ${scipResult.mergeResult.edgesUpgraded} edges verified, ${scipResult.mergeResult.newEdgesFromScip} new edges added (${scipResult.language})`);
304
- }
305
- // Phase 5: Compute fan_in, fan_out, risk_level
306
- computeDerivedFields(allEntities, resolvedEdges);
307
- // Phase 5.1: R.11 — Create "tests" edges (test entity → source entity it exercises)
308
- const testEdges = resolveTestEdges(allEntities, resolvedEdges, fileImportEdges);
309
- if (testEdges.length > 0) {
310
- resolvedEdges.push(...testEdges);
311
- log.info(`Test graph: ${testEdges.length} test→source edges created`);
312
- }
313
- // Phase 5.5: R.4 — Compute file→file co-change edges from git history
314
- const coChangeEdges = computeCoChangeEdges(projectRoot);
315
- const coChangeCompactEdges = coChangeEdges.map((e) => ({
316
- from_key: `file:${e.from_file}`,
317
- to_key: `file:${e.to_file}`,
318
- type: "co_changes",
319
- }));
320
- // Phase 6: Populate CozoDB (includes R.1 file entities, R.2 contains edges)
321
- progress?.({
322
- processed: filesProcessed,
323
- total: files.length,
324
- phase: "populating",
325
- currentFile: null,
326
- });
327
- // R.3 + R.4: Combine code edges with file-level import + co-change edges
328
- const allEdges = [
329
- ...resolvedEdges,
330
- ...fileImportEdges,
331
- ...coChangeCompactEdges,
332
- ];
333
- await populateCozoDB(graphStore, allEntities, allEdges);
334
- // Phase 6.1: Index document files for search discoverability
335
- progress?.({
336
- processed: filesProcessed,
337
- total: files.length,
338
- phase: "documents",
339
- currentFile: null,
340
- });
341
- const docKeys = await indexDocumentFiles(projectRoot, graphStore);
342
- // Phase 6.2: Remove orphaned entities not present in this index run.
343
- // This handles deleted files/entities — the graph was NOT cleared upfront (to remain
344
- // queryable during reindex), so stale entries must be pruned after fresh data is in place.
345
- for (const e of allEntities)
346
- indexedEntityKeys.add(e.key);
347
- // Also include file-level entities
348
- for (const e of allEntities) {
349
- if (e.file_path)
350
- indexedEntityKeys.add(`file:${e.file_path}`);
351
- }
352
- // Include doc entities so they aren't pruned as orphans
353
- for (const dk of docKeys)
354
- indexedEntityKeys.add(dk);
355
- await removeOrphanedEntities(graphStore, indexedEntityKeys);
356
- // Phase 6.3: Clear stale drift overlay/edges. The baseline `entities` and `edges`
357
- // relations have just been refreshed, so any pre-existing drift_overlay /
358
- // drift_edges rows describe deltas against the OLD baseline and would
359
- // incorrectly mark freshly-indexed entities as modified (issue #8).
360
- await graphStore.clearDriftOverlay();
361
- // Phase 6.5: Materialize L1 edges (file→file, class→class weighted aggregates)
362
- await materializeL1Edges(graphStore);
363
- // Phase 7: Community detection (handled inside CozoGraphStore)
364
- progress?.({
365
- processed: filesProcessed,
366
- total: files.length,
367
- phase: "communities",
368
- currentFile: null,
369
- });
370
- const communityCount = await runCommunityDetection(graphStore);
371
- // Phase 8: Convention detection + rule generation (L6)
372
- progress?.({
373
- processed: filesProcessed,
374
- total: files.length,
375
- phase: "conventions",
376
- currentFile: null,
377
- });
378
- const { patternCount, ruleCount } = await runConventionDetection(graphStore, repoId);
379
- // Phase 9: Build search index
380
- progress?.({
381
- processed: filesProcessed,
382
- total: files.length,
383
- phase: "search",
384
- currentFile: null,
385
- });
386
- await buildSearchIndex(graphStore.db);
387
- // Phase 10: Persist local snapshot for fast subsequent boots
388
- progress?.({
389
- processed: filesProcessed,
390
- total: files.length,
391
- phase: "snapshot",
392
- currentFile: null,
393
- });
394
- await persistLocalSnapshot(projectRoot, repoId, allEntities, resolvedEdges);
395
- const elapsedMs = Date.now() - startTime;
396
- log.info(`Indexed ${files.length} files → ${allEntities.length} entities, ${resolvedEdges.length} edges, ${docKeys.length} docs in ${elapsedMs}ms`);
397
- return {
398
- fileCount: files.length,
399
- entityCount: allEntities.length,
400
- edgeCount: resolvedEdges.length,
401
- communityCount,
402
- patternCount,
403
- ruleCount,
404
- docCount: docKeys.length,
405
- elapsedMs,
406
- scip: scipResult.mergeResult
407
- ? {
408
- language: scipResult.language,
409
- edgesVerified: scipResult.mergeResult.edgesUpgraded,
410
- newEdges: scipResult.mergeResult.newEdgesFromScip,
411
- durationMs: scipResult.runResult?.durationMs ?? 0,
412
- }
413
- : undefined,
414
- };
415
- }
416
/**
 * Incrementally re-index one file (Sprint L2.5).
 *
 * Drops the entities previously stored for the file, re-extracts entities and
 * edges from its current content, writes them to the graph store and rebuilds
 * the search index.
 *
 * @param {string} projectRoot - Project root used to convert between relative and absolute paths.
 * @param {string} filePath - File to re-index; a leading "/" marks it as absolute, otherwise it is taken relative to `projectRoot`.
 * @param {object} graphStore - Graph store; its `db` handle is passed to the search-index rebuild.
 * @param {string} repoId - Repository id used when building entity keys.
 * @returns {Promise<{entities: number, edges: number}>} Counts of inserted entities and edges;
 *   both are 0 when the file cannot be read (its old entities have already been removed).
 */
export async function reindexFile(projectRoot, filePath, graphStore, repoId) {
    let relPath = filePath;
    let absPath = filePath;
    if (filePath.startsWith("/")) {
        relPath = relative(projectRoot, filePath);
    }
    else {
        absPath = join(projectRoot, filePath);
    }
    // Old rows go first, so an unreadable (e.g. deleted) file ends up with no entities.
    await removeFileEntities(graphStore, relPath);
    let source;
    try {
        source = readFileSync(absPath, "utf-8");
    }
    catch {
        return { entities: 0, edges: 0 };
    }
    const extracted = await extractEntitiesAsync(source, relPath);
    // is_test is taken as-is from the extractor output; nothing here flags
    // entities as tests based on the file they live in.
    const toCompactEntity = (e) => ({
        key: entityKey(repoId, relPath, e.kind, e.name, e.signature),
        kind: e.kind,
        name: e.name,
        file_path: relPath,
        start_line: e.line_start,
        end_line: e.line_end,
        signature: e.signature,
        body: "",
        fan_in: 0,
        fan_out: 0,
        risk_level: "normal",
        community: -1,
        is_test: e.is_test ?? false,
        parent_class: e.parent_class,
    });
    const compactEntities = extracted.map((e) => toCompactEntity(e));
    const rawEdges = await extractEdgesAsync(source, relPath, extracted);
    // Name-based resolution: the source end is resolved against this file's
    // entities, the target end against the whole graph store.
    const resolvedEdges = [];
    for (const raw of rawEdges) {
        const fromKey = resolveEntityName(raw.from_name, relPath, repoId, extracted);
        const toKey = await resolveEntityNameGlobal(raw.to_name, graphStore);
        if (!fromKey || !toKey)
            continue;
        resolvedEdges.push({ from_key: fromKey, to_key: toKey, type: raw.type });
    }
    for (const compact of compactEntities) {
        await insertEntity(graphStore, compact);
    }
    for (const resolved of resolvedEdges) {
        await insertEdge(graphStore, resolved);
    }
    // Rebuild the search index so the changed entities become searchable.
    await buildSearchIndex(graphStore.db);
    return { entities: compactEntities.length, edges: resolvedEdges.length };
}
478
- // ── Phase 1: File Discovery ──────────────────────────────────────
479
/**
 * Collect every indexable source file under the project directory.
 *
 * @param {string} projectRoot - Directory to walk; also the base for path-aware exclusions.
 * @returns {string[]} Paths of the discovered files, in traversal order.
 */
export function discoverSourceFiles(projectRoot) {
    const discovered = [];
    walkDir(projectRoot, discovered, projectRoot);
    return discovered;
}
485
/**
 * Recursively walk `dir`, appending indexable files to `files`.
 *
 * Entries are skipped when their name is in EXCLUDED_DIRS, when the name starts
 * with a dot, when `isExcludedPath` rejects their project-relative path, or when
 * they cannot be stat'ed. Unreadable directories are silently ignored.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} files - Accumulator that receives matching file paths (mutated).
 * @param {string} projectRoot - Root used to compute project-relative paths.
 */
function walkDir(dir, files, projectRoot) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        return;
    }
    for (const name of names) {
        if (EXCLUDED_DIRS.has(name) || (name.startsWith(".") && name !== ".")) {
            continue;
        }
        const entryPath = join(dir, name);
        // Path-aware exclusion (e.g. `.claude/worktrees/`) as a second line of
        // defence on top of the name-based checks above.
        if (isExcludedPath(relative(projectRoot, entryPath))) {
            continue;
        }
        let info;
        try {
            info = statSync(entryPath);
        }
        catch {
            continue;
        }
        if (info.isDirectory()) {
            walkDir(entryPath, files, projectRoot);
            continue;
        }
        if (!info.isFile()) {
            continue;
        }
        const extension = extname(name).toLowerCase();
        if (INDEXABLE_EXTENSIONS.has(extension) && info.size <= MAX_FILE_SIZE) {
            files.push(entryPath);
        }
    }
}
522
- // ── Phase 4: Cross-File Edge Resolution ──────────────────────────
523
/**
 * Build a name → entities lookup used for cross-file edge resolution.
 *
 * Each entity is registered under the last dot-separated segment of its name
 * and, when the name is dotted, under the full name as well.
 *
 * @param {Array<{name: string}>} entities - Entities to index.
 * @returns {Map<string, object[]>} Map from name to the entities carrying it, in input order.
 */
function buildEntityNameIndex(entities) {
    const byName = new Map();
    const register = (name, entity) => {
        const bucket = byName.get(name);
        if (bucket) {
            bucket.push(entity);
        }
        else {
            byName.set(name, [entity]);
        }
    };
    for (const entity of entities) {
        const shortName = entity.name.split(".").pop() ?? entity.name;
        register(shortName, entity);
        if (shortName !== entity.name) {
            register(entity.name, entity);
        }
    }
    return byName;
}
540
/**
 * Turn name-based raw edges into key-based edges and derive file→file import edges.
 *
 * - File-level import edges (`from_name === "__file__"`, type "imports", with an
 *   `import_source`) only contribute a file→file pair.
 * - Other file-scope edges are kept only for "calls" made from test files; they
 *   are emitted from the `file:<path>` module key.
 * - All remaining edges are resolved by name on both ends and de-duplicated on
 *   (from, to, type).
 *
 * `repoId` and `fileEntityMap` are accepted but not read by this function.
 *
 * @returns {{edges: object[], fileImportEdges: object[]}} Resolved entity edges and
 *   de-duplicated `file:` → `file:` "imports" edges.
 */
function resolveEdges(rawEdges, entityByName, allEntities, repoId, fileEntityMap) {
    const resolved = [];
    const emittedEdgeKeys = new Set();
    // R.3: file→file import pairs, stored as "<from>\0<to>" for de-duplication.
    const fileImportPairs = new Set();
    // Every file that owns at least one entity; used to resolve import sources.
    const allFilePaths = new Set();
    for (const entity of allEntities) {
        if (entity.file_path) {
            allFilePaths.add(entity.file_path);
        }
    }
    const pairKey = (fromFile, toFile) => `${fromFile}\0${toFile}`;
    // Push the edge unless an identical (from, to, type) was already emitted.
    const emit = (fromKey, toKey, type) => {
        const dedupeKey = `${fromKey}:${toKey}:${type}`;
        if (emittedEdgeKeys.has(dedupeKey)) {
            return false;
        }
        emittedEdgeKeys.add(dedupeKey);
        resolved.push({ from_key: fromKey, to_key: toKey, type });
        return true;
    };
    // Same-file non-class first, then same-file, then any non-class, then the first candidate.
    const pickSource = (candidates, sourceFile) => {
        if (candidates.length === 1) {
            return candidates[0];
        }
        return (candidates.find((s) => s.file_path === sourceFile && s.kind !== "class") ??
            candidates.find((s) => s.file_path === sourceFile) ??
            candidates.find((s) => s.kind !== "class") ??
            candidates[0]);
    };
    for (const edge of rawEdges) {
        const isFileScope = edge.from_name === "__file__";
        // R.3 (all languages): file-level import → file→file pair only.
        if (isFileScope && edge.type === "imports" && edge.import_source) {
            const importer = edge.source_file;
            const imported = resolveImportSourceToFile(edge.import_source, importer, allFilePaths);
            if (imported && imported !== importer) {
                fileImportPairs.add(pairKey(importer, imported));
            }
            continue;
        }
        const targets = entityByName.get(edge.to_name);
        if (!targets || targets.length === 0) {
            continue;
        }
        let target = targets[0];
        if (targets.length !== 1) {
            target = pickBestTarget(targets, edge.import_source) ?? targets[0];
        }
        if (isFileScope) {
            // File-scope calls in test files (inside describe/it) are kept so that
            // "tests" edges can later be derived from the file module entity.
            if (edge.type !== "calls" || !isTestFile(edge.source_file)) {
                continue;
            }
            if (!emit(`file:${edge.source_file}`, target.key, edge.type)) {
                continue;
            }
            const calledFile = target.file_path;
            if (calledFile && calledFile !== edge.source_file) {
                fileImportPairs.add(pairKey(edge.source_file, calledFile));
            }
            continue;
        }
        const sources = entityByName.get(edge.from_name);
        if (!sources || sources.length === 0) {
            continue;
        }
        const origin = pickSource(sources, edge.source_file);
        if (!emit(origin.key, target.key, edge.type)) {
            continue;
        }
        // R.3: a cross-file entity edge also implies a file→file import.
        const originFile = origin.file_path;
        const targetFile = target.file_path;
        if (originFile && targetFile && originFile !== targetFile) {
            fileImportPairs.add(pairKey(originFile, targetFile));
        }
    }
    // R.3: expand the de-duplicated pairs into file-level import edges.
    const fileImportEdges = [...fileImportPairs].map((pair) => {
        const [from, to] = pair.split("\0");
        return { from_key: `file:${from}`, to_key: `file:${to}`, type: "imports" };
    });
    return { edges: resolved, fileImportEdges };
}
627
/**
 * Resolve a language-native import source to an actual project file path.
 *
 * Resolution depends on the shape of the import and the importing file's extension:
 * - Python relative imports (".models", "..pkg.mod") → walk up one directory per
 *   leading dot beyond the file name, then dotted path → slash path
 * - Other imports starting with "." or "/" → TS/JS-style relative path resolution
 * - TS/JS bare specifiers → treated as external packages (null)
 * - Python: "os.path" → dotted module → slash path
 * - Go: "github.com/user/repo/pkg" → match the last 1-3 segments against project files
 * - Java: "com.example.Foo" → dot→slash path
 * - Rust: "crate::module::sub" → "src/module/sub.rs" or "src/module/sub/mod.rs"
 * - Ruby: "path/to/file" → direct path match
 * - C#: "System.Collections.Generic" → dot→slash path
 *
 * @param {string} importSource - Import specifier exactly as written in the source.
 * @param {string} sourceFile - Project-relative path of the importing file ("/"-separated).
 * @param {Set<string>} projectFiles - Project-relative paths of all known files.
 * @returns {string|null} The matched project file path, or null if unresolvable (external dep).
 * @internal Exported for testing.
 */
export function resolveImportSourceToFile(importSource, sourceFile, projectFiles) {
    // Skip empty imports
    if (!importSource || importSource.length === 0)
        return null;
    const ext = extname(sourceFile).toLowerCase();
    // ── Python relative imports (leading dots, no slashes) ──
    // This must be checked BEFORE the generic leading-dot test below; otherwise
    // ".models" / "..models" are handed to the TS/JS resolver and never match.
    if (ext === ".py" && importSource.startsWith(".") && !importSource.includes("/")) {
        const dots = importSource.match(/^\.+/)[0].length;
        const remainder = importSource.slice(dots).replace(/\./g, "/");
        // One dot = the importing file's own package (drop the file name only);
        // each additional dot climbs one more directory.
        const base = sourceFile.split("/").slice(0, -dots).join("/");
        // Join only the non-empty parts so "from . import x" does not yield "pkg/".
        const candidate = [base, remainder].filter(Boolean).join("/");
        return matchFileCandidates(candidate, projectFiles, ["py"]);
    }
    // ── TS/JS-style relative imports ──
    if (importSource.startsWith(".") || importSource.startsWith("/")) {
        return resolveRelativeImport(importSource, sourceFile, projectFiles);
    }
    // ── Language detection based on source file extension ──
    switch (ext) {
        case ".ts":
        case ".tsx":
        case ".js":
        case ".jsx":
        case ".mjs":
        case ".cjs":
            // Bare specifier (npm package) — skip external
            return null;
        case ".py": {
            // Python: dotted.module.path → slash path
            const pyPath = importSource.replace(/\./g, "/");
            return matchFileCandidates(pyPath, projectFiles, ["py"]);
        }
        case ".go": {
            // Go: full package path — match trailing segments against project files.
            // Tries the longest suffix (up to 3 segments) first, then shorter ones.
            const segments = importSource.split("/");
            for (let i = Math.max(0, segments.length - 3); i < segments.length; i++) {
                const suffix = segments.slice(i).join("/");
                // Go packages are directories — look for any .go file in that dir
                for (const fp of projectFiles) {
                    if (fp.endsWith(".go") && fp.includes(`${suffix}/`)) {
                        // Return the first matching file as the package's representative
                        return fp;
                    }
                    // Also match if the file's directory itself ends with the suffix
                    const dir = fp.substring(0, fp.lastIndexOf("/"));
                    if (dir.endsWith(suffix))
                        return fp;
                }
            }
            return null;
        }
        case ".java": {
            // Java: com.example.Foo → com/example/Foo.java
            const javaPath = importSource.replace(/\./g, "/");
            return matchFileCandidates(javaPath, projectFiles, ["java"]);
        }
        case ".rs": {
            // Rust: crate::module::sub → src/module/sub.rs or src/module/sub/mod.rs
            const rustPath = importSource
                .replace(/^crate::/, "src/")
                .replace(/^self::/, sourceFile.replace(/\/[^/]+$/, "/"))
                .replace(/^super::/, sourceFile.replace(/\/[^/]+\/[^/]+$/, "/"))
                .replace(/::/g, "/");
            return matchFileCandidates(rustPath, projectFiles, ["rs"]);
        }
        case ".rb": {
            // Ruby: require paths are often relative file paths
            return matchFileCandidates(importSource, projectFiles, ["rb"]);
        }
        case ".cs": {
            // C#: namespace.Type → namespace/Type.cs
            const csPath = importSource.replace(/\./g, "/");
            return matchFileCandidates(csPath, projectFiles, ["cs"]);
        }
        default:
            return null;
    }
}
722
/**
 * Resolve a TS/JS relative import to a project file.
 *
 * The import is joined onto the importing file's directory and normalised
 * ("." and empty segments dropped, ".." pops a segment). Candidates are then
 * probed in order: for a `.js/.jsx/.mjs/.cjs` import the extension-stripped stem
 * (file forms, then index forms) comes first, followed by the full path with
 * each known extension, its directory index forms, and finally the path as-is.
 *
 * @param {string} importSource - Relative specifier, e.g. "./utils/helper.js".
 * @param {string} sourceFile - Project-relative path of the importing file.
 * @param {Set<string>} projectFiles - Known project-relative file paths.
 * @returns {string|null} First candidate present in `projectFiles`, else null.
 */
function resolveRelativeImport(importSource, sourceFile, projectFiles) {
    const KNOWN_EXTS = ["ts", "tsx", "js", "jsx", "mjs", "cjs"];
    // A file at the project root has no "/" and therefore an empty directory.
    const sourceDir = sourceFile.slice(0, Math.max(0, sourceFile.lastIndexOf("/")));
    const joined = `${sourceDir}/${importSource.replace(/^\.\//, "")}`;
    const base = joined
        .split("/")
        .reduce((kept, segment) => {
        if (segment === "..") {
            kept.pop();
        }
        else if (segment !== "." && segment !== "") {
            kept.push(segment);
        }
        return kept;
    }, [])
        .join("/");
    // File forms first, then directory-index forms, for a given stem.
    const formsOf = (stem) => [
        ...KNOWN_EXTS.map((e) => `${stem}.${e}`),
        ...KNOWN_EXTS.map((e) => `${stem}/index.${e}`),
    ];
    // NodeNext: a ".js" specifier may point at a ".ts" file on disk, so the
    // extension-stripped stem is probed before the literal path.
    const jsExt = /\.(js|jsx|mjs|cjs)$/.exec(base);
    const candidates = [
        ...(jsExt ? formsOf(base.slice(0, -jsExt[0].length)) : []),
        ...formsOf(base),
        base,
    ];
    const hit = candidates.find((candidate) => projectFiles.has(candidate));
    return hit ?? null;
}
766
/**
 * Match an extension-less resolved path against the project's files.
 *
 * Probing order:
 *   1. `basePath` exactly
 *   2. `basePath.<ext>` for each extension
 *   3. `basePath/<index>.<ext>` (index name: `__init__` for py, `mod` for rs, else `index`)
 *   4. any project file ending in `/basePath.<ext>` (import may omit a path prefix,
 *      e.g. "models/user" → "app/models/user.py")
 *   5. any project file ending in `/basePath/<index>.<ext>`
 *
 * @param {string} basePath - Slash-separated path without extension.
 * @param {Set<string>} projectFiles - Known project-relative file paths.
 * @param {string[]} extensions - Extensions to try, without the leading dot.
 * @returns {string|null} The first matching project file, or null.
 */
function matchFileCandidates(basePath, projectFiles, extensions) {
    if (!basePath) {
        return null;
    }
    if (projectFiles.has(basePath)) {
        return basePath;
    }
    let indexName = "index";
    if (extensions.includes("py")) {
        indexName = "__init__";
    }
    else if (extensions.includes("rs")) {
        indexName = "mod";
    }
    const fileForms = extensions.map((ext) => `${basePath}.${ext}`);
    const dirForms = extensions.map((ext) => `${basePath}/${indexName}.${ext}`);
    const orderedForms = [...fileForms, ...dirForms];
    // Exact hits: file forms first, then directory-index forms.
    const exact = orderedForms.find((form) => projectFiles.has(form));
    if (exact !== undefined) {
        return exact;
    }
    // Suffix hits, in the same order, for imports that omit a leading path prefix.
    for (const form of orderedForms) {
        const suffix = `/${form}`;
        for (const fp of projectFiles) {
            if (fp.endsWith(suffix)) {
                return fp;
            }
        }
    }
    return null;
}
816
/**
 * Pick the best matching entity when multiple share the same name.
 *
 * The import source is normalised by stripping leading "./" and "../" segments
 * and a trailing JS/TS extension. A target whose (extension-less) file path ends
 * with the normalised source wins; failing that, one whose path merely contains
 * it; failing that, the first target.
 *
 * @param {Array<{file_path?: string}>} targets - Candidate entities sharing a name.
 * @param {string} [importSource] - Import specifier associated with the edge, if any.
 * @returns {object|null} The chosen target, or null when `targets` is empty.
 */
function pickBestTarget(targets, importSource) {
    if (!importSource || targets.length <= 1)
        return targets[0] ?? null;
    const stripExt = (p) => p.replace(/\.(ts|js|tsx|jsx|mjs|cjs)$/, "");
    // Drop every leading "./" or "../" so parent-relative imports such as
    // "../utils/helper" can still be compared against project-relative paths.
    const normalized = stripExt(importSource.replace(/^(?:\.\.?\/)+/, ""));
    // An empty needle would match everything; fall through to the default instead.
    if (normalized) {
        // Tolerate targets without a file_path rather than throwing.
        const paths = targets.map((t) => stripExt(t.file_path ?? ""));
        // A suffix match is the stronger signal, so look for it across all targets first.
        const suffixIdx = paths.findIndex((p) => p.endsWith(normalized));
        if (suffixIdx !== -1)
            return targets[suffixIdx];
        const containsIdx = paths.findIndex((p) => p.includes(normalized));
        if (containsIdx !== -1)
            return targets[containsIdx];
    }
    return targets[0] ?? null;
}
832
- // ── Phase 5.1: Test Edge Resolution ─────────────────────────────
833
/**
 * Create "tests" edges connecting test entities to the source entities they exercise.
 *
 * Edges are produced in three passes:
 *   1. Entity-level calls: every "calls" edge from a test entity to a known
 *      non-test entity yields a "tests" edge between the same pair.
 *   2. File-level calls: for test files that got no edge in pass 1, "calls" edges
 *      whose source is the file's `file:<path>` module key are attributed to the
 *      first test entity of that file.
 *   3. Import fallback: for test files still without edges, each non-test entity
 *      (except kinds "type" and "interface") in a file the test file imports gets
 *      a "tests" edge from the file's first test entity.
 *
 * @param {object[]} entities - All entities (read: key, is_test, file_path, kind).
 * @param {object[]} edges - Resolved entity edges; only type "calls" is used.
 * @param {object[]} [fileImportEdges] - `file:` → `file:` edges; only type "imports" is used.
 * @returns {Array<{from_key: string, to_key: string, type: "tests"}>} De-duplicated "tests" edges.
 */
function resolveTestEdges(entities, edges, fileImportEdges = []) {
    const testEdges = [];
    const testEntities = entities.filter((e) => e.is_test);
    if (testEntities.length === 0)
        return testEdges;
    const entityByKey = new Map();
    for (const e of entities)
        entityByKey.set(e.key, e);
    const addToSet = (index, key, value) => {
        let bucket = index.get(key);
        if (!bucket) {
            bucket = new Set();
            index.set(key, bucket);
        }
        bucket.add(value);
    };
    const addToList = (index, key, value) => {
        const bucket = index.get(key);
        if (bucket)
            bucket.push(value);
        else
            index.set(key, [value]);
    };
    // Outbound calls index: source_key → Set<target_key>
    const callsFrom = new Map();
    for (const edge of edges) {
        if (edge.type === "calls")
            addToSet(callsFrom, edge.from_key, edge.to_key);
    }
    // File import index: importing file → Set<imported file> ("file:" prefix stripped)
    const fileImportsFrom = new Map();
    for (const edge of fileImportEdges) {
        if (edge.type !== "imports" || !edge.from_key.startsWith("file:"))
            continue;
        const toFile = edge.to_key.startsWith("file:")
            ? edge.to_key.slice(5)
            : edge.to_key;
        addToSet(fileImportsFrom, edge.from_key.slice(5), toFile);
    }
    // Per-file groupings: non-test entities (import fallback) and test entities.
    const sourceEntitiesByFile = new Map();
    const testEntitiesByFile = new Map();
    for (const e of entities) {
        if (!e.file_path)
            continue;
        addToList(e.is_test ? testEntitiesByFile : sourceEntitiesByFile, e.file_path, e);
    }
    const seen = new Set();
    // Test files that already received at least one call-based "tests" edge.
    const filesWithCallEdges = new Set();
    // Append a "tests" edge unless the same pair was already emitted.
    const link = (fromKey, toKey) => {
        const pairId = `${fromKey}→${toKey}`;
        if (seen.has(pairId))
            return false;
        seen.add(pairId);
        testEdges.push({ from_key: fromKey, to_key: toKey, type: "tests" });
        return true;
    };
    // Only known, non-test entities may be the target of a "tests" edge.
    const isSourceEntityKey = (key) => {
        const target = entityByKey.get(key);
        return Boolean(target) && !target.is_test;
    };
    // Pass 1: entity-level calls.
    for (const testEntity of testEntities) {
        const targets = callsFrom.get(testEntity.key);
        if (!targets)
            continue;
        for (const targetKey of targets) {
            if (!isSourceEntityKey(targetKey) || !link(testEntity.key, targetKey))
                continue;
            if (testEntity.file_path)
                filesWithCallEdges.add(testEntity.file_path);
        }
    }
    // Pass 2: file-module-level calls (file:<path> → target), attributed to the
    // file's first test entity.
    for (const [testFile, testEnts] of testEntitiesByFile) {
        if (filesWithCallEdges.has(testFile))
            continue;
        const fileTargets = callsFrom.get(`file:${testFile}`);
        if (!fileTargets)
            continue;
        const representative = testEnts[0];
        if (!representative)
            continue;
        for (const targetKey of fileTargets) {
            if (!isSourceEntityKey(targetKey) || !link(representative.key, targetKey))
                continue;
            filesWithCallEdges.add(testFile);
        }
    }
    // Pass 3: import fallback for test files with no call-based edges.
    for (const [testFile, testEnts] of testEntitiesByFile) {
        if (filesWithCallEdges.has(testFile))
            continue;
        const importedFiles = fileImportsFrom.get(testFile);
        if (!importedFiles)
            continue;
        const representative = testEnts[0];
        if (!representative)
            continue;
        for (const importedFile of importedFiles) {
            const sourceEnts = sourceEntitiesByFile.get(importedFile);
            if (!sourceEnts)
                continue;
            for (const srcEnt of sourceEnts) {
                // Types and interfaces are skipped to keep the test graph focused.
                if (srcEnt.kind === "type" || srcEnt.kind === "interface")
                    continue;
                link(representative.key, srcEnt.key);
            }
        }
    }
    return testEdges;
}
990
- // ── Phase 5: Derived Fields ──────────────────────────────────────
991
/**
 * Fill in `fan_in`, `fan_out` and `risk_level` on every entity (in place).
 *
 * fan_out counts edges whose `from_key` is the entity's key, fan_in counts edges
 * whose `to_key` is the entity's key; all edge types are counted.
 *
 * @param {object[]} entities - Entities to update (mutated).
 * @param {Array<{from_key: string, to_key: string}>} edges - Edges to count.
 */
function computeDerivedFields(entities, edges) {
    const inbound = new Map();
    const outbound = new Map();
    const bump = (counter, key) => {
        counter.set(key, (counter.get(key) ?? 0) + 1);
    };
    for (const { from_key: fromKey, to_key: toKey } of edges) {
        bump(outbound, fromKey);
        bump(inbound, toKey);
    }
    for (const entity of entities) {
        const fanIn = inbound.get(entity.key) ?? 0;
        const fanOut = outbound.get(entity.key) ?? 0;
        entity.fan_in = fanIn;
        entity.fan_out = fanOut;
        entity.risk_level = computeRiskLevel(fanIn, fanOut);
    }
}
1005
/**
 * Classify an entity's risk from its fan-in and fan-out.
 *
 * @param {number} fanIn - Number of inbound edges.
 * @param {number} fanOut - Number of outbound edges.
 * @returns {"high"|"medium"|"normal"} "high" at fan-in ≥ 10 or fan-out ≥ 15,
 *   "medium" at fan-in ≥ 5 or fan-out ≥ 8, otherwise "normal".
 */
function computeRiskLevel(fanIn, fanOut) {
    // Ordered from most to least severe: many callers (chokepoint) or many
    // callees (fragile hub) raise the level; the first matching tier wins.
    const tiers = [
        { level: "high", minFanIn: 10, minFanOut: 15 },
        { level: "medium", minFanIn: 5, minFanOut: 8 },
    ];
    for (const { level, minFanIn, minFanOut } of tiers) {
        if (fanIn >= minFanIn || fanOut >= minFanOut) {
            return level;
        }
    }
    return "normal";
}
1014
- // ── Phase 6: CozoDB Population ───────────────────────────────────
1015
/**
 * Insert all entities and edges into CozoDB.
 *
 * Steps, in order:
 *   R.1  one `file:<path>` entity (kind "module") per distinct entity file path
 *   —    every code entity (via insertEntity)
 *   R.2  a "contains" edge from each file entity to each entity in that file
 *   R.3b a "contains" edge from a class to each entity naming it as `parent_class`
 *        within the same file
 *   —    every supplied edge (via insertEdge)
 *
 * Individual insert failures are reported on stderr and do not abort the run.
 *
 * @param {object} graphStore - Graph store exposing a `db.run(query, params)` handle.
 * @param {object[]} entities - Code entities to insert.
 * @param {object[]} edges - Edges to insert.
 * @returns {Promise<void>}
 */
async function populateCozoDB(graphStore, entities, edges) {
    const containsEdgeQuery = `?[from_key, to_key, type, sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode] <- [[$from, $to, "contains", -1, "", "", false, "", 0, false, false, "", ""]]
:put edges { from_key, to_key, type => sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode }`;
    // R.1: Create file-level entities (file:<path> with kind="module")
    const filePaths = new Set();
    for (const e of entities) {
        if (e.file_path)
            filePaths.add(e.file_path);
    }
    for (const fp of filePaths) {
        try {
            await graphStore.db.run(`?[key, kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test] <- [[$key, "module", $name, $fp, 0, 0, "", "", 0, 0, "normal", $is_test]]
:put entities { key => kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test }`, { key: `file:${fp}`, name: basename(fp), fp, is_test: isTestFile(fp) });
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : JSON.stringify(err);
            process.stderr.write(`[unerr] ⚠ File entity insert failed for ${fp}: ${msg}\n`);
        }
    }
    // Batch insert code entities
    for (const entity of entities) {
        await insertEntity(graphStore, entity);
    }
    // R.2: Create contains edges (file → entity)
    for (const entity of entities) {
        if (!entity.file_path)
            continue;
        try {
            await graphStore.db.run(containsEdgeQuery, { from: `file:${entity.file_path}`, to: entity.key });
        }
        catch (err) {
            process.stderr.write(`[unerr] ⚠ Contains edge insert failed: ${err instanceof Error ? err.message : String(err)}\n`);
        }
    }
    // R.3b: Create class→method containment edges for entities with parent_class.
    // Classes are indexed once by (file, name) so each lookup is O(1) instead of a
    // linear scan per entity; only the first class per pair is kept, which matches
    // what a front-to-back search over `entities` would find.
    const classByFileAndName = new Map();
    for (const e of entities) {
        if (e.kind !== "class")
            continue;
        const classId = `${e.file_path}\0${e.name}`;
        if (!classByFileAndName.has(classId))
            classByFileAndName.set(classId, e);
    }
    for (const entity of entities) {
        if (!entity.parent_class || !entity.file_path)
            continue;
        const classEntity = classByFileAndName.get(`${entity.file_path}\0${entity.parent_class}`);
        if (!classEntity)
            continue;
        try {
            await graphStore.db.run(containsEdgeQuery, { from: classEntity.key, to: entity.key });
        }
        catch (err) {
            process.stderr.write(`[unerr] ⚠ Class→method edge insert failed: ${err instanceof Error ? err.message : String(err)}\n`);
        }
    }
    // Batch insert code edges
    for (const edge of edges) {
        await insertEdge(graphStore, edge);
    }
}
1072
/**
 * Upsert one entity row and its file-index row.
 *
 * Missing optional fields fall back to 0 / "" / "normal" / false. Failures are
 * written to stderr and never thrown.
 *
 * @param {object} graphStore - Graph store exposing `db.run(query, params)`.
 * @param {object} entity - Entity to store (key, kind, name, file_path, ...).
 * @returns {Promise<void>}
 */
async function insertEntity(graphStore, entity) {
    const entityPutQuery = `?[key, kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test] <- [[$key, $kind, $name, $fp, $sl, $el, $sig, $body, $fi, $fo, $rl, $is_test]]
:put entities { key => kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test }`;
    const fileIndexPutQuery = "?[file_path, entity_key] <- [[$fp, $key]] :put file_index { file_path, entity_key }";
    try {
        const entityParams = {
            key: entity.key,
            kind: entity.kind,
            name: entity.name,
            fp: entity.file_path,
            sl: entity.start_line ?? 0,
            el: entity.end_line ?? 0,
            sig: entity.signature ?? "",
            body: entity.body ?? "",
            fi: entity.fan_in ?? 0,
            fo: entity.fan_out ?? 0,
            rl: entity.risk_level ?? "normal",
            is_test: entity.is_test ?? false,
        };
        await graphStore.db.run(entityPutQuery, entityParams);
        // Keep the file → entity index in step with the entity row.
        const indexParams = { fp: entity.file_path, key: entity.key };
        await graphStore.db.run(fileIndexPutQuery, indexParams);
    }
    catch (err) {
        process.stderr.write(`[unerr] ⚠ Entity insert failed for ${entity.key}: ${formatUnknownError(err)}\n`);
    }
}
1096
/**
 * Upsert one edge row. Only from/to/type come from the edge; every other edge
 * column is written with the fixed defaults baked into the query.
 *
 * Failures are written to stderr and never thrown.
 *
 * @param {object} graphStore - Graph store exposing `db.run(query, params)`.
 * @param {{from_key: string, to_key: string, type: string}} edge - Edge to store.
 * @returns {Promise<void>}
 */
async function insertEdge(graphStore, edge) {
    const edgePutQuery = `?[from_key, to_key, type, sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode] <- [[$from, $to, $type, -1, "", "", false, "", 0, false, false, "", ""]]
:put edges { from_key, to_key, type => sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode }`;
    try {
        const edgeParams = { from: edge.from_key, to: edge.to_key, type: edge.type };
        await graphStore.db.run(edgePutQuery, edgeParams);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        process.stderr.write(`[unerr] ⚠ Edge insert failed (${edge.from_key} → ${edge.to_key}): ${reason}\n`);
    }
}
1109
- // ── Phase 6.5: L1 Edge Materialization ──────────────────────────
1110
/**
 * Materialize L1 aggregate edges from L0 entity data.
 *
 * Runs two aggregations over the stored edges (ignoring "contains" edges) and
 * writes the counts as weights:
 *   - file→file: edges grouped by the file paths of their endpoints → `file_edges`
 *   - class→class: edges grouped by the classes containing their endpoints → `class_edges`
 *
 * Each aggregation is independent; a failure in one is logged and does not
 * prevent the other from running.
 *
 * @param {object} graphStore - Graph store exposing `db.run(query, params)`.
 * @returns {Promise<void>}
 */
async function materializeL1Edges(graphStore) {
    const db = graphStore.db;
    // Run one aggregate query and upsert each resulting row into its target relation.
    const aggregateInto = async ({ selectQuery, putQuery, toParams, successLabel, failureLabel }) => {
        try {
            const result = await db.run(selectQuery);
            if (!result.rows?.length) {
                return;
            }
            for (const row of result.rows) {
                await db.run(putQuery, toParams(row));
            }
            log.info(`${successLabel}: ${result.rows.length} materialized`);
        }
        catch (err) {
            log.info(`${failureLabel}: ${err instanceof Error ? err.message : err}`);
        }
    };
    // 4B: Weighted file-to-file edges — aggregate L0 edges by file pair and type
    await aggregateInto({
        selectQuery: `
?[from_file, to_file, edge_type, count(from_key)] :=
*edges{from_key, to_key, type: edge_type},
edge_type != "contains",
*entities{key: from_key, file_path: from_file},
*entities{key: to_key, file_path: to_file},
from_file != to_file
`,
        putQuery: `?[from_file, to_file, edge_type, weight, updated_at] <- [[$ff, $tf, $et, $w, $ts]]
:put file_edges { from_file, to_file, edge_type => weight, updated_at }`,
        toParams: (row) => ({
            ff: row[0],
            tf: row[1],
            et: row[2],
            w: row[3],
            ts: Date.now(),
        }),
        successLabel: "L1 file edges",
        failureLabel: "L1 file edge materialization failed",
    });
    // 4C: Weighted class-to-class edges — aggregate L0 method edges by owning class
    await aggregateInto({
        selectQuery: `
?[from_class, to_class, edge_type, count(from_key)] :=
*edges{from_key, to_key, type: edge_type},
edge_type != "contains",
*edges{from_key: fc, to_key: from_key, type: "contains"},
*entities{key: fc, kind: "class"},
*edges{from_key: tc, to_key: to_key, type: "contains"},
*entities{key: tc, kind: "class"},
from_class = fc, to_class = tc,
from_class != to_class
`,
        putQuery: `?[from_class, to_class, edge_type, weight, updated_at] <- [[$fc, $tc, $et, $w, $ts]]
:put class_edges { from_class, to_class, edge_type => weight, updated_at }`,
        toParams: (row) => ({
            fc: row[0],
            tc: row[1],
            et: row[2],
            w: row[3],
            ts: Date.now(),
        }),
        successLabel: "L1 class edges",
        failureLabel: "L1 class edge materialization failed",
    });
}
1174
- // ── Phase 7: Community Detection ─────────────────────────────────
1175
/**
 * Run cascaded multi-level community detection over the stored graph and
 * persist the results.
 *
 * Reads entities, entity edges and (if available) materialized file edges,
 * hands them to `detectCascadedCommunities`, then writes back:
 *   - each entity's community id (`entities.community`),
 *   - macro-community metadata (`communities`),
 *   - per-file community assignments (`file_communities`).
 * Individual write failures are reported on stderr and skipped.
 *
 * @param {{ db: { run: Function } }} graphStore
 * @returns {Promise<number>} number of macro-communities; 0 when there are no
 *   entities or when the entity/edge query fails
 */
export async function runCommunityDetection(graphStore) {
    const db = graphStore.db;
    const warn = (context, failure) => {
        const reason = failure instanceof Error ? failure.message : String(failure);
        process.stderr.write(`[unerr] ⚠ ${context}: ${reason}\n`);
    };
    // Entities (key, kind, file_path)
    let entityRows;
    try {
        entityRows = await db.run("?[key, kind, file_path] := *entities{key, kind, file_path}");
    }
    catch (failure) {
        warn("Entity query failed during community detection", failure);
        return 0;
    }
    if (!entityRows?.rows?.length) {
        return 0;
    }
    const entities = entityRows.rows.map((row) => ({ key: row[0], kind: row[1], file_path: row[2] }));
    // Entity-level edges
    let edgeRows;
    try {
        edgeRows = await db.run("?[from_key, to_key, type] := *edges{from_key, to_key, type}");
    }
    catch (failure) {
        warn("Edge query failed during community detection", failure);
        return 0;
    }
    const entityEdges = (edgeRows?.rows ?? []).map((row) => ({ from_key: row[0], to_key: row[1], type: row[2] }));
    // Materialized file edges are optional: a failed read is treated as "none".
    let fileEdgeRows;
    try {
        fileEdgeRows = await db.run("?[from_file, to_file, edge_type, weight] := *file_edges{from_file, to_file, edge_type, weight}");
    }
    catch {
        fileEdgeRows = { rows: [] };
    }
    const fileEdges = (fileEdgeRows?.rows ?? []).map((row) => ({
        from_file: row[0],
        to_file: row[1],
        edge_type: row[2],
        weight: row[3],
    }));
    const detection = detectCascadedCommunities(fileEdges, entities, entityEdges);
    // Entity → community id
    for (const [key, cid] of detection.entityAssignments) {
        try {
            await db.run("?[key, community] <- [[$key, $cid]] :update entities { key => community }", { key, cid });
        }
        catch (failure) {
            warn("Community assignment update failed", failure);
        }
    }
    // Macro-community metadata
    for (const macro of detection.macroCommunities) {
        const { id, label, size, cohesion } = macro;
        try {
            await db.run("?[id, label, size, cohesion] <- [[$id, $label, $size, $cohesion]] :put communities { id => label, size, cohesion }", { id, label, size, cohesion });
        }
        catch (failure) {
            warn("Community metadata write failed", failure);
        }
    }
    // File → community
    const fileCommunityScript = [
        "?[file_path, community, label, cohesion, updated_at] <- [[$fp, $cid, $label, $cohesion, $ts]]",
        ":put file_communities { file_path => community, label, cohesion, updated_at }",
    ].join("\n");
    for (const assignment of detection.fileCommunities) {
        try {
            await db.run(fileCommunityScript, {
                fp: assignment.file_path,
                cid: assignment.community,
                label: assignment.label,
                cohesion: assignment.cohesion,
                ts: Date.now(),
            });
        }
        catch (failure) {
            warn("File community write failed", failure);
        }
    }
    const macroCount = detection.macroCommunities.length;
    log.info(`Community detection: ${macroCount} macro-communities, ${detection.entityAssignments.size} entity assignments`);
    return macroCount;
}
1261
/**
 * Phase 8: detect conventions in the indexed graph and turn them into rules.
 *
 * Calls `detectLocalConventions` on the store's database, loads the detected
 * patterns into the store, generates rules for `repoId` from the detected
 * conventions, and loads those rules as well. Any error along the way is
 * logged and reported as zero patterns / zero rules.
 *
 * @param {{ db: object, loadPatterns: Function, loadRules: Function }} graphStore
 * @param {string} repoId - repository identifier passed to rule generation
 * @returns {Promise<{ patternCount: number, ruleCount: number }>}
 */
export async function runConventionDetection(graphStore, repoId) {
    try {
        const detected = await detectLocalConventions(graphStore.db);
        await graphStore.loadPatterns(detected.patterns);
        const generated = generateLocalRules(detected.conventions, repoId);
        await graphStore.loadRules(generated.rules);
        const patternCount = detected.patterns.length;
        const ruleCount = generated.rules.length;
        log.info(`Conventions: ${patternCount} patterns detected, ${ruleCount} rules generated`);
        // Per-category breakdown, printed only for categories that found something.
        const breakdown = [
            ["Naming", detected.stats.naming],
            ["Structure", detected.stats.structure],
            ["Import direction", detected.stats.importDirection],
        ];
        for (const [label, count] of breakdown) {
            if (count > 0) {
                log.info(` ${label}: ${count}`);
            }
        }
        return { patternCount, ruleCount };
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        log.info(`Convention detection skipped: ${reason}`);
        return { patternCount: 0, ruleCount: 0 };
    }
}
1292
- // ── Helpers ──────────────────────────────────────────────────────
1293
/**
 * Delete everything the graph holds for one file.
 *
 * Looks up the file's entity keys through `file_index`, then removes, in this
 * order: the entities, the file's `file_index` rows, every edge that starts or
 * ends at one of those entities, and their search tokens. Each removal is
 * best-effort; only the initial key lookup can reject.
 *
 * @param {{ db: { run: Function } }} graphStore
 * @param {string} relPath - file path as stored in `file_index`
 * @returns {Promise<void>}
 */
async function removeFileEntities(graphStore, relPath) {
    const db = graphStore.db;
    // Run a removal script and ignore any failure (the target may not exist).
    const attempt = async (script, params) => {
        try {
            await db.run(script, params);
        }
        catch {
            /* best effort */
        }
    };
    const edgeRemoval = (column) => `?[from_key, to_key, type] := *edges[from_key, to_key, type, _, _, _, _, _, _, _, _, _, _], ${column} = $key :rm edges { from_key, to_key, type }`;
    const lookup = await db.run("?[entity_key] := *file_index[file_path, entity_key], file_path = $fp", { fp: relPath });
    const entityKeys = lookup.rows.map((row) => row[0]);
    for (const key of entityKeys) {
        await attempt("?[key] <- [[$key]] :rm entities { key }", { key });
    }
    await attempt("?[file_path, entity_key] := *file_index[file_path, entity_key], file_path = $fp :rm file_index { file_path, entity_key }", { fp: relPath });
    for (const key of entityKeys) {
        await attempt(edgeRemoval("from_key"), { key });
        await attempt(edgeRemoval("to_key"), { key });
    }
    for (const key of entityKeys) {
        await attempt("?[token, entity_key] := *search_tokens[token, entity_key], entity_key = $key :rm search_tokens { token, entity_key }", { key });
    }
}
1340
/**
 * Prune entities that the current index run did not produce.
 *
 * Every entity key in the graph that is absent from `liveKeys` is treated as
 * stale: its edges, search tokens and file-index rows are removed, followed by
 * the entity itself. Pruning after indexing, rather than clearing the graph
 * up front, keeps queries valid during the reindex window.
 *
 * For a given entity the removals run in sequence and stop at the first
 * failure; the failure is ignored and the next entity is processed.
 *
 * @param {{ db: { run: Function } }} graphStore
 * @param {{ has(key: string): boolean }} liveKeys - keys produced by the current run
 * @returns {Promise<void>}
 */
async function removeOrphanedEntities(graphStore, liveKeys) {
    const db = graphStore.db;
    // Ordered so the entity row goes last, after everything that references it.
    const removalScripts = [
        "?[from_key, to_key, type] := *edges{from_key, to_key, type}, from_key = $key :rm edges {from_key, to_key, type}",
        "?[from_key, to_key, type] := *edges{from_key, to_key, type}, to_key = $key :rm edges {from_key, to_key, type}",
        "?[token, entity_key] := *search_tokens[token, entity_key], entity_key = $key :rm search_tokens {token, entity_key}",
        "?[file_path, entity_key] := *file_index[file_path, entity_key], entity_key = $key :rm file_index {file_path, entity_key}",
        "?[key] <- [[$key]] :rm entities {key}",
    ];
    let storedKeys;
    try {
        const listing = await db.run("?[key] := *entities{key}");
        storedKeys = listing.rows.map((row) => row[0]);
    }
    catch {
        // The entities relation may not exist yet (first boot): nothing to prune.
        return;
    }
    const stale = storedKeys.filter((candidate) => !liveKeys.has(candidate));
    if (stale.length === 0) {
        return;
    }
    log.info(`Removing ${stale.length} orphaned entities from graph`);
    for (const key of stale) {
        try {
            for (const script of removalScripts) {
                await db.run(script, { key });
            }
        }
        catch {
            // Per-entity failures are ignored; continue with the next key.
        }
    }
}
1377
/**
 * Map a name to the key of the first entity in `entities` carrying that name.
 *
 * The key is built with `entityKey` from `repoId`, `filePath` and the matched
 * entity's kind, name and signature.
 *
 * @param {string} name - entity name to look up; "__file__" never resolves
 * @param {string} filePath - path used when building the key
 * @param {string} repoId - repository identifier used when building the key
 * @param {Array<{ name: string, kind: string, signature?: string }>} entities - candidates
 * @returns {string|null} the entity key, or null when nothing matches
 */
function resolveEntityName(name, filePath, repoId, entities) {
    if (name !== "__file__") {
        for (const candidate of entities) {
            if (candidate.name === name) {
                return entityKey(repoId, filePath, candidate.kind, candidate.name, candidate.signature);
            }
        }
    }
    return null;
}
1387
/**
 * Look an entity name up across the whole stored graph.
 *
 * Issues a `:limit 1` query against `entities`, so when several entities
 * share the name, whichever row the database returns first is used.
 *
 * @param {string} name - entity name to search for
 * @param {{ db: { run: Function } }} graphStore
 * @returns {Promise<string|null>} key of a matching entity; null when there is
 *   no match or the query fails
 */
async function resolveEntityNameGlobal(name, graphStore) {
    try {
        const { rows } = await graphStore.db.run("?[key] := *entities{key, name}, name = $name :limit 1", { name });
        if (!(rows.length > 0)) {
            return null;
        }
        return rows[0]?.[0];
    }
    catch {
        // A failed lookup is treated the same as "not found".
        return null;
    }
}
1400
- // ── Document File Discovery & Indexing ──────────────────────────
1401
/**
 * Per-extension regular expressions used to pull searchable names out of
 * document files (headings, resource names, type names, ...).
 *
 * Capture group 1 (and group 2, for the Terraform/HCL pattern) holds the text
 * to tokenize. Extensions that share a syntax are listed together, but every
 * extension still receives its own RegExp instance: the patterns carry the
 * `g` flag and are therefore stateful through `lastIndex`.
 */
const DOC_TOKEN_PATTERNS = Object.fromEntries([
    [[".md", ".mdx"], /^#{1,6}\s+(.+)$/gm],
    [[".tf", ".hcl"], /^(?:resource|module|variable|data|output)\s+"([^"]+)"(?:\s+"([^"]+)")?/gm],
    [[".sql"], /CREATE\s+(?:TABLE|VIEW|FUNCTION|PROCEDURE|INDEX)\s+(?:IF\s+NOT\s+EXISTS\s+)?["'`]?(\w+)/gim],
    [[".graphql", ".gql"], /^(?:type|query|mutation|subscription|input|enum|interface)\s+(\w+)/gm],
    [[".sh", ".bash", ".zsh"], /^(?:function\s+)?(\w+)\s*\(\)/gm],
    [[".proto"], /^(?:message|service|enum|rpc)\s+(\w+)/gm],
    [[".prisma"], /^(?:model|enum|type|datasource|generator)\s+(\w+)/gm],
    [[".smithy"], /^(?:service|resource|operation|structure|union|enum)\s+(\w+)/gm],
    [[".cmake"], /^(?:project|add_library|add_executable|find_package)\s*\(\s*(\w+)/gm],
    [[".bazel"], /^(?:cc_library|cc_binary|java_library|py_library|go_library)\s*\(\s*name\s*=\s*"([^"]+)"/gm],
    [[".bzl"], /^def\s+(\w+)\s*\(/gm],
    [[".tex", ".latex"], /\\(?:section|subsection|chapter|title)\{([^}]+)\}/gm],
    [[".html", ".vue", ".svelte"], /<(?:h[1-6]|title)[^>]*>([^<]+)</gim],
    [[".thrift"], /^(?:service|struct|enum|exception|typedef)\s+(\w+)/gm],
    [[".avsc"], /"name"\s*:\s*"([^"]+)"/gm],
].flatMap(([extensions, template]) => extensions.map((ext) => [ext, new RegExp(template.source, template.flags)])));
1427
/**
 * Recursively collect document files under `dir` into `files`.
 *
 * Directories are skipped when they are listed in `EXCLUDED_DIRS`, when they
 * are dot-directories not whitelisted in `CI_CD_DIRS`, or when their path
 * relative to `projectRoot` is rejected by `isExcludedPath`. Files are kept
 * when they are regular files no larger than `MAX_FILE_SIZE`, are not already
 * covered by `INDEXABLE_EXTENSIONS`, and match either `DOCUMENT_EXTENSIONS`
 * (lower-cased extension) or `DOCUMENT_NAMES` (exact filename). Unreadable
 * directories and entries that cannot be stat'ed are silently skipped.
 *
 * @param {string} dir - directory to scan
 * @param {string[]} files - output array; matching absolute paths are appended
 * @param {string} projectRoot - root used for path-aware exclusion
 * @returns {void}
 */
function walkDirForDocs(dir, files, projectRoot) {
    let children;
    try {
        children = readdirSync(dir);
    }
    catch {
        return;
    }
    for (const child of children) {
        const childPath = join(dir, child);
        let info;
        try {
            info = statSync(childPath);
        }
        catch {
            continue;
        }
        if (info.isDirectory()) {
            // The path-aware check comes last: even under a whitelisted dot-dir,
            // sandboxed paths such as `.claude/worktrees/` must stay out.
            const skipped = EXCLUDED_DIRS.has(child)
                || (child.startsWith(".") && !CI_CD_DIRS.has(child))
                || isExcludedPath(relative(projectRoot, childPath));
            if (!skipped) {
                walkDirForDocs(childPath, files, projectRoot);
            }
            continue;
        }
        if (!info.isFile() || info.size > MAX_FILE_SIZE) {
            continue;
        }
        const extension = extname(child).toLowerCase();
        // Code files are handled by the code indexer, not here.
        if (INDEXABLE_EXTENSIONS.has(extension)) {
            continue;
        }
        if (DOCUMENT_EXTENSIONS.has(extension) || DOCUMENT_NAMES.has(basename(child))) {
            files.push(childPath);
        }
    }
}
1476
/**
 * List every document/config file under the project root.
 *
 * @param {string} projectRoot - directory to scan; also used as the exclusion root
 * @returns {string[]} paths collected by `walkDirForDocs`
 */
function discoverDocumentFiles(projectRoot) {
    const discovered = [];
    walkDirForDocs(projectRoot, discovered, projectRoot);
    return discovered;
}
1482
/**
 * Extract searchable tokens from a document file.
 *
 * Tokens come from three sources, deduplicated in first-seen order:
 *   1. the file name (`basename(relPath)`),
 *   2. each directory segment of `relPath` that is not in `EXCLUDED_DIRS`,
 *   3. the file content, limited to the first `DOC_READ_LIMIT` characters:
 *      capture groups 1 and 2 of the extension's `DOC_TOKEN_PATTERNS` entry,
 *      plus top-level (non-indented) keys for `.yaml` / `.yml` files.
 *
 * If the file cannot be read, only the name and path tokens are returned.
 *
 * @param {string} absPath - absolute path, used for reading and extension lookup
 * @param {string} relPath - project-relative path, used for name and directory tokens
 * @returns {string[]} unique tokens
 */
function extractDocTokens(absPath, relPath) {
    const tokens = new Set(tokenize(basename(relPath)));
    const addTokensOf = (text) => {
        for (const t of tokenize(text)) {
            tokens.add(t);
        }
    };
    // Split on both separators: `path.relative` yields backslash-separated
    // paths on Windows, which a plain "/" split would leave as one segment,
    // dropping all directory tokens and bypassing the EXCLUDED_DIRS filter.
    for (const segment of relPath.split(/[\\/]/).slice(0, -1)) {
        if (segment && !EXCLUDED_DIRS.has(segment)) {
            addTokensOf(segment);
        }
    }
    const ext = extname(absPath).toLowerCase();
    try {
        const content = readFileSync(absPath, "utf-8").slice(0, DOC_READ_LIMIT);
        const pattern = DOC_TOKEN_PATTERNS[ext];
        if (pattern) {
            // The patterns are shared, stateful /g regexes and matchAll starts
            // from the regex's current lastIndex, so rewind before scanning.
            pattern.lastIndex = 0;
            for (const match of content.matchAll(pattern)) {
                if (match[1]) {
                    addTokensOf(match[1]);
                }
                if (match[2]) {
                    addTokensOf(match[2]);
                }
            }
        }
        // For YAML: extract top-level keys (non-indented `key:` lines)
        if (ext === ".yaml" || ext === ".yml") {
            for (const match of content.matchAll(/^([a-zA-Z_][\w-]*)\s*:/gm)) {
                if (match[1]) {
                    addTokensOf(match[1]);
                }
            }
        }
    }
    catch {
        /* unreadable — filename/path tokens only */
    }
    return [...tokens];
}
1531
/**
 * Make document/config files discoverable through search.
 *
 * For every file found by `discoverDocumentFiles`, writes a lightweight
 * entity (kind "document", key `doc:<relative path>`), its search tokens and
 * a `file_index` row. When the entity write fails, the tokens and index row
 * for that file are skipped; its key is still part of the returned list.
 *
 * @param {string} projectRoot - project root directory
 * @param {{ db: { run: Function } }} graphStore
 * @returns {Promise<string[]>} the doc entity keys, for orphan cleanup
 */
async function indexDocumentFiles(projectRoot, graphStore) {
    const db = graphStore.db;
    const documentPaths = discoverDocumentFiles(projectRoot);
    if (documentPaths.length === 0) {
        return [];
    }
    const docKeys = [];
    for (const absPath of documentPaths) {
        const fp = relative(projectRoot, absPath);
        const key = `doc:${fp}`;
        docKeys.push(key);
        // Entity row first: without it there is nothing for tokens to point at.
        try {
            await db.run('?[key, kind, name, file_path, fan_in, fan_out, risk_level] <- [[$key, "document", $name, $fp, 0, 0, "low"]] :put entities { key => kind, name, file_path, fan_in, fan_out, risk_level }', { key, name: basename(fp), fp });
        }
        catch {
            continue;
        }
        // Search tokens; a failed put (e.g. a duplicate) is harmless.
        for (const token of extractDocTokens(absPath, fp)) {
            try {
                await db.run("?[token, entity_key] <- [[$token, $key]] :put search_tokens { token, entity_key }", { token, key });
            }
            catch {
                /* duplicate — safe */
            }
        }
        // file_index row linking the path to the document entity
        try {
            await db.run("?[file_path, entity_key] <- [[$fp, $key]] :put file_index { file_path, entity_key }", { fp, key });
        }
        catch {
            /* safe */
        }
    }
    if (docKeys.length > 0) {
        log.info(`Documents: ${docKeys.length} doc/config files indexed for search`);
    }
    return docKeys;
}