@unerr-ai/unerr 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -45
- package/dist/cli.js +37443 -36022
- package/package.json +2 -1
- package/dist/behaviors/agent-llm-bridge.js +0 -166
- package/dist/behaviors/architecture-guard.js +0 -256
- package/dist/behaviors/auto-doc.js +0 -247
- package/dist/behaviors/cascade-guard.js +0 -289
- package/dist/behaviors/change-narrative.js +0 -270
- package/dist/behaviors/convention-drift.js +0 -290
- package/dist/behaviors/framework.js +0 -235
- package/dist/behaviors/guard-formatter.js +0 -44
- package/dist/behaviors/incomplete-work.js +0 -270
- package/dist/behaviors/loop-breaker.js +0 -300
- package/dist/behaviors/session-continuity.js +0 -208
- package/dist/commands/branches.js +0 -97
- package/dist/commands/check-commit.js +0 -225
- package/dist/commands/compress-output.js +0 -64
- package/dist/commands/config-verify.js +0 -243
- package/dist/commands/daemon.js +0 -905
- package/dist/commands/dashboard.js +0 -52
- package/dist/commands/debug.js +0 -200
- package/dist/commands/enrich.js +0 -184
- package/dist/commands/exec.js +0 -233
- package/dist/commands/gain.js +0 -156
- package/dist/commands/hook.js +0 -88
- package/dist/commands/index.js +0 -88
- package/dist/commands/init.js +0 -74
- package/dist/commands/install.js +0 -505
- package/dist/commands/learn.js +0 -116
- package/dist/commands/manifest.js +0 -193
- package/dist/commands/rewind.js +0 -103
- package/dist/commands/serve.js +0 -19
- package/dist/commands/setup-wizard.js +0 -414
- package/dist/commands/skills.js +0 -64
- package/dist/commands/stats.js +0 -20
- package/dist/commands/status.js +0 -654
- package/dist/commands/timeline.js +0 -139
- package/dist/commands/uninstall.js +0 -230
- package/dist/components/App.js +0 -109
- package/dist/components/Banner.js +0 -12
- package/dist/components/ConfirmPrompt.js +0 -25
- package/dist/components/DriftSummary.js +0 -23
- package/dist/components/GradeBadge.js +0 -15
- package/dist/components/HealthCard.js +0 -18
- package/dist/components/InkSpinner.js +0 -22
- package/dist/components/InputBox.js +0 -17
- package/dist/components/KeyValue.js +0 -13
- package/dist/components/MessageList.js +0 -14
- package/dist/components/ProgressBar.js +0 -26
- package/dist/components/Section.js +0 -16
- package/dist/components/SessionSummaryCard.js +0 -73
- package/dist/components/StartupDisplay.js +0 -24
- package/dist/components/StatusDashboard.js +0 -57
- package/dist/components/StatusLine.js +0 -8
- package/dist/components/StepLine.js +0 -22
- package/dist/components/Theme.js +0 -20
- package/dist/components/ToolProgress.js +0 -8
- package/dist/components/ViolationList.js +0 -21
- package/dist/components/render.js +0 -13
- package/dist/config/agent-registry.js +0 -237
- package/dist/config/claude-settings-hooks.js +0 -304
- package/dist/config/hook-installer.js +0 -65
- package/dist/config/instruction-writer.js +0 -388
- package/dist/config/mcp-config-writer.js +0 -266
- package/dist/config/settings.js +0 -174
- package/dist/config/tool-detector.js +0 -42
- package/dist/config/value-surfacing.js +0 -119
- package/dist/core/context-assembly.js +0 -108
- package/dist/core/conversation.js +0 -33
- package/dist/core/local-chat-provider.js +0 -475
- package/dist/core/provider-factory.js +0 -55
- package/dist/core/providers.js +0 -90
- package/dist/core/query-engine.js +0 -174
- package/dist/daemon/api.js +0 -312
- package/dist/daemon/autostart.js +0 -119
- package/dist/daemon/bootstrap.js +0 -39
- package/dist/daemon/client.js +0 -164
- package/dist/daemon/detect-ci.js +0 -81
- package/dist/daemon/platform-linux.js +0 -146
- package/dist/daemon/platform-macos.js +0 -134
- package/dist/daemon/platform-windows.js +0 -116
- package/dist/daemon/process-manager.js +0 -299
- package/dist/daemon/protocol.js +0 -23
- package/dist/daemon/registry.js +0 -270
- package/dist/daemon/settings-schema.js +0 -72
- package/dist/daemon/system-health.js +0 -134
- package/dist/daemon/version-checker.js +0 -262
- package/dist/daemon/warm-start.js +0 -223
- package/dist/entrypoints/cli.js +0 -1043
- package/dist/entrypoints/daemon.js +0 -380
- package/dist/entrypoints/repl.js +0 -147
- package/dist/hooks/adapters/claude-code.js +0 -90
- package/dist/hooks/adapters/cline.js +0 -100
- package/dist/hooks/adapters/cursor.js +0 -98
- package/dist/hooks/hook-dedup.js +0 -79
- package/dist/hooks/hook-runner.js +0 -113
- package/dist/hooks/navigation-hooks.js +0 -175
- package/dist/hooks/prompt-hooks.js +0 -63
- package/dist/hooks/shell-hooks.js +0 -47
- package/dist/ignore.js +0 -111
- package/dist/intelligence/approach-suggester.js +0 -61
- package/dist/intelligence/ast-extractor.js +0 -2615
- package/dist/intelligence/ast-worker.js +0 -34
- package/dist/intelligence/background-indexer.js +0 -121
- package/dist/intelligence/blast-radius.js +0 -200
- package/dist/intelligence/community-detection.js +0 -691
- package/dist/intelligence/community-detector.js +0 -184
- package/dist/intelligence/computation-scheduler.js +0 -75
- package/dist/intelligence/confidence-propagation.js +0 -47
- package/dist/intelligence/convention-detector.js +0 -242
- package/dist/intelligence/convention-learner.js +0 -205
- package/dist/intelligence/convention-matcher.js +0 -205
- package/dist/intelligence/cozo-schema.js +0 -376
- package/dist/intelligence/decision-point-detector.js +0 -90
- package/dist/intelligence/deep-dive-tools.js +0 -586
- package/dist/intelligence/durability-scorer.js +0 -84
- package/dist/intelligence/exploration-cost.js +0 -204
- package/dist/intelligence/exploration-pattern-tracker.js +0 -61
- package/dist/intelligence/fact-generator.js +0 -322
- package/dist/intelligence/facts-schema.js +0 -90
- package/dist/intelligence/file-intelligence.js +0 -59
- package/dist/intelligence/graph-holder.js +0 -220
- package/dist/intelligence/graph-temporal-joiner.js +0 -238
- package/dist/intelligence/health-grade.js +0 -423
- package/dist/intelligence/health-grader.js +0 -200
- package/dist/intelligence/health-map-data.js +0 -259
- package/dist/intelligence/import-symbols.js +0 -136
- package/dist/intelligence/incremental-indexer.js +0 -658
- package/dist/intelligence/indexer/centrality.js +0 -62
- package/dist/intelligence/indexer/cfg-context.js +0 -95
- package/dist/intelligence/indexer/confidence.js +0 -34
- package/dist/intelligence/indexer/cross-file-resolver.js +0 -104
- package/dist/intelligence/indexer/edge-repair.js +0 -89
- package/dist/intelligence/indexer/entity-key.js +0 -17
- package/dist/intelligence/indexer/export-map.js +0 -132
- package/dist/intelligence/indexer/git-cochange.js +0 -128
- package/dist/intelligence/indexer/graph-patch.js +0 -147
- package/dist/intelligence/indexer/incremental.js +0 -78
- package/dist/intelligence/indexer/ingest.js +0 -160
- package/dist/intelligence/indexer/language-detect.js +0 -226
- package/dist/intelligence/indexer/metadata.js +0 -63
- package/dist/intelligence/indexer/mutation-tracker.js +0 -79
- package/dist/intelligence/indexer/orchestrator.js +0 -155
- package/dist/intelligence/indexer/plugin-interface.js +0 -31
- package/dist/intelligence/indexer/plugins/csharp.js +0 -440
- package/dist/intelligence/indexer/plugins/go.js +0 -335
- package/dist/intelligence/indexer/plugins/java.js +0 -370
- package/dist/intelligence/indexer/plugins/python.js +0 -358
- package/dist/intelligence/indexer/plugins/regex-fallback.js +0 -82
- package/dist/intelligence/indexer/plugins/ruby.js +0 -290
- package/dist/intelligence/indexer/plugins/rust.js +0 -484
- package/dist/intelligence/indexer/plugins/tier2-generic.js +0 -310
- package/dist/intelligence/indexer/plugins/typescript.js +0 -456
- package/dist/intelligence/indexer/resource-monitor.js +0 -93
- package/dist/intelligence/indexer/scip/decoder.js +0 -253
- package/dist/intelligence/indexer/scip/detector.js +0 -232
- package/dist/intelligence/indexer/scip/downloader.js +0 -427
- package/dist/intelligence/indexer/scip/fallback.js +0 -34
- package/dist/intelligence/indexer/scip/merger.js +0 -109
- package/dist/intelligence/indexer/scip/orchestrator.js +0 -433
- package/dist/intelligence/indexer/scip/runner.js +0 -98
- package/dist/intelligence/indexer/snapshot.js +0 -66
- package/dist/intelligence/indexer/test-detector.js +0 -196
- package/dist/intelligence/indexer/watch-integration.js +0 -61
- package/dist/intelligence/indexer/worker.js +0 -85
- package/dist/intelligence/local-convention-detector.js +0 -437
- package/dist/intelligence/local-embeddings.js +0 -190
- package/dist/intelligence/local-graph.js +0 -1946
- package/dist/intelligence/local-indexer.js +0 -1575
- package/dist/intelligence/local-llm.js +0 -163
- package/dist/intelligence/local-rule-generator.js +0 -154
- package/dist/intelligence/local-snapshot.js +0 -213
- package/dist/intelligence/negative-knowledge.js +0 -103
- package/dist/intelligence/persistent-db.js +0 -85
- package/dist/intelligence/query-router.js +0 -2556
- package/dist/intelligence/risk-classifier.js +0 -116
- package/dist/intelligence/rule-evaluator.js +0 -380
- package/dist/intelligence/rule-generator.js +0 -49
- package/dist/intelligence/search-index.js +0 -173
- package/dist/intelligence/semantic/docstring-extractor.js +0 -67
- package/dist/intelligence/semantic/embedding-store.js +0 -52
- package/dist/intelligence/semantic/enrichment-orchestrator.js +0 -48
- package/dist/intelligence/semantic/git-message-miner.js +0 -114
- package/dist/intelligence/semantic/identifier-tokenizer.js +0 -51
- package/dist/intelligence/semantic/node2vec-embeddings.js +0 -71
- package/dist/intelligence/semantic/node2vec-walks.js +0 -103
- package/dist/intelligence/semantic/path-domain-inference.js +0 -112
- package/dist/intelligence/semantic/similarity-engine.js +0 -60
- package/dist/intelligence/semantic/tfidf-vectors.js +0 -88
- package/dist/intelligence/session-brief-builder.js +0 -159
- package/dist/intelligence/session-context.js +0 -221
- package/dist/intelligence/session-health-monitor.js +0 -211
- package/dist/intelligence/session-narrative.js +0 -197
- package/dist/intelligence/session-pattern-analyzer.js +0 -218
- package/dist/intelligence/signal-scorer.js +0 -390
- package/dist/intelligence/signal-show-store.js +0 -182
- package/dist/intelligence/smart-truncate.js +0 -158
- package/dist/intelligence/subgraph-cache.js +0 -88
- package/dist/intelligence/temporal-facts.js +0 -494
- package/dist/intelligence/token-estimator.js +0 -100
- package/dist/intelligence/tool-injector.js +0 -87
- package/dist/intelligence/tree-sitter-loader.js +0 -71
- package/dist/intelligence/worker-pool.js +0 -116
- package/dist/proxy/arg-validator.js +0 -79
- package/dist/proxy/auto-bootstrap.js +0 -167
- package/dist/proxy/bridge.js +0 -147
- package/dist/proxy/budget-enforcer.js +0 -70
- package/dist/proxy/compression-quality-monitor.js +0 -160
- package/dist/proxy/compression-stats.js +0 -51
- package/dist/proxy/context-rot-detector.js +0 -137
- package/dist/proxy/drift-detector.js +0 -139
- package/dist/proxy/efficiency-tracker.js +0 -79
- package/dist/proxy/fact-ranking.js +0 -154
- package/dist/proxy/format-encoder.js +0 -266
- package/dist/proxy/http-transport.js +0 -90
- package/dist/proxy/lifecycle-actor.js +0 -55
- package/dist/proxy/lifecycle-machine.js +0 -187
- package/dist/proxy/log-tailer.js +0 -265
- package/dist/proxy/model-pricing.js +0 -98
- package/dist/proxy/network-firewall.js +0 -141
- package/dist/proxy/nudge-state.js +0 -93
- package/dist/proxy/output-compressor.js +0 -185
- package/dist/proxy/pid-lock.js +0 -291
- package/dist/proxy/proxy-context.js +0 -11
- package/dist/proxy/proxy.js +0 -2633
- package/dist/proxy/response-enrichment.js +0 -32
- package/dist/proxy/response-envelope.js +0 -313
- package/dist/proxy/session-dedup.js +0 -82
- package/dist/proxy/session-legend.js +0 -30
- package/dist/proxy/session-persistence.js +0 -210
- package/dist/proxy/session-resume.js +0 -94
- package/dist/proxy/session-stats.js +0 -513
- package/dist/proxy/shell-classifier.js +0 -1346
- package/dist/proxy/shell-compression-log.js +0 -93
- package/dist/proxy/shell-compressor.js +0 -390
- package/dist/proxy/shell-graph-boost.js +0 -202
- package/dist/proxy/shell-monitor-map.js +0 -18
- package/dist/proxy/shell-stats.js +0 -54
- package/dist/proxy/shell-strategies/cloud.js +0 -215
- package/dist/proxy/shell-strategies/diff.js +0 -159
- package/dist/proxy/shell-strategies/error-diagnostic.js +0 -796
- package/dist/proxy/shell-strategies/filter-dsl.js +0 -358
- package/dist/proxy/shell-strategies/git-status.js +0 -177
- package/dist/proxy/shell-strategies/key-value.js +0 -193
- package/dist/proxy/shell-strategies/log-text.js +0 -154
- package/dist/proxy/shell-strategies/omni.js +0 -188
- package/dist/proxy/shell-strategies/progress.js +0 -55
- package/dist/proxy/shell-strategies/redact.js +0 -76
- package/dist/proxy/shell-strategies/structured.js +0 -241
- package/dist/proxy/shell-strategies/tabular.js +0 -243
- package/dist/proxy/shell-strategies/test-results-types.js +0 -13
- package/dist/proxy/shell-strategies/test-results.js +0 -784
- package/dist/proxy/shell-strategies/tree-paths.js +0 -144
- package/dist/proxy/shell-strategies/yaml.js +0 -182
- package/dist/proxy/shell-tee.js +0 -111
- package/dist/proxy/signal-dedup.js +0 -171
- package/dist/proxy/startup-renderer.js +0 -158
- package/dist/proxy/task-token-display.js +0 -38
- package/dist/proxy/token-counter.js +0 -61
- package/dist/proxy/tool-clusters.js +0 -273
- package/dist/proxy/tool-definitions.js +0 -525
- package/dist/proxy/transport-mux.js +0 -229
- package/dist/proxy/wire-cap.js +0 -268
- package/dist/rules/developer.mozilla.org.json +0 -9
- package/dist/rules/github.com.json +0 -21
- package/dist/schemas/api/skills.js +0 -19
- package/dist/schemas/common/errors.js +0 -7
- package/dist/schemas/common/headers.js +0 -5
- package/dist/schemas/entities/edge.js +0 -25
- package/dist/schemas/entities/entity.js +0 -22
- package/dist/schemas/entities/rule.js +0 -18
- package/dist/schemas/index.js +0 -14
- package/dist/server/event-bus.js +0 -59
- package/dist/server/http.js +0 -156
- package/dist/server/middleware.js +0 -70
- package/dist/server/routes/drift.js +0 -97
- package/dist/server/routes/intelligence.js +0 -1217
- package/dist/server/routes/reasoning-quality.js +0 -444
- package/dist/server/routes/session.js +0 -86
- package/dist/server/routes/stream.js +0 -120
- package/dist/server/routes/system.js +0 -73
- package/dist/server/routes/temporal.js +0 -170
- package/dist/server/routes/timeline.js +0 -232
- package/dist/server/routes/token-flow.js +0 -403
- package/dist/skills/effectiveness-tracker.js +0 -93
- package/dist/skills/local-pack.js +0 -380
- package/dist/skills/resolver.js +0 -495
- package/dist/state-detector.js +0 -83
- package/dist/timeline/intent-detector.js +0 -263
- package/dist/timeline/loop-miner.js +0 -140
- package/dist/timeline/open-threads.js +0 -49
- package/dist/timeline/signal-reinforcer.js +0 -62
- package/dist/timeline/timeline-bootstrap.js +0 -151
- package/dist/timeline/timeline-store.js +0 -618
- package/dist/tools/coding/bash.js +0 -49
- package/dist/tools/coding/file-edit.js +0 -72
- package/dist/tools/coding/file-outline.js +0 -227
- package/dist/tools/coding/file-read-protocol.js +0 -425
- package/dist/tools/coding/file-read.js +0 -35
- package/dist/tools/coding/file-write.js +0 -43
- package/dist/tools/coding/glob-tool.js +0 -109
- package/dist/tools/coding/grep.js +0 -162
- package/dist/tools/coding/index.js +0 -27
- package/dist/tools/intelligence/index.js +0 -269
- package/dist/tools/intelligence/record-fact.js +0 -48
- package/dist/tools/intelligence/timeline-markers.js +0 -130
- package/dist/tools/registry.js +0 -47
- package/dist/tools/types.js +0 -8
- package/dist/tracking/auto-snapshot-triggers.js +0 -246
- package/dist/tracking/branch-context.js +0 -115
- package/dist/tracking/branch-snapshot.js +0 -217
- package/dist/tracking/causal-bridge.js +0 -317
- package/dist/tracking/circuit-breaker.js +0 -147
- package/dist/tracking/commit-watcher.js +0 -114
- package/dist/tracking/context-ledger.js +0 -119
- package/dist/tracking/correction-detector.js +0 -324
- package/dist/tracking/drift-tracker.js +0 -874
- package/dist/tracking/durability-tracker.js +0 -94
- package/dist/tracking/entity-rewind.js +0 -200
- package/dist/tracking/file-hash-state.js +0 -114
- package/dist/tracking/git-attribution.js +0 -132
- package/dist/tracking/git-trailers.js +0 -171
- package/dist/tracking/intelligence-counter.js +0 -46
- package/dist/tracking/intent-correlator.js +0 -202
- package/dist/tracking/intent-encoder.js +0 -52
- package/dist/tracking/intent-token-tracker.js +0 -159
- package/dist/tracking/ledger-archiver.js +0 -94
- package/dist/tracking/ledger-chains.js +0 -245
- package/dist/tracking/metrics-store.js +0 -361
- package/dist/tracking/native-watcher.js +0 -131
- package/dist/tracking/offline-rewind.js +0 -295
- package/dist/tracking/pending-violations.js +0 -74
- package/dist/tracking/persistence-effectiveness.js +0 -167
- package/dist/tracking/prompt-durability.js +0 -202
- package/dist/tracking/quality-signals.js +0 -213
- package/dist/tracking/redactor.js +0 -73
- package/dist/tracking/rewind-engine.js +0 -161
- package/dist/tracking/session-history.js +0 -128
- package/dist/tracking/session-receipt.js +0 -88
- package/dist/tracking/session-summary-writer.js +0 -157
- package/dist/tracking/shadow-ledger.js +0 -321
- package/dist/tracking/stash-manager.js +0 -258
- package/dist/tracking/timeline-fork.js +0 -213
- package/dist/tracking/timeline.js +0 -69
- package/dist/tracking/token-flow.js +0 -276
- package/dist/tracking/turn-segmenter.js +0 -122
- package/dist/tracking/weekly-accumulator.js +0 -179
- package/dist/tracking/working-snapshots.js +0 -188
- package/dist/tracking/workspace-manifest.js +0 -176
- package/dist/transport/http.js +0 -102
- package/dist/utils/counterfactual.js +0 -65
- package/dist/utils/deep-link.js +0 -34
- package/dist/utils/detect.js +0 -193
- package/dist/utils/exec.js +0 -73
- package/dist/utils/file-logger.js +0 -87
- package/dist/utils/format-error.js +0 -29
- package/dist/utils/git.js +0 -181
- package/dist/utils/log.js +0 -57
- package/dist/utils/logger.js +0 -35
- package/dist/utils/mcp-content-json.js +0 -8
- package/dist/utils/session-logger.js +0 -154
- package/dist/utils/startup-log.js +0 -512
- package/dist/utils/ui.js +0 -56
|
@@ -1,1575 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Local Indexing Pipeline — Sprint L2.1
|
|
3
|
-
*
|
|
4
|
-
* Walks the project directory, extracts entities and edges using tree-sitter
|
|
5
|
-
* AST analysis (with regex fallback), populates CozoDB, triggers community
|
|
6
|
-
* detection, and builds the search index. Builds the graph locally
|
|
7
|
-
* for Local Mode operation.
|
|
8
|
-
*
|
|
9
|
-
* Pipeline phases:
|
|
10
|
-
* 1. Discover source files (respecting exclusion patterns)
|
|
11
|
-
* 2. Extract entities per file (tree-sitter AST)
|
|
12
|
-
* 3. Extract edges per file (imports, calls, extends, implements)
|
|
13
|
-
* 4. Cross-file edge resolution (match import refs to entity keys)
|
|
14
|
-
* 5. Compute derived fields (fan_in, fan_out, risk_level)
|
|
15
|
-
* 6. Populate CozoDB (entities, edges, file_index)
|
|
16
|
-
* 7. Community detection (Louvain via graphology)
|
|
17
|
-
* 8. Build search index
|
|
18
|
-
*
|
|
19
|
-
* All logging to stderr. Never touches stdout.
|
|
20
|
-
*/
|
|
21
|
-
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
22
|
-
import { basename, extname, join, relative } from "node:path";
|
|
23
|
-
import { formatUnknownError } from "../utils/format-error.js";
|
|
24
|
-
import { entityKey, extractEdgesAsync, extractEntitiesAsync, } from "./ast-extractor.js";
|
|
25
|
-
import { detectCascadedCommunities } from "./community-detection.js";
|
|
26
|
-
import { computeCoChangeEdges } from "./indexer/git-cochange.js";
|
|
27
|
-
import { enrichWithScip } from "./indexer/scip/orchestrator.js";
|
|
28
|
-
import { isTestFile } from "./indexer/test-detector.js";
|
|
29
|
-
import { detectLocalConventions } from "./local-convention-detector.js";
|
|
30
|
-
import { generateLocalRules } from "./local-rule-generator.js";
|
|
31
|
-
import { persistLocalSnapshot } from "./local-snapshot.js";
|
|
32
|
-
import { buildSearchIndex, tokenize } from "./search-index.js";
|
|
33
|
-
// ── Configuration ────────────────────────────────────────────────
|
|
34
|
-
/**
 * Source-code file extensions that the indexer picks up.
 * Keep this list in sync with detectLanguage in ast-extractor.
 */
const INDEXABLE_EXTENSIONS = new Set([
    // TypeScript / JavaScript
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    // Python, Go, Java, Rust
    ".py", ".go", ".java", ".rs",
    // C / C++ sources and headers
    ".c", ".h", ".cpp", ".cc", ".cxx", ".hpp",
]);
|
|
53
|
-
/**
 * Directory basenames that the file walk never descends into.
 */
const EXCLUDED_DIRS = new Set([
    "node_modules", "dist", "build", "out",
    // version-control metadata
    ".git", ".hg", ".svn",
    "coverage", "__pycache__", ".mypy_cache", ".pytest_cache",
    "vendor", "target",
    ".next", ".nuxt", ".output",
    ".unerr", ".cache", ".turbo", ".parcel-cache",
]);
|
|
76
|
-
/**
 * Path-prefix exclusions (relative to projectRoot, written with "/" separators).
 * Used for paths that should be excluded only when they match a specific
 * location, not as a bare basename.
 * Example: `.claude/worktrees/` (Claude Code agent sandboxes) — using bare
 * `worktrees` would over-match any user-named folder. Path-prefix is precise.
 */
const EXCLUDED_PATH_PREFIXES = [
    ".claude/worktrees",
    ".cursor/worktrees",
    ".idea/worktrees",
];
/**
 * Report whether a project-relative path is one of EXCLUDED_PATH_PREFIXES or
 * lies underneath one of them.
 *
 * The prefixes use "/" separators, but paths built with `path.relative` use
 * "\" on Windows. Backslashes are therefore folded to "/" before comparing so
 * the exclusion also applies to Windows-style paths.
 * NOTE(review): this assumes callers never have a literal "\" inside a single
 * POSIX path segment that should be treated as a name character — confirm.
 *
 * @param {string} relPath - Path relative to the project root.
 * @returns {boolean} true when the path equals an excluded prefix or is nested
 *   below it; false otherwise (including for the empty string).
 */
function isExcludedPath(relPath) {
    const normalized = relPath.replace(/\\/g, "/");
    // Require an exact match or a separator right after the prefix so that
    // siblings such as ".claude/worktrees-old" are not excluded by accident.
    return EXCLUDED_PATH_PREFIXES.some((prefix) => normalized === prefix || normalized.startsWith(`${prefix}/`));
}
|
|
94
|
-
/** Upper bound, in bytes, on the size of a file that gets indexed (1 MiB). */
const MAX_FILE_SIZE = 1024 * 1024;
|
|
96
|
-
/**
 * Extensions of document/config files. These are made discoverable through
 * search, but no code entities are extracted from them.
 *
 * NOTE(review): if membership is tested with path.extname(), entries such as
 * ".env.example" (extname yields ".example") and bare dotfile names like
 * ".eslintrc" (extname yields "") can never match — confirm at the lookup site.
 */
const DOCUMENT_EXTENSIONS = new Set([
    ".md", ".mdx", ".txt", ".rst", ".adoc", ".org",
    ".yaml", ".yml", ".toml", ".json", ".xml",
    ".tf", ".tfvars", ".hcl",
    ".proto", ".graphql", ".gql", ".sql",
    ".sh", ".bash", ".zsh",
    ".html", ".css", ".scss", ".sass", ".less",
    ".vue", ".svelte", ".astro",
    ".j2", ".jinja2", ".tmpl", ".hbs", ".ejs", ".pug",
    ".prisma", ".avsc", ".thrift", ".smithy",
    ".cmake", ".bazel", ".bzl", ".mk",
    ".csv", ".tsv",
    ".ini", ".cfg", ".conf", ".properties",
    ".env.example", ".editorconfig", ".dockerfile",
    ".tex", ".latex",
    ".eslintrc", ".prettierrc", ".stylelintrc", ".pylintrc", ".flake8",
]);
|
|
158
|
-
/**
 * Exact file names of well-known config/build files that lack a standard
 * extension.
 */
const DOCUMENT_NAMES = new Set([
    "Dockerfile", "Makefile", "Procfile", "Vagrantfile",
    "Jenkinsfile", "Gemfile", "Rakefile",
    ".gitignore", ".dockerignore",
    ".prettierrc", ".editorconfig", ".eslintrc", ".stylelintrc",
    "biome.json", "tslint.json",
    ".gitlab-ci.yml",
]);
|
|
177
|
-
/** Dot-prefixed CI/CD directories that are walked even though they start with ".". */
const CI_CD_DIRS = new Set([
    ".github",
    ".circleci",
]);
/** Maximum number of bytes read from a document file for token extraction (8 KiB). */
const DOC_READ_LIMIT = 8 * 1024;
|
|
181
|
-
/**
 * Minimal logger that writes "[unerr] "-prefixed lines to stderr only.
 */
const log = {
    /**
     * Always write the message.
     * @param {string} msg
     * @returns {boolean} the result of process.stderr.write
     */
    info(msg) {
        return process.stderr.write(`[unerr] ${msg}\n`);
    },
    /**
     * Write the message only when the `verbose` flag is truthy.
     * @param {string} msg
     * @param {boolean} [verbose]
     */
    verbose(msg, verbose) {
        if (!verbose) {
            return;
        }
        process.stderr.write(`[unerr] ${msg}\n`);
    },
};
|
|
189
|
-
// ── Main Pipeline ────────────────────────────────────────────────
|
|
190
|
-
/**
|
|
191
|
-
* Index a local project: walk files, extract entities + edges, populate CozoDB.
|
|
192
|
-
*
|
|
193
|
-
* @param projectRoot - Absolute path to the project root (where .git lives)
|
|
194
|
-
* @param graphStore - CozoGraphStore instance (schema already initialized)
|
|
195
|
-
* @param repoId - Repository ID for entity key generation
|
|
196
|
-
* @param opts - Optional verbose/callback settings
|
|
197
|
-
*/
|
|
198
|
-
export async function indexLocalProject(projectRoot, graphStore, repoId, opts) {
|
|
199
|
-
const startTime = Date.now();
|
|
200
|
-
const progress = opts?.onProgress;
|
|
201
|
-
// Phase 0: Mark reindex start. We DON'T clear upfront — queries remain valid against
|
|
202
|
-
// stale-but-present data during the ~8s reindex window. After populate completes,
|
|
203
|
-
// Phase 6.2 removes orphaned entities not seen in this index run.
|
|
204
|
-
const indexedEntityKeys = new Set();
|
|
205
|
-
// Phase 1: Discover source files
|
|
206
|
-
progress?.({
|
|
207
|
-
processed: 0,
|
|
208
|
-
total: 0,
|
|
209
|
-
phase: "discovering",
|
|
210
|
-
currentFile: null,
|
|
211
|
-
});
|
|
212
|
-
const files = discoverSourceFiles(projectRoot);
|
|
213
|
-
log.info(`Indexing project... (${files.length} files found)`);
|
|
214
|
-
// Phase 2+3: Extract entities and edges per file
|
|
215
|
-
const allEntities = [];
|
|
216
|
-
const allRawEdges = [];
|
|
217
|
-
const fileEntityMap = new Map();
|
|
218
|
-
let filesProcessed = 0;
|
|
219
|
-
for (const absPath of files) {
|
|
220
|
-
const relPath = relative(projectRoot, absPath);
|
|
221
|
-
progress?.({
|
|
222
|
-
processed: filesProcessed,
|
|
223
|
-
total: files.length,
|
|
224
|
-
phase: "extracting",
|
|
225
|
-
currentFile: relPath,
|
|
226
|
-
});
|
|
227
|
-
let content;
|
|
228
|
-
try {
|
|
229
|
-
content = readFileSync(absPath, "utf-8");
|
|
230
|
-
}
|
|
231
|
-
catch {
|
|
232
|
-
filesProcessed++;
|
|
233
|
-
continue;
|
|
234
|
-
}
|
|
235
|
-
// Extract entities
|
|
236
|
-
const entities = await extractEntitiesAsync(content, relPath);
|
|
237
|
-
fileEntityMap.set(relPath, entities);
|
|
238
|
-
// Convert to CompactEntity with entity keys.
|
|
239
|
-
// is_test now comes exclusively from the AST extractor — TS/JS test-
|
|
240
|
-
// primitive callbacks (it/describe/test/...) are extracted as entities
|
|
241
|
-
// with is_test=true, Rust cfg(test) blocks set it via plugin. Top-level
|
|
242
|
-
// helpers (seedEntities, MockEntity, createTestDb) in test files stay
|
|
243
|
-
// is_test=false. The old fileIsTest fallback was lossy — it conflated
|
|
244
|
-
// "lives in a test file" with "is a test", polluting test-coverage
|
|
245
|
-
// results with fixtures/helpers.
|
|
246
|
-
for (const entity of entities) {
|
|
247
|
-
const key = entityKey(repoId, relPath, entity.kind, entity.name, entity.signature);
|
|
248
|
-
allEntities.push({
|
|
249
|
-
key,
|
|
250
|
-
kind: entity.kind,
|
|
251
|
-
name: entity.name,
|
|
252
|
-
file_path: relPath,
|
|
253
|
-
start_line: entity.line_start,
|
|
254
|
-
end_line: entity.line_end,
|
|
255
|
-
signature: entity.signature,
|
|
256
|
-
body: "", // Skip body storage for indexing performance
|
|
257
|
-
fan_in: 0,
|
|
258
|
-
fan_out: 0,
|
|
259
|
-
risk_level: "normal",
|
|
260
|
-
community: -1,
|
|
261
|
-
is_test: entity.is_test ?? false,
|
|
262
|
-
parent_class: entity.parent_class,
|
|
263
|
-
});
|
|
264
|
-
}
|
|
265
|
-
// Extract edges (tag with source file for cross-file resolution)
|
|
266
|
-
const edges = await extractEdgesAsync(content, relPath, entities);
|
|
267
|
-
for (const edge of edges) {
|
|
268
|
-
allRawEdges.push({ ...edge, source_file: relPath });
|
|
269
|
-
}
|
|
270
|
-
filesProcessed++;
|
|
271
|
-
log.verbose(`${relPath}: ${entities.length} entities, ${edges.length} edges`, opts?.verbose);
|
|
272
|
-
opts?.onFileIndexed?.(relPath, entities.length, edges.length);
|
|
273
|
-
}
|
|
274
|
-
// Phase 4: Cross-file edge resolution
|
|
275
|
-
progress?.({
|
|
276
|
-
processed: filesProcessed,
|
|
277
|
-
total: files.length,
|
|
278
|
-
phase: "resolving",
|
|
279
|
-
currentFile: null,
|
|
280
|
-
});
|
|
281
|
-
const entityByName = buildEntityNameIndex(allEntities);
|
|
282
|
-
const { edges: resolvedEdges, fileImportEdges } = resolveEdges(allRawEdges, entityByName, allEntities, repoId, fileEntityMap);
|
|
283
|
-
// Phase 4.5: SCIP enrichment (inline — adds cross-file edges tree-sitter missed)
|
|
284
|
-
progress?.({
|
|
285
|
-
processed: filesProcessed,
|
|
286
|
-
total: files.length,
|
|
287
|
-
phase: "scip",
|
|
288
|
-
currentFile: null,
|
|
289
|
-
});
|
|
290
|
-
const relativeFiles = files.map((f) => relative(projectRoot, f));
|
|
291
|
-
const scipResult = await enrichWithScip(relativeFiles, projectRoot, resolvedEdges.map((e) => ({
|
|
292
|
-
from_key: e.from_key,
|
|
293
|
-
to_key: e.to_key,
|
|
294
|
-
type: e.type,
|
|
295
|
-
file_path: "",
|
|
296
|
-
line: 0,
|
|
297
|
-
})), allEntities.map((e) => ({
|
|
298
|
-
key: e.key,
|
|
299
|
-
name: e.name,
|
|
300
|
-
file_path: e.file_path,
|
|
301
|
-
})));
|
|
302
|
-
if (scipResult.mergeResult) {
|
|
303
|
-
log.info(`SCIP: ${scipResult.mergeResult.edgesUpgraded} edges verified, ${scipResult.mergeResult.newEdgesFromScip} new edges added (${scipResult.language})`);
|
|
304
|
-
}
|
|
305
|
-
// Phase 5: Compute fan_in, fan_out, risk_level
|
|
306
|
-
computeDerivedFields(allEntities, resolvedEdges);
|
|
307
|
-
// Phase 5.1: R.11 — Create "tests" edges (test entity → source entity it exercises)
|
|
308
|
-
const testEdges = resolveTestEdges(allEntities, resolvedEdges, fileImportEdges);
|
|
309
|
-
if (testEdges.length > 0) {
|
|
310
|
-
resolvedEdges.push(...testEdges);
|
|
311
|
-
log.info(`Test graph: ${testEdges.length} test→source edges created`);
|
|
312
|
-
}
|
|
313
|
-
// Phase 5.5: R.4 — Compute file→file co-change edges from git history
|
|
314
|
-
const coChangeEdges = computeCoChangeEdges(projectRoot);
|
|
315
|
-
const coChangeCompactEdges = coChangeEdges.map((e) => ({
|
|
316
|
-
from_key: `file:${e.from_file}`,
|
|
317
|
-
to_key: `file:${e.to_file}`,
|
|
318
|
-
type: "co_changes",
|
|
319
|
-
}));
|
|
320
|
-
// Phase 6: Populate CozoDB (includes R.1 file entities, R.2 contains edges)
|
|
321
|
-
progress?.({
|
|
322
|
-
processed: filesProcessed,
|
|
323
|
-
total: files.length,
|
|
324
|
-
phase: "populating",
|
|
325
|
-
currentFile: null,
|
|
326
|
-
});
|
|
327
|
-
// R.3 + R.4: Combine code edges with file-level import + co-change edges
|
|
328
|
-
const allEdges = [
|
|
329
|
-
...resolvedEdges,
|
|
330
|
-
...fileImportEdges,
|
|
331
|
-
...coChangeCompactEdges,
|
|
332
|
-
];
|
|
333
|
-
await populateCozoDB(graphStore, allEntities, allEdges);
|
|
334
|
-
// Phase 6.1: Index document files for search discoverability
|
|
335
|
-
progress?.({
|
|
336
|
-
processed: filesProcessed,
|
|
337
|
-
total: files.length,
|
|
338
|
-
phase: "documents",
|
|
339
|
-
currentFile: null,
|
|
340
|
-
});
|
|
341
|
-
const docKeys = await indexDocumentFiles(projectRoot, graphStore);
|
|
342
|
-
// Phase 6.2: Remove orphaned entities not present in this index run.
|
|
343
|
-
// This handles deleted files/entities — the graph was NOT cleared upfront (to remain
|
|
344
|
-
// queryable during reindex), so stale entries must be pruned after fresh data is in place.
|
|
345
|
-
for (const e of allEntities)
|
|
346
|
-
indexedEntityKeys.add(e.key);
|
|
347
|
-
// Also include file-level entities
|
|
348
|
-
for (const e of allEntities) {
|
|
349
|
-
if (e.file_path)
|
|
350
|
-
indexedEntityKeys.add(`file:${e.file_path}`);
|
|
351
|
-
}
|
|
352
|
-
// Include doc entities so they aren't pruned as orphans
|
|
353
|
-
for (const dk of docKeys)
|
|
354
|
-
indexedEntityKeys.add(dk);
|
|
355
|
-
await removeOrphanedEntities(graphStore, indexedEntityKeys);
|
|
356
|
-
// Phase 6.3: Clear stale drift overlay/edges. The baseline `entities` and `edges`
|
|
357
|
-
// relations have just been refreshed, so any pre-existing drift_overlay /
|
|
358
|
-
// drift_edges rows describe deltas against the OLD baseline and would
|
|
359
|
-
// incorrectly mark freshly-indexed entities as modified (issue #8).
|
|
360
|
-
await graphStore.clearDriftOverlay();
|
|
361
|
-
// Phase 6.5: Materialize L1 edges (file→file, class→class weighted aggregates)
|
|
362
|
-
await materializeL1Edges(graphStore);
|
|
363
|
-
// Phase 7: Community detection (handled inside CozoGraphStore)
|
|
364
|
-
progress?.({
|
|
365
|
-
processed: filesProcessed,
|
|
366
|
-
total: files.length,
|
|
367
|
-
phase: "communities",
|
|
368
|
-
currentFile: null,
|
|
369
|
-
});
|
|
370
|
-
const communityCount = await runCommunityDetection(graphStore);
|
|
371
|
-
// Phase 8: Convention detection + rule generation (L6)
|
|
372
|
-
progress?.({
|
|
373
|
-
processed: filesProcessed,
|
|
374
|
-
total: files.length,
|
|
375
|
-
phase: "conventions",
|
|
376
|
-
currentFile: null,
|
|
377
|
-
});
|
|
378
|
-
const { patternCount, ruleCount } = await runConventionDetection(graphStore, repoId);
|
|
379
|
-
// Phase 9: Build search index
|
|
380
|
-
progress?.({
|
|
381
|
-
processed: filesProcessed,
|
|
382
|
-
total: files.length,
|
|
383
|
-
phase: "search",
|
|
384
|
-
currentFile: null,
|
|
385
|
-
});
|
|
386
|
-
await buildSearchIndex(graphStore.db);
|
|
387
|
-
// Phase 10: Persist local snapshot for fast subsequent boots
|
|
388
|
-
progress?.({
|
|
389
|
-
processed: filesProcessed,
|
|
390
|
-
total: files.length,
|
|
391
|
-
phase: "snapshot",
|
|
392
|
-
currentFile: null,
|
|
393
|
-
});
|
|
394
|
-
await persistLocalSnapshot(projectRoot, repoId, allEntities, resolvedEdges);
|
|
395
|
-
const elapsedMs = Date.now() - startTime;
|
|
396
|
-
log.info(`Indexed ${files.length} files → ${allEntities.length} entities, ${resolvedEdges.length} edges, ${docKeys.length} docs in ${elapsedMs}ms`);
|
|
397
|
-
return {
|
|
398
|
-
fileCount: files.length,
|
|
399
|
-
entityCount: allEntities.length,
|
|
400
|
-
edgeCount: resolvedEdges.length,
|
|
401
|
-
communityCount,
|
|
402
|
-
patternCount,
|
|
403
|
-
ruleCount,
|
|
404
|
-
docCount: docKeys.length,
|
|
405
|
-
elapsedMs,
|
|
406
|
-
scip: scipResult.mergeResult
|
|
407
|
-
? {
|
|
408
|
-
language: scipResult.language,
|
|
409
|
-
edgesVerified: scipResult.mergeResult.edgesUpgraded,
|
|
410
|
-
newEdges: scipResult.mergeResult.newEdgesFromScip,
|
|
411
|
-
durationMs: scipResult.runResult?.durationMs ?? 0,
|
|
412
|
-
}
|
|
413
|
-
: undefined,
|
|
414
|
-
};
|
|
415
|
-
}
|
|
416
|
-
/**
 * Incrementally re-index one file (Sprint L2.5).
 *
 * Existing graph rows for the file are removed first; the file is then read,
 * its entities and edges are extracted, and the results are inserted back
 * into the store before the search index is rebuilt.
 *
 * @param {string} projectRoot - Root directory that relative paths are resolved against.
 * @param {string} filePath - Path of the file; a leading "/" marks it as absolute.
 * @param {object} graphStore - Store handle; `graphStore.db` is handed to the search-index builder.
 * @param {string} repoId - Repository id used when building entity keys.
 * @returns {Promise<{entities: number, edges: number}>} Counts of inserted rows;
 *   both are 0 when the file cannot be read (its old rows are still removed).
 */
export async function reindexFile(projectRoot, filePath, graphStore, repoId) {
    const givenAsAbsolute = filePath.startsWith("/");
    const relPath = givenAsAbsolute ? relative(projectRoot, filePath) : filePath;
    const absPath = givenAsAbsolute ? filePath : join(projectRoot, filePath);
    // Drop whatever the graph currently holds for this file before re-reading it.
    await removeFileEntities(graphStore, relPath);
    let source;
    try {
        source = readFileSync(absPath, "utf-8");
    }
    catch {
        // Unreadable file: nothing is re-inserted.
        return { entities: 0, edges: 0 };
    }
    const extracted = await extractEntitiesAsync(source, relPath);
    // is_test is taken from the extractor output only; entities without the
    // flag default to false.
    const toCompactEntity = (e) => ({
        key: entityKey(repoId, relPath, e.kind, e.name, e.signature),
        kind: e.kind,
        name: e.name,
        file_path: relPath,
        start_line: e.line_start,
        end_line: e.line_end,
        signature: e.signature,
        body: "",
        fan_in: 0,
        fan_out: 0,
        risk_level: "normal",
        community: -1,
        is_test: e.is_test ?? false,
        parent_class: e.parent_class,
    });
    const compactEntities = extracted.map(toCompactEntity);
    const rawEdges = await extractEdgesAsync(source, relPath, extracted);
    // Name-based resolution: the source end is looked up among this file's
    // entities, the target end through the graph store.
    const resolvedEdges = [];
    for (const { from_name, to_name, type } of rawEdges) {
        const fromKey = resolveEntityName(from_name, relPath, repoId, extracted);
        const toKey = await resolveEntityNameGlobal(to_name, graphStore);
        if (!fromKey || !toKey) {
            continue;
        }
        resolvedEdges.push({ from_key: fromKey, to_key: toKey, type });
    }
    // Entities go in first, then the edges that reference them.
    for (const compact of compactEntities) {
        await insertEntity(graphStore, compact);
    }
    for (const resolved of resolvedEdges) {
        await insertEdge(graphStore, resolved);
    }
    await buildSearchIndex(graphStore.db);
    return { entities: compactEntities.length, edges: resolvedEdges.length };
}
|
|
478
|
-
// ── Phase 1: File Discovery ──────────────────────────────────────
|
|
479
|
-
/**
 * Collect the indexable source files found under a project directory.
 *
 * @param {string} projectRoot - Directory to walk; also the base for relative-path exclusion checks.
 * @returns {string[]} Paths gathered by `walkDir`, in traversal order.
 */
export function discoverSourceFiles(projectRoot) {
    const discovered = [];
    // The root is both the starting directory and the reference for relative paths.
    walkDir(projectRoot, discovered, projectRoot);
    return discovered;
}
|
|
485
|
-
/**
 * Recursively scan `dir`, appending every indexable file to `files`.
 *
 * An entry is skipped when its name is in EXCLUDED_DIRS, when it starts with
 * a dot, when its project-relative path is rejected by `isExcludedPath`, or
 * when it cannot be stat'ed. Files are kept only if their lower-cased
 * extension is in INDEXABLE_EXTENSIONS and their size is at most MAX_FILE_SIZE.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} files - Accumulator that matching paths are pushed onto.
 * @param {string} projectRoot - Base used to compute project-relative paths.
 */
function walkDir(dir, files, projectRoot) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        // Unreadable directory: contribute nothing.
        return;
    }
    for (const name of names) {
        const isDotEntry = name.startsWith(".") && name !== ".";
        if (EXCLUDED_DIRS.has(name) || isDotEntry) {
            continue;
        }
        const fullPath = join(dir, name);
        // Path-aware exclusion in addition to the name-based checks above.
        if (isExcludedPath(relative(projectRoot, fullPath))) {
            continue;
        }
        let info;
        try {
            info = statSync(fullPath);
        }
        catch {
            continue;
        }
        if (info.isDirectory()) {
            walkDir(fullPath, files, projectRoot);
            continue;
        }
        if (!info.isFile()) {
            continue;
        }
        const extension = extname(name).toLowerCase();
        const indexable = INDEXABLE_EXTENSIONS.has(extension) && info.size <= MAX_FILE_SIZE;
        if (indexable) {
            files.push(fullPath);
        }
    }
}
|
|
522
|
-
// ── Phase 4: Cross-File Edge Resolution ──────────────────────────
|
|
523
|
-
/**
 * Build a lookup from entity name to the entities carrying that name.
 *
 * Every entity is registered under the last dot-separated segment of its
 * name; dotted names are additionally registered under the full name.
 *
 * @param {Iterable<{name: string}>} entities - Entities to index.
 * @returns {Map<string, object[]>} Name → entities, in encounter order.
 */
function buildEntityNameIndex(entities) {
    const byName = new Map();
    const register = (name, entity) => {
        const bucket = byName.get(name);
        if (bucket) {
            bucket.push(entity);
        }
        else {
            byName.set(name, [entity]);
        }
    };
    for (const entity of entities) {
        const fullName = entity.name;
        const segments = fullName.split(".");
        const shortName = segments[segments.length - 1] ?? fullName;
        register(shortName, entity);
        // Qualified names stay reachable through their full dotted form too.
        if (shortName !== fullName) {
            register(fullName, entity);
        }
    }
    return byName;
}
|
|
540
|
-
/**
 * Turn name-based raw edges into key-based edges and derive file→file
 * import edges along the way.
 *
 * - File-scope "imports" edges with an import source only produce a
 *   file→file pair (when the source resolves to a different project file).
 * - Other file-scope edges are kept only for "calls" made from test files,
 *   with `file:<source_file>` as the origin.
 * - All remaining edges need both ends to resolve by name; every cross-file
 *   edge also records a file→file pair.
 *
 * @param {Iterable<object>} rawEdges - Extracted edges (from_name, to_name, type, source_file, import_source).
 * @param {Map<string, object[]>} entityByName - Name → candidate entities.
 * @param {Iterable<object>} allEntities - All entities; their file paths form the project file set.
 * @param {string} repoId - Not used by this function.
 * @param {*} fileEntityMap - Not used by this function.
 * @returns {{edges: object[], fileImportEdges: object[]}} De-duplicated entity edges and file import edges.
 */
function resolveEdges(rawEdges, entityByName, allEntities, repoId, fileEntityMap) {
    const FILE_SCOPE = "__file__";
    const resolved = [];
    const emittedEdgeKeys = new Set();
    // R.3: file→file pairs, de-duplicated through a "\0"-joined string key.
    const fileImportPairs = new Set();
    const allFilePaths = new Set();
    for (const { file_path: path } of allEntities) {
        if (path) {
            allFilePaths.add(path);
        }
    }
    // Append an edge unless the same (from, to, type) triple was already emitted.
    const emit = (fromKey, toKey, type) => {
        const dedupeKey = `${fromKey}:${toKey}:${type}`;
        if (emittedEdgeKeys.has(dedupeKey)) {
            return false;
        }
        emittedEdgeKeys.add(dedupeKey);
        resolved.push({ from_key: fromKey, to_key: toKey, type });
        return true;
    };
    const linkFiles = (fromFile, toFile) => {
        fileImportPairs.add(`${fromFile}\0${toFile}`);
    };
    // Prefer a same-file non-class, then any same-file entity, then any
    // non-class, then the first candidate.
    const chooseSource = (candidates, sourceFile) => {
        if (candidates.length === 1) {
            return candidates[0];
        }
        const sameFile = candidates.filter((s) => s.file_path === sourceFile);
        return (sameFile.find((s) => s.kind !== "class") ??
            sameFile[0] ??
            candidates.find((s) => s.kind !== "class") ??
            candidates[0]);
    };
    for (const edge of rawEdges) {
        const fromFileScope = edge.from_name === FILE_SCOPE;
        // File-level import: contributes only to the file graph.
        if (fromFileScope && edge.type === "imports" && edge.import_source) {
            const importedFile = resolveImportSourceToFile(edge.import_source, edge.source_file, allFilePaths);
            if (importedFile && importedFile !== edge.source_file) {
                linkFiles(edge.source_file, importedFile);
            }
            continue;
        }
        const targets = entityByName.get(edge.to_name);
        if (!targets || targets.length === 0) {
            continue;
        }
        const target = targets.length === 1
            ? targets[0]
            : (pickBestTarget(targets, edge.import_source) ?? targets[0]);
        if (fromFileScope) {
            // Only test files keep file-scope calls, attributed to the file module entity.
            const isTestCall = edge.type === "calls" && isTestFile(edge.source_file);
            if (isTestCall && emit(`file:${edge.source_file}`, target.key, edge.type)) {
                if (target.file_path && target.file_path !== edge.source_file) {
                    linkFiles(edge.source_file, target.file_path);
                }
            }
            continue;
        }
        const sources = entityByName.get(edge.from_name);
        if (!sources || sources.length === 0) {
            continue;
        }
        const from = chooseSource(sources, edge.source_file);
        if (!emit(from.key, target.key, edge.type)) {
            continue;
        }
        // R.3: a cross-file entity edge implies a file→file import.
        if (from.file_path && target.file_path && from.file_path !== target.file_path) {
            linkFiles(from.file_path, target.file_path);
        }
    }
    const fileImportEdges = [...fileImportPairs].map((pair) => {
        const [fromFile, toFile] = pair.split("\0");
        return { from_key: `file:${fromFile}`, to_key: `file:${toFile}`, type: "imports" };
    });
    return { edges: resolved, fileImportEdges };
}
|
|
627
|
-
/**
 * Resolve a language-native import source to an actual project file path.
 *
 * Dispatch order:
 *   1. Python relative imports (".models", "..pkg.mod") when the importing
 *      file is a .py file: leading dots walk up from the importing file,
 *      the remainder is converted from dotted to slash form.
 *   2. Path-style relative imports ("./module", "../utils/helper", "/x"),
 *      delegated to `resolveRelativeImport`.
 *   3. Everything else, by the importing file's extension:
 *      - TS/JS: bare specifiers are treated as external → null
 *      - Python: "pkg.mod" → "pkg/mod"
 *      - Go: trailing 1–3 path segments matched against directories holding project files
 *      - Java / C#: dots → slashes
 *      - Rust: "crate::a::b" → "src/a/b", with `self::` / `super::` rewritten relative to the file
 *      - Ruby: the require path is matched as-is
 *
 * @param {string} importSource - Import specifier as written in the source file.
 * @param {string} sourceFile - Project-relative path of the importing file ("/"-separated).
 * @param {Set<string>} projectFiles - Known project-relative file paths.
 * @returns {string|null} The matched project file, or null when unresolvable (e.g. external dependency).
 * @internal Exported for testing.
 */
export function resolveImportSourceToFile(importSource, sourceFile, projectFiles) {
    // Skip empty imports.
    if (!importSource || importSource.length === 0)
        return null;
    const ext = extname(sourceFile).toLowerCase();
    // Python relative imports must be recognised BEFORE the generic
    // dot-prefixed check below, which would otherwise route ".models" to the
    // TS/JS path resolver where it can never match. A "/" cannot appear in a
    // Python module path, so such specifiers fall through to the path resolver.
    if (ext === ".py" && importSource.startsWith(".") && !importSource.includes("/")) {
        return resolvePythonRelativeImport(importSource, sourceFile, projectFiles);
    }
    // Path-style relative imports (TS/JS and similar).
    if (importSource.startsWith(".") || importSource.startsWith("/")) {
        return resolveRelativeImport(importSource, sourceFile, projectFiles);
    }
    switch (ext) {
        case ".ts":
        case ".tsx":
        case ".js":
        case ".jsx":
        case ".mjs":
        case ".cjs":
            // Bare specifier (npm package) — skip external
            return null;
        case ".py": {
            // Python: dotted.module.path → slash path
            const pyPath = importSource.replace(/\./g, "/");
            return matchFileCandidates(pyPath, projectFiles, ["py"]);
        }
        case ".go": {
            // Go imports name a package directory. Try the last three
            // segments first, then progressively shorter suffixes.
            const segments = importSource.split("/");
            for (let i = Math.max(0, segments.length - 3); i < segments.length; i++) {
                const suffix = segments.slice(i).join("/");
                // Match on whole path segments only, so that "pkg" does not
                // match a directory named "notpkg".
                const needle = `/${suffix}/`;
                for (const fp of projectFiles) {
                    if (fp.endsWith(".go") && `/${fp}`.includes(needle)) {
                        // Any .go file under the directory stands in for the package.
                        return fp;
                    }
                    // Also accept a file sitting directly in a directory whose path ends with the suffix.
                    const dir = fp.substring(0, fp.lastIndexOf("/"));
                    if (dir === suffix || dir.endsWith(`/${suffix}`))
                        return fp;
                }
            }
            return null;
        }
        case ".java": {
            // Java: com.example.Foo → com/example/Foo.java
            const javaPath = importSource.replace(/\./g, "/");
            return matchFileCandidates(javaPath, projectFiles, ["java"]);
        }
        case ".rs": {
            // Rust: crate::module::sub → src/module/sub.rs or src/module/sub/mod.rs
            const rustPath = importSource
                .replace(/^crate::/, "src/")
                .replace(/^self::/, sourceFile.replace(/\/[^/]+$/, "/"))
                .replace(/^super::/, sourceFile.replace(/\/[^/]+\/[^/]+$/, "/"))
                .replace(/::/g, "/");
            return matchFileCandidates(rustPath, projectFiles, ["rs"]);
        }
        case ".rb": {
            // Ruby: require paths are often relative file paths
            return matchFileCandidates(importSource, projectFiles, ["rb"]);
        }
        case ".cs": {
            // C#: namespace.Type → namespace/Type.cs
            const csPath = importSource.replace(/\./g, "/");
            return matchFileCandidates(csPath, projectFiles, ["cs"]);
        }
        default:
            return null;
    }
}
/**
 * Resolve a Python relative import (one or more leading dots).
 * One dot refers to the importing file's own directory; each further dot
 * moves one directory up. A dots-only specifier resolves to the package itself.
 */
function resolvePythonRelativeImport(importSource, sourceFile, projectFiles) {
    const dots = importSource.match(/^\.+/)?.[0]?.length ?? 0;
    const remainder = importSource.slice(dots).replace(/\./g, "/");
    // Dropping `dots` trailing segments removes the file name plus (dots - 1) directories.
    const base = sourceFile.split("/").slice(0, -dots).join("/");
    const target = [base, remainder].filter(Boolean).join("/");
    return matchFileCandidates(target, projectFiles, ["py"]);
}
|
|
722
|
-
/**
 * Resolve a TS/JS relative import to a project file.
 *
 * The specifier is joined onto the importing file's directory and
 * normalised ("." and empty segments dropped, ".." pops one segment).
 * Candidates are then probed in order: for a specifier ending in a JS
 * extension, the extension-less stem with each known extension and as a
 * directory index (NodeNext: ".js" in source may be ".ts" on disk); then the
 * full path with each extension and as a directory index; finally the path as-is.
 *
 * @param {string} importSource - Relative specifier.
 * @param {string} sourceFile - Project-relative path of the importing file.
 * @param {Set<string>} projectFiles - Known project-relative file paths.
 * @returns {string|null} First candidate present in `projectFiles`, else null.
 */
function resolveRelativeImport(importSource, sourceFile, projectFiles) {
    const lastSlash = sourceFile.lastIndexOf("/");
    const sourceDir = lastSlash === -1 ? "" : sourceFile.slice(0, lastSlash);
    const joined = `${sourceDir}/${importSource.replace(/^\.\//, "")}`;
    const base = joined
        .split("/")
        .reduce((stack, segment) => {
            if (segment === "..") {
                stack.pop();
            }
            else if (segment !== "." && segment !== "") {
                stack.push(segment);
            }
            return stack;
        }, [])
        .join("/");
    const knownExts = ["ts", "tsx", "js", "jsx", "mjs", "cjs"];
    const jsExt = base.match(/\.(js|jsx|mjs|cjs)$/);
    // With a JS extension present, the stripped stem is tried before the literal path.
    const stems = jsExt ? [base.slice(0, -jsExt[0].length), base] : [base];
    const candidates = stems.flatMap((stem) => [
        ...knownExts.map((e) => `${stem}.${e}`),
        ...knownExts.map((e) => `${stem}/index.${e}`),
    ]);
    // The specifier may already carry its real extension.
    candidates.push(base);
    const hit = candidates.find((candidate) => projectFiles.has(candidate));
    return hit ?? null;
}
|
|
766
|
-
/**
 * Match an extension-less module path against the project's files.
 *
 * Probing order:
 *   1. `basePath` exactly,
 *   2. `basePath.<ext>` for each extension,
 *   3. `basePath/<index>.<ext>` where <index> is "__init__" when "py" is
 *      among the extensions, "mod" when "rs" is, otherwise "index",
 *   4. any project file ending in "/basePath.<ext>",
 *   5. any project file ending in "/basePath/<index>.<ext>".
 *
 * @param {string} basePath - Path to look for, usually without extension.
 * @param {Set<string>} projectFiles - Known project-relative file paths.
 * @param {string[]} extensions - Extensions (without dot) to try, in priority order.
 * @returns {string|null} The matching project file, or null.
 */
function matchFileCandidates(basePath, projectFiles, extensions) {
    if (!basePath) {
        return null;
    }
    if (projectFiles.has(basePath)) {
        return basePath;
    }
    let indexStem = "index";
    if (extensions.includes("py")) {
        indexStem = "__init__";
    }
    else if (extensions.includes("rs")) {
        indexStem = "mod";
    }
    // Plain-file forms first, directory-index forms second; the same ordering
    // is used for both the exact pass and the suffix pass.
    const forms = [
        ...extensions.map((ext) => `${basePath}.${ext}`),
        ...extensions.map((ext) => `${basePath}/${indexStem}.${ext}`),
    ];
    const exact = forms.find((form) => projectFiles.has(form));
    if (exact !== undefined) {
        return exact;
    }
    // Suffix match: the import may omit a leading path prefix,
    // e.g. "models/user" → "app/models/user.py".
    for (const form of forms) {
        const suffix = `/${form}`;
        for (const fp of projectFiles) {
            if (fp.endsWith(suffix)) {
                return fp;
            }
        }
    }
    return null;
}
|
|
816
|
-
/**
 * Pick the best matching entity when several share the same name.
 *
 * The import source is normalised by dropping every leading "./" or "../"
 * segment and a trailing TS/JS extension. A target whose extension-less
 * file path equals the normalised source, or ends with it on a path-segment
 * boundary, wins; failing that, the first target whose path merely contains
 * it; failing that, the first target.
 *
 * @param {Array<{file_path?: string}>} targets - Candidate entities.
 * @param {string} [importSource] - Import specifier associated with the edge, if any.
 * @returns {object|null} The chosen target, or null when `targets` is empty.
 */
function pickBestTarget(targets, importSource) {
    if (!importSource || targets.length <= 1)
        return targets[0] ?? null;
    const stripExt = (p) => p.replace(/\.(ts|js|tsx|jsx|mjs|cjs)$/, "");
    // Strip ALL leading relative segments: "../utils/helper" can only be
    // compared with a project path once the "../" prefix is gone.
    const normalized = stripExt(importSource.replace(/^(?:\.{1,2}\/)+/, ""));
    if (!normalized)
        return targets[0] ?? null;
    // Entities without a file_path simply never match.
    const paths = targets.map((t) => stripExt(t.file_path ?? ""));
    // Exact / segment-boundary suffix beats a loose substring hit, so
    // "utils/helper" prefers ".../utils/helper" over ".../utils/helper-old".
    let chosen = paths.findIndex((p) => p === normalized || p.endsWith(`/${normalized}`));
    if (chosen === -1) {
        chosen = paths.findIndex((p) => p.includes(normalized));
    }
    return targets[chosen === -1 ? 0 : chosen] ?? null;
}
|
|
832
|
-
// ── Phase 5.1: Test Edge Resolution ─────────────────────────────
|
|
833
|
-
/**
 * Create "tests" edges connecting test entities to the source entities they exercise.
 *
 * Three passes, each applied only to test files not already covered by an earlier one:
 *   1. Entity-level calls: every "calls" edge from a test entity to a known
 *      non-test entity yields a "tests" edge between the same pair.
 *   2. File-level calls: "calls" edges whose origin is `file:<testFile>` are
 *      attributed to the first test entity of that file.
 *   3. Import fallback: for each file the test file imports, the first test
 *      entity gets a "tests" edge to every non-test entity of that file,
 *      except those of kind "type" or "interface".
 *
 * @param {Array<object>} entities - All entities (key, file_path, is_test, kind).
 * @param {Iterable<object>} edges - Resolved entity edges (from_key, to_key, type).
 * @param {Iterable<object>} [fileImportEdges] - File→file edges keyed as `file:<path>`.
 * @returns {Array<{from_key: string, to_key: string, type: "tests"}>} De-duplicated "tests" edges.
 */
function resolveTestEdges(entities, edges, fileImportEdges = []) {
    const testEdges = [];
    const testEntities = entities.filter((e) => e.is_test);
    if (testEntities.length === 0)
        return testEdges;
    const entityMap = new Map();
    for (const e of entities)
        entityMap.set(e.key, e);
    // Append `value` to the collection stored under `key`, creating it on first use.
    const addToSet = (map, key, value) => {
        let bucket = map.get(key);
        if (!bucket) {
            bucket = new Set();
            map.set(key, bucket);
        }
        bucket.add(value);
    };
    const addToList = (map, key, value) => {
        const bucket = map.get(key);
        if (bucket)
            bucket.push(value);
        else
            map.set(key, [value]);
    };
    // Outbound calls index: source key → Set<target key>
    const callsFrom = new Map();
    for (const edge of edges) {
        if (edge.type === "calls")
            addToSet(callsFrom, edge.from_key, edge.to_key);
    }
    // File import index: file → Set<imported file>, with the "file:" prefix stripped.
    const fileImportsFrom = new Map();
    for (const edge of fileImportEdges) {
        if (edge.type !== "imports" || !edge.from_key.startsWith("file:"))
            continue;
        const toFile = edge.to_key.startsWith("file:") ? edge.to_key.slice(5) : edge.to_key;
        addToSet(fileImportsFrom, edge.from_key.slice(5), toFile);
    }
    // file → non-test entities (targets of the import fallback)
    const sourceEntitiesByFile = new Map();
    // file → test entities (origins for the file-level passes)
    const testEntitiesByFile = new Map();
    for (const e of entities) {
        if (!e.file_path)
            continue;
        addToList(e.is_test ? testEntitiesByFile : sourceEntitiesByFile, e.file_path, e);
    }
    const seen = new Set();
    // Test files that received at least one call-based "tests" edge.
    const filesWithCallEdges = new Set();
    // Record a "tests" edge once; returns whether it was newly added.
    const addTestEdge = (fromKey, toKey) => {
        const edgeKey = `${fromKey}→${toKey}`;
        if (seen.has(edgeKey))
            return false;
        seen.add(edgeKey);
        testEdges.push({ from_key: fromKey, to_key: toKey, type: "tests" });
        return true;
    };
    // Add edges from `fromKey` to each called key that is a known non-test entity.
    const linkCalledSources = (fromKey, calledKeys, testFile) => {
        for (const targetKey of calledKeys) {
            const target = entityMap.get(targetKey);
            if (!target || target.is_test)
                continue;
            if (addTestEdge(fromKey, targetKey) && testFile)
                filesWithCallEdges.add(testFile);
        }
    };
    // Pass 1: calls made directly by test entities.
    for (const testEntity of testEntities) {
        const called = callsFrom.get(testEntity.key);
        if (called)
            linkCalledSources(testEntity.key, called, testEntity.file_path);
    }
    // Pass 2: file-scope calls (origin `file:<path>`), attributed to the
    // file's first test entity.
    for (const [testFile, testEnts] of testEntitiesByFile) {
        if (filesWithCallEdges.has(testFile))
            continue; // already resolved via entity-level calls
        const called = callsFrom.get(`file:${testFile}`);
        const representative = testEnts[0];
        if (!called || !representative)
            continue;
        linkCalledSources(representative.key, called, testFile);
    }
    // Pass 3: import-based fallback for files that still have no call-based edges.
    for (const [testFile, testEnts] of testEntitiesByFile) {
        if (filesWithCallEdges.has(testFile))
            continue;
        const importedFiles = fileImportsFrom.get(testFile);
        const representative = testEnts[0];
        if (!importedFiles || !representative)
            continue;
        for (const importedFile of importedFiles) {
            for (const srcEnt of sourceEntitiesByFile.get(importedFile) ?? []) {
                // Skip type-only declarations for a cleaner graph.
                if (srcEnt.kind === "type" || srcEnt.kind === "interface")
                    continue;
                addTestEdge(representative.key, srcEnt.key);
            }
        }
    }
    return testEdges;
}
|
|
990
|
-
// ── Phase 5: Derived Fields ──────────────────────────────────────
|
|
991
|
-
/**
 * Fill in `fan_in`, `fan_out` and `risk_level` on every entity, in place.
 *
 * fan_out counts edges whose `from_key` is the entity's key, fan_in counts
 * edges whose `to_key` is; entities touched by no edge get 0 for both.
 *
 * @param {Iterable<object>} entities - Entities to update (mutated).
 * @param {Iterable<{from_key: string, to_key: string}>} edges - Edges to count.
 */
function computeDerivedFields(entities, edges) {
    const inbound = new Map();
    const outbound = new Map();
    const bump = (counter, key) => {
        counter.set(key, (counter.get(key) ?? 0) + 1);
    };
    for (const { from_key: fromKey, to_key: toKey } of edges) {
        bump(outbound, fromKey);
        bump(inbound, toKey);
    }
    for (const entity of entities) {
        const fanIn = inbound.get(entity.key) ?? 0;
        const fanOut = outbound.get(entity.key) ?? 0;
        entity.fan_in = fanIn;
        entity.fan_out = fanOut;
        entity.risk_level = computeRiskLevel(fanIn, fanOut);
    }
}
|
|
1005
|
-
/**
 * Classify an entity's risk from its fan-in and fan-out.
 *
 * @param {number} fanIn - Number of inbound edges.
 * @param {number} fanOut - Number of outbound edges.
 * @returns {"high"|"medium"|"normal"} "high" at fan-in ≥ 10 or fan-out ≥ 15,
 *   "medium" at fan-in ≥ 5 or fan-out ≥ 8, otherwise "normal".
 */
function computeRiskLevel(fanIn, fanOut) {
    // Tiers are checked from most to least severe; the first one reached wins.
    const tiers = [
        ["high", 10, 15],
        ["medium", 5, 8],
    ];
    for (const [level, minFanIn, minFanOut] of tiers) {
        if (fanIn >= minFanIn || fanOut >= minFanOut) {
            return level;
        }
    }
    return "normal";
}
|
|
1014
|
-
// ── Phase 6: CozoDB Population ───────────────────────────────────
|
|
1015
|
-
/**
 * Insert all entities and edges into CozoDB.
 *
 * Write order: one synthetic `file:<path>` module entity per distinct file
 * path, the code entities, file → entity "contains" edges, class → member
 * "contains" edges, then the code edges. The inserts issued directly by this
 * function report a failure on stderr and carry on with the next row.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @param entities Code entities; this function reads `key`, `kind`, `name`,
 *   `file_path` and `parent_class`.
 * @param edges Code edges, each handed to `insertEdge`.
 */
async function populateCozoDB(graphStore, entities, edges) {
    const fileEntityPut = [
        '?[key, kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test] <- [[$key, "module", $name, $fp, 0, 0, "", "", 0, 0, "normal", $is_test]]',
        ":put entities { key => kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test }",
    ].join("\n");
    const containsEdgePut = [
        '?[from_key, to_key, type, sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode] <- [[$from, $to, "contains", -1, "", "", false, "", 0, false, false, "", ""]]',
        ":put edges { from_key, to_key, type => sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode }",
    ].join("\n");
    // One pass collects the distinct file paths and indexes the first class
    // entity per (file_path, name), so the class → member step below is a map
    // lookup instead of a scan of `entities` for every member.
    const filePaths = new Set();
    const classesByFile = new Map();
    for (const e of entities) {
        if (!e.file_path) {
            continue;
        }
        filePaths.add(e.file_path);
        if (e.kind !== "class") {
            continue;
        }
        let classesByName = classesByFile.get(e.file_path);
        if (!classesByName) {
            classesByName = new Map();
            classesByFile.set(e.file_path, classesByName);
        }
        // Keep the first declaration, matching what Array.prototype.find returned.
        if (!classesByName.has(e.name)) {
            classesByName.set(e.name, e);
        }
    }
    // R.1: Create file-level entities (file:<path> with kind="module")
    for (const fp of filePaths) {
        try {
            await graphStore.db.run(fileEntityPut, { key: `file:${fp}`, name: basename(fp), fp, is_test: isTestFile(fp) });
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : JSON.stringify(err);
            process.stderr.write(`[unerr] ⚠ File entity insert failed for ${fp}: ${msg}\n`);
        }
    }
    // Batch insert code entities
    for (const entity of entities) {
        await insertEntity(graphStore, entity);
    }
    // R.2: Create contains edges (file → entity)
    for (const entity of entities) {
        if (!entity.file_path) {
            continue;
        }
        try {
            await graphStore.db.run(containsEdgePut, { from: `file:${entity.file_path}`, to: entity.key });
        }
        catch (err) {
            process.stderr.write(`[unerr] ⚠ Contains edge insert failed: ${err instanceof Error ? err.message : String(err)}\n`);
        }
    }
    // R.3b: Create class→method containment edges for entities with parent_class
    for (const entity of entities) {
        if (!entity.parent_class || !entity.file_path) {
            continue;
        }
        // The owning class must be declared in the same file as the member.
        const classEntity = classesByFile.get(entity.file_path)?.get(entity.parent_class);
        if (!classEntity) {
            continue;
        }
        try {
            await graphStore.db.run(containsEdgePut, { from: classEntity.key, to: entity.key });
        }
        catch (err) {
            process.stderr.write(`[unerr] ⚠ Class→method edge insert failed: ${err instanceof Error ? err.message : String(err)}\n`);
        }
    }
    // Batch insert code edges
    for (const edge of edges) {
        await insertEdge(graphStore, edge);
    }
}
|
|
1072
|
-
/**
 * Write one entity row with `:put entities`, then register it in `file_index`.
 * Optional fields fall back to 0 / "" / "normal" / false. A failure in either
 * write is reported on stderr and not rethrown.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @param entity Entity record to persist.
 */
async function insertEntity(graphStore, entity) {
    const entityPut = [
        "?[key, kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test] <- [[$key, $kind, $name, $fp, $sl, $el, $sig, $body, $fi, $fo, $rl, $is_test]]",
        ":put entities { key => kind, name, file_path, start_line, end_line, signature, body, fan_in, fan_out, risk_level, is_test }",
    ].join("\n");
    const fileIndexPut = "?[file_path, entity_key] <- [[$fp, $key]] :put file_index { file_path, entity_key }";
    try {
        await graphStore.db.run(entityPut, {
            key: entity.key,
            kind: entity.kind,
            name: entity.name,
            fp: entity.file_path,
            sl: entity.start_line ?? 0,
            el: entity.end_line ?? 0,
            sig: entity.signature ?? "",
            body: entity.body ?? "",
            fi: entity.fan_in ?? 0,
            fo: entity.fan_out ?? 0,
            rl: entity.risk_level ?? "normal",
            is_test: entity.is_test ?? false,
        });
        // File index
        await graphStore.db.run(fileIndexPut, { fp: entity.file_path, key: entity.key });
    }
    catch (err) {
        const reason = formatUnknownError(err);
        process.stderr.write(`[unerr] ⚠ Entity insert failed for ${entity.key}: ${reason}\n`);
    }
}
|
|
1096
|
-
/**
 * Write one edge row with `:put edges`. Only `from_key`, `to_key` and `type`
 * come from the edge; every other column gets the fixed defaults in the query.
 * A failed write is reported on stderr and not rethrown.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @param edge Edge record with `from_key`, `to_key` and `type`.
 */
async function insertEdge(graphStore, edge) {
    const edgePut = [
        '?[from_key, to_key, type, sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode] <- [[$from, $to, $type, -1, "", "", false, "", 0, false, false, "", ""]]',
        ":put edges { from_key, to_key, type => sequence_order, condition, branch_kind, is_loop, loop_kind, nesting_depth, is_try_guarded, is_error_handler, mutation_target, mutation_mode }",
    ].join("\n");
    try {
        await graphStore.db.run(edgePut, { from: edge.from_key, to: edge.to_key, type: edge.type });
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        process.stderr.write(`[unerr] ⚠ Edge insert failed (${edge.from_key} → ${edge.to_key}): ${reason}\n`);
    }
}
|
|
1109
|
-
// ── Phase 6.5: L1 Edge Materialization ──────────────────────────
|
|
1110
|
-
/**
 * Materialize L1 aggregate edges from L0 entity data.
 *
 * Runs two aggregations over the non-"contains" rows of `edges` and stores
 * the counts as weights:
 *   - file → file pairs into `file_edges`
 *   - class → class pairs (via the classes that contain each endpoint) into
 *     `class_edges`
 * Each half is independent: a failure is logged and the other half still runs.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 */
async function materializeL1Edges(graphStore) {
    const db = graphStore.db;
    // Shared driver: aggregate, write one weighted row per result, log the count.
    const materialize = async (label, aggregateQuery, putQuery, fromParam, toParam) => {
        try {
            const aggregated = await db.run(aggregateQuery);
            if (!aggregated.rows?.length) {
                return;
            }
            for (const row of aggregated.rows) {
                await db.run(putQuery, {
                    [fromParam]: row[0],
                    [toParam]: row[1],
                    et: row[2],
                    w: row[3],
                    ts: Date.now(),
                });
            }
            log.info(`L1 ${label} edges: ${aggregated.rows.length} materialized`);
        }
        catch (err) {
            log.info(`L1 ${label} edge materialization failed: ${err instanceof Error ? err.message : err}`);
        }
    };
    // 4B: Weighted file-to-file edges — aggregate L0 edges by file pair and type
    const fileAggregate = [
        "",
        "?[from_file, to_file, edge_type, count(from_key)] :=",
        "*edges{from_key, to_key, type: edge_type},",
        'edge_type != "contains",',
        "*entities{key: from_key, file_path: from_file},",
        "*entities{key: to_key, file_path: to_file},",
        "from_file != to_file",
        "",
    ].join("\n");
    const fileEdgePut = [
        "?[from_file, to_file, edge_type, weight, updated_at] <- [[$ff, $tf, $et, $w, $ts]]",
        ":put file_edges { from_file, to_file, edge_type => weight, updated_at }",
    ].join("\n");
    await materialize("file", fileAggregate, fileEdgePut, "ff", "tf");
    // 4C: Weighted class-to-class edges — aggregate L0 method edges by owning class
    const classAggregate = [
        "",
        "?[from_class, to_class, edge_type, count(from_key)] :=",
        "*edges{from_key, to_key, type: edge_type},",
        'edge_type != "contains",',
        '*edges{from_key: fc, to_key: from_key, type: "contains"},',
        '*entities{key: fc, kind: "class"},',
        '*edges{from_key: tc, to_key: to_key, type: "contains"},',
        '*entities{key: tc, kind: "class"},',
        "from_class = fc, to_class = tc,",
        "from_class != to_class",
        "",
    ].join("\n");
    const classEdgePut = [
        "?[from_class, to_class, edge_type, weight, updated_at] <- [[$fc, $tc, $et, $w, $ts]]",
        ":put class_edges { from_class, to_class, edge_type => weight, updated_at }",
    ].join("\n");
    await materialize("class", classAggregate, classEdgePut, "fc", "tc");
}
|
|
1174
|
-
// ── Phase 7: Community Detection ─────────────────────────────────
|
|
1175
|
-
/**
 * Run cascaded multi-level community detection and persist the results.
 *
 * Reads entities, entity edges and materialized file edges, passes them to
 * `detectCascadedCommunities`, then writes entity assignments, macro-community
 * metadata and file community assignments. Individual write failures are
 * reported on stderr and skipped.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @returns The macro-community count, or 0 when there are no entities or the
 *   entity/edge query fails.
 */
export async function runCommunityDetection(graphStore) {
    const db = graphStore.db;
    const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
    const warn = (text) => process.stderr.write(`[unerr] ⚠ ${text}\n`);
    // Extract entities (key, kind, file_path)
    let entityResult;
    try {
        entityResult = await db.run("?[key, kind, file_path] := *entities{key, kind, file_path}");
    }
    catch (err) {
        warn(`Entity query failed during community detection: ${reasonOf(err)}`);
        return 0;
    }
    if (!entityResult?.rows?.length) {
        return 0;
    }
    const entities = entityResult.rows.map((row) => ({ key: row[0], kind: row[1], file_path: row[2] }));
    // Extract entity edges
    let edgeResult;
    try {
        edgeResult = await db.run("?[from_key, to_key, type] := *edges{from_key, to_key, type}");
    }
    catch (err) {
        warn(`Edge query failed during community detection: ${reasonOf(err)}`);
        return 0;
    }
    const entityEdges = (edgeResult?.rows ?? []).map((row) => ({ from_key: row[0], to_key: row[1], type: row[2] }));
    // Extract materialized file edges; a failed read is treated as "no file edges"
    let fileEdgeResult;
    try {
        fileEdgeResult = await db.run("?[from_file, to_file, edge_type, weight] := *file_edges{from_file, to_file, edge_type, weight}");
    }
    catch {
        fileEdgeResult = { rows: [] };
    }
    const fileEdges = (fileEdgeResult?.rows ?? []).map((row) => ({
        from_file: row[0],
        to_file: row[1],
        edge_type: row[2],
        weight: row[3],
    }));
    // Run cascaded community detection
    const result = detectCascadedCommunities(fileEdges, entities, entityEdges);
    // Write entity community assignments (hierarchical IDs)
    for (const [key, communityId] of result.entityAssignments) {
        try {
            await db.run("?[key, community] <- [[$key, $cid]] :update entities { key => community }", { key, cid: communityId });
        }
        catch (err) {
            warn(`Community assignment update failed: ${reasonOf(err)}`);
        }
    }
    // Write macro-community metadata (repurposed `communities` relation for file-level)
    for (const macro of result.macroCommunities) {
        try {
            await db.run("?[id, label, size, cohesion] <- [[$id, $label, $size, $cohesion]] :put communities { id => label, size, cohesion }", { id: macro.id, label: macro.label, size: macro.size, cohesion: macro.cohesion });
        }
        catch (err) {
            warn(`Community metadata write failed: ${reasonOf(err)}`);
        }
    }
    // Write file community assignments
    const fileCommunityPut = [
        "?[file_path, community, label, cohesion, updated_at] <- [[$fp, $cid, $label, $cohesion, $ts]]",
        ":put file_communities { file_path => community, label, cohesion, updated_at }",
    ].join("\n");
    for (const assignment of result.fileCommunities) {
        try {
            await db.run(fileCommunityPut, {
                fp: assignment.file_path,
                cid: assignment.community,
                label: assignment.label,
                cohesion: assignment.cohesion,
                ts: Date.now(),
            });
        }
        catch (err) {
            warn(`File community write failed: ${reasonOf(err)}`);
        }
    }
    const macroCount = result.macroCommunities.length;
    log.info(`Community detection: ${macroCount} macro-communities, ${result.entityAssignments.size} entity assignments`);
    return result.macroCommunities.length;
}
|
|
1261
|
-
/**
 * Phase 8: Convention detection and rule generation (Sprint L6).
 *
 * Detects conventions from the indexed graph, loads the detected patterns,
 * generates rules from the conventions and loads those too. Any failure along
 * the way is logged as "skipped" and reported as zero counts.
 *
 * @param graphStore Store exposing `db`, `loadPatterns` and `loadRules`.
 * @param repoId Repository identifier passed to rule generation.
 * @returns `{ patternCount, ruleCount }`.
 */
export async function runConventionDetection(graphStore, repoId) {
    try {
        const detection = await detectLocalConventions(graphStore.db);
        await graphStore.loadPatterns(detection.patterns);
        const generation = generateLocalRules(detection.conventions, repoId);
        await graphStore.loadRules(generation.rules);
        const patternCount = detection.patterns.length;
        const ruleCount = generation.rules.length;
        log.info(`Conventions: ${patternCount} patterns detected, ${ruleCount} rules generated`);
        // Per-category breakdown, printed only for categories with hits.
        const breakdown = [
            ["Naming", detection.stats.naming],
            ["Structure", detection.stats.structure],
            ["Import direction", detection.stats.importDirection],
        ];
        for (const [label, count] of breakdown) {
            if (count > 0) {
                log.info(` ${label}: ${count}`);
            }
        }
        return { patternCount, ruleCount };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.info(`Convention detection skipped: ${reason}`);
        return { patternCount: 0, ruleCount: 0 };
    }
}
|
|
1292
|
-
// ── Helpers ──────────────────────────────────────────────────────
|
|
1293
|
-
/**
 * Remove everything indexed for one file: its entities, its `file_index`
 * rows, every edge that starts or ends at one of its entities, and their
 * search tokens.
 *
 * The initial `file_index` lookup is not guarded, so a failure there rejects.
 * Each removal after it is best-effort: a failed delete is ignored and the
 * remaining deletes still run.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @param relPath File path as stored in `file_index.file_path`.
 */
async function removeFileEntities(graphStore, relPath) {
    const db = graphStore.db;
    // Edges are matched by named fields (as removeOrphanedEntities does) rather
    // than by position, so these deletes keep working if the `edges` relation
    // gains or loses value columns.
    const rmEdgesFrom = "?[from_key, to_key, type] := *edges{from_key, to_key, type}, from_key = $key :rm edges { from_key, to_key, type }";
    const rmEdgesTo = "?[from_key, to_key, type] := *edges{from_key, to_key, type}, to_key = $key :rm edges { from_key, to_key, type }";
    // Get entity keys for this file
    const result = await db.run("?[entity_key] := *file_index[file_path, entity_key], file_path = $fp", { fp: relPath });
    const keys = result.rows.map((r) => r[0]);
    // Remove entities
    for (const key of keys) {
        try {
            await db.run("?[key] <- [[$key]] :rm entities { key }", { key });
        }
        catch {
            /* entity may not exist */
        }
    }
    // Remove file index entries
    try {
        await db.run("?[file_path, entity_key] := *file_index[file_path, entity_key], file_path = $fp :rm file_index { file_path, entity_key }", { fp: relPath });
    }
    catch {
        /* may not exist */
    }
    // Remove edges involving these entities (outgoing, then incoming)
    for (const key of keys) {
        for (const rmEdges of [rmEdgesFrom, rmEdgesTo]) {
            try {
                await db.run(rmEdges, { key });
            }
            catch {
                /* ignore */
            }
        }
    }
    // Remove search tokens
    for (const key of keys) {
        try {
            await db.run("?[token, entity_key] := *search_tokens[token, entity_key], entity_key = $key :rm search_tokens { token, entity_key }", { key });
        }
        catch {
            /* ignore */
        }
    }
}
|
|
1340
|
-
/**
 * Prune entities that are in the graph but not in the current index run,
 * together with their edges, search tokens and file_index rows. Stale data
 * from deleted files/entities is removed after the fact instead of clearing
 * the graph upfront, so queries remain valid during the reindex window.
 *
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @param liveKeys Collection with a `has(key)` method holding the keys to keep.
 */
async function removeOrphanedEntities(graphStore, liveKeys) {
    const db = graphStore.db;
    // Per-orphan cleanup, run in this order; the entity row itself goes last.
    const cleanupQueries = [
        // Remove edges referencing this entity
        "?[from_key, to_key, type] := *edges{from_key, to_key, type}, from_key = $key :rm edges {from_key, to_key, type}",
        "?[from_key, to_key, type] := *edges{from_key, to_key, type}, to_key = $key :rm edges {from_key, to_key, type}",
        // Remove search tokens
        "?[token, entity_key] := *search_tokens[token, entity_key], entity_key = $key :rm search_tokens {token, entity_key}",
        // Remove file index entries
        "?[file_path, entity_key] := *file_index[file_path, entity_key], entity_key = $key :rm file_index {file_path, entity_key}",
        // Remove the entity itself
        "?[key] <- [[$key]] :rm entities {key}",
    ];
    // Get all existing entity keys from the graph
    let existingKeys;
    try {
        const existing = await db.run("?[key] := *entities{key}");
        existingKeys = existing.rows.map((row) => row[0]);
    }
    catch {
        return; // entities relation may not exist on first boot
    }
    const orphanKeys = existingKeys.filter((candidate) => !liveKeys.has(candidate));
    if (orphanKeys.length === 0) {
        return;
    }
    log.info(`Removing ${orphanKeys.length} orphaned entities from graph`);
    for (const key of orphanKeys) {
        try {
            for (const query of cleanupQueries) {
                await db.run(query, { key });
            }
        }
        catch {
            // Ignore per-entity removal failures
        }
    }
}
|
|
1377
|
-
/**
 * Resolve a local entity name to its key within the same file.
 *
 * @param name Entity name to look up; the sentinel "__file__" never resolves.
 * @param filePath File path used to build the key.
 * @param repoId Repository identifier used to build the key.
 * @param entities Candidate entities; the first one whose `name` matches wins.
 * @returns The entity key, or null when nothing matches.
 */
function resolveEntityName(name, filePath, repoId, entities) {
    const isFileSentinel = name === "__file__";
    const hit = isFileSentinel ? undefined : entities.find((candidate) => candidate.name === name);
    return hit
        ? entityKey(repoId, filePath, hit.kind, hit.name, hit.signature)
        : null;
}
|
|
1387
|
-
/**
 * Resolve an entity name by searching the global CozoDB graph.
 *
 * @param name Entity name to look up.
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @returns The key from the first returned row, or null when there is no row
 *   or the query fails.
 */
async function resolveEntityNameGlobal(name, graphStore) {
    const lookup = "?[key] := *entities{key, name}, name = $name :limit 1";
    try {
        const { rows } = await graphStore.db.run(lookup, { name });
        const hasRow = rows.length > 0;
        if (hasRow) {
            return rows[0]?.[0];
        }
    }
    catch {
        /* ignore */
    }
    return null;
}
|
|
1400
|
-
// ── Document File Discovery & Indexing ──────────────────────────
|
|
1401
|
-
/**
 * Format-specific regex patterns for extracting searchable tokens from
 * document files, keyed by lower-case file extension. Capture group 1 (and
 * group 2 for Terraform/HCL) holds the text to tokenize. Every pattern is
 * global, and every key gets its own RegExp instance.
 */
const DOC_TOKEN_PATTERNS = (() => {
    // Families that share one pattern; each call yields a fresh RegExp.
    const markdownHeading = () => /^#{1,6}\s+(.+)$/gm;
    const terraformBlock = () => /^(?:resource|module|variable|data|output)\s+"([^"]+)"(?:\s+"([^"]+)")?/gm;
    const graphqlDefinition = () => /^(?:type|query|mutation|subscription|input|enum|interface)\s+(\w+)/gm;
    const shellFunction = () => /^(?:function\s+)?(\w+)\s*\(\)/gm;
    const latexHeading = () => /\\(?:section|subsection|chapter|title)\{([^}]+)\}/gm;
    const htmlHeading = () => /<(?:h[1-6]|title)[^>]*>([^<]+)</gim;
    return {
        ".md": markdownHeading(),
        ".mdx": markdownHeading(),
        ".tf": terraformBlock(),
        ".hcl": terraformBlock(),
        ".sql": /CREATE\s+(?:TABLE|VIEW|FUNCTION|PROCEDURE|INDEX)\s+(?:IF\s+NOT\s+EXISTS\s+)?["'`]?(\w+)/gim,
        ".graphql": graphqlDefinition(),
        ".gql": graphqlDefinition(),
        ".sh": shellFunction(),
        ".bash": shellFunction(),
        ".zsh": shellFunction(),
        ".proto": /^(?:message|service|enum|rpc)\s+(\w+)/gm,
        ".prisma": /^(?:model|enum|type|datasource|generator)\s+(\w+)/gm,
        ".smithy": /^(?:service|resource|operation|structure|union|enum)\s+(\w+)/gm,
        ".cmake": /^(?:project|add_library|add_executable|find_package)\s*\(\s*(\w+)/gm,
        ".bazel": /^(?:cc_library|cc_binary|java_library|py_library|go_library)\s*\(\s*name\s*=\s*"([^"]+)"/gm,
        ".bzl": /^def\s+(\w+)\s*\(/gm,
        ".tex": latexHeading(),
        ".latex": latexHeading(),
        ".html": htmlHeading(),
        ".vue": htmlHeading(),
        ".svelte": htmlHeading(),
        ".thrift": /^(?:service|struct|enum|exception|typedef)\s+(\w+)/gm,
        ".avsc": /"name"\s*:\s*"([^"]+)"/gm,
    };
})();
|
|
1427
|
-
/**
 * Recursively collect document files under `dir` into `files`.
 *
 * Directories are skipped when they are in EXCLUDED_DIRS, when they are
 * dot-directories outside the CI/CD whitelist (e.g. `.github`, `.circleci`),
 * or when their project-relative path is excluded. Files are skipped when
 * they exceed MAX_FILE_SIZE or are already covered by INDEXABLE_EXTENSIONS.
 * Unreadable directories and entries are skipped silently.
 *
 * @param dir Directory to scan.
 * @param files Output array; matching paths (`dir` joined with entry names)
 *   are appended.
 * @param projectRoot Root used to compute paths for the path-aware exclusion.
 */
function walkDirForDocs(dir, files, projectRoot) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        return;
    }
    for (const entry of names) {
        const absPath = join(dir, entry);
        let info;
        try {
            info = statSync(absPath);
        }
        catch {
            continue;
        }
        if (info.isDirectory()) {
            // Checked in order: excluded name, non-whitelisted dot-directory,
            // then path-aware exclusion (e.g. `.claude/worktrees/`), which
            // applies even when a parent dot-dir is whitelisted.
            const skipDirectory = EXCLUDED_DIRS.has(entry) ||
                (entry.startsWith(".") && !CI_CD_DIRS.has(entry)) ||
                isExcludedPath(relative(projectRoot, absPath));
            if (!skipDirectory) {
                walkDirForDocs(absPath, files, projectRoot);
            }
            continue;
        }
        const withinLimits = info.isFile() && !(info.size > MAX_FILE_SIZE);
        if (!withinLimits) {
            continue;
        }
        const ext = extname(entry).toLowerCase();
        const name = basename(entry);
        // Files handled by code indexing are not documents.
        if (INDEXABLE_EXTENSIONS.has(ext)) {
            continue;
        }
        // Match by extension or by well-known filename
        const isDocument = DOCUMENT_EXTENSIONS.has(ext) || DOCUMENT_NAMES.has(name);
        if (isDocument) {
            files.push(absPath);
        }
    }
}
|
|
1476
|
-
/**
 * Discover all document/config files in the project.
 *
 * @param projectRoot Directory to scan; also the root for path-based exclusions.
 * @returns Paths collected by `walkDirForDocs`, in traversal order.
 */
function discoverDocumentFiles(projectRoot) {
    const discovered = [];
    walkDirForDocs(projectRoot, discovered, projectRoot);
    return discovered;
}
|
|
1482
|
-
/**
 * Extract searchable tokens from a document file.
 *
 * Tokens come from three places: the file name, each directory segment of
 * `relPath` (segments in EXCLUDED_DIRS are dropped), and format-specific
 * matches in the first DOC_READ_LIMIT characters of the file. If the file
 * cannot be read, only the name/path tokens are returned.
 *
 * @param absPath Absolute path used to read the file and pick its pattern.
 * @param relPath Project-relative path; may use "/" or "\" as separator.
 * @returns De-duplicated tokens in first-seen order.
 */
function extractDocTokens(absPath, relPath) {
    const tokens = new Set(tokenize(basename(relPath)));
    const addTokens = (text) => {
        for (const t of tokenize(text)) {
            tokens.add(t);
        }
    };
    // Add directory path tokens (e.g. "docs", "architecture", "config").
    // path.relative() yields "\"-separated paths on Windows, so split on both
    // separators; otherwise no directory token is ever produced there.
    for (const segment of relPath.split(/[\\/]/).slice(0, -1)) {
        if (segment && !EXCLUDED_DIRS.has(segment)) {
            addTokens(segment);
        }
    }
    const ext = extname(absPath).toLowerCase();
    try {
        const content = readFileSync(absPath, "utf-8").slice(0, DOC_READ_LIMIT);
        const pattern = DOC_TOKEN_PATTERNS[ext];
        if (pattern) {
            // matchAll iterates a clone that starts at the pattern's current
            // lastIndex, so reset the shared global regex first.
            pattern.lastIndex = 0;
            for (const match of content.matchAll(pattern)) {
                if (match[1]) {
                    addTokens(match[1]);
                }
                if (match[2]) {
                    addTokens(match[2]);
                }
            }
        }
        // For YAML: extract top-level keys (non-indented key: lines)
        if (ext === ".yaml" || ext === ".yml") {
            for (const match of content.matchAll(/^([a-zA-Z_][\w-]*)\s*:/gm)) {
                if (match[1]) {
                    addTokens(match[1]);
                }
            }
        }
    }
    catch {
        /* unreadable — filename/path tokens only */
    }
    return [...tokens];
}
|
|
1531
|
-
/**
 * Index document/config files for search discoverability.
 *
 * For each discovered file this writes a lightweight entity (kind:
 * "document", key `doc:<relative path>`), its search tokens and a file_index
 * row. A failed entity insert is reported on stderr and the file's tokens and
 * index row are skipped; token and file_index failures are ignored.
 *
 * @param projectRoot Project root to scan; keys use paths relative to it.
 * @param graphStore Store exposing the CozoDB handle as `graphStore.db`.
 * @returns Doc entity keys for orphan cleanup. A key is included even when
 *   its entity insert failed.
 */
async function indexDocumentFiles(projectRoot, graphStore) {
    const docFiles = discoverDocumentFiles(projectRoot);
    if (docFiles.length === 0) {
        return [];
    }
    const docKeys = [];
    for (const absPath of docFiles) {
        const relPath = relative(projectRoot, absPath);
        const name = basename(relPath);
        const key = `doc:${relPath}`;
        docKeys.push(key);
        // Insert entity (kind: "document")
        // NOTE(review): code entities use "high"/"medium"/"normal" risk levels
        // (see computeRiskLevel); confirm "low" is intended for documents.
        try {
            await graphStore.db.run('?[key, kind, name, file_path, fan_in, fan_out, risk_level] <- [[$key, "document", $name, $fp, 0, 0, "low"]] :put entities { key => kind, name, file_path, fan_in, fan_out, risk_level }', { key, name, fp: relPath });
        }
        catch (err) {
            // Report the failure like the other entity inserts do, rather than
            // dropping the document without a trace.
            process.stderr.write(`[unerr] ⚠ Document entity insert failed for ${key}: ${err instanceof Error ? err.message : String(err)}\n`);
            continue;
        }
        // Extract and insert search tokens
        const docTokens = extractDocTokens(absPath, relPath);
        for (const token of docTokens) {
            try {
                await graphStore.db.run("?[token, entity_key] <- [[$token, $key]] :put search_tokens { token, entity_key }", { token, key });
            }
            catch {
                /* duplicate — safe */
            }
        }
        // file_index entry
        try {
            await graphStore.db.run("?[file_path, entity_key] <- [[$fp, $key]] :put file_index { file_path, entity_key }", { fp: relPath, key });
        }
        catch {
            /* safe */
        }
    }
    if (docKeys.length > 0) {
        log.info(`Documents: ${docKeys.length} doc/config files indexed for search`);
    }
    return docKeys;
}
|