@unerr-ai/unerr 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -45
- package/dist/cli.js +37443 -36022
- package/package.json +2 -1
- package/dist/behaviors/agent-llm-bridge.js +0 -166
- package/dist/behaviors/architecture-guard.js +0 -256
- package/dist/behaviors/auto-doc.js +0 -247
- package/dist/behaviors/cascade-guard.js +0 -289
- package/dist/behaviors/change-narrative.js +0 -270
- package/dist/behaviors/convention-drift.js +0 -290
- package/dist/behaviors/framework.js +0 -235
- package/dist/behaviors/guard-formatter.js +0 -44
- package/dist/behaviors/incomplete-work.js +0 -270
- package/dist/behaviors/loop-breaker.js +0 -300
- package/dist/behaviors/session-continuity.js +0 -208
- package/dist/commands/branches.js +0 -97
- package/dist/commands/check-commit.js +0 -225
- package/dist/commands/compress-output.js +0 -64
- package/dist/commands/config-verify.js +0 -243
- package/dist/commands/daemon.js +0 -905
- package/dist/commands/dashboard.js +0 -52
- package/dist/commands/debug.js +0 -200
- package/dist/commands/enrich.js +0 -184
- package/dist/commands/exec.js +0 -233
- package/dist/commands/gain.js +0 -156
- package/dist/commands/hook.js +0 -88
- package/dist/commands/index.js +0 -88
- package/dist/commands/init.js +0 -74
- package/dist/commands/install.js +0 -505
- package/dist/commands/learn.js +0 -116
- package/dist/commands/manifest.js +0 -193
- package/dist/commands/rewind.js +0 -103
- package/dist/commands/serve.js +0 -19
- package/dist/commands/setup-wizard.js +0 -414
- package/dist/commands/skills.js +0 -64
- package/dist/commands/stats.js +0 -20
- package/dist/commands/status.js +0 -654
- package/dist/commands/timeline.js +0 -139
- package/dist/commands/uninstall.js +0 -230
- package/dist/components/App.js +0 -109
- package/dist/components/Banner.js +0 -12
- package/dist/components/ConfirmPrompt.js +0 -25
- package/dist/components/DriftSummary.js +0 -23
- package/dist/components/GradeBadge.js +0 -15
- package/dist/components/HealthCard.js +0 -18
- package/dist/components/InkSpinner.js +0 -22
- package/dist/components/InputBox.js +0 -17
- package/dist/components/KeyValue.js +0 -13
- package/dist/components/MessageList.js +0 -14
- package/dist/components/ProgressBar.js +0 -26
- package/dist/components/Section.js +0 -16
- package/dist/components/SessionSummaryCard.js +0 -73
- package/dist/components/StartupDisplay.js +0 -24
- package/dist/components/StatusDashboard.js +0 -57
- package/dist/components/StatusLine.js +0 -8
- package/dist/components/StepLine.js +0 -22
- package/dist/components/Theme.js +0 -20
- package/dist/components/ToolProgress.js +0 -8
- package/dist/components/ViolationList.js +0 -21
- package/dist/components/render.js +0 -13
- package/dist/config/agent-registry.js +0 -237
- package/dist/config/claude-settings-hooks.js +0 -304
- package/dist/config/hook-installer.js +0 -65
- package/dist/config/instruction-writer.js +0 -388
- package/dist/config/mcp-config-writer.js +0 -266
- package/dist/config/settings.js +0 -174
- package/dist/config/tool-detector.js +0 -42
- package/dist/config/value-surfacing.js +0 -119
- package/dist/core/context-assembly.js +0 -108
- package/dist/core/conversation.js +0 -33
- package/dist/core/local-chat-provider.js +0 -475
- package/dist/core/provider-factory.js +0 -55
- package/dist/core/providers.js +0 -90
- package/dist/core/query-engine.js +0 -174
- package/dist/daemon/api.js +0 -312
- package/dist/daemon/autostart.js +0 -119
- package/dist/daemon/bootstrap.js +0 -39
- package/dist/daemon/client.js +0 -164
- package/dist/daemon/detect-ci.js +0 -81
- package/dist/daemon/platform-linux.js +0 -146
- package/dist/daemon/platform-macos.js +0 -134
- package/dist/daemon/platform-windows.js +0 -116
- package/dist/daemon/process-manager.js +0 -299
- package/dist/daemon/protocol.js +0 -23
- package/dist/daemon/registry.js +0 -270
- package/dist/daemon/settings-schema.js +0 -72
- package/dist/daemon/system-health.js +0 -134
- package/dist/daemon/version-checker.js +0 -262
- package/dist/daemon/warm-start.js +0 -223
- package/dist/entrypoints/cli.js +0 -1043
- package/dist/entrypoints/daemon.js +0 -380
- package/dist/entrypoints/repl.js +0 -147
- package/dist/hooks/adapters/claude-code.js +0 -90
- package/dist/hooks/adapters/cline.js +0 -100
- package/dist/hooks/adapters/cursor.js +0 -98
- package/dist/hooks/hook-dedup.js +0 -79
- package/dist/hooks/hook-runner.js +0 -113
- package/dist/hooks/navigation-hooks.js +0 -175
- package/dist/hooks/prompt-hooks.js +0 -63
- package/dist/hooks/shell-hooks.js +0 -47
- package/dist/ignore.js +0 -111
- package/dist/intelligence/approach-suggester.js +0 -61
- package/dist/intelligence/ast-extractor.js +0 -2615
- package/dist/intelligence/ast-worker.js +0 -34
- package/dist/intelligence/background-indexer.js +0 -121
- package/dist/intelligence/blast-radius.js +0 -200
- package/dist/intelligence/community-detection.js +0 -691
- package/dist/intelligence/community-detector.js +0 -184
- package/dist/intelligence/computation-scheduler.js +0 -75
- package/dist/intelligence/confidence-propagation.js +0 -47
- package/dist/intelligence/convention-detector.js +0 -242
- package/dist/intelligence/convention-learner.js +0 -205
- package/dist/intelligence/convention-matcher.js +0 -205
- package/dist/intelligence/cozo-schema.js +0 -376
- package/dist/intelligence/decision-point-detector.js +0 -90
- package/dist/intelligence/deep-dive-tools.js +0 -586
- package/dist/intelligence/durability-scorer.js +0 -84
- package/dist/intelligence/exploration-cost.js +0 -204
- package/dist/intelligence/exploration-pattern-tracker.js +0 -61
- package/dist/intelligence/fact-generator.js +0 -322
- package/dist/intelligence/facts-schema.js +0 -90
- package/dist/intelligence/file-intelligence.js +0 -59
- package/dist/intelligence/graph-holder.js +0 -220
- package/dist/intelligence/graph-temporal-joiner.js +0 -238
- package/dist/intelligence/health-grade.js +0 -423
- package/dist/intelligence/health-grader.js +0 -200
- package/dist/intelligence/health-map-data.js +0 -259
- package/dist/intelligence/import-symbols.js +0 -136
- package/dist/intelligence/incremental-indexer.js +0 -658
- package/dist/intelligence/indexer/centrality.js +0 -62
- package/dist/intelligence/indexer/cfg-context.js +0 -95
- package/dist/intelligence/indexer/confidence.js +0 -34
- package/dist/intelligence/indexer/cross-file-resolver.js +0 -104
- package/dist/intelligence/indexer/edge-repair.js +0 -89
- package/dist/intelligence/indexer/entity-key.js +0 -17
- package/dist/intelligence/indexer/export-map.js +0 -132
- package/dist/intelligence/indexer/git-cochange.js +0 -128
- package/dist/intelligence/indexer/graph-patch.js +0 -147
- package/dist/intelligence/indexer/incremental.js +0 -78
- package/dist/intelligence/indexer/ingest.js +0 -160
- package/dist/intelligence/indexer/language-detect.js +0 -226
- package/dist/intelligence/indexer/metadata.js +0 -63
- package/dist/intelligence/indexer/mutation-tracker.js +0 -79
- package/dist/intelligence/indexer/orchestrator.js +0 -155
- package/dist/intelligence/indexer/plugin-interface.js +0 -31
- package/dist/intelligence/indexer/plugins/csharp.js +0 -440
- package/dist/intelligence/indexer/plugins/go.js +0 -335
- package/dist/intelligence/indexer/plugins/java.js +0 -370
- package/dist/intelligence/indexer/plugins/python.js +0 -358
- package/dist/intelligence/indexer/plugins/regex-fallback.js +0 -82
- package/dist/intelligence/indexer/plugins/ruby.js +0 -290
- package/dist/intelligence/indexer/plugins/rust.js +0 -484
- package/dist/intelligence/indexer/plugins/tier2-generic.js +0 -310
- package/dist/intelligence/indexer/plugins/typescript.js +0 -456
- package/dist/intelligence/indexer/resource-monitor.js +0 -93
- package/dist/intelligence/indexer/scip/decoder.js +0 -253
- package/dist/intelligence/indexer/scip/detector.js +0 -232
- package/dist/intelligence/indexer/scip/downloader.js +0 -427
- package/dist/intelligence/indexer/scip/fallback.js +0 -34
- package/dist/intelligence/indexer/scip/merger.js +0 -109
- package/dist/intelligence/indexer/scip/orchestrator.js +0 -433
- package/dist/intelligence/indexer/scip/runner.js +0 -98
- package/dist/intelligence/indexer/snapshot.js +0 -66
- package/dist/intelligence/indexer/test-detector.js +0 -196
- package/dist/intelligence/indexer/watch-integration.js +0 -61
- package/dist/intelligence/indexer/worker.js +0 -85
- package/dist/intelligence/local-convention-detector.js +0 -437
- package/dist/intelligence/local-embeddings.js +0 -190
- package/dist/intelligence/local-graph.js +0 -1946
- package/dist/intelligence/local-indexer.js +0 -1575
- package/dist/intelligence/local-llm.js +0 -163
- package/dist/intelligence/local-rule-generator.js +0 -154
- package/dist/intelligence/local-snapshot.js +0 -213
- package/dist/intelligence/negative-knowledge.js +0 -103
- package/dist/intelligence/persistent-db.js +0 -85
- package/dist/intelligence/query-router.js +0 -2556
- package/dist/intelligence/risk-classifier.js +0 -116
- package/dist/intelligence/rule-evaluator.js +0 -380
- package/dist/intelligence/rule-generator.js +0 -49
- package/dist/intelligence/search-index.js +0 -173
- package/dist/intelligence/semantic/docstring-extractor.js +0 -67
- package/dist/intelligence/semantic/embedding-store.js +0 -52
- package/dist/intelligence/semantic/enrichment-orchestrator.js +0 -48
- package/dist/intelligence/semantic/git-message-miner.js +0 -114
- package/dist/intelligence/semantic/identifier-tokenizer.js +0 -51
- package/dist/intelligence/semantic/node2vec-embeddings.js +0 -71
- package/dist/intelligence/semantic/node2vec-walks.js +0 -103
- package/dist/intelligence/semantic/path-domain-inference.js +0 -112
- package/dist/intelligence/semantic/similarity-engine.js +0 -60
- package/dist/intelligence/semantic/tfidf-vectors.js +0 -88
- package/dist/intelligence/session-brief-builder.js +0 -159
- package/dist/intelligence/session-context.js +0 -221
- package/dist/intelligence/session-health-monitor.js +0 -211
- package/dist/intelligence/session-narrative.js +0 -197
- package/dist/intelligence/session-pattern-analyzer.js +0 -218
- package/dist/intelligence/signal-scorer.js +0 -390
- package/dist/intelligence/signal-show-store.js +0 -182
- package/dist/intelligence/smart-truncate.js +0 -158
- package/dist/intelligence/subgraph-cache.js +0 -88
- package/dist/intelligence/temporal-facts.js +0 -494
- package/dist/intelligence/token-estimator.js +0 -100
- package/dist/intelligence/tool-injector.js +0 -87
- package/dist/intelligence/tree-sitter-loader.js +0 -71
- package/dist/intelligence/worker-pool.js +0 -116
- package/dist/proxy/arg-validator.js +0 -79
- package/dist/proxy/auto-bootstrap.js +0 -167
- package/dist/proxy/bridge.js +0 -147
- package/dist/proxy/budget-enforcer.js +0 -70
- package/dist/proxy/compression-quality-monitor.js +0 -160
- package/dist/proxy/compression-stats.js +0 -51
- package/dist/proxy/context-rot-detector.js +0 -137
- package/dist/proxy/drift-detector.js +0 -139
- package/dist/proxy/efficiency-tracker.js +0 -79
- package/dist/proxy/fact-ranking.js +0 -154
- package/dist/proxy/format-encoder.js +0 -266
- package/dist/proxy/http-transport.js +0 -90
- package/dist/proxy/lifecycle-actor.js +0 -55
- package/dist/proxy/lifecycle-machine.js +0 -187
- package/dist/proxy/log-tailer.js +0 -265
- package/dist/proxy/model-pricing.js +0 -98
- package/dist/proxy/network-firewall.js +0 -141
- package/dist/proxy/nudge-state.js +0 -93
- package/dist/proxy/output-compressor.js +0 -185
- package/dist/proxy/pid-lock.js +0 -291
- package/dist/proxy/proxy-context.js +0 -11
- package/dist/proxy/proxy.js +0 -2633
- package/dist/proxy/response-enrichment.js +0 -32
- package/dist/proxy/response-envelope.js +0 -313
- package/dist/proxy/session-dedup.js +0 -82
- package/dist/proxy/session-legend.js +0 -30
- package/dist/proxy/session-persistence.js +0 -210
- package/dist/proxy/session-resume.js +0 -94
- package/dist/proxy/session-stats.js +0 -513
- package/dist/proxy/shell-classifier.js +0 -1346
- package/dist/proxy/shell-compression-log.js +0 -93
- package/dist/proxy/shell-compressor.js +0 -390
- package/dist/proxy/shell-graph-boost.js +0 -202
- package/dist/proxy/shell-monitor-map.js +0 -18
- package/dist/proxy/shell-stats.js +0 -54
- package/dist/proxy/shell-strategies/cloud.js +0 -215
- package/dist/proxy/shell-strategies/diff.js +0 -159
- package/dist/proxy/shell-strategies/error-diagnostic.js +0 -796
- package/dist/proxy/shell-strategies/filter-dsl.js +0 -358
- package/dist/proxy/shell-strategies/git-status.js +0 -177
- package/dist/proxy/shell-strategies/key-value.js +0 -193
- package/dist/proxy/shell-strategies/log-text.js +0 -154
- package/dist/proxy/shell-strategies/omni.js +0 -188
- package/dist/proxy/shell-strategies/progress.js +0 -55
- package/dist/proxy/shell-strategies/redact.js +0 -76
- package/dist/proxy/shell-strategies/structured.js +0 -241
- package/dist/proxy/shell-strategies/tabular.js +0 -243
- package/dist/proxy/shell-strategies/test-results-types.js +0 -13
- package/dist/proxy/shell-strategies/test-results.js +0 -784
- package/dist/proxy/shell-strategies/tree-paths.js +0 -144
- package/dist/proxy/shell-strategies/yaml.js +0 -182
- package/dist/proxy/shell-tee.js +0 -111
- package/dist/proxy/signal-dedup.js +0 -171
- package/dist/proxy/startup-renderer.js +0 -158
- package/dist/proxy/task-token-display.js +0 -38
- package/dist/proxy/token-counter.js +0 -61
- package/dist/proxy/tool-clusters.js +0 -273
- package/dist/proxy/tool-definitions.js +0 -525
- package/dist/proxy/transport-mux.js +0 -229
- package/dist/proxy/wire-cap.js +0 -268
- package/dist/rules/developer.mozilla.org.json +0 -9
- package/dist/rules/github.com.json +0 -21
- package/dist/schemas/api/skills.js +0 -19
- package/dist/schemas/common/errors.js +0 -7
- package/dist/schemas/common/headers.js +0 -5
- package/dist/schemas/entities/edge.js +0 -25
- package/dist/schemas/entities/entity.js +0 -22
- package/dist/schemas/entities/rule.js +0 -18
- package/dist/schemas/index.js +0 -14
- package/dist/server/event-bus.js +0 -59
- package/dist/server/http.js +0 -156
- package/dist/server/middleware.js +0 -70
- package/dist/server/routes/drift.js +0 -97
- package/dist/server/routes/intelligence.js +0 -1217
- package/dist/server/routes/reasoning-quality.js +0 -444
- package/dist/server/routes/session.js +0 -86
- package/dist/server/routes/stream.js +0 -120
- package/dist/server/routes/system.js +0 -73
- package/dist/server/routes/temporal.js +0 -170
- package/dist/server/routes/timeline.js +0 -232
- package/dist/server/routes/token-flow.js +0 -403
- package/dist/skills/effectiveness-tracker.js +0 -93
- package/dist/skills/local-pack.js +0 -380
- package/dist/skills/resolver.js +0 -495
- package/dist/state-detector.js +0 -83
- package/dist/timeline/intent-detector.js +0 -263
- package/dist/timeline/loop-miner.js +0 -140
- package/dist/timeline/open-threads.js +0 -49
- package/dist/timeline/signal-reinforcer.js +0 -62
- package/dist/timeline/timeline-bootstrap.js +0 -151
- package/dist/timeline/timeline-store.js +0 -618
- package/dist/tools/coding/bash.js +0 -49
- package/dist/tools/coding/file-edit.js +0 -72
- package/dist/tools/coding/file-outline.js +0 -227
- package/dist/tools/coding/file-read-protocol.js +0 -425
- package/dist/tools/coding/file-read.js +0 -35
- package/dist/tools/coding/file-write.js +0 -43
- package/dist/tools/coding/glob-tool.js +0 -109
- package/dist/tools/coding/grep.js +0 -162
- package/dist/tools/coding/index.js +0 -27
- package/dist/tools/intelligence/index.js +0 -269
- package/dist/tools/intelligence/record-fact.js +0 -48
- package/dist/tools/intelligence/timeline-markers.js +0 -130
- package/dist/tools/registry.js +0 -47
- package/dist/tools/types.js +0 -8
- package/dist/tracking/auto-snapshot-triggers.js +0 -246
- package/dist/tracking/branch-context.js +0 -115
- package/dist/tracking/branch-snapshot.js +0 -217
- package/dist/tracking/causal-bridge.js +0 -317
- package/dist/tracking/circuit-breaker.js +0 -147
- package/dist/tracking/commit-watcher.js +0 -114
- package/dist/tracking/context-ledger.js +0 -119
- package/dist/tracking/correction-detector.js +0 -324
- package/dist/tracking/drift-tracker.js +0 -874
- package/dist/tracking/durability-tracker.js +0 -94
- package/dist/tracking/entity-rewind.js +0 -200
- package/dist/tracking/file-hash-state.js +0 -114
- package/dist/tracking/git-attribution.js +0 -132
- package/dist/tracking/git-trailers.js +0 -171
- package/dist/tracking/intelligence-counter.js +0 -46
- package/dist/tracking/intent-correlator.js +0 -202
- package/dist/tracking/intent-encoder.js +0 -52
- package/dist/tracking/intent-token-tracker.js +0 -159
- package/dist/tracking/ledger-archiver.js +0 -94
- package/dist/tracking/ledger-chains.js +0 -245
- package/dist/tracking/metrics-store.js +0 -361
- package/dist/tracking/native-watcher.js +0 -131
- package/dist/tracking/offline-rewind.js +0 -295
- package/dist/tracking/pending-violations.js +0 -74
- package/dist/tracking/persistence-effectiveness.js +0 -167
- package/dist/tracking/prompt-durability.js +0 -202
- package/dist/tracking/quality-signals.js +0 -213
- package/dist/tracking/redactor.js +0 -73
- package/dist/tracking/rewind-engine.js +0 -161
- package/dist/tracking/session-history.js +0 -128
- package/dist/tracking/session-receipt.js +0 -88
- package/dist/tracking/session-summary-writer.js +0 -157
- package/dist/tracking/shadow-ledger.js +0 -321
- package/dist/tracking/stash-manager.js +0 -258
- package/dist/tracking/timeline-fork.js +0 -213
- package/dist/tracking/timeline.js +0 -69
- package/dist/tracking/token-flow.js +0 -276
- package/dist/tracking/turn-segmenter.js +0 -122
- package/dist/tracking/weekly-accumulator.js +0 -179
- package/dist/tracking/working-snapshots.js +0 -188
- package/dist/tracking/workspace-manifest.js +0 -176
- package/dist/transport/http.js +0 -102
- package/dist/utils/counterfactual.js +0 -65
- package/dist/utils/deep-link.js +0 -34
- package/dist/utils/detect.js +0 -193
- package/dist/utils/exec.js +0 -73
- package/dist/utils/file-logger.js +0 -87
- package/dist/utils/format-error.js +0 -29
- package/dist/utils/git.js +0 -181
- package/dist/utils/log.js +0 -57
- package/dist/utils/logger.js +0 -35
- package/dist/utils/mcp-content-json.js +0 -8
- package/dist/utils/session-logger.js +0 -154
- package/dist/utils/startup-log.js +0 -512
- package/dist/utils/ui.js +0 -56
|
@@ -1,437 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Sprint L6.1: Local Convention Pattern Detector
|
|
3
|
-
*
|
|
4
|
-
* Analyzes locally-indexed CozoDB entities to detect coding conventions:
|
|
5
|
-
* 1. Naming conventions — regex analysis of entity names by kind
|
|
6
|
-
* 2. File structure patterns — single-kind directories, barrel exports
|
|
7
|
-
* 3. Import direction patterns — leaf modules (imported but never import outside)
|
|
8
|
-
*
|
|
9
|
-
* All detection is STRUCTURAL (regex + graph topology). Zero LLM calls.
|
|
10
|
-
* Performance target: <500ms for 10K entities.
|
|
11
|
-
*
|
|
12
|
-
* Produces CompactPattern[] compatible with CozoDB `patterns` relation
|
|
13
|
-
* and CozoGraphStore.loadPatterns().
|
|
14
|
-
*/
|
|
15
|
-
import { dirname } from "node:path";
|
|
16
|
-
import { isTestFile } from "./indexer/test-detector.js";
|
|
17
|
-
// ── Naming Pattern Regexes ──────────────────────────────────────
|
|
18
|
-
// All five expressions are fully anchored and carry no `g`/`y` flag, so
// calling `.test()` on them repeatedly is stateless.
// Note: the styles overlap on purpose-free edge cases — a single lowercase
// word such as "foo" satisfies camelCase, snake_case and kebab-case alike,
// and an all-caps word such as "FOO" satisfies both PascalCase and
// SCREAMING_SNAKE_CASE.
const CAMEL_CASE = /^[a-z][a-zA-Z0-9]*$/;
const PASCAL_CASE = /^[A-Z][a-zA-Z0-9]*$/;
const SNAKE_CASE = /^[a-z][a-z0-9]*(?:_[a-z0-9]+)*$/;
const SCREAMING_SNAKE = /^[A-Z][A-Z0-9]*(?:_[A-Z0-9]+)*$/;
const KEBAB_CASE = /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/;
/**
 * Naming styles probed for every entity kind, in reporting order.
 * Each entry carries a stable `id` (used inside convention keys), a display
 * `name`, the matching `regex`, and a human-readable `description`.
 */
const NAMING_PATTERNS = [
    ["camelCase", "camelCase", CAMEL_CASE],
    ["PascalCase", "PascalCase", PASCAL_CASE],
    ["snake_case", "snake_case", SNAKE_CASE],
    ["SCREAMING_SNAKE", "SCREAMING_SNAKE_CASE", SCREAMING_SNAKE],
    ["kebab-case", "kebab-case", KEBAB_CASE],
].map(([id, name, regex]) => ({
    id,
    name,
    regex,
    description: `${name} naming`,
}));
/** Groups with fewer members than this are ignored as statistically meaningless. */
const MIN_SAMPLE_SIZE = 3;
/** Share of a group (0..1) that must follow a naming style for it to count as a convention. */
const MIN_ADHERENCE = 0.6;
/** Upper bound on the number of example entity keys stored with each pattern. */
const MAX_EXEMPLARS = 5;
|
|
61
|
-
// ── Main Detection Function ─────────────────────────────────────
|
|
62
|
-
/**
 * Run every structural convention detector against the local graph database.
 *
 * Entities, edges and communities are read through this module's query
 * helpers (one query after another), then handed to the naming,
 * file-structure and import-direction detectors. The combined findings are
 * returned both in their rich form and flattened into pattern rows
 * (snake_case fields plus a derived `promoted_rule_key`).
 *
 * @param {object} db - Database handle forwarded to the query helpers.
 * @returns {Promise<{patterns: object[], conventions: object[], stats: object}>}
 *   `patterns` — one row per convention; `conventions` — the detector output,
 *   ordered naming → structure → import direction; `stats` — per-detector
 *   counts, the number of entities scanned, and wall-clock time in ms.
 */
export async function detectLocalConventions(db) {
    const startedAt = Date.now();
    // The three reads are intentionally awaited in sequence.
    const entities = await queryAllEntities(db);
    const edges = await queryAllEdges(db);
    const communities = await queryAllCommunities(db);
    const namingConventions = detectNamingConventions(entities);
    const structureConventions = detectFileStructurePatterns(entities);
    const importConventions = detectImportDirectionPatterns(entities, edges, communities);
    const conventions = [
        ...namingConventions,
        ...structureConventions,
        ...importConventions,
    ];
    // Flatten to the storage row shape; `detail` is deliberately left out.
    const patterns = conventions.map(({ key, name, kind, frequency, confidence, exemplarKeys }) => ({
        key,
        name,
        kind,
        frequency,
        confidence,
        exemplar_keys: exemplarKeys,
        promoted_rule_key: `local-rule-${key}`,
    }));
    const stats = {
        naming: namingConventions.length,
        structure: structureConventions.length,
        importDirection: importConventions.length,
        totalEntities: entities.length,
        elapsedMs: Date.now() - startedAt,
    };
    return { patterns, conventions, stats };
}
|
|
109
|
-
// ── CozoDB Queries ──────────────────────────────────────────────
|
|
110
|
-
/**
 * Read every row of the `entities` relation and convert it to a plain object.
 *
 * @param {object} db - Handle exposing an async `run(query)` method.
 * @returns {Promise<Array<{key: *, kind: *, name: *, file_path: *}>>}
 *   One object per row; an empty array when the query yields no `rows`
 *   or when anything in the lookup throws.
 */
async function queryAllEntities(db) {
    const query = "?[key, kind, name, file_path] := *entities{key, kind, name, file_path}";
    try {
        const rows = (await db.run(query))?.rows;
        if (!rows) {
            return [];
        }
        return rows.map(([key, kind, name, file_path]) => ({ key, kind, name, file_path }));
    }
    catch {
        // Best-effort read: any failure (query or row decoding) degrades to "no entities".
        return [];
    }
}
|
|
124
|
-
/**
 * Read every row of the `edges` relation and convert it to a plain object.
 *
 * @param {object} db - Handle exposing an async `run(query)` method.
 * @returns {Promise<Array<{from_key: *, to_key: *, type: *}>>}
 *   One object per row; an empty array when the query yields no `rows`
 *   or when anything in the lookup throws.
 */
async function queryAllEdges(db) {
    const query = "?[from_key, to_key, type] := *edges{from_key, to_key, type}";
    try {
        const rows = (await db.run(query))?.rows;
        if (!rows) {
            return [];
        }
        return rows.map(([from_key, to_key, type]) => ({ from_key, to_key, type }));
    }
    catch {
        // Best-effort read: any failure (query or row decoding) degrades to "no edges".
        return [];
    }
}
|
|
138
|
-
/**
 * Read every row of the `communities` relation and convert it to a plain object.
 *
 * @param {object} db - Handle exposing an async `run(query)` method.
 * @returns {Promise<Array<{id: *, label: *, size: *}>>}
 *   One object per row; an empty array when the query yields no `rows`
 *   or when anything in the lookup throws.
 */
async function queryAllCommunities(db) {
    const query = "?[id, label, size] := *communities{id, label, size}";
    try {
        const rows = (await db.run(query))?.rows;
        if (!rows) {
            return [];
        }
        return rows.map(([id, label, size]) => ({ id, label, size }));
    }
    catch {
        // Best-effort read: any failure (query or row decoding) degrades to "no communities".
        return [];
    }
}
|
|
152
|
-
// ── 1. Naming Convention Detection ──────────────────────────────
|
|
153
|
-
/**
 * Return the English plural of an entity-kind label for display strings.
 *
 * Rules, applied in order:
 *   - ends in s / x / z / ch / sh  → append "es"   (class → classes)
 *   - ends in consonant + "y"      → "y" → "ies"   (property → properties)
 *   - anything else                → append "s"    (function → functions)
 *
 * @param {string} kind - Singular kind label.
 * @returns {string} Pluralised label.
 */
function pluralizeKind(kind) {
    const takesEs = ["s", "x", "z", "ch", "sh"].some((ending) => kind.endsWith(ending));
    if (takesEs) {
        return `${kind}es`;
    }
    // Vowel + "y" (e.g. "key") keeps the plain "s" plural, so require a consonant before the "y".
    if (/[^aeiou]y$/.test(kind)) {
        return `${kind.slice(0, -1)}ies`;
    }
    return `${kind}s`;
}
|
|
161
|
-
/**
 * Detect naming conventions by testing entity names, grouped by kind,
 * against every entry of NAMING_PATTERNS.
 *
 * A convention is emitted for each (kind, pattern) pair where the kind has at
 * least MIN_SAMPLE_SIZE entities and at least MIN_ADHERENCE of them match.
 * Patterns are tested independently, so one kind can yield several
 * conventions. For the "function" kind an extra check reports PascalCase
 * usage among functions declared in `.tsx` files.
 *
 * Only string names are matched: `RegExp.prototype.test` stringifies its
 * argument, so a missing name would otherwise be tested as "null" /
 * "undefined" and be counted as a camelCase/snake_case/kebab-case match.
 * Entities without a usable name still count toward the group size.
 *
 * @param {Array<{key: *, kind: *, name: *, file_path: *}>} entities
 * @returns {Array<object>} Convention records with `key`, `name`, `kind`
 *   ("naming"), `frequency`, `confidence`, `exemplarKeys` and `detail`.
 */
function detectNamingConventions(entities) {
    const conventions = [];
    // Bucket entities by kind, preserving first-seen order of kinds.
    const byKind = new Map();
    for (const entity of entities) {
        const bucket = byKind.get(entity.kind);
        if (bucket) {
            bucket.push(entity);
        }
        else {
            byKind.set(entity.kind, [entity]);
        }
    }
    const nameMatches = (regex, entity) => typeof entity.name === "string" && regex.test(entity.name);
    const exemplarKeysOf = (matches) => matches.slice(0, MAX_EXEMPLARS).map((e) => e.key);
    for (const [kind, kindEntities] of byKind) {
        if (kindEntities.length < MIN_SAMPLE_SIZE) {
            continue;
        }
        for (const pattern of NAMING_PATTERNS) {
            const matching = kindEntities.filter((e) => nameMatches(pattern.regex, e));
            const adherence = matching.length / kindEntities.length;
            if (adherence < MIN_ADHERENCE) {
                continue;
            }
            const kindPlural = pluralizeKind(kind);
            conventions.push({
                key: `naming-${kind}-${pattern.id}`,
                name: `${pattern.name} ${kindPlural}`,
                kind: "naming",
                frequency: matching.length,
                confidence: adherence,
                exemplarKeys: exemplarKeysOf(matching),
                detail: `${matching.length}/${kindEntities.length} ${kindPlural} use ${pattern.name} (${Math.round(adherence * 100)}%)`,
            });
        }
        // Special case: React components show up as PascalCase functions in .tsx files.
        if (kind !== "function") {
            continue;
        }
        // Entities without a string path cannot be .tsx functions; skip them instead of throwing.
        const tsxFunctions = kindEntities.filter((e) => typeof e.file_path === "string" && e.file_path.endsWith(".tsx"));
        if (tsxFunctions.length < MIN_SAMPLE_SIZE) {
            continue;
        }
        const pascalTsx = tsxFunctions.filter((e) => nameMatches(PASCAL_CASE, e));
        const adherence = pascalTsx.length / tsxFunctions.length;
        if (adherence >= MIN_ADHERENCE) {
            conventions.push({
                key: "naming-component-PascalCase",
                name: "PascalCase React components",
                kind: "naming",
                frequency: pascalTsx.length,
                confidence: adherence,
                exemplarKeys: exemplarKeysOf(pascalTsx),
                detail: `${pascalTsx.length}/${tsxFunctions.length} .tsx functions use PascalCase (${Math.round(adherence * 100)}%)`,
            });
        }
    }
    return conventions;
}
|
|
217
|
-
// ── 2. File Structure Pattern Detection ─────────────────────────
|
|
218
|
-
/**
 * Detect file-layout conventions from entity file paths.
 *
 * Emits, in this order:
 *   1. one convention per directory whose entities (at least MIN_SAMPLE_SIZE)
 *      all share a single kind;
 *   2. a barrel-export convention when at least MIN_SAMPLE_SIZE entities live
 *      in `index.ts` / `index.js` files and at least 30% of all directories
 *      contain such a file;
 *   3. a test-placement convention (`__tests__/` vs. elsewhere) when at least
 *      MIN_SAMPLE_SIZE distinct test files exist;
 *   4. a roll-up convention when two or more single-kind directories were found.
 *
 * @param {Array<{key: *, kind: *, name: *, file_path: string}>} entities
 * @returns {Array<object>} Convention records with `key`, `name`, `kind`
 *   ("structure"), `frequency`, `confidence`, `exemplarKeys` and `detail`.
 */
function detectFileStructurePatterns(entities) {
    const conventions = [];
    const exemplarKeysOf = (list) => list.slice(0, MAX_EXEMPLARS).map((e) => e.key);
    // Bucket entities by the directory that contains their file.
    const byDir = new Map();
    for (const entity of entities) {
        const dir = dirname(entity.file_path);
        const bucket = byDir.get(dir);
        if (bucket) {
            bucket.push(entity);
        }
        else {
            byDir.set(dir, [entity]);
        }
    }
    // 1. Single-kind directories.
    const singleKindDirs = [];
    for (const [dir, dirEntities] of byDir) {
        if (dirEntities.length < MIN_SAMPLE_SIZE) {
            continue;
        }
        const kinds = new Set(dirEntities.map((e) => e.kind));
        if (kinds.size !== 1) {
            continue;
        }
        const [kind] = kinds;
        // Use the shared pluraliser so "class" reads "classes", not "classs".
        // String() keeps non-string kinds rendering as text rather than throwing.
        const kindPlural = pluralizeKind(String(kind));
        singleKindDirs.push(dir);
        conventions.push({
            key: `structure-single-kind-${sanitizeKey(dir)}`,
            name: `${dir}/ contains only ${kindPlural}`,
            kind: "structure",
            frequency: dirEntities.length,
            confidence: 1.0,
            exemplarKeys: exemplarKeysOf(dirEntities),
            detail: `All ${dirEntities.length} entities in ${dir}/ are ${kindPlural}`,
        });
    }
    // 2. Barrel exports: entities that live in an index.ts / index.js file.
    const indexFiles = entities.filter((e) => e.file_path.endsWith("/index.ts") ||
        e.file_path.endsWith("/index.js") ||
        e.file_path === "index.ts" ||
        e.file_path === "index.js");
    if (indexFiles.length >= MIN_SAMPLE_SIZE) {
        const dirsWithIndex = new Set(indexFiles.map((e) => dirname(e.file_path)));
        // byDir is keyed by dirname(file_path) of every entity, so its size is the directory count.
        const totalDirs = byDir.size;
        if (totalDirs > 0) {
            const adherence = dirsWithIndex.size / totalDirs;
            // 30% threshold for barrel pattern
            if (adherence >= 0.3) {
                conventions.push({
                    key: "structure-barrel-exports",
                    name: "Barrel export pattern (index.ts)",
                    kind: "structure",
                    frequency: indexFiles.length,
                    confidence: adherence,
                    exemplarKeys: exemplarKeysOf(indexFiles),
                    detail: `${dirsWithIndex.size}/${totalDirs} directories use barrel exports (${Math.round(adherence * 100)}%)`,
                });
            }
        }
    }
    // 3. Test placement: distinct test files, split by whether the path goes through __tests__/.
    const testFiles = new Set();
    for (const entity of entities) {
        if (isTestFile(entity.file_path)) {
            testFiles.add(entity.file_path);
        }
    }
    if (testFiles.size >= MIN_SAMPLE_SIZE) {
        const testPaths = [...testFiles];
        const segregatedTests = testPaths.filter((f) => f.includes("__tests__/"));
        const colocatedTests = testPaths.filter((f) => !f.includes("__tests__/"));
        if (segregatedTests.length > colocatedTests.length) {
            conventions.push({
                key: "structure-test-segregated",
                name: "Tests in __tests__/ directories",
                kind: "structure",
                frequency: segregatedTests.length,
                confidence: segregatedTests.length / testFiles.size,
                exemplarKeys: [],
                detail: `${segregatedTests.length}/${testFiles.size} test files use __tests__/ pattern`,
            });
        }
        else if (colocatedTests.length > 0) {
            conventions.push({
                key: "structure-test-colocated",
                name: "Co-located test files (.test.ts sibling)",
                kind: "structure",
                frequency: colocatedTests.length,
                confidence: colocatedTests.length / testFiles.size,
                exemplarKeys: [],
                detail: `${colocatedTests.length}/${testFiles.size} test files are co-located with source`,
            });
        }
    }
    // 4. Roll up single-kind directories into one summary convention.
    if (singleKindDirs.length >= 2) {
        conventions.push({
            key: "structure-single-kind-dirs",
            name: "Single-purpose directories",
            kind: "structure",
            frequency: singleKindDirs.length,
            confidence: singleKindDirs.length / byDir.size,
            exemplarKeys: [],
            detail: `${singleKindDirs.length}/${byDir.size} directories contain only one entity kind`,
        });
    }
    return conventions;
}
|
|
329
|
-
// ── 3. Import Direction Pattern Detection ───────────────────────
|
|
330
|
-
/**
 * Detect import-direction conventions between directories.
 *
 * Every edge of type "imports" is resolved to the directories of its source
 * and target entities (via `file_path`); imports that stay inside a single
 * directory are ignored. Two kinds of conventions are reported:
 *
 *  - Leaf modules: directories that receive cross-directory imports but
 *    never import from another directory themselves.
 *  - Layered architecture: emitted once when at least two directory pairs,
 *    each with at least MIN_SAMPLE_SIZE imports, have no import in the
 *    opposite direction.
 *
 * @param {Array<{key: string, file_path: string}>} entities - Known entities.
 * @param {Array<{type: string, from_key: string, to_key: string}>} edges - Graph edges.
 * @param {Array} communities - Detected communities; only checked for emptiness here.
 * @returns {Array<object>} Convention records (empty when nothing is detected).
 */
function detectImportDirectionPatterns(entities, edges, communities) {
    const conventions = [];
    if (communities.length === 0 || edges.length === 0)
        return conventions;
    const importEdges = edges.filter((e) => e.type === "imports");
    if (importEdges.length === 0)
        return conventions;
    // Index entities by key once instead of scanning the array for every edge.
    // The first entity seen for a key wins, which is what Array.prototype.find did.
    const entityByKey = new Map();
    for (const entity of entities) {
        if (!entityByKey.has(entity.key)) {
            entityByKey.set(entity.key, entity);
        }
    }
    // Per-directory inbound/outbound counters (directory acts as a layer proxy).
    const dirImports = new Map();
    // Directed directory pairs, stored with structured endpoints so directory
    // names never have to be parsed back out of a string key.
    const layerPairs = new Map();
    for (const edge of importEdges) {
        const fromEntity = entityByKey.get(edge.from_key);
        const toEntity = entityByKey.get(edge.to_key);
        if (!fromEntity || !toEntity)
            continue;
        const fromDir = dirname(fromEntity.file_path);
        const toDir = dirname(toEntity.file_path);
        if (fromDir === toDir)
            continue; // Skip intra-directory imports
        // Count outbound from fromDir
        const fromStats = dirImports.get(fromDir) ?? { inbound: 0, outbound: 0 };
        fromStats.outbound++;
        dirImports.set(fromDir, fromStats);
        // Count inbound to toDir
        const toStats = dirImports.get(toDir) ?? { inbound: 0, outbound: 0 };
        toStats.inbound++;
        dirImports.set(toDir, toStats);
        // JSON-encoding the pair keeps the key unambiguous for any directory name.
        const pairKey = JSON.stringify([fromDir, toDir]);
        const pair = layerPairs.get(pairKey);
        if (pair) {
            pair.count++;
        }
        else {
            layerPairs.set(pairKey, { from: fromDir, to: toDir, count: 1 });
        }
    }
    // Detect leaf modules (imported by others but never import outside)
    for (const [dir, stats] of dirImports) {
        if (stats.inbound > 0 && stats.outbound === 0) {
            conventions.push({
                key: `import-direction-leaf-${sanitizeKey(dir)}`,
                name: `${dir}/ is a leaf module`,
                kind: "import_direction",
                frequency: stats.inbound,
                confidence: 1.0,
                exemplarKeys: [],
                detail: `${dir}/ is imported ${stats.inbound} times but imports nothing outside itself`,
            });
        }
    }
    // Find one-directional layer pairs (A imports B but B never imports A)
    const unidirectionalPairs = [];
    for (const { from, to, count } of layerPairs.values()) {
        if (count < MIN_SAMPLE_SIZE)
            continue;
        if (!layerPairs.has(JSON.stringify([to, from]))) {
            unidirectionalPairs.push({ from, to, count });
        }
    }
    if (unidirectionalPairs.length >= 2) {
        conventions.push({
            key: "import-direction-layered-architecture",
            name: "Layered architecture (unidirectional imports)",
            kind: "import_direction",
            frequency: unidirectionalPairs.length,
            confidence: unidirectionalPairs.length / Math.max(layerPairs.size, 1),
            exemplarKeys: [],
            detail: `${unidirectionalPairs.length} directory pairs have unidirectional import relationships`,
        });
    }
    return conventions;
}
|
|
429
|
-
// ── Utilities ───────────────────────────────────────────────────
|
|
430
|
-
/**
 * Sanitize a directory path for use as a CozoDB key.
 *
 * Every character outside `[a-zA-Z0-9_-]` becomes a dash, runs of dashes are
 * collapsed to one, a leading dash is dropped, and the result is capped at
 * 60 characters with no trailing dash.
 *
 * @param {string} input - Raw directory path.
 * @returns {string} Key-safe string of at most 60 characters.
 */
function sanitizeKey(input) {
    return input
        .replace(/[^a-zA-Z0-9_-]/g, "-")
        .replace(/-+/g, "-")
        .replace(/^-/, "")
        .slice(0, 60)
        // Strip the trailing dash only after truncating: cutting at 60
        // characters can itself leave a dash in the final position.
        .replace(/-$/, "");
}
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Local Embedding Store & Semantic Search — Sprint L3.2.
|
|
3
|
-
*
|
|
4
|
-
* Stores per-entity embedding vectors in CozoDB's entity_embeddings relation.
|
|
5
|
-
* Provides cosine similarity search for semantic_search and find_similar tools,
|
|
6
|
-
* using local vector storage.
|
|
7
|
-
*
|
|
8
|
-
* All embedding computation is delegated to the BYO-LLM adapter (local-llm.ts).
|
|
9
|
-
* Vectors are stored as JSON-encoded float arrays in CozoDB (no native vector type).
|
|
10
|
-
*/
|
|
11
|
-
// ── Cosine Similarity ────────────────────────────────────────
|
|
12
|
-
/**
 * Cosine similarity between two numeric vectors.
 *
 * Returns 0 when the vectors are empty, differ in length, or when either
 * one has zero magnitude. Missing (nullish) components are treated as 0.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 for the cases above.
 */
function cosineSimilarity(a, b) {
    const size = a.length;
    if (size === 0 || size !== b.length) {
        return 0;
    }
    let dotProduct = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    let idx = 0;
    while (idx < size) {
        const x = a[idx] ?? 0;
        const y = b[idx] ?? 0;
        dotProduct += x * y;
        sumSquaresA += x * x;
        sumSquaresB += y * y;
        idx += 1;
    }
    const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
    if (magnitude === 0) {
        return 0;
    }
    return dotProduct / magnitude;
}
|
|
28
|
-
// ── LocalEmbeddingStore ──────────────────────────────────────
|
|
29
|
-
/**
 * Stores per-entity embedding vectors in the `entity_embeddings` relation and
 * answers similarity queries against them.
 *
 * Vectors are persisted as JSON-encoded arrays; similarity is computed in
 * JavaScript after loading the stored rows.
 */
export class LocalEmbeddingStore {
    db;
    adapter;
    /**
     * @param db - Database handle exposing `run(query, params)` that resolves to `{ rows }`.
     * @param adapter - Embedding adapter exposing `embeddingModel` and `embed(texts)`.
     */
    constructor(db, adapter) {
        this.db = db;
        this.adapter = adapter;
    }
    /**
     * Compute and store embeddings for a batch of entities.
     * Skips entities that already have embeddings from the same model.
     *
     * Each embedding is fully written before the next one is processed, so a
     * failed write rejects this call instead of being lost.
     *
     * @param {Array<{key: string, kind: string, name: string, signature: string}>} entities
     * @returns {Promise<{computed: number, skipped: number, elapsedMs: number}>}
     *   `computed` is the number of new embeddings stored.
     */
    async computeEmbeddings(entities) {
        const t0 = performance.now();
        const model = this.adapter.embeddingModel;
        // Filter out entities that already have embeddings from this model
        const existing = new Set();
        const result = await this.db.run("?[entity_key] := *entity_embeddings{entity_key, model}, model = $model", { model });
        for (const row of result.rows) {
            existing.add(row[0]);
        }
        const toEmbed = entities.filter((e) => !existing.has(e.key));
        if (toEmbed.length === 0) {
            return {
                computed: 0,
                skipped: entities.length,
                elapsedMs: performance.now() - t0,
            };
        }
        // Build text representation for each entity
        const texts = toEmbed.map((e) => `${e.kind} ${e.name} ${e.signature}`);
        // Embed in fixed-size batches, one batch at a time
        const batchSize = 64;
        let totalComputed = 0;
        for (let i = 0; i < texts.length; i += batchSize) {
            const batchTexts = texts.slice(i, i + batchSize);
            const batchEntities = toEmbed.slice(i, i + batchSize);
            const embResult = await this.adapter.embed(batchTexts);
            const now = new Date().toISOString();
            for (let j = 0; j < batchEntities.length; j++) {
                const entity = batchEntities[j];
                const vector = embResult.embeddings[j];
                if (!entity || !vector)
                    continue;
                // Await the write: otherwise the promise floats, failures surface
                // as unhandled rejections, and the count runs ahead of the data.
                await this.storeEmbedding(entity.key, vector, model, now);
                totalComputed++;
            }
        }
        return {
            computed: totalComputed,
            skipped: entities.length - totalComputed,
            elapsedMs: performance.now() - t0,
        };
    }
    /**
     * Store a single embedding vector for an entity.
     *
     * @param {string} entityKey - Key of the entity the vector belongs to.
     * @param {number[]} vector - Embedding vector; stored JSON-encoded.
     * @param {string} model - Name of the model that produced the vector.
     * @param {string} computedAt - Timestamp recorded with the row.
     * @returns {Promise<void>}
     */
    async storeEmbedding(entityKey, vector, model, computedAt) {
        await this.db.run(`?[entity_key, vector_json, model, dimensions, computed_at] <- [[$ek, $vj, $m, $d, $ca]]
       :put entity_embeddings {entity_key => vector_json, model, dimensions, computed_at}`, {
            ek: entityKey,
            vj: JSON.stringify(vector),
            m: model,
            d: vector.length,
            ca: computedAt,
        });
    }
    /**
     * Semantic search: embed the query text, then find the top-K most
     * similar entities by cosine similarity.
     *
     * @param {string} query - Free-text query.
     * @param {number} [topK=10] - Maximum number of results.
     * @param {number} [minSimilarity=0.3] - Minimum similarity for a result to be kept.
     * @returns {Promise<Array<object>>} Matches, best first; empty if the query could not be embedded.
     */
    async semanticSearch(query, topK = 10, minSimilarity = 0.3) {
        // Embed the query
        const embResult = await this.adapter.embed([query]);
        const queryVec = embResult.embeddings[0];
        if (!queryVec)
            return [];
        return this.findByVector(queryVec, topK, minSimilarity);
    }
    /**
     * Find similar entities to a given entity key.
     *
     * @param {string} entityKey - Entity whose stored embedding is the query.
     * @param {number} [topK=10] - Maximum number of results.
     * @param {number} [minSimilarity=0.3] - Minimum similarity for a result to be kept.
     * @returns {Promise<Array<object>>} Matches excluding the entity itself; empty if it has no embedding.
     */
    async findSimilar(entityKey, topK = 10, minSimilarity = 0.3) {
        // Load the entity's embedding
        const result = await this.db.run("?[vector_json] := *entity_embeddings{entity_key, vector_json}, entity_key = $ek", { ek: entityKey });
        if (result.rows.length === 0)
            return [];
        const vector = JSON.parse(result.rows[0]?.[0]);
        // Ask for one extra hit so that dropping the query entity still leaves topK
        return (await this.findByVector(vector, topK + 1, minSimilarity))
            .filter((r) => r.entityKey !== entityKey)
            .slice(0, topK);
    }
    /**
     * Core similarity search: compare a vector against all stored embeddings.
     *
     * @param {number[]} queryVec - Query vector.
     * @param {number} topK - Maximum number of results.
     * @param {number} minSimilarity - Minimum similarity for a result to be kept.
     * @returns {Promise<Array<{entityKey: string, similarity: number, kind: string, name: string, filePath: string, signature: string}>>}
     *   Matches sorted by descending similarity (rounded to three decimals).
     */
    async findByVector(queryVec, topK, minSimilarity) {
        // Load every stored embedding; similarity is computed here in JS
        const all = await this.db.run("?[entity_key, vector_json] := *entity_embeddings{entity_key, vector_json}");
        const scored = [];
        for (const row of all.rows) {
            const key = row[0];
            const vec = JSON.parse(row[1]);
            const sim = cosineSimilarity(queryVec, vec);
            if (sim >= minSimilarity) {
                scored.push({ entityKey: key, similarity: sim });
            }
        }
        // Sort descending by similarity, take top K
        scored.sort((a, b) => b.similarity - a.similarity);
        const topResults = scored.slice(0, topK);
        // Enrich with entity metadata; fields stay empty when the entity row is missing
        const results = [];
        for (const s of topResults) {
            const entity = await this.db.run(`?[kind, name, file_path, signature] :=
          *entities{key, kind, name, file_path, signature}, key = $key`, { key: s.entityKey });
            const row = entity.rows[0];
            results.push({
                entityKey: s.entityKey,
                similarity: Math.round(s.similarity * 1000) / 1000,
                kind: row ? row[0] : "",
                name: row ? row[1] : "",
                filePath: row ? row[2] : "",
                signature: row ? row[3] : "",
            });
        }
        return results;
    }
    /**
     * Get stats about stored embeddings.
     *
     * @returns {Promise<{totalEmbeddings: number, model: string, dimensions: number}>}
     *   `model` and `dimensions` come from the first row returned, if any.
     */
    async getStats() {
        const countResult = await this.db.run("?[count(entity_key)] := *entity_embeddings{entity_key}");
        const total = countResult.rows.length > 0 ? countResult.rows[0]?.[0] : 0;
        // NOTE(review): CozoScript normally spells this as the `:limit 1` query
        // option; confirm that `limit 1` inside the rule body is accepted by db.run.
        const modelResult = await this.db.run("?[model, dimensions] := *entity_embeddings{entity_key, model, dimensions}, limit 1");
        const model = modelResult.rows.length > 0 ? modelResult.rows[0]?.[0] : "";
        const dims = modelResult.rows.length > 0 ? modelResult.rows[0]?.[1] : 0;
        return { totalEmbeddings: total, model, dimensions: dims };
    }
    /**
     * Check if an entity has an embedding stored.
     *
     * @param {string} entityKey - Entity key to look up.
     * @returns {Promise<boolean>} True when at least one row exists for the key.
     */
    async hasEmbedding(entityKey) {
        const result = await this.db.run("?[entity_key] := *entity_embeddings{entity_key}, entity_key = $ek", { ek: entityKey });
        return result.rows.length > 0;
    }
    /**
     * Clear all stored embeddings. Used when model changes or on re-index.
     *
     * @returns {Promise<number>} Number of embeddings removed.
     */
    async clearAll() {
        // One key listing serves both the deletions and the returned count,
        // so no separate count query is needed.
        const keys = await this.db.run("?[entity_key] := *entity_embeddings{entity_key}");
        for (const row of keys.rows) {
            await this.db.run("?[entity_key] <- [[$ek]] :rm entity_embeddings {entity_key}", { ek: row[0] });
        }
        return keys.rows.length;
    }
}
|