@zuvia-software-solutions/code-mapper 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -0
- package/dist/cli/ai-context.d.ts +19 -0
- package/dist/cli/ai-context.js +168 -0
- package/dist/cli/analyze.d.ts +7 -0
- package/dist/cli/analyze.js +325 -0
- package/dist/cli/augment.d.ts +7 -0
- package/dist/cli/augment.js +27 -0
- package/dist/cli/clean.d.ts +5 -0
- package/dist/cli/clean.js +56 -0
- package/dist/cli/eval-server.d.ts +25 -0
- package/dist/cli/eval-server.js +365 -0
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +102 -0
- package/dist/cli/lazy-action.d.ts +6 -0
- package/dist/cli/lazy-action.js +19 -0
- package/dist/cli/list.d.ts +2 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +8 -0
- package/dist/cli/mcp.js +35 -0
- package/dist/cli/refresh.d.ts +12 -0
- package/dist/cli/refresh.js +165 -0
- package/dist/cli/serve.d.ts +5 -0
- package/dist/cli/serve.js +8 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +218 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.js +33 -0
- package/dist/cli/tool.d.ts +28 -0
- package/dist/cli/tool.js +87 -0
- package/dist/config/ignore-service.d.ts +32 -0
- package/dist/config/ignore-service.js +282 -0
- package/dist/config/supported-languages.d.ts +23 -0
- package/dist/config/supported-languages.js +52 -0
- package/dist/core/augmentation/engine.d.ts +22 -0
- package/dist/core/augmentation/engine.js +232 -0
- package/dist/core/embeddings/embedder.d.ts +35 -0
- package/dist/core/embeddings/embedder.js +171 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
- package/dist/core/embeddings/embedding-pipeline.js +402 -0
- package/dist/core/embeddings/index.d.ts +5 -0
- package/dist/core/embeddings/index.js +6 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -0
- package/dist/core/embeddings/text-generator.js +159 -0
- package/dist/core/embeddings/types.d.ts +60 -0
- package/dist/core/embeddings/types.js +23 -0
- package/dist/core/graph/graph.d.ts +4 -0
- package/dist/core/graph/graph.js +65 -0
- package/dist/core/graph/types.d.ts +69 -0
- package/dist/core/graph/types.js +3 -0
- package/dist/core/incremental/child-process.d.ts +8 -0
- package/dist/core/incremental/child-process.js +649 -0
- package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
- package/dist/core/incremental/refresh-coordinator.js +147 -0
- package/dist/core/incremental/types.d.ts +78 -0
- package/dist/core/incremental/types.js +153 -0
- package/dist/core/incremental/watcher.d.ts +63 -0
- package/dist/core/incremental/watcher.js +338 -0
- package/dist/core/ingestion/ast-cache.d.ts +12 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +34 -0
- package/dist/core/ingestion/call-processor.js +937 -0
- package/dist/core/ingestion/call-routing.d.ts +40 -0
- package/dist/core/ingestion/call-routing.js +97 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
- package/dist/core/ingestion/cluster-enricher.js +151 -0
- package/dist/core/ingestion/community-processor.d.ts +26 -0
- package/dist/core/ingestion/community-processor.js +272 -0
- package/dist/core/ingestion/constants.d.ts +5 -0
- package/dist/core/ingestion/constants.js +8 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
- package/dist/core/ingestion/entry-point-scoring.js +317 -0
- package/dist/core/ingestion/export-detection.d.ts +11 -0
- package/dist/core/ingestion/export-detection.js +203 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
- package/dist/core/ingestion/filesystem-walker.js +64 -0
- package/dist/core/ingestion/framework-detection.d.ts +42 -0
- package/dist/core/ingestion/framework-detection.js +405 -0
- package/dist/core/ingestion/heritage-processor.d.ts +15 -0
- package/dist/core/ingestion/heritage-processor.js +237 -0
- package/dist/core/ingestion/import-processor.d.ts +31 -0
- package/dist/core/ingestion/import-processor.js +416 -0
- package/dist/core/ingestion/language-config.d.ts +32 -0
- package/dist/core/ingestion/language-config.js +161 -0
- package/dist/core/ingestion/mro-processor.d.ts +32 -0
- package/dist/core/ingestion/mro-processor.js +343 -0
- package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
- package/dist/core/ingestion/named-binding-extraction.js +343 -0
- package/dist/core/ingestion/parsing-processor.d.ts +20 -0
- package/dist/core/ingestion/parsing-processor.js +282 -0
- package/dist/core/ingestion/pipeline.d.ts +3 -0
- package/dist/core/ingestion/pipeline.js +416 -0
- package/dist/core/ingestion/process-processor.d.ts +42 -0
- package/dist/core/ingestion/process-processor.js +357 -0
- package/dist/core/ingestion/resolution-context.d.ts +40 -0
- package/dist/core/ingestion/resolution-context.js +171 -0
- package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
- package/dist/core/ingestion/resolvers/csharp.js +101 -0
- package/dist/core/ingestion/resolvers/go.d.ts +8 -0
- package/dist/core/ingestion/resolvers/go.js +33 -0
- package/dist/core/ingestion/resolvers/index.d.ts +14 -0
- package/dist/core/ingestion/resolvers/index.js +10 -0
- package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
- package/dist/core/ingestion/resolvers/jvm.js +74 -0
- package/dist/core/ingestion/resolvers/php.d.ts +7 -0
- package/dist/core/ingestion/resolvers/php.js +30 -0
- package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
- package/dist/core/ingestion/resolvers/ruby.js +13 -0
- package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
- package/dist/core/ingestion/resolvers/rust.js +62 -0
- package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
- package/dist/core/ingestion/resolvers/standard.js +144 -0
- package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
- package/dist/core/ingestion/resolvers/utils.js +113 -0
- package/dist/core/ingestion/structure-processor.d.ts +4 -0
- package/dist/core/ingestion/structure-processor.js +39 -0
- package/dist/core/ingestion/symbol-table.d.ts +34 -0
- package/dist/core/ingestion/symbol-table.js +48 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
- package/dist/core/ingestion/tree-sitter-queries.js +691 -0
- package/dist/core/ingestion/type-env.d.ts +52 -0
- package/dist/core/ingestion/type-env.js +349 -0
- package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
- package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/csharp.js +224 -0
- package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/go.js +261 -0
- package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
- package/dist/core/ingestion/type-extractors/index.js +30 -0
- package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
- package/dist/core/ingestion/type-extractors/jvm.js +386 -0
- package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/php.js +280 -0
- package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/python.js +175 -0
- package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
- package/dist/core/ingestion/type-extractors/ruby.js +218 -0
- package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/rust.js +290 -0
- package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
- package/dist/core/ingestion/type-extractors/shared.js +322 -0
- package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/swift.js +140 -0
- package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
- package/dist/core/ingestion/type-extractors/types.js +4 -0
- package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/typescript.js +227 -0
- package/dist/core/ingestion/utils.d.ts +73 -0
- package/dist/core/ingestion/utils.js +992 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
- package/dist/core/ingestion/workers/parse-worker.js +1055 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
- package/dist/core/ingestion/workers/worker-pool.js +123 -0
- package/dist/core/lbug/csv-generator.d.ts +28 -0
- package/dist/core/lbug/csv-generator.js +355 -0
- package/dist/core/lbug/lbug-adapter.d.ts +96 -0
- package/dist/core/lbug/lbug-adapter.js +753 -0
- package/dist/core/lbug/schema.d.ts +46 -0
- package/dist/core/lbug/schema.js +402 -0
- package/dist/core/search/bm25-index.d.ts +20 -0
- package/dist/core/search/bm25-index.js +123 -0
- package/dist/core/search/hybrid-search.d.ts +32 -0
- package/dist/core/search/hybrid-search.js +131 -0
- package/dist/core/search/query-cache.d.ts +18 -0
- package/dist/core/search/query-cache.js +47 -0
- package/dist/core/search/query-expansion.d.ts +19 -0
- package/dist/core/search/query-expansion.js +75 -0
- package/dist/core/search/reranker.d.ts +29 -0
- package/dist/core/search/reranker.js +122 -0
- package/dist/core/search/types.d.ts +154 -0
- package/dist/core/search/types.js +51 -0
- package/dist/core/semantic/tsgo-service.d.ts +67 -0
- package/dist/core/semantic/tsgo-service.js +355 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
- package/dist/core/tree-sitter/parser-loader.js +71 -0
- package/dist/lib/memory-guard.d.ts +35 -0
- package/dist/lib/memory-guard.js +70 -0
- package/dist/lib/utils.d.ts +3 -0
- package/dist/lib/utils.js +6 -0
- package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
- package/dist/mcp/compatible-stdio-transport.js +209 -0
- package/dist/mcp/core/embedder.d.ts +24 -0
- package/dist/mcp/core/embedder.js +168 -0
- package/dist/mcp/core/lbug-adapter.d.ts +29 -0
- package/dist/mcp/core/lbug-adapter.js +330 -0
- package/dist/mcp/local/local-backend.d.ts +188 -0
- package/dist/mcp/local/local-backend.js +2759 -0
- package/dist/mcp/resources.d.ts +22 -0
- package/dist/mcp/resources.js +379 -0
- package/dist/mcp/server.d.ts +10 -0
- package/dist/mcp/server.js +217 -0
- package/dist/mcp/staleness.d.ts +10 -0
- package/dist/mcp/staleness.js +25 -0
- package/dist/mcp/tools.d.ts +21 -0
- package/dist/mcp/tools.js +202 -0
- package/dist/server/api.d.ts +5 -0
- package/dist/server/api.js +340 -0
- package/dist/server/mcp-http.d.ts +7 -0
- package/dist/server/mcp-http.js +95 -0
- package/dist/storage/git.d.ts +6 -0
- package/dist/storage/git.js +35 -0
- package/dist/storage/repo-manager.d.ts +87 -0
- package/dist/storage/repo-manager.js +249 -0
- package/dist/types/pipeline.d.ts +35 -0
- package/dist/types/pipeline.js +20 -0
- package/hooks/claude/code-mapper-hook.cjs +238 -0
- package/hooks/claude/pre-tool-use.sh +79 -0
- package/hooks/claude/session-start.sh +42 -0
- package/models/mlx-embedder.py +185 -0
- package/package.json +100 -0
- package/scripts/patch-tree-sitter-swift.cjs +74 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// code-mapper/src/core/search/hybrid-search.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file hybrid-search.ts
|
|
4
|
+
* @description Combines BM25 (keyword) and semantic (embedding) search using
|
|
5
|
+
* weighted Reciprocal Rank Fusion (RRF) to merge rankings without score normalization
|
|
6
|
+
*
|
|
7
|
+
* All constants come from types.ts (single source of truth)
|
|
8
|
+
* This is the ONLY RRF implementation — local-backend.ts must import this, never reimplement
|
|
9
|
+
*/
|
|
10
|
+
import { searchFTSFromLbug } from './bm25-index.js';
|
|
11
|
+
import { DEFAULT_RRF_CONFIG, } from './types.js';
|
|
12
|
+
/**
 * Merge BM25 and semantic results using weighted Reciprocal Rank Fusion.
 *
 * Formula: rrfScore = weight * (1 / (k + rank)), with rank being the 1-indexed
 * position of the hit in its (pre-sorted) input list.
 * A symbol found by both methods gets both contributions summed (consensus boost).
 * Within a single input list only the first, i.e. best-ranked, hit for a key is
 * counted, so a duplicated row can neither lower a BM25 score nor fake a consensus boost.
 *
 * @param bm25Results - BM25 keyword search results (pre-sorted by score desc)
 * @param semanticResults - Semantic embedding search results (pre-sorted by distance asc)
 * @param config - RRF configuration overrides (bm25Weight, semanticWeight, k, limit);
 *   missing fields fall back to DEFAULT_RRF_CONFIG
 * @returns Merged results sorted by combined RRF score descending, cut to `limit`,
 *   each carrying its final 1-indexed `rank`
 */
export function mergeWithRRF(bm25Results, semanticResults, config = {}) {
    const { bm25Weight, semanticWeight, k, limit } = { ...DEFAULT_RRF_CONFIG, ...config };
    // Mutable accumulator, keyed by nodeId (symbol level) or filePath when no nodeId exists
    const merged = new Map();
    // Score BM25 results
    for (let i = 0; i < bm25Results.length; i++) {
        const r = bm25Results[i];
        const key = r.nodeId || r.filePath;
        // Input is pre-sorted: an earlier entry for this key already holds the better rank
        if (merged.has(key)) {
            continue;
        }
        merged.set(key, {
            filePath: r.filePath,
            score: bm25Weight * (1 / (k + i + 1)), // rank is 1-indexed
            sources: ['bm25'],
            bm25Score: r.score,
            nodeId: r.nodeId,
            name: r.name,
            label: r.type,
            startLine: r.startLine,
            endLine: r.endLine,
        });
    }
    // Score and merge semantic results — same keying
    for (let i = 0; i < semanticResults.length; i++) {
        const r = semanticResults[i];
        const rrfScore = semanticWeight * (1 / (k + i + 1));
        const key = r.nodeId || r.filePath;
        const existing = merged.get(key);
        if (!existing) {
            merged.set(key, {
                filePath: r.filePath,
                score: rrfScore,
                sources: ['semantic'],
                semanticScore: 1 - r.distance,
                nodeId: r.nodeId,
                name: r.name,
                label: r.label,
                startLine: r.startLine,
                endLine: r.endLine,
            });
            continue;
        }
        // A second semantic hit for the same key is a duplicate row, not consensus
        if (existing.sources.includes('semantic')) {
            continue;
        }
        // Same symbol found by both methods — consensus boost
        existing.score += rrfScore;
        existing.sources.push('semantic');
        existing.semanticScore = 1 - r.distance;
        // Fill identity gaps from the semantic hit; its line range wins whenever present
        if (!existing.nodeId) {
            existing.nodeId = r.nodeId;
        }
        if (!existing.name) {
            existing.name = r.name;
        }
        if (!existing.label) {
            existing.label = r.label;
        }
        if (r.startLine != null) {
            existing.startLine = r.startLine;
        }
        if (r.endLine != null) {
            existing.endLine = r.endLine;
        }
    }
    // Sort by combined RRF score descending, then assign final ranks
    return Array.from(merged.values())
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((r, i) => ({
            filePath: r.filePath,
            score: r.score,
            rank: i + 1,
            sources: r.sources,
            bm25Score: r.bm25Score,
            semanticScore: r.semanticScore,
            nodeId: r.nodeId,
            name: r.name,
            label: r.label,
            startLine: r.startLine,
            endLine: r.endLine,
        }));
}
|
|
98
|
+
// Hybrid search convenience functions
|
|
99
|
+
/**
 * Report whether hybrid search can be used.
 * Always answers yes: FTS is treated as available whenever the DB is open.
 */
export function isHybridSearchReady() {
    const ftsAvailable = true;
    return ftsAvailable;
}
|
|
103
|
+
/**
 * Format hybrid results for LLM consumption.
 *
 * Each result becomes a four-line entry (title, file + optional line range,
 * contributing sources, relevance with 4 decimals); entries are separated by a
 * blank line and preceded by a "Found N results:" header.
 *
 * @param results - Merged hybrid search results (as produced by mergeWithRRF)
 * @returns Human/LLM readable text, or 'No results found.' for an empty list
 */
export function formatHybridResults(results) {
    if (results.length === 0) {
        return 'No results found.';
    }
    const formatted = results.map((r, i) => {
        const sources = r.sources.join(' + ');
        // Null check instead of truthiness so a range starting at line 0 is still shown;
        // a missing endLine yields a single-line location rather than "N-undefined"
        let location = '';
        if (r.startLine != null) {
            location = r.endLine != null
                ? ` (lines ${r.startLine}-${r.endLine})`
                : ` (line ${r.startLine})`;
        }
        const label = r.label ? `${r.label}: ` : 'File: ';
        // Fall back to the file's base name when the result has no symbol name
        const name = r.name || r.filePath.split('/').pop() || r.filePath;
        return [
            `[${i + 1}] ${label}${name}`,
            `File: ${r.filePath}${location}`,
            `Found by: ${sources}`,
            `Relevance: ${r.score.toFixed(4)}`,
        ].join('\n');
    });
    return `Found ${results.length} results:\n\n${formatted.join('\n\n')}`;
}
|
|
119
|
+
/**
 * Run the BM25 (FTS) search and the semantic search concurrently, then fuse
 * both rankings with weighted RRF.
 *
 * The semantic search implementation is injected by the caller so this module
 * does not depend on a particular embedding environment.
 *
 * @param query - Search text
 * @param limit - Result limit handed to both searches and to the merge step
 * @param executeQuery - Query executor forwarded as first argument to semanticSearchFn
 * @param semanticSearchFn - Called as (executeQuery, query, limit); resolves to semantic results
 * @returns Merged results from mergeWithRRF
 */
export async function hybridSearch(query, limit, executeQuery, semanticSearchFn) {
    // Start both searches before awaiting either so they overlap
    const keywordSearch = searchFTSFromLbug(query, limit);
    const embeddingSearch = semanticSearchFn(executeQuery, query, limit);
    const [keywordHits, embeddingHits] = await Promise.all([keywordSearch, embeddingSearch]);
    const mergeOptions = { limit };
    return mergeWithRRF(keywordHits, embeddingHits, mergeOptions);
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file query-cache.ts
|
|
3
|
+
* @description LRU cache for embedding query vectors.
|
|
4
|
+
*
|
|
5
|
+
* Same query text always produces the same embedding vector.
|
|
6
|
+
* Caching avoids redundant model inference (~50-100ms per embed).
|
|
7
|
+
*/
|
|
8
|
+
export declare class QueryEmbeddingCache {
    /** Backing store for cached embeddings (type erased in this declaration). */
    private readonly cache;
    /** Capacity supplied at construction time. */
    private readonly maxSize;
    /** @param maxSize - Optional capacity of the cache */
    constructor(maxSize?: number);
    /** Look up the embedding cached for `query`; `undefined` when none is stored. */
    get(query: string): readonly number[] | undefined;
    /** Store `embedding` under `query`. */
    set(query: string, embedding: readonly number[]): void;
    /** Number of entries currently held. */
    get size(): number;
    /** Drop every cached entry. */
    clear(): void;
}
/** Singleton cache for MCP server lifetime */
export declare const queryEmbeddingCache: QueryEmbeddingCache;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
// code-mapper/src/core/search/query-cache.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file query-cache.ts
|
|
4
|
+
* @description LRU cache for embedding query vectors.
|
|
5
|
+
*
|
|
6
|
+
* Same query text always produces the same embedding vector.
|
|
7
|
+
* Caching avoids redundant model inference (~50-100ms per embed).
|
|
8
|
+
*/
|
|
9
|
+
/** Capacity used when the constructor gets no (or an unusable) size */
const DEFAULT_MAX_SIZE = 128;
/**
 * LRU cache mapping query text to its embedding vector.
 *
 * Recency is tracked through Map insertion order: the first key of the Map is
 * the least recently used entry, and every hit or write re-inserts its key at the end.
 */
export class QueryEmbeddingCache {
    cache = new Map();
    maxSize;
    /**
     * @param maxSize - Maximum number of cached queries. Zero or a negative value
     *   disables caching; NaN falls back to the default capacity.
     */
    constructor(maxSize = DEFAULT_MAX_SIZE) {
        // `size >= NaN` is always false, which would let the cache grow without bound
        this.maxSize = Number.isNaN(maxSize) ? DEFAULT_MAX_SIZE : maxSize;
    }
    /**
     * Return the cached embedding for `query` and mark it most recently used.
     * @returns The embedding, or undefined on a miss
     */
    get(query) {
        const cached = this.cache.get(query);
        if (cached !== undefined) {
            // Move to end (most recently used) by re-inserting
            this.cache.delete(query);
            this.cache.set(query, cached);
        }
        return cached;
    }
    /**
     * Cache `embedding` for `query`, evicting the least recently used entry when full.
     */
    set(query, embedding) {
        // A non-positive capacity means "do not cache" — never keep a stray entry
        if (this.maxSize <= 0) {
            return;
        }
        // Drop any previous entry first so the key is re-inserted as most recent
        this.cache.delete(query);
        // Evict oldest if at capacity
        if (this.cache.size >= this.maxSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest !== undefined) {
                this.cache.delete(oldest);
            }
        }
        this.cache.set(query, embedding);
    }
    /** Number of cached queries */
    get size() {
        return this.cache.size;
    }
    /** Remove all cached entries */
    clear() {
        this.cache.clear();
    }
}
/** Singleton cache for MCP server lifetime */
export const queryEmbeddingCache = new QueryEmbeddingCache();
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file query-expansion.ts
|
|
3
|
+
* @description Pseudo-Relevance Feedback (PRF) for BM25 query expansion.
|
|
4
|
+
*
|
|
5
|
+
* Takes top-k BM25 results, extracts high-value terms (function/class names),
|
|
6
|
+
* and appends them to the original query for a second search pass.
|
|
7
|
+
* This helps BM25 find synonyms and related terms it would otherwise miss
|
|
8
|
+
* (e.g. "auth" expands to include "validateToken", "session").
|
|
9
|
+
*
|
|
10
|
+
* Fully local, no external dependencies.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Build an expanded BM25 query through pseudo-relevance feedback.
 *
 * @param originalQuery - Search text as entered by the user
 * @param topResultNames - Symbol names taken from the best BM25 hits
 * @returns The original query, followed by any expansion terms that were added
 */
export declare function expandQuery(
    originalQuery: string,
    topResultNames: readonly string[],
): string;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// code-mapper/src/core/search/query-expansion.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file query-expansion.ts
|
|
4
|
+
* @description Pseudo-Relevance Feedback (PRF) for BM25 query expansion.
|
|
5
|
+
*
|
|
6
|
+
* Takes top-k BM25 results, extracts high-value terms (function/class names),
|
|
7
|
+
* and appends them to the original query for a second search pass.
|
|
8
|
+
* This helps BM25 find synonyms and related terms it would otherwise miss
|
|
9
|
+
* (e.g. "auth" expands to include "validateToken", "session").
|
|
10
|
+
*
|
|
11
|
+
* Fully local, no external dependencies.
|
|
12
|
+
*/
|
|
13
|
+
/** Number of top results to use for expansion */
const PRF_TOP_K = 3;
/** Max terms to add from PRF */
const PRF_MAX_TERMS = 5;
/** Minimum term length to consider */
const PRF_MIN_TERM_LENGTH = 3;
/** Common noise words to exclude from expansion */
const STOP_WORDS = new Set([
    'function', 'class', 'method', 'interface', 'const', 'let', 'var',
    'return', 'import', 'export', 'from', 'async', 'await', 'this',
    'true', 'false', 'null', 'undefined', 'new', 'type', 'string',
    'number', 'boolean', 'void', 'any', 'object', 'array', 'file',
    'index', 'test', 'spec', 'module', 'default', 'private', 'public',
    'static', 'readonly', 'abstract', 'extends', 'implements',
]);
/**
 * Extract expansion terms from top BM25 results.
 *
 * Splits symbol names at camelCase/PascalCase/snake_case boundaries (a digit
 * followed by an uppercase letter also counts as a boundary, so "oauth2Client"
 * yields "oauth2" and "client"), lowercases the parts, drops short parts, stop
 * words and excluded terms, then returns the most frequent survivors.
 *
 * @param symbolNames - Symbol names to mine for terms
 * @param isExcluded - Predicate for terms that must not be returned; applied
 *   BEFORE the PRF_MAX_TERMS cap so excluded terms never use up a slot
 * @returns Up to PRF_MAX_TERMS terms, most frequent first (ties keep first-seen order)
 */
function extractTerms(symbolNames, isExcluded = () => false) {
    const termCounts = new Map();
    for (const name of symbolNames) {
        // "validateUserToken" → ["validate", "user", "token"]
        const parts = name
            .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
            .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
            .replace(/[_\-./]/g, ' ')
            .toLowerCase()
            .split(/\s+/);
        for (const part of parts) {
            if (part.length < PRF_MIN_TERM_LENGTH || STOP_WORDS.has(part) || isExcluded(part)) {
                continue;
            }
            termCounts.set(part, (termCounts.get(part) ?? 0) + 1);
        }
    }
    // Sort by frequency, take top terms
    return Array.from(termCounts.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, PRF_MAX_TERMS)
        .map(([term]) => term);
}
/**
 * Expand a query using pseudo-relevance feedback.
 *
 * Only the first PRF_TOP_K names are considered. Terms that already occur in the
 * query (case-insensitive substring match) are never appended.
 *
 * @param originalQuery - The user's original search query
 * @param topResultNames - Names of symbols from top BM25 results
 * @returns Expanded query string with additional terms appended, or the
 *   original query unchanged when nothing new can be added
 */
export function expandQuery(originalQuery, topResultNames) {
    if (topResultNames.length === 0) {
        return originalQuery;
    }
    const queryLower = originalQuery.toLowerCase();
    const newTerms = extractTerms(
        topResultNames.slice(0, PRF_TOP_K),
        (term) => queryLower.includes(term),
    );
    if (newTerms.length === 0) {
        return originalQuery;
    }
    return `${originalQuery} ${newTerms.join(' ')}`;
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file reranker.ts
|
|
3
|
+
* @description Cross-encoder re-ranker for improving top-k precision.
|
|
4
|
+
*
|
|
5
|
+
 * Uses BGE reranker base (Xenova/bge-reranker-base, ONNX) via transformers.js — fully local, no external APIs.
|
|
6
|
+
* Lazy-loaded on first use to avoid blocking MCP server startup.
|
|
7
|
+
*
|
|
8
|
+
* Pipeline: BM25 + semantic → RRF merge (top N) → re-rank (top K)
|
|
9
|
+
* The re-ranker scores (query, passage) pairs jointly, giving much higher precision
|
|
10
|
+
* than bi-encoder similarity for the final ranking.
|
|
11
|
+
*/
|
|
12
|
+
import type { RerankResult } from './types.js';
|
|
13
|
+
/** One candidate passage handed to the re-ranker. */
interface RerankInput {
    /** Identifier echoed back on the matching result */
    readonly id: string;
    /** Passage text scored against the query */
    readonly text: string;
}
/**
 * Score passages against a query with a cross-encoder model and order them.
 *
 * @param query - The search query
 * @param passages - Candidates as {id, text} pairs
 * @returns Results ordered from highest to lowest score
 */
export declare function rerank(
    query: string,
    passages: readonly RerankInput[],
): Promise<RerankResult[]>;
/** Tell whether the re-ranker model has been loaded */
export declare function isRerankerReady(): boolean;
/** Release the re-ranker model */
export declare function disposeReranker(): Promise<void>;
export {};
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// code-mapper/src/core/search/reranker.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file reranker.ts
|
|
4
|
+
* @description Cross-encoder re-ranker for improving top-k precision.
|
|
5
|
+
*
|
|
6
|
+
 * Uses BGE reranker base (Xenova/bge-reranker-base, ONNX) via transformers.js — fully local, no external APIs.
|
|
7
|
+
* Lazy-loaded on first use to avoid blocking MCP server startup.
|
|
8
|
+
*
|
|
9
|
+
* Pipeline: BM25 + semantic → RRF merge (top N) → re-rank (top K)
|
|
10
|
+
* The re-ranker scores (query, passage) pairs jointly, giving much higher precision
|
|
11
|
+
* than bi-encoder similarity for the final ranking.
|
|
12
|
+
*/
|
|
13
|
+
// ============================================================================
|
|
14
|
+
// MODEL CONFIGURATION
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// BGE reranker base — cross-encoder that understands code structure.
|
|
17
|
+
// 278M params, ONNX, runs on CPU via transformers.js.
|
|
18
|
+
// Tested: correctly ranks "initLbug" > "checkout" > "parseCode" > "README" for "database connection pool".
|
|
19
|
+
const RERANKER_MODEL_ID = 'Xenova/bge-reranker-base';
// Loaded { model, tokenizer } pair; null until a load has succeeded (or after dispose)
let rerankerState = null;
// Promise of the load in progress/completed, shared so concurrent callers trigger one load
let initPromise = null;
// ============================================================================
// INITIALIZATION
// ============================================================================
/**
 * Replacement for `stream.write` while output is muted.
 * Discards the chunk but still honours the optional completion callback
 * (`write(chunk, cb)` or `write(chunk, encoding, cb)`), so code waiting for
 * that callback is not left hanging during the model load.
 */
function mutedWrite(_chunk, encodingOrCallback, callback) {
    const done = typeof encodingOrCallback === 'function' ? encodingOrCallback : callback;
    if (typeof done === 'function') {
        process.nextTick(done);
    }
    return true;
}
/**
 * Load the re-ranker tokenizer and model once and cache them in module state.
 *
 * Concurrent callers share the same in-flight promise. On failure the cached
 * promise and state are reset so a later call retries the load.
 *
 * @returns The loaded { model, tokenizer } pair
 * @throws Whatever the dynamic import or the model/tokenizer load throws
 */
async function initReranker() {
    if (rerankerState) {
        return rerankerState;
    }
    if (initPromise) {
        return initPromise;
    }
    initPromise = (async () => {
        try {
            console.error('Code Mapper: Loading re-ranker model (first use may take a moment)...');
            const { AutoModelForSequenceClassification, AutoTokenizer } = await import('@huggingface/transformers');
            // Silence stdout during model load (prevents ONNX output from corrupting MCP stdio)
            // NOTE(review): the patch is process-wide, so anything else written to
            // stdout/stderr while the load is awaited is discarded as well.
            const origStdout = process.stdout.write;
            const origStderr = process.stderr.write;
            process.stdout.write = mutedWrite;
            process.stderr.write = mutedWrite;
            let tokenizer;
            let model;
            try {
                tokenizer = await AutoTokenizer.from_pretrained(RERANKER_MODEL_ID);
                model = await AutoModelForSequenceClassification.from_pretrained(RERANKER_MODEL_ID);
            }
            finally {
                // Always restore the real writers, even when loading failed
                process.stdout.write = origStdout;
                process.stderr.write = origStderr;
            }
            console.error('Code Mapper: Re-ranker model loaded');
            rerankerState = { model, tokenizer };
            return rerankerState;
        }
        catch (error) {
            // Forget the failed attempt so the next call can retry
            initPromise = null;
            rerankerState = null;
            throw error;
        }
    })();
    return initPromise;
}
|
|
61
|
+
/**
 * Ranking that keeps the incoming order, used whenever the cross-encoder
 * cannot produce scores. Scores are synthetic: passage count minus index.
 */
function rankInOriginalOrder(passages) {
    return passages.map((p, i) => ({
        id: p.id,
        score: passages.length - i,
        originalRank: i + 1,
    }));
}
/**
 * Re-rank passages against a query using a cross-encoder model.
 *
 * Zero or one passage is answered without touching the model. If the model
 * cannot be loaded, inference throws, or the model returns a number of scores
 * that does not match the number of passages, the passages are returned in
 * their original order with synthetic scores instead of failing the search.
 *
 * @param query - The search query
 * @param passages - Array of {id, text} to score against the query
 * @returns Passages as {id, score, originalRank}, sorted by score (highest first)
 */
export async function rerank(query, passages) {
    if (passages.length === 0) {
        return [];
    }
    if (passages.length === 1) {
        return [{ id: passages[0].id, score: 1, originalRank: 1 }];
    }
    let state;
    try {
        state = await initReranker();
    }
    catch {
        // If re-ranker fails to load, return original order with synthetic scores
        console.error('Code Mapper: Re-ranker unavailable, using original ranking');
        return rankInOriginalOrder(passages);
    }
    const { tokenizer, model } = state;
    let scores;
    try {
        // Tokenize all (query, passage) pairs in one batch
        const queries = passages.map(() => query);
        const options = { text_pair: passages.map((p) => p.text), padding: true, truncation: true };
        const inputs = tokenizer.call
            ? await tokenizer(queries, options)
            : await tokenizer.__call__(queries, options);
        const output = await model(inputs);
        scores = Array.from(output.logits.data);
    }
    catch (error) {
        console.error('Code Mapper: Re-ranker inference failed, using original ranking:', error instanceof Error ? error.message : error);
        return rankInOriginalOrder(passages);
    }
    // One score per passage is required to pair them up by index
    if (scores.length !== passages.length) {
        console.error('Code Mapper: Re-ranker returned an unexpected number of scores, using original ranking');
        return rankInOriginalOrder(passages);
    }
    // Pair with original info and sort by score descending
    return passages
        .map((p, i) => ({
            id: p.id,
            score: scores[i],
            originalRank: i + 1,
        }))
        .sort((a, b) => b.score - a.score);
}
|
|
108
|
+
/**
 * Tell whether the re-ranker model is currently loaded,
 * i.e. whether the module-level state holds a value other than null.
 */
export function isRerankerReady() {
    const loaded = rerankerState !== null;
    return loaded;
}
|
|
112
|
+
/**
 * Dispose the re-ranker model and reset the module state.
 *
 * Does nothing when no model is loaded. The cached state and load promise are
 * cleared even if the model's own `dispose()` throws, so a later call can load
 * a fresh model instead of reusing a half-disposed one; the error still propagates.
 */
export async function disposeReranker() {
    if (!rerankerState) {
        return;
    }
    const { model } = rerankerState;
    try {
        // dispose() is optional on the model object
        if (typeof model.dispose === 'function') {
            await model.dispose();
        }
    }
    finally {
        rerankerState = null;
        initPromise = null;
    }
}
|
|
122
|
+
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file types.ts
|
|
3
|
+
* @description Single source of truth for all search system types and constants.
|
|
4
|
+
*
|
|
5
|
+
* Every search-related type, constant, and configuration lives here.
|
|
6
|
+
* No other file should declare search constants — import from here.
|
|
7
|
+
* Compiler enforces this via branded types and exhaustive checks.
|
|
8
|
+
*/
|
|
9
|
+
/** RRF fusion constant. Higher = more weight to lower-ranked results. */
export declare const RRF_K: 60;
/** Default max cosine distance for semantic search — filters noise before RRF merge */
export declare const DEFAULT_MAX_SEMANTIC_DISTANCE: 0.85;
/** Default number of processes to return */
export declare const DEFAULT_PROCESS_LIMIT: 5;
/** Default max symbols per process */
export declare const DEFAULT_MAX_SYMBOLS_PER_PROCESS: 10;
/** Max standalone definitions to return (keep tight — File-type noise is filtered separately) */
export declare const DEFAULT_MAX_DEFINITIONS: 5;
/** Cohesion boost weight in process ranking (0-1). Higher = prefer tight functional units. */
export declare const COHESION_WEIGHT: 0.25;
/** Max candidates to pass to batch DB lookups (process/cluster/content) after reranking */
export declare const MAX_BATCH_CANDIDATES: 25;
/**
 * BM25 PRF expansion thresholds — expand when top score is weak OR results are sparse.
 * PRF_SPARSE_THRESHOLD is the "sparse results" bound.
 */
export declare const PRF_SPARSE_THRESHOLD: 5;
/** "Weak top score" bound of the PRF expansion condition. */
export declare const PRF_WEAK_SCORE_THRESHOLD: 1;
|
|
26
|
+
/**
|
|
27
|
+
* FTS-indexed node tables and their index names.
|
|
28
|
+
* Adding a table here automatically includes it in BM25 search.
|
|
29
|
+
* Removing one is a compile error wherever it's referenced.
|
|
30
|
+
*/
|
|
31
|
+
export declare const FTS_TABLES: readonly [{
|
|
32
|
+
readonly table: "File";
|
|
33
|
+
readonly index: "file_fts";
|
|
34
|
+
}, {
|
|
35
|
+
readonly table: "Function";
|
|
36
|
+
readonly index: "function_fts";
|
|
37
|
+
}, {
|
|
38
|
+
readonly table: "Class";
|
|
39
|
+
readonly index: "class_fts";
|
|
40
|
+
}, {
|
|
41
|
+
readonly table: "Method";
|
|
42
|
+
readonly index: "method_fts";
|
|
43
|
+
}, {
|
|
44
|
+
readonly table: "Interface";
|
|
45
|
+
readonly index: "interface_fts";
|
|
46
|
+
}];
|
|
47
|
+
/** Union of the `table` names registered in FTS_TABLES: "File" | "Function" | "Class" | "Method" | "Interface". */
export type FTSTableName = typeof FTS_TABLES[number]['table'];
|
|
48
|
+
/** Union of the `index` names registered in FTS_TABLES: "file_fts" | "function_fts" | "class_fts" | "method_fts" | "interface_fts". */
export type FTSIndexName = typeof FTS_TABLES[number]['index'];
|
|
49
|
+
/** Names one of the two search sources; used as the element type of `HybridSearchResult.sources`. */
export type SearchSource = 'bm25' | 'semantic';
|
|
50
|
+
export interface BM25SearchResult {
|
|
51
|
+
readonly nodeId: string;
|
|
52
|
+
readonly name: string;
|
|
53
|
+
readonly type: string;
|
|
54
|
+
readonly filePath: string;
|
|
55
|
+
readonly score: number;
|
|
56
|
+
readonly rank: number;
|
|
57
|
+
readonly startLine?: number;
|
|
58
|
+
readonly endLine?: number;
|
|
59
|
+
}
|
|
60
|
+
export interface SemanticSearchResult {
|
|
61
|
+
readonly nodeId: string;
|
|
62
|
+
readonly name: string;
|
|
63
|
+
readonly label: string;
|
|
64
|
+
readonly filePath: string;
|
|
65
|
+
readonly distance: number;
|
|
66
|
+
readonly startLine?: number;
|
|
67
|
+
readonly endLine?: number;
|
|
68
|
+
}
|
|
69
|
+
export interface HybridSearchResult {
|
|
70
|
+
readonly filePath: string;
|
|
71
|
+
readonly score: number;
|
|
72
|
+
readonly rank: number;
|
|
73
|
+
readonly sources: readonly SearchSource[];
|
|
74
|
+
readonly nodeId?: string;
|
|
75
|
+
readonly name?: string;
|
|
76
|
+
readonly label?: string;
|
|
77
|
+
readonly startLine?: number;
|
|
78
|
+
readonly endLine?: number;
|
|
79
|
+
readonly bm25Score?: number;
|
|
80
|
+
readonly semanticScore?: number;
|
|
81
|
+
}
|
|
82
|
+
export interface SymbolEntry {
|
|
83
|
+
readonly id: string;
|
|
84
|
+
readonly name: string;
|
|
85
|
+
readonly type: string;
|
|
86
|
+
readonly filePath: string;
|
|
87
|
+
readonly startLine?: number;
|
|
88
|
+
readonly endLine?: number;
|
|
89
|
+
readonly module?: string;
|
|
90
|
+
readonly content?: string;
|
|
91
|
+
}
|
|
92
|
+
export interface ProcessSymbolEntry extends SymbolEntry {
|
|
93
|
+
readonly process_id: string;
|
|
94
|
+
readonly step_index: number;
|
|
95
|
+
}
|
|
96
|
+
export interface ProcessEntry {
|
|
97
|
+
readonly id: string;
|
|
98
|
+
readonly label: string;
|
|
99
|
+
readonly heuristicLabel: string;
|
|
100
|
+
readonly processType: string;
|
|
101
|
+
readonly stepCount: number;
|
|
102
|
+
readonly totalScore: number;
|
|
103
|
+
readonly cohesionBoost: number;
|
|
104
|
+
readonly symbols: ProcessSymbolEntry[];
|
|
105
|
+
}
|
|
106
|
+
export interface QueryResponse {
|
|
107
|
+
readonly processes: ReadonlyArray<{
|
|
108
|
+
readonly id: string;
|
|
109
|
+
readonly summary: string;
|
|
110
|
+
readonly priority: number;
|
|
111
|
+
readonly symbol_count: number;
|
|
112
|
+
readonly process_type: string;
|
|
113
|
+
readonly step_count: number;
|
|
114
|
+
}>;
|
|
115
|
+
readonly process_symbols: readonly ProcessSymbolEntry[];
|
|
116
|
+
readonly definitions: readonly SymbolEntry[];
|
|
117
|
+
}
|
|
118
|
+
export interface QueryParams {
|
|
119
|
+
readonly query: string;
|
|
120
|
+
readonly task_context?: string;
|
|
121
|
+
readonly goal?: string;
|
|
122
|
+
readonly limit?: number;
|
|
123
|
+
readonly max_symbols?: number;
|
|
124
|
+
readonly include_content?: boolean;
|
|
125
|
+
}
|
|
126
|
+
export interface RRFConfig {
|
|
127
|
+
/** Weight for BM25 results (0-1). Default 0.6. */
|
|
128
|
+
readonly bm25Weight: number;
|
|
129
|
+
/** Weight for semantic results (0-1). Default 0.4. */
|
|
130
|
+
readonly semanticWeight: number;
|
|
131
|
+
/** RRF constant K. Default 60. */
|
|
132
|
+
readonly k: number;
|
|
133
|
+
/** Max results to return. */
|
|
134
|
+
readonly limit: number;
|
|
135
|
+
}
|
|
136
|
+
/** Default RRF settings. The compiled module initializes it with bm25Weight 0.6, semanticWeight 0.4, k = RRF_K (60) and limit 10. */
export declare const DEFAULT_RRF_CONFIG: RRFConfig;
|
|
137
|
+
/**
 * One re-ranked candidate.
 * NOTE(review): this appears to be the element shape produced by the re-ranker's
 * result mapping (`{ id, score, originalRank }`, sorted by `score` descending) —
 * confirm against the re-rank function's signature.
 */
export interface RerankResult {
|
|
138
|
+
/** Identifier of the candidate. */
readonly id: string;
|
|
139
|
+
/** Relevance score; presumably the model logit for this candidate — confirm. */
readonly score: number;
|
|
140
|
+
/** Presumably the 1-based position of the candidate in the input list (the re-ranker sets `i + 1`) — confirm. */
readonly originalRank: number;
|
|
141
|
+
}
|
|
142
|
+
/** Execute a raw Cypher query and return rows */
|
|
143
|
+
export type CypherExecutor = (cypher: string) => Promise<readonly Record<string, unknown>[]>;
|
|
144
|
+
/** Execute a parameterized Cypher query */
|
|
145
|
+
export type ParameterizedCypherExecutor = (repoId: string, cypher: string, params: Record<string, unknown>) => Promise<readonly Record<string, unknown>[]>;
|
|
146
|
+
export interface SearchPipelineItem {
|
|
147
|
+
readonly nodeId?: string;
|
|
148
|
+
readonly name: string;
|
|
149
|
+
readonly type: string;
|
|
150
|
+
readonly filePath: string;
|
|
151
|
+
readonly startLine?: number;
|
|
152
|
+
readonly endLine?: number;
|
|
153
|
+
readonly score: number;
|
|
154
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// code-mapper/src/core/search/types.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file types.ts
|
|
4
|
+
* @description Single source of truth for all search system types and constants.
|
|
5
|
+
*
|
|
6
|
+
* Every search-related type, constant, and configuration lives here.
|
|
7
|
+
* No other file should declare search constants — import from here.
|
|
8
|
+
* Compiler enforces this via branded types and exhaustive checks.
|
|
9
|
+
*/
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// SEARCH CONFIGURATION — single source of truth
|
|
12
|
+
// ============================================================================
|
|
13
|
+
/** RRF fusion constant. Higher = more weight to lower-ranked results. */
export const RRF_K = 60;
/** Default max cosine distance for semantic search — filters noise before RRF merge */
export const DEFAULT_MAX_SEMANTIC_DISTANCE = 0.85;
/** Default number of processes to return */
export const DEFAULT_PROCESS_LIMIT = 5;
/** Default max symbols per process */
export const DEFAULT_MAX_SYMBOLS_PER_PROCESS = 10;
/** Max standalone definitions to return (keep tight — File-type noise is filtered separately) */
export const DEFAULT_MAX_DEFINITIONS = 5;
/** Cohesion boost weight in process ranking (0-1). Higher = prefer tight functional units. */
export const COHESION_WEIGHT = 0.25;
/** Max candidates to pass to batch DB lookups (process/cluster/content) after reranking */
export const MAX_BATCH_CANDIDATES = 25;
// RERANKER_BLEND_WEIGHT removed — C4 simplified to alpha blending in local-backend.ts
/** BM25 PRF expansion threshold — expand when top score is weak OR results are sparse */
export const PRF_SPARSE_THRESHOLD = 5;
/** Score-side counterpart of PRF_SPARSE_THRESHOLD (the "top score is weak" half of the PRF trigger). */
export const PRF_WEAK_SCORE_THRESHOLD = 1.0;
// ============================================================================
// FTS TABLE REGISTRY — exhaustive, compiler-checked
// ============================================================================
/**
 * FTS-indexed node tables and their index names.
 * Adding a table here automatically includes it in BM25 search.
 * Removing one is a compile error wherever it's referenced.
 *
 * The array and every entry are frozen so the runtime value honours the
 * `readonly` tuple published in the type declarations: importers share this
 * single instance, and an accidental push/sort/assignment now throws instead
 * of silently changing the registry for every other importer.
 */
export const FTS_TABLES = Object.freeze([
    Object.freeze({ table: 'File', index: 'file_fts' }),
    Object.freeze({ table: 'Function', index: 'function_fts' }),
    Object.freeze({ table: 'Class', index: 'class_fts' }),
    Object.freeze({ table: 'Method', index: 'method_fts' }),
    Object.freeze({ table: 'Interface', index: 'interface_fts' }),
]);
/**
 * Default RRF settings: BM25 weight 0.6, semantic weight 0.4, k = RRF_K, limit 10.
 * Frozen because it is a shared module-level object whose fields are declared
 * `readonly`; copy it (`{ ...DEFAULT_RRF_CONFIG, limit: 20 }`) to customise.
 */
export const DEFAULT_RRF_CONFIG = Object.freeze({
    bm25Weight: 0.6,
    semanticWeight: 0.4,
    k: RRF_K,
    limit: 10,
});
|