viberag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +219 -0
- package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
- package/dist/cli/__tests__/mcp-setup.test.js +597 -0
- package/dist/cli/app.d.ts +2 -0
- package/dist/cli/app.js +238 -0
- package/dist/cli/commands/handlers.d.ts +57 -0
- package/dist/cli/commands/handlers.js +231 -0
- package/dist/cli/commands/index.d.ts +2 -0
- package/dist/cli/commands/index.js +2 -0
- package/dist/cli/commands/mcp-setup.d.ts +107 -0
- package/dist/cli/commands/mcp-setup.js +509 -0
- package/dist/cli/commands/useRagCommands.d.ts +23 -0
- package/dist/cli/commands/useRagCommands.js +180 -0
- package/dist/cli/components/CleanWizard.d.ts +17 -0
- package/dist/cli/components/CleanWizard.js +169 -0
- package/dist/cli/components/InitWizard.d.ts +20 -0
- package/dist/cli/components/InitWizard.js +370 -0
- package/dist/cli/components/McpSetupWizard.d.ts +37 -0
- package/dist/cli/components/McpSetupWizard.js +387 -0
- package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
- package/dist/cli/components/SearchResultsDisplay.js +130 -0
- package/dist/cli/components/WelcomeBanner.d.ts +10 -0
- package/dist/cli/components/WelcomeBanner.js +26 -0
- package/dist/cli/components/index.d.ts +1 -0
- package/dist/cli/components/index.js +1 -0
- package/dist/cli/data/mcp-editors.d.ts +80 -0
- package/dist/cli/data/mcp-editors.js +270 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +26 -0
- package/dist/cli-bundle.cjs +5269 -0
- package/dist/common/commands/terminalSetup.d.ts +2 -0
- package/dist/common/commands/terminalSetup.js +144 -0
- package/dist/common/components/CommandSuggestions.d.ts +9 -0
- package/dist/common/components/CommandSuggestions.js +20 -0
- package/dist/common/components/StaticWithResize.d.ts +23 -0
- package/dist/common/components/StaticWithResize.js +62 -0
- package/dist/common/components/StatusBar.d.ts +8 -0
- package/dist/common/components/StatusBar.js +64 -0
- package/dist/common/components/TextInput.d.ts +12 -0
- package/dist/common/components/TextInput.js +239 -0
- package/dist/common/components/index.d.ts +3 -0
- package/dist/common/components/index.js +3 -0
- package/dist/common/hooks/index.d.ts +4 -0
- package/dist/common/hooks/index.js +4 -0
- package/dist/common/hooks/useCommandHistory.d.ts +7 -0
- package/dist/common/hooks/useCommandHistory.js +51 -0
- package/dist/common/hooks/useCtrlC.d.ts +9 -0
- package/dist/common/hooks/useCtrlC.js +40 -0
- package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
- package/dist/common/hooks/useKittyKeyboard.js +26 -0
- package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
- package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
- package/dist/common/hooks/useTerminalResize.d.ts +28 -0
- package/dist/common/hooks/useTerminalResize.js +51 -0
- package/dist/common/hooks/useTextBuffer.d.ts +13 -0
- package/dist/common/hooks/useTextBuffer.js +165 -0
- package/dist/common/index.d.ts +13 -0
- package/dist/common/index.js +17 -0
- package/dist/common/types.d.ts +162 -0
- package/dist/common/types.js +1 -0
- package/dist/mcp/index.d.ts +12 -0
- package/dist/mcp/index.js +66 -0
- package/dist/mcp/server.d.ts +25 -0
- package/dist/mcp/server.js +837 -0
- package/dist/mcp/watcher.d.ts +86 -0
- package/dist/mcp/watcher.js +334 -0
- package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
- package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
- package/dist/rag/__tests__/helpers.d.ts +30 -0
- package/dist/rag/__tests__/helpers.js +67 -0
- package/dist/rag/__tests__/merkle.test.d.ts +5 -0
- package/dist/rag/__tests__/merkle.test.js +161 -0
- package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
- package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
- package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
- package/dist/rag/__tests__/multi-language.test.js +535 -0
- package/dist/rag/__tests__/rag.test.d.ts +10 -0
- package/dist/rag/__tests__/rag.test.js +311 -0
- package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
- package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
- package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
- package/dist/rag/__tests__/search-filters.test.js +250 -0
- package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
- package/dist/rag/__tests__/search-modes.test.js +133 -0
- package/dist/rag/config/index.d.ts +61 -0
- package/dist/rag/config/index.js +111 -0
- package/dist/rag/constants.d.ts +41 -0
- package/dist/rag/constants.js +57 -0
- package/dist/rag/embeddings/fastembed.d.ts +62 -0
- package/dist/rag/embeddings/fastembed.js +124 -0
- package/dist/rag/embeddings/gemini.d.ts +26 -0
- package/dist/rag/embeddings/gemini.js +116 -0
- package/dist/rag/embeddings/index.d.ts +10 -0
- package/dist/rag/embeddings/index.js +9 -0
- package/dist/rag/embeddings/local-4b.d.ts +28 -0
- package/dist/rag/embeddings/local-4b.js +51 -0
- package/dist/rag/embeddings/local.d.ts +29 -0
- package/dist/rag/embeddings/local.js +119 -0
- package/dist/rag/embeddings/mistral.d.ts +22 -0
- package/dist/rag/embeddings/mistral.js +85 -0
- package/dist/rag/embeddings/openai.d.ts +22 -0
- package/dist/rag/embeddings/openai.js +85 -0
- package/dist/rag/embeddings/types.d.ts +37 -0
- package/dist/rag/embeddings/types.js +1 -0
- package/dist/rag/gitignore/index.d.ts +57 -0
- package/dist/rag/gitignore/index.js +178 -0
- package/dist/rag/index.d.ts +15 -0
- package/dist/rag/index.js +25 -0
- package/dist/rag/indexer/chunker.d.ts +129 -0
- package/dist/rag/indexer/chunker.js +1352 -0
- package/dist/rag/indexer/index.d.ts +6 -0
- package/dist/rag/indexer/index.js +6 -0
- package/dist/rag/indexer/indexer.d.ts +73 -0
- package/dist/rag/indexer/indexer.js +356 -0
- package/dist/rag/indexer/types.d.ts +68 -0
- package/dist/rag/indexer/types.js +47 -0
- package/dist/rag/logger/index.d.ts +20 -0
- package/dist/rag/logger/index.js +75 -0
- package/dist/rag/manifest/index.d.ts +50 -0
- package/dist/rag/manifest/index.js +97 -0
- package/dist/rag/merkle/diff.d.ts +26 -0
- package/dist/rag/merkle/diff.js +95 -0
- package/dist/rag/merkle/hash.d.ts +34 -0
- package/dist/rag/merkle/hash.js +165 -0
- package/dist/rag/merkle/index.d.ts +68 -0
- package/dist/rag/merkle/index.js +298 -0
- package/dist/rag/merkle/node.d.ts +51 -0
- package/dist/rag/merkle/node.js +69 -0
- package/dist/rag/search/filters.d.ts +21 -0
- package/dist/rag/search/filters.js +100 -0
- package/dist/rag/search/fts.d.ts +32 -0
- package/dist/rag/search/fts.js +61 -0
- package/dist/rag/search/hybrid.d.ts +17 -0
- package/dist/rag/search/hybrid.js +58 -0
- package/dist/rag/search/index.d.ts +89 -0
- package/dist/rag/search/index.js +367 -0
- package/dist/rag/search/types.d.ts +130 -0
- package/dist/rag/search/types.js +4 -0
- package/dist/rag/search/vector.d.ts +25 -0
- package/dist/rag/search/vector.js +44 -0
- package/dist/rag/storage/index.d.ts +92 -0
- package/dist/rag/storage/index.js +287 -0
- package/dist/rag/storage/lancedb-native.d.ts +7 -0
- package/dist/rag/storage/lancedb-native.js +10 -0
- package/dist/rag/storage/schema.d.ts +23 -0
- package/dist/rag/storage/schema.js +50 -0
- package/dist/rag/storage/types.d.ts +100 -0
- package/dist/rag/storage/types.js +68 -0
- package/package.json +67 -0
- package/scripts/check-node-version.js +37 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid search combining vector and FTS with RRF reranking.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Reciprocal Rank Fusion (RRF) constant.
|
|
6
|
+
 * Higher values flatten the 1 / (k + rank) curve, so lower-ranked results contribute relatively more.
|
|
7
|
+
*/
|
|
8
|
+
const RRF_K = 60;
/**
 * Combine vector and FTS results using weighted Reciprocal Rank Fusion.
 *
 * Each list contributes `weight * 1 / (RRF_K + rank)` per result, where rank
 * is the 1-indexed position in that list. Contributions for the same id are
 * summed across the two lists, and the merged set is sorted by that sum,
 * highest first.
 *
 * Each returned item is a copy of the first-seen result object for its id
 * (the vector result when the id is in both lists) with:
 * - `score` replaced by the combined RRF score,
 * - `vectorScore` set to the result's `vectorScore ?? score` from the vector
 *   list (undefined when the id was not in that list),
 * - `ftsScore` set to the result's `ftsScore ?? score` from the FTS list
 *   (undefined when the id was not in that list).
 *
 * @param vectorResults - Results from vector search, best first
 * @param ftsResults - Results from FTS search, best first
 * @param limit - Maximum number of results to return; `undefined` returns
 *   all, negative values return none
 * @param vectorWeight - Weight for vector results (0.0-1.0, default 0.7);
 *   values outside the range are clamped, FTS gets the remainder
 * @returns Combined and reranked results
 */
export function hybridRerank(vectorResults, ftsResults, limit, vectorWeight = 0.7) {
    // Clamp so neither source can receive a negative weight, which would
    // push its best matches to the bottom of the merged ranking.
    const boundedVectorWeight = Math.min(1, Math.max(0, vectorWeight));
    const ftsWeight = 1 - boundedVectorWeight;
    const scores = new Map();
    const resultMap = new Map();
    const vectorScores = new Map();
    const ftsScoresMap = new Map();
    // Fold one ranked list into the shared accumulators.
    const accumulate = (ranked, weight, sourceScores, sourceScoreKey) => {
        const seen = new Set();
        ranked.forEach((result, rank) => {
            // A document counts once per list, at its best (first) position.
            if (seen.has(result.id)) {
                return;
            }
            seen.add(result.id);
            const rrfScore = weight * (1 / (RRF_K + rank + 1));
            scores.set(result.id, (scores.get(result.id) ?? 0) + rrfScore);
            sourceScores.set(result.id, result[sourceScoreKey] ?? result.score);
            // First writer wins; the vector list is folded first, so its
            // result object is preferred when an id appears in both lists.
            if (!resultMap.has(result.id)) {
                resultMap.set(result.id, result);
            }
        });
    };
    accumulate(vectorResults, boundedVectorWeight, vectorScores, 'vectorScore');
    accumulate(ftsResults, ftsWeight, ftsScoresMap, 'ftsScore');
    // slice(0, -n) would drop the last n items rather than return nothing.
    const cap = typeof limit === 'number' && limit < 0 ? 0 : limit;
    return [...scores.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, cap)
        .map(([id, score]) => ({
            ...resultMap.get(id),
            score,
            vectorScore: vectorScores.get(id),
            ftsScore: ftsScoresMap.get(id),
        }));
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search module for code search.
|
|
3
|
+
*
|
|
4
|
+
* Supports multiple search modes:
|
|
5
|
+
* - semantic: Dense vector search for conceptual queries
|
|
6
|
+
* - exact: BM25/FTS for symbol names and exact matches
|
|
7
|
+
* - hybrid: Combined vector + BM25 with RRF (default)
|
|
8
|
+
* - definition: Direct metadata lookup for symbol definitions
|
|
9
|
+
* - similar: Vector search with code snippet as query
|
|
10
|
+
*/
|
|
11
|
+
import type { Logger } from '../logger/index.js';
|
|
12
|
+
import type { SearchOptions, SearchResults } from './types.js';
|
|
13
|
+
export type { SearchDebugInfo, SearchFilters, SearchMode, SearchOptions, SearchResult, SearchResults, } from './types.js';
|
|
14
|
+
export { vectorSearch } from './vector.js';
|
|
15
|
+
export { ftsSearch, ensureFtsIndex } from './fts.js';
|
|
16
|
+
export { hybridRerank } from './hybrid.js';
|
|
17
|
+
/**
 * Code search engine.
 *
 * `search()` is the main entry point and dispatches on the requested mode
 * (semantic, exact, hybrid, definition, similar). `searchVector()` and
 * `searchFts()` are legacy shortcuts for the semantic and exact modes.
 */
export declare class SearchEngine {
    private readonly projectRoot;
    private storage;
    private embeddings;
    private logger;
    private initialized;
    /**
     * @param projectRoot - Project root used to load config and open storage
     * @param logger - Optional logger for engine messages
     */
    constructor(projectRoot: string, logger?: Logger);
    /**
     * Run a search in the mode selected by `options.mode`.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResults>;
    /**
     * Semantic mode: dense vector search only.
     * Suited to conceptual queries such as "how does auth work?".
     */
    private searchSemantic;
    /**
     * Exact mode: BM25/FTS only.
     * Suited to symbol names and exact string matches.
     */
    private searchExact;
    /**
     * Hybrid mode: vector + BM25 results fused with RRF reranking.
     * The general-purpose mode.
     *
     * @param autoBoost - When true, increase BM25 weight and oversample if vector scores are low
     * @param autoBoostThreshold - Vector score threshold below which auto-boost activates
     * @param returnDebug - Include debug info in results for AI evaluation
     */
    private searchHybrid;
    /**
     * Definition mode: direct metadata lookup.
     * Suited to "where is X defined?" queries.
     */
    private searchDefinition;
    /**
     * Similar mode: vector search using a code snippet as the query.
     * Suited to "find code like this" queries.
     */
    private searchSimilar;
    /**
     * Vector-only search (legacy method).
     */
    searchVector(query: string, limit?: number): Promise<SearchResults>;
    /**
     * FTS-only search (legacy method).
     */
    searchFts(query: string, limit?: number): Promise<SearchResults>;
    /**
     * Initialize the engine if it has not been initialized yet.
     */
    private ensureInitialized;
    /**
     * Build the embedding provider selected by the config.
     */
    private createEmbeddingProvider;
    /**
     * Fetch the code chunks table.
     */
    private getTable;
    /**
     * Emit a log message.
     */
    private log;
    /**
     * Close the engine and release its resources.
     */
    close(): void;
}
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search module for code search.
|
|
3
|
+
*
|
|
4
|
+
* Supports multiple search modes:
|
|
5
|
+
* - semantic: Dense vector search for conceptual queries
|
|
6
|
+
* - exact: BM25/FTS for symbol names and exact matches
|
|
7
|
+
* - hybrid: Combined vector + BM25 with RRF (default)
|
|
8
|
+
* - definition: Direct metadata lookup for symbol definitions
|
|
9
|
+
* - similar: Vector search with code snippet as query
|
|
10
|
+
*/
|
|
11
|
+
import { loadConfig } from '../config/index.js';
|
|
12
|
+
import { GeminiEmbeddingProvider, Local4BEmbeddingProvider, LocalEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, } from '../embeddings/index.js';
|
|
13
|
+
import { Storage } from '../storage/index.js';
|
|
14
|
+
import { buildDefinitionFilter, buildFilterClause } from './filters.js';
|
|
15
|
+
import { ftsSearch } from './fts.js';
|
|
16
|
+
import { hybridRerank } from './hybrid.js';
|
|
17
|
+
import { vectorSearch } from './vector.js';
|
|
18
|
+
export { vectorSearch } from './vector.js';
|
|
19
|
+
export { ftsSearch, ensureFtsIndex } from './fts.js';
|
|
20
|
+
export { hybridRerank } from './hybrid.js';
|
|
21
|
+
/** Default search limit */
|
|
22
|
+
const DEFAULT_LIMIT = 10;
|
|
23
|
+
/** Exhaustive mode limit (high but bounded) */
|
|
24
|
+
const EXHAUSTIVE_LIMIT = 500;
|
|
25
|
+
/** Default BM25 weight for hybrid search */
|
|
26
|
+
const DEFAULT_BM25_WEIGHT = 0.3;
|
|
27
|
+
/** Default oversample multiplier for hybrid search */
|
|
28
|
+
const DEFAULT_OVERSAMPLE_MULTIPLIER = 2;
|
|
29
|
+
/** Maximum oversample multiplier (for low vector confidence) */
|
|
30
|
+
const MAX_OVERSAMPLE_MULTIPLIER = 4;
|
|
31
|
+
/**
 * Search engine for code search.
 *
 * Supports semantic (vector), exact (FTS), hybrid, definition, and similar
 * search modes. Storage and the embedding provider are created lazily on the
 * first search and released by close().
 */
export class SearchEngine {
    /**
     * @param projectRoot - Project root passed to loadConfig() and Storage
     * @param logger - Optional logger; when omitted, log() does nothing
     */
    constructor(projectRoot, logger) {
        Object.defineProperty(this, "projectRoot", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "storage", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: null
        });
        Object.defineProperty(this, "embeddings", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: null
        });
        Object.defineProperty(this, "logger", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: null
        });
        Object.defineProperty(this, "initialized", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: false
        });
        this.projectRoot = projectRoot;
        this.logger = logger ?? null;
    }
    /**
     * Primary search method. Dispatches to the appropriate search mode.
     *
     * @param query - Search text; also used as the symbol name or code snippet
     *   when `options.symbolName` / `options.codeSnippet` are not given
     * @param options - Search options; mode defaults to 'hybrid', limit to
     *   DEFAULT_LIMIT, and `exhaustive` replaces the limit with EXHAUSTIVE_LIMIT
     * @returns Results with `elapsedMs` filled in, plus `totalMatches` (the
     *   number of returned results) when `options.exhaustive` is set
     */
    async search(query, options = {}) {
        const start = Date.now();
        const mode = options.mode ?? 'hybrid';
        const limit = options.exhaustive
            ? EXHAUSTIVE_LIMIT
            : (options.limit ?? DEFAULT_LIMIT);
        const filterClause = buildFilterClause(options.filters);
        await this.ensureInitialized();
        const table = await this.getTable();
        let results;
        switch (mode) {
            case 'semantic':
                results = await this.searchSemantic(table, query, limit, filterClause, options.minScore);
                break;
            case 'exact':
                results = await this.searchExact(table, query, limit, filterClause, options.minScore);
                break;
            case 'definition':
                results = await this.searchDefinition(table, options.symbolName ?? query, limit, options.filters?.type, filterClause);
                break;
            case 'similar':
                results = await this.searchSimilar(table, options.codeSnippet ?? query, limit, filterClause, options.minScore);
                break;
            case 'hybrid':
            default:
                // Unknown modes fall back to hybrid.
                results = await this.searchHybrid(table, query, limit, options.bm25Weight ?? DEFAULT_BM25_WEIGHT, filterClause, options.minScore, options.autoBoost ?? true, options.autoBoostThreshold ?? 0.3, options.returnDebug ?? false);
                break;
        }
        // Add total matches for exhaustive mode
        if (options.exhaustive) {
            results.totalMatches = results.results.length;
        }
        // The per-mode helpers return elapsedMs: 0; the real timing is set here.
        results.elapsedMs = Date.now() - start;
        return results;
    }
    /**
     * Semantic search: dense vector search only.
     * Best for conceptual queries like "how does auth work?"
     *
     * Embeds the query, then delegates limit, filter and minScore handling
     * to vectorSearch().
     */
    async searchSemantic(table, query, limit, filterClause, minScore) {
        const queryVector = await this.embeddings.embedSingle(query);
        const results = await vectorSearch(table, queryVector, {
            limit,
            filterClause,
            minScore,
        });
        return {
            results,
            query,
            searchType: 'semantic',
            elapsedMs: 0,
        };
    }
    /**
     * Exact search: BM25/FTS only.
     * Best for symbol names and exact string matches.
     *
     * Delegates limit, filter and minScore handling to ftsSearch().
     */
    async searchExact(table, query, limit, filterClause, minScore) {
        const results = await ftsSearch(table, query, {
            limit,
            filterClause,
            minScore,
        });
        return {
            results,
            query,
            searchType: 'exact',
            elapsedMs: 0,
        };
    }
    /**
     * Hybrid search: Vector + BM25 with RRF reranking.
     * Good general-purpose search.
     *
     * Both searches are first run with a 2x oversample. When auto-boost is on
     * and the best vector score is below the threshold, the oversample grows
     * (up to 4x, which triggers a re-fetch) and the BM25 weight is raised.
     *
     * @param table - Chunks table to search
     * @param query - Query text, embedded for the vector side and passed as-is to FTS
     * @param limit - Number of results to return after reranking
     * @param bm25Weight - Requested BM25 weight; the vector weight is 1 minus the effective value
     * @param filterClause - Optional WHERE clause applied to both searches
     * @param minScore - Optional threshold applied to the combined score (see note in body)
     * @param autoBoost - When true, increase BM25 weight and oversample if vector scores are low
     * @param autoBoostThreshold - Vector score threshold below which auto-boost activates
     * @param returnDebug - Include debug info in results for AI evaluation
     */
    async searchHybrid(table, query, limit, bm25Weight, filterClause, minScore, autoBoost = true, autoBoostThreshold = 0.3, returnDebug = false) {
        const queryVector = await this.embeddings.embedSingle(query);
        // Initial search with default oversample to assess vector confidence
        const initialOversample = limit * DEFAULT_OVERSAMPLE_MULTIPLIER;
        const [initialVectorResults, initialFtsResults] = await Promise.all([
            vectorSearch(table, queryVector, {
                limit: initialOversample,
                filterClause,
            }),
            ftsSearch(table, query, {
                limit: initialOversample,
                filterClause,
            }),
        ]);
        // Calculate confidence metrics (the trailing 0 covers empty result lists)
        const maxVectorScore = Math.max(...initialVectorResults.map(r => r.score), 0);
        const maxFtsScore = Math.max(...initialFtsResults.map(r => r.ftsScore ?? r.score), 0);
        // Dynamic oversample: increase when vector confidence is low
        let oversampleMultiplier = DEFAULT_OVERSAMPLE_MULTIPLIER;
        let dynamicOversampleApplied = false;
        if (autoBoost && maxVectorScore < autoBoostThreshold) {
            // Linear scale from 2x to 4x based on how low vector scores are
            // At threshold: 2x, at 0: 4x
            const boost = 1 - maxVectorScore / autoBoostThreshold;
            oversampleMultiplier =
                DEFAULT_OVERSAMPLE_MULTIPLIER +
                    boost * (MAX_OVERSAMPLE_MULTIPLIER - DEFAULT_OVERSAMPLE_MULTIPLIER);
            dynamicOversampleApplied =
                oversampleMultiplier > DEFAULT_OVERSAMPLE_MULTIPLIER;
        }
        const effectiveOversample = Math.round(limit * oversampleMultiplier);
        // If we need more results due to dynamic oversample, fetch additional
        let vectorResults = initialVectorResults;
        let ftsResults = initialFtsResults;
        if (effectiveOversample > initialOversample) {
            // Re-fetch with higher limit
            [vectorResults, ftsResults] = await Promise.all([
                vectorSearch(table, queryVector, {
                    limit: effectiveOversample,
                    filterClause,
                }),
                ftsSearch(table, query, {
                    limit: effectiveOversample,
                    filterClause,
                }),
            ]);
        }
        // Auto-boost: increase BM25 weight when vector confidence is low
        let effectiveBm25Weight = bm25Weight;
        let autoBoostApplied = false;
        if (autoBoost && maxVectorScore < autoBoostThreshold) {
            // Calculate boost factor: higher boost when vector scores are lower
            const boost = (autoBoostThreshold - maxVectorScore) / autoBoostThreshold;
            // Increase BM25 weight by up to 0.5, capped at 0.9
            effectiveBm25Weight = Math.min(0.9, bm25Weight + boost * 0.5);
            autoBoostApplied = effectiveBm25Weight !== bm25Weight;
        }
        // Combine with RRF using effective weight
        const vectorWeight = 1 - effectiveBm25Weight;
        let results = hybridRerank(vectorResults, ftsResults, limit, vectorWeight);
        // Apply minScore filter.
        // NOTE(review): `score` here is the combined RRF score from
        // hybridRerank, which with weights summing to 1 cannot exceed
        // 1 / (60 + 1) ~= 0.0164. A minScore chosen on a 0-1 similarity
        // scale therefore removes every hybrid result - confirm whether the
        // threshold should be applied to vectorScore or a normalized score.
        if (minScore) {
            results = results.filter(r => r.score >= minScore);
        }
        // Build debug info if requested
        const debug = returnDebug
            ? {
                maxVectorScore,
                maxFtsScore,
                requestedBm25Weight: bm25Weight,
                effectiveBm25Weight,
                autoBoostApplied,
                autoBoostThreshold,
                vectorResultCount: vectorResults.length,
                ftsResultCount: ftsResults.length,
                oversampleMultiplier,
                dynamicOversampleApplied,
            }
            : undefined;
        return {
            results,
            query,
            searchType: 'hybrid',
            elapsedMs: 0,
            debug,
        };
    }
    /**
     * Definition search: direct metadata lookup.
     * Best for "where is X defined?" queries.
     *
     * @param table - Chunks table to query
     * @param symbolName - Symbol to look up; passed to buildDefinitionFilter()
     * @param limit - Maximum number of rows to fetch
     * @param typeFilter - Optional chunk types, passed to buildDefinitionFilter()
     * @param additionalFilter - Optional extra WHERE clause, AND-ed with the definition filter
     */
    async searchDefinition(table, symbolName, limit, typeFilter, additionalFilter) {
        const definitionFilter = buildDefinitionFilter(symbolName, typeFilter);
        // Combine with additional filters
        const fullFilter = additionalFilter
            ? `(${definitionFilter}) AND (${additionalFilter})`
            : definitionFilter;
        // Use table query directly for metadata lookup
        const queryResults = await table
            .query()
            .where(fullFilter)
            .limit(limit)
            .toArray();
        // Map snake_case row columns onto the camelCase SearchResult shape.
        const results = queryResults.map((row, index) => {
            const chunk = row;
            return {
                id: chunk.id,
                text: chunk.text,
                filepath: chunk.filepath,
                filename: chunk.filename,
                name: chunk.name,
                type: chunk.type,
                startLine: chunk.start_line,
                endLine: chunk.end_line,
                score: 1 / (index + 1), // Rank-based score: 1, 1/2, 1/3, ... in row order
                signature: chunk.signature,
                isExported: chunk.is_exported,
            };
        });
        return {
            results,
            query: symbolName,
            searchType: 'definition',
            elapsedMs: 0,
        };
    }
    /**
     * Similar search: vector search with a code snippet as the query.
     * Best for "find code like this" queries.
     *
     * The `query` field of the returned object is the snippet truncated to
     * 100 characters, with '...' appended when it was longer.
     */
    async searchSimilar(table, codeSnippet, limit, filterClause, minScore) {
        // Embed the code snippet directly
        const queryVector = await this.embeddings.embedSingle(codeSnippet);
        const results = await vectorSearch(table, queryVector, {
            limit,
            filterClause,
            minScore,
        });
        return {
            results,
            query: codeSnippet.substring(0, 100) + (codeSnippet.length > 100 ? '...' : ''),
            searchType: 'similar',
            elapsedMs: 0,
        };
    }
    /**
     * Perform vector-only search. (Legacy method)
     * Equivalent to search() with mode 'semantic'.
     */
    async searchVector(query, limit = DEFAULT_LIMIT) {
        return this.search(query, { mode: 'semantic', limit });
    }
    /**
     * Perform FTS-only search. (Legacy method)
     * Equivalent to search() with mode 'exact'.
     */
    async searchFts(query, limit = DEFAULT_LIMIT) {
        return this.search(query, { mode: 'exact', limit });
    }
    /**
     * Initialize the search engine on first use.
     *
     * Loads the project config, connects storage, and creates and initializes
     * the embedding provider. The instance fields are only assigned once
     * every step has succeeded; if any step throws, whatever was already
     * created is closed and the original error is rethrown, so a failed
     * attempt does not leave an open handle behind for the next attempt to
     * overwrite.
     */
    async ensureInitialized() {
        if (this.initialized)
            return;
        const config = await loadConfig(this.projectRoot);
        // Best-effort cleanup that never masks the error being propagated.
        const release = async (resource) => {
            try {
                await resource?.close();
            }
            catch {
                // Ignored on purpose: the initialization error is the one to report.
            }
        };
        // Initialize storage
        const storage = new Storage(this.projectRoot, config.embeddingDimensions);
        let embeddings = null;
        try {
            await storage.connect();
            // Initialize embeddings with config (includes apiKey for cloud providers)
            embeddings = this.createEmbeddingProvider(config);
            await embeddings.initialize();
        }
        catch (error) {
            await release(embeddings);
            await release(storage);
            throw error;
        }
        this.storage = storage;
        this.embeddings = embeddings;
        this.initialized = true;
        this.log('info', 'SearchEngine initialized');
    }
    /**
     * Create the appropriate embedding provider based on config.
     *
     * @param config - Loaded config; reads `embeddingProvider` and `apiKey`
     * @returns A new provider instance; cloud providers receive the API key
     * @throws Error when `config.embeddingProvider` is not a known provider
     */
    createEmbeddingProvider(config) {
        const apiKey = config.apiKey;
        switch (config.embeddingProvider) {
            case 'local':
                return new LocalEmbeddingProvider();
            case 'local-4b':
                return new Local4BEmbeddingProvider();
            case 'gemini':
                return new GeminiEmbeddingProvider(apiKey);
            case 'mistral':
                return new MistralEmbeddingProvider(apiKey);
            case 'openai':
                return new OpenAIEmbeddingProvider(apiKey);
            default:
                throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
        }
    }
    /**
     * Get the code chunks table from storage.
     * Callers must have awaited ensureInitialized() first.
     */
    async getTable() {
        return this.storage.getChunksTable();
    }
    /**
     * Log a message under the 'Search' tag; a no-op when no logger was given.
     *
     * @param level - Name of the logger method to call
     * @param message - Message text
     */
    log(level, message) {
        if (!this.logger)
            return;
        this.logger[level]('Search', message);
    }
    /**
     * Close the search engine and free resources.
     *
     * The embedding provider is closed even if closing storage throws, and
     * both handles are cleared so a repeated close() is a no-op and a later
     * search re-initializes from scratch.
     */
    close() {
        try {
            this.storage?.close();
        }
        finally {
            try {
                this.embeddings?.close();
            }
            finally {
                this.storage = null;
                this.embeddings = null;
                this.initialized = false;
            }
        }
        this.log('info', 'SearchEngine closed');
    }
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search result types.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Strategy used to answer a search request.
 */
export type SearchMode =
    | 'semantic'
    | 'exact'
    | 'hybrid'
    | 'definition'
    | 'similar';
|
|
8
|
+
/**
 * One matched code chunk.
 */
export interface SearchResult {
    /** Unique ID: "{filepath}:{startLine}" */
    id: string;
    /** Source code content of the chunk */
    text: string;
    /** File path relative to the project */
    filepath: string;
    /** File name without directories */
    filename: string;
    /** Symbol name */
    name: string;
    /** Chunk type: function, class, method, or module */
    type: string;
    /** First line of the chunk (1-indexed) */
    startLine: number;
    /** Last line of the chunk (1-indexed) */
    endLine: number;
    /** Ranking score; in hybrid search this is the combined score */
    score: number;
    /** Vector similarity score (optional) */
    vectorScore?: number;
    /** FTS/BM25 score (optional) */
    ftsScore?: number;
    /** Function/method signature (if available) */
    signature?: string | null;
    /** Whether the symbol is exported */
    isExported?: boolean;
}
|
|
39
|
+
/**
 * Diagnostics describing how a hybrid search was carried out.
 * Intended to help AI agents judge search quality and tune parameters.
 */
export interface SearchDebugInfo {
    /** Highest vector similarity score among the vector results */
    maxVectorScore: number;
    /** Highest FTS/BM25 score among the FTS results */
    maxFtsScore: number;
    /** BM25 weight the caller asked for */
    requestedBm25Weight: number;
    /** BM25 weight actually used, after any auto-boost */
    effectiveBm25Weight: number;
    /** Whether auto-boost changed the BM25 weight */
    autoBoostApplied: boolean;
    /** Auto-boost threshold that was in effect */
    autoBoostThreshold: number;
    /** Number of results returned by vector search */
    vectorResultCount: number;
    /** Number of results returned by FTS search */
    ftsResultCount: number;
    /** Oversample multiplier used (2-4x) */
    oversampleMultiplier?: number;
    /** Whether the oversample multiplier was raised above its default */
    dynamicOversampleApplied?: boolean;
}
|
|
65
|
+
/**
 * A result list together with metadata about the search that produced it.
 */
export interface SearchResults {
    /** The matched results */
    results: SearchResult[];
    /** The query the search was run with */
    query: string;
    /** Search mode that was performed */
    searchType: SearchMode;
    /** Time taken in milliseconds */
    elapsedMs: number;
    /** Total matches (set when `exhaustive` is true) */
    totalMatches?: number;
    /** Debug info for hybrid search (set when `returnDebug` is true) */
    debug?: SearchDebugInfo;
}
|
|
82
|
+
/**
 * Transparent, AI-controlled filters.
 * Every restriction is an explicit field, so the AI sees exactly what is
 * being filtered.
 */
export interface SearchFilters {
    /** Only files whose path starts with this prefix (e.g., "src/api/") */
    pathPrefix?: string;
    /** Path must contain ALL of these strings */
    pathContains?: string[];
    /** Path must not contain ANY of these strings */
    pathNotContains?: string[];
    /** Allowed chunk types: function, class, method, module */
    type?: ('function' | 'class' | 'method' | 'module')[];
    /** Allowed file extensions (e.g., [".ts", ".tsx"]) */
    extension?: string[];
    /** Only exported/public symbols */
    isExported?: boolean;
    /** Decorator name contains this string (e.g., "Get", "route") */
    decoratorContains?: string;
    /** Has documentation/docstring */
    hasDocstring?: boolean;
}
|
|
104
|
+
/**
 * Options accepted by search operations. Every field is optional.
 */
export interface SearchOptions {
    /** Search mode (default: 'hybrid') */
    mode?: SearchMode;
    /** Maximum number of results (default: 10) */
    limit?: number;
    /** Weight for BM25 in hybrid search (0.0-1.0, default: 0.3) */
    bm25Weight?: number;
    /** Return all matches above threshold (default: false) */
    exhaustive?: boolean;
    /**
     * Minimum score threshold 0-1 (default: 0).
     * NOTE(review): in hybrid mode this is compared against the combined
     * RRF score, which is far smaller than a 0-1 similarity - confirm the
     * intended scale.
     */
    minScore?: number;
    /** Transparent filters */
    filters?: SearchFilters;
    /** Code snippet to match against in 'similar' mode */
    codeSnippet?: string;
    /** Symbol name to look up in 'definition' mode */
    symbolName?: string;
    /** Enable auto-boost of BM25 weight when vector scores are low (default: true) */
    autoBoost?: boolean;
    /** Vector score threshold below which auto-boost activates (default: 0.3) */
    autoBoostThreshold?: number;
    /** Include debug info in results (default: false) */
    returnDebug?: boolean;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector similarity search using LanceDB.
|
|
3
|
+
*/
|
|
4
|
+
import type { Table } from '@lancedb/lancedb';
|
|
5
|
+
import type { SearchResult } from './types.js';
|
|
6
|
+
/**
 * Settings for a vector search.
 */
export interface VectorSearchOptions {
    /** Maximum number of results to return */
    limit: number;
    /** LanceDB WHERE clause used to filter rows */
    filterClause?: string;
    /** Minimum score threshold (0-1) */
    minScore?: number;
}
|
|
17
|
+
/**
 * Run a vector similarity search against a LanceDB table.
 *
 * @param table - LanceDB table to search
 * @param queryVector - Embedding vector of the query
 * @param options - Search options; a bare number is also accepted
 *   (presumably a shorthand for `limit` - confirm against the implementation)
 * @returns Array of search results with vector scores
 */
export declare function vectorSearch(
    table: Table,
    queryVector: number[],
    options: VectorSearchOptions | number,
): Promise<SearchResult[]>;
|