claude-eidetic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +87 -0
- package/dist/config.js +65 -0
- package/dist/core/indexer.d.ts +18 -0
- package/dist/core/indexer.js +169 -0
- package/dist/core/preview.d.ts +14 -0
- package/dist/core/preview.js +61 -0
- package/dist/core/searcher.d.ts +24 -0
- package/dist/core/searcher.js +101 -0
- package/dist/core/snapshot-io.d.ts +6 -0
- package/dist/core/snapshot-io.js +39 -0
- package/dist/core/sync.d.ts +35 -0
- package/dist/core/sync.js +188 -0
- package/dist/embedding/factory.d.ts +17 -0
- package/dist/embedding/factory.js +41 -0
- package/dist/embedding/openai.d.ts +45 -0
- package/dist/embedding/openai.js +243 -0
- package/dist/embedding/truncate.d.ts +6 -0
- package/dist/embedding/truncate.js +14 -0
- package/dist/embedding/types.d.ts +18 -0
- package/dist/embedding/types.js +2 -0
- package/dist/errors.d.ts +17 -0
- package/dist/errors.js +21 -0
- package/dist/format.d.ts +12 -0
- package/dist/format.js +97 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +109 -0
- package/dist/infra/qdrant-bootstrap.d.ts +2 -0
- package/dist/infra/qdrant-bootstrap.js +94 -0
- package/dist/paths.d.ts +11 -0
- package/dist/paths.js +41 -0
- package/dist/splitter/ast.d.ts +13 -0
- package/dist/splitter/ast.js +169 -0
- package/dist/splitter/line.d.ts +14 -0
- package/dist/splitter/line.js +109 -0
- package/dist/splitter/types.d.ts +11 -0
- package/dist/splitter/types.js +2 -0
- package/dist/state/registry.d.ts +8 -0
- package/dist/state/registry.js +33 -0
- package/dist/state/snapshot.d.ts +26 -0
- package/dist/state/snapshot.js +101 -0
- package/dist/tool-schemas.d.ts +135 -0
- package/dist/tool-schemas.js +162 -0
- package/dist/tools.d.ts +40 -0
- package/dist/tools.js +169 -0
- package/dist/vectordb/milvus.d.ts +33 -0
- package/dist/vectordb/milvus.js +328 -0
- package/dist/vectordb/qdrant.d.ts +51 -0
- package/dist/vectordb/qdrant.js +241 -0
- package/dist/vectordb/types.d.ts +35 -0
- package/dist/vectordb/types.js +2 -0
- package/package.json +62 -0
package/dist/tools.js
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import { normalizePath, pathToCollectionName } from './paths.js';
|
|
2
|
+
import { indexCodebase, previewCodebase, deleteSnapshot } from './core/indexer.js';
|
|
3
|
+
import { getConfig } from './config.js';
|
|
4
|
+
import { searchCode, formatSearchResults, formatCompactResults } from './core/searcher.js';
|
|
5
|
+
import { registerProject, resolveProject, listProjects } from './state/registry.js';
|
|
6
|
+
import { textResult, formatPreview, formatIndexResult, formatListIndexed } from './format.js';
|
|
7
|
+
/**
 * Work out which codebase a tool call refers to.
 *
 * An explicit `path` argument wins and is passed through `normalizePath`;
 * otherwise a `project` argument is looked up with `resolveProject`.
 *
 * @param args Tool arguments; only `path` and `project` are read.
 * @returns The resolved location, or `undefined` when neither argument is set.
 */
function resolvePath(args) {
    const explicitPath = args.path;
    if (explicitPath) {
        return normalizePath(explicitPath);
    }
    const projectName = args.project;
    return projectName ? resolveProject(projectName) : undefined;
}
|
|
16
|
+
/**
 * Build the text result returned when a tool call names neither `path` nor
 * `project`.
 *
 * When the registry returned by `listProjects()` has entries, they are listed
 * (one "name → value" line each) so the caller can pick one; otherwise the
 * message says that nothing is registered yet.
 *
 * @returns The value produced by `textResult` for the chosen message.
 */
function noPathError() {
    const registry = listProjects();
    const entries = Object.entries(registry).map(([name, target]) => ` - ${name} → ${target}`);
    if (entries.length === 0) {
        return textResult('Error: provide \`path\` (absolute) or \`project\` (name). No projects registered yet — index a codebase first.');
    }
    const list = entries.join('\n');
    return textResult(`Error: provide \`path\` or \`project\`. Registered projects:\n${list}`);
}
|
|
25
|
+
const locks = new Map();
|
|
26
|
+
async function withMutex(key, fn) {
|
|
27
|
+
// Chain onto any existing operation for this key (FIFO ordering, no race)
|
|
28
|
+
const prev = locks.get(key) ?? Promise.resolve();
|
|
29
|
+
let resolve;
|
|
30
|
+
const current = new Promise(r => { resolve = r; });
|
|
31
|
+
locks.set(key, current);
|
|
32
|
+
// Wait for previous operation to complete
|
|
33
|
+
await prev;
|
|
34
|
+
try {
|
|
35
|
+
return await fn();
|
|
36
|
+
}
|
|
37
|
+
finally {
|
|
38
|
+
resolve();
|
|
39
|
+
// Only delete if we're still the latest operation
|
|
40
|
+
if (locks.get(key) === current) {
|
|
41
|
+
locks.delete(key);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
/** Turn any thrown value into a printable message. */
function describeError(err) {
    return err instanceof Error ? err.message : String(err);
}
/**
 * Implements the index / search / clear / status / list tool calls on top of
 * an embedding provider, a vector database and a state tracker.
 *
 * Each handler resolves its target from `args.path` or `args.project` and
 * answers with a `textResult(...)` value. Failures of the indexing, search and
 * vector-database operations are reported as "Error ..." text, not thrown.
 */
export class ToolHandlers {
    embedding;
    vectordb;
    state;
    /**
     * @param embedding Embedding provider handed to the indexer and searcher.
     * @param vectordb  Vector database used for collections and search.
     * @param state     Tracker for per-codebase indexing status.
     */
    constructor(embedding, vectordb, state) {
        this.embedding = embedding;
        this.vectordb = vectordb;
        this.state = state;
    }
    /**
     * Index a codebase, or preview the run when `args.dryRun` is set.
     *
     * Reads `force`, `dryRun`, `customExtensions` and `customIgnorePatterns`
     * from `args`; the last two fall back to the values from `getConfig()`.
     * Real indexing runs are serialised per path through `withMutex`.
     */
    async handleIndexCodebase(args) {
        const normalizedPath = resolvePath(args);
        if (!normalizedPath)
            return noPathError();
        const force = args.force ?? false;
        const dryRun = args.dryRun ?? false;
        const config = getConfig();
        const customExt = args.customExtensions ?? config.customExtensions;
        const customIgnore = args.customIgnorePatterns ?? config.customIgnorePatterns;
        const collectionName = pathToCollectionName(normalizedPath);
        if (dryRun) {
            // Previews do not take the mutex and do not touch the state tracker.
            try {
                const preview = await previewCodebase(normalizedPath, this.embedding, customExt, customIgnore);
                return textResult(formatPreview(preview, normalizedPath));
            }
            catch (err) {
                return textResult(`Error previewing ${normalizedPath}: ${describeError(err)}`);
            }
        }
        return withMutex(normalizedPath, async () => {
            this.state.setIndexing(normalizedPath, collectionName);
            try {
                const result = await indexCodebase(normalizedPath, this.embedding, this.vectordb, force, (pct, msg) => this.state.updateProgress(normalizedPath, pct, msg), customExt, customIgnore);
                this.state.setIndexed(normalizedPath, result.totalFiles, result.totalChunks);
                registerProject(normalizedPath);
                return textResult(formatIndexResult(result, normalizedPath));
            }
            catch (err) {
                const message = describeError(err);
                this.state.setError(normalizedPath, message);
                return textResult(`Error indexing ${normalizedPath}: ${message}`);
            }
        });
    }
    /**
     * Search an indexed codebase.
     *
     * Requires `args.query`. `args.limit` is forwarded only when it is a finite
     * number >= 1 (otherwise `undefined` is passed and the searcher decides).
     * Results use the compact format unless `args.compact` is exactly `false`.
     */
    async handleSearchCode(args) {
        const query = args.query;
        if (!query)
            return textResult('Error: "query" is required. Provide a natural language search query.');
        const normalizedPath = resolvePath(args);
        if (!normalizedPath)
            return noPathError();
        const rawLimit = args.limit;
        const limit = (rawLimit !== undefined && Number.isFinite(rawLimit) && rawLimit >= 1)
            ? rawLimit
            : undefined;
        const extensionFilter = args.extensionFilter;
        const compact = args.compact !== false; // default true
        try {
            const results = await searchCode(normalizedPath, query, this.embedding, this.vectordb, { limit, extensionFilter });
            const formatted = compact
                ? formatCompactResults(results, query, normalizedPath)
                : formatSearchResults(results, query, normalizedPath);
            return textResult(formatted);
        }
        catch (err) {
            return textResult(`Error: ${describeError(err)}`);
        }
    }
    /**
     * Drop the vector collection, snapshot and tracked state of a codebase.
     * Serialised with indexing of the same path through `withMutex`.
     */
    async handleClearIndex(args) {
        const normalizedPath = resolvePath(args);
        if (!normalizedPath)
            return noPathError();
        const collectionName = pathToCollectionName(normalizedPath);
        return withMutex(normalizedPath, async () => {
            try {
                await this.vectordb.dropCollection(collectionName);
                deleteSnapshot(normalizedPath);
                this.state.remove(normalizedPath);
                return textResult(`Index cleared for ${normalizedPath}.`);
            }
            catch (err) {
                return textResult(`Error clearing index: ${describeError(err)}`);
            }
        });
    }
    /**
     * Report the indexing status of a codebase.
     *
     * Uses the state tracker when it knows the path; otherwise asks the vector
     * database whether the collection exists.
     */
    async handleGetIndexingStatus(args) {
        const normalizedPath = resolvePath(args);
        if (!normalizedPath)
            return noPathError();
        const state = this.state.getState(normalizedPath);
        if (!state) {
            const collectionName = pathToCollectionName(normalizedPath);
            let exists;
            try {
                exists = await this.vectordb.hasCollection(collectionName);
            }
            catch (err) {
                // Same convention as the other handlers: report, don't throw.
                return textResult(`Error checking index status for ${normalizedPath}: ${describeError(err)}`);
            }
            if (exists) {
                return textResult(`Codebase at ${normalizedPath} is indexed (status loaded from vector DB).`);
            }
            return textResult(`Codebase at ${normalizedPath} is not indexed.`);
        }
        const lines = [`Status for ${normalizedPath}: **${state.status}**`];
        if (state.status === 'indexing' && state.progress !== undefined) {
            lines.push(`Progress: ${state.progress}% - ${state.progressMessage ?? ''}`);
        }
        if (state.status === 'indexed') {
            lines.push(`Last indexed: ${state.lastIndexed ?? 'unknown'}`);
            // Compare against null/undefined so a legitimate count of 0 is still shown.
            if (state.totalFiles != null)
                lines.push(`Files: ${state.totalFiles}`);
            if (state.totalChunks != null)
                lines.push(`Chunks: ${state.totalChunks}`);
        }
        if (state.status === 'error') {
            lines.push(`Error: ${state.error ?? 'unknown'}`);
        }
        return textResult(lines.join('\n'));
    }
    /** List every codebase the state tracker currently knows about. */
    async handleListIndexed() {
        const states = this.state.getAllStates();
        if (states.length === 0) {
            return textResult('No codebases are currently indexed in this session.\n\nUse `index_codebase` to index a codebase first.');
        }
        return textResult(formatListIndexed(states));
    }
}
|
|
169
|
+
//# sourceMappingURL=tools.js.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { VectorDB, CodeDocument, HybridSearchParams, SearchResult } from './types.js';
|
|
2
|
+
/**
 * Reports whether `err` looks like the "data type 104 not supported" failure
 * raised by Milvus releases older than v2.4, which have no SparseFloatVector
 * support.
 *
 * @param err The caught error value.
 * @returns `true` when the error matches that failure.
 */
export declare function isSparseUnsupportedError(err: unknown): boolean;
|
|
7
|
+
/**
 * `VectorDB` implementation backed by Milvus.
 *
 * Collections are created as hybrid (dense vector + BM25 sparse vector) when
 * the server supports it, and as dense-only otherwise; `search` picks the
 * matching query shape per collection.
 */
export declare class MilvusVectorDB implements VectorDB {
    private client;
    private initPromise;
    private hybridCollections;
    /**
     * @param client Optional pre-built Milvus client. When omitted, the SDK is
     *               loaded and a client is created from configuration.
     */
    constructor(client?: unknown);
    private initialize;
    private ready;
    /** Create collection `name` for vectors of the given dimension. */
    createCollection(name: string, dimension: number): Promise<void>;
    private createHybridCollection;
    private createDenseOnlyCollection;
    /** Resolve to whether collection `name` exists. */
    hasCollection(name: string): Promise<boolean>;
    /** Drop collection `name` if it exists. */
    dropCollection(name: string): Promise<void>;
    /** Insert `documents` into collection `name`; an empty array is a no-op. */
    insert(name: string, documents: CodeDocument[]): Promise<void>;
    /** Search collection `name` with the given query parameters. */
    search(name: string, params: HybridSearchParams): Promise<SearchResult[]>;
    private hybridSearch;
    private denseOnlySearch;
    private mapResults;
    /**
     * Detect whether a collection has the sparse_vector field (hybrid) or not.
     * Caches result in hybridCollections set.
     */
    private detectHybrid;
    /** Delete every entity of collection `name` whose `relativePath` matches. */
    deleteByPath(name: string, relativePath: string): Promise<void>;
    private ensureLoaded;
    private waitForLoad;
}
|
|
33
|
+
//# sourceMappingURL=milvus.d.ts.map
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
import { VectorDBError } from '../errors.js';
|
|
2
|
+
import { getConfig } from '../config.js';
|
|
3
|
+
// Dynamic import -- @zilliz/milvus2-sdk-node is an optional dependency.
// These bindings stay undefined until loadMilvusSDK() has resolved.
let MilvusClient;
let DataType;
let MetricType;
let FunctionType;
let LoadState;
/**
 * Load the optional Milvus SDK and populate the module-level bindings
 * (`MilvusClient`, `DataType`, `MetricType`, `FunctionType`, `LoadState`).
 *
 * @throws {VectorDBError} When the SDK cannot be imported. The original import
 *         failure is attached as the cause so a broken install can be told
 *         apart from a missing one.
 */
async function loadMilvusSDK() {
    let sdk;
    try {
        sdk = await import('@zilliz/milvus2-sdk-node');
    }
    catch (err) {
        throw new VectorDBError('Milvus SDK not installed. Run: npm install @zilliz/milvus2-sdk-node', err);
    }
    MilvusClient = sdk.MilvusClient;
    DataType = sdk.DataType;
    MetricType = sdk.MetricType;
    FunctionType = sdk.FunctionType;
    LoadState = sdk.LoadState;
}
|
|
22
|
+
/** `k` parameter passed to the `rrf` rerank strategy in hybrid searches. */
const RRF_K = 60;
/**
 * Detects the "data type 104 not supported" error from older Milvus versions
 * that lack SparseFloatVector support (< v2.4).
 *
 * The text inspected is, in order of preference: `err.reason`, then a string
 * `err.message`, then `String(err)`.
 *
 * @param err Any caught value (Error, SDK status object, string, ...).
 * @returns `true` when the text names data type 104, or says "not supported"
 *          and mentions the number 104 on its own.
 */
export function isSparseUnsupportedError(err) {
    let source = err;
    if (err && typeof err === 'object') {
        if ('reason' in err) {
            source = err.reason;
        }
        else if (typeof err.message === 'string') {
            // Plain objects would otherwise stringify to "[object Object]".
            source = err.message;
        }
    }
    const msg = String(source);
    // Require 104 as a whole number so values such as 1048576 do not match.
    const namesType104 = /data type[:\s]*104(?!\d)/i.test(msg);
    const mentions104 = /(?<!\d)104(?!\d)/.test(msg);
    return namesType104 || (/not supported/i.test(msg) && mentions104);
}
|
|
31
|
+
/**
 * Vector store backed by Milvus.
 *
 * Collections are created as "hybrid" (dense vector plus a BM25-generated
 * sparse vector) when the server accepts the schema, and as dense-only when
 * the server reports that SparseFloatVector is unsupported. Failures of the
 * public operations are wrapped in `VectorDBError`.
 */
export class MilvusVectorDB {
    /** Milvus client; null until injected or created by initialize(). */
    client = null;
    /** Settles once the client is available (or initialization failed). */
    initPromise;
    /** Names of collections known to contain the sparse_vector field. */
    hybridCollections = new Set();
    /**
     * @param client Optional pre-built client. When omitted, the SDK is loaded
     *               and a client is created from `getConfig()`.
     *               NOTE(review): with an injected client the SDK enums
     *               (`DataType`, `MetricType`, ...) are not loaded by this
     *               class — confirm callers load them before creating collections.
     */
    constructor(client) {
        if (client) {
            this.client = client;
            this.initPromise = Promise.resolve();
        }
        else {
            this.initPromise = this.initialize();
            // Mark a failed initialization as handled so it does not surface as an
            // unhandled rejection before the first ready() call; ready() still
            // awaits initPromise and rethrows the original error.
            this.initPromise.catch(() => { });
        }
    }
    /** Load the SDK and build the client from configuration. */
    async initialize() {
        await loadMilvusSDK();
        const config = getConfig();
        this.client = new MilvusClient({
            address: config.milvusAddress,
            ...(config.milvusToken ? { token: config.milvusToken } : {}),
        });
    }
    /**
     * Wait for initialization to finish.
     * @throws {VectorDBError} When no client is available afterwards.
     */
    async ready() {
        await this.initPromise;
        if (!this.client)
            throw new VectorDBError('Milvus client not initialized');
    }
    /**
     * Create collection `name` for vectors of size `dimension`.
     *
     * @throws {VectorDBError} When neither the hybrid nor the dense-only
     *         creation succeeds.
     */
    async createCollection(name, dimension) {
        await this.ready();
        // Try hybrid first, fall back to dense-only if Milvus version doesn't support sparse
        try {
            await this.createHybridCollection(name, dimension);
            this.hybridCollections.add(name);
            console.log(`Created hybrid collection "${name}" (dense + BM25 sparse)`);
            return;
        }
        catch (err) {
            if (isSparseUnsupportedError(err)) {
                console.warn(`Milvus does not support SparseFloatVector (requires >= v2.4). ` +
                    `Falling back to dense-only collection for "${name}".`);
                // Clean up the failed collection attempt
                try {
                    await this.client.dropCollection({ collection_name: name });
                }
                catch (cleanupErr) {
                    console.warn(`Failed to clean up collection "${name}": ${cleanupErr}`);
                }
            }
            else {
                throw new VectorDBError(`Failed to create Milvus collection "${name}"`, err);
            }
        }
        try {
            await this.createDenseOnlyCollection(name, dimension);
            console.log(`Created dense-only collection "${name}"`);
        }
        catch (err) {
            throw new VectorDBError(`Failed to create Milvus collection "${name}"`, err);
        }
    }
    /**
     * Create a collection with dense and sparse vector fields, a BM25 function
     * that fills `sparse_vector` from `content`, an index on each vector field,
     * and wait for the collection to load.
     */
    async createHybridCollection(name, dimension) {
        const schema = [
            { name: 'id', data_type: DataType.VarChar, max_length: 128, is_primary_key: true },
            { name: 'content', data_type: DataType.VarChar, max_length: 65535, enable_analyzer: true },
            { name: 'vector', data_type: DataType.FloatVector, dim: dimension },
            { name: 'sparse_vector', data_type: DataType.SparseFloatVector },
            { name: 'relativePath', data_type: DataType.VarChar, max_length: 1024 },
            { name: 'startLine', data_type: DataType.Int64 },
            { name: 'endLine', data_type: DataType.Int64 },
            { name: 'fileExtension', data_type: DataType.VarChar, max_length: 32 },
            { name: 'language', data_type: DataType.VarChar, max_length: 64 },
        ];
        const functions = [{
                name: 'content_bm25',
                type: FunctionType.BM25,
                input_field_names: ['content'],
                output_field_names: ['sparse_vector'],
                params: {},
            }];
        await this.client.createCollection({
            collection_name: name,
            fields: schema,
            functions,
        });
        await this.client.createIndex({
            collection_name: name,
            field_name: 'vector',
            index_type: 'AUTOINDEX',
            metric_type: MetricType.COSINE,
        });
        await this.client.createIndex({
            collection_name: name,
            field_name: 'sparse_vector',
            index_type: 'SPARSE_INVERTED_INDEX',
            metric_type: MetricType.BM25,
        });
        await this.waitForLoad(name);
    }
    /**
     * Create a collection with only the dense vector field, index it and wait
     * for the collection to load.
     */
    async createDenseOnlyCollection(name, dimension) {
        const schema = [
            { name: 'id', data_type: DataType.VarChar, max_length: 128, is_primary_key: true },
            { name: 'content', data_type: DataType.VarChar, max_length: 65535 },
            { name: 'vector', data_type: DataType.FloatVector, dim: dimension },
            { name: 'relativePath', data_type: DataType.VarChar, max_length: 1024 },
            { name: 'startLine', data_type: DataType.Int64 },
            { name: 'endLine', data_type: DataType.Int64 },
            { name: 'fileExtension', data_type: DataType.VarChar, max_length: 32 },
            { name: 'language', data_type: DataType.VarChar, max_length: 64 },
        ];
        await this.client.createCollection({
            collection_name: name,
            fields: schema,
        });
        await this.client.createIndex({
            collection_name: name,
            field_name: 'vector',
            index_type: 'AUTOINDEX',
            metric_type: MetricType.COSINE,
        });
        await this.waitForLoad(name);
    }
    /**
     * Resolve to whether collection `name` exists.
     * A failed lookup is reported as `false` rather than thrown.
     */
    async hasCollection(name) {
        await this.ready();
        try {
            const result = await this.client.hasCollection({ collection_name: name });
            return Boolean(result.value);
        }
        catch {
            return false;
        }
    }
    /**
     * Drop collection `name` if it exists and forget its hybrid flag.
     *
     * @throws {VectorDBError} When the existence check or the drop fails.
     */
    async dropCollection(name) {
        await this.ready();
        try {
            // Ask the client directly: this.hasCollection() turns lookup failures
            // into `false`, which would make a failed drop look like a success.
            const lookup = await this.client.hasCollection({ collection_name: name });
            if (Boolean(lookup.value)) {
                await this.client.dropCollection({ collection_name: name });
            }
            this.hybridCollections.delete(name);
        }
        catch (err) {
            throw new VectorDBError(`Failed to drop Milvus collection "${name}"`, err);
        }
    }
    /**
     * Insert `documents` into collection `name`. An empty array is a no-op.
     * The sparse vector is not sent; hybrid collections derive it through the
     * BM25 function declared on the schema.
     *
     * @throws {VectorDBError} When the insert call fails.
     */
    async insert(name, documents) {
        if (documents.length === 0)
            return;
        await this.ready();
        await this.ensureLoaded(name);
        try {
            const data = documents.map(doc => ({
                id: doc.id,
                content: doc.content,
                vector: doc.vector,
                relativePath: doc.relativePath,
                startLine: doc.startLine,
                endLine: doc.endLine,
                fileExtension: doc.fileExtension,
                language: doc.language,
            }));
            await this.client.insert({ collection_name: name, data });
        }
        catch (err) {
            throw new VectorDBError(`Failed to insert into Milvus collection "${name}"`, err);
        }
    }
    /**
     * Search collection `name`, using the hybrid query for hybrid collections
     * and the dense-only query otherwise. `params.extensionFilter`, when
     * non-empty, becomes a `fileExtension in [...]` filter expression.
     *
     * @throws {VectorDBError} When the search call fails.
     */
    async search(name, params) {
        await this.ready();
        await this.ensureLoaded(name);
        const isHybrid = await this.detectHybrid(name);
        try {
            let expr;
            if (params.extensionFilter?.length) {
                // Escape backslashes and quotes before embedding values in the expression.
                const exts = params.extensionFilter.map(e => `"${e.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`).join(', ');
                expr = `fileExtension in [${exts}]`;
            }
            if (isHybrid) {
                return await this.hybridSearch(name, params, expr);
            }
            return await this.denseOnlySearch(name, params, expr);
        }
        catch (err) {
            throw new VectorDBError(`Milvus search failed in collection "${name}"`, err);
        }
    }
    /**
     * Run a dense and a sparse (text) sub-search, each asking for twice
     * `params.limit` candidates, and let the `rrf` rerank strategy merge them
     * down to `params.limit` results.
     */
    async hybridSearch(name, params, expr) {
        const limit = params.limit * 2;
        const searchParams = {
            collection_name: name,
            data: [
                {
                    data: [params.queryVector],
                    anns_field: 'vector',
                    param: { nprobe: 10 },
                    limit,
                },
                {
                    data: params.queryText,
                    anns_field: 'sparse_vector',
                    param: { drop_ratio_search: 0.2 },
                    limit,
                },
            ],
            limit: params.limit,
            rerank: { strategy: 'rrf', params: { k: RRF_K } },
            output_fields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'language'],
        };
        if (expr)
            searchParams.expr = expr;
        const result = await this.client.search(searchParams);
        return this.mapResults(result);
    }
    /** Run a single dense-vector search limited to `params.limit` results. */
    async denseOnlySearch(name, params, expr) {
        const searchParams = {
            collection_name: name,
            data: [params.queryVector],
            limit: params.limit,
            output_fields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'language'],
        };
        if (expr)
            searchParams.expr = expr;
        const result = await this.client.search(searchParams);
        return this.mapResults(result);
    }
    /**
     * Convert a raw search response into result objects, substituting empty
     * strings / zeros for missing fields. A response without results yields [].
     */
    mapResults(result) {
        if (!result.results?.length)
            return [];
        return result.results.map((r) => ({
            content: r.content ?? '',
            relativePath: r.relativePath ?? '',
            startLine: r.startLine ?? 0,
            endLine: r.endLine ?? 0,
            fileExtension: r.fileExtension ?? '',
            language: r.language ?? '',
            score: r.score ?? 0,
        }));
    }
    /**
     * Detect whether a collection has the sparse_vector field (hybrid) or not.
     * Caches result in hybridCollections set.
     * Only positive answers are cached; a failed describe call counts as
     * "not hybrid" for this call.
     */
    async detectHybrid(name) {
        if (this.hybridCollections.has(name))
            return true;
        try {
            const desc = await this.client.describeCollection({ collection_name: name });
            const fields = desc.schema?.fields ?? [];
            const hasSparse = fields.some((f) => f.name === 'sparse_vector');
            if (hasSparse) {
                this.hybridCollections.add(name);
            }
            return hasSparse;
        }
        catch {
            return false;
        }
    }
    /**
     * Delete every entity of collection `name` whose `relativePath` equals the
     * given path.
     *
     * @throws {VectorDBError} When the delete call fails.
     */
    async deleteByPath(name, relativePath) {
        await this.ready();
        await this.ensureLoaded(name);
        try {
            // Escape backslashes and quotes before embedding the path in the filter.
            const escaped = relativePath.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
            await this.client.delete({
                collection_name: name,
                filter: `relativePath == "${escaped}"`,
            });
        }
        catch (err) {
            throw new VectorDBError(`Failed to delete by path "${relativePath}" from "${name}"`, err);
        }
    }
    /**
     * Best-effort: request a load when the collection is not reported as
     * loaded. Failures are logged as warnings and not propagated.
     */
    async ensureLoaded(name) {
        try {
            const result = await this.client.getLoadState({ collection_name: name });
            if (result.state !== LoadState.LoadStateLoaded) {
                await this.client.loadCollection({ collection_name: name });
            }
        }
        catch (err) {
            console.warn(`Failed to ensure collection "${name}" is loaded: ${err}`);
        }
    }
    /**
     * Request a load of collection `name` and poll its load state every 500 ms
     * until it is loaded.
     *
     * @param timeoutMs Maximum time to keep polling (default 30 000 ms).
     * @throws {VectorDBError} When the collection is not loaded within the timeout.
     */
    async waitForLoad(name, timeoutMs = 30_000) {
        await this.client.loadCollection({ collection_name: name });
        const start = Date.now();
        while (Date.now() - start < timeoutMs) {
            try {
                const result = await this.client.getLoadState({ collection_name: name });
                if (result.state === LoadState.LoadStateLoaded)
                    return;
            }
            catch (err) {
                // A failed poll is not fatal; keep trying until the timeout.
                console.warn(`Load state check failed for "${name}": ${err}`);
            }
            await new Promise(r => setTimeout(r, 500));
        }
        throw new VectorDBError(`Collection "${name}" failed to load within ${timeoutMs}ms`);
    }
}
|
|
328
|
+
//# sourceMappingURL=milvus.js.map
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { VectorDB, CodeDocument, HybridSearchParams, SearchResult } from './types.js';
|
|
2
|
+
/**
 * Exported tuning constant; by its name it belongs to the reciprocal rank
 * fusion declared in this module — TODO confirm its exact role against the
 * implementation.
 */
export declare const RRF_K: 5;
/**
 * Exported tuning constant; presumably a weighting factor used by the
 * reciprocal rank fusion — TODO confirm against the implementation.
 */
export declare const RRF_ALPHA: 0.7;
|
|
4
|
+
/**
 * `VectorDB` implementation backed by Qdrant.
 */
export declare class QdrantVectorDB implements VectorDB {
    private client;
    /**
     * @param url    Optional server URL.
     * @param apiKey Optional API key.
     */
    constructor(url?: string, apiKey?: string);
    /** Create collection `name` for vectors of the given dimension. */
    createCollection(name: string, dimension: number): Promise<void>;
    /** Resolve to whether collection `name` exists. */
    hasCollection(name: string): Promise<boolean>;
    /** Drop collection `name`. */
    dropCollection(name: string): Promise<void>;
    /** Insert `documents` into collection `name`. */
    insert(name: string, documents: CodeDocument[]): Promise<void>;
    /** Search collection `name` with the given query parameters. */
    search(name: string, params: HybridSearchParams): Promise<SearchResult[]>;
    /** Delete the entries of collection `name` that belong to `relativePath`. */
    deleteByPath(name: string, relativePath: string): Promise<void>;
}
|
|
14
|
+
/** A stored point together with the content-based score assigned to it. */
interface RankedPoint {
    id: string | number;
    payload?: Record<string, unknown> | null;
    rawScore: number;
}
/**
 * Orders text-match results by normalized term frequency.
 *
 * Points that Qdrant's scroll API returns for a text filter come back in
 * storage order, not by relevance. Before they are fed into RRF, each one is
 * scored by how many query terms occur in its content, divided by word count
 * so longer chunks are not favoured.
 *
 * @param points    Text-filtered points to rank.
 * @param queryText Query whose terms are counted.
 * @returns The points sorted best-first, each with `rawScore` attached so RRF
 *          can blend rank position with a content-based signal.
 */
export declare function rankByTermFrequency(points: Array<{
    id: string | number;
    payload?: Record<string, unknown> | null;
}>, queryText: string): RankedPoint[];
|
|
32
|
+
/** Typed view of the payload fields stored with each indexed chunk. */
interface ScoredPayload {
    id: string | number;
    content: string;
    relativePath: string;
    startLine: number;
    endLine: number;
    fileExtension: string;
    language: string;
}
/**
 * Convert a raw point (id plus loosely typed payload) into a `ScoredPayload`.
 *
 * @param point Point whose `payload` may be absent or `null`.
 * @returns The point's id and payload fields with concrete types.
 */
export declare function extractPayload(point: {
    id: string | number;
    payload?: Record<string, unknown> | null;
}): ScoredPayload;
|
|
45
|
+
/**
 * Merge dense-search results and ranked text-match results into one list of
 * search results using reciprocal rank fusion.
 *
 * @param denseResults Points returned by the dense vector search.
 * @param textResults  Text-match points, already ranked (see `RankedPoint`).
 * @param limit        Number of results requested — presumably an upper bound
 *                     on the returned list; confirm against the implementation.
 * @returns The fused search results.
 */
export declare function reciprocalRankFusion(denseResults: Array<{
    id: string | number;
    score?: number;
    payload?: Record<string, unknown> | null;
}>, textResults: RankedPoint[], limit: number): SearchResult[];
|
|
50
|
+
export {};
|
|
51
|
+
//# sourceMappingURL=qdrant.d.ts.map
|