@liendev/core 0.20.1 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -26
- package/dist/config/global-config.d.ts +37 -0
- package/dist/config/global-config.d.ts.map +1 -0
- package/dist/config/global-config.js +160 -0
- package/dist/config/global-config.js.map +1 -0
- package/dist/config/merge.js +1 -1
- package/dist/config/merge.js.map +1 -1
- package/dist/config/schema.d.ts +8 -1
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +2 -2
- package/dist/config/schema.js.map +1 -1
- package/dist/config/service.d.ts +2 -25
- package/dist/config/service.d.ts.map +1 -1
- package/dist/config/service.js +4 -79
- package/dist/config/service.js.map +1 -1
- package/dist/constants.d.ts +0 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +0 -3
- package/dist/constants.js.map +1 -1
- package/dist/index.d.ts +20 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +25 -9
- package/dist/index.js.map +1 -1
- package/dist/indexer/ast/chunker.d.ts +2 -0
- package/dist/indexer/ast/chunker.d.ts.map +1 -1
- package/dist/indexer/ast/chunker.js +13 -7
- package/dist/indexer/ast/chunker.js.map +1 -1
- package/dist/indexer/change-detector.d.ts +2 -3
- package/dist/indexer/change-detector.d.ts.map +1 -1
- package/dist/indexer/change-detector.js +19 -34
- package/dist/indexer/change-detector.js.map +1 -1
- package/dist/indexer/chunk-batch-processor.d.ts +2 -2
- package/dist/indexer/chunk-batch-processor.d.ts.map +1 -1
- package/dist/indexer/chunk-batch-processor.js.map +1 -1
- package/dist/indexer/chunker.d.ts +2 -0
- package/dist/indexer/chunker.d.ts.map +1 -1
- package/dist/indexer/chunker.js +9 -5
- package/dist/indexer/chunker.js.map +1 -1
- package/dist/indexer/incremental.d.ts +4 -4
- package/dist/indexer/incremental.d.ts.map +1 -1
- package/dist/indexer/incremental.js +35 -23
- package/dist/indexer/incremental.js.map +1 -1
- package/dist/indexer/index.d.ts +3 -1
- package/dist/indexer/index.d.ts.map +1 -1
- package/dist/indexer/index.js +109 -47
- package/dist/indexer/index.js.map +1 -1
- package/dist/indexer/json-template-chunker.d.ts +4 -1
- package/dist/indexer/json-template-chunker.d.ts.map +1 -1
- package/dist/indexer/json-template-chunker.js +3 -1
- package/dist/indexer/json-template-chunker.js.map +1 -1
- package/dist/indexer/liquid-chunker.d.ts +4 -1
- package/dist/indexer/liquid-chunker.d.ts.map +1 -1
- package/dist/indexer/liquid-chunker.js +16 -14
- package/dist/indexer/liquid-chunker.js.map +1 -1
- package/dist/indexer/progress-tracker.d.ts +3 -1
- package/dist/indexer/progress-tracker.d.ts.map +1 -1
- package/dist/indexer/progress-tracker.js +8 -2
- package/dist/indexer/progress-tracker.js.map +1 -1
- package/dist/indexer/types.d.ts +22 -0
- package/dist/indexer/types.d.ts.map +1 -1
- package/dist/insights/complexity-analyzer.d.ts +6 -5
- package/dist/insights/complexity-analyzer.d.ts.map +1 -1
- package/dist/insights/complexity-analyzer.js +33 -14
- package/dist/insights/complexity-analyzer.js.map +1 -1
- package/dist/test/helpers/mock-embeddings.d.ts +12 -0
- package/dist/test/helpers/mock-embeddings.d.ts.map +1 -0
- package/dist/test/helpers/mock-embeddings.js +43 -0
- package/dist/test/helpers/mock-embeddings.js.map +1 -0
- package/dist/test/helpers/test-db.d.ts +21 -0
- package/dist/test/helpers/test-db.d.ts.map +1 -0
- package/dist/test/helpers/test-db.js +48 -0
- package/dist/test/helpers/test-db.js.map +1 -0
- package/dist/vectordb/factory.d.ts +15 -0
- package/dist/vectordb/factory.d.ts.map +1 -0
- package/dist/vectordb/factory.js +117 -0
- package/dist/vectordb/factory.js.map +1 -0
- package/dist/vectordb/qdrant-payload-mapper.d.ts +80 -0
- package/dist/vectordb/qdrant-payload-mapper.d.ts.map +1 -0
- package/dist/vectordb/qdrant-payload-mapper.js +129 -0
- package/dist/vectordb/qdrant-payload-mapper.js.map +1 -0
- package/dist/vectordb/qdrant.d.ts +201 -0
- package/dist/vectordb/qdrant.d.ts.map +1 -0
- package/dist/vectordb/qdrant.js +659 -0
- package/dist/vectordb/qdrant.js.map +1 -0
- package/dist/vectordb/types.d.ts +26 -1
- package/dist/vectordb/types.d.ts.map +1 -1
- package/package.json +2 -1
- package/dist/config/loader.d.ts +0 -12
- package/dist/config/loader.d.ts.map +0 -1
- package/dist/config/loader.js +0 -46
- package/dist/config/loader.js.map +0 -1
- package/dist/config/migration-manager.d.ts +0 -46
- package/dist/config/migration-manager.d.ts.map +0 -1
- package/dist/config/migration-manager.js +0 -119
- package/dist/config/migration-manager.js.map +0 -1
- package/dist/config/migration.d.ts +0 -20
- package/dist/config/migration.d.ts.map +0 -1
- package/dist/config/migration.js +0 -155
- package/dist/config/migration.js.map +0 -1
package/dist/vectordb/qdrant.js
@@ -0,0 +1,659 @@
import { QdrantClient } from '@qdrant/js-client-rest';
import crypto from 'crypto';
import path from 'path';
import os from 'os';
import { EMBEDDING_DIMENSION } from '../embeddings/types.js';
import { calculateRelevance } from './relevance.js';
import { DatabaseError } from '../errors/index.js';
import { readVersionFile } from './version.js';
import { QdrantPayloadMapper } from './qdrant-payload-mapper.js';
/**
 * Builder class for constructing Qdrant filters.
 * Simplifies filter construction and reduces complexity.
 */
class QdrantFilterBuilder {
    filter;
    constructor(orgId) {
        this.filter = {
            must: [{ key: 'orgId', match: { value: orgId } }],
        };
    }
    addRepoContext(repoId, branch, commitSha) {
        this.filter.must.push({ key: 'repoId', match: { value: repoId } }, { key: 'branch', match: { value: branch } }, { key: 'commitSha', match: { value: commitSha } });
        return this;
    }
    addRepoIds(repoIds) {
        const cleanedRepoIds = repoIds
            .map(id => id.trim())
            .filter(id => id.length > 0);
        // If caller passed repoIds but all were empty/invalid after cleaning,
        // fail fast instead of silently dropping the repoId filter (which would
        // otherwise widen the query to all repos in the org).
        if (repoIds.length > 0 && cleanedRepoIds.length === 0) {
            throw new Error('Invalid repoIds: all provided repoIds are empty or whitespace. ' +
                'Provide at least one non-empty repoId or omit repoIds entirely.');
        }
        if (cleanedRepoIds.length > 0) {
            this.filter.must.push({
                key: 'repoId',
                match: { any: cleanedRepoIds },
            });
        }
        return this;
    }
    addLanguage(language) {
        const cleanedLanguage = language.trim();
        if (cleanedLanguage.length === 0) {
            throw new Error('Invalid language: language must be a non-empty, non-whitespace string.');
        }
        this.filter.must.push({ key: 'language', match: { value: cleanedLanguage } });
        return this;
    }
    addSymbolType(symbolType) {
        const cleanedSymbolType = symbolType.trim();
        if (cleanedSymbolType.length === 0) {
            throw new Error('Invalid symbolType: symbolType must be a non-empty, non-whitespace string.');
        }
        this.filter.must.push({ key: 'symbolType', match: { value: cleanedSymbolType } });
        return this;
    }
    addPattern(pattern, key = 'file') {
        const cleanedPattern = pattern.trim();
        if (cleanedPattern.length === 0) {
            throw new Error('Invalid pattern: pattern must be a non-empty, non-whitespace string.');
        }
        this.filter.must.push({ key, match: { text: cleanedPattern } });
        return this;
    }
    addBranch(branch) {
        const cleanedBranch = branch.trim();
        // Prevent constructing a filter for an empty/whitespace-only branch,
        // which would search for `branch == ""` and almost certainly return no results.
        if (cleanedBranch.length === 0) {
            throw new Error('Invalid branch: branch must be a non-empty, non-whitespace string.');
        }
        this.filter.must.push({ key: 'branch', match: { value: cleanedBranch } });
        return this;
    }
    build() {
        return this.filter;
    }
}
/**
 * Validate filter options for buildBaseFilter.
 *
 * This is a separate function to enable unit testing of validation logic.
 * The validations ensure that conflicting options are not used together.
 *
 * @param options - Filter options to validate
 * @throws Error if conflicting options are detected
 */
export function validateFilterOptions(options) {
    // Validate: includeCurrentRepo and repoIds are mutually exclusive
    // Note: `includeCurrentRepo !== false` treats undefined as "enabled" (default behavior).
    // Callers must explicitly pass includeCurrentRepo=false when using repoIds for cross-repo queries.
    if (options.includeCurrentRepo !== false && options.repoIds && options.repoIds.length > 0) {
        throw new Error('Cannot use repoIds when includeCurrentRepo is enabled (the default). ' +
            'These options are mutually exclusive. Set includeCurrentRepo=false to perform cross-repo queries with repoIds.');
    }
    // Validate: branch parameter should only be used when includeCurrentRepo is false.
    // As above, `includeCurrentRepo !== false` treats both undefined and true as "enabled"
    // for the current repo context, so callers must explicitly pass false for cross-repo.
    if (options.branch && options.includeCurrentRepo !== false) {
        throw new Error('Cannot use branch parameter when includeCurrentRepo is enabled (the default). ' +
            'Branch is automatically included via the current repo context. Set includeCurrentRepo=false to specify a branch explicitly.');
    }
}
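
As a quick illustration of the validation rules above, the following sketch shows which option combinations `validateFilterOptions` accepts and rejects. The import path is assumed for illustration only; the function is exported from the qdrant module shown in this diff.

```js
import { validateFilterOptions } from '@liendev/core/dist/vectordb/qdrant.js'; // path assumed for illustration

// OK: cross-repo query with the current-repo context explicitly disabled.
validateFilterOptions({ includeCurrentRepo: false, repoIds: ['api-1a2b3c4d'], branch: 'main' });

// Throws: repoIds combined with the default (current-repo) context.
validateFilterOptions({ repoIds: ['api-1a2b3c4d'] });

// Throws: branch combined with the default (current-repo) context.
validateFilterOptions({ branch: 'main' });
```
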
/**
 * QdrantDB implements VectorDBInterface using Qdrant vector database.
 *
 * Features:
 * - Multi-tenant support via payload filtering (orgId/repoId)
 * - Branch and commit isolation for PR workflows
 * - Collection naming: `lien_org_{orgId}`
 * - Cross-repo search by omitting repoId filter
 * - Tenant isolation via orgId filtering
 * - Point ID generation includes branch/commit to prevent collisions
 *
 * Data Isolation:
 * All queries are filtered by orgId, repoId, branch, and commitSha by default.
 * This ensures that different branches and commits have isolated data, preventing
 * PRs from overwriting each other's indices. Use cross-repo methods (searchCrossRepo,
 * scanCrossRepo) to query across repositories within an organization.
 */
export class QdrantDB {
    client;
    collectionName;
    orgId;
    repoId;
    branch;
    commitSha;
    initialized = false;
    dbPath; // For compatibility with manifest/version file operations
    lastVersionCheck = 0;
    currentVersion = 0;
    payloadMapper;
    constructor(url, apiKey, orgId, projectRoot, branch, commitSha) {
        this.client = new QdrantClient({
            url,
            apiKey, // Optional, required for Qdrant Cloud
        });
        this.orgId = orgId;
        this.repoId = this.extractRepoId(projectRoot);
        this.branch = branch;
        this.commitSha = commitSha;
        // Collection naming: one per org
        this.collectionName = `lien_org_${orgId}`;
        // Initialize payload mapper
        this.payloadMapper = new QdrantPayloadMapper(this.orgId, this.repoId, this.branch, this.commitSha);
        // dbPath is used for manifest and version files (stored locally even with Qdrant)
        // Use same path structure as LanceDB for consistency
        const projectName = path.basename(projectRoot);
        const pathHash = crypto
            .createHash('md5')
            .update(projectRoot)
            .digest('hex')
            .substring(0, 8);
        this.dbPath = path.join(os.homedir(), '.lien', 'indices', `${projectName}-${pathHash}`);
    }
    /**
     * Extract repository identifier from project root.
     * Uses project name + path hash for stable, unique identification.
     */
    extractRepoId(projectRoot) {
        const projectName = path.basename(projectRoot);
        const pathHash = crypto
            .createHash('md5')
            .update(projectRoot)
            .digest('hex')
            .substring(0, 8);
        return `${projectName}-${pathHash}`;
    }
    /**
     * Generate a unique point ID from chunk metadata.
     * Uses hash of file path + line range + branch + commitSha for stable identification.
     * Includes branch/commit to prevent ID collisions across branches.
     *
     * **Hash Algorithm Choice:**
     * Uses MD5 for performance and collision likelihood acceptable for this use case.
     * - MD5 is deprecated for cryptographic purposes but suitable for non-security ID generation
     * - Collision probability is extremely low: ~1 in 2^64 for random inputs
     * - Input includes file path, line range, branch, and commit SHA, making collisions
     *   even less likely in practice
     * - For typical codebases (thousands to hundreds of thousands of chunks), collision risk
     *   is negligible
     * - If scaling to millions of chunks across many repos, consider upgrading to SHA-256
     *   for additional collision resistance (at ~10% performance cost)
     */
    generatePointId(metadata) {
        const idString = `${metadata.file}:${metadata.startLine}:${metadata.endLine}:${this.branch}:${this.commitSha}`;
        return crypto.createHash('md5').update(idString).digest('hex');
    }
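
To make the point-ID scheme above concrete, here is a small sketch of the same derivation using Node's crypto module; the file path, line range, branch, and commit are hypothetical values.

```js
import crypto from 'crypto';

// Hypothetical chunk: src/indexer/chunker.ts lines 10-42, on branch 'main' at commit 'abc1234'.
const idString = 'src/indexer/chunker.ts:10:42:main:abc1234';
const pointId = crypto.createHash('md5').update(idString).digest('hex');
// Re-indexing the same file/range at the same branch+commit yields the same ID (the upsert
// overwrites the old point), while a different branch or commit yields a distinct ID.
```
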
    /**
     * Build base filter for Qdrant queries.
     * Uses builder pattern to simplify filter construction.
     *
     * **Important constraints:**
     * - `includeCurrentRepo` and `repoIds` are mutually exclusive.
     * - `includeCurrentRepo` defaults to `true` when `undefined` (treats `undefined` as "enabled").
     * - To use `repoIds` for cross-repo queries, you must explicitly pass `includeCurrentRepo: false`.
     * - The `branch` parameter can only be used when `includeCurrentRepo` is explicitly `false`.
     *   When `includeCurrentRepo` is enabled (default), branch is automatically included via
     *   the current repo context (`addRepoContext`).
     *
     * @param options - Filter options
     * @param options.includeCurrentRepo - Whether to filter by current repo context (default: true when undefined).
     *   Must be explicitly `false` to use `repoIds` or `branch` parameters.
     * @param options.repoIds - Repository IDs to filter by (requires `includeCurrentRepo: false`).
     * @param options.branch - Branch name to filter by (requires `includeCurrentRepo: false`).
     * @returns Qdrant filter object
     */
    buildBaseFilter(options) {
        // Validate filter options (extracted to enable unit testing)
        validateFilterOptions({
            repoIds: options.repoIds,
            branch: options.branch,
            includeCurrentRepo: options.includeCurrentRepo,
        });
        const builder = new QdrantFilterBuilder(this.orgId);
        if (options.includeCurrentRepo !== false) {
            builder.addRepoContext(this.repoId, this.branch, this.commitSha);
        }
        if (options.repoIds) {
            builder.addRepoIds(options.repoIds);
        }
        // Validate language is non-empty if explicitly provided (even if empty string)
        if (options.language !== undefined) {
            builder.addLanguage(options.language);
        }
        // Validate symbolType is non-empty if explicitly provided (even if empty string)
        if (options.symbolType !== undefined) {
            builder.addSymbolType(options.symbolType);
        }
        // Validate pattern is non-empty if explicitly provided (even if empty string)
        if (options.pattern !== undefined) {
            builder.addPattern(options.pattern, options.patternKey);
        }
        // Only add branch filter when includeCurrentRepo is false
        // When includeCurrentRepo is true, branch is already added via addRepoContext
        // Validate branch is non-empty if explicitly provided (even if empty string)
        if (options.branch !== undefined && options.includeCurrentRepo === false) {
            // addBranch will validate that branch is non-empty and non-whitespace
            builder.addBranch(options.branch);
        }
        return builder.build();
    }
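
For reference, this is the filter shape that `buildBaseFilter` produces in the default (current-repo) case, written out with hypothetical tenant values.

```js
// buildBaseFilter({ includeCurrentRepo: true, language: 'typescript' }) for an instance with
// orgId 'acme', repoId 'api-1a2b3c4d', branch 'main', commitSha 'abc1234' (values hypothetical):
const filter = {
    must: [
        { key: 'orgId', match: { value: 'acme' } },
        { key: 'repoId', match: { value: 'api-1a2b3c4d' } },
        { key: 'branch', match: { value: 'main' } },
        { key: 'commitSha', match: { value: 'abc1234' } },
        { key: 'language', match: { value: 'typescript' } },
    ],
};
```
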
    /**
     * Map Qdrant scroll results to SearchResult format.
     *
     * Note: Scroll/scan operations do not compute semantic similarity scores.
     * For these results, score is always 0 and relevance is set to 'not_relevant'
     * to indicate that the results are unscored (not that they are useless).
     */
    mapScrollResults(results) {
        return (results.points || []).map((point) => ({
            content: point.payload?.content || '',
            metadata: this.payloadMapper.fromPayload(point.payload || {}),
            score: 0,
            relevance: 'not_relevant',
        }));
    }
    /**
     * Execute a scroll query with error handling.
     */
    async executeScrollQuery(filter, limit, errorContext) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        try {
            const results = await this.client.scroll(this.collectionName, {
                filter,
                limit,
                with_payload: true,
                with_vector: false,
            });
            return this.mapScrollResults(results);
        }
        catch (error) {
            throw new DatabaseError(`Failed to ${errorContext}: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
    async initialize() {
        try {
            // Check if collection exists (returns { exists: boolean })
            const collectionCheck = await this.client.collectionExists(this.collectionName);
            if (!collectionCheck.exists) {
                // Create collection with proper vector configuration
                await this.client.createCollection(this.collectionName, {
                    vectors: {
                        size: EMBEDDING_DIMENSION,
                        distance: 'Cosine',
                    },
                });
            }
            // Read and cache the current version
            try {
                this.currentVersion = await readVersionFile(this.dbPath);
            }
            catch {
                // Version file doesn't exist yet, will be created on first index
                this.currentVersion = 0;
            }
            this.initialized = true;
        }
        catch (error) {
            throw new DatabaseError(`Failed to initialize Qdrant database: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
    /**
     * Validate batch input arrays have matching lengths.
     */
    validateBatchInputs(vectors, metadatas, contents) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        if (vectors.length !== metadatas.length || vectors.length !== contents.length) {
            throw new DatabaseError('Vectors, metadatas, and contents arrays must have the same length', {
                vectorsLength: vectors.length,
                metadatasLength: metadatas.length,
                contentsLength: contents.length,
            });
        }
    }
    /**
     * Prepare Qdrant points from vectors, metadatas, and contents.
     */
    preparePoints(vectors, metadatas, contents) {
        return vectors.map((vector, i) => {
            const metadata = metadatas[i];
            const payload = this.payloadMapper.toPayload(metadata, contents[i]);
            return {
                id: this.generatePointId(metadata),
                vector: Array.from(vector),
                payload,
            };
        });
    }
    async insertBatch(vectors, metadatas, contents) {
        this.validateBatchInputs(vectors, metadatas, contents);
        if (vectors.length === 0) {
            return; // No-op for empty batches
        }
        try {
            const points = this.preparePoints(vectors, metadatas, contents);
            // Upsert points in batches (Qdrant recommends batches of 100-1000)
            const batchSize = 100;
            for (let i = 0; i < points.length; i += batchSize) {
                const batch = points.slice(i, Math.min(i + batchSize, points.length));
                await this.client.upsert(this.collectionName, {
                    wait: true,
                    points: batch,
                });
            }
        }
        catch (error) {
            throw new DatabaseError(`Failed to insert batch into Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
    async search(queryVector, limit = 5, _query // Optional query string (not used in vector search, but kept for interface compatibility)
    ) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        try {
            // Search with tenant isolation (filter by orgId, repoId, branch, and commitSha)
            const results = await this.client.search(this.collectionName, {
                vector: Array.from(queryVector),
                limit,
                filter: {
                    must: [
                        { key: 'orgId', match: { value: this.orgId } },
                        { key: 'repoId', match: { value: this.repoId } },
                        { key: 'branch', match: { value: this.branch } },
                        { key: 'commitSha', match: { value: this.commitSha } },
                    ],
                },
            });
            return results.map(result => ({
                content: result.payload?.content || '',
                metadata: this.payloadMapper.fromPayload(result.payload || {}),
                score: result.score || 0,
                relevance: calculateRelevance(result.score || 0),
            }));
        }
        catch (error) {
            throw new DatabaseError(`Failed to search Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
    /**
     * Search across all repos in the organization (cross-repo search).
     *
     * - Omits repoId filter by default to enable true cross-repo queries.
     * - When repoIds are provided, restricts results to those repositories only.
     * - When branch is omitted, returns chunks from all branches and commits
     *   (including historical PR branches and stale commits).
     * - When branch is provided, filters by branch name only and still returns
     *   chunks from all commits on that branch across the selected repos.
     *
     * This is a low-level primitive for cross-repo augmentation. Higher-level
     * workflows (e.g. "latest commit only") should be built on top of this API.
     *
     * @param queryVector - Query vector for semantic search
     * @param limit - Maximum number of results to return (default: 5)
     * @param options - Optional search options
     * @param options.repoIds - Repository IDs to filter by (optional)
     * @param options.branch - Branch name to filter by (optional)
     */
    async searchCrossRepo(queryVector, limit = 5, options) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        try {
            // Use buildBaseFilter for consistency with scanCrossRepo and other methods
            // This provides automatic validation for empty repoIds arrays, whitespace-only branches, etc.
            const filter = this.buildBaseFilter({
                includeCurrentRepo: false,
                repoIds: options?.repoIds,
                branch: options?.branch,
            });
            const results = await this.client.search(this.collectionName, {
                vector: Array.from(queryVector),
                limit,
                filter,
            });
            return results.map(result => ({
                content: result.payload?.content || '',
                metadata: this.payloadMapper.fromPayload(result.payload || {}),
                score: result.score || 0,
                relevance: calculateRelevance(result.score || 0),
            }));
        }
        catch (error) {
            throw new DatabaseError(`Failed to search Qdrant (cross-repo): ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
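
A brief usage sketch contrasting the two search paths above; `db` is an initialized `QdrantDB` and `queryVector` is an embedding of the query with `EMBEDDING_DIMENSION` entries (both assumed here).

```js
// Scoped search: restricted to the current org, repo, branch, and commit.
const local = await db.search(queryVector, 5);

// Cross-repo search: opts out of the current-repo context; repoIds/branch are optional narrowing.
const shared = await db.searchCrossRepo(queryVector, 5, {
    repoIds: ['api-1a2b3c4d', 'web-9f8e7d6c'], // hypothetical repo IDs
    branch: 'main',
});
```
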
    async scanWithFilter(options) {
        const filter = this.buildBaseFilter({
            language: options.language,
            pattern: options.pattern,
            patternKey: 'file',
            includeCurrentRepo: true,
        });
        return this.executeScrollQuery(filter, options.limit || 100, 'scan Qdrant');
    }
    async scanAll(options = {}) {
        // Use scanWithFilter with a high limit to get all chunks
        return this.scanWithFilter({
            ...options,
            limit: 100000, // High limit for "all" chunks
        });
    }
    /**
     * Scan with filter across all repos in the organization (cross-repo).
     *
     * - Omits repoId filter by default to enable true cross-repo scans.
     * - When repoIds are provided, restricts results to those repositories only.
     * - When branch is omitted, returns chunks from all branches and commits
     *   (including historical PR branches and stale commits).
     * - When branch is provided, filters by branch name only and still returns
     *   chunks from all commits on that branch across the selected repos.
     *
     * Like searchCrossRepo, this is a low-level primitive. Higher-level behavior
     * such as "latest commit only" should be implemented in orchestrating code.
     */
    async scanCrossRepo(options) {
        const filter = this.buildBaseFilter({
            language: options.language,
            pattern: options.pattern,
            patternKey: 'file',
            repoIds: options.repoIds,
            branch: options.branch,
            includeCurrentRepo: false, // Cross-repo: don't filter by current repo
        });
        return this.executeScrollQuery(filter, options.limit || 10000, // Higher default for cross-repo
        'scan Qdrant (cross-repo)');
    }
    async querySymbols(options) {
        const filter = this.buildBaseFilter({
            language: options.language,
            pattern: options.pattern,
            patternKey: 'symbolName',
            symbolType: options.symbolType,
            includeCurrentRepo: true,
        });
        return this.executeScrollQuery(filter, options.limit || 100, 'query symbols in Qdrant');
    }
    async clear() {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        try {
            // Check if collection exists before trying to clear it (returns { exists: boolean })
            const collectionCheck = await this.client.collectionExists(this.collectionName);
            if (!collectionCheck.exists) {
                // Collection doesn't exist yet, nothing to clear
                return;
            }
            // Delete all points for this repository and branch/commit only
            // This ensures we only clear the current branch's data, not all branches
            await this.client.delete(this.collectionName, {
                filter: {
                    must: [
                        { key: 'orgId', match: { value: this.orgId } },
                        { key: 'repoId', match: { value: this.repoId } },
                        { key: 'branch', match: { value: this.branch } },
                        { key: 'commitSha', match: { value: this.commitSha } },
                    ],
                },
            });
        }
        catch (error) {
            throw new DatabaseError(`Failed to clear Qdrant collection: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
    /**
     * Clear all data for a specific branch (all commits).
     *
     * Qdrant-only helper: this is not part of the generic VectorDBInterface and
     * is intended for cloud/PR workflows where multiple commits exist per branch.
     * LanceDB and other backends do not implement this method.
     *
     * @param branch - Branch name to clear (defaults to current branch)
     */
    async clearBranch(branch) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        const targetBranch = branch ?? this.branch;
        try {
            const collectionCheck = await this.client.collectionExists(this.collectionName);
            if (!collectionCheck.exists) {
                // Collection doesn't exist yet, nothing to clear
                return;
            }
            // Delete all points for this repository and branch (all commits)
            await this.client.delete(this.collectionName, {
                filter: {
                    must: [
                        { key: 'orgId', match: { value: this.orgId } },
                        { key: 'repoId', match: { value: this.repoId } },
                        { key: 'branch', match: { value: targetBranch } },
                    ],
                },
            });
        }
        catch (error) {
            throw new DatabaseError(`Failed to clear branch from Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName, branch: targetBranch });
        }
    }
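
The deletion scope of the two clearing methods above differs; a short sketch with hypothetical values:

```js
// clear(): deletes only points matching the current repoId, branch AND commitSha,
// e.g. the index for commit 'abc1234' on 'feat/x'.
await db.clear();

// clearBranch('feat/x'): deletes points for every commit indexed on 'feat/x' in this repo,
// e.g. when a PR branch is closed. Qdrant-only helper, not part of VectorDBInterface.
await db.clearBranch('feat/x');
```
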
    async deleteByFile(filepath) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        try {
            await this.client.delete(this.collectionName, {
                filter: {
                    must: [
                        { key: 'orgId', match: { value: this.orgId } },
                        { key: 'repoId', match: { value: this.repoId } },
                        { key: 'branch', match: { value: this.branch } },
                        { key: 'commitSha', match: { value: this.commitSha } },
                        { key: 'file', match: { value: filepath } },
                    ],
                },
            });
        }
        catch (error) {
            throw new DatabaseError(`Failed to delete file from Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName, filepath });
        }
    }
    async updateFile(filepath, vectors, metadatas, contents) {
        if (!this.initialized) {
            throw new DatabaseError('Qdrant database not initialized');
        }
        if (vectors.length !== metadatas.length || vectors.length !== contents.length) {
            throw new DatabaseError('Vectors, metadatas, and contents arrays must have the same length');
        }
        try {
            // Delete existing chunks for this file
            await this.deleteByFile(filepath);
            // Insert new chunks
            if (vectors.length > 0) {
                await this.insertBatch(vectors, metadatas, contents);
            }
        }
        catch (error) {
            throw new DatabaseError(`Failed to update file in Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName, filepath });
        }
    }
    async hasData() {
        if (!this.initialized) {
            return false;
        }
        try {
            const info = await this.client.getCollection(this.collectionName);
            return (info.points_count || 0) > 0;
        }
        catch {
            return false;
        }
    }
    /**
     * Get the collection name (useful for debugging).
     */
    getCollectionName() {
        return this.collectionName;
    }
    /**
     * Get the organization ID.
     */
    getOrgId() {
        return this.orgId;
    }
    /**
     * Get the repository ID.
     */
    getRepoId() {
        return this.repoId;
    }
    async checkVersion() {
        const now = Date.now();
        // Cache version checks for 1 second to minimize I/O
        if (now - this.lastVersionCheck < 1000) {
            return false;
        }
        this.lastVersionCheck = now;
        try {
            const version = await readVersionFile(this.dbPath);
            if (version > this.currentVersion) {
                this.currentVersion = version;
                return true;
            }
            return false;
        }
        catch (error) {
            // If we can't read version file, don't reconnect
            return false;
        }
    }
    async reconnect() {
        try {
            // For Qdrant, reconnection just means re-reading the version
            // The client connection is stateless, so we just need to refresh version cache
            await this.initialize();
        }
        catch (error) {
            throw new DatabaseError(`Failed to reconnect to Qdrant database: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
        }
    }
    getCurrentVersion() {
        return this.currentVersion;
    }
    getVersionDate() {
        if (this.currentVersion === 0) {
            return 'Unknown';
        }
        return new Date(this.currentVersion).toLocaleString();
    }
}
//# sourceMappingURL=qdrant.js.map
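
Finally, an end-to-end sketch of the new backend as exposed by this diff; the URL, credentials, identifiers, and input arrays are placeholders, not package defaults.

```js
const db = new QdrantDB('http://localhost:6333', process.env.QDRANT_API_KEY, 'acme', '/work/api', 'main', 'abc1234');
await db.initialize(); // creates the `lien_org_acme` collection if it does not exist yet

// vectors, metadatas, and contents are equal-length arrays from the indexing pipeline (assumed here);
// points are upserted in batches of 100.
await db.insertBatch(vectors, metadatas, contents);

// queryVector is an embedding of the user query (assumed here); results are scoped to
// this org/repo/branch/commit unless the cross-repo methods are used.
const results = await db.search(queryVector, 5);
```
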