@liendev/core 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/indexer/types.d.ts +10 -0
- package/dist/indexer/types.d.ts.map +1 -1
- package/dist/vectordb/factory.d.ts.map +1 -1
- package/dist/vectordb/factory.js +93 -40
- package/dist/vectordb/factory.js.map +1 -1
- package/dist/vectordb/qdrant-payload-mapper.d.ts +34 -1
- package/dist/vectordb/qdrant-payload-mapper.d.ts.map +1 -1
- package/dist/vectordb/qdrant-payload-mapper.js +92 -41
- package/dist/vectordb/qdrant-payload-mapper.js.map +1 -1
- package/dist/vectordb/qdrant.d.ts +119 -5
- package/dist/vectordb/qdrant.d.ts.map +1 -1
- package/dist/vectordb/qdrant.js +333 -136
- package/dist/vectordb/qdrant.js.map +1 -1
- package/dist/vectordb/types.d.ts +15 -0
- package/dist/vectordb/types.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/vectordb/qdrant.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/vectordb/qdrant.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AA4HpD;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE;IAC7C,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B,GAAG,IAAI,CAoBP;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAkB;IACrC,SAAgB,MAAM,EAAE,MAAM,CAAC;IAC/B,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,aAAa,CAAsB;gBAGzC,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM;IAgCnB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAUrB;;;;;;;;;;;;;;;OAeG;IACH,OAAO,CAAC,eAAe;IAKvB;;;;;;;;;;;;;;;;;;OAkBG;IACH,OAAO,CAAC,eAAe;IAoDvB;;;;;;OAMG;IACH,OAAO,CAAC,gBAAgB;IASxB;;OAEG;YACW,kBAAkB;IA2B1B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgCjC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAkB3B;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBf,WAAW,CACf,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,aAAa,EAAE,EAC1B,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,IAAI,CAAC;IA2BV,MAAM,CACV,WAAW,EAAE,YAAY,EACzB,KAAK,GAAE,MAAU,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,YAAY,EAAE,CAAC;IAkC1B;;;;;;;;;;;;;;;;;;OAkBG;IACG,eAAe,CACnB,WAAW,EAAE,YAAY,EACzB,KAAK,GAAE,MAAU,EACjB,OAAO,CAAC,EAAE;QACR,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GACA,OAAO,CAAC,YAAY,EAAE,CAAC;IAkCpB,cAAc,CAAC,OAAO,EAAE;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAWrB,OAAO,CAAC,OAAO,GAAE;QACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;KACb,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAQhC;;;;;;;;;;;;OAYG;IACG,aAAa,CAAC,OAAO,EAAE;QAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAA
E,MAAM,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAiBrB,YAAY,CAAC,OAAO,EAAE;QAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,UAAU,CAAC,EAAE,UAAU,GAAG,OAAO,GAAG,WAAW,CAAC;QAChD,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAYrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC5B;;;;;;;;OAQG;IACG,WAAW,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAgC3C,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAyB7C,UAAU,CACd,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,aAAa,EAAE,EAC1B,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,IAAI,CAAC;IAyBV,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAajC;;OAEG;IACH,iBAAiB,IAAI,MAAM;IAI3B;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,SAAS,IAAI,MAAM;IAIb,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC;IAyBhC,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAahC,iBAAiB,IAAI,MAAM;IAI3B,cAAc,IAAI,MAAM;CAMzB"}
|
package/dist/vectordb/qdrant.js
CHANGED
|
@@ -7,36 +7,145 @@ import { calculateRelevance } from './relevance.js';
|
|
|
7
7
|
import { DatabaseError } from '../errors/index.js';
|
|
8
8
|
import { readVersionFile } from './version.js';
|
|
9
9
|
import { QdrantPayloadMapper } from './qdrant-payload-mapper.js';
|
|
10
|
+
/**
 * Fluent builder for Qdrant filter objects.
 *
 * Every filter starts with a mandatory `orgId` condition; each `add*` method
 * appends one or more conditions to the `must` list and returns the builder so
 * that calls can be chained. String inputs are trimmed before use, and values
 * that are empty after trimming are rejected instead of being silently dropped.
 */
class QdrantFilterBuilder {
    filter;
    /**
     * @param orgId - Organization ID that every produced filter is scoped to
     */
    constructor(orgId) {
        this.filter = {
            must: [{ key: 'orgId', match: { value: orgId } }],
        };
    }
    /**
     * Trim a required string value and reject it when nothing is left.
     *
     * Non-string values are treated the same way as blank strings so that the
     * caller gets the descriptive validation error rather than a TypeError
     * from calling `.trim()` on a non-string.
     *
     * @param value - Raw value supplied by the caller
     * @param label - Option name used in the error message
     * @returns The trimmed string
     * @throws Error if the value is not a string or is blank after trimming
     */
    static #requireNonBlank(value, label) {
        const cleaned = typeof value === 'string' ? value.trim() : '';
        if (cleaned.length === 0) {
            throw new Error(`Invalid ${label}: ${label} must be a non-empty, non-whitespace string.`);
        }
        return cleaned;
    }
    /**
     * Restrict the filter to a single repo/branch/commit combination.
     * The values are used as given (no trimming or validation).
     */
    addRepoContext(repoId, branch, commitSha) {
        this.filter.must.push({ key: 'repoId', match: { value: repoId } }, { key: 'branch', match: { value: branch } }, { key: 'commitSha', match: { value: commitSha } });
        return this;
    }
    /**
     * Restrict the filter to any of the given repositories.
     *
     * An empty array adds no condition. A non-empty array whose entries are all
     * blank is rejected: dropping the condition in that case would widen the
     * query to every repo in the org.
     *
     * @param repoIds - Repository IDs; entries are trimmed and blank ones removed
     * @throws Error if repoIds is non-empty but contains only blank entries
     */
    addRepoIds(repoIds) {
        const cleanedRepoIds = repoIds
            .map(id => id.trim())
            .filter(id => id.length > 0);
        if (repoIds.length > 0 && cleanedRepoIds.length === 0) {
            throw new Error('Invalid repoIds: all provided repoIds are empty or whitespace. ' +
                'Provide at least one non-empty repoId or omit repoIds entirely.');
        }
        if (cleanedRepoIds.length > 0) {
            this.filter.must.push({
                key: 'repoId',
                match: { any: cleanedRepoIds },
            });
        }
        return this;
    }
    /**
     * Require an exact match on the `language` payload field.
     * @throws Error if language is blank
     */
    addLanguage(language) {
        const cleanedLanguage = QdrantFilterBuilder.#requireNonBlank(language, 'language');
        this.filter.must.push({ key: 'language', match: { value: cleanedLanguage } });
        return this;
    }
    /**
     * Require an exact match on the `symbolType` payload field.
     * @throws Error if symbolType is blank
     */
    addSymbolType(symbolType) {
        const cleanedSymbolType = QdrantFilterBuilder.#requireNonBlank(symbolType, 'symbolType');
        this.filter.must.push({ key: 'symbolType', match: { value: cleanedSymbolType } });
        return this;
    }
    /**
     * Require a text match of `pattern` against the payload field `key`.
     *
     * @param pattern - Text to match
     * @param key - Payload field to match against (default: 'file')
     * @throws Error if pattern is blank
     */
    addPattern(pattern, key = 'file') {
        const cleanedPattern = QdrantFilterBuilder.#requireNonBlank(pattern, 'pattern');
        this.filter.must.push({ key, match: { text: cleanedPattern } });
        return this;
    }
    /**
     * Require an exact match on the `branch` payload field.
     *
     * A blank branch is rejected because it would produce a `branch == ""`
     * condition, which would almost certainly match nothing.
     *
     * @throws Error if branch is blank
     */
    addBranch(branch) {
        const cleanedBranch = QdrantFilterBuilder.#requireNonBlank(branch, 'branch');
        this.filter.must.push({ key: 'branch', match: { value: cleanedBranch } });
        return this;
    }
    /**
     * Produce the filter accumulated so far.
     *
     * Returns a copy with its own `must` array, so conditions added to the
     * builder afterwards do not leak into a filter that was already handed out.
     *
     * @returns Qdrant filter object of the form `{ must: [...] }`
     */
    build() {
        return { ...this.filter, must: [...this.filter.must] };
    }
}
|
|
82
|
+
/**
 * Validate filter options for buildBaseFilter.
 *
 * Kept as a standalone function so the validation rules can be unit tested.
 * It rejects option combinations that conflict with each other:
 *
 * - `repoIds` (non-empty) cannot be combined with the current-repo context.
 * - `branch` (truthy) cannot be combined with the current-repo context, because
 *   the current repo context already carries its own branch.
 *
 * `includeCurrentRepo` counts as enabled unless it is explicitly `false`, so
 * `undefined` and `true` behave the same. A missing options object
 * (`undefined`/`null`) is treated as "no options" and passes validation.
 *
 * @param options - Filter options to validate
 * @param options.repoIds - Repository IDs for cross-repo queries
 * @param options.branch - Explicit branch name for cross-repo queries
 * @param options.includeCurrentRepo - Pass `false` to allow `repoIds`/`branch`
 * @throws Error if conflicting options are detected
 */
export function validateFilterOptions(options) {
    // Tolerate a missing options object instead of crashing on property access.
    const { repoIds, branch, includeCurrentRepo } = options ?? {};
    // Anything other than an explicit `false` means the current repo context is on.
    const currentRepoEnabled = includeCurrentRepo !== false;
    if (currentRepoEnabled && repoIds && repoIds.length > 0) {
        throw new Error('Cannot use repoIds when includeCurrentRepo is enabled (the default). ' +
            'These options are mutually exclusive. Set includeCurrentRepo=false to perform cross-repo queries with repoIds.');
    }
    if (branch && currentRepoEnabled) {
        throw new Error('Cannot use branch parameter when includeCurrentRepo is enabled (the default). ' +
            'Branch is automatically included via the current repo context. Set includeCurrentRepo=false to specify a branch explicitly.');
    }
}
|
|
10
107
|
/**
|
|
11
108
|
* QdrantDB implements VectorDBInterface using Qdrant vector database.
|
|
12
109
|
*
|
|
13
110
|
* Features:
|
|
14
111
|
* - Multi-tenant support via payload filtering (orgId/repoId)
|
|
112
|
+
* - Branch and commit isolation for PR workflows
|
|
15
113
|
* - Collection naming: `lien_org_{orgId}`
|
|
16
114
|
* - Cross-repo search by omitting repoId filter
|
|
17
115
|
* - Tenant isolation via orgId filtering
|
|
116
|
+
* - Point ID generation includes branch/commit to prevent collisions
|
|
117
|
+
*
|
|
118
|
+
* Data Isolation:
|
|
119
|
+
* All queries are filtered by orgId, repoId, branch, and commitSha by default.
|
|
120
|
+
* This ensures that different branches and commits have isolated data, preventing
|
|
121
|
+
* PRs from overwriting each other's indices. Use cross-repo methods (searchCrossRepo,
|
|
122
|
+
* scanCrossRepo) to query across repositories within an organization.
|
|
18
123
|
*/
|
|
19
124
|
export class QdrantDB {
|
|
20
125
|
client;
|
|
21
126
|
collectionName;
|
|
22
127
|
orgId;
|
|
23
128
|
repoId;
|
|
129
|
+
branch;
|
|
130
|
+
commitSha;
|
|
24
131
|
initialized = false;
|
|
25
132
|
dbPath; // For compatibility with manifest/version file operations
|
|
26
133
|
lastVersionCheck = 0;
|
|
27
134
|
currentVersion = 0;
|
|
28
135
|
payloadMapper;
|
|
29
|
-
constructor(url, apiKey, orgId, projectRoot) {
|
|
136
|
+
constructor(url, apiKey, orgId, projectRoot, branch, commitSha) {
|
|
30
137
|
this.client = new QdrantClient({
|
|
31
138
|
url,
|
|
32
139
|
apiKey, // Optional, required for Qdrant Cloud
|
|
33
140
|
});
|
|
34
141
|
this.orgId = orgId;
|
|
35
142
|
this.repoId = this.extractRepoId(projectRoot);
|
|
143
|
+
this.branch = branch;
|
|
144
|
+
this.commitSha = commitSha;
|
|
36
145
|
// Collection naming: one per org
|
|
37
146
|
this.collectionName = `lien_org_${orgId}`;
|
|
38
147
|
// Initialize payload mapper
|
|
39
|
-
this.payloadMapper = new QdrantPayloadMapper(this.orgId, this.repoId);
|
|
148
|
+
this.payloadMapper = new QdrantPayloadMapper(this.orgId, this.repoId, this.branch, this.commitSha);
|
|
40
149
|
// dbPath is used for manifest and version files (stored locally even with Qdrant)
|
|
41
150
|
// Use same path structure as LanceDB for consistency
|
|
42
151
|
const projectName = path.basename(projectRoot);
|
|
@@ -62,12 +171,113 @@ export class QdrantDB {
|
|
|
62
171
|
}
|
|
63
172
|
/**
|
|
64
173
|
* Generate a unique point ID from chunk metadata.
|
|
65
|
-
* Uses hash of file path + line range for stable identification.
|
|
174
|
+
* Uses hash of file path + line range + branch + commitSha for stable identification.
|
|
175
|
+
* Includes branch/commit to prevent ID collisions across branches.
|
|
176
|
+
*
|
|
177
|
+
* **Hash Algorithm Choice:**
|
|
178
|
+
* Uses MD5 because it is fast and its collision likelihood is acceptable for this use case.
|
|
179
|
+
* - MD5 is deprecated for cryptographic purposes but suitable for non-security ID generation
|
|
180
|
+
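* - Collision probability is extremely low: MD5 digests are 128 bits, so roughly 2^64 distinct inputs are needed before an accidental collision becomes likely (birthday bound)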
|
|
181
|
+
* - Input includes file path, line range, branch, and commit SHA, making collisions
|
|
182
|
+
* even less likely in practice
|
|
183
|
+
* - For typical codebases (thousands to hundreds of thousands of chunks), collision risk
|
|
184
|
+
* is negligible
|
|
185
|
+
* - If scaling to millions of chunks across many repos, consider upgrading to SHA-256
|
|
186
|
+
* for additional collision resistance (at ~10% performance cost)
|
|
66
187
|
*/
|
|
67
188
|
generatePointId(metadata) {
|
|
68
|
-
const idString = `${metadata.file}:${metadata.startLine}:${metadata.endLine}`;
|
|
189
|
+
const idString = `${metadata.file}:${metadata.startLine}:${metadata.endLine}:${this.branch}:${this.commitSha}`;
|
|
69
190
|
return crypto.createHash('md5').update(idString).digest('hex');
|
|
70
191
|
}
|
|
192
|
+
/**
|
|
193
|
+
* Build base filter for Qdrant queries.
|
|
194
|
+
* Uses builder pattern to simplify filter construction.
|
|
195
|
+
*
|
|
196
|
+
* **Important constraints:**
|
|
197
|
+
* - `includeCurrentRepo` and `repoIds` are mutually exclusive.
|
|
198
|
+
* - `includeCurrentRepo` defaults to `true` when `undefined` (treats `undefined` as "enabled").
|
|
199
|
+
* - To use `repoIds` for cross-repo queries, you must explicitly pass `includeCurrentRepo: false`.
|
|
200
|
+
* - The `branch` parameter can only be used when `includeCurrentRepo` is explicitly `false`.
|
|
201
|
+
* When `includeCurrentRepo` is enabled (default), branch is automatically included via
|
|
202
|
+
* the current repo context (`addRepoContext`).
|
|
203
|
+
*
|
|
204
|
+
* @param options - Filter options
|
|
205
|
+
* @param options.includeCurrentRepo - Whether to filter by current repo context (default: true when undefined).
|
|
206
|
+
* Must be explicitly `false` to use `repoIds` or `branch` parameters.
|
|
207
|
+
* @param options.repoIds - Repository IDs to filter by (requires `includeCurrentRepo: false`).
|
|
208
|
+
* @param options.branch - Branch name to filter by (requires `includeCurrentRepo: false`).
|
|
209
|
+
* @returns Qdrant filter object
|
|
210
|
+
*/
|
|
211
|
+
buildBaseFilter(options) {
|
|
212
|
+
// Validate filter options (extracted to enable unit testing)
|
|
213
|
+
validateFilterOptions({
|
|
214
|
+
repoIds: options.repoIds,
|
|
215
|
+
branch: options.branch,
|
|
216
|
+
includeCurrentRepo: options.includeCurrentRepo,
|
|
217
|
+
});
|
|
218
|
+
const builder = new QdrantFilterBuilder(this.orgId);
|
|
219
|
+
if (options.includeCurrentRepo !== false) {
|
|
220
|
+
builder.addRepoContext(this.repoId, this.branch, this.commitSha);
|
|
221
|
+
}
|
|
222
|
+
if (options.repoIds) {
|
|
223
|
+
builder.addRepoIds(options.repoIds);
|
|
224
|
+
}
|
|
225
|
+
// Validate language is non-empty if explicitly provided (even if empty string)
|
|
226
|
+
if (options.language !== undefined) {
|
|
227
|
+
builder.addLanguage(options.language);
|
|
228
|
+
}
|
|
229
|
+
// Validate symbolType is non-empty if explicitly provided (even if empty string)
|
|
230
|
+
if (options.symbolType !== undefined) {
|
|
231
|
+
builder.addSymbolType(options.symbolType);
|
|
232
|
+
}
|
|
233
|
+
// Validate pattern is non-empty if explicitly provided (even if empty string)
|
|
234
|
+
if (options.pattern !== undefined) {
|
|
235
|
+
builder.addPattern(options.pattern, options.patternKey);
|
|
236
|
+
}
|
|
237
|
+
// Only add branch filter when includeCurrentRepo is false
|
|
238
|
+
// When includeCurrentRepo is true, branch is already added via addRepoContext
|
|
239
|
+
// Validate branch is non-empty if explicitly provided (even if empty string)
|
|
240
|
+
if (options.branch !== undefined && options.includeCurrentRepo === false) {
|
|
241
|
+
// addBranch will validate that branch is non-empty and non-whitespace
|
|
242
|
+
builder.addBranch(options.branch);
|
|
243
|
+
}
|
|
244
|
+
return builder.build();
|
|
245
|
+
}
|
|
246
|
+
/**
|
|
247
|
+
* Map Qdrant scroll results to SearchResult format.
|
|
248
|
+
*
|
|
249
|
+
* Note: Scroll/scan operations do not compute semantic similarity scores.
|
|
250
|
+
* For these results, score is always 0 and relevance is set to 'not_relevant'
|
|
251
|
+
* to indicate that the results are unscored (not that they are useless).
|
|
252
|
+
*/
|
|
253
|
+
mapScrollResults(results) {
|
|
254
|
+
return (results.points || []).map((point) => ({
|
|
255
|
+
content: point.payload?.content || '',
|
|
256
|
+
metadata: this.payloadMapper.fromPayload(point.payload || {}),
|
|
257
|
+
score: 0,
|
|
258
|
+
relevance: 'not_relevant',
|
|
259
|
+
}));
|
|
260
|
+
}
|
|
261
|
+
/**
|
|
262
|
+
* Execute a scroll query with error handling.
|
|
263
|
+
*/
|
|
264
|
+
async executeScrollQuery(filter, limit, errorContext) {
|
|
265
|
+
if (!this.initialized) {
|
|
266
|
+
throw new DatabaseError('Qdrant database not initialized');
|
|
267
|
+
}
|
|
268
|
+
try {
|
|
269
|
+
const results = await this.client.scroll(this.collectionName, {
|
|
270
|
+
filter,
|
|
271
|
+
limit,
|
|
272
|
+
with_payload: true,
|
|
273
|
+
with_vector: false,
|
|
274
|
+
});
|
|
275
|
+
return this.mapScrollResults(results);
|
|
276
|
+
}
|
|
277
|
+
catch (error) {
|
|
278
|
+
throw new DatabaseError(`Failed to ${errorContext}: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
|
|
279
|
+
}
|
|
280
|
+
}
|
|
71
281
|
async initialize() {
|
|
72
282
|
try {
|
|
73
283
|
// Check if collection exists (returns { exists: boolean })
|
|
@@ -95,7 +305,10 @@ export class QdrantDB {
|
|
|
95
305
|
throw new DatabaseError(`Failed to initialize Qdrant database: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
|
|
96
306
|
}
|
|
97
307
|
}
|
|
98
|
-
|
|
308
|
+
/**
|
|
309
|
+
* Validate batch input arrays have matching lengths.
|
|
310
|
+
*/
|
|
311
|
+
validateBatchInputs(vectors, metadatas, contents) {
|
|
99
312
|
if (!this.initialized) {
|
|
100
313
|
throw new DatabaseError('Qdrant database not initialized');
|
|
101
314
|
}
|
|
@@ -106,20 +319,28 @@ export class QdrantDB {
|
|
|
106
319
|
contentsLength: contents.length,
|
|
107
320
|
});
|
|
108
321
|
}
|
|
322
|
+
}
|
|
323
|
+
/**
|
|
324
|
+
* Prepare Qdrant points from vectors, metadatas, and contents.
|
|
325
|
+
*/
|
|
326
|
+
preparePoints(vectors, metadatas, contents) {
|
|
327
|
+
return vectors.map((vector, i) => {
|
|
328
|
+
const metadata = metadatas[i];
|
|
329
|
+
const payload = this.payloadMapper.toPayload(metadata, contents[i]);
|
|
330
|
+
return {
|
|
331
|
+
id: this.generatePointId(metadata),
|
|
332
|
+
vector: Array.from(vector),
|
|
333
|
+
payload,
|
|
334
|
+
};
|
|
335
|
+
});
|
|
336
|
+
}
|
|
337
|
+
async insertBatch(vectors, metadatas, contents) {
|
|
338
|
+
this.validateBatchInputs(vectors, metadatas, contents);
|
|
109
339
|
if (vectors.length === 0) {
|
|
110
340
|
return; // No-op for empty batches
|
|
111
341
|
}
|
|
112
342
|
try {
|
|
113
|
-
|
|
114
|
-
const points = vectors.map((vector, i) => {
|
|
115
|
-
const metadata = metadatas[i];
|
|
116
|
-
const payload = this.payloadMapper.toPayload(metadata, contents[i]);
|
|
117
|
-
return {
|
|
118
|
-
id: this.generatePointId(metadata),
|
|
119
|
-
vector: Array.from(vector),
|
|
120
|
-
payload,
|
|
121
|
-
};
|
|
122
|
-
});
|
|
343
|
+
const points = this.preparePoints(vectors, metadatas, contents);
|
|
123
344
|
// Upsert points in batches (Qdrant recommends batches of 100-1000)
|
|
124
345
|
const batchSize = 100;
|
|
125
346
|
for (let i = 0; i < points.length; i += batchSize) {
|
|
@@ -140,7 +361,7 @@ export class QdrantDB {
|
|
|
140
361
|
throw new DatabaseError('Qdrant database not initialized');
|
|
141
362
|
}
|
|
142
363
|
try {
|
|
143
|
-
// Search with tenant isolation (filter by orgId and
|
|
364
|
+
// Search with tenant isolation (filter by orgId, repoId, branch, and commitSha)
|
|
144
365
|
const results = await this.client.search(this.collectionName, {
|
|
145
366
|
vector: Array.from(queryVector),
|
|
146
367
|
limit,
|
|
@@ -148,6 +369,8 @@ export class QdrantDB {
|
|
|
148
369
|
must: [
|
|
149
370
|
{ key: 'orgId', match: { value: this.orgId } },
|
|
150
371
|
{ key: 'repoId', match: { value: this.repoId } },
|
|
372
|
+
{ key: 'branch', match: { value: this.branch } },
|
|
373
|
+
{ key: 'commitSha', match: { value: this.commitSha } },
|
|
151
374
|
],
|
|
152
375
|
},
|
|
153
376
|
});
|
|
@@ -164,25 +387,35 @@ export class QdrantDB {
|
|
|
164
387
|
}
|
|
165
388
|
/**
|
|
166
389
|
* Search across all repos in the organization (cross-repo search).
|
|
167
|
-
*
|
|
390
|
+
*
|
|
391
|
+
* - Omits repoId filter by default to enable true cross-repo queries.
|
|
392
|
+
* - When repoIds are provided, restricts results to those repositories only.
|
|
393
|
+
* - When branch is omitted, returns chunks from all branches and commits
|
|
394
|
+
* (including historical PR branches and stale commits).
|
|
395
|
+
* - When branch is provided, filters by branch name only and still returns
|
|
396
|
+
* chunks from all commits on that branch across the selected repos.
|
|
397
|
+
*
|
|
398
|
+
* This is a low-level primitive for cross-repo augmentation. Higher-level
|
|
399
|
+
* workflows (e.g. "latest commit only") should be built on top of this API.
|
|
400
|
+
*
|
|
401
|
+
* @param queryVector - Query vector for semantic search
|
|
402
|
+
* @param limit - Maximum number of results to return (default: 5)
|
|
403
|
+
* @param options - Optional search options
|
|
404
|
+
* @param options.repoIds - Repository IDs to filter by (optional)
|
|
405
|
+
* @param options.branch - Branch name to filter by (optional)
|
|
168
406
|
*/
|
|
169
|
-
async searchCrossRepo(queryVector, limit = 5,
|
|
407
|
+
async searchCrossRepo(queryVector, limit = 5, options) {
|
|
170
408
|
if (!this.initialized) {
|
|
171
409
|
throw new DatabaseError('Qdrant database not initialized');
|
|
172
410
|
}
|
|
173
411
|
try {
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
filter.must.push({
|
|
182
|
-
key: 'repoId',
|
|
183
|
-
match: { any: repoIds },
|
|
184
|
-
});
|
|
185
|
-
}
|
|
412
|
+
// Use buildBaseFilter for consistency with scanCrossRepo and other methods
|
|
413
|
+
// This provides automatic validation for empty repoIds arrays, whitespace-only branches, etc.
|
|
414
|
+
const filter = this.buildBaseFilter({
|
|
415
|
+
includeCurrentRepo: false,
|
|
416
|
+
repoIds: options?.repoIds,
|
|
417
|
+
branch: options?.branch,
|
|
418
|
+
});
|
|
186
419
|
const results = await this.client.search(this.collectionName, {
|
|
187
420
|
vector: Array.from(queryVector),
|
|
188
421
|
limit,
|
|
@@ -200,40 +433,13 @@ export class QdrantDB {
|
|
|
200
433
|
}
|
|
201
434
|
}
|
|
202
435
|
async scanWithFilter(options) {
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
{ key: 'repoId', match: { value: this.repoId } },
|
|
211
|
-
],
|
|
212
|
-
};
|
|
213
|
-
if (options.language) {
|
|
214
|
-
filter.must.push({ key: 'language', match: { value: options.language } });
|
|
215
|
-
}
|
|
216
|
-
if (options.pattern) {
|
|
217
|
-
// Qdrant supports regex in match filters
|
|
218
|
-
filter.must.push({ key: 'file', match: { text: options.pattern } });
|
|
219
|
-
}
|
|
220
|
-
const limit = options.limit || 100;
|
|
221
|
-
const results = await this.client.scroll(this.collectionName, {
|
|
222
|
-
filter,
|
|
223
|
-
limit,
|
|
224
|
-
with_payload: true,
|
|
225
|
-
with_vector: false,
|
|
226
|
-
});
|
|
227
|
-
return (results.points || []).map(point => ({
|
|
228
|
-
content: point.payload?.content || '',
|
|
229
|
-
metadata: this.payloadMapper.fromPayload(point.payload || {}),
|
|
230
|
-
score: 0, // No relevance score for filtered scans
|
|
231
|
-
relevance: 'not_relevant',
|
|
232
|
-
}));
|
|
233
|
-
}
|
|
234
|
-
catch (error) {
|
|
235
|
-
throw new DatabaseError(`Failed to scan Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
|
|
236
|
-
}
|
|
436
|
+
const filter = this.buildBaseFilter({
|
|
437
|
+
language: options.language,
|
|
438
|
+
pattern: options.pattern,
|
|
439
|
+
patternKey: 'file',
|
|
440
|
+
includeCurrentRepo: true,
|
|
441
|
+
});
|
|
442
|
+
return this.executeScrollQuery(filter, options.limit || 100, 'scan Qdrant');
|
|
237
443
|
}
|
|
238
444
|
async scanAll(options = {}) {
|
|
239
445
|
// Use scanWithFilter with a high limit to get all chunks
|
|
@@ -244,111 +450,100 @@ export class QdrantDB {
|
|
|
244
450
|
}
|
|
245
451
|
/**
|
|
246
452
|
* Scan with filter across all repos in the organization (cross-repo).
|
|
247
|
-
*
|
|
453
|
+
*
|
|
454
|
+
* - Omits repoId filter by default to enable true cross-repo scans.
|
|
455
|
+
* - When repoIds are provided, restricts results to those repositories only.
|
|
456
|
+
* - When branch is omitted, returns chunks from all branches and commits
|
|
457
|
+
* (including historical PR branches and stale commits).
|
|
458
|
+
* - When branch is provided, filters by branch name only and still returns
|
|
459
|
+
* chunks from all commits on that branch across the selected repos.
|
|
460
|
+
*
|
|
461
|
+
* Like searchCrossRepo, this is a low-level primitive. Higher-level behavior
|
|
462
|
+
* such as "latest commit only" should be implemented in orchestrating code.
|
|
248
463
|
*/
|
|
249
464
|
async scanCrossRepo(options) {
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
if (options.repoIds && options.repoIds.length > 0) {
|
|
261
|
-
filter.must.push({
|
|
262
|
-
key: 'repoId',
|
|
263
|
-
match: { any: options.repoIds },
|
|
264
|
-
});
|
|
265
|
-
}
|
|
266
|
-
if (options.language) {
|
|
267
|
-
filter.must.push({ key: 'language', match: { value: options.language } });
|
|
268
|
-
}
|
|
269
|
-
if (options.pattern) {
|
|
270
|
-
filter.must.push({ key: 'file', match: { text: options.pattern } });
|
|
271
|
-
}
|
|
272
|
-
const limit = options.limit || 10000; // Higher default for cross-repo
|
|
273
|
-
const results = await this.client.scroll(this.collectionName, {
|
|
274
|
-
filter,
|
|
275
|
-
limit,
|
|
276
|
-
with_payload: true,
|
|
277
|
-
with_vector: false,
|
|
278
|
-
});
|
|
279
|
-
return (results.points || []).map(point => ({
|
|
280
|
-
content: point.payload?.content || '',
|
|
281
|
-
metadata: this.payloadMapper.fromPayload(point.payload || {}),
|
|
282
|
-
score: 0,
|
|
283
|
-
relevance: 'not_relevant',
|
|
284
|
-
}));
|
|
285
|
-
}
|
|
286
|
-
catch (error) {
|
|
287
|
-
throw new DatabaseError(`Failed to scan Qdrant (cross-repo): ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
|
|
288
|
-
}
|
|
465
|
+
const filter = this.buildBaseFilter({
|
|
466
|
+
language: options.language,
|
|
467
|
+
pattern: options.pattern,
|
|
468
|
+
patternKey: 'file',
|
|
469
|
+
repoIds: options.repoIds,
|
|
470
|
+
branch: options.branch,
|
|
471
|
+
includeCurrentRepo: false, // Cross-repo: don't filter by current repo
|
|
472
|
+
});
|
|
473
|
+
return this.executeScrollQuery(filter, options.limit || 10000, // Higher default for cross-repo
|
|
474
|
+
'scan Qdrant (cross-repo)');
|
|
289
475
|
}
|
|
290
476
|
async querySymbols(options) {
|
|
477
|
+
const filter = this.buildBaseFilter({
|
|
478
|
+
language: options.language,
|
|
479
|
+
pattern: options.pattern,
|
|
480
|
+
patternKey: 'symbolName',
|
|
481
|
+
symbolType: options.symbolType,
|
|
482
|
+
includeCurrentRepo: true,
|
|
483
|
+
});
|
|
484
|
+
return this.executeScrollQuery(filter, options.limit || 100, 'query symbols in Qdrant');
|
|
485
|
+
}
|
|
486
|
+
async clear() {
|
|
291
487
|
if (!this.initialized) {
|
|
292
488
|
throw new DatabaseError('Qdrant database not initialized');
|
|
293
489
|
}
|
|
294
490
|
try {
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
};
|
|
301
|
-
if (options.language) {
|
|
302
|
-
filter.must.push({ key: 'language', match: { value: options.language } });
|
|
303
|
-
}
|
|
304
|
-
if (options.symbolType) {
|
|
305
|
-
filter.must.push({ key: 'symbolType', match: { value: options.symbolType } });
|
|
306
|
-
}
|
|
307
|
-
if (options.pattern) {
|
|
308
|
-
filter.must.push({ key: 'symbolName', match: { text: options.pattern } });
|
|
491
|
+
// Check if collection exists before trying to clear it (returns { exists: boolean })
|
|
492
|
+
const collectionCheck = await this.client.collectionExists(this.collectionName);
|
|
493
|
+
if (!collectionCheck.exists) {
|
|
494
|
+
// Collection doesn't exist yet, nothing to clear
|
|
495
|
+
return;
|
|
309
496
|
}
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
497
|
+
// Delete all points for this repository and branch/commit only
|
|
498
|
+
// This ensures we only clear the current branch's data, not all branches
|
|
499
|
+
await this.client.delete(this.collectionName, {
|
|
500
|
+
filter: {
|
|
501
|
+
must: [
|
|
502
|
+
{ key: 'orgId', match: { value: this.orgId } },
|
|
503
|
+
{ key: 'repoId', match: { value: this.repoId } },
|
|
504
|
+
{ key: 'branch', match: { value: this.branch } },
|
|
505
|
+
{ key: 'commitSha', match: { value: this.commitSha } },
|
|
506
|
+
],
|
|
507
|
+
},
|
|
316
508
|
});
|
|
317
|
-
return (results.points || []).map(point => ({
|
|
318
|
-
content: point.payload?.content || '',
|
|
319
|
-
metadata: this.payloadMapper.fromPayload(point.payload || {}),
|
|
320
|
-
score: 0,
|
|
321
|
-
relevance: 'not_relevant',
|
|
322
|
-
}));
|
|
323
509
|
}
|
|
324
510
|
catch (error) {
|
|
325
|
-
throw new DatabaseError(`Failed to
|
|
511
|
+
throw new DatabaseError(`Failed to clear Qdrant collection: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
|
|
326
512
|
}
|
|
327
513
|
}
|
|
328
|
-
|
|
514
|
+
/**
|
|
515
|
+
* Clear all data for a specific branch (all commits).
|
|
516
|
+
*
|
|
517
|
+
* Qdrant-only helper: this is not part of the generic VectorDBInterface and
|
|
518
|
+
* is intended for cloud/PR workflows where multiple commits exist per branch.
|
|
519
|
+
* LanceDB and other backends do not implement this method.
|
|
520
|
+
*
|
|
521
|
+
* @param branch - Branch name to clear (defaults to current branch)
|
|
522
|
+
*/
|
|
523
|
+
async clearBranch(branch) {
|
|
329
524
|
if (!this.initialized) {
|
|
330
525
|
throw new DatabaseError('Qdrant database not initialized');
|
|
331
526
|
}
|
|
527
|
+
const targetBranch = branch ?? this.branch;
|
|
332
528
|
try {
|
|
333
|
-
// Check if collection exists before trying to clear it (returns { exists: boolean })
|
|
334
529
|
const collectionCheck = await this.client.collectionExists(this.collectionName);
|
|
335
530
|
if (!collectionCheck.exists) {
|
|
336
531
|
// Collection doesn't exist yet, nothing to clear
|
|
337
532
|
return;
|
|
338
533
|
}
|
|
339
|
-
// Delete all points for this repository
|
|
340
|
-
// This ensures we only clear the current repo's data, not all repos in the org
|
|
534
|
+
// Delete all points for this repository and branch (all commits)
|
|
341
535
|
await this.client.delete(this.collectionName, {
|
|
342
536
|
filter: {
|
|
343
537
|
must: [
|
|
344
538
|
{ key: 'orgId', match: { value: this.orgId } },
|
|
345
539
|
{ key: 'repoId', match: { value: this.repoId } },
|
|
540
|
+
{ key: 'branch', match: { value: targetBranch } },
|
|
346
541
|
],
|
|
347
542
|
},
|
|
348
543
|
});
|
|
349
544
|
}
|
|
350
545
|
catch (error) {
|
|
351
|
-
throw new DatabaseError(`Failed to clear Qdrant
|
|
546
|
+
throw new DatabaseError(`Failed to clear branch from Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName, branch: targetBranch });
|
|
352
547
|
}
|
|
353
548
|
}
|
|
354
549
|
async deleteByFile(filepath) {
|
|
@@ -361,6 +556,8 @@ export class QdrantDB {
|
|
|
361
556
|
must: [
|
|
362
557
|
{ key: 'orgId', match: { value: this.orgId } },
|
|
363
558
|
{ key: 'repoId', match: { value: this.repoId } },
|
|
559
|
+
{ key: 'branch', match: { value: this.branch } },
|
|
560
|
+
{ key: 'commitSha', match: { value: this.commitSha } },
|
|
364
561
|
{ key: 'file', match: { value: filepath } },
|
|
365
562
|
],
|
|
366
563
|
},
|