@liendev/core 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/vectordb/qdrant.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAOpD;;;;;;;;GAQG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,WAAW,CAAkB;IACrC,SAAgB,MAAM,EAAE,MAAM,CAAC;IAC/B,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,aAAa,CAAsB;gBAGzC,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM;IA8BrB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAUrB;;;OAGG;IACH,OAAO,CAAC,eAAe;IAMjB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgC3B,WAAW,CACf,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,aAAa,EAAE,EAC1B,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,IAAI,CAAC;IA+CV,MAAM,CACV,WAAW,EAAE,YAAY,EACzB,KAAK,GAAE,MAAU,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,YAAY,EAAE,CAAC;IAgC1B;;;OAGG;IACG,eAAe,CACnB,WAAW,EAAE,YAAY,EACzB,KAAK,GAAE,MAAU,EACjB,OAAO,CAAC,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,YAAY,EAAE,CAAC;IAwCpB,cAAc,CAAC,OAAO,EAAE;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IA4CrB,OAAO,CAAC,OAAO,GAAE;QACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;KACb,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAQhC;;;OAGG;IACG,aAAa,CAAC,OAAO,EAAE;QAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;KACpB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAkDrB,YAAY,CAAC,OAAO,EAAE;QAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,UAAU,CAAC,EAAE,UAAU,GAAG,OAAO,GAAG,WAAW,CAAC;QAChD,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IA+CrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IA+BtB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB7C,UAAU,CACd,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,aAAa,EAAE,EAC1B,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,IAAI,CAAC;IAyBV,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAajC;;OAEG;IACH,iBAAiB,IAAI,MAAM;IAI3B;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,SAAS,IAAI,MAAM;IAIb,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC;IAyBhC,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAahC,iBAAiB,IAAI,MAAM;IAI3B,cAAc,IAAI,MAAM;CAMzB"}
1
+ {"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/vectordb/qdrant.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AA4HpD;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE;IAC7C,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B,GAAG,IAAI,CAoBP;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAkB;IACrC,SAAgB,MAAM,EAAE,MAAM,CAAC;IAC/B,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,aAAa,CAAsB;gBAGzC,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM;IAgCnB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAUrB;;;;;;;;;;;;;;;OAeG;IACH,OAAO,CAAC,eAAe;IAKvB;;;;;;;;;;;;;;;;;;OAkBG;IACH,OAAO,CAAC,eAAe;IAoDvB;;;;;;OAMG;IACH,OAAO,CAAC,gBAAgB;IASxB;;OAEG;YACW,kBAAkB;IA2B1B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgCjC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAkB3B;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBf,WAAW,CACf,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,aAAa,EAAE,EAC1B,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,IAAI,CAAC;IA2BV,MAAM,CACV,WAAW,EAAE,YAAY,EACzB,KAAK,GAAE,MAAU,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,YAAY,EAAE,CAAC;IAkC1B;;;;;;;;;;;;;;;;;;OAkBG;IACG,eAAe,CACnB,WAAW,EAAE,YAAY,EACzB,KAAK,GAAE,MAAU,EACjB,OAAO,CAAC,EAAE;QACR,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GACA,OAAO,CAAC,YAAY,EAAE,CAAC;IAkCpB,cAAc,CAAC,OAAO,EAAE;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAWrB,OAAO,CAAC,OAAO,GAAE;QACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;KACb,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAQhC;;;;;;;;;;;;OAYG;IACG,aAAa,CAAC,OAAO,EAAE;QAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAiBrB,YAAY,CAAC,OAAO,EAAE;QAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,UAAU,CAAC,EAAE,UAAU,GAAG,OAAO,GAAG,WAAW,CAAC;QAChD,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAYrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC5B;;;;;;;;OAQG;IACG,WAAW,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAgC3C,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAyB7C,UAAU,CACd,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,aAAa,EAAE,EAC1B,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,IAAI,CAAC;IAyBV,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAajC;;OAEG;IACH,iBAAiB,IAAI,MAAM;IAI3B;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,SAAS,IAAI,MAAM;IAIb,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC;IAyBhC,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAahC,iBAAiB,IAAI,MAAM;IAI3B,cAAc,IAAI,MAAM;CAMzB"}
@@ -7,36 +7,145 @@ import { calculateRelevance } from './relevance.js';
7
7
  import { DatabaseError } from '../errors/index.js';
8
8
  import { readVersionFile } from './version.js';
9
9
  import { QdrantPayloadMapper } from './qdrant-payload-mapper.js';
10
+ /**
11
+ * Builder class for constructing Qdrant filters.
12
+ * Simplifies filter construction and reduces complexity.
13
+ */
14
+ class QdrantFilterBuilder {
15
+ filter;
16
+ constructor(orgId) {
17
+ this.filter = {
18
+ must: [{ key: 'orgId', match: { value: orgId } }],
19
+ };
20
+ }
21
+ addRepoContext(repoId, branch, commitSha) {
22
+ this.filter.must.push({ key: 'repoId', match: { value: repoId } }, { key: 'branch', match: { value: branch } }, { key: 'commitSha', match: { value: commitSha } });
23
+ return this;
24
+ }
25
+ addRepoIds(repoIds) {
26
+ const cleanedRepoIds = repoIds
27
+ .map(id => id.trim())
28
+ .filter(id => id.length > 0);
29
+ // If caller passed repoIds but all were empty/invalid after cleaning,
30
+ // fail fast instead of silently dropping the repoId filter (which would
31
+ // otherwise widen the query to all repos in the org).
32
+ if (repoIds.length > 0 && cleanedRepoIds.length === 0) {
33
+ throw new Error('Invalid repoIds: all provided repoIds are empty or whitespace. ' +
34
+ 'Provide at least one non-empty repoId or omit repoIds entirely.');
35
+ }
36
+ if (cleanedRepoIds.length > 0) {
37
+ this.filter.must.push({
38
+ key: 'repoId',
39
+ match: { any: cleanedRepoIds },
40
+ });
41
+ }
42
+ return this;
43
+ }
44
+ addLanguage(language) {
45
+ const cleanedLanguage = language.trim();
46
+ if (cleanedLanguage.length === 0) {
47
+ throw new Error('Invalid language: language must be a non-empty, non-whitespace string.');
48
+ }
49
+ this.filter.must.push({ key: 'language', match: { value: cleanedLanguage } });
50
+ return this;
51
+ }
52
+ addSymbolType(symbolType) {
53
+ const cleanedSymbolType = symbolType.trim();
54
+ if (cleanedSymbolType.length === 0) {
55
+ throw new Error('Invalid symbolType: symbolType must be a non-empty, non-whitespace string.');
56
+ }
57
+ this.filter.must.push({ key: 'symbolType', match: { value: cleanedSymbolType } });
58
+ return this;
59
+ }
60
+ addPattern(pattern, key = 'file') {
61
+ const cleanedPattern = pattern.trim();
62
+ if (cleanedPattern.length === 0) {
63
+ throw new Error('Invalid pattern: pattern must be a non-empty, non-whitespace string.');
64
+ }
65
+ this.filter.must.push({ key, match: { text: cleanedPattern } });
66
+ return this;
67
+ }
68
+ addBranch(branch) {
69
+ const cleanedBranch = branch.trim();
70
+ // Prevent constructing a filter for an empty/whitespace-only branch,
71
+ // which would search for `branch == ""` and almost certainly return no results.
72
+ if (cleanedBranch.length === 0) {
73
+ throw new Error('Invalid branch: branch must be a non-empty, non-whitespace string.');
74
+ }
75
+ this.filter.must.push({ key: 'branch', match: { value: cleanedBranch } });
76
+ return this;
77
+ }
78
+ build() {
79
+ return this.filter;
80
+ }
81
+ }
82
+ /**
83
+ * Validate filter options for buildBaseFilter.
84
+ *
85
+ * This is a separate function to enable unit testing of validation logic.
86
+ * The validations ensure that conflicting options are not used together.
87
+ *
88
+ * @param options - Filter options to validate
89
+ * @throws Error if conflicting options are detected
90
+ */
91
+ export function validateFilterOptions(options) {
92
+ // Validate: includeCurrentRepo and repoIds are mutually exclusive
93
+ // Note: `includeCurrentRepo !== false` treats undefined as "enabled" (default behavior).
94
+ // Callers must explicitly pass includeCurrentRepo=false when using repoIds for cross-repo queries.
95
+ if (options.includeCurrentRepo !== false && options.repoIds && options.repoIds.length > 0) {
96
+ throw new Error('Cannot use repoIds when includeCurrentRepo is enabled (the default). ' +
97
+ 'These options are mutually exclusive. Set includeCurrentRepo=false to perform cross-repo queries with repoIds.');
98
+ }
99
+ // Validate: branch parameter should only be used when includeCurrentRepo is false.
100
+ // As above, `includeCurrentRepo !== false` treats both undefined and true as "enabled"
101
+ // for the current repo context, so callers must explicitly pass false for cross-repo.
102
+ if (options.branch && options.includeCurrentRepo !== false) {
103
+ throw new Error('Cannot use branch parameter when includeCurrentRepo is enabled (the default). ' +
104
+ 'Branch is automatically included via the current repo context. Set includeCurrentRepo=false to specify a branch explicitly.');
105
+ }
106
+ }
10
107
  /**
11
108
  * QdrantDB implements VectorDBInterface using Qdrant vector database.
12
109
  *
13
110
  * Features:
14
111
  * - Multi-tenant support via payload filtering (orgId/repoId)
112
+ * - Branch and commit isolation for PR workflows
15
113
  * - Collection naming: `lien_org_{orgId}`
16
114
  * - Cross-repo search by omitting repoId filter
17
115
  * - Tenant isolation via orgId filtering
116
+ * - Point ID generation includes branch/commit to prevent collisions
117
+ *
118
+ * Data Isolation:
119
+ * All queries are filtered by orgId, repoId, branch, and commitSha by default.
120
+ * This ensures that different branches and commits have isolated data, preventing
121
+ * PRs from overwriting each other's indices. Use cross-repo methods (searchCrossRepo,
122
+ * scanCrossRepo) to query across repositories within an organization.
18
123
  */
19
124
  export class QdrantDB {
20
125
  client;
21
126
  collectionName;
22
127
  orgId;
23
128
  repoId;
129
+ branch;
130
+ commitSha;
24
131
  initialized = false;
25
132
  dbPath; // For compatibility with manifest/version file operations
26
133
  lastVersionCheck = 0;
27
134
  currentVersion = 0;
28
135
  payloadMapper;
29
- constructor(url, apiKey, orgId, projectRoot) {
136
+ constructor(url, apiKey, orgId, projectRoot, branch, commitSha) {
30
137
  this.client = new QdrantClient({
31
138
  url,
32
139
  apiKey, // Optional, required for Qdrant Cloud
33
140
  });
34
141
  this.orgId = orgId;
35
142
  this.repoId = this.extractRepoId(projectRoot);
143
+ this.branch = branch;
144
+ this.commitSha = commitSha;
36
145
  // Collection naming: one per org
37
146
  this.collectionName = `lien_org_${orgId}`;
38
147
  // Initialize payload mapper
39
- this.payloadMapper = new QdrantPayloadMapper(this.orgId, this.repoId);
148
+ this.payloadMapper = new QdrantPayloadMapper(this.orgId, this.repoId, this.branch, this.commitSha);
40
149
  // dbPath is used for manifest and version files (stored locally even with Qdrant)
41
150
  // Use same path structure as LanceDB for consistency
42
151
  const projectName = path.basename(projectRoot);
@@ -62,12 +171,113 @@ export class QdrantDB {
62
171
  }
63
172
  /**
64
173
  * Generate a unique point ID from chunk metadata.
65
- * Uses hash of file path + line range for stable identification.
174
+ * Uses hash of file path + line range + branch + commitSha for stable identification.
175
+ * Includes branch/commit to prevent ID collisions across branches.
176
+ *
177
+ * **Hash Algorithm Choice:**
178
+ * Uses MD5 for performance and collision likelihood acceptable for this use case.
179
+ * - MD5 is deprecated for cryptographic purposes but suitable for non-security ID generation
180
+ * - Collision probability is extremely low: ~1 in 2^64 for random inputs
181
+ * - Input includes file path, line range, branch, and commit SHA, making collisions
182
+ * even less likely in practice
183
+ * - For typical codebases (thousands to hundreds of thousands of chunks), collision risk
184
+ * is negligible
185
+ * - If scaling to millions of chunks across many repos, consider upgrading to SHA-256
186
+ * for additional collision resistance (at ~10% performance cost)
66
187
  */
67
188
  generatePointId(metadata) {
68
- const idString = `${metadata.file}:${metadata.startLine}:${metadata.endLine}`;
189
+ const idString = `${metadata.file}:${metadata.startLine}:${metadata.endLine}:${this.branch}:${this.commitSha}`;
69
190
  return crypto.createHash('md5').update(idString).digest('hex');
70
191
  }
192
+ /**
193
+ * Build base filter for Qdrant queries.
194
+ * Uses builder pattern to simplify filter construction.
195
+ *
196
+ * **Important constraints:**
197
+ * - `includeCurrentRepo` and `repoIds` are mutually exclusive.
198
+ * - `includeCurrentRepo` defaults to `true` when `undefined` (treats `undefined` as "enabled").
199
+ * - To use `repoIds` for cross-repo queries, you must explicitly pass `includeCurrentRepo: false`.
200
+ * - The `branch` parameter can only be used when `includeCurrentRepo` is explicitly `false`.
201
+ * When `includeCurrentRepo` is enabled (default), branch is automatically included via
202
+ * the current repo context (`addRepoContext`).
203
+ *
204
+ * @param options - Filter options
205
+ * @param options.includeCurrentRepo - Whether to filter by current repo context (default: true when undefined).
206
+ * Must be explicitly `false` to use `repoIds` or `branch` parameters.
207
+ * @param options.repoIds - Repository IDs to filter by (requires `includeCurrentRepo: false`).
208
+ * @param options.branch - Branch name to filter by (requires `includeCurrentRepo: false`).
209
+ * @returns Qdrant filter object
210
+ */
211
+ buildBaseFilter(options) {
212
+ // Validate filter options (extracted to enable unit testing)
213
+ validateFilterOptions({
214
+ repoIds: options.repoIds,
215
+ branch: options.branch,
216
+ includeCurrentRepo: options.includeCurrentRepo,
217
+ });
218
+ const builder = new QdrantFilterBuilder(this.orgId);
219
+ if (options.includeCurrentRepo !== false) {
220
+ builder.addRepoContext(this.repoId, this.branch, this.commitSha);
221
+ }
222
+ if (options.repoIds) {
223
+ builder.addRepoIds(options.repoIds);
224
+ }
225
+ // Validate language is non-empty if explicitly provided (even if empty string)
226
+ if (options.language !== undefined) {
227
+ builder.addLanguage(options.language);
228
+ }
229
+ // Validate symbolType is non-empty if explicitly provided (even if empty string)
230
+ if (options.symbolType !== undefined) {
231
+ builder.addSymbolType(options.symbolType);
232
+ }
233
+ // Validate pattern is non-empty if explicitly provided (even if empty string)
234
+ if (options.pattern !== undefined) {
235
+ builder.addPattern(options.pattern, options.patternKey);
236
+ }
237
+ // Only add branch filter when includeCurrentRepo is false
238
+ // When includeCurrentRepo is true, branch is already added via addRepoContext
239
+ // Validate branch is non-empty if explicitly provided (even if empty string)
240
+ if (options.branch !== undefined && options.includeCurrentRepo === false) {
241
+ // addBranch will validate that branch is non-empty and non-whitespace
242
+ builder.addBranch(options.branch);
243
+ }
244
+ return builder.build();
245
+ }
246
+ /**
247
+ * Map Qdrant scroll results to SearchResult format.
248
+ *
249
+ * Note: Scroll/scan operations do not compute semantic similarity scores.
250
+ * For these results, score is always 0 and relevance is set to 'not_relevant'
251
+ * to indicate that the results are unscored (not that they are useless).
252
+ */
253
+ mapScrollResults(results) {
254
+ return (results.points || []).map((point) => ({
255
+ content: point.payload?.content || '',
256
+ metadata: this.payloadMapper.fromPayload(point.payload || {}),
257
+ score: 0,
258
+ relevance: 'not_relevant',
259
+ }));
260
+ }
261
+ /**
262
+ * Execute a scroll query with error handling.
263
+ */
264
+ async executeScrollQuery(filter, limit, errorContext) {
265
+ if (!this.initialized) {
266
+ throw new DatabaseError('Qdrant database not initialized');
267
+ }
268
+ try {
269
+ const results = await this.client.scroll(this.collectionName, {
270
+ filter,
271
+ limit,
272
+ with_payload: true,
273
+ with_vector: false,
274
+ });
275
+ return this.mapScrollResults(results);
276
+ }
277
+ catch (error) {
278
+ throw new DatabaseError(`Failed to ${errorContext}: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
279
+ }
280
+ }
71
281
  async initialize() {
72
282
  try {
73
283
  // Check if collection exists (returns { exists: boolean })
@@ -95,7 +305,10 @@ export class QdrantDB {
95
305
  throw new DatabaseError(`Failed to initialize Qdrant database: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
96
306
  }
97
307
  }
98
- async insertBatch(vectors, metadatas, contents) {
308
+ /**
309
+ * Validate batch input arrays have matching lengths.
310
+ */
311
+ validateBatchInputs(vectors, metadatas, contents) {
99
312
  if (!this.initialized) {
100
313
  throw new DatabaseError('Qdrant database not initialized');
101
314
  }
@@ -106,20 +319,28 @@ export class QdrantDB {
106
319
  contentsLength: contents.length,
107
320
  });
108
321
  }
322
+ }
323
+ /**
324
+ * Prepare Qdrant points from vectors, metadatas, and contents.
325
+ */
326
+ preparePoints(vectors, metadatas, contents) {
327
+ return vectors.map((vector, i) => {
328
+ const metadata = metadatas[i];
329
+ const payload = this.payloadMapper.toPayload(metadata, contents[i]);
330
+ return {
331
+ id: this.generatePointId(metadata),
332
+ vector: Array.from(vector),
333
+ payload,
334
+ };
335
+ });
336
+ }
337
+ async insertBatch(vectors, metadatas, contents) {
338
+ this.validateBatchInputs(vectors, metadatas, contents);
109
339
  if (vectors.length === 0) {
110
340
  return; // No-op for empty batches
111
341
  }
112
342
  try {
113
- // Prepare points for upsert
114
- const points = vectors.map((vector, i) => {
115
- const metadata = metadatas[i];
116
- const payload = this.payloadMapper.toPayload(metadata, contents[i]);
117
- return {
118
- id: this.generatePointId(metadata),
119
- vector: Array.from(vector),
120
- payload,
121
- };
122
- });
343
+ const points = this.preparePoints(vectors, metadatas, contents);
123
344
  // Upsert points in batches (Qdrant recommends batches of 100-1000)
124
345
  const batchSize = 100;
125
346
  for (let i = 0; i < points.length; i += batchSize) {
@@ -140,7 +361,7 @@ export class QdrantDB {
140
361
  throw new DatabaseError('Qdrant database not initialized');
141
362
  }
142
363
  try {
143
- // Search with tenant isolation (filter by orgId and repoId)
364
+ // Search with tenant isolation (filter by orgId, repoId, branch, and commitSha)
144
365
  const results = await this.client.search(this.collectionName, {
145
366
  vector: Array.from(queryVector),
146
367
  limit,
@@ -148,6 +369,8 @@ export class QdrantDB {
148
369
  must: [
149
370
  { key: 'orgId', match: { value: this.orgId } },
150
371
  { key: 'repoId', match: { value: this.repoId } },
372
+ { key: 'branch', match: { value: this.branch } },
373
+ { key: 'commitSha', match: { value: this.commitSha } },
151
374
  ],
152
375
  },
153
376
  });
@@ -164,25 +387,35 @@ export class QdrantDB {
164
387
  }
165
388
  /**
166
389
  * Search across all repos in the organization (cross-repo search).
167
- * Omits repoId filter to enable cross-repo queries.
390
+ *
391
+ * - Omits repoId filter by default to enable true cross-repo queries.
392
+ * - When repoIds are provided, restricts results to those repositories only.
393
+ * - When branch is omitted, returns chunks from all branches and commits
394
+ * (including historical PR branches and stale commits).
395
+ * - When branch is provided, filters by branch name only and still returns
396
+ * chunks from all commits on that branch across the selected repos.
397
+ *
398
+ * This is a low-level primitive for cross-repo augmentation. Higher-level
399
+ * workflows (e.g. \"latest commit only\") should be built on top of this API.
400
+ *
401
+ * @param queryVector - Query vector for semantic search
402
+ * @param limit - Maximum number of results to return (default: 5)
403
+ * @param options - Optional search options
404
+ * @param options.repoIds - Repository IDs to filter by (optional)
405
+ * @param options.branch - Branch name to filter by (optional)
168
406
  */
169
- async searchCrossRepo(queryVector, limit = 5, repoIds) {
407
+ async searchCrossRepo(queryVector, limit = 5, options) {
170
408
  if (!this.initialized) {
171
409
  throw new DatabaseError('Qdrant database not initialized');
172
410
  }
173
411
  try {
174
- const filter = {
175
- must: [
176
- { key: 'orgId', match: { value: this.orgId } },
177
- ],
178
- };
179
- // Optionally filter to specific repos
180
- if (repoIds && repoIds.length > 0) {
181
- filter.must.push({
182
- key: 'repoId',
183
- match: { any: repoIds },
184
- });
185
- }
412
+ // Use buildBaseFilter for consistency with scanCrossRepo and other methods
413
+ // This provides automatic validation for empty repoIds arrays, whitespace-only branches, etc.
414
+ const filter = this.buildBaseFilter({
415
+ includeCurrentRepo: false,
416
+ repoIds: options?.repoIds,
417
+ branch: options?.branch,
418
+ });
186
419
  const results = await this.client.search(this.collectionName, {
187
420
  vector: Array.from(queryVector),
188
421
  limit,
@@ -200,40 +433,13 @@ export class QdrantDB {
200
433
  }
201
434
  }
202
435
  async scanWithFilter(options) {
203
- if (!this.initialized) {
204
- throw new DatabaseError('Qdrant database not initialized');
205
- }
206
- try {
207
- const filter = {
208
- must: [
209
- { key: 'orgId', match: { value: this.orgId } },
210
- { key: 'repoId', match: { value: this.repoId } },
211
- ],
212
- };
213
- if (options.language) {
214
- filter.must.push({ key: 'language', match: { value: options.language } });
215
- }
216
- if (options.pattern) {
217
- // Qdrant supports regex in match filters
218
- filter.must.push({ key: 'file', match: { text: options.pattern } });
219
- }
220
- const limit = options.limit || 100;
221
- const results = await this.client.scroll(this.collectionName, {
222
- filter,
223
- limit,
224
- with_payload: true,
225
- with_vector: false,
226
- });
227
- return (results.points || []).map(point => ({
228
- content: point.payload?.content || '',
229
- metadata: this.payloadMapper.fromPayload(point.payload || {}),
230
- score: 0, // No relevance score for filtered scans
231
- relevance: 'not_relevant',
232
- }));
233
- }
234
- catch (error) {
235
- throw new DatabaseError(`Failed to scan Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
236
- }
436
+ const filter = this.buildBaseFilter({
437
+ language: options.language,
438
+ pattern: options.pattern,
439
+ patternKey: 'file',
440
+ includeCurrentRepo: true,
441
+ });
442
+ return this.executeScrollQuery(filter, options.limit || 100, 'scan Qdrant');
237
443
  }
238
444
  async scanAll(options = {}) {
239
445
  // Use scanWithFilter with a high limit to get all chunks
@@ -244,111 +450,100 @@ export class QdrantDB {
244
450
  }
245
451
  /**
246
452
  * Scan with filter across all repos in the organization (cross-repo).
247
- * Omits repoId filter to enable cross-repo queries.
453
+ *
454
+ * - Omits repoId filter by default to enable true cross-repo scans.
455
+ * - When repoIds are provided, restricts results to those repositories only.
456
+ * - When branch is omitted, returns chunks from all branches and commits
457
+ * (including historical PR branches and stale commits).
458
+ * - When branch is provided, filters by branch name only and still returns
459
+ * chunks from all commits on that branch across the selected repos.
460
+ *
461
+ * Like searchCrossRepo, this is a low-level primitive. Higher-level behavior
462
+ * such as \"latest commit only\" should be implemented in orchestrating code.
248
463
  */
249
464
  async scanCrossRepo(options) {
250
- if (!this.initialized) {
251
- throw new DatabaseError('Qdrant database not initialized');
252
- }
253
- try {
254
- const filter = {
255
- must: [
256
- { key: 'orgId', match: { value: this.orgId } },
257
- ],
258
- };
259
- // Optionally filter to specific repos
260
- if (options.repoIds && options.repoIds.length > 0) {
261
- filter.must.push({
262
- key: 'repoId',
263
- match: { any: options.repoIds },
264
- });
265
- }
266
- if (options.language) {
267
- filter.must.push({ key: 'language', match: { value: options.language } });
268
- }
269
- if (options.pattern) {
270
- filter.must.push({ key: 'file', match: { text: options.pattern } });
271
- }
272
- const limit = options.limit || 10000; // Higher default for cross-repo
273
- const results = await this.client.scroll(this.collectionName, {
274
- filter,
275
- limit,
276
- with_payload: true,
277
- with_vector: false,
278
- });
279
- return (results.points || []).map(point => ({
280
- content: point.payload?.content || '',
281
- metadata: this.payloadMapper.fromPayload(point.payload || {}),
282
- score: 0,
283
- relevance: 'not_relevant',
284
- }));
285
- }
286
- catch (error) {
287
- throw new DatabaseError(`Failed to scan Qdrant (cross-repo): ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
288
- }
465
+ const filter = this.buildBaseFilter({
466
+ language: options.language,
467
+ pattern: options.pattern,
468
+ patternKey: 'file',
469
+ repoIds: options.repoIds,
470
+ branch: options.branch,
471
+ includeCurrentRepo: false, // Cross-repo: don't filter by current repo
472
+ });
473
+ return this.executeScrollQuery(filter, options.limit || 10000, // Higher default for cross-repo
474
+ 'scan Qdrant (cross-repo)');
289
475
  }
290
476
  async querySymbols(options) {
477
+ const filter = this.buildBaseFilter({
478
+ language: options.language,
479
+ pattern: options.pattern,
480
+ patternKey: 'symbolName',
481
+ symbolType: options.symbolType,
482
+ includeCurrentRepo: true,
483
+ });
484
+ return this.executeScrollQuery(filter, options.limit || 100, 'query symbols in Qdrant');
485
+ }
486
+ async clear() {
291
487
  if (!this.initialized) {
292
488
  throw new DatabaseError('Qdrant database not initialized');
293
489
  }
294
490
  try {
295
- const filter = {
296
- must: [
297
- { key: 'orgId', match: { value: this.orgId } },
298
- { key: 'repoId', match: { value: this.repoId } },
299
- ],
300
- };
301
- if (options.language) {
302
- filter.must.push({ key: 'language', match: { value: options.language } });
303
- }
304
- if (options.symbolType) {
305
- filter.must.push({ key: 'symbolType', match: { value: options.symbolType } });
306
- }
307
- if (options.pattern) {
308
- filter.must.push({ key: 'symbolName', match: { text: options.pattern } });
491
+ // Check if collection exists before trying to clear it (returns { exists: boolean })
492
+ const collectionCheck = await this.client.collectionExists(this.collectionName);
493
+ if (!collectionCheck.exists) {
494
+ // Collection doesn't exist yet, nothing to clear
495
+ return;
309
496
  }
310
- const limit = options.limit || 100;
311
- const results = await this.client.scroll(this.collectionName, {
312
- filter,
313
- limit,
314
- with_payload: true,
315
- with_vector: false,
497
+ // Delete all points for this repository and branch/commit only
498
+ // This ensures we only clear the current branch's data, not all branches
499
+ await this.client.delete(this.collectionName, {
500
+ filter: {
501
+ must: [
502
+ { key: 'orgId', match: { value: this.orgId } },
503
+ { key: 'repoId', match: { value: this.repoId } },
504
+ { key: 'branch', match: { value: this.branch } },
505
+ { key: 'commitSha', match: { value: this.commitSha } },
506
+ ],
507
+ },
316
508
  });
317
- return (results.points || []).map(point => ({
318
- content: point.payload?.content || '',
319
- metadata: this.payloadMapper.fromPayload(point.payload || {}),
320
- score: 0,
321
- relevance: 'not_relevant',
322
- }));
323
509
  }
324
510
  catch (error) {
325
- throw new DatabaseError(`Failed to query symbols in Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
511
+ throw new DatabaseError(`Failed to clear Qdrant collection: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
326
512
  }
327
513
  }
328
- async clear() {
514
+ /**
515
+ * Clear all data for a specific branch (all commits).
516
+ *
517
+ * Qdrant-only helper: this is not part of the generic VectorDBInterface and
518
+ * is intended for cloud/PR workflows where multiple commits exist per branch.
519
+ * LanceDB and other backends do not implement this method.
520
+ *
521
+ * @param branch - Branch name to clear (defaults to current branch)
522
+ */
523
+ async clearBranch(branch) {
329
524
  if (!this.initialized) {
330
525
  throw new DatabaseError('Qdrant database not initialized');
331
526
  }
527
+ const targetBranch = branch ?? this.branch;
332
528
  try {
333
- // Check if collection exists before trying to clear it (returns { exists: boolean })
334
529
  const collectionCheck = await this.client.collectionExists(this.collectionName);
335
530
  if (!collectionCheck.exists) {
336
531
  // Collection doesn't exist yet, nothing to clear
337
532
  return;
338
533
  }
339
- // Delete all points for this repository only (filter by both orgId and repoId)
340
- // This ensures we only clear the current repo's data, not all repos in the org
534
+ // Delete all points for this repository and branch (all commits)
341
535
  await this.client.delete(this.collectionName, {
342
536
  filter: {
343
537
  must: [
344
538
  { key: 'orgId', match: { value: this.orgId } },
345
539
  { key: 'repoId', match: { value: this.repoId } },
540
+ { key: 'branch', match: { value: targetBranch } },
346
541
  ],
347
542
  },
348
543
  });
349
544
  }
350
545
  catch (error) {
351
- throw new DatabaseError(`Failed to clear Qdrant collection: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName });
546
+ throw new DatabaseError(`Failed to clear branch from Qdrant: ${error instanceof Error ? error.message : String(error)}`, { collectionName: this.collectionName, branch: targetBranch });
352
547
  }
353
548
  }
354
549
  async deleteByFile(filepath) {
@@ -361,6 +556,8 @@ export class QdrantDB {
361
556
  must: [
362
557
  { key: 'orgId', match: { value: this.orgId } },
363
558
  { key: 'repoId', match: { value: this.repoId } },
559
+ { key: 'branch', match: { value: this.branch } },
560
+ { key: 'commitSha', match: { value: this.commitSha } },
364
561
  { key: 'file', match: { value: filepath } },
365
562
  ],
366
563
  },