@mhalder/qdrant-mcp-server 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,569 @@
1
+ /**
2
+ * Federated and contextual search tools registration
3
+ *
4
+ * Provides advanced search capabilities:
5
+ * - contextual_search: Combined git + code search for a single repository
6
+ * - federated_search: Search across multiple indexed repositories
7
+ */
8
+
9
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
10
+ import type { CodeIndexer } from "../code/indexer.js";
11
+ import type { CodeSearchResult } from "../code/types.js";
12
+ import type { GitHistoryIndexer } from "../git/indexer.js";
13
+ import type { GitSearchResult } from "../git/types.js";
14
+ import * as schemas from "./schemas.js";
15
+
16
+ // ============================================================================
17
+ // Types
18
+ // ============================================================================
19
+
20
/**
 * Indexers required by the contextual and federated search tools.
 */
export interface FederatedToolDependencies {
  /** Used for code index status checks and code searches. */
  codeIndexer: CodeIndexer;
  /** Used for git history index status checks and commit searches. */
  gitHistoryIndexer: GitHistoryIndexer;
}
24
+
25
/**
 * Pairs one code search hit with the commits whose file list contains
 * the hit's file.
 */
export interface CodeCommitCorrelation {
  /** The code search hit the commits are attached to. */
  codeResult: CodeSearchResult;
  /** Summary fields copied from each matching git search result. */
  relatedCommits: {
    shortHash: string;
    subject: string;
    author: string;
    date: string;
  }[];
}
37
+
38
/**
 * Outcome of a contextual search: code hits, git hits, and the
 * correlations built between them for a single repository.
 */
export interface ContextualSearchResult {
  codeResults: CodeSearchResult[];
  gitResults: GitSearchResult[];
  /** Empty when correlation was not requested or nothing matched. */
  correlations: CodeCommitCorrelation[];
  /** Echo of the request plus the lengths of the three arrays above. */
  metadata: {
    path: string;
    query: string;
    codeResultCount: number;
    gitResultCount: number;
    correlationCount: number;
  };
}
53
+
54
/**
 * A single federated search hit, tagged with its kind and source repository.
 *
 * `resultType` is the discriminant: "code" hits carry the fields of
 * `CodeSearchResult`, "git" hits carry the fields of `GitSearchResult`.
 * `repoPath` is the repository path the hit was found in.
 */
export type FederatedResult =
  | (CodeSearchResult & { resultType: "code"; repoPath: string })
  | (GitSearchResult & { resultType: "git"; repoPath: string });
60
+
61
/**
 * Ranked federated search hits together with a description of the search
 * that produced them.
 */
export interface FederatedSearchResponse {
  /** Hits in final ranked order. */
  results: FederatedResult[];
  metadata: {
    query: string;
    searchType: "code" | "git" | "both";
    /** Repository paths that were searched. */
    repositoriesSearched: string[];
    /** Number of entries in `results`. */
    totalResults: number;
  };
}
73
+
74
+ // ============================================================================
75
+ // Helper Functions
76
+ // ============================================================================
77
+
78
/**
 * Attach to each code hit the commits that touched the same file.
 *
 * A commit is related to a code hit when at least one entry of the commit's
 * `files` list matches the hit's `filePath` according to `pathsMatch`.
 * Code hits without any related commit are left out of the output.
 *
 * @param codeResults - Code search hits to correlate.
 * @param gitResults - Git search hits to look for matching files in.
 * @returns One correlation per code hit that has at least one related commit,
 *   in the order of `codeResults`; commits keep the order of `gitResults`.
 */
export function buildCorrelations(
  codeResults: CodeSearchResult[],
  gitResults: GitSearchResult[],
): CodeCommitCorrelation[] {
  return codeResults.flatMap((codeResult) => {
    const relatedCommits: CodeCommitCorrelation["relatedCommits"] = gitResults
      .filter((commit) =>
        commit.files.some((file) => pathsMatch(codeResult.filePath, file)),
      )
      .map((commit) => ({
        shortHash: commit.shortHash,
        subject: commit.subject,
        author: commit.author,
        date: commit.date,
      }));

    // Only code hits with at least one related commit produce a correlation.
    return relatedCommits.length > 0 ? [{ codeResult, relatedCommits }] : [];
  });
}
118
+
119
/**
 * Normalize a file path for comparison: backslashes become forward slashes
 * and the whole path is lower-cased.
 */
function normalizePath(path: string): string {
  return path.replace(/\\/g, "/").toLowerCase();
}

/**
 * Split a path into the segments that matter for comparison.
 *
 * Empty segments (leading, trailing or doubled slashes) and "." segments
 * are dropped because they do not change which file a path names.
 */
function splitPathSegments(path: string): string[] {
  return normalizePath(path)
    .split("/")
    .filter((segment) => segment !== "" && segment !== ".");
}

/**
 * Check if two paths refer to the same file by comparing path segments from the end.
 * This handles relative vs absolute paths while avoiding false positives.
 *
 * The path with fewer segments must be a complete suffix of the other one.
 * Comparison is case-insensitive and treats "\" and "/" alike.
 *
 * Examples:
 * - "app/models/user.ts" vs "models/user.ts" → true (suffix match)
 * - "app/models/user.ts" vs "lib/user.ts" → false (different parent dir)
 * - "src/user.ts" vs "user.ts" → true (suffix match)
 * - "./user.ts" vs "src/user.ts" → true ("." segments are ignored)
 *
 * @param path1 - First path.
 * @param path2 - Second path.
 * @returns `true` when the shorter path is a non-empty segment-wise suffix
 *   of the longer one; `false` otherwise, including when either path has
 *   no segments at all.
 */
export function pathsMatch(path1: string, path2: string): boolean {
  const first = splitPathSegments(path1);
  const second = splitPathSegments(path2);

  const [shorter, longer] =
    first.length <= second.length ? [first, second] : [second, first];

  // A path without segments names no file and must never match.
  if (shorter.length === 0) {
    return false;
  }

  const offset = longer.length - shorter.length;
  return shorter.every((segment, index) => segment === longer[offset + index]);
}
152
+
153
/**
 * Normalize scores to [0, 1] range using min-max normalization.
 *
 * The input array and its elements are not modified; every returned element
 * is a shallow copy with a replaced `score`.
 *
 * @param results - Items carrying a numeric `score`.
 * @returns Copies of `results` in the same order. The lowest score maps to 0
 *   and the highest to 1. When there is only one item, or all scores are
 *   identical, every score becomes 1. An empty input yields an empty array.
 */
export function normalizeScores<T extends { score: number }>(
  results: T[],
): T[] {
  if (results.length === 0) return [];

  // Track the bounds in a single pass. Spreading the scores into
  // Math.min/Math.max passes one call argument per element and can exceed
  // the engine's argument limit on large inputs.
  let minScore = Number.POSITIVE_INFINITY;
  let maxScore = Number.NEGATIVE_INFINITY;
  for (const { score } of results) {
    if (score < minScore) minScore = score;
    if (score > maxScore) maxScore = score;
  }

  const range = maxScore - minScore;

  // No spread between scores (single item or all identical): normalize to 1
  // instead of dividing by zero.
  if (!(range > 0)) {
    return results.map((r) => ({ ...r, score: 1 }));
  }

  return results.map((r) => ({
    ...r,
    score: (r.score - minScore) / range,
  }));
}
176
+
177
/**
 * Calculate Reciprocal Rank Fusion score.
 *
 * RRF formula: sum(1 / (k + rank)) over all given ranks. Larger `k` values
 * flatten the difference between neighbouring ranks.
 *
 * @param ranks - Ranks of one item, one entry per ranking it appears in.
 *   Callers in this file pass 1-based ranks.
 * @param k - Smoothing constant added to every rank. Defaults to 60.
 * @returns The summed score; 0 for an empty `ranks` array.
 */
export function calculateRRFScore(ranks: number[], k = 60): number {
  return ranks.reduce((sum, rank) => sum + 1 / (k + rank), 0);
}
185
+
186
+ // ============================================================================
187
+ // Tool Implementations
188
+ // ============================================================================
189
+
190
/**
 * Run a code search and a git history search against one repository and,
 * optionally, correlate the two result sets.
 *
 * Both index status checks are issued together, as are both searches.
 *
 * @param codeIndexer - Indexer queried for code index status and code hits.
 * @param gitHistoryIndexer - Indexer queried for git index status and commits.
 * @param params - `path` and `query` are required; `codeLimit` and `gitLimit`
 *   default to 5 and `correlate` defaults to `true`.
 * @returns Code hits, git hits, correlations (empty when `correlate` is
 *   `false`) and metadata with the corresponding counts.
 * @throws Error when the code index or the git history index for `path`
 *   does not report the status "indexed". The code index is checked first.
 */
async function performContextualSearch(
  codeIndexer: CodeIndexer,
  gitHistoryIndexer: GitHistoryIndexer,
  {
    path,
    query,
    codeLimit = 5,
    gitLimit = 5,
    correlate = true,
  }: {
    path: string;
    query: string;
    codeLimit?: number;
    gitLimit?: number;
    correlate?: boolean;
  },
): Promise<ContextualSearchResult> {
  // Both indexes must exist before any search is attempted.
  const [codeIndex, gitIndex] = await Promise.all([
    codeIndexer.getIndexStatus(path),
    gitHistoryIndexer.getIndexStatus(path),
  ]);

  if (codeIndex.status !== "indexed") {
    throw new Error(
      `Code index not found for "${path}". Run index_codebase first.`,
    );
  }
  if (gitIndex.status !== "indexed") {
    throw new Error(
      `Git history index not found for "${path}". Run index_git_history first.`,
    );
  }

  const [codeResults, gitResults] = await Promise.all([
    codeIndexer.searchCode(path, query, { limit: codeLimit }),
    gitHistoryIndexer.searchHistory(path, query, { limit: gitLimit }),
  ]);

  let correlations: CodeCommitCorrelation[] = [];
  if (correlate) {
    correlations = buildCorrelations(codeResults, gitResults);
  }

  const metadata = {
    path,
    query,
    codeResultCount: codeResults.length,
    gitResultCount: gitResults.length,
    correlationCount: correlations.length,
  };

  return { codeResults, gitResults, correlations, metadata };
}
248
+
249
/**
 * Search several indexed repositories and merge the hits into one ranked list.
 *
 * Steps:
 * 1. Check the index required by `searchType` for every repository and
 *    report all missing indexes in a single error.
 * 2. Query every repository in parallel, giving each an equal share of `limit`.
 * 3. Min-max normalize scores per result type, rank hits within each
 *    repository/type group, and order the merged list by RRF score.
 *
 * @param codeIndexer - Indexer queried for code index status and code hits.
 * @param gitHistoryIndexer - Indexer queried for git index status and commits.
 * @param params - `paths` and `query` are required; `searchType` defaults to
 *   "both" and `limit` to 20. Repeated entries in `paths` are searched once.
 * @returns The first `limit` ranked hits plus metadata. The metadata lists
 *   the de-duplicated repository paths.
 * @throws Error listing every missing index when validation fails.
 */
async function performFederatedSearch(
  codeIndexer: CodeIndexer,
  gitHistoryIndexer: GitHistoryIndexer,
  params: {
    paths: string[];
    query: string;
    searchType?: "code" | "git" | "both";
    limit?: number;
  },
): Promise<FederatedSearchResponse> {
  const { query, searchType = "both", limit = 20 } = params;

  // Searching the same repository twice would duplicate every hit and
  // shrink the per-repository share of `limit`.
  const paths = [...new Set(params.paths)];

  const includeCode = searchType === "code" || searchType === "both";
  const includeGit = searchType === "git" || searchType === "both";

  // Validate every repository up front and collect all problems so that
  // the caller sees the complete list in one error.
  const errorsPerRepo = await Promise.all(
    paths.map(async (path) => {
      const [codeStatus, gitStatus] = await Promise.all([
        includeCode ? codeIndexer.getIndexStatus(path) : undefined,
        includeGit ? gitHistoryIndexer.getIndexStatus(path) : undefined,
      ]);

      const errors: string[] = [];
      if (codeStatus !== undefined && codeStatus.status !== "indexed") {
        errors.push(`Code index not found for "${path}"`);
      }
      if (gitStatus !== undefined && gitStatus.status !== "indexed") {
        errors.push(`Git history index not found for "${path}"`);
      }
      return errors;
    }),
  );

  const allErrors = errorsPerRepo.flat();
  if (allErrors.length > 0) {
    throw new Error(`Index validation failed:\n${allErrors.join("\n")}`);
  }

  // Each repository gets an equal share of the overall limit.
  const perRepoLimit = Math.ceil(limit / paths.length);

  const searchCode = async (repoPath: string): Promise<FederatedResult[]> => {
    const hits = await codeIndexer.searchCode(repoPath, query, {
      limit: perRepoLimit,
    });
    return hits.map((hit) => ({
      ...hit,
      resultType: "code" as const,
      repoPath,
    }));
  };

  const searchGit = async (repoPath: string): Promise<FederatedResult[]> => {
    const hits = await gitHistoryIndexer.searchHistory(repoPath, query, {
      limit: perRepoLimit,
    });
    return hits.map((hit) => ({
      ...hit,
      resultType: "git" as const,
      repoPath,
    }));
  };

  // Parallel search across all repositories
  const searches: Promise<FederatedResult[]>[] = [];
  for (const path of paths) {
    if (includeCode) searches.push(searchCode(path));
    if (includeGit) searches.push(searchGit(path));
  }
  const allResults = (await Promise.all(searches)).flat();

  // Normalize scores per result type to ensure fair comparison
  const codeResults = allResults.filter(
    (r): r is FederatedResult & { resultType: "code" } =>
      r.resultType === "code",
  );
  const gitResults = allResults.filter(
    (r): r is FederatedResult & { resultType: "git" } => r.resultType === "git",
  );
  const normalizedResults = [
    ...normalizeScores(codeResults),
    ...normalizeScores(gitResults),
  ];

  // Rank within each repo+type group for fair cross-repo interleaving:
  // the top hit of every group receives the same RRF score.
  const groups = new Map<string, FederatedResult[]>();
  for (const result of normalizedResults) {
    const key = `${result.repoPath}:${result.resultType}`;
    const group = groups.get(key);
    if (group) {
      group.push(result);
    } else {
      groups.set(key, [result]);
    }
  }

  // Ranks are 1-based and keyed by object identity of the normalized hit.
  const rankLookup = new Map<FederatedResult, number>();
  for (const group of groups.values()) {
    group.sort((a, b) => b.score - a.score);
    group.forEach((result, index) => {
      rankLookup.set(result, index + 1);
    });
  }

  // Order by RRF score (higher is better); ties keep their existing order.
  const topResults = normalizedResults
    .map((result) => ({
      result,
      rrfScore: calculateRRFScore([rankLookup.get(result) ?? 1]),
    }))
    .sort((a, b) => b.rrfScore - a.rrfScore)
    .slice(0, limit)
    .map((entry) => entry.result);

  return {
    results: topResults,
    metadata: {
      query,
      searchType,
      repositoriesSearched: paths,
      totalResults: topResults.length,
    },
  };
}
390
+
391
+ // ============================================================================
392
+ // Tool Registration
393
+ // ============================================================================
394
+
395
/**
 * Turn a caught value into the error text returned to the MCP client.
 */
function toErrorText(error: unknown): string {
  const detail = error instanceof Error ? error.message : String(error);
  return `Error: ${detail}`;
}

/**
 * Render the first `shown` file names, followed by a "(+N more)" suffix
 * when the list is longer than `shown`.
 */
function previewFiles(files: readonly string[], shown: number): string {
  const head = files.slice(0, shown).join(", ");
  const hidden = files.length - shown;
  return hidden > 0 ? `${head} (+${hidden} more)` : head;
}

/**
 * Render a contextual search result as markdown: code hits, git hits,
 * correlations (each section only when non-empty) and a summary line.
 */
function renderContextualSearch(result: ContextualSearchResult): string {
  const fence = "```";
  const sections: string[] = [];

  if (result.codeResults.length > 0) {
    sections.push("## Code Results\n");
    for (const [index, hit] of result.codeResults.entries()) {
      const heading = `### ${index + 1}. ${hit.filePath}:${hit.startLine}-${hit.endLine} (score: ${hit.score.toFixed(3)})\n`;
      sections.push(
        `${heading}Language: ${hit.language}\n${fence}${hit.language}\n${hit.content}\n${fence}\n`,
      );
    }
  }

  if (result.gitResults.length > 0) {
    sections.push("\n## Git History Results\n");
    for (const [index, commit] of result.gitResults.entries()) {
      sections.push(
        `### ${index + 1}. ${commit.shortHash} - ${commit.subject} (score: ${commit.score.toFixed(3)})\n` +
          `Author: ${commit.author} | Date: ${commit.date} | Type: ${commit.commitType}\n` +
          `Files: ${previewFiles(commit.files, 5)}\n`,
      );
    }
  }

  if (result.correlations.length > 0) {
    sections.push("\n## Correlations (Code ↔ Commits)\n");
    for (const { codeResult, relatedCommits } of result.correlations) {
      // Only the first three related commits are listed per code hit.
      const commitLines = relatedCommits
        .slice(0, 3)
        .map(({ shortHash, subject }) => ` - ${shortHash}: ${subject}`)
        .join("\n");
      sections.push(
        `**${codeResult.filePath}:${codeResult.startLine}** modified by:\n${commitLines}\n`,
      );
    }
  }

  const { codeResultCount, gitResultCount, correlationCount } = result.metadata;
  sections.push(
    `\n---\nFound ${codeResultCount} code result(s), ` +
      `${gitResultCount} git result(s), ` +
      `${correlationCount} correlation(s).`,
  );

  return sections.join("\n");
}

/**
 * Render a non-empty federated search response as markdown: a header,
 * one entry per hit in ranked order, and a totals line.
 */
function renderFederatedSearch(
  query: string,
  response: FederatedSearchResponse,
): string {
  const fence = "```";
  const { searchType, repositoriesSearched, totalResults } = response.metadata;

  const sections: string[] = [
    `# Federated Search Results\n` +
      `Query: "${query}" | Type: ${searchType} | ` +
      `Repositories: ${repositoriesSearched.length}\n`,
  ];

  for (const [index, hit] of response.results.entries()) {
    const position = index + 1;
    if (hit.resultType === "code") {
      sections.push(
        `## ${position}. [CODE] ${hit.filePath}:${hit.startLine}-${hit.endLine}\n` +
          `Repository: ${hit.repoPath} | Language: ${hit.language} | Score: ${hit.score.toFixed(3)}\n` +
          `${fence}${hit.language}\n${hit.content}\n${fence}\n`,
      );
    } else {
      sections.push(
        `## ${position}. [GIT] ${hit.shortHash} - ${hit.subject}\n` +
          `Repository: ${hit.repoPath} | Author: ${hit.author} | Date: ${hit.date} | Score: ${hit.score.toFixed(3)}\n` +
          `Type: ${hit.commitType} | Files: ${previewFiles(hit.files, 3)}\n`,
      );
    }
  }

  sections.push(
    `\n---\nTotal: ${totalResults} result(s) from ${repositoriesSearched.length} repository(ies).`,
  );

  return sections.join("\n");
}

/**
 * Register federated search tools on the MCP server.
 *
 * Two tools are registered: `contextual_search` (code + git history for a
 * single repository) and `federated_search` (several repositories at once).
 * Both return their result as one markdown text item. Any error thrown while
 * searching or formatting is returned as a text item with `isError: true`
 * instead of being rethrown.
 *
 * @param server - MCP server to register the tools on.
 * @param deps - Code and git history indexers used by both tools.
 */
export function registerFederatedTools(
  server: McpServer,
  deps: FederatedToolDependencies,
): void {
  const { codeIndexer, gitHistoryIndexer } = deps;

  // contextual_search
  server.registerTool(
    "contextual_search",
    {
      title: "Contextual Search",
      description:
        "Combined semantic search across code and git history for a single repository. " +
        "Returns code chunks, relevant commits, and correlations showing which commits " +
        "modified which files. Useful for understanding code evolution and finding related changes.",
      inputSchema: schemas.ContextualSearchSchema,
    },
    async ({ path, query, codeLimit, gitLimit, correlate }) => {
      try {
        const result = await performContextualSearch(
          codeIndexer,
          gitHistoryIndexer,
          { path, query, codeLimit, gitLimit, correlate },
        );
        const text = renderContextualSearch(result);
        return { content: [{ type: "text", text }] };
      } catch (error) {
        return {
          content: [{ type: "text", text: toErrorText(error) }],
          isError: true,
        };
      }
    },
  );

  // federated_search
  server.registerTool(
    "federated_search",
    {
      title: "Federated Search",
      description:
        "Search across multiple indexed repositories simultaneously. " +
        "Combines and ranks results using Reciprocal Rank Fusion (RRF) for fair cross-repository comparison. " +
        "Supports code-only, git-only, or combined search modes.",
      inputSchema: schemas.FederatedSearchSchema,
    },
    async ({ paths, query, searchType, limit }) => {
      try {
        const response = await performFederatedSearch(
          codeIndexer,
          gitHistoryIndexer,
          { paths, query, searchType, limit },
        );

        const text =
          response.results.length === 0
            ? `No results found for query "${query}" across ${paths.length} repository(ies).`
            : renderFederatedSearch(query, response);

        return { content: [{ type: "text", text }] };
      } catch (error) {
        return {
          content: [{ type: "text", text: toErrorText(error) }],
          isError: true,
        };
      }
    },
  );
}
@@ -10,6 +10,7 @@ import type { QdrantManager } from "../qdrant/client.js";
10
10
  import { registerCodeTools } from "./code.js";
11
11
  import { registerCollectionTools } from "./collection.js";
12
12
  import { registerDocumentTools } from "./document.js";
13
+ import { registerFederatedTools } from "./federated.js";
13
14
  import { registerGitHistoryTools } from "./git-history.js";
14
15
  import { registerSearchTools } from "./search.js";
15
16
 
@@ -49,6 +50,11 @@ export function registerAllTools(
49
50
  registerGitHistoryTools(server, {
50
51
  gitHistoryIndexer: deps.gitHistoryIndexer,
51
52
  });
53
+
54
+ registerFederatedTools(server, {
55
+ codeIndexer: deps.codeIndexer,
56
+ gitHistoryIndexer: deps.gitHistoryIndexer,
57
+ });
52
58
  }
53
59
 
54
60
  // Re-export schemas for external use
@@ -203,3 +203,42 @@ export const GetGitIndexStatusSchema = {
203
203
  export const ClearGitIndexSchema = {
204
204
  path: z.string().describe("Path to git repository"),
205
205
  };
206
+
207
// Contextual Search - Combined git + code search
// Both limits must be positive integers; zero, negative and fractional
// values are rejected here instead of being handed to the indexers.
export const ContextualSearchSchema = {
  path: z
    .string()
    .describe(
      "Path to git repository (must be indexed for both code and git history)",
    ),
  query: z.string().describe("Natural language search query"),
  codeLimit: z
    .number()
    .int()
    .positive()
    .optional()
    .describe("Maximum number of code results (default: 5)"),
  gitLimit: z
    .number()
    .int()
    .positive()
    .optional()
    .describe("Maximum number of git history results (default: 5)"),
  correlate: z
    .boolean()
    .optional()
    .describe("Link code chunks to commits that modified them (default: true)"),
};
228
+
229
// Federated Search - Multi-repository search
// `limit` must be a positive integer: it is divided among the repositories
// and later used to cut the merged result list, so zero, negative or
// fractional values would produce misleading output.
export const FederatedSearchSchema = {
  paths: z
    .array(z.string())
    .min(1)
    .describe("Array of repository paths to search (must all be indexed)"),
  query: z.string().describe("Natural language search query"),
  searchType: z
    .enum(["code", "git", "both"])
    .optional()
    .describe("Type of search (default: both)"),
  limit: z
    .number()
    .int()
    .positive()
    .optional()
    .describe("Total maximum results across all repositories (default: 20)"),
};