@mhalder/qdrant-mcp-server 1.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +36 -0
- package/biome.json +34 -0
- package/build/embeddings/sparse.d.ts +40 -0
- package/build/embeddings/sparse.d.ts.map +1 -0
- package/build/embeddings/sparse.js +105 -0
- package/build/embeddings/sparse.js.map +1 -0
- package/build/embeddings/sparse.test.d.ts +2 -0
- package/build/embeddings/sparse.test.d.ts.map +1 -0
- package/build/embeddings/sparse.test.js +69 -0
- package/build/embeddings/sparse.test.js.map +1 -0
- package/build/index.js +333 -32
- package/build/index.js.map +1 -1
- package/build/qdrant/client.d.ts +21 -2
- package/build/qdrant/client.d.ts.map +1 -1
- package/build/qdrant/client.js +131 -17
- package/build/qdrant/client.js.map +1 -1
- package/build/qdrant/client.test.js +429 -21
- package/build/qdrant/client.test.js.map +1 -1
- package/build/transport.test.d.ts +2 -0
- package/build/transport.test.d.ts.map +1 -0
- package/build/transport.test.js +168 -0
- package/build/transport.test.js.map +1 -0
- package/examples/README.md +16 -1
- package/examples/basic/README.md +1 -0
- package/examples/hybrid-search/README.md +236 -0
- package/package.json +3 -1
- package/src/embeddings/sparse.test.ts +87 -0
- package/src/embeddings/sparse.ts +127 -0
- package/src/index.ts +393 -59
- package/src/qdrant/client.test.ts +544 -56
- package/src/qdrant/client.ts +162 -22
- package/src/transport.test.ts +202 -0
- package/vitest.config.ts +3 -3
package/src/index.ts
CHANGED
|
@@ -1,31 +1,43 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import { dirname, join } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
3
6
|
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
4
7
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
8
|
+
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
5
9
|
import {
|
|
6
10
|
CallToolRequestSchema,
|
|
7
11
|
ListResourcesRequestSchema,
|
|
8
12
|
ListToolsRequestSchema,
|
|
9
13
|
ReadResourceRequestSchema,
|
|
10
14
|
} from "@modelcontextprotocol/sdk/types.js";
|
|
11
|
-
import
|
|
12
|
-
import
|
|
15
|
+
import Bottleneck from "bottleneck";
|
|
16
|
+
import express from "express";
|
|
13
17
|
import { z } from "zod";
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
18
|
+
import { EmbeddingProviderFactory } from "./embeddings/factory.js";
|
|
19
|
+
import { BM25SparseVectorGenerator } from "./embeddings/sparse.js";
|
|
20
|
+
import { QdrantManager } from "./qdrant/client.js";
|
|
17
21
|
|
|
18
22
|
// Read package.json for version
|
|
19
23
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
20
|
-
const pkg = JSON.parse(
|
|
21
|
-
readFileSync(join(__dirname, "../package.json"), "utf-8"),
|
|
22
|
-
);
|
|
24
|
+
const pkg = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf-8"));
|
|
23
25
|
|
|
24
26
|
// Validate environment variables
|
|
25
27
|
const QDRANT_URL = process.env.QDRANT_URL || "http://localhost:6333";
|
|
26
|
-
const EMBEDDING_PROVIDER = (
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
const EMBEDDING_PROVIDER = (process.env.EMBEDDING_PROVIDER || "ollama").toLowerCase();
|
|
29
|
+
const TRANSPORT_MODE = (process.env.TRANSPORT_MODE || "stdio").toLowerCase();
|
|
30
|
+
const HTTP_PORT = parseInt(process.env.HTTP_PORT || "3000", 10);
|
|
31
|
+
|
|
32
|
+
// Validate HTTP_PORT when HTTP mode is selected
|
|
33
|
+
if (TRANSPORT_MODE === "http") {
|
|
34
|
+
if (Number.isNaN(HTTP_PORT) || HTTP_PORT < 1 || HTTP_PORT > 65535) {
|
|
35
|
+
console.error(
|
|
36
|
+
`Error: Invalid HTTP_PORT "${process.env.HTTP_PORT}". Must be a number between 1 and 65535.`
|
|
37
|
+
);
|
|
38
|
+
process.exit(1);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
29
41
|
|
|
30
42
|
// Check for required API keys based on provider
|
|
31
43
|
if (EMBEDDING_PROVIDER !== "ollama") {
|
|
@@ -47,15 +59,13 @@ if (EMBEDDING_PROVIDER !== "ollama") {
|
|
|
47
59
|
break;
|
|
48
60
|
default:
|
|
49
61
|
console.error(
|
|
50
|
-
`Error: Unknown embedding provider "${EMBEDDING_PROVIDER}". Supported providers: openai, cohere, voyage, ollama
|
|
62
|
+
`Error: Unknown embedding provider "${EMBEDDING_PROVIDER}". Supported providers: openai, cohere, voyage, ollama.`
|
|
51
63
|
);
|
|
52
64
|
process.exit(1);
|
|
53
65
|
}
|
|
54
66
|
|
|
55
67
|
if (!apiKey) {
|
|
56
|
-
console.error(
|
|
57
|
-
`Error: ${requiredKeyName} is required for ${EMBEDDING_PROVIDER} provider.`,
|
|
58
|
-
);
|
|
68
|
+
console.error(`Error: ${requiredKeyName} is required for ${EMBEDDING_PROVIDER} provider.`);
|
|
59
69
|
process.exit(1);
|
|
60
70
|
}
|
|
61
71
|
}
|
|
@@ -64,8 +74,7 @@ if (EMBEDDING_PROVIDER !== "ollama") {
|
|
|
64
74
|
async function checkOllamaAvailability() {
|
|
65
75
|
if (EMBEDDING_PROVIDER === "ollama") {
|
|
66
76
|
const baseUrl = process.env.EMBEDDING_BASE_URL || "http://localhost:11434";
|
|
67
|
-
const isLocalhost =
|
|
68
|
-
baseUrl.includes("localhost") || baseUrl.includes("127.0.0.1");
|
|
77
|
+
const isLocalhost = baseUrl.includes("localhost") || baseUrl.includes("127.0.0.1");
|
|
69
78
|
|
|
70
79
|
try {
|
|
71
80
|
const response = await fetch(`${baseUrl}/api/version`);
|
|
@@ -78,7 +87,7 @@ async function checkOllamaAvailability() {
|
|
|
78
87
|
const { models } = await tagsResponse.json();
|
|
79
88
|
const modelName = process.env.EMBEDDING_MODEL || "nomic-embed-text";
|
|
80
89
|
const modelExists = models.some(
|
|
81
|
-
(m: any) => m.name === modelName || m.name.startsWith(`${modelName}:`)
|
|
90
|
+
(m: any) => m.name === modelName || m.name.startsWith(`${modelName}:`)
|
|
82
91
|
);
|
|
83
92
|
|
|
84
93
|
if (!modelExists) {
|
|
@@ -141,7 +150,7 @@ const server = new Server(
|
|
|
141
150
|
tools: {},
|
|
142
151
|
resources: {},
|
|
143
152
|
},
|
|
144
|
-
}
|
|
153
|
+
}
|
|
145
154
|
);
|
|
146
155
|
|
|
147
156
|
// Tool schemas
|
|
@@ -151,6 +160,10 @@ const CreateCollectionSchema = z.object({
|
|
|
151
160
|
.enum(["Cosine", "Euclid", "Dot"])
|
|
152
161
|
.optional()
|
|
153
162
|
.describe("Distance metric (default: Cosine)"),
|
|
163
|
+
enableHybrid: z
|
|
164
|
+
.boolean()
|
|
165
|
+
.optional()
|
|
166
|
+
.describe("Enable hybrid search with sparse vectors (default: false)"),
|
|
154
167
|
});
|
|
155
168
|
|
|
156
169
|
const AddDocumentsSchema = z.object({
|
|
@@ -158,15 +171,13 @@ const AddDocumentsSchema = z.object({
|
|
|
158
171
|
documents: z
|
|
159
172
|
.array(
|
|
160
173
|
z.object({
|
|
161
|
-
id: z
|
|
162
|
-
.union([z.string(), z.number()])
|
|
163
|
-
.describe("Unique identifier for the document"),
|
|
174
|
+
id: z.union([z.string(), z.number()]).describe("Unique identifier for the document"),
|
|
164
175
|
text: z.string().describe("Text content to embed and store"),
|
|
165
176
|
metadata: z
|
|
166
177
|
.record(z.any())
|
|
167
178
|
.optional()
|
|
168
179
|
.describe("Optional metadata to store with the document"),
|
|
169
|
-
})
|
|
180
|
+
})
|
|
170
181
|
)
|
|
171
182
|
.describe("Array of documents to add"),
|
|
172
183
|
});
|
|
@@ -174,10 +185,7 @@ const AddDocumentsSchema = z.object({
|
|
|
174
185
|
const SemanticSearchSchema = z.object({
|
|
175
186
|
collection: z.string().describe("Name of the collection to search"),
|
|
176
187
|
query: z.string().describe("Search query text"),
|
|
177
|
-
limit: z
|
|
178
|
-
.number()
|
|
179
|
-
.optional()
|
|
180
|
-
.describe("Maximum number of results (default: 5)"),
|
|
188
|
+
limit: z.number().optional().describe("Maximum number of results (default: 5)"),
|
|
181
189
|
filter: z.record(z.any()).optional().describe("Optional metadata filter"),
|
|
182
190
|
});
|
|
183
191
|
|
|
@@ -191,9 +199,14 @@ const GetCollectionInfoSchema = z.object({
|
|
|
191
199
|
|
|
192
200
|
const DeleteDocumentsSchema = z.object({
|
|
193
201
|
collection: z.string().describe("Name of the collection"),
|
|
194
|
-
ids: z
|
|
195
|
-
|
|
196
|
-
|
|
202
|
+
ids: z.array(z.union([z.string(), z.number()])).describe("Array of document IDs to delete"),
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
const HybridSearchSchema = z.object({
|
|
206
|
+
collection: z.string().describe("Name of the collection to search"),
|
|
207
|
+
query: z.string().describe("Search query text"),
|
|
208
|
+
limit: z.number().optional().describe("Maximum number of results (default: 5)"),
|
|
209
|
+
filter: z.record(z.any()).optional().describe("Optional metadata filter"),
|
|
197
210
|
});
|
|
198
211
|
|
|
199
212
|
// List available tools
|
|
@@ -203,7 +216,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
203
216
|
{
|
|
204
217
|
name: "create_collection",
|
|
205
218
|
description:
|
|
206
|
-
"Create a new vector collection in Qdrant. The collection will be configured with the embedding provider's dimensions automatically.",
|
|
219
|
+
"Create a new vector collection in Qdrant. The collection will be configured with the embedding provider's dimensions automatically. Set enableHybrid to true to enable hybrid search combining semantic and keyword search.",
|
|
207
220
|
inputSchema: {
|
|
208
221
|
type: "object",
|
|
209
222
|
properties: {
|
|
@@ -216,6 +229,10 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
216
229
|
enum: ["Cosine", "Euclid", "Dot"],
|
|
217
230
|
description: "Distance metric (default: Cosine)",
|
|
218
231
|
},
|
|
232
|
+
enableHybrid: {
|
|
233
|
+
type: "boolean",
|
|
234
|
+
description: "Enable hybrid search with sparse vectors (default: false)",
|
|
235
|
+
},
|
|
219
236
|
},
|
|
220
237
|
required: ["name"],
|
|
221
238
|
},
|
|
@@ -323,8 +340,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
323
340
|
},
|
|
324
341
|
{
|
|
325
342
|
name: "delete_documents",
|
|
326
|
-
description:
|
|
327
|
-
"Delete specific documents from a collection by their IDs.",
|
|
343
|
+
description: "Delete specific documents from a collection by their IDs.",
|
|
328
344
|
inputSchema: {
|
|
329
345
|
type: "object",
|
|
330
346
|
properties: {
|
|
@@ -343,6 +359,33 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
343
359
|
required: ["collection", "ids"],
|
|
344
360
|
},
|
|
345
361
|
},
|
|
362
|
+
{
|
|
363
|
+
name: "hybrid_search",
|
|
364
|
+
description:
|
|
365
|
+
"Perform hybrid search combining semantic vector search with keyword search using BM25. This provides better results by combining the strengths of both approaches. The collection must be created with enableHybrid set to true.",
|
|
366
|
+
inputSchema: {
|
|
367
|
+
type: "object",
|
|
368
|
+
properties: {
|
|
369
|
+
collection: {
|
|
370
|
+
type: "string",
|
|
371
|
+
description: "Name of the collection to search",
|
|
372
|
+
},
|
|
373
|
+
query: {
|
|
374
|
+
type: "string",
|
|
375
|
+
description: "Search query text",
|
|
376
|
+
},
|
|
377
|
+
limit: {
|
|
378
|
+
type: "number",
|
|
379
|
+
description: "Maximum number of results (default: 5)",
|
|
380
|
+
},
|
|
381
|
+
filter: {
|
|
382
|
+
type: "object",
|
|
383
|
+
description: "Optional metadata filter",
|
|
384
|
+
},
|
|
385
|
+
},
|
|
386
|
+
required: ["collection", "query"],
|
|
387
|
+
},
|
|
388
|
+
},
|
|
346
389
|
],
|
|
347
390
|
};
|
|
348
391
|
});
|
|
@@ -354,14 +397,20 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
354
397
|
try {
|
|
355
398
|
switch (name) {
|
|
356
399
|
case "create_collection": {
|
|
357
|
-
const { name, distance } = CreateCollectionSchema.parse(args);
|
|
400
|
+
const { name, distance, enableHybrid } = CreateCollectionSchema.parse(args);
|
|
358
401
|
const vectorSize = embeddings.getDimensions();
|
|
359
|
-
await qdrant.createCollection(name, vectorSize, distance);
|
|
402
|
+
await qdrant.createCollection(name, vectorSize, distance, enableHybrid || false);
|
|
403
|
+
|
|
404
|
+
let message = `Collection "${name}" created successfully with ${vectorSize} dimensions and ${distance || "Cosine"} distance metric.`;
|
|
405
|
+
if (enableHybrid) {
|
|
406
|
+
message += " Hybrid search is enabled for this collection.";
|
|
407
|
+
}
|
|
408
|
+
|
|
360
409
|
return {
|
|
361
410
|
content: [
|
|
362
411
|
{
|
|
363
412
|
type: "text",
|
|
364
|
-
text:
|
|
413
|
+
text: message,
|
|
365
414
|
},
|
|
366
415
|
],
|
|
367
416
|
};
|
|
@@ -370,7 +419,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
370
419
|
case "add_documents": {
|
|
371
420
|
const { collection, documents } = AddDocumentsSchema.parse(args);
|
|
372
421
|
|
|
373
|
-
// Check if collection exists
|
|
422
|
+
// Check if collection exists and get info
|
|
374
423
|
const exists = await qdrant.collectionExists(collection);
|
|
375
424
|
if (!exists) {
|
|
376
425
|
return {
|
|
@@ -384,21 +433,41 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
384
433
|
};
|
|
385
434
|
}
|
|
386
435
|
|
|
436
|
+
const collectionInfo = await qdrant.getCollectionInfo(collection);
|
|
437
|
+
|
|
387
438
|
// Generate embeddings for all documents
|
|
388
439
|
const texts = documents.map((doc) => doc.text);
|
|
389
440
|
const embeddingResults = await embeddings.embedBatch(texts);
|
|
390
441
|
|
|
391
|
-
//
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
442
|
+
// If hybrid search is enabled, generate sparse vectors and use appropriate method
|
|
443
|
+
if (collectionInfo.hybridEnabled) {
|
|
444
|
+
const sparseGenerator = new BM25SparseVectorGenerator();
|
|
445
|
+
|
|
446
|
+
// Prepare points with both dense and sparse vectors
|
|
447
|
+
const points = documents.map((doc, index) => ({
|
|
448
|
+
id: doc.id,
|
|
449
|
+
vector: embeddingResults[index].embedding,
|
|
450
|
+
sparseVector: sparseGenerator.generate(doc.text),
|
|
451
|
+
payload: {
|
|
452
|
+
text: doc.text,
|
|
453
|
+
...doc.metadata,
|
|
454
|
+
},
|
|
455
|
+
}));
|
|
456
|
+
|
|
457
|
+
await qdrant.addPointsWithSparse(collection, points);
|
|
458
|
+
} else {
|
|
459
|
+
// Standard dense-only vectors
|
|
460
|
+
const points = documents.map((doc, index) => ({
|
|
461
|
+
id: doc.id,
|
|
462
|
+
vector: embeddingResults[index].embedding,
|
|
463
|
+
payload: {
|
|
464
|
+
text: doc.text,
|
|
465
|
+
...doc.metadata,
|
|
466
|
+
},
|
|
467
|
+
}));
|
|
400
468
|
|
|
401
|
-
|
|
469
|
+
await qdrant.addPoints(collection, points);
|
|
470
|
+
}
|
|
402
471
|
|
|
403
472
|
return {
|
|
404
473
|
content: [
|
|
@@ -411,8 +480,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
411
480
|
}
|
|
412
481
|
|
|
413
482
|
case "semantic_search": {
|
|
414
|
-
const { collection, query, limit, filter } =
|
|
415
|
-
SemanticSearchSchema.parse(args);
|
|
483
|
+
const { collection, query, limit, filter } = SemanticSearchSchema.parse(args);
|
|
416
484
|
|
|
417
485
|
// Check if collection exists
|
|
418
486
|
const exists = await qdrant.collectionExists(collection);
|
|
@@ -432,12 +500,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
432
500
|
const { embedding } = await embeddings.embed(query);
|
|
433
501
|
|
|
434
502
|
// Search
|
|
435
|
-
const results = await qdrant.search(
|
|
436
|
-
collection,
|
|
437
|
-
embedding,
|
|
438
|
-
limit || 5,
|
|
439
|
-
filter,
|
|
440
|
-
);
|
|
503
|
+
const results = await qdrant.search(collection, embedding, limit || 5, filter);
|
|
441
504
|
|
|
442
505
|
return {
|
|
443
506
|
content: [
|
|
@@ -500,6 +563,63 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
500
563
|
};
|
|
501
564
|
}
|
|
502
565
|
|
|
566
|
+
case "hybrid_search": {
|
|
567
|
+
const { collection, query, limit, filter } = HybridSearchSchema.parse(args);
|
|
568
|
+
|
|
569
|
+
// Check if collection exists
|
|
570
|
+
const exists = await qdrant.collectionExists(collection);
|
|
571
|
+
if (!exists) {
|
|
572
|
+
return {
|
|
573
|
+
content: [
|
|
574
|
+
{
|
|
575
|
+
type: "text",
|
|
576
|
+
text: `Error: Collection "${collection}" does not exist.`,
|
|
577
|
+
},
|
|
578
|
+
],
|
|
579
|
+
isError: true,
|
|
580
|
+
};
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
// Check if collection has hybrid search enabled
|
|
584
|
+
const collectionInfo = await qdrant.getCollectionInfo(collection);
|
|
585
|
+
if (!collectionInfo.hybridEnabled) {
|
|
586
|
+
return {
|
|
587
|
+
content: [
|
|
588
|
+
{
|
|
589
|
+
type: "text",
|
|
590
|
+
text: `Error: Collection "${collection}" does not have hybrid search enabled. Create a new collection with enableHybrid set to true.`,
|
|
591
|
+
},
|
|
592
|
+
],
|
|
593
|
+
isError: true,
|
|
594
|
+
};
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
// Generate dense embedding for query
|
|
598
|
+
const { embedding } = await embeddings.embed(query);
|
|
599
|
+
|
|
600
|
+
// Generate sparse vector for query
|
|
601
|
+
const sparseGenerator = new BM25SparseVectorGenerator();
|
|
602
|
+
const sparseVector = sparseGenerator.generate(query);
|
|
603
|
+
|
|
604
|
+
// Perform hybrid search
|
|
605
|
+
const results = await qdrant.hybridSearch(
|
|
606
|
+
collection,
|
|
607
|
+
embedding,
|
|
608
|
+
sparseVector,
|
|
609
|
+
limit || 5,
|
|
610
|
+
filter
|
|
611
|
+
);
|
|
612
|
+
|
|
613
|
+
return {
|
|
614
|
+
content: [
|
|
615
|
+
{
|
|
616
|
+
type: "text",
|
|
617
|
+
text: JSON.stringify(results, null, 2),
|
|
618
|
+
},
|
|
619
|
+
],
|
|
620
|
+
};
|
|
621
|
+
}
|
|
622
|
+
|
|
503
623
|
default:
|
|
504
624
|
return {
|
|
505
625
|
content: [
|
|
@@ -513,8 +633,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
513
633
|
}
|
|
514
634
|
} catch (error: any) {
|
|
515
635
|
// Enhanced error details for debugging
|
|
516
|
-
const errorDetails =
|
|
517
|
-
error instanceof Error ? error.message : JSON.stringify(error, null, 2);
|
|
636
|
+
const errorDetails = error instanceof Error ? error.message : JSON.stringify(error, null, 2);
|
|
518
637
|
|
|
519
638
|
console.error("Tool execution error:", {
|
|
520
639
|
tool: name,
|
|
@@ -600,14 +719,229 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
600
719
|
};
|
|
601
720
|
});
|
|
602
721
|
|
|
603
|
-
// Start server
|
|
604
|
-
async function
|
|
722
|
+
// Start server with stdio transport
|
|
723
|
+
async function startStdioServer() {
|
|
605
724
|
await checkOllamaAvailability();
|
|
606
725
|
const transport = new StdioServerTransport();
|
|
607
726
|
await server.connect(transport);
|
|
608
727
|
console.error("Qdrant MCP server running on stdio");
|
|
609
728
|
}
|
|
610
729
|
|
|
730
|
+
// Start server with HTTP transport
|
|
731
|
+
async function startHttpServer() {
|
|
732
|
+
await checkOllamaAvailability();
|
|
733
|
+
|
|
734
|
+
const app = express();
|
|
735
|
+
app.use(express.json({ limit: "10mb" }));
|
|
736
|
+
|
|
737
|
+
// Configure Express to trust proxy for correct IP detection
|
|
738
|
+
app.set("trust proxy", true);
|
|
739
|
+
|
|
740
|
+
// Rate limiter group: max 100 requests per 15 minutes per IP, max 10 concurrent per IP
|
|
741
|
+
const rateLimiterGroup = new Bottleneck.Group({
|
|
742
|
+
reservoir: 100, // initial capacity per IP
|
|
743
|
+
reservoirRefreshAmount: 100, // refresh back to 100
|
|
744
|
+
reservoirRefreshInterval: 15 * 60 * 1000, // every 15 minutes
|
|
745
|
+
maxConcurrent: 10, // max concurrent requests per IP
|
|
746
|
+
});
|
|
747
|
+
|
|
748
|
+
// Periodic cleanup of inactive rate limiters to prevent memory leaks
|
|
749
|
+
// Track last access time for each IP
|
|
750
|
+
const ipLastAccess = new Map<string, number>();
|
|
751
|
+
const INACTIVE_TIMEOUT = 60 * 60 * 1000; // 1 hour
|
|
752
|
+
|
|
753
|
+
const cleanupIntervalId = setInterval(() => {
|
|
754
|
+
const now = Date.now();
|
|
755
|
+
const keysToDelete: string[] = [];
|
|
756
|
+
|
|
757
|
+
ipLastAccess.forEach((lastAccess, ip) => {
|
|
758
|
+
if (now - lastAccess > INACTIVE_TIMEOUT) {
|
|
759
|
+
keysToDelete.push(ip);
|
|
760
|
+
}
|
|
761
|
+
});
|
|
762
|
+
|
|
763
|
+
keysToDelete.forEach((ip) => {
|
|
764
|
+
rateLimiterGroup.deleteKey(ip);
|
|
765
|
+
ipLastAccess.delete(ip);
|
|
766
|
+
});
|
|
767
|
+
|
|
768
|
+
if (keysToDelete.length > 0) {
|
|
769
|
+
console.error(`Cleaned up ${keysToDelete.length} inactive rate limiters`);
|
|
770
|
+
}
|
|
771
|
+
}, INACTIVE_TIMEOUT);
|
|
772
|
+
|
|
773
|
+
// Rate limiting middleware
|
|
774
|
+
const rateLimitMiddleware = async (
|
|
775
|
+
req: express.Request,
|
|
776
|
+
res: express.Response,
|
|
777
|
+
next: express.NextFunction
|
|
778
|
+
) => {
|
|
779
|
+
const clientIp = req.ip || req.socket.remoteAddress || "unknown";
|
|
780
|
+
|
|
781
|
+
try {
|
|
782
|
+
// Update last access time for this IP
|
|
783
|
+
ipLastAccess.set(clientIp, Date.now());
|
|
784
|
+
|
|
785
|
+
// Get or create a limiter for this specific IP
|
|
786
|
+
const limiter = rateLimiterGroup.key(clientIp);
|
|
787
|
+
await limiter.schedule(() => Promise.resolve());
|
|
788
|
+
next();
|
|
789
|
+
} catch (error) {
|
|
790
|
+
// Differentiate between rate limit errors and unexpected errors
|
|
791
|
+
if (error instanceof Bottleneck.BottleneckError) {
|
|
792
|
+
// Rate limit exceeded or Bottleneck operational error
|
|
793
|
+
console.error(`Rate limit exceeded for IP ${clientIp}:`, error.message);
|
|
794
|
+
} else {
|
|
795
|
+
// Unexpected error in rate limiting logic
|
|
796
|
+
console.error("Unexpected rate limiting error:", error);
|
|
797
|
+
}
|
|
798
|
+
res.status(429).json({
|
|
799
|
+
jsonrpc: "2.0",
|
|
800
|
+
error: {
|
|
801
|
+
code: -32000,
|
|
802
|
+
message: "Too many requests",
|
|
803
|
+
},
|
|
804
|
+
id: null,
|
|
805
|
+
});
|
|
806
|
+
}
|
|
807
|
+
};
|
|
808
|
+
|
|
809
|
+
// Health check endpoint
|
|
810
|
+
app.get("/health", (_req, res) => {
|
|
811
|
+
res.json({
|
|
812
|
+
status: "ok",
|
|
813
|
+
mode: TRANSPORT_MODE,
|
|
814
|
+
version: pkg.version,
|
|
815
|
+
embedding_provider: EMBEDDING_PROVIDER,
|
|
816
|
+
});
|
|
817
|
+
});
|
|
818
|
+
|
|
819
|
+
app.post("/mcp", rateLimitMiddleware, async (req, res) => {
|
|
820
|
+
const REQUEST_TIMEOUT = 60000; // 60 seconds
|
|
821
|
+
let timeoutId: NodeJS.Timeout | undefined;
|
|
822
|
+
let isTimedOut = false;
|
|
823
|
+
let transportClosed = false;
|
|
824
|
+
|
|
825
|
+
// Create a new transport for each request in stateless mode.
|
|
826
|
+
// This prevents request ID collisions when different clients use the same JSON-RPC request IDs.
|
|
827
|
+
const transport = new StreamableHTTPServerTransport({
|
|
828
|
+
sessionIdGenerator: undefined, // Stateless mode
|
|
829
|
+
enableJsonResponse: true,
|
|
830
|
+
});
|
|
831
|
+
|
|
832
|
+
// Helper to safely close transport only once
|
|
833
|
+
const closeTransport = async () => {
|
|
834
|
+
if (!transportClosed) {
|
|
835
|
+
transportClosed = true;
|
|
836
|
+
await transport.close().catch((e) => console.error("Error closing transport:", e));
|
|
837
|
+
}
|
|
838
|
+
};
|
|
839
|
+
|
|
840
|
+
try {
|
|
841
|
+
// Set request timeout
|
|
842
|
+
timeoutId = setTimeout(async () => {
|
|
843
|
+
isTimedOut = true;
|
|
844
|
+
// Close transport on timeout to prevent resource leaks
|
|
845
|
+
await closeTransport();
|
|
846
|
+
if (!res.headersSent) {
|
|
847
|
+
res.status(408).json({
|
|
848
|
+
jsonrpc: "2.0",
|
|
849
|
+
error: {
|
|
850
|
+
code: -32000,
|
|
851
|
+
message: "Request timeout",
|
|
852
|
+
},
|
|
853
|
+
id: null,
|
|
854
|
+
});
|
|
855
|
+
}
|
|
856
|
+
}, REQUEST_TIMEOUT);
|
|
857
|
+
|
|
858
|
+
// Clean up transport when response closes
|
|
859
|
+
res.on("close", async () => {
|
|
860
|
+
await closeTransport();
|
|
861
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
862
|
+
});
|
|
863
|
+
|
|
864
|
+
// Connect the transport to the shared server instance.
|
|
865
|
+
// In stateless mode, each request gets a new transport connection.
|
|
866
|
+
await server.connect(transport);
|
|
867
|
+
await transport.handleRequest(req, res, req.body);
|
|
868
|
+
|
|
869
|
+
// Clear timeout immediately on success to prevent race condition
|
|
870
|
+
if (timeoutId) {
|
|
871
|
+
clearTimeout(timeoutId);
|
|
872
|
+
timeoutId = undefined;
|
|
873
|
+
}
|
|
874
|
+
} catch (error) {
|
|
875
|
+
console.error("Error handling MCP request:", error);
|
|
876
|
+
if (!res.headersSent && !isTimedOut) {
|
|
877
|
+
res.status(500).json({
|
|
878
|
+
jsonrpc: "2.0",
|
|
879
|
+
error: {
|
|
880
|
+
code: -32603,
|
|
881
|
+
message: "Internal server error",
|
|
882
|
+
},
|
|
883
|
+
id: null,
|
|
884
|
+
});
|
|
885
|
+
}
|
|
886
|
+
} finally {
|
|
887
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
888
|
+
// Ensure transport is closed even if an error occurs
|
|
889
|
+
await closeTransport();
|
|
890
|
+
}
|
|
891
|
+
});
|
|
892
|
+
|
|
893
|
+
const httpServer = app
|
|
894
|
+
.listen(HTTP_PORT, () => {
|
|
895
|
+
console.error(`Qdrant MCP server running on http://localhost:${HTTP_PORT}/mcp`);
|
|
896
|
+
})
|
|
897
|
+
.on("error", (error) => {
|
|
898
|
+
console.error("HTTP server error:", error);
|
|
899
|
+
process.exit(1);
|
|
900
|
+
});
|
|
901
|
+
|
|
902
|
+
// Graceful shutdown handling
|
|
903
|
+
let isShuttingDown = false;
|
|
904
|
+
|
|
905
|
+
const shutdown = () => {
|
|
906
|
+
if (isShuttingDown) return;
|
|
907
|
+
isShuttingDown = true;
|
|
908
|
+
|
|
909
|
+
console.error("Shutdown signal received, closing HTTP server gracefully...");
|
|
910
|
+
|
|
911
|
+
// Clear the cleanup interval to allow graceful shutdown
|
|
912
|
+
clearInterval(cleanupIntervalId);
|
|
913
|
+
|
|
914
|
+
// Force shutdown after 10 seconds
|
|
915
|
+
const forceTimeout = setTimeout(() => {
|
|
916
|
+
console.error("Forcing shutdown after timeout");
|
|
917
|
+
process.exit(1);
|
|
918
|
+
}, 10000);
|
|
919
|
+
|
|
920
|
+
httpServer.close(() => {
|
|
921
|
+
clearTimeout(forceTimeout);
|
|
922
|
+
console.error("HTTP server closed");
|
|
923
|
+
process.exit(0);
|
|
924
|
+
});
|
|
925
|
+
};
|
|
926
|
+
|
|
927
|
+
process.on("SIGTERM", shutdown);
|
|
928
|
+
process.on("SIGINT", shutdown);
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
// Main entry point
|
|
932
|
+
async function main() {
|
|
933
|
+
if (TRANSPORT_MODE === "http") {
|
|
934
|
+
await startHttpServer();
|
|
935
|
+
} else if (TRANSPORT_MODE === "stdio") {
|
|
936
|
+
await startStdioServer();
|
|
937
|
+
} else {
|
|
938
|
+
console.error(
|
|
939
|
+
`Error: Invalid TRANSPORT_MODE "${TRANSPORT_MODE}". Supported modes: stdio, http.`
|
|
940
|
+
);
|
|
941
|
+
process.exit(1);
|
|
942
|
+
}
|
|
943
|
+
}
|
|
944
|
+
|
|
611
945
|
main().catch((error) => {
|
|
612
946
|
console.error("Fatal error:", error);
|
|
613
947
|
process.exit(1);
|