@mhalder/qdrant-mcp-server 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.env.example +92 -0
  2. package/.github/workflows/ci.yml +61 -0
  3. package/.github/workflows/claude-code-review.yml +57 -0
  4. package/.github/workflows/claude.yml +50 -0
  5. package/.github/workflows/release.yml +52 -0
  6. package/.husky/commit-msg +1 -0
  7. package/.husky/pre-commit +1 -0
  8. package/.releaserc.json +59 -0
  9. package/.yamlfmt +4 -0
  10. package/CHANGELOG.md +73 -0
  11. package/CONTRIBUTING.md +176 -0
  12. package/LICENSE +21 -0
  13. package/README.md +714 -0
  14. package/build/embeddings/base.d.ts +23 -0
  15. package/build/embeddings/base.d.ts.map +1 -0
  16. package/build/embeddings/base.js +2 -0
  17. package/build/embeddings/base.js.map +1 -0
  18. package/build/embeddings/cohere.d.ts +17 -0
  19. package/build/embeddings/cohere.d.ts.map +1 -0
  20. package/build/embeddings/cohere.js +102 -0
  21. package/build/embeddings/cohere.js.map +1 -0
  22. package/build/embeddings/cohere.test.d.ts +2 -0
  23. package/build/embeddings/cohere.test.d.ts.map +1 -0
  24. package/build/embeddings/cohere.test.js +279 -0
  25. package/build/embeddings/cohere.test.js.map +1 -0
  26. package/build/embeddings/factory.d.ts +10 -0
  27. package/build/embeddings/factory.d.ts.map +1 -0
  28. package/build/embeddings/factory.js +98 -0
  29. package/build/embeddings/factory.js.map +1 -0
  30. package/build/embeddings/factory.test.d.ts +2 -0
  31. package/build/embeddings/factory.test.d.ts.map +1 -0
  32. package/build/embeddings/factory.test.js +329 -0
  33. package/build/embeddings/factory.test.js.map +1 -0
  34. package/build/embeddings/ollama.d.ts +18 -0
  35. package/build/embeddings/ollama.d.ts.map +1 -0
  36. package/build/embeddings/ollama.js +135 -0
  37. package/build/embeddings/ollama.js.map +1 -0
  38. package/build/embeddings/ollama.test.d.ts +2 -0
  39. package/build/embeddings/ollama.test.d.ts.map +1 -0
  40. package/build/embeddings/ollama.test.js +399 -0
  41. package/build/embeddings/ollama.test.js.map +1 -0
  42. package/build/embeddings/openai.d.ts +16 -0
  43. package/build/embeddings/openai.d.ts.map +1 -0
  44. package/build/embeddings/openai.js +108 -0
  45. package/build/embeddings/openai.js.map +1 -0
  46. package/build/embeddings/openai.test.d.ts +2 -0
  47. package/build/embeddings/openai.test.d.ts.map +1 -0
  48. package/build/embeddings/openai.test.js +283 -0
  49. package/build/embeddings/openai.test.js.map +1 -0
  50. package/build/embeddings/voyage.d.ts +19 -0
  51. package/build/embeddings/voyage.d.ts.map +1 -0
  52. package/build/embeddings/voyage.js +113 -0
  53. package/build/embeddings/voyage.js.map +1 -0
  54. package/build/embeddings/voyage.test.d.ts +2 -0
  55. package/build/embeddings/voyage.test.d.ts.map +1 -0
  56. package/build/embeddings/voyage.test.js +371 -0
  57. package/build/embeddings/voyage.test.js.map +1 -0
  58. package/build/index.d.ts +3 -0
  59. package/build/index.d.ts.map +1 -0
  60. package/build/index.js +534 -0
  61. package/build/index.js.map +1 -0
  62. package/build/index.test.d.ts +2 -0
  63. package/build/index.test.d.ts.map +1 -0
  64. package/build/index.test.js +241 -0
  65. package/build/index.test.js.map +1 -0
  66. package/build/qdrant/client.d.ts +37 -0
  67. package/build/qdrant/client.d.ts.map +1 -0
  68. package/build/qdrant/client.js +142 -0
  69. package/build/qdrant/client.js.map +1 -0
  70. package/build/qdrant/client.test.d.ts +2 -0
  71. package/build/qdrant/client.test.d.ts.map +1 -0
  72. package/build/qdrant/client.test.js +340 -0
  73. package/build/qdrant/client.test.js.map +1 -0
  74. package/commitlint.config.js +25 -0
  75. package/docker-compose.yml +22 -0
  76. package/docs/test_report.md +259 -0
  77. package/examples/README.md +315 -0
  78. package/examples/basic/README.md +111 -0
  79. package/examples/filters/README.md +262 -0
  80. package/examples/knowledge-base/README.md +207 -0
  81. package/examples/rate-limiting/README.md +376 -0
  82. package/package.json +59 -0
  83. package/scripts/verify-providers.js +238 -0
  84. package/src/embeddings/base.ts +25 -0
  85. package/src/embeddings/cohere.test.ts +408 -0
  86. package/src/embeddings/cohere.ts +152 -0
  87. package/src/embeddings/factory.test.ts +453 -0
  88. package/src/embeddings/factory.ts +163 -0
  89. package/src/embeddings/ollama.test.ts +543 -0
  90. package/src/embeddings/ollama.ts +196 -0
  91. package/src/embeddings/openai.test.ts +402 -0
  92. package/src/embeddings/openai.ts +158 -0
  93. package/src/embeddings/voyage.test.ts +520 -0
  94. package/src/embeddings/voyage.ts +168 -0
  95. package/src/index.test.ts +304 -0
  96. package/src/index.ts +614 -0
  97. package/src/qdrant/client.test.ts +456 -0
  98. package/src/qdrant/client.ts +195 -0
  99. package/tsconfig.json +19 -0
  100. package/vitest.config.ts +37 -0
@@ -0,0 +1,207 @@
1
+ # Knowledge Base Example
2
+
3
+ This example shows how to build a searchable documentation system with rich metadata for organization and filtering.
4
+
5
+ ## Use Case
6
+
7
+ You're building a company knowledge base with:
8
+
9
+ - Documentation from multiple teams
10
+ - Articles with different topics and difficulty levels
11
+ - Content that needs to be searchable and filterable
12
+
13
+ ## What You'll Learn
14
+
15
+ - Organizing documents with metadata
16
+ - Using metadata for categorization
17
+ - Filtering searches by metadata fields
18
+ - Building a scalable knowledge base structure
19
+
20
+ ## Setup
21
+
22
+ ### 1. Create the Collection
23
+
24
+ ```
25
+ Create a collection named "company-kb"
26
+ ```
27
+
28
+ ### 2. Add Structured Documents
29
+
30
+ ```
31
+ Add these documents to company-kb:
32
+ - id: "eng-001", text: "Our API uses REST principles with JSON payloads. Authentication is handled via JWT tokens in the Authorization header.", metadata: {"team": "engineering", "topic": "api", "difficulty": "intermediate", "category": "technical"}
33
+ - id: "eng-002", text: "To deploy to production, merge your PR to main. The CI/CD pipeline automatically runs tests and deploys if all checks pass.", metadata: {"team": "engineering", "topic": "deployment", "difficulty": "beginner", "category": "process"}
34
+ - id: "hr-001", text: "New employees receive benefits information during onboarding. Health insurance enrollment must be completed within 30 days.", metadata: {"team": "hr", "topic": "benefits", "difficulty": "beginner", "category": "policy"}
35
+ - id: "hr-002", text: "Performance reviews occur quarterly. Managers should prepare feedback and schedule 1-on-1 meetings two weeks in advance.", metadata: {"team": "hr", "topic": "performance", "difficulty": "beginner", "category": "process"}
36
+ - id: "sales-001", text: "Our enterprise pricing model includes volume discounts for contracts over $100k annually. Custom SLAs are available.", metadata: {"team": "sales", "topic": "pricing", "difficulty": "advanced", "category": "business"}
37
+ - id: "sales-002", text: "The sales pipeline has four stages: Lead, Qualified, Proposal, and Closed. Update Salesforce after each customer interaction.", metadata: {"team": "sales", "topic": "pipeline", "difficulty": "beginner", "category": "process"}
38
+ ```
39
+
40
+ ## Search Examples
41
+
42
+ ### Basic Search (No Filters)
43
+
44
+ ```
45
+ Search company-kb for "how do I deploy code"
46
+ ```
47
+
48
+ Expected: Returns deployment-related docs (eng-002 likely ranks highest)
49
+
50
+ ### Filter by Team
51
+
52
+ ```
53
+ Search company-kb for "process documentation" with filter {"must": [{"key": "team", "match": {"value": "engineering"}}]}
54
+ ```
55
+
56
+ Returns only engineering team documents.
57
+
58
+ ### Filter by Difficulty
59
+
60
+ ```
61
+ Search company-kb for "getting started" with filter {"must": [{"key": "difficulty", "match": {"value": "beginner"}}]}
62
+ ```
63
+
64
+ Returns beginner-friendly documentation.
65
+
66
+ ### Multiple Filters (AND)
67
+
68
+ ```
69
+ Search company-kb for "company procedures" with filter {"must": [{"key": "category", "match": {"value": "process"}}, {"key": "difficulty", "match": {"value": "beginner"}}]}
70
+ ```
71
+
72
+ Returns beginner process documents only.
73
+
74
+ ### Filter by Topic
75
+
76
+ ```
77
+ Search company-kb for "pricing information" with filter {"must": [{"key": "team", "match": {"value": "sales"}}]}
78
+ ```
79
+
80
+ Restricts search to sales team content.
81
+
82
+ ## Metadata Design Best Practices
83
+
84
+ ### 1. Consistent Schema
85
+
86
+ Use the same metadata fields across all documents:
87
+
88
+ ```json
89
+ {
90
+ "team": "string",
91
+ "topic": "string",
92
+ "difficulty": "beginner|intermediate|advanced",
93
+ "category": "technical|process|policy|business"
94
+ }
95
+ ```
96
+
97
+ ### 2. Hierarchical Organization
98
+
99
+ Consider nesting metadata for complex taxonomies:
100
+
101
+ ```json
102
+ {
103
+ "team": "engineering",
104
+ "subteam": "backend",
105
+ "topic": "api",
106
+ "subtopic": "authentication"
107
+ }
108
+ ```
109
+
110
+ ### 3. Multiple Tags
111
+
112
+ Use arrays for multi-category documents:
113
+
114
+ ```json
115
+ {
116
+ "tags": ["api", "security", "authentication"],
117
+ "relevant_teams": ["engineering", "security"]
118
+ }
119
+ ```
120
+
121
+ ### 4. Timestamps and Versioning
122
+
123
+ Track freshness and versions:
124
+
125
+ ```json
126
+ {
127
+ "created_at": "2024-01-15",
128
+ "updated_at": "2024-03-20",
129
+ "version": "2.1",
130
+ "status": "published"
131
+ }
132
+ ```
133
+
134
+ ## Scaling Your Knowledge Base
135
+
136
+ ### Add More Content Types
137
+
138
+ - Code examples with language tags
139
+ - Video transcripts with duration metadata
140
+ - Meeting notes with attendees and dates
141
+ - Product specs with version numbers
142
+
143
+ ### Implement Access Control
144
+
145
+ Use metadata for permissions:
146
+
147
+ ```json
148
+ {
149
+ "visibility": "public|internal|confidential",
150
+ "authorized_teams": ["engineering", "leadership"]
151
+ }
152
+ ```
153
+
154
+ Then filter searches based on user permissions.
155
+
156
+ ### Track Usage
157
+
158
+ Add metadata for analytics:
159
+
160
+ ```json
161
+ {
162
+ "views": 0,
163
+ "last_accessed": null,
164
+ "author": "user@company.com"
165
+ }
166
+ ```
167
+
168
+ ## Maintenance
169
+
170
+ ### Update Documents
171
+
172
+ To update content, delete and re-add:
173
+
174
+ ```
175
+ Delete documents ["eng-001"] from company-kb
176
+
177
+ Add these documents to company-kb:
178
+ - id: "eng-001", text: "Updated API documentation...", metadata: {...}
179
+ ```
180
+
181
+ ### Archive Old Content
182
+
183
+ Use status metadata to hide outdated docs:
184
+
185
+ ```json
186
+ {
187
+ "status": "archived",
188
+ "archived_date": "2024-12-01"
189
+ }
190
+ ```
191
+
192
+ Then filter searches to exclude archived content:
193
+
194
+ ```
195
+ Search company-kb for "deployment" with filter {"must_not": [{"key": "status", "match": {"value": "archived"}}]}
196
+ ```
197
+
198
+ ## Clean Up
199
+
200
+ ```
201
+ Delete collection "company-kb"
202
+ ```
203
+
204
+ ## Next Steps
205
+
206
+ - [Advanced Filtering Examples](../filters/) - Learn complex filter patterns
207
+ - See the main README for information on batch document operations
@@ -0,0 +1,376 @@
1
+ # Rate Limiting Example
2
+
3
+ Learn how the Qdrant MCP Server handles embedding provider API rate limits automatically with intelligent throttling and retry mechanisms.
4
+
5
+ ## Overview
6
+
7
+ This example demonstrates:
8
+
9
+ - How rate limiting prevents API failures (for cloud providers)
10
+ - Configuring rate limits for your embedding provider
11
+ - Batch operations with automatic throttling
12
+ - Exponential backoff retry behavior
13
+ - Monitoring rate limit events
14
+ - Why Ollama doesn't need rate limiting (local processing)
15
+
16
+ **Time:** 10-15 minutes
17
+ **Difficulty:** Beginner to Intermediate
18
+
19
+ ## Why Rate Limiting Matters
20
+
21
+ **Ollama (Default):** Since Ollama runs locally, there are no API rate limits! You can process as many embeddings as your system can handle.
22
+
23
+ **Cloud Embedding Providers** (OpenAI, Cohere, Voyage AI) enforce rate limits based on your account tier:
24
+
25
+ **OpenAI:**
26
+ | Tier | Requests/Minute |
27
+ | ------- | --------------- |
28
+ | Free | 500 |
29
+ | Tier 1 | 3,500 |
30
+ | Tier 2 | 5,000 |
31
+ | Tier 3+ | 10,000+ |
32
+
33
+ **Other Cloud Providers:**
34
+
35
+ - **Cohere**: ~100 requests/minute (varies by plan)
36
+ - **Voyage AI**: ~300 requests/minute (varies by plan)
37
+
38
+ Without rate limiting, batch operations with cloud providers can exceed these limits and fail. This is one reason why **Ollama is the default** - no rate limits to worry about!
39
+
40
+ ## How It Works
41
+
42
+ The server automatically:
43
+
44
+ 1. **Throttles Requests**: Queues API calls to stay within limits
45
+ 2. **Retries on Failure**: Uses exponential backoff (1s, 2s, 4s, 8s...)
46
+ 3. **Respects Retry-After**: Follows provider retry guidance (when available)
47
+ 4. **Provides Feedback**: Shows retry progress in console
48
+
49
+ ## Configuration
50
+
51
+ ### Ollama Settings (Default - No Rate Limiting Needed)
52
+
53
+ ```bash
54
+ EMBEDDING_PROVIDER=ollama # or omit (ollama is default)
55
+ EMBEDDING_BASE_URL=http://localhost:11434
56
+ EMBEDDING_MODEL=nomic-embed-text
57
+ # No rate limit configuration needed - runs locally!
58
+ ```
59
+
60
+ ### OpenAI Settings
61
+
62
+ **Default (Tier 1 Paid):**
63
+
64
+ ```bash
65
+ EMBEDDING_PROVIDER=openai
66
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=3500
67
+ EMBEDDING_RETRY_ATTEMPTS=3
68
+ EMBEDDING_RETRY_DELAY=1000
69
+ ```
70
+
71
+ **Free Tier:**
72
+
73
+ ```bash
74
+ EMBEDDING_PROVIDER=openai
75
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=500
76
+ EMBEDDING_RETRY_ATTEMPTS=5
77
+ EMBEDDING_RETRY_DELAY=2000
78
+ ```
79
+
80
+ ### Cohere Settings
81
+
82
+ ```bash
83
+ EMBEDDING_PROVIDER=cohere
84
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=100
85
+ EMBEDDING_RETRY_ATTEMPTS=3
86
+ EMBEDDING_RETRY_DELAY=1000
87
+ ```
88
+
89
+ ### Voyage AI Settings
90
+
91
+ ```bash
92
+ EMBEDDING_PROVIDER=voyage
93
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=300
94
+ EMBEDDING_RETRY_ATTEMPTS=3
95
+ EMBEDDING_RETRY_DELAY=1000
96
+ ```
97
+
98
+ ### Ollama Settings (Optional Local Throttling)
99
+
100
+ ```bash
101
+ EMBEDDING_PROVIDER=ollama
102
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=1000
103
+ EMBEDDING_RETRY_ATTEMPTS=3
104
+ EMBEDDING_RETRY_DELAY=500
105
+ ```
106
+
107
+ ## Example: Batch Document Processing
108
+
109
+ Let's test rate limiting by adding many documents at once.
110
+
111
+ ### Step 1: Create Collection
112
+
113
+ ```
114
+ Create a collection named "rate-limit-test"
115
+ ```
116
+
117
+ ### Step 2: Add Batch of Documents
118
+
119
+ Try adding multiple documents in a single operation:
120
+
121
+ ```
122
+ Add these documents to "rate-limit-test":
123
+ - id: 1, text: "Introduction to machine learning algorithms", metadata: {"topic": "ml"}
124
+ - id: 2, text: "Deep learning neural networks explained", metadata: {"topic": "dl"}
125
+ - id: 3, text: "Natural language processing fundamentals", metadata: {"topic": "nlp"}
126
+ - id: 4, text: "Computer vision and image recognition", metadata: {"topic": "cv"}
127
+ - id: 5, text: "Reinforcement learning strategies", metadata: {"topic": "rl"}
128
+ - id: 6, text: "Data preprocessing and feature engineering", metadata: {"topic": "data"}
129
+ - id: 7, text: "Model evaluation and validation techniques", metadata: {"topic": "eval"}
130
+ - id: 8, text: "Hyperparameter optimization methods", metadata: {"topic": "tuning"}
131
+ - id: 9, text: "Transfer learning and fine-tuning", metadata: {"topic": "transfer"}
132
+ - id: 10, text: "Ensemble methods and boosting", metadata: {"topic": "ensemble"}
133
+ ```
134
+
135
+ **What happens:**
136
+
137
+ - The server generates embeddings for all 10 documents
138
+ - Requests are automatically queued and throttled
139
+ - If rate limits are hit, automatic retry with backoff occurs
140
+ - Console shows retry messages with wait times
141
+
142
+ ### Step 3: Test Search
143
+
144
+ ```
145
+ Search "rate-limit-test" for "neural networks and deep learning"
146
+ ```
147
+
148
+ ### Step 4: Monitor Console Output
149
+
150
+ Watch for rate limiting messages:
151
+
152
+ ```
153
+ Rate limit reached. Retrying in 1.0s (attempt 1/3)...
154
+ Rate limit reached. Retrying in 2.0s (attempt 2/3)...
155
+ ```
156
+
157
+ These messages indicate:
158
+
159
+ - Rate limit was detected (429 error)
160
+ - Automatic retry is in progress
161
+ - Current attempt number and delay
162
+
163
+ ## Simulating Rate Limit Scenarios
164
+
165
+ ### Scenario 1: Free Tier User
166
+
167
+ **Configuration:**
168
+
169
+ ```bash
170
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=500
171
+ ```
172
+
173
+ **Test:** Add 50 documents in batches of 10
174
+
175
+ - Server automatically spaces requests
176
+ - No manual rate limit handling needed
177
+ - Operations complete successfully
178
+
179
+ ### Scenario 2: High-Volume Batch
180
+
181
+ **Test:** Add 100+ documents
182
+
183
+ - Create collection: `batch-test-collection`
184
+ - Add documents in chunks
185
+ - Server queues requests automatically
186
+ - Monitor console for throttling behavior
187
+
188
+ ### Scenario 3: Concurrent Operations
189
+
190
+ **Test:** Multiple searches simultaneously
191
+
192
+ - Perform several searches in quick succession
193
+ - Rate limiter queues them appropriately
194
+ - All complete without errors
195
+
196
+ ## Best Practices
197
+
198
+ ### 1. Configure for Your Provider
199
+
200
+ Always set `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider's limits:
201
+
202
+ **OpenAI:**
203
+
204
+ ```bash
205
+ # Check your tier at: https://platform.openai.com/account/limits
206
+ EMBEDDING_MAX_REQUESTS_PER_MINUTE=<your-limit>
207
+ ```
208
+
209
+ **Other Providers:**
210
+
211
+ - Check your provider's dashboard for rate limits
212
+ - Start conservative and increase if needed
213
+
214
+ ### 2. Adjust Retry Settings for Reliability
215
+
216
+ For critical operations, increase retry attempts:
217
+
218
+ ```bash
219
+ EMBEDDING_RETRY_ATTEMPTS=5 # More resilient
220
+ ```
221
+
222
+ For development/testing, reduce retries:
223
+
224
+ ```bash
225
+ EMBEDDING_RETRY_ATTEMPTS=1 # Fail faster
226
+ ```
227
+
228
+ ### 3. Batch Operations Wisely
229
+
230
+ Most embedding providers support batch operations:
231
+
232
+ - **OpenAI**: Up to 2048 texts per request
233
+ - **Cohere**: Batch support available
234
+ - **Voyage AI**: Batch support available
235
+ - **Ollama**: Sequential processing (one at a time)
236
+
237
+ The server automatically uses batch APIs when available for efficiency.
238
+
239
+ ### 4. Monitor Your Usage
240
+
241
+ Watch console output during operations:
242
+
243
+ - No messages = smooth operation
244
+ - Retry messages = hitting limits (consider reducing rate)
245
+ - Error after max retries = need to reduce request volume
246
+
247
+ ## Understanding Retry Behavior
248
+
249
+ ### Exponential Backoff Example
250
+
251
+ With `EMBEDDING_RETRY_DELAY=1000`:
252
+
253
+ | Attempt | Delay | Total Wait |
254
+ | ------- | ----- | ---------- |
255
+ | 1st | 1s | 1s |
256
+ | 2nd | 2s | 3s |
257
+ | 3rd | 4s | 7s |
258
+ | 4th | 8s | 15s |
259
+
260
+ ### Retry-After Header
261
+
262
+ If the provider returns a `Retry-After` header (OpenAI, some others):
263
+
264
+ - Server uses that exact delay
265
+ - Ignores exponential backoff
266
+ - Ensures optimal recovery
267
+
268
+ ## Error Messages
269
+
270
+ ### Success Messages
271
+
272
+ ```
273
+ Successfully added 10 document(s) to collection "rate-limit-test".
274
+ ```
275
+
276
+ ### Retry Messages (Normal)
277
+
278
+ ```
279
+ Rate limit reached. Retrying in 2.0s (attempt 1/3)...
280
+ ```
281
+
282
+ **Action:** None needed, automatic retry in progress
283
+
284
+ ### Max Retries Exceeded (Rare)
285
+
286
+ ```
287
+ Error: [Provider] API rate limit exceeded after 3 retry attempts.
288
+ Please try again later or reduce request frequency.
289
+ ```
290
+
291
+ **Action:**
292
+
293
+ - Wait a few minutes
294
+ - Reduce `EMBEDDING_MAX_REQUESTS_PER_MINUTE`
295
+ - Check your provider's dashboard for current usage
296
+
297
+ ## Integration with Claude Code
298
+
299
+ The rate limiting works seamlessly with Claude Code.
300
+
301
+ **Example with Ollama (Default - No Rate Limits):**
302
+
303
+ ```json
304
+ {
305
+ "mcpServers": {
306
+ "qdrant": {
307
+ "command": "node",
308
+ "args": ["/path/to/qdrant-mcp-server/build/index.js"],
309
+ "env": {
310
+ "QDRANT_URL": "http://localhost:6333",
311
+ "EMBEDDING_BASE_URL": "http://localhost:11434"
312
+ }
313
+ }
314
+ }
315
+ }
316
+ ```
317
+
318
+ **Example with OpenAI (Alternative):**
319
+
320
+ ```json
321
+ {
322
+ "mcpServers": {
323
+ "qdrant": {
324
+ "command": "node",
325
+ "args": ["/path/to/qdrant-mcp-server/build/index.js"],
326
+ "env": {
327
+ "EMBEDDING_PROVIDER": "openai",
328
+ "OPENAI_API_KEY": "sk-your-key",
329
+ "QDRANT_URL": "http://localhost:6333",
330
+ "EMBEDDING_MAX_REQUESTS_PER_MINUTE": "3500",
331
+ "EMBEDDING_RETRY_ATTEMPTS": "3",
332
+ "EMBEDDING_RETRY_DELAY": "1000"
333
+ }
334
+ }
335
+ }
336
+ }
337
+ ```
338
+
339
+ ## Cleanup
340
+
341
+ ```
342
+ Delete collection "rate-limit-test"
343
+ ```
344
+
345
+ ## Key Takeaways
346
+
347
+ 1. ✅ **Ollama Default**: No rate limits with local processing
348
+ 2. ✅ **Automatic**: Rate limiting works out-of-the-box for cloud providers
349
+ 3. ✅ **Configurable**: Adjust for your cloud provider tier
350
+ 4. ✅ **Resilient**: Exponential backoff handles temporary issues
351
+ 5. ✅ **Transparent**: Console feedback shows what's happening
352
+ 6. ✅ **Efficient**: Batch operations optimize API usage
353
+
354
+ ## Next Steps
355
+
356
+ - Explore [Knowledge Base example](../knowledge-base/) for real-world usage
357
+ - Learn [Advanced Filtering](../filters/) for complex queries
358
+ - Read [main README](../../README.md) for all configuration options
359
+
360
+ ## Troubleshooting
361
+
362
+ ### Still Getting Rate Limit Errors?
363
+
364
+ 1. **Check your provider's limits**: Visit your provider's dashboard
365
+ 2. **Reduce request rate**: Lower `EMBEDDING_MAX_REQUESTS_PER_MINUTE` by 20%
366
+ 3. **Increase retry attempts**: Set `EMBEDDING_RETRY_ATTEMPTS=5`
367
+ 4. **Wait between batches**: For very large operations, split into multiple sessions
368
+
369
+ ### Slow Performance?
370
+
371
+ If operations seem slow:
372
+
373
+ - This is expected with rate limiting
374
+ - It's better than failed operations
375
+ - Upgrade your provider's tier for higher limits
376
+ - Consider using Ollama for unlimited local processing
package/package.json ADDED
@@ -0,0 +1,59 @@
1
+ {
2
+ "name": "@mhalder/qdrant-mcp-server",
3
+ "version": "1.1.0",
4
+ "description": "MCP server for semantic search using local Qdrant and Ollama (default) with support for OpenAI, Cohere, and Voyage AI",
5
+ "type": "module",
6
+ "bin": {
7
+ "qdrant-mcp-server": "build/index.js"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "dev": "tsx src/index.ts",
12
+ "test": "vitest",
13
+ "test:ui": "vitest --ui",
14
+ "test:coverage": "vitest --coverage",
15
+ "test:providers": "node scripts/verify-providers.js",
16
+ "type-check": "tsc --noEmit",
17
+ "prepare": "husky"
18
+ },
19
+ "keywords": [
20
+ "mcp",
21
+ "qdrant",
22
+ "vector-search",
23
+ "semantic-search",
24
+ "embeddings"
25
+ ],
26
+ "author": "mhalder",
27
+ "license": "MIT",
28
+ "repository": {
29
+ "type": "git",
30
+ "url": "https://github.com/mhalder/qdrant-mcp-server.git"
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "dependencies": {
36
+ "@modelcontextprotocol/sdk": "^1.0.4",
37
+ "@qdrant/js-client-rest": "^1.12.0",
38
+ "bottleneck": "^2.19.5",
39
+ "cohere-ai": "^7.19.0",
40
+ "openai": "^4.77.3",
41
+ "zod": "^3.24.1"
42
+ },
43
+ "devDependencies": {
44
+ "@commitlint/cli": "^20.1.0",
45
+ "@commitlint/config-conventional": "^20.0.0",
46
+ "@semantic-release/changelog": "^6.0.3",
47
+ "@semantic-release/git": "^10.0.1",
48
+ "@semantic-release/github": "^11.0.6",
49
+ "@semantic-release/npm": "^12.0.2",
50
+ "@types/node": "^22.10.5",
51
+ "@vitest/coverage-v8": "^2.1.8",
52
+ "@vitest/ui": "^2.1.8",
53
+ "husky": "^9.1.7",
54
+ "semantic-release": "^24.2.9",
55
+ "tsx": "^4.19.2",
56
+ "typescript": "^5.7.2",
57
+ "vitest": "^2.1.8"
58
+ }
59
+ }