@mhalder/qdrant-mcp-server 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +92 -0
- package/.github/workflows/ci.yml +61 -0
- package/.github/workflows/claude-code-review.yml +57 -0
- package/.github/workflows/claude.yml +50 -0
- package/.github/workflows/release.yml +52 -0
- package/.husky/commit-msg +1 -0
- package/.husky/pre-commit +1 -0
- package/.releaserc.json +59 -0
- package/.yamlfmt +4 -0
- package/CHANGELOG.md +73 -0
- package/CONTRIBUTING.md +176 -0
- package/LICENSE +21 -0
- package/README.md +714 -0
- package/build/embeddings/base.d.ts +23 -0
- package/build/embeddings/base.d.ts.map +1 -0
- package/build/embeddings/base.js +2 -0
- package/build/embeddings/base.js.map +1 -0
- package/build/embeddings/cohere.d.ts +17 -0
- package/build/embeddings/cohere.d.ts.map +1 -0
- package/build/embeddings/cohere.js +102 -0
- package/build/embeddings/cohere.js.map +1 -0
- package/build/embeddings/cohere.test.d.ts +2 -0
- package/build/embeddings/cohere.test.d.ts.map +1 -0
- package/build/embeddings/cohere.test.js +279 -0
- package/build/embeddings/cohere.test.js.map +1 -0
- package/build/embeddings/factory.d.ts +10 -0
- package/build/embeddings/factory.d.ts.map +1 -0
- package/build/embeddings/factory.js +98 -0
- package/build/embeddings/factory.js.map +1 -0
- package/build/embeddings/factory.test.d.ts +2 -0
- package/build/embeddings/factory.test.d.ts.map +1 -0
- package/build/embeddings/factory.test.js +329 -0
- package/build/embeddings/factory.test.js.map +1 -0
- package/build/embeddings/ollama.d.ts +18 -0
- package/build/embeddings/ollama.d.ts.map +1 -0
- package/build/embeddings/ollama.js +135 -0
- package/build/embeddings/ollama.js.map +1 -0
- package/build/embeddings/ollama.test.d.ts +2 -0
- package/build/embeddings/ollama.test.d.ts.map +1 -0
- package/build/embeddings/ollama.test.js +399 -0
- package/build/embeddings/ollama.test.js.map +1 -0
- package/build/embeddings/openai.d.ts +16 -0
- package/build/embeddings/openai.d.ts.map +1 -0
- package/build/embeddings/openai.js +108 -0
- package/build/embeddings/openai.js.map +1 -0
- package/build/embeddings/openai.test.d.ts +2 -0
- package/build/embeddings/openai.test.d.ts.map +1 -0
- package/build/embeddings/openai.test.js +283 -0
- package/build/embeddings/openai.test.js.map +1 -0
- package/build/embeddings/voyage.d.ts +19 -0
- package/build/embeddings/voyage.d.ts.map +1 -0
- package/build/embeddings/voyage.js +113 -0
- package/build/embeddings/voyage.js.map +1 -0
- package/build/embeddings/voyage.test.d.ts +2 -0
- package/build/embeddings/voyage.test.d.ts.map +1 -0
- package/build/embeddings/voyage.test.js +371 -0
- package/build/embeddings/voyage.test.js.map +1 -0
- package/build/index.d.ts +3 -0
- package/build/index.d.ts.map +1 -0
- package/build/index.js +534 -0
- package/build/index.js.map +1 -0
- package/build/index.test.d.ts +2 -0
- package/build/index.test.d.ts.map +1 -0
- package/build/index.test.js +241 -0
- package/build/index.test.js.map +1 -0
- package/build/qdrant/client.d.ts +37 -0
- package/build/qdrant/client.d.ts.map +1 -0
- package/build/qdrant/client.js +142 -0
- package/build/qdrant/client.js.map +1 -0
- package/build/qdrant/client.test.d.ts +2 -0
- package/build/qdrant/client.test.d.ts.map +1 -0
- package/build/qdrant/client.test.js +340 -0
- package/build/qdrant/client.test.js.map +1 -0
- package/commitlint.config.js +25 -0
- package/docker-compose.yml +22 -0
- package/docs/test_report.md +259 -0
- package/examples/README.md +315 -0
- package/examples/basic/README.md +111 -0
- package/examples/filters/README.md +262 -0
- package/examples/knowledge-base/README.md +207 -0
- package/examples/rate-limiting/README.md +376 -0
- package/package.json +59 -0
- package/scripts/verify-providers.js +238 -0
- package/src/embeddings/base.ts +25 -0
- package/src/embeddings/cohere.test.ts +408 -0
- package/src/embeddings/cohere.ts +152 -0
- package/src/embeddings/factory.test.ts +453 -0
- package/src/embeddings/factory.ts +163 -0
- package/src/embeddings/ollama.test.ts +543 -0
- package/src/embeddings/ollama.ts +196 -0
- package/src/embeddings/openai.test.ts +402 -0
- package/src/embeddings/openai.ts +158 -0
- package/src/embeddings/voyage.test.ts +520 -0
- package/src/embeddings/voyage.ts +168 -0
- package/src/index.test.ts +304 -0
- package/src/index.ts +614 -0
- package/src/qdrant/client.test.ts +456 -0
- package/src/qdrant/client.ts +195 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +37 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Knowledge Base Example
|
|
2
|
+
|
|
3
|
+
This example shows how to build a searchable documentation system with rich metadata for organization and filtering.
|
|
4
|
+
|
|
5
|
+
## Use Case
|
|
6
|
+
|
|
7
|
+
You're building a company knowledge base with:
|
|
8
|
+
|
|
9
|
+
- Documentation from multiple teams
|
|
10
|
+
- Articles with different topics and difficulty levels
|
|
11
|
+
- Content that needs to be searchable and filterable
|
|
12
|
+
|
|
13
|
+
## What You'll Learn
|
|
14
|
+
|
|
15
|
+
- Organizing documents with metadata
|
|
16
|
+
- Using metadata for categorization
|
|
17
|
+
- Filtering searches by metadata fields
|
|
18
|
+
- Building a scalable knowledge base structure
|
|
19
|
+
|
|
20
|
+
## Setup
|
|
21
|
+
|
|
22
|
+
### 1. Create the Collection
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
Create a collection named "company-kb"
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 2. Add Structured Documents
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
Add these documents to company-kb:
|
|
32
|
+
- id: "eng-001", text: "Our API uses REST principles with JSON payloads. Authentication is handled via JWT tokens in the Authorization header.", metadata: {"team": "engineering", "topic": "api", "difficulty": "intermediate", "category": "technical"}
|
|
33
|
+
- id: "eng-002", text: "To deploy to production, merge your PR to main. The CI/CD pipeline automatically runs tests and deploys if all checks pass.", metadata: {"team": "engineering", "topic": "deployment", "difficulty": "beginner", "category": "process"}
|
|
34
|
+
- id: "hr-001", text: "New employees receive benefits information during onboarding. Health insurance enrollment must be completed within 30 days.", metadata: {"team": "hr", "topic": "benefits", "difficulty": "beginner", "category": "policy"}
|
|
35
|
+
- id: "hr-002", text: "Performance reviews occur quarterly. Managers should prepare feedback and schedule 1-on-1 meetings two weeks in advance.", metadata: {"team": "hr", "topic": "performance", "difficulty": "beginner", "category": "process"}
|
|
36
|
+
- id: "sales-001", text: "Our enterprise pricing model includes volume discounts for contracts over $100k annually. Custom SLAs are available.", metadata: {"team": "sales", "topic": "pricing", "difficulty": "advanced", "category": "business"}
|
|
37
|
+
- id: "sales-002", text: "The sales pipeline has four stages: Lead, Qualified, Proposal, and Closed. Update Salesforce after each customer interaction.", metadata: {"team": "sales", "topic": "pipeline", "difficulty": "beginner", "category": "process"}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Search Examples
|
|
41
|
+
|
|
42
|
+
### Basic Search (No Filters)
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
Search company-kb for "how do I deploy code"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Expected: Returns deployment-related docs (eng-002 likely ranks highest)
|
|
49
|
+
|
|
50
|
+
### Filter by Team
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
Search company-kb for "process documentation" with filter {"must": [{"key": "team", "match": {"value": "engineering"}}]}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Returns only engineering team documents.
|
|
57
|
+
|
|
58
|
+
### Filter by Difficulty
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
Search company-kb for "getting started" with filter {"must": [{"key": "difficulty", "match": {"value": "beginner"}}]}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Returns beginner-friendly documentation.
|
|
65
|
+
|
|
66
|
+
### Multiple Filters (AND)
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
Search company-kb for "company procedures" with filter {"must": [{"key": "category", "match": {"value": "process"}}, {"key": "difficulty", "match": {"value": "beginner"}}]}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Returns beginner process documents only.
|
|
73
|
+
|
|
74
|
+
### Topic Search Within a Team
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
Search company-kb for "pricing information" with filter {"must": [{"key": "team", "match": {"value": "sales"}}]}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Restricts search to sales team content.
|
|
81
|
+
|
|
82
|
+
## Metadata Design Best Practices
|
|
83
|
+
|
|
84
|
+
### 1. Consistent Schema
|
|
85
|
+
|
|
86
|
+
Use the same metadata fields across all documents:
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"team": "string",
|
|
91
|
+
"topic": "string",
|
|
92
|
+
"difficulty": "beginner|intermediate|advanced",
|
|
93
|
+
"category": "technical|process|policy|business"
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### 2. Hierarchical Organization
|
|
98
|
+
|
|
99
|
+
Consider adding parent/child fields (such as `team`/`subteam` and `topic`/`subtopic`) to model complex taxonomies:
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"team": "engineering",
|
|
104
|
+
"subteam": "backend",
|
|
105
|
+
"topic": "api",
|
|
106
|
+
"subtopic": "authentication"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### 3. Multiple Tags
|
|
111
|
+
|
|
112
|
+
Use arrays for multi-category documents:
|
|
113
|
+
|
|
114
|
+
```json
|
|
115
|
+
{
|
|
116
|
+
"tags": ["api", "security", "authentication"],
|
|
117
|
+
"relevant_teams": ["engineering", "security"]
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 4. Timestamps and Versioning
|
|
122
|
+
|
|
123
|
+
Track freshness and versions:
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"created_at": "2024-01-15",
|
|
128
|
+
"updated_at": "2024-03-20",
|
|
129
|
+
"version": "2.1",
|
|
130
|
+
"status": "published"
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Scaling Your Knowledge Base
|
|
135
|
+
|
|
136
|
+
### Add More Content Types
|
|
137
|
+
|
|
138
|
+
- Code examples with language tags
|
|
139
|
+
- Video transcripts with duration metadata
|
|
140
|
+
- Meeting notes with attendees and dates
|
|
141
|
+
- Product specs with version numbers
|
|
142
|
+
|
|
143
|
+
### Implement Access Control
|
|
144
|
+
|
|
145
|
+
Use metadata for permissions:
|
|
146
|
+
|
|
147
|
+
```json
|
|
148
|
+
{
|
|
149
|
+
"visibility": "public|internal|confidential",
|
|
150
|
+
"authorized_teams": ["engineering", "leadership"]
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Then filter searches based on user permissions.
|
|
155
|
+
|
|
156
|
+
### Track Usage
|
|
157
|
+
|
|
158
|
+
Add metadata for analytics:
|
|
159
|
+
|
|
160
|
+
```json
|
|
161
|
+
{
|
|
162
|
+
"views": 0,
|
|
163
|
+
"last_accessed": null,
|
|
164
|
+
"author": "user@company.com"
|
|
165
|
+
}
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Maintenance
|
|
169
|
+
|
|
170
|
+
### Update Documents
|
|
171
|
+
|
|
172
|
+
To update content, delete and re-add:
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
Delete documents ["eng-001"] from company-kb
|
|
176
|
+
|
|
177
|
+
Add these documents to company-kb:
|
|
178
|
+
- id: "eng-001", text: "Updated API documentation...", metadata: {...}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Archive Old Content
|
|
182
|
+
|
|
183
|
+
Use status metadata to hide outdated docs:
|
|
184
|
+
|
|
185
|
+
```json
|
|
186
|
+
{
|
|
187
|
+
"status": "archived",
|
|
188
|
+
"archived_date": "2024-12-01"
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Then filter searches to exclude archived content:
|
|
193
|
+
|
|
194
|
+
```
|
|
195
|
+
Search company-kb for "deployment" with filter {"must_not": [{"key": "status", "match": {"value": "archived"}}]}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Clean Up
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
Delete collection "company-kb"
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Next Steps
|
|
205
|
+
|
|
206
|
+
- [Advanced Filtering Examples](../filters/) - Learn complex filter patterns
|
|
207
|
+
- See the [main README](../../README.md) for information on batch document operations
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
# Rate Limiting Example
|
|
2
|
+
|
|
3
|
+
Learn how the Qdrant MCP Server handles embedding provider API rate limits automatically with intelligent throttling and retry mechanisms.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This example demonstrates:
|
|
8
|
+
|
|
9
|
+
- How rate limiting prevents API failures (for cloud providers)
|
|
10
|
+
- Configuring rate limits for your embedding provider
|
|
11
|
+
- Batch operations with automatic throttling
|
|
12
|
+
- Exponential backoff retry behavior
|
|
13
|
+
- Monitoring rate limit events
|
|
14
|
+
- Why Ollama doesn't need rate limiting (local processing)
|
|
15
|
+
|
|
16
|
+
**Time:** 10-15 minutes
|
|
17
|
+
**Difficulty:** Beginner to Intermediate
|
|
18
|
+
|
|
19
|
+
## Why Rate Limiting Matters
|
|
20
|
+
|
|
21
|
+
**Ollama (Default):** Since Ollama runs locally, there are no API rate limits! You can process as many embeddings as your system can handle.
|
|
22
|
+
|
|
23
|
+
**Cloud Embedding Providers** (OpenAI, Cohere, Voyage AI) enforce rate limits based on your account tier:
|
|
24
|
+
|
|
25
|
+
**OpenAI:**
|
|
26
|
+
| Tier | Requests/Minute |
|
|
27
|
+
| ------- | --------------- |
|
|
28
|
+
| Free | 500 |
|
|
29
|
+
| Tier 1 | 3,500 |
|
|
30
|
+
| Tier 2 | 5,000 |
|
|
31
|
+
| Tier 3+ | 10,000+ |
|
|
32
|
+
|
|
33
|
+
**Other Cloud Providers:**
|
|
34
|
+
|
|
35
|
+
- **Cohere**: ~100 requests/minute (varies by plan)
|
|
36
|
+
- **Voyage AI**: ~300 requests/minute (varies by plan)
|
|
37
|
+
|
|
38
|
+
Without rate limiting, batch operations with cloud providers can exceed these limits and fail. This is one reason why **Ollama is the default** - no rate limits to worry about!
|
|
39
|
+
|
|
40
|
+
## How It Works
|
|
41
|
+
|
|
42
|
+
The server automatically:
|
|
43
|
+
|
|
44
|
+
1. **Throttles Requests**: Queues API calls to stay within limits
|
|
45
|
+
2. **Retries on Failure**: Uses exponential backoff (1s, 2s, 4s, 8s...)
|
|
46
|
+
3. **Respects Retry-After**: Follows provider retry guidance (when available)
|
|
47
|
+
4. **Provides Feedback**: Shows retry progress in console
|
|
48
|
+
|
|
49
|
+
## Configuration
|
|
50
|
+
|
|
51
|
+
### Ollama Settings (Default - No Rate Limiting Needed)
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
EMBEDDING_PROVIDER=ollama # or omit (ollama is default)
|
|
55
|
+
EMBEDDING_BASE_URL=http://localhost:11434
|
|
56
|
+
EMBEDDING_MODEL=nomic-embed-text
|
|
57
|
+
# No rate limit configuration needed - runs locally!
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### OpenAI Settings
|
|
61
|
+
|
|
62
|
+
**Default (Tier 1 Paid):**
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
EMBEDDING_PROVIDER=openai
|
|
66
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=3500
|
|
67
|
+
EMBEDDING_RETRY_ATTEMPTS=3
|
|
68
|
+
EMBEDDING_RETRY_DELAY=1000
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**Free Tier:**
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
EMBEDDING_PROVIDER=openai
|
|
75
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=500
|
|
76
|
+
EMBEDDING_RETRY_ATTEMPTS=5
|
|
77
|
+
EMBEDDING_RETRY_DELAY=2000
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Cohere Settings
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
EMBEDDING_PROVIDER=cohere
|
|
84
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=100
|
|
85
|
+
EMBEDDING_RETRY_ATTEMPTS=3
|
|
86
|
+
EMBEDDING_RETRY_DELAY=1000
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Voyage AI Settings
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
EMBEDDING_PROVIDER=voyage
|
|
93
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=300
|
|
94
|
+
EMBEDDING_RETRY_ATTEMPTS=3
|
|
95
|
+
EMBEDDING_RETRY_DELAY=1000
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Ollama Settings (Local, Optional Throttling)
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
EMBEDDING_PROVIDER=ollama
|
|
102
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=1000
|
|
103
|
+
EMBEDDING_RETRY_ATTEMPTS=3
|
|
104
|
+
EMBEDDING_RETRY_DELAY=500
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Example: Batch Document Processing
|
|
108
|
+
|
|
109
|
+
Let's test rate limiting by adding many documents at once.
|
|
110
|
+
|
|
111
|
+
### Step 1: Create Collection
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
Create a collection named "rate-limit-test"
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Step 2: Add Batch of Documents
|
|
118
|
+
|
|
119
|
+
Try adding multiple documents in a single operation:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
Add these documents to "rate-limit-test":
|
|
123
|
+
- id: 1, text: "Introduction to machine learning algorithms", metadata: {"topic": "ml"}
|
|
124
|
+
- id: 2, text: "Deep learning neural networks explained", metadata: {"topic": "dl"}
|
|
125
|
+
- id: 3, text: "Natural language processing fundamentals", metadata: {"topic": "nlp"}
|
|
126
|
+
- id: 4, text: "Computer vision and image recognition", metadata: {"topic": "cv"}
|
|
127
|
+
- id: 5, text: "Reinforcement learning strategies", metadata: {"topic": "rl"}
|
|
128
|
+
- id: 6, text: "Data preprocessing and feature engineering", metadata: {"topic": "data"}
|
|
129
|
+
- id: 7, text: "Model evaluation and validation techniques", metadata: {"topic": "eval"}
|
|
130
|
+
- id: 8, text: "Hyperparameter optimization methods", metadata: {"topic": "tuning"}
|
|
131
|
+
- id: 9, text: "Transfer learning and fine-tuning", metadata: {"topic": "transfer"}
|
|
132
|
+
- id: 10, text: "Ensemble methods and boosting", metadata: {"topic": "ensemble"}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**What happens:**
|
|
136
|
+
|
|
137
|
+
- The server generates embeddings for all 10 documents
|
|
138
|
+
- Requests are automatically queued and throttled
|
|
139
|
+
- If rate limits are hit, automatic retry with backoff occurs
|
|
140
|
+
- Console shows retry messages with wait times
|
|
141
|
+
|
|
142
|
+
### Step 3: Test Search
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
Search "rate-limit-test" for "neural networks and deep learning"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Step 4: Monitor Console Output
|
|
149
|
+
|
|
150
|
+
Watch for rate limiting messages:
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
Rate limit reached. Retrying in 1.0s (attempt 1/3)...
|
|
154
|
+
Rate limit reached. Retrying in 2.0s (attempt 2/3)...
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
These messages indicate:
|
|
158
|
+
|
|
159
|
+
- Rate limit was detected (429 error)
|
|
160
|
+
- Automatic retry is in progress
|
|
161
|
+
- Current attempt number and delay
|
|
162
|
+
|
|
163
|
+
## Simulating Rate Limit Scenarios
|
|
164
|
+
|
|
165
|
+
### Scenario 1: Free Tier User
|
|
166
|
+
|
|
167
|
+
**Configuration:**
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=500
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Test:** Add 50 documents in batches of 10
|
|
174
|
+
|
|
175
|
+
- Server automatically spaces requests
|
|
176
|
+
- No manual rate limit handling needed
|
|
177
|
+
- Operations complete successfully
|
|
178
|
+
|
|
179
|
+
### Scenario 2: High-Volume Batch
|
|
180
|
+
|
|
181
|
+
**Test:** Add 100+ documents
|
|
182
|
+
|
|
183
|
+
- Create collection: `batch-test-collection`
|
|
184
|
+
- Add documents in chunks
|
|
185
|
+
- Server queues requests automatically
|
|
186
|
+
- Monitor console for throttling behavior
|
|
187
|
+
|
|
188
|
+
### Scenario 3: Concurrent Operations
|
|
189
|
+
|
|
190
|
+
**Test:** Multiple searches simultaneously
|
|
191
|
+
|
|
192
|
+
- Perform several searches in quick succession
|
|
193
|
+
- Rate limiter queues them appropriately
|
|
194
|
+
- All complete without errors
|
|
195
|
+
|
|
196
|
+
## Best Practices
|
|
197
|
+
|
|
198
|
+
### 1. Configure for Your Provider
|
|
199
|
+
|
|
200
|
+
Always set `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider's limits:
|
|
201
|
+
|
|
202
|
+
**OpenAI:**
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
# Check your tier at: https://platform.openai.com/account/limits
|
|
206
|
+
EMBEDDING_MAX_REQUESTS_PER_MINUTE=<your-limit>
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
**Other Providers:**
|
|
210
|
+
|
|
211
|
+
- Check your provider's dashboard for rate limits
|
|
212
|
+
- Start conservative and increase if needed
|
|
213
|
+
|
|
214
|
+
### 2. Adjust Retry Settings for Reliability
|
|
215
|
+
|
|
216
|
+
For critical operations, increase retry attempts:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
EMBEDDING_RETRY_ATTEMPTS=5 # More resilient
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
For development/testing, reduce retries:
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
EMBEDDING_RETRY_ATTEMPTS=1 # Fail faster
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### 3. Batch Operations Wisely
|
|
229
|
+
|
|
230
|
+
Most embedding providers support batch operations:
|
|
231
|
+
|
|
232
|
+
- **OpenAI**: Up to 2048 texts per request
|
|
233
|
+
- **Cohere**: Batch support available
|
|
234
|
+
- **Voyage AI**: Batch support available
|
|
235
|
+
- **Ollama**: Sequential processing (one at a time)
|
|
236
|
+
|
|
237
|
+
The server automatically uses batch APIs when available for efficiency.
|
|
238
|
+
|
|
239
|
+
### 4. Monitor Your Usage
|
|
240
|
+
|
|
241
|
+
Watch console output during operations:
|
|
242
|
+
|
|
243
|
+
- No messages = smooth operation
|
|
244
|
+
- Retry messages = hitting limits (consider reducing rate)
|
|
245
|
+
- Error after max retries = need to reduce request volume
|
|
246
|
+
|
|
247
|
+
## Understanding Retry Behavior
|
|
248
|
+
|
|
249
|
+
### Exponential Backoff Example
|
|
250
|
+
|
|
251
|
+
With `EMBEDDING_RETRY_DELAY=1000`:
|
|
252
|
+
|
|
253
|
+
| Attempt | Delay | Total Wait |
|
|
254
|
+
| ------- | ----- | ---------- |
|
|
255
|
+
| 1st | 1s | 1s |
|
|
256
|
+
| 2nd | 2s | 3s |
|
|
257
|
+
| 3rd | 4s | 7s |
|
|
258
|
+
| 4th | 8s | 15s |
|
|
259
|
+
|
|
260
|
+
### Retry-After Header
|
|
261
|
+
|
|
262
|
+
If the provider returns a `Retry-After` header (OpenAI, some others):
|
|
263
|
+
|
|
264
|
+
- Server uses that exact delay
|
|
265
|
+
- Ignores exponential backoff
|
|
266
|
+
- Ensures optimal recovery
|
|
267
|
+
|
|
268
|
+
## Error Messages
|
|
269
|
+
|
|
270
|
+
### Success Messages
|
|
271
|
+
|
|
272
|
+
```
|
|
273
|
+
Successfully added 10 document(s) to collection "rate-limit-test".
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Retry Messages (Normal)
|
|
277
|
+
|
|
278
|
+
```
|
|
279
|
+
Rate limit reached. Retrying in 2.0s (attempt 1/3)...
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
**Action:** None needed, automatic retry in progress
|
|
283
|
+
|
|
284
|
+
### Max Retries Exceeded (Rare)
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
Error: [Provider] API rate limit exceeded after 3 retry attempts.
|
|
288
|
+
Please try again later or reduce request frequency.
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
**Action:**
|
|
292
|
+
|
|
293
|
+
- Wait a few minutes
|
|
294
|
+
- Reduce `EMBEDDING_MAX_REQUESTS_PER_MINUTE`
|
|
295
|
+
- Check your provider's dashboard for current usage
|
|
296
|
+
|
|
297
|
+
## Integration with Claude Code
|
|
298
|
+
|
|
299
|
+
The rate limiting works seamlessly with Claude Code.
|
|
300
|
+
|
|
301
|
+
**Example with Ollama (Default - No Rate Limits):**
|
|
302
|
+
|
|
303
|
+
```json
|
|
304
|
+
{
|
|
305
|
+
"mcpServers": {
|
|
306
|
+
"qdrant": {
|
|
307
|
+
"command": "node",
|
|
308
|
+
"args": ["/path/to/qdrant-mcp-server/build/index.js"],
|
|
309
|
+
"env": {
|
|
310
|
+
"QDRANT_URL": "http://localhost:6333",
|
|
311
|
+
"EMBEDDING_BASE_URL": "http://localhost:11434"
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
**Example with OpenAI (Alternative):**
|
|
319
|
+
|
|
320
|
+
```json
|
|
321
|
+
{
|
|
322
|
+
"mcpServers": {
|
|
323
|
+
"qdrant": {
|
|
324
|
+
"command": "node",
|
|
325
|
+
"args": ["/path/to/qdrant-mcp-server/build/index.js"],
|
|
326
|
+
"env": {
|
|
327
|
+
"EMBEDDING_PROVIDER": "openai",
|
|
328
|
+
"OPENAI_API_KEY": "sk-your-key",
|
|
329
|
+
"QDRANT_URL": "http://localhost:6333",
|
|
330
|
+
"EMBEDDING_MAX_REQUESTS_PER_MINUTE": "3500",
|
|
331
|
+
"EMBEDDING_RETRY_ATTEMPTS": "3",
|
|
332
|
+
"EMBEDDING_RETRY_DELAY": "1000"
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## Cleanup
|
|
340
|
+
|
|
341
|
+
```
|
|
342
|
+
Delete collection "rate-limit-test"
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## Key Takeaways
|
|
346
|
+
|
|
347
|
+
1. ✅ **Ollama Default**: No rate limits with local processing
|
|
348
|
+
2. ✅ **Automatic**: Rate limiting works out-of-the-box for cloud providers
|
|
349
|
+
3. ✅ **Configurable**: Adjust for your cloud provider tier
|
|
350
|
+
4. ✅ **Resilient**: Exponential backoff handles temporary issues
|
|
351
|
+
5. ✅ **Transparent**: Console feedback shows what's happening
|
|
352
|
+
6. ✅ **Efficient**: Batch operations optimize API usage
|
|
353
|
+
|
|
354
|
+
## Next Steps
|
|
355
|
+
|
|
356
|
+
- Explore [Knowledge Base example](../knowledge-base/) for real-world usage
|
|
357
|
+
- Learn [Advanced Filtering](../filters/) for complex queries
|
|
358
|
+
- Read [main README](../../README.md) for all configuration options
|
|
359
|
+
|
|
360
|
+
## Troubleshooting
|
|
361
|
+
|
|
362
|
+
### Still Getting Rate Limit Errors?
|
|
363
|
+
|
|
364
|
+
1. **Check your provider's limits**: Visit your provider's dashboard
|
|
365
|
+
2. **Reduce request rate**: Lower `EMBEDDING_MAX_REQUESTS_PER_MINUTE` by 20%
|
|
366
|
+
3. **Increase retry attempts**: Set `EMBEDDING_RETRY_ATTEMPTS=5`
|
|
367
|
+
4. **Wait between batches**: For very large operations, split into multiple sessions
|
|
368
|
+
|
|
369
|
+
### Slow Performance?
|
|
370
|
+
|
|
371
|
+
If operations seem slow:
|
|
372
|
+
|
|
373
|
+
- This is expected with rate limiting
|
|
374
|
+
- It's better than failed operations
|
|
375
|
+
- Upgrade your provider's tier for higher limits
|
|
376
|
+
- Consider using Ollama for unlimited local processing
|
package/package.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mhalder/qdrant-mcp-server",
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "MCP server for semantic search using local Qdrant and Ollama (default) with support for OpenAI, Cohere, and Voyage AI",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"qdrant-mcp-server": "build/index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"dev": "tsx src/index.ts",
|
|
12
|
+
"test": "vitest",
|
|
13
|
+
"test:ui": "vitest --ui",
|
|
14
|
+
"test:coverage": "vitest --coverage",
|
|
15
|
+
"test:providers": "node scripts/verify-providers.js",
|
|
16
|
+
"type-check": "tsc --noEmit",
|
|
17
|
+
"prepare": "husky"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"mcp",
|
|
21
|
+
"qdrant",
|
|
22
|
+
"vector-search",
|
|
23
|
+
"semantic-search",
|
|
24
|
+
"embeddings"
|
|
25
|
+
],
|
|
26
|
+
"author": "mhalder",
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"repository": {
|
|
29
|
+
"type": "git",
|
|
30
|
+
"url": "https://github.com/mhalder/qdrant-mcp-server.git"
|
|
31
|
+
},
|
|
32
|
+
"publishConfig": {
|
|
33
|
+
"access": "public"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
37
|
+
"@qdrant/js-client-rest": "^1.12.0",
|
|
38
|
+
"bottleneck": "^2.19.5",
|
|
39
|
+
"cohere-ai": "^7.19.0",
|
|
40
|
+
"openai": "^4.77.3",
|
|
41
|
+
"zod": "^3.24.1"
|
|
42
|
+
},
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"@commitlint/cli": "^20.1.0",
|
|
45
|
+
"@commitlint/config-conventional": "^20.0.0",
|
|
46
|
+
"@semantic-release/changelog": "^6.0.3",
|
|
47
|
+
"@semantic-release/git": "^10.0.1",
|
|
48
|
+
"@semantic-release/github": "^11.0.6",
|
|
49
|
+
"@semantic-release/npm": "^12.0.2",
|
|
50
|
+
"@types/node": "^22.10.5",
|
|
51
|
+
"@vitest/coverage-v8": "^2.1.8",
|
|
52
|
+
"@vitest/ui": "^2.1.8",
|
|
53
|
+
"husky": "^9.1.7",
|
|
54
|
+
"semantic-release": "^24.2.9",
|
|
55
|
+
"tsx": "^4.19.2",
|
|
56
|
+
"typescript": "^5.7.2",
|
|
57
|
+
"vitest": "^2.1.8"
|
|
58
|
+
}
|
|
59
|
+
}
|