@mhalder/qdrant-mcp-server 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
1
+ # Hybrid Search
2
+
3
+ Combine semantic vector search with keyword (BM25) search for more accurate and comprehensive results.
4
+
5
+ **Time:** 15-20 minutes | **Difficulty:** Intermediate
6
+
7
+ ## What is Hybrid Search?
8
+
9
+ Hybrid search combines two search approaches:
10
+
11
+ 1. **Semantic Search**: Understands meaning and context using vector embeddings
12
+ 2. **Keyword Search**: Exact term matching using BM25 sparse vectors
13
+
14
+ The results are merged using **Reciprocal Rank Fusion (RRF)**, which combines rankings from both methods to produce the best overall results.
15
+
16
+ ## When to Use Hybrid Search
17
+
18
+ Hybrid search is ideal for:
19
+
20
+ - **Technical documentation**: Users search for exact function names + concepts
21
+ - **Product search**: Match SKUs/model numbers + descriptions
22
+ - **Legal documents**: Exact citations + semantic context
23
+ - **Code search**: Function names + natural language descriptions
24
+ - **Mixed queries**: "authentication JWT" (semantic + exact keyword)
25
+
26
+ ## Benefits
27
+
28
+ - **Best of both worlds**: Precision (keyword) + recall (semantic)
29
+ - **Better results for ambiguous queries**
30
+ - **Handles typos** (semantic) and **exact matches** (keyword)
31
+ - **More control** over result relevance
32
+
33
+ ## Workflow
34
+
35
+ ### 1. Create a Collection with Hybrid Search Enabled
36
+
37
+ ```
38
+ Create a collection named "technical_docs" with Cosine distance and enableHybrid set to true
39
+ ```
40
+
41
+ **Important**: Set `enableHybrid: true` to enable hybrid search capabilities.
42
+
43
+ ### 2. Add Documents
44
+
45
+ Documents are automatically indexed for both semantic and keyword search:
46
+
47
+ ```
48
+ Add these documents to technical_docs:
49
+ - id: 1, text: "The authenticateUser function validates JWT tokens for user sessions",
50
+ metadata: {"category": "authentication", "type": "function"}
51
+ - id: 2, text: "JWT (JSON Web Token) is a compact URL-safe means of representing claims",
52
+ metadata: {"category": "authentication", "type": "definition"}
53
+ - id: 3, text: "OAuth2 provides authorization framework for third-party applications",
54
+ metadata: {"category": "authentication", "type": "protocol"}
55
+ - id: 4, text: "The login endpoint requires username and password credentials",
56
+ metadata: {"category": "authentication", "type": "endpoint"}
57
+ ```
58
+
59
+ ### 3. Perform Hybrid Search
60
+
61
+ Search using both semantic understanding and keyword matching:
62
+
63
+ ```
64
+ Search technical_docs for "JWT authentication function" with limit 3 using hybrid_search
65
+ ```
66
+
67
+ **Result**: Documents are ranked by combining:
68
+
69
+ - Semantic similarity to "authentication function"
70
+ - Exact keyword matches for "JWT"
71
+
72
+ ### 4. Hybrid Search with Filters
73
+
74
+ Combine hybrid search with metadata filtering:
75
+
76
+ ```
77
+ Search technical_docs for "JWT token validation" with limit 2 and filter {"type": "function"} using hybrid_search
78
+ ```
79
+
80
+ ## Comparison: Semantic vs Hybrid Search
81
+
82
+ ### Semantic Search Only
83
+
84
+ ```
85
+ Search technical_docs for "JWT authentication" with limit 3 using semantic_search
86
+ ```
87
+
88
+ **Result**: May miss documents with exact "JWT" match if they're not semantically similar.
89
+
90
+ ### Hybrid Search
91
+
92
+ ```
93
+ Search technical_docs for "JWT authentication" with limit 3 using hybrid_search
94
+ ```
95
+
96
+ **Result**: Finds all of the following:
97
+
98
+ - Documents semantically related to authentication
99
+ - Documents with exact "JWT" keyword match
100
+ - Best combination ranked by RRF
101
+
102
+ ## Example Scenarios
103
+
104
+ ### Scenario 1: Exact Term + Context
105
+
106
+ **Query**: "authenticateUser JWT"
107
+
108
+ **Hybrid Search finds**:
109
+
110
+ 1. Documents with `authenticateUser` function name (keyword match)
111
+ 2. Documents about JWT authentication (semantic match)
112
+ 3. Best combination of both
113
+
114
+ **Pure semantic search might miss**: Exact function name if using different terminology.
115
+
116
+ ### Scenario 2: Acronym + Description
117
+
118
+ **Query**: "API rate limiting"
119
+
120
+ **Hybrid Search finds**:
121
+
122
+ 1. Documents with "API" acronym (keyword match)
123
+ 2. Documents about rate limiting concepts (semantic match)
124
+ 3. Documents mentioning "API rate limiting" get highest score
125
+
126
+ ### Scenario 3: Typos + Exact Terms
127
+
128
+ **Query**: "OAuth2 authentification"
129
+
130
+ **Hybrid Search finds**:
131
+
132
+ 1. "OAuth2" exact matches (keyword - ignores typo in other term)
133
+ 2. Authentication concepts (semantic - understands "authentification" ≈ "authentication")
134
+
135
+ ## Technical Details
136
+
137
+ ### How It Works
138
+
139
+ 1. **Dense Vector Generation**: Your query is embedded using the configured embedding provider (Ollama, OpenAI, etc.)
140
+ 2. **Sparse Vector Generation**: Query is tokenized and BM25 scores are calculated
141
+ 3. **Parallel Search**: Both vectors are searched simultaneously
142
+ 4. **Result Fusion**: RRF combines rankings from both searches
143
+ 5. **Final Ranking**: Merged results with combined relevance scores
144
+
145
+ ### BM25 Sparse Vectors
146
+
147
+ The server uses a lightweight BM25 implementation for sparse vectors:
148
+
149
+ - Tokenization: Lowercase, strip punctuation, then split on whitespace
150
+ - IDF scoring: Inverse document frequency
151
+ - Configurable parameters: k1=1.2, b=0.75
152
+
153
+ ### Reciprocal Rank Fusion (RRF)
154
+
155
+ RRF formula: `score = Σ(1 / (k + rank))` where k=60 (default)
156
+
157
+ Benefits:
158
+
159
+ - No score normalization needed
160
+ - Robust to differences in score scales
161
+ - Works well for combining different ranking methods
162
+
163
+ ## Best Practices
164
+
165
+ 1. **Enable hybrid for technical content**: Use when exact terms matter
166
+ 2. **Use semantic for general content**: Natural language queries without technical terms
167
+ 3. **Combine with filters**: Narrow down results by category or type
168
+ 4. **Test both approaches**: Compare semantic vs hybrid for your use case
169
+ 5. **Monitor performance**: Hybrid search requires more computation
170
+
171
+ ## Performance Considerations
172
+
173
+ - **Storage**: Hybrid collections require more space (dense + sparse vectors)
174
+ - **Indexing**: Document indexing is slightly slower
175
+ - **Query time**: Hybrid search performs two searches and fusion
176
+ - **Scalability**: Qdrant optimizes both vector types efficiently
177
+
178
+ ## Troubleshooting
179
+
180
+ ### "Collection does not have hybrid search enabled"
181
+
182
+ **Solution**: Create a new collection with `enableHybrid: true`. Existing collections cannot be converted.
183
+
184
+ ### Poor results with hybrid search
185
+
186
+ **Try**:
187
+
188
+ 1. Adjust query phrasing to include key terms
189
+ 2. Use metadata filters to narrow scope
190
+ 3. Increase `limit` to see more results
191
+ 4. Compare with pure semantic search
192
+
193
+ ### Slow query performance
194
+
195
+ **Solutions**:
196
+
197
+ 1. Reduce prefetch limit (contact support for tuning)
198
+ 2. Add filters to narrow search space
199
+ 3. Use fewer documents or partition data
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mhalder/qdrant-mcp-server",
3
- "version": "1.1.1",
3
+ "version": "1.2.0",
4
4
  "description": "MCP server for semantic search using local Qdrant and Ollama (default) with support for OpenAI, Cohere, and Voyage AI",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,87 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { BM25SparseVectorGenerator } from "./sparse.js";
3
+
4
describe("BM25SparseVectorGenerator", () => {
  type Vector = ReturnType<BM25SparseVectorGenerator["generate"]>;

  /** Builds a generator with the default constructor arguments. */
  const createGenerator = (): BM25SparseVectorGenerator => new BM25SparseVectorGenerator();

  /** Shared expectation: both arrays exist and at least one dimension is set. */
  const expectNonEmpty = (vector: Vector): void => {
    expect(vector.indices).toBeDefined();
    expect(vector.values).toBeDefined();
    expect(vector.indices.length).toBeGreaterThan(0);
  };

  it("should generate sparse vectors for simple text", () => {
    const vector = createGenerator().generate("hello world");

    expectNonEmpty(vector);
    // Indices and values must stay aligned one-to-one.
    expect(vector.values.length).toBe(vector.indices.length);
  });

  it("should generate different vectors for different texts", () => {
    const generator = createGenerator();
    const hello = generator.generate("hello world");
    const goodbye = generator.generate("goodbye world");

    // Texts that differ in one word must not share the same index list.
    expect(hello.indices).not.toEqual(goodbye.indices);
  });

  it("should generate consistent vectors for the same text", () => {
    const generator = createGenerator();
    const first = generator.generate("hello world");
    const second = generator.generate("hello world");

    expect(first.indices).toEqual(second.indices);
    expect(first.values).toEqual(second.values);
  });

  it("should handle empty strings", () => {
    const vector = createGenerator().generate("");

    expect(vector.indices).toHaveLength(0);
    expect(vector.values).toHaveLength(0);
  });

  it("should handle special characters and punctuation", () => {
    const vector = createGenerator().generate("hello, world! how are you?");

    expectNonEmpty(vector);
  });

  it("should train on corpus and generate IDF scores", () => {
    const generator = createGenerator();
    const corpus = ["the quick brown fox", "jumps over the lazy dog", "the fox is quick"];

    generator.train(corpus);

    expectNonEmpty(generator.generate("quick fox"));
  });

  it("should use static generateSimple method", () => {
    expectNonEmpty(BM25SparseVectorGenerator.generateSimple("hello world"));
  });

  it("should lowercase and tokenize text properly", () => {
    const generator = createGenerator();
    const upper = generator.generate("HELLO WORLD");
    const lower = generator.generate("hello world");

    // Case must not influence which dimensions are produced.
    expect(upper.indices).toEqual(lower.indices);
  });

  it("should generate positive values", () => {
    const vector = createGenerator().generate("hello world");

    for (const weight of vector.values) {
      expect(weight).toBeGreaterThan(0);
    }
  });
});
@@ -0,0 +1,127 @@
1
+ /**
2
+ * BM25 Sparse Vector Generator
3
+ *
4
+ * This module provides simple BM25-like sparse vector generation for keyword search.
5
+ * For production use, consider using a proper BM25 implementation or Qdrant's built-in
6
+ * sparse vector generation via FastEmbed.
7
+ */
8
+
9
+ import type { SparseVector } from "../qdrant/client.js";
10
+
11
/** Occurrence count per token for a single tokenized text. */
type TokenFrequency = Record<string, number>;
14
+
15
/**
 * Generates BM25-weighted sparse vectors (parallel `indices` / `values` arrays)
 * for keyword search.
 *
 * Each token is mapped to a dimension by a stable 32-bit hash of its text, so
 * the same token always gets the same index regardless of which instance
 * produced the vector or in which order texts were processed. This is what
 * lets a query vector built by one call match document vectors built by
 * another (including via {@link BM25SparseVectorGenerator.generateSimple},
 * which creates a fresh instance per call).
 *
 * NOTE(review): vectors produced by an earlier, insertion-order based indexing
 * scheme are not comparable with these and need to be re-indexed.
 */
export class BM25SparseVectorGenerator {
  private readonly idfScores: Map<string, number>;
  private documentCount: number;
  private readonly k1: number;
  private readonly b: number;

  /**
   * @param k1 - Term-frequency saturation parameter of the BM25 formula.
   * @param b - Document-length normalization parameter of the BM25 formula.
   */
  constructor(k1: number = 1.2, b: number = 0.75) {
    this.idfScores = new Map();
    this.documentCount = 0;
    this.k1 = k1;
    this.b = b;
  }

  /**
   * Maps a token to a stable unsigned 32-bit dimension index using an
   * FNV-1a style hash over the token's UTF-16 code units.
   */
  private static hashToken(token: string): number {
    let hash = 0x811c9dc5;
    for (let i = 0; i < token.length; i++) {
      hash ^= token.charCodeAt(i);
      hash = Math.imul(hash, 0x01000193);
    }
    // Force the signed 32-bit result into the unsigned range.
    return hash >>> 0;
  }

  /**
   * Splits text into lowercase tokens.
   *
   * Every character that is not a Unicode letter, combining mark, digit,
   * underscore or whitespace is replaced by a space before splitting on
   * whitespace, so punctuation separates words while non-ASCII words
   * (e.g. accented or CJK text) are kept intact.
   */
  private tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
      .split(/\s+/)
      .filter((token) => token.length > 0);
  }

  /**
   * Counts how many times each token occurs.
   */
  private getTermFrequency(tokens: string[]): TokenFrequency {
    // A prototype-less object is required here: on a plain `{}` the tokens
    // "constructor" and "__proto__" would read inherited members instead of
    // `undefined`, corrupting their counts.
    const tf: TokenFrequency = Object.create(null);
    for (const token of tokens) {
      tf[token] = (tf[token] ?? 0) + 1;
    }
    return tf;
  }

  /**
   * Computes IDF scores from a training corpus (optional pre-training step).
   *
   * Each call replaces the statistics of any previous call; tokens that do not
   * occur in `documents` fall back to the default IDF used by `generate`.
   *
   * @param documents - Raw document texts to derive document frequencies from.
   */
  train(documents: string[]): void {
    this.documentCount = documents.length;
    // Drop scores from an earlier corpus: they were computed against a
    // different document count and would be inconsistent with the new ones.
    this.idfScores.clear();

    // Document frequency: number of documents containing each token.
    const documentFrequency = new Map<string, number>();
    for (const doc of documents) {
      for (const token of new Set(this.tokenize(doc))) {
        documentFrequency.set(token, (documentFrequency.get(token) ?? 0) + 1);
      }
    }

    for (const [token, df] of documentFrequency) {
      const idf = Math.log((this.documentCount - df + 0.5) / (df + 0.5) + 1.0);
      this.idfScores.set(token, idf);
    }
  }

  /**
   * Generates the sparse vector for a query or document.
   *
   * @param text - Text to vectorize.
   * @param avgDocLength - Average document length (in tokens) used for BM25
   *   length normalization. Non-positive values disable length normalization
   *   instead of zeroing every score.
   * @returns Indices and values of the non-zero dimensions; each index appears
   *   at most once. Empty arrays when `text` contains no tokens.
   */
  generate(text: string, avgDocLength: number = 50): SparseVector {
    const tokens = this.tokenize(text);
    const tf = this.getTermFrequency(tokens);
    const docLength = tokens.length;

    // Loop-invariant part of the BM25 denominator.
    const lengthRatio = avgDocLength > 0 ? docLength / avgDocLength : 1;
    const lengthNorm = this.k1 * (1 - this.b + this.b * lengthRatio);

    const weights = new Map<number, number>();
    for (const [token, freq] of Object.entries(tf)) {
      // Tokens without a trained IDF score use a neutral weight of 1.
      const idf = this.idfScores.get(token) ?? 1.0;

      // BM25 formula
      const score = idf * ((freq * (this.k1 + 1)) / (freq + lengthNorm));

      if (score > 0) {
        const index = BM25SparseVectorGenerator.hashToken(token);
        // Two tokens hashing to the same dimension are merged by summing, so
        // every index appears once in the result.
        weights.set(index, (weights.get(index) ?? 0) + score);
      }
    }

    return { indices: Array.from(weights.keys()), values: Array.from(weights.values()) };
  }

  /**
   * Generates a sparse vector with an untrained, default-configured generator.
   *
   * @param text - Text to vectorize.
   */
  static generateSimple(text: string): SparseVector {
    const generator = new BM25SparseVectorGenerator();
    return generator.generate(text);
  }
}