@mhalder/qdrant-mcp-server 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/.codecov.yml +16 -0
  2. package/CHANGELOG.md +25 -0
  3. package/README.md +304 -9
  4. package/build/code/chunker/base.d.ts +19 -0
  5. package/build/code/chunker/base.d.ts.map +1 -0
  6. package/build/code/chunker/base.js +5 -0
  7. package/build/code/chunker/base.js.map +1 -0
  8. package/build/code/chunker/character-chunker.d.ts +22 -0
  9. package/build/code/chunker/character-chunker.d.ts.map +1 -0
  10. package/build/code/chunker/character-chunker.js +111 -0
  11. package/build/code/chunker/character-chunker.js.map +1 -0
  12. package/build/code/chunker/tree-sitter-chunker.d.ts +29 -0
  13. package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -0
  14. package/build/code/chunker/tree-sitter-chunker.js +213 -0
  15. package/build/code/chunker/tree-sitter-chunker.js.map +1 -0
  16. package/build/code/config.d.ts +11 -0
  17. package/build/code/config.d.ts.map +1 -0
  18. package/build/code/config.js +145 -0
  19. package/build/code/config.js.map +1 -0
  20. package/build/code/indexer.d.ts +42 -0
  21. package/build/code/indexer.d.ts.map +1 -0
  22. package/build/code/indexer.js +508 -0
  23. package/build/code/indexer.js.map +1 -0
  24. package/build/code/metadata.d.ts +32 -0
  25. package/build/code/metadata.d.ts.map +1 -0
  26. package/build/code/metadata.js +128 -0
  27. package/build/code/metadata.js.map +1 -0
  28. package/build/code/scanner.d.ts +35 -0
  29. package/build/code/scanner.d.ts.map +1 -0
  30. package/build/code/scanner.js +108 -0
  31. package/build/code/scanner.js.map +1 -0
  32. package/build/code/sync/merkle.d.ts +45 -0
  33. package/build/code/sync/merkle.d.ts.map +1 -0
  34. package/build/code/sync/merkle.js +116 -0
  35. package/build/code/sync/merkle.js.map +1 -0
  36. package/build/code/sync/snapshot.d.ts +41 -0
  37. package/build/code/sync/snapshot.d.ts.map +1 -0
  38. package/build/code/sync/snapshot.js +91 -0
  39. package/build/code/sync/snapshot.js.map +1 -0
  40. package/build/code/sync/synchronizer.d.ts +53 -0
  41. package/build/code/sync/synchronizer.d.ts.map +1 -0
  42. package/build/code/sync/synchronizer.js +132 -0
  43. package/build/code/sync/synchronizer.js.map +1 -0
  44. package/build/code/types.d.ts +98 -0
  45. package/build/code/types.d.ts.map +1 -0
  46. package/build/code/types.js +5 -0
  47. package/build/code/types.js.map +1 -0
  48. package/build/index.js +321 -6
  49. package/build/index.js.map +1 -1
  50. package/build/prompts/index.d.ts +7 -0
  51. package/build/prompts/index.d.ts.map +1 -0
  52. package/build/prompts/index.js +7 -0
  53. package/build/prompts/index.js.map +1 -0
  54. package/build/prompts/index.test.d.ts +2 -0
  55. package/build/prompts/index.test.d.ts.map +1 -0
  56. package/build/prompts/index.test.js +25 -0
  57. package/build/prompts/index.test.js.map +1 -0
  58. package/build/prompts/loader.d.ts +25 -0
  59. package/build/prompts/loader.d.ts.map +1 -0
  60. package/build/prompts/loader.js +81 -0
  61. package/build/prompts/loader.js.map +1 -0
  62. package/build/prompts/loader.test.d.ts +2 -0
  63. package/build/prompts/loader.test.d.ts.map +1 -0
  64. package/build/prompts/loader.test.js +417 -0
  65. package/build/prompts/loader.test.js.map +1 -0
  66. package/build/prompts/template.d.ts +20 -0
  67. package/build/prompts/template.d.ts.map +1 -0
  68. package/build/prompts/template.js +52 -0
  69. package/build/prompts/template.js.map +1 -0
  70. package/build/prompts/template.test.d.ts +2 -0
  71. package/build/prompts/template.test.d.ts.map +1 -0
  72. package/build/prompts/template.test.js +163 -0
  73. package/build/prompts/template.test.js.map +1 -0
  74. package/build/prompts/types.d.ts +34 -0
  75. package/build/prompts/types.d.ts.map +1 -0
  76. package/build/prompts/types.js +5 -0
  77. package/build/prompts/types.js.map +1 -0
  78. package/examples/code-search/README.md +271 -0
  79. package/package.json +13 -1
  80. package/prompts.example.json +96 -0
  81. package/src/code/chunker/base.ts +22 -0
  82. package/src/code/chunker/character-chunker.ts +131 -0
  83. package/src/code/chunker/tree-sitter-chunker.ts +250 -0
  84. package/src/code/config.ts +156 -0
  85. package/src/code/indexer.ts +613 -0
  86. package/src/code/metadata.ts +153 -0
  87. package/src/code/scanner.ts +124 -0
  88. package/src/code/sync/merkle.ts +136 -0
  89. package/src/code/sync/snapshot.ts +110 -0
  90. package/src/code/sync/synchronizer.ts +154 -0
  91. package/src/code/types.ts +117 -0
  92. package/src/index.ts +382 -5
  93. package/src/prompts/index.test.ts +29 -0
  94. package/src/prompts/index.ts +7 -0
  95. package/src/prompts/loader.test.ts +494 -0
  96. package/src/prompts/loader.ts +90 -0
  97. package/src/prompts/template.test.ts +212 -0
  98. package/src/prompts/template.ts +69 -0
  99. package/src/prompts/types.ts +37 -0
  100. package/tests/code/chunker/character-chunker.test.ts +141 -0
  101. package/tests/code/chunker/tree-sitter-chunker.test.ts +275 -0
  102. package/tests/code/fixtures/sample-py/calculator.py +32 -0
  103. package/tests/code/fixtures/sample-ts/async-operations.ts +120 -0
  104. package/tests/code/fixtures/sample-ts/auth.ts +31 -0
  105. package/tests/code/fixtures/sample-ts/config.ts +52 -0
  106. package/tests/code/fixtures/sample-ts/database.ts +50 -0
  107. package/tests/code/fixtures/sample-ts/index.ts +39 -0
  108. package/tests/code/fixtures/sample-ts/types-advanced.ts +132 -0
  109. package/tests/code/fixtures/sample-ts/utils.ts +105 -0
  110. package/tests/code/fixtures/sample-ts/validator.ts +169 -0
  111. package/tests/code/indexer.test.ts +828 -0
  112. package/tests/code/integration.test.ts +708 -0
  113. package/tests/code/metadata.test.ts +457 -0
  114. package/tests/code/scanner.test.ts +131 -0
  115. package/tests/code/sync/merkle.test.ts +406 -0
  116. package/tests/code/sync/snapshot.test.ts +360 -0
  117. package/tests/code/sync/synchronizer.test.ts +501 -0
  118. package/vitest.config.ts +1 -0
package/.codecov.yml ADDED
@@ -0,0 +1,16 @@
1
+ coverage:
2
+ status:
3
+ project:
4
+ default:
5
+ target: auto
6
+ threshold: 5%
7
+ informational: true
8
+ patch:
9
+ default:
10
+ target: 80%
11
+ threshold: 5%
12
+
13
+ comment:
14
+ layout: "header, diff, flags, components"
15
+ behavior: default
16
+ require_changes: false
package/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 1.5.0 (2025-10-30)
2
+
3
+ * Merge pull request #32 from mhalder/feature/code-vectorization ([50c6cb0](https://github.com/mhalder/qdrant-mcp-server/commit/50c6cb0)), closes [#32](https://github.com/mhalder/qdrant-mcp-server/issues/32)
4
+ * ci: add codecov configuration for coverage thresholds ([153a85e](https://github.com/mhalder/qdrant-mcp-server/commit/153a85e))
5
+ * ci: make codecov project check informational ([f8a09c3](https://github.com/mhalder/qdrant-mcp-server/commit/f8a09c3))
6
+ * test: add comprehensive test suite for code vectorization (802/840 passing) ([969c000](https://github.com/mhalder/qdrant-mcp-server/commit/969c000))
7
+ * test: add path validation tests to improve coverage ([4cbf5ed](https://github.com/mhalder/qdrant-mcp-server/commit/4cbf5ed))
8
+ * test: fix test samples and secret detection (809/840 passing) ([963a5c7](https://github.com/mhalder/qdrant-mcp-server/commit/963a5c7))
9
+ * test: improve test coverage to 97.72% and fix race conditions ([78a75cd](https://github.com/mhalder/qdrant-mcp-server/commit/78a75cd))
10
+ * feat: add code vectorization for semantic code search (#31) ([2a3745e](https://github.com/mhalder/qdrant-mcp-server/commit/2a3745e)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
11
+ * feat: add hybrid search and advanced filtering for code search (#31) ([bfb022e](https://github.com/mhalder/qdrant-mcp-server/commit/bfb022e)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
12
+ * feat: add incremental re-indexing with Merkle tree change detection (#31) ([dcd7c55](https://github.com/mhalder/qdrant-mcp-server/commit/dcd7c55)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
13
+ * feat: add path traversal validation for security ([0d783cc](https://github.com/mhalder/qdrant-mcp-server/commit/0d783cc)), closes [#2](https://github.com/mhalder/qdrant-mcp-server/issues/2)
14
+ * fix: improve test coverage and fix critical bugs (830/840 passing) ([32c13b2](https://github.com/mhalder/qdrant-mcp-server/commit/32c13b2))
15
+ * fix: resolve tree-sitter peer dependency conflicts for Node.js 22.x ([61d614f](https://github.com/mhalder/qdrant-mcp-server/commit/61d614f))
16
+ * docs: add comprehensive code vectorization examples and documentation ([1e0d48d](https://github.com/mhalder/qdrant-mcp-server/commit/1e0d48d))
17
+ * style: format code vectorization module with project formatter ([beb71bb](https://github.com/mhalder/qdrant-mcp-server/commit/beb71bb))
18
+
19
+ ## 1.4.0 (2025-10-13)
20
+
21
+ * Merge pull request #29 from mhalder/feature/configurable-prompts ([f9652b1](https://github.com/mhalder/qdrant-mcp-server/commit/f9652b1)), closes [#29](https://github.com/mhalder/qdrant-mcp-server/issues/29)
22
+ * feat: add support for configurable MCP prompts ([e3cc6c2](https://github.com/mhalder/qdrant-mcp-server/commit/e3cc6c2)), closes [#28](https://github.com/mhalder/qdrant-mcp-server/issues/28)
23
+ * feat: set default prompts.json path with auto-loading ([203694a](https://github.com/mhalder/qdrant-mcp-server/commit/203694a))
24
+ * test: add comprehensive tests for prompts feature ([bb9ca66](https://github.com/mhalder/qdrant-mcp-server/commit/bb9ca66))
25
+
1
26
  ## <small>1.3.1 (2025-10-12)</small>
2
27
 
3
28
  * Merge pull request #27 from mhalder/fix/empty-responses ([30d33a7](https://github.com/mhalder/qdrant-mcp-server/commit/30d33a7)), closes [#27](https://github.com/mhalder/qdrant-mcp-server/issues/27)
package/README.md CHANGED
@@ -9,9 +9,12 @@ A Model Context Protocol (MCP) server providing semantic search capabilities usi
9
9
 
10
10
  - **Zero Setup**: Works out of the box with Ollama - no API keys required
11
11
  - **Privacy-First**: Local embeddings and vector storage - data never leaves your machine
12
+ - **Code Vectorization**: Intelligent codebase indexing with AST-aware chunking and semantic code search
12
13
  - **Multiple Providers**: Ollama (default), OpenAI, Cohere, and Voyage AI
13
14
  - **Hybrid Search**: Combine semantic and keyword search for better results
14
15
  - **Semantic Search**: Natural language search with metadata filtering
16
+ - **Incremental Indexing**: Efficient updates - only re-index changed files
17
+ - **Configurable Prompts**: Create custom prompts for guided workflows without code changes
15
18
  - **Rate Limiting**: Intelligent throttling with exponential backoff
16
19
  - **Full CRUD**: Create, search, and manage collections and documents
17
20
  - **Flexible Deployment**: Run locally (stdio) or as a remote HTTP server
@@ -123,11 +126,273 @@ See [Advanced Configuration](#advanced-configuration) section below for all opti
123
126
  | `hybrid_search` | Hybrid search combining semantic and keyword (BM25) search with RRF |
124
127
  | `delete_documents` | Delete specific documents by ID |
125
128
 
129
+ ### Code Vectorization
130
+
131
+ | Tool | Description |
132
+ | ------------------ | ---------------------------------------------------------------------------------------------- |
133
+ | `index_codebase` | Index a codebase for semantic code search with AST-aware chunking |
134
+ | `search_code` | Search indexed codebase using natural language queries |
135
+ | `reindex_changes` | Incrementally re-index only changed files (detects added/modified/deleted) |
136
+ | `get_index_status` | Get indexing status and statistics for a codebase |
137
+ | `clear_index` | Delete all indexed data for a codebase |
138
+
126
139
  ### Resources
127
140
 
128
141
  - `qdrant://collections` - List all collections
129
142
  - `qdrant://collection/{name}` - Collection details
130
143
 
144
+ ## Configurable Prompts
145
+
146
+ Create custom prompts tailored to your specific use cases without modifying code. Prompts provide guided workflows for common tasks.
147
+
148
+ **Note**: By default, the server looks for `prompts.json` in the project root directory. If the file exists, prompts are automatically loaded. You can specify a custom path using the `PROMPTS_CONFIG_FILE` environment variable.
149
+
150
+ ### Setup
151
+
152
+ 1. **Create a prompts configuration file** (e.g., `prompts.json` in the project root):
153
+
154
+ See [`prompts.example.json`](prompts.example.json) for example configurations you can copy and customize.
155
+
156
+ 2. **Configure the server** (optional - only needed for custom path):
157
+
158
+ If you place `prompts.json` in the project root, no additional configuration is needed. To use a custom path:
159
+
160
+ ```json
161
+ {
162
+ "mcpServers": {
163
+ "qdrant": {
164
+ "command": "node",
165
+ "args": ["/path/to/qdrant-mcp-server/build/index.js"],
166
+ "env": {
167
+ "QDRANT_URL": "http://localhost:6333",
168
+ "PROMPTS_CONFIG_FILE": "/custom/path/to/prompts.json"
169
+ }
170
+ }
171
+ }
172
+ }
173
+ ```
174
+
175
+ 3. **Use prompts** in your AI assistant:
176
+
177
+ **Claude Code:**
178
+
179
+ ```bash
180
+ /mcp__qdrant__find_similar_docs papers "neural networks" 10
181
+ ```
182
+
183
+ **VSCode:**
184
+
185
+ ```bash
186
+ /mcp.qdrant.find_similar_docs papers "neural networks" 10
187
+ ```
188
+
189
+ ### Example Prompts
190
+
191
+ See [`prompts.example.json`](prompts.example.json) for ready-to-use prompts including:
192
+
193
+ - `find_similar_docs` - Semantic search with result explanation
194
+ - `setup_rag_collection` - Create RAG-optimized collections
195
+ - `analyze_collection` - Collection insights and recommendations
196
+ - `bulk_add_documents` - Guided bulk document insertion
197
+ - `search_with_filter` - Metadata filtering assistance
198
+ - `compare_search_methods` - Semantic vs hybrid search comparison
199
+ - `collection_maintenance` - Maintenance and cleanup workflows
200
+ - `migrate_to_hybrid` - Collection migration guide
201
+
202
+ ### Template Syntax
203
+
204
+ Templates use `{{variable}}` placeholders:
205
+
206
+ - Required arguments must be provided
207
+ - Optional arguments use defaults if not specified
208
+ - Unknown variables are left as-is in the output
209
+
210
+ ## Code Vectorization
211
+
212
+ Index your codebase and query it with natural-language semantic search. Perfect for AI-assisted development, code exploration, and understanding large codebases.
213
+
214
+ ### Features
215
+
216
+ - **AST-Aware Chunking**: Intelligent code splitting at function/class boundaries using tree-sitter
217
+ - **Multi-Language Support**: 35+ file types including TypeScript, Python, Java, Go, Rust, C++, and more
218
+ - **Incremental Updates**: Only re-index changed files for fast updates
219
+ - **Smart Ignore Patterns**: Respects .gitignore, .dockerignore, and custom .contextignore files
220
+ - **Semantic Search**: Natural language queries to find relevant code
221
+ - **Metadata Filtering**: Filter by file type, path patterns, or language
222
+ - **Local-First**: All processing happens locally - your code never leaves your machine
223
+
224
+ ### Quick Start
225
+
226
+ **1. Index your codebase:**
227
+
228
+ ```bash
229
+ # Via Claude Code MCP tool
230
+ /mcp__qdrant__index_codebase /path/to/your/project
231
+ ```
232
+
233
+ **2. Search your code:**
234
+
235
+ ```bash
236
+ # Natural language search
237
+ /mcp__qdrant__search_code /path/to/your/project "authentication middleware"
238
+
239
+ # Filter by file type
240
+ /mcp__qdrant__search_code /path/to/your/project "database schema" --fileTypes .ts,.js
241
+
242
+ # Filter by path pattern
243
+ /mcp__qdrant__search_code /path/to/your/project "API endpoints" --pathPattern src/api/**
244
+ ```
245
+
246
+ **3. Update after changes:**
247
+
248
+ ```bash
249
+ # Incrementally re-index only changed files
250
+ /mcp__qdrant__reindex_changes /path/to/your/project
251
+ ```
252
+
253
+ ### Usage Examples
254
+
255
+ #### Index a TypeScript Project
256
+
257
+ ```typescript
258
+ // The MCP tool automatically:
259
+ // 1. Scans all .ts, .tsx, .js, .jsx files
260
+ // 2. Respects .gitignore patterns (skips node_modules, dist, etc.)
261
+ // 3. Chunks code at function/class boundaries
262
+ // 4. Generates embeddings using your configured provider
263
+ // 5. Stores in Qdrant with metadata (file path, line numbers, language)
264
+
265
+ index_codebase({
266
+ path: "/workspace/my-app",
267
+ forceReindex: false // Set to true to re-index from scratch
268
+ })
269
+
270
+ // Output:
271
+ // ✓ Indexed 247 files (1,823 chunks) in 45.2s
272
+ ```
273
+
274
+ #### Search for Authentication Code
275
+
276
+ ```typescript
277
+ search_code({
278
+ path: "/workspace/my-app",
279
+ query: "how does user authentication work?",
280
+ limit: 5
281
+ })
282
+
283
+ // Results include file path, line numbers, and code snippets:
284
+ // [
285
+ // {
286
+ // filePath: "src/auth/middleware.ts",
287
+ // startLine: 15,
288
+ // endLine: 42,
289
+ // content: "export async function authenticateUser(req: Request) { ... }",
290
+ // score: 0.89,
291
+ // language: "typescript"
292
+ // },
293
+ // ...
294
+ // ]
295
+ ```
296
+
297
+ #### Search with Filters
298
+
299
+ ```typescript
300
+ // Only search TypeScript files
301
+ search_code({
302
+ path: "/workspace/my-app",
303
+ query: "error handling patterns",
304
+ fileTypes: [".ts", ".tsx"],
305
+ limit: 10
306
+ })
307
+
308
+ // Only search in specific directories
309
+ search_code({
310
+ path: "/workspace/my-app",
311
+ query: "API route handlers",
312
+ pathPattern: "src/api/**",
313
+ limit: 10
314
+ })
315
+ ```
316
+
317
+ #### Incremental Re-indexing
318
+
319
+ ```typescript
320
+ // After making changes to your codebase
321
+ reindex_changes({
322
+ path: "/workspace/my-app"
323
+ })
324
+
325
+ // Output:
326
+ // ✓ Updated: +3 files added, ~5 files modified, -1 files deleted
327
+ // ✓ Chunks: +47 added, -23 deleted in 8.3s
328
+ ```
329
+
330
+ #### Check Indexing Status
331
+
332
+ ```typescript
333
+ get_index_status({
334
+ path: "/workspace/my-app"
335
+ })
336
+
337
+ // Output:
338
+ // {
339
+ // isIndexed: true,
340
+ // collectionName: "code_a3f8d2e1",
341
+ // chunksCount: 1823,
342
+ // filesCount: 247,
343
+ // lastUpdated: "2025-01-30T10:15:00Z",
344
+ // languages: ["typescript", "javascript", "json"]
345
+ // }
346
+ ```
347
+
348
+ ### Supported Languages
349
+
350
+ **Languages and formats** (35+ file types):
351
+ - **Web**: TypeScript, JavaScript, Vue, Svelte
352
+ - **Backend**: Python, Java, Go, Rust, Ruby, PHP
353
+ - **Systems**: C, C++, C#
354
+ - **Mobile**: Swift, Kotlin, Dart
355
+ - **Functional**: Scala, Clojure, Haskell, OCaml
356
+ - **Scripting**: Bash, Shell, Fish
357
+ - **Data**: SQL, GraphQL, Protocol Buffers
358
+ - **Config**: JSON, YAML, TOML, XML, Markdown
359
+
360
+ See [configuration](#code-vectorization-configuration) for full list and customization options.
361
+
362
+ ### Custom Ignore Patterns
363
+
364
+ Create a `.contextignore` file in your project root to specify additional patterns to ignore:
365
+
366
+ ```gitignore
367
+ # .contextignore
368
+ **/test/**
369
+ **/*.test.ts
370
+ **/*.spec.ts
371
+ **/fixtures/**
372
+ **/mocks/**
373
+ **/__tests__/**
374
+ ```
375
+
376
+ ### Best Practices
377
+
378
+ 1. **Index Once, Update Incrementally**: Use `index_codebase` for initial indexing, then `reindex_changes` for updates
379
+ 2. **Use Filters**: Narrow search scope with `fileTypes` and `pathPattern` for better results
380
+ 3. **Meaningful Queries**: Use natural language that describes what you're looking for (e.g., "database connection pooling" instead of "db")
381
+ 4. **Check Status First**: Use `get_index_status` to verify a codebase is indexed before searching
382
+ 5. **Local Embedding**: Use Ollama (default) to keep everything local and private
383
+
384
+ ### Performance
385
+
386
+ Typical performance on a modern laptop (Apple M1/M2 or similar):
387
+
388
+ | Codebase Size | Files | Indexing Time | Search Latency |
389
+ |--------------|-------|---------------|----------------|
390
+ | Small (10k LOC) | 50 | ~10s | <100ms |
391
+ | Medium (100k LOC) | 500 | ~2min | <200ms |
392
+ | Large (500k LOC) | 2,500 | ~10min | <500ms |
393
+
394
+ **Note**: Indexing time varies based on embedding provider. Ollama (local) is fastest for initial indexing.
395
+
131
396
  ## Examples
132
397
 
133
398
  See [examples/](examples/) directory for detailed guides:
@@ -136,17 +401,26 @@ See [examples/](examples/) directory for detailed guides:
136
401
  - **[Knowledge Base](examples/knowledge-base/)** - Structured documentation with metadata
137
402
  - **[Advanced Filtering](examples/filters/)** - Complex boolean filters
138
403
  - **[Rate Limiting](examples/rate-limiting/)** - Batch processing with cloud providers
404
+ - **[Code Search](examples/code-search/)** - Index codebases and semantic code search
139
405
 
140
406
  ## Advanced Configuration
141
407
 
142
408
  ### Environment Variables
143
409
 
410
+ #### Core Configuration
411
+
144
412
  | Variable | Description | Default |
145
413
  | ----------------------------------- | -------------------------------------- | --------------------- |
146
414
  | `TRANSPORT_MODE` | "stdio" or "http" | stdio |
147
415
  | `HTTP_PORT` | Port for HTTP transport | 3000 |
148
416
  | `EMBEDDING_PROVIDER` | "ollama", "openai", "cohere", "voyage" | ollama |
149
417
  | `QDRANT_URL` | Qdrant server URL | http://localhost:6333 |
418
+ | `PROMPTS_CONFIG_FILE` | Path to prompts configuration JSON | prompts.json |
419
+
420
+ #### Embedding Configuration
421
+
422
+ | Variable | Description | Default |
423
+ | ----------------------------------- | -------------------------------------- | --------------------- |
150
424
  | `EMBEDDING_MODEL` | Model name | Provider-specific |
151
425
  | `EMBEDDING_BASE_URL` | Custom API URL | Provider-specific |
152
426
  | `EMBEDDING_MAX_REQUESTS_PER_MINUTE` | Rate limit | Provider-specific |
@@ -156,6 +430,18 @@ See [examples/](examples/) directory for detailed guides:
156
430
  | `COHERE_API_KEY` | Cohere API key | - |
157
431
  | `VOYAGE_API_KEY` | Voyage AI API key | - |
158
432
 
433
+ #### Code Vectorization Configuration
434
+
435
+ | Variable | Description | Default |
436
+ | ------------------------- | ------------------------------------------------ | ------- |
437
+ | `CODE_CHUNK_SIZE` | Maximum chunk size in characters | 2500 |
438
+ | `CODE_CHUNK_OVERLAP` | Overlap between chunks in characters | 300 |
439
+ | `CODE_ENABLE_AST` | Enable AST-aware chunking (tree-sitter) | true |
440
+ | `CODE_BATCH_SIZE` | Number of chunks to embed in one batch | 100 |
441
+ | `CODE_CUSTOM_EXTENSIONS` | Additional file extensions (comma-separated) | - |
442
+ | `CODE_CUSTOM_IGNORE` | Additional ignore patterns (comma-separated) | - |
443
+ | `CODE_DEFAULT_LIMIT` | Default search result limit | 5 |
444
+
159
445
  ### Provider Comparison
160
446
 
161
447
  | Provider | Models | Dimensions | Rate Limit | Notes |
@@ -169,15 +455,20 @@ See [examples/](examples/) directory for detailed guides:
169
455
 
170
456
  ## Troubleshooting
171
457
 
172
- | Issue | Solution |
173
- | ---------------------- | ---------------------------------------------------------------------------- |
174
- | **Qdrant not running** | `docker compose up -d` |
175
- | **Collection missing** | Create collection first before adding documents |
176
- | **Ollama not running** | Verify with `curl http://localhost:11434`, start with `docker compose up -d` |
177
- | **Model missing** | `docker exec ollama ollama pull nomic-embed-text` |
178
- | **Rate limit errors** | Adjust `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider tier |
179
- | **API key errors** | Verify correct API key in environment configuration |
180
- | **Filter errors** | Ensure Qdrant filter format, check field names match metadata |
458
+ | Issue | Solution |
459
+ | ------------------------------- | ---------------------------------------------------------------------------- |
460
+ | **Qdrant not running** | `docker compose up -d` |
461
+ | **Collection missing** | Create collection first before adding documents |
462
+ | **Ollama not running** | Verify with `curl http://localhost:11434`, start with `docker compose up -d` |
463
+ | **Model missing** | `docker exec ollama ollama pull nomic-embed-text` |
464
+ | **Rate limit errors** | Adjust `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider tier |
465
+ | **API key errors** | Verify correct API key in environment configuration |
466
+ | **Filter errors** | Ensure Qdrant filter format, check field names match metadata |
467
+ | **Codebase not indexed** | Run `index_codebase` before `search_code` |
468
+ | **Slow indexing** | Use Ollama (local) for faster indexing, or increase `CODE_BATCH_SIZE` |
469
+ | **Files not found** | Check `.gitignore` and `.contextignore` patterns |
470
+ | **Search returns no results** | Try broader queries, check if codebase is indexed with `get_index_status` |
471
+ | **Out of memory during index** | Reduce `CODE_CHUNK_SIZE` or `CODE_BATCH_SIZE` |
181
472
 
182
473
  ## Development
183
474
 
@@ -208,6 +499,10 @@ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for:
208
499
 
209
500
  **Automated releases**: Semantic versioning via conventional commits - `feat:` → minor, `fix:` → patch, `BREAKING CHANGE:` → major.
210
501
 
502
+ ## Acknowledgments
503
+
504
+ The code vectorization feature is inspired by and builds upon concepts from the excellent [claude-context](https://github.com/zilliztech/claude-context) project (MIT License, Copyright 2025 Zilliz).
505
+
211
506
  ## License
212
507
 
213
508
  MIT - see [LICENSE](LICENSE) file.
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Base interface for code chunkers
3
+ */
4
+ import type { CodeChunk } from "../types.js";
5
+ export interface CodeChunker {
6
+ /**
7
+ * Split `code` into chunks; resolves to the list of chunks produced for the given file path and language
8
+ */
9
+ chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
10
+ /**
11
+ * Report whether this chunker can handle the given language
12
+ */
13
+ supportsLanguage(language: string): boolean;
14
+ /**
15
+ * Return the name that identifies this chunker's strategy
16
+ */
17
+ getStrategyName(): string;
18
+ }
19
+ //# sourceMappingURL=base.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/base.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IAE9E;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;IAE5C;;OAEG;IACH,eAAe,IAAI,MAAM,CAAC;CAC3B"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Base interface for code chunkers
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=base.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.js","sourceRoot":"","sources":["../../../src/code/chunker/base.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * CharacterChunker - Simple character-based chunking with overlap
3
+ * Used as fallback when AST parsing is not available
4
+ */
5
+ import type { ChunkerConfig, CodeChunk } from "../types.js";
6
+ import type { CodeChunker } from "./base.js";
7
+ export declare class CharacterChunker implements CodeChunker {
8
+ private config;
9
+ constructor(config: ChunkerConfig);
10
+ chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
11
+ supportsLanguage(_language: string): boolean;
12
+ getStrategyName(): string;
13
+ /**
14
+ * Scan up to 20 lines ahead for a break point: a blank line, `}`, `};`, `]);`, or a line starting with `//` or `#`; returns the start index when none is found
15
+ */
16
+ private findBreakPoint;
17
+ /**
18
+ * Estimate how many trailing lines to repeat in the next chunk, from the configured chunkOverlap and an average line length derived from chunkSize
19
+ */
20
+ private calculateOverlapLines;
21
+ }
22
+ //# sourceMappingURL=character-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"character-chunker.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/character-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE7C,qBAAa,gBAAiB,YAAW,WAAW;IACtC,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,aAAa;IAEnC,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IA6EnF,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO;IAK5C,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB;;OAEG;IACH,OAAO,CAAC,qBAAqB;CAK9B"}
@@ -0,0 +1,111 @@
1
+ /**
2
+ * CharacterChunker - Simple character-based chunking with overlap
3
+ * Used as fallback when AST parsing is not available
4
+ */
5
+ export class CharacterChunker {
6
+ config;
7
+ constructor(config) {
8
+ this.config = config;
9
+ }
10
+ async chunk(code, filePath, language) {
11
+ const chunks = [];
12
+ const lines = code.split("\n");
13
+ let currentChunk = "";
14
+ let currentStartLine = 1;
15
+ let currentLineCount = 0;
16
+ let chunkIndex = 0;
17
+ for (let i = 0; i < lines.length; i++) {
18
+ const line = lines[i];
19
+ currentChunk += `${line}\n`;
20
+ currentLineCount++;
21
+ // Check if we've reached chunk size
22
+ if (currentChunk.length >= this.config.chunkSize) {
23
+ // Try to find a good break point (end of function, class, or empty line)
24
+ const breakPoint = this.findBreakPoint(lines, i + 1);
25
+ if (breakPoint > i && breakPoint - i < 20) {
26
+ // Include lines up to break point, but respect maxChunkSize
27
+ for (let j = i + 1; j <= breakPoint && j < lines.length; j++) {
28
+ const nextLine = `${lines[j]}\n`;
29
+ // Stop if adding this line would exceed maxChunkSize
30
+ if (currentChunk.length + nextLine.length > this.config.maxChunkSize) {
31
+ break;
32
+ }
33
+ currentChunk += nextLine;
34
+ currentLineCount++;
35
+ i = j;
36
+ }
37
+ }
38
+ // Create chunk
39
+ chunks.push({
40
+ content: currentChunk.trim(),
41
+ startLine: currentStartLine,
42
+ endLine: currentStartLine + currentLineCount - 1,
43
+ metadata: {
44
+ filePath,
45
+ language,
46
+ chunkIndex,
47
+ chunkType: "block",
48
+ },
49
+ });
50
+ chunkIndex++;
51
+ // Calculate overlap
52
+ const overlapLines = this.calculateOverlapLines(currentLineCount);
53
+ const _overlapStart = Math.max(0, currentLineCount - overlapLines);
54
+ // Start new chunk with overlap
55
+ currentChunk = `${lines.slice(i - overlapLines + 1, i + 1).join("\n")}\n`;
56
+ currentStartLine = currentStartLine + currentLineCount - overlapLines;
57
+ currentLineCount = overlapLines;
58
+ }
59
+ }
60
+ // Add remaining content as final chunk
61
+ if (currentChunk.trim().length > 50) {
62
+ chunks.push({
63
+ content: currentChunk.trim(),
64
+ startLine: currentStartLine,
65
+ endLine: currentStartLine + currentLineCount - 1,
66
+ metadata: {
67
+ filePath,
68
+ language,
69
+ chunkIndex,
70
+ chunkType: "block",
71
+ },
72
+ });
73
+ }
74
+ return chunks;
75
+ }
76
+ supportsLanguage(_language) {
77
+ // Character chunker supports all languages
78
+ return true;
79
+ }
80
+ getStrategyName() {
81
+ return "character-based";
82
+ }
83
+ /**
84
+ * Find a good break point in the code (empty line, closing brace, etc.)
85
+ */
86
+ findBreakPoint(lines, startIdx) {
87
+ const searchWindow = Math.min(20, lines.length - startIdx);
88
+ for (let i = 0; i < searchWindow; i++) {
89
+ const line = lines[startIdx + i]?.trim() || "";
90
+ // Good break points
91
+ if (line === "" ||
92
+ line === "}" ||
93
+ line === "};" ||
94
+ line === "]);" ||
95
+ line.startsWith("//") ||
96
+ line.startsWith("#")) {
97
+ return startIdx + i;
98
+ }
99
+ }
100
+ return startIdx;
101
+ }
102
+ /**
103
+ * Calculate number of lines to overlap based on chunk size
104
+ */
105
+ calculateOverlapLines(totalLines) {
106
+ const overlapChars = this.config.chunkOverlap;
107
+ const avgCharsPerLine = this.config.chunkSize / Math.max(totalLines, 1);
108
+ return Math.floor(overlapChars / Math.max(avgCharsPerLine, 1));
109
+ }
110
+ }
111
+ //# sourceMappingURL=character-chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"character-chunker.js","sourceRoot":"","sources":["../../../src/code/chunker/character-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,OAAO,gBAAgB;IACP;IAApB,YAAoB,MAAqB;QAArB,WAAM,GAAN,MAAM,CAAe;IAAG,CAAC;IAE7C,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAgB;QAC1D,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,YAAY,IAAI,GAAG,IAAI,IAAI,CAAC;YAC5B,gBAAgB,EAAE,CAAC;YAEnB,oCAAoC;YACpC,IAAI,YAAY,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBACjD,yEAAyE;gBACzE,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;gBAErD,IAAI,UAAU,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC;oBAC1C,4DAA4D;oBAC5D,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC7D,MAAM,QAAQ,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;wBACjC,qDAAqD;wBACrD,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;4BACrE,MAAM;wBACR,CAAC;wBACD,YAAY,IAAI,QAAQ,CAAC;wBACzB,gBAAgB,EAAE,CAAC;wBACnB,CAAC,GAAG,CAAC,CAAC;oBACR,CAAC;gBACH,CAAC;gBAED,eAAe;gBACf,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;oBAC5B,SAAS,EAAE,gBAAgB;oBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;oBAChD,QAAQ,EAAE;wBACR,QAAQ;wBACR,QAAQ;wBACR,UAAU;wBACV,SAAS,EAAE,OAAO;qBACnB;iBACF,CAAC,CAAC;gBAEH,UAAU,EAAE,CAAC;gBAEb,oBAAoB;gBACpB,MAAM,YAAY,GAAG,IAAI,CAAC,qBAAqB,CAAC,gBAAgB,CAAC,CAAC;gBAClE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,gBAAgB,GAAG,YAAY,CAAC,CAAC;gBAEnE,+BAA+B;gBAC/B,YAAY,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC1E,gBAAgB,GAAG,gBAAgB,GAAG,gBAAgB,GAAG,YAAY,CAAC;gBACtE,gBAAgB,GAAG,YAAY,CAAC;YAClC,CAAC;QACH,CAAC
;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;gBAC5B,SAAS,EAAE,gBAAgB;gBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;gBAChD,QAAQ,EAAE;oBACR,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,SAAS,EAAE,OAAO;iBACnB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,gBAAgB,CAAC,SAAiB;QAChC,2CAA2C;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,eAAe;QACb,OAAO,iBAAiB,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,KAAe,EAAE,QAAgB;QACtD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;QAE3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE/C,oBAAoB;YACpB,IACE,IAAI,KAAK,EAAE;gBACX,IAAI,KAAK,GAAG;gBACZ,IAAI,KAAK,IAAI;gBACb,IAAI,KAAK,KAAK;gBACd,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;gBACrB,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EACpB,CAAC;gBACD,OAAO,QAAQ,GAAG,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,UAAkB;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;QAC9C,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QACxE,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;CACF"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * TreeSitterChunker - AST-aware code chunking using tree-sitter
3
+ * Primary chunking strategy for supported languages
+ *
+ * Declaration file only: it lists the class shape (members and signatures)
+ * and contains no implementation.
4
+ */
5
+ import type { ChunkerConfig, CodeChunk } from "../types.js";
6
+ import type { CodeChunker } from "./base.js";
7
+ export declare class TreeSitterChunker implements CodeChunker {
8
+ private config; // presumably the ChunkerConfig given to the constructor; confirm in the implementation
9
+ private languages; // type is not visible in this declaration; presumably per-language parser state (TODO confirm)
10
+ private fallbackChunker; // presumably used when tree-sitter cannot handle the input; confirm in the implementation
11
+ constructor(config: ChunkerConfig);
12
+ private initializeParsers;
13
+ chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>; // asynchronous: resolves to an array of CodeChunk
14
+ supportsLanguage(language: string): boolean;
15
+ getStrategyName(): string;
16
+ /**
17
+ * Find all chunkable nodes in the AST
18
+ */
19
+ private findChunkableNodes;
20
+ /**
21
+ * Extract function/class name from AST node
22
+ */
23
+ private extractName;
24
+ /**
25
+ * Map AST node type to chunk type
26
+ */
27
+ private getChunkType;
28
+ }
29
+ //# sourceMappingURL=tree-sitter-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-sitter-chunker.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAYH,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAQ7C,qBAAa,iBAAkB,YAAW,WAAW;IAIvC,OAAO,CAAC,MAAM;IAH1B,OAAO,CAAC,SAAS,CAA0C;IAC3D,OAAO,CAAC,eAAe,CAAmB;gBAEtB,MAAM,EAAE,aAAa;IAKzC,OAAO,CAAC,iBAAiB;IAgFnB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAoEnF,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAI3C,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAsB1B;;OAEG;IACH,OAAO,CAAC,WAAW;IAiBnB;;OAEG;IACH,OAAO,CAAC,YAAY;CAYrB"}