@mhalder/qdrant-mcp-server 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/.codecov.yml +16 -0
  2. package/.github/workflows/claude-code-review.yml +6 -5
  3. package/.releaserc.json +8 -1
  4. package/CHANGELOG.md +34 -0
  5. package/README.md +259 -9
  6. package/build/code/chunker/base.d.ts +19 -0
  7. package/build/code/chunker/base.d.ts.map +1 -0
  8. package/build/code/chunker/base.js +5 -0
  9. package/build/code/chunker/base.js.map +1 -0
  10. package/build/code/chunker/character-chunker.d.ts +22 -0
  11. package/build/code/chunker/character-chunker.d.ts.map +1 -0
  12. package/build/code/chunker/character-chunker.js +111 -0
  13. package/build/code/chunker/character-chunker.js.map +1 -0
  14. package/build/code/chunker/tree-sitter-chunker.d.ts +29 -0
  15. package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -0
  16. package/build/code/chunker/tree-sitter-chunker.js +213 -0
  17. package/build/code/chunker/tree-sitter-chunker.js.map +1 -0
  18. package/build/code/config.d.ts +11 -0
  19. package/build/code/config.d.ts.map +1 -0
  20. package/build/code/config.js +145 -0
  21. package/build/code/config.js.map +1 -0
  22. package/build/code/indexer.d.ts +42 -0
  23. package/build/code/indexer.d.ts.map +1 -0
  24. package/build/code/indexer.js +508 -0
  25. package/build/code/indexer.js.map +1 -0
  26. package/build/code/metadata.d.ts +32 -0
  27. package/build/code/metadata.d.ts.map +1 -0
  28. package/build/code/metadata.js +128 -0
  29. package/build/code/metadata.js.map +1 -0
  30. package/build/code/scanner.d.ts +35 -0
  31. package/build/code/scanner.d.ts.map +1 -0
  32. package/build/code/scanner.js +108 -0
  33. package/build/code/scanner.js.map +1 -0
  34. package/build/code/sync/merkle.d.ts +45 -0
  35. package/build/code/sync/merkle.d.ts.map +1 -0
  36. package/build/code/sync/merkle.js +116 -0
  37. package/build/code/sync/merkle.js.map +1 -0
  38. package/build/code/sync/snapshot.d.ts +41 -0
  39. package/build/code/sync/snapshot.d.ts.map +1 -0
  40. package/build/code/sync/snapshot.js +91 -0
  41. package/build/code/sync/snapshot.js.map +1 -0
  42. package/build/code/sync/synchronizer.d.ts +53 -0
  43. package/build/code/sync/synchronizer.d.ts.map +1 -0
  44. package/build/code/sync/synchronizer.js +132 -0
  45. package/build/code/sync/synchronizer.js.map +1 -0
  46. package/build/code/types.d.ts +98 -0
  47. package/build/code/types.d.ts.map +1 -0
  48. package/build/code/types.js +5 -0
  49. package/build/code/types.js.map +1 -0
  50. package/build/index.js +252 -1
  51. package/build/index.js.map +1 -1
  52. package/build/qdrant/client.d.ts +1 -1
  53. package/build/qdrant/client.d.ts.map +1 -1
  54. package/build/qdrant/client.js +2 -2
  55. package/build/qdrant/client.js.map +1 -1
  56. package/build/qdrant/client.test.js +16 -0
  57. package/build/qdrant/client.test.js.map +1 -1
  58. package/examples/code-search/README.md +271 -0
  59. package/package.json +15 -2
  60. package/src/code/chunker/base.ts +22 -0
  61. package/src/code/chunker/character-chunker.ts +131 -0
  62. package/src/code/chunker/tree-sitter-chunker.ts +250 -0
  63. package/src/code/config.ts +156 -0
  64. package/src/code/indexer.ts +613 -0
  65. package/src/code/metadata.ts +153 -0
  66. package/src/code/scanner.ts +124 -0
  67. package/src/code/sync/merkle.ts +136 -0
  68. package/src/code/sync/snapshot.ts +110 -0
  69. package/src/code/sync/synchronizer.ts +154 -0
  70. package/src/code/types.ts +117 -0
  71. package/src/index.ts +298 -1
  72. package/src/qdrant/client.test.ts +20 -0
  73. package/src/qdrant/client.ts +2 -2
  74. package/tests/code/chunker/character-chunker.test.ts +141 -0
  75. package/tests/code/chunker/tree-sitter-chunker.test.ts +275 -0
  76. package/tests/code/fixtures/sample-py/calculator.py +32 -0
  77. package/tests/code/fixtures/sample-ts/async-operations.ts +120 -0
  78. package/tests/code/fixtures/sample-ts/auth.ts +31 -0
  79. package/tests/code/fixtures/sample-ts/config.ts +52 -0
  80. package/tests/code/fixtures/sample-ts/database.ts +50 -0
  81. package/tests/code/fixtures/sample-ts/index.ts +39 -0
  82. package/tests/code/fixtures/sample-ts/types-advanced.ts +132 -0
  83. package/tests/code/fixtures/sample-ts/utils.ts +105 -0
  84. package/tests/code/fixtures/sample-ts/validator.ts +169 -0
  85. package/tests/code/indexer.test.ts +828 -0
  86. package/tests/code/integration.test.ts +708 -0
  87. package/tests/code/metadata.test.ts +457 -0
  88. package/tests/code/scanner.test.ts +131 -0
  89. package/tests/code/sync/merkle.test.ts +406 -0
  90. package/tests/code/sync/snapshot.test.ts +360 -0
  91. package/tests/code/sync/synchronizer.test.ts +501 -0
  92. package/vitest.config.ts +1 -0
package/.codecov.yml ADDED
@@ -0,0 +1,16 @@
1
+ coverage:
2
+ status:
3
+ project:
4
+ default:
5
+ target: auto
6
+ threshold: 5%
7
+ informational: true
8
+ patch:
9
+ default:
10
+ target: 80%
11
+ threshold: 5%
12
+
13
+ comment:
14
+ layout: "header, diff, flags, components"
15
+ behavior: default
16
+ require_changes: false
package/.github/workflows/claude-code-review.yml CHANGED
@@ -10,6 +10,12 @@ on:
10
10
  # - "src/**/*.js"
11
11
  # - "src/**/*.jsx"
12
12
 
13
+ permissions:
14
+ contents: read
15
+ pull-requests: write
16
+ issues: write
17
+ id-token: write
18
+
13
19
  jobs:
14
20
  claude-review:
15
21
  # Optional: Filter by PR author
@@ -19,11 +25,6 @@ jobs:
19
25
  # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
20
26
 
21
27
  runs-on: ubuntu-latest
22
- permissions:
23
- contents: read
24
- pull-requests: read
25
- issues: read
26
- id-token: write
27
28
 
28
29
  steps:
29
30
  - name: Checkout repository
package/.releaserc.json CHANGED
@@ -46,7 +46,14 @@
46
46
  "changelogFile": "CHANGELOG.md"
47
47
  }
48
48
  ],
49
- "@semantic-release/npm",
49
+ [
50
+ "@semantic-release/npm",
51
+ {
52
+ "npmPublish": true,
53
+ "pkgRoot": ".",
54
+ "provenance": true
55
+ }
56
+ ],
50
57
  [
51
58
  "@semantic-release/git",
52
59
  {
package/CHANGELOG.md CHANGED
@@ -1,3 +1,37 @@
1
+ ## 1.6.0 (2026-01-17)
2
+
3
+ * Merge pull request #34 from No-Smoke/feature/add-qdrant-api-key-support ([c6af3ae](https://github.com/mhalder/qdrant-mcp-server/commit/c6af3ae)), closes [#34](https://github.com/mhalder/qdrant-mcp-server/issues/34)
4
+ * Merge pull request #37 from mhalder/fix/trusted-publishing-and-workflows ([e6a464d](https://github.com/mhalder/qdrant-mcp-server/commit/e6a464d)), closes [#37](https://github.com/mhalder/qdrant-mcp-server/issues/37)
5
+ * Merge pull request #38 from mhalder/fix/npm-provenance-publishing ([53dce5f](https://github.com/mhalder/qdrant-mcp-server/commit/53dce5f)), closes [#38](https://github.com/mhalder/qdrant-mcp-server/issues/38)
6
+ * Merge pull request #39 from mhalder/fix/add-npm-token-for-verification ([aaa32c1](https://github.com/mhalder/qdrant-mcp-server/commit/aaa32c1)), closes [#39](https://github.com/mhalder/qdrant-mcp-server/issues/39)
7
+ * Merge pull request #40 from mhalder/fix/remove-registry-url-from-setup-node ([f507b1b](https://github.com/mhalder/qdrant-mcp-server/commit/f507b1b)), closes [#40](https://github.com/mhalder/qdrant-mcp-server/issues/40)
8
+ * ci: add NPM_TOKEN for semantic-release verification ([19f746d](https://github.com/mhalder/qdrant-mcp-server/commit/19f746d))
9
+ * ci: enable npm provenance for OIDC trusted publishing ([2e18bea](https://github.com/mhalder/qdrant-mcp-server/commit/2e18bea))
10
+ * ci: remove registry-url from setup-node ([4e7d496](https://github.com/mhalder/qdrant-mcp-server/commit/4e7d496))
11
+ * ci: switch to npm trusted publishing and fix workflow permissions ([93ee2ba](https://github.com/mhalder/qdrant-mcp-server/commit/93ee2ba))
12
+ * test: add constructor tests for apiKey parameter ([aa497b0](https://github.com/mhalder/qdrant-mcp-server/commit/aa497b0))
13
+ * docs: Add QDRANT_API_KEY documentation ([f5dd238](https://github.com/mhalder/qdrant-mcp-server/commit/f5dd238))
14
+ * feat: Add QDRANT_API_KEY support to QdrantManager ([ae7b3b2](https://github.com/mhalder/qdrant-mcp-server/commit/ae7b3b2))
15
+ * feat: Read QDRANT_API_KEY from environment and pass to QdrantManager ([1b3a263](https://github.com/mhalder/qdrant-mcp-server/commit/1b3a263))
16
+
17
+ ## 1.5.0 (2025-10-30)
18
+
19
+ * Merge pull request #32 from mhalder/feature/code-vectorization ([50c6cb0](https://github.com/mhalder/qdrant-mcp-server/commit/50c6cb0)), closes [#32](https://github.com/mhalder/qdrant-mcp-server/issues/32)
20
+ * ci: add codecov configuration for coverage thresholds ([153a85e](https://github.com/mhalder/qdrant-mcp-server/commit/153a85e))
21
+ * ci: make codecov project check informational ([f8a09c3](https://github.com/mhalder/qdrant-mcp-server/commit/f8a09c3))
22
+ * test: add comprehensive test suite for code vectorization (802/840 passing) ([969c000](https://github.com/mhalder/qdrant-mcp-server/commit/969c000))
23
+ * test: add path validation tests to improve coverage ([4cbf5ed](https://github.com/mhalder/qdrant-mcp-server/commit/4cbf5ed))
24
+ * test: fix test samples and secret detection (809/840 passing) ([963a5c7](https://github.com/mhalder/qdrant-mcp-server/commit/963a5c7))
25
+ * test: improve test coverage to 97.72% and fix race conditions ([78a75cd](https://github.com/mhalder/qdrant-mcp-server/commit/78a75cd))
26
+ * feat: add code vectorization for semantic code search (#31) ([2a3745e](https://github.com/mhalder/qdrant-mcp-server/commit/2a3745e)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
27
+ * feat: add hybrid search and advanced filtering for code search (#31) ([bfb022e](https://github.com/mhalder/qdrant-mcp-server/commit/bfb022e)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
28
+ * feat: add incremental re-indexing with Merkle tree change detection (#31) ([dcd7c55](https://github.com/mhalder/qdrant-mcp-server/commit/dcd7c55)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
29
+ * feat: add path traversal validation for security ([0d783cc](https://github.com/mhalder/qdrant-mcp-server/commit/0d783cc)), closes [#2](https://github.com/mhalder/qdrant-mcp-server/issues/2)
30
+ * fix: improve test coverage and fix critical bugs (830/840 passing) ([32c13b2](https://github.com/mhalder/qdrant-mcp-server/commit/32c13b2))
31
+ * fix: resolve tree-sitter peer dependency conflicts for Node.js 22.x ([61d614f](https://github.com/mhalder/qdrant-mcp-server/commit/61d614f))
32
+ * docs: add comprehensive code vectorization examples and documentation ([1e0d48d](https://github.com/mhalder/qdrant-mcp-server/commit/1e0d48d))
33
+ * style: format code vectorization module with project formatter ([beb71bb](https://github.com/mhalder/qdrant-mcp-server/commit/beb71bb))
34
+
1
35
  ## 1.4.0 (2025-10-13)
2
36
 
3
37
  * Merge pull request #29 from mhalder/feature/configurable-prompts ([f9652b1](https://github.com/mhalder/qdrant-mcp-server/commit/f9652b1)), closes [#29](https://github.com/mhalder/qdrant-mcp-server/issues/29)
package/README.md CHANGED
@@ -9,13 +9,16 @@ A Model Context Protocol (MCP) server providing semantic search capabilities usi
9
9
 
10
10
  - **Zero Setup**: Works out of the box with Ollama - no API keys required
11
11
  - **Privacy-First**: Local embeddings and vector storage - data never leaves your machine
12
+ - **Code Vectorization**: Intelligent codebase indexing with AST-aware chunking and semantic code search
12
13
  - **Multiple Providers**: Ollama (default), OpenAI, Cohere, and Voyage AI
13
14
  - **Hybrid Search**: Combine semantic and keyword search for better results
14
15
  - **Semantic Search**: Natural language search with metadata filtering
16
+ - **Incremental Indexing**: Efficient updates - only re-index changed files
15
17
  - **Configurable Prompts**: Create custom prompts for guided workflows without code changes
16
18
  - **Rate Limiting**: Intelligent throttling with exponential backoff
17
19
  - **Full CRUD**: Create, search, and manage collections and documents
18
20
  - **Flexible Deployment**: Run locally (stdio) or as a remote HTTP server
21
+ - **API Key Authentication**: Connect to secured Qdrant instances (Qdrant Cloud, self-hosted with API keys)
19
22
 
20
23
  ## Quick Start
21
24
 
@@ -61,6 +64,26 @@ Add to `~/.claude/claude_code_config.json`:
61
64
  }
62
65
  ```
63
66
 
67
+ #### Connecting to Secured Qdrant Instances
68
+
69
+ For Qdrant Cloud or self-hosted instances with API key authentication:
70
+
71
+ ```json
72
+ {
73
+ "mcpServers": {
74
+ "qdrant": {
75
+ "command": "node",
76
+ "args": ["/path/to/qdrant-mcp-server/build/index.js"],
77
+ "env": {
78
+ "QDRANT_URL": "https://your-cluster.qdrant.io:6333",
79
+ "QDRANT_API_KEY": "your-api-key-here",
80
+ "EMBEDDING_BASE_URL": "http://localhost:11434"
81
+ }
82
+ }
83
+ }
84
+ }
85
+ ```
86
+
64
87
  #### Remote Setup (HTTP transport)
65
88
 
66
89
  > **⚠️ Security Warning**: When deploying the HTTP transport in production:
@@ -124,6 +147,16 @@ See [Advanced Configuration](#advanced-configuration) section below for all opti
124
147
  | `hybrid_search` | Hybrid search combining semantic and keyword (BM25) search with RRF |
125
148
  | `delete_documents` | Delete specific documents by ID |
126
149
 
150
+ ### Code Vectorization
151
+
152
+ | Tool | Description |
153
+ | ------------------ | ---------------------------------------------------------------------------------------------- |
154
+ | `index_codebase` | Index a codebase for semantic code search with AST-aware chunking |
155
+ | `search_code` | Search indexed codebase using natural language queries |
156
+ | `reindex_changes` | Incrementally re-index only changed files (detects added/modified/deleted) |
157
+ | `get_index_status` | Get indexing status and statistics for a codebase |
158
+ | `clear_index` | Delete all indexed data for a codebase |
159
+
127
160
  ### Resources
128
161
 
129
162
  - `qdrant://collections` - List all collections
@@ -195,6 +228,192 @@ Templates use `{{variable}}` placeholders:
195
228
  - Optional arguments use defaults if not specified
196
229
  - Unknown variables are left as-is in the output
197
230
 
231
+ ## Code Vectorization
232
+
233
+ Intelligently index and search your codebase using semantic code search. Perfect for AI-assisted development, code exploration, and understanding large codebases.
234
+
235
+ ### Features
236
+
237
+ - **AST-Aware Chunking**: Intelligent code splitting at function/class boundaries using tree-sitter
238
+ - **Multi-Language Support**: 35+ file types including TypeScript, Python, Java, Go, Rust, C++, and more
239
+ - **Incremental Updates**: Only re-index changed files for fast updates
240
+ - **Smart Ignore Patterns**: Respects .gitignore, .dockerignore, and custom .contextignore files
241
+ - **Semantic Search**: Natural language queries to find relevant code
242
+ - **Metadata Filtering**: Filter by file type, path patterns, or language
243
+ - **Local-First**: All processing happens locally - your code never leaves your machine
244
+
245
+ ### Quick Start
246
+
247
+ **1. Index your codebase:**
248
+
249
+ ```bash
250
+ # Via Claude Code MCP tool
251
+ /mcp__qdrant__index_codebase /path/to/your/project
252
+ ```
253
+
254
+ **2. Search your code:**
255
+
256
+ ```bash
257
+ # Natural language search
258
+ /mcp__qdrant__search_code /path/to/your/project "authentication middleware"
259
+
260
+ # Filter by file type
261
+ /mcp__qdrant__search_code /path/to/your/project "database schema" --fileTypes .ts,.js
262
+
263
+ # Filter by path pattern
264
+ /mcp__qdrant__search_code /path/to/your/project "API endpoints" --pathPattern src/api/**
265
+ ```
266
+
267
+ **3. Update after changes:**
268
+
269
+ ```bash
270
+ # Incrementally re-index only changed files
271
+ /mcp__qdrant__reindex_changes /path/to/your/project
272
+ ```
273
+
274
+ ### Usage Examples
275
+
276
+ #### Index a TypeScript Project
277
+
278
+ ```typescript
279
+ // The MCP tool automatically:
280
+ // 1. Scans all .ts, .tsx, .js, .jsx files
281
+ // 2. Respects .gitignore patterns (skips node_modules, dist, etc.)
282
+ // 3. Chunks code at function/class boundaries
283
+ // 4. Generates embeddings using your configured provider
284
+ // 5. Stores in Qdrant with metadata (file path, line numbers, language)
285
+
286
+ index_codebase({
287
+ path: "/workspace/my-app",
288
+ forceReindex: false // Set to true to re-index from scratch
289
+ })
290
+
291
+ // Output:
292
+ // ✓ Indexed 247 files (1,823 chunks) in 45.2s
293
+ ```
294
+
295
+ #### Search for Authentication Code
296
+
297
+ ```typescript
298
+ search_code({
299
+ path: "/workspace/my-app",
300
+ query: "how does user authentication work?",
301
+ limit: 5
302
+ })
303
+
304
+ // Results include file path, line numbers, and code snippets:
305
+ // [
306
+ // {
307
+ // filePath: "src/auth/middleware.ts",
308
+ // startLine: 15,
309
+ // endLine: 42,
310
+ // content: "export async function authenticateUser(req: Request) { ... }",
311
+ // score: 0.89,
312
+ // language: "typescript"
313
+ // },
314
+ // ...
315
+ // ]
316
+ ```
317
+
318
+ #### Search with Filters
319
+
320
+ ```typescript
321
+ // Only search TypeScript files
322
+ search_code({
323
+ path: "/workspace/my-app",
324
+ query: "error handling patterns",
325
+ fileTypes: [".ts", ".tsx"],
326
+ limit: 10
327
+ })
328
+
329
+ // Only search in specific directories
330
+ search_code({
331
+ path: "/workspace/my-app",
332
+ query: "API route handlers",
333
+ pathPattern: "src/api/**",
334
+ limit: 10
335
+ })
336
+ ```
337
+
338
+ #### Incremental Re-indexing
339
+
340
+ ```typescript
341
+ // After making changes to your codebase
342
+ reindex_changes({
343
+ path: "/workspace/my-app"
344
+ })
345
+
346
+ // Output:
347
+ // ✓ Updated: +3 files added, ~5 files modified, -1 files deleted
348
+ // ✓ Chunks: +47 added, -23 deleted in 8.3s
349
+ ```
350
+
351
+ #### Check Indexing Status
352
+
353
+ ```typescript
354
+ get_index_status({
355
+ path: "/workspace/my-app"
356
+ })
357
+
358
+ // Output:
359
+ // {
360
+ // isIndexed: true,
361
+ // collectionName: "code_a3f8d2e1",
362
+ // chunksCount: 1823,
363
+ // filesCount: 247,
364
+ // lastUpdated: "2025-01-30T10:15:00Z",
365
+ // languages: ["typescript", "javascript", "json"]
366
+ // }
367
+ ```
368
+
369
+ ### Supported Languages
370
+
371
+ **Languages and file formats** (35+ file types):
372
+ - **Web**: TypeScript, JavaScript, Vue, Svelte
373
+ - **Backend**: Python, Java, Go, Rust, Ruby, PHP
374
+ - **Systems**: C, C++, C#
375
+ - **Mobile**: Swift, Kotlin, Dart
376
+ - **Functional**: Scala, Clojure, Haskell, OCaml
377
+ - **Scripting**: Bash, Shell, Fish
378
+ - **Data**: SQL, GraphQL, Protocol Buffers
379
+ - **Config**: JSON, YAML, TOML, XML, Markdown
380
+
381
+ See [configuration](#code-vectorization-configuration) for full list and customization options.
382
+
383
+ ### Custom Ignore Patterns
384
+
385
+ Create a `.contextignore` file in your project root to specify additional patterns to ignore:
386
+
387
+ ```gitignore
388
+ # .contextignore
389
+ **/test/**
390
+ **/*.test.ts
391
+ **/*.spec.ts
392
+ **/fixtures/**
393
+ **/mocks/**
394
+ **/__tests__/**
395
+ ```
396
+
397
+ ### Best Practices
398
+
399
+ 1. **Index Once, Update Incrementally**: Use `index_codebase` for initial indexing, then `reindex_changes` for updates
400
+ 2. **Use Filters**: Narrow search scope with `fileTypes` and `pathPattern` for better results
401
+ 3. **Meaningful Queries**: Use natural language that describes what you're looking for (e.g., "database connection pooling" instead of "db")
402
+ 4. **Check Status First**: Use `get_index_status` to verify a codebase is indexed before searching
403
+ 5. **Local Embedding**: Use Ollama (default) to keep everything local and private
404
+
405
+ ### Performance
406
+
407
+ Typical performance on a modern laptop (Apple M1/M2 or similar):
408
+
409
+ | Codebase Size | Files | Indexing Time | Search Latency |
410
+ |--------------|-------|---------------|----------------|
411
+ | Small (10k LOC) | 50 | ~10s | <100ms |
412
+ | Medium (100k LOC) | 500 | ~2min | <200ms |
413
+ | Large (500k LOC) | 2,500 | ~10min | <500ms |
414
+
415
+ **Note**: Indexing time varies based on embedding provider. Ollama (local) is fastest for initial indexing.
416
+
198
417
  ## Examples
199
418
 
200
419
  See [examples/](examples/) directory for detailed guides:
@@ -203,18 +422,27 @@ See [examples/](examples/) directory for detailed guides:
203
422
  - **[Knowledge Base](examples/knowledge-base/)** - Structured documentation with metadata
204
423
  - **[Advanced Filtering](examples/filters/)** - Complex boolean filters
205
424
  - **[Rate Limiting](examples/rate-limiting/)** - Batch processing with cloud providers
425
+ - **[Code Search](examples/code-search/)** - Index codebases and semantic code search
206
426
 
207
427
  ## Advanced Configuration
208
428
 
209
429
  ### Environment Variables
210
430
 
431
+ #### Core Configuration
432
+
211
433
  | Variable | Description | Default |
212
434
  | ----------------------------------- | -------------------------------------- | --------------------- |
213
435
  | `TRANSPORT_MODE` | "stdio" or "http" | stdio |
214
436
  | `HTTP_PORT` | Port for HTTP transport | 3000 |
215
437
  | `EMBEDDING_PROVIDER` | "ollama", "openai", "cohere", "voyage" | ollama |
216
438
  | `QDRANT_URL` | Qdrant server URL | http://localhost:6333 |
439
+ | `QDRANT_API_KEY` | API key for Qdrant authentication | - |
217
440
  | `PROMPTS_CONFIG_FILE` | Path to prompts configuration JSON | prompts.json |
441
+
442
+ #### Embedding Configuration
443
+
444
+ | Variable | Description | Default |
445
+ | ----------------------------------- | -------------------------------------- | --------------------- |
218
446
  | `EMBEDDING_MODEL` | Model name | Provider-specific |
219
447
  | `EMBEDDING_BASE_URL` | Custom API URL | Provider-specific |
220
448
  | `EMBEDDING_MAX_REQUESTS_PER_MINUTE` | Rate limit | Provider-specific |
@@ -224,6 +452,18 @@ See [examples/](examples/) directory for detailed guides:
224
452
  | `COHERE_API_KEY` | Cohere API key | - |
225
453
  | `VOYAGE_API_KEY` | Voyage AI API key | - |
226
454
 
455
+ #### Code Vectorization Configuration
456
+
457
+ | Variable | Description | Default |
458
+ | ------------------------- | ------------------------------------------------ | ------- |
459
+ | `CODE_CHUNK_SIZE` | Maximum chunk size in characters | 2500 |
460
+ | `CODE_CHUNK_OVERLAP` | Overlap between chunks in characters | 300 |
461
+ | `CODE_ENABLE_AST` | Enable AST-aware chunking (tree-sitter) | true |
462
+ | `CODE_BATCH_SIZE` | Number of chunks to embed in one batch | 100 |
463
+ | `CODE_CUSTOM_EXTENSIONS` | Additional file extensions (comma-separated) | - |
464
+ | `CODE_CUSTOM_IGNORE` | Additional ignore patterns (comma-separated) | - |
465
+ | `CODE_DEFAULT_LIMIT` | Default search result limit | 5 |
466
+
227
467
  ### Provider Comparison
228
468
 
229
469
  | Provider | Models | Dimensions | Rate Limit | Notes |
@@ -237,15 +477,21 @@ See [examples/](examples/) directory for detailed guides:
237
477
 
238
478
  ## Troubleshooting
239
479
 
240
- | Issue | Solution |
241
- | ---------------------- | ---------------------------------------------------------------------------- |
242
- | **Qdrant not running** | `docker compose up -d` |
243
- | **Collection missing** | Create collection first before adding documents |
244
- | **Ollama not running** | Verify with `curl http://localhost:11434`, start with `docker compose up -d` |
245
- | **Model missing** | `docker exec ollama ollama pull nomic-embed-text` |
246
- | **Rate limit errors** | Adjust `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider tier |
247
- | **API key errors** | Verify correct API key in environment configuration |
248
- | **Filter errors** | Ensure Qdrant filter format, check field names match metadata |
480
+ | Issue | Solution |
481
+ | ------------------------------- | ---------------------------------------------------------------------------- |
482
+ | **Qdrant not running** | `docker compose up -d` |
483
+ | **Collection missing** | Create collection first before adding documents |
484
+ | **Ollama not running** | Verify with `curl http://localhost:11434`, start with `docker compose up -d` |
485
+ | **Model missing** | `docker exec ollama ollama pull nomic-embed-text` |
486
+ | **Rate limit errors** | Adjust `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider tier |
487
+ | **API key errors** | Verify correct API key in environment configuration |
488
+ | **Qdrant unauthorized** | Set `QDRANT_API_KEY` environment variable for secured instances |
489
+ | **Filter errors** | Ensure Qdrant filter format, check field names match metadata |
490
+ | **Codebase not indexed** | Run `index_codebase` before `search_code` |
491
+ | **Slow indexing** | Use Ollama (local) for faster indexing, or increase `CODE_BATCH_SIZE` |
492
+ | **Files not found** | Check `.gitignore` and `.contextignore` patterns |
493
+ | **Search returns no results** | Try broader queries, check if codebase is indexed with `get_index_status` |
494
+ | **Out of memory during index** | Reduce `CODE_CHUNK_SIZE` or `CODE_BATCH_SIZE` |
249
495
 
250
496
  ## Development
251
497
 
@@ -276,6 +522,10 @@ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for:
276
522
 
277
523
  **Automated releases**: Semantic versioning via conventional commits - `feat:` → minor, `fix:` → patch, `BREAKING CHANGE:` → major.
278
524
 
525
+ ## Acknowledgments
526
+
527
+ The code vectorization feature is inspired by and builds upon concepts from the excellent [claude-context](https://github.com/zilliztech/claude-context) project (MIT License, Copyright 2025 Zilliz).
528
+
279
529
  ## License
280
530
 
281
531
  MIT - see [LICENSE](LICENSE) file.
package/build/code/chunker/base.d.ts ADDED
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Base interface for code chunkers
3
+ */
4
+ import type { CodeChunk } from "../types.js";
5
+ export interface CodeChunker {
6
+ /**
7
+ * Split code into semantic chunks
8
+ */
9
+ chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
10
+ /**
11
+ * Check if language is supported by this chunker
12
+ */
13
+ supportsLanguage(language: string): boolean;
14
+ /**
15
+ * Get chunking strategy name
16
+ */
17
+ getStrategyName(): string;
18
+ }
19
+ //# sourceMappingURL=base.d.ts.map
package/build/code/chunker/base.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/base.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IAE9E;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;IAE5C;;OAEG;IACH,eAAe,IAAI,MAAM,CAAC;CAC3B"}
package/build/code/chunker/base.js ADDED
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Base interface for code chunkers
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=base.js.map
package/build/code/chunker/base.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.js","sourceRoot":"","sources":["../../../src/code/chunker/base.ts"],"names":[],"mappings":"AAAA;;GAEG"}
package/build/code/chunker/character-chunker.d.ts ADDED
@@ -0,0 +1,22 @@
1
+ /**
2
+ * CharacterChunker - Simple character-based chunking with overlap
3
+ * Used as fallback when AST parsing is not available
4
+ */
5
+ import type { ChunkerConfig, CodeChunk } from "../types.js";
6
+ import type { CodeChunker } from "./base.js";
7
+ export declare class CharacterChunker implements CodeChunker {
8
+ private config;
9
+ constructor(config: ChunkerConfig);
10
+ chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
11
+ supportsLanguage(_language: string): boolean;
12
+ getStrategyName(): string;
13
+ /**
14
+ * Find a good break point in the code (empty line, closing brace, etc.)
15
+ */
16
+ private findBreakPoint;
17
+ /**
18
+ * Calculate number of lines to overlap based on chunk size
19
+ */
20
+ private calculateOverlapLines;
21
+ }
22
+ //# sourceMappingURL=character-chunker.d.ts.map
package/build/code/chunker/character-chunker.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"character-chunker.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/character-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE7C,qBAAa,gBAAiB,YAAW,WAAW;IACtC,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,aAAa;IAEnC,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IA6EnF,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO;IAK5C,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB;;OAEG;IACH,OAAO,CAAC,qBAAqB;CAK9B"}
package/build/code/chunker/character-chunker.js ADDED
@@ -0,0 +1,111 @@
1
+ /**
2
+ * CharacterChunker - Simple character-based chunking with overlap
3
+ * Used as fallback when AST parsing is not available
4
+ */
5
+ export class CharacterChunker {
6
+ config;
7
+ constructor(config) {
8
+ this.config = config;
9
+ }
10
+ async chunk(code, filePath, language) {
11
+ const chunks = [];
12
+ const lines = code.split("\n");
13
+ let currentChunk = "";
14
+ let currentStartLine = 1;
15
+ let currentLineCount = 0;
16
+ let chunkIndex = 0;
17
+ for (let i = 0; i < lines.length; i++) {
18
+ const line = lines[i];
19
+ currentChunk += `${line}\n`;
20
+ currentLineCount++;
21
+ // Check if we've reached chunk size
22
+ if (currentChunk.length >= this.config.chunkSize) {
23
+ // Try to find a good break point (end of function, class, or empty line)
24
+ const breakPoint = this.findBreakPoint(lines, i + 1);
25
+ if (breakPoint > i && breakPoint - i < 20) {
26
+ // Include lines up to break point, but respect maxChunkSize
27
+ for (let j = i + 1; j <= breakPoint && j < lines.length; j++) {
28
+ const nextLine = `${lines[j]}\n`;
29
+ // Stop if adding this line would exceed maxChunkSize
30
+ if (currentChunk.length + nextLine.length > this.config.maxChunkSize) {
31
+ break;
32
+ }
33
+ currentChunk += nextLine;
34
+ currentLineCount++;
35
+ i = j;
36
+ }
37
+ }
38
+ // Create chunk
39
+ chunks.push({
40
+ content: currentChunk.trim(),
41
+ startLine: currentStartLine,
42
+ endLine: currentStartLine + currentLineCount - 1,
43
+ metadata: {
44
+ filePath,
45
+ language,
46
+ chunkIndex,
47
+ chunkType: "block",
48
+ },
49
+ });
50
+ chunkIndex++;
51
+ // Calculate overlap
52
+ const overlapLines = this.calculateOverlapLines(currentLineCount);
53
+ const _overlapStart = Math.max(0, currentLineCount - overlapLines);
54
+ // Start new chunk with overlap
55
+ currentChunk = `${lines.slice(i - overlapLines + 1, i + 1).join("\n")}\n`;
56
+ currentStartLine = currentStartLine + currentLineCount - overlapLines;
57
+ currentLineCount = overlapLines;
58
+ }
59
+ }
60
+ // Add remaining content as final chunk
61
+ if (currentChunk.trim().length > 50) {
62
+ chunks.push({
63
+ content: currentChunk.trim(),
64
+ startLine: currentStartLine,
65
+ endLine: currentStartLine + currentLineCount - 1,
66
+ metadata: {
67
+ filePath,
68
+ language,
69
+ chunkIndex,
70
+ chunkType: "block",
71
+ },
72
+ });
73
+ }
74
+ return chunks;
75
+ }
76
+ supportsLanguage(_language) {
77
+ // Character chunker supports all languages
78
+ return true;
79
+ }
80
+ getStrategyName() {
81
+ return "character-based";
82
+ }
83
+ /**
84
+ * Find a good break point in the code (empty line, closing brace, etc.)
85
+ */
86
+ findBreakPoint(lines, startIdx) {
87
+ const searchWindow = Math.min(20, lines.length - startIdx);
88
+ for (let i = 0; i < searchWindow; i++) {
89
+ const line = lines[startIdx + i]?.trim() || "";
90
+ // Good break points
91
+ if (line === "" ||
92
+ line === "}" ||
93
+ line === "};" ||
94
+ line === "]);" ||
95
+ line.startsWith("//") ||
96
+ line.startsWith("#")) {
97
+ return startIdx + i;
98
+ }
99
+ }
100
+ return startIdx;
101
+ }
102
+ /**
103
+ * Calculate number of lines to overlap based on chunk size
104
+ */
105
+ calculateOverlapLines(totalLines) {
106
+ const overlapChars = this.config.chunkOverlap;
107
+ const avgCharsPerLine = this.config.chunkSize / Math.max(totalLines, 1);
108
+ return Math.floor(overlapChars / Math.max(avgCharsPerLine, 1));
109
+ }
110
+ }
111
+ //# sourceMappingURL=character-chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"character-chunker.js","sourceRoot":"","sources":["../../../src/code/chunker/character-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,OAAO,gBAAgB;IACP;IAApB,YAAoB,MAAqB;QAArB,WAAM,GAAN,MAAM,CAAe;IAAG,CAAC;IAE7C,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAgB;QAC1D,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,YAAY,IAAI,GAAG,IAAI,IAAI,CAAC;YAC5B,gBAAgB,EAAE,CAAC;YAEnB,oCAAoC;YACpC,IAAI,YAAY,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBACjD,yEAAyE;gBACzE,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;gBAErD,IAAI,UAAU,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC;oBAC1C,4DAA4D;oBAC5D,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC7D,MAAM,QAAQ,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;wBACjC,qDAAqD;wBACrD,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;4BACrE,MAAM;wBACR,CAAC;wBACD,YAAY,IAAI,QAAQ,CAAC;wBACzB,gBAAgB,EAAE,CAAC;wBACnB,CAAC,GAAG,CAAC,CAAC;oBACR,CAAC;gBACH,CAAC;gBAED,eAAe;gBACf,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;oBAC5B,SAAS,EAAE,gBAAgB;oBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;oBAChD,QAAQ,EAAE;wBACR,QAAQ;wBACR,QAAQ;wBACR,UAAU;wBACV,SAAS,EAAE,OAAO;qBACnB;iBACF,CAAC,CAAC;gBAEH,UAAU,EAAE,CAAC;gBAEb,oBAAoB;gBACpB,MAAM,YAAY,GAAG,IAAI,CAAC,qBAAqB,CAAC,gBAAgB,CAAC,CAAC;gBAClE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,gBAAgB,GAAG,YAAY,CAAC,CAAC;gBAEnE,+BAA+B;gBAC/B,YAAY,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC1E,gBAAgB,GAAG,gBAAgB,GAAG,gBAAgB,GAAG,YAAY,CAAC;gBACtE,gBAAgB,GAAG,YAAY,CAAC;YAClC,CAAC;QACH,CAAC
;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;gBAC5B,SAAS,EAAE,gBAAgB;gBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;gBAChD,QAAQ,EAAE;oBACR,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,SAAS,EAAE,OAAO;iBACnB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,gBAAgB,CAAC,SAAiB;QAChC,2CAA2C;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,eAAe;QACb,OAAO,iBAAiB,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,KAAe,EAAE,QAAgB;QACtD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;QAE3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE/C,oBAAoB;YACpB,IACE,IAAI,KAAK,EAAE;gBACX,IAAI,KAAK,GAAG;gBACZ,IAAI,KAAK,IAAI;gBACb,IAAI,KAAK,KAAK;gBACd,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;gBACrB,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EACpB,CAAC;gBACD,OAAO,QAAQ,GAAG,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,UAAkB;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;QAC9C,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QACxE,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;CACF"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * TreeSitterChunker - AST-aware code chunking using tree-sitter
3
+ * Primary chunking strategy for supported languages
4
+ */
5
+ import type { ChunkerConfig, CodeChunk } from "../types.js";
6
+ import type { CodeChunker } from "./base.js";
7
+ export declare class TreeSitterChunker implements CodeChunker { // type surface only; the implementation lives in the compiled .js file
8
+ private config;
9
+ private languages; // NOTE(review): presumably the per-language parser registry filled by initializeParsers - confirm in the implementation
10
+ private fallbackChunker; // NOTE(review): presumably used when a language cannot be parsed - confirm in the implementation
11
+ constructor(config: ChunkerConfig);
12
+ private initializeParsers;
13
+ chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>; // asynchronous: resolves to the list of chunks produced for `code`
14
+ supportsLanguage(language: string): boolean;
15
+ getStrategyName(): string;
16
+ /**
17
+ * Find all chunkable nodes in the AST (private helper; its signature is not exposed in this declaration file)
18
+ */
19
+ private findChunkableNodes;
20
+ /**
21
+ * Extract function/class name from AST node (private helper; its signature is not exposed in this declaration file)
22
+ */
23
+ private extractName;
24
+ /**
25
+ * Map AST node type to chunk type (private helper; its signature is not exposed in this declaration file)
26
+ */
27
+ private getChunkType;
28
+ }
29
+ //# sourceMappingURL=tree-sitter-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-sitter-chunker.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAYH,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAQ7C,qBAAa,iBAAkB,YAAW,WAAW;IAIvC,OAAO,CAAC,MAAM;IAH1B,OAAO,CAAC,SAAS,CAA0C;IAC3D,OAAO,CAAC,eAAe,CAAmB;gBAEtB,MAAM,EAAE,aAAa;IAKzC,OAAO,CAAC,iBAAiB;IAgFnB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAoEnF,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAI3C,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAsB1B;;OAEG;IACH,OAAO,CAAC,WAAW;IAiBnB;;OAEG;IACH,OAAO,CAAC,YAAY;CAYrB"}