@mhalder/qdrant-mcp-server 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codecov.yml +16 -0
- package/CHANGELOG.md +18 -0
- package/README.md +236 -9
- package/build/code/chunker/base.d.ts +19 -0
- package/build/code/chunker/base.d.ts.map +1 -0
- package/build/code/chunker/base.js +5 -0
- package/build/code/chunker/base.js.map +1 -0
- package/build/code/chunker/character-chunker.d.ts +22 -0
- package/build/code/chunker/character-chunker.d.ts.map +1 -0
- package/build/code/chunker/character-chunker.js +111 -0
- package/build/code/chunker/character-chunker.js.map +1 -0
- package/build/code/chunker/tree-sitter-chunker.d.ts +29 -0
- package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -0
- package/build/code/chunker/tree-sitter-chunker.js +213 -0
- package/build/code/chunker/tree-sitter-chunker.js.map +1 -0
- package/build/code/config.d.ts +11 -0
- package/build/code/config.d.ts.map +1 -0
- package/build/code/config.js +145 -0
- package/build/code/config.js.map +1 -0
- package/build/code/indexer.d.ts +42 -0
- package/build/code/indexer.d.ts.map +1 -0
- package/build/code/indexer.js +508 -0
- package/build/code/indexer.js.map +1 -0
- package/build/code/metadata.d.ts +32 -0
- package/build/code/metadata.d.ts.map +1 -0
- package/build/code/metadata.js +128 -0
- package/build/code/metadata.js.map +1 -0
- package/build/code/scanner.d.ts +35 -0
- package/build/code/scanner.d.ts.map +1 -0
- package/build/code/scanner.js +108 -0
- package/build/code/scanner.js.map +1 -0
- package/build/code/sync/merkle.d.ts +45 -0
- package/build/code/sync/merkle.d.ts.map +1 -0
- package/build/code/sync/merkle.js +116 -0
- package/build/code/sync/merkle.js.map +1 -0
- package/build/code/sync/snapshot.d.ts +41 -0
- package/build/code/sync/snapshot.d.ts.map +1 -0
- package/build/code/sync/snapshot.js +91 -0
- package/build/code/sync/snapshot.js.map +1 -0
- package/build/code/sync/synchronizer.d.ts +53 -0
- package/build/code/sync/synchronizer.d.ts.map +1 -0
- package/build/code/sync/synchronizer.js +132 -0
- package/build/code/sync/synchronizer.js.map +1 -0
- package/build/code/types.d.ts +98 -0
- package/build/code/types.d.ts.map +1 -0
- package/build/code/types.js +5 -0
- package/build/code/types.js.map +1 -0
- package/build/index.js +250 -0
- package/build/index.js.map +1 -1
- package/examples/code-search/README.md +271 -0
- package/package.json +13 -1
- package/src/code/chunker/base.ts +22 -0
- package/src/code/chunker/character-chunker.ts +131 -0
- package/src/code/chunker/tree-sitter-chunker.ts +250 -0
- package/src/code/config.ts +156 -0
- package/src/code/indexer.ts +613 -0
- package/src/code/metadata.ts +153 -0
- package/src/code/scanner.ts +124 -0
- package/src/code/sync/merkle.ts +136 -0
- package/src/code/sync/snapshot.ts +110 -0
- package/src/code/sync/synchronizer.ts +154 -0
- package/src/code/types.ts +117 -0
- package/src/index.ts +296 -0
- package/tests/code/chunker/character-chunker.test.ts +141 -0
- package/tests/code/chunker/tree-sitter-chunker.test.ts +275 -0
- package/tests/code/fixtures/sample-py/calculator.py +32 -0
- package/tests/code/fixtures/sample-ts/async-operations.ts +120 -0
- package/tests/code/fixtures/sample-ts/auth.ts +31 -0
- package/tests/code/fixtures/sample-ts/config.ts +52 -0
- package/tests/code/fixtures/sample-ts/database.ts +50 -0
- package/tests/code/fixtures/sample-ts/index.ts +39 -0
- package/tests/code/fixtures/sample-ts/types-advanced.ts +132 -0
- package/tests/code/fixtures/sample-ts/utils.ts +105 -0
- package/tests/code/fixtures/sample-ts/validator.ts +169 -0
- package/tests/code/indexer.test.ts +828 -0
- package/tests/code/integration.test.ts +708 -0
- package/tests/code/metadata.test.ts +457 -0
- package/tests/code/scanner.test.ts +131 -0
- package/tests/code/sync/merkle.test.ts +406 -0
- package/tests/code/sync/snapshot.test.ts +360 -0
- package/tests/code/sync/synchronizer.test.ts +501 -0
- package/vitest.config.ts +1 -0
package/.codecov.yml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
coverage:
|
|
2
|
+
status:
|
|
3
|
+
project:
|
|
4
|
+
default:
|
|
5
|
+
target: auto
|
|
6
|
+
threshold: 5%
|
|
7
|
+
informational: true
|
|
8
|
+
patch:
|
|
9
|
+
default:
|
|
10
|
+
target: 80%
|
|
11
|
+
threshold: 5%
|
|
12
|
+
|
|
13
|
+
comment:
|
|
14
|
+
layout: "header, diff, flags, components"
|
|
15
|
+
behavior: default
|
|
16
|
+
require_changes: false
|
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,21 @@
|
|
|
1
|
+
## 1.5.0 (2025-10-30)
|
|
2
|
+
|
|
3
|
+
* Merge pull request #32 from mhalder/feature/code-vectorization ([50c6cb0](https://github.com/mhalder/qdrant-mcp-server/commit/50c6cb0)), closes [#32](https://github.com/mhalder/qdrant-mcp-server/issues/32)
|
|
4
|
+
* ci: add codecov configuration for coverage thresholds ([153a85e](https://github.com/mhalder/qdrant-mcp-server/commit/153a85e))
|
|
5
|
+
* ci: make codecov project check informational ([f8a09c3](https://github.com/mhalder/qdrant-mcp-server/commit/f8a09c3))
|
|
6
|
+
* test: add comprehensive test suite for code vectorization (802/840 passing) ([969c000](https://github.com/mhalder/qdrant-mcp-server/commit/969c000))
|
|
7
|
+
* test: add path validation tests to improve coverage ([4cbf5ed](https://github.com/mhalder/qdrant-mcp-server/commit/4cbf5ed))
|
|
8
|
+
* test: fix test samples and secret detection (809/840 passing) ([963a5c7](https://github.com/mhalder/qdrant-mcp-server/commit/963a5c7))
|
|
9
|
+
* test: improve test coverage to 97.72% and fix race conditions ([78a75cd](https://github.com/mhalder/qdrant-mcp-server/commit/78a75cd))
|
|
10
|
+
* feat: add code vectorization for semantic code search (#31) ([2a3745e](https://github.com/mhalder/qdrant-mcp-server/commit/2a3745e)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
|
|
11
|
+
* feat: add hybrid search and advanced filtering for code search (#31) ([bfb022e](https://github.com/mhalder/qdrant-mcp-server/commit/bfb022e)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
|
|
12
|
+
* feat: add incremental re-indexing with Merkle tree change detection (#31) ([dcd7c55](https://github.com/mhalder/qdrant-mcp-server/commit/dcd7c55)), closes [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31) [#31](https://github.com/mhalder/qdrant-mcp-server/issues/31)
|
|
13
|
+
* feat: add path traversal validation for security ([0d783cc](https://github.com/mhalder/qdrant-mcp-server/commit/0d783cc)), closes [#2](https://github.com/mhalder/qdrant-mcp-server/issues/2)
|
|
14
|
+
* fix: improve test coverage and fix critical bugs (830/840 passing) ([32c13b2](https://github.com/mhalder/qdrant-mcp-server/commit/32c13b2))
|
|
15
|
+
* fix: resolve tree-sitter peer dependency conflicts for Node.js 22.x ([61d614f](https://github.com/mhalder/qdrant-mcp-server/commit/61d614f))
|
|
16
|
+
* docs: add comprehensive code vectorization examples and documentation ([1e0d48d](https://github.com/mhalder/qdrant-mcp-server/commit/1e0d48d))
|
|
17
|
+
* style: format code vectorization module with project formatter ([beb71bb](https://github.com/mhalder/qdrant-mcp-server/commit/beb71bb))
|
|
18
|
+
|
|
1
19
|
## 1.4.0 (2025-10-13)
|
|
2
20
|
|
|
3
21
|
* Merge pull request #29 from mhalder/feature/configurable-prompts ([f9652b1](https://github.com/mhalder/qdrant-mcp-server/commit/f9652b1)), closes [#29](https://github.com/mhalder/qdrant-mcp-server/issues/29)
|
package/README.md
CHANGED
|
@@ -9,9 +9,11 @@ A Model Context Protocol (MCP) server providing semantic search capabilities usi
|
|
|
9
9
|
|
|
10
10
|
- **Zero Setup**: Works out of the box with Ollama - no API keys required
|
|
11
11
|
- **Privacy-First**: Local embeddings and vector storage - data never leaves your machine
|
|
12
|
+
- **Code Vectorization**: Intelligent codebase indexing with AST-aware chunking and semantic code search
|
|
12
13
|
- **Multiple Providers**: Ollama (default), OpenAI, Cohere, and Voyage AI
|
|
13
14
|
- **Hybrid Search**: Combine semantic and keyword search for better results
|
|
14
15
|
- **Semantic Search**: Natural language search with metadata filtering
|
|
16
|
+
- **Incremental Indexing**: Efficient updates - only re-index changed files
|
|
15
17
|
- **Configurable Prompts**: Create custom prompts for guided workflows without code changes
|
|
16
18
|
- **Rate Limiting**: Intelligent throttling with exponential backoff
|
|
17
19
|
- **Full CRUD**: Create, search, and manage collections and documents
|
|
@@ -124,6 +126,16 @@ See [Advanced Configuration](#advanced-configuration) section below for all opti
|
|
|
124
126
|
| `hybrid_search` | Hybrid search combining semantic and keyword (BM25) search with RRF |
|
|
125
127
|
| `delete_documents` | Delete specific documents by ID |
|
|
126
128
|
|
|
129
|
+
### Code Vectorization
|
|
130
|
+
|
|
131
|
+
| Tool | Description |
|
|
132
|
+
| ------------------ | ---------------------------------------------------------------------------------------------- |
|
|
133
|
+
| `index_codebase` | Index a codebase for semantic code search with AST-aware chunking |
|
|
134
|
+
| `search_code` | Search indexed codebase using natural language queries |
|
|
135
|
+
| `reindex_changes` | Incrementally re-index only changed files (detects added/modified/deleted) |
|
|
136
|
+
| `get_index_status` | Get indexing status and statistics for a codebase |
|
|
137
|
+
| `clear_index` | Delete all indexed data for a codebase |
|
|
138
|
+
|
|
127
139
|
### Resources
|
|
128
140
|
|
|
129
141
|
- `qdrant://collections` - List all collections
|
|
@@ -195,6 +207,192 @@ Templates use `{{variable}}` placeholders:
|
|
|
195
207
|
- Optional arguments use defaults if not specified
|
|
196
208
|
- Unknown variables are left as-is in the output
|
|
197
209
|
|
|
210
|
+
## Code Vectorization
|
|
211
|
+
|
|
212
|
+
Intelligently index and search your codebase using semantic code search. Perfect for AI-assisted development, code exploration, and understanding large codebases.
|
|
213
|
+
|
|
214
|
+
### Features
|
|
215
|
+
|
|
216
|
+
- **AST-Aware Chunking**: Intelligent code splitting at function/class boundaries using tree-sitter
|
|
217
|
+
- **Multi-Language Support**: 35+ file types including TypeScript, Python, Java, Go, Rust, C++, and more
|
|
218
|
+
- **Incremental Updates**: Only re-index changed files for fast updates
|
|
219
|
+
- **Smart Ignore Patterns**: Respects .gitignore, .dockerignore, and custom .contextignore files
|
|
220
|
+
- **Semantic Search**: Natural language queries to find relevant code
|
|
221
|
+
- **Metadata Filtering**: Filter by file type, path patterns, or language
|
|
222
|
+
- **Local-First**: All processing happens locally - your code never leaves your machine
|
|
223
|
+
|
|
224
|
+
### Quick Start
|
|
225
|
+
|
|
226
|
+
**1. Index your codebase:**
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# Via Claude Code MCP tool
|
|
230
|
+
/mcp__qdrant__index_codebase /path/to/your/project
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
**2. Search your code:**
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Natural language search
|
|
237
|
+
/mcp__qdrant__search_code /path/to/your/project "authentication middleware"
|
|
238
|
+
|
|
239
|
+
# Filter by file type
|
|
240
|
+
/mcp__qdrant__search_code /path/to/your/project "database schema" --fileTypes .ts,.js
|
|
241
|
+
|
|
242
|
+
# Filter by path pattern
|
|
243
|
+
/mcp__qdrant__search_code /path/to/your/project "API endpoints" --pathPattern src/api/**
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
**3. Update after changes:**
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
# Incrementally re-index only changed files
|
|
250
|
+
/mcp__qdrant__reindex_changes /path/to/your/project
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Usage Examples
|
|
254
|
+
|
|
255
|
+
#### Index a TypeScript Project
|
|
256
|
+
|
|
257
|
+
```typescript
|
|
258
|
+
// The MCP tool automatically:
|
|
259
|
+
// 1. Scans all .ts, .tsx, .js, .jsx files
|
|
260
|
+
// 2. Respects .gitignore patterns (skips node_modules, dist, etc.)
|
|
261
|
+
// 3. Chunks code at function/class boundaries
|
|
262
|
+
// 4. Generates embeddings using your configured provider
|
|
263
|
+
// 5. Stores in Qdrant with metadata (file path, line numbers, language)
|
|
264
|
+
|
|
265
|
+
index_codebase({
|
|
266
|
+
path: "/workspace/my-app",
|
|
267
|
+
forceReindex: false // Set to true to re-index from scratch
|
|
268
|
+
})
|
|
269
|
+
|
|
270
|
+
// Output:
|
|
271
|
+
// ✓ Indexed 247 files (1,823 chunks) in 45.2s
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
#### Search for Authentication Code
|
|
275
|
+
|
|
276
|
+
```typescript
|
|
277
|
+
search_code({
|
|
278
|
+
path: "/workspace/my-app",
|
|
279
|
+
query: "how does user authentication work?",
|
|
280
|
+
limit: 5
|
|
281
|
+
})
|
|
282
|
+
|
|
283
|
+
// Results include file path, line numbers, and code snippets:
|
|
284
|
+
// [
|
|
285
|
+
// {
|
|
286
|
+
// filePath: "src/auth/middleware.ts",
|
|
287
|
+
// startLine: 15,
|
|
288
|
+
// endLine: 42,
|
|
289
|
+
// content: "export async function authenticateUser(req: Request) { ... }",
|
|
290
|
+
// score: 0.89,
|
|
291
|
+
// language: "typescript"
|
|
292
|
+
// },
|
|
293
|
+
// ...
|
|
294
|
+
// ]
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
#### Search with Filters
|
|
298
|
+
|
|
299
|
+
```typescript
|
|
300
|
+
// Only search TypeScript files
|
|
301
|
+
search_code({
|
|
302
|
+
path: "/workspace/my-app",
|
|
303
|
+
query: "error handling patterns",
|
|
304
|
+
fileTypes: [".ts", ".tsx"],
|
|
305
|
+
limit: 10
|
|
306
|
+
})
|
|
307
|
+
|
|
308
|
+
// Only search in specific directories
|
|
309
|
+
search_code({
|
|
310
|
+
path: "/workspace/my-app",
|
|
311
|
+
query: "API route handlers",
|
|
312
|
+
pathPattern: "src/api/**",
|
|
313
|
+
limit: 10
|
|
314
|
+
})
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
#### Incremental Re-indexing
|
|
318
|
+
|
|
319
|
+
```typescript
|
|
320
|
+
// After making changes to your codebase
|
|
321
|
+
reindex_changes({
|
|
322
|
+
path: "/workspace/my-app"
|
|
323
|
+
})
|
|
324
|
+
|
|
325
|
+
// Output:
|
|
326
|
+
// ✓ Updated: +3 files added, ~5 files modified, -1 files deleted
|
|
327
|
+
// ✓ Chunks: +47 added, -23 deleted in 8.3s
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
#### Check Indexing Status
|
|
331
|
+
|
|
332
|
+
```typescript
|
|
333
|
+
get_index_status({
|
|
334
|
+
path: "/workspace/my-app"
|
|
335
|
+
})
|
|
336
|
+
|
|
337
|
+
// Output:
|
|
338
|
+
// {
|
|
339
|
+
// isIndexed: true,
|
|
340
|
+
// collectionName: "code_a3f8d2e1",
|
|
341
|
+
// chunksCount: 1823,
|
|
342
|
+
// filesCount: 247,
|
|
343
|
+
// lastUpdated: "2025-01-30T10:15:00Z",
|
|
344
|
+
// languages: ["typescript", "javascript", "json"]
|
|
345
|
+
// }
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
### Supported Languages
|
|
349
|
+
|
|
350
|
+
**Programming Languages** (35+ file types):
|
|
351
|
+
- **Web**: TypeScript, JavaScript, Vue, Svelte
|
|
352
|
+
- **Backend**: Python, Java, Go, Rust, Ruby, PHP
|
|
353
|
+
- **Systems**: C, C++, C#
|
|
354
|
+
- **Mobile**: Swift, Kotlin, Dart
|
|
355
|
+
- **Functional**: Scala, Clojure, Haskell, OCaml
|
|
356
|
+
- **Scripting**: Bash, Shell, Fish
|
|
357
|
+
- **Data**: SQL, GraphQL, Protocol Buffers
|
|
358
|
+
- **Config**: JSON, YAML, TOML, XML, Markdown
|
|
359
|
+
|
|
360
|
+
See [configuration](#code-vectorization-configuration) for the full list and customization options.
|
|
361
|
+
|
|
362
|
+
### Custom Ignore Patterns
|
|
363
|
+
|
|
364
|
+
Create a `.contextignore` file in your project root to specify additional patterns to ignore:
|
|
365
|
+
|
|
366
|
+
```gitignore
|
|
367
|
+
# .contextignore
|
|
368
|
+
**/test/**
|
|
369
|
+
**/*.test.ts
|
|
370
|
+
**/*.spec.ts
|
|
371
|
+
**/fixtures/**
|
|
372
|
+
**/mocks/**
|
|
373
|
+
**/__tests__/**
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### Best Practices
|
|
377
|
+
|
|
378
|
+
1. **Index Once, Update Incrementally**: Use `index_codebase` for initial indexing, then `reindex_changes` for updates
|
|
379
|
+
2. **Use Filters**: Narrow search scope with `fileTypes` and `pathPattern` for better results
|
|
380
|
+
3. **Meaningful Queries**: Use natural language that describes what you're looking for (e.g., "database connection pooling" instead of "db")
|
|
381
|
+
4. **Check Status First**: Use `get_index_status` to verify a codebase is indexed before searching
|
|
382
|
+
5. **Local Embedding**: Use Ollama (default) to keep everything local and private
|
|
383
|
+
|
|
384
|
+
### Performance
|
|
385
|
+
|
|
386
|
+
Typical performance on a modern laptop (Apple M1/M2 or similar):
|
|
387
|
+
|
|
388
|
+
| Codebase Size | Files | Indexing Time | Search Latency |
|
|
389
|
+
|--------------|-------|---------------|----------------|
|
|
390
|
+
| Small (10k LOC) | 50 | ~10s | <100ms |
|
|
391
|
+
| Medium (100k LOC) | 500 | ~2min | <200ms |
|
|
392
|
+
| Large (500k LOC) | 2,500 | ~10min | <500ms |
|
|
393
|
+
|
|
394
|
+
**Note**: Indexing time varies based on embedding provider. Ollama (local) is fastest for initial indexing.
|
|
395
|
+
|
|
198
396
|
## Examples
|
|
199
397
|
|
|
200
398
|
See [examples/](examples/) directory for detailed guides:
|
|
@@ -203,11 +401,14 @@ See [examples/](examples/) directory for detailed guides:
|
|
|
203
401
|
- **[Knowledge Base](examples/knowledge-base/)** - Structured documentation with metadata
|
|
204
402
|
- **[Advanced Filtering](examples/filters/)** - Complex boolean filters
|
|
205
403
|
- **[Rate Limiting](examples/rate-limiting/)** - Batch processing with cloud providers
|
|
404
|
+
- **[Code Search](examples/code-search/)** - Index codebases and run semantic code search
|
|
206
405
|
|
|
207
406
|
## Advanced Configuration
|
|
208
407
|
|
|
209
408
|
### Environment Variables
|
|
210
409
|
|
|
410
|
+
#### Core Configuration
|
|
411
|
+
|
|
211
412
|
| Variable | Description | Default |
|
|
212
413
|
| ----------------------------------- | -------------------------------------- | --------------------- |
|
|
213
414
|
| `TRANSPORT_MODE` | "stdio" or "http" | stdio |
|
|
@@ -215,6 +416,11 @@ See [examples/](examples/) directory for detailed guides:
|
|
|
215
416
|
| `EMBEDDING_PROVIDER` | "ollama", "openai", "cohere", "voyage" | ollama |
|
|
216
417
|
| `QDRANT_URL` | Qdrant server URL | http://localhost:6333 |
|
|
217
418
|
| `PROMPTS_CONFIG_FILE` | Path to prompts configuration JSON | prompts.json |
|
|
419
|
+
|
|
420
|
+
#### Embedding Configuration
|
|
421
|
+
|
|
422
|
+
| Variable | Description | Default |
|
|
423
|
+
| ----------------------------------- | -------------------------------------- | --------------------- |
|
|
218
424
|
| `EMBEDDING_MODEL` | Model name | Provider-specific |
|
|
219
425
|
| `EMBEDDING_BASE_URL` | Custom API URL | Provider-specific |
|
|
220
426
|
| `EMBEDDING_MAX_REQUESTS_PER_MINUTE` | Rate limit | Provider-specific |
|
|
@@ -224,6 +430,18 @@ See [examples/](examples/) directory for detailed guides:
|
|
|
224
430
|
| `COHERE_API_KEY` | Cohere API key | - |
|
|
225
431
|
| `VOYAGE_API_KEY` | Voyage AI API key | - |
|
|
226
432
|
|
|
433
|
+
#### Code Vectorization Configuration
|
|
434
|
+
|
|
435
|
+
| Variable | Description | Default |
|
|
436
|
+
| ------------------------- | ------------------------------------------------ | ------- |
|
|
437
|
+
| `CODE_CHUNK_SIZE` | Maximum chunk size in characters | 2500 |
|
|
438
|
+
| `CODE_CHUNK_OVERLAP` | Overlap between chunks in characters | 300 |
|
|
439
|
+
| `CODE_ENABLE_AST` | Enable AST-aware chunking (tree-sitter) | true |
|
|
440
|
+
| `CODE_BATCH_SIZE` | Number of chunks to embed in one batch | 100 |
|
|
441
|
+
| `CODE_CUSTOM_EXTENSIONS` | Additional file extensions (comma-separated) | - |
|
|
442
|
+
| `CODE_CUSTOM_IGNORE` | Additional ignore patterns (comma-separated) | - |
|
|
443
|
+
| `CODE_DEFAULT_LIMIT` | Default search result limit | 5 |
|
|
444
|
+
|
|
227
445
|
### Provider Comparison
|
|
228
446
|
|
|
229
447
|
| Provider | Models | Dimensions | Rate Limit | Notes |
|
|
@@ -237,15 +455,20 @@ See [examples/](examples/) directory for detailed guides:
|
|
|
237
455
|
|
|
238
456
|
## Troubleshooting
|
|
239
457
|
|
|
240
|
-
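| Issue                  | Solution                                                                     |
|
|
241
|
-
| ---------------------- | ---------------------------------------------------------------------------- |
|
|
242
|
-
| **Qdrant not running** | `docker compose up -d`                                                       |
|
|
243
|
-
| **Collection missing** | Create collection first before adding documents                              |
|
|
244
|
-
| **Ollama not running** | Verify with `curl http://localhost:11434`, start with `docker compose up -d` |
|
|
245
|
-
| **Model missing**      | `docker exec ollama ollama pull nomic-embed-text`                            |
|
|
246
|
-
| **Rate limit errors**  | Adjust `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider tier       |
|
|
247
|
-
| **API key errors**     | Verify correct API key in environment configuration                          |
|
|
248
|
-
| **Filter errors**      | Ensure Qdrant filter format, check field names match metadata                |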
|
|
458
|
+
| Issue | Solution |
|
|
459
|
+
| ------------------------------- | ---------------------------------------------------------------------------- |
|
|
460
|
+
| **Qdrant not running** | `docker compose up -d` |
|
|
461
|
+
| **Collection missing** | Create collection first before adding documents |
|
|
462
|
+
| **Ollama not running** | Verify with `curl http://localhost:11434`, start with `docker compose up -d` |
|
|
463
|
+
| **Model missing** | `docker exec ollama ollama pull nomic-embed-text` |
|
|
464
|
+
| **Rate limit errors** | Adjust `EMBEDDING_MAX_REQUESTS_PER_MINUTE` to match your provider tier |
|
|
465
|
+
| **API key errors** | Verify correct API key in environment configuration |
|
|
466
|
+
| **Filter errors** | Ensure Qdrant filter format, check field names match metadata |
|
|
467
|
+
| **Codebase not indexed** | Run `index_codebase` before `search_code` |
|
|
468
|
+
| **Slow indexing** | Use Ollama (local) for faster indexing, or increase `CODE_BATCH_SIZE` |
|
|
469
|
+
| **Files not found** | Check `.gitignore` and `.contextignore` patterns |
|
|
470
|
+
| **Search returns no results** | Try broader queries, check if codebase is indexed with `get_index_status` |
|
|
471
|
+
| **Out of memory during index** | Reduce `CODE_CHUNK_SIZE` or `CODE_BATCH_SIZE` |
|
|
249
472
|
|
|
250
473
|
## Development
|
|
251
474
|
|
|
@@ -276,6 +499,10 @@ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for:
|
|
|
276
499
|
|
|
277
500
|
**Automated releases**: Semantic versioning via conventional commits - `feat:` → minor, `fix:` → patch, `BREAKING CHANGE:` → major.
|
|
278
501
|
|
|
502
|
+
## Acknowledgments
|
|
503
|
+
|
|
504
|
+
The code vectorization feature is inspired by and builds upon concepts from the excellent [claude-context](https://github.com/zilliztech/claude-context) project (MIT License, Copyright 2025 Zilliz).
|
|
505
|
+
|
|
279
506
|
## License
|
|
280
507
|
|
|
281
508
|
MIT - see [LICENSE](LICENSE) file.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base interface for code chunkers
|
|
3
|
+
*/
|
|
4
|
+
import type { CodeChunk } from "../types.js";
|
|
5
|
+
export interface CodeChunker {
|
|
6
|
+
/**
|
|
7
|
+
* Split the source text of one file into chunks; resolves to the resulting CodeChunk list
|
|
8
|
+
*/
|
|
9
|
+
chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
|
|
10
|
+
/**
|
|
11
|
+
* Report whether this chunker can handle the given language identifier
|
|
12
|
+
*/
|
|
13
|
+
supportsLanguage(language: string): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Return a name identifying this chunker's strategy
|
|
16
|
+
*/
|
|
17
|
+
getStrategyName(): string;
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=base.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/base.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IAE9E;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;IAE5C;;OAEG;IACH,eAAe,IAAI,MAAM,CAAC;CAC3B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.js","sourceRoot":"","sources":["../../../src/code/chunker/base.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CharacterChunker - Simple character-based chunking with overlap
|
|
3
|
+
* Used as fallback when AST parsing is not available
|
|
4
|
+
*/
|
|
5
|
+
import type { ChunkerConfig, CodeChunk } from "../types.js";
|
|
6
|
+
import type { CodeChunker } from "./base.js";
|
|
7
|
+
export declare class CharacterChunker implements CodeChunker {
|
|
8
|
+
private config;
|
|
9
|
+
constructor(config: ChunkerConfig);
|
|
10
|
+
chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
|
|
11
|
+
supportsLanguage(_language: string): boolean;
|
|
12
|
+
getStrategyName(): string;
|
|
13
|
+
/**
|
|
14
|
+
* Find a nearby line suitable for ending a chunk (blank line, closing brace/bracket, or comment line)
|
|
15
|
+
*/
|
|
16
|
+
private findBreakPoint;
|
|
17
|
+
/**
|
|
18
|
+
* Calculate how many trailing lines of a finished chunk to repeat at the start of the next chunk
|
|
19
|
+
*/
|
|
20
|
+
private calculateOverlapLines;
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=character-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"character-chunker.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/character-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE7C,qBAAa,gBAAiB,YAAW,WAAW;IACtC,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,aAAa;IAEnC,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IA6EnF,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO;IAK5C,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB;;OAEG;IACH,OAAO,CAAC,qBAAqB;CAK9B"}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CharacterChunker - Simple character-based chunking with overlap
|
|
3
|
+
* Used as fallback when AST parsing is not available
|
|
4
|
+
*/
|
|
5
|
+
export class CharacterChunker {
|
|
6
|
+
config;
|
|
7
|
+
constructor(config) {
|
|
8
|
+
this.config = config;
|
|
9
|
+
}
|
|
10
|
+
async chunk(code, filePath, language) {
|
|
11
|
+
const chunks = [];
|
|
12
|
+
const lines = code.split("\n");
|
|
13
|
+
let currentChunk = "";
|
|
14
|
+
let currentStartLine = 1;
|
|
15
|
+
let currentLineCount = 0;
|
|
16
|
+
let chunkIndex = 0;
|
|
17
|
+
for (let i = 0; i < lines.length; i++) {
|
|
18
|
+
const line = lines[i];
|
|
19
|
+
currentChunk += `${line}\n`;
|
|
20
|
+
currentLineCount++;
|
|
21
|
+
// Check if we've reached chunk size
|
|
22
|
+
if (currentChunk.length >= this.config.chunkSize) {
|
|
23
|
+
// Try to find a good break point (end of function, class, or empty line)
|
|
24
|
+
const breakPoint = this.findBreakPoint(lines, i + 1);
|
|
25
|
+
if (breakPoint > i && breakPoint - i < 20) {
|
|
26
|
+
// Include lines up to break point, but respect maxChunkSize
|
|
27
|
+
for (let j = i + 1; j <= breakPoint && j < lines.length; j++) {
|
|
28
|
+
const nextLine = `${lines[j]}\n`;
|
|
29
|
+
// Stop if adding this line would exceed maxChunkSize
|
|
30
|
+
if (currentChunk.length + nextLine.length > this.config.maxChunkSize) {
|
|
31
|
+
break;
|
|
32
|
+
}
|
|
33
|
+
currentChunk += nextLine;
|
|
34
|
+
currentLineCount++;
|
|
35
|
+
i = j;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
// Create chunk
|
|
39
|
+
chunks.push({
|
|
40
|
+
content: currentChunk.trim(),
|
|
41
|
+
startLine: currentStartLine,
|
|
42
|
+
endLine: currentStartLine + currentLineCount - 1,
|
|
43
|
+
metadata: {
|
|
44
|
+
filePath,
|
|
45
|
+
language,
|
|
46
|
+
chunkIndex,
|
|
47
|
+
chunkType: "block",
|
|
48
|
+
},
|
|
49
|
+
});
|
|
50
|
+
chunkIndex++;
|
|
51
|
+
// Calculate overlap
|
|
52
|
+
const overlapLines = this.calculateOverlapLines(currentLineCount);
|
|
53
|
+
const _overlapStart = Math.max(0, currentLineCount - overlapLines);
|
|
54
|
+
// Start new chunk with overlap
|
|
55
|
+
currentChunk = `${lines.slice(i - overlapLines + 1, i + 1).join("\n")}\n`;
|
|
56
|
+
currentStartLine = currentStartLine + currentLineCount - overlapLines;
|
|
57
|
+
currentLineCount = overlapLines;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
// Add remaining content as final chunk
|
|
61
|
+
if (currentChunk.trim().length > 50) {
|
|
62
|
+
chunks.push({
|
|
63
|
+
content: currentChunk.trim(),
|
|
64
|
+
startLine: currentStartLine,
|
|
65
|
+
endLine: currentStartLine + currentLineCount - 1,
|
|
66
|
+
metadata: {
|
|
67
|
+
filePath,
|
|
68
|
+
language,
|
|
69
|
+
chunkIndex,
|
|
70
|
+
chunkType: "block",
|
|
71
|
+
},
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
return chunks;
|
|
75
|
+
}
|
|
76
|
+
supportsLanguage(_language) {
|
|
77
|
+
// Character chunker supports all languages
|
|
78
|
+
return true;
|
|
79
|
+
}
|
|
80
|
+
getStrategyName() {
|
|
81
|
+
return "character-based";
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Find a good break point in the code (empty line, closing brace, etc.)
|
|
85
|
+
*/
|
|
86
|
+
findBreakPoint(lines, startIdx) {
|
|
87
|
+
const searchWindow = Math.min(20, lines.length - startIdx);
|
|
88
|
+
for (let i = 0; i < searchWindow; i++) {
|
|
89
|
+
const line = lines[startIdx + i]?.trim() || "";
|
|
90
|
+
// Good break points
|
|
91
|
+
if (line === "" ||
|
|
92
|
+
line === "}" ||
|
|
93
|
+
line === "};" ||
|
|
94
|
+
line === "]);" ||
|
|
95
|
+
line.startsWith("//") ||
|
|
96
|
+
line.startsWith("#")) {
|
|
97
|
+
return startIdx + i;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return startIdx;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Calculate number of lines to overlap based on chunk size
|
|
104
|
+
*/
|
|
105
|
+
calculateOverlapLines(totalLines) {
|
|
106
|
+
const overlapChars = this.config.chunkOverlap;
|
|
107
|
+
const avgCharsPerLine = this.config.chunkSize / Math.max(totalLines, 1);
|
|
108
|
+
return Math.floor(overlapChars / Math.max(avgCharsPerLine, 1));
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
//# sourceMappingURL=character-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"character-chunker.js","sourceRoot":"","sources":["../../../src/code/chunker/character-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,OAAO,gBAAgB;IACP;IAApB,YAAoB,MAAqB;QAArB,WAAM,GAAN,MAAM,CAAe;IAAG,CAAC;IAE7C,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAgB;QAC1D,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,YAAY,IAAI,GAAG,IAAI,IAAI,CAAC;YAC5B,gBAAgB,EAAE,CAAC;YAEnB,oCAAoC;YACpC,IAAI,YAAY,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBACjD,yEAAyE;gBACzE,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;gBAErD,IAAI,UAAU,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC;oBAC1C,4DAA4D;oBAC5D,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC7D,MAAM,QAAQ,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;wBACjC,qDAAqD;wBACrD,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;4BACrE,MAAM;wBACR,CAAC;wBACD,YAAY,IAAI,QAAQ,CAAC;wBACzB,gBAAgB,EAAE,CAAC;wBACnB,CAAC,GAAG,CAAC,CAAC;oBACR,CAAC;gBACH,CAAC;gBAED,eAAe;gBACf,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;oBAC5B,SAAS,EAAE,gBAAgB;oBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;oBAChD,QAAQ,EAAE;wBACR,QAAQ;wBACR,QAAQ;wBACR,UAAU;wBACV,SAAS,EAAE,OAAO;qBACnB;iBACF,CAAC,CAAC;gBAEH,UAAU,EAAE,CAAC;gBAEb,oBAAoB;gBACpB,MAAM,YAAY,GAAG,IAAI,CAAC,qBAAqB,CAAC,gBAAgB,CAAC,CAAC;gBAClE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,gBAAgB,GAAG,YAAY,CAAC,CAAC;gBAEnE,+BAA+B;gBAC/B,YAAY,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC1E,gBAAgB,GAAG,gBAAgB,GAAG,gBAAgB,GAAG,YAAY,CAAC;gBACtE,gBAAgB,GAAG,YAAY,CAAC;YAClC,CAAC;QACH,CAAC;Q
AED,uCAAuC;QACvC,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;gBAC5B,SAAS,EAAE,gBAAgB;gBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;gBAChD,QAAQ,EAAE;oBACR,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,SAAS,EAAE,OAAO;iBACnB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,gBAAgB,CAAC,SAAiB;QAChC,2CAA2C;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,eAAe;QACb,OAAO,iBAAiB,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,KAAe,EAAE,QAAgB;QACtD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;QAE3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE/C,oBAAoB;YACpB,IACE,IAAI,KAAK,EAAE;gBACX,IAAI,KAAK,GAAG;gBACZ,IAAI,KAAK,IAAI;gBACb,IAAI,KAAK,KAAK;gBACd,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;gBACrB,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EACpB,CAAC;gBACD,OAAO,QAAQ,GAAG,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,UAAkB;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;QAC9C,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QACxE,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;CACF"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TreeSitterChunker - AST-aware code chunking using tree-sitter
|
|
3
|
+
* Primary chunking strategy for supported languages
|
|
4
|
+
*/
|
|
5
|
+
import type { ChunkerConfig, CodeChunk } from "../types.js";
|
|
6
|
+
import type { CodeChunker } from "./base.js";
|
|
7
|
+
export declare class TreeSitterChunker implements CodeChunker {
|
|
8
|
+
private config;
|
|
9
|
+
private languages;
|
|
10
|
+
private fallbackChunker;
|
|
11
|
+
constructor(config: ChunkerConfig);
|
|
12
|
+
private initializeParsers;
|
|
13
|
+
chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]>;
|
|
14
|
+
supportsLanguage(language: string): boolean;
|
|
15
|
+
getStrategyName(): string;
|
|
16
|
+
/**
|
|
17
|
+
* Find all chunkable nodes in the AST
|
|
18
|
+
*/
|
|
19
|
+
private findChunkableNodes;
|
|
20
|
+
/**
|
|
21
|
+
* Extract the function/class name from an AST node
|
|
22
|
+
*/
|
|
23
|
+
private extractName;
|
|
24
|
+
/**
|
|
25
|
+
* Map an AST node type to a chunk type
|
|
26
|
+
*/
|
|
27
|
+
private getChunkType;
|
|
28
|
+
}
|
|
29
|
+
//# sourceMappingURL=tree-sitter-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-sitter-chunker.d.ts","sourceRoot":"","sources":["../../../src/code/chunker/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAYH,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAQ7C,qBAAa,iBAAkB,YAAW,WAAW;IAIvC,OAAO,CAAC,MAAM;IAH1B,OAAO,CAAC,SAAS,CAA0C;IAC3D,OAAO,CAAC,eAAe,CAAmB;gBAEtB,MAAM,EAAE,aAAa;IAKzC,OAAO,CAAC,iBAAiB;IAgFnB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAoEnF,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAI3C,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAsB1B;;OAEG;IACH,OAAO,CAAC,WAAW;IAiBnB;;OAEG;IACH,OAAO,CAAC,YAAY;CAYrB"}
|