code-memory 1.0.13__tar.gz → 1.0.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_memory-1.0.13 → code_memory-1.0.15}/PKG-INFO +46 -36
- {code_memory-1.0.13 → code_memory-1.0.15}/README.md +45 -35
- code_memory-1.0.15/assets/logo.png +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/db.py +5 -3
- {code_memory-1.0.13 → code_memory-1.0.15}/doc_parser.py +16 -6
- {code_memory-1.0.13 → code_memory-1.0.15}/parser.py +29 -1
- {code_memory-1.0.13 → code_memory-1.0.15}/pyproject.toml +1 -1
- {code_memory-1.0.13 → code_memory-1.0.15}/server.py +53 -6
- {code_memory-1.0.13 → code_memory-1.0.15}/tests/test_tools.py +18 -1
- {code_memory-1.0.13 → code_memory-1.0.15}/uv.lock +1 -1
- {code_memory-1.0.13 → code_memory-1.0.15}/.github/workflows/ci.yml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/.github/workflows/publish.yml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/.github/workflows/release-binaries.yml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/.gitignore +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/.python-version +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/CHANGELOG.md +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/CONTRIBUTING.md +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/LICENSE +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/Makefile +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/code-memory.spec +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/errors.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/git_search.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/hooks/hook-sentence_transformers.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/hooks/hook-sqlite_vec.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/hooks/hook-tree_sitter.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/hooks/hook-tree_sitter_languages.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/logging_config.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/prompts/milestone_1.xml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/prompts/milestone_2.xml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/prompts/milestone_3.xml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/prompts/milestone_4.xml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/prompts/milestone_5.xml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/prompts/milestone_6.xml +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/queries.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/tests/__init__.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/tests/conftest.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/tests/test_errors.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/tests/test_logging.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/tests/test_validation.py +0 -0
- {code_memory-1.0.13 → code_memory-1.0.15}/validation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-memory
|
|
3
|
-
Version: 1.0.13
|
|
3
|
+
Version: 1.0.15
|
|
4
4
|
Summary: A deterministic, high-precision code intelligence MCP server
|
|
5
5
|
Project-URL: Homepage, https://github.com/kapillamba4/code-memory
|
|
6
6
|
Project-URL: Documentation, https://github.com/kapillamba4/code-memory#readme
|
|
@@ -44,46 +44,29 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
|
|
45
45
|
# code-memory
|
|
46
46
|
|
|
47
|
+
<img src="assets/logo.png" alt="code-memory logo" width="100%">
|
|
48
|
+
|
|
47
49
|
A deterministic, high-precision **code intelligence layer** exposed as a [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server.
|
|
48
50
|
|
|
49
|
-
|
|
51
|
+
- **No API key required** — runs entirely locally with sentence-transformers
|
|
52
|
+
- **1 min setup** — just `uvx code-memory` and you're ready
|
|
53
|
+
- **Token saving by 50%** — precise code retrieval instead of dumping entire files
|
|
54
|
+
|
|
55
|
+
**Please help star code-memory if you like this project!**
|
|
56
|
+
|
|
57
|
+
## Why code-memory?
|
|
58
|
+
|
|
59
|
+
Finding the right context from a large codebase is **expensive**, **inaccurate**, and **limited by context windows**. Dumping files into prompts wastes tokens, and LLMs lose track of the actual task as context fills up.
|
|
60
|
+
|
|
61
|
+
Instead of manually hunting with `grep`/`find` or dumping raw file text, `code-memory` runs semantic searches against a locally indexed codebase. Inspired by [claude-context](https://github.com/redmonkez12/claude-context), but designed from the ground up for large-scale local search.
|
|
50
62
|
|
|
51
63
|
## Supported Languages
|
|
52
64
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|----------|------------|
|
|
59
|
-
| Python | `.py` |
|
|
60
|
-
| JavaScript | `.js`, `.jsx` |
|
|
61
|
-
| TypeScript | `.ts`, `.tsx` |
|
|
62
|
-
| Java | `.java` |
|
|
63
|
-
| Go | `.go` |
|
|
64
|
-
| Rust | `.rs` |
|
|
65
|
-
| C | `.c`, `.h` |
|
|
66
|
-
| C++ | `.cpp`, `.hpp`, `.cc`, `.cxx` |
|
|
67
|
-
| Ruby | `.rb` |
|
|
68
|
-
| Kotlin | `.kt`, `.kts` |
|
|
69
|
-
|
|
70
|
-
### Fallback Support (Whole-file Indexing)
|
|
71
|
-
|
|
72
|
-
These file types are indexed as complete units for BM25 and semantic search:
|
|
73
|
-
|
|
74
|
-
| Category | Extensions |
|
|
75
|
-
|----------|------------|
|
|
76
|
-
| C# | `.cs` |
|
|
77
|
-
| Swift | `.swift` |
|
|
78
|
-
| Scala | `.scala` |
|
|
79
|
-
| Lua | `.lua` |
|
|
80
|
-
| Shell | `.sh`, `.bash`, `.zsh` |
|
|
81
|
-
| Config | `.yaml`, `.yml`, `.toml`, `.json` |
|
|
82
|
-
| Web | `.html`, `.css`, `.scss` |
|
|
83
|
-
| Database | `.sql` |
|
|
84
|
-
| Docs | `.md`, `.txt` |
|
|
85
|
-
|
|
86
|
-
> **Note:** Files and directories matching patterns in your `.gitignore` are automatically skipped during indexing. This excludes build artifacts, dependencies, and other generated files.
|
|
65
|
+
**Full AST Support** (structural parsing with symbol extraction): Python, JavaScript/TypeScript, Java, Go, Rust, C/C++, Ruby, Kotlin
|
|
66
|
+
|
|
67
|
+
**Fallback Support** (whole-file indexing): C#, Swift, Scala, Lua, Shell, Config (yaml/toml/json), Web (html/css), SQL, Markdown
|
|
68
|
+
|
|
69
|
+
> Files matching `.gitignore` patterns are automatically skipped.
|
|
87
70
|
|
|
88
71
|
## Architecture: Progressive Disclosure
|
|
89
72
|
|
|
@@ -283,12 +266,39 @@ For Windows:
|
|
|
283
266
|
| Variable | Description | Default |
|
|
284
267
|
|----------|-------------|---------|
|
|
285
268
|
| `CODE_MEMORY_LOG_LEVEL` | Logging verbosity (DEBUG, INFO, WARNING, ERROR) | INFO |
|
|
269
|
+
| `EMBEDDING_MODEL` | HuggingFace model ID for embeddings | `nomic-ai/nomic-embed-text-v1.5` |
|
|
286
270
|
|
|
287
271
|
Example:
|
|
288
272
|
```bash
|
|
289
273
|
CODE_MEMORY_LOG_LEVEL=DEBUG uvx code-memory
|
|
290
274
|
```
|
|
291
275
|
|
|
276
|
+
### Custom Embedding Model
|
|
277
|
+
|
|
278
|
+
You can use a different embedding model by setting the `EMBEDDING_MODEL` environment variable:
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
EMBEDDING_MODEL="BAAI/bge-small-en-v1.5" uvx code-memory
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
For MCP hosts, add the environment variable to your configuration:
|
|
285
|
+
|
|
286
|
+
```json
|
|
287
|
+
{
|
|
288
|
+
"mcpServers": {
|
|
289
|
+
"code-memory": {
|
|
290
|
+
"command": "uvx",
|
|
291
|
+
"args": ["code-memory"],
|
|
292
|
+
"env": {
|
|
293
|
+
"EMBEDDING_MODEL": "BAAI/bge-small-en-v1.5"
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
> **Note:** Changing the embedding model will invalidate existing indexes. You'll need to re-run `index_codebase` after switching models.
|
|
301
|
+
|
|
292
302
|
## Tools
|
|
293
303
|
|
|
294
304
|
### `index_codebase`
|
|
@@ -1,45 +1,28 @@
|
|
|
1
1
|
# code-memory
|
|
2
2
|
|
|
3
|
+
<img src="assets/logo.png" alt="code-memory logo" width="100%">
|
|
4
|
+
|
|
3
5
|
A deterministic, high-precision **code intelligence layer** exposed as a [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server.
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
- **No API key required** — runs entirely locally with sentence-transformers
|
|
8
|
+
- **1 min setup** — just `uvx code-memory` and you're ready
|
|
9
|
+
- **Token saving by 50%** — precise code retrieval instead of dumping entire files
|
|
10
|
+
|
|
11
|
+
**Please help star code-memory if you like this project!**
|
|
12
|
+
|
|
13
|
+
## Why code-memory?
|
|
14
|
+
|
|
15
|
+
Finding the right context from a large codebase is **expensive**, **inaccurate**, and **limited by context windows**. Dumping files into prompts wastes tokens, and LLMs lose track of the actual task as context fills up.
|
|
16
|
+
|
|
17
|
+
Instead of manually hunting with `grep`/`find` or dumping raw file text, `code-memory` runs semantic searches against a locally indexed codebase. Inspired by [claude-context](https://github.com/redmonkez12/claude-context), but designed from the ground up for large-scale local search.
|
|
6
18
|
|
|
7
19
|
## Supported Languages
|
|
8
20
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|----------|------------|
|
|
15
|
-
| Python | `.py` |
|
|
16
|
-
| JavaScript | `.js`, `.jsx` |
|
|
17
|
-
| TypeScript | `.ts`, `.tsx` |
|
|
18
|
-
| Java | `.java` |
|
|
19
|
-
| Go | `.go` |
|
|
20
|
-
| Rust | `.rs` |
|
|
21
|
-
| C | `.c`, `.h` |
|
|
22
|
-
| C++ | `.cpp`, `.hpp`, `.cc`, `.cxx` |
|
|
23
|
-
| Ruby | `.rb` |
|
|
24
|
-
| Kotlin | `.kt`, `.kts` |
|
|
25
|
-
|
|
26
|
-
### Fallback Support (Whole-file Indexing)
|
|
27
|
-
|
|
28
|
-
These file types are indexed as complete units for BM25 and semantic search:
|
|
29
|
-
|
|
30
|
-
| Category | Extensions |
|
|
31
|
-
|----------|------------|
|
|
32
|
-
| C# | `.cs` |
|
|
33
|
-
| Swift | `.swift` |
|
|
34
|
-
| Scala | `.scala` |
|
|
35
|
-
| Lua | `.lua` |
|
|
36
|
-
| Shell | `.sh`, `.bash`, `.zsh` |
|
|
37
|
-
| Config | `.yaml`, `.yml`, `.toml`, `.json` |
|
|
38
|
-
| Web | `.html`, `.css`, `.scss` |
|
|
39
|
-
| Database | `.sql` |
|
|
40
|
-
| Docs | `.md`, `.txt` |
|
|
41
|
-
|
|
42
|
-
> **Note:** Files and directories matching patterns in your `.gitignore` are automatically skipped during indexing. This excludes build artifacts, dependencies, and other generated files.
|
|
21
|
+
**Full AST Support** (structural parsing with symbol extraction): Python, JavaScript/TypeScript, Java, Go, Rust, C/C++, Ruby, Kotlin
|
|
22
|
+
|
|
23
|
+
**Fallback Support** (whole-file indexing): C#, Swift, Scala, Lua, Shell, Config (yaml/toml/json), Web (html/css), SQL, Markdown
|
|
24
|
+
|
|
25
|
+
> Files matching `.gitignore` patterns are automatically skipped.
|
|
43
26
|
|
|
44
27
|
## Architecture: Progressive Disclosure
|
|
45
28
|
|
|
@@ -239,12 +222,39 @@ For Windows:
|
|
|
239
222
|
| Variable | Description | Default |
|
|
240
223
|
|----------|-------------|---------|
|
|
241
224
|
| `CODE_MEMORY_LOG_LEVEL` | Logging verbosity (DEBUG, INFO, WARNING, ERROR) | INFO |
|
|
225
|
+
| `EMBEDDING_MODEL` | HuggingFace model ID for embeddings | `nomic-ai/nomic-embed-text-v1.5` |
|
|
242
226
|
|
|
243
227
|
Example:
|
|
244
228
|
```bash
|
|
245
229
|
CODE_MEMORY_LOG_LEVEL=DEBUG uvx code-memory
|
|
246
230
|
```
|
|
247
231
|
|
|
232
|
+
### Custom Embedding Model
|
|
233
|
+
|
|
234
|
+
You can use a different embedding model by setting the `EMBEDDING_MODEL` environment variable:
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
EMBEDDING_MODEL="BAAI/bge-small-en-v1.5" uvx code-memory
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
For MCP hosts, add the environment variable to your configuration:
|
|
241
|
+
|
|
242
|
+
```json
|
|
243
|
+
{
|
|
244
|
+
"mcpServers": {
|
|
245
|
+
"code-memory": {
|
|
246
|
+
"command": "uvx",
|
|
247
|
+
"args": ["code-memory"],
|
|
248
|
+
"env": {
|
|
249
|
+
"EMBEDDING_MODEL": "BAAI/bge-small-en-v1.5"
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
> **Note:** Changing the embedding model will invalidate existing indexes. You'll need to re-run `index_codebase` after switching models.
|
|
257
|
+
|
|
248
258
|
## Tools
|
|
249
259
|
|
|
250
260
|
### `index_codebase`
|
|
Binary file
|
|
@@ -12,6 +12,7 @@ All writes use upsert semantics so re-indexing is idempotent.
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
14
|
import logging
|
|
15
|
+
import os
|
|
15
16
|
import sqlite3
|
|
16
17
|
from contextlib import contextmanager
|
|
17
18
|
from typing import TYPE_CHECKING
|
|
@@ -31,8 +32,9 @@ logger = logging.getLogger(__name__)
|
|
|
31
32
|
_model = None
|
|
32
33
|
_embedding_dim = None
|
|
33
34
|
|
|
34
|
-
# Model identifier -
|
|
35
|
-
|
|
35
|
+
# Model identifier - can be overridden via EMBEDDING_MODEL environment variable
|
|
36
|
+
DEFAULT_EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
|
|
37
|
+
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL)
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
def get_embedding_model():
|
|
@@ -46,7 +48,7 @@ def get_embedding_model():
|
|
|
46
48
|
)
|
|
47
49
|
# Cache the embedding dimension from the model
|
|
48
50
|
_embedding_dim = _model.get_sentence_embedding_dimension()
|
|
49
|
-
logger.info(f"Loaded embedding model with dimension: {_embedding_dim}")
|
|
51
|
+
logger.info(f"Loaded embedding model '{EMBEDDING_MODEL_NAME}' with dimension: {_embedding_dim}")
|
|
50
52
|
return _model
|
|
51
53
|
|
|
52
54
|
|
|
@@ -337,12 +337,15 @@ def index_doc_file(
|
|
|
337
337
|
}
|
|
338
338
|
|
|
339
339
|
|
|
340
|
-
def index_doc_directory(dirpath: str, db) -> list[dict]:
|
|
340
|
+
def index_doc_directory(dirpath: str, db, progress_callback=None, progress_offset: int = 0, progress_total: int = 0) -> list[dict]:
|
|
341
341
|
"""Recursively index all documentation in a directory.
|
|
342
342
|
|
|
343
343
|
Args:
|
|
344
344
|
dirpath: Root directory to search.
|
|
345
345
|
db: Database connection.
|
|
346
|
+
progress_callback: Optional callback(current, total, message) for progress updates.
|
|
347
|
+
progress_offset: Offset to add to current count (for combined progress with code indexing).
|
|
348
|
+
progress_total: Total files across all indexing phases.
|
|
346
349
|
|
|
347
350
|
Returns:
|
|
348
351
|
List of result dicts from index_doc_file.
|
|
@@ -350,16 +353,23 @@ def index_doc_directory(dirpath: str, db) -> list[dict]:
|
|
|
350
353
|
abs_dir = os.path.abspath(dirpath)
|
|
351
354
|
results = []
|
|
352
355
|
|
|
356
|
+
# First pass: count files
|
|
357
|
+
doc_files = []
|
|
353
358
|
for root, dirs, files in os.walk(abs_dir):
|
|
354
|
-
# Skip unwanted directories
|
|
355
359
|
dirs[:] = [d for d in dirs if d not in SKIP_DIRS and not d.startswith(".")]
|
|
356
|
-
|
|
357
360
|
for filename in files:
|
|
358
361
|
ext = os.path.splitext(filename)[1].lower()
|
|
359
362
|
if ext in DOC_EXTENSIONS:
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
+
doc_files.append(os.path.join(root, filename))
|
|
364
|
+
|
|
365
|
+
# Index files with progress reporting
|
|
366
|
+
for i, filepath in enumerate(doc_files):
|
|
367
|
+
result = index_doc_file(filepath, db)
|
|
368
|
+
results.append(result)
|
|
369
|
+
|
|
370
|
+
if progress_callback:
|
|
371
|
+
current = progress_offset + i + 1
|
|
372
|
+
progress_callback(current, progress_total, f"Indexing docs: {os.path.basename(filepath)}")
|
|
363
373
|
|
|
364
374
|
return results
|
|
365
375
|
|
|
@@ -451,7 +451,7 @@ def index_file(filepath: str, db) -> dict:
|
|
|
451
451
|
# Directory indexer
|
|
452
452
|
# ---------------------------------------------------------------------------
|
|
453
453
|
|
|
454
|
-
def index_directory(dirpath: str, db) -> list[dict]:
|
|
454
|
+
def index_directory(dirpath: str, db, progress_callback=None) -> list[dict]:
|
|
455
455
|
"""Recursively index all source files under *dirpath*.
|
|
456
456
|
|
|
457
457
|
Skips directories in ``_SKIP_DIRS``, files matching ``.gitignore`` patterns
|
|
@@ -461,6 +461,7 @@ def index_directory(dirpath: str, db) -> list[dict]:
|
|
|
461
461
|
Args:
|
|
462
462
|
dirpath: Root directory to scan.
|
|
463
463
|
db: An open ``sqlite3.Connection`` from ``db.get_db()``.
|
|
464
|
+
progress_callback: Optional callback(current, total, message) for progress updates.
|
|
464
465
|
|
|
465
466
|
Returns:
|
|
466
467
|
A list of per-file result dicts (see :func:`index_file`).
|
|
@@ -475,6 +476,28 @@ def index_directory(dirpath: str, db) -> list[dict]:
|
|
|
475
476
|
gitignore = GitignoreMatcher(dirpath)
|
|
476
477
|
logger.debug("Initialized gitignore matcher for %s", dirpath)
|
|
477
478
|
|
|
479
|
+
# First pass: count total files for progress reporting
|
|
480
|
+
total_files = 0
|
|
481
|
+
file_list = []
|
|
482
|
+
for root, dirs, files in os.walk(dirpath, topdown=True):
|
|
483
|
+
rel_root = os.path.relpath(root, dirpath)
|
|
484
|
+
if rel_root != ".":
|
|
485
|
+
gitignore.check_dir_for_gitignore(root, rel_root)
|
|
486
|
+
dirs[:] = [d for d in dirs if d not in _SKIP_DIRS and not d.endswith(".egg-info")
|
|
487
|
+
and not gitignore.should_skip(os.path.join(rel_root, d) if rel_root != "." else d, is_dir=True)]
|
|
488
|
+
for fname in sorted(files):
|
|
489
|
+
rel_path = os.path.join(rel_root, fname) if rel_root != "." else fname
|
|
490
|
+
if gitignore.should_skip(rel_path, is_dir=False):
|
|
491
|
+
continue
|
|
492
|
+
ext = os.path.splitext(fname)[1].lower()
|
|
493
|
+
if ext in _SOURCE_EXTENSIONS or _load_language(ext) is not None:
|
|
494
|
+
file_list.append(os.path.join(root, fname))
|
|
495
|
+
total_files += 1
|
|
496
|
+
|
|
497
|
+
# Reset gitignore for actual indexing pass
|
|
498
|
+
gitignore = GitignoreMatcher(dirpath)
|
|
499
|
+
|
|
500
|
+
files_processed = 0
|
|
478
501
|
for root, dirs, files in os.walk(dirpath, topdown=True):
|
|
479
502
|
rel_root = os.path.relpath(root, dirpath)
|
|
480
503
|
|
|
@@ -519,6 +542,11 @@ def index_directory(dirpath: str, db) -> list[dict]:
|
|
|
519
542
|
"error": True,
|
|
520
543
|
})
|
|
521
544
|
|
|
545
|
+
# Report progress
|
|
546
|
+
files_processed += 1
|
|
547
|
+
if progress_callback:
|
|
548
|
+
progress_callback(files_processed, total_files, f"Indexing code: {fname}")
|
|
549
|
+
|
|
522
550
|
# Log performance summary
|
|
523
551
|
total_elapsed = time.perf_counter() - total_start
|
|
524
552
|
total_symbols = sum(r.get("symbols_indexed", 0) for r in results)
|
|
@@ -12,9 +12,10 @@ architecture:
|
|
|
12
12
|
|
|
13
13
|
from __future__ import annotations
|
|
14
14
|
|
|
15
|
+
import asyncio
|
|
15
16
|
from typing import Literal
|
|
16
17
|
|
|
17
|
-
from mcp.server.fastmcp import FastMCP
|
|
18
|
+
from mcp.server.fastmcp import Context, FastMCP
|
|
18
19
|
|
|
19
20
|
import db as db_mod
|
|
20
21
|
import doc_parser as doc_parser_mod
|
|
@@ -275,7 +276,7 @@ def search_code(
|
|
|
275
276
|
|
|
276
277
|
# ── Tool 2: index_codebase ────────────────────────────────────────────────
|
|
277
278
|
@mcp.tool()
|
|
278
|
-
def index_codebase(directory: str) -> dict:
|
|
279
|
+
async def index_codebase(directory: str, ctx: Context) -> dict:
|
|
279
280
|
"""YOU MUST CALL THIS TOOL FIRST before using search_code or search_docs. Use this tool to build the searchable index that powers all other code intelligence features.
|
|
280
281
|
|
|
281
282
|
TRIGGER: Call this tool immediately when:
|
|
@@ -312,11 +313,36 @@ def index_codebase(directory: str) -> dict:
|
|
|
312
313
|
|
|
313
314
|
database = db_mod.get_db(str(directory_path))
|
|
314
315
|
|
|
315
|
-
#
|
|
316
|
+
# Report initial progress
|
|
317
|
+
await ctx.report_progress(0, 100, "Starting indexing...")
|
|
318
|
+
|
|
319
|
+
# Create progress callback that schedules progress updates on the event loop
|
|
320
|
+
loop = asyncio.get_running_loop()
|
|
321
|
+
progress_state = {"current": 0, "total": 0, "phase": "code"}
|
|
322
|
+
|
|
323
|
+
def sync_progress_callback(current: int, total: int, message: str):
|
|
324
|
+
"""Sync callback that schedules async progress reporting."""
|
|
325
|
+
progress_state["current"] = current
|
|
326
|
+
progress_state["total"] = total
|
|
327
|
+
# Schedule the async progress report on the event loop
|
|
328
|
+
asyncio.run_coroutine_threadsafe(
|
|
329
|
+
ctx.report_progress(current, total, message),
|
|
330
|
+
loop
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
# Index code files in a thread to allow progress reporting
|
|
316
334
|
code_logger = logging_config.IndexingLogger("code")
|
|
317
335
|
code_logger.start(str(directory_path))
|
|
318
336
|
|
|
319
|
-
|
|
337
|
+
await ctx.report_progress(0, 100, "Scanning code files...")
|
|
338
|
+
|
|
339
|
+
code_results = await asyncio.to_thread(
|
|
340
|
+
parser_mod.index_directory,
|
|
341
|
+
str(directory_path),
|
|
342
|
+
database,
|
|
343
|
+
sync_progress_callback
|
|
344
|
+
)
|
|
345
|
+
|
|
320
346
|
for r in code_results:
|
|
321
347
|
if r.get("skipped"):
|
|
322
348
|
code_logger.file_skipped(r.get("file", "unknown"), r.get("reason", "unknown"))
|
|
@@ -331,7 +357,21 @@ def index_codebase(directory: str) -> dict:
|
|
|
331
357
|
doc_logger = logging_config.IndexingLogger("documentation")
|
|
332
358
|
doc_logger.start(str(directory_path))
|
|
333
359
|
|
|
334
|
-
|
|
360
|
+
# Calculate progress offset for doc indexing
|
|
361
|
+
code_file_count = len(code_results)
|
|
362
|
+
doc_progress_offset = code_file_count
|
|
363
|
+
|
|
364
|
+
await ctx.report_progress(code_file_count, code_file_count, "Scanning documentation files...")
|
|
365
|
+
|
|
366
|
+
doc_results = await asyncio.to_thread(
|
|
367
|
+
doc_parser_mod.index_doc_directory,
|
|
368
|
+
str(directory_path),
|
|
369
|
+
database,
|
|
370
|
+
sync_progress_callback,
|
|
371
|
+
doc_progress_offset,
|
|
372
|
+
code_file_count # Will be updated by callback
|
|
373
|
+
)
|
|
374
|
+
|
|
335
375
|
for r in doc_results:
|
|
336
376
|
if r.get("skipped"):
|
|
337
377
|
doc_logger.file_skipped(r.get("file", "unknown"), r.get("reason", "unknown"))
|
|
@@ -343,12 +383,18 @@ def index_codebase(directory: str) -> dict:
|
|
|
343
383
|
doc_skipped = [r for r in doc_results if r.get("skipped")]
|
|
344
384
|
|
|
345
385
|
# Extract docstrings from indexed code
|
|
346
|
-
|
|
386
|
+
await ctx.report_progress(0, 0, "Extracting docstrings...")
|
|
387
|
+
docstring_results = await asyncio.to_thread(
|
|
388
|
+
doc_parser_mod.extract_docstrings_from_code,
|
|
389
|
+
database
|
|
390
|
+
)
|
|
347
391
|
|
|
348
392
|
total_symbols = sum(r.get("symbols_indexed", 0) for r in indexed)
|
|
349
393
|
total_chunks = sum(r.get("chunks_indexed", 0) for r in doc_indexed)
|
|
350
394
|
log.set_result_count(total_symbols + total_chunks + len(docstring_results))
|
|
351
395
|
|
|
396
|
+
await ctx.report_progress(100, 100, "Indexing complete!")
|
|
397
|
+
|
|
352
398
|
return {
|
|
353
399
|
"status": "ok",
|
|
354
400
|
"directory": str(directory_path),
|
|
@@ -563,6 +609,7 @@ def search_history(
|
|
|
563
609
|
def main():
|
|
564
610
|
"""Entry point for the MCP server when installed as a package."""
|
|
565
611
|
# Warm up embedding model to avoid cold-start latency
|
|
612
|
+
logger.info(f"Using embedding model: {db_mod.EMBEDDING_MODEL_NAME}")
|
|
566
613
|
logger.info("Warming up embedding model...")
|
|
567
614
|
db_mod.warmup_embedding_model()
|
|
568
615
|
logger.info("Embedding model ready")
|
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from unittest.mock import AsyncMock
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MockContext:
|
|
9
|
+
"""Mock MCP Context for testing."""
|
|
10
|
+
|
|
11
|
+
def __init__(self):
|
|
12
|
+
self.report_progress = AsyncMock()
|
|
13
|
+
|
|
5
14
|
|
|
6
15
|
class TestSearchCodeValidation:
|
|
7
16
|
"""Tests for search_code tool input validation."""
|
|
@@ -84,8 +93,16 @@ class TestIndexCodebaseValidation:
|
|
|
84
93
|
|
|
85
94
|
def test_nonexistent_directory_returns_error(self):
|
|
86
95
|
"""Test that nonexistent directory returns structured error."""
|
|
96
|
+
import asyncio
|
|
97
|
+
|
|
87
98
|
import server
|
|
88
|
-
|
|
99
|
+
ctx = MockContext()
|
|
100
|
+
|
|
101
|
+
async def run_test():
|
|
102
|
+
result = await server.index_codebase("/nonexistent/directory", ctx)
|
|
103
|
+
return result
|
|
104
|
+
|
|
105
|
+
result = asyncio.run(run_test())
|
|
89
106
|
assert result.get("error") is True
|
|
90
107
|
assert "ValidationError" in result.get("error_type", "")
|
|
91
108
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|