mcp-codebase-index 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_codebase_index-0.1.1/.github/workflows/ci.yml +32 -0
- mcp_codebase_index-0.1.1/.github/workflows/publish.yml +36 -0
- mcp_codebase_index-0.1.1/.gitignore +57 -0
- mcp_codebase_index-0.1.1/LICENSE +21 -0
- mcp_codebase_index-0.1.1/PKG-INFO +176 -0
- mcp_codebase_index-0.1.1/README.md +131 -0
- mcp_codebase_index-0.1.1/pyproject.toml +51 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/__init__.py +3 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/annotator.py +51 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/generic_annotator.py +21 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/models.py +110 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/project_indexer.py +573 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/python_annotator.py +298 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/query_api.py +610 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/server.py +428 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/text_annotator.py +137 -0
- mcp_codebase_index-0.1.1/src/mcp_codebase_index/typescript_annotator.py +389 -0
- mcp_codebase_index-0.1.1/tests/test_markup_python.py +432 -0
- mcp_codebase_index-0.1.1/tests/test_markup_text.py +179 -0
- mcp_codebase_index-0.1.1/tests/test_markup_typescript.py +281 -0
- mcp_codebase_index-0.1.1/tests/test_project_indexer.py +504 -0
- mcp_codebase_index-0.1.1/tests/test_query_api.py +686 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
pip install -e ".[dev]"
|
|
27
|
+
|
|
28
|
+
- name: Lint with ruff
|
|
29
|
+
run: ruff check src/ tests/
|
|
30
|
+
|
|
31
|
+
- name: Run tests
|
|
32
|
+
run: pytest tests/ -v
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Publish release assets
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.11"
|
|
20
|
+
|
|
21
|
+
- name: Install build tools
|
|
22
|
+
run: pip install build twine
|
|
23
|
+
|
|
24
|
+
- name: Build package
|
|
25
|
+
run: python -m build
|
|
26
|
+
|
|
27
|
+
- name: Upload assets to release
|
|
28
|
+
env:
|
|
29
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
30
|
+
run: gh release upload "${{ github.event.release.tag_name }}" dist/*
|
|
31
|
+
|
|
32
|
+
- name: Publish to PyPI
|
|
33
|
+
env:
|
|
34
|
+
TWINE_USERNAME: __token__
|
|
35
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
|
36
|
+
run: twine upload dist/*
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# ── Secrets & credentials ──────────────────────
|
|
2
|
+
.env
|
|
3
|
+
.env.*
|
|
4
|
+
!.env.example
|
|
5
|
+
*.pem
|
|
6
|
+
*.key
|
|
7
|
+
*.crt
|
|
8
|
+
*.p12
|
|
9
|
+
*.pfx
|
|
10
|
+
*.jks
|
|
11
|
+
credentials.json
|
|
12
|
+
service-account*.json
|
|
13
|
+
*secret*
|
|
14
|
+
!*secret*.py
|
|
15
|
+
|
|
16
|
+
# ── Python ─────────────────────────────────────
|
|
17
|
+
__pycache__/
|
|
18
|
+
*.py[cod]
|
|
19
|
+
*$py.class
|
|
20
|
+
*.egg-info/
|
|
21
|
+
dist/
|
|
22
|
+
build/
|
|
23
|
+
.eggs/
|
|
24
|
+
*.egg
|
|
25
|
+
*.so
|
|
26
|
+
*.whl
|
|
27
|
+
|
|
28
|
+
# ── Virtual environments ───────────────────────
|
|
29
|
+
.venv/
|
|
30
|
+
venv/
|
|
31
|
+
env/
|
|
32
|
+
ENV/
|
|
33
|
+
|
|
34
|
+
# ── IDE & editor ───────────────────────────────
|
|
35
|
+
.idea/
|
|
36
|
+
.vscode/
|
|
37
|
+
*.swp
|
|
38
|
+
*.swo
|
|
39
|
+
*~
|
|
40
|
+
.DS_Store
|
|
41
|
+
Thumbs.db
|
|
42
|
+
|
|
43
|
+
# ── Testing & linting ─────────────────────────
|
|
44
|
+
.mypy_cache/
|
|
45
|
+
.pytest_cache/
|
|
46
|
+
.ruff_cache/
|
|
47
|
+
.coverage
|
|
48
|
+
htmlcov/
|
|
49
|
+
.tox/
|
|
50
|
+
|
|
51
|
+
# ── Claude Code local state ───────────────────
|
|
52
|
+
.claude/
|
|
53
|
+
|
|
54
|
+
# ── Misc ───────────────────────────────────────
|
|
55
|
+
*.log
|
|
56
|
+
*.bak
|
|
57
|
+
*.tmp
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Doyle
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-codebase-index
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Structural codebase indexer with MCP server for AI-assisted development
|
|
5
|
+
Project-URL: Homepage, https://github.com/MikeRecognex/mcp-codebase-index
|
|
6
|
+
Project-URL: Repository, https://github.com/MikeRecognex/mcp-codebase-index
|
|
7
|
+
Author: Michael Doyle
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026 Michael Doyle
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Keywords: code-navigation,codebase,indexer,mcp,structural-analysis
|
|
31
|
+
Classifier: Development Status :: 3 - Alpha
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
38
|
+
Requires-Python: >=3.11
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
42
|
+
Provides-Extra: mcp
|
|
43
|
+
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# mcp-codebase-index
|
|
47
|
+
|
|
48
|
+
A structural codebase indexer with an [MCP](https://modelcontextprotocol.io) server for AI-assisted development. Zero runtime dependencies — uses Python's `ast` module for Python analysis and regex for TypeScript/JS. Requires Python 3.11+.
|
|
49
|
+
|
|
50
|
+
## What It Does
|
|
51
|
+
|
|
52
|
+
Indexes codebases by parsing source files into structural metadata -- functions, classes, imports, dependency graphs, and cross-file call chains -- then exposes 17 query tools via the Model Context Protocol, enabling Claude Code and other MCP clients to navigate codebases efficiently without reading entire files.
|
|
53
|
+
|
|
54
|
+
## Language Support
|
|
55
|
+
|
|
56
|
+
| Language | Method | Extracts |
|
|
57
|
+
|----------|--------|----------|
|
|
58
|
+
| Python (`.py`, `.pyw`) | AST parsing | Functions, classes, methods, imports, dependency graph |
|
|
59
|
+
| TypeScript/JS (`.ts`, `.tsx`, `.js`, `.jsx`) | Regex-based | Functions, arrow functions, classes, interfaces, type aliases, imports |
|
|
60
|
+
| Markdown/Text (`.md`, `.txt`, `.rst`) | Heading detection | Sections (# headings, underlines, numbered, ALL-CAPS) |
|
|
61
|
+
| Other | Generic | Line counts only |
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install "mcp-codebase-index[mcp]"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The `[mcp]` extra includes the MCP server dependency. Omit it if you only need the programmatic API.
|
|
70
|
+
|
|
71
|
+
For development (from a local clone):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install -e ".[dev,mcp]"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## MCP Server
|
|
78
|
+
|
|
79
|
+
### Running
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# As a console script
|
|
83
|
+
PROJECT_ROOT=/path/to/project mcp-codebase-index
|
|
84
|
+
|
|
85
|
+
# As a Python module
|
|
86
|
+
PROJECT_ROOT=/path/to/project python -m mcp_codebase_index.server
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
`PROJECT_ROOT` specifies which directory to index. Defaults to the current working directory.
|
|
90
|
+
|
|
91
|
+
### Configuring with Claude Code
|
|
92
|
+
|
|
93
|
+
Add to your project's `.mcp.json`:
|
|
94
|
+
|
|
95
|
+
```json
|
|
96
|
+
{
|
|
97
|
+
"mcpServers": {
|
|
98
|
+
"codebase-index": {
|
|
99
|
+
"command": "mcp-codebase-index",
|
|
100
|
+
"env": {
|
|
101
|
+
"PROJECT_ROOT": "/path/to/project"
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Or using the Python module directly (useful if installed in a virtualenv):
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"mcpServers": {
|
|
113
|
+
"codebase-index": {
|
|
114
|
+
"command": "/path/to/.venv/bin/python3",
|
|
115
|
+
"args": ["-m", "mcp_codebase_index.server"],
|
|
116
|
+
"env": {
|
|
117
|
+
"PROJECT_ROOT": "/path/to/project"
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Available Tools (17)
|
|
125
|
+
|
|
126
|
+
| Tool | Description |
|
|
127
|
+
|------|-------------|
|
|
128
|
+
| `get_project_summary` | File count, packages, top classes/functions |
|
|
129
|
+
| `list_files` | List indexed files with optional glob filter |
|
|
130
|
+
| `get_structure_summary` | Structure of a file or the whole project |
|
|
131
|
+
| `get_functions` | List functions with name, lines, params |
|
|
132
|
+
| `get_classes` | List classes with name, lines, methods, bases |
|
|
133
|
+
| `get_imports` | List imports with module, names, line |
|
|
134
|
+
| `get_function_source` | Full source of a function/method |
|
|
135
|
+
| `get_class_source` | Full source of a class |
|
|
136
|
+
| `find_symbol` | Find where a symbol is defined (file, line, type) |
|
|
137
|
+
| `get_dependencies` | What a symbol calls/uses |
|
|
138
|
+
| `get_dependents` | What calls/uses a symbol |
|
|
139
|
+
| `get_change_impact` | Direct + transitive dependents |
|
|
140
|
+
| `get_call_chain` | Shortest dependency path (BFS) |
|
|
141
|
+
| `get_file_dependencies` | Files imported by a given file |
|
|
142
|
+
| `get_file_dependents` | Files that import from a given file |
|
|
143
|
+
| `search_codebase` | Regex search across all files (max 100 results) |
|
|
144
|
+
| `reindex` | Re-index the project after file changes (MCP server only) |
|
|
145
|
+
|
|
146
|
+
## Programmatic Usage
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from mcp_codebase_index.project_indexer import ProjectIndexer
|
|
150
|
+
from mcp_codebase_index.query_api import create_project_query_functions
|
|
151
|
+
|
|
152
|
+
indexer = ProjectIndexer("/path/to/project", include_patterns=["**/*.py"])
|
|
153
|
+
index = indexer.index()
|
|
154
|
+
query_funcs = create_project_query_functions(index)
|
|
155
|
+
|
|
156
|
+
# Use query functions
|
|
157
|
+
print(query_funcs["get_project_summary"]())
|
|
158
|
+
print(query_funcs["find_symbol"]("MyClass"))
|
|
159
|
+
print(query_funcs["get_change_impact"]("some_function"))
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Development
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
pip install -e ".[dev,mcp]"
|
|
166
|
+
pytest tests/ -v
|
|
167
|
+
ruff check src/ tests/
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## References
|
|
171
|
+
|
|
172
|
+
The structural indexer was originally developed as part of the [RMLPlus](https://github.com/MikeRecognex/RMLPlus) project, an implementation of the [Recursive Language Models](https://arxiv.org/abs/2512.24601) framework.
|
|
173
|
+
|
|
174
|
+
## License
|
|
175
|
+
|
|
176
|
+
MIT
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# mcp-codebase-index
|
|
2
|
+
|
|
3
|
+
A structural codebase indexer with an [MCP](https://modelcontextprotocol.io) server for AI-assisted development. Zero runtime dependencies — uses Python's `ast` module for Python analysis and regex for TypeScript/JS. Requires Python 3.11+.
|
|
4
|
+
|
|
5
|
+
## What It Does
|
|
6
|
+
|
|
7
|
+
Indexes codebases by parsing source files into structural metadata -- functions, classes, imports, dependency graphs, and cross-file call chains -- then exposes 17 query tools via the Model Context Protocol, enabling Claude Code and other MCP clients to navigate codebases efficiently without reading entire files.
|
|
8
|
+
|
|
9
|
+
## Language Support
|
|
10
|
+
|
|
11
|
+
| Language | Method | Extracts |
|
|
12
|
+
|----------|--------|----------|
|
|
13
|
+
| Python (`.py`, `.pyw`) | AST parsing | Functions, classes, methods, imports, dependency graph |
|
|
14
|
+
| TypeScript/JS (`.ts`, `.tsx`, `.js`, `.jsx`) | Regex-based | Functions, arrow functions, classes, interfaces, type aliases, imports |
|
|
15
|
+
| Markdown/Text (`.md`, `.txt`, `.rst`) | Heading detection | Sections (# headings, underlines, numbered, ALL-CAPS) |
|
|
16
|
+
| Other | Generic | Line counts only |
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install "mcp-codebase-index[mcp]"
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The `[mcp]` extra includes the MCP server dependency. Omit it if you only need the programmatic API.
|
|
25
|
+
|
|
26
|
+
For development (from a local clone):
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install -e ".[dev,mcp]"
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## MCP Server
|
|
33
|
+
|
|
34
|
+
### Running
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# As a console script
|
|
38
|
+
PROJECT_ROOT=/path/to/project mcp-codebase-index
|
|
39
|
+
|
|
40
|
+
# As a Python module
|
|
41
|
+
PROJECT_ROOT=/path/to/project python -m mcp_codebase_index.server
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
`PROJECT_ROOT` specifies which directory to index. Defaults to the current working directory.
|
|
45
|
+
|
|
46
|
+
### Configuring with Claude Code
|
|
47
|
+
|
|
48
|
+
Add to your project's `.mcp.json`:
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"mcpServers": {
|
|
53
|
+
"codebase-index": {
|
|
54
|
+
"command": "mcp-codebase-index",
|
|
55
|
+
"env": {
|
|
56
|
+
"PROJECT_ROOT": "/path/to/project"
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Or using the Python module directly (useful if installed in a virtualenv):
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"mcpServers": {
|
|
68
|
+
"codebase-index": {
|
|
69
|
+
"command": "/path/to/.venv/bin/python3",
|
|
70
|
+
"args": ["-m", "mcp_codebase_index.server"],
|
|
71
|
+
"env": {
|
|
72
|
+
"PROJECT_ROOT": "/path/to/project"
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Available Tools (17)
|
|
80
|
+
|
|
81
|
+
| Tool | Description |
|
|
82
|
+
|------|-------------|
|
|
83
|
+
| `get_project_summary` | File count, packages, top classes/functions |
|
|
84
|
+
| `list_files` | List indexed files with optional glob filter |
|
|
85
|
+
| `get_structure_summary` | Structure of a file or the whole project |
|
|
86
|
+
| `get_functions` | List functions with name, lines, params |
|
|
87
|
+
| `get_classes` | List classes with name, lines, methods, bases |
|
|
88
|
+
| `get_imports` | List imports with module, names, line |
|
|
89
|
+
| `get_function_source` | Full source of a function/method |
|
|
90
|
+
| `get_class_source` | Full source of a class |
|
|
91
|
+
| `find_symbol` | Find where a symbol is defined (file, line, type) |
|
|
92
|
+
| `get_dependencies` | What a symbol calls/uses |
|
|
93
|
+
| `get_dependents` | What calls/uses a symbol |
|
|
94
|
+
| `get_change_impact` | Direct + transitive dependents |
|
|
95
|
+
| `get_call_chain` | Shortest dependency path (BFS) |
|
|
96
|
+
| `get_file_dependencies` | Files imported by a given file |
|
|
97
|
+
| `get_file_dependents` | Files that import from a given file |
|
|
98
|
+
| `search_codebase` | Regex search across all files (max 100 results) |
|
|
99
|
+
| `reindex` | Re-index the project after file changes (MCP server only) |
|
|
100
|
+
|
|
101
|
+
## Programmatic Usage
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from mcp_codebase_index.project_indexer import ProjectIndexer
|
|
105
|
+
from mcp_codebase_index.query_api import create_project_query_functions
|
|
106
|
+
|
|
107
|
+
indexer = ProjectIndexer("/path/to/project", include_patterns=["**/*.py"])
|
|
108
|
+
index = indexer.index()
|
|
109
|
+
query_funcs = create_project_query_functions(index)
|
|
110
|
+
|
|
111
|
+
# Use query functions
|
|
112
|
+
print(query_funcs["get_project_summary"]())
|
|
113
|
+
print(query_funcs["find_symbol"]("MyClass"))
|
|
114
|
+
print(query_funcs["get_change_impact"]("some_function"))
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Development
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
pip install -e ".[dev,mcp]"
|
|
121
|
+
pytest tests/ -v
|
|
122
|
+
ruff check src/ tests/
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## References
|
|
126
|
+
|
|
127
|
+
The structural indexer was originally developed as part of the [RMLPlus](https://github.com/MikeRecognex/RMLPlus) project, an implementation of the [Recursive Language Models](https://arxiv.org/abs/2512.24601) framework.
|
|
128
|
+
|
|
129
|
+
## License
|
|
130
|
+
|
|
131
|
+
MIT
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "mcp-codebase-index"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Structural codebase indexer with MCP server for AI-assisted development"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = {file = "LICENSE"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Michael Doyle"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["mcp", "codebase", "indexer", "code-navigation", "structural-analysis"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Software Development :: Libraries",
|
|
24
|
+
]
|
|
25
|
+
dependencies = []
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
mcp = ["mcp>=1.0"]
|
|
29
|
+
dev = ["pytest>=8.0", "ruff>=0.5"]
|
|
30
|
+
|
|
31
|
+
[project.scripts]
|
|
32
|
+
mcp-codebase-index = "mcp_codebase_index.server:main_sync"
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/MikeRecognex/mcp-codebase-index"
|
|
36
|
+
Repository = "https://github.com/MikeRecognex/mcp-codebase-index"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/mcp_codebase_index"]
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
testpaths = ["tests"]
|
|
43
|
+
timeout = 30
|
|
44
|
+
|
|
45
|
+
[tool.ruff]
|
|
46
|
+
target-version = "py311"
|
|
47
|
+
line-length = 100
|
|
48
|
+
|
|
49
|
+
[tool.mypy]
|
|
50
|
+
python_version = "3.11"
|
|
51
|
+
strict = true
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Dispatch layer that selects the appropriate annotator by file type."""
|
|
2
|
+
|
|
3
|
+
from mcp_codebase_index.generic_annotator import annotate_generic
|
|
4
|
+
from mcp_codebase_index.models import StructuralMetadata
|
|
5
|
+
from mcp_codebase_index.python_annotator import annotate_python
|
|
6
|
+
from mcp_codebase_index.text_annotator import annotate_text
|
|
7
|
+
from mcp_codebase_index.typescript_annotator import annotate_typescript
|
|
8
|
+
|
|
9
|
+
# Lower-cased file extension (leading dot included) -> annotator kind.
# Grouped by kind; insertion order matches the flat listing it replaces.
_EXTENSION_MAP: dict[str, str] = {
    **dict.fromkeys((".py", ".pyw"), "python"),
    **dict.fromkeys((".md", ".txt", ".rst"), "text"),
    **dict.fromkeys((".ts", ".tsx"), "typescript"),
    **dict.fromkeys((".js", ".jsx"), "javascript"),
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def annotate(
|
|
23
|
+
text: str,
|
|
24
|
+
source_name: str = "<source>",
|
|
25
|
+
file_type: str | None = None,
|
|
26
|
+
) -> StructuralMetadata:
|
|
27
|
+
"""Annotate text with structural metadata.
|
|
28
|
+
|
|
29
|
+
Dispatch rules:
|
|
30
|
+
- file_type overrides extension-based detection
|
|
31
|
+
- .py -> python annotator
|
|
32
|
+
- .md, .txt, .rst -> text annotator
|
|
33
|
+
- .ts, .tsx -> typescript annotator
|
|
34
|
+
- .js, .jsx -> typescript annotator (close enough for regex-based parsing)
|
|
35
|
+
- Otherwise -> generic annotator (line-only)
|
|
36
|
+
"""
|
|
37
|
+
if file_type is None:
|
|
38
|
+
# Detect from source_name extension
|
|
39
|
+
dot_idx = source_name.rfind(".")
|
|
40
|
+
if dot_idx >= 0:
|
|
41
|
+
ext = source_name[dot_idx:].lower()
|
|
42
|
+
file_type = _EXTENSION_MAP.get(ext)
|
|
43
|
+
|
|
44
|
+
if file_type == "python":
|
|
45
|
+
return annotate_python(text, source_name)
|
|
46
|
+
elif file_type == "text":
|
|
47
|
+
return annotate_text(text, source_name)
|
|
48
|
+
elif file_type in ("typescript", "javascript"):
|
|
49
|
+
return annotate_typescript(text, source_name)
|
|
50
|
+
else:
|
|
51
|
+
return annotate_generic(text, source_name)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Generic fallback annotator providing line-only metadata."""
|
|
2
|
+
|
|
3
|
+
from mcp_codebase_index.models import StructuralMetadata
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def annotate_generic(text: str, source_name: str = "<source>") -> StructuralMetadata:
    """Create minimal structural metadata with just line information.

    Args:
        text: Full content to describe.
        source_name: Filename or identifier recorded on the result.

    Returns:
        A StructuralMetadata carrying only the line list (terminators
        stripped), the character offset at which each line starts in
        ``text``, and the total line and character counts. No other field
        is passed, so the remaining fields keep their defaults.
    """
    lines = text.splitlines()

    offsets: list[int] = []
    offset = 0
    # Measure each line with its terminator kept, so the running offset
    # advances by the real terminator width: "\r\n" is two characters and a
    # final line without a newline contributes none. Assuming exactly one
    # character per terminator made every offset after a CRLF line too small.
    for raw_line in text.splitlines(keepends=True):
        offsets.append(offset)
        offset += len(raw_line)

    return StructuralMetadata(
        source_name=source_name,
        total_lines=len(lines),
        total_chars=len(text),
        lines=lines,
        line_char_offsets=offsets,
    )
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Structural metadata models for codebase indexing."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass(frozen=True)
class LineRange:
    """A range of lines (1-indexed, inclusive on both ends).

    Frozen value object: fields cannot be reassigned after construction.
    Nothing in this class validates that ``start <= end``.
    """

    start: int  # First line of the range (1-indexed, inclusive)
    end: int  # Last line of the range (1-indexed, inclusive)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class FunctionInfo:
    """Metadata about a function or method.

    Frozen: fields cannot be reassigned after construction, although the
    list-valued fields are still mutable objects. Because those fields are
    lists, calling ``hash()`` on an instance raises ``TypeError``.
    """

    name: str  # Bare name (contrast with qualified_name below)
    qualified_name: str  # e.g., "MyClass.my_method"
    line_range: LineRange  # Lines covered by the definition (1-indexed, inclusive)
    parameters: list[str]  # Presumably parameter names; exact format is set by the annotators
    decorators: list[str]  # Decorator names (without @)
    docstring: str | None  # None when no docstring was recorded
    is_method: bool
    parent_class: str | None  # None for top-level functions
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class ClassInfo:
    """Metadata about a class.

    Frozen: fields cannot be reassigned after construction, although the
    list-valued fields are still mutable objects. Because those fields are
    lists, calling ``hash()`` on an instance raises ``TypeError``.
    """

    name: str
    line_range: LineRange  # Lines covered by the class (1-indexed, inclusive)
    base_classes: list[str]  # Presumably base-class names as written in source; confirm in annotators
    methods: list[FunctionInfo]  # One FunctionInfo per recorded method
    decorators: list[str]
    docstring: str | None  # None when no docstring was recorded
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class ImportInfo:
    """Metadata about an import statement.

    Frozen: fields cannot be reassigned after construction. ``names`` is a
    list, so calling ``hash()`` on an instance raises ``TypeError``.
    """

    module: str  # e.g., "os.path"
    names: list[str]  # e.g., ["join", "exists"] for "from os.path import join, exists"
    alias: str | None  # e.g., "np" for "import numpy as np"
    line_number: int  # Presumably 1-indexed like LineRange; confirm in annotators
    is_from_import: bool  # True for "from X import Y", False for "import X"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class SectionInfo:
    """Metadata about a section in a text document.

    Frozen: fields cannot be reassigned after construction.
    """

    title: str  # Section heading text
    level: int  # Heading level (1 = top-level, 2 = subsection, etc.)
    line_range: LineRange  # Lines covered by the section (1-indexed, inclusive)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class StructuralMetadata:
    """Complete structural metadata for a single file or text document.

    Mutable (not frozen). The five source and line fields are required;
    every structural collection defaults to a fresh empty container for
    each instance via ``default_factory``.
    """

    # Source
    source_name: str  # Filename or identifier
    total_lines: int
    total_chars: int

    # Line data (always populated)
    lines: list[str]  # All lines (0-indexed internally, but API uses 1-indexed)
    line_char_offsets: list[int]  # Character offset of each line start

    # Code structure (populated for code files)
    functions: list[FunctionInfo] = field(default_factory=list)
    classes: list[ClassInfo] = field(default_factory=list)
    imports: list[ImportInfo] = field(default_factory=list)

    # Text structure (populated for text/markdown files)
    sections: list[SectionInfo] = field(default_factory=list)

    # Dependency map (populated for code files)
    # Maps each function/class name to the names it references
    dependency_graph: dict[str, list[str]] = field(default_factory=dict)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class ProjectIndex:
    """Structural index for an entire codebase.

    Mutable (not frozen). Only ``root_path`` is required; every graph and
    table starts as a fresh empty container and every statistic starts at
    zero.
    """

    root_path: str
    # Per-file metadata. Keys are presumably file paths; whether they are
    # relative to root_path is decided by the indexer. TODO confirm.
    files: dict[str, StructuralMetadata] = field(default_factory=dict)

    # Cross-file dependency graphs
    # NOTE(review): presumably symbol -> symbols it references, and the
    # inverse mapping; confirm against the indexer that fills them.
    global_dependency_graph: dict[str, set[str]] = field(default_factory=dict)
    reverse_dependency_graph: dict[str, set[str]] = field(default_factory=dict)

    # File-level import graph
    # NOTE(review): presumably file -> files it imports, and the inverse
    # mapping; confirm against the indexer that fills them.
    import_graph: dict[str, set[str]] = field(default_factory=dict)
    reverse_import_graph: dict[str, set[str]] = field(default_factory=dict)

    # Global symbol table: symbol_name -> file_path where defined
    symbol_table: dict[str, str] = field(default_factory=dict)

    # Stats
    total_files: int = 0
    total_lines: int = 0
    total_functions: int = 0
    total_classes: int = 0
    index_build_time_seconds: float = 0.0
    index_memory_bytes: int = 0
|