mcp-architect 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_architect-0.1.0/.gitignore +22 -0
- mcp_architect-0.1.0/LICENSE +21 -0
- mcp_architect-0.1.0/PKG-INFO +176 -0
- mcp_architect-0.1.0/README.md +155 -0
- mcp_architect-0.1.0/pyproject.toml +36 -0
- mcp_architect-0.1.0/src/mcp_architect/__init__.py +10 -0
- mcp_architect-0.1.0/src/mcp_architect/__main__.py +4 -0
- mcp_architect-0.1.0/src/mcp_architect/analysis/__init__.py +16 -0
- mcp_architect-0.1.0/src/mcp_architect/analysis/deps.py +148 -0
- mcp_architect-0.1.0/src/mcp_architect/analysis/hotspots.py +84 -0
- mcp_architect-0.1.0/src/mcp_architect/analysis/modules.py +90 -0
- mcp_architect-0.1.0/src/mcp_architect/analysis/stack.py +114 -0
- mcp_architect-0.1.0/src/mcp_architect/analysis/walk.py +67 -0
- mcp_architect-0.1.0/src/mcp_architect/server.py +156 -0
- mcp_architect-0.1.0/tests/test_analysis.py +74 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
env/
|
|
11
|
+
|
|
12
|
+
# Tooling
|
|
13
|
+
.pytest_cache/
|
|
14
|
+
.mypy_cache/
|
|
15
|
+
.ruff_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
htmlcov/
|
|
18
|
+
|
|
19
|
+
# OS / editor
|
|
20
|
+
.DS_Store
|
|
21
|
+
.idea/
|
|
22
|
+
.vscode/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kannan Dharmalingam
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-architect
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Give any AI assistant real architectural understanding of a codebase — local, private, zero-config MCP server.
|
|
5
|
+
Project-URL: Homepage, https://github.com/kannajune/mcp-architect
|
|
6
|
+
Project-URL: Issues, https://github.com/kannajune/mcp-architect/issues
|
|
7
|
+
Author: Kannan Dharmalingam
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,ai,architecture,claude,code-analysis,cursor,developer-tools,llm,mcp,model-context-protocol
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: mcp>=1.2.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# 🏛️ mcp-architect
|
|
23
|
+
|
|
24
|
+
> **Stop pasting your file tree into Claude.** Give any AI assistant *real* architectural understanding of a codebase — **local, private, zero‑config.**
|
|
25
|
+
|
|
26
|
+
[License: MIT](LICENSE)
|
|
27
|
+
[Python 3.10+](https://www.python.org/)
|
|
28
|
+
[Model Context Protocol](https://modelcontextprotocol.io/)
|
|
29
|
+
|
|
30
|
+
AI coding assistants are great at *files* but blind to *architecture*. Every session you re‑explain the structure, paste the file tree, and hope it guesses your module boundaries right. **mcp-architect** is an [MCP](https://modelcontextprotocol.io/) server that hands your assistant a structured map of any codebase — tech stack, dependency graph, hotspots, and module summaries — computed **100% locally** with **no API keys and no model required**.
|
|
31
|
+
|
|
32
|
+
It works with **Claude Desktop, Cursor, Windsurf, Cline**, or any MCP client.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Why
|
|
37
|
+
|
|
38
|
+
| Without mcp-architect | With mcp-architect |
|
|
39
|
+
|---|---|
|
|
40
|
+
| "Here's my file tree, please figure out the structure…" | `architecture_overview` → stack, entry points, structure in one call |
|
|
41
|
+
| AI guesses how modules relate | `dependency_graph` → real import graph + circular‑dependency detection |
|
|
42
|
+
| "Which files matter?" | `hotspots` → largest, most complex, most‑changed, highest‑risk |
|
|
43
|
+
| Re‑explaining a package every time | `explain` → classes, functions, and deps of any folder |
|
|
44
|
+
|
|
45
|
+
Everything runs on your machine. Your code never leaves it.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Quickstart
|
|
50
|
+
|
|
51
|
+
### 1. Add it to your MCP client
|
|
52
|
+
|
|
53
|
+
**Claude Desktop** — edit `claude_desktop_config.json`:
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"mcpServers": {
|
|
58
|
+
"architect": {
|
|
59
|
+
"command": "uvx",
|
|
60
|
+
"args": ["mcp-architect"]
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
> No PyPI yet? Run straight from source:
|
|
67
|
+
> ```json
|
|
68
|
+
> { "mcpServers": { "architect": {
|
|
69
|
+
> "command": "uvx",
|
|
70
|
+
> "args": ["--from", "git+https://github.com/kannajune/mcp-architect", "mcp-architect"]
|
|
71
|
+
> } } }
|
|
72
|
+
> ```
|
|
73
|
+
|
|
74
|
+
Restart your client. **That's it** — no keys, no model download.
|
|
75
|
+
|
|
76
|
+
### 2. Ask your assistant
|
|
77
|
+
|
|
78
|
+
> *"Use the architect tools to give me an overview of `~/code/my-app`, then show me its dependency graph and the highest‑risk files."*
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## What you get
|
|
83
|
+
|
|
84
|
+
```text
|
|
85
|
+
# Architecture Overview — my-app
|
|
86
|
+
|
|
87
|
+
**151 files · 17,368 lines of code**
|
|
88
|
+
|
|
89
|
+
## Languages
|
|
90
|
+
- **Python** — 93 files, 13,683 LOC
|
|
91
|
+
- **TypeScript** — 23 files, 3,120 LOC
|
|
92
|
+
|
|
93
|
+
## Frameworks / key libraries
|
|
94
|
+
- FastAPI
|
|
95
|
+
- React
|
|
96
|
+
- Tailwind CSS
|
|
97
|
+
|
|
98
|
+
## Entry points
|
|
99
|
+
- main.py
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
```text
|
|
103
|
+
# Dependency Graph — my-app
|
|
104
|
+
|
|
105
|
+
**118 modules · 172 internal import edges**
|
|
106
|
+
|
|
107
|
+
## Most depended-upon (architectural hubs)
|
|
108
|
+
- `app.signals.signal_parser` — imported by 12 modules
|
|
109
|
+
- `app.core.integrations_registry` — imported by 11 modules
|
|
110
|
+
|
|
111
|
+
## Circular dependencies
|
|
112
|
+
✅ no circular dependencies found
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Tools
|
|
116
|
+
|
|
117
|
+
| Tool | What it tells the AI |
|
|
118
|
+
|------|----------------------|
|
|
119
|
+
| `architecture_overview` | Languages, frameworks, ecosystems, size, top‑level structure, entry points |
|
|
120
|
+
| `dependency_graph` | Internal import graph, architectural hubs, **circular dependencies** |
|
|
121
|
+
| `hotspots` | Largest / most complex / most‑changed (git) / highest‑risk files |
|
|
122
|
+
| `explain` | Deep‑dive a folder or file: classes, functions, external deps |
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Design principles
|
|
127
|
+
|
|
128
|
+
- **Zero heavy dependencies.** Pure Python standard library for all analysis (`ast`, `os`, `re`). The only runtime dep is the MCP SDK itself. Installs in seconds.
|
|
129
|
+
- **Local & private.** No network calls, no telemetry, no LLM. Your source never leaves your machine.
|
|
130
|
+
- **Language‑aware.** Full AST parsing for Python; import parsing for JavaScript/TypeScript; file/LOC stats for 25+ languages.
|
|
131
|
+
- **Decoupled core.** The analysis layer (`mcp_architect.analysis`) is importable and testable on its own — use it as a plain Python library too.
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from mcp_architect.analysis import get_overview, get_dependency_graph
|
|
135
|
+
print(get_overview("~/code/my-app")["frameworks"])
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
> The dependency and complexity analysis is **heuristic** — designed to give an AI useful, fast situational awareness, not to replace a full static analyzer.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Pin to one project (optional)
|
|
143
|
+
|
|
144
|
+
Set `MCP_ARCHITECT_ROOT` so tools default to a fixed repo and you can omit paths:
|
|
145
|
+
|
|
146
|
+
```json
|
|
147
|
+
{ "mcpServers": { "architect": {
|
|
148
|
+
"command": "uvx", "args": ["mcp-architect"],
|
|
149
|
+
"env": { "MCP_ARCHITECT_ROOT": "/Users/you/code/my-app" }
|
|
150
|
+
} } }
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Roadmap
|
|
156
|
+
|
|
157
|
+
- [ ] Mermaid dependency‑diagram output
|
|
158
|
+
- [ ] Layered‑architecture / boundary‑violation detection
|
|
159
|
+
- [ ] Go, Rust & Java import graphs
|
|
160
|
+
- [ ] Optional local‑LLM (Ollama) narrative summaries
|
|
161
|
+
- [ ] `compare` tool for before/after architecture diffs
|
|
162
|
+
|
|
163
|
+
Contributions welcome — see [CONTRIBUTING](#contributing).
|
|
164
|
+
|
|
165
|
+
## Contributing
|
|
166
|
+
|
|
167
|
+
PRs and issues welcome! Run the tests with:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
pip install -e ".[dev]"
|
|
171
|
+
pytest
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## License
|
|
175
|
+
|
|
176
|
+
[MIT](LICENSE) © Kannan Dharmalingam
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# 🏛️ mcp-architect
|
|
2
|
+
|
|
3
|
+
> **Stop pasting your file tree into Claude.** Give any AI assistant *real* architectural understanding of a codebase — **local, private, zero‑config.**
|
|
4
|
+
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[Python 3.10+](https://www.python.org/)
|
|
7
|
+
[Model Context Protocol](https://modelcontextprotocol.io/)
|
|
8
|
+
|
|
9
|
+
AI coding assistants are great at *files* but blind to *architecture*. Every session you re‑explain the structure, paste the file tree, and hope it guesses your module boundaries right. **mcp-architect** is an [MCP](https://modelcontextprotocol.io/) server that hands your assistant a structured map of any codebase — tech stack, dependency graph, hotspots, and module summaries — computed **100% locally** with **no API keys and no model required**.
|
|
10
|
+
|
|
11
|
+
It works with **Claude Desktop, Cursor, Windsurf, Cline**, or any MCP client.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Why
|
|
16
|
+
|
|
17
|
+
| Without mcp-architect | With mcp-architect |
|
|
18
|
+
|---|---|
|
|
19
|
+
| "Here's my file tree, please figure out the structure…" | `architecture_overview` → stack, entry points, structure in one call |
|
|
20
|
+
| AI guesses how modules relate | `dependency_graph` → real import graph + circular‑dependency detection |
|
|
21
|
+
| "Which files matter?" | `hotspots` → largest, most complex, most‑changed, highest‑risk |
|
|
22
|
+
| Re‑explaining a package every time | `explain` → classes, functions, and deps of any folder |
|
|
23
|
+
|
|
24
|
+
Everything runs on your machine. Your code never leaves it.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quickstart
|
|
29
|
+
|
|
30
|
+
### 1. Add it to your MCP client
|
|
31
|
+
|
|
32
|
+
**Claude Desktop** — edit `claude_desktop_config.json`:
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"mcpServers": {
|
|
37
|
+
"architect": {
|
|
38
|
+
"command": "uvx",
|
|
39
|
+
"args": ["mcp-architect"]
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
> No PyPI yet? Run straight from source:
|
|
46
|
+
> ```json
|
|
47
|
+
> { "mcpServers": { "architect": {
|
|
48
|
+
> "command": "uvx",
|
|
49
|
+
> "args": ["--from", "git+https://github.com/kannajune/mcp-architect", "mcp-architect"]
|
|
50
|
+
> } } }
|
|
51
|
+
> ```
|
|
52
|
+
|
|
53
|
+
Restart your client. **That's it** — no keys, no model download.
|
|
54
|
+
|
|
55
|
+
### 2. Ask your assistant
|
|
56
|
+
|
|
57
|
+
> *"Use the architect tools to give me an overview of `~/code/my-app`, then show me its dependency graph and the highest‑risk files."*
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## What you get
|
|
62
|
+
|
|
63
|
+
```text
|
|
64
|
+
# Architecture Overview — my-app
|
|
65
|
+
|
|
66
|
+
**151 files · 17,368 lines of code**
|
|
67
|
+
|
|
68
|
+
## Languages
|
|
69
|
+
- **Python** — 93 files, 13,683 LOC
|
|
70
|
+
- **TypeScript** — 23 files, 3,120 LOC
|
|
71
|
+
|
|
72
|
+
## Frameworks / key libraries
|
|
73
|
+
- FastAPI
|
|
74
|
+
- React
|
|
75
|
+
- Tailwind CSS
|
|
76
|
+
|
|
77
|
+
## Entry points
|
|
78
|
+
- main.py
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
```text
|
|
82
|
+
# Dependency Graph — my-app
|
|
83
|
+
|
|
84
|
+
**118 modules · 172 internal import edges**
|
|
85
|
+
|
|
86
|
+
## Most depended-upon (architectural hubs)
|
|
87
|
+
- `app.signals.signal_parser` — imported by 12 modules
|
|
88
|
+
- `app.core.integrations_registry` — imported by 11 modules
|
|
89
|
+
|
|
90
|
+
## Circular dependencies
|
|
91
|
+
✅ no circular dependencies found
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Tools
|
|
95
|
+
|
|
96
|
+
| Tool | What it tells the AI |
|
|
97
|
+
|------|----------------------|
|
|
98
|
+
| `architecture_overview` | Languages, frameworks, ecosystems, size, top‑level structure, entry points |
|
|
99
|
+
| `dependency_graph` | Internal import graph, architectural hubs, **circular dependencies** |
|
|
100
|
+
| `hotspots` | Largest / most complex / most‑changed (git) / highest‑risk files |
|
|
101
|
+
| `explain` | Deep‑dive a folder or file: classes, functions, external deps |
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Design principles
|
|
106
|
+
|
|
107
|
+
- **Zero heavy dependencies.** Pure Python standard library for all analysis (`ast`, `os`, `re`). The only runtime dep is the MCP SDK itself. Installs in seconds.
|
|
108
|
+
- **Local & private.** No network calls, no telemetry, no LLM. Your source never leaves your machine.
|
|
109
|
+
- **Language‑aware.** Full AST parsing for Python; import parsing for JavaScript/TypeScript; file/LOC stats for 25+ languages.
|
|
110
|
+
- **Decoupled core.** The analysis layer (`mcp_architect.analysis`) is importable and testable on its own — use it as a plain Python library too.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from mcp_architect.analysis import get_overview, get_dependency_graph
|
|
114
|
+
print(get_overview("~/code/my-app")["frameworks"])
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
> The dependency and complexity analysis is **heuristic** — designed to give an AI useful, fast situational awareness, not to replace a full static analyzer.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Pin to one project (optional)
|
|
122
|
+
|
|
123
|
+
Set `MCP_ARCHITECT_ROOT` so tools default to a fixed repo and you can omit paths:
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{ "mcpServers": { "architect": {
|
|
127
|
+
"command": "uvx", "args": ["mcp-architect"],
|
|
128
|
+
"env": { "MCP_ARCHITECT_ROOT": "/Users/you/code/my-app" }
|
|
129
|
+
} } }
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Roadmap
|
|
135
|
+
|
|
136
|
+
- [ ] Mermaid dependency‑diagram output
|
|
137
|
+
- [ ] Layered‑architecture / boundary‑violation detection
|
|
138
|
+
- [ ] Go, Rust & Java import graphs
|
|
139
|
+
- [ ] Optional local‑LLM (Ollama) narrative summaries
|
|
140
|
+
- [ ] `compare` tool for before/after architecture diffs
|
|
141
|
+
|
|
142
|
+
Contributions welcome — see [CONTRIBUTING](#contributing).
|
|
143
|
+
|
|
144
|
+
## Contributing
|
|
145
|
+
|
|
146
|
+
PRs and issues welcome! Run the tests with:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
pip install -e ".[dev]"
|
|
150
|
+
pytest
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
[MIT](LICENSE) © Kannan Dharmalingam
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mcp-architect"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Give any AI assistant real architectural understanding of a codebase — local, private, zero-config MCP server."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "Kannan Dharmalingam" }]
|
|
9
|
+
keywords = ["mcp", "model-context-protocol", "ai", "agents", "code-analysis", "architecture", "claude", "cursor", "llm", "developer-tools"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Topic :: Software Development :: Libraries",
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"mcp>=1.2.0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Homepage = "https://github.com/kannajune/mcp-architect"
|
|
23
|
+
Issues = "https://github.com/kannajune/mcp-architect/issues"
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
mcp-architect = "mcp_architect.server:main"
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
dev = ["pytest>=8.0"]
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["hatchling"]
|
|
33
|
+
build-backend = "hatchling.build"
|
|
34
|
+
|
|
35
|
+
[tool.hatch.build.targets.wheel]
|
|
36
|
+
packages = ["src/mcp_architect"]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""mcp-architect — give any AI assistant real architectural understanding of a codebase.
|
|
2
|
+
|
|
3
|
+
The analysis layer (``mcp_architect.analysis``) is pure-stdlib and importable
|
|
4
|
+
without the ``mcp`` SDK installed; the MCP server lives in
|
|
5
|
+
``mcp_architect.server``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.0"
|
|
9
|
+
|
|
10
|
+
__all__ = ["__version__"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Pure, dependency-free codebase-analysis functions.
|
|
2
|
+
|
|
3
|
+
These are intentionally decoupled from MCP so they can be unit-tested and
|
|
4
|
+
reused on their own.
|
|
5
|
+
"""
|
|
6
|
+
from .deps import get_dependency_graph
|
|
7
|
+
from .hotspots import find_hotspots
|
|
8
|
+
from .modules import explain_module
|
|
9
|
+
from .stack import get_overview
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"get_overview",
|
|
13
|
+
"get_dependency_graph",
|
|
14
|
+
"find_hotspots",
|
|
15
|
+
"explain_module",
|
|
16
|
+
]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Build an internal module dependency graph and detect import cycles.
|
|
2
|
+
|
|
3
|
+
Heuristic, dependency-free: full AST for Python, regex for JS/TS. It maps
|
|
4
|
+
*internal* imports (modules that resolve to files inside the repo) so you see
|
|
5
|
+
how the codebase is wired together — not third-party packages.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import ast
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from .walk import iter_files, read_text, rel
|
|
14
|
+
|
|
15
|
+
_PY = {".py"}
|
|
16
|
+
_JS = {".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}
|
|
17
|
+
|
|
18
|
+
_JS_IMPORT = re.compile(
|
|
19
|
+
r"""(?:import\s[^'"]*?from\s*|import\s*|require\(\s*|export\s[^'"]*?from\s*)['"]([^'"]+)['"]""",
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _py_module_name(path: Path, root: Path) -> str:
    """Return the dotted module name for the ``.py`` file *path* under *root*.

    The root-relative path produced by ``rel`` has its last three characters
    (the ``.py`` suffix) removed and its ``/`` separators turned into dots.
    A trailing ``__init__`` component is dropped so a package is named after
    its directory; an ``__init__.py`` directly in *root* yields ``""``.
    """
    # presumably rel() returns a "/"-separated string -- verify against walk.rel
    without_suffix = rel(path, root)[: -len(".py")]
    *packages, leaf = without_suffix.split("/")
    if leaf != "__init__":
        packages.append(leaf)
    return ".".join(packages)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _collect_python(root: Path) -> dict[str, set[str]]:
    """Build the internal import graph for the Python files under *root*.

    Every ``.py`` file yielded by ``iter_files`` becomes a node named by
    ``_py_module_name``.  Each ``import`` / ``from ... import`` statement found
    anywhere in a file (``ast.walk``) whose first dotted component is a
    top-level name inside the repo adds an edge to the best-matching internal
    module.  Files that cannot be parsed stay in the graph as nodes without
    outgoing edges.

    Returns:
        Mapping of module name -> set of internal module names it imports.
        A module never lists itself.
    """
    modules = {
        _py_module_name(f, root): f for f in iter_files(root) if f.suffix in _PY
    }
    # Top-level internal names (packages/modules) to match imports against.
    internal_roots = {m.split(".")[0] for m in modules if m}
    graph: dict[str, set[str]] = {m: set() for m in modules}

    def resolve(target: str) -> str | None:
        """Return the internal module that best matches the dotted *target*."""
        parts = target.split(".")
        # Prefer the module itself, then its nearest enclosing package.  This
        # keeps ``import pkg`` pointing at ``pkg`` instead of whichever
        # submodule happens to have the longest name.
        for end in range(len(parts), 0, -1):
            candidate = ".".join(parts[:end])
            if candidate in modules:
                return candidate
        # No module or enclosing package exists (e.g. a directory without
        # __init__.py): fall back to the longest-named module beneath it.
        return max(
            (m for m in modules if m.startswith(target + ".")),
            key=len,
            default=None,
        )

    for mod, path in modules.items():
        try:
            tree = ast.parse(read_text(path))
        except (SyntaxError, ValueError):
            # ValueError: source contains NUL bytes (Python < 3.12).
            continue
        mod_parts = mod.split(".") if mod else []
        is_package = path.name == "__init__.py"
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                targets = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom) and node.level:
                # Relative import: resolve against the current package.  An
                # __init__.py *is* its package, so one fewer trailing
                # component is dropped than for a regular module.
                drop = node.level - 1 if is_package else node.level
                if drop <= len(mod_parts):
                    base = mod_parts[: len(mod_parts) - drop]
                else:
                    base = []  # climbs above the repo root
                tail = node.module.split(".") if node.module else []
                targets = [".".join(base + tail)]
            elif isinstance(node, ast.ImportFrom) and node.module:
                targets = [node.module]
            else:
                continue
            for target in targets:
                if not target or target.split(".")[0] not in internal_roots:
                    continue
                best = resolve(target)
                if best and best != mod:
                    graph[mod].add(best)
    return graph
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _resolve_js(import_path: str, from_file: Path, root: Path, files: set[Path]) -> str | None:
|
|
70
|
+
if not import_path.startswith("."):
|
|
71
|
+
return None # external package
|
|
72
|
+
target = (from_file.parent / import_path).resolve()
|
|
73
|
+
candidates = [target]
|
|
74
|
+
for ext in (".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"):
|
|
75
|
+
candidates.append(target.with_suffix(ext))
|
|
76
|
+
candidates.append(target / f"index{ext}")
|
|
77
|
+
for c in candidates:
|
|
78
|
+
if c in files:
|
|
79
|
+
return rel(c, root)
|
|
80
|
+
return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _collect_js(root: Path) -> dict[str, set[str]]:
    """Build the internal import graph for the JS/TS files under *root*.

    Nodes are the root-relative paths (via ``rel``) of every file yielded by
    ``iter_files`` whose suffix is in ``_JS``.  Each specifier matched by
    ``_JS_IMPORT`` is handed to ``_resolve_js``; a resolved path that differs
    from the importing file becomes an edge.

    Returns:
        Mapping of file path -> set of internal file paths it imports.
    """
    sources = [p for p in iter_files(root) if p.suffix in _JS]
    known = {p.resolve() for p in sources}
    graph: dict[str, set[str]] = {rel(p, root): set() for p in sources}
    for source in sources:
        content = read_text(source)
        for match in _JS_IMPORT.finditer(content):
            dep = _resolve_js(match.group(1), source, root, known)
            # Unresolved imports and self-references add no edge.
            if not dep or dep == rel(source, root):
                continue
            graph[rel(source, root)].add(dep)
    return graph
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _find_cycles(graph: dict[str, set[str]]) -> list[list[str]]:
|
|
97
|
+
cycles: list[list[str]] = []
|
|
98
|
+
seen_pairs: set[tuple[str, ...]] = set()
|
|
99
|
+
WHITE, GREY, BLACK = 0, 1, 2
|
|
100
|
+
color = {n: WHITE for n in graph}
|
|
101
|
+
stack: list[str] = []
|
|
102
|
+
|
|
103
|
+
def dfs(node: str) -> None:
|
|
104
|
+
color[node] = GREY
|
|
105
|
+
stack.append(node)
|
|
106
|
+
for nxt in graph.get(node, ()):
|
|
107
|
+
if color.get(nxt, BLACK) == GREY:
|
|
108
|
+
cycle = stack[stack.index(nxt):] + [nxt]
|
|
109
|
+
key = tuple(sorted(set(cycle)))
|
|
110
|
+
if key not in seen_pairs:
|
|
111
|
+
seen_pairs.add(key)
|
|
112
|
+
cycles.append(cycle)
|
|
113
|
+
elif color.get(nxt, BLACK) == WHITE:
|
|
114
|
+
dfs(nxt)
|
|
115
|
+
stack.pop()
|
|
116
|
+
color[node] = BLACK
|
|
117
|
+
|
|
118
|
+
for n in list(graph):
|
|
119
|
+
if color[n] == WHITE:
|
|
120
|
+
dfs(n)
|
|
121
|
+
return cycles
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_dependency_graph(root: str | Path, language: str = "auto") -> dict:
    """Summarise the internal dependency graph of the codebase at *root*.

    Args:
        root: Directory to analyse.
        language: ``"auto"`` runs both scanners; ``"python"``/``"py"`` only
            the Python one; ``"js"``/``"ts"``/``"javascript"``/``"typescript"``
            only the JS/TS one (case-insensitive).  Any other value runs no
            scanner and yields an empty graph.

    Returns:
        Dict with ``root``, node count (``modules``), edge count (``edges``),
        the ten most imported nodes (``most_depended_upon``), up to fifteen
        cycles rendered as ``"a -> b -> a"`` (``cycles``) and the sorted
        adjacency lists of nodes that import something (``edges_by_module``).
    """
    root = Path(root)
    wanted = language.lower()
    graph: dict[str, set[str]] = {}
    collectors = (
        (("auto", "python", "py"), _collect_python),
        (("auto", "js", "ts", "javascript", "typescript"), _collect_js),
    )
    for aliases, collect in collectors:
        if wanted in aliases:
            graph.update(collect(root))

    # Fan-in: how many nodes import each target.
    fan_in: dict[str, int] = {}
    total_edges = 0
    for imported in graph.values():
        total_edges += len(imported)
        for name in imported:
            fan_in[name] = fan_in.get(name, 0) + 1
    hubs = sorted(fan_in, key=fan_in.get, reverse=True)[:10]
    found_cycles = _find_cycles(graph)

    return {
        "root": str(root),
        "modules": len(graph),
        "edges": total_edges,
        "most_depended_upon": [
            {"module": name, "imported_by": fan_in[name]} for name in hubs
        ],
        "cycles": [" -> ".join(loop) for loop in found_cycles[:15]],
        "edges_by_module": {
            name: sorted(graph[name]) for name in sorted(graph) if graph[name]
        },
    }
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Surface the files most worth a human's (or AI's) attention.
|
|
2
|
+
|
|
3
|
+
Combines size, a cheap cyclomatic-complexity proxy, and git churn (how often a
|
|
4
|
+
file changes) — the classic signals for "where the risk lives".
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import subprocess
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .walk import CODE_EXTS, count_loc, iter_files, read_text, rel
|
|
13
|
+
|
|
14
|
+
# Tokens that introduce a branch / decision point.
|
|
15
|
+
_BRANCH = re.compile(
|
|
16
|
+
r"\b(if|elif|else if|for|while|case|catch|except|&&|\|\||\?\s|switch)\b"
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _complexity(text: str) -> int:
|
|
21
|
+
return len(_BRANCH.findall(text)) + 1
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _git_churn(root: Path) -> dict[str, int]:
|
|
25
|
+
if not (root / ".git").exists():
|
|
26
|
+
return {}
|
|
27
|
+
try:
|
|
28
|
+
out = subprocess.run(
|
|
29
|
+
["git", "-C", str(root), "log", "--no-merges", "--name-only", "--format="],
|
|
30
|
+
capture_output=True, text=True, timeout=20,
|
|
31
|
+
).stdout
|
|
32
|
+
except (subprocess.SubprocessError, OSError):
|
|
33
|
+
return {}
|
|
34
|
+
churn: dict[str, int] = {}
|
|
35
|
+
for line in out.splitlines():
|
|
36
|
+
line = line.strip()
|
|
37
|
+
if line:
|
|
38
|
+
churn[line] = churn.get(line, 0) + 1
|
|
39
|
+
return churn
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def find_hotspots(root: str | Path, top: int = 10) -> dict:
    """Rank the code files under *root* by size, complexity, churn and risk.

    Args:
        root: Directory to analyse.
        top: Maximum number of entries in each ranking.

    Returns:
        Dict with ``root``, ``files_analyzed``, ``git_history_available`` and
        four rankings: ``largest``, ``most_complex``, ``most_changed`` (only
        files with at least one recorded change) and ``highest_risk``.
    """
    root = Path(root)
    records = []
    for path in iter_files(root):
        if path.suffix.lower() in CODE_EXTS:
            source = read_text(path)
            if source:  # files that read back as empty are left out
                records.append({
                    "file": rel(path, root),
                    "loc": count_loc(source),
                    "complexity": _complexity(source),
                })

    history = _git_churn(root)
    for rec in records:
        rec["changes"] = history.get(rec["file"], 0)
        # "Risk" is a weighted sum of size, complexity and change frequency.
        rec["risk"] = rec["loc"] * 0.4 + rec["complexity"] * 2 + rec["changes"] * 5

    def ranked(field: str, pool: list = records) -> list:
        """Return the *top* records of *pool*, highest *field* first."""
        return sorted(pool, key=lambda rec: rec[field], reverse=True)[:top]

    # Without git history every change count is 0, so this ranking is empty.
    changed = ranked("changes", [rec for rec in records if rec["changes"]])

    return {
        "root": str(root),
        "files_analyzed": len(records),
        "git_history_available": bool(history),
        "largest": [
            {"file": rec["file"], "loc": rec["loc"]} for rec in ranked("loc")
        ],
        "most_complex": [
            {"file": rec["file"], "complexity": rec["complexity"], "loc": rec["loc"]}
            for rec in ranked("complexity")
        ],
        "most_changed": [
            {"file": rec["file"], "changes": rec["changes"]} for rec in changed
        ],
        "highest_risk": [
            {k: rec[k] for k in ("file", "loc", "complexity", "changes")}
            for rec in ranked("risk")
        ],
    }
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Explain a single module/folder: its files, public symbols, and imports."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import ast
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .walk import count_loc, iter_files, read_text, rel
|
|
9
|
+
|
|
10
|
+
_PY = {".py"}
|
|
11
|
+
_JS = {".js", ".jsx", ".ts", ".tsx"}
|
|
12
|
+
|
|
13
|
+
_JS_SYMBOL = re.compile(
|
|
14
|
+
r"^\s*export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var)\s+([A-Za-z0-9_]+)",
|
|
15
|
+
re.MULTILINE,
|
|
16
|
+
)
|
|
17
|
+
_JS_IMPORT_SRC = re.compile(r"""from\s*['"]([^'"]+)['"]|require\(\s*['"]([^'"]+)['"]""")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _py_symbols(text: str) -> tuple[list[str], list[str]]:
|
|
21
|
+
try:
|
|
22
|
+
tree = ast.parse(text)
|
|
23
|
+
except SyntaxError:
|
|
24
|
+
return [], []
|
|
25
|
+
classes, funcs = [], []
|
|
26
|
+
for node in tree.body:
|
|
27
|
+
if isinstance(node, ast.ClassDef):
|
|
28
|
+
classes.append(node.name)
|
|
29
|
+
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
30
|
+
funcs.append(node.name)
|
|
31
|
+
return classes, funcs
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _py_imports(text: str) -> set[str]:
|
|
35
|
+
out: set[str] = set()
|
|
36
|
+
try:
|
|
37
|
+
tree = ast.parse(text)
|
|
38
|
+
except SyntaxError:
|
|
39
|
+
return out
|
|
40
|
+
for node in ast.walk(tree):
|
|
41
|
+
if isinstance(node, ast.Import):
|
|
42
|
+
out.update(a.name.split(".")[0] for a in node.names)
|
|
43
|
+
elif isinstance(node, ast.ImportFrom) and node.module:
|
|
44
|
+
out.add(node.module.split(".")[0])
|
|
45
|
+
return out
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def explain_module(root: str | Path, module: str = ".") -> dict:
    """Summarise one file or folder of the codebase at *root*.

    Args:
        root: Repository root.
        module: Path of the file or folder to explain, relative to *root*.

    Returns:
        ``{"error": ...}`` when the path does not exist.  Otherwise a dict
        with the ``module`` path (``"."`` for the root itself), the number of
        ``files``, their ``total_loc``, the sorted ``external_imports`` and
        per-file ``file_details`` (largest first, at most 40 files and 25
        functions per file).
    """
    root = Path(root)
    target = (root / module).resolve()
    if not target.exists():
        return {"error": f"path not found: {module}"}

    if target.is_file():
        paths = [target]
    else:
        wanted = _PY | _JS
        paths = [f for f in iter_files(target) if f.suffix in wanted]

    files_out = []
    all_imports: set[str] = set()
    total_loc = 0
    for f in paths:
        text = read_text(f)
        loc = count_loc(text)
        total_loc += loc
        if f.suffix in _PY:
            classes, funcs = _py_symbols(text)
            all_imports |= _py_imports(text)
        else:
            # Exported JS/TS names are all reported under "functions".
            classes, funcs = [], _JS_SYMBOL.findall(text)
            for from_src, require_src in _JS_IMPORT_SRC.findall(text):
                src = from_src or require_src
                if not src.startswith("."):  # relative imports are internal
                    all_imports.add(src.split("/")[0])
        files_out.append({
            "file": rel(f, root),
            "loc": loc,
            "classes": classes,
            "functions": funcs[:25],
        })

    files_out.sort(key=lambda r: r["loc"], reverse=True)
    return {
        # `target` is resolved, so compare against the resolved root; otherwise
        # a relative or symlinked root never equals it.
        "module": rel(target, root) if target != root.resolve() else ".",
        "files": len(files_out),
        "total_loc": total_loc,
        # NOTE(review): isidentifier() also drops scoped or hyphenated npm
        # names such as "@nestjs" or "lodash-es" -- confirm this is intended.
        "external_imports": sorted(i for i in all_imports if i and i.isidentifier()),
        "file_details": files_out[:40],
    }
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Detect the technology stack, size, and entry points of a codebase."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .walk import LANG_BY_EXT, count_loc, iter_files, read_text, rel
|
|
10
|
+
|
|
11
|
+
# Manifest file name -> ecosystem label.
|
|
12
|
+
_MANIFESTS = {
|
|
13
|
+
"package.json": "Node.js",
|
|
14
|
+
"pyproject.toml": "Python",
|
|
15
|
+
"requirements.txt": "Python",
|
|
16
|
+
"go.mod": "Go",
|
|
17
|
+
"Cargo.toml": "Rust",
|
|
18
|
+
"pom.xml": "Java (Maven)",
|
|
19
|
+
"build.gradle": "Java/Kotlin (Gradle)",
|
|
20
|
+
"Gemfile": "Ruby",
|
|
21
|
+
"composer.json": "PHP",
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
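# Dependency-name hint -> friendly framework name (prefix-matched against
# package.json dependency names, whole-token-matched in other manifests).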
|
|
25
|
+
_FRAMEWORK_HINTS = {
|
|
26
|
+
"next": "Next.js", "react": "React", "@angular/core": "Angular",
|
|
27
|
+
"vue": "Vue", "svelte": "Svelte", "express": "Express",
|
|
28
|
+
"@nestjs/core": "NestJS", "fastify": "Fastify", "fastapi": "FastAPI",
|
|
29
|
+
"django": "Django", "flask": "Flask", "starlette": "Starlette",
|
|
30
|
+
"langchain": "LangChain", "langgraph": "LangGraph", "mcp": "MCP",
|
|
31
|
+
"spring-boot": "Spring Boot", "rails": "Ruby on Rails",
|
|
32
|
+
"laravel": "Laravel", "tailwindcss": "Tailwind CSS",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
_ENTRY_CANDIDATES = (
|
|
36
|
+
"main.py", "app.py", "manage.py", "__main__.py", "server.py",
|
|
37
|
+
"index.js", "index.ts", "main.go", "main.rs", "Program.cs",
|
|
38
|
+
"src/index.ts", "src/index.js", "src/main.ts", "src/main.py",
|
|
39
|
+
"src/app/page.tsx", "cmd",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _scan_frameworks(root: Path) -> tuple[set[str], list[str]]:
    """Detect ecosystems and frameworks from the manifest files under ``root``.

    Every file whose name is a key of ``_MANIFESTS`` contributes its
    ecosystem. For ``package.json`` the names listed under ``dependencies``
    and ``devDependencies`` are prefix-matched against ``_FRAMEWORK_HINTS``;
    any other manifest is lower-cased and searched for each hint as a whole
    token.

    Returns:
        ``(ecosystems, frameworks)`` where ``frameworks`` is sorted.
    """
    ecosystems: set[str] = set()
    frameworks: set[str] = set()
    # Build the whole-token patterns once instead of once per manifest file.
    token_patterns = [
        (re.compile(rf"(^|[^a-z0-9_]){re.escape(hint)}([^a-z0-9_]|$)"), fw)
        for hint, fw in _FRAMEWORK_HINTS.items()
    ]

    for f in iter_files(root):
        name = f.name
        if name not in _MANIFESTS:
            continue
        ecosystems.add(_MANIFESTS[name])
        text = read_text(f)

        if name == "package.json":
            try:
                data = json.loads(text)
            except json.JSONDecodeError:
                # Unparseable manifest: the ecosystem is still recorded above.
                continue
            if not isinstance(data, dict):
                continue
            deps: set[str] = set()
            for key in ("dependencies", "devDependencies"):
                section = data.get(key)
                # A null / list / string section used to raise an uncaught
                # TypeError when unpacked with ``**``; ignore it instead.
                if isinstance(section, dict):
                    deps.update(section)
            for dep in deps:
                for hint, fw in _FRAMEWORK_HINTS.items():
                    # startswith() also covers the exact-match case.
                    if dep.startswith(hint):
                        frameworks.add(fw)
        else:
            low = text.lower()
            for pattern, fw in token_patterns:
                if pattern.search(low):
                    frameworks.add(fw)

    return ecosystems, sorted(frameworks)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _find_entry_points(root: Path) -> list[str]:
    """Return the entry-point candidates that exist directly under ``root``.

    Candidates are probed in the order they appear in ``_ENTRY_CANDIDATES``
    and returned as the same relative strings.
    """
    return [name for name in _ENTRY_CANDIDATES if (root / name).exists()]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_overview(root: str | Path) -> dict:
    """Build a size and technology summary for the tree rooted at ``root``.

    Every file yielded by ``iter_files`` counts towards ``total_files`` and
    the top-level directory tally. Only files whose extension maps to a
    language other than JSON, YAML, TOML or Markdown contribute to the
    per-language file and LOC figures.

    Returns:
        A dict with ``root``, ``total_files``, ``total_code_loc``,
        ``languages`` (ordered by LOC, largest first), ``ecosystems``,
        ``frameworks``, ``top_level_dirs`` (at most 12) and ``entry_points``.
    """
    root = Path(root)
    not_code = (None, "JSON", "YAML", "TOML", "Markdown")
    files_per_lang: Counter[str] = Counter()
    loc_per_lang: Counter[str] = Counter()
    dir_counts: Counter[str] = Counter()
    file_count = 0
    code_loc = 0

    for path in iter_files(root):
        file_count += 1

        # Files sitting directly in the root have no "/" and no top-level dir.
        first, slash, _ = rel(path, root).partition("/")
        if slash:
            dir_counts[first] += 1

        language = LANG_BY_EXT.get(path.suffix.lower())
        if language in not_code:
            continue

        lines = count_loc(read_text(path))
        files_per_lang[language] += 1
        loc_per_lang[language] += lines
        code_loc += lines

    ecosystems, frameworks = _scan_frameworks(root)

    languages = []
    for language, lines in loc_per_lang.most_common():
        languages.append(
            {"language": language, "files": files_per_lang[language], "loc": lines}
        )

    return {
        "root": str(root),
        "total_files": file_count,
        "total_code_loc": code_loc,
        "languages": languages,
        "ecosystems": sorted(ecosystems),
        "frameworks": frameworks,
        "top_level_dirs": [name for name, _ in dir_counts.most_common(12)],
        "entry_points": _find_entry_points(root),
    }
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Filesystem walking utilities shared by the analysis modules."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterator
|
|
7
|
+
|
|
8
|
+
# Directories that never contain meaningful source for architecture analysis.
|
|
9
|
+
IGNORE_DIRS = {
|
|
10
|
+
".git", ".hg", ".svn", "node_modules", ".venv", "venv", "env",
|
|
11
|
+
"__pycache__", ".next", "out", "dist", "build", ".mypy_cache",
|
|
12
|
+
".pytest_cache", ".ruff_cache", "target", "coverage", ".turbo",
|
|
13
|
+
"vendor", ".cache", ".gradle", "bin", "obj", "site-packages",
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
LANG_BY_EXT = {
|
|
17
|
+
".py": "Python", ".js": "JavaScript", ".jsx": "JavaScript",
|
|
18
|
+
".ts": "TypeScript", ".tsx": "TypeScript", ".go": "Go",
|
|
19
|
+
".rs": "Rust", ".java": "Java", ".rb": "Ruby", ".php": "PHP",
|
|
20
|
+
".cs": "C#", ".cpp": "C++", ".cc": "C++", ".c": "C", ".h": "C/C++",
|
|
21
|
+
".kt": "Kotlin", ".swift": "Swift", ".scala": "Scala", ".sh": "Shell",
|
|
22
|
+
".css": "CSS", ".scss": "CSS", ".less": "CSS", ".html": "HTML",
|
|
23
|
+
".vue": "Vue", ".svelte": "Svelte", ".sql": "SQL", ".md": "Markdown",
|
|
24
|
+
".yml": "YAML", ".yaml": "YAML", ".json": "JSON", ".toml": "TOML",
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
# Extensions we treat as "source code" for LOC / complexity purposes.
|
|
28
|
+
CODE_EXTS = {
|
|
29
|
+
".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".rs", ".java", ".rb",
|
|
30
|
+
".php", ".cs", ".cpp", ".cc", ".c", ".h", ".kt", ".swift", ".scala",
|
|
31
|
+
".sh", ".vue", ".svelte",
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def iter_files(root: Path, *, include_hidden: bool = False) -> Iterator[Path]:
    """Walk ``root`` and yield each file path, skipping ignored directories.

    Directories named in ``IGNORE_DIRS`` are never descended into. Unless
    ``include_hidden`` is true, dot-directories are pruned as well and
    dot-files are not yielded.
    """
    for current, subdirs, files in os.walk(Path(root)):
        # Prune in place so os.walk does not descend into dropped directories.
        kept = []
        for sub in subdirs:
            if sub in IGNORE_DIRS:
                continue
            if sub.startswith(".") and not include_hidden:
                continue
            kept.append(sub)
        subdirs[:] = kept

        here = Path(current)
        for filename in files:
            if include_hidden or not filename.startswith("."):
                yield here / filename
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def read_text(path: Path) -> str:
    """Return the file's contents decoded as UTF-8, or '' if it cannot be read.

    Bytes that are not valid UTF-8 are dropped rather than raising.
    """
    try:
        with Path(path).open(encoding="utf-8", errors="ignore") as handle:
            contents = handle.read()
    except (OSError, ValueError):
        # Missing file, directory, permission problem, bad path, ...
        contents = ""
    return contents
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def count_loc(text: str) -> int:
    """Return how many lines of ``text`` contain a non-whitespace character."""
    non_blank = [row for row in text.splitlines() if row.strip()]
    return len(non_blank)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def rel(path: Path, root: Path) -> str:
    """Return ``path`` relative to ``root`` with forward slashes.

    When ``path`` is not located under ``root`` it is returned unchanged
    (still with forward slashes).
    """
    candidate = Path(path)
    try:
        candidate = candidate.relative_to(root)
    except ValueError:
        # Not under root: fall through and report the path as given.
        pass
    return candidate.as_posix()
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""MCP server exposing codebase-architecture tools to any MCP client.
|
|
2
|
+
|
|
3
|
+
Run with: mcp-architect (stdio transport, for Claude Desktop / Cursor)
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from mcp.server.fastmcp import FastMCP
|
|
11
|
+
|
|
12
|
+
from .analysis import (
|
|
13
|
+
explain_module,
|
|
14
|
+
find_hotspots,
|
|
15
|
+
get_dependency_graph,
|
|
16
|
+
get_overview,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
mcp = FastMCP("mcp-architect")
|
|
20
|
+
|
|
21
|
+
# Optional: pin analysis to a fixed project so clients can omit `path`.
|
|
22
|
+
_ROOT_ENV = os.environ.get("MCP_ARCHITECT_ROOT")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _resolve(path: str) -> Path:
    """Turn a client-supplied path into an absolute, resolved ``Path``.

    ``~`` is expanded first. A path that is still relative afterwards is
    anchored at ``MCP_ARCHITECT_ROOT`` when that is set, otherwise at the
    current working directory.
    """
    # Expand "~" before joining: once "~/x" has been appended to the base,
    # "~" is no longer the leading component and expanduser() leaves it alone.
    try:
        p = Path(path).expanduser()
    except RuntimeError:
        # "~name" with no resolvable home directory: treat it as a literal name.
        p = Path(path)
    if not p.is_absolute():
        base = Path(_ROOT_ENV).expanduser() if _ROOT_ENV else Path.cwd()
        p = base / p
    return p.resolve()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _bullets(items: list[str]) -> str:
    """Render ``items`` as a Markdown bullet list, or ``_none_`` when empty."""
    if not items:
        return "_none_"
    lines = [f"- {entry}" for entry in items]
    return "\n".join(lines)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@mcp.tool()
def architecture_overview(path: str = ".") -> str:
    """High-level map of a codebase: languages, frameworks, size, structure, and
    entry points. Start here to understand an unfamiliar repo.

    Args:
        path: Repo path to analyze. Relative to the server's working directory
            (or MCP_ARCHITECT_ROOT if set). Defaults to the whole project.
    """
    # NOTE: the docstring is left untouched on purpose; it is presumably what
    # FastMCP exposes to clients as the tool description.
    root = _resolve(path)
    if not root.is_dir():
        return f"❌ Not a directory: {root}"

    overview = get_overview(root)

    # One bullet per language, limited to the first eight entries.
    language_lines = [
        f"- **{entry['language']}** — {entry['files']} files, {entry['loc']:,} LOC"
        for entry in overview["languages"][:8]
    ]
    if language_lines:
        langs = "\n".join(language_lines)
    else:
        langs = "_no source files detected_"

    sections = [
        f"# Architecture Overview — `{root.name}`\n\n",
        f"**{overview['total_files']:,} files · {overview['total_code_loc']:,} lines of code**\n\n",
        f"## Languages\n{langs}\n\n",
        f"## Ecosystems\n{_bullets(overview['ecosystems'])}\n\n",
        f"## Frameworks / key libraries\n{_bullets(overview['frameworks'])}\n\n",
        f"## Top-level structure\n{_bullets(overview['top_level_dirs'])}\n\n",
        f"## Entry points\n{_bullets(overview['entry_points'])}\n",
    ]
    return "".join(sections)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@mcp.tool()
def dependency_graph(path: str = ".", language: str = "auto") -> str:
    """Map how internal modules import each other, the most-depended-upon
    modules, and any circular dependencies. Use to understand coupling.

    Args:
        path: Repo path to analyze.
        language: 'auto', 'python', or 'js'/'ts'.
    """
    # NOTE: the docstring is left untouched on purpose; it is presumably what
    # FastMCP exposes to clients as the tool description.
    root = _resolve(path)
    if not root.is_dir():
        return f"❌ Not a directory: {root}"

    graph = get_dependency_graph(root, language)

    hub_lines = [
        f"- `{hub['module']}` — imported by {hub['imported_by']} modules"
        for hub in graph["most_depended_upon"]
    ]
    hubs = "\n".join(hub_lines) if hub_lines else "_none_"

    if graph["cycles"]:
        cycles = "\n".join(f"- 🔁 {cycle}" for cycle in graph["cycles"])
    else:
        cycles = "✅ _no circular dependencies found_"

    sections = [
        f"# Dependency Graph — `{root.name}`\n\n",
        f"**{graph['modules']} modules · {graph['edges']} internal import edges**\n\n",
        f"## Most depended-upon (architectural hubs)\n{hubs}\n\n",
        f"## Circular dependencies\n{cycles}\n",
    ]
    return "".join(sections)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@mcp.tool()
def hotspots(path: str = ".", top: int = 10) -> str:
    """Find the files most worth attention: largest, most complex, most
    frequently changed (git), and highest combined risk.

    Args:
        path: Repo path to analyze.
        top: How many files per category (default 10).
    """
    # NOTE: the docstring is left untouched on purpose; it is presumably what
    # FastMCP exposes to clients as the tool description.
    root = _resolve(path)
    if not root.is_dir():
        return f"❌ Not a directory: {root}"
    d = find_hotspots(root, top)
    risk = "\n".join(
        f"- `{r['file']}` — {r['loc']} LOC, complexity {r['complexity']}, "
        f"{r['changes']} changes" for r in d["highest_risk"]
    ) or "_none_"
    # Same "_none_" fallback as the risk section, so an empty result does not
    # render as a heading with nothing underneath it.
    largest = "\n".join(
        f"- `{r['file']}` — {r['loc']} LOC" for r in d["largest"]
    ) or "_none_"
    # Only mention git when its history could not be read.
    note = "" if d["git_history_available"] else (
        "\n> ℹ️ No git history found, so change-frequency is unavailable.\n"
    )
    return (
        f"# Hotspots — `{root.name}`\n\n"
        f"_{d['files_analyzed']} source files analyzed._{note}\n"
        f"## Highest risk (big + complex + churny)\n{risk}\n\n"
        f"## Largest files\n{largest}\n"
    )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@mcp.tool()
def explain(path: str = ".", module: str = ".") -> str:
    """Deep-dive a single folder or file: its files, public classes/functions,
    and external dependencies.

    Args:
        path: Repo root.
        module: Sub-path within the repo (folder or file) to explain.
    """
    # NOTE: the docstring is left untouched on purpose; it is presumably what
    # FastMCP exposes to clients as the tool description.
    root = _resolve(path)
    if not root.exists():
        return f"❌ Path not found: {root}"

    report = explain_module(root, module)
    if "error" in report:
        return f"❌ {report['error']}"

    # One bullet per file; class and function lists are appended only when present.
    rows = []
    for info in report["file_details"]:
        row = f"- `{info['file']}` ({info['loc']} LOC)"
        if info["classes"]:
            row += f" — classes: {', '.join(info['classes'])}"
        if info["functions"]:
            row += f" — functions: {', '.join(info['functions'][:8])}"
        rows.append(row)
    details = "\n".join(rows) if rows else "_no source files_"

    sections = [
        f"# Module — `{report['module']}`\n\n",
        f"**{report['files']} files · {report['total_loc']:,} LOC**\n\n",
        f"## External dependencies\n{_bullets(report['external_imports'][:25])}\n\n",
        f"## Files & symbols\n{details}\n",
    ]
    return "".join(sections)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def main() -> None:
    """Console-script entry point (stdio transport).

    Starts the module-level ``mcp`` server and returns when it exits.
    """
    # No transport argument is passed, so the server's default is used.
    mcp.run()
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
if __name__ == "__main__":
|
|
156
|
+
main()
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Tests for the pure analysis layer (no MCP dependency required)."""
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from mcp_architect.analysis import (
|
|
7
|
+
explain_module,
|
|
8
|
+
find_hotspots,
|
|
9
|
+
get_dependency_graph,
|
|
10
|
+
get_overview,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
def sample(tmp_path: Path) -> Path:
    """Build a tiny multi-file Python project in ``tmp_path`` and return its root.

    Layout: an ``app`` package (``core.py`` with class ``Engine``, ``api.py``
    with ``handler``), a top-level ``main.py`` and a ``pyproject.toml`` that
    lists ``fastapi``. Imports form a chain main -> app.api -> app.core;
    the project contains no import cycle.
    """
    pkg = tmp_path / "app"
    pkg.mkdir()
    (pkg / "__init__.py").write_text("")
    # The embedded sources use 4-space indentation so they parse as Python.
    (pkg / "core.py").write_text(
        "import os\n\n"
        "class Engine:\n"
        "    def run(self, x):\n"
        "        if x:\n"
        "            return 1\n"
        "        for i in range(x):\n"
        "            pass\n"
        "        return 0\n"
    )
    (pkg / "api.py").write_text(
        "from app.core import Engine\n\n"
        "def handler():\n"
        "    return Engine().run(1)\n"
    )
    (tmp_path / "main.py").write_text("from app.api import handler\n")
    (tmp_path / "pyproject.toml").write_text(
        '[project]\nname="x"\ndependencies=["fastapi"]\n'
    )
    return tmp_path
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_overview_detects_language_and_framework(sample: Path):
    """The overview counts the files and reports Python, FastAPI and main.py."""
    ov = get_overview(sample)
    assert ov["total_files"] >= 4
    langs = {l["language"] for l in ov["languages"]}
    assert "Python" in langs
    assert "FastAPI" in ov["frameworks"]
    assert "main.py" in ov["entry_points"]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_dependency_graph_finds_internal_edges(sample: Path):
    """The graph sees the sample's modules and edges, with ``core`` as a hub."""
    dg = get_dependency_graph(sample, "python")
    assert dg["modules"] >= 3
    assert dg["edges"] >= 2
    hubs = {h["module"] for h in dg["most_depended_upon"]}
    assert any("core" in h for h in hubs)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_hotspots_returns_largest(sample: Path):
    """Hotspot analysis covers the sample files and ranks a non-empty file first."""
    hs = find_hotspots(sample, top=5)
    assert hs["files_analyzed"] >= 3
    assert hs["largest"]
    assert hs["largest"][0]["loc"] > 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_explain_module(sample: Path):
    """Explaining the ``app`` folder lists its files and finds class ``Engine``."""
    em = explain_module(sample, "app")
    assert em["files"] >= 2
    classes = {c for f in em["file_details"] for c in f["classes"]}
    assert "Engine" in classes
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_explain_missing_path(sample: Path):
    """A sub-path that does not exist yields an ``error`` entry, not an exception."""
    assert "error" in explain_module(sample, "does/not/exist")
|