dependaman 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dependaman-1.0.0/LICENSE +21 -0
- dependaman-1.0.0/PKG-INFO +170 -0
- dependaman-1.0.0/README.md +156 -0
- dependaman-1.0.0/dependaman/__init__.py +3 -0
- dependaman-1.0.0/dependaman/__main__.py +20 -0
- dependaman-1.0.0/dependaman/analysis.py +75 -0
- dependaman-1.0.0/dependaman/core.py +62 -0
- dependaman-1.0.0/dependaman/discovery.py +105 -0
- dependaman-1.0.0/dependaman/git.py +120 -0
- dependaman-1.0.0/dependaman/graph.py +51 -0
- dependaman-1.0.0/dependaman/parser.py +121 -0
- dependaman-1.0.0/dependaman/pool.py +20 -0
- dependaman-1.0.0/dependaman/renderer.py +69 -0
- dependaman-1.0.0/dependaman.egg-info/PKG-INFO +170 -0
- dependaman-1.0.0/dependaman.egg-info/SOURCES.txt +19 -0
- dependaman-1.0.0/dependaman.egg-info/dependency_links.txt +1 -0
- dependaman-1.0.0/dependaman.egg-info/entry_points.txt +2 -0
- dependaman-1.0.0/dependaman.egg-info/top_level.txt +1 -0
- dependaman-1.0.0/pyproject.toml +31 -0
- dependaman-1.0.0/setup.cfg +4 -0
- dependaman-1.0.0/test/test_discovery.py +38 -0
dependaman-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jacobo Mateo Bedoya Oquendo, Dependaman contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dependaman
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Understand your Python dependencies. Find cycles, dead modules, and architecture problems.
|
|
5
|
+
Author-email: Jacobo Bedoya <jacobobedoya@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://codeberg.org/jacobitosuperstar/DependaMan
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# DependaMan
|
|
16
|
+
|
|
17
|
+
Understand your Python project's internal structure. Find cycles, dead modules,
|
|
18
|
+
hotspots, and architecture problems — visualized as an interactive graph.
|
|
19
|
+
|
|
20
|
+
No external Python dependencies. Pure stdlib only.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Goal
|
|
25
|
+
|
|
26
|
+
Given a Python project directory, DependaMan produces an interactive HTML graph
|
|
27
|
+
showing:
|
|
28
|
+
|
|
29
|
+
- Which modules import which (directed dependency graph)
|
|
30
|
+
- Which modules are never imported (dead code candidates)
|
|
31
|
+
- Circular import chains
|
|
32
|
+
- Modules with high fan-in (many dependents) or high fan-out (many dependencies)
|
|
33
|
+
- Module size (lines of code, number of functions/classes)
|
|
34
|
+
- Git churn: how often each module changes
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Architecture
|
|
39
|
+
|
|
40
|
+
### Phase 1 — File Discovery
|
|
41
|
+
Walk the project directory, collect all `.py` files, and determine the package
|
|
42
|
+
root. Distinguish internal modules from external ones (stdlib + third-party are
|
|
43
|
+
ignored).
|
|
44
|
+
|
|
45
|
+
### Phase 2 — Import Parsing
|
|
46
|
+
Use `ast` to parse each file and extract `import` and `from ... import`
|
|
47
|
+
statements. Resolve relative imports. Filter to internal-only imports.
|
|
48
|
+
|
|
49
|
+
### Phase 3 — Graph Construction
|
|
50
|
+
Build a directed graph as an adjacency structure:
|
|
51
|
+
- Node = internal module
|
|
52
|
+
- Edge A → B = "module A imports module B"
|
|
53
|
+
|
|
54
|
+
Attach metadata to each node: file path, line count, function/class count.
|
|
55
|
+
|
|
56
|
+
### Phase 4 — Analysis
|
|
57
|
+
Run these passes on the graph:
|
|
58
|
+
|
|
59
|
+
- **Dead code**: nodes with no incoming edges and not an entry point
|
|
60
|
+
- **Circular imports**: detect cycles (DFS-based)
|
|
61
|
+
- **Hotspots**: nodes ranked by fan-in (most imported)
|
|
62
|
+
- **Coupling**: nodes ranked by fan-out (imports the most)
|
|
63
|
+
|
|
64
|
+
### Phase 5 — Git Integration
|
|
65
|
+
Use `subprocess` + `git log` to compute per-file:
|
|
66
|
+
- Commit frequency (how often it changes)
|
|
67
|
+
- Lines added/removed over time (churn)
|
|
68
|
+
- Last author that changed the file
|
|
69
|
+
|
|
70
|
+
Attach this data to graph nodes. Optional: skipped if the project is not a git
|
|
71
|
+
repo.
|
|
72
|
+
|
|
73
|
+
### Phase 6 — HTML Output
|
|
74
|
+
Generate a self-contained `.html` file (or return HTML as a string for web
|
|
75
|
+
integration).
|
|
76
|
+
|
|
77
|
+
The HTML template is a static string embedded in Python. Only the data changes
|
|
78
|
+
between runs. Python serializes the graph to JSON and injects it into the
|
|
79
|
+
template — no templating library needed:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import json
|
|
83
|
+
|
|
84
|
+
data = json.dumps({"nodes": [...], "edges": [...]})
|
|
85
|
+
html = TEMPLATE.replace("__GRAPH_DATA__", data)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The template contains a `<script>` block that reads the injected data and
|
|
89
|
+
renders the graph using the browser's `<canvas>` or SVG API. No external JS
|
|
90
|
+
libraries required.
|
|
91
|
+
|
|
92
|
+
The graph supports:
|
|
93
|
+
- **Hover tooltips**: quick summary (import count, churn score)
|
|
94
|
+
- **Click modals**: full detail panel (git log, list of dependents/dependencies,
|
|
95
|
+
size metrics)
|
|
96
|
+
|
|
97
|
+
The output function signature is designed to be framework-agnostic:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
def render(graph, analysis) -> str: # returns HTML string
|
|
101
|
+
...
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
This makes it trivial to plug into FastAPI, Flask, or any other framework:
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
# FastAPI example
|
|
108
|
+
@app.get("/graph", response_class=HTMLResponse)
|
|
109
|
+
def dependency_graph():
|
|
110
|
+
graph = build_graph(".")
|
|
111
|
+
analysis = analyze(graph)
|
|
112
|
+
return render(graph, analysis)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Package Structure
|
|
118
|
+
|
|
119
|
+
DependaMan is distributed as a Python package (`dependaman`). The current layout:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
dependaman/
|
|
123
|
+
__init__.py # public API: dependaman()
|
|
124
|
+
__main__.py # CLI entry point
|
|
125
|
+
core.py # orchestration
|
|
126
|
+
discovery.py # Phase 1 — file discovery
|
|
127
|
+
parser.py # Phase 2 — import parsing
|
|
128
|
+
graph.py # Phase 3 — graph construction
|
|
129
|
+
analysis.py # Phase 4 — analysis passes
|
|
130
|
+
git.py # Phase 5 — git integration
|
|
131
|
+
renderer.py # Phase 6 — HTML output
|
|
132
|
+
pool.py # GIL-aware executor selection
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Entry point via `pyproject.toml`:
|
|
136
|
+
```
|
|
137
|
+
[project.scripts]
|
|
138
|
+
dependaman = "dependaman.__main__:dependaman"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Usage
|
|
142
|
+
|
|
143
|
+
**CLI:**
|
|
144
|
+
```bash
|
|
145
|
+
dependaman # analyzes current directory, opens browser
|
|
146
|
+
dependaman /path/to/project
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Python API:**
|
|
150
|
+
```python
|
|
151
|
+
from dependaman import dependaman
|
|
152
|
+
|
|
153
|
+
html = dependaman(".", in_memory=True) # returns HTML string
|
|
154
|
+
dependaman(".") # writes output.html + opens browser
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Roadmap
|
|
160
|
+
|
|
161
|
+
- [X] Phase 1: File discovery
|
|
162
|
+
- [X] Phase 2: Import parsing (`ast`)
|
|
163
|
+
- [X] Phase 3: Graph construction
|
|
164
|
+
- [X] Phase 4: Analysis (dead code, cycles, hotspots)
|
|
165
|
+
- [X] Phase 5: Git integration
|
|
166
|
+
- [X] Phase 6: HTML renderer
|
|
167
|
+
- [X] Phase 7: Unused symbol detection (functions, classes, methods never imported)
|
|
168
|
+
- [X] Phase 8: Installable package (`uv pip install -e .`)
|
|
169
|
+
- [X] Phase 9: CLI entry point with auto project root detection and browser open
|
|
170
|
+
- [X] Phase 10: Performance — parallel git stats (ThreadPoolExecutor), GIL-aware pool for parsing and graph construction
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# DependaMan
|
|
2
|
+
|
|
3
|
+
Understand your Python project's internal structure. Find cycles, dead modules,
|
|
4
|
+
hotspots, and architecture problems — visualized as an interactive graph.
|
|
5
|
+
|
|
6
|
+
No external Python dependencies. Pure stdlib only.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Goal
|
|
11
|
+
|
|
12
|
+
Given a Python project directory, DependaMan produces an interactive HTML graph
|
|
13
|
+
showing:
|
|
14
|
+
|
|
15
|
+
- Which modules import which (directed dependency graph)
|
|
16
|
+
- Which modules are never imported (dead code candidates)
|
|
17
|
+
- Circular import chains
|
|
18
|
+
- Modules with high fan-in (many dependents) or high fan-out (many dependencies)
|
|
19
|
+
- Module size (lines of code, number of functions/classes)
|
|
20
|
+
- Git churn: how often each module changes
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Architecture
|
|
25
|
+
|
|
26
|
+
### Phase 1 — File Discovery
|
|
27
|
+
Walk the project directory, collect all `.py` files, and determine the package
|
|
28
|
+
root. Distinguish internal modules from external ones (stdlib + third-party are
|
|
29
|
+
ignored).
|
|
30
|
+
|
|
31
|
+
### Phase 2 — Import Parsing
|
|
32
|
+
Use `ast` to parse each file and extract `import` and `from ... import`
|
|
33
|
+
statements. Resolve relative imports. Filter to internal-only imports.
|
|
34
|
+
|
|
35
|
+
### Phase 3 — Graph Construction
|
|
36
|
+
Build a directed graph as an adjacency structure:
|
|
37
|
+
- Node = internal module
|
|
38
|
+
- Edge A → B = "module A imports module B"
|
|
39
|
+
|
|
40
|
+
Attach metadata to each node: file path, line count, function/class count.
|
|
41
|
+
|
|
42
|
+
### Phase 4 — Analysis
|
|
43
|
+
Run these passes on the graph:
|
|
44
|
+
|
|
45
|
+
- **Dead code**: nodes with no incoming edges and not an entry point
|
|
46
|
+
- **Circular imports**: detect cycles (DFS-based)
|
|
47
|
+
- **Hotspots**: nodes ranked by fan-in (most imported)
|
|
48
|
+
- **Coupling**: nodes ranked by fan-out (imports the most)
|
|
49
|
+
|
|
50
|
+
### Phase 5 — Git Integration
|
|
51
|
+
Use `subprocess` + `git log` to compute per-file:
|
|
52
|
+
- Commit frequency (how often it changes)
|
|
53
|
+
- Lines added/removed over time (churn)
|
|
54
|
+
- Last author that changed the file
|
|
55
|
+
|
|
56
|
+
Attach this data to graph nodes. Optional: skipped if the project is not a git
|
|
57
|
+
repo.
|
|
58
|
+
|
|
59
|
+
### Phase 6 — HTML Output
|
|
60
|
+
Generate a self-contained `.html` file (or return HTML as a string for web
|
|
61
|
+
integration).
|
|
62
|
+
|
|
63
|
+
The HTML template is a static string embedded in Python. Only the data changes
|
|
64
|
+
between runs. Python serializes the graph to JSON and injects it into the
|
|
65
|
+
template — no templating library needed:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import json
|
|
69
|
+
|
|
70
|
+
data = json.dumps({"nodes": [...], "edges": [...]})
|
|
71
|
+
html = TEMPLATE.replace("__GRAPH_DATA__", data)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The template contains a `<script>` block that reads the injected data and
|
|
75
|
+
renders the graph using the browser's `<canvas>` or SVG API. No external JS
|
|
76
|
+
libraries required.
|
|
77
|
+
|
|
78
|
+
The graph supports:
|
|
79
|
+
- **Hover tooltips**: quick summary (import count, churn score)
|
|
80
|
+
- **Click modals**: full detail panel (git log, list of dependents/dependencies,
|
|
81
|
+
size metrics)
|
|
82
|
+
|
|
83
|
+
The output function signature is designed to be framework-agnostic:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
def render(graph, analysis) -> str: # returns HTML string
|
|
87
|
+
...
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
This makes it trivial to plug into FastAPI, Flask, or any other framework:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
# FastAPI example
|
|
94
|
+
@app.get("/graph", response_class=HTMLResponse)
|
|
95
|
+
def dependency_graph():
|
|
96
|
+
graph = build_graph(".")
|
|
97
|
+
analysis = analyze(graph)
|
|
98
|
+
return render(graph, analysis)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Package Structure
|
|
104
|
+
|
|
105
|
+
DependaMan is distributed as a Python package (`dependaman`). The current layout:
|
|
106
|
+
|
|
107
|
+
```
|
|
108
|
+
dependaman/
|
|
109
|
+
__init__.py # public API: dependaman()
|
|
110
|
+
__main__.py # CLI entry point
|
|
111
|
+
core.py # orchestration
|
|
112
|
+
discovery.py # Phase 1 — file discovery
|
|
113
|
+
parser.py # Phase 2 — import parsing
|
|
114
|
+
graph.py # Phase 3 — graph construction
|
|
115
|
+
analysis.py # Phase 4 — analysis passes
|
|
116
|
+
git.py # Phase 5 — git integration
|
|
117
|
+
renderer.py # Phase 6 — HTML output
|
|
118
|
+
pool.py # GIL-aware executor selection
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Entry point via `pyproject.toml`:
|
|
122
|
+
```
|
|
123
|
+
[project.scripts]
|
|
124
|
+
dependaman = "dependaman.__main__:dependaman"
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Usage
|
|
128
|
+
|
|
129
|
+
**CLI:**
|
|
130
|
+
```bash
|
|
131
|
+
dependaman # analyzes current directory, opens browser
|
|
132
|
+
dependaman /path/to/project
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Python API:**
|
|
136
|
+
```python
|
|
137
|
+
from dependaman import dependaman
|
|
138
|
+
|
|
139
|
+
html = dependaman(".", in_memory=True) # returns HTML string
|
|
140
|
+
dependaman(".") # writes output.html + opens browser
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Roadmap
|
|
146
|
+
|
|
147
|
+
- [X] Phase 1: File discovery
|
|
148
|
+
- [X] Phase 2: Import parsing (`ast`)
|
|
149
|
+
- [X] Phase 3: Graph construction
|
|
150
|
+
- [X] Phase 4: Analysis (dead code, cycles, hotspots)
|
|
151
|
+
- [X] Phase 5: Git integration
|
|
152
|
+
- [X] Phase 6: HTML renderer
|
|
153
|
+
- [X] Phase 7: Unused symbol detection (functions, classes, methods never imported)
|
|
154
|
+
- [X] Phase 8: Installable package (`uv pip install -e .`)
|
|
155
|
+
- [X] Phase 9: CLI entry point with auto project root detection and browser open
|
|
156
|
+
- [X] Phase 10: Performance — parallel git stats (ThreadPoolExecutor), GIL-aware pool for parsing and graph construction
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""__main__.py
|
|
2
|
+
|
|
3
|
+
Orchestrator. Here is where everything takes shape. In dependaman we build all
|
|
4
|
+
tools that are needed to make the code analysis, here we put them together so
|
|
5
|
+
with a simple command/function call we can execute the project analysis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .core import dependaman
|
|
12
|
+
from .discovery import find_project_root
|
|
13
|
+
|
|
14
|
+
if __name__ == "__main__":
    # Use an explicit path argument when given; otherwise auto-detect the
    # project root by walking up from the current working directory.
    path: Path = (
        Path(sys.argv[1]).resolve()
        if len(sys.argv) > 1
        else find_project_root(Path.cwd())
    )
    # NOTE(review): when invoked through the `dependaman` console script
    # (pyproject points at "dependaman.__main__:dependaman"), this guard
    # does not run — the script calls the imported core.dependaman()
    # directly with its defaults, so the argv handling and root detection
    # here only apply to `python -m dependaman`. Confirm this is intended.
    dependaman(path)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""analysis.py"""
|
|
2
|
+
|
|
3
|
+
from .discovery import Module
|
|
4
|
+
from .parser import definition_collector
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def fanin_analyzer(
    project_nodes: set[str],
    graph: dict[str, set[str]],
) -> dict[str, int]:
    """Count how many times each module is imported (fan-in).

    Args:
        project_nodes: every internal module node; each starts at 0.
        graph: adjacency map of "module -> modules it imports".

    Returns:
        Mapping of node name to the number of modules that import it.
    """
    fanin = dict.fromkeys(project_nodes, 0)
    # Only the edge sets matter here; the importing module's own name is
    # irrelevant, so iterate values() instead of items().
    for edges in graph.values():
        for edge in edges:
            fanin[edge] += 1
    return fanin
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def fanout_analyzer(graph: dict[str, set[str]]) -> dict[str, int]:
    """Count how many internal imports each module has (fan-out).

    Args:
        graph: adjacency map of "module -> modules it imports".

    Returns:
        Mapping of node name to the size of its outgoing edge set.
    """
    # A node's fan-out is simply the size of its edge set; the manual
    # zero-init + accumulate loop collapses to one dict comprehension.
    return {node: len(edges) for node, edges in graph.items()}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _dfs(
    key: str,
    graph: dict[str, set[str]],
    current_path: list[str],
    visited_nodes: set[str],
) -> list[str] | None:
    """Depth-first walk from *key* looking for a cycle.

    Mutates *current_path* (the DFS stack) and *visited_nodes* (nodes whose
    subtree has been fully explored) in place.

    Returns the first cycle found as a list of node names — sliced from the
    point where the path re-enters itself, with the repeated node appended
    again at the end — or an empty list (falsy) when no cycle is reachable
    from *key*.
    """
    dfs = []
    current_path.append(key)

    for neighbor in graph[key]:
        # Back edge: the neighbor is already on the current DFS path, so we
        # have a cycle. Slice the path from the neighbor onward and close
        # the loop by appending the neighbor once more.
        if neighbor in current_path:
            dfs = current_path[current_path.index(neighbor) :].copy()
            dfs.append(neighbor)
            break
        # Recurse only into unexplored nodes that actually have an entry in
        # the graph (edges may point at package-level nodes with no key).
        if neighbor not in visited_nodes and neighbor in graph:
            dfs = _dfs(neighbor, graph, current_path, visited_nodes)
            if dfs:
                break
    # Backtrack: entries on the path are unique, so this removes the node
    # just pushed above. The node is marked visited even when it sits on a
    # found cycle, so each node starts at most one reported cycle.
    current_path.pop(current_path.index(key))
    visited_nodes.add(key)
    return dfs
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def circular_imports(graph: dict[str, set[str]]) -> list[list[str]]:
    """Detect circular import chains in the dependency graph.

    Runs a depth-first search from every not-yet-visited node; each cycle
    is reported as a list of node names whose first and last entries are
    the same module.

    Args:
        graph: adjacency map of "module -> modules it imports".

    Returns:
        A (possibly empty) list of cycles; at most one cycle is reported
        per DFS start node.
    """
    visited_nodes: set[str] = set()
    # Renamed from the original locals: `eval` shadowed the builtin and
    # `circular_imports` shadowed this very function.
    cycles: list[list[str]] = []
    for key in graph:
        if key in visited_nodes:
            continue
        # Fresh path per DFS root; _dfs mutates it while traversing.
        cycle = _dfs(key, graph, [], visited_nodes)
        if cycle:
            cycles.append(cycle)
    return cycles
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def symbols_to_review_detector(
    module: Module, project_import_strings: set[str]
) -> set[str]:
    """Return symbols defined in *module* that are never imported elsewhere.

    Matches every function/class definition collected from the module
    against the set of import strings used across the whole project;
    whatever is defined but never imported is a candidate for review.

    Args:
        module: the module whose definitions are inspected.
        project_import_strings: every import string found in the project.

    Returns:
        The set of defined symbols absent from all import statements.
    """
    # Build the set directly from the collector instead of set() + update().
    definitions: set[str] = set(definition_collector(module))
    # Set difference: defined here but imported nowhere in the project.
    return definitions - project_import_strings
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""core.py
|
|
2
|
+
|
|
3
|
+
Orchestrator. Here is where everything takes shape. In dependaman we build all
|
|
4
|
+
tools that are needed to make the code analysis, here we put them together so
|
|
5
|
+
with a simple command/function call we can execute the project analysis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import webbrowser
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .analysis import (
|
|
12
|
+
circular_imports,
|
|
13
|
+
fanin_analyzer,
|
|
14
|
+
fanout_analyzer,
|
|
15
|
+
)
|
|
16
|
+
from .discovery import discover, module_level_node_generator
|
|
17
|
+
from .git import git_commit_freq
|
|
18
|
+
from .graph import speed_grapher
|
|
19
|
+
from .parser import project_import_strings
|
|
20
|
+
from .renderer import data_constructor, render
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def dependaman(path=".", in_memory=False) -> str | None:
    """Run the full analysis pipeline and render the result.

    Given the project path, the folder structure and nodes of the project
    are generated. Each file is then parsed to extract its import nodes.
    Once both node sets exist, we match and see, for each module, which
    other modules it imports.

    With that information we can analyze which modules import the most,
    which are imported the most, and detect circular imports.

    Args:
        path: project root to analyze (defaults to the current directory).
        in_memory: when True, return the rendered HTML instead of writing
            ``output.html`` and opening a browser.

    Returns:
        The rendered HTML string when ``in_memory`` is True, else None.
    """

    file_source = discover(path)
    project_nodes: set[str] = module_level_node_generator(file_source)
    project_imports: set[str] = project_import_strings(file_source)
    directed_graph = speed_grapher(file_source, project_nodes)
    fanin_analysis = fanin_analyzer(project_nodes, directed_graph)
    fanout_analysis = fanout_analyzer(directed_graph)
    circular_imports_analysis = circular_imports(directed_graph)
    git_freq = git_commit_freq(file_source, project_nodes)

    data = data_constructor(
        file_source,
        project_nodes,
        directed_graph,
        circular_imports_analysis,
        fanin_analysis,
        fanout_analysis,
        git_freq,
        project_imports,
    )

    rendered_data: str = render(data)

    if in_memory:
        return rendered_data

    # Explicit encoding: the report contains non-ASCII text (e.g. em
    # dashes) and the platform default encoding — notably on Windows —
    # may not be UTF-8, which would make the write raise.
    with open("output.html", "w", encoding="utf-8") as f:
        f.write(rendered_data)
    webbrowser.open(f"file://{Path('output.html').resolve()}")
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""discovery.py"""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
EXCLUDED_DIRS: tuple = (
|
|
7
|
+
".venv",
|
|
8
|
+
"__pycache__",
|
|
9
|
+
".git",
|
|
10
|
+
"build",
|
|
11
|
+
"dist",
|
|
12
|
+
".eggs",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
PROJECT_MARKERS = (
|
|
16
|
+
"pyproject.toml",
|
|
17
|
+
"setup.py",
|
|
18
|
+
"requirements.txt",
|
|
19
|
+
"setup.cfg",
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def find_project_root(start: Path) -> Path:
    """Locate the project root by walking up from *start*.

    Each directory from *start* upward is checked for one of the files in
    PROJECT_MARKERS; the first directory containing a marker is the root.
    If no marker exists anywhere on the path, *start* itself is returned.
    """
    candidates = [start, *start.parents]
    for directory in candidates:
        for marker in PROJECT_MARKERS:
            if (directory / marker).exists():
                return directory
    return start
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(slots=True)
class Module:
    """A single ``.py`` file discovered in the project."""

    # File stem, i.e. the module name without the ``.py`` suffix.
    name: str
    # Path to the module file, relative to the project root (see discover).
    path: Path
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(slots=True)
class Package:
    """A package directory (one containing ``__init__.py``) and its modules."""

    # Dotted package name relative to the project root; "." is used for
    # loose root-level scripts (see discover).
    name: str
    # Direct child modules of the package (``__init__.py`` excluded).
    modules: list[Module]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def discover(root: str | Path) -> list[Package]:
    """Collect every package and module under *root*.

    Walks the directory tree matching all ``*.py`` files, skipping any
    file that lives inside one of the directories listed in EXCLUDED_DIRS
    at the package level.
    """
    root = Path(root).resolve()

    packages: list[Package] = []

    # Root-level standalone scripts are grouped into a pseudo-package "."
    # when the root folder itself is not a package (no __init__.py).
    if not (root / "__init__.py").exists():
        loose_scripts: list[Module] = [
            Module(entry.stem, entry.relative_to(root))
            for entry in root.iterdir()
            if entry.suffix == ".py" and entry.name != "__init__.py"
        ]
        if loose_scripts:
            packages.append(Package(".", loose_scripts))

    for init_file in root.rglob("__init__.py"):
        # Skip anything nested inside an excluded directory.
        if any(part in EXCLUDED_DIRS for part in init_file.parts):
            continue

        package_dir: Path = init_file.parent
        package_name = ".".join(package_dir.relative_to(root).parts)

        # Every .py file directly inside the package directory is a module.
        contents: list[Module] = [
            Module(entry.stem, entry.relative_to(root))
            for entry in package_dir.iterdir()
            if entry.suffix == ".py" and entry.name != "__init__.py"
        ]
        packages.append(Package(package_name, contents))

    return packages
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def node_name_gen(
    module: "Module",
) -> str:
    """Build the dotted node string for a module.

    The module's relative path has its filename component replaced by the
    module name, then the parts are joined with dots, e.g.
    ``pkg/sub/mod.py`` -> ``pkg.sub.mod``.
    """
    # Slicing off the filename is clearer than mutating with pop/append.
    parts = [*module.path.parts[:-1], module.name]
    return ".".join(parts)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def module_level_node_generator(packages: list[Package]) -> set[str]:
    """Build the set of internal node names used to filter out imports
    that do not belong to the project.
    """
    # One node per package, plus one node per module inside each package.
    nodes: set[str] = {package.name for package in packages}
    for package in packages:
        nodes.update(node_name_gen(module) for module in package.modules)
    return nodes
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""git.py"""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
|
+
from typing import TypedDict
|
|
7
|
+
|
|
8
|
+
from .discovery import Module, Package, node_name_gen
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def check_git_project() -> bool:
    """Return True when the current directory is inside a git work tree.

    Returns False both when git reports we are outside a work tree and
    when the ``git`` executable is not available at all.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True,
            text=True,
            check=True,
        )
    # CalledProcessError: not inside a repo. OSError (incl.
    # FileNotFoundError): the git binary is missing from PATH — previously
    # this crashed instead of gracefully skipping git integration.
    except (subprocess.CalledProcessError, OSError):
        return False
    return result.stdout.strip() == "true"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# TODO: the main issue is that we are doing a subprocess call per each file
|
|
26
|
+
# that is in the project. Because we are waiting for IO, we could throw all of
|
|
27
|
+
# this to a threadpool or a coroutine to make all of this in parallel, as doing
|
|
28
|
+
# the complete `git log` would require a full parse change and we would also
|
|
29
|
+
# need a way to get the last person that worked on that node, which would
|
|
30
|
+
# complicate the process greatly.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class GitStats(TypedDict):
    """Per-module git statistics gathered by ``_get_git_stats``."""

    # Node name the stats belong to (popped off again in git_commit_freq).
    node: str
    # Number of commits that touched the module's file.
    n_commits: int
    # Total lines added across those commits.
    added_lines: int
    # Total lines removed across those commits.
    removed_lines: int
    # Author e-mail of the most recent commit (git log -1 --pretty=format:%ae).
    last_author: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_git_stats(
    node: str,
    module: Module,
) -> GitStats:
    """Collect git statistics for a single module.

    Runs ``git log --numstat`` restricted to the module's file to count
    commits and sum added/removed lines, then ``git log -1`` to fetch the
    last author's e-mail.

    Args:
        node: the graph node name this module maps to.
        module: the module whose file history is inspected.

    Returns:
        A GitStats dict with commit count, churn, and last author.

    Raises:
        subprocess.CalledProcessError: if either git invocation fails.
    """
    git_freq = subprocess.run(
        [
            "git",
            "log",
            "--numstat",
            "--pretty=format:",
            "--",
            f"{module.path}",
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    n_commits = 0
    added_lines = 0
    removed_lines = 0
    # --numstat emits one "added<TAB>removed<TAB>path" line per commit for
    # this file. Parsing line by line (instead of split() on all
    # whitespace) keeps paths containing spaces from corrupting the
    # 3-token grouping, and binary files — reported as "-<TAB>-<TAB>path"
    # — previously crashed int() and are now counted as zero churn.
    for line in git_freq.stdout.splitlines():
        if not line.strip():
            continue
        added, removed, *_ = line.split("\t")
        n_commits += 1
        if added.isdigit():
            added_lines += int(added)
        if removed.isdigit():
            removed_lines += int(removed)
    git_last_author = subprocess.run(
        [
            "git",
            "log",
            "-1",
            "--pretty=format:%ae",
            "--",
            f"{module.path}",
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    return GitStats(
        node=node,
        n_commits=n_commits,
        added_lines=added_lines,
        removed_lines=removed_lines,
        last_author=git_last_author.stdout.strip(),
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def git_commit_freq(
    file_source: list[Package],
    project_nodes: set[str],
) -> defaultdict[str, dict[str, int | str]]:
    """Gather git statistics for every node in the project.

    Returns an empty mapping when the project is not a git repository.
    """
    if not check_git_project():
        return defaultdict(dict)

    freq: defaultdict[str, dict[str, int | str]] = defaultdict(dict)

    # One subprocess call per file is slow in sequence; since each call is
    # I/O bound, a thread pool overlaps all of the git invocations.
    with ThreadPoolExecutor() as pool:
        futures = [
            pool.submit(_get_git_stats, node, module)
            for package in file_source
            for module in package.modules
            if (node := node_name_gen(module)) in project_nodes
        ]

        # Collect each worker's stats as it finishes.
        for future in as_completed(futures):
            stats: GitStats = future.result()
            freq[stats.pop("node")] = stats
    return freq
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""graph.py"""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from concurrent.futures import Future, as_completed
|
|
5
|
+
|
|
6
|
+
from .discovery import Module, Package, node_name_gen
|
|
7
|
+
from .parser import parse
|
|
8
|
+
from .pool import MIN_FILES_FOR_POOL, executor
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _parse_module_edges(
    module: Module, project_nodes: set[str]
) -> tuple[str, set[str]]:
    """Produce the (node, edges) pair for one module.

    Edges are the module's parsed imports restricted to internal project
    nodes.
    """
    node = node_name_gen(module)
    edges = parse(module) & project_nodes
    return node, edges
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# NOTE: another pool optimization — module parsing is fanned out to the
# GIL-aware executor when there are enough files to justify it.
def speed_grapher(
    file_source: list[Package],
    project_nodes: set[str],
) -> dict[str, set]:
    """Build the directed graph of dependencies for every node.

    Small projects are parsed sequentially; larger ones delegate the
    per-module parsing to the executor selected in ``pool.py``.
    """

    # Missing keys resolve to an empty edge set.
    directed_graph: defaultdict[str, set] = defaultdict(set)

    all_modules: list[Module] = [
        module for package in file_source for module in package.modules
    ]

    # Below the threshold the pool's overhead outweighs any parallelism.
    if len(all_modules) < MIN_FILES_FOR_POOL:
        for module in all_modules:
            node, edges = _parse_module_edges(module, project_nodes)
            directed_graph[node] = edges
        return directed_graph

    # Enough files: parse them concurrently and collect as they complete.
    with executor() as pool:
        futures: list[Future[tuple[str, set[str]]]] = [
            pool.submit(_parse_module_edges, module, project_nodes)
            for module in all_modules
        ]

        for future in as_completed(futures):
            node, edges = future.result()
            directed_graph[node] = edges
    return directed_graph
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""parser.py"""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from concurrent.futures import Future, as_completed
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from .discovery import Module, Package, node_name_gen
|
|
8
|
+
from .pool import MIN_FILES_FOR_POOL, executor
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(slots=True)
class DpdmanImport:
    """Normalized representation of a single import statement."""

    # Dotted module path; None for bare relative imports (``from . import x``).
    module: str | None
    # Names listed after ``import``; empty for plain ``import pkg`` statements.
    names: list[str]
    # Relative-import level as reported by ast: 0 = absolute, 1 = ``.``, 2 = ``..``.
    level: int
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def node_generator(
    module: Module,
    dpdman_import: DpdmanImport,
) -> set[str]:
    """Build the set of import-string nodes described by *dpdman_import*.

    Relative imports are resolved against the directory parts of the
    importing module's path.  The base import string is returned together
    with one entry per imported name.
    """
    prefix_parts: list[str] = []
    if dpdman_import.level != 0:
        # Directory parts of the importing module (file name dropped).
        pkg_parts = list(module.path.parts[:-1])
        # Every level beyond the first climbs one package upwards.
        keep = len(pkg_parts) - (dpdman_import.level - 1)
        prefix_parts = pkg_parts[:keep]
    if dpdman_import.module:
        prefix_parts.append(dpdman_import.module)
    base = ".".join(prefix_parts)

    # NOTE: At module level we cannot tell imported sub-modules apart from
    # functions/classes, so the base string is always recorded in addition
    # to every imported name.
    nodes: set[str] = {base}
    nodes.update(f"{base}.{name}" for name in dpdman_import.names)
    return nodes
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def parse(module: Module) -> set[str]:
    """Analyze the imports of *module* and return its import-string nodes."""
    tree = ast.parse(module.path.read_text(encoding="utf-8"))

    found: set[str] = set()
    for stmt in ast.walk(tree):
        if isinstance(stmt, ast.Import):
            # ``import a, b`` produces one DpdmanImport per imported package.
            for alias in stmt.names:
                found |= node_generator(
                    module,
                    DpdmanImport(module=alias.name, names=[], level=0),
                )
        elif isinstance(stmt, ast.ImportFrom):
            found |= node_generator(
                module,
                DpdmanImport(
                    module=stmt.module,
                    names=[alias.name for alias in stmt.names],
                    level=stmt.level,
                ),
            )
    return found
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# NOTE: The project needs to be large for this optimization to take place, else
|
|
79
|
+
# the memory of creating a new process/thread pool and their shutting down will
|
|
80
|
+
# take away the gains.
|
|
81
|
+
def project_import_strings(file_source: list[Package]) -> set[str]:
|
|
82
|
+
"""Returns all the import nodes of the project."""
|
|
83
|
+
project_imports = set()
|
|
84
|
+
|
|
85
|
+
modules: list[Module] = [
|
|
86
|
+
module for package in file_source for module in package.modules
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
# Be vigilant of how many files are the ones needed for the sweet spot.
|
|
90
|
+
if len(modules) < MIN_FILES_FOR_POOL:
|
|
91
|
+
for module in modules:
|
|
92
|
+
project_imports.update(parse(module))
|
|
93
|
+
return project_imports
|
|
94
|
+
|
|
95
|
+
with executor() as pool:
|
|
96
|
+
futures: list[Future[set[str]]] = [
|
|
97
|
+
pool.submit(parse, module) for module in modules
|
|
98
|
+
]
|
|
99
|
+
for future in as_completed(futures):
|
|
100
|
+
project_imports.update(future.result())
|
|
101
|
+
return project_imports
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def definition_collector(module: Module) -> set[str]:
    """Return ``module_name.def_name`` for every top-level function and class."""
    tree = ast.parse(module.path.read_text(encoding="utf-8"))

    # Node-string prefix for this module.
    prefix = node_name_gen(module)

    # Only direct children of the module are inspected — nested defs and
    # methods are deliberately not collected.
    return {
        f"{prefix}.{child.name}"
        for child in ast.iter_child_nodes(tree)
        if isinstance(child, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef))
    }
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""pool.py
|
|
2
|
+
|
|
3
|
+
Drown me baby!!
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sys
|
|
7
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
gil_enabled: bool = sys._is_gil_enabled()
|
|
11
|
+
except AttributeError:
|
|
12
|
+
gil_enabled = True
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
MIN_FILES_FOR_POOL: int = 100
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
executor: type[ProcessPoolExecutor | ThreadPoolExecutor] = (
|
|
19
|
+
ProcessPoolExecutor if gil_enabled else ThreadPoolExecutor
|
|
20
|
+
)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""renderer.py"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .analysis import symbols_to_review_detector
|
|
7
|
+
from .discovery import Package, node_name_gen
|
|
8
|
+
|
|
9
|
+
# Directory of this file; the HTML template and JS assets are loaded from here.
CURRENT_FOLDER_LOCATION: Path = Path(__file__).parent
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def data_constructor(
    file_source: list[Package],
    project_nodes: set[str],
    directed_graph: dict[str, set],
    cycles: list[list[str]],
    fanin: dict[str, int],
    fanout: dict[str, int],
    git_frequency: dict[str, dict[str, int | str]],
    project_imports: set[str],
) -> dict:
    """Assemble the JSON-serializable payload consumed by the renderer."""
    payload: dict = {"packages": [], "cycles": cycles}

    for package in file_source:
        entry: dict = {"id": package.name, "modules": []}
        for module in package.modules:
            node = node_name_gen(module)

            # Modules outside the project graph are not rendered.
            if node not in project_nodes:
                continue

            stats = git_frequency.get(node, {})
            entry["modules"].append(
                {
                    "id": node,
                    "edges": list(directed_graph.get(node, set())),
                    "fan_in": fanin.get(node, 0),
                    "fan_out": fanout.get(node, 0),
                    "n_commits": stats.get("n_commits", 0),
                    "added_lines": stats.get("added_lines", 0),
                    "removed_lines": stats.get("removed_lines", 0),
                    "last_author": stats.get("last_author", ""),
                    "symbols_to_review": list(
                        symbols_to_review_detector(module, project_imports)
                    ),
                }
            )
        payload["packages"].append(entry)

    return payload
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def render(data: dict) -> str:
    """Build the final self-contained HTML page as a string.

    The graph *data* is serialized to JSON and spliced into the static
    HTML template together with the rendering script, so the output
    needs no external assets.
    """
    json_data: str = json.dumps(data)
    # Read explicitly as UTF-8: the locale's default codec is not portable
    # (consistent with every other file read in this package).
    html_data = (CURRENT_FOLDER_LOCATION / "dependaman_template.html").read_text(
        encoding="utf-8"
    )
    js_data = (CURRENT_FOLDER_LOCATION / "dependaman_graph.js").read_text(
        encoding="utf-8"
    )
    html_data = html_data.replace("__GRAPH_DATA__", json_data)
    html_data = html_data.replace("__GRAPH_JS__", f"<script>{js_data}</script>")
    return html_data
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dependaman
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Understand your Python dependencies. Find cycles, dead modules, and architecture problems.
|
|
5
|
+
Author-email: Jacobo Bedoya <jacobobedoya@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://codeberg.org/jacobitosuperstar/DependaMan
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# DependaMan
|
|
16
|
+
|
|
17
|
+
Understand your Python project's internal structure. Find cycles, dead modules,
|
|
18
|
+
hotspots, and architecture problems — visualized as an interactive graph.
|
|
19
|
+
|
|
20
|
+
No external Python dependencies. Pure stdlib only.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Goal
|
|
25
|
+
|
|
26
|
+
Given a Python project directory, DependaMan produces an interactive HTML graph
|
|
27
|
+
showing:
|
|
28
|
+
|
|
29
|
+
- Which modules import which (directed dependency graph)
|
|
30
|
+
- Which modules are never imported (dead code candidates)
|
|
31
|
+
- Circular import chains
|
|
32
|
+
- Modules with high fan-in (many dependents) or high fan-out (many dependencies)
|
|
33
|
+
- Module size (lines of code, number of functions/classes)
|
|
34
|
+
- Git churn: how often each module changes
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Architecture
|
|
39
|
+
|
|
40
|
+
### Phase 1 — File Discovery
|
|
41
|
+
Walk the project directory, collect all `.py` files, and determine the package
|
|
42
|
+
root. Distinguish internal modules from external ones (stdlib + third-party are
|
|
43
|
+
ignored).
|
|
44
|
+
|
|
45
|
+
### Phase 2 — Import Parsing
|
|
46
|
+
Use `ast` to parse each file and extract `import` and `from ... import`
|
|
47
|
+
statements. Resolve relative imports. Filter to internal-only imports.
|
|
48
|
+
|
|
49
|
+
### Phase 3 — Graph Construction
|
|
50
|
+
Build a directed graph as an adjacency structure:
|
|
51
|
+
- Node = internal module
|
|
52
|
+
- Edge A → B = "module A imports module B"
|
|
53
|
+
|
|
54
|
+
Attach metadata to each node: file path, line count, function/class count.
|
|
55
|
+
|
|
56
|
+
### Phase 4 — Analysis
|
|
57
|
+
Run these passes on the graph:
|
|
58
|
+
|
|
59
|
+
- **Dead code**: nodes with no incoming edges and not an entry point
|
|
60
|
+
- **Circular imports**: detect cycles (DFS-based)
|
|
61
|
+
- **Hotspots**: nodes ranked by fan-in (most imported)
|
|
62
|
+
- **Coupling**: nodes ranked by fan-out (imports the most)
|
|
63
|
+
|
|
64
|
+
### Phase 5 — Git Integration
|
|
65
|
+
Use `subprocess` + `git log` to compute per-file:
|
|
66
|
+
- Commit frequency (how often it changes)
|
|
67
|
+
- Lines added/removed over time (churn)
|
|
68
|
+
- Last author that changed the file
|
|
69
|
+
|
|
70
|
+
Attach this data to graph nodes. Optional: skipped if the project is not a git
|
|
71
|
+
repo.
|
|
72
|
+
|
|
73
|
+
### Phase 6 — HTML Output
|
|
74
|
+
Generate a self-contained `.html` file (or return HTML as a string for web
|
|
75
|
+
integration).
|
|
76
|
+
|
|
77
|
+
The HTML template is a static string embedded in Python. Only the data changes
|
|
78
|
+
between runs. Python serializes the graph to JSON and injects it into the
|
|
79
|
+
template — no templating library needed:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import json
|
|
83
|
+
|
|
84
|
+
data = json.dumps({"nodes": [...], "edges": [...]})
|
|
85
|
+
html = TEMPLATE.replace("__GRAPH_DATA__", data)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The template contains a `<script>` block that reads the injected data and
|
|
89
|
+
renders the graph using the browser's `<canvas>` or SVG API. No external JS
|
|
90
|
+
libraries required.
|
|
91
|
+
|
|
92
|
+
The graph supports:
|
|
93
|
+
- **Hover tooltips**: quick summary (import count, churn score)
|
|
94
|
+
- **Click modals**: full detail panel (git log, list of dependents/dependencies,
|
|
95
|
+
size metrics)
|
|
96
|
+
|
|
97
|
+
The output function signature is designed to be framework-agnostic:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
def render(graph, analysis) -> str: # returns HTML string
|
|
101
|
+
...
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
This makes it trivial to plug into FastAPI, Flask, or any other framework:
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
# FastAPI example
|
|
108
|
+
@app.get("/graph", response_class=HTMLResponse)
|
|
109
|
+
def dependency_graph():
|
|
110
|
+
graph = build_graph(".")
|
|
111
|
+
analysis = analyze(graph)
|
|
112
|
+
return render(graph, analysis)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Package Structure
|
|
118
|
+
|
|
119
|
+
DependaMan is distributed as a Python package (`dependaman`). The current layout:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
dependaman/
|
|
123
|
+
__init__.py # public API: dependaman()
|
|
124
|
+
__main__.py # CLI entry point
|
|
125
|
+
core.py # orchestration
|
|
126
|
+
discovery.py # Phase 1 — file discovery
|
|
127
|
+
parser.py # Phase 2 — import parsing
|
|
128
|
+
graph.py # Phase 3 — graph construction
|
|
129
|
+
analysis.py # Phase 4 — analysis passes
|
|
130
|
+
git.py # Phase 5 — git integration
|
|
131
|
+
renderer.py # Phase 6 — HTML output
|
|
132
|
+
pool.py # GIL-aware executor selection
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Entry point via `pyproject.toml`:
|
|
136
|
+
```
|
|
137
|
+
[project.scripts]
|
|
138
|
+
dependaman = "dependaman.__main__:dependaman"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Usage
|
|
142
|
+
|
|
143
|
+
**CLI:**
|
|
144
|
+
```bash
|
|
145
|
+
dependaman # analyzes current directory, opens browser
|
|
146
|
+
dependaman /path/to/project
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Python API:**
|
|
150
|
+
```python
|
|
151
|
+
from dependaman import dependaman
|
|
152
|
+
|
|
153
|
+
html = dependaman(".", in_memory=True) # returns HTML string
|
|
154
|
+
dependaman(".") # writes output.html + opens browser
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Roadmap
|
|
160
|
+
|
|
161
|
+
- [X] Phase 1: File discovery
|
|
162
|
+
- [X] Phase 2: Import parsing (`ast`)
|
|
163
|
+
- [X] Phase 3: Graph construction
|
|
164
|
+
- [X] Phase 4: Analysis (dead code, cycles, hotspots)
|
|
165
|
+
- [X] Phase 5: Git integration
|
|
166
|
+
- [X] Phase 6: HTML renderer
|
|
167
|
+
- [X] Phase 7: Unused symbol detection (functions, classes, methods never imported)
|
|
168
|
+
- [X] Phase 8: Installable package (`uv pip install -e .`)
|
|
169
|
+
- [X] Phase 9: CLI entry point with auto project root detection and browser open
|
|
170
|
+
- [X] Phase 10: Performance — parallel git stats (ThreadPoolExecutor), GIL-aware pool for parsing and graph construction
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
dependaman/__init__.py
|
|
5
|
+
dependaman/__main__.py
|
|
6
|
+
dependaman/analysis.py
|
|
7
|
+
dependaman/core.py
|
|
8
|
+
dependaman/discovery.py
|
|
9
|
+
dependaman/git.py
|
|
10
|
+
dependaman/graph.py
|
|
11
|
+
dependaman/parser.py
|
|
12
|
+
dependaman/pool.py
|
|
13
|
+
dependaman/renderer.py
|
|
14
|
+
dependaman.egg-info/PKG-INFO
|
|
15
|
+
dependaman.egg-info/SOURCES.txt
|
|
16
|
+
dependaman.egg-info/dependency_links.txt
|
|
17
|
+
dependaman.egg-info/entry_points.txt
|
|
18
|
+
dependaman.egg-info/top_level.txt
|
|
19
|
+
test/test_discovery.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dependaman
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dependaman"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Understand your Python dependencies. Find cycles, dead modules, and architecture problems."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
dependencies = []
|
|
8
|
+
license = "MIT"
|
|
9
|
+
license-files = ["LICENSE"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Programming Language :: Python :: 3",
|
|
12
|
+
"Operating System :: OS Independent",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[[project.authors]]
|
|
16
|
+
name = "Jacobo Bedoya"
|
|
17
|
+
email = "jacobobedoya@gmail.com"
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Homepage = "https://codeberg.org/jacobitosuperstar/DependaMan"
|
|
21
|
+
|
|
22
|
+
[project.scripts]
|
|
23
|
+
dependaman = "dependaman.__main__:dependaman"
|
|
24
|
+
|
|
25
|
+
[dependency-groups]
|
|
26
|
+
dev = [
|
|
27
|
+
"pytest>=9.0.2",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[tool.setuptools.packages.find]
|
|
31
|
+
include = ["dependaman*"]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from pathlib import PosixPath
|
|
2
|
+
|
|
3
|
+
from dependaman import discovery
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_discover(tmp_path):
    """File discovery finds package modules and skips ignored directories."""
    # Files that we should find
    # main dir
    (tmp_path / "testing_project").mkdir()
    (tmp_path / "testing_project/__init__.py").touch()
    (tmp_path / "testing_project/project.py").touch()
    # nested package
    (tmp_path / "testing_project/nested_package").mkdir()
    (tmp_path / "testing_project/nested_package/__init__.py").touch()
    (tmp_path / "testing_project/nested_package/nested_project.py").touch()

    # Files that we should ignore (inside a virtual-env directory)
    (tmp_path / ".venv").mkdir()
    (tmp_path / ".venv/file_inside_ignored.py").touch()
    (tmp_path / ".venv/another_file_inside_ignored.py").touch()

    result: list[discovery.Package] = discovery.discover(tmp_path)
    # One Package per directory containing an __init__.py.
    assert len(result) == 2

    testing_packages = [package.name for package in result]
    assert (
        "testing_project" in testing_packages
        and "testing_project.nested_package" in testing_packages
    )

    # NOTE(review): the expected Module paths here are relative to tmp_path —
    # presumably discover() reports paths relative to the scanned root; confirm.
    testing_modules = []
    for package in result:
        testing_modules.extend(package.modules)
    assert (
        discovery.Module("project", PosixPath("testing_project/project.py")) in testing_modules
        and discovery.Module("nested_project", PosixPath("testing_project/nested_package/nested_project.py")) in testing_modules
    )
|