codebase-digest-ai 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_digest_ai-0.1.1/LICENSE +21 -0
- codebase_digest_ai-0.1.1/MANIFEST.in +25 -0
- codebase_digest_ai-0.1.1/PKG-INFO +233 -0
- codebase_digest_ai-0.1.1/README.md +194 -0
- codebase_digest_ai-0.1.1/codebase_digest/__init__.py +8 -0
- codebase_digest_ai-0.1.1/codebase_digest/analyzer/__init__.py +7 -0
- codebase_digest_ai-0.1.1/codebase_digest/analyzer/codebase_analyzer.py +183 -0
- codebase_digest_ai-0.1.1/codebase_digest/analyzer/flow_analyzer.py +164 -0
- codebase_digest_ai-0.1.1/codebase_digest/analyzer/metrics_analyzer.py +130 -0
- codebase_digest_ai-0.1.1/codebase_digest/cli/__init__.py +1 -0
- codebase_digest_ai-0.1.1/codebase_digest/cli/main.py +284 -0
- codebase_digest_ai-0.1.1/codebase_digest/exporters/__init__.py +9 -0
- codebase_digest_ai-0.1.1/codebase_digest/exporters/graph_exporter.py +1038 -0
- codebase_digest_ai-0.1.1/codebase_digest/exporters/html_exporter.py +1052 -0
- codebase_digest_ai-0.1.1/codebase_digest/exporters/json_exporter.py +105 -0
- codebase_digest_ai-0.1.1/codebase_digest/exporters/markdown_exporter.py +273 -0
- codebase_digest_ai-0.1.1/codebase_digest/exporters/readme_exporter.py +306 -0
- codebase_digest_ai-0.1.1/codebase_digest/models.py +81 -0
- codebase_digest_ai-0.1.1/codebase_digest/parser/__init__.py +7 -0
- codebase_digest_ai-0.1.1/codebase_digest/parser/base.py +41 -0
- codebase_digest_ai-0.1.1/codebase_digest/parser/javascript_parser.py +36 -0
- codebase_digest_ai-0.1.1/codebase_digest/parser/python_parser.py +270 -0
- codebase_digest_ai-0.1.1/codebase_digest_ai.egg-info/PKG-INFO +233 -0
- codebase_digest_ai-0.1.1/codebase_digest_ai.egg-info/SOURCES.txt +28 -0
- codebase_digest_ai-0.1.1/codebase_digest_ai.egg-info/dependency_links.txt +1 -0
- codebase_digest_ai-0.1.1/codebase_digest_ai.egg-info/entry_points.txt +2 -0
- codebase_digest_ai-0.1.1/codebase_digest_ai.egg-info/requires.txt +14 -0
- codebase_digest_ai-0.1.1/codebase_digest_ai.egg-info/top_level.txt +1 -0
- codebase_digest_ai-0.1.1/pyproject.toml +85 -0
- codebase_digest_ai-0.1.1/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Codebase Digest
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include pyproject.toml
|
|
4
|
+
|
|
5
|
+
# Python source
|
|
6
|
+
recursive-include codebase_digest *.py
|
|
7
|
+
|
|
8
|
+
# Templates + report assets (CRITICAL)
|
|
9
|
+
recursive-include codebase_digest *.html
|
|
10
|
+
recursive-include codebase_digest *.md
|
|
11
|
+
recursive-include codebase_digest *.j2
|
|
12
|
+
recursive-include codebase_digest *.json
|
|
13
|
+
|
|
14
|
+
# Cleanup
|
|
15
|
+
recursive-exclude * __pycache__
|
|
16
|
+
recursive-exclude * *.py[co]
|
|
17
|
+
|
|
18
|
+
exclude .gitignore
|
|
19
|
+
exclude install.py
|
|
20
|
+
|
|
21
|
+
prune example_project
|
|
22
|
+
prune lib
|
|
23
|
+
prune .digest
|
|
24
|
+
prune .vscode
|
|
25
|
+
prune tests
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codebase-digest-ai
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: AI-native code intelligence engine for semantic codebase analysis
|
|
5
|
+
Author: Harsh Bothara
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/codebase-digest/codebase-digest
|
|
8
|
+
Project-URL: Documentation, https://github.com/codebase-digest/codebase-digest#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/codebase-digest/codebase-digest
|
|
10
|
+
Project-URL: Issues, https://github.com/codebase-digest/codebase-digest/issues
|
|
11
|
+
Keywords: code-analysis,ast,static-analysis,documentation,ai,developer-tools
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: typer>=0.9.0
|
|
26
|
+
Requires-Dist: networkx>=3.0
|
|
27
|
+
Requires-Dist: rich>=13.0.0
|
|
28
|
+
Requires-Dist: jinja2>=3.1.0
|
|
29
|
+
Requires-Dist: pathspec>=0.11.0
|
|
30
|
+
Requires-Dist: pyvis>=0.3.2
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: build>=0.10.0; extra == "dev"
|
|
37
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# codebase-digest
|
|
41
|
+
|
|
42
|
+
🚀 **AI-Native Code Intelligence Engine**
|
|
43
|
+
|
|
44
|
+
Transform any codebase into semantic architectural understanding, execution flows, and human-readable engineering reports.
|
|
45
|
+
|
|
46
|
+
## 🧱 What It Does
|
|
47
|
+
|
|
48
|
+
This is NOT a repo summarizer. This is a code intelligence engine that explains:
|
|
49
|
+
- **What this system does** - Infers project purpose from domain entities
|
|
50
|
+
- **How data flows** - Maps execution paths and call relationships
|
|
51
|
+
- **Where logic lives** - Identifies core components and their responsibilities
|
|
52
|
+
- **What domains exist** - Detects business entities (User, Payment, Wallet, etc.)
|
|
53
|
+
- **What files matter** - Highlights entry points and key modules
|
|
54
|
+
|
|
55
|
+
## ✨ Features
|
|
56
|
+
|
|
57
|
+
- **🔍 Semantic Analysis**: Extract functions, classes, methods, and imports with full context
|
|
58
|
+
- **📊 Interactive Call Graphs**: Visualize function relationships and execution flows
|
|
59
|
+
- **🏗️ Domain Entity Detection**: Automatically identify core business objects
|
|
60
|
+
- **🔄 Execution Flow Mapping**: Trace request paths through the system
|
|
61
|
+
- **📋 Project README Generation**: Auto-generate documentation for new developers
|
|
62
|
+
- **📈 Multi-format Output**: HTML dashboards + Markdown reports + JSON data + Interactive graphs
|
|
63
|
+
|
|
64
|
+
## 🚀 Quick Start
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Install
|
|
68
|
+
pip install codebase-digest-ai
|
|
69
|
+
|
|
70
|
+
# Analyze current directory
|
|
71
|
+
codebase-digest build
|
|
72
|
+
|
|
73
|
+
# Analyze specific directory
|
|
74
|
+
codebase-digest build /path/to/project
|
|
75
|
+
|
|
76
|
+
# Generate with interactive call graph
|
|
77
|
+
codebase-digest build --graph
|
|
78
|
+
|
|
79
|
+
# Quick stats
|
|
80
|
+
codebase-digest stats
|
|
81
|
+
|
|
82
|
+
# Search for patterns
|
|
83
|
+
codebase-digest query "wallet"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## 📁 Output Structure
|
|
87
|
+
|
|
88
|
+
Generates `.digest/` directory with comprehensive analysis:
|
|
89
|
+
```
|
|
90
|
+
.digest/
|
|
91
|
+
├── README.md # Project documentation for developers
|
|
92
|
+
├── callgraph.html # Interactive call graph visualization
|
|
93
|
+
├── report.html # Comprehensive HTML dashboard
|
|
94
|
+
├── architecture.md # Technical architecture breakdown
|
|
95
|
+
├── flows.md # Execution flow documentation
|
|
96
|
+
├── ai-context.md # AI-optimized context file
|
|
97
|
+
└── entities.json # Structured analysis data
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## 📊 Example Output
|
|
101
|
+
|
|
102
|
+
For a Python financial services project:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
📊 Codebase Statistics
|
|
106
|
+
┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓
|
|
107
|
+
┃ Total Files ┃ 4 ┃
|
|
108
|
+
┃ Lines of Code ┃ 189 ┃
|
|
109
|
+
┃ Languages ┃ Python ┃
|
|
110
|
+
┃ Functions ┃ 24 ┃
|
|
111
|
+
┃ Classes ┃ 8 ┃
|
|
112
|
+
┃ Domain Entities ┃ 7 ┃
|
|
113
|
+
┃ Execution Flows ┃ 4 ┃
|
|
114
|
+
┃ Complexity Score ┃ 1.8 ┃
|
|
115
|
+
┗━━━━━━━━━━━━━━━━━━┻━━━━━━━━┛
|
|
116
|
+
|
|
117
|
+
Graph Stats: 29 nodes, 27 edges, 7 components
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Generated README.md excerpt:**
|
|
121
|
+
```markdown
|
|
122
|
+
# Project Overview
|
|
123
|
+
|
|
124
|
+
This is a financial services application that provides user management,
|
|
125
|
+
payment processing, and digital wallet functionality. The system is built
|
|
126
|
+
with a service-oriented architecture using Python dataclasses for domain
|
|
127
|
+
modeling and separate service layers for business logic.
|
|
128
|
+
|
|
129
|
+
## Architecture
|
|
130
|
+
|
|
131
|
+
The application follows a layered architecture with clear separation of concerns:
|
|
132
|
+
- **Domain Layer**: Contains core business entities (User, Payment, Wallet)
|
|
133
|
+
- **Service Layer**: Implements business logic (UserService, PaymentService)
|
|
134
|
+
- **Application Layer**: Handles bootstrapping and orchestration
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## 💡 Commands
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Full analysis with all outputs
|
|
141
|
+
codebase-digest build [PATH]
|
|
142
|
+
|
|
143
|
+
# Specific formats
|
|
144
|
+
codebase-digest build --format html # HTML dashboard only
|
|
145
|
+
codebase-digest build --format markdown # Markdown reports only
|
|
146
|
+
codebase-digest build --format json # JSON data only
|
|
147
|
+
|
|
148
|
+
# Interactive call graph with depth filtering
|
|
149
|
+
codebase-digest build --graph --graph-depth 3
|
|
150
|
+
|
|
151
|
+
# Quick metrics and search
|
|
152
|
+
codebase-digest stats [PATH] # Project statistics
|
|
153
|
+
codebase-digest query "search term" [PATH] # Search patterns
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## 🎯 Key Features
|
|
157
|
+
|
|
158
|
+
### 🕸️ Interactive Call Graph
|
|
159
|
+
- **Probabilistic entrypoint detection** - Finds real execution starting points
|
|
160
|
+
- **Noise filtering** - Removes builtin calls and isolated nodes
|
|
161
|
+
- **Depth filtering** - Focus on core execution spine
|
|
162
|
+
- **Professional UI** - GitHub/Linear/Notion inspired design
|
|
163
|
+
|
|
164
|
+
### 📝 Smart README Generation
|
|
165
|
+
- **Project type inference** - Detects financial, e-commerce, CMS patterns
|
|
166
|
+
- **Architecture analysis** - Service-oriented vs modular detection
|
|
167
|
+
- **Run instructions** - Inferred from entry points
|
|
168
|
+
- **Future improvements** - Realistic enhancement suggestions
|
|
169
|
+
|
|
170
|
+
### 🔍 Semantic Understanding
|
|
171
|
+
- **Symbol-aware analysis** - True function-level relationships
|
|
172
|
+
- **Domain entity detection** - Business object identification
|
|
173
|
+
- **Execution flow mapping** - Startup and runtime sequences
|
|
174
|
+
- **Cross-file analysis** - Import and dependency tracking
|
|
175
|
+
|
|
176
|
+
## 🛠️ Tech Stack
|
|
177
|
+
|
|
178
|
+
- **Python 3.10+** - Core language
|
|
179
|
+
- **AST parsing** - Deep Python code analysis
|
|
180
|
+
- **NetworkX** - Call graph analysis and visualization
|
|
181
|
+
- **vis.js** - Interactive graph rendering
|
|
182
|
+
- **Typer** - CLI interface
|
|
183
|
+
- **Rich** - Beautiful terminal output
|
|
184
|
+
|
|
185
|
+
## 📋 Supported Languages
|
|
186
|
+
|
|
187
|
+
- ✅ **Python** - Full AST analysis with call graphs
|
|
188
|
+
- 🚧 **JavaScript/TypeScript** - Parser implemented, integration in progress
|
|
189
|
+
- 🚧 **Java** - Planned
|
|
190
|
+
- 🚧 **Go** - Planned
|
|
191
|
+
|
|
192
|
+
## 🎯 Use Cases
|
|
193
|
+
|
|
194
|
+
- **New Developer Onboarding** - Understand unfamiliar codebases quickly
|
|
195
|
+
- **Code Reviews** - Architectural overview and impact analysis
|
|
196
|
+
- **Documentation Generation** - Auto-generate project documentation
|
|
197
|
+
- **Refactoring Planning** - Identify core components and dependencies
|
|
198
|
+
- **AI-Assisted Development** - Provide context for LLM code assistance
|
|
199
|
+
|
|
200
|
+
## 🔧 Development
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# Install development dependencies
|
|
204
|
+
pip install -e ".[dev]"
|
|
205
|
+
|
|
206
|
+
# Run tests
|
|
207
|
+
pytest
|
|
208
|
+
|
|
209
|
+
# Format code
|
|
210
|
+
black .
|
|
211
|
+
isort .
|
|
212
|
+
|
|
213
|
+
# Type checking
|
|
214
|
+
mypy codebase_digest/
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## 🤝 Contributing
|
|
218
|
+
|
|
219
|
+
1. Fork the repository
|
|
220
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
221
|
+
3. Make your changes
|
|
222
|
+
4. Add tests if applicable
|
|
223
|
+
5. Submit a pull request
|
|
224
|
+
|
|
225
|
+
## 📄 License
|
|
226
|
+
|
|
227
|
+
MIT License - see LICENSE file for details.
|
|
228
|
+
|
|
229
|
+
## 🙏 Acknowledgments
|
|
230
|
+
|
|
231
|
+
- Built with modern Python tooling and best practices
|
|
232
|
+
- Inspired by professional developer tools (JetBrains, Sourcegraph)
|
|
233
|
+
- Designed for AI-native development workflows
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# codebase-digest
|
|
2
|
+
|
|
3
|
+
🚀 **AI-Native Code Intelligence Engine**
|
|
4
|
+
|
|
5
|
+
Transform any codebase into semantic architectural understanding, execution flows, and human-readable engineering reports.
|
|
6
|
+
|
|
7
|
+
## 🧱 What It Does
|
|
8
|
+
|
|
9
|
+
This is NOT a repo summarizer. This is a code intelligence engine that explains:
|
|
10
|
+
- **What this system does** - Infers project purpose from domain entities
|
|
11
|
+
- **How data flows** - Maps execution paths and call relationships
|
|
12
|
+
- **Where logic lives** - Identifies core components and their responsibilities
|
|
13
|
+
- **What domains exist** - Detects business entities (User, Payment, Wallet, etc.)
|
|
14
|
+
- **What files matter** - Highlights entry points and key modules
|
|
15
|
+
|
|
16
|
+
## ✨ Features
|
|
17
|
+
|
|
18
|
+
- **🔍 Semantic Analysis**: Extract functions, classes, methods, and imports with full context
|
|
19
|
+
- **📊 Interactive Call Graphs**: Visualize function relationships and execution flows
|
|
20
|
+
- **🏗️ Domain Entity Detection**: Automatically identify core business objects
|
|
21
|
+
- **🔄 Execution Flow Mapping**: Trace request paths through the system
|
|
22
|
+
- **📋 Project README Generation**: Auto-generate documentation for new developers
|
|
23
|
+
- **📈 Multi-format Output**: HTML dashboards + Markdown reports + JSON data + Interactive graphs
|
|
24
|
+
|
|
25
|
+
## 🚀 Quick Start
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Install
|
|
29
|
+
pip install codebase-digest-ai
|
|
30
|
+
|
|
31
|
+
# Analyze current directory
|
|
32
|
+
codebase-digest build
|
|
33
|
+
|
|
34
|
+
# Analyze specific directory
|
|
35
|
+
codebase-digest build /path/to/project
|
|
36
|
+
|
|
37
|
+
# Generate with interactive call graph
|
|
38
|
+
codebase-digest build --graph
|
|
39
|
+
|
|
40
|
+
# Quick stats
|
|
41
|
+
codebase-digest stats
|
|
42
|
+
|
|
43
|
+
# Search for patterns
|
|
44
|
+
codebase-digest query "wallet"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## 📁 Output Structure
|
|
48
|
+
|
|
49
|
+
Generates `.digest/` directory with comprehensive analysis:
|
|
50
|
+
```
|
|
51
|
+
.digest/
|
|
52
|
+
├── README.md # Project documentation for developers
|
|
53
|
+
├── callgraph.html # Interactive call graph visualization
|
|
54
|
+
├── report.html # Comprehensive HTML dashboard
|
|
55
|
+
├── architecture.md # Technical architecture breakdown
|
|
56
|
+
├── flows.md # Execution flow documentation
|
|
57
|
+
├── ai-context.md # AI-optimized context file
|
|
58
|
+
└── entities.json # Structured analysis data
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 📊 Example Output
|
|
62
|
+
|
|
63
|
+
For a Python financial services project:
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
📊 Codebase Statistics
|
|
67
|
+
┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓
|
|
68
|
+
┃ Total Files ┃ 4 ┃
|
|
69
|
+
┃ Lines of Code ┃ 189 ┃
|
|
70
|
+
┃ Languages ┃ Python ┃
|
|
71
|
+
┃ Functions ┃ 24 ┃
|
|
72
|
+
┃ Classes ┃ 8 ┃
|
|
73
|
+
┃ Domain Entities ┃ 7 ┃
|
|
74
|
+
┃ Execution Flows ┃ 4 ┃
|
|
75
|
+
┃ Complexity Score ┃ 1.8 ┃
|
|
76
|
+
┗━━━━━━━━━━━━━━━━━━┻━━━━━━━━┛
|
|
77
|
+
|
|
78
|
+
Graph Stats: 29 nodes, 27 edges, 7 components
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Generated README.md excerpt:**
|
|
82
|
+
```markdown
|
|
83
|
+
# Project Overview
|
|
84
|
+
|
|
85
|
+
This is a financial services application that provides user management,
|
|
86
|
+
payment processing, and digital wallet functionality. The system is built
|
|
87
|
+
with a service-oriented architecture using Python dataclasses for domain
|
|
88
|
+
modeling and separate service layers for business logic.
|
|
89
|
+
|
|
90
|
+
## Architecture
|
|
91
|
+
|
|
92
|
+
The application follows a layered architecture with clear separation of concerns:
|
|
93
|
+
- **Domain Layer**: Contains core business entities (User, Payment, Wallet)
|
|
94
|
+
- **Service Layer**: Implements business logic (UserService, PaymentService)
|
|
95
|
+
- **Application Layer**: Handles bootstrapping and orchestration
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 💡 Commands
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# Full analysis with all outputs
|
|
102
|
+
codebase-digest build [PATH]
|
|
103
|
+
|
|
104
|
+
# Specific formats
|
|
105
|
+
codebase-digest build --format html # HTML dashboard only
|
|
106
|
+
codebase-digest build --format markdown # Markdown reports only
|
|
107
|
+
codebase-digest build --format json # JSON data only
|
|
108
|
+
|
|
109
|
+
# Interactive call graph with depth filtering
|
|
110
|
+
codebase-digest build --graph --graph-depth 3
|
|
111
|
+
|
|
112
|
+
# Quick metrics and search
|
|
113
|
+
codebase-digest stats [PATH] # Project statistics
|
|
114
|
+
codebase-digest query "search term" [PATH] # Search patterns
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## 🎯 Key Features
|
|
118
|
+
|
|
119
|
+
### 🕸️ Interactive Call Graph
|
|
120
|
+
- **Probabilistic entrypoint detection** - Finds real execution starting points
|
|
121
|
+
- **Noise filtering** - Removes builtin calls and isolated nodes
|
|
122
|
+
- **Depth filtering** - Focus on core execution spine
|
|
123
|
+
- **Professional UI** - GitHub/Linear/Notion inspired design
|
|
124
|
+
|
|
125
|
+
### 📝 Smart README Generation
|
|
126
|
+
- **Project type inference** - Detects financial, e-commerce, CMS patterns
|
|
127
|
+
- **Architecture analysis** - Service-oriented vs modular detection
|
|
128
|
+
- **Run instructions** - Inferred from entry points
|
|
129
|
+
- **Future improvements** - Realistic enhancement suggestions
|
|
130
|
+
|
|
131
|
+
### 🔍 Semantic Understanding
|
|
132
|
+
- **Symbol-aware analysis** - True function-level relationships
|
|
133
|
+
- **Domain entity detection** - Business object identification
|
|
134
|
+
- **Execution flow mapping** - Startup and runtime sequences
|
|
135
|
+
- **Cross-file analysis** - Import and dependency tracking
|
|
136
|
+
|
|
137
|
+
## 🛠️ Tech Stack
|
|
138
|
+
|
|
139
|
+
- **Python 3.10+** - Core language
|
|
140
|
+
- **AST parsing** - Deep Python code analysis
|
|
141
|
+
- **NetworkX** - Call graph analysis and visualization
|
|
142
|
+
- **vis.js** - Interactive graph rendering
|
|
143
|
+
- **Typer** - CLI interface
|
|
144
|
+
- **Rich** - Beautiful terminal output
|
|
145
|
+
|
|
146
|
+
## 📋 Supported Languages
|
|
147
|
+
|
|
148
|
+
- ✅ **Python** - Full AST analysis with call graphs
|
|
149
|
+
- 🚧 **JavaScript/TypeScript** - Parser implemented, integration in progress
|
|
150
|
+
- 🚧 **Java** - Planned
|
|
151
|
+
- 🚧 **Go** - Planned
|
|
152
|
+
|
|
153
|
+
## 🎯 Use Cases
|
|
154
|
+
|
|
155
|
+
- **New Developer Onboarding** - Understand unfamiliar codebases quickly
|
|
156
|
+
- **Code Reviews** - Architectural overview and impact analysis
|
|
157
|
+
- **Documentation Generation** - Auto-generate project documentation
|
|
158
|
+
- **Refactoring Planning** - Identify core components and dependencies
|
|
159
|
+
- **AI-Assisted Development** - Provide context for LLM code assistance
|
|
160
|
+
|
|
161
|
+
## 🔧 Development
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# Install development dependencies
|
|
165
|
+
pip install -e ".[dev]"
|
|
166
|
+
|
|
167
|
+
# Run tests
|
|
168
|
+
pytest
|
|
169
|
+
|
|
170
|
+
# Format code
|
|
171
|
+
black .
|
|
172
|
+
isort .
|
|
173
|
+
|
|
174
|
+
# Type checking
|
|
175
|
+
mypy codebase_digest/
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## 🤝 Contributing
|
|
179
|
+
|
|
180
|
+
1. Fork the repository
|
|
181
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
182
|
+
3. Make your changes
|
|
183
|
+
4. Add tests if applicable
|
|
184
|
+
5. Submit a pull request
|
|
185
|
+
|
|
186
|
+
## 📄 License
|
|
187
|
+
|
|
188
|
+
MIT License - see LICENSE file for details.
|
|
189
|
+
|
|
190
|
+
## 🙏 Acknowledgments
|
|
191
|
+
|
|
192
|
+
- Built with modern Python tooling and best practices
|
|
193
|
+
- Inspired by professional developer tools (JetBrains, Sourcegraph)
|
|
194
|
+
- Designed for AI-native development workflows
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Main codebase analyzer that orchestrates parsing and analysis."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Set
|
|
6
|
+
|
|
7
|
+
from ..models import CodebaseAnalysis
|
|
8
|
+
from ..parser import PythonParser, JavaScriptParser, BaseParser
|
|
9
|
+
from .flow_analyzer import FlowAnalyzer
|
|
10
|
+
from .metrics_analyzer import MetricsAnalyzer
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CodebaseAnalyzer:
    """Coordinate parsing and analysis of the codebase under one root directory.

    The analyzer walks ``root_path``, selects files whose suffix has a
    registered parser, feeds each file through that parser, and then derives
    entry points, execution flows, metrics and a directory tree from the
    collected results.
    """

    # File names treated as entry points regardless of their content.
    _ENTRY_POINT_NAMES = frozenset({
        'main.py', 'app.py', 'server.py', 'run.py', 'start.py',
        'manage.py', '__main__.py', 'wsgi.py', 'asgi.py',
        'index.js', 'server.js', 'app.js', 'main.js',
    })

    def __init__(self, root_path: Path):
        """Create an analyzer for the codebase rooted at ``root_path``."""
        self.root_path = Path(root_path)
        # Maps a file suffix (e.g. ".py") to the parser *class* handling it;
        # the class is instantiated once per file in ``_parse_file``.
        self.parsers: Dict[str, type[BaseParser]] = {}
        self._register_parsers()

        # File/directory names skipped during traversal. A leading "*" makes
        # the remainder a suffix match; anything else must match exactly.
        self.ignore_patterns = {
            '__pycache__', '.git', '.svn', '.hg', 'node_modules',
            '.pytest_cache', '.mypy_cache', '.tox', 'venv', 'env',
            '.venv', 'dist', 'build', '*.egg-info', '.DS_Store',
        }

    def _register_parsers(self):
        """Associate each supported file suffix with its parser class."""
        # Parsers are registered by extension without being instantiated.
        python_extensions = ['.py']
        js_extensions = ['.js', '.jsx', '.ts', '.tsx']

        for ext in python_extensions:
            self.parsers[ext] = PythonParser

        for ext in js_extensions:
            self.parsers[ext] = JavaScriptParser

    def analyze(self) -> "CodebaseAnalysis":
        """Run the complete analysis and return the populated result object."""
        analysis = CodebaseAnalysis(root_path=self.root_path)

        # Find all relevant files.
        files = self._find_source_files()
        analysis.total_files = len(files)

        # Parse each file; parse failures are reported and skipped.
        for file_path in files:
            self._parse_file(file_path, analysis)

        # Detect entry points.
        analysis.entry_points = self._detect_entry_points(files)

        # Analyze execution flows (needs the parsed symbols and calls).
        flow_analyzer = FlowAnalyzer(analysis)
        analysis.execution_flows = flow_analyzer.analyze_flows()

        # Calculate metrics.
        metrics_analyzer = MetricsAnalyzer(analysis)
        analysis.total_lines = metrics_analyzer.count_total_lines(files)
        analysis.languages = metrics_analyzer.detect_languages(files)
        analysis.complexity_score = metrics_analyzer.calculate_complexity()

        # Build directory tree.
        analysis.directory_tree = self._build_directory_tree()

        return analysis

    def _find_source_files(self) -> List[Path]:
        """Return every non-ignored source file under ``root_path``.

        Directories and files are visited in sorted name order so the result
        does not depend on the order the filesystem lists entries in.
        """
        files = []

        for root, dirs, filenames in os.walk(self.root_path):
            # Assigning to dirs[:] prunes the walk in place, so ignored
            # directories are never descended into.
            dirs[:] = sorted(d for d in dirs if not self._should_ignore(d))

            for filename in sorted(filenames):
                file_path = Path(root) / filename
                if self._is_source_file(file_path) and not self._should_ignore(filename):
                    files.append(file_path)

        return files

    def _is_source_file(self, file_path: Path) -> bool:
        """Return True when a parser is registered for the file's suffix."""
        return file_path.suffix in self.parsers

    def _should_ignore(self, name: str) -> bool:
        """Return True when a file or directory name matches an ignore pattern.

        Patterns starting with ``*`` match any name ending with the rest of
        the pattern (``*.egg-info`` matches ``pkg.egg-info``). All other
        patterns must equal the name exactly, so ``env`` ignores a directory
        called ``env`` but not a module called ``environment.py``.
        """
        for pattern in self.ignore_patterns:
            if pattern.startswith('*'):
                if name.endswith(pattern[1:]):
                    return True
            elif name == pattern:
                return True
        return False

    def _parse_file(self, file_path: Path, analysis: "CodebaseAnalysis"):
        """Parse one file and append its results to ``analysis``.

        Files without a registered parser are skipped. Any exception raised
        while parsing is reported on stdout and swallowed so that one bad
        file does not abort the whole run.
        """
        parser_class = self.parsers.get(file_path.suffix)
        if not parser_class:
            return

        try:
            parser = parser_class(file_path)

            # Parse symbols
            symbols = parser.parse_symbols()
            analysis.symbols.extend(symbols)

            # Parse imports
            imports = parser.parse_imports()
            analysis.imports.extend(imports)

            # Parse calls
            calls = parser.parse_calls()
            analysis.call_relations.extend(calls)

            # Parse domain entities
            entities = parser.parse_domain_entities()
            analysis.domain_entities.extend(entities)

        except Exception as e:
            # Deliberately best-effort: report the error and keep processing.
            print(f"Error parsing {file_path}: {e}")

    def _detect_entry_points(self, files: List[Path]) -> List[Path]:
        """Return the files in ``files`` that look like entry points.

        A file qualifies when its name is a well-known entry-point name, or
        when it is an ``__init__.py`` that passes
        ``_is_package_entry_point``. Input order is preserved.
        """
        entry_points = []

        for file_path in files:
            if file_path.name in self._ENTRY_POINT_NAMES:
                entry_points.append(file_path)
            elif file_path.name == '__init__.py':
                # Check if it's a package entry point
                if self._is_package_entry_point(file_path):
                    entry_points.append(file_path)

        return entry_points

    def _is_package_entry_point(self, init_file: Path) -> bool:
        """Return True when an ``__init__.py`` appears to run code itself.

        Simple textual heuristic: the file contains ``if __name__`` or a
        ``main(`` call/definition. Unreadable or non-UTF-8 files count as
        "not an entry point".
        """
        try:
            content = init_file.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError):
            return False
        return 'if __name__' in content or 'main(' in content

    def _build_directory_tree(self) -> Dict:
        """Build a nested dict mirroring the non-ignored directory layout.

        Each directory becomes a dict keyed by child directory name. When a
        directory holds source files, their names are stored as a list under
        the ``'_files'`` key. Files directly in ``root_path`` land in the
        top-level ``'_files'`` entry.
        """
        tree = {}

        for root, dirs, files in os.walk(self.root_path):
            # Prune ignored directories in place; sort for a stable layout.
            dirs[:] = sorted(d for d in dirs if not self._should_ignore(d))

            rel_path = Path(root).relative_to(self.root_path)

            # Descend to (creating as needed) the node for this directory.
            current = tree
            for part in rel_path.parts:
                current = current.setdefault(part, {})

            # Add files
            source_files = [
                f for f in sorted(files)
                if self._is_source_file(Path(root) / f)
                and not self._should_ignore(f)
            ]

            if source_files:
                current['_files'] = source_files

        return tree
|