codebase-digest-ai 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_digest/__init__.py +8 -0
- codebase_digest/analyzer/__init__.py +7 -0
- codebase_digest/analyzer/codebase_analyzer.py +183 -0
- codebase_digest/analyzer/flow_analyzer.py +164 -0
- codebase_digest/analyzer/metrics_analyzer.py +130 -0
- codebase_digest/cli/__init__.py +1 -0
- codebase_digest/cli/main.py +284 -0
- codebase_digest/exporters/__init__.py +9 -0
- codebase_digest/exporters/graph_exporter.py +1038 -0
- codebase_digest/exporters/html_exporter.py +1052 -0
- codebase_digest/exporters/json_exporter.py +105 -0
- codebase_digest/exporters/markdown_exporter.py +273 -0
- codebase_digest/exporters/readme_exporter.py +306 -0
- codebase_digest/models.py +81 -0
- codebase_digest/parser/__init__.py +7 -0
- codebase_digest/parser/base.py +41 -0
- codebase_digest/parser/javascript_parser.py +36 -0
- codebase_digest/parser/python_parser.py +270 -0
- codebase_digest_ai-0.1.1.dist-info/METADATA +233 -0
- codebase_digest_ai-0.1.1.dist-info/RECORD +24 -0
- codebase_digest_ai-0.1.1.dist-info/WHEEL +5 -0
- codebase_digest_ai-0.1.1.dist-info/entry_points.txt +2 -0
- codebase_digest_ai-0.1.1.dist-info/licenses/LICENSE +21 -0
- codebase_digest_ai-0.1.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""JSON data exporter."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Any
|
|
7
|
+
|
|
8
|
+
from ..models import CodebaseAnalysis, Symbol, Import, CallRelation, DomainEntity, ExecutionFlow
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class JSONExporter:
    """Exports analysis results to JSON format.

    Serializes a :class:`CodebaseAnalysis` into plain dicts/lists and writes
    the result as pretty-printed, UTF-8 encoded JSON.
    """

    def __init__(self, analysis: CodebaseAnalysis):
        self.analysis = analysis

    def export(self, output_path: Path) -> None:
        """Export analysis to JSON file at *output_path*."""
        json_data = self._generate_json()
        output_path.write_text(json.dumps(json_data, indent=2), encoding='utf-8')

    def _generate_json(self) -> Dict[str, Any]:
        """Generate complete JSON data structure.

        ``languages`` is emitted sorted: the underlying collection is an
        unordered set, so without sorting the list order (and therefore the
        output bytes) would vary between runs over the same codebase.
        """
        return {
            "metadata": {
                "project_name": self.analysis.root_path.name,
                "root_path": str(self.analysis.root_path),
                "generated_at": datetime.now().isoformat(),
                # NOTE(review): hard-coded version string, already out of
                # sync with the 0.1.1 package -- consider sourcing it from
                # package metadata instead.
                "version": "0.1.0"
            },
            "metrics": {
                "total_files": self.analysis.total_files,
                "total_lines": self.analysis.total_lines,
                # sorted() for deterministic, reproducible output.
                "languages": sorted(self.analysis.languages),
                "complexity_score": self.analysis.complexity_score,
                "symbol_count": len(self.analysis.symbols),
                "import_count": len(self.analysis.imports),
                "call_relation_count": len(self.analysis.call_relations),
                "domain_entity_count": len(self.analysis.domain_entities),
                "execution_flow_count": len(self.analysis.execution_flows)
            },
            "entry_points": [str(ep) for ep in self.analysis.entry_points],
            "symbols": [self._serialize_symbol(symbol) for symbol in self.analysis.symbols],
            "imports": [self._serialize_import(imp) for imp in self.analysis.imports],
            "call_relations": [self._serialize_call_relation(call) for call in self.analysis.call_relations],
            "domain_entities": [self._serialize_domain_entity(entity) for entity in self.analysis.domain_entities],
            "execution_flows": [self._serialize_execution_flow(flow) for flow in self.analysis.execution_flows],
            "directory_tree": self.analysis.directory_tree
        }

    def _serialize_symbol(self, symbol: Symbol) -> Dict[str, Any]:
        """Serialize a Symbol to a JSON-compatible dict (paths as strings)."""
        return {
            "name": symbol.name,
            "type": symbol.type,
            "file_path": str(symbol.file_path),
            "line_number": symbol.line_number,
            "docstring": symbol.docstring,
            "parameters": symbol.parameters,
            "return_type": symbol.return_type,
            "decorators": symbol.decorators
        }

    def _serialize_import(self, imp: Import) -> Dict[str, Any]:
        """Serialize an Import to a JSON-compatible dict.

        ``file_path`` may be None (e.g. unresolved module), hence the guard.
        """
        return {
            "module": imp.module,
            "names": imp.names,
            "alias": imp.alias,
            "file_path": str(imp.file_path) if imp.file_path else None,
            "line_number": imp.line_number
        }

    def _serialize_call_relation(self, call: CallRelation) -> Dict[str, Any]:
        """Serialize a CallRelation to a JSON-compatible dict.

        Only the caller symbol's name/file are kept; the callee may not have
        been resolved to a file, hence the None guard.
        """
        return {
            "caller": call.caller_symbol.name,
            "callee": call.callee_name,
            "caller_file": str(call.caller_symbol.file_path),
            "callee_file": str(call.callee_file) if call.callee_file else None,
            "line_number": call.line_number
        }

    def _serialize_domain_entity(self, entity: DomainEntity) -> Dict[str, Any]:
        """Serialize a DomainEntity to a JSON-compatible dict."""
        return {
            "name": entity.name,
            "type": entity.type,
            "file_path": str(entity.file_path),
            "fields": entity.fields,
            "methods": entity.methods,
            "creation_points": entity.creation_points,
            "modification_points": entity.modification_points,
            "validation_points": entity.validation_points
        }

    def _serialize_execution_flow(self, flow: ExecutionFlow) -> Dict[str, Any]:
        """Serialize an ExecutionFlow to a JSON-compatible dict."""
        return {
            "name": flow.name,
            "entry_point": flow.entry_point,
            "steps": flow.steps,
            "files_involved": [str(f) for f in flow.files_involved],
            "description": flow.description
        }
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Markdown report exporter."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict
|
|
6
|
+
|
|
7
|
+
from ..models import CodebaseAnalysis
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MarkdownExporter:
    """Exports analysis results to Markdown format.

    Builds a human-readable report from a :class:`CodebaseAnalysis`,
    section by section.
    """

    def __init__(self, analysis: CodebaseAnalysis):
        self.analysis = analysis

    def export(self, output_path: Path) -> None:
        """Export analysis to Markdown file (UTF-8)."""
        markdown_content = self._generate_markdown()
        output_path.write_text(markdown_content, encoding='utf-8')

    def _generate_markdown(self) -> str:
        """Generate complete Markdown report by stitching all sections together."""
        return f"""# {self.analysis.root_path.name}

**Codebase Analysis Report**

*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*

## 📊 Summary

| Metric | Value |
|--------|-------|
| Total Files | {self.analysis.total_files} |
| Languages | {len(self.analysis.languages)} |
| Lines of Code | {self.analysis.total_lines:,} |
| Complexity Score | {self.analysis.complexity_score:.1f} |

{self._generate_overview()}

{self._generate_architecture()}

{self._generate_directory_structure()}

{self._generate_key_components()}

{self._generate_core_logic()}

{self._generate_dependencies()}

{self._generate_data_flow()}

{self._generate_risks()}

{self._generate_recommendations()}
"""

    def _generate_overview(self) -> str:
        """Generate overview section: file/line totals, entry points, languages."""
        entry_points = [str(ep.relative_to(self.analysis.root_path)) for ep in self.analysis.entry_points]
        entry_points_str = ', '.join(entry_points) if entry_points else 'None detected'

        # Sorted so the badge list is deterministic (languages is a set).
        languages_badges = ' '.join(f'`{lang}`' for lang in sorted(self.analysis.languages))

        return f"""## 🚀 Overview

This codebase contains {self.analysis.total_files} files across {len(self.analysis.languages)} programming languages, with a total of {self.analysis.total_lines:,} lines of code.

### Entry Points
```
{entry_points_str}
```

### Languages
{languages_badges}
"""

    def _generate_architecture(self) -> str:
        """Generate architecture section with symbol-type counts."""
        # sum() over a generator avoids building throwaway lists just to len() them.
        function_count = sum(1 for s in self.analysis.symbols if s.type == 'function')
        class_count = sum(1 for s in self.analysis.symbols if s.type == 'class')
        method_count = sum(1 for s in self.analysis.symbols if s.type == 'method')

        return f"""## 🧱 Architecture

The codebase follows a modular architecture with {len(self.analysis.symbols)} defined symbols and {len(self.analysis.call_relations)} call relationships.

### Key Statistics
- **Functions:** {function_count}
- **Classes:** {class_count}
- **Methods:** {method_count}
- **Domain Entities:** {len(self.analysis.domain_entities)}
"""

    def _generate_directory_structure(self) -> str:
        """Generate directory structure section as a fenced tree listing."""
        tree_md = self._render_directory_tree(self.analysis.directory_tree)

        return f"""## 📁 Directory Structure

```
{self.analysis.root_path.name}/
{tree_md}
```
"""

    def _render_directory_tree(self, tree: Dict, prefix: str = "") -> str:
        """Render directory tree as an ASCII-art listing.

        *tree* maps directory names to sub-trees; the special key
        ``'_files'`` holds the list of file names in a directory.
        Continuation glyphs use the conventional 4-column widths
        ("    " / "│   ") so children align under their parent's connector.
        """
        if not tree:
            return ""

        lines = []
        items = list(tree.items())

        for i, (key, value) in enumerate(items):
            is_last = i == len(items) - 1
            current_prefix = "└── " if is_last else "├── "

            if key == '_files':
                for j, file in enumerate(value):
                    file_is_last = j == len(value) - 1
                    file_prefix = "└── " if file_is_last else "├── "
                    lines.append(f"{prefix}{'    ' if is_last else '│   '}{file_prefix}{file}")
            else:
                lines.append(f"{prefix}{current_prefix}{key}/")
                if isinstance(value, dict):
                    next_prefix = prefix + ("    " if is_last else "│   ")
                    lines.append(self._render_directory_tree(value, next_prefix))

        # filter(None, ...) drops empty strings from empty sub-trees.
        return "\n".join(filter(None, lines))

    def _generate_key_components(self) -> str:
        """Generate key components section: top functions and classes."""
        if not self.analysis.symbols:
            return "## 🔧 Key Components\n\nNo components detected."

        # Group by type; cap each list at 10 to keep the report readable.
        functions = [s for s in self.analysis.symbols if s.type == 'function'][:10]
        classes = [s for s in self.analysis.symbols if s.type == 'class'][:10]

        content = "## 🔧 Key Components\n\n"

        if functions:
            content += "### Functions\n"
            for func in functions:
                rel_path = func.file_path.relative_to(self.analysis.root_path)
                content += f"- `{func.name}()` - {rel_path}:{func.line_number}\n"
            content += "\n"

        if classes:
            content += "### Classes\n"
            for cls in classes:
                rel_path = cls.file_path.relative_to(self.analysis.root_path)
                content += f"- `{cls.name}` - {rel_path}:{cls.line_number}\n"
            content += "\n"

        return content

    def _generate_core_logic(self) -> str:
        """Generate core logic section from detected execution flows."""
        if not self.analysis.execution_flows:
            return "## ⚡ Core Logic\n\nNo execution flows detected."

        content = f"## ⚡ Core Logic\n\nIdentified {len(self.analysis.execution_flows)} execution flows:\n\n"

        for flow in self.analysis.execution_flows:
            content += f"### {flow.name}\n"
            content += f"{flow.description}\n\n"

            if flow.steps:
                # Show at most 5 steps; summarize the remainder.
                steps_str = " → ".join(flow.steps[:5])
                if len(flow.steps) > 5:
                    steps_str += f" ... (+{len(flow.steps) - 5} more)"
                content += f"```\n{steps_str}\n```\n\n"

        return content

    def _generate_dependencies(self) -> str:
        """Generate dependencies section listing the most-imported modules."""
        if not self.analysis.imports:
            return "## 📦 Dependencies\n\nNo imports detected."

        # Local import keeps the module's top-level import block untouched.
        from collections import Counter

        # Counter replaces the hand-rolled count-then-sort; most_common(10)
        # is the stdlib equivalent of sorting by frequency descending.
        import_counts = Counter(imp.module for imp in self.analysis.imports)

        content = "## 📦 Dependencies\n\n### Top Imported Modules\n\n"
        content += "| Module | Import Count |\n"
        content += "|--------|-------------|\n"

        for module, count in import_counts.most_common(10):
            content += f"| `{module}` | {count} |\n"

        return content + "\n"

    def _generate_data_flow(self) -> str:
        """Generate data flow section describing detected domain entities."""
        if not self.analysis.domain_entities:
            return "## 🔄 Data Flow\n\nNo domain entities detected."

        content = f"## 🔄 Data Flow\n\nIdentified {len(self.analysis.domain_entities)} domain entities:\n\n"

        for entity in self.analysis.domain_entities:
            content += f"### {entity.name}\n"
            content += f"- **Type:** {entity.type}\n"
            content += f"- **File:** {entity.file_path.relative_to(self.analysis.root_path)}\n"

            if entity.fields:
                fields_str = ", ".join(entity.fields[:5])
                if len(entity.fields) > 5:
                    fields_str += f" ... (+{len(entity.fields) - 5} more)"
                content += f"- **Fields:** {fields_str}\n"

            if entity.methods:
                methods_str = ", ".join(entity.methods[:3])
                if len(entity.methods) > 3:
                    methods_str += f" ... (+{len(entity.methods) - 3} more)"
                content += f"- **Methods:** {methods_str}\n"

            content += "\n"

        return content

    def _generate_risks(self) -> str:
        """Generate risks and technical debt section from simple heuristics."""
        risks = []

        # Thresholds are heuristic; tune alongside the metrics analyzer.
        if self.analysis.complexity_score > 70:
            risks.append("High complexity score indicates potential maintainability issues")

        if len(self.analysis.entry_points) == 0:
            risks.append("No clear entry points detected - may indicate unclear application structure")

        if len(self.analysis.execution_flows) < 2:
            risks.append("Limited execution flows detected - may indicate incomplete analysis or simple codebase")

        content = "## ⚠️ Known Issues\n\n"

        if risks:
            for risk in risks:
                content += f"- {risk}\n"
        else:
            content += "No significant risks detected in the current analysis.\n"

        return content + "\n"

    def _generate_recommendations(self) -> str:
        """Generate recommendations section (heuristic + always-on advice)."""
        recommendations = []

        if self.analysis.complexity_score > 50:
            recommendations.append("Consider refactoring complex functions to improve maintainability")

        if len(self.analysis.domain_entities) > 0:
            recommendations.append("Document domain entities and their relationships for better understanding")

        if len(self.analysis.execution_flows) > 0:
            recommendations.append("Create sequence diagrams for critical execution flows")

        # Generic recommendations appended for every report.
        recommendations.append("Add comprehensive unit tests for core business logic")
        recommendations.append("Consider implementing code documentation standards")

        content = "## 💡 Recommendations\n\n"

        for i, rec in enumerate(recommendations, 1):
            content += f"{i}. {rec}\n"

        return content
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""README.md exporter for target projects."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
from ..models import CodebaseAnalysis
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ReadmeExporter:
    """Exports project README.md based on analysis results.

    The generated README is heuristic: project type, architecture notes and
    improvements are inferred from entity/symbol names, so the text is a
    starting point to be reviewed by a human, not ground truth.
    """

    def __init__(self, analysis: CodebaseAnalysis):
        self.analysis = analysis

    def export(self, output_path: Path) -> None:
        """Export project README.md file (UTF-8)."""
        readme_content = self._generate_readme()
        output_path.write_text(readme_content, encoding='utf-8')

    def _generate_readme(self) -> str:
        """Generate complete project README.md by assembling all sections."""
        return f"""# Project Overview

{self._generate_project_description()}

## Architecture

{self._generate_architecture_description()}

## Execution Flow

{self._generate_execution_flow_description()}

## Core Components

{self._generate_core_components()}

## Project Structure

{self._generate_project_structure()}

## How To Run

{self._generate_run_instructions()}

## Generated Artifacts

{self._generate_artifacts_description()}

## Development Notes

{self._generate_development_notes()}

## Future Improvements

{self._generate_future_improvements()}
"""

    def _generate_project_description(self) -> str:
        """Generate high-level project description based on domain entities and structure.

        Pattern matching on entity names is a heuristic classification; the
        first matching bucket wins.
        """
        entity_names = [entity.name.lower() for entity in self.analysis.domain_entities]

        # Check for common domain patterns, most specific first.
        if any(name in entity_names for name in ['user', 'payment', 'wallet', 'account']):
            project_type = "financial services application"
            description = "that provides user management, payment processing, and digital wallet functionality"
        elif any(name in entity_names for name in ['product', 'order', 'cart', 'inventory']):
            project_type = "e-commerce application"
            description = "that handles product catalog, order management, and inventory tracking"
        elif any(name in entity_names for name in ['post', 'comment', 'user', 'article']):
            project_type = "content management system"
            description = "that manages articles, user interactions, and content publishing"
        elif any(name in entity_names for name in ['task', 'project', 'user', 'team']):
            project_type = "project management application"
            description = "that handles task tracking, project coordination, and team collaboration"
        else:
            # Generic description based on structure.
            if len(self.analysis.domain_entities) > 0:
                project_type = "business application"
                description = f"built with {len(self.analysis.domain_entities)} core domain entities and service-oriented architecture"
            else:
                project_type = "software application"
                description = f"containing {len(self.analysis.symbols)} components across {len(self.analysis.languages)} programming languages"

        return f"This is a {project_type} {description}. The system is built with a {'service-oriented' if self._has_service_pattern() else 'modular'} architecture using {self._get_primary_language()} {'dataclasses for domain modeling and separate service layers for business logic' if self._has_service_pattern() else 'for implementation'}."

    def _generate_architecture_description(self) -> str:
        """Generate architecture description based on actual code structure."""
        # Only the entry-point *names* matter here; the original built a
        # {name: path} dict whose values were never used.
        entry_names = [f.name.lower() for f in self.analysis.entry_points]
        has_models = any('model' in name for name in entry_names)
        has_services = any('service' in name for name in entry_names)

        if has_models and has_services:
            return """The application follows a layered architecture with clear separation of concerns:

- **Domain Layer**: Contains core business entities with their associated behaviors
- **Service Layer**: Implements business logic through dedicated service classes
- **Application Layer**: Handles application bootstrapping, configuration, and orchestration

The system uses dependency injection patterns where components are coordinated to provide a cohesive platform."""
        else:
            return f"""The application follows a modular architecture with {len(self.analysis.symbols)} defined symbols and {len(self.analysis.call_relations)} call relationships.

Key architectural elements:
- **Functions:** {len([s for s in self.analysis.symbols if s.type == 'function'])}
- **Classes:** {len([s for s in self.analysis.symbols if s.type == 'class'])}
- **Methods:** {len([s for s in self.analysis.symbols if s.type == 'method'])}"""

    def _generate_execution_flow_description(self) -> str:
        """Generate execution flow description based on detected flows."""
        if not self.analysis.execution_flows:
            return "The application execution flow has not been fully analyzed. Please refer to the main entry points for startup sequence."

        # Prefer a flow that looks like the program's main entry.
        main_flow = None
        for flow in self.analysis.execution_flows:
            if 'main' in flow.name.lower() or flow.entry_point == 'main':
                main_flow = flow
                break

        if main_flow:
            steps_description = " → ".join(main_flow.steps[:4])
            if len(main_flow.steps) > 4:
                steps_description += " → ..."

            return f"""The application starts through the `{main_flow.entry_point}()` function which follows this sequence:

{steps_description}

The runtime execution involves {len(self.analysis.execution_flows)} major flows including startup, core business operations, and data processing."""
        else:
            return f"The application implements {len(self.analysis.execution_flows)} execution flows for different operational scenarios. The main entry points handle initialization, business logic execution, and system coordination."

    def _generate_core_components(self) -> str:
        """Generate core components description (domain models + service classes)."""
        content = ""

        # Domain Models
        domain_entities = self.analysis.domain_entities
        if domain_entities:
            content += "### Domain Models\n\n"
            for entity in domain_entities[:5]:  # Limit to top 5
                content += f"- **{entity.name}**: "
                if entity.fields:
                    content += f"Manages {', '.join(entity.fields[:3])}{'...' if len(entity.fields) > 3 else ''}"
                if entity.methods:
                    content += f"\n  - Methods: {', '.join([f'`{m}()`' for m in entity.methods[:3]])}"
                content += "\n\n"

        # Service Classes (if detected)
        service_classes = [s for s in self.analysis.symbols if s.type == 'class' and 'service' in s.name.lower()]
        if service_classes:
            content += "### Service Classes\n\n"
            for service in service_classes[:5]:
                content += f"- **{service.name}**: "
                # Try to infer responsibility from name -- heuristic only.
                if 'user' in service.name.lower():
                    content += "User lifecycle management, authentication, and CRUD operations"
                elif 'payment' in service.name.lower():
                    content += "Payment creation, processing, and retrieval"
                elif 'wallet' in service.name.lower():
                    content += "Wallet management and fund transfers"
                else:
                    content += f"Business logic implementation for {service.name.replace('Service', '').lower()} operations"
                content += "\n"

        return content if content else "Core components are organized as functions and classes providing the main application functionality."

    def _generate_project_structure(self) -> str:
        """Generate project structure description.

        Files are the set of paths that contain symbols; sorting *before*
        slicing makes the 8 displayed files deterministic (the original
        sliced the unordered set first, so the selection varied per run).
        """
        important_files = set()
        for symbol in self.analysis.symbols:
            rel_path = symbol.file_path.relative_to(self.analysis.root_path)
            important_files.add(str(rel_path))

        structure = f"```\n{self.analysis.root_path.name}/\n"
        for file_path in sorted(important_files)[:8]:  # Limit to 8 files
            file_name = Path(file_path).name
            if 'main' in file_name.lower():
                structure += f"├── {file_name}           # Application entry point and configuration\n"
            elif 'model' in file_name.lower():
                structure += f"├── {file_name}           # Domain entities and business objects\n"
            elif 'service' in file_name.lower():
                structure += f"├── {file_name}           # Business logic and service implementations\n"
            elif file_name == '__init__.py':
                structure += f"└── {file_name}           # Package initialization\n"
            else:
                structure += f"├── {file_name}\n"
        structure += "```"

        return structure

    def _generate_run_instructions(self) -> str:
        """Generate run instructions based on entry points."""
        if self.analysis.entry_points:
            main_entry = self.analysis.entry_points[0]
            rel_path = main_entry.relative_to(self.analysis.root_path)

            if str(rel_path) == 'main.py':
                return """Execute the application using:

```bash
python main.py
```

The application will initialize the configuration, set up database connections, and start the web server."""
            else:
                return f"""Execute the application using:

```bash
python {rel_path}
```

This will start the main application process."""
        else:
            return "Please refer to the project documentation for specific run instructions."

    def _generate_artifacts_description(self) -> str:
        """Generate static description of the analysis artifacts this tool emits."""
        return """The following analysis artifacts provide insights into the codebase structure:

- **callgraph.html**: Interactive visualization showing function call relationships and execution flow from main entry points through service layers
- **report.html**: Comprehensive codebase analysis including metrics, complexity scores, and architectural overview
- **architecture.md**: Detailed breakdown of system components, domain entities, and execution flows
- **flows.md**: Documentation of identified execution paths including startup flow and business operations
- **ai-context.md**: Semantic understanding of the codebase optimized for AI-assisted development and maintenance"""

    def _generate_development_notes(self) -> str:
        """Generate development notes based on detected code patterns."""
        notes = []

        # Check for common patterns
        if self._has_service_pattern():
            notes.append("**Service Layer Pattern**: Business logic is encapsulated in dedicated service classes rather than being embedded in domain models")

        if self._has_dependency_injection():
            notes.append("**Dependency Injection**: Services are injected into components for better testability and modularity")

        if self._uses_dataclasses():
            notes.append("**Domain-Driven Design**: Core business concepts are modeled as first-class entities with their own behaviors")

        # Add language-specific notes
        if 'Python' in self.analysis.languages:
            notes.append("The system uses Python's `dataclass` decorator for clean domain modeling and leverages appropriate types for data handling")

        # Add architectural notes
        if len(self.analysis.execution_flows) > 2:
            notes.append(f"Key data flows include: {', '.join([flow.name.replace('_', ' ') for flow in self.analysis.execution_flows[:3]])}")

        content = "The application implements several key design patterns:\n\n"
        for note in notes:
            content += f"- {note}\n"

        return content

    def _generate_future_improvements(self) -> str:
        """Generate realistic future improvements (heuristic suggestions)."""
        improvements = []

        # Based on domain entities
        if any('payment' in entity.name.lower() for entity in self.analysis.domain_entities):
            improvements.append("**Event-Driven Architecture**: Implement domain events for payment processing and wallet transactions to enable better audit trails and integration with external systems")

        # Based on architecture
        if self._has_service_pattern():
            improvements.append("**API Layer**: Add REST API endpoints with proper authentication and authorization to expose the service functionality to external clients")

        # Generic improvements
        improvements.append("**Persistent Storage**: Integrate with a proper database system (PostgreSQL/MySQL) replacing any in-memory data structures with persistent storage and transaction support")

        content = ""
        for i, improvement in enumerate(improvements, 1):
            content += f"{i}. {improvement}\n\n"

        return content.rstrip()

    def _has_service_pattern(self) -> bool:
        """Check if the codebase uses the service pattern (a *Service* class exists)."""
        return any('service' in symbol.name.lower() for symbol in self.analysis.symbols if symbol.type == 'class')

    def _has_dependency_injection(self) -> bool:
        """Check if the codebase appears to use dependency injection.

        Heuristic: any ``__init__`` with more than two parameters (beyond
        ``self`` plus one) is treated as taking injected collaborators.
        """
        for symbol in self.analysis.symbols:
            if symbol.type == 'method' and symbol.name.endswith('.__init__') and len(symbol.parameters) > 2:
                return True
        return False

    def _uses_dataclasses(self) -> bool:
        """Check if the codebase uses dataclasses.

        Assumes decorators are recorded as the bare name 'dataclass' --
        a '@dataclass(...)' spelling would not match; verify against the
        Python parser's decorator extraction.
        """
        return any('dataclass' in symbol.decorators for symbol in self.analysis.symbols if symbol.decorators)

    def _get_primary_language(self) -> str:
        """Get the primary programming language, deterministically.

        ``languages`` is an unordered set; ``list(...)[0]`` picked an
        arbitrary element, making the README vary between runs. Sorting
        yields a stable (alphabetically first) choice. Defaults to
        "Python" when no languages were detected.
        """
        if self.analysis.languages:
            return sorted(self.analysis.languages)[0]
        return "Python"