codegraph-gen 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,311 @@
1
+ from pathlib import Path
2
+ import click
3
+ from rich.console import Console
4
+ from rich.table import Table
5
+ from rich.progress import (
6
+ Progress,
7
+ SpinnerColumn,
8
+ TextColumn,
9
+ BarColumn,
10
+ MofNCompleteColumn,
11
+ )
12
+
13
+ from codegraph_gen.config import CodegraphConfig, DEFAULT_EXCLUSIONS
14
+
15
# Shared Rich console used by every command in this module for styled output.
console = Console()
16
+
17
+
18
# Root command group; subcommands register themselves via ``@cli.command()``.
# The docstring below is the text click shows for ``--help``, so it is kept
# verbatim; it also serves as the (otherwise empty) function body.
@click.group()
def cli():
    """codegraph - Build a Markdown knowledge graph of your codebase for AI analysis."""
22
+
23
+
24
@cli.command()
@click.argument(
    "src_dir",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=".",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    default=Path(".codegraph"),
    help="Directory where the Markdown vault will be written.",
)
@click.option(
    "--exclude",
    "-e",
    multiple=True,
    type=str,
    help="Additional folder names/patterns to exclude from scanning.",
)
@click.option(
    "--parallel/--no-parallel",
    default=True,
    help="Enable/disable parallel parsing (using multiprocessing).",
)
@click.option(
    "--workers",
    "-w",
    # Reject 0 and negative counts at the CLI boundary instead of forwarding
    # a meaningless worker count into the engine configuration.
    type=click.IntRange(min=1),
    default=None,
    help="Number of worker processes to use for parallel parsing.",
)
@click.option(
    "--cache/--no-cache",
    default=True,
    help="Enable/disable incremental parsing cache.",
)
def build(
    src_dir: Path,
    output: Path,
    exclude: tuple[str, ...],
    parallel: bool,
    workers: int | None,
    cache: bool,
):
    """Parses the codebase in SRC_DIR and exports the Markdown graph vault."""
    # NOTE: the docstring above is the command's ``--help`` text, so the
    # maintainer documentation lives in comments instead.
    #
    # Parameters (all supplied by click):
    #   src_dir  - existing directory to analyze (defaults to ".").
    #   output   - directory for the generated vault (defaults to ".codegraph").
    #   exclude  - extra exclusion names/patterns; click passes a tuple because
    #              the option is declared with ``multiple=True``.
    #   parallel - when False, forces a single worker.
    #   workers  - explicit worker count (>= 1), or None to use the CPU count.
    #   cache    - forwarded to the configuration as ``use_cache``.
    import os

    console.print("[bold blue]Starting codegraph analysis...[/bold blue]")

    # 1. Prepare configuration
    exclusions = set(DEFAULT_EXCLUSIONS)
    exclusions.update(exclude)

    if not parallel:
        max_workers = 1
    elif workers is not None:
        max_workers = workers
    else:
        # os.cpu_count() may return None; fall back to 4 workers in that case.
        max_workers = os.cpu_count() or 4

    config = CodegraphConfig(
        workspace_dir=src_dir.resolve(),
        output_dir=output.resolve(),
        exclusions=exclusions,
        max_workers=max_workers,
        use_cache=cache,
    )

    # Imported here rather than at module level, as in the original code.
    from codegraph_gen.engine import CodegraphEngine, PipelineStage

    engine = CodegraphEngine(config)

    # Stages that only change the progress description (no counters).
    stage_descriptions = {
        PipelineStage.DISCOVERING: "Discovering source files...",
        PipelineStage.BUILDING: "Building reference graph...",
        PipelineStage.CLUSTERING: "Clustering components...",
        PipelineStage.ANALYZING: "Analyzing graph metrics...",
        PipelineStage.RENDERING: "Rendering Markdown vault...",
        PipelineStage.WRITING: "Writing files to disk...",
        PipelineStage.COMPLETED: "Done!",
    }

    # 2. Run the pipeline under a Rich progress display
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        console=console,
    ) as progress:
        task = progress.add_task("Initializing...", total=None)

        def progress_callback(stage: PipelineStage, current_item, idx, total):
            if stage == PipelineStage.PARSING:
                # Parsing is the only stage with a per-item counter.
                if total and total > 0:
                    # Tolerates total=None, which `total > 0` alone would not.
                    progress.update(task, total=total)
                progress.update(
                    task,
                    description=f"Parsing {current_item.name if current_item else ''}",
                    completed=idx,
                )
                return

            description = stage_descriptions.get(stage)
            if description is not None:
                progress.update(task, description=description)

        result = engine.run_pipeline(progress_callback=progress_callback)

    # 3. Report results
    G = result.graph
    if G.number_of_nodes() == 0:
        console.print("[bold yellow]Completed build, but graph is empty.[/bold yellow]")
        return

    files_count = len(result.files)
    # Every node that is not a file is counted as a symbol.
    symbols_count = G.number_of_nodes() - files_count

    console.print(f"Found [green]{files_count}[/green] supported files to analyze.")
    console.print(
        f"Assembled graph with [green]{G.number_of_nodes()}[/green] nodes and [green]{G.number_of_edges()}[/green] edges."
    )
    console.print(f" - Files: {files_count}")
    console.print(f" - Symbols (Classes/Functions/Methods): {symbols_count}")

    console.print(
        "[bold green]Success! Codebase knowledge graph built successfully.[/bold green]"
    )

    table = Table(title="Logical Components Summary")
    table.add_column("Component Name", style="cyan", no_wrap=True)
    table.add_column("Cohesion (Density)", style="magenta")
    table.add_column("Size (Nodes)", style="green")

    for cid, members in result.components.items():
        table.add_row(
            result.component_names[cid],
            str(result.cohesion_scores[cid]),
            str(len(members)),
        )

    console.print(table)
    console.print(
        f"\nView the main graph entrypoint at: [bold underline]{config.absolute_output_dir}/README.md[/bold underline]"
    )
    console.print(
        f"💡 [bold yellow]AI Insight Tip:[/bold yellow] Ask your AI Agent (e.g. Antigravity, Claude Code, Codex) to read [bold]{config.absolute_output_dir}/AGENT_PROMPT.md[/bold] and write the architectural report directly to [bold]{config.absolute_output_dir}/README.md[/bold].\n"
    )
172
+
173
+
174
@cli.command()
@click.option(
    "--platform",
    "-p",
    default="codex",
    type=click.Choice(["codex", "antigravity"]),
    help="The AI agent platform to integrate with.",
)
def install(platform: str):
    """Installs the codegraph slash command into your AI Agent's global config."""
    # NOTE: the docstring above is the command's ``--help`` text, so the
    # maintainer documentation lives in comments instead.
    #
    # Writes ``SKILL.md`` into a per-platform skills directory under the
    # user's home directory. On an OS-level failure the error is printed and
    # the process exits with status 1, so scripts can detect the failure.
    console.print(
        f"[bold blue]Installing codegraph integration for {platform}...[/bold blue]"
    )

    # 1. Resolve skills directory based on target platform
    codex_dir = Path.home() / ".codex" / "skills" / "codegraph"
    platform_dirs = {
        "codex": codex_dir,
        "antigravity": Path.home() / ".gemini" / "config" / "skills" / "codegraph",
    }
    # click.Choice already restricts the value; the codex fallback only
    # matters if the underlying function is invoked with another value.
    skills_dir = platform_dirs.get(platform, codex_dir)

    # 2. Skill file content
    # The bootstrap snippet installs the ``codegraph-gen`` distribution (the
    # name this package is published under), not an unrelated ``codegraph``.
    skill_content = """---
name: codegraph
description: "Build a Markdown codebase knowledge graph using codegraph, perform logical component clustering, analyze god nodes/circular dependencies, and write deep architectural insights to .codegraph/README.md."
trigger: /codegraph
---

# /codegraph

Build a codebase knowledge graph using `codegraph` for any folder, cluster symbols into logical components, detect god nodes and cycles, and perform a deep architectural analysis to write insights directly to the `.codegraph/README.md` vault.

## Usage

```
/codegraph # Run the full build & AI analysis pipeline on the current directory
/codegraph <path> # Run the pipeline on a specific subfolder/path
/codegraph --exclude <pattern> # Build and exclude specific folders/patterns
```

## What You Must Do When Invoked

If the user invoked `/codegraph` with no path, do not ask the user for a path. Instead of scanning the entire project root directory `.` (which may include non-essential scripts, docs, or huge subfolders), you MUST prioritize targeting the primary source directory (e.g. `src/`, `lib/`, `app/`) and test directory (e.g. `tests/`, `test/`).
- If specific source or test folders are found, run the build targeting those folders, or build the root `.` but exclude other non-code/non-test directories (e.g., `docs/`, `scripts/`, `examples/`) using the `--exclude` flag to keep the graph focused on code and tests.
- Otherwise, default to `.` (current directory).

Follow these steps in order. Do not skip any steps.

### Step 1 - Ensure codegraph is installed

Check and locate the `codegraph` executable. To support virtual environments, resolve the binary in the following priority order:
1. Local virtual environment: `.venv/bin/codegraph` or `venv/bin/codegraph`
2. Global command: `codegraph` (installed globally or via uv tool)

You can use this shell logic to resolve the executable:
```bash
if [ -f ".venv/bin/codegraph" ]; then
CODEGRAPH_BIN=".venv/bin/codegraph"
elif [ -f "venv/bin/codegraph" ]; then
CODEGRAPH_BIN="venv/bin/codegraph"
else
if ! command -v codegraph >/dev/null 2>&1; then
uv tool install codegraph-gen
fi
CODEGRAPH_BIN="codegraph"
fi
echo "Using codegraph binary: $CODEGRAPH_BIN"
```

### Step 2 - Build the Knowledge Graph

Run the resolved `$CODEGRAPH_BIN` on the specified directory:
```bash
$CODEGRAPH_BIN build INPUT_PATH
# Or with additional exclude arguments if provided by the user
```
*(Replace `INPUT_PATH` with the resolved target path, e.g. `.`)*

If the command fails or errors out, capture the terminal stderr/logs, display them to the user with a helpful explanation, and ask them if they want to exclude specific directories or fix the errors. Do not fail silently.

### Step 3 - Perform Deep Architectural Analysis

Once the graph is built successfully:
1. Read the newly generated `<path>/.codegraph/AGENT_PROMPT.md` file using your file reading tools.
2. Read the project statistics, communities, god nodes, and cycle warnings from it.
3. Perform a deep, professional architectural review of the codebase (using **English** as the report language), combined with deep insight analysis of the code implementation of existing features.
4. Focus your review on:
- **System Architecture Evaluation**: Explain the design patterns, modularity level, and alignment between physical directories and logical components in the codebase.
- **Core Abstractions & Boundary Evaluation**: Deeply analyze God Nodes to determine which ones are core support and which ones have excessive responsibilities (God Object / Fat Class) that may lead to high risk.
- **Potential Bottlenecks & Architectural Refactoring Recommendations**: Point out high-coupling risk points and negative impacts of circular dependencies, and provide specific, actionable refactoring optimization plans (e.g., decoupling, extracting interfaces, dependency inversion).
5. Read the existing `<path>/.codegraph/README.md` first. If there's an existing `## AI Architectural Insights` section, merge your new findings with it rather than silently overwriting and discarding previous edits.
6. Write the completed report into `<path>/.codegraph/README.md` under the `## AI Architectural Insights` section, replacing any placeholder instructions.

### Step 4 - Present Summary to the User

Finally, reply to the user in English, summarizing:
- The graph statistics (number of files, symbols, edges).
- The logical component summary (with sizes and cohesion scores).
- A brief bulleted summary of your key architectural findings and recommendations.
- Clickable markdown links pointing to:
- The main entrypoint: `[README.md](file:///<absolute_path_to_vault>/README.md)`
- The agent guidelines: `[AGENTS.md](file:///<absolute_path_to_vault>/AGENTS.md)`
- The detailed components folder: `[components/](file:///<absolute_path_to_vault>/components/)`
"""

    # 3. Write the skill file
    skill_file = skills_dir / "SKILL.md"
    try:
        skills_dir.mkdir(parents=True, exist_ok=True)
        skill_file.write_text(skill_content, encoding="utf-8")
    except OSError as e:
        console.print(f"[bold red]Failed to write skill configuration: {e}[/bold red]")
        # Exit non-zero so callers do not mistake a failed install for success.
        raise SystemExit(1) from e

    console.print(
        f"[bold green]Successfully installed /codegraph slash command to: [underline]{skill_file}[/underline][/bold green]"
    )
289
+
290
+
291
@cli.command()
def info():
    """Prints tool info and supported languages."""
    # NOTE: the docstring above is the command's ``--help`` text, so the
    # maintainer documentation lives in comments instead.
    from importlib.metadata import version

    # The wheel is published as "codegraph-gen", so look that up first.
    # "codegraph" is kept as a fallback for backward compatibility with the
    # previous lookup.
    ver = "0.2.0"  # hard-coded fallback when no distribution metadata is found
    for dist_name in ("codegraph-gen", "codegraph"):
        try:
            ver = version(dist_name)
        except Exception:
            # Best-effort: missing or unreadable metadata must not break `info`.
            continue
        break

    console.print(f"[bold]codegraph v{ver}[/bold]")
    console.print(
        "Supported languages: Python, JavaScript, TypeScript, Go, Rust, Swift"
    )
304
+
305
+
306
def main():
    """Run the ``cli`` command group."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
codegraph_gen/ai.py ADDED
@@ -0,0 +1,77 @@
1
+ import logging
2
+ import networkx as nx
3
+
4
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
5
+
6
+
7
def build_agent_prompt(
    G: nx.DiGraph,
    components: dict[int, list[str]],
    cohesion_scores: dict[int, float],
    component_names: dict[int, str],
    god_nodes: list[dict],
    cycles: list[list[str]],
    mermaid_graph: str,
) -> str:
    """
    Render the Markdown prompt that an external AI agent reads to produce an
    architectural review of the analyzed codebase.

    Args:
        G: Code graph; nodes whose ``type`` attribute is ``"file"`` are counted
            as files and every other node as a symbol.
        components: Component id -> member node ids.
        cohesion_scores: Component id -> cohesion value (printed with 4 decimals).
        component_names: Component id -> display name.
        god_nodes: Dicts with ``id``, ``label``, ``type`` and ``degree`` keys.
        cycles: Import cycles, each a list of node ids.
        mermaid_graph: Mermaid source embedded verbatim in the prompt.

    Returns:
        The complete prompt text.
    """
    logger.info("Generating agent analysis prompt based on graph structure...")

    # Graph statistics
    total_nodes = G.number_of_nodes()
    files_count = len(
        [1 for _, attrs in G.nodes(data=True) if attrs.get("type") == "file"]
    )
    symbols_count = total_nodes - files_count
    edges_count = G.number_of_edges()

    # One bullet per component
    comp_str = "\n".join(
        f"- {component_names[cid]}: cohesion={cohesion_scores[cid]:.4f}, members_count={len(members)}"
        for cid, members in components.items()
    )

    # One bullet per god node, annotated with the file it is defined in
    god_lines = []
    for entry in god_nodes:
        source_file = G.nodes[entry["id"]].get("source_file", "")
        god_lines.append(
            f"- {entry['label']} ({entry['type']}): degree={entry['degree']}, file={source_file}"
        )
    god_str = "\n".join(god_lines)

    # Each cycle is closed by repeating its first node at the end
    rendered_cycles = [" -> ".join(path + [path[0]]) for path in cycles]
    if rendered_cycles:
        cycle_str = "\n".join(rendered_cycles)
    else:
        cycle_str = "No circular dependencies"

    return f"""# Codebase Architecture Analysis Prompt

You are a senior software architecture expert. Based on the codebase knowledge graph metadata and relationships between major components provided below, write a profound "AI Architectural Insights Report" for this project (written in English).

[Codebase Graph Statistics]
- Number of physical files: {files_count}
- Number of symbols (classes/structs/functions/methods): {symbols_count}
- Total number of dependency and call edges: {edges_count}

[Modularity Components]
{comp_str}

[Component Dependency Graph (Mermaid Flowchart)]
```mermaid
{mermaid_graph}
```

[God Nodes (degree represents the total number of connected symbols)]
{god_str}

[File-level Circular Import Dependencies (Import Cycles)]
{cycle_str}

Please provide deep architectural insights based on the codebase structure and component relationships, focusing on the following three aspects:
1. **System Architecture Evaluation**: Explain the design patterns, modularity level, and alignment between physical directories and logical components in the codebase.
2. **Core Abstractions & Boundary Evaluation**: Deeply analyze God Nodes to determine which ones are core support and which ones have excessive responsibilities (God Object / Fat Class) that may lead to high risk.
3. **Potential Bottlenecks & Architectural Refactoring Recommendations**: Point out high-coupling risk points and negative impacts of circular dependencies, and provide specific, actionable refactoring optimization plans (e.g., decoupling, extracting interfaces, dependency inversion).

Please output in standard Markdown format, clear and professional, without code block wrapper markers like ```markdown and ``` at the beginning and end. Output the content directly.
"""
@@ -0,0 +1,100 @@
1
+ import logging
2
+ import networkx as nx
3
+ from pydantic import BaseModel
4
+
5
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
6
+
7
+
8
class AnalysisResult(BaseModel):
    """Container for the metrics computed by ``analyze_graph``."""

    # Most connected nodes, as produced by ``find_god_nodes``.
    god_nodes: list[dict]
    # File-level import cycles, as produced by ``find_import_cycles``.
    cycles: list[list[str]]
    # source component id -> {target component id -> edge count}.
    inter_comp_deps: dict[int, dict[int, int]]
12
+
13
+
14
def find_god_nodes(G: nx.DiGraph, top_n: int = 10) -> list[dict]:
    """
    Return the ``top_n`` nodes with the highest degree, most connected first.

    Each entry is a dict with the node ``id``, its ``label`` (falling back to
    the id), its ``type`` (falling back to ``"unknown"``) and its ``degree``.
    """

    def describe(node_id, degree):
        attrs = G.nodes[node_id]
        return {
            "id": node_id,
            "label": attrs.get("label", node_id),
            "type": attrs.get("type", "unknown"),
            "degree": degree,
        }

    degree_by_node = dict(G.degree())
    # Stable sort: nodes with equal degree keep their original relative order.
    ranked = sorted(
        degree_by_node.items(), key=lambda pair: pair[1], reverse=True
    )
    return [describe(node_id, degree) for node_id, degree in ranked[:top_n]]
34
+
35
+
36
def find_import_cycles(G: nx.DiGraph) -> list[list[str]]:
    """
    Find circular imports between file nodes.

    Works on a copy of the subgraph induced by nodes whose ``type`` is
    ``"file"``, restricted to edges whose ``relation`` is ``"imports"``.
    Returns the simple cycles ordered from shortest to longest, or an empty
    list if cycle detection raises.
    """
    only_files = [
        node for node, attrs in G.nodes(data=True) if attrs.get("type") == "file"
    ]
    # Copy so that pruning edges below does not touch the caller's graph.
    import_graph = G.subgraph(only_files).copy()

    # Drop every edge that is not an import relation.
    import_graph.remove_edges_from(
        [
            (src, dst)
            for src, dst, attrs in import_graph.edges(data=True)
            if attrs.get("relation") != "imports"
        ]
    )

    try:
        found = sorted(nx.simple_cycles(import_graph), key=len)
    except Exception as e:
        logger.error(f"Error finding import cycles: {e}")
        return []
    return found
62
+
63
+
64
def calculate_inter_component_dependencies(
    G: nx.DiGraph, components: dict[int, list[str]]
) -> dict[int, dict[int, int]]:
    """
    Count the edges that cross component boundaries.

    Args:
        G: Graph whose ``edges()`` yields ``(source, target)`` node pairs.
        components: Component id -> member node ids.

    Returns:
        dict mapping source_component_id -> { target_component_id -> connection_count }.
        Every component id appears as a key, even when it has no outgoing
        cross-component edges. Edges with an endpoint that belongs to no
        component, and edges inside a single component, are ignored.
    """
    inter_comp_deps: dict[int, dict[int, int]] = {cid: {} for cid in components}

    # Map member to component for O(1) lookups
    member_to_comp = {
        member: cid for cid, members in components.items() for member in members
    }

    for u, v in G.edges():
        u_comp = member_to_comp.get(u)
        v_comp = member_to_comp.get(v)
        # Compare against None explicitly: component id 0 is a valid id but is
        # falsy, so a plain truthiness test would drop all of its edges.
        if u_comp is None or v_comp is None or u_comp == v_comp:
            continue
        targets = inter_comp_deps[u_comp]
        targets[v_comp] = targets.get(v_comp, 0) + 1

    return inter_comp_deps
87
+
88
+
89
def analyze_graph(G: nx.DiGraph, components: dict[int, list[str]]) -> AnalysisResult:
    """
    Compute all graph metrics and bundle them into an ``AnalysisResult``:
    the top 10 god nodes, the file-level import cycles, and the dependency
    counts between components.
    """
    logger.info("Analyzing codebase graph metrics...")
    # Keyword arguments are evaluated top to bottom, matching the order in
    # which the individual analyses have always been run.
    return AnalysisResult(
        god_nodes=find_god_nodes(G, 10),
        cycles=find_import_cycles(G),
        inter_comp_deps=calculate_inter_component_dependencies(G, components),
    )