codegraph-gen 0.2.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/PKG-INFO +26 -28
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/README.md +23 -26
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/pyproject.toml +3 -3
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__main__.py +5 -5
- codegraph_gen-1.1.0/src/codegraph_gen/builder.py +27 -0
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/config.py +1 -1
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/detect.py +9 -5
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/engine.py +23 -20
- codegraph_gen-1.1.0/src/codegraph_gen/parser/__init__.py +31 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/base.py +154 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/cpp.py +335 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/go.py +259 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/javascript.py +345 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/kotlin.py +351 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/python.py +360 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/rust.py +450 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/swift.py +306 -0
- codegraph_gen-1.1.0/src/codegraph_gen/resolver.py +650 -0
- codegraph_gen-1.1.0/src/codegraph_gen/resolver_strategy.py +411 -0
- codegraph_gen-0.2.0/src/codegraph_gen/parser/base.py → codegraph_gen-1.1.0/src/codegraph_gen/schema.py +15 -9
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/writer.py +3 -3
- codegraph_gen-0.2.0/src/codegraph_gen/builder.py +0 -747
- codegraph_gen-0.2.0/src/codegraph_gen/parser/__init__.py +0 -27
- codegraph_gen-0.2.0/src/codegraph_gen/parser/cpp.py +0 -349
- codegraph_gen-0.2.0/src/codegraph_gen/parser/go.py +0 -268
- codegraph_gen-0.2.0/src/codegraph_gen/parser/javascript.py +0 -370
- codegraph_gen-0.2.0/src/codegraph_gen/parser/kotlin.py +0 -387
- codegraph_gen-0.2.0/src/codegraph_gen/parser/python.py +0 -415
- codegraph_gen-0.2.0/src/codegraph_gen/parser/rust.py +0 -497
- codegraph_gen-0.2.0/src/codegraph_gen/parser/swift.py +0 -327
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__init__.py +0 -0
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/ai.py +0 -0
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/analyzer.py +0 -0
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/cluster.py +0 -0
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/py.typed +0 -0
- {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/renderer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: codegraph-gen
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: AST-based codebase knowledge graph generator in Markdown
|
|
5
5
|
Keywords: knowledge-graph,ast,codebase,markdown,tree-sitter,visualization,static-analysis,ai-agent,obsidian
|
|
6
6
|
Author: twn39
|
|
@@ -10,6 +10,7 @@ Classifier: Development Status :: 4 - Beta
|
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
15
|
Classifier: Topic :: Software Development :: Code Generators
|
|
15
16
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
@@ -27,23 +28,30 @@ Requires-Dist: pydantic>=2.0.0
|
|
|
27
28
|
Requires-Dist: tree-sitter-c>=0.24.2
|
|
28
29
|
Requires-Dist: tree-sitter-cpp>=0.23.4
|
|
29
30
|
Requires-Dist: tree-sitter-kotlin>=1.1.0
|
|
30
|
-
Requires-Python: >=3.12
|
|
31
|
+
Requires-Python: >=3.11
|
|
31
32
|
Project-URL: Homepage, https://github.com/twn39/codegraph
|
|
32
33
|
Project-URL: Repository, https://github.com/twn39/codegraph
|
|
33
34
|
Project-URL: Issues, https://github.com/twn39/codegraph/issues
|
|
34
35
|
Description-Content-Type: text/markdown
|
|
35
36
|
|
|
36
|
-
|
|
37
|
+
<h1 align="center">codegraph-gen</h1>
|
|
37
38
|
|
|
38
|
-
|
|
39
|
+
<p align="center">
|
|
40
|
+
<a href="https://pypi.org/project/codegraph-gen/"><img src="https://img.shields.io/pypi/v/codegraph-gen.svg" alt="PyPI Version"></a>
|
|
41
|
+
<img src="https://img.shields.io/badge/python-3.11+-blue.svg" alt="Python Version">
|
|
42
|
+
<img src="https://img.shields.io/badge/code%20style-ruff-26b22f.svg" alt="Ruff">
|
|
43
|
+
<img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License">
|
|
44
|
+
</p>
|
|
39
45
|
|
|
40
|
-
|
|
46
|
+
`codegraph-gen` 是一个面向 AI Agent(如 Antigravity、Codex、Claude Code 等)的静态代码知识图谱生成工具。它能够静态解析多语言 codebase,通过社区发现算法自动进行组件聚类,并导出为由标准 Markdown 文件组成的关联图谱库(Obsidian-like vault),极大地辅助 AI Agent 在本地进行精准的架构理解、逻辑导航与深度洞察分析。
|
|
47
|
+
|
|
48
|
+
与基于图形化 Canvas 渲染的知识图谱不同,`codegraph-gen` 采用全 Markdown 的扁平结构存储。它专门为 LLM 设计,摒弃了昂贵且复杂的数据库依赖,让 AI Agent 可以通过标准文件读取与路径导航(Relative Links)轻松周游整个代码库。
|
|
41
49
|
|
|
42
50
|
---
|
|
43
51
|
|
|
44
52
|
## 🚀 核心特性
|
|
45
53
|
|
|
46
|
-
- **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Go, Rust, Swift**。
|
|
54
|
+
- **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift**。
|
|
47
55
|
- **语义边解析与绑定**:静态解析跨文件的函数/方法调用(`calls`)、类型继承/接口实现(`inherits`/`implements`)以及文件导入关系(`imports`)。
|
|
48
56
|
- **逻辑组件自动聚类**:利用贪心模块度社区发现算法(Louvain Modularity Clustering)将紧密耦合的文件和符号自动聚类为 **Component(逻辑组件)**,并根据组件核心节点智能命名。
|
|
49
57
|
- **架构脆弱性分析**:自动识别 **God Nodes(度数最高的核心抽象)**,并静态检测文件级别的 **循环导入依赖(Circular Imports)**。
|
|
@@ -53,39 +61,29 @@ Description-Content-Type: text/markdown
|
|
|
53
61
|
|
|
54
62
|
## 📦 架构概览
|
|
55
63
|
|
|
56
|
-
|
|
57
|
-
graph TD
|
|
58
|
-
A[工作区源码 Workspace] --> B[detect: 语言识别与过滤]
|
|
59
|
-
B --> C[parser: Tree-Sitter AST 符号提取]
|
|
60
|
-
C --> D[builder: NetworkX 语义图组装与绑定]
|
|
61
|
-
D --> E[cluster: 社区模块度聚类命名]
|
|
62
|
-
E --> F[analyze: 上帝节点与循环导入分析]
|
|
63
|
-
F --> G[export: 导出至 .codegraph/]
|
|
64
|
-
G --> H[AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components]
|
|
65
|
-
```
|
|
64
|
+
- **工作区源码 Workspace** -> **detect: 语言识别与过滤** -> **parser: Tree-Sitter AST 符号提取** -> **builder: NetworkX 语义图组装与绑定** -> **cluster: 社区模块度聚类命名** -> **analyze: 上帝节点与循环导入分析** -> **export: 导出至 .codegraph/** -> **生成 AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components**
|
|
66
65
|
|
|
67
66
|
---
|
|
68
67
|
|
|
69
68
|
## 🛠️ 安装指南
|
|
70
69
|
|
|
71
|
-
推荐使用 [uv](https://github.com/astral-sh/uv)
|
|
70
|
+
推荐使用 [uv](https://github.com/astral-sh/uv) 直接安装工具:
|
|
72
71
|
|
|
73
72
|
```bash
|
|
74
|
-
#
|
|
75
|
-
|
|
76
|
-
|
|
73
|
+
# 全局安装 (推荐)
|
|
74
|
+
uv tool install codegraph-gen
|
|
75
|
+
```
|
|
77
76
|
|
|
78
|
-
|
|
79
|
-
uv sync
|
|
80
|
-
source .venv/bin/activate
|
|
77
|
+
如果需要在当前虚拟环境中作为库依赖安装:
|
|
81
78
|
|
|
82
|
-
|
|
83
|
-
|
|
79
|
+
```bash
|
|
80
|
+
# 安装至当前项目/环境
|
|
81
|
+
uv pip install codegraph-gen
|
|
84
82
|
```
|
|
85
83
|
|
|
86
|
-
###
|
|
84
|
+
### 注册 AI Agent 斜杠命令
|
|
87
85
|
|
|
88
|
-
`codegraph` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
|
|
86
|
+
`codegraph-gen` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
|
|
89
87
|
|
|
90
88
|
```bash
|
|
91
89
|
# 为 Codex / Antigravity 注入 /codegraph 全局斜杠命令
|
|
@@ -130,7 +128,7 @@ codegraph build . --exclude extra_folder/ --exclude docs/
|
|
|
130
128
|
|
|
131
129
|
## 🤖 与 AI Agent(Codex / Antigravity / Claude Code)协同分析
|
|
132
130
|
|
|
133
|
-
`codegraph` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
|
|
131
|
+
`codegraph-gen` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
|
|
134
132
|
|
|
135
133
|
### 步骤 1:生成本地图谱
|
|
136
134
|
|
|
@@ -1,14 +1,21 @@
|
|
|
1
|
-
|
|
1
|
+
<h1 align="center">codegraph-gen</h1>
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<p align="center">
|
|
4
|
+
<a href="https://pypi.org/project/codegraph-gen/"><img src="https://img.shields.io/pypi/v/codegraph-gen.svg" alt="PyPI Version"></a>
|
|
5
|
+
<img src="https://img.shields.io/badge/python-3.11+-blue.svg" alt="Python Version">
|
|
6
|
+
<img src="https://img.shields.io/badge/code%20style-ruff-26b22f.svg" alt="Ruff">
|
|
7
|
+
<img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License">
|
|
8
|
+
</p>
|
|
4
9
|
|
|
5
|
-
|
|
10
|
+
`codegraph-gen` 是一个面向 AI Agent(如 Antigravity、Codex、Claude Code 等)的静态代码知识图谱生成工具。它能够静态解析多语言 codebase,通过社区发现算法自动进行组件聚类,并导出为由标准 Markdown 文件组成的关联图谱库(Obsidian-like vault),极大地辅助 AI Agent 在本地进行精准的架构理解、逻辑导航与深度洞察分析。
|
|
11
|
+
|
|
12
|
+
与基于图形化 Canvas 渲染的知识图谱不同,`codegraph-gen` 采用全 Markdown 的扁平结构存储。它专门为 LLM 设计,摒弃了昂贵且复杂的数据库依赖,让 AI Agent 可以通过标准文件读取与路径导航(Relative Links)轻松周游整个代码库。
|
|
6
13
|
|
|
7
14
|
---
|
|
8
15
|
|
|
9
16
|
## 🚀 核心特性
|
|
10
17
|
|
|
11
|
-
- **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Go, Rust, Swift**。
|
|
18
|
+
- **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift**。
|
|
12
19
|
- **语义边解析与绑定**:静态解析跨文件的函数/方法调用(`calls`)、类型继承/接口实现(`inherits`/`implements`)以及文件导入关系(`imports`)。
|
|
13
20
|
- **逻辑组件自动聚类**:利用贪心模块度社区发现算法(Louvain Modularity Clustering)将紧密耦合的文件和符号自动聚类为 **Component(逻辑组件)**,并根据组件核心节点智能命名。
|
|
14
21
|
- **架构脆弱性分析**:自动识别 **God Nodes(度数最高的核心抽象)**,并静态检测文件级别的 **循环导入依赖(Circular Imports)**。
|
|
@@ -18,39 +25,29 @@
|
|
|
18
25
|
|
|
19
26
|
## 📦 架构概览
|
|
20
27
|
|
|
21
|
-
|
|
22
|
-
graph TD
|
|
23
|
-
A[工作区源码 Workspace] --> B[detect: 语言识别与过滤]
|
|
24
|
-
B --> C[parser: Tree-Sitter AST 符号提取]
|
|
25
|
-
C --> D[builder: NetworkX 语义图组装与绑定]
|
|
26
|
-
D --> E[cluster: 社区模块度聚类命名]
|
|
27
|
-
E --> F[analyze: 上帝节点与循环导入分析]
|
|
28
|
-
F --> G[export: 导出至 .codegraph/]
|
|
29
|
-
G --> H[AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components]
|
|
30
|
-
```
|
|
28
|
+
- **工作区源码 Workspace** -> **detect: 语言识别与过滤** -> **parser: Tree-Sitter AST 符号提取** -> **builder: NetworkX 语义图组装与绑定** -> **cluster: 社区模块度聚类命名** -> **analyze: 上帝节点与循环导入分析** -> **export: 导出至 .codegraph/** -> **生成 AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components**
|
|
31
29
|
|
|
32
30
|
---
|
|
33
31
|
|
|
34
32
|
## 🛠️ 安装指南
|
|
35
33
|
|
|
36
|
-
推荐使用 [uv](https://github.com/astral-sh/uv)
|
|
34
|
+
推荐使用 [uv](https://github.com/astral-sh/uv) 直接安装工具:
|
|
37
35
|
|
|
38
36
|
```bash
|
|
39
|
-
#
|
|
40
|
-
|
|
41
|
-
|
|
37
|
+
# 全局安装 (推荐)
|
|
38
|
+
uv tool install codegraph-gen
|
|
39
|
+
```
|
|
42
40
|
|
|
43
|
-
|
|
44
|
-
uv sync
|
|
45
|
-
source .venv/bin/activate
|
|
41
|
+
如果需要在当前虚拟环境中作为库依赖安装:
|
|
46
42
|
|
|
47
|
-
|
|
48
|
-
|
|
43
|
+
```bash
|
|
44
|
+
# 安装至当前项目/环境
|
|
45
|
+
uv pip install codegraph-gen
|
|
49
46
|
```
|
|
50
47
|
|
|
51
|
-
###
|
|
48
|
+
### 注册 AI Agent 斜杠命令
|
|
52
49
|
|
|
53
|
-
`codegraph` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
|
|
50
|
+
`codegraph-gen` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
|
|
54
51
|
|
|
55
52
|
```bash
|
|
56
53
|
# 为 Codex / Antigravity 注入 /codegraph 全局斜杠命令
|
|
@@ -95,7 +92,7 @@ codegraph build . --exclude extra_folder/ --exclude docs/
|
|
|
95
92
|
|
|
96
93
|
## 🤖 与 AI Agent(Codex / Antigravity / Claude Code)协同分析
|
|
97
94
|
|
|
98
|
-
`codegraph` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
|
|
95
|
+
`codegraph-gen` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
|
|
99
96
|
|
|
100
97
|
### 步骤 1:生成本地图谱
|
|
101
98
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "codegraph-gen"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "1.1.0"
|
|
4
4
|
description = "AST-based codebase knowledge graph generator in Markdown"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -23,11 +23,12 @@ classifiers = [
|
|
|
23
23
|
"Intended Audience :: Developers",
|
|
24
24
|
"License :: OSI Approved :: MIT License",
|
|
25
25
|
"Programming Language :: Python :: 3",
|
|
26
|
+
"Programming Language :: Python :: 3.11",
|
|
26
27
|
"Programming Language :: Python :: 3.12",
|
|
27
28
|
"Topic :: Software Development :: Code Generators",
|
|
28
29
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
29
30
|
]
|
|
30
|
-
requires-python = ">=3.12"
|
|
31
|
+
requires-python = ">=3.11"
|
|
31
32
|
dependencies = [
|
|
32
33
|
"networkx>=3.0",
|
|
33
34
|
"tree-sitter>=0.23.0",
|
|
@@ -59,7 +60,6 @@ Issues = "https://github.com/twn39/codegraph/issues"
|
|
|
59
60
|
|
|
60
61
|
[project.scripts]
|
|
61
62
|
codegraph = "codegraph_gen.__main__:main"
|
|
62
|
-
codegraph-gen = "codegraph_gen.__main__:main"
|
|
63
63
|
|
|
64
64
|
[build-system]
|
|
65
65
|
requires = ["uv_build>=0.9.26,<0.10.0"]
|
|
@@ -93,7 +93,7 @@ def build(
|
|
|
93
93
|
|
|
94
94
|
from codegraph_gen.engine import CodegraphEngine, PipelineStage
|
|
95
95
|
|
|
96
|
-
engine = CodegraphEngine(
|
|
96
|
+
engine = CodegraphEngine()
|
|
97
97
|
|
|
98
98
|
# Run pipeline with click progress bar
|
|
99
99
|
with Progress(
|
|
@@ -129,7 +129,7 @@ def build(
|
|
|
129
129
|
elif stage == PipelineStage.COMPLETED:
|
|
130
130
|
progress.update(task, description="Done!")
|
|
131
131
|
|
|
132
|
-
result = engine.run_pipeline(progress_callback=progress_callback)
|
|
132
|
+
result = engine.run_pipeline(config, progress_callback=progress_callback)
|
|
133
133
|
|
|
134
134
|
G = result.graph
|
|
135
135
|
if G.number_of_nodes() == 0:
|
|
@@ -294,12 +294,12 @@ def info():
|
|
|
294
294
|
try:
|
|
295
295
|
from importlib.metadata import version
|
|
296
296
|
|
|
297
|
-
ver = version("codegraph")
|
|
297
|
+
ver = version("codegraph-gen")
|
|
298
298
|
except Exception:
|
|
299
|
-
ver = "
|
|
299
|
+
ver = "1.1.0"
|
|
300
300
|
console.print(f"[bold]codegraph v{ver}[/bold]")
|
|
301
301
|
console.print(
|
|
302
|
-
"Supported languages: Python, JavaScript, TypeScript, Go, Rust, Swift"
|
|
302
|
+
"Supported languages: Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift"
|
|
303
303
|
)
|
|
304
304
|
|
|
305
305
|
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from codegraph_gen.schema import ExtractionResult
|
|
5
|
+
from codegraph_gen.resolver import TypeResolver
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_graph(extractions: list[ExtractionResult], workspace_dir: Path) -> nx.DiGraph:
|
|
11
|
+
"""
|
|
12
|
+
Assembles a list of ExtractionResults into a single directed graph
|
|
13
|
+
and resolves call, inherit, and import edges using a two-pass scope resolver.
|
|
14
|
+
"""
|
|
15
|
+
G = nx.DiGraph()
|
|
16
|
+
|
|
17
|
+
# 1. Add all nodes to the graph
|
|
18
|
+
for ext in extractions:
|
|
19
|
+
for node in ext.nodes:
|
|
20
|
+
G.add_node(node.id, **node.model_dump())
|
|
21
|
+
|
|
22
|
+
# 2. Run Type Resolver (Two-pass type inference & scope/edge resolution)
|
|
23
|
+
resolver = TypeResolver(G, extractions, workspace_dir)
|
|
24
|
+
resolver.propagate_types()
|
|
25
|
+
resolver.resolve_all_edges()
|
|
26
|
+
|
|
27
|
+
return G
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
|
-
from codegraph_gen.
|
|
4
|
+
from codegraph_gen.schema import ExtractionResult
|
|
5
5
|
|
|
6
6
|
# Default exclusions for files and directories we want to ignore
|
|
7
7
|
DEFAULT_EXCLUSIONS = {
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from codegraph_gen.config import
|
|
3
|
+
from codegraph_gen.config import LANGUAGE_EXTENSIONS
|
|
4
4
|
|
|
5
5
|
logger = logging.getLogger(__name__)
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def discover_files(
|
|
8
|
+
def discover_files(
|
|
9
|
+
workspace_dir: Path,
|
|
10
|
+
languages: set[str],
|
|
11
|
+
exclusions: set[str],
|
|
12
|
+
) -> list[tuple[Path, str]]:
|
|
9
13
|
"""
|
|
10
14
|
Recursively discovers source files in the workspace directory.
|
|
11
15
|
Filters by allowed languages and ignores files/directories in exclusions.
|
|
@@ -14,17 +18,17 @@ def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
|
|
|
14
18
|
List of tuples: (absolute_file_path, language_name)
|
|
15
19
|
"""
|
|
16
20
|
found_files = []
|
|
17
|
-
workspace =
|
|
21
|
+
workspace = workspace_dir.resolve()
|
|
18
22
|
|
|
19
23
|
# Map extension -> language
|
|
20
24
|
ext_to_lang = {}
|
|
21
|
-
for lang in
|
|
25
|
+
for lang in languages:
|
|
22
26
|
if lang in LANGUAGE_EXTENSIONS:
|
|
23
27
|
for ext in LANGUAGE_EXTENSIONS[lang]:
|
|
24
28
|
ext_to_lang[ext] = lang
|
|
25
29
|
|
|
26
30
|
# Normalize exclusions to lowercase for case-insensitive matching
|
|
27
|
-
exclusions_lower = {exc.lower() for exc in
|
|
31
|
+
exclusions_lower = {exc.lower() for exc in exclusions}
|
|
28
32
|
|
|
29
33
|
def is_ignored(path: Path) -> bool:
|
|
30
34
|
# Check if any part of the path is in exclusions_lower
|
|
@@ -9,7 +9,7 @@ import networkx as nx
|
|
|
9
9
|
from pydantic import BaseModel, ConfigDict
|
|
10
10
|
|
|
11
11
|
from codegraph_gen.config import CodegraphConfig, CacheEntry
|
|
12
|
-
from codegraph_gen.
|
|
12
|
+
from codegraph_gen.schema import ExtractionResult
|
|
13
13
|
from codegraph_gen.detect import discover_files
|
|
14
14
|
from codegraph_gen.parser import get_parser
|
|
15
15
|
from codegraph_gen.builder import build_graph
|
|
@@ -77,13 +77,12 @@ class PipelineResult(BaseModel):
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
class CodegraphEngine:
|
|
80
|
-
def __init__(self
|
|
81
|
-
self.config = config
|
|
82
|
-
self.renderer = MarkdownRenderer(config.workspace_dir)
|
|
80
|
+
def __init__(self):
|
|
83
81
|
self.writer = VaultWriter()
|
|
84
82
|
|
|
85
83
|
def run_pipeline(
|
|
86
84
|
self,
|
|
85
|
+
config: CodegraphConfig,
|
|
87
86
|
progress_callback: Optional[
|
|
88
87
|
Callable[[PipelineStage, Any, int, int], None]
|
|
89
88
|
] = None,
|
|
@@ -91,14 +90,18 @@ class CodegraphEngine:
|
|
|
91
90
|
"""
|
|
92
91
|
Runs the full codegraph generation pipeline.
|
|
93
92
|
Args:
|
|
93
|
+
config: Configuration settings.
|
|
94
94
|
progress_callback: A function taking (stage, current_item, index, total)
|
|
95
95
|
"""
|
|
96
96
|
logger.info("Starting codegraph engine pipeline...")
|
|
97
|
+
renderer = MarkdownRenderer(config.workspace_dir)
|
|
97
98
|
|
|
98
99
|
# 1. Discover files
|
|
99
100
|
if progress_callback:
|
|
100
101
|
progress_callback(PipelineStage.DISCOVERING, None, 0, 0)
|
|
101
|
-
files = discover_files(
|
|
102
|
+
files = discover_files(
|
|
103
|
+
config.workspace_dir, config.languages, config.exclusions
|
|
104
|
+
)
|
|
102
105
|
if not files:
|
|
103
106
|
logger.warning("No supported files found.")
|
|
104
107
|
if progress_callback:
|
|
@@ -116,9 +119,9 @@ class CodegraphEngine:
|
|
|
116
119
|
extractions = []
|
|
117
120
|
total_files = len(files)
|
|
118
121
|
|
|
119
|
-
cache_path =
|
|
122
|
+
cache_path = config.absolute_output_dir / "cache.json"
|
|
120
123
|
cache_entries = {}
|
|
121
|
-
if
|
|
124
|
+
if config.use_cache and cache_path.exists():
|
|
122
125
|
try:
|
|
123
126
|
with open(cache_path, "r", encoding="utf-8") as f:
|
|
124
127
|
cache_data = json.load(f)
|
|
@@ -132,7 +135,7 @@ class CodegraphEngine:
|
|
|
132
135
|
new_cache_entries = {}
|
|
133
136
|
|
|
134
137
|
for file_path, lang in files:
|
|
135
|
-
rel_path = str(file_path.relative_to(
|
|
138
|
+
rel_path = str(file_path.relative_to(config.workspace_dir))
|
|
136
139
|
try:
|
|
137
140
|
stat = file_path.stat()
|
|
138
141
|
mtime = stat.st_mtime
|
|
@@ -170,7 +173,7 @@ class CodegraphEngine:
|
|
|
170
173
|
if progress_callback:
|
|
171
174
|
progress_callback(PipelineStage.PARSING, None, total_files, total_files)
|
|
172
175
|
else:
|
|
173
|
-
max_workers =
|
|
176
|
+
max_workers = config.max_workers
|
|
174
177
|
if max_workers > 1 and len(files_to_parse) > 1:
|
|
175
178
|
logger.info(
|
|
176
179
|
f"Parsing {len(files_to_parse)} files in parallel with {max_workers} workers..."
|
|
@@ -183,7 +186,7 @@ class CodegraphEngine:
|
|
|
183
186
|
_parse_file_worker,
|
|
184
187
|
file_path,
|
|
185
188
|
lang,
|
|
186
|
-
|
|
189
|
+
config.workspace_dir,
|
|
187
190
|
): (file_path, rel_path, mtime, size, file_hash)
|
|
188
191
|
for file_path, lang, rel_path, mtime, size, file_hash in files_to_parse
|
|
189
192
|
}
|
|
@@ -235,7 +238,7 @@ class CodegraphEngine:
|
|
|
235
238
|
)
|
|
236
239
|
try:
|
|
237
240
|
parser = get_parser(lang)
|
|
238
|
-
result = parser.parse_file(file_path,
|
|
241
|
+
result = parser.parse_file(file_path, config.workspace_dir)
|
|
239
242
|
extractions.append(result)
|
|
240
243
|
if file_hash:
|
|
241
244
|
new_cache_entries[rel_path] = CacheEntry(
|
|
@@ -247,7 +250,7 @@ class CodegraphEngine:
|
|
|
247
250
|
# 3. Build graph
|
|
248
251
|
if progress_callback:
|
|
249
252
|
progress_callback(PipelineStage.BUILDING, None, 0, 0)
|
|
250
|
-
G = build_graph(extractions,
|
|
253
|
+
G = build_graph(extractions, config.workspace_dir)
|
|
251
254
|
|
|
252
255
|
# 4. Component clustering
|
|
253
256
|
if progress_callback:
|
|
@@ -271,7 +274,7 @@ class CodegraphEngine:
|
|
|
271
274
|
rendered_nodes = {}
|
|
272
275
|
for nid, ndata in G.nodes(data=True):
|
|
273
276
|
fname = get_node_filename(nid)
|
|
274
|
-
content =
|
|
277
|
+
content = renderer.render_node_page(nid, ndata, G, node_component_map)
|
|
275
278
|
rendered_nodes[fname] = content
|
|
276
279
|
|
|
277
280
|
rendered_components = {}
|
|
@@ -279,7 +282,7 @@ class CodegraphEngine:
|
|
|
279
282
|
comp_name = component_names[cid]
|
|
280
283
|
cohesion = cohesion_scores[cid]
|
|
281
284
|
fname = get_component_filename(comp_name)
|
|
282
|
-
content =
|
|
285
|
+
content = renderer.render_component_page(
|
|
283
286
|
cid,
|
|
284
287
|
members,
|
|
285
288
|
G,
|
|
@@ -292,7 +295,7 @@ class CodegraphEngine:
|
|
|
292
295
|
|
|
293
296
|
# Check if README already has AI Insights and preserve it
|
|
294
297
|
ai_insights = None
|
|
295
|
-
readme_path =
|
|
298
|
+
readme_path = config.absolute_output_dir / "README.md"
|
|
296
299
|
if readme_path.exists():
|
|
297
300
|
try:
|
|
298
301
|
old_readme = readme_path.read_text(encoding="utf-8")
|
|
@@ -315,7 +318,7 @@ class CodegraphEngine:
|
|
|
315
318
|
f"Could not read existing README.md to preserve AI insights: {e}"
|
|
316
319
|
)
|
|
317
320
|
|
|
318
|
-
readme_content =
|
|
321
|
+
readme_content = renderer.render_readme(
|
|
319
322
|
G,
|
|
320
323
|
components,
|
|
321
324
|
cohesion_scores,
|
|
@@ -324,7 +327,7 @@ class CodegraphEngine:
|
|
|
324
327
|
ai_insights=ai_insights,
|
|
325
328
|
)
|
|
326
329
|
|
|
327
|
-
prompt_content =
|
|
330
|
+
prompt_content = renderer.render_agent_prompt(
|
|
328
331
|
G, components, cohesion_scores, component_names, analysis
|
|
329
332
|
)
|
|
330
333
|
|
|
@@ -332,7 +335,7 @@ class CodegraphEngine:
|
|
|
332
335
|
if progress_callback:
|
|
333
336
|
progress_callback(PipelineStage.WRITING, None, 0, 0)
|
|
334
337
|
self.writer.write_vault(
|
|
335
|
-
|
|
338
|
+
config.absolute_output_dir,
|
|
336
339
|
rendered_nodes,
|
|
337
340
|
rendered_components,
|
|
338
341
|
readme_content,
|
|
@@ -340,9 +343,9 @@ class CodegraphEngine:
|
|
|
340
343
|
)
|
|
341
344
|
|
|
342
345
|
# Write updated cache back to disk
|
|
343
|
-
if
|
|
346
|
+
if config.use_cache:
|
|
344
347
|
try:
|
|
345
|
-
|
|
348
|
+
config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
|
|
346
349
|
with open(cache_path, "w", encoding="utf-8") as f:
|
|
347
350
|
json.dump(
|
|
348
351
|
{k: v.model_dump() for k, v in new_cache_entries.items()},
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import logging
|
|
3
|
+
import pkgutil
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from codegraph_gen.parser.base import BaseParser, _PARSER_REGISTRY
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
# Dynamic package scan & load to trigger @register_parser registrations
|
|
11
|
+
package_dir = str(Path(__file__).parent)
|
|
12
|
+
for _, module_name, _ in pkgutil.iter_modules([package_dir]):
|
|
13
|
+
if module_name == "base":
|
|
14
|
+
continue
|
|
15
|
+
full_module_name = f"{__name__}.{module_name}"
|
|
16
|
+
if full_module_name not in sys.modules:
|
|
17
|
+
try:
|
|
18
|
+
importlib.import_module(full_module_name)
|
|
19
|
+
except Exception as e:
|
|
20
|
+
logger.error(
|
|
21
|
+
f"Defensive Loading: Failed to import parser module {full_module_name}: {e}",
|
|
22
|
+
exc_info=True,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_parser(language: str) -> BaseParser:
|
|
27
|
+
"""Returns an instance of the parser for the given language."""
|
|
28
|
+
lang_lower = language.lower()
|
|
29
|
+
if lang_lower not in _PARSER_REGISTRY:
|
|
30
|
+
raise ValueError(f"Unsupported language: {language}")
|
|
31
|
+
return _PARSER_REGISTRY[lang_lower]()
|