codegraph-gen 0.2.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/PKG-INFO +26 -28
  2. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/README.md +23 -26
  3. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/pyproject.toml +3 -3
  4. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__main__.py +5 -5
  5. codegraph_gen-1.1.0/src/codegraph_gen/builder.py +27 -0
  6. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/config.py +1 -1
  7. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/detect.py +9 -5
  8. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/engine.py +23 -20
  9. codegraph_gen-1.1.0/src/codegraph_gen/parser/__init__.py +31 -0
  10. codegraph_gen-1.1.0/src/codegraph_gen/parser/base.py +154 -0
  11. codegraph_gen-1.1.0/src/codegraph_gen/parser/cpp.py +335 -0
  12. codegraph_gen-1.1.0/src/codegraph_gen/parser/go.py +259 -0
  13. codegraph_gen-1.1.0/src/codegraph_gen/parser/javascript.py +345 -0
  14. codegraph_gen-1.1.0/src/codegraph_gen/parser/kotlin.py +351 -0
  15. codegraph_gen-1.1.0/src/codegraph_gen/parser/python.py +360 -0
  16. codegraph_gen-1.1.0/src/codegraph_gen/parser/rust.py +450 -0
  17. codegraph_gen-1.1.0/src/codegraph_gen/parser/swift.py +306 -0
  18. codegraph_gen-1.1.0/src/codegraph_gen/resolver.py +650 -0
  19. codegraph_gen-1.1.0/src/codegraph_gen/resolver_strategy.py +411 -0
  20. codegraph_gen-0.2.0/src/codegraph_gen/parser/base.py → codegraph_gen-1.1.0/src/codegraph_gen/schema.py +15 -9
  21. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/writer.py +3 -3
  22. codegraph_gen-0.2.0/src/codegraph_gen/builder.py +0 -747
  23. codegraph_gen-0.2.0/src/codegraph_gen/parser/__init__.py +0 -27
  24. codegraph_gen-0.2.0/src/codegraph_gen/parser/cpp.py +0 -349
  25. codegraph_gen-0.2.0/src/codegraph_gen/parser/go.py +0 -268
  26. codegraph_gen-0.2.0/src/codegraph_gen/parser/javascript.py +0 -370
  27. codegraph_gen-0.2.0/src/codegraph_gen/parser/kotlin.py +0 -387
  28. codegraph_gen-0.2.0/src/codegraph_gen/parser/python.py +0 -415
  29. codegraph_gen-0.2.0/src/codegraph_gen/parser/rust.py +0 -497
  30. codegraph_gen-0.2.0/src/codegraph_gen/parser/swift.py +0 -327
  31. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__init__.py +0 -0
  32. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/ai.py +0 -0
  33. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/analyzer.py +0 -0
  34. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/cluster.py +0 -0
  35. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/py.typed +0 -0
  36. {codegraph_gen-0.2.0 → codegraph_gen-1.1.0}/src/codegraph_gen/renderer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: codegraph-gen
3
- Version: 0.2.0
3
+ Version: 1.1.0
4
4
  Summary: AST-based codebase knowledge graph generator in Markdown
5
5
  Keywords: knowledge-graph,ast,codebase,markdown,tree-sitter,visualization,static-analysis,ai-agent,obsidian
6
6
  Author: twn39
@@ -10,6 +10,7 @@ Classifier: Development Status :: 4 - Beta
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
13
14
  Classifier: Programming Language :: Python :: 3.12
14
15
  Classifier: Topic :: Software Development :: Code Generators
15
16
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -27,23 +28,30 @@ Requires-Dist: pydantic>=2.0.0
27
28
  Requires-Dist: tree-sitter-c>=0.24.2
28
29
  Requires-Dist: tree-sitter-cpp>=0.23.4
29
30
  Requires-Dist: tree-sitter-kotlin>=1.1.0
30
- Requires-Python: >=3.12
31
+ Requires-Python: >=3.11
31
32
  Project-URL: Homepage, https://github.com/twn39/codegraph
32
33
  Project-URL: Repository, https://github.com/twn39/codegraph
33
34
  Project-URL: Issues, https://github.com/twn39/codegraph/issues
34
35
  Description-Content-Type: text/markdown
35
36
 
36
- # codegraph
37
+ <h1 align="center">codegraph-gen</h1>
37
38
 
38
- `codegraph` 是一个面向 AI Agent(如 Antigravity、Codex、Claude Code 等)的静态代码知识图谱生成工具。它能够静态解析多语言 codebase,通过社区发现算法自动进行组件聚类,并导出为由标准 Markdown 文件组成的关联图谱库(Obsidian-like vault),极大地辅助 AI Agent 在本地进行精准的架构理解、逻辑导航与深度洞察分析。
39
+ <p align="center">
40
+ <a href="https://pypi.org/project/codegraph-gen/"><img src="https://img.shields.io/pypi/v/codegraph-gen.svg" alt="PyPI Version"></a>
41
+ <img src="https://img.shields.io/badge/python-3.11+-blue.svg" alt="Python Version">
42
+ <img src="https://img.shields.io/badge/code%20style-ruff-26b22f.svg" alt="Ruff">
43
+ <img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License">
44
+ </p>
39
45
 
40
- 与基于图形化 Canvas 渲染的知识图谱不同,`codegraph` 采用全 Markdown 的扁平结构存储。它专门为 LLM 设计,摒弃了昂贵且复杂的数据库依赖,让 AI Agent 可以通过标准文件读取与路径导航(Relative Links)轻松周游整个代码库。
46
+ `codegraph-gen` 是一个面向 AI Agent(如 Antigravity、Codex、Claude Code 等)的静态代码知识图谱生成工具。它能够静态解析多语言 codebase,通过社区发现算法自动进行组件聚类,并导出为由标准 Markdown 文件组成的关联图谱库(Obsidian-like vault),极大地辅助 AI Agent 在本地进行精准的架构理解、逻辑导航与深度洞察分析。
47
+
48
+ 与基于图形化 Canvas 渲染的知识图谱不同,`codegraph-gen` 采用全 Markdown 的扁平结构存储。它专门为 LLM 设计,摒弃了昂贵且复杂的数据库依赖,让 AI Agent 可以通过标准文件读取与路径导航(Relative Links)轻松周游整个代码库。
41
49
 
42
50
  ---
43
51
 
44
52
  ## 🚀 核心特性
45
53
 
46
- - **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Go, Rust, Swift**。
54
+ - **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift**。
47
55
  - **语义边解析与绑定**:静态解析跨文件的函数/方法调用(`calls`)、类型继承/接口实现(`inherits`/`implements`)以及文件导入关系(`imports`)。
48
56
  - **逻辑组件自动聚类**:利用贪心模块度社区发现算法(Louvain Modularity Clustering)将紧密耦合的文件和符号自动聚类为 **Component(逻辑组件)**,并根据组件核心节点智能命名。
49
57
  - **架构脆弱性分析**:自动识别 **God Nodes(度数最高的核心抽象)**,并静态检测文件级别的 **循环导入依赖(Circular Imports)**。
@@ -53,39 +61,29 @@ Description-Content-Type: text/markdown
53
61
 
54
62
  ## 📦 架构概览
55
63
 
56
- ```mermaid
57
- graph TD
58
- A[工作区源码 Workspace] --> B[detect: 语言识别与过滤]
59
- B --> C[parser: Tree-Sitter AST 符号提取]
60
- C --> D[builder: NetworkX 语义图组装与绑定]
61
- D --> E[cluster: 社区模块度聚类命名]
62
- E --> F[analyze: 上帝节点与循环导入分析]
63
- F --> G[export: 导出至 .codegraph/]
64
- G --> H[AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components]
65
- ```
64
+ - **工作区源码 Workspace** -> **detect: 语言识别与过滤** -> **parser: Tree-Sitter AST 符号提取** -> **builder: NetworkX 语义图组装与绑定** -> **cluster: 社区模块度聚类命名** -> **analyze: 上帝节点与循环导入分析** -> **export: 导出至 .codegraph/** -> **生成 AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components**
66
65
 
67
66
  ---
68
67
 
69
68
  ## 🛠️ 安装指南
70
69
 
71
- 推荐使用 [uv](https://github.com/astral-sh/uv) 管理项目依赖与虚拟环境:
70
+ 推荐使用 [uv](https://github.com/astral-sh/uv) 直接安装工具:
72
71
 
73
72
  ```bash
74
- # 克隆仓库
75
- git clone <repository-url>
76
- cd codegraph
73
+ # 全局安装 (推荐)
74
+ uv tool install codegraph-gen
75
+ ```
77
76
 
78
- # 同步依赖并激活虚拟环境
79
- uv sync
80
- source .venv/bin/activate
77
+ 如果需要在当前虚拟环境中作为库依赖安装:
81
78
 
82
- # 全局安装 (推荐)
83
- uv tool install --force --no-cache .
79
+ ```bash
80
+ # 安装至当前项目/环境
81
+ uv pip install codegraph-gen
84
82
  ```
85
83
 
86
- ### 2. 注入 AI Agent 斜杠命令集成
84
+ ### 注册 AI Agent 斜杠命令
87
85
 
88
- `codegraph` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
86
+ `codegraph-gen` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
89
87
 
90
88
  ```bash
91
89
  # 为 Codex / Antigravity 注入 /codegraph 全局斜杠命令
@@ -130,7 +128,7 @@ codegraph build . --exclude extra_folder/ --exclude docs/
130
128
 
131
129
  ## 🤖 与 AI Agent(Codex / Antigravity / Claude Code)协同分析
132
130
 
133
- `codegraph` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
131
+ `codegraph-gen` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
134
132
 
135
133
  ### 步骤 1:生成本地图谱
136
134
 
@@ -1,14 +1,21 @@
1
- # codegraph
1
+ <h1 align="center">codegraph-gen</h1>
2
2
 
3
- `codegraph` 是一个面向 AI Agent(如 Antigravity、Codex、Claude Code 等)的静态代码知识图谱生成工具。它能够静态解析多语言 codebase,通过社区发现算法自动进行组件聚类,并导出为由标准 Markdown 文件组成的关联图谱库(Obsidian-like vault),极大地辅助 AI Agent 在本地进行精准的架构理解、逻辑导航与深度洞察分析。
3
+ <p align="center">
4
+ <a href="https://pypi.org/project/codegraph-gen/"><img src="https://img.shields.io/pypi/v/codegraph-gen.svg" alt="PyPI Version"></a>
5
+ <img src="https://img.shields.io/badge/python-3.11+-blue.svg" alt="Python Version">
6
+ <img src="https://img.shields.io/badge/code%20style-ruff-26b22f.svg" alt="Ruff">
7
+ <img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License">
8
+ </p>
4
9
 
5
- 与基于图形化 Canvas 渲染的知识图谱不同,`codegraph` 采用全 Markdown 的扁平结构存储。它专门为 LLM 设计,摒弃了昂贵且复杂的数据库依赖,让 AI Agent 可以通过标准文件读取与路径导航(Relative Links)轻松周游整个代码库。
10
+ `codegraph-gen` 是一个面向 AI Agent(如 Antigravity、Codex、Claude Code 等)的静态代码知识图谱生成工具。它能够静态解析多语言 codebase,通过社区发现算法自动进行组件聚类,并导出为由标准 Markdown 文件组成的关联图谱库(Obsidian-like vault),极大地辅助 AI Agent 在本地进行精准的架构理解、逻辑导航与深度洞察分析。
11
+
12
+ 与基于图形化 Canvas 渲染的知识图谱不同,`codegraph-gen` 采用全 Markdown 的扁平结构存储。它专门为 LLM 设计,摒弃了昂贵且复杂的数据库依赖,让 AI Agent 可以通过标准文件读取与路径导航(Relative Links)轻松周游整个代码库。
6
13
 
7
14
  ---
8
15
 
9
16
  ## 🚀 核心特性
10
17
 
11
- - **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Go, Rust, Swift**。
18
+ - **多语言 AST 解析**:基于 `tree-sitter`,原生支持 **Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift**。
12
19
  - **语义边解析与绑定**:静态解析跨文件的函数/方法调用(`calls`)、类型继承/接口实现(`inherits`/`implements`)以及文件导入关系(`imports`)。
13
20
  - **逻辑组件自动聚类**:利用贪心模块度社区发现算法(Louvain Modularity Clustering)将紧密耦合的文件和符号自动聚类为 **Component(逻辑组件)**,并根据组件核心节点智能命名。
14
21
  - **架构脆弱性分析**:自动识别 **God Nodes(度数最高的核心抽象)**,并静态检测文件级别的 **循环导入依赖(Circular Imports)**。
@@ -18,39 +25,29 @@
18
25
 
19
26
  ## 📦 架构概览
20
27
 
21
- ```mermaid
22
- graph TD
23
- A[工作区源码 Workspace] --> B[detect: 语言识别与过滤]
24
- B --> C[parser: Tree-Sitter AST 符号提取]
25
- C --> D[builder: NetworkX 语义图组装与绑定]
26
- D --> E[cluster: 社区模块度聚类命名]
27
- E --> F[analyze: 上帝节点与循环导入分析]
28
- F --> G[export: 导出至 .codegraph/]
29
- G --> H[AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components]
30
- ```
28
+ - **工作区源码 Workspace** -> **detect: 语言识别与过滤** -> **parser: Tree-Sitter AST 符号提取** -> **builder: NetworkX 语义图组装与绑定** -> **cluster: 社区模块度聚类命名** -> **analyze: 上帝节点与循环导入分析** -> **export: 导出至 .codegraph/** -> **生成 AGENT_PROMPT.md / AGENTS.md / README.md / nodes / components**
31
29
 
32
30
  ---
33
31
 
34
32
  ## 🛠️ 安装指南
35
33
 
36
- 推荐使用 [uv](https://github.com/astral-sh/uv) 管理项目依赖与虚拟环境:
34
+ 推荐使用 [uv](https://github.com/astral-sh/uv) 直接安装工具:
37
35
 
38
36
  ```bash
39
- # 克隆仓库
40
- git clone <repository-url>
41
- cd codegraph
37
+ # 全局安装 (推荐)
38
+ uv tool install codegraph-gen
39
+ ```
42
40
 
43
- # 同步依赖并激活虚拟环境
44
- uv sync
45
- source .venv/bin/activate
41
+ 如果需要在当前虚拟环境中作为库依赖安装:
46
42
 
47
- # 全局安装 (推荐)
48
- uv tool install --force --no-cache .
43
+ ```bash
44
+ # 安装至当前项目/环境
45
+ uv pip install codegraph-gen
49
46
  ```
50
47
 
51
- ### 2. 注入 AI Agent 斜杠命令集成
48
+ ### 注册 AI Agent 斜杠命令
52
49
 
53
- `codegraph` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
50
+ `codegraph-gen` 支持一键将 `/codegraph` 自定义斜杠命令注册到您的 AI Agent(如 Codex 或 Antigravity)的全局配置中:
54
51
 
55
52
  ```bash
56
53
  # 为 Codex / Antigravity 注入 /codegraph 全局斜杠命令
@@ -95,7 +92,7 @@ codegraph build . --exclude extra_folder/ --exclude docs/
95
92
 
96
93
  ## 🤖 与 AI Agent(Codex / Antigravity / Claude Code)协同分析
97
94
 
98
- `codegraph` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
95
+ `codegraph-gen` 的核心设计思想是**离线构建,Agent 分析**。这避免了在 CLI 中直接硬编码大模型 API,降低了使用成本,并充分利用了你当前对话中功能更强、带有上下文读取能力的外部 Agent。
99
96
 
100
97
  ### 步骤 1:生成本地图谱
101
98
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codegraph-gen"
3
- version = "0.2.0"
3
+ version = "1.1.0"
4
4
  description = "AST-based codebase knowledge graph generator in Markdown"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -23,11 +23,12 @@ classifiers = [
23
23
  "Intended Audience :: Developers",
24
24
  "License :: OSI Approved :: MIT License",
25
25
  "Programming Language :: Python :: 3",
26
+ "Programming Language :: Python :: 3.11",
26
27
  "Programming Language :: Python :: 3.12",
27
28
  "Topic :: Software Development :: Code Generators",
28
29
  "Topic :: Software Development :: Libraries :: Python Modules",
29
30
  ]
30
- requires-python = ">=3.12"
31
+ requires-python = ">=3.11"
31
32
  dependencies = [
32
33
  "networkx>=3.0",
33
34
  "tree-sitter>=0.23.0",
@@ -59,7 +60,6 @@ Issues = "https://github.com/twn39/codegraph/issues"
59
60
 
60
61
  [project.scripts]
61
62
  codegraph = "codegraph_gen.__main__:main"
62
- codegraph-gen = "codegraph_gen.__main__:main"
63
63
 
64
64
  [build-system]
65
65
  requires = ["uv_build>=0.9.26,<0.10.0"]
@@ -93,7 +93,7 @@ def build(
93
93
 
94
94
  from codegraph_gen.engine import CodegraphEngine, PipelineStage
95
95
 
96
- engine = CodegraphEngine(config)
96
+ engine = CodegraphEngine()
97
97
 
98
98
  # Run pipeline with click progress bar
99
99
  with Progress(
@@ -129,7 +129,7 @@ def build(
129
129
  elif stage == PipelineStage.COMPLETED:
130
130
  progress.update(task, description="Done!")
131
131
 
132
- result = engine.run_pipeline(progress_callback=progress_callback)
132
+ result = engine.run_pipeline(config, progress_callback=progress_callback)
133
133
 
134
134
  G = result.graph
135
135
  if G.number_of_nodes() == 0:
@@ -294,12 +294,12 @@ def info():
294
294
  try:
295
295
  from importlib.metadata import version
296
296
 
297
- ver = version("codegraph")
297
+ ver = version("codegraph-gen")
298
298
  except Exception:
299
- ver = "0.2.0"
299
+ ver = "1.1.0"
300
300
  console.print(f"[bold]codegraph v{ver}[/bold]")
301
301
  console.print(
302
- "Supported languages: Python, JavaScript, TypeScript, Go, Rust, Swift"
302
+ "Supported languages: Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift"
303
303
  )
304
304
 
305
305
 
@@ -0,0 +1,27 @@
1
+ import logging
2
+ from pathlib import Path
3
+ import networkx as nx
4
+ from codegraph_gen.schema import ExtractionResult
5
+ from codegraph_gen.resolver import TypeResolver
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def build_graph(extractions: list[ExtractionResult], workspace_dir: Path) -> nx.DiGraph:
    """
    Build one directed graph from every extraction result.

    Each extracted node is inserted under its ``id`` with the fields of
    ``node.model_dump()`` as node attributes. A ``TypeResolver`` is then
    constructed over the graph and asked to propagate types and resolve
    all edges (call, inherit, and import edges, per the original
    description of this function).

    Args:
        extractions: Per-file extraction results whose ``nodes`` are added.
        workspace_dir: Workspace root handed to the resolver.

    Returns:
        The populated ``nx.DiGraph``.
    """
    graph = nx.DiGraph()

    # Pass 1: register every node before any edge resolution happens, so the
    # resolver is created over the complete node set.
    every_node = (
        node for extraction in extractions for node in extraction.nodes
    )
    for node in every_node:
        attributes = node.model_dump()
        graph.add_node(node.id, **attributes)

    # Pass 2: type propagation first, then edge resolution.
    type_resolver = TypeResolver(graph, extractions, workspace_dir)
    type_resolver.propagate_types()
    type_resolver.resolve_all_edges()

    return graph
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  from pathlib import Path
3
3
  from pydantic import BaseModel, Field
4
- from codegraph_gen.parser.base import ExtractionResult
4
+ from codegraph_gen.schema import ExtractionResult
5
5
 
6
6
  # Default exclusions for files and directories we want to ignore
7
7
  DEFAULT_EXCLUSIONS = {
@@ -1,11 +1,15 @@
1
1
  import logging
2
2
  from pathlib import Path
3
- from codegraph_gen.config import CodegraphConfig, LANGUAGE_EXTENSIONS
3
+ from codegraph_gen.config import LANGUAGE_EXTENSIONS
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
7
 
8
- def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
8
+ def discover_files(
9
+ workspace_dir: Path,
10
+ languages: set[str],
11
+ exclusions: set[str],
12
+ ) -> list[tuple[Path, str]]:
9
13
  """
10
14
  Recursively discovers source files in the workspace directory.
11
15
  Filters by allowed languages and ignores files/directories in exclusions.
@@ -14,17 +18,17 @@ def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
14
18
  List of tuples: (absolute_file_path, language_name)
15
19
  """
16
20
  found_files = []
17
- workspace = config.workspace_dir.resolve()
21
+ workspace = workspace_dir.resolve()
18
22
 
19
23
  # Map extension -> language
20
24
  ext_to_lang = {}
21
- for lang in config.languages:
25
+ for lang in languages:
22
26
  if lang in LANGUAGE_EXTENSIONS:
23
27
  for ext in LANGUAGE_EXTENSIONS[lang]:
24
28
  ext_to_lang[ext] = lang
25
29
 
26
30
  # Normalize exclusions to lowercase for case-insensitive matching
27
- exclusions_lower = {exc.lower() for exc in config.exclusions}
31
+ exclusions_lower = {exc.lower() for exc in exclusions}
28
32
 
29
33
  def is_ignored(path: Path) -> bool:
30
34
  # Check if any part of the path is in exclusions_lower
@@ -9,7 +9,7 @@ import networkx as nx
9
9
  from pydantic import BaseModel, ConfigDict
10
10
 
11
11
  from codegraph_gen.config import CodegraphConfig, CacheEntry
12
- from codegraph_gen.parser.base import ExtractionResult
12
+ from codegraph_gen.schema import ExtractionResult
13
13
  from codegraph_gen.detect import discover_files
14
14
  from codegraph_gen.parser import get_parser
15
15
  from codegraph_gen.builder import build_graph
@@ -77,13 +77,12 @@ class PipelineResult(BaseModel):
77
77
 
78
78
 
79
79
  class CodegraphEngine:
80
- def __init__(self, config: CodegraphConfig):
81
- self.config = config
82
- self.renderer = MarkdownRenderer(config.workspace_dir)
80
+ def __init__(self):
83
81
  self.writer = VaultWriter()
84
82
 
85
83
  def run_pipeline(
86
84
  self,
85
+ config: CodegraphConfig,
87
86
  progress_callback: Optional[
88
87
  Callable[[PipelineStage, Any, int, int], None]
89
88
  ] = None,
@@ -91,14 +90,18 @@ class CodegraphEngine:
91
90
  """
92
91
  Runs the full codegraph generation pipeline.
93
92
  Args:
93
+ config: Configuration settings.
94
94
  progress_callback: A function taking (stage, current_item, index, total)
95
95
  """
96
96
  logger.info("Starting codegraph engine pipeline...")
97
+ renderer = MarkdownRenderer(config.workspace_dir)
97
98
 
98
99
  # 1. Discover files
99
100
  if progress_callback:
100
101
  progress_callback(PipelineStage.DISCOVERING, None, 0, 0)
101
- files = discover_files(self.config)
102
+ files = discover_files(
103
+ config.workspace_dir, config.languages, config.exclusions
104
+ )
102
105
  if not files:
103
106
  logger.warning("No supported files found.")
104
107
  if progress_callback:
@@ -116,9 +119,9 @@ class CodegraphEngine:
116
119
  extractions = []
117
120
  total_files = len(files)
118
121
 
119
- cache_path = self.config.absolute_output_dir / "cache.json"
122
+ cache_path = config.absolute_output_dir / "cache.json"
120
123
  cache_entries = {}
121
- if self.config.use_cache and cache_path.exists():
124
+ if config.use_cache and cache_path.exists():
122
125
  try:
123
126
  with open(cache_path, "r", encoding="utf-8") as f:
124
127
  cache_data = json.load(f)
@@ -132,7 +135,7 @@ class CodegraphEngine:
132
135
  new_cache_entries = {}
133
136
 
134
137
  for file_path, lang in files:
135
- rel_path = str(file_path.relative_to(self.config.workspace_dir))
138
+ rel_path = str(file_path.relative_to(config.workspace_dir))
136
139
  try:
137
140
  stat = file_path.stat()
138
141
  mtime = stat.st_mtime
@@ -170,7 +173,7 @@ class CodegraphEngine:
170
173
  if progress_callback:
171
174
  progress_callback(PipelineStage.PARSING, None, total_files, total_files)
172
175
  else:
173
- max_workers = self.config.max_workers
176
+ max_workers = config.max_workers
174
177
  if max_workers > 1 and len(files_to_parse) > 1:
175
178
  logger.info(
176
179
  f"Parsing {len(files_to_parse)} files in parallel with {max_workers} workers..."
@@ -183,7 +186,7 @@ class CodegraphEngine:
183
186
  _parse_file_worker,
184
187
  file_path,
185
188
  lang,
186
- self.config.workspace_dir,
189
+ config.workspace_dir,
187
190
  ): (file_path, rel_path, mtime, size, file_hash)
188
191
  for file_path, lang, rel_path, mtime, size, file_hash in files_to_parse
189
192
  }
@@ -235,7 +238,7 @@ class CodegraphEngine:
235
238
  )
236
239
  try:
237
240
  parser = get_parser(lang)
238
- result = parser.parse_file(file_path, self.config.workspace_dir)
241
+ result = parser.parse_file(file_path, config.workspace_dir)
239
242
  extractions.append(result)
240
243
  if file_hash:
241
244
  new_cache_entries[rel_path] = CacheEntry(
@@ -247,7 +250,7 @@ class CodegraphEngine:
247
250
  # 3. Build graph
248
251
  if progress_callback:
249
252
  progress_callback(PipelineStage.BUILDING, None, 0, 0)
250
- G = build_graph(extractions, self.config.workspace_dir)
253
+ G = build_graph(extractions, config.workspace_dir)
251
254
 
252
255
  # 4. Component clustering
253
256
  if progress_callback:
@@ -271,7 +274,7 @@ class CodegraphEngine:
271
274
  rendered_nodes = {}
272
275
  for nid, ndata in G.nodes(data=True):
273
276
  fname = get_node_filename(nid)
274
- content = self.renderer.render_node_page(nid, ndata, G, node_component_map)
277
+ content = renderer.render_node_page(nid, ndata, G, node_component_map)
275
278
  rendered_nodes[fname] = content
276
279
 
277
280
  rendered_components = {}
@@ -279,7 +282,7 @@ class CodegraphEngine:
279
282
  comp_name = component_names[cid]
280
283
  cohesion = cohesion_scores[cid]
281
284
  fname = get_component_filename(comp_name)
282
- content = self.renderer.render_component_page(
285
+ content = renderer.render_component_page(
283
286
  cid,
284
287
  members,
285
288
  G,
@@ -292,7 +295,7 @@ class CodegraphEngine:
292
295
 
293
296
  # Check if README already has AI Insights and preserve it
294
297
  ai_insights = None
295
- readme_path = self.config.absolute_output_dir / "README.md"
298
+ readme_path = config.absolute_output_dir / "README.md"
296
299
  if readme_path.exists():
297
300
  try:
298
301
  old_readme = readme_path.read_text(encoding="utf-8")
@@ -315,7 +318,7 @@ class CodegraphEngine:
315
318
  f"Could not read existing README.md to preserve AI insights: {e}"
316
319
  )
317
320
 
318
- readme_content = self.renderer.render_readme(
321
+ readme_content = renderer.render_readme(
319
322
  G,
320
323
  components,
321
324
  cohesion_scores,
@@ -324,7 +327,7 @@ class CodegraphEngine:
324
327
  ai_insights=ai_insights,
325
328
  )
326
329
 
327
- prompt_content = self.renderer.render_agent_prompt(
330
+ prompt_content = renderer.render_agent_prompt(
328
331
  G, components, cohesion_scores, component_names, analysis
329
332
  )
330
333
 
@@ -332,7 +335,7 @@ class CodegraphEngine:
332
335
  if progress_callback:
333
336
  progress_callback(PipelineStage.WRITING, None, 0, 0)
334
337
  self.writer.write_vault(
335
- self.config.absolute_output_dir,
338
+ config.absolute_output_dir,
336
339
  rendered_nodes,
337
340
  rendered_components,
338
341
  readme_content,
@@ -340,9 +343,9 @@ class CodegraphEngine:
340
343
  )
341
344
 
342
345
  # Write updated cache back to disk
343
- if self.config.use_cache:
346
+ if config.use_cache:
344
347
  try:
345
- self.config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
348
+ config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
346
349
  with open(cache_path, "w", encoding="utf-8") as f:
347
350
  json.dump(
348
351
  {k: v.model_dump() for k, v in new_cache_entries.items()},
@@ -0,0 +1,31 @@
1
+ import importlib
2
+ import logging
3
+ import pkgutil
4
+ import sys
5
+ from pathlib import Path
6
+ from codegraph_gen.parser.base import BaseParser, _PARSER_REGISTRY
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
# Import every sibling module of this package (except ``base``) so that their
# @register_parser decorators run and register the parsers.
package_dir = str(Path(__file__).parent)
for _, module_name, _ in pkgutil.iter_modules([package_dir]):
    # ``base`` is already imported at the top of this file; it is skipped here.
    if module_name == "base":
        continue
    full_module_name = f"{__name__}.{module_name}"
    # Already-loaded modules are not imported again.
    if full_module_name in sys.modules:
        continue
    try:
        importlib.import_module(full_module_name)
    except Exception as e:
        # A failing parser module is logged with its traceback and skipped;
        # the loop goes on to the remaining modules.
        logger.error(
            f"Defensive Loading: Failed to import parser module {full_module_name}: {e}",
            exc_info=True,
        )
24
+
25
+
26
def get_parser(language: str) -> BaseParser:
    """Return a new parser instance for ``language`` (case-insensitive).

    Args:
        language: Language name; it is lowercased before the registry lookup.

    Returns:
        A fresh instance of the parser class registered for that language.

    Raises:
        ValueError: If no parser is registered under the lowercased name.
    """
    registry_key = language.lower()
    if registry_key in _PARSER_REGISTRY:
        parser_factory = _PARSER_REGISTRY[registry_key]
        return parser_factory()
    # The message reports the name exactly as the caller passed it.
    raise ValueError(f"Unsupported language: {language}")