codegraph-gen 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/PKG-INFO +1 -1
  2. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/pyproject.toml +1 -1
  3. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__main__.py +3 -3
  4. codegraph_gen-1.1.0/src/codegraph_gen/builder.py +27 -0
  5. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/config.py +1 -1
  6. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/detect.py +9 -5
  7. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/engine.py +23 -20
  8. codegraph_gen-1.1.0/src/codegraph_gen/parser/__init__.py +31 -0
  9. codegraph_gen-1.1.0/src/codegraph_gen/parser/base.py +154 -0
  10. codegraph_gen-1.1.0/src/codegraph_gen/parser/cpp.py +335 -0
  11. codegraph_gen-1.1.0/src/codegraph_gen/parser/go.py +259 -0
  12. codegraph_gen-1.1.0/src/codegraph_gen/parser/javascript.py +345 -0
  13. codegraph_gen-1.1.0/src/codegraph_gen/parser/kotlin.py +351 -0
  14. codegraph_gen-1.1.0/src/codegraph_gen/parser/python.py +360 -0
  15. codegraph_gen-1.1.0/src/codegraph_gen/parser/rust.py +450 -0
  16. codegraph_gen-1.1.0/src/codegraph_gen/parser/swift.py +306 -0
  17. codegraph_gen-1.1.0/src/codegraph_gen/resolver.py +650 -0
  18. codegraph_gen-1.1.0/src/codegraph_gen/resolver_strategy.py +411 -0
  19. codegraph_gen-1.0.0/src/codegraph_gen/parser/base.py → codegraph_gen-1.1.0/src/codegraph_gen/schema.py +15 -9
  20. codegraph_gen-1.0.0/src/codegraph_gen/builder.py +0 -747
  21. codegraph_gen-1.0.0/src/codegraph_gen/parser/__init__.py +0 -27
  22. codegraph_gen-1.0.0/src/codegraph_gen/parser/cpp.py +0 -349
  23. codegraph_gen-1.0.0/src/codegraph_gen/parser/go.py +0 -268
  24. codegraph_gen-1.0.0/src/codegraph_gen/parser/javascript.py +0 -370
  25. codegraph_gen-1.0.0/src/codegraph_gen/parser/kotlin.py +0 -387
  26. codegraph_gen-1.0.0/src/codegraph_gen/parser/python.py +0 -415
  27. codegraph_gen-1.0.0/src/codegraph_gen/parser/rust.py +0 -497
  28. codegraph_gen-1.0.0/src/codegraph_gen/parser/swift.py +0 -327
  29. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/README.md +0 -0
  30. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__init__.py +0 -0
  31. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/ai.py +0 -0
  32. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/analyzer.py +0 -0
  33. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/cluster.py +0 -0
  34. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/py.typed +0 -0
  35. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/renderer.py +0 -0
  36. {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: codegraph-gen
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: AST-based codebase knowledge graph generator in Markdown
5
5
  Keywords: knowledge-graph,ast,codebase,markdown,tree-sitter,visualization,static-analysis,ai-agent,obsidian
6
6
  Author: twn39
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codegraph-gen"
3
- version = "1.0.0"
3
+ version = "1.1.0"
4
4
  description = "AST-based codebase knowledge graph generator in Markdown"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -93,7 +93,7 @@ def build(
93
93
 
94
94
  from codegraph_gen.engine import CodegraphEngine, PipelineStage
95
95
 
96
- engine = CodegraphEngine(config)
96
+ engine = CodegraphEngine()
97
97
 
98
98
  # Run pipeline with click progress bar
99
99
  with Progress(
@@ -129,7 +129,7 @@ def build(
129
129
  elif stage == PipelineStage.COMPLETED:
130
130
  progress.update(task, description="Done!")
131
131
 
132
- result = engine.run_pipeline(progress_callback=progress_callback)
132
+ result = engine.run_pipeline(config, progress_callback=progress_callback)
133
133
 
134
134
  G = result.graph
135
135
  if G.number_of_nodes() == 0:
@@ -296,7 +296,7 @@ def info():
296
296
 
297
297
  ver = version("codegraph-gen")
298
298
  except Exception:
299
- ver = "1.0.0"
299
+ ver = "1.1.0"
300
300
  console.print(f"[bold]codegraph v{ver}[/bold]")
301
301
  console.print(
302
302
  "Supported languages: Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift"
@@ -0,0 +1,27 @@
1
+ import logging
2
+ from pathlib import Path
3
+ import networkx as nx
4
+ from codegraph_gen.schema import ExtractionResult
5
+ from codegraph_gen.resolver import TypeResolver
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def build_graph(extractions: list[ExtractionResult], workspace_dir: Path) -> nx.DiGraph:
    """
    Merge per-file extraction results into one directed graph.

    Every node of every ExtractionResult is added first, keyed by its ``id``
    and carrying the fields of ``model_dump()`` as node attributes. A
    TypeResolver is then run over the populated graph to resolve call,
    inherit, and import edges.

    Args:
        extractions: Extraction results whose ``nodes`` populate the graph.
        workspace_dir: Workspace directory handed to the TypeResolver.

    Returns:
        The assembled ``nx.DiGraph``.
    """
    graph = nx.DiGraph()

    # Pass 1: register every extracted symbol before the resolver is created.
    symbols = (symbol for extraction in extractions for symbol in extraction.nodes)
    for symbol in symbols:
        graph.add_node(symbol.id, **symbol.model_dump())

    # Pass 2: type propagation first, then scope/edge resolution.
    type_resolver = TypeResolver(graph, extractions, workspace_dir)
    type_resolver.propagate_types()
    type_resolver.resolve_all_edges()

    return graph
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  from pathlib import Path
3
3
  from pydantic import BaseModel, Field
4
- from codegraph_gen.parser.base import ExtractionResult
4
+ from codegraph_gen.schema import ExtractionResult
5
5
 
6
6
  # Default exclusions for files and directories we want to ignore
7
7
  DEFAULT_EXCLUSIONS = {
@@ -1,11 +1,15 @@
1
1
  import logging
2
2
  from pathlib import Path
3
- from codegraph_gen.config import CodegraphConfig, LANGUAGE_EXTENSIONS
3
+ from codegraph_gen.config import LANGUAGE_EXTENSIONS
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
7
 
8
- def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
8
+ def discover_files(
9
+ workspace_dir: Path,
10
+ languages: set[str],
11
+ exclusions: set[str],
12
+ ) -> list[tuple[Path, str]]:
9
13
  """
10
14
  Recursively discovers source files in the workspace directory.
11
15
  Filters by allowed languages and ignores files/directories in exclusions.
@@ -14,17 +18,17 @@ def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
14
18
  List of tuples: (absolute_file_path, language_name)
15
19
  """
16
20
  found_files = []
17
- workspace = config.workspace_dir.resolve()
21
+ workspace = workspace_dir.resolve()
18
22
 
19
23
  # Map extension -> language
20
24
  ext_to_lang = {}
21
- for lang in config.languages:
25
+ for lang in languages:
22
26
  if lang in LANGUAGE_EXTENSIONS:
23
27
  for ext in LANGUAGE_EXTENSIONS[lang]:
24
28
  ext_to_lang[ext] = lang
25
29
 
26
30
  # Normalize exclusions to lowercase for case-insensitive matching
27
- exclusions_lower = {exc.lower() for exc in config.exclusions}
31
+ exclusions_lower = {exc.lower() for exc in exclusions}
28
32
 
29
33
  def is_ignored(path: Path) -> bool:
30
34
  # Check if any part of the path is in exclusions_lower
@@ -9,7 +9,7 @@ import networkx as nx
9
9
  from pydantic import BaseModel, ConfigDict
10
10
 
11
11
  from codegraph_gen.config import CodegraphConfig, CacheEntry
12
- from codegraph_gen.parser.base import ExtractionResult
12
+ from codegraph_gen.schema import ExtractionResult
13
13
  from codegraph_gen.detect import discover_files
14
14
  from codegraph_gen.parser import get_parser
15
15
  from codegraph_gen.builder import build_graph
@@ -77,13 +77,12 @@ class PipelineResult(BaseModel):
77
77
 
78
78
 
79
79
  class CodegraphEngine:
80
- def __init__(self, config: CodegraphConfig):
81
- self.config = config
82
- self.renderer = MarkdownRenderer(config.workspace_dir)
80
+ def __init__(self):
83
81
  self.writer = VaultWriter()
84
82
 
85
83
  def run_pipeline(
86
84
  self,
85
+ config: CodegraphConfig,
87
86
  progress_callback: Optional[
88
87
  Callable[[PipelineStage, Any, int, int], None]
89
88
  ] = None,
@@ -91,14 +90,18 @@ class CodegraphEngine:
91
90
  """
92
91
  Runs the full codegraph generation pipeline.
93
92
  Args:
93
+ config: Configuration settings.
94
94
  progress_callback: A function taking (stage, current_item, index, total)
95
95
  """
96
96
  logger.info("Starting codegraph engine pipeline...")
97
+ renderer = MarkdownRenderer(config.workspace_dir)
97
98
 
98
99
  # 1. Discover files
99
100
  if progress_callback:
100
101
  progress_callback(PipelineStage.DISCOVERING, None, 0, 0)
101
- files = discover_files(self.config)
102
+ files = discover_files(
103
+ config.workspace_dir, config.languages, config.exclusions
104
+ )
102
105
  if not files:
103
106
  logger.warning("No supported files found.")
104
107
  if progress_callback:
@@ -116,9 +119,9 @@ class CodegraphEngine:
116
119
  extractions = []
117
120
  total_files = len(files)
118
121
 
119
- cache_path = self.config.absolute_output_dir / "cache.json"
122
+ cache_path = config.absolute_output_dir / "cache.json"
120
123
  cache_entries = {}
121
- if self.config.use_cache and cache_path.exists():
124
+ if config.use_cache and cache_path.exists():
122
125
  try:
123
126
  with open(cache_path, "r", encoding="utf-8") as f:
124
127
  cache_data = json.load(f)
@@ -132,7 +135,7 @@ class CodegraphEngine:
132
135
  new_cache_entries = {}
133
136
 
134
137
  for file_path, lang in files:
135
- rel_path = str(file_path.relative_to(self.config.workspace_dir))
138
+ rel_path = str(file_path.relative_to(config.workspace_dir))
136
139
  try:
137
140
  stat = file_path.stat()
138
141
  mtime = stat.st_mtime
@@ -170,7 +173,7 @@ class CodegraphEngine:
170
173
  if progress_callback:
171
174
  progress_callback(PipelineStage.PARSING, None, total_files, total_files)
172
175
  else:
173
- max_workers = self.config.max_workers
176
+ max_workers = config.max_workers
174
177
  if max_workers > 1 and len(files_to_parse) > 1:
175
178
  logger.info(
176
179
  f"Parsing {len(files_to_parse)} files in parallel with {max_workers} workers..."
@@ -183,7 +186,7 @@ class CodegraphEngine:
183
186
  _parse_file_worker,
184
187
  file_path,
185
188
  lang,
186
- self.config.workspace_dir,
189
+ config.workspace_dir,
187
190
  ): (file_path, rel_path, mtime, size, file_hash)
188
191
  for file_path, lang, rel_path, mtime, size, file_hash in files_to_parse
189
192
  }
@@ -235,7 +238,7 @@ class CodegraphEngine:
235
238
  )
236
239
  try:
237
240
  parser = get_parser(lang)
238
- result = parser.parse_file(file_path, self.config.workspace_dir)
241
+ result = parser.parse_file(file_path, config.workspace_dir)
239
242
  extractions.append(result)
240
243
  if file_hash:
241
244
  new_cache_entries[rel_path] = CacheEntry(
@@ -247,7 +250,7 @@ class CodegraphEngine:
247
250
  # 3. Build graph
248
251
  if progress_callback:
249
252
  progress_callback(PipelineStage.BUILDING, None, 0, 0)
250
- G = build_graph(extractions, self.config.workspace_dir)
253
+ G = build_graph(extractions, config.workspace_dir)
251
254
 
252
255
  # 4. Component clustering
253
256
  if progress_callback:
@@ -271,7 +274,7 @@ class CodegraphEngine:
271
274
  rendered_nodes = {}
272
275
  for nid, ndata in G.nodes(data=True):
273
276
  fname = get_node_filename(nid)
274
- content = self.renderer.render_node_page(nid, ndata, G, node_component_map)
277
+ content = renderer.render_node_page(nid, ndata, G, node_component_map)
275
278
  rendered_nodes[fname] = content
276
279
 
277
280
  rendered_components = {}
@@ -279,7 +282,7 @@ class CodegraphEngine:
279
282
  comp_name = component_names[cid]
280
283
  cohesion = cohesion_scores[cid]
281
284
  fname = get_component_filename(comp_name)
282
- content = self.renderer.render_component_page(
285
+ content = renderer.render_component_page(
283
286
  cid,
284
287
  members,
285
288
  G,
@@ -292,7 +295,7 @@ class CodegraphEngine:
292
295
 
293
296
  # Check if README already has AI Insights and preserve it
294
297
  ai_insights = None
295
- readme_path = self.config.absolute_output_dir / "README.md"
298
+ readme_path = config.absolute_output_dir / "README.md"
296
299
  if readme_path.exists():
297
300
  try:
298
301
  old_readme = readme_path.read_text(encoding="utf-8")
@@ -315,7 +318,7 @@ class CodegraphEngine:
315
318
  f"Could not read existing README.md to preserve AI insights: {e}"
316
319
  )
317
320
 
318
- readme_content = self.renderer.render_readme(
321
+ readme_content = renderer.render_readme(
319
322
  G,
320
323
  components,
321
324
  cohesion_scores,
@@ -324,7 +327,7 @@ class CodegraphEngine:
324
327
  ai_insights=ai_insights,
325
328
  )
326
329
 
327
- prompt_content = self.renderer.render_agent_prompt(
330
+ prompt_content = renderer.render_agent_prompt(
328
331
  G, components, cohesion_scores, component_names, analysis
329
332
  )
330
333
 
@@ -332,7 +335,7 @@ class CodegraphEngine:
332
335
  if progress_callback:
333
336
  progress_callback(PipelineStage.WRITING, None, 0, 0)
334
337
  self.writer.write_vault(
335
- self.config.absolute_output_dir,
338
+ config.absolute_output_dir,
336
339
  rendered_nodes,
337
340
  rendered_components,
338
341
  readme_content,
@@ -340,9 +343,9 @@ class CodegraphEngine:
340
343
  )
341
344
 
342
345
  # Write updated cache back to disk
343
- if self.config.use_cache:
346
+ if config.use_cache:
344
347
  try:
345
- self.config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
348
+ config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
346
349
  with open(cache_path, "w", encoding="utf-8") as f:
347
350
  json.dump(
348
351
  {k: v.model_dump() for k, v in new_cache_entries.items()},
@@ -0,0 +1,31 @@
1
+ import importlib
2
+ import logging
3
+ import pkgutil
4
+ import sys
5
+ from pathlib import Path
6
+ from codegraph_gen.parser.base import BaseParser, _PARSER_REGISTRY
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
# Import every module found in this package's directory so that the
# @register_parser decorators inside them run and populate the registry.
package_dir = str(Path(__file__).parent)
for _, module_name, _ in pkgutil.iter_modules([package_dir]):
    # ``base`` is imported explicitly at the top of this module.
    if module_name == "base":
        continue
    full_module_name = f"{__name__}.{module_name}"
    # Modules that are already loaded have already run their registrations.
    if full_module_name in sys.modules:
        continue
    try:
        importlib.import_module(full_module_name)
    except Exception as e:
        # One broken parser module is logged (with traceback) and skipped
        # instead of failing the import of the whole package.
        logger.exception(
            f"Defensive Loading: Failed to import parser module {full_module_name}: {e}"
        )
24
+
25
+
26
def get_parser(language: str) -> BaseParser:
    """Instantiate the parser registered for ``language``.

    The lookup is case-insensitive: the name is lower-cased before it is
    searched in the parser registry.

    Args:
        language: Language name, in any letter case.

    Returns:
        A new instance of the registered parser class.

    Raises:
        ValueError: If no parser is registered for ``language``.
    """
    parser_cls = _PARSER_REGISTRY.get(language.lower())
    if parser_cls is None:
        raise ValueError(f"Unsupported language: {language}")
    return parser_cls()
@@ -0,0 +1,154 @@
1
+ from abc import ABC, abstractmethod
2
+ import logging
3
+ from pathlib import Path
4
+ import tree_sitter
5
+ from codegraph_gen.schema import (
6
+ NodeSchema,
7
+ EdgeSchema,
8
+ ExtractionResult,
9
+ SymbolCollector,
10
+ )
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class BaseParser(ABC):
    """Abstract interface implemented by every language-specific AST parser."""

    @abstractmethod
    def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
        """Parse one file and extract its symbols (nodes) and relations (edges).

        Args:
            file_path: Path of the source file to parse.
            workspace_dir: Workspace directory the file belongs to.

        Returns:
            An ExtractionResult for the file.
        """
22
+
23
+
24
# Maps a lower-cased language name to the parser class registered for it.
_PARSER_REGISTRY: dict[str, type[BaseParser]] = {}


def register_parser(*languages: str):
    """Build a class decorator that registers a BaseParser subclass.

    The decorated class is stored in the parser registry under each of the
    given language names (lower-cased) and is returned unchanged. Registering
    a name that is already present replaces the earlier entry.

    Args:
        *languages: One or more language names to register the class under.
    """

    def decorator(cls: type[BaseParser]):
        _PARSER_REGISTRY.update({lang.lower(): cls for lang in languages})
        return cls

    return decorator
36
+
37
+
38
+ class ScopeTracker:
39
+ def __init__(self, initial_scope_id: str, initial_scope_type: str = "file"):
40
+ self._stack: list[tuple[str, str]] = [(initial_scope_id, initial_scope_type)]
41
+
42
+ def push(self, scope_id: str, scope_type: str) -> "ScopeTracker":
43
+ """Pushes a scope onto the stack. Returns self to act as a context manager."""
44
+ self._stack.append((scope_id, scope_type))
45
+ return self
46
+
47
+ def pop(self) -> tuple[str, str]:
48
+ """Pops the innermost scope from the stack."""
49
+ if len(self._stack) <= 1:
50
+ raise IndexError("Cannot pop the root scope")
51
+ return self._stack.pop()
52
+
53
+ def __enter__(self) -> "ScopeTracker":
54
+ return self
55
+
56
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
57
+ self.pop()
58
+
59
+ @property
60
+ def current_id(self) -> str:
61
+ return self._stack[-1][0] if self._stack else ""
62
+
63
+ @property
64
+ def current_type(self) -> str:
65
+ return self._stack[-1][1] if self._stack else ""
66
+
67
+ @property
68
+ def stack(self) -> list[tuple[str, str]]:
69
+ return self._stack
70
+
71
+ def find_parent_by_type(self, type_name: str) -> str | None:
72
+ """Searches the stack from innermost to outermost for a specific scope type."""
73
+ for scope_id, scope_type in reversed(self._stack):
74
+ if scope_type == type_name:
75
+ return scope_id
76
+ return None
77
+
78
+
79
class ASTVisitor:
    """Base AST visitor that routes each node to a ``visit_<node type>`` method.

    Subclasses define ``visit_<type>`` methods for the node types they handle;
    all other nodes go to ``generic_visit``, which recurses into the children.
    The handler chosen for a node type is cached per visitor instance.
    """

    def __init__(self, source: bytes, rel_path: str, collector: SymbolCollector):
        self.source = source
        self.rel_path = rel_path
        self.collector = collector
        # node type -> bound handler method, filled lazily by visit()
        self._visitor_cache = {}
        # The root scope is the file itself, identified by its relative path.
        self.scope = ScopeTracker(rel_path, "file")

    def add_node(self, node: NodeSchema) -> None:
        """Forward a node to the collector."""
        self.collector.add_node(node)

    def add_edge(self, edge: EdgeSchema) -> None:
        """Forward an edge to the collector."""
        self.collector.add_edge(edge)

    @property
    def scope_stack(self) -> list[tuple[str, str]]:
        """Deprecated alias for ``self.scope.stack``, kept for backward compatibility."""
        return self.scope.stack

    def visit(self, node: tree_sitter.Node) -> None:
        """Dispatch ``node`` to its ``visit_<type>`` handler.

        Syntax-error nodes are skipped. An exception raised by a handler is
        logged with its traceback and not propagated.
        """
        node_type = node.type
        if node_type == "ERROR" or getattr(node, "is_error", False):
            logger.debug(f"Skipping syntax error node: {node}")
            return

        if node_type not in self._visitor_cache:
            # "-" and "." cannot appear in a Python identifier; map both to "_".
            method_name = "visit_" + node_type.translate(str.maketrans("-.", "__"))
            self._visitor_cache[node_type] = getattr(
                self, method_name, self.generic_visit
            )
        handler = self._visitor_cache[node_type]

        try:
            handler(node)
        except Exception as e:
            logger.exception(
                f"Error visiting node of type {node.type} at line {node.start_point[0] + 1}: {e}"
            )

    def generic_visit(self, node: tree_sitter.Node) -> None:
        """Visit every child of ``node``, except below known leaf node types."""
        leaf_types = (
            "string",
            "comment",
            "line_comment",
            "block_comment",
            "number",
            "true",
            "false",
            "null",
        )
        if node.type not in leaf_types:
            for child in node.children:
                self.visit(child)

    def get_text(self, node: tree_sitter.Node) -> str:
        """Return the node's source text, decoded as UTF-8 and stripped of surrounding whitespace."""
        raw = self.source[node.start_byte : node.end_byte]
        # Undecodable bytes are replaced rather than raising.
        return raw.decode("utf-8", errors="replace").strip()

    def get_line_range(self, node: tree_sitter.Node) -> tuple[int, int]:
        """Return the node's (start, end) lines, 1-indexed."""
        first_row = node.start_point[0]
        last_row = node.end_point[0]
        return first_row + 1, last_row + 1

    def get_current_parent_id(self) -> str:
        """Return the ID of the innermost scope currently being tracked."""
        return self.scope.current_id