PyPI - codegraph-gen - Versions diffs - 1.0.0__tar.gz → 1.1.0__tar.gz - Mend

codegraph-gen 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: codegraph-gen
-Version: 1.0.0
+Version: 1.1.0
 Summary: AST-based codebase knowledge graph generator in Markdown
 Keywords: knowledge-graph,ast,codebase,markdown,tree-sitter,visualization,static-analysis,ai-agent,obsidian
 Author: twn39

{codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "codegraph-gen"
-version = "1.0.0"
+version = "1.1.0"
 description = "AST-based codebase knowledge graph generator in Markdown"
 readme = "README.md"
 authors = [

{codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__main__.py RENAMED Viewed

@@ -93,7 +93,7 @@ def build(
     from codegraph_gen.engine import CodegraphEngine, PipelineStage
-    engine = CodegraphEngine(config)
+    engine = CodegraphEngine()
     # Run pipeline with click progress bar
     with Progress(
@@ -129,7 +129,7 @@ def build(
             elif stage == PipelineStage.COMPLETED:
                 progress.update(task, description="Done!")
-        result = engine.run_pipeline(progress_callback=progress_callback)
+        result = engine.run_pipeline(config, progress_callback=progress_callback)
     G = result.graph
     if G.number_of_nodes() == 0:
@@ -296,7 +296,7 @@ def info():
         ver = version("codegraph-gen")
     except Exception:
-        ver = "1.0.0"
+        ver = "1.1.0"
     console.print(f"[bold]codegraph v{ver}[/bold]")
     console.print(
         "Supported languages: Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift"

codegraph_gen-1.1.0/src/codegraph_gen/builder.py ADDED Viewed

@@ -0,0 +1,27 @@
+import logging
+from pathlib import Path
+import networkx as nx
+from codegraph_gen.schema import ExtractionResult
+from codegraph_gen.resolver import TypeResolver
+logger = logging.getLogger(__name__)
+def build_graph(extractions: list[ExtractionResult], workspace_dir: Path) -> nx.DiGraph:
+    """
+    Assembles a list of ExtractionResults into a single directed graph
+    and resolves call, inherit, and import edges using a two-pass scope resolver.
+    """
+    G = nx.DiGraph()
+    # 1. Add all nodes to the graph
+    for ext in extractions:
+        for node in ext.nodes:
+            G.add_node(node.id, **node.model_dump())
+    # 2. Run Type Resolver (Two-pass type inference & scope/edge resolution)
+    resolver = TypeResolver(G, extractions, workspace_dir)
+    resolver.propagate_types()
+    resolver.resolve_all_edges()
+    return G

{codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/config.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import os
 from pathlib import Path
 from pydantic import BaseModel, Field
-from codegraph_gen.parser.base import ExtractionResult
+from codegraph_gen.schema import ExtractionResult
 # Default exclusions for files and directories we want to ignore
 DEFAULT_EXCLUSIONS = {

{codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/detect.py RENAMED Viewed

@@ -1,11 +1,15 @@
 import logging
 from pathlib import Path
-from codegraph_gen.config import CodegraphConfig, LANGUAGE_EXTENSIONS
+from codegraph_gen.config import LANGUAGE_EXTENSIONS
 logger = logging.getLogger(__name__)
-def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
+def discover_files(
+    workspace_dir: Path,
+    languages: set[str],
+    exclusions: set[str],
+) -> list[tuple[Path, str]]:
     """
     Recursively discovers source files in the workspace directory.
     Filters by allowed languages and ignores files/directories in exclusions.
@@ -14,17 +18,17 @@ def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
         List of tuples: (absolute_file_path, language_name)
     """
     found_files = []
-    workspace = config.workspace_dir.resolve()
+    workspace = workspace_dir.resolve()
     # Map extension -> language
     ext_to_lang = {}
-    for lang in config.languages:
+    for lang in languages:
         if lang in LANGUAGE_EXTENSIONS:
             for ext in LANGUAGE_EXTENSIONS[lang]:
                 ext_to_lang[ext] = lang
     # Normalize exclusions to lowercase for case-insensitive matching
-    exclusions_lower = {exc.lower() for exc in config.exclusions}
+    exclusions_lower = {exc.lower() for exc in exclusions}
     def is_ignored(path: Path) -> bool:
         # Check if any part of the path is in exclusions_lower

{codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/engine.py RENAMED Viewed

@@ -9,7 +9,7 @@ import networkx as nx
 from pydantic import BaseModel, ConfigDict
 from codegraph_gen.config import CodegraphConfig, CacheEntry
-from codegraph_gen.parser.base import ExtractionResult
+from codegraph_gen.schema import ExtractionResult
 from codegraph_gen.detect import discover_files
 from codegraph_gen.parser import get_parser
 from codegraph_gen.builder import build_graph
@@ -77,13 +77,12 @@ class PipelineResult(BaseModel):
 class CodegraphEngine:
-    def __init__(self, config: CodegraphConfig):
-        self.config = config
-        self.renderer = MarkdownRenderer(config.workspace_dir)
+    def __init__(self):
         self.writer = VaultWriter()
     def run_pipeline(
         self,
+        config: CodegraphConfig,
         progress_callback: Optional[
             Callable[[PipelineStage, Any, int, int], None]
         ] = None,
@@ -91,14 +90,18 @@ class CodegraphEngine:
         """
         Runs the full codegraph generation pipeline.
         Args:
+            config: Configuration settings.
             progress_callback: A function taking (stage, current_item, index, total)
         """
         logger.info("Starting codegraph engine pipeline...")
+        renderer = MarkdownRenderer(config.workspace_dir)
         # 1. Discover files
         if progress_callback:
             progress_callback(PipelineStage.DISCOVERING, None, 0, 0)
-        files = discover_files(self.config)
+        files = discover_files(
+            config.workspace_dir, config.languages, config.exclusions
+        )
         if not files:
             logger.warning("No supported files found.")
             if progress_callback:
@@ -116,9 +119,9 @@ class CodegraphEngine:
         extractions = []
         total_files = len(files)
-        cache_path = self.config.absolute_output_dir / "cache.json"
+        cache_path = config.absolute_output_dir / "cache.json"
         cache_entries = {}
-        if self.config.use_cache and cache_path.exists():
+        if config.use_cache and cache_path.exists():
             try:
                 with open(cache_path, "r", encoding="utf-8") as f:
                     cache_data = json.load(f)
@@ -132,7 +135,7 @@ class CodegraphEngine:
         new_cache_entries = {}
         for file_path, lang in files:
-            rel_path = str(file_path.relative_to(self.config.workspace_dir))
+            rel_path = str(file_path.relative_to(config.workspace_dir))
             try:
                 stat = file_path.stat()
                 mtime = stat.st_mtime
@@ -170,7 +173,7 @@ class CodegraphEngine:
             if progress_callback:
                 progress_callback(PipelineStage.PARSING, None, total_files, total_files)
         else:
-            max_workers = self.config.max_workers
+            max_workers = config.max_workers
             if max_workers > 1 and len(files_to_parse) > 1:
                 logger.info(
                     f"Parsing {len(files_to_parse)} files in parallel with {max_workers} workers..."
@@ -183,7 +186,7 @@ class CodegraphEngine:
                             _parse_file_worker,
                             file_path,
                             lang,
-                            self.config.workspace_dir,
+                            config.workspace_dir,
                         ): (file_path, rel_path, mtime, size, file_hash)
                         for file_path, lang, rel_path, mtime, size, file_hash in files_to_parse
                     }
@@ -235,7 +238,7 @@ class CodegraphEngine:
                         )
                     try:
                         parser = get_parser(lang)
-                        result = parser.parse_file(file_path, self.config.workspace_dir)
+                        result = parser.parse_file(file_path, config.workspace_dir)
                         extractions.append(result)
                         if file_hash:
                             new_cache_entries[rel_path] = CacheEntry(
@@ -247,7 +250,7 @@ class CodegraphEngine:
         # 3. Build graph
         if progress_callback:
             progress_callback(PipelineStage.BUILDING, None, 0, 0)
-        G = build_graph(extractions, self.config.workspace_dir)
+        G = build_graph(extractions, config.workspace_dir)
         # 4. Component clustering
         if progress_callback:
@@ -271,7 +274,7 @@ class CodegraphEngine:
         rendered_nodes = {}
         for nid, ndata in G.nodes(data=True):
             fname = get_node_filename(nid)
-            content = self.renderer.render_node_page(nid, ndata, G, node_component_map)
+            content = renderer.render_node_page(nid, ndata, G, node_component_map)
             rendered_nodes[fname] = content
         rendered_components = {}
@@ -279,7 +282,7 @@ class CodegraphEngine:
             comp_name = component_names[cid]
             cohesion = cohesion_scores[cid]
             fname = get_component_filename(comp_name)
-            content = self.renderer.render_component_page(
+            content = renderer.render_component_page(
                 cid,
                 members,
                 G,
@@ -292,7 +295,7 @@ class CodegraphEngine:
         # Check if README already has AI Insights and preserve it
         ai_insights = None
-        readme_path = self.config.absolute_output_dir / "README.md"
+        readme_path = config.absolute_output_dir / "README.md"
         if readme_path.exists():
             try:
                 old_readme = readme_path.read_text(encoding="utf-8")
@@ -315,7 +318,7 @@ class CodegraphEngine:
                     f"Could not read existing README.md to preserve AI insights: {e}"
                 )
-        readme_content = self.renderer.render_readme(
+        readme_content = renderer.render_readme(
             G,
             components,
             cohesion_scores,
@@ -324,7 +327,7 @@ class CodegraphEngine:
             ai_insights=ai_insights,
         )
-        prompt_content = self.renderer.render_agent_prompt(
+        prompt_content = renderer.render_agent_prompt(
             G, components, cohesion_scores, component_names, analysis
         )
@@ -332,7 +335,7 @@ class CodegraphEngine:
         if progress_callback:
             progress_callback(PipelineStage.WRITING, None, 0, 0)
         self.writer.write_vault(
-            self.config.absolute_output_dir,
+            config.absolute_output_dir,
             rendered_nodes,
             rendered_components,
             readme_content,
@@ -340,9 +343,9 @@ class CodegraphEngine:
         )
         # Write updated cache back to disk
-        if self.config.use_cache:
+        if config.use_cache:
             try:
-                self.config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
+                config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
                 with open(cache_path, "w", encoding="utf-8") as f:
                     json.dump(
                         {k: v.model_dump() for k, v in new_cache_entries.items()},

codegraph_gen-1.1.0/src/codegraph_gen/parser/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+import importlib
+import logging
+import pkgutil
+import sys
+from pathlib import Path
+from codegraph_gen.parser.base import BaseParser, _PARSER_REGISTRY
+logger = logging.getLogger(__name__)
+# Dynamic package scan & load to trigger @register_parser registrations
+package_dir = str(Path(__file__).parent)
+for _, module_name, _ in pkgutil.iter_modules([package_dir]):
+    if module_name == "base":
+        continue
+    full_module_name = f"{__name__}.{module_name}"
+    if full_module_name not in sys.modules:
+        try:
+            importlib.import_module(full_module_name)
+        except Exception as e:
+            logger.error(
+                f"Defensive Loading: Failed to import parser module {full_module_name}: {e}",
+                exc_info=True,
+            )
+def get_parser(language: str) -> BaseParser:
+    """Returns an instance of the parser for the given language."""
+    lang_lower = language.lower()
+    if lang_lower not in _PARSER_REGISTRY:
+        raise ValueError(f"Unsupported language: {language}")
+    return _PARSER_REGISTRY[lang_lower]()

codegraph_gen-1.1.0/src/codegraph_gen/parser/base.py ADDED Viewed

@@ -0,0 +1,154 @@
+from abc import ABC, abstractmethod
+import logging
+from pathlib import Path
+import tree_sitter
+from codegraph_gen.schema import (
+    NodeSchema,
+    EdgeSchema,
+    ExtractionResult,
+    SymbolCollector,
+)
+logger = logging.getLogger(__name__)
+class BaseParser(ABC):
+    """Abstract base class for all language-specific AST parsers."""
+    @abstractmethod
+    def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
+        """Parses a file and extracts symbols (nodes) and relations (edges)."""
+        pass
+_PARSER_REGISTRY: dict[str, type[BaseParser]] = {}
+def register_parser(*languages: str):
+    """Decorator to register a BaseParser subclass for one or more languages."""
+    def decorator(cls: type[BaseParser]):
+        for lang in languages:
+            _PARSER_REGISTRY[lang.lower()] = cls
+        return cls
+    return decorator
+class ScopeTracker:
+    def __init__(self, initial_scope_id: str, initial_scope_type: str = "file"):
+        self._stack: list[tuple[str, str]] = [(initial_scope_id, initial_scope_type)]
+    def push(self, scope_id: str, scope_type: str) -> "ScopeTracker":
+        """Pushes a scope onto the stack. Returns self to act as a context manager."""
+        self._stack.append((scope_id, scope_type))
+        return self
+    def pop(self) -> tuple[str, str]:
+        """Pops the innermost scope from the stack."""
+        if len(self._stack) <= 1:
+            raise IndexError("Cannot pop the root scope")
+        return self._stack.pop()
+    def __enter__(self) -> "ScopeTracker":
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        self.pop()
+    @property
+    def current_id(self) -> str:
+        return self._stack[-1][0] if self._stack else ""
+    @property
+    def current_type(self) -> str:
+        return self._stack[-1][1] if self._stack else ""
+    @property
+    def stack(self) -> list[tuple[str, str]]:
+        return self._stack
+    def find_parent_by_type(self, type_name: str) -> str | None:
+        """Searches the stack from innermost to outermost for a specific scope type."""
+        for scope_id, scope_type in reversed(self._stack):
+            if scope_type == type_name:
+                return scope_id
+        return None
+class ASTVisitor:
+    """Optimized base AST Visitor for dynamic routing and AST traversal."""
+    def __init__(self, source: bytes, rel_path: str, collector: SymbolCollector):
+        self.source = source
+        self.rel_path = rel_path
+        self.collector = collector
+        self._visitor_cache = {}
+        self.scope = ScopeTracker(rel_path, "file")
+    def add_node(self, node: NodeSchema) -> None:
+        """Helper to collect a node via the collector."""
+        self.collector.add_node(node)
+    def add_edge(self, edge: EdgeSchema) -> None:
+        """Helper to collect an edge via the collector."""
+        self.collector.add_edge(edge)
+    @property
+    def scope_stack(self) -> list[tuple[str, str]]:
+        """Deprecated: Use self.scope instead. Kept for backward compatibility."""
+        return self.scope.stack
+    def visit(self, node: tree_sitter.Node) -> None:
+        """Visits a node by dynamically routing to visit_NodeType."""
+        if node.type == "ERROR" or (hasattr(node, "is_error") and node.is_error):
+            logger.debug(f"Skipping syntax error node: {node}")
+            return
+        node_type = node.type
+        visitor = self._visitor_cache.get(node_type)
+        if visitor is None:
+            # Replace characters invalid in Python identifiers
+            safe_type = node_type.replace("-", "_").replace(".", "_")
+            visitor = getattr(self, f"visit_{safe_type}", self.generic_visit)
+            self._visitor_cache[node_type] = visitor
+        try:
+            visitor(node)
+        except Exception as e:
+            logger.error(
+                f"Error visiting node of type {node.type} at line {node.start_point[0] + 1}: {e}",
+                exc_info=True,
+            )
+    def generic_visit(self, node: tree_sitter.Node) -> None:
+        """Default recursive traversal. Prunes known leaf nodes."""
+        if node.type in (
+            "string",
+            "comment",
+            "line_comment",
+            "block_comment",
+            "number",
+            "true",
+            "false",
+            "null",
+        ):
+            return
+        for child in node.children:
+            self.visit(child)
+    def get_text(self, node: tree_sitter.Node) -> str:
+        """Helper to extract text from a node using the source bytes."""
+        return (
+            self.source[node.start_byte : node.end_byte]
+            .decode("utf-8", errors="replace")
+            .strip()
+        )
+    def get_line_range(self, node: tree_sitter.Node) -> tuple[int, int]:
+        """Helper to extract 1-indexed line start and end points."""
+        return node.start_point[0] + 1, node.end_point[0] + 1
+    def get_current_parent_id(self) -> str:
+        """Helper to retrieve the current parent scope's ID."""
+        return self.scope.current_id

codegraph-gen 1.0.0__tar.gz → 1.1.0__tar.gz

codegraph-gen 1.0.0__tar.gz → 1.1.0__tar.gz