gdscript_code_graph-1.0.0-py3-none-any.whl

gdscript_code_graph/__init__.py
@@ -0,0 +1,21 @@
+ from __future__ import annotations
+
+ __version__ = "1.0.0"
+
+ from .discovery import ProjectFiles, discover_project
+ from .graph import build_graph, serialize_graph
+ from .schema import Evidence, FunctionMetrics, Graph, GraphLink, GraphNode, Meta, NodeMetrics
+
+ __all__ = [
+     "ProjectFiles",
+     "discover_project",
+     "build_graph",
+     "serialize_graph",
+     "Evidence",
+     "FunctionMetrics",
+     "Graph",
+     "GraphLink",
+     "GraphNode",
+     "Meta",
+     "NodeMetrics",
+ ]
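
The re-exports above are the package's entire public surface. As a minimal sketch of driving it end to end (the project path is a hypothetical placeholder, and an installed copy of the package is assumed):

    from pathlib import Path

    import gdscript_code_graph as gcg

    # discover_project / build_graph / serialize_graph are the re-exports above.
    project = gcg.discover_project(Path("path/to/godot-project"))
    graph = gcg.build_graph(project, repo_name="demo")
    print(gcg.serialize_graph(graph))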

gdscript_code_graph/cli.py
@@ -0,0 +1,71 @@
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+ import click
+
+ from .discovery import discover_project
+ from .graph import build_graph, serialize_graph
+
+
+ @click.group()
+ def main():
+     """GDScript code metrics analyzer."""
+     pass
+
+
+ @main.command()
+ @click.argument("project_dir", type=click.Path(exists=True, file_okay=False))
+ @click.option(
+     "--out",
+     "-o",
+     type=click.Path(),
+     default=None,
+     help="Output file path. Defaults to stdout.",
+ )
+ @click.option(
+     "--repo-name",
+     type=str,
+     default=None,
+     help="Repository name for the output. Defaults to directory name.",
+ )
+ @click.option(
+     "--exclude",
+     "-e",
+     multiple=True,
+     help="Directory names to exclude (repeatable). Example: --exclude addons --exclude test",
+ )
+ def analyze(
+     project_dir: str,
+     out: str | None,
+     repo_name: str | None,
+     exclude: tuple[str, ...],
+ ) -> None:
+     """Analyze a Godot project directory for code metrics."""
+     project_path = Path(project_dir)
+
+     if repo_name is None:
+         repo_name = project_path.name
+
+     try:
+         project = discover_project(
+             project_path,
+             exclude_dirs=list(exclude) if exclude else None,
+         )
+     except FileNotFoundError:
+         click.echo(
+             f"Error: No project.godot found in or above '{project_dir}'",
+             err=True,
+         )
+         sys.exit(1)
+
+     graph = build_graph(project, repo_name)
+     json_output = serialize_graph(graph)
+
+     if out is not None:
+         out_path = Path(out)
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+         out_path.write_text(json_output, encoding="utf-8")
+     else:
+         click.echo(json_output)
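
A quick way to exercise the analyze command without a shell is click's own test runner. A sketch, assuming the package is installed and path/to/godot-project stands in for a real project directory:

    from click.testing import CliRunner

    from gdscript_code_graph.cli import main

    runner = CliRunner()
    # Equivalent to: gdscript-code-graph analyze path/to/godot-project -e addons
    result = runner.invoke(
        main, ["analyze", "path/to/godot-project", "--exclude", "addons"]
    )
    print(result.exit_code)  # 0 on success; 1 if no project.godot was found
    print(result.output)     # the JSON graph when --out is omitted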

gdscript_code_graph/discovery.py
@@ -0,0 +1,64 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+
+
+ @dataclass
+ class ProjectFiles:
+     project_root: Path  # directory containing project.godot
+     gd_files: list[Path]  # absolute paths to all .gd files
+
+     def to_res_path(self, abs_path: Path) -> str:
+         rel = abs_path.relative_to(self.project_root)
+         return "res://" + rel.as_posix()
+
+
+ def find_project_root(start_dir: Path) -> Path:
+     """Locate the directory containing ``project.godot``, walking upward.
+
+     Raises ``FileNotFoundError`` if none is found in *start_dir* or any parent.
+     """
+     current = start_dir.resolve()
+     while True:
+         if (current / "project.godot").exists():
+             return current
+         parent = current.parent
+         if parent == current:
+             raise FileNotFoundError(
+                 f"No project.godot found in {start_dir} or any parent directory"
+             )
+         current = parent
+
+
+ def discover_project(
+     project_dir: Path,
+     exclude_dirs: list[str] | None = None,
+ ) -> ProjectFiles:
+     """Find the project root, then glob ``**/*.gd`` under it.
+
+     Always excludes the ``.godot/`` directory (Godot internal cache).
+     Additional directories can be excluded via *exclude_dirs* -- each entry
+     is matched against every component of the file's path relative to the
+     project root. For example, ``exclude_dirs=["addons", "test"]`` will
+     skip any ``.gd`` file whose relative path contains an ``addons`` or
+     ``test`` directory component.
+
+     Files are sorted for deterministic output; if no ``.gd`` files are
+     found, ``gd_files`` is an empty list rather than an error.
+     """
+     root = find_project_root(project_dir)
+
+     # Always exclude .godot; merge user-supplied directories.
+     always_excluded = {".godot"}
+     if exclude_dirs:
+         always_excluded.update(exclude_dirs)
+
+     gd_files = sorted(
+         resolved
+         for p in root.rglob("*.gd")
+         if not always_excluded.intersection(
+             (resolved := p.resolve()).relative_to(root).parts
+         )
+     )
+     return ProjectFiles(project_root=root, gd_files=gd_files)
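
As a usage sketch (the project path below is a placeholder), discovery plus the res:// mapping looks like:

    from pathlib import Path

    from gdscript_code_graph.discovery import discover_project

    project = discover_project(Path("path/to/godot-project"), exclude_dirs=["addons"])
    for gd_file in project.gd_files:
        # Absolute paths map back to Godot-style resource paths.
        print(project.to_res_path(gd_file))  # e.g. res://actors/player.gd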

gdscript_code_graph/graph.py
@@ -0,0 +1,103 @@
+ from __future__ import annotations
+
+ import json
+ from dataclasses import asdict
+ from datetime import datetime, timezone
+ from pathlib import Path
+
+ from gdscript_code_graph.discovery import ProjectFiles
+ from gdscript_code_graph.metrics import compute_metrics
+ from gdscript_code_graph.parsing import parse_all
+ from gdscript_code_graph.relationships import (
+     build_class_name_table,
+     extract_extends,
+     extract_preloads,
+     extract_returns,
+     extract_typed_deps,
+     resolve_relationships_with_evidence,
+ )
+ from gdscript_code_graph.schema import (
+     FunctionMetrics,
+     Graph,
+     GraphNode,
+     Meta,
+     NodeMetrics,
+ )
+
+
+ def build_graph(project: ProjectFiles, repo_name: str) -> Graph:
+     """Run the full analysis pipeline and assemble a Graph.
+
+     Pipeline:
+     1. Parse all .gd files
+     2. Build class name lookup table
+     3. For each file: compute metrics, look up class_name, extract raw
+        relationships, build node
+     4. Resolve relationships with evidence (class names to paths, skip
+        built-ins, aggregate evidence arrays)
+     5. Assemble Graph with schema_version="1.0" and metadata
+     """
+     # Step 1: Parse
+     parse_results = parse_all(project)
+
+     # Step 2: Build class name table
+     class_name_table = build_class_name_table(parse_results)
+     res_path_to_class_name = {v: k for k, v in class_name_table.items()}
+
+     # Step 3: Process each file
+     nodes: list[GraphNode] = []
+     all_raw_rels = []
+
+     for pr in parse_results:
+         # Compute metrics
+         file_metrics = compute_metrics(pr.source, pr.tree)
+
+         # Node name: class_name if declared, else filename stem
+         name = res_path_to_class_name.get(pr.res_path) or Path(pr.file_path).stem
+
+         # Build node
+         node = GraphNode(
+             id=pr.res_path,
+             kind="script",
+             language="gdscript",
+             name=name,
+             metrics=NodeMetrics(
+                 loc=file_metrics.loc,
+                 max_cc=file_metrics.max_cc,
+                 median_cc=file_metrics.median_cc,
+                 mi=file_metrics.mi,
+                 mi_min=file_metrics.mi_min,
+                 mi_median=file_metrics.mi_median,
+                 functions=file_metrics.functions,
+             ),
+             tags=[],
+         )
+         nodes.append(node)
+
+         # Extract raw relationships (only if tree is valid)
+         if pr.tree is not None:
+             all_raw_rels.extend(extract_extends(pr.tree, pr.res_path))
+             all_raw_rels.extend(extract_preloads(pr.tree, pr.res_path))
+             all_raw_rels.extend(extract_typed_deps(pr.tree, pr.res_path))
+             all_raw_rels.extend(extract_returns(pr.tree, pr.res_path))
+
+     # Step 4: Resolve relationships and build GraphLinks with evidence
+     known_res_paths = {pr.res_path for pr in parse_results}
+     links = resolve_relationships_with_evidence(
+         all_raw_rels, class_name_table, known_res_paths
+     )
+
+     # Step 5: Assemble Graph
+     now = datetime.now(timezone.utc).isoformat()
+
+     return Graph(
+         schema_version="1.0",
+         meta=Meta(repo=repo_name, generated_at=now),
+         nodes=nodes,
+         links=links,
+     )
+
+
+ def serialize_graph(graph: Graph) -> str:
+     """Serialize a Graph to a JSON string."""
+     return json.dumps(asdict(graph), indent=2)
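
The five-step pipeline in the docstring corresponds to a short driver. A sketch under the same placeholder-path assumption, writing the serialized graph to disk:

    from pathlib import Path

    from gdscript_code_graph.discovery import discover_project
    from gdscript_code_graph.graph import build_graph, serialize_graph

    project = discover_project(Path("path/to/godot-project"))
    graph = build_graph(project, repo_name="my-game")
    Path("graph.json").write_text(serialize_graph(graph), encoding="utf-8")
    print(len(graph.nodes), "nodes,", len(graph.links), "links")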

gdscript_code_graph/metrics.py
@@ -0,0 +1,395 @@
+ from __future__ import annotations
+
+ import math
+ import statistics
+ from dataclasses import dataclass
+
+ from lark import Tree, Token
+
+ from gdscript_code_graph.schema import FunctionMetrics
+
+
+ @dataclass
+ class FileMetrics:
+     loc: int
+     max_cc: int | None
+     median_cc: float | None
+     mi: float | None
+     mi_min: float | None
+     mi_median: float | None
+     functions: list[FunctionMetrics]
+
+
+ @dataclass
+ class HalsteadResult:
+     volume: float  # N * log2(n)
+     vocabulary: int  # n = unique operators + unique operands
+     length: int  # N = total operators + total operands
+
+
+ # ---------------------------------------------------------------------------
+ # Halstead classification tables
+ # ---------------------------------------------------------------------------
+
+ # Token types that count as operators (explicit named tokens).
+ _OPERATOR_TOKEN_TYPES = frozenset({
+     "DOT", "EQUAL", "MINUS", "PLUS", "STAR", "SLASH", "PERCENT",
+     "MORETHAN", "LESSTHAN",
+     "AND", "OR", "NOT",
+     "IF", "ELSE",
+ })
+
+ # Token types that count as operands.
+ _OPERAND_TOKEN_TYPES = frozenset({
+     "NAME", "NUMBER", "REGULAR_STRING", "TYPE_HINT",
+ })
+
+ # Subtree node types that represent keyword operators absorbed by the grammar.
+ # Each occurrence contributes exactly one keyword operator. Only *wrapper*
+ # nodes are listed (not inner variants like class_var_typed_assgnd) to
+ # avoid double-counting.
+ _KEYWORD_SUBTREE_MAP: dict[str, str] = {
+     "extends_stmt": "extends",
+     "classname_stmt": "class_name",
+     "func_def": "func",
+     "class_var_stmt": "var",
+     "func_var_stmt": "var",
+     "const_stmt": "const",
+     "if_branch": "if",
+     "elif_branch": "elif",
+     "else_branch": "else",
+     "for_stmt": "for",
+     "for_stmt_typed": "for",
+     "while_stmt": "while",
+     "match_stmt": "match",
+     "pass_stmt": "pass",
+     "return_stmt": "return",
+     "signal_stmt": "signal",
+ }
+
+
+ # ---------------------------------------------------------------------------
+ # LOC
+ # ---------------------------------------------------------------------------
+
+
+ def compute_loc(source: str) -> int:
+     """Count non-empty, non-comment-only lines."""
+     return sum(
+         1 for line in source.splitlines()
+         if line.strip() and not line.strip().startswith("#")
+     )
+
+
+ # ---------------------------------------------------------------------------
+ # Cyclomatic complexity
+ # ---------------------------------------------------------------------------
+
+
+ def compute_cyclomatic_complexity(tree: Tree) -> int:
+     """Compute cyclomatic complexity from a Lark AST.
+
+     CC = 1 (base) plus count of branching constructs:
+     - if_branch, elif_branch, while_stmt, for_stmt, for_stmt_typed, match_branch
+     - and/&& tokens in and_test / asless_and_test nodes
+     - or/|| tokens in or_test / asless_or_test nodes
+     - ternary "if" tokens in test_expr / asless_test_expr nodes
+     """
+     cc = 1
+
+     for subtree in tree.iter_subtrees():
+         node_type = subtree.data
+
+         # Direct branch nodes: +1 each
+         if node_type in (
+             "if_branch",
+             "elif_branch",
+             "while_stmt",
+             "for_stmt",
+             "for_stmt_typed",
+             "match_branch",
+         ):
+             cc += 1
+
+         # Boolean operators in and_test / asless_and_test:
+         # "and" has token type AND, "&&" has token type __ANON_3
+         if node_type in ("and_test", "asless_and_test"):
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type in ("AND", "__ANON_3"):
+                     cc += 1
+
+         # Boolean operators in or_test / asless_or_test:
+         # "or" has token type OR, "||" has token type __ANON_2
+         if node_type in ("or_test", "asless_or_test"):
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type in ("OR", "__ANON_2"):
+                     cc += 1
+
+         # Ternary expressions: test_expr / asless_test_expr with "if" token
+         if node_type in ("test_expr", "asless_test_expr"):
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type == "IF":
+                     cc += 1
+
+     return cc
+
+
+ def _extract_func_name(func_def: Tree) -> str:
+     """Extract function name from a func_def subtree.
+
+     Looks for the ``func_header`` child and returns the first ``NAME`` token.
+     Returns ``"<unknown>"`` if no name is found.
+     """
+     for child in func_def.children:
+         if isinstance(child, Tree) and child.data == "func_header":
+             for header_child in child.children:
+                 if isinstance(header_child, Token) and header_child.type == "NAME":
+                     return str(header_child)
+             break
+     return "<unknown>"
+
+
+ def compute_function_loc(source: str, start_line: int, end_line: int) -> int:
+     """Count non-empty, non-comment lines within a function's source range.
+
+     ``start_line`` and ``end_line`` are 1-based line numbers from Lark AST
+     metadata. Lark's ``end_line`` points past the last code line of the
+     function (it includes trailing ``_NL`` tokens), so ``end_line - 1``
+     is always >= the last code line, making the slice inclusive of all
+     function lines.
+
+     Extracts lines ``[start_line-1 : end_line-1]`` and applies the same
+     counting logic as :func:`compute_loc`.
+     """
+     # Verified: Lark's meta.end_line accounts for trailing _NL tokens,
+     # so end_line - 1 correctly includes the last code line of the function.
+     lines = source.splitlines()
+     func_lines = lines[start_line - 1 : end_line - 1]
+     return sum(
+         1 for line in func_lines
+         if line.strip() and not line.strip().startswith("#")
+     )
+
+
+ def compute_function_metrics(
+     func_def: Tree, source: str
+ ) -> FunctionMetrics:
+     """Compute all metrics for a single function.
+
+     Extracts name, line number, CC, LOC, Halstead volume, and MI from
+     the ``func_def`` subtree and its corresponding source lines.
+
+     Returns a :class:`FunctionMetrics` with ``mi=None`` if LOC=0 or
+     Halstead volume=0.
+     """
+     name = _extract_func_name(func_def)
+     line = func_def.meta.line
+     end_line = func_def.meta.end_line
+
+     cc = compute_cyclomatic_complexity(func_def)
+     loc = compute_function_loc(source, line, end_line)
+     halstead = compute_halstead_volume(func_def)
+
+     mi: float | None = None
+     if loc > 0 and halstead.volume > 0:
+         mi = compute_maintainability_index(loc, cc, halstead.volume)
+
+     return FunctionMetrics(name=name, line=line, cc=cc, loc=loc, mi=mi)
+
+
+ def compute_all_function_metrics(
+     tree: Tree, source: str
+ ) -> list[FunctionMetrics]:
+     """Compute metrics for every function in a Lark AST.
+
+     Walks the tree to find ``func_def`` subtrees. For each, calls
+     :func:`compute_function_metrics` to get name, line, CC, LOC, and MI.
+
+     Returns a list of :class:`FunctionMetrics` -- one per function in source
+     order. Files with no functions return an empty list.
+     """
+     results: list[FunctionMetrics] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data != "func_def":
+             continue
+         results.append(compute_function_metrics(subtree, source))
+
+     return results
+
+
+ def aggregate_cc(
+     per_func: list[FunctionMetrics],
+ ) -> tuple[int, float]:
+     """Aggregate per-function CC into max and median.
+
+     Returns ``(max_cc, median_cc)``.
+
+     - ``max_cc``: the highest CC among all functions (hotspot detection).
+     - ``median_cc``: the median CC across all functions (typical complexity),
+       rounded to 1 decimal place.
+
+     If the list is empty (no functions in the file), returns ``(1, 1.0)``
+     as a baseline -- any executable code path has a minimum CC of 1.
+     """
+     if not per_func:
+         return (1, 1.0)
+
+     cc_values = [f.cc for f in per_func]
+     max_cc = max(cc_values)
+     median_cc = round(float(statistics.median(cc_values)), 1)
+     return (max_cc, median_cc)
+
+
+ def aggregate_mi(
+     per_func: list[FunctionMetrics],
+ ) -> tuple[float | None, float | None]:
+     """Aggregate per-function MI into min and median.
+
+     Returns ``(mi_min, mi_median)``.
+
+     - ``mi_min``: the lowest MI among all functions (worst maintainability).
+     - ``mi_median``: the median MI across all functions, rounded to 2 decimals.
+
+     Returns ``(None, None)`` if no functions have a non-None MI value.
+     """
+     mi_values = [f.mi for f in per_func if f.mi is not None]
+     if not mi_values:
+         return (None, None)
+
+     mi_min = min(mi_values)
+     mi_median = round(float(statistics.median(mi_values)), 2)
+     return (mi_min, mi_median)
+
+
+ # ---------------------------------------------------------------------------
+ # Halstead volume
+ # ---------------------------------------------------------------------------
+
+
+ def compute_halstead_volume(tree: Tree) -> HalsteadResult:
+     """Compute Halstead volume from a Lark AST.
+
+     Walks the tree once, classifying Token leaves and subtree types
+     into operators and operands.
+
+     Operator tokens: named types (DOT, EQUAL, PLUS, …) plus any __ANON_*
+     tokens (compound assignment, comparisons, &&, ||, etc.).
+
+     Operand tokens: NAME, NUMBER, REGULAR_STRING, TYPE_HINT.
+
+     Keyword operators (absorbed by the grammar): recovered from subtree
+     node types like extends_stmt → "extends", func_def → "func", etc.
+
+     Returns a HalsteadResult with volume (N × log₂(n)), vocabulary (n),
+     and program length (N).
+     """
+     operators: list[str] = []
+     operands: list[str] = []
+
+     for subtree in tree.iter_subtrees():
+         # Recover keyword operators from subtree types
+         keyword = _KEYWORD_SUBTREE_MAP.get(str(subtree.data))
+         if keyword is not None:
+             operators.append(keyword)
+
+         # Classify Token leaves
+         for child in subtree.children:
+             if not isinstance(child, Token):
+                 continue
+             if child.type in _OPERATOR_TOKEN_TYPES:
+                 operators.append(str(child))
+             elif child.type in _OPERAND_TOKEN_TYPES:
+                 operands.append(str(child))
+             elif child.type.startswith("__ANON"):
+                 operators.append(str(child))
+
+     n1 = len(set(operators))  # unique operators
+     n2 = len(set(operands))  # unique operands
+     n = n1 + n2  # vocabulary
+     big_n = len(operators) + len(operands)  # program length
+
+     volume = big_n * math.log2(n) if n > 0 else 0.0
+
+     return HalsteadResult(volume=volume, vocabulary=n, length=big_n)
+
+
+ # ---------------------------------------------------------------------------
+ # Maintainability index
+ # ---------------------------------------------------------------------------
+
+
+ def compute_maintainability_index(
+     loc: int, cc: int, halstead_volume: float
+ ) -> float:
+     """Compute Maintainability Index from LOC, CC, and Halstead Volume.
+
+     Uses the standard MI formula (0–171 scale):
+         MI = 171 − 5.2 × ln(V) − 0.23 × CC − 16.2 × ln(LOC)
+
+     Result is clamped to a minimum of 0.
+
+     Requires ``loc > 0`` and ``halstead_volume > 0`` (both are arguments
+     to ``math.log``). Raises ``ValueError`` if either is <= 0.
+     """
+     if loc <= 0:
+         raise ValueError(f"loc must be > 0, got {loc}")
+     if halstead_volume <= 0:
+         raise ValueError(f"halstead_volume must be > 0, got {halstead_volume}")
+     mi = (
+         171
+         - 5.2 * math.log(halstead_volume)
+         - 0.23 * cc
+         - 16.2 * math.log(loc)
+     )
+     return round(max(0.0, mi), 2)
+
+
+ # ---------------------------------------------------------------------------
+ # Aggregate entry point
+ # ---------------------------------------------------------------------------
+
+
+ def compute_metrics(source: str, tree: Tree | None) -> FileMetrics:
+     """Compute all metrics for a single file.
+
+     LOC is always computed from raw source text.
+     Per-function metrics (CC, LOC, MI) are computed from the AST if available.
+     CC is aggregated into max_cc and median_cc. Both are None if parse failed.
+     File-level MI is computed when tree is available, max_cc is known, LOC > 0,
+     and whole-file Halstead volume > 0. MI uses max_cc in its formula.
+     Per-function MI is aggregated into mi_min and mi_median.
+     """
+     loc = compute_loc(source)
+
+     max_cc: int | None = None
+     median_cc: float | None = None
+     mi: float | None = None
+     mi_min: float | None = None
+     mi_median: float | None = None
+     functions: list[FunctionMetrics] = []
+
+     if tree is not None:
+         functions = compute_all_function_metrics(tree, source)
+         max_cc, median_cc = aggregate_cc(functions)
+         mi_min, mi_median = aggregate_mi(functions)
+
+     # File-level MI uses max_cc (worst per-function CC) rather than the
+     # standard total-file CC. This penalises files containing a single
+     # highly complex function instead of diluting the score across many
+     # simple functions. Per-function MI (mi_min, mi_median) already
+     # uses each function's own CC for granular analysis.
+     if tree is not None and max_cc is not None and loc > 0:
+         halstead = compute_halstead_volume(tree)
+         if halstead.volume > 0:
+             mi = compute_maintainability_index(loc, max_cc, halstead.volume)
+
+     return FileMetrics(
+         loc=loc,
+         max_cc=max_cc,
+         median_cc=median_cc,
+         mi=mi,
+         mi_min=mi_min,
+         mi_median=mi_median,
+         functions=functions,
+     )
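
To make the MI formula concrete, here is a worked example with illustrative numbers (not taken from any real file):

    import math

    # Illustrative inputs: LOC = 20, CC = 4, Halstead volume V = 250.
    loc, cc, volume = 20, 4, 250.0

    # MI = 171 - 5.2*ln(V) - 0.23*CC - 16.2*ln(LOC), clamped at 0.
    mi = 171 - 5.2 * math.log(volume) - 0.23 * cc - 16.2 * math.log(loc)
    print(round(max(0.0, mi), 2))  # 92.84 -- comfortably maintainable on the 0-171 scale

    # The same inputs through the module's own function should agree:
    # compute_maintainability_index(20, 4, 250.0) == 92.84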

gdscript_code_graph/parsing.py
@@ -0,0 +1,73 @@
+ from __future__ import annotations
+
+ import logging
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ from gdtoolkit.parser import parser as gdparser
+ from lark import Tree
+
+ from gdscript_code_graph.discovery import ProjectFiles
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class ParseResult:
+     file_path: Path
+     res_path: str
+     tree: Tree | None  # None if parse failed
+     source: str  # raw source text ("" if the file could not be read)
+     error: str | None  # error message if parse failed
+
+
+ def parse_file(file_path: Path, res_path: str) -> ParseResult:
+     """Parse a single GDScript file and return a ParseResult.
+
+     Uses gdtoolkit's parser with gather_metadata=True to get .meta.line
+     on AST nodes. Gracefully handles parse errors and encoding issues --
+     a broken file never raises; the error is captured in the result.
+     """
+     try:
+         source = file_path.read_text(encoding="utf-8")
+     except UnicodeDecodeError as exc:
+         logger.warning("Failed to read %s: %s", file_path, exc)
+         return ParseResult(
+             file_path=file_path,
+             res_path=res_path,
+             tree=None,
+             source="",
+             error=str(exc),
+         )
+
+     try:
+         tree = gdparser.parse(source, gather_metadata=True)
+         return ParseResult(
+             file_path=file_path,
+             res_path=res_path,
+             tree=tree,
+             source=source,
+             error=None,
+         )
+     except Exception as exc:
+         logger.warning("Failed to parse %s: %s", file_path, exc)
+         return ParseResult(
+             file_path=file_path,
+             res_path=res_path,
+             tree=None,
+             source=source,
+             error=str(exc),
+         )
+
+
+ def parse_all(project: ProjectFiles) -> list[ParseResult]:
+     """Parse all .gd files in the project.
+
+     Never aborts the whole batch -- individual file errors are captured
+     in each ParseResult.
+     """
+     results = []
+     for file_path in project.gd_files:
+         res_path = project.to_res_path(file_path)
+         results.append(parse_file(file_path, res_path))
+     return results
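
The never-raise contract is easiest to see with a file that does not parse. A sketch, assuming a scratch file written locally and a snippet that gdtoolkit is expected to reject:

    from pathlib import Path

    from gdscript_code_graph.parsing import parse_file

    # A deliberately malformed GDScript snippet in a temporary file.
    bad = Path("broken.gd")
    bad.write_text("func (:\n", encoding="utf-8")

    result = parse_file(bad, "res://broken.gd")
    print(result.tree)           # None -- the parse failed
    print(result.error)          # the captured parser error message
    print(bool(result.source))   # True -- the raw source is still kept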

gdscript_code_graph/relationships.py
@@ -0,0 +1,392 @@
+ from __future__ import annotations
+
+ import logging
+ import re
+ from collections import defaultdict
+ from dataclasses import dataclass
+
+ from lark import Tree, Token
+
+ from gdscript_code_graph.parsing import ParseResult
+ from gdscript_code_graph.schema import Evidence, GraphLink
+
+ logger = logging.getLogger(__name__)
+
+ _ARRAY_TYPE_RE = re.compile(r"Array\[(\w+)\]")
+
+
+ @dataclass
+ class RawRelationship:
+     source_res_path: str
+     target: str  # res:// path OR class name
+     kind: str  # "extends", "preloads", "loads", "typed_dependency", "returns"
+     line: int
+
+
+ def _extract_string_value(string_tree: Tree) -> str:
+     """Extract the unquoted string value from a Lark ``string`` tree node.
+
+     The ``string`` node has a single child token of type ``REGULAR_STRING``
+     (e.g. ``"res://actors/character.gd"``). We strip the surrounding
+     quotes (single or double) to return the raw path.
+     """
+     token = str(string_tree.children[0])
+     if len(token) >= 2 and token[0] in ('"', "'") and token[-1] == token[0]:
+         return token[1:-1]
+     return token
+
+
+ def extract_class_name(tree: Tree) -> str | None:
+     """Return the declared ``class_name`` from a parsed GDScript AST, or None.
+
+     Handles both ``classname_stmt`` (e.g. ``class_name Player``) and
+     ``classname_extends_stmt`` (e.g. ``class_name Foo extends Bar``) forms.
+     In the latter case the *first* ``NAME`` token is the class name.
+     """
+     for subtree in tree.iter_subtrees():
+         if subtree.data == "classname_stmt":
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type == "NAME":
+                     return str(child)
+         if subtree.data == "classname_extends_stmt":
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type == "NAME":
+                     return str(child)
+     return None
+
+
+ def extract_extends(tree: Tree, source_res_path: str) -> list[RawRelationship]:
+     """Extract ``extends`` relationships from a parsed GDScript AST.
+
+     Handles three forms:
+     - ``extends ClassName`` -- extends by class name (NAME token)
+     - ``extends "res://path.gd"`` -- extends by path (string subtree)
+     - ``class_name Foo extends Bar`` -- classname_extends_stmt (second NAME
+       token after the class name is the extends target)
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data == "extends_stmt":
+             line = getattr(subtree.meta, "line", 0)
+             for child in subtree.children:
+                 if isinstance(child, Tree) and child.data == "string":
+                     # extends "res://path/to/file.gd"
+                     target = _extract_string_value(child)
+                     results.append(RawRelationship(
+                         source_res_path=source_res_path,
+                         target=target,
+                         kind="extends",
+                         line=line,
+                     ))
+                 elif isinstance(child, Token) and child.type == "NAME":
+                     # extends ClassName
+                     results.append(RawRelationship(
+                         source_res_path=source_res_path,
+                         target=str(child),
+                         kind="extends",
+                         line=line,
+                     ))
+
+         elif subtree.data == "classname_extends_stmt":
+             line = getattr(subtree.meta, "line", 0)
+             # Children are NAME tokens; the first is the class_name, the
+             # second (after the implicit ``extends`` keyword) is the target.
+             name_tokens = [
+                 child for child in subtree.children
+                 if isinstance(child, Token) and child.type == "NAME"
+             ]
+             if len(name_tokens) >= 2:
+                 target = str(name_tokens[1])
+                 results.append(RawRelationship(
+                     source_res_path=source_res_path,
+                     target=target,
+                     kind="extends",
+                     line=line,
+                 ))
+
+     return results
+
+
+ def extract_preloads(tree: Tree, source_res_path: str) -> list[RawRelationship]:
+     """Extract ``preload(...)`` and ``load(...)`` relationships from an AST.
+
+     Walks the tree looking for:
+
+     1. ``standalone_call`` nodes whose first child is a ``NAME`` token equal
+        to ``preload`` or ``load`` (bare calls like ``preload("res://...")``).
+     2. ``getattr_call`` nodes where the ``getattr`` subtree ends with a
+        ``NAME`` token equal to ``load`` (e.g. ``ResourceLoader.load("res://...")``).
+        Only ``load`` is handled here -- ``preload`` is a GDScript keyword and
+        is never called via attribute access.
+
+     If the call has a ``string`` argument that starts with ``res://``, it is
+     recorded.
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data == "standalone_call":
+             children = subtree.children
+             if not children:
+                 continue
+
+             first = children[0]
+             if not (isinstance(first, Token) and first.type == "NAME"):
+                 continue
+
+             func_name = str(first)
+             if func_name not in ("preload", "load"):
+                 continue
+
+             kind = func_name + "s"  # "preloads" or "loads"
+             line = getattr(subtree.meta, "line", 0)
+
+             # Look for string argument among remaining children
+             for child in children[1:]:
+                 if isinstance(child, Tree) and child.data == "string":
+                     target = _extract_string_value(child)
+                     if target.startswith("res://"):
+                         results.append(RawRelationship(
+                             source_res_path=source_res_path,
+                             target=target,
+                             kind=kind,
+                             line=line,
+                         ))
+
+         elif subtree.data == "getattr_call":
+             children = subtree.children
+             if not children:
+                 continue
+
+             # First child should be a ``getattr`` subtree.
+             first = children[0]
+             if not (isinstance(first, Tree) and first.data == "getattr"):
+                 continue
+
+             # The last NAME token in the getattr chain is the method name.
+             name_tokens = [
+                 child for child in first.children
+                 if isinstance(child, Token) and child.type == "NAME"
+             ]
+             if not name_tokens or str(name_tokens[-1]) != "load":
+                 continue
+
+             line = getattr(subtree.meta, "line", 0)
+
+             # Look for string arguments among remaining children
+             for child in children[1:]:
+                 if isinstance(child, Tree) and child.data == "string":
+                     target = _extract_string_value(child)
+                     if target.startswith("res://"):
+                         results.append(RawRelationship(
+                             source_res_path=source_res_path,
+                             target=target,
+                             kind="loads",
+                             line=line,
+                         ))
+
+     return results
+
+
+ def extract_type_from_hint(type_hint: str) -> str:
+     """Extract the inner type name from a TYPE_HINT token value.
+
+     Handles ``Array[Type]`` by returning the inner type (``Type``).
+     For plain types like ``Player`` returns the value as-is.
+
+     Examples::
+
+         >>> extract_type_from_hint("Player")
+         'Player'
+         >>> extract_type_from_hint("Array[Item]")
+         'Item'
+         >>> extract_type_from_hint("Array")
+         'Array'
+     """
+     match = _ARRAY_TYPE_RE.match(type_hint)
+     if match:
+         return match.group(1)
+     return type_hint
+
+
+ def extract_typed_deps(
+     tree: Tree, source_res_path: str
+ ) -> list[RawRelationship]:
+     """Extract typed-dependency relationships from class variable declarations.
+
+     Detects patterns:
+
+     - ``var x: Type`` (``class_var_typed`` AST node)
+     - ``var x: Type = value`` (``class_var_typed_assgnd`` AST node)
+     - ``var x: Array[Type]`` / ``var x: Array[Type] = []``
+
+     Each produces a ``RawRelationship`` with ``kind="typed_dependency"``
+     where the target is the type name (or inner type for ``Array[Type]``).
+     Built-in types are *not* filtered here -- that happens during resolution
+     when the type name is looked up in the class-name table.
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data not in ("class_var_typed", "class_var_typed_assgnd"):
+             continue
+
+         type_hint: str | None = None
+         for child in subtree.children:
+             if isinstance(child, Token) and child.type == "TYPE_HINT":
+                 type_hint = str(child)
+                 break
+
+         if type_hint is None:
+             continue
+
+         target = extract_type_from_hint(type_hint)
+         line = getattr(subtree.meta, "line", 0)
+         results.append(RawRelationship(
+             source_res_path=source_res_path,
+             target=target,
+             kind="typed_dependency",
+             line=line,
+         ))
+
+     return results
+
+
+ def extract_returns(
+     tree: Tree, source_res_path: str
+ ) -> list[RawRelationship]:
+     """Extract return-type relationships from function declarations.
+
+     Detects ``func foo() -> Type:`` patterns by inspecting ``func_header``
+     AST nodes for a ``TYPE_HINT`` token (which represents the return type).
+
+     Each produces a ``RawRelationship`` with ``kind="returns"`` where the
+     target is the type name (or inner type for ``Array[Type]``). Built-in
+     types are *not* filtered here -- that happens during resolution.
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data != "func_header":
+             continue
+
+         # The return type is the last TYPE_HINT token that is a direct child of
+         # func_header. Parameter type hints live inside func_args (a Tree child),
+         # so they are not visited here. We iterate in reverse to grab the return
+         # type first and break immediately.
+         return_type: str | None = None
+         for child in reversed(subtree.children):
+             if isinstance(child, Token) and child.type == "TYPE_HINT":
+                 return_type = str(child)
+                 break
+
+         if return_type is None:
+             continue
+
+         target = extract_type_from_hint(return_type)
+         line = getattr(subtree.meta, "line", 0)
+         results.append(RawRelationship(
+             source_res_path=source_res_path,
+             target=target,
+             kind="returns",
+             line=line,
+         ))
+
+     return results
+
+
+ def build_class_name_table(
+     parse_results: list[ParseResult],
+ ) -> dict[str, str]:
+     """Build a mapping of class_name declarations to their ``res://`` paths.
+
+     Iterates over all successfully parsed files and extracts any
+     ``class_name`` declaration. If two files declare the same class name,
+     a warning is logged and the later entry wins.
+     """
+     table: dict[str, str] = {}
+
+     for pr in parse_results:
+         if pr.tree is None:
+             continue
+         name = extract_class_name(pr.tree)
+         if name is None:
+             continue
+         if name in table:
+             logger.warning(
+                 "Duplicate class_name %r: %s and %s",
+                 name,
+                 table[name],
+                 pr.res_path,
+             )
+         table[name] = pr.res_path
+
+     return table
+
+
+ def _resolve_target(
+     rel: RawRelationship,
+     class_name_table: dict[str, str],
+     known_res_paths: set[str],
+ ) -> str | None:
+     """Resolve a single relationship target to a ``res://`` path, or None.
+
+     - If the target already starts with ``res://``, it is returned directly --
+       but only if it appears in *known_res_paths*.
+     - If the target is a class name, it is looked up in *class_name_table*.
+     - Returns ``None`` if the target cannot be resolved (built-in class,
+       unknown path).
+     """
+     if rel.target.startswith("res://"):
+         if rel.target not in known_res_paths:
+             return None
+         return rel.target
+     target_path = class_name_table.get(rel.target)
+     return target_path  # None if built-in / unknown
+
+
+ def resolve_relationships_with_evidence(
+     raw_rels: list[RawRelationship],
+     class_name_table: dict[str, str],
+     known_res_paths: set[str],
+ ) -> list[GraphLink]:
+     """Resolve raw relationships to ``GraphLink`` objects with evidence.
+
+     Collects **all** occurrences of each ``(source, target, kind)`` tuple
+     into an ``evidence`` array and sets ``weight`` to the occurrence count.
+
+     - If the target already starts with ``res://``, it is used directly --
+       but only if it appears in *known_res_paths* (i.e. it actually exists
+       in the project).
+     - If the target is a class name, it is looked up in *class_name_table*.
+       If found, the corresponding ``res://`` path is used. If not found,
+       the target is assumed to be a built-in Godot class and the
+       relationship is **skipped**.
+
+     Returns a sorted list of ``GraphLink`` objects (sorted by
+     ``(source, target, kind)`` for deterministic output).
+     """
+     evidence_map: dict[tuple[str, str, str], list[Evidence]] = defaultdict(list)
+
+     for rel in raw_rels:
+         target_path = _resolve_target(rel, class_name_table, known_res_paths)
+         if target_path is None:
+             continue
+
+         key = (rel.source_res_path, target_path, rel.kind)
+         evidence_map[key].append(Evidence(
+             file=rel.source_res_path,
+             line=rel.line,
+         ))
+
+     return [
+         GraphLink(
+             source=source,
+             target=target,
+             kind=kind,
+             weight=len(evidence_list),
+             evidence=evidence_list,
+         )
+         for (source, target, kind), evidence_list in sorted(evidence_map.items())
+     ]
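
The resolution step is easiest to follow on synthetic input. A sketch with hypothetical paths and class names: Enemy resolves through the class-name table, the res:// target is kept because it is known, and Node2D is dropped as a built-in:

    from gdscript_code_graph.relationships import (
        RawRelationship,
        resolve_relationships_with_evidence,
    )

    rels = [
        RawRelationship("res://player.gd", "Enemy", "extends", 1),
        RawRelationship("res://hud.gd", "res://player.gd", "preloads", 3),
        RawRelationship("res://hud.gd", "Node2D", "extends", 1),  # built-in: skipped
    ]
    links = resolve_relationships_with_evidence(
        rels,
        class_name_table={"Enemy": "res://enemy.gd"},
        known_res_paths={"res://player.gd", "res://enemy.gd", "res://hud.gd"},
    )
    for link in links:
        print(link.source, "->", link.target, f"({link.kind}, weight={link.weight})")
    # res://hud.gd -> res://player.gd (preloads, weight=1)
    # res://player.gd -> res://enemy.gd (extends, weight=1)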

gdscript_code_graph/schema.py
@@ -0,0 +1,62 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+
+
+ @dataclass
+ class Evidence:
+     file: str  # res:// path where the relationship was found
+     line: int  # line number
+
+
+ @dataclass
+ class FunctionMetrics:
+     name: str  # function name
+     line: int  # start line number
+     cc: int  # cyclomatic complexity
+     loc: int  # non-empty, non-comment lines in this function
+     mi: float | None  # maintainability index (None if loc=0 or volume=0)
+
+
+ @dataclass
+ class NodeMetrics:
+     loc: int  # non-empty, non-comment lines
+     max_cc: int | None  # max per-function cyclomatic complexity (None if parse failed)
+     median_cc: float | None  # median per-function cyclomatic complexity (None if parse failed)
+     mi: float | None = None  # file-level maintainability index (None if parse failed or empty file)
+     mi_min: float | None = None  # worst per-function MI (None if no functions have MI)
+     mi_median: float | None = None  # median per-function MI (None if no functions have MI)
+     functions: list[FunctionMetrics] = field(default_factory=list)  # per-function detail
+
+
+ @dataclass
+ class GraphNode:
+     id: str  # res:// path (stable identifier)
+     kind: str  # "script" for v1
+     language: str  # "gdscript" for v1
+     name: str  # class_name if declared, else filename stem
+     metrics: NodeMetrics
+     tags: list[str] = field(default_factory=list)
+
+
+ @dataclass
+ class GraphLink:
+     source: str  # res:// path
+     target: str  # res:// path
+     kind: str  # "extends", "preloads", "loads", "typed_dependency", "returns"
+     weight: int  # number of occurrences
+     evidence: list[Evidence] = field(default_factory=list)
+
+
+ @dataclass
+ class Meta:
+     repo: str
+     generated_at: str  # ISO 8601
+
+
+ @dataclass
+ class Graph:
+     schema_version: str
+     meta: Meta
+     nodes: list[GraphNode]
+     links: list[GraphLink]
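
Since every class here is a plain dataclass, the JSON shape of the output follows directly from dataclasses.asdict. A minimal sketch with placeholder values:

    import json
    from dataclasses import asdict

    from gdscript_code_graph.schema import Graph, GraphNode, Meta, NodeMetrics

    node = GraphNode(
        id="res://player.gd",
        kind="script",
        language="gdscript",
        name="Player",
        metrics=NodeMetrics(loc=42, max_cc=5, median_cc=2.0),
    )
    graph = Graph(
        schema_version="1.0",
        meta=Meta(repo="demo", generated_at="2026-01-01T00:00:00+00:00"),
        nodes=[node],
        links=[],
    )
    print(json.dumps(asdict(graph), indent=2))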

gdscript_code_graph-1.0.0.dist-info/METADATA
@@ -0,0 +1,12 @@
+ Metadata-Version: 2.4
+ Name: gdscript-code-graph
+ Version: 1.0.0
+ License-Expression: MIT
+ License-File: LICENSE.md
+ Requires-Python: >=3.10
+ Requires-Dist: click>=8.0
+ Requires-Dist: gdtoolkit>=4.0
+ Provides-Extra: dev
+ Requires-Dist: pip-audit; extra == 'dev'
+ Requires-Dist: pytest-cov; extra == 'dev'
+ Requires-Dist: pytest>=7.0; extra == 'dev'

gdscript_code_graph-1.0.0.dist-info/RECORD
@@ -0,0 +1,13 @@
+ gdscript_code_graph/__init__.py,sha256=xoY9iVUOrPcctMLIKk8ojgRba9a5Dn5_lNUxf6GbTU8,473
+ gdscript_code_graph/cli.py,sha256=JN5SAteYCYZCHFKC6ewn0-Y9hdmkgTZj1XnAZku23TM,1686
+ gdscript_code_graph/discovery.py,sha256=VLhYsvanc2xg97QLFCniZthBp4nkJVk-JBYqyiqqMfY,2114
+ gdscript_code_graph/graph.py,sha256=yU6dRpzBBzxyhAGO5h0xyiwLDh_8Ryx4QSQhcwmcnTw,3272
+ gdscript_code_graph/metrics.py,sha256=Crvb2t31IMgWZXchlilkqB93BGWWC2zSSZmMfnPO0Mo,13376
+ gdscript_code_graph/parsing.py,sha256=DO4ZlLPCtryw6I_RFDnUgI_y6HVhZ7xEAyYS3t_Zv1M,2124
+ gdscript_code_graph/relationships.py,sha256=glkJh77QbVY5QpJuADwUlRt59fif5oKP7WG6v0FmlRg,13956
+ gdscript_code_graph/schema.py,sha256=Jf-KLku7qulLp0twxRTjUbWc0ufzW8Sqnekts9hWR9I,2030
+ gdscript_code_graph-1.0.0.dist-info/METADATA,sha256=QVllj4Fie0DsOdImb1ohhFoAaTdEFY-keUGfklMBQbg,338
+ gdscript_code_graph-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ gdscript_code_graph-1.0.0.dist-info/entry_points.txt,sha256=fHjGFILR1wKFP25D3YwjX9jJSVKqKiOw5MrCNPQ-jAk,69
+ gdscript_code_graph-1.0.0.dist-info/licenses/LICENSE.md,sha256=2GMLnPKJEWOp69wddxQbbBPprY_XUfc4b0So0f4TBZA,1061
+ gdscript_code_graph-1.0.0.dist-info/RECORD,,

gdscript_code_graph-1.0.0.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any

gdscript_code_graph-1.0.0.dist-info/entry_points.txt
@@ -0,0 +1,2 @@
+ [console_scripts]
+ gdscript-code-graph = gdscript_code_graph.cli:main

gdscript_code_graph-1.0.0.dist-info/licenses/LICENSE.md
@@ -0,0 +1,7 @@
+ Copyright 2026 Mike Rötgers
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.