gdscript_code_graph-1.0.0-py3-none-any.whl

gdscript_code_graph/__init__.py
@@ -0,0 +1,21 @@
+ from __future__ import annotations
+
+ __version__ = "1.0.0"
+
+ from .discovery import ProjectFiles, discover_project
+ from .graph import build_graph, serialize_graph
+ from .schema import Evidence, FunctionMetrics, Graph, GraphLink, GraphNode, Meta, NodeMetrics
+
+ __all__ = [
+     "ProjectFiles",
+     "discover_project",
+     "build_graph",
+     "serialize_graph",
+     "Evidence",
+     "FunctionMetrics",
+     "Graph",
+     "GraphLink",
+     "GraphNode",
+     "Meta",
+     "NodeMetrics",
+ ]
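
The re-exports above are the package's entire public surface. As a minimal sketch of driving it end to end (the project path is a hypothetical placeholder, and an installed copy of the package is assumed):

    from pathlib import Path

    import gdscript_code_graph as gcg

    # discover_project / build_graph / serialize_graph are the re-exports above.
    project = gcg.discover_project(Path("path/to/godot-project"))
    graph = gcg.build_graph(project, repo_name="demo")
    print(gcg.serialize_graph(graph))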

gdscript_code_graph/cli.py
@@ -0,0 +1,71 @@
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+ import click
+
+ from .discovery import discover_project
+ from .graph import build_graph, serialize_graph
+
+
+ @click.group()
+ def main():
+     """GDScript code metrics analyzer."""
+     pass
+
+
+ @main.command()
+ @click.argument("project_dir", type=click.Path(exists=True, file_okay=False))
+ @click.option(
+     "--out",
+     "-o",
+     type=click.Path(),
+     default=None,
+     help="Output file path. Defaults to stdout.",
+ )
+ @click.option(
+     "--repo-name",
+     type=str,
+     default=None,
+     help="Repository name for the output. Defaults to directory name.",
+ )
+ @click.option(
+     "--exclude",
+     "-e",
+     multiple=True,
+     help="Directory names to exclude (repeatable). Example: --exclude addons --exclude test",
+ )
+ def analyze(
+     project_dir: str,
+     out: str | None,
+     repo_name: str | None,
+     exclude: tuple[str, ...],
+ ) -> None:
+     """Analyze a Godot project directory for code metrics."""
+     project_path = Path(project_dir)
+
+     if repo_name is None:
+         repo_name = project_path.name
+
+     try:
+         project = discover_project(
+             project_path,
+             exclude_dirs=list(exclude) if exclude else None,
+         )
+     except FileNotFoundError:
+         click.echo(
+             f"Error: No project.godot found in or above '{project_dir}'",
+             err=True,
+         )
+         sys.exit(1)
+
+     graph = build_graph(project, repo_name)
+     json_output = serialize_graph(graph)
+
+     if out is not None:
+         out_path = Path(out)
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+         out_path.write_text(json_output, encoding="utf-8")
+     else:
+         click.echo(json_output)
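
A quick way to exercise the analyze command without a shell is click's own test runner. A sketch, assuming the package is installed and path/to/godot-project stands in for a real project directory:

    from click.testing import CliRunner

    from gdscript_code_graph.cli import main

    runner = CliRunner()
    # Equivalent to: gdscript-code-graph analyze path/to/godot-project -e addons
    result = runner.invoke(
        main, ["analyze", "path/to/godot-project", "--exclude", "addons"]
    )
    print(result.exit_code)  # 0 on success; 1 if no project.godot was found
    print(result.output)     # the JSON graph when --out is omitted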

gdscript_code_graph/discovery.py
@@ -0,0 +1,64 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+
+
+ @dataclass
+ class ProjectFiles:
+     project_root: Path  # directory containing project.godot
+     gd_files: list[Path]  # absolute paths to all .gd files
+
+     def to_res_path(self, abs_path: Path) -> str:
+         rel = abs_path.relative_to(self.project_root)
+         return "res://" + rel.as_posix()
+
+
+ def find_project_root(start_dir: Path) -> Path:
+     """Locate the directory containing ``project.godot``, walking upward.
+
+     Raises ``FileNotFoundError`` if none is found in *start_dir* or any parent.
+     """
+     current = start_dir.resolve()
+     while True:
+         if (current / "project.godot").exists():
+             return current
+         parent = current.parent
+         if parent == current:
+             raise FileNotFoundError(
+                 f"No project.godot found in {start_dir} or any parent directory"
+             )
+         current = parent
+
+
+ def discover_project(
+     project_dir: Path,
+     exclude_dirs: list[str] | None = None,
+ ) -> ProjectFiles:
+     """Find the project root, then glob ``**/*.gd`` under it.
+
+     Always excludes the ``.godot/`` directory (Godot internal cache).
+     Additional directories can be excluded via *exclude_dirs* -- each entry
+     is matched against every component of the file's path relative to the
+     project root. For example, ``exclude_dirs=["addons", "test"]`` will
+     skip any ``.gd`` file whose relative path contains an ``addons`` or
+     ``test`` directory component.
+
+     Files are sorted for deterministic output; if no ``.gd`` files are
+     found, ``gd_files`` is an empty list rather than an error.
+     """
+     root = find_project_root(project_dir)
+
+     # Always exclude .godot; merge user-supplied directories.
+     always_excluded = {".godot"}
+     if exclude_dirs:
+         always_excluded.update(exclude_dirs)
+
+     gd_files = sorted(
+         resolved
+         for p in root.rglob("*.gd")
+         if not always_excluded.intersection(
+             (resolved := p.resolve()).relative_to(root).parts
+         )
+     )
+     return ProjectFiles(project_root=root, gd_files=gd_files)
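
As a usage sketch (the project path below is a placeholder), discovery plus the res:// mapping looks like:

    from pathlib import Path

    from gdscript_code_graph.discovery import discover_project

    project = discover_project(Path("path/to/godot-project"), exclude_dirs=["addons"])
    for gd_file in project.gd_files:
        # Absolute paths map back to Godot-style resource paths.
        print(project.to_res_path(gd_file))  # e.g. res://actors/player.gd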

gdscript_code_graph/graph.py
@@ -0,0 +1,103 @@
+ from __future__ import annotations
+
+ import json
+ from dataclasses import asdict
+ from datetime import datetime, timezone
+ from pathlib import Path
+
+ from gdscript_code_graph.discovery import ProjectFiles
+ from gdscript_code_graph.metrics import compute_metrics
+ from gdscript_code_graph.parsing import parse_all
+ from gdscript_code_graph.relationships import (
+     build_class_name_table,
+     extract_extends,
+     extract_preloads,
+     extract_returns,
+     extract_typed_deps,
+     resolve_relationships_with_evidence,
+ )
+ from gdscript_code_graph.schema import (
+     FunctionMetrics,
+     Graph,
+     GraphNode,
+     Meta,
+     NodeMetrics,
+ )
+
+
+ def build_graph(project: ProjectFiles, repo_name: str) -> Graph:
+     """Run the full analysis pipeline and assemble a Graph.
+
+     Pipeline:
+     1. Parse all .gd files
+     2. Build class name lookup table
+     3. For each file: compute metrics, look up class_name, extract raw
+        relationships, build node
+     4. Resolve relationships with evidence (class names to paths, skip
+        built-ins, aggregate evidence arrays)
+     5. Assemble Graph with schema_version="1.0" and metadata
+     """
+     # Step 1: Parse
+     parse_results = parse_all(project)
+
+     # Step 2: Build class name table
+     class_name_table = build_class_name_table(parse_results)
+     res_path_to_class_name = {v: k for k, v in class_name_table.items()}
+
+     # Step 3: Process each file
+     nodes: list[GraphNode] = []
+     all_raw_rels = []
+
+     for pr in parse_results:
+         # Compute metrics
+         file_metrics = compute_metrics(pr.source, pr.tree)
+
+         # Node name: class_name if declared, else filename stem
+         name = res_path_to_class_name.get(pr.res_path) or Path(pr.file_path).stem
+
+         # Build node
+         node = GraphNode(
+             id=pr.res_path,
+             kind="script",
+             language="gdscript",
+             name=name,
+             metrics=NodeMetrics(
+                 loc=file_metrics.loc,
+                 max_cc=file_metrics.max_cc,
+                 median_cc=file_metrics.median_cc,
+                 mi=file_metrics.mi,
+                 mi_min=file_metrics.mi_min,
+                 mi_median=file_metrics.mi_median,
+                 functions=file_metrics.functions,
+             ),
+             tags=[],
+         )
+         nodes.append(node)
+
+         # Extract raw relationships (only if tree is valid)
+         if pr.tree is not None:
+             all_raw_rels.extend(extract_extends(pr.tree, pr.res_path))
+             all_raw_rels.extend(extract_preloads(pr.tree, pr.res_path))
+             all_raw_rels.extend(extract_typed_deps(pr.tree, pr.res_path))
+             all_raw_rels.extend(extract_returns(pr.tree, pr.res_path))
+
+     # Step 4: Resolve relationships and build GraphLinks with evidence
+     known_res_paths = {pr.res_path for pr in parse_results}
+     links = resolve_relationships_with_evidence(
+         all_raw_rels, class_name_table, known_res_paths
+     )
+
+     # Step 5: Assemble Graph
+     now = datetime.now(timezone.utc).isoformat()
+
+     return Graph(
+         schema_version="1.0",
+         meta=Meta(repo=repo_name, generated_at=now),
+         nodes=nodes,
+         links=links,
+     )
+
+
+ def serialize_graph(graph: Graph) -> str:
+     """Serialize a Graph to a JSON string."""
+     return json.dumps(asdict(graph), indent=2)
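
The five-step pipeline in the docstring corresponds to a short driver. A sketch under the same placeholder-path assumption, writing the serialized graph to disk:

    from pathlib import Path

    from gdscript_code_graph.discovery import discover_project
    from gdscript_code_graph.graph import build_graph, serialize_graph

    project = discover_project(Path("path/to/godot-project"))
    graph = build_graph(project, repo_name="my-game")
    Path("graph.json").write_text(serialize_graph(graph), encoding="utf-8")
    print(len(graph.nodes), "nodes,", len(graph.links), "links")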

gdscript_code_graph/metrics.py
@@ -0,0 +1,395 @@
+ from __future__ import annotations
+
+ import math
+ import statistics
+ from dataclasses import dataclass
+
+ from lark import Tree, Token
+
+ from gdscript_code_graph.schema import FunctionMetrics
+
+
+ @dataclass
+ class FileMetrics:
+     loc: int
+     max_cc: int | None
+     median_cc: float | None
+     mi: float | None
+     mi_min: float | None
+     mi_median: float | None
+     functions: list[FunctionMetrics]
+
+
+ @dataclass
+ class HalsteadResult:
+     volume: float  # N * log2(n)
+     vocabulary: int  # n = unique operators + unique operands
+     length: int  # N = total operators + total operands
+
+
+ # ---------------------------------------------------------------------------
+ # Halstead classification tables
+ # ---------------------------------------------------------------------------
+
+ # Token types that count as operators (explicit named tokens).
+ _OPERATOR_TOKEN_TYPES = frozenset({
+     "DOT", "EQUAL", "MINUS", "PLUS", "STAR", "SLASH", "PERCENT",
+     "MORETHAN", "LESSTHAN",
+     "AND", "OR", "NOT",
+     "IF", "ELSE",
+ })
+
+ # Token types that count as operands.
+ _OPERAND_TOKEN_TYPES = frozenset({
+     "NAME", "NUMBER", "REGULAR_STRING", "TYPE_HINT",
+ })
+
+ # Subtree node types that represent keyword operators absorbed by the grammar.
+ # Each occurrence contributes exactly one keyword operator. Only *wrapper*
+ # nodes are listed (not inner variants like class_var_typed_assgnd) to
+ # avoid double-counting.
+ _KEYWORD_SUBTREE_MAP: dict[str, str] = {
+     "extends_stmt": "extends",
+     "classname_stmt": "class_name",
+     "func_def": "func",
+     "class_var_stmt": "var",
+     "func_var_stmt": "var",
+     "const_stmt": "const",
+     "if_branch": "if",
+     "elif_branch": "elif",
+     "else_branch": "else",
+     "for_stmt": "for",
+     "for_stmt_typed": "for",
+     "while_stmt": "while",
+     "match_stmt": "match",
+     "pass_stmt": "pass",
+     "return_stmt": "return",
+     "signal_stmt": "signal",
+ }
+
+
+ # ---------------------------------------------------------------------------
+ # LOC
+ # ---------------------------------------------------------------------------
+
+
+ def compute_loc(source: str) -> int:
+     """Count non-empty, non-comment-only lines."""
+     return sum(
+         1 for line in source.splitlines()
+         if line.strip() and not line.strip().startswith("#")
+     )
+
+
+ # ---------------------------------------------------------------------------
+ # Cyclomatic complexity
+ # ---------------------------------------------------------------------------
+
+
+ def compute_cyclomatic_complexity(tree: Tree) -> int:
+     """Compute cyclomatic complexity from a Lark AST.
+
+     CC = 1 (base) plus count of branching constructs:
+     - if_branch, elif_branch, while_stmt, for_stmt, for_stmt_typed, match_branch
+     - and/&& tokens in and_test / asless_and_test nodes
+     - or/|| tokens in or_test / asless_or_test nodes
+     - ternary "if" tokens in test_expr / asless_test_expr nodes
+     """
+     cc = 1
+
+     for subtree in tree.iter_subtrees():
+         node_type = subtree.data
+
+         # Direct branch nodes: +1 each
+         if node_type in (
+             "if_branch",
+             "elif_branch",
+             "while_stmt",
+             "for_stmt",
+             "for_stmt_typed",
+             "match_branch",
+         ):
+             cc += 1
+
+         # Boolean operators in and_test / asless_and_test:
+         # "and" has token type AND, "&&" has token type __ANON_3
+         if node_type in ("and_test", "asless_and_test"):
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type in ("AND", "__ANON_3"):
+                     cc += 1
+
+         # Boolean operators in or_test / asless_or_test:
+         # "or" has token type OR, "||" has token type __ANON_2
+         if node_type in ("or_test", "asless_or_test"):
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type in ("OR", "__ANON_2"):
+                     cc += 1
+
+         # Ternary expressions: test_expr / asless_test_expr with "if" token
+         if node_type in ("test_expr", "asless_test_expr"):
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type == "IF":
+                     cc += 1
+
+     return cc
+
+
+ def _extract_func_name(func_def: Tree) -> str:
+     """Extract function name from a func_def subtree.
+
+     Looks for the ``func_header`` child and returns the first ``NAME`` token.
+     Returns ``"<unknown>"`` if no name is found.
+     """
+     for child in func_def.children:
+         if isinstance(child, Tree) and child.data == "func_header":
+             for header_child in child.children:
+                 if isinstance(header_child, Token) and header_child.type == "NAME":
+                     return str(header_child)
+             break
+     return "<unknown>"
+
+
+ def compute_function_loc(source: str, start_line: int, end_line: int) -> int:
+     """Count non-empty, non-comment lines within a function's source range.
+
+     ``start_line`` and ``end_line`` are 1-based line numbers from Lark AST
+     metadata. Lark's ``end_line`` points past the last code line of the
+     function (it includes trailing ``_NL`` tokens), so ``end_line - 1``
+     is always >= the last code line, making the slice inclusive of all
+     function lines.
+
+     Extracts lines ``[start_line-1 : end_line-1]`` and applies the same
+     counting logic as :func:`compute_loc`.
+     """
+     # Verified: Lark's meta.end_line accounts for trailing _NL tokens,
+     # so end_line - 1 correctly includes the last code line of the function.
+     lines = source.splitlines()
+     func_lines = lines[start_line - 1 : end_line - 1]
+     return sum(
+         1 for line in func_lines
+         if line.strip() and not line.strip().startswith("#")
+     )
+
+
+ def compute_function_metrics(
+     func_def: Tree, source: str
+ ) -> FunctionMetrics:
+     """Compute all metrics for a single function.
+
+     Extracts name, line number, CC, LOC, Halstead volume, and MI from
+     the ``func_def`` subtree and its corresponding source lines.
+
+     Returns a :class:`FunctionMetrics` with ``mi=None`` if LOC=0 or
+     Halstead volume=0.
+     """
+     name = _extract_func_name(func_def)
+     line = func_def.meta.line
+     end_line = func_def.meta.end_line
+
+     cc = compute_cyclomatic_complexity(func_def)
+     loc = compute_function_loc(source, line, end_line)
+     halstead = compute_halstead_volume(func_def)
+
+     mi: float | None = None
+     if loc > 0 and halstead.volume > 0:
+         mi = compute_maintainability_index(loc, cc, halstead.volume)
+
+     return FunctionMetrics(name=name, line=line, cc=cc, loc=loc, mi=mi)
+
+
+ def compute_all_function_metrics(
+     tree: Tree, source: str
+ ) -> list[FunctionMetrics]:
+     """Compute metrics for every function in a Lark AST.
+
+     Walks the tree to find ``func_def`` subtrees. For each, calls
+     :func:`compute_function_metrics` to get name, line, CC, LOC, and MI.
+
+     Returns a list of :class:`FunctionMetrics` -- one per function in source
+     order. Files with no functions return an empty list.
+     """
+     results: list[FunctionMetrics] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data != "func_def":
+             continue
+         results.append(compute_function_metrics(subtree, source))
+
+     return results
+
+
+ def aggregate_cc(
+     per_func: list[FunctionMetrics],
+ ) -> tuple[int, float]:
+     """Aggregate per-function CC into max and median.
+
+     Returns ``(max_cc, median_cc)``.
+
+     - ``max_cc``: the highest CC among all functions (hotspot detection).
+     - ``median_cc``: the median CC across all functions (typical complexity),
+       rounded to 1 decimal place.
+
+     If the list is empty (no functions in the file), returns ``(1, 1.0)``
+     as a baseline -- any executable code path has a minimum CC of 1.
+     """
+     if not per_func:
+         return (1, 1.0)
+
+     cc_values = [f.cc for f in per_func]
+     max_cc = max(cc_values)
+     median_cc = round(float(statistics.median(cc_values)), 1)
+     return (max_cc, median_cc)
+
+
+ def aggregate_mi(
+     per_func: list[FunctionMetrics],
+ ) -> tuple[float | None, float | None]:
+     """Aggregate per-function MI into min and median.
+
+     Returns ``(mi_min, mi_median)``.
+
+     - ``mi_min``: the lowest MI among all functions (worst maintainability).
+     - ``mi_median``: the median MI across all functions, rounded to 2 decimals.
+
+     Returns ``(None, None)`` if no functions have a non-None MI value.
+     """
+     mi_values = [f.mi for f in per_func if f.mi is not None]
+     if not mi_values:
+         return (None, None)
+
+     mi_min = min(mi_values)
+     mi_median = round(float(statistics.median(mi_values)), 2)
+     return (mi_min, mi_median)
+
+
+ # ---------------------------------------------------------------------------
+ # Halstead volume
+ # ---------------------------------------------------------------------------
+
+
+ def compute_halstead_volume(tree: Tree) -> HalsteadResult:
+     """Compute Halstead volume from a Lark AST.
+
+     Walks the tree once, classifying Token leaves and subtree types
+     into operators and operands.
+
+     Operator tokens: named types (DOT, EQUAL, PLUS, …) plus any __ANON_*
+     tokens (compound assignment, comparisons, &&, ||, etc.).
+
+     Operand tokens: NAME, NUMBER, REGULAR_STRING, TYPE_HINT.
+
+     Keyword operators (absorbed by the grammar): recovered from subtree
+     node types like extends_stmt → "extends", func_def → "func", etc.
+
+     Returns a HalsteadResult with volume (N × log₂(n)), vocabulary (n),
+     and program length (N).
+     """
+     operators: list[str] = []
+     operands: list[str] = []
+
+     for subtree in tree.iter_subtrees():
+         # Recover keyword operators from subtree types
+         keyword = _KEYWORD_SUBTREE_MAP.get(str(subtree.data))
+         if keyword is not None:
+             operators.append(keyword)
+
+         # Classify Token leaves
+         for child in subtree.children:
+             if not isinstance(child, Token):
+                 continue
+             if child.type in _OPERATOR_TOKEN_TYPES:
+                 operators.append(str(child))
+             elif child.type in _OPERAND_TOKEN_TYPES:
+                 operands.append(str(child))
+             elif child.type.startswith("__ANON"):
+                 operators.append(str(child))
+
+     n1 = len(set(operators))  # unique operators
+     n2 = len(set(operands))  # unique operands
+     n = n1 + n2  # vocabulary
+     big_n = len(operators) + len(operands)  # program length
+
+     volume = big_n * math.log2(n) if n > 0 else 0.0
+
+     return HalsteadResult(volume=volume, vocabulary=n, length=big_n)
+
+
+ # ---------------------------------------------------------------------------
+ # Maintainability index
+ # ---------------------------------------------------------------------------
+
+
+ def compute_maintainability_index(
+     loc: int, cc: int, halstead_volume: float
+ ) -> float:
+     """Compute Maintainability Index from LOC, CC, and Halstead Volume.
+
+     Uses the standard MI formula (0–171 scale):
+         MI = 171 − 5.2 × ln(V) − 0.23 × CC − 16.2 × ln(LOC)
+
+     Result is clamped to a minimum of 0.
+
+     Requires ``loc > 0`` and ``halstead_volume > 0`` (both are arguments
+     to ``math.log``). Raises ``ValueError`` if either is <= 0.
+     """
+     if loc <= 0:
+         raise ValueError(f"loc must be > 0, got {loc}")
+     if halstead_volume <= 0:
+         raise ValueError(f"halstead_volume must be > 0, got {halstead_volume}")
+     mi = (
+         171
+         - 5.2 * math.log(halstead_volume)
+         - 0.23 * cc
+         - 16.2 * math.log(loc)
+     )
+     return round(max(0.0, mi), 2)
+
+
+ # ---------------------------------------------------------------------------
+ # Aggregate entry point
+ # ---------------------------------------------------------------------------
+
+
+ def compute_metrics(source: str, tree: Tree | None) -> FileMetrics:
+     """Compute all metrics for a single file.
+
+     LOC is always computed from raw source text.
+     Per-function metrics (CC, LOC, MI) are computed from the AST if available.
+     CC is aggregated into max_cc and median_cc. Both are None if parse failed.
+     File-level MI is computed when tree is available, max_cc is known, LOC > 0,
+     and whole-file Halstead volume > 0. MI uses max_cc in its formula.
+     Per-function MI is aggregated into mi_min and mi_median.
+     """
+     loc = compute_loc(source)
+
+     max_cc: int | None = None
+     median_cc: float | None = None
+     mi: float | None = None
+     mi_min: float | None = None
+     mi_median: float | None = None
+     functions: list[FunctionMetrics] = []
+
+     if tree is not None:
+         functions = compute_all_function_metrics(tree, source)
+         max_cc, median_cc = aggregate_cc(functions)
+         mi_min, mi_median = aggregate_mi(functions)
+
+     # File-level MI uses max_cc (worst per-function CC) rather than the
+     # standard total-file CC. This penalises files containing a single
+     # highly complex function instead of diluting the score across many
+     # simple functions. Per-function MI (mi_min, mi_median) already
+     # uses each function's own CC for granular analysis.
+     if tree is not None and max_cc is not None and loc > 0:
+         halstead = compute_halstead_volume(tree)
+         if halstead.volume > 0:
+             mi = compute_maintainability_index(loc, max_cc, halstead.volume)
+
+     return FileMetrics(
+         loc=loc,
+         max_cc=max_cc,
+         median_cc=median_cc,
+         mi=mi,
+         mi_min=mi_min,
+         mi_median=mi_median,
+         functions=functions,
+     )
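
To make the MI formula concrete, here is a worked example with illustrative numbers (not taken from any real file):

    import math

    # Illustrative inputs: LOC = 20, CC = 4, Halstead volume V = 250.
    loc, cc, volume = 20, 4, 250.0

    # MI = 171 - 5.2*ln(V) - 0.23*CC - 16.2*ln(LOC), clamped at 0.
    mi = 171 - 5.2 * math.log(volume) - 0.23 * cc - 16.2 * math.log(loc)
    print(round(max(0.0, mi), 2))  # 92.84 -- comfortably maintainable on the 0-171 scale

    # The same inputs through the module's own function should agree:
    # compute_maintainability_index(20, 4, 250.0) == 92.84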

gdscript_code_graph/parsing.py
@@ -0,0 +1,73 @@
+ from __future__ import annotations
+
+ import logging
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ from gdtoolkit.parser import parser as gdparser
+ from lark import Tree
+
+ from gdscript_code_graph.discovery import ProjectFiles
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class ParseResult:
+     file_path: Path
+     res_path: str
+     tree: Tree | None  # None if parse failed
+     source: str  # raw source text ("" if the file could not be read)
+     error: str | None  # error message if parse failed
+
+
+ def parse_file(file_path: Path, res_path: str) -> ParseResult:
+     """Parse a single GDScript file and return a ParseResult.
+
+     Uses gdtoolkit's parser with gather_metadata=True to get .meta.line
+     on AST nodes. Gracefully handles parse errors and encoding issues --
+     a broken file never raises; the error is captured in the result.
+     """
+     try:
+         source = file_path.read_text(encoding="utf-8")
+     except UnicodeDecodeError as exc:
+         logger.warning("Failed to read %s: %s", file_path, exc)
+         return ParseResult(
+             file_path=file_path,
+             res_path=res_path,
+             tree=None,
+             source="",
+             error=str(exc),
+         )
+
+     try:
+         tree = gdparser.parse(source, gather_metadata=True)
+         return ParseResult(
+             file_path=file_path,
+             res_path=res_path,
+             tree=tree,
+             source=source,
+             error=None,
+         )
+     except Exception as exc:
+         logger.warning("Failed to parse %s: %s", file_path, exc)
+         return ParseResult(
+             file_path=file_path,
+             res_path=res_path,
+             tree=None,
+             source=source,
+             error=str(exc),
+         )
+
+
+ def parse_all(project: ProjectFiles) -> list[ParseResult]:
+     """Parse all .gd files in the project.
+
+     Never aborts the whole batch -- individual file errors are captured
+     in each ParseResult.
+     """
+     results = []
+     for file_path in project.gd_files:
+         res_path = project.to_res_path(file_path)
+         results.append(parse_file(file_path, res_path))
+     return results
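
The never-raise contract is easiest to see with a file that does not parse. A sketch, assuming a scratch file written locally and a snippet that gdtoolkit is expected to reject:

    from pathlib import Path

    from gdscript_code_graph.parsing import parse_file

    # A deliberately malformed GDScript snippet in a temporary file.
    bad = Path("broken.gd")
    bad.write_text("func (:\n", encoding="utf-8")

    result = parse_file(bad, "res://broken.gd")
    print(result.tree)           # None -- the parse failed
    print(result.error)          # the captured parser error message
    print(bool(result.source))   # True -- the raw source is still kept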

gdscript_code_graph/relationships.py
@@ -0,0 +1,392 @@
+ from __future__ import annotations
+
+ import logging
+ import re
+ from collections import defaultdict
+ from dataclasses import dataclass
+
+ from lark import Tree, Token
+
+ from gdscript_code_graph.parsing import ParseResult
+ from gdscript_code_graph.schema import Evidence, GraphLink
+
+ logger = logging.getLogger(__name__)
+
+ _ARRAY_TYPE_RE = re.compile(r"Array\[(\w+)\]")
+
+
+ @dataclass
+ class RawRelationship:
+     source_res_path: str
+     target: str  # res:// path OR class name
+     kind: str  # "extends", "preloads", "loads", "typed_dependency", "returns"
+     line: int
+
+
+ def _extract_string_value(string_tree: Tree) -> str:
+     """Extract the unquoted string value from a Lark ``string`` tree node.
+
+     The ``string`` node has a single child token of type ``REGULAR_STRING``
+     (e.g. ``"res://actors/character.gd"``). We strip the surrounding
+     quotes (single or double) to return the raw path.
+     """
+     token = str(string_tree.children[0])
+     if len(token) >= 2 and token[0] in ('"', "'") and token[-1] == token[0]:
+         return token[1:-1]
+     return token
+
+
+ def extract_class_name(tree: Tree) -> str | None:
+     """Return the declared ``class_name`` from a parsed GDScript AST, or None.
+
+     Handles both ``classname_stmt`` (e.g. ``class_name Player``) and
+     ``classname_extends_stmt`` (e.g. ``class_name Foo extends Bar``) forms.
+     In the latter case the *first* ``NAME`` token is the class name.
+     """
+     for subtree in tree.iter_subtrees():
+         if subtree.data == "classname_stmt":
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type == "NAME":
+                     return str(child)
+         if subtree.data == "classname_extends_stmt":
+             for child in subtree.children:
+                 if isinstance(child, Token) and child.type == "NAME":
+                     return str(child)
+     return None
+
+
+ def extract_extends(tree: Tree, source_res_path: str) -> list[RawRelationship]:
+     """Extract ``extends`` relationships from a parsed GDScript AST.
+
+     Handles three forms:
+     - ``extends ClassName`` -- extends by class name (NAME token)
+     - ``extends "res://path.gd"`` -- extends by path (string subtree)
+     - ``class_name Foo extends Bar`` -- classname_extends_stmt (second NAME
+       token after the class name is the extends target)
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data == "extends_stmt":
+             line = getattr(subtree.meta, "line", 0)
+             for child in subtree.children:
+                 if isinstance(child, Tree) and child.data == "string":
+                     # extends "res://path/to/file.gd"
+                     target = _extract_string_value(child)
+                     results.append(RawRelationship(
+                         source_res_path=source_res_path,
+                         target=target,
+                         kind="extends",
+                         line=line,
+                     ))
+                 elif isinstance(child, Token) and child.type == "NAME":
+                     # extends ClassName
+                     results.append(RawRelationship(
+                         source_res_path=source_res_path,
+                         target=str(child),
+                         kind="extends",
+                         line=line,
+                     ))
+
+         elif subtree.data == "classname_extends_stmt":
+             line = getattr(subtree.meta, "line", 0)
+             # Children are NAME tokens; the first is the class_name, the
+             # second (after the implicit ``extends`` keyword) is the target.
+             name_tokens = [
+                 child for child in subtree.children
+                 if isinstance(child, Token) and child.type == "NAME"
+             ]
+             if len(name_tokens) >= 2:
+                 target = str(name_tokens[1])
+                 results.append(RawRelationship(
+                     source_res_path=source_res_path,
+                     target=target,
+                     kind="extends",
+                     line=line,
+                 ))
+
+     return results
+
+
+ def extract_preloads(tree: Tree, source_res_path: str) -> list[RawRelationship]:
+     """Extract ``preload(...)`` and ``load(...)`` relationships from an AST.
+
+     Walks the tree looking for:
+
+     1. ``standalone_call`` nodes whose first child is a ``NAME`` token equal
+        to ``preload`` or ``load`` (bare calls like ``preload("res://...")``).
+     2. ``getattr_call`` nodes where the ``getattr`` subtree ends with a
+        ``NAME`` token equal to ``load`` (e.g. ``ResourceLoader.load("res://...")``).
+        Only ``load`` is handled here -- ``preload`` is a GDScript keyword and
+        is never called via attribute access.
+
+     If the call has a ``string`` argument that starts with ``res://``, it is
+     recorded.
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data == "standalone_call":
+             children = subtree.children
+             if not children:
+                 continue
+
+             first = children[0]
+             if not (isinstance(first, Token) and first.type == "NAME"):
+                 continue
+
+             func_name = str(first)
+             if func_name not in ("preload", "load"):
+                 continue
+
+             kind = func_name + "s"  # "preloads" or "loads"
+             line = getattr(subtree.meta, "line", 0)
+
+             # Look for string argument among remaining children
+             for child in children[1:]:
+                 if isinstance(child, Tree) and child.data == "string":
+                     target = _extract_string_value(child)
+                     if target.startswith("res://"):
+                         results.append(RawRelationship(
+                             source_res_path=source_res_path,
+                             target=target,
+                             kind=kind,
+                             line=line,
+                         ))
+
+         elif subtree.data == "getattr_call":
+             children = subtree.children
+             if not children:
+                 continue
+
+             # First child should be a ``getattr`` subtree.
+             first = children[0]
+             if not (isinstance(first, Tree) and first.data == "getattr"):
+                 continue
+
+             # The last NAME token in the getattr chain is the method name.
+             name_tokens = [
+                 child for child in first.children
+                 if isinstance(child, Token) and child.type == "NAME"
+             ]
+             if not name_tokens or str(name_tokens[-1]) != "load":
+                 continue
+
+             line = getattr(subtree.meta, "line", 0)
+
+             # Look for string arguments among remaining children
+             for child in children[1:]:
+                 if isinstance(child, Tree) and child.data == "string":
+                     target = _extract_string_value(child)
+                     if target.startswith("res://"):
+                         results.append(RawRelationship(
+                             source_res_path=source_res_path,
+                             target=target,
+                             kind="loads",
+                             line=line,
+                         ))
+
+     return results
+
+
+ def extract_type_from_hint(type_hint: str) -> str:
+     """Extract the inner type name from a TYPE_HINT token value.
+
+     Handles ``Array[Type]`` by returning the inner type (``Type``).
+     For plain types like ``Player`` returns the value as-is.
+
+     Examples::
+
+         >>> extract_type_from_hint("Player")
+         'Player'
+         >>> extract_type_from_hint("Array[Item]")
+         'Item'
+         >>> extract_type_from_hint("Array")
+         'Array'
+     """
+     match = _ARRAY_TYPE_RE.match(type_hint)
+     if match:
+         return match.group(1)
+     return type_hint
+
+
+ def extract_typed_deps(
+     tree: Tree, source_res_path: str
+ ) -> list[RawRelationship]:
+     """Extract typed-dependency relationships from class variable declarations.
+
+     Detects patterns:
+
+     - ``var x: Type`` (``class_var_typed`` AST node)
+     - ``var x: Type = value`` (``class_var_typed_assgnd`` AST node)
+     - ``var x: Array[Type]`` / ``var x: Array[Type] = []``
+
+     Each produces a ``RawRelationship`` with ``kind="typed_dependency"``
+     where the target is the type name (or inner type for ``Array[Type]``).
+     Built-in types are *not* filtered here -- that happens during resolution
+     when the type name is looked up in the class-name table.
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data not in ("class_var_typed", "class_var_typed_assgnd"):
+             continue
+
+         type_hint: str | None = None
+         for child in subtree.children:
+             if isinstance(child, Token) and child.type == "TYPE_HINT":
+                 type_hint = str(child)
+                 break
+
+         if type_hint is None:
+             continue
+
+         target = extract_type_from_hint(type_hint)
+         line = getattr(subtree.meta, "line", 0)
+         results.append(RawRelationship(
+             source_res_path=source_res_path,
+             target=target,
+             kind="typed_dependency",
+             line=line,
+         ))
+
+     return results
+
+
+ def extract_returns(
+     tree: Tree, source_res_path: str
+ ) -> list[RawRelationship]:
+     """Extract return-type relationships from function declarations.
+
+     Detects ``func foo() -> Type:`` patterns by inspecting ``func_header``
+     AST nodes for a ``TYPE_HINT`` token (which represents the return type).
+
+     Each produces a ``RawRelationship`` with ``kind="returns"`` where the
+     target is the type name (or inner type for ``Array[Type]``). Built-in
+     types are *not* filtered here -- that happens during resolution.
+     """
+     results: list[RawRelationship] = []
+
+     for subtree in tree.iter_subtrees():
+         if subtree.data != "func_header":
+             continue
+
+         # The return type is the last TYPE_HINT token that is a direct child of
+         # func_header. Parameter type hints live inside func_args (a Tree child),
+         # so they are not visited here. We iterate in reverse to grab the return
+         # type first and break immediately.
+         return_type: str | None = None
+         for child in reversed(subtree.children):
+             if isinstance(child, Token) and child.type == "TYPE_HINT":
+                 return_type = str(child)
+                 break
+
+         if return_type is None:
+             continue
+
+         target = extract_type_from_hint(return_type)
+         line = getattr(subtree.meta, "line", 0)
+         results.append(RawRelationship(
+             source_res_path=source_res_path,
+             target=target,
+             kind="returns",
+             line=line,
+         ))
+
+     return results
+
+
+ def build_class_name_table(
+     parse_results: list[ParseResult],
+ ) -> dict[str, str]:
+     """Build a mapping of class_name declarations to their ``res://`` paths.
+
+     Iterates over all successfully parsed files and extracts any
+     ``class_name`` declaration. If two files declare the same class name,
+     a warning is logged and the later entry wins.
+     """
+     table: dict[str, str] = {}
+
+     for pr in parse_results:
+         if pr.tree is None:
+             continue
+         name = extract_class_name(pr.tree)
+         if name is None:
+             continue
+         if name in table:
+             logger.warning(
+                 "Duplicate class_name %r: %s and %s",
+                 name,
+                 table[name],
+                 pr.res_path,
+             )
+         table[name] = pr.res_path
+
+     return table
+
+
+ def _resolve_target(
+     rel: RawRelationship,
+     class_name_table: dict[str, str],
+     known_res_paths: set[str],
+ ) -> str | None:
+     """Resolve a single relationship target to a ``res://`` path, or None.
+
+     - If the target already starts with ``res://``, it is returned directly --
+       but only if it appears in *known_res_paths*.
+     - If the target is a class name, it is looked up in *class_name_table*.
+     - Returns ``None`` if the target cannot be resolved (built-in class,
+       unknown path).
+     """
+     if rel.target.startswith("res://"):
+         if rel.target not in known_res_paths:
+             return None
+         return rel.target
+     target_path = class_name_table.get(rel.target)
+     return target_path  # None if built-in / unknown
+
+
+ def resolve_relationships_with_evidence(
+     raw_rels: list[RawRelationship],
+     class_name_table: dict[str, str],
+     known_res_paths: set[str],
+ ) -> list[GraphLink]:
+     """Resolve raw relationships to ``GraphLink`` objects with evidence.
+
+     Collects **all** occurrences of each ``(source, target, kind)`` tuple
+     into an ``evidence`` array and sets ``weight`` to the occurrence count.
+
+     - If the target already starts with ``res://``, it is used directly --
+       but only if it appears in *known_res_paths* (i.e. it actually exists
+       in the project).
+     - If the target is a class name, it is looked up in *class_name_table*.
+       If found, the corresponding ``res://`` path is used. If not found,
+       the target is assumed to be a built-in Godot class and the
+       relationship is **skipped**.
+
+     Returns a sorted list of ``GraphLink`` objects (sorted by
+     ``(source, target, kind)`` for deterministic output).
+     """
+     evidence_map: dict[tuple[str, str, str], list[Evidence]] = defaultdict(list)
+
+     for rel in raw_rels:
+         target_path = _resolve_target(rel, class_name_table, known_res_paths)
+         if target_path is None:
+             continue
+
+         key = (rel.source_res_path, target_path, rel.kind)
+         evidence_map[key].append(Evidence(
+             file=rel.source_res_path,
+             line=rel.line,
+         ))
+
+     return [
+         GraphLink(
+             source=source,
+             target=target,
+             kind=kind,
+             weight=len(evidence_list),
+             evidence=evidence_list,
+         )
+         for (source, target, kind), evidence_list in sorted(evidence_map.items())
+     ]
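
The resolution step is easiest to follow on synthetic input. A sketch with hypothetical paths and class names: Enemy resolves through the class-name table, the res:// target is kept because it is known, and Node2D is dropped as a built-in:

    from gdscript_code_graph.relationships import (
        RawRelationship,
        resolve_relationships_with_evidence,
    )

    rels = [
        RawRelationship("res://player.gd", "Enemy", "extends", 1),
        RawRelationship("res://hud.gd", "res://player.gd", "preloads", 3),
        RawRelationship("res://hud.gd", "Node2D", "extends", 1),  # built-in: skipped
    ]
    links = resolve_relationships_with_evidence(
        rels,
        class_name_table={"Enemy": "res://enemy.gd"},
        known_res_paths={"res://player.gd", "res://enemy.gd", "res://hud.gd"},
    )
    for link in links:
        print(link.source, "->", link.target, f"({link.kind}, weight={link.weight})")
    # res://hud.gd -> res://player.gd (preloads, weight=1)
    # res://player.gd -> res://enemy.gd (extends, weight=1)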

gdscript_code_graph/schema.py
@@ -0,0 +1,62 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+
+
+ @dataclass
+ class Evidence:
+     file: str  # res:// path where the relationship was found
+     line: int  # line number
+
+
+ @dataclass
+ class FunctionMetrics:
+     name: str  # function name
+     line: int  # start line number
+     cc: int  # cyclomatic complexity
+     loc: int  # non-empty, non-comment lines in this function
+     mi: float | None  # maintainability index (None if loc=0 or volume=0)
+
+
+ @dataclass
+ class NodeMetrics:
+     loc: int  # non-empty, non-comment lines
+     max_cc: int | None  # max per-function cyclomatic complexity (None if parse failed)
+     median_cc: float | None  # median per-function cyclomatic complexity (None if parse failed)
+     mi: float | None = None  # file-level maintainability index (None if parse failed or empty file)
+     mi_min: float | None = None  # worst per-function MI (None if no functions have MI)
+     mi_median: float | None = None  # median per-function MI (None if no functions have MI)
+     functions: list[FunctionMetrics] = field(default_factory=list)  # per-function detail
+
+
+ @dataclass
+ class GraphNode:
+     id: str  # res:// path (stable identifier)
+     kind: str  # "script" for v1
+     language: str  # "gdscript" for v1
+     name: str  # class_name if declared, else filename stem
+     metrics: NodeMetrics
+     tags: list[str] = field(default_factory=list)
+
+
+ @dataclass
+ class GraphLink:
+     source: str  # res:// path
+     target: str  # res:// path
+     kind: str  # "extends", "preloads", "loads", "typed_dependency", "returns"
+     weight: int  # number of occurrences
+     evidence: list[Evidence] = field(default_factory=list)
+
+
+ @dataclass
+ class Meta:
+     repo: str
+     generated_at: str  # ISO 8601
+
+
+ @dataclass
+ class Graph:
+     schema_version: str
+     meta: Meta
+     nodes: list[GraphNode]
+     links: list[GraphLink]
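
Since every class here is a plain dataclass, the JSON shape of the output follows directly from dataclasses.asdict. A minimal sketch with placeholder values:

    import json
    from dataclasses import asdict

    from gdscript_code_graph.schema import Graph, GraphNode, Meta, NodeMetrics

    node = GraphNode(
        id="res://player.gd",
        kind="script",
        language="gdscript",
        name="Player",
        metrics=NodeMetrics(loc=42, max_cc=5, median_cc=2.0),
    )
    graph = Graph(
        schema_version="1.0",
        meta=Meta(repo="demo", generated_at="2026-01-01T00:00:00+00:00"),
        nodes=[node],
        links=[],
    )
    print(json.dumps(asdict(graph), indent=2))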

gdscript_code_graph-1.0.0.dist-info/METADATA
@@ -0,0 +1,12 @@
+ Metadata-Version: 2.4
+ Name: gdscript-code-graph
+ Version: 1.0.0
+ License-Expression: MIT
+ License-File: LICENSE.md
+ Requires-Python: >=3.10
+ Requires-Dist: click>=8.0
+ Requires-Dist: gdtoolkit>=4.0
+ Provides-Extra: dev
+ Requires-Dist: pip-audit; extra == 'dev'
+ Requires-Dist: pytest-cov; extra == 'dev'
+ Requires-Dist: pytest>=7.0; extra == 'dev'

gdscript_code_graph-1.0.0.dist-info/RECORD
@@ -0,0 +1,13 @@
+ gdscript_code_graph/__init__.py,sha256=xoY9iVUOrPcctMLIKk8ojgRba9a5Dn5_lNUxf6GbTU8,473
+ gdscript_code_graph/cli.py,sha256=JN5SAteYCYZCHFKC6ewn0-Y9hdmkgTZj1XnAZku23TM,1686
+ gdscript_code_graph/discovery.py,sha256=VLhYsvanc2xg97QLFCniZthBp4nkJVk-JBYqyiqqMfY,2114
+ gdscript_code_graph/graph.py,sha256=yU6dRpzBBzxyhAGO5h0xyiwLDh_8Ryx4QSQhcwmcnTw,3272
+ gdscript_code_graph/metrics.py,sha256=Crvb2t31IMgWZXchlilkqB93BGWWC2zSSZmMfnPO0Mo,13376
+ gdscript_code_graph/parsing.py,sha256=DO4ZlLPCtryw6I_RFDnUgI_y6HVhZ7xEAyYS3t_Zv1M,2124
+ gdscript_code_graph/relationships.py,sha256=glkJh77QbVY5QpJuADwUlRt59fif5oKP7WG6v0FmlRg,13956
+ gdscript_code_graph/schema.py,sha256=Jf-KLku7qulLp0twxRTjUbWc0ufzW8Sqnekts9hWR9I,2030
+ gdscript_code_graph-1.0.0.dist-info/METADATA,sha256=QVllj4Fie0DsOdImb1ohhFoAaTdEFY-keUGfklMBQbg,338
+ gdscript_code_graph-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ gdscript_code_graph-1.0.0.dist-info/entry_points.txt,sha256=fHjGFILR1wKFP25D3YwjX9jJSVKqKiOw5MrCNPQ-jAk,69
+ gdscript_code_graph-1.0.0.dist-info/licenses/LICENSE.md,sha256=2GMLnPKJEWOp69wddxQbbBPprY_XUfc4b0So0f4TBZA,1061
+ gdscript_code_graph-1.0.0.dist-info/RECORD,,

gdscript_code_graph-1.0.0.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any

gdscript_code_graph-1.0.0.dist-info/entry_points.txt
@@ -0,0 +1,2 @@
+ [console_scripts]
+ gdscript-code-graph = gdscript_code_graph.cli:main

gdscript_code_graph-1.0.0.dist-info/licenses/LICENSE.md
@@ -0,0 +1,7 @@
+ Copyright 2026 Mike Rötgers
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.