codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. codedebrief/__init__.py +12 -0
  2. codedebrief/analysis/__init__.py +16 -0
  3. codedebrief/analysis/common.py +527 -0
  4. codedebrief/analysis/discovery.py +100 -0
  5. codedebrief/analysis/languages/__init__.py +6 -0
  6. codedebrief/analysis/languages/_common.py +68 -0
  7. codedebrief/analysis/languages/c.py +96 -0
  8. codedebrief/analysis/languages/cpp.py +146 -0
  9. codedebrief/analysis/languages/csharp.py +137 -0
  10. codedebrief/analysis/languages/go.py +157 -0
  11. codedebrief/analysis/languages/java.py +158 -0
  12. codedebrief/analysis/languages/php.py +83 -0
  13. codedebrief/analysis/languages/ruby.py +75 -0
  14. codedebrief/analysis/languages/rust.py +96 -0
  15. codedebrief/analysis/project.py +373 -0
  16. codedebrief/analysis/python.py +939 -0
  17. codedebrief/analysis/registry.py +320 -0
  18. codedebrief/analysis/treesitter.py +884 -0
  19. codedebrief/analysis/typescript.py +1019 -0
  20. codedebrief/artifacts.py +49 -0
  21. codedebrief/cli.py +585 -0
  22. codedebrief/config.py +226 -0
  23. codedebrief/doctor.py +175 -0
  24. codedebrief/install.py +441 -0
  25. codedebrief/mcp_server.py +2720 -0
  26. codedebrief/model.py +189 -0
  27. codedebrief/py.typed +1 -0
  28. codedebrief/quality.py +392 -0
  29. codedebrief/query.py +641 -0
  30. codedebrief/render/__init__.py +6 -0
  31. codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
  32. codedebrief/render/assets/panels.js +462 -0
  33. codedebrief/render/assets/shell.js +1649 -0
  34. codedebrief/render/assets/styles.css +1715 -0
  35. codedebrief/render/assets/tree.js +616 -0
  36. codedebrief/render/html.py +191 -0
  37. codedebrief/render/markdown.py +153 -0
  38. codedebrief/render/payload.py +326 -0
  39. codedebrief/render/snapshot.py +769 -0
  40. codedebrief/schema/codedebrief.schema.json +449 -0
  41. codedebrief/util.py +65 -0
  42. codedebrief/validation.py +214 -0
  43. codedebrief-0.11.0.dist-info/METADATA +426 -0
  44. codedebrief-0.11.0.dist-info/RECORD +48 -0
  45. codedebrief-0.11.0.dist-info/WHEEL +4 -0
  46. codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
  47. codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
  48. codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0
@@ -0,0 +1,939 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import copy
5
+ from collections.abc import Callable, Iterable
6
+ from pathlib import Path
7
+ from typing import Any, cast
8
+
9
+ from codedebrief.analysis.common import (
10
+ CONTINUES,
11
+ EMPTY,
12
+ FALLS_THROUGH,
13
+ MATCH,
14
+ NO,
15
+ RAISES,
16
+ RETURNS,
17
+ SUCCESS,
18
+ WILDCARD,
19
+ YES,
20
+ FlowBuilder,
21
+ PendingEdge,
22
+ annotate_reachability,
23
+ attach_qualified_calls,
24
+ branch,
25
+ call_is_boundary,
26
+ decision_identity,
27
+ decision_metadata,
28
+ domain_from_subject,
29
+ is_functional_condition,
30
+ tag_call_effects,
31
+ value_namespace,
32
+ )
33
+ from codedebrief.analysis.discovery import discover_source_files
34
+ from codedebrief.config import CodeDebriefConfig
35
+ from codedebrief.model import (
36
+ Evidence,
37
+ FileAnalysis,
38
+ Flow,
39
+ NodeKind,
40
+ SourceLocation,
41
+ )
42
+ from codedebrief.util import compact_text, file_sha256, relpath, stable_id
43
+
44
# Final dotted segment of a decorator that marks a function as a FastAPI-style route.
FASTAPI_METHODS = {"get", "post", "put", "patch", "delete", "options", "head", "websocket"}
# Final dotted segment of a decorator that marks a function as a CLI command.
CLI_DECORATORS = {"command", "callback"}
# Function-name prefixes classified as generic event handlers.
HANDLER_PREFIXES = ("handle_", "on_", "process_")
47
+
48
+
49
class _SourceText:
    """Source text plus its physical lines, used to slice out the text of an AST node.

    ``lines`` is indexed by ``lineno - 1``, so it must be split exactly where the
    Python parser ends a line (LF, CRLF or CR). ``str.splitlines`` additionally breaks
    on form feed, vertical tab, FS/GS/RS, NEL and U+2028/U+2029; those extra pieces
    are glued back together here so line numbers stay aligned with the AST.
    """

    def __init__(self, text: str) -> None:
        self.text = text
        self.lines = self._physical_lines(text)

    @staticmethod
    def _physical_lines(text: str) -> list[str]:
        """Split ``text`` into lines (terminators kept) on LF/CRLF/CR only."""
        lines: list[str] = []
        pending = ""
        for piece in text.splitlines(keepends=True):
            pending += piece
            # Only a real newline closes a physical line; any other separator that
            # splitlines honoured is part of the line's content.
            if pending.endswith(("\n", "\r")):
                lines.append(pending)
                pending = ""
        if pending:
            lines.append(pending)
        return lines

    def segment(self, node: ast.AST) -> str:
        """Return the source text covered by ``node``, or ``""`` if it has no usable span.

        Column offsets on AST nodes are UTF-8 byte offsets, so the first and last
        lines are cut with ``_slice_line_utf8`` rather than by character index.
        """
        start_line = getattr(node, "lineno", None)
        end_line = getattr(node, "end_lineno", None)
        start_col = getattr(node, "col_offset", None)
        end_col = getattr(node, "end_col_offset", None)
        if (
            not isinstance(start_line, int)
            or not isinstance(end_line, int)
            or not isinstance(start_col, int)
            or not isinstance(end_col, int)
            or start_line < 1
            or end_line < start_line
            or start_line > len(self.lines)
        ):
            return ""

        # Clamp a span that runs past the end of the text instead of failing.
        end_line = min(end_line, len(self.lines))
        selected = self.lines[start_line - 1 : end_line]
        if not selected:
            return ""
        if start_line == end_line:
            return _slice_line_utf8(selected[0], start_col, end_col)

        first = _slice_line_utf8(selected[0], start_col, None)
        middle = selected[1:-1]
        last = _slice_line_utf8(selected[-1], 0, end_col)
        return "".join([first, *middle, last])
81
+
82
+
83
+ class PythonAnalyzer:
84
+ def __init__(self, root: Path, config: CodeDebriefConfig) -> None:
85
+ self.root = root
86
+ self.config = config
87
+ self._module_paths: dict[str, str] | None = None
88
+
89
    def analyze(self, path: Path) -> FileAnalysis:
        """Parse one Python file and build its ``FileAnalysis``.

        One flow is produced per top-level function and per method of a top-level
        class. Module-level boolean constants, enum members and first-party import
        dependencies are collected alongside. ``ast.parse`` errors are not caught here.
        """
        # utf-8-sig transparently strips a leading BOM (a valid file an editor saved as
        # UTF-8-with-BOM), so it parses instead of choking on a stray U+FEFF token.
        source = _SourceText(path.read_text(encoding="utf-8-sig"))
        relative = relpath(path, self.root)
        tree = ast.parse(source.text, filename=relative)
        module_name = _module_name(relative)
        constants = _harvest_constants(tree)
        constant_names = set(constants)
        flows: list[Flow] = []
        for definition, owner in _definitions(tree):
            flow = self._analyze_definition(
                definition=definition,
                owner=owner,
                source=source,
                relative=relative,
                module_name=module_name,
            )
            # A constant shadowed locally is runtime-dependent, so dead_guard must not
            # claim its guard is always true/false.
            shadowed = _assigned_names(definition) & constant_names
            if shadowed:
                flow.metadata["shadows_constants"] = sorted(shadowed)
            flows.append(flow)

        is_package = Path(relative).name == "__init__.py"
        module_paths = self._python_module_paths()
        import_map = _import_map(
            tree,
            module_name,
            is_package,
            lambda base, name: _is_submodule(module_paths, base, name),
        )
        # A file never lists itself as a dependency.
        dependencies = [
            item
            for item in _dependency_paths_from_modules(import_map, module_paths)
            if item != relative
        ]
        for flow in flows:
            attach_qualified_calls(flow, import_map, module_name)

        return FileAnalysis(
            path=relative,
            language="python",
            sha256=file_sha256(path),
            enums=_harvest_enums(tree),
            constants=constants,
            dependencies=dependencies,
            flows=flows,
        )
139
+
140
+ def _python_module_paths(self) -> dict[str, str]:
141
+ if self._module_paths is not None:
142
+ return self._module_paths
143
+ module_paths: dict[str, str] = {}
144
+ for path in discover_source_files(self.root, self.config):
145
+ if path.suffix.lower() != ".py":
146
+ continue
147
+ relative = relpath(path, self.root)
148
+ module_paths.setdefault(_module_name(relative), relative)
149
+ self._module_paths = module_paths
150
+ return module_paths
151
+
152
    def _analyze_definition(
        self,
        definition: ast.FunctionDef | ast.AsyncFunctionDef,
        owner: str,
        source: _SourceText,
        relative: str,
        module_name: str,
    ) -> Flow:
        """Build the ``Flow`` for one function or method.

        ``owner`` is the enclosing class name, or ``""`` for a module-level function.
        The flow gets an ENTRY node, the nodes produced by walking the body, and an
        inferred "Complete" TERMINAL for any path that runs off the end of the body.
        """
        qualified_name = f"{owner}.{definition.name}" if owner else definition.name
        symbol = f"{module_name}:{qualified_name}"
        framework, entry_kind, is_entrypoint = _classify_entrypoint(
            definition, relative, owner, self.config
        )
        is_test = _is_test(relative, definition.name)
        if is_test:
            # Tests are never entrypoints, whatever the classifier decided.
            is_entrypoint = False
            entry_kind = "test"

        location = _location(relative, definition)
        flow = Flow(
            id=f"flow-{stable_id(symbol)}",
            name=qualified_name,
            symbol=symbol,
            language="python",
            framework=framework,
            entry_kind=entry_kind,
            is_entrypoint=is_entrypoint,
            location=location,
            metadata={
                "async": isinstance(definition, ast.AsyncFunctionDef),
                "test": is_test,
                "decorators": [_safe_unparse(item) for item in definition.decorator_list],
            },
        )
        builder = FlowBuilder(flow)
        entry = builder.add_node(
            NodeKind.ENTRY,
            _entry_label(flow),
            location,
            [],
            metadata={"symbol": symbol},
        )
        outgoing = self._walk_statements(
            definition.body,
            [PendingEdge(entry.id)],
            builder,
            source,
            relative,
        )
        # Dangling edges mean some path reaches the end of the body without an
        # explicit return/raise; close them with an inferred terminal.
        if outgoing:
            builder.add_node(
                NodeKind.TERMINAL,
                "Complete",
                location,
                outgoing,
                evidence=Evidence.INFERRED,
            )
        annotate_reachability(flow)
        # Tag call effects for downstream navigation and explanation metadata.
        tag_call_effects(flow)
        return flow
213
+
214
    def _walk_statements(
        self,
        statements: list[ast.stmt],
        incoming: list[PendingEdge],
        builder: FlowBuilder,
        source: _SourceText,
        relative: str,
    ) -> list[PendingEdge]:
        """Add nodes for a statement list and return the edges still left dangling.

        ``incoming`` holds the edges that lead into the first statement. The returned
        list holds the edges that leave the last reached statement; it is empty when
        every path ended in a return, raise or continue.
        """
        endpoints = incoming
        for statement in statements:
            # Nothing flows onward, so the remaining statements are not walked.
            if not endpoints:
                break
            if isinstance(statement, ast.If):
                endpoints = self._walk_if(statement, endpoints, builder, source, relative)
            elif isinstance(statement, ast.Match):
                endpoints = self._walk_match(statement, endpoints, builder, source, relative)
            elif isinstance(statement, ast.Try):
                endpoints = self._walk_try(statement, endpoints, builder, source, relative)
            elif isinstance(statement, (ast.For, ast.AsyncFor, ast.While)):
                endpoints = self._walk_loop(statement, endpoints, builder, source, relative)
            elif isinstance(statement, ast.Return):
                value = _safe_unparse(statement.value) if statement.value else ""
                calls = _call_names(statement)
                # A returned call expression gets its own CALL node ahead of the terminal.
                if calls:
                    call_node = builder.add_node(
                        NodeKind.CALL,
                        f"Call {calls[0]}()",
                        _location(relative, statement),
                        endpoints,
                        detail=_source_segment(source, statement),
                        metadata={"calls": calls},
                    )
                    endpoints = [PendingEdge(call_node.id)]
                node = builder.add_node(
                    NodeKind.TERMINAL,
                    f"Return {value}".strip(),
                    _location(relative, statement),
                    endpoints,
                    detail=_source_segment(source, statement),
                )
                endpoints = []
            elif isinstance(statement, ast.Raise):
                value = _safe_unparse(statement.exc) if statement.exc else "error"
                builder.add_node(
                    NodeKind.ERROR,
                    f"Raise {value}",
                    _location(relative, statement),
                    endpoints,
                    detail=_source_segment(source, statement),
                )
                endpoints = []
            elif isinstance(statement, ast.Break):
                node = builder.add_node(
                    NodeKind.ACTION,
                    "Break loop",
                    _location(relative, statement),
                    endpoints,
                    detail=_source_segment(source, statement),
                    metadata={"loop_control": "break"},
                )
                # Unlike continue, break keeps an outgoing edge.
                endpoints = [PendingEdge(node.id)]
            elif isinstance(statement, ast.Continue):
                builder.add_node(
                    NodeKind.ACTION,
                    "Continue loop",
                    _location(relative, statement),
                    endpoints,
                    detail=_source_segment(source, statement),
                    metadata={"loop_control": "continue"},
                )
                endpoints = []
            else:
                # Every other statement becomes one ACTION/CALL node.
                kind, label, calls = _statement_summary(statement)
                node = builder.add_node(
                    kind,
                    label,
                    _location(relative, statement),
                    endpoints,
                    detail=_source_segment(source, statement),
                    metadata={"calls": calls} if calls else {},
                )
                endpoints = [PendingEdge(node.id)]
        return endpoints
297
+
298
    def _walk_loop(
        self,
        statement: ast.For | ast.AsyncFor | ast.While,
        incoming: list[PendingEdge],
        builder: FlowBuilder,
        source: _SourceText,
        relative: str,
    ) -> list[PendingEdge]:
        """Add an inferred loop node, walk its body, and return the edges leaving the loop.

        The body is entered through an "Iteration" edge and the loop is left through a
        "Done" edge. Edges left dangling by the body are returned together with "Done";
        when the loop has an ``else`` block, both sets are fed into that block instead.
        """
        node = builder.add_node(
            NodeKind.ACTION,
            _loop_label(statement),
            _location(relative, statement),
            incoming,
            detail=_source_segment(source, statement),
            evidence=Evidence.INFERRED,
            metadata={
                "loop": True,
                "body_outcome": _branch_outcome(statement.body),
                "else_outcome": (
                    _branch_outcome(statement.orelse) if statement.orelse else FALLS_THROUGH
                ),
                "has_else": bool(statement.orelse),
            },
        )
        body_endpoints = self._walk_statements(
            statement.body,
            [PendingEdge(node.id, "Iteration")],
            builder,
            source,
            relative,
        )
        done_endpoints = [PendingEdge(node.id, "Done")]
        if statement.orelse:
            return self._walk_statements(
                statement.orelse,
                done_endpoints + body_endpoints,
                builder,
                source,
                relative,
            )
        return done_endpoints + body_endpoints
339
+
340
    def _walk_if(
        self,
        statement: ast.If,
        incoming: list[PendingEdge],
        builder: FlowBuilder,
        source: _SourceText,
        relative: str,
    ) -> list[PendingEdge]:
        """Add a node for an ``if`` statement and return the edges that leave it.

        A condition that ``is_functional_condition`` rejects collapses into a single
        inferred ACTION node and its branches are not walked. Otherwise a DECISION node
        is added with YES/NO branches; a missing ``else`` is recorded as an implicit
        fall-through NO branch.
        """
        condition = _safe_unparse(statement.test)
        branch_source = _branch_behavior_source(statement.body)
        functional = is_functional_condition(condition, branch_source)
        if not functional:
            node = builder.add_node(
                NodeKind.ACTION,
                f"Handle internal condition: {condition}",
                _location(relative, statement),
                incoming,
                evidence=Evidence.INFERRED,
                detail=_source_segment(source, statement),
            )
            return [PendingEdge(node.id)]

        node = builder.add_node(
            NodeKind.DECISION,
            condition,
            _location(relative, statement.test),
            incoming,
            detail=_source_segment(source, statement.test),
            metadata=decision_metadata(condition),
        )
        node.metadata["branches"] = [
            branch(YES, _branch_outcome(statement.body)),
            branch(
                NO,
                _branch_outcome(statement.orelse) if statement.orelse else FALLS_THROUGH,
                implicit=not statement.orelse,
            ),
        ]
        yes_endpoints = self._walk_statements(
            statement.body,
            [PendingEdge(node.id, YES)],
            builder,
            source,
            relative,
        )
        if statement.orelse:
            no_endpoints = self._walk_statements(
                statement.orelse,
                [PendingEdge(node.id, NO)],
                builder,
                source,
                relative,
            )
        else:
            # No else block: the NO edge flows straight to whatever follows the if.
            no_endpoints = [PendingEdge(node.id, NO)]
        return yes_endpoints + no_endpoints
396
+
397
    def _walk_match(
        self,
        statement: ast.Match,
        incoming: list[PendingEdge],
        builder: FlowBuilder,
        source: _SourceText,
        relative: str,
    ) -> list[PendingEdge]:
        """Add a DECISION node for a ``match`` statement and walk every case body.

        Each case becomes a branch labelled with its pattern (plus guard, if any).
        The non-wildcard patterns are recorded, de-duplicated and sorted, under
        ``values``. Without an unguarded ``case _`` an implicit wildcard fall-through
        branch and edge are added.
        """
        subject = _safe_unparse(statement.subject)
        node = builder.add_node(
            NodeKind.DECISION,
            f"Match {subject}",
            _location(relative, statement),
            incoming,
            metadata=decision_identity(
                condition=subject,
                subject=subject,
                operator=MATCH,
                domain=domain_from_subject(subject),
                namespace="",
            ),
        )
        endpoints: list[PendingEdge] = []
        has_default = False
        values: list[str] = []
        branches: list[dict[str, Any]] = []
        for case in statement.cases:
            pattern = _safe_unparse(case.pattern)
            # A guarded wildcard `case _ if cond:` only matches when the guard holds, so
            # it is NOT an exhaustive default - fall-through and missing enum members must
            # still be surfaced.
            is_default = pattern == WILDCARD and case.guard is None
            has_default = has_default or is_default
            label = f"{pattern} if {_safe_unparse(case.guard)}" if case.guard else pattern
            if not is_default and pattern != WILDCARD:
                # Split OR-patterns (`case A | B:`) into their individual members so
                # value_namespace and enum exhaustiveness see the real values.
                values.extend(_match_values(case.pattern))
            branches.append(branch(label, _branch_outcome(case.body)))
            endpoints.extend(
                self._walk_statements(
                    case.body,
                    [PendingEdge(node.id, label)],
                    builder,
                    source,
                    relative,
                )
            )
        node.metadata["values"] = sorted(set(values))
        node.metadata["value_namespace"] = value_namespace(sorted(set(values)))
        if not has_default:
            branches.append(branch(WILDCARD, FALLS_THROUGH, implicit=True))
            # An unmatched value falls through to whatever follows the match.
            endpoints.append(PendingEdge(node.id, WILDCARD))
        node.metadata["branches"] = branches
        return endpoints
453
+
454
    def _walk_try(
        self,
        statement: ast.Try,
        incoming: list[PendingEdge],
        builder: FlowBuilder,
        source: _SourceText,
        relative: str,
    ) -> list[PendingEdge]:
        """Add an inferred "Operation succeeds?" DECISION for a ``try`` statement.

        The success branch walks the body and then the ``else`` block if the body left
        any edge dangling. Each ``except`` handler becomes its own branch. A
        ``finally`` block is walked after whatever is still dangling.
        """
        node = builder.add_node(
            NodeKind.DECISION,
            "Operation succeeds?",
            _location(relative, statement),
            incoming,
            evidence=Evidence.INFERRED,
            detail=_source_segment(source, statement),
            metadata=decision_identity(
                condition="exception boundary",
                subject="exception",
                operator="",
                domain="error",
                namespace="",
            ),
        )
        success_outcome = _branch_outcome(statement.body)
        # A body that falls through continues into `else`, so that decides the outcome.
        if success_outcome == FALLS_THROUGH and statement.orelse:
            success_outcome = _branch_outcome(statement.orelse)
        branches: list[dict[str, Any]] = [branch(SUCCESS, success_outcome)]
        body_endpoints = self._walk_statements(
            statement.body,
            [PendingEdge(node.id, SUCCESS)],
            builder,
            source,
            relative,
        )
        if statement.orelse and body_endpoints:
            body_endpoints = self._walk_statements(
                statement.orelse,
                body_endpoints,
                builder,
                source,
                relative,
            )
        endpoints = body_endpoints
        for handler in statement.handlers:
            # A bare `except:` has no type expression.
            error_name = _safe_unparse(handler.type) if handler.type else "Any error"
            branches.append(branch(error_name, _branch_outcome(handler.body)))
            endpoints.extend(
                self._walk_statements(
                    handler.body,
                    [PendingEdge(node.id, error_name)],
                    builder,
                    source,
                    relative,
                )
            )
        node.metadata["branches"] = branches
        if statement.finalbody:
            # A finally block always runs, even when the body/handlers returned.
            body_terminated = not endpoints
            finally_incoming = endpoints or [PendingEdge(node.id, "finally")]
            endpoints = self._walk_statements(
                statement.finalbody, finally_incoming, builder, source, relative
            )
            if body_terminated:
                # The try/handlers already returned/raised; the terminator resumes after
                # finally, so anything past the try is unreachable.
                endpoints = []
        return endpoints
522
+
523
+
524
def _assigned_names(definition: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
    """Collect every name the function binds: its parameters plus names stored in its body.

    Any such name shadows a module-level constant of the same name, so a guard on it
    depends on runtime state and cannot be called statically dead.
    """
    collector = _AssignedNameVisitor()
    collector.names |= _argument_names(definition.args)
    for stmt in definition.body:
        collector.visit(stmt)
    return collector.names
535
+
536
+
537
def _match_values(pattern: ast.pattern) -> list[str]:
    """Return the value(s) a case pattern dispatches on, with ``A | B`` expanded recursively."""
    if not isinstance(pattern, ast.MatchOr):
        return [_safe_unparse(pattern)]
    return [value for option in pattern.patterns for value in _match_values(option)]
545
+
546
+
547
def _definitions(
    tree: ast.Module,
) -> Iterable[tuple[ast.FunctionDef | ast.AsyncFunctionDef, str]]:
    """Yield ``(definition, owner)`` for module-level functions and direct class methods.

    ``owner`` is ``""`` for a module-level function and the class name for a method.
    Nested functions and nested classes are not descended into.
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    for top_level in tree.body:
        if isinstance(top_level, function_types):
            yield top_level, ""
            continue
        if not isinstance(top_level, ast.ClassDef):
            continue
        yield from (
            (member, top_level.name)
            for member in top_level.body
            if isinstance(member, function_types)
        )
557
+
558
+
559
def _classify_entrypoint(
    definition: ast.FunctionDef | ast.AsyncFunctionDef,
    relative: str,
    owner: str,
    config: CodeDebriefConfig,
) -> tuple[str, str, bool]:
    """Return ``(framework, entry_kind, is_entrypoint)`` for a definition.

    Precedence: route/CLI decorators, then handler-style name prefixes, then
    method vs. function. A configured override for ``"<relative>:<qualified name>"``
    replaces the default entrypoint flag but never the framework or kind.
    """
    decorators = [_safe_unparse(item) for item in definition.decorator_list]
    owner_prefix = owner + "." if owner else ""
    override = config.entrypoint_override(f"{relative}:{owner_prefix}{definition.name}")

    def flagged(default):
        # The configured override, when present, wins over the heuristic default.
        return default if override is None else override

    for rendered in decorators:
        # Keep only the last dotted segment before any call parentheses.
        head = rendered.partition("(")[0]
        attribute = head.rsplit(".", 1)[-1]
        if attribute in FASTAPI_METHODS:
            return "fastapi", "route", flagged(True)
        if attribute in CLI_DECORATORS:
            return "python-cli", "command", flagged(True)

    if definition.name.startswith(HANDLER_PREFIXES):
        return "generic", "event_handler", flagged(True)
    if owner:
        return "generic", "method", flagged(False)
    public = config.include_public_functions and not definition.name.startswith("_")
    return "generic", "function", flagged(public)
583
+
584
+
585
+ def _statement_summary(statement: ast.stmt) -> tuple[NodeKind, str, list[str]]:
586
+ if isinstance(statement, (ast.FunctionDef, ast.AsyncFunctionDef)):
587
+ return NodeKind.ACTION, f"Define local function {statement.name}", []
588
+ if isinstance(statement, ast.ClassDef):
589
+ return NodeKind.ACTION, f"Define local class {statement.name}", []
590
+ calls = _call_names(statement)
591
+ boundary = next((item for item in calls if call_is_boundary(item)), "")
592
+ if boundary:
593
+ return NodeKind.CALL, f"Call {boundary}()", calls
594
+ if calls:
595
+ return NodeKind.CALL, f"Call {calls[0]}()", calls
596
+ if isinstance(statement, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
597
+ targets: list[str] = []
598
+ if isinstance(statement, ast.Assign):
599
+ targets = [_safe_unparse(item) for item in statement.targets]
600
+ else:
601
+ targets = [_safe_unparse(statement.target)]
602
+ return NodeKind.ACTION, f"Set {', '.join(targets)}", []
603
+ if isinstance(statement, ast.Assert):
604
+ return NodeKind.ACTION, f"Assert {_safe_unparse(statement.test)}", []
605
+ if isinstance(statement, (ast.Import, ast.ImportFrom)):
606
+ return NodeKind.ACTION, "Load dependencies", []
607
+ return NodeKind.ACTION, compact_text(_safe_unparse(statement), 90), []
608
+
609
+
610
def _entry_label(flow: Flow) -> str:
    """Return the entry-node label: the flow name, prefixed for routes, commands and tests."""
    prefixes = {"route": "Route", "command": "Command", "test": "Test"}
    prefix = prefixes.get(flow.entry_kind)
    return flow.name if prefix is None else f"{prefix}: {flow.name}"
618
+
619
+
620
def _location(relative: str, node: ast.AST) -> SourceLocation:
    """Return the node's line span in ``relative``; a node without positions maps to line 1."""
    first_line = int(getattr(node, "lineno", 1))
    # Without an end line the span collapses to the start line.
    last_line = int(getattr(node, "end_lineno", first_line))
    return SourceLocation(relative, first_line, last_line)
624
+
625
+
626
def _source_segment(source: _SourceText, node: ast.AST) -> str:
    """Return the node's source text, or its unparsed form if no text is found, compacted to 500."""
    text = source.segment(node)
    if not text:
        text = _safe_unparse(node)
    return compact_text(text, 500)
628
+
629
+
630
def _slice_line_utf8(line: str, start: int, end: int | None) -> str:
    """Slice ``line`` by UTF-8 byte offsets, clamping both bounds into range.

    ``end=None`` means "to the end of the line". A cut that lands inside a
    multi-byte character decodes with the replacement character rather than raising.
    """
    raw = line.encode("utf-8")
    size = len(raw)
    lo = min(max(start, 0), size)
    if end is None:
        hi = size
    else:
        hi = max(lo, min(end, size))
    return raw[lo:hi].decode("utf-8", errors="replace")
635
+
636
+
637
def _safe_unparse(node: ast.AST | None) -> str:
    """Render a node back to source; ``None`` gives ``""``.

    If ``ast.unparse`` raises ``ValueError`` or ``TypeError`` the node's class name
    is returned instead.
    """
    if node is None:
        return ""
    try:
        rendered = ast.unparse(node)
    except (ValueError, TypeError):
        rendered = node.__class__.__name__
    return rendered
644
+
645
+
646
def _call_name(node: ast.expr) -> str:
    """Return the dotted name of a callee expression, or ``""`` if it has none.

    Attribute chains are followed down to their root. A root that is not a plain
    name contributes nothing, so ``f().b.c`` yields ``"b.c"``.
    """
    attributes: list[str] = []
    current = node
    while isinstance(current, ast.Attribute):
        attributes.append(current.attr)
        current = current.value
    dotted = current.id if isinstance(current, ast.Name) else ""
    # Attributes were collected outermost-first; rebuild from the root outwards.
    for attribute in reversed(attributes):
        dotted = f"{dotted}.{attribute}" if dotted else attribute
    return dotted
653
+
654
+
655
def _call_names(node: ast.AST) -> list[str]:
    """Return the callee names found under ``node``, in the order the visitor meets them."""
    collector = _CallNameVisitor()
    collector.visit(node)
    return collector.calls
659
+
660
+
661
class _CallNameVisitor(ast.NodeVisitor):
    """Collect the names of calls in a subtree without entering nested scopes."""

    def __init__(self) -> None:
        self.calls: list[str] = []

    def visit_Call(self, node: ast.Call) -> None:
        """Record a named callee, then keep walking into the callee and its arguments."""
        callee = _call_name(node.func)
        if callee:
            self.calls.append(callee)
        self.generic_visit(node)

    def _skip(self, node: ast.AST) -> None:
        """Do not descend: nested functions, classes and lambdas are never visited."""
        return None

    visit_FunctionDef = _skip
    visit_AsyncFunctionDef = _skip
    visit_ClassDef = _skip
    visit_Lambda = _skip
682
+
683
+
684
+ class _AssignedNameVisitor(ast.NodeVisitor):
685
+ def __init__(self) -> None:
686
+ self.names: set[str] = set()
687
+
688
+ def visit_Name(self, node: ast.Name) -> None:
689
+ if isinstance(node.ctx, ast.Store):
690
+ self.names.add(node.id)
691
+
692
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
693
+ self.names.add(node.name)
694
+
695
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
696
+ self.names.add(node.name)
697
+
698
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
699
+ self.names.add(node.name)
700
+
701
+ def visit_Lambda(self, node: ast.Lambda) -> None:
702
+ return None
703
+
704
+
705
def _argument_names(arguments: ast.arguments) -> set[str]:
    """Return every parameter name: positional-only, regular, keyword-only, ``*args``, ``**kwargs``."""
    params = [*arguments.posonlyargs, *arguments.args, *arguments.kwonlyargs]
    params.extend(extra for extra in (arguments.vararg, arguments.kwarg) if extra)
    return {param.arg for param in params}
716
+
717
+
718
def _branch_behavior_source(stmts: list[ast.stmt]) -> str:
    """Render a branch body as one space-joined string.

    Nested function and class definitions are left out, and lambdas inside the
    remaining statements are replaced by a placeholder before unparsing.
    """
    rendered: list[str] = []
    for stmt in stmts:
        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue
        rendered.append(_safe_unparse(_strip_nested_callable_bodies(stmt)))
    return " ".join(rendered)
724
+
725
+
726
def _strip_nested_callable_bodies(statement: ast.stmt) -> ast.stmt:
    """Return a deep copy of ``statement`` with every lambda replaced; the input is untouched."""
    clone = copy.deepcopy(statement)
    stripped = _NestedCallableBodyStripper().visit(clone)
    return cast(ast.stmt, stripped)
728
+
729
+
730
class _NestedCallableBodyStripper(ast.NodeTransformer):
    """Replace each lambda expression with the string constant ``"lambda"``."""

    def visit_Lambda(self, node: ast.Lambda) -> ast.AST:
        placeholder = ast.Constant(value="lambda")
        # Carry the lambda's position over to the placeholder.
        return ast.copy_location(placeholder, node)
733
+
734
+
735
def _loop_label(statement: ast.For | ast.AsyncFor | ast.While) -> str:
    """Return the loop node label: the condition for ``while``, the target for ``for``."""
    if not isinstance(statement, ast.While):
        return f"Process each {_safe_unparse(statement.target)}"
    return f"Repeat while {_safe_unparse(statement.test)}"
739
+
740
+
741
# Base-class names that mark a class as an Enum (compared against the bare name or
# the final attribute of a dotted base such as ``enum.Enum``).
_ENUM_BASES = {"Enum", "IntEnum", "StrEnum", "IntFlag", "Flag", "ReprEnum"}
742
+
743
+
744
def _harvest_enums(tree: ast.Module) -> dict[str, list[str]]:
    """Map each top-level Enum class name to its members, written ``Class.MEMBER``.

    Members come from plain and annotated assignments to simple names in the class
    body, in source order. Classes with no members are omitted.
    """
    enums: dict[str, list[str]] = {}
    for cls in tree.body:
        if not (isinstance(cls, ast.ClassDef) and _is_enum_class(cls)):
            continue
        members: list[str] = []
        for stmt in cls.body:
            if isinstance(stmt, ast.Assign):
                candidates = stmt.targets
            elif isinstance(stmt, ast.AnnAssign):
                candidates = [stmt.target]
            else:
                continue
            members += [
                f"{cls.name}.{candidate.id}"
                for candidate in candidates
                if isinstance(candidate, ast.Name) and _is_enum_member(candidate.id)
            ]
        if members:
            enums[cls.name] = members
    return enums
767
+
768
+
769
def _is_enum_class(node: ast.ClassDef) -> bool:
    """Whether any base is a known Enum base or has a name ending in ``Enum``.

    A dotted base contributes its final attribute; any other base expression
    contributes an empty name and never matches.
    """
    base_names = (
        base.id if isinstance(base, ast.Name) else getattr(base, "attr", "")
        for base in node.bases
    )
    return any(name in _ENUM_BASES or name.endswith("Enum") for name in base_names)
775
+
776
+
777
def _is_enum_member(name: str) -> bool:
    """Whether ``name`` counts as an enum member, i.e. it has no leading underscore."""
    # Leading-underscore names are Enum directives or private attributes
    # (e.g. _ignore_, __dunder__), not members.
    return name[:1] != "_"
780
+
781
+
782
def _harvest_constants(tree: ast.Module) -> dict[str, bool]:
    """Module-level boolean constants (``FLAG = False``) - the data-flow fact a guard's
    always-true/false check needs.

    Top-level statements are read in order. A later bool literal replaces the
    recorded value. Any other rebinding of the name (a non-literal value, an
    augmented assignment, tuple unpacking) removes it, because its value is no
    longer statically known. Chained assignments (``A = B = True``) record every
    simple-name target. A bare annotation (``FLAG: bool``) binds nothing and is
    ignored.
    """
    constants: dict[str, bool] = {}
    for node in tree.body:
        targets: list[ast.expr]
        value: ast.expr | None
        if isinstance(node, ast.Assign):
            targets, value = list(node.targets), node.value
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            targets, value = [node.target], node.value
        elif isinstance(node, ast.AugAssign):
            # `FLAG |= other` makes the result depend on `other`.
            targets, value = [node.target], None
        else:
            continue

        literal: bool | None = None
        if isinstance(value, ast.Constant) and isinstance(value.value, bool):
            literal = value.value

        for target in targets:
            if literal is not None and isinstance(target, ast.Name):
                constants[target.id] = literal
                continue
            # Not a bool literal bound to a simple name: forget every name this target
            # (re)binds. Only Store-context names are bound; `obj.attr = x` leaves
            # `obj` alone.
            for bound in ast.walk(target):
                if isinstance(bound, ast.Name) and isinstance(bound.ctx, ast.Store):
                    constants.pop(bound.id, None)
    return constants
800
+
801
+
802
def _module_name(relative: str) -> str:
    """Turn a ``/``-separated relative path into a dotted module name.

    A trailing ``.py`` is dropped and a package's ``__init__`` maps to the package
    itself (``pkg/__init__.py`` -> ``pkg``).
    """
    stem = relative[:-3] if relative.endswith(".py") else relative
    dotted = ".".join(stem.split("/"))
    init_suffix = ".__init__"
    if dotted.endswith(init_suffix):
        return dotted[: -len(init_suffix)]
    return dotted
805
+
806
+
807
def _import_map(
    tree: ast.Module,
    module_name: str,
    is_package: bool,
    is_submodule: Callable[[str, str], bool],
) -> dict[str, str]:
    """Map each name bound by a top-level import to a ``module:symbol`` binding.

    * ``import m`` / ``import m as a`` bind a module: ``m`` (or ``a``) -> ``m:``.
    * ``from m import f`` binds a symbol: ``f`` -> ``m:f``.
    * ``from pkg import sub``, where ``is_submodule(pkg, sub)`` holds, binds the
      module ``pkg.sub:`` so the next attribute is read as the symbol.
    * A relative import whose base resolves to nothing maps to the bare name.

    Relative imports resolve against ``module_name``, with ``is_package`` telling
    whether the current file is an ``__init__.py``.
    """
    mapping: dict[str, str] = {}
    for node in tree.body:
        if isinstance(node, ast.Import):
            for alias in node.names:
                # Without an alias the dotted name itself is the key;
                # resolve_qualified longest-prefix-matches it.
                mapping[alias.asname or alias.name] = f"{alias.name}:"
            continue
        if not isinstance(node, ast.ImportFrom):
            continue
        base = _relative_base(node.module, node.level, module_name, is_package)
        for alias in node.names:
            if not base:
                binding = alias.name
            elif is_submodule(base, alias.name):
                binding = f"{base}.{alias.name}:"
            else:
                binding = f"{base}:{alias.name}"
            mapping[alias.asname or alias.name] = binding
    return mapping
842
+
843
+
844
def _is_submodule(module_paths: dict[str, str], base: str, name: str) -> bool:
    """Whether the dotted name ``base.name`` is a key of ``module_paths``."""
    candidate = ".".join((base, name))
    return candidate in module_paths
847
+
848
+
849
def _dependency_paths_from_modules(
    import_map: dict[str, str], module_paths: dict[str, str]
) -> list[str]:
    """Return the paths of imported modules found in ``module_paths``.

    The module is the part of each binding before ``:``. Unknown modules are
    skipped, duplicates dropped, and first-seen order is kept.
    """
    resolved = (
        module_paths.get(binding.partition(":")[0]) for binding in import_map.values()
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(path for path in resolved if path is not None))
862
+
863
+
864
def _relative_base(module: str | None, level: int, current_module: str, is_package: bool) -> str:
    """Resolve the module an import statement refers to, as an absolute dotted name.

    ``level`` is the number of leading dots. Level 0 returns ``module`` (or ``""``).
    Otherwise the current module is walked upwards, and climbing above the top
    level leaves an empty base.
    """
    if not level:
        return module or ""
    # An __init__ module *is* its own package, so a level-1 import stays put.
    ascend = level if not is_package else level - 1
    segments = current_module.split(".")
    keep = len(segments) - ascend
    package = ".".join(segments[:keep]) if keep >= 0 else ""
    return ".".join(piece for piece in (package, module) if piece)
875
+
876
+
877
def _is_test(relative: str, name: str) -> bool:
    """Whether a definition is test code.

    True when the function name or the file name starts with ``test_``, or when
    any path component is exactly ``tests``.
    """
    location = Path(relative)
    if name.startswith("test_"):
        return True
    if "tests" in location.parts:
        return True
    return location.name.startswith("test_")
880
+
881
+
882
def _branch_outcome(stmts: list[ast.stmt]) -> str:
    """Classify how control leaves a branch body: one of common.BRANCH_OUTCOMES.

    No-op statements are ignored. The first statement that settles the outcome
    wins: a return/raise/continue/break, a ``try`` that terminates on every path,
    or an ``if``/``else`` whose two arms both terminate.
    """
    meaningful = [stmt for stmt in stmts if not _is_noop(stmt)]
    if not meaningful:
        return EMPTY

    direct_exits = (
        (ast.Return, RETURNS),
        (ast.Raise, RAISES),
        (ast.Continue, CONTINUES),
        # break exits the enclosing loop/switch; control resumes after it.
        (ast.Break, FALLS_THROUGH),
    )
    for stmt in meaningful:
        for node_type, outcome in direct_exits:
            if isinstance(stmt, node_type):
                return outcome
        if isinstance(stmt, ast.Try):
            try_outcome = _try_statement_outcome(stmt)
            if _terminates(try_outcome):
                return try_outcome
        elif isinstance(stmt, ast.If) and stmt.orelse:
            then_outcome = _branch_outcome(stmt.body)
            else_outcome = _branch_outcome(stmt.orelse)
            if _terminates(then_outcome) and _terminates(else_outcome):
                # Arms that end differently are reported as RETURNS.
                return then_outcome if then_outcome == else_outcome else RETURNS
    return FALLS_THROUGH
907
+
908
+
909
def _try_statement_outcome(statement: ast.Try) -> str:
    """Classify how control leaves a whole ``try`` statement.

    A terminating ``finally`` decides the result. Otherwise the statement
    terminates only if the success path (body, or ``else`` when the body falls
    through) and every handler terminate. Paths that end differently are reported
    as RETURNS.
    """
    final_outcome = _branch_outcome(statement.finalbody)
    if _terminates(final_outcome):
        return final_outcome

    success_outcome = _branch_outcome(statement.body)
    if success_outcome == FALLS_THROUGH and statement.orelse:
        success_outcome = _branch_outcome(statement.orelse)

    outcomes = [success_outcome]
    outcomes.extend(_branch_outcome(handler.body) for handler in statement.handlers)
    if not all(_terminates(outcome) for outcome in outcomes):
        return FALLS_THROUGH
    return outcomes[0] if len(set(outcomes)) == 1 else RETURNS
929
+
930
+
931
def _terminates(outcome: str) -> bool:
    """Whether ``outcome`` is RETURNS, RAISES or CONTINUES."""
    return outcome in (RETURNS, RAISES, CONTINUES)
933
+
934
+
935
+ def _is_noop(stmt: ast.stmt) -> bool:
936
+ if isinstance(stmt, ast.Pass):
937
+ return True
938
+ # Docstrings, bare string literals, and `...` placeholders carry no behavior.
939
+ return isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Constant)