codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. codedebrief/__init__.py +12 -0
  2. codedebrief/analysis/__init__.py +16 -0
  3. codedebrief/analysis/common.py +527 -0
  4. codedebrief/analysis/discovery.py +100 -0
  5. codedebrief/analysis/languages/__init__.py +6 -0
  6. codedebrief/analysis/languages/_common.py +68 -0
  7. codedebrief/analysis/languages/c.py +96 -0
  8. codedebrief/analysis/languages/cpp.py +146 -0
  9. codedebrief/analysis/languages/csharp.py +137 -0
  10. codedebrief/analysis/languages/go.py +157 -0
  11. codedebrief/analysis/languages/java.py +158 -0
  12. codedebrief/analysis/languages/php.py +83 -0
  13. codedebrief/analysis/languages/ruby.py +75 -0
  14. codedebrief/analysis/languages/rust.py +96 -0
  15. codedebrief/analysis/project.py +373 -0
  16. codedebrief/analysis/python.py +939 -0
  17. codedebrief/analysis/registry.py +320 -0
  18. codedebrief/analysis/treesitter.py +884 -0
  19. codedebrief/analysis/typescript.py +1019 -0
  20. codedebrief/artifacts.py +49 -0
  21. codedebrief/cli.py +585 -0
  22. codedebrief/config.py +226 -0
  23. codedebrief/doctor.py +175 -0
  24. codedebrief/install.py +441 -0
  25. codedebrief/mcp_server.py +2720 -0
  26. codedebrief/model.py +189 -0
  27. codedebrief/py.typed +1 -0
  28. codedebrief/quality.py +392 -0
  29. codedebrief/query.py +641 -0
  30. codedebrief/render/__init__.py +6 -0
  31. codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
  32. codedebrief/render/assets/panels.js +462 -0
  33. codedebrief/render/assets/shell.js +1649 -0
  34. codedebrief/render/assets/styles.css +1715 -0
  35. codedebrief/render/assets/tree.js +616 -0
  36. codedebrief/render/html.py +191 -0
  37. codedebrief/render/markdown.py +153 -0
  38. codedebrief/render/payload.py +326 -0
  39. codedebrief/render/snapshot.py +769 -0
  40. codedebrief/schema/codedebrief.schema.json +449 -0
  41. codedebrief/util.py +65 -0
  42. codedebrief/validation.py +214 -0
  43. codedebrief-0.11.0.dist-info/METADATA +426 -0
  44. codedebrief-0.11.0.dist-info/RECORD +48 -0
  45. codedebrief-0.11.0.dist-info/WHEEL +4 -0
  46. codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
  47. codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
  48. codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0
@@ -0,0 +1,12 @@
1
+ """CodeDebrief turns source code into source-grounded workflow flowcharts."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from codedebrief.model import ProjectModel
6
+
7
+ __all__ = ["ProjectModel"]
8
+
9
+ try:
10
+ __version__ = version("codedebrief")
11
+ except PackageNotFoundError: # pragma: no cover - only when imported outside an install.
12
+ __version__ = "0.0.0"
@@ -0,0 +1,16 @@
1
+ """Language analyzers and project-level linking."""
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ if TYPE_CHECKING:
6
+ from codedebrief.analysis.project import AnalysisResult, ProjectAnalyzer
7
+
8
+ __all__ = ["AnalysisResult", "ProjectAnalyzer"]
9
+
10
+
11
def __getattr__(name: str) -> object:
    """Resolve the names listed in ``__all__`` on attribute access.

    The project module is imported inside the function rather than at module
    top, so it is loaded only when one of the exported names is requested.

    Raises:
        AttributeError: if ``name`` is not listed in ``__all__``.
    """
    if name not in __all__:
        raise AttributeError(f"module 'codedebrief.analysis' has no attribute {name!r}")

    from codedebrief.analysis.project import AnalysisResult, ProjectAnalyzer

    exports = {"AnalysisResult": AnalysisResult, "ProjectAnalyzer": ProjectAnalyzer}
    return exports[name]
@@ -0,0 +1,527 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections.abc import Callable, Iterable
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from codedebrief.model import (
10
+ Evidence,
11
+ Flow,
12
+ FlowEdge,
13
+ FlowNode,
14
+ NodeKind,
15
+ SourceLocation,
16
+ )
17
+ from codedebrief.util import compact_text, relpath
18
+
19
+ FUNCTIONAL_TERMS = {
20
+ "active",
21
+ "admin",
22
+ "allow",
23
+ "auth",
24
+ "authorized",
25
+ "blocked",
26
+ "cancel",
27
+ "complete",
28
+ "deleted",
29
+ "deny",
30
+ "disabled",
31
+ "enabled",
32
+ "error",
33
+ "exists",
34
+ "failed",
35
+ "invalid",
36
+ "missing",
37
+ "mode",
38
+ "none",
39
+ "owner",
40
+ "paid",
41
+ "permission",
42
+ "ready",
43
+ "role",
44
+ "state",
45
+ "status",
46
+ "suspended",
47
+ "type",
48
+ "valid",
49
+ }
50
+
51
+ BOUNDARY_CALL_TERMS = {
52
+ "authorize",
53
+ "commit",
54
+ "create",
55
+ "delete",
56
+ "dispatch",
57
+ "execute",
58
+ "fetch",
59
+ "insert",
60
+ "publish",
61
+ "redirect",
62
+ "request",
63
+ "save",
64
+ "send",
65
+ "update",
66
+ "validate",
67
+ "write",
68
+ }
69
+
70
+
71
+ @dataclass(slots=True)
72
+ class PendingEdge:
73
+ node_id: str
74
+ label: str = ""
75
+
76
+
77
class FlowBuilder:
    """Append sequentially numbered nodes and edges to one ``Flow``.

    Node ids have the form ``<flow id>:n<k>`` and edge ids ``<flow id>:e<k>``;
    each counter starts at 1 and is private to this builder instance.
    """

    def __init__(self, flow: Flow) -> None:
        self.flow = flow
        self._node_number = 0
        self._edge_number = 0

    def add_node(
        self,
        kind: NodeKind,
        label: str,
        location: SourceLocation,
        incoming: list[PendingEdge],
        *,
        evidence: Evidence = Evidence.VERIFIED,
        detail: str = "",
        metadata: dict[str, Any] | None = None,
    ) -> FlowNode:
        """Create a node, append it to the flow, and connect every pending edge to it.

        ``label`` and ``detail`` are passed through ``compact_text`` with limits of
        120 and 500 respectively. A missing (or empty) ``metadata`` becomes a fresh
        empty dict. Returns the new node.
        """
        self._node_number += 1
        created = FlowNode(
            id=f"{self.flow.id}:n{self._node_number}",
            kind=kind,
            label=compact_text(label, 120),
            location=location,
            evidence=evidence,
            detail=compact_text(detail, 500),
            metadata=metadata if metadata else {},
        )
        self.flow.nodes.append(created)
        # Each pending endpoint becomes an edge that ends at the new node.
        for pending in incoming:
            self.add_edge(pending.node_id, created.id, pending.label)
        return created

    def add_edge(
        self,
        source: str,
        target: str,
        label: str = "",
        evidence: Evidence = Evidence.VERIFIED,
    ) -> FlowEdge:
        """Create an edge from ``source`` to ``target``, append it to the flow, return it."""
        self._edge_number += 1
        created = FlowEdge(
            id=f"{self.flow.id}:e{self._edge_number}",
            source=source,
            target=target,
            label=label,
            evidence=evidence,
        )
        self.flow.edges.append(created)
        return created
126
+
127
+
128
def require_tree_sitter_parse_ok(root_node: Any, relative: str, language: str) -> None:
    """Raise a clean SyntaxError when a tree-sitter parse contains error nodes.

    Tree-sitter can return a partial tree for malformed source. Editors benefit from
    that, but CodeDebrief's canonical model should not present a partial flow as if
    it were trustworthy. The project analyzer catches SyntaxError and records the
    file as a skipped-file quality signal instead.

    Raises:
        SyntaxError: carrying the ``reason`` text from ``tree_sitter_parse_error``.
    """
    failure = tree_sitter_parse_error(root_node, relative, language)
    if failure is not None:
        raise SyntaxError(failure["reason"])
140
+
141
+
142
+ def tree_sitter_parse_error(root_node: Any, relative: str, language: str) -> dict[str, Any] | None:
143
+ if not bool(getattr(root_node, "has_error", False)):
144
+ return None
145
+ error_node = _first_tree_sitter_error(root_node) or root_node
146
+ point = getattr(error_node, "start_point", None)
147
+ line = int(getattr(point, "row", 0)) + 1
148
+ kind = str(getattr(error_node, "type", "ERROR"))
149
+ return {
150
+ "language": language,
151
+ "path": relative,
152
+ "line": line,
153
+ "kind": kind,
154
+ "reason": f"{language} parse error in {relative}:{line} near {kind}",
155
+ }
156
+
157
+
158
+ def _first_tree_sitter_error(node: Any) -> Any | None:
159
+ if str(getattr(node, "type", "")) == "ERROR":
160
+ return node
161
+ for child in getattr(node, "children", []) or []:
162
+ if bool(getattr(child, "has_error", False)):
163
+ found = _first_tree_sitter_error(child)
164
+ return found or child
165
+ return None
166
+
167
+
168
def is_functional_condition(condition: str, branch_text: str = "") -> bool:
    """Report whether a condition (plus optional branch text) looks functional.

    The two texts are joined and lower-cased, then checked in order:
    an identifier token equal to a FUNCTIONAL_TERMS entry, a whole-word
    return/raise/throw/redirect, or any BOUNDARY_CALL_TERMS entry as a substring.
    """
    haystack = f"{condition} {branch_text}".lower()
    identifiers = set(re.findall(r"[a-zA-Z_][a-zA-Z0-9_]*", haystack))
    if not FUNCTIONAL_TERMS.isdisjoint(identifiers):
        return True
    # Control-flow outcomes mark a branch as functional, including a bare `return`.
    if re.search(r"\b(return|raise|throw|redirect)\b", haystack) is not None:
        return True
    for term in BOUNDARY_CALL_TERMS:
        if term in haystack:
            return True
    return False
177
+
178
+
179
+ # Canonical per-branch terminal behavior, recorded on a decision node's `branches`
180
+ # metadata and validated by branch().
181
+ RETURNS = "returns"
182
+ RAISES = "raises"
183
+ FALLS_THROUGH = "falls_through"
184
+ EMPTY = "empty"
185
+ CONTINUES = "continues"
186
+ BRANCH_OUTCOMES = frozenset({RETURNS, RAISES, FALLS_THROUGH, EMPTY, CONTINUES})
187
+
188
+ # Structural branch *labels* the walkers emit on decision edges.
189
+ YES = "Yes"
190
+ NO = "No"
191
+ SUCCESS = "Success"
192
+ DEFAULT = "default"
193
+ WILDCARD = "_"
194
+ # Labels that mark a genuine else/default branch (vs the positive case).
195
+ FALLBACK_LABELS = frozenset({NO, DEFAULT, WILDCARD})
196
+
197
+ # Value-dispatch decision constructs, stored in a decision node's `operator`.
198
+ MATCH = "match"
199
+ SWITCH = "switch"
200
+ DISPATCH_OPERATORS = frozenset({MATCH, SWITCH})
201
+
202
+ # Negative comparison operators stored in a decision node's `operator`. A negative
203
+ # comparison (status != X / status not in {...} / x is not Y) is a guard that allows the
204
+ # rest, not a positive value-dispatch over those members.
205
+ OP_NE = "!="
206
+ OP_IS_NOT = "is not"
207
+ OP_NOT_IN = "not in"
208
+ NEGATIVE_OPERATORS = frozenset({OP_NE, OP_NOT_IN, OP_IS_NOT})
209
+
210
+ DOMAIN_TERMS = ("status", "state", "role", "type", "kind", "mode", "permission")
211
+ _IDENTITY_OPERATORS = r"==|!=|\bis not\b|\bnot in\b|\bis\b|\bin\b"
212
+
213
+
214
def domain_from_subject(subject: str) -> str:
    """Return the first DOMAIN_TERMS entry found inside ``subject``, or "".

    Matching is a case-insensitive substring test, tried in DOMAIN_TERMS order.
    """
    lowered = subject.lower()
    for term in DOMAIN_TERMS:
        if term in lowered:
            return term
    return ""
218
+
219
+
220
def branch(label: str, outcome: str, *, implicit: bool = False) -> dict[str, Any]:
    """Build one decision-branch record for a node's `branches` metadata.

    Args:
        label: Branch label stored under ``"label"``.
        outcome: Terminal behavior of the branch; must be in BRANCH_OUTCOMES.
        implicit: Stored under ``"implicit"``; defaults to False.

    Returns:
        A dict with the keys ``label``, ``outcome`` and ``implicit``.

    Raises:
        AssertionError: if ``outcome`` is not in BRANCH_OUTCOMES.
    """
    if outcome not in BRANCH_OUTCOMES:
        # Raised explicitly rather than via `assert`, which is stripped under
        # `python -O`. The exception type is unchanged for existing callers.
        raise AssertionError(f"unknown branch outcome: {outcome!r}")
    return {"label": label, "outcome": outcome, "implicit": implicit}
224
+
225
+
226
+ def decision_identity(
227
+ *,
228
+ condition: str,
229
+ subject: str,
230
+ operator: str,
231
+ domain: str = "",
232
+ values: list[str] | None = None,
233
+ negation: bool = False,
234
+ namespace: str | None = None,
235
+ ) -> dict[str, Any]:
236
+ """Assemble the canonical decision-node metadata key set.
237
+
238
+ Single constructor so every decision node - if/elif, match, switch, try - carries
239
+ the same shape (condition/domain/values plus the identity fields).
240
+ """
241
+ sorted_values = sorted(set(values or []))
242
+ resolved_namespace = namespace if namespace is not None else value_namespace(sorted_values)
243
+ return {
244
+ "condition": condition,
245
+ "domain": domain,
246
+ "values": sorted_values,
247
+ "subject": subject,
248
+ "operator": operator,
249
+ "negation": negation,
250
+ "value_namespace": resolved_namespace,
251
+ }
252
+
253
+
254
+ _VALUE_OPERATORS = r"==|!=|\bis not\b|\bnot in\b|\bin\b|\bis\b"
255
+ # Tuple, list, set literal, dotted identifier, or quoted string after a comparison.
256
+ _VALUE_LITERAL = r"\([^)]*\)|\[[^\]]*\]|\{[^}]*\}|[A-Za-z_][\w.]*|['\"][^'\"]+['\"]"
257
+ _VALUE_PATTERN = re.compile(rf"(?:{_VALUE_OPERATORS})\s*(?:{_VALUE_LITERAL})")
258
+ _LEADING_OPERATOR = re.compile(rf"^(?:{_VALUE_OPERATORS})\s*")
259
+ _VALUE_STRIP = " '\"[](){}"
260
+
261
+
262
def decision_metadata(condition: str) -> dict[str, Any]:
    """Derive decision-node metadata from the raw text of a condition.

    The condition is first passed through ``compact_text`` (limit 240). The domain
    is the first DOMAIN_TERMS entry that occurs as a whole word. Values are the
    operands matched by ``_VALUE_PATTERN``, split on ``,`` and ``|`` and stripped
    of quotes and brackets. Subject, operator and negation come from
    ``parse_subject_operator``.
    """
    compact = compact_text(condition, 240)
    lowered = compact.lower()

    domain = ""
    for term in DOMAIN_TERMS:
        if re.search(rf"\b{term}\b", lowered):
            domain = term
            break

    values: list[str] = [
        cleaned
        for matched in _VALUE_PATTERN.findall(compact)
        # Drop the leading comparison operator, then split compound operands.
        for piece in re.split(r"[,|]", _LEADING_OPERATOR.sub("", matched))
        if (cleaned := piece.strip(_VALUE_STRIP))
    ]

    subject, operator, negation = parse_subject_operator(compact)
    return decision_identity(
        condition=compact,
        subject=subject,
        operator=operator,
        domain=domain,
        values=values,
        negation=negation,
    )
282
+
283
+
284
def parse_subject_operator(condition: str) -> tuple[str, str, bool]:
    """Split a decision condition into ``(subject, operator, negation)``.

    A comparison yields its stripped left-hand side and one of
    ==/!=/is/is not/in/not in; negation is True when the condition starts with
    ``not`` or ``!``. A bare truthiness check (``not user.active``, ``!ctx.ok``)
    yields the text without that prefix, an empty operator, and the negation flag.
    """
    text = condition.strip()
    comparison = re.match(
        rf"^\s*(?P<neg>not\s+|!)?\s*(?P<lhs>.+?)\s*(?P<op>{_IDENTITY_OPERATORS})\s*(?P<rhs>.+)$",
        text,
    )
    if comparison is not None:
        subject = comparison.group("lhs").strip()
        operator = re.sub(r"\s+", " ", comparison.group("op").strip())
        negated = bool(comparison.group("neg"))
        return subject, operator, negated

    # No comparison operator: treat the whole text as a truthiness check.
    negated = re.match(r"\s*(not\s+|!)", text) is not None
    bare_subject = re.sub(r"^\s*(not\s+|!)\s*", "", text).strip()
    return bare_subject, "", negated
303
+
304
+
305
def value_namespace(values: list[str]) -> str:
    """Return the dotted prefix shared by the compared values (``Foo.BAR`` -> ``Foo``).

    Only values containing a ``.`` are considered; the prefix is everything before
    the last dot. The result is that prefix when all dotted values agree on it,
    and "" when they disagree or when no value is dotted.
    """
    namespaces: set[str] = set()
    for value in values:
        head, dot, _tail = value.rpartition(".")
        if dot:
            namespaces.add(head)
    if len(namespaces) != 1:
        return ""
    (only,) = namespaces
    return only
312
+
313
+
314
+ # Marker for a module's default export, so a default import resolves to whichever
315
+ # flow carries `default_export` rather than guessing its name. `#` cannot appear in
316
+ # an identifier, so it never collides with a real symbol.
317
+ DEFAULT_EXPORT_MARKER = "#default"
318
+
319
+ # Confidence tiers recorded on a resolved call's `link_confidence`.
320
+ CONFIDENCE_HIGH = "high"
321
+ CONFIDENCE_MEDIUM = "medium"
322
+ CONFIDENCE_LOW = "low"
323
+ CONFIDENCE_NONE = "none"
324
+ LINK_CONFIDENCES = frozenset({CONFIDENCE_HIGH, CONFIDENCE_MEDIUM, CONFIDENCE_LOW, CONFIDENCE_NONE})
325
+
326
+
327
def resolve_qualified(raw: str, import_map: dict[str, str], current_module: str) -> str:
    """Turn a call name into a ``module:symbol`` reference using the import map.

    Import-map values carry the boundary: ``module:symbol`` binds a symbol
    (``from m import f``) and ``module:`` binds a module, so the next attribute is
    the symbol. Keeping the ``:`` stops a module path from being mistaken for
    attribute access on a value. A name whose head is not in the map is assumed to
    live in ``current_module``.

    Prefixes are tried longest first, so ``import pkg.util`` (``pkg.util:``)
    resolves ``pkg.util.persist`` to ``pkg.util:persist`` instead of stopping at
    ``pkg``.
    """
    segments = raw.split(".")
    cut = len(segments)
    while cut > 0:
        bound = import_map.get(".".join(segments[:cut]))
        if bound is not None:
            remainder = ".".join(segments[cut:])
            if not remainder:
                return bound
            # A module binding ends in ":" and takes the symbol directly;
            # a symbol binding continues with attribute access.
            joiner = "" if bound.endswith(":") else "."
            return f"{bound}{joiner}{remainder}"
        cut -= 1
    return f"{current_module}:{raw}"
349
+
350
+
351
def attach_qualified_calls(flow: Flow, import_map: dict[str, str], current_module: str) -> None:
    """Store `qualified_calls` (``module:symbol`` references) on every call node.

    Each entry of a call node's ``calls`` metadata is resolved through the import
    map. Unmapped heads become current-module references, which are expected to
    miss for external calls and hit for local ones. Non-call nodes are untouched.
    """
    for node in flow.nodes:
        if node.kind is NodeKind.CALL:
            qualified: list[str] = []
            for item in node.metadata.get("calls", []):
                qualified.append(resolve_qualified(str(item), import_map, current_module))
            node.metadata["qualified_calls"] = qualified
364
+
365
+
366
def dependency_paths_from_import_map(
    import_map: dict[str, str],
    root: Path,
    *,
    module_suffixes: tuple[str, ...],
    package_files: tuple[str, ...] = (),
    package_directories: bool = False,
    include_path: Callable[[str], bool] = lambda relative: True,
) -> list[str]:
    """Map the modules of an import map to first-party source paths under ``root``.

    Import maps may name external packages. A dependency is emitted only when a
    candidate file exists inside the analyzed folder, which keeps impact edges
    local-first and deterministic.

    For each module the candidates are ``<module path><suffix>`` for every entry
    of ``module_suffixes``, then ``<module path>/<filename>`` for every entry of
    ``package_files``. The first candidate that is a file and passes
    ``include_path`` settles the module. If none does and ``package_directories``
    is set, every matching-suffix file directly inside the module's directory is
    emitted instead. Paths are returned in first-seen order without duplicates.
    """
    dependencies: list[str] = []
    seen: set[str] = set()

    def remember(relative: str) -> None:
        if relative not in seen:
            dependencies.append(relative)
            seen.add(relative)

    for module in _import_map_modules(import_map):
        module_path = module.replace(".", "/")
        candidate_names = [f"{module_path}{suffix}" for suffix in module_suffixes]
        candidate_names += [f"{module_path}/{filename}" for filename in package_files]

        settled = False
        for name in candidate_names:
            target = root / name
            if not target.is_file():
                continue
            relative = relpath(target, root)
            if include_path(relative):
                remember(relative)
                settled = True
                break
        if settled or not package_directories:
            continue

        # No single file matched: fall back to the files directly in the package dir.
        package_dir = root / module_path
        if not package_dir.is_dir():
            continue
        for suffix in module_suffixes:
            for member in sorted(package_dir.glob(f"*{suffix}")):
                if not member.is_file():
                    continue
                relative = relpath(member, root)
                if include_path(relative):
                    remember(relative)
    return dependencies
417
+
418
+
419
def _import_map_modules(import_map: dict[str, str]) -> list[str]:
    """Return the distinct module parts of the import-map values, in first-seen order.

    The module part is the text before the first ``:`` of each value; values with
    an empty module part are skipped.
    """
    module_names = (str(value).partition(":")[0] for value in import_map.values())
    # dict.fromkeys removes duplicates while keeping insertion order.
    return list(dict.fromkeys(name for name in module_names if name))
429
+
430
+
431
def annotate_reachability(flow: Flow) -> None:
    """Set `reachable_from_entry` / `reaches_terminal` in every node's metadata.

    Deterministic graph reachability: a forward walk from the entry nodes and a
    reverse walk from the terminal/error nodes. Edges whose source or target is
    not a node of this flow are ignored. Surfaced for navigation and explanation.
    """
    node_ids = [node.id for node in flow.nodes]
    forward: dict[str, list[str]] = {node_id: [] for node_id in node_ids}
    backward: dict[str, list[str]] = {node_id: [] for node_id in node_ids}
    for edge in flow.edges:
        if edge.source not in forward or edge.target not in backward:
            continue
        forward[edge.source].append(edge.target)
        backward[edge.target].append(edge.source)

    entry_ids: list[str] = []
    exit_ids: list[str] = []
    for node in flow.nodes:
        if node.kind is NodeKind.ENTRY:
            entry_ids.append(node.id)
        if node.kind in (NodeKind.TERMINAL, NodeKind.ERROR):
            exit_ids.append(node.id)

    from_entry = _reach(entry_ids, forward)
    to_terminal = _reach(exit_ids, backward)
    for node in flow.nodes:
        node.metadata["reachable_from_entry"] = node.id in from_entry
        node.metadata["reaches_terminal"] = node.id in to_terminal
451
+
452
+
453
def _reach(seeds: list[str], adjacency: dict[str, list[str]]) -> set[str]:
    """Return the seeds plus every id reachable from them through ``adjacency``.

    Ids missing from ``adjacency`` are treated as having no neighbors.
    """
    visited: set[str] = set(seeds)
    frontier = list(seeds)
    while frontier:
        node_id = frontier.pop()
        fresh = [neighbor for neighbor in adjacency.get(node_id, ()) if neighbor not in visited]
        visited.update(fresh)
        frontier.extend(fresh)
    return visited
463
+
464
+
465
def call_is_boundary(name: str) -> bool:
    """Report whether ``name`` contains any BOUNDARY_CALL_TERMS entry (case-insensitive)."""
    haystack = name.lower()
    for term in BOUNDARY_CALL_TERMS:
        if term in haystack:
            return True
    return False
468
+
469
+
470
+ # Side-effect categories inferred from a call's leaf name. Used as explanatory metadata
471
+ # for calls and branch behavior.
472
+ EFFECT_LEXICON: dict[str, tuple[str, ...]] = {
473
+ "auth_check": (
474
+ "require_role",
475
+ "require_admin",
476
+ "require_permission",
477
+ "check_permission",
478
+ "ensure_authenticated",
479
+ "ensure_admin",
480
+ "get_current_user",
481
+ "authorize",
482
+ "authenticate",
483
+ "verify_token",
484
+ "has_permission",
485
+ "is_authorized",
486
+ ),
487
+ "db_write": ("save", "insert", "update", "delete", "create", "commit", "persist", "upsert"),
488
+ "db_read": ("fetch", "find", "load", "query", "select", "lookup"),
489
+ "network": ("send", "publish", "dispatch", "notify", "emit", "request"),
490
+ "log": ("log", "warn", "warning", "capture_exception", "alert"),
491
+ }
492
+
493
+
494
+ # Receiver names that mark a call as logging regardless of the level method
495
+ # (so `logger.info` / `log.error` / `self.logging.warning` all count as a log).
496
+ _LOGGER_RECEIVERS = {"log", "logger", "logging"}
497
+
498
+
499
def _to_snake(name: str) -> str:
    """Convert a camelCase / PascalCase identifier to snake_case.

    An underscore is inserted only at real word boundaries: between a lowercase
    letter or digit and an uppercase letter (``requireRole`` -> ``require_role``),
    and between an acronym and a following capitalized word (``sendHTTPRequest``
    -> ``send_http_request``). Acronyms therefore stay whole, and an identifier
    that already contains underscores (``Require_Role``) does not gain doubled
    ones, so the result lines up with the ``_term_`` matching in ``effect_tags``.
    """
    return re.sub(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", "_", name).lower()
501
+
502
+
503
def effect_tags(call_names: Iterable[str]) -> list[str]:
    """Return the sorted side-effect categories implied by a set of call names.

    The last dotted segment of each name is snake-cased and wrapped in
    underscores, and an EFFECT_LEXICON term counts only when it appears as
    ``_term_`` (word-boundary match). Independently, any earlier segment that
    names a logger receiver adds ``"log"``.
    """
    found: set[str] = set()
    for raw in call_names:
        *receivers, leaf = str(raw).split(".")
        padded = f"_{_to_snake(leaf)}_"
        found.update(
            effect
            for effect, terms in EFFECT_LEXICON.items()
            if any(f"_{term}_" in padded for term in terms)
        )
        if any(receiver.lower() in _LOGGER_RECEIVERS for receiver in receivers):
            found.add("log")
    return sorted(found)
515
+
516
+
517
def tag_call_effects(flow: Flow) -> None:
    """Tag call nodes with `effects` and set the flow's `performs_auth_check`.

    A call node receives an ``effects`` entry only when at least one effect is
    inferred from its ``calls`` metadata. ``performs_auth_check`` is always written
    and is True when any call node carries ``auth_check``.
    """
    saw_auth_check = False
    for node in flow.nodes:
        if node.kind is NodeKind.CALL:
            tags = effect_tags(str(item) for item in node.metadata.get("calls", []))
            if tags:
                node.metadata["effects"] = tags
            if "auth_check" in tags:
                saw_auth_check = True
    flow.metadata["performs_auth_check"] = saw_auth_check
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from collections.abc import Iterator
5
+ from pathlib import Path
6
+
7
+ from codedebrief.analysis.registry import supported_suffixes
8
+ from codedebrief.config import CodeDebriefConfig
9
+
10
+ # Running CodeDebrief package dir: discovery.py is <pkg>/analysis/discovery.py,
11
+ # so two parents up is <pkg> (".../codedebrief").
12
+ _SELF_PACKAGE_DIR = Path(__file__).resolve().parent.parent
13
+
14
+
15
def discover_source_files(root: Path, config: CodeDebriefConfig) -> list[Path]:
    """Collect the analyzable source files under the configured source roots.

    A candidate is kept when it is a file with a supported suffix, does not resolve
    into one of the self-excluded roots, resolves inside ``root``, and is not
    excluded by ``config``. The result is sorted by resolved root-relative path.
    """
    root_resolved = root.resolve()
    excluded_roots = _self_exclude_roots(root_resolved) if config.self_exclude else []
    suffixes = supported_suffixes()

    def accepted(candidate: Path) -> bool:
        if not candidate.is_file():
            return False
        if candidate.suffix.lower() not in suffixes:
            return False
        # Self-exclusion uses a resolved-path prefix check, not a `config.exclude`
        # glob: the running package may live outside the analyzed tree (e.g. a
        # virtualenv has no project-relative path).
        resolved = candidate.resolve()
        if any(resolved.is_relative_to(item) for item in excluded_roots):
            return False
        # A symlink (or junction) whose target resolves outside the analyzed root
        # has no project-relative path, so relpath would raise. Skip it rather than
        # abort discovery - it isn't part of this project's tree.
        if not resolved.is_relative_to(root_resolved):
            return False
        relative = _resolved_relpath(resolved, root_resolved)
        return not (config.is_excluded(relative) or config.is_excluded_dir(relative))

    def sort_key(item: Path) -> str:
        return _resolved_relpath(item.resolve(), root_resolved)

    files: set[Path] = set()
    for source_root in config.source_roots:
        base = (root_resolved / source_root).resolve()
        if base.exists():
            files.update(
                candidate
                for candidate in _candidate_paths(root_resolved, base, config, excluded_roots)
                if accepted(candidate)
            )
    return sorted(files, key=sort_key)
42
+
43
+
44
def _candidate_paths(
    root_resolved: Path,
    base: Path,
    config: CodeDebriefConfig,
    excluded_roots: list[Path],
) -> Iterator[Path]:
    """Yield every file path under ``base``, pruning directories that must be skipped.

    When ``base`` is a file it is yielded on its own. Otherwise the tree is walked
    with ``os.walk``. A directory is skipped when it resolves outside
    ``root_resolved``, resolves under one of ``excluded_roots``, or is excluded by
    ``config``. No suffix filtering happens here.
    """
    if base.is_file():
        yield base
        return

    def off_limits(resolved: Path) -> bool:
        if not resolved.is_relative_to(root_resolved):
            return True
        return any(resolved.is_relative_to(item) for item in excluded_roots)

    def config_excludes(relative: str) -> bool:
        return config.is_excluded_dir(relative) or config.is_excluded(relative)

    def keep_child(directory: Path) -> bool:
        resolved = directory.resolve()
        if off_limits(resolved):
            return False
        return not config_excludes(_resolved_relpath(resolved, root_resolved))

    for current, dirnames, filenames in os.walk(base):
        current_path = Path(current)
        current_resolved = current_path.resolve()
        skip = off_limits(current_resolved)
        if not skip:
            relative_current = _resolved_relpath(current_resolved, root_resolved)
            # The analyzed root itself (".") is never tested against the config.
            skip = relative_current != "." and config_excludes(relative_current)
        if skip:
            dirnames[:] = []
            continue
        # Assigning to the slice prunes in place, so os.walk does not descend
        # into the removed directories.
        dirnames[:] = [dirname for dirname in dirnames if keep_child(current_path / dirname)]
        yield from (current_path / filename for filename in filenames)
83
+
84
+
85
def _resolved_relpath(path: Path, root_resolved: Path) -> str:
    """Return ``path`` relative to ``root_resolved`` as a forward-slash string.

    Raises:
        ValueError: if ``path`` is not located under ``root_resolved``.
    """
    inside_root = path.relative_to(root_resolved)
    return inside_root.as_posix()
87
+
88
+
89
def _self_exclude_roots(root: Path) -> list[Path]:
    """Return CodeDebrief's own directories, to keep the tool's parser internals out
    of the published artifact.

    The running package directory is always included. When the analyzed root *is*
    the CodeDebrief source checkout (its ``src/codedebrief`` resolves to the running
    package), the project's own ``tests/`` directory is included as well.
    """
    checkout_package = (root / "src" / "codedebrief").resolve()
    if checkout_package != _SELF_PACKAGE_DIR:
        return [_SELF_PACKAGE_DIR]
    return [_SELF_PACKAGE_DIR, (root / "tests").resolve()]
@@ -0,0 +1,6 @@
1
+ """Per-language profiles for the profile-driven tree-sitter analyzer.
2
+
3
+ Each module here defines a `LanguageProfile` and a `build_analyzer(root, config)`
4
+ factory. The factory is referenced lazily from `analysis/registry.py`, so a language's
5
+ grammar is imported only when a file of that language is actually analyzed.
6
+ """