refactorika 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. refactorika/__init__.py +3 -0
  2. refactorika/agents/__init__.py +0 -0
  3. refactorika/agents/base.py +23 -0
  4. refactorika/agents/complexity_agent.py +28 -0
  5. refactorika/agents/dead_code_agent.py +23 -0
  6. refactorika/agents/duplicate_agent.py +27 -0
  7. refactorika/agents/import_agent.py +15 -0
  8. refactorika/agents/orchestrator.py +82 -0
  9. refactorika/analysis/__init__.py +0 -0
  10. refactorika/analysis/audit.py +86 -0
  11. refactorika/analysis/call_graph.py +411 -0
  12. refactorika/analysis/dead_code.py +248 -0
  13. refactorika/analysis/duplicates.py +337 -0
  14. refactorika/analysis/embeddings.py +164 -0
  15. refactorika/analysis/parser.py +129 -0
  16. refactorika/analysis/related.py +159 -0
  17. refactorika/cli.py +382 -0
  18. refactorika/core/__init__.py +1 -0
  19. refactorika/core/analyze.py +137 -0
  20. refactorika/core/apply.py +161 -0
  21. refactorika/core/gates.py +126 -0
  22. refactorika/core/schema.py +275 -0
  23. refactorika/core/storage.py +157 -0
  24. refactorika/dashboard.py +165 -0
  25. refactorika/docs_gen.py +286 -0
  26. refactorika/harness.py +266 -0
  27. refactorika/languages/__init__.py +18 -0
  28. refactorika/languages/base.py +45 -0
  29. refactorika/languages/generic_adapter.py +18 -0
  30. refactorika/languages/python_adapter.py +49 -0
  31. refactorika/languages/registry.py +29 -0
  32. refactorika/mcp_server.py +193 -0
  33. refactorika/memory/__init__.py +0 -0
  34. refactorika/memory/agent_memory.py +116 -0
  35. refactorika/memory/context.py +113 -0
  36. refactorika/memory/vector_index.py +325 -0
  37. refactorika/observability.py +152 -0
  38. refactorika/transforms/__init__.py +0 -0
  39. refactorika/transforms/dead.py +94 -0
  40. refactorika/transforms/imports.py +95 -0
  41. refactorika-0.2.0.dist-info/METADATA +541 -0
  42. refactorika-0.2.0.dist-info/RECORD +45 -0
  43. refactorika-0.2.0.dist-info/WHEEL +4 -0
  44. refactorika-0.2.0.dist-info/entry_points.txt +3 -0
  45. refactorika-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,411 @@
1
+ """Call graph builder for dead-code reachability analysis.
2
+
3
+ Walks all *.py files in a directory (or a single file), builds a directed graph
4
+ of qualname -> set[qualname] edges, and exposes entry-point heuristics.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ from refactorika.analysis.parser import get_tree, iter_imports, iter_symbols
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Helpers
16
+ # ---------------------------------------------------------------------------
17
+
18
+ _SKIP_DIRS = {
19
+ ".venv",
20
+ "__pycache__",
21
+ ".git",
22
+ ".mypy_cache",
23
+ ".pytest_cache",
24
+ ".ruff_cache",
25
+ }
26
+
27
+ _ENTRY_DECORATORS = {"app.route", "click.command", "pytest.fixture"}
28
+
29
+
30
+ def _module_name(file_path: Path, root: Path) -> str:
31
+ """Derive dotted module name from file path relative to root."""
32
+ try:
33
+ rel = file_path.relative_to(root)
34
+ except ValueError:
35
+ rel = file_path
36
+ parts = list(rel.parts)
37
+ if parts and parts[-1].endswith(".py"):
38
+ parts[-1] = parts[-1][:-3]
39
+ if parts and parts[-1] == "__init__":
40
+ parts = parts[:-1]
41
+ return ".".join(parts) if parts else file_path.stem
42
+
43
+
44
def _collect_py_files(path: str) -> tuple[list[Path], Path]:
    """Return ``(files, root)`` for *path*.

    * If *path* is a file, ``files`` is that single file and ``root`` is its
      parent directory.
    * Otherwise *path* is searched recursively for ``*.py`` files, skipping any
      file that sits inside a directory listed in ``_SKIP_DIRS``; ``root`` is
      *path* itself.

    Only the path components *below* the scan root are compared against
    ``_SKIP_DIRS``. Comparing the full path would silently drop every file of a
    project that happens to be checked out underneath a directory with such a
    name (e.g. ``~/.venv/src/project``). The result is sorted so the scan order
    does not depend on filesystem walk order.
    """
    base = Path(path)
    if base.is_file():
        return [base], base.parent

    files = sorted(
        candidate
        for candidate in base.rglob("*.py")
        # rglob results are always located under ``base``.
        if _SKIP_DIRS.isdisjoint(candidate.relative_to(base).parts)
    )
    return files, base
55
+
56
+
57
+ def _string_literal_text(node) -> Optional[str]:
58
+ """Return the decoded inner text of a string node, or None if not a string."""
59
+ if node.type != "string":
60
+ return None
61
+ parts: list[str] = []
62
+ for child in node.children:
63
+ if child.type == "string_content" and child.text:
64
+ parts.append(child.text.decode())
65
+ if parts:
66
+ return "".join(parts)
67
+ # Fallback: strip the surrounding quotes from the raw text.
68
+ raw = node.text.decode() if node.text else ""
69
+ return raw.strip("\"'")
70
+
71
+
72
def _parse_all_from_tree(tree) -> set[str]:
    """Return the names listed in module-level ``__all__`` assignments.

    Only direct children of the root node are inspected. The right-hand side
    must be a list, tuple or set literal; each element that is a string literal
    holding a valid identifier is collected.
    """
    exported: set[str] = set()
    for stmt in tree.root_node.children:
        # An assignment statement is wrapped in an expression_statement.
        candidate = stmt
        if stmt.type == "expression_statement" and stmt.children:
            candidate = stmt.children[0]
        if candidate.type != "assignment":
            continue

        target = candidate.child_by_field_name("left")
        value = candidate.child_by_field_name("right")
        if target is None or value is None:
            continue

        targets_dunder_all = (
            target.type == "identifier"
            and target.text
            and target.text.decode() == "__all__"
        )
        if not targets_dunder_all:
            continue
        if value.type not in ("list", "tuple", "set"):
            continue

        for item in value.children:
            literal = _string_literal_text(item)
            if literal and literal.isidentifier():
                exported.add(literal)
    return exported
102
+
103
+
104
+ def _find_main_block(tree):
105
+ """Return the ``if __name__ == "__main__":`` if_statement node, or None."""
106
+ root = tree.root_node
107
+ for node in root.children:
108
+ if node.type != "if_statement":
109
+ continue
110
+ cond = node.child_by_field_name("condition")
111
+ if cond is None or cond.type != "comparison_operator":
112
+ continue
113
+ cond_text = cond.text.decode() if cond.text else ""
114
+ # Normalize quotes/spacing: __name__ == "__main__" or '__main__'.
115
+ normalized = cond_text.replace(" ", "")
116
+ if "__name__==" in normalized and "__main__" in normalized:
117
+ return node
118
+ return None
119
+
120
+
121
def _has_main_block(tree) -> bool:
    """Return True when *tree* contains a module-level ``__main__`` guard."""
    main_block = _find_main_block(tree)
    return main_block is not None
123
+
124
+
125
def _main_block_calls(tree) -> set[str]:
    """Return the names called anywhere inside the ``__main__`` guard.

    The whole subtree of the guard's ``if`` statement is searched, so
    multi-line and nested calls are included. An empty set is returned when the
    module has no such guard.
    """
    main_block = _find_main_block(tree)
    return set() if main_block is None else set(_iter_calls_from_node(main_block))
135
+
136
+
137
+ def _decorator_texts(node) -> list[str]:
138
+ """Return decorator expression texts for a function/class node."""
139
+ decorators: list[str] = []
140
+ for child in node.children:
141
+ if child.type == "decorator":
142
+ # decorator -> '@' followed by the expression
143
+ text = child.text.decode() if child.text else ""
144
+ text = text.lstrip("@").strip()
145
+ decorators.append(text)
146
+ return decorators
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # CallGraph
151
+ # ---------------------------------------------------------------------------
152
+
153
+
154
class CallGraph:
    """Directed call graph over all symbols in a Python project.

    Nodes are qualnames of the form ``<module>.<symbol name>``; an edge points
    from a symbol to a qualname that its body calls. Instances are normally
    created with :meth:`build`.
    """

    def __init__(self) -> None:
        # qualname -> (kind, file_path_str, line)
        self._nodes: dict[str, tuple[str, str, int]] = {}
        # qualname -> set of qualnames it calls
        self._edges: dict[str, set[str]] = {}
        # qualnames considered entry points
        self._entry_points: set[str] = set()

    # ------------------------------------------------------------------
    # Builder
    # ------------------------------------------------------------------

    @classmethod
    def build(cls, path: str) -> "CallGraph":
        """Parse all ``*.py`` files under *path* and construct the call graph.

        Files that cannot be read or parsed are skipped. A symbol becomes an
        entry point when any of these holds: its name is public (no leading
        underscore), it is listed in ``__all__``, it is called from the
        module's ``__main__`` guard, its name starts with ``test_`` or it lives
        in a test file (``test_*`` / ``*_test.py``), or one of its decorators
        starts with an entry in ``_ENTRY_DECORATORS``.
        """
        cg = cls()
        files, root = _collect_py_files(path)

        # Pass 1: collect every symbol and keep the per-file data needed later
        # for edge resolution.
        per_file_symbols: dict[str, dict[str, str]] = {}  # module -> {name: qualname}
        file_trees: dict[str, object] = {}  # module -> tree
        file_paths: dict[str, str] = {}  # module -> filesystem path

        for fpath in files:
            try:
                source = fpath.read_text(encoding="utf-8", errors="replace")
                tree = get_tree(source)
            except Exception:
                # Best effort: an unreadable or unparseable file is skipped.
                continue

            module = _module_name(fpath, root)
            file_trees[module] = tree
            file_paths[module] = str(fpath)

            sym_map: dict[str, str] = {}
            for _node, kind, name, line in iter_symbols(tree):
                qualname = f"{module}.{name}"
                cg._nodes[qualname] = (kind, str(fpath), line)
                sym_map[name] = qualname
            per_file_symbols[module] = sym_map

        # Pass 1b: collect import aliases per module.
        per_file_imports: dict[str, dict[str, str]] = {}  # module -> {alias: qualname}
        for module, tree in file_trees.items():
            import_map: dict[str, str] = {}
            try:
                for mod, names in iter_imports(tree):
                    if names:
                        # "from orders import compute_total"
                        #   -> compute_total: orders.compute_total
                        for nm in names:
                            import_map[nm] = f"{mod}.{nm}"
                    else:
                        # bare "import foo" -> foo: foo
                        import_map[mod.split(".")[0]] = mod
            except Exception:
                # Best effort: keep whatever aliases were gathered before the
                # failure instead of aborting the whole build.
                pass
            per_file_imports[module] = import_map

        # Project-wide unqualified-name -> qualname map, restricted to names
        # that are unique across the project. Ambiguous names (two modules each
        # defining `compute`) are left out on purpose, so a bare call to one of
        # them produces no edge instead of a guess.
        by_unqualified: dict[str, list[str]] = {}
        for qualname in cg._nodes:
            by_unqualified.setdefault(qualname.split(".")[-1], []).append(qualname)
        unique_by_unqualified: dict[str, str] = {
            unq: quals[0] for unq, quals in by_unqualified.items() if len(quals) == 1
        }

        # str.startswith accepts a tuple, so one call tests every prefix.
        entry_prefixes = tuple(_ENTRY_DECORATORS)

        # Pass 2: build edges and detect entry points.
        for module, tree in file_trees.items():
            sym_map = per_file_symbols.get(module, {})
            import_map = per_file_imports.get(module, {})

            all_dunder_names = _parse_all_from_tree(tree)
            main_calls = _main_block_calls(tree)
            file_name = Path(file_paths[module]).name
            is_test_file = file_name.startswith("test_") or file_name.endswith(
                "_test.py"
            )

            for node, _kind, name, _line in iter_symbols(tree):
                qualname = f"{module}.{name}"

                is_entry = (
                    not name.startswith("_")  # public name: conservative entry
                    or name in all_dunder_names  # exported via __all__
                    or name in main_calls  # called from the __main__ guard
                    or name.startswith("test_")
                    or is_test_file
                    or any(
                        deco.startswith(entry_prefixes)
                        for deco in _decorator_texts(node)
                    )
                )
                if is_entry:
                    cg._entry_points.add(qualname)

                # Collect the call names found in this symbol's own subtree.
                try:
                    sub_tree_calls = list(_iter_calls_from_node(node))
                except Exception:
                    sub_tree_calls = []

                edge_set: set[str] = set()
                for call_name in sub_tree_calls:
                    resolved = _resolve_name(
                        call_name,
                        module,
                        sym_map,
                        import_map,
                        cg._nodes,
                        unique_by_unqualified,
                    )
                    if resolved:
                        edge_set.add(resolved)

                cg._edges.setdefault(qualname, set()).update(edge_set)

        # Ensure every node has a (possibly empty) edge set.
        for qualname in cg._nodes:
            cg._edges.setdefault(qualname, set())

        return cg

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def call_sites(self, name: str) -> int:
        """Count the symbols that have an edge to *name* (exact qualname only).

        Each source symbol stores its targets as a set, so a symbol calling
        *name* several times is counted once. Edges store fully-resolved
        qualnames, so an exact match is the correct test. The unqualified
        suffix is deliberately **not** matched: doing so would credit calls
        aimed at a *different* same-named symbol in another module, inflating
        the count and masking genuinely-dead code.
        """
        return sum(1 for targets in self._edges.values() if name in targets)

    def edges_from(self, qualname: str) -> set[str]:
        """Outbound references (qualnames) from *qualname*.

        A copy is returned, so mutating the result cannot alter the graph.
        Unknown qualnames yield an empty set.
        """
        return set(self._edges.get(qualname, ()))

    def all_symbols(self) -> set[str]:
        """All known qualnames."""
        return set(self._nodes)

    def dependents_of(self, module: str) -> list[str]:
        """Modules referencing *module* (matched by final segment) via call-graph edges.

        Returns the sorted module prefixes of every symbol that has at least
        one edge into *module*. Symbols inside *module* itself are ignored.
        """
        target = module.split(".")[-1]
        dependents: set[str] = set()
        for qualname in self._nodes:
            src_module = qualname.rsplit(".", 1)[0] if "." in qualname else qualname
            if src_module.split(".")[-1] == target:
                continue  # references within the same module aren't "dependents"
            for callee in self._edges.get(qualname, ()):
                callee_module = callee.rsplit(".", 1)[0] if "." in callee else callee
                if callee_module.split(".")[-1] == target:
                    dependents.add(src_module)
                    break
        return sorted(dependents)

    def dependent_count(self, module: str) -> int:
        """How many other modules depend on *module* (blast radius)."""
        return len(self.dependents_of(module))

    def entry_points(self) -> set[str]:
        """Conservatively reachable anchors (returned as a copy)."""
        return set(self._entry_points)

    def node_info(self, qualname: str) -> Optional[tuple[str, str, int]]:
        """Return (kind, file, line) for a qualname, or None if unknown."""
        return self._nodes.get(qualname)
351
+
352
+
353
+ # ---------------------------------------------------------------------------
354
+ # Internal helpers
355
+ # ---------------------------------------------------------------------------
356
+
357
+
358
+ def _iter_calls_from_node(node) -> list[str]:
359
+ """Collect call target names from a single AST node (and its descendants)."""
360
+ results: list[str] = []
361
+
362
+ def _walk(n) -> None:
363
+ if n.type == "call":
364
+ fn = n.child_by_field_name("function")
365
+ if fn is not None:
366
+ if fn.type == "identifier" and fn.text:
367
+ results.append(fn.text.decode())
368
+ elif fn.type == "attribute":
369
+ attr = fn.child_by_field_name("attribute")
370
+ if attr is not None and attr.text:
371
+ results.append(attr.text.decode())
372
+ for child in n.children:
373
+ _walk(child)
374
+
375
+ _walk(node)
376
+ return results
377
+
378
+
379
+ def _resolve_name(
380
+ name: str,
381
+ current_module: str,
382
+ sym_map: dict[str, str],
383
+ import_map: dict[str, str],
384
+ all_nodes: dict[str, tuple],
385
+ unique_by_unqualified: dict[str, str],
386
+ ) -> Optional[str]:
387
+ """Resolve a bare call name to a fully qualified name, or None.
388
+
389
+ Resolution is *scoped* — we never credit a call to an arbitrary same-named
390
+ symbol in another module (that invents false edges and makes genuinely-dead
391
+ code look alive). Order:
392
+
393
+ 1. Same-module symbol table.
394
+ 2. Real imported-name map (the name was explicitly imported into this module).
395
+ 3. A project-wide unqualified-name match **only when it is unambiguous**
396
+ (exactly one symbol anywhere bears that unqualified name). When the name
397
+ is ambiguous across modules, we record **no edge** rather than guessing.
398
+ """
399
+ # 1. Same-module symbol
400
+ if name in sym_map:
401
+ return sym_map[name]
402
+
403
+ # 2. Imported alias -> the real target it was imported as
404
+ if name in import_map:
405
+ candidate = import_map[name]
406
+ if candidate in all_nodes:
407
+ return candidate
408
+
409
+ # 3. Unambiguous project-wide match (one and only one symbol has this name).
410
+ # Ambiguous names resolve to None -> no edge.
411
+ return unique_by_unqualified.get(name)
@@ -0,0 +1,248 @@
1
+ """Dead-code detection via call-graph reachability.
2
+
3
+ BFS/DFS from entry points; anything not reachable is a dead-code candidate.
4
+ Confidence is assigned based on naming conventions and string-literal reflection risk.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+
11
+ from refactorika.analysis.call_graph import CallGraph, _collect_py_files
12
+ from refactorika.analysis.parser import get_tree
13
+ from refactorika.core.schema import DeadSymbol
14
+ from refactorika.core.storage import Storage
15
+
16
+ # Builtins that take a string attribute/key name and dynamically resolve a symbol.
17
+ _REFLECTION_FUNCS = {"getattr", "setattr", "hasattr", "delattr"}
18
+
19
+
20
def find_dead_code(path: str, storage: Storage) -> dict:
    """Detect unreachable symbols in *path* via call-graph reachability.

    Parameters
    ----------
    path:
        File or directory to analyse.
    storage:
        Storage instance. Its ``cache_get`` / ``cache_set`` are used to cache
        the result under a signature of the analysed files, so a re-run over
        an unchanged tree skips the whole call-graph build.

    Returns
    -------
    dict with keys:
        "path"         - the analysed path
        "entry_points" - sorted list of qualnames used as traversal roots
        "dead_symbols" - list of DeadSymbol.to_dict(), rank descending; symbols
                         of equal rank are ordered by qualname so the output is
                         identical from run to run
        "error"        - only present when building the call graph failed
    """
    # Cache on a signature of every analysed file. A re-seen, unchanged tree
    # returns the prior result without re-parsing.
    cache_key = _dir_signature(path)
    if cache_key is not None:
        cached = storage.cache_get(cache_key)
        if cached is not None:
            return cached

    try:
        call_graph = CallGraph.build(path)
    except Exception as exc:
        # Report the failure in-band; error results are never cached.
        return {
            "path": path,
            "entry_points": [],
            "dead_symbols": [],
            "error": str(exc),
        }

    all_symbols = call_graph.all_symbols()
    entry_pts = call_graph.entry_points()
    reachable = _reachable_symbols(call_graph, entry_pts & all_symbols)

    # Names that appear in *actual* reflection / dynamic-dispatch patterns
    # (getattr("name"), string dispatch-dict keys), not every string.
    reflection_names = _collect_reflection_names(path)

    dead: list[DeadSymbol] = []
    # Iterate in sorted order: set iteration order varies between runs, and
    # the stable rank sort below preserves this order among equal ranks.
    for qualname in sorted(all_symbols - reachable):
        info = call_graph.node_info(qualname)
        if info is None:
            continue
        kind, file_str, line = info

        unqualified = qualname.split(".")[-1]
        confidence, rank, reason = _classify_dead_symbol(
            unqualified, call_graph.call_sites(qualname), reflection_names
        )
        dead.append(
            DeadSymbol(
                kind=kind,
                name=qualname,
                file=file_str,
                line=line,
                confidence=confidence,
                reason=reason,
                rank=rank,
            )
        )

    # Highest confidence first (list.sort is stable, also with reverse=True).
    dead.sort(key=lambda d: d.rank, reverse=True)

    result = {
        "path": path,
        "entry_points": sorted(entry_pts),
        "dead_symbols": [d.to_dict() for d in dead],
    }
    if cache_key is not None:
        storage.cache_set(cache_key, result)
    return result


def _reachable_symbols(call_graph, roots) -> set[str]:
    """Return every qualname reachable from *roots* by following call edges."""
    reachable: set[str] = set()
    frontier = list(roots)
    while frontier:
        current = frontier.pop()
        if current in reachable:
            continue
        reachable.add(current)
        frontier.extend(
            callee
            for callee in call_graph.edges_from(current)
            if callee not in reachable
        )
    return reachable


def _classify_dead_symbol(
    unqualified: str, sites: int, reflection_names: set[str]
) -> tuple[str, int, str]:
    """Return ``(confidence, rank, reason)`` for one unreachable symbol."""
    # Reflection wins over everything: a name resolved dynamically
    # (getattr / dispatch-dict key) can't be trusted as dead, so it is flagged
    # low even for a private name with zero static call sites.
    if unqualified in reflection_names:
        reason = (
            f"Name '{unqualified}' has {sites} static call site(s) but appears as a "
            "reflection/dispatch string (getattr/dispatch key) — possible dynamic usage."
        )
        return "low", 30, reason

    if sites == 0 and unqualified.startswith("_"):
        reason = f"Private name '{unqualified}' with zero call sites and unreachable from entry points."
        return "high", 90, reason

    if sites == 0:
        reason = (
            f"Public name '{unqualified}' has zero call sites within the analysed codebase "
            "and is unreachable from entry points."
        )
        return "medium", 60, reason

    # Has call sites but is still unreachable (its callers are unreachable
    # too); treated as medium.
    reason = (
        f"Symbol '{unqualified}' is unreachable from entry points "
        f"(call_sites={sites})."
    )
    return "medium", 60, reason
145
+
146
+
147
+ # ---------------------------------------------------------------------------
148
+ # Helpers
149
+ # ---------------------------------------------------------------------------
150
+
151
+
152
def _dir_signature(path: str) -> str | None:
    """Return a cache key for *path*: sha1 over the path and every analysed file.

    The digest covers the *path* argument itself plus each readable file's
    path relative to the scan root and its content. *path* is part of the key
    because the cached result embeds it (and file locations derived from it);
    without it, identical content analysed at a different location would be
    served a result pointing at the wrong files.

    Returns ``None`` if no files are readable (nothing to cache on). Sorting
    the inputs keeps the signature stable regardless of filesystem walk order.
    """
    files, root = _collect_py_files(path)
    items: list[str] = []
    for fpath in sorted(files):
        try:
            content = fpath.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable file: leave it out of the signature.
            continue
        try:
            rel = str(fpath.relative_to(root))
        except ValueError:
            rel = str(fpath)
        items.append(f"{rel}\0{content}")
    if not items:
        return None
    payload = "\0\0".join([str(path), *items])
    # File names that are not valid UTF-8 reach Python as lone surrogates;
    # "surrogateescape" encodes them back instead of raising.
    digest = hashlib.sha1(payload.encode("utf-8", errors="surrogateescape")).hexdigest()
    return f"dead_code:{digest}"
174
+
175
+
176
def _collect_reflection_names(path: str) -> set[str]:
    """Return identifiers used at reflection / dynamic-dispatch sites under *path*.

    Deliberately narrow. A name is collected only where it could dynamically
    resolve a symbol:

    * a string-literal argument to ``getattr`` / ``setattr`` / ``hasattr`` /
      ``delattr`` (e.g. ``getattr(obj, "handle_event")``);
    * a string-literal key in a dict literal, i.e. a dispatch table
      (e.g. ``{"create": create_user, "delete": delete_user}``).

    Files that cannot be read or parsed are skipped.
    """
    found: set[str] = set()
    source_files, _root = _collect_py_files(path)
    for source_file in source_files:
        try:
            tree = get_tree(
                source_file.read_text(encoding="utf-8", errors="replace")
            )
        except Exception:
            continue
        _walk_reflection(tree.root_node, found)
    return found
199
+
200
+
201
+ def _walk_reflection(node, names: set[str]) -> None:
202
+ """Recursively collect reflection/dispatch string names from a subtree."""
203
+ if node.type == "call":
204
+ _collect_reflection_call(node, names)
205
+ elif node.type == "dictionary":
206
+ for child in node.children:
207
+ if child.type == "pair":
208
+ key = child.child_by_field_name("key")
209
+ text = _string_identifier(key) if key is not None else None
210
+ if text:
211
+ names.add(text)
212
+ for child in node.children:
213
+ _walk_reflection(child, names)
214
+
215
+
216
def _collect_reflection_call(call_node, names: set[str]) -> None:
    """Add the string-literal name arguments of a reflection call to *names*.

    Nothing happens unless the called function is a plain identifier listed in
    ``_REFLECTION_FUNCS``. Every argument that is a string literal holding a
    valid identifier is collected.
    """
    callee = call_node.child_by_field_name("function")
    if callee is None or callee.type != "identifier" or not callee.text:
        return
    if callee.text.decode() not in _REFLECTION_FUNCS:
        return

    arguments = call_node.child_by_field_name("arguments")
    if arguments is None:
        return
    names.update(
        text for text in map(_string_identifier, arguments.children) if text
    )
233
+
234
+
235
+ def _string_identifier(node) -> str | None:
236
+ """Return the inner text of a string node iff it is a valid identifier, else None."""
237
+ if node is None or node.type != "string":
238
+ return None
239
+ parts: list[str] = []
240
+ for child in node.children:
241
+ if child.type == "string_content" and child.text:
242
+ parts.append(child.text.decode())
243
+ text = (
244
+ "".join(parts)
245
+ if parts
246
+ else (node.text.decode().strip("\"'") if node.text else "")
247
+ )
248
+ return text if text.isidentifier() else None