code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,852 @@
1
+ """Graph-powered refactoring operations.
2
+
3
+ Provides rename previews, dead code detection, refactoring suggestions,
4
+ and safe application of refactoring edits to source files. All file writes
5
+ go through a preview-then-apply workflow with expiry enforcement and path
6
+ traversal prevention.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import functools
12
+ import logging
13
+ import re
14
+ import threading
15
+ import time
16
+ import uuid
17
+ from pathlib import Path
18
+ from typing import Any, Optional, Union
19
+
20
+ from .flows import _has_framework_decorator, _matches_entry_name
21
+ from .graph import GraphStore, _sanitize_name
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
# Names of base classes whose subclasses are driven by a framework (ORM
# models, Pydantic schemas, settings objects). Such classes are reached through
# metaclass/framework machinery, so dead-code detection must not flag them.
_FRAMEWORK_BASE_CLASSES = frozenset((
    "Base",
    "DeclarativeBase",
    "Model",
    "BaseModel",
    "BaseSettings",
    "db.Model",
    "TableBase",
    # AWS CDK constructs -- instantiated by CDK app wiring, not explicit CALLS.
    "Stack",
    "NestedStack",
    "Construct",
    "Resource",
))

# Suffixes of class names that indicate CDK/IaC constructs. These classes are
# instantiated by framework wiring rather than through direct CALLS edges; the
# suffix test is the fallback when no INHERITS edge to an external base exists.
_CDK_CLASS_SUFFIXES = ("Stack", "Construct", "Pipeline", "Resources", "Layer")

# Names of mock/stub variables in test files that should not be flagged dead.
# First alternative: prefixes such as mockDynamoClient, MockService,
# createMockX. Second alternative: suffixes such as s3ClientMock, dbStub.
# NOTE(review): because of re.IGNORECASE the ``[A-Z]`` classes accept any
# letter, so names like "mockery" also match -- confirm this is intended.
_MOCK_NAME_RE = re.compile(
    r"^(mock[A-Z_]|Mock[A-Z]|createMock[A-Z])|"
    r"(Mock|Stub|Fake|Spy)$",
    flags=re.IGNORECASE,
)
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Thread-safe pending refactors storage
49
+ # ---------------------------------------------------------------------------
50
+
51
# Callers take this lock around every access to ``_pending_refactors``.
_refactor_lock = threading.Lock()
# refactor_id -> preview dict; each preview carries a "created_at" timestamp.
_pending_refactors: dict[str, dict] = {}
REFACTOR_EXPIRY_SECONDS = 600  # 10 minutes


def _cleanup_expired() -> int:
    """Drop every pending refactor older than ``REFACTOR_EXPIRY_SECONDS``.

    The function does not acquire ``_refactor_lock`` itself.

    Returns:
        The number of entries that were removed.
    """
    now = time.time()
    # Collect first, delete afterwards: the dict must not change size while
    # it is being iterated.
    stale_ids = tuple(
        refactor_id
        for refactor_id, entry in _pending_refactors.items()
        if now - entry["created_at"] > REFACTOR_EXPIRY_SECONDS
    )
    for refactor_id in stale_ids:
        _pending_refactors.pop(refactor_id)
    return len(stale_ids)
66
+
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # 1. rename_preview
70
+ # ---------------------------------------------------------------------------
71
+
72
+
73
def rename_preview(
    store: GraphStore,
    old_name: str,
    new_name: str,
) -> Optional[dict[str, Any]]:
    """Build a rename edit list for *old_name* -> *new_name*.

    Finds the node via ``store.search_nodes(old_name)``, collects the
    definition site plus call and import sites, generates a ``refactor_id``
    and stores the preview in ``_pending_refactors`` (under
    ``_refactor_lock``, after purging expired entries).

    Args:
        store: Graph store used to look up the node and its incoming edges.
        old_name: Current symbol name to rename.
        new_name: Replacement name.

    Returns:
        A preview dict with ``refactor_id``, ``type``, ``old_name``,
        ``new_name``, ``edits``, ``stats`` and ``created_at`` keys, or
        ``None`` if no node matches *old_name*.
    """
    candidates = store.search_nodes(old_name, limit=10)
    # Pick the best match: an exact name match wins, otherwise the first
    # search hit is used.
    node = next((c for c in candidates if c.name == old_name), None)
    if node is None and candidates:
        node = candidates[0]
    if node is None:
        logger.warning("rename_preview: node %r not found", old_name)
        return None

    def _edit(file_path: Any, line: Any) -> dict[str, Any]:
        """Return one edit record for the given site."""
        return {
            "file": file_path,
            "line": line,
            "old": old_name,
            "new": new_name,
            "confidence": "high",
        }

    # --- Definition site ---
    edits: list[dict[str, Any]] = [_edit(node.file_path, node.line_start)]

    # Edges that target the fully-qualified node. Fetched once and reused for
    # both the CALLS pass and the IMPORTS_FROM pass below.
    qualified_edges = store.get_edges_by_target(node.qualified_name)

    # --- Call sites (CALLS edges targeting this node) ---
    edits.extend(
        _edit(edge.file_path, edge.line)
        for edge in qualified_edges
        if edge.kind == "CALLS"
    )

    # Sites already covered; later passes skip any (file, line) seen before.
    seen = {(e["file"], e["line"]) for e in edits}

    def _add_if_new(edge: Any) -> None:
        """Append an edit for *edge* unless its site is already covered."""
        key = (edge.file_path, edge.line)
        if key not in seen:
            edits.append(_edit(edge.file_path, edge.line))
            seen.add(key)

    # Also search by bare name for unqualified edges.
    for edge in store.search_edges_by_target_name(old_name, kind="CALLS"):
        _add_if_new(edge)

    # --- Import sites (IMPORTS_FROM edges targeting this node) ---
    for edge in qualified_edges:
        if edge.kind == "IMPORTS_FROM":
            _add_if_new(edge)

    # --- Stats ---
    stats = {"high": 0, "medium": 0, "low": 0}
    for e in edits:
        stats[e["confidence"]] += 1

    refactor_id = uuid.uuid4().hex[:8]
    preview: dict[str, Any] = {
        "refactor_id": refactor_id,
        "type": "rename",
        "old_name": _sanitize_name(old_name),
        "new_name": _sanitize_name(new_name),
        "edits": edits,
        "stats": stats,
        "created_at": time.time(),
    }

    with _refactor_lock:
        _cleanup_expired()
        _pending_refactors[refactor_id] = preview

    logger.info(
        "rename_preview: created refactor %s (%s -> %s, %d edits)",
        refactor_id, old_name, new_name, len(edits),
    )
    return preview
178
+
179
+
180
+ # ---------------------------------------------------------------------------
181
+ # 2. find_dead_code
182
+ # ---------------------------------------------------------------------------
183
+
184
+
185
def _is_entry_point(node: Any) -> bool:
    """Tell whether *node* is an intentional entry point.

    ``flows.detect_entry_points()`` treats every uncalled function as an
    entry point. This check is narrower: only a framework decorator or a
    conventional entry-point name counts, so that merely unreferenced code
    can still be reported as dead.
    """
    return bool(_has_framework_decorator(node) or _matches_entry_name(node))
198
+
199
+
200
# Path patterns that mark a file as test code: ``__tests__`` directories,
# ``*.spec.*`` / ``*.test.*`` JS/TS files, ``test_*.py`` modules (with or
# without a leading directory), and ``e2e-tests`` / ``test-utils`` style
# directories. Both ``/`` and ``\`` are accepted as separators.
_TEST_FILE_RE = re.compile(
    r"([\\/]__tests__[\\/]|\.spec\.[jt]sx?$|\.test\.[jt]sx?$"
    r"|(?:^|[\\/])test_[^/\\]*\.py$"
    r"|[\\/]e2e[_-]?tests?[\\/]|[\\/]test[_-]utils?[\\/])",
)


def _is_test_file(file_path: str) -> bool:
    """Return True if *file_path* looks like a test file.

    The decision is purely path-based (see ``_TEST_FILE_RE``); the file is
    never opened.
    """
    return _TEST_FILE_RE.search(file_path) is not None
211
+
212
+
213
_MIN_PKG_SEGMENT_LEN = 4  # ignore short dirs like "src", "lib", "app"


@functools.lru_cache(maxsize=4096)
def _path_segments(file_path: str) -> tuple[str, ...]:
    """Return the directory names of *file_path* usable as package anchors.

    Backslashes are treated as separators. The final component (the file
    name) is dropped, as are directories shorter than
    ``_MIN_PKG_SEGMENT_LEN`` and the generic names "home", "src", "lib"
    and "app". Results are memoised per path.
    """
    *directories, _filename = file_path.replace("\\", "/").split("/")
    anchors = []
    for segment in directories:
        if len(segment) < _MIN_PKG_SEGMENT_LEN:
            continue
        if segment in ("home", "src", "lib", "app"):
            continue
        anchors.append(segment)
    return tuple(anchors)
224
+
225
+
226
# Capitalised identifiers as they appear inside type annotations, e.g.
# "GoalCreate" in "body: GoalCreate", or "Optional" and "UserResponse" in
# "Optional[UserResponse]".
_TYPE_IDENT_RE = re.compile(r"[A-Z][A-Za-z0-9_]*")


def _collect_type_referenced_names(store: GraphStore) -> set[str]:
    """Gather capitalised identifiers used in function signatures.

    Scans the ``params`` and ``return_type`` text of every Function and Test
    node in *store*; empty or missing texts are ignored.

    Returns:
        The set of all identifiers matched by ``_TYPE_IDENT_RE``.
    """
    return {
        identifier
        for func in store.get_nodes_by_kind(kinds=["Function", "Test"])
        for annotation_text in (func.params, func.return_type)
        if annotation_text
        for identifier in _TYPE_IDENT_RE.findall(annotation_text)
    }
238
+
239
+
240
def find_dead_code(
    store: GraphStore,
    kind: Optional[str] = None,
    file_pattern: Optional[str] = None,
    root: Optional[Union[str, Path]] = None,
) -> list[dict[str, Any]]:
    """Find functions/classes with no callers, no test refs, no importers, and no references.

    Entry points (functions matching framework decorators or conventional name
    patterns like ``main``, ``test_*``, ``handle_*``) are excluded, as are
    test code, ``.d.ts`` declarations, dunder methods, constructors,
    framework-managed classes and their methods, implicitly invoked
    decorated members, and overrides of abstract methods.

    .. note::

        **Caveats — dynamic dispatch patterns.** Static analysis cannot track
        all runtime-determined call patterns. Functions registered via fully
        dynamic keys (``map[computedKey()] = fn``), ``Reflect.apply``, or
        runtime ``require()`` may still appear as dead code. Treat results as
        hints, especially for TypeScript projects that use map-based dispatch,
        plugin registries, or dynamic requires.

    Args:
        store: The GraphStore instance.
        kind: Optional filter (e.g. ``"Function"`` or ``"Class"``).
        file_pattern: Optional file-path substring filter.
        root: Optional repo root path for computing ``relative_path``.

    Returns:
        List of dead-code dicts with name, qualified_name, kind, file,
        file_path, relative_path, line, and language fields.
    """
    # Query candidate nodes.
    candidates = store.get_nodes_by_kind(
        kinds=[kind] if kind else ["Function", "Class"],
        file_pattern=file_pattern,
    )

    # Build set of class names referenced in function type annotations.
    type_ref_names = _collect_type_referenced_names(store)

    conn = store._conn

    # Build class hierarchy: class_qualified_name -> [bare_base_names]
    class_bases: dict[str, list[str]] = {}
    for source_qn, target_qn in conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'INHERITS'"
    ).fetchall():
        class_bases.setdefault(source_qn, []).append(target_qn.rsplit("::", 1)[-1])

    # Build import graph: file_path -> set of import targets of that file.
    # Used to filter bare-name caller matches to plausible callers.
    importer_files: dict[str, set[str]] = {}
    for importing_file, import_target in conn.execute(
        "SELECT file_path, target_qualified FROM edges WHERE kind = 'IMPORTS_FROM'"
    ).fetchall():
        importer_files.setdefault(importing_file, set()).add(import_target)

    # Count definitions per name among non-test functions/classes. A name
    # with exactly one definition is unambiguous, so any bare-name CALLS edge
    # to it is reliable.
    name_counts: dict[str, int] = {
        name: count
        for name, count in conn.execute(
            "SELECT name, COUNT(*) FROM nodes "
            "WHERE kind IN ('Function', 'Class') AND is_test = 0 "
            "GROUP BY name"
        ).fetchall()
    }

    # class qualified name -> qualified INHERITS targets. The store is not
    # modified here, so one lookup per class serves all of its methods.
    inherits_cache: dict[str, list[str]] = {}

    def inherit_targets(class_qn: str) -> list[str]:
        """Return the INHERITS targets of *class_qn* (cached per class)."""
        if class_qn not in inherits_cache:
            inherits_cache[class_qn] = [
                e.target_qualified
                for e in store.get_edges_by_source(class_qn)
                if e.kind == "INHERITS"
            ]
        return inherits_cache[class_qn]

    def path_reaches(imp_path: str, node_file: str) -> bool:
        """Return True if import path *imp_path* plausibly covers *node_file*."""
        init_suffix = "/__init__.py"
        # __init__.py represents its parent package directory.
        if imp_path.endswith(init_suffix):
            if node_file.startswith(imp_path[: -len(init_suffix)] + "/"):
                return True
        return imp_path.startswith(node_file) or node_file.startswith(imp_path + "/")

    def is_plausible_caller(
        edge_file: str, node_file: str, node_name: str = "",
    ) -> bool:
        """A bare-name edge is plausible if it comes from the same file,
        from a file whose imports (directly or through one re-exporting
        file) reach the node's file, or the name is globally unique."""
        if edge_file == node_file:
            return True
        # Unique names (only one definition) have no ambiguity -- accept all callers.
        if node_name and name_counts.get(node_name, 0) == 1:
            return True
        for imp_target in importer_files.get(edge_file, ()):
            # Strip "::name" suffix -- workspace-resolved imports may include it.
            imp_path = imp_target.split("::")[0]
            if path_reaches(imp_path, node_file):
                return True
            # 2-hop: edge_file imports X, X re-exports from node_file (barrel
            # files). ``importer_files`` is keyed by file path, so the
            # stripped path is used for the lookup, not the raw target.
            for imp2 in importer_files.get(imp_path, ()):
                if path_reaches(imp2.split("::")[0], node_file):
                    return True
            # Package-alias heuristic: monorepo imports like "@scope/pkg-name"
            # contain the directory name of the target package. Check if the
            # import target string contains a significant directory segment
            # from the node's file path (e.g. "lambda-common" in both the
            # import "@cova-utils/lambda-common" and the path
            # "libraries/lambda-common/...").
            if not imp_target.startswith("/"):
                # imp_target is a package specifier, not a file path
                if any(seg in imp_target for seg in _path_segments(node_file)):
                    return True
        return False

    def overrides_abstract(node: Any, base_qn: str) -> bool:
        """Return True if *base_qn* declares ``node.name`` as an abstract method."""
        # Try the fully-qualified base first, then the same-file form for
        # bases recorded by bare name.
        base_method = store.get_node(f"{base_qn}.{node.name}")
        if base_method is None:
            base_method = store.get_node(
                node.file_path + "::" + base_qn + "." + node.name
            )
        if base_method is None:
            return False
        base_decos = base_method.extra.get("decorators", ())
        return isinstance(base_decos, (list, tuple)) and any(
            "abstractmethod" in d for d in base_decos
        )

    def base_method_has_callers(node: Any) -> bool:
        """Return True if a same-named method of a base class has CALLS edges.

        Callers of ``Base.method()`` implicitly reach ``SubClass.method()``
        at runtime (polymorphic dispatch).
        """
        method_suffix = "." + node.name
        if not node.qualified_name.endswith(method_suffix):
            return False
        class_qn = node.qualified_name[: -len(method_suffix)]
        for base_name in class_bases.get(class_qn, []):
            rows = conn.execute(
                "SELECT n.qualified_name FROM nodes n "
                "WHERE n.parent_name = ? AND n.name = ? "
                "AND n.kind IN ('Function', 'Test')",
                (base_name, node.name),
            ).fetchall()
            for (base_method_qn,) in rows:
                if conn.execute(
                    "SELECT 1 FROM edges "
                    "WHERE target_qualified = ? AND kind = 'CALLS' "
                    "LIMIT 1",
                    (base_method_qn,),
                ).fetchone():
                    return True
        return False

    def has_kind(edges: list, edge_kind: str) -> bool:
        """Return True if any edge in *edges* has kind *edge_kind*."""
        return any(e.kind == edge_kind for e in edges)

    # Any incoming edge of one of these kinds keeps a symbol alive.
    liveness_kinds = frozenset(
        {"CALLS", "TESTED_BY", "IMPORTS_FROM", "REFERENCES", "INHERITS"}
    )

    dead: list[dict[str, Any]] = []

    for node in candidates:
        # Skip test nodes and anything defined in test files. (This also
        # covers mock/stub helpers that live in test files.)
        if node.is_test or _is_test_file(node.file_path):
            continue

        # Skip ambient type declarations (.d.ts) -- they describe external APIs.
        if node.file_path.endswith(".d.ts"):
            continue

        # Skip dunder methods -- invoked by runtime, never have explicit callers.
        if node.name.startswith("__") and node.name.endswith("__"):
            continue

        # Skip JS/TS/Java constructors -- invoked via `new ClassName()`, which
        # creates a CALLS edge to the class, not to `constructor`.
        if node.name == "constructor" and node.parent_name:
            continue

        # Skip entry points (by name pattern or decorator, not just
        # "uncalled"). Framework-decorated classes (e.g. Angular/NestJS) are
        # skipped here as well, since the entry-point check includes the
        # framework-decorator test.
        if _is_entry_point(node):
            continue

        # Skip classes referenced in type annotations (Pydantic schemas, etc.).
        if node.kind == "Class" and node.name in type_ref_names:
            continue

        # Skip classes (and their methods) inheriting from known framework bases.
        if node.kind == "Class":
            owner_qn = node.qualified_name
        elif node.parent_name:
            owner_qn = node.qualified_name.rsplit(".", 1)[0]
        else:
            owner_qn = None
        is_framework_class = bool(owner_qn) and not _FRAMEWORK_BASE_CLASSES.isdisjoint(
            target.rsplit("::", 1)[-1] for target in inherit_targets(owner_qn)
        )
        if node.kind == "Class":
            # The suffix test is the fallback for external CDK bases that
            # have no INHERITS edge.
            if is_framework_class or node.name.endswith(_CDK_CLASS_SUFFIXES):
                continue
        elif node.kind == "Function":
            if is_framework_class:
                continue
            # Same suffix fallback, applied to the method's parent class.
            if node.parent_name and node.parent_name.endswith(_CDK_CLASS_SUFFIXES):
                continue

        # Skip decorated functions/classes that are invoked implicitly rather
        # than via explicit CALLS edges.
        decorators = node.extra.get("decorators", ())
        if isinstance(decorators, (list, tuple)) and decorators:
            # @property -- invoked via attribute access
            # @abstractmethod -- polymorphic dispatch, never called directly
            # @classmethod/@staticmethod -- called via Class.method()
            # Angular @HostListener -- method called by framework event system
            if node.kind in ("Function", "Test") and any(
                d in ("property", "abstractmethod", "classmethod", "staticmethod")
                or d.endswith(".abstractmethod")
                or d.startswith("HostListener")
                for d in decorators
            ):
                continue
            # @dataclass classes are instantiated as types, not via CALLS
            if node.kind == "Class" and any("dataclass" in d for d in decorators):
                continue

        # Skip methods that override an @abstractmethod in a base class --
        # they are called polymorphically via the base class reference.
        if node.kind == "Function" and node.parent_name:
            parent_qn = node.qualified_name.rsplit(".", 1)[0]
            if any(
                overrides_abstract(node, base_qn)
                for base_qn in inherit_targets(parent_qn)
            ):
                continue

        # Check for callers (CALLS), test refs (TESTED_BY), importers
        # (IMPORTS_FROM), value references (REFERENCES -- function-as-value
        # in maps, arrays, etc.) and subclasses (INHERITS).
        incoming = store.get_edges_by_target(node.qualified_name)
        # Also check class-qualified edges (e.g. "ClassName::method") which
        # lack the file-path prefix used in node.qualified_name.
        if node.parent_name and not has_kind(incoming, "CALLS"):
            incoming = incoming + store.get_edges_by_target(
                f"{node.parent_name}::{node.name}"
            )
        # Also check bare-name and partially-qualified edges.
        # CALLS targets may be bare ("funcName"), class-qualified
        # ("Class::method"), or workspace-qualified ("pkg/dir::funcName").
        if not has_kind(incoming, "CALLS"):
            bare = store.search_edges_by_target_name(node.name, kind="CALLS")
            # Also search for partially-qualified targets ending with ::name.
            # NOTE(review): if the store is SQLite-backed, "_" and "%" inside
            # the name act as LIKE wildcards here -- confirm this is acceptable.
            suffix_rows = conn.execute(
                "SELECT * FROM edges WHERE kind = 'CALLS'"
                " AND target_qualified LIKE ?",
                (f"%::{node.name}",),
            ).fetchall()
            suffix_edges = [store._row_to_edge(r) for r in suffix_rows]
            incoming = incoming + [
                e for e in bare + suffix_edges
                if is_plausible_caller(e.file_path, node.file_path, node.name)
            ]
        if not has_kind(incoming, "TESTED_BY"):
            bare_tb = store.search_edges_by_target_name(node.name, kind="TESTED_BY")
            incoming = incoming + [
                e for e in bare_tb
                if is_plausible_caller(e.file_path, node.file_path, node.name)
            ]
        # Check INHERITS -- classes with subclasses are not dead.
        if node.kind == "Class" and not has_kind(incoming, "INHERITS"):
            incoming = incoming + store.search_edges_by_target_name(
                node.name, kind="INHERITS"
            )

        referenced = any(e.kind in liveness_kinds for e in incoming)

        # For classes with no direct references, check if any member has callers.
        if not referenced and node.kind == "Class":
            member_prefix = node.qualified_name + "."
            # Also check bare class-name pattern (unresolved CALLS targets)
            bare_prefix = node.name + "."
            member_calls = conn.execute(
                "SELECT COUNT(*) FROM edges WHERE kind = 'CALLS'"
                " AND (target_qualified LIKE ? OR target_qualified LIKE ?)",
                (f"%{member_prefix}%", f"%{bare_prefix}%"),
            ).fetchone()[0]
            referenced = member_calls > 0

        # A method override is alive when the base class method has callers.
        if not referenced and node.kind == "Function" and node.parent_name:
            referenced = base_method_has_callers(node)

        if referenced:
            continue

        rel = node.file_path
        if root:
            try:
                rel = str(Path(node.file_path).relative_to(root))
            except ValueError:
                # File lies outside *root*: keep the path as stored.
                pass
        dead.append({
            "name": _sanitize_name(node.name),
            "qualified_name": _sanitize_name(node.qualified_name),
            "kind": node.kind,
            "file": node.file_path,
            "file_path": node.file_path,
            "relative_path": rel,
            "line": node.line_start,
            "language": node.language,
        })

    logger.info("find_dead_code: found %d dead symbols", len(dead))
    return dead
580
+
581
+
582
+ # ---------------------------------------------------------------------------
583
+ # 3. suggest_refactorings
584
+ # ---------------------------------------------------------------------------
585
+
586
+
587
def suggest_refactorings(store: GraphStore) -> list[dict[str, Any]]:
    """Derive refactoring suggestions from dead code and community structure.

    Two kinds of suggestion are produced:
    - **remove**: one per symbol reported by ``find_dead_code``.
    - **move**: a function whose callers with a known community all belong
      to one community that differs from the function's own.

    Returns:
        List of suggestion dicts with type, description, symbols, rationale.
    """
    # --- Dead code suggestions ---
    suggestions: list[dict[str, Any]] = [
        {
            "type": "remove",
            "description": f"Remove unused {item['kind'].lower()} '{item['name']}'",
            "symbols": [item["qualified_name"]],
            "rationale": "No callers, no test references, no importers, not an entry point.",
        }
        for item in find_dead_code(store)
    ]

    # --- Cross-community move suggestions ---
    # Skipped entirely when the store reports no communities.
    community_rows = store.get_communities_list()

    if community_rows:
        # qualified name -> id of the community it belongs to.
        node_community: dict[str, int] = {
            member_qn: row["id"]
            for row in community_rows
            for member_qn in store.get_community_member_qns(row["id"])
        }
        community_names: dict[int, str] = {}
        for row in community_rows:
            community_names[row["id"]] = row["name"]

        for fnode in store.get_nodes_by_kind(["Function"]):
            home = node_community.get(fnode.qualified_name)
            if home is None:
                continue

            callers = [
                edge
                for edge in store.get_edges_by_target(fnode.qualified_name)
                if edge.kind == "CALLS"
            ]
            if not callers:
                continue

            # Communities of the callers; callers without a community are ignored.
            caller_communities = {
                cid
                for cid in (node_community.get(e.source_qualified) for e in callers)
                if cid is not None
            }

            # Suggest a move only when exactly one community calls the
            # function and it is not the function's own community.
            if len(caller_communities) != 1:
                continue
            (target_community,) = caller_communities
            if target_community == home:
                continue

            src_name = community_names.get(home, f"community-{home}")
            tgt_name = community_names.get(
                target_community, f"community-{target_community}"
            )
            suggestions.append({
                "type": "move",
                "description": (
                    f"Move '{_sanitize_name(fnode.name)}' from "
                    f"'{src_name}' to '{tgt_name}'"
                ),
                "symbols": [_sanitize_name(fnode.qualified_name)],
                "rationale": (
                    f"Function is in community '{src_name}' but only "
                    f"called by members of community '{tgt_name}'."
                ),
            })

    logger.info("suggest_refactorings: produced %d suggestions", len(suggestions))
    return suggestions
670
+
671
+
672
+ # ---------------------------------------------------------------------------
673
+ # 4. apply_refactor
674
+ # ---------------------------------------------------------------------------
675
+
676
+
677
def apply_refactor(
    refactor_id: str,
    repo_root: Path,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Apply a previously previewed refactoring to source files.

    Validates the refactor_id, checks expiry, ensures all edit paths are
    within the repo root, then performs exact string replacements on the
    target files.

    Files are decoded as strict UTF-8 and read/written without newline
    translation, so a file's existing line endings are preserved and a file
    that is not valid UTF-8 is skipped (with a warning) instead of being
    rewritten with U+FFFD replacement characters.

    Args:
        refactor_id: ID from a prior ``rename_preview`` call.
        repo_root: Validated repository root path.
        dry_run: If True, compute the would-be changes and return a
            unified-diff representation per affected file, but do NOT
            write anything to disk. The ``refactor_id`` is preserved so
            the same preview can be committed afterwards via a second
            call without ``dry_run``. See: #176

    Returns:
        Status dict. On failure (unknown/expired id, or an edit path outside
        ``repo_root``) it is ``{"status": "error", "error": <message>}`` and
        nothing is written. On success it contains ``status`` (``"ok"``),
        ``applied``, ``edits_applied`` and ``files_modified``. Files that are
        missing, unreadable, or not valid UTF-8 are skipped with a logged
        warning and do not change the ``"ok"`` status. When
        ``dry_run=True`` the dict additionally contains:

        - ``dry_run``: ``True``
        - ``would_modify``: list of file paths that would be changed
        - ``diffs``: map of file path → unified diff string showing the
          proposed change

        In dry-run mode ``applied`` is 0 and ``files_modified`` is empty,
        while ``edits_applied`` reports how many edits would be applied.
    """
    repo_root = repo_root.resolve()

    with _refactor_lock:
        _cleanup_expired()
        preview = _pending_refactors.get(refactor_id)

    if preview is None:
        logger.warning("apply_refactor: unknown or expired refactor_id %s", refactor_id)
        return {"status": "error", "error": f"Refactor '{refactor_id}' not found or expired."}

    # Check expiry explicitly.
    age = time.time() - preview["created_at"]
    if age > REFACTOR_EXPIRY_SECONDS:
        with _refactor_lock:
            _pending_refactors.pop(refactor_id, None)
        logger.warning("apply_refactor: refactor %s expired (%.0fs old)", refactor_id, age)
        return {"status": "error", "error": f"Refactor '{refactor_id}' has expired."}

    edits = preview.get("edits", [])
    if not edits:
        if dry_run:
            return {
                "status": "ok", "dry_run": True, "applied": 0,
                "files_modified": [], "edits_applied": 0,
                "would_modify": [], "diffs": {},
            }
        return {"status": "ok", "applied": 0, "files_modified": [], "edits_applied": 0}

    # --- Path traversal validation ---
    # Every edit is checked before any file is read or written, so a single
    # out-of-root path rejects the whole refactor.
    for edit in edits:
        edit_path = Path(edit["file"]).resolve()
        try:
            edit_path.relative_to(repo_root)
        except ValueError:
            logger.error(
                "apply_refactor: path traversal blocked for %s (repo_root=%s)",
                edit_path, repo_root,
            )
            return {
                "status": "error",
                "error": f"Edit path '{edit['file']}' is outside repo root.",
            }

    # --- Compute new content for every edit (shared by dry-run and write paths) ---
    # Group edits by file so multiple edits to the same file apply
    # sequentially against the updated content rather than stomping each
    # other. Dry-run and write modes then share this computation.
    edits_by_file: dict[str, list[dict]] = {}
    for edit in edits:
        edits_by_file.setdefault(edit["file"], []).append(edit)

    planned: dict[str, tuple[str, str, int]] = {}  # file -> (old_content, new_content, edit_count)
    for file_str, file_edits in edits_by_file.items():
        file_path = Path(file_str)
        if not file_path.is_file():
            logger.warning("apply_refactor: file not found: %s", file_path)
            continue
        try:
            # Strict decoding: a file that is not valid UTF-8 raises and is
            # skipped rather than being rewritten with replacement characters.
            # newline="" disables newline translation so CRLF/LF survive the
            # read/write round-trip untouched.
            with file_path.open("r", encoding="utf-8", newline="") as handle:
                original = handle.read()
        except (OSError, UnicodeDecodeError) as exc:
            logger.warning("apply_refactor: could not read %s: %s", file_path, exc)
            continue

        content, file_edits_applied = _apply_edits_to_text(original, file_edits, file_path)
        if file_edits_applied > 0:
            planned[file_str] = (original, content, file_edits_applied)

    # --- Dry-run path: return diffs, no writes ---
    if dry_run:
        import difflib
        diffs: dict[str, str] = {}
        for file_str, (original, new_content, _count) in planned.items():
            diff_lines = difflib.unified_diff(
                original.splitlines(keepends=True),
                new_content.splitlines(keepends=True),
                fromfile=f"a/{file_str}",
                tofile=f"b/{file_str}",
                n=3,
            )
            diffs[file_str] = "".join(diff_lines)
        total_edits = sum(count for _o, _n, count in planned.values())
        logger.info(
            "apply_refactor: dry-run %s — %d edits would be applied to %d files",
            refactor_id, total_edits, len(planned),
        )
        # Do NOT pop the pending refactor — let the user commit via a
        # second call with dry_run=False.
        return {
            "status": "ok",
            "dry_run": True,
            "applied": 0,
            "edits_applied": total_edits,
            "would_modify": sorted(planned.keys()),
            "files_modified": [],
            "diffs": diffs,
        }

    # --- Real-write path: write the pre-computed new content ---
    files_modified: set[str] = set()
    edits_applied = 0
    for file_str, (_original, new_content, count) in planned.items():
        file_path = Path(file_str)
        try:
            # newline="" writes the content verbatim (no "\n" -> os.linesep
            # translation), matching how it was read.
            with file_path.open("w", encoding="utf-8", newline="") as handle:
                handle.write(new_content)
        except OSError as exc:
            logger.error("apply_refactor: could not write %s: %s", file_path, exc)
            continue
        edits_applied += count
        files_modified.add(str(file_path))
        logger.info("apply_refactor: applied %d edit(s) to %s", count, file_path)

    # The preview is consumed once the write pass has run, even if some
    # individual writes failed (those failures are logged above).
    with _refactor_lock:
        _pending_refactors.pop(refactor_id, None)

    logger.info("apply_refactor: completed %s — %d edits applied", refactor_id, edits_applied)
    return {
        "status": "ok",
        "applied": edits_applied,
        "files_modified": sorted(files_modified),
        "edits_applied": edits_applied,
    }


def _apply_edits_to_text(
    text: str,
    file_edits: list[dict],
    file_path: Path,
) -> tuple[str, int]:
    """Apply *file_edits* to *text* in order; return ``(new_text, applied_count)``.

    Each edit replaces one occurrence of ``edit["old"]`` with ``edit["new"]``.
    When the edit carries a 1-based ``"line"`` and that line contains the old
    text, the replacement is made on that line; otherwise the first occurrence
    anywhere in the text is replaced. Edits whose old text is absent are
    skipped with a warning. *file_path* is used only for log messages.
    """
    applied = 0
    for edit in file_edits:
        old_text = edit["old"]
        new_text = edit["new"]
        if old_text not in text:
            logger.warning(
                "apply_refactor: old text %r not found in %s",
                old_text, file_path,
            )
            continue

        replaced_on_line = False
        target_line = edit.get("line")
        if target_line is not None:
            lines = text.splitlines(keepends=True)
            idx = target_line - 1
            if 0 <= idx < len(lines) and old_text in lines[idx]:
                lines[idx] = lines[idx].replace(old_text, new_text, 1)
                text = "".join(lines)
                replaced_on_line = True
        if not replaced_on_line:
            # No usable line hint (missing, out of range, or stale): fall
            # back to the first occurrence in the whole text.
            text = text.replace(old_text, new_text, 1)
        applied += 1
    return text, applied