code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,20 @@
1
+ """Code Review Graph - MCP server for persistent incremental code knowledge graphs."""
2
+
3
+ from .context_savings import (
4
+ attach_context_savings,
5
+ estimate_context_savings,
6
+ estimate_file_tokens,
7
+ estimate_tokens,
8
+ format_context_savings,
9
+ )
10
+
11
# Version string exposed to importers of the package.
# NOTE(review): the wheel is published as 2.3.6.post1 while this value omits
# the ".post1" suffix - confirm whether that difference is intentional.
__version__ = "2.3.6"

# Public names of the package: the version string plus the helpers that this
# module imports from ``.context_savings``.
__all__ = [
    "__version__",
    "attach_context_savings",
    "estimate_context_savings",
    "estimate_file_tokens",
    "estimate_tokens",
    "format_context_savings",
]
@@ -0,0 +1,4 @@
1
+ """Allow running as: python -m code_review_graph"""
2
from .cli import main

# This module is what runs for ``python -m code_review_graph``; it simply
# hands control to ``main`` from the sibling ``cli`` module.
main()
@@ -0,0 +1,410 @@
1
+ """Graph analysis: hub detection, bridge nodes, knowledge gaps,
2
+ surprise scoring, suggested questions."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import logging
7
+ from collections import Counter, defaultdict
8
+
9
+ from .graph import GraphStore, _sanitize_name
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def find_hub_nodes(store: GraphStore, top_n: int = 10) -> list[dict]:
    """Rank non-File nodes by the number of edges that touch them.

    Every edge returned by the store adds one to the out-degree of its source
    and one to the in-degree of its target.  Nodes with no edges at all are
    left out of the ranking.

    Args:
        store: Graph store supplying edges, nodes and community ids.
        top_n: Maximum number of entries to return.

    Returns:
        Up to ``top_n`` dicts ordered by descending ``total_degree``, each
        with the keys ``name``, ``qualified_name``, ``kind``, ``file``,
        ``in_degree``, ``out_degree``, ``total_degree`` and ``community_id``.
    """
    incoming: dict[str, int] = Counter()
    outgoing: dict[str, int] = Counter()
    for edge in store.get_all_edges():
        outgoing[edge.source_qualified] += 1
        incoming[edge.target_qualified] += 1

    candidates = store.get_all_nodes(exclude_files=True)
    community_of = store.get_all_community_ids()

    ranked = []
    for node in candidates:
        key = node.qualified_name
        fan_in = incoming.get(key, 0)
        fan_out = outgoing.get(key, 0)
        combined = fan_in + fan_out
        if not combined:
            # Unconnected nodes cannot be hubs.
            continue
        ranked.append({
            "name": _sanitize_name(node.name),
            "qualified_name": node.qualified_name,
            "kind": node.kind,
            "file": node.file_path,
            "in_degree": fan_in,
            "out_degree": fan_out,
            "total_degree": combined,
            "community_id": community_of.get(key),
        })

    # sorted() is stable, so ties keep the order in which nodes were listed.
    by_degree = sorted(
        ranked,
        key=lambda entry: entry.get("total_degree", 0),
        reverse=True,
    )
    return by_degree[:top_n]
56
+
57
+
58
def find_bridge_nodes(
    store: GraphStore, top_n: int = 10
) -> list[dict]:
    """Find the non-File nodes with the highest betweenness centrality.

    Betweenness centrality measures how often a node lies on shortest paths
    between other node pairs, so high scorers are architectural chokepoints.

    For graphs with more than 5000 nodes the score is approximated from a
    sample of 500 pivot nodes.  The sample is drawn with a fixed seed so that
    repeated runs over the same graph return the same ranking.

    Args:
        store: Graph store supplying the networkx graph, nodes and
            community ids.
        top_n: Maximum number of entries to return.

    Returns:
        Up to ``top_n`` dicts ordered by descending ``betweenness``, each with
        the keys ``name``, ``qualified_name``, ``kind``, ``file``,
        ``betweenness`` (rounded to 6 decimals) and ``community_id``.  An
        empty list is returned when the graph has no nodes.
    """
    import networkx as nx

    sampling_threshold = 5000  # above this node count, approximate
    sample_size = 500          # number of pivot nodes used when approximating
    sample_seed = 0            # fixed so the approximation is reproducible

    # Whether this graph is freshly built or cached is decided by the store.
    nxg = store._build_networkx_graph()

    n_nodes = nxg.number_of_nodes()
    if n_nodes == 0:
        return []

    if n_nodes > sampling_threshold:
        bc = nx.betweenness_centrality(
            nxg, k=sample_size, normalized=True, seed=sample_seed
        )
    else:
        bc = nx.betweenness_centrality(nxg, normalized=True)

    community_map = store.get_all_community_ids()
    node_map = {
        n.qualified_name: n
        for n in store.get_all_nodes(exclude_files=True)
    }

    results = []
    for qn, score in bc.items():
        # Skip nodes that never act as a bridge and graph vertices that have
        # no matching non-File node record.
        if score <= 0 or qn not in node_map:
            continue
        n = node_map[qn]
        if n.kind == "File":
            continue
        results.append({
            "name": _sanitize_name(n.name),
            "qualified_name": n.qualified_name,
            "kind": n.kind,
            "file": n.file_path,
            "betweenness": round(score, 6),
            "community_id": community_map.get(qn),
        })

    results.sort(
        key=lambda x: float(x.get("betweenness", 0)),
        reverse=True,
    )
    return results[:top_n]
113
+
114
+
115
def find_knowledge_gaps(store: GraphStore) -> dict[str, list[dict]]:
    """Report structural weak spots in the code graph.

    Args:
        store: Graph store supplying edges, non-File nodes, community ids
            and the community list.

    Returns:
        A dict with four lists:

        - ``isolated_nodes``: nodes touched by at most one edge
          (first 50 found).
        - ``thin_communities``: communities with fewer than 3 member nodes.
        - ``untested_hotspots``: non-test nodes touched by at least 5 edges
          that are never the source of a ``TESTED_BY`` edge (top 20 by
          degree).
        - ``single_file_communities``: communities of 3 or more nodes whose
          members all live in one file.
    """
    all_edges = store.get_all_edges()
    candidates = store.get_all_nodes(exclude_files=True)
    membership = store.get_all_community_ids()

    # Count how many edge endpoints land on each qualified name and remember
    # which names are the source of a TESTED_BY edge.
    touch_count: dict[str, int] = Counter()
    covered: set[str] = set()
    for edge in all_edges:
        touch_count[edge.source_qualified] += 1
        touch_count[edge.target_qualified] += 1
        if edge.kind == "TESTED_BY":
            covered.add(edge.source_qualified)

    def summarize(node, deg: int) -> dict:
        return {
            "name": _sanitize_name(node.name),
            "qualified_name": node.qualified_name,
            "kind": node.kind,
            "file": node.file_path,
            "degree": deg,
        }

    isolated = []
    hotspots = []
    members_per_community: Counter[int] = Counter()
    files_per_community: dict[int, set[str]] = defaultdict(set)
    for node in candidates:
        qname = node.qualified_name
        deg = touch_count.get(qname, 0)

        if deg <= 1:
            isolated.append(summarize(node, deg))

        community = membership.get(qname)
        if community is not None:
            members_per_community[community] += 1
            files_per_community[community].add(node.file_path)

        if deg >= 5 and qname not in covered and not node.is_test:
            hotspots.append(summarize(node, deg))

    hotspots.sort(
        key=lambda entry: entry.get("degree", 0),
        reverse=True,
    )

    thin = []
    single_file = []
    for community in store.get_communities_list():
        cid = int(community["id"])
        size = members_per_community.get(cid, 0)
        if size < 3:
            thin.append({
                "community_id": cid,
                "name": str(community["name"]),
                "size": size,
            })
            continue
        files = files_per_community.get(cid, set())
        if len(files) == 1:
            single_file.append({
                "community_id": cid,
                "name": str(community["name"]),
                "size": size,
                "file": next(iter(files)),
            })

    return {
        "isolated_nodes": isolated[:50],
        "thin_communities": thin,
        "untested_hotspots": hotspots[:20],
        "single_file_communities": single_file,
    }
211
+
212
+
213
def _file_extension(file_path: str) -> str:
    """Return the extension of the final path component, without the dot.

    Only the last component is inspected, so a dot in a directory name
    (``pkg.v1/Makefile``) is not mistaken for an extension.  Returns ``""``
    when the file name contains no dot or ends with one.
    """
    file_name = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    _stem, dot, extension = file_name.rpartition(".")
    return extension if dot else ""


def find_surprising_connections(
    store: GraphStore, top_n: int = 15
) -> list[dict]:
    """Score edges by how unexpected the coupling they express is.

    Each edge between two non-File nodes accumulates points for:

    - cross-community (+0.3): both ends have a community id and they differ
    - cross-language (+0.2): both files have an extension and they differ
    - peripheral-to-hub (+0.2): one end has degree <= 2 while the other
      reaches ``max(3 * median degree, 10)``
    - cross-test-boundary (+0.15): a ``CALLS`` edge between a test node and
      a non-test node
    - unusual-edge-kind (+0.15): a ``CALLS`` edge whose source is a ``Type``

    Args:
        store: Graph store supplying edges, non-File nodes and community ids.
        top_n: Maximum number of entries to return.

    Returns:
        Up to ``top_n`` dicts for edges that scored above zero, ordered by
        descending ``surprise_score``.  Each dict has the keys ``source``,
        ``source_qualified``, ``target``, ``target_qualified``, ``edge_kind``,
        ``surprise_score``, ``reasons``, ``source_community`` and
        ``target_community``.  Returns ``[]`` when the graph has no edges.
    """
    edges = store.get_all_edges()
    nodes = store.get_all_nodes(exclude_files=True)
    community_map = store.get_all_community_ids()

    node_map = {n.qualified_name: n for n in nodes}

    # Degree = number of edge endpoints that land on each qualified name.
    degree: dict[str, int] = Counter()
    for e in edges:
        degree[e.source_qualified] += 1
        degree[e.target_qualified] += 1

    degrees = [d for d in degree.values() if d > 0]
    if not degrees:
        return []
    # Upper median of the positive degrees; a "hub" sits well above it.
    median_deg = sorted(degrees)[len(degrees) // 2]
    high_deg_threshold = max(median_deg * 3, 10)

    scored_edges = []
    for e in edges:
        src = node_map.get(e.source_qualified)
        tgt = node_map.get(e.target_qualified)
        if not src or not tgt:
            continue
        if src.kind == "File" or tgt.kind == "File":
            continue

        score = 0.0
        reasons = []

        # Cross-community (+0.3)
        src_cid = community_map.get(e.source_qualified)
        tgt_cid = community_map.get(e.target_qualified)
        if (src_cid is not None
                and tgt_cid is not None
                and src_cid != tgt_cid):
            score += 0.3
            reasons.append("cross-community")

        # Cross-language (+0.2): compare extensions of the file names only,
        # so dotted directory names cannot create a false mismatch.
        src_lang = _file_extension(src.file_path)
        tgt_lang = _file_extension(tgt.file_path)
        if src_lang and tgt_lang and src_lang != tgt_lang:
            score += 0.2
            reasons.append("cross-language")

        # Peripheral-to-hub (+0.2), in either direction
        src_deg = degree.get(e.source_qualified, 0)
        tgt_deg = degree.get(e.target_qualified, 0)
        if ((src_deg <= 2 and tgt_deg >= high_deg_threshold)
                or (tgt_deg <= 2 and src_deg >= high_deg_threshold)):
            score += 0.2
            reasons.append("peripheral-to-hub")

        # Cross-file-type: test <-> non-test (+0.15)
        if src.is_test != tgt.is_test and e.kind == "CALLS":
            score += 0.15
            reasons.append("cross-test-boundary")

        # Non-standard edge kind (+0.15)
        if e.kind == "CALLS" and src.kind == "Type":
            score += 0.15
            reasons.append("unusual-edge-kind")

        if score > 0:
            scored_edges.append({
                "source": _sanitize_name(src.name),
                "source_qualified": e.source_qualified,
                "target": _sanitize_name(tgt.name),
                "target_qualified": e.target_qualified,
                "edge_kind": e.kind,
                "surprise_score": round(score, 2),
                "reasons": reasons,
                "source_community": src_cid,
                "target_community": tgt_cid,
            })

    scored_edges.sort(
        key=lambda x: float(x.get("surprise_score", 0)),
        reverse=True,
    )
    return scored_edges[:top_n]
315
+
316
+
317
def generate_suggested_questions(
    store: GraphStore,
) -> list[dict]:
    """Turn the graph analyses into a list of review questions.

    Questions are emitted in this order, using these ``category`` values:

    - ``bridge_node`` (high): one per top-3 bridge node.
    - ``hub_risk`` (high): one per top-3 hub node that is not the source of
      any ``TESTED_BY`` edge.
    - ``surprising_connection`` (medium): one per top-3 surprising edge that
      crosses a community boundary.
    - ``thin_community`` (low): for the first 2 thin communities.
    - ``untested_hotspot`` (medium): for the first 2 untested hotspots.

    Args:
        store: Graph store handed to the underlying analysis functions.

    Returns:
        A list of dicts with the keys ``category``, ``question``, ``target``
        and ``priority``.
    """
    prompts: list[dict] = []

    def ask(category: str, question: str, target: str, priority: str) -> None:
        prompts.append({
            "category": category,
            "question": question,
            "target": target,
            "priority": priority,
        })

    # Bridge nodes
    for bridge in find_bridge_nodes(store, top_n=3):
        ask(
            "bridge_node",
            f"'{bridge['name']}' is a critical connector "
            f"between multiple code regions. Is it "
            f"adequately tested and documented?",
            bridge["qualified_name"],
            "high",
        )

    # Hubs that are never the source of a TESTED_BY edge
    top_hubs = find_hub_nodes(store, top_n=3)
    covered = set()
    for edge in store.get_all_edges():
        if edge.kind == "TESTED_BY":
            covered.add(edge.source_qualified)
    for hub in top_hubs:
        if hub["qualified_name"] in covered:
            continue
        ask(
            "hub_risk",
            f"Hub node '{hub['name']}' has "
            f"{hub['total_degree']} connections but no "
            f"direct test coverage. Should it be "
            f"tested?",
            hub["qualified_name"],
            "high",
        )

    # Surprising edges that cross a community boundary
    for link in find_surprising_connections(store, top_n=3):
        if "cross-community" not in link["reasons"]:
            continue
        ask(
            "surprising_connection",
            f"'{link['source']}' (community "
            f"{link['source_community']}) calls "
            f"'{link['target']}' (community "
            f"{link['target_community']}). Is this "
            f"coupling intentional?",
            link["source_qualified"],
            "medium",
        )

    # Knowledge gaps
    weak_spots = find_knowledge_gaps(store)

    for community in weak_spots["thin_communities"][:2]:
        ask(
            "thin_community",
            f"Community '{community['name']}' has only "
            f"{community['size']} member(s). Should it be "
            f"merged with a neighbor?",
            f"community:{community['community_id']}",
            "low",
        )

    for hotspot in weak_spots["untested_hotspots"][:2]:
        ask(
            "untested_hotspot",
            f"'{hotspot['name']}' has {hotspot['degree']} "
            f"connections but no test coverage. "
            f"Is this a risk?",
            hotspot["qualified_name"],
            "medium",
        )

    return prompts