code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,698 @@
1
+ """Execution flow detection, tracing, and criticality scoring.
2
+
3
+ Detects entry points in the codebase (functions with no incoming CALLS edges,
4
+ framework-decorated handlers, and conventional name patterns), traces execution
5
+ paths via forward BFS through CALLS edges, scores each flow for criticality,
6
+ and persists results to the ``flows`` / ``flow_memberships`` tables.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import logging
13
+ import re
14
+ from collections import deque
15
+ from typing import Optional
16
+
17
+ from .constants import SECURITY_KEYWORDS as _SECURITY_KEYWORDS
18
+ from .graph import FlowAdjacency, GraphNode, GraphStore, _sanitize_name
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Constants
24
+ # ---------------------------------------------------------------------------
25
+
26
+ # Decorator patterns that indicate a function is a framework entry point.
27
+ _FRAMEWORK_DECORATOR_PATTERNS: list[re.Pattern[str]] = [
28
+ # Python web frameworks
29
+ re.compile(r"app\.(get|post|put|delete|patch|route|websocket|on_event)", re.IGNORECASE),
30
+ re.compile(r"router\.(get|post|put|delete|patch|route)", re.IGNORECASE),
31
+ re.compile(r"blueprint\.(route|before_request|after_request)", re.IGNORECASE),
32
+ re.compile(r"(before|after)_(request|response)", re.IGNORECASE),
33
+ # CLI frameworks
34
+ re.compile(r"click\.(command|group)", re.IGNORECASE),
35
+ re.compile(r"\w+\.(command|group)\b", re.IGNORECASE), # Click subgroups: @mygroup.command()
36
+ # Pydantic validators/serializers
37
+ re.compile(r"(field|model)_(serializer|validator)", re.IGNORECASE),
38
+ # Task queues
39
+ re.compile(r"(celery\.)?(task|shared_task|periodic_task)", re.IGNORECASE),
40
+ # Django
41
+ re.compile(r"receiver", re.IGNORECASE),
42
+ re.compile(r"api_view", re.IGNORECASE),
43
+ re.compile(r"\baction\b", re.IGNORECASE),
44
+ # Testing
45
+ re.compile(r"pytest\.(fixture|mark)"),
46
+ re.compile(r"(override_settings|modify_settings)", re.IGNORECASE),
47
+ # SQLAlchemy / event systems
48
+ re.compile(r"(event\.)?listens_for", re.IGNORECASE),
49
+ # Java Spring
50
+ re.compile(r"(Get|Post|Put|Delete|Patch|RequestMapping)Mapping", re.IGNORECASE),
51
+ re.compile(r"(Scheduled|EventListener|Bean|Configuration)", re.IGNORECASE),
52
+ # JS/TS frameworks
53
+ re.compile(r"(Component|Injectable|Controller|Module|Guard|Pipe)", re.IGNORECASE),
54
+ re.compile(r"(Subscribe|Mutation|Query|Resolver)", re.IGNORECASE),
55
+ # Express / Koa / Hono route handlers
56
+ re.compile(r"(app|router)\.(get|post|put|delete|patch|use|all)\b"),
57
+ # Android lifecycle
58
+ re.compile(r"@(Override|OnLifecycleEvent|Composable)", re.IGNORECASE),
59
+ # Kotlin coroutines / Android ViewModel
60
+ re.compile(r"(HiltViewModel|AndroidEntryPoint|Inject)", re.IGNORECASE),
61
+ # AI/agent frameworks (pydantic-ai, langchain, etc.)
62
+ re.compile(r"\w+\.(tool|tool_plain|system_prompt|result_validator)\b", re.IGNORECASE),
63
+ re.compile(r"^tool\b"), # bare @tool (LangChain, etc.)
64
+ # Middleware and exception handlers (Starlette, FastAPI, Sanic)
65
+ re.compile(r"\w+\.(middleware|exception_handler|on_exception)\b", re.IGNORECASE),
66
+ # Generic route decorator (Flask blueprints: @bp.route, @auth_bp.route, etc.)
67
+ re.compile(r"\w+\.route\b", re.IGNORECASE),
68
+ ]
69
+
70
+ # Name patterns that indicate conventional entry points.
71
+ _ENTRY_NAME_PATTERNS: list[re.Pattern[str]] = [
72
+ re.compile(r"^main$"),
73
+ re.compile(r"^__main__$"),
74
+ re.compile(r"^test_"),
75
+ re.compile(r"^Test[A-Z]"),
76
+ re.compile(r"^on_"),
77
+ re.compile(r"^handle_"),
78
+ # Lambda / serverless handler functions (wired via config, not code calls)
79
+ re.compile(r"^handler$"),
80
+ re.compile(r"^handle$"),
81
+ re.compile(r"^lambda_handler$"),
82
+ # Alembic migration entry points
83
+ re.compile(r"^upgrade$"),
84
+ re.compile(r"^downgrade$"),
85
+ # FastAPI lifecycle / dependency injection
86
+ re.compile(r"^lifespan$"),
87
+ re.compile(r"^get_db$"),
88
+ # Android Activity/Fragment lifecycle
89
+ re.compile(r"^on(Create|Start|Resume|Pause|Stop|Destroy|Bind|Receive)"),
90
+ # Servlet / JAX-RS
91
+ re.compile(r"^do(Get|Post|Put|Delete)$"),
92
+ # Python BaseHTTPRequestHandler
93
+ re.compile(r"^do_(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$"),
94
+ re.compile(r"^log_message$"),
95
+ # Express middleware signature
96
+ re.compile(r"^(middleware|errorHandler)$"),
97
+ # Angular lifecycle hooks
98
+ re.compile(
99
+ r"^ng(OnInit|OnChanges|OnDestroy|DoCheck"
100
+ r"|AfterContentInit|AfterContentChecked|AfterViewInit|AfterViewChecked)$"
101
+ ),
102
+ # Angular Pipe / ControlValueAccessor / Guards / Resolvers
103
+ re.compile(r"^(transform|writeValue|registerOnChange|registerOnTouched|setDisabledState)$"),
104
+ re.compile(r"^(canActivate|canDeactivate|canActivateChild|canLoad|canMatch|resolve)$"),
105
+ # React class component lifecycle
106
+ re.compile(
107
+ r"^(componentDidMount|componentDidUpdate|componentWillUnmount"
108
+ r"|shouldComponentUpdate|render)$"
109
+ ),
110
+ ]
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Entry-point detection
115
+ # ---------------------------------------------------------------------------
116
+
117
+
118
+ def _has_framework_decorator(node: GraphNode) -> bool:
119
+ """Return True if *node* has a decorator matching a framework pattern."""
120
+ decorators = node.extra.get("decorators")
121
+ if not decorators:
122
+ return False
123
+ if isinstance(decorators, str):
124
+ decorators = [decorators]
125
+ for dec in decorators:
126
+ for pat in _FRAMEWORK_DECORATOR_PATTERNS:
127
+ if pat.search(dec):
128
+ return True
129
+ return False
130
+
131
+
132
def _matches_entry_name(node: GraphNode) -> bool:
    """Tell whether ``node.name`` hits one of the conventional entry-point name patterns."""
    candidate = node.name
    return any(pattern.search(candidate) for pattern in _ENTRY_NAME_PATTERNS)
138
+
139
+
140
+ _TEST_FILE_RE = re.compile(
141
+ r"([\\/]__tests__[\\/]|\.spec\.[jt]sx?$|\.test\.[jt]sx?$|[\\/]test_[^/\\]*\.py$)",
142
+ )
143
+
144
+
145
+ def _is_test_file(file_path: str) -> bool:
146
+ """Return True if *file_path* looks like a test file."""
147
+ return bool(_TEST_FILE_RE.search(file_path))
148
+
149
+
150
def detect_entry_points(
    store: GraphStore,
    include_tests: bool = False,
) -> list[GraphNode]:
    """Collect the Function/Test nodes that act as entry points.

    A node qualifies when at least one of the following holds:

    * nothing calls it (its qualified name is absent from the CALLS targets),
    * it carries a framework decorator (e.g. ``@app.get``),
    * its name follows an entry-point convention (``main``, ``test_*``, ...).

    Unless *include_tests* is True, nodes flagged ``is_test`` or living in a
    test-looking file are dropped so the result covers production code only.
    Each qualified name is returned at most once, in store order.
    """
    # CALLS edges whose source is a File node are left out, so functions only
    # invoked from module scope (scripts, notebooks, a top-level ``<App />``
    # render) still count as uncalled roots.
    call_targets = store.get_all_call_targets(include_file_sources=False)

    found: list[GraphNode] = []
    emitted: set[str] = set()

    for candidate in store.get_nodes_by_kind(["Function", "Test"]):
        looks_like_test = candidate.is_test or _is_test_file(candidate.file_path)
        if looks_like_test and not include_tests:
            continue

        qname = candidate.qualified_name
        qualifies = (
            qname not in call_targets  # true root: nobody calls it
            or _has_framework_decorator(candidate)
            or _matches_entry_name(candidate)
        )
        if not qualifies or qname in emitted:
            continue

        emitted.add(qname)
        found.append(candidate)

    return found
199
+
200
+
201
+ # ---------------------------------------------------------------------------
202
+ # Flow tracing (BFS)
203
+ # ---------------------------------------------------------------------------
204
+
205
+
206
+ def _trace_single_flow(
207
+ adj: FlowAdjacency,
208
+ ep: GraphNode,
209
+ max_depth: int = 15,
210
+ ) -> Optional[dict]:
211
+ """Trace a single execution flow from *ep* via forward BFS.
212
+
213
+ Returns a flow dict (see :func:`trace_flows` for the schema) or ``None``
214
+ if the flow is trivial (single-node, no outgoing CALLS that resolve).
215
+ """
216
+ path_ids: list[int] = [ep.id]
217
+ path_qnames: list[str] = [ep.qualified_name]
218
+ visited: set[str] = {ep.qualified_name}
219
+ queue: deque[tuple[str, int]] = deque([(ep.qualified_name, 0)])
220
+
221
+ actual_depth = 0
222
+ nodes_by_qn = adj.nodes_by_qn
223
+ calls_out = adj.calls_out
224
+
225
+ while queue:
226
+ current_qn, depth = queue.popleft()
227
+ if depth > actual_depth:
228
+ actual_depth = depth
229
+ if depth >= max_depth:
230
+ continue
231
+
232
+ for target_qn in calls_out.get(current_qn, ()):
233
+ if target_qn in visited:
234
+ continue
235
+ target_node = nodes_by_qn.get(target_qn)
236
+ if target_node is None:
237
+ continue
238
+ visited.add(target_qn)
239
+ path_ids.append(target_node.id)
240
+ path_qnames.append(target_qn)
241
+ queue.append((target_qn, depth + 1))
242
+
243
+ # Skip trivial single-node flows.
244
+ if len(path_ids) < 2:
245
+ return None
246
+
247
+ files = list({
248
+ n.file_path
249
+ for qn in path_qnames
250
+ if (n := nodes_by_qn.get(qn)) is not None
251
+ })
252
+
253
+ flow: dict = {
254
+ "name": _sanitize_name(ep.name),
255
+ "entry_point": ep.qualified_name,
256
+ "entry_point_id": ep.id,
257
+ "path": path_ids,
258
+ "depth": actual_depth,
259
+ "node_count": len(path_ids),
260
+ "file_count": len(files),
261
+ "files": files,
262
+ "criticality": 0.0,
263
+ }
264
+ flow["criticality"] = compute_criticality(flow, adj)
265
+ return flow
266
+
267
+
268
def trace_flows(
    store: GraphStore,
    max_depth: int = 15,
    include_tests: bool = False,
) -> list[dict]:
    """Run a forward BFS from each detected entry point and collect the flows.

    Every returned flow dict carries:

    - name: human-readable flow name (entry point name)
    - entry_point: qualified name of the entry point
    - entry_point_id: node database id of the entry point
    - path: ordered list of node IDs in the flow
    - depth: maximum BFS depth reached
    - node_count: number of distinct nodes in the path
    - file_count: number of distinct files touched
    - files: list of distinct file paths
    - criticality: computed criticality score (0.0-1.0)

    Flows are ordered from most to least critical; entry points whose trace
    is trivial (``_trace_single_flow`` returns ``None``) are omitted.
    """
    roots = detect_entry_points(store, include_tests=include_tests)
    if not roots:
        return []

    # Load the adjacency data once and share it across all traces.
    adjacency = store.load_flow_adjacency()
    traced = [
        flow
        for root in roots
        if (flow := _trace_single_flow(adjacency, root, max_depth)) is not None
    ]

    return sorted(traced, key=lambda item: item["criticality"], reverse=True)
301
+
302
+
303
+ # ---------------------------------------------------------------------------
304
+ # Criticality scoring
305
+ # ---------------------------------------------------------------------------
306
+
307
+
308
def compute_criticality(flow: dict, adj: FlowAdjacency) -> float:
    """Combine five normalised factors into a 0.0-1.0 criticality score.

    Factor weights:
      - File spread: 0.30 (1 file => 0.0, 5+ files => 1.0)
      - External calls: 0.20 (0 => 0.0, 5+ => 1.0)
      - Security sensitivity: 0.25 (share of nodes whose name contains a keyword)
      - Test coverage gap: 0.15 (share of nodes absent from ``has_tested_by``)
      - Depth: 0.10 (0 => 0.0, 10+ => 1.0)

    Returns 0.0 when the flow has no path or none of its node ids resolve.
    The result is clamped to [0.0, 1.0] and rounded to four decimals.
    """
    path = flow.get("path", [])
    if not path:
        return 0.0

    by_id = adj.nodes_by_id
    known_qnames = adj.nodes_by_qn
    outgoing = adj.calls_out
    tested = adj.has_tested_by

    members: list[GraphNode] = []
    for node_id in path:
        resolved = by_id.get(node_id)
        if resolved is not None:
            members.append(resolved)
    if not members:
        return 0.0
    member_total = max(len(members), 1)

    # --- File spread ---
    distinct_files = len({m.file_path for m in members})
    if distinct_files > 1:
        file_spread = min((distinct_files - 1) / 4.0, 1.0)
    else:
        file_spread = 0.0

    # --- External calls: callees that are not nodes in the graph ---
    external_count = sum(
        1
        for m in members
        for callee in outgoing.get(m.qualified_name, ())
        if callee not in known_qnames
    )
    external_score = min(external_count / 5.0, 1.0)

    # --- Security sensitivity: each node counts at most once ---
    security_hits = 0
    for m in members:
        short_name = m.name.lower()
        full_name = m.qualified_name.lower()
        if any(kw in short_name or kw in full_name for kw in _SECURITY_KEYWORDS):
            security_hits += 1
    security_score = min(security_hits / member_total, 1.0)

    # --- Test coverage gap ---
    covered = len([m for m in members if m.qualified_name in tested])
    test_gap = 1.0 - covered / member_total

    # --- Depth ---
    depth_score = min(flow.get("depth", 0) / 10.0, 1.0)

    # --- Weighted sum (term order kept fixed for stable float results) ---
    weighted = (
        file_spread * 0.30
        + external_score * 0.20
        + security_score * 0.25
        + test_gap * 0.15
        + depth_score * 0.10
    )
    clamped = min(max(weighted, 0.0), 1.0)
    return round(clamped, 4)
378
+
379
+
380
+ # ---------------------------------------------------------------------------
381
+ # Persistence
382
+ # ---------------------------------------------------------------------------
383
+
384
+
385
def store_flows(store: GraphStore, flows: list[dict]) -> int:
    """Replace every stored flow with *flows* in one transaction.

    Both ``flow_memberships`` and ``flows`` are emptied, then each flow and
    its per-node membership rows are inserted.  Any failure rolls the whole
    operation back and re-raises.

    Returns the number of flows stored.
    """
    # NOTE: the raw connection is used on purpose: the DELETE + INSERT batch
    # is tied to the transaction lifecycle managed right here.
    db = store._conn

    if db.in_transaction:
        logger.warning("Rolling back uncommitted transaction before BEGIN IMMEDIATE")
        db.rollback()

    # Explicit transaction: an exception mid-loop must not leave partial writes.
    db.execute("BEGIN IMMEDIATE")
    try:
        db.execute("DELETE FROM flow_memberships")
        db.execute("DELETE FROM flows")

        stored = 0
        for record in flows:
            members = record.get("path", [])
            encoded_path = json.dumps(members)
            flow_row = (
                record["name"],
                record["entry_point_id"],
                record["depth"],
                record["node_count"],
                record["file_count"],
                record["criticality"],
                encoded_path,
            )
            db.execute(
                """INSERT INTO flows
                   (name, entry_point_id, depth, node_count, file_count,
                    criticality, path_json)
                   VALUES (?, ?, ?, ?, ?, ?, ?)""",
                flow_row,
            )
            new_flow_id = db.execute("SELECT last_insert_rowid()").fetchone()[0]

            # One membership row per path position.
            for index, member_id in enumerate(members):
                db.execute(
                    "INSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) "
                    "VALUES (?, ?, ?)",
                    (new_flow_id, member_id, index),
                )
            stored += 1

        db.commit()
    except BaseException:
        db.rollback()
        raise
    return stored
440
+
441
+
442
def incremental_trace_flows(
    store: GraphStore,
    changed_files: list[str],
    max_depth: int = 15,
) -> int:
    """Re-trace only flows that touch *changed_files*.

    1. Find flow IDs whose memberships reference nodes in *changed_files*.
    2. Collect the entry-point node IDs of those flows before deleting them.
    3. Delete only the affected flows and their memberships (one transaction).
    4. Re-detect entry points, keeping those in *changed_files* **or** whose
       node ID was an entry point of a deleted flow.
    5. BFS-trace each relevant entry point via :func:`_trace_single_flow`.
    6. INSERT the new flows (without clearing unrelated flows) inside one
       explicit transaction, so a failure cannot leave a flow row without
       its membership rows.

    Args:
        store: The graph store; its ``_conn`` connection is used directly.
        changed_files: File paths whose flows must be refreshed.  An empty
            list is a no-op.
        max_depth: Forwarded to :func:`_trace_single_flow`.

    Returns:
        The number of re-traced flows that were stored.

    Raises:
        Whatever the database layer raises; the transaction in progress is
        rolled back before the exception propagates.
    """
    if not changed_files:
        return 0

    conn = store._conn
    changed_file_set = set(changed_files)

    # ------------------------------------------------------------------
    # 1. Find affected flow IDs
    # ------------------------------------------------------------------
    # Only "?" placeholders are interpolated; values are bound parameters.
    placeholders = ",".join("?" * len(changed_files))
    affected_rows = conn.execute(
        f"SELECT DISTINCT fm.flow_id FROM flow_memberships fm "  # nosec B608
        f"JOIN nodes n ON n.id = fm.node_id "
        f"WHERE n.file_path IN ({placeholders})",
        changed_files,
    ).fetchall()
    affected_ids = [r[0] for r in affected_rows]

    # ------------------------------------------------------------------
    # 2. Collect old entry-point node IDs before deletion
    # ------------------------------------------------------------------
    entry_point_ids: set[int] = set()
    if affected_ids:
        ep_placeholders = ",".join("?" * len(affected_ids))
        ep_rows = conn.execute(
            f"SELECT entry_point_id FROM flows "  # nosec B608
            f"WHERE id IN ({ep_placeholders})",
            affected_ids,
        ).fetchall()
        entry_point_ids = {r[0] for r in ep_rows}

    # ------------------------------------------------------------------
    # 3. Delete affected flows and their memberships
    # ------------------------------------------------------------------
    # Wrap in an explicit transaction so a crash mid-loop cannot leave
    # orphaned flow_memberships rows pointing at deleted flows. See #258.
    if affected_ids:
        if conn.in_transaction:
            conn.commit()
        conn.execute("BEGIN IMMEDIATE")
        try:
            for fid in affected_ids:
                conn.execute(
                    "DELETE FROM flow_memberships WHERE flow_id = ?", (fid,),
                )
                conn.execute("DELETE FROM flows WHERE id = ?", (fid,))
            conn.commit()
        except BaseException:
            conn.rollback()
            raise

    # ------------------------------------------------------------------
    # 4. Re-detect entry points and filter to relevant ones
    # ------------------------------------------------------------------
    entry_points = detect_entry_points(store)
    relevant_eps = [
        ep for ep in entry_points
        if ep.file_path in changed_file_set or ep.id in entry_point_ids
    ]

    # ------------------------------------------------------------------
    # 5. BFS-trace each relevant entry point
    # ------------------------------------------------------------------
    new_flows: list[dict] = []
    if relevant_eps:
        adj = store.load_flow_adjacency()
        for ep in relevant_eps:
            flow = _trace_single_flow(adj, ep, max_depth)
            if flow is not None:
                new_flows.append(flow)

    # ------------------------------------------------------------------
    # 6. INSERT new flows without clearing unrelated ones
    # ------------------------------------------------------------------
    # Flush any transaction still open at this point (same convention as
    # step 3) so the inserts below form their own atomic unit.
    if conn.in_transaction:
        conn.commit()
    if not new_flows:
        return 0

    count = 0
    conn.execute("BEGIN IMMEDIATE")
    try:
        for flow in new_flows:
            node_ids = flow.get("path", [])
            path_json = json.dumps(node_ids)
            conn.execute(
                """INSERT INTO flows
                   (name, entry_point_id, depth, node_count, file_count,
                    criticality, path_json)
                   VALUES (?, ?, ?, ?, ?, ?, ?)""",
                (
                    flow["name"],
                    flow["entry_point_id"],
                    flow["depth"],
                    flow["node_count"],
                    flow["file_count"],
                    flow["criticality"],
                    path_json,
                ),
            )
            flow_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]

            for position, node_id in enumerate(node_ids):
                conn.execute(
                    "INSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) "
                    "VALUES (?, ?, ?)",
                    (flow_id, node_id, position),
                )
            count += 1

        conn.commit()
    except BaseException:
        # Undo the partial batch so no flow is stored without memberships.
        conn.rollback()
        raise
    return count
564
+
565
+
566
+ # ---------------------------------------------------------------------------
567
+ # Query helpers
568
+ # ---------------------------------------------------------------------------
569
+
570
+
571
def get_flows(
    store: GraphStore,
    sort_by: str = "criticality",
    limit: int = 50,
) -> list[dict]:
    """Load stored flows from the ``flows`` table.

    Args:
        store: The graph store.
        sort_by: Sort column: ``criticality``, ``depth``, ``node_count``,
            ``file_count`` (all descending) or ``name`` (ascending).  Any
            other value falls back to ``criticality``.
        limit: Maximum number of flows to return.
    """
    descending_columns = ("criticality", "depth", "node_count", "file_count")
    # Whitelist the column: it is interpolated into the SQL text below.
    if sort_by not in {*descending_columns, "name"}:
        sort_by = "criticality"
    direction = "DESC" if sort_by in descending_columns else "ASC"

    # NOTE: the flows table is owned by this module; direct ``_conn`` access
    # is documented coupling.
    cursor = store._conn.execute(
        f"SELECT * FROM flows ORDER BY {sort_by} {direction} LIMIT ?",  # nosec B608
        (limit,),
    )

    return [
        {
            "id": record["id"],
            "name": _sanitize_name(record["name"]),
            "entry_point_id": record["entry_point_id"],
            "depth": record["depth"],
            "node_count": record["node_count"],
            "file_count": record["file_count"],
            "criticality": record["criticality"],
            "path": json.loads(record["path_json"]),
            "created_at": record["created_at"],
            "updated_at": record["updated_at"],
        }
        for record in cursor.fetchall()
    ]
611
+
612
+
613
def get_flow_by_id(store: GraphStore, flow_id: int) -> Optional[dict]:
    """Fetch one flow together with per-node step details.

    Returns ``None`` when no flow has the given id.  Otherwise the dict holds
    the flow metadata plus a ``steps`` list with each node's name, kind,
    file, and line info; path ids the store cannot resolve are left out of
    ``steps`` but stay in ``path``.
    """
    # NOTE: reads the flows table through ``_conn`` directly, like store_flows.
    record = store._conn.execute(
        "SELECT * FROM flows WHERE id = ?", (flow_id,)
    ).fetchone()
    if record is None:
        return None

    node_path: list[int] = json.loads(record["path_json"])

    # Resolve every path id, keeping only the nodes that still exist.
    resolved = (store.get_node_by_id(node_id) for node_id in node_path)
    step_details: list[dict] = [
        {
            "node_id": member.id,
            "name": _sanitize_name(member.name),
            "kind": member.kind,
            "file": member.file_path,
            "line_start": member.line_start,
            "line_end": member.line_end,
            "qualified_name": _sanitize_name(member.qualified_name),
        }
        for member in resolved
        if member
    ]

    return {
        "id": record["id"],
        "name": _sanitize_name(record["name"]),
        "entry_point_id": record["entry_point_id"],
        "depth": record["depth"],
        "node_count": record["node_count"],
        "file_count": record["file_count"],
        "criticality": record["criticality"],
        "path": node_path,
        "steps": step_details,
        "created_at": record["created_at"],
        "updated_at": record["updated_at"],
    }
656
+
657
+
658
def get_affected_flows(
    store: GraphStore,
    changed_files: list[str],
) -> dict:
    """Look up the stored flows that pass through any of *changed_files*.

    Returns::

        {
            "affected_flows": [<flow dicts>],
            "total": <int>,
        }

    Flows are ordered by criticality, highest first.  The result is empty
    when no files are given, no nodes belong to them, or no flow contains
    those nodes.
    """
    if not changed_files:
        return {"affected_flows": [], "total": 0}

    # Files -> node ids -> flow ids; stop at the first empty stage.
    touched_node_ids = store.get_node_ids_by_files(changed_files)
    touched_flow_ids = (
        store.get_flow_ids_by_node_ids(touched_node_ids) if touched_node_ids else None
    )
    if not touched_flow_ids:
        return {"affected_flows": [], "total": 0}

    loaded = (get_flow_by_id(store, fid) for fid in touched_flow_ids)
    ranked = sorted(
        (flow for flow in loaded if flow),
        key=lambda item: item.get("criticality", 0),
        reverse=True,
    )
    return {"affected_flows": ranked, "total": len(ranked)}