code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,698 @@
|
|
|
1
|
+
"""Execution flow detection, tracing, and criticality scoring.
|
|
2
|
+
|
|
3
|
+
Detects entry points in the codebase (functions with no incoming CALLS edges,
|
|
4
|
+
framework-decorated handlers, and conventional name patterns), traces execution
|
|
5
|
+
paths via forward BFS through CALLS edges, scores each flow for criticality,
|
|
6
|
+
and persists results to the ``flows`` / ``flow_memberships`` tables.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from collections import deque
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from .constants import SECURITY_KEYWORDS as _SECURITY_KEYWORDS
|
|
18
|
+
from .graph import FlowAdjacency, GraphNode, GraphStore, _sanitize_name
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Constants
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
# Decorator patterns that indicate a function is a framework entry point.
|
|
27
|
+
_FRAMEWORK_DECORATOR_PATTERNS: list[re.Pattern[str]] = [
    # Every pattern is applied with ``search`` against a decorator string, so
    # an unanchored pattern matches anywhere inside that string.
    # Python web frameworks
    re.compile(r"app\.(get|post|put|delete|patch|route|websocket|on_event)", re.IGNORECASE),
    re.compile(r"router\.(get|post|put|delete|patch|route)", re.IGNORECASE),
    re.compile(r"blueprint\.(route|before_request|after_request)", re.IGNORECASE),
    re.compile(r"(before|after)_(request|response)", re.IGNORECASE),
    # CLI frameworks
    re.compile(r"click\.(command|group)", re.IGNORECASE),
    re.compile(r"\w+\.(command|group)\b", re.IGNORECASE),  # Click subgroups: @mygroup.command()
    # Pydantic validators/serializers
    re.compile(r"(field|model)_(serializer|validator)", re.IGNORECASE),
    # Task queues
    re.compile(r"(celery\.)?(task|shared_task|periodic_task)", re.IGNORECASE),
    # Django
    re.compile(r"receiver", re.IGNORECASE),
    re.compile(r"api_view", re.IGNORECASE),
    re.compile(r"\baction\b", re.IGNORECASE),
    # Testing
    re.compile(r"pytest\.(fixture|mark)"),
    re.compile(r"(override_settings|modify_settings)", re.IGNORECASE),
    # SQLAlchemy / event systems
    re.compile(r"(event\.)?listens_for", re.IGNORECASE),
    # Java Spring
    # The alternation holds only the prefix ("Request", not "RequestMapping"):
    # with "RequestMapping" inside the group the pattern would require the
    # literal text "RequestMappingMapping" and never match @RequestMapping.
    re.compile(r"(Get|Post|Put|Delete|Patch|Request)Mapping", re.IGNORECASE),
    re.compile(r"(Scheduled|EventListener|Bean|Configuration)", re.IGNORECASE),
    # JS/TS frameworks
    re.compile(r"(Component|Injectable|Controller|Module|Guard|Pipe)", re.IGNORECASE),
    re.compile(r"(Subscribe|Mutation|Query|Resolver)", re.IGNORECASE),
    # Express / Koa / Hono route handlers
    re.compile(r"(app|router)\.(get|post|put|delete|patch|use|all)\b"),
    # Android lifecycle
    re.compile(r"@(Override|OnLifecycleEvent|Composable)", re.IGNORECASE),
    # Kotlin coroutines / Android ViewModel
    re.compile(r"(HiltViewModel|AndroidEntryPoint|Inject)", re.IGNORECASE),
    # AI/agent frameworks (pydantic-ai, langchain, etc.)
    re.compile(r"\w+\.(tool|tool_plain|system_prompt|result_validator)\b", re.IGNORECASE),
    re.compile(r"^tool\b"),  # bare @tool (LangChain, etc.)
    # Middleware and exception handlers (Starlette, FastAPI, Sanic)
    re.compile(r"\w+\.(middleware|exception_handler|on_exception)\b", re.IGNORECASE),
    # Generic route decorator (Flask blueprints: @bp.route, @auth_bp.route, etc.)
    re.compile(r"\w+\.route\b", re.IGNORECASE),
]
|
|
69
|
+
|
|
70
|
+
# Name patterns that indicate conventional entry points.
|
|
71
|
+
_ENTRY_NAME_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source)
    for source in (
        # main / __main__, test names, and on_ / handle_ prefixes
        r"^main$",
        r"^__main__$",
        r"^test_",
        r"^Test[A-Z]",
        r"^on_",
        r"^handle_",
        # Lambda / serverless handler functions (wired via config, not code calls)
        r"^handler$",
        r"^handle$",
        r"^lambda_handler$",
        # Alembic migration entry points
        r"^upgrade$",
        r"^downgrade$",
        # FastAPI lifecycle / dependency injection
        r"^lifespan$",
        r"^get_db$",
        # Android Activity/Fragment lifecycle
        r"^on(Create|Start|Resume|Pause|Stop|Destroy|Bind|Receive)",
        # Servlet / JAX-RS
        r"^do(Get|Post|Put|Delete)$",
        # Python BaseHTTPRequestHandler
        r"^do_(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$",
        r"^log_message$",
        # Express middleware signature
        r"^(middleware|errorHandler)$",
        # Angular lifecycle hooks
        (
            r"^ng(OnInit|OnChanges|OnDestroy|DoCheck"
            r"|AfterContentInit|AfterContentChecked|AfterViewInit|AfterViewChecked)$"
        ),
        # Angular Pipe / ControlValueAccessor / Guards / Resolvers
        r"^(transform|writeValue|registerOnChange|registerOnTouched|setDisabledState)$",
        r"^(canActivate|canDeactivate|canActivateChild|canLoad|canMatch|resolve)$",
        # React class component lifecycle
        (
            r"^(componentDidMount|componentDidUpdate|componentWillUnmount"
            r"|shouldComponentUpdate|render)$"
        ),
    )
]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ---------------------------------------------------------------------------
|
|
114
|
+
# Entry-point detection
|
|
115
|
+
# ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _has_framework_decorator(node: GraphNode) -> bool:
    """Tell whether any decorator recorded on *node* matches a framework pattern.

    Decorators are read from ``node.extra["decorators"]``; a single string is
    treated as a one-element list. A missing or empty value yields ``False``.
    """
    recorded = node.extra.get("decorators")
    if not recorded:
        return False
    decorator_texts = [recorded] if isinstance(recorded, str) else recorded
    return any(
        pattern.search(text)
        for text in decorator_texts
        for pattern in _FRAMEWORK_DECORATOR_PATTERNS
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _matches_entry_name(node: GraphNode) -> bool:
    """Tell whether ``node.name`` matches any conventional entry-point pattern."""
    candidate_name = node.name
    return any(
        pattern.search(candidate_name) for pattern in _ENTRY_NAME_PATTERNS
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# Matches a ``__tests__`` directory component, ``.spec``/``.test`` JS/TS files,
# and ``test_*.py`` files that are preceded by a path separator.
_TEST_FILE_RE = re.compile(
    r"([\\/]__tests__[\\/]|\.spec\.[jt]sx?$|\.test\.[jt]sx?$|[\\/]test_[^/\\]*\.py$)",
)


def _is_test_file(file_path: str) -> bool:
    """Report whether *file_path* matches the test-file path pattern."""
    return _TEST_FILE_RE.search(file_path) is not None
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def detect_entry_points(
    store: GraphStore,
    include_tests: bool = False,
) -> list[GraphNode]:
    """Collect the Function/Test nodes that act as entry points.

    A node qualifies when at least one of the following holds:

    1. Its qualified name is not the target of any CALLS edge (true root).
    2. It carries a framework decorator (e.g. ``@app.get``).
    3. Its name follows a conventional pattern (``main``, ``test_*``, ...).

    Unless *include_tests* is true, nodes flagged ``is_test`` and nodes that
    live in a test file are skipped. Each qualified name is returned at most
    once, in the order the store yields the nodes.
    """
    # CALLS edges sourced at File nodes are left out of the target set, so a
    # function that is only invoked from module scope still counts as a root.
    call_targets = store.get_all_call_targets(include_file_sources=False)

    found: list[GraphNode] = []
    emitted: set[str] = set()

    for candidate in store.get_nodes_by_kind(["Function", "Test"]):
        if not include_tests and (
            candidate.is_test or _is_test_file(candidate.file_path)
        ):
            continue

        qualified = candidate.qualified_name
        if qualified in emitted:
            continue

        qualifies = (
            qualified not in call_targets
            or _has_framework_decorator(candidate)
            or _matches_entry_name(candidate)
        )
        if qualifies:
            found.append(candidate)
            emitted.add(qualified)

    return found
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# ---------------------------------------------------------------------------
|
|
202
|
+
# Flow tracing (BFS)
|
|
203
|
+
# ---------------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _trace_single_flow(
    adj: FlowAdjacency,
    ep: GraphNode,
    max_depth: int = 15,
) -> Optional[dict]:
    """Trace a single execution flow from *ep* via forward BFS.

    Args:
        adj: Adjacency data providing ``nodes_by_qn`` and ``calls_out``.
        ep: Entry-point node the traversal starts from.
        max_depth: Nodes dequeued at this depth or deeper are not expanded.

    Returns:
        A flow dict (see :func:`trace_flows` for the schema), or ``None`` when
        the traversal reaches no node other than *ep* itself. ``files`` lists
        each distinct file once, in the order the flow first reaches it, so the
        result is stable across runs.
    """
    path_ids: list[int] = [ep.id]
    path_qnames: list[str] = [ep.qualified_name]
    visited: set[str] = {ep.qualified_name}
    queue: deque[tuple[str, int]] = deque([(ep.qualified_name, 0)])

    actual_depth = 0
    nodes_by_qn = adj.nodes_by_qn
    calls_out = adj.calls_out

    while queue:
        current_qn, depth = queue.popleft()
        # Track the deepest level of any dequeued node, including nodes that
        # sit on the max_depth frontier and are not expanded further.
        if depth > actual_depth:
            actual_depth = depth
        if depth >= max_depth:
            continue

        for target_qn in calls_out.get(current_qn, ()):
            if target_qn in visited:
                continue
            target_node = nodes_by_qn.get(target_qn)
            if target_node is None:
                # Call target has no node in the adjacency data; skip it.
                continue
            visited.add(target_qn)
            path_ids.append(target_node.id)
            path_qnames.append(target_qn)
            queue.append((target_qn, depth + 1))

    # Skip trivial single-node flows.
    if len(path_ids) < 2:
        return None

    # dict.fromkeys de-duplicates while keeping first-seen order; a plain set
    # would make the order of ``files`` vary between interpreter runs.
    files = list(dict.fromkeys(
        n.file_path
        for qn in path_qnames
        if (n := nodes_by_qn.get(qn)) is not None
    ))

    flow: dict = {
        "name": _sanitize_name(ep.name),
        "entry_point": ep.qualified_name,
        "entry_point_id": ep.id,
        "path": path_ids,
        "depth": actual_depth,
        "node_count": len(path_ids),
        "file_count": len(files),
        "files": files,
        "criticality": 0.0,
    }
    flow["criticality"] = compute_criticality(flow, adj)
    return flow
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def trace_flows(
    store: GraphStore,
    max_depth: int = 15,
    include_tests: bool = False,
) -> list[dict]:
    """Trace one execution flow per detected entry point using forward BFS.

    Entry points whose trace is trivial (``_trace_single_flow`` returns
    ``None``) are dropped. The result is ordered by criticality, highest first.

    Each returned flow dict contains:
      - name: human-readable flow name (entry point name)
      - entry_point: qualified name of the entry point
      - entry_point_id: node database id of the entry point
      - path: ordered list of node IDs in the flow
      - depth: maximum BFS depth reached
      - node_count: number of distinct nodes in the path
      - file_count: number of distinct files touched
      - files: list of distinct file paths
      - criticality: computed criticality score (0.0-1.0)
    """
    roots = detect_entry_points(store, include_tests=include_tests)
    if not roots:
        return []

    adjacency = store.load_flow_adjacency()
    traced = (_trace_single_flow(adjacency, root, max_depth) for root in roots)
    flows: list[dict] = [flow for flow in traced if flow is not None]

    # Most critical flows first.
    flows.sort(key=lambda f: f["criticality"], reverse=True)
    return flows
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# ---------------------------------------------------------------------------
|
|
304
|
+
# Criticality scoring
|
|
305
|
+
# ---------------------------------------------------------------------------
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def compute_criticality(flow: dict, adj: FlowAdjacency) -> float:
    """Combine five normalised factors into a 0.0-1.0 criticality score.

    Factor weights:
      - File spread: 0.30
      - External calls: 0.20
      - Security sensitivity: 0.25
      - Test coverage gap: 0.15
      - Depth: 0.10

    Returns ``0.0`` when the flow has no ``path`` or none of its node IDs
    resolve through ``adj.nodes_by_id``. The result is rounded to 4 decimals.
    """
    path: list[int] = flow.get("path", [])
    if not path:
        return 0.0

    by_id = adj.nodes_by_id
    by_qn = adj.nodes_by_qn
    outgoing = adj.calls_out
    tested = adj.has_tested_by

    members: list[GraphNode] = []
    for nid in path:
        member = by_id.get(nid)
        if member is not None:
            members.append(member)
    if not members:
        return 0.0
    member_total = len(members)

    # File spread: 1 file => 0.0, 5 or more files => 1.0.
    distinct_files = len({m.file_path for m in members})
    file_spread = min((distinct_files - 1) / 4.0, 1.0) if distinct_files > 1 else 0.0

    # External calls: outgoing call targets with no node in the graph.
    # 0 => 0.0, 5 or more => 1.0.
    external_calls = sum(
        1
        for m in members
        for callee in outgoing.get(m.qualified_name, ())
        if callee not in by_qn
    )
    external_score = min(external_calls / 5.0, 1.0)

    # Security sensitivity: share of nodes whose lowercased name or qualified
    # name contains a security keyword (each node counted at most once).
    def _mentions_security(m: GraphNode) -> bool:
        short = m.name.lower()
        full = m.qualified_name.lower()
        return any(kw in short or kw in full for kw in _SECURITY_KEYWORDS)

    sensitive = sum(1 for m in members if _mentions_security(m))
    security_score = min(sensitive / max(member_total, 1), 1.0)

    # Test coverage gap: share of nodes absent from ``has_tested_by``.
    covered = sum(1 for m in members if m.qualified_name in tested)
    test_gap = 1.0 - covered / max(member_total, 1)

    # Depth: 0 => 0.0, 10 or more => 1.0.
    depth_score = min(flow.get("depth", 0) / 10.0, 1.0)

    weighted = (
        file_spread * 0.30
        + external_score * 0.20
        + security_score * 0.25
        + test_gap * 0.15
        + depth_score * 0.10
    )
    return round(min(max(weighted, 0.0), 1.0), 4)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# ---------------------------------------------------------------------------
|
|
381
|
+
# Persistence
|
|
382
|
+
# ---------------------------------------------------------------------------
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def store_flows(store: GraphStore, flows: list[dict]) -> int:
    """Replace every stored flow with *flows* in a single transaction.

    All rows of ``flow_memberships`` and ``flows`` are deleted, then each flow
    and its per-node memberships are inserted. Any exception rolls the whole
    transaction back before it is re-raised.

    Returns the number of flows stored.
    """
    # NOTE: the connection is used directly because the DELETE + INSERT batch
    # is tied to the lifecycle of one explicit transaction.
    conn = store._conn

    if conn.in_transaction:
        # A pending transaction would make BEGIN IMMEDIATE fail; discard it.
        logger.warning("Rolling back uncommitted transaction before BEGIN IMMEDIATE")
        conn.rollback()

    conn.execute("BEGIN IMMEDIATE")
    try:
        conn.execute("DELETE FROM flow_memberships")
        conn.execute("DELETE FROM flows")

        stored = 0
        for flow in flows:
            path = flow.get("path", [])
            path_json = json.dumps(path)
            row_values = (
                flow["name"],
                flow["entry_point_id"],
                flow["depth"],
                flow["node_count"],
                flow["file_count"],
                flow["criticality"],
                path_json,
            )
            conn.execute(
                """INSERT INTO flows
                   (name, entry_point_id, depth, node_count, file_count,
                    criticality, path_json)
                   VALUES (?, ?, ?, ?, ?, ?, ?)""",
                row_values,
            )
            flow_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]

            # One membership row per path node; position is the path index.
            for position, node_id in enumerate(path):
                conn.execute(
                    "INSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) "
                    "VALUES (?, ?, ?)",
                    (flow_id, node_id, position),
                )
            stored += 1

        conn.commit()
    except BaseException:
        conn.rollback()
        raise
    return stored
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def _insert_traced_flow(conn, flow: dict) -> None:
    """Insert one flow row and its ``flow_memberships`` rows using *conn*.

    Does not commit; the caller owns the surrounding transaction.
    """
    node_ids = flow.get("path", [])
    conn.execute(
        """INSERT INTO flows
           (name, entry_point_id, depth, node_count, file_count,
            criticality, path_json)
           VALUES (?, ?, ?, ?, ?, ?, ?)""",
        (
            flow["name"],
            flow["entry_point_id"],
            flow["depth"],
            flow["node_count"],
            flow["file_count"],
            flow["criticality"],
            json.dumps(node_ids),
        ),
    )
    flow_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
    for position, node_id in enumerate(node_ids):
        conn.execute(
            "INSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) "
            "VALUES (?, ?, ?)",
            (flow_id, node_id, position),
        )


def incremental_trace_flows(
    store: GraphStore,
    changed_files: list[str],
    max_depth: int = 15,
) -> int:
    """Re-trace only the flows that touch *changed_files*.

    1. Find flow IDs whose memberships reference nodes in *changed_files*.
    2. Collect the entry-point node IDs of those flows before deleting them.
    3. Delete only the affected flows and their memberships (one transaction).
    4. Re-detect entry points (tests excluded, the ``detect_entry_points``
       default), keeping those in *changed_files* **or** whose node ID was an
       entry point of a deleted flow.
    5. BFS-trace each relevant entry point via :func:`_trace_single_flow`.
    6. INSERT the new flows in one explicit transaction, without clearing
       unrelated flows. If any insert fails the whole batch is rolled back
       and the exception is re-raised.

    Args:
        store: Graph store; its ``_conn`` connection is used directly.
        changed_files: File paths to match against ``nodes.file_path``.
        max_depth: Depth limit passed to :func:`_trace_single_flow`.

    Returns:
        The number of re-traced flows that were stored (``0`` when
        *changed_files* is empty).
    """
    if not changed_files:
        return 0

    conn = store._conn
    changed_file_set = set(changed_files)

    # ------------------------------------------------------------------
    # 1. Find affected flow IDs
    # ------------------------------------------------------------------
    # Only "?" placeholders are interpolated; the values are bound parameters.
    placeholders = ",".join("?" * len(changed_files))
    affected_rows = conn.execute(
        f"SELECT DISTINCT fm.flow_id FROM flow_memberships fm "  # nosec B608
        f"JOIN nodes n ON n.id = fm.node_id "
        f"WHERE n.file_path IN ({placeholders})",
        changed_files,
    ).fetchall()
    affected_ids = [r[0] for r in affected_rows]

    # ------------------------------------------------------------------
    # 2. Collect old entry-point node IDs before deletion
    # ------------------------------------------------------------------
    entry_point_ids: set[int] = set()
    if affected_ids:
        ep_placeholders = ",".join("?" * len(affected_ids))
        ep_rows = conn.execute(
            f"SELECT entry_point_id FROM flows "  # nosec B608
            f"WHERE id IN ({ep_placeholders})",
            affected_ids,
        ).fetchall()
        entry_point_ids = {r[0] for r in ep_rows}

    # ------------------------------------------------------------------
    # 3. Delete affected flows and their memberships
    # ------------------------------------------------------------------
    # Wrap in an explicit transaction so a crash mid-loop cannot leave
    # orphaned flow_memberships rows pointing at deleted flows. See #258.
    if affected_ids:
        if conn.in_transaction:
            conn.commit()
        conn.execute("BEGIN IMMEDIATE")
        try:
            for fid in affected_ids:
                conn.execute(
                    "DELETE FROM flow_memberships WHERE flow_id = ?", (fid,),
                )
                conn.execute("DELETE FROM flows WHERE id = ?", (fid,))
            conn.commit()
        except BaseException:
            conn.rollback()
            raise

    # ------------------------------------------------------------------
    # 4. Re-detect entry points and filter to relevant ones
    # ------------------------------------------------------------------
    relevant_eps = [
        ep for ep in detect_entry_points(store)
        if ep.file_path in changed_file_set or ep.id in entry_point_ids
    ]

    # ------------------------------------------------------------------
    # 5. BFS-trace each relevant entry point
    # ------------------------------------------------------------------
    new_flows: list[dict] = []
    if relevant_eps:
        adj = store.load_flow_adjacency()
        for ep in relevant_eps:
            flow = _trace_single_flow(adj, ep, max_depth)
            if flow is not None:
                new_flows.append(flow)

    # ------------------------------------------------------------------
    # 6. INSERT new flows without clearing unrelated ones
    # ------------------------------------------------------------------
    # Commit anything still pending so the explicit BEGIN below cannot fail
    # with "cannot start a transaction within a transaction".
    if conn.in_transaction:
        conn.commit()
    if not new_flows:
        return 0

    # One explicit transaction for the whole batch: an exception part-way
    # through must not leave a flow row without its memberships.
    conn.execute("BEGIN IMMEDIATE")
    try:
        for flow in new_flows:
            _insert_traced_flow(conn, flow)
        conn.commit()
    except BaseException:
        conn.rollback()
        raise
    return len(new_flows)
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
# ---------------------------------------------------------------------------
|
|
567
|
+
# Query helpers
|
|
568
|
+
# ---------------------------------------------------------------------------
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def get_flows(
    store: GraphStore,
    sort_by: str = "criticality",
    limit: int = 50,
) -> list[dict]:
    """Read stored flows from the ``flows`` table.

    Args:
        store: The graph store.
        sort_by: Column to order by. Accepted values are ``criticality``,
            ``depth``, ``node_count``, ``file_count`` (all descending) and
            ``name`` (ascending); anything else falls back to ``criticality``.
        limit: Maximum number of flows to return.

    Returns:
        One dict per row, with ``path_json`` decoded into ``path``.
    """
    descending_columns = ("criticality", "depth", "node_count", "file_count")
    # Whitelist the column name: it is interpolated into the SQL text below.
    if sort_by not in {*descending_columns, "name"}:
        sort_by = "criticality"
    direction = "DESC" if sort_by in descending_columns else "ASC"

    # NOTE: direct _conn access is intentional; the flows table is owned by
    # this module.
    rows = store._conn.execute(
        f"SELECT * FROM flows ORDER BY {sort_by} {direction} LIMIT ?",  # nosec B608
        (limit,),
    ).fetchall()

    return [
        {
            "id": row["id"],
            "name": _sanitize_name(row["name"]),
            "entry_point_id": row["entry_point_id"],
            "depth": row["depth"],
            "node_count": row["node_count"],
            "file_count": row["file_count"],
            "criticality": row["criticality"],
            "path": json.loads(row["path_json"]),
            "created_at": row["created_at"],
            "updated_at": row["updated_at"],
        }
        for row in rows
    ]
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def get_flow_by_id(store: GraphStore, flow_id: int) -> Optional[dict]:
    """Load one flow together with per-node step details.

    Returns ``None`` when no row has the given id. Otherwise the dict holds
    the flow metadata plus ``steps``: one entry (name, kind, file, line range,
    qualified name) for every path node the store can still resolve.
    """
    # NOTE: reads the flows table directly through the store's connection.
    record = store._conn.execute(
        "SELECT * FROM flows WHERE id = ?", (flow_id,)
    ).fetchone()
    if record is None:
        return None

    path_ids: list[int] = json.loads(record["path_json"])

    # Path IDs that no longer resolve to a node are left out of ``steps``.
    resolved = (store.get_node_by_id(nid) for nid in path_ids)
    steps: list[dict] = [
        {
            "node_id": node.id,
            "name": _sanitize_name(node.name),
            "kind": node.kind,
            "file": node.file_path,
            "line_start": node.line_start,
            "line_end": node.line_end,
            "qualified_name": _sanitize_name(node.qualified_name),
        }
        for node in resolved
        if node
    ]

    return {
        "id": record["id"],
        "name": _sanitize_name(record["name"]),
        "entry_point_id": record["entry_point_id"],
        "depth": record["depth"],
        "node_count": record["node_count"],
        "file_count": record["file_count"],
        "criticality": record["criticality"],
        "path": path_ids,
        "steps": steps,
        "created_at": record["created_at"],
        "updated_at": record["updated_at"],
    }
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
def get_affected_flows(
    store: GraphStore,
    changed_files: list[str],
) -> dict:
    """Look up the stored flows that contain a node from *changed_files*.

    Returns::

        {
            "affected_flows": [<flow dicts>],
            "total": <int>,
        }

    The flow dicts come from :func:`get_flow_by_id` and are ordered by
    criticality, highest first. The result is empty when *changed_files* is
    empty, when no node belongs to those files, or when no flow contains them.
    """
    hits: list[dict] = []

    # Each lookup runs only if the previous stage produced something.
    node_ids = store.get_node_ids_by_files(changed_files) if changed_files else None
    flow_ids = store.get_flow_ids_by_node_ids(node_ids) if node_ids else None

    for fid in flow_ids or ():
        detail = get_flow_by_id(store, fid)
        if detail:
            hits.append(detail)

    # Most critical flows first.
    hits.sort(key=lambda f: f.get("criticality", 0), reverse=True)

    return {"affected_flows": hits, "total": len(hits)}
|