codegraph-nav 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. codegraph_nav/__init__.py +194 -0
  2. codegraph_nav/ast_grep_analyzer.py +448 -0
  3. codegraph_nav/cli.py +223 -0
  4. codegraph_nav/code_navigator.py +1328 -0
  5. codegraph_nav/code_search.py +1009 -0
  6. codegraph_nav/colors.py +209 -0
  7. codegraph_nav/completions.py +354 -0
  8. codegraph_nav/dart_analyzer.py +301 -0
  9. codegraph_nav/dependency_graph.py +814 -0
  10. codegraph_nav/domain/__init__.py +20 -0
  11. codegraph_nav/domain/routes.py +337 -0
  12. codegraph_nav/domain/schemas.py +229 -0
  13. codegraph_nav/domain/tags.py +87 -0
  14. codegraph_nav/exporters.py +563 -0
  15. codegraph_nav/go_analyzer.py +273 -0
  16. codegraph_nav/graph/__init__.py +72 -0
  17. codegraph_nav/graph/builder.py +409 -0
  18. codegraph_nav/graph/communities.py +402 -0
  19. codegraph_nav/graph/flows.py +311 -0
  20. codegraph_nav/graph/query.py +380 -0
  21. codegraph_nav/graph/schema.py +266 -0
  22. codegraph_nav/graph/search.py +257 -0
  23. codegraph_nav/graph/store.py +517 -0
  24. codegraph_nav/hints.py +195 -0
  25. codegraph_nav/import_resolver.py +891 -0
  26. codegraph_nav/js_ts_analyzer.py +564 -0
  27. codegraph_nav/line_reader.py +664 -0
  28. codegraph_nav/mcp/__init__.py +39 -0
  29. codegraph_nav/mcp/__main__.py +5 -0
  30. codegraph_nav/mcp/server.py +2228 -0
  31. codegraph_nav/py.typed +2 -0
  32. codegraph_nav/ruby_analyzer.py +259 -0
  33. codegraph_nav/rust_analyzer.py +379 -0
  34. codegraph_nav/token_efficient_renderer.py +743 -0
  35. codegraph_nav/watcher.py +382 -0
  36. codegraph_nav-0.1.0.dist-info/METADATA +487 -0
  37. codegraph_nav-0.1.0.dist-info/RECORD +41 -0
  38. codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
  39. codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
  40. codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,402 @@
1
+ """Community detection — file-based clustering with optional Leiden algorithm.
2
+
3
+ Groups graph nodes into communities based on directory structure (always available)
4
+ or Leiden algorithm (requires igraph). Computes cohesion and coupling metrics.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from collections import Counter, defaultdict
11
+ from pathlib import Path
12
+ from typing import cast
13
+
14
+ from .store import GraphStore
15
+
16
+ # Edge weights for Leiden (if igraph available)
17
# Weight applied to each edge kind when the weighted graph for Leiden
# clustering is assembled.
EDGE_WEIGHTS = dict(
    CALLS=1.0,
    IMPORTS_FROM=0.5,
    INHERITS=0.8,
    CONTAINS=0.3,
    TESTED_BY=0.4,
)
24
+
25
+ # Check igraph availability
26
+ try:
27
+ import igraph
28
+
29
+ HAS_IGRAPH = True
30
+ except ImportError:
31
+ HAS_IGRAPH = False
32
+
33
+
34
+ # ==============================================================================
35
+ # Community Detection
36
+ # ==============================================================================
37
+
38
+
39
def detect_communities(store: GraphStore, min_size: int = 2) -> list[dict]:
    """Detect communities, preferring Leiden and falling back to file-based grouping.

    Args:
        store: Graph store the nodes and edges are read from.
        min_size: Communities with fewer members than this are dropped.

    Returns:
        The community dicts produced by ``detect_communities_leiden`` when
        igraph is importable and the Leiden run succeeds, otherwise those
        produced by ``detect_communities_file_based``.
    """
    if HAS_IGRAPH:
        try:
            return detect_communities_leiden(store, min_size)
        except Exception:
            # Leiden is an optional enhancement, so a failure must never break
            # detection -- but keep the reason discoverable instead of
            # discarding it silently.
            import logging

            logging.getLogger(__name__).debug(
                "Leiden community detection failed; using file-based grouping",
                exc_info=True,
            )
    return detect_communities_file_based(store, min_size)
47
+
48
+
49
def detect_communities_file_based(store: GraphStore, min_size: int = 2) -> list[dict]:
    """Cluster non-File nodes by the leading components of their file path.

    Every node whose kind is not ``"File"`` is bucketed under the first two
    components of its ``file_path`` (the only component when there is just
    one, ``"root"`` when there are none). Buckets with fewer than
    ``min_size`` members are discarded.

    Returns:
        One dict per surviving bucket with the keys ``name``, ``node_count``,
        ``cohesion``, ``file_prefix``, ``keywords`` (at most five) and
        ``member_ids``, ordered by ``node_count`` descending.
    """
    buckets: dict[str, list] = defaultdict(list)
    for entry in store.get_all_nodes():
        if entry["kind"] == "File":
            continue
        segments = Path(entry["file_path"]).parts
        if not segments:
            key = "root"
        elif len(segments) == 1:
            key = segments[0]
        else:
            key = str(Path(segments[0]) / segments[1])
        buckets[key].append(entry)

    found = []
    for key, group in buckets.items():
        if len(group) < min_size:
            continue
        ids = [item["id"] for item in group]
        qualified = {item["qualified_name"] for item in group}
        score = _compute_cohesion(store, qualified)
        label = _generate_community_name(group, key)
        top_words = _extract_keywords(group)
        found.append(
            {
                "name": label,
                "node_count": len(group),
                "cohesion": score,
                "file_prefix": key,
                "keywords": top_words[:5],
                "member_ids": ids,
            }
        )

    return sorted(found, key=lambda c: cast(int, c["node_count"]), reverse=True)
98
+
99
+
100
def detect_communities_leiden(store: GraphStore, min_size: int = 2) -> list[dict]:
    """Detect communities with igraph's Leiden algorithm.

    Non-File nodes become vertices. Every row of the ``edges`` table whose
    endpoints are both known, distinct nodes becomes an edge weighted by
    ``EDGE_WEIGHTS`` (0.5 for kinds not listed there).

    Falls back to ``detect_communities_file_based`` when igraph is not
    available or when no usable edge exists.

    Args:
        store: Graph store the nodes and edges are read from.
        min_size: Communities with fewer members than this are dropped.

    Returns:
        One dict per community with the keys ``name``, ``node_count``,
        ``cohesion``, ``file_prefix``, ``keywords`` (at most five) and
        ``member_ids``, ordered by ``node_count`` descending. An empty list
        when there are no non-File nodes.
    """
    if not HAS_IGRAPH:
        return detect_communities_file_based(store, min_size)

    nodes = [n for n in store.get_all_nodes() if n["kind"] != "File"]
    if not nodes:
        return []

    # Map qualified names to vertex indices, then translate edges.
    qn_to_idx = {n["qualified_name"]: i for i, n in enumerate(nodes)}
    edges_data = []
    weights = []

    for edge in store.conn.execute("SELECT * FROM edges").fetchall():
        src_idx = qn_to_idx.get(edge["source_qualified"])
        tgt_idx = qn_to_idx.get(edge["target_qualified"])
        # Skip edges to unknown nodes and self-loops.
        if src_idx is not None and tgt_idx is not None and src_idx != tgt_idx:
            edges_data.append((src_idx, tgt_idx))
            weights.append(EDGE_WEIGHTS.get(edge["kind"], 0.5))

    if not edges_data:
        return detect_communities_file_based(store, min_size)

    # Build an undirected graph: igraph documents its Leiden implementation
    # for undirected graphs, and a failure here would be masked by the
    # caller's fallback to file-based grouping.
    # NOTE(review): confirm directed-graph behaviour for the installed igraph.
    g = igraph.Graph(n=len(nodes), edges=edges_data, directed=False)
    g.es["weight"] = weights

    partition = g.community_leiden(
        objective_function="modularity",
        weights="weight",
        resolution=1.0,
    )

    # partition.membership[i] is the community id of vertex i.
    community_nodes: dict[int, list] = defaultdict(list)
    for node_idx, comm_id in enumerate(partition.membership):
        community_nodes[comm_id].append(nodes[node_idx])

    communities = []
    for members in community_nodes.values():
        if len(members) < min_size:
            continue

        member_ids = [m["id"] for m in members]
        member_qns = {m["qualified_name"] for m in members}
        cohesion = _compute_cohesion(store, member_qns)

        # Leiden communities are not tied to a directory, so derive the
        # prefix from whatever path components the members share.
        prefix = _extract_file_prefix([m["file_path"] for m in members])

        name = _generate_community_name(members, prefix)
        keywords = _extract_keywords(members)

        communities.append(
            {
                "name": name,
                "node_count": len(members),
                "cohesion": cohesion,
                "file_prefix": prefix,
                "keywords": keywords[:5],
                "member_ids": member_ids,
            }
        )

    communities.sort(key=lambda c: cast(int, c["node_count"]), reverse=True)
    return communities
173
+
174
+
175
+ # ==============================================================================
176
+ # Helpers
177
+ # ==============================================================================
178
+
179
+
180
+ def _compute_cohesion(store: GraphStore, member_qns: set[str]) -> float:
181
+ """Cohesion = internal_edges / (internal_edges + external_edges)."""
182
+ internal = 0
183
+ external = 0
184
+
185
+ for qn in member_qns:
186
+ for edge in store.get_edges_from(qn):
187
+ if edge["target_qualified"] in member_qns:
188
+ internal += 1
189
+ else:
190
+ external += 1
191
+
192
+ total = internal + external
193
+ if total == 0:
194
+ return 0.0
195
+ return round(internal / total, 4)
196
+
197
+
198
def _generate_community_name(members: list, prefix: str) -> str:
    """Build a community label from the prefix's last component plus a hint.

    The hint is the most frequent Class-member name when it accounts for more
    than 40% of all members; otherwise the top keyword extracted from the
    member names. With no hint available the bare directory name is returned.
    """
    base = Path(prefix).name

    class_tally = Counter(m["name"] for m in members if m["kind"] == "Class")
    if class_tally:
        leader, occurrences = class_tally.most_common(1)[0]
        if occurrences / len(members) > 0.4:
            return f"{base}-{leader}"

    top_words = _extract_keywords(members)
    return f"{base}-{top_words[0]}" if top_words else base
214
+
215
+
216
+ def _extract_file_prefix(file_paths: list[str]) -> str:
217
+ """Find common directory prefix from file paths."""
218
+ if not file_paths:
219
+ return "root"
220
+ parts_list = [Path(fp).parts for fp in file_paths]
221
+ if not parts_list:
222
+ return "root"
223
+
224
+ prefix_parts = []
225
+ for i in range(min(len(p) for p in parts_list)):
226
+ vals = {p[i] for p in parts_list}
227
+ if len(vals) == 1:
228
+ prefix_parts.append(vals.pop())
229
+ else:
230
+ break
231
+
232
+ return str(Path(*prefix_parts)) if prefix_parts else "root"
233
+
234
+
235
def _extract_keywords(members: list) -> list[str]:
    """Return up to five of the most frequent words found in member names.

    Each member's ``name`` is split into words, lower-cased, and counted.
    Words of two characters or fewer and common filler words are ignored.
    """
    ignored = frozenset(
        (
            "get",
            "set",
            "is",
            "has",
            "do",
            "to",
            "from",
            "on",
            "the",
            "a",
            "an",
            "test",
            "self",
            "init",
            "new",
            "create",
            "make",
            "build",
        )
    )

    tally: Counter = Counter()
    for member in members:
        for token in _split_name(member["name"]):
            lowered = token.lower()
            if len(lowered) <= 2 or lowered in ignored:
                continue
            tally[lowered] += 1

    return [word for word, _count in tally.most_common(5)]
269
+
270
+
271
+ def _split_name(name: str) -> list[str]:
272
+ """Split camelCase and snake_case into words."""
273
+ # Snake case
274
+ if "_" in name:
275
+ return [p for p in name.split("_") if p]
276
+ # CamelCase
277
+ parts = re.sub(r"([A-Z])", r" \1", name).split()
278
+ return [p for p in parts if p]
279
+
280
+
281
+ # ==============================================================================
282
+ # Coupling Warnings
283
+ # ==============================================================================
284
+
285
+
286
def get_coupling_warnings(store: GraphStore, communities: list[dict]) -> list[str]:
    """Report cross-community coupling, oversized and low-cohesion communities.

    Args:
        store: Graph store; its ``edges`` table is queried for CALLS edges and
            ``get_node`` resolves qualified names to nodes.
        communities: Community dicts carrying ``name``, ``member_ids``,
            ``node_count`` and ``cohesion``.

    Returns:
        Warning strings, in this order: up to five ``coupling:`` entries for
        community pairs linked by at least three CALLS edges, then a ``WARN``
        entry for each community with more than 50 nodes, then a ``WARN``
        entry for each community with cohesion below 0.3 and more than five
        nodes.
    """
    warnings: list[str] = []

    # Node id -> name of the community that contains it.
    node_to_community: dict[int, str] = {}
    for comm in communities:
        for nid in comm["member_ids"]:
            node_to_community[nid] = comm["name"]

    # The same qualified name appears on many edges; resolve each one only
    # once instead of hitting the store twice per edge.
    resolved: dict = {}

    def lookup(qualified_name):
        if qualified_name not in resolved:
            resolved[qualified_name] = store.get_node(qualified_name)
        return resolved[qualified_name]

    cross_edges: Counter = Counter()
    all_edges = store.conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'CALLS'"
    ).fetchall()

    for edge in all_edges:
        src_node = lookup(edge[0])
        tgt_node = lookup(edge[1])
        if src_node and tgt_node:
            src_comm = node_to_community.get(src_node["id"])
            tgt_comm = node_to_community.get(tgt_node["id"])
            if src_comm and tgt_comm and src_comm != tgt_comm:
                cross_edges[f"{src_comm}→{tgt_comm}"] += 1

    # Only the five busiest directed pairs are considered, and only those
    # with at least three edges are reported.
    for pair, count in cross_edges.most_common(5):
        if count >= 3:
            warnings.append(f"coupling: {pair} ({count} edges)")

    for comm in communities:
        if comm["node_count"] > 50:
            warnings.append(f"WARN: {comm['name']} too large ({comm['node_count']} nodes)")

    for comm in communities:
        if comm["cohesion"] < 0.3 and comm["node_count"] > 5:
            warnings.append(f"WARN: {comm['name']} low cohesion ({comm['cohesion']:.2f})")

    return warnings
329
+
330
+
331
+ # ==============================================================================
332
+ # Persistence
333
+ # ==============================================================================
334
+
335
+
336
def store_communities(store: GraphStore, communities: list[dict]):
    """Replace the communities persisted in ``store`` with the given ones.

    Existing communities are cleared first; each community is then inserted
    and its ``member_ids`` attached to the id returned by the insert.
    ``file_prefix`` and ``keywords`` are optional and passed as ``None`` when
    absent.
    """
    store.clear_communities()
    for entry in communities:
        fields = {
            "name": entry["name"],
            "cohesion": entry["cohesion"],
            "node_count": entry["node_count"],
            "file_prefix": entry.get("file_prefix"),
            "keywords": entry.get("keywords"),
        }
        community_id = store.insert_community(**fields)
        store.add_community_members(community_id, entry["member_ids"])
348
+
349
+
350
+ # ==============================================================================
351
+ # Formatters
352
+ # ==============================================================================
353
+
354
+
355
def format_communities_minimal(communities: list[dict], limit: int = 10) -> str:
    """Render communities as a header line plus one line per community.

    At most ``limit`` communities are listed, each with its size, cohesion
    (two decimals) and up to three keywords; a trailing line reports how many
    were left out. An empty input yields a fixed "nothing found" message.
    """
    if not communities:
        return "No communities detected."

    rendered = [f"{len(communities)} communities:"]
    for item in communities[:limit]:
        tags = ",".join(item.get("keywords", [])[:3])
        suffix = f" [{tags}]" if tags else ""
        rendered.append(
            f" {item['name']} size:{item['node_count']} cohesion:{item['cohesion']:.2f}{suffix}"
        )

    hidden = len(communities) - limit
    if hidden > 0:
        rendered.append(f" ... +{hidden} more")
    return "\n".join(rendered)
371
+
372
+
373
def format_architecture_overview(
    communities: list[dict],
    coupling_warnings: list[str],
    hubs: list[dict],
    flows_count: int,
) -> str:
    """Render a short multi-line architecture summary.

    Sections appear in a fixed order and only when they have data: the five
    largest-listed communities, the first three coupling warnings, the first
    three hubs, and the flow count. With no data at all a fixed placeholder
    message is returned.
    """
    sections = []

    if communities:
        labels = ", ".join(f"{c['name']}({c['node_count']})" for c in communities[:5])
        sections.append(f"communities({len(communities)}): {labels}")

    if coupling_warnings:
        sections.extend(f" {warning}" for warning in coupling_warnings[:3])

    if hubs:
        hub_labels = ", ".join(f"{h['file']}({h['imports']}←)" for h in hubs[:3])
        sections.append(f"hubs: {hub_labels}")

    if flows_count:
        sections.append(f"flows: {flows_count}")

    if not sections:
        return "No architecture data available."
    return "\n".join(sections)
@@ -0,0 +1,311 @@
1
+ """Execution flow detection, BFS tracing, and criticality scoring.
2
+
3
+ Detects entry points via decorator patterns, conventional names, and
4
+ graph topology (no incoming CALLS). Traces flows via forward BFS.
5
+ Scores criticality using a 5-factor weighted formula.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from collections import deque
13
+
14
+ from .schema import MAX_BFS_DEPTH, SECURITY_KEYWORDS
15
+ from .store import GraphStore
16
+
17
+ # ==============================================================================
18
+ # Entry Point Detection
19
+ # ==============================================================================
20
+
21
# Decorator spellings treated as entry-point markers; matched
# case-insensitively anywhere in the decorator text.
ENTRY_DECORATOR_PATTERN = re.compile(
    "|".join(
        (
            r"@(app|router|blueprint)\.(get|post|put|delete|patch|route|websocket)",
            r"@click\.(command|group)",
            r"@celery\.task",
            r"@(api_view|action)",
            r"@(Get|Post|Put|Delete|Patch|RequestMapping)",
        )
    ),
    flags=re.IGNORECASE,
)

# Function names treated as entry points: a fixed set of exact names, any
# name starting with "test_", and a few handler-style prefixes. Matching is
# case-insensitive.
ENTRY_NAME_PATTERN = re.compile(
    "|".join(
        (
            r"^(main|__main__|cli|run|start|setup|app|create_app|entrypoint|handler)$",
            r"^test_",
            r"^(on_|handle_|process_|dispatch_)",
        )
    ),
    flags=re.IGNORECASE,
)
36
+
37
+
38
def detect_entry_points(store: GraphStore) -> list:
    """Find execution entry points in the graph.

    A Function/Method node is an entry point when ANY of these holds
    (checked in this order, stopping at the first hit):

    1. It has no incoming CALLS edges.
    2. Its ``extra`` JSON lists a decorator matching ``ENTRY_DECORATOR_PATTERN``.
    3. Its name matches ``ENTRY_NAME_PATTERN``.

    Args:
        store: Graph store; ``store.conn`` is queried for candidate nodes and
            ``store.get_edges_to`` for incoming calls.

    Returns:
        The matching node rows in query order, at most one per qualified name.
    """
    candidates = store.conn.execute(
        "SELECT * FROM nodes WHERE kind IN ('Function', 'Method')"
    ).fetchall()

    entry_points = []
    seen = set()

    for node in candidates:
        qn = node["qualified_name"]
        if qn in seen:
            continue

        if (
            not store.get_edges_to(qn, kind="CALLS")
            or _has_entry_decorator(node["extra"])
            or ENTRY_NAME_PATTERN.search(node["name"])
        ):
            seen.add(qn)
            entry_points.append(node)

    return entry_points


def _has_entry_decorator(extra) -> bool:
    """Return True when a node's JSON ``extra`` payload lists an entry decorator.

    Malformed JSON, a payload that is not a JSON object, a missing/null or
    non-list ``decorators`` value, and non-string decorator entries are all
    treated as "no decorator" rather than raising.
    """
    try:
        data = json.loads(extra or "{}")
    except (json.JSONDecodeError, TypeError):
        return False
    # json.loads may legally return a list, string, number, ... -- only an
    # object can carry a "decorators" key.
    if not isinstance(data, dict):
        return False
    decorators = data.get("decorators") or []
    if not isinstance(decorators, (list, tuple)):
        return False
    return any(
        isinstance(dec, str) and ENTRY_DECORATOR_PATTERN.search(dec)
        for dec in decorators
    )
90
+
91
+
92
+ # ==============================================================================
93
+ # BFS Flow Tracing
94
+ # ==============================================================================
95
+
96
+
97
def trace_flows(
    store: GraphStore,
    max_depth: int = MAX_BFS_DEPTH,
    limit: int = 100,
) -> list[dict]:
    """Trace execution flows starting at the graph's non-test entry points.

    Entry points flagged ``is_test`` are dropped, the first ``limit`` of the
    rest are traced, and traces that produce no flow are discarded.

    Returns:
        Flow dicts ordered by ``criticality``, highest first.
    """
    roots = [ep for ep in detect_entry_points(store) if not ep["is_test"]]

    traced = []
    for root in roots[:limit]:
        result = _trace_single_flow(store, root, max_depth)
        if result:
            traced.append(result)

    return sorted(traced, key=lambda f: f["criticality"], reverse=True)
121
+
122
+
123
def _trace_single_flow(
    store: GraphStore,
    entry_point,
    max_depth: int = MAX_BFS_DEPTH,
) -> dict | None:
    """Walk forward CALLS edges breadth-first from one entry point.

    Nodes at ``max_depth`` are not expanded further. Targets whose qualified
    name starts with ``__unresolved__``, targets already visited, and targets
    the store cannot resolve to a node are not added to the flow.

    Returns:
        A flow dict (name, entry point, visited ids/names, depth, node and
        file counts, sorted files, criticality), or ``None`` when nothing
        beyond the entry point itself was reached.
    """
    start_qn = entry_point["qualified_name"]
    pending = deque([(start_qn, 0)])
    seen = {start_qn}
    ids = [entry_point["id"]]
    names = [entry_point["name"]]
    touched_files = {entry_point["file_path"]}
    deepest = 0

    while pending:
        qn, level = pending.popleft()
        if level >= max_depth:
            continue

        next_level = level + 1
        for call in store.get_edges_from(qn, kind="CALLS"):
            callee_qn = call["target_qualified"]
            if callee_qn in seen or callee_qn.startswith("__unresolved__"):
                continue
            # Mark before resolving so an unresolvable target is looked up once.
            seen.add(callee_qn)

            callee = store.get_node(callee_qn)
            if not callee:
                continue
            ids.append(callee["id"])
            names.append(callee["name"])
            touched_files.add(callee["file_path"])
            deepest = max(deepest, next_level)
            pending.append((callee_qn, next_level))

    # A flow consisting only of its entry point is not worth reporting.
    if len(ids) < 2:
        return None

    score = compute_criticality(ids, names, touched_files, deepest, store)

    return {
        "name": entry_point["name"],
        "entry_point": entry_point["qualified_name"],
        "entry_point_id": entry_point["id"],
        "path_ids": ids,
        "path_names": names,
        "depth": deepest,
        "node_count": len(ids),
        "file_count": len(touched_files),
        "files": sorted(touched_files),
        "criticality": score,
    }
175
+
176
+
177
+ # ==============================================================================
178
+ # Criticality Scoring
179
+ # ==============================================================================
180
+
181
+
182
def compute_criticality(
    path_ids: list[int],
    path_names: list[str],
    files: set[str],
    depth: int,
    store: GraphStore,
) -> float:
    """Compute a flow criticality score in the range 0.0-1.0.

    Weighted sum of five factors, each normalised to 0-1:

    - file_spread (0.30): files touched beyond the first, saturating at 5 files
    - external_calls (0.20): CALLS edges to ``__unresolved__`` targets,
      saturating at 5
    - security_hits (0.25): fraction of nodes whose name contains a
      ``SECURITY_KEYWORDS`` entry
    - test_gap (0.15): fraction of nodes without an incoming TESTED_BY edge
    - depth (0.10): call depth, saturating at 10

    Args:
        path_ids: Ids of the nodes in the flow.
        path_names: Names of the nodes in the flow.
        files: File paths the flow touches.
        depth: Maximum depth reached while tracing.
        store: Graph store used to resolve nodes and their edges.

    Returns:
        The score rounded to four decimals; ``0.0`` for an empty flow.
    """
    node_count = len(path_ids)
    if node_count == 0:
        return 0.0

    # 1. File spread
    file_count = len(files)
    file_spread = min((file_count - 1) / 4.0, 1.0) if file_count > 1 else 0.0

    # 2 + 4. External calls and test coverage both need each node's qualified
    # name, so resolve every node once and gather both counts in one pass.
    external_count = 0
    tested_count = 0
    for nid in path_ids:
        node = store.get_node_by_id(nid)
        if not node:
            continue
        qn = node["qualified_name"]
        external_count += sum(
            1
            for e in store.get_edges_from(qn, kind="CALLS")
            if e["target_qualified"].startswith("__unresolved__")
        )
        if store.get_edges_to(qn, kind="TESTED_BY"):
            tested_count += 1
    external_score = min(external_count / 5.0, 1.0)
    # Nodes that could not be resolved count as untested.
    test_gap = 1.0 - tested_count / node_count

    # 3. Security sensitivity
    security_hits = sum(
        1 for name in path_names if any(kw in name.lower() for kw in SECURITY_KEYWORDS)
    )
    security_score = min(security_hits / node_count, 1.0)

    # 5. Depth
    depth_score = min(depth / 10.0, 1.0)

    criticality = (
        file_spread * 0.30
        + external_score * 0.20
        + security_score * 0.25
        + test_gap * 0.15
        + depth_score * 0.10
    )

    return round(min(max(criticality, 0.0), 1.0), 4)
249
+
250
+
251
+ # ==============================================================================
252
+ # Flow Persistence
253
+ # ==============================================================================
254
+
255
+
256
def store_flows(store: GraphStore, flows: list[dict]):
    """Replace the flows persisted in ``store`` with the given ones.

    Existing flows are cleared first; each flow is then inserted with its
    name, entry point id, depth, node/file counts, criticality and path ids.
    """
    persisted_fields = (
        "name",
        "entry_point_id",
        "depth",
        "node_count",
        "file_count",
        "criticality",
        "path_ids",
    )
    store.clear_flows()
    for record in flows:
        store.insert_flow(**{field: record[field] for field in persisted_fields})
269
+
270
+
271
+ # ==============================================================================
272
+ # Formatters
273
+ # ==============================================================================
274
+
275
+
276
def format_flow_minimal(flow: dict) -> str:
    """Format a single flow as one compact line.

    Shows the first five names of the path (with a ``+N`` marker for the
    rest), any domain tags inferred from all names in the path, and the
    criticality with two decimals.

    Example: flow login → authenticate → verify_password [auth] crit:0.82
    """
    all_names = flow["path_names"]
    rendered = " → ".join(all_names[:5])
    overflow = len(all_names) - 5
    if overflow > 0:
        rendered += f" +{overflow}"

    # A tag applies when any name in the flow contains one of its substrings.
    tag_rules = (
        ("auth", ("auth", "login", "password", "token", "session")),
        ("db", ("db", "query", "sql", "model")),
        ("api", ("http", "request", "api", "route")),
    )
    lowered_names = [entry.lower() for entry in all_names]
    found = [
        tag
        for tag, needles in tag_rules
        if any(needle in entry for entry in lowered_names for needle in needles)
    ]

    suffix = f" [{','.join(sorted(found))}]" if found else ""
    return f"flow {rendered}{suffix} crit:{flow['criticality']:.2f}"
299
+
300
+
301
def format_flows_minimal(flows: list[dict], limit: int = 10) -> str:
    """Render a flow list as a header line plus one line per flow.

    At most ``limit`` flows are listed; a trailing line reports how many were
    left out. An empty input yields a fixed "nothing found" message.
    """
    if not flows:
        return "No execution flows detected."

    rendered = [f"{len(flows)} flows detected:"]
    rendered.extend(f" {format_flow_minimal(item)}" for item in flows[:limit])

    hidden = len(flows) - limit
    if hidden > 0:
        rendered.append(f" ... +{hidden} more")
    return "\n".join(rendered)