codegraph-nav 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_nav/__init__.py +194 -0
- codegraph_nav/ast_grep_analyzer.py +448 -0
- codegraph_nav/cli.py +223 -0
- codegraph_nav/code_navigator.py +1328 -0
- codegraph_nav/code_search.py +1009 -0
- codegraph_nav/colors.py +209 -0
- codegraph_nav/completions.py +354 -0
- codegraph_nav/dart_analyzer.py +301 -0
- codegraph_nav/dependency_graph.py +814 -0
- codegraph_nav/domain/__init__.py +20 -0
- codegraph_nav/domain/routes.py +337 -0
- codegraph_nav/domain/schemas.py +229 -0
- codegraph_nav/domain/tags.py +87 -0
- codegraph_nav/exporters.py +563 -0
- codegraph_nav/go_analyzer.py +273 -0
- codegraph_nav/graph/__init__.py +72 -0
- codegraph_nav/graph/builder.py +409 -0
- codegraph_nav/graph/communities.py +402 -0
- codegraph_nav/graph/flows.py +311 -0
- codegraph_nav/graph/query.py +380 -0
- codegraph_nav/graph/schema.py +266 -0
- codegraph_nav/graph/search.py +257 -0
- codegraph_nav/graph/store.py +517 -0
- codegraph_nav/hints.py +195 -0
- codegraph_nav/import_resolver.py +891 -0
- codegraph_nav/js_ts_analyzer.py +564 -0
- codegraph_nav/line_reader.py +664 -0
- codegraph_nav/mcp/__init__.py +39 -0
- codegraph_nav/mcp/__main__.py +5 -0
- codegraph_nav/mcp/server.py +2228 -0
- codegraph_nav/py.typed +2 -0
- codegraph_nav/ruby_analyzer.py +259 -0
- codegraph_nav/rust_analyzer.py +379 -0
- codegraph_nav/token_efficient_renderer.py +743 -0
- codegraph_nav/watcher.py +382 -0
- codegraph_nav-0.1.0.dist-info/METADATA +487 -0
- codegraph_nav-0.1.0.dist-info/RECORD +41 -0
- codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
- codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
- codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
- codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"""Community detection — file-based clustering with optional Leiden algorithm.
|
|
2
|
+
|
|
3
|
+
Groups graph nodes into communities based on directory structure (always available)
|
|
4
|
+
or Leiden algorithm (requires igraph). Computes cohesion and coupling metrics.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from collections import Counter, defaultdict
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import cast
|
|
13
|
+
|
|
14
|
+
from .store import GraphStore
|
|
15
|
+
|
|
16
|
+
# Edge weights for Leiden (if igraph available).
# Keyed by edge kind. detect_communities_leiden() looks up each edge's "kind"
# in this table and uses 0.5 for any kind that is not listed here.
EDGE_WEIGHTS = {
    "CALLS": 1.0,
    "IMPORTS_FROM": 0.5,
    "INHERITS": 0.8,
    "CONTAINS": 0.3,
    "TESTED_BY": 0.4,
}
|
|
24
|
+
|
|
25
|
+
# Check igraph availability.
# igraph is optional: HAS_IGRAPH records whether the import succeeded so the
# detection functions below can choose between Leiden and file-based grouping.
try:
    import igraph

    HAS_IGRAPH = True
except ImportError:
    HAS_IGRAPH = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ==============================================================================
|
|
35
|
+
# Community Detection
|
|
36
|
+
# ==============================================================================
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def detect_communities(store: GraphStore, min_size: int = 2) -> list[dict]:
    """Detect communities, preferring Leiden and falling back to file-based grouping.

    Args:
        store: Graph store the nodes and edges are read from.
        min_size: Minimum number of members a community needs to be reported.

    Returns:
        The community dicts produced by ``detect_communities_leiden`` when
        igraph is importable and the Leiden run succeeds, otherwise those
        produced by ``detect_communities_file_based``.
    """
    if HAS_IGRAPH:
        try:
            return detect_communities_leiden(store, min_size)
        except Exception:
            # Deliberately best-effort: any Leiden failure degrades to the
            # file-based strategy. Record the failure so it can be diagnosed
            # instead of disappearing silently.
            import logging

            logging.getLogger(__name__).debug(
                "Leiden community detection failed; using file-based fallback",
                exc_info=True,
            )
    return detect_communities_file_based(store, min_size)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def detect_communities_file_based(store: GraphStore, min_size: int = 2) -> list[dict]:
    """Group non-File nodes by the first two directory components of their file.

    Args:
        store: Graph store; ``get_all_nodes()`` supplies the nodes to group.
        min_size: Groups with fewer members than this are dropped.

    Returns:
        One dict per surviving group with the keys ``name``, ``node_count``,
        ``cohesion``, ``file_prefix``, ``keywords`` (at most five) and
        ``member_ids``, sorted by ``node_count`` in descending order.
    """
    groups: dict[str, list] = defaultdict(list)

    for node in store.get_all_nodes():
        if node["kind"] == "File":
            continue
        # Key on directories only. Slicing the full path's components would
        # let the file name act as a "directory" for shallow paths
        # ("pkg/mod.py" -> "pkg/mod.py"), splitting one directory into a
        # group per file. Files with no directory component go to "root".
        dir_parts = Path(node["file_path"]).parent.parts[:2]
        prefix = str(Path(*dir_parts)) if dir_parts else "root"
        groups[prefix].append(node)

    communities = []
    for prefix, members in groups.items():
        if len(members) < min_size:
            continue

        member_ids = [m["id"] for m in members]
        member_qns = {m["qualified_name"] for m in members}

        cohesion = _compute_cohesion(store, member_qns)
        name = _generate_community_name(members, prefix)
        keywords = _extract_keywords(members)

        communities.append(
            {
                "name": name,
                "node_count": len(members),
                "cohesion": cohesion,
                "file_prefix": prefix,
                "keywords": keywords[:5],
                "member_ids": member_ids,
            }
        )

    # Largest communities first.
    communities.sort(key=lambda c: cast(int, c["node_count"]), reverse=True)
    return communities
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def detect_communities_leiden(store: GraphStore, min_size: int = 2) -> list[dict]:
    """Detect communities with igraph's Leiden algorithm (modularity objective).

    Falls back to ``detect_communities_file_based`` when igraph is not
    importable or when no edge connects two distinct non-File nodes.

    Args:
        store: Graph store; nodes come from ``get_all_nodes()`` and edges from
            the ``edges`` table via ``store.conn``.
        min_size: Communities with fewer members than this are dropped.

    Returns:
        One dict per surviving community with the keys ``name``,
        ``node_count``, ``cohesion``, ``file_prefix``, ``keywords`` (at most
        five) and ``member_ids``, sorted by ``node_count`` descending.
        An empty list when there are no non-File nodes.
    """
    if not HAS_IGRAPH:
        return detect_communities_file_based(store, min_size)

    nodes = [n for n in store.get_all_nodes() if n["kind"] != "File"]
    if not nodes:
        return []

    # Vertex index for every node, keyed by qualified name.
    qn_to_idx = {n["qualified_name"]: i for i, n in enumerate(nodes)}
    edges_data = []
    weights = []

    for edge in store.conn.execute("SELECT * FROM edges").fetchall():
        src_idx = qn_to_idx.get(edge["source_qualified"])
        tgt_idx = qn_to_idx.get(edge["target_qualified"])
        # Skip edges touching unknown/File nodes and self-loops.
        if src_idx is None or tgt_idx is None or src_idx == tgt_idx:
            continue
        edges_data.append((src_idx, tgt_idx))
        weights.append(EDGE_WEIGHTS.get(edge["kind"], 0.5))

    if not edges_data:
        return detect_communities_file_based(store, min_size)

    # Build the graph undirected. Clustering only needs to know which symbols
    # are connected, and igraph's Leiden implementation has rejected directed
    # graphs; that error was swallowed by detect_communities(), which then
    # silently used the file-based fallback on every run.
    # NOTE(review): confirm directed-graph behaviour for the igraph versions
    # this package supports.
    g = igraph.Graph(n=len(nodes), edges=edges_data, directed=False)
    g.es["weight"] = weights

    partition = g.community_leiden(
        objective_function="modularity",
        weights="weight",
        resolution=1.0,
    )

    # partition.membership[i] is the community id of vertex i.
    community_nodes: dict[int, list] = defaultdict(list)
    for node_idx, comm_id in enumerate(partition.membership):
        community_nodes[comm_id].append(nodes[node_idx])

    communities = []
    for members in community_nodes.values():
        if len(members) < min_size:
            continue

        member_ids = [m["id"] for m in members]
        member_qns = {m["qualified_name"] for m in members}

        cohesion = _compute_cohesion(store, member_qns)
        prefix = _extract_file_prefix([m["file_path"] for m in members])
        name = _generate_community_name(members, prefix)
        keywords = _extract_keywords(members)

        communities.append(
            {
                "name": name,
                "node_count": len(members),
                "cohesion": cohesion,
                "file_prefix": prefix,
                "keywords": keywords[:5],
                "member_ids": member_ids,
            }
        )

    # Largest communities first.
    communities.sort(key=lambda c: cast(int, c["node_count"]), reverse=True)
    return communities
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ==============================================================================
|
|
176
|
+
# Helpers
|
|
177
|
+
# ==============================================================================
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _compute_cohesion(store: GraphStore, member_qns: set[str]) -> float:
|
|
181
|
+
"""Cohesion = internal_edges / (internal_edges + external_edges)."""
|
|
182
|
+
internal = 0
|
|
183
|
+
external = 0
|
|
184
|
+
|
|
185
|
+
for qn in member_qns:
|
|
186
|
+
for edge in store.get_edges_from(qn):
|
|
187
|
+
if edge["target_qualified"] in member_qns:
|
|
188
|
+
internal += 1
|
|
189
|
+
else:
|
|
190
|
+
external += 1
|
|
191
|
+
|
|
192
|
+
total = internal + external
|
|
193
|
+
if total == 0:
|
|
194
|
+
return 0.0
|
|
195
|
+
return round(internal / total, 4)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _generate_community_name(members: list, prefix: str) -> str:
|
|
199
|
+
"""Generate community name from prefix + dominant class/keyword."""
|
|
200
|
+
# Find dominant class (>40% of members)
|
|
201
|
+
classes = [m["name"] for m in members if m["kind"] == "Class"]
|
|
202
|
+
if classes:
|
|
203
|
+
class_counts = Counter(classes)
|
|
204
|
+
top_class, top_count = class_counts.most_common(1)[0]
|
|
205
|
+
if top_count / len(members) > 0.4:
|
|
206
|
+
return f"{Path(prefix).name}-{top_class}"
|
|
207
|
+
|
|
208
|
+
# Use directory name + top keyword
|
|
209
|
+
keywords = _extract_keywords(members)
|
|
210
|
+
if keywords:
|
|
211
|
+
return f"{Path(prefix).name}-{keywords[0]}"
|
|
212
|
+
|
|
213
|
+
return Path(prefix).name
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _extract_file_prefix(file_paths: list[str]) -> str:
|
|
217
|
+
"""Find common directory prefix from file paths."""
|
|
218
|
+
if not file_paths:
|
|
219
|
+
return "root"
|
|
220
|
+
parts_list = [Path(fp).parts for fp in file_paths]
|
|
221
|
+
if not parts_list:
|
|
222
|
+
return "root"
|
|
223
|
+
|
|
224
|
+
prefix_parts = []
|
|
225
|
+
for i in range(min(len(p) for p in parts_list)):
|
|
226
|
+
vals = {p[i] for p in parts_list}
|
|
227
|
+
if len(vals) == 1:
|
|
228
|
+
prefix_parts.append(vals.pop())
|
|
229
|
+
else:
|
|
230
|
+
break
|
|
231
|
+
|
|
232
|
+
return str(Path(*prefix_parts)) if prefix_parts else "root"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _extract_keywords(members: list) -> list[str]:
|
|
236
|
+
"""Extract frequent keywords from member names via camelCase/snake_case splitting."""
|
|
237
|
+
words: Counter = Counter()
|
|
238
|
+
stop_words = {
|
|
239
|
+
"get",
|
|
240
|
+
"set",
|
|
241
|
+
"is",
|
|
242
|
+
"has",
|
|
243
|
+
"do",
|
|
244
|
+
"to",
|
|
245
|
+
"from",
|
|
246
|
+
"on",
|
|
247
|
+
"the",
|
|
248
|
+
"a",
|
|
249
|
+
"an",
|
|
250
|
+
"test",
|
|
251
|
+
"self",
|
|
252
|
+
"init",
|
|
253
|
+
"new",
|
|
254
|
+
"create",
|
|
255
|
+
"make",
|
|
256
|
+
"build",
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
for m in members:
|
|
260
|
+
name = m["name"]
|
|
261
|
+
# Split camelCase and snake_case
|
|
262
|
+
parts = _split_name(name)
|
|
263
|
+
for part in parts:
|
|
264
|
+
part_lower = part.lower()
|
|
265
|
+
if len(part_lower) > 2 and part_lower not in stop_words:
|
|
266
|
+
words[part_lower] += 1
|
|
267
|
+
|
|
268
|
+
return [w for w, _ in words.most_common(5)]
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _split_name(name: str) -> list[str]:
|
|
272
|
+
"""Split camelCase and snake_case into words."""
|
|
273
|
+
# Snake case
|
|
274
|
+
if "_" in name:
|
|
275
|
+
return [p for p in name.split("_") if p]
|
|
276
|
+
# CamelCase
|
|
277
|
+
parts = re.sub(r"([A-Z])", r" \1", name).split()
|
|
278
|
+
return [p for p in parts if p]
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# ==============================================================================
|
|
282
|
+
# Coupling Warnings
|
|
283
|
+
# ==============================================================================
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def get_coupling_warnings(store: GraphStore, communities: list[dict]) -> list[str]:
    """Report cross-community coupling, oversized and low-cohesion communities.

    Args:
        store: Graph store; CALLS edges are read through ``store.conn`` and
            nodes are resolved with ``store.get_node``.
        communities: Community dicts providing ``name``, ``member_ids``,
            ``node_count`` and ``cohesion``.

    Returns:
        Warning strings in this order: up to five ``coupling:`` entries for
        the community pairs with the most CALLS edges between them (pairs
        with fewer than three edges are omitted), then one entry per
        community with more than 50 nodes, then one per community with
        cohesion below 0.3 and more than 5 nodes.
    """
    warnings = []

    # Node id -> community name.
    node_to_community: dict[int, str] = {}
    for comm in communities:
        for nid in comm["member_ids"]:
            node_to_community[nid] = comm["name"]

    # Qualified name -> community name (or None). The same symbol usually
    # appears on many edges, so resolve each one through the store only once.
    community_of_qn: dict = {}

    def community_for(qn):
        if qn not in community_of_qn:
            node = store.get_node(qn)
            community_of_qn[qn] = node_to_community.get(node["id"]) if node else None
        return community_of_qn[qn]

    # Count CALLS edges whose endpoints sit in different communities.
    cross_edges: Counter = Counter()
    all_edges = store.conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'CALLS'"
    ).fetchall()

    for edge in all_edges:
        src_comm = community_for(edge[0])
        tgt_comm = community_for(edge[1])
        if src_comm and tgt_comm and src_comm != tgt_comm:
            cross_edges[f"{src_comm}→{tgt_comm}"] += 1

    # Top coupling warnings.
    for pair, count in cross_edges.most_common(5):
        if count >= 3:
            warnings.append(f"coupling: {pair} ({count} edges)")

    # Oversized community warning.
    for comm in communities:
        if comm["node_count"] > 50:
            warnings.append(f"WARN: {comm['name']} too large ({comm['node_count']} nodes)")

    # Low cohesion warning.
    for comm in communities:
        if comm["cohesion"] < 0.3 and comm["node_count"] > 5:
            warnings.append(f"WARN: {comm['name']} low cohesion ({comm['cohesion']:.2f})")

    return warnings
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# ==============================================================================
|
|
332
|
+
# Persistence
|
|
333
|
+
# ==============================================================================
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def store_communities(store: GraphStore, communities: list[dict]):
    """Replace the communities held by the store with the given ones.

    Existing communities are cleared first; each community is then inserted
    and its member ids attached to the id returned by the insert.

    Args:
        store: Graph store that receives the communities.
        communities: Community dicts; ``name``, ``cohesion``, ``node_count``
            and ``member_ids`` are required, ``file_prefix`` and ``keywords``
            are passed as ``None`` when absent.
    """
    store.clear_communities()
    for community in communities:
        fields = {
            "name": community["name"],
            "cohesion": community["cohesion"],
            "node_count": community["node_count"],
            "file_prefix": community.get("file_prefix"),
            "keywords": community.get("keywords"),
        }
        community_id = store.insert_community(**fields)
        store.add_community_members(community_id, community["member_ids"])
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# ==============================================================================
|
|
351
|
+
# Formatters
|
|
352
|
+
# ==============================================================================
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def format_communities_minimal(communities: list[dict], limit: int = 10) -> str:
    """Render communities as a header line plus one line per community.

    Args:
        communities: Community dicts providing ``name``, ``node_count`` and
            ``cohesion``; ``keywords`` is optional and may be ``None``.
        limit: Maximum number of communities to list.

    Returns:
        The formatted text, with a trailing ``... +N more`` line when the
        list was truncated, or ``"No communities detected."`` for empty input.
    """
    if not communities:
        return "No communities detected."

    lines = [f"{len(communities)} communities:"]
    for comm in communities[:limit]:
        # Treat a missing or None keyword list as empty instead of failing
        # on the slice; at most three keywords are shown.
        kw = ",".join((comm.get("keywords") or [])[:3])
        kw_str = f" [{kw}]" if kw else ""
        lines.append(
            f" {comm['name']} size:{comm['node_count']} "
            f"cohesion:{comm['cohesion']:.2f}{kw_str}"
        )
    if len(communities) > limit:
        lines.append(f" ... +{len(communities) - limit} more")
    return "\n".join(lines)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def format_architecture_overview(
    communities: list[dict],
    coupling_warnings: list[str],
    hubs: list[dict],
    flows_count: int,
) -> str:
    """Build a compact, multi-line architecture summary.

    Args:
        communities: Community dicts providing ``name`` and ``node_count``;
            the first five are listed.
        coupling_warnings: Warning strings; the first three are included.
        hubs: Hub dicts providing ``file`` and ``imports``; the first three
            are listed.
        flows_count: Number of flows; shown only when non-zero.

    Returns:
        The sections that have data, joined by newlines, or
        ``"No architecture data available."`` when none do.
    """
    sections: list[str] = []

    if communities:
        listed = ", ".join(f"{c['name']}({c['node_count']})" for c in communities[:5])
        sections.append(f"communities({len(communities)}): {listed}")

    if coupling_warnings:
        sections.extend(f" {warning}" for warning in coupling_warnings[:3])

    if hubs:
        listed_hubs = ", ".join(f"{h['file']}({h['imports']}←)" for h in hubs[:3])
        sections.append(f"hubs: {listed_hubs}")

    if flows_count:
        sections.append(f"flows: {flows_count}")

    if not sections:
        return "No architecture data available."
    return "\n".join(sections)
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""Execution flow detection, BFS tracing, and criticality scoring.
|
|
2
|
+
|
|
3
|
+
Detects entry points via decorator patterns, conventional names, and
|
|
4
|
+
graph topology (no incoming CALLS). Traces flows via forward BFS.
|
|
5
|
+
Scores criticality using a 5-factor weighted formula.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from collections import deque
|
|
13
|
+
|
|
14
|
+
from .schema import MAX_BFS_DEPTH, SECURITY_KEYWORDS
|
|
15
|
+
from .store import GraphStore
|
|
16
|
+
|
|
17
|
+
# ==============================================================================
|
|
18
|
+
# Entry Point Detection
|
|
19
|
+
# ==============================================================================
|
|
20
|
+
|
|
21
|
+
# Decorator text that marks a function as an entry point: route-style
# decorators on app/router/blueprint objects, click commands and groups,
# celery tasks, api_view/action, and Get/Post/...-style mapping decorators.
# Matching is case-insensitive and requires the leading "@" in the text.
ENTRY_DECORATOR_PATTERN = re.compile(
    r"@(app|router|blueprint)\.(get|post|put|delete|patch|route|websocket)"
    r"|@click\.(command|group)"
    r"|@celery\.task"
    r"|@(api_view|action)"
    r"|@(Get|Post|Put|Delete|Patch|RequestMapping)",
    re.IGNORECASE,
)

# Conventional entry point names: a fixed set of exact names (main, cli,
# run, ...), anything starting with "test_", and handler-style prefixes
# (on_, handle_, process_, dispatch_). Matching is case-insensitive.
ENTRY_NAME_PATTERN = re.compile(
    r"^(main|__main__|cli|run|start|setup|app|create_app|entrypoint|handler)$"
    r"|^test_"
    r"|^(on_|handle_|process_|dispatch_)",
    re.IGNORECASE,
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def detect_entry_points(store: GraphStore) -> list:
    """Find execution entry points in the graph.

    An entry point is a Function/Method node that satisfies ANY of:
    1. No incoming CALLS edges (true root)
    2. Has a framework decorator (e.g., @app.get, @click.command)
    3. Has a conventional entry point name (main, test_*, handle_*)

    The checks run in that order and stop at the first one that matches.

    Args:
        store: Graph store; candidates are read through ``store.conn`` and
            incoming calls through ``store.get_edges_to``.

    Returns:
        The matching node rows, at most one per qualified name, in the order
        the query returned them.
    """
    candidates = store.conn.execute(
        "SELECT * FROM nodes WHERE kind IN ('Function', 'Method')"
    ).fetchall()

    entry_points = []
    seen = set()

    for node in candidates:
        qn = node["qualified_name"]
        if qn in seen:
            continue

        is_entry = (
            not store.get_edges_to(qn, kind="CALLS")
            or _has_entry_decorator(node["extra"])
            or bool(ENTRY_NAME_PATTERN.search(node["name"]))
        )
        if is_entry:
            seen.add(qn)
            entry_points.append(node)

    return entry_points


def _has_entry_decorator(extra) -> bool:
    """Return True when the node's ``extra`` JSON lists an entry decorator.

    Malformed metadata never raises: invalid JSON, JSON that is not an
    object, a ``decorators`` value that is not a list, and non-string
    entries are all treated as "no matching decorator".
    """
    try:
        extra_data = json.loads(extra or "{}")
    except (json.JSONDecodeError, TypeError):
        return False
    if not isinstance(extra_data, dict):
        return False

    decorators = extra_data.get("decorators") or []
    if not isinstance(decorators, (list, tuple)):
        return False
    return any(
        isinstance(dec, str) and ENTRY_DECORATOR_PATTERN.search(dec)
        for dec in decorators
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ==============================================================================
|
|
93
|
+
# BFS Flow Tracing
|
|
94
|
+
# ==============================================================================
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def trace_flows(
    store: GraphStore,
    max_depth: int = MAX_BFS_DEPTH,
    limit: int = 100,
) -> list[dict]:
    """Trace execution flows from non-test entry points via forward BFS.

    Args:
        store: Graph store to read from.
        max_depth: Maximum BFS depth for each individual trace.
        limit: Maximum number of entry points to trace. Entry points that
            yield no flow still count towards it, so fewer than ``limit``
            flows may be returned.

    Returns:
        The flow dicts produced by ``_trace_single_flow``, sorted by
        ``criticality`` in descending order.
    """
    # Test entry points are skipped for flow tracing (too many).
    candidates = [ep for ep in detect_entry_points(store) if not ep["is_test"]]

    traced = (_trace_single_flow(store, ep, max_depth) for ep in candidates[:limit])
    flows = [flow for flow in traced if flow]

    return sorted(flows, key=lambda f: f["criticality"], reverse=True)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _trace_single_flow(
    store: GraphStore,
    entry_point,
    max_depth: int = MAX_BFS_DEPTH,
) -> dict | None:
    """Run a BFS over forward CALLS edges starting at one entry point.

    Targets whose qualified name starts with ``__unresolved__`` and targets
    the store cannot resolve to a node are not added to the flow.

    Args:
        store: Graph store queried for edges and nodes.
        entry_point: Node row providing ``qualified_name``, ``id``, ``name``
            and ``file_path``.
        max_depth: Nodes at this depth are not expanded any further.

    Returns:
        A flow dict listing the reached nodes in BFS visiting order together
        with depth, counts, files and criticality, or ``None`` when nothing
        besides the entry point was reached.
    """
    start_qn = entry_point["qualified_name"]
    frontier = deque([(start_qn, 0)])
    seen = {start_qn}
    path_ids = [entry_point["id"]]
    path_names = [entry_point["name"]]
    files = {entry_point["file_path"]}
    deepest = 0

    while frontier:
        current_qn, depth = frontier.popleft()
        if depth >= max_depth:
            continue

        next_depth = depth + 1
        for edge in store.get_edges_from(current_qn, kind="CALLS"):
            target_qn = edge["target_qualified"]
            if target_qn in seen or target_qn.startswith("__unresolved__"):
                continue
            # Mark before resolving so an unknown target is looked up once.
            seen.add(target_qn)

            target = store.get_node(target_qn)
            if not target:
                continue
            path_ids.append(target["id"])
            path_names.append(target["name"])
            files.add(target["file_path"])
            deepest = max(deepest, next_depth)
            frontier.append((target_qn, next_depth))

    # Skip trivial flows: the entry point alone is not a flow.
    if len(path_ids) < 2:
        return None

    criticality = compute_criticality(path_ids, path_names, files, deepest, store)

    return {
        "name": entry_point["name"],
        "entry_point": entry_point["qualified_name"],
        "entry_point_id": entry_point["id"],
        "path_ids": path_ids,
        "path_names": path_names,
        "depth": deepest,
        "node_count": len(path_ids),
        "file_count": len(files),
        "files": sorted(files),
        "criticality": criticality,
    }
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ==============================================================================
|
|
178
|
+
# Criticality Scoring
|
|
179
|
+
# ==============================================================================
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def compute_criticality(
    path_ids: list[int],
    path_names: list[str],
    files: set[str],
    depth: int,
    store: GraphStore,
) -> float:
    """Compute flow criticality score (0.0–1.0).

    Factors:
    - file_spread (0.30): How many files the flow touches
    - external_calls (0.20): Unresolved/external dependencies
    - security_hits (0.25): Symbols with security-related names
    - test_gap (0.15): Fraction of flow nodes without tests
    - depth (0.10): How deep the call chain goes

    Args:
        path_ids: Node ids in the flow.
        path_names: Names of the nodes in the flow.
        files: Distinct file paths the flow touches.
        depth: Deepest BFS level reached by the flow.
        store: Graph store used to resolve nodes and their edges.

    Returns:
        The weighted sum of the five factors, clamped to [0.0, 1.0] and
        rounded to four decimals; 0.0 for an empty flow.
    """
    node_count = len(path_ids)
    if node_count == 0:
        return 0.0

    # 1. File spread (0–1), weight 0.30: saturates at five files.
    file_count = len(files)
    file_spread = min((file_count - 1) / 4.0, 1.0) if file_count > 1 else 0.0

    # 2 and 4 share one pass so each node is fetched from the store once.
    external_count = 0
    tested_count = 0
    for nid in path_ids:
        node = store.get_node_by_id(nid)
        if not node:
            # Unresolvable ids add no external calls and count as untested.
            continue
        qn = node["qualified_name"]
        external_count += sum(
            1
            for e in store.get_edges_from(qn, kind="CALLS")
            if e["target_qualified"].startswith("__unresolved__")
        )
        # NOTE(review): assumes TESTED_BY edges point at the tested node;
        # confirm against the graph builder.
        if store.get_edges_to(qn, kind="TESTED_BY"):
            tested_count += 1

    # 2. External calls (0–1), weight 0.20: saturates at five unresolved calls.
    external_score = min(external_count / 5.0, 1.0)

    # 3. Security sensitivity (0–1), weight 0.25: share of security-named nodes.
    security_hits = 0
    for name in path_names:
        lowered = name.lower()
        if any(kw in lowered for kw in SECURITY_KEYWORDS):
            security_hits += 1
    security_score = min(security_hits / node_count, 1.0)

    # 4. Test coverage gap (0–1), weight 0.15.
    test_gap = 1.0 - tested_count / node_count

    # 5. Depth (0–1), weight 0.10: saturates at depth ten.
    depth_score = min(depth / 10.0, 1.0)

    criticality = (
        file_spread * 0.30
        + external_score * 0.20
        + security_score * 0.25
        + test_gap * 0.15
        + depth_score * 0.10
    )

    return round(min(max(criticality, 0.0), 1.0), 4)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ==============================================================================
|
|
252
|
+
# Flow Persistence
|
|
253
|
+
# ==============================================================================
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def store_flows(store: GraphStore, flows: list[dict]):
    """Replace the flows held by the store with the given ones.

    Existing flows are cleared first; each flow is then inserted with the
    fields listed below.

    Args:
        store: Graph store that receives the flows.
        flows: Flow dicts providing ``name``, ``entry_point_id``, ``depth``,
            ``node_count``, ``file_count``, ``criticality`` and ``path_ids``.
    """
    persisted_fields = (
        "name",
        "entry_point_id",
        "depth",
        "node_count",
        "file_count",
        "criticality",
        "path_ids",
    )

    store.clear_flows()
    for flow in flows:
        store.insert_flow(**{field: flow[field] for field in persisted_fields})
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# ==============================================================================
|
|
272
|
+
# Formatters
|
|
273
|
+
# ==============================================================================
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def format_flow_minimal(flow: dict) -> str:
    """Format a single flow as a compact one-line string.

    The first five names are joined with arrows, followed by ``+N`` when
    more were omitted, then the detected domain tags and the criticality.
    Tags come from substring matches against every name in the flow.

    Example: flow login → authenticate → verify_password [auth] crit:0.82

    Args:
        flow: Flow dict providing ``path_names`` and ``criticality``.
    """
    names = flow["path_names"]
    shown = 5

    path_str = " → ".join(names[:shown])
    hidden = len(names) - shown
    if hidden > 0:
        path_str += f" +{hidden}"

    # Tag -> substrings that trigger it.
    tag_keywords = (
        ("auth", ("auth", "login", "password", "token", "session")),
        ("db", ("db", "query", "sql", "model")),
        ("api", ("http", "request", "api", "route")),
    )
    lowered = [name.lower() for name in names]
    tags = sorted(
        tag
        for tag, keywords in tag_keywords
        if any(kw in name for name in lowered for kw in keywords)
    )

    tag_str = f" [{','.join(tags)}]" if tags else ""
    return f"flow {path_str}{tag_str} crit:{flow['criticality']:.2f}"
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def format_flows_minimal(flows: list[dict], limit: int = 10) -> str:
    """Render flows as a header line plus one compact line per flow.

    Args:
        flows: Flow dicts accepted by ``format_flow_minimal``.
        limit: Maximum number of flows to list.

    Returns:
        The formatted text, with a trailing ``... +N more`` line when the
        list was truncated, or ``"No execution flows detected."`` for empty
        input.
    """
    if not flows:
        return "No execution flows detected."

    header = f"{len(flows)} flows detected:"
    body = [f" {format_flow_minimal(flow)}" for flow in flows[:limit]]
    overflow = len(flows) - limit
    tail = [f" ... +{overflow} more"] if overflow > 0 else []

    return "\n".join([header, *body, *tail])
|