codegraph-nav 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_nav/__init__.py +194 -0
- codegraph_nav/ast_grep_analyzer.py +448 -0
- codegraph_nav/cli.py +223 -0
- codegraph_nav/code_navigator.py +1328 -0
- codegraph_nav/code_search.py +1009 -0
- codegraph_nav/colors.py +209 -0
- codegraph_nav/completions.py +354 -0
- codegraph_nav/dart_analyzer.py +301 -0
- codegraph_nav/dependency_graph.py +814 -0
- codegraph_nav/domain/__init__.py +20 -0
- codegraph_nav/domain/routes.py +337 -0
- codegraph_nav/domain/schemas.py +229 -0
- codegraph_nav/domain/tags.py +87 -0
- codegraph_nav/exporters.py +563 -0
- codegraph_nav/go_analyzer.py +273 -0
- codegraph_nav/graph/__init__.py +72 -0
- codegraph_nav/graph/builder.py +409 -0
- codegraph_nav/graph/communities.py +402 -0
- codegraph_nav/graph/flows.py +311 -0
- codegraph_nav/graph/query.py +380 -0
- codegraph_nav/graph/schema.py +266 -0
- codegraph_nav/graph/search.py +257 -0
- codegraph_nav/graph/store.py +517 -0
- codegraph_nav/hints.py +195 -0
- codegraph_nav/import_resolver.py +891 -0
- codegraph_nav/js_ts_analyzer.py +564 -0
- codegraph_nav/line_reader.py +664 -0
- codegraph_nav/mcp/__init__.py +39 -0
- codegraph_nav/mcp/__main__.py +5 -0
- codegraph_nav/mcp/server.py +2228 -0
- codegraph_nav/py.typed +2 -0
- codegraph_nav/ruby_analyzer.py +259 -0
- codegraph_nav/rust_analyzer.py +379 -0
- codegraph_nav/token_efficient_renderer.py +743 -0
- codegraph_nav/watcher.py +382 -0
- codegraph_nav-0.1.0.dist-info/METADATA +487 -0
- codegraph_nav-0.1.0.dist-info/RECORD +41 -0
- codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
- codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
- codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
- codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""Graph queries — blast radius, change impact, risk scoring.
|
|
2
|
+
|
|
3
|
+
All queries use parameterized SQL. Blast radius uses recursive CTEs for
|
|
4
|
+
efficient transitive closure. Risk scoring adapts code-review-graph's
|
|
5
|
+
proven 5-factor formula.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from .schema import (
|
|
15
|
+
BATCH_SIZE,
|
|
16
|
+
MAX_IMPACT_DEPTH,
|
|
17
|
+
MAX_IMPACT_NODES,
|
|
18
|
+
SECURITY_KEYWORDS,
|
|
19
|
+
)
|
|
20
|
+
from .store import GraphStore
|
|
21
|
+
|
|
22
|
+
# ==============================================================================
|
|
23
|
+
# Blast Radius
|
|
24
|
+
# ==============================================================================
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _empty_blast_radius(changed_files: list[str]) -> dict:
    """Return the blast-radius result used when nothing is impacted."""
    return {
        "changed_files": changed_files,
        "changed_nodes": 0,
        "impacted_nodes": 0,
        "impacted_files": [],
        "direct_impacted": 0,
        "transitive_impacted": 0,
        "truncated": False,
    }


def get_blast_radius(
    store: GraphStore,
    changed_files: list[str],
    max_depth: int = MAX_IMPACT_DEPTH,
    max_nodes: int = MAX_IMPACT_NODES,
) -> dict:
    """Compute transitive impact of file changes using a recursive CTE.

    Every node whose ``file_path`` is in ``changed_files`` is a seed. Edges are
    then followed in both directions (source -> target and target -> source)
    for up to ``max_depth`` hops.

    Args:
        store: Graph store; ``store.conn`` and ``store.batch_get_nodes`` are used.
        changed_files: File paths as stored in ``nodes.file_path``.
        max_depth: Maximum number of edge hops away from a seed.
        max_nodes: Maximum number of rows (seeds included) taken from the query.

    Returns:
        dict with keys ``changed_files``, ``changed_nodes`` (seed count),
        ``impacted_nodes`` (reached nodes that are not seeds),
        ``impacted_files`` (sorted, excluding the changed files),
        ``direct_impacted`` (depth 1), ``transitive_impacted`` (depth > 1)
        and ``truncated``.
    """
    if not changed_files:
        return _empty_blast_radius([])

    conn = store.conn

    # Seed: all nodes in changed files. Looked up in BATCH_SIZE chunks so a
    # large change set cannot exceed SQLite's bound-variable limit.
    file_list = list(changed_files)
    seed_qns: set[str] = set()
    for i in range(0, len(file_list), BATCH_SIZE):
        chunk = file_list[i : i + BATCH_SIZE]
        placeholders = ",".join("?" for _ in chunk)
        seed_rows = conn.execute(
            f"SELECT qualified_name FROM nodes WHERE file_path IN ({placeholders})",
            chunk,
        )
        seed_qns.update(row[0] for row in seed_rows)

    if not seed_qns:
        return _empty_blast_radius(changed_files)

    # Use temp table for seeds (avoids variable limit)
    conn.execute("CREATE TEMP TABLE IF NOT EXISTS _blast_seeds (qn TEXT)")
    try:
        conn.execute("DELETE FROM _blast_seeds")
        seed_list = list(seed_qns)
        for i in range(0, len(seed_list), BATCH_SIZE):
            batch = [(qn,) for qn in seed_list[i : i + BATCH_SIZE]]
            conn.executemany("INSERT INTO _blast_seeds (qn) VALUES (?)", batch)

        # Recursive CTE: bidirectional BFS. ORDER BY makes truncation
        # deterministic and keeps the seeds and nearest nodes when LIMIT cuts
        # the result short.
        rows = conn.execute(
            """
            WITH RECURSIVE impacted(node_qn, depth) AS (
                SELECT qn, 0 FROM _blast_seeds
                UNION
                SELECT e.target_qualified, i.depth + 1
                FROM impacted i
                JOIN edges e ON e.source_qualified = i.node_qn
                WHERE i.depth < ?
                UNION
                SELECT e.source_qualified, i.depth + 1
                FROM impacted i
                JOIN edges e ON e.target_qualified = i.node_qn
                WHERE i.depth < ?
            )
            SELECT node_qn, MIN(depth) AS min_depth
            FROM impacted
            GROUP BY node_qn
            ORDER BY min_depth, node_qn
            LIMIT ?
            """,
            (max_depth, max_depth, max_nodes),
        ).fetchall()
    finally:
        # Always remove the scratch table, even when the query fails.
        conn.execute("DROP TABLE IF EXISTS _blast_seeds")

    # Categorize: qualified name -> minimum hop count from any seed
    impacted_qns = {row[0]: row[1] for row in rows}

    # Get file paths for impacted nodes
    impacted_nodes = store.batch_get_nodes(set(impacted_qns))
    impacted_files = sorted({n["file_path"] for n in impacted_nodes} - set(changed_files))

    direct = sum(1 for d in impacted_qns.values() if d == 1)
    transitive = sum(1 for d in impacted_qns.values() if d > 1)
    # Count non-seed nodes explicitly; subtracting len(seed_qns) is wrong once
    # LIMIT has dropped some seeds from the result.
    non_seed = sum(1 for qn in impacted_qns if qn not in seed_qns)

    return {
        "changed_files": changed_files,
        "changed_nodes": len(seed_qns),
        "impacted_nodes": non_seed,
        "impacted_files": impacted_files,
        "direct_impacted": direct,
        "transitive_impacted": transitive,
        # Heuristic: also True when the result holds exactly max_nodes rows.
        "truncated": len(rows) >= max_nodes,
    }
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def format_blast_radius_minimal(result: dict) -> str:
    """Render a blast-radius result as a short multi-line summary.

    The first line names up to three changed files (basename only) and the
    node/file totals. An optional second line lists up to five impacted files,
    and a final marker line is added when the result was truncated.
    """
    changed = result["changed_files"]
    impacted = result["impacted_files"]

    label = ", ".join(Path(p).name for p in changed[:3])
    hidden_changed = len(changed) - 3
    if hidden_changed > 0:
        label = f"{label} +{hidden_changed}"

    out = [
        f"blast({label}): {result['impacted_nodes']} nodes · "
        f"{len(impacted)} files impacted"
    ]

    if impacted:
        names = ", ".join(Path(p).name for p in impacted[:5])
        affected = f" affected: {names}"
        hidden_impacted = len(impacted) - 5
        if hidden_impacted > 0:
            affected += f" +{hidden_impacted} more"
        out.append(affected)

    if result["truncated"]:
        out.append(" (truncated)")

    return "\n".join(out)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# ==============================================================================
|
|
152
|
+
# Git Diff Parsing
|
|
153
|
+
# ==============================================================================
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# Hunk header of a two-way unified diff: @@ -old[,count] +new[,count] @@
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")


def _parse_unified0_ranges(diff_text: str) -> dict[str, list[tuple[int, int]]]:
    """Extract new-side line ranges per file from ``git diff --unified=0`` output."""
    ranges: dict[str, list[tuple[int, int]]] = {}
    current_file = None
    in_header = True  # between "diff --git" and the file's first hunk

    for line in diff_text.split("\n"):
        if line.startswith("diff --git "):
            # New file section: forget the previous file so its name can never
            # be attached to hunks of a file whose header we cannot parse.
            current_file = None
            in_header = True
        elif in_header and line.startswith("+++ "):
            # git appends a TAB after paths that contain spaces.
            target = line[4:].rstrip("\t")
            # "/dev/null" (deletion) and C-quoted paths ('"b/..."') are skipped.
            current_file = target[2:] if target.startswith("b/") else None
        elif line.startswith("@@ "):
            # After the first hunk, "+++ ..." can only be added file content.
            in_header = False
            if not current_file:
                continue
            m = _HUNK_HEADER_RE.match(line)
            if not m:
                continue
            start = int(m.group(1))
            # An omitted count means exactly one line.
            count = int(m.group(2)) if m.group(2) else 1
            if count > 0:  # count == 0 is a pure deletion: no new-side lines
                ranges.setdefault(current_file, []).append((start, start + count - 1))

    return ranges


def parse_git_diff_ranges(
    root_path: str,
    base: str = "HEAD~1",
) -> dict[str, list[tuple[int, int]]]:
    """Parse ``git diff --unified=0 <base>`` into changed line ranges per file.

    Args:
        root_path: Directory in which git is run.
        base: Revision to diff the working tree against.

    Returns:
        ``{file_path: [(start_line, end_line), ...]}`` with 1-based, inclusive
        line numbers on the new side of the diff. An empty dict is returned
        when git is missing, fails, times out, or ``base`` is not usable.
    """
    # A revision starting with "-" would be parsed by git as an option
    # (e.g. --output=<file>), so refuse it instead of passing it through.
    if not base or base.startswith("-"):
        return {}

    try:
        result = subprocess.run(
            [
                "git",
                "-c",
                "core.quotePath=false",  # keep non-ASCII paths unescaped
                "diff",
                "--unified=0",
                # Pin the output format regardless of the user's git config.
                "--no-color",
                "--no-ext-diff",
                "--src-prefix=a/",
                "--dst-prefix=b/",
                base,
                "--",
            ],
            cwd=root_path,
            capture_output=True,
            text=True,
            # Diffs may contain bytes that are not valid in the locale codec;
            # decode leniently rather than raising UnicodeDecodeError.
            encoding="utf-8",
            errors="replace",
            timeout=30,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return {}

    if result.returncode != 0:
        return {}

    return _parse_unified0_ranges(result.stdout)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def map_changes_to_nodes(
    store: GraphStore,
    changed_ranges: dict[str, list[tuple[int, int]]],
) -> list[dict]:
    """Select the graph nodes whose line span overlaps a changed range.

    Nodes of kind ``"File"`` are ignored. Each matching node is reported once,
    however many changed ranges it overlaps.

    Returns a list of ``{"node": <node row>, "file": <path>, "risk": 0.0}``;
    the risk is a placeholder for the caller to fill in.
    """
    hits = []
    for path, spans in changed_ranges.items():
        for candidate in store.get_nodes_by_file(path):
            if candidate["kind"] == "File":
                continue
            # Missing line info collapses to a zero-width span at line 0.
            first = candidate["line_start"] or 0
            last = candidate["line_end"] or first
            # Closed intervals [first, last] and [lo, hi] intersect.
            if any(first <= hi and last >= lo for lo, hi in spans):
                hits.append({"node": candidate, "file": path, "risk": 0.0})
    return hits
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ==============================================================================
|
|
226
|
+
# Risk Scoring
|
|
227
|
+
# ==============================================================================
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def compute_node_risk(store: GraphStore, node) -> float:
    """Score how risky a change to ``node`` is, on a 0.0-1.0 scale.

    The score is the sum of five capped terms:
    - flow membership: 0.05 per flow, at most 0.25
    - cross-file callers: 0.05 per CALLS edge from another file, at most 0.15
    - test coverage: 0.05 when a TESTED_BY edge exists, otherwise 0.30
    - security keyword: 0.20 when the name or qualified name contains one
    - caller count: callers / 20, at most 0.10

    The total is clamped to [0.0, 1.0] and rounded to four decimals.
    """
    qualified = node["qualified_name"]

    # 1. Flow membership
    flow_term = min(store.count_flow_memberships(node["id"]) * 0.05, 0.25)

    # 2. Cross-file callers
    incoming_calls = store.get_edges_to(qualified, kind="CALLS")
    foreign_callers = [e for e in incoming_calls if e["file_path"] != node["file_path"]]
    cross_file_term = min(len(foreign_callers) * 0.05, 0.15)

    # 3. Test coverage
    coverage_term = 0.05 if store.get_edges_to(qualified, kind="TESTED_BY") else 0.30

    # 4. Security keywords (case-insensitive substring match)
    haystacks = (node["name"].lower(), qualified.lower())
    sensitive = any(kw in text for kw in SECURITY_KEYWORDS for text in haystacks)
    security_term = 0.20 if sensitive else 0.0

    # 5. Caller count
    fan_in_term = min(len(incoming_calls) / 20.0, 0.10)

    total = flow_term + cross_file_term + coverage_term + security_term + fan_in_term
    clamped = max(total, 0.0)
    if clamped > 1.0:
        clamped = 1.0
    return round(clamped, 4)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def detect_changes(
    store: GraphStore,
    root_path: str,
    base: str = "HEAD~1",
) -> dict:
    """Run the full change-impact pipeline: git diff -> nodes -> risk scores.

    Returns a dict with:
    - ``overall_risk``: highest per-node risk (0.0 when nothing changed)
    - ``risk_level``: "HIGH" (>= 0.7), "MEDIUM" (>= 0.4), "LOW" (> 0) or "NONE"
    - ``changed_files``: number of files in the diff
    - ``changed_nodes``: name/file/risk/kind per changed node, riskiest first
    - ``test_gaps``: names of changed functions/methods without a TESTED_BY edge
    - ``affected_flows``: number of distinct flows containing a changed node
    """
    changed_ranges = parse_git_diff_ranges(root_path, base)
    if not changed_ranges:
        return {
            "overall_risk": 0.0,
            "risk_level": "NONE",
            "changed_files": 0,
            "changed_nodes": [],
            "test_gaps": [],
            "affected_flows": 0,
        }

    # Score every touched node, then rank from riskiest to safest.
    touched = map_changes_to_nodes(store, changed_ranges)
    for entry in touched:
        entry["risk"] = compute_node_risk(store, entry["node"])
    ranked = sorted(touched, key=lambda entry: entry["risk"], reverse=True)

    # The list is ranked, so its head carries the maximum risk.
    overall_risk = ranked[0]["risk"] if ranked else 0.0

    risk_level = "LOW" if overall_risk > 0 else "NONE"
    for floor, label in ((0.7, "HIGH"), (0.4, "MEDIUM")):
        if overall_risk >= floor:
            risk_level = label
            break

    # Changed callables that nothing tests
    test_gaps = [
        entry["node"]["name"]
        for entry in ranked
        if not store.get_edges_to(entry["node"]["qualified_name"], kind="TESTED_BY")
        and entry["node"]["kind"] in ("Function", "Method")
    ]

    # Distinct flows passing through any changed node
    affected_flow_ids = {
        membership["flow_id"]
        for entry in ranked
        for membership in store.get_flow_memberships(entry["node"]["id"])
    }

    summary = []
    for entry in ranked:
        node = entry["node"]
        summary.append(
            {
                "name": node["name"],
                "file": entry["file"],
                "risk": entry["risk"],
                "kind": node["kind"],
            }
        )

    return {
        "overall_risk": round(overall_risk, 4),
        "risk_level": risk_level,
        "changed_files": len(changed_ranges),
        "changed_nodes": summary,
        "test_gaps": test_gaps,
        "affected_flows": len(affected_flow_ids),
    }
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# Maximum number of per-file lines shown by format_changes_minimal.
_MAX_CHANGE_FILES_SHOWN = 5


def format_changes_minimal(result: dict) -> str:
    """Format a ``detect_changes`` result as a compact string (<100 tokens).

    Layout: a header line with overall risk and totals, then one line per file
    (riskiest first, at most five files, top three symbols each), an overflow
    line when more files exist, and optional ``gaps`` / ``flows`` lines.

    Args:
        result: dict with ``overall_risk``, ``risk_level``, ``changed_files``,
            ``changed_nodes`` (items with ``name``, ``file``, ``risk``),
            ``test_gaps`` and ``affected_flows``.

    Returns:
        The summary, lines joined with newlines.
    """
    lines = [
        f"risk: {result['overall_risk']:.2f} {result['risk_level']} | "
        f"{result['changed_files']} files · {len(result['changed_nodes'])} symbols changed"
    ]

    # Group by file, show top risks
    by_file: dict[str, list] = {}
    for n in result["changed_nodes"]:
        by_file.setdefault(n["file"], []).append(n)

    ranked_files = sorted(by_file.items(), key=lambda x: -max(n["risk"] for n in x[1]))
    for fpath, nodes in ranked_files[:_MAX_CHANGE_FILES_SHOWN]:
        top_nodes = sorted(nodes, key=lambda x: -x["risk"])[:3]
        node_strs = [f"{n['name']}({n['risk']:.2f})" for n in top_nodes]
        top_risk = top_nodes[0]["risk"]
        if top_risk >= 0.7:
            level = "HIGH"
        elif top_risk >= 0.4:
            level = "MED"
        else:
            level = "LOW"
        lines.append(f" {level} {Path(fpath).name}: {', '.join(node_strs)}")

    # Report only files that were really left out; the count is taken from the
    # number of files shown so it can never be off by one or appear spuriously.
    hidden_files = len(ranked_files) - _MAX_CHANGE_FILES_SHOWN
    if hidden_files > 0:
        lines.append(f" ... +{hidden_files} more files")

    if result["test_gaps"]:
        gaps = result["test_gaps"][:5]
        gaps_str = ", ".join(gaps)
        if len(result["test_gaps"]) > 5:
            gaps_str += f" +{len(result['test_gaps']) - 5}"
        lines.append(f" gaps: {gaps_str}")

    if result["affected_flows"]:
        lines.append(f" flows: {result['affected_flows']} affected")

    return "\n".join(lines)
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Graph database schema, migrations, and constants.
|
|
2
|
+
|
|
3
|
+
Defines the SQLite schema for the optional graph layer (.codegraph.db).
|
|
4
|
+
All tables use IF NOT EXISTS for safe re-creation. Migrations are applied
|
|
5
|
+
incrementally via ensure_schema().
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import sqlite3
|
|
11
|
+
|
|
12
|
+
# ==============================================================================
|
|
13
|
+
# Constants
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
|
|
16
|
+
# Highest migration number defined in MIGRATIONS below.
SCHEMA_VERSION = 4

# Allowed values for edges.kind and nodes.kind respectively.
EDGE_KINDS = ("CALLS", "IMPORTS_FROM", "CONTAINS", "INHERITS", "TESTED_BY")
NODE_KINDS = ("File", "Class", "Function", "Method", "Variable")

# Lower-case fragments used by risk scoring: a node whose lower-cased name or
# qualified name CONTAINS any of these (substring match) gets the security
# bonus. Keep every entry lower-case.
SECURITY_KEYWORDS = frozenset(
    {
        "auth",
        "login",
        "password",
        "token",
        "session",
        "crypt",
        "secret",
        "credential",
        "permission",
        "sql",
        "query",
        "execute",
        "connect",
        "socket",
        "request",
        "http",
        "sanitize",
        "validate",
        "encrypt",
        "decrypt",
        "hash",
        "sign",
        "verify",
        "admin",
        "privilege",
    }
)

# Defaults for blast-radius queries: maximum edge hops from a changed node and
# maximum number of rows taken from the query.
MAX_IMPACT_DEPTH = 2
MAX_IMPACT_NODES = 500
# NOTE(review): not used in the code visible here; presumably the depth cap for
# flow/BFS traversal elsewhere in the package — confirm.
MAX_BFS_DEPTH = 15
BATCH_SIZE = 450  # Stay under SQLite's 999-variable limit
|
|
55
|
+
|
|
56
|
+
# ==============================================================================
|
|
57
|
+
# SQL DDL
|
|
58
|
+
# ==============================================================================
|
|
59
|
+
|
|
60
|
+
# One row per code symbol. qualified_name is UNIQUE and is the key that edges
# use to refer to a node.
SQL_CREATE_NODES = """
CREATE TABLE IF NOT EXISTS nodes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    kind TEXT NOT NULL,
    name TEXT NOT NULL,
    qualified_name TEXT NOT NULL UNIQUE,
    file_path TEXT NOT NULL,
    line_start INTEGER,
    line_end INTEGER,
    language TEXT,
    parent_name TEXT,
    signature TEXT,
    is_test INTEGER DEFAULT 0,
    file_hash TEXT,
    extra TEXT DEFAULT '{}',
    updated_at REAL NOT NULL
)
"""

# Directed relationship between two nodes, referenced by qualified name (text),
# not by nodes.id. No foreign-key constraint is declared.
SQL_CREATE_EDGES = """
CREATE TABLE IF NOT EXISTS edges (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    kind TEXT NOT NULL,
    source_qualified TEXT NOT NULL,
    target_qualified TEXT NOT NULL,
    file_path TEXT NOT NULL,
    extra TEXT DEFAULT '{}',
    updated_at REAL NOT NULL
)
"""

# Key/value store; ensure_schema() keeps the 'schema_version' key here.
SQL_CREATE_METADATA = """
CREATE TABLE IF NOT EXISTS metadata (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
)
"""

# One row per flow, with its entry-point node id and summary statistics.
SQL_CREATE_FLOWS = """
CREATE TABLE IF NOT EXISTS flows (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    entry_point_id INTEGER NOT NULL,
    depth INTEGER NOT NULL,
    node_count INTEGER NOT NULL,
    file_count INTEGER NOT NULL,
    criticality REAL NOT NULL DEFAULT 0.0,
    path_json TEXT NOT NULL,
    updated_at REAL NOT NULL
)
"""

# Link table flow <-> node; a node appears at most once per flow.
SQL_CREATE_FLOW_MEMBERSHIPS = """
CREATE TABLE IF NOT EXISTS flow_memberships (
    flow_id INTEGER NOT NULL,
    node_id INTEGER NOT NULL,
    position INTEGER NOT NULL,
    PRIMARY KEY (flow_id, node_id)
)
"""

# ORDER MATTERS: MIGRATIONS selects these by position ([:7] for version 1,
# [7] for version 2). Append new indexes to a new list instead of inserting.
SQL_CREATE_INDEXES = [
    "CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file_path)",
    "CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind)",
    "CREATE INDEX IF NOT EXISTS idx_nodes_qn ON nodes(qualified_name)",
    "CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified)",
    "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified)",
    "CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind)",
    "CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path)",
    "CREATE INDEX IF NOT EXISTS idx_flows_crit ON flows(criticality DESC)",
]

# Full-text index over node fields. Not part of MIGRATIONS: ensure_schema()
# runs it separately for version 3 and tolerates failure when FTS5 is missing.
SQL_CREATE_FTS = """
CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
    name, qualified_name, file_path, signature,
    tokenize='porter unicode61'
)
"""

# Version-4 tables: communities, their members, routes and schemas.
SQL_CREATE_COMMUNITIES = """
CREATE TABLE IF NOT EXISTS communities (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    node_count INTEGER NOT NULL DEFAULT 0,
    cohesion REAL NOT NULL DEFAULT 0.0,
    file_prefix TEXT,
    keywords TEXT DEFAULT '[]',
    updated_at REAL NOT NULL
)
"""

# Link table community <-> node.
SQL_CREATE_COMMUNITY_MEMBERS = """
CREATE TABLE IF NOT EXISTS community_members (
    community_id INTEGER NOT NULL,
    node_id INTEGER NOT NULL,
    PRIMARY KEY (community_id, node_id)
)
"""

# One row per route (method + path) and the file/handler it was found in.
SQL_CREATE_ROUTES = """
CREATE TABLE IF NOT EXISTS routes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    method TEXT NOT NULL,
    path TEXT NOT NULL,
    file_path TEXT NOT NULL,
    handler_name TEXT,
    framework TEXT,
    tags TEXT DEFAULT '[]',
    confidence TEXT DEFAULT 'high',
    updated_at REAL NOT NULL
)
"""

# One row per named schema; fields and relations are stored as text that
# defaults to '[]'.
SQL_CREATE_SCHEMAS = """
CREATE TABLE IF NOT EXISTS schemas (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    orm TEXT,
    fields TEXT DEFAULT '[]',
    relations TEXT DEFAULT '[]',
    updated_at REAL NOT NULL
)
"""

# Indexes for the version-4 tables; applied as a whole by migration 4.
SQL_CREATE_INDEXES_V4 = [
    "CREATE INDEX IF NOT EXISTS idx_cm_community ON community_members(community_id)",
    "CREATE INDEX IF NOT EXISTS idx_cm_node ON community_members(node_id)",
    "CREATE INDEX IF NOT EXISTS idx_routes_file ON routes(file_path)",
    "CREATE INDEX IF NOT EXISTS idx_schemas_file ON schemas(file_path)",
]
|
|
191
|
+
|
|
192
|
+
# ==============================================================================
|
|
193
|
+
# Migrations
|
|
194
|
+
# ==============================================================================
|
|
195
|
+
|
|
196
|
+
# Schema version -> SQL statements that upgrade the database to that version.
# ensure_schema() runs every version greater than the stored one, in ascending
# order. All statements use IF NOT EXISTS, so re-running a version is harmless.
MIGRATIONS = {
    1: [
        SQL_CREATE_NODES,
        SQL_CREATE_EDGES,
        SQL_CREATE_METADATA,
        *SQL_CREATE_INDEXES[:7],  # Node + edge indexes
    ],
    2: [
        SQL_CREATE_FLOWS,
        SQL_CREATE_FLOW_MEMBERSHIPS,
        SQL_CREATE_INDEXES[7],  # flows criticality index
    ],
    3: [
        # FTS5 — may fail if not compiled in; that's OK
        # Intentionally empty: ensure_schema() executes SQL_CREATE_FTS itself
        # for this version so that a failure can be caught and ignored.
    ],
    4: [
        SQL_CREATE_COMMUNITIES,
        SQL_CREATE_COMMUNITY_MEMBERS,
        SQL_CREATE_ROUTES,
        SQL_CREATE_SCHEMAS,
        *SQL_CREATE_INDEXES_V4,
    ],
}
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def ensure_schema(conn: sqlite3.Connection) -> int:
    """Bring the database up to the latest schema and return its version.

    Every migration newer than the stored ``schema_version`` is executed in
    ascending order and the stored version is updated after each one. The FTS5
    table belonging to version 3 is optional: if it cannot be created the
    migration still counts as applied. A single commit is issued at the end.
    """
    version_sql = "SELECT value FROM metadata WHERE key = 'schema_version'"

    def stored_version() -> int:
        found = conn.execute(version_sql).fetchone()
        return 0 if not found else int(found[0])

    # The metadata table must exist before the version can be read.
    conn.execute(SQL_CREATE_METADATA)
    already_applied = stored_version()

    for version in sorted(MIGRATIONS):
        if version > already_applied:
            for statement in MIGRATIONS[version]:
                conn.execute(statement)

            if version == 3:
                # Best effort: SQLite builds without FTS5 reject this statement.
                try:
                    conn.execute(SQL_CREATE_FTS)
                except sqlite3.OperationalError:
                    pass  # FTS5 not available, skip

            conn.execute(
                "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?)",
                (str(version),),
            )

    conn.commit()
    return stored_version()
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def make_qualified_name(file_path: str, name: str, parent: str | None = None) -> str:
    """Return ``file_path::parent.name``, or ``file_path::name`` without a parent.

    A falsy ``parent`` (``None`` or an empty string) is treated as absent.
    """
    symbol = f"{parent}.{name}" if parent else name
    return f"{file_path}::{symbol}"
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def is_fts5_available(conn: sqlite3.Connection) -> bool:
    """Check if FTS5 is usable on this connection.

    The probe table is created in the ``temp`` schema, so the check never
    writes to the main database file and also works on a database opened
    read-only.

    Args:
        conn: Open SQLite connection to probe.

    Returns:
        True when an FTS5 virtual table could be created and dropped,
        False when SQLite rejected it with an OperationalError.
    """
    try:
        conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp._fts_probe USING fts5(x)")
        conn.execute("DROP TABLE IF EXISTS temp._fts_probe")
    except sqlite3.OperationalError:
        return False
    return True
|