codegraph-nav 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. codegraph_nav/__init__.py +194 -0
  2. codegraph_nav/ast_grep_analyzer.py +448 -0
  3. codegraph_nav/cli.py +223 -0
  4. codegraph_nav/code_navigator.py +1328 -0
  5. codegraph_nav/code_search.py +1009 -0
  6. codegraph_nav/colors.py +209 -0
  7. codegraph_nav/completions.py +354 -0
  8. codegraph_nav/dart_analyzer.py +301 -0
  9. codegraph_nav/dependency_graph.py +814 -0
  10. codegraph_nav/domain/__init__.py +20 -0
  11. codegraph_nav/domain/routes.py +337 -0
  12. codegraph_nav/domain/schemas.py +229 -0
  13. codegraph_nav/domain/tags.py +87 -0
  14. codegraph_nav/exporters.py +563 -0
  15. codegraph_nav/go_analyzer.py +273 -0
  16. codegraph_nav/graph/__init__.py +72 -0
  17. codegraph_nav/graph/builder.py +409 -0
  18. codegraph_nav/graph/communities.py +402 -0
  19. codegraph_nav/graph/flows.py +311 -0
  20. codegraph_nav/graph/query.py +380 -0
  21. codegraph_nav/graph/schema.py +266 -0
  22. codegraph_nav/graph/search.py +257 -0
  23. codegraph_nav/graph/store.py +517 -0
  24. codegraph_nav/hints.py +195 -0
  25. codegraph_nav/import_resolver.py +891 -0
  26. codegraph_nav/js_ts_analyzer.py +564 -0
  27. codegraph_nav/line_reader.py +664 -0
  28. codegraph_nav/mcp/__init__.py +39 -0
  29. codegraph_nav/mcp/__main__.py +5 -0
  30. codegraph_nav/mcp/server.py +2228 -0
  31. codegraph_nav/py.typed +2 -0
  32. codegraph_nav/ruby_analyzer.py +259 -0
  33. codegraph_nav/rust_analyzer.py +379 -0
  34. codegraph_nav/token_efficient_renderer.py +743 -0
  35. codegraph_nav/watcher.py +382 -0
  36. codegraph_nav-0.1.0.dist-info/METADATA +487 -0
  37. codegraph_nav-0.1.0.dist-info/RECORD +41 -0
  38. codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
  39. codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
  40. codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,380 @@
1
+ """Graph queries — blast radius, change impact, risk scoring.
2
+
3
+ All queries use parameterized SQL. Blast radius uses recursive CTEs for
4
+ efficient transitive closure. Risk scoring adapts code-review-graph's
5
+ proven 5-factor formula.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ import subprocess
12
+ from pathlib import Path
13
+
14
+ from .schema import (
15
+ BATCH_SIZE,
16
+ MAX_IMPACT_DEPTH,
17
+ MAX_IMPACT_NODES,
18
+ SECURITY_KEYWORDS,
19
+ )
20
+ from .store import GraphStore
21
+
22
+ # ==============================================================================
23
+ # Blast Radius
24
+ # ==============================================================================
25
+
26
+
27
def get_blast_radius(
    store: GraphStore,
    changed_files: list[str],
    max_depth: int = MAX_IMPACT_DEPTH,
    max_nodes: int = MAX_IMPACT_NODES,
) -> dict:
    """Compute the transitive impact of file changes using a recursive CTE.

    Every node defined in ``changed_files`` is a seed (depth 0). Edges are
    then followed in both directions, up to ``max_depth`` hops, and each
    reached node is recorded with the smallest depth at which it was seen.

    Args:
        store: Graph store; ``store.conn`` is used for SQL and
            ``store.batch_get_nodes`` to resolve file paths.
        changed_files: File paths as stored in ``nodes.file_path``.
        max_depth: Maximum number of edge hops away from a seed.
        max_nodes: Maximum number of rows (seeds included) to keep. A
            negative value disables the cap.

    Returns:
        Dict with keys ``changed_files``, ``changed_nodes`` (seed count),
        ``impacted_nodes`` (non-seed nodes kept), ``impacted_files`` (sorted,
        excluding the changed files), ``direct_impacted`` (depth 1),
        ``transitive_impacted`` (depth > 1) and ``truncated``.
    """
    if not changed_files:
        return {
            "changed_files": [],
            "changed_nodes": 0,
            "impacted_nodes": 0,
            "impacted_files": [],
            "direct_impacted": 0,
            "transitive_impacted": 0,
            "truncated": False,
        }

    conn = store.conn

    # Seed: all nodes in changed files. Looked up in batches so a very long
    # file list cannot exceed SQLite's bound-variable limit.
    seed_qns: set[str] = set()
    for offset in range(0, len(changed_files), BATCH_SIZE):
        chunk = changed_files[offset : offset + BATCH_SIZE]
        placeholders = ",".join("?" for _ in chunk)
        seed_rows = conn.execute(
            f"SELECT qualified_name FROM nodes WHERE file_path IN ({placeholders})",
            chunk,
        ).fetchall()
        seed_qns.update(row[0] for row in seed_rows)

    if not seed_qns:
        return {
            "changed_files": changed_files,
            "changed_nodes": 0,
            "impacted_nodes": 0,
            "impacted_files": [],
            "direct_impacted": 0,
            "transitive_impacted": 0,
            "truncated": False,
        }

    # Ask for one row more than the cap so truncation is detected exactly
    # (a result of exactly max_nodes rows is not truncated). SQLite treats a
    # negative LIMIT as "no limit".
    unlimited = max_nodes < 0
    row_limit = -1 if unlimited else max_nodes + 1

    # Use temp table for seeds (avoids variable limit)
    conn.execute("CREATE TEMP TABLE IF NOT EXISTS _blast_seeds (qn TEXT)")
    try:
        conn.execute("DELETE FROM _blast_seeds")
        seed_list = list(seed_qns)
        for i in range(0, len(seed_list), BATCH_SIZE):
            batch = [(qn,) for qn in seed_list[i : i + BATCH_SIZE]]
            conn.executemany("INSERT INTO _blast_seeds (qn) VALUES (?)", batch)

        # Recursive CTE: bidirectional BFS. ORDER BY makes the LIMIT keep the
        # closest nodes (seeds first) instead of an arbitrary subset.
        rows = conn.execute(
            """
            WITH RECURSIVE impacted(node_qn, depth) AS (
                SELECT qn, 0 FROM _blast_seeds
                UNION
                SELECT e.target_qualified, i.depth + 1
                FROM impacted i
                JOIN edges e ON e.source_qualified = i.node_qn
                WHERE i.depth < ?
                UNION
                SELECT e.source_qualified, i.depth + 1
                FROM impacted i
                JOIN edges e ON e.target_qualified = i.node_qn
                WHERE i.depth < ?
            )
            SELECT node_qn, MIN(depth) AS min_depth
            FROM impacted
            GROUP BY node_qn
            ORDER BY min_depth, node_qn
            LIMIT ?
            """,
            (max_depth, max_depth, row_limit),
        ).fetchall()
    finally:
        # Always remove the scratch table, even when the query fails.
        conn.execute("DROP TABLE IF EXISTS _blast_seeds")

    truncated = (not unlimited) and len(rows) > max_nodes
    if truncated:
        rows = rows[:max_nodes]

    # Categorize: qualified name -> minimum depth
    impacted_qns = {row[0]: row[1] for row in rows}

    # Get file paths for impacted nodes
    all_qns = set(impacted_qns.keys())
    impacted_nodes = store.batch_get_nodes(all_qns)
    impacted_files = sorted({n["file_path"] for n in impacted_nodes} - set(changed_files))

    direct = sum(1 for d in impacted_qns.values() if d == 1)
    transitive = sum(1 for d in impacted_qns.values() if d > 1)

    return {
        "changed_files": changed_files,
        "changed_nodes": len(seed_qns),
        # Count non-seed rows directly; subtracting the seed count could go
        # negative when the row cap cuts into the seeds themselves.
        "impacted_nodes": direct + transitive,
        "impacted_files": impacted_files,
        "direct_impacted": direct,
        "transitive_impacted": transitive,
        "truncated": truncated,
    }
125
+
126
+
127
def format_blast_radius_minimal(result: dict) -> str:
    """Render a blast-radius result as a short multi-line summary.

    The first line names up to three changed files (base names only) plus the
    impacted node and file counts. An optional second line lists up to five
    impacted files, and a final marker is added when the result was truncated.
    """
    changed = result["changed_files"]
    impacted = result["impacted_files"]

    label = ", ".join(Path(f).name for f in changed[:3])
    hidden_changed = len(changed) - 3
    if hidden_changed > 0:
        label += f" +{hidden_changed}"

    summary = [
        f"blast({label}): {result['impacted_nodes']} nodes · "
        f"{len(impacted)} files impacted"
    ]

    if impacted:
        affected = f" affected: {', '.join(Path(f).name for f in impacted[:5])}"
        if len(impacted) > 5:
            affected += f" +{len(impacted) - 5} more"
        summary.append(affected)

    if result["truncated"]:
        summary.append(" (truncated)")

    return "\n".join(summary)
149
+
150
+
151
+ # ==============================================================================
152
+ # Git Diff Parsing
153
+ # ==============================================================================
154
+
155
+
156
# Hunk header of a two-way unified diff: @@ -old[,count] +new[,count] @@
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")


def _parse_unified_diff(diff_text: str) -> dict[str, list[tuple[int, int]]]:
    """Collect new-side (start, end) line ranges per file from diff text."""
    ranges: dict[str, list[tuple[int, int]]] = {}
    current_file = None
    body_lines_left = 0  # hunk content lines still to skip

    for line in diff_text.split("\n"):
        if body_lines_left > 0:
            # Inside a hunk body: changed content may itself look like a
            # header ("+++ b/..." or "@@ ..."), so never interpret it.
            # "\ No newline at end of file" markers are not counted by the
            # hunk header, so they do not consume the budget.
            if not line.startswith("\\"):
                body_lines_left -= 1
            continue

        if line.startswith("diff --git "):
            # New file section: forget the previous file so that hunks are
            # never attributed to it when the "+++" line is unusable
            # (deleted file, quoted path).
            current_file = None
        elif line.startswith("+++ b/"):
            # git appends a TAB after paths that contain spaces.
            current_file = line[6:].rstrip("\t")
        elif line.startswith("@@ "):
            m = _HUNK_HEADER_RE.match(line)
            if m is None:
                continue
            old_count = int(m.group(1)) if m.group(1) is not None else 1
            start = int(m.group(2))
            new_count = int(m.group(3)) if m.group(3) is not None else 1
            body_lines_left = old_count + new_count
            # A count of 0 is a pure deletion: nothing exists on the new side.
            if new_count > 0 and current_file:
                end = start + new_count - 1
                ranges.setdefault(current_file, []).append((start, end))

    return ranges


def parse_git_diff_ranges(
    root_path: str,
    base: str = "HEAD~1",
) -> dict[str, list[tuple[int, int]]]:
    """Parse ``git diff --unified=0`` into changed line ranges per file.

    Args:
        root_path: Directory in which git is run.
        base: Revision the working tree is compared against.

    Returns:
        ``{file_path: [(start_line, end_line), ...]}`` with 1-based inclusive
        line numbers on the new side of the diff. An empty dict is returned
        when git is missing, times out, or exits with a non-zero status.
    """
    command = [
        "git",
        # Emit non-ASCII paths verbatim instead of as quoted octal escapes.
        # Paths containing quotes, backslashes or control characters are
        # still quoted by git and are skipped by the parser.
        "-c",
        "core.quotePath=false",
        "diff",
        "--unified=0",
        # Pin the output format so user-level git config (forced colour,
        # external diff drivers, custom or missing prefixes) cannot break
        # the parser.
        "--no-color",
        "--no-ext-diff",
        "--src-prefix=a/",
        "--dst-prefix=b/",
        base,
        "--",
    ]
    try:
        result = subprocess.run(
            command,
            cwd=root_path,
            capture_output=True,
            timeout=30,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return {}
    if result.returncode != 0:
        return {}

    # Decode manually: changed content may be in any encoding, and text mode
    # would both raise on undecodable bytes and rewrite bare "\r" as "\n".
    diff_text = result.stdout.decode("utf-8", errors="replace")
    return _parse_unified_diff(diff_text)
194
+
195
+
196
def map_changes_to_nodes(
    store: GraphStore,
    changed_ranges: dict[str, list[tuple[int, int]]],
) -> list[dict]:
    """Find the graph nodes whose line span overlaps a changed range.

    Nodes of kind ``"File"`` are ignored. Each matching node is reported once,
    as ``{"node": <row>, "file": <path>, "risk": 0.0}``; the risk value is a
    placeholder for the caller to fill in.
    """
    hits: list[dict] = []
    for path, spans in changed_ranges.items():
        for candidate in store.get_nodes_by_file(path):
            if candidate["kind"] == "File":
                continue
            # Missing line info collapses to a single-line (or line 0) span.
            first = candidate["line_start"] or 0
            last = candidate["line_end"] or first
            overlaps = any(first <= hi and last >= lo for lo, hi in spans)
            if overlaps:
                hits.append({"node": candidate, "file": path, "risk": 0.0})
    return hits
223
+
224
+
225
+ # ==============================================================================
226
+ # Risk Scoring
227
+ # ==============================================================================
228
+
229
+
230
def compute_node_risk(store: GraphStore, node) -> float:
    """Score a single node's change risk on a 0.0-1.0 scale.

    The score is the sum of five capped contributions:
        - flow membership:    0.05 per flow, at most 0.25
        - cross-file callers: 0.05 per caller from another file, at most 0.15
        - test coverage:      0.05 when a TESTED_BY edge exists, else 0.30
        - security keyword:   0.20 when the name or qualified name contains one
        - caller count:       callers / 20, at most 0.10

    The total is clamped to [0.0, 1.0] and rounded to four decimals.
    """
    qualified = node["qualified_name"]
    total = 0.0

    # 1. Flow membership
    total += min(store.count_flow_memberships(node["id"]) * 0.05, 0.25)

    # 2. Cross-file callers
    incoming_calls = store.get_edges_to(qualified, kind="CALLS")
    foreign_callers = [e for e in incoming_calls if e["file_path"] != node["file_path"]]
    total += min(len(foreign_callers) * 0.05, 0.15)

    # 3. Test coverage
    has_tests = bool(store.get_edges_to(qualified, kind="TESTED_BY"))
    total += 0.05 if has_tests else 0.30

    # 4. Security keywords (case-insensitive substring match)
    haystacks = (node["name"].lower(), qualified.lower())
    if any(kw in text for kw in SECURITY_KEYWORDS for text in haystacks):
        total += 0.20

    # 5. Caller count
    total += min(len(incoming_calls) / 20.0, 0.10)

    clamped = min(max(total, 0.0), 1.0)
    return round(clamped, 4)
266
+
267
+
268
def detect_changes(
    store: GraphStore,
    root_path: str,
    base: str = "HEAD~1",
) -> dict:
    """Run the full change-impact pipeline: git diff -> nodes -> risk scores.

    Returns a dict with ``overall_risk`` (the highest node risk),
    ``risk_level`` (HIGH / MEDIUM / LOW / NONE), ``changed_files`` (count),
    ``changed_nodes`` (name/file/risk/kind, highest risk first),
    ``test_gaps`` (names of changed functions and methods without a TESTED_BY
    edge) and ``affected_flows`` (count of distinct flows touched).
    """
    changed_ranges = parse_git_diff_ranges(root_path, base)
    if not changed_ranges:
        return {
            "overall_risk": 0.0,
            "risk_level": "NONE",
            "changed_files": 0,
            "changed_nodes": [],
            "test_gaps": [],
            "affected_flows": 0,
        }

    # Map the diff onto graph nodes and score each one.
    scored = map_changes_to_nodes(store, changed_ranges)
    for entry in scored:
        entry["risk"] = compute_node_risk(store, entry["node"])

    # Highest risk first; the head of the list is therefore the overall risk.
    scored.sort(key=lambda entry: entry["risk"], reverse=True)
    overall_risk = scored[0]["risk"] if scored else 0.0

    if overall_risk >= 0.7:
        risk_level = "HIGH"
    elif overall_risk >= 0.4:
        risk_level = "MEDIUM"
    else:
        risk_level = "LOW" if overall_risk > 0 else "NONE"

    # Test gaps: changed callables that nothing is recorded as testing.
    test_gaps = [
        entry["node"]["name"]
        for entry in scored
        if not store.get_edges_to(entry["node"]["qualified_name"], kind="TESTED_BY")
        and entry["node"]["kind"] in ("Function", "Method")
    ]

    # Affected flows: distinct flow ids across all changed nodes.
    affected_flow_ids = {
        membership["flow_id"]
        for entry in scored
        for membership in store.get_flow_memberships(entry["node"]["id"])
    }

    summary_nodes = []
    for entry in scored:
        row = entry["node"]
        summary_nodes.append(
            {
                "name": row["name"],
                "file": entry["file"],
                "risk": entry["risk"],
                "kind": row["kind"],
            }
        )

    return {
        "overall_risk": round(overall_risk, 4),
        "risk_level": risk_level,
        "changed_files": len(changed_ranges),
        "changed_nodes": summary_nodes,
        "test_gaps": test_gaps,
        "affected_flows": len(affected_flow_ids),
    }
343
+
344
+
345
def format_changes_minimal(result: dict) -> str:
    """Format a change-impact result as a compact multi-line string.

    Layout:
        - a header with the overall risk, risk level and file/symbol counts
        - one line per changed file (at most five, riskiest first), each
          listing its three riskiest symbols
        - a "... +N more files" line when files were left out
        - optional "gaps" (first five untested symbols) and "flows" lines

    Args:
        result: Dict shaped like the return value of ``detect_changes``.
    """
    max_files = 5

    lines = [
        f"risk: {result['overall_risk']:.2f} {result['risk_level']} | "
        f"{result['changed_files']} files · {len(result['changed_nodes'])} symbols changed"
    ]

    # Group by file, show top risks
    by_file: dict[str, list] = {}
    for n in result["changed_nodes"]:
        by_file.setdefault(n["file"], []).append(n)

    ranked = sorted(by_file.items(), key=lambda x: -max(n["risk"] for n in x[1]))
    for fpath, nodes in ranked[:max_files]:
        top_nodes = sorted(nodes, key=lambda x: -x["risk"])[:3]
        node_strs = [f"{n['name']}({n['risk']:.2f})" for n in top_nodes]
        level = (
            "HIGH"
            if top_nodes[0]["risk"] >= 0.7
            else "MED" if top_nodes[0]["risk"] >= 0.4 else "LOW"
        )
        lines.append(f" {level} {Path(fpath).name}: {', '.join(node_strs)}")

    # Report only the files that were really left out; with exactly
    # max_files files there is nothing more to announce.
    hidden = len(ranked) - max_files
    if hidden > 0:
        lines.append(f" ... +{hidden} more files")

    if result["test_gaps"]:
        gaps = result["test_gaps"][:5]
        gaps_str = ", ".join(gaps)
        if len(result["test_gaps"]) > 5:
            gaps_str += f" +{len(result['test_gaps']) - 5}"
        lines.append(f" gaps: {gaps_str}")

    if result["affected_flows"]:
        lines.append(f" flows: {result['affected_flows']} affected")

    return "\n".join(lines)
@@ -0,0 +1,266 @@
1
+ """Graph database schema, migrations, and constants.
2
+
3
+ Defines the SQLite schema for the optional graph layer (.codegraph.db).
4
+ All tables use IF NOT EXISTS for safe re-creation. Migrations are applied
5
+ incrementally via ensure_schema().
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import sqlite3
11
+
12
+ # ==============================================================================
13
+ # Constants
14
+ # ==============================================================================
15
+
16
# Highest migration number defined in MIGRATIONS.
SCHEMA_VERSION = 4

# Allowed values for the `kind` columns of the edges and nodes tables.
EDGE_KINDS = ("CALLS", "IMPORTS_FROM", "CONTAINS", "INHERITS", "TESTED_BY")
NODE_KINDS = ("File", "Class", "Function", "Method", "Variable")

# Lower-case substrings that flag a symbol as security-sensitive.
SECURITY_KEYWORDS = frozenset(
    (
        "auth login password token session crypt secret credential permission "
        "sql query execute connect socket request http sanitize validate "
        "encrypt decrypt hash sign verify admin privilege"
    ).split()
)

# Default limits for impact analysis.
MAX_IMPACT_DEPTH = 2
MAX_IMPACT_NODES = 500
MAX_BFS_DEPTH = 15
BATCH_SIZE = 450  # Stay under SQLite's 999-variable limit
55
+
56
+ # ==============================================================================
57
+ # SQL DDL
58
+ # ==============================================================================
59
+
60
# One row per code symbol. `qualified_name` is UNIQUE and is the key that
# edges use to refer to a node. `kind` is not constrained by a CHECK here;
# presumably it holds one of NODE_KINDS -- verify against the writer.
SQL_CREATE_NODES = """
CREATE TABLE IF NOT EXISTS nodes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    kind TEXT NOT NULL,
    name TEXT NOT NULL,
    qualified_name TEXT NOT NULL UNIQUE,
    file_path TEXT NOT NULL,
    line_start INTEGER,
    line_end INTEGER,
    language TEXT,
    parent_name TEXT,
    signature TEXT,
    is_test INTEGER DEFAULT 0,
    file_hash TEXT,
    extra TEXT DEFAULT '{}',
    updated_at REAL NOT NULL
)
"""

# Directed relationships between symbols. Both endpoints are stored as
# qualified-name strings; no foreign-key constraint to `nodes` is declared.
SQL_CREATE_EDGES = """
CREATE TABLE IF NOT EXISTS edges (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    kind TEXT NOT NULL,
    source_qualified TEXT NOT NULL,
    target_qualified TEXT NOT NULL,
    file_path TEXT NOT NULL,
    extra TEXT DEFAULT '{}',
    updated_at REAL NOT NULL
)
"""

# Key/value store; ensure_schema() keeps the 'schema_version' entry here.
SQL_CREATE_METADATA = """
CREATE TABLE IF NOT EXISTS metadata (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
)
"""

# Execution flows (added by migration 2). `path_json` presumably holds the
# serialized node path of the flow -- TODO confirm against the writer.
SQL_CREATE_FLOWS = """
CREATE TABLE IF NOT EXISTS flows (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    entry_point_id INTEGER NOT NULL,
    depth INTEGER NOT NULL,
    node_count INTEGER NOT NULL,
    file_count INTEGER NOT NULL,
    criticality REAL NOT NULL DEFAULT 0.0,
    path_json TEXT NOT NULL,
    updated_at REAL NOT NULL
)
"""

# Flow <-> node association. The composite primary key allows a node to
# appear at most once per flow.
SQL_CREATE_FLOW_MEMBERSHIPS = """
CREATE TABLE IF NOT EXISTS flow_memberships (
    flow_id INTEGER NOT NULL,
    node_id INTEGER NOT NULL,
    position INTEGER NOT NULL,
    PRIMARY KEY (flow_id, node_id)
)
"""

# NOTE: order matters. MIGRATIONS slices this list by position: entries
# [0:7] (node + edge indexes) belong to migration 1 and entry [7] (flows)
# to migration 2. Append new indexes to a separate list instead of
# inserting here.
SQL_CREATE_INDEXES = [
    "CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file_path)",
    "CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind)",
    "CREATE INDEX IF NOT EXISTS idx_nodes_qn ON nodes(qualified_name)",
    "CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified)",
    "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified)",
    "CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind)",
    "CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path)",
    "CREATE INDEX IF NOT EXISTS idx_flows_crit ON flows(criticality DESC)",
]
131
+
132
# Full-text index over node names and signatures. Requires the FTS5
# extension; ensure_schema() runs this statement inside a try/except so a
# SQLite build without FTS5 still migrates.
SQL_CREATE_FTS = """
CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
    name, qualified_name, file_path, signature,
    tokenize='porter unicode61'
)
"""

# --- Tables added by migration 4 ---

# Groups of related nodes. `keywords` defaults to '[]' (presumably a JSON
# array -- confirm against the writer).
SQL_CREATE_COMMUNITIES = """
CREATE TABLE IF NOT EXISTS communities (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    node_count INTEGER NOT NULL DEFAULT 0,
    cohesion REAL NOT NULL DEFAULT 0.0,
    file_prefix TEXT,
    keywords TEXT DEFAULT '[]',
    updated_at REAL NOT NULL
)
"""

# Community <-> node association; one row per (community, node) pair.
SQL_CREATE_COMMUNITY_MEMBERS = """
CREATE TABLE IF NOT EXISTS community_members (
    community_id INTEGER NOT NULL,
    node_id INTEGER NOT NULL,
    PRIMARY KEY (community_id, node_id)
)
"""

# Routes keyed by HTTP-style method + path, with the file and handler that
# declare them.
SQL_CREATE_ROUTES = """
CREATE TABLE IF NOT EXISTS routes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    method TEXT NOT NULL,
    path TEXT NOT NULL,
    file_path TEXT NOT NULL,
    handler_name TEXT,
    framework TEXT,
    tags TEXT DEFAULT '[]',
    confidence TEXT DEFAULT 'high',
    updated_at REAL NOT NULL
)
"""

# Data-model definitions per file. `fields` and `relations` default to '[]'
# (presumably JSON arrays -- confirm against the writer).
SQL_CREATE_SCHEMAS = """
CREATE TABLE IF NOT EXISTS schemas (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    orm TEXT,
    fields TEXT DEFAULT '[]',
    relations TEXT DEFAULT '[]',
    updated_at REAL NOT NULL
)
"""

# Indexes for the migration-4 tables; applied as a whole by migration 4.
SQL_CREATE_INDEXES_V4 = [
    "CREATE INDEX IF NOT EXISTS idx_cm_community ON community_members(community_id)",
    "CREATE INDEX IF NOT EXISTS idx_cm_node ON community_members(node_id)",
    "CREATE INDEX IF NOT EXISTS idx_routes_file ON routes(file_path)",
    "CREATE INDEX IF NOT EXISTS idx_schemas_file ON schemas(file_path)",
]
191
+
192
+ # ==============================================================================
193
+ # Migrations
194
+ # ==============================================================================
195
+
196
# Maps a schema version to the SQL statements that upgrade the database to
# that version. ensure_schema() applies the entries in ascending key order
# and skips versions at or below the one already recorded in `metadata`.
MIGRATIONS = {
    # v1: core graph tables.
    1: [
        SQL_CREATE_NODES,
        SQL_CREATE_EDGES,
        SQL_CREATE_METADATA,
        *SQL_CREATE_INDEXES[:7],  # Node + edge indexes
    ],
    # v2: execution flows.
    2: [
        SQL_CREATE_FLOWS,
        SQL_CREATE_FLOW_MEMBERSHIPS,
        SQL_CREATE_INDEXES[7],  # flows criticality index
    ],
    # v3: intentionally empty. The FTS table is created by ensure_schema()
    # itself so that a missing FTS5 module can be tolerated.
    3: [
        # FTS5 — may fail if not compiled in; that's OK
    ],
    # v4: communities, routes and schemas.
    4: [
        SQL_CREATE_COMMUNITIES,
        SQL_CREATE_COMMUNITY_MEMBERS,
        SQL_CREATE_ROUTES,
        SQL_CREATE_SCHEMAS,
        *SQL_CREATE_INDEXES_V4,
    ],
}
219
+
220
+
221
def ensure_schema(conn: sqlite3.Connection) -> int:
    """Bring the database up to the newest schema version.

    Migrations newer than the recorded ``schema_version`` are applied in
    ascending order; the version is stored and committed after each one.

    Returns:
        The schema version recorded in ``metadata`` afterwards (0 if none).
    """
    version_query = "SELECT value FROM metadata WHERE key = 'schema_version'"

    # The metadata table must exist before the version can be read.
    conn.execute(SQL_CREATE_METADATA)
    recorded = conn.execute(version_query).fetchone()
    current = int(recorded[0]) if recorded else 0

    pending = [v for v in sorted(MIGRATIONS) if v > current]
    for version in pending:
        for statement in MIGRATIONS[version]:
            conn.execute(statement)

        if version == 3:
            # Version 3 is the full-text index; tolerate builds without FTS5.
            try:
                conn.execute(SQL_CREATE_FTS)
            except sqlite3.OperationalError:
                pass  # FTS5 not available, skip

        conn.execute(
            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?)",
            (str(version),),
        )
        conn.commit()

    recorded = conn.execute(version_query).fetchone()
    return int(recorded[0]) if recorded else 0
250
+
251
+
252
def make_qualified_name(file_path: str, name: str, parent: str | None = None) -> str:
    """Return ``file_path::parent.name``, or ``file_path::name`` without a parent."""
    symbol = f"{parent}.{name}" if parent else name
    return f"{file_path}::{symbol}"
257
+
258
+
259
def is_fts5_available(conn: sqlite3.Connection) -> bool:
    """Check whether FTS5 is compiled into this SQLite build.

    The probe table is created in the connection's ``temp`` schema, so the
    main database file is never written to and the check also works on
    read-only connections.

    Returns:
        True if an FTS5 virtual table could be created, False otherwise.
    """
    try:
        conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp._fts_probe USING fts5(x)")
    except sqlite3.OperationalError:
        return False

    try:
        conn.execute("DROP TABLE IF EXISTS temp._fts_probe")
    except sqlite3.OperationalError:
        # Cleanup is best-effort: FTS5 support is already established, and a
        # leftover temp table disappears when the connection closes.
        pass
    return True