graphnav 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,793 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import shutil
7
+ import subprocess
8
+ import sys
9
+ import threading
10
+ import time
11
+ from dataclasses import dataclass, field
12
+
13
+ from codex_graph.config import MonoConfig
14
+
15
+
16
# File suffixes (with the leading dot) that are treated as source code, both
# when probing a directory for services and when listing graph symbols.
SOURCE_EXTENSIONS = frozenset(
    (
        ".py .js .jsx .ts .tsx .mjs .cjs .vue .svelte "
        ".go .rs .java .kt .rb .php .cs .swift .scala "
        ".c .cc .cpp .h .hpp .m .mm .dart .ex .exs"
    ).split()
)
21
+
22
# Directory names that are never treated as services and never descended
# into while probing for source files (dependencies, build output, caches,
# tooling metadata).
SKIP_DIRS = frozenset(
    (
        "node_modules dist build out target vendor bin obj "
        "__pycache__ graphify-out venv .venv env site-packages "
        ".next .nuxt coverage test-results playwright-report "
        ".pytest_cache .mypy_cache .git .github .idea .vscode"
    ).split()
)
28
+
29
+
30
+ def _find_env_file(start: str) -> str | None:
31
+ current = os.path.abspath(start)
32
+ while True:
33
+ candidate = os.path.join(current, ".env")
34
+ if os.path.isfile(candidate):
35
+ return candidate
36
+ parent = os.path.dirname(current)
37
+ if parent == current:
38
+ return None
39
+ current = parent
40
+
41
+
42
+ def _parse_env_file(path: str) -> dict[str, str]:
43
+ env_vars: dict[str, str] = {}
44
+ try:
45
+ with open(path) as f:
46
+ for line in f:
47
+ line = line.strip()
48
+ if not line or line.startswith("#") or "=" not in line:
49
+ continue
50
+ if line.startswith("export "):
51
+ line = line[len("export "):]
52
+ key, _, value = line.partition("=")
53
+ env_vars[key.strip()] = value.strip().strip('"').strip("'")
54
+ except OSError:
55
+ pass
56
+ return env_vars
57
+
58
+
59
def _env_file_sources(root: str) -> list[str]:
    """List the ``.env`` files to load, in priority order and without repeats.

    Order of discovery:
      1. the nearest ``.env`` at or above *root*;
      2. the nearest ``.env`` at or above the current working directory;
      3. ``<child>/.env`` for every entry directly under *root*, then under
         the current working directory (entries visited in sorted order).
    """
    ordered: list[str] = []
    known: set[str] = set()

    def _remember(candidate: str | None) -> None:
        # Keep only existing files, each at most once.
        if not candidate or candidate in known:
            return
        if os.path.isfile(candidate):
            known.add(candidate)
            ordered.append(candidate)

    _remember(_find_env_file(root))
    _remember(_find_env_file(os.getcwd()))
    for parent in (root, os.getcwd()):
        try:
            children = sorted(os.listdir(parent))
        except OSError:
            # Unlistable directory: nothing to add from it.
            continue
        for child in children:
            _remember(os.path.join(parent, child, ".env"))
    return ordered
77
+
78
+
79
def _load_env_file(root: str) -> dict[str, str]:
    """Merge every discovered ``.env`` file into one mapping.

    Files are visited in the order given by ``_env_file_sources``; the first
    file that defines a name wins. When ``ANTHROPIC_KEY`` is defined but
    ``ANTHROPIC_API_KEY`` is not, the former is copied to the latter.
    """
    merged: dict[str, str] = {}
    for env_path in _env_file_sources(root):
        parsed = _parse_env_file(env_path)
        for name in parsed:
            if name not in merged:
                merged[name] = parsed[name]
    if "ANTHROPIC_API_KEY" not in merged and "ANTHROPIC_KEY" in merged:
        merged["ANTHROPIC_API_KEY"] = merged["ANTHROPIC_KEY"]
    return merged
87
+
88
+
89
def _build_subprocess_env(root: str) -> dict[str, str]:
    """Return a copy of the process environment overlaid with ``.env`` values.

    Values loaded from ``.env`` files take precedence over variables already
    present in ``os.environ``.
    """
    return {**os.environ, **_load_env_file(root)}
93
+
94
+
95
@dataclass
class ServiceInfo:
    """One service (a top-level directory of the repository) being mapped."""

    # Display name; `detect_services` uses the directory's entry name.
    name: str
    # Path of the service directory.
    abs_path: str
    # Where this service's partitioned graph JSON is written and read.
    graph_path: str
    # Names of other services this one has bridges to; filled in by
    # `analyze_bridges`.
    bridges_to: list[str] = field(default_factory=list)
101
+
102
+
103
@dataclass
class BridgeRow:
    """One cross-service connection, seen from the "local" service's side."""

    # Local node's source file, with the local service prefix removed.
    local_file: str
    # Local node's label.
    local_symbol: str
    # The link's relation.
    relation: str
    # Name of the service on the other end of the link.
    remote_svc: str
    # Remote node's source file, as stored in the graph.
    remote_file: str
    # Remote node's label.
    remote_symbol: str
    # Source locations of the local / remote node; empty when unknown.
    local_loc: str = ""
    remote_loc: str = ""
113
+
114
+
115
def _has_source_files(path: str, max_depth: int = 4) -> bool:
    """Report whether *path* contains at least one source file.

    The tree is walked top-down. Directories named in ``SKIP_DIRS`` or
    starting with ``.`` are not descended into, and nothing below
    *max_depth* levels under *path* is visited. A file counts as source when
    its extension is in ``SOURCE_EXTENSIONS``.

    Depth is measured relative to the normalized *path*, so a trailing
    separator or redundant path components do not shift the depth limit.
    """
    if not path:
        # os.walk("") yields nothing; do not let normpath turn it into ".".
        return False
    top = os.path.normpath(path)
    for dirpath, dirnames, filenames in os.walk(top):
        rel = os.path.relpath(dirpath, top)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
        if depth >= max_depth:
            # Clearing in place tells os.walk not to descend any further.
            dirnames[:] = []
        else:
            dirnames[:] = [
                d for d in dirnames
                if d not in SKIP_DIRS and not d.startswith(".")
            ]
        if any(os.path.splitext(fn)[1] in SOURCE_EXTENSIONS for fn in filenames):
            return True
    return False
130
+
131
+
132
def detect_services(root: str, marker_files: list[str]) -> list[ServiceInfo]:
    """Find the service directories directly under *root*.

    A child directory is a service when it is not hidden, not listed in
    ``SKIP_DIRS``, and either contains one of *marker_files* or contains
    source files (see ``_has_source_files``).

    Returns:
        Services sorted by directory name; an empty list when *root* cannot
        be listed.
    """
    markers = set(marker_files)
    try:
        names = os.listdir(root)
    except OSError:
        return []

    found: list[ServiceInfo] = []
    for name in sorted(names):
        service_dir = os.path.join(root, name)
        if not os.path.isdir(service_dir):
            continue
        if name in SKIP_DIRS or name.startswith("."):
            continue
        marked = False
        for marker in markers:
            if os.path.exists(os.path.join(service_dir, marker)):
                marked = True
                break
        if not (marked or _has_source_files(service_dir)):
            continue
        found.append(
            ServiceInfo(
                name=name,
                abs_path=service_dir,
                graph_path=os.path.join(service_dir, "graphify-out", "graph.json"),
            )
        )
    return found
155
+
156
+
157
+ def _stream_proc(proc: subprocess.Popen, timeout: int) -> int:
158
+ def _relay(src, dst):
159
+ for line in src:
160
+ dst.write(line)
161
+ dst.flush()
162
+
163
+ t_out = threading.Thread(target=_relay, args=(proc.stdout, sys.stderr), daemon=True)
164
+ t_err = threading.Thread(target=_relay, args=(proc.stderr, sys.stderr), daemon=True)
165
+ t_out.start()
166
+ t_err.start()
167
+ try:
168
+ proc.wait(timeout=timeout)
169
+ except subprocess.TimeoutExpired:
170
+ proc.kill()
171
+ proc.wait()
172
+ t_out.join(timeout=5)
173
+ t_err.join(timeout=5)
174
+ return proc.returncode
175
+
176
+
177
def run_extract(
    service: ServiceInfo,
    graphify_path: str,
    backend: str,
    timeout: int = 600,
    env: dict[str, str] | None = None,
) -> int:
    """Run ``graphify extract`` on one service and return its exit code.

    The service directory is used both as the extraction input and as the
    ``--out`` location. The child's output is relayed to stderr by
    ``_stream_proc``, which also enforces *timeout* (seconds).
    """
    print(f"[codex-graph] extracting {service.name} ...", file=sys.stderr)
    command = [
        graphify_path,
        "extract",
        service.abs_path,
        "--backend",
        backend,
        "--out",
        service.abs_path,
    ]
    child = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,  # line-buffered, so output is relayed as it is produced
        env=env,
    )
    return _stream_proc(child, timeout)
194
+
195
+
196
+ def _overarching_graph_path(root: str) -> str:
197
+ return os.path.join(root, "graphify-out", "graph.json")
198
+
199
+
200
def _overarching_service(root: str) -> ServiceInfo:
    """Describe the whole repository as a single pseudo-service.

    This lets the repository-wide extraction reuse ``run_extract``.
    """
    whole_repo_graph = _overarching_graph_path(root)
    return ServiceInfo("overarching (whole repo)", root, whole_repo_graph)
206
+
207
+
208
def build_overarching_graph(
    root: str,
    graphify_path: str,
    backend: str,
    timeout: int = 1200,
    env: dict[str, str] | None = None,
) -> int:
    """Extract one graph covering the whole repository at *root*.

    Returns:
        The exit code of the ``graphify extract`` run.
    """
    whole_repo = _overarching_service(root)
    return run_extract(whole_repo, graphify_path, backend, timeout=timeout, env=env)
216
+
217
+
218
+ def _graph_links(graph: dict) -> list[dict]:
219
+ links = graph.get("links")
220
+ if links is None:
221
+ links = graph.get("edges", [])
222
+ return links
223
+
224
+
225
def partition_graph(
    overarching_graph_path: str,
    services: list[ServiceInfo],
) -> dict[str, int]:
    """Split the whole-repository graph into one graph file per service.

    A node belongs to the service named by the first ``/``-separated
    component of its ``source_file``. A link is kept only when both of its
    endpoints belong to the same service. All other top-level keys of the
    graph are copied into every sub-graph unchanged.

    Each sub-graph is written as UTF-8 JSON to the service's ``graph_path``;
    the directory containing that path is created if needed.

    Returns:
        Mapping of service name to the number of nodes written for it.

    Raises:
        OSError: If the overarching graph cannot be read or a sub-graph
            cannot be written.
        json.JSONDecodeError: If the overarching graph is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(overarching_graph_path, encoding="utf-8") as f:
        graph = json.load(f)

    service_names = {s.name for s in services}
    node_svc: dict[str, str] = {}
    per_nodes: dict[str, list[dict]] = {s.name: [] for s in services}
    for node in graph.get("nodes", []):
        svc = _service_of(node.get("source_file", ""), service_names)
        if svc is None:
            continue
        per_nodes[svc].append(node)
        node_id = node.get("id")
        if node_id is not None:
            # Nodes without an id cannot be link endpoints; indexing them
            # under None would match links whose endpoint key is missing.
            node_svc[node_id] = svc

    per_links: dict[str, list[dict]] = {s.name: [] for s in services}
    for link in _graph_links(graph):
        src_svc = node_svc.get(link.get("source"))
        tgt_svc = node_svc.get(link.get("target"))
        if src_svc is not None and src_svc == tgt_svc:
            per_links[src_svc].append(link)

    base_meta = {k: v for k, v in graph.items() if k not in ("nodes", "links", "edges")}
    counts: dict[str, int] = {}
    for svc in services:
        # Create the directory the file actually goes into.
        os.makedirs(os.path.dirname(svc.graph_path) or ".", exist_ok=True)
        subgraph = dict(base_meta)
        subgraph["nodes"] = per_nodes[svc.name]
        subgraph["links"] = per_links[svc.name]
        with open(svc.graph_path, "w", encoding="utf-8") as f:
            json.dump(subgraph, f, indent=2)
        counts[svc.name] = len(per_nodes[svc.name])
    return counts
260
+
261
+
262
+ def _service_of(source_file: str, service_names: set[str]) -> str | None:
263
+ if not source_file:
264
+ return None
265
+ prefix = source_file.split("/")[0]
266
+ return prefix if prefix in service_names else None
267
+
268
+
269
def analyze_bridges(
    overarching_graph_path: str,
    services: list[ServiceInfo],
) -> dict[str, list[BridgeRow]]:
    """Collect the links that connect two different services.

    For every link whose endpoints resolve to nodes in two distinct known
    services, one ``BridgeRow`` is recorded under the "local" service. The
    local side is the service named by the link's own ``source_file`` when
    that is one of the two endpoint services; otherwise it is the service of
    the link's source node.

    Side effect: each service's ``bridges_to`` is replaced with the sorted
    names of the services it has rows pointing at.

    Returns:
        Mapping of service name to its bridge rows (possibly empty).

    Raises:
        OSError: If the graph cannot be read.
        json.JSONDecodeError: If the graph is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(overarching_graph_path, encoding="utf-8") as f:
        graph = json.load(f)

    service_names = {s.name for s in services}
    # Nodes without an id can never be a link endpoint, so they are skipped
    # instead of raising KeyError.
    node_by_id: dict[str, dict] = {
        n["id"]: n for n in graph.get("nodes", []) if n.get("id") is not None
    }
    bridges: dict[str, list[BridgeRow]] = {s.name: [] for s in services}

    for link in _graph_links(graph):
        src_node = node_by_id.get(link.get("source", ""))
        tgt_node = node_by_id.get(link.get("target", ""))
        if not src_node or not tgt_node:
            continue

        src_svc = _service_of(src_node.get("source_file", ""), service_names)
        tgt_svc = _service_of(tgt_node.get("source_file", ""), service_names)

        if not src_svc or not tgt_svc or src_svc == tgt_svc:
            continue

        local_svc = _service_of(link.get("source_file", ""), service_names)
        if local_svc not in (src_svc, tgt_svc):
            # Unknown, or a third service that owns neither endpoint: filing
            # the row there would mislabel both sides, so use the source.
            local_svc = src_svc

        if local_svc == src_svc:
            local_node, remote_node, remote_svc = src_node, tgt_node, tgt_svc
        else:
            local_node, remote_node, remote_svc = tgt_node, src_node, src_svc

        local_file = local_node.get("source_file", "").removeprefix(local_svc + "/")
        bridges[local_svc].append(BridgeRow(
            local_file=local_file,
            local_symbol=local_node.get("label", ""),
            relation=link.get("relation", ""),
            remote_svc=remote_svc,
            remote_file=remote_node.get("source_file", ""),
            remote_symbol=remote_node.get("label", ""),
            local_loc=local_node.get("source_location", ""),
            remote_loc=remote_node.get("source_location", ""),
        ))

    for svc in services:
        svc.bridges_to = sorted({r.remote_svc for r in bridges[svc.name]})

    return bridges
317
+
318
+
319
def write_bridges_md(service: ServiceInfo, rows: list[BridgeRow]) -> str:
    """Write ``<service>/graphify-out/BRIDGES.md`` and return its path.

    The file holds a Markdown table with one line per bridge row, or a
    placeholder sentence when *rows* is empty. Cell values are escaped so
    that ``|`` or a line break inside a symbol or path cannot break the
    table. The file is written as UTF-8.

    Raises:
        OSError: If the output directory or file cannot be written.
    """
    def _cell(value) -> str:
        # A raw "|" would start a new column; a newline would end the row.
        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    out_dir = os.path.join(service.abs_path, "graphify-out")
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "BRIDGES.md")
    lines = [f"# Bridges: {service.name}", ""]
    if not rows:
        lines.append("_No cross-service connections detected._")
    else:
        lines.append(
            "> Editing a Local symbol below may require changes to the Remote symbol. "
            'Run `graphify affected "<symbol>"` to confirm impact before changing it.'
        )
        lines.append("")
        lines.append("| Local File | Symbol | Loc | Relation | → Service | Remote File | Remote Symbol | Loc |")
        lines.append("|---|---|---|---|---|---|---|---|")
        for r in rows:
            cells = (
                r.local_file, r.local_symbol, r.local_loc, r.relation,
                r.remote_svc, r.remote_file, r.remote_symbol, r.remote_loc,
            )
            lines.append("| " + " | ".join(_cell(c) for c in cells) + " |")
    # Explicit UTF-8: the header contains a non-ASCII arrow.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
    return path
342
+
343
+
344
def _symbols_by_file(graph: dict, prefix_strip: str = "") -> dict[str, list[tuple[str, str]]]:
    """Group the graph's code symbols by the file that defines them.

    A node is included when its ``file_type`` is ``"code"``, it has both a
    ``source_file`` and a ``label``, the label is not just the file's base
    name, and the file extension is in ``SOURCE_EXTENSIONS``.

    Args:
        graph: Parsed graph with a ``"nodes"`` list.
        prefix_strip: If given, a leading ``"<prefix_strip>/"`` is removed
            from file paths used as keys.

    Returns:
        Mapping of file path to ``(label, source_location)`` pairs in graph
        order.
    """
    strip = prefix_strip + "/" if prefix_strip else ""
    grouped: dict[str, list[tuple[str, str]]] = {}
    for node in graph.get("nodes", []):
        source_file = node.get("source_file", "")
        symbol = node.get("label", "")
        is_symbol = (
            node.get("file_type") == "code"
            and source_file
            and symbol
            and symbol != os.path.basename(source_file)
            and os.path.splitext(source_file)[1] in SOURCE_EXTENSIONS
        )
        if not is_symbol:
            continue
        if strip and source_file.startswith(strip):
            rel_file = source_file[len(strip):]
        else:
            rel_file = source_file
        grouped.setdefault(rel_file, []).append((symbol, node.get("source_location", "")))
    return grouped
360
+
361
+
362
def write_symbols_md(service: ServiceInfo) -> str:
    """Write ``<service>/graphify-out/SYMBOLS.md`` and return its path.

    The service's graph is read from ``service.graph_path``; if it is
    missing, unreadable or not valid JSON, the file is written with a
    placeholder instead. Symbols are listed per file, with files in sorted
    order and the service-name prefix removed from file paths.

    Both the graph and the output are handled as UTF-8.

    Raises:
        OSError: If the output directory or file cannot be written.
    """
    out_dir = os.path.join(service.abs_path, "graphify-out")
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "SYMBOLS.md")
    try:
        with open(service.graph_path, encoding="utf-8") as f:
            graph = json.load(f)
    except (OSError, json.JSONDecodeError):
        # No usable graph yet: fall through to the placeholder output.
        graph = {"nodes": []}

    by_file = _symbols_by_file(graph, prefix_strip=service.name)
    lines = [f"# Symbols: {service.name}", ""]
    if not by_file:
        lines.append("_No code symbols extracted._")
    else:
        lines.append("Open a symbol by its `file:line` instead of reading whole files.")
        lines.append("")
        for sf in sorted(by_file):
            lines.append(f"## {sf}")
            for label, loc in by_file[sf]:
                # Formatting (not concatenation) tolerates a non-string loc.
                lines.append(f"- {label} — {loc}" if loc else f"- {label}")
            lines.append("")
    # Explicit UTF-8: entries contain a non-ASCII dash.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines).rstrip() + "\n")
    return path
387
+
388
+
389
def write_monorepo_map(root: str, services: list[ServiceInfo]) -> str:
    """Write ``<root>/graphify-out/MONOREPO_MAP.md`` and return its path.

    The map is a Markdown table with one row per service: its name, its
    graph path relative to *root*, and the services it bridges to
    (``_none_`` when there are none).
    """
    out_dir = os.path.join(root, "graphify-out")
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "MONOREPO_MAP.md")

    table = ["# Monorepo Map", "", "| Service | Graph | Bridges To |", "|---|---|---|"]
    for svc in services:
        if svc.bridges_to:
            bridges_cell = ", ".join(svc.bridges_to)
        else:
            bridges_cell = "_none_"
        graph_rel = os.path.relpath(svc.graph_path, root)
        table.append(f"| {svc.name} | {graph_rel} | {bridges_cell} |")

    with open(path, "w") as f:
        f.write("\n".join(table) + "\n")
    return path
401
+
402
+
403
+ _BLOCK_START = "<!-- codex-graph:start -->"
404
+ _BLOCK_END = "<!-- codex-graph:end -->"
405
+
406
+
407
def build_playbook_text(root: str, services: list[ServiceInfo]) -> str:
    """Build the agent playbook placed into the instruction files.

    Args:
        root: Repository root. Accepted for interface symmetry; the text
            does not depend on it.
        services: Detected services; their names are listed in the text, or
            ``(single project)`` when there are none.

    Returns:
        The playbook as Markdown, without a trailing newline.
    """
    if services:
        svc_names = ", ".join(svc.name for svc in services)
    else:
        svc_names = "(single project)"

    intro = [
        "# Coding with the codebase knowledge graph",
        "",
        "This repo has a graphify knowledge graph. Use it as your **first resort** — "
        "never use `find`, `ls`, or `cat` to explore repo structure or understand unfamiliar code.",
        "",
    ]
    step_zero = [
        "**Step 0 — always read the monorepo map first** for any task that isn't a "
        "single-file, single-line change:",
        "```",
        "graphify-out/MONOREPO_MAP.md",
        "```",
        "",
    ]
    scope = [
        "**Then judge scope:**",
        "- Single-file, single-line edit (rename, formatting, one-liner)? "
        "Just make it — no further graphify steps needed.",
        "- Everything else — including code changes, explanations, architecture questions, "
        '"how does X work", overviews, or anything touching unfamiliar files:',
        ' 1. Run `codex-graph context "<task>"` — prints the minimal files, their symbol '
        "`file:line` locations, and any cross-service impact.",
        " 2. Open ONLY those files; read the given `file:line` regions, not whole files.",
        ' 3. Before changing a symbol flagged "Cross-service impact", run '
        '`graphify affected "<symbol>"`.',
        " 4. Implement (or answer), then run the project's tests if code changed.",
        "",
    ]
    fallback = [
        "**Never** use `find`/`ls`/`cat` to survey the repo. If graphify doesn't give "
        "enough context, read `<service>/graphify-out/SYMBOLS.md` or "
        "`<service>/graphify-out/BRIDGES.md` next — not a raw directory listing.",
        "",
    ]
    footer = [
        f"Services: {svc_names}",
        "On-demand maps (open only when needed): `graphify-out/MONOREPO_MAP.md` · "
        "`<service>/graphify-out/SYMBOLS.md` · `<service>/graphify-out/BRIDGES.md`",
    ]
    return "\n".join(intro + step_zero + scope + fallback + footer)
442
+
443
+
444
def _write_managed_block(path: str, content: str) -> None:
    """Insert or replace the tool-managed section of the file at *path*.

    The section is *content* wrapped in the ``_BLOCK_START`` /
    ``_BLOCK_END`` markers.

    * If the file already has a start marker followed by an end marker, the
      text from the first start marker through the first end marker after
      it is replaced; everything else is kept.
    * Otherwise, if the file has other content, the section is appended
      after a blank line.
    * Otherwise the file consists of the section alone.

    Parent directories are created as needed. The file is read and written
    as UTF-8.

    Raises:
        OSError: If the file cannot be written.
        UnicodeDecodeError: If an existing file is not valid UTF-8.
    """
    block = f"{_BLOCK_START}\n{content}\n{_BLOCK_END}\n"
    existing = ""
    if os.path.exists(path):
        try:
            with open(path, encoding="utf-8") as f:
                existing = f.read()
        except OSError:
            # NOTE(review): best effort kept from the original — an
            # unreadable file is treated as empty, so a later successful
            # write replaces it with just the managed section.
            existing = ""

    start = existing.find(_BLOCK_START)
    # Only an end marker located after the start marker closes the section;
    # a stray end marker earlier in the file must not be used as the cut.
    end = existing.find(_BLOCK_END, start + len(_BLOCK_START)) if start != -1 else -1

    if start != -1 and end != -1:
        before = existing[:start]
        after = existing[end + len(_BLOCK_END):]
        new_content = before + block.rstrip("\n") + after
    elif existing.strip():
        new_content = existing.rstrip("\n") + "\n\n" + block
    else:
        new_content = block

    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(new_content)
466
+
467
+
468
def write_copilot_instructions(root: str, services: list[ServiceInfo]) -> str:
    """Write the playbook into every supported agent-instruction file.

    The same managed section is written to
    ``.github/copilot-instructions.md``, ``AGENTS.md`` and ``CLAUDE.md``
    under *root*.

    Returns:
        The path of the Copilot instructions file.
    """
    playbook = build_playbook_text(root, services)
    copilot_path = os.path.join(root, ".github", "copilot-instructions.md")
    targets = (
        copilot_path,
        os.path.join(root, "AGENTS.md"),
        os.path.join(root, "CLAUDE.md"),
    )
    for target in targets:
        _write_managed_block(target, playbook)
    return copilot_path
475
+
476
+
477
def build_context_pack(
    root: str,
    task: str,
    top_files: int = 8,
    budget_tokens: int = 2000,
    skip_patterns: list[str] | None = None,
) -> str:
    """Build a Markdown "context pack" pointing at the files for *task*.

    The whole-repository graph is queried for the files most relevant to
    *task*. The pack lists those files with up to 12 symbols each, then any
    cross-service bridges that touch them, then short follow-up guidance.

    Args:
        root: Repository root containing ``graphify-out/graph.json``.
        task: Free-text task description used as the query.
        top_files: Maximum number of files requested from the ranking.
        budget_tokens: Approximate size limit; the text is cut at four
            characters per token. ``0`` or a negative value disables the
            limit.
        skip_patterns: Patterns passed to the graph index loader; a default
            list of build and dependency directories is used when ``None``.

    Returns:
        The pack text. When no graph exists, or nothing matches, a short
        explanatory message is returned instead.

    Raises:
        OSError: If the graph exists but cannot be read.
        json.JSONDecodeError: If the graph is not valid JSON.
    """
    from codex_graph.graph_query import load_index, query_files

    root = os.path.abspath(root)
    overarching_path = _overarching_graph_path(root)
    if not os.path.exists(overarching_path):
        rel = os.path.relpath(overarching_path, root)
        return (
            f"# Context for: {task}\n\n"
            f"No knowledge graph found at {rel}.\n"
            "Run `codex-graph map` (monorepo) or `graphify extract .` first.\n"
        )

    if skip_patterns is None:
        skip_patterns = [
            "node_modules", ".git", "graphify-out", "dist", "build",
            "playwright-report", "test-results", ".next", "coverage",
        ]

    try:
        index = load_index(overarching_path, skip_patterns)
        ranked = query_files(task, index, top_files)
    except Exception:
        # Best effort: a failing query is reported as "no matches" below.
        ranked = []

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(overarching_path, encoding="utf-8") as f:
        graph = json.load(f)
    by_file = _symbols_by_file(graph)
    selected = [rf.source_file for rf in ranked]

    out_lines = [f"# Context for: {task}", ""]
    if not selected:
        out_lines.append(
            "_No matching files. Try terms from the code itself (function or class names)._"
        )
        return "\n".join(out_lines) + "\n"

    out_lines.append("## Open only these files")
    for sf in selected:
        syms = by_file.get(sf, [])
        if syms:
            shown = ", ".join(f"{label} {loc}".strip() for label, loc in syms[:12])
            out_lines.append(f"- {sf} — {shown}")
        else:
            out_lines.append(f"- {sf}")

    services = detect_services(root, MonoConfig().marker_files)
    if services:
        bridges = analyze_bridges(overarching_path, services)
        sel_set = set(selected)
        impact: list[str] = []
        for svc in services:
            for r in bridges[svc.name]:
                # Bridge rows store the local file without its service
                # prefix; restore it to compare with graph paths.
                local_full = f"{svc.name}/{r.local_file}"
                if local_full in sel_set or r.remote_file in sel_set:
                    impact.append(
                        f"- {local_full}:{r.local_symbol} {r.local_loc} "
                        f"--{r.relation}--> {r.remote_file}:{r.remote_symbol} {r.remote_loc}"
                    )
        if impact:
            out_lines.append("")
            out_lines.append("## Cross-service impact")
            out_lines.extend(impact)

    out_lines += [
        "",
        "## Next",
        "Read only the `file:line` regions above. Before changing a symbol under "
        'Cross-service impact, run `graphify affected "<symbol>"`. Then run the tests.',
    ]

    text = "\n".join(out_lines) + "\n"
    char_budget = max(budget_tokens, 0) * 4
    if char_budget and len(text) > char_budget:
        text = text[:char_budget].rstrip() + "\n\n_(truncated to budget)_\n"
    return text
559
+
560
+
561
+ def _extract_code_windows(abs_path, lines_wanted, before=2, after=14, max_lines=110):
562
+ try:
563
+ with open(abs_path, errors="replace") as f:
564
+ src = f.read().splitlines()
565
+ except OSError:
566
+ return ""
567
+ n = len(src)
568
+ keep = set()
569
+ for ln in lines_wanted:
570
+ if 1 <= ln <= n:
571
+ for i in range(max(1, ln - before), min(n, ln + after) + 1):
572
+ keep.add(i)
573
+ if not keep:
574
+ return ""
575
+ kept = sorted(keep)[:max_lines]
576
+ pieces = []
577
+ prev = None
578
+ for i in kept:
579
+ if prev is not None and i > prev + 1:
580
+ pieces.append(" ...")
581
+ pieces.append(f"{i:>5} {src[i - 1]}")
582
+ prev = i
583
+ return "\n".join(pieces)
584
+
585
+
586
def build_context_pack_inline(root, task, top_files=3, budget_tokens=2500, skip_patterns=None):
    """Build a context pack that embeds the relevant code itself.

    Like ``build_context_pack``, but for each top-ranked file the pack
    contains numbered code excerpts around the file's known symbol
    locations, followed by a list of other code that references those
    files.

    Args:
        root: Repository root containing ``graphify-out/graph.json``.
        task: Free-text task description used as the query.
        top_files: Maximum number of files requested from the ranking.
        budget_tokens: Approximate size limit; the text is cut at four
            characters per token. ``0`` or a negative value disables the
            limit.
        skip_patterns: Patterns passed to the graph index loader and the
            graph navigator; a default list is used when ``None``.

    Returns:
        The pack text as Markdown. If the text is truncated inside a code
        fence, the fence is closed before the truncation notice.

    Raises:
        OSError: If the graph exists but cannot be read.
        json.JSONDecodeError: If the graph is not valid JSON.
    """
    from codex_graph.graph_query import load_index, query_files

    root = os.path.abspath(root)
    overarching_path = _overarching_graph_path(root)
    if not os.path.exists(overarching_path):
        return f"# Context for: {task}\n\nNo knowledge graph found.\n"
    if skip_patterns is None:
        skip_patterns = [
            "node_modules", ".git", "graphify-out", "dist", "build",
            "playwright-report", "test-results", ".next", "coverage",
        ]
    try:
        index = load_index(overarching_path, skip_patterns)
        ranked = query_files(task, index, top_files)
    except Exception:
        # Best effort: a failing query is reported as "no matches" below.
        ranked = []

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(overarching_path, encoding="utf-8") as f:
        graph = json.load(f)
    by_file = _symbols_by_file(graph)

    out = [
        f"# Context for: {task}",
        "",
        "## Relevant code (extracted from the knowledge graph — already in context, do not re-open these files)",
    ]
    if not ranked:
        out.append("_No confident matches; explore normally._")
        return "\n".join(out) + "\n"

    for rf in ranked:
        sf = rf.source_file
        syms = by_file.get(sf, [])
        line_nums = []
        for _label, loc in syms:
            # Take the first "L<number>" found in the location text.
            m = re.search(r"L(\d+)", loc or "")
            if m:
                line_nums.append(int(m.group(1)))
        snippet = _extract_code_windows(os.path.join(root, sf), line_nums)
        out.append("")
        out.append(f"### {sf}")
        if syms:
            out.append("symbols: " + ", ".join(label for label, _ in syms[:10]))
        if snippet:
            out.append("```")
            out.append(snippet)
            out.append("```")

    from codex_graph.graph_nav import GraphNav

    try:
        nav = GraphNav(overarching_path, skip_patterns)
        refs = nav.references_to([rf.source_file for rf in ranked], limit=12)
    except Exception:
        # References are supplementary; the pack is still useful without them.
        refs = []
    if refs:
        out.append("")
        out.append("## Other code that references the above (likely also needs edits)")
        out.extend("- " + r for r in refs)

    out += [
        "",
        "## Next",
        "The relevant code is shown above. Make the change directly; only open a file "
        "if you need a region not shown. To explore further, use the graph tools "
        "(graph_find, graph_neighbors) instead of broad searches.",
    ]
    text = "\n".join(out) + "\n"
    char_budget = max(budget_tokens, 0) * 4
    if char_budget and len(text) > char_budget:
        text = text[:char_budget].rstrip()
        # Excerpt lines always start with a line number, so a line that is
        # exactly "```" is a fence. An odd count means the cut fell inside a
        # code block, and only then is a closing fence needed.
        fence_lines = sum(1 for ln in text.splitlines() if ln == "```")
        if fence_lines % 2 == 1:
            text += "\n```"
        text += "\n\n_(truncated to budget)_\n"
    return text
659
+
660
+
661
def _refresh(
    root: str,
    services: list[ServiceInfo],
    overarching_graph_path: str,
) -> dict[str, list[BridgeRow]]:
    """Regenerate every derived artifact from the whole-repository graph.

    In order: per-service graphs, bridge analysis, each service's
    ``BRIDGES.md`` and ``SYMBOLS.md``, the monorepo map, and the agent
    instruction files.

    Returns:
        The bridge rows per service, as produced by ``analyze_bridges``.
    """
    partition_graph(overarching_graph_path, services)
    bridges = analyze_bridges(overarching_graph_path, services)

    for service in services:
        rows = bridges[service.name]
        write_bridges_md(service, rows)
        write_symbols_md(service)

    # Repository-level outputs need bridges_to, which analyze_bridges set.
    write_monorepo_map(root, services)
    write_copilot_instructions(root, services)
    return bridges
674
+
675
+
676
def run_map(
    root: str,
    mono_cfg: MonoConfig,
    backend_override: str | None = None,
    dry_run: bool = False,
) -> int:
    """Map the monorepo at *root*: extract the graph and write all outputs.

    Args:
        root: Repository root.
        mono_cfg: Configuration supplying marker files and the default
            graphify backend.
        backend_override: Backend to use instead of the configured one.
        dry_run: Only list the detected services; run nothing.

    Returns:
        ``0`` on success; ``1`` when graphify is missing, no services are
        found, or the extraction fails.
    """
    def _err(message: str) -> None:
        print(message, file=sys.stderr)

    root = os.path.abspath(root)
    graphify_path = shutil.which("graphify")
    if graphify_path is None:
        _err("Error: 'graphify' not found on PATH. Install with: pip install graphifyy")
        return 1

    services = detect_services(root, mono_cfg.marker_files)
    if not services:
        _err(f"No services detected in {root}. Add code to subdirectories (or marker files like package.json/pyproject.toml).")
        return 1

    if dry_run:
        print(f"Detected {len(services)} service(s):")
        for svc in services:
            print(f" {svc.name} {svc.abs_path}")
        print("[dry-run] No graphify calls made.")
        return 0

    backend = backend_override or mono_cfg.graphify_backend
    env = _build_subprocess_env(root)
    overarching_path = _overarching_graph_path(root)

    _err(f"[codex-graph] Building overarching graph across {len(services)} service(s): {', '.join(s.name for s in services)}")
    rc = build_overarching_graph(root, graphify_path, backend, env=env)
    extraction_ok = rc == 0 and os.path.exists(overarching_path)
    if not extraction_ok:
        _err(f"Error: overarching graphify extraction failed (exit {rc}).")
        _err(" Ensure an API key is available (e.g. ANTHROPIC_API_KEY or ANTHROPIC_KEY in a .env file).")
        return 1

    bridges = _refresh(root, services, overarching_path)
    total_bridges = 0
    for rows in bridges.values():
        total_bridges += len(rows)

    print(f"\nDone. {len(services)} service(s) mapped, {total_bridges} cross-service connection(s) found.")
    print(f" Overarching graph : {overarching_path}")
    for svc in services:
        to = ", ".join(svc.bridges_to) if svc.bridges_to else "none"
        print(f" {svc.name}/graphify-out/ (bridges -> {to})")
    print(f" Monorepo map : {os.path.join(root, 'graphify-out', 'MONOREPO_MAP.md')}")
    print(f" Copilot instructions : {os.path.join(root, '.github', 'copilot-instructions.md')}")
    return 0
722
+
723
+
724
def run_watch(
    root: str,
    mono_cfg: MonoConfig,
    backend_override: str | None = None,
) -> int:
    """Keep the monorepo outputs in sync while ``graphify watch`` runs.

    The whole-repository graph is bootstrapped if missing and all derived
    outputs are regenerated once. ``graphify watch`` is then started, and
    every ``mono_cfg.watch_poll_interval`` seconds the graph file's
    modification time is checked; a change triggers a regeneration. The
    watcher is restarted if it exits.

    A regeneration that fails (for example because the graph was read while
    being rewritten) is reported and retried on the next poll instead of
    ending the watch. The watcher process is always stopped on the way out,
    whether the loop ends by Ctrl-C or by an unexpected error.

    Returns:
        ``0`` after a Ctrl-C shutdown; ``1`` when graphify is missing, no
        services are found, or the bootstrap extraction fails.
    """
    root = os.path.abspath(root)
    graphify_path = shutil.which("graphify")
    if graphify_path is None:
        print("Error: 'graphify' not found on PATH. Install with: pip install graphifyy", file=sys.stderr)
        return 1

    services = detect_services(root, mono_cfg.marker_files)
    if not services:
        print(f"No services detected in {root}.", file=sys.stderr)
        return 1

    backend = backend_override or mono_cfg.graphify_backend
    env = _build_subprocess_env(root)
    overarching_path = _overarching_graph_path(root)

    if not os.path.exists(overarching_path):
        print(f"[codex-graph] Bootstrapping overarching graph for {len(services)} service(s) ...", file=sys.stderr)
        rc = build_overarching_graph(root, graphify_path, backend, env=env)
        if rc != 0 or not os.path.exists(overarching_path):
            print(f"Error: bootstrap extraction failed (exit {rc}).", file=sys.stderr)
            return 1

    _refresh(root, services, overarching_path)

    def _start_watch() -> subprocess.Popen:
        return subprocess.Popen(
            [graphify_path, "watch", root],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            env=env,
        )

    def _stop_watch(proc: subprocess.Popen) -> None:
        # Ask politely first, then force; always reap the child.
        if proc.poll() is not None:
            return
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()

    watch_proc = _start_watch()
    try:
        try:
            last_mtime = os.stat(overarching_path).st_mtime
        except OSError:
            last_mtime = 0.0

        print(f"[codex-graph] Watching {root} ({len(services)} service(s)). Press Ctrl-C to stop.", file=sys.stderr)
        while True:
            time.sleep(mono_cfg.watch_poll_interval)

            try:
                mtime = os.stat(overarching_path).st_mtime
            except OSError:
                # Graph temporarily missing: treat as "unchanged".
                mtime = last_mtime
            if mtime != last_mtime:
                ts = time.strftime("%H:%M:%S")
                print(f"[codex-graph] {ts} graph updated — re-partitioning and re-analyzing bridges ...", file=sys.stderr)
                try:
                    _refresh(root, services, overarching_path)
                except (OSError, ValueError, KeyError) as exc:
                    # json.JSONDecodeError is a ValueError. last_mtime is
                    # left unchanged so the next poll tries again.
                    print(f"[codex-graph] WARNING: refresh failed ({exc}); will retry on the next poll.", file=sys.stderr)
                else:
                    last_mtime = mtime

            if watch_proc.poll() is not None:
                print(f"[codex-graph] WARNING: graphify watch exited (exit {watch_proc.returncode}), restarting ...", file=sys.stderr)
                watch_proc = _start_watch()

    except KeyboardInterrupt:
        print("\n[codex-graph] Stopping watch ...", file=sys.stderr)
    finally:
        _stop_watch(watch_proc)
    return 0