graphnav 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codex_graph/__init__.py +10 -0
- codex_graph/cli.py +238 -0
- codex_graph/config.py +127 -0
- codex_graph/graph_nav.py +113 -0
- codex_graph/graph_query.py +187 -0
- codex_graph/multirepo.py +793 -0
- codex_graph/runner.py +123 -0
- graphnav-0.1.0.dist-info/METADATA +9 -0
- graphnav-0.1.0.dist-info/RECORD +12 -0
- graphnav-0.1.0.dist-info/WHEEL +5 -0
- graphnav-0.1.0.dist-info/entry_points.txt +2 -0
- graphnav-0.1.0.dist-info/top_level.txt +1 -0
codex_graph/multirepo.py
ADDED
|
@@ -0,0 +1,793 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
from codex_graph.config import MonoConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# File extensions treated as source code, both when probing a directory for
# code (_has_source_files) and when filtering graph nodes (_symbols_by_file).
# Matching is an exact, case-sensitive comparison against os.path.splitext().
SOURCE_EXTENSIONS = frozenset({
    ".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", ".vue", ".svelte",
    ".go", ".rs", ".java", ".kt", ".rb", ".php", ".cs", ".swift", ".scala",
    ".c", ".cc", ".cpp", ".h", ".hpp", ".m", ".mm", ".dart", ".ex", ".exs",
})
|
|
21
|
+
|
|
22
|
+
# Directory names that are never considered a service and never descended
# into while looking for source files (dependency, build and tooling output).
SKIP_DIRS = frozenset({
    "node_modules", "dist", "build", "out", "target", "vendor", "bin", "obj",
    "__pycache__", "graphify-out", "venv", ".venv", "env", "site-packages",
    ".next", ".nuxt", "coverage", "test-results", "playwright-report",
    ".pytest_cache", ".mypy_cache", ".git", ".github", ".idea", ".vscode",
})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _find_env_file(start: str) -> str | None:
|
|
31
|
+
current = os.path.abspath(start)
|
|
32
|
+
while True:
|
|
33
|
+
candidate = os.path.join(current, ".env")
|
|
34
|
+
if os.path.isfile(candidate):
|
|
35
|
+
return candidate
|
|
36
|
+
parent = os.path.dirname(current)
|
|
37
|
+
if parent == current:
|
|
38
|
+
return None
|
|
39
|
+
current = parent
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_env_file(path: str) -> dict[str, str]:
|
|
43
|
+
env_vars: dict[str, str] = {}
|
|
44
|
+
try:
|
|
45
|
+
with open(path) as f:
|
|
46
|
+
for line in f:
|
|
47
|
+
line = line.strip()
|
|
48
|
+
if not line or line.startswith("#") or "=" not in line:
|
|
49
|
+
continue
|
|
50
|
+
if line.startswith("export "):
|
|
51
|
+
line = line[len("export "):]
|
|
52
|
+
key, _, value = line.partition("=")
|
|
53
|
+
env_vars[key.strip()] = value.strip().strip('"').strip("'")
|
|
54
|
+
except OSError:
|
|
55
|
+
pass
|
|
56
|
+
return env_vars
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _env_file_sources(root: str) -> list[str]:
    """List the ``.env`` files to load for *root*, in priority order.

    Candidates are, in order: the nearest ``.env`` at or above *root*, the
    nearest one at or above the current working directory, then a ``.env``
    directly inside each entry of *root* and of the working directory
    (entries sorted by name). Duplicates and paths that are not files are
    dropped; unreadable base directories are skipped.
    """
    cwd = os.getcwd()
    candidates: list[str | None] = [_find_env_file(root), _find_env_file(cwd)]
    for base in (root, cwd):
        try:
            children = sorted(os.listdir(base))
        except OSError:
            continue
        candidates.extend(os.path.join(base, child, ".env") for child in children)

    # A dict keeps first-seen order while de-duplicating.
    ordered: dict[str, None] = {}
    for candidate in candidates:
        if candidate and candidate not in ordered and os.path.isfile(candidate):
            ordered[candidate] = None
    return list(ordered)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _load_env_file(root: str) -> dict[str, str]:
    """Merge every discovered ``.env`` file into one mapping.

    Files are visited in the order given by ``_env_file_sources``; the first
    file to define a name wins. If only ``ANTHROPIC_KEY`` is defined, its
    value is also exposed as ``ANTHROPIC_API_KEY``.
    """
    merged: dict[str, str] = {}
    for env_path in _env_file_sources(root):
        for name, val in _parse_env_file(env_path).items():
            if name not in merged:
                merged[name] = val
    if "ANTHROPIC_API_KEY" not in merged and "ANTHROPIC_KEY" in merged:
        merged["ANTHROPIC_API_KEY"] = merged["ANTHROPIC_KEY"]
    return merged
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _build_subprocess_env(root: str) -> dict[str, str]:
    """Return a copy of the process environment overlaid with ``.env`` values.

    Values loaded from ``.env`` files take precedence over variables already
    present in ``os.environ``.
    """
    return {**os.environ, **_load_env_file(root)}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class ServiceInfo:
    """A service (top-level subdirectory) of the monorepo and its graph file."""

    # Service name; detect_services uses the directory entry name.
    name: str
    # Path of the service directory (root joined with the entry name).
    abs_path: str
    # Location of this service's graph JSON (<abs_path>/graphify-out/graph.json).
    graph_path: str
    # Sorted names of the services this one bridges to; set by analyze_bridges.
    bridges_to: list[str] = field(default_factory=list)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclass
class BridgeRow:
    """One cross-service link, seen from the "local" service's side."""

    # Source file of the local node, with the "<service>/" prefix removed.
    local_file: str
    # Label of the local node.
    local_symbol: str
    # Value of the link's "relation" field.
    relation: str
    # Name of the service on the other end of the link.
    remote_svc: str
    # Source file of the remote node, as stored in the graph (prefix kept).
    remote_file: str
    # Label of the remote node.
    remote_symbol: str
    # "source_location" of the local / remote node; empty when absent.
    local_loc: str = ""
    remote_loc: str = ""
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _has_source_files(path: str, max_depth: int = 4) -> bool:
    """Report whether *path* contains a source file within *max_depth* levels.

    Directories named in ``SKIP_DIRS`` or starting with ``.`` are not
    descended into. A file counts as source when its extension is in
    ``SOURCE_EXTENSIONS``.

    Args:
        path: Directory to probe.
        max_depth: Directories at this depth (``path`` itself is depth 0) are
            still scanned for files but not descended into.

    Returns:
        ``True`` as soon as one source file is found, otherwise ``False``.
    """
    for dirpath, dirnames, filenames in os.walk(path):
        # Depth comes from the relative path, not from counting separators,
        # so a trailing separator on *path* cannot shift the result by one.
        rel = os.path.relpath(dirpath, path)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
        if depth >= max_depth:
            dirnames[:] = []  # in-place edit stops os.walk from descending
        else:
            dirnames[:] = [
                d for d in dirnames
                if d not in SKIP_DIRS and not d.startswith(".")
            ]
        if any(os.path.splitext(fn)[1] in SOURCE_EXTENSIONS for fn in filenames):
            return True
    return False
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def detect_services(root: str, marker_files: list[str]) -> list[ServiceInfo]:
    """Find the service directories directly under *root*.

    A subdirectory qualifies when it is not hidden, not in ``SKIP_DIRS``, and
    either contains one of *marker_files* or has source files beneath it.
    Results are ordered by directory name. An unreadable *root* yields an
    empty list.
    """
    markers = set(marker_files)
    try:
        names = sorted(os.listdir(root))
    except OSError:
        return []

    found: list[ServiceInfo] = []
    for name in names:
        service_dir = os.path.join(root, name)
        if not os.path.isdir(service_dir):
            continue
        if name.startswith(".") or name in SKIP_DIRS:
            continue
        marked = any(os.path.exists(os.path.join(service_dir, m)) for m in markers)
        if not (marked or _has_source_files(service_dir)):
            continue
        found.append(
            ServiceInfo(
                name=name,
                abs_path=service_dir,
                graph_path=os.path.join(service_dir, "graphify-out", "graph.json"),
            )
        )
    return found
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _stream_proc(proc: subprocess.Popen, timeout: int) -> int:
|
|
158
|
+
def _relay(src, dst):
|
|
159
|
+
for line in src:
|
|
160
|
+
dst.write(line)
|
|
161
|
+
dst.flush()
|
|
162
|
+
|
|
163
|
+
t_out = threading.Thread(target=_relay, args=(proc.stdout, sys.stderr), daemon=True)
|
|
164
|
+
t_err = threading.Thread(target=_relay, args=(proc.stderr, sys.stderr), daemon=True)
|
|
165
|
+
t_out.start()
|
|
166
|
+
t_err.start()
|
|
167
|
+
try:
|
|
168
|
+
proc.wait(timeout=timeout)
|
|
169
|
+
except subprocess.TimeoutExpired:
|
|
170
|
+
proc.kill()
|
|
171
|
+
proc.wait()
|
|
172
|
+
t_out.join(timeout=5)
|
|
173
|
+
t_err.join(timeout=5)
|
|
174
|
+
return proc.returncode
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def run_extract(
    service: ServiceInfo,
    graphify_path: str,
    backend: str,
    timeout: int = 600,
    env: dict[str, str] | None = None,
) -> int:
    """Run ``graphify extract`` for *service* and return its exit code.

    The service directory is used both as the extraction target and as the
    ``--out`` location. Child output is relayed to stderr while it runs.
    """
    print(f"[codex-graph] extracting {service.name} ...", file=sys.stderr)
    command = [
        graphify_path,
        "extract",
        service.abs_path,
        "--backend",
        backend,
        "--out",
        service.abs_path,
    ]
    child = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,  # line-buffered so output is relayed promptly
        env=env,
    )
    return _stream_proc(child, timeout)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _overarching_graph_path(root: str) -> str:
|
|
197
|
+
return os.path.join(root, "graphify-out", "graph.json")
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _overarching_service(root: str) -> ServiceInfo:
    """Build the pseudo-service that represents the entire repository."""
    graph_file = _overarching_graph_path(root)
    return ServiceInfo(
        name="overarching (whole repo)",
        abs_path=root,
        graph_path=graph_file,
    )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def build_overarching_graph(
    root: str,
    graphify_path: str,
    backend: str,
    timeout: int = 1200,
    env: dict[str, str] | None = None,
) -> int:
    """Extract one graph covering all of *root*; return graphify's exit code."""
    whole_repo = _overarching_service(root)
    return run_extract(whole_repo, graphify_path, backend, timeout=timeout, env=env)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _graph_links(graph: dict) -> list[dict]:
|
|
219
|
+
links = graph.get("links")
|
|
220
|
+
if links is None:
|
|
221
|
+
links = graph.get("edges", [])
|
|
222
|
+
return links
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def partition_graph(
    overarching_graph_path: str,
    services: list[ServiceInfo],
) -> dict[str, int]:
    """Split the whole-repo graph into one graph file per service.

    A node belongs to the service named by the first path segment of its
    ``source_file``. A link is kept only when both endpoints belong to the
    same service. All other top-level graph keys are copied into every
    per-service graph. Each result is written to the service's ``graph_path``.

    Returns:
        Mapping of service name to the number of nodes assigned to it.
    """
    with open(overarching_graph_path) as f:
        graph = json.load(f)

    names = {service.name for service in services}
    nodes_by_service: dict[str, list[dict]] = {service.name: [] for service in services}
    links_by_service: dict[str, list[dict]] = {service.name: [] for service in services}

    owner_of: dict[str, str] = {}
    for node in graph.get("nodes", []):
        owner = _service_of(node.get("source_file", ""), names)
        if owner is None:
            continue
        owner_of[node.get("id")] = owner
        nodes_by_service[owner].append(node)

    for link in _graph_links(graph):
        source_owner = owner_of.get(link.get("source"))
        target_owner = owner_of.get(link.get("target"))
        if source_owner is None or source_owner != target_owner:
            continue
        links_by_service[source_owner].append(link)

    shared_meta = {
        key: value
        for key, value in graph.items()
        if key not in ("nodes", "links", "edges")
    }

    counts: dict[str, int] = {}
    for service in services:
        os.makedirs(os.path.join(service.abs_path, "graphify-out"), exist_ok=True)
        subgraph = {
            **shared_meta,
            "nodes": nodes_by_service[service.name],
            "links": links_by_service[service.name],
        }
        with open(service.graph_path, "w") as f:
            json.dump(subgraph, f, indent=2)
        counts[service.name] = len(nodes_by_service[service.name])
    return counts
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _service_of(source_file: str, service_names: set[str]) -> str | None:
|
|
263
|
+
if not source_file:
|
|
264
|
+
return None
|
|
265
|
+
prefix = source_file.split("/")[0]
|
|
266
|
+
return prefix if prefix in service_names else None
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def analyze_bridges(
    overarching_graph_path: str,
    services: list[ServiceInfo],
) -> dict[str, list[BridgeRow]]:
    """Collect the links that cross a service boundary.

    For every link whose endpoints belong to two different known services, a
    ``BridgeRow`` is recorded under the "local" service: the service named by
    the link's own ``source_file`` when that is one of the two endpoints'
    services, otherwise the service of the link's source node.

    Side effect: each ``ServiceInfo.bridges_to`` is replaced with the sorted
    names of the services it bridges to.

    Returns:
        Mapping of service name to its bridge rows (empty list when none).
    """
    with open(overarching_graph_path) as f:
        graph = json.load(f)

    service_names = {s.name for s in services}
    # Nodes without an id cannot be referenced by a link; skip them instead
    # of raising KeyError, matching how partition_graph reads ids with .get().
    node_by_id: dict[str, dict] = {}
    for node in graph.get("nodes", []):
        node_id = node.get("id")
        if node_id is not None:
            node_by_id[node_id] = node
    bridges: dict[str, list[BridgeRow]] = {s.name: [] for s in services}

    for link in _graph_links(graph):
        src_node = node_by_id.get(link.get("source", ""))
        tgt_node = node_by_id.get(link.get("target", ""))
        if not src_node or not tgt_node:
            continue

        src_svc = _service_of(src_node.get("source_file", ""), service_names)
        tgt_svc = _service_of(tgt_node.get("source_file", ""), service_names)
        if not src_svc or not tgt_svc or src_svc == tgt_svc:
            continue

        local_svc = _service_of(link.get("source_file", ""), service_names)
        # A link attributed to a third, unrelated service must not be filed
        # under it with the target mislabelled as local; use the source side.
        if local_svc not in (src_svc, tgt_svc):
            local_svc = src_svc

        if local_svc == src_svc:
            local_node, remote_node, remote_svc = src_node, tgt_node, tgt_svc
        else:
            local_node, remote_node, remote_svc = tgt_node, src_node, src_svc

        local_file = local_node.get("source_file", "").removeprefix(local_svc + "/")
        bridges[local_svc].append(BridgeRow(
            local_file=local_file,
            local_symbol=local_node.get("label", ""),
            relation=link.get("relation", ""),
            remote_svc=remote_svc,
            remote_file=remote_node.get("source_file", ""),
            remote_symbol=remote_node.get("label", ""),
            local_loc=local_node.get("source_location", ""),
            remote_loc=remote_node.get("source_location", ""),
        ))

    for svc in services:
        svc.bridges_to = sorted({row.remote_svc for row in bridges[svc.name]})

    return bridges
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def write_bridges_md(service: ServiceInfo, rows: list[BridgeRow]) -> str:
    """Write ``<service>/graphify-out/BRIDGES.md`` and return its path.

    The file lists every cross-service connection of *service* as a Markdown
    table, or a one-line notice when *rows* is empty. The output directory is
    created if needed.
    """
    def _cell(value) -> str:
        # A raw "|" or a line break inside a cell would break the table.
        return str(value).replace("|", "\\|").replace("\n", " ")

    out_dir = os.path.join(service.abs_path, "graphify-out")
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "BRIDGES.md")
    lines = [f"# Bridges: {service.name}", ""]
    if not rows:
        lines.append("_No cross-service connections detected._")
    else:
        lines.append(
            "> Editing a Local symbol below may require changes to the Remote symbol. "
            'Run `graphify affected "<symbol>"` to confirm impact before changing it.'
        )
        lines.append("")
        lines.append("| Local File | Symbol | Loc | Relation | → Service | Remote File | Remote Symbol | Loc |")
        lines.append("|---|---|---|---|---|---|---|---|")
        for r in rows:
            cells = (
                r.local_file, r.local_symbol, r.local_loc, r.relation,
                r.remote_svc, r.remote_file, r.remote_symbol, r.remote_loc,
            )
            lines.append("| " + " | ".join(_cell(c) for c in cells) + " |")
    # Explicit UTF-8: the header contains "→", which several locale encodings
    # (for example cp1252) cannot encode.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
    return path
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _symbols_by_file(graph: dict, prefix_strip: str = "") -> dict[str, list[tuple[str, str]]]:
    """Group code symbols by the file that defines them.

    Only nodes with ``file_type == "code"``, a non-empty ``source_file`` with
    a known source extension, and a non-empty label that is not just the
    file's own name are included. When *prefix_strip* is given, a leading
    ``"<prefix_strip>/"`` is removed from each file key.

    Returns:
        Mapping of file path to ``(label, source_location)`` pairs in graph
        order.
    """
    grouped: dict[str, list[tuple[str, str]]] = {}
    for node in graph.get("nodes", []):
        if node.get("file_type") != "code":
            continue
        path = node.get("source_file", "")
        name = node.get("label", "")
        if not (path and name) or name == os.path.basename(path):
            continue
        if os.path.splitext(path)[1] not in SOURCE_EXTENSIONS:
            continue
        file_key = path.removeprefix(prefix_strip + "/") if prefix_strip else path
        grouped.setdefault(file_key, []).append((name, node.get("source_location", "")))
    return grouped
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def write_symbols_md(service: ServiceInfo) -> str:
    """Write ``<service>/graphify-out/SYMBOLS.md`` and return its path.

    The file lists, per source file, the code symbols found in the service's
    graph together with their locations. A missing, unreadable or malformed
    graph is treated as empty.
    """
    out_dir = os.path.join(service.abs_path, "graphify-out")
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "SYMBOLS.md")
    try:
        with open(service.graph_path, encoding="utf-8") as f:
            graph = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        graph = {"nodes": []}

    by_file = _symbols_by_file(graph, prefix_strip=service.name)
    lines = [f"# Symbols: {service.name}", ""]
    if not by_file:
        lines.append("_No code symbols extracted._")
    else:
        lines.append("Open a symbol by its `file:line` instead of reading whole files.")
        lines.append("")
        for sf in sorted(by_file):
            lines.append(f"## {sf}")
            for label, loc in by_file[sf]:
                lines.append(f"- {label}{(' — ' + loc) if loc else ''}")
            lines.append("")
    # Explicit UTF-8: entries contain "—" and labels may be non-ASCII, so the
    # locale encoding is not safe for this file.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines).rstrip() + "\n")
    return path
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def write_monorepo_map(root: str, services: list[ServiceInfo]) -> str:
    """Write ``<root>/graphify-out/MONOREPO_MAP.md`` and return its path.

    The map is a Markdown table with one row per service: its name, its graph
    path relative to *root*, and the services it bridges to (``_none_`` when
    there are none).
    """
    out_dir = os.path.join(root, "graphify-out")
    os.makedirs(out_dir, exist_ok=True)
    map_path = os.path.join(out_dir, "MONOREPO_MAP.md")

    def _row(service) -> str:
        graph_rel = os.path.relpath(service.graph_path, root)
        targets = ", ".join(service.bridges_to) if service.bridges_to else "_none_"
        return f"| {service.name} | {graph_rel} | {targets} |"

    table = ["# Monorepo Map", "", "| Service | Graph | Bridges To |", "|---|---|---|"]
    table.extend(_row(service) for service in services)
    with open(map_path, "w") as f:
        f.write("\n".join(table) + "\n")
    return map_path
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# HTML-comment markers delimiting the section that _write_managed_block owns
# inside an instruction file; text outside the markers is left untouched.
_BLOCK_START = "<!-- codex-graph:start -->"
_BLOCK_END = "<!-- codex-graph:end -->"
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def build_playbook_text(root: str, services: list[ServiceInfo]) -> str:
    """Render the agent playbook that explains how to use the graph.

    *root* is accepted for interface symmetry but is not used. The text names
    every service in *services*, or ``(single project)`` when the list is
    empty. The result has no trailing newline.
    """
    names = [service.name for service in services]
    svc_names = ", ".join(names) if names else "(single project)"

    intro = [
        "# Coding with the codebase knowledge graph",
        "",
        "This repo has a graphify knowledge graph. Use it as your **first resort** — "
        "never use `find`, `ls`, or `cat` to explore repo structure or understand unfamiliar code.",
        "",
        "**Step 0 — always read the monorepo map first** for any task that isn't a "
        "single-file, single-line change:",
        "```",
        "graphify-out/MONOREPO_MAP.md",
        "```",
        "",
    ]
    scope = [
        "**Then judge scope:**",
        "- Single-file, single-line edit (rename, formatting, one-liner)? "
        "Just make it — no further graphify steps needed.",
        "- Everything else — including code changes, explanations, architecture questions, "
        '"how does X work", overviews, or anything touching unfamiliar files:',
        ' 1. Run `codex-graph context "<task>"` — prints the minimal files, their symbol '
        "`file:line` locations, and any cross-service impact.",
        " 2. Open ONLY those files; read the given `file:line` regions, not whole files.",
        ' 3. Before changing a symbol flagged "Cross-service impact", run '
        '`graphify affected "<symbol>"`.',
        " 4. Implement (or answer), then run the project's tests if code changed.",
        "",
    ]
    footer = [
        "**Never** use `find`/`ls`/`cat` to survey the repo. If graphify doesn't give "
        "enough context, read `<service>/graphify-out/SYMBOLS.md` or "
        "`<service>/graphify-out/BRIDGES.md` next — not a raw directory listing.",
        "",
        f"Services: {svc_names}",
        "On-demand maps (open only when needed): `graphify-out/MONOREPO_MAP.md` · "
        "`<service>/graphify-out/SYMBOLS.md` · `<service>/graphify-out/BRIDGES.md`",
    ]
    return "\n".join([*intro, *scope, *footer])
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _write_managed_block(path: str, content: str) -> None:
    """Insert or refresh the codex-graph section of the file at *path*.

    *content* is wrapped in the ``_BLOCK_START`` / ``_BLOCK_END`` markers.
    If the file already has a start marker followed by an end marker, only
    that span is replaced. Otherwise the block is appended after the existing
    text, or becomes the whole file when it is missing, unreadable or blank.
    Parent directories are created as needed.
    """
    block = f"{_BLOCK_START}\n{content}\n{_BLOCK_END}\n"
    # UTF-8 with surrogateescape: the block contains non-ASCII punctuation,
    # and any undecodable bytes in the user's file round-trip unchanged
    # instead of raising or being re-encoded with the locale encoding.
    try:
        with open(path, encoding="utf-8", errors="surrogateescape") as f:
            existing = f.read()
    except OSError:
        existing = ""

    # Search for the end marker only after the start marker, so a stray end
    # marker earlier in the file cannot produce a corrupted splice.
    start = existing.find(_BLOCK_START)
    end = existing.find(_BLOCK_END, start + len(_BLOCK_START)) if start != -1 else -1

    if end != -1:
        # Text after the old end marker still begins with its own newline.
        new_content = (
            existing[:start] + block.rstrip("\n") + existing[end + len(_BLOCK_END):]
        )
    elif existing.strip():
        new_content = existing.rstrip("\n") + "\n\n" + block
    else:
        new_content = block

    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "w", encoding="utf-8", errors="surrogateescape") as f:
        f.write(new_content)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def write_copilot_instructions(root: str, services: list[ServiceInfo]) -> str:
    """Install the playbook into every supported agent instruction file.

    The same managed block is written to ``.github/copilot-instructions.md``,
    ``AGENTS.md`` and ``CLAUDE.md`` under *root*, in that order. Returns the
    path of the Copilot instructions file.
    """
    playbook = build_playbook_text(root, services)
    copilot_path = os.path.join(root, ".github", "copilot-instructions.md")
    targets = (
        copilot_path,
        os.path.join(root, "AGENTS.md"),
        os.path.join(root, "CLAUDE.md"),
    )
    for target in targets:
        _write_managed_block(target, playbook)
    return copilot_path
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def build_context_pack(
    root: str,
    task: str,
    top_files: int = 8,
    budget_tokens: int = 2000,
    skip_patterns: list[str] | None = None,
) -> str:
    """Build a Markdown "context pack" of the files relevant to *task*.

    The whole-repo graph is queried for the best *top_files* matches. Each
    match is listed with up to 12 of its symbols and their locations,
    followed by any cross-service bridges touching the matched files. The
    text is cut to roughly ``budget_tokens * 4`` characters; a budget of 0 or
    less disables the cut.

    Returns a short explanatory document instead when the graph file is
    missing or nothing matches. Ranking failures are treated as "no match".
    """
    from codex_graph.graph_query import load_index, query_files

    root = os.path.abspath(root)
    graph_file = _overarching_graph_path(root)
    if not os.path.exists(graph_file):
        rel = os.path.relpath(graph_file, root)
        return (
            f"# Context for: {task}\n\n"
            f"No knowledge graph found at {rel}.\n"
            "Run `codex-graph map` (monorepo) or `graphify extract .` first.\n"
        )

    if skip_patterns is None:
        skip_patterns = [
            "node_modules", ".git", "graphify-out", "dist", "build",
            "playwright-report", "test-results", ".next", "coverage",
        ]

    try:
        ranked = query_files(task, load_index(graph_file, skip_patterns), top_files)
    except Exception:
        ranked = []

    with open(graph_file) as f:
        graph = json.load(f)
    symbols = _symbols_by_file(graph)
    chosen = [hit.source_file for hit in ranked]

    doc = [f"# Context for: {task}", ""]
    if not chosen:
        doc.append(
            "_No matching files. Try terms from the code itself (function or class names)._"
        )
        return "\n".join(doc) + "\n"

    doc.append("## Open only these files")
    for file_path in chosen:
        entries = symbols.get(file_path, [])
        if not entries:
            doc.append(f"- {file_path}")
            continue
        shown = ", ".join(f"{label} {loc}".strip() for label, loc in entries[:12])
        doc.append(f"- {file_path} — {shown}")

    services = detect_services(root, MonoConfig().marker_files)
    if services:
        bridges = analyze_bridges(graph_file, services)
        chosen_set = set(chosen)
        impact = [
            f"- {svc.name}/{row.local_file}:{row.local_symbol} {row.local_loc} "
            f"--{row.relation}--> {row.remote_file}:{row.remote_symbol} {row.remote_loc}"
            for svc in services
            for row in bridges[svc.name]
            if f"{svc.name}/{row.local_file}" in chosen_set or row.remote_file in chosen_set
        ]
        if impact:
            doc.extend(["", "## Cross-service impact", *impact])

    doc.extend([
        "",
        "## Next",
        "Read only the `file:line` regions above. Before changing a symbol under "
        'Cross-service impact, run `graphify affected "<symbol>"`. Then run the tests.',
    ])

    text = "\n".join(doc) + "\n"
    # Rough budget: about four characters per token.
    limit = max(budget_tokens, 0) * 4
    if limit and len(text) > limit:
        text = text[:limit].rstrip() + "\n\n_(truncated to budget)_\n"
    return text
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _extract_code_windows(abs_path, lines_wanted, before=2, after=14, max_lines=110):
|
|
562
|
+
try:
|
|
563
|
+
with open(abs_path, errors="replace") as f:
|
|
564
|
+
src = f.read().splitlines()
|
|
565
|
+
except OSError:
|
|
566
|
+
return ""
|
|
567
|
+
n = len(src)
|
|
568
|
+
keep = set()
|
|
569
|
+
for ln in lines_wanted:
|
|
570
|
+
if 1 <= ln <= n:
|
|
571
|
+
for i in range(max(1, ln - before), min(n, ln + after) + 1):
|
|
572
|
+
keep.add(i)
|
|
573
|
+
if not keep:
|
|
574
|
+
return ""
|
|
575
|
+
kept = sorted(keep)[:max_lines]
|
|
576
|
+
pieces = []
|
|
577
|
+
prev = None
|
|
578
|
+
for i in kept:
|
|
579
|
+
if prev is not None and i > prev + 1:
|
|
580
|
+
pieces.append(" ...")
|
|
581
|
+
pieces.append(f"{i:>5} {src[i - 1]}")
|
|
582
|
+
prev = i
|
|
583
|
+
return "\n".join(pieces)
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def build_context_pack_inline(root, task, top_files=3, budget_tokens=2500, skip_patterns=None):
    """Build a context pack that embeds the relevant code itself.

    The best *top_files* matches for *task* are found as in
    ``build_context_pack``, but each is shown with up to 10 symbol names and
    numbered code excerpts around its symbols. Files referencing the matches
    are then listed when the graph navigator can supply them.

    The text is cut to roughly ``budget_tokens * 4`` characters; a budget of
    0 or less disables the cut. When the cut lands inside a fenced code block
    the fence is closed so the truncation note renders as prose.

    Returns a short explanatory document instead when the graph file is
    missing or nothing matches. Ranking and reference-lookup failures are
    treated as "nothing found".
    """
    from codex_graph.graph_query import load_index, query_files

    root = os.path.abspath(root)
    overarching_path = _overarching_graph_path(root)
    if not os.path.exists(overarching_path):
        return f"# Context for: {task}\n\nNo knowledge graph found.\n"
    if skip_patterns is None:
        skip_patterns = [
            "node_modules", ".git", "graphify-out", "dist", "build",
            "playwright-report", "test-results", ".next", "coverage",
        ]
    try:
        index = load_index(overarching_path, skip_patterns)
        ranked = query_files(task, index, top_files)
    except Exception:
        ranked = []

    with open(overarching_path) as f:
        graph = json.load(f)
    by_file = _symbols_by_file(graph)

    out = [
        f"# Context for: {task}",
        "",
        "## Relevant code (extracted from the knowledge graph — already in context, do not re-open these files)",
    ]
    if not ranked:
        out.append("_No confident matches; explore normally._")
        return "\n".join(out) + "\n"

    for rf in ranked:
        sf = rf.source_file
        syms = by_file.get(sf, [])
        # Take the line number from each symbol's location (e.g. "L42").
        line_nums = []
        for _label, loc in syms:
            m = re.search(r"L(\d+)", loc or "")
            if m:
                line_nums.append(int(m.group(1)))
        snippet = _extract_code_windows(os.path.join(root, sf), line_nums)
        out.append("")
        out.append(f"### {sf}")
        if syms:
            out.append("symbols: " + ", ".join(label for label, _ in syms[:10]))
        if snippet:
            out.append("```")
            out.append(snippet)
            out.append("```")

    from codex_graph.graph_nav import GraphNav

    try:
        nav = GraphNav(overarching_path, skip_patterns)
        refs = nav.references_to([rf.source_file for rf in ranked], limit=12)
    except Exception:
        refs = []
    if refs:
        out.append("")
        out.append("## Other code that references the above (likely also needs edits)")
        out.extend("- " + r for r in refs)

    out += [
        "",
        "## Next",
        "The relevant code is shown above. Make the change directly; only open a file "
        "if you need a region not shown. To explore further, use the graph tools "
        "(graph_find, graph_neighbors) instead of broad searches.",
    ]
    text = "\n".join(out) + "\n"
    char_budget = max(budget_tokens, 0) * 4
    if char_budget and len(text) > char_budget:
        clipped = text[:char_budget].rstrip()
        # Fence rows are exactly "```" (excerpt rows carry a line-number
        # prefix), so an odd count means the cut fell inside a code block.
        # Closing unconditionally would open a stray fence instead.
        fence_rows = sum(1 for row in clipped.split("\n") if row == "```")
        closer = "\n```" if fence_rows % 2 else ""
        text = clipped + closer + "\n\n_(truncated to budget)_\n"
    return text
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _refresh(
    root: str,
    services: list[ServiceInfo],
    overarching_graph_path: str,
) -> dict[str, list[BridgeRow]]:
    """Regenerate every derived artifact from the whole-repo graph.

    In order: per-service graphs, bridge analysis, each service's BRIDGES.md
    and SYMBOLS.md, the monorepo map, and the agent instruction files.
    Returns the bridge rows keyed by service name.
    """
    partition_graph(overarching_graph_path, services)
    rows_by_service = analyze_bridges(overarching_graph_path, services)

    for service in services:
        service_rows = rows_by_service[service.name]
        write_bridges_md(service, service_rows)
        write_symbols_md(service)

    write_monorepo_map(root, services)
    write_copilot_instructions(root, services)
    return rows_by_service
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def run_map(
    root: str,
    mono_cfg: MonoConfig,
    backend_override: str | None = None,
    dry_run: bool = False,
) -> int:
    """Map the monorepo at *root*: extract the graph and write all artifacts.

    Args:
        root: Repository root (made absolute).
        mono_cfg: Supplies the marker files and the default graphify backend.
        backend_override: Backend to use instead of the configured one.
        dry_run: Only list the detected services; run nothing.

    Returns:
        0 on success (or after a dry run), 1 when graphify is missing, no
        services are detected, or the extraction fails.
    """
    def _err(message: str) -> None:
        print(message, file=sys.stderr)

    root = os.path.abspath(root)
    graphify_path = shutil.which("graphify")
    if graphify_path is None:
        _err("Error: 'graphify' not found on PATH. Install with: pip install graphifyy")
        return 1

    services = detect_services(root, mono_cfg.marker_files)
    if not services:
        _err(f"No services detected in {root}. Add code to subdirectories (or marker files like package.json/pyproject.toml).")
        return 1

    if dry_run:
        print(f"Detected {len(services)} service(s):")
        for service in services:
            print(f" {service.name} {service.abs_path}")
        print("[dry-run] No graphify calls made.")
        return 0

    backend = backend_override or mono_cfg.graphify_backend
    env = _build_subprocess_env(root)
    overarching_path = _overarching_graph_path(root)

    _err(f"[codex-graph] Building overarching graph across {len(services)} service(s): {', '.join(s.name for s in services)}")
    rc = build_overarching_graph(root, graphify_path, backend, env=env)
    # A zero exit code without a graph file still counts as a failure.
    if rc != 0 or not os.path.exists(overarching_path):
        _err(f"Error: overarching graphify extraction failed (exit {rc}).")
        _err(" Ensure an API key is available (e.g. ANTHROPIC_API_KEY or ANTHROPIC_KEY in a .env file).")
        return 1

    bridges = _refresh(root, services, overarching_path)
    total_bridges = sum(map(len, bridges.values()))

    print(f"\nDone. {len(services)} service(s) mapped, {total_bridges} cross-service connection(s) found.")
    print(f" Overarching graph : {overarching_path}")
    for service in services:
        to = ", ".join(service.bridges_to) if service.bridges_to else "none"
        print(f" {service.name}/graphify-out/ (bridges -> {to})")
    print(f" Monorepo map : {os.path.join(root, 'graphify-out', 'MONOREPO_MAP.md')}")
    print(f" Copilot instructions : {os.path.join(root, '.github', 'copilot-instructions.md')}")
    return 0
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def run_watch(
    root: str,
    mono_cfg: MonoConfig,
    backend_override: str | None = None,
) -> int:
    """Keep the monorepo artifacts in sync while ``graphify watch`` runs.

    The whole-repo graph is bootstrapped if missing and the derived artifacts
    are refreshed once. ``graphify watch`` is then started as a child
    process. The graph file's mtime is polled every
    ``mono_cfg.watch_poll_interval`` seconds; on each change the artifacts
    are regenerated. A watcher that exits is restarted. Ctrl-C stops the
    loop.

    The child process is always stopped on the way out, including when an
    unexpected exception propagates. A refresh that fails because the graph
    is unreadable or malformed (for instance, read mid-write) is reported and
    skipped instead of ending the watch.

    Returns:
        0 after a Ctrl-C shutdown, 1 when graphify is missing, no services
        are detected, or the bootstrap extraction fails.
    """
    root = os.path.abspath(root)
    graphify_path = shutil.which("graphify")
    if graphify_path is None:
        print("Error: 'graphify' not found on PATH. Install with: pip install graphifyy", file=sys.stderr)
        return 1

    services = detect_services(root, mono_cfg.marker_files)
    if not services:
        print(f"No services detected in {root}.", file=sys.stderr)
        return 1

    backend = backend_override or mono_cfg.graphify_backend
    env = _build_subprocess_env(root)
    overarching_path = _overarching_graph_path(root)

    if not os.path.exists(overarching_path):
        print(f"[codex-graph] Bootstrapping overarching graph for {len(services)} service(s) ...", file=sys.stderr)
        rc = build_overarching_graph(root, graphify_path, backend, env=env)
        if rc != 0 or not os.path.exists(overarching_path):
            print(f"Error: bootstrap extraction failed (exit {rc}).", file=sys.stderr)
            return 1

    _refresh(root, services, overarching_path)

    def _start_watch() -> subprocess.Popen:
        return subprocess.Popen(
            [graphify_path, "watch", root],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            env=env,
        )

    watch_proc = _start_watch()
    try:
        try:
            last_mtime = os.stat(overarching_path).st_mtime
        except OSError:
            last_mtime = 0.0

        print(f"[codex-graph] Watching {root} ({len(services)} service(s)). Press Ctrl-C to stop.", file=sys.stderr)
        while True:
            time.sleep(mono_cfg.watch_poll_interval)

            try:
                mtime = os.stat(overarching_path).st_mtime
            except OSError:
                # Graph temporarily missing: treat as unchanged.
                mtime = last_mtime
            if mtime != last_mtime:
                last_mtime = mtime
                ts = time.strftime("%H:%M:%S")
                print(f"[codex-graph] {ts} graph updated — re-partitioning and re-analyzing bridges ...", file=sys.stderr)
                try:
                    _refresh(root, services, overarching_path)
                except (OSError, ValueError) as exc:
                    # ValueError covers json.JSONDecodeError. A half-written
                    # graph gets a newer mtime once the write completes, so
                    # the next poll retries on its own.
                    print(f"[codex-graph] WARNING: refresh failed ({exc}); will retry on next change.", file=sys.stderr)

            if watch_proc.poll() is not None:
                print(f"[codex-graph] WARNING: graphify watch exited (exit {watch_proc.returncode}), restarting ...", file=sys.stderr)
                watch_proc = _start_watch()
    except KeyboardInterrupt:
        print("\n[codex-graph] Stopping watch ...", file=sys.stderr)
    finally:
        # Runs for Ctrl-C and for unexpected errors alike, so the watcher
        # child is never left running.
        watch_proc.terminate()
        try:
            watch_proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            watch_proc.kill()
    return 0
|