loom-code 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. loom_code/__init__.py +22 -0
  2. loom_code/_post_commit.py +119 -0
  3. loom_code/agent.py +544 -0
  4. loom_code/approval.py +616 -0
  5. loom_code/browse/__init__.py +291 -0
  6. loom_code/browse/act.py +467 -0
  7. loom_code/browse/observe.py +249 -0
  8. loom_code/browse/session.py +96 -0
  9. loom_code/browse/verify.py +194 -0
  10. loom_code/checkpoint.py +283 -0
  11. loom_code/cli.py +495 -0
  12. loom_code/code_index.py +703 -0
  13. loom_code/compact.py +143 -0
  14. loom_code/consent.py +47 -0
  15. loom_code/credentials.py +527 -0
  16. loom_code/edit_tool.py +635 -0
  17. loom_code/extensions.py +522 -0
  18. loom_code/file_history.py +322 -0
  19. loom_code/file_tools.py +93 -0
  20. loom_code/git_hook.py +200 -0
  21. loom_code/grep_tool.py +430 -0
  22. loom_code/hooks.py +297 -0
  23. loom_code/loominit/__init__.py +23 -0
  24. loom_code/loominit/_ast_walk.py +429 -0
  25. loom_code/loominit/_files.py +284 -0
  26. loom_code/loominit/_graph.py +141 -0
  27. loom_code/loominit/_resolve.py +392 -0
  28. loom_code/loominit/_tests_map.py +108 -0
  29. loom_code/loominit/extractor.py +332 -0
  30. loom_code/loominit/repomap.py +225 -0
  31. loom_code/loominit/schema.py +242 -0
  32. loom_code/lsp_tools.py +396 -0
  33. loom_code/mcp_host.py +79 -0
  34. loom_code/operator.py +449 -0
  35. loom_code/paste.py +97 -0
  36. loom_code/paths.py +52 -0
  37. loom_code/permissions.py +177 -0
  38. loom_code/project.py +104 -0
  39. loom_code/prompts.py +451 -0
  40. loom_code/render.py +783 -0
  41. loom_code/repl.py +4080 -0
  42. loom_code/rules.py +267 -0
  43. loom_code/sandboxed_bash.py +176 -0
  44. loom_code/scribe.py +88 -0
  45. loom_code/skills/__init__.py +16 -0
  46. loom_code/skills/graphify/SKILL.md +97 -0
  47. loom_code/skills/graphify/tools.py +570 -0
  48. loom_code/trust.py +216 -0
  49. loom_code/turn.py +169 -0
  50. loom_code/web_fetch.py +370 -0
  51. loom_code/workers.py +758 -0
  52. loom_code/worktree.py +134 -0
  53. loom_code-0.1.1.dist-info/METADATA +224 -0
  54. loom_code-0.1.1.dist-info/RECORD +58 -0
  55. loom_code-0.1.1.dist-info/WHEEL +5 -0
  56. loom_code-0.1.1.dist-info/entry_points.txt +2 -0
  57. loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
  58. loom_code-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,570 @@
1
+ """Mode B Python tools for the ``graphify`` skill.
2
+
3
+ Wraps graphify's public Python primitives (``collect_files`` /
4
+ ``extract`` / ``build_from_json`` / ``cluster`` / ``to_json``) into
5
+ ``@tool``-decorated functions the agent calls directly. No
6
+ subprocess, no MCP server — just in-process Python.
7
+
8
+ Why not the standalone ``graphify`` CLI: it doesn't have a
9
+ ``graphify <path>`` subcommand. The CLI is a SKILL installer
10
+ (``graphify install`` copies ``SKILL.md`` to
11
+ ``~/.claude/skills/graphify/`` for Claude Code to find). The
12
+ actual extraction pipeline lives in the Python modules and is
13
+ intended to be orchestrated by the host AI tool — Claude Code
14
+ runs the multi-step skill flow; loom-code does the same here
15
+ via its own skill machinery, scoped to AST-only extraction
16
+ (code files) for predictable in-process behavior.
17
+
18
+ Multi-modal extraction (docs / papers / images) needs the full
19
+ skill-driven semantic pass with parallel subagents — that's a
20
+ follow-up; AST-only covers the 90% case for loom-code's use.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ from dataclasses import dataclass
27
+ from pathlib import Path
28
+ from typing import Any
29
+
30
+ import anyio
31
+ from loomflow import tool
32
+
33
# Location of the persisted graph, relative to the project root.
# ``_graph_path`` joins these two pieces so the build side and the
# query side always agree on a single file.
_GRAPHIFY_OUT_SUBDIR = ".loom/graphify"
_GRAPH_FILENAME = "graph.json"

# Extensions graphify has a tree-sitter extractor for. Keep in sync
# with ``graphify.extract``'s per-language dispatch table — anything
# outside this set the extractor would silently skip, so we drop it
# before paying the dict-lookup + Path stat cost. Source of truth is
# the ``extract_<lang>`` functions in ``graphify/extract.py``.
#
# NOTE(review): ``_git_ls_files`` tests membership with ``Path.suffix``,
# which yields only the final extension. The compound ``.blade.php``
# entry can therefore never match there; such files are still admitted
# through the plain ``.php`` entry.
_GRAPHIFY_SUPPORTED_SUFFIXES = frozenset({
    ".astro", ".sh", ".bash", ".blade.php", ".c", ".h",
    ".cpp", ".cc", ".cxx", ".hpp", ".cs", ".dart", ".pas",
    ".dfm", ".ex", ".exs", ".f", ".f90", ".f95", ".for",
    ".go", ".groovy", ".java", ".js", ".jsx", ".mjs", ".cjs",
    ".json", ".jl", ".kt", ".kts", ".lpr", ".lpk", ".lua",
    ".md", ".markdown", ".m", ".mm", ".lpi", ".lps",
    ".php", ".ps1", ".psm1", ".py", ".pyi", ".rb",
    ".rs", ".scala", ".sc", ".sql", ".svelte", ".swift",
    ".ts", ".tsx", ".v", ".sv", ".zig",
})


# loom-code's OWN generated artifacts — never feed these to the
# knowledge graph. ``LOOM.md`` is loominit's output (indexing it is
# circular); ``.loom/`` is generated state (memory.db, graph.json,
# the notebook); ``graphify-out/`` is graphify's AST cache. These
# are frequently NOT in the user's ``.gitignore``, so ``git
# ls-files --others`` would happily surface them — we exclude them
# explicitly by path rather than relying on git's ignore handling.
# File names are matched exactly; prefixes are matched with
# ``str.startswith`` against repo-root-relative, forward-slash paths.
_LOOM_OWN_ARTIFACT_FILES = frozenset({"LOOM.md"})
_LOOM_OWN_ARTIFACT_PREFIXES = (".loom/", "graphify-out/")
63
+
64
+
65
def _is_loom_own_artifact(rel_path: str) -> bool:
    """Report whether ``rel_path`` is one of loom-code's generated outputs.

    ``rel_path`` is expected to be repo-root-relative with forward
    slashes. It counts as an own artifact when it equals one of the
    file names in ``_LOOM_OWN_ARTIFACT_FILES`` or starts with one of
    the directory prefixes in ``_LOOM_OWN_ARTIFACT_PREFIXES``.
    """
    # ``str.startswith`` accepts a tuple, so one call covers every prefix.
    return (
        rel_path in _LOOM_OWN_ARTIFACT_FILES
        or rel_path.startswith(_LOOM_OWN_ARTIFACT_PREFIXES)
    )
73
+
74
+
75
async def _run_git_ls(
    project_root: Path, extra_args: list[str]
) -> list[str] | None:
    """Run ``git -C <root> ls-files -z <extra_args>`` and return the
    repo-relative paths it lists.

    Args:
        project_root: Directory handed to ``git -C``.
        extra_args: Additional ``ls-files`` flags (for example
            ``["--others", "--exclude-standard"]``).

    Returns:
        The listed paths, one string each, or ``None`` on any failure
        (not a git repo, no git binary, non-zero exit).
    """
    try:
        result = await anyio.run_process(
            # ``-z`` switches to NUL-terminated, UNQUOTED output. Without
            # it git C-quotes "unusual" path names (non-ASCII bytes,
            # quotes, control characters), and the quoted form does not
            # exist on disk — those files would silently vanish from
            # discovery.
            ["git", "-C", str(project_root), "ls-files", "-z", *extra_args],
            check=False,
        )
    except OSError:
        # Covers FileNotFoundError (git binary missing) and any other
        # OS-level spawn failure.
        return None
    if result.returncode != 0:
        return None
    listing = result.stdout.decode("utf-8", errors="replace")
    # Every record ends with NUL, so the final split piece is empty.
    return [entry for entry in listing.split("\0") if entry]
91
+
92
+
93
async def _git_ls_files(project_root: Path) -> list[Path] | None:
    """List indexable source files using git instead of a directory walk.

    Two listings are merged:

    * ``git ls-files`` — files git already tracks.
    * ``git ls-files --others --exclude-standard`` — untracked files
      that are not ignored, so a file created a moment ago is visible
      before it is ever ``git add``-ed.

    Both listings honour git's ignore rules. On top of that, paths for
    which ``_is_loom_own_artifact`` is true are dropped, because those
    outputs are often not gitignored and indexing them is circular.

    Returns ``None`` when the tracked listing fails, which tells the
    caller to fall back to ``graphify.collect_files``. Otherwise returns
    the absolute-under-``project_root`` paths that exist as regular
    files and carry a suffix in ``_GRAPHIFY_SUPPORTED_SUFFIXES``.
    """
    tracked = await _run_git_ls(project_root, [])
    if tracked is None:
        # Not a git repo, or git is missing: let the caller fall back.
        return None

    # A failure of the second listing is tolerated as "no extra files"
    # rather than aborting the whole discovery.
    extras = await _run_git_ls(
        project_root, ["--others", "--exclude-standard"]
    )
    if not extras:
        extras = []

    # ``dict.fromkeys`` removes duplicates while keeping first-seen
    # order; blank entries are dropped on the way in.
    unique_rel_paths = dict.fromkeys(
        rel for rel in [*tracked, *extras] if rel
    )

    # Paths are joined onto project_root (never the cwd) so a cwd change
    # cannot alter the result. The ``is_file`` test also filters out
    # directory entries and submodules.
    candidates = (
        project_root / rel
        for rel in unique_rel_paths
        if not _is_loom_own_artifact(rel)
    )
    return [
        candidate
        for candidate in candidates
        if candidate.is_file()
        and candidate.suffix.lower() in _GRAPHIFY_SUPPORTED_SUFFIXES
    ]
151
+
152
+
153
def _graph_path(project_root: Path | str) -> Path:
    """Return the location of the persisted graph file for a project.

    The root is resolved to an absolute path first, then the output
    sub-directory and file name are appended. Both the build side and
    the query side go through this function so they cannot disagree.
    """
    resolved_root = Path(project_root).resolve()
    return resolved_root.joinpath(_GRAPHIFY_OUT_SUBDIR, _GRAPH_FILENAME)
161
+
162
+
163
def _load_graph(project_root: Path | str) -> Any:
    """Read the persisted graph for ``project_root`` from disk.

    Returns:
        The graph rebuilt from its node-link JSON by networkx.

    Raises:
        FileNotFoundError: no graph file exists yet; the message
            includes the hint to run the build tool.
    """
    graph_file = _graph_path(project_root)
    if graph_file.is_file():
        # networkx is imported lazily, only when there is something to
        # load.
        from networkx.readwrite import json_graph

        payload = json.loads(graph_file.read_text(encoding="utf-8"))
        return json_graph.node_link_graph(payload, edges="links")
    raise FileNotFoundError(
        f"No graph at {graph_file}. Run `graphify__build()` first "
        "to extract + persist the knowledge graph for this "
        "project."
    )
176
+
177
+
178
async def _load_or_build_graph(project_root: Path | str) -> Any:
    """Load the persisted graph; if it is missing, build it ONCE
    transparently and then load it.

    This lets ``query`` / ``explain`` / ``path_between`` work with a
    single tool call instead of the ``query → 'run build first' error
    → build → query again`` sequence. It calls ``graphify_build_impl``
    directly and deliberately does NOT install the git hook —
    persistent refresh stays a side effect of the explicit ``build``
    tool.

    Raises:
        FileNotFoundError: the graph was missing and the automatic
            build had nothing to index. The message carries the build's
            own skip reason.
    """
    try:
        return _load_graph(project_root)
    except FileNotFoundError:
        pass  # fall through to the one-shot build below
    outcome = await graphify_build_impl(project_root)
    if outcome.skipped_reason is not None:
        # A skipped build writes no graph file. Letting ``_load_graph``
        # raise here would tell the caller to "run build first" — which
        # was just attempted — so report the real cause instead.
        raise FileNotFoundError(
            f"No graph at {outcome.graph_path}: the automatic build "
            f"found nothing to index — {outcome.skipped_reason} "
            f"(searched via {outcome.source})."
        )
    return _load_graph(project_root)
194
+
195
+
196
@dataclass(frozen=True)
class GraphifyBuildResult:
    """Structured outcome of one ``graphify_build_impl`` run.

    Used by callers that need the numbers (``/loominit`` for the
    LOOM.md ``## Knowledge Graph`` section, ``_post_commit`` for a
    log line). The ``@tool`` wrapper formats the same fields into
    the string the agent sees.

    When the build is a no-op, ``skipped_reason`` is set and every
    count is zero.
    """

    # Path of graph.json for this project (the target path even when
    # the build was skipped).
    graph_path: Path
    # Resolved project root the build ran against.
    project_root: Path
    # Node and edge counts of the built graph.
    n_nodes: int
    n_edges: int
    # Number of source files handed to the extractor.
    n_files: int
    # Number of communities returned by clustering.
    n_communities: int
    source: str  # "git ls-files" or "graphify.collect_files (git index empty or absent)"
    skipped_reason: str | None = None  # set when build was a no-op
213
+
214
+
215
async def graphify_build_impl(path: str | Path = ".") -> GraphifyBuildResult:
    """Discover sources, extract, cluster and persist the knowledge graph.

    Shared core used by the ``build`` tool and ``_load_or_build_graph``
    in this module, and callable directly by other loom-code code.

    Pipeline: file discovery (git listing first, graphify's walker as a
    fallback) → ``extract`` → ``build_from_json`` → ``cluster`` →
    ``to_json`` into ``<path>/.loom/graphify/graph.json``.

    Returns:
        A ``GraphifyBuildResult``. When no source file can be found, the
        result has ``skipped_reason`` set and all counts at zero; the
        caller decides whether that is a warning or a silent success.

    IMPORTANT — every graphify callable is imported from its OWN
    submodule, never as ``graphify.X``. graphify's ``__init__`` exposes
    top-level callables lazily through ``__getattr__``, but importing
    any submodule pulls in others, and a loaded submodule is bound on
    the ``graphify`` namespace where it SHADOWS the lazy callable of
    the same name. ``graphify.cluster(g)`` then fails with "'module'
    object is not callable".
    """
    from graphify.build import build_from_json
    from graphify.cluster import cluster
    from graphify.export import to_json
    from graphify.extract import collect_files, extract

    project_root = Path(path).resolve()  # noqa: ASYNC240 — trivial fs op
    graph_file = _graph_path(project_root)
    graph_file.parent.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240

    # Preferred discovery: ask git, which answers from its index and
    # honours ignore rules, instead of walking every directory.
    discovered = await _git_ls_files(project_root)
    discovery_source = "git ls-files"

    # Use the walker whenever git produced nothing usable: ``None``
    # (no repo / no git) or an empty list (nothing eligible listed).
    if not discovered:
        discovered = collect_files(project_root)
        discovery_source = "graphify.collect_files (git index empty or absent)"

    # Still nothing after the fallback: genuinely nothing to index.
    if not discovered:
        return GraphifyBuildResult(
            graph_path=graph_file,
            project_root=project_root,
            n_nodes=0,
            n_edges=0,
            n_files=0,
            n_communities=0,
            source=discovery_source,
            skipped_reason=(
                "no extractable source files (check tree-sitter "
                "language coverage — graphify supports py/ts/js/"
                "go/rs/java/c/cpp/rb/cs/kt/scala/php and more)"
            ),
        )

    # ``extract`` yields a dict that ``build_from_json`` consumes as is.
    # ``cluster`` returns a community map, not the graph, and ``to_json``
    # takes graph and map positionally; ``force=True`` lets a re-run
    # overwrite the previous graph.json.
    extracted = extract(discovered)
    graph = build_from_json(extracted)
    community_map = cluster(graph)
    to_json(graph, community_map, str(graph_file), force=True)

    return GraphifyBuildResult(
        graph_path=graph_file,
        project_root=project_root,
        n_nodes=graph.number_of_nodes(),
        n_edges=graph.number_of_edges(),
        n_files=len(discovered),
        n_communities=len(community_map),
        source=discovery_source,
    )
306
+
307
+
308
@tool
async def build(path: str = ".") -> str:
    """Extract + cluster + persist the project's knowledge graph.

    Walks code files under ``path``, parses them with tree-sitter
    via graphify's extractor, builds a NetworkX graph (nodes =
    symbols/files, edges = imports/calls/references), runs Leiden
    community detection, and writes ``<path>/.loom/graphify/graph.json``.

    Idempotent: incremental via file-hash gating — re-running on
    an unchanged repo is fast. Run once per project (or after
    major refactors); the post-commit hook keeps it current every
    5 commits.

    Returns a short summary the agent can quote back to the user.
    """
    # The docstring above is kept verbatim: the tool decorator may
    # surface it to the agent as the tool description.
    outcome = await graphify_build_impl(path)

    # Hook installation is strictly best-effort: whatever goes wrong
    # (for instance a tree that is not a git checkout) must never turn
    # a finished build into a failure, hence the broad catch.
    try:
        from ...git_hook import install as _install_hook

        _install_hook(outcome.project_root)
    except Exception:  # noqa: BLE001 — hook install is best-effort
        pass

    if outcome.skipped_reason is None:
        written = outcome.graph_path.relative_to(outcome.project_root)
        stats = (
            f"{outcome.n_nodes} nodes, {outcome.n_edges} edges, "
            f"{outcome.n_files} source files via {outcome.source}, "
            f"{outcome.n_communities} communities"
        )
        return f"graphify__build: ✓ wrote {written} ({stats})"

    # Nothing was indexed: say why, and how the files were searched for.
    return (
        f"graphify__build: {outcome.skipped_reason} "
        f"(searched via {outcome.source})."
    )
348
+
349
+
350
def _top_by_degree(graph_obj: Any, node_ids: Any, limit: int) -> list[str]:
    """Return at most ``limit`` ids from ``node_ids``, highest degree first.

    Ties are broken on the id's string form, so the ranking — and which
    ids survive the ``limit`` cut — does not depend on the iteration
    order of the input (a ``set`` of strings iterates in a different
    order from one interpreter run to the next).
    """
    ranked = sorted(
        node_ids, key=lambda n: (-graph_obj.degree(n), str(n))
    )
    return ranked[:limit]


@tool
async def query(question: str, path: str = ".") -> str:
    """Find nodes related to ``question`` via THREE strategies and
    rank them so the agent sees the whole subsystem, not just the
    literal name matches.

    1. **DIRECT** — literal substring match on node label or
       source-file. The narrow case grep would also catch.
    2. **NEIGHBOR** — 1-hop graph neighbours of any direct match.
       Surfaces callers, callees, decorators, dependencies — the
       things grep on the keyword would silently miss because they
       use a DIFFERENT identifier name but participate in the same
       call structure.
    3. **COMMUNITY** — other nodes in the same Leiden community
       as a direct match. Surfaces the "auth subsystem" when the
       query is "auth" even if specific symbols don't contain
       "auth" in their name. Leiden communities are pre-computed
       at build time and persisted in graph.json; we just use the
       cluster id at query time.

    Output is grouped by tier so the agent knows which results
    are literal hits vs structural neighbours vs community-cohort.
    Limited to 20 results total (8 direct + 8 neighbors + 4
    community) to keep tool output bounded while showing breadth.
    """
    # The docstring above is kept verbatim: the tool decorator may
    # surface it to the agent as the tool description.
    max_direct, max_neighbours, max_community = 8, 8, 4

    graph_obj = await _load_or_build_graph(path)
    terms = [t.lower() for t in question.split() if len(t) > 2]
    if not terms:
        return (
            "graphify__query: question too short (need at least "
            "one keyword > 2 chars)."
        )

    # === Tier 1: DIRECT label/source matches ===
    # A label hit weighs 1.0 per term, a source-file hit 0.4 per term.
    direct_scored: list[tuple[float, str, dict[str, Any]]] = []
    for nid, data in graph_obj.nodes(data=True):
        label = str(data.get("label", "")).lower()
        src = str(data.get("source_file", "")).lower()
        score = sum(1.0 for t in terms if t in label)
        score += sum(0.4 for t in terms if t in src)
        if score > 0:
            direct_scored.append((score, nid, data))
    # Stable sort: equal scores keep the graph's own node order.
    direct_scored.sort(key=lambda x: x[0], reverse=True)
    if not direct_scored:
        return (
            f"graphify__query: no nodes matched {terms!r}. "
            "Try a different keyword, or use ``graphify__explain`` "
            "on a known symbol to discover related names."
        )
    top_direct = [nid for _, nid, _ in direct_scored[:max_direct]]
    direct_ids = set(top_direct)

    # === Tier 2: NEIGHBORS of direct matches (1-hop in graph) ===
    # Nodes that are themselves direct matches are skipped so they are
    # not reported twice.
    neighbour_ids: set[str] = set()
    for did in top_direct:
        for n in graph_obj.neighbors(did):
            if n not in direct_ids:
                neighbour_ids.add(n)
    # High-degree nodes are structurally central, so they go first. The
    # helper's tie-break keeps this selection reproducible across runs.
    neighbour_ranked = _top_by_degree(
        graph_obj, neighbour_ids, max_neighbours
    )

    # === Tier 3: COMMUNITY peers (same cluster as a direct match) ===
    direct_communities: set[Any] = set()
    for did in top_direct:
        cid = graph_obj.nodes[did].get("community")
        if cid is not None:
            direct_communities.add(cid)
    seen = direct_ids | set(neighbour_ranked)
    community_ids: list[str] = []
    if direct_communities:
        candidates = [
            n for n, d in graph_obj.nodes(data=True)
            if d.get("community") in direct_communities
            and n not in seen
        ]
        community_ids = _top_by_degree(
            graph_obj, candidates, max_community
        )

    # === Render: grouped by tier with explicit labels ===
    def _line(nid: str, tier: str) -> str:
        data = graph_obj.nodes[nid]
        label = data.get("label", nid)
        src = data.get("source_file", "?")
        loc = data.get("source_location", "")
        deg = graph_obj.degree(nid)
        cid = data.get("community", "?")
        loc_part = f":{loc}" if loc else ""
        return (
            f" [{tier}] {label} [{src}{loc_part}] "
            f"— degree {deg}, community {cid}"
        )

    sections: list[str] = [
        f"graphify__query for {terms!r}:\n",
        "DIRECT matches (literal label/source hit):",
    ]
    sections.extend(_line(nid, "DIRECT") for nid in top_direct)
    if neighbour_ranked:
        sections.append("")
        sections.append(
            "NEIGHBOR (1-hop in graph — callers / callees / "
            "dependencies of the direct matches):"
        )
        sections.extend(_line(nid, "NEIGHBOR") for nid in neighbour_ranked)
    if community_ids:
        sections.append("")
        sections.append(
            "COMMUNITY (same Leiden cluster as the direct matches "
            "— the rest of the subsystem):"
        )
        sections.extend(_line(nid, "COMMUNITY") for nid in community_ids)
    return "\n".join(sections)
483
+
484
+
485
@tool
async def path_between(a: str, b: str, path: str = ".") -> str:
    """Shortest path between two named concepts. The single most
    useful graph query: "how does A get to B?" / "what connects
    X and Y?" — exactly what grep can't answer.
    """
    # The docstring above is kept verbatim: the tool decorator may
    # surface it to the agent as the tool description.
    graph_obj = await _load_or_build_graph(path)

    # Resolve both names up front; report the first one that fails.
    start = _find_node(graph_obj, a)
    goal = _find_node(graph_obj, b)
    for wanted, resolved in ((a, start), (b, goal)):
        if resolved is None:
            return f"graphify__path: no node matched {wanted!r}."

    import networkx as nx

    try:
        route = nx.shortest_path(graph_obj, start, goal)
    except nx.NetworkXNoPath:
        return (
            f"graphify__path: no path from {a!r} → {b!r}. They "
            "live in disconnected components — likely separate "
            "subsystems with no static linkage."
        )

    # One line per node, with the connecting edge's relation printed
    # between consecutive nodes (nothing follows the final node).
    last_index = len(route) - 1
    rendered: list[str] = []
    for index, nid in enumerate(route):
        attrs = graph_obj.nodes[nid]
        label = attrs.get("label", nid)
        src = attrs.get("source_file", "?")
        rendered.append(f" {index}. {label} [{src}]")
        if index == last_index:
            continue
        edge_attrs = graph_obj.get_edge_data(nid, route[index + 1]) or {}
        relation = edge_attrs.get("relation", "→")
        rendered.append(f" —[{relation}]→")

    header = f"graphify__path {a!r} → {b!r} ({last_index} hops):\n"
    return header + "\n".join(rendered)
520
+
521
+
522
@tool
async def explain(node: str, path: str = ".") -> str:
    """Plain-language explanation of a single node: source
    location, immediate neighbours, community, edge count."""
    # The docstring above is kept verbatim: the tool decorator may
    # surface it to the agent as the tool description.
    max_listed = 10

    graph_obj = await _load_or_build_graph(path)
    nid = _find_node(graph_obj, node)
    if nid is None:
        return f"graphify__explain: no node matched {node!r}."

    data = graph_obj.nodes[nid]
    label = data.get("label", nid)
    src = data.get("source_file", "?")
    loc = data.get("source_location", "")
    community = data.get("community", "?")

    # In/out degrees only exist on directed graphs.
    directed = graph_obj.is_directed()
    in_degree = graph_obj.in_degree(nid) if directed else None
    out_degree = graph_obj.out_degree(nid) if directed else None
    total_degree = graph_obj.degree(nid)

    all_neighbours = list(graph_obj.neighbors(nid))
    shown = all_neighbours[:max_listed]

    parts = [
        f"{label}",
        f" source: {src}{':' + str(loc) if loc else ''}",
        f" community: {community}",
        f" total degree: {total_degree}",
    ]
    if in_degree is not None:
        parts.append(
            f" in-edges: {in_degree} out-edges: {out_degree}"
        )
    if shown:
        parts.append(" neighbours:")
        for n in shown:
            n_label = graph_obj.nodes[n].get("label", n)
            parts.append(f" • {n_label}")
        # The remainder is counted from the neighbour list itself, not
        # from the degree: degree counts edges, which exceeds the
        # number of listed neighbours on directed graphs (in + out),
        # multigraphs (parallel edges) and nodes with self-loops.
        hidden = len(all_neighbours) - len(shown)
        if hidden > 0:
            parts.append(f" ... and {hidden} more")
    return "graphify__explain:\n" + "\n".join(parts)
559
+
560
+
561
+ def _find_node(graph_obj: Any, name: str) -> str | None:
562
+ """Resolve a user-supplied name to a node ID. Exact ID match
563
+ wins; otherwise case-insensitive label substring match."""
564
+ if name in graph_obj.nodes:
565
+ return name
566
+ needle = name.lower()
567
+ for nid, data in graph_obj.nodes(data=True):
568
+ if needle in str(data.get("label", "")).lower():
569
+ return nid
570
+ return None