loom-code 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loom_code/__init__.py +22 -0
- loom_code/_post_commit.py +119 -0
- loom_code/agent.py +544 -0
- loom_code/approval.py +616 -0
- loom_code/browse/__init__.py +291 -0
- loom_code/browse/act.py +467 -0
- loom_code/browse/observe.py +249 -0
- loom_code/browse/session.py +96 -0
- loom_code/browse/verify.py +194 -0
- loom_code/checkpoint.py +283 -0
- loom_code/cli.py +495 -0
- loom_code/code_index.py +703 -0
- loom_code/compact.py +143 -0
- loom_code/consent.py +47 -0
- loom_code/credentials.py +527 -0
- loom_code/edit_tool.py +635 -0
- loom_code/extensions.py +522 -0
- loom_code/file_history.py +322 -0
- loom_code/file_tools.py +93 -0
- loom_code/git_hook.py +200 -0
- loom_code/grep_tool.py +430 -0
- loom_code/hooks.py +297 -0
- loom_code/loominit/__init__.py +23 -0
- loom_code/loominit/_ast_walk.py +429 -0
- loom_code/loominit/_files.py +284 -0
- loom_code/loominit/_graph.py +141 -0
- loom_code/loominit/_resolve.py +392 -0
- loom_code/loominit/_tests_map.py +108 -0
- loom_code/loominit/extractor.py +332 -0
- loom_code/loominit/repomap.py +225 -0
- loom_code/loominit/schema.py +242 -0
- loom_code/lsp_tools.py +396 -0
- loom_code/mcp_host.py +79 -0
- loom_code/operator.py +449 -0
- loom_code/paste.py +97 -0
- loom_code/paths.py +52 -0
- loom_code/permissions.py +177 -0
- loom_code/project.py +104 -0
- loom_code/prompts.py +451 -0
- loom_code/render.py +783 -0
- loom_code/repl.py +4080 -0
- loom_code/rules.py +267 -0
- loom_code/sandboxed_bash.py +176 -0
- loom_code/scribe.py +88 -0
- loom_code/skills/__init__.py +16 -0
- loom_code/skills/graphify/SKILL.md +97 -0
- loom_code/skills/graphify/tools.py +570 -0
- loom_code/trust.py +216 -0
- loom_code/turn.py +169 -0
- loom_code/web_fetch.py +370 -0
- loom_code/workers.py +758 -0
- loom_code/worktree.py +134 -0
- loom_code-0.1.1.dist-info/METADATA +224 -0
- loom_code-0.1.1.dist-info/RECORD +58 -0
- loom_code-0.1.1.dist-info/WHEEL +5 -0
- loom_code-0.1.1.dist-info/entry_points.txt +2 -0
- loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
- loom_code-0.1.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
"""Mode B Python tools for the ``graphify`` skill.
|
|
2
|
+
|
|
3
|
+
Wraps graphify's public Python primitives (``collect_files`` /
|
|
4
|
+
``extract`` / ``build_from_json`` / ``cluster`` / ``to_json``) into
|
|
5
|
+
``@tool``-decorated functions the agent calls directly. No
|
|
6
|
+
subprocess, no MCP server — just in-process Python.
|
|
7
|
+
|
|
8
|
+
Why not the standalone ``graphify`` CLI: it doesn't have a
|
|
9
|
+
``graphify <path>`` subcommand. The CLI is a SKILL installer
|
|
10
|
+
(``graphify install`` copies ``SKILL.md`` to
|
|
11
|
+
``~/.claude/skills/graphify/`` for Claude Code to find). The
|
|
12
|
+
actual extraction pipeline lives in the Python modules and is
|
|
13
|
+
intended to be orchestrated by the host AI tool — Claude Code
|
|
14
|
+
runs the multi-step skill flow; loom-code does the same here
|
|
15
|
+
via its own skill machinery, scoped to AST-only extraction
|
|
16
|
+
(code files) for predictable in-process behavior.
|
|
17
|
+
|
|
18
|
+
Multi-modal extraction (docs / papers / images) needs the full
|
|
19
|
+
skill-driven semantic pass with parallel subagents — that's a
|
|
20
|
+
follow-up; AST-only covers the 90% case for loom-code's use.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
import anyio
|
|
31
|
+
from loomflow import tool
|
|
32
|
+
|
|
33
|
+
# Directory (relative to the project root) and file name of the persisted
# graph; ``_graph_path`` joins them onto the resolved root.
_GRAPHIFY_OUT_SUBDIR = ".loom/graphify"
_GRAPH_FILENAME = "graph.json"

# Extensions graphify has a tree-sitter extractor for. Keep in sync
# with ``graphify.extract``'s per-language dispatch table — anything
# outside this set the extractor would silently skip, so we drop it
# before paying the dict-lookup + Path stat cost. Source of truth is
# the ``extract_<lang>`` functions in ``graphify/extract.py``.
#
# NOTE(review): ``.blade.php`` is a compound suffix. The only membership
# test in this module (``_git_ls_files``) compares ``Path.suffix``, which
# yields just the final component (``.php``), so that entry can never
# match on its own — Blade templates are picked up via the ``.php`` entry.
_GRAPHIFY_SUPPORTED_SUFFIXES = frozenset({
    ".astro", ".sh", ".bash", ".blade.php", ".c", ".h",
    ".cpp", ".cc", ".cxx", ".hpp", ".cs", ".dart", ".pas",
    ".dfm", ".ex", ".exs", ".f", ".f90", ".f95", ".for",
    ".go", ".groovy", ".java", ".js", ".jsx", ".mjs", ".cjs",
    ".json", ".jl", ".kt", ".kts", ".lpr", ".lpk", ".lua",
    ".md", ".markdown", ".m", ".mm", ".lpi", ".lps",
    ".php", ".ps1", ".psm1", ".py", ".pyi", ".rb",
    ".rs", ".scala", ".sc", ".sql", ".svelte", ".swift",
    ".ts", ".tsx", ".v", ".sv", ".zig",
})


# loom-code's OWN generated artifacts — never feed these to the
# knowledge graph. ``LOOM.md`` is loominit's output (indexing it is
# circular); ``.loom/`` is generated state (memory.db, graph.json,
# the notebook); ``graphify-out/`` is graphify's AST cache. These
# are frequently NOT in the user's ``.gitignore``, so ``git
# ls-files --others`` would happily surface them — we exclude them
# explicitly by path rather than relying on git's ignore handling.
# Both are matched against root-relative, forward-slash paths: the file
# set by exact equality, the prefixes with ``str.startswith``.
_LOOM_OWN_ARTIFACT_FILES = frozenset({"LOOM.md"})
_LOOM_OWN_ARTIFACT_PREFIXES = (".loom/", "graphify-out/")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _is_loom_own_artifact(rel_path: str) -> bool:
    """Report whether ``rel_path`` is an output loom-code generated itself.

    ``rel_path`` is expected to be relative to the repo root and to use
    forward slashes. It counts as an artifact when it equals one of the
    known artifact file names or starts with one of the artifact
    directory prefixes.
    """
    # ``str.startswith`` accepts a tuple of prefixes, so a single call
    # covers every artifact directory.
    return (
        rel_path in _LOOM_OWN_ARTIFACT_FILES
        or rel_path.startswith(_LOOM_OWN_ARTIFACT_PREFIXES)
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
async def _run_git_ls(
    project_root: Path, extra_args: list[str]
) -> list[str] | None:
    """Run ``git -C <root> ls-files -z <extra_args>`` and return the
    relative paths it lists.

    Args:
        project_root: Directory git is pointed at via ``-C``.
        extra_args: Additional ``ls-files`` flags (e.g. ``--others``).

    Returns:
        One string per listed path, or ``None`` on any failure (not a
        git repo, no git binary, non-zero exit).

    ``-z`` is always passed: without it git C-quotes paths containing
    non-ASCII or special characters (``"caf\\303\\251.py"``), and those
    quoted strings never resolve to a real file, so such sources would
    silently vanish from discovery. With ``-z`` every path is emitted
    verbatim and NUL-terminated, which also keeps paths containing line
    breaks intact.
    """
    try:
        result = await anyio.run_process(
            ["git", "-C", str(project_root), "ls-files", "-z", *extra_args],
            check=False,
        )
    except OSError:
        # Covers FileNotFoundError (no git binary), which subclasses OSError.
        return None
    if result.returncode != 0:
        return None
    listing = result.stdout.decode("utf-8", errors="replace")
    # Output is NUL-terminated, so the final split element is empty.
    return [entry for entry in listing.split("\0") if entry]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def _git_ls_files(project_root: Path) -> list[Path] | None:
    """List the project's indexable source files using git.

    Two listings are merged: plain ``git ls-files`` (tracked files) and
    ``git ls-files --others --exclude-standard`` (untracked files that
    are not ignored), so a freshly created file is visible before it is
    ``git add``-ed. Both honour git's ignore rules, which keeps
    ``.venv`` / ``node_modules`` / build output away without walking
    them.

    From the merged listing we then drop, in order:

    * blank and duplicate entries,
    * loom-code's own generated artifacts (these are often not
      gitignored, so ``--others`` would report them),
    * anything that is not a regular file under ``project_root``
      (directory entries, submodules),
    * files whose suffix graphify has no extractor for.

    Returns ``None`` when the tracked listing cannot be obtained, which
    tells the caller to fall back to ``graphify.collect_files``. A
    failure of only the untracked listing is treated as "no extra
    files".
    """
    tracked = await _run_git_ls(project_root, [])
    if tracked is None:
        # No usable git listing — let the caller use the directory walker.
        return None

    untracked = await _run_git_ls(
        project_root, ["--others", "--exclude-standard"]
    )

    # ``dict.fromkeys`` de-duplicates while keeping first-seen order.
    unique_entries = dict.fromkeys(
        entry for entry in [*tracked, *(untracked or ())] if entry
    )

    selected: list[Path] = []
    for rel in unique_entries:
        if _is_loom_own_artifact(rel):
            continue
        # Anchor on project_root rather than the process cwd so a cwd
        # change cannot alter which files are found.
        candidate = project_root / rel
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in _GRAPHIFY_SUPPORTED_SUFFIXES:
            continue
        selected.append(candidate)
    return selected
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _graph_path(project_root: Path | str) -> Path:
    """Return the absolute location of the persisted graph file for
    ``project_root``.

    Every reader and writer goes through this helper so they cannot
    disagree about where the graph lives.
    """
    resolved_root = Path(project_root).resolve()
    return resolved_root.joinpath(_GRAPHIFY_OUT_SUBDIR, _GRAPH_FILENAME)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _load_graph(project_root: Path | str) -> Any:
    """Read the persisted graph for ``project_root`` back into a
    NetworkX graph.

    Raises:
        FileNotFoundError: when no graph file exists yet; the message
            tells the agent which tool to run to create it.
    """
    graph_file = _graph_path(project_root)
    if graph_file.is_file():
        # Imported lazily so networkx is only loaded once there is a
        # graph to read.
        from networkx.readwrite import json_graph

        payload = json.loads(graph_file.read_text(encoding="utf-8"))
        return json_graph.node_link_graph(payload, edges="links")
    raise FileNotFoundError(
        f"No graph at {graph_file}. Run `graphify__build()` first "
        "to extract + persist the knowledge graph for this "
        "project."
    )
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
async def _load_or_build_graph(project_root: Path | str) -> Any:
    """Load the persisted graph; if it's missing, build it ONCE
    transparently then load.

    Lets the agent ``query`` / ``explain`` / ``path_between`` work
    with a single tool call instead of the brittle ``query → 'run
    build first' error → build → query again`` dance that smaller
    models routinely fail to recover from. Uses the same
    ``graphify_build_impl`` the explicit ``build`` tool shares; it
    deliberately does NOT install the git hook — persistent refresh
    stays an explicit ``build`` side effect.

    Raises:
        FileNotFoundError: when the graph is missing and the implicit
            build was a no-op (no indexable sources). The message
            carries the build's own skip reason instead of the generic
            "run build first" hint, which would be misleading right
            after a build has just run.
    """
    try:
        return _load_graph(project_root)
    except FileNotFoundError:
        # No graph yet. Build outside the handler so a later failure is
        # not reported as "during handling of the above exception".
        pass

    result = await graphify_build_impl(project_root)
    if result.skipped_reason is not None:
        # A skipped build writes nothing, so reloading can only fail.
        raise FileNotFoundError(
            f"No graph at {result.graph_path}: graphify__build ran but "
            f"{result.skipped_reason} (searched via {result.source})."
        )
    return _load_graph(project_root)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@dataclass(frozen=True)
class GraphifyBuildResult:
    """Structured outcome of one ``graphify_build_impl`` run.

    Used by callers that need the numbers (``/loominit`` for the
    LOOM.md ``## Knowledge Graph`` section, ``_post_commit`` for a
    log line). The ``@tool`` wrapper formats the same fields into
    the string the agent sees.

    When the build found nothing to index, all counts are zero and
    ``skipped_reason`` explains why; otherwise ``skipped_reason`` is
    ``None``."""

    # Absolute path of the graph file (written only when the build ran).
    graph_path: Path
    # Resolved project root the build was run against.
    project_root: Path
    # Node / edge counts of the built graph.
    n_nodes: int
    n_edges: int
    # Number of source files handed to the extractor.
    n_files: int
    # Number of communities returned by clustering.
    n_communities: int
    source: str  # "git ls-files" or "graphify.collect_files (git index empty or absent)"
    skipped_reason: str | None = None  # set when build was a no-op
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
async def graphify_build_impl(path: str | Path = ".") -> GraphifyBuildResult:
    """Build the project's knowledge graph and persist it as JSON.

    This is the shared core behind the ``build`` tool and the other
    in-package callers that need the structured result.

    Pipeline:

    1. Discover source files — git listing first, graphify's directory
       walker when git yields nothing.
    2. Extract with graphify, build the NetworkX graph, cluster it.
    3. Write ``<path>/.loom/graphify/graph.json``, overwriting any
       previous graph.

    When no source files can be found the function returns a result
    with zero counts and ``skipped_reason`` set, and writes no graph;
    the caller decides how loudly to report that.

    IMPORTANT — each graphify callable is imported from its OWN
    submodule, never as ``graphify.X``. graphify's ``__init__`` exposes
    top-level callables lazily via ``__getattr__``; importing any
    submodule pulls in sibling submodules, and a loaded submodule is
    bound on the ``graphify`` namespace where it SHADOWS the lazy
    callable of the same name. ``graphify.cluster(g)`` then fails with
    "'module' object is not callable". Importing the function from its
    submodule is the only form that stays safe.
    """
    from graphify.build import build_from_json
    from graphify.cluster import cluster
    from graphify.export import to_json
    from graphify.extract import collect_files, extract

    project_dir = Path(path).resolve()  # noqa: ASYNC240 — trivial fs op
    graph_file = _graph_path(project_dir)
    graph_file.parent.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240

    # Prefer the git listing: it is answered from the index and already
    # excludes ignored trees such as ``.venv`` / ``node_modules``. The
    # walker visits every file under the root and is far slower there.
    discovered = await _git_ls_files(project_dir)
    if discovered:
        origin = "git ls-files"
    else:
        # Both "not a git repo" (None) and "git lists no usable source"
        # (empty list — e.g. everything is gitignored) land here, so the
        # walker still gets a chance to find the files.
        discovered = collect_files(project_dir)
        origin = "graphify.collect_files (git index empty or absent)"

    if not discovered:
        # Genuinely nothing to index: report a no-op build.
        return GraphifyBuildResult(
            graph_path=graph_file,
            project_root=project_dir,
            n_nodes=0,
            n_edges=0,
            n_files=0,
            n_communities=0,
            source=origin,
            skipped_reason=(
                "no extractable source files (check tree-sitter "
                "language coverage — graphify supports py/ts/js/"
                "go/rs/java/c/cpp/rb/cs/kt/scala/php and more)"
            ),
        )

    # ``extract`` yields a dict that ``build_from_json`` consumes as-is.
    # ``cluster`` returns the community map, not a graph; ``to_json``
    # takes graph and map positionally, with ``force=True`` so a re-run
    # may replace the previous graph.json.
    graph = build_from_json(extract(discovered))
    community_map = cluster(graph)
    to_json(graph, community_map, str(graph_file), force=True)

    return GraphifyBuildResult(
        graph_path=graph_file,
        project_root=project_dir,
        n_nodes=graph.number_of_nodes(),
        n_edges=graph.number_of_edges(),
        n_files=len(discovered),
        n_communities=len(community_map),
        source=origin,
    )
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
@tool
async def build(path: str = ".") -> str:
    """Extract + cluster + persist the project's knowledge graph.

    Walks code files under ``path``, parses them with tree-sitter
    via graphify's extractor, builds a NetworkX graph (nodes =
    symbols/files, edges = imports/calls/references), runs Leiden
    community detection, and writes ``<path>/.loom/graphify/graph.json``.

    Idempotent: incremental via file-hash gating — re-running on
    an unchanged repo is fast. Run once per project (or after
    major refactors); the post-commit hook keeps it current every
    5 commits.

    Returns a short summary the agent can quote back to the user.
    """
    outcome = await graphify_build_impl(path)

    # graphify owns its refresh lifecycle, so the post-commit hook is
    # installed from here on every explicit build. Installation is
    # strictly best-effort: whatever goes wrong (e.g. the tree is not a
    # git repo) must not turn a finished build into a failure.
    try:
        from ...git_hook import install as _install_hook

        _install_hook(outcome.project_root)
    except Exception:  # noqa: BLE001 — hook install is best-effort
        pass

    if outcome.skipped_reason is None:
        written = outcome.graph_path.relative_to(outcome.project_root)
        return (
            f"graphify__build: ✓ wrote "
            f"{written} "
            f"({outcome.n_nodes} nodes, {outcome.n_edges} edges, "
            f"{outcome.n_files} source files via {outcome.source}, "
            f"{outcome.n_communities} communities)"
        )

    # No-op build: say why and how the files were searched for.
    return (
        f"graphify__build: {outcome.skipped_reason} "
        f"(searched via {outcome.source})."
    )
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@tool
async def query(question: str, path: str = ".") -> str:
    """Find nodes related to ``question`` via THREE strategies and
    rank them so the agent sees the whole subsystem, not just the
    literal name matches.

    1. **DIRECT** — literal substring match on node label or
       source-file. The narrow case grep would also catch.
    2. **NEIGHBOR** — 1-hop graph neighbours of any direct match.
       Surfaces callers, callees, decorators, dependencies — the
       things grep on the keyword would silently miss because they
       use a DIFFERENT identifier name but participate in the same
       call structure.
    3. **COMMUNITY** — other nodes in the same Leiden community
       as a direct match. Surfaces the "auth subsystem" when the
       query is "auth" even if specific symbols don't contain
       "auth" in their name. Leiden communities are pre-computed
       at build time and persisted in graph.json; we just use the
       cluster id at query time.

    Output is grouped by tier so the agent knows which results
    are literal hits vs structural neighbours vs community-cohort.
    Limited to 20 results total (8 direct + 8 neighbors + 4
    community) to keep tool output bounded while showing breadth.
    """
    # Validate the question BEFORE touching the graph: loading can
    # trigger a full transparent build, which is wasted work for a
    # question that cannot match anything.
    terms = [t.lower() for t in question.split() if len(t) > 2]
    if not terms:
        return (
            "graphify__query: question too short (need at least "
            "one keyword > 2 chars)."
        )
    graph_obj = await _load_or_build_graph(path)

    # === Tier 1: DIRECT label/source matches ===
    # A label hit is worth 1.0 per term, a source-file hit 0.4 per term.
    direct_scored: list[tuple[float, str, dict[str, Any]]] = []
    for nid, data in graph_obj.nodes(data=True):
        label = str(data.get("label", "")).lower()
        src = str(data.get("source_file", "")).lower()
        score = sum(1.0 for t in terms if t in label)
        score += sum(0.4 for t in terms if t in src)
        if score > 0:
            direct_scored.append((score, nid, data))
    # Stable sort: equal scores keep graph iteration order.
    direct_scored.sort(key=lambda x: x[0], reverse=True)
    if not direct_scored:
        return (
            f"graphify__query: no nodes matched {terms!r}. "
            "Try a different keyword, or use ``graphify__explain`` "
            "on a known symbol to discover related names."
        )
    top_direct = [nid for _, nid, _ in direct_scored[:8]]
    direct_ids = set(top_direct)

    # === Tier 2: NEIGHBORS of direct matches (1-hop in graph) ===
    # Nodes already in direct_ids are skipped so neighbours don't
    # double-count literal matches.
    # NOTE(review): ``neighbors()`` yields successors only on a directed
    # graph — confirm graphify persists undirected graphs.
    neighbour_ids = {
        n
        for did in top_direct
        for n in graph_obj.neighbors(did)
        if n not in direct_ids
    }
    # Rank by degree (structurally central nodes first). The candidates
    # are a set, so ties are broken by node id; otherwise the order —
    # and which nodes survive the cut to 8 — would depend on hash
    # seeding and differ between runs.
    neighbour_ranked = sorted(
        neighbour_ids,
        key=lambda n: (-graph_obj.degree(n), str(n)),
    )[:8]

    # === Tier 3: COMMUNITY peers (same cluster as a direct match) ===
    # Collect every community id touched by a direct match, then surface
    # OTHER members of those communities, skipping tiers 1 and 2.
    direct_communities: set[Any] = set()
    for did in top_direct:
        cid = graph_obj.nodes[did].get("community")
        if cid is not None:
            direct_communities.add(cid)
    seen = direct_ids | set(neighbour_ranked)
    community_ids: list[str] = []
    if direct_communities:
        candidates = [
            n for n, d in graph_obj.nodes(data=True)
            if d.get("community") in direct_communities
            and n not in seen
        ]
        # Degree-ranked too; candidates follow graph iteration order and
        # the sort is stable, so this tier is already deterministic.
        community_ids = sorted(
            candidates,
            key=lambda n: graph_obj.degree(n),
            reverse=True,
        )[:4]

    # === Render: grouped by tier with explicit labels ===
    def _line(nid: str, tier: str) -> str:
        """Format one result row, tagged with the tier it came from."""
        data = graph_obj.nodes[nid]
        label = data.get("label", nid)
        src = data.get("source_file", "?")
        loc = data.get("source_location", "")
        deg = graph_obj.degree(nid)
        cid = data.get("community", "?")
        loc_part = f":{loc}" if loc else ""
        return (
            f" [{tier}] {label} [{src}{loc_part}] "
            f"— degree {deg}, community {cid}"
        )

    sections: list[str] = [
        f"graphify__query for {terms!r}:\n",
        "DIRECT matches (literal label/source hit):",
    ]
    sections.extend(_line(nid, "DIRECT") for nid in top_direct)
    if neighbour_ranked:
        sections.append("")
        sections.append(
            "NEIGHBOR (1-hop in graph — callers / callees / "
            "dependencies of the direct matches):"
        )
        sections.extend(_line(nid, "NEIGHBOR") for nid in neighbour_ranked)
    if community_ids:
        sections.append("")
        sections.append(
            "COMMUNITY (same Leiden cluster as the direct matches "
            "— the rest of the subsystem):"
        )
        sections.extend(_line(nid, "COMMUNITY") for nid in community_ids)
    return "\n".join(sections)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
@tool
async def path_between(a: str, b: str, path: str = ".") -> str:
    """Shortest path between two named concepts. The single most
    useful graph query: "how does A get to B?" / "what connects
    X and Y?" — exactly what grep can't answer.
    """
    graph_obj = await _load_or_build_graph(path)

    # Resolve both endpoints first, then report the first one missing.
    start = _find_node(graph_obj, a)
    goal = _find_node(graph_obj, b)
    if start is None:
        return f"graphify__path: no node matched {a!r}."
    if goal is None:
        return f"graphify__path: no node matched {b!r}."

    import networkx as nx

    try:
        route = nx.shortest_path(graph_obj, start, goal)
    except nx.NetworkXNoPath:
        return (
            f"graphify__path: no path from {a!r} → {b!r}. They "
            "live in disconnected components — likely separate "
            "subsystems with no static linkage."
        )

    final_index = len(route) - 1
    rendered: list[str] = []
    for step, nid in enumerate(route):
        attrs = graph_obj.nodes[nid]
        node_label = attrs.get("label", nid)
        node_src = attrs.get("source_file", "?")
        rendered.append(f" {step}. {node_label} [{node_src}]")
        if step == final_index:
            continue
        # Between consecutive nodes, show the relation stored on the
        # connecting edge (a plain arrow when the edge carries none).
        link = graph_obj.get_edge_data(nid, route[step + 1]) or {}
        rendered.append(f" —[{link.get('relation', '→')}]→")

    header = f"graphify__path {a!r} → {b!r} ({final_index} hops):\n"
    return header + "\n".join(rendered)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@tool
async def explain(node: str, path: str = ".") -> str:
    """Plain-language explanation of a single node: source
    location, immediate neighbours, community, edge count."""
    # ``node`` is resolved through ``_find_node`` (exact id, else label
    # match); ``path`` is the project root whose graph is loaded, built
    # on demand if missing. Returns a multi-line summary, or a one-line
    # "no node matched" message when the name cannot be resolved.
    graph_obj = await _load_or_build_graph(path)
    nid = _find_node(graph_obj, node)
    if nid is None:
        return f"graphify__explain: no node matched {node!r}."

    data = graph_obj.nodes[nid]
    label = data.get("label", nid)
    src = data.get("source_file", "?")
    loc = data.get("source_location", "")
    community = data.get("community", "?")
    total_degree = graph_obj.degree(nid)

    parts = [
        f"{label}",
        f" source: {src}{':' + str(loc) if loc else ''}",
        f" community: {community}",
        f" total degree: {total_degree}",
    ]
    # The in/out split only exists on directed graphs.
    if graph_obj.is_directed():
        in_degree = graph_obj.in_degree(nid)
        out_degree = graph_obj.out_degree(nid)
        parts.append(
            f" in-edges: {in_degree} out-edges: {out_degree}"
        )

    # Derive the "more" count from the neighbour list itself, not from
    # the degree: on a directed graph ``degree`` counts in- and
    # out-edges while ``neighbors`` yields successors only, and a
    # self-loop adds two to the degree — either case made the old
    # ``total_degree - 10`` remainder wrong.
    all_neighbours = list(graph_obj.neighbors(nid))
    shown = all_neighbours[:10]
    hidden = len(all_neighbours) - len(shown)
    if shown:
        parts.append(" neighbours:")
        for n in shown:
            n_label = graph_obj.nodes[n].get("label", n)
            parts.append(f" • {n_label}")
        if hidden > 0:
            parts.append(f" ... and {hidden} more")
    return "graphify__explain:\n" + "\n".join(parts)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _find_node(graph_obj: Any, name: str) -> str | None:
    """Resolve a user-supplied name to a node ID.

    Resolution order:

    1. ``name`` is itself a node ID — returned unchanged.
    2. A node whose label equals ``name`` case-insensitively.
    3. The first node (in graph iteration order) whose label contains
       ``name`` case-insensitively.

    Args:
        graph_obj: Graph exposing a NetworkX-style ``nodes`` view.
        name: Node ID or (partial) label typed by the user.

    Returns:
        The matching node ID, or ``None`` when nothing matches or
        ``name`` is blank.
    """
    if name in graph_obj.nodes:
        return name
    needle = name.strip().lower()
    if not needle:
        # An empty needle is a substring of every label, so it would
        # otherwise "match" whichever node happens to come first.
        return None
    partial: str | None = None
    for nid, data in graph_obj.nodes(data=True):
        label = str(data.get("label", "")).lower()
        if label == needle:
            # An exact label beats any substring hit seen earlier, so
            # "build" resolves to ``build`` rather than ``rebuild_index``.
            return nid
        if partial is None and needle in label:
            partial = nid
    return partial
|