loom-code 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loom_code/__init__.py +22 -0
- loom_code/_post_commit.py +119 -0
- loom_code/agent.py +544 -0
- loom_code/approval.py +616 -0
- loom_code/browse/__init__.py +291 -0
- loom_code/browse/act.py +467 -0
- loom_code/browse/observe.py +249 -0
- loom_code/browse/session.py +96 -0
- loom_code/browse/verify.py +194 -0
- loom_code/checkpoint.py +283 -0
- loom_code/cli.py +495 -0
- loom_code/code_index.py +703 -0
- loom_code/compact.py +143 -0
- loom_code/consent.py +47 -0
- loom_code/credentials.py +527 -0
- loom_code/edit_tool.py +635 -0
- loom_code/extensions.py +522 -0
- loom_code/file_history.py +322 -0
- loom_code/file_tools.py +93 -0
- loom_code/git_hook.py +200 -0
- loom_code/grep_tool.py +430 -0
- loom_code/hooks.py +297 -0
- loom_code/loominit/__init__.py +23 -0
- loom_code/loominit/_ast_walk.py +429 -0
- loom_code/loominit/_files.py +284 -0
- loom_code/loominit/_graph.py +141 -0
- loom_code/loominit/_resolve.py +392 -0
- loom_code/loominit/_tests_map.py +108 -0
- loom_code/loominit/extractor.py +332 -0
- loom_code/loominit/repomap.py +225 -0
- loom_code/loominit/schema.py +242 -0
- loom_code/lsp_tools.py +396 -0
- loom_code/mcp_host.py +79 -0
- loom_code/operator.py +449 -0
- loom_code/paste.py +97 -0
- loom_code/paths.py +52 -0
- loom_code/permissions.py +177 -0
- loom_code/project.py +104 -0
- loom_code/prompts.py +451 -0
- loom_code/render.py +783 -0
- loom_code/repl.py +4080 -0
- loom_code/rules.py +267 -0
- loom_code/sandboxed_bash.py +176 -0
- loom_code/scribe.py +88 -0
- loom_code/skills/__init__.py +16 -0
- loom_code/skills/graphify/SKILL.md +97 -0
- loom_code/skills/graphify/tools.py +570 -0
- loom_code/trust.py +216 -0
- loom_code/turn.py +169 -0
- loom_code/web_fetch.py +370 -0
- loom_code/workers.py +758 -0
- loom_code/worktree.py +134 -0
- loom_code-0.1.1.dist-info/METADATA +224 -0
- loom_code-0.1.1.dist-info/RECORD +58 -0
- loom_code-0.1.1.dist-info/WHEEL +5 -0
- loom_code-0.1.1.dist-info/entry_points.txt +2 -0
- loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
- loom_code-0.1.1.dist-info/top_level.txt +1 -0
loom_code/workers.py
ADDED
|
@@ -0,0 +1,758 @@
|
|
|
1
|
+
"""The worker roster for loom-code's ``Team.supervisor``.
|
|
2
|
+
|
|
3
|
+
loom-code is a hierarchical team: a coordinator Agent (the tech
|
|
4
|
+
lead) delegates to these workers via loomflow's ``Supervisor``
|
|
5
|
+
architecture. Each worker is a full loomflow ``Agent`` with a
|
|
6
|
+
``ReAct`` loop — the coordinator hands it a focused task through
|
|
7
|
+
the ``delegate`` tool and it runs to completion.
|
|
8
|
+
|
|
9
|
+
The roster is sliced by VERB, and one invariant holds it together:
|
|
10
|
+
|
|
11
|
+
* **coder** — the ONLY writer. Full file-and-shell kernel
|
|
12
|
+
(read/write/edit/grep/find/ls/bash). Every actual change to the
|
|
13
|
+
codebase happens here, one delegation at a time.
|
|
14
|
+
* **explorer** — read-only investigation → a briefing.
|
|
15
|
+
* **auditor** — read-only defect hunt (security / perf /
|
|
16
|
+
correctness lens) → tagged findings.
|
|
17
|
+
* **reviewer** — read-only inspection + ``bash`` to run the
|
|
18
|
+
project's tests → a pass/fail verdict.
|
|
19
|
+
|
|
20
|
+
Because only ``coder`` writes, the coordinator can delegate the
|
|
21
|
+
three read-only workers in parallel with zero risk of filesystem
|
|
22
|
+
races (loomflow's Supervisor gets parallel delegation for free —
|
|
23
|
+
ReAct dispatches multiple ``delegate`` calls in one turn through
|
|
24
|
+
an ``anyio`` task group). The coordinator serialises ``coder``
|
|
25
|
+
delegations itself.
|
|
26
|
+
|
|
27
|
+
Workers inherit the shared notebook (``workspace=``) and the
|
|
28
|
+
coordinator's memory via loomflow's ambient propagation — they
|
|
29
|
+
are NOT given their own, so there's one notebook and one memory
|
|
30
|
+
db for the whole team.
|
|
31
|
+
|
|
32
|
+
Memory propagation became real in loomflow 0.10.15 (before that,
|
|
33
|
+
``Team.supervisor(memory=...)`` propagated to the coordinator
|
|
34
|
+
only and workers silently fell back to ephemeral
|
|
35
|
+
``InMemoryMemory``). Combined with ``persist_tool_transcripts=True``
|
|
36
|
+
on each worker (also 0.10.15+), the worker's ``read`` / ``edit`` /
|
|
37
|
+
``bash`` results land in the coordinator's sqlite memory keyed
|
|
38
|
+
by the worker's stable session_id — so the same worker delegated
|
|
39
|
+
to twice no longer re-reads the same file. See the per-worker
|
|
40
|
+
constructors below for the wiring and the ``BUILD_LOG`` for the
|
|
41
|
+
diagnosis that led to this.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from __future__ import annotations
|
|
45
|
+
|
|
46
|
+
from collections.abc import Awaitable, Callable
|
|
47
|
+
from typing import Any
|
|
48
|
+
|
|
49
|
+
from loomflow import Agent, StandardPermissions, Tuning
|
|
50
|
+
from loomflow.architecture import ReAct
|
|
51
|
+
from loomflow.tools import (
|
|
52
|
+
bash_tool,
|
|
53
|
+
find_tool,
|
|
54
|
+
ls_tool,
|
|
55
|
+
read_tool,
|
|
56
|
+
write_tool,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
from .code_index import codebase_search_tool
|
|
60
|
+
from .credentials import patient_retry_policy_for
|
|
61
|
+
from .edit_tool import multi_edit_tool
|
|
62
|
+
from .edit_tool import verifying_edit_tool as edit_tool
|
|
63
|
+
from .extensions import AgentSpec
|
|
64
|
+
from .file_tools import loom_read_tool
|
|
65
|
+
from .grep_tool import enhanced_grep_tool as grep_tool
|
|
66
|
+
from .lsp_tools import lsp_tools
|
|
67
|
+
from .project import Project
|
|
68
|
+
from .prompts import build_coder_prompt
|
|
69
|
+
from .web_fetch import web_fetch_tool
|
|
70
|
+
|
|
71
|
+
# Turn budgets per role. The coder carries out real multi-step changes,
# so its ceiling is generous. The explorer and auditor answer one scoped
# question and exit, so they get the tightest cap. The reviewer lands in
# between: running a test suite and iterating on its failures
# legitimately takes more turns than answering a single question.
_CODER_MAX_TURNS = 60
_SPECIALIST_MAX_TURNS = 20  # explorer + auditor: one scoped question
_REVIEWER_MAX_TURNS = 30  # test runs may need several iterations
|
|
79
|
+
|
|
80
|
+
# Size (in characters) above which a read-only worker's tool result is
# handed to its ``tool_result_summarizer``.
#
# Semantics that matter: loomflow swaps the result IN-TURN, so the worker
# never sees the verbatim output, only a digest of at most 512 tokens.
# That makes this a last-resort bound rather than a cost knob. At 20k
# chars (~5k tokens) ordinary reads, greps and test runs stay verbatim;
# only genuine dumps get compressed, i.e. the ones that would otherwise
# fail the run with a 400 ``context_length_exceeded``, where a digest is
# better than a dead turn.
#
# Do NOT lower this to "save tokens": below this size the fidelity lost
# (explorer briefings and reviewer verdicts built from digests) costs
# more than the tokens saved.
SUMMARY_THRESHOLD_CHARS = 20_000
|
|
91
|
+
|
|
92
|
+
_EXPLORER_PROMPT = """\
|
|
93
|
+
You are the EXPLORER on a loom-code team — a read-only
|
|
94
|
+
investigator. A tech lead delegates ONE question about the
|
|
95
|
+
codebase; answer it thoroughly and hand the answer back.
|
|
96
|
+
|
|
97
|
+
You have read-only tools: `read`, `grep`, `find`, `ls` (scoped to
|
|
98
|
+
the project root) and `web_fetch` (HTTPS URLs; GitHub blob URLs
|
|
99
|
+
auto-rewrite to raw). NO write/edit/bash — you cannot change
|
|
100
|
+
anything, and must not try.
|
|
101
|
+
|
|
102
|
+
How you work:
|
|
103
|
+
- Start broad (`find`/`ls`/`grep`), then `read` the files that
|
|
104
|
+
matter. Follow the actual wiring — imports, call sites, config
|
|
105
|
+
— don't guess.
|
|
106
|
+
- For URLs the lead names, use `web_fetch(url=...)` — never
|
|
107
|
+
substitute a local file for a remote source. A full repo needs
|
|
108
|
+
`bash git clone`, which only `coder` has — say so in your
|
|
109
|
+
report and the lead can re-route.
|
|
110
|
+
- Answer concretely: cite `path:line` for every claim, quote the
|
|
111
|
+
key code, answer every sub-part, and end with a short summary
|
|
112
|
+
the lead can act on.
|
|
113
|
+
|
|
114
|
+
**When your finding is non-trivial, write ONE finding note** —
|
|
115
|
+
`note(kind="finding", title="<short, keyword-rich>",
|
|
116
|
+
content=<findings with path:line citations>)` — so the next
|
|
117
|
+
fresh-session worker reuses it instead of re-investigating. SKIP
|
|
118
|
+
the note for a quick lookup or one-line answer; restating a
|
|
119
|
+
simple answer is noise.
|
|
120
|
+
|
|
121
|
+
Be exhaustive on facts, terse on prose. No summary documents or
|
|
122
|
+
banners; your report is the only thing read.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
# Appended onto _EXPLORER_PROMPT only when the explorer was built
|
|
126
|
+
# with a web_tool — promising a tool the agent doesn't have wastes
|
|
127
|
+
# turns on failed tool calls.
|
|
128
|
+
_EXPLORER_WEB_HINT = """\
|
|
129
|
+
|
|
130
|
+
You also have `web_search(query=...)` for investigation *outside*
|
|
131
|
+
the codebase — an upstream library's documented behaviour, an
|
|
132
|
+
external API's contract, a CVE, a third-party error's known
|
|
133
|
+
cause. Use it AFTER you've read the relevant project code, not
|
|
134
|
+
instead. Keyword queries beat sentences. Cite the source URL in
|
|
135
|
+
your finding note.
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _explorer_prompt(has_web: bool) -> str:
    """Assemble the explorer's system prompt.

    The ``web_search`` hint is appended only when ``has_web`` is true, so
    the agent is never told about a tool it was not given.
    """
    suffix = _EXPLORER_WEB_HINT if has_web else ""
    return _EXPLORER_PROMPT + suffix
|
|
145
|
+
|
|
146
|
+
_AUDITOR_PROMPT = """\
|
|
147
|
+
You are the AUDITOR on a loom-code team — a read-only inspector.
|
|
148
|
+
A tech lead delegates a focus area and a lens (security,
|
|
149
|
+
performance, or correctness). Your job: hunt for PROBLEMS.
|
|
150
|
+
|
|
151
|
+
You have read-only tools: `read`, `grep`, `find`, `ls`. NO
|
|
152
|
+
write/edit/bash — you find problems, you do not fix them.
|
|
153
|
+
|
|
154
|
+
How you work:
|
|
155
|
+
- Read the focus area carefully; trace inputs to where they're
|
|
156
|
+
used. Through your lens, hunt concrete defects:
|
|
157
|
+
- security: injection, unsanitised input, secrets in code,
|
|
158
|
+
path traversal, unsafe deserialization, missing authz.
|
|
159
|
+
- performance: N+1 patterns, work in hot loops, unbounded
|
|
160
|
+
growth, sync I/O on a hot path.
|
|
161
|
+
- correctness: unhandled edge cases, off-by-one, swallowed
|
|
162
|
+
errors, race conditions, wrong input assumptions.
|
|
163
|
+
- Report each finding as a list item tagged severity:
|
|
164
|
+
`[blocker]` — a real bug / vulnerability, must fix.
|
|
165
|
+
`[risk]` — likely wrong or fragile, worth a closer look.
|
|
166
|
+
`[nit]` — minor, optional.
|
|
167
|
+
Cite `path:line` for every finding and quote the offending code.
|
|
168
|
+
- If you find nothing real, say so — do NOT invent problems to
|
|
169
|
+
look thorough.
|
|
170
|
+
|
|
171
|
+
**When you found real issues, write ONE finding note** —
|
|
172
|
+
`note(kind="finding", title="<area>: <severity gist>",
|
|
173
|
+
content=<tagged findings with path:line citations>)` — so the
|
|
174
|
+
lead and next worker reuse it instead of re-auditing. Nothing
|
|
175
|
+
notable → SKIP the note; don't write one to record "no issues".
|
|
176
|
+
|
|
177
|
+
End with a one-line summary: how many blockers / risks / nits. No
|
|
178
|
+
summary documents or banners — the report is the only thing read.
|
|
179
|
+
"""
|
|
180
|
+
|
|
181
|
+
_REVIEWER_PROMPT = """\
|
|
182
|
+
You are the REVIEWER on a loom-code team — a verification
|
|
183
|
+
specialist. A tech lead delegates a description of a change just
|
|
184
|
+
made. Independently confirm it is correct, complete, and safe.
|
|
185
|
+
|
|
186
|
+
You have `read`, `grep`, `find`, `ls`, and `bash`. Use `bash` to
|
|
187
|
+
run the project's OWN tests / linters / build — not improvised
|
|
188
|
+
checks. NO write/edit — you do not fix things, you REPORT.
|
|
189
|
+
|
|
190
|
+
How you work:
|
|
191
|
+
- Re-read the changed files yourself. Don't trust the description.
|
|
192
|
+
- Run the verification command (test suite, build, type-check).
|
|
193
|
+
- Look for: broken callers, missing edge cases, untested paths,
|
|
194
|
+
things the change claimed but didn't do, regressions.
|
|
195
|
+
- Report findings as a list, each tagged severity:
|
|
196
|
+
`[blocker]` — must fix before this is done.
|
|
197
|
+
`[risk]` — probably wrong / fragile, worth a second look.
|
|
198
|
+
`[nit]` — minor, optional.
|
|
199
|
+
- All good → `VERDICT: pass` plus the evidence (which tests ran,
|
|
200
|
+
what passed). Otherwise `VERDICT: fail` and the blockers.
|
|
201
|
+
|
|
202
|
+
You are the last line before the user sees the work. Be skeptical.
|
|
203
|
+
"""
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _read_only_tools(
    project: Project,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> list[Any]:
    """Assemble the inspection kernel used by the read-only workers.

    The kernel is `read` / `grep` / `find` / `ls` rooted at
    ``project.root``, plus ``web_fetch`` and the LSP navigation tools.
    The explorer and auditor use it as-is; the reviewer layers `bash`
    on top of it.

    ``web_fetch`` is included so these workers can reach a remote source
    instead of substituting a local file for it. No tool in this list
    writes to disk or opens a shell, so the sole-writer invariant holds.

    ``embedder`` (``"openai"`` / ``"hash"``): when not ``None``, the
    semantic ``codebase_search`` tool is placed directly after `grep`
    (index 2). ``workspace`` is forwarded to that tool only. With
    ``embedder=None`` (the default) the list contains no semantic search.

    Returns a fresh list on every call.
    """
    base = project.root
    kernel: list[Any] = [
        read_tool(base),
        grep_tool(base),
        find_tool(base),
        ls_tool(base),
        web_fetch_tool(),
        # Symbol-level navigation (definition / references / hover), so
        # workers can follow code by symbol rather than by grep string.
        *lsp_tools(base),
    ]
    if embedder is None:
        return kernel

    # Same embedder name as the rest of the team, so every agent queries
    # the one shared index.
    semantic = codebase_search_tool(base, embedder, workspace=workspace)
    return [*kernel[:2], semantic, *kernel[2:]]
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _build_coder(
    project: Project,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None,
    has_web: bool = False,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    effort: str | None = None,
    mcp_registry: Any | None = None,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
    embedder: str | None = None,
    workspace: Any | None = None,
    memory: Any | None = None,
    attach_workspace: bool = False,
    persist_tool_transcripts: bool = True,
) -> Agent:
    """Build the coder, the team's only writer.

    The coder gets the full file-and-shell kernel rooted at
    ``project.root`` (read / write / edit / multi_edit / grep / find /
    ls / bash / web_fetch / LSP navigation). ``StandardPermissions``
    together with ``approval_handler`` gates its destructive tools.

    ``has_web`` only switches the ``web_search`` section of the prompt
    on or off; no web tool is attached here. Keep it in lockstep with
    whatever ``build_workers`` attaches, otherwise the prompt promises a
    tool the agent lacks.

    ``sandbox=True`` replaces plain ``bash`` with ``sandboxed_bash_tool``
    so shell commands run inside the kernel sandbox; network is withheld
    unless ``sandbox_allow_network`` is set. Only the shell is sandboxed.
    ``edit`` / ``write`` stay behind the approval gate. Off by default.

    ``embedder``: when not ``None``, adds the read-only
    ``codebase_search`` tool right after `grep`, so the coder can locate
    code by meaning before editing it.

    ``mcp_registry`` (typed ``Any`` to avoid a hard import of the MCP
    extra): when set, the static tool list is wrapped in an
    ``InProcessToolHost`` and composed with the registry through
    ``McpAugmentedHost``. When ``None``, the plain list is handed to
    ``Agent`` directly.

    ``memory`` / ``attach_workspace``: as a delegate worker the coder
    inherits memory and the notebook from the coordinator, so both stay
    off (``None`` / ``False``). The solo fast path runs this agent
    standalone, so it passes ``memory`` explicitly and sets
    ``attach_workspace=True`` so that ``workspace`` is attached to the
    agent. ``workspace`` is forwarded to ``codebase_search`` whether or
    not it is attached.

    ``persist_tool_transcripts``: ``True`` for a delegate worker, so
    earlier reads / edits / shell output are rehydrated on the next
    delegation. The solo fast path passes ``False`` because it shares the
    REPL session with the read-only coordinator, which would otherwise
    rehydrate a history of "itself" editing files.

    ``skills``, ``snip_window``, ``auto_compact_at_tokens`` and
    ``effort`` are forwarded to ``Agent`` unchanged.
    """
    base = project.root

    # The shell is the one tool that executes arbitrary code, so it is
    # the one that gets swapped for the sandboxed variant on request.
    if not sandbox:
        shell = bash_tool(base, timeout=300.0)
    else:
        from .sandboxed_bash import sandboxed_bash_tool

        shell = sandboxed_bash_tool(
            base, allow_network=sandbox_allow_network, timeout=300.0
        )

    kernel: list[Any] = [
        # Policy-bounded reader rather than the plain ``read_tool``: it
        # may reach user-referenced files outside the project, in line
        # with edit / multi_edit.
        loom_read_tool(base),
        write_tool(base),
        edit_tool(base),
        multi_edit_tool(base),
        grep_tool(base),
        find_tool(base),
        ls_tool(base),
        shell,
        web_fetch_tool(),
        # Symbol navigation, so the writer resolves what it is about to
        # change instead of grepping for it.
        *lsp_tools(base),
    ]
    if embedder is not None:
        # Slot semantic search in at index 5, directly after `grep`.
        kernel[5:5] = [
            codebase_search_tool(base, embedder, workspace=workspace)
        ]

    # Without MCP the bare list goes to Agent, which wraps it itself.
    # With MCP the host is built here and composed with the registry.
    if mcp_registry is None:
        tool_source: Any = kernel
    else:
        from loomflow.tools.registry import InProcessToolHost

        from .mcp_host import McpAugmentedHost

        tool_source = McpAugmentedHost(
            InProcessToolHost(kernel), mcp_registry
        )

    notebook = workspace if attach_workspace else None

    return Agent(
        build_coder_prompt(project, has_web=has_web),
        model=model,
        architecture=ReAct(),
        tools=tool_source,
        memory=memory,
        workspace=notebook,
        # Skills are registered on the worker as well as the
        # coordinator, so a skill's tools are callable wherever the
        # delegated work actually runs.
        skills=skills,
        permissions=StandardPermissions(),
        approval_handler=approval_handler,
        prompt_caching=True,
        max_turns=_CODER_MAX_TURNS,
        # Bounded-window trim of rehydrated history before each run.
        # This is the bound that keeps accumulated tool transcripts from
        # overflowing the model window (context_length_exceeded).
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        effort=effort,
        # Rehydrate prior tool transcripts so the coder can quote what
        # it already read instead of re-running `read`.
        persist_tool_transcripts=persist_tool_transcripts,
        # Provider-dependent retry schedule chosen from the model name.
        tuning=Tuning(retry_policy=patient_retry_policy_for(model)),
    )
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _build_explorer(
    project: Project,
    *,
    model: str,
    has_web: bool = False,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> Agent:
    """Build the explorer, the read-only investigator.

    Its tools are exactly the ``_read_only_tools`` kernel, so no
    permissions policy or approval handler is attached.

    ``has_web`` only selects the prompt variant that mentions
    ``web_search``; it must match what ``build_workers`` attaches.
    ``embedder`` / ``workspace`` are forwarded to ``_read_only_tools``
    (semantic ``codebase_search`` when an embedder is given).

    ``tool_result_summarizer`` names the model used to compress tool
    results larger than ``SUMMARY_THRESHOLD_CHARS``. This is acceptable
    here because the explorer produces a briefing, not exact-match edits.

    ``skills``, ``snip_window``, ``auto_compact_at_tokens`` and
    ``effort`` are forwarded to ``Agent`` unchanged.
    """
    prompt = _explorer_prompt(has_web)
    loop = ReAct()
    kernel = _read_only_tools(project, embedder, workspace)
    tuning = Tuning(
        tool_result_summary_threshold=SUMMARY_THRESHOLD_CHARS,
        retry_policy=patient_retry_policy_for(model),
    )
    return Agent(
        prompt,
        model=model,
        architecture=loop,
        tools=kernel,
        skills=skills,
        prompt_caching=True,
        max_turns=_SPECIALIST_MAX_TURNS,
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        # Per-result compression: a bound on the size of a single tool
        # result, which the turn-count trim above does not give.
        tool_result_summarizer=tool_result_summarizer,
        tuning=tuning,
        effort=effort,
        # Keep earlier tool output across delegations, so a follow-up
        # question does not re-grep and re-read the same files.
        persist_tool_transcripts=True,
    )
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def _build_auditor(
    project: Project,
    *,
    model: str,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> Agent:
    """Build the auditor, the read-only defect hunter.

    It has the same tool kernel as the explorer (``_read_only_tools``)
    and the same turn cap, but a different prompt and therefore a
    different objective. No permissions policy or approval handler is
    attached.

    ``embedder`` / ``workspace`` are forwarded to ``_read_only_tools``.
    ``tool_result_summarizer`` names the model used to compress tool
    results larger than ``SUMMARY_THRESHOLD_CHARS``. The remaining
    keyword arguments are forwarded to ``Agent`` unchanged.
    """
    loop = ReAct()
    kernel = _read_only_tools(project, embedder, workspace)
    tuning = Tuning(
        tool_result_summary_threshold=SUMMARY_THRESHOLD_CHARS,
        retry_policy=patient_retry_policy_for(model),
    )
    return Agent(
        _AUDITOR_PROMPT,
        model=model,
        architecture=loop,
        tools=kernel,
        skills=skills,
        prompt_caching=True,
        max_turns=_SPECIALIST_MAX_TURNS,
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        # The auditor reports findings rather than exact edits, so a
        # summarised oversized result is acceptable.
        tool_result_summarizer=tool_result_summarizer,
        tuning=tuning,
        effort=effort,
        # Keep context about the focus area across rounds when findings
        # are iterated on.
        persist_tool_transcripts=True,
    )
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def _build_reviewer(
    project: Project,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    embedder: str | None = None,
    workspace: Any | None = None,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
) -> Agent:
    """Build the reviewer, the independent verifier.

    The reviewer gets the ``_read_only_tools`` kernel plus `bash`, which
    it uses to run the project's real test suite. `bash` is gated through
    ``StandardPermissions`` and the same ``approval_handler`` as the
    coder. It has no write/edit tool, so it reports but never fixes.

    ``sandbox=True`` swaps plain ``bash`` for ``sandboxed_bash_tool``,
    exactly as ``_build_coder`` does, so enabling the sandbox covers
    every shell on the team and not only the coder's. Network is withheld
    inside the sandbox unless ``sandbox_allow_network`` is set. Both
    default to ``False``, which keeps the plain ``bash`` tool.

    ``embedder`` / ``workspace`` are forwarded to ``_read_only_tools``.
    ``tool_result_summarizer`` names the model used to compress tool
    results larger than ``SUMMARY_THRESHOLD_CHARS``. The remaining
    keyword arguments are forwarded to ``Agent`` unchanged.
    """
    root = project.root
    # The reviewer's shell executes arbitrary commands just like the
    # coder's, so it honours the same sandbox switch.
    if sandbox:
        from .sandboxed_bash import sandboxed_bash_tool

        bash = sandboxed_bash_tool(
            root, allow_network=sandbox_allow_network, timeout=300.0
        )
    else:
        bash = bash_tool(root, timeout=300.0)
    return Agent(
        _REVIEWER_PROMPT,
        model=model,
        architecture=ReAct(),
        tools=[
            *_read_only_tools(project, embedder, workspace),
            bash,
        ],
        skills=skills,
        permissions=StandardPermissions(),
        approval_handler=approval_handler,
        prompt_caching=True,
        max_turns=_REVIEWER_MAX_TURNS,
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        # The reviewer returns briefings and test verdicts, not exact
        # edits, so a summarised oversized test dump is acceptable.
        tool_result_summarizer=tool_result_summarizer,
        tuning=Tuning(
            tool_result_summary_threshold=SUMMARY_THRESHOLD_CHARS,
            retry_policy=patient_retry_policy_for(model),
        ),
        effort=effort,
        # Re-review cycles ("you flagged X, the coder fixed it, recheck")
        # reuse earlier tool output instead of re-reading every changed
        # file from scratch.
        persist_tool_transcripts=True,
    )


def build_workers(
    project: Project,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None = None,
    web_backend: str | None = None,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    mcp_registry: Any | None = None,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> dict[str, Agent]:
    """Build the worker roster for ``Team.supervisor``.

    Returns ``{"coder", "explorer", "auditor", "reviewer"}``. The dict
    keys become each worker's delegate name AND its author identity in
    the shared notebook. All four run on the same ``model``; the
    specialism is in the prompt + tool scoping, not a weaker model.

    Only ``coder`` and ``reviewer`` get a permissions policy +
    ``approval_handler`` (they hold destructive tools); ``explorer`` and
    ``auditor`` are purely read-only.

    ``web_backend``: ``"serper"`` or ``"duckduckgo"`` to enable
    ``loomflow.tools.web_tool`` (web *search*) on ``coder`` +
    ``explorer``. Auditor and reviewer get no web search, although all
    read-only workers still have ``web_fetch`` through
    ``_read_only_tools``. ``None`` (default) leaves web search off
    entirely.

    ``tool_result_summarizer`` (a cheap model name) compresses oversized
    tool results on explorer / auditor / reviewer. The coder is
    deliberately excluded: it needs verbatim ``read`` output to construct
    exact-match ``edit`` old_strings.

    ``sandbox`` / ``sandbox_allow_network`` apply to every worker that
    holds a shell, i.e. both ``coder`` and ``reviewer``.

    ``mcp_registry`` goes to the coder only. ``skills``,
    ``auto_compact_at_tokens``, ``snip_window``, ``effort``, ``embedder``
    and ``workspace`` are forwarded to every worker.

    Raises whatever ``web_tool`` raises for a misconfigured backend; the
    caller is expected to validate the backend first.
    """
    has_web = web_backend is not None
    workers: dict[str, Agent] = {
        "coder": _build_coder(
            project,
            model=model,
            approval_handler=approval_handler,
            has_web=has_web,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            effort=effort,
            mcp_registry=mcp_registry,
            sandbox=sandbox,
            sandbox_allow_network=sandbox_allow_network,
            embedder=embedder,
            workspace=workspace,
        ),
        "explorer": _build_explorer(
            project,
            model=model,
            has_web=has_web,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            tool_result_summarizer=tool_result_summarizer,
            effort=effort,
            embedder=embedder,
            workspace=workspace,
        ),
        "auditor": _build_auditor(
            project,
            model=model,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            tool_result_summarizer=tool_result_summarizer,
            effort=effort,
            embedder=embedder,
            workspace=workspace,
        ),
        "reviewer": _build_reviewer(
            project,
            model=model,
            approval_handler=approval_handler,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            tool_result_summarizer=tool_result_summarizer,
            effort=effort,
            embedder=embedder,
            workspace=workspace,
            # Without these the reviewer's shell would stay unsandboxed
            # even when the caller asked for a sandboxed team.
            sandbox=sandbox,
            sandbox_allow_network=sandbox_allow_network,
        ),
    }
    if has_web:
        # One web_tool instance shared by both workers. ``has_web=True``
        # was already threaded into the prompts above; this is where the
        # tool is actually attached.
        from loomflow.tools import web_tool

        web = web_tool(backend=web_backend)  # type: ignore[arg-type]
        workers["coder"].add_tool(web)
        workers["explorer"].add_tool(web)
    return workers
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
# Role names of the builtin workers. These are reserved: a
# user-authored subagent that picks one of them is skipped instead of
# being allowed to shadow the known roster (``coder`` in particular,
# the sole writer).
BUILTIN_WORKER_NAMES = frozenset(
    ("coder", "explorer", "auditor", "reviewer")
)

# Requestable tool names that can write to disk or run a shell.
# ``build_custom_worker`` attaches permissions and the approval handler
# only when a spec asks for at least one of these. The full set of
# names a custom subagent's ``tools:`` frontmatter may request lives in
# ``_custom_tool_factories``; ``web_search`` is intentionally absent
# there — it needs backend wiring (``/set_web``) — so custom agents get
# ``web_fetch`` (always available, read-only, no shell/disk write)
# instead.
_DESTRUCTIVE_TOOL_NAMES = frozenset(
    ("write", "edit", "multi_edit", "bash")
)

# Fallback tool list for a spec that declares no ``tools:``. It is the
# read-only kernel: a stranger's spec is never given write/shell access
# implicitly. Matches ``_read_only_tools``.
_DEFAULT_CUSTOM_TOOLS = (
    "read",
    "grep",
    "find",
    "ls",
    "web_fetch",
)
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def _custom_tool_factories(root: Any) -> dict[str, Callable[[], Any]]:
    """Return the tool-name → zero-arg factory table for ``root``.

    Each value builds one tool when called; the file/shell tools are
    constructed against ``root``. A name that a spec requests but that
    is missing from this table is skipped by
    :func:`build_custom_worker`, so a misspelled tool name loses only
    that tool, not the whole agent."""
    table: dict[str, Callable[[], Any]] = {}

    # Insertion order is kept identical to the original literal.
    table["read"] = lambda: read_tool(root)
    table["write"] = lambda: write_tool(root)
    table["edit"] = lambda: edit_tool(root)
    table["multi_edit"] = lambda: multi_edit_tool(root)
    table["grep"] = lambda: grep_tool(root)
    table["find"] = lambda: find_tool(root)
    table["ls"] = lambda: ls_tool(root)
    # The shell tool is the only entry built with an explicit timeout.
    table["bash"] = lambda: bash_tool(root, timeout=300.0)
    # ``web_fetch`` takes no root argument.
    table["web_fetch"] = lambda: web_fetch_tool()

    return table
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
def build_custom_worker(
    project: Project,
    spec: AgentSpec,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    effort: str | None = None,
) -> Agent:
    """Build a delegate-able worker Agent from a user-authored subagent
    spec (``.loom/agents/<name>.md`` — see :mod:`loom_code.extensions`).

    The worker's ``instructions`` LEAD with the frontmatter
    ``description`` followed by the markdown body, because
    ``Supervisor`` shows the coordinator only the first ~200 chars of
    each worker's instructions (not a separate description field) — so
    the description must come first for the coordinator to route to it
    correctly. Empty parts are dropped, so a spec with a body but no
    description does not start with a blank separator.

    Tools come from the spec's ``tools:`` list mapped through
    :func:`_custom_tool_factories`; an empty list defaults to the
    read-only kernel. Names are de-duplicated (first occurrence wins)
    and unknown names are skipped. Permissions + the approval handler
    are wired ONLY when the requested tools include a destructive one
    (write/edit/multi_edit/bash) — a read-only subagent needs no gate.
    ``model`` falls back to the coordinator's model when the spec
    doesn't override it.

    ``skills``, ``auto_compact_at_tokens``, ``snip_window`` and
    ``effort`` are passed through to ``Agent`` unchanged.
    """
    factories = _custom_tool_factories(project.root)

    # ``dict.fromkeys`` removes repeats while keeping first-seen order,
    # so a spec listing the same tool twice attaches it once rather
    # than giving the Agent two tools with the same name.
    requested = tuple(dict.fromkeys(spec.tools or _DEFAULT_CUSTOM_TOOLS))
    tools: list[Any] = [
        factories[name]() for name in requested if name in factories
    ]
    has_destructive = any(
        name in _DESTRUCTIVE_TOOL_NAMES for name in requested
    )

    # Join only the non-empty parts: an empty/None description must not
    # put "\n\n" (or the text "None") at the front of the instructions.
    # With nothing to join, fall back to the raw description exactly as
    # before.
    parts = [
        part for part in (spec.description, spec.system_prompt) if part
    ]
    instructions = "\n\n".join(parts) if parts else spec.description

    # Resolve the model once; the retry policy is keyed on the same value.
    resolved_model = spec.model or model

    return Agent(
        instructions,
        model=resolved_model,
        architecture=ReAct(),
        tools=tools,
        skills=skills,
        permissions=StandardPermissions() if has_destructive else None,
        approval_handler=approval_handler if has_destructive else None,
        prompt_caching=True,
        # Writers get the full budget; read-only specialists answer a
        # scoped question and exit (same split as the builtin roster).
        max_turns=(
            _CODER_MAX_TURNS if has_destructive else _SPECIALIST_MAX_TURNS
        ),
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        effort=effort,
        persist_tool_transcripts=True,
        tuning=Tuning(
            retry_policy=patient_retry_policy_for(resolved_model)
        ),
    )
|