loom-code 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. loom_code/__init__.py +22 -0
  2. loom_code/_post_commit.py +119 -0
  3. loom_code/agent.py +544 -0
  4. loom_code/approval.py +616 -0
  5. loom_code/browse/__init__.py +291 -0
  6. loom_code/browse/act.py +467 -0
  7. loom_code/browse/observe.py +249 -0
  8. loom_code/browse/session.py +96 -0
  9. loom_code/browse/verify.py +194 -0
  10. loom_code/checkpoint.py +283 -0
  11. loom_code/cli.py +495 -0
  12. loom_code/code_index.py +703 -0
  13. loom_code/compact.py +143 -0
  14. loom_code/consent.py +47 -0
  15. loom_code/credentials.py +527 -0
  16. loom_code/edit_tool.py +635 -0
  17. loom_code/extensions.py +522 -0
  18. loom_code/file_history.py +322 -0
  19. loom_code/file_tools.py +93 -0
  20. loom_code/git_hook.py +200 -0
  21. loom_code/grep_tool.py +430 -0
  22. loom_code/hooks.py +297 -0
  23. loom_code/loominit/__init__.py +23 -0
  24. loom_code/loominit/_ast_walk.py +429 -0
  25. loom_code/loominit/_files.py +284 -0
  26. loom_code/loominit/_graph.py +141 -0
  27. loom_code/loominit/_resolve.py +392 -0
  28. loom_code/loominit/_tests_map.py +108 -0
  29. loom_code/loominit/extractor.py +332 -0
  30. loom_code/loominit/repomap.py +225 -0
  31. loom_code/loominit/schema.py +242 -0
  32. loom_code/lsp_tools.py +396 -0
  33. loom_code/mcp_host.py +79 -0
  34. loom_code/operator.py +449 -0
  35. loom_code/paste.py +97 -0
  36. loom_code/paths.py +52 -0
  37. loom_code/permissions.py +177 -0
  38. loom_code/project.py +104 -0
  39. loom_code/prompts.py +451 -0
  40. loom_code/render.py +783 -0
  41. loom_code/repl.py +4080 -0
  42. loom_code/rules.py +267 -0
  43. loom_code/sandboxed_bash.py +176 -0
  44. loom_code/scribe.py +88 -0
  45. loom_code/skills/__init__.py +16 -0
  46. loom_code/skills/graphify/SKILL.md +97 -0
  47. loom_code/skills/graphify/tools.py +570 -0
  48. loom_code/trust.py +216 -0
  49. loom_code/turn.py +169 -0
  50. loom_code/web_fetch.py +370 -0
  51. loom_code/workers.py +758 -0
  52. loom_code/worktree.py +134 -0
  53. loom_code-0.1.1.dist-info/METADATA +224 -0
  54. loom_code-0.1.1.dist-info/RECORD +58 -0
  55. loom_code-0.1.1.dist-info/WHEEL +5 -0
  56. loom_code-0.1.1.dist-info/entry_points.txt +2 -0
  57. loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
  58. loom_code-0.1.1.dist-info/top_level.txt +1 -0
loom_code/workers.py ADDED
@@ -0,0 +1,758 @@
1
+ """The worker roster for loom-code's ``Team.supervisor``.
2
+
3
+ loom-code is a hierarchical team: a coordinator Agent (the tech
4
+ lead) delegates to these workers via loomflow's ``Supervisor``
5
+ architecture. Each worker is a full loomflow ``Agent`` with a
6
+ ``ReAct`` loop — the coordinator hands it a focused task through
7
+ the ``delegate`` tool and it runs to completion.
8
+
9
+ The roster is sliced by VERB, and one invariant holds it together:
10
+
11
+ * **coder** — the ONLY writer. Full file-and-shell kernel
12
+ (read/write/edit/grep/find/ls/bash). Every actual change to the
13
+ codebase happens here, one delegation at a time.
14
+ * **explorer** — read-only investigation → a briefing.
15
+ * **auditor** — read-only defect hunt (security / perf /
16
+ correctness lens) → tagged findings.
17
+ * **reviewer** — read-only inspection + ``bash`` to run the
18
+ project's tests → a pass/fail verdict.
19
+
20
+ Because only ``coder`` writes, the coordinator can delegate the
21
+ three read-only workers in parallel with zero risk of filesystem
22
+ races (loomflow's Supervisor gets parallel delegation for free —
23
+ ReAct dispatches multiple ``delegate`` calls in one turn through
24
+ an ``anyio`` task group). The coordinator serialises ``coder``
25
+ delegations itself.
26
+
27
+ Workers inherit the shared notebook (``workspace=``) and the
28
+ coordinator's memory via loomflow's ambient propagation — they
29
+ are NOT given their own, so there's one notebook and one memory
30
+ db for the whole team.
31
+
32
+ Memory propagation became real in loomflow 0.10.15 (before that,
33
+ ``Team.supervisor(memory=...)`` propagated to the coordinator
34
+ only and workers silently fell back to ephemeral
35
+ ``InMemoryMemory``). Combined with ``persist_tool_transcripts=True``
36
+ on each worker (also 0.10.15+), the worker's ``read`` / ``edit`` /
37
+ ``bash`` results land in the coordinator's sqlite memory keyed
38
+ by the worker's stable session_id — so the same worker delegated
39
+ to twice no longer re-reads the same file. See the per-worker
40
+ constructors below for the wiring and the ``BUILD_LOG`` for the
41
+ diagnosis that led to this.
42
+ """
43
+
44
+ from __future__ import annotations
45
+
46
+ from collections.abc import Awaitable, Callable
47
+ from typing import Any
48
+
49
+ from loomflow import Agent, StandardPermissions, Tuning
50
+ from loomflow.architecture import ReAct
51
+ from loomflow.tools import (
52
+ bash_tool,
53
+ find_tool,
54
+ ls_tool,
55
+ read_tool,
56
+ write_tool,
57
+ )
58
+
59
+ from .code_index import codebase_search_tool
60
+ from .credentials import patient_retry_policy_for
61
+ from .edit_tool import multi_edit_tool
62
+ from .edit_tool import verifying_edit_tool as edit_tool
63
+ from .extensions import AgentSpec
64
+ from .file_tools import loom_read_tool
65
+ from .grep_tool import enhanced_grep_tool as grep_tool
66
+ from .lsp_tools import lsp_tools
67
+ from .project import Project
68
+ from .prompts import build_coder_prompt
69
+ from .web_fetch import web_fetch_tool
70
+
71
# Turn budgets, passed as ``max_turns=`` to each worker's ``Agent``.
#
# The coder does real, multi-step work — it gets a generous turn
# budget. Read-only specialists answer a scoped question and exit,
# so they're capped tighter; the reviewer sits in the middle
# because running a test suite + iterating on failures legitimately
# takes more turns than answering one question.
#
# Custom (user-authored) workers reuse these: ones that hold a
# destructive tool get the coder budget, read-only ones get the
# specialist budget.
_CODER_MAX_TURNS = 60
_SPECIALIST_MAX_TURNS = 20  # explorer + auditor — one scoped question
_REVIEWER_MAX_TURNS = 30  # tests can iterate

# Per-result summarisation threshold (chars) for the read-only
# workers' ``tool_result_summarizer``; handed to loomflow as
# ``Tuning(tool_result_summary_threshold=...)``.
#
# IMPORTANT semantics: loomflow replaces the result IN-TURN — the
# worker never sees the verbatim output, only a ≤512-token digest.
# So this is strictly a last-resort bound: 20k chars (~5k tokens)
# leaves normal reads/greps/test runs verbatim and compresses only
# the genuine dumps that would otherwise 400 the run with
# context_length_exceeded — where a digest beats a dead turn.
#
# Do NOT lower this to "save tokens": below it, fidelity loss
# (explorer briefings and reviewer verdicts built from digests)
# costs more than the tokens save.
SUMMARY_THRESHOLD_CHARS = 20_000
91
+
92
# Base system prompt for the ``explorer`` worker. It describes only
# the tools every explorer has (read/grep/find/ls/web_fetch); the
# optional ``web_search`` paragraph lives in ``_EXPLORER_WEB_HINT``
# and is appended by ``_explorer_prompt`` when that tool is attached.
# The trailing backslash after the opening quotes suppresses a
# leading newline in the prompt.
# NOTE(review): leading whitespace inside this literal is reproduced
# exactly as it appears in the reviewed source — confirm the
# continuation-line indentation of the bullets against the original.
_EXPLORER_PROMPT = """\
You are the EXPLORER on a loom-code team — a read-only
investigator. A tech lead delegates ONE question about the
codebase; answer it thoroughly and hand the answer back.

You have read-only tools: `read`, `grep`, `find`, `ls` (scoped to
the project root) and `web_fetch` (HTTPS URLs; GitHub blob URLs
auto-rewrite to raw). NO write/edit/bash — you cannot change
anything, and must not try.

How you work:
- Start broad (`find`/`ls`/`grep`), then `read` the files that
matter. Follow the actual wiring — imports, call sites, config
— don't guess.
- For URLs the lead names, use `web_fetch(url=...)` — never
substitute a local file for a remote source. A full repo needs
`bash git clone`, which only `coder` has — say so in your
report and the lead can re-route.
- Answer concretely: cite `path:line` for every claim, quote the
key code, answer every sub-part, and end with a short summary
the lead can act on.

**When your finding is non-trivial, write ONE finding note** —
`note(kind="finding", title="<short, keyword-rich>",
content=<findings with path:line citations>)` — so the next
fresh-session worker reuses it instead of re-investigating. SKIP
the note for a quick lookup or one-line answer; restating a
simple answer is noise.

Be exhaustive on facts, terse on prose. No summary documents or
banners; your report is the only thing read.
"""
124
+
125
# Appended onto _EXPLORER_PROMPT only when the explorer was built
# with a web_tool — promising a tool the agent doesn't have wastes
# turns on failed tool calls. The literal deliberately begins with
# an empty line so the hint reads as its own paragraph once it is
# concatenated after the base prompt.
_EXPLORER_WEB_HINT = """\

You also have `web_search(query=...)` for investigation *outside*
the codebase — an upstream library's documented behaviour, an
external API's contract, a CVE, a third-party error's known
cause. Use it AFTER you've read the relevant project code, not
instead. Keyword queries beat sentences. Cite the source URL in
your finding note.
"""
137
+
138
+
139
def _explorer_prompt(has_web: bool) -> str:
    """Assemble the explorer's system prompt.

    The base prompt is always used. The ``web_search`` paragraph is
    tacked on only when ``has_web`` is truthy, so the agent is never
    told about a tool it was not actually given.
    """
    web_section = _EXPLORER_WEB_HINT if has_web else ""
    return _EXPLORER_PROMPT + web_section
145
+
146
# System prompt for the ``auditor`` worker: a read-only defect hunt
# through a security / performance / correctness lens, reported as
# severity-tagged findings. Used verbatim by ``_build_auditor``.
# NOTE(review): leading whitespace inside this literal is reproduced
# exactly as it appears in the reviewed source — confirm the
# indentation of the nested bullets against the original.
_AUDITOR_PROMPT = """\
You are the AUDITOR on a loom-code team — a read-only inspector.
A tech lead delegates a focus area and a lens (security,
performance, or correctness). Your job: hunt for PROBLEMS.

You have read-only tools: `read`, `grep`, `find`, `ls`. NO
write/edit/bash — you find problems, you do not fix them.

How you work:
- Read the focus area carefully; trace inputs to where they're
used. Through your lens, hunt concrete defects:
- security: injection, unsanitised input, secrets in code,
path traversal, unsafe deserialization, missing authz.
- performance: N+1 patterns, work in hot loops, unbounded
growth, sync I/O on a hot path.
- correctness: unhandled edge cases, off-by-one, swallowed
errors, race conditions, wrong input assumptions.
- Report each finding as a list item tagged severity:
`[blocker]` — a real bug / vulnerability, must fix.
`[risk]` — likely wrong or fragile, worth a closer look.
`[nit]` — minor, optional.
Cite `path:line` for every finding and quote the offending code.
- If you find nothing real, say so — do NOT invent problems to
look thorough.

**When you found real issues, write ONE finding note** —
`note(kind="finding", title="<area>: <severity gist>",
content=<tagged findings with path:line citations>)` — so the
lead and next worker reuse it instead of re-auditing. Nothing
notable → SKIP the note; don't write one to record "no issues".

End with a one-line summary: how many blockers / risks / nits. No
summary documents or banners — the report is the only thing read.
"""
180
+
181
# System prompt for the ``reviewer`` worker: independent
# verification of a change, using the read-only tools plus ``bash``
# to run the project's own tests, ending in a ``VERDICT: pass`` /
# ``VERDICT: fail`` line. Used verbatim by ``_build_reviewer``.
# NOTE(review): leading whitespace inside this literal is reproduced
# exactly as it appears in the reviewed source — confirm the
# indentation of the nested bullets against the original.
_REVIEWER_PROMPT = """\
You are the REVIEWER on a loom-code team — a verification
specialist. A tech lead delegates a description of a change just
made. Independently confirm it is correct, complete, and safe.

You have `read`, `grep`, `find`, `ls`, and `bash`. Use `bash` to
run the project's OWN tests / linters / build — not improvised
checks. NO write/edit — you do not fix things, you REPORT.

How you work:
- Re-read the changed files yourself. Don't trust the description.
- Run the verification command (test suite, build, type-check).
- Look for: broken callers, missing edge cases, untested paths,
things the change claimed but didn't do, regressions.
- Report findings as a list, each tagged severity:
`[blocker]` — must fix before this is done.
`[risk]` — probably wrong / fragile, worth a second look.
`[nit]` — minor, optional.
- All good → `VERDICT: pass` plus the evidence (which tests ran,
what passed). Otherwise `VERDICT: fail` and the blockers.

You are the last line before the user sees the work. Be skeptical.
"""
204
+
205
+
206
def _read_only_tools(
    project: Project,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> list[Any]:
    """Build the read-only inspection kernel rooted at the project.

    Always contains `read`, `grep`, `find`, `ls`, `web_fetch` and the
    LSP navigation tools, in that order. The explorer and auditor use
    this list as-is; the reviewer appends `bash` to it.

    ``web_fetch`` is included so read-only specialists can reach URLs
    and GitHub raw files instead of silently substituting a local
    file for a remote source. It neither writes to disk nor spawns a
    shell, so the sole-writer invariant is untouched.

    ``embedder`` (``"openai"`` / ``"hash"``): when not ``None``, the
    read-only ``codebase_search`` semantic tool is spliced in right
    after `grep`, letting workers find code by meaning rather than by
    string match. It uses the same embedder name as the coordinator
    and memory so every agent queries one shared index. ``workspace``
    is forwarded to that tool so learned notes are fused into its
    results. With ``embedder=None`` (the default) the legacy kernel
    is returned unchanged.
    """
    base = project.root
    kernel: list[Any] = [
        read_tool(base),
        grep_tool(base),
        find_tool(base),
        ls_tool(base),
        web_fetch_tool(),
        # jedi-backed go_to_definition / find_references / hover:
        # static analysis only (Python only, no embedder required),
        # so workers can navigate by symbol rather than by grep.
        *lsp_tools(base),
    ]
    if embedder is None:
        return kernel

    semantic = codebase_search_tool(base, embedder, workspace=workspace)
    # Keep the historical position: index 2, directly after grep.
    return [*kernel[:2], semantic, *kernel[2:]]
249
+
250
+
251
def _build_coder(
    project: Project,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None,
    has_web: bool = False,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    effort: str | None = None,
    mcp_registry: Any | None = None,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
    embedder: str | None = None,
    workspace: Any | None = None,
    memory: Any | None = None,
    attach_workspace: bool = False,
    persist_tool_transcripts: bool = True,
) -> Agent:
    """Build the coder — the team's only writer.

    The coder holds the full file-and-shell kernel rooted at the
    project. `StandardPermissions` routes its destructive tools
    (write / edit / bash) through the shared ``approval_handler``.

    ``has_web`` switches on the `web_search` section of the prompt.
    It must agree with whether ``build_workers`` really attaches the
    tool, otherwise the prompt advertises something that isn't there.

    ``sandbox=True`` replaces the plain ``bash`` with the
    kernel-sandboxed ``sandboxed_bash_tool`` (sandbox-exec on macOS,
    bwrap on Linux): commands may only WRITE under the project root
    and get NO network unless ``sandbox_allow_network`` is set. Only
    ``bash`` — the tool that runs arbitrary code — is sandboxed;
    ``edit`` / ``write`` stay behind the approval gate. Off by default.

    ``embedder`` adds the read-only ``codebase_search`` tool so the
    coder can locate the code to change by meaning before editing.

    ``mcp_registry`` (an ``MCPRegistry``, typed ``Any`` so the
    ``mcp`` extra is not a hard import) gives the coder the user's
    MCP-server tools. The coder is the only worker that gets them,
    being the sole writer/executor. When provided, the static tools
    are wrapped in an ``McpAugmentedHost`` so MCP tools resolve
    lazily (connect on first use) and static builtins win any name
    collision.

    ``memory`` / ``attach_workspace``: when delegated to, the coder
    inherits the coordinator's memory and workspace ambiently, so
    both stay off (``None`` / ``False``). The SOLO fast path
    (:func:`loom_code.agent.build_solo_agent`) runs this agent
    standalone with no parent to inherit from, so it passes the
    shared memory explicitly and attaches the notebook workspace to
    make ``note`` / ``search_notes`` available.

    ``persist_tool_transcripts=False`` is for that SOLO fast path
    too: there the coder shares the REPL ``session_id`` with the
    read-only coordinator, and persisting write/bash transcripts
    would make the coordinator rehydrate a history of "itself"
    editing files — the grind-it-myself failure the read-only design
    exists to prevent. Solo turns are small, so losing transcript
    reuse costs little. As a delegate worker the coder keeps ``True``
    because its sessions are worker-private.
    """
    base = project.root

    # Only the shell runs arbitrary code, so it is the only tool that
    # gets kernel-sandboxed on request; edit/write touch just what
    # the model names and already pass through the approval gate.
    if not sandbox:
        shell = bash_tool(base, timeout=300.0)
    else:
        from .sandboxed_bash import sandboxed_bash_tool

        shell = sandboxed_bash_tool(
            base, allow_network=sandbox_allow_network, timeout=300.0
        )

    kernel: list[Any] = [
        # loom_read_tool is the policy-bounded read: like
        # edit/multi_edit it can reach files outside the project that
        # the user referenced, but refuses an outside read the user
        # never named.
        loom_read_tool(base),
        write_tool(base),
        edit_tool(base),
        multi_edit_tool(base),
        grep_tool(base),
        find_tool(base),
        ls_tool(base),
        shell,
        web_fetch_tool(),
        # Read-only jedi navigation, so the writer finds the symbol
        # to change by resolution instead of by grep.
        *lsp_tools(base),
    ]
    if embedder is not None:
        # Semantic lookup against the same shared index the other
        # agents use; spliced in at index 5, right after grep.
        kernel[5:5] = [
            codebase_search_tool(base, embedder, workspace=workspace)
        ]

    # Without MCP the plain list is handed over and the framework
    # wraps it in an InProcessToolHost itself. With MCP that host is
    # built here and composed with the registry into one ToolHost.
    if mcp_registry is None:
        tool_host: Any = kernel
    else:
        from loomflow.tools.registry import InProcessToolHost

        from .mcp_host import McpAugmentedHost

        tool_host = McpAugmentedHost(
            InProcessToolHost(kernel), mcp_registry
        )

    system_prompt = build_coder_prompt(project, has_web=has_web)
    shared_workspace = workspace if attach_workspace else None

    return Agent(
        system_prompt,
        model=model,
        architecture=ReAct(),
        tools=tool_host,
        memory=memory,
        workspace=shared_workspace,
        # Bundled skills (graphify, etc.) go on workers as well as
        # the coordinator. Otherwise a delegated "build the graph"
        # lands on a coder whose tool host lacks ``graphify__build``,
        # and it falls back to ``bash graphify__build``, which does
        # not exist.
        skills=skills,
        permissions=StandardPermissions(),
        approval_handler=approval_handler,
        prompt_caching=True,
        max_turns=_CODER_MAX_TURNS,
        # snip_window trims the rehydrated history to a bounded
        # window before each run and is THE active context bound: a
        # heavily used session's accumulated tool transcripts (many
        # ≤50KB entries) would otherwise overflow the model window
        # and 400 with context_length_exceeded. The coordinator has
        # it, so workers need it too.
        snip_window=snip_window,
        # Only fires between Ralph stop-hook iterations, which
        # loom-code disables (max_stop_hook_iterations=0) — so it is
        # snip_window, not this, that protects a single run.
        auto_compact_at_tokens=auto_compact_at_tokens,
        effort=effort,
        # Persistent tool transcripts (loomflow 0.10.15+). Without
        # them the coder forgets every read / edit / bash result
        # between delegations despite keeping its session_id, and
        # re-reading the same file several times per task is the
        # biggest token leak in long sessions. With them,
        # session_messages() rehydrates the earlier transcript so the
        # coder quotes what it read instead of re-running `read`.
        # The solo fast path passes False — see the docstring.
        persist_tool_transcripts=persist_tool_transcripts,
        # Patient retry schedule for free-tier / litellm providers;
        # None elsewhere means the loomflow default. See
        # patient_retry_policy_for.
        tuning=Tuning(retry_policy=patient_retry_policy_for(model)),
    )
403
+
404
+
405
def _build_explorer(
    project: Project,
    *,
    model: str,
    has_web: bool = False,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> Agent:
    """Build the explorer — the read-only investigator.

    None of its tools are destructive, so it gets neither a
    permissions policy nor an approval handler.

    ``has_web`` turns on the `web_search` section of the prompt and
    has to match what ``build_workers`` wires up. ``embedder`` adds
    the read-only ``codebase_search`` semantic tool; concept-level
    lookups make the explorer its main beneficiary.
    """
    system_prompt = _explorer_prompt(has_web)
    kernel = _read_only_tools(project, embedder, workspace)
    tuning = Tuning(
        tool_result_summary_threshold=SUMMARY_THRESHOLD_CHARS,
        retry_policy=patient_retry_policy_for(model),
    )
    return Agent(
        system_prompt,
        model=model,
        architecture=ReAct(),
        tools=kernel,
        skills=skills,
        prompt_caching=True,
        max_turns=_SPECIALIST_MAX_TURNS,
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        # Oversized tool results are compressed by a cheap model.
        # That is safe for the explorer because it hands back a
        # BRIEFING rather than exact-match edits, so a faithful
        # digest of a read/grep dump loses nothing it needs. This is
        # the per-result bound that snip (turn-count based) and
        # auto-compact (never fires inside a worker run, since stop
        # hooks are off on workers) cannot supply.
        tool_result_summarizer=tool_result_summarizer,
        tuning=tuning,
        effort=effort,
        # Same reasoning as for the coder: with transcripts
        # persisted, a "how does X work, then check Y" exchange does
        # not re-grep and re-read X's files when Y arrives as a
        # follow-up via ``send_message``.
        persist_tool_transcripts=True,
    )
451
+
452
+
453
def _build_auditor(
    project: Project,
    *,
    model: str,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> Agent:
    """Build the auditor — the read-only defect hunter.

    It receives exactly the explorer's tool kernel; only the prompt,
    and therefore the objective, differs.
    """
    kernel = _read_only_tools(project, embedder, workspace)
    tuning = Tuning(
        tool_result_summary_threshold=SUMMARY_THRESHOLD_CHARS,
        retry_policy=patient_retry_policy_for(model),
    )
    return Agent(
        _AUDITOR_PROMPT,
        model=model,
        architecture=ReAct(),
        tools=kernel,
        skills=skills,
        prompt_caching=True,
        max_turns=_SPECIALIST_MAX_TURNS,
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        # Like the explorer, the auditor produces briefings rather
        # than exact edits, so summarised tool output is acceptable.
        tool_result_summarizer=tool_result_summarizer,
        tuning=tuning,
        effort=effort,
        # Persisted transcripts let the auditor build up context on
        # its focus area across rounds as findings are iterated on.
        persist_tool_transcripts=True,
    )
489
+
490
+
491
def _build_reviewer(
    project: Project,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    embedder: str | None = None,
    workspace: Any | None = None,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
) -> Agent:
    """Independent verifier — read-only inspection plus `bash` to
    run the project's real test suite. `bash` is gated through the
    same approval handler as the coder; it has no write/edit, so
    it reports but never fixes.

    ``sandbox`` / ``sandbox_allow_network`` mirror the coder's
    parameters: with ``sandbox=True`` the reviewer's ``bash`` is the
    kernel-sandboxed ``sandboxed_bash_tool`` instead of the plain
    one. The reviewer's shell runs arbitrary commands just like the
    coder's, so leaving it unsandboxed would let a team built with
    ``sandbox=True`` escape the sandbox through the reviewer. Both
    default to off, which is the previous behaviour."""
    root = project.root
    # Same selection logic as the coder so one ``sandbox`` flag covers
    # every shell the builtin roster can reach.
    if sandbox:
        from .sandboxed_bash import sandboxed_bash_tool

        bash = sandboxed_bash_tool(
            root, allow_network=sandbox_allow_network, timeout=300.0
        )
    else:
        bash = bash_tool(root, timeout=300.0)
    return Agent(
        _REVIEWER_PROMPT,
        model=model,
        architecture=ReAct(),
        tools=[
            *_read_only_tools(project, embedder, workspace),
            bash,
        ],
        skills=skills,
        permissions=StandardPermissions(),
        approval_handler=approval_handler,
        prompt_caching=True,
        max_turns=_REVIEWER_MAX_TURNS,
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        # Briefings + test verdicts, not exact edits — a summarised
        # pytest dump keeps the failures, drops the dots.
        tool_result_summarizer=tool_result_summarizer,
        tuning=Tuning(
            tool_result_summary_threshold=SUMMARY_THRESHOLD_CHARS,
            retry_policy=patient_retry_policy_for(model),
        ),
        effort=effort,
        # Reviewer benefits too: re-review cycles ("you flagged X,
        # the coder fixed it, recheck") no longer re-read every
        # changed file from scratch.
        persist_tool_transcripts=True,
    )


def build_workers(
    project: Project,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None = None,
    web_backend: str | None = None,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    tool_result_summarizer: str | None = None,
    effort: str | None = None,
    mcp_registry: Any | None = None,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
    embedder: str | None = None,
    workspace: Any | None = None,
) -> dict[str, Agent]:
    """Build the worker roster for ``Team.supervisor``.

    Returns ``{"coder", "explorer", "auditor", "reviewer"}`` — the
    dict keys become each worker's delegate name AND its author
    identity in the shared notebook. All four run on the same
    ``model`` as the coordinator; the specialism is in the prompt
    + tool scoping, not a weaker model.

    Only ``coder`` and ``reviewer`` get a permissions policy +
    approval handler (they hold destructive tools); ``explorer``
    and ``auditor`` are purely read-only.

    ``web_backend``: ``"serper"`` or ``"duckduckgo"`` to enable
    ``loomflow.tools.web_tool`` (``web_search``) on ``coder`` +
    ``explorer``. The coder needs it to look up library APIs while
    implementing; the explorer for investigation that goes beyond
    the codebase. Auditor + reviewer get no web *search* — keeps
    their cost predictable and their scope honest. (Every worker
    still has the read-only ``web_fetch`` for URLs it is handed.)
    ``None`` (default) leaves web search off entirely.

    ``sandbox`` / ``sandbox_allow_network`` apply to every builtin
    worker that holds ``bash`` — the coder AND the reviewer — so
    enabling the sandbox leaves no unsandboxed shell in the roster.

    ``tool_result_summarizer`` (a cheap model name) compresses
    oversized tool results on the READ-ONLY workers (explorer /
    auditor / reviewer) — they return briefings, so a faithful
    summary loses nothing. The CODER is deliberately excluded:
    it needs verbatim ``read`` output to construct exact-match
    ``edit`` old_strings; a summarised read would make every
    subsequent edit miss.
    """
    has_web = web_backend is not None
    workers: dict[str, Agent] = {
        "coder": _build_coder(
            project,
            model=model,
            approval_handler=approval_handler,
            has_web=has_web,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            effort=effort,
            mcp_registry=mcp_registry,
            sandbox=sandbox,
            sandbox_allow_network=sandbox_allow_network,
            embedder=embedder,
            workspace=workspace,
        ),
        "explorer": _build_explorer(
            project,
            model=model,
            has_web=has_web,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            tool_result_summarizer=tool_result_summarizer,
            effort=effort,
            embedder=embedder,
            workspace=workspace,
        ),
        "auditor": _build_auditor(
            project,
            model=model,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            tool_result_summarizer=tool_result_summarizer,
            effort=effort,
            embedder=embedder,
            workspace=workspace,
        ),
        "reviewer": _build_reviewer(
            project,
            model=model,
            approval_handler=approval_handler,
            skills=skills,
            auto_compact_at_tokens=auto_compact_at_tokens,
            snip_window=snip_window,
            tool_result_summarizer=tool_result_summarizer,
            effort=effort,
            embedder=embedder,
            workspace=workspace,
            # The reviewer's bash must honour the same sandbox
            # settings as the coder's.
            sandbox=sandbox,
            sandbox_allow_network=sandbox_allow_network,
        ),
    }
    if has_web:
        # One shared web_tool instance — same Tool object on both
        # workers. Cheap; nothing in the tool's lifecycle is per-
        # worker. If the backend is misconfigured (e.g. serper
        # without a key) ``web_tool`` raises ConfigError here; the
        # caller (REPL's /set_web) is expected to validate first.
        # ``has_web=True`` was already threaded into the prompts —
        # the model knows the tool exists; here we actually attach
        # it.
        from loomflow.tools import web_tool

        web = web_tool(backend=web_backend)  # type: ignore[arg-type]
        workers["coder"].add_tool(web)
        workers["explorer"].add_tool(web)
    return workers
651
+
652
+
653
# Builtin worker role names — protected. A user-authored subagent that
# names itself one of these is skipped rather than allowed to shadow
# the known roster (especially ``coder``, the sole writer).
BUILTIN_WORKER_NAMES = frozenset(
    {"coder", "explorer", "auditor", "reviewer"}
)

# The subset of tool names a custom subagent's ``tools:`` frontmatter
# may request that can change the filesystem or run code. A spec that
# asks for any of these gets a permissions policy + the approval
# handler (and the larger turn budget) in ``build_custom_worker``.
# The full name → factory table lives in ``_custom_tool_factories``.
# ``web_search`` is intentionally absent from that table — it needs
# backend wiring (``/set_web``); custom agents get ``web_fetch``
# (always available, read-only, no shell/disk write) instead.
_DESTRUCTIVE_TOOL_NAMES = frozenset(
    {"write", "edit", "multi_edit", "bash"}
)

# When a spec declares no ``tools:``, this read-only kernel is the
# default — we never hand a stranger's spec write/shell access
# implicitly. Mirrors the core of ``_read_only_tools`` (that helper
# additionally attaches the LSP tools and, optionally,
# ``codebase_search``, which custom specs cannot request by name).
_DEFAULT_CUSTOM_TOOLS = ("read", "grep", "find", "ls", "web_fetch")
673
+
674
+
675
def _custom_tool_factories(root: Any) -> dict[str, Callable[[], Any]]:
    """Return the tool-name → zero-argument factory table.

    Each factory builds its tool rooted at ``root`` when called, so
    nothing is constructed until a spec actually asks for it. Names
    missing from this table are simply unknown:
    :func:`build_custom_worker` skips them, so a misspelt tool name
    costs the spec that one tool rather than the whole agent.
    """

    def _read() -> Any:
        return read_tool(root)

    def _write() -> Any:
        return write_tool(root)

    def _edit() -> Any:
        return edit_tool(root)

    def _multi_edit() -> Any:
        return multi_edit_tool(root)

    def _grep() -> Any:
        return grep_tool(root)

    def _find() -> Any:
        return find_tool(root)

    def _ls() -> Any:
        return ls_tool(root)

    def _bash() -> Any:
        return bash_tool(root, timeout=300.0)

    def _web_fetch() -> Any:
        return web_fetch_tool()

    table: dict[str, Callable[[], Any]] = {}
    table["read"] = _read
    table["write"] = _write
    table["edit"] = _edit
    table["multi_edit"] = _multi_edit
    table["grep"] = _grep
    table["find"] = _find
    table["ls"] = _ls
    table["bash"] = _bash
    table["web_fetch"] = _web_fetch
    return table
692
+
693
+
694
def build_custom_worker(
    project: Project,
    spec: AgentSpec,
    *,
    model: str,
    approval_handler: Callable[..., Awaitable[bool]] | None,
    skills: list[Any] | None = None,
    auto_compact_at_tokens: int | None = None,
    snip_window: int = 0,
    effort: str | None = None,
) -> Agent:
    """Build a delegate-able worker Agent from a user-authored subagent
    spec (``.loom/agents/<name>.md`` — see :mod:`loom_code.extensions`).

    The worker's ``instructions`` LEAD with the frontmatter
    ``description`` followed by the markdown body, because
    ``Supervisor`` shows the coordinator only the first ~200 chars of
    each worker's instructions (not a separate description field) — so
    the description must come first for the coordinator to route to it
    correctly. An empty/missing part is dropped instead of leaving a
    blank-line prefix that would waste that preview.

    Tools come from the spec's ``tools:`` list mapped through
    :func:`_custom_tool_factories`; an empty list defaults to the
    read-only kernel. Unknown names are skipped and a name listed more
    than once is built only once (first occurrence wins the position).
    Permissions + the approval handler are wired ONLY when the
    requested tools include a destructive one
    (write/edit/multi_edit/bash) — a read-only subagent needs no gate.
    ``model`` falls back to the coordinator's model when the spec
    doesn't override it.

    NOTE(review): unlike the builtin coder, a custom worker's ``bash``
    is always the plain tool — there is no ``sandbox`` parameter here.
    Confirm whether custom specs should honour the sandbox setting.
    """
    root = project.root
    factories = _custom_tool_factories(root)
    # dict.fromkeys de-duplicates while preserving first-seen order, so
    # ``tools: [read, read]`` does not register the same tool twice.
    requested = tuple(dict.fromkeys(spec.tools or _DEFAULT_CUSTOM_TOOLS))
    tools: list[Any] = [
        factories[name]() for name in requested if name in factories
    ]
    has_destructive = not _DESTRUCTIVE_TOOL_NAMES.isdisjoint(requested)

    # Description first (it is what the coordinator routes on), then
    # the body; falsy parts are left out.
    instructions = "\n\n".join(
        part for part in (spec.description, spec.system_prompt) if part
    )

    # Resolve once so the Agent and its retry policy always agree.
    resolved_model = spec.model or model

    return Agent(
        instructions,
        model=resolved_model,
        architecture=ReAct(),
        tools=tools,
        skills=skills,
        permissions=StandardPermissions() if has_destructive else None,
        approval_handler=approval_handler if has_destructive else None,
        prompt_caching=True,
        # Writers get the full budget; read-only specialists answer a
        # scoped question and exit (same split as the builtin roster).
        max_turns=(
            _CODER_MAX_TURNS if has_destructive else _SPECIALIST_MAX_TURNS
        ),
        snip_window=snip_window,
        auto_compact_at_tokens=auto_compact_at_tokens,
        effort=effort,
        persist_tool_transcripts=True,
        tuning=Tuning(
            retry_policy=patient_retry_policy_for(resolved_model)
        ),
    )