loom-code 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. loom_code/__init__.py +22 -0
  2. loom_code/_post_commit.py +119 -0
  3. loom_code/agent.py +544 -0
  4. loom_code/approval.py +616 -0
  5. loom_code/browse/__init__.py +291 -0
  6. loom_code/browse/act.py +467 -0
  7. loom_code/browse/observe.py +249 -0
  8. loom_code/browse/session.py +96 -0
  9. loom_code/browse/verify.py +194 -0
  10. loom_code/checkpoint.py +283 -0
  11. loom_code/cli.py +495 -0
  12. loom_code/code_index.py +703 -0
  13. loom_code/compact.py +143 -0
  14. loom_code/consent.py +47 -0
  15. loom_code/credentials.py +527 -0
  16. loom_code/edit_tool.py +635 -0
  17. loom_code/extensions.py +522 -0
  18. loom_code/file_history.py +322 -0
  19. loom_code/file_tools.py +93 -0
  20. loom_code/git_hook.py +200 -0
  21. loom_code/grep_tool.py +430 -0
  22. loom_code/hooks.py +297 -0
  23. loom_code/loominit/__init__.py +23 -0
  24. loom_code/loominit/_ast_walk.py +429 -0
  25. loom_code/loominit/_files.py +284 -0
  26. loom_code/loominit/_graph.py +141 -0
  27. loom_code/loominit/_resolve.py +392 -0
  28. loom_code/loominit/_tests_map.py +108 -0
  29. loom_code/loominit/extractor.py +332 -0
  30. loom_code/loominit/repomap.py +225 -0
  31. loom_code/loominit/schema.py +242 -0
  32. loom_code/lsp_tools.py +396 -0
  33. loom_code/mcp_host.py +79 -0
  34. loom_code/operator.py +449 -0
  35. loom_code/paste.py +97 -0
  36. loom_code/paths.py +52 -0
  37. loom_code/permissions.py +177 -0
  38. loom_code/project.py +104 -0
  39. loom_code/prompts.py +451 -0
  40. loom_code/render.py +783 -0
  41. loom_code/repl.py +4080 -0
  42. loom_code/rules.py +267 -0
  43. loom_code/sandboxed_bash.py +176 -0
  44. loom_code/scribe.py +88 -0
  45. loom_code/skills/__init__.py +16 -0
  46. loom_code/skills/graphify/SKILL.md +97 -0
  47. loom_code/skills/graphify/tools.py +570 -0
  48. loom_code/trust.py +216 -0
  49. loom_code/turn.py +169 -0
  50. loom_code/web_fetch.py +370 -0
  51. loom_code/workers.py +758 -0
  52. loom_code/worktree.py +134 -0
  53. loom_code-0.1.1.dist-info/METADATA +224 -0
  54. loom_code-0.1.1.dist-info/RECORD +58 -0
  55. loom_code-0.1.1.dist-info/WHEEL +5 -0
  56. loom_code-0.1.1.dist-info/entry_points.txt +2 -0
  57. loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
  58. loom_code-0.1.1.dist-info/top_level.txt +1 -0
loom_code/repl.py ADDED
@@ -0,0 +1,4080 @@
1
+ """The loom-code interactive REPL.
2
+
3
+ ``loom-code`` with no args drops here. You chat, it codes, it asks
4
+ before destructive changes, you keep going — the Claude-Code / Pi
5
+ loop. Conversation continuity comes free: every turn reuses the
6
+ same ``session_id``, so loomflow rehydrates prior turns as real
7
+ chat history.
8
+
9
+ The self-improvement loop (Phase 3)
10
+ -----------------------------------
11
+
12
+ Every turn, the agent READS notes from the project notebook —
13
+ past plans, past findings (``recall_past_plans``, ``search_notes``,
14
+ ``read_note``). loomflow records those reads as *citations* on
15
+ ``RunResult.cited_slugs``. When a turn is judged successful, we
16
+ call ``workspace.attribute_outcome(success=True, slugs=...)`` — the
17
+ cited notes' ``cited_count`` / ``success_count`` climb, and future
18
+ ``search_notes(boost_relevance=True)`` ranks them higher.
19
+
20
+ How "success" is judged — the **moved-on heuristic**:
21
+
22
+ * We DON'T attribute immediately. We hold the last turn's
23
+ ``cited_slugs`` as ``pending``.
24
+ * If you give loom-code another task without complaint, the
25
+ previous turn must have been fine → attribute the pending as
26
+ ``success=True``.
27
+ * ``/bad`` attributes pending as ``success=False`` (it broke
28
+ something / wasn't useful).
29
+ * ``/good`` attributes pending as ``success=True`` immediately.
30
+ * On ``/exit``, any pending is attributed ``success=True`` — you
31
+ left satisfied.
32
+
33
+ That matches how a developer actually signals: silence + moving
34
+ on means "worked", an explicit "no" means "didn't".
35
+
36
+ Slash commands are handled here, never sent to the agent. The full
37
+ list is defined once in :data:`_COMMAND_DEFS` (grouped) and rendered
38
+ by :func:`_render_help` for ``/help`` and the autocomplete menu — see
39
+ there rather than duplicating the catalogue in this docstring.
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ import json
45
+ import os
46
+ import re
47
+ import sys
48
+ import time
49
+ from pathlib import Path
50
+ from typing import Any
51
+
52
+ import anyio
53
+ from loomflow import new_id
54
+ from prompt_toolkit import HTML, PromptSession
55
+ from prompt_toolkit.completion import (
56
+ CompleteEvent,
57
+ Completer,
58
+ Completion,
59
+ )
60
+ from prompt_toolkit.document import Document
61
+ from rich.text import Text
62
+
63
+ from . import checkpoint as _checkpoint
64
+ from . import file_history, worktree
65
+ from .agent import LOOM_DIR, build_agent, build_solo_agent
66
+ from .approval import ApprovalGate
67
+ from .compact import Compactor, default_compact_threshold
68
+ from .credentials import (
69
+ cheap_model_for,
70
+ ensure_key_for_model,
71
+ save_credential,
72
+ )
73
+ from .extensions import Extensions, HookSpec
74
+ from .extensions import discover as discover_extensions
75
+ from .hooks import run_repl_hooks
76
+ from .paste import (
77
+ build_paste_keybindings,
78
+ expand_pastes,
79
+ reset_paste_stash,
80
+ )
81
+ from .project import Project, detect_project
82
+ from .render import StreamRenderer, banner, console
83
+ from .trust import filter_trusted_hooks
84
+
85
+ # Provider defaults for /set_model — picking a provider switches
86
+ # to that provider's commonly-used model.
87
+ _OPENAI_DEFAULT_MODEL = "gpt-4.1-mini"
88
+ _ANTHROPIC_DEFAULT_MODEL = "claude-sonnet-4-6"
89
+
90
+ _USER_ID = "loom-code"
91
+
92
+ # DEFAULT cap on auto-continue iterations per turn. This is the
93
+ # Ralph-loop / Cursor-judge-agent pattern: the model's "I'm done"
94
+ # judgement is unreliable on multi-step plans, so the REPL
95
+ # overrules it as long as the plan explicitly disagrees.
96
+ #
97
+ # Bumped from 5 → 15 after empirical observation: real scaffold
98
+ # tasks the user threw at us had 6-12 plan steps, and 5 left them
99
+ # stuck mid-stream. 15 gives headroom; stall detection still kicks
100
+ # in early on genuinely-runaway loops so the higher cap doesn't
101
+ # inflate worst-case cost. Per-session overridable via
102
+ # ``/set_continue_cap N`` for power users who want more or less.
103
+ _AUTO_CONTINUE_LIMIT_DEFAULT = 15
104
+
105
+ # Consecutive IDENTICAL tool calls (same tool, same args) before the
106
+ # stall detector aborts the turn. loomflow's own no-progress hook only
107
+ # arms under /goal; this guards the everyday interactive path. 4 is
108
+ # high enough that legitimate retry-once patterns never trip it.
109
+ _STALL_REPEATS = 4
110
+
111
+ # @-completion caches the project file list this long (seconds) and
112
+ # filters it in-memory per keystroke, instead of re-walking the tree
113
+ # on every character — a cold walk per keypress froze the prompt on a
114
+ # large repo. Bounded so a huge monorepo can't build an enormous list.
115
+ _FILE_CACHE_TTL = 4.0
116
+ _FILE_CACHE_MAX = 20_000
117
+
118
+ # Pure greetings answered locally, zero tokens. Short prompts route to
119
+ # the heavy TEAM path (the anaphora rule), so before this a bare "hi"
120
+ # cost the full coordinator context (~6.6k tokens, observed live) and
121
+ # sometimes a hallucinated delegation on weak models. EXACT matches
122
+ # only — "ok"/"thanks" are moved-on feedback signals, and anything
123
+ # with more content deserves the model.
124
+ _GREETINGS = frozenset({
125
+ "hi", "hello", "hey", "yo", "hiya", "hola", "sup",
126
+ "good morning", "good afternoon", "good evening",
127
+ "hi there", "hello there", "hey there",
128
+ })
129
+
130
+
131
+ def _greeting_reply(prompt: str) -> str | None:
132
+ """A canned local reply when ``prompt`` is a bare greeting, else
133
+ None (run the model normally)."""
134
+ p = re.sub(r"[!.,\s]+$", "", prompt.strip().lower())
135
+ if p in _GREETINGS:
136
+ return (
137
+ "hi! give me a coding task — or [cyan]/help[/cyan] "
138
+ "for the command list."
139
+ )
140
+ return None
141
+
142
+
143
+ def _context_high_water(
144
+ prev: int, *, tokens_in: int, cached_in: int
145
+ ) -> int:
146
+ """Update the compaction trigger's context-occupancy estimate.
147
+
148
+ The last turn's INPUT tokens (uncached ``tokens_in`` + ``cached_in``)
149
+ already represent the *entire* conversation sent to the model that
150
+ turn — so their sum is a direct read of how full the context window
151
+ is right now. We track the **high-water mark**, never a running sum:
152
+ summing per-turn inputs double-counts, because each turn's input
153
+ re-includes all prior history, so a cumulative counter races far past
154
+ true occupancy and trips compaction much too early — discarding live
155
+ file/edit state into a lossy prose summary mid-task.
156
+
157
+ ``max`` (not plain assignment) so a brief dip — a short follow-up
158
+ turn whose input momentarily shrinks — doesn't un-arm a compaction
159
+ the conversation has genuinely grown to need. The counter is reset to
160
+ 0 by the caller on a fresh thread (compaction / clear / model switch /
161
+ resume), where occupancy genuinely starts over.
162
+ """
163
+ return max(prev, tokens_in + cached_in)
164
+
165
+
166
+ # Tool names the agents actually expose — used to recognise a tool
167
+ # call that a weak model emitted as PLAIN TEXT instead of through the
168
+ # structured tool-calling interface. (Observed live with
169
+ # phi-4-mini: final answer was literally
170
+ # ``{ "name": "read", "parameters": {"path": "FileA.py"} }``.)
171
+ _KNOWN_TOOL_NAMES = frozenset({
172
+ "read", "write", "edit", "multi_edit", "grep", "find", "ls",
173
+ "bash", "web_fetch", "web_search", "delegate", "codebase_search",
174
+ "plan_write", "plan_read", "note", "search_notes", "read_note",
175
+ "list_notes", "remember_rule", "go_to_definition",
176
+ "find_references", "hover",
177
+ })
178
+
179
+
180
+ def _looks_like_leaked_tool_call(text: str) -> bool:
181
+ """True when a final answer is clearly a tool CALL written as
182
+ prose — a bare JSON object naming a known tool with an args-like
183
+ key. The ReAct loop treats "no structured tool calls" as "model
184
+ is done", so without this guard the user sees raw JSON as the
185
+ answer and the tool never runs."""
186
+ t = text.strip()
187
+ # Unwrap a single fenced code block (```json ... ```).
188
+ if t.startswith("```") and t.endswith("```"):
189
+ t = t.strip("`").strip()
190
+ first, _, rest = t.partition("\n")
191
+ if rest and len(first) <= 10: # language tag line
192
+ t = rest.strip()
193
+ if not (t.startswith("{") and t.endswith("}")):
194
+ return False
195
+ import json
196
+
197
+ try:
198
+ obj = json.loads(t)
199
+ except (ValueError, TypeError):
200
+ return False
201
+ if not isinstance(obj, dict):
202
+ return False
203
+ name = obj.get("name") or obj.get("tool") or obj.get("function")
204
+ if isinstance(name, dict): # OpenAI shape: {"function": {"name": ..}}
205
+ name = name.get("name")
206
+ if not isinstance(name, str):
207
+ return False
208
+ has_args = any(
209
+ k in obj
210
+ for k in ("parameters", "arguments", "args", "input")
211
+ )
212
+ return has_args and name.rsplit(".", 1)[-1] in _KNOWN_TOOL_NAMES
213
+
214
+
215
+ # The bounded corrective prompt sent when a leaked tool call is
216
+ # detected. One nudge per turn — if the model leaks again, we show
217
+ # the raw output rather than loop.
218
+ _TOOL_LEAK_NUDGE = (
219
+ "Your previous reply was a tool invocation written as plain "
220
+ "text, which cannot be executed. Either call the tool through "
221
+ "the proper tool-calling interface, or answer the user's "
222
+ "request directly in prose. Do not print JSON."
223
+ )
224
+
225
+
226
def friendly_error_hint(exc: BaseException) -> str | None:
    """Map a model/provider error to an actionable one-liner.

    Matching is done on the lower-cased ``"<TypeName>: <message>"``
    text rather than on imported exception classes, so no provider SDK
    or loomflow type has to be importable here. Rules are tried in
    order and the first hit wins; ``None`` means there is nothing
    better to show than the raw message.
    """
    haystack = f"{type(exc).__name__}: {exc}".lower()

    # (needles, hint) in priority order — a rule fires when ANY of its
    # needles occurs in the haystack.
    rules = (
        (
            ("notfounderror", "404", "not found"),
            "the model id wasn't found at the provider — check the "
            "spelling, or /set_model to pick another",
        ),
        (
            ("authentication", "401", "invalid api key", "unauthorized"),
            "the API key was rejected — /set_model to re-enter it "
            "(or fix the env var / ~/.loom-code/credentials)",
        ),
        (
            ("ratelimit", "429", "rate limit"),
            "the provider rate-limited us and retries ran out — "
            "wait a moment and try again, or /model to switch",
        ),
        (
            ("timeout", "timed out"),
            "the turn timed out — try again, or narrow the ask; "
            "/undo restores the tree if it half-finished",
        ),
        (
            ("connection", "connect", "network"),
            "couldn't reach the provider — check your network / "
            "VPN, then try again",
        ),
    )
    for needles, hint in rules:
        if any(needle in haystack for needle in needles):
            return hint

    # Context overflow needs "context" AND one of two qualifiers, so it
    # does not fit the any-of table above.
    mentions_size = "length" in haystack or "window" in haystack
    if "context" in haystack and mentions_size:
        return (
            "the conversation outgrew the model's context window — "
            "/compress_token_length to lower the auto-compact "
            "threshold, or /clear for a fresh session"
        )
    return None
274
+
275
+
276
+ def _flatten_exception_group(
277
+ eg: BaseExceptionGroup,
278
+ ) -> list[BaseException]:
279
+ """Recursively unwrap nested ``BaseExceptionGroup`` into a flat
280
+ list of the underlying exceptions.
281
+
282
+ anyio task groups raise an ``ExceptionGroup`` whose default
283
+ ``str()`` is "unhandled errors in a TaskGroup (N sub-exception)"
284
+ — useless for the user. Flatten to surface what ACTUALLY went
285
+ wrong (the wrapper might nest more wrappers if multiple groups
286
+ were involved)."""
287
+ out: list[BaseException] = []
288
+ for inner in eg.exceptions:
289
+ if isinstance(inner, BaseExceptionGroup):
290
+ out.extend(_flatten_exception_group(inner))
291
+ else:
292
+ out.append(inner)
293
+ return out
294
+
295
+ # NOTE: the slash-command catalogue lives in ``_COMMAND_DEFS`` further
296
+ # down — the autocomplete menu (popped the moment the user types '/')
297
+ # and /help both read off that one list.
298
+ # Question-shaped prompts route to the TEAM coordinator (it answers
299
+ # read-only questions directly — no delegation tax — and holds the
300
+ # repo map). The heuristic only ever short-circuits TOWARD the team,
301
+ # so a false positive ("how about you rename X" → team) costs the
302
+ # status-quo overhead, never a capability.
303
+ _QUESTION_STARTERS = (
304
+ "what", "when", "where", "which", "who", "why", "how",
305
+ "is ", "are ", "does ", "do ", "did ", "can ", "could ",
306
+ "should ", "would ", "will ", "explain", "show me", "tell me",
307
+ "describe", "summarize", "summarise", "walk me through",
308
+ )
309
+
310
+
311
+ def _looks_like_question(prompt: str) -> bool:
312
+ """True when the prompt reads as a question / explanation request
313
+ rather than a change request."""
314
+ p = prompt.strip().lower()
315
+ return p.endswith("?") or p.startswith(_QUESTION_STARTERS)
316
+
317
+
318
+ # Pronouns/markers that point at conversation the SOLO/TEAM classifier
319
+ # cannot see. "fix it" after ten turns of discussion LOOKS trivial in
320
+ # isolation but may be a multi-file task — the team coordinator (which
321
+ # holds the session history) must take those.
322
+ _ANAPHORA_WORDS = frozenset(
323
+ {"it", "that", "this", "them", "those", "these", "above", "again"}
324
+ )
325
+
326
+
327
+ def _references_prior_context(prompt: str) -> bool:
328
+ """True when a SHORT prompt leans on prior conversation the
329
+ stateless classifier can't see ("fix it", "continue", "do that
330
+ again"). Long prompts may use "it" self-referentially and still
331
+ classify fine, so only short ones short-circuit."""
332
+ words = prompt.strip().lower().split()
333
+ if len(words) <= 2:
334
+ return True
335
+ if len(words) <= 6 and any(
336
+ w.strip(".,!:;") in _ANAPHORA_WORDS for w in words
337
+ ):
338
+ return True
339
+ return False
340
+
341
+
342
+ # The SOLO/TEAM classifier's whole system prompt. One word out; the
343
+ # cheap model handles this reliably. Biased toward TEAM — solo only
344
+ # for tasks where skipping the team round-trip is a pure win.
345
+ _ROUTER_PROMPT = """\
346
+ You route tasks for a terminal coding agent. Reply with EXACTLY one
347
+ word: SOLO or TEAM.
348
+
349
+ SOLO — one small, focused change a single capable agent should just
350
+ do: a one-file edit or bugfix, a rename confined to one place, adding
351
+ one test, tweaking a config value, running or adjusting one command,
352
+ a small mechanical change with an obvious definition of done.
353
+
354
+ TEAM — everything else: multi-file features or refactors, anything
355
+ needing investigation first ("find out why...", "figure out..."),
356
+ work that warrants independent review or running a test suite to
357
+ verify, vague or large scope, external integrations, anything
358
+ destructive or wide-reaching. Also TEAM: any task that leans on
359
+ prior conversation you cannot see ("fix it", "do that again",
360
+ "continue", "the bug we discussed") — you see only this one message.
361
+
362
+ When in doubt: TEAM.
363
+ """
364
+
365
+
366
+ # ``_COMMAND_DEFS`` is the single source of truth for slash commands — no need to also update the autocomplete separately.
367
+ #
368
+ # Each entry is (command, description, group). The GROUP tag lets
369
+ # /help print the commands clustered by purpose instead of as one
370
+ # flat 20-item wall — and both /help AND the autocomplete menu read
371
+ # off this one list, so there's a single source of truth. Add a
372
+ # command here and it shows up in both, correctly grouped, for free.
373
+ # Group order below is the order groups appear in /help.
374
+ _COMMAND_DEFS: list[tuple[str, str, str]] = [
375
+ # Coding — the day-to-day task loop.
376
+ ("/plan", "show the current plan, or start one", "Coding"),
377
+ (
378
+ "/goal",
379
+ "work until a condition is met (/goal make all tests pass)",
380
+ "Coding",
381
+ ),
382
+ ("/undo", "restore the working tree to the last checkpoint", "Coding"),
383
+ (
384
+ "/checkpoints",
385
+ "list auto-checkpoints (taken before each edit)",
386
+ "Coding",
387
+ ),
388
+ ("/good", "mark the last turn useful (credit notes)", "Coding"),
389
+ ("/bad", "mark the last turn unhelpful", "Coding"),
390
+ ("/init-loom", "create a starter AGENTS.md rules file", "Coding"),
391
+ # Isolation — sandbox this session in its own git worktree.
392
+ ("/isolate", "run this session in its own git worktree", "Isolate"),
393
+ ("/review", "show the isolated session's diff vs base", "Isolate"),
394
+ ("/merge", "merge the isolated session's edits into base", "Isolate"),
395
+ ("/discard", "discard the isolated session's edits", "Isolate"),
396
+ # Model & tools — how the agent thinks and what it can reach.
397
+ ("/model", "switch to a specific model by name", "Model"),
398
+ ("/effort", "reasoning effort: low | medium | high | off", "Model"),
399
+ (
400
+ "/mode",
401
+ "approval mode: default | accept-edits | plan | yolo",
402
+ "Model",
403
+ ),
404
+ ("/set_model", "pick a provider + model (saves API key)", "Model"),
405
+ ("/set_web", "enable web search (Serper / DuckDuckGo / off)", "Model"),
406
+ ("/mcp", "list connected MCP servers + their tools", "Model"),
407
+ # Session — state, cost, and history for the whole run.
408
+ ("/cost", "session cost + token totals", "Session"),
409
+ (
410
+ "/resume",
411
+ "resume the last session — or `/resume pick` to choose one",
412
+ "Session",
413
+ ),
414
+ ("/export", "save this conversation to a markdown file", "Session"),
415
+ (
416
+ "/set_continue_cap",
417
+ "set auto-continue cap (current=default 15)",
418
+ "Session",
419
+ ),
420
+ (
421
+ "/compact",
422
+ "compact the conversation NOW (fold history into a summary)",
423
+ "Session",
424
+ ),
425
+ (
426
+ "/compress_token_length",
427
+ "auto-compact threshold: <N> | auto | off",
428
+ "Session",
429
+ ),
430
+ ("/clear", "fresh conversation (new session)", "Session"),
431
+ ("/help", "show all commands", "Session"),
432
+ ("/exit", "leave (Ctrl-D also works)", "Session"),
433
+ # /computer (computer-operator mode) is HIDDEN for now — kept in code
434
+ # + dispatched if typed, but not advertised in help/autocomplete
435
+ # until it's ready to ship. Add an entry here to surface it again.
436
+ ]
437
+
438
+ # Group order for /help — groups render in this sequence; any group
439
+ # not listed falls to the end in first-seen order.
440
+ _HELP_GROUP_ORDER = ("Coding", "Isolate", "Model", "Session")
441
+
442
+
443
def _render_help() -> str:
    """Assemble the /help text from :data:`_COMMAND_DEFS`.

    Commands are bucketed by their group tag. Groups named in
    :data:`_HELP_GROUP_ORDER` are printed first, in that order; any
    other group follows in first-seen order. Descriptions are aligned
    on the widest command across the whole list.
    """
    grouped: dict[str, list[tuple[str, str]]] = {}
    for command, description, section in _COMMAND_DEFS:
        grouped.setdefault(section, []).append((command, description))

    listed = [g for g in _HELP_GROUP_ORDER if g in grouped]
    unlisted = [g for g in grouped if g not in _HELP_GROUP_ORDER]

    # One column width for ALL groups so the descriptions line up down
    # the entire listing, not only inside each group.
    column = max(len(command) for command, _, _ in _COMMAND_DEFS)

    out = ["[bold]loom-code commands[/bold]"]
    for section in listed + unlisted:
        out.append(f"\n [dim]{section}[/dim]")
        for command, description in grouped[section]:
            filler = " " * (column - len(command))
            out.append(f" [cyan]{command}[/cyan]{filler} {description}")
    out.append(
        "\nAnything else is a task — loom-code plans, codes, and "
        "verifies it.\nLong sessions auto-compact: when tokens cross "
        "the threshold, a\ncompactor writes a dense summary to memory "
        "and the run continues."
    )
    return "\n".join(out)
472
+
473
+
474
class _SlashCompleter(Completer):
    """Two completions in one:

    * ``/`` at line start → the slash-command menu (filters as you
      type, so ``/co`` narrows to /cost + /compress_token_length).
    * ``@`` anywhere → a fuzzy file-path menu rooted at the project,
      so ``@src/ma`` completes to ``@src/main.py`` — the agent then
      reads the referenced file (see ``_expand_at_mentions``).

    A normal task message with neither trigger stays clean, no popup.
    """

    def __init__(self, root: Path | None = None) -> None:
        """Remember the project ``root``; ``None`` disables @-file
        completion."""
        self._root = root
        # Cached (rel-path list, monotonic timestamp). complete_while_
        # typing fires this on EVERY keystroke, so we walk the tree at
        # most once per _FILE_CACHE_TTL and filter the cached list per
        # keystroke instead of re-walking (a re-walk per char froze the
        # prompt on a large repo).
        self._file_cache: list[str] | None = None
        self._file_cache_at = 0.0

    def get_completions(
        self, document: Document, complete_event: CompleteEvent
    ):
        """Yield completions for the text before the cursor: slash
        commands when it starts with ``/``, otherwise project files for
        the token after the last ``@``."""
        text = document.text_before_cursor
        if text.startswith("/"):
            for cmd, desc, _group in _COMMAND_DEFS:
                if cmd.startswith(text):
                    yield Completion(
                        cmd,
                        start_position=-len(text),
                        display_meta=desc,
                    )
            return
        # @-file mention: complete the token after the last '@'.
        at = text.rfind("@")
        if at != -1 and self._root is not None:
            frag = text[at + 1:]
            if " " not in frag:  # still typing one path token
                yield from self._file_completions(frag)

    def _all_files(self) -> list[str]:
        """Project file list (rel paths), walked at most once per TTL
        and cached — the completer filters this in memory per keystroke
        rather than re-walking the tree each time. The walk stops once
        ``_FILE_CACHE_MAX`` paths have been collected."""
        now = time.monotonic()
        if (
            self._file_cache is not None
            and now - self._file_cache_at < _FILE_CACHE_TTL
        ):
            return self._file_cache

        from .loominit.repomap import _SKIP_DIRS

        root = self._root
        assert root is not None
        out: list[str] = []
        for dirpath, dirnames, filenames in os.walk(root):
            # Prune in place so os.walk never descends into skipped dirs.
            dirnames[:] = [
                d for d in dirnames
                if d not in _SKIP_DIRS and not d.endswith(".egg-info")
            ]
            for fn in filenames:
                out.append(
                    os.path.relpath(os.path.join(dirpath, fn), root)
                )
                if len(out) >= _FILE_CACHE_MAX:
                    break
            if len(out) >= _FILE_CACHE_MAX:
                break
        self._file_cache = out
        self._file_cache_at = now
        return out

    def _file_completions(self, frag: str):
        """Up to 20 project files matching ``frag`` (prefix on the
        path, or substring on the basename), filtered from the cached
        file list.

        Matching is case-insensitive and done on a ``/``-separated form
        of both the fragment and the path: ``_all_files`` returns
        ``os.path.relpath`` results, which use ``os.sep``, so splitting
        on a literal ``/`` would never isolate the basename where the
        separator is ``\\``. The completion text itself is the path
        exactly as cached.
        """
        frag_l = frag.lower().replace(os.sep, "/")
        count = 0
        for rel in self._all_files():
            norm = rel.lower().replace(os.sep, "/")
            base = norm.rsplit("/", 1)[-1]
            if norm.startswith(frag_l) or frag_l in base:
                yield Completion(
                    rel,
                    start_position=-len(frag),
                    display_meta="file",
                )
                count += 1
                if count >= 20:
                    return
567
+
568
+
569
+ class Repl:
570
+ """One interactive loom-code session over one project."""
571
+
572
+ def __init__(
573
+ self,
574
+ project: Project,
575
+ model: str,
576
+ *,
577
+ sandbox: bool = False,
578
+ sandbox_allow_network: bool = False,
579
+ startup_resume: str | None = None,
580
+ ) -> None:
581
+ self.project = project
582
+ self.model = model
583
+ # "last" / "pick" / None — consumed once at the top of the
584
+ # loop (the --continue / --resume CLI flags).
585
+ self._startup_resume = startup_resume
586
+ # Kernel-sandbox the coder's bash (Claude-Code-style). Stored so
587
+ # every (re)build of the agent — initial + /model switch + worktree
588
+ # isolate — passes them through ``_rebuild_agent``.
589
+ self._sandbox = sandbox
590
+ self._sandbox_allow_network = sandbox_allow_network
591
+ # Per-turn spinner controls. The ApprovalGate must pause the
592
+ # spinner while it prompts (its Live refresh otherwise mangles
593
+ # the keystroke). ``_turn`` points these at the live closures;
594
+ # the gate calls them through the stable wrapper methods.
595
+ self._active_pause_spinner: Any = None
596
+ self._active_resume_spinner: Any = None
597
+ # True while the ApprovalGate is waiting on the user — the
598
+ # idle-watchdog must not count that as a hung stream.
599
+ self._gate_active = False
600
+ # Monotonic time of the last Ctrl-C at the idle prompt — a
601
+ # second press within the window exits (see _run_inner).
602
+ self._last_ctrl_c = 0.0
603
+ # Output of the most recent ``!cmd`` inline shell run, folded
604
+ # into the NEXT task turn's prompt then cleared (see _run_inner).
605
+ self._last_bash_output: str | None = None
606
+ # Which session_id already has a sessions.jsonl record — one
607
+ # line per session, written on its first pointer save.
608
+ self._recorded_session_id: str | None = None
609
+ # content-hash per working block — skip the sqlite write when a
610
+ # block's content hasn't changed. loomflow's update_block is an
611
+ # UPSERT that bumps ``updated_at`` on every call and its block
612
+ # ``format()`` has no timestamp, so an identical rewrite doesn't
613
+ # itself bust the provider prompt-cache; the win here is
614
+ # avoiding a redundant DB write (and the churn of re-reading
615
+ # AGENTS.md from disk) every turn. Reset on agent rebuild.
616
+ self._block_hashes: dict[str, str] = {}
617
+ # Idle watchdog: abort a turn when the agent stream produces
618
+ # NO events for this many seconds (a hung provider/model would
619
+ # otherwise burn until max_turns). 0 disables. Generous default
620
+ # — a slow non-streaming completion can legitimately be quiet
621
+ # for a couple of minutes.
622
+ try:
623
+ self._idle_timeout = float(
624
+ os.environ.get("LOOM_IDLE_TIMEOUT", "300")
625
+ )
626
+ except ValueError:
627
+ self._idle_timeout = 300.0
628
+ # Permission rules from settings.toml (user + project) + the
629
+ # session mode. Rules load once; /mode swaps the mode live.
630
+ from .permissions import Mode, load_rules
631
+
632
+ rule_dirs = [
633
+ Path.home() / ".loom-code",
634
+ project.root / ".loom",
635
+ ]
636
+ perm_rules = load_rules(rule_dirs)
637
+ # ApprovalGate persists across turns so 'allow all' sticks
638
+ # for the whole session.
639
+ self._gate = ApprovalGate(
640
+ pause_spinner=self._pause_active_spinner,
641
+ resume_spinner=self._resume_active_spinner,
642
+ rules=perm_rules,
643
+ mode=Mode.DEFAULT,
644
+ project_root=project.root,
645
+ )
646
+ self._auto_continue_limit = _AUTO_CONTINUE_LIMIT_DEFAULT
647
+ # Reasoning effort (None | "low" | "medium" | "high"). None =
648
+ # provider default. Set via /effort; threaded into build_agent
649
+ # → every work agent. Inert on non-reasoning models.
650
+ self._effort: str | None = None
651
+ # Session worktree isolation (/isolate). When set, this session
652
+ # edits in its own git worktree on ``_worktree.branch`` and the
653
+ # agent is rebuilt rooted there (_isolated_project); /merge or
654
+ # /discard restores the main tree.
655
+ self._worktree: worktree.WorktreeInfo | None = None
656
+ self._isolated_project: Project | None = None
657
+ # User + project extensions (the ``.loom`` folder). Discovered
658
+ # once here so the SAME bundle drives both build_agent (skills,
659
+ # subagents, tool hooks) and the REPL-lifecycle hooks fired
660
+ # below (SessionStart / UserPromptSubmit / SessionEnd). The
661
+ # REPL owns discovery because it also runs the trust prompt for
662
+ # project hooks (see _consume_trusted_extensions).
663
+ self._extensions = self._consume_trusted_extensions(
664
+ discover_extensions(project.root)
665
+ )
666
+ # /computer browser-control mode. When on, the agent gets the
667
+ # Playwright MCP server (browser_navigate/snapshot/click/type …)
668
+ # + a browser-oriented prompt. Off by default — toggled by the
669
+ # /computer command, which injects the spec + rebuilds the agent.
670
+ self._browser_mode: bool = False
671
+ # Model the session was on before /computer bumped to a stronger
672
+ # one — restored if operator mode is turned off.
673
+ self._pre_operator_model: str | None = None
674
+ # /goal run-until-done loop spec (None = off). When /goal arms a
675
+ # condition, this holds the run_until dict (condition + cheap
676
+ # checker + guardrails) that build_agent forwards to the
677
+ # framework GoalStopHook. Cleared after the goal turn completes.
678
+ self._run_until: dict[str, Any] | None = None
679
+ # Adaptive routing (solo fast path). Both built lazily on
680
+ # first use and invalidated by ``_rebuild_agent`` (model /
681
+ # web changes): the solo agent is a standalone coder kernel
682
+ # sharing the team's memory + notebook; the router agent is
683
+ # the cheap one-word SOLO/TEAM classifier.
684
+ self._solo_agent: Any | None = None
685
+ self._router_agent: Any | None = None
686
+ # Graphify and other bundled skills are auto-registered
687
+ # by build_agent (see _bundled_skill_paths). No per-session
688
+ # toggle needed — the agent decides when to load skills.
689
+ self.agent, self.workspace = build_agent(
690
+ project,
691
+ model=model,
692
+ approval_handler=self._gate.handler,
693
+ max_stop_hook_iterations=self._auto_continue_limit,
694
+ extensions=self._extensions,
695
+ effort=self._effort,
696
+ sandbox=self._sandbox,
697
+ sandbox_allow_network=self._sandbox_allow_network,
698
+ operator=self._browser_mode,
699
+ run_until=self._run_until,
700
+ )
701
+ # One session_id for the whole REPL → loomflow rehydrates
702
+ # prior turns so the agent has real conversation memory.
703
+ self.session_id = new_id()
704
+ # Session accumulators. ``total_in`` is *combined* input
705
+ # tokens (uncached + cached); ``total_cached_in`` is the
706
+ # cached subset, tracked separately so the status line can
707
+ # show the same split (``uncached+cached in``) that the
708
+ # end-of-turn summary uses.
709
+ self.total_cost = 0.0
710
+ self.total_in = 0
711
+ self.total_cached_in = 0
712
+ # ``total_cache_write`` is Anthropic-only — the cache CREATION
713
+ # tokens (1.25x base price on 5m TTL, 2x on 1h). Tracked
714
+ # separately from cached_in (which is the cache READ — cheap)
715
+ # so /cost can surface both directions of the cache
716
+ # accounting. OpenAI returns 0 here (no separate billing for
717
+ # cache writes).
718
+ self.total_cache_write = 0
719
+ self.total_out = 0
720
+ # Per-turn deltas (reset each turn in _account_result) — drive
721
+ # the end-of-turn summary line so each response is separated by
722
+ # a rule showing THAT turn's tokens + cost.
723
+ self._turn_in = 0
724
+ self._turn_out = 0
725
+ self._turn_cost = 0.0
726
+ # Framework-event counters (loomflow 0.10.13+):
727
+ # ``total_summaries`` ticks each time
728
+ # ``tool_result_summarized`` fires (per-tool-result LLM
729
+ # compression — only when ``tool_result_summarizer=`` is
730
+ # wired). ``total_compacts`` ticks each
731
+ # ``auto_compacted`` event (mid-Ralph-loop conversation
732
+ # summarisation when tokens cross the budget threshold).
733
+ # ``total_snips`` ticks each ``messages_snipped`` event
734
+ # (free list-slicing trim of older user-anchored turn
735
+ # groups). All three surface in ``/cost`` so the user can
736
+ # see the token-optimisation tiers actually firing.
737
+ self.total_summaries = 0
738
+ self.total_compacts = 0
739
+ self.total_snips = 0
740
+ self.turns = 0
741
+ self.last_plan: str | None = None
742
+ self.last_result: dict[str, Any] | None = None
743
+ # Self-improvement: cited slugs from the last turn, awaiting
744
+ # a success/failure judgement (the moved-on heuristic).
745
+ self._pending_slugs: list[str] = []
746
+ # Files the last turn WROTE to, awaiting the same judgement.
747
+ # Recorded immediately as "unknown" (so a crash before the
748
+ # verdict still leaves a touch record), then revised to
749
+ # success/fail when the moved-on / good / bad signal lands —
750
+ # the same lifecycle as ``_pending_slugs``. Feeds the file-
751
+ # touch history that powers proactive anticipation.
752
+ self._pending_files: list[str] = []
753
+ # The prompt that drove the last turn — used as the touch
754
+ # summary ("why was this file changed").
755
+ self._last_prompt: str = ""
756
+ # Automatic compaction state. ``_compact_threshold = -1``
757
+ # means "auto, recompute from model"; ``0`` means "off";
758
+ # any positive int is an explicit user override. The
759
+ # exchange list is what the compactor sees on trigger; the
760
+ # cumulative-tokens counter is what fires the trigger.
761
+ # Compaction is summarisation — low-stakes, so it runs on the
762
+ # cheap same-provider sibling (Haiku / gpt-4.1-mini) instead
763
+ # of burning the coding model's rates on it.
764
+ self._compactor = Compactor(
765
+ model=cheap_model_for(model) or model
766
+ )
767
+ self._compact_threshold = -1 # auto
768
+ self._compact_tokens = 0
769
+ self._compact_exchanges: list[tuple[str, str]] = []
770
+ # Web-search backend: ``"serper"``, ``"duckduckgo"``, or
771
+ # ``None`` (off — default). Toggled via /set_web. Rebuilding
772
+ # the agent picks the new backend up by adding (or not
773
+ # adding) a ``web_tool`` to coder + explorer.
774
+ self._web_backend: str | None = None
775
+ # ``self._auto_continue_limit`` is initialised earlier in
776
+ # __init__ (before build_agent is called) so the framework
777
+ # gets the right ``max_stop_hook_iterations`` on construction.
778
+ # See the build_agent call above.
779
+ # prompt_toolkit drives the input line. complete_while_typing
780
+ # opens the autocomplete menu the moment the user types '/'
781
+ # without any extra keystroke (Tab also still works for
782
+ # explicit completion). History gives free up-arrow recall
783
+ # within the session. The paste keybindings collapse large
784
+ # pastes into `[paste-N: <lines>, <chars>]` placeholders so
785
+ # the visible prompt stays readable; expand_pastes() restores
786
+ # the full content before the line goes to the agent.
787
+ self._prompt_session: PromptSession[str] = PromptSession(
788
+ completer=_SlashCompleter(root=project.root),
789
+ complete_while_typing=True,
790
+ key_bindings=build_paste_keybindings(),
791
+ )
792
+
793
async def run(self) -> int:
    """Run the REPL loop and return its exit code.

    Skills (graphify and friends) are wired in when the agent is built
    via :func:`build_agent`, so there is no per-session spawning or
    subprocess lifecycle to manage here.

    Teardown runs on every exit path (normal quit, Ctrl-C, exception):
    ``build_agent`` stashes any connected MCP servers on
    ``agent._mcp_registry`` and those hold live subprocess / HTTP
    sessions, and ``/computer`` may have left browser windows open.

    Returns:
        The exit code produced by :meth:`_run_inner`.
    """
    try:
        return await self._run_inner()
    finally:
        # Nested rather than sequential: ``_aclose_mcp`` only swallows
        # ``Exception``, so a cancellation / KeyboardInterrupt raised
        # while it is awaiting would otherwise skip the browser
        # teardown and leave a headed Chromium behind.
        try:
            await self._aclose_mcp()
        finally:
            await self._aclose_browsers()
811
+
812
+ async def _aclose_mcp(self) -> None:
813
+ """Best-effort teardown of the MCP registry's sessions. Never
814
+ raises — shutdown must not turn a clean exit into an error."""
815
+ registry = getattr(self.agent, "_mcp_registry", None)
816
+ if registry is None:
817
+ return
818
+ try:
819
+ await registry.aclose()
820
+ except Exception: # noqa: BLE001 — teardown must not fail exit
821
+ pass
822
+
823
+ async def _aclose_browsers(self) -> None:
824
+ """Close any /computer browser windows on exit so a headed
825
+ Chromium doesn't linger after loom-code quits. Best-effort."""
826
+ try:
827
+ from .browse import close_all_browsers
828
+
829
+ await close_all_browsers()
830
+ except Exception: # noqa: BLE001 — teardown must not fail exit
831
+ pass
832
+
833
async def _run_inner(self) -> int:
    """Print the banner, fire start hooks, then run the input loop.

    Kept separate from :meth:`run` so the teardown wrapping point stays
    obvious. Each loop iteration reads one line and dispatches it as a
    slash command, a ``!`` shell command, a local greeting reply, or an
    agent turn.

    Text that originates outside this program (hook output, the token
    the user typed) is printed through ``Text`` objects rather than
    interpolated into Rich markup, so a stray ``[`` / ``[/x]`` cannot
    raise ``MarkupError`` inside the loop and end the session.

    Returns:
        ``0`` when the user leaves via Ctrl-D, a double Ctrl-C, or a
        slash command that asks to stop.
    """

    async def _end_session(farewell: str) -> int:
        """Credit any pending turn, fire SessionEnd hooks, say goodbye."""
        # Leaving satisfied — the pending turn counts as accepted.
        await self._attribute_pending(success=True, quiet=True)
        await self._fire_repl_hooks("SessionEnd")
        console.print(farewell)
        return 0

    banner(
        self.model,
        str(self.project.root),
        self.project.is_git,
        sandbox=self._sandbox,
        sandbox_allow_network=self._sandbox_allow_network,
    )
    if self.project.context_file:
        console.print(
            f" [dim]loaded context: "
            f"{self.project.context_file.name}[/dim]"
        )
    # Brief getting-started hints right after the banner, most-common
    # first. ``•`` bullets, NOT ``▸``/``›`` — the input prompt uses a
    # ``›`` glyph, and arrow-ish bullets read as prompts at a glance.
    console.print(
        " [dim]• type a task, or [cyan]/help[/cyan] "
        "for the command menu[/dim]"
    )
    console.print(
        " [dim]• [cyan]/model <name>[/cyan] switch to a "
        "specific model by name (e.g. gpt-4.1, claude-opus-4-8)[/dim]"
    )
    console.print(
        " [dim]• [cyan]/set_model[/cyan] pick a provider + "
        "model (saves your API key)[/dim]"
    )
    console.print(
        " [dim]• [cyan]/set_web[/cyan] enable web "
        "search (Serper / DuckDuckGo)[/dim]"
    )
    # Show the resume hint ONLY when a prior session pointer exists —
    # no point advertising a feature a first-time user can't use yet.
    if self._load_session_pointer() is not None:
        console.print(
            " [dim]• [cyan]/resume[/cyan] pick up "
            "the last session for this project (rehydrates "
            "prior turns)[/dim]"
        )
    self._print_extensions_banner()
    console.print()

    # SessionStart hooks fire once, after the banner and before the
    # first prompt. Their added context is surfaced as a dim note
    # rather than injected, since there is no user turn to attach it
    # to yet. Hook output is arbitrary text → render with markup off.
    start_result = await self._fire_repl_hooks("SessionStart")
    if start_result.added_context:
        console.print(
            Text(f" {start_result.added_context}", style="dim")
        )

    # --continue / --resume: rejoin a prior session before the first
    # prompt, via the same machinery as /resume.
    if self._startup_resume == "last":
        await self._handle_resume("")
    elif self._startup_resume == "pick":
        await self._handle_resume("pick")

    while True:
        try:
            line = await self._read_line()
        except EOFError:
            # Ctrl-D: leaving satisfied.
            return await _end_session("\n[dim]bye[/dim]")
        except KeyboardInterrupt:
            # Ctrl-C at the idle prompt: the reflex from every other
            # REPL is "clear the line", not "quit". First press warns;
            # a second within the window exits (same cleanup as
            # Ctrl-D).
            now = time.monotonic()
            if now - self._last_ctrl_c < 2.0:
                return await _end_session("\n[dim]bye[/dim]")
            self._last_ctrl_c = now
            console.print(
                " [dim]press Ctrl-C again to exit "
                "(or /exit)[/dim]"
            )
            continue

        line = line.strip()
        if not line:
            continue

        # Expand any [paste-N: ...] placeholders to the full stashed
        # content BEFORE dispatch, so there is a single canonical
        # "what the user really said" point of truth.
        line = expand_pastes(line)

        if line.startswith("/"):
            # Only dispatch KNOWN commands. An absolute filesystem path
            # (``/Users/me/x.py``) also starts with "/" — it must reach
            # the agent as a task. Heuristic: a first token with a
            # second "/" is a path; a bare unknown token like "/hlep"
            # still errors (typo protection).
            first = line.split()[0].lower()
            known = {c for c, _d, _g in _COMMAND_DEFS}
            known |= {"/quit", "/computer"}
            if first in known:
                should_continue = await self._handle_slash(line)
                if not should_continue:
                    return await _end_session("[dim]bye[/dim]")
                continue
            if "/" not in first[1:]:
                # ``first`` is raw user input — no markup parsing.
                console.print(
                    Text(
                        f" unknown command {first} — /help for "
                        "the list"
                    )
                )
                continue
            # Falls through: a path-shaped line is a task.

        # ``!cmd`` — run a shell command inline, right now, without
        # spending a model turn. The output is echoed AND stashed so
        # the NEXT task turn can reference it ("now fix that error").
        if line.startswith("!"):
            try:
                await self._run_bang(line[1:].strip())
            except Exception as exc:  # noqa: BLE001 — never exit
                console.print(
                    Text(f" ! error: {exc}", style="red")
                )
            continue

        # Expand @-file mentions to inline the referenced files so the
        # model gets their content, not just the path.
        line = self._expand_at_mentions(line)

        # Fold in the last ``!cmd`` output (once) so "now fix that"
        # after a bang command has the output to work from.
        if self._last_bash_output is not None:
            line = (
                f"{line}\n\n[output of a shell command I just ran]\n"
                f"{self._last_bash_output}"
            )
            self._last_bash_output = None

        # Pure greeting → answer locally, zero tokens. Placed BEFORE
        # hooks/attribution/injection on purpose: a "hi" must not
        # credit the previous turn as accepted, fire task hooks, or pay
        # the per-turn context injection.
        greeting = _greeting_reply(line)
        if greeting is not None:
            console.print(f" {greeting}")
            continue

        # UserPromptSubmit hooks see the prompt before the agent does.
        # A hook may BLOCK the turn, or return additional context that
        # is folded into the prompt.
        submit = await self._fire_repl_hooks(
            "UserPromptSubmit", prompt=line
        )
        if submit.blocked:
            # Red label + plain reason; the reason is hook-supplied
            # text, so it is appended as literal text, not markup.
            blocked_msg = Text(" ⊘ blocked by hook", style="red")
            blocked_msg.append(
                f": {submit.reason or '(no reason given)'}"
            )
            console.print(blocked_msg)
            continue
        if submit.added_context:
            line = f"{line}\n\n[context from hook]\n{submit.added_context}"

        # A new task with no prior complaint → the previous turn must
        # have been fine. Credit it, then run.
        await self._attribute_pending(success=True, quiet=False)
        # Per-turn working-block injection (repo map, file history,
        # learned notes) ahead of the agent run.
        await self._inject_loom_context(line)
        await self._inject_file_history(line)
        await self._inject_learned_notes(line)
        # Auto-checkpoint before the turn runs so the user can /undo
        # this turn's edits. Silent on success; best-effort.
        self._checkpoint_before_turn(line)
        route = await self._route_turn(line)
        if route == "solo":
            # Surface the routing decision — silent topology switches
            # make cost/behaviour differences look random to the user.
            console.print(
                " [dim]→ solo fast path (small task — skipping "
                "team delegation)[/dim]"
            )
            await self._turn(line, agent=self._get_solo_agent())
        else:
            await self._turn(line)
1054
+
1055
+ # ---- input ----------------------------------------------------------
1056
+
1057
async def _read_line(self) -> str:
    """Prompt for and return one line of user input.

    Visual separation between turns is owned by the END of each turn
    (the rule + per-turn tokens/cost from ``_print_turn_summary``), so
    the prompt stays minimal: one blank line of air, then the bold
    green ``›`` glyph. Cumulative session totals live in ``/cost``.
    Autocomplete, history and the paste keybindings all come from the
    ``PromptSession`` configured in ``__init__``.
    """
    # One blank line of air above the prompt.
    console.print()
    glyph = HTML("<ansigreen><b>›</b></ansigreen> ")
    typed = await self._prompt_session.prompt_async(glyph)
    return typed
1073
+
1074
async def _run_bang(self, cmd: str) -> None:
    """Run ``cmd`` in the project root right now (``!`` prefix).

    The combined stdout + stderr is echoed (first 200 lines) and a
    bounded copy is stashed in ``_last_bash_output`` so the next task
    turn can be told about it ("run this, then act on what you see").

    Args:
        cmd: The shell command line typed after ``!``. An empty string
            prints a usage hint and does nothing else.

    Never raises for command failures: a timeout or any other error is
    reported on the console and the stash is left untouched.
    """
    if not cmd:
        console.print(" [dim]usage: !<shell command>[/dim]")
        return
    import functools
    import subprocess

    max_lines = 200  # lines echoed to the terminal
    max_chars = 4000  # characters kept for the next prompt

    # The command AND its output are USER/tool data — render with
    # markup DISABLED (styling via Text/style=, never inline tags). A
    # ``[`` in the command or a line like ``[FAILED]`` in the output
    # would otherwise be parsed as a Rich tag → MarkupError.
    console.print(Text(f" $ {cmd}", style="dim"))
    try:
        # Worker thread so the blocking run can't stall the event loop.
        # ``shell=True`` is deliberate: this is the user's own command
        # line, typed at their own terminal. ``errors="replace"`` keeps
        # non-UTF-8 output (binary dumps, legacy encodings) from
        # failing the whole command with a decode error.
        proc = await anyio.to_thread.run_sync(
            functools.partial(
                subprocess.run,
                cmd,
                shell=True,
                cwd=str(self.project.root),
                capture_output=True,
                text=True,
                errors="replace",
                timeout=120,
            )
        )
    except subprocess.TimeoutExpired:
        console.print(
            Text(" ! command timed out (120s)", style="yellow")
        )
        return
    except Exception as exc:  # noqa: BLE001 — never kill the REPL
        console.print(Text(f" ! failed: {exc}", style="red"))
        return

    out = ((proc.stdout or "") + (proc.stderr or "")).rstrip()
    code = proc.returncode
    if out:
        out_lines = out.splitlines()
        for ln in out_lines[:max_lines]:
            console.print(Text(f" {ln}", style="dim"))
        hidden = len(out_lines) - max_lines
        if hidden > 0:
            # Say so explicitly — a silently clipped log reads as the
            # complete output.
            console.print(
                Text(f" … ({hidden} more lines not shown)", style="dim")
            )
    console.print(
        Text(
            f" exit {code}",
            style="dim" if code == 0 else "yellow",
        )
    )

    # Stash for the next turn (bounded so a huge dump can't bloat the
    # prompt). Consumed + cleared in the input loop. The exit code is
    # always included: output alone does not tell the model whether
    # the command succeeded.
    if out:
        kept = out[:max_chars]
        if len(out) > max_chars:
            kept += "\n… (truncated)"
        self._last_bash_output = f"$ {cmd}\n{kept}\n(exit {code})"
    else:
        self._last_bash_output = f"$ {cmd}\n(exit {code})"
1132
+
1133
+ def _expand_at_mentions(self, line: str) -> str:
1134
+ """Inline file references so the model gets the file CONTENT,
1135
+ not just the name. Two forms with DIFFERENT trust levels:
1136
+
1137
+ * ``@path`` mention — a DELIBERATE reference. Inlines the file
1138
+ AND grants outside-project EDIT consent (see
1139
+ ``loom_code.consent``): typing ``@`` is an unambiguous "act
1140
+ on this file" gesture.
1141
+ * a bare / quoted / pasted absolute path — a convenience for
1142
+ "read this". Inlined for READING only; NO edit consent. This
1143
+ is the safety boundary: a path that merely appears in a
1144
+ pasted stack trace or log line (``…/site-packages/x.py``,
1145
+ ``~/.zshrc``) must never become editable just by being
1146
+ quoted — only an explicit ``@`` unlocks edits.
1147
+
1148
+ The AGENT's own read tool stays project-scoped regardless, so a
1149
+ prompt-injected model can't roam the filesystem; this is purely
1150
+ about the USER pulling a file into the prompt.
1151
+
1152
+ Each existing file is inlined once; non-files are left as
1153
+ literal text. Bounded per file so a giant file can't blow the
1154
+ context."""
1155
+ import re as _re
1156
+
1157
+ # (ref, is_at_mention) — @-mentions grant edit consent, bare
1158
+ # paths don't.
1159
+ refs: list[tuple[str, bool]] = [
1160
+ (m, True) for m in _re.findall(r"@([^\s]+)", line)
1161
+ ]
1162
+ # Bare/quoted absolute paths — READ-only convenience.
1163
+ for m in _re.findall(r"['\"]((?:/|~/)[^'\"]+)['\"]", line):
1164
+ refs.append((m, False))
1165
+ for m in _re.findall(r"(?<!\S)((?:/|~/)[^\s'\"]+)", line):
1166
+ refs.append((m, False))
1167
+ # macOS drag-and-drop escapes spaces (``…/Screenshot\ 2026….png``).
1168
+ for m in _re.findall(
1169
+ r"(?<!\S)((?:/|~/)(?:[^\s'\"\\]|\\ )+)", line
1170
+ ):
1171
+ if "\\ " in m:
1172
+ refs.append((m.replace("\\ ", " "), False))
1173
+ # Last resort for a PASTED path with unescaped spaces.
1174
+ for start in [
1175
+ m.start() for m in _re.finditer(r"(?<!\S)(?=/|~/)", line)
1176
+ ]:
1177
+ tail = line[start:].strip().strip("'\"")
1178
+ words = tail.split(" ")
1179
+ for end in range(len(words), 0, -1):
1180
+ cand = " ".join(words[:end])
1181
+ if Path(cand).expanduser().is_file():
1182
+ refs.append((cand, False))
1183
+ break
1184
+ if not refs:
1185
+ return line
1186
+ seen: set[str] = set()
1187
+ blocks: list[str] = []
1188
+ for ref, is_mention in refs:
1189
+ ref = ref.rstrip(".,;:!?")
1190
+ if ref in seen:
1191
+ continue
1192
+ seen.add(ref)
1193
+ p = Path(ref).expanduser()
1194
+ fpath = (
1195
+ p if p.is_absolute() else self.project.root / p
1196
+ ).resolve()
1197
+ # Existence IS the filter — prose that merely looks
1198
+ # path-shaped never resolves to a real file.
1199
+ if not fpath.is_file():
1200
+ continue
1201
+ try:
1202
+ raw = fpath.read_bytes()
1203
+ except OSError:
1204
+ continue
1205
+ if b"\x00" in raw[:8192]:
1206
+ # Binary (image/archive/db) — inlining bytes as text
1207
+ # is garbage. Tell the user instead of silently
1208
+ # skipping; the models here are text-only.
1209
+ console.print(
1210
+ f" [yellow]@ {ref} is a binary file — can't "
1211
+ "inline it (text files only)[/yellow]"
1212
+ )
1213
+ continue
1214
+ # Grant edit consent for ANY path the user typed/pasted —
1215
+ # bare OR @-mentioned. You naming a file IS the permission
1216
+ # (Claude-Code model). This is safe because the approval
1217
+ # gate ALWAYS shows the diff + asks before an outside-
1218
+ # project edit (see ApprovalGate._is_outside_project): a
1219
+ # path incidentally embedded in a pasted stack trace can't
1220
+ # silently mutate anything — the user sees the prompt and
1221
+ # rejects it. ``is_mention`` is kept only for display
1222
+ # nuance, not for the grant decision.
1223
+ del is_mention # no longer gates consent
1224
+ from . import consent
1225
+
1226
+ consent.grant(fpath)
1227
+ body = raw.decode("utf-8", errors="replace")
1228
+ if len(body) > 8000:
1229
+ body = body[:8000] + "\n… (truncated)"
1230
+ blocks.append(f"--- {ref} ---\n{body}")
1231
+ console.print(f" [dim]@ inlined {ref}[/dim]")
1232
+ if not blocks:
1233
+ return line
1234
+ # Bare-path turn: the user pasted ONLY a path, no instruction.
1235
+ # Weak models invent a task for it (observed live: an
1236
+ # unprompted multi_edit on the referenced file). Spell the
1237
+ # implicit contract out instead of letting the model guess.
1238
+ residue = line
1239
+ for ref in seen:
1240
+ residue = residue.replace(ref, " ")
1241
+ residue = residue.replace("@", " ").strip(" \t'\".,;:!?")
1242
+ suffix = ""
1243
+ if not residue:
1244
+ suffix = (
1245
+ "\n\n(The user pasted only this file path, with no "
1246
+ "instruction. Summarise the file in 2-3 sentences "
1247
+ "and ask what they'd like done with it. Do NOT "
1248
+ "modify anything.)"
1249
+ )
1250
+ return line + "\n\n" + "\n\n".join(blocks) + suffix
1251
+
1252
+ # ---- a task turn ----------------------------------------------------
1253
+
1254
async def _update_block_if_changed(
    self, name: str, content: str
) -> None:
    """Write working block ``name`` only if its content changed.

    Thin forwarder to :func:`loom_code.turn.update_block_if_changed`,
    passing this session's agent memory and ``_block_hashes`` so the
    REPL and the learned-notes injector share ONE dirty-check.
    """
    from .turn import update_block_if_changed as _write_if_dirty

    await _write_if_dirty(
        self.agent.memory,
        name,
        content,
        user_id=_USER_ID,
        block_hashes=self._block_hashes,
    )
1269
+
1270
+ async def _inject_loom_context(self, prompt: str) -> None:
1271
+ """Update the ``loom_index`` working block with a deterministic
1272
+ repo map — the most structurally-important symbols (signatures +
1273
+ locations) — which loomflow folds into the next system prompt.
1274
+
1275
+ Built from the structural index (AST walk, no model calls), so
1276
+ it needs no ``/loominit`` and is fresh-by-construction: the
1277
+ cached builder re-walks only when the tree changed. ``prompt``
1278
+ is unused (the map is a stable global overview, which keeps the
1279
+ system prompt cache-stable across turns).
1280
+
1281
+ Failures are swallowed (never let memory I/O kill a turn).
1282
+ """
1283
+ del prompt # map is global, not prompt-ranked
1284
+ try:
1285
+ from .loominit.repomap import repo_map_for_root_cached
1286
+
1287
+ # Deterministic repo map (top symbols by structural
1288
+ # importance) built from the structural index — no LLM, no
1289
+ # LOOM.md/loominit needed, and fresh-by-construction
1290
+ # (re-walked only when the tree changed). Replaces the old
1291
+ # BM25-over-LLM-narrative retrieval that drifted as the
1292
+ # agent edited code.
1293
+ body = repo_map_for_root_cached(self.project.root)
1294
+ if body:
1295
+ await self._update_block_if_changed("loom_index", body)
1296
+ # Auto-reload the project rules file (AGENTS.md): re-read it
1297
+ # FRESH each turn into the ``project_rules`` working block, so
1298
+ # a mid-session edit applies on the next turn without a
1299
+ # restart. The coordinator's static prompt no longer bakes
1300
+ # the rules file (see build_unified_coordinator_instructions).
1301
+ # The dirty-check keeps the re-read cheap: an UNCHANGED file
1302
+ # skips the write, so the cache prefix survives.
1303
+ from .rules import project_rules_block
1304
+
1305
+ await self._update_block_if_changed(
1306
+ "project_rules",
1307
+ project_rules_block(self.project.root),
1308
+ )
1309
+ except Exception: # noqa: BLE001 — injection is best-effort
1310
+ pass
1311
+
1312
async def _inject_learned_notes(self, prompt: str) -> None:
    """Push notes relevant to ``prompt`` into ``learned_notes``.

    ACTIVE recall: instead of relying on the agent to run
    ``search_notes``, the proven (success-credited) notes are placed
    in a working block so they reach the system prompt unprompted.

    All of the selection, bounding and error handling lives in
    :func:`loom_code.turn.inject_learned_notes` — the SHARED per-turn
    pipeline — and this method only supplies the session's workspace,
    agent memory and ``_block_hashes``.
    """
    from .turn import inject_learned_notes as _inject

    await _inject(
        self.workspace,
        self.agent.memory,
        prompt,
        user_id=_USER_ID,
        block_hashes=self._block_hashes,
    )
1342
+
1343
+ async def _inject_file_history(self, prompt: str) -> None:
1344
+ """Proactive anticipation: before the agent runs, surface what
1345
+ happened last time we touched the files this prompt is about.
1346
+
1347
+ THE soul of loom-code over a stateless coder — "last time you
1348
+ edited src/auth.py the change was marked bad, be careful." Two
1349
+ surfaces: a ``file_anticipation`` working block (loomflow folds
1350
+ it into the next system prompt so the AGENT heeds it) AND a dim
1351
+ ``recall:`` line so the USER sees the warning fire.
1352
+
1353
+ Silent when nothing's notable (a clean, rarely-touched file
1354
+ produces no block + no line) — noise would train both the
1355
+ model and the user to ignore the section. Best-effort: any
1356
+ failure is swallowed; anticipation degrading to silence is
1357
+ correct, a crash mid-turn is not."""
1358
+ try:
1359
+ candidates = file_history.candidate_paths_from_prompt(
1360
+ self.project.root, prompt
1361
+ )
1362
+ if not candidates:
1363
+ # Clear any stale block from a prior turn so last turn's
1364
+ # warning doesn't bleed into an unrelated prompt.
1365
+ # (Dirty-checked: consecutive no-candidate turns — the
1366
+ # common case — write once, then stay cache-warm.)
1367
+ await self._update_block_if_changed(
1368
+ "file_anticipation", ""
1369
+ )
1370
+ return
1371
+ records = file_history.history_for(
1372
+ self.project.root, candidates
1373
+ )
1374
+ block = file_history.anticipation_block(records)
1375
+ await self._update_block_if_changed(
1376
+ "file_anticipation", block
1377
+ )
1378
+ if block:
1379
+ # One concise user-visible line per warned file.
1380
+ for rec in records:
1381
+ if rec.last_outcome == "fail" or rec.fail_count > 0:
1382
+ console.print(
1383
+ f" [dim]↶ recall:[/dim] [yellow]last change "
1384
+ f"to {rec.path} was marked bad — being "
1385
+ f"careful[/yellow]"
1386
+ )
1387
+ elif rec.touch_count >= 4:
1388
+ console.print(
1389
+ f" [dim]↶ recall: {rec.path} is a churn "
1390
+ f"hotspot ({rec.touch_count} edits)[/dim]"
1391
+ )
1392
+ except Exception: # noqa: BLE001 — anticipation is best-effort
1393
+ pass
1394
+
1395
+ def _checkpoint_before_turn(self, prompt: str) -> None:
1396
+ """Snapshot the working tree before a turn (auto-checkpoint).
1397
+
1398
+ Silent on success — a per-turn confirmation line would be noise;
1399
+ the snapshots only surface via /undo + /checkpoints. Best-effort:
1400
+ a non-git repo or git failure no-ops without disturbing the run.
1401
+ Skipped when the session is already isolated in a worktree
1402
+ (the worktree IS the isolation; double-snapshotting is redundant
1403
+ and would snapshot the wrong tree)."""
1404
+ if getattr(self, "_isolated_wt", None) is not None:
1405
+ return
1406
+ try:
1407
+ _checkpoint.checkpoint(self.project.root, summary=prompt)
1408
+ except Exception: # noqa: BLE001 — checkpointing is best-effort
1409
+ pass
1410
+
1411
async def _consume_agent_stream(
    self,
    agent: Any,
    prompt: str,
    renderer: StreamRenderer,
    pause_status: Any,
) -> bool:
    """Stream one agent run into ``renderer`` + tick the token-
    optimisation counters. Returns False (caller should abort
    the turn) if the stream raised, was interrupted, idled past
    the timeout, or stalled; True on clean completion.

    Extracted so the escalation path can re-run a SECOND agent
    (the supervisor) through the identical consume + error-
    handling logic without duplicating it.

    Two liveness guards run alongside the consume loop:

    * **Idle watchdog** — no events for ``_idle_timeout`` seconds
      (approval-prompt waits excluded via ``_gate_active``) means
      the stream is hung (dead provider, stuck model); cancel the
      turn instead of burning until ``max_turns``.
    * **Stall detector** — ``_STALL_REPEATS`` consecutive
      IDENTICAL tool calls means the model is looping without
      progress (loomflow's no-progress hook only arms under
      /goal); cancel early with a clear message.

    Args:
        agent: object whose ``stream(prompt, user_id=...,
            session_id=...)`` yields events asynchronously.
        prompt: text handed to ``agent.stream``.
        renderer: receives every event via ``handle`` and is
            flushed with ``flush_deferred`` when the stream ends.
        pause_status: zero-argument callable invoked before any
            abort / error message is printed.
    """
    # Shared mutable state between the consume body and watchdog.
    state: dict[str, Any] = {
        "last_event": time.monotonic(),
        "timed_out": False,
        "stalled_tool": None,
        "tool_in_flight": False,
    }
    # Last seen "tool:args" key and how many times in a row it repeated.
    repeat: dict[str, Any] = {"key": None, "count": 0}
    idle_timeout = self._idle_timeout

    try:
        async with anyio.create_task_group() as tg:

            # Poll at a fraction of the timeout (capped at 5s) so
            # small timeouts — tests, aggressive configs — still
            # trip promptly; the default 300s polls every 5s.
            poll = max(0.05, min(5.0, (idle_timeout or 5.0) / 4))

            async def _watchdog() -> None:
                while True:
                    await anyio.sleep(poll)
                    # Don't count time the user is at an approval
                    # prompt, or a tool is legitimately RUNNING
                    # (a bash test-suite/build emits tool_call then
                    # nothing until its result — that's work, not a
                    # hang). Only genuine model-side silence counts.
                    if self._gate_active or state["tool_in_flight"]:
                        state["last_event"] = time.monotonic()
                        continue
                    idle = time.monotonic() - state["last_event"]
                    # A falsy idle_timeout (0 / None) disables the guard.
                    if idle_timeout and idle > idle_timeout:
                        state["timed_out"] = True
                        tg.cancel_scope.cancel()
                        return

            tg.start_soon(_watchdog)

            stream = agent.stream(
                prompt,
                user_id=_USER_ID,
                session_id=self.session_id,
            )
            async for event in stream:
                state["last_event"] = time.monotonic()
                renderer.handle(event)
                kind = str(getattr(event, "kind", ""))
                payload = getattr(event, "payload", None) or {}
                # Tick the token-optimisation counters (loomflow
                # 0.10.13+) off architecture events.
                if kind.endswith("architecture_event"):
                    name = payload.get("name")
                    if name == "tool_result_summarized":
                        self.total_summaries += 1
                    elif name == "auto_compacted":
                        self.total_compacts += 1
                    elif name == "messages_snipped":
                        self.total_snips += 1
                elif kind.endswith("tool_result"):
                    # Tool finished → the model is back in control;
                    # idle time counts again.
                    state["tool_in_flight"] = False
                elif kind.endswith("tool_call"):
                    # A tool is now RUNNING — pause the idle clock
                    # until its result arrives (see _watchdog).
                    state["tool_in_flight"] = True
                    # Stall detection: same tool + same args,
                    # over and over, is a loop — not progress.
                    call = payload.get("call") or {}
                    try:
                        # sort_keys makes the key independent of
                        # argument ordering; default=str stringifies
                        # values json cannot encode natively.
                        args_key = json.dumps(
                            call.get("args"),
                            sort_keys=True,
                            default=str,
                        )
                    except (TypeError, ValueError):
                        args_key = repr(call.get("args"))
                    key = f"{call.get('tool')}:{args_key}"
                    if key == repeat["key"]:
                        repeat["count"] += 1
                        if repeat["count"] >= _STALL_REPEATS:
                            state["stalled_tool"] = call.get(
                                "tool"
                            )
                            # Close the stream generator IN THIS
                            # task before cancelling — a bare
                            # ``break`` leaves it suspended for GC
                            # to finalise in another task, which
                            # trips anyio's "cancel scope in a
                            # different task". aclose() unwinds its
                            # internal task group here, cleanly.
                            await stream.aclose()
                            tg.cancel_scope.cancel()
                            break
                    else:
                        # A different call breaks the streak; start
                        # counting afresh from this one.
                        repeat["key"] = key
                        repeat["count"] = 1
            # Stream done — stop the watchdog.
            tg.cancel_scope.cancel()
    except KeyboardInterrupt:
        pause_status()
        console.print(
            "\n[yellow]interrupted — turn abandoned[/yellow]"
        )
        return False
    except BaseExceptionGroup as eg:
        # anyio's task groups raise ``ExceptionGroup`` when any
        # child task fails. Unwrap to surface the REAL cause(s)
        # instead of the opaque wrapper message. Ctrl-C inside
        # the group arrives wrapped — route it to the interrupt
        # path, not the error path.
        pause_status()
        inners = _flatten_exception_group(eg)
        interrupted = any(
            isinstance(i, KeyboardInterrupt) for i in inners
        )
        if interrupted:
            console.print(
                "\n[yellow]interrupted — turn abandoned[/yellow]"
            )
        # Print REAL errors too, even alongside a Ctrl-C — a worker
        # that crashed (e.g. a 401) at the same moment the user hit
        # Ctrl-C must not be hidden behind "interrupted", or they
        # retry the identical prompt into the same silent failure.
        for inner in inners:
            if not isinstance(
                inner, (KeyboardInterrupt, anyio.get_cancelled_exc_class())
            ):
                self._print_turn_error(inner)
        return False
    except Exception as exc:  # noqa: BLE001 — REPL must survive
        pause_status()
        self._print_turn_error(exc)
        return False
    finally:
        # The gate can't still be up once the stream ends — render
        # anything that queued behind an approval prompt.
        self._gate_active = False
        renderer.flush_deferred()

    # Reached only when no exception escaped the task group: report
    # which liveness guard (if any) cut the turn short.
    if state["timed_out"]:
        pause_status()
        console.print(
            f"\n[yellow]turn aborted — no activity for "
            f"{int(idle_timeout)}s (stream hung?). The tree is "
            f"unchanged since the pre-turn checkpoint; /undo "
            f"restores it if needed. LOOM_IDLE_TIMEOUT=0 "
            f"disables this guard.[/yellow]"
        )
        return False
    if state["stalled_tool"]:
        pause_status()
        console.print(
            f"\n[yellow]turn aborted — the model repeated the "
            f"same [cyan]{state['stalled_tool']}[/cyan] call "
            f"{_STALL_REPEATS}× with identical arguments (a "
            f"loop, not progress). Try rephrasing, or a "
            f"stronger model via /model.[/yellow]"
        )
        return False
    return True
1597
+
1598
@staticmethod
def _print_turn_error(exc: BaseException) -> None:
    """One error, two lines max: the real cause (for bug reports)
    + an actionable hint when we recognise the class.

    The raw ExceptionGroup wrapper never reaches here — callers
    flatten first — and render's error event suppresses its own
    copy, so each failure prints exactly once.

    The exception's type name and message are escaped before being
    interpolated into console markup: error text routinely contains
    square brackets (regexes, JSON, paths), which would otherwise be
    parsed as style tags — silently dropped, or raising a markup
    error from inside the error handler itself.

    Args:
        exc: the already-unwrapped exception to report.
    """
    # Function-scope import keeps the dependency local to the block.
    from rich.markup import escape

    kind = escape(type(exc).__name__)
    detail = escape(str(exc))
    console.print(f"\n[red]error: {kind}: {detail}[/red]")
    # The hint is printed as markup unchanged.
    hint = friendly_error_hint(exc)
    if hint:
        console.print(f" [yellow]→ {hint}[/yellow]")
1611
+
1612
+ # ---- .loom extensions: trust gate + REPL-lifecycle hooks --------
1613
+
1614
def _consume_trusted_extensions(
    self, extensions: Extensions
) -> Extensions:
    """Run a discovered extension bundle through the project-hook
    trust gate and return the filtered bundle.

    The filtering itself is delegated to ``filter_trusted_hooks``,
    which is given the project root and
    ``_prompt_trust_project_hooks`` as the consent prompt."""
    root = self.project.root
    ask = self._prompt_trust_project_hooks
    return filter_trusted_hooks(extensions, project_root=root, prompt=ask)
1627
+
1628
def _prompt_trust_project_hooks(self, specs: list[HookSpec]) -> bool:
    """Show a project's hook commands and ask whether to trust them.

    Safe default is NO: a non-tty session never auto-trusts, and at
    the prompt only an explicit ``y`` approves — we don't run a
    cloned repo's shell commands without consent.

    Every value taken from the hook specs (event, matcher, command)
    is escaped before it is interpolated into console markup. These
    strings come from the repository being inspected, so printing
    them raw would let a lowercase matcher such as ``[bash]`` be
    swallowed as a style tag, and would let a command carry markup
    that restyles or breaks the very text the user is asked to
    review.

    Args:
        specs: the project hook specs awaiting a trust decision.

    Returns:
        True only when stdin is a tty and the key pressed is ``y``
        or ``Y``; False otherwise.
    """
    from .approval import _read_single_key

    # Function-scope import keeps the dependency local to the block.
    from rich.markup import escape

    console.print()
    console.print(
        " [bold yellow]⚠ this project defines hooks[/bold yellow] "
        "(.loom/settings.toml) that run shell commands "
        "automatically:"
    )
    for s in specs:
        if s.matcher not in ("", "*"):
            # Escape the brackets too, so the matcher shows literally
            # as "[matcher]" instead of being parsed as a tag.
            tag = " " + escape(f"[{s.matcher}]")
        else:
            tag = ""
        event = escape(f"{s.event}")
        command = escape(f"{s.command}")
        console.print(
            f" [cyan]{event}[/cyan]{tag} → "
            f"[dim]{command}[/dim]"
        )
    if not sys.stdin.isatty():
        console.print(
            " [dim](non-interactive — skipping project hooks)[/dim]"
        )
        return False
    console.print(
        " [bold]trust and run these hooks?[/bold] "
        "[dim](press y to trust, any other key to skip)[/dim] ",
        end="",
    )
    trusted = _read_single_key() in ("y", "Y")
    console.print(
        "[green]trusted[/green]" if trusted else "[dim]skipped[/dim]"
    )
    return trusted
1663
+
1664
def _print_extensions_banner(self) -> None:
    """Print a one-line summary of what was loaded from ``.loom``:
    the number of skills, the subagents (count plus names), and the
    number of hooks. Categories that are empty are left out, and
    nothing at all is printed when every category is empty."""
    loaded = self._extensions
    summary: list[str] = []

    skills = loaded.skill_paths
    if skills:
        summary.append(f"{len(skills)} skill(s)")

    subagents = loaded.agent_specs
    if subagents:
        listed = ", ".join(spec.name for spec in subagents)
        summary.append(f"{len(subagents)} subagent(s) ({listed})")

    hooks = loaded.hook_specs
    if hooks:
        summary.append(f"{len(hooks)} hook(s)")

    if not summary:
        return
    joined = " · ".join(summary)
    console.print(f" [dim]▸ .loom extensions: {joined}[/dim]")
1681
+
1682
async def _fire_repl_hooks(
    self, event: str, *, prompt: str | None = None
) -> Any:
    """Dispatch the REPL-lifecycle hooks registered for ``event``.

    Hands the loaded hook specs, the event name, the project root
    (as ``cwd``) and the optional ``prompt`` to ``run_repl_hooks``
    and returns whatever it produces, so a caller such as
    ``UserPromptSubmit`` can inspect the result while others
    simply ignore it."""
    hook_specs = self._extensions.hook_specs
    working_dir = self.project.root
    outcome = await run_repl_hooks(
        hook_specs, event, cwd=working_dir, prompt=prompt
    )
    return outcome
1696
+
1697
+ def _pause_active_spinner(self) -> None:
1698
+ """Stable hook the ApprovalGate calls to stop the current
1699
+ turn's spinner before prompting. No-op between turns.
1700
+
1701
+ Also marks the gate as active so the idle-watchdog doesn't
1702
+ count the user's thinking time at an approval prompt as
1703
+ "the stream hung"."""
1704
+ self._gate_active = True
1705
+ cb = self._active_pause_spinner
1706
+ if cb is not None:
1707
+ cb()
1708
+
1709
+ def _resume_active_spinner(self) -> None:
1710
+ """Stable hook the ApprovalGate calls after the prompt to
1711
+ bring the spinner back."""
1712
+ self._gate_active = False
1713
+ cb = self._active_resume_spinner
1714
+ if cb is not None:
1715
+ cb()
1716
+
1717
def _account_result(
    self,
    result: dict[str, Any],
    renderer: StreamRenderer,
    prompt: str,
    *,
    extend_files: bool = False,
) -> None:
    """Fold one finished stream's result into the session state.

    Updates the running totals (cost, input, cached input,
    cache-write, output, turns), the per-turn figures used by the
    end-of-turn summary, the pending file-touch list, and the
    context high-water mark that drives compaction.

    Used by both the main turn and the tool-leak nudge so the two
    paths account identically.

    With ``extend_files=True`` (the nudge path) the per-turn
    figures and the pending file list are ADDED to; otherwise they
    are replaced."""
    usd = float(result.get("cost_usd", 0.0))
    fresh_in = int(result.get("tokens_in", 0))
    cache_read = int(result.get("cached_tokens_in", 0))
    produced = int(result.get("tokens_out", 0))
    # Input as reported to the user = uncached + cache-read tokens.
    all_in = fresh_in + cache_read

    # Session-wide totals.
    self.total_cost += usd
    self.total_in += all_in
    self.total_cached_in += cache_read
    self.total_cache_write += int(result.get("cache_write_tokens", 0))
    self.total_out += produced
    self.turns += int(result.get("turns", 0))

    # Figures for this turn's summary line: a fresh turn overwrites
    # them, a nudge accumulates onto the same turn.
    if not extend_files:
        self._turn_in = all_in
        self._turn_out = produced
        self._turn_cost = usd
    else:
        self._turn_in += all_in
        self._turn_out += produced
        self._turn_cost += usd

    # File touches go on record straight away with an "unknown"
    # outcome, so they survive even if the turn is never judged;
    # ``_attribute_pending`` revises the outcome later.
    touched_now = list(renderer.files_touched)
    if not extend_files:
        self._pending_files = touched_now
    else:
        self._pending_files.extend(touched_now)
    self._last_prompt = prompt
    if touched_now:
        file_history.record_touches(
            self.project.root,
            touched_now,
            outcome="unknown",
            summary=prompt,
        )

    # Compaction trigger tracks the high-water mark of the latest
    # input size rather than a running sum (which would count the
    # resent history repeatedly).
    self._compact_tokens = _context_high_water(
        self._compact_tokens, tokens_in=fresh_in, cached_in=cache_read
    )
1783
+
1784
async def _turn(self, prompt: str, *, agent: Any | None = None) -> None:
    """Stream one agent run for ``prompt``, reusing the
    session so conversation history carries forward.

    ``agent`` overrides the team coordinator for this turn —
    the solo fast path passes the coder-kernel agent here (see
    ``_route_turn``). Default ``None`` keeps the team. Both run
    under the same ``session_id`` + memory db, so history is
    continuous whichever route a turn takes.

    Spinner UX: Rich's ``console.status`` runs continuously for
    the whole turn. The renderer drives its label via two
    callbacks — ``set_status(label)`` updates the text,
    ``pause_status()`` stops it (used while assistant prose is
    streaming, since the spinner shares the cursor line). Labels
    come from the in-flight event: "delegating to coder...",
    "running: pytest -q", "searching: openpyxl write_only", or
    a generic "thinking..." between events. The point is to
    avoid the long blank stretches the old "drop on first event"
    scheme produced in Supervisor mode.

    After the stream completes the turn is accounted
    (``_account_result``), a leaked-tool-call answer is nudged
    once, an ungrounded completion claim is removed from memory,
    the session pointer is saved, the per-turn summary is printed
    and compaction is considered. If the main stream fails the
    method returns early and none of that runs."""
    status = console.status(
        "[dim]loomflowing...[/dim]", spinner="dots"
    )
    status.start()
    status_running = True

    def set_status(label: str) -> None:
        """Update the spinner label, restarting it if it was
        paused for a prose burst."""
        nonlocal status_running
        if not status_running:
            status.start()
            status_running = True
        status.update(f"[dim]{label}[/dim]")

    def pause_status() -> None:
        """Stop the spinner so streamed text can use the cursor
        line cleanly. ``set_status`` restarts it later."""
        nonlocal status_running
        if status_running:
            status.stop()
            status_running = False

    # Point the ApprovalGate's spinner hooks at THIS turn's
    # closures. Resume re-labels to a neutral "thinking..." since
    # the gate has no event to name.
    self._active_pause_spinner = pause_status
    self._active_resume_spinner = lambda: set_status("thinking...")

    renderer = StreamRenderer(
        set_status=set_status,
        pause_status=pause_status,
        sandbox=self._sandbox,
        # Defer event rendering while the approval selector is on
        # screen — concurrent prints displace its in-place redraw.
        gate_active=lambda: self._gate_active,
    )

    # The Ralph loop now lives in loomflow itself (>=0.10.8) via
    # the StopHook protocol — Agent(living_plan=True) auto-
    # registers a hook that re-prompts when any plan step is
    # still `doing`/`todo` after the architecture exits. We just
    # consume the agent's stream; the framework handles
    # continuation, bounded by ``max_stop_hook_iterations``.
    ok = await self._consume_agent_stream(
        agent if agent is not None else self.agent,
        prompt,
        renderer,
        pause_status,
    )
    if not ok:
        return

    if renderer.last_plan:
        self.last_plan = renderer.last_plan
    result = renderer.last_result
    # Stash for post-turn inspection (e.g. /goal reads
    # interruption_reason to report goal-met vs guardrail-stop).
    self.last_result = result
    agent_output = ""
    # Tool calls made by an ADOPTED nudge run. They back the final
    # ``agent_output`` just as the main run's calls do, so the
    # anti-poison gate below must count them too.
    nudge_tool_calls = 0
    if result:
        self._account_result(result, renderer, prompt)
        self._pending_slugs = list(
            result.get("cited_slugs") or []
        )
        agent_output = str(result.get("output") or "")

        # Weak-model guard: the "answer" is a tool call leaked as
        # text (structured tool_calls was empty, so the loop ended
        # the turn). Nudge ONCE — same session, so the model sees
        # its own leaked reply — then take whatever comes back.
        if _looks_like_leaked_tool_call(agent_output):
            console.print(
                " [yellow]model emitted a tool call as text — "
                "nudging it to use the tool interface[/yellow]"
            )
            nudge_renderer = StreamRenderer(
                set_status=set_status,
                pause_status=pause_status,
                sandbox=self._sandbox,
                gate_active=lambda: self._gate_active,
            )
            ok = await self._consume_agent_stream(
                agent if agent is not None else self.agent,
                _TOOL_LEAK_NUDGE,
                nudge_renderer,
                pause_status,
            )
            if ok and nudge_renderer.last_result:
                n = nudge_renderer.last_result
                # Same accounting as the main path (cost, all token
                # buckets incl. cache_write, turns, file touches,
                # compaction high-water) — via the shared helper so
                # a nudged turn can't under-count or stall compaction.
                self._account_result(
                    n, nudge_renderer, prompt, extend_files=True
                )
                self.last_result = n
                agent_output = str(n.get("output") or "")
                # The output now comes from the nudge run, so its
                # tool calls are the ones grounding it.
                nudge_tool_calls = len(nudge_renderer._call_names)

        # Surface framework-level stop-hook exhaustion so the
        # user knows the cap was hit (and can raise it with
        # /set_continue_cap N).
        if result.get("interrupted") and (
            result.get("interruption_reason")
            == "stop_hook_iterations_exhausted"
        ):
            console.print(
                "\n [yellow]plan still had work but the agent "
                f"hit the auto-continue cap "
                f"({self._auto_continue_limit}) — type "
                "'continue' to push further, raise the cap "
                "with /set_continue_cap N, or accept the "
                "partial result[/yellow]"
            )

    pause_status()
    self._compact_exchanges.append((prompt, agent_output))

    # Anti-poison gate: if the turn made ZERO tool calls AND the
    # output is a bare completion claim ("all issues fixed"),
    # the episode loomflow just persisted is a hallucination
    # with no grounding — and a self-reinforcing one (recall
    # surfaces it → next turn parrots it → new episode → doom
    # loop). Delete it so it can't poison future recall. We
    # only nuke the no-tool-call completion-claim case;
    # legitimate no-tool answers ("what does X mean?") don't
    # match the completion-claim pattern and are kept.
    # Both the main run and an adopted nudge run count: a nudged
    # turn that did real tool work is grounded and must be kept.
    n_tool_calls = len(renderer._call_names) + nudge_tool_calls
    if n_tool_calls == 0 and _looks_like_completion_claim(
        agent_output
    ):
        # Under /isolate the live session writes to the WORKTREE's
        # .loom/memory.db — target that one, not the main project's.
        active_root = (self._isolated_project or self.project).root
        deleted = _delete_last_episode(
            active_root / LOOM_DIR / "memory.db",
            session_id=self.session_id,
            user_id=_USER_ID,
        )
        if deleted:
            console.print(
                " [dim](skipped persisting an unverified "
                "'done' claim — no tool calls backed it)[/dim]"
            )

    # Persist the current session_id to disk so /resume on the
    # next REPL launch knows what to rehydrate. Done after EVERY
    # turn (not just on /exit) so a crash doesn't lose the
    # session pointer. Cheap — one short write to a small file.
    self._save_session_pointer()

    if self._pending_slugs:
        console.print(
            " [dim]if that worked, just continue — or "
            "/bad if it didn't[/dim]"
        )
    # End-of-turn separator: a full-width rule closing THIS
    # response, right-labelled with the turn's own token usage +
    # cost, so every answer is cleanly delimited and you can see
    # what it cost at a glance (not just the cumulative session).
    self._print_turn_summary()

    # Maybe compact. Done AFTER the turn renders + the
    # pending-slugs hint prints so the user sees the natural
    # turn boundary before any compaction status appears.
    await self._maybe_compact()
1971
+
1972
def _print_turn_summary(self) -> None:
    """Close the response with a horizontal rule that ends in this
    turn's token counts and cost.

    The rule is sized from the console width so the stats sit at
    the right edge. A cost of exactly zero is shown as ``free``
    rather than ``$0.0000``."""
    if self._turn_cost == 0:
        cost = "free"
    else:
        cost = f"${self._turn_cost:.4f}"
    tokens = f"{self._turn_in:,} in · {self._turn_out:,} out · "
    # Width is measured on the unstyled text; markup adds no columns.
    visible = tokens + cost
    dashes = "─" * max(0, console.size.width - len(visible) - 3)
    console.print(
        f"[bright_black]{dashes}[/bright_black] "
        f"[dim]{tokens}[green]{cost}[/green][/dim]"
    )
1989
+
1990
+ # ---- self-improvement attribution -----------------------------------
1991
+
1992
async def _attribute_pending(
    self, *, success: bool, quiet: bool
) -> None:
    """Hand the pending turn's cited slugs and touched files to
    ``attribute_turn`` and clear the pending state.

    ``success`` selects crediting versus debiting. ``quiet``
    silences the confirmation line; it is used on the implicit
    'moved on, so it worked' path so the REPL does not chatter
    after every turn."""
    # The shared pipeline decides what attribution means; this
    # method only owns the pending state and the console feedback.
    from .turn import attribute_turn

    # Take ownership of the pending lists and reset them BEFORE the
    # await, so the state is already clear while attribution runs.
    files, slugs = self._pending_files, self._pending_slugs
    self._pending_files, self._pending_slugs = [], []

    count = await attribute_turn(
        self.workspace,
        self.project.root,
        success=success,
        slugs=slugs,
        files=files,
        user_id=_USER_ID,
    )
    if not count or quiet:
        return
    verb = "credited" if success else "debited"
    console.print(
        f" [dim]{verb} {count} note(s) from the last turn[/dim]"
    )
2023
+
2024
+ # ---- slash commands -------------------------------------------------
2025
+
2026
async def _handle_slash(self, line: str) -> bool:
    """Dispatch a /command. Returns False to exit the REPL.

    ``line`` is split once on whitespace: the first token
    (lower-cased) selects the command, the remainder (stripped)
    is its argument. Only ``/exit`` and ``/quit`` return False;
    every other command — including an unknown one, which just
    prints a pointer to ``/help`` — returns True."""
    parts = line.split(maxsplit=1)
    cmd = parts[0].lower()
    arg = parts[1].strip() if len(parts) > 1 else ""

    if cmd in ("/exit", "/quit"):
        return False
    if cmd == "/help":
        console.print(_render_help())
    elif cmd == "/init-loom":
        from .rules import init_agents_md

        # ``created`` distinguishes a freshly written rules file
        # from one that was already present.
        path, created = init_agents_md(self.project.root)
        if created:
            console.print(
                f"[green]created {path.name}[/green] — a starter "
                "rules file loom-code reads every session. Edit it, "
                'or just state rules in chat (e.g. "never edit X") '
                "and loom-code will save them here."
            )
        else:
            console.print(
                f"[dim]{path.name} already exists — loom-code "
                "already reads it. Edit it directly, or state rules "
                "in chat.[/dim]"
            )
    elif cmd == "/plan":
        if arg:
            # "/plan <task>" reads as "plan and do <task>" — run
            # it as a normal task. loom-code plans every task
            # anyway (living_plan=True), so the plan shows up
            # mid-stream and `/plan` with no arg replays it.
            await self._attribute_pending(
                success=True, quiet=False
            )
            await self._inject_loom_context(arg)
            await self._turn(arg)
        elif self.last_plan:
            console.print(Text(self.last_plan, style="dim"))
        else:
            console.print(
                "[dim]no plan yet — give loom-code a task, or "
                "`/plan <task>` to start one[/dim]"
            )
    elif cmd == "/cost":
        # ``total_in`` includes cached input, so subtract it to get
        # the uncached share shown before the "+".
        uncached = self.total_in - self.total_cached_in
        # Cache-hit ratio over total input tokens. The ratio
        # tells the user whether their prompt-caching investment
        # is actually paying off — a low ratio means the system
        # prompt is changing turn-to-turn (cache-bust) or the
        # provider doesn't expose cache reads.
        hit_pct = (
            (self.total_cached_in / self.total_in * 100.0)
            if self.total_in > 0
            else 0.0
        )
        console.print(
            Text.assemble(
                (" session: ", "dim"),
                (f"{self.turns} turns", ""),
                (" · ", "dim"),
                (
                    f"{uncached:,}+{self.total_cached_in:,} in / "
                    f"{self.total_out:,} out",
                    "",
                ),
                (" · ", "dim"),
                (f"${self.total_cost:.4f}", "green"),
            )
        )
        # Second line: cache breakdown. Only render when there's
        # something to report — keeps the empty-session output
        # uncluttered. ``cache_write`` only fires on Anthropic
        # (5m TTL = +25%, 1h = +100%); on OpenAI it stays 0.
        if self.total_cached_in > 0 or self.total_cache_write > 0:
            # green at >= 50% hits, yellow at >= 20%, dim below.
            cache_color = (
                "green" if hit_pct >= 50 else "yellow"
                if hit_pct >= 20 else "dim"
            )
            segments: list[tuple[str, str]] = [
                (" cache: ", "dim"),
                (f"{hit_pct:.1f}% hit", cache_color),
            ]
            if self.total_cache_write > 0:
                segments.extend(
                    [
                        (" · ", "dim"),
                        (
                            f"{self.total_cache_write:,} write",
                            "dim",
                        ),
                    ]
                )
            console.print(Text.assemble(*segments))
        # Third line: token-optimisation tier counters. Each
        # entry only renders when its counter is non-zero —
        # opted-out features stay invisible. The three counters
        # map 1:1 to the three opt-in framework knobs in
        # build_agent (snip_window, auto_compact_at_tokens,
        # tool_result_summarizer) — seeing zeros across the
        # board means "the conversation never got large enough
        # to need any of them," which is a useful diagnostic
        # signal on its own.
        opt_segments: list[tuple[str, str]] = []
        if self.total_snips > 0:
            opt_segments.append(
                (f"{self.total_snips} snip", "dim")
            )
        if self.total_compacts > 0:
            if opt_segments:
                opt_segments.append((" · ", "dim"))
            opt_segments.append(
                (f"{self.total_compacts} compact", "dim")
            )
        if self.total_summaries > 0:
            if opt_segments:
                opt_segments.append((" · ", "dim"))
            opt_segments.append(
                (f"{self.total_summaries} tool-summary", "dim")
            )
        if opt_segments:
            console.print(
                Text.assemble((" optim: ", "dim"), *opt_segments)
            )
    elif cmd == "/good":
        # Rating is only offered when the last turn cited notes.
        if self._pending_slugs:
            await self._attribute_pending(
                success=True, quiet=False
            )
        else:
            console.print(
                " [dim]nothing pending to rate[/dim]"
            )
    elif cmd == "/bad":
        if self._pending_slugs:
            await self._attribute_pending(
                success=False, quiet=False
            )
        else:
            console.print(
                " [dim]nothing pending to rate[/dim]"
            )
    elif cmd == "/model":
        # No argument shows the current model; an argument switches.
        if not arg:
            console.print(
                f" [dim]current model: {self.model}[/dim]"
            )
        else:
            self._switch_model(arg)
    elif cmd == "/clear":
        self.session_id = new_id()
        self.last_plan = None
        self._compact_tokens = 0
        self._compact_exchanges.clear()
        reset_paste_stash()
        # Fresh start also revokes outside-file edit grants —
        # they belong to the conversation that named them.
        from . import consent

        consent.reset()
        # Move the on-disk pointer to the NEW session so a
        # later /resume doesn't rewind into the conversation
        # the user just told us to forget. /clear means "I
        # want a fresh start," and that should survive a
        # quit + relaunch.
        self._save_session_pointer()
        console.print(
            " [dim]fresh conversation — prior turns "
            "forgotten[/dim]"
        )
    # Everything below delegates straight to a dedicated handler.
    elif cmd == "/compact":
        await self._handle_compact()
    elif cmd == "/compress_token_length":
        self._handle_compress_command(arg)
    elif cmd == "/set_model":
        await self._handle_set_model()
    elif cmd == "/set_web":
        await self._handle_set_web()
    elif cmd == "/resume":
        await self._handle_resume(arg)
    elif cmd == "/export":
        self._handle_export()
    elif cmd == "/set_continue_cap":
        self._handle_set_continue_cap(arg)
    elif cmd == "/effort":
        self._handle_effort(arg)
    elif cmd == "/mode":
        self._handle_mode(arg)
    elif cmd == "/isolate":
        self._handle_isolate()
    elif cmd == "/review":
        self._handle_review()
    elif cmd == "/merge":
        self._handle_merge()
    elif cmd == "/discard":
        self._handle_discard()
    elif cmd == "/mcp":
        await self._handle_mcp()
    elif cmd == "/computer":
        await self._handle_computer(arg)
    elif cmd == "/goal":
        await self._handle_goal(arg)
    else:
        console.print(
            f" [yellow]unknown command {cmd}[/yellow] — "
            "/help for the list"
        )
    return True
2235
+
2236
async def _handle_mcp(self) -> None:
    """List the connected MCP servers + their tools.

    Reads the registry stashed on the coordinator by ``build_agent``.
    Connecting is lazy, so this is the first thing that actually
    opens the sessions — surfaces a misconfigured server here rather
    than mid-task.

    All text that does not originate in this file (server names,
    tool names, tool descriptions, the failure message) is escaped
    before being placed in console markup, and so is the literal
    ``[[mcp]]`` in the setup hint — unescaped, its inner ``[mcp]``
    would be parsed as a style tag and the hint would show ``[]``."""
    # Function-scope import keeps the dependency local to the block.
    from rich.markup import escape

    registry = getattr(self.agent, "_mcp_registry", None)
    if registry is None:
        console.print(
            " [dim]No MCP servers configured. Add an "
            f"{escape('[[mcp]]')} block "
            "to .loom/settings.toml (or ~/.loom-code/settings.toml) "
            "and restart.[/dim]"
        )
        return
    names = registry.server_names
    listed = ", ".join(escape(str(n)) for n in names) if names else "—"
    console.print(
        f" [cyan]MCP servers[/cyan] ({len(names)}): {listed}"
    )
    try:
        tools = await registry.list_tools()  # lazily connects
    except Exception as exc:  # noqa: BLE001 — surface, don't crash
        console.print(
            f" [red]failed to list MCP tools:[/red] {escape(str(exc))}"
        )
        return
    if not tools:
        console.print(" [dim]no tools exposed yet.[/dim]")
        return
    console.print(f" [cyan]tools[/cyan] ({len(tools)}):")
    for t in tools:
        # Only the first line of a (possibly multi-line) description.
        desc = (t.description or "").strip().splitlines()
        first = desc[0] if desc else ""
        console.print(
            f" [green]{escape(str(t.name))}[/green] "
            f"[dim]{escape(first)}[/dim]"
        )
2271
+
2272
def _switch_model(self, model: str) -> None:
    """Move the session onto a different model and rebuild the agent.

    The requested name is first normalised (so the short provider
    aliases accepted by ``--model`` work here too), then a key for
    it is ensured. If that fails the switch is cancelled and the
    current model is kept. On success the new model is stored,
    the agent is rebuilt, and the choice is saved as the preferred
    model for the next launch."""
    from .credentials import (
        normalize_model,
        quiet_litellm_model_warnings,
    )

    canonical = normalize_model(model)
    quiet_litellm_model_warnings(canonical)

    # A key must be in place BEFORE the agent is rebuilt; without
    # one the build would fail inside the provider SDK.
    # ``ensure_key_for_model`` prompts inline when needed.
    have_key = ensure_key_for_model(canonical, console)
    if not have_key:
        console.print(
            " [yellow]model switch cancelled — staying on "
            f"{self.model}[/yellow]"
        )
        return

    self.model = canonical
    self._rebuild_agent()

    # Remember the choice so the next launch starts on this model.
    from .credentials import save_preferred_model

    save_preferred_model(canonical)
    console.print(
        f" [dim]switched to {canonical} — fresh conversation[/dim]"
    )
2310
+
2311
def _handle_mode(self, arg: str) -> None:
    """``/mode [default|accept-edits|plan|yolo]`` — show or set the
    approval mode applied to calls that no permission rule matches.

    With no argument the current mode is printed. A recognised mode
    is written straight onto ``self._gate``; no agent rebuild happens
    in this handler.

    * ``default`` — ask before every mutation (write/edit/bash).
    * ``accept-edits`` — auto-allow file edits; bash still asks.
    * ``plan`` — read-only: all mutation is denied.
    * ``yolo`` — allow everything (the danger gate still fires).
    """
    from .permissions import Mode, parse_mode

    requested = arg.strip()
    if requested == "":
        console.print(
            f" [dim]current mode: {self._gate.mode.value}[/dim] "
            "[dim](usage: /mode default|accept-edits|plan|yolo)"
            "[/dim]"
        )
        return

    parsed = parse_mode(requested)
    if parsed is None:
        console.print(
            f" [yellow]unknown mode {requested!r}[/yellow] — "
            "use default | accept-edits | plan | yolo"
        )
        return

    self._gate.mode = parsed

    # One-line human description per mode, shown as confirmation.
    descriptions = {
        Mode.DEFAULT: "asking before every mutation",
        Mode.ACCEPT_EDITS: (
            "auto-allowing file edits; bash still asks"
        ),
        Mode.PLAN: "read-only — all mutation denied",
        Mode.YOLO: (
            "allowing everything (danger gate still fires)"
        ),
    }
    summary = descriptions[parsed]
    console.print(
        f" [dim]mode → [/dim][cyan]{parsed.value}[/cyan]"
        f"[dim] — {summary}[/dim]"
    )
2357
+
2358
def _handle_effort(self, arg: str) -> None:
    """``/effort [low|medium|high|off]`` — show or set the
    reasoning-effort dial.

    No argument prints the current value. ``off``, ``none`` and
    ``default`` all clear the dial (stored as ``None``). A change
    triggers ``_rebuild_agent``; asking for the value already in
    effect is reported and leaves the agent untouched.
    """
    levels = ("low", "medium", "high")
    clearing_words = ("off", "none", "default")

    requested = arg.strip().lower()
    if requested == "":
        console.print(
            f" [dim]current effort: "
            f"{self._effort or 'default'}[/dim] "
            "[dim](usage: /effort low|medium|high|off)[/dim]"
        )
        return

    if requested not in clearing_words and requested not in levels:
        console.print(
            f" [yellow]unknown effort {requested!r}[/yellow] — "
            "use low | medium | high | off"
        )
        return

    # Clearing words map to None (the "default" setting).
    target: str | None = requested if requested in levels else None

    if target == self._effort:
        console.print(
            f" [dim]effort already {target or 'default'}[/dim]"
        )
        return

    self._effort = target
    self._rebuild_agent()
    console.print(
        f" [dim]reasoning effort → {target or 'default'} "
        "— fresh conversation[/dim]"
    )
2393
+
2394
+ # ---- session worktree isolation -----------------------------------
2395
+
2396
def _handle_isolate(self) -> None:
    """``/isolate`` — move this session into its own git worktree.

    Does nothing (beyond a notice) when the session is already
    isolated or the project root is not a git repo. Otherwise a
    worktree is created for this ``session_id``, the project is
    re-detected at the worktree path, and the agent is rebuilt
    rooted there. ``/merge`` or ``/discard`` ends the isolation.
    """
    existing = self._worktree
    if existing is not None:
        console.print(
            f" [dim]already isolated on "
            f"{existing.branch}[/dim]"
        )
        return

    if not worktree.is_git_repo(self.project.root):
        console.print(" [yellow]/isolate needs a git repo[/yellow]")
        return

    created, failure = worktree.create(self.project.root, self.session_id)
    if created is None:
        console.print(f" [red]isolate failed:[/red] {failure}")
        return

    self._worktree = created
    self._isolated_project = detect_project(created.path)
    self._rebuild_agent()
    console.print(
        f" [dim]isolated → worktree on [cyan]{created.branch}[/cyan] "
        f"(base {created.base}). Edits stay here until "
        "/merge or /discard.[/dim]"
    )
2422
+
2423
def _handle_review(self) -> None:
    """``/review`` — print the isolated session's diff (read-only).

    Requires an active worktree; reports a diff failure or an empty
    diff instead of printing anything else.
    """
    active = self._worktree
    if active is None:
        console.print(" [dim]not isolated — /isolate first[/dim]")
        return

    diff_text, failure = worktree.diff(active)
    if failure:
        console.print(f" [red]diff failed:[/red] {failure}")
    elif not diff_text.strip():
        console.print(" [dim]no changes in this session yet[/dim]")
    else:
        self._print_diff(diff_text)
2437
+
2438
def _handle_merge(self) -> None:
    """``/merge`` — show the session's diff, merge its branch into
    base, then remove the worktree and return to the main tree.

    The diff is a preview only: if it cannot be produced, the failure
    is reported (rather than silently treated as "no changes") and the
    merge still proceeds. If the merge itself fails, the worktree and
    isolation state are left intact so the user can retry or discard.
    """
    info = self._worktree
    if info is None:
        console.print(" [dim]not isolated — nothing to merge[/dim]")
        return

    text, diff_err = worktree.diff(info)
    if diff_err:
        # Same failure signal /review checks; also guards against
        # ``text`` being unusable when the diff failed.
        console.print(
            f" [yellow]diff preview unavailable:[/yellow] {diff_err}"
        )
    elif text and text.strip():
        self._print_diff(text)
    else:
        console.print(" [dim](no changes to merge)[/dim]")

    ok, err = worktree.merge(self.project.root, info)
    if not ok:
        console.print(f" [red]merge failed:[/red] {err}")
        return

    worktree.remove(self.project.root, info)
    self._worktree = None
    self._isolated_project = None
    self._rebuild_agent()
    console.print(
        f" [dim]merged [cyan]{info.branch}[/cyan] → {info.base} "
        "+ cleaned up — back on the main tree[/dim]"
    )
2462
+
2463
def _handle_discard(self) -> None:
    """``/discard`` — remove the session's worktree, clear the
    isolation state, and rebuild the agent on the main tree."""
    dropped = self._worktree
    if dropped is None:
        console.print(" [dim]not isolated — nothing to discard[/dim]")
        return

    worktree.remove(self.project.root, dropped)
    self._worktree = self._isolated_project = None
    self._rebuild_agent()
    console.print(
        f" [dim]discarded [cyan]{dropped.branch}[/cyan] — back on "
        "the main tree[/dim]"
    )
2478
+
2479
def _print_diff(self, text: str) -> None:
    """Print a unified diff line by line, coloured by line type.

    Rules are tried in order and the first matching prefix wins, so
    the ``+++``/``---`` file headers are caught before the plain
    ``+``/``-`` content rules. Empty lines are printed as a single
    space.
    """
    prefix_styles = (
        (("+++", "---"), "dim"),
        (("@@",), "cyan"),
        (("diff --git", "index "), "bold dim"),
        (("+",), "green"),
        (("-",), "red"),
    )
    for line in text.splitlines():
        colour = next(
            (
                style
                for prefixes, style in prefix_styles
                if line.startswith(prefixes)
            ),
            "default",
        )
        console.print(Text(line or " ", style=colour))
2496
+
2497
async def _handle_computer(self, arg: str) -> None:
    """``/computer [task]`` — switch on computer-operator mode and
    optionally run a task in it.

    On first activation the current model is remembered in
    ``_pre_operator_model``, a stronger model is adopted if
    ``_pick_operator_model`` offers a different one, and the agent is
    rebuilt. The confirmation banner is printed on every call, even
    when the mode was already on. A non-empty ``arg`` is then run as
    a turn.
    """
    if not self._browser_mode:
        self._browser_mode = True
        # Keep the coding model so it can be restored later.
        self._pre_operator_model = self.model
        upgrade = self._pick_operator_model()
        if upgrade and upgrade != self.model:
            self.model = upgrade
        self._rebuild_agent()

    console.print(
        " [green]✓[/green] computer operator on — driving a visible "
        f"browser + files/shell/apps, on [cyan]{self.model}[/cyan]. "
        "[dim]A Chromium window opens on the first web action.[/dim]"
    )

    task = arg.strip()
    if task:
        await self._turn(task)
2526
+
2527
async def _handle_goal(self, arg: str) -> None:
    """``/goal <task>`` — run the agent until a goal condition is met.

    The handler arms ``self._run_until`` with the condition and three
    guardrails (max passes, no-progress limit, cost cap), rebuilds the
    agent on the SAME session so the goal sees the conversation so
    far, runs one turn, then always disarms and rebuilds back to a
    normal agent.

    The task text is also the stop condition unless the explicit
    ``/goal <task> :: <condition>`` form is used, in which case both
    sides must be non-empty.

    The final report says "goal met" only when the last result carries
    no interruption at all. A missing result or any interruption
    reason — including ones this handler does not recognise, such as
    a user-initiated stop — is reported as "not confirmed".
    """
    arg = arg.strip()
    if not arg:
        console.print(
            " [yellow]usage: /goal <task> — e.g. "
            "/goal make all tests pass[/yellow]\n"
            " [dim]optional explicit condition: "
            "/goal <task> :: <condition>[/dim]"
        )
        return

    # Optional "task :: condition" split; by default the task doubles
    # as the condition.
    if "::" in arg:
        task, _, condition = arg.partition("::")
        task, condition = task.strip(), condition.strip()
        if not task or not condition:
            console.print(
                " [yellow]both sides of :: must be non-empty — "
                "/goal <task> :: <condition>[/yellow]"
            )
            return
    else:
        task = condition = arg

    # Guardrail values, named once so the spec and every message that
    # quotes them cannot drift apart.
    max_passes = 20
    max_no_progress = 3
    max_cost_usd = 2.0

    # Cheap same-provider checker; None lets the framework fall back
    # to the main model.
    checker = self._pick_checker_model()
    self._run_until = {
        "condition": condition,
        "max_iterations": max_passes,
        "max_no_progress": max_no_progress,
        "max_cost_usd": max_cost_usd,
    }
    if checker is not None:
        self._run_until["checker"] = checker

    # The goal loop needs room to re-prompt, so lift the auto-continue
    # cap for this turn only; it is restored on every exit path.
    saved_cap = self._auto_continue_limit
    self._auto_continue_limit = max(saved_cap, max_passes)
    try:
        self._rebuild_agent(keep_session=True)
    except TypeError:
        # Installed loomflow predates ``run_until=``. Disarm and
        # rebuild clean so the REPL keeps working.
        self._run_until = None
        self._auto_continue_limit = saved_cap
        self._rebuild_agent(keep_session=True)
        console.print(
            " [yellow]/goal needs a newer loomflow than is "
            "installed (Agent(run_until=) is missing). "
            "Upgrade loomflow and retry.[/yellow]"
        )
        return

    checker_label = checker or f"{self.model} (no cheap checker key)"
    console.print(
        f" [green]🎯 goal:[/green] {condition}\n"
        f" [dim]checker {checker_label} · max {max_passes} passes · "
        f"no-progress {max_no_progress} · cap ${max_cost_usd:.2f} · "
        "Esc to stop[/dim]"
    )

    try:
        await self._inject_loom_context(task)
        await self._turn(task)
    finally:
        # Disarm: clear the spec, restore the cap, and rebuild back to
        # a normal agent while keeping the session history.
        self._run_until = None
        self._auto_continue_limit = saved_cap
        self._rebuild_agent(keep_session=True)

    result = self.last_result
    if result is None:
        # No result recorded: success cannot be claimed.
        console.print(
            " [yellow]⚠ goal not confirmed — the turn produced no "
            "result.[/yellow]"
        )
        return

    reason = (result or {}).get("interruption_reason") or ""
    if reason.startswith("run_until:"):
        why = reason.split(":", 1)[1]
        pretty = {
            "max_iterations": f"hit the {max_passes}-pass cap",
            "no_progress": "stopped making progress",
            "cost_cap": f"hit the ${max_cost_usd:.2f} cost cap",
        }.get(why, why)
        console.print(
            f" [yellow]⚠ goal not confirmed — {pretty}.[/yellow] "
            "[dim]Review the work above; re-run /goal to continue.[/dim]"
        )
    elif reason == "stop_hook_iterations_exhausted":
        console.print(
            " [yellow]⚠ goal not confirmed — auto-continue cap "
            "reached.[/yellow]"
        )
    elif reason or (result or {}).get("interrupted"):
        # Any other interruption means the checker never confirmed
        # the condition — do not report success.
        console.print(
            " [yellow]⚠ goal not confirmed — run stopped early "
            f"({reason or 'interrupted'}).[/yellow]"
        )
    else:
        console.print(
            " [green]✓ goal met[/green] — the checker confirmed the "
            "condition."
        )
2649
+
2650
+ def _pick_operator_model(self) -> str | None:
2651
+ """Choose a strong reasoning model for operator mode — the first
2652
+ candidate whose API key is already configured (so we never prompt
2653
+ or fail). Returns None to keep the current model if no stronger
2654
+ one is usable."""
2655
+ from .credentials import required_env_for_model
2656
+
2657
+ cur = self.model.lower()
2658
+
2659
+ # If already on a capable model, keep it (don't downgrade).
2660
+ if cur in ("gpt-4.1", "claude-sonnet-4-6", "claude-opus-4-7") \
2661
+ or "opus" in cur:
2662
+ return None
2663
+
2664
+ # Choose the upgrade by which PROVIDER the user is already on, so
2665
+ # we never switch to a provider whose account may be unfunded.
2666
+ # A set key only proves the key exists, NOT that it has credits
2667
+ # (Anthropic 400s "credit balance too low" otherwise) — so we
2668
+ # stay within the current provider's family.
2669
+ if "claude" in cur:
2670
+ target = "claude-sonnet-4-6"
2671
+ else:
2672
+ # OpenAI family (gpt-*, o-series) → the strong OpenAI model.
2673
+ target = "gpt-4.1"
2674
+
2675
+ env = required_env_for_model(target)
2676
+ if env is None or os.environ.get(env):
2677
+ return target
2678
+ return None
2679
+
2680
def _pick_checker_model(self) -> str | None:
    """Return the cheap checker model for ``/goal``'s run-until loop.

    This is a thin delegation to ``cheap_model_for`` applied to the
    current model. A ``None`` result is passed straight through; the
    caller (``_handle_goal``) then omits the ``checker`` entry from
    the run-until spec.
    """
    checker_model = cheap_model_for(self.model)
    return checker_model
2697
+
2698
+ # ---- adaptive routing (solo fast path) ------------------------------
2699
+
2700
+ async def _route_turn(self, prompt: str) -> str:
2701
+ """Pick ``"solo"`` or ``"team"`` for this turn.
2702
+
2703
+ The supervisor team taxes a one-line fix with a full
2704
+ delegation round-trip (coordinator reads → delegates → coder
2705
+ re-reads), so obviously-small write tasks run on a standalone
2706
+ coder kernel instead. The decision is conservative — every
2707
+ branch that isn't a confident SOLO falls back to the team:
2708
+
2709
+ * /goal armed or operator mode → team (their hooks live on
2710
+ the coordinator).
2711
+ * Question-shaped prompts → team (the read-only coordinator
2712
+ answers those directly — no delegation tax to dodge — and
2713
+ it holds the repo map + notebook tools).
2714
+ * Otherwise the cheap classifier votes; no usable cheap
2715
+ model, classifier error, or anything but a clear SOLO →
2716
+ team. A misroute therefore costs at most the status-quo
2717
+ overhead, never a lost capability.
2718
+ """
2719
+ if self._run_until is not None or self._browser_mode:
2720
+ return "team"
2721
+ if _looks_like_question(prompt):
2722
+ return "team"
2723
+ if _references_prior_context(prompt):
2724
+ # "fix it" / "continue" lean on history the stateless
2725
+ # classifier can't see — the coordinator has it.
2726
+ return "team"
2727
+ return (
2728
+ "solo"
2729
+ if await self._classify_task(prompt) == "SOLO"
2730
+ else "team"
2731
+ )
2732
+
2733
async def _classify_task(self, prompt: str) -> str:
    """Return ``"SOLO"`` or ``"TEAM"`` for ``prompt``.

    The vote is cast by the model ``cheap_model_for`` returns for the
    current model. When there is none, a ``litellm/``-prefixed model
    classifies with itself; any other model gets ``"TEAM"`` without a
    call. The router agent is built lazily and cached on
    ``_router_agent``. Only the first 2000 characters of the prompt
    are sent. Any exception yields ``"TEAM"`` — routing must never
    kill a turn.
    """
    judge = cheap_model_for(self.model)
    if judge is None:
        if not str(self.model).lower().startswith("litellm/"):
            return "TEAM"
        judge = self.model  # self-classification

    try:
        router = self._router_agent
        if router is None:
            from loomflow import Agent as _Agent

            router = _Agent(
                _ROUTER_PROMPT, model=judge, prompt_caching=True
            )
            self._router_agent = router
        outcome = await router.run(prompt[:2000], user_id=_USER_ID)
        if "SOLO" in outcome.output.upper():
            return "SOLO"
        return "TEAM"
    except Exception:  # noqa: BLE001 — routing must never kill a turn
        return "TEAM"
2769
+
2770
+ def _get_solo_agent(self) -> Any:
2771
+ """Lazily build the standalone coder for the fast path —
2772
+ shares the team's memory db + notebook so context stays
2773
+ continuous across routes. Invalidated on /model, /set_web,
2774
+ and isolation changes via ``_rebuild_agent``."""
2775
+ if self._solo_agent is None:
2776
+ build_project = self._isolated_project or self.project
2777
+ self._solo_agent = build_solo_agent(
2778
+ build_project,
2779
+ model=self.model,
2780
+ approval_handler=self._gate.handler,
2781
+ web_backend=self._web_backend,
2782
+ effort=self._effort,
2783
+ sandbox=self._sandbox,
2784
+ sandbox_allow_network=self._sandbox_allow_network,
2785
+ extensions=self._extensions,
2786
+ )
2787
+ return self._solo_agent
2788
+
2789
def _rebuild_agent(self, *, keep_session: bool = False) -> None:
    """Rebuild ``self.agent`` / ``self.workspace`` from current state.

    ``build_agent`` is called with the session's current model, gate
    handler, web backend, auto-continue cap, extensions, effort,
    sandbox settings, operator flag and run-until spec. It is rooted
    at the isolated project when one is active, else the main
    project; extensions are always ``self._extensions``.

    Afterwards the cached solo/router agents and the block-hash cache
    are dropped and a new ``Compactor`` is created on the cheap model
    (or the main model when there is none).

    ``keep_session=True`` leaves ``session_id`` and the compaction
    accumulators alone (used by ``/goal``, which rebuilds on the same
    conversation). The default resets all three.
    """
    root_project = self._isolated_project or self.project
    build_options = dict(
        model=self.model,
        approval_handler=self._gate.handler,
        web_backend=self._web_backend,
        max_stop_hook_iterations=self._auto_continue_limit,
        extensions=self._extensions,
        effort=self._effort,
        sandbox=self._sandbox,
        sandbox_allow_network=self._sandbox_allow_network,
        operator=self._browser_mode,
        run_until=self._run_until,
    )
    self.agent, self.workspace = build_agent(root_project, **build_options)

    # Routing agents are model-derived — drop them so the next solo
    # route / classification is built on the new configuration.
    self._solo_agent = None
    self._router_agent = None

    # New agent/memory: forget which blocks were last written so the
    # dirty-check cannot wrongly skip the first write.
    self._block_hashes.clear()

    compactor_model = cheap_model_for(self.model) or self.model
    self._compactor = Compactor(model=compactor_model)

    if keep_session:
        return
    self._compact_tokens = 0
    self._compact_exchanges.clear()
    self.session_id = new_id()
2838
+
2839
+ # ---- automatic compaction ------------------------------------------
2840
+
2841
+ def _active_threshold(self) -> int:
2842
+ """Resolve the live threshold:
2843
+
2844
+ * positive int → explicit user override (set via
2845
+ ``/compress_token_length N``)
2846
+ * 0 → user disabled compaction (``... off``)
2847
+ * -1 (sentinel) → recompute from the active model
2848
+ """
2849
+ if self._compact_threshold >= 0:
2850
+ return self._compact_threshold
2851
+ return default_compact_threshold(self.model)
2852
+
2853
async def _maybe_compact(self) -> None:
    """Run a compaction if the session has crossed its threshold.

    Nothing happens when compaction is disabled (threshold ``0``),
    when the token count is still below the threshold, or when there
    are no recorded exchanges. Otherwise a lead-in line is printed
    and the work is handed to ``_compact_now``.
    """
    limit = self._active_threshold()
    nothing_to_do = (
        limit == 0  # explicitly disabled
        or self._compact_tokens < limit
        or not self._compact_exchanges
    )
    if nothing_to_do:
        return

    console.print(
        f" [dim]compacting {self._compact_tokens:,} tokens of "
        f"history (threshold {limit:,})...[/dim]"
    )
    await self._compact_now()
2870
+
2871
async def _handle_compact(self) -> None:
    """``/compact`` — compact immediately, ignoring the auto
    threshold. Reports and returns when no exchanges have been
    recorded yet."""
    if self._compact_exchanges:
        console.print(
            f" [dim]compacting {self._compact_tokens:,} tokens of "
            f"history (manual)...[/dim]"
        )
        await self._compact_now()
        return

    console.print(
        " [dim]nothing to compact yet — no completed "
        "turns this session[/dim]"
    )
2887
+
2888
async def _compact_now(self) -> None:
    """Shared compaction body used by the automatic and manual paths.

    Steps, each of which aborts quietly on failure:

    1. summarise the recorded exchanges with the compactor;
    2. store the summary in the agent's memory as the
       ``session_summary`` block;
    3. start a new ``session_id`` and reset the token/exchange
       accumulators.

    Callers are expected to have checked that there are exchanges to
    compact and to have printed their own lead-in line.
    """
    try:
        digest = await self._compactor.compact(self._compact_exchanges)
    except Exception as exc:  # noqa: BLE001 — never fatal
        console.print(
            f" [yellow]compaction failed: {exc} — continuing "
            "without it (use /clear if you hit context "
            "limits)[/yellow]"
        )
        return

    if not digest:
        return

    # Land the summary as a memory block BEFORE resetting anything, so
    # a failed write leaves the existing thread untouched.
    try:
        await self.agent.memory.update_block(
            "session_summary", digest, user_id=_USER_ID
        )
    except Exception as exc:  # noqa: BLE001 — never fatal
        console.print(
            f" [yellow]could not write summary to memory: "
            f"{exc}[/yellow]"
        )
        return

    self.session_id = new_id()
    self._compact_tokens = 0
    self._compact_exchanges.clear()
    console.print(
        f" [dim]compacted into {len(digest)}-char summary "
        f"in memory; new conversation thread.[/dim]"
    )
2930
+
2931
def _handle_set_continue_cap(self, arg: str) -> None:
    """``/set_continue_cap [N]`` — view or set the auto-continue cap.

    With no argument the current value is shown. ``N`` must be a
    non-negative integer; ``0`` disables auto-continue and values
    above 100 are clamped to 100 to stop a typo from running up cost.

    The cap is a construction-time argument of the agent, so a CHANGE
    rebuilds the agent, which also resets the conversation. Asking for
    the value already in effect is a no-op: it is reported and the
    conversation is left intact (mirroring ``/effort``).
    """
    arg = arg.strip()
    if not arg:
        console.print(
            f" [dim]auto-continue cap: "
            f"[b]{self._auto_continue_limit}[/b] "
            f"(default {_AUTO_CONTINUE_LIMIT_DEFAULT}, "
            "0 disables)[/dim]"
        )
        return
    try:
        n = int(arg)
    except ValueError:
        console.print(
            " [yellow]usage: /set_continue_cap <N> — N is "
            "an integer ≥ 0 (0 disables)[/yellow]"
        )
        return
    if n < 0:
        console.print(
            " [yellow]cap must be non-negative (use 0 to "
            "disable auto-continue)[/yellow]"
        )
        return
    if n > 100:
        console.print(
            " [yellow]cap clamped to 100 to prevent runaway "
            "cost on a typo. Use /set_continue_cap 100 if you "
            "really meant that.[/yellow]"
        )
        n = 100

    old = self._auto_continue_limit
    if n == old:
        # Nothing to change — rebuilding would only throw away the
        # current conversation.
        console.print(
            f" [dim]auto-continue cap already [b]{n}[/b][/dim]"
        )
        return

    self._auto_continue_limit = n
    # Rebuild so the new construction-time value takes effect.
    self._rebuild_agent()
    if n == 0:
        console.print(
            f" [dim]auto-continue [b red]disabled[/b red] "
            f"(was {old}). Multi-step plans now stop after "
            "their first ReAct exit; type 'continue' to nudge "
            "manually.[/dim]"
        )
    else:
        console.print(
            f" [dim]auto-continue cap: [b]{old}[/b] → "
            f"[b green]{n}[/b green][/dim]"
        )
2990
+
2991
def _handle_compress_command(self, arg: str) -> None:
    """Handle ``/compress_token_length`` — view, set, ``auto``, ``off``.

    * no argument — print the threshold mode and the tokens used so
      far this session;
    * ``auto`` — store the ``-1`` sentinel (resolved per model);
    * ``off`` — store ``0`` (auto-compaction disabled);
    * a positive integer (``,`` and ``_`` separators allowed) — store
      it as an explicit threshold.
    """
    command = arg.strip().lower()

    if command == "":
        current = self._active_threshold()
        stored = self._compact_threshold
        if stored == 0:
            mode = "off (disabled)"
        elif stored > 0:
            mode = f"user-set ({current:,})"
        else:
            mode = (
                f"auto ({current:,}, "
                f"80% of {self.model}'s context window)"
            )
        console.print(
            f" [dim]compaction threshold: {mode}[/dim]\n"
            f" [dim]used this session so far: "
            f"{self._compact_tokens:,} tokens[/dim]"
        )
        return

    if command == "auto":
        self._compact_threshold = -1
        console.print(
            f" [dim]threshold: auto "
            f"({self._active_threshold():,})[/dim]"
        )
        return

    if command == "off":
        self._compact_threshold = 0
        console.print(
            " [dim]auto-compaction disabled[/dim]"
        )
        return

    # Accept "120,000" and "120_000" as well as plain digits.
    digits = command.replace(",", "").replace("_", "")
    try:
        tokens = int(digits)
    except ValueError:
        console.print(
            " [yellow]usage: /compress_token_length <N> | "
            "auto | off[/yellow]"
        )
        return

    if tokens <= 0:
        console.print(
            " [yellow]threshold must be positive (use 'off' "
            "to disable)[/yellow]"
        )
        return

    self._compact_threshold = tokens
    console.print(
        f" [dim]threshold set to {tokens:,} tokens[/dim]"
    )
3043
+
3044
+ # ---- /set_model + /set_web (interactive provider setup) ----------
3045
+
3046
async def _select_menu(
    self,
    title: str,
    options: list[tuple[str, str]],
    *,
    default: int = 0,
) -> str | None:
    """Show a vertical pick-one menu and return the chosen key.

    ``options`` is ``[(key, label), …]``. A trailing "Cancel" entry
    with a private sentinel key is appended so a cancel can be told
    apart from a real pick; the method returns ``None`` for that
    entry and for EOF / Ctrl-C.

    Rendering of the option rows is left to ``_select_option`` from
    the approval module; only the blank line and title are printed
    here. The selector runs on a worker thread so its blocking key
    reads do not stall the event loop. On a TTY the whole menu is
    erased afterwards so a following menu replaces it in place.
    """
    from .approval import _select_option

    cancel_key = "\x00cancel"

    console.print()
    if title:
        console.print(f" [bold]{title}[/bold]")

    entries = [*options, (cancel_key, "Cancel")]

    def _run_selector():
        return _select_option(entries, default=default)

    try:
        picked = await anyio.to_thread.run_sync(_run_selector)
    except (EOFError, KeyboardInterrupt):
        picked = cancel_key

    if sys.stdout.isatty():
        # Rows to wipe: the blank line, the title (if any), and one
        # row per menu entry.
        rows = 1 + len(entries)
        if title:
            rows += 1
        sys.stdout.write(f"\x1b[{rows}F\x1b[0J")
        sys.stdout.flush()

    return None if picked == cancel_key else picked
3092
+
3093
async def _prompt_line(self, message: str) -> str | None:
    """Ask one inline question and return the stripped answer.

    A brand-new ``PromptSession`` is created for every call instead
    of reusing the main REPL session, so no per-session state can
    carry over into the main prompt. EOF and Ctrl-C both yield
    ``None`` so callers can treat the cancel path uniformly.
    """
    try:
        throwaway = PromptSession()
        answer = await throwaway.prompt_async(message)
        return answer.strip()
    except (EOFError, KeyboardInterrupt):
        return None
3113
+
3114
async def _prompt_secret(self, message: str) -> str | None:
    """Hidden-input twin of ``_prompt_line``.

    The prompt is issued with ``is_password=True`` so prompt_toolkit
    redacts the keystrokes. A throwaway ``PromptSession`` is used for
    the same reason as in ``_prompt_line`` — and it matters most here,
    because this is the prompt whose redact state was leaking back into
    the main REPL.

    Returns:
        The stripped secret, or ``None`` on EOF / Ctrl-C.
    """
    try:
        secret = await PromptSession().prompt_async(
            message, is_password=True
        )
    except (EOFError, KeyboardInterrupt):
        return None
    return secret.strip()
3129
+
3130
async def _handle_set_model(self) -> None:
    """``/set_model`` — provider first, then its API key, then a model.

    The key is requested BEFORE the model list is shown so the user can
    never select a model they cannot authenticate against.

    Navigation has two levels. Backing out of the key prompt or of the
    model menu loops back to the provider menu; only cancelling the
    provider menu itself (or finishing a switch) leaves the command.
    """
    provider_rows = (
        ("1", "OpenAI (gpt-4.1, gpt-5.1, o4-mini, …)"),
        ("2", "Anthropic (claude-opus-4-8, sonnet-4-6, …)"),
        ("3", "NVIDIA (Nemotron — free at build.nvidia.com)"),
        ("4", "Other (Groq / Together / any litellm)"),
    )
    while True:
        picked = await self._select_menu(
            "Pick a model provider:", list(provider_rows)
        )
        if picked is None:
            # Cancelling the provider menu is the only way out without
            # choosing anything.
            return
        if picked == "4":
            await self._set_model_other()
            return

        catalog = {
            "1": ("OpenAI", "OPENAI_API_KEY", self._OPENAI_MODELS),
            "2": (
                "Anthropic",
                "ANTHROPIC_API_KEY",
                self._ANTHROPIC_MODELS,
            ),
            "3": ("NVIDIA", "NVIDIA_NIM_API_KEY", self._NVIDIA_MODELS),
        }
        label, env_name, models = catalog[picked]

        # Key first: a user without a key sets one up instead of
        # picking a model that then fails to authenticate. Declining
        # goes back to the provider menu, not out of the command.
        key_ready = await self._ensure_provider_key(label, env_name)
        if not key_ready:
            continue

        # NVIDIA has a dedicated picker (its free-typed ids are routed
        # through litellm/nim); the other providers share the generic
        # list picker.
        if picked == "3":
            target_model = await self._pick_nvidia_model()
        else:
            target_model = await self._pick_from_models(label, models)

        if target_model is not None:
            console.print(f" [dim]switching to {target_model}[/dim]")
            self._switch_model(target_model)
            return
        # Model menu cancelled: fall through and show providers again.
3187
+
3188
async def _ensure_provider_key(self, label: str, env_name: str) -> bool:
    """Guarantee ``env_name`` is populated before models are listed.

    Args:
        label: Human-readable provider name used in the messages.
        env_name: Environment variable that holds the provider's key.

    Returns:
        ``True`` when the key is available (already set, or just
        entered and saved); ``False`` when the user entered nothing.

    Nothing is printed when the key already exists: announcing it here
    repeated the same line every time the user backed out of the model
    menu and re-entered the provider.
    """
    if not os.environ.get(env_name):
        from .credentials import signup_url_for

        console.print(
            f" [yellow]No {env_name} set.[/yellow] "
            f"loom-code needs it to use {label}."
        )
        console.print(
            f" Get one at [dim]{signup_url_for(env_name)}[/dim]"
        )
        pasted = await self._prompt_secret(f" Paste your {env_name}: ")
        if not pasted:
            console.print(" [yellow]no key entered — aborting[/yellow]")
            return False
        # Persist for later sessions and expose it to this process.
        save_credential(env_name, pasted)
        os.environ[env_name] = pasted
        console.print(
            f" [green]✓[/green] saved {env_name} "
            "(future sessions pick it up automatically)"
        )
    return True
3221
+
3222
+ async def _pick_from_models(
3223
+ self, label: str, models: list[tuple[str, str, str]]
3224
+ ) -> str | None:
3225
+ """Arrow-key menu over a provider's (name, model_id, note)
3226
+ list, plus a 'type your own' escape. Returns the chosen model
3227
+ string, or None if cancelled."""
3228
+ options = [
3229
+ (str(i), f"{name:24} {note}")
3230
+ for i, (name, _mid, note) in enumerate(models, 1)
3231
+ ]
3232
+ options.append(("custom", "Type a different model id…"))
3233
+ choice = await self._select_menu(
3234
+ f"{label} models:", options
3235
+ )
3236
+ if choice is None:
3237
+ return None
3238
+ if choice.isdigit():
3239
+ return models[int(choice) - 1][1]
3240
+ ans = await self._prompt_line(" Model id: ")
3241
+ if not ans:
3242
+ return None
3243
+ from .credentials import normalize_model
3244
+
3245
+ return normalize_model(ans)
3246
+
3247
# Hand-picked models per provider. Every entry is a
# (menu label, model id, short note) triple. /set_model displays the
# matching table as a sub-menu once the provider key is in place, so
# the user selects a SPECIFIC model instead of a provider default; the
# menu's "type your own" row covers anything that is not listed.
_OPENAI_MODELS: list[tuple[str, str, str]] = [
    (
        "gpt-4.1-mini",
        "gpt-4.1-mini",
        "fast + cheap, solid tools",
    ),
    (
        "gpt-4.1",
        "gpt-4.1",
        "stronger general coding",
    ),
    (
        "gpt-5.1",
        "gpt-5.1",
        "flagship reasoning (if you have access)",
    ),
    (
        "o4-mini",
        "o4-mini",
        "reasoning, cost-efficient",
    ),
]
_ANTHROPIC_MODELS: list[tuple[str, str, str]] = [
    (
        "claude-haiku-4-5",
        "claude-haiku-4-5",
        "fast + cheap",
    ),
    (
        "claude-sonnet-4-6",
        "claude-sonnet-4-6",
        "best speed/intelligence balance",
    ),
    (
        "claude-opus-4-8",
        "claude-opus-4-8",
        "most capable — top tool-use + agentic",
    ),
    (
        "claude-opus-4-7",
        "claude-opus-4-7",
        "previous-gen Opus",
    ),
]

# NVIDIA NIM models, listed from small to large. loom-code leans
# heavily on tools (delegate/write/edit/bash), so models with solid
# OpenAI-format function calling perform far better — the notes call
# those out. The ids were verified against NVIDIA's live /v1/models
# catalog.
_NVIDIA_MODELS: list[tuple[str, str, str]] = [
    (
        "nemotron-nano-9b-v2",
        "litellm/nvidia_nim/nvidia/nvidia-nemotron-nano-9b-v2",
        "small/fast — weak at multi-step tool use",
    ),
    (
        "nemotron-super-49b-v1.5",
        "litellm/nvidia_nim/nvidia/llama-3.3-nemotron-super-49b-v1.5",
        "stronger, function-calling — better for real tasks",
    ),
    (
        "llama-3.3-70b",
        "litellm/nvidia_nim/meta/llama-3.3-70b-instruct",
        "general 70B, function-calling",
    ),
    (
        "deepseek-v4-pro",
        "litellm/nvidia_nim/deepseek-ai/deepseek-v4-pro",
        "strong code/reasoning (MoE)",
    ),
]
3299
+
3300
async def _pick_nvidia_model(self) -> str | None:
    """Menu over ``_NVIDIA_MODELS`` with a free-text fallback.

    Returns:
        The litellm model string of the chosen row, a model string
        derived from the free-typed id, or ``None`` when the menu or
        the text prompt was cancelled / left empty.

    Free-typed ids are mapped as follows:
      * already starts with ``litellm/`` (any case) → used verbatim;
      * contains a ``/`` (vendor/model) → prefixed with
        ``litellm/nvidia_nim/``;
      * bare name → ``nvidia/<name>`` passed through
        ``normalize_model``.
    """
    from .credentials import normalize_model

    rows = []
    for number, entry in enumerate(self._NVIDIA_MODELS, 1):
        name, _model, note = entry
        rows.append((str(number), f"{name:22} {note}"))
    rows.append(("custom", "Type a different model id…"))

    picked = await self._select_menu(
        "NVIDIA models (free at build.nvidia.com):", rows
    )
    if picked is None:
        return None
    if picked.isdigit():
        # Menu keys are 1-based positions into the table.
        return self._NVIDIA_MODELS[int(picked) - 1][1]

    typed = await self._prompt_line(
        " Model id (e.g. nvidia/nemotron-…): "
    )
    if not typed:
        return None
    if typed.lower().startswith("litellm/"):
        return typed
    if "/" in typed:
        return f"litellm/nvidia_nim/{typed}"
    return normalize_model(f"nvidia/{typed}")
3332
+
3333
async def _set_model_other(self) -> None:
    """``/set_model`` → Other — the generic path for ANY provider that
    loomflow can route through LiteLLM (Groq, Together, DeepSeek, a
    custom OpenAI-compatible proxy, ...).

    The user types a model string and it is handed to
    ``_switch_model``, which normalizes aliases and prompts for the
    matching key when the provider is in the registry. Providers added
    through ``[[provider]]`` blocks in ``~/.loom-code/settings.toml``
    work here too, with no code change.

    An empty answer or EOF / Ctrl-C prints "cancelled" and leaves the
    current model untouched.
    """
    from .credentials import litellm_providers

    console.print()
    known = ", ".join(sorted(litellm_providers()))
    console.print(
        " [dim]Enter any model string loom-code can route, e.g.:[/dim]"
    )
    console.print(
        " [cyan]groq/llama-3.3-70b-versatile[/cyan] "
        "[dim](short alias)[/dim]"
    )
    console.print(
        " [cyan]litellm/deepseek/deepseek-chat[/cyan] "
        "[dim](explicit litellm form)[/dim]"
    )
    console.print(f" [dim]known providers: {known}[/dim]")
    # The inner bracket is backslash-escaped: console markup would
    # otherwise parse ``[provider]`` as a style tag and drop it, so the
    # hint would render as "([] blocks)".
    # NOTE(review): assumes ``console`` is a Rich console — confirm.
    console.print(
        " [dim]add more in ~/.loom-code/settings.toml "
        "([\\[provider]] blocks)[/dim]"
    )
    model = await self._prompt_line(" Model: ")
    if not model:
        console.print(" [dim]cancelled[/dim]")
        return
    # _switch_model normalizes the alias + prompts for the key.
    self._switch_model(model)
3370
+
3371
async def _handle_set_web(self) -> None:
    """``/set_web`` — choose the web-search backend, or switch it off.

    Serper asks for ``SERPER_API_KEY`` the first time (and saves it);
    DuckDuckGo needs no key. After a successful choice the agent is
    rebuilt so the new tool wiring applies from the next turn.
    Cancelling the menu, or entering no key for Serper, changes
    nothing.
    """
    picked = await self._select_menu(
        "Web search backend:",
        [
            ("1", "Serper (Google, best quality, needs API key)"),
            ("2", "DuckDuckGo (free, no key, lower quality)"),
            ("3", "Off (disable web search)"),
        ],
    )
    if picked is None:
        return
    if picked not in ("1", "2", "3"):
        console.print(
            f" [yellow]invalid choice {picked!r} — "
            "enter 1, 2, or 3[/yellow]"
        )
        return

    if picked == "1" and not os.environ.get("SERPER_API_KEY"):
        # Serper cannot work without a key: ask once and persist it so
        # later sessions pick it up.
        console.print(
            " [dim]Get a key at "
            "https://serper.dev "
            "(2,500 lifetime free searches).[/dim]"
        )
        pasted = await self._prompt_secret(
            " Paste your SERPER_API_KEY: "
        )
        if not pasted:
            console.print(
                " [yellow]no key entered — "
                "aborting[/yellow]"
            )
            return
        save_credential("SERPER_API_KEY", pasted)
        os.environ["SERPER_API_KEY"] = pasted
        console.print(
            " [green]✓[/green] saved SERPER_API_KEY"
        )

    backends = {"1": "serper", "2": "duckduckgo", "3": None}
    self._web_backend = backends[picked]

    self._rebuild_agent()
    state = self._web_backend or "off"
    console.print(
        f" [dim]web search: {state} — "
        "fresh conversation[/dim]"
    )
3428
+
3429
+ # ---- /resume --------------------------------------------------------
3430
+
3431
def _session_pointer_path(self) -> Path:
    """Return the file that remembers this project's last session_id.

    The pointer sits in the project's ``.loom/`` directory, next to the
    other per-project state loom-code keeps there (notebook, memory db,
    repo map). It is a single file per project holding one line: the
    session_id.
    """
    state_dir = self.project.root / ".loom"
    return state_dir / "last_session.txt"
3439
+
3440
+ def _save_session_pointer(self) -> None:
3441
+ """Write the current ``session_id`` to the project's
3442
+ ``.loom/last_session.txt``. Best-effort — a write failure
3443
+ is logged once but never blocks a turn (the file is a
3444
+ convenience; the agent's actual memory keys off
3445
+ ``session_id`` in loomflow's Memory which we don't touch
3446
+ here).
3447
+
3448
+ Also appends one record per NEW session_id to
3449
+ ``.loom/sessions.jsonl`` — the history behind ``/resume pick``
3450
+ and ``--resume``. One line per session (first turn only), with
3451
+ a first-prompt hint so the picker is legible."""
3452
+ try:
3453
+ p = self._session_pointer_path()
3454
+ p.parent.mkdir(exist_ok=True)
3455
+ p.write_text(self.session_id + "\n", encoding="utf-8")
3456
+ if self.session_id != self._recorded_session_id:
3457
+ import datetime as _dt
3458
+
3459
+ record = {
3460
+ "session_id": self.session_id,
3461
+ "ts": _dt.datetime.now(_dt.UTC).isoformat(
3462
+ timespec="seconds"
3463
+ ),
3464
+ "hint": (self._last_prompt or "")[:80],
3465
+ "model": str(self.model),
3466
+ }
3467
+ with (p.parent / "sessions.jsonl").open(
3468
+ "a", encoding="utf-8"
3469
+ ) as fh:
3470
+ fh.write(json.dumps(record) + "\n")
3471
+ self._recorded_session_id = self.session_id
3472
+ except OSError:
3473
+ # Silent failure: a read-only filesystem or perms
3474
+ # issue would otherwise spam the chat with the same
3475
+ # warning every turn.
3476
+ pass
3477
+
3478
def _recent_sessions(self, limit: int = 10) -> list[dict[str, Any]]:
    """Return recent sessions from ``sessions.jsonl``, newest first.

    Args:
        limit: Maximum number of records to return. Zero or a negative
            value yields an empty list.

    Returns:
        At most ``limit`` records (the dicts as stored in the file),
        newest first, one per session_id, never including the current
        session. An absent, unreadable or undecodable file yields
        ``[]``.

    Malformed lines are skipped rather than raised on. That covers
    text that is not JSON, JSON that is not an object (``[1]``,
    ``null``, ``42``), and records whose ``session_id`` is missing,
    empty or not a string — callers slice and prefix-match the id, so
    it must be a ``str``.
    """
    path = self.project.root / LOOM_DIR / "sessions.jsonl"
    out: list[dict[str, Any]] = []
    if limit <= 0:
        return out
    try:
        lines = path.read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError; a
        # corrupted history file must not break /resume.
        return out
    seen: set[str] = set()
    # The file is append-only, so walking it backwards gives
    # newest-first and keeps the most recent record of each id.
    for raw in reversed(lines):
        try:
            rec = json.loads(raw)
        except ValueError:
            continue
        if not isinstance(rec, dict):
            continue
        sid = rec.get("session_id")
        if not isinstance(sid, str) or not sid:
            continue
        if sid == self.session_id or sid in seen:
            continue
        seen.add(sid)
        out.append(rec)
        if len(out) >= limit:
            break
    return out
3501
+
3502
async def _pick_session(self) -> str | None:
    """Print a numbered list of recent sessions and ask for a number.

    Returns:
        The chosen session_id, or ``None`` when there is nothing to
        show, the answer is empty / not a number (reported as
        "cancelled"), or the number is out of range (reported as
        "no option N").
    """
    sessions = self._recent_sessions()
    if not sessions:
        console.print(
            " [yellow]no other recorded sessions for this "
            "project.[/yellow]"
        )
        return None
    console.print()
    console.print(" [bold]Recent sessions[/bold]")
    for i, s in enumerate(sessions, 1):
        # Timestamps are ISO strings; show "YYYY-MM-DD HH:MM".
        ts = str(s.get("ts", ""))[:16].replace("T", " ")
        hint = s.get("hint") or "(no prompt recorded)"
        # NOTE(review): ``hint`` is user-typed text and is interpolated
        # into a markup string — confirm bracketed text in it cannot be
        # misread as console markup.
        console.print(
            f" [cyan]{i}[/cyan]. [dim]{ts}[/dim] "
            f"{s['session_id'][:8]}… {hint}"
        )
    ans = await self._prompt_line(" Resume which? (number): ")
    if not ans or not ans.isdigit():
        console.print(" [dim]cancelled[/dim]")
        return None
    try:
        idx = int(ans) - 1
    except ValueError:
        # str.isdigit() accepts characters int() rejects (e.g. "²"),
        # and int() also refuses very long digit strings. Treat both
        # as an out-of-range pick instead of crashing the REPL.
        idx = -1
    if not 0 <= idx < len(sessions):
        console.print(f" [yellow]no option {ans}[/yellow]")
        return None
    return str(sessions[idx]["session_id"])
3530
+
3531
+ def _load_session_pointer(self) -> str | None:
3532
+ """Read the last saved session_id for this project, or
3533
+ ``None`` if no prior session has been recorded yet (first
3534
+ run on this project)."""
3535
+ try:
3536
+ p = self._session_pointer_path()
3537
+ if not p.exists():
3538
+ return None
3539
+ value = p.read_text(encoding="utf-8").strip()
3540
+ return value or None
3541
+ except OSError:
3542
+ return None
3543
+
3544
def _handle_export(self) -> None:
    """``/export`` — dump this session's turns to a markdown file.

    The file is written to the ``exports`` folder inside the project's
    loom directory as ``session-<YYYYmmdd-HHMMSS>.md`` (local time) and
    its project-relative path is printed. The turns are the
    ``(prompt, response)`` pairs held in ``self._compact_exchanges`` —
    the same pairs the compactor sees — so the export covers the
    current conversation thread. With no completed turns nothing is
    written; a write failure is reported and otherwise ignored.
    """
    exchanges = self._compact_exchanges
    if not exchanges:
        console.print(
            " [dim]nothing to export yet — no completed turns "
            "this session[/dim]"
        )
        return
    import datetime as _dt

    stamp = _dt.datetime.now().strftime("%Y%m%d-%H%M%S")
    export_dir = self.project.root / LOOM_DIR / "exports"
    target = export_dir / f"session-{stamp}.md"

    # Header: a title plus a bullet list of session metadata.
    doc = [
        "# loom-code session",
        "",
        f"- project: `{self.project.root}`",
        f"- model: `{self.model}`",
        f"- session: `{self.session_id}`",
        f"- exported: {stamp}",
        "",
    ]
    # One "## Turn N" section per exchange, numbered from 1.
    for number, (prompt, reply) in enumerate(exchanges, 1):
        doc.extend(
            (
                f"## Turn {number}",
                "",
                f"**user:** {prompt}",
                "",
                f"**loom-code:** {reply}",
                "",
            )
        )
    try:
        export_dir.mkdir(parents=True, exist_ok=True)
        target.write_text("\n".join(doc), encoding="utf-8")
    except OSError as exc:
        console.print(f" [red]export failed: {exc}[/red]")
        return
    shown = target.relative_to(self.project.root)
    console.print(
        f" [green]✓[/green] exported "
        f"{len(exchanges)} turn(s) → "
        f"[cyan]{shown}[/cyan]"
    )
3591
+
3592
async def _handle_resume(self, arg: str = "") -> None:
    """``/resume`` — switch this REPL onto a prior session.

    ``arg`` selects the session:

    * empty → the LAST session recorded for the project (the common
      case);
    * ``pick`` / ``list`` → choose from a numbered menu of recent
      sessions (backed by ``.loom/sessions.jsonl``);
    * anything else → treated as a session-id prefix and matched
      against the recent sessions; the first match wins.

    Only the id is swapped here. loomflow's Memory keys episodes by
    ``(user_id, session_id)``, so the agent's next ``run()`` with the
    same session_id rehydrates the earlier turns by itself.

    No guard is attempted for a saved id that predates a /clear or
    belongs to a different model — /resume is a deliberate gesture the
    user owns.
    """
    wanted = arg.strip()
    prior: str | None
    if wanted in ("pick", "list"):
        prior = await self._pick_session()
        if prior is None:
            return
    elif wanted:
        # Interpret the argument as a session-id prefix.
        candidates = [
            rec["session_id"]
            for rec in self._recent_sessions()
            if rec["session_id"].startswith(wanted)
        ]
        if not candidates:
            console.print(
                f" [yellow]no recorded session starts with "
                f"{wanted!r} — try /resume pick[/yellow]"
            )
            return
        prior = candidates[0]
    else:
        prior = self._load_session_pointer()

    if prior is None:
        console.print(
            " [yellow]no prior session recorded for this "
            "project — nothing to resume.[/yellow]"
        )
        console.print(
            " [dim](sessions are saved per project after each "
            "turn — your first task here starts a fresh one.)"
            "[/dim]"
        )
        return
    if prior == self.session_id:
        console.print(
            " [dim]you're already on the latest session "
            f"({prior[:8]}…) — nothing to resume.[/dim]"
        )
        return

    # Swap the id and clear the compaction bookkeeping, so whatever
    # happened in this launch before /resume is not blended into the
    # session being resumed.
    old = self.session_id
    self.session_id = prior
    self._compact_tokens = 0
    self._compact_exchanges.clear()

    memory_db = self.project.root / LOOM_DIR / "memory.db"

    # Legacy data migration. loom-code before 0.10.18 ran the Router in
    # ``per_route`` mode and stored episodes under
    # ``{prior}__route_simple`` / ``{prior}__route_complex`` rather
    # than ``prior``. The current ``conversation_scope='shared'``
    # rehydration keys on ``prior``, so resuming a pre-upgrade session
    # would lose its context without re-keying those rows. It is a
    # one-shot UPDATE on the sqlite db (the backend loom-code
    # hardcodes) and idempotent: a post-upgrade session has nothing
    # under the derived names.
    migrated = _migrate_legacy_per_route_episodes(memory_db, prior)
    if migrated:
        console.print(
            f" [dim]migrated {migrated} legacy per-route "
            "episode(s) into the shared session for "
            "rehydration[/dim]"
        )

    # Transcript repair. loomflow before 0.10.30 let rehydrated prose
    # (a prior answer plus the run's own prompt) leak into the stored
    # tool transcript; session_messages splices it back, which shows up
    # as duplicated / misaligned turns both in rehydration and in the
    # preview printed below. Idempotent, and silent when there is
    # nothing to fix.
    scrubbed = _scrub_prose_from_tool_transcripts(memory_db, prior)
    if scrubbed:
        console.print(
            f" [dim]scrubbed {scrubbed} leaked prose message(s) "
            "from stored tool transcripts[/dim]"
        )

    console.print(
        f" [green]✓[/green] resumed session [cyan]{prior[:8]}…"
        f"[/cyan] (was on {old[:8]}…)"
    )
    console.print(
        " [dim]loomflow will rehydrate prior turns from "
        "memory on your next task.[/dim]"
    )

    # Show the tail of the resumed session. Without it /resume is
    # invisible: the user could not tell real content from an empty
    # session id, nor spot a wrong-session mistake before typing the
    # next prompt.
    await self._render_resumed_history_preview(prior)
3710
+
3711
async def _render_resumed_history_preview(
    self, session_id: str
) -> None:
    """Print a short preview of the session that was just resumed.

    Up to 100 messages are requested from the agent's memory, grouped
    into turns, runs of identical consecutive turns are collapsed, and
    the last five resulting rows are printed: the user prompt (max 140
    chars), the reply (max 200 chars) and a tool-call count.

    Nothing is printed when the memory object lacks a usable
    ``session_messages`` (``AttributeError`` / ``TypeError``), when it
    returns no messages, or when the messages contain no user turn.
    """
    try:
        messages = await self.agent._memory.session_messages(
            session_id, user_id=_USER_ID, limit=100
        )
    except (AttributeError, TypeError):
        return
    turn_groups = _group_messages_into_turns(messages) if messages else []
    if not turn_groups:
        return

    total_turns = len(turn_groups)
    # Identical back-to-back (user, assistant) pairs — the same prompt
    # typed twice, or a stop-hook re-firing it — become a single row
    # carrying a repeat count, which keeps the preview readable.
    shown = _collapse_consecutive_duplicate_turns(turn_groups)[-5:]
    # Each row stands for ``row[3]`` raw turns; the rest were cut off.
    hidden = total_turns - sum(row[3] for row in shown)

    console.print()
    title = (
        f"history (last {len(shown)} of {total_turns} "
        "turns — agent sees the full set)"
    )
    rule = "─" * max(0, 64 - len(title) - 4)
    console.print(f" [dim]── {title} {rule}[/dim]")

    for user_prompt, assistant_text, n_tool_calls, repeats in shown:
        console.print()
        prompt_line = _truncate_one_line(user_prompt, 140)
        suffix = "" if repeats <= 1 else f" [dim](×{repeats})[/dim]"
        console.print(
            f" [bold]user:[/bold] {prompt_line}{suffix}"
        )
        reply_line = _truncate_one_line(assistant_text, 200)
        if not reply_line:
            console.print(
                " [dim]loom: (no text response)[/dim]"
            )
        else:
            console.print(f" [dim]loom:[/dim] {reply_line}")
        if n_tool_calls:
            plural = "" if n_tool_calls == 1 else "s"
            console.print(
                f" [dim]({n_tool_calls} tool call"
                f"{plural})[/dim]"
            )

    console.print(f" [dim]{'─' * 68}[/dim]")
    if hidden > 0:
        console.print(
            f" [dim]+ {hidden} earlier turn(s) recovered "
            "(visible to the agent, not shown here)[/dim]"
        )
3772
+
3773
+
3774
def _truncate_one_line(text: str, max_chars: int) -> str:
    """Flatten ``text`` to one line and cap its length.

    Used by the /resume history preview, where multi-line messages
    would break the layout.

    Args:
        text: Any string; a falsy value yields ``""``.
        max_chars: Maximum length of the result. Zero or a negative
            value yields ``""``.

    Returns:
        ``text`` with every whitespace run (newlines, tabs and carriage
        returns included) collapsed to a single space and the ends
        trimmed. If that exceeds ``max_chars`` it is cut to
        ``max_chars - 1`` characters, trailing whitespace is removed
        and ``"…"`` is appended.
    """
    if not text or max_chars <= 0:
        # Without the max_chars guard the slice below would take a
        # NEGATIVE end index and return almost the whole string.
        return ""
    # str.split() with no argument splits on any whitespace and drops
    # leading/trailing runs, so one pass both flattens and trims.
    flat = " ".join(text.split())
    if len(flat) <= max_chars:
        return flat
    return flat[: max_chars - 1].rstrip() + "…"
3786
+
3787
+
3788
def _collapse_consecutive_duplicate_turns(
    groups: list[tuple[str, str, int]],
) -> list[tuple[str, str, int, int]]:
    """Merge back-to-back identical turns into one counted row.

    Two turn groups are "identical" when both their user prompt and
    their assistant text are equal; the tool-call count is not
    compared. A run of such groups becomes one
    ``(user, assistant, n_tool, repeats)`` tuple, where ``n_tool`` is
    taken from the FIRST group of the run and ``repeats`` is the run
    length. Identical groups that are NOT adjacent stay separate rows —
    they belong to different points of the conversation.

    The /resume preview uses this so an exchange persisted several
    times in a row is shown once. An empty input gives ``[]``.
    """
    rows: list[list[Any]] = []
    for user, asst, tools in groups:
        last = rows[-1] if rows else None
        if last is not None and user == last[0] and asst == last[1]:
            # Same exchange as the open run: just bump its counter.
            last[3] += 1
        else:
            rows.append([user, asst, tools, 1])
    return [(row[0], row[1], row[2], row[3]) for row in rows]
3822
+
3823
+
3824
def _group_messages_into_turns(
    messages: list[Any],
) -> list[tuple[str, str, int]]:
    """Fold a flat message list into
    ``(user_prompt, assistant_text, n_tool_calls)`` turns for the
    /resume preview.

    Messages are read through their ``role`` / ``content`` /
    ``tool_calls`` attributes; the role is compared by the lowercased
    text after the last ``.`` of ``str(role)``, so both enum members
    and plain strings work.

    * a ``user`` message closes the open turn (if any) and opens a new
      one with its content as the prompt;
    * an ``assistant`` message adds its non-empty content to the open
      turn's text (pieces are joined with a space) and the length of
      its ``tool_calls`` to the turn's count;
    * ``tool`` messages are NOT counted — each is the result of a call
      already counted on the assistant message, so counting it would
      double the figure;
    * every other role (e.g. ``system``) is ignored.

    Assistant output seen before the first user message is dropped.
    Turns come back oldest first; a stream without a user message gives
    ``[]``.
    """
    turns: list[tuple[str, str, int]] = []
    prompt: str | None = None
    reply_parts: list[str] = []
    call_total = 0

    for msg in messages:
        kind = str(getattr(msg, "role", None)).lower().split(".")[-1]
        body = str(getattr(msg, "content", "") or "")

        if kind == "assistant":
            if body:
                reply_parts.append(body)
            call_total += len(getattr(msg, "tool_calls", None) or ())
            continue
        if kind != "user":
            # tool results, system context, unknown roles: skipped.
            continue

        # A new user prompt: seal whatever turn was open, then reset.
        if prompt is not None:
            turns.append(
                (prompt, " ".join(reply_parts).strip(), call_total)
            )
        prompt, reply_parts, call_total = body, [], 0

    # The last turn has no following user message to seal it.
    if prompt is not None:
        turns.append((prompt, " ".join(reply_parts).strip(), call_total))
    return turns
3883
+
3884
+
3885
# Wording typical of a hallucinated "I'm done" turn. It is checked
# against the agent's output when the turn made ZERO tool calls. The
# pattern is kept narrow on purpose: it should catch completion CLAIMS,
# not legitimate no-tool answers ("here's what X means"). Every
# alternative pairs a verb of completion with an object of work.
_COMPLETION_CLAIM_RE = re.compile(
    r"\b("
    r"all (the )?(detected |previously )?(issues|problems|"
    r"bugs|fixes)\b.{0,40}\b(fixed|addressed|resolved|done)"
    r"|already been fixed"
    r"|have been fixed"
    r"|were fixed"
    r"|no (remaining |outstanding )?(issues|problems|blockers)"
    r")\b",
    flags=re.IGNORECASE | re.DOTALL,
)


def _looks_like_completion_claim(text: str) -> bool:
    """Tell whether ``text`` reads like "the work is finished".

    Serves the detection of hallucinated completion claims on turns
    that made no tool calls. Matching is case-insensitive and may span
    line breaks. The pattern is intentionally narrow: an answer that
    merely contains the word 'fixed' does not match, whereas 'all the
    detected issues have been fixed' does. Empty text is never a claim.
    """
    return bool(text) and _COMPLETION_CLAIM_RE.search(text) is not None
3912
+
3913
+
3914
def _delete_last_episode(
    db_path: Path, *, session_id: str, user_id: str
) -> bool:
    """Delete the most recent episode stored for
    ``(user_id, session_id)``.

    The anti-poison gate uses this to remove a just-written
    no-tool-call completion claim before it pollutes recall. It talks
    to sqlite directly (loom-code hardcodes the sqlite backend) because
    the Memory protocol's ``forget`` is coarse (by user / session /
    time) and cannot target 'the single most-recent row'.

    Args:
        db_path: Path of the sqlite memory database.
        session_id: Session whose newest episode is removed.
        user_id: Owner of that session.

    Returns:
        ``True`` if a row was deleted; ``False`` if the file is
        missing, no episode matches, or any sqlite / OS error occurs.
        Best-effort: errors are swallowed so a gate failure never
        breaks the turn.
    """
    if not db_path.is_file():
        return False
    # Imported before the ``try`` so the ``except`` clause can always
    # resolve ``sqlite3.Error``.
    import sqlite3
    from contextlib import closing

    try:
        # ``with connection:`` only commits / rolls back; ``closing``
        # is what actually releases the file handle.
        with closing(sqlite3.connect(str(db_path))) as conn:
            # SQLite leaves foreign-key enforcement OFF on every new
            # connection. Without this pragma the ON DELETE CASCADE
            # that is meant to remove the episode's tool transcripts
            # never fires and those rows are orphaned. It has to run
            # before the DELETE opens a transaction.
            conn.execute("PRAGMA foreign_keys = ON")
            with conn:
                # Locate the newest episode for this scope, then delete
                # by id.
                row = conn.execute(
                    "SELECT id FROM episodes "
                    "WHERE user_id = ? AND session_id = ? "
                    "ORDER BY occurred_at DESC LIMIT 1",
                    (user_id, session_id),
                ).fetchone()
                if row is None:
                    return False
                deleted = conn.execute(
                    "DELETE FROM episodes WHERE id = ?", (row[0],)
                ).rowcount
        return (deleted or 0) > 0
    except (sqlite3.Error, OSError):
        return False
3953
+
3954
+
3955
def _migrate_legacy_per_route_episodes(
    db_path: Path, parent_session_id: str
) -> int:
    """Re-key legacy per-route episodes onto the parent session_id so
    ``conversation_scope='shared'`` rehydration can see them.

    loom-code before 0.10.18 ran the Router in its default
    ``per_route`` mode and persisted episodes under
    ``{parent}__route_simple`` and ``{parent}__route_complex``. The
    shared-mode lookup keys on ``parent`` alone, so a /resume of a
    pre-upgrade session showed no history. This rewrites the
    ``session_id`` column of any such rows. It is idempotent: a second
    run, or a run on a post-upgrade session, finds nothing to change.

    sqlite is used directly because the Memory protocol has no
    "re-key a session" primitive, and adding one for this one-shot
    migration is not worth the surface; the REPL hardcodes the sqlite
    backend.

    Args:
        db_path: Path of the sqlite memory database.
        parent_session_id: The session id the rows are moved to.

    Returns:
        The number of rows re-keyed. ``0`` when the file is absent or
        any sqlite / OS error occurs — a failure here must NEVER block
        /resume.
    """
    if not db_path.is_file():
        return 0
    # Imported before the ``try`` so the ``except`` clause can always
    # resolve ``sqlite3.Error``.
    import sqlite3
    from contextlib import closing

    legacy_ids = (
        f"{parent_session_id}__route_simple",
        f"{parent_session_id}__route_complex",
    )
    try:
        # ``with connection:`` only commits / rolls back; ``closing``
        # is what actually closes the connection afterwards.
        with closing(sqlite3.connect(str(db_path))) as conn:
            with conn:
                cur = conn.execute(
                    "UPDATE episodes SET session_id = ? "
                    "WHERE session_id IN (?, ?)",
                    (parent_session_id, *legacy_ids),
                )
                migrated = cur.rowcount or 0
        return int(migrated)
    except (sqlite3.Error, OSError):
        return 0
3999
+
4000
+
4001
def _scrub_prose_from_tool_transcripts(
    db_path: Path, session_id: str
) -> int:
    """Delete NON-tool messages from a session's persisted tool
    transcripts.

    loomflow < 0.10.30 built the transcript by EXCLUDING {system,
    first USER, last ASSISTANT-text} and keeping the rest — on a
    resumed session the "first USER" was a prior turn's rehydrated
    prompt, so a prior answer + the run's own input leaked into the
    transcript. ``session_messages`` splices the transcript between
    input/output, so consumers (rehydration AND the /resume preview)
    saw duplicated, misaligned turns.

    A transcript row is legitimate iff it is tool work:
    ``role == "tool"`` or ``role == "assistant"`` with a non-empty
    ``tool_calls``. Every other message object is leaked prose and
    is deleted. Rows whose payload cannot be read as a JSON object
    (NULL, unparseable text, or a non-object JSON value) are left
    untouched rather than guessed at.

    Same direct-sqlite rationale as
    :func:`_migrate_legacy_per_route_episodes`.

    Args:
        db_path: Path to the sqlite memory database file.
        session_id: Session whose transcripts are scrubbed.

    Returns:
        The number of rows deleted; idempotent, so a second run
        returns 0. Also 0 when the db file is absent or any
        sqlite/OS error occurs — failure here must NEVER block
        /resume.
    """
    # Imported before the ``try`` so the ``except`` clause can always
    # resolve ``sqlite3.Error``.
    import sqlite3
    from contextlib import closing

    if not db_path.is_file():
        return 0
    try:
        # ``closing`` releases the handle; the inner ``conn`` context
        # only commits on success / rolls back on error.
        with closing(sqlite3.connect(str(db_path))) as conn, conn:
            rows = conn.execute(
                "SELECT t.episode_id, t.sequence, t.message_json "
                "FROM episode_tool_transcripts t "
                "JOIN episodes e ON e.id = t.episode_id "
                "WHERE e.session_id = ?",
                (session_id,),
            ).fetchall()
            doomed: list[tuple[str, int]] = []
            for episode_id, sequence, message_json in rows:
                try:
                    msg = json.loads(message_json)
                except (TypeError, ValueError):
                    # TypeError: NULL / non-text column value.
                    # ValueError: malformed JSON (JSONDecodeError is
                    # a subclass). Either way — leave it alone.
                    continue
                if not isinstance(msg, dict):
                    # Valid JSON but not a message object; calling
                    # ``.get`` on it would raise and abort /resume.
                    continue
                role = str(msg.get("role", "")).lower()
                is_tool_work = role == "tool" or (
                    role == "assistant" and bool(msg.get("tool_calls"))
                )
                if not is_tool_work:
                    doomed.append((episode_id, sequence))
            conn.executemany(
                "DELETE FROM episode_tool_transcripts "
                "WHERE episode_id = ? AND sequence = ?",
                doomed,
            )
            return len(doomed)
    except (sqlite3.Error, OSError):
        return 0
4057
+
4058
+
4059
async def run_repl(
    project: Project,
    model: str,
    *,
    sandbox: bool = False,
    sandbox_allow_network: bool = False,
    resume: str | None = None,
) -> int:
    """Interactive REPL entry point.

    Builds a ``Repl`` from the given arguments, awaits its ``run``
    loop, and hands back whatever that loop returns.

    ``resume`` is forwarded as ``startup_resume`` and maps the CLI
    flags onto the /resume machinery before the first prompt:
    ``"last"`` (--continue) rejoins the most recent session,
    ``"pick"`` (--resume) shows the session picker.
    """
    session = Repl(
        project,
        model,
        sandbox=sandbox,
        sandbox_allow_network=sandbox_allow_network,
        startup_resume=resume,
    )
    exit_code = await session.run()
    return exit_code