loom-code 0.1.3__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loom_code-0.1.3 → loom_code-0.2.0}/PKG-INFO +1 -1
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/__init__.py +1 -1
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/agent.py +12 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/approval.py +1 -1
- loom_code-0.2.0/loom_code/background.py +226 -0
- loom_code-0.2.0/loom_code/context_report.py +129 -0
- loom_code-0.2.0/loom_code/diagnostics.py +155 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/extensions.py +108 -1
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/hooks.py +37 -0
- loom_code-0.2.0/loom_code/loop_guard.py +151 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/permissions.py +4 -3
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/repl.py +610 -26
- loom_code-0.2.0/loom_code/verify_gate.py +146 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/workers.py +6 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code.egg-info/PKG-INFO +1 -1
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code.egg-info/SOURCES.txt +13 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/pyproject.toml +2 -2
- loom_code-0.2.0/tests/test_background.py +115 -0
- loom_code-0.2.0/tests/test_context_observability.py +118 -0
- loom_code-0.2.0/tests/test_custom_commands.py +106 -0
- loom_code-0.2.0/tests/test_diagnostics.py +126 -0
- loom_code-0.2.0/tests/test_goal_state.py +78 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_loom_hooks.py +61 -0
- loom_code-0.2.0/tests/test_loop_guard.py +143 -0
- loom_code-0.2.0/tests/test_session_tree.py +154 -0
- loom_code-0.2.0/tests/test_verify_gate.py +147 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/LICENSE +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/README.md +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/_post_commit.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/browse/__init__.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/browse/act.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/browse/observe.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/browse/session.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/browse/verify.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/checkpoint.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/cli.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/code_index.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/compact.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/consent.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/credentials.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/edit_tool.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/file_history.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/file_tools.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/git_hook.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/grep_tool.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/__init__.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/_ast_walk.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/_files.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/_graph.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/_resolve.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/_tests_map.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/extractor.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/repomap.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/loominit/schema.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/lsp_tools.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/mcp_host.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/operator.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/paste.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/paths.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/project.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/prompts.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/render.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/rules.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/sandboxed_bash.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/scribe.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/skills/__init__.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/skills/graphify/SKILL.md +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/skills/graphify/tools.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/trust.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/turn.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/web_fetch.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code/worktree.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code.egg-info/dependency_links.txt +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code.egg-info/entry_points.txt +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code.egg-info/requires.txt +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/loom_code.egg-info/top_level.txt +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/setup.cfg +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_agent.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_antipoison_gate.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_approval.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_approval_danger.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_approval_integration.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_checkpoint.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_code_index.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_compact.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_credentials.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_edit_tool.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_extensions.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_file_boundary.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_file_history.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_git_hook.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_graphify_file_discovery.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_graphify_query_tiers.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_graphify_wiring.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_grep_tool.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_learned_notes.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_lsp_tools.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_mcp.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_paste.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_permissions.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_pricing.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_project.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_prompts.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_render.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_repl_guards.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_resume_migration.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_resume_preview.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_routing.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_rules.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_sandboxed_bash.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_stream_liveness.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_turn_economy.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_web_fetch.py +0 -0
- {loom_code-0.1.3 → loom_code-0.2.0}/tests/test_workers.py +0 -0
|
@@ -442,10 +442,18 @@ def build_agent(
|
|
|
442
442
|
# coordinator only reads, but a user PreToolUse hook can match
|
|
443
443
|
# ``read``/``grep`` too, so keep it in the set; the workers (which
|
|
444
444
|
# write + run bash) are the main target. No-op when none declared.
|
|
445
|
+
# The loop guard (doom-loop + missing-binary steering) and the
|
|
446
|
+
# post-edit diagnostics rider (per-file syntax/lint findings
|
|
447
|
+
# appended to edit results) ride the same registries — native
|
|
448
|
+
# post hooks, not shell specs.
|
|
449
|
+
from . import diagnostics, loop_guard
|
|
450
|
+
|
|
445
451
|
for tool_agent in (coordinator, *workers.values()):
|
|
446
452
|
attach_tool_hooks(
|
|
447
453
|
tool_agent, extensions.hook_specs, cwd=project.root
|
|
448
454
|
)
|
|
455
|
+
loop_guard.attach(tool_agent)
|
|
456
|
+
diagnostics.attach(tool_agent, project.root)
|
|
449
457
|
|
|
450
458
|
# Stash the MCP registry on the coordinator so the REPL / sidecar can
|
|
451
459
|
# tear it down (``await coordinator._mcp_registry.aclose()``) on exit
|
|
@@ -534,6 +542,10 @@ def build_solo_agent(
|
|
|
534
542
|
from loomflow.tools import web_tool
|
|
535
543
|
agent.add_tool(web_tool(backend=web_backend)) # type: ignore[arg-type]
|
|
536
544
|
attach_tool_hooks(agent, extensions.hook_specs, cwd=project.root)
|
|
545
|
+
from . import diagnostics, loop_guard
|
|
546
|
+
|
|
547
|
+
loop_guard.attach(agent)
|
|
548
|
+
diagnostics.attach(agent, project.root)
|
|
537
549
|
return agent
|
|
538
550
|
|
|
539
551
|
|
|
@@ -60,7 +60,7 @@ def _is_danger_command(tool: str, args: dict[str, Any]) -> str | None:
|
|
|
60
60
|
past the check. False positives are acceptable here: an extra
|
|
61
61
|
confirmation on a benign ``git reset --hard`` to a known-safe ref
|
|
62
62
|
costs one keypress; a missed ``rm -rf .git`` costs the repo."""
|
|
63
|
-
if tool
|
|
63
|
+
if tool not in ("bash", "bash_background"):
|
|
64
64
|
return None
|
|
65
65
|
cmd = str(args.get("command", "")).lower()
|
|
66
66
|
norm = " ".join(cmd.split())
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Background bash — long-running processes that don't block the turn.
|
|
2
|
+
|
|
3
|
+
The capability gap vs Claude Code (background subagents/shells by
|
|
4
|
+
default) and opencode (non-blocking subagents): loom-code's ``bash``
|
|
5
|
+
blocks until the command exits, so a dev server, a watcher, or a long
|
|
6
|
+
test run wedges the whole turn. These tools let the agent start a
|
|
7
|
+
process, keep working, and check on it later:
|
|
8
|
+
|
|
9
|
+
* ``bash_background(command)`` → spawns detached, returns a handle
|
|
10
|
+
(``bg1``, ``bg2``, …). Runs arbitrary code, so it is
|
|
11
|
+
``destructive=True`` and rides the SAME approval gate + allow/ask/
|
|
12
|
+
deny rules as ``bash`` (``permissions.call_target`` maps it to the
|
|
13
|
+
command string, and the irreversible-danger scan applies).
|
|
14
|
+
* ``bash_output(handle)`` → status + the output tail. Read-only.
|
|
15
|
+
* ``bash_kill(handle)`` → terminate the process group. Read-only
|
|
16
|
+
gate-wise (it only stops what the agent itself started).
|
|
17
|
+
|
|
18
|
+
Registry is module-level (the ``consent.py`` pattern): tools are
|
|
19
|
+
built at agent construction, the REPL kills leftovers on exit, and
|
|
20
|
+
``atexit`` backstops a crash so no orphan dev-servers outlive the
|
|
21
|
+
session. Output goes to a spool file per process — bounded tail
|
|
22
|
+
reads, no pipe-buffer deadlocks.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import atexit
|
|
28
|
+
import os
|
|
29
|
+
import signal
|
|
30
|
+
import subprocess
|
|
31
|
+
import tempfile
|
|
32
|
+
import time
|
|
33
|
+
from dataclasses import dataclass, field
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any
|
|
36
|
+
|
|
37
|
+
from loomflow import tool
|
|
38
|
+
|
|
39
|
+
_TAIL_CHARS = 4_000
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
|
|
43
|
+
class _Proc:
|
|
44
|
+
handle: str
|
|
45
|
+
command: str
|
|
46
|
+
popen: subprocess.Popen[bytes]
|
|
47
|
+
spool_path: Path
|
|
48
|
+
started_at: float = field(default_factory=time.monotonic)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
_procs: dict[str, _Proc] = {}
|
|
52
|
+
_counter = 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def reset() -> None:
    """Kill everything and clear the registry (tests + /clear).

    Also deletes every process's spool file. Once the registry is
    cleared no handle can reach those files again, so leaving them
    behind would leak one temp file per background process.
    """
    kill_all()
    for proc in list(_procs.values()):
        try:
            proc.spool_path.unlink()
        except OSError:
            # Best-effort cleanup: the file may already be gone, or
            # may still be held open by a process that has not exited.
            pass
    _procs.clear()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _spawn(command: str, cwd: Path) -> _Proc:
    """Start ``command`` through the shell in the background and register it.

    The child's stdout and stderr both go to a fresh temp spool file;
    stdin is ``/dev/null``. The new record is stored in the module
    registry under the next ``bgN`` handle and returned.

    Raises:
        Whatever ``subprocess.Popen`` raises when the process cannot be
        started (for example a ``cwd`` that does not exist). In that
        case the spool file is closed and removed before re-raising,
        so a failed spawn leaks neither a descriptor nor a temp file.
    """
    global _counter
    _counter += 1
    handle = f"bg{_counter}"
    fd, spool = tempfile.mkstemp(prefix=f"loom-{handle}-", suffix=".log")
    # New process GROUP so bash_kill can terminate the whole tree
    # (a dev server forks children; killing just the shell leaks
    # them). start_new_session works on POSIX; on Windows we fall
    # back to CREATE_NEW_PROCESS_GROUP.
    kwargs: dict[str, Any] = {}
    if os.name == "posix":
        kwargs["start_new_session"] = True
    else:  # pragma: no cover - windows
        kwargs["creationflags"] = getattr(
            subprocess, "CREATE_NEW_PROCESS_GROUP", 0
        )
    try:
        # The ``with`` closes the parent's handle on success AND on
        # failure; the child holds its own fd, the parent reads by path.
        with os.fdopen(fd, "wb") as spool_file:
            popen = subprocess.Popen(  # noqa: S602 - the whole point
                command,
                shell=True,
                cwd=str(cwd),
                stdout=spool_file,
                stderr=subprocess.STDOUT,
                stdin=subprocess.DEVNULL,
                **kwargs,
            )
    except Exception:
        # Nothing was started, so nothing will ever read this spool.
        try:
            os.unlink(spool)
        except OSError:
            pass
        raise
    proc = _Proc(
        handle=handle,
        command=command,
        popen=popen,
        spool_path=Path(spool),
    )
    _procs[handle] = proc
    return proc
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _tail(proc: _Proc) -> str:
    """Return the last ``_TAIL_CHARS`` characters of ``proc``'s spool file.

    Only the end of the file is read, so a long-running process with a
    large log does not get loaded into memory on every status check.
    Output is decoded as UTF-8 with replacement of undecodable bytes.
    When earlier output exists beyond what is returned, the text is
    prefixed with a "trimmed" marker line. Returns ``""`` if the spool
    cannot be read.
    """
    # A UTF-8 character is at most 4 bytes, so this many bytes always
    # holds at least ``_TAIL_CHARS`` characters; the extra 3 cover a
    # partial character at the start of the window.
    max_bytes = _TAIL_CHARS * 4 + 3
    try:
        with proc.spool_path.open("rb") as spool:
            size = spool.seek(0, os.SEEK_END)
            start = max(0, size - max_bytes)
            spool.seek(start)
            data = spool.read(max_bytes)
    except OSError:
        return ""
    if start:
        # The window may begin mid-character: drop leading UTF-8
        # continuation bytes (0b10xxxxxx) so decoding starts cleanly.
        skip = 0
        while skip < min(3, len(data)) and (data[skip] & 0xC0) == 0x80:
            skip += 1
        data = data[skip:]
    text = data.decode("utf-8", "replace")
    if start or len(text) > _TAIL_CHARS:
        text = f"…(earlier output trimmed)\n{text[-_TAIL_CHARS:]}"
    return text
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _status_line(proc: _Proc) -> str:
|
|
110
|
+
rc = proc.popen.poll()
|
|
111
|
+
elapsed = time.monotonic() - proc.started_at
|
|
112
|
+
if rc is None:
|
|
113
|
+
return (
|
|
114
|
+
f"{proc.handle}: RUNNING ({elapsed:.0f}s) — {proc.command}"
|
|
115
|
+
)
|
|
116
|
+
return (
|
|
117
|
+
f"{proc.handle}: EXITED rc={rc} after {elapsed:.0f}s — "
|
|
118
|
+
f"{proc.command}"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _kill(proc: _Proc) -> str:
|
|
123
|
+
rc = proc.popen.poll()
|
|
124
|
+
if rc is not None:
|
|
125
|
+
return f"{proc.handle} already exited (rc={rc})"
|
|
126
|
+
try:
|
|
127
|
+
if os.name == "posix":
|
|
128
|
+
os.killpg(os.getpgid(proc.popen.pid), signal.SIGTERM)
|
|
129
|
+
else: # pragma: no cover - windows
|
|
130
|
+
proc.popen.terminate()
|
|
131
|
+
except (ProcessLookupError, PermissionError, OSError):
|
|
132
|
+
proc.popen.terminate()
|
|
133
|
+
try:
|
|
134
|
+
proc.popen.wait(timeout=3)
|
|
135
|
+
except subprocess.TimeoutExpired:
|
|
136
|
+
try:
|
|
137
|
+
if os.name == "posix":
|
|
138
|
+
os.killpg(os.getpgid(proc.popen.pid), signal.SIGKILL)
|
|
139
|
+
else: # pragma: no cover - windows
|
|
140
|
+
proc.popen.kill()
|
|
141
|
+
except (ProcessLookupError, PermissionError, OSError):
|
|
142
|
+
proc.popen.kill()
|
|
143
|
+
return f"{proc.handle} terminated"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def kill_all() -> int:
    """Stop every background process that is still alive.

    Walks a snapshot of the registry, skips entries that have already
    exited, and kills the rest. Returns the number of processes that
    were still running. Used on REPL exit and as the ``atexit`` hook.
    """
    stopped = 0
    for entry in tuple(_procs.values()):
        if entry.popen.poll() is not None:
            continue
        _kill(entry)
        stopped += 1
    return stopped
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
atexit.register(kill_all)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def background_tools(workdir: Path | str) -> list[Any]:
    """Build the three background-process tools, rooted at ``workdir``.

    Returns ``[bash_background, bash_output, bash_kill]`` wrapped by
    ``tool``. Commands started through ``bash_background`` run with
    ``workdir`` as their working directory.
    """
    base_dir = Path(workdir)

    def _unknown(handle: str) -> str:
        # Shared error text for a handle that is not in the registry.
        known = ", ".join(sorted(_procs)) or "none"
        return f"ERROR: unknown handle {handle!r} (live: {known})"

    async def bash_background(command: str) -> str:
        command = str(command).strip()
        if command:
            started = _spawn(command, base_dir)
            return (
                f"started {started.handle} (pid {started.popen.pid}): "
                f"{command}\n"
                f"Check it with bash_output(handle='{started.handle}'); "
                f"stop it with bash_kill(handle='{started.handle}'). "
                "Keep working while it runs."
            )
        return "ERROR: empty command"

    async def bash_output(handle: str) -> str:
        entry = _procs.get(str(handle).strip())
        if entry is None:
            return _unknown(handle)
        output = _tail(entry)
        if not output.strip():
            output = "(no output yet)"
        return f"{_status_line(entry)}\n---\n{output}"

    async def bash_kill(handle: str) -> str:
        entry = _procs.get(str(handle).strip())
        if entry is None:
            return _unknown(handle)
        report = _kill(entry)
        output = _tail(entry)
        if output.strip():
            report = f"{report}\nfinal output tail:\n{output[-800:]}"
        return report

    # Runs arbitrary code — same safety contract as bash:
    # approval gate + allow/ask/deny rules + danger scan.
    start_tool = tool(
        name="bash_background",
        description=(
            "Run a shell command in the BACKGROUND and return a "
            "handle immediately — for dev servers, watchers, "
            "long builds/test runs you want to keep working "
            "past. Check progress with bash_output(handle); "
            "stop with bash_kill(handle). Use plain bash for "
            "anything under ~30s."
        ),
        destructive=True,
    )(bash_background)
    output_tool = tool(
        name="bash_output",
        description=(
            "Status + output tail of a background process "
            "started with bash_background. Args: handle "
            "(e.g. 'bg1')."
        ),
    )(bash_output)
    kill_tool = tool(
        name="bash_kill",
        description=(
            "Terminate a background process (whole process "
            "group) started with bash_background. Args: handle."
        ),
    )(bash_kill)
    return [start_tool, output_tool, kill_tool]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Context observability — the renderers behind ``/context`` and the
|
|
2
|
+
per-turn ``ctx%`` readout.
|
|
3
|
+
|
|
4
|
+
The loudest 2026 harness complaint (pi's author's core critique,
|
|
5
|
+
Anthropic's own Claude Code postmortems, Kilo's whole positioning) is
|
|
6
|
+
INVISIBLE context: harnesses inject content the user never sees,
|
|
7
|
+
compact silently, and change defaults without telling anyone.
|
|
8
|
+
loom-code's answer is to show everything — on demand (``/context``,
|
|
9
|
+
``/prompt``) and ambiently (a ``N% ctx`` figure on every turn's
|
|
10
|
+
summary line).
|
|
11
|
+
|
|
12
|
+
Pure functions only (no console, no agent) so the rendering is
|
|
13
|
+
trivially testable; ``repl.py`` gathers the live numbers and prints.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
# Mirrors loomflow's DEFAULT_CHARS_PER_TOKEN — the conservative
|
|
19
|
+
# cross-content estimate (English prose ≈4 chars/token, code ≈3;
|
|
20
|
+
# under-estimating context left would overflow, so estimate high).
|
|
21
|
+
_CHARS_PER_TOKEN = 4
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def estimate_tokens(text: str) -> int:
    """Estimate the token count of ``text`` from its character length.

    Empty text counts as 0 tokens; any non-empty text counts as at
    least 1. This sizes the working blocks shown by ``/context``: those
    blocks never go through a provider tokenizer on their own, so the
    figure is an estimate (and is shown with a ``~`` in the UI).
    """
    return max(1, len(text) // _CHARS_PER_TOKEN) if text else 0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def context_percent(used_tokens: int, window: int) -> int:
    """Return context occupancy as a whole percent in the range 0..100.

    A non-positive ``window`` yields 0 rather than dividing by zero.
    """
    if window <= 0:
        return 0
    percent = round(used_tokens * 100 / window)
    if percent < 0:
        return 0
    if percent > 100:
        return 100
    return percent
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def context_report(
    *,
    model: str,
    window: int,
    used_tokens: int,
    threshold: int,
    blocks: list[tuple[str, str]],
    n_exchanges: int,
) -> str:
    """Build the plain-text body of the ``/context`` report.

    ``blocks`` holds ``(name, content)`` pairs, one per working block;
    they are listed sorted by name with an estimated size each and a
    total. ``used_tokens`` is shown against ``window`` as a number, a
    24-cell bar and a percentage. A ``threshold`` of zero or less is
    reported as compaction being off.
    """
    pct = context_percent(used_tokens, window)
    cells = 24
    lit = round(cells * pct / 100)
    bar = "█" * lit + "░" * (cells - lit)

    if threshold > 0:
        compaction = (
            f" compaction {threshold:>10,} tokens "
            f"(auto-compacts at this point)"
        )
    else:
        compaction = " compaction off"
    plural = "s" if n_exchanges != 1 else ""

    report = [
        f"context — {model}",
        f" window {window:>10,} tokens",
        f" used {used_tokens:>10,} tokens [{bar}] {pct}%",
        compaction,
        f" history {n_exchanges:>10,} exchange{plural} this thread",
        "",
    ]

    if not blocks:
        report.append("injected working blocks: none")
    else:
        report.append(
            "injected working blocks (folded into every system prompt):"
        )
        running_total = 0
        for block_name, body in sorted(blocks, key=lambda pair: pair[0]):
            size = estimate_tokens(body)
            running_total += size
            report.append(f" {block_name:<18} ~{size:>7,} tokens")
        report.append(f" {'total':<18} ~{running_total:>7,} tokens")

    report.append("")
    report.append(
        "nothing else is injected — what you see here plus the "
        "conversation is the model's entire context. /prompt shows "
        "the full text."
    )
    return "\n".join(report)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def prompt_dump(
    *,
    instructions: str | None,
    blocks: list[tuple[str, str]],
) -> str:
    """Build the ``/prompt`` dump: static instructions, then each block.

    Sections are separated by blank lines. Working blocks appear sorted
    by name, each under a header carrying its estimated token size; a
    block with no content is shown as ``(empty)``. When ``instructions``
    is missing or empty, a placeholder notes that they are not exposed.
    """
    header = "═══ system instructions (static) ═══"
    if instructions:
        sections = [header, instructions.rstrip()]
    else:
        # Header and placeholder form ONE section (single newline).
        sections = [
            f"{header}\n"
            "(not exposed by this agent build — working blocks below "
            "are still exact)"
        ]
    for block_name, body in sorted(blocks, key=lambda pair: pair[0]):
        size = estimate_tokens(body)
        sections.append(
            f"═══ working block: {block_name} (~{size:,} tokens) ═══"
        )
        sections.append(body.rstrip() or "(empty)")
    return "\n\n".join(sections)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Post-edit diagnostics — surface syntax/lint errors to the model
|
|
2
|
+
immediately after every file mutation.
|
|
3
|
+
|
|
4
|
+
The MVP slice of "LSP-aware code intelligence" (named table-stakes for
|
|
5
|
+
2026 harnesses; Claude Code surfaces type errors after each edit, and
|
|
6
|
+
cline's LACK of it is a documented token-cost complaint): after each
|
|
7
|
+
``edit`` / ``multi_edit`` / ``write``, run a CHEAP per-file checker
|
|
8
|
+
and append any findings to the tool result, so the model fixes the
|
|
9
|
+
break in the same breath instead of discovering it three tool calls
|
|
10
|
+
later via a failing test.
|
|
11
|
+
|
|
12
|
+
Deliberately bounded — this is not an LSP client:
|
|
13
|
+
|
|
14
|
+
* per-FILE checks only (never whole-project ``tsc``/``cargo check``,
|
|
15
|
+
which cost seconds-to-minutes per edit);
|
|
16
|
+
* only checkers that are actually PRESENT (never cause the #1 agent
|
|
17
|
+
failure, "executable not found");
|
|
18
|
+
* hard timeout; on timeout/absence/success the hook is silent.
|
|
19
|
+
|
|
20
|
+
Rides the same native post-tool-hook mechanism as ``loop_guard`` —
|
|
21
|
+
the hint lands appended to the live ``ToolResult`` before the loop
|
|
22
|
+
serialises it, so it reaches the model inline.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import shutil
|
|
28
|
+
import sys
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
import anyio
|
|
33
|
+
|
|
34
|
+
_EDIT_TOOLS = frozenset({"edit", "multi_edit", "write"})
|
|
35
|
+
|
|
36
|
+
_TIMEOUT_S = 4.0
|
|
37
|
+
_MAX_LINES = 12
|
|
38
|
+
_MAX_CHARS = 1200
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def detect_checker(path: Path) -> list[str] | None:
    """Return the argv of a single-file checker for ``path``, or None.

    The checker is chosen by file suffix (case-insensitive). Python
    files use ``ruff`` when it is on PATH, else fall back to
    ``py_compile`` under the running interpreter. JavaScript, Go and
    shell files each need one specific binary on PATH; without it, or
    for any other suffix, the result is None.
    """
    ext = path.suffix.lower()
    target = str(path)

    if ext == ".py":
        ruff = shutil.which("ruff")
        if ruff:
            # --no-cache: the agent edits fast; a stale cache dir in
            # odd cwds causes confusing misses. Still ~50ms/file.
            return [ruff, "check", "--no-cache", target]
        return [sys.executable, "-m", "py_compile", target]

    # Remaining languages: one optional binary plus fixed flags.
    optional = {
        ".js": ("node", ["--check"]),
        ".mjs": ("node", ["--check"]),
        ".cjs": ("node", ["--check"]),
        # -e: report all (syntax) errors; -l alone is silent.
        ".go": ("gofmt", ["-e", "-l"]),
        ".sh": ("bash", ["-n"]),
        ".bash": ("bash", ["-n"]),
    }
    spec = optional.get(ext)
    if spec is None:
        return None
    binary, flags = spec
    exe = shutil.which(binary)
    return [exe, *flags, target] if exe else None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _trim(text: str) -> str:
    """Shrink checker output to fit the line and character caps.

    Blank lines are dropped. Beyond ``_MAX_LINES`` lines, the rest are
    replaced by a single "(+N more lines)" marker. The joined result is
    then cut to ``_MAX_CHARS`` characters.
    """
    kept = [line for line in text.strip().splitlines() if line.strip()]
    overflow = len(kept) - _MAX_LINES
    if overflow > 0:
        kept = [*kept[:_MAX_LINES], f"… (+{overflow} more lines)"]
    return "\n".join(kept)[:_MAX_CHARS]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def run_diagnostics(path: Path) -> str | None:
    """Check ``path`` with its per-file checker and return any findings.

    Returns the trimmed checker output only when the checker exits
    non-zero. Every other outcome is None: no checker for this file
    type, ``path`` is not a file, the run timed out, the run raised, or
    the checker succeeded. Silence is the contract — diagnostics may
    only ever add signal.
    """
    checker = detect_checker(path)
    if checker is None:
        return None
    # noqa rationale: one stat on a local path — cheaper than hopping
    # to a worker thread for it.
    if not path.is_file():  # noqa: ASYNC240
        return None

    completed: Any = None
    with anyio.move_on_after(_TIMEOUT_S):
        try:
            completed = await anyio.run_process(checker, check=False)
        except Exception:  # noqa: BLE001 — silent by contract
            return None
    if completed is None:  # the timeout fired before the run finished
        return None

    streams = [
        stream.decode("utf-8", "replace")
        for stream in (completed.stdout, completed.stderr)
    ]
    # gofmt -e -l prints the filename on stdout for UNFORMATTED files
    # even with rc=0 — only treat rc!=0 (real syntax/lint errors) as
    # a finding, matching "silence unless broken".
    if completed.returncode == 0:
        return None
    return _trim("\n".join(streams)) or None
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def make_post_tool_hook(root: Path) -> Any:
    """Create the post-tool hook that appends diagnostics to edit results.

    Edit tools take project-relative paths, so the hook resolves a
    relative ``path`` argument against ``root`` rather than the process
    cwd. The returned coroutine function takes ``(call, result)`` and
    mutates ``result.output`` in place; it never raises.
    """

    async def post_tool(call: Any, result: Any) -> None:
        try:
            if str(getattr(call, "tool", "")) not in _EDIT_TOOLS:
                return
            if not getattr(result, "ok", False):
                return  # the edit itself failed; don't pile on
            call_args = dict(getattr(call, "args", {}) or {})
            raw = str(call_args.get("path", "")).strip()
            if not raw:
                return
            # noqa rationale: pure string math, no disk I/O.
            target = Path(raw).expanduser()  # noqa: ASYNC240
            if not target.is_absolute():
                target = root / target
            findings = await run_diagnostics(target)
            if not findings or not isinstance(result.output, str):
                return
            result.output = (
                f"{result.output}\n\n"
                "[diagnostics] the edited file now has problems — "
                "fix them before moving on:\n"
                f"{findings}"
            )
        except Exception:  # noqa: BLE001 — never break a tool result
            return

    return post_tool
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def attach(agent: Any, root: Path) -> None:
    """Register the diagnostics hook on ``agent``'s post-tool registry.

    Best-effort: if the agent has no such registry, or registration
    fails for any reason, the call does nothing.
    """
    try:
        hook = make_post_tool_hook(root)
        agent._hooks.register_post_tool(hook)  # noqa: SLF001
    except Exception:  # noqa: BLE001
        return
|