loom-code 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. loom_code/__init__.py +22 -0
  2. loom_code/_post_commit.py +119 -0
  3. loom_code/agent.py +544 -0
  4. loom_code/approval.py +616 -0
  5. loom_code/browse/__init__.py +291 -0
  6. loom_code/browse/act.py +467 -0
  7. loom_code/browse/observe.py +249 -0
  8. loom_code/browse/session.py +96 -0
  9. loom_code/browse/verify.py +194 -0
  10. loom_code/checkpoint.py +283 -0
  11. loom_code/cli.py +495 -0
  12. loom_code/code_index.py +703 -0
  13. loom_code/compact.py +143 -0
  14. loom_code/consent.py +47 -0
  15. loom_code/credentials.py +527 -0
  16. loom_code/edit_tool.py +635 -0
  17. loom_code/extensions.py +522 -0
  18. loom_code/file_history.py +322 -0
  19. loom_code/file_tools.py +93 -0
  20. loom_code/git_hook.py +200 -0
  21. loom_code/grep_tool.py +430 -0
  22. loom_code/hooks.py +297 -0
  23. loom_code/loominit/__init__.py +23 -0
  24. loom_code/loominit/_ast_walk.py +429 -0
  25. loom_code/loominit/_files.py +284 -0
  26. loom_code/loominit/_graph.py +141 -0
  27. loom_code/loominit/_resolve.py +392 -0
  28. loom_code/loominit/_tests_map.py +108 -0
  29. loom_code/loominit/extractor.py +332 -0
  30. loom_code/loominit/repomap.py +225 -0
  31. loom_code/loominit/schema.py +242 -0
  32. loom_code/lsp_tools.py +396 -0
  33. loom_code/mcp_host.py +79 -0
  34. loom_code/operator.py +449 -0
  35. loom_code/paste.py +97 -0
  36. loom_code/paths.py +52 -0
  37. loom_code/permissions.py +177 -0
  38. loom_code/project.py +104 -0
  39. loom_code/prompts.py +451 -0
  40. loom_code/render.py +783 -0
  41. loom_code/repl.py +4080 -0
  42. loom_code/rules.py +267 -0
  43. loom_code/sandboxed_bash.py +176 -0
  44. loom_code/scribe.py +88 -0
  45. loom_code/skills/__init__.py +16 -0
  46. loom_code/skills/graphify/SKILL.md +97 -0
  47. loom_code/skills/graphify/tools.py +570 -0
  48. loom_code/trust.py +216 -0
  49. loom_code/turn.py +169 -0
  50. loom_code/web_fetch.py +370 -0
  51. loom_code/workers.py +758 -0
  52. loom_code/worktree.py +134 -0
  53. loom_code-0.1.1.dist-info/METADATA +224 -0
  54. loom_code-0.1.1.dist-info/RECORD +58 -0
  55. loom_code-0.1.1.dist-info/WHEEL +5 -0
  56. loom_code-0.1.1.dist-info/entry_points.txt +2 -0
  57. loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
  58. loom_code-0.1.1.dist-info/top_level.txt +1 -0
loom_code/web_fetch.py ADDED
@@ -0,0 +1,370 @@
1
+ """HTTPS fetch tool for loom-code workers.
2
+
3
+ Closes a gap in the read-only specialists' tool surface:
4
+ ``explorer`` and ``auditor`` can read the local project root
5
+ (``read``/``grep``/``find``/``ls`` are all path-scoped), but
6
+ cannot reach a URL or a GitHub repo. Without a fetch primitive
7
+ they silently substitute local files for remote sources when a
8
+ task names a URL — a hallucinated-authority failure mode that
9
+ ``bash curl`` from ``coder`` only solves if the coordinator
10
+ routes the work there.
11
+
12
+ :func:`web_fetch_tool` returns a :class:`loomflow.Tool` that
13
+ takes one ``url`` arg and returns the body as text. It is
14
+ read-only by construction — it cannot write to disk, mutate
15
+ state, or run arbitrary shell — so it preserves the parallel-
16
+ delegation safety claim (only ``coder`` writes) even when wired
17
+ into the read-only workers.
18
+
19
+ Lives in loom-code (not loomflow) intentionally: the framework's
20
+ ``web_tool`` covers SEARCH, this covers FETCH. The two would
21
+ naturally pair under a single ``loomflow.tools.web`` namespace,
22
+ but until that lands upstream loom-code carries it locally.
23
+
24
+ Implementation notes:
25
+
26
+ - Uses ``httpx`` which ships via ``loomflow[web]`` (pyproject
27
+ declares the floor) — no new top-level dependency.
28
+ - ``http://`` is silently upgraded to ``https://``; other
29
+ schemes are rejected with a clear ``ERROR: ...`` string so
30
+ the model sees what went wrong instead of stack-tracing.
31
+ - GitHub blob URLs rewrite to ``raw.githubusercontent.com``
32
+ before fetching. Models naturally type the human URL and would
33
+ otherwise get a page of HTML — rewriting saves a turn.
34
+ - Response cap (5 MB default) is structural, not a soft warning:
35
+ an accidental tarball or binary blob doesn't get to blow
36
+ conversation context.
37
+ - Errors return as strings (``ERROR: ...``), not raises — same
38
+ convention as ``loomflow.tools.web.web_tool``. The agent sees
39
+ the error and decides what to do (retry, change URL, escalate).
40
+ - SSRF guard: the target host is resolved and rejected if it lands
41
+ in loopback / link-local / private / reserved IP space (incl. the
42
+ 169.254.169.254 cloud-metadata endpoint). Redirects are followed
43
+ MANUALLY so the guard re-runs on every hop — a public URL can't
44
+ 302 its way to an internal address.
45
+ """
46
+
47
+ from __future__ import annotations
48
+
49
+ import ipaddress
50
+ import re
51
+ import socket
52
+ from typing import Any
53
+ from urllib.parse import urlsplit
54
+
55
+ from loomflow import Tool
56
+
57
+ # Cap the TEXT returned to the model — distinct from the 5MB byte cap on
58
+ # the raw HTTP response. A large page (or a giant HTML *error* page) must
59
+ # never blow the context window: a single uncapped fetch of a ~1MB 404
60
+ # page once hit ~261k tokens and crashed a 200k-window model. 100KB
61
+ # matches Claude Code's WebFetch text cap. Error (non-2xx) bodies are
62
+ # rarely useful content, so they get a much smaller snippet.
63
+ _MAX_RESULT_CHARS = 100_000
64
+ _MAX_ERROR_CHARS = 4_000
65
+
66
+ _TOOL_DESCRIPTION = (
67
+ "Fetch the body of an HTTPS URL and return it as text. Use "
68
+ "for READMEs, raw source files on GitHub, documentation "
69
+ "pages, JSON/YAML configs. For a full repository prefer "
70
+ "`git clone` via bash. GitHub blob URLs are auto-rewritten "
71
+ "to raw URLs so you can paste the human URL and get file "
72
+ "content. Responses over 5MB are rejected; larger pages are "
73
+ "truncated to ~100KB of text (fetch a more specific URL for the "
74
+ "part you need). Returns the body as a string prefixed with "
75
+ "status + final URL; errors come back as `ERROR: ...` strings, "
76
+ "not exceptions."
77
+ )
78
+
79
+ _URL_SCHEMA: dict[str, Any] = {
80
+ "type": "object",
81
+ "properties": {
82
+ "url": {
83
+ "type": "string",
84
+ "description": (
85
+ "Fully-qualified URL to fetch. http:// is "
86
+ "upgraded to https://; other schemes are rejected."
87
+ ),
88
+ }
89
+ },
90
+ "required": ["url"],
91
+ }
92
+
93
+ # github.com/<owner>/<repo>/blob/<ref>/<path>
94
+ # → raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>
95
+ # Match the host explicitly so we don't rewrite gitlab/bitbucket/etc.
96
+ _GITHUB_BLOB_RE = re.compile(
97
+ r"^https?://(?:www\.)?github\.com/"
98
+ r"(?P<owner>[^/]+)/(?P<repo>[^/]+)/blob/(?P<rest>.+)$"
99
+ )
100
+
101
+ # github.com/<owner>/<repo>/tree/<ref>(/<path>)? is a directory page.
102
+ # Fetching it returns ~700kB of React-rendered HTML — pure token
103
+ # waste — instead of the file listing the model usually wants.
104
+ # We refuse and direct the model at the GitHub contents API
105
+ # (returns JSON: file names + download URLs in one call). The
106
+ # `rest` group can be empty (tree at root), so use `.*` not `.+`.
107
+ _GITHUB_TREE_RE = re.compile(
108
+ r"^https?://(?:www\.)?github\.com/"
109
+ r"(?P<owner>[^/]+)/(?P<repo>[^/]+)/tree/(?P<rest>.*)$"
110
+ )
111
+
112
+ # github.com/<owner>/<repo>(/)? — the REPO ROOT page. Same ~700kB of
113
+ # React HTML as a /tree/ page (the README rendered + the whole file
114
+ # tree + sidebars), and the model usually wants the README or the
115
+ # file listing, not the chrome. Refuse + direct it at the cheap
116
+ # routes. Anchored to end (with optional query/fragment) so it can't
117
+ # swallow /blob/, /tree/, /pull/, /issues/, etc.
118
+ _GITHUB_REPO_ROOT_RE = re.compile(
119
+ r"^https?://(?:www\.)?github\.com/"
120
+ r"(?P<owner>[^/?#]+)/(?P<repo>[^/?#]+)/?(?:[?#].*)?$"
121
+ )
122
+
123
+
124
+ def _normalize_url(url: str) -> tuple[str | None, str | None]:
125
+ """Return ``(normalized_url, error)``. Exactly one is non-None.
126
+
127
+ Pure function — extracted from the tool body so it can be
128
+ unit-tested without monkeypatching httpx.
129
+ """
130
+ url = url.strip()
131
+ if not url:
132
+ return None, "ERROR: empty URL"
133
+ if url.startswith("http://"):
134
+ url = "https://" + url[len("http://"):]
135
+ if not url.startswith("https://"):
136
+ return None, (
137
+ f"ERROR: only http(s) URLs are supported, got "
138
+ f"{url[:60]!r}. For local files use `read`; for "
139
+ f"shell commands use `bash`."
140
+ )
141
+ # /tree/ before /blob/: a tree URL would slip past /blob/
142
+ # matching anyway, but we want the directive error, not a
143
+ # silent fallthrough fetch of HTML.
144
+ m_tree = _GITHUB_TREE_RE.match(url)
145
+ if m_tree:
146
+ owner = m_tree["owner"]
147
+ repo = m_tree["repo"]
148
+ # `rest` may be "" (tree at root), "<ref>", or "<ref>/<path>".
149
+ # Split into ref + path so the suggested API URL is correct.
150
+ parts = m_tree["rest"].split("/", 1)
151
+ ref = parts[0] if parts and parts[0] else "main"
152
+ path = parts[1] if len(parts) > 1 else ""
153
+ api_url = (
154
+ f"https://api.github.com/repos/{owner}/{repo}/contents/"
155
+ f"{path}?ref={ref}"
156
+ )
157
+ return None, (
158
+ f"ERROR: {url} is a GitHub DIRECTORY page (React HTML, "
159
+ f"~700kB). Fetching it wastes tokens; use one of:\n"
160
+ f" - LIST contents (JSON): web_fetch {api_url}\n"
161
+ f" - LIST via gh CLI: "
162
+ f"`bash gh api repos/{owner}/{repo}/contents/{path}?ref={ref}`\n"
163
+ f" - FETCH a file: web_fetch the /blob/ URL "
164
+ f"(github.com/{owner}/{repo}/blob/{ref}/<path>)\n"
165
+ f" - FETCH README: "
166
+ f"web_fetch https://github.com/{owner}/{repo}/blob/{ref}/README.md"
167
+ )
168
+ m_root = _GITHUB_REPO_ROOT_RE.match(url)
169
+ if m_root:
170
+ owner = m_root["owner"]
171
+ repo = m_root["repo"]
172
+ return None, (
173
+ f"ERROR: {url} is a GitHub REPO ROOT page (React HTML, "
174
+ f"~700kB). Fetching it wastes tokens; to explore the repo "
175
+ f"use one of:\n"
176
+ f" - READ the README: "
177
+ f"web_fetch https://github.com/{owner}/{repo}/blob/main/README.md\n"
178
+ f" - LIST the root: "
179
+ f"web_fetch https://api.github.com/repos/{owner}/{repo}/contents/\n"
180
+ f" - LIST a subdir: web_fetch "
181
+ f"https://api.github.com/repos/{owner}/{repo}/contents/<dir>\n"
182
+ f" - CLONE + explore: `bash git clone "
183
+ f"https://github.com/{owner}/{repo} \"$(mktemp -d)/{repo}\"` "
184
+ f"then inspect with `bash ls`/`bash cat` (your read/grep/ls "
185
+ f"are scoped to the LOCAL project and can't see the clone)"
186
+ )
187
+ m = _GITHUB_BLOB_RE.match(url)
188
+ if m:
189
+ url = (
190
+ f"https://raw.githubusercontent.com/"
191
+ f"{m['owner']}/{m['repo']}/{m['rest']}"
192
+ )
193
+ return url, None
194
+
195
+
196
+ def _ip_is_blocked(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
197
+ """True if ``ip`` is a destination web_fetch must NOT reach.
198
+
199
+ Blocks the whole non-public space — loopback (127/8, ::1), link-local
200
+ (169.254/16 incl. the 169.254.169.254 cloud-metadata endpoint, fe80::/10),
201
+ RFC-1918 private (10/8, 172.16/12, 192.168/16), unique-local IPv6
202
+ (fc00::/7), unspecified (0.0.0.0, ::), and other reserved ranges. The
203
+ ``is_*`` properties on ``ipaddress`` cover each class; we OR them so a
204
+ new reserved range can't slip through a single missed check."""
205
+ # IPv4-mapped IPv6 (::ffff:127.0.0.1) would otherwise dodge the v4
206
+ # checks — unwrap to the embedded v4 address first.
207
+ if isinstance(ip, ipaddress.IPv6Address) and ip.ipv4_mapped is not None:
208
+ ip = ip.ipv4_mapped
209
+ return (
210
+ ip.is_private
211
+ or ip.is_loopback
212
+ or ip.is_link_local
213
+ or ip.is_unspecified
214
+ or ip.is_reserved
215
+ or ip.is_multicast
216
+ )
217
+
218
+
219
+ def _host_is_blocked(host: str) -> str | None:
220
+ """Resolve ``host`` and return a reason string if ANY resolved IP is
221
+ in a blocked range; ``None`` if every address is a public destination.
222
+
223
+ SSRF defense: an attacker-influenced URL (or a redirect hop) could
224
+ point at cloud-metadata (169.254.169.254), localhost services, or the
225
+ LAN. We resolve the name and reject if a literal IP or DNS result lands
226
+ in non-public space. Resolving (not just string-matching) is required —
227
+ a hostname can resolve to 127.0.0.1, and a single A record in a blocked
228
+ range is enough to refuse (no partial trust)."""
229
+ if not host:
230
+ return "no host in URL"
231
+ # Literal IP? Validate directly (covers the obvious 10.x / 127.x cases
232
+ # and avoids a needless DNS lookup).
233
+ try:
234
+ return (
235
+ f"host {host} resolves to a private/loopback/link-local "
236
+ f"address — refused (SSRF guard)"
237
+ if _ip_is_blocked(ipaddress.ip_address(host))
238
+ else None
239
+ )
240
+ except ValueError:
241
+ pass # not a literal IP — fall through to DNS resolution
242
+ try:
243
+ infos = socket.getaddrinfo(host, None)
244
+ except socket.gaierror as exc:
245
+ return f"could not resolve host {host}: {exc}"
246
+ for info in infos:
247
+ addr = info[4][0]
248
+ try:
249
+ ip = ipaddress.ip_address(addr)
250
+ except ValueError:
251
+ continue
252
+ if _ip_is_blocked(ip):
253
+ return (
254
+ f"host {host} resolves to {addr}, a private/loopback/"
255
+ f"link-local address — refused (SSRF guard)"
256
+ )
257
+ return None
258
+
259
+
260
def web_fetch_tool(
    *,
    name: str = "web_fetch",
    timeout: float = 15.0,
    max_bytes: int = 5_000_000,
) -> Tool:
    """Build a :class:`Tool` that fetches an HTTPS URL's body.

    Args:
        name: Tool name the model sees (default ``web_fetch``).
            Overridable mostly for tests / co-existence with
            other fetch tools.
        timeout: Timeout in seconds (default 15), passed straight to
            ``httpx.AsyncClient(timeout=...)``.
            NOTE(review): httpx applies a float timeout to each phase
            (connect / read / write / pool) rather than as one overall
            deadline — confirm against the pinned httpx version.
        max_bytes: Reject responses larger than this — keeps an
            accidental tarball or binary blob from blowing
            conversation context. Default 5 MB. The check runs on the
            already-downloaded body, so it bounds what reaches the
            model, not what is transferred.

    Returns:
        A :class:`Tool` named ``name`` with one ``url: str``
        parameter, returning the response body prefixed by status
        code and final URL (after redirects + GitHub rewriting).
        Every failure is returned as an ``ERROR: ...`` string.

    Example::

        from loom_code.web_fetch import web_fetch_tool
        from loomflow import Agent

        agent = Agent("...", tools=[web_fetch_tool()])
    """

    async def _fetch(url: str) -> str:
        """Fetch ``url`` and return its (capped) body, or ``ERROR: ...``."""
        normalized, err = _normalize_url(url)
        if err is not None:
            return err
        # Lazy imports — matches the loomflow tool convention so
        # `import loom_code.web_fetch` doesn't pay the httpx cost.
        import asyncio

        import httpx

        assert normalized is not None  # err-None branch guarantees this
        current = normalized
        loop = asyncio.get_running_loop()

        # Follow redirects MANUALLY so we re-run the SSRF guard on every
        # hop. With httpx's follow_redirects=True a public URL could 302
        # to http://169.254.169.254/ and we'd never see the final host —
        # the guard has to gate each Location, not just the first URL.
        try:
            async with httpx.AsyncClient(
                follow_redirects=False,
                timeout=timeout,
            ) as client:
                for _ in range(10):  # redirect cap (stricter than httpx's own)
                    host = urlsplit(current).hostname or ""
                    # The guard does a blocking DNS lookup; run it in the
                    # default executor so a slow resolver cannot stall
                    # every other coroutine on this event loop.
                    blocked = await loop.run_in_executor(
                        None, _host_is_blocked, host
                    )
                    if blocked is not None:
                        return f"ERROR: {blocked}"
                    r = await client.get(current)
                    if r.is_redirect and r.headers.get("location"):
                        # Resolve relative Location against the current URL.
                        current = str(r.url.join(r.headers["location"]))
                        continue
                    break
                else:
                    return "ERROR: too many redirects (>10)"
        except (httpx.HTTPError, httpx.InvalidURL, ValueError) as exc:
            # httpx.InvalidURL is NOT an HTTPError subclass, and urlsplit
            # raises ValueError on e.g. an unbalanced "[" — both can come
            # from a malformed URL or redirect Location and must surface
            # as an ERROR string, not an exception. Some httpx errors
            # (timeouts) carry an empty message, so fall back to the
            # exception type name.
            detail = str(exc) or type(exc).__name__
            return f"ERROR: fetch failed: {detail}"

        # Reject oversized payloads after the fact rather than via
        # Content-Length — many CDNs don't set it correctly and
        # we'd rather download-and-reject than incorrectly block a
        # small response with a bogus header.
        # NOTE(review): client.get() has buffered the full body by this
        # point, so a very large response is still held in memory first.
        size = len(r.content)
        if size > max_bytes:
            return (
                f"ERROR: response exceeds {max_bytes} bytes "
                f"({size} actual). For large repos use "
                f"`git clone` via bash; for partial reads pass a "
                f"more specific URL."
            )

        # Cap the body BEFORE it enters the conversation (where it gets
        # re-sent every turn). Successful pages truncate at 100KB; error
        # pages (non-2xx) get a small snippet since the body is an error
        # page, not content the model needs.
        body = r.text
        total_chars = len(body)
        ok = 200 <= r.status_code < 300
        cap = _MAX_RESULT_CHARS if ok else _MAX_ERROR_CHARS
        if total_chars > cap:
            omitted = total_chars - cap
            reason = "page too large" if ok else "error page"
            body = (
                body[:cap]
                + f"\n\n… [{reason} — truncated {omitted} of {total_chars} "
                "chars. Fetch a more specific URL, or `git clone` via bash "
                "for a whole repo.]"
            )

        # Render with a small header so the model knows what it
        # got — the final URL (after redirects + GitHub rewriting)
        # and status are both load-bearing for follow-ups.
        return (
            f"# {r.url}\n"
            f"status: {r.status_code}\n"
            f"\n"
            f"{body}"
        )

    return Tool(
        name=name,
        description=_TOOL_DESCRIPTION,
        fn=_fetch,
        input_schema=_URL_SCHEMA,
    )