PyPI - loom-code - Versions diffs - 0.1.1__py3-none-any.whl - Mend

loom-code 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

loom_code/__init__.py +22 -0
loom_code/_post_commit.py +119 -0
loom_code/agent.py +544 -0
loom_code/approval.py +616 -0
loom_code/browse/__init__.py +291 -0
loom_code/browse/act.py +467 -0
loom_code/browse/observe.py +249 -0
loom_code/browse/session.py +96 -0
loom_code/browse/verify.py +194 -0
loom_code/checkpoint.py +283 -0
loom_code/cli.py +495 -0
loom_code/code_index.py +703 -0
loom_code/compact.py +143 -0
loom_code/consent.py +47 -0
loom_code/credentials.py +527 -0
loom_code/edit_tool.py +635 -0
loom_code/extensions.py +522 -0
loom_code/file_history.py +322 -0
loom_code/file_tools.py +93 -0
loom_code/git_hook.py +200 -0
loom_code/grep_tool.py +430 -0
loom_code/hooks.py +297 -0
loom_code/loominit/__init__.py +23 -0
loom_code/loominit/_ast_walk.py +429 -0
loom_code/loominit/_files.py +284 -0
loom_code/loominit/_graph.py +141 -0
loom_code/loominit/_resolve.py +392 -0
loom_code/loominit/_tests_map.py +108 -0
loom_code/loominit/extractor.py +332 -0
loom_code/loominit/repomap.py +225 -0
loom_code/loominit/schema.py +242 -0
loom_code/lsp_tools.py +396 -0
loom_code/mcp_host.py +79 -0
loom_code/operator.py +449 -0
loom_code/paste.py +97 -0
loom_code/paths.py +52 -0
loom_code/permissions.py +177 -0
loom_code/project.py +104 -0
loom_code/prompts.py +451 -0
loom_code/render.py +783 -0
loom_code/repl.py +4080 -0
loom_code/rules.py +267 -0
loom_code/sandboxed_bash.py +176 -0
loom_code/scribe.py +88 -0
loom_code/skills/__init__.py +16 -0
loom_code/skills/graphify/SKILL.md +97 -0
loom_code/skills/graphify/tools.py +570 -0
loom_code/trust.py +216 -0
loom_code/turn.py +169 -0
loom_code/web_fetch.py +370 -0
loom_code/workers.py +758 -0
loom_code/worktree.py +134 -0
loom_code-0.1.1.dist-info/METADATA +224 -0
loom_code-0.1.1.dist-info/RECORD +58 -0
loom_code-0.1.1.dist-info/WHEEL +5 -0
loom_code-0.1.1.dist-info/entry_points.txt +2 -0
loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
loom_code-0.1.1.dist-info/top_level.txt +1 -0

loom_code/web_fetch.py ADDED Viewed

@@ -0,0 +1,370 @@
+"""HTTPS fetch tool for loom-code workers.
+Closes a gap in the read-only specialists' tool surface:
+``explorer`` and ``auditor`` can read the local project root
+(``read``/``grep``/``find``/``ls`` are all path-scoped), but
+cannot reach a URL or a GitHub repo. Without a fetch primitive
+they silently substitute local files for remote sources when a
+task names a URL — a hallucinated-authority failure mode that
+``bash curl`` from ``coder`` only solves if the coordinator
+routes the work there.
+:func:`web_fetch_tool` returns a :class:`loomflow.Tool` that
+takes one ``url`` arg and returns the body as text. It is
+read-only by construction — it cannot write to disk, mutate
+state, or run arbitrary shell — so it preserves the parallel-
+delegation safety claim (only ``coder`` writes) even when wired
+into the read-only workers.
+Lives in loom-code (not loomflow) intentionally: the framework's
+``web_tool`` covers SEARCH, this covers FETCH. The two would
+naturally pair under a single ``loomflow.tools.web`` namespace,
+but until that lands upstream loom-code carries it locally.
+Implementation notes:
+- Uses ``httpx`` which ships via ``loomflow[web]`` (pyproject
+  declares the floor) — no new top-level dependency.
+- ``http://`` is silently upgraded to ``https://``; other
+  schemes are rejected with a clear ``ERROR: ...`` string so
+  the model sees what went wrong instead of stack-tracing.
+- GitHub blob URLs rewrite to ``raw.githubusercontent.com``
+  before fetching. Models naturally type the human URL and would
+  otherwise get a page of HTML — rewriting saves a turn.
+- Response cap (5 MB default) is structural, not a soft warning:
+  an accidental tarball or binary blob doesn't get to blow
+  conversation context.
+- Errors return as strings (``ERROR: ...``), not raises — same
+  convention as ``loomflow.tools.web.web_tool``. The agent sees
+  the error and decides what to do (retry, change URL, escalate).
+- SSRF guard: the target host is resolved and rejected if it lands
+  in loopback / link-local / private / reserved IP space (incl. the
+  169.254.169.254 cloud-metadata endpoint). Redirects are followed
+  MANUALLY so the guard re-runs on every hop — a public URL can't
+  302 its way to an internal address.
+"""
+from __future__ import annotations
+import ipaddress
+import re
+import socket
+from typing import Any
+from urllib.parse import urlsplit
+from loomflow import Tool
+# Cap the TEXT returned to the model — distinct from the 5MB byte cap on
+# the raw HTTP response. A large page (or a giant HTML *error* page) must
+# never blow the context window: a single uncapped fetch of a ~1MB 404
+# page once hit ~261k tokens and crashed a 200k-window model. 100KB
+# matches Claude Code's WebFetch text cap. Error (non-2xx) bodies are
+# rarely useful content, so they get a much smaller snippet.
+_MAX_RESULT_CHARS = 100_000
+_MAX_ERROR_CHARS = 4_000
+_TOOL_DESCRIPTION = (
+    "Fetch the body of an HTTPS URL and return it as text. Use "
+    "for READMEs, raw source files on GitHub, documentation "
+    "pages, JSON/YAML configs. For a full repository prefer "
+    "`git clone` via bash. GitHub blob URLs are auto-rewritten "
+    "to raw URLs so you can paste the human URL and get file "
+    "content. Responses over 5MB are rejected; larger pages are "
+    "truncated to ~100KB of text (fetch a more specific URL for the "
+    "part you need). Returns the body as a string prefixed with "
+    "status + final URL; errors come back as `ERROR: ...` strings, "
+    "not exceptions."
+)
+_URL_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        "url": {
+            "type": "string",
+            "description": (
+                "Fully-qualified URL to fetch. http:// is "
+                "upgraded to https://; other schemes are rejected."
+            ),
+        }
+    },
+    "required": ["url"],
+}
+# github.com/<owner>/<repo>/blob/<ref>/<path>
+#   → raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>
+# Match the host explicitly so we don't rewrite gitlab/bitbucket/etc.
+_GITHUB_BLOB_RE = re.compile(
+    r"^https?://(?:www\.)?github\.com/"
+    r"(?P<owner>[^/]+)/(?P<repo>[^/]+)/blob/(?P<rest>.+)$"
+)
+# github.com/<owner>/<repo>/tree/<ref>(/<path>)? is a directory page.
+# Fetching it returns ~700kB of React-rendered HTML — pure token
+# waste — instead of the file listing the model usually wants.
+# We refuse and direct the model at the GitHub contents API
+# (returns JSON: file names + download URLs in one call). The
+# `rest` group can be empty (tree at root), so use `.*` not `.+`.
+_GITHUB_TREE_RE = re.compile(
+    r"^https?://(?:www\.)?github\.com/"
+    r"(?P<owner>[^/]+)/(?P<repo>[^/]+)/tree/(?P<rest>.*)$"
+)
+# github.com/<owner>/<repo>(/)? — the REPO ROOT page. Same ~700kB of
+# React HTML as a /tree/ page (the README rendered + the whole file
+# tree + sidebars), and the model usually wants the README or the
+# file listing, not the chrome. Refuse + direct it at the cheap
+# routes. Anchored to end (with optional query/fragment) so it can't
+# swallow /blob/, /tree/, /pull/, /issues/, etc.
+_GITHUB_REPO_ROOT_RE = re.compile(
+    r"^https?://(?:www\.)?github\.com/"
+    r"(?P<owner>[^/?#]+)/(?P<repo>[^/?#]+)/?(?:[?#].*)?$"
+)
+def _normalize_url(url: str) -> tuple[str | None, str | None]:
+    """Return ``(normalized_url, error)``. Exactly one is non-None.
+    Pure function — extracted from the tool body so it can be
+    unit-tested without monkeypatching httpx.
+    """
+    url = url.strip()
+    if not url:
+        return None, "ERROR: empty URL"
+    if url.startswith("http://"):
+        url = "https://" + url[len("http://"):]
+    if not url.startswith("https://"):
+        return None, (
+            f"ERROR: only http(s) URLs are supported, got "
+            f"{url[:60]!r}. For local files use `read`; for "
+            f"shell commands use `bash`."
+        )
+    # /tree/ before /blob/: a tree URL would slip past /blob/
+    # matching anyway, but we want the directive error, not a
+    # silent fallthrough fetch of HTML.
+    m_tree = _GITHUB_TREE_RE.match(url)
+    if m_tree:
+        owner = m_tree["owner"]
+        repo = m_tree["repo"]
+        # `rest` may be "" (tree at root), "<ref>", or "<ref>/<path>".
+        # Split into ref + path so the suggested API URL is correct.
+        parts = m_tree["rest"].split("/", 1)
+        ref = parts[0] if parts and parts[0] else "main"
+        path = parts[1] if len(parts) > 1 else ""
+        api_url = (
+            f"https://api.github.com/repos/{owner}/{repo}/contents/"
+            f"{path}?ref={ref}"
+        )
+        return None, (
+            f"ERROR: {url} is a GitHub DIRECTORY page (React HTML, "
+            f"~700kB). Fetching it wastes tokens; use one of:\n"
+            f"  - LIST contents (JSON): web_fetch {api_url}\n"
+            f"  - LIST via gh CLI:      "
+            f"`bash gh api repos/{owner}/{repo}/contents/{path}?ref={ref}`\n"
+            f"  - FETCH a file:         web_fetch the /blob/ URL "
+            f"(github.com/{owner}/{repo}/blob/{ref}/<path>)\n"
+            f"  - FETCH README:         "
+            f"web_fetch https://github.com/{owner}/{repo}/blob/{ref}/README.md"
+        )
+    m_root = _GITHUB_REPO_ROOT_RE.match(url)
+    if m_root:
+        owner = m_root["owner"]
+        repo = m_root["repo"]
+        return None, (
+            f"ERROR: {url} is a GitHub REPO ROOT page (React HTML, "
+            f"~700kB). Fetching it wastes tokens; to explore the repo "
+            f"use one of:\n"
+            f"  - READ the README:   "
+            f"web_fetch https://github.com/{owner}/{repo}/blob/main/README.md\n"
+            f"  - LIST the root:     "
+            f"web_fetch https://api.github.com/repos/{owner}/{repo}/contents/\n"
+            f"  - LIST a subdir:     web_fetch "
+            f"https://api.github.com/repos/{owner}/{repo}/contents/<dir>\n"
+            f"  - CLONE + explore:   `bash git clone "
+            f"https://github.com/{owner}/{repo} \"$(mktemp -d)/{repo}\"` "
+            f"then inspect with `bash ls`/`bash cat` (your read/grep/ls "
+            f"are scoped to the LOCAL project and can't see the clone)"
+        )
+    m = _GITHUB_BLOB_RE.match(url)
+    if m:
+        url = (
+            f"https://raw.githubusercontent.com/"
+            f"{m['owner']}/{m['repo']}/{m['rest']}"
+        )
+    return url, None
+def _ip_is_blocked(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
+    """True if ``ip`` is a destination web_fetch must NOT reach.
+    Blocks the whole non-public space — loopback (127/8, ::1), link-local
+    (169.254/16 incl. the 169.254.169.254 cloud-metadata endpoint, fe80::/10),
+    RFC-1918 private (10/8, 172.16/12, 192.168/16), unique-local IPv6
+    (fc00::/7), unspecified (0.0.0.0, ::), and other reserved ranges. The
+    ``is_*`` properties on ``ipaddress`` cover each class; we OR them so a
+    new reserved range can't slip through a single missed check."""
+    # IPv4-mapped IPv6 (::ffff:127.0.0.1) would otherwise dodge the v4
+    # checks — unwrap to the embedded v4 address first.
+    if isinstance(ip, ipaddress.IPv6Address) and ip.ipv4_mapped is not None:
+        ip = ip.ipv4_mapped
+    return (
+        ip.is_private
+        or ip.is_loopback
+        or ip.is_link_local
+        or ip.is_unspecified
+        or ip.is_reserved
+        or ip.is_multicast
+    )
+def _host_is_blocked(host: str) -> str | None:
+    """Resolve ``host`` and return a reason string if ANY resolved IP is
+    in a blocked range; ``None`` if every address is a public destination.
+    SSRF defense: an attacker-influenced URL (or a redirect hop) could
+    point at cloud-metadata (169.254.169.254), localhost services, or the
+    LAN. We resolve the name and reject if a literal IP or DNS result lands
+    in non-public space. Resolving (not just string-matching) is required —
+    a hostname can resolve to 127.0.0.1, and a single A record in a blocked
+    range is enough to refuse (no partial trust)."""
+    if not host:
+        return "no host in URL"
+    # Literal IP? Validate directly (covers the obvious 10.x / 127.x cases
+    # and avoids a needless DNS lookup).
+    try:
+        return (
+            f"host {host} resolves to a private/loopback/link-local "
+            f"address — refused (SSRF guard)"
+            if _ip_is_blocked(ipaddress.ip_address(host))
+            else None
+        )
+    except ValueError:
+        pass  # not a literal IP — fall through to DNS resolution
+    try:
+        infos = socket.getaddrinfo(host, None)
+    except socket.gaierror as exc:
+        return f"could not resolve host {host}: {exc}"
+    for info in infos:
+        addr = info[4][0]
+        try:
+            ip = ipaddress.ip_address(addr)
+        except ValueError:
+            continue
+        if _ip_is_blocked(ip):
+            return (
+                f"host {host} resolves to {addr}, a private/loopback/"
+                f"link-local address — refused (SSRF guard)"
+            )
+    return None
+def web_fetch_tool(
+    *,
+    name: str = "web_fetch",
+    timeout: float = 15.0,
+    max_bytes: int = 5_000_000,
+) -> Tool:
+    """Build a :class:`Tool` that fetches an HTTPS URL's body.
+    Args:
+        name: Tool name the model sees (default ``web_fetch``).
+            Overridable mostly for tests / co-existence with
+            other fetch tools.
+        timeout: Per-request timeout in seconds (default 15).
+            Applies to connect + read combined.
+        max_bytes: Reject responses larger than this — keeps an
+            accidental tarball or binary blob from blowing
+            conversation context. Default 5 MB.
+    Returns:
+        A :class:`Tool` named ``web_fetch`` with one ``url: str``
+        parameter, returning the response body prefixed by status
+        code and final URL (after redirects + GitHub rewriting).
+    Example::
+        from loom_code.web_fetch import web_fetch_tool
+        from loomflow import Agent
+        agent = Agent("...", tools=[web_fetch_tool()])
+    """
+    async def _fetch(url: str) -> str:
+        normalized, err = _normalize_url(url)
+        if err is not None:
+            return err
+        # Lazy import — matches the loomflow tool convention so
+        # `import loom_code.web_fetch` doesn't pay the httpx cost.
+        import httpx
+        # Follow redirects MANUALLY so we re-run the SSRF guard on every
+        # hop. With httpx's follow_redirects=True a public URL could 302
+        # to http://169.254.169.254/ and we'd never see the final host —
+        # the guard has to gate each Location, not just the first URL.
+        assert normalized is not None  # err-None branch guarantees this
+        current = normalized
+        try:
+            async with httpx.AsyncClient(
+                follow_redirects=False,
+                timeout=timeout,
+            ) as client:
+                for _ in range(10):  # redirect cap — matches httpx default
+                    host = urlsplit(current).hostname or ""
+                    blocked = _host_is_blocked(host)
+                    if blocked is not None:
+                        return f"ERROR: {blocked}"
+                    r = await client.get(current)
+                    if r.is_redirect and r.headers.get("location"):
+                        # Resolve relative Location against the current URL.
+                        current = str(r.url.join(r.headers["location"]))
+                        continue
+                    break
+                else:
+                    return "ERROR: too many redirects (>10)"
+        except httpx.HTTPError as exc:
+            return f"ERROR: fetch failed: {exc}"
+        # Reject oversized payloads after the fact rather than via
+        # Content-Length — many CDNs don't set it correctly and
+        # we'd rather download-and-reject than incorrectly block a
+        # small response with a bogus header.
+        if len(r.content) > max_bytes:
+            return (
+                f"ERROR: response exceeds {max_bytes} bytes "
+                f"({len(r.content)} actual). For large repos use "
+                f"`git clone` via bash; for partial reads pass a "
+                f"more specific URL."
+            )
+        # Cap the body BEFORE it enters the conversation (where it gets
+        # re-sent every turn). Successful pages truncate at 100KB; error
+        # pages (non-2xx) get a small snippet since the body is an error
+        # page, not content the model needs.
+        body = r.text
+        ok = 200 <= r.status_code < 300
+        cap = _MAX_RESULT_CHARS if ok else _MAX_ERROR_CHARS
+        if len(body) > cap:
+            omitted = len(body) - cap
+            reason = "page too large" if ok else "error page"
+            body = (
+                body[:cap]
+                + f"\n\n… [{reason} — truncated {omitted} of {len(r.text)} "
+                "chars. Fetch a more specific URL, or `git clone` via bash "
+                "for a whole repo.]"
+            )
+        # Render with a small header so the model knows what it
+        # got — the final URL (after redirects + GitHub rewriting)
+        # and status are both load-bearing for follow-ups.
+        return (
+            f"# {r.url}\n"
+            f"status: {r.status_code}\n"
+            f"\n"
+            f"{body}"
+        )
+    return Tool(
+        name=name,
+        description=_TOOL_DESCRIPTION,
+        fn=_fetch,
+        input_schema=_URL_SCHEMA,
+    )