mtrx-cli 0.1.25 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,23 +19,33 @@ Design choices (informed by cursor-tap):
  from __future__ import annotations

  import asyncio
+ import contextlib
  import logging
  import os
  import signal
  import ssl
+ import sys
  import time
  import uuid
  from pathlib import Path
- from typing import Any
+ from typing import TYPE_CHECKING, Any, AsyncGenerator

  import httpx

  from matrx.cli.cursor_ca import CertCache, load_ca

  try:
-     from matrx.cli.cursor_reroute import is_ai_path, try_inject_context, try_reroute_to_matrx
+     from matrx.cli.cursor_reroute import (
+         classify_ai_request,
+         is_ai_path,
+         try_inject_context,
+         try_reroute_to_matrx,
+     )
  except ImportError:
      # Stubs when cursor_reroute not available (e.g. npm package omit).
+     def classify_ai_request(method: str, path: str, headers: dict[str, str] | None = None) -> dict[str, bool]:
+         return {"candidate": False, "reroutable": False}
+
      def is_ai_path(path: str) -> bool:
          return False

@@ -47,19 +57,74 @@ except ImportError:

  logger = logging.getLogger(__name__)

+
+ class _SuppressAsyncioNoise(logging.Filter):
+     """Suppress known-benign asyncio noise on Windows and SSL connections.
+
+     Two cases are filtered:
+
+     1. SSL EOF warning — Python's asyncio SSL transport emits this at WARNING
+        level whenever a remote peer closes the connection. The return value
+        from eof_received() is silently ignored for SSL connections; harmless
+        and unfixable without rewriting asyncio's SSL transport layer.
+
+     2. WinError 10054 in _call_connection_lost — Windows ProactorEventLoop
+        calls socket.shutdown(SHUT_RDWR) on already-reset sockets during
+        connection teardown. Cursor's CodebaseSnapshotService packfile uploads
+        close connections with TCP RST, triggering this path. The error is
+        logged at ERROR level by asyncio but indicates normal connection teardown;
+        the proxy's own logic is unaffected.
+     """
+
+     def filter(self, record: logging.LogRecord) -> bool:
+         msg = record.getMessage()
+         if "eof_received" in msg:
+             return False
+         if "_call_connection_lost" in msg and record.exc_info:
+             exc_type = record.exc_info[0]
+             if exc_type is not None and issubclass(exc_type, ConnectionResetError):
+                 return False
+         return True
+
+
+ _MAX_BODY_BYTES = 50 * 1024 * 1024  # 50 MB hard limit for buffered request bodies
+
  DEFAULT_PORT = 8842
  PROXY_HOST = "127.0.0.1"
  HEALTH_PATH = "/__mtrx_health__"

+
+ def _print_inbox_notification(from_agent_id: str, prompt_preview: str) -> None:
+     """Print a visible terminal notification when an A2A task arrives while idle."""
+     preview_display = f': "{prompt_preview}"' if prompt_preview else ""
+     print(
+         f"\n\033[1m[MTRX]\033[0m \U0001f4ec A2A task from {from_agent_id}{preview_display}"
+         "\n → Start a new turn to receive and process it.\n",
+         file=sys.stderr,
+         flush=True,
+     )
+
  # Domains whose TLS we intercept for observability.
  _INTERCEPT_DOMAINS = {
      "api2.cursor.sh",
      "api3.cursor.sh",
      "api4.cursor.sh",
      "api5.cursor.sh",
-     "agentn.global.api5.cursor.sh",
+     "agent.api5.cursor.sh",   # Cloud Agent (privacy mode)
+     "agentn.api5.cursor.sh",  # Cloud Agent (non-privacy mode)
+     "api.anthropic.com",
+     "api.openai.com",
  }

+ _PREWARM_DOMAINS = (
+     "api2.cursor.sh",
+     "api3.cursor.sh",
+     "api4.cursor.sh",
+     "api5.cursor.sh",
+     "agent.api5.cursor.sh",
+     "agentn.api5.cursor.sh",
+ )
+

  class MITMProxy:
      """Async MITM forward proxy with telemetry mirroring."""
@@ -71,24 +136,41 @@ class MITMProxy:
          matrx_base_url: str,
          host: str = PROXY_HOST,
          port: int = DEFAULT_PORT,
+         agent_id: str | None = None,
+         group_id: str | None = None,
      ):
          self.matrx_key = matrx_key
          self.matrx_base_url = matrx_base_url.rstrip("/")
          self.host = host
          self.port = port
+         self._agent_id: str | None = agent_id
+         self._group_id: str | None = group_id
          self._server: asyncio.Server | None = None
          self._telemetry_client: httpx.AsyncClient | None = None
          self._cert_cache: CertCache | None = None
          self._request_count = 0
+         self._connect_count = 0
+         self._inbox_poll_task: asyncio.Task | None = None

      async def start(self) -> None:
          ca_key, ca_cert = load_ca()
          self._cert_cache = CertCache(ca_key, ca_cert)
+         self._cert_cache.prewarm(_PREWARM_DOMAINS)
          self._telemetry_client = httpx.AsyncClient(timeout=10)
          self._server = await asyncio.start_server(
              self._handle_client, self.host, self.port
          )
          logger.info("MITM proxy listening on %s:%d", self.host, self.port)
+         if self._agent_id and self._group_id:
+             self._inbox_poll_task = asyncio.create_task(
+                 self._run_inbox_poll_loop(),
+                 name="mtrx-inbox-poll",
+             )
+             logger.info(
+                 "proxy: inbox poller started agent_id=%s group_id=%s",
+                 self._agent_id,
+                 self._group_id,
+             )

      async def serve_forever(self) -> None:
          if self._server is None:
@@ -98,6 +180,10 @@ class MITMProxy:
          await self._server.serve_forever()

      async def stop(self) -> None:
+         if self._inbox_poll_task and not self._inbox_poll_task.done():
+             self._inbox_poll_task.cancel()
+             with contextlib.suppress(asyncio.CancelledError):
+                 await self._inbox_poll_task
          if self._server:
              self._server.close()
              await self._server.wait_closed()
@@ -108,6 +194,58 @@ class MITMProxy:
      def request_count(self) -> int:
          return self._request_count

+     # -----------------------------------------------------------------
+     # Inbox background poller
+     # -----------------------------------------------------------------
+
+     async def _run_inbox_poll_loop(self) -> None:
+         """Long-poll /v1/inbox/wait while the proxy is running.
+
+         When a directed work item arrives for this agent, prints a visible
+         terminal notification so the user knows to give the agent its next
+         turn. Actual task delivery still happens via the normal injection
+         path (claim_directed_work_for_injection) on the next proxy call —
+         this loop only provides the push notification.
+         """
+         url = f"{self.matrx_base_url}/v1/inbox/wait"
+         params: dict[str, str | int] = {
+             "group_id": str(self._group_id),
+             "timeout_s": 25,
+         }
+         if self._agent_id:
+             params["agent_id"] = self._agent_id
+         headers = {"X-Matrx-Key": self.matrx_key}
+
+         async with httpx.AsyncClient(timeout=httpx.Timeout(32.0, connect=5.0)) as client:
+             while True:
+                 try:
+                     resp = await client.get(url, params=params, headers=headers)
+                     if resp.status_code == 200:
+                         data = resp.json()
+                         if data.get("has_pending"):
+                             from_label = data.get("from_agent_id") or "external"
+                             preview = (data.get("prompt_preview") or "").strip()
+                             _print_inbox_notification(from_label, preview)
+                     elif resp.status_code == 401:
+                         logger.warning("proxy: inbox poller received 401 — stopping")
+                         return
+                     # Any other non-2xx: log at debug and retry after backoff
+                     elif resp.status_code >= 400:
+                         logger.debug(
+                             "proxy: inbox poller got %s, retrying in 10s",
+                             resp.status_code,
+                         )
+                         await asyncio.sleep(10)
+                 except asyncio.CancelledError:
+                     return
+                 except (httpx.TimeoutException, httpx.ConnectError):
+                     # Timeout is expected (server held 25s with no work).
+                     # ConnectError happens briefly at startup or on network blip.
+                     pass
+                 except Exception:
+                     logger.debug("proxy: inbox poller error", exc_info=True)
+                     await asyncio.sleep(5)
+
      # -----------------------------------------------------------------
      # Connection handling
      # -----------------------------------------------------------------
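
The poller above is a plain long-poll against the MTRX inbox. A minimal standalone sketch of one poll cycle with the same parameters, header, and timeout as the loop (the values in the final comment are hypothetical, and the endpoint may return more fields than the three the loop reads):

    import httpx

    async def wait_once(base_url: str, matrx_key: str, group_id: str, agent_id: str) -> dict:
        # The server holds the request for up to 25 s (timeout_s) before answering.
        async with httpx.AsyncClient(timeout=httpx.Timeout(32.0, connect=5.0)) as client:
            resp = await client.get(
                f"{base_url}/v1/inbox/wait",
                params={"group_id": group_id, "agent_id": agent_id, "timeout_s": 25},
                headers={"X-Matrx-Key": matrx_key},
            )
            resp.raise_for_status()
            # e.g. {"has_pending": True, "from_agent_id": "agent-123", "prompt_preview": "..."}
            return resp.json()
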
@@ -122,7 +260,7 @@ class MITMProxy:
          except (ConnectionResetError, BrokenPipeError, asyncio.IncompleteReadError):
              pass
          except Exception:
-             logger.debug("proxy: connection error", exc_info=True)
+             logger.warning("proxy: connection error", exc_info=True)
          finally:
              try:
                  writer.close()
@@ -166,8 +304,10 @@ class MITMProxy:
              await writer.drain()

              if hostname in _INTERCEPT_DOMAINS:
+                 logger.info("proxy: CONNECT %s:%d [intercept]", hostname, port)
                  await self._mitm_intercept(reader, writer, hostname, port)
              else:
+                 logger.info("proxy: CONNECT %s:%d [tunnel]", hostname, port)
                  await self._tunnel_passthrough(reader, writer, hostname, port)
          elif method in ("GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"):
              # Plain HTTP proxy request (non-CONNECT) -- handle health check
@@ -218,10 +358,22 @@ class MITMProxy:
          port: int,
      ) -> None:
          assert self._cert_cache is not None
+         self._connect_count += 1
+         conn_id = f"{hostname}:{self._connect_count}"

          # Use the hostname from the CONNECT request for the cert
          # (matches SNI in virtually all cases, avoids ClientHello peeking)
+         handshake_info = self._cert_cache.get_handshake_info(hostname)
          server_ctx = self._cert_cache.get_ssl_context(hostname)
+         logger.info(
+             "proxy: tls_prepare conn=%s host=%s serial=%s leaf_sha256=%s chain_len=%s cert=%s",
+             conn_id,
+             hostname,
+             handshake_info["leaf_serial"],
+             handshake_info["leaf_sha256"],
+             handshake_info["chain_length"],
+             handshake_info["cert_path"],
+         )

          # Upgrade client connection to TLS (we are the "server")
          loop = asyncio.get_running_loop()
@@ -232,8 +384,23 @@ class MITMProxy:
                  transport, protocol, server_ctx, server_side=True
              )
          except (ssl.SSLError, ConnectionError) as exc:
-             logger.debug("TLS handshake with client failed for %s: %s", hostname, exc)
+             logger.warning(
+                 "TLS handshake with client failed for %s [conn=%s serial=%s leaf_sha256=%s chain_len=%s]: %s",
+                 hostname,
+                 conn_id,
+                 handshake_info["leaf_serial"],
+                 handshake_info["leaf_sha256"],
+                 handshake_info["chain_length"],
+                 exc,
+             )
              return
+         logger.info(
+             "proxy: tls_ready conn=%s host=%s serial=%s chain_len=%s",
+             conn_id,
+             hostname,
+             handshake_info["leaf_serial"],
+             handshake_info["chain_length"],
+         )

          tls_writer = asyncio.StreamWriter(new_transport, protocol, client_reader, loop)

@@ -246,6 +413,11 @@ class MITMProxy:
              )
          except Exception:
              logger.debug("Failed to connect to upstream %s:%d", hostname, port)
+             try:
+                 tls_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\n\r\n")
+                 await tls_writer.drain()
+             except Exception:
+                 pass
              return

          # Forward HTTP/1.1 traffic between decrypted client and upstream
@@ -280,9 +452,11 @@ class MITMProxy:
          while True:
              req_line = await client_reader.readline()
              if not req_line:
+                 logger.info("proxy: %s — connection closed (no request line)", hostname)
                  break
              req_line_str = req_line.decode("utf-8", errors="replace").strip()
              if not req_line_str:
+                 logger.info("proxy: %s — empty request line", hostname)
                  break

              parts = req_line_str.split(" ", 2)
@@ -290,64 +464,109 @@ class MITMProxy:
              path = parts[1] if len(parts) > 1 else "/"

              req_body_size = 0
-             _is_ai_req = method == "POST" and is_ai_path(path)
-             _req_session_id = str(uuid.uuid4()) if _is_ai_req else ""
-             # For AI paths: buffer request and try rerouting through MTRX (live injection)
-             if _is_ai_req:
+             _is_ai_req = False
+             _req_session_id = ""
+             req_headers: dict[str, str]
+             req_cl: int
+             req_chunked: bool
+
+             if method == "POST":
                  req_headers, req_cl, req_chunked = await self._read_headers_only(
                      client_reader
                  )
-                 req_body = await self._read_body_to_bytes(
-                     client_reader, req_cl, req_chunked
-                 )
-                 req_body_size = len(req_body)
-                 result = await try_reroute_to_matrx(
-                     path=path,
-                     method=method,
-                     req_headers=req_headers,
-                     req_body=req_body,
-                     matrx_base_url=self.matrx_base_url,
-                     matrx_key=self.matrx_key,
-                     session_id=_req_session_id,
+                 ai_classification = classify_ai_request(method, path, req_headers)
+                 _is_ai_req = ai_classification["candidate"]
+                 _is_ai_reroutable = ai_classification["reroutable"]
+                 _req_session_id = str(uuid.uuid4()) if _is_ai_req else ""
+                 logger.info(
+                     "proxy: %s %s%s [ai=%s reroutable=%s ct=%s]",
+                     method,
+                     hostname,
+                     path,
+                     _is_ai_req,
+                     _is_ai_reroutable,
+                     req_headers.get("content-type", ""),
                  )
-                 if result is not None:
-                     success, resp_headers, resp_body, is_streaming = result
-                     if success and resp_body is not None:
-                         self._request_count += 1
-                         self._write_http_response(
-                             client_writer, 200, resp_headers, resp_body
+                 if _is_ai_req and not _is_ai_reroutable and "aiserver.v1." in path.lower():
+                     logger.info("proxy: candidate AI request not yet reroutable: %s%s", hostname, path)
+
+                 # For AI paths: buffer request and try rerouting through MTRX (live injection)
+                 if _is_ai_req:
+                     try:
+                         req_body = await self._read_body_to_bytes(
+                             client_reader, req_cl, req_chunked
                          )
-                         asyncio.create_task(
-                             self._ship_telemetry(
-                                 hostname=hostname,
-                                 method=method,
-                                 path=path,
-                                 status_code=200,
-                                 req_body_size=len(req_body),
-                                 resp_body_size=len(resp_body),
-                                 elapsed_ms=0,
-                                 content_type=resp_headers.get("content-type", ""),
-                                 is_streaming=is_streaming,
+                     except ValueError:
+                         client_writer.write(b"HTTP/1.1 413 Content Too Large\r\nContent-Length: 0\r\n\r\n")
+                         await client_writer.drain()
+                         return
+                     req_body_size = len(req_body)
+                     result = await try_reroute_to_matrx(
+                         path=path,
+                         method=method,
+                         req_headers=req_headers,
+                         req_body=req_body,
+                         matrx_base_url=self.matrx_base_url,
+                         matrx_key=self.matrx_key,
+                         session_id=_req_session_id,
+                     )
+                     if result is not None:
+                         success, resp_headers, resp_body, is_streaming = result
+                         if success and resp_body is not None:
+                             self._request_count += 1
+                             if hasattr(resp_body, "__aiter__"):
+                                 # Streaming generator: write chunked HTTP response
+                                 resp_body_size = await self._write_chunked_reroute_response(
+                                     client_writer, resp_headers, resp_body
+                                 )
+                             else:
+                                 # Buffered bytes: write with content-length
+                                 self._write_http_response(
+                                     client_writer, 200, resp_headers, resp_body
+                                 )
+                                 await client_writer.drain()
+                                 resp_body_size = len(resp_body)
+                             asyncio.create_task(
+                                 self._ship_telemetry(
+                                     hostname=hostname,
+                                     method=method,
+                                     path=path,
+                                     status_code=200,
+                                     req_body_size=len(req_body),
+                                     resp_body_size=resp_body_size,
+                                     elapsed_ms=0,
+                                     content_type=resp_headers.get("content-type", ""),
+                                     is_streaming=is_streaming,
+                                 )
                              )
-                         )
-                         continue
-                 # Reroute returned but failed fall through to forward
-                 # Inject MTRX memory context into request before forwarding
-                 injected_body = await try_inject_context(
-                     req_body=req_body,
-                     req_headers=req_headers,
-                     matrx_base_url=self.matrx_base_url,
-                     matrx_key=self.matrx_key,
-                     session_id=_req_session_id,
-                 )
-                 body_to_forward = injected_body if injected_body is not None else req_body
-                 fwd_headers = dict(req_headers)
-                 fwd_headers["content-length"] = str(len(body_to_forward))
-                 up_writer.write(req_line)
-                 await self._write_headers(up_writer, fwd_headers)
-                 up_writer.write(body_to_forward)
-                 await up_writer.drain()
+                             continue
+                     # Reroute returned but failed — fall through to forward
+                     # Inject MTRX memory context into request before forwarding
+                     injected_body = await try_inject_context(
+                         req_body=req_body,
+                         req_headers=req_headers,
+                         matrx_base_url=self.matrx_base_url,
+                         matrx_key=self.matrx_key,
+                         session_id=_req_session_id,
+                     )
+                     body_to_forward = injected_body if injected_body is not None else req_body
+                     fwd_headers = dict(req_headers)
+                     fwd_headers.pop("transfer-encoding", None)  # remove chunked before setting content-length
+                     fwd_headers["content-length"] = str(len(body_to_forward))
+                     up_writer.write(req_line)
+                     self._write_headers(up_writer, fwd_headers)
+                     up_writer.write(body_to_forward)
+                     await up_writer.drain()
+                 else:
+                     up_writer.write(req_line)
+                     self._write_headers(up_writer, req_headers)
+                     req_body_size = await self._forward_body(
+                         client_reader, up_writer, req_cl, req_chunked
+                     )
+                     if req_body_size == 0 and req_cl > 0:
+                         req_body_size = req_cl
              else:
+                 logger.info("proxy: %s %s%s [ai=%s]", method, hostname, path, False)
                  up_writer.write(req_line)
                  req_headers, req_cl, req_chunked = await self._forward_headers(
                      client_reader, up_writer
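
The rewritten POST branch above hinges on classify_ai_request, whose real implementation lives in matrx.cli.cursor_reroute and is not part of this diff. From the stub near the top of the module and the call sites here, the contract is a two-flag dict; an illustrative call (the path and header below are hypothetical):

    # "candidate" gates body buffering, session IDs and telemetry;
    # "reroutable" additionally means MTRX can answer the request itself.
    flags = classify_ai_request(
        "POST",
        "/aiserver.v1.ChatService/StreamUnifiedChat",   # hypothetical Cursor RPC path
        {"content-type": "application/connect+proto"},  # hypothetical request header
    )
    assert set(flags) == {"candidate", "reroutable"}
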
@@ -402,24 +621,22 @@ class MITMProxy:
              elapsed_ms = int((time.monotonic() - started) * 1000)
              self._request_count += 1

-             asyncio.create_task(
-                 self._ship_telemetry(
-                     hostname=hostname,
-                     method=method,
-                     path=path,
-                     status_code=status_code,
-                     req_body_size=req_body_size,
-                     resp_body_size=resp_body_size,
-                     elapsed_ms=elapsed_ms,
-                     content_type=content_type,
-                     is_streaming=is_streaming,
+             if _is_ai_req:  # backend rejects telemetry for non-AI infrastructure paths
+                 asyncio.create_task(
+                     self._ship_telemetry(
+                         hostname=hostname,
+                         method=method,
+                         path=path,
+                         status_code=status_code,
+                         req_body_size=req_body_size,
+                         resp_body_size=resp_body_size,
+                         elapsed_ms=elapsed_ms,
+                         content_type=content_type,
+                         is_streaming=is_streaming,
+                     )
                  )
-             )

-             conn_h = (
-                 req_headers.get("connection", "")
-                 + resp_headers.get("connection", "")
-             ).lower()
+             conn_h = resp_headers.get("connection", "").lower()
              if "close" in conn_h:
                  break

@@ -487,7 +704,25 @@ class MITMProxy:
                  await writer.drain()
              return total, b"".join(parts)

-         return 0, b""
+         # No content-length, no chunked encoding — stream until the upstream closes.
+         # This covers Cursor's SSE AI responses that use raw HTTP/1.1 keep-alive streaming.
+         # Cap capture at 512 KB to bound memory; bytes beyond that are still forwarded.
+         _CAPTURE_LIMIT = 512 * 1024
+         parts = []
+         total = 0
+         capturing = True
+         while True:
+             chunk = await reader.read(65536)
+             if not chunk:
+                 break
+             writer.write(chunk)
+             await writer.drain()
+             total += len(chunk)
+             if capturing:
+                 parts.append(chunk)
+                 if total >= _CAPTURE_LIMIT:
+                     capturing = False
+         return total, b"".join(parts)

      async def _extract_ai_response(
          self,
@@ -497,110 +732,63 @@ class MITMProxy:
      ) -> None:
          """Parse Connect frames from *resp_bytes* and ship response telemetry.

+         Tries compiled proto parsing first; falls back to raw wire-format parsing
+         so token counts are always extracted even without compiled proto files.
          Fire-and-forget — never raises, never blocks the forward path.
          """
          try:
-             from matrx.cli.cursor_connect import parse_all_frames
-             from matrx.cli.cursor_extraction import (
-                 extract_from_response_frame,
-                 parse_response_proto,
-                 ship_ai_telemetry,
-             )
+             from matrx.cli.cursor_extraction import ship_ai_telemetry
+
+             import gzip as _gzip
+             body = resp_bytes
+             if len(body) >= 2 and body[:2] == b"\x1f\x8b":
+                 try:
+                     body = _gzip.decompress(body)
+                 except Exception:
+                     body = resp_bytes

-             frames = parse_all_frames(resp_bytes)
              accumulated: dict = {
                  "session_id": session_id,
                  "response_text": "",
                  "tool_calls": [],
                  "usage": None,
              }
-             for flags, payload in frames:
-                 if flags == 0x02:  # end-of-stream trailer — stop
-                     break
-                 resp_proto = parse_response_proto(payload)
-                 frame_data = extract_from_response_frame(resp_proto)
-                 if frame_data:
-                     accumulated["response_text"] = (
-                         accumulated.get("response_text", "") + frame_data.get("text", "")
-                     )
-                     accumulated["tool_calls"].extend(frame_data.get("tool_calls", []))
-                     if frame_data.get("usage"):
-                         accumulated["usage"] = frame_data["usage"]
+
+             if hostname == "api.anthropic.com":
+                 from matrx.cli.cursor_extraction import extract_from_anthropic_sse_response
+                 frame_data = extract_from_anthropic_sse_response(body)
+                 accumulated["response_text"] = frame_data.get("text", "")
+                 accumulated["tool_calls"] = frame_data.get("tool_calls", [])
+                 accumulated["usage"] = frame_data.get("usage")
+             elif hostname == "api.openai.com":
+                 from matrx.cli.cursor_extraction import extract_from_openai_sse_response
+                 frame_data = extract_from_openai_sse_response(body)
+                 accumulated["response_text"] = frame_data.get("text", "")
+                 accumulated["tool_calls"] = frame_data.get("tool_calls", [])
+                 accumulated["usage"] = frame_data.get("usage")
+             else:
+                 # Cursor backend: Connect/gRPC protobuf frames
+                 from matrx.cli.cursor_connect import parse_all_frames
+                 from matrx.cli.cursor_extraction import (
+                     _raw_extract_response_frame,
+                     extract_from_response_frame,
+                     parse_response_proto,
+                 )
+                 for flags, payload in parse_all_frames(body):
+                     if flags == 0x02:
+                         break
+                     resp_proto = parse_response_proto(payload)
+                     frame_data = extract_from_response_frame(resp_proto) if resp_proto is not None else _raw_extract_response_frame(payload)
+                     if frame_data:
+                         accumulated["response_text"] += frame_data.get("text", "")
+                         accumulated["tool_calls"].extend(frame_data.get("tool_calls", []))
+                         if frame_data.get("usage"):
+                             accumulated["usage"] = frame_data["usage"]

              await ship_ai_telemetry(accumulated, self.matrx_base_url, self.matrx_key)
          except Exception:
              logger.debug("proxy: _extract_ai_response failed", exc_info=True)

-     async def _read_headers_only(
-         self, reader: asyncio.StreamReader
-     ) -> tuple[dict[str, str], int, bool]:
-         """Read headers without writing. Returns (headers_dict, content_length, is_chunked)."""
-         headers: dict[str, str] = {}
-         content_length = -1
-         chunked = False
-         while True:
-             line = await reader.readline()
-             decoded = line.decode("utf-8", errors="replace").strip()
-             if not decoded:
-                 break
-             if ":" in decoded:
-                 k, _, v = decoded.partition(":")
-                 k_lower = k.strip().lower()
-                 v_stripped = v.strip()
-                 headers[k_lower] = v_stripped
-                 if k_lower == "content-length":
-                     content_length = int(v_stripped)
-                 elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
-                     chunked = True
-         return headers, content_length, chunked
-
-     async def _read_body_to_bytes(
-         self,
-         reader: asyncio.StreamReader,
-         content_length: int,
-         chunked: bool,
-     ) -> bytes:
-         """Read body into bytes (no writer)."""
-         if content_length > 0:
-             return await reader.read(content_length)
-         if chunked:
-             parts: list[bytes] = []
-             while True:
-                 size_line = await reader.readline()
-                 size_str = size_line.decode("utf-8", errors="replace").strip()
-                 try:
-                     chunk_size = int(size_str.split(";")[0], 16)
-                 except ValueError:
-                     break
-                 if chunk_size == 0:
-                     await reader.readline()  # trailer
-                     break
-                 parts.append(await reader.read(chunk_size))
-                 await reader.readline()  # crlf
-             return b"".join(parts)
-         return b""
-
-     def _write_headers(
-         self, writer: asyncio.StreamWriter, headers: dict[str, str]
-     ) -> None:
-         """Write headers as HTTP lines (caller must drain)."""
-         for k, v in headers.items():
-             writer.write(f"{k}: {v}\r\n".encode())
-         writer.write(b"\r\n")
-
-     def _write_http_response(
-         self,
-         writer: asyncio.StreamWriter,
-         status: int,
-         resp_headers: dict[str, str],
-         resp_body: bytes,
-     ) -> None:
-         """Write a complete HTTP response."""
-         writer.write(f"HTTP/1.1 {status} OK\r\n".encode())
-         self._write_headers(writer, resp_headers)
-         writer.write(resp_body)
-         # Caller should drain
-
      async def _forward_headers(
          self,
          reader: asyncio.StreamReader,
@@ -625,7 +813,10 @@ class MITMProxy:
                  v_stripped = v.strip()
                  headers[k_lower] = v_stripped
                  if k_lower == "content-length":
-                     content_length = int(v_stripped)
+                     try:
+                         content_length = int(v_stripped)
+                     except ValueError:
+                         pass
                  elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
                      chunked = True
          await writer.drain()
@@ -717,7 +908,10 @@ class MITMProxy:
                  v_stripped = v.strip()
                  headers[k_lower] = v_stripped
                  if k_lower == "content-length":
-                     content_length = int(v_stripped)
+                     try:
+                         content_length = int(v_stripped)
+                     except ValueError:
+                         pass
                  elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
                      chunked = True
          return headers, content_length, chunked
@@ -730,6 +924,8 @@ class MITMProxy:
      ) -> bytes:
          """Read body into bytes."""
          if content_length > 0:
+             if content_length > _MAX_BODY_BYTES:
+                 raise ValueError(f"Request body too large: {content_length} bytes")
              return await reader.readexactly(content_length)
          if chunked:
              parts: list[bytes] = []
@@ -774,6 +970,38 @@ class MITMProxy:
          writer.write(body)
          # Note: drain is caller's responsibility

+     async def _write_chunked_reroute_response(
+         self,
+         writer: asyncio.StreamWriter,
+         headers: dict[str, str],
+         frames: AsyncGenerator[bytes, None],
+     ) -> int:
+         """Write an HTTP/1.1 chunked-encoded response by iterating a Connect-frame generator.
+
+         Each Connect frame from the generator becomes one chunk. The response ends
+         with the mandatory zero-length chunk terminator. Returns total payload bytes
+         written (for telemetry).
+         """
+         writer.write(b"HTTP/1.1 200 OK\r\n")
+         merged = dict(headers)
+         merged["transfer-encoding"] = "chunked"
+         self._write_headers(writer, merged)
+         await writer.drain()
+
+         total = 0
+         async for chunk in frames:
+             if not chunk:
+                 continue
+             writer.write(f"{len(chunk):x}\r\n".encode())
+             writer.write(chunk)
+             writer.write(b"\r\n")
+             await writer.drain()
+             total += len(chunk)
+
+         writer.write(b"0\r\n\r\n")
+         await writer.drain()
+         return total
+
      # -----------------------------------------------------------------
      # Raw bidirectional pipe (for opaque tunnels)
      # -----------------------------------------------------------------
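
For reference on the framing _write_chunked_reroute_response emits above: each chunk is a hex length line, the payload, and a trailing CRLF, and the response is closed by a zero-length chunk. A tiny sketch of the bytes produced for one hypothetical frame:

    frame = b'{"delta": "hello"}'                              # hypothetical frame payload
    chunk = f"{len(frame):x}\r\n".encode() + frame + b"\r\n"   # b'12\r\n{"delta": "hello"}\r\n'
    terminator = b"0\r\n\r\n"                                  # mandatory end-of-response marker
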
@@ -820,7 +1048,7 @@ class MITMProxy:
          content_type: str,
          is_streaming: bool,
      ) -> None:
-         if self._telemetry_client is None:
+         if self._telemetry_client is None or not self.matrx_key:
              return

          payload = {
@@ -837,11 +1065,19 @@ class MITMProxy:
          }
          url = f"{self.matrx_base_url}/v1/telemetry/cursor"
          try:
-             await self._telemetry_client.post(
+             resp = await self._telemetry_client.post(
                  url,
                  json=payload,
                  headers={"X-Matrx-Key": self.matrx_key},
              )
+             if resp.status_code >= 400:
+                 logger.warning(
+                     "telemetry: %s from %s (key=%s... path=%s)",
+                     resp.status_code,
+                     url,
+                     self.matrx_key[:8],
+                     path,
+                 )
          except Exception:
              logger.debug("telemetry ship failed", exc_info=True)

@@ -857,8 +1093,16 @@ def run_proxy(
      host: str = PROXY_HOST,
      port: int = DEFAULT_PORT,
      pid_file: Path | None = None,
+     agent_id: str | None = None,
+     group_id: str | None = None,
  ) -> None:
      """Run the MITM proxy (blocking). Intended for daemon/service use."""
+     logging.getLogger("asyncio").addFilter(_SuppressAsyncioNoise())
+
+     # Allow agent/group identity to come from environment when not explicitly set
+     agent_id = agent_id or os.environ.get("MTRX_AGENT_ID") or None
+     group_id = group_id or os.environ.get("MTRX_GROUP_ID") or None
+
      if pid_file:
          pid_file.parent.mkdir(parents=True, exist_ok=True)
          pid_file.write_text(str(os.getpid()), encoding="utf-8")
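
run_proxy now falls back to MTRX_AGENT_ID / MTRX_GROUP_ID from the environment when agent_id / group_id are not passed, which is what lets a daemonized proxy start the inbox poller. A hedged usage sketch — it assumes run_proxy's leading parameters are matrx_key and matrx_base_url (suggested by the MITMProxy(...) call in the next hunk, not shown here), and the key, URL, and IDs are placeholders:

    import os

    os.environ["MTRX_AGENT_ID"] = "agent-demo"    # placeholder identity
    os.environ["MTRX_GROUP_ID"] = "group-demo"

    # With both IDs resolving to non-empty values, MITMProxy.start() also
    # launches the background inbox poller.
    run_proxy(matrx_key="mtrx-placeholder-key", matrx_base_url="https://mtrx.example.invalid")
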
@@ -868,6 +1112,8 @@ def run_proxy(
          matrx_base_url=matrx_base_url,
          host=host,
          port=port,
+         agent_id=agent_id,
+         group_id=group_id,
      )

      loop = asyncio.new_event_loop()