mtrx-cli 0.1.25 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,9 +33,17 @@ import httpx
 from matrx.cli.cursor_ca import CertCache, load_ca
 
 try:
-    from matrx.cli.cursor_reroute import is_ai_path, try_inject_context, try_reroute_to_matrx
+    from matrx.cli.cursor_reroute import (
+        classify_ai_request,
+        is_ai_path,
+        try_inject_context,
+        try_reroute_to_matrx,
+    )
 except ImportError:
     # Stubs when cursor_reroute not available (e.g. npm package omit).
+    def classify_ai_request(method: str, path: str, headers: dict[str, str] | None = None) -> dict[str, bool]:
+        return {"candidate": False, "reroutable": False}
+
     def is_ai_path(path: str) -> bool:
         return False
 
@@ -47,6 +55,8 @@ except ImportError:
 
 logger = logging.getLogger(__name__)
 
+_MAX_BODY_BYTES = 50 * 1024 * 1024  # 50 MB hard limit for buffered request bodies
+
 DEFAULT_PORT = 8842
 PROXY_HOST = "127.0.0.1"
 HEALTH_PATH = "/__mtrx_health__"
@@ -58,8 +68,17 @@ _INTERCEPT_DOMAINS = {
     "api4.cursor.sh",
     "api5.cursor.sh",
     "agentn.global.api5.cursor.sh",
+    "api.anthropic.com",
+    "api.openai.com",
 }
 
+_PREWARM_DOMAINS = (
+    "api2.cursor.sh",
+    "api3.cursor.sh",
+    "api4.cursor.sh",
+    "api5.cursor.sh",
+)
+
 
 class MITMProxy:
     """Async MITM forward proxy with telemetry mirroring."""
@@ -80,10 +99,12 @@ class MITMProxy:
         self._telemetry_client: httpx.AsyncClient | None = None
         self._cert_cache: CertCache | None = None
         self._request_count = 0
+        self._connect_count = 0
 
     async def start(self) -> None:
         ca_key, ca_cert = load_ca()
         self._cert_cache = CertCache(ca_key, ca_cert)
+        self._cert_cache.prewarm(_PREWARM_DOMAINS)
         self._telemetry_client = httpx.AsyncClient(timeout=10)
         self._server = await asyncio.start_server(
             self._handle_client, self.host, self.port
@@ -122,7 +143,7 @@ class MITMProxy:
         except (ConnectionResetError, BrokenPipeError, asyncio.IncompleteReadError):
             pass
         except Exception:
-            logger.debug("proxy: connection error", exc_info=True)
+            logger.warning("proxy: connection error", exc_info=True)
         finally:
             try:
                 writer.close()
@@ -166,8 +187,10 @@ class MITMProxy:
             await writer.drain()
 
             if hostname in _INTERCEPT_DOMAINS:
+                logger.info("proxy: CONNECT %s:%d [intercept]", hostname, port)
                 await self._mitm_intercept(reader, writer, hostname, port)
             else:
+                logger.info("proxy: CONNECT %s:%d [tunnel]", hostname, port)
                 await self._tunnel_passthrough(reader, writer, hostname, port)
         elif method in ("GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"):
             # Plain HTTP proxy request (non-CONNECT) -- handle health check
@@ -218,10 +241,22 @@ class MITMProxy:
         port: int,
     ) -> None:
         assert self._cert_cache is not None
+        self._connect_count += 1
+        conn_id = f"{hostname}:{self._connect_count}"
 
         # Use the hostname from the CONNECT request for the cert
         # (matches SNI in virtually all cases, avoids ClientHello peeking)
+        handshake_info = self._cert_cache.get_handshake_info(hostname)
         server_ctx = self._cert_cache.get_ssl_context(hostname)
+        logger.info(
+            "proxy: tls_prepare conn=%s host=%s serial=%s leaf_sha256=%s chain_len=%s cert=%s",
+            conn_id,
+            hostname,
+            handshake_info["leaf_serial"],
+            handshake_info["leaf_sha256"],
+            handshake_info["chain_length"],
+            handshake_info["cert_path"],
+        )
 
         # Upgrade client connection to TLS (we are the "server")
         loop = asyncio.get_running_loop()
@@ -232,8 +267,23 @@ class MITMProxy:
                 transport, protocol, server_ctx, server_side=True
             )
         except (ssl.SSLError, ConnectionError) as exc:
-            logger.debug("TLS handshake with client failed for %s: %s", hostname, exc)
+            logger.warning(
+                "TLS handshake with client failed for %s [conn=%s serial=%s leaf_sha256=%s chain_len=%s]: %s",
+                hostname,
+                conn_id,
+                handshake_info["leaf_serial"],
+                handshake_info["leaf_sha256"],
+                handshake_info["chain_length"],
+                exc,
+            )
             return
+        logger.info(
+            "proxy: tls_ready conn=%s host=%s serial=%s chain_len=%s",
+            conn_id,
+            hostname,
+            handshake_info["leaf_serial"],
+            handshake_info["chain_length"],
+        )
 
         tls_writer = asyncio.StreamWriter(new_transport, protocol, client_reader, loop)
 
@@ -246,6 +296,11 @@ class MITMProxy:
             )
         except Exception:
             logger.debug("Failed to connect to upstream %s:%d", hostname, port)
+            try:
+                tls_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\n\r\n")
+                await tls_writer.drain()
+            except Exception:
+                pass
             return
 
         # Forward HTTP/1.1 traffic between decrypted client and upstream
@@ -280,9 +335,11 @@ class MITMProxy:
         while True:
             req_line = await client_reader.readline()
             if not req_line:
+                logger.info("proxy: %s — connection closed (no request line)", hostname)
                 break
             req_line_str = req_line.decode("utf-8", errors="replace").strip()
             if not req_line_str:
+                logger.info("proxy: %s — empty request line", hostname)
                 break
 
             parts = req_line_str.split(" ", 2)
@@ -290,64 +347,99 @@ class MITMProxy:
             path = parts[1] if len(parts) > 1 else "/"
 
             req_body_size = 0
-            _is_ai_req = method == "POST" and is_ai_path(path)
-            _req_session_id = str(uuid.uuid4()) if _is_ai_req else ""
-            # For AI paths: buffer request and try rerouting through MTRX (live injection)
-            if _is_ai_req:
+            _is_ai_req = False
+            _req_session_id = ""
+            req_headers: dict[str, str]
+            req_cl: int
+            req_chunked: bool
+
+            if method == "POST":
                 req_headers, req_cl, req_chunked = await self._read_headers_only(
                     client_reader
                 )
-                req_body = await self._read_body_to_bytes(
-                    client_reader, req_cl, req_chunked
+                ai_classification = classify_ai_request(method, path, req_headers)
+                _is_ai_req = ai_classification["candidate"]
+                _is_ai_reroutable = ai_classification["reroutable"]
+                _req_session_id = str(uuid.uuid4()) if _is_ai_req else ""
+                logger.info(
+                    "proxy: %s %s%s [ai=%s reroutable=%s ct=%s]",
+                    method,
+                    hostname,
+                    path,
+                    _is_ai_req,
+                    _is_ai_reroutable,
+                    req_headers.get("content-type", ""),
                 )
-                req_body_size = len(req_body)
-                result = await try_reroute_to_matrx(
-                    path=path,
-                    method=method,
-                    req_headers=req_headers,
-                    req_body=req_body,
-                    matrx_base_url=self.matrx_base_url,
-                    matrx_key=self.matrx_key,
-                    session_id=_req_session_id,
-                )
-                if result is not None:
-                    success, resp_headers, resp_body, is_streaming = result
-                    if success and resp_body is not None:
-                        self._request_count += 1
-                        self._write_http_response(
-                            client_writer, 200, resp_headers, resp_body
+                if _is_ai_req and not _is_ai_reroutable and "aiserver.v1." in path.lower():
+                    logger.info("proxy: candidate AI request not yet reroutable: %s%s", hostname, path)
+
+                # For AI paths: buffer request and try rerouting through MTRX (live injection)
+                if _is_ai_req:
+                    try:
+                        req_body = await self._read_body_to_bytes(
+                            client_reader, req_cl, req_chunked
                         )
-                        asyncio.create_task(
-                            self._ship_telemetry(
-                                hostname=hostname,
-                                method=method,
-                                path=path,
-                                status_code=200,
-                                req_body_size=len(req_body),
-                                resp_body_size=len(resp_body),
-                                elapsed_ms=0,
-                                content_type=resp_headers.get("content-type", ""),
-                                is_streaming=is_streaming,
+                    except ValueError:
+                        client_writer.write(b"HTTP/1.1 413 Content Too Large\r\nContent-Length: 0\r\n\r\n")
+                        await client_writer.drain()
+                        return
+                    req_body_size = len(req_body)
+                    result = await try_reroute_to_matrx(
+                        path=path,
+                        method=method,
+                        req_headers=req_headers,
+                        req_body=req_body,
+                        matrx_base_url=self.matrx_base_url,
+                        matrx_key=self.matrx_key,
+                        session_id=_req_session_id,
+                    )
+                    if result is not None:
+                        success, resp_headers, resp_body, is_streaming = result
+                        if success and resp_body is not None:
+                            self._request_count += 1
+                            self._write_http_response(
+                                client_writer, 200, resp_headers, resp_body
                             )
-                        )
-                        continue
-                # Reroute returned but failed — fall through to forward
-                # Inject MTRX memory context into request before forwarding
-                injected_body = await try_inject_context(
-                    req_body=req_body,
-                    req_headers=req_headers,
-                    matrx_base_url=self.matrx_base_url,
-                    matrx_key=self.matrx_key,
-                    session_id=_req_session_id,
-                )
-                body_to_forward = injected_body if injected_body is not None else req_body
-                fwd_headers = dict(req_headers)
-                fwd_headers["content-length"] = str(len(body_to_forward))
-                up_writer.write(req_line)
-                await self._write_headers(up_writer, fwd_headers)
-                up_writer.write(body_to_forward)
-                await up_writer.drain()
+                            asyncio.create_task(
+                                self._ship_telemetry(
+                                    hostname=hostname,
+                                    method=method,
+                                    path=path,
+                                    status_code=200,
+                                    req_body_size=len(req_body),
+                                    resp_body_size=len(resp_body),
+                                    elapsed_ms=0,
+                                    content_type=resp_headers.get("content-type", ""),
+                                    is_streaming=is_streaming,
+                                )
+                            )
+                            continue
+                    # Reroute returned but failed — fall through to forward
+                    # Inject MTRX memory context into request before forwarding
+                    injected_body = await try_inject_context(
+                        req_body=req_body,
+                        req_headers=req_headers,
+                        matrx_base_url=self.matrx_base_url,
+                        matrx_key=self.matrx_key,
+                        session_id=_req_session_id,
+                    )
+                    body_to_forward = injected_body if injected_body is not None else req_body
+                    fwd_headers = dict(req_headers)
+                    fwd_headers["content-length"] = str(len(body_to_forward))
+                    up_writer.write(req_line)
+                    self._write_headers(up_writer, fwd_headers)
+                    up_writer.write(body_to_forward)
+                    await up_writer.drain()
+                else:
+                    up_writer.write(req_line)
+                    self._write_headers(up_writer, req_headers)
+                    req_body_size = await self._forward_body(
+                        client_reader, up_writer, req_cl, req_chunked
+                    )
+                    if req_body_size == 0 and req_cl > 0:
+                        req_body_size = req_cl
             else:
+                logger.info("proxy: %s %s%s [ai=%s]", method, hostname, path, False)
                 up_writer.write(req_line)
                 req_headers, req_cl, req_chunked = await self._forward_headers(
                     client_reader, up_writer
@@ -416,10 +508,7 @@ class MITMProxy:
                 )
             )
 
-            conn_h = (
-                req_headers.get("connection", "")
-                + resp_headers.get("connection", "")
-            ).lower()
+            conn_h = resp_headers.get("connection", "").lower()
             if "close" in conn_h:
                 break
 
@@ -487,7 +576,25 @@ class MITMProxy:
             await writer.drain()
             return total, b"".join(parts)
 
-        return 0, b""
+        # No content-length, no chunked encoding — stream until the upstream closes.
+        # This covers Cursor's SSE AI responses that use raw HTTP/1.1 keep-alive streaming.
+        # Cap capture at 512 KB to bound memory; bytes beyond that are still forwarded.
+        _CAPTURE_LIMIT = 512 * 1024
+        parts = []
+        total = 0
+        capturing = True
+        while True:
+            chunk = await reader.read(65536)
+            if not chunk:
+                break
+            writer.write(chunk)
+            await writer.drain()
+            total += len(chunk)
+            if capturing:
+                parts.append(chunk)
+                if total >= _CAPTURE_LIMIT:
+                    capturing = False
+        return total, b"".join(parts)
 
     async def _extract_ai_response(
         self,
@@ -497,110 +604,63 @@ class MITMProxy:
     ) -> None:
         """Parse Connect frames from *resp_bytes* and ship response telemetry.
 
+        Tries compiled proto parsing first; falls back to raw wire-format parsing
+        so token counts are always extracted even without compiled proto files.
         Fire-and-forget — never raises, never blocks the forward path.
         """
         try:
-            from matrx.cli.cursor_connect import parse_all_frames
-            from matrx.cli.cursor_extraction import (
-                extract_from_response_frame,
-                parse_response_proto,
-                ship_ai_telemetry,
-            )
+            from matrx.cli.cursor_extraction import ship_ai_telemetry
+
+            import gzip as _gzip
+            body = resp_bytes
+            if len(body) >= 2 and body[:2] == b"\x1f\x8b":
+                try:
+                    body = _gzip.decompress(body)
+                except Exception:
+                    body = resp_bytes
 
-            frames = parse_all_frames(resp_bytes)
             accumulated: dict = {
                 "session_id": session_id,
                 "response_text": "",
                 "tool_calls": [],
                 "usage": None,
             }
-            for flags, payload in frames:
-                if flags == 0x02:  # end-of-stream trailer — stop
-                    break
-                resp_proto = parse_response_proto(payload)
-                frame_data = extract_from_response_frame(resp_proto)
-                if frame_data:
-                    accumulated["response_text"] = (
-                        accumulated.get("response_text", "") + frame_data.get("text", "")
-                    )
-                    accumulated["tool_calls"].extend(frame_data.get("tool_calls", []))
-                    if frame_data.get("usage"):
-                        accumulated["usage"] = frame_data["usage"]
+
+            if hostname == "api.anthropic.com":
+                from matrx.cli.cursor_extraction import extract_from_anthropic_sse_response
+                frame_data = extract_from_anthropic_sse_response(body)
+                accumulated["response_text"] = frame_data.get("text", "")
+                accumulated["tool_calls"] = frame_data.get("tool_calls", [])
+                accumulated["usage"] = frame_data.get("usage")
+            elif hostname == "api.openai.com":
+                from matrx.cli.cursor_extraction import extract_from_openai_sse_response
+                frame_data = extract_from_openai_sse_response(body)
+                accumulated["response_text"] = frame_data.get("text", "")
+                accumulated["tool_calls"] = frame_data.get("tool_calls", [])
+                accumulated["usage"] = frame_data.get("usage")
+            else:
+                # Cursor backend: Connect/gRPC protobuf frames
+                from matrx.cli.cursor_connect import parse_all_frames
+                from matrx.cli.cursor_extraction import (
+                    _raw_extract_response_frame,
+                    extract_from_response_frame,
+                    parse_response_proto,
+                )
+                for flags, payload in parse_all_frames(body):
+                    if flags == 0x02:
+                        break
+                    resp_proto = parse_response_proto(payload)
+                    frame_data = extract_from_response_frame(resp_proto) if resp_proto is not None else _raw_extract_response_frame(payload)
+                    if frame_data:
+                        accumulated["response_text"] += frame_data.get("text", "")
+                        accumulated["tool_calls"].extend(frame_data.get("tool_calls", []))
+                        if frame_data.get("usage"):
+                            accumulated["usage"] = frame_data["usage"]
 
             await ship_ai_telemetry(accumulated, self.matrx_base_url, self.matrx_key)
         except Exception:
             logger.debug("proxy: _extract_ai_response failed", exc_info=True)
 
-    async def _read_headers_only(
-        self, reader: asyncio.StreamReader
-    ) -> tuple[dict[str, str], int, bool]:
-        """Read headers without writing. Returns (headers_dict, content_length, is_chunked)."""
-        headers: dict[str, str] = {}
-        content_length = -1
-        chunked = False
-        while True:
-            line = await reader.readline()
-            decoded = line.decode("utf-8", errors="replace").strip()
-            if not decoded:
-                break
-            if ":" in decoded:
-                k, _, v = decoded.partition(":")
-                k_lower = k.strip().lower()
-                v_stripped = v.strip()
-                headers[k_lower] = v_stripped
-                if k_lower == "content-length":
-                    content_length = int(v_stripped)
-                elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
-                    chunked = True
-        return headers, content_length, chunked
-
-    async def _read_body_to_bytes(
-        self,
-        reader: asyncio.StreamReader,
-        content_length: int,
-        chunked: bool,
-    ) -> bytes:
-        """Read body into bytes (no writer)."""
-        if content_length > 0:
-            return await reader.read(content_length)
-        if chunked:
-            parts: list[bytes] = []
-            while True:
-                size_line = await reader.readline()
-                size_str = size_line.decode("utf-8", errors="replace").strip()
-                try:
-                    chunk_size = int(size_str.split(";")[0], 16)
-                except ValueError:
-                    break
-                if chunk_size == 0:
-                    await reader.readline()  # trailer
-                    break
-                parts.append(await reader.read(chunk_size))
-                await reader.readline()  # crlf
-            return b"".join(parts)
-        return b""
-
-    def _write_headers(
-        self, writer: asyncio.StreamWriter, headers: dict[str, str]
-    ) -> None:
-        """Write headers as HTTP lines (caller must drain)."""
-        for k, v in headers.items():
-            writer.write(f"{k}: {v}\r\n".encode())
-        writer.write(b"\r\n")
-
-    def _write_http_response(
-        self,
-        writer: asyncio.StreamWriter,
-        status: int,
-        resp_headers: dict[str, str],
-        resp_body: bytes,
-    ) -> None:
-        """Write a complete HTTP response."""
-        writer.write(f"HTTP/1.1 {status} OK\r\n".encode())
-        self._write_headers(writer, resp_headers)
-        writer.write(resp_body)
-        # Caller should drain
-
     async def _forward_headers(
         self,
         reader: asyncio.StreamReader,
@@ -625,7 +685,10 @@ class MITMProxy:
                 v_stripped = v.strip()
                 headers[k_lower] = v_stripped
                 if k_lower == "content-length":
-                    content_length = int(v_stripped)
+                    try:
+                        content_length = int(v_stripped)
+                    except ValueError:
+                        pass
                 elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
                     chunked = True
         await writer.drain()
@@ -717,7 +780,10 @@ class MITMProxy:
                 v_stripped = v.strip()
                 headers[k_lower] = v_stripped
                 if k_lower == "content-length":
-                    content_length = int(v_stripped)
+                    try:
+                        content_length = int(v_stripped)
+                    except ValueError:
+                        pass
                 elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
                     chunked = True
         return headers, content_length, chunked
@@ -730,6 +796,8 @@ class MITMProxy:
     ) -> bytes:
         """Read body into bytes."""
        if content_length > 0:
+            if content_length > _MAX_BODY_BYTES:
+                raise ValueError(f"Request body too large: {content_length} bytes")
             return await reader.readexactly(content_length)
         if chunked:
             parts: list[bytes] = []
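
Note on the oversize-body guard added in this release: it is split across three hunks above, the module-level _MAX_BODY_BYTES constant, the ValueError raised from _read_body_to_bytes, and the 413 Content Too Large reply written in the request loop. A minimal sketch of how those pieces compose, assuming a bare asyncio StreamReader/StreamWriter pair; the read_body and answer_post names are illustrative only and are not part of the mtrx-cli API:

    import asyncio

    _MAX_BODY_BYTES = 50 * 1024 * 1024  # same 50 MB cap used in the diff

    async def read_body(reader: asyncio.StreamReader, content_length: int) -> bytes:
        # Refuse to buffer a body larger than the hard limit.
        if content_length > _MAX_BODY_BYTES:
            raise ValueError(f"Request body too large: {content_length} bytes")
        return await reader.readexactly(content_length)

    async def answer_post(
        reader: asyncio.StreamReader,
        writer: asyncio.StreamWriter,
        content_length: int,
    ) -> bytes | None:
        try:
            return await read_body(reader, content_length)
        except ValueError:
            # Mirror the proxy's behavior: reply 413 and stop handling the request.
            writer.write(b"HTTP/1.1 413 Content Too Large\r\nContent-Length: 0\r\n\r\n")
            await writer.drain()
            return None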