mtrx-cli 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,17 +33,30 @@ import httpx
 from matrx.cli.cursor_ca import CertCache, load_ca
 
 try:
-    from matrx.cli.cursor_reroute import is_ai_path, try_reroute_to_matrx
+    from matrx.cli.cursor_reroute import (
+        classify_ai_request,
+        is_ai_path,
+        try_inject_context,
+        try_reroute_to_matrx,
+    )
 except ImportError:
     # Stubs when cursor_reroute not available (e.g. npm package omit).
+    def classify_ai_request(method: str, path: str, headers: dict[str, str] | None = None) -> dict[str, bool]:
+        return {"candidate": False, "reroutable": False}
+
     def is_ai_path(path: str) -> bool:
         return False
 
     async def try_reroute_to_matrx(*, path: str, method: str, **kwargs: Any) -> None:
         return None
 
+    async def try_inject_context(**kwargs: Any) -> None:
+        return None
+
 logger = logging.getLogger(__name__)
 
+_MAX_BODY_BYTES = 50 * 1024 * 1024  # 50 MB hard limit for buffered request bodies
+
 DEFAULT_PORT = 8842
 PROXY_HOST = "127.0.0.1"
 HEALTH_PATH = "/__mtrx_health__"
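
Note: the except-ImportError stubs above exist so the proxy still imports when the optional cursor_reroute module is stripped from a build (the "npm package omit" case). The stubs preserve each function's return contract, so call sites never feature-check. A minimal self-contained sketch of the same pattern, with hypothetical module and function names:

# Sketch of the optional-module stub pattern (hypothetical names, not this package's).
try:
    from mypkg.optional_mod import classify  # real implementation, if present
except ImportError:
    def classify(path: str) -> dict[str, bool]:
        # Conservative stub: same return shape, feature simply stays off.
        return {"candidate": False}

def handle(path: str) -> None:
    if classify(path)["candidate"]:
        print("reroute", path)
    else:
        print("forward", path)

handle("/v1/chat")  # behaves sensibly whether or not the optional module exists
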
@@ -55,8 +68,17 @@ _INTERCEPT_DOMAINS = {
     "api4.cursor.sh",
     "api5.cursor.sh",
     "agentn.global.api5.cursor.sh",
+    "api.anthropic.com",
+    "api.openai.com",
 }
 
+_PREWARM_DOMAINS = (
+    "api2.cursor.sh",
+    "api3.cursor.sh",
+    "api4.cursor.sh",
+    "api5.cursor.sh",
+)
+
 
 class MITMProxy:
     """Async MITM forward proxy with telemetry mirroring."""
@@ -77,10 +99,12 @@ class MITMProxy:
         self._telemetry_client: httpx.AsyncClient | None = None
         self._cert_cache: CertCache | None = None
         self._request_count = 0
+        self._connect_count = 0
 
     async def start(self) -> None:
         ca_key, ca_cert = load_ca()
         self._cert_cache = CertCache(ca_key, ca_cert)
+        self._cert_cache.prewarm(_PREWARM_DOMAINS)
         self._telemetry_client = httpx.AsyncClient(timeout=10)
         self._server = await asyncio.start_server(
             self._handle_client, self.host, self.port
@@ -119,7 +143,7 @@ class MITMProxy:
         except (ConnectionResetError, BrokenPipeError, asyncio.IncompleteReadError):
             pass
         except Exception:
-            logger.debug("proxy: connection error", exc_info=True)
+            logger.warning("proxy: connection error", exc_info=True)
         finally:
             try:
                 writer.close()
@@ -163,8 +187,10 @@ class MITMProxy:
             await writer.drain()
 
             if hostname in _INTERCEPT_DOMAINS:
+                logger.info("proxy: CONNECT %s:%d [intercept]", hostname, port)
                 await self._mitm_intercept(reader, writer, hostname, port)
             else:
+                logger.info("proxy: CONNECT %s:%d [tunnel]", hostname, port)
                 await self._tunnel_passthrough(reader, writer, hostname, port)
         elif method in ("GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"):
             # Plain HTTP proxy request (non-CONNECT) -- handle health check
@@ -215,10 +241,22 @@ class MITMProxy:
         port: int,
     ) -> None:
         assert self._cert_cache is not None
+        self._connect_count += 1
+        conn_id = f"{hostname}:{self._connect_count}"
 
         # Use the hostname from the CONNECT request for the cert
         # (matches SNI in virtually all cases, avoids ClientHello peeking)
+        handshake_info = self._cert_cache.get_handshake_info(hostname)
         server_ctx = self._cert_cache.get_ssl_context(hostname)
+        logger.info(
+            "proxy: tls_prepare conn=%s host=%s serial=%s leaf_sha256=%s chain_len=%s cert=%s",
+            conn_id,
+            hostname,
+            handshake_info["leaf_serial"],
+            handshake_info["leaf_sha256"],
+            handshake_info["chain_length"],
+            handshake_info["cert_path"],
+        )
 
         # Upgrade client connection to TLS (we are the "server")
         loop = asyncio.get_running_loop()
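
Note: get_handshake_info is new here and not defined in this diff; the logged fields indicate it reports the leaf certificate's serial number, SHA-256 fingerprint, chain length, and on-disk path. Assuming the leaf is available as a PEM file, such a fingerprint is a digest over the DER bytes:

import hashlib
import ssl

def leaf_sha256(cert_path: str) -> str:
    """SHA-256 fingerprint of a PEM-encoded certificate (illustrative helper)."""
    with open(cert_path, encoding="ascii") as f:
        pem = f.read()
    der = ssl.PEM_cert_to_DER_cert(pem)  # strip the PEM armor, keep raw DER
    return hashlib.sha256(der).hexdigest()
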
@@ -229,8 +267,23 @@ class MITMProxy:
                 transport, protocol, server_ctx, server_side=True
             )
         except (ssl.SSLError, ConnectionError) as exc:
-            logger.debug("TLS handshake with client failed for %s: %s", hostname, exc)
+            logger.warning(
+                "TLS handshake with client failed for %s [conn=%s serial=%s leaf_sha256=%s chain_len=%s]: %s",
+                hostname,
+                conn_id,
+                handshake_info["leaf_serial"],
+                handshake_info["leaf_sha256"],
+                handshake_info["chain_length"],
+                exc,
+            )
             return
+        logger.info(
+            "proxy: tls_ready conn=%s host=%s serial=%s chain_len=%s",
+            conn_id,
+            hostname,
+            handshake_info["leaf_serial"],
+            handshake_info["chain_length"],
+        )
 
         tls_writer = asyncio.StreamWriter(new_transport, protocol, client_reader, loop)
 
@@ -243,6 +296,11 @@ class MITMProxy:
             )
         except Exception:
             logger.debug("Failed to connect to upstream %s:%d", hostname, port)
+            try:
+                tls_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\n\r\n")
+                await tls_writer.drain()
+            except Exception:
+                pass
             return
 
         # Forward HTTP/1.1 traffic between decrypted client and upstream
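
Note: _tunnel_passthrough, taken for non-intercepted domains, is unchanged and not shown in this diff. A passthrough tunnel typically just pumps bytes both ways without terminating TLS; a generic asyncio sketch of that shape (not the package's actual implementation):

import asyncio

async def _pump(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
    # Copy one direction until EOF, then close the write side.
    try:
        while chunk := await reader.read(65536):
            writer.write(chunk)
            await writer.drain()
    finally:
        writer.close()

async def tunnel(client_r: asyncio.StreamReader, client_w: asyncio.StreamWriter,
                 host: str, port: int) -> None:
    up_r, up_w = await asyncio.open_connection(host, port)
    # Run both directions concurrently; returns when both sides hit EOF.
    await asyncio.gather(_pump(client_r, up_w), _pump(up_r, client_w))
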
@@ -277,9 +335,11 @@ class MITMProxy:
         while True:
             req_line = await client_reader.readline()
             if not req_line:
+                logger.info("proxy: %s — connection closed (no request line)", hostname)
                 break
             req_line_str = req_line.decode("utf-8", errors="replace").strip()
             if not req_line_str:
+                logger.info("proxy: %s — empty request line", hostname)
                 break
 
             parts = req_line_str.split(" ", 2)
@@ -287,52 +347,99 @@ class MITMProxy:
             path = parts[1] if len(parts) > 1 else "/"
 
             req_body_size = 0
-            # For AI paths: buffer request and try rerouting through MTRX (live injection)
-            if method == "POST" and is_ai_path(path):
+            _is_ai_req = False
+            _req_session_id = ""
+            req_headers: dict[str, str]
+            req_cl: int
+            req_chunked: bool
+
+            if method == "POST":
                 req_headers, req_cl, req_chunked = await self._read_headers_only(
                     client_reader
                 )
-                req_body = await self._read_body_to_bytes(
-                    client_reader, req_cl, req_chunked
-                )
-                req_body_size = len(req_body)
-                result = await try_reroute_to_matrx(
-                    path=path,
-                    method=method,
-                    req_headers=req_headers,
-                    req_body=req_body,
-                    matrx_base_url=self.matrx_base_url,
-                    matrx_key=self.matrx_key,
-                    session_id=str(uuid.uuid4()),
+                ai_classification = classify_ai_request(method, path, req_headers)
+                _is_ai_req = ai_classification["candidate"]
+                _is_ai_reroutable = ai_classification["reroutable"]
+                _req_session_id = str(uuid.uuid4()) if _is_ai_req else ""
+                logger.info(
+                    "proxy: %s %s%s [ai=%s reroutable=%s ct=%s]",
+                    method,
+                    hostname,
+                    path,
+                    _is_ai_req,
+                    _is_ai_reroutable,
+                    req_headers.get("content-type", ""),
                 )
-                if result is not None:
-                    success, resp_headers, resp_body, is_streaming = result
-                    if success and resp_body is not None:
-                        self._request_count += 1
-                        self._write_http_response(
-                            client_writer, 200, resp_headers, resp_body
+                if _is_ai_req and not _is_ai_reroutable and "aiserver.v1." in path.lower():
+                    logger.info("proxy: candidate AI request not yet reroutable: %s%s", hostname, path)
+
+                # For AI paths: buffer request and try rerouting through MTRX (live injection)
+                if _is_ai_req:
+                    try:
+                        req_body = await self._read_body_to_bytes(
+                            client_reader, req_cl, req_chunked
                         )
-                        asyncio.create_task(
-                            self._ship_telemetry(
-                                hostname=hostname,
-                                method=method,
-                                path=path,
-                                status_code=200,
-                                req_body_size=len(req_body),
-                                resp_body_size=len(resp_body),
-                                elapsed_ms=0,
-                                content_type=resp_headers.get("content-type", ""),
-                                is_streaming=is_streaming,
+                    except ValueError:
+                        client_writer.write(b"HTTP/1.1 413 Content Too Large\r\nContent-Length: 0\r\n\r\n")
+                        await client_writer.drain()
+                        return
+                    req_body_size = len(req_body)
+                    result = await try_reroute_to_matrx(
+                        path=path,
+                        method=method,
+                        req_headers=req_headers,
+                        req_body=req_body,
+                        matrx_base_url=self.matrx_base_url,
+                        matrx_key=self.matrx_key,
+                        session_id=_req_session_id,
+                    )
+                    if result is not None:
+                        success, resp_headers, resp_body, is_streaming = result
+                        if success and resp_body is not None:
+                            self._request_count += 1
+                            self._write_http_response(
+                                client_writer, 200, resp_headers, resp_body
                             )
-                        )
-                        continue
-                    # Reroute returned but failed — fall through to forward
-                # Reroute not implemented or failed — forward to upstream
-                up_writer.write(req_line)
-                await self._write_headers(up_writer, req_headers)
-                up_writer.write(req_body)
-                await up_writer.drain()
+                            asyncio.create_task(
+                                self._ship_telemetry(
+                                    hostname=hostname,
+                                    method=method,
+                                    path=path,
+                                    status_code=200,
+                                    req_body_size=len(req_body),
+                                    resp_body_size=len(resp_body),
+                                    elapsed_ms=0,
+                                    content_type=resp_headers.get("content-type", ""),
+                                    is_streaming=is_streaming,
+                                )
+                            )
+                            continue
+                        # Reroute returned but failed — fall through to forward
+                    # Inject MTRX memory context into request before forwarding
+                    injected_body = await try_inject_context(
+                        req_body=req_body,
+                        req_headers=req_headers,
+                        matrx_base_url=self.matrx_base_url,
+                        matrx_key=self.matrx_key,
+                        session_id=_req_session_id,
+                    )
+                    body_to_forward = injected_body if injected_body is not None else req_body
+                    fwd_headers = dict(req_headers)
+                    fwd_headers["content-length"] = str(len(body_to_forward))
+                    up_writer.write(req_line)
+                    self._write_headers(up_writer, fwd_headers)
+                    up_writer.write(body_to_forward)
+                    await up_writer.drain()
+                else:
+                    up_writer.write(req_line)
+                    self._write_headers(up_writer, req_headers)
+                    req_body_size = await self._forward_body(
+                        client_reader, up_writer, req_cl, req_chunked
+                    )
+                    if req_body_size == 0 and req_cl > 0:
+                        req_body_size = req_cl
             else:
+                logger.info("proxy: %s %s%s [ai=%s]", method, hostname, path, False)
                 up_writer.write(req_line)
                 req_headers, req_cl, req_chunked = await self._forward_headers(
                     client_reader, up_writer
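
Note: try_inject_context's body is also outside this diff, but the call site above fixes its contract: return a rewritten request body, or None to forward the original unchanged. Because a rewrite changes the body length, the proxy rebuilds Content-Length before forwarding. A hypothetical sketch of that contract (the real function also receives matrx_base_url, matrx_key, and session_id, presumably to fetch the memory context from MTRX):

import json

async def inject_context_sketch(req_body: bytes, context: str) -> bytes | None:
    """Rewrite a chat-style JSON body, or return None to leave it untouched."""
    try:
        payload = json.loads(req_body)
    except ValueError:
        return None  # not JSON; caller forwards the original bytes
    messages = payload.get("messages")
    if not isinstance(messages, list):
        return None
    messages.insert(0, {"role": "system", "content": context})
    return json.dumps(payload).encode()  # caller must recompute Content-Length
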
@@ -369,9 +476,20 @@ class MITMProxy:
                 for t in ("text/event-stream", "grpc", "proto", "connect")
             )
 
-            resp_body_size = await self._forward_body(
-                up_reader, client_writer, resp_cl, resp_chunked
-            )
+            if _is_ai_req:
+                resp_body_size, resp_captured = await self._forward_body_with_capture(
+                    up_reader, client_writer, resp_cl, resp_chunked
+                )
+                if resp_captured:
+                    asyncio.create_task(
+                        self._extract_ai_response(
+                            resp_captured, _req_session_id, hostname
+                        )
+                    )
+            else:
+                resp_body_size = await self._forward_body(
+                    up_reader, client_writer, resp_cl, resp_chunked
+                )
 
             elapsed_ms = int((time.monotonic() - started) * 1000)
             self._request_count += 1
@@ -390,82 +508,158 @@ class MITMProxy:
                 )
             )
 
-            conn_h = (
-                req_headers.get("connection", "")
-                + resp_headers.get("connection", "")
-            ).lower()
+            conn_h = resp_headers.get("connection", "").lower()
             if "close" in conn_h:
                 break
 
-    async def _read_headers_only(
-        self, reader: asyncio.StreamReader
-    ) -> tuple[dict[str, str], int, bool]:
-        """Read headers without writing. Returns (headers_dict, content_length, is_chunked)."""
-        headers: dict[str, str] = {}
-        content_length = -1
-        chunked = False
-        while True:
-            line = await reader.readline()
-            decoded = line.decode("utf-8", errors="replace").strip()
-            if not decoded:
-                break
-            if ":" in decoded:
-                k, _, v = decoded.partition(":")
-                k_lower = k.strip().lower()
-                v_stripped = v.strip()
-                headers[k_lower] = v_stripped
-                if k_lower == "content-length":
-                    content_length = int(v_stripped)
-                elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
-                    chunked = True
-        return headers, content_length, chunked
-
-    async def _read_body_to_bytes(
+    async def _forward_body_with_capture(
         self,
         reader: asyncio.StreamReader,
+        writer: asyncio.StreamWriter,
         content_length: int,
         chunked: bool,
-    ) -> bytes:
-        """Read body into bytes (no writer)."""
+    ) -> tuple[int, bytes]:
+        """Forward body like ``_forward_body`` while also capturing a copy.
+
+        Returns ``(bytes_forwarded, captured_bytes)``. The capture enables
+        background response extraction without blocking the forward path.
+        """
+        parts: list[bytes] = []
+
         if content_length > 0:
-            return await reader.read(content_length)
+            total = 0
+            remaining = content_length
+            while remaining > 0:
+                chunk = await reader.read(min(remaining, 65536))
+                if not chunk:
+                    break
+                writer.write(chunk)
+                await writer.drain()
+                parts.append(chunk)
+                total += len(chunk)
+                remaining -= len(chunk)
+            return total, b"".join(parts)
+
         if chunked:
-            parts: list[bytes] = []
+            total = 0
             while True:
                 size_line = await reader.readline()
+                if not size_line:
+                    break
+                writer.write(size_line)
+                await writer.drain()
                 size_str = size_line.decode("utf-8", errors="replace").strip()
                 try:
                     chunk_size = int(size_str.split(";")[0], 16)
                 except ValueError:
                     break
                 if chunk_size == 0:
-                    await reader.readline()  # trailer
+                    trailer = await reader.readline()
+                    writer.write(trailer)
+                    await writer.drain()
                     break
-                parts.append(await reader.read(chunk_size))
-                await reader.readline()  # crlf
-            return b"".join(parts)
-        return b""
+                remaining = chunk_size
+                chunk_parts: list[bytes] = []
+                while remaining > 0:
+                    data = await reader.read(min(remaining, 65536))
+                    if not data:
+                        return total, b"".join(parts)
+                    writer.write(data)
+                    await writer.drain()
+                    chunk_parts.append(data)
+                    total += len(data)
+                    remaining -= len(data)
+                chunk_data = b"".join(chunk_parts)
+                parts.append(chunk_data)
+                crlf = await reader.readline()
+                writer.write(crlf)
+                await writer.drain()
+            return total, b"".join(parts)
 
-    def _write_headers(
-        self, writer: asyncio.StreamWriter, headers: dict[str, str]
-    ) -> None:
-        """Write headers as HTTP lines (caller must drain)."""
-        for k, v in headers.items():
-            writer.write(f"{k}: {v}\r\n".encode())
-        writer.write(b"\r\n")
+        # No content-length, no chunked encoding — stream until the upstream closes.
+        # This covers Cursor's SSE AI responses that use raw HTTP/1.1 keep-alive streaming.
+        # Cap capture at 512 KB to bound memory; bytes beyond that are still forwarded.
+        _CAPTURE_LIMIT = 512 * 1024
+        parts = []
+        total = 0
+        capturing = True
+        while True:
+            chunk = await reader.read(65536)
+            if not chunk:
+                break
+            writer.write(chunk)
+            await writer.drain()
+            total += len(chunk)
+            if capturing:
+                parts.append(chunk)
+                if total >= _CAPTURE_LIMIT:
+                    capturing = False
+        return total, b"".join(parts)
 
-    def _write_http_response(
+    async def _extract_ai_response(
         self,
-        writer: asyncio.StreamWriter,
-        status: int,
-        resp_headers: dict[str, str],
-        resp_body: bytes,
+        resp_bytes: bytes,
+        session_id: str,
+        hostname: str,
     ) -> None:
-        """Write a complete HTTP response."""
-        writer.write(f"HTTP/1.1 {status} OK\r\n".encode())
-        self._write_headers(writer, resp_headers)
-        writer.write(resp_body)
-        # Caller should drain
+        """Parse Connect frames from *resp_bytes* and ship response telemetry.
+
+        Tries compiled proto parsing first; falls back to raw wire-format parsing
+        so token counts are always extracted even without compiled proto files.
+        Fire-and-forget: never raises, never blocks the forward path.
+        """
+        try:
+            from matrx.cli.cursor_extraction import ship_ai_telemetry
+
+            import gzip as _gzip
+            body = resp_bytes
+            if len(body) >= 2 and body[:2] == b"\x1f\x8b":
+                try:
+                    body = _gzip.decompress(body)
+                except Exception:
+                    body = resp_bytes
+
+            accumulated: dict = {
+                "session_id": session_id,
+                "response_text": "",
+                "tool_calls": [],
+                "usage": None,
+            }
+
+            if hostname == "api.anthropic.com":
+                from matrx.cli.cursor_extraction import extract_from_anthropic_sse_response
+                frame_data = extract_from_anthropic_sse_response(body)
+                accumulated["response_text"] = frame_data.get("text", "")
+                accumulated["tool_calls"] = frame_data.get("tool_calls", [])
+                accumulated["usage"] = frame_data.get("usage")
+            elif hostname == "api.openai.com":
+                from matrx.cli.cursor_extraction import extract_from_openai_sse_response
+                frame_data = extract_from_openai_sse_response(body)
+                accumulated["response_text"] = frame_data.get("text", "")
+                accumulated["tool_calls"] = frame_data.get("tool_calls", [])
+                accumulated["usage"] = frame_data.get("usage")
+            else:
+                # Cursor backend: Connect/gRPC protobuf frames
+                from matrx.cli.cursor_connect import parse_all_frames
+                from matrx.cli.cursor_extraction import (
+                    _raw_extract_response_frame,
+                    extract_from_response_frame,
+                    parse_response_proto,
+                )
+                for flags, payload in parse_all_frames(body):
+                    if flags == 0x02:
+                        break
+                    resp_proto = parse_response_proto(payload)
+                    frame_data = extract_from_response_frame(resp_proto) if resp_proto is not None else _raw_extract_response_frame(payload)
+                    if frame_data:
+                        accumulated["response_text"] += frame_data.get("text", "")
+                        accumulated["tool_calls"].extend(frame_data.get("tool_calls", []))
+                        if frame_data.get("usage"):
+                            accumulated["usage"] = frame_data["usage"]
+
+            await ship_ai_telemetry(accumulated, self.matrx_base_url, self.matrx_key)
+        except Exception:
+            logger.debug("proxy: _extract_ai_response failed", exc_info=True)
 
     async def _forward_headers(
         self,
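
Note: the chunked branch of _forward_body_with_capture above tracks HTTP/1.1 chunked transfer encoding exactly: a hexadecimal size line (optionally carrying ;extensions), that many payload bytes, a trailing CRLF, repeated until a zero-size terminal chunk. A self-contained decoder of a canonical example, for reference:

import io

def decode_chunked(raw: bytes) -> bytes:
    """Decode an HTTP/1.1 chunked body (ignores trailer headers; illustrative)."""
    buf = io.BytesIO(raw)
    out = bytearray()
    while True:
        size_line = buf.readline()                      # e.g. b"4\r\n" or b"4;ext=1\r\n"
        chunk_size = int(size_line.split(b";")[0], 16)  # hex size; extensions ignored
        if chunk_size == 0:
            break                                       # terminal chunk
        out += buf.read(chunk_size)
        buf.readline()                                  # consume the chunk's CRLF
    return bytes(out)

assert decode_chunked(b"4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n") == b"Wikipedia"
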
@@ -491,7 +685,10 @@ class MITMProxy:
                 v_stripped = v.strip()
                 headers[k_lower] = v_stripped
                 if k_lower == "content-length":
-                    content_length = int(v_stripped)
+                    try:
+                        content_length = int(v_stripped)
+                    except ValueError:
+                        pass
                 elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
                     chunked = True
         await writer.drain()
@@ -583,7 +780,10 @@ class MITMProxy:
                 v_stripped = v.strip()
                 headers[k_lower] = v_stripped
                 if k_lower == "content-length":
-                    content_length = int(v_stripped)
+                    try:
+                        content_length = int(v_stripped)
+                    except ValueError:
+                        pass
                 elif k_lower == "transfer-encoding" and "chunked" in v_stripped.lower():
                     chunked = True
         return headers, content_length, chunked
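
Note: both header readers now guard the Content-Length parse, so a malformed value degrades to "no declared length" instead of raising ValueError out of the read loop and tearing down the connection. In miniature:

for value in ("1024", " 42 ", "abc", ""):
    content_length = -1  # the readers' default
    try:
        content_length = int(value.strip())
    except ValueError:
        pass  # keep the default, as the patched code does
    print(repr(value), "->", content_length)
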
@@ -596,6 +796,8 @@ class MITMProxy:
     ) -> bytes:
         """Read body into bytes."""
         if content_length > 0:
+            if content_length > _MAX_BODY_BYTES:
+                raise ValueError(f"Request body too large: {content_length} bytes")
             return await reader.readexactly(content_length)
         if chunked:
             parts: list[bytes] = []
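
Note: extract_from_anthropic_sse_response and extract_from_openai_sse_response, imported by _extract_ai_response above, are not included in this diff. Both providers stream responses as server-sent events, so a plausible reduction walks the data: lines and accumulates text deltas. A sketch against Anthropic's published streaming event shapes (the package's real parser may differ):

import json

def extract_text_from_sse(body: bytes) -> str:
    """Accumulate text deltas from an Anthropic-style SSE stream (sketch only)."""
    parts: list[str] = []
    for line in body.decode("utf-8", errors="replace").splitlines():
        if not line.startswith("data:"):
            continue  # skip event:/blank separator lines
        try:
            event = json.loads(line[5:].strip())
        except ValueError:
            continue
        if event.get("type") == "content_block_delta":
            delta = event.get("delta", {})
            if delta.get("type") == "text_delta":
                parts.append(delta.get("text", ""))
    return "".join(parts)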