langgraph-api 0.4.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. langgraph_api/__init__.py +1 -1
  2. langgraph_api/api/__init__.py +111 -51
  3. langgraph_api/api/a2a.py +1610 -0
  4. langgraph_api/api/assistants.py +212 -89
  5. langgraph_api/api/mcp.py +3 -3
  6. langgraph_api/api/meta.py +52 -28
  7. langgraph_api/api/openapi.py +27 -17
  8. langgraph_api/api/profile.py +108 -0
  9. langgraph_api/api/runs.py +342 -195
  10. langgraph_api/api/store.py +19 -2
  11. langgraph_api/api/threads.py +209 -27
  12. langgraph_api/asgi_transport.py +14 -9
  13. langgraph_api/asyncio.py +14 -4
  14. langgraph_api/auth/custom.py +52 -37
  15. langgraph_api/auth/langsmith/backend.py +4 -3
  16. langgraph_api/auth/langsmith/client.py +13 -8
  17. langgraph_api/cli.py +230 -133
  18. langgraph_api/command.py +5 -3
  19. langgraph_api/config/__init__.py +532 -0
  20. langgraph_api/config/_parse.py +58 -0
  21. langgraph_api/config/schemas.py +431 -0
  22. langgraph_api/cron_scheduler.py +17 -1
  23. langgraph_api/encryption/__init__.py +15 -0
  24. langgraph_api/encryption/aes_json.py +158 -0
  25. langgraph_api/encryption/context.py +35 -0
  26. langgraph_api/encryption/custom.py +280 -0
  27. langgraph_api/encryption/middleware.py +632 -0
  28. langgraph_api/encryption/shared.py +63 -0
  29. langgraph_api/errors.py +12 -1
  30. langgraph_api/executor_entrypoint.py +11 -6
  31. langgraph_api/feature_flags.py +29 -0
  32. langgraph_api/graph.py +176 -76
  33. langgraph_api/grpc/client.py +313 -0
  34. langgraph_api/grpc/config_conversion.py +231 -0
  35. langgraph_api/grpc/generated/__init__.py +29 -0
  36. langgraph_api/grpc/generated/checkpointer_pb2.py +63 -0
  37. langgraph_api/grpc/generated/checkpointer_pb2.pyi +99 -0
  38. langgraph_api/grpc/generated/checkpointer_pb2_grpc.py +329 -0
  39. langgraph_api/grpc/generated/core_api_pb2.py +216 -0
  40. langgraph_api/grpc/generated/core_api_pb2.pyi +905 -0
  41. langgraph_api/grpc/generated/core_api_pb2_grpc.py +1621 -0
  42. langgraph_api/grpc/generated/engine_common_pb2.py +219 -0
  43. langgraph_api/grpc/generated/engine_common_pb2.pyi +722 -0
  44. langgraph_api/grpc/generated/engine_common_pb2_grpc.py +24 -0
  45. langgraph_api/grpc/generated/enum_cancel_run_action_pb2.py +37 -0
  46. langgraph_api/grpc/generated/enum_cancel_run_action_pb2.pyi +12 -0
  47. langgraph_api/grpc/generated/enum_cancel_run_action_pb2_grpc.py +24 -0
  48. langgraph_api/grpc/generated/enum_control_signal_pb2.py +37 -0
  49. langgraph_api/grpc/generated/enum_control_signal_pb2.pyi +16 -0
  50. langgraph_api/grpc/generated/enum_control_signal_pb2_grpc.py +24 -0
  51. langgraph_api/grpc/generated/enum_durability_pb2.py +37 -0
  52. langgraph_api/grpc/generated/enum_durability_pb2.pyi +16 -0
  53. langgraph_api/grpc/generated/enum_durability_pb2_grpc.py +24 -0
  54. langgraph_api/grpc/generated/enum_multitask_strategy_pb2.py +37 -0
  55. langgraph_api/grpc/generated/enum_multitask_strategy_pb2.pyi +16 -0
  56. langgraph_api/grpc/generated/enum_multitask_strategy_pb2_grpc.py +24 -0
  57. langgraph_api/grpc/generated/enum_run_status_pb2.py +37 -0
  58. langgraph_api/grpc/generated/enum_run_status_pb2.pyi +22 -0
  59. langgraph_api/grpc/generated/enum_run_status_pb2_grpc.py +24 -0
  60. langgraph_api/grpc/generated/enum_stream_mode_pb2.py +37 -0
  61. langgraph_api/grpc/generated/enum_stream_mode_pb2.pyi +28 -0
  62. langgraph_api/grpc/generated/enum_stream_mode_pb2_grpc.py +24 -0
  63. langgraph_api/grpc/generated/enum_thread_status_pb2.py +37 -0
  64. langgraph_api/grpc/generated/enum_thread_status_pb2.pyi +16 -0
  65. langgraph_api/grpc/generated/enum_thread_status_pb2_grpc.py +24 -0
  66. langgraph_api/grpc/generated/enum_thread_stream_mode_pb2.py +37 -0
  67. langgraph_api/grpc/generated/enum_thread_stream_mode_pb2.pyi +16 -0
  68. langgraph_api/grpc/generated/enum_thread_stream_mode_pb2_grpc.py +24 -0
  69. langgraph_api/grpc/generated/errors_pb2.py +39 -0
  70. langgraph_api/grpc/generated/errors_pb2.pyi +21 -0
  71. langgraph_api/grpc/generated/errors_pb2_grpc.py +24 -0
  72. langgraph_api/grpc/ops/__init__.py +370 -0
  73. langgraph_api/grpc/ops/assistants.py +424 -0
  74. langgraph_api/grpc/ops/runs.py +792 -0
  75. langgraph_api/grpc/ops/threads.py +1013 -0
  76. langgraph_api/http.py +16 -5
  77. langgraph_api/http_metrics.py +15 -35
  78. langgraph_api/http_metrics_utils.py +38 -0
  79. langgraph_api/js/build.mts +1 -1
  80. langgraph_api/js/client.http.mts +13 -7
  81. langgraph_api/js/client.mts +2 -5
  82. langgraph_api/js/package.json +29 -28
  83. langgraph_api/js/remote.py +56 -30
  84. langgraph_api/js/src/graph.mts +20 -0
  85. langgraph_api/js/sse.py +2 -2
  86. langgraph_api/js/ui.py +1 -1
  87. langgraph_api/js/yarn.lock +1204 -1006
  88. langgraph_api/logging.py +29 -2
  89. langgraph_api/metadata.py +99 -28
  90. langgraph_api/middleware/http_logger.py +7 -2
  91. langgraph_api/middleware/private_network.py +7 -7
  92. langgraph_api/models/run.py +54 -93
  93. langgraph_api/otel_context.py +205 -0
  94. langgraph_api/patch.py +5 -3
  95. langgraph_api/queue_entrypoint.py +154 -65
  96. langgraph_api/route.py +47 -5
  97. langgraph_api/schema.py +88 -10
  98. langgraph_api/self_hosted_logs.py +124 -0
  99. langgraph_api/self_hosted_metrics.py +450 -0
  100. langgraph_api/serde.py +79 -37
  101. langgraph_api/server.py +138 -60
  102. langgraph_api/state.py +4 -3
  103. langgraph_api/store.py +25 -16
  104. langgraph_api/stream.py +80 -29
  105. langgraph_api/thread_ttl.py +31 -13
  106. langgraph_api/timing/__init__.py +25 -0
  107. langgraph_api/timing/profiler.py +200 -0
  108. langgraph_api/timing/timer.py +318 -0
  109. langgraph_api/utils/__init__.py +53 -8
  110. langgraph_api/utils/cache.py +47 -10
  111. langgraph_api/utils/config.py +2 -1
  112. langgraph_api/utils/errors.py +77 -0
  113. langgraph_api/utils/future.py +10 -6
  114. langgraph_api/utils/headers.py +76 -2
  115. langgraph_api/utils/retriable_client.py +74 -0
  116. langgraph_api/utils/stream_codec.py +315 -0
  117. langgraph_api/utils/uuids.py +29 -62
  118. langgraph_api/validation.py +9 -0
  119. langgraph_api/webhook.py +120 -6
  120. langgraph_api/worker.py +55 -24
  121. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/METADATA +16 -8
  122. langgraph_api-0.7.3.dist-info/RECORD +168 -0
  123. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/WHEEL +1 -1
  124. langgraph_runtime/__init__.py +1 -0
  125. langgraph_runtime/routes.py +11 -0
  126. logging.json +1 -3
  127. openapi.json +839 -478
  128. langgraph_api/config.py +0 -387
  129. langgraph_api/js/isolate-0x130008000-46649-46649-v8.log +0 -4430
  130. langgraph_api/js/isolate-0x138008000-44681-44681-v8.log +0 -4430
  131. langgraph_api/js/package-lock.json +0 -3308
  132. langgraph_api-0.4.1.dist-info/RECORD +0 -107
  133. /langgraph_api/{utils.py → grpc/__init__.py} +0 -0
  134. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/entry_points.txt +0 -0
  135. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/licenses/LICENSE +0 -0
langgraph_api/otel_context.py ADDED
@@ -0,0 +1,205 @@
+"""OTEL trace context propagation utilities.
+
+Provides helpers for extracting, storing, and restoring W3C Trace Context
+across the API-to-worker boundary in distributed LangGraph deployments.
+"""
+
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Any
+
+import structlog
+
+from langgraph_api import __version__, config
+
+if TYPE_CHECKING:
+    from collections.abc import Generator, Mapping
+
+    from opentelemetry.trace import Tracer
+
+logger = structlog.stdlib.get_logger(__name__)
+
+# Constants for storing trace context in configurable
+OTEL_TRACEPARENT_KEY = "__otel_traceparent__"
+OTEL_TRACESTATE_KEY = "__otel_tracestate__"
+OTEL_TRACER_NAME = "langsmith_agent_server"
+OTEL_RUN_ID_ATTR_NAME = "langsmith.run_id"
+OTEL_THREAD_ID_ATTR_NAME = "langsmith.thread_id"
+
+# Cached instances (initialized lazily, once)
+_propagator: Any = None
+_tracer: Any = None
+_otel_available: bool | None = None
+
+
+def _check_otel_available() -> bool:
+    """Check if OpenTelemetry is available. Cached after first call."""
+    global _otel_available
+    if _otel_available is None:
+        try:
+            from opentelemetry import trace  # noqa: F401
+            from opentelemetry.trace.propagation.tracecontext import (
+                TraceContextTextMapPropagator,  # noqa: F401
+            )
+
+            _otel_available = True
+        except ImportError:
+            _otel_available = False
+    return _otel_available
+
+
+def _get_propagator() -> Any:
+    """Get cached W3C TraceContext propagator."""
+    global _propagator
+    if _propagator is None:
+        from opentelemetry.trace.propagation.tracecontext import (
+            TraceContextTextMapPropagator,
+        )
+
+        _propagator = TraceContextTextMapPropagator()
+    return _propagator
+
+
+def _get_tracer() -> Tracer:
+    """Get cached tracer for worker spans."""
+    global _tracer
+    if _tracer is None:
+        from opentelemetry import trace
+
+        _tracer = trace.get_tracer(
+            OTEL_TRACER_NAME, instrumenting_library_version=__version__
+        )
+    return _tracer
+
+
+def extract_otel_headers_to_configurable(
+    headers: Mapping[str, str],
+    configurable: dict[str, Any],
+) -> None:
+    """Extract traceparent/tracestate from HTTP headers into configurable dict.
+
+    Only extracts if OTEL is enabled. No-op otherwise.
+
+    Args:
+        headers: HTTP headers from the incoming request
+        configurable: The configurable dict to store trace context in
+    """
+    if not config.OTEL_ENABLED:
+        return
+
+    if traceparent := headers.get("traceparent"):
+        configurable[OTEL_TRACEPARENT_KEY] = traceparent
+    if tracestate := headers.get("tracestate"):
+        configurable[OTEL_TRACESTATE_KEY] = tracestate
+
+
+def inject_current_trace_context(configurable: dict[str, Any]) -> None:
+    """Inject current OTEL trace context into configurable for worker propagation.
+
+    This captures the active span context (e.g., from Starlette auto-instrumentation)
+    and stores it in the configurable dict so workers can restore it and create
+    child spans under the API request span.
+
+    Args:
+        configurable: The configurable dict to store trace context in
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        return
+
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        if not span.is_recording():
+            return
+
+        carrier: dict[str, str] = {}
+        _get_propagator().inject(carrier)
+
+        if traceparent := carrier.get("traceparent"):
+            configurable[OTEL_TRACEPARENT_KEY] = traceparent
+        if tracestate := carrier.get("tracestate"):
+            configurable[OTEL_TRACESTATE_KEY] = tracestate
+    except Exception:
+        # Never fail - tracing issues shouldn't break functionality
+        pass
+
+
+@contextmanager
+def restore_otel_trace_context(
+    configurable: dict[str, Any],
+    run_id: str | None = None,
+    thread_id: str | None = None,
+) -> Generator[None, None, None]:
+    """Restore OTEL trace context and create child span for worker execution.
+
+    Creates a child span under the original API request span, ensuring
+    distributed traces are connected across the API-to-worker boundary.
+
+    Yields:
+        None - execution continues within the restored trace context
+
+    Note:
+        - No-ops if OTEL is disabled or unavailable
+        - Never raises - tracing failures won't break run execution
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        yield
+        return
+
+    traceparent = configurable.get(OTEL_TRACEPARENT_KEY)
+    if not traceparent:
+        yield
+        return
+
+    try:
+        from opentelemetry import trace
+
+        # Build carrier dict for W3C propagator
+        carrier: dict[str, str] = {"traceparent": traceparent}
+        if tracestate := configurable.get(OTEL_TRACESTATE_KEY):
+            carrier["tracestate"] = tracestate
+
+        # Extract context from carrier
+        ctx = _get_propagator().extract(carrier=carrier)
+
+        with _get_tracer().start_as_current_span(
+            "worker.stream_run",
+            context=ctx,
+            kind=trace.SpanKind.CONSUMER,
+        ) as span:
+            if run_id:
+                span.set_attribute(OTEL_RUN_ID_ATTR_NAME, run_id)
+            if thread_id:
+                span.set_attribute(OTEL_THREAD_ID_ATTR_NAME, thread_id)
+
+            yield
+    except Exception:
+        logger.debug("Failed to restore OTEL trace context", exc_info=True)
+        yield
+
+
+def inject_otel_headers() -> dict[str, str]:
+    """Inject current trace context into headers for outgoing HTTP requests.
+
+    Used to propagate trace context to webhooks.
+
+    Returns:
+        Dict with traceparent/tracestate headers if in active trace, else empty.
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        return {}
+
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        if not span.is_recording():
+            return {}
+
+        carrier: dict[str, str] = {}
+        _get_propagator().inject(carrier)
+        return carrier
+    except Exception:
+        return {}
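Note: the sketch below is editorial commentary, not part of the diff. It shows how the new otel_context helpers are intended to compose across the API-to-worker boundary, assuming the configurable dict travels with the run config from API to worker; the surrounding wiring is hypothetical, and the traceparent value is the W3C spec example. All helpers no-op unless OTEL is enabled in config.

# API side: capture the caller's W3C trace context from the incoming request
# (no-ops unless config.OTEL_ENABLED is set).
from langgraph_api.otel_context import (
    extract_otel_headers_to_configurable,
    inject_otel_headers,
    restore_otel_trace_context,
)

configurable: dict = {}
extract_otel_headers_to_configurable(
    {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"},
    configurable,
)

# Worker side (hypothetical wrapper): restore the context and execute under a
# child span ("worker.stream_run") linked back to the API request span.
def execute_run(configurable: dict, run_id: str, thread_id: str) -> None:
    with restore_otel_trace_context(configurable, run_id=run_id, thread_id=thread_id):
        ...  # graph execution happens inside the restored trace

# Outgoing webhooks: forward the active trace context as HTTP headers.
webhook_headers = inject_otel_headers()  # {} if OTEL disabled or no active span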
langgraph_api/patch.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
 from starlette.responses import Response, StreamingResponse
 from starlette.types import Send
 
-from langgraph_api.serde import Fragment
+from langgraph_api.serde import Fragment, json_dumpb
 
 """
 Patch Response.render and StreamingResponse.stream_response
@@ -14,7 +14,7 @@ to recognize bytearrays and memoryviews as bytes-like objects.
 def Response_render(self, content: Any) -> bytes:
     if content is None:
         return b""
-    if isinstance(content, (bytes, bytearray, memoryview)):  # noqa: UP038
+    if isinstance(content, (bytes, bytearray, memoryview)):
         return content
     return content.encode(self.charset)  # type: ignore
 
@@ -32,7 +32,9 @@ async def StreamingResponse_stream_response(self, send: Send) -> None:
             continue
         if isinstance(chunk, Fragment):
             chunk = chunk.buf
-        if not isinstance(chunk, (bytes, bytearray, memoryview)):  # noqa: UP038
+        if isinstance(chunk, dict):
+            chunk = json_dumpb(chunk)
+        if not isinstance(chunk, (bytes, bytearray, memoryview)):
            chunk = chunk.encode(self.charset)
         await send({"type": "http.response.body", "body": chunk, "more_body": True})
 
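Editorial sketch, not part of the diff: with the patch above applied, a body iterator may now yield plain dicts, which stream_response serializes via json_dumpb; previously a dict chunk would reach chunk.encode(...) and raise AttributeError. The endpoint wiring here is illustrative.

from starlette.responses import StreamingResponse

async def body_iterator():
    # dict chunk: serialized by the patched stream_response via json_dumpb
    yield {"event": "metadata", "data": {"run_id": "abc123"}}
    # bytes-like chunk: passed through unchanged
    yield b"done\n"

response = StreamingResponse(body_iterator(), media_type="application/json")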
langgraph_api/queue_entrypoint.py CHANGED
@@ -1,46 +1,68 @@
-# ruff: noqa: E402
 import os
 
 if not (
     (disable_truststore := os.getenv("DISABLE_TRUSTSTORE"))
     and disable_truststore.lower() == "true"
 ):
-    import truststore  # noqa: F401
+    import truststore
 
-    truststore.inject_into_ssl()  # noqa: F401
+    truststore.inject_into_ssl()
 
 import asyncio
-import contextlib
+import functools
 import json
 import logging.config
 import pathlib
 import signal
-from contextlib import asynccontextmanager
-from typing import cast
+import socket
 
 import structlog
 
-from langgraph_runtime.database import pool_stats
-from langgraph_runtime.lifespan import lifespan
+from langgraph_api.utils.errors import GraphLoadError, HealthServerStartupError
+from langgraph_runtime import lifespan
+from langgraph_runtime.database import healthcheck, pool_stats
 from langgraph_runtime.metrics import get_metrics
 
 logger = structlog.stdlib.get_logger(__name__)
 
 
+def _ensure_port_available(host: str, port: int) -> None:
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            sock.bind((host, port))
+    except OSError as exc:
+        raise HealthServerStartupError(host, port, exc) from exc
+
+
 async def health_and_metrics_server():
     import uvicorn
     from starlette.applications import Starlette
+    from starlette.requests import Request
     from starlette.responses import JSONResponse, PlainTextResponse
-    from starlette.routing import Route
+    from starlette.routing import Mount, Route
+
+    from langgraph_api import config as lc_config
+    from langgraph_api.api.meta import METRICS_FORMATS
 
     port = int(os.getenv("PORT", "8080"))
+    host = os.getenv("LANGGRAPH_SERVER_HOST", "0.0.0.0")
 
     async def health_endpoint(request):
+        # if db or redis is not healthy, this will raise an exception
+        await healthcheck()
        return JSONResponse({"status": "ok"})
 
-    async def metrics_endpoint(request):
+    async def metrics_endpoint(request: Request):
+        metrics_format = request.query_params.get("format", "prometheus")
+        if metrics_format not in METRICS_FORMATS:
+            await logger.awarning(
+                f"metrics format {metrics_format} not supported, falling back to prometheus"
+            )
+            metrics_format = "prometheus"
+
         metrics = get_metrics()
-        worker_metrics = cast(dict[str, int], metrics["workers"])
+        worker_metrics = metrics["workers"]
         workers_max = worker_metrics["max"]
         workers_active = worker_metrics["active"]
         workers_available = worker_metrics["available"]
@@ -48,86 +70,132 @@ async def health_and_metrics_server():
         project_id = os.getenv("LANGSMITH_HOST_PROJECT_ID")
         revision_id = os.getenv("LANGSMITH_HOST_REVISION_ID")
 
-        metrics_lines = [
-            "# HELP lg_api_workers_max The maximum number of workers available.",
-            "# TYPE lg_api_workers_max gauge",
-            f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
-            "# HELP lg_api_workers_active The number of currently active workers.",
-            "# TYPE lg_api_workers_active gauge",
-            f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
-            "# HELP lg_api_workers_available The number of available (idle) workers.",
-            "# TYPE lg_api_workers_available gauge",
-            f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
-        ]
-
-        metrics_lines.extend(
-            pool_stats(
-                project_id=project_id,
-                revision_id=revision_id,
-            )
+        pg_redis_stats = pool_stats(
+            project_id=project_id,
+            revision_id=revision_id,
+            format=metrics_format,
         )
 
-        return PlainTextResponse(
-            "\n".join(metrics_lines),
-            media_type="text/plain; version=0.0.4; charset=utf-8",
+        if metrics_format == "json":
+            resp = {
+                **pg_redis_stats,
+                "workers": worker_metrics,
+            }
+            return JSONResponse(resp)
+        elif metrics_format == "prometheus":
+            metrics_lines = [
+                "# HELP lg_api_workers_max The maximum number of workers available.",
+                "# TYPE lg_api_workers_max gauge",
+                f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
+                "# HELP lg_api_workers_active The number of currently active workers.",
+                "# TYPE lg_api_workers_active gauge",
+                f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
+                "# HELP lg_api_workers_available The number of available (idle) workers.",
+                "# TYPE lg_api_workers_available gauge",
+                f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
+            ]
+
+            metrics_lines.extend(pg_redis_stats)
+
+            return PlainTextResponse(
+                "\n".join(metrics_lines),
+                media_type="text/plain; version=0.0.4; charset=utf-8",
+            )
+
+    routes = [
+        Route("/ok", health_endpoint),
+        Route("/metrics", metrics_endpoint),
+    ]
+    app = Starlette(routes=routes)
+    if lc_config.MOUNT_PREFIX:
+        app = Starlette(
+            routes=[*routes, Mount(lc_config.MOUNT_PREFIX, app=app)],
+            lifespan=app.router.lifespan_context,
+            exception_handlers=app.exception_handlers,
         )
 
-    app = Starlette(
-        routes=[
-            Route("/ok", health_endpoint),
-            Route("/metrics", metrics_endpoint),
-        ]
-    )
+    try:
+        _ensure_port_available(host, port)
+    except HealthServerStartupError as exc:
+        await logger.aerror(
+            str(exc),
+            host=exc.host,
+            port=exc.port,
+            cause=str(exc.cause),
+        )
+        raise
 
     config = uvicorn.Config(
         app,
-        host="0.0.0.0",
+        host=host,
         port=port,
         log_level="error",
         access_log=False,
     )
+    # Server will run indefinitely until the process is terminated
     server = uvicorn.Server(config)
 
-    logger.info(f"Health and metrics server started at http://0.0.0.0:{port}")
-    await server.serve()
+    logger.info(f"Health and metrics server started at http://{host}:{port}")
+    try:
+        await server.serve()
+    except SystemExit as exc:
+        if exc.code == 0:
+            return
+        try:
+            _ensure_port_available(host, port)
+        except HealthServerStartupError as port_exc:
+            await logger.aerror(
+                str(port_exc),
+                host=port_exc.host,
+                port=port_exc.port,
+                cause=str(port_exc.cause),
+            )
+            raise port_exc from None
+        error = HealthServerStartupError(host, port, exc)
+        await logger.aerror(
+            str(error), host=error.host, port=error.port, cause=str(error.cause)
+        )
+        raise error from None
+    except OSError as exc:
+        error = HealthServerStartupError(host, port, exc)
+        await logger.aerror(
+            str(error), host=error.host, port=error.port, cause=str(error.cause)
+        )
+        raise error from exc
 
 
 async def entrypoint(
-    grpc_port: int | None = None, entrypoint_name: str = "python-queue"
+    grpc_port: int | None = None,
+    entrypoint_name: str = "python-queue",
+    cancel_event: asyncio.Event | None = None,
 ):
     from langgraph_api import logging as lg_logging
+    from langgraph_api import timing
     from langgraph_api.api import user_router
+    from langgraph_api.server import app
 
     lg_logging.set_logging_context({"entrypoint": entrypoint_name})
     tasks: set[asyncio.Task] = set()
-    tasks.add(asyncio.create_task(health_and_metrics_server()))
-
-    original_lifespan = user_router.router.lifespan_context if user_router else None
-
-    @asynccontextmanager
-    async def combined_lifespan(
-        app, with_cron_scheduler=False, grpc_port=None, taskset=None
-    ):
-        async with lifespan(
-            app,
-            with_cron_scheduler=with_cron_scheduler,
+    user_lifespan = None if user_router is None else user_router.router.lifespan_context
+    wrapped_lifespan = timing.combine_lifespans(
+        functools.partial(
+            lifespan.lifespan,
+            with_cron_scheduler=False,
             grpc_port=grpc_port,
-            taskset=taskset,
-        ):
-            if original_lifespan:
-                async with original_lifespan(app):
-                    yield
-            else:
-                yield
-
-    async with combined_lifespan(
-        None, with_cron_scheduler=False, grpc_port=grpc_port, taskset=tasks
-    ):
+            taskset=tasks,
+            cancel_event=cancel_event,
+        ),
+        user_lifespan,
+    )
+
+    async with wrapped_lifespan(app):
+        tasks.add(asyncio.create_task(health_and_metrics_server()))
         await asyncio.gather(*tasks)
 
 
 async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queue"):
     """Run the queue entrypoint and shut down gracefully on SIGTERM/SIGINT."""
+
     loop = asyncio.get_running_loop()
     stop_event = asyncio.Event()
 
@@ -141,14 +209,35 @@ async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queu
     signal.signal(signal.SIGTERM, lambda *_: _handle_signal())
 
     entry_task = asyncio.create_task(
-        entrypoint(grpc_port=grpc_port, entrypoint_name=entrypoint_name)
+        entrypoint(
+            grpc_port=grpc_port,
+            entrypoint_name=entrypoint_name,
+            cancel_event=stop_event,
+        )
     )
+    # Handle the case where the entrypoint errors out
+    entry_task.add_done_callback(lambda _: stop_event.set())
     await stop_event.wait()
 
     logger.warning("Cancelling queue entrypoint task")
     entry_task.cancel()
-    with contextlib.suppress(asyncio.CancelledError):
+    try:
         await entry_task
+    except asyncio.CancelledError:
+        pass
+    except (GraphLoadError, HealthServerStartupError) as exc:
+        raise SystemExit(1) from exc
+    except RuntimeError as exc:
+        if str(exc) == "generator didn't yield":
+            last_error = lifespan.get_last_error()
+            if last_error is not None:
+                logger.exception(
+                    "Application startup failed",
+                    error_type=type(last_error).__name__,
+                    error_message=str(last_error),
+                )
+            raise SystemExit(1) from None
+        raise
 
 
 if __name__ == "__main__":
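Editorial sketch, not part of the diff: exercising the reworked health/metrics server locally, assuming the default PORT of 8080, default host, and reachable Postgres/Redis so the new /ok healthcheck passes.

import json
import urllib.request

BASE = "http://localhost:8080"  # PORT env var default in health_and_metrics_server

# /ok now errors out unless the db/redis healthcheck passes
with urllib.request.urlopen(f"{BASE}/ok") as resp:
    print(resp.status)  # 200

# Default Prometheus exposition format
with urllib.request.urlopen(f"{BASE}/metrics") as resp:
    print(resp.read().decode().splitlines()[0])  # "# HELP lg_api_workers_max ..."

# New JSON format: pool stats merged with {"workers": {...}}
with urllib.request.urlopen(f"{BASE}/metrics?format=json") as resp:
    stats = json.load(resp)
    print(stats["workers"])  # {"max": ..., "active": ..., "available": ...}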
langgraph_api/route.py CHANGED
@@ -4,6 +4,7 @@ import typing
 
 import jsonschema_rs
 import orjson
+import structlog
 from starlette._exception_handler import wrap_app_handling_exceptions
 from starlette._utils import is_async_callable
 from starlette.concurrency import run_in_threadpool
@@ -14,9 +15,11 @@ from starlette.responses import JSONResponse
 from starlette.routing import Route, compile_path, get_name
 from starlette.types import ASGIApp, Receive, Scope, Send
 
+from langgraph_api import config
 from langgraph_api.serde import json_dumpb
 from langgraph_api.utils import get_auth_ctx, with_user
 
+logger = structlog.getLogger(__name__)
 SchemaType = (
     jsonschema_rs.Draft4Validator
     | jsonschema_rs.Draft6Validator
@@ -43,7 +46,7 @@ def api_request_response(
             response: ASGIApp = await func(request)
         else:
             response = await run_in_threadpool(
-                typing.cast(typing.Callable[[Request], ASGIApp], func), request
+                typing.cast("typing.Callable[[Request], ASGIApp]", func), request
             )
         await response(scope, receive, send)
 
@@ -58,10 +61,11 @@ class ApiResponse(JSONResponse):
 
 
 def _json_loads(content: bytearray, schema: SchemaType) -> typing.Any:
-    json = orjson.loads(content)
+    """Parse JSON and validate schema. Used by threadpool for large payloads."""
+    json_data = orjson.loads(content)
     if schema is not None:
-        schema.validate(json)
-    return json
+        schema.validate(json_data)
+    return json_data
 
 
 class ApiRequest(Request):
@@ -76,8 +80,16 @@ class ApiRequest(Request):
     async def json(self, schema: SchemaType = None) -> typing.Any:
         if not hasattr(self, "_json"):
             body = await self.body()
+
+            # Hybrid approach for optimal performance:
+            # - Small payloads: parse directly (fast, no queueing/thread pool limitations)
+            # - Large payloads: use dedicated thread pool (safer, doesn't block event loop)
             try:
-                self._json = await run_in_threadpool(_json_loads, body, schema)
+                self._json = (
+                    await run_in_threadpool(_json_loads, body, schema)
+                    if len(body) > config.JSON_THREAD_POOL_MINIMUM_SIZE_BYTES
+                    else _json_loads(body, schema)
+                )
             except orjson.JSONDecodeError as e:
                 raise HTTPException(
                     status_code=422, detail="Invalid JSON in request body"
@@ -135,6 +147,8 @@ class ApiRoute(Route):
 
         scope["route"] = self.path
         set_logging_context({"path": self.path, "method": scope.get("method")})
+        route_pattern = f"{scope.get('root_path', '')}{self.path}"
+        _name_otel_span(scope, route_pattern)
         ctx = get_auth_ctx()
         if ctx:
             user, auth = ctx.user, ctx.permissions
@@ -142,3 +156,31 @@ class ApiRoute(Route):
             user, auth = scope.get("user"), scope.get("auth")
         async with with_user(user, auth):
             return await super().handle(scope, receive, send)
+
+
+def _name_otel_span(scope: Scope, route_pattern: str):
+    """Best-effort rename of the active OTEL server span to include the route.
+
+    - No-ops if OTEL is disabled or OTEL libs are unavailable.
+    - Sets span name to "METHOD /templated/path" and attaches http.route.
+    - Never raises; safe for hot path usage.
+    """
+    if not config.OTEL_ENABLED:
+        return
+    try:
+        from opentelemetry.trace import get_current_span
+
+        span = get_current_span()
+        if span.is_recording():
+            method = scope.get("method", "") or ""
+            try:
+                span.update_name(f"{method} {route_pattern}")
+            except Exception:
+                logger.error("Failed to update OTEL span name", exc_info=True)
+                pass
+            try:
+                span.set_attribute("http.route", route_pattern)
+            except Exception:
+                logger.error("Failed to update OTEL span attributes", exc_info=True)
+    except Exception:
+        logger.error("Failed to update OTEL span", exc_info=True)
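Editorial sketch, not part of the diff: the hybrid parsing path in ApiRequest.json dispatches on body size. A condensed version of that decision is below; the threshold value shown is a placeholder, since the real one comes from config.JSON_THREAD_POOL_MINIMUM_SIZE_BYTES.

import orjson
from starlette.concurrency import run_in_threadpool

JSON_THREAD_POOL_MINIMUM_SIZE_BYTES = 64 * 1024  # placeholder threshold

async def parse_json_body(body: bytes):
    if len(body) > JSON_THREAD_POOL_MINIMUM_SIZE_BYTES:
        # Large payload: parse in a worker thread so the event loop stays responsive.
        return await run_in_threadpool(orjson.loads, body)
    # Small payload: inline parse beats the thread-pool handoff overhead.
    return orjson.loads(body)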