langgraph-api 0.4.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. langgraph_api/__init__.py +1 -1
  2. langgraph_api/api/__init__.py +111 -51
  3. langgraph_api/api/a2a.py +1610 -0
  4. langgraph_api/api/assistants.py +212 -89
  5. langgraph_api/api/mcp.py +3 -3
  6. langgraph_api/api/meta.py +52 -28
  7. langgraph_api/api/openapi.py +27 -17
  8. langgraph_api/api/profile.py +108 -0
  9. langgraph_api/api/runs.py +342 -195
  10. langgraph_api/api/store.py +19 -2
  11. langgraph_api/api/threads.py +209 -27
  12. langgraph_api/asgi_transport.py +14 -9
  13. langgraph_api/asyncio.py +14 -4
  14. langgraph_api/auth/custom.py +52 -37
  15. langgraph_api/auth/langsmith/backend.py +4 -3
  16. langgraph_api/auth/langsmith/client.py +13 -8
  17. langgraph_api/cli.py +230 -133
  18. langgraph_api/command.py +5 -3
  19. langgraph_api/config/__init__.py +532 -0
  20. langgraph_api/config/_parse.py +58 -0
  21. langgraph_api/config/schemas.py +431 -0
  22. langgraph_api/cron_scheduler.py +17 -1
  23. langgraph_api/encryption/__init__.py +15 -0
  24. langgraph_api/encryption/aes_json.py +158 -0
  25. langgraph_api/encryption/context.py +35 -0
  26. langgraph_api/encryption/custom.py +280 -0
  27. langgraph_api/encryption/middleware.py +632 -0
  28. langgraph_api/encryption/shared.py +63 -0
  29. langgraph_api/errors.py +12 -1
  30. langgraph_api/executor_entrypoint.py +11 -6
  31. langgraph_api/feature_flags.py +29 -0
  32. langgraph_api/graph.py +176 -76
  33. langgraph_api/grpc/client.py +313 -0
  34. langgraph_api/grpc/config_conversion.py +231 -0
  35. langgraph_api/grpc/generated/__init__.py +29 -0
  36. langgraph_api/grpc/generated/checkpointer_pb2.py +63 -0
  37. langgraph_api/grpc/generated/checkpointer_pb2.pyi +99 -0
  38. langgraph_api/grpc/generated/checkpointer_pb2_grpc.py +329 -0
  39. langgraph_api/grpc/generated/core_api_pb2.py +216 -0
  40. langgraph_api/grpc/generated/core_api_pb2.pyi +905 -0
  41. langgraph_api/grpc/generated/core_api_pb2_grpc.py +1621 -0
  42. langgraph_api/grpc/generated/engine_common_pb2.py +219 -0
  43. langgraph_api/grpc/generated/engine_common_pb2.pyi +722 -0
  44. langgraph_api/grpc/generated/engine_common_pb2_grpc.py +24 -0
  45. langgraph_api/grpc/generated/enum_cancel_run_action_pb2.py +37 -0
  46. langgraph_api/grpc/generated/enum_cancel_run_action_pb2.pyi +12 -0
  47. langgraph_api/grpc/generated/enum_cancel_run_action_pb2_grpc.py +24 -0
  48. langgraph_api/grpc/generated/enum_control_signal_pb2.py +37 -0
  49. langgraph_api/grpc/generated/enum_control_signal_pb2.pyi +16 -0
  50. langgraph_api/grpc/generated/enum_control_signal_pb2_grpc.py +24 -0
  51. langgraph_api/grpc/generated/enum_durability_pb2.py +37 -0
  52. langgraph_api/grpc/generated/enum_durability_pb2.pyi +16 -0
  53. langgraph_api/grpc/generated/enum_durability_pb2_grpc.py +24 -0
  54. langgraph_api/grpc/generated/enum_multitask_strategy_pb2.py +37 -0
  55. langgraph_api/grpc/generated/enum_multitask_strategy_pb2.pyi +16 -0
  56. langgraph_api/grpc/generated/enum_multitask_strategy_pb2_grpc.py +24 -0
  57. langgraph_api/grpc/generated/enum_run_status_pb2.py +37 -0
  58. langgraph_api/grpc/generated/enum_run_status_pb2.pyi +22 -0
  59. langgraph_api/grpc/generated/enum_run_status_pb2_grpc.py +24 -0
  60. langgraph_api/grpc/generated/enum_stream_mode_pb2.py +37 -0
  61. langgraph_api/grpc/generated/enum_stream_mode_pb2.pyi +28 -0
  62. langgraph_api/grpc/generated/enum_stream_mode_pb2_grpc.py +24 -0
  63. langgraph_api/grpc/generated/enum_thread_status_pb2.py +37 -0
  64. langgraph_api/grpc/generated/enum_thread_status_pb2.pyi +16 -0
  65. langgraph_api/grpc/generated/enum_thread_status_pb2_grpc.py +24 -0
  66. langgraph_api/grpc/generated/enum_thread_stream_mode_pb2.py +37 -0
  67. langgraph_api/grpc/generated/enum_thread_stream_mode_pb2.pyi +16 -0
  68. langgraph_api/grpc/generated/enum_thread_stream_mode_pb2_grpc.py +24 -0
  69. langgraph_api/grpc/generated/errors_pb2.py +39 -0
  70. langgraph_api/grpc/generated/errors_pb2.pyi +21 -0
  71. langgraph_api/grpc/generated/errors_pb2_grpc.py +24 -0
  72. langgraph_api/grpc/ops/__init__.py +370 -0
  73. langgraph_api/grpc/ops/assistants.py +424 -0
  74. langgraph_api/grpc/ops/runs.py +792 -0
  75. langgraph_api/grpc/ops/threads.py +1013 -0
  76. langgraph_api/http.py +16 -5
  77. langgraph_api/http_metrics.py +15 -35
  78. langgraph_api/http_metrics_utils.py +38 -0
  79. langgraph_api/js/build.mts +1 -1
  80. langgraph_api/js/client.http.mts +13 -7
  81. langgraph_api/js/client.mts +2 -5
  82. langgraph_api/js/package.json +29 -28
  83. langgraph_api/js/remote.py +56 -30
  84. langgraph_api/js/src/graph.mts +20 -0
  85. langgraph_api/js/sse.py +2 -2
  86. langgraph_api/js/ui.py +1 -1
  87. langgraph_api/js/yarn.lock +1204 -1006
  88. langgraph_api/logging.py +29 -2
  89. langgraph_api/metadata.py +99 -28
  90. langgraph_api/middleware/http_logger.py +7 -2
  91. langgraph_api/middleware/private_network.py +7 -7
  92. langgraph_api/models/run.py +54 -93
  93. langgraph_api/otel_context.py +205 -0
  94. langgraph_api/patch.py +5 -3
  95. langgraph_api/queue_entrypoint.py +154 -65
  96. langgraph_api/route.py +47 -5
  97. langgraph_api/schema.py +88 -10
  98. langgraph_api/self_hosted_logs.py +124 -0
  99. langgraph_api/self_hosted_metrics.py +450 -0
  100. langgraph_api/serde.py +79 -37
  101. langgraph_api/server.py +138 -60
  102. langgraph_api/state.py +4 -3
  103. langgraph_api/store.py +25 -16
  104. langgraph_api/stream.py +80 -29
  105. langgraph_api/thread_ttl.py +31 -13
  106. langgraph_api/timing/__init__.py +25 -0
  107. langgraph_api/timing/profiler.py +200 -0
  108. langgraph_api/timing/timer.py +318 -0
  109. langgraph_api/utils/__init__.py +53 -8
  110. langgraph_api/utils/cache.py +47 -10
  111. langgraph_api/utils/config.py +2 -1
  112. langgraph_api/utils/errors.py +77 -0
  113. langgraph_api/utils/future.py +10 -6
  114. langgraph_api/utils/headers.py +76 -2
  115. langgraph_api/utils/retriable_client.py +74 -0
  116. langgraph_api/utils/stream_codec.py +315 -0
  117. langgraph_api/utils/uuids.py +29 -62
  118. langgraph_api/validation.py +9 -0
  119. langgraph_api/webhook.py +120 -6
  120. langgraph_api/worker.py +55 -24
  121. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/METADATA +16 -8
  122. langgraph_api-0.7.3.dist-info/RECORD +168 -0
  123. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/WHEEL +1 -1
  124. langgraph_runtime/__init__.py +1 -0
  125. langgraph_runtime/routes.py +11 -0
  126. logging.json +1 -3
  127. openapi.json +839 -478
  128. langgraph_api/config.py +0 -387
  129. langgraph_api/js/isolate-0x130008000-46649-46649-v8.log +0 -4430
  130. langgraph_api/js/isolate-0x138008000-44681-44681-v8.log +0 -4430
  131. langgraph_api/js/package-lock.json +0 -3308
  132. langgraph_api-0.4.1.dist-info/RECORD +0 -107
  133. /langgraph_api/{utils.py → grpc/__init__.py} +0 -0
  134. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/entry_points.txt +0 -0
  135. {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/licenses/LICENSE +0 -0
langgraph_api/otel_context.py ADDED
@@ -0,0 +1,205 @@
+"""OTEL trace context propagation utilities.
+
+Provides helpers for extracting, storing, and restoring W3C Trace Context
+across the API-to-worker boundary in distributed LangGraph deployments.
+"""
+
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Any
+
+import structlog
+
+from langgraph_api import __version__, config
+
+if TYPE_CHECKING:
+    from collections.abc import Generator, Mapping
+
+    from opentelemetry.trace import Tracer
+
+logger = structlog.stdlib.get_logger(__name__)
+
+# Constants for storing trace context in configurable
+OTEL_TRACEPARENT_KEY = "__otel_traceparent__"
+OTEL_TRACESTATE_KEY = "__otel_tracestate__"
+OTEL_TRACER_NAME = "langsmith_agent_server"
+OTEL_RUN_ID_ATTR_NAME = "langsmith.run_id"
+OTEL_THREAD_ID_ATTR_NAME = "langsmith.thread_id"
+
+# Cached instances (initialized lazily, once)
+_propagator: Any = None
+_tracer: Any = None
+_otel_available: bool | None = None
+
+
+def _check_otel_available() -> bool:
+    """Check if OpenTelemetry is available. Cached after first call."""
+    global _otel_available
+    if _otel_available is None:
+        try:
+            from opentelemetry import trace  # noqa: F401
+            from opentelemetry.trace.propagation.tracecontext import (
+                TraceContextTextMapPropagator,  # noqa: F401
+            )
+
+            _otel_available = True
+        except ImportError:
+            _otel_available = False
+    return _otel_available
+
+
+def _get_propagator() -> Any:
+    """Get cached W3C TraceContext propagator."""
+    global _propagator
+    if _propagator is None:
+        from opentelemetry.trace.propagation.tracecontext import (
+            TraceContextTextMapPropagator,
+        )
+
+        _propagator = TraceContextTextMapPropagator()
+    return _propagator
+
+
+def _get_tracer() -> Tracer:
+    """Get cached tracer for worker spans."""
+    global _tracer
+    if _tracer is None:
+        from opentelemetry import trace
+
+        _tracer = trace.get_tracer(
+            OTEL_TRACER_NAME, instrumenting_library_version=__version__
+        )
+    return _tracer
+
+
+def extract_otel_headers_to_configurable(
+    headers: Mapping[str, str],
+    configurable: dict[str, Any],
+) -> None:
+    """Extract traceparent/tracestate from HTTP headers into configurable dict.
+
+    Only extracts if OTEL is enabled. No-op otherwise.
+
+    Args:
+        headers: HTTP headers from the incoming request
+        configurable: The configurable dict to store trace context in
+    """
+    if not config.OTEL_ENABLED:
+        return
+
+    if traceparent := headers.get("traceparent"):
+        configurable[OTEL_TRACEPARENT_KEY] = traceparent
+    if tracestate := headers.get("tracestate"):
+        configurable[OTEL_TRACESTATE_KEY] = tracestate
+
+
+def inject_current_trace_context(configurable: dict[str, Any]) -> None:
+    """Inject current OTEL trace context into configurable for worker propagation.
+
+    This captures the active span context (e.g., from Starlette auto-instrumentation)
+    and stores it in the configurable dict so workers can restore it and create
+    child spans under the API request span.
+
+    Args:
+        configurable: The configurable dict to store trace context in
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        return
+
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        if not span.is_recording():
+            return
+
+        carrier: dict[str, str] = {}
+        _get_propagator().inject(carrier)
+
+        if traceparent := carrier.get("traceparent"):
+            configurable[OTEL_TRACEPARENT_KEY] = traceparent
+        if tracestate := carrier.get("tracestate"):
+            configurable[OTEL_TRACESTATE_KEY] = tracestate
+    except Exception:
+        # Never fail - tracing issues shouldn't break functionality
+        pass
+
+
+@contextmanager
+def restore_otel_trace_context(
+    configurable: dict[str, Any],
+    run_id: str | None = None,
+    thread_id: str | None = None,
+) -> Generator[None, None, None]:
+    """Restore OTEL trace context and create child span for worker execution.
+
+    Creates a child span under the original API request span, ensuring
+    distributed traces are connected across the API-to-worker boundary.
+
+    Yields:
+        None - execution continues within the restored trace context
+
+    Note:
+        - No-ops if OTEL is disabled or unavailable
+        - Never raises - tracing failures won't break run execution
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        yield
+        return
+
+    traceparent = configurable.get(OTEL_TRACEPARENT_KEY)
+    if not traceparent:
+        yield
+        return
+
+    try:
+        from opentelemetry import trace
+
+        # Build carrier dict for W3C propagator
+        carrier: dict[str, str] = {"traceparent": traceparent}
+        if tracestate := configurable.get(OTEL_TRACESTATE_KEY):
+            carrier["tracestate"] = tracestate
+
+        # Extract context from carrier
+        ctx = _get_propagator().extract(carrier=carrier)
+
+        with _get_tracer().start_as_current_span(
+            "worker.stream_run",
+            context=ctx,
+            kind=trace.SpanKind.CONSUMER,
+        ) as span:
+            if run_id:
+                span.set_attribute(OTEL_RUN_ID_ATTR_NAME, run_id)
+            if thread_id:
+                span.set_attribute(OTEL_THREAD_ID_ATTR_NAME, thread_id)
+
+            yield
+    except Exception:
+        logger.debug("Failed to restore OTEL trace context", exc_info=True)
+        yield
+
+
+def inject_otel_headers() -> dict[str, str]:
+    """Inject current trace context into headers for outgoing HTTP requests.
+
+    Used to propagate trace context to webhooks.
+
+    Returns:
+        Dict with traceparent/tracestate headers if in active trace, else empty.
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        return {}
+
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        if not span.is_recording():
+            return {}
+
+        carrier: dict[str, str] = {}
+        _get_propagator().inject(carrier)
+        return carrier
+    except Exception:
+        return {}
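Note: the sketch below is editorial commentary, not part of the diff. It shows how the new otel_context helpers are intended to compose across the API-to-worker boundary, assuming the configurable dict travels with the run config from API to worker; the surrounding wiring is hypothetical, and the traceparent value is the W3C spec example. All helpers no-op unless OTEL is enabled in config.

# API side: capture the caller's W3C trace context from the incoming request
# (no-ops unless config.OTEL_ENABLED is set).
from langgraph_api.otel_context import (
    extract_otel_headers_to_configurable,
    inject_otel_headers,
    restore_otel_trace_context,
)

configurable: dict = {}
extract_otel_headers_to_configurable(
    {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"},
    configurable,
)

# Worker side (hypothetical wrapper): restore the context and execute under a
# child span ("worker.stream_run") linked back to the API request span.
def execute_run(configurable: dict, run_id: str, thread_id: str) -> None:
    with restore_otel_trace_context(configurable, run_id=run_id, thread_id=thread_id):
        ...  # graph execution happens inside the restored trace

# Outgoing webhooks: forward the active trace context as HTTP headers.
webhook_headers = inject_otel_headers()  # {} if OTEL disabled or no active span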
langgraph_api/patch.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
 from starlette.responses import Response, StreamingResponse
 from starlette.types import Send
 
-from langgraph_api.serde import Fragment
+from langgraph_api.serde import Fragment, json_dumpb
 
 """
 Patch Response.render and StreamingResponse.stream_response
@@ -14,7 +14,7 @@ to recognize bytearrays and memoryviews as bytes-like objects.
 def Response_render(self, content: Any) -> bytes:
     if content is None:
         return b""
-    if isinstance(content, (bytes, bytearray, memoryview)):  # noqa: UP038
+    if isinstance(content, (bytes, bytearray, memoryview)):
         return content
     return content.encode(self.charset)  # type: ignore
 
@@ -32,7 +32,9 @@ async def StreamingResponse_stream_response(self, send: Send) -> None:
             continue
         if isinstance(chunk, Fragment):
             chunk = chunk.buf
-        if not isinstance(chunk, (bytes, bytearray, memoryview)):  # noqa: UP038
+        if isinstance(chunk, dict):
+            chunk = json_dumpb(chunk)
+        if not isinstance(chunk, (bytes, bytearray, memoryview)):
            chunk = chunk.encode(self.charset)
         await send({"type": "http.response.body", "body": chunk, "more_body": True})
 
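Editorial sketch, not part of the diff: with the patch above applied, a body iterator may now yield plain dicts, which stream_response serializes via json_dumpb; previously a dict chunk would reach chunk.encode(...) and raise AttributeError. The endpoint wiring here is illustrative.

from starlette.responses import StreamingResponse

async def body_iterator():
    # dict chunk: serialized by the patched stream_response via json_dumpb
    yield {"event": "metadata", "data": {"run_id": "abc123"}}
    # bytes-like chunk: passed through unchanged
    yield b"done\n"

response = StreamingResponse(body_iterator(), media_type="application/json")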
langgraph_api/queue_entrypoint.py CHANGED
@@ -1,46 +1,68 @@
-# ruff: noqa: E402
 import os
 
 if not (
     (disable_truststore := os.getenv("DISABLE_TRUSTSTORE"))
     and disable_truststore.lower() == "true"
 ):
-    import truststore  # noqa: F401
+    import truststore
 
-    truststore.inject_into_ssl()  # noqa: F401
+    truststore.inject_into_ssl()
 
 import asyncio
-import contextlib
+import functools
 import json
 import logging.config
 import pathlib
 import signal
-from contextlib import asynccontextmanager
-from typing import cast
+import socket
 
 import structlog
 
-from langgraph_runtime.database import pool_stats
-from langgraph_runtime.lifespan import lifespan
+from langgraph_api.utils.errors import GraphLoadError, HealthServerStartupError
+from langgraph_runtime import lifespan
+from langgraph_runtime.database import healthcheck, pool_stats
 from langgraph_runtime.metrics import get_metrics
 
 logger = structlog.stdlib.get_logger(__name__)
 
 
+def _ensure_port_available(host: str, port: int) -> None:
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            sock.bind((host, port))
+    except OSError as exc:
+        raise HealthServerStartupError(host, port, exc) from exc
+
+
 async def health_and_metrics_server():
     import uvicorn
     from starlette.applications import Starlette
+    from starlette.requests import Request
     from starlette.responses import JSONResponse, PlainTextResponse
-    from starlette.routing import Route
+    from starlette.routing import Mount, Route
+
+    from langgraph_api import config as lc_config
+    from langgraph_api.api.meta import METRICS_FORMATS
 
     port = int(os.getenv("PORT", "8080"))
+    host = os.getenv("LANGGRAPH_SERVER_HOST", "0.0.0.0")
 
     async def health_endpoint(request):
+        # if db or redis is not healthy, this will raise an exception
+        await healthcheck()
        return JSONResponse({"status": "ok"})
 
-    async def metrics_endpoint(request):
+    async def metrics_endpoint(request: Request):
+        metrics_format = request.query_params.get("format", "prometheus")
+        if metrics_format not in METRICS_FORMATS:
+            await logger.awarning(
+                f"metrics format {metrics_format} not supported, falling back to prometheus"
+            )
+            metrics_format = "prometheus"
+
         metrics = get_metrics()
-        worker_metrics = cast(dict[str, int], metrics["workers"])
+        worker_metrics = metrics["workers"]
         workers_max = worker_metrics["max"]
         workers_active = worker_metrics["active"]
         workers_available = worker_metrics["available"]
@@ -48,86 +70,132 @@ async def health_and_metrics_server():
         project_id = os.getenv("LANGSMITH_HOST_PROJECT_ID")
         revision_id = os.getenv("LANGSMITH_HOST_REVISION_ID")
 
-        metrics_lines = [
-            "# HELP lg_api_workers_max The maximum number of workers available.",
-            "# TYPE lg_api_workers_max gauge",
-            f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
-            "# HELP lg_api_workers_active The number of currently active workers.",
-            "# TYPE lg_api_workers_active gauge",
-            f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
-            "# HELP lg_api_workers_available The number of available (idle) workers.",
-            "# TYPE lg_api_workers_available gauge",
-            f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
-        ]
-
-        metrics_lines.extend(
-            pool_stats(
-                project_id=project_id,
-                revision_id=revision_id,
-            )
+        pg_redis_stats = pool_stats(
+            project_id=project_id,
+            revision_id=revision_id,
+            format=metrics_format,
         )
 
-        return PlainTextResponse(
-            "\n".join(metrics_lines),
-            media_type="text/plain; version=0.0.4; charset=utf-8",
+        if metrics_format == "json":
+            resp = {
+                **pg_redis_stats,
+                "workers": worker_metrics,
+            }
+            return JSONResponse(resp)
+        elif metrics_format == "prometheus":
+            metrics_lines = [
+                "# HELP lg_api_workers_max The maximum number of workers available.",
+                "# TYPE lg_api_workers_max gauge",
+                f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
+                "# HELP lg_api_workers_active The number of currently active workers.",
+                "# TYPE lg_api_workers_active gauge",
+                f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
+                "# HELP lg_api_workers_available The number of available (idle) workers.",
+                "# TYPE lg_api_workers_available gauge",
+                f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
+            ]
+
+            metrics_lines.extend(pg_redis_stats)
+
+            return PlainTextResponse(
+                "\n".join(metrics_lines),
+                media_type="text/plain; version=0.0.4; charset=utf-8",
+            )
+
+    routes = [
+        Route("/ok", health_endpoint),
+        Route("/metrics", metrics_endpoint),
+    ]
+    app = Starlette(routes=routes)
+    if lc_config.MOUNT_PREFIX:
+        app = Starlette(
+            routes=[*routes, Mount(lc_config.MOUNT_PREFIX, app=app)],
+            lifespan=app.router.lifespan_context,
+            exception_handlers=app.exception_handlers,
         )
 
-    app = Starlette(
-        routes=[
-            Route("/ok", health_endpoint),
-            Route("/metrics", metrics_endpoint),
-        ]
-    )
+    try:
+        _ensure_port_available(host, port)
+    except HealthServerStartupError as exc:
+        await logger.aerror(
+            str(exc),
+            host=exc.host,
+            port=exc.port,
+            cause=str(exc.cause),
+        )
+        raise
 
     config = uvicorn.Config(
         app,
-        host="0.0.0.0",
+        host=host,
         port=port,
         log_level="error",
         access_log=False,
     )
+    # Server will run indefinitely until the process is terminated
     server = uvicorn.Server(config)
 
-    logger.info(f"Health and metrics server started at http://0.0.0.0:{port}")
-    await server.serve()
+    logger.info(f"Health and metrics server started at http://{host}:{port}")
+    try:
+        await server.serve()
+    except SystemExit as exc:
+        if exc.code == 0:
+            return
+        try:
+            _ensure_port_available(host, port)
+        except HealthServerStartupError as port_exc:
+            await logger.aerror(
+                str(port_exc),
+                host=port_exc.host,
+                port=port_exc.port,
+                cause=str(port_exc.cause),
+            )
+            raise port_exc from None
+        error = HealthServerStartupError(host, port, exc)
+        await logger.aerror(
+            str(error), host=error.host, port=error.port, cause=str(error.cause)
+        )
+        raise error from None
+    except OSError as exc:
+        error = HealthServerStartupError(host, port, exc)
+        await logger.aerror(
+            str(error), host=error.host, port=error.port, cause=str(error.cause)
+        )
+        raise error from exc
 
 
 async def entrypoint(
-    grpc_port: int | None = None, entrypoint_name: str = "python-queue"
+    grpc_port: int | None = None,
+    entrypoint_name: str = "python-queue",
+    cancel_event: asyncio.Event | None = None,
 ):
     from langgraph_api import logging as lg_logging
+    from langgraph_api import timing
     from langgraph_api.api import user_router
+    from langgraph_api.server import app
 
     lg_logging.set_logging_context({"entrypoint": entrypoint_name})
     tasks: set[asyncio.Task] = set()
-    tasks.add(asyncio.create_task(health_and_metrics_server()))
-
-    original_lifespan = user_router.router.lifespan_context if user_router else None
-
-    @asynccontextmanager
-    async def combined_lifespan(
-        app, with_cron_scheduler=False, grpc_port=None, taskset=None
-    ):
-        async with lifespan(
-            app,
-            with_cron_scheduler=with_cron_scheduler,
+    user_lifespan = None if user_router is None else user_router.router.lifespan_context
+    wrapped_lifespan = timing.combine_lifespans(
+        functools.partial(
+            lifespan.lifespan,
+            with_cron_scheduler=False,
             grpc_port=grpc_port,
-            taskset=taskset,
-        ):
-            if original_lifespan:
-                async with original_lifespan(app):
-                    yield
-            else:
-                yield
-
-    async with combined_lifespan(
-        None, with_cron_scheduler=False, grpc_port=grpc_port, taskset=tasks
-    ):
+            taskset=tasks,
+            cancel_event=cancel_event,
+        ),
+        user_lifespan,
+    )
+
+    async with wrapped_lifespan(app):
+        tasks.add(asyncio.create_task(health_and_metrics_server()))
         await asyncio.gather(*tasks)
 
 
 async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queue"):
     """Run the queue entrypoint and shut down gracefully on SIGTERM/SIGINT."""
+
     loop = asyncio.get_running_loop()
     stop_event = asyncio.Event()
 
@@ -141,14 +209,35 @@ async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queu
     signal.signal(signal.SIGTERM, lambda *_: _handle_signal())
 
     entry_task = asyncio.create_task(
-        entrypoint(grpc_port=grpc_port, entrypoint_name=entrypoint_name)
+        entrypoint(
+            grpc_port=grpc_port,
+            entrypoint_name=entrypoint_name,
+            cancel_event=stop_event,
+        )
     )
+    # Handle the case where the entrypoint errors out
+    entry_task.add_done_callback(lambda _: stop_event.set())
     await stop_event.wait()
 
     logger.warning("Cancelling queue entrypoint task")
     entry_task.cancel()
-    with contextlib.suppress(asyncio.CancelledError):
+    try:
         await entry_task
+    except asyncio.CancelledError:
+        pass
+    except (GraphLoadError, HealthServerStartupError) as exc:
+        raise SystemExit(1) from exc
+    except RuntimeError as exc:
+        if str(exc) == "generator didn't yield":
+            last_error = lifespan.get_last_error()
+            if last_error is not None:
+                logger.exception(
+                    "Application startup failed",
+                    error_type=type(last_error).__name__,
+                    error_message=str(last_error),
+                )
+            raise SystemExit(1) from None
+        raise
 
 
 if __name__ == "__main__":
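Editorial sketch, not part of the diff: exercising the reworked health/metrics server locally, assuming the default PORT of 8080, default host, and reachable Postgres/Redis so the new /ok healthcheck passes.

import json
import urllib.request

BASE = "http://localhost:8080"  # PORT env var default in health_and_metrics_server

# /ok now errors out unless the db/redis healthcheck passes
with urllib.request.urlopen(f"{BASE}/ok") as resp:
    print(resp.status)  # 200

# Default Prometheus exposition format
with urllib.request.urlopen(f"{BASE}/metrics") as resp:
    print(resp.read().decode().splitlines()[0])  # "# HELP lg_api_workers_max ..."

# New JSON format: pool stats merged with {"workers": {...}}
with urllib.request.urlopen(f"{BASE}/metrics?format=json") as resp:
    stats = json.load(resp)
    print(stats["workers"])  # {"max": ..., "active": ..., "available": ...}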
langgraph_api/route.py CHANGED
@@ -4,6 +4,7 @@ import typing
 
 import jsonschema_rs
 import orjson
+import structlog
 from starlette._exception_handler import wrap_app_handling_exceptions
 from starlette._utils import is_async_callable
 from starlette.concurrency import run_in_threadpool
@@ -14,9 +15,11 @@ from starlette.responses import JSONResponse
 from starlette.routing import Route, compile_path, get_name
 from starlette.types import ASGIApp, Receive, Scope, Send
 
+from langgraph_api import config
 from langgraph_api.serde import json_dumpb
 from langgraph_api.utils import get_auth_ctx, with_user
 
+logger = structlog.getLogger(__name__)
 SchemaType = (
     jsonschema_rs.Draft4Validator
     | jsonschema_rs.Draft6Validator
@@ -43,7 +46,7 @@ def api_request_response(
             response: ASGIApp = await func(request)
         else:
             response = await run_in_threadpool(
-                typing.cast(typing.Callable[[Request], ASGIApp], func), request
+                typing.cast("typing.Callable[[Request], ASGIApp]", func), request
             )
         await response(scope, receive, send)
 
@@ -58,10 +61,11 @@ class ApiResponse(JSONResponse):
 
 
 def _json_loads(content: bytearray, schema: SchemaType) -> typing.Any:
-    json = orjson.loads(content)
+    """Parse JSON and validate schema. Used by threadpool for large payloads."""
+    json_data = orjson.loads(content)
     if schema is not None:
-        schema.validate(json)
-    return json
+        schema.validate(json_data)
+    return json_data
 
 
 class ApiRequest(Request):
@@ -76,8 +80,16 @@ class ApiRequest(Request):
     async def json(self, schema: SchemaType = None) -> typing.Any:
         if not hasattr(self, "_json"):
             body = await self.body()
+
+            # Hybrid approach for optimal performance:
+            # - Small payloads: parse directly (fast, no queueing/thread pool limitations)
+            # - Large payloads: use dedicated thread pool (safer, doesn't block event loop)
             try:
-                self._json = await run_in_threadpool(_json_loads, body, schema)
+                self._json = (
+                    await run_in_threadpool(_json_loads, body, schema)
+                    if len(body) > config.JSON_THREAD_POOL_MINIMUM_SIZE_BYTES
+                    else _json_loads(body, schema)
+                )
             except orjson.JSONDecodeError as e:
                 raise HTTPException(
                     status_code=422, detail="Invalid JSON in request body"
@@ -135,6 +147,8 @@ class ApiRoute(Route):
 
         scope["route"] = self.path
         set_logging_context({"path": self.path, "method": scope.get("method")})
+        route_pattern = f"{scope.get('root_path', '')}{self.path}"
+        _name_otel_span(scope, route_pattern)
         ctx = get_auth_ctx()
         if ctx:
             user, auth = ctx.user, ctx.permissions
@@ -142,3 +156,31 @@ class ApiRoute(Route):
             user, auth = scope.get("user"), scope.get("auth")
         async with with_user(user, auth):
             return await super().handle(scope, receive, send)
+
+
+def _name_otel_span(scope: Scope, route_pattern: str):
+    """Best-effort rename of the active OTEL server span to include the route.
+
+    - No-ops if OTEL is disabled or OTEL libs are unavailable.
+    - Sets span name to "METHOD /templated/path" and attaches http.route.
+    - Never raises; safe for hot path usage.
+    """
+    if not config.OTEL_ENABLED:
+        return
+    try:
+        from opentelemetry.trace import get_current_span
+
+        span = get_current_span()
+        if span.is_recording():
+            method = scope.get("method", "") or ""
+            try:
+                span.update_name(f"{method} {route_pattern}")
+            except Exception:
+                logger.error("Failed to update OTEL span name", exc_info=True)
+                pass
+            try:
+                span.set_attribute("http.route", route_pattern)
+            except Exception:
+                logger.error("Failed to update OTEL span attributes", exc_info=True)
+    except Exception:
+        logger.error("Failed to update OTEL span", exc_info=True)
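Editorial sketch, not part of the diff: the hybrid parsing path in ApiRequest.json dispatches on body size. A condensed version of that decision is below; the threshold value shown is a placeholder, since the real one comes from config.JSON_THREAD_POOL_MINIMUM_SIZE_BYTES.

import orjson
from starlette.concurrency import run_in_threadpool

JSON_THREAD_POOL_MINIMUM_SIZE_BYTES = 64 * 1024  # placeholder threshold

async def parse_json_body(body: bytes):
    if len(body) > JSON_THREAD_POOL_MINIMUM_SIZE_BYTES:
        # Large payload: parse in a worker thread so the event loop stays responsive.
        return await run_in_threadpool(orjson.loads, body)
    # Small payload: inline parse beats the thread-pool handoff overhead.
    return orjson.loads(body)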