langgraph-api 0.4.1__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- langgraph_api/__init__.py +1 -1
- langgraph_api/api/__init__.py +111 -51
- langgraph_api/api/a2a.py +1610 -0
- langgraph_api/api/assistants.py +212 -89
- langgraph_api/api/mcp.py +3 -3
- langgraph_api/api/meta.py +52 -28
- langgraph_api/api/openapi.py +27 -17
- langgraph_api/api/profile.py +108 -0
- langgraph_api/api/runs.py +342 -195
- langgraph_api/api/store.py +19 -2
- langgraph_api/api/threads.py +209 -27
- langgraph_api/asgi_transport.py +14 -9
- langgraph_api/asyncio.py +14 -4
- langgraph_api/auth/custom.py +52 -37
- langgraph_api/auth/langsmith/backend.py +4 -3
- langgraph_api/auth/langsmith/client.py +13 -8
- langgraph_api/cli.py +230 -133
- langgraph_api/command.py +5 -3
- langgraph_api/config/__init__.py +532 -0
- langgraph_api/config/_parse.py +58 -0
- langgraph_api/config/schemas.py +431 -0
- langgraph_api/cron_scheduler.py +17 -1
- langgraph_api/encryption/__init__.py +15 -0
- langgraph_api/encryption/aes_json.py +158 -0
- langgraph_api/encryption/context.py +35 -0
- langgraph_api/encryption/custom.py +280 -0
- langgraph_api/encryption/middleware.py +632 -0
- langgraph_api/encryption/shared.py +63 -0
- langgraph_api/errors.py +12 -1
- langgraph_api/executor_entrypoint.py +11 -6
- langgraph_api/feature_flags.py +29 -0
- langgraph_api/graph.py +176 -76
- langgraph_api/grpc/client.py +313 -0
- langgraph_api/grpc/config_conversion.py +231 -0
- langgraph_api/grpc/generated/__init__.py +29 -0
- langgraph_api/grpc/generated/checkpointer_pb2.py +63 -0
- langgraph_api/grpc/generated/checkpointer_pb2.pyi +99 -0
- langgraph_api/grpc/generated/checkpointer_pb2_grpc.py +329 -0
- langgraph_api/grpc/generated/core_api_pb2.py +216 -0
- langgraph_api/grpc/generated/core_api_pb2.pyi +905 -0
- langgraph_api/grpc/generated/core_api_pb2_grpc.py +1621 -0
- langgraph_api/grpc/generated/engine_common_pb2.py +219 -0
- langgraph_api/grpc/generated/engine_common_pb2.pyi +722 -0
- langgraph_api/grpc/generated/engine_common_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_cancel_run_action_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_cancel_run_action_pb2.pyi +12 -0
- langgraph_api/grpc/generated/enum_cancel_run_action_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_control_signal_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_control_signal_pb2.pyi +16 -0
- langgraph_api/grpc/generated/enum_control_signal_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_durability_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_durability_pb2.pyi +16 -0
- langgraph_api/grpc/generated/enum_durability_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_multitask_strategy_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_multitask_strategy_pb2.pyi +16 -0
- langgraph_api/grpc/generated/enum_multitask_strategy_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_run_status_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_run_status_pb2.pyi +22 -0
- langgraph_api/grpc/generated/enum_run_status_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_stream_mode_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_stream_mode_pb2.pyi +28 -0
- langgraph_api/grpc/generated/enum_stream_mode_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_thread_status_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_thread_status_pb2.pyi +16 -0
- langgraph_api/grpc/generated/enum_thread_status_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/enum_thread_stream_mode_pb2.py +37 -0
- langgraph_api/grpc/generated/enum_thread_stream_mode_pb2.pyi +16 -0
- langgraph_api/grpc/generated/enum_thread_stream_mode_pb2_grpc.py +24 -0
- langgraph_api/grpc/generated/errors_pb2.py +39 -0
- langgraph_api/grpc/generated/errors_pb2.pyi +21 -0
- langgraph_api/grpc/generated/errors_pb2_grpc.py +24 -0
- langgraph_api/grpc/ops/__init__.py +370 -0
- langgraph_api/grpc/ops/assistants.py +424 -0
- langgraph_api/grpc/ops/runs.py +792 -0
- langgraph_api/grpc/ops/threads.py +1013 -0
- langgraph_api/http.py +16 -5
- langgraph_api/http_metrics.py +15 -35
- langgraph_api/http_metrics_utils.py +38 -0
- langgraph_api/js/build.mts +1 -1
- langgraph_api/js/client.http.mts +13 -7
- langgraph_api/js/client.mts +2 -5
- langgraph_api/js/package.json +29 -28
- langgraph_api/js/remote.py +56 -30
- langgraph_api/js/src/graph.mts +20 -0
- langgraph_api/js/sse.py +2 -2
- langgraph_api/js/ui.py +1 -1
- langgraph_api/js/yarn.lock +1204 -1006
- langgraph_api/logging.py +29 -2
- langgraph_api/metadata.py +99 -28
- langgraph_api/middleware/http_logger.py +7 -2
- langgraph_api/middleware/private_network.py +7 -7
- langgraph_api/models/run.py +54 -93
- langgraph_api/otel_context.py +205 -0
- langgraph_api/patch.py +5 -3
- langgraph_api/queue_entrypoint.py +154 -65
- langgraph_api/route.py +47 -5
- langgraph_api/schema.py +88 -10
- langgraph_api/self_hosted_logs.py +124 -0
- langgraph_api/self_hosted_metrics.py +450 -0
- langgraph_api/serde.py +79 -37
- langgraph_api/server.py +138 -60
- langgraph_api/state.py +4 -3
- langgraph_api/store.py +25 -16
- langgraph_api/stream.py +80 -29
- langgraph_api/thread_ttl.py +31 -13
- langgraph_api/timing/__init__.py +25 -0
- langgraph_api/timing/profiler.py +200 -0
- langgraph_api/timing/timer.py +318 -0
- langgraph_api/utils/__init__.py +53 -8
- langgraph_api/utils/cache.py +47 -10
- langgraph_api/utils/config.py +2 -1
- langgraph_api/utils/errors.py +77 -0
- langgraph_api/utils/future.py +10 -6
- langgraph_api/utils/headers.py +76 -2
- langgraph_api/utils/retriable_client.py +74 -0
- langgraph_api/utils/stream_codec.py +315 -0
- langgraph_api/utils/uuids.py +29 -62
- langgraph_api/validation.py +9 -0
- langgraph_api/webhook.py +120 -6
- langgraph_api/worker.py +55 -24
- {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/METADATA +16 -8
- langgraph_api-0.7.3.dist-info/RECORD +168 -0
- {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/WHEEL +1 -1
- langgraph_runtime/__init__.py +1 -0
- langgraph_runtime/routes.py +11 -0
- logging.json +1 -3
- openapi.json +839 -478
- langgraph_api/config.py +0 -387
- langgraph_api/js/isolate-0x130008000-46649-46649-v8.log +0 -4430
- langgraph_api/js/isolate-0x138008000-44681-44681-v8.log +0 -4430
- langgraph_api/js/package-lock.json +0 -3308
- langgraph_api-0.4.1.dist-info/RECORD +0 -107
- /langgraph_api/{utils.py → grpc/__init__.py} +0 -0
- {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/entry_points.txt +0 -0
- {langgraph_api-0.4.1.dist-info → langgraph_api-0.7.3.dist-info}/licenses/LICENSE +0 -0
langgraph_api/otel_context.py
ADDED
@@ -0,0 +1,205 @@
+"""OTEL trace context propagation utilities.
+
+Provides helpers for extracting, storing, and restoring W3C Trace Context
+across the API-to-worker boundary in distributed LangGraph deployments.
+"""
+
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Any
+
+import structlog
+
+from langgraph_api import __version__, config
+
+if TYPE_CHECKING:
+    from collections.abc import Generator, Mapping
+
+    from opentelemetry.trace import Tracer
+
+logger = structlog.stdlib.get_logger(__name__)
+
+# Constants for storing trace context in configurable
+OTEL_TRACEPARENT_KEY = "__otel_traceparent__"
+OTEL_TRACESTATE_KEY = "__otel_tracestate__"
+OTEL_TRACER_NAME = "langsmith_agent_server"
+OTEL_RUN_ID_ATTR_NAME = "langsmith.run_id"
+OTEL_THREAD_ID_ATTR_NAME = "langsmith.thread_id"
+
+# Cached instances (initialized lazily, once)
+_propagator: Any = None
+_tracer: Any = None
+_otel_available: bool | None = None
+
+
+def _check_otel_available() -> bool:
+    """Check if OpenTelemetry is available. Cached after first call."""
+    global _otel_available
+    if _otel_available is None:
+        try:
+            from opentelemetry import trace  # noqa: F401
+            from opentelemetry.trace.propagation.tracecontext import (
+                TraceContextTextMapPropagator,  # noqa: F401
+            )
+
+            _otel_available = True
+        except ImportError:
+            _otel_available = False
+    return _otel_available
+
+
+def _get_propagator() -> Any:
+    """Get cached W3C TraceContext propagator."""
+    global _propagator
+    if _propagator is None:
+        from opentelemetry.trace.propagation.tracecontext import (
+            TraceContextTextMapPropagator,
+        )
+
+        _propagator = TraceContextTextMapPropagator()
+    return _propagator
+
+
+def _get_tracer() -> Tracer:
+    """Get cached tracer for worker spans."""
+    global _tracer
+    if _tracer is None:
+        from opentelemetry import trace
+
+        _tracer = trace.get_tracer(
+            OTEL_TRACER_NAME, instrumenting_library_version=__version__
+        )
+    return _tracer
+
+
+def extract_otel_headers_to_configurable(
+    headers: Mapping[str, str],
+    configurable: dict[str, Any],
+) -> None:
+    """Extract traceparent/tracestate from HTTP headers into configurable dict.
+
+    Only extracts if OTEL is enabled. No-op otherwise.
+
+    Args:
+        headers: HTTP headers from the incoming request
+        configurable: The configurable dict to store trace context in
+    """
+    if not config.OTEL_ENABLED:
+        return
+
+    if traceparent := headers.get("traceparent"):
+        configurable[OTEL_TRACEPARENT_KEY] = traceparent
+    if tracestate := headers.get("tracestate"):
+        configurable[OTEL_TRACESTATE_KEY] = tracestate
+
+
+def inject_current_trace_context(configurable: dict[str, Any]) -> None:
+    """Inject current OTEL trace context into configurable for worker propagation.
+
+    This captures the active span context (e.g., from Starlette auto-instrumentation)
+    and stores it in the configurable dict so workers can restore it and create
+    child spans under the API request span.
+
+    Args:
+        configurable: The configurable dict to store trace context in
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        return
+
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        if not span.is_recording():
+            return
+
+        carrier: dict[str, str] = {}
+        _get_propagator().inject(carrier)
+
+        if traceparent := carrier.get("traceparent"):
+            configurable[OTEL_TRACEPARENT_KEY] = traceparent
+        if tracestate := carrier.get("tracestate"):
+            configurable[OTEL_TRACESTATE_KEY] = tracestate
+    except Exception:
+        # Never fail - tracing issues shouldn't break functionality
+        pass
+
+
+@contextmanager
+def restore_otel_trace_context(
+    configurable: dict[str, Any],
+    run_id: str | None = None,
+    thread_id: str | None = None,
+) -> Generator[None, None, None]:
+    """Restore OTEL trace context and create child span for worker execution.
+
+    Creates a child span under the original API request span, ensuring
+    distributed traces are connected across the API-to-worker boundary.
+
+    Yields:
+        None - execution continues within the restored trace context
+
+    Note:
+        - No-ops if OTEL is disabled or unavailable
+        - Never raises - tracing failures won't break run execution
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        yield
+        return
+
+    traceparent = configurable.get(OTEL_TRACEPARENT_KEY)
+    if not traceparent:
+        yield
+        return
+
+    try:
+        from opentelemetry import trace
+
+        # Build carrier dict for W3C propagator
+        carrier: dict[str, str] = {"traceparent": traceparent}
+        if tracestate := configurable.get(OTEL_TRACESTATE_KEY):
+            carrier["tracestate"] = tracestate
+
+        # Extract context from carrier
+        ctx = _get_propagator().extract(carrier=carrier)
+
+        with _get_tracer().start_as_current_span(
+            "worker.stream_run",
+            context=ctx,
+            kind=trace.SpanKind.CONSUMER,
+        ) as span:
+            if run_id:
+                span.set_attribute(OTEL_RUN_ID_ATTR_NAME, run_id)
+            if thread_id:
+                span.set_attribute(OTEL_THREAD_ID_ATTR_NAME, thread_id)
+
+            yield
+    except Exception:
+        logger.debug("Failed to restore OTEL trace context", exc_info=True)
+        yield
+
+
+def inject_otel_headers() -> dict[str, str]:
+    """Inject current trace context into headers for outgoing HTTP requests.
+
+    Used to propagate trace context to webhooks.
+
+    Returns:
+        Dict with traceparent/tracestate headers if in active trace, else empty.
+    """
+    if not config.OTEL_ENABLED or not _check_otel_available():
+        return {}
+
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        if not span.is_recording():
+            return {}
+
+        carrier: dict[str, str] = {}
+        _get_propagator().inject(carrier)
+        return carrier
+    except Exception:
+        return {}
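Taken together, the new otel_context module gives the server a capture-and-resume pipeline for distributed traces: the API process records the caller's (or its own) span context into the run's configurable, and the worker process reopens it as a child span. The sketch below shows one way the pieces chain together; the enqueue_run/execute_run wrappers are illustrative stand-ins, not actual server code, and only the imported helpers come from the diff above.

```python
# Illustrative wiring of the otel_context helpers (enqueue_run/execute_run
# are hypothetical; the imported functions are the ones added in this diff).
from langgraph_api.otel_context import (
    extract_otel_headers_to_configurable,
    inject_current_trace_context,
    restore_otel_trace_context,
)


def enqueue_run(headers: dict[str, str]) -> dict:
    """API side: capture W3C trace context into the run's configurable."""
    configurable: dict = {}
    # Header-supplied traceparent/tracestate is stored first; if a server
    # span is also active, inject_current_trace_context overwrites it with
    # the live span's context so the worker parents under the request span.
    extract_otel_headers_to_configurable(headers, configurable)
    inject_current_trace_context(configurable)
    return configurable


def execute_run(configurable: dict, run_id: str, thread_id: str) -> None:
    """Worker side: resume the trace and execute under a child span."""
    with restore_otel_trace_context(configurable, run_id=run_id, thread_id=thread_id):
        ...  # graph execution happens inside the restored "worker.stream_run" span
```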
langgraph_api/patch.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Any
 from starlette.responses import Response, StreamingResponse
 from starlette.types import Send
 
-from langgraph_api.serde import Fragment
+from langgraph_api.serde import Fragment, json_dumpb
 
 """
 Patch Response.render and StreamingResponse.stream_response
@@ -14,7 +14,7 @@ to recognize bytearrays and memoryviews as bytes-like objects.
 def Response_render(self, content: Any) -> bytes:
     if content is None:
         return b""
-    if isinstance(content, (bytes, bytearray, memoryview)):
+    if isinstance(content, (bytes, bytearray, memoryview)):
         return content
     return content.encode(self.charset)  # type: ignore
 
@@ -32,7 +32,9 @@ async def StreamingResponse_stream_response(self, send: Send) -> None:
             continue
         if isinstance(chunk, Fragment):
             chunk = chunk.buf
-        if
+        if isinstance(chunk, dict):
+            chunk = json_dumpb(chunk)
+        if not isinstance(chunk, (bytes, bytearray, memoryview)):
             chunk = chunk.encode(self.charset)
         await send({"type": "http.response.body", "body": chunk, "more_body": True})
 
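The practical effect of this patch is that StreamingResponse chunks may now be dicts (serialized through json_dumpb) or bytes-like objects such as bytearray and memoryview, not just str/bytes. A minimal illustration, assuming the patch has been applied; the endpoint itself is hypothetical:

```python
# Hypothetical endpoint: valid only once langgraph_api.patch has replaced
# StreamingResponse.stream_response, which serializes dict chunks via
# json_dumpb and passes bytes-like chunks through unchanged.
from starlette.responses import StreamingResponse


async def event_stream():
    yield bytearray(b"raw bytes pass through unchanged\n")
    yield {"event": "values", "data": {"ok": True}}  # dict -> json_dumpb(chunk)


async def endpoint(request):
    return StreamingResponse(event_stream(), media_type="application/octet-stream")
```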
langgraph_api/queue_entrypoint.py
CHANGED
@@ -1,46 +1,68 @@
-# ruff: noqa: E402
 import os
 
 if not (
     (disable_truststore := os.getenv("DISABLE_TRUSTSTORE"))
     and disable_truststore.lower() == "true"
 ):
-    import truststore
+    import truststore
 
-    truststore.inject_into_ssl()
+    truststore.inject_into_ssl()
 
 import asyncio
-import
+import functools
 import json
 import logging.config
 import pathlib
 import signal
-
-from typing import cast
+import socket
 
 import structlog
 
-from
-from langgraph_runtime
+from langgraph_api.utils.errors import GraphLoadError, HealthServerStartupError
+from langgraph_runtime import lifespan
+from langgraph_runtime.database import healthcheck, pool_stats
 from langgraph_runtime.metrics import get_metrics
 
 logger = structlog.stdlib.get_logger(__name__)
 
 
+def _ensure_port_available(host: str, port: int) -> None:
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            sock.bind((host, port))
+    except OSError as exc:
+        raise HealthServerStartupError(host, port, exc) from exc
+
+
 async def health_and_metrics_server():
     import uvicorn
     from starlette.applications import Starlette
+    from starlette.requests import Request
     from starlette.responses import JSONResponse, PlainTextResponse
-    from starlette.routing import Route
+    from starlette.routing import Mount, Route
+
+    from langgraph_api import config as lc_config
+    from langgraph_api.api.meta import METRICS_FORMATS
 
     port = int(os.getenv("PORT", "8080"))
+    host = os.getenv("LANGGRAPH_SERVER_HOST", "0.0.0.0")
 
     async def health_endpoint(request):
+        # if db or redis is not healthy, this will raise an exception
+        await healthcheck()
        return JSONResponse({"status": "ok"})
 
-    async def metrics_endpoint(request):
+    async def metrics_endpoint(request: Request):
+        metrics_format = request.query_params.get("format", "prometheus")
+        if metrics_format not in METRICS_FORMATS:
+            await logger.awarning(
+                f"metrics format {metrics_format} not supported, falling back to prometheus"
+            )
+            metrics_format = "prometheus"
+
         metrics = get_metrics()
-        worker_metrics =
+        worker_metrics = metrics["workers"]
         workers_max = worker_metrics["max"]
         workers_active = worker_metrics["active"]
         workers_available = worker_metrics["available"]
@@ -48,86 +70,132 @@ async def health_and_metrics_server():
         project_id = os.getenv("LANGSMITH_HOST_PROJECT_ID")
         revision_id = os.getenv("LANGSMITH_HOST_REVISION_ID")
 
-
-
-
-
-            "# HELP lg_api_workers_active The number of currently active workers.",
-            "# TYPE lg_api_workers_active gauge",
-            f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
-            "# HELP lg_api_workers_available The number of available (idle) workers.",
-            "# TYPE lg_api_workers_available gauge",
-            f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
-        ]
-
-        metrics_lines.extend(
-            pool_stats(
-                project_id=project_id,
-                revision_id=revision_id,
-            )
+        pg_redis_stats = pool_stats(
+            project_id=project_id,
+            revision_id=revision_id,
+            format=metrics_format,
         )
 
-
-
-
+        if metrics_format == "json":
+            resp = {
+                **pg_redis_stats,
+                "workers": worker_metrics,
+            }
+            return JSONResponse(resp)
+        elif metrics_format == "prometheus":
+            metrics_lines = [
+                "# HELP lg_api_workers_max The maximum number of workers available.",
+                "# TYPE lg_api_workers_max gauge",
+                f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
+                "# HELP lg_api_workers_active The number of currently active workers.",
+                "# TYPE lg_api_workers_active gauge",
+                f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
+                "# HELP lg_api_workers_available The number of available (idle) workers.",
+                "# TYPE lg_api_workers_available gauge",
+                f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
+            ]
+
+            metrics_lines.extend(pg_redis_stats)
+
+            return PlainTextResponse(
+                "\n".join(metrics_lines),
+                media_type="text/plain; version=0.0.4; charset=utf-8",
+            )
+
+    routes = [
+        Route("/ok", health_endpoint),
+        Route("/metrics", metrics_endpoint),
+    ]
+    app = Starlette(routes=routes)
+    if lc_config.MOUNT_PREFIX:
+        app = Starlette(
+            routes=[*routes, Mount(lc_config.MOUNT_PREFIX, app=app)],
+            lifespan=app.router.lifespan_context,
+            exception_handlers=app.exception_handlers,
         )
 
-
-
-
-
-
-
+    try:
+        _ensure_port_available(host, port)
+    except HealthServerStartupError as exc:
+        await logger.aerror(
+            str(exc),
+            host=exc.host,
+            port=exc.port,
+            cause=str(exc.cause),
+        )
+        raise
 
     config = uvicorn.Config(
         app,
-        host=
+        host=host,
         port=port,
         log_level="error",
         access_log=False,
     )
+    # Server will run indefinitely until the process is terminated
     server = uvicorn.Server(config)
 
-    logger.info(f"Health and metrics server started at http://
-
+    logger.info(f"Health and metrics server started at http://{host}:{port}")
+    try:
+        await server.serve()
+    except SystemExit as exc:
+        if exc.code == 0:
+            return
+        try:
+            _ensure_port_available(host, port)
+        except HealthServerStartupError as port_exc:
+            await logger.aerror(
+                str(port_exc),
+                host=port_exc.host,
+                port=port_exc.port,
+                cause=str(port_exc.cause),
+            )
+            raise port_exc from None
+        error = HealthServerStartupError(host, port, exc)
+        await logger.aerror(
+            str(error), host=error.host, port=error.port, cause=str(error.cause)
+        )
+        raise error from None
+    except OSError as exc:
+        error = HealthServerStartupError(host, port, exc)
+        await logger.aerror(
+            str(error), host=error.host, port=error.port, cause=str(error.cause)
+        )
+        raise error from exc
 
 
 async def entrypoint(
-    grpc_port: int | None = None,
+    grpc_port: int | None = None,
+    entrypoint_name: str = "python-queue",
+    cancel_event: asyncio.Event | None = None,
 ):
     from langgraph_api import logging as lg_logging
+    from langgraph_api import timing
     from langgraph_api.api import user_router
+    from langgraph_api.server import app
 
     lg_logging.set_logging_context({"entrypoint": entrypoint_name})
     tasks: set[asyncio.Task] = set()
-
-
-
-
-
-    async def combined_lifespan(
-        app, with_cron_scheduler=False, grpc_port=None, taskset=None
-    ):
-        async with lifespan(
-            app,
-            with_cron_scheduler=with_cron_scheduler,
+    user_lifespan = None if user_router is None else user_router.router.lifespan_context
+    wrapped_lifespan = timing.combine_lifespans(
+        functools.partial(
+            lifespan.lifespan,
+            with_cron_scheduler=False,
            grpc_port=grpc_port,
-            taskset=
-
-
-
-
-
-
-
-    async with combined_lifespan(
-        None, with_cron_scheduler=False, grpc_port=grpc_port, taskset=tasks
-    ):
+            taskset=tasks,
+            cancel_event=cancel_event,
+        ),
+        user_lifespan,
+    )
+
+    async with wrapped_lifespan(app):
+        tasks.add(asyncio.create_task(health_and_metrics_server()))
        await asyncio.gather(*tasks)
 
 
 async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queue"):
     """Run the queue entrypoint and shut down gracefully on SIGTERM/SIGINT."""
+
     loop = asyncio.get_running_loop()
     stop_event = asyncio.Event()
 
@@ -141,14 +209,35 @@ async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queu
     signal.signal(signal.SIGTERM, lambda *_: _handle_signal())
 
     entry_task = asyncio.create_task(
-        entrypoint(
+        entrypoint(
+            grpc_port=grpc_port,
+            entrypoint_name=entrypoint_name,
+            cancel_event=stop_event,
+        )
     )
+    # Handle the case where the entrypoint errors out
+    entry_task.add_done_callback(lambda _: stop_event.set())
     await stop_event.wait()
 
     logger.warning("Cancelling queue entrypoint task")
     entry_task.cancel()
-
+    try:
        await entry_task
+    except asyncio.CancelledError:
+        pass
+    except (GraphLoadError, HealthServerStartupError) as exc:
+        raise SystemExit(1) from exc
+    except RuntimeError as exc:
+        if str(exc) == "generator didn't yield":
+            last_error = lifespan.get_last_error()
+            if last_error is not None:
+                logger.exception(
+                    "Application startup failed",
+                    error_type=type(last_error).__name__,
+                    error_message=str(last_error),
+                )
+            raise SystemExit(1) from None
+        raise
 
 
 if __name__ == "__main__":
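One detail carries the new shutdown behavior in main(): the entrypoint task's done-callback sets the same event the signal handlers set, so an entrypoint that crashes (or exits cleanly) unblocks the waiter instead of leaving the process hung on stop_event.wait(). Distilled into a standalone sketch (the wrapper name is illustrative):

```python
import asyncio


async def run_with_graceful_shutdown(entrypoint_coro) -> None:
    """Wait for either a stop signal or the entrypoint finishing on its own."""
    stop_event = asyncio.Event()
    entry_task = asyncio.create_task(entrypoint_coro)
    # A crash or clean exit of the entrypoint also releases the waiter.
    entry_task.add_done_callback(lambda _: stop_event.set())
    await stop_event.wait()  # set by a signal handler or the callback above
    entry_task.cancel()
    try:
        await entry_task
    except asyncio.CancelledError:
        pass  # expected when we initiated the cancellation
```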
langgraph_api/route.py
CHANGED
@@ -4,6 +4,7 @@ import typing
 
 import jsonschema_rs
 import orjson
+import structlog
 from starlette._exception_handler import wrap_app_handling_exceptions
 from starlette._utils import is_async_callable
 from starlette.concurrency import run_in_threadpool
@@ -14,9 +15,11 @@ from starlette.responses import JSONResponse
 from starlette.routing import Route, compile_path, get_name
 from starlette.types import ASGIApp, Receive, Scope, Send
 
+from langgraph_api import config
 from langgraph_api.serde import json_dumpb
 from langgraph_api.utils import get_auth_ctx, with_user
 
+logger = structlog.getLogger(__name__)
 SchemaType = (
     jsonschema_rs.Draft4Validator
     | jsonschema_rs.Draft6Validator
@@ -43,7 +46,7 @@ def api_request_response(
             response: ASGIApp = await func(request)
         else:
             response = await run_in_threadpool(
-                typing.cast(typing.Callable[[Request], ASGIApp], func), request
+                typing.cast("typing.Callable[[Request], ASGIApp]", func), request
             )
         await response(scope, receive, send)
 
@@ -58,10 +61,11 @@ class ApiResponse(JSONResponse):
 
 
 def _json_loads(content: bytearray, schema: SchemaType) -> typing.Any:
-
+    """Parse JSON and validate schema. Used by threadpool for large payloads."""
+    json_data = orjson.loads(content)
     if schema is not None:
-        schema.validate(
-    return
+        schema.validate(json_data)
+    return json_data
 
 
 class ApiRequest(Request):
@@ -76,8 +80,16 @@ class ApiRequest(Request):
     async def json(self, schema: SchemaType = None) -> typing.Any:
         if not hasattr(self, "_json"):
             body = await self.body()
+
+            # Hybrid approach for optimal performance:
+            # - Small payloads: parse directly (fast, no queueing/thread pool limitations)
+            # - Large payloads: use dedicated thread pool (safer, doesn't block event loop)
             try:
-                self._json =
+                self._json = (
+                    await run_in_threadpool(_json_loads, body, schema)
+                    if len(body) > config.JSON_THREAD_POOL_MINIMUM_SIZE_BYTES
+                    else _json_loads(body, schema)
+                )
             except orjson.JSONDecodeError as e:
                 raise HTTPException(
                     status_code=422, detail="Invalid JSON in request body"
@@ -135,6 +147,8 @@ class ApiRoute(Route):
 
         scope["route"] = self.path
         set_logging_context({"path": self.path, "method": scope.get("method")})
+        route_pattern = f"{scope.get('root_path', '')}{self.path}"
+        _name_otel_span(scope, route_pattern)
         ctx = get_auth_ctx()
        if ctx:
             user, auth = ctx.user, ctx.permissions
@@ -142,3 +156,31 @@ class ApiRoute(Route):
             user, auth = scope.get("user"), scope.get("auth")
         async with with_user(user, auth):
             return await super().handle(scope, receive, send)
+
+
+def _name_otel_span(scope: Scope, route_pattern: str):
+    """Best-effort rename of the active OTEL server span to include the route.
+
+    - No-ops if OTEL is disabled or OTEL libs are unavailable.
+    - Sets span name to "METHOD /templated/path" and attaches http.route.
+    - Never raises; safe for hot path usage.
+    """
+    if not config.OTEL_ENABLED:
+        return
+    try:
+        from opentelemetry.trace import get_current_span
+
+        span = get_current_span()
+        if span.is_recording():
+            method = scope.get("method", "") or ""
+            try:
+                span.update_name(f"{method} {route_pattern}")
+            except Exception:
+                logger.error("Failed to update OTEL span name", exc_info=True)
+                pass
+            try:
+                span.set_attribute("http.route", route_pattern)
+            except Exception:
+                logger.error("Failed to update OTEL span attributes", exc_info=True)
+    except Exception:
+        logger.error("Failed to update OTEL span", exc_info=True)