langgraph-api 0.11.0.dev9__py3-none-any.whl → 0.12.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langgraph_api/__init__.py +1 -1
- langgraph_api/api/meta.py +18 -129
- langgraph_api/config/__init__.py +8 -4
- langgraph_api/metrics_collector.py +184 -0
- langgraph_api/metrics_otlp.py +326 -85
- langgraph_api/middleware/http_logger.py +24 -0
- langgraph_api/queue_entrypoint.py +13 -36
- langgraph_api/self_hosted_metrics.py +1 -1
- {langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/METADATA +3 -3
- {langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/RECORD +13 -12
- {langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/WHEEL +0 -0
- {langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/entry_points.txt +0 -0
- {langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/licenses/LICENSE +0 -0
langgraph_api/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.11.0.dev9"
|
|
1
|
+
__version__ = "0.12.0.dev1"
|
langgraph_api/api/meta.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import langgraph.version
|
|
2
2
|
import structlog
|
|
3
|
+
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
|
3
4
|
from starlette.responses import JSONResponse, PlainTextResponse
|
|
4
5
|
|
|
5
6
|
from langgraph_api import __version__, config, metadata
|
|
@@ -46,54 +47,6 @@ def _merge_pool_stats(local: PoolStats, remote: PoolStats) -> PoolStats:
|
|
|
46
47
|
return merged
|
|
47
48
|
|
|
48
49
|
|
|
49
|
-
def _pool_stats_to_prometheus_lines(
|
|
50
|
-
stats: PoolStats,
|
|
51
|
-
project_id: str | None,
|
|
52
|
-
revision_id: str | None,
|
|
53
|
-
deployment_type: str = "",
|
|
54
|
-
) -> list[str]:
|
|
55
|
-
"""Format merged pool stats as Prometheus text lines (same format as langgraph_runtime.database.pool_stats)."""
|
|
56
|
-
labels = f'project_id="{project_id}", revision_id="{revision_id}", deployment_type="{deployment_type}"'
|
|
57
|
-
lines = []
|
|
58
|
-
if "postgres" in stats:
|
|
59
|
-
pg = stats["postgres"]
|
|
60
|
-
lines.extend(
|
|
61
|
-
[
|
|
62
|
-
"# HELP lg_api_pg_pool_max The maximum size of the postgres connection pool.",
|
|
63
|
-
"# TYPE lg_api_pg_pool_max gauge",
|
|
64
|
-
f"lg_api_pg_pool_max{{{labels}}} {pg.get('pool_max', 0)}",
|
|
65
|
-
"# HELP lg_api_pg_pool_size Number of connections currently managed by the postgres connection pool (in the pool, given to clients, being prepared)",
|
|
66
|
-
"# TYPE lg_api_pg_pool_size gauge",
|
|
67
|
-
f"lg_api_pg_pool_size{{{labels}}} {pg.get('pool_size', 0)}",
|
|
68
|
-
"# HELP lg_api_pg_pool_available Number of connections currently idle in the postgres connection pool",
|
|
69
|
-
"# TYPE lg_api_pg_pool_available gauge",
|
|
70
|
-
f"lg_api_pg_pool_available{{{labels}}} {pg.get('pool_available', 0)}",
|
|
71
|
-
"# HELP lg_api_pg_pool_requests_queued Number of postgres connection requests queued because a postgres connection wasn't immediately available in the pool",
|
|
72
|
-
"# TYPE lg_api_pg_pool_requests_queued counter",
|
|
73
|
-
f"lg_api_pg_pool_requests_queued{{{labels}}} {pg.get('requests_queued', 0)}",
|
|
74
|
-
"# HELP lg_api_pg_pool_requests_errors Number of postgres connection requests resulting in an error (timeouts, queue full...)",
|
|
75
|
-
"# TYPE lg_api_pg_pool_requests_errors counter",
|
|
76
|
-
f"lg_api_pg_pool_requests_errors{{{labels}}} {pg.get('requests_errors', 0)}",
|
|
77
|
-
]
|
|
78
|
-
)
|
|
79
|
-
if "redis" in stats:
|
|
80
|
-
rd = stats["redis"]
|
|
81
|
-
lines.extend(
|
|
82
|
-
[
|
|
83
|
-
"# HELP lg_api_redis_pool_available Number of connections currently idle in the redis connection pool",
|
|
84
|
-
"# TYPE lg_api_redis_pool_available gauge",
|
|
85
|
-
f"lg_api_redis_pool_available{{{labels}}} {rd.get('idle_connections', 0)}",
|
|
86
|
-
"# HELP lg_api_redis_pool_size Number of connections currently in use in the redis connection pool",
|
|
87
|
-
"# TYPE lg_api_redis_pool_size gauge",
|
|
88
|
-
f"lg_api_redis_pool_size{{{labels}}} {rd.get('in_use_connections', 0)}",
|
|
89
|
-
"# HELP lg_api_redis_pool_max The maximum size of the redis connection pool.",
|
|
90
|
-
"# TYPE lg_api_redis_pool_max gauge",
|
|
91
|
-
f"lg_api_redis_pool_max{{{labels}}} {rd.get('max_connections', 0)}",
|
|
92
|
-
]
|
|
93
|
-
)
|
|
94
|
-
return lines
|
|
95
|
-
|
|
96
|
-
|
|
97
50
|
async def _grpc_pool_stats() -> PoolStats:
|
|
98
51
|
"""Fetch connection pool stats from the Core API (Go) via gRPC for metrics aggregation. Returns {} on error."""
|
|
99
52
|
if not IS_POSTGRES_OR_GRPC_BACKEND:
|
|
@@ -107,21 +60,12 @@ async def _grpc_pool_stats() -> PoolStats:
|
|
|
107
60
|
return {}
|
|
108
61
|
|
|
109
62
|
|
|
110
|
-
async def meta_pool_stats(
|
|
63
|
+
async def meta_pool_stats() -> PoolStats:
|
|
111
64
|
local_pool_stats: PoolStats = pool_stats()
|
|
112
65
|
|
|
113
66
|
# Aggregate with Core API (Go) pool stats when using gRPC backend
|
|
114
67
|
grpc_pool_stats = await _grpc_pool_stats()
|
|
115
|
-
|
|
116
|
-
if metrics_format == "prometheus":
|
|
117
|
-
return _pool_stats_to_prometheus_lines(
|
|
118
|
-
merged_pool_stats,
|
|
119
|
-
metadata.PROJECT_ID,
|
|
120
|
-
metadata.HOST_REVISION_ID,
|
|
121
|
-
metadata.DEPLOYMENT_TYPE,
|
|
122
|
-
)
|
|
123
|
-
else:
|
|
124
|
-
return merged_pool_stats
|
|
68
|
+
return _merge_pool_stats(local_pool_stats, grpc_pool_stats)
|
|
125
69
|
|
|
126
70
|
|
|
127
71
|
async def meta_info(request: ApiRequest):
|
|
@@ -153,81 +97,26 @@ async def meta_metrics(request: ApiRequest):
|
|
|
153
97
|
if metrics_format not in METRICS_FORMATS:
|
|
154
98
|
metrics_format = "prometheus"
|
|
155
99
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
workers_active = worker_metrics["active"]
|
|
161
|
-
workers_available = worker_metrics["available"]
|
|
100
|
+
if metrics_format == "prometheus":
|
|
101
|
+
# Served straight from the OTLP Prometheus client's registry (see
|
|
102
|
+
# metrics_otlp._LSDPrometheusReader).
|
|
103
|
+
return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
|
|
162
104
|
|
|
105
|
+
# JSON: hand-built snapshot of workers, queue depth, HTTP, and pool stats.
|
|
106
|
+
worker_metrics = get_metrics()["workers"]
|
|
163
107
|
http_metrics = HTTP_METRICS_COLLECTOR.get_metrics(
|
|
164
108
|
metadata.PROJECT_ID,
|
|
165
109
|
metadata.HOST_REVISION_ID,
|
|
166
110
|
metrics_format,
|
|
167
111
|
metadata.DEPLOYMENT_TYPE,
|
|
168
112
|
)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
"queue": await Runs.stats(conn),
|
|
177
|
-
**http_metrics,
|
|
178
|
-
}
|
|
179
|
-
if config.N_JOBS_PER_WORKER > 0:
|
|
180
|
-
resp["workers"] = worker_metrics
|
|
181
|
-
return JSONResponse(resp)
|
|
182
|
-
elif metrics_format == "prometheus":
|
|
183
|
-
metrics = []
|
|
184
|
-
try:
|
|
185
|
-
async with connect() as conn:
|
|
186
|
-
queue_stats = await Runs.stats(conn)
|
|
187
|
-
|
|
188
|
-
labels = f'project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}", deployment_type="{metadata.DEPLOYMENT_TYPE}"'
|
|
189
|
-
metrics.extend(
|
|
190
|
-
[
|
|
191
|
-
"# HELP lg_api_num_pending_runs The number of runs currently pending.",
|
|
192
|
-
"# TYPE lg_api_num_pending_runs gauge",
|
|
193
|
-
f"lg_api_num_pending_runs{{{labels}}} {queue_stats['n_pending']}",
|
|
194
|
-
"# HELP lg_api_num_running_runs The number of runs currently running.",
|
|
195
|
-
"# TYPE lg_api_num_running_runs gauge",
|
|
196
|
-
f"lg_api_num_running_runs{{{labels}}} {queue_stats['n_running']}",
|
|
197
|
-
"# HELP lg_api_pending_runs_wait_time_max The maximum time a run has been pending, in seconds.",
|
|
198
|
-
"# TYPE lg_api_pending_runs_wait_time_max gauge",
|
|
199
|
-
f"lg_api_pending_runs_wait_time_max{{{labels}}} {queue_stats.get('pending_runs_wait_time_max_secs') or 0}",
|
|
200
|
-
"# HELP lg_api_pending_runs_wait_time_med The median pending wait time across runs, in seconds.",
|
|
201
|
-
"# TYPE lg_api_pending_runs_wait_time_med gauge",
|
|
202
|
-
f"lg_api_pending_runs_wait_time_med{{{labels}}} {queue_stats.get('pending_runs_wait_time_med_secs') or 0}",
|
|
203
|
-
"# HELP lg_api_pending_unblocked_runs_wait_time_max The maximum time a run has been pending excluding runs blocked by another run on the same thread, in seconds.",
|
|
204
|
-
"# TYPE lg_api_pending_unblocked_runs_wait_time_max gauge",
|
|
205
|
-
f"lg_api_pending_unblocked_runs_wait_time_max{{{labels}}} {queue_stats.get('pending_unblocked_runs_wait_time_max_secs') or 0}",
|
|
206
|
-
]
|
|
207
|
-
)
|
|
208
|
-
except Exception as e:
|
|
209
|
-
await logger.awarning(
|
|
210
|
-
"Ignoring error while getting run stats for /metrics", exc_info=e
|
|
211
|
-
)
|
|
212
|
-
|
|
113
|
+
merged_pool_stats = await meta_pool_stats()
|
|
114
|
+
async with connect() as conn:
|
|
115
|
+
resp = {
|
|
116
|
+
**merged_pool_stats,
|
|
117
|
+
"queue": await Runs.stats(conn),
|
|
118
|
+
**http_metrics,
|
|
119
|
+
}
|
|
213
120
|
if config.N_JOBS_PER_WORKER > 0:
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
[
|
|
217
|
-
"# HELP lg_api_workers_max The maximum number of workers available.",
|
|
218
|
-
"# TYPE lg_api_workers_max gauge",
|
|
219
|
-
f"lg_api_workers_max{{{worker_labels}}} {workers_max}",
|
|
220
|
-
"# HELP lg_api_workers_active The number of currently active workers.",
|
|
221
|
-
"# TYPE lg_api_workers_active gauge",
|
|
222
|
-
f"lg_api_workers_active{{{worker_labels}}} {workers_active}",
|
|
223
|
-
"# HELP lg_api_workers_available The number of available (idle) workers.",
|
|
224
|
-
"# TYPE lg_api_workers_available gauge",
|
|
225
|
-
f"lg_api_workers_available{{{worker_labels}}} {workers_available}",
|
|
226
|
-
]
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
metrics.extend(http_metrics)
|
|
230
|
-
metrics.extend(merged_pool_stats)
|
|
231
|
-
|
|
232
|
-
metrics_response = "\n".join(metrics)
|
|
233
|
-
return PlainTextResponse(metrics_response)
|
|
121
|
+
resp["workers"] = worker_metrics
|
|
122
|
+
return JSONResponse(resp)
|
langgraph_api/config/__init__.py
CHANGED
|
@@ -577,8 +577,13 @@ METRIC_MAX_EMITTING_TIER = env(
|
|
|
577
577
|
"METRIC_MAX_EMITTING_TIER", cast=int, default=_METRIC_MAX_EMITTING_TIER_DEFAULT
|
|
578
578
|
)
|
|
579
579
|
DATADOG_METRICS_ENABLED = bool(LSD_DD_API_KEY)
|
|
580
|
-
|
|
581
|
-
|
|
580
|
+
# When true, the Prometheus scrape (/metrics) exposes ALL metrics, not just the
|
|
581
|
+
# lsd_web_metric (Deployment-UI) set. Record-time tier filtering
|
|
582
|
+
# (METRIC_MAX_EMITTING_TIER) still applies, so internal metrics must be within the
|
|
583
|
+
# max emitting tier to be recorded at all.
|
|
584
|
+
EXPOSE_INTERNAL_METRICS_PROMETHEUS = env(
|
|
585
|
+
"EXPOSE_INTERNAL_METRICS_PROMETHEUS", cast=bool, default=False
|
|
586
|
+
)
|
|
582
587
|
LANGGRAPH_LOGS_ENDPOINT = env("LANGGRAPH_LOGS_ENDPOINT", cast=str, default=None)
|
|
583
588
|
LANGGRAPH_LOGS_ENABLED = env("LANGGRAPH_LOGS_ENABLED", cast=bool, default=False)
|
|
584
589
|
|
|
@@ -635,6 +640,7 @@ __all__ = [
|
|
|
635
640
|
"CRON_SCHEDULER_SLEEP_TIME",
|
|
636
641
|
"DATABASE_URI",
|
|
637
642
|
"DATADOG_METRICS_ENABLED",
|
|
643
|
+
"EXPOSE_INTERNAL_METRICS_PROMETHEUS",
|
|
638
644
|
"FF_CRONS_ENABLED",
|
|
639
645
|
"FF_LOG_DROPPED_EVENTS",
|
|
640
646
|
"FF_LOG_QUERY_AND_PARAMS",
|
|
@@ -672,8 +678,6 @@ __all__ = [
|
|
|
672
678
|
"LSD_GRPC_SERVER_ADDRESS",
|
|
673
679
|
"LSD_GRPC_SERVER_MAX_RECV_MSG_BYTES",
|
|
674
680
|
"LSD_GRPC_SERVER_MAX_SEND_MSG_BYTES",
|
|
675
|
-
"LSD_PROM_METRICS_ENABLED",
|
|
676
|
-
"LSD_PROM_METRICS_PORT",
|
|
677
681
|
"MAX_STREAM_CHUNK_SIZE_BYTES",
|
|
678
682
|
"METRIC_MAX_EMITTING_TIER",
|
|
679
683
|
"METRIC_PREFIX",
|
|
langgraph_api/metrics_collector.py
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Periodic collector that pushes snapshot/state metrics to the OTLP client.
|
|
2
|
+
|
|
3
|
+
This background task samples the same sources every ``STATS_INTERVAL_SECS`` and records them via the reporter.
|
|
4
|
+
|
|
5
|
+
The loop runs in **every** process (on postgres both the API server and the
|
|
6
|
+
dedicated queue worker share the same lifespan; inmem is a single process). Each
|
|
7
|
+
metric group self-gates so it lands on the right process:
|
|
8
|
+
|
|
9
|
+
- **worker gauges** — recorded wherever workers run (``N_JOBS_PER_WORKER > 0``):
|
|
10
|
+
the queue worker, or a combined single-process deployment. A distributed API
|
|
11
|
+
process (``N_JOBS_PER_WORKER == 0``) has no workers and skips them.
|
|
12
|
+
- **queue depth** (``num_pending_runs``/``num_running_runs``) — a single global
|
|
13
|
+
value from ``Runs.stats`` (a gRPC call to the Go core). Emitted by the **API
|
|
14
|
+
process only** (``not IS_QUEUE_ENTRYPOINT``) on the **postgres** runtime; inmem
|
|
15
|
+
skips the DB round-trip entirely.
|
|
16
|
+
- **Postgres + Redis pool stats** — recorded on **both** processes (postgres
|
|
17
|
+
runtime only), each reporting its own pools via ``meta_pool_stats()``, which
|
|
18
|
+
merges the local Python pools with the Go-core pools. Redis stats in particular
|
|
19
|
+
come from the local Python pool — the Go core omits them unless it has a
|
|
20
|
+
non-cluster redis client — so a Go-core-only source would drop them. inmem has
|
|
21
|
+
no real Postgres/Redis pools, so nothing is reported.
|
|
22
|
+
|
|
23
|
+
The two pool request counters are cumulative, so we push the delta since the
|
|
24
|
+
previous sample (OTLP counters are additive).
|
|
25
|
+
|
|
26
|
+
The loop also logs the same samples (``Worker stats``, ``Postgres pool stats``,
|
|
27
|
+
``Redis pool stats``) — folding in what the legacy per-process ``stats_loop``
|
|
28
|
+
functions used to log.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import asyncio
|
|
34
|
+
|
|
35
|
+
import structlog
|
|
36
|
+
|
|
37
|
+
from langgraph_api import config
|
|
38
|
+
from langgraph_api.api.meta import meta_pool_stats
|
|
39
|
+
from langgraph_api.feature_flags import IS_POSTGRES_OR_GRPC_BACKEND
|
|
40
|
+
from langgraph_api.metrics_otlp import (
|
|
41
|
+
COUNTER_PG_POOL_REQUESTS_ERRORS,
|
|
42
|
+
COUNTER_PG_POOL_REQUESTS_QUEUED,
|
|
43
|
+
GAUGE_NUM_PENDING_RUNS,
|
|
44
|
+
GAUGE_NUM_RUNNING_RUNS,
|
|
45
|
+
GAUGE_PG_POOL_AVAILABLE,
|
|
46
|
+
GAUGE_PG_POOL_MAX,
|
|
47
|
+
GAUGE_PG_POOL_SIZE,
|
|
48
|
+
GAUGE_REDIS_POOL_AVAILABLE,
|
|
49
|
+
GAUGE_REDIS_POOL_MAX,
|
|
50
|
+
GAUGE_REDIS_POOL_SIZE,
|
|
51
|
+
GAUGE_WORKERS_ACTIVE,
|
|
52
|
+
GAUGE_WORKERS_AVAILABLE,
|
|
53
|
+
GAUGE_WORKERS_MAX,
|
|
54
|
+
get_otlp_metrics_reporter,
|
|
55
|
+
)
|
|
56
|
+
from langgraph_runtime.database import connect
|
|
57
|
+
from langgraph_runtime.metrics import get_metrics
|
|
58
|
+
|
|
59
|
+
if IS_POSTGRES_OR_GRPC_BACKEND:
|
|
60
|
+
from langgraph_api.grpc.ops import Runs
|
|
61
|
+
else:
|
|
62
|
+
from langgraph_runtime.ops import Runs
|
|
63
|
+
|
|
64
|
+
logger = structlog.stdlib.get_logger(__name__)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def _collect_queue_and_workers(reporter) -> None:
|
|
68
|
+
"""Worker gauges (where workers run) + queue depth (API process only).
|
|
69
|
+
|
|
70
|
+
Worker counts are local to the process running this loop and emitted wherever
|
|
71
|
+
workers run (``N_JOBS_PER_WORKER > 0``) — the queue worker or a combined
|
|
72
|
+
single-process deployment; a distributed API process (N_JOBS == 0) skips them.
|
|
73
|
+
|
|
74
|
+
Queue depth is a single global value (from the run table, via ``Runs.stats`` —
|
|
75
|
+
a gRPC call to the Go core) and is emitted by the **API process only**
|
|
76
|
+
(``not IS_QUEUE_ENTRYPOINT``) on **postgres**: inmem skips the DB round-trip,
|
|
77
|
+
and the dedicated queue worker leaves it to the API process so the global value
|
|
78
|
+
is not double-reported across the queue/API split.
|
|
79
|
+
"""
|
|
80
|
+
if config.N_JOBS_PER_WORKER > 0:
|
|
81
|
+
workers = get_metrics()["workers"]
|
|
82
|
+
reporter.record_gauge(GAUGE_WORKERS_MAX, workers["max"])
|
|
83
|
+
reporter.record_gauge(GAUGE_WORKERS_ACTIVE, workers["active"])
|
|
84
|
+
reporter.record_gauge(GAUGE_WORKERS_AVAILABLE, workers["available"])
|
|
85
|
+
await logger.ainfo(
|
|
86
|
+
"Worker stats",
|
|
87
|
+
max=workers["max"],
|
|
88
|
+
active=workers["active"],
|
|
89
|
+
available=workers["available"],
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Queue depth is read from the run table via Runs.stats (a gRPC call to the
|
|
93
|
+
# Go core on postgres). Emitted by the API process only — the queue worker
|
|
94
|
+
# (IS_QUEUE_ENTRYPOINT) skips it. inmem skips the DB round-trip and reports
|
|
95
|
+
# nothing.
|
|
96
|
+
if IS_POSTGRES_OR_GRPC_BACKEND and not config.IS_QUEUE_ENTRYPOINT:
|
|
97
|
+
async with connect() as conn:
|
|
98
|
+
stats = await Runs.stats(conn)
|
|
99
|
+
reporter.record_gauge(GAUGE_NUM_PENDING_RUNS, stats["n_pending"])
|
|
100
|
+
reporter.record_gauge(GAUGE_NUM_RUNNING_RUNS, stats["n_running"])
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
async def _collect_pool(reporter, prev_counters: dict[str, int]) -> None:
|
|
104
|
+
"""Postgres + Redis pool gauges + cumulative request counters.
|
|
105
|
+
|
|
106
|
+
Postgres runtime only, recorded on **both** processes (API server and queue
|
|
107
|
+
worker) — each reports its own pools. Uses ``meta_pool_stats()``, which merges
|
|
108
|
+
the local Python pools with the Go-core pools — matching the legacy /metrics.
|
|
109
|
+
Redis stats come from the local Python pool (the Go core omits them unless it
|
|
110
|
+
has a non-cluster redis client), so a Go-core-only source would drop them.
|
|
111
|
+
"""
|
|
112
|
+
# Limitation: under BG_JOB_ISOLATED_LOOPS each worker runs in its own thread with its own
|
|
113
|
+
# thread-local pg pool and redis client. This collector runs on the main
|
|
114
|
+
# thread, so meta_pool_stats() -> _get_pool()/redis_stats() only sees the main
|
|
115
|
+
# thread's pool (redis_stats() reads the global client unconditionally), and
|
|
116
|
+
# the per-thread isolated pools are NOT aggregated — so pg/redis pool gauges
|
|
117
|
+
# and the pg request counters under-report in isolated-loop mode.
|
|
118
|
+
stats = await meta_pool_stats()
|
|
119
|
+
|
|
120
|
+
pg = stats.get("postgres") or {}
|
|
121
|
+
if pg:
|
|
122
|
+
reporter.record_gauge(GAUGE_PG_POOL_MAX, pg.get("pool_max", 0))
|
|
123
|
+
reporter.record_gauge(GAUGE_PG_POOL_SIZE, pg.get("pool_size", 0))
|
|
124
|
+
reporter.record_gauge(GAUGE_PG_POOL_AVAILABLE, pg.get("pool_available", 0))
|
|
125
|
+
# Cumulative counters: record the delta since the last sample. Emit on a
|
|
126
|
+
# non-negative delta (>= 0) so the counter is created and reported from the
|
|
127
|
+
# first sample even when it is 0 — the legacy /metrics always reported
|
|
128
|
+
# these. Negative deltas (Go-core pool counter resets) are skipped to keep
|
|
129
|
+
# the OTLP counter monotonic.
|
|
130
|
+
for key, metric in (
|
|
131
|
+
("requests_queued", COUNTER_PG_POOL_REQUESTS_QUEUED),
|
|
132
|
+
("requests_errors", COUNTER_PG_POOL_REQUESTS_ERRORS),
|
|
133
|
+
):
|
|
134
|
+
current = pg.get(key, 0)
|
|
135
|
+
delta = current - prev_counters.get(key, 0)
|
|
136
|
+
if delta >= 0:
|
|
137
|
+
reporter.inc_counter(metric, delta)
|
|
138
|
+
prev_counters[key] = current
|
|
139
|
+
await logger.ainfo("Postgres pool stats", **pg)
|
|
140
|
+
|
|
141
|
+
redis = stats.get("redis") or {}
|
|
142
|
+
if redis:
|
|
143
|
+
reporter.record_gauge(
|
|
144
|
+
GAUGE_REDIS_POOL_AVAILABLE, redis.get("idle_connections", 0)
|
|
145
|
+
)
|
|
146
|
+
reporter.record_gauge(GAUGE_REDIS_POOL_SIZE, redis.get("in_use_connections", 0))
|
|
147
|
+
reporter.record_gauge(GAUGE_REDIS_POOL_MAX, redis.get("max_connections", 0))
|
|
148
|
+
await logger.ainfo("Redis pool stats", **redis)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
async def _collect_once(prev_counters: dict[str, int]) -> None:
|
|
152
|
+
reporter = get_otlp_metrics_reporter()
|
|
153
|
+
if not reporter.enabled:
|
|
154
|
+
return
|
|
155
|
+
|
|
156
|
+
# Worker gauges are emitted wherever workers run; _collect_queue_and_workers
|
|
157
|
+
# adds queue depth on the API process only (postgres; inmem skips the DB
|
|
158
|
+
# round-trip).
|
|
159
|
+
try:
|
|
160
|
+
await _collect_queue_and_workers(reporter)
|
|
161
|
+
except Exception as exc:
|
|
162
|
+
await logger.awarning(
|
|
163
|
+
"metrics collector: queue/worker sample failed", exc_info=exc
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# Postgres/Redis pool stats (local Python pools merged with the Go-core pools) exist only on postgres; inmem has no real pools.
|
|
167
|
+
if IS_POSTGRES_OR_GRPC_BACKEND:
|
|
168
|
+
try:
|
|
169
|
+
await _collect_pool(reporter, prev_counters)
|
|
170
|
+
except Exception as exc:
|
|
171
|
+
await logger.awarning("metrics collector: pool sample failed", exc_info=exc)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
async def collector_loop() -> None:
|
|
175
|
+
"""Sample snapshot metrics into the OTLP client every STATS_INTERVAL_SECS."""
|
|
176
|
+
interval = config.STATS_INTERVAL_SECS
|
|
177
|
+
prev_counters: dict[str, int] = {}
|
|
178
|
+
await logger.ainfo("Starting OTLP metrics collector loop", interval_secs=interval)
|
|
179
|
+
try:
|
|
180
|
+
while True:
|
|
181
|
+
await _collect_once(prev_counters)
|
|
182
|
+
await asyncio.sleep(interval)
|
|
183
|
+
except asyncio.CancelledError:
|
|
184
|
+
pass
|
langgraph_api/metrics_otlp.py
CHANGED
|
@@ -4,18 +4,20 @@ import os
|
|
|
4
4
|
import threading
|
|
5
5
|
import time
|
|
6
6
|
from contextlib import contextmanager
|
|
7
|
-
from dataclasses import dataclass
|
|
7
|
+
from dataclasses import dataclass, replace
|
|
8
8
|
from datetime import timedelta
|
|
9
9
|
from typing import TYPE_CHECKING, Any, Literal
|
|
10
10
|
|
|
11
11
|
import structlog
|
|
12
12
|
|
|
13
|
-
from langgraph_api import __version__, config
|
|
13
|
+
from langgraph_api import __version__, config, metadata
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
16
|
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
|
|
17
17
|
OTLPMetricExporter,
|
|
18
18
|
)
|
|
19
|
+
from opentelemetry.exporter.prometheus import PrometheusMetricReader
|
|
20
|
+
from opentelemetry.metrics import Observation
|
|
19
21
|
from opentelemetry.sdk.metrics import Counter, Histogram, MeterProvider
|
|
20
22
|
from opentelemetry.sdk.metrics.export import (
|
|
21
23
|
AggregationTemporality,
|
|
@@ -28,6 +30,7 @@ else:
|
|
|
28
30
|
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
|
|
29
31
|
OTLPMetricExporter,
|
|
30
32
|
)
|
|
33
|
+
from opentelemetry.metrics import Observation
|
|
31
34
|
from opentelemetry.sdk.metrics import Counter, Histogram, MeterProvider
|
|
32
35
|
from opentelemetry.sdk.metrics.export import (
|
|
33
36
|
AggregationTemporality,
|
|
@@ -39,6 +42,7 @@ else:
|
|
|
39
42
|
OTEL_AVAILABLE = True
|
|
40
43
|
except ModuleNotFoundError:
|
|
41
44
|
OTLPMetricExporter = None
|
|
45
|
+
Observation = None
|
|
42
46
|
MeterProvider = None
|
|
43
47
|
PeriodicExportingMetricReader = None
|
|
44
48
|
Resource = None
|
|
@@ -50,12 +54,11 @@ else:
|
|
|
50
54
|
|
|
51
55
|
try:
|
|
52
56
|
from opentelemetry.exporter.prometheus import PrometheusMetricReader
|
|
53
|
-
from prometheus_client import start_http_server
|
|
54
57
|
|
|
55
58
|
PROMETHEUS_EXPORTER_AVAILABLE = True
|
|
56
59
|
except ModuleNotFoundError:
|
|
57
|
-
|
|
58
|
-
|
|
60
|
+
# Fall back to a plain ``object`` base so that _LSDPrometheusReader, which inherits from PrometheusMetricReader, can still be defined
|
|
61
|
+
PrometheusMetricReader = object
|
|
59
62
|
PROMETHEUS_EXPORTER_AVAILABLE = False
|
|
60
63
|
|
|
61
64
|
logger = structlog.stdlib.get_logger(__name__)
|
|
@@ -79,28 +82,62 @@ class MetricDef:
|
|
|
79
82
|
metric_type: MetricType
|
|
80
83
|
name: str
|
|
81
84
|
tier: int
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
+
# True for metrics surfaced on the LSD Deployment UI. This flag partitions the
|
|
86
|
+
# two backends: the Prometheus scrape endpoint serves only these (see
|
|
87
|
+
# _LSDPrometheusReader) so GCP indexes just the Deployment-UI metrics, while
|
|
88
|
+
# Datadog gets only the internal complement (see _DatadogExporter).
|
|
89
|
+
lsd_web_metric: bool = False
|
|
90
|
+
# Human-readable help text. Passed to the OTel instrument as its description,
|
|
91
|
+
# which the Prometheus exporter exposes as the metric's ``# HELP`` line.
|
|
92
|
+
description: str = ""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def def_counter(
|
|
96
|
+
name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
|
|
97
|
+
) -> MetricDef:
|
|
85
98
|
return MetricDef(
|
|
86
|
-
metric_type="counter",
|
|
99
|
+
metric_type="counter",
|
|
100
|
+
name=f"{METRIC_NAME_PREFIX}{name}",
|
|
101
|
+
tier=tier,
|
|
102
|
+
lsd_web_metric=lsd_web_metric,
|
|
103
|
+
description=description,
|
|
87
104
|
)
|
|
88
105
|
|
|
89
106
|
|
|
90
|
-
def def_histogram(
|
|
107
|
+
def def_histogram(
|
|
108
|
+
name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
|
|
109
|
+
) -> MetricDef:
|
|
91
110
|
return MetricDef(
|
|
92
|
-
metric_type="histogram",
|
|
111
|
+
metric_type="histogram",
|
|
112
|
+
name=f"{METRIC_NAME_PREFIX}{name}",
|
|
113
|
+
tier=tier,
|
|
114
|
+
lsd_web_metric=lsd_web_metric,
|
|
115
|
+
description=description,
|
|
93
116
|
)
|
|
94
117
|
|
|
95
118
|
|
|
96
|
-
def def_latency(
|
|
119
|
+
def def_latency(
|
|
120
|
+
name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
|
|
121
|
+
) -> MetricDef:
|
|
97
122
|
return MetricDef(
|
|
98
|
-
metric_type="latency",
|
|
123
|
+
metric_type="latency",
|
|
124
|
+
name=f"{METRIC_NAME_PREFIX}{name}",
|
|
125
|
+
tier=tier,
|
|
126
|
+
lsd_web_metric=lsd_web_metric,
|
|
127
|
+
description=description,
|
|
99
128
|
)
|
|
100
129
|
|
|
101
130
|
|
|
102
|
-
def def_gauge(
|
|
103
|
-
|
|
131
|
+
def def_gauge(
|
|
132
|
+
name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
|
|
133
|
+
) -> MetricDef:
|
|
134
|
+
return MetricDef(
|
|
135
|
+
metric_type="gauge",
|
|
136
|
+
name=f"{METRIC_NAME_PREFIX}{name}",
|
|
137
|
+
tier=tier,
|
|
138
|
+
lsd_web_metric=lsd_web_metric,
|
|
139
|
+
description=description,
|
|
140
|
+
)
|
|
104
141
|
|
|
105
142
|
|
|
106
143
|
# Pre-defined counter metrics.
|
|
@@ -152,23 +189,119 @@ COUNTER_PROTOCOL_V2_RESUME_GAP = def_counter(
|
|
|
152
189
|
COUNTER_PROTOCOL_V2_TRANSPORT_SEND_FAILURE = def_counter(
|
|
153
190
|
"protocol_v2_transport_send_failure_counter", METRIC_TIER_INFO
|
|
154
191
|
)
|
|
192
|
+
# Migrated from meta.py /metrics. Named to expose as `lg_api_http_requests_total`
|
|
193
|
+
# (this exporter version does not double-append `_total`).
|
|
194
|
+
COUNTER_HTTP_REQUESTS = def_counter(
|
|
195
|
+
"http_requests_total", METRIC_TIER_INFO, lsd_web_metric=True
|
|
196
|
+
)
|
|
197
|
+
# Migrated from meta.py /metrics. The exporter appends `_total` to counter names,
|
|
198
|
+
# so these expose as `lg_api_pg_pool_requests_{queued,errors}_total` (idiomatic).
|
|
199
|
+
COUNTER_PG_POOL_REQUESTS_QUEUED = def_counter(
|
|
200
|
+
"pg_pool_requests_queued",
|
|
201
|
+
METRIC_TIER_CRITICAL,
|
|
202
|
+
lsd_web_metric=True,
|
|
203
|
+
description=(
|
|
204
|
+
"Number of postgres connection requests queued because a postgres "
|
|
205
|
+
"connection wasn't immediately available in the pool"
|
|
206
|
+
),
|
|
207
|
+
)
|
|
208
|
+
COUNTER_PG_POOL_REQUESTS_ERRORS = def_counter(
|
|
209
|
+
"pg_pool_requests_errors",
|
|
210
|
+
METRIC_TIER_CRITICAL,
|
|
211
|
+
lsd_web_metric=True,
|
|
212
|
+
description=(
|
|
213
|
+
"Number of postgres connection requests resulting in an error "
|
|
214
|
+
"(timeouts, queue full...)"
|
|
215
|
+
),
|
|
216
|
+
)
|
|
155
217
|
|
|
156
218
|
# Pre-defined latency metrics.
|
|
157
219
|
LATENCY_RUN_EXECUTION = def_latency("run_execution_latency", METRIC_TIER_INFO)
|
|
158
220
|
LATENCY_RUN_QUEUE_WAIT_TIME_1ST_ATTEMPT = def_latency(
|
|
159
|
-
"run_queue_wait_time_1st_attempt",
|
|
221
|
+
"run_queue_wait_time_1st_attempt",
|
|
222
|
+
METRIC_TIER_INFO,
|
|
223
|
+
lsd_web_metric=True,
|
|
224
|
+
description=(
|
|
225
|
+
"Time (milliseconds) spent by jobs waiting in the queue"
|
|
226
|
+
" before getting processed for the first time. "
|
|
227
|
+
),
|
|
160
228
|
)
|
|
161
229
|
LATENCY_RUN_QUEUE_WAIT_TIME_RETRY_ATTEMPT = def_latency(
|
|
162
230
|
"run_queue_wait_time_retry_attempt", METRIC_TIER_INFO
|
|
163
231
|
)
|
|
164
232
|
LATENCY_STREAM_PUBLISH = def_latency("stream_publish_latency", METRIC_TIER_INFO)
|
|
233
|
+
LATENCY_HTTP_REQUEST = def_latency(
|
|
234
|
+
"http_requests_latency",
|
|
235
|
+
METRIC_TIER_INFO,
|
|
236
|
+
lsd_web_metric=True,
|
|
237
|
+
description="HTTP request latency in milliseconds",
|
|
238
|
+
)
|
|
165
239
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
240
|
+
GAUGE_WORKERS_ACTIVE = def_gauge(
|
|
241
|
+
"workers_active", METRIC_TIER_CRITICAL, lsd_web_metric=True
|
|
242
|
+
)
|
|
243
|
+
GAUGE_WORKERS_AVAILABLE = def_gauge(
|
|
244
|
+
"workers_available", METRIC_TIER_CRITICAL, lsd_web_metric=True
|
|
245
|
+
)
|
|
169
246
|
GAUGE_PUBLISH_QUEUE_AVAILABILITY = def_gauge(
|
|
170
247
|
"publish_queue_availability", METRIC_TIER_CRITICAL
|
|
171
248
|
)
|
|
249
|
+
# Snapshot/state gauges pushed by the periodic
|
|
250
|
+
# metrics collector loop (langgraph_api.metrics_collector).
|
|
251
|
+
# Worker gauges are recorded wherever workers run (N_JOBS_PER_WORKER > 0);
|
|
252
|
+
# queue depth and pool stats are recorded on the postgres runtime only.
|
|
253
|
+
GAUGE_WORKERS_MAX = def_gauge("workers_max", METRIC_TIER_CRITICAL, lsd_web_metric=True)
|
|
254
|
+
GAUGE_NUM_PENDING_RUNS = def_gauge(
|
|
255
|
+
"num_pending_runs",
|
|
256
|
+
METRIC_TIER_INFO,
|
|
257
|
+
lsd_web_metric=True,
|
|
258
|
+
description="The number of runs currently pending.",
|
|
259
|
+
)
|
|
260
|
+
GAUGE_NUM_RUNNING_RUNS = def_gauge(
|
|
261
|
+
"num_running_runs",
|
|
262
|
+
METRIC_TIER_INFO,
|
|
263
|
+
lsd_web_metric=True,
|
|
264
|
+
description="The number of runs currently running.",
|
|
265
|
+
)
|
|
266
|
+
GAUGE_PG_POOL_MAX = def_gauge(
|
|
267
|
+
"pg_pool_max",
|
|
268
|
+
METRIC_TIER_CRITICAL,
|
|
269
|
+
lsd_web_metric=True,
|
|
270
|
+
description="The maximum size of the postgres connection pool.",
|
|
271
|
+
)
|
|
272
|
+
GAUGE_PG_POOL_SIZE = def_gauge(
|
|
273
|
+
"pg_pool_size",
|
|
274
|
+
METRIC_TIER_CRITICAL,
|
|
275
|
+
lsd_web_metric=True,
|
|
276
|
+
description=(
|
|
277
|
+
"Number of connections currently managed by the postgres connection "
|
|
278
|
+
"pool (in the pool, given to clients, being prepared)"
|
|
279
|
+
),
|
|
280
|
+
)
|
|
281
|
+
GAUGE_PG_POOL_AVAILABLE = def_gauge(
|
|
282
|
+
"pg_pool_available",
|
|
283
|
+
METRIC_TIER_INFO,
|
|
284
|
+
lsd_web_metric=True,
|
|
285
|
+
description="Number of connections currently idle in the postgres connection pool",
|
|
286
|
+
)
|
|
287
|
+
GAUGE_REDIS_POOL_AVAILABLE = def_gauge(
|
|
288
|
+
"redis_pool_available",
|
|
289
|
+
METRIC_TIER_INFO,
|
|
290
|
+
lsd_web_metric=True,
|
|
291
|
+
description="Number of connections currently idle in the redis connection pool",
|
|
292
|
+
)
|
|
293
|
+
GAUGE_REDIS_POOL_SIZE = def_gauge(
|
|
294
|
+
"redis_pool_size",
|
|
295
|
+
METRIC_TIER_INFO,
|
|
296
|
+
lsd_web_metric=True,
|
|
297
|
+
description="Number of connections currently in use in the redis connection pool",
|
|
298
|
+
)
|
|
299
|
+
GAUGE_REDIS_POOL_MAX = def_gauge(
|
|
300
|
+
"redis_pool_max",
|
|
301
|
+
METRIC_TIER_INFO,
|
|
302
|
+
lsd_web_metric=True,
|
|
303
|
+
description="The maximum size of the redis connection pool.",
|
|
304
|
+
)
|
|
172
305
|
# Protocol v2 sessions retain a bounded replay buffer per run. Track the
|
|
173
306
|
# observed occupancy so operators can tune LSD_PROTOCOL_V2_BUFFER_SIZE before
|
|
174
307
|
# reconnects start seeing resume gaps.
|
|
@@ -182,6 +315,71 @@ HISTOGRAM_PROTOCOL_V2_REPLAYED_EVENTS = def_histogram(
|
|
|
182
315
|
)
|
|
183
316
|
|
|
184
317
|
|
|
318
|
+
# Metric names shown on the LSD Deployment UI. By default this set partitions the
# two metric backends: the Prometheus scrape endpoint exposes only these names
# (see ``_LSDPrometheusReader``), while Datadog is sent only the remaining ones
# (see ``_DatadogExporter``). With EXPOSE_INTERNAL_METRICS_PROMETHEUS set, the
# Prometheus filter is lifted and every metric is served. The set is derived from
# the ``MetricDef`` globals that exist when this statement runs, so it has to stay
# below the metric definitions.
LSD_WEB_METRIC_NAMES: frozenset[str] = frozenset(
    definition.name
    for definition in globals().values()
    if isinstance(definition, MetricDef) and definition.lsd_web_metric
)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _select_metrics(metrics_data: Any, keep) -> Any:
    """Return a copy of ``metrics_data`` keeping only metrics where ``keep(name)``.

    The result is rebuilt with ``dataclasses.replace`` and the input is never
    mutated in place, because the two backend readers share the same SDK metric
    objects. Scope and resource groups left empty by the filter are dropped
    entirely, so a non-empty ``resource_metrics`` on the result guarantees at
    least one metric survived.

    Args:
        metrics_data: Object exposing ``resource_metrics``; each entry exposes
            ``scope_metrics``, whose entries expose ``metrics`` (items with a
            ``name``). ``None`` and objects with empty ``resource_metrics`` are
            returned unchanged.
        keep: Predicate called with each metric name; truthy keeps the metric.

    Returns:
        The filtered copy, or ``metrics_data`` itself when there is nothing to
        filter.
    """
    if metrics_data is None or not metrics_data.resource_metrics:
        return metrics_data
    resource_metrics = []
    for rm in metrics_data.resource_metrics:
        scope_metrics = []
        # ``or ()``: a group whose child sequence is None is treated as empty
        # (and therefore pruned) instead of raising TypeError on iteration.
        for sm in rm.scope_metrics or ():
            kept = [m for m in (sm.metrics or ()) if keep(m.name)]
            if kept:
                scope_metrics.append(replace(sm, metrics=kept))
        if scope_metrics:
            resource_metrics.append(replace(rm, scope_metrics=scope_metrics))
    return replace(metrics_data, resource_metrics=resource_metrics)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _filter_web_metrics(metrics_data: Any) -> Any:
    """Return a copy of ``metrics_data`` restricted to LSD web metrics (for Prometheus/GCP)."""

    def _is_web_metric(name):
        # Membership in the precomputed set of Deployment-UI metric names.
        return name in LSD_WEB_METRIC_NAMES

    return _select_metrics(metrics_data, _is_web_metric)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _drop_web_metrics(metrics_data: Any) -> Any:
    """Return a copy of ``metrics_data`` with every LSD web metric removed (for Datadog)."""

    def _is_internal_metric(name):
        # Anything not in the Deployment-UI set counts as internal.
        return name not in LSD_WEB_METRIC_NAMES

    return _select_metrics(metrics_data, _is_internal_metric)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
class _LSDPrometheusReader(PrometheusMetricReader):
    """Prometheus reader used by this service.

    Unless ``EXPOSE_INTERNAL_METRICS_PROMETHEUS`` is set, only the LSD
    Deployment-UI (``lsd_web_metric``) metrics are served: Prometheus feeds the
    LSD web UI, and internal metrics are sent to Datadog instead (see
    ``_DatadogExporter``). With the flag set, the web filter is skipped and
    every recorded metric is exposed; record-time tier filtering via
    ``METRIC_MAX_EMITTING_TIER`` still applies. The base ``_receive_metrics``
    simply hands the data to its collector, so filtering at this point is
    sufficient.
    """

    def _receive_metrics(
        self,
        metrics_data: Any,
        timeout_millis: float = 10_000,
        **kwargs: Any,
    ) -> None:
        """Apply the web-metric filter (unless disabled) and forward to the base reader."""
        expose_everything = config.EXPOSE_INTERNAL_METRICS_PROMETHEUS
        outgoing = (
            metrics_data if expose_everything else _filter_web_metrics(metrics_data)
        )
        super()._receive_metrics(outgoing, timeout_millis, **kwargs)
|
|
381
|
+
|
|
382
|
+
|
|
185
383
|
def _normalize_emitting_tier(value: int) -> int:
|
|
186
384
|
if value < METRIC_TIER_CRITICAL:
|
|
187
385
|
return METRIC_TIER_CRITICAL
|
|
@@ -190,8 +388,15 @@ def _normalize_emitting_tier(value: int) -> int:
|
|
|
190
388
|
return value
|
|
191
389
|
|
|
192
390
|
|
|
193
|
-
class
|
|
194
|
-
"""
|
|
391
|
+
class _DatadogExporter(MetricExporter):
|
|
392
|
+
"""Datadog exporter wrapper: drops LSD web metrics, and skips export when
|
|
393
|
+
nothing remains.
|
|
394
|
+
|
|
395
|
+
Web metrics are served to the LSD Deployment UI via Prometheus only (see
|
|
396
|
+
``_LSDPrometheusReader``); Datadog receives only the internal complement. The
|
|
397
|
+
drop happens here rather than at record time because the same SDK metric
|
|
398
|
+
objects feed both backend readers.
|
|
399
|
+
"""
|
|
195
400
|
|
|
196
401
|
def __init__(self, exporter: MetricExporter):
|
|
197
402
|
super().__init__()
|
|
@@ -205,16 +410,12 @@ class _FilteringExporter(MetricExporter):
|
|
|
205
410
|
timeout_millis: float = 10_000,
|
|
206
411
|
**kwargs: Any,
|
|
207
412
|
) -> Any:
|
|
208
|
-
|
|
413
|
+
# _drop_web_metrics prunes emptied groups, so a non-empty resource_metrics
|
|
414
|
+
# guarantees there is at least one internal metric point left to export.
|
|
415
|
+
filtered = _drop_web_metrics(metrics_data)
|
|
416
|
+
if not filtered or not filtered.resource_metrics:
|
|
209
417
|
return None
|
|
210
|
-
|
|
211
|
-
if resource_metric.scope_metrics:
|
|
212
|
-
for scope_metric in resource_metric.scope_metrics:
|
|
213
|
-
if scope_metric.metrics:
|
|
214
|
-
return self._exporter.export(
|
|
215
|
-
metrics_data, timeout_millis, **kwargs
|
|
216
|
-
)
|
|
217
|
-
return None
|
|
418
|
+
return self._exporter.export(filtered, timeout_millis, **kwargs)
|
|
218
419
|
|
|
219
420
|
def shutdown(self, timeout_millis: float = 30_000, **kwargs: Any) -> None:
|
|
220
421
|
self._exporter.shutdown(timeout_millis, **kwargs)
|
|
@@ -232,8 +433,17 @@ class OTelMetricsReporter:
|
|
|
232
433
|
self._meter = None
|
|
233
434
|
self._max_tier = _normalize_emitting_tier(config.METRIC_MAX_EMITTING_TIER)
|
|
234
435
|
self._instruments: dict[str, Any] = {}
|
|
436
|
+
# Initializes a gauge values cache that is read when querying /metrics.
|
|
437
|
+
# Sync gauges are prone to flapping values: a sync gauge doesn't report the metric
|
|
438
|
+
# if the value wasn't set recently. By using a cache, it consistently reports the metric
|
|
439
|
+
# value when scraped. Guarded by ``_gauge_lock`` because
|
|
440
|
+
# callbacks run on the SDK collection thread, ``record_gauge`` on others.
|
|
441
|
+
self._gauge_lock = threading.Lock()
|
|
442
|
+
self._gauge_values: dict[str, dict[tuple, tuple[dict[str, Any], float]]] = {}
|
|
443
|
+
self._observable_gauges: dict[str, Any] = {}
|
|
444
|
+
# Labels attached to every metric (set in ``initialize``)
|
|
445
|
+
self._common_attributes: dict[str, str] = {}
|
|
235
446
|
self._prom_enabled = False
|
|
236
|
-
self._prom_server: Any | None = None
|
|
237
447
|
|
|
238
448
|
@property
|
|
239
449
|
def enabled(self) -> bool:
|
|
@@ -245,14 +455,6 @@ class OTelMetricsReporter:
|
|
|
245
455
|
return
|
|
246
456
|
self._initialized = True
|
|
247
457
|
|
|
248
|
-
if (
|
|
249
|
-
not config.DATADOG_METRICS_ENABLED
|
|
250
|
-
and not config.LSD_PROM_METRICS_ENABLED
|
|
251
|
-
):
|
|
252
|
-
logger.info(
|
|
253
|
-
"OTel metrics disabled (no DD API key and Prometheus not enabled)"
|
|
254
|
-
)
|
|
255
|
-
return
|
|
256
458
|
if not OTEL_AVAILABLE:
|
|
257
459
|
logger.warning(
|
|
258
460
|
"OTel metrics disabled because OpenTelemetry dependencies are not installed"
|
|
@@ -301,22 +503,23 @@ class OTelMetricsReporter:
|
|
|
301
503
|
|
|
302
504
|
readers.append(
|
|
303
505
|
PeriodicExportingMetricReader(
|
|
304
|
-
|
|
506
|
+
_DatadogExporter(base_exporter),
|
|
305
507
|
export_interval_millis=10_000,
|
|
306
508
|
)
|
|
307
509
|
)
|
|
308
510
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
511
|
+
# Prometheus metrics are always exported (served via /metrics).
|
|
512
|
+
if not PROMETHEUS_EXPORTER_AVAILABLE:
|
|
513
|
+
logger.error(
|
|
514
|
+
"Prometheus metrics disabled: opentelemetry-exporter-prometheus not installed"
|
|
515
|
+
)
|
|
516
|
+
else:
|
|
517
|
+
# PrometheusMetricReader registers its collector with the
|
|
518
|
+
# global prometheus_client REGISTRY, which the /metrics
|
|
519
|
+
# endpoint serves via generate_latest() (see api/meta.py).
|
|
520
|
+
# Prometheus serves only Deployment-UI metrics.
|
|
521
|
+
readers.append(_LSDPrometheusReader())
|
|
522
|
+
self._prom_enabled = True
|
|
320
523
|
|
|
321
524
|
if not readers:
|
|
322
525
|
logger.info(
|
|
@@ -328,6 +531,12 @@ class OTelMetricsReporter:
|
|
|
328
531
|
resource=resource, metric_readers=readers
|
|
329
532
|
)
|
|
330
533
|
self._meter = self._meter_provider.get_meter(SERVICE_NAME)
|
|
534
|
+
# Labels added to every metric, matching the legacy /metrics.
|
|
535
|
+
self._common_attributes = {
|
|
536
|
+
"project_id": metadata.PROJECT_ID or "",
|
|
537
|
+
"revision_id": metadata.HOST_REVISION_ID or "",
|
|
538
|
+
"deployment_type": metadata.DEPLOYMENT_TYPE or "",
|
|
539
|
+
}
|
|
331
540
|
self._enabled = True
|
|
332
541
|
|
|
333
542
|
if config.DATADOG_METRICS_ENABLED:
|
|
@@ -335,8 +544,6 @@ class OTelMetricsReporter:
|
|
|
335
544
|
"Datadog OTLP metrics reader initialized",
|
|
336
545
|
endpoint=f"https://{config.LSD_DD_ENDPOINT}/v1/metrics",
|
|
337
546
|
)
|
|
338
|
-
if self._prom_enabled:
|
|
339
|
-
self._start_prometheus_server()
|
|
340
547
|
|
|
341
548
|
logger.info(
|
|
342
549
|
"OTel metrics reporter initialized",
|
|
@@ -358,38 +565,25 @@ class OTelMetricsReporter:
|
|
|
358
565
|
logger.exception("Failed to initialize OTel metrics reporter")
|
|
359
566
|
raise
|
|
360
567
|
|
|
361
|
-
def _start_prometheus_server(self) -> None:
|
|
362
|
-
port = config.LSD_PROM_METRICS_PORT
|
|
363
|
-
# ``start_http_server`` spins up a WSGI server in a daemon thread serving
|
|
364
|
-
# the global prometheus_client REGISTRY, and returns the (server, thread)
|
|
365
|
-
# handle. Keep the server so ``shutdown`` can stop it cleanly (e.g. in
|
|
366
|
-
# tests); in production the daemon thread exits with the process anyway.
|
|
367
|
-
server, _thread = start_http_server(port=port)
|
|
368
|
-
self._prom_server = server
|
|
369
|
-
logger.info("Prometheus metrics scrape server started", port=port)
|
|
370
|
-
|
|
371
568
|
def shutdown(self) -> None:
|
|
372
569
|
with self._lock:
|
|
373
570
|
if self._meter_provider:
|
|
374
571
|
try:
|
|
572
|
+
# Unregisters the Prometheus reader's collector from the global
|
|
573
|
+
# prometheus_client REGISTRY (and flushes/stops other readers).
|
|
375
574
|
self._meter_provider.shutdown()
|
|
376
575
|
except Exception:
|
|
377
576
|
logger.exception("Failed to shutdown OTel metrics reporter")
|
|
378
577
|
finally:
|
|
379
578
|
self._meter_provider = None
|
|
380
579
|
self._meter = None
|
|
381
|
-
if self._prom_server is not None:
|
|
382
|
-
try:
|
|
383
|
-
self._prom_server.shutdown()
|
|
384
|
-
self._prom_server.server_close()
|
|
385
|
-
except Exception:
|
|
386
|
-
logger.exception("Failed to stop Prometheus scrape server")
|
|
387
|
-
finally:
|
|
388
|
-
self._prom_server = None
|
|
389
580
|
self._prom_enabled = False
|
|
390
581
|
self._enabled = False
|
|
391
582
|
self._initialized = False
|
|
392
583
|
self._instruments.clear()
|
|
584
|
+
with self._gauge_lock:
|
|
585
|
+
self._gauge_values.clear()
|
|
586
|
+
self._observable_gauges.clear()
|
|
393
587
|
|
|
394
588
|
def _instrument_name(self, metric_name: str) -> str:
|
|
395
589
|
return metric_name
|
|
@@ -397,22 +591,69 @@ class OTelMetricsReporter:
|
|
|
397
591
|
def _tier_enabled(self, tier: int) -> bool:
|
|
398
592
|
return _normalize_emitting_tier(tier) <= self._max_tier
|
|
399
593
|
|
|
594
|
+
def _should_emit(self, metric: MetricDef) -> bool:
    """Decide whether a sample for ``metric`` should be recorded.

    Nothing is emitted while the reporter is disabled or has no meter.
    Otherwise ``lsd_web_metric`` metrics skip the tier check: they back the
    LSD Deployment UI (served by the Prometheus reader) and must be emitted
    even on low-tier deployments (dev/dev_free default
    ``METRIC_MAX_EMITTING_TIER`` to 1/CRITICAL). The tier gate runs before the
    MeterProvider, so a dropped sample never reaches any reader, Prometheus
    included.
    """
    reporter_ready = self._enabled and self._meter
    if not reporter_ready:
        return False
    return metric.lsd_web_metric or self._tier_enabled(metric.tier)
|
|
606
|
+
|
|
400
607
|
def _get_or_create_instrument(self, metric: MetricDef):
    """Return the cached instrument for ``metric``, creating it on first use.

    Counters map to ``create_counter``; histogram and latency metrics map to
    ``create_histogram``. Any other metric type raises ``ValueError``.
    """
    name = self._instrument_name(metric.name)
    cached = self._instruments.get(name)
    if cached is not None:
        return cached

    if metric.metric_type == "counter":
        factory = self._meter.create_counter
    elif metric.metric_type in {"histogram", "latency"}:
        factory = self._meter.create_histogram
    else:
        # Gauges are handled via observable instruments (see _set_gauge).
        raise ValueError(f"Unsupported metric type: {metric.metric_type}")

    created = factory(name=name, description=metric.description)
    self._instruments[name] = created
    return created
|
|
415
625
|
|
|
626
|
+
def _make_gauge_callback(self, name: str):
    """Create the observable-gauge callback for the gauge called ``name``.

    The callback produces one ``Observation`` per attribute-set cached for
    ``name``, so the last sampled value is re-reported each time it runs
    (no flapping).
    """

    def _callback(_options: Any):
        # Snapshot under the lock; build the observations after releasing it.
        with self._gauge_lock:
            cached = tuple(self._gauge_values.get(name, {}).values())
        observations = []
        for attrs, value in cached:
            observations.append(Observation(value, attributes=attrs))
        return observations

    return _callback
|
|
639
|
+
|
|
640
|
+
def _with_common(self, attributes: dict[str, Any] | None) -> dict[str, Any]:
    """Return a new dict of the shared labels overlaid with ``attributes``.

    The shared labels are project_id/revision_id/deployment_type. Per-call
    values win on key conflicts; ``None`` or empty ``attributes`` yields just
    the shared labels.
    """
    merged = dict(self._common_attributes)
    merged.update(attributes or {})
    return merged
|
|
644
|
+
|
|
645
|
+
def _set_gauge(self, metric: MetricDef, value: float, attributes: dict) -> None:
    """Cache the latest gauge sample and register its observable gauge once.

    Args:
        metric: Gauge definition; ``name`` and ``description`` are read.
        value: Sample value, stored as ``float``.
        attributes: Labels identifying the series. The cache key is the
            sorted tuple of its items, so label values must be hashable.

    Raises:
        TypeError/ValueError: If ``value`` cannot be converted to ``float`` or
            the attribute items cannot be used as a key.
    """
    name = metric.name
    key = tuple(sorted(attributes.items()))
    # Store a snapshot of the labels: keeping the caller's dict by reference
    # would let a later mutation desync the reported labels from ``key``.
    point = (dict(attributes), float(value))
    with self._gauge_lock:
        self._gauge_values.setdefault(name, {})[key] = point
    # Register only after releasing ``_gauge_lock``: the gauge callback takes
    # that same lock, so we avoid calling into the SDK while holding it.
    # NOTE(review): confirm against the SDK's locking during collection.
    if name not in self._observable_gauges:
        self._observable_gauges[name] = self._meter.create_observable_gauge(
            name=name,
            description=metric.description,
            callbacks=[self._make_gauge_callback(name)],
        )
|
|
656
|
+
|
|
416
657
|
def inc_counter(
|
|
417
658
|
self,
|
|
418
659
|
metric: MetricDef,
|
|
@@ -421,11 +662,11 @@ class OTelMetricsReporter:
|
|
|
421
662
|
) -> None:
|
|
422
663
|
if metric.metric_type != "counter":
|
|
423
664
|
raise ValueError(f"{metric.name} is not a counter metric")
|
|
424
|
-
if not self.
|
|
665
|
+
if not self._should_emit(metric):
|
|
425
666
|
return
|
|
426
667
|
instrument = self._get_or_create_instrument(metric)
|
|
427
668
|
try:
|
|
428
|
-
instrument.add(value, attributes
|
|
669
|
+
instrument.add(value, self._with_common(attributes))
|
|
429
670
|
except Exception:
|
|
430
671
|
logger.warning("Failed to add counter", metric_name=metric.name)
|
|
431
672
|
|
|
@@ -437,11 +678,11 @@ class OTelMetricsReporter:
|
|
|
437
678
|
) -> None:
|
|
438
679
|
if metric.metric_type != "histogram":
|
|
439
680
|
raise ValueError(f"{metric.name} is not a histogram metric")
|
|
440
|
-
if not self.
|
|
681
|
+
if not self._should_emit(metric):
|
|
441
682
|
return
|
|
442
683
|
instrument = self._get_or_create_instrument(metric)
|
|
443
684
|
try:
|
|
444
|
-
instrument.record(value, attributes
|
|
685
|
+
instrument.record(value, self._with_common(attributes))
|
|
445
686
|
except Exception:
|
|
446
687
|
logger.warning("Failed to record histogram", metric_name=metric.name)
|
|
447
688
|
|
|
@@ -458,11 +699,11 @@ class OTelMetricsReporter:
|
|
|
458
699
|
else:
|
|
459
700
|
seconds = float(duration_seconds)
|
|
460
701
|
value = seconds * 1000
|
|
461
|
-
if not self.
|
|
702
|
+
if not self._should_emit(metric):
|
|
462
703
|
return
|
|
463
704
|
instrument = self._get_or_create_instrument(metric)
|
|
464
705
|
try:
|
|
465
|
-
instrument.record(value, attributes
|
|
706
|
+
instrument.record(value, self._with_common(attributes))
|
|
466
707
|
except Exception:
|
|
467
708
|
logger.warning("Failed to record latency", metric_name=metric.name)
|
|
468
709
|
|
|
@@ -474,11 +715,11 @@ class OTelMetricsReporter:
|
|
|
474
715
|
) -> None:
|
|
475
716
|
if metric.metric_type != "gauge":
|
|
476
717
|
raise ValueError(f"{metric.name} is not a gauge metric")
|
|
477
|
-
if not self.
|
|
718
|
+
if not self._should_emit(metric):
|
|
478
719
|
return
|
|
479
|
-
instrument = self._get_or_create_instrument(metric)
|
|
480
720
|
try:
|
|
481
|
-
|
|
721
|
+
# Cache the value; an observable gauge re-reports it on every scrape.
|
|
722
|
+
self._set_gauge(metric, value, self._with_common(attributes))
|
|
482
723
|
except Exception:
|
|
483
724
|
logger.warning("Failed to record gauge", metric_name=metric.name)
|
|
484
725
|
|
|
@@ -488,7 +729,7 @@ class OTelMetricsReporter:
|
|
|
488
729
|
metric: MetricDef,
|
|
489
730
|
attributes: dict[str, Any] | None = None,
|
|
490
731
|
):
|
|
491
|
-
if not self.
|
|
732
|
+
if not self._should_emit(metric):
|
|
492
733
|
yield
|
|
493
734
|
return
|
|
494
735
|
start = time.perf_counter()
|
|
@@ -7,6 +7,12 @@ from starlette.types import Message, Receive, Scope, Send
|
|
|
7
7
|
|
|
8
8
|
from langgraph_api.config import MOUNT_PREFIX
|
|
9
9
|
from langgraph_api.http_metrics import HTTP_METRICS_COLLECTOR
|
|
10
|
+
from langgraph_api.http_metrics_utils import get_route, should_filter_route
|
|
11
|
+
from langgraph_api.metrics_otlp import (
|
|
12
|
+
COUNTER_HTTP_REQUESTS,
|
|
13
|
+
LATENCY_HTTP_REQUEST,
|
|
14
|
+
get_otlp_metrics_reporter,
|
|
15
|
+
)
|
|
10
16
|
from langgraph_api.utils.headers import should_include_header_in_logs
|
|
11
17
|
|
|
12
18
|
asgi = structlog.stdlib.get_logger("asgi")
|
|
@@ -150,6 +156,24 @@ class AccessLoggerMiddleware:
|
|
|
150
156
|
|
|
151
157
|
if method and route and status:
|
|
152
158
|
HTTP_METRICS_COLLECTOR.record_request(method, route, status, latency)
|
|
159
|
+
route_path = get_route(route)
|
|
160
|
+
if route_path is not None and not should_filter_route(route_path):
|
|
161
|
+
reporter = get_otlp_metrics_reporter()
|
|
162
|
+
reporter.inc_counter(
|
|
163
|
+
COUNTER_HTTP_REQUESTS,
|
|
164
|
+
attributes={
|
|
165
|
+
"method": method,
|
|
166
|
+
"path": route_path,
|
|
167
|
+
"status": str(status),
|
|
168
|
+
},
|
|
169
|
+
)
|
|
170
|
+
# record_latency takes seconds and stores milliseconds; latency
|
|
171
|
+
# is already in ms, so convert it to seconds before passing it.
|
|
172
|
+
reporter.record_latency(
|
|
173
|
+
LATENCY_HTTP_REQUEST,
|
|
174
|
+
latency / 1000.0,
|
|
175
|
+
attributes={"method": method, "path": route_path},
|
|
176
|
+
)
|
|
153
177
|
qs = scope.get("query_string")
|
|
154
178
|
first_byte_time = info["first_byte_time"]
|
|
155
179
|
ttfb_ms = (
|
|
@@ -65,6 +65,7 @@ def _ensure_port_available(host: str, port: int) -> None:
|
|
|
65
65
|
|
|
66
66
|
async def health_and_metrics_server():
|
|
67
67
|
import uvicorn # noqa: PLC0415
|
|
68
|
+
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest # noqa: PLC0415
|
|
68
69
|
from starlette.applications import Starlette # noqa: PLC0415
|
|
69
70
|
from starlette.requests import Request # noqa: PLC0415
|
|
70
71
|
from starlette.responses import JSONResponse, PlainTextResponse # noqa: PLC0415
|
|
@@ -91,42 +92,18 @@ async def health_and_metrics_server():
|
|
|
91
92
|
)
|
|
92
93
|
metrics_format = "prometheus"
|
|
93
94
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
resp = {
|
|
107
|
-
**pg_redis_stats,
|
|
108
|
-
"workers": worker_metrics,
|
|
109
|
-
}
|
|
110
|
-
return JSONResponse(resp)
|
|
111
|
-
elif metrics_format == "prometheus":
|
|
112
|
-
metrics_lines = [
|
|
113
|
-
"# HELP lg_api_workers_max The maximum number of workers available.",
|
|
114
|
-
"# TYPE lg_api_workers_max gauge",
|
|
115
|
-
f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
|
|
116
|
-
"# HELP lg_api_workers_active The number of currently active workers.",
|
|
117
|
-
"# TYPE lg_api_workers_active gauge",
|
|
118
|
-
f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
|
|
119
|
-
"# HELP lg_api_workers_available The number of available (idle) workers.",
|
|
120
|
-
"# TYPE lg_api_workers_available gauge",
|
|
121
|
-
f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
|
|
122
|
-
]
|
|
123
|
-
|
|
124
|
-
metrics_lines.extend(pg_redis_stats)
|
|
125
|
-
|
|
126
|
-
return PlainTextResponse(
|
|
127
|
-
"\n".join(metrics_lines),
|
|
128
|
-
media_type="text/plain; version=0.0.4; charset=utf-8",
|
|
129
|
-
)
|
|
95
|
+
if metrics_format == "prometheus":
|
|
96
|
+
# Served from THIS process's OTLP Prometheus registry. The collector
|
|
97
|
+
# runs in every process, so this queue worker exposes its worker
|
|
98
|
+
# gauges and its own pool stats — but NOT queue depth
|
|
99
|
+
# (num_pending/num_running), which the collector emits on the API
|
|
100
|
+
# process only. Mirrors the main API /metrics (api/meta.py:meta_metrics).
|
|
101
|
+
return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
|
|
102
|
+
|
|
103
|
+
# JSON: hand-built snapshot of workers + pool stats.
|
|
104
|
+
worker_metrics = get_metrics()["workers"]
|
|
105
|
+
pg_redis_stats = await meta_pool_stats()
|
|
106
|
+
return JSONResponse({**pg_redis_stats, "workers": worker_metrics})
|
|
130
107
|
|
|
131
108
|
routes = [
|
|
132
109
|
Route("/ok", health_endpoint),
|
|
@@ -287,7 +287,7 @@ def _get_pool_stats():
|
|
|
287
287
|
# so we submit this as a coro to run in the main event loop
|
|
288
288
|
async def _fetch_pool_stats():
|
|
289
289
|
try:
|
|
290
|
-
return await meta_pool_stats(
|
|
290
|
+
return await meta_pool_stats()
|
|
291
291
|
except Exception as e:
|
|
292
292
|
logger.warning("Failed to get pool stats", exc_info=e)
|
|
293
293
|
return {"postgres": {}, "redis": {}}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: langgraph-api
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.12.0.dev1
|
|
4
4
|
Author-email: Will Fu-Hinthorn <will@langchain.dev>, Josh Rogers <josh@langchain.dev>, Parker Rule <parker@langchain.dev>
|
|
5
5
|
License: Elastic-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -16,7 +16,7 @@ Requires-Dist: jsonschema-rs<0.45,>=0.20.0
|
|
|
16
16
|
Requires-Dist: langchain-core>=0.3.64
|
|
17
17
|
Requires-Dist: langchain-protocol<0.1,>=0.0.16
|
|
18
18
|
Requires-Dist: langgraph-checkpoint<5,>=3.0.1
|
|
19
|
-
Requires-Dist: langgraph-runtime-inmem<0.
|
|
19
|
+
Requires-Dist: langgraph-runtime-inmem<0.32.0.dev0,>=0.31.0.dev0
|
|
20
20
|
Requires-Dist: langgraph-sdk>=0.3.5
|
|
21
21
|
Requires-Dist: langgraph<2,>=0.4.10
|
|
22
22
|
Requires-Dist: langsmith[otel]>=0.6.3
|
|
@@ -29,7 +29,7 @@ Requires-Dist: prometheus-client>=0.0.1
|
|
|
29
29
|
Requires-Dist: protobuf<7.0.0,>=6.32.1
|
|
30
30
|
Requires-Dist: pyjwt>=2.9.0
|
|
31
31
|
Requires-Dist: sse-starlette<3.4.0,>=2.1.3
|
|
32
|
-
Requires-Dist: starlette>=0.
|
|
32
|
+
Requires-Dist: starlette>=1.0.1
|
|
33
33
|
Requires-Dist: structlog<26,>=24.1.0
|
|
34
34
|
Requires-Dist: tenacity>=8.0.0
|
|
35
35
|
Requires-Dist: truststore>=0.1
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
langgraph_api/__init__.py,sha256=
|
|
1
|
+
langgraph_api/__init__.py,sha256=mzCq9ao7wd_jc3lCw0iVM8xH_PSoH6l9__-Lr3pxcKA,28
|
|
2
2
|
langgraph_api/_factory_utils.py,sha256=5JsiJbg_YocVSryN2jwoZTg03-eyymlWMK6sKCmXwz0,5756
|
|
3
3
|
langgraph_api/asgi_transport.py,sha256=XApY3lIWBZTMbbsl8dDJzl0cLGirmAGE0SifqZUnXvs,11896
|
|
4
4
|
langgraph_api/asyncio.py,sha256=c-YE-14N7_AP1GzifsbP14XnhLsmxT2P916KXruerpI,10573
|
|
@@ -14,15 +14,16 @@ langgraph_api/http_metrics.py,sha256=etxbZNmYxdb58DVLNkHP7S-N6njXPTiQh2OWKMaIZi8
|
|
|
14
14
|
langgraph_api/http_metrics_utils.py,sha256=sjxF7SYGTzY0Wz_G0dzatsYNnWr31S6ujej4JmBG2yo,866
|
|
15
15
|
langgraph_api/logging.py,sha256=V1RCnqVLuMvJtrBiyMMLfaEdbS3k5A2M8Unhr4FUUdQ,6801
|
|
16
16
|
langgraph_api/metadata.py,sha256=ih2et_R0prFsCzikQ4_L0j9up7t0rObAMVKyEk7ienI,9778
|
|
17
|
-
langgraph_api/
|
|
17
|
+
langgraph_api/metrics_collector.py,sha256=gMLHL18rJyYl985AOmu9eH7W1ttdRdkPHzeyczjCOBw,8280
|
|
18
|
+
langgraph_api/metrics_otlp.py,sha256=t9oJrxfxY2O5jY4JW2gONPKoBiBuklhzCrnZvn1qTxQ,28730
|
|
18
19
|
langgraph_api/otel_context.py,sha256=DWFwW4Yu88QY4W2J0IRcURR450Th9J2DupvDDkSkMBA,7166
|
|
19
20
|
langgraph_api/patch.py,sha256=ViUknYvyQWS6y0f5XuaEoci2qB_mQv8vZl-oaUxsI6M,1448
|
|
20
|
-
langgraph_api/queue_entrypoint.py,sha256
|
|
21
|
+
langgraph_api/queue_entrypoint.py,sha256=-9YnY_GhmDxEiGCc3k-7UqRKK_M3dPriits2iGgYlgU,11327
|
|
21
22
|
langgraph_api/release_tags.py,sha256=BjgGj2vFcA7I0MDRXLw1sUA4jquz-DaKVS0Eq-dYSjE,9091
|
|
22
23
|
langgraph_api/route.py,sha256=_KE8A8Q-J-QfqjGlyM2Kc6n5cirmgt8xmI5-pI8kVEE,8837
|
|
23
24
|
langgraph_api/schema.py,sha256=I_ciXy4YE3Ri4-PAWIvqLNRH2FpC4goTkKwfFwk6wIQ,15100
|
|
24
25
|
langgraph_api/self_hosted_logs.py,sha256=FoUkPdtpt-nuEhejne8o1Q2phE9CccoHdoR_PvXPcBU,4442
|
|
25
|
-
langgraph_api/self_hosted_metrics.py,sha256=
|
|
26
|
+
langgraph_api/self_hosted_metrics.py,sha256=pWsQQ-2ukoFIbmVfzNOSkwCqZ5Cnts6pRSWTII44Ll4,16844
|
|
26
27
|
langgraph_api/serde.py,sha256=V3fO9bkUOlBX3okw5Qi31nlcr59fcuXMgL7DHNyarZY,8855
|
|
27
28
|
langgraph_api/server.py,sha256=1eAZPim0Pkgh5oGS4EvW-_7Zh_82iGOZtR1rpX08FoA,11216
|
|
28
29
|
langgraph_api/sse.py,sha256=cChZ7raQUHp8p5BreE_5wMBR8lFO0n7746sV8_HQOrc,4822
|
|
@@ -41,7 +42,7 @@ langgraph_api/api/__init__.py,sha256=Zu1ew3dxYZu7cLRAjn-6HcYmtuQBdihlVFMKMJ77Y3c
|
|
|
41
42
|
langgraph_api/api/a2a.py,sha256=VPllgqfoLUQD6Eqob3RjcegjtKgLhphNGTrTqbNLoIY,95135
|
|
42
43
|
langgraph_api/api/assistants.py,sha256=4v1TpkeeSF7vFrbnOKIvh7BY4K0WamzEdMeTAzwRElE,20786
|
|
43
44
|
langgraph_api/api/event_streaming.py,sha256=nvoaKz4QGklX5YUmY9WQ3vSwhQ1Q81QeQWNR8aEXUz8,17571
|
|
44
|
-
langgraph_api/api/meta.py,sha256=
|
|
45
|
+
langgraph_api/api/meta.py,sha256=4vqfgJYIqaqwZPfdmvHN00Co7SdeYSnQ1STxyvGWVDw,4698
|
|
45
46
|
langgraph_api/api/openapi.py,sha256=Zkdlb9mjrQyHro1TtrDIWVuaBDovxx-uGWJ1fZMOg54,12604
|
|
46
47
|
langgraph_api/api/profile.py,sha256=CA1ZkHALOuP8orYTICnEhcG_JnnA2wnyjbWyeb117jA,3455
|
|
47
48
|
langgraph_api/api/runs.py,sha256=h5droLgaz_aAyILCRJIpbj2KH1PbijCeXcggOSa3Zww,35178
|
|
@@ -63,7 +64,7 @@ langgraph_api/auth/studio_user.py,sha256=gNCicIo6cYaHmFj2sEdsvDYkKW7NWfGXGS2tTAM
|
|
|
63
64
|
langgraph_api/auth/langsmith/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
65
|
langgraph_api/auth/langsmith/backend.py,sha256=Y6-VxD7zfV1jzGdjmQ66CgNa3SenLbo3d_375CcKZ9U,3770
|
|
65
66
|
langgraph_api/auth/langsmith/client.py,sha256=79kwCVeHU64nsHsxWipfZhf44lM6vfs2nlfTxlJF6LU,4142
|
|
66
|
-
langgraph_api/config/__init__.py,sha256=
|
|
67
|
+
langgraph_api/config/__init__.py,sha256=qj7HF1XmojpG6WXahlFxrMZiO8hUI6QlPNVT-32xfvs,25528
|
|
67
68
|
langgraph_api/config/_parse.py,sha256=VXQPKzqtIsZrRy-nUEBMDESBxXzqFRQNiqsvAZeX3HU,3921
|
|
68
69
|
langgraph_api/config/schemas.py,sha256=rYqu67fZxmtCOU-Zc1s3265KbRbqK8PmfvfwvrAmd-Q,20863
|
|
69
70
|
langgraph_api/encryption/__init__.py,sha256=gaCZ00CocSbqSqrDn6XJHaSp2CZCnC8qnrD9G4fbzyI,363
|
|
@@ -125,7 +126,7 @@ langgraph_api/lc_security/policy.py,sha256=tW0IACvPCeJZbcsUKv9egk0LapF5gL3hxF9Ao
|
|
|
125
126
|
langgraph_api/lc_security/transport.py,sha256=AgiAsPLNP135ag30XPwVDkMNhomiYJwGYUKs0TfaWSI,7183
|
|
126
127
|
langgraph_api/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
128
|
langgraph_api/middleware/ensure_store.py,sha256=KzgAsLPloPD8mcQG-5v7kguzDMFrs9PJz2-xK5-rV0Q,1351
|
|
128
|
-
langgraph_api/middleware/http_logger.py,sha256=
|
|
129
|
+
langgraph_api/middleware/http_logger.py,sha256=jjqLBPqoGRC1UfB2VYKPY2tkq6gT7Rm88DnAHBlwuTw,8231
|
|
129
130
|
langgraph_api/middleware/private_network.py,sha256=eQEzWI8epBNUCiNsMu9O27ofHBQ45M0p2OZy5YdUYos,2097
|
|
130
131
|
langgraph_api/middleware/request_id.py,sha256=-p230Q5jDJAJLmSZRqQvB4dFFkJS9B4Vwg6pUgQtI24,1259
|
|
131
132
|
langgraph_api/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -228,8 +229,8 @@ langgraph_grpc_common/proto/errors_pb2.py,sha256=JI6x-vBK1AE7DHZ5DQwN1mZWF6C4xTR
|
|
|
228
229
|
langgraph_grpc_common/proto/errors_pb2.pyi,sha256=rd3-BYUH8V-aO66taL7OOblaLgdrDtf1Vcd38GUoVVM,2181
|
|
229
230
|
langgraph_grpc_common/proto/errors_pb2_grpc.py,sha256=2-LwQ0OPGo-NtC0269q7Fw6GPBxnTLYWq3xP5Eq0_YA,886
|
|
230
231
|
langgraph_grpc_common/proto/errors_pb2_grpc.pyi,sha256=uC9Wnq6uyg488QiONpJ0ba1s_iouQCOYsjd_FDd1XUM,495
|
|
231
|
-
langgraph_api-0.
|
|
232
|
-
langgraph_api-0.
|
|
233
|
-
langgraph_api-0.
|
|
234
|
-
langgraph_api-0.
|
|
235
|
-
langgraph_api-0.
|
|
232
|
+
langgraph_api-0.12.0.dev1.dist-info/METADATA,sha256=T_4d-LSGhM29_XbrhbUhbSvblmXtgsfPeDugmsCBuLU,4629
|
|
233
|
+
langgraph_api-0.12.0.dev1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
234
|
+
langgraph_api-0.12.0.dev1.dist-info/entry_points.txt,sha256=hGedv8n7cgi41PypMfinwS_HfCwA7xJIfS0jAp8htV8,78
|
|
235
|
+
langgraph_api-0.12.0.dev1.dist-info/licenses/LICENSE,sha256=ZPwVR73Biwm3sK6vR54djCrhaRiM4cAD2zvOQZV8Xis,3859
|
|
236
|
+
langgraph_api-0.12.0.dev1.dist-info/RECORD,,
|
|
File without changes
|
{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|