PyPI - langgraph-api - Versions diffs - 0.11.0.dev9__py3-none-any.whl → 0.12.0.dev1__py3-none-any.whl - Mend

langgraph-api 0.11.0.dev9__py3-none-any.whl → 0.12.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

langgraph_api/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.11.0.dev9"
1	+ __version__ = "0.12.0.dev1"

langgraph_api/api/meta.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import langgraph.version
 import structlog
+from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
 from starlette.responses import JSONResponse, PlainTextResponse
 from langgraph_api import __version__, config, metadata
@@ -46,54 +47,6 @@ def _merge_pool_stats(local: PoolStats, remote: PoolStats) -> PoolStats:
     return merged
-def _pool_stats_to_prometheus_lines(
-    stats: PoolStats,
-    project_id: str | None,
-    revision_id: str | None,
-    deployment_type: str = "",
-) -> list[str]:
-    """Format merged pool stats as Prometheus text lines (same format as langgraph_runtime.database.pool_stats)."""
-    labels = f'project_id="{project_id}", revision_id="{revision_id}", deployment_type="{deployment_type}"'
-    lines = []
-    if "postgres" in stats:
-        pg = stats["postgres"]
-        lines.extend(
-            [
-                "# HELP lg_api_pg_pool_max The maximum size of the postgres connection pool.",
-                "# TYPE lg_api_pg_pool_max gauge",
-                f"lg_api_pg_pool_max{{{labels}}} {pg.get('pool_max', 0)}",
-                "# HELP lg_api_pg_pool_size Number of connections currently managed by the postgres connection pool (in the pool, given to clients, being prepared)",
-                "# TYPE lg_api_pg_pool_size gauge",
-                f"lg_api_pg_pool_size{{{labels}}} {pg.get('pool_size', 0)}",
-                "# HELP lg_api_pg_pool_available Number of connections currently idle in the postgres connection pool",
-                "# TYPE lg_api_pg_pool_available gauge",
-                f"lg_api_pg_pool_available{{{labels}}} {pg.get('pool_available', 0)}",
-                "# HELP lg_api_pg_pool_requests_queued Number of postgres connection requests queued because a postgres connection wasn't immediately available in the pool",
-                "# TYPE lg_api_pg_pool_requests_queued counter",
-                f"lg_api_pg_pool_requests_queued{{{labels}}} {pg.get('requests_queued', 0)}",
-                "# HELP lg_api_pg_pool_requests_errors Number of postgres connection requests resulting in an error (timeouts, queue full...)",
-                "# TYPE lg_api_pg_pool_requests_errors counter",
-                f"lg_api_pg_pool_requests_errors{{{labels}}} {pg.get('requests_errors', 0)}",
-            ]
-        )
-    if "redis" in stats:
-        rd = stats["redis"]
-        lines.extend(
-            [
-                "# HELP lg_api_redis_pool_available Number of connections currently idle in the redis connection pool",
-                "# TYPE lg_api_redis_pool_available gauge",
-                f"lg_api_redis_pool_available{{{labels}}} {rd.get('idle_connections', 0)}",
-                "# HELP lg_api_redis_pool_size Number of connections currently in use in the redis connection pool",
-                "# TYPE lg_api_redis_pool_size gauge",
-                f"lg_api_redis_pool_size{{{labels}}} {rd.get('in_use_connections', 0)}",
-                "# HELP lg_api_redis_pool_max The maximum size of the redis connection pool.",
-                "# TYPE lg_api_redis_pool_max gauge",
-                f"lg_api_redis_pool_max{{{labels}}} {rd.get('max_connections', 0)}",
-            ]
-        )
-    return lines
 async def _grpc_pool_stats() -> PoolStats:
     """Fetch connection pool stats from the Core API (Go) via gRPC for metrics aggregation. Returns {} on error."""
     if not IS_POSTGRES_OR_GRPC_BACKEND:
@@ -107,21 +60,12 @@ async def _grpc_pool_stats() -> PoolStats:
         return {}
-async def meta_pool_stats(metrics_format: str) -> PoolStats | list[str]:
+async def meta_pool_stats() -> PoolStats:
     local_pool_stats: PoolStats = pool_stats()
     # Aggregate with Core API (Go) pool stats when using gRPC backend
     grpc_pool_stats = await _grpc_pool_stats()
-    merged_pool_stats = _merge_pool_stats(local_pool_stats, grpc_pool_stats)
-    if metrics_format == "prometheus":
-        return _pool_stats_to_prometheus_lines(
-            merged_pool_stats,
-            metadata.PROJECT_ID,
-            metadata.HOST_REVISION_ID,
-            metadata.DEPLOYMENT_TYPE,
-        )
-    else:
-        return merged_pool_stats
+    return _merge_pool_stats(local_pool_stats, grpc_pool_stats)
 async def meta_info(request: ApiRequest):
@@ -153,81 +97,26 @@ async def meta_metrics(request: ApiRequest):
     if metrics_format not in METRICS_FORMATS:
         metrics_format = "prometheus"
-    # collect stats
-    metrics = get_metrics()
-    worker_metrics = metrics["workers"]
-    workers_max = worker_metrics["max"]
-    workers_active = worker_metrics["active"]
-    workers_available = worker_metrics["available"]
+    if metrics_format == "prometheus":
+        # Served straight from the OTLP Prometheus client's registry (see
+        # metrics_otlp._LSDPrometheusReader).
+        return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+    # JSON: hand-built snapshot of workers, queue depth, HTTP, and pool stats.
+    worker_metrics = get_metrics()["workers"]
     http_metrics = HTTP_METRICS_COLLECTOR.get_metrics(
         metadata.PROJECT_ID,
         metadata.HOST_REVISION_ID,
         metrics_format,
         metadata.DEPLOYMENT_TYPE,
     )
-    merged_pool_stats = await meta_pool_stats(metrics_format)
-    if metrics_format == "json":
-        async with connect() as conn:
-            resp = {
-                **merged_pool_stats,
-                "queue": await Runs.stats(conn),
-                **http_metrics,
-            }
-            if config.N_JOBS_PER_WORKER > 0:
-                resp["workers"] = worker_metrics
-            return JSONResponse(resp)
-    elif metrics_format == "prometheus":
-        metrics = []
-        try:
-            async with connect() as conn:
-                queue_stats = await Runs.stats(conn)
-                labels = f'project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}", deployment_type="{metadata.DEPLOYMENT_TYPE}"'
-                metrics.extend(
-                    [
-                        "# HELP lg_api_num_pending_runs The number of runs currently pending.",
-                        "# TYPE lg_api_num_pending_runs gauge",
-                        f"lg_api_num_pending_runs{{{labels}}} {queue_stats['n_pending']}",
-                        "# HELP lg_api_num_running_runs The number of runs currently running.",
-                        "# TYPE lg_api_num_running_runs gauge",
-                        f"lg_api_num_running_runs{{{labels}}} {queue_stats['n_running']}",
-                        "# HELP lg_api_pending_runs_wait_time_max The maximum time a run has been pending, in seconds.",
-                        "# TYPE lg_api_pending_runs_wait_time_max gauge",
-                        f"lg_api_pending_runs_wait_time_max{{{labels}}} {queue_stats.get('pending_runs_wait_time_max_secs') or 0}",
-                        "# HELP lg_api_pending_runs_wait_time_med The median pending wait time across runs, in seconds.",
-                        "# TYPE lg_api_pending_runs_wait_time_med gauge",
-                        f"lg_api_pending_runs_wait_time_med{{{labels}}} {queue_stats.get('pending_runs_wait_time_med_secs') or 0}",
-                        "# HELP lg_api_pending_unblocked_runs_wait_time_max The maximum time a run has been pending excluding runs blocked by another run on the same thread, in seconds.",
-                        "# TYPE lg_api_pending_unblocked_runs_wait_time_max gauge",
-                        f"lg_api_pending_unblocked_runs_wait_time_max{{{labels}}} {queue_stats.get('pending_unblocked_runs_wait_time_max_secs') or 0}",
-                    ]
-                )
-        except Exception as e:
-            await logger.awarning(
-                "Ignoring error while getting run stats for /metrics", exc_info=e
-            )
+    merged_pool_stats = await meta_pool_stats()
+    async with connect() as conn:
+        resp = {
+            **merged_pool_stats,
+            "queue": await Runs.stats(conn),
+            **http_metrics,
+        }
         if config.N_JOBS_PER_WORKER > 0:
-            worker_labels = f'project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}", deployment_type="{metadata.DEPLOYMENT_TYPE}"'
-            metrics.extend(
-                [
-                    "# HELP lg_api_workers_max The maximum number of workers available.",
-                    "# TYPE lg_api_workers_max gauge",
-                    f"lg_api_workers_max{{{worker_labels}}} {workers_max}",
-                    "# HELP lg_api_workers_active The number of currently active workers.",
-                    "# TYPE lg_api_workers_active gauge",
-                    f"lg_api_workers_active{{{worker_labels}}} {workers_active}",
-                    "# HELP lg_api_workers_available The number of available (idle) workers.",
-                    "# TYPE lg_api_workers_available gauge",
-                    f"lg_api_workers_available{{{worker_labels}}} {workers_available}",
-                ]
-            )
-        metrics.extend(http_metrics)
-        metrics.extend(merged_pool_stats)
-        metrics_response = "\n".join(metrics)
-        return PlainTextResponse(metrics_response)
+            resp["workers"] = worker_metrics
+        return JSONResponse(resp)

langgraph_api/config/__init__.py CHANGED Viewed

@@ -577,8 +577,13 @@ METRIC_MAX_EMITTING_TIER = env(
     "METRIC_MAX_EMITTING_TIER", cast=int, default=_METRIC_MAX_EMITTING_TIER_DEFAULT
 )
 DATADOG_METRICS_ENABLED = bool(LSD_DD_API_KEY)
-LSD_PROM_METRICS_ENABLED = env("LSD_PROM_METRICS_ENABLED", cast=bool, default=False)
-LSD_PROM_METRICS_PORT = env("LSD_PROM_METRICS_PORT", cast=int, default=9464)
+# When true, the Prometheus scrape (/metrics) exposes ALL metrics, not just the
+# lsd_web_metric (Deployment-UI) set. Record-time tier filtering
+# (METRIC_MAX_EMITTING_TIER) still applies, so internal metrics must be within the
+# max emitting tier to be recorded at all.
+EXPOSE_INTERNAL_METRICS_PROMETHEUS = env(
+    "EXPOSE_INTERNAL_METRICS_PROMETHEUS", cast=bool, default=False
+)
 LANGGRAPH_LOGS_ENDPOINT = env("LANGGRAPH_LOGS_ENDPOINT", cast=str, default=None)
 LANGGRAPH_LOGS_ENABLED = env("LANGGRAPH_LOGS_ENABLED", cast=bool, default=False)
@@ -635,6 +640,7 @@ __all__ = [
     "CRON_SCHEDULER_SLEEP_TIME",
     "DATABASE_URI",
     "DATADOG_METRICS_ENABLED",
+    "EXPOSE_INTERNAL_METRICS_PROMETHEUS",
     "FF_CRONS_ENABLED",
     "FF_LOG_DROPPED_EVENTS",
     "FF_LOG_QUERY_AND_PARAMS",
@@ -672,8 +678,6 @@ __all__ = [
     "LSD_GRPC_SERVER_ADDRESS",
     "LSD_GRPC_SERVER_MAX_RECV_MSG_BYTES",
     "LSD_GRPC_SERVER_MAX_SEND_MSG_BYTES",
-    "LSD_PROM_METRICS_ENABLED",
-    "LSD_PROM_METRICS_PORT",
     "MAX_STREAM_CHUNK_SIZE_BYTES",
     "METRIC_MAX_EMITTING_TIER",
     "METRIC_PREFIX",

langgraph_api/metrics_collector.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""Periodic collector that pushes snapshot/state metrics to the OTLP client.
+This background task samples the same sources every ``STATS_INTERVAL_SECS`` and records them via the reporter.
+The loop runs in **every** process (on postgres both the API server and the
+dedicated queue worker share the same lifespan; inmem is a single process). Each
+metric group self-gates so it lands on the right process:
+- **worker gauges** — recorded wherever workers run (``N_JOBS_PER_WORKER > 0``):
+  the queue worker, or a combined single-process deployment. A distributed API
+  process (``N_JOBS_PER_WORKER == 0``) has no workers and skips them.
+- **queue depth** (``num_pending_runs``/``num_running_runs``) — a single global
+  value from ``Runs.stats`` (a gRPC call to the Go core). Emitted by the **API
+  process only** (``not IS_QUEUE_ENTRYPOINT``) on the **postgres** runtime; inmem
+  skips the DB round-trip entirely.
+- **Postgres + Redis pool stats** — recorded on **both** processes (postgres
+  runtime only), each reporting its own pools via ``meta_pool_stats()``, which
+  merges the local Python pools with the Go-core pools. Redis stats in particular
+  come from the local Python pool — the Go core omits them unless it has a
+  non-cluster redis client — so a Go-core-only source would drop them. inmem has
+  no real Postgres/Redis pools, so nothing is reported.
+The two pool request counters are cumulative, so we push the delta since the
+previous sample (OTLP counters are additive).
+The loop also logs the same samples (``Worker stats``, ``Postgres pool stats``,
+``Redis pool stats``) — folding in what the legacy per-process ``stats_loop``
+functions used to log.
+"""
+from __future__ import annotations
+import asyncio
+import structlog
+from langgraph_api import config
+from langgraph_api.api.meta import meta_pool_stats
+from langgraph_api.feature_flags import IS_POSTGRES_OR_GRPC_BACKEND
+from langgraph_api.metrics_otlp import (
+    COUNTER_PG_POOL_REQUESTS_ERRORS,
+    COUNTER_PG_POOL_REQUESTS_QUEUED,
+    GAUGE_NUM_PENDING_RUNS,
+    GAUGE_NUM_RUNNING_RUNS,
+    GAUGE_PG_POOL_AVAILABLE,
+    GAUGE_PG_POOL_MAX,
+    GAUGE_PG_POOL_SIZE,
+    GAUGE_REDIS_POOL_AVAILABLE,
+    GAUGE_REDIS_POOL_MAX,
+    GAUGE_REDIS_POOL_SIZE,
+    GAUGE_WORKERS_ACTIVE,
+    GAUGE_WORKERS_AVAILABLE,
+    GAUGE_WORKERS_MAX,
+    get_otlp_metrics_reporter,
+)
+from langgraph_runtime.database import connect
+from langgraph_runtime.metrics import get_metrics
+if IS_POSTGRES_OR_GRPC_BACKEND:
+    from langgraph_api.grpc.ops import Runs
+else:
+    from langgraph_runtime.ops import Runs
+logger = structlog.stdlib.get_logger(__name__)
+async def _collect_queue_and_workers(reporter) -> None:
+    """Worker gauges (where workers run) + queue depth (API process only).
+    Worker counts are local to the process running this loop and emitted wherever
+    workers run (``N_JOBS_PER_WORKER > 0``) — the queue worker or a combined
+    single-process deployment; a distributed API process (N_JOBS == 0) skips them.
+    Queue depth is a single global value (from the run table, via ``Runs.stats`` —
+    a gRPC call to the Go core) and is emitted by the **API process only**
+    (``not IS_QUEUE_ENTRYPOINT``) on **postgres**: inmem skips the DB round-trip,
+    and the dedicated queue worker leaves it to the API process so the global value
+    is not double-reported across the queue/API split.
+    """
+    if config.N_JOBS_PER_WORKER > 0:
+        workers = get_metrics()["workers"]
+        reporter.record_gauge(GAUGE_WORKERS_MAX, workers["max"])
+        reporter.record_gauge(GAUGE_WORKERS_ACTIVE, workers["active"])
+        reporter.record_gauge(GAUGE_WORKERS_AVAILABLE, workers["available"])
+        await logger.ainfo(
+            "Worker stats",
+            max=workers["max"],
+            active=workers["active"],
+            available=workers["available"],
+        )
+    # Queue depth is read from the run table via Runs.stats (a gRPC call to the
+    # Go core on postgres). Emitted by the API process only — the queue worker
+    # (IS_QUEUE_ENTRYPOINT) skips it. inmem skips the DB round-trip and reports
+    # nothing.
+    if IS_POSTGRES_OR_GRPC_BACKEND and not config.IS_QUEUE_ENTRYPOINT:
+        async with connect() as conn:
+            stats = await Runs.stats(conn)
+        reporter.record_gauge(GAUGE_NUM_PENDING_RUNS, stats["n_pending"])
+        reporter.record_gauge(GAUGE_NUM_RUNNING_RUNS, stats["n_running"])
+async def _collect_pool(reporter, prev_counters: dict[str, int]) -> None:
+    """Postgres + Redis pool gauges + cumulative request counters.
+    Postgres runtime only, recorded on **both** processes (API server and queue
+    worker) — each reports its own pools. Uses ``meta_pool_stats()``, which merges
+    the local Python pools with the Go-core pools — matching the legacy /metrics.
+    Redis stats come from the local Python pool (the Go core omits them unless it
+    has a non-cluster redis client), so a Go-core-only source would drop them.
+    """
+    # Limitation: under BG_JOB_ISOLATED_LOOPS each worker runs in its own thread with its own
+    # thread-local pg pool and redis client. This collector runs on the main
+    # thread, so meta_pool_stats() -> _get_pool()/redis_stats() only sees the main
+    # thread's pool (redis_stats() reads the global client unconditionally), and
+    # the per-thread isolated pools are NOT aggregated — so pg/redis pool gauges
+    # and the pg request counters under-report in isolated-loop mode.
+    stats = await meta_pool_stats()
+    pg = stats.get("postgres") or {}
+    if pg:
+        reporter.record_gauge(GAUGE_PG_POOL_MAX, pg.get("pool_max", 0))
+        reporter.record_gauge(GAUGE_PG_POOL_SIZE, pg.get("pool_size", 0))
+        reporter.record_gauge(GAUGE_PG_POOL_AVAILABLE, pg.get("pool_available", 0))
+        # Cumulative counters: record the delta since the last sample. Emit on a
+        # non-negative delta (>= 0) so the counter is created and reported from the
+        # first sample even when it is 0 — the legacy /metrics always reported
+        # these. Negative deltas (Go-core pool counter resets) are skipped to keep
+        # the OTLP counter monotonic.
+        for key, metric in (
+            ("requests_queued", COUNTER_PG_POOL_REQUESTS_QUEUED),
+            ("requests_errors", COUNTER_PG_POOL_REQUESTS_ERRORS),
+        ):
+            current = pg.get(key, 0)
+            delta = current - prev_counters.get(key, 0)
+            if delta >= 0:
+                reporter.inc_counter(metric, delta)
+            prev_counters[key] = current
+        await logger.ainfo("Postgres pool stats", **pg)
+    redis = stats.get("redis") or {}
+    if redis:
+        reporter.record_gauge(
+            GAUGE_REDIS_POOL_AVAILABLE, redis.get("idle_connections", 0)
+        )
+        reporter.record_gauge(GAUGE_REDIS_POOL_SIZE, redis.get("in_use_connections", 0))
+        reporter.record_gauge(GAUGE_REDIS_POOL_MAX, redis.get("max_connections", 0))
+        await logger.ainfo("Redis pool stats", **redis)
+async def _collect_once(prev_counters: dict[str, int]) -> None:
+    reporter = get_otlp_metrics_reporter()
+    if not reporter.enabled:
+        return
+    # Worker gauges are emitted wherever workers run; _collect_queue_and_workers
+    # adds queue depth on the API process only (postgres; inmem skips the DB
+    # round-trip).
+    try:
+        await _collect_queue_and_workers(reporter)
+    except Exception as exc:
+        await logger.awarning(
+            "metrics collector: queue/worker sample failed", exc_info=exc
+        )
+    # Pool stats merge the local Python pools with the Go-core pools; inmem has
+    # no real Postgres/Redis pools, so the pool sample is skipped there.
+    if IS_POSTGRES_OR_GRPC_BACKEND:
+        try:
+            await _collect_pool(reporter, prev_counters)
+        except Exception as exc:
+            await logger.awarning("metrics collector: pool sample failed", exc_info=exc)
+async def collector_loop() -> None:
+    """Sample snapshot metrics into the OTLP client every STATS_INTERVAL_SECS."""
+    interval = config.STATS_INTERVAL_SECS
+    prev_counters: dict[str, int] = {}
+    await logger.ainfo("Starting OTLP metrics collector loop", interval_secs=interval)
+    try:
+        while True:
+            await _collect_once(prev_counters)
+            await asyncio.sleep(interval)
+    except asyncio.CancelledError:
+        pass

langgraph_api/metrics_otlp.py CHANGED Viewed

@@ -4,18 +4,20 @@ import os
 import threading
 import time
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from datetime import timedelta
 from typing import TYPE_CHECKING, Any, Literal
 import structlog
-from langgraph_api import __version__, config
+from langgraph_api import __version__, config, metadata
 if TYPE_CHECKING:
     from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
         OTLPMetricExporter,
     )
+    from opentelemetry.exporter.prometheus import PrometheusMetricReader
+    from opentelemetry.metrics import Observation
     from opentelemetry.sdk.metrics import Counter, Histogram, MeterProvider
     from opentelemetry.sdk.metrics.export import (
         AggregationTemporality,
@@ -28,6 +30,7 @@ else:
         from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
             OTLPMetricExporter,
         )
+        from opentelemetry.metrics import Observation
         from opentelemetry.sdk.metrics import Counter, Histogram, MeterProvider
         from opentelemetry.sdk.metrics.export import (
             AggregationTemporality,
@@ -39,6 +42,7 @@ else:
         OTEL_AVAILABLE = True
     except ModuleNotFoundError:
         OTLPMetricExporter = None
+        Observation = None
         MeterProvider = None
         PeriodicExportingMetricReader = None
         Resource = None
@@ -50,12 +54,11 @@ else:
     try:
         from opentelemetry.exporter.prometheus import PrometheusMetricReader
-        from prometheus_client import start_http_server
         PROMETHEUS_EXPORTER_AVAILABLE = True
     except ModuleNotFoundError:
-        PrometheusMetricReader = None
-        start_http_server = None
+        # Fall back to `object` so that _LSDPrometheusReader, which inherits from
+        # PrometheusMetricReader, can still be defined when the exporter is missing.
+        PrometheusMetricReader = object
         PROMETHEUS_EXPORTER_AVAILABLE = False
 logger = structlog.stdlib.get_logger(__name__)
@@ -79,28 +82,62 @@ class MetricDef:
     metric_type: MetricType
     name: str
     tier: int
-def def_counter(name: str, tier: int) -> MetricDef:
+    # True for metrics surfaced on the LSD Deployment UI. This flag partitions the
+    # two backends: the Prometheus scrape endpoint serves only these (see
+    # _LSDPrometheusReader) so GCP indexes just the Deployment-UI metrics, while
+    # Datadog gets only the internal complement (see _DatadogExporter).
+    lsd_web_metric: bool = False
+    # Human-readable help text. Passed to the OTel instrument as its description,
+    # which the Prometheus exporter exposes as the metric's ``# HELP`` line.
+    description: str = ""
+def def_counter(
+    name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
+) -> MetricDef:
     return MetricDef(
-        metric_type="counter", name=f"{METRIC_NAME_PREFIX}{name}", tier=tier
+        metric_type="counter",
+        name=f"{METRIC_NAME_PREFIX}{name}",
+        tier=tier,
+        lsd_web_metric=lsd_web_metric,
+        description=description,
     )
-def def_histogram(name: str, tier: int) -> MetricDef:
+def def_histogram(
+    name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
+) -> MetricDef:
     return MetricDef(
-        metric_type="histogram", name=f"{METRIC_NAME_PREFIX}{name}", tier=tier
+        metric_type="histogram",
+        name=f"{METRIC_NAME_PREFIX}{name}",
+        tier=tier,
+        lsd_web_metric=lsd_web_metric,
+        description=description,
     )
-def def_latency(name: str, tier: int) -> MetricDef:
+def def_latency(
+    name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
+) -> MetricDef:
     return MetricDef(
-        metric_type="latency", name=f"{METRIC_NAME_PREFIX}{name}", tier=tier
+        metric_type="latency",
+        name=f"{METRIC_NAME_PREFIX}{name}",
+        tier=tier,
+        lsd_web_metric=lsd_web_metric,
+        description=description,
     )
-def def_gauge(name: str, tier: int) -> MetricDef:
-    return MetricDef(metric_type="gauge", name=f"{METRIC_NAME_PREFIX}{name}", tier=tier)
+def def_gauge(
+    name: str, tier: int, lsd_web_metric: bool = False, description: str = ""
+) -> MetricDef:
+    return MetricDef(
+        metric_type="gauge",
+        name=f"{METRIC_NAME_PREFIX}{name}",
+        tier=tier,
+        lsd_web_metric=lsd_web_metric,
+        description=description,
+    )
 # Pre-defined counter metrics.
@@ -152,23 +189,119 @@ COUNTER_PROTOCOL_V2_RESUME_GAP = def_counter(
 COUNTER_PROTOCOL_V2_TRANSPORT_SEND_FAILURE = def_counter(
     "protocol_v2_transport_send_failure_counter", METRIC_TIER_INFO
 )
+# Migrated from meta.py /metrics. Named to expose as `lg_api_http_requests_total`
+# (this exporter version does not double-append `_total`).
+COUNTER_HTTP_REQUESTS = def_counter(
+    "http_requests_total", METRIC_TIER_INFO, lsd_web_metric=True
+)
+# Migrated from meta.py /metrics. The exporter appends `_total` to counter names,
+# so these expose as `lg_api_pg_pool_requests_{queued,errors}_total` (idiomatic).
+COUNTER_PG_POOL_REQUESTS_QUEUED = def_counter(
+    "pg_pool_requests_queued",
+    METRIC_TIER_CRITICAL,
+    lsd_web_metric=True,
+    description=(
+        "Number of postgres connection requests queued because a postgres "
+        "connection wasn't immediately available in the pool"
+    ),
+)
+COUNTER_PG_POOL_REQUESTS_ERRORS = def_counter(
+    "pg_pool_requests_errors",
+    METRIC_TIER_CRITICAL,
+    lsd_web_metric=True,
+    description=(
+        "Number of postgres connection requests resulting in an error "
+        "(timeouts, queue full...)"
+    ),
+)
 # Pre-defined latency metrics.
 LATENCY_RUN_EXECUTION = def_latency("run_execution_latency", METRIC_TIER_INFO)
 LATENCY_RUN_QUEUE_WAIT_TIME_1ST_ATTEMPT = def_latency(
-    "run_queue_wait_time_1st_attempt", METRIC_TIER_INFO
+    "run_queue_wait_time_1st_attempt",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description=(
+        "Time (milliseconds) spent by jobs waiting in the queue"
+        " before getting processed for the first time. "
+    ),
 )
 LATENCY_RUN_QUEUE_WAIT_TIME_RETRY_ATTEMPT = def_latency(
     "run_queue_wait_time_retry_attempt", METRIC_TIER_INFO
 )
 LATENCY_STREAM_PUBLISH = def_latency("stream_publish_latency", METRIC_TIER_INFO)
+LATENCY_HTTP_REQUEST = def_latency(
+    "http_requests_latency",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="HTTP request latency in milliseconds",
+)
-# Pre-defined gauge metrics.
-GAUGE_WORKERS_ACTIVE = def_gauge("workers_active", METRIC_TIER_CRITICAL)
-GAUGE_WORKERS_AVAILABLE = def_gauge("workers_available", METRIC_TIER_CRITICAL)
+GAUGE_WORKERS_ACTIVE = def_gauge(
+    "workers_active", METRIC_TIER_CRITICAL, lsd_web_metric=True
+)
+GAUGE_WORKERS_AVAILABLE = def_gauge(
+    "workers_available", METRIC_TIER_CRITICAL, lsd_web_metric=True
+)
 GAUGE_PUBLISH_QUEUE_AVAILABILITY = def_gauge(
     "publish_queue_availability", METRIC_TIER_CRITICAL
 )
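+# Snapshot/state gauges pushed by the periodic
+# metrics collector loop (langgraph_api.metrics_collector).
+# Worker gauges are recorded wherever workers run (N_JOBS_PER_WORKER > 0); queue
+# depth is recorded by the API process on the postgres/gRPC backend only; pool
+# stats are recorded on the postgres/gRPC backend only (inmem reports neither).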
+GAUGE_WORKERS_MAX = def_gauge("workers_max", METRIC_TIER_CRITICAL, lsd_web_metric=True)
+GAUGE_NUM_PENDING_RUNS = def_gauge(
+    "num_pending_runs",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="The number of runs currently pending.",
+)
+GAUGE_NUM_RUNNING_RUNS = def_gauge(
+    "num_running_runs",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="The number of runs currently running.",
+)
+GAUGE_PG_POOL_MAX = def_gauge(
+    "pg_pool_max",
+    METRIC_TIER_CRITICAL,
+    lsd_web_metric=True,
+    description="The maximum size of the postgres connection pool.",
+)
+GAUGE_PG_POOL_SIZE = def_gauge(
+    "pg_pool_size",
+    METRIC_TIER_CRITICAL,
+    lsd_web_metric=True,
+    description=(
+        "Number of connections currently managed by the postgres connection "
+        "pool (in the pool, given to clients, being prepared)"
+    ),
+)
+GAUGE_PG_POOL_AVAILABLE = def_gauge(
+    "pg_pool_available",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="Number of connections currently idle in the postgres connection pool",
+)
+GAUGE_REDIS_POOL_AVAILABLE = def_gauge(
+    "redis_pool_available",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="Number of connections currently idle in the redis connection pool",
+)
+GAUGE_REDIS_POOL_SIZE = def_gauge(
+    "redis_pool_size",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="Number of connections currently in use in the redis connection pool",
+)
+GAUGE_REDIS_POOL_MAX = def_gauge(
+    "redis_pool_max",
+    METRIC_TIER_INFO,
+    lsd_web_metric=True,
+    description="The maximum size of the redis connection pool.",
+)
 # Protocol v2 sessions retain a bounded replay buffer per run. Track the
 # observed occupancy so operators can tune LSD_PROTOCOL_V2_BUFFER_SIZE before
 # reconnects start seeing resume gaps.
@@ -182,6 +315,71 @@ HISTOGRAM_PROTOCOL_V2_REPLAYED_EVENTS = def_histogram(
 )
+# Names of metrics surfaced on the LSD Deployment UI. By default the two metric
+# backends are partitioned by this set: the Prometheus scrape endpoint serves only
+# these (see ``_LSDPrometheusReader``), and Datadog receives only the complement
+# (see ``_DatadogExporter``). Setting EXPOSE_INTERNAL_METRICS_PROMETHEUS lifts the
+# Prometheus filter so it serves every metric. Computed from the definitions above.
+LSD_WEB_METRIC_NAMES: frozenset[str] = frozenset(
+    m.name for m in globals().values() if isinstance(m, MetricDef) and m.lsd_web_metric
+)
+def _select_metrics(metrics_data: Any, keep) -> Any:
+    """Return a copy of ``metrics_data`` keeping only metrics where ``keep(name)``.
+    Rebuilt with ``dataclasses.replace`` (never mutated in place) since the two
+    backend readers share the same SDK metric objects. Scope and resource groups
+    left empty by the filter are dropped entirely, so a non-empty
+    ``resource_metrics`` on the result guarantees at least one metric point.
+    """
+    if metrics_data is None or not metrics_data.resource_metrics:
+        return metrics_data
+    resource_metrics = []
+    for rm in metrics_data.resource_metrics:
+        scope_metrics = []
+        for sm in rm.scope_metrics:
+            kept = [m for m in sm.metrics if keep(m.name)]
+            if kept:
+                scope_metrics.append(replace(sm, metrics=kept))
+        if scope_metrics:
+            resource_metrics.append(replace(rm, scope_metrics=scope_metrics))
+    return replace(metrics_data, resource_metrics=resource_metrics)
+def _filter_web_metrics(metrics_data: Any) -> Any:
+    """Copy of ``metrics_data`` with only LSD web metrics (for Prometheus/GCP)."""
+    return _select_metrics(metrics_data, lambda name: name in LSD_WEB_METRIC_NAMES)
+def _drop_web_metrics(metrics_data: Any) -> Any:
+    """Copy of ``metrics_data`` without LSD web metrics (for Datadog)."""
+    return _select_metrics(metrics_data, lambda name: name not in LSD_WEB_METRIC_NAMES)
+class _LSDPrometheusReader(PrometheusMetricReader):
+    """The Prometheus reader for this service.
+    By default it serves only the LSD Deployment-UI (``lsd_web_metric``) set —
+    Prometheus feeds the LSD web UI, while internal metrics go to Datadog instead
+    (see ``_DatadogExporter``). When ``EXPOSE_INTERNAL_METRICS_PROMETHEUS`` is set,
+    the web filter is skipped and every recorded metric is exposed (record-time
+    tier filtering via ``METRIC_MAX_EMITTING_TIER`` still applies). The base
+    ``_receive_metrics`` simply hands the data to its collector, so filtering here
+    is sufficient.
+    """
+    def _receive_metrics(
+        self,
+        metrics_data: Any,
+        timeout_millis: float = 10_000,
+        **kwargs: Any,
+    ) -> None:
+        if not config.EXPOSE_INTERNAL_METRICS_PROMETHEUS:
+            metrics_data = _filter_web_metrics(metrics_data)
+        super()._receive_metrics(metrics_data, timeout_millis, **kwargs)
 def _normalize_emitting_tier(value: int) -> int:
     if value < METRIC_TIER_CRITICAL:
         return METRIC_TIER_CRITICAL
@@ -190,8 +388,15 @@ def _normalize_emitting_tier(value: int) -> int:
     return value
-class _FilteringExporter(MetricExporter):
-    """Wrapper that skips export when there are no metric points."""
+class _DatadogExporter(MetricExporter):
+    """Datadog exporter wrapper: drops LSD web metrics, and skips export when
+    nothing remains.
+    Web metrics are served to the LSD Deployment UI via Prometheus only (see
+    ``_LSDPrometheusReader``); Datadog receives only the internal complement. The
+    drop happens here rather than at record time because the same SDK metric
+    objects feed both backend readers.
+    """
     def __init__(self, exporter: MetricExporter):
         super().__init__()
@@ -205,16 +410,12 @@ class _FilteringExporter(MetricExporter):
         timeout_millis: float = 10_000,
         **kwargs: Any,
     ) -> Any:
-        if not metrics_data or not metrics_data.resource_metrics:
+        # _drop_web_metrics prunes emptied groups, so a non-empty resource_metrics
+        # guarantees there is at least one internal metric point left to export.
+        filtered = _drop_web_metrics(metrics_data)
+        if not filtered or not filtered.resource_metrics:
             return None
-        for resource_metric in metrics_data.resource_metrics:
-            if resource_metric.scope_metrics:
-                for scope_metric in resource_metric.scope_metrics:
-                    if scope_metric.metrics:
-                        return self._exporter.export(
-                            metrics_data, timeout_millis, **kwargs
-                        )
-        return None
+        return self._exporter.export(filtered, timeout_millis, **kwargs)
     def shutdown(self, timeout_millis: float = 30_000, **kwargs: Any) -> None:
         self._exporter.shutdown(timeout_millis, **kwargs)
@@ -232,8 +433,17 @@ class OTelMetricsReporter:
         self._meter = None
         self._max_tier = _normalize_emitting_tier(config.METRIC_MAX_EMITTING_TIER)
         self._instruments: dict[str, Any] = {}
+        # Cache of the latest gauge values, read back whenever /metrics is scraped.
+        # Synchronous gauges "flap": a series is not reported unless its value was
+        # set recently. Serving values from this cache reports every gauge
+        # consistently on each scrape. Guarded by ``_gauge_lock`` because callbacks
+        # run on the SDK collection thread, while ``record_gauge`` runs on others.
+        self._gauge_lock = threading.Lock()
+        self._gauge_values: dict[str, dict[tuple, tuple[dict[str, Any], float]]] = {}
+        self._observable_gauges: dict[str, Any] = {}
+        # Labels attached to every metric (set in ``initialize``)
+        self._common_attributes: dict[str, str] = {}
         self._prom_enabled = False
-        self._prom_server: Any | None = None
     @property
     def enabled(self) -> bool:
@@ -245,14 +455,6 @@ class OTelMetricsReporter:
                 return
             self._initialized = True
-            if (
-                not config.DATADOG_METRICS_ENABLED
-                and not config.LSD_PROM_METRICS_ENABLED
-            ):
-                logger.info(
-                    "OTel metrics disabled (no DD API key and Prometheus not enabled)"
-                )
-                return
             if not OTEL_AVAILABLE:
                 logger.warning(
                     "OTel metrics disabled because OpenTelemetry dependencies are not installed"
@@ -301,22 +503,23 @@ class OTelMetricsReporter:
                     readers.append(
                         PeriodicExportingMetricReader(
-                            _FilteringExporter(base_exporter),
+                            _DatadogExporter(base_exporter),
                             export_interval_millis=10_000,
                         )
                     )
-                if config.LSD_PROM_METRICS_ENABLED:
-                    if not PROMETHEUS_EXPORTER_AVAILABLE:
-                        logger.warning(
-                            "Prometheus metrics disabled: opentelemetry-exporter-prometheus not installed"
-                        )
-                    else:
-                        # PrometheusMetricReader registers its collector with the
-                        # global prometheus_client REGISTRY, which start_http_server
-                        # serves by default. MeterProvider.shutdown() unregisters it.
-                        readers.append(PrometheusMetricReader())
-                        self._prom_enabled = True
+                # Prometheus metrics are always exported (served via /metrics).
+                if not PROMETHEUS_EXPORTER_AVAILABLE:
+                    logger.error(
+                        "Prometheus metrics disabled: opentelemetry-exporter-prometheus not installed"
+                    )
+                else:
+                    # PrometheusMetricReader registers its collector with the
+                    # global prometheus_client REGISTRY, which the /metrics
+                    # endpoint serves via generate_latest() (see api/meta.py).
+                    # Prometheus serves only Deployment-UI metrics.
+                    readers.append(_LSDPrometheusReader())
+                    self._prom_enabled = True
                 if not readers:
                     logger.info(
@@ -328,6 +531,12 @@ class OTelMetricsReporter:
                     resource=resource, metric_readers=readers
                 )
                 self._meter = self._meter_provider.get_meter(SERVICE_NAME)
+                # Labels added to every metric, matching the legacy /metrics.
+                self._common_attributes = {
+                    "project_id": metadata.PROJECT_ID or "",
+                    "revision_id": metadata.HOST_REVISION_ID or "",
+                    "deployment_type": metadata.DEPLOYMENT_TYPE or "",
+                }
                 self._enabled = True
                 if config.DATADOG_METRICS_ENABLED:
@@ -335,8 +544,6 @@ class OTelMetricsReporter:
                         "Datadog OTLP metrics reader initialized",
                         endpoint=f"https://{config.LSD_DD_ENDPOINT}/v1/metrics",
                     )
-                if self._prom_enabled:
-                    self._start_prometheus_server()
                 logger.info(
                     "OTel metrics reporter initialized",
@@ -358,38 +565,25 @@ class OTelMetricsReporter:
                 logger.exception("Failed to initialize OTel metrics reporter")
                 raise
-    def _start_prometheus_server(self) -> None:
-        port = config.LSD_PROM_METRICS_PORT
-        # ``start_http_server`` spins up a WSGI server in a daemon thread serving
-        # the global prometheus_client REGISTRY, and returns the (server, thread)
-        # handle. Keep the server so ``shutdown`` can stop it cleanly (e.g. in
-        # tests); in production the daemon thread exits with the process anyway.
-        server, _thread = start_http_server(port=port)
-        self._prom_server = server
-        logger.info("Prometheus metrics scrape server started", port=port)
     def shutdown(self) -> None:
         with self._lock:
             if self._meter_provider:
                 try:
+                    # Unregisters the Prometheus reader's collector from the global
+                    # prometheus_client REGISTRY (and flushes/stops other readers).
                     self._meter_provider.shutdown()
                 except Exception:
                     logger.exception("Failed to shutdown OTel metrics reporter")
                 finally:
                     self._meter_provider = None
                     self._meter = None
-            if self._prom_server is not None:
-                try:
-                    self._prom_server.shutdown()
-                    self._prom_server.server_close()
-                except Exception:
-                    logger.exception("Failed to stop Prometheus scrape server")
-                finally:
-                    self._prom_server = None
             self._prom_enabled = False
             self._enabled = False
             self._initialized = False
             self._instruments.clear()
+            with self._gauge_lock:
+                self._gauge_values.clear()
+                self._observable_gauges.clear()
     def _instrument_name(self, metric_name: str) -> str:
         return metric_name
@@ -397,22 +591,69 @@ class OTelMetricsReporter:
     def _tier_enabled(self, tier: int) -> bool:
         return _normalize_emitting_tier(tier) <= self._max_tier
+    def _should_emit(self, metric: MetricDef) -> bool:
+        """Whether a sample for ``metric`` should be recorded.
+        ``lsd_web_metric`` metrics bypass tier filtering: they back the LSD
+        Deployment UI (served by the Prometheus reader) and must be emitted even
+        on low-tier deployments (dev/dev_free default ``METRIC_MAX_EMITTING_TIER``
+        to 1/CRITICAL). The tier gate runs before the MeterProvider, so a dropped
+        sample never reaches any reader — Prometheus included.
+        """
+        if not self._enabled or not self._meter:
+            return False
+        return metric.lsd_web_metric or self._tier_enabled(metric.tier)
     def _get_or_create_instrument(self, metric: MetricDef):
         name = self._instrument_name(metric.name)
         instrument = self._instruments.get(name)
         if instrument is not None:
             return instrument
         if metric.metric_type == "counter":
-            instrument = self._meter.create_counter(name=name)
+            instrument = self._meter.create_counter(
+                name=name, description=metric.description
+            )
         elif metric.metric_type in {"histogram", "latency"}:
-            instrument = self._meter.create_histogram(name=name)
-        elif metric.metric_type == "gauge":
-            instrument = self._meter.create_gauge(name=name)
+            instrument = self._meter.create_histogram(
+                name=name, description=metric.description
+            )
         else:
+            # Gauges are handled via observable instruments (see _set_gauge).
             raise ValueError(f"Unsupported metric type: {metric.metric_type}")
         self._instruments[name] = instrument
         return instrument
+    def _make_gauge_callback(self, name: str):
+        """Build the observable-gauge callback that the SDK invokes on each scrape.
+        It returns one Observation per recorded attribute-set from the cache, so the
+        last sampled value is re-reported on every collect (no flapping).
+        """
+        def _callback(_options: Any):
+            with self._gauge_lock:
+                points = list(self._gauge_values.get(name, {}).values())
+            return [Observation(value, attributes=attrs) for attrs, value in points]
+        return _callback
+    def _with_common(self, attributes: dict[str, Any] | None) -> dict[str, Any]:
+        """Merge the shared labels (project_id/revision_id/deployment_type) with
+        any per-call attributes. Per-call values win on key conflicts."""
+        return {**self._common_attributes, **(attributes or {})}
+    def _set_gauge(self, metric: MetricDef, value: float, attributes: dict) -> None:
+        name = metric.name
+        key = tuple(sorted(attributes.items()))
+        with self._gauge_lock:
+            self._gauge_values.setdefault(name, {})[key] = (attributes, float(value))
+            if name not in self._observable_gauges:
+                self._observable_gauges[name] = self._meter.create_observable_gauge(
+                    name=name,
+                    description=metric.description,
+                    callbacks=[self._make_gauge_callback(name)],
+                )
     def inc_counter(
         self,
         metric: MetricDef,
@@ -421,11 +662,11 @@ class OTelMetricsReporter:
     ) -> None:
         if metric.metric_type != "counter":
             raise ValueError(f"{metric.name} is not a counter metric")
-        if not self._enabled or not self._meter or not self._tier_enabled(metric.tier):
+        if not self._should_emit(metric):
             return
         instrument = self._get_or_create_instrument(metric)
         try:
-            instrument.add(value, attributes or {})
+            instrument.add(value, self._with_common(attributes))
         except Exception:
             logger.warning("Failed to add counter", metric_name=metric.name)
@@ -437,11 +678,11 @@ class OTelMetricsReporter:
     ) -> None:
         if metric.metric_type != "histogram":
             raise ValueError(f"{metric.name} is not a histogram metric")
-        if not self._enabled or not self._meter or not self._tier_enabled(metric.tier):
+        if not self._should_emit(metric):
             return
         instrument = self._get_or_create_instrument(metric)
         try:
-            instrument.record(value, attributes or {})
+            instrument.record(value, self._with_common(attributes))
         except Exception:
             logger.warning("Failed to record histogram", metric_name=metric.name)
@@ -458,11 +699,11 @@ class OTelMetricsReporter:
         else:
             seconds = float(duration_seconds)
         value = seconds * 1000
-        if not self._enabled or not self._meter or not self._tier_enabled(metric.tier):
+        if not self._should_emit(metric):
             return
         instrument = self._get_or_create_instrument(metric)
         try:
-            instrument.record(value, attributes or {})
+            instrument.record(value, self._with_common(attributes))
         except Exception:
             logger.warning("Failed to record latency", metric_name=metric.name)
@@ -474,11 +715,11 @@ class OTelMetricsReporter:
     ) -> None:
         if metric.metric_type != "gauge":
             raise ValueError(f"{metric.name} is not a gauge metric")
-        if not self._enabled or not self._meter or not self._tier_enabled(metric.tier):
+        if not self._should_emit(metric):
             return
-        instrument = self._get_or_create_instrument(metric)
         try:
-            instrument.set(value, attributes or {})
+            # Cache the value; an observable gauge re-reports it on every scrape.
+            self._set_gauge(metric, value, self._with_common(attributes))
         except Exception:
             logger.warning("Failed to record gauge", metric_name=metric.name)
@@ -488,7 +729,7 @@ class OTelMetricsReporter:
         metric: MetricDef,
         attributes: dict[str, Any] | None = None,
     ):
-        if not self._enabled or not self._meter or not self._tier_enabled(metric.tier):
+        if not self._should_emit(metric):
             yield
             return
         start = time.perf_counter()

langgraph_api/middleware/http_logger.py CHANGED Viewed

@@ -7,6 +7,12 @@ from starlette.types import Message, Receive, Scope, Send
 from langgraph_api.config import MOUNT_PREFIX
 from langgraph_api.http_metrics import HTTP_METRICS_COLLECTOR
+from langgraph_api.http_metrics_utils import get_route, should_filter_route
+from langgraph_api.metrics_otlp import (
+    COUNTER_HTTP_REQUESTS,
+    LATENCY_HTTP_REQUEST,
+    get_otlp_metrics_reporter,
+)
 from langgraph_api.utils.headers import should_include_header_in_logs
 asgi = structlog.stdlib.get_logger("asgi")
@@ -150,6 +156,24 @@ class AccessLoggerMiddleware:
             if method and route and status:
                 HTTP_METRICS_COLLECTOR.record_request(method, route, status, latency)
+                route_path = get_route(route)
+                if route_path is not None and not should_filter_route(route_path):
+                    reporter = get_otlp_metrics_reporter()
+                    reporter.inc_counter(
+                        COUNTER_HTTP_REQUESTS,
+                        attributes={
+                            "method": method,
+                            "path": route_path,
+                            "status": str(status),
+                        },
+                    )
+                    # record_latency takes seconds and stores milliseconds; latency
+                    # is already in ms, so convert it back to seconds here.
+                    reporter.record_latency(
+                        LATENCY_HTTP_REQUEST,
+                        latency / 1000.0,
+                        attributes={"method": method, "path": route_path},
+                    )
             qs = scope.get("query_string")
             first_byte_time = info["first_byte_time"]
             ttfb_ms = (

langgraph_api/queue_entrypoint.py CHANGED Viewed

@@ -65,6 +65,7 @@ def _ensure_port_available(host: str, port: int) -> None:
 async def health_and_metrics_server():
     import uvicorn  # noqa: PLC0415
+    from prometheus_client import CONTENT_TYPE_LATEST, generate_latest  # noqa: PLC0415
     from starlette.applications import Starlette  # noqa: PLC0415
     from starlette.requests import Request  # noqa: PLC0415
     from starlette.responses import JSONResponse, PlainTextResponse  # noqa: PLC0415
@@ -91,42 +92,18 @@ async def health_and_metrics_server():
             )
             metrics_format = "prometheus"
-        metrics = get_metrics()
-        worker_metrics = metrics["workers"]
-        workers_max = worker_metrics["max"]
-        workers_active = worker_metrics["active"]
-        workers_available = worker_metrics["available"]
-        project_id = os.getenv("LANGSMITH_HOST_PROJECT_ID")
-        revision_id = os.getenv("LANGSMITH_HOST_REVISION_ID")
-        pg_redis_stats = await meta_pool_stats(metrics_format)
-        if metrics_format == "json":
-            resp = {
-                **pg_redis_stats,
-                "workers": worker_metrics,
-            }
-            return JSONResponse(resp)
-        elif metrics_format == "prometheus":
-            metrics_lines = [
-                "# HELP lg_api_workers_max The maximum number of workers available.",
-                "# TYPE lg_api_workers_max gauge",
-                f'lg_api_workers_max{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_max}',
-                "# HELP lg_api_workers_active The number of currently active workers.",
-                "# TYPE lg_api_workers_active gauge",
-                f'lg_api_workers_active{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_active}',
-                "# HELP lg_api_workers_available The number of available (idle) workers.",
-                "# TYPE lg_api_workers_available gauge",
-                f'lg_api_workers_available{{project_id="{project_id}", revision_id="{revision_id}"}} {workers_available}',
-            ]
-            metrics_lines.extend(pg_redis_stats)
-            return PlainTextResponse(
-                "\n".join(metrics_lines),
-                media_type="text/plain; version=0.0.4; charset=utf-8",
-            )
+        if metrics_format == "prometheus":
+            # Served from THIS process's OTLP Prometheus registry. The collector
+            # runs in every process, so this queue worker exposes its worker
+            # gauges and its own pool stats — but NOT queue depth
+            # (num_pending/num_running), which the collector emits on the API
+            # process only. Mirrors the main API /metrics (api/meta.py:meta_metrics).
+            return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+        # JSON: hand-built snapshot of workers + pool stats.
+        worker_metrics = get_metrics()["workers"]
+        pg_redis_stats = await meta_pool_stats()
+        return JSONResponse({**pg_redis_stats, "workers": worker_metrics})
     routes = [
         Route("/ok", health_endpoint),

langgraph_api/self_hosted_metrics.py CHANGED Viewed

@@ -287,7 +287,7 @@ def _get_pool_stats():
     # so we submit this as a coro to run in the main event loop
     async def _fetch_pool_stats():
         try:
-            return await meta_pool_stats("json")
+            return await meta_pool_stats()
         except Exception as e:
             logger.warning("Failed to get pool stats", exc_info=e)
             return {"postgres": {}, "redis": {}}

{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langgraph-api
-Version: 0.11.0.dev9
+Version: 0.12.0.dev1
 Author-email: Will Fu-Hinthorn <will@langchain.dev>, Josh Rogers <josh@langchain.dev>, Parker Rule <parker@langchain.dev>
 License: Elastic-2.0
 License-File: LICENSE
@@ -16,7 +16,7 @@ Requires-Dist: jsonschema-rs<0.45,>=0.20.0
 Requires-Dist: langchain-core>=0.3.64
 Requires-Dist: langchain-protocol<0.1,>=0.0.16
 Requires-Dist: langgraph-checkpoint<5,>=3.0.1
-Requires-Dist: langgraph-runtime-inmem<0.31.0.dev0,>=0.30.0.dev0
+Requires-Dist: langgraph-runtime-inmem<0.32.0.dev0,>=0.31.0.dev0
 Requires-Dist: langgraph-sdk>=0.3.5
 Requires-Dist: langgraph<2,>=0.4.10
 Requires-Dist: langsmith[otel]>=0.6.3
@@ -29,7 +29,7 @@ Requires-Dist: prometheus-client>=0.0.1
 Requires-Dist: protobuf<7.0.0,>=6.32.1
 Requires-Dist: pyjwt>=2.9.0
 Requires-Dist: sse-starlette<3.4.0,>=2.1.3
-Requires-Dist: starlette>=0.38.6
+Requires-Dist: starlette>=1.0.1
 Requires-Dist: structlog<26,>=24.1.0
 Requires-Dist: tenacity>=8.0.0
 Requires-Dist: truststore>=0.1

{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-langgraph_api/__init__.py,sha256=ODvnQh6zY2Mdw6hMdx11i_MRvcQ-Kv5LMYazITUYfdo,28
+langgraph_api/__init__.py,sha256=mzCq9ao7wd_jc3lCw0iVM8xH_PSoH6l9__-Lr3pxcKA,28
 langgraph_api/_factory_utils.py,sha256=5JsiJbg_YocVSryN2jwoZTg03-eyymlWMK6sKCmXwz0,5756
 langgraph_api/asgi_transport.py,sha256=XApY3lIWBZTMbbsl8dDJzl0cLGirmAGE0SifqZUnXvs,11896
 langgraph_api/asyncio.py,sha256=c-YE-14N7_AP1GzifsbP14XnhLsmxT2P916KXruerpI,10573
@@ -14,15 +14,16 @@ langgraph_api/http_metrics.py,sha256=etxbZNmYxdb58DVLNkHP7S-N6njXPTiQh2OWKMaIZi8
 langgraph_api/http_metrics_utils.py,sha256=sjxF7SYGTzY0Wz_G0dzatsYNnWr31S6ujej4JmBG2yo,866
 langgraph_api/logging.py,sha256=V1RCnqVLuMvJtrBiyMMLfaEdbS3k5A2M8Unhr4FUUdQ,6801
 langgraph_api/metadata.py,sha256=ih2et_R0prFsCzikQ4_L0j9up7t0rObAMVKyEk7ienI,9778
-langgraph_api/metrics_otlp.py,sha256=TxK96ks8fok_g0phk24QDvLcXm-Mh9TbVThoiyTJv6Y,18925
+langgraph_api/metrics_collector.py,sha256=gMLHL18rJyYl985AOmu9eH7W1ttdRdkPHzeyczjCOBw,8280
+langgraph_api/metrics_otlp.py,sha256=t9oJrxfxY2O5jY4JW2gONPKoBiBuklhzCrnZvn1qTxQ,28730
 langgraph_api/otel_context.py,sha256=DWFwW4Yu88QY4W2J0IRcURR450Th9J2DupvDDkSkMBA,7166
 langgraph_api/patch.py,sha256=ViUknYvyQWS6y0f5XuaEoci2qB_mQv8vZl-oaUxsI6M,1448
-langgraph_api/queue_entrypoint.py,sha256=JitIsrnJXnfAke2qwsvlvMKwvna9GMMm55wBYoMtJFM,12166
+langgraph_api/queue_entrypoint.py,sha256=-9YnY_GhmDxEiGCc3k-7UqRKK_M3dPriits2iGgYlgU,11327
 langgraph_api/release_tags.py,sha256=BjgGj2vFcA7I0MDRXLw1sUA4jquz-DaKVS0Eq-dYSjE,9091
 langgraph_api/route.py,sha256=_KE8A8Q-J-QfqjGlyM2Kc6n5cirmgt8xmI5-pI8kVEE,8837
 langgraph_api/schema.py,sha256=I_ciXy4YE3Ri4-PAWIvqLNRH2FpC4goTkKwfFwk6wIQ,15100
 langgraph_api/self_hosted_logs.py,sha256=FoUkPdtpt-nuEhejne8o1Q2phE9CccoHdoR_PvXPcBU,4442
-langgraph_api/self_hosted_metrics.py,sha256=wgjVYrEQNlgcn6TzAAf14Sdyz-i2l7bqAdhMn-qEiHI,16850
+langgraph_api/self_hosted_metrics.py,sha256=pWsQQ-2ukoFIbmVfzNOSkwCqZ5Cnts6pRSWTII44Ll4,16844
 langgraph_api/serde.py,sha256=V3fO9bkUOlBX3okw5Qi31nlcr59fcuXMgL7DHNyarZY,8855
 langgraph_api/server.py,sha256=1eAZPim0Pkgh5oGS4EvW-_7Zh_82iGOZtR1rpX08FoA,11216
 langgraph_api/sse.py,sha256=cChZ7raQUHp8p5BreE_5wMBR8lFO0n7746sV8_HQOrc,4822
@@ -41,7 +42,7 @@ langgraph_api/api/__init__.py,sha256=Zu1ew3dxYZu7cLRAjn-6HcYmtuQBdihlVFMKMJ77Y3c
 langgraph_api/api/a2a.py,sha256=VPllgqfoLUQD6Eqob3RjcegjtKgLhphNGTrTqbNLoIY,95135
 langgraph_api/api/assistants.py,sha256=4v1TpkeeSF7vFrbnOKIvh7BY4K0WamzEdMeTAzwRElE,20786
 langgraph_api/api/event_streaming.py,sha256=nvoaKz4QGklX5YUmY9WQ3vSwhQ1Q81QeQWNR8aEXUz8,17571
-langgraph_api/api/meta.py,sha256=jVRBWZio8x__LbSp4e4WPyPB0sJJ5cgHGjbitLd6yGE,11045
+langgraph_api/api/meta.py,sha256=4vqfgJYIqaqwZPfdmvHN00Co7SdeYSnQ1STxyvGWVDw,4698
 langgraph_api/api/openapi.py,sha256=Zkdlb9mjrQyHro1TtrDIWVuaBDovxx-uGWJ1fZMOg54,12604
 langgraph_api/api/profile.py,sha256=CA1ZkHALOuP8orYTICnEhcG_JnnA2wnyjbWyeb117jA,3455
 langgraph_api/api/runs.py,sha256=h5droLgaz_aAyILCRJIpbj2KH1PbijCeXcggOSa3Zww,35178
@@ -63,7 +64,7 @@ langgraph_api/auth/studio_user.py,sha256=gNCicIo6cYaHmFj2sEdsvDYkKW7NWfGXGS2tTAM
 langgraph_api/auth/langsmith/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langgraph_api/auth/langsmith/backend.py,sha256=Y6-VxD7zfV1jzGdjmQ66CgNa3SenLbo3d_375CcKZ9U,3770
 langgraph_api/auth/langsmith/client.py,sha256=79kwCVeHU64nsHsxWipfZhf44lM6vfs2nlfTxlJF6LU,4142
-langgraph_api/config/__init__.py,sha256=gHQgH-0qR3nD0woEZjgQwKNv71FieLhX9Hr6NlrdYzs,25327
+langgraph_api/config/__init__.py,sha256=qj7HF1XmojpG6WXahlFxrMZiO8hUI6QlPNVT-32xfvs,25528
 langgraph_api/config/_parse.py,sha256=VXQPKzqtIsZrRy-nUEBMDESBxXzqFRQNiqsvAZeX3HU,3921
 langgraph_api/config/schemas.py,sha256=rYqu67fZxmtCOU-Zc1s3265KbRbqK8PmfvfwvrAmd-Q,20863
 langgraph_api/encryption/__init__.py,sha256=gaCZ00CocSbqSqrDn6XJHaSp2CZCnC8qnrD9G4fbzyI,363
@@ -125,7 +126,7 @@ langgraph_api/lc_security/policy.py,sha256=tW0IACvPCeJZbcsUKv9egk0LapF5gL3hxF9Ao
 langgraph_api/lc_security/transport.py,sha256=AgiAsPLNP135ag30XPwVDkMNhomiYJwGYUKs0TfaWSI,7183
 langgraph_api/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langgraph_api/middleware/ensure_store.py,sha256=KzgAsLPloPD8mcQG-5v7kguzDMFrs9PJz2-xK5-rV0Q,1351
-langgraph_api/middleware/http_logger.py,sha256=Y5E1NgDJuJHWI3pMtpu0cdspZA3Jikbv2PEsiWFctaA,7149
+langgraph_api/middleware/http_logger.py,sha256=jjqLBPqoGRC1UfB2VYKPY2tkq6gT7Rm88DnAHBlwuTw,8231
 langgraph_api/middleware/private_network.py,sha256=eQEzWI8epBNUCiNsMu9O27ofHBQ45M0p2OZy5YdUYos,2097
 langgraph_api/middleware/request_id.py,sha256=-p230Q5jDJAJLmSZRqQvB4dFFkJS9B4Vwg6pUgQtI24,1259
 langgraph_api/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -228,8 +229,8 @@ langgraph_grpc_common/proto/errors_pb2.py,sha256=JI6x-vBK1AE7DHZ5DQwN1mZWF6C4xTR
 langgraph_grpc_common/proto/errors_pb2.pyi,sha256=rd3-BYUH8V-aO66taL7OOblaLgdrDtf1Vcd38GUoVVM,2181
 langgraph_grpc_common/proto/errors_pb2_grpc.py,sha256=2-LwQ0OPGo-NtC0269q7Fw6GPBxnTLYWq3xP5Eq0_YA,886
 langgraph_grpc_common/proto/errors_pb2_grpc.pyi,sha256=uC9Wnq6uyg488QiONpJ0ba1s_iouQCOYsjd_FDd1XUM,495
-langgraph_api-0.11.0.dev9.dist-info/METADATA,sha256=4oMpCP0-X5iIKGh10-YW2PnAbkNVSMWCjBMj-pEyBjA,4630
-langgraph_api-0.11.0.dev9.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
-langgraph_api-0.11.0.dev9.dist-info/entry_points.txt,sha256=hGedv8n7cgi41PypMfinwS_HfCwA7xJIfS0jAp8htV8,78
-langgraph_api-0.11.0.dev9.dist-info/licenses/LICENSE,sha256=ZPwVR73Biwm3sK6vR54djCrhaRiM4cAD2zvOQZV8Xis,3859
-langgraph_api-0.11.0.dev9.dist-info/RECORD,,
+langgraph_api-0.12.0.dev1.dist-info/METADATA,sha256=T_4d-LSGhM29_XbrhbUhbSvblmXtgsfPeDugmsCBuLU,4629
+langgraph_api-0.12.0.dev1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+langgraph_api-0.12.0.dev1.dist-info/entry_points.txt,sha256=hGedv8n7cgi41PypMfinwS_HfCwA7xJIfS0jAp8htV8,78
+langgraph_api-0.12.0.dev1.dist-info/licenses/LICENSE,sha256=ZPwVR73Biwm3sK6vR54djCrhaRiM4cAD2zvOQZV8Xis,3859
+langgraph_api-0.12.0.dev1.dist-info/RECORD,,

{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/WHEEL RENAMED Viewed

File without changes

{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langgraph_api-0.11.0.dev9.dist-info → langgraph_api-0.12.0.dev1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

langgraph-api 0.11.0.dev9__py3-none-any.whl → 0.12.0.dev1__py3-none-any.whl

langgraph-api 0.11.0.dev9__py3-none-any.whl → 0.12.0.dev1__py3-none-any.whl