juniper-observability 0.1.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_observability/__init__.py +65 -0
- juniper_observability/_version.py +3 -0
- juniper_observability/constants.py +42 -0
- juniper_observability/health/__init__.py +11 -0
- juniper_observability/health/models.py +41 -0
- juniper_observability/health/probe.py +49 -0
- juniper_observability/logging.py +83 -0
- juniper_observability/middleware/__init__.py +6 -0
- juniper_observability/middleware/prometheus.py +78 -0
- juniper_observability/middleware/request_id.py +43 -0
- juniper_observability/prometheus.py +40 -0
- juniper_observability/sentry.py +75 -0
- juniper_observability-0.1.0a0.dist-info/METADATA +71 -0
- juniper_observability-0.1.0a0.dist-info/RECORD +16 -0
- juniper_observability-0.1.0a0.dist-info/WHEEL +5 -0
- juniper_observability-0.1.0a0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""``juniper-observability`` — shared observability primitives for Juniper services.
|
|
2
|
+
|
|
3
|
+
Single source of truth for ``DependencyStatus`` / ``ReadinessResponse``
|
|
4
|
+
Pydantic models, the dependency-probe utility, structured-JSON logging,
|
|
5
|
+
the R1.1/R1.2/R1.3 contract constants, and the Starlette middlewares
|
|
6
|
+
(``RequestIdMiddleware``, ``PrometheusMiddleware``) that every Juniper
|
|
7
|
+
server applies.
|
|
8
|
+
|
|
9
|
+
Per-service metric definitions (training-loop counters, dataset-gen
|
|
10
|
+
histograms, websocket gauges, etc.) intentionally stay in their owning
|
|
11
|
+
repo — only cross-cutting infrastructure lives here.
|
|
12
|
+
|
|
13
|
+
See ``notes/code-review/METRICS_MONITORING_R2.1_SHARED_OBSERVABILITY_DESIGN_2026-04-28.md``
|
|
14
|
+
in juniper-ml for the design and migration plan.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from juniper_observability._version import __version__
|
|
18
|
+
from juniper_observability.constants import (
|
|
19
|
+
HEADER_X_REQUEST_ID,
|
|
20
|
+
LIVENESS_STALENESS_SECONDS,
|
|
21
|
+
LIVENESS_TICK_BUDGET_MS,
|
|
22
|
+
READINESS_HEADER,
|
|
23
|
+
UNMATCHED_ENDPOINT_LABEL,
|
|
24
|
+
)
|
|
25
|
+
from juniper_observability.health.models import DependencyStatus, ReadinessResponse
|
|
26
|
+
from juniper_observability.health.probe import probe_dependency
|
|
27
|
+
from juniper_observability.logging import (
|
|
28
|
+
DEFAULT_LOG_FORMAT_PLAIN,
|
|
29
|
+
LOG_FORMAT_JSON,
|
|
30
|
+
JuniperJsonFormatter,
|
|
31
|
+
configure_logging,
|
|
32
|
+
)
|
|
33
|
+
from juniper_observability.middleware import PrometheusMiddleware, RequestIdMiddleware, request_id_var
|
|
34
|
+
from juniper_observability.prometheus import get_prometheus_app, set_build_info
|
|
35
|
+
from juniper_observability.sentry import DEFAULT_SENTRY_TRACES_SAMPLE_RATE, configure_sentry
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
# Version
|
|
39
|
+
"__version__",
|
|
40
|
+
# Constants (R1.1/R1.2/R1.3 contract)
|
|
41
|
+
"HEADER_X_REQUEST_ID",
|
|
42
|
+
"LIVENESS_STALENESS_SECONDS",
|
|
43
|
+
"LIVENESS_TICK_BUDGET_MS",
|
|
44
|
+
"READINESS_HEADER",
|
|
45
|
+
"UNMATCHED_ENDPOINT_LABEL",
|
|
46
|
+
# Health
|
|
47
|
+
"DependencyStatus",
|
|
48
|
+
"ReadinessResponse",
|
|
49
|
+
"probe_dependency",
|
|
50
|
+
# Logging
|
|
51
|
+
"DEFAULT_LOG_FORMAT_PLAIN",
|
|
52
|
+
"JuniperJsonFormatter",
|
|
53
|
+
"LOG_FORMAT_JSON",
|
|
54
|
+
"configure_logging",
|
|
55
|
+
# Middleware
|
|
56
|
+
"PrometheusMiddleware",
|
|
57
|
+
"RequestIdMiddleware",
|
|
58
|
+
"request_id_var",
|
|
59
|
+
# Prometheus utilities
|
|
60
|
+
"get_prometheus_app",
|
|
61
|
+
"set_build_info",
|
|
62
|
+
# Sentry
|
|
63
|
+
"DEFAULT_SENTRY_TRACES_SAMPLE_RATE",
|
|
64
|
+
"configure_sentry",
|
|
65
|
+
]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Cross-service observability contract constants.
|
|
2
|
+
|
|
3
|
+
These constants pin down the wire format established by METRICS-MON
|
|
4
|
+
R1.1, R1.2, and R1.3 across juniper-data, juniper-cascor, and
|
|
5
|
+
juniper-canopy. Pulling them into a single module ensures that any
|
|
6
|
+
future contract change happens in one place and ripples to every
|
|
7
|
+
consumer at version-bump time.
|
|
8
|
+
|
|
9
|
+
References:
|
|
10
|
+
- juniper-ml notes/code-review/METRICS_MONITORING_R1.1_*: cardinality
|
|
11
|
+
- juniper-ml notes/code-review/METRICS_MONITORING_R1.2_PROBE_DESIGN_2026-04-27.md
|
|
12
|
+
- juniper-ml notes/code-review/METRICS_MONITORING_R1.3_WORKER_HEARTBEAT_DESIGN_2026-04-27.md
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Final
|
|
16
|
+
|
|
17
|
+
# METRICS-MON R1.1 / seed-01: when a request does not match any
|
|
18
|
+
# registered Starlette route template, the Prometheus middleware emits
|
|
19
|
+
# this single bucket value for the ``endpoint`` label and increments a
|
|
20
|
+
# separate ``http_unmatched_requests_total{method}`` counter so the
|
|
21
|
+
# label cardinality stays bounded under attacker-controlled paths.
|
|
22
|
+
UNMATCHED_ENDPOINT_LABEL: Final[str] = "_unmatched"
|
|
23
|
+
|
|
24
|
+
# METRICS-MON R1.2 / seed-02: response header that mirrors the readiness
|
|
25
|
+
# body status. Lets ``kubectl describe pod`` and ``curl -I`` surface the
|
|
26
|
+
# state without parsing JSON.
|
|
27
|
+
READINESS_HEADER: Final[str] = "X-Juniper-Readiness"
|
|
28
|
+
|
|
29
|
+
# METRICS-MON R1.2 / seed-03: liveness tick must complete within this
|
|
30
|
+
# wall-clock budget (milliseconds). Helm ``timeoutSeconds`` (5–10s)
|
|
31
|
+
# wraps this with headroom; the budget catches event-loop stalls and
|
|
32
|
+
# CPU starvation that the previous no-op probe could not.
|
|
33
|
+
LIVENESS_TICK_BUDGET_MS: Final[int] = 250
|
|
34
|
+
|
|
35
|
+
# METRICS-MON R1.2 / seed-03: heartbeat staleness threshold for
|
|
36
|
+
# services that bump a per-second liveness counter (e.g.,
|
|
37
|
+
# juniper-cascor's lifecycle manager). A staleness > 30s reliably
|
|
38
|
+
# indicates a wedged process.
|
|
39
|
+
LIVENESS_STALENESS_SECONDS: Final[float] = 30.0
|
|
40
|
+
|
|
41
|
+
# Standard request-id header propagated through ``RequestIdMiddleware``.
|
|
42
|
+
HEADER_X_REQUEST_ID: Final[str] = "X-Request-ID"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Health-check primitives shared across Juniper services.
|
|
2
|
+
|
|
3
|
+
Re-exports the model classes and probe utility so consumers can
|
|
4
|
+
``from juniper_observability.health import DependencyStatus, ReadinessResponse, probe_dependency``
|
|
5
|
+
without reaching into the submodules.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from juniper_observability.health.models import DependencyStatus, ReadinessResponse
|
|
9
|
+
from juniper_observability.health.probe import probe_dependency
|
|
10
|
+
|
|
11
|
+
__all__ = ["DependencyStatus", "ReadinessResponse", "probe_dependency"]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Pydantic models for ``/v1/health/ready`` responses.
|
|
2
|
+
|
|
3
|
+
The R1.2 probe contract pins:
|
|
4
|
+
|
|
5
|
+
* ``DependencyStatus.status`` is one of {healthy, unhealthy, degraded,
|
|
6
|
+
not_configured}.
|
|
7
|
+
* ``ReadinessResponse.status`` is one of {ready, degraded, not_ready};
|
|
8
|
+
HTTP status code is 200 for ready/degraded and 503 for not_ready.
|
|
9
|
+
* ``ReadinessResponse.timestamp`` is a unix-epoch float **derived from
|
|
10
|
+
timezone-aware UTC** (resolves the cascor naive-tz drift identified
|
|
11
|
+
during R1.2 implementation; matches juniper-data's BUG-JD-06 fix).
|
|
12
|
+
|
|
13
|
+
These models are wire-compatible with the per-repo copies they replace.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
|
+
from typing import Literal
|
|
18
|
+
|
|
19
|
+
from pydantic import BaseModel, Field
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DependencyStatus(BaseModel):
    """Result of checking one dependency while answering a readiness request."""

    # Label identifying which dependency this entry describes.
    name: str
    # One of the four states the annotation allows; anything else is rejected
    # by model validation.
    status: Literal["healthy", "unhealthy", "degraded", "not_configured"]
    # Elapsed probe time in milliseconds; stays ``None`` when not supplied.
    latency_ms: float | None = Field(default=None)
    # Optional free-form detail; stays ``None`` when not supplied.
    message: str | None = Field(default=None)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ReadinessResponse(BaseModel):
    """Response body shared by every Juniper service's ``/v1/health/ready``."""

    # Overall verdict; restricted to the three values in the annotation.
    status: Literal["ready", "degraded", "not_ready"]
    # Version string of the reporting service.
    version: str
    # Name of the reporting service.
    service: str
    # METRICS-MON R1.2 / BUG-JD-06: epoch seconds computed from a
    # timezone-aware UTC ``datetime`` at model-construction time, never from
    # a naive one.
    timestamp: float = Field(default_factory=lambda: datetime.now(tz=UTC).timestamp())
    # Per-dependency results keyed by string; a fresh empty dict per instance.
    dependencies: dict[str, DependencyStatus] = Field(default_factory=lambda: {})
    # Arbitrary extra data keyed by string; a fresh empty dict per instance.
    details: dict[str, object] = Field(default_factory=lambda: {})
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Synchronous dependency-probe helper used by readiness handlers.
|
|
2
|
+
|
|
3
|
+
The probe is a one-shot HTTP GET against a peer service's
|
|
4
|
+
``/v1/health/live`` endpoint. ``probe_dependency`` swallows every
|
|
5
|
+
exception and converts it into a ``DependencyStatus`` with status
|
|
6
|
+
``unhealthy``; this is intentional — a probe failure must never bubble
|
|
7
|
+
out of the readiness handler and crash the request.
|
|
8
|
+
|
|
9
|
+
R4.2 will introduce an async variant that does not block the event
|
|
10
|
+
loop. Until then, callers running inside async handlers should be aware
|
|
11
|
+
that ``probe_dependency`` is sync and best invoked via
|
|
12
|
+
``asyncio.to_thread`` if probe latency is non-trivial.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
import urllib.request
|
|
17
|
+
|
|
18
|
+
from juniper_observability.health.models import DependencyStatus
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def probe_dependency(name: str, url: str, timeout: float = 5.0) -> DependencyStatus:
    """Probe a dependency health endpoint and report the outcome with latency.

    Issues one blocking ``urllib.request.urlopen`` call and never raises for
    a probe failure: every exception is folded into an ``unhealthy`` result.

    Args:
        name: Human-readable name of the dependency for logs/dashboards.
        url: Health endpoint URL to probe (typically ``/v1/health/live``).
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        ``DependencyStatus`` with:
          - ``status="healthy"`` and ``message=url`` when the request
            returns without raising.
          - ``status="unhealthy"`` for any exception (connection refused,
            timeout, non-2xx via ``HTTPError``); ``message`` carries the URL,
            the exception type name and the exception text.
          - ``latency_ms`` always populated from a monotonic clock, rounded
            to one decimal place, measured up to the moment ``urlopen``
            returned or raised.
    """
    # Local import: only needed to recognise the one exception type that
    # carries an open response.
    from urllib.error import HTTPError

    start = time.monotonic()
    try:
        # The response object owns a socket. Only reachability matters here,
        # so the body is never read and the ``with`` block closes the
        # connection immediately instead of leaving it to the garbage
        # collector.
        with urllib.request.urlopen(url, timeout=timeout):  # nosec B310 — internal health probe
            latency = (time.monotonic() - start) * 1000
        return DependencyStatus(name=name, status="healthy", latency_ms=round(latency, 1), message=url)
    except Exception as e:  # noqa: BLE001 — probe surfaces every failure mode
        latency = (time.monotonic() - start) * 1000
        # Build the message before closing so the text is unaffected.
        detail = f"{url} — {type(e).__name__}: {e}"
        if isinstance(e, HTTPError):
            # ``HTTPError`` doubles as a response object and holds the
            # connection open until closed.
            e.close()
        return DependencyStatus(
            name=name,
            status="unhealthy",
            latency_ms=round(latency, 1),
            message=detail,
        )
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Structured-JSON logging primitives for Juniper services.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
|
|
5
|
+
- ``JuniperJsonFormatter`` — a ``logging.Formatter`` subclass that
|
|
6
|
+
emits one JSON object per record with stable keys (``timestamp``,
|
|
7
|
+
``level``, ``logger``, ``message``, ``service``, ``request_id``, and
|
|
8
|
+
optional ``exception``).
|
|
9
|
+
- ``configure_logging`` — installs the formatter onto the root logger
|
|
10
|
+
with the requested level, replacing any existing handlers.
|
|
11
|
+
|
|
12
|
+
The ``request_id`` field is sourced from the ``request_id_var``
|
|
13
|
+
ContextVar populated by ``RequestIdMiddleware`` so async handlers can
|
|
14
|
+
emit log lines that correlate to the originating HTTP request without
|
|
15
|
+
threading the ID through every call.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
|
|
21
|
+
from juniper_observability.middleware.request_id import request_id_var
|
|
22
|
+
|
|
23
|
+
# Default plain-text log format when ``log_format != "json"``. Mirrors
|
|
24
|
+
# the Python convention used by every Juniper service.
|
|
25
|
+
DEFAULT_LOG_FORMAT_PLAIN = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
|
26
|
+
|
|
27
|
+
# Sentinel value identifying the JSON formatter mode.
|
|
28
|
+
LOG_FORMAT_JSON = "json"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class JuniperJsonFormatter(logging.Formatter):
    """Render each log record as a single JSON object.

    Every line carries the keys ``timestamp``, ``level``, ``logger``,
    ``message``, ``service`` and ``request_id``; ``exception`` is added only
    when the record holds an actual exception instance.
    """

    def __init__(self, service: str = "juniper-service") -> None:
        """Remember the service name stamped onto every emitted entry."""
        super().__init__()
        self._service = service

    def format(self, record: logging.LogRecord) -> str:
        """Serialize *record* to a JSON string."""
        payload = dict(
            timestamp=self.formatTime(record, self.datefmt),
            level=record.levelname,
            logger=record.name,
            message=record.getMessage(),
            service=self._service,
            # Falls back to an empty string when the ContextVar has no value.
            request_id=request_id_var.get(""),
        )
        exc_info = record.exc_info
        # ``exc_info`` may be set with no exception instance in slot 1; only
        # format a traceback when one is really there.
        if exc_info and exc_info[1] is not None:
            payload["exception"] = self.formatException(exc_info)
        return json.dumps(payload)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def configure_logging(log_level: str, log_format: str, service_name: str = "juniper-service") -> None:
    """Configure the root logger with a single stream handler.

    Existing root handlers are removed first, so calling this more than once
    does not produce duplicate output.

    Args:
        log_level: Logging level string (``"INFO"``, ``"DEBUG"``, …),
            matched case-insensitively against the ``logging`` module's
            level constants. Unknown values fall back to ``logging.INFO``.
        log_format: ``"json"`` for structured JSON via
            :class:`JuniperJsonFormatter`; anything else for plain
            text via :data:`DEFAULT_LOG_FORMAT_PLAIN`.
        service_name: Service identity included in JSON log entries.
    """
    candidate = getattr(logging, log_level.upper(), None)
    # ``logging`` also exposes upper-case attributes that are not levels
    # (``BASIC_FORMAT`` is a format string). Accept only integers so such a
    # name falls back to INFO instead of making ``setLevel`` raise.
    level = candidate if isinstance(candidate, int) else logging.INFO
    root = logging.getLogger()
    root.setLevel(level)

    # Remove existing handlers to avoid duplicate output. Iterate over a copy
    # because ``removeHandler`` mutates ``root.handlers``.
    for handler in root.handlers[:]:
        root.removeHandler(handler)

    handler = logging.StreamHandler()
    handler.setLevel(level)

    if log_format == LOG_FORMAT_JSON:
        handler.setFormatter(JuniperJsonFormatter(service=service_name))
    else:
        handler.setFormatter(logging.Formatter(DEFAULT_LOG_FORMAT_PLAIN))

    root.addHandler(handler)
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
"""Starlette middleware shared across Juniper services."""
|
|
2
|
+
|
|
3
|
+
from juniper_observability.middleware.prometheus import PrometheusMiddleware
|
|
4
|
+
from juniper_observability.middleware.request_id import RequestIdMiddleware, request_id_var
|
|
5
|
+
|
|
6
|
+
__all__ = ["PrometheusMiddleware", "RequestIdMiddleware", "request_id_var"]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Prometheus middleware for Juniper services.
|
|
2
|
+
|
|
3
|
+
METRICS-MON R1.1 / seed-01 contract:
|
|
4
|
+
|
|
5
|
+
- ``endpoint`` label is set to the resolved Starlette route template
|
|
6
|
+
(e.g. ``/v1/datasets/{dataset_id}``) — never to the raw URL path.
|
|
7
|
+
- Requests that do not match any registered route template collapse
|
|
8
|
+
into ``UNMATCHED_ENDPOINT_LABEL`` and increment a separate counter
|
|
9
|
+
``<namespace>_http_unmatched_requests_total{method}``.
|
|
10
|
+
- This bounds Prometheus label cardinality under attacker-controlled
|
|
11
|
+
paths or path-parameter routes; per-repo dashboards relying on the
|
|
12
|
+
unbounded raw-URL fallback have been migrated.
|
|
13
|
+
|
|
14
|
+
The middleware is service-specific only by virtue of its ``namespace``
|
|
15
|
+
prefix — ``juniper_data_*``, ``juniper_cascor_*``, ``juniper_canopy_*``.
|
|
16
|
+
Consumers pass their identity at construction time.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import time
|
|
20
|
+
|
|
21
|
+
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
|
22
|
+
from starlette.requests import Request
|
|
23
|
+
from starlette.responses import Response
|
|
24
|
+
|
|
25
|
+
from juniper_observability.constants import UNMATCHED_ENDPOINT_LABEL
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PrometheusMiddleware(BaseHTTPMiddleware):
    """Tracks HTTP request counts and durations with bounded cardinality.

    Lazily imports ``prometheus_client`` so the broader package can be
    used without the ``[prometheus]`` extra installed.

    Three collectors are created per instance, each prefixed with
    ``<namespace>_`` when ``namespace`` is non-empty:

    - ``http_requests_total`` — counter labelled ``method``, ``endpoint``,
      ``status``.
    - ``http_request_duration_seconds`` — histogram labelled ``method``,
      ``endpoint``.
    - ``http_unmatched_requests_total`` — counter labelled ``method``.
    """

    def __init__(self, app: object, service_name: str = "juniper-service", namespace: str = "juniper") -> None:
        """Create the collectors.

        Args:
            app: The wrapped ASGI application, passed to the base class.
            service_name: Accepted for signature compatibility; not used by
                this implementation.
            namespace: Metric-name prefix; an empty string disables the
                prefix.
        """
        super().__init__(app)
        # Imported here, not at module top, so importing this module does not
        # require ``prometheus_client``.
        from prometheus_client import Counter, Histogram

        prefix = f"{namespace}_" if namespace else ""
        self._request_count = Counter(
            f"{prefix}http_requests_total",
            "Total HTTP requests",
            ["method", "endpoint", "status"],
        )
        self._request_duration = Histogram(
            f"{prefix}http_request_duration_seconds",
            "HTTP request duration in seconds",
            ["method", "endpoint"],
        )
        self._unmatched_count = Counter(
            f"{prefix}http_unmatched_requests_total",
            "HTTP requests not matching any registered route template",
            ["method"],
        )

    async def dispatch(
        self,
        request: Request,
        call_next: RequestResponseEndpoint,
    ) -> Response:
        """Time the downstream call and record it, including failures.

        An exception escaping ``call_next`` is recorded with status ``"500"``
        and then re-raised unchanged, so requests that crash still show up in
        the counters and the histogram.
        """
        start = time.perf_counter()
        try:
            response = await call_next(request)
        except Exception:
            # No response object exists on this path. "500" is what the
            # client ultimately receives for an unhandled error.
            self._record(request, "500", time.perf_counter() - start)
            raise
        self._record(request, str(response.status_code), time.perf_counter() - start)
        return response

    def _record(self, request: Request, status: str, duration: float) -> None:
        """Update the collectors for one finished request."""
        # The route is read after the downstream call because routing fills
        # in ``scope["route"]`` while handling the request.
        # NOTE(review): this key is believed to be set by FastAPI's routing;
        # confirm plain Starlette routes populate it too, otherwise every
        # request lands in the unmatched bucket.
        route = request.scope.get("route")
        template = getattr(route, "path", None) if route is not None else None
        method = request.method
        if template:
            endpoint = template
        else:
            # Collapse everything without a route template into one label
            # value so arbitrary paths cannot grow label cardinality.
            endpoint = UNMATCHED_ENDPOINT_LABEL
            self._unmatched_count.labels(method=method).inc()

        self._request_count.labels(method=method, endpoint=endpoint, status=status).inc()
        self._request_duration.labels(method=method, endpoint=endpoint).observe(duration)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Request-ID propagation middleware.
|
|
2
|
+
|
|
3
|
+
Injects an ``X-Request-ID`` header into every response and stores the
|
|
4
|
+
value in a ContextVar so async handlers and log records can correlate
|
|
5
|
+
to the originating HTTP request without threading the ID through every
|
|
6
|
+
call.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import uuid
|
|
10
|
+
from contextvars import ContextVar
|
|
11
|
+
|
|
12
|
+
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
|
13
|
+
from starlette.requests import Request
|
|
14
|
+
from starlette.responses import Response
|
|
15
|
+
|
|
16
|
+
from juniper_observability.constants import HEADER_X_REQUEST_ID
|
|
17
|
+
|
|
18
|
+
# Public ContextVar; ``JuniperJsonFormatter`` reads from it to embed the
|
|
19
|
+
# request ID in every log record emitted during the request scope.
|
|
20
|
+
request_id_var: ContextVar[str] = ContextVar("request_id", default="")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RequestIdMiddleware(BaseHTTPMiddleware):
    """Injects ``X-Request-ID`` into ContextVar and response header.

    If the request carries a non-empty inbound ``X-Request-ID`` header, that
    value is propagated; otherwise (header absent or empty) a fresh UUID4 is
    generated. The ID is echoed back on every response that is returned
    normally.
    """

    async def dispatch(
        self,
        request: Request,
        call_next: RequestResponseEndpoint,
    ) -> Response:
        """Bind the request ID for the duration of the downstream call."""
        # ``or`` rather than a ``.get`` default: a header that is present but
        # empty must also trigger generation, otherwise log lines and the
        # response would carry a blank ID. It also avoids building a UUID
        # that would be thrown away when the caller supplied one.
        rid = request.headers.get(HEADER_X_REQUEST_ID) or str(uuid.uuid4())
        token = request_id_var.set(rid)
        try:
            response = await call_next(request)
            response.headers[HEADER_X_REQUEST_ID] = rid
            return response
        finally:
            # Restore the previous value even when the downstream call
            # raises, so the ID never outlives this request.
            request_id_var.reset(token)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Prometheus utilities (ASGI app + build-info Info metric).
|
|
2
|
+
|
|
3
|
+
These helpers wrap ``prometheus_client`` so consumers don't need to
|
|
4
|
+
import the SDK directly. Both functions are lazy — ``prometheus_client``
|
|
5
|
+
is only imported when the helper is called, so the package can be
|
|
6
|
+
installed without the ``[prometheus]`` extra and these helpers will
|
|
7
|
+
simply raise ``ImportError`` at call time.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_prometheus_app():
    """Build the ASGI application that serves ``/metrics``.

    Thin wrapper over ``prometheus_client.make_asgi_app()``. Callers are
    expected to put their own access control in front of the returned app
    (e.g. juniper-data's SEC-16 ``MetricsAuthMiddleware`` IP allowlist)
    before mounting it.

    Returns:
        ASGI application serving Prometheus metrics in the standard
        scrape format.
    """
    # Deferred import: the module stays importable without the
    # ``[prometheus]`` extra, and the failure surfaces only on this call.
    import prometheus_client

    return prometheus_client.make_asgi_app()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def set_build_info(namespace: str, version: str) -> None:
    """Create a ``<namespace>_build`` Info metric describing this build.

    The metric carries two values: the supplied ``version`` and the running
    interpreter's ``major.minor.micro`` version as ``python_version``.

    Args:
        namespace: Metric namespace prefix (e.g. ``"juniper_data"``).
        version: Application version string.
    """
    # Deferred import keeps ``prometheus_client`` optional at import time.
    import prometheus_client

    major, minor, micro = sys.version_info[:3]
    payload = {"version": version, "python_version": f"{major}.{minor}.{micro}"}
    # The help text shows the namespace with dashes instead of underscores.
    service_label = namespace.replace("_", "-")
    build_metric = prometheus_client.Info(
        f"{namespace}_build",
        f"Build information for {service_label} service",
    )
    build_metric.info(payload)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Sentry initialization with reconciled signature and SEC-10 hook.
|
|
2
|
+
|
|
3
|
+
This module exposes the **superset** signature originally introduced
|
|
4
|
+
in juniper-data for SEC-10 (security review of Sentry forwarding) and
|
|
5
|
+
makes it the cross-service standard:
|
|
6
|
+
|
|
7
|
+
- ``send_pii`` is keyword-only, defaulting to ``False``.
|
|
8
|
+
- A ``before_send`` hook always scrubs ``X-API-Key``, ``Authorization``,
|
|
9
|
+
and ``Cookie`` headers from outbound events regardless of
|
|
10
|
+
``send_default_pii`` — defense in depth so that future Sentry SDK
|
|
11
|
+
changes (replay, custom integrations) cannot leak credentials.
|
|
12
|
+
|
|
13
|
+
``configure_sentry`` is a no-op when ``dsn`` is ``None`` or the empty
|
|
14
|
+
string, so consumers can call it unconditionally during startup.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
DEFAULT_SENTRY_TRACES_SAMPLE_RATE = 0.1
|
|
18
|
+
|
|
19
|
+
# SEC-10: header names that may carry API keys or session identifiers.
# Stored lower-case; lookups lower-case the candidate name first.
_SENTRY_SENSITIVE_HEADERS = frozenset({"x-api-key", "authorization", "cookie"})


def _strip_sensitive_headers(event, hint):  # noqa: ARG001 — Sentry hook signature
    """Redact sensitive request headers in a Sentry event with ``[Filtered]``.

    Installed as the ``before_send`` hook. Only values whose header name
    (compared case-insensitively) is in :data:`_SENTRY_SENSITIVE_HEADERS`
    are rewritten, so other diagnostic headers (user-agent, trace IDs, etc.)
    are left unchanged. The event is modified in place.

    Args:
        event: The outbound event; anything that is not a ``dict`` is
            returned untouched.
        hint: Unused; present because the hook signature requires it.

    Returns:
        The same ``event`` object that was passed in.
    """
    # Events that lack the expected ``request -> headers`` dict nesting are
    # passed through unchanged, never rejected.
    if not isinstance(event, dict):
        return event
    request_data = event.get("request", {})
    if not isinstance(request_data, dict):
        return event
    headers = request_data.get("headers", {})
    if not isinstance(headers, dict):
        return event

    # Snapshot the keys so assigning into the dict during the loop is safe.
    for key in list(headers):
        # A non-string key cannot name a header. Skipping it keeps
        # ``.lower()`` from raising inside the hook.
        if isinstance(key, str) and key.lower() in _SENTRY_SENSITIVE_HEADERS:
            headers[key] = "[Filtered]"
    # NOTE(review): only ``request["headers"]`` is scrubbed. If the SDK also
    # sends parsed cookies under a separate request key, those are not
    # covered here — confirm whether that matters for SEC-10.
    return event
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def configure_sentry(
    dsn: str | None,
    service_name: str,
    version: str,
    *,
    send_pii: bool = False,
    traces_sample_rate: float = DEFAULT_SENTRY_TRACES_SAMPLE_RATE,
) -> None:
    """Initialize Sentry. No-op when ``dsn`` is None or empty.

    Args:
        dsn: Sentry DSN URL. Pass ``None`` or empty string to skip
            initialization; ``sentry_sdk`` is not even imported in that case.
        service_name: Service name, combined with ``version`` into the
            ``release`` field as ``"<service_name>@<version>"``.
        version: Application version string.
        send_pii: Whether to send default PII (IP addresses, etc.) to
            Sentry. **Defaults to False** (SEC-10); operators opt in
            explicitly via per-service env vars when they accept the
            risk. The ``before_send`` filter still scrubs sensitive
            headers regardless of this flag.
        traces_sample_rate: Fraction of transactions to send (0.0–1.0).
    """
    if not dsn:
        return

    # Deferred import so the package works without the ``[sentry]`` extra
    # whenever no DSN is configured.
    import sentry_sdk

    options = {
        "dsn": dsn,
        "send_default_pii": send_pii,
        "traces_sample_rate": traces_sample_rate,
        "release": f"{service_name}@{version}",
        "before_send": _strip_sensitive_headers,
    }
    try:
        sentry_sdk.init(enable_logs=True, **options)
    except TypeError:
        # NOTE(review): the declared floor is sentry-sdk>=2.0.0, and older
        # 2.x releases appear to reject option names they do not know with
        # ``TypeError``. Retry without ``enable_logs`` so error reporting
        # still starts there; an unrelated ``TypeError`` is simply raised
        # again by this second call. Confirm the exact version that
        # introduced the option and consider raising the floor instead.
        sentry_sdk.init(**options)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: juniper-observability
|
|
3
|
+
Version: 0.1.0a0
|
|
4
|
+
Summary: Shared observability primitives (health models, logging, middleware, Sentry, Prometheus) for the Juniper ML platform
|
|
5
|
+
Author: Paul Calnon
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/pcalnon/juniper-ml
|
|
8
|
+
Project-URL: Repository, https://github.com/pcalnon/juniper-ml
|
|
9
|
+
Project-URL: Issues, https://github.com/pcalnon/juniper-ml/issues
|
|
10
|
+
Keywords: juniper,observability,health,metrics,logging
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Topic :: System :: Monitoring
|
|
19
|
+
Requires-Python: >=3.12
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: pydantic>=2.0
|
|
22
|
+
Requires-Dist: starlette>=0.27
|
|
23
|
+
Provides-Extra: prometheus
|
|
24
|
+
Requires-Dist: prometheus-client>=0.20.0; extra == "prometheus"
|
|
25
|
+
Provides-Extra: sentry
|
|
26
|
+
Requires-Dist: sentry-sdk[fastapi]>=2.0.0; extra == "sentry"
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Requires-Dist: juniper-observability[prometheus,sentry]; extra == "all"
|
|
29
|
+
Provides-Extra: test
|
|
30
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
31
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "test"
|
|
32
|
+
Requires-Dist: pytest-cov>=5.0; extra == "test"
|
|
33
|
+
Requires-Dist: httpx>=0.27; extra == "test"
|
|
34
|
+
Requires-Dist: fastapi>=0.110; extra == "test"
|
|
35
|
+
Requires-Dist: prometheus-client>=0.20.0; extra == "test"
|
|
36
|
+
Requires-Dist: sentry-sdk[fastapi]>=2.0.0; extra == "test"
|
|
37
|
+
|
|
38
|
+
# juniper-observability
|
|
39
|
+
|
|
40
|
+
Shared observability primitives for the Juniper ML platform.
|
|
41
|
+
|
|
42
|
+
## What's in here
|
|
43
|
+
|
|
44
|
+
- **Health models** (`DependencyStatus`, `ReadinessResponse`) — Pydantic models for the standard `/v1/health/ready` response shape used by every Juniper server.
|
|
45
|
+
- **Probe utility** (`probe_dependency`) — synchronous HTTP health-check helper.
|
|
46
|
+
- **Logging** (`JuniperJsonFormatter`, `configure_logging`) — structured-JSON logging with `request_id` propagation.
|
|
47
|
+
- **Middleware** (`RequestIdMiddleware`, `PrometheusMiddleware`) — Starlette middlewares applied by every Juniper server. The Prometheus middleware bounds label cardinality per the R1.1 contract.
|
|
48
|
+
- **Constants** (`UNMATCHED_ENDPOINT_LABEL`, `READINESS_HEADER`, `LIVENESS_TICK_BUDGET_MS`, `LIVENESS_STALENESS_SECONDS`) — pinned values from the R1.1, R1.2, and R1.3 cross-service contracts.
|
|
49
|
+
- **Prometheus utilities** (`get_prometheus_app`, `set_build_info`).
|
|
50
|
+
- **Sentry init** (`configure_sentry`) — with the SEC-10 `before_send` hook always installed.
|
|
51
|
+
|
|
52
|
+
## Install
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install juniper-observability # core only
|
|
56
|
+
pip install "juniper-observability[prometheus]" # + Prometheus middleware/utilities
|
|
57
|
+
pip install "juniper-observability[sentry]" # + Sentry init
|
|
58
|
+
pip install "juniper-observability[all]" # everything
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Per-service metrics stay in each repo
|
|
62
|
+
|
|
63
|
+
This package intentionally exposes only **cross-cutting** observability infrastructure. Service-specific metric definitions (training-loop counters, dataset-gen histograms, websocket gauges, etc.) live in their owning repo and use the lazy-init pattern with `prometheus_client` directly.
|
|
64
|
+
|
|
65
|
+
## Design + migration
|
|
66
|
+
|
|
67
|
+
See [`notes/code-review/METRICS_MONITORING_R2.1_SHARED_OBSERVABILITY_DESIGN_2026-04-28.md`](../notes/code-review/METRICS_MONITORING_R2.1_SHARED_OBSERVABILITY_DESIGN_2026-04-28.md) in the parent juniper-ml repo for the full design and the 5-PR migration sequence.
|
|
68
|
+
|
|
69
|
+
## License
|
|
70
|
+
|
|
71
|
+
MIT — see [LICENSE](../LICENSE).
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
juniper_observability/__init__.py,sha256=Bw5zDckWuQKJ1SQxQ0NDLl12sDOyN3YRjhNGDHBuUUw,2234
|
|
2
|
+
juniper_observability/_version.py,sha256=pATGvX6deSACs_N0gIf_2a_XBGNgiJ5WMLYlhYxCgXM,79
|
|
3
|
+
juniper_observability/constants.py,sha256=KtwgEC-k8xOhMdxYtcMYzlUFzfhStjJ7_SosP_2tSkE,1957
|
|
4
|
+
juniper_observability/logging.py,sha256=Lpl52liQOjFc-AkxbPBcRwRbdxqG06hcLjlh0M7n25I,3060
|
|
5
|
+
juniper_observability/prometheus.py,sha256=D9sMKQMbyolflqLNWK2dsbqr0N4FviPgc15Rqe_p8O8,1442
|
|
6
|
+
juniper_observability/sentry.py,sha256=N6MISGVKmeqF-nnVhN2iYvxR3U0wftVsATrLetVjgxQ,2937
|
|
7
|
+
juniper_observability/health/__init__.py,sha256=NgPynq4XCsrkHyGGwXSYHs-g8ugcjIpXTXtbyyZNH2I,487
|
|
8
|
+
juniper_observability/health/models.py,sha256=K9tI2bZir7Hx9orx2Mf068Hy2KgKYMNFSODI6GZySik,1521
|
|
9
|
+
juniper_observability/health/probe.py,sha256=aEB23s-1oK6Xykx2Xfuuj3s6CoXaHt4WQLpkh4YnJAU,2114
|
|
10
|
+
juniper_observability/middleware/__init__.py,sha256=lIuBq-F4FuySH-pXTyp7UgZy3zNrvcinQ0WXWBLfJmc,306
|
|
11
|
+
juniper_observability/middleware/prometheus.py,sha256=SO6pEt3mqfuZxNh_YMQ64OYIMX0KIhH5UJY6ERb-YXI,2972
|
|
12
|
+
juniper_observability/middleware/request_id.py,sha256=AKklsfGHLd-bpOLQupUvQWknfueDHnH8nh5uNjXz1T0,1504
|
|
13
|
+
juniper_observability-0.1.0a0.dist-info/METADATA,sha256=geWg9wMcDK56jykc1jXXpLybmERz_1Vg30wX6sdH8cA,3652
|
|
14
|
+
juniper_observability-0.1.0a0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
15
|
+
juniper_observability-0.1.0a0.dist-info/top_level.txt,sha256=EyZY0RUKHX85FvFXimRWyIF-GX9S0JKGhKudkj-nicY,22
|
|
16
|
+
juniper_observability-0.1.0a0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
juniper_observability
|