PyPI - omkit - Versions diffs - 0.0.2__py3-none-any.whl - Mend

omkit 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

omkit/__init__.py +18 -0
omkit/cleanup.py +62 -0
omkit/config.py +60 -0
omkit/cost.py +78 -0
omkit/data/__init__.py +33 -0
omkit/dbpool.py +139 -0
omkit/encryption.py +58 -0
omkit/eventbus.py +360 -0
omkit/events.py +23 -0
omkit/health.py +66 -0
omkit/http.py +82 -0
omkit/internal/__init__.py +7 -0
omkit/internal/crypto.py +17 -0
omkit/jobqueue/__init__.py +28 -0
omkit/jobqueue/envelope.py +116 -0
omkit/jobqueue/streaq.py +267 -0
omkit/logging.py +77 -0
omkit/metrics.py +41 -0
omkit/model_lifecycle.py +192 -0
omkit/platform/__init__.py +18 -0
omkit/providers/__init__.py +11 -0
omkit/providers/base.py +76 -0
omkit/providers/registry.py +263 -0
omkit/py.typed +0 -0
omkit/quota.py +186 -0
omkit/resilience.py +122 -0
omkit/sanitize.py +122 -0
omkit/security/__init__.py +28 -0
omkit/security/events.py +79 -0
omkit/sessions.py +301 -0
omkit/settings.py +348 -0
omkit/sync_notifier.py +110 -0
omkit/tenant.py +271 -0
omkit/tracing.py +80 -0
omkit/transport/__init__.py +29 -0
omkit/valkey.py +45 -0
omkit-0.0.2.dist-info/METADATA +29 -0
omkit-0.0.2.dist-info/RECORD +40 -0
omkit-0.0.2.dist-info/WHEEL +5 -0
omkit-0.0.2.dist-info/top_level.txt +1 -0

omkit/jobqueue/envelope.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""packages/omur-sdk/omkit/jobqueue/envelope.py — Cross-SDK envelope contract for job-queue payloads.
+Every task enqueued via streaq (Python) or Asynq (Go) is wrapped in this
+envelope. Workers unwrap on receive, validate, and run the handler under the
+tenant's RLS scope.
+exports: ENVELOPE_VERSION | class InvalidEnvelopeError | class Envelope | wrap(tenant_id, payload) | unwrap(data)
+rules:   The Envelope class must maintain strict tenant isolation and never allow cross-tenant data leakage. All envelope validation must be immutable and deterministic to ensure consistent task processing across distributed workers. The wrap/unwrap functions must handle all serialization edge cases including nested data structures and preserve original payload integrity during transformation.
+agent:   ollama/qwen3-coder:latest | ollama | 2026-05-01 | codedna-cli | initial CodeDNA annotation pass
+message:
+"""
+from __future__ import annotations
+import json
+import uuid
+from typing import Any
+from pydantic import BaseModel, Field, ValidationError, field_validator
+# Envelope schema version written by wrap(); Envelope's version validator
+# rejects anything below 1 or above this value.
+ENVELOPE_VERSION = 1
+class InvalidEnvelopeError(ValueError):
+    """Raised when an envelope cannot be parsed or fails validation.
+    The failure is not transient, so workers are expected to dead-letter the
+    task rather than retry it.
+    """
+class Envelope(BaseModel):
+    """Immutable, tenant-scoped wrapper around a task payload.
+    The envelope never interprets `payload`; each handler parses it into its
+    own pydantic model.
+    Cross-SDK contract: mirrors Go's packages/omur-go-sdk/jobqueue/Envelope
+    field-for-field. Both sides reject empty payloads and missing version
+    keys, so envelopes built by wrap()/Wrap() round-trip between Python and
+    Go workers.
+    """
+    # frozen: attributes cannot be reassigned; extra="forbid": unknown keys
+    # fail validation instead of being silently dropped.
+    model_config = {"frozen": True, "extra": "forbid"}
+    # `version` deliberately has no default. Go's Unwrap rejects envelopes
+    # with version==0 (the zero value of a missing field), so the Python side
+    # must reject a missing key in the same way.
+    version: int
+    tenant_id: str
+    payload: dict[str, Any]
+    @field_validator("tenant_id")
+    @classmethod
+    def _validate_tenant(cls, value: str) -> str:
+        """Return `value` unchanged if `uuid.UUID` can parse it, else raise ValueError."""
+        try:
+            uuid.UUID(value)
+        except (ValueError, AttributeError, TypeError) as err:
+            raise ValueError(f"tenant_id not a valid uuid: {value!r}") from err
+        else:
+            return value
+    @field_validator("version")
+    @classmethod
+    def _validate_version(cls, value: int) -> int:
+        """Accept versions from 1 up to and including ENVELOPE_VERSION."""
+        if 1 <= value <= ENVELOPE_VERSION:
+            return value
+        if value < 1:
+            raise ValueError(f"envelope version must be >= 1, got {value}")
+        raise ValueError(
+            f"unsupported envelope version {value} (max {ENVELOPE_VERSION})"
+        )
+    @field_validator("payload")
+    @classmethod
+    def _validate_payload(cls, value: dict[str, Any]) -> dict[str, Any]:
+        """Reject an empty payload dict; return a non-empty one unchanged."""
+        if value:
+            return value
+        raise ValueError(
+            "payload must not be empty — Go workers dead-letter empty payloads"
+        )
+def wrap(tenant_id: str, payload: dict[str, Any]) -> bytes:
+    """Build an ENVELOPE_VERSION envelope and return it as UTF-8 JSON bytes.
+    Args:
+        tenant_id: Tenant UUID in string form.
+        payload: Non-empty, JSON-serializable mapping for the task handler.
+    Returns:
+        The JSON-encoded envelope, ready to enqueue.
+    Raises:
+        InvalidEnvelopeError: if the envelope fails validation (for example a
+            non-UUID tenant_id or an empty payload), or if the payload holds a
+            value that cannot be serialized to JSON.
+    """
+    try:
+        env = Envelope(
+            version=ENVELOPE_VERSION,
+            tenant_id=tenant_id,
+            payload=payload,
+        )
+    except ValidationError as exc:
+        raise InvalidEnvelopeError(str(exc)) from exc
+    try:
+        encoded = env.model_dump_json()
+    except ValueError as exc:
+        # Serialization problems (unsupported value types, circular references)
+        # surface as ValueError subclasses; report them through the same
+        # exception type as validation failures so callers handle one error.
+        raise InvalidEnvelopeError(
+            f"envelope payload not json-serializable: {exc}"
+        ) from exc
+    return encoded.encode("utf-8")
+def unwrap(data: bytes | str | dict[str, Any]) -> Envelope:
+    """Parse and validate an inbound envelope.
+    Accepts raw JSON bytes/str or a pre-parsed dict (streaq sometimes hands
+    handlers the decoded payload directly). A dict is validated exactly like
+    decoded JSON; nothing is trusted because it is already parsed.
+    Args:
+        data: JSON text/bytes, or the already-decoded envelope mapping.
+    Returns:
+        The validated, immutable Envelope.
+    Raises:
+        InvalidEnvelopeError: if the input is not decodable JSON, is not a JSON
+            object, or fails Envelope validation. Callers must dead-letter,
+            not retry.
+    """
+    if isinstance(data, (bytes, str)):
+        try:
+            obj = json.loads(data)
+        except (ValueError, RecursionError) as exc:
+            # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
+            # raised for bytes that are not valid UTF-8/16/32; RecursionError
+            # covers pathologically nested documents. All are permanent
+            # failures and must surface as InvalidEnvelopeError.
+            raise InvalidEnvelopeError(f"envelope not valid json: {exc}") from exc
+    else:
+        obj = data
+    if not isinstance(obj, dict):
+        raise InvalidEnvelopeError(f"envelope must be a json object, got {type(obj).__name__}")
+    try:
+        return Envelope.model_validate(obj)
+    except ValidationError as exc:
+        raise InvalidEnvelopeError(str(exc)) from exc

omkit/jobqueue/streaq.py ADDED Viewed

@@ -0,0 +1,267 @@
+"""packages/omur-sdk/omkit/jobqueue/streaq.py — streaq integration for Omur Python services.
+Wraps the streaq Worker with the SDK's tenant + envelope contract so all
+Python services have the same ergonomics as the Go-side `omkit.jobqueue`
+helpers (which front Asynq).
+Public surface:
+    make_worker(redis_url, queue_name, ...)  -> streaq.Worker
+    tenant_middleware                         -> streaq middleware factory
+    enqueue(task, tenant_id, payload, ...)    -> shorthand for envelope-wrapped enqueue
+    mount_streaq_ui(app, worker, prefix=...)  -> mount the FastAPI UI router
+    StreaqPromCollector(worker)               -> prometheus.Collector for worker.counters
+Conventions:
+- Workers serialize tasks as JSON. Required because (a) cross-language
+  round-trip with Go workers requires JSON and (b) the streaq UI renders
+  JSON arguments inline. streaq's default binary serializer is replaced.
+  Callers MUST pass JSON-safe payloads.
+- Every task is tenant-scoped. The first positional argument of every
+  registered task is the envelope dict; `tenant_middleware` unwraps it,
+  binds `tenant.current()`, and passes the inner payload to the handler.
+- Defaults match the SDK contract documented in
+  `docs/superpowers/specs/2026-04-29-job-queue-design.md`:
+  concurrency=4, max_tries=3, task_timeout=300s, ttl=48h.
+exports: DEFAULT_CONCURRENCY | DEFAULT_MAX_TRIES | DEFAULT_TIMEOUT_SECONDS | DEFAULT_TTL | make_worker(redis_url, queue_name) | tenant_middleware(next_handler) | enqueue(task, tenant_id, payload) | mount_streaq_ui(app, worker) | _STREAQ_COUNTER_KEYS | class StreaqPromCollector
+rules:   The module requires all Redis-based job queue operations to be thread-safe and idempotent, as it's designed for high-concurrency worker environments where tasks may be retried or processed by multiple workers simultaneously.
+agent:   ollama/qwen3-coder:latest | ollama | 2026-05-01 | codedna-cli | initial CodeDNA annotation pass
+message:
+"""
+from __future__ import annotations
+import json
+import logging
+from datetime import timedelta
+from typing import Any, Awaitable, Callable
+from omkit import tenant
+from omkit.jobqueue.envelope import (
+    Envelope,
+    InvalidEnvelopeError,
+    unwrap,
+    wrap,
+)
+log = logging.getLogger(__name__)
+# Module-level defaults used as the keyword defaults of make_worker(); the
+# module docstring ties these values to the SDK job-queue design spec.
+DEFAULT_CONCURRENCY = 4
+DEFAULT_MAX_TRIES = 3
+DEFAULT_TIMEOUT_SECONDS = 300
+DEFAULT_TTL = timedelta(hours=48)
+def _json_serializer(obj: Any) -> bytes:
+    """Encode `obj` as compact JSON (no whitespace after separators), UTF-8 bytes.
+    Values the json module cannot encode natively are converted with `str`.
+    """
+    text = json.dumps(obj, separators=(",", ":"), default=str)
+    return text.encode("utf-8")
+def _json_deserializer(data: bytes) -> Any:
+    """Decode JSON bytes back into plain Python objects."""
+    decoded = json.loads(data)
+    return decoded
+def make_worker(
+    redis_url: str,
+    queue_name: str,
+    *,
+    concurrency: int = DEFAULT_CONCURRENCY,
+    max_tries: int = DEFAULT_MAX_TRIES,
+    task_timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
+    ttl: timedelta = DEFAULT_TTL,
+    handle_signals: bool = False,
+    **worker_kwargs: Any,
+):
+    """Construct a streaq Worker pre-configured for Omur services.
+    `redis_url` accepts the same forms streaq does (`redis://valkey:6379/0`
+    or `rediss://…`). For Valkey with password, encode it in the URL:
+    `redis://:PASSWORD@valkey:6379/0`.
+    `handle_signals=False` because services run streaq alongside an HTTP
+    server and own their own SIGTERM handler — letting streaq install one
+    deadlocks shutdown. The lifespan-context-manager pattern stops the
+    worker cleanly on app shutdown instead.
+    Extra keyword arguments are forwarded to `streaq.Worker(...)` for
+    advanced cases (sentinel/cluster, custom serializers, etc.). The JSON
+    serializer/deserializer pair is only a default: passing `serializer=` or
+    `deserializer=` replaces it instead of raising a duplicate-keyword
+    TypeError.
+    NOTE(review): `max_tries`, `task_timeout_seconds` and `ttl` are accepted
+    for interface compatibility but this function does not pass them to
+    `streaq.Worker`; until that is wired up they have no effect here and must
+    be set per task via `@worker.task(...)` — TODO confirm the intended hook.
+    Returns:
+        The constructed `streaq.Worker`.
+    """
+    import streaq
+    # setdefault rather than explicit keywords, so a caller-supplied
+    # serializer/deserializer in **worker_kwargs wins over the JSON defaults.
+    worker_kwargs.setdefault("serializer", _json_serializer)
+    worker_kwargs.setdefault("deserializer", _json_deserializer)
+    return streaq.Worker(
+        redis_url=redis_url,
+        queue_name=queue_name,
+        concurrency=concurrency,
+        handle_signals=handle_signals,
+        **worker_kwargs,
+    )
+# ─────────────────────────────────────────────────────────────────────
+# Tenant + envelope middleware
+# ─────────────────────────────────────────────────────────────────────
+def tenant_middleware(next_handler: Callable[..., Awaitable[Any]]) -> Callable[..., Awaitable[Any]]:
+    """streaq middleware that turns an envelope into a tenant-bound call.
+    The returned coroutine function unwraps the envelope found in the first
+    positional argument, runs the rest of the call inside
+    `tenant.bind(env.tenant_id)`, and invokes
+    `next_handler(env.payload, *remaining_args, **kwargs)`.
+    Register on the Worker:
+        worker.middleware(tenant_middleware)
+    And then write tasks as:
+        @worker.task(timeout=600)
+        async def parse(payload: dict) -> None:
+            doc_id = payload["doc_id"]
+            assert tenant.current() is not None
+    An `InvalidEnvelopeError` is logged as `streaq.invalid_envelope` and
+    re-raised unchanged; a call with no positional arguments raises it
+    directly. Per the original author's note, streaq counts this as a regular
+    failure subject to `max_tries`. All enqueues should go through `enqueue()`
+    so envelopes are well-formed — a validation error at the worker boundary
+    indicates a bug, not a transient fault.
+    """
+    async def wrapper(*args: Any, **kwargs: Any) -> Any:
+        if not args:
+            raise InvalidEnvelopeError("task called with no positional args")
+        raw_envelope, *remaining = args
+        try:
+            env: Envelope = unwrap(raw_envelope)
+        except InvalidEnvelopeError:
+            log.error("streaq.invalid_envelope")
+            raise
+        # The tenant binding stays active for the whole handler call.
+        with tenant.bind(env.tenant_id):
+            return await next_handler(env.payload, *remaining, **kwargs)
+    return wrapper
+# ─────────────────────────────────────────────────────────────────────
+# Enqueue helper
+# ─────────────────────────────────────────────────────────────────────
+def enqueue(task: Any, tenant_id: str, payload: dict[str, Any], **opts: Any) -> Awaitable[Any]:
+    """Enqueue `task` with `payload` wrapped in a tenant envelope.
+    The payload goes through `wrap()` (validation + JSON serialization) and is
+    decoded back into a dict, which is passed as the task's first positional
+    argument.
+    Args:
+        task: Object exposing `.enqueue(...)`, i.e. the result of
+            `@worker.task(...)`.
+        tenant_id: Tenant UUID string, validated by `wrap()`.
+        payload: Task payload; must satisfy `wrap()`.
+        **opts: Forwarded unchanged to `task.enqueue(...)`.
+    Returns:
+        Whatever `task.enqueue(...)` returns; the caller is responsible for
+        awaiting it.
+    """
+    envelope = json.loads(wrap(tenant_id, payload))
+    return task.enqueue(envelope, **opts)
+# ─────────────────────────────────────────────────────────────────────
+# FastAPI UI mount
+# ─────────────────────────────────────────────────────────────────────
+def mount_streaq_ui(app: Any, worker: Any, *, prefix: str = "/queue/ui") -> None:
+    """Mount streaq's built-in admin UI router on `app` under `prefix`.
+    streaq's UI router depends on a FastAPI dependency `get_worker` that
+    raises 412 by default; it is overridden here to return `worker` so the UI
+    can read queue state, results, and counters. No authentication is added
+    by this function — Caddy's oauth2-proxy forward-auth gates access
+    (Zitadel SSO).
+    Args:
+        app: Application exposing `dependency_overrides` and `include_router`.
+        worker: The streaq worker the UI should display.
+        prefix: URL prefix for the UI routes.
+    Raises:
+        RuntimeError: if the streaq UI modules cannot be imported.
+    """
+    try:
+        from streaq.ui.deps import get_worker
+        from streaq.ui.tasks import router as tasks_router
+    except ImportError as exc:
+        raise RuntimeError(
+            "streaq UI requires `streaq[web]` extra (fastapi/jinja2/uvicorn)"
+        ) from exc
+    else:
+        def _provide_worker() -> Any:
+            return worker
+        app.dependency_overrides[get_worker] = _provide_worker
+        app.include_router(tasks_router, prefix=prefix)
+# ─────────────────────────────────────────────────────────────────────
+# Prometheus bridge
+# ─────────────────────────────────────────────────────────────────────
+# Keys read from `worker.counters`; each becomes one `streaq_worker_<key>` gauge.
+_STREAQ_COUNTER_KEYS = (
+    "aborted",
+    "completed",
+    "failed",
+    "relinquished",
+    "retried",
+    "running",
+)
+class StreaqPromCollector:
+    """Prometheus collector that reads `worker.counters` on every scrape.
+    Exported gauges (one per key, labelled with the queue name):
+        streaq_worker_aborted{queue}
+        streaq_worker_completed{queue}
+        streaq_worker_failed{queue}
+        streaq_worker_relinquished{queue}
+        streaq_worker_retried{queue}
+        streaq_worker_running{queue}
+    Register once per process:
+        from prometheus_client import REGISTRY
+        REGISTRY.register(StreaqPromCollector(worker))
+    All metrics are gauges (counters reset on worker restart, which is
+    fine — alert dashboards already de-dupe on `service` instance).
+    """
+    def __init__(self, worker: Any) -> None:
+        self._worker = worker
+    def describe(self) -> Any:
+        """Return an empty iterator: no metrics are declared up front.
+        NOTE(review): presumably this keeps the registry from calling
+        collect() at registration time — confirm against prometheus_client.
+        """
+        return iter(())
+    def collect(self) -> Any:
+        """Yield one GaugeMetricFamily per key in `_STREAQ_COUNTER_KEYS`.
+        The queue label comes from `worker.queue_name` ("default" when the
+        attribute is absent). A missing or falsy `worker.counters` is treated
+        as empty, and any key missing from it is reported as 0.
+        """
+        from prometheus_client.core import GaugeMetricFamily
+        queue_label = getattr(self._worker, "queue_name", "default")
+        snapshot = getattr(self._worker, "counters", {}) or {}
+        for counter_name in _STREAQ_COUNTER_KEYS:
+            family = GaugeMetricFamily(
+                f"streaq_worker_{counter_name}",
+                f"streaq worker {counter_name} count (since process start)",
+                labels=["queue"],
+            )
+            value = float(snapshot.get(counter_name, 0))
+            family.add_metric([queue_label], value)
+            yield family
+# Names exported by `from omkit.jobqueue.streaq import *`; underscore-prefixed
+# helpers such as the JSON (de)serializers are intentionally left out.
+__all__ = [
+    "DEFAULT_CONCURRENCY",
+    "DEFAULT_MAX_TRIES",
+    "DEFAULT_TIMEOUT_SECONDS",
+    "DEFAULT_TTL",
+    "StreaqPromCollector",
+    "enqueue",
+    "make_worker",
+    "mount_streaq_ui",
+    "tenant_middleware",
+]

omkit/logging.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""packages/omur-sdk/omkit/logging.py — Shared structlog configuration for all Omur services.
+Default output is JSON, suitable for production log aggregation. Set
+``LOG_FORMAT=console`` to switch to the human-readable renderer during dev.
+Usage:
+    from omkit.logging import configure_logging
+    configure_logging("spine")  # Call once at startup, before get_logger()
+exports: configure_logging(service_name)
+rules:   The logging module must maintain backward compatibility with existing log format configurations and service name resolution patterns across all SDK versions. The module cannot introduce breaking changes to its public API or alter the default logging behavior without explicit versioned migration paths. All logging configurations must remain thread-safe and support concurrent service initialization without race conditions.
+agent:   ollama/qwen3-coder:latest | ollama | 2026-05-01 | codedna-cli | initial CodeDNA annotation pass
+message:
+"""
+from __future__ import annotations
+import os
+import structlog
+def configure_logging(service_name: str) -> None:
+    """Install the structlog configuration used by `service_name`.
+    The processor chain merges contextvars, adds a ``service`` field, adds
+    tenant/request correlation fields, adds the log level and an ISO
+    timestamp, then renders the event.
+    ``service`` is filled in with ``setdefault``, so a value already present
+    on the event (from the call site or bound context) is kept.
+    ``LOG_FORMAT`` (case-insensitive) selects the renderer:
+        * ``console`` — ConsoleRenderer for local development.
+        * anything else, including unset — JSONRenderer for production /
+          log aggregation.
+    """
+    use_console = os.environ.get("LOG_FORMAT", "json").lower() == "console"
+    renderer = (
+        structlog.dev.ConsoleRenderer()
+        if use_console
+        else structlog.processors.JSONRenderer()
+    )
+    def _add_service(_logger, _method, event_dict):
+        # setdefault keeps an explicit `service=` supplied with the event.
+        event_dict.setdefault("service", service_name)
+        return event_dict
+    def _add_correlation(_logger, _method, event_dict):
+        # Tag every record with the tenant and request id held in the
+        # SDK-managed contextvars. The import is lazy so this module stays
+        # decoupled from omkit.tenant, and the whole lookup is best-effort:
+        # any failure leaves the event untouched.
+        try:
+            from omkit.tenant import current_or_none, request_id
+            tenant_value = current_or_none()
+            request_value = request_id()
+        except Exception:
+            return event_dict
+        for field, value in (("tenant_id", tenant_value), ("request_id", request_value)):
+            if value:
+                event_dict.setdefault(field, value)
+        return event_dict
+    processor_chain = [
+        structlog.contextvars.merge_contextvars,
+        _add_service,
+        _add_correlation,
+        structlog.stdlib.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        renderer,
+    ]
+    structlog.configure(
+        processors=processor_chain,
+        wrapper_class=structlog.make_filtering_bound_logger(0),
+        context_class=dict,
+        logger_factory=structlog.PrintLoggerFactory(),
+        cache_logger_on_first_use=True,
+    )

omkit/metrics.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Shared Prometheus metrics wiring for FastAPI services.
+Usage:
+    from omkit.metrics import mount_metrics
+    mount_metrics(app, "my-service")  # exposes /metrics, instruments all routes
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from fastapi import FastAPI
+def mount_metrics(app: "FastAPI", service_name: str) -> None:
+    """Instrument `app` with prometheus-fastapi-instrumentator and expose metrics.
+    Metrics use the namespace ``omur`` and `service_name` as the subsystem.
+    The handlers ``/metrics``, ``/health`` and ``/ready`` are excluded from
+    instrumentation.
+    Idempotent: the first successful call sets `app._omkit_metrics_mounted`,
+    and later calls on the same app return without doing anything.
+    Raises:
+        ImportError: if prometheus-fastapi-instrumentator is not installed.
+            The import is attempted before the idempotency check.
+    """
+    try:
+        from prometheus_fastapi_instrumentator import Instrumentator
+    except ImportError as e:
+        raise ImportError(
+            "prometheus-fastapi-instrumentator is required. "
+            "Install with: pip install omkit[metrics]"
+        ) from e
+    already_mounted = getattr(app, "_omkit_metrics_mounted", False)
+    if already_mounted:
+        return
+    instrumentator = Instrumentator(
+        should_group_status_codes=True,
+        should_ignore_untemplated=True,
+        should_respect_env_var=False,
+        excluded_handlers=["/metrics", "/health", "/ready"],
+    )
+    instrumented = instrumentator.instrument(
+        app, metric_namespace="omur", metric_subsystem=service_name
+    )
+    instrumented.expose(app)
+    app._omkit_metrics_mounted = True

omkit/model_lifecycle.py ADDED Viewed

@@ -0,0 +1,192 @@
+"""On-demand model loading with TTL-based idle unloading.
+ModelLifecycle is an abstract base for lazy model loading. ModelRegistry
+manages a set of lifecycles and reaps idle ones. Thread-safe via per-
+lifecycle async lock.
+"""
+from __future__ import annotations
+import abc
+import asyncio
+import gc
+import time
+from typing import Any
+import structlog
+from prometheus_client import Histogram, Counter, Gauge
+log = structlog.get_logger()
+# Module-level Prometheus metrics used by ModelLifecycle and ModelRegistry.
+# Every metric carries a `model` label; MODEL_UNLOAD_TOTAL also carries the
+# unload `reason` ("idle" from the reaper, "shutdown" from unload_all).
+# Observed around the executor call in ensure_loaded(); values are seconds.
+MODEL_LOAD_DURATION = Histogram(
+    "model_load_duration_seconds",
+    "Time to load a model into memory",
+    ["model"],
+    buckets=[1, 2, 5, 10, 15, 20, 30, 60],
+)
+# Incremented when _do_load raises.
+MODEL_LOAD_ERRORS = Counter(
+    "model_load_errors_total",
+    "Number of model load failures",
+    ["model"],
+)
+MODEL_UNLOAD_TOTAL = Counter(
+    "model_unload_total",
+    "Number of model unloads",
+    ["model", "reason"],
+)
+MODEL_LOADED = Gauge(
+    "model_loaded",
+    "Whether a model is currently loaded (1=yes, 0=no)",
+    ["model"],
+)
+class ModelLifecycle(abc.ABC):
+    """Abstract base for a lazily loaded model with idle tracking.
+    Subclasses implement `_do_load` and `_do_unload`; both are run through the
+    event loop's default executor. `ensure_loaded` and `unload` are serialized
+    by one asyncio.Lock per instance, so concurrent callers never trigger a
+    second load while one is in progress.
+    """
+    def __init__(self, name: str) -> None:
+        self.name = name  # also the `model` label on the metrics
+        self._model: Any = None  # None is the "not loaded" marker
+        self._last_used: float = 0  # time.monotonic() of last use; 0 when never used or unloaded
+        self._lock = asyncio.Lock()
+    @abc.abstractmethod
+    def _do_load(self) -> Any:
+        """Load model into memory. Runs in thread executor. Return model object.
+        The returned object must not be None, because None marks "not loaded".
+        """
+    @abc.abstractmethod
+    def _do_unload(self) -> None:
+        """Release model resources. Runs in thread executor."""
+    @property
+    def is_loaded(self) -> bool:
+        """True while a model object is held."""
+        return self._model is not None
+    @property
+    def model(self) -> Any:
+        """The loaded model object, or None when not loaded."""
+        return self._model
+    @property
+    def last_used(self) -> float:
+        """`time.monotonic()` value of the last use; 0 if never used or unloaded."""
+        return self._last_used
+    def touch(self) -> None:
+        """Mark the model as used now, which resets its idle time."""
+        self._last_used = time.monotonic()
+    async def ensure_loaded(self) -> None:
+        """Load the model if it is not loaded yet; otherwise just refresh `last_used`.
+        Holds the instance lock for the whole load, so callers arriving during
+        a load wait for it and then find the model present.
+        Raises:
+            Exception: whatever `_do_load` raises, after incrementing
+                MODEL_LOAD_ERRORS; the model stays unloaded.
+        """
+        async with self._lock:
+            if self._model is not None:
+                self._last_used = time.monotonic()
+                return
+            log.info("model.loading", model=self.name)
+            t0 = time.monotonic()
+            loop = asyncio.get_running_loop()
+            try:
+                self._model = await loop.run_in_executor(None, self._do_load)
+            except Exception:
+                MODEL_LOAD_ERRORS.labels(model=self.name).inc()
+                raise
+            duration = time.monotonic() - t0
+            self._last_used = time.monotonic()
+            MODEL_LOAD_DURATION.labels(model=self.name).observe(duration)
+            MODEL_LOADED.labels(model=self.name).set(1)
+            log.info("model.loaded", model=self.name, duration_s=round(duration, 2))
+    async def unload(self) -> None:
+        """Unload the model if it is loaded; a no-op otherwise.
+        Raises:
+            Exception: whatever `_do_unload` raises. In that case the model
+                reference, `is_loaded` and the MODEL_LOADED gauge all still
+                report the model as loaded.
+        """
+        async with self._lock:
+            if self._model is None:
+                return
+            log.info("model.unloading", model=self.name)
+            loop = asyncio.get_running_loop()
+            await loop.run_in_executor(None, self._do_unload)
+            self._model = None
+            self._last_used = 0
+            # Flip the gauge only after the unload succeeded, so it never
+            # disagrees with `is_loaded` when `_do_unload` raises.
+            MODEL_LOADED.labels(model=self.name).set(0)
+            gc.collect()
+            log.info("model.unloaded", model=self.name)
+class ModelRegistry:
+    """Holds named ModelLifecycle instances and reaps the idle ones.
+    A single background task (started with `start_reaper`) periodically
+    unloads every loaded model whose idle time has reached the TTL.
+    """
+    def __init__(self) -> None:
+        self._models: dict[str, ModelLifecycle] = {}
+        # Idle seconds after which the reaper unloads a model; <= 0 disables reaping.
+        self._ttl: int = 300
+        self._reaper_task: asyncio.Task | None = None
+    def register(self, name: str, lifecycle: ModelLifecycle) -> None:
+        """Add `lifecycle` under `name`, replacing any previous entry with that name."""
+        self._models[name] = lifecycle
+    def status(self) -> dict[str, bool]:
+        """Return a mapping of model name to whether it is currently loaded."""
+        return {name: lc.is_loaded for name, lc in self._models.items()}
+    def set_ttl(self, ttl_seconds: int) -> None:
+        """Change the idle TTL; a running reaper picks it up on its next sweep."""
+        self._ttl = ttl_seconds
+        log.info("registry.ttl_updated", ttl=ttl_seconds)
+    def start_reaper(self, ttl_seconds: int, sweep_interval: float = 30) -> None:
+        """Set the TTL and (re)start the reaper task.
+        Any reaper that is still running is cancelled first. Must be called
+        while an event loop is running, because it uses `asyncio.create_task`.
+        """
+        self._ttl = ttl_seconds
+        if self._reaper_task and not self._reaper_task.done():
+            self._reaper_task.cancel()
+        self._reaper_task = asyncio.create_task(
+            self._reap_loop(sweep_interval), name="model-reaper"
+        )
+    def stop_reaper(self) -> None:
+        """Cancel the reaper task if it is running and drop the reference to it."""
+        if self._reaper_task and not self._reaper_task.done():
+            self._reaper_task.cancel()
+        self._reaper_task = None
+    async def unload_all(self) -> None:
+        """Stop the reaper and unload every loaded model, in registration order.
+        Raises:
+            Exception: the first error raised by a lifecycle's `unload()`;
+                models after it are not attempted.
+        """
+        self.stop_reaper()
+        # Iterate over a snapshot, as _reap_loop does: register() may run
+        # while an unload is awaited, and mutating the dict mid-iteration
+        # would raise RuntimeError.
+        for name, lc in list(self._models.items()):
+            if lc.is_loaded:
+                await lc.unload()
+                # Count only unloads that completed.
+                MODEL_UNLOAD_TOTAL.labels(model=name, reason="shutdown").inc()
+    async def _reap_loop(self, interval: float) -> None:
+        """Sweep every `interval` seconds and unload models idle for at least the TTL.
+        A failing unload is logged and does not stop the sweep. Cancellation
+        ends the loop quietly.
+        """
+        try:
+            while True:
+                await asyncio.sleep(interval)
+                if self._ttl <= 0:
+                    continue
+                now = time.monotonic()
+                for name, lc in list(self._models.items()):
+                    try:
+                        if lc.is_loaded and (now - lc.last_used) >= self._ttl:
+                            await lc.unload()
+                            # Counted after success, so a model whose unload
+                            # keeps failing is not re-counted on every sweep.
+                            MODEL_UNLOAD_TOTAL.labels(model=name, reason="idle").inc()
+                    except Exception:
+                        log.error("reaper.unload_failed", model=name, exc_info=True)
+        except asyncio.CancelledError:
+            pass