npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.54.0 → 1.55.0 - Mend

@meridiona/meridian-darwin-arm64 1.54.0 → 1.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/.env.example +9 -0
package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/scripts/install-openobserve-daemon.sh +7 -3
package/services/agents/observability.py +188 -17
package/services/agents/run_task_linker_mlx.py +211 -41
package/services/agents/server.py +138 -71
package/services/agents/tests/test_run_task_linker_mlx.py +53 -0
package/services/observability/dashboards/classifier-debug.json +174 -0
package/services/pyproject.toml +1 -1
package/ui.tar.gz +0 -0

package/.env.example CHANGED Viewed

@@ -19,6 +19,15 @@
 # MLX_SERVER_HOST=127.0.0.1
 # MLX_SERVER_PORT=7823
+# Idle eviction for the MLX model. The model holds ~7 GB of Metal memory while
+# resident, but classification is bursty — so the server unloads it after this
+# many seconds idle and reloads on the next request (~3 s cold start). Default
+# 120s (aggressive: lightest idle footprint). Raise it to keep the model warm
+# longer; set 0 to disable eviction (pin the model in memory). Avoid values
+# below ~30s: if the TTL drops under the gap between sessions in a classification
+# burst, the model evicts and cold-reloads (~3 s) repeatedly mid-burst.
+# MLX_IDLE_EVICT_S=120
 # Dashboard (Next.js UI) port. Defaults to 3939. Change this and re-run
 # `meridian setup` to move the dashboard.
 # MERIDIAN_UI_PORT=3939

package/VERSION CHANGED Viewed

@@ -1 +1 @@
-1.54.0
+1.55.0

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.54.0",
+  "version": "1.55.0",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/scripts/install-openobserve-daemon.sh CHANGED Viewed

@@ -41,15 +41,19 @@ elif command -v openobserve >/dev/null 2>&1; then
 fi
 if [[ -z "${OO_BIN}" ]]; then
-    echo "→ OpenObserve binary not found — downloading v0.11.0 (last release with arm64 binary)..."
+    echo "→ OpenObserve binary not found — downloading v0.90.3..."
     _oo_arch="$(uname -m)"
     case "$_oo_arch" in
         arm64)  _oo_arch="arm64" ;;
         x86_64) _oo_arch="amd64" ;;
         *) echo "✗ Unsupported arch: $_oo_arch" >&2; exit 1 ;;
     esac
-    _oo_ver="v0.11.0"
-    _oo_url="https://github.com/openobserve/openobserve/releases/download/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
+    # GitHub release assets were removed for recent versions; binaries now live on
+    # the official downloads host. Trace deep-linking (dashboard drilldown into a
+    # single trace's spans) needs a modern build, so we pin a current stable.
+    # KEEP IN SYNC: the same version is pinned in install.sh — bump both together.
+    _oo_ver="v0.90.3"
+    _oo_url="https://downloads.openobserve.ai/releases/openobserve/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
     mkdir -p "${HOME}/.openobserve"
     if curl -fsSL -o "${HOME}/.openobserve/openobserve.tar.gz" "$_oo_url" \
         && tar -xzf "${HOME}/.openobserve/openobserve.tar.gz" -C "${HOME}/.openobserve" \

package/services/agents/observability.py CHANGED Viewed

@@ -4,7 +4,9 @@ A single `setup(agent_name)` call wires up:
   * an OTel `TracerProvider` with `service.name=agent_name`
   * a `BatchSpanProcessor` exporting OTLP/HTTP-protobuf spans to OpenObserve
-    (`MERIDIAN_OTLP_TRACES_ENDPOINT`, with Basic auth via `MERIDIAN_OO_AUTH`)
+  * a `LoggerProvider` + OTLP-logs handler so every `logging.LogRecord` is also
+    shipped to OpenObserve (correlated to the active span), mirroring the Rust
+    daemon's `OpenTelemetryTracingBridge`
   * W3C `TraceContextTextMapPropagator` as the global propagator so each
     agent can pick up the Rust daemon's `traceparent` and continue the trace
   * `LoggingInstrumentor` so every `logging.LogRecord` carries
@@ -13,6 +15,12 @@ A single `setup(agent_name)` call wires up:
     under `~/.meridian/logs/{agent_name}.jsonl` plus stderr — both ingestable
     by OpenObserve's log pipeline without further parsing.
+Export config (endpoint + Basic-auth credentials) is resolved from the SAME
+`~/.meridian/settings.json` the Rust daemon reads — `otlp_enabled`,
+`otlp_endpoint`, `oo_email`, `oo_password` — so the dashboard Settings page is
+the single source of truth for both processes. The legacy `MERIDIAN_OO_AUTH`
+env credential is deprecated and ignored, matching the daemon.
 `extract_parent_context(traceparent)` is the helper agents use to continue
 a span emitted by another process — typically the Rust ETL or another
 agent stage.
@@ -23,20 +31,26 @@ single-shot CLI paths funnel through the same module.
 """
 from __future__ import annotations
+import base64
+import json
 import logging
 import logging.handlers
 import os
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import NamedTuple, Optional
 from opentelemetry import trace
+from opentelemetry._logs import set_logger_provider
 from opentelemetry.context import Context
+from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
     OTLPSpanExporter,
 )
 from opentelemetry.instrumentation.logging import LoggingInstrumentor
 from opentelemetry.propagate import set_global_textmap
+from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
@@ -50,12 +64,125 @@ from pythonjsonlogger import jsonlogger
 DEFAULT_TRACES_ENDPOINT = "http://localhost:5080/api/default/v1/traces"
 DEFAULT_LOGS_ENDPOINT   = "http://localhost:5080/api/default/v1/logs"
 DEFAULT_LOG_DIR         = Path.home() / ".meridian" / "logs"
+# Single source of truth for OpenObserve export config — the SAME file the Rust
+# daemon reads (see `src/observability.rs::resolve_otlp_target`). Keeps the two
+# processes credential-aligned: the dashboard Settings page writes here and both
+# the daemon and this MLX server pick it up with no env plumbing.
+_SETTINGS_PATH = Path(
+    os.environ.get("MERIDIAN_SETTINGS_PATH")
+    or (Path.home() / ".meridian" / "settings.json")
+)
 _NOISY_LOGGERS = ("urllib3", "httpx", "httpcore", "openai", "botocore")
 # Track which agents have been configured so a second setup() call is a no-op.
 _INITIALISED: dict[str, trace.Tracer] = {}
 _PROCESS_SERVICE_NAME: str | None = None
+# Held so shutdown() can flush log records the same way it flushes spans.
+_LOGGER_PROVIDER: LoggerProvider | None = None
+# One-time guard so an export misconfiguration (enabled-but-no-creds, or a
+# schemeless endpoint) warns once per process instead of on every resolve.
+_WARNED_EXPORT_MISCONFIG: bool = False
+# ──────────────────────── OTLP target resolution ───────────────────────────────
+class _OtlpTarget(NamedTuple):
+    """Resolved OTLP export target: signal endpoints + Basic-auth header value."""
+    traces_endpoint: str
+    logs_endpoint: str
+    headers: dict[str, str]
+def _load_settings() -> dict[str, object]:
+    """Read `~/.meridian/settings.json`; empty dict if absent/unreadable."""
+    try:
+        with _SETTINGS_PATH.open(encoding="utf-8") as fh:
+            data = json.load(fh)
+        return data if isinstance(data, dict) else {}
+    except (OSError, ValueError):
+        return {}
+def _resolve_otlp_target() -> Optional[_OtlpTarget]:
+    """Mirror of the Rust daemon's `resolve_otlp_target()`.
+    Returns `None` (→ export disabled) when the toggle is off or credentials
+    are missing. Endpoint precedence: settings.json `otlp_endpoint` → the
+    `MERIDIAN_OTLP_TRACES_ENDPOINT`/`MERIDIAN_OTLP_ENDPOINT` env override →
+    the localhost default. Auth is `base64(oo_email:oo_password)` — settings.json
+    only; the legacy `MERIDIAN_OO_AUTH` env path is deprecated and ignored, the
+    same decision the daemon made.
+    """
+    global _WARNED_EXPORT_MISCONFIG
+    if os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes"):
+        return None
+    settings = _load_settings()
+    if not settings.get("otlp_enabled"):
+        return None
+    # Resolve the endpoint up front so we can warn (not silently disable) when
+    # export is enabled but unusable. Precedence: settings → env → localhost.
+    configured = str(settings.get("otlp_endpoint") or "").strip()
+    env_endpoint = (
+        os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
+        or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
+    )
+    traces_endpoint = configured or env_endpoint or DEFAULT_TRACES_ENDPOINT
+    def _warn_once(msg: str, *args: object) -> None:
+        global _WARNED_EXPORT_MISCONFIG
+        if not _WARNED_EXPORT_MISCONFIG:
+            _WARNED_EXPORT_MISCONFIG = True
+            logging.getLogger(__name__).warning(msg, *args)
+    email = str(settings.get("oo_email") or "")
+    password = str(settings.get("oo_password") or "")
+    if not email or not password:
+        # otlp_enabled but no usable credentials → export OFF. Warn once so an
+        # env-only (MERIDIAN_OO_AUTH) install that predates the settings.json
+        # credential move doesn't go dark silently — mirrors the daemon, which
+        # also warns. MERIDIAN_OO_AUTH is no longer read here.
+        _warn_once(
+            "OpenObserve export enabled but oo_email/oo_password missing in %s — "
+            "traces+logs export DISABLED. Set credentials in the dashboard Settings "
+            "(the MERIDIAN_OO_AUTH env var is no longer used).",
+            _SETTINGS_PATH,
+        )
+        return None
+    # Guard against HTTP header injection / malformed user:password splits —
+    # matches the daemon's same-named check.
+    if any(c in email for c in "\r\n:") or any(c in password for c in "\r\n"):
+        return None
+    auth = base64.standard_b64encode(f"{email}:{password}".encode()).decode()
+    # Validate scheme — only http/https are valid OTLP transports. The daemon
+    # disables export + warns on a schemeless endpoint; mirror that exactly so the
+    # two processes don't disagree on whether export is on.
+    if not (traces_endpoint.startswith("http://") or traces_endpoint.startswith("https://")):
+        _warn_once(
+            "OTLP endpoint %r has no http/https scheme — export DISABLED.",
+            traces_endpoint,
+        )
+        return None
+    # OpenObserve serves logs at the sibling `…/v1/logs` path. Derive it from the
+    # traces endpoint by swapping the trailing signal segment so a custom host or
+    # base (incl. a trailing slash, e.g. `…/v1/traces/`) carries to BOTH signals —
+    # never silently fall back to localhost for logs while traces go remote.
+    t = traces_endpoint.rstrip("/")
+    if t.endswith("/v1/traces"):
+        logs_endpoint = t[: -len("/v1/traces")] + "/v1/logs"
+    elif t.endswith("/traces"):
+        logs_endpoint = t[: -len("/traces")] + "/logs"
+    elif "traces" in t:
+        logs_endpoint = t.rsplit("traces", 1)[0] + "logs"
+    else:
+        logs_endpoint = t + "/v1/logs"
+    return _OtlpTarget(traces_endpoint, logs_endpoint, {"Authorization": f"Basic {auth}"})
 # ──────────────────────── Public API ───────────────────────────────────────────
@@ -80,8 +207,13 @@ def setup(agent_name: str) -> trace.Tracer:
     if _PROCESS_SERVICE_NAME is None:
         _PROCESS_SERVICE_NAME = agent_name
-        _configure_tracing(agent_name)
-        _configure_logging(agent_name)
+        # Resolve the export target ONCE and pass it to both configurers — a
+        # second read could see a settings.json the dashboard rewrote mid-setup
+        # (TOCTOU), leaving traces enabled while logs resolve disabled (or with
+        # different creds/endpoint) in the same process.
+        target = _resolve_otlp_target()
+        _configure_tracing(agent_name, target)
+        _configure_logging(agent_name, target)
         logging.getLogger(agent_name).info(
             "observability initialised",
             extra={"service.name": agent_name},
@@ -105,6 +237,12 @@ def shutdown() -> None:
     if hasattr(provider, "shutdown"):
         provider.shutdown()
+    # Flush queued log records too — BatchLogRecordProcessor drops them on
+    # interpreter exit otherwise, the same hazard as spans.
+    if _LOGGER_PROVIDER is not None:
+        _LOGGER_PROVIDER.force_flush(timeout_millis=5_000)
+        _LOGGER_PROVIDER.shutdown()
 def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
     """Parse an incoming W3C `traceparent` header into an OTel `Context`.
@@ -119,21 +257,14 @@ def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
 # ──────────────────────── Tracing setup ────────────────────────────────────────
-def _configure_tracing(agent_name: str) -> None:
+def _configure_tracing(agent_name: str, target: Optional[_OtlpTarget]) -> None:
     resource = Resource.create({"service.name": agent_name})
     provider = TracerProvider(resource=resource)
-    disabled = os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes")
-    endpoint = (
-        os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
-        or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
-    )
-    if not disabled and endpoint:
-        headers: dict[str, str] = {}
-        auth = os.environ.get("MERIDIAN_OO_AUTH")
-        if auth:
-            headers["Authorization"] = f"Basic {auth}"
-        exporter = OTLPSpanExporter(endpoint=endpoint, headers=headers)
+    if target is not None:
+        exporter = OTLPSpanExporter(
+            endpoint=target.traces_endpoint, headers=target.headers
+        )
         provider.add_span_processor(BatchSpanProcessor(exporter))
     # Set as the global provider. OTel's `set_tracer_provider` warns if
@@ -143,8 +274,32 @@ def _configure_tracing(agent_name: str) -> None:
     set_global_textmap(TraceContextTextMapPropagator())
+def _configure_log_export(
+    agent_name: str, target: Optional[_OtlpTarget]
+) -> Optional[logging.Handler]:
+    """Build an OTLP-logs handler so every `log.*` record reaches OpenObserve,
+    correlated to the active span by trace_id/span_id — the Python counterpart
+    of the Rust daemon's `OpenTelemetryTracingBridge`.
+    Returns the handler (caller attaches it to root) or `None` when export is
+    disabled, in which case logs still go to the JSONL file + stdout/stderr.
+    """
+    global _LOGGER_PROVIDER
+    if target is None:
+        return None
+    resource = Resource.create({"service.name": agent_name})
+    provider = LoggerProvider(resource=resource)
+    exporter = OTLPLogExporter(endpoint=target.logs_endpoint, headers=target.headers)
+    provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
+    set_logger_provider(provider)
+    _LOGGER_PROVIDER = provider
+    return LoggingHandler(level=logging.NOTSET, logger_provider=provider)
 # ──────────────────────── Logging setup ────────────────────────────────────────
-def _configure_logging(agent_name: str) -> None:
+def _configure_logging(agent_name: str, target: Optional[_OtlpTarget]) -> None:
     log_dir = Path(os.environ.get("MERIDIAN_LOG_DIR") or DEFAULT_LOG_DIR)
     log_dir.mkdir(parents=True, exist_ok=True)
     log_path = log_dir / f"{agent_name}.jsonl"
@@ -204,6 +359,22 @@ def _configure_logging(agent_name: str) -> None:
     root.addHandler(file_h)
     root.addHandler(stdout_h)
     root.addHandler(stderr_h)
+    # Ship every record to OpenObserve via OTLP/HTTP logs too, when export is
+    # configured. The OTel LoggingHandler reads the active span context, so each
+    # OO log row carries the trace_id/span_id that ties it to the classifier's
+    # span waterfall. No-op (None) when OTLP is disabled.
+    # The OTLP handler already carries service.name via the OTel Resource, so it
+    # needs no _ServiceFilter (that would duplicate the attribute on each record).
+    otlp_log_h = _configure_log_export(agent_name, target)
+    if otlp_log_h is not None:
+        # Do NOT feed the OTLP exporter's OWN transport logs back into OTLP
+        # export: on export failure httpx/urllib3/opentelemetry emit WARNING+
+        # records which this root handler would try to export → more failures (a
+        # log→export→log loop). Drop those from THIS handler only — they still
+        # reach the file/stderr handlers.
+        _otlp_excluded = ("httpx", "httpcore", "urllib3", "grpc", "opentelemetry")
+        otlp_log_h.addFilter(lambda r: not r.name.startswith(_otlp_excluded))
+        root.addHandler(otlp_log_h)
     root.setLevel(level)
     for noisy in _NOISY_LOGGERS:

package/services/agents/run_task_linker_mlx.py CHANGED Viewed

@@ -25,15 +25,19 @@ Method tag in results: "mlx_direct".
 from __future__ import annotations
 import datetime as _dt
+import gc
 import json
 import logging
 import os
 import sqlite3 as _sqlite3
 import sys
+import threading
 import time
+from contextlib import contextmanager
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal, Optional, Iterator
+from opentelemetry import trace
 from opentelemetry.trace import StatusCode
 from pydantic import BaseModel, Field
@@ -234,42 +238,144 @@ _SYSTEM_PROMPT = (
 # ---------------------------------------------------------------------------
-# Model loading — cached for the process lifetime.
-# outlines.from_mlxlm wraps the already-loaded mlx model; subsequent calls
-# skip the expensive disk load.
+# Model loading — loaded lazily on first use, evicted when idle.
+#
+# The MLX model holds ~7 GB of Metal unified memory while resident (measured;
+# note `ps`/Activity Monitor RSS does NOT show it). Classification is bursty,
+# so we keep the model only while it's being used: load on first inference,
+# and evict after MLX_IDLE_EVICT_S of inactivity (server.py runs the evictor).
+# `del + gc.collect() + mx.clear_cache()` reclaims the full 7 GB; cold reload
+# is ~3 s. `_model_lock` + `_in_flight` guarantee the evictor never frees the
+# model out from under an in-flight inference.
 # ---------------------------------------------------------------------------
 _model_cache: dict[str, Any] = {}
+_model_lock = threading.Lock()       # guards _model_cache mutation, _in_flight, _last_used, eviction
+_in_flight = 0                       # inferences currently using the model
+_last_used = time.monotonic()        # monotonic ts of the last finished inference
+# Aggressive default (2 min): the model is present only during active bursts.
+# Tune via env without a code change; 0 disables idle eviction entirely.
+_IDLE_EVICT_S = float(os.environ.get("MLX_IDLE_EVICT_S", "120"))
 def _get_model() -> Any:
-    """Return an outlines-wrapped model, loading from disk on the first call."""
+    """Return an outlines-wrapped model, loading from disk on the first call.
+    Cache-miss load is done under _model_lock (double-checked) so concurrent
+    callers can't double-load and the idle evictor can't race the load.
+    """
     model_id = _resolve_model_id()
-    if model_id in _model_cache:
-        return _model_cache[model_id]
+    cached = _model_cache.get(model_id)
+    if cached is not None:
+        return cached
+    with _model_lock:
+        cached = _model_cache.get(model_id)   # re-check under lock
+        if cached is not None:
+            return cached
+        try:
+            import mlx_lm
+            import outlines
+        except ImportError as exc:
+            raise ImportError(
+                f"Required package not installed: {exc}. "
+                "Install with: pip install 'mlx-lm>=0.22' 'outlines[mlxlm]>=1.3'"
+            ) from exc
+        log.info(
+            "run_task_linker_mlx: loading %s (first call this process)", model_id
+        )
+        t0 = time.time()
+        mlx_model, tokenizer = mlx_lm.load(
+            model_id,
+            tokenizer_config={"trust_remote_code": True},
+        )
+        outlines_model = outlines.from_mlxlm(mlx_model, tokenizer)
+        log.info("run_task_linker_mlx: model loaded in %.1fs", time.time() - t0)
+        _model_cache[model_id] = outlines_model
+        return outlines_model
+@contextmanager
+def model_session() -> Iterator[Any]:
+    """Yield the loaded model, marking it in-flight so the idle evictor never
+    frees it mid-inference. Wrap every direct ``model(...)`` call in this.
+    Lock is held only briefly (to bump/clear the in-flight counter), never for
+    the duration of inference. NOTE: production serialises all MLX calls upstream
+    via the Rust llm_gate (1-permit semaphore), so inferences don't actually
+    overlap — this lock scope just avoids adding a second, redundant serialisation
+    point, NOT a claim that concurrent generation on the shared model is safe.
+    """
+    global _in_flight, _last_used
+    with _model_lock:
+        _in_flight += 1
     try:
-        import mlx_lm
-        import outlines
-    except ImportError as exc:
-        raise ImportError(
-            f"Required package not installed: {exc}. "
-            "Install with: pip install 'mlx-lm>=0.22' 'outlines[mlxlm]>=1.3'"
-        ) from exc
-    log.info(
-        "run_task_linker_mlx: loading %s (first call this process)", model_id
-    )
-    t0 = time.time()
-    mlx_model, tokenizer = mlx_lm.load(
-        model_id,
-        tokenizer_config={"trust_remote_code": True},
-    )
-    outlines_model = outlines.from_mlxlm(mlx_model, tokenizer)
-    log.info("run_task_linker_mlx: model loaded in %.1fs", time.time() - t0)
+        yield _get_model()
+    finally:
+        with _model_lock:
+            _in_flight -= 1
+            _last_used = time.monotonic()
+def maybe_evict_idle(idle_s: float | None = None) -> float | None:
+    """Evict the model if it's resident, nothing is in flight, and it's been
+    idle longer than ``idle_s`` (default MLX_IDLE_EVICT_S). Returns the GB freed,
+    or None if no eviction happened. Safe to call from a threadpool worker.
+    Uses a non-blocking lock acquire: if an inference/load is mutating state we
+    simply skip this tick and try again on the next one.
+    """
+    ttl = _IDLE_EVICT_S if idle_s is None else idle_s
+    if ttl <= 0:
+        return None
+    if not _model_lock.acquire(blocking=False):
+        return None
+    try:
+        if _in_flight > 0 or not _model_cache:
+            return None
+        if (time.monotonic() - _last_used) < ttl:
+            return None
+        try:
+            import mlx.core as mx
+            before = mx.get_active_memory()
+        except Exception:               # noqa: BLE001 — mx should always import here
+            mx, before = None, 0
+        _model_cache.clear()
+        gc.collect()
+        freed = 0.0
+        if mx is not None:
+            mx.clear_cache()
+            freed = max(0.0, (before - mx.get_active_memory()) / 1e9)
+        log.info(
+            "run_task_linker_mlx: evicted idle model (idle ≥ %.0fs), freed ~%.1f GB",
+            ttl, freed,
+        )
+        return freed
+    finally:
+        _model_lock.release()
+def model_resident() -> bool:
+    """True if the MLX model is currently loaded in memory."""
+    return bool(_model_cache)
-    _model_cache[model_id] = outlines_model
-    return outlines_model
+def model_active_memory_gb() -> float | None:
+    """Live Metal active-memory footprint in GB, or None if MLX is unavailable.
+    Process-wide Metal active memory (≈ the model when resident — the model
+    dominates, though a transient load allocation can briefly inflate it), and
+    the only honest measure: `ps`/Activity Monitor can't see Metal unified
+    memory (they undercount by ~6.5 GB).
+    """
+    try:
+        import mlx.core as mx
+        return round(mx.get_active_memory() / 1e9, 2)
+    except Exception:  # noqa: BLE001 — mx absent on non-MLX machines
+        return None
 # Apple Foundation Models has a 4096-token combined context window (input + output).
@@ -705,14 +811,14 @@ def _classify_one(
                 from mlx_lm.sample_utils import make_sampler
                 from outlines.inputs import Chat
-                model = _get_model()
-                raw = model(
-                    Chat(messages),
-                    output_type=SessionClassification,
-                    max_tokens=_MAX_TOKENS,
-                    sampler=make_sampler(temp=_TEMPERATURE),
-                    verbose=False,
-                )
+                with model_session() as model:
+                    raw = model(
+                        Chat(messages),
+                        output_type=SessionClassification,
+                        max_tokens=_MAX_TOKENS,
+                        sampler=make_sampler(temp=_TEMPERATURE),
+                        verbose=False,
+                    )
         except Exception as exc:
             elapsed = time.time() - t0
             outcome = "apple_fm_error" if _use_apple_fm else "mlx_error"
@@ -840,10 +946,76 @@ def _open_run_log(db_path: str) -> "tuple[Path, Any]":
     return log_path, log_path.open("w", encoding="utf-8")
+# `method` values that mean the model produced a usable classification.
+# Anything else is an error path the dashboard surfaces under errors-only. The
+# real error `method` values emitted by `_error_result` are `mlx_parse_error`
+# (schema validation / unknown task_key — those names are child-span `outcome`
+# attributes, NOT methods) and `mlx_error` (inference failure / session-not-found).
+_SUCCESS_METHODS = {"mlx_direct", "apple_fm"}
+def _annotate_classification_span(result: dict[str, Any]) -> None:
+    """Promote the classification result onto the enclosing `classify_session`
+    span so each session is ONE self-describing row in OpenObserve — filterable
+    by session_id / session_type / task_key / is_error without joining the child
+    spans. Both the server and CLI entry points wrap the call in a
+    `classify_session` span, so annotating the current span here covers both.
+    """
+    span = trace.get_current_span()
+    if not span.is_recording():
+        return
+    method = str(result.get("method", ""))
+    task_key = result.get("task_key")
+    is_error = method not in _SUCCESS_METHODS
+    span.set_attribute("session_id", int(result.get("session_id", 0)))
+    span.set_attribute("task_key", task_key or "-")
+    span.set_attribute("has_task", task_key is not None)
+    span.set_attribute("session_type", str(result.get("session_type", "")))
+    span.set_attribute("category", str(result.get("category", "")))
+    span.set_attribute("confidence", float(result.get("confidence", 0.0)))
+    span.set_attribute(
+        "category_confidence", float(result.get("category_confidence", 0.0))
+    )
+    span.set_attribute("method", method)
+    span.set_attribute("elapsed_s", float(result.get("elapsed_s", 0.0)))
+    span.set_attribute("is_error", is_error)
+    if is_error:
+        span.set_status(StatusCode.ERROR, str(result.get("reasoning", method))[:300])
 def _classify_one_logged(
     session_id: int,
     con: _sqlite3.Connection,
     run_log: Any,
+) -> dict[str, Any]:
+    """Classify one session, marking the span is_error=true on ANY failure.
+    Wraps the inner worker so an UNHANDLED exception (a sqlite read error,
+    malformed window_titles JSON, …) still stamps is_error=true + ERROR status on
+    the enclosing classify_session span before propagating — otherwise the
+    dashboard's errors-only table (which filters is_error='true') silently misses
+    exactly the crashes an operator opens it to find. Handled failures already
+    return _error_result dicts that _annotate_classification_span marks.
+    """
+    try:
+        return _classify_one_logged_inner(session_id, con, run_log)
+    except Exception as exc:  # noqa: BLE001 — annotate, then re-raise unchanged
+        span = trace.get_current_span()
+        if span.is_recording():
+            span.set_attribute("session_id", int(session_id))
+            span.set_attribute("is_error", True)
+            span.set_attribute("method", "mlx_error")
+            span.set_status(StatusCode.ERROR, str(exc)[:300])
+        log.exception(
+            "run_task_linker_mlx: unhandled error classifying session %d", session_id
+        )
+        raise
+def _classify_one_logged_inner(
+    session_id: int,
+    con: _sqlite3.Connection,
+    run_log: Any,
 ) -> dict[str, Any]:
     """Classify one session and append a full record to the run log."""
     # Gather inputs before classification so we can log them even on error.
@@ -888,6 +1060,7 @@ def _classify_one_logged(
     }
     run_log.write(json.dumps(record, default=str) + "\n")
     run_log.flush()
+    _annotate_classification_span(result)
     return result
@@ -950,15 +1123,12 @@ def main() -> None:
         try:
             results: list[dict[str, Any]] = []
             for sid in session_ids:
-                with tracer.start_as_current_span("classify_session") as cls_span:
-                    cls_span.set_attribute("session_id", sid)
+                with tracer.start_as_current_span("classify_session"):
+                    # _classify_one_logged enriches this span with the full
+                    # result (session_id, task_key, session_type, is_error, …).
                     log.info("run_task_linker_mlx: classifying session %d", sid)
                     result = _classify_one_logged(sid, con, run_log_file)
                     results.append(result)
-                    cls_span.set_attribute("task_key", result["task_key"] or "-")
-                    cls_span.set_attribute("session_type", result["session_type"])
-                    cls_span.set_attribute("method", result["method"])
-                    cls_span.set_attribute("elapsed_s", result["elapsed_s"])
                     log.info(
                         "run_task_linker_mlx: session_id=%d task_key=%s "
                         "session_type=%s elapsed_s=%.2f",

package/services/agents/server.py CHANGED Viewed

@@ -41,20 +41,58 @@ _DB_PATH = Path(os.environ.get("MERIDIAN_DB", Path.home() / ".meridian/meridian.
 _app_state: dict[str, Any] = {}
+async def _idle_evictor(mlx_module: Any) -> None:
+    """Background loop: evict the MLX model after it has been idle long enough.
+    Runs the (briefly blocking) eviction in a threadpool so it never stalls the
+    event loop, and never raises out — the evictor must outlive transient errors.
+    """
+    import asyncio
+    from fastapi.concurrency import run_in_threadpool
+    ttl = mlx_module._IDLE_EVICT_S
+    if ttl <= 0:
+        return
+    interval = max(15.0, ttl / 4.0)   # check ~4× per idle window
+    while True:
+        await asyncio.sleep(interval)
+        try:
+            await run_in_threadpool(mlx_module.maybe_evict_idle)
+        except Exception as exc:       # noqa: BLE001 — evictor must never die
+            log.warning("server: idle-evictor error: %s", exc)
 @asynccontextmanager
 async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
+    import asyncio
     import datetime
     import agents.run_task_linker_mlx as _mlx
     _app_state["mlx_module"] = _mlx
     _app_state["loaded_at"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
     from agents.llm_selector import APPLE_INTELLIGENCE_ID
+    evictor: "asyncio.Task | None" = None
     if _mlx._resolve_model_id() == APPLE_INTELLIGENCE_ID:
-        log.info("server: 8 GB machine — Apple Intelligence backend, no MLX model to pre-load")
+        log.info("server: Apple Intelligence backend — no MLX model to load")
+    elif _mlx._IDLE_EVICT_S > 0:
+        # Lazy: the ~7 GB model loads on the first inference and is evicted after
+        # MLX_IDLE_EVICT_S of inactivity, so the server idles light (~0.4 GB)
+        # instead of pinning ~7 GB of Metal memory for the whole process life.
+        log.info(
+            "server: MLX model loads on first request; idle-evict after %.0fs",
+            _mlx._IDLE_EVICT_S,
+        )
+        evictor = asyncio.create_task(_idle_evictor(_mlx))
     else:
-        log.info("server: loading MLX model at startup…")
-        _mlx._get_model()
-        log.info("server: MLX model ready")
-    yield
+        # Eviction disabled — don't spawn a no-op evictor task just to cancel it.
+        log.info("server: MLX model loads on first request; idle-eviction disabled (MLX_IDLE_EVICT_S=0)")
+    try:
+        yield
+    finally:
+        if evictor is not None:
+            import contextlib
+            evictor.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await evictor
 app = FastAPI(title="Meridian Agent", version="1.0.0", lifespan=_lifespan)
@@ -76,12 +114,19 @@ async def health() -> dict:
 @app.get("/info")
 async def info() -> dict:
-    """Return the identity of the loaded model."""
+    """Return the identity of the model and its live memory state.
+    `active_memory_gb` reads `mx.get_active_memory()` — the ONLY honest measure
+    of the model's footprint, since Metal unified memory is invisible to `ps`
+    and Activity Monitor (they undercount the model by ~6.5 GB).
+    """
     m = _app_state.get("mlx_module")
     return {
-        "backend":   "mlx",
-        "model_id":  m._resolve_model_id() if m else None,
-        "loaded_at": _app_state.get("loaded_at"),
+        "backend":          "mlx",
+        "model_id":         m._resolve_model_id() if m else None,
+        "loaded_at":        _app_state.get("loaded_at"),
+        "model_resident":   m.model_resident() if m else False,
+        "active_memory_gb": m.model_active_memory_gb() if m else None,
     }
@@ -143,14 +188,14 @@ async def classify(req: ClassifyRequest) -> ClassifyResponse:
             # _classify_apple_fm uses asyncio.new_event_loop() internally;
             # must run in a thread (no existing loop) not in the async handler.
             return m._classify_apple_fm(messages)
-        model = m._get_model()
-        raw = model(
-            Chat(messages),
-            output_type=m.SessionClassification,
-            max_tokens=m._MAX_TOKENS,
-            sampler=make_sampler(temp=m._TEMPERATURE),
-            verbose=False,
-        )
+        with m.model_session() as model:
+            raw = model(
+                Chat(messages),
+                output_type=m.SessionClassification,
+                max_tokens=m._MAX_TOKENS,
+                sampler=make_sampler(temp=m._TEMPERATURE),
+                verbose=False,
+            )
         return m.SessionClassification.model_validate_json(raw)
     try:
@@ -214,49 +259,43 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
     tracer = _app_state.get("tracer") or trace.get_tracer("meridian-agent-server-mlx")
     parent_ctx = observability.extract_parent_context(req.traceparent)
-    with tracer.start_as_current_span("classify_sessions", context=parent_ctx) as span:
-        span.set_attribute("session_count", len(req.session_ids))
-        # Snapshot the OTel context while classify_sessions span is active so we
-        # can attach it explicitly inside the threadpool (anyio copies contextvars,
-        # but explicit attach is more reliable across anyio versions).
-        ctx_snapshot = _otel_context.get_current()
-        def _classify_all() -> list[dict]:
-            # Attach classify_sessions context so _classify_one sub-spans
-            # (db_fetch, build_prompt, llm_inference, parse_response) appear
-            # as children of classify_sessions in the OO trace waterfall.
-            _tok = _otel_context.attach(ctx_snapshot)
+    # No batch-wrapper span: each session emits a single `classify_session` span
+    # attached directly to the Rust caller's context (via the propagated
+    # traceparent). This keeps the debug trace minimal — one self-describing span
+    # per session with no redundant N=1 wrapper. For N>1, the sessions appear as
+    # sibling classify_session spans under the same daemon trace.
+    def _classify_all() -> list[dict]:
+        _tok = _otel_context.attach(parent_ctx) if parent_ctx is not None else None
+        try:
+            # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
+            # path-traversal: the server knows its DB from the environment.
+            con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
+            con.row_factory = _sqlite3.Row
             try:
-                # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
-                # path-traversal: the server knows its DB from the environment.
-                con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
-                con.row_factory = _sqlite3.Row
-                try:
-                    results: list[dict] = []
-                    for sid in req.session_ids:
-                        with tracer.start_as_current_span(
-                            "classify_session",
-                            attributes={"session_id": sid},
-                        ):
-                            result = m._classify_one_logged(sid, con, fh)
-                        log.info(
-                            "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
-                            sid,
-                            result.get("task_key"),
-                            result.get("session_type"),
-                            result.get("elapsed_s", 0.0),
-                        )
-                        results.append(result)
-                    return results
-                finally:
-                    con.close()
+                results: list[dict] = []
+                for sid in req.session_ids:
+                    # _classify_one_logged owns this span's attributes (session_id,
+                    # task_key, confidence, is_error, …) via _annotate_classification_span
+                    # and emits db_fetch / build_prompt / llm_inference / parse_response
+                    # as its children — one source of truth, matching the CLI path.
+                    with tracer.start_as_current_span("classify_session"):
+                        result = m._classify_one_logged(sid, con, fh)
+                    log.info(
+                        "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
+                        sid,
+                        result.get("task_key"),
+                        result.get("session_type"),
+                        result.get("elapsed_s", 0.0),
+                    )
+                    results.append(result)
+                return results
             finally:
+                con.close()
+        finally:
+            if _tok is not None:
                 _otel_context.detach(_tok)
-        results = await run_in_threadpool(_classify_all)
-        span.set_attribute("classified_count", len(results))
+    results = await run_in_threadpool(_classify_all)
     return {"results": results}
@@ -370,18 +409,46 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
     temperature = req.temperature if req.temperature is not None else 0.3
     max_tokens  = req.max_tokens if req.max_tokens else 2048
+    # Honour OpenAI `response_format: {"type":"json_schema", ...}` by
+    # FSM-constraining decoding to that schema via outlines. Without this, a
+    # reasoning model is free to emit chain-of-thought prose instead of the JSON
+    # the caller (e.g. agno's structured-output path) expects, and the parse
+    # fails. `{"type":"json_object"}` carries no schema, so it stays free-form.
+    output_type = None
+    rf = req.response_format
+    if isinstance(rf, dict) and rf.get("type") == "json_schema":
+        schema = (rf.get("json_schema") or {}).get("schema")
+        if schema:
+            from outlines.types import JsonSchema
+            output_type = JsonSchema(schema)
     from agents.llm_selector import APPLE_INTELLIGENCE_ID
+    # A `json_schema` request cannot be honoured on Apple Foundation Models:
+    # outlines FSM-constrained decoding is incompatible with FM, so the schema
+    # would be silently dropped and a structured-output caller (e.g. agno) would
+    # get free-form text that fails to parse downstream. Reject explicitly with a
+    # 4xx rather than emit unconstrained output that breaks later.
+    if output_type is not None and m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+        raise HTTPException(
+            status_code=400,
+            detail="response_format=json_schema is not supported on Apple "
+            "Foundation Models (no FSM-constrained decoding available)",
+        )
     def _generate() -> str:
         if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+            # outlines FSM decoding is incompatible with Foundation Models;
+            # Apple FM falls back to free-form (json_object / no schema only).
             return _infer_apple_fm(msgs, max_tokens)
-        model = m._get_model()
-        return model(
-            Chat(msgs),
-            max_tokens=max_tokens,
-            sampler=make_sampler(temp=temperature),
-            verbose=False,
-        )
+        with m.model_session() as model:
+            return model(
+                Chat(msgs),
+                output_type=output_type,
+                max_tokens=max_tokens,
+                sampler=make_sampler(temp=temperature),
+                verbose=False,
+            )
     t0 = _time.time()
     try:
@@ -504,14 +571,14 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
     from outlines.inputs import Chat
     def _generate() -> str:
-        model = m._get_model()
-        return model(
-            Chat(messages),
-            output_type=_SummarySchema,
-            max_tokens=req.max_tokens,
-            sampler=make_sampler(temp=req.temperature),
-            verbose=False,
-        )
+        with m.model_session() as model:
+            return model(
+                Chat(messages),
+                output_type=_SummarySchema,
+                max_tokens=req.max_tokens,
+                sampler=make_sampler(temp=req.temperature),
+                verbose=False,
+            )
     try:
         raw = await run_in_threadpool(_generate)

package/services/agents/tests/test_run_task_linker_mlx.py CHANGED Viewed

@@ -9,6 +9,7 @@ from __future__ import annotations
 import json
 import sqlite3
 import sys
+import time
 from io import StringIO
 from pathlib import Path
 from typing import Iterator
@@ -1011,6 +1012,58 @@ class TestModelCache:
                 m._get_model()
+# ---------------------------------------------------------------------------
+# Idle eviction — model_session() in-flight tracking + maybe_evict_idle()
+# (the model holds ~7 GB while resident; the server unloads it when idle)
+# ---------------------------------------------------------------------------
+class TestModelEviction:
+    def test_model_session_loads_and_tracks_in_flight(self):
+        import agents.run_task_linker_mlx as m
+        sentinel = MagicMock(name="model")
+        with patch.object(m, "_get_model", return_value=sentinel):
+            m._in_flight = 0
+            with m.model_session() as model:
+                assert model is sentinel
+                assert m._in_flight == 1          # marked in-flight while in use
+            assert m._in_flight == 0              # released on exit
+    def test_evict_noop_when_not_idle_long_enough(self):
+        import agents.run_task_linker_mlx as m
+        m._model_cache["x"] = MagicMock()
+        m._in_flight = 0
+        m._last_used = time.monotonic()           # just used
+        assert m.maybe_evict_idle(idle_s=600) is None
+        assert m.model_resident() is True
+    def test_evict_disabled_when_ttl_zero(self):
+        import agents.run_task_linker_mlx as m
+        m._model_cache["x"] = MagicMock()
+        assert m.maybe_evict_idle(idle_s=0) is None
+        assert m.model_resident() is True
+    def test_evict_noop_when_in_flight(self):
+        import agents.run_task_linker_mlx as m
+        m._model_cache["x"] = MagicMock()
+        m._in_flight = 1                          # an inference is using the model
+        m._last_used = time.monotonic() - 1000
+        try:
+            assert m.maybe_evict_idle(idle_s=0.001) is None
+            assert m.model_resident() is True     # never freed mid-inference
+        finally:
+            m._in_flight = 0
+    def test_evict_clears_cache_when_idle(self):
+        import agents.run_task_linker_mlx as m
+        m._model_cache["x"] = MagicMock()
+        m._in_flight = 0
+        m._last_used = time.monotonic() - 1000    # idle long past the window
+        freed = m.maybe_evict_idle(idle_s=0.001)
+        assert freed is not None                  # eviction happened
+        assert m.model_resident() is False
+        assert m._model_cache == {}
 # ---------------------------------------------------------------------------
 # SessionClassification schema
 # ---------------------------------------------------------------------------

package/services/observability/dashboards/classifier-debug.json ADDED Viewed

@@ -0,0 +1,174 @@
+{
+  "title": "Session→Task Classifier — Debug",
+  "description": "Every session-task classification, newest first. Filter by session_id, session_type, or errors-only; copy a row's trace_id and open it in Traces for the full waterfall (db_fetch → build_prompt → llm_inference → parse_response, with raw_mlx_output). Backed by the enriched `classify_session` spans (service meridian-agent-server-mlx in-process, or meridian-task-linker-mlx from the standalone CLI). Drilldown keys on trace_id alone so it works for both.",
+  "version": 5,
+  "variables": {
+    "list": [
+      {
+        "type": "textbox",
+        "name": "session_id",
+        "label": "Session ID",
+        "query_data": null,
+        "value": "",
+        "options": [],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      },
+      {
+        "type": "custom",
+        "name": "session_type",
+        "label": "Session type",
+        "query_data": null,
+        "value": "",
+        "options": [
+          {"label": "All", "value": "", "selected": true},
+          {"label": "task", "value": "task", "selected": false},
+          {"label": "overhead", "value": "overhead", "selected": false},
+          {"label": "untracked", "value": "untracked", "selected": false}
+        ],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      },
+      {
+        "type": "custom",
+        "name": "errors_only",
+        "label": "Errors only",
+        "query_data": null,
+        "value": "",
+        "options": [
+          {"label": "All", "value": "", "selected": true},
+          {"label": "Errors only", "value": "true", "selected": false}
+        ],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      }
+    ],
+    "showDynamicFilters": true
+  },
+  "defaultDatetimeDuration": {"type": "relative", "relativeTimePeriod": "12h", "startTime": null, "endTime": null},
+  "tabs": [
+    {
+      "tabId": "default",
+      "name": "Default",
+      "panels": [
+        {
+          "id": "stat_total",
+          "type": "metric",
+          "title": "Classifications",
+          "description": "Total classify_session spans in range",
+          "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Classifications", "alias": "y_axis_1", "column": "y_axis_1", "color": "#5960b2", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 0, "y": 0, "w": 12, "h": 6, "i": 1}
+        },
+        {
+          "id": "stat_errors",
+          "type": "metric",
+          "title": "Errors",
+          "description": "Classifications whose method failed (is_error=true)",
+          "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Errors", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b25959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 12, "y": 0, "w": 12, "h": 6, "i": 2}
+        },
+        {
+          "id": "stat_untracked",
+          "type": "metric",
+          "title": "Untracked",
+          "description": "Sessions classified as untracked (no ticket)",
+          "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND session_type='untracked'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Untracked", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b29959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 24, "y": 0, "w": 12, "h": 6, "i": 3}
+        },
+        {
+          "id": "stat_conf",
+          "type": "metric",
+          "title": "Avg confidence",
+          "description": "Mean confidence of successful classifications",
+          "config": {"show_legends": false, "unit": null, "decimals": 2, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT round(avg(CAST(confidence AS DOUBLE)),2) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='false'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Avg confidence", "alias": "y_axis_1", "column": "y_axis_1", "color": "#59b27a", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 36, "y": 0, "w": 12, "h": 6, "i": 4}
+        },
+        {
+          "id": "table_all",
+          "type": "table",
+          "title": "All classifications (newest first)",
+          "description": "Filter with the Session ID / Session type / Errors only variables above. Click any row → opens the Traces view filtered to just that trace's spans.",
+          "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') ORDER BY _timestamp DESC",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 0, "y": 6, "w": 48, "h": 14, "i": 5}
+        },
+        {
+          "id": "table_errors",
+          "type": "table",
+          "title": "Errors only",
+          "description": "Failed classifications — inference errors, schema errors, invalid task_key, session-not-found. Click a row → opens just that trace's spans.",
+          "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' ORDER BY _timestamp DESC",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 0, "y": 20, "w": 48, "h": 10, "i": 6}
+        }
+      ]
+    }
+  ]
+}

package/services/pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "meridian-agents"
-version = "1.54.0"
+version = "1.55.0"
 description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
 requires-python = ">=3.11"
 authors = [{ name = "Meridiona" }]

package/ui.tar.gz CHANGED Viewed

Binary file