npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.54.1 → 1.55.0 - Mend

@meridiona/meridian-darwin-arm64 1.54.1 → 1.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/scripts/install-openobserve-daemon.sh +7 -3
package/services/agents/observability.py +188 -17
package/services/agents/run_task_linker_mlx.py +71 -6
package/services/agents/server.py +61 -39
package/services/observability/dashboards/classifier-debug.json +174 -0
package/services/pyproject.toml +1 -1
package/ui.tar.gz +0 -0

package/VERSION CHANGED Viewed

@@ -1 +1 @@
-1.54.1
+1.55.0

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.54.1",
+  "version": "1.55.0",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/scripts/install-openobserve-daemon.sh CHANGED Viewed

@@ -41,15 +41,19 @@ elif command -v openobserve >/dev/null 2>&1; then
 fi
 if [[ -z "${OO_BIN}" ]]; then
-    echo "→ OpenObserve binary not found — downloading v0.11.0 (last release with arm64 binary)..."
+    echo "→ OpenObserve binary not found — downloading v0.90.3..."
     _oo_arch="$(uname -m)"
     case "$_oo_arch" in
         arm64)  _oo_arch="arm64" ;;
         x86_64) _oo_arch="amd64" ;;
         *) echo "✗ Unsupported arch: $_oo_arch" >&2; exit 1 ;;
     esac
-    _oo_ver="v0.11.0"
-    _oo_url="https://github.com/openobserve/openobserve/releases/download/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
+    # GitHub release assets were removed for recent versions; binaries now live on
+    # the official downloads host. Trace deep-linking (dashboard drilldown into a
+    # single trace's spans) needs a modern build, so we pin a current stable.
+    # KEEP IN SYNC: the same version is pinned in install.sh — bump both together.
+    _oo_ver="v0.90.3"
+    _oo_url="https://downloads.openobserve.ai/releases/openobserve/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
     mkdir -p "${HOME}/.openobserve"
     if curl -fsSL -o "${HOME}/.openobserve/openobserve.tar.gz" "$_oo_url" \
         && tar -xzf "${HOME}/.openobserve/openobserve.tar.gz" -C "${HOME}/.openobserve" \

package/services/agents/observability.py CHANGED Viewed

@@ -4,7 +4,9 @@ A single `setup(agent_name)` call wires up:
   * an OTel `TracerProvider` with `service.name=agent_name`
   * a `BatchSpanProcessor` exporting OTLP/HTTP-protobuf spans to OpenObserve
-    (`MERIDIAN_OTLP_TRACES_ENDPOINT`, with Basic auth via `MERIDIAN_OO_AUTH`)
+  * a `LoggerProvider` + OTLP-logs handler so every `logging.LogRecord` is also
+    shipped to OpenObserve (correlated to the active span), mirroring the Rust
+    daemon's `OpenTelemetryTracingBridge`
   * W3C `TraceContextTextMapPropagator` as the global propagator so each
     agent can pick up the Rust daemon's `traceparent` and continue the trace
   * `LoggingInstrumentor` so every `logging.LogRecord` carries
@@ -13,6 +15,12 @@ A single `setup(agent_name)` call wires up:
     under `~/.meridian/logs/{agent_name}.jsonl` plus stderr — both ingestable
     by OpenObserve's log pipeline without further parsing.
+Export config (endpoint + Basic-auth credentials) is resolved from the SAME
+`~/.meridian/settings.json` the Rust daemon reads — `otlp_enabled`,
+`otlp_endpoint`, `oo_email`, `oo_password` — so the dashboard Settings page is
+the single source of truth for both processes. The legacy `MERIDIAN_OO_AUTH`
+env credential is deprecated and ignored, matching the daemon.
 `extract_parent_context(traceparent)` is the helper agents use to continue
 a span emitted by another process — typically the Rust ETL or another
 agent stage.
@@ -23,20 +31,26 @@ single-shot CLI paths funnel through the same module.
 """
 from __future__ import annotations
+import base64
+import json
 import logging
 import logging.handlers
 import os
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import NamedTuple, Optional
 from opentelemetry import trace
+from opentelemetry._logs import set_logger_provider
 from opentelemetry.context import Context
+from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
     OTLPSpanExporter,
 )
 from opentelemetry.instrumentation.logging import LoggingInstrumentor
 from opentelemetry.propagate import set_global_textmap
+from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
@@ -50,12 +64,125 @@ from pythonjsonlogger import jsonlogger
 DEFAULT_TRACES_ENDPOINT = "http://localhost:5080/api/default/v1/traces"
 DEFAULT_LOGS_ENDPOINT   = "http://localhost:5080/api/default/v1/logs"
 DEFAULT_LOG_DIR         = Path.home() / ".meridian" / "logs"
+# Single source of truth for OpenObserve export config — the SAME file the Rust
+# daemon reads (see `src/observability.rs::resolve_otlp_target`). Keeps the two
+# processes credential-aligned: the dashboard Settings page writes here and both
+# the daemon and this MLX server pick it up with no env plumbing.
+_SETTINGS_PATH = Path(
+    os.environ.get("MERIDIAN_SETTINGS_PATH")
+    or (Path.home() / ".meridian" / "settings.json")
+)
 _NOISY_LOGGERS = ("urllib3", "httpx", "httpcore", "openai", "botocore")
 # Track which agents have been configured so a second setup() call is a no-op.
 _INITIALISED: dict[str, trace.Tracer] = {}
 _PROCESS_SERVICE_NAME: str | None = None
+# Held so shutdown() can flush log records the same way it flushes spans.
+_LOGGER_PROVIDER: LoggerProvider | None = None
+# One-time guard so an export misconfiguration (enabled-but-no-creds, or a
+# schemeless endpoint) warns once per process instead of on every resolve.
+_WARNED_EXPORT_MISCONFIG: bool = False
+# ──────────────────────── OTLP target resolution ───────────────────────────────
+class _OtlpTarget(NamedTuple):
+    """Resolved OTLP export target: signal endpoints + Basic-auth header value."""
+    traces_endpoint: str
+    logs_endpoint: str
+    headers: dict[str, str]
+def _load_settings() -> dict[str, object]:
+    """Read `~/.meridian/settings.json`; empty dict if absent/unreadable."""
+    try:
+        with _SETTINGS_PATH.open(encoding="utf-8") as fh:
+            data = json.load(fh)
+        return data if isinstance(data, dict) else {}
+    except (OSError, ValueError):
+        return {}
+def _resolve_otlp_target() -> Optional[_OtlpTarget]:
+    """Mirror of the Rust daemon's `resolve_otlp_target()`.
+    Returns `None` (→ export disabled) when the toggle is off or credentials
+    are missing. Endpoint precedence: settings.json `otlp_endpoint` → the
+    `MERIDIAN_OTLP_TRACES_ENDPOINT`/`MERIDIAN_OTLP_ENDPOINT` env override →
+    the localhost default. Auth is `base64(oo_email:oo_password)` — settings.json
+    only; the legacy `MERIDIAN_OO_AUTH` env path is deprecated and ignored, the
+    same decision the daemon made.
+    """
+    global _WARNED_EXPORT_MISCONFIG
+    if os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes"):
+        return None
+    settings = _load_settings()
+    if not settings.get("otlp_enabled"):
+        return None
+    # Resolve the endpoint up front so we can warn (not silently disable) when
+    # export is enabled but unusable. Precedence: settings → env → localhost.
+    configured = str(settings.get("otlp_endpoint") or "").strip()
+    env_endpoint = (
+        os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
+        or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
+    )
+    traces_endpoint = configured or env_endpoint or DEFAULT_TRACES_ENDPOINT
+    def _warn_once(msg: str, *args: object) -> None:
+        global _WARNED_EXPORT_MISCONFIG
+        if not _WARNED_EXPORT_MISCONFIG:
+            _WARNED_EXPORT_MISCONFIG = True
+            logging.getLogger(__name__).warning(msg, *args)
+    email = str(settings.get("oo_email") or "")
+    password = str(settings.get("oo_password") or "")
+    if not email or not password:
+        # otlp_enabled but no usable credentials → export OFF. Warn once so an
+        # env-only (MERIDIAN_OO_AUTH) install that predates the settings.json
+        # credential move doesn't go dark silently — mirrors the daemon, which
+        # also warns. MERIDIAN_OO_AUTH is no longer read here.
+        _warn_once(
+            "OpenObserve export enabled but oo_email/oo_password missing in %s — "
+            "traces+logs export DISABLED. Set credentials in the dashboard Settings "
+            "(the MERIDIAN_OO_AUTH env var is no longer used).",
+            _SETTINGS_PATH,
+        )
+        return None
+    # Guard against HTTP header injection / malformed user:password splits —
+    # matches the daemon's same-named check.
+    if any(c in email for c in "\r\n:") or any(c in password for c in "\r\n"):
+        return None
+    auth = base64.standard_b64encode(f"{email}:{password}".encode()).decode()
+    # Validate scheme — only http/https are valid OTLP transports. The daemon
+    # disables export + warns on a schemeless endpoint; mirror that exactly so the
+    # two processes don't disagree on whether export is on.
+    if not (traces_endpoint.startswith("http://") or traces_endpoint.startswith("https://")):
+        _warn_once(
+            "OTLP endpoint %r has no http/https scheme — export DISABLED.",
+            traces_endpoint,
+        )
+        return None
+    # OpenObserve serves logs at the sibling `…/v1/logs` path. Derive it from the
+    # traces endpoint by swapping the trailing signal segment so a custom host or
+    # base (incl. a trailing slash, e.g. `…/v1/traces/`) carries to BOTH signals —
+    # never silently fall back to localhost for logs while traces go remote.
+    t = traces_endpoint.rstrip("/")
+    if t.endswith("/v1/traces"):
+        logs_endpoint = t[: -len("/v1/traces")] + "/v1/logs"
+    elif t.endswith("/traces"):
+        logs_endpoint = t[: -len("/traces")] + "/logs"
+    elif "traces" in t:
+        logs_endpoint = t.rsplit("traces", 1)[0] + "logs"
+    else:
+        logs_endpoint = t + "/v1/logs"
+    return _OtlpTarget(traces_endpoint, logs_endpoint, {"Authorization": f"Basic {auth}"})
 # ──────────────────────── Public API ───────────────────────────────────────────
@@ -80,8 +207,13 @@ def setup(agent_name: str) -> trace.Tracer:
     if _PROCESS_SERVICE_NAME is None:
         _PROCESS_SERVICE_NAME = agent_name
-        _configure_tracing(agent_name)
-        _configure_logging(agent_name)
+        # Resolve the export target ONCE and pass it to both configurers — a
+        # second read could see a settings.json the dashboard rewrote mid-setup
+        # (TOCTOU), leaving traces enabled while logs resolve disabled (or with
+        # different creds/endpoint) in the same process.
+        target = _resolve_otlp_target()
+        _configure_tracing(agent_name, target)
+        _configure_logging(agent_name, target)
         logging.getLogger(agent_name).info(
             "observability initialised",
             extra={"service.name": agent_name},
@@ -105,6 +237,12 @@ def shutdown() -> None:
     if hasattr(provider, "shutdown"):
         provider.shutdown()
+    # Flush queued log records too — BatchLogRecordProcessor drops them on
+    # interpreter exit otherwise, the same hazard as spans.
+    if _LOGGER_PROVIDER is not None:
+        _LOGGER_PROVIDER.force_flush(timeout_millis=5_000)
+        _LOGGER_PROVIDER.shutdown()
 def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
     """Parse an incoming W3C `traceparent` header into an OTel `Context`.
@@ -119,21 +257,14 @@ def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
 # ──────────────────────── Tracing setup ────────────────────────────────────────
-def _configure_tracing(agent_name: str) -> None:
+def _configure_tracing(agent_name: str, target: Optional[_OtlpTarget]) -> None:
     resource = Resource.create({"service.name": agent_name})
     provider = TracerProvider(resource=resource)
-    disabled = os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes")
-    endpoint = (
-        os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
-        or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
-    )
-    if not disabled and endpoint:
-        headers: dict[str, str] = {}
-        auth = os.environ.get("MERIDIAN_OO_AUTH")
-        if auth:
-            headers["Authorization"] = f"Basic {auth}"
-        exporter = OTLPSpanExporter(endpoint=endpoint, headers=headers)
+    if target is not None:
+        exporter = OTLPSpanExporter(
+            endpoint=target.traces_endpoint, headers=target.headers
+        )
         provider.add_span_processor(BatchSpanProcessor(exporter))
     # Set as the global provider. OTel's `set_tracer_provider` warns if
@@ -143,8 +274,32 @@ def _configure_tracing(agent_name: str) -> None:
     set_global_textmap(TraceContextTextMapPropagator())
+def _configure_log_export(
+    agent_name: str, target: Optional[_OtlpTarget]
+) -> Optional[logging.Handler]:
+    """Build an OTLP-logs handler so every `log.*` record reaches OpenObserve,
+    correlated to the active span by trace_id/span_id — the Python counterpart
+    of the Rust daemon's `OpenTelemetryTracingBridge`.
+    Returns the handler (caller attaches it to root) or `None` when export is
+    disabled, in which case logs still go to the JSONL file + stdout/stderr.
+    """
+    global _LOGGER_PROVIDER
+    if target is None:
+        return None
+    resource = Resource.create({"service.name": agent_name})
+    provider = LoggerProvider(resource=resource)
+    exporter = OTLPLogExporter(endpoint=target.logs_endpoint, headers=target.headers)
+    provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
+    set_logger_provider(provider)
+    _LOGGER_PROVIDER = provider
+    return LoggingHandler(level=logging.NOTSET, logger_provider=provider)
 # ──────────────────────── Logging setup ────────────────────────────────────────
-def _configure_logging(agent_name: str) -> None:
+def _configure_logging(agent_name: str, target: Optional[_OtlpTarget]) -> None:
     log_dir = Path(os.environ.get("MERIDIAN_LOG_DIR") or DEFAULT_LOG_DIR)
     log_dir.mkdir(parents=True, exist_ok=True)
     log_path = log_dir / f"{agent_name}.jsonl"
@@ -204,6 +359,22 @@ def _configure_logging(agent_name: str) -> None:
     root.addHandler(file_h)
     root.addHandler(stdout_h)
     root.addHandler(stderr_h)
+    # Ship every record to OpenObserve via OTLP/HTTP logs too, when export is
+    # configured. The OTel LoggingHandler reads the active span context, so each
+    # OO log row carries the trace_id/span_id that ties it to the classifier's
+    # span waterfall. No-op (None) when OTLP is disabled.
+    # The OTLP handler already carries service.name via the OTel Resource, so it
+    # needs no _ServiceFilter (that would duplicate the attribute on each record).
+    otlp_log_h = _configure_log_export(agent_name, target)
+    if otlp_log_h is not None:
+        # Do NOT feed the OTLP exporter's OWN transport logs back into OTLP
+        # export: on export failure httpx/urllib3/opentelemetry emit WARNING+
+        # records which this root handler would try to export → more failures (a
+        # log→export→log loop). Drop those from THIS handler only — they still
+        # reach the file/stderr handlers.
+        _otlp_excluded = ("httpx", "httpcore", "urllib3", "grpc", "opentelemetry")
+        otlp_log_h.addFilter(lambda r: not r.name.startswith(_otlp_excluded))
+        root.addHandler(otlp_log_h)
     root.setLevel(level)
     for noisy in _NOISY_LOGGERS:

package/services/agents/run_task_linker_mlx.py CHANGED Viewed

@@ -37,6 +37,7 @@ from contextlib import contextmanager
 from pathlib import Path
 from typing import Any, Literal, Optional, Iterator
+from opentelemetry import trace
 from opentelemetry.trace import StatusCode
 from pydantic import BaseModel, Field
@@ -945,10 +946,76 @@ def _open_run_log(db_path: str) -> "tuple[Path, Any]":
     return log_path, log_path.open("w", encoding="utf-8")
+# `method` values that mean the model produced a usable classification.
+# Anything else is an error path the dashboard surfaces under errors-only. The
+# real error `method` values emitted by `_error_result` are `mlx_parse_error`
+# (schema validation / unknown task_key — those names are child-span `outcome`
+# attributes, NOT methods) and `mlx_error` (inference failure / session-not-found).
+_SUCCESS_METHODS = {"mlx_direct", "apple_fm"}
+def _annotate_classification_span(result: dict[str, Any]) -> None:
+    """Promote the classification result onto the enclosing `classify_session`
+    span so each session is ONE self-describing row in OpenObserve — filterable
+    by session_id / session_type / task_key / is_error without joining the child
+    spans. Both the server and CLI entry points wrap the call in a
+    `classify_session` span, so annotating the current span here covers both.
+    """
+    span = trace.get_current_span()
+    if not span.is_recording():
+        return
+    method = str(result.get("method", ""))
+    task_key = result.get("task_key")
+    is_error = method not in _SUCCESS_METHODS
+    span.set_attribute("session_id", int(result.get("session_id", 0)))
+    span.set_attribute("task_key", task_key or "-")
+    span.set_attribute("has_task", task_key is not None)
+    span.set_attribute("session_type", str(result.get("session_type", "")))
+    span.set_attribute("category", str(result.get("category", "")))
+    span.set_attribute("confidence", float(result.get("confidence", 0.0)))
+    span.set_attribute(
+        "category_confidence", float(result.get("category_confidence", 0.0))
+    )
+    span.set_attribute("method", method)
+    span.set_attribute("elapsed_s", float(result.get("elapsed_s", 0.0)))
+    span.set_attribute("is_error", is_error)
+    if is_error:
+        span.set_status(StatusCode.ERROR, str(result.get("reasoning", method))[:300])
 def _classify_one_logged(
     session_id: int,
     con: _sqlite3.Connection,
     run_log: Any,
+) -> dict[str, Any]:
+    """Classify one session, marking the span is_error=true on ANY failure.
+    Wraps the inner worker so an UNHANDLED exception (a sqlite read error,
+    malformed window_titles JSON, …) still stamps is_error=true + ERROR status on
+    the enclosing classify_session span before propagating — otherwise the
+    dashboard's errors-only table (which filters is_error='true') silently misses
+    exactly the crashes an operator opens it to find. Handled failures already
+    return _error_result dicts that _annotate_classification_span marks.
+    """
+    try:
+        return _classify_one_logged_inner(session_id, con, run_log)
+    except Exception as exc:  # noqa: BLE001 — annotate, then re-raise unchanged
+        span = trace.get_current_span()
+        if span.is_recording():
+            span.set_attribute("session_id", int(session_id))
+            span.set_attribute("is_error", True)
+            span.set_attribute("method", "mlx_error")
+            span.set_status(StatusCode.ERROR, str(exc)[:300])
+        log.exception(
+            "run_task_linker_mlx: unhandled error classifying session %d", session_id
+        )
+        raise
+def _classify_one_logged_inner(
+    session_id: int,
+    con: _sqlite3.Connection,
+    run_log: Any,
 ) -> dict[str, Any]:
     """Classify one session and append a full record to the run log."""
     # Gather inputs before classification so we can log them even on error.
@@ -993,6 +1060,7 @@ def _classify_one_logged(
     }
     run_log.write(json.dumps(record, default=str) + "\n")
     run_log.flush()
+    _annotate_classification_span(result)
     return result
@@ -1055,15 +1123,12 @@ def main() -> None:
         try:
             results: list[dict[str, Any]] = []
             for sid in session_ids:
-                with tracer.start_as_current_span("classify_session") as cls_span:
-                    cls_span.set_attribute("session_id", sid)
+                with tracer.start_as_current_span("classify_session"):
+                    # _classify_one_logged enriches this span with the full
+                    # result (session_id, task_key, session_type, is_error, …).
                     log.info("run_task_linker_mlx: classifying session %d", sid)
                     result = _classify_one_logged(sid, con, run_log_file)
                     results.append(result)
-                    cls_span.set_attribute("task_key", result["task_key"] or "-")
-                    cls_span.set_attribute("session_type", result["session_type"])
-                    cls_span.set_attribute("method", result["method"])
-                    cls_span.set_attribute("elapsed_s", result["elapsed_s"])
                     log.info(
                         "run_task_linker_mlx: session_id=%d task_key=%s "
                         "session_type=%s elapsed_s=%.2f",

package/services/agents/server.py CHANGED Viewed

@@ -259,49 +259,43 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
     tracer = _app_state.get("tracer") or trace.get_tracer("meridian-agent-server-mlx")
     parent_ctx = observability.extract_parent_context(req.traceparent)
-    with tracer.start_as_current_span("classify_sessions", context=parent_ctx) as span:
-        span.set_attribute("session_count", len(req.session_ids))
-        # Snapshot the OTel context while classify_sessions span is active so we
-        # can attach it explicitly inside the threadpool (anyio copies contextvars,
-        # but explicit attach is more reliable across anyio versions).
-        ctx_snapshot = _otel_context.get_current()
-        def _classify_all() -> list[dict]:
-            # Attach classify_sessions context so _classify_one sub-spans
-            # (db_fetch, build_prompt, llm_inference, parse_response) appear
-            # as children of classify_sessions in the OO trace waterfall.
-            _tok = _otel_context.attach(ctx_snapshot)
+    # No batch-wrapper span: each session emits a single `classify_session` span
+    # attached directly to the Rust caller's context (via the propagated
+    # traceparent). This keeps the debug trace minimal — one self-describing span
+    # per session with no redundant N=1 wrapper. For N>1, the sessions appear as
+    # sibling classify_session spans under the same daemon trace.
+    def _classify_all() -> list[dict]:
+        _tok = _otel_context.attach(parent_ctx) if parent_ctx is not None else None
+        try:
+            # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
+            # path-traversal: the server knows its DB from the environment.
+            con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
+            con.row_factory = _sqlite3.Row
             try:
-                # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
-                # path-traversal: the server knows its DB from the environment.
-                con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
-                con.row_factory = _sqlite3.Row
-                try:
-                    results: list[dict] = []
-                    for sid in req.session_ids:
-                        with tracer.start_as_current_span(
-                            "classify_session",
-                            attributes={"session_id": sid},
-                        ):
-                            result = m._classify_one_logged(sid, con, fh)
-                        log.info(
-                            "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
-                            sid,
-                            result.get("task_key"),
-                            result.get("session_type"),
-                            result.get("elapsed_s", 0.0),
-                        )
-                        results.append(result)
-                    return results
-                finally:
-                    con.close()
+                results: list[dict] = []
+                for sid in req.session_ids:
+                    # _classify_one_logged owns this span's attributes (session_id,
+                    # task_key, confidence, is_error, …) via _annotate_classification_span
+                    # and emits db_fetch / build_prompt / llm_inference / parse_response
+                    # as its children — one source of truth, matching the CLI path.
+                    with tracer.start_as_current_span("classify_session"):
+                        result = m._classify_one_logged(sid, con, fh)
+                    log.info(
+                        "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
+                        sid,
+                        result.get("task_key"),
+                        result.get("session_type"),
+                        result.get("elapsed_s", 0.0),
+                    )
+                    results.append(result)
+                return results
             finally:
+                con.close()
+        finally:
+            if _tok is not None:
                 _otel_context.detach(_tok)
-        results = await run_in_threadpool(_classify_all)
-        span.set_attribute("classified_count", len(results))
+    results = await run_in_threadpool(_classify_all)
     return {"results": results}
@@ -415,14 +409,42 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
     temperature = req.temperature if req.temperature is not None else 0.3
     max_tokens  = req.max_tokens if req.max_tokens else 2048
+    # Honour OpenAI `response_format: {"type":"json_schema", ...}` by
+    # FSM-constraining decoding to that schema via outlines. Without this, a
+    # reasoning model is free to emit chain-of-thought prose instead of the JSON
+    # the caller (e.g. agno's structured-output path) expects, and the parse
+    # fails. `{"type":"json_object"}` carries no schema, so it stays free-form.
+    output_type = None
+    rf = req.response_format
+    if isinstance(rf, dict) and rf.get("type") == "json_schema":
+        schema = (rf.get("json_schema") or {}).get("schema")
+        if schema:
+            from outlines.types import JsonSchema
+            output_type = JsonSchema(schema)
     from agents.llm_selector import APPLE_INTELLIGENCE_ID
+    # A `json_schema` request cannot be honoured on Apple Foundation Models:
+    # outlines FSM-constrained decoding is incompatible with FM, so the schema
+    # would be silently dropped and a structured-output caller (e.g. agno) would
+    # get free-form text that fails to parse downstream. Reject explicitly with a
+    # 4xx rather than emit unconstrained output that breaks later.
+    if output_type is not None and m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+        raise HTTPException(
+            status_code=400,
+            detail="response_format=json_schema is not supported on Apple "
+            "Foundation Models (no FSM-constrained decoding available)",
+        )
     def _generate() -> str:
         if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+            # outlines FSM decoding is incompatible with Foundation Models;
+            # Apple FM falls back to free-form (json_object / no schema only).
             return _infer_apple_fm(msgs, max_tokens)
         with m.model_session() as model:
             return model(
                 Chat(msgs),
+                output_type=output_type,
                 max_tokens=max_tokens,
                 sampler=make_sampler(temp=temperature),
                 verbose=False,

package/services/observability/dashboards/classifier-debug.json ADDED Viewed

@@ -0,0 +1,174 @@
+{
+  "title": "Session→Task Classifier — Debug",
+  "description": "Every session-task classification, newest first. Filter by session_id, session_type, or errors-only; copy a row's trace_id and open it in Traces for the full waterfall (db_fetch → build_prompt → llm_inference → parse_response, with raw_mlx_output). Backed by the enriched `classify_session` spans (service meridian-agent-server-mlx in-process, or meridian-task-linker-mlx from the standalone CLI). Drilldown keys on trace_id alone so it works for both.",
+  "version": 5,
+  "variables": {
+    "list": [
+      {
+        "type": "textbox",
+        "name": "session_id",
+        "label": "Session ID",
+        "query_data": null,
+        "value": "",
+        "options": [],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      },
+      {
+        "type": "custom",
+        "name": "session_type",
+        "label": "Session type",
+        "query_data": null,
+        "value": "",
+        "options": [
+          {"label": "All", "value": "", "selected": true},
+          {"label": "task", "value": "task", "selected": false},
+          {"label": "overhead", "value": "overhead", "selected": false},
+          {"label": "untracked", "value": "untracked", "selected": false}
+        ],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      },
+      {
+        "type": "custom",
+        "name": "errors_only",
+        "label": "Errors only",
+        "query_data": null,
+        "value": "",
+        "options": [
+          {"label": "All", "value": "", "selected": true},
+          {"label": "Errors only", "value": "true", "selected": false}
+        ],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      }
+    ],
+    "showDynamicFilters": true
+  },
+  "defaultDatetimeDuration": {"type": "relative", "relativeTimePeriod": "12h", "startTime": null, "endTime": null},
+  "tabs": [
+    {
+      "tabId": "default",
+      "name": "Default",
+      "panels": [
+        {
+          "id": "stat_total",
+          "type": "metric",
+          "title": "Classifications",
+          "description": "Total classify_session spans in range",
+          "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Classifications", "alias": "y_axis_1", "column": "y_axis_1", "color": "#5960b2", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 0, "y": 0, "w": 12, "h": 6, "i": 1}
+        },
+        {
+          "id": "stat_errors",
+          "type": "metric",
+          "title": "Errors",
+          "description": "Classifications whose method failed (is_error=true)",
+          "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Errors", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b25959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 12, "y": 0, "w": 12, "h": 6, "i": 2}
+        },
+        {
+          "id": "stat_untracked",
+          "type": "metric",
+          "title": "Untracked",
+          "description": "Sessions classified as untracked (no ticket)",
+          "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND session_type='untracked'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Untracked", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b29959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 24, "y": 0, "w": 12, "h": 6, "i": 3}
+        },
+        {
+          "id": "stat_conf",
+          "type": "metric",
+          "title": "Avg confidence",
+          "description": "Mean confidence of successful classifications",
+          "config": {"show_legends": false, "unit": null, "decimals": 2, "no_value_replacement": "0"},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT round(avg(CAST(confidence AS DOUBLE)),2) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='false'",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Avg confidence", "alias": "y_axis_1", "column": "y_axis_1", "color": "#59b27a", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 36, "y": 0, "w": 12, "h": 6, "i": 4}
+        },
+        {
+          "id": "table_all",
+          "type": "table",
+          "title": "All classifications (newest first)",
+          "description": "Filter with the Session ID / Session type / Errors only variables above. Click any row → opens the Traces view filtered to just that trace's spans.",
+          "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') ORDER BY _timestamp DESC",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 0, "y": 6, "w": 48, "h": 14, "i": 5}
+        },
+        {
+          "id": "table_errors",
+          "type": "table",
+          "title": "Errors only",
+          "description": "Failed classifications — inference errors, schema errors, invalid task_key, session-not-found. Click a row → opens just that trace's spans.",
+          "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
+          "queryType": "sql",
+          "queries": [
+            {
+              "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' ORDER BY _timestamp DESC",
+              "vrlFunctionQuery": "",
+              "customQuery": true,
+              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
+            }
+          ],
+          "layout": {"x": 0, "y": 20, "w": 48, "h": 10, "i": 6}
+        }
+      ]
+    }
+  ]
+}

package/services/pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "meridian-agents"
-version = "1.54.1"
+version = "1.55.0"
 description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
 requires-python = ">=3.11"
 authors = [{ name = "Meridiona" }]

package/ui.tar.gz CHANGED Viewed

Binary file