@meridiona/meridian-darwin-arm64 1.54.1 → 1.55.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.54.1
1
+ 1.55.0
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.54.1",
3
+ "version": "1.55.0",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -41,15 +41,19 @@ elif command -v openobserve >/dev/null 2>&1; then
41
41
  fi
42
42
 
43
43
  if [[ -z "${OO_BIN}" ]]; then
44
- echo "→ OpenObserve binary not found — downloading v0.11.0 (last release with arm64 binary)..."
44
+ echo "→ OpenObserve binary not found — downloading v0.90.3..."
45
45
  _oo_arch="$(uname -m)"
46
46
  case "$_oo_arch" in
47
47
  arm64) _oo_arch="arm64" ;;
48
48
  x86_64) _oo_arch="amd64" ;;
49
49
  *) echo "✗ Unsupported arch: $_oo_arch" >&2; exit 1 ;;
50
50
  esac
51
- _oo_ver="v0.11.0"
52
- _oo_url="https://github.com/openobserve/openobserve/releases/download/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
51
+ # GitHub release assets were removed for recent versions; binaries now live on
52
+ # the official downloads host. Trace deep-linking (dashboard drilldown into a
53
+ # single trace's spans) needs a modern build, so we pin a current stable release.
54
+ # KEEP IN SYNC: the same version is pinned in install.sh — bump both together.
55
+ _oo_ver="v0.90.3"
56
+ _oo_url="https://downloads.openobserve.ai/releases/openobserve/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
53
57
  mkdir -p "${HOME}/.openobserve"
54
58
  if curl -fsSL -o "${HOME}/.openobserve/openobserve.tar.gz" "$_oo_url" \
55
59
  && tar -xzf "${HOME}/.openobserve/openobserve.tar.gz" -C "${HOME}/.openobserve" \
@@ -4,7 +4,9 @@ A single `setup(agent_name)` call wires up:
4
4
 
5
5
  * an OTel `TracerProvider` with `service.name=agent_name`
6
6
  * a `BatchSpanProcessor` exporting OTLP/HTTP-protobuf spans to OpenObserve
7
- (`MERIDIAN_OTLP_TRACES_ENDPOINT`, with Basic auth via `MERIDIAN_OO_AUTH`)
7
+ * a `LoggerProvider` + OTLP-logs handler so every `logging.LogRecord` is also
8
+ shipped to OpenObserve (correlated to the active span), mirroring the Rust
9
+ daemon's `OpenTelemetryTracingBridge`
8
10
  * W3C `TraceContextTextMapPropagator` as the global propagator so each
9
11
  agent can pick up the Rust daemon's `traceparent` and continue the trace
10
12
  * `LoggingInstrumentor` so every `logging.LogRecord` carries
@@ -13,6 +15,12 @@ A single `setup(agent_name)` call wires up:
13
15
  under `~/.meridian/logs/{agent_name}.jsonl` plus stderr — both ingestable
14
16
  by OpenObserve's log pipeline without further parsing.
15
17
 
18
+ Export config (endpoint + Basic-auth credentials) is resolved from the SAME
19
+ `~/.meridian/settings.json` the Rust daemon reads — `otlp_enabled`,
20
+ `otlp_endpoint`, `oo_email`, `oo_password` — so the dashboard Settings page is
21
+ the single source of truth for both processes. The legacy `MERIDIAN_OO_AUTH`
22
+ env credential is deprecated and ignored, matching the daemon.
23
+
16
24
  `extract_parent_context(traceparent)` is the helper agents use to continue
17
25
  a span emitted by another process — typically the Rust ETL or another
18
26
  agent stage.
@@ -23,20 +31,26 @@ single-shot CLI paths funnel through the same module.
23
31
  """
24
32
  from __future__ import annotations
25
33
 
34
+ import base64
35
+ import json
26
36
  import logging
27
37
  import logging.handlers
28
38
  import os
29
39
  import sys
30
40
  from pathlib import Path
31
- from typing import Optional
41
+ from typing import NamedTuple, Optional
32
42
 
33
43
  from opentelemetry import trace
44
+ from opentelemetry._logs import set_logger_provider
34
45
  from opentelemetry.context import Context
46
+ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
35
47
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
36
48
  OTLPSpanExporter,
37
49
  )
38
50
  from opentelemetry.instrumentation.logging import LoggingInstrumentor
39
51
  from opentelemetry.propagate import set_global_textmap
52
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
53
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
40
54
  from opentelemetry.sdk.resources import Resource
41
55
  from opentelemetry.sdk.trace import TracerProvider
42
56
  from opentelemetry.sdk.trace.export import BatchSpanProcessor
@@ -50,12 +64,125 @@ from pythonjsonlogger import jsonlogger
50
64
  DEFAULT_TRACES_ENDPOINT = "http://localhost:5080/api/default/v1/traces"
51
65
  DEFAULT_LOGS_ENDPOINT = "http://localhost:5080/api/default/v1/logs"
52
66
  DEFAULT_LOG_DIR = Path.home() / ".meridian" / "logs"
67
+ # Single source of truth for OpenObserve export config — the SAME file the Rust
68
+ # daemon reads (see `src/observability.rs::resolve_otlp_target`). Keeps the two
69
+ # processes credential-aligned: the dashboard Settings page writes here and both
70
+ # the daemon and this MLX server pick it up with no env plumbing.
71
+ _SETTINGS_PATH = Path(
72
+ os.environ.get("MERIDIAN_SETTINGS_PATH")
73
+ or (Path.home() / ".meridian" / "settings.json")
74
+ )
53
75
 
54
76
  _NOISY_LOGGERS = ("urllib3", "httpx", "httpcore", "openai", "botocore")
55
77
 
56
78
  # Track which agents have been configured so a second setup() call is a no-op.
57
79
  _INITIALISED: dict[str, trace.Tracer] = {}
58
80
  _PROCESS_SERVICE_NAME: str | None = None
81
+ # Held so shutdown() can flush log records the same way it flushes spans.
82
+ _LOGGER_PROVIDER: LoggerProvider | None = None
83
+ # One-time guard so an export misconfiguration (enabled-but-no-creds, or a
84
+ # schemeless endpoint) warns once per process instead of on every resolve.
85
+ _WARNED_EXPORT_MISCONFIG: bool = False
86
+
87
+
88
+ # ──────────────────────── OTLP target resolution ───────────────────────────────
89
+ class _OtlpTarget(NamedTuple):
90
+ """Resolved OTLP export target: signal endpoints + Basic-auth header value."""
91
+
92
+ traces_endpoint: str
93
+ logs_endpoint: str
94
+ headers: dict[str, str]
95
+
96
+
97
+ def _load_settings() -> dict[str, object]:
98
+ """Read `~/.meridian/settings.json`; empty dict if absent/unreadable."""
99
+ try:
100
+ with _SETTINGS_PATH.open(encoding="utf-8") as fh:
101
+ data = json.load(fh)
102
+ return data if isinstance(data, dict) else {}
103
+ except (OSError, ValueError):
104
+ return {}
105
+
106
+
107
+ def _resolve_otlp_target() -> Optional[_OtlpTarget]:
108
+ """Mirror of the Rust daemon's `resolve_otlp_target()`.
109
+
110
+ Returns `None` (→ export disabled) when `MERIDIAN_TRACING_DISABLED` is set, the toggle is off,
111
+ credentials are missing or malformed, or the endpoint lacks an http/https scheme. Endpoint precedence: settings.json `otlp_endpoint` → the
112
+ `MERIDIAN_OTLP_TRACES_ENDPOINT`/`MERIDIAN_OTLP_ENDPOINT` env fallback →
113
+ the localhost default. Auth is `base64(oo_email:oo_password)` — settings.json
114
+ only; the legacy `MERIDIAN_OO_AUTH` env path is deprecated and ignored, the
115
+ same decision the daemon made.
116
+ """
117
+ global _WARNED_EXPORT_MISCONFIG
118
+
119
+ if os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes"):
120
+ return None
121
+
122
+ settings = _load_settings()
123
+ if not settings.get("otlp_enabled"):
124
+ return None
125
+
126
+ # Resolve the endpoint up front so we can warn (not silently disable) when
127
+ # export is enabled but unusable. Precedence: settings → env → localhost.
128
+ configured = str(settings.get("otlp_endpoint") or "").strip()
129
+ env_endpoint = (
130
+ os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
131
+ or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
132
+ )
133
+ traces_endpoint = configured or env_endpoint or DEFAULT_TRACES_ENDPOINT
134
+
135
+ def _warn_once(msg: str, *args: object) -> None:
136
+ global _WARNED_EXPORT_MISCONFIG
137
+ if not _WARNED_EXPORT_MISCONFIG:
138
+ _WARNED_EXPORT_MISCONFIG = True
139
+ logging.getLogger(__name__).warning(msg, *args)
140
+
141
+ email = str(settings.get("oo_email") or "")
142
+ password = str(settings.get("oo_password") or "")
143
+ if not email or not password:
144
+ # otlp_enabled but no usable credentials → export OFF. Warn once so an
145
+ # env-only (MERIDIAN_OO_AUTH) install that predates the settings.json
146
+ # credential move doesn't go dark silently — mirrors the daemon, which
147
+ # also warns. MERIDIAN_OO_AUTH is no longer read here.
148
+ _warn_once(
149
+ "OpenObserve export enabled but oo_email/oo_password missing in %s — "
150
+ "traces+logs export DISABLED. Set credentials in the dashboard Settings "
151
+ "(the MERIDIAN_OO_AUTH env var is no longer used).",
152
+ _SETTINGS_PATH,
153
+ )
154
+ return None
155
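+ # Guard against HTTP header injection / malformed user:password splits —
156
+ # mirrors the daemon's equivalent check. NOTE: unlike the missing-credentials and bad-scheme paths, this one disables export without a warning.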
157
+ if any(c in email for c in "\r\n:") or any(c in password for c in "\r\n"):
158
+ return None
159
+ auth = base64.standard_b64encode(f"{email}:{password}".encode()).decode()
160
+
161
+ # Validate scheme — only http/https are valid OTLP transports. The daemon
162
+ # disables export + warns on a schemeless endpoint; mirror that exactly so the
163
+ # two processes don't disagree on whether export is on.
164
+ if not (traces_endpoint.startswith("http://") or traces_endpoint.startswith("https://")):
165
+ _warn_once(
166
+ "OTLP endpoint %r has no http/https scheme — export DISABLED.",
167
+ traces_endpoint,
168
+ )
169
+ return None
170
+
171
+ # OpenObserve serves logs at the sibling `…/v1/logs` path. Derive it from the
172
+ # traces endpoint by swapping the trailing signal segment so a custom host or
173
+ # base (incl. a trailing slash, e.g. `…/v1/traces/`) carries to BOTH signals —
174
+ # never silently fall back to localhost for logs while traces go remote.
175
+ t = traces_endpoint.rstrip("/")
176
+ if t.endswith("/v1/traces"):
177
+ logs_endpoint = t[: -len("/v1/traces")] + "/v1/logs"
178
+ elif t.endswith("/traces"):
179
+ logs_endpoint = t[: -len("/traces")] + "/logs"
180
+ elif "traces" in t:
181
+ logs_endpoint = t.rsplit("traces", 1)[0] + "logs"
182
+ else:
183
+ logs_endpoint = t + "/v1/logs"
184
+
185
+ return _OtlpTarget(traces_endpoint, logs_endpoint, {"Authorization": f"Basic {auth}"})
59
186
 
60
187
 
61
188
  # ──────────────────────── Public API ───────────────────────────────────────────
@@ -80,8 +207,13 @@ def setup(agent_name: str) -> trace.Tracer:
80
207
 
81
208
  if _PROCESS_SERVICE_NAME is None:
82
209
  _PROCESS_SERVICE_NAME = agent_name
83
- _configure_tracing(agent_name)
84
- _configure_logging(agent_name)
210
+ # Resolve the export target ONCE and pass it to both configurers — a
211
+ # second read could see a settings.json the dashboard rewrote mid-setup
212
+ # (TOCTOU), leaving traces enabled while logs resolve disabled (or with
213
+ # different creds/endpoint) in the same process.
214
+ target = _resolve_otlp_target()
215
+ _configure_tracing(agent_name, target)
216
+ _configure_logging(agent_name, target)
85
217
  logging.getLogger(agent_name).info(
86
218
  "observability initialised",
87
219
  extra={"service.name": agent_name},
@@ -105,6 +237,12 @@ def shutdown() -> None:
105
237
  if hasattr(provider, "shutdown"):
106
238
  provider.shutdown()
107
239
 
240
+ # Flush queued log records too — BatchLogRecordProcessor drops them on
241
+ # interpreter exit otherwise, the same hazard as spans.
242
+ if _LOGGER_PROVIDER is not None:
243
+ _LOGGER_PROVIDER.force_flush(timeout_millis=5_000)
244
+ _LOGGER_PROVIDER.shutdown()
245
+
108
246
 
109
247
  def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
110
248
  """Parse an incoming W3C `traceparent` header into an OTel `Context`.
@@ -119,21 +257,14 @@ def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
119
257
 
120
258
 
121
259
  # ──────────────────────── Tracing setup ────────────────────────────────────────
122
- def _configure_tracing(agent_name: str) -> None:
260
+ def _configure_tracing(agent_name: str, target: Optional[_OtlpTarget]) -> None:
123
261
  resource = Resource.create({"service.name": agent_name})
124
262
  provider = TracerProvider(resource=resource)
125
263
 
126
- disabled = os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes")
127
- endpoint = (
128
- os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
129
- or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
130
- )
131
- if not disabled and endpoint:
132
- headers: dict[str, str] = {}
133
- auth = os.environ.get("MERIDIAN_OO_AUTH")
134
- if auth:
135
- headers["Authorization"] = f"Basic {auth}"
136
- exporter = OTLPSpanExporter(endpoint=endpoint, headers=headers)
264
+ if target is not None:
265
+ exporter = OTLPSpanExporter(
266
+ endpoint=target.traces_endpoint, headers=target.headers
267
+ )
137
268
  provider.add_span_processor(BatchSpanProcessor(exporter))
138
269
 
139
270
  # Set as the global provider. OTel's `set_tracer_provider` warns if
@@ -143,8 +274,32 @@ def _configure_tracing(agent_name: str) -> None:
143
274
  set_global_textmap(TraceContextTextMapPropagator())
144
275
 
145
276
 
277
+ def _configure_log_export(
278
+ agent_name: str, target: Optional[_OtlpTarget]
279
+ ) -> Optional[logging.Handler]:
280
+ """Build an OTLP-logs handler so every `log.*` record reaches OpenObserve,
281
+ correlated to the active span by trace_id/span_id — the Python counterpart
282
+ of the Rust daemon's `OpenTelemetryTracingBridge`.
283
+
284
+ Returns the handler (caller attaches it to root) or `None` when export is
285
+ disabled, in which case logs still go to the JSONL file + stdout/stderr.
286
+ """
287
+ global _LOGGER_PROVIDER
288
+
289
+ if target is None:
290
+ return None
291
+
292
+ resource = Resource.create({"service.name": agent_name})
293
+ provider = LoggerProvider(resource=resource)
294
+ exporter = OTLPLogExporter(endpoint=target.logs_endpoint, headers=target.headers)
295
+ provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
296
+ set_logger_provider(provider)
297
+ _LOGGER_PROVIDER = provider
298
+ return LoggingHandler(level=logging.NOTSET, logger_provider=provider)
299
+
300
+
146
301
  # ──────────────────────── Logging setup ────────────────────────────────────────
147
- def _configure_logging(agent_name: str) -> None:
302
+ def _configure_logging(agent_name: str, target: Optional[_OtlpTarget]) -> None:
148
303
  log_dir = Path(os.environ.get("MERIDIAN_LOG_DIR") or DEFAULT_LOG_DIR)
149
304
  log_dir.mkdir(parents=True, exist_ok=True)
150
305
  log_path = log_dir / f"{agent_name}.jsonl"
@@ -204,6 +359,22 @@ def _configure_logging(agent_name: str) -> None:
204
359
  root.addHandler(file_h)
205
360
  root.addHandler(stdout_h)
206
361
  root.addHandler(stderr_h)
362
+ # Ship every record to OpenObserve via OTLP/HTTP logs too, when export is
363
+ # configured. The OTel LoggingHandler reads the active span context, so each
364
+ # OO log row carries the trace_id/span_id that ties it to the classifier's
365
+ # span waterfall. No-op (None) when OTLP is disabled.
366
+ # The OTLP handler already carries service.name via the OTel Resource, so it
367
+ # needs no _ServiceFilter (that would duplicate the attribute on each record).
368
+ otlp_log_h = _configure_log_export(agent_name, target)
369
+ if otlp_log_h is not None:
370
+ # Do NOT feed the OTLP exporter's OWN transport logs back into OTLP
371
+ # export: on export failure httpx/urllib3/opentelemetry emit WARNING+
372
+ # records which this root handler would try to export → more failures (a
373
+ # log→export→log loop). Drop those from THIS handler only — they still
374
+ # reach the file/stderr handlers.
375
+ _otlp_excluded = ("httpx", "httpcore", "urllib3", "grpc", "opentelemetry")
376
+ otlp_log_h.addFilter(lambda r: not r.name.startswith(_otlp_excluded))
377
+ root.addHandler(otlp_log_h)
207
378
  root.setLevel(level)
208
379
 
209
380
  for noisy in _NOISY_LOGGERS:
@@ -37,6 +37,7 @@ from contextlib import contextmanager
37
37
  from pathlib import Path
38
38
  from typing import Any, Literal, Optional, Iterator
39
39
 
40
+ from opentelemetry import trace
40
41
  from opentelemetry.trace import StatusCode
41
42
  from pydantic import BaseModel, Field
42
43
 
@@ -945,10 +946,76 @@ def _open_run_log(db_path: str) -> "tuple[Path, Any]":
945
946
  return log_path, log_path.open("w", encoding="utf-8")
946
947
 
947
948
 
949
+ # `method` values that mean the model produced a usable classification.
950
+ # Anything else is an error path the dashboard surfaces under errors-only. The
951
+ # real error `method` values emitted by `_error_result` are `mlx_parse_error`
952
+ # (schema validation / unknown task_key — those names are child-span `outcome`
953
+ # attributes, NOT methods) and `mlx_error` (inference failure / session-not-found).
954
+ _SUCCESS_METHODS = {"mlx_direct", "apple_fm"}
955
+
956
+
957
+ def _annotate_classification_span(result: dict[str, Any]) -> None:
958
+ """Promote the classification result onto the enclosing `classify_session`
959
+ span so each session is ONE self-describing row in OpenObserve — filterable
960
+ by session_id / session_type / task_key / is_error without joining the child
961
+ spans. Both the server and CLI entry points wrap the call in a
962
+ `classify_session` span, so annotating the current span here covers both.
963
+ """
964
+ span = trace.get_current_span()
965
+ if not span.is_recording():
966
+ return
967
+ method = str(result.get("method", ""))
968
+ task_key = result.get("task_key")
969
+ is_error = method not in _SUCCESS_METHODS
970
+ span.set_attribute("session_id", int(result.get("session_id", 0)))
971
+ span.set_attribute("task_key", task_key or "-")
972
+ span.set_attribute("has_task", task_key is not None)
973
+ span.set_attribute("session_type", str(result.get("session_type", "")))
974
+ span.set_attribute("category", str(result.get("category", "")))
975
+ span.set_attribute("confidence", float(result.get("confidence", 0.0)))
976
+ span.set_attribute(
977
+ "category_confidence", float(result.get("category_confidence", 0.0))
978
+ )
979
+ span.set_attribute("method", method)
980
+ span.set_attribute("elapsed_s", float(result.get("elapsed_s", 0.0)))
981
+ span.set_attribute("is_error", is_error)
982
+ if is_error:
983
+ span.set_status(StatusCode.ERROR, str(result.get("reasoning", method))[:300])
984
+
985
+
948
986
  def _classify_one_logged(
949
987
  session_id: int,
950
988
  con: _sqlite3.Connection,
951
989
  run_log: Any,
990
+ ) -> dict[str, Any]:
991
+ """Classify one session, marking the span is_error=true on ANY failure.
992
+
993
+ Wraps the inner worker so an UNHANDLED exception (a sqlite read error,
994
+ malformed window_titles JSON, …) still stamps is_error=true + ERROR status on
995
+ the enclosing classify_session span before propagating — otherwise the
996
+ dashboard's errors-only table (which filters is_error='true') silently misses
997
+ exactly the crashes an operator opens it to find. Handled failures already
998
+ return _error_result dicts that _annotate_classification_span marks.
999
+ """
1000
+ try:
1001
+ return _classify_one_logged_inner(session_id, con, run_log)
1002
+ except Exception as exc: # noqa: BLE001 — annotate, then re-raise unchanged
1003
+ span = trace.get_current_span()
1004
+ if span.is_recording():
1005
+ span.set_attribute("session_id", int(session_id))
1006
+ span.set_attribute("is_error", True)
1007
+ span.set_attribute("method", "mlx_error")
1008
+ span.set_status(StatusCode.ERROR, str(exc)[:300])
1009
+ log.exception(
1010
+ "run_task_linker_mlx: unhandled error classifying session %d", session_id
1011
+ )
1012
+ raise
1013
+
1014
+
1015
+ def _classify_one_logged_inner(
1016
+ session_id: int,
1017
+ con: _sqlite3.Connection,
1018
+ run_log: Any,
952
1019
  ) -> dict[str, Any]:
953
1020
  """Classify one session and append a full record to the run log."""
954
1021
  # Gather inputs before classification so we can log them even on error.
@@ -993,6 +1060,7 @@ def _classify_one_logged(
993
1060
  }
994
1061
  run_log.write(json.dumps(record, default=str) + "\n")
995
1062
  run_log.flush()
1063
+ _annotate_classification_span(result)
996
1064
  return result
997
1065
 
998
1066
 
@@ -1055,15 +1123,12 @@ def main() -> None:
1055
1123
  try:
1056
1124
  results: list[dict[str, Any]] = []
1057
1125
  for sid in session_ids:
1058
- with tracer.start_as_current_span("classify_session") as cls_span:
1059
- cls_span.set_attribute("session_id", sid)
1126
+ with tracer.start_as_current_span("classify_session"):
1127
+ # _classify_one_logged enriches this span with the full
1128
+ # result (session_id, task_key, session_type, is_error, …).
1060
1129
  log.info("run_task_linker_mlx: classifying session %d", sid)
1061
1130
  result = _classify_one_logged(sid, con, run_log_file)
1062
1131
  results.append(result)
1063
- cls_span.set_attribute("task_key", result["task_key"] or "-")
1064
- cls_span.set_attribute("session_type", result["session_type"])
1065
- cls_span.set_attribute("method", result["method"])
1066
- cls_span.set_attribute("elapsed_s", result["elapsed_s"])
1067
1132
  log.info(
1068
1133
  "run_task_linker_mlx: session_id=%d task_key=%s "
1069
1134
  "session_type=%s elapsed_s=%.2f",
@@ -259,49 +259,43 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
259
259
  tracer = _app_state.get("tracer") or trace.get_tracer("meridian-agent-server-mlx")
260
260
  parent_ctx = observability.extract_parent_context(req.traceparent)
261
261
 
262
- with tracer.start_as_current_span("classify_sessions", context=parent_ctx) as span:
263
- span.set_attribute("session_count", len(req.session_ids))
264
-
265
- # Snapshot the OTel context while classify_sessions span is active so we
266
- # can attach it explicitly inside the threadpool (anyio copies contextvars,
267
- # but explicit attach is more reliable across anyio versions).
268
- ctx_snapshot = _otel_context.get_current()
269
-
270
- def _classify_all() -> list[dict]:
271
- # Attach classify_sessions context so _classify_one sub-spans
272
- # (db_fetch, build_prompt, llm_inference, parse_response) appear
273
- # as children of classify_sessions in the OO trace waterfall.
274
- _tok = _otel_context.attach(ctx_snapshot)
262
+ # No batch-wrapper span: each session emits a single `classify_session` span
263
+ # attached directly to the Rust caller's context (via the propagated
264
+ # traceparent). This keeps the debug trace minimal — one self-describing span
265
+ # per session with no redundant N=1 wrapper. For N>1, the sessions appear as
266
+ # sibling classify_session spans under the same daemon trace.
267
+ def _classify_all() -> list[dict]:
268
+ _tok = _otel_context.attach(parent_ctx) if parent_ctx is not None else None
269
+ try:
270
+ # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
271
+ # path-traversal: the server knows its DB from the environment.
272
+ con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
273
+ con.row_factory = _sqlite3.Row
275
274
  try:
276
- # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
277
- # path-traversal: the server knows its DB from the environment.
278
- con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
279
- con.row_factory = _sqlite3.Row
280
- try:
281
- results: list[dict] = []
282
- for sid in req.session_ids:
283
- with tracer.start_as_current_span(
284
- "classify_session",
285
- attributes={"session_id": sid},
286
- ):
287
- result = m._classify_one_logged(sid, con, fh)
288
- log.info(
289
- "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
290
- sid,
291
- result.get("task_key"),
292
- result.get("session_type"),
293
- result.get("elapsed_s", 0.0),
294
- )
295
- results.append(result)
296
- return results
297
- finally:
298
- con.close()
275
+ results: list[dict] = []
276
+ for sid in req.session_ids:
277
+ # _classify_one_logged owns this span's attributes (session_id,
278
+ # task_key, confidence, is_error, …) via _annotate_classification_span
279
+ # and emits db_fetch / build_prompt / llm_inference / parse_response
280
+ # as its children — one source of truth, matching the CLI path.
281
+ with tracer.start_as_current_span("classify_session"):
282
+ result = m._classify_one_logged(sid, con, fh)
283
+ log.info(
284
+ "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
285
+ sid,
286
+ result.get("task_key"),
287
+ result.get("session_type"),
288
+ result.get("elapsed_s", 0.0),
289
+ )
290
+ results.append(result)
291
+ return results
299
292
  finally:
293
+ con.close()
294
+ finally:
295
+ if _tok is not None:
300
296
  _otel_context.detach(_tok)
301
297
 
302
- results = await run_in_threadpool(_classify_all)
303
- span.set_attribute("classified_count", len(results))
304
-
298
+ results = await run_in_threadpool(_classify_all)
305
299
  return {"results": results}
306
300
 
307
301
 
@@ -415,14 +409,42 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
415
409
  temperature = req.temperature if req.temperature is not None else 0.3
416
410
  max_tokens = req.max_tokens if req.max_tokens else 2048
417
411
 
412
+ # Honour OpenAI `response_format: {"type":"json_schema", ...}` by
413
+ # FSM-constraining decoding to that schema via outlines. Without this, a
414
+ # reasoning model is free to emit chain-of-thought prose instead of the JSON
415
+ # the caller (e.g. agno's structured-output path) expects, and the parse
416
+ # fails. `{"type":"json_object"}` carries no schema, so it stays free-form.
417
+ output_type = None
418
+ rf = req.response_format
419
+ if isinstance(rf, dict) and rf.get("type") == "json_schema":
420
+ schema = (rf.get("json_schema") or {}).get("schema")
421
+ if schema:
422
+ from outlines.types import JsonSchema
423
+ output_type = JsonSchema(schema)
424
+
418
425
  from agents.llm_selector import APPLE_INTELLIGENCE_ID
419
426
 
427
+ # A `json_schema` request cannot be honoured on Apple Foundation Models:
428
+ # outlines FSM-constrained decoding is incompatible with FM, so the schema
429
+ # would be silently dropped and a structured-output caller (e.g. agno) would
430
+ # get free-form text that fails to parse downstream. Reject explicitly with a
431
+ # 4xx rather than emit unconstrained output that breaks later.
432
+ if output_type is not None and m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
433
+ raise HTTPException(
434
+ status_code=400,
435
+ detail="response_format=json_schema is not supported on Apple "
436
+ "Foundation Models (no FSM-constrained decoding available)",
437
+ )
438
+
420
439
  def _generate() -> str:
421
440
  if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
441
+ # outlines FSM decoding is incompatible with Foundation Models;
442
+ # Apple FM falls back to free-form (json_object / no schema only).
422
443
  return _infer_apple_fm(msgs, max_tokens)
423
444
  with m.model_session() as model:
424
445
  return model(
425
446
  Chat(msgs),
447
+ output_type=output_type,
426
448
  max_tokens=max_tokens,
427
449
  sampler=make_sampler(temp=temperature),
428
450
  verbose=False,
@@ -0,0 +1,174 @@
1
+ {
2
+ "title": "Session→Task Classifier — Debug",
3
+ "description": "Every session-task classification, newest first. Filter by session_id, session_type, or errors-only; copy a row's trace_id and open it in Traces for the full waterfall (db_fetch → build_prompt → llm_inference → parse_response, with raw_mlx_output). Backed by the enriched `classify_session` spans (service meridian-agent-server-mlx in-process, or meridian-task-linker-mlx from the standalone CLI). Drilldown keys on trace_id alone so it works for both.",
4
+ "version": 5,
5
+ "variables": {
6
+ "list": [
7
+ {
8
+ "type": "textbox",
9
+ "name": "session_id",
10
+ "label": "Session ID",
11
+ "query_data": null,
12
+ "value": "",
13
+ "options": [],
14
+ "multiSelect": false,
15
+ "hideOnDashboard": false,
16
+ "selectAllValueForMultiSelect": "custom",
17
+ "customMultiSelectValue": [],
18
+ "escapeSingleQuotes": true
19
+ },
20
+ {
21
+ "type": "custom",
22
+ "name": "session_type",
23
+ "label": "Session type",
24
+ "query_data": null,
25
+ "value": "",
26
+ "options": [
27
+ {"label": "All", "value": "", "selected": true},
28
+ {"label": "task", "value": "task", "selected": false},
29
+ {"label": "overhead", "value": "overhead", "selected": false},
30
+ {"label": "untracked", "value": "untracked", "selected": false}
31
+ ],
32
+ "multiSelect": false,
33
+ "hideOnDashboard": false,
34
+ "selectAllValueForMultiSelect": "custom",
35
+ "customMultiSelectValue": [],
36
+ "escapeSingleQuotes": true
37
+ },
38
+ {
39
+ "type": "custom",
40
+ "name": "errors_only",
41
+ "label": "Errors only",
42
+ "query_data": null,
43
+ "value": "",
44
+ "options": [
45
+ {"label": "All", "value": "", "selected": true},
46
+ {"label": "Errors only", "value": "true", "selected": false}
47
+ ],
48
+ "multiSelect": false,
49
+ "hideOnDashboard": false,
50
+ "selectAllValueForMultiSelect": "custom",
51
+ "customMultiSelectValue": [],
52
+ "escapeSingleQuotes": true
53
+ }
54
+ ],
55
+ "showDynamicFilters": true
56
+ },
57
+ "defaultDatetimeDuration": {"type": "relative", "relativeTimePeriod": "12h", "startTime": null, "endTime": null},
58
+ "tabs": [
59
+ {
60
+ "tabId": "default",
61
+ "name": "Default",
62
+ "panels": [
63
+ {
64
+ "id": "stat_total",
65
+ "type": "metric",
66
+ "title": "Classifications",
67
+ "description": "Total classify_session spans in range",
68
+ "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
69
+ "queryType": "sql",
70
+ "queries": [
71
+ {
72
+ "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session'",
73
+ "vrlFunctionQuery": "",
74
+ "customQuery": true,
75
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Classifications", "alias": "y_axis_1", "column": "y_axis_1", "color": "#5960b2", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
76
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
77
+ }
78
+ ],
79
+ "layout": {"x": 0, "y": 0, "w": 12, "h": 6, "i": 1}
80
+ },
81
+ {
82
+ "id": "stat_errors",
83
+ "type": "metric",
84
+ "title": "Errors",
85
+ "description": "Classifications whose method failed (is_error=true)",
86
+ "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
87
+ "queryType": "sql",
88
+ "queries": [
89
+ {
90
+ "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true'",
91
+ "vrlFunctionQuery": "",
92
+ "customQuery": true,
93
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Errors", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b25959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
94
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
95
+ }
96
+ ],
97
+ "layout": {"x": 12, "y": 0, "w": 12, "h": 6, "i": 2}
98
+ },
99
+ {
100
+ "id": "stat_untracked",
101
+ "type": "metric",
102
+ "title": "Untracked",
103
+ "description": "Sessions classified as untracked (no ticket)",
104
+ "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
105
+ "queryType": "sql",
106
+ "queries": [
107
+ {
108
+ "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND session_type='untracked'",
109
+ "vrlFunctionQuery": "",
110
+ "customQuery": true,
111
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Untracked", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b29959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
112
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
113
+ }
114
+ ],
115
+ "layout": {"x": 24, "y": 0, "w": 12, "h": 6, "i": 3}
116
+ },
117
+ {
118
+ "id": "stat_conf",
119
+ "type": "metric",
120
+ "title": "Avg confidence",
121
+ "description": "Mean confidence of successful classifications",
122
+ "config": {"show_legends": false, "unit": null, "decimals": 2, "no_value_replacement": "0"},
123
+ "queryType": "sql",
124
+ "queries": [
125
+ {
126
+ "query": "SELECT round(avg(CAST(confidence AS DOUBLE)),2) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='false'",
127
+ "vrlFunctionQuery": "",
128
+ "customQuery": true,
129
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Avg confidence", "alias": "y_axis_1", "column": "y_axis_1", "color": "#59b27a", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
130
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
131
+ }
132
+ ],
133
+ "layout": {"x": 36, "y": 0, "w": 12, "h": 6, "i": 4}
134
+ },
135
+ {
136
+ "id": "table_all",
137
+ "type": "table",
138
+ "title": "All classifications (newest first)",
139
+ "description": "Filter with the Session ID / Session type / Errors only variables above. Click any row → opens the Traces view filtered to just that trace's spans.",
140
+ "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
141
+ "queryType": "sql",
142
+ "queries": [
143
+ {
144
+ "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') ORDER BY _timestamp DESC",
145
+ "vrlFunctionQuery": "",
146
+ "customQuery": true,
147
+ "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
148
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
149
+ }
150
+ ],
151
+ "layout": {"x": 0, "y": 6, "w": 48, "h": 14, "i": 5}
152
+ },
153
+ {
154
+ "id": "table_errors",
155
+ "type": "table",
156
+ "title": "Errors only",
157
+ "description": "Failed classifications — inference errors, schema errors, invalid task_key, session-not-found. Click a row → opens just that trace's spans.",
158
+ "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
159
+ "queryType": "sql",
160
+ "queries": [
161
+ {
162
+ "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' ORDER BY _timestamp DESC",
163
+ "vrlFunctionQuery": "",
164
+ "customQuery": true,
165
+ "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
166
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
167
+ }
168
+ ],
169
+ "layout": {"x": 0, "y": 20, "w": 48, "h": 10, "i": 6}
170
+ }
171
+ ]
172
+ }
173
+ ]
174
+ }
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.54.1"
7
+ version = "1.55.0"
8
8
  description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]
package/ui.tar.gz CHANGED
Binary file