@meridiona/meridian-darwin-arm64 1.54.0 → 1.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -19,6 +19,15 @@
19
19
  # MLX_SERVER_HOST=127.0.0.1
20
20
  # MLX_SERVER_PORT=7823
21
21
 
22
+ # Idle eviction for the MLX model. The model holds ~7 GB of Metal memory while
23
+ # resident, but classification is bursty — so the server unloads it after this
24
+ # many seconds idle and reloads on the next request (~3 s cold start). Default
25
+ # 120s (aggressive: lightest idle footprint). Raise it to keep the model warm
26
+ # longer; set 0 to disable eviction (pin the model in memory). Avoid values
27
+ # below ~30s: if the TTL drops under the gap between sessions in a classification
28
+ # burst, the model evicts and cold-reloads (~3 s) repeatedly mid-burst.
29
+ # MLX_IDLE_EVICT_S=120
30
+
22
31
  # Dashboard (Next.js UI) port. Defaults to 3939. Change this and re-run
23
32
  # `meridian setup` to move the dashboard.
24
33
  # MERIDIAN_UI_PORT=3939
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.54.0
1
+ 1.55.0
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.54.0",
3
+ "version": "1.55.0",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -41,15 +41,19 @@ elif command -v openobserve >/dev/null 2>&1; then
41
41
  fi
42
42
 
43
43
  if [[ -z "${OO_BIN}" ]]; then
44
- echo "→ OpenObserve binary not found — downloading v0.11.0 (last release with arm64 binary)..."
44
+ echo "→ OpenObserve binary not found — downloading v0.90.3..."
45
45
  _oo_arch="$(uname -m)"
46
46
  case "$_oo_arch" in
47
47
  arm64) _oo_arch="arm64" ;;
48
48
  x86_64) _oo_arch="amd64" ;;
49
49
  *) echo "✗ Unsupported arch: $_oo_arch" >&2; exit 1 ;;
50
50
  esac
51
- _oo_ver="v0.11.0"
52
- _oo_url="https://github.com/openobserve/openobserve/releases/download/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
51
+ # GitHub release assets were removed for recent versions; binaries now live on
52
+ # the official downloads host. Trace deep-linking (dashboard drilldown into a
53
+ # single trace's spans) needs a modern build, so we pin a current stable.
54
+ # KEEP IN SYNC: the same version is pinned in install.sh — bump both together.
55
+ _oo_ver="v0.90.3"
56
+ _oo_url="https://downloads.openobserve.ai/releases/openobserve/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
53
57
  mkdir -p "${HOME}/.openobserve"
54
58
  if curl -fsSL -o "${HOME}/.openobserve/openobserve.tar.gz" "$_oo_url" \
55
59
  && tar -xzf "${HOME}/.openobserve/openobserve.tar.gz" -C "${HOME}/.openobserve" \
@@ -4,7 +4,9 @@ A single `setup(agent_name)` call wires up:
4
4
 
5
5
  * an OTel `TracerProvider` with `service.name=agent_name`
6
6
  * a `BatchSpanProcessor` exporting OTLP/HTTP-protobuf spans to OpenObserve
7
- (`MERIDIAN_OTLP_TRACES_ENDPOINT`, with Basic auth via `MERIDIAN_OO_AUTH`)
7
+ * a `LoggerProvider` + OTLP-logs handler so every `logging.LogRecord` is also
8
+ shipped to OpenObserve (correlated to the active span), mirroring the Rust
9
+ daemon's `OpenTelemetryTracingBridge`
8
10
  * W3C `TraceContextTextMapPropagator` as the global propagator so each
9
11
  agent can pick up the Rust daemon's `traceparent` and continue the trace
10
12
  * `LoggingInstrumentor` so every `logging.LogRecord` carries
@@ -13,6 +15,12 @@ A single `setup(agent_name)` call wires up:
13
15
  under `~/.meridian/logs/{agent_name}.jsonl` plus stderr — both ingestable
14
16
  by OpenObserve's log pipeline without further parsing.
15
17
 
18
+ Export config (endpoint + Basic-auth credentials) is resolved from the SAME
19
+ `~/.meridian/settings.json` the Rust daemon reads — `otlp_enabled`,
20
+ `otlp_endpoint`, `oo_email`, `oo_password` — so the dashboard Settings page is
21
+ the single source of truth for both processes. The legacy `MERIDIAN_OO_AUTH`
22
+ env credential is deprecated and ignored, matching the daemon.
23
+
16
24
  `extract_parent_context(traceparent)` is the helper agents use to continue
17
25
  a span emitted by another process — typically the Rust ETL or another
18
26
  agent stage.
@@ -23,20 +31,26 @@ single-shot CLI paths funnel through the same module.
23
31
  """
24
32
  from __future__ import annotations
25
33
 
34
+ import base64
35
+ import json
26
36
  import logging
27
37
  import logging.handlers
28
38
  import os
29
39
  import sys
30
40
  from pathlib import Path
31
- from typing import Optional
41
+ from typing import NamedTuple, Optional
32
42
 
33
43
  from opentelemetry import trace
44
+ from opentelemetry._logs import set_logger_provider
34
45
  from opentelemetry.context import Context
46
+ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
35
47
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
36
48
  OTLPSpanExporter,
37
49
  )
38
50
  from opentelemetry.instrumentation.logging import LoggingInstrumentor
39
51
  from opentelemetry.propagate import set_global_textmap
52
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
53
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
40
54
  from opentelemetry.sdk.resources import Resource
41
55
  from opentelemetry.sdk.trace import TracerProvider
42
56
  from opentelemetry.sdk.trace.export import BatchSpanProcessor
@@ -50,12 +64,125 @@ from pythonjsonlogger import jsonlogger
50
64
  DEFAULT_TRACES_ENDPOINT = "http://localhost:5080/api/default/v1/traces"
51
65
  DEFAULT_LOGS_ENDPOINT = "http://localhost:5080/api/default/v1/logs"
52
66
  DEFAULT_LOG_DIR = Path.home() / ".meridian" / "logs"
67
+ # Single source of truth for OpenObserve export config — the SAME file the Rust
68
+ # daemon reads (see `src/observability.rs::resolve_otlp_target`). Keeps the two
69
+ # processes credential-aligned: the dashboard Settings page writes here and both
70
+ # the daemon and this MLX server pick it up with no env plumbing.
71
+ _SETTINGS_PATH = Path(
72
+ os.environ.get("MERIDIAN_SETTINGS_PATH")
73
+ or (Path.home() / ".meridian" / "settings.json")
74
+ )
53
75
 
54
76
  _NOISY_LOGGERS = ("urllib3", "httpx", "httpcore", "openai", "botocore")
55
77
 
56
78
  # Track which agents have been configured so a second setup() call is a no-op.
57
79
  _INITIALISED: dict[str, trace.Tracer] = {}
58
80
  _PROCESS_SERVICE_NAME: str | None = None
81
+ # Held so shutdown() can flush log records the same way it flushes spans.
82
+ _LOGGER_PROVIDER: LoggerProvider | None = None
83
+ # One-time guard so an export misconfiguration (enabled-but-no-creds, or a
84
+ # schemeless endpoint) warns once per process instead of on every resolve.
85
+ _WARNED_EXPORT_MISCONFIG: bool = False
86
+
87
+
88
+ # ──────────────────────── OTLP target resolution ───────────────────────────────
89
+ class _OtlpTarget(NamedTuple):
90
+ """Resolved OTLP export target: signal endpoints + Basic-auth header value."""
91
+
92
+ traces_endpoint: str
93
+ logs_endpoint: str
94
+ headers: dict[str, str]
95
+
96
+
97
+ def _load_settings() -> dict[str, object]:
98
+ """Read `~/.meridian/settings.json`; empty dict if absent/unreadable."""
99
+ try:
100
+ with _SETTINGS_PATH.open(encoding="utf-8") as fh:
101
+ data = json.load(fh)
102
+ return data if isinstance(data, dict) else {}
103
+ except (OSError, ValueError):
104
+ return {}
105
+
106
+
107
+ def _resolve_otlp_target() -> Optional[_OtlpTarget]:
108
+ """Mirror of the Rust daemon's `resolve_otlp_target()`.
109
+
110
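+ Returns `None` (→ export disabled) when `MERIDIAN_TRACING_DISABLED` is set, the
111
+ toggle is off, credentials are missing or contain illegal characters, or the endpoint has no http/https scheme. Endpoint precedence: settings.json `otlp_endpoint` → the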
112
+ `MERIDIAN_OTLP_TRACES_ENDPOINT`/`MERIDIAN_OTLP_ENDPOINT` env override →
113
+ the localhost default. Auth is `base64(oo_email:oo_password)` — settings.json
114
+ only; the legacy `MERIDIAN_OO_AUTH` env path is deprecated and ignored, the
115
+ same decision the daemon made.
116
+ """
117
+ global _WARNED_EXPORT_MISCONFIG
118
+
119
+ if os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes"):
120
+ return None
121
+
122
+ settings = _load_settings()
123
+ if not settings.get("otlp_enabled"):
124
+ return None
125
+
126
+ # Resolve the endpoint up front so we can warn (not silently disable) when
127
+ # export is enabled but unusable. Precedence: settings → env → localhost.
128
+ configured = str(settings.get("otlp_endpoint") or "").strip()
129
+ env_endpoint = (
130
+ os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
131
+ or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
132
+ )
133
+ traces_endpoint = configured or env_endpoint or DEFAULT_TRACES_ENDPOINT
134
+
135
+ def _warn_once(msg: str, *args: object) -> None:
136
+ global _WARNED_EXPORT_MISCONFIG
137
+ if not _WARNED_EXPORT_MISCONFIG:
138
+ _WARNED_EXPORT_MISCONFIG = True
139
+ logging.getLogger(__name__).warning(msg, *args)
140
+
141
+ email = str(settings.get("oo_email") or "")
142
+ password = str(settings.get("oo_password") or "")
143
+ if not email or not password:
144
+ # otlp_enabled but no usable credentials → export OFF. Warn once so an
145
+ # env-only (MERIDIAN_OO_AUTH) install that predates the settings.json
146
+ # credential move doesn't go dark silently — mirrors the daemon, which
147
+ # also warns. MERIDIAN_OO_AUTH is no longer read here.
148
+ _warn_once(
149
+ "OpenObserve export enabled but oo_email/oo_password missing in %s — "
150
+ "traces+logs export DISABLED. Set credentials in the dashboard Settings "
151
+ "(the MERIDIAN_OO_AUTH env var is no longer used).",
152
+ _SETTINGS_PATH,
153
+ )
154
+ return None
155
+ # Guard against HTTP header injection / malformed user:password splits —
156
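+ # mirrors the daemon's equivalent check. NOTE: unlike the other misconfiguration paths in this function, this rejection returns without calling _warn_once, so export is disabled silently.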
157
+ if any(c in email for c in "\r\n:") or any(c in password for c in "\r\n"):
158
+ return None
159
+ auth = base64.standard_b64encode(f"{email}:{password}".encode()).decode()
160
+
161
+ # Validate scheme — only http/https are valid OTLP transports. The daemon
162
+ # disables export + warns on a schemeless endpoint; mirror that exactly so the
163
+ # two processes don't disagree on whether export is on.
164
+ if not (traces_endpoint.startswith("http://") or traces_endpoint.startswith("https://")):
165
+ _warn_once(
166
+ "OTLP endpoint %r has no http/https scheme — export DISABLED.",
167
+ traces_endpoint,
168
+ )
169
+ return None
170
+
171
+ # OpenObserve serves logs at the sibling `…/v1/logs` path. Derive it from the
172
+ # traces endpoint by swapping the trailing signal segment so a custom host or
173
+ # base (incl. a trailing slash, e.g. `…/v1/traces/`) carries to BOTH signals —
174
+ # never silently fall back to localhost for logs while traces go remote.
175
+ t = traces_endpoint.rstrip("/")
176
+ if t.endswith("/v1/traces"):
177
+ logs_endpoint = t[: -len("/v1/traces")] + "/v1/logs"
178
+ elif t.endswith("/traces"):
179
+ logs_endpoint = t[: -len("/traces")] + "/logs"
180
+ elif "traces" in t:
181
+ logs_endpoint = t.rsplit("traces", 1)[0] + "logs"
182
+ else:
183
+ logs_endpoint = t + "/v1/logs"
184
+
185
+ return _OtlpTarget(traces_endpoint, logs_endpoint, {"Authorization": f"Basic {auth}"})
59
186
 
60
187
 
61
188
  # ──────────────────────── Public API ───────────────────────────────────────────
@@ -80,8 +207,13 @@ def setup(agent_name: str) -> trace.Tracer:
80
207
 
81
208
  if _PROCESS_SERVICE_NAME is None:
82
209
  _PROCESS_SERVICE_NAME = agent_name
83
- _configure_tracing(agent_name)
84
- _configure_logging(agent_name)
210
+ # Resolve the export target ONCE and pass it to both configurers — a
211
+ # second read could see a settings.json the dashboard rewrote mid-setup
212
+ # (TOCTOU), leaving traces enabled while logs resolve disabled (or with
213
+ # different creds/endpoint) in the same process.
214
+ target = _resolve_otlp_target()
215
+ _configure_tracing(agent_name, target)
216
+ _configure_logging(agent_name, target)
85
217
  logging.getLogger(agent_name).info(
86
218
  "observability initialised",
87
219
  extra={"service.name": agent_name},
@@ -105,6 +237,12 @@ def shutdown() -> None:
105
237
  if hasattr(provider, "shutdown"):
106
238
  provider.shutdown()
107
239
 
240
+ # Flush queued log records too — BatchLogRecordProcessor drops them on
241
+ # interpreter exit otherwise, the same hazard as spans.
242
+ if _LOGGER_PROVIDER is not None:
243
+ _LOGGER_PROVIDER.force_flush(timeout_millis=5_000)
244
+ _LOGGER_PROVIDER.shutdown()
245
+
108
246
 
109
247
  def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
110
248
  """Parse an incoming W3C `traceparent` header into an OTel `Context`.
@@ -119,21 +257,14 @@ def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
119
257
 
120
258
 
121
259
  # ──────────────────────── Tracing setup ────────────────────────────────────────
122
- def _configure_tracing(agent_name: str) -> None:
260
+ def _configure_tracing(agent_name: str, target: Optional[_OtlpTarget]) -> None:
123
261
  resource = Resource.create({"service.name": agent_name})
124
262
  provider = TracerProvider(resource=resource)
125
263
 
126
- disabled = os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes")
127
- endpoint = (
128
- os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
129
- or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
130
- )
131
- if not disabled and endpoint:
132
- headers: dict[str, str] = {}
133
- auth = os.environ.get("MERIDIAN_OO_AUTH")
134
- if auth:
135
- headers["Authorization"] = f"Basic {auth}"
136
- exporter = OTLPSpanExporter(endpoint=endpoint, headers=headers)
264
+ if target is not None:
265
+ exporter = OTLPSpanExporter(
266
+ endpoint=target.traces_endpoint, headers=target.headers
267
+ )
137
268
  provider.add_span_processor(BatchSpanProcessor(exporter))
138
269
 
139
270
  # Set as the global provider. OTel's `set_tracer_provider` warns if
@@ -143,8 +274,32 @@ def _configure_tracing(agent_name: str) -> None:
143
274
  set_global_textmap(TraceContextTextMapPropagator())
144
275
 
145
276
 
277
+ def _configure_log_export(
278
+ agent_name: str, target: Optional[_OtlpTarget]
279
+ ) -> Optional[logging.Handler]:
280
+ """Build an OTLP-logs handler so every `log.*` record reaches OpenObserve,
281
+ correlated to the active span by trace_id/span_id — the Python counterpart
282
+ of the Rust daemon's `OpenTelemetryTracingBridge`.
283
+
284
+ Returns the handler (caller attaches it to root) or `None` when export is
285
+ disabled, in which case logs still go to the JSONL file + stdout/stderr.
286
+ """
287
+ global _LOGGER_PROVIDER
288
+
289
+ if target is None:
290
+ return None
291
+
292
+ resource = Resource.create({"service.name": agent_name})
293
+ provider = LoggerProvider(resource=resource)
294
+ exporter = OTLPLogExporter(endpoint=target.logs_endpoint, headers=target.headers)
295
+ provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
296
+ set_logger_provider(provider)
297
+ _LOGGER_PROVIDER = provider
298
+ return LoggingHandler(level=logging.NOTSET, logger_provider=provider)
299
+
300
+
146
301
  # ──────────────────────── Logging setup ────────────────────────────────────────
147
- def _configure_logging(agent_name: str) -> None:
302
+ def _configure_logging(agent_name: str, target: Optional[_OtlpTarget]) -> None:
148
303
  log_dir = Path(os.environ.get("MERIDIAN_LOG_DIR") or DEFAULT_LOG_DIR)
149
304
  log_dir.mkdir(parents=True, exist_ok=True)
150
305
  log_path = log_dir / f"{agent_name}.jsonl"
@@ -204,6 +359,22 @@ def _configure_logging(agent_name: str) -> None:
204
359
  root.addHandler(file_h)
205
360
  root.addHandler(stdout_h)
206
361
  root.addHandler(stderr_h)
362
+ # Ship every record to OpenObserve via OTLP/HTTP logs too, when export is
363
+ # configured. The OTel LoggingHandler reads the active span context, so each
364
+ # OO log row carries the trace_id/span_id that ties it to the classifier's
365
+ # span waterfall. No-op (None) when OTLP is disabled.
366
+ # The OTLP handler already carries service.name via the OTel Resource, so it
367
+ # needs no _ServiceFilter (that would duplicate the attribute on each record).
368
+ otlp_log_h = _configure_log_export(agent_name, target)
369
+ if otlp_log_h is not None:
370
+ # Do NOT feed the OTLP exporter's OWN transport logs back into OTLP
371
+ # export: on export failure httpx/urllib3/opentelemetry emit WARNING+
372
+ # records which this root handler would try to export → more failures (a
373
+ # log→export→log loop). Drop those from THIS handler only — they still
374
+ # reach the file/stderr handlers.
375
+ _otlp_excluded = ("httpx", "httpcore", "urllib3", "grpc", "opentelemetry")
376
+ otlp_log_h.addFilter(lambda r: not r.name.startswith(_otlp_excluded))
377
+ root.addHandler(otlp_log_h)
207
378
  root.setLevel(level)
208
379
 
209
380
  for noisy in _NOISY_LOGGERS:
@@ -25,15 +25,19 @@ Method tag in results: "mlx_direct".
25
25
  from __future__ import annotations
26
26
 
27
27
  import datetime as _dt
28
+ import gc
28
29
  import json
29
30
  import logging
30
31
  import os
31
32
  import sqlite3 as _sqlite3
32
33
  import sys
34
+ import threading
33
35
  import time
36
+ from contextlib import contextmanager
34
37
  from pathlib import Path
35
- from typing import Any, Literal, Optional
38
+ from typing import Any, Literal, Optional, Iterator
36
39
 
40
+ from opentelemetry import trace
37
41
  from opentelemetry.trace import StatusCode
38
42
  from pydantic import BaseModel, Field
39
43
 
@@ -234,42 +238,144 @@ _SYSTEM_PROMPT = (
234
238
 
235
239
 
236
240
  # ---------------------------------------------------------------------------
237
- # Model loading — cached for the process lifetime.
238
- # outlines.from_mlxlm wraps the already-loaded mlx model; subsequent calls
239
- # skip the expensive disk load.
241
+ # Model loading — loaded lazily on first use, evicted when idle.
242
+ #
243
+ # The MLX model holds ~7 GB of Metal unified memory while resident (measured;
244
+ # note `ps`/Activity Monitor RSS does NOT show it). Classification is bursty,
245
+ # so we keep the model only while it's being used: load on first inference,
246
+ # and evict after MLX_IDLE_EVICT_S of inactivity (server.py runs the evictor).
247
+ # `del + gc.collect() + mx.clear_cache()` reclaims the full 7 GB; cold reload
248
+ # is ~3 s. `_model_lock` + `_in_flight` guarantee the evictor never frees the
249
+ # model out from under an in-flight inference.
240
250
  # ---------------------------------------------------------------------------
241
251
 
242
252
  _model_cache: dict[str, Any] = {}
253
+ _model_lock = threading.Lock() # guards _model_cache mutation, _in_flight, _last_used, eviction
254
+ _in_flight = 0 # inferences currently using the model
255
+ _last_used = time.monotonic() # monotonic ts of the last finished inference
256
+
257
+ # Aggressive default (2 min): the model is present only during active bursts.
258
+ # Tune via env without a code change; 0 disables idle eviction entirely.
259
+ _IDLE_EVICT_S = float(os.environ.get("MLX_IDLE_EVICT_S", "120"))
243
260
 
244
261
 
245
262
  def _get_model() -> Any:
246
- """Return an outlines-wrapped model, loading from disk on the first call."""
263
+ """Return an outlines-wrapped model, loading from disk on the first call.
264
+
265
+ Cache-miss load is done under _model_lock (double-checked) so concurrent
266
+ callers can't double-load and the idle evictor can't race the load.
267
+ """
247
268
  model_id = _resolve_model_id()
248
- if model_id in _model_cache:
249
- return _model_cache[model_id]
269
+ cached = _model_cache.get(model_id)
270
+ if cached is not None:
271
+ return cached
272
+
273
+ with _model_lock:
274
+ cached = _model_cache.get(model_id) # re-check under lock
275
+ if cached is not None:
276
+ return cached
277
+ try:
278
+ import mlx_lm
279
+ import outlines
280
+ except ImportError as exc:
281
+ raise ImportError(
282
+ f"Required package not installed: {exc}. "
283
+ "Install with: pip install 'mlx-lm>=0.22' 'outlines[mlxlm]>=1.3'"
284
+ ) from exc
285
+
286
+ log.info(
287
+ "run_task_linker_mlx: loading %s (first call this process)", model_id
288
+ )
289
+ t0 = time.time()
290
+ mlx_model, tokenizer = mlx_lm.load(
291
+ model_id,
292
+ tokenizer_config={"trust_remote_code": True},
293
+ )
294
+ outlines_model = outlines.from_mlxlm(mlx_model, tokenizer)
295
+ log.info("run_task_linker_mlx: model loaded in %.1fs", time.time() - t0)
250
296
 
297
+ _model_cache[model_id] = outlines_model
298
+ return outlines_model
299
+
300
+
301
+ @contextmanager
302
+ def model_session() -> Iterator[Any]:
303
+ """Yield the loaded model, marking it in-flight so the idle evictor never
304
+ frees it mid-inference. Wrap every direct ``model(...)`` call in this.
305
+
306
+ Lock is held only briefly (to bump/clear the in-flight counter), never for
307
+ the duration of inference. NOTE: production serialises all MLX calls upstream
308
+ via the Rust llm_gate (1-permit semaphore), so inferences don't actually
309
+ overlap — this lock scope just avoids adding a second, redundant serialisation
310
+ point, NOT a claim that concurrent generation on the shared model is safe.
311
+ """
312
+ global _in_flight, _last_used
313
+ with _model_lock:
314
+ _in_flight += 1
251
315
  try:
252
- import mlx_lm
253
- import outlines
254
- except ImportError as exc:
255
- raise ImportError(
256
- f"Required package not installed: {exc}. "
257
- "Install with: pip install 'mlx-lm>=0.22' 'outlines[mlxlm]>=1.3'"
258
- ) from exc
259
-
260
- log.info(
261
- "run_task_linker_mlx: loading %s (first call this process)", model_id
262
- )
263
- t0 = time.time()
264
- mlx_model, tokenizer = mlx_lm.load(
265
- model_id,
266
- tokenizer_config={"trust_remote_code": True},
267
- )
268
- outlines_model = outlines.from_mlxlm(mlx_model, tokenizer)
269
- log.info("run_task_linker_mlx: model loaded in %.1fs", time.time() - t0)
316
+ yield _get_model()
317
+ finally:
318
+ with _model_lock:
319
+ _in_flight -= 1
320
+ _last_used = time.monotonic()
321
+
322
+
323
+ def maybe_evict_idle(idle_s: float | None = None) -> float | None:
324
+ """Evict the model if it's resident, nothing is in flight, and it's been
325
+ idle longer than ``idle_s`` (default MLX_IDLE_EVICT_S). Returns the GB freed,
326
+ or None if no eviction happened. Safe to call from a threadpool worker.
327
+
328
+ Uses a non-blocking lock acquire: if an inference/load is mutating state we
329
+ simply skip this tick and try again on the next one.
330
+ """
331
+ ttl = _IDLE_EVICT_S if idle_s is None else idle_s
332
+ if ttl <= 0:
333
+ return None
334
+ if not _model_lock.acquire(blocking=False):
335
+ return None
336
+ try:
337
+ if _in_flight > 0 or not _model_cache:
338
+ return None
339
+ if (time.monotonic() - _last_used) < ttl:
340
+ return None
341
+ try:
342
+ import mlx.core as mx
343
+ before = mx.get_active_memory()
344
+ except Exception: # noqa: BLE001 — mx should always import here
345
+ mx, before = None, 0
346
+ _model_cache.clear()
347
+ gc.collect()
348
+ freed = 0.0
349
+ if mx is not None:
350
+ mx.clear_cache()
351
+ freed = max(0.0, (before - mx.get_active_memory()) / 1e9)
352
+ log.info(
353
+ "run_task_linker_mlx: evicted idle model (idle ≥ %.0fs), freed ~%.1f GB",
354
+ ttl, freed,
355
+ )
356
+ return freed
357
+ finally:
358
+ _model_lock.release()
359
+
360
+
361
+ def model_resident() -> bool:
362
+ """True if the MLX model is currently loaded in memory."""
363
+ return bool(_model_cache)
270
364
 
271
- _model_cache[model_id] = outlines_model
272
- return outlines_model
365
+
366
+ def model_active_memory_gb() -> float | None:
367
+ """Live Metal active-memory footprint in GB, or None if MLX is unavailable.
368
+
369
+ Process-wide Metal active memory (≈ the model when resident — the model
370
+ dominates, though a transient load allocation can briefly inflate it), and
371
+ the only honest measure: `ps`/Activity Monitor can't see Metal unified
372
+ memory (they undercount by ~6.5 GB).
373
+ """
374
+ try:
375
+ import mlx.core as mx
376
+ return round(mx.get_active_memory() / 1e9, 2)
377
+ except Exception: # noqa: BLE001 — mx absent on non-MLX machines
378
+ return None
273
379
 
274
380
 
275
381
  # Apple Foundation Models has a 4096-token combined context window (input + output).
@@ -705,14 +811,14 @@ def _classify_one(
705
811
  from mlx_lm.sample_utils import make_sampler
706
812
  from outlines.inputs import Chat
707
813
 
708
- model = _get_model()
709
- raw = model(
710
- Chat(messages),
711
- output_type=SessionClassification,
712
- max_tokens=_MAX_TOKENS,
713
- sampler=make_sampler(temp=_TEMPERATURE),
714
- verbose=False,
715
- )
814
+ with model_session() as model:
815
+ raw = model(
816
+ Chat(messages),
817
+ output_type=SessionClassification,
818
+ max_tokens=_MAX_TOKENS,
819
+ sampler=make_sampler(temp=_TEMPERATURE),
820
+ verbose=False,
821
+ )
716
822
  except Exception as exc:
717
823
  elapsed = time.time() - t0
718
824
  outcome = "apple_fm_error" if _use_apple_fm else "mlx_error"
@@ -840,10 +946,76 @@ def _open_run_log(db_path: str) -> "tuple[Path, Any]":
840
946
  return log_path, log_path.open("w", encoding="utf-8")
841
947
 
842
948
 
949
+ # `method` values that mean the model produced a usable classification.
950
+ # Anything else is an error path the dashboard surfaces under errors-only. The
951
+ # real error `method` values emitted by `_error_result` are `mlx_parse_error`
952
+ # (schema validation / unknown task_key — those names are child-span `outcome`
953
+ # attributes, NOT methods) and `mlx_error` (inference failure / session-not-found).
954
+ _SUCCESS_METHODS = {"mlx_direct", "apple_fm"}
955
+
956
+
957
+ def _annotate_classification_span(result: dict[str, Any]) -> None:
958
+ """Promote the classification result onto the enclosing `classify_session`
959
+ span so each session is ONE self-describing row in OpenObserve — filterable
960
+ by session_id / session_type / task_key / is_error without joining the child
961
+ spans. Both the server and CLI entry points wrap the call in a
962
+ `classify_session` span, so annotating the current span here covers both.
963
+ """
964
+ span = trace.get_current_span()
965
+ if not span.is_recording():
966
+ return
967
+ method = str(result.get("method", ""))
968
+ task_key = result.get("task_key")
969
+ is_error = method not in _SUCCESS_METHODS
970
+ span.set_attribute("session_id", int(result.get("session_id", 0)))
971
+ span.set_attribute("task_key", task_key or "-")
972
+ span.set_attribute("has_task", task_key is not None)
973
+ span.set_attribute("session_type", str(result.get("session_type", "")))
974
+ span.set_attribute("category", str(result.get("category", "")))
975
+ span.set_attribute("confidence", float(result.get("confidence", 0.0)))
976
+ span.set_attribute(
977
+ "category_confidence", float(result.get("category_confidence", 0.0))
978
+ )
979
+ span.set_attribute("method", method)
980
+ span.set_attribute("elapsed_s", float(result.get("elapsed_s", 0.0)))
981
+ span.set_attribute("is_error", is_error)
982
+ if is_error:
983
+ span.set_status(StatusCode.ERROR, str(result.get("reasoning", method))[:300])
984
+
985
+
843
986
  def _classify_one_logged(
844
987
  session_id: int,
845
988
  con: _sqlite3.Connection,
846
989
  run_log: Any,
990
+ ) -> dict[str, Any]:
991
+ """Classify one session, marking the span is_error=true on ANY failure.
992
+
993
+ Wraps the inner worker so an UNHANDLED exception (a sqlite read error,
994
+ malformed window_titles JSON, …) still stamps is_error=true + ERROR status on
995
+ the enclosing classify_session span before propagating — otherwise the
996
+ dashboard's errors-only table (which filters is_error='true') silently misses
997
+ exactly the crashes an operator opens it to find. Handled failures already
998
+ return _error_result dicts that _annotate_classification_span marks.
999
+ """
1000
+ try:
1001
+ return _classify_one_logged_inner(session_id, con, run_log)
1002
+ except Exception as exc: # noqa: BLE001 — annotate, then re-raise unchanged
1003
+ span = trace.get_current_span()
1004
+ if span.is_recording():
1005
+ span.set_attribute("session_id", int(session_id))
1006
+ span.set_attribute("is_error", True)
1007
+ span.set_attribute("method", "mlx_error")
1008
+ span.set_status(StatusCode.ERROR, str(exc)[:300])
1009
+ log.exception(
1010
+ "run_task_linker_mlx: unhandled error classifying session %d", session_id
1011
+ )
1012
+ raise
1013
+
1014
+
1015
+ def _classify_one_logged_inner(
1016
+ session_id: int,
1017
+ con: _sqlite3.Connection,
1018
+ run_log: Any,
847
1019
  ) -> dict[str, Any]:
848
1020
  """Classify one session and append a full record to the run log."""
849
1021
  # Gather inputs before classification so we can log them even on error.
@@ -888,6 +1060,7 @@ def _classify_one_logged(
888
1060
  }
889
1061
  run_log.write(json.dumps(record, default=str) + "\n")
890
1062
  run_log.flush()
1063
+ _annotate_classification_span(result)
891
1064
  return result
892
1065
 
893
1066
 
@@ -950,15 +1123,12 @@ def main() -> None:
950
1123
  try:
951
1124
  results: list[dict[str, Any]] = []
952
1125
  for sid in session_ids:
953
- with tracer.start_as_current_span("classify_session") as cls_span:
954
- cls_span.set_attribute("session_id", sid)
1126
+ with tracer.start_as_current_span("classify_session"):
1127
+ # _classify_one_logged enriches this span with the full
1128
+ # result (session_id, task_key, session_type, is_error, …).
955
1129
  log.info("run_task_linker_mlx: classifying session %d", sid)
956
1130
  result = _classify_one_logged(sid, con, run_log_file)
957
1131
  results.append(result)
958
- cls_span.set_attribute("task_key", result["task_key"] or "-")
959
- cls_span.set_attribute("session_type", result["session_type"])
960
- cls_span.set_attribute("method", result["method"])
961
- cls_span.set_attribute("elapsed_s", result["elapsed_s"])
962
1132
  log.info(
963
1133
  "run_task_linker_mlx: session_id=%d task_key=%s "
964
1134
  "session_type=%s elapsed_s=%.2f",
@@ -41,20 +41,58 @@ _DB_PATH = Path(os.environ.get("MERIDIAN_DB", Path.home() / ".meridian/meridian.
41
41
  _app_state: dict[str, Any] = {}
42
42
 
43
43
 
44
+ async def _idle_evictor(mlx_module: Any) -> None:
45
+ """Background loop: evict the MLX model after it has been idle long enough.
46
+
47
+ Runs the (briefly blocking) eviction in a threadpool so it never stalls the
48
+ event loop, and never raises out — the evictor must outlive transient errors.
49
+ """
50
+ import asyncio
51
+ from fastapi.concurrency import run_in_threadpool
52
+
53
+ ttl = mlx_module._IDLE_EVICT_S
54
+ if ttl <= 0:
55
+ return
56
+ interval = max(15.0, ttl / 4.0) # check ~4× per idle window
57
+ while True:
58
+ await asyncio.sleep(interval)
59
+ try:
60
+ await run_in_threadpool(mlx_module.maybe_evict_idle)
61
+ except Exception as exc: # noqa: BLE001 — evictor must never die
62
+ log.warning("server: idle-evictor error: %s", exc)
63
+
64
+
44
65
  @asynccontextmanager
45
66
  async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
67
+ import asyncio
46
68
  import datetime
47
69
  import agents.run_task_linker_mlx as _mlx
48
70
  _app_state["mlx_module"] = _mlx
49
71
  _app_state["loaded_at"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
50
72
  from agents.llm_selector import APPLE_INTELLIGENCE_ID
73
+ evictor: "asyncio.Task | None" = None
51
74
  if _mlx._resolve_model_id() == APPLE_INTELLIGENCE_ID:
52
- log.info("server: 8 GB machine — Apple Intelligence backend, no MLX model to pre-load")
75
+ log.info("server: Apple Intelligence backend no MLX model to load")
76
+ elif _mlx._IDLE_EVICT_S > 0:
77
+ # Lazy: the ~7 GB model loads on the first inference and is evicted after
78
+ # MLX_IDLE_EVICT_S of inactivity, so the server idles light (~0.4 GB)
79
+ # instead of pinning ~7 GB of Metal memory for the whole process life.
80
+ log.info(
81
+ "server: MLX model loads on first request; idle-evict after %.0fs",
82
+ _mlx._IDLE_EVICT_S,
83
+ )
84
+ evictor = asyncio.create_task(_idle_evictor(_mlx))
53
85
  else:
54
- log.info("server: loading MLX model at startup…")
55
- _mlx._get_model()
56
- log.info("server: MLX model ready")
57
- yield
86
+ # Eviction disabled — don't spawn a no-op evictor task just to cancel it.
87
+ log.info("server: MLX model loads on first request; idle-eviction disabled (MLX_IDLE_EVICT_S=0)")
88
+ try:
89
+ yield
90
+ finally:
91
+ if evictor is not None:
92
+ import contextlib
93
+ evictor.cancel()
94
+ with contextlib.suppress(asyncio.CancelledError):
95
+ await evictor
58
96
 
59
97
 
60
98
  app = FastAPI(title="Meridian Agent", version="1.0.0", lifespan=_lifespan)
@@ -76,12 +114,19 @@ async def health() -> dict:
76
114
 
77
115
  @app.get("/info")
78
116
  async def info() -> dict:
79
- """Return the identity of the loaded model."""
117
+ """Return the identity of the model and its live memory state.
118
+
119
+ `active_memory_gb` reads `mx.get_active_memory()` — the ONLY honest measure
120
+ of the model's footprint, since Metal unified memory is invisible to `ps`
121
+ and Activity Monitor (they undercount the model by ~6.5 GB).
122
+ """
80
123
  m = _app_state.get("mlx_module")
81
124
  return {
82
- "backend": "mlx",
83
- "model_id": m._resolve_model_id() if m else None,
84
- "loaded_at": _app_state.get("loaded_at"),
125
+ "backend": "mlx",
126
+ "model_id": m._resolve_model_id() if m else None,
127
+ "loaded_at": _app_state.get("loaded_at"),
128
+ "model_resident": m.model_resident() if m else False,
129
+ "active_memory_gb": m.model_active_memory_gb() if m else None,
85
130
  }
86
131
 
87
132
 
@@ -143,14 +188,14 @@ async def classify(req: ClassifyRequest) -> ClassifyResponse:
143
188
  # _classify_apple_fm uses asyncio.new_event_loop() internally;
144
189
  # must run in a thread (no existing loop) not in the async handler.
145
190
  return m._classify_apple_fm(messages)
146
- model = m._get_model()
147
- raw = model(
148
- Chat(messages),
149
- output_type=m.SessionClassification,
150
- max_tokens=m._MAX_TOKENS,
151
- sampler=make_sampler(temp=m._TEMPERATURE),
152
- verbose=False,
153
- )
191
+ with m.model_session() as model:
192
+ raw = model(
193
+ Chat(messages),
194
+ output_type=m.SessionClassification,
195
+ max_tokens=m._MAX_TOKENS,
196
+ sampler=make_sampler(temp=m._TEMPERATURE),
197
+ verbose=False,
198
+ )
154
199
  return m.SessionClassification.model_validate_json(raw)
155
200
 
156
201
  try:
@@ -214,49 +259,43 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
214
259
  tracer = _app_state.get("tracer") or trace.get_tracer("meridian-agent-server-mlx")
215
260
  parent_ctx = observability.extract_parent_context(req.traceparent)
216
261
 
217
- with tracer.start_as_current_span("classify_sessions", context=parent_ctx) as span:
218
- span.set_attribute("session_count", len(req.session_ids))
219
-
220
- # Snapshot the OTel context while classify_sessions span is active so we
221
- # can attach it explicitly inside the threadpool (anyio copies contextvars,
222
- # but explicit attach is more reliable across anyio versions).
223
- ctx_snapshot = _otel_context.get_current()
224
-
225
- def _classify_all() -> list[dict]:
226
- # Attach classify_sessions context so _classify_one sub-spans
227
- # (db_fetch, build_prompt, llm_inference, parse_response) appear
228
- # as children of classify_sessions in the OO trace waterfall.
229
- _tok = _otel_context.attach(ctx_snapshot)
262
+ # No batch-wrapper span: each session emits a single `classify_session` span
263
+ # attached directly to the Rust caller's context (via the propagated
264
+ # traceparent). This keeps the debug trace minimal — one self-describing span
265
+ # per session with no redundant N=1 wrapper. For N>1, the sessions appear as
266
+ # sibling classify_session spans under the same daemon trace.
267
+ def _classify_all() -> list[dict]:
268
+ _tok = _otel_context.attach(parent_ctx) if parent_ctx is not None else None
269
+ try:
270
+ # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
271
+ # path-traversal: the server knows its DB from the environment.
272
+ con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
273
+ con.row_factory = _sqlite3.Row
230
274
  try:
231
- # Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
232
- # path-traversal: the server knows its DB from the environment.
233
- con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
234
- con.row_factory = _sqlite3.Row
235
- try:
236
- results: list[dict] = []
237
- for sid in req.session_ids:
238
- with tracer.start_as_current_span(
239
- "classify_session",
240
- attributes={"session_id": sid},
241
- ):
242
- result = m._classify_one_logged(sid, con, fh)
243
- log.info(
244
- "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
245
- sid,
246
- result.get("task_key"),
247
- result.get("session_type"),
248
- result.get("elapsed_s", 0.0),
249
- )
250
- results.append(result)
251
- return results
252
- finally:
253
- con.close()
275
+ results: list[dict] = []
276
+ for sid in req.session_ids:
277
+ # _classify_one_logged owns this span's attributes (session_id,
278
+ # task_key, confidence, is_error, …) via _annotate_classification_span
279
+ # and emits db_fetch / build_prompt / llm_inference / parse_response
280
+ # as its children — one source of truth, matching the CLI path.
281
+ with tracer.start_as_current_span("classify_session"):
282
+ result = m._classify_one_logged(sid, con, fh)
283
+ log.info(
284
+ "classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
285
+ sid,
286
+ result.get("task_key"),
287
+ result.get("session_type"),
288
+ result.get("elapsed_s", 0.0),
289
+ )
290
+ results.append(result)
291
+ return results
254
292
  finally:
293
+ con.close()
294
+ finally:
295
+ if _tok is not None:
255
296
  _otel_context.detach(_tok)
256
297
 
257
- results = await run_in_threadpool(_classify_all)
258
- span.set_attribute("classified_count", len(results))
259
-
298
+ results = await run_in_threadpool(_classify_all)
260
299
  return {"results": results}
261
300
 
262
301
 
@@ -370,18 +409,46 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
370
409
  temperature = req.temperature if req.temperature is not None else 0.3
371
410
  max_tokens = req.max_tokens if req.max_tokens else 2048
372
411
 
412
+ # Honour OpenAI `response_format: {"type":"json_schema", ...}` by
413
+ # FSM-constraining decoding to that schema via outlines. Without this, a
414
+ # reasoning model is free to emit chain-of-thought prose instead of the JSON
415
+ # the caller (e.g. agno's structured-output path) expects, and the parse
416
+ # fails. `{"type":"json_object"}` carries no schema, so it stays free-form.
417
+ output_type = None
418
+ rf = req.response_format
419
+ if isinstance(rf, dict) and rf.get("type") == "json_schema":
420
+ schema = (rf.get("json_schema") or {}).get("schema")
421
+ if schema:
422
+ from outlines.types import JsonSchema
423
+ output_type = JsonSchema(schema)
424
+
373
425
  from agents.llm_selector import APPLE_INTELLIGENCE_ID
374
426
 
427
+ # A `json_schema` request cannot be honoured on Apple Foundation Models:
428
+ # outlines FSM-constrained decoding is incompatible with FM, so the schema
429
+ # would be silently dropped and a structured-output caller (e.g. agno) would
430
+ # get free-form text that fails to parse downstream. Reject explicitly with a
431
+ # 4xx rather than emit unconstrained output that breaks later.
432
+ if output_type is not None and m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
433
+ raise HTTPException(
434
+ status_code=400,
435
+ detail="response_format=json_schema is not supported on Apple "
436
+ "Foundation Models (no FSM-constrained decoding available)",
437
+ )
438
+
375
439
  def _generate() -> str:
376
440
  if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
441
+ # outlines FSM decoding is incompatible with Foundation Models;
442
+ # Apple FM falls back to free-form (json_object / no schema only).
377
443
  return _infer_apple_fm(msgs, max_tokens)
378
- model = m._get_model()
379
- return model(
380
- Chat(msgs),
381
- max_tokens=max_tokens,
382
- sampler=make_sampler(temp=temperature),
383
- verbose=False,
384
- )
444
+ with m.model_session() as model:
445
+ return model(
446
+ Chat(msgs),
447
+ output_type=output_type,
448
+ max_tokens=max_tokens,
449
+ sampler=make_sampler(temp=temperature),
450
+ verbose=False,
451
+ )
385
452
 
386
453
  t0 = _time.time()
387
454
  try:
@@ -504,14 +571,14 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
504
571
  from outlines.inputs import Chat
505
572
 
506
573
  def _generate() -> str:
507
- model = m._get_model()
508
- return model(
509
- Chat(messages),
510
- output_type=_SummarySchema,
511
- max_tokens=req.max_tokens,
512
- sampler=make_sampler(temp=req.temperature),
513
- verbose=False,
514
- )
574
+ with m.model_session() as model:
575
+ return model(
576
+ Chat(messages),
577
+ output_type=_SummarySchema,
578
+ max_tokens=req.max_tokens,
579
+ sampler=make_sampler(temp=req.temperature),
580
+ verbose=False,
581
+ )
515
582
 
516
583
  try:
517
584
  raw = await run_in_threadpool(_generate)
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
  import json
10
10
  import sqlite3
11
11
  import sys
12
+ import time
12
13
  from io import StringIO
13
14
  from pathlib import Path
14
15
  from typing import Iterator
@@ -1011,6 +1012,58 @@ class TestModelCache:
1011
1012
  m._get_model()
1012
1013
 
1013
1014
 
1015
+ # ---------------------------------------------------------------------------
1016
+ # Idle eviction — model_session() in-flight tracking + maybe_evict_idle()
1017
+ # (the model holds ~7 GB while resident; the server unloads it when idle)
1018
+ # ---------------------------------------------------------------------------
1019
+
1020
+ class TestModelEviction:
1021
+ def test_model_session_loads_and_tracks_in_flight(self):
1022
+ import agents.run_task_linker_mlx as m
1023
+ sentinel = MagicMock(name="model")
1024
+ with patch.object(m, "_get_model", return_value=sentinel):
1025
+ m._in_flight = 0
1026
+ with m.model_session() as model:
1027
+ assert model is sentinel
1028
+ assert m._in_flight == 1 # marked in-flight while in use
1029
+ assert m._in_flight == 0 # released on exit
1030
+
1031
+ def test_evict_noop_when_not_idle_long_enough(self):
1032
+ import agents.run_task_linker_mlx as m
1033
+ m._model_cache["x"] = MagicMock()
1034
+ m._in_flight = 0
1035
+ m._last_used = time.monotonic() # just used
1036
+ assert m.maybe_evict_idle(idle_s=600) is None
1037
+ assert m.model_resident() is True
1038
+
1039
+ def test_evict_disabled_when_ttl_zero(self):
1040
+ import agents.run_task_linker_mlx as m
1041
+ m._model_cache["x"] = MagicMock()
1042
+ assert m.maybe_evict_idle(idle_s=0) is None
1043
+ assert m.model_resident() is True
1044
+
1045
+ def test_evict_noop_when_in_flight(self):
1046
+ import agents.run_task_linker_mlx as m
1047
+ m._model_cache["x"] = MagicMock()
1048
+ m._in_flight = 1 # an inference is using the model
1049
+ m._last_used = time.monotonic() - 1000
1050
+ try:
1051
+ assert m.maybe_evict_idle(idle_s=0.001) is None
1052
+ assert m.model_resident() is True # never freed mid-inference
1053
+ finally:
1054
+ m._in_flight = 0
1055
+
1056
+ def test_evict_clears_cache_when_idle(self):
1057
+ import agents.run_task_linker_mlx as m
1058
+ m._model_cache["x"] = MagicMock()
1059
+ m._in_flight = 0
1060
+ m._last_used = time.monotonic() - 1000 # idle long past the window
1061
+ freed = m.maybe_evict_idle(idle_s=0.001)
1062
+ assert freed is not None # eviction happened
1063
+ assert m.model_resident() is False
1064
+ assert m._model_cache == {}
1065
+
1066
+
1014
1067
  # ---------------------------------------------------------------------------
1015
1068
  # SessionClassification schema
1016
1069
  # ---------------------------------------------------------------------------
@@ -0,0 +1,174 @@
1
+ {
2
+ "title": "Session→Task Classifier — Debug",
3
+ "description": "Every session-task classification, newest first. Filter by session_id, session_type, or errors-only; copy a row's trace_id and open it in Traces for the full waterfall (db_fetch → build_prompt → llm_inference → parse_response, with raw_mlx_output). Backed by the enriched `classify_session` spans (service meridian-agent-server-mlx in-process, or meridian-task-linker-mlx from the standalone CLI). Drilldown keys on trace_id alone so it works for both.",
4
+ "version": 5,
5
+ "variables": {
6
+ "list": [
7
+ {
8
+ "type": "textbox",
9
+ "name": "session_id",
10
+ "label": "Session ID",
11
+ "query_data": null,
12
+ "value": "",
13
+ "options": [],
14
+ "multiSelect": false,
15
+ "hideOnDashboard": false,
16
+ "selectAllValueForMultiSelect": "custom",
17
+ "customMultiSelectValue": [],
18
+ "escapeSingleQuotes": true
19
+ },
20
+ {
21
+ "type": "custom",
22
+ "name": "session_type",
23
+ "label": "Session type",
24
+ "query_data": null,
25
+ "value": "",
26
+ "options": [
27
+ {"label": "All", "value": "", "selected": true},
28
+ {"label": "task", "value": "task", "selected": false},
29
+ {"label": "overhead", "value": "overhead", "selected": false},
30
+ {"label": "untracked", "value": "untracked", "selected": false}
31
+ ],
32
+ "multiSelect": false,
33
+ "hideOnDashboard": false,
34
+ "selectAllValueForMultiSelect": "custom",
35
+ "customMultiSelectValue": [],
36
+ "escapeSingleQuotes": true
37
+ },
38
+ {
39
+ "type": "custom",
40
+ "name": "errors_only",
41
+ "label": "Errors only",
42
+ "query_data": null,
43
+ "value": "",
44
+ "options": [
45
+ {"label": "All", "value": "", "selected": true},
46
+ {"label": "Errors only", "value": "true", "selected": false}
47
+ ],
48
+ "multiSelect": false,
49
+ "hideOnDashboard": false,
50
+ "selectAllValueForMultiSelect": "custom",
51
+ "customMultiSelectValue": [],
52
+ "escapeSingleQuotes": true
53
+ }
54
+ ],
55
+ "showDynamicFilters": true
56
+ },
57
+ "defaultDatetimeDuration": {"type": "relative", "relativeTimePeriod": "12h", "startTime": null, "endTime": null},
58
+ "tabs": [
59
+ {
60
+ "tabId": "default",
61
+ "name": "Default",
62
+ "panels": [
63
+ {
64
+ "id": "stat_total",
65
+ "type": "metric",
66
+ "title": "Classifications",
67
+ "description": "Total classify_session spans in range",
68
+ "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
69
+ "queryType": "sql",
70
+ "queries": [
71
+ {
72
+ "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session'",
73
+ "vrlFunctionQuery": "",
74
+ "customQuery": true,
75
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Classifications", "alias": "y_axis_1", "column": "y_axis_1", "color": "#5960b2", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
76
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
77
+ }
78
+ ],
79
+ "layout": {"x": 0, "y": 0, "w": 12, "h": 6, "i": 1}
80
+ },
81
+ {
82
+ "id": "stat_errors",
83
+ "type": "metric",
84
+ "title": "Errors",
85
+ "description": "Classifications whose method failed (is_error=true)",
86
+ "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
87
+ "queryType": "sql",
88
+ "queries": [
89
+ {
90
+ "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true'",
91
+ "vrlFunctionQuery": "",
92
+ "customQuery": true,
93
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Errors", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b25959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
94
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
95
+ }
96
+ ],
97
+ "layout": {"x": 12, "y": 0, "w": 12, "h": 6, "i": 2}
98
+ },
99
+ {
100
+ "id": "stat_untracked",
101
+ "type": "metric",
102
+ "title": "Untracked",
103
+ "description": "Sessions classified as untracked (no ticket)",
104
+ "config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
105
+ "queryType": "sql",
106
+ "queries": [
107
+ {
108
+ "query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND session_type='untracked'",
109
+ "vrlFunctionQuery": "",
110
+ "customQuery": true,
111
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Untracked", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b29959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
112
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
113
+ }
114
+ ],
115
+ "layout": {"x": 24, "y": 0, "w": 12, "h": 6, "i": 3}
116
+ },
117
+ {
118
+ "id": "stat_conf",
119
+ "type": "metric",
120
+ "title": "Avg confidence",
121
+ "description": "Mean confidence of successful classifications",
122
+ "config": {"show_legends": false, "unit": null, "decimals": 2, "no_value_replacement": "0"},
123
+ "queryType": "sql",
124
+ "queries": [
125
+ {
126
+ "query": "SELECT round(avg(CAST(confidence AS DOUBLE)),2) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='false'",
127
+ "vrlFunctionQuery": "",
128
+ "customQuery": true,
129
+ "fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Avg confidence", "alias": "y_axis_1", "column": "y_axis_1", "color": "#59b27a", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
130
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
131
+ }
132
+ ],
133
+ "layout": {"x": 36, "y": 0, "w": 12, "h": 6, "i": 4}
134
+ },
135
+ {
136
+ "id": "table_all",
137
+ "type": "table",
138
+ "title": "All classifications (newest first)",
139
+ "description": "Filter with the Session ID / Session type / Errors only variables above. Click any row → opens the Traces view filtered to just that trace's spans.",
140
+ "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
141
+ "queryType": "sql",
142
+ "queries": [
143
+ {
144
+ "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') ORDER BY _timestamp DESC",
145
+ "vrlFunctionQuery": "",
146
+ "customQuery": true,
147
+ "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
148
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
149
+ }
150
+ ],
151
+ "layout": {"x": 0, "y": 6, "w": 48, "h": 14, "i": 5}
152
+ },
153
+ {
154
+ "id": "table_errors",
155
+ "type": "table",
156
+ "title": "Errors only",
157
+ "description": "Failed classifications — inference errors, schema errors, invalid task_key, session-not-found. Click a row → opens just that trace's spans.",
158
+ "config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
159
+ "queryType": "sql",
160
+ "queries": [
161
+ {
162
+ "query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' ORDER BY _timestamp DESC",
163
+ "vrlFunctionQuery": "",
164
+ "customQuery": true,
165
+ "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
166
+ "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
167
+ }
168
+ ],
169
+ "layout": {"x": 0, "y": 20, "w": 48, "h": 10, "i": 6}
170
+ }
171
+ ]
172
+ }
173
+ ]
174
+ }
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.54.0"
7
+ version = "1.55.0"
8
8
  description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]
package/ui.tar.gz CHANGED
Binary file