@meridiona/meridian-darwin-arm64 1.54.0 → 1.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -0
- package/VERSION +1 -1
- package/bin/meridian +0 -0
- package/package.json +1 -1
- package/scripts/install-openobserve-daemon.sh +7 -3
- package/services/agents/observability.py +188 -17
- package/services/agents/run_task_linker_mlx.py +211 -41
- package/services/agents/server.py +138 -71
- package/services/agents/tests/test_run_task_linker_mlx.py +53 -0
- package/services/observability/dashboards/classifier-debug.json +174 -0
- package/services/pyproject.toml +1 -1
- package/ui.tar.gz +0 -0
package/.env.example
CHANGED
|
@@ -19,6 +19,15 @@
|
|
|
19
19
|
# MLX_SERVER_HOST=127.0.0.1
|
|
20
20
|
# MLX_SERVER_PORT=7823
|
|
21
21
|
|
|
22
|
+
# Idle eviction for the MLX model. The model holds ~7 GB of Metal memory while
|
|
23
|
+
# resident, but classification is bursty — so the server unloads it after this
|
|
24
|
+
# many seconds idle and reloads on the next request (~3 s cold start). Default
|
|
25
|
+
# 120s (aggressive: lightest idle footprint). Raise it to keep the model warm
|
|
26
|
+
# longer; set 0 to disable eviction (pin the model in memory). Avoid values
|
|
27
|
+
# below ~30s: if the TTL drops under the gap between sessions in a classification
|
|
28
|
+
# burst, the model evicts and cold-reloads (~3 s) repeatedly mid-burst.
|
|
29
|
+
# MLX_IDLE_EVICT_S=120
|
|
30
|
+
|
|
22
31
|
# Dashboard (Next.js UI) port. Defaults to 3939. Change this and re-run
|
|
23
32
|
# `meridian setup` to move the dashboard.
|
|
24
33
|
# MERIDIAN_UI_PORT=3939
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.54.0
|
|
1
|
+
1.55.0
|
package/bin/meridian
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@meridiona/meridian-darwin-arm64",
|
|
3
|
-
"version": "1.54.0",
|
|
3
|
+
"version": "1.55.0",
|
|
4
4
|
"description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
|
|
5
5
|
"homepage": "https://github.com/Meridiona/meridian",
|
|
6
6
|
"repository": {
|
|
package/scripts/install-openobserve-daemon.sh
CHANGED
|
@@ -41,15 +41,19 @@ elif command -v openobserve >/dev/null 2>&1; then
|
|
|
41
41
|
fi
|
|
42
42
|
|
|
43
43
|
if [[ -z "${OO_BIN}" ]]; then
|
|
44
|
-
echo "→ OpenObserve binary not found — downloading v0.
|
|
44
|
+
echo "→ OpenObserve binary not found — downloading v0.90.3..."
|
|
45
45
|
_oo_arch="$(uname -m)"
|
|
46
46
|
case "$_oo_arch" in
|
|
47
47
|
arm64) _oo_arch="arm64" ;;
|
|
48
48
|
x86_64) _oo_arch="amd64" ;;
|
|
49
49
|
*) echo "✗ Unsupported arch: $_oo_arch" >&2; exit 1 ;;
|
|
50
50
|
esac
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
# GitHub release assets were removed for recent versions; binaries now live on
|
|
52
|
+
# the official downloads host. Trace deep-linking (dashboard drilldown into a
|
|
53
|
+
# single trace's spans) needs a modern build, so we pin a current stable.
|
|
54
|
+
# KEEP IN SYNC: the same version is pinned in install.sh — bump both together.
|
|
55
|
+
_oo_ver="v0.90.3"
|
|
56
|
+
_oo_url="https://downloads.openobserve.ai/releases/openobserve/${_oo_ver}/openobserve-${_oo_ver}-darwin-${_oo_arch}.tar.gz"
|
|
53
57
|
mkdir -p "${HOME}/.openobserve"
|
|
54
58
|
if curl -fsSL -o "${HOME}/.openobserve/openobserve.tar.gz" "$_oo_url" \
|
|
55
59
|
&& tar -xzf "${HOME}/.openobserve/openobserve.tar.gz" -C "${HOME}/.openobserve" \
|
|
package/services/agents/observability.py
CHANGED
|
@@ -4,7 +4,9 @@ A single `setup(agent_name)` call wires up:
|
|
|
4
4
|
|
|
5
5
|
* an OTel `TracerProvider` with `service.name=agent_name`
|
|
6
6
|
* a `BatchSpanProcessor` exporting OTLP/HTTP-protobuf spans to OpenObserve
|
|
7
|
-
|
|
7
|
+
* a `LoggerProvider` + OTLP-logs handler so every `logging.LogRecord` is also
|
|
8
|
+
shipped to OpenObserve (correlated to the active span), mirroring the Rust
|
|
9
|
+
daemon's `OpenTelemetryTracingBridge`
|
|
8
10
|
* W3C `TraceContextTextMapPropagator` as the global propagator so each
|
|
9
11
|
agent can pick up the Rust daemon's `traceparent` and continue the trace
|
|
10
12
|
* `LoggingInstrumentor` so every `logging.LogRecord` carries
|
|
@@ -13,6 +15,12 @@ A single `setup(agent_name)` call wires up:
|
|
|
13
15
|
under `~/.meridian/logs/{agent_name}.jsonl` plus stderr — both ingestable
|
|
14
16
|
by OpenObserve's log pipeline without further parsing.
|
|
15
17
|
|
|
18
|
+
Export config (endpoint + Basic-auth credentials) is resolved from the SAME
|
|
19
|
+
`~/.meridian/settings.json` the Rust daemon reads — `otlp_enabled`,
|
|
20
|
+
`otlp_endpoint`, `oo_email`, `oo_password` — so the dashboard Settings page is
|
|
21
|
+
the single source of truth for both processes. The legacy `MERIDIAN_OO_AUTH`
|
|
22
|
+
env credential is deprecated and ignored, matching the daemon.
|
|
23
|
+
|
|
16
24
|
`extract_parent_context(traceparent)` is the helper agents use to continue
|
|
17
25
|
a span emitted by another process — typically the Rust ETL or another
|
|
18
26
|
agent stage.
|
|
@@ -23,20 +31,26 @@ single-shot CLI paths funnel through the same module.
|
|
|
23
31
|
"""
|
|
24
32
|
from __future__ import annotations
|
|
25
33
|
|
|
34
|
+
import base64
|
|
35
|
+
import json
|
|
26
36
|
import logging
|
|
27
37
|
import logging.handlers
|
|
28
38
|
import os
|
|
29
39
|
import sys
|
|
30
40
|
from pathlib import Path
|
|
31
|
-
from typing import Optional
|
|
41
|
+
from typing import NamedTuple, Optional
|
|
32
42
|
|
|
33
43
|
from opentelemetry import trace
|
|
44
|
+
from opentelemetry._logs import set_logger_provider
|
|
34
45
|
from opentelemetry.context import Context
|
|
46
|
+
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
|
|
35
47
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
|
|
36
48
|
OTLPSpanExporter,
|
|
37
49
|
)
|
|
38
50
|
from opentelemetry.instrumentation.logging import LoggingInstrumentor
|
|
39
51
|
from opentelemetry.propagate import set_global_textmap
|
|
52
|
+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
|
53
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
|
40
54
|
from opentelemetry.sdk.resources import Resource
|
|
41
55
|
from opentelemetry.sdk.trace import TracerProvider
|
|
42
56
|
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
@@ -50,12 +64,125 @@ from pythonjsonlogger import jsonlogger
|
|
|
50
64
|
DEFAULT_TRACES_ENDPOINT = "http://localhost:5080/api/default/v1/traces"
|
|
51
65
|
DEFAULT_LOGS_ENDPOINT = "http://localhost:5080/api/default/v1/logs"
|
|
52
66
|
DEFAULT_LOG_DIR = Path.home() / ".meridian" / "logs"
|
|
67
|
+
# Single source of truth for OpenObserve export config — the SAME file the Rust
|
|
68
|
+
# daemon reads (see `src/observability.rs::resolve_otlp_target`). Keeps the two
|
|
69
|
+
# processes credential-aligned: the dashboard Settings page writes here and both
|
|
70
|
+
# the daemon and this MLX server pick it up with no env plumbing.
|
|
71
|
+
_SETTINGS_PATH = Path(
|
|
72
|
+
os.environ.get("MERIDIAN_SETTINGS_PATH")
|
|
73
|
+
or (Path.home() / ".meridian" / "settings.json")
|
|
74
|
+
)
|
|
53
75
|
|
|
54
76
|
_NOISY_LOGGERS = ("urllib3", "httpx", "httpcore", "openai", "botocore")
|
|
55
77
|
|
|
56
78
|
# Track which agents have been configured so a second setup() call is a no-op.
|
|
57
79
|
_INITIALISED: dict[str, trace.Tracer] = {}
|
|
58
80
|
_PROCESS_SERVICE_NAME: str | None = None
|
|
81
|
+
# Held so shutdown() can flush log records the same way it flushes spans.
|
|
82
|
+
_LOGGER_PROVIDER: LoggerProvider | None = None
|
|
83
|
+
# One-time guard so an export misconfiguration (enabled-but-no-creds, or a
|
|
84
|
+
# schemeless endpoint) warns once per process instead of on every resolve.
|
|
85
|
+
_WARNED_EXPORT_MISCONFIG: bool = False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ──────────────────────── OTLP target resolution ───────────────────────────────
|
|
89
|
+
class _OtlpTarget(NamedTuple):
|
|
90
|
+
"""Resolved OTLP export target: signal endpoints + Basic-auth header value."""
|
|
91
|
+
|
|
92
|
+
traces_endpoint: str
|
|
93
|
+
logs_endpoint: str
|
|
94
|
+
headers: dict[str, str]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _load_settings() -> dict[str, object]:
|
|
98
|
+
"""Read `~/.meridian/settings.json`; empty dict if absent/unreadable."""
|
|
99
|
+
try:
|
|
100
|
+
with _SETTINGS_PATH.open(encoding="utf-8") as fh:
|
|
101
|
+
data = json.load(fh)
|
|
102
|
+
return data if isinstance(data, dict) else {}
|
|
103
|
+
except (OSError, ValueError):
|
|
104
|
+
return {}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _resolve_otlp_target() -> Optional[_OtlpTarget]:
|
|
108
|
+
"""Mirror of the Rust daemon's `resolve_otlp_target()`.
|
|
109
|
+
|
|
110
|
+
Returns `None` (→ export disabled) when the toggle is off or credentials
|
|
111
|
+
are missing. Endpoint precedence: settings.json `otlp_endpoint` → the
|
|
112
|
+
`MERIDIAN_OTLP_TRACES_ENDPOINT`/`MERIDIAN_OTLP_ENDPOINT` env override →
|
|
113
|
+
the localhost default. Auth is `base64(oo_email:oo_password)` — settings.json
|
|
114
|
+
only; the legacy `MERIDIAN_OO_AUTH` env path is deprecated and ignored, the
|
|
115
|
+
same decision the daemon made.
|
|
116
|
+
"""
|
|
117
|
+
global _WARNED_EXPORT_MISCONFIG
|
|
118
|
+
|
|
119
|
+
if os.environ.get("MERIDIAN_TRACING_DISABLED", "").lower() in ("1", "true", "yes"):
|
|
120
|
+
return None
|
|
121
|
+
|
|
122
|
+
settings = _load_settings()
|
|
123
|
+
if not settings.get("otlp_enabled"):
|
|
124
|
+
return None
|
|
125
|
+
|
|
126
|
+
# Resolve the endpoint up front so we can warn (not silently disable) when
|
|
127
|
+
# export is enabled but unusable. Precedence: settings → env → localhost.
|
|
128
|
+
configured = str(settings.get("otlp_endpoint") or "").strip()
|
|
129
|
+
env_endpoint = (
|
|
130
|
+
os.environ.get("MERIDIAN_OTLP_TRACES_ENDPOINT", "").strip()
|
|
131
|
+
or os.environ.get("MERIDIAN_OTLP_ENDPOINT", "").strip()
|
|
132
|
+
)
|
|
133
|
+
traces_endpoint = configured or env_endpoint or DEFAULT_TRACES_ENDPOINT
|
|
134
|
+
|
|
135
|
+
def _warn_once(msg: str, *args: object) -> None:
|
|
136
|
+
global _WARNED_EXPORT_MISCONFIG
|
|
137
|
+
if not _WARNED_EXPORT_MISCONFIG:
|
|
138
|
+
_WARNED_EXPORT_MISCONFIG = True
|
|
139
|
+
logging.getLogger(__name__).warning(msg, *args)
|
|
140
|
+
|
|
141
|
+
email = str(settings.get("oo_email") or "")
|
|
142
|
+
password = str(settings.get("oo_password") or "")
|
|
143
|
+
if not email or not password:
|
|
144
|
+
# otlp_enabled but no usable credentials → export OFF. Warn once so an
|
|
145
|
+
# env-only (MERIDIAN_OO_AUTH) install that predates the settings.json
|
|
146
|
+
# credential move doesn't go dark silently — mirrors the daemon, which
|
|
147
|
+
# also warns. MERIDIAN_OO_AUTH is no longer read here.
|
|
148
|
+
_warn_once(
|
|
149
|
+
"OpenObserve export enabled but oo_email/oo_password missing in %s — "
|
|
150
|
+
"traces+logs export DISABLED. Set credentials in the dashboard Settings "
|
|
151
|
+
"(the MERIDIAN_OO_AUTH env var is no longer used).",
|
|
152
|
+
_SETTINGS_PATH,
|
|
153
|
+
)
|
|
154
|
+
return None
|
|
155
|
+
# Guard against HTTP header injection / malformed user:password splits —
|
|
156
|
+
# matches the daemon's same-named check.
|
|
157
|
+
if any(c in email for c in "\r\n:") or any(c in password for c in "\r\n"):
|
|
158
|
+
return None
|
|
159
|
+
auth = base64.standard_b64encode(f"{email}:{password}".encode()).decode()
|
|
160
|
+
|
|
161
|
+
# Validate scheme — only http/https are valid OTLP transports. The daemon
|
|
162
|
+
# disables export + warns on a schemeless endpoint; mirror that exactly so the
|
|
163
|
+
# two processes don't disagree on whether export is on.
|
|
164
|
+
if not (traces_endpoint.startswith("http://") or traces_endpoint.startswith("https://")):
|
|
165
|
+
_warn_once(
|
|
166
|
+
"OTLP endpoint %r has no http/https scheme — export DISABLED.",
|
|
167
|
+
traces_endpoint,
|
|
168
|
+
)
|
|
169
|
+
return None
|
|
170
|
+
|
|
171
|
+
# OpenObserve serves logs at the sibling `…/v1/logs` path. Derive it from the
|
|
172
|
+
# traces endpoint by swapping the trailing signal segment so a custom host or
|
|
173
|
+
# base (incl. a trailing slash, e.g. `…/v1/traces/`) carries to BOTH signals —
|
|
174
|
+
# never silently fall back to localhost for logs while traces go remote.
|
|
175
|
+
t = traces_endpoint.rstrip("/")
|
|
176
|
+
if t.endswith("/v1/traces"):
|
|
177
|
+
logs_endpoint = t[: -len("/v1/traces")] + "/v1/logs"
|
|
178
|
+
elif t.endswith("/traces"):
|
|
179
|
+
logs_endpoint = t[: -len("/traces")] + "/logs"
|
|
180
|
+
elif "traces" in t:
|
|
181
|
+
logs_endpoint = t.rsplit("traces", 1)[0] + "logs"
|
|
182
|
+
else:
|
|
183
|
+
logs_endpoint = t + "/v1/logs"
|
|
184
|
+
|
|
185
|
+
return _OtlpTarget(traces_endpoint, logs_endpoint, {"Authorization": f"Basic {auth}"})
|
|
59
186
|
|
|
60
187
|
|
|
61
188
|
# ──────────────────────── Public API ───────────────────────────────────────────
|
|
@@ -80,8 +207,13 @@ def setup(agent_name: str) -> trace.Tracer:
|
|
|
80
207
|
|
|
81
208
|
if _PROCESS_SERVICE_NAME is None:
|
|
82
209
|
_PROCESS_SERVICE_NAME = agent_name
|
|
83
|
-
|
|
84
|
-
|
|
210
|
+
# Resolve the export target ONCE and pass it to both configurers — a
|
|
211
|
+
# second read could see a settings.json the dashboard rewrote mid-setup
|
|
212
|
+
# (TOCTOU), leaving traces enabled while logs resolve disabled (or with
|
|
213
|
+
# different creds/endpoint) in the same process.
|
|
214
|
+
target = _resolve_otlp_target()
|
|
215
|
+
_configure_tracing(agent_name, target)
|
|
216
|
+
_configure_logging(agent_name, target)
|
|
85
217
|
logging.getLogger(agent_name).info(
|
|
86
218
|
"observability initialised",
|
|
87
219
|
extra={"service.name": agent_name},
|
|
@@ -105,6 +237,12 @@ def shutdown() -> None:
|
|
|
105
237
|
if hasattr(provider, "shutdown"):
|
|
106
238
|
provider.shutdown()
|
|
107
239
|
|
|
240
|
+
# Flush queued log records too — BatchLogRecordProcessor drops them on
|
|
241
|
+
# interpreter exit otherwise, the same hazard as spans.
|
|
242
|
+
if _LOGGER_PROVIDER is not None:
|
|
243
|
+
_LOGGER_PROVIDER.force_flush(timeout_millis=5_000)
|
|
244
|
+
_LOGGER_PROVIDER.shutdown()
|
|
245
|
+
|
|
108
246
|
|
|
109
247
|
def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
|
|
110
248
|
"""Parse an incoming W3C `traceparent` header into an OTel `Context`.
|
|
@@ -119,21 +257,14 @@ def extract_parent_context(traceparent: Optional[str]) -> Optional[Context]:
|
|
|
119
257
|
|
|
120
258
|
|
|
121
259
|
# ──────────────────────── Tracing setup ────────────────────────────────────────
|
|
122
|
-
def _configure_tracing(agent_name: str) -> None:
|
|
260
|
+
def _configure_tracing(agent_name: str, target: Optional[_OtlpTarget]) -> None:
|
|
123
261
|
resource = Resource.create({"service.name": agent_name})
|
|
124
262
|
provider = TracerProvider(resource=resource)
|
|
125
263
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
)
|
|
131
|
-
if not disabled and endpoint:
|
|
132
|
-
headers: dict[str, str] = {}
|
|
133
|
-
auth = os.environ.get("MERIDIAN_OO_AUTH")
|
|
134
|
-
if auth:
|
|
135
|
-
headers["Authorization"] = f"Basic {auth}"
|
|
136
|
-
exporter = OTLPSpanExporter(endpoint=endpoint, headers=headers)
|
|
264
|
+
if target is not None:
|
|
265
|
+
exporter = OTLPSpanExporter(
|
|
266
|
+
endpoint=target.traces_endpoint, headers=target.headers
|
|
267
|
+
)
|
|
137
268
|
provider.add_span_processor(BatchSpanProcessor(exporter))
|
|
138
269
|
|
|
139
270
|
# Set as the global provider. OTel's `set_tracer_provider` warns if
|
|
@@ -143,8 +274,32 @@ def _configure_tracing(agent_name: str) -> None:
|
|
|
143
274
|
set_global_textmap(TraceContextTextMapPropagator())
|
|
144
275
|
|
|
145
276
|
|
|
277
|
+
def _configure_log_export(
|
|
278
|
+
agent_name: str, target: Optional[_OtlpTarget]
|
|
279
|
+
) -> Optional[logging.Handler]:
|
|
280
|
+
"""Build an OTLP-logs handler so every `log.*` record reaches OpenObserve,
|
|
281
|
+
correlated to the active span by trace_id/span_id — the Python counterpart
|
|
282
|
+
of the Rust daemon's `OpenTelemetryTracingBridge`.
|
|
283
|
+
|
|
284
|
+
Returns the handler (caller attaches it to root) or `None` when export is
|
|
285
|
+
disabled, in which case logs still go to the JSONL file + stdout/stderr.
|
|
286
|
+
"""
|
|
287
|
+
global _LOGGER_PROVIDER
|
|
288
|
+
|
|
289
|
+
if target is None:
|
|
290
|
+
return None
|
|
291
|
+
|
|
292
|
+
resource = Resource.create({"service.name": agent_name})
|
|
293
|
+
provider = LoggerProvider(resource=resource)
|
|
294
|
+
exporter = OTLPLogExporter(endpoint=target.logs_endpoint, headers=target.headers)
|
|
295
|
+
provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
|
|
296
|
+
set_logger_provider(provider)
|
|
297
|
+
_LOGGER_PROVIDER = provider
|
|
298
|
+
return LoggingHandler(level=logging.NOTSET, logger_provider=provider)
|
|
299
|
+
|
|
300
|
+
|
|
146
301
|
# ──────────────────────── Logging setup ────────────────────────────────────────
|
|
147
|
-
def _configure_logging(agent_name: str) -> None:
|
|
302
|
+
def _configure_logging(agent_name: str, target: Optional[_OtlpTarget]) -> None:
|
|
148
303
|
log_dir = Path(os.environ.get("MERIDIAN_LOG_DIR") or DEFAULT_LOG_DIR)
|
|
149
304
|
log_dir.mkdir(parents=True, exist_ok=True)
|
|
150
305
|
log_path = log_dir / f"{agent_name}.jsonl"
|
|
@@ -204,6 +359,22 @@ def _configure_logging(agent_name: str) -> None:
|
|
|
204
359
|
root.addHandler(file_h)
|
|
205
360
|
root.addHandler(stdout_h)
|
|
206
361
|
root.addHandler(stderr_h)
|
|
362
|
+
# Ship every record to OpenObserve via OTLP/HTTP logs too, when export is
|
|
363
|
+
# configured. The OTel LoggingHandler reads the active span context, so each
|
|
364
|
+
# OO log row carries the trace_id/span_id that ties it to the classifier's
|
|
365
|
+
# span waterfall. No-op (None) when OTLP is disabled.
|
|
366
|
+
# The OTLP handler already carries service.name via the OTel Resource, so it
|
|
367
|
+
# needs no _ServiceFilter (that would duplicate the attribute on each record).
|
|
368
|
+
otlp_log_h = _configure_log_export(agent_name, target)
|
|
369
|
+
if otlp_log_h is not None:
|
|
370
|
+
# Do NOT feed the OTLP exporter's OWN transport logs back into OTLP
|
|
371
|
+
# export: on export failure httpx/urllib3/opentelemetry emit WARNING+
|
|
372
|
+
# records which this root handler would try to export → more failures (a
|
|
373
|
+
# log→export→log loop). Drop those from THIS handler only — they still
|
|
374
|
+
# reach the file/stderr handlers.
|
|
375
|
+
_otlp_excluded = ("httpx", "httpcore", "urllib3", "grpc", "opentelemetry")
|
|
376
|
+
otlp_log_h.addFilter(lambda r: not r.name.startswith(_otlp_excluded))
|
|
377
|
+
root.addHandler(otlp_log_h)
|
|
207
378
|
root.setLevel(level)
|
|
208
379
|
|
|
209
380
|
for noisy in _NOISY_LOGGERS:
|
|
package/services/agents/run_task_linker_mlx.py
CHANGED
|
@@ -25,15 +25,19 @@ Method tag in results: "mlx_direct".
|
|
|
25
25
|
from __future__ import annotations
|
|
26
26
|
|
|
27
27
|
import datetime as _dt
|
|
28
|
+
import gc
|
|
28
29
|
import json
|
|
29
30
|
import logging
|
|
30
31
|
import os
|
|
31
32
|
import sqlite3 as _sqlite3
|
|
32
33
|
import sys
|
|
34
|
+
import threading
|
|
33
35
|
import time
|
|
36
|
+
from contextlib import contextmanager
|
|
34
37
|
from pathlib import Path
|
|
35
|
-
from typing import Any, Literal, Optional
|
|
38
|
+
from typing import Any, Literal, Optional, Iterator
|
|
36
39
|
|
|
40
|
+
from opentelemetry import trace
|
|
37
41
|
from opentelemetry.trace import StatusCode
|
|
38
42
|
from pydantic import BaseModel, Field
|
|
39
43
|
|
|
@@ -234,42 +238,144 @@ _SYSTEM_PROMPT = (
|
|
|
234
238
|
|
|
235
239
|
|
|
236
240
|
# ---------------------------------------------------------------------------
|
|
237
|
-
# Model loading —
|
|
238
|
-
#
|
|
239
|
-
#
|
|
241
|
+
# Model loading — loaded lazily on first use, evicted when idle.
|
|
242
|
+
#
|
|
243
|
+
# The MLX model holds ~7 GB of Metal unified memory while resident (measured;
|
|
244
|
+
# note `ps`/Activity Monitor RSS does NOT show it). Classification is bursty,
|
|
245
|
+
# so we keep the model only while it's being used: load on first inference,
|
|
246
|
+
# and evict after MLX_IDLE_EVICT_S of inactivity (server.py runs the evictor).
|
|
247
|
+
# `del + gc.collect() + mx.clear_cache()` reclaims the full 7 GB; cold reload
|
|
248
|
+
# is ~3 s. `_model_lock` + `_in_flight` guarantee the evictor never frees the
|
|
249
|
+
# model out from under an in-flight inference.
|
|
240
250
|
# ---------------------------------------------------------------------------
|
|
241
251
|
|
|
242
252
|
_model_cache: dict[str, Any] = {}
|
|
253
|
+
_model_lock = threading.Lock() # guards _model_cache mutation, _in_flight, _last_used, eviction
|
|
254
|
+
_in_flight = 0 # inferences currently using the model
|
|
255
|
+
_last_used = time.monotonic() # monotonic ts of the last finished inference
|
|
256
|
+
|
|
257
|
+
# Aggressive default (2 min): the model is present only during active bursts.
|
|
258
|
+
# Tune via env without a code change; 0 disables idle eviction entirely.
|
|
259
|
+
_IDLE_EVICT_S = float(os.environ.get("MLX_IDLE_EVICT_S", "120"))
|
|
243
260
|
|
|
244
261
|
|
|
245
262
|
def _get_model() -> Any:
|
|
246
|
-
"""Return an outlines-wrapped model, loading from disk on the first call.
|
|
263
|
+
"""Return an outlines-wrapped model, loading from disk on the first call.
|
|
264
|
+
|
|
265
|
+
Cache-miss load is done under _model_lock (double-checked) so concurrent
|
|
266
|
+
callers can't double-load and the idle evictor can't race the load.
|
|
267
|
+
"""
|
|
247
268
|
model_id = _resolve_model_id()
|
|
248
|
-
|
|
249
|
-
|
|
269
|
+
cached = _model_cache.get(model_id)
|
|
270
|
+
if cached is not None:
|
|
271
|
+
return cached
|
|
272
|
+
|
|
273
|
+
with _model_lock:
|
|
274
|
+
cached = _model_cache.get(model_id) # re-check under lock
|
|
275
|
+
if cached is not None:
|
|
276
|
+
return cached
|
|
277
|
+
try:
|
|
278
|
+
import mlx_lm
|
|
279
|
+
import outlines
|
|
280
|
+
except ImportError as exc:
|
|
281
|
+
raise ImportError(
|
|
282
|
+
f"Required package not installed: {exc}. "
|
|
283
|
+
"Install with: pip install 'mlx-lm>=0.22' 'outlines[mlxlm]>=1.3'"
|
|
284
|
+
) from exc
|
|
285
|
+
|
|
286
|
+
log.info(
|
|
287
|
+
"run_task_linker_mlx: loading %s (first call this process)", model_id
|
|
288
|
+
)
|
|
289
|
+
t0 = time.time()
|
|
290
|
+
mlx_model, tokenizer = mlx_lm.load(
|
|
291
|
+
model_id,
|
|
292
|
+
tokenizer_config={"trust_remote_code": True},
|
|
293
|
+
)
|
|
294
|
+
outlines_model = outlines.from_mlxlm(mlx_model, tokenizer)
|
|
295
|
+
log.info("run_task_linker_mlx: model loaded in %.1fs", time.time() - t0)
|
|
250
296
|
|
|
297
|
+
_model_cache[model_id] = outlines_model
|
|
298
|
+
return outlines_model
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@contextmanager
|
|
302
|
+
def model_session() -> Iterator[Any]:
|
|
303
|
+
"""Yield the loaded model, marking it in-flight so the idle evictor never
|
|
304
|
+
frees it mid-inference. Wrap every direct ``model(...)`` call in this.
|
|
305
|
+
|
|
306
|
+
Lock is held only briefly (to bump/clear the in-flight counter), never for
|
|
307
|
+
the duration of inference. NOTE: production serialises all MLX calls upstream
|
|
308
|
+
via the Rust llm_gate (1-permit semaphore), so inferences don't actually
|
|
309
|
+
overlap — this lock scope just avoids adding a second, redundant serialisation
|
|
310
|
+
point, NOT a claim that concurrent generation on the shared model is safe.
|
|
311
|
+
"""
|
|
312
|
+
global _in_flight, _last_used
|
|
313
|
+
with _model_lock:
|
|
314
|
+
_in_flight += 1
|
|
251
315
|
try:
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
316
|
+
yield _get_model()
|
|
317
|
+
finally:
|
|
318
|
+
with _model_lock:
|
|
319
|
+
_in_flight -= 1
|
|
320
|
+
_last_used = time.monotonic()
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def maybe_evict_idle(idle_s: float | None = None) -> float | None:
|
|
324
|
+
"""Evict the model if it's resident, nothing is in flight, and it's been
|
|
325
|
+
idle longer than ``idle_s`` (default MLX_IDLE_EVICT_S). Returns the GB freed,
|
|
326
|
+
or None if no eviction happened. Safe to call from a threadpool worker.
|
|
327
|
+
|
|
328
|
+
Uses a non-blocking lock acquire: if an inference/load is mutating state we
|
|
329
|
+
simply skip this tick and try again on the next one.
|
|
330
|
+
"""
|
|
331
|
+
ttl = _IDLE_EVICT_S if idle_s is None else idle_s
|
|
332
|
+
if ttl <= 0:
|
|
333
|
+
return None
|
|
334
|
+
if not _model_lock.acquire(blocking=False):
|
|
335
|
+
return None
|
|
336
|
+
try:
|
|
337
|
+
if _in_flight > 0 or not _model_cache:
|
|
338
|
+
return None
|
|
339
|
+
if (time.monotonic() - _last_used) < ttl:
|
|
340
|
+
return None
|
|
341
|
+
try:
|
|
342
|
+
import mlx.core as mx
|
|
343
|
+
before = mx.get_active_memory()
|
|
344
|
+
except Exception: # noqa: BLE001 — mx should always import here
|
|
345
|
+
mx, before = None, 0
|
|
346
|
+
_model_cache.clear()
|
|
347
|
+
gc.collect()
|
|
348
|
+
freed = 0.0
|
|
349
|
+
if mx is not None:
|
|
350
|
+
mx.clear_cache()
|
|
351
|
+
freed = max(0.0, (before - mx.get_active_memory()) / 1e9)
|
|
352
|
+
log.info(
|
|
353
|
+
"run_task_linker_mlx: evicted idle model (idle ≥ %.0fs), freed ~%.1f GB",
|
|
354
|
+
ttl, freed,
|
|
355
|
+
)
|
|
356
|
+
return freed
|
|
357
|
+
finally:
|
|
358
|
+
_model_lock.release()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def model_resident() -> bool:
|
|
362
|
+
"""True if the MLX model is currently loaded in memory."""
|
|
363
|
+
return bool(_model_cache)
|
|
270
364
|
|
|
271
|
-
|
|
272
|
-
|
|
365
|
+
|
|
366
|
+
def model_active_memory_gb() -> float | None:
|
|
367
|
+
"""Live Metal active-memory footprint in GB, or None if MLX is unavailable.
|
|
368
|
+
|
|
369
|
+
Process-wide Metal active memory (≈ the model when resident — the model
|
|
370
|
+
dominates, though a transient load allocation can briefly inflate it), and
|
|
371
|
+
the only honest measure: `ps`/Activity Monitor can't see Metal unified
|
|
372
|
+
memory (they undercount by ~6.5 GB).
|
|
373
|
+
"""
|
|
374
|
+
try:
|
|
375
|
+
import mlx.core as mx
|
|
376
|
+
return round(mx.get_active_memory() / 1e9, 2)
|
|
377
|
+
except Exception: # noqa: BLE001 — mx absent on non-MLX machines
|
|
378
|
+
return None
|
|
273
379
|
|
|
274
380
|
|
|
275
381
|
# Apple Foundation Models has a 4096-token combined context window (input + output).
|
|
@@ -705,14 +811,14 @@ def _classify_one(
|
|
|
705
811
|
from mlx_lm.sample_utils import make_sampler
|
|
706
812
|
from outlines.inputs import Chat
|
|
707
813
|
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
814
|
+
with model_session() as model:
|
|
815
|
+
raw = model(
|
|
816
|
+
Chat(messages),
|
|
817
|
+
output_type=SessionClassification,
|
|
818
|
+
max_tokens=_MAX_TOKENS,
|
|
819
|
+
sampler=make_sampler(temp=_TEMPERATURE),
|
|
820
|
+
verbose=False,
|
|
821
|
+
)
|
|
716
822
|
except Exception as exc:
|
|
717
823
|
elapsed = time.time() - t0
|
|
718
824
|
outcome = "apple_fm_error" if _use_apple_fm else "mlx_error"
|
|
@@ -840,10 +946,76 @@ def _open_run_log(db_path: str) -> "tuple[Path, Any]":
|
|
|
840
946
|
return log_path, log_path.open("w", encoding="utf-8")
|
|
841
947
|
|
|
842
948
|
|
|
949
|
+
# `method` values that mean the model produced a usable classification.
|
|
950
|
+
# Anything else is an error path the dashboard surfaces under errors-only. The
|
|
951
|
+
# real error `method` values emitted by `_error_result` are `mlx_parse_error`
|
|
952
|
+
# (schema validation / unknown task_key — those names are child-span `outcome`
|
|
953
|
+
# attributes, NOT methods) and `mlx_error` (inference failure / session-not-found).
|
|
954
|
+
_SUCCESS_METHODS = {"mlx_direct", "apple_fm"}
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
def _annotate_classification_span(result: dict[str, Any]) -> None:
|
|
958
|
+
"""Promote the classification result onto the enclosing `classify_session`
|
|
959
|
+
span so each session is ONE self-describing row in OpenObserve — filterable
|
|
960
|
+
by session_id / session_type / task_key / is_error without joining the child
|
|
961
|
+
spans. Both the server and CLI entry points wrap the call in a
|
|
962
|
+
`classify_session` span, so annotating the current span here covers both.
|
|
963
|
+
"""
|
|
964
|
+
span = trace.get_current_span()
|
|
965
|
+
if not span.is_recording():
|
|
966
|
+
return
|
|
967
|
+
method = str(result.get("method", ""))
|
|
968
|
+
task_key = result.get("task_key")
|
|
969
|
+
is_error = method not in _SUCCESS_METHODS
|
|
970
|
+
span.set_attribute("session_id", int(result.get("session_id", 0)))
|
|
971
|
+
span.set_attribute("task_key", task_key or "-")
|
|
972
|
+
span.set_attribute("has_task", task_key is not None)
|
|
973
|
+
span.set_attribute("session_type", str(result.get("session_type", "")))
|
|
974
|
+
span.set_attribute("category", str(result.get("category", "")))
|
|
975
|
+
span.set_attribute("confidence", float(result.get("confidence", 0.0)))
|
|
976
|
+
span.set_attribute(
|
|
977
|
+
"category_confidence", float(result.get("category_confidence", 0.0))
|
|
978
|
+
)
|
|
979
|
+
span.set_attribute("method", method)
|
|
980
|
+
span.set_attribute("elapsed_s", float(result.get("elapsed_s", 0.0)))
|
|
981
|
+
span.set_attribute("is_error", is_error)
|
|
982
|
+
if is_error:
|
|
983
|
+
span.set_status(StatusCode.ERROR, str(result.get("reasoning", method))[:300])
|
|
984
|
+
|
|
985
|
+
|
|
843
986
|
def _classify_one_logged(
|
|
844
987
|
session_id: int,
|
|
845
988
|
con: _sqlite3.Connection,
|
|
846
989
|
run_log: Any,
|
|
990
|
+
) -> dict[str, Any]:
|
|
991
|
+
"""Classify one session, marking the span is_error=true on ANY failure.
|
|
992
|
+
|
|
993
|
+
Wraps the inner worker so an UNHANDLED exception (a sqlite read error,
|
|
994
|
+
malformed window_titles JSON, …) still stamps is_error=true + ERROR status on
|
|
995
|
+
the enclosing classify_session span before propagating — otherwise the
|
|
996
|
+
dashboard's errors-only table (which filters is_error='true') silently misses
|
|
997
|
+
exactly the crashes an operator opens it to find. Handled failures already
|
|
998
|
+
return _error_result dicts that _annotate_classification_span marks.
|
|
999
|
+
"""
|
|
1000
|
+
try:
|
|
1001
|
+
return _classify_one_logged_inner(session_id, con, run_log)
|
|
1002
|
+
except Exception as exc: # noqa: BLE001 — annotate, then re-raise unchanged
|
|
1003
|
+
span = trace.get_current_span()
|
|
1004
|
+
if span.is_recording():
|
|
1005
|
+
span.set_attribute("session_id", int(session_id))
|
|
1006
|
+
span.set_attribute("is_error", True)
|
|
1007
|
+
span.set_attribute("method", "mlx_error")
|
|
1008
|
+
span.set_status(StatusCode.ERROR, str(exc)[:300])
|
|
1009
|
+
log.exception(
|
|
1010
|
+
"run_task_linker_mlx: unhandled error classifying session %d", session_id
|
|
1011
|
+
)
|
|
1012
|
+
raise
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
def _classify_one_logged_inner(
|
|
1016
|
+
session_id: int,
|
|
1017
|
+
con: _sqlite3.Connection,
|
|
1018
|
+
run_log: Any,
|
|
847
1019
|
) -> dict[str, Any]:
|
|
848
1020
|
"""Classify one session and append a full record to the run log."""
|
|
849
1021
|
# Gather inputs before classification so we can log them even on error.
|
|
@@ -888,6 +1060,7 @@ def _classify_one_logged(
|
|
|
888
1060
|
}
|
|
889
1061
|
run_log.write(json.dumps(record, default=str) + "\n")
|
|
890
1062
|
run_log.flush()
|
|
1063
|
+
_annotate_classification_span(result)
|
|
891
1064
|
return result
|
|
892
1065
|
|
|
893
1066
|
|
|
@@ -950,15 +1123,12 @@ def main() -> None:
|
|
|
950
1123
|
try:
|
|
951
1124
|
results: list[dict[str, Any]] = []
|
|
952
1125
|
for sid in session_ids:
|
|
953
|
-
with tracer.start_as_current_span("classify_session")
|
|
954
|
-
|
|
1126
|
+
with tracer.start_as_current_span("classify_session"):
|
|
1127
|
+
# _classify_one_logged enriches this span with the full
|
|
1128
|
+
# result (session_id, task_key, session_type, is_error, …).
|
|
955
1129
|
log.info("run_task_linker_mlx: classifying session %d", sid)
|
|
956
1130
|
result = _classify_one_logged(sid, con, run_log_file)
|
|
957
1131
|
results.append(result)
|
|
958
|
-
cls_span.set_attribute("task_key", result["task_key"] or "-")
|
|
959
|
-
cls_span.set_attribute("session_type", result["session_type"])
|
|
960
|
-
cls_span.set_attribute("method", result["method"])
|
|
961
|
-
cls_span.set_attribute("elapsed_s", result["elapsed_s"])
|
|
962
1132
|
log.info(
|
|
963
1133
|
"run_task_linker_mlx: session_id=%d task_key=%s "
|
|
964
1134
|
"session_type=%s elapsed_s=%.2f",
|
|
@@ -41,20 +41,58 @@ _DB_PATH = Path(os.environ.get("MERIDIAN_DB", Path.home() / ".meridian/meridian.
|
|
|
41
41
|
_app_state: dict[str, Any] = {}
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
async def _idle_evictor(mlx_module: Any) -> None:
|
|
45
|
+
"""Background loop: evict the MLX model after it has been idle long enough.
|
|
46
|
+
|
|
47
|
+
Runs the (briefly blocking) eviction in a threadpool so it never stalls the
|
|
48
|
+
event loop, and never raises out — the evictor must outlive transient errors.
|
|
49
|
+
"""
|
|
50
|
+
import asyncio
|
|
51
|
+
from fastapi.concurrency import run_in_threadpool
|
|
52
|
+
|
|
53
|
+
ttl = mlx_module._IDLE_EVICT_S
|
|
54
|
+
if ttl <= 0:
|
|
55
|
+
return
|
|
56
|
+
interval = max(15.0, ttl / 4.0) # check ~4× per idle window
|
|
57
|
+
while True:
|
|
58
|
+
await asyncio.sleep(interval)
|
|
59
|
+
try:
|
|
60
|
+
await run_in_threadpool(mlx_module.maybe_evict_idle)
|
|
61
|
+
except Exception as exc: # noqa: BLE001 — evictor must never die
|
|
62
|
+
log.warning("server: idle-evictor error: %s", exc)
|
|
63
|
+
|
|
64
|
+
|
|
44
65
|
@asynccontextmanager
|
|
45
66
|
async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
|
|
67
|
+
import asyncio
|
|
46
68
|
import datetime
|
|
47
69
|
import agents.run_task_linker_mlx as _mlx
|
|
48
70
|
_app_state["mlx_module"] = _mlx
|
|
49
71
|
_app_state["loaded_at"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
|
50
72
|
from agents.llm_selector import APPLE_INTELLIGENCE_ID
|
|
73
|
+
evictor: "asyncio.Task | None" = None
|
|
51
74
|
if _mlx._resolve_model_id() == APPLE_INTELLIGENCE_ID:
|
|
52
|
-
log.info("server:
|
|
75
|
+
log.info("server: Apple Intelligence backend — no MLX model to load")
|
|
76
|
+
elif _mlx._IDLE_EVICT_S > 0:
|
|
77
|
+
# Lazy: the ~7 GB model loads on the first inference and is evicted after
|
|
78
|
+
# MLX_IDLE_EVICT_S of inactivity, so the server idles light (~0.4 GB)
|
|
79
|
+
# instead of pinning ~7 GB of Metal memory for the whole process life.
|
|
80
|
+
log.info(
|
|
81
|
+
"server: MLX model loads on first request; idle-evict after %.0fs",
|
|
82
|
+
_mlx._IDLE_EVICT_S,
|
|
83
|
+
)
|
|
84
|
+
evictor = asyncio.create_task(_idle_evictor(_mlx))
|
|
53
85
|
else:
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
86
|
+
# Eviction disabled — don't spawn a no-op evictor task just to cancel it.
|
|
87
|
+
log.info("server: MLX model loads on first request; idle-eviction disabled (MLX_IDLE_EVICT_S=0)")
|
|
88
|
+
try:
|
|
89
|
+
yield
|
|
90
|
+
finally:
|
|
91
|
+
if evictor is not None:
|
|
92
|
+
import contextlib
|
|
93
|
+
evictor.cancel()
|
|
94
|
+
with contextlib.suppress(asyncio.CancelledError):
|
|
95
|
+
await evictor
|
|
58
96
|
|
|
59
97
|
|
|
60
98
|
app = FastAPI(title="Meridian Agent", version="1.0.0", lifespan=_lifespan)
|
|
@@ -76,12 +114,19 @@ async def health() -> dict:
|
|
|
76
114
|
|
|
77
115
|
@app.get("/info")
|
|
78
116
|
async def info() -> dict:
|
|
79
|
-
"""Return the identity of the
|
|
117
|
+
"""Return the identity of the model and its live memory state.
|
|
118
|
+
|
|
119
|
+
`active_memory_gb` reads `mx.get_active_memory()` — the ONLY honest measure
|
|
120
|
+
of the model's footprint, since Metal unified memory is invisible to `ps`
|
|
121
|
+
and Activity Monitor (they undercount the model by ~6.5 GB).
|
|
122
|
+
"""
|
|
80
123
|
m = _app_state.get("mlx_module")
|
|
81
124
|
return {
|
|
82
|
-
"backend":
|
|
83
|
-
"model_id":
|
|
84
|
-
"loaded_at":
|
|
125
|
+
"backend": "mlx",
|
|
126
|
+
"model_id": m._resolve_model_id() if m else None,
|
|
127
|
+
"loaded_at": _app_state.get("loaded_at"),
|
|
128
|
+
"model_resident": m.model_resident() if m else False,
|
|
129
|
+
"active_memory_gb": m.model_active_memory_gb() if m else None,
|
|
85
130
|
}
|
|
86
131
|
|
|
87
132
|
|
|
@@ -143,14 +188,14 @@ async def classify(req: ClassifyRequest) -> ClassifyResponse:
|
|
|
143
188
|
# _classify_apple_fm uses asyncio.new_event_loop() internally;
|
|
144
189
|
# must run in a thread (no existing loop) not in the async handler.
|
|
145
190
|
return m._classify_apple_fm(messages)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
191
|
+
with m.model_session() as model:
|
|
192
|
+
raw = model(
|
|
193
|
+
Chat(messages),
|
|
194
|
+
output_type=m.SessionClassification,
|
|
195
|
+
max_tokens=m._MAX_TOKENS,
|
|
196
|
+
sampler=make_sampler(temp=m._TEMPERATURE),
|
|
197
|
+
verbose=False,
|
|
198
|
+
)
|
|
154
199
|
return m.SessionClassification.model_validate_json(raw)
|
|
155
200
|
|
|
156
201
|
try:
|
|
@@ -214,49 +259,43 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
|
|
|
214
259
|
tracer = _app_state.get("tracer") or trace.get_tracer("meridian-agent-server-mlx")
|
|
215
260
|
parent_ctx = observability.extract_parent_context(req.traceparent)
|
|
216
261
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
#
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
_tok = _otel_context.attach(ctx_snapshot)
|
|
262
|
+
# No batch-wrapper span: each session emits a single `classify_session` span
|
|
263
|
+
# attached directly to the Rust caller's context (via the propagated
|
|
264
|
+
# traceparent). This keeps the debug trace minimal — one self-describing span
|
|
265
|
+
# per session with no redundant N=1 wrapper. For N>1, the sessions appear as
|
|
266
|
+
# sibling classify_session spans under the same daemon trace.
|
|
267
|
+
def _classify_all() -> list[dict]:
|
|
268
|
+
_tok = _otel_context.attach(parent_ctx) if parent_ctx is not None else None
|
|
269
|
+
try:
|
|
270
|
+
# Always use the server's own _DB_PATH — ignoring req.meridian_db avoids
|
|
271
|
+
# path-traversal: the server knows its DB from the environment.
|
|
272
|
+
con = _sqlite3.connect(str(_DB_PATH), check_same_thread=False)
|
|
273
|
+
con.row_factory = _sqlite3.Row
|
|
230
274
|
try:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
result.get("elapsed_s", 0.0),
|
|
249
|
-
)
|
|
250
|
-
results.append(result)
|
|
251
|
-
return results
|
|
252
|
-
finally:
|
|
253
|
-
con.close()
|
|
275
|
+
results: list[dict] = []
|
|
276
|
+
for sid in req.session_ids:
|
|
277
|
+
# _classify_one_logged owns this span's attributes (session_id,
|
|
278
|
+
# task_key, confidence, is_error, …) via _annotate_classification_span
|
|
279
|
+
# and emits db_fetch / build_prompt / llm_inference / parse_response
|
|
280
|
+
# as its children — one source of truth, matching the CLI path.
|
|
281
|
+
with tracer.start_as_current_span("classify_session"):
|
|
282
|
+
result = m._classify_one_logged(sid, con, fh)
|
|
283
|
+
log.info(
|
|
284
|
+
"classify_sessions: session_id=%d task_key=%s session_type=%s elapsed_s=%.2f",
|
|
285
|
+
sid,
|
|
286
|
+
result.get("task_key"),
|
|
287
|
+
result.get("session_type"),
|
|
288
|
+
result.get("elapsed_s", 0.0),
|
|
289
|
+
)
|
|
290
|
+
results.append(result)
|
|
291
|
+
return results
|
|
254
292
|
finally:
|
|
293
|
+
con.close()
|
|
294
|
+
finally:
|
|
295
|
+
if _tok is not None:
|
|
255
296
|
_otel_context.detach(_tok)
|
|
256
297
|
|
|
257
|
-
|
|
258
|
-
span.set_attribute("classified_count", len(results))
|
|
259
|
-
|
|
298
|
+
results = await run_in_threadpool(_classify_all)
|
|
260
299
|
return {"results": results}
|
|
261
300
|
|
|
262
301
|
|
|
@@ -370,18 +409,46 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
|
|
|
370
409
|
temperature = req.temperature if req.temperature is not None else 0.3
|
|
371
410
|
max_tokens = req.max_tokens if req.max_tokens else 2048
|
|
372
411
|
|
|
412
|
+
# Honour OpenAI `response_format: {"type":"json_schema", ...}` by
|
|
413
|
+
# FSM-constraining decoding to that schema via outlines. Without this, a
|
|
414
|
+
# reasoning model is free to emit chain-of-thought prose instead of the JSON
|
|
415
|
+
# the caller (e.g. agno's structured-output path) expects, and the parse
|
|
416
|
+
# fails. `{"type":"json_object"}` carries no schema, so it stays free-form.
|
|
417
|
+
output_type = None
|
|
418
|
+
rf = req.response_format
|
|
419
|
+
if isinstance(rf, dict) and rf.get("type") == "json_schema":
|
|
420
|
+
schema = (rf.get("json_schema") or {}).get("schema")
|
|
421
|
+
if schema:
|
|
422
|
+
from outlines.types import JsonSchema
|
|
423
|
+
output_type = JsonSchema(schema)
|
|
424
|
+
|
|
373
425
|
from agents.llm_selector import APPLE_INTELLIGENCE_ID
|
|
374
426
|
|
|
427
|
+
# A `json_schema` request cannot be honoured on Apple Foundation Models:
|
|
428
|
+
# outlines FSM-constrained decoding is incompatible with FM, so the schema
|
|
429
|
+
# would be silently dropped and a structured-output caller (e.g. agno) would
|
|
430
|
+
# get free-form text that fails to parse downstream. Reject explicitly with a
|
|
431
|
+
# 4xx rather than emit unconstrained output that breaks later.
|
|
432
|
+
if output_type is not None and m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
|
|
433
|
+
raise HTTPException(
|
|
434
|
+
status_code=400,
|
|
435
|
+
detail="response_format=json_schema is not supported on Apple "
|
|
436
|
+
"Foundation Models (no FSM-constrained decoding available)",
|
|
437
|
+
)
|
|
438
|
+
|
|
375
439
|
def _generate() -> str:
|
|
376
440
|
if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
|
|
441
|
+
# outlines FSM decoding is incompatible with Foundation Models;
|
|
442
|
+
# Apple FM falls back to free-form (json_object / no schema only).
|
|
377
443
|
return _infer_apple_fm(msgs, max_tokens)
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
444
|
+
with m.model_session() as model:
|
|
445
|
+
return model(
|
|
446
|
+
Chat(msgs),
|
|
447
|
+
output_type=output_type,
|
|
448
|
+
max_tokens=max_tokens,
|
|
449
|
+
sampler=make_sampler(temp=temperature),
|
|
450
|
+
verbose=False,
|
|
451
|
+
)
|
|
385
452
|
|
|
386
453
|
t0 = _time.time()
|
|
387
454
|
try:
|
|
@@ -504,14 +571,14 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
|
|
|
504
571
|
from outlines.inputs import Chat
|
|
505
572
|
|
|
506
573
|
def _generate() -> str:
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
574
|
+
with m.model_session() as model:
|
|
575
|
+
return model(
|
|
576
|
+
Chat(messages),
|
|
577
|
+
output_type=_SummarySchema,
|
|
578
|
+
max_tokens=req.max_tokens,
|
|
579
|
+
sampler=make_sampler(temp=req.temperature),
|
|
580
|
+
verbose=False,
|
|
581
|
+
)
|
|
515
582
|
|
|
516
583
|
try:
|
|
517
584
|
raw = await run_in_threadpool(_generate)
|
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
import json
|
|
10
10
|
import sqlite3
|
|
11
11
|
import sys
|
|
12
|
+
import time
|
|
12
13
|
from io import StringIO
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from typing import Iterator
|
|
@@ -1011,6 +1012,58 @@ class TestModelCache:
|
|
|
1011
1012
|
m._get_model()
|
|
1012
1013
|
|
|
1013
1014
|
|
|
1015
|
+
# ---------------------------------------------------------------------------
|
|
1016
|
+
# Idle eviction — model_session() in-flight tracking + maybe_evict_idle()
|
|
1017
|
+
# (the model holds ~7 GB while resident; the server unloads it when idle)
|
|
1018
|
+
# ---------------------------------------------------------------------------
|
|
1019
|
+
|
|
1020
|
+
class TestModelEviction:
|
|
1021
|
+
def test_model_session_loads_and_tracks_in_flight(self):
|
|
1022
|
+
import agents.run_task_linker_mlx as m
|
|
1023
|
+
sentinel = MagicMock(name="model")
|
|
1024
|
+
with patch.object(m, "_get_model", return_value=sentinel):
|
|
1025
|
+
m._in_flight = 0
|
|
1026
|
+
with m.model_session() as model:
|
|
1027
|
+
assert model is sentinel
|
|
1028
|
+
assert m._in_flight == 1 # marked in-flight while in use
|
|
1029
|
+
assert m._in_flight == 0 # released on exit
|
|
1030
|
+
|
|
1031
|
+
def test_evict_noop_when_not_idle_long_enough(self):
|
|
1032
|
+
import agents.run_task_linker_mlx as m
|
|
1033
|
+
m._model_cache["x"] = MagicMock()
|
|
1034
|
+
m._in_flight = 0
|
|
1035
|
+
m._last_used = time.monotonic() # just used
|
|
1036
|
+
assert m.maybe_evict_idle(idle_s=600) is None
|
|
1037
|
+
assert m.model_resident() is True
|
|
1038
|
+
|
|
1039
|
+
def test_evict_disabled_when_ttl_zero(self):
|
|
1040
|
+
import agents.run_task_linker_mlx as m
|
|
1041
|
+
m._model_cache["x"] = MagicMock()
|
|
1042
|
+
assert m.maybe_evict_idle(idle_s=0) is None
|
|
1043
|
+
assert m.model_resident() is True
|
|
1044
|
+
|
|
1045
|
+
def test_evict_noop_when_in_flight(self):
|
|
1046
|
+
import agents.run_task_linker_mlx as m
|
|
1047
|
+
m._model_cache["x"] = MagicMock()
|
|
1048
|
+
m._in_flight = 1 # an inference is using the model
|
|
1049
|
+
m._last_used = time.monotonic() - 1000
|
|
1050
|
+
try:
|
|
1051
|
+
assert m.maybe_evict_idle(idle_s=0.001) is None
|
|
1052
|
+
assert m.model_resident() is True # never freed mid-inference
|
|
1053
|
+
finally:
|
|
1054
|
+
m._in_flight = 0
|
|
1055
|
+
|
|
1056
|
+
def test_evict_clears_cache_when_idle(self):
|
|
1057
|
+
import agents.run_task_linker_mlx as m
|
|
1058
|
+
m._model_cache["x"] = MagicMock()
|
|
1059
|
+
m._in_flight = 0
|
|
1060
|
+
m._last_used = time.monotonic() - 1000 # idle long past the window
|
|
1061
|
+
freed = m.maybe_evict_idle(idle_s=0.001)
|
|
1062
|
+
assert freed is not None # eviction happened
|
|
1063
|
+
assert m.model_resident() is False
|
|
1064
|
+
assert m._model_cache == {}
|
|
1065
|
+
|
|
1066
|
+
|
|
1014
1067
|
# ---------------------------------------------------------------------------
|
|
1015
1068
|
# SessionClassification schema
|
|
1016
1069
|
# ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Session→Task Classifier — Debug",
|
|
3
|
+
"description": "Every session-task classification, newest first. Filter by session_id, session_type, or errors-only; copy a row's trace_id and open it in Traces for the full waterfall (db_fetch → build_prompt → llm_inference → parse_response, with raw_mlx_output). Backed by the enriched `classify_session` spans (service meridian-agent-server-mlx in-process, or meridian-task-linker-mlx from the standalone CLI). Drilldown keys on trace_id alone so it works for both.",
|
|
4
|
+
"version": 5,
|
|
5
|
+
"variables": {
|
|
6
|
+
"list": [
|
|
7
|
+
{
|
|
8
|
+
"type": "textbox",
|
|
9
|
+
"name": "session_id",
|
|
10
|
+
"label": "Session ID",
|
|
11
|
+
"query_data": null,
|
|
12
|
+
"value": "",
|
|
13
|
+
"options": [],
|
|
14
|
+
"multiSelect": false,
|
|
15
|
+
"hideOnDashboard": false,
|
|
16
|
+
"selectAllValueForMultiSelect": "custom",
|
|
17
|
+
"customMultiSelectValue": [],
|
|
18
|
+
"escapeSingleQuotes": true
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"type": "custom",
|
|
22
|
+
"name": "session_type",
|
|
23
|
+
"label": "Session type",
|
|
24
|
+
"query_data": null,
|
|
25
|
+
"value": "",
|
|
26
|
+
"options": [
|
|
27
|
+
{"label": "All", "value": "", "selected": true},
|
|
28
|
+
{"label": "task", "value": "task", "selected": false},
|
|
29
|
+
{"label": "overhead", "value": "overhead", "selected": false},
|
|
30
|
+
{"label": "untracked", "value": "untracked", "selected": false}
|
|
31
|
+
],
|
|
32
|
+
"multiSelect": false,
|
|
33
|
+
"hideOnDashboard": false,
|
|
34
|
+
"selectAllValueForMultiSelect": "custom",
|
|
35
|
+
"customMultiSelectValue": [],
|
|
36
|
+
"escapeSingleQuotes": true
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "custom",
|
|
40
|
+
"name": "errors_only",
|
|
41
|
+
"label": "Errors only",
|
|
42
|
+
"query_data": null,
|
|
43
|
+
"value": "",
|
|
44
|
+
"options": [
|
|
45
|
+
{"label": "All", "value": "", "selected": true},
|
|
46
|
+
{"label": "Errors only", "value": "true", "selected": false}
|
|
47
|
+
],
|
|
48
|
+
"multiSelect": false,
|
|
49
|
+
"hideOnDashboard": false,
|
|
50
|
+
"selectAllValueForMultiSelect": "custom",
|
|
51
|
+
"customMultiSelectValue": [],
|
|
52
|
+
"escapeSingleQuotes": true
|
|
53
|
+
}
|
|
54
|
+
],
|
|
55
|
+
"showDynamicFilters": true
|
|
56
|
+
},
|
|
57
|
+
"defaultDatetimeDuration": {"type": "relative", "relativeTimePeriod": "12h", "startTime": null, "endTime": null},
|
|
58
|
+
"tabs": [
|
|
59
|
+
{
|
|
60
|
+
"tabId": "default",
|
|
61
|
+
"name": "Default",
|
|
62
|
+
"panels": [
|
|
63
|
+
{
|
|
64
|
+
"id": "stat_total",
|
|
65
|
+
"type": "metric",
|
|
66
|
+
"title": "Classifications",
|
|
67
|
+
"description": "Total classify_session spans in range",
|
|
68
|
+
"config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
|
|
69
|
+
"queryType": "sql",
|
|
70
|
+
"queries": [
|
|
71
|
+
{
|
|
72
|
+
"query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session'",
|
|
73
|
+
"vrlFunctionQuery": "",
|
|
74
|
+
"customQuery": true,
|
|
75
|
+
"fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Classifications", "alias": "y_axis_1", "column": "y_axis_1", "color": "#5960b2", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
|
|
76
|
+
"config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
|
|
77
|
+
}
|
|
78
|
+
],
|
|
79
|
+
"layout": {"x": 0, "y": 0, "w": 12, "h": 6, "i": 1}
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"id": "stat_errors",
|
|
83
|
+
"type": "metric",
|
|
84
|
+
"title": "Errors",
|
|
85
|
+
"description": "Classifications whose method failed (is_error=true)",
|
|
86
|
+
"config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
|
|
87
|
+
"queryType": "sql",
|
|
88
|
+
"queries": [
|
|
89
|
+
{
|
|
90
|
+
"query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true'",
|
|
91
|
+
"vrlFunctionQuery": "",
|
|
92
|
+
"customQuery": true,
|
|
93
|
+
"fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Errors", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b25959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
|
|
94
|
+
"config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
|
|
95
|
+
}
|
|
96
|
+
],
|
|
97
|
+
"layout": {"x": 12, "y": 0, "w": 12, "h": 6, "i": 2}
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"id": "stat_untracked",
|
|
101
|
+
"type": "metric",
|
|
102
|
+
"title": "Untracked",
|
|
103
|
+
"description": "Sessions classified as untracked (no ticket)",
|
|
104
|
+
"config": {"show_legends": false, "unit": null, "decimals": 0, "no_value_replacement": "0"},
|
|
105
|
+
"queryType": "sql",
|
|
106
|
+
"queries": [
|
|
107
|
+
{
|
|
108
|
+
"query": "SELECT count(*) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND session_type='untracked'",
|
|
109
|
+
"vrlFunctionQuery": "",
|
|
110
|
+
"customQuery": true,
|
|
111
|
+
"fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Untracked", "alias": "y_axis_1", "column": "y_axis_1", "color": "#b29959", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
|
|
112
|
+
"config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
|
|
113
|
+
}
|
|
114
|
+
],
|
|
115
|
+
"layout": {"x": 24, "y": 0, "w": 12, "h": 6, "i": 3}
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"id": "stat_conf",
|
|
119
|
+
"type": "metric",
|
|
120
|
+
"title": "Avg confidence",
|
|
121
|
+
"description": "Mean confidence of successful classifications",
|
|
122
|
+
"config": {"show_legends": false, "unit": null, "decimals": 2, "no_value_replacement": "0"},
|
|
123
|
+
"queryType": "sql",
|
|
124
|
+
"queries": [
|
|
125
|
+
{
|
|
126
|
+
"query": "SELECT round(avg(CAST(confidence AS DOUBLE)),2) as \"y_axis_1\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='false'",
|
|
127
|
+
"vrlFunctionQuery": "",
|
|
128
|
+
"customQuery": true,
|
|
129
|
+
"fields": {"stream": "default", "stream_type": "traces", "x": [], "y": [{"label": "Avg confidence", "alias": "y_axis_1", "column": "y_axis_1", "color": "#59b27a", "aggregationFunction": null, "isDerived": false, "havingConditions": []}], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
|
|
130
|
+
"config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
|
|
131
|
+
}
|
|
132
|
+
],
|
|
133
|
+
"layout": {"x": 36, "y": 0, "w": 12, "h": 6, "i": 4}
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"id": "table_all",
|
|
137
|
+
"type": "table",
|
|
138
|
+
"title": "All classifications (newest first)",
|
|
139
|
+
"description": "Filter with the Session ID / Session type / Errors only variables above. Click any row → opens the Traces view filtered to just that trace's spans.",
|
|
140
|
+
"config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
|
|
141
|
+
"queryType": "sql",
|
|
142
|
+
"queries": [
|
|
143
|
+
{
|
|
144
|
+
"query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') ORDER BY _timestamp DESC",
|
|
145
|
+
"vrlFunctionQuery": "",
|
|
146
|
+
"customQuery": true,
|
|
147
|
+
"fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
|
|
148
|
+
"config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
|
|
149
|
+
}
|
|
150
|
+
],
|
|
151
|
+
"layout": {"x": 0, "y": 6, "w": 48, "h": 14, "i": 5}
|
|
152
|
+
},
|
|
153
|
+
{
|
|
154
|
+
"id": "table_errors",
|
|
155
|
+
"type": "table",
|
|
156
|
+
"title": "Errors only",
|
|
157
|
+
"description": "Failed classifications — inference errors, schema errors, invalid task_key, session-not-found. Click a row → opens just that trace's spans.",
|
|
158
|
+
"config": {"show_legends": false, "wrap_table_cells": false, "table_dynamic_columns": false, "drilldown": [{"name": "Open this trace's spans", "type": "byUrl", "targetBlank": true, "findBy": "name", "data": {"url": "/web/traces?org_identifier=default&stream=default&search_type=ui&search_mode=spans&from=${start_time}&to=${end_time}&query=${row.field.trace_filter}", "folder": "", "dashboard": "", "tab": "", "passAllVariables": false, "variables": []}}]},
|
|
159
|
+
"queryType": "sql",
|
|
160
|
+
"queries": [
|
|
161
|
+
{
|
|
162
|
+
"query": "SELECT to_char(to_timestamp_micros(_timestamp),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' ORDER BY _timestamp DESC",
|
|
163
|
+
"vrlFunctionQuery": "",
|
|
164
|
+
"customQuery": true,
|
|
165
|
+
"fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
|
|
166
|
+
"config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
|
|
167
|
+
}
|
|
168
|
+
],
|
|
169
|
+
"layout": {"x": 0, "y": 20, "w": 48, "h": 10, "i": 6}
|
|
170
|
+
}
|
|
171
|
+
]
|
|
172
|
+
}
|
|
173
|
+
]
|
|
174
|
+
}
|
package/services/pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "meridian-agents"
|
|
7
|
-
version = "1.
|
|
7
|
+
version = "1.55.0"
|
|
8
8
|
description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
authors = [{ name = "Meridiona" }]
|
package/ui.tar.gz
CHANGED
|
Binary file
|