npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.33.0 → 1.34.1 - Mend

@meridiona/meridian-darwin-arm64 1.33.0 → 1.34.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.

Files changed (26)

package/.env.example +18 -1
package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/scripts/install-from-bundle.sh +3 -0
package/scripts/install-screenpipe-daemon.sh +30 -14
package/scripts/meridian-cli.sh +2 -2
package/services/agents/__init__.py +1 -1
package/services/agents/config.py +1 -14
package/services/agents/llm_selector.py +4 -339
package/services/agents/observability.py +1 -1
package/services/agents/run_task_linker_mlx.py +0 -1
package/services/agents/server.py +27 -125
package/services/agents/tests/conftest.py +0 -7
package/services/agents/tests/test_llm_selector.py +0 -190
package/services/pyproject.toml +1 -1
package/services/tests/conftest.py +1 -8
package/services/tests/evals/build_dataset.py +3 -4
package/services/tests/evals/test_classifier.py +0 -2
package/services/tests/evals/test_model_sweep.py +0 -2
package/ui.tar.gz +0 -0
package/services/agents/_hermes_setup.py +0 -44
package/services/agents/run_task_linker.py +0 -445
package/services/agents/tests/test_run_task_linker.py +0 -360
package/services/tests/evals/eval_agent.py +0 -143
package/services/tests/test_task_linker.py +0 -157

package/.env.example CHANGED Viewed

@@ -24,12 +24,29 @@
 # MERIDIAN_UI_PORT=3939
 # ---------------------------------------------------------------------------
-# Jira (all three required to enable the Jira connector)
+# Jira — choose ONE auth path:
+#
+#   (A) Browser OAuth (recommended): just run  `meridian oauth-login jira`.
+#       It opens your browser, you click Accept, and tokens land in
+#       ~/.meridian/oauth/jira.json (auto-refreshed). No env vars, no API
+#       token; the site is discovered automatically. Then `meridian restart`.
+#
+#   (B) Static API token (legacy): set JIRA_BASE_URL + JIRA_EMAIL + JIRA_API_TOKEN.
+#
+# If both are present, OAuth wins. JIRA_PROJECT_KEYS applies to either.
 # ---------------------------------------------------------------------------
+# (A) OAuth needs NO config — Meridian ships a public client id. The vars below are
+#     optional overrides (e.g. a self-hosted app or a non-default redirect port).
+# JIRA_OAUTH_CLIENT_ID=your-atlassian-app-client-id   # override the baked-in client id
+# JIRA_OAUTH_REDIRECT_PORT=9123      # must match the app's registered redirect
+                                     # http://127.0.0.1:<port>/callback
+# (B) Static API token
 # JIRA_BASE_URL=https://your-org.atlassian.net
 # JIRA_EMAIL=you@your-org.com
 # JIRA_API_TOKEN=your-api-token-here
 # JIRA_PROJECT_KEYS=KAN,ENG          # optional — comma-separated; empty = all projects
 # ---------------------------------------------------------------------------

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.33.0
1	+ 1.34.1

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.33.0",
+  "version": "1.34.1",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/scripts/install-from-bundle.sh CHANGED Viewed

@@ -84,6 +84,9 @@ collect_credentials() {
     echo "    (edit later anytime: meridian config edit)" >&2
     echo >&2
     if prompt_category "Jira"; then
+        info "Easiest: skip the token prompts below and, after install, run"
+        info "  meridian oauth-login jira   — connect in your browser, no API token."
+        info "Or fill these in for the legacy API-token path:"
         prompt_env_var "JIRA_BASE_URL" "Jira URL (e.g. https://your-org.atlassian.net)" 0 "$env_file"
         # The Python side reads JIRA_URL, the Rust side JIRA_BASE_URL — keep both in sync.
         local jira_url; jira_url="$(get_env_value JIRA_BASE_URL "$env_file")"

package/scripts/install-screenpipe-daemon.sh CHANGED Viewed

@@ -40,21 +40,37 @@ fi
 # attaches to a stable binary named `screenpipe` (and survives reinstalls of the
 # same version, since its path is fixed). Falls back to whatever `command -v`
 # found when screenpipe is a native binary (Homebrew) rather than the npm shim.
-SCREENPIPE_BIN="$(command -v screenpipe)" || true
-if [[ -z "${SCREENPIPE_BIN}" ]]; then
-    echo "✗ screenpipe binary not found in PATH — install with: npm install -g screenpipe" >&2
-    exit 1
-fi
-_npm_root="$(npm root -g 2>/dev/null || true)"
-if [[ -n "${_npm_root}" && -d "${_npm_root}/screenpipe" ]]; then
-    _real=""
-    while IFS= read -r _cand; do
-        if file "${_cand}" 2>/dev/null | grep -q "Mach-O"; then _real="${_cand}"; break; fi
-    done < <(find "${_npm_root}/screenpipe" -type f -name screenpipe -perm +0111 2>/dev/null)
-    if [[ -n "${_real}" ]]; then
-        SCREENPIPE_BIN="${_real}"
-        echo "→ using the real screenpipe binary (not the node wrapper): ${SCREENPIPE_BIN}"
+STAGED_BIN="${HOME}/.meridian/bin/screenpipe"
+# Prefer the already-staged stable binary (written by install-from-bundle.sh).
+# On a standalone re-run of this script (e.g. `meridian repair`) resolve the
+# real Mach-O from the npm tree and stage it so the launchd plist is immune to
+# nvm version changes — the npm shim path under ~/.nvm is version-specific and
+# breaks silently when the user runs `nvm use` or upgrades Node.
+if [[ -x "${STAGED_BIN}" ]] && file "${STAGED_BIN}" 2>/dev/null | grep -q "Mach-O"; then
+    SCREENPIPE_BIN="${STAGED_BIN}"
+    echo "→ using staged screenpipe binary: ${SCREENPIPE_BIN}"
+else
+    SCREENPIPE_BIN="$(command -v screenpipe 2>/dev/null || true)"
+    if [[ -z "${SCREENPIPE_BIN}" ]]; then
+        echo "✗ screenpipe not found in PATH — install with: npm install -g screenpipe" >&2
+        exit 1
+    fi
+    _npm_root="$(npm root -g 2>/dev/null || true)"
+    if [[ -n "${_npm_root}" && -d "${_npm_root}/screenpipe" ]]; then
+        _real=""
+        while IFS= read -r _cand; do
+            if file "${_cand}" 2>/dev/null | grep -q "Mach-O"; then _real="${_cand}"; break; fi
+        done < <(find "${_npm_root}/screenpipe" -type f -name screenpipe -perm +0111 2>/dev/null)
+        if [[ -n "${_real}" ]]; then
+            SCREENPIPE_BIN="${_real}"
+        fi
     fi
+    mkdir -p "${HOME}/.meridian/bin"
+    cp "${SCREENPIPE_BIN}" "${STAGED_BIN}"
+    chmod +x "${STAGED_BIN}"
+    SCREENPIPE_BIN="${STAGED_BIN}"
+    echo "→ staged screenpipe binary: ${SCREENPIPE_BIN}"
 fi
 mkdir -p "${HOME}/.meridian/logs"

package/scripts/meridian-cli.sh CHANGED Viewed

@@ -315,8 +315,8 @@ cmd_smoke() {
     mlx_port="$(_smoke_read_env MLX_SERVER_PORT)"
     mlx_port="${mlx_port:-7823}"
     local base="http://127.0.0.1:${mlx_port}"
-    local classify_timeout=60
-    [[ $classify_only -eq 1 ]] && classify_timeout=30
+    local classify_timeout=180
+    [[ $classify_only -eq 1 ]] && classify_timeout=180
     local all_ok=1
     if [[ -t 1 ]]; then

package/services/agents/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- """Meridian agents — AI-powered session task classification via hermes."""
1	+ """Meridian agents — AI-powered session task classification."""

package/services/agents/config.py CHANGED Viewed

@@ -21,15 +21,6 @@ _ENV_FILE = PROJECT_ROOT / ".env"
 if _ENV_FILE.exists():
     load_dotenv(_ENV_FILE, override=False)
-# ── Hermes (AIAgent library) ──────────────────────────────────────────────────
-HERMES_HOME = Path(os.environ.get("HERMES_HOME", str(REPO_ROOT / ".hermes")))
-# Directories searched for skill files (SKILL.md, SKILL-*.md).
-SKILLS_SEARCH_PATHS: list[Path] = [
-    REPO_ROOT / "skills" / "activity",
-    HERMES_HOME / "skills",
-]
 # ── LLM ───────────────────────────────────────────────────────────────────────
 MODEL            = os.environ.get("OLLAMA_MODEL")
 BASE_URL         = os.environ.get("OLLAMA_HOST")
@@ -43,7 +34,7 @@ if not API_KEY:
     )
 # Local model selection — Apple Silicon only.
-# LLM_PREFER_LOCAL=1 tries a local model before the cloud AIAgent path.
+# LLM_PREFER_LOCAL=1 tries a local model before the cloud path.
 # LLM_BUDGET_PCT controls the fraction of available Metal headroom to allocate
 # (0.5 = 50% of free GPU memory). Set to 0 or LLM_PREFER_LOCAL=0 to disable.
@@ -58,10 +49,6 @@ def _env_bool(name: str, default: bool) -> bool:
 LLM_PREFER_LOCAL = _env_bool("LLM_PREFER_LOCAL", True)
 LLM_BUDGET_PCT   = float(os.environ.get("LLM_BUDGET_PCT", "0.5"))
-# When true, _hermes_setup.ensure_hermes_importable() prepends services/.hermes/
-# to sys.path so the local source checkout shadows the installed hermes-agent package.
-HERMES_DEV_MODE = os.environ.get("HERMES_DEV_MODE", "0") == "1"
 # ── DB / runtime paths ────────────────────────────────────────────────────────
 MERIDIAN_HOME = Path(os.environ.get("MERIDIAN_HOME", str(Path.home() / ".meridian")))
 MERIDIAN_DB   = Path(os.environ.get("MERIDIAN_DB",   str(MERIDIAN_HOME / "meridian.db")))

package/services/agents/llm_selector.py CHANGED Viewed

@@ -28,7 +28,6 @@ import logging
 import os
 import platform
 import re
-import signal
 import socket
 import subprocess
 import sys
@@ -263,17 +262,6 @@ class ComputeSnapshot:
     mem_bw_gbs: int
-@dataclass
-class LocalModelEndpoint:
-    model: str       # model name to pass to AIAgent
-    base_url: str    # OpenAI-compatible base URL
-    api_key: str     # typically "local"
-    runtime: str     # "ollama" | "lmstudio" | "llamacpp" | "mlxlm" | "mlx_managed"
-_MANAGED_SERVER_PORT = 8765
-_MANAGED_SERVER_PID_FILE = Path.home() / ".meridian" / "mlx_lm_server.pid"
 # Sentinel returned by select_mlx_model_id() when Apple Intelligence is chosen.
 APPLE_INTELLIGENCE_ID = "apple-intelligence"
@@ -438,7 +426,7 @@ def local_infer(system_prompt: str, user_message: str,
     """Run inference on the best available local model.
     Returns the model's text response, or None if nothing is available
-    (caller falls back to the cloud AIAgent path).
+    (caller falls back to the cloud path).
     Priority:
       1. Already-running server with a model in memory (zero load cost)
@@ -521,325 +509,6 @@ def _infer_mlx(model_id: str, system: str, user: str, max_tokens: int) -> Option
         return None
-def _shutdown_managed_server() -> None:
-    """Kill the managed mlx_lm.server if it is running and remove the PID file."""
-    pid_file = _MANAGED_SERVER_PID_FILE
-    if not pid_file.exists():
-        return
-    try:
-        meta = json.loads(pid_file.read_text())
-        pid = meta["pid"]
-        try:
-            os.kill(pid, 0)
-            os.kill(pid, signal.SIGTERM)
-            log.info("llm_selector: unloaded managed mlx_lm.server pid=%d model=%s",
-                     pid, meta.get("model", "?"))
-        except OSError:
-            pass
-    except Exception:
-        pass
-    pid_file.unlink(missing_ok=True)
-def _wait_for_process_exit(pid: int, timeout: float = 10.0) -> None:
-    """Wait for a process to exit; SIGKILL after timeout."""
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        try:
-            os.kill(pid, 0)
-        except OSError:
-            return  # dead
-        time.sleep(0.3)
-    try:
-        os.kill(pid, signal.SIGKILL)
-    except OSError:
-        pass
-    time.sleep(0.5)
-def _wait_for_port_free(port: int, timeout: float = 5.0) -> None:
-    """Wait until a local TCP port stops accepting connections."""
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        if not _tcp_open("127.0.0.1", port, timeout=0.3):
-            return
-        time.sleep(0.3)
-def _ensure_mlx_server(model_id: str, port: int = _MANAGED_SERVER_PORT) -> bool:
-    with _tracer.start_as_current_span("llm_selector.ensure_server") as span:
-        span.set_attribute("server.model",      model_id)
-        span.set_attribute("server.port",       port)
-        t0 = time.monotonic()
-        pid_file = _MANAGED_SERVER_PID_FILE
-        if pid_file.exists():
-            try:
-                meta = json.loads(pid_file.read_text())
-                pid, existing_model, existing_port = meta["pid"], meta["model"], meta["port"]
-                try:
-                    os.kill(pid, 0)
-                    alive = True
-                except OSError:
-                    alive = False
-                if alive and existing_model == model_id and existing_port == port:
-                    log.info(
-                        "llm_selector: managed server already running model=%s pid=%d port=%d",
-                        model_id, pid, port,
-                    )
-                    span.set_attribute("server.action", "reused")
-                    span.set_attribute("server.pid",    pid)
-                    span.add_event("server_reused", {"pid": pid, "model": model_id})
-                    return True
-                if alive:
-                    log.info(
-                        "llm_selector: model switch %s → %s — stopping pid=%d",
-                        existing_model, model_id, pid,
-                    )
-                    span.set_attribute("server.previous_model", existing_model)
-                    span.add_event("model_switch", {
-                        "from_model": existing_model,
-                        "to_model":   model_id,
-                        "pid":        pid,
-                    })
-                    os.kill(pid, signal.SIGTERM)
-                    _wait_for_process_exit(pid)
-                    _wait_for_port_free(port)
-                    stop_ms = int((time.monotonic() - t0) * 1000)
-                    log.info(
-                        "llm_selector: stopped old managed server pid=%d model=%s elapsed_ms=%d",
-                        pid, existing_model, stop_ms,
-                    )
-                    span.add_event("old_server_stopped", {"elapsed_ms": stop_ms})
-                else:
-                    log.debug("llm_selector: stale pid file (pid=%d dead) — starting fresh", pid)
-                    span.add_event("stale_pid_file", {"pid": pid})
-            except Exception:
-                pass
-        proc = subprocess.Popen(
-            [sys.executable, "-m", "mlx_lm.server",
-             "--model", model_id, "--port", str(port), "--max-tokens", "4096"],
-            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
-            start_new_session=True,
-        )
-        pid_file.parent.mkdir(parents=True, exist_ok=True)
-        pid_file.write_text(json.dumps({"pid": proc.pid, "model": model_id, "port": port}))
-        log.info(
-            "llm_selector: started mlx_lm.server model=%s pid=%d port=%d — waiting for ready",
-            model_id, proc.pid, port,
-        )
-        span.set_attribute("server.action", "started")
-        span.set_attribute("server.pid",    proc.pid)
-        span.add_event("server_started", {"pid": proc.pid, "model": model_id})
-        url = f"http://127.0.0.1:{port}/v1/models"
-        deadline = time.monotonic() + 90.0
-        while time.monotonic() < deadline:
-            if proc.poll() is not None:
-                elapsed_ms = int((time.monotonic() - t0) * 1000)
-                log.warning(
-                    "llm_selector: mlx_lm.server exited early exit=%d model=%s elapsed_ms=%d"
-                    " — is mlx_lm installed?",
-                    proc.returncode, model_id, elapsed_ms,
-                )
-                span.set_attribute("server.action",     "failed")
-                span.set_attribute("server.exit_code",  proc.returncode)
-                span.add_event("server_exited_early", {"exit_code": proc.returncode})
-                pid_file.unlink(missing_ok=True)
-                return False
-            _, status = _get_json(url, timeout=1.0)
-            if status == 200:
-                elapsed_ms = int((time.monotonic() - t0) * 1000)
-                log.info(
-                    "llm_selector: mlx_lm.server ready model=%s port=%d startup_ms=%d",
-                    model_id, port, elapsed_ms,
-                )
-                span.set_attribute("server.startup_ms", elapsed_ms)
-                span.add_event("server_ready", {"startup_ms": elapsed_ms})
-                return True
-            time.sleep(1)
-        elapsed_ms = int((time.monotonic() - t0) * 1000)
-        log.warning(
-            "llm_selector: mlx_lm.server startup timeout model=%s elapsed_ms=%d",
-            model_id, elapsed_ms,
-        )
-        span.set_attribute("server.action",  "timeout")
-        span.add_event("server_timeout", {"elapsed_ms": elapsed_ms})
-        return False
-def select_model_for_hermes(budget_pct: Optional[float] = None) -> Optional[LocalModelEndpoint]:
-    """Return the best available local endpoint for AIAgent, or None to use cloud."""
-    if budget_pct is None:
-        from agents.config import LLM_BUDGET_PCT
-        budget_pct = LLM_BUDGET_PCT
-    with _tracer.start_as_current_span("llm_selector.select_model") as span:
-        try:
-            result: Optional[LocalModelEndpoint] = None
-            if platform.system() != "Darwin":
-                span.set_attribute("llm.budget_pct", budget_pct)
-                span.set_attribute("llm.selected_model", "cloud_fallback")
-                span.set_attribute("llm.selected_runtime", "cloud")
-                span.set_attribute("llm.is_local", False)
-                return None
-            brand = _sysctl("machdep.cpu.brand_string") or ""
-            if not brand.startswith("Apple M"):
-                span.set_attribute("llm.budget_pct", budget_pct)
-                span.set_attribute("llm.selected_model", "cloud_fallback")
-                span.set_attribute("llm.selected_runtime", "cloud")
-                span.set_attribute("llm.is_local", False)
-                return None
-            servers = discover_running_servers()
-            if not servers:
-                log.debug("llm_selector: no external servers found — will compute budget")
-            for server in servers:
-                if server.runtime == "apple_fm":
-                    continue
-                _shutdown_managed_server()
-                log.info("llm_selector: using external server runtime=%s model=%s",
-                         server.runtime, server.best_model)
-                result = LocalModelEndpoint(
-                    model=server.best_model,
-                    base_url=server.base_url,
-                    api_key="local",
-                    runtime=server.runtime,
-                )
-                break
-            _reason = "cloud_fallback"
-            _headroom_gb = 0.0
-            _adj_headroom_gb = 0.0
-            _budget_gb = 0.0
-            _thermal = 0
-            _screen_locked_val = False
-            _effective_pct = budget_pct
-            if result is None:
-                try:
-                    snap = probe_compute()
-                except Exception as exc:
-                    log.warning("llm_selector: compute probe failed: %s", exc)
-                    _reason = "compute_probe_failed"
-                    span.set_attribute("llm.budget_pct",       budget_pct)
-                    span.set_attribute("llm.selected_model",   "cloud_fallback")
-                    span.set_attribute("llm.selected_runtime", "cloud")
-                    span.set_attribute("llm.is_local",         False)
-                    span.set_attribute("llm.reason",           _reason)
-                    return None
-                _headroom_gb      = snap.metal_headroom_gb
-                _thermal          = snap.thermal_level
-                _screen_locked_val = snap.screen_locked
-                # If a managed server is already running, its model weight is
-                # included in Metal's "used" accounting, which shrinks headroom.
-                # Add that weight back so the selection sees the true system-wide
-                # budget rather than headroom-minus-current-model.  Without this
-                # the selected model changes on every tick as headroom shifts,
-                # causing an oscillation loop (Qwen3.5 → phi-4 → gemma → …).
-                _adj_headroom_gb = _headroom_gb
-                if _MANAGED_SERVER_PID_FILE.exists():
-                    try:
-                        meta = json.loads(_MANAGED_SERVER_PID_FILE.read_text())
-                        os.kill(meta["pid"], 0)  # raises OSError if dead
-                        current_ram = next(
-                            (min_ram for _, _, min_ram, _, hf in _MODELS
-                             if hf == meta["model"]),
-                            0.0,
-                        )
-                        _adj_headroom_gb = _headroom_gb + current_ram
-                        log.info(
-                            "llm_selector: headroom adjusted %.1f→%.1f GB "
-                            "(managed model=%s uses %.1f GB)",
-                            _headroom_gb, _adj_headroom_gb,
-                            meta["model"], current_ram,
-                        )
-                    except (OSError, Exception):
-                        pass
-                _effective_pct = min(0.8, budget_pct * 1.5) if snap.screen_locked else budget_pct
-                _budget_gb = _adj_headroom_gb * _effective_pct
-                entry = _select_mlx_entry(_adj_headroom_gb, _effective_pct,
-                                          snap.thermal_level, apple_intelligence=False)
-                if entry is None:
-                    _reason = "no_model_fits"
-                    log.info(
-                        "llm_selector: no local model fits "
-                        "headroom=%.1f GB adj=%.1f GB budget=%.1f GB pct=%.2f → cloud fallback",
-                        _headroom_gb, _adj_headroom_gb, _budget_gb, _effective_pct,
-                    )
-                else:
-                    model_id, _, min_ram, quality, hf_id = entry
-                    log.info(
-                        "llm_selector: selected model=%s hf=%s min_ram=%.1f GB quality=%d "
-                        "headroom=%.1f GB adj=%.1f GB budget=%.1f GB pct=%.2f",
-                        model_id, hf_id, min_ram, quality,
-                        _headroom_gb, _adj_headroom_gb, _budget_gb, _effective_pct,
-                    )
-                    if _ensure_mlx_server(hf_id, _MANAGED_SERVER_PORT):
-                        _reason = "mlx_managed"
-                        result = LocalModelEndpoint(
-                            model=hf_id,
-                            base_url=f"http://127.0.0.1:{_MANAGED_SERVER_PORT}/v1",
-                            api_key="local",
-                            runtime="mlx_managed",
-                        )
-                    else:
-                        _reason = "mlx_server_failed"
-                        log.warning(
-                            "llm_selector: mlx_lm.server failed to start for model=%s — cloud fallback",
-                            hf_id,
-                        )
-            else:
-                _reason = result.runtime
-            _selected_model   = result.model   if result else "cloud_fallback"
-            _selected_runtime = result.runtime if result else "cloud"
-            _is_local         = result is not None
-            span.set_attribute("llm.budget_pct",         budget_pct)
-            span.set_attribute("llm.effective_pct",      round(_effective_pct, 3))
-            span.set_attribute("llm.headroom_gb",        round(_headroom_gb, 2))
-            span.set_attribute("llm.adj_headroom_gb",    round(_adj_headroom_gb, 2))
-            span.set_attribute("llm.budget_gb",          round(_budget_gb, 2))
-            span.set_attribute("llm.thermal_level",      _thermal)
-            span.set_attribute("llm.screen_locked",      _screen_locked_val)
-            span.set_attribute("llm.reason",             _reason)
-            span.set_attribute("llm.selected_model",     _selected_model)
-            span.set_attribute("llm.selected_runtime",   _selected_runtime)
-            span.set_attribute("llm.is_local",           _is_local)
-            log.info(
-                "llm_selector: decision reason=%s model=%s runtime=%s "
-                "budget_pct=%.2f headroom_gb=%.1f budget_gb=%.1f thermal=%d",
-                _reason, _selected_model, _selected_runtime,
-                budget_pct, _adj_headroom_gb, _budget_gb, _thermal,
-                extra={
-                    "llm_selector_reason":       _reason,
-                    "llm_selector_model":        _selected_model,
-                    "llm_selector_runtime":      _selected_runtime,
-                    "llm_selector_budget_pct":   budget_pct,
-                    "llm_selector_headroom_gb":  round(_adj_headroom_gb, 2),
-                    "llm_selector_budget_gb":    round(_budget_gb, 2),
-                    "llm_selector_thermal":      _thermal,
-                    "llm_selector_screen_locked": _screen_locked_val,
-                    "llm_selector_is_local":     _is_local,
-                },
-            )
-            return result
-        except Exception as exc:
-            span.record_exception(exc)
-            raise
 def _hf_model_cached(hf_id: "str | None") -> bool:
     """True when a HuggingFace repo's weights are already in the local cache.
@@ -874,7 +543,7 @@ def select_mlx_model_id(
 ) -> "str | None":
     """Pick the best **in-process** MLX model id for this machine.
-    Selection-only sibling of select_model_for_hermes(): returns a HuggingFace
+    Returns a HuggingFace
     repo id the caller loads directly via mlx_lm + outlines (FSM-constrained
     decoding). It deliberately does NOT discover external servers
     (Ollama / LM Studio / Apple Intelligence give no constrained decoding) and
@@ -1054,10 +723,6 @@ def discover_mlx_eval_server(port: int = 7823) -> "str | None":
 __all__ = ["local_infer", "discover_running_servers", "probe_compute",
-           "RunningServer", "ComputeSnapshot", "LocalModelEndpoint",
-           "select_model_for_hermes", "select_mlx_model_id",
-           "shutdown_managed_server",
+           "RunningServer", "ComputeSnapshot",
+           "select_mlx_model_id",
            "resolve_model", "discover_mlx_eval_server"]
-# Public alias (no underscore) for external callers
-shutdown_managed_server = _shutdown_managed_server

package/services/agents/observability.py CHANGED Viewed

@@ -198,7 +198,7 @@ def _configure_logging(agent_name: str) -> None:
     root = logging.getLogger()
     # Clear any pre-existing handlers — long-running daemons that import
-    # third-party libs (hermes, mcp) often leave a default basicConfig handler
+    # third-party libs (mcp, etc.) often leave a default basicConfig handler
     # behind that would duplicate every line.
     root.handlers.clear()
     root.addHandler(file_h)

package/services/agents/run_task_linker_mlx.py CHANGED Viewed

@@ -37,7 +37,6 @@ from opentelemetry.trace import StatusCode
 from pydantic import BaseModel, Field
 _SERVICES_DIR = Path(__file__).parent.parent
-os.environ.setdefault("HERMES_HOME", str(_SERVICES_DIR / ".hermes"))
 from agents import observability
 from agents._prompts import build_user_message