@meridiona/meridian-darwin-arm64 1.33.0 → 1.34.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -24,12 +24,29 @@
24
24
  # MERIDIAN_UI_PORT=3939
25
25
 
26
26
  # ---------------------------------------------------------------------------
27
- # Jira (all three required to enable the Jira connector)
27
+ # Jira — choose ONE auth path:
28
+ #
29
+ # (A) Browser OAuth (recommended): just run `meridian oauth-login jira`.
30
+ # It opens your browser, you click Accept, and tokens land in
31
+ # ~/.meridian/oauth/jira.json (auto-refreshed). No env vars, no API
32
+ # token; the site is discovered automatically. Then `meridian restart`.
33
+ #
34
+ # (B) Static API token (legacy): set JIRA_BASE_URL + JIRA_EMAIL + JIRA_API_TOKEN.
35
+ #
36
+ # If both are present, OAuth wins. JIRA_PROJECT_KEYS applies to either.
28
37
  # ---------------------------------------------------------------------------
29
38
 
39
+ # (A) OAuth needs NO config — Meridian ships a public client id. The vars below are
40
+ # optional overrides (e.g. a self-hosted app or a non-default redirect port).
41
+ # JIRA_OAUTH_CLIENT_ID=your-atlassian-app-client-id # override the baked-in client id
42
+ # JIRA_OAUTH_REDIRECT_PORT=9123 # must match the app's registered redirect
43
+ # http://127.0.0.1:<port>/callback
44
+
45
+ # (B) Static API token
30
46
  # JIRA_BASE_URL=https://your-org.atlassian.net
31
47
  # JIRA_EMAIL=you@your-org.com
32
48
  # JIRA_API_TOKEN=your-api-token-here
49
+
33
50
  # JIRA_PROJECT_KEYS=KAN,ENG # optional — comma-separated; empty = all projects
34
51
 
35
52
  # ---------------------------------------------------------------------------
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.33.0
1
+ 1.34.1
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.33.0",
3
+ "version": "1.34.1",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -84,6 +84,9 @@ collect_credentials() {
84
84
  echo " (edit later anytime: meridian config edit)" >&2
85
85
  echo >&2
86
86
  if prompt_category "Jira"; then
87
+ info "Easiest: skip the token prompts below and, after install, run"
88
+ info " meridian oauth-login jira — connect in your browser, no API token."
89
+ info "Or fill these in for the legacy API-token path:"
87
90
  prompt_env_var "JIRA_BASE_URL" "Jira URL (e.g. https://your-org.atlassian.net)" 0 "$env_file"
88
91
  # The Python side reads JIRA_URL, the Rust side JIRA_BASE_URL — keep both in sync.
89
92
  local jira_url; jira_url="$(get_env_value JIRA_BASE_URL "$env_file")"
@@ -40,21 +40,37 @@ fi
40
40
  # attaches to a stable binary named `screenpipe` (and survives reinstalls of the
41
41
  # same version, since its path is fixed). Falls back to whatever `command -v`
42
42
  # found when screenpipe is a native binary (Homebrew) rather than the npm shim.
43
- SCREENPIPE_BIN="$(command -v screenpipe)" || true
44
- if [[ -z "${SCREENPIPE_BIN}" ]]; then
45
- echo "✗ screenpipe binary not found in PATH — install with: npm install -g screenpipe" >&2
46
- exit 1
47
- fi
48
- _npm_root="$(npm root -g 2>/dev/null || true)"
49
- if [[ -n "${_npm_root}" && -d "${_npm_root}/screenpipe" ]]; then
50
- _real=""
51
- while IFS= read -r _cand; do
52
- if file "${_cand}" 2>/dev/null | grep -q "Mach-O"; then _real="${_cand}"; break; fi
53
- done < <(find "${_npm_root}/screenpipe" -type f -name screenpipe -perm +0111 2>/dev/null)
54
- if [[ -n "${_real}" ]]; then
55
- SCREENPIPE_BIN="${_real}"
56
- echo " using the real screenpipe binary (not the node wrapper): ${SCREENPIPE_BIN}"
43
+ STAGED_BIN="${HOME}/.meridian/bin/screenpipe"
44
+
45
+ # Prefer the already-staged stable binary (written by install-from-bundle.sh).
46
+ # On a standalone re-run of this script (e.g. `meridian repair`) resolve the
47
+ # real Mach-O from the npm tree and stage it so the launchd plist is immune to
48
+ # nvm version changes — the npm shim path under ~/.nvm is version-specific and
49
+ # breaks silently when the user runs `nvm use` or upgrades Node.
50
+ if [[ -x "${STAGED_BIN}" ]] && file "${STAGED_BIN}" 2>/dev/null | grep -q "Mach-O"; then
51
+ SCREENPIPE_BIN="${STAGED_BIN}"
52
+ echo " using staged screenpipe binary: ${SCREENPIPE_BIN}"
53
+ else
54
+ SCREENPIPE_BIN="$(command -v screenpipe 2>/dev/null || true)"
55
+ if [[ -z "${SCREENPIPE_BIN}" ]]; then
56
+ echo " screenpipe not found in PATH — install with: npm install -g screenpipe" >&2
57
+ exit 1
58
+ fi
59
+ _npm_root="$(npm root -g 2>/dev/null || true)"
60
+ if [[ -n "${_npm_root}" && -d "${_npm_root}/screenpipe" ]]; then
61
+ _real=""
62
+ while IFS= read -r _cand; do
63
+ if file "${_cand}" 2>/dev/null | grep -q "Mach-O"; then _real="${_cand}"; break; fi
64
+ done < <(find "${_npm_root}/screenpipe" -type f -name screenpipe -perm +0111 2>/dev/null)
65
+ if [[ -n "${_real}" ]]; then
66
+ SCREENPIPE_BIN="${_real}"
67
+ fi
57
68
  fi
69
+ mkdir -p "${HOME}/.meridian/bin"
70
+ cp "${SCREENPIPE_BIN}" "${STAGED_BIN}"
71
+ chmod +x "${STAGED_BIN}"
72
+ SCREENPIPE_BIN="${STAGED_BIN}"
73
+ echo "→ staged screenpipe binary: ${SCREENPIPE_BIN}"
58
74
  fi
59
75
 
60
76
  mkdir -p "${HOME}/.meridian/logs"
@@ -315,8 +315,8 @@ cmd_smoke() {
315
315
  mlx_port="$(_smoke_read_env MLX_SERVER_PORT)"
316
316
  mlx_port="${mlx_port:-7823}"
317
317
  local base="http://127.0.0.1:${mlx_port}"
318
- local classify_timeout=60
319
- [[ $classify_only -eq 1 ]] && classify_timeout=30
318
+ local classify_timeout=180
319
+ [[ $classify_only -eq 1 ]] && classify_timeout=180
320
320
  local all_ok=1
321
321
 
322
322
  if [[ -t 1 ]]; then
@@ -1 +1 @@
1
- """Meridian agents — AI-powered session task classification via hermes."""
1
+ """Meridian agents — AI-powered session task classification."""
@@ -21,15 +21,6 @@ _ENV_FILE = PROJECT_ROOT / ".env"
21
21
  if _ENV_FILE.exists():
22
22
  load_dotenv(_ENV_FILE, override=False)
23
23
 
24
- # ── Hermes (AIAgent library) ──────────────────────────────────────────────────
25
- HERMES_HOME = Path(os.environ.get("HERMES_HOME", str(REPO_ROOT / ".hermes")))
26
-
27
- # Directories searched for skill files (SKILL.md, SKILL-*.md).
28
- SKILLS_SEARCH_PATHS: list[Path] = [
29
- REPO_ROOT / "skills" / "activity",
30
- HERMES_HOME / "skills",
31
- ]
32
-
33
24
  # ── LLM ───────────────────────────────────────────────────────────────────────
34
25
  MODEL = os.environ.get("OLLAMA_MODEL")
35
26
  BASE_URL = os.environ.get("OLLAMA_HOST")
@@ -43,7 +34,7 @@ if not API_KEY:
43
34
  )
44
35
 
45
36
  # Local model selection — Apple Silicon only.
46
- # LLM_PREFER_LOCAL=1 tries a local model before the cloud AIAgent path.
37
+ # LLM_PREFER_LOCAL=1 tries a local model before the cloud path.
47
38
  # LLM_BUDGET_PCT controls the fraction of available Metal headroom to allocate
48
39
  # (0.5 = 50% of free GPU memory). Set to 0 or LLM_PREFER_LOCAL=0 to disable.
49
40
 
@@ -58,10 +49,6 @@ def _env_bool(name: str, default: bool) -> bool:
58
49
  LLM_PREFER_LOCAL = _env_bool("LLM_PREFER_LOCAL", True)
59
50
  LLM_BUDGET_PCT = float(os.environ.get("LLM_BUDGET_PCT", "0.5"))
60
51
 
61
- # When true, _hermes_setup.ensure_hermes_importable() prepends services/.hermes/
62
- # to sys.path so the local source checkout shadows the installed hermes-agent package.
63
- HERMES_DEV_MODE = os.environ.get("HERMES_DEV_MODE", "0") == "1"
64
-
65
52
  # ── DB / runtime paths ────────────────────────────────────────────────────────
66
53
  MERIDIAN_HOME = Path(os.environ.get("MERIDIAN_HOME", str(Path.home() / ".meridian")))
67
54
  MERIDIAN_DB = Path(os.environ.get("MERIDIAN_DB", str(MERIDIAN_HOME / "meridian.db")))
@@ -28,7 +28,6 @@ import logging
28
28
  import os
29
29
  import platform
30
30
  import re
31
- import signal
32
31
  import socket
33
32
  import subprocess
34
33
  import sys
@@ -263,17 +262,6 @@ class ComputeSnapshot:
263
262
  mem_bw_gbs: int
264
263
 
265
264
 
266
- @dataclass
267
- class LocalModelEndpoint:
268
- model: str # model name to pass to AIAgent
269
- base_url: str # OpenAI-compatible base URL
270
- api_key: str # typically "local"
271
- runtime: str # "ollama" | "lmstudio" | "llamacpp" | "mlxlm" | "mlx_managed"
272
-
273
-
274
- _MANAGED_SERVER_PORT = 8765
275
- _MANAGED_SERVER_PID_FILE = Path.home() / ".meridian" / "mlx_lm_server.pid"
276
-
277
265
  # Sentinel returned by select_mlx_model_id() when Apple Intelligence is chosen.
278
266
  APPLE_INTELLIGENCE_ID = "apple-intelligence"
279
267
 
@@ -438,7 +426,7 @@ def local_infer(system_prompt: str, user_message: str,
438
426
  """Run inference on the best available local model.
439
427
 
440
428
  Returns the model's text response, or None if nothing is available
441
- (caller falls back to the cloud AIAgent path).
429
+ (caller falls back to the cloud path).
442
430
 
443
431
  Priority:
444
432
  1. Already-running server with a model in memory (zero load cost)
@@ -521,325 +509,6 @@ def _infer_mlx(model_id: str, system: str, user: str, max_tokens: int) -> Option
521
509
  return None
522
510
 
523
511
 
524
- def _shutdown_managed_server() -> None:
525
- """Kill the managed mlx_lm.server if it is running and remove the PID file."""
526
- pid_file = _MANAGED_SERVER_PID_FILE
527
- if not pid_file.exists():
528
- return
529
- try:
530
- meta = json.loads(pid_file.read_text())
531
- pid = meta["pid"]
532
- try:
533
- os.kill(pid, 0)
534
- os.kill(pid, signal.SIGTERM)
535
- log.info("llm_selector: unloaded managed mlx_lm.server pid=%d model=%s",
536
- pid, meta.get("model", "?"))
537
- except OSError:
538
- pass
539
- except Exception:
540
- pass
541
- pid_file.unlink(missing_ok=True)
542
-
543
-
544
- def _wait_for_process_exit(pid: int, timeout: float = 10.0) -> None:
545
- """Wait for a process to exit; SIGKILL after timeout."""
546
- deadline = time.monotonic() + timeout
547
- while time.monotonic() < deadline:
548
- try:
549
- os.kill(pid, 0)
550
- except OSError:
551
- return # dead
552
- time.sleep(0.3)
553
- try:
554
- os.kill(pid, signal.SIGKILL)
555
- except OSError:
556
- pass
557
- time.sleep(0.5)
558
-
559
-
560
- def _wait_for_port_free(port: int, timeout: float = 5.0) -> None:
561
- """Wait until a local TCP port stops accepting connections."""
562
- deadline = time.monotonic() + timeout
563
- while time.monotonic() < deadline:
564
- if not _tcp_open("127.0.0.1", port, timeout=0.3):
565
- return
566
- time.sleep(0.3)
567
-
568
-
569
- def _ensure_mlx_server(model_id: str, port: int = _MANAGED_SERVER_PORT) -> bool:
570
- with _tracer.start_as_current_span("llm_selector.ensure_server") as span:
571
- span.set_attribute("server.model", model_id)
572
- span.set_attribute("server.port", port)
573
- t0 = time.monotonic()
574
-
575
- pid_file = _MANAGED_SERVER_PID_FILE
576
- if pid_file.exists():
577
- try:
578
- meta = json.loads(pid_file.read_text())
579
- pid, existing_model, existing_port = meta["pid"], meta["model"], meta["port"]
580
- try:
581
- os.kill(pid, 0)
582
- alive = True
583
- except OSError:
584
- alive = False
585
-
586
- if alive and existing_model == model_id and existing_port == port:
587
- log.info(
588
- "llm_selector: managed server already running model=%s pid=%d port=%d",
589
- model_id, pid, port,
590
- )
591
- span.set_attribute("server.action", "reused")
592
- span.set_attribute("server.pid", pid)
593
- span.add_event("server_reused", {"pid": pid, "model": model_id})
594
- return True
595
-
596
- if alive:
597
- log.info(
598
- "llm_selector: model switch %s → %s — stopping pid=%d",
599
- existing_model, model_id, pid,
600
- )
601
- span.set_attribute("server.previous_model", existing_model)
602
- span.add_event("model_switch", {
603
- "from_model": existing_model,
604
- "to_model": model_id,
605
- "pid": pid,
606
- })
607
- os.kill(pid, signal.SIGTERM)
608
- _wait_for_process_exit(pid)
609
- _wait_for_port_free(port)
610
- stop_ms = int((time.monotonic() - t0) * 1000)
611
- log.info(
612
- "llm_selector: stopped old managed server pid=%d model=%s elapsed_ms=%d",
613
- pid, existing_model, stop_ms,
614
- )
615
- span.add_event("old_server_stopped", {"elapsed_ms": stop_ms})
616
- else:
617
- log.debug("llm_selector: stale pid file (pid=%d dead) — starting fresh", pid)
618
- span.add_event("stale_pid_file", {"pid": pid})
619
- except Exception:
620
- pass
621
-
622
- proc = subprocess.Popen(
623
- [sys.executable, "-m", "mlx_lm.server",
624
- "--model", model_id, "--port", str(port), "--max-tokens", "4096"],
625
- stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
626
- start_new_session=True,
627
- )
628
- pid_file.parent.mkdir(parents=True, exist_ok=True)
629
- pid_file.write_text(json.dumps({"pid": proc.pid, "model": model_id, "port": port}))
630
- log.info(
631
- "llm_selector: started mlx_lm.server model=%s pid=%d port=%d — waiting for ready",
632
- model_id, proc.pid, port,
633
- )
634
- span.set_attribute("server.action", "started")
635
- span.set_attribute("server.pid", proc.pid)
636
- span.add_event("server_started", {"pid": proc.pid, "model": model_id})
637
-
638
- url = f"http://127.0.0.1:{port}/v1/models"
639
- deadline = time.monotonic() + 90.0
640
- while time.monotonic() < deadline:
641
- if proc.poll() is not None:
642
- elapsed_ms = int((time.monotonic() - t0) * 1000)
643
- log.warning(
644
- "llm_selector: mlx_lm.server exited early exit=%d model=%s elapsed_ms=%d"
645
- " — is mlx_lm installed?",
646
- proc.returncode, model_id, elapsed_ms,
647
- )
648
- span.set_attribute("server.action", "failed")
649
- span.set_attribute("server.exit_code", proc.returncode)
650
- span.add_event("server_exited_early", {"exit_code": proc.returncode})
651
- pid_file.unlink(missing_ok=True)
652
- return False
653
- _, status = _get_json(url, timeout=1.0)
654
- if status == 200:
655
- elapsed_ms = int((time.monotonic() - t0) * 1000)
656
- log.info(
657
- "llm_selector: mlx_lm.server ready model=%s port=%d startup_ms=%d",
658
- model_id, port, elapsed_ms,
659
- )
660
- span.set_attribute("server.startup_ms", elapsed_ms)
661
- span.add_event("server_ready", {"startup_ms": elapsed_ms})
662
- return True
663
- time.sleep(1)
664
-
665
- elapsed_ms = int((time.monotonic() - t0) * 1000)
666
- log.warning(
667
- "llm_selector: mlx_lm.server startup timeout model=%s elapsed_ms=%d",
668
- model_id, elapsed_ms,
669
- )
670
- span.set_attribute("server.action", "timeout")
671
- span.add_event("server_timeout", {"elapsed_ms": elapsed_ms})
672
- return False
673
-
674
-
675
- def select_model_for_hermes(budget_pct: Optional[float] = None) -> Optional[LocalModelEndpoint]:
676
- """Return the best available local endpoint for AIAgent, or None to use cloud."""
677
- if budget_pct is None:
678
- from agents.config import LLM_BUDGET_PCT
679
- budget_pct = LLM_BUDGET_PCT
680
- with _tracer.start_as_current_span("llm_selector.select_model") as span:
681
- try:
682
- result: Optional[LocalModelEndpoint] = None
683
-
684
- if platform.system() != "Darwin":
685
- span.set_attribute("llm.budget_pct", budget_pct)
686
- span.set_attribute("llm.selected_model", "cloud_fallback")
687
- span.set_attribute("llm.selected_runtime", "cloud")
688
- span.set_attribute("llm.is_local", False)
689
- return None
690
- brand = _sysctl("machdep.cpu.brand_string") or ""
691
- if not brand.startswith("Apple M"):
692
- span.set_attribute("llm.budget_pct", budget_pct)
693
- span.set_attribute("llm.selected_model", "cloud_fallback")
694
- span.set_attribute("llm.selected_runtime", "cloud")
695
- span.set_attribute("llm.is_local", False)
696
- return None
697
-
698
- servers = discover_running_servers()
699
- if not servers:
700
- log.debug("llm_selector: no external servers found — will compute budget")
701
-
702
- for server in servers:
703
- if server.runtime == "apple_fm":
704
- continue
705
- _shutdown_managed_server()
706
- log.info("llm_selector: using external server runtime=%s model=%s",
707
- server.runtime, server.best_model)
708
- result = LocalModelEndpoint(
709
- model=server.best_model,
710
- base_url=server.base_url,
711
- api_key="local",
712
- runtime=server.runtime,
713
- )
714
- break
715
-
716
- _reason = "cloud_fallback"
717
- _headroom_gb = 0.0
718
- _adj_headroom_gb = 0.0
719
- _budget_gb = 0.0
720
- _thermal = 0
721
- _screen_locked_val = False
722
- _effective_pct = budget_pct
723
-
724
- if result is None:
725
- try:
726
- snap = probe_compute()
727
- except Exception as exc:
728
- log.warning("llm_selector: compute probe failed: %s", exc)
729
- _reason = "compute_probe_failed"
730
- span.set_attribute("llm.budget_pct", budget_pct)
731
- span.set_attribute("llm.selected_model", "cloud_fallback")
732
- span.set_attribute("llm.selected_runtime", "cloud")
733
- span.set_attribute("llm.is_local", False)
734
- span.set_attribute("llm.reason", _reason)
735
- return None
736
-
737
- _headroom_gb = snap.metal_headroom_gb
738
- _thermal = snap.thermal_level
739
- _screen_locked_val = snap.screen_locked
740
-
741
- # If a managed server is already running, its model weight is
742
- # included in Metal's "used" accounting, which shrinks headroom.
743
- # Add that weight back so the selection sees the true system-wide
744
- # budget rather than headroom-minus-current-model. Without this
745
- # the selected model changes on every tick as headroom shifts,
746
- # causing an oscillation loop (Qwen3.5 → phi-4 → gemma → …).
747
- _adj_headroom_gb = _headroom_gb
748
- if _MANAGED_SERVER_PID_FILE.exists():
749
- try:
750
- meta = json.loads(_MANAGED_SERVER_PID_FILE.read_text())
751
- os.kill(meta["pid"], 0) # raises OSError if dead
752
- current_ram = next(
753
- (min_ram for _, _, min_ram, _, hf in _MODELS
754
- if hf == meta["model"]),
755
- 0.0,
756
- )
757
- _adj_headroom_gb = _headroom_gb + current_ram
758
- log.info(
759
- "llm_selector: headroom adjusted %.1f→%.1f GB "
760
- "(managed model=%s uses %.1f GB)",
761
- _headroom_gb, _adj_headroom_gb,
762
- meta["model"], current_ram,
763
- )
764
- except (OSError, Exception):
765
- pass
766
-
767
- _effective_pct = min(0.8, budget_pct * 1.5) if snap.screen_locked else budget_pct
768
- _budget_gb = _adj_headroom_gb * _effective_pct
769
-
770
- entry = _select_mlx_entry(_adj_headroom_gb, _effective_pct,
771
- snap.thermal_level, apple_intelligence=False)
772
- if entry is None:
773
- _reason = "no_model_fits"
774
- log.info(
775
- "llm_selector: no local model fits "
776
- "headroom=%.1f GB adj=%.1f GB budget=%.1f GB pct=%.2f → cloud fallback",
777
- _headroom_gb, _adj_headroom_gb, _budget_gb, _effective_pct,
778
- )
779
- else:
780
- model_id, _, min_ram, quality, hf_id = entry
781
- log.info(
782
- "llm_selector: selected model=%s hf=%s min_ram=%.1f GB quality=%d "
783
- "headroom=%.1f GB adj=%.1f GB budget=%.1f GB pct=%.2f",
784
- model_id, hf_id, min_ram, quality,
785
- _headroom_gb, _adj_headroom_gb, _budget_gb, _effective_pct,
786
- )
787
- if _ensure_mlx_server(hf_id, _MANAGED_SERVER_PORT):
788
- _reason = "mlx_managed"
789
- result = LocalModelEndpoint(
790
- model=hf_id,
791
- base_url=f"http://127.0.0.1:{_MANAGED_SERVER_PORT}/v1",
792
- api_key="local",
793
- runtime="mlx_managed",
794
- )
795
- else:
796
- _reason = "mlx_server_failed"
797
- log.warning(
798
- "llm_selector: mlx_lm.server failed to start for model=%s — cloud fallback",
799
- hf_id,
800
- )
801
- else:
802
- _reason = result.runtime
803
-
804
- _selected_model = result.model if result else "cloud_fallback"
805
- _selected_runtime = result.runtime if result else "cloud"
806
- _is_local = result is not None
807
-
808
- span.set_attribute("llm.budget_pct", budget_pct)
809
- span.set_attribute("llm.effective_pct", round(_effective_pct, 3))
810
- span.set_attribute("llm.headroom_gb", round(_headroom_gb, 2))
811
- span.set_attribute("llm.adj_headroom_gb", round(_adj_headroom_gb, 2))
812
- span.set_attribute("llm.budget_gb", round(_budget_gb, 2))
813
- span.set_attribute("llm.thermal_level", _thermal)
814
- span.set_attribute("llm.screen_locked", _screen_locked_val)
815
- span.set_attribute("llm.reason", _reason)
816
- span.set_attribute("llm.selected_model", _selected_model)
817
- span.set_attribute("llm.selected_runtime", _selected_runtime)
818
- span.set_attribute("llm.is_local", _is_local)
819
-
820
- log.info(
821
- "llm_selector: decision reason=%s model=%s runtime=%s "
822
- "budget_pct=%.2f headroom_gb=%.1f budget_gb=%.1f thermal=%d",
823
- _reason, _selected_model, _selected_runtime,
824
- budget_pct, _adj_headroom_gb, _budget_gb, _thermal,
825
- extra={
826
- "llm_selector_reason": _reason,
827
- "llm_selector_model": _selected_model,
828
- "llm_selector_runtime": _selected_runtime,
829
- "llm_selector_budget_pct": budget_pct,
830
- "llm_selector_headroom_gb": round(_adj_headroom_gb, 2),
831
- "llm_selector_budget_gb": round(_budget_gb, 2),
832
- "llm_selector_thermal": _thermal,
833
- "llm_selector_screen_locked": _screen_locked_val,
834
- "llm_selector_is_local": _is_local,
835
- },
836
- )
837
- return result
838
- except Exception as exc:
839
- span.record_exception(exc)
840
- raise
841
-
842
-
843
512
  def _hf_model_cached(hf_id: "str | None") -> bool:
844
513
  """True when a HuggingFace repo's weights are already in the local cache.
845
514
 
@@ -874,7 +543,7 @@ def select_mlx_model_id(
874
543
  ) -> "str | None":
875
544
  """Pick the best **in-process** MLX model id for this machine.
876
545
 
877
- Selection-only sibling of select_model_for_hermes(): returns a HuggingFace
546
+ Returns a HuggingFace
878
547
  repo id the caller loads directly via mlx_lm + outlines (FSM-constrained
879
548
  decoding). It deliberately does NOT discover external servers
880
549
  (Ollama / LM Studio / Apple Intelligence give no constrained decoding) and
@@ -1054,10 +723,6 @@ def discover_mlx_eval_server(port: int = 7823) -> "str | None":
1054
723
 
1055
724
 
1056
725
  __all__ = ["local_infer", "discover_running_servers", "probe_compute",
1057
- "RunningServer", "ComputeSnapshot", "LocalModelEndpoint",
1058
- "select_model_for_hermes", "select_mlx_model_id",
1059
- "shutdown_managed_server",
726
+ "RunningServer", "ComputeSnapshot",
727
+ "select_mlx_model_id",
1060
728
  "resolve_model", "discover_mlx_eval_server"]
1061
-
1062
- # Public alias (no underscore) for external callers
1063
- shutdown_managed_server = _shutdown_managed_server
@@ -198,7 +198,7 @@ def _configure_logging(agent_name: str) -> None:
198
198
 
199
199
  root = logging.getLogger()
200
200
  # Clear any pre-existing handlers — long-running daemons that import
201
- # third-party libs (hermes, mcp) often leave a default basicConfig handler
201
+ # third-party libs (mcp, etc.) often leave a default basicConfig handler
202
202
  # behind that would duplicate every line.
203
203
  root.handlers.clear()
204
204
  root.addHandler(file_h)
@@ -37,7 +37,6 @@ from opentelemetry.trace import StatusCode
37
37
  from pydantic import BaseModel, Field
38
38
 
39
39
  _SERVICES_DIR = Path(__file__).parent.parent
40
- os.environ.setdefault("HERMES_HOME", str(_SERVICES_DIR / ".hermes"))
41
40
 
42
41
  from agents import observability
43
42
  from agents._prompts import build_user_message