@meridiona/meridian-darwin-arm64 1.31.1 → 1.31.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.31.1
1
+ 1.31.3
package/bin/meridian CHANGED
Binary file
package/bin/meridian-tray CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.31.1",
3
+ "version": "1.31.3",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -6,14 +6,21 @@ set -euo pipefail
6
6
  IFS=$'\n\t'
7
7
 
8
8
  REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
9
- TRAY_BIN="${REPO_ROOT}/target/release/meridian-tray"
10
- PLIST="${HOME}/Library/LaunchAgents/com.meridiona.tray.plist"
11
-
12
- if [[ ! -x "${TRAY_BIN}" ]]; then
13
- echo "meridian-tray binary not found at ${TRAY_BIN}" >&2
14
- echo " Build it first: cd tray && npm run build" >&2
9
+ # Support both dev mode (target/release) and bundle mode (bin/)
10
+ TRAY_BIN=""
11
+ if [[ -x "${REPO_ROOT}/target/release/meridian-tray" ]]; then
12
+ # Dev mode: built locally to target/release
13
+ TRAY_BIN="${REPO_ROOT}/target/release/meridian-tray"
14
+ elif [[ -x "${REPO_ROOT}/bin/meridian-tray" ]]; then
15
+ # Bundle mode: installed to ~/.meridian/app/bin
16
+ TRAY_BIN="${REPO_ROOT}/bin/meridian-tray"
17
+ else
18
+ echo "✗ meridian-tray binary not found" >&2
19
+ echo " Dev: build it with: cd tray && npm run tauri build" >&2
20
+ echo " Bundle: already included via: meridian update" >&2
15
21
  exit 1
16
22
  fi
23
+ PLIST="${HOME}/Library/LaunchAgents/com.meridiona.tray.plist"
17
24
 
18
25
  mkdir -p "$(dirname "${PLIST}")"
19
26
 
@@ -252,6 +252,29 @@ Example: 28 GB headroom, `LLM_BUDGET_PCT=0.5` → budget = 14 GB → **phi-4** (
252
252
 
253
253
  When the screen is locked the selector uses `min(0.8, budget_pct × 1.5)` as the effective budget, allowing a larger model to load while the machine is idle.
254
254
 
255
+ ### In-process MLX model selection (`select_mlx_model_id`)
256
+
257
+ `select_mlx_model_id` is called by the MLX server (`server.py`) at startup to pick which model to load directly into the process (via mlx_lm + outlines). It uses the same catalog but applies a three-stage priority:
258
+
259
+ 1. **Preferred fits** — return the caller-supplied `preferred_hf_id` if `preferred_min_ram_gb ≤ budget`. This keeps the eval-tuned classifier model on capable machines.
260
+ 2. **Largest cached model fits** — if the preferred model is too large, return the largest catalog model whose files are already in the HF cache and whose `min_ram_gb ≤ budget`. This avoids surprising multi-GB downloads on constrained machines.
261
+ 3. **Largest catalog model that fits (may download)** — if nothing cached fits, return the largest catalog entry where `min_ram_gb ≤ budget`, regardless of whether it is already cached. This triggers a one-time download of the best available model rather than loading an oversized one that exceeds available memory. Falls back to `preferred_hf_id` only when **no catalog model fits** at all (the budget is so low that even the 1.8 GB model does not fit).
262
+
263
+ **Why stage 3 matters on low-RAM machines:** an M1 Air (8 GB) has Metal headroom ≈ 5.4 GB. At `LLM_BUDGET_PCT=0.5` the budget is ~2.7 GB. The default preferred model is 6.5 GB (`Qwen3.5-9B-OptiQ-4bit`). Before the fix, stage 3 returned the preferred model unconditionally — the server then attempted to load a 6.5 GB model into a 2.7 GB budget, causing memory pressure or an outright load failure. With the fix, stage 3 selects the largest catalog model that fits the budget — here `Qwen3.5-4B-MLX-4bit` (2.5 GB); on a tighter budget it would be `Llama-3.2-3B-Instruct-4bit` (1.8 GB) — and downloads it on first use.
264
+
265
+ **Check what would be selected:**
266
+
267
+ ```bash
268
+ cd services
269
+ .venv/bin/python -c "
270
+ from agents.llm_selector import select_mlx_model_id, probe_compute
271
+ snap = probe_compute()
272
+ print(f'Headroom: {snap.metal_headroom_gb:.1f} GB thermal: {snap.thermal_level}')
273
+ model = select_mlx_model_id('mlx-community/Qwen3.5-9B-OptiQ-4bit', 6.5, 0.5)
274
+ print(f'Would load: {model}')
275
+ "
276
+ ```
277
+
255
278
  ### Persistent MLX server
256
279
 
257
280
  `_ensure_mlx_server()` manages a subprocess tracked in `~/.meridian/mlx_lm_server.pid` (JSON: pid, model, port). The model loads once and persists between `run_task_linker.py` invocations (which are fresh subprocesses each tick). If the budget changes and a different model is selected, the old server is killed and the new model loads automatically.
@@ -991,13 +991,29 @@ def select_mlx_model_id(
991
991
  )
992
992
  return hf_id
993
993
 
994
- # 3. Nothing cached fits and Apple Intelligence is unavailable (macOS < 26)
995
- # best effort with the preferred id. (This preserves the pre-existing
996
- # single-model behaviour on older macOS; the load may trigger a download.)
997
- span.set_attribute("llm.reason", "nothing_cached_fits_use_preferred")
994
+ # 3. Nothing cached fits and Apple Intelligence is unavailable (macOS < 26).
995
+ # Pick the largest catalog model that fits the budget, ignoring the cache
996
+ # (it will trigger a one-time download). This prevents returning an
997
+ # oversized preferred model (e.g. 6.5 GB Qwen3.5-9B on an 8 GB machine
998
+ # whose Metal budget is ~2.7 GB) that would exceed available memory.
999
+ # Only fall back to preferred_hf_id when nothing in the catalog fits.
1000
+ entry = _select_mlx_entry(snap.metal_headroom_gb, effective_pct,
1001
+ snap.thermal_level, apple_intelligence)
1002
+ if entry is not None:
1003
+ _, _, min_ram, _, hf_id = entry
1004
+ span.set_attribute("llm.reason", "catalog_fit_uncached")
1005
+ span.set_attribute("llm.selected_model", hf_id or "")
1006
+ log.warning(
1007
+ "llm_selector: no cached MLX model fits budget=%.1f GB — "
1008
+ "selecting %s (min_ram=%.1f GB fits; will download)",
1009
+ budget, hf_id, min_ram,
1010
+ )
1011
+ return hf_id
1012
+ span.set_attribute("llm.reason", "nothing_fits_use_preferred")
998
1013
  span.set_attribute("llm.selected_model", preferred_hf_id or "")
999
1014
  log.warning(
1000
- "llm_selector: no cached MLX model fits budget=%.1f GB — falling back to %s",
1015
+ "llm_selector: no catalog model fits budget=%.1f GB — "
1016
+ "last-resort fallback to preferred %s",
1001
1017
  budget, preferred_hf_id,
1002
1018
  )
1003
1019
  return preferred_hf_id
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.31.1"
7
+ version = "1.31.3"
8
8
  description = "Meridian agents — hermes task linking and Jira progress updates for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]
@@ -0,0 +1,223 @@
1
+ """Tests for select_mlx_model_id in llm_selector.py.
2
+
3
+ Covers the three-stage selection logic with focus on the low-RAM stage 3 fix:
4
+ previously the function returned the oversized preferred model unconditionally
5
+ when nothing cached fit; it now picks the largest catalog model that fits.
6
+ """
7
+ import sys
8
+ from pathlib import Path
9
+ from unittest.mock import MagicMock, patch
10
+
11
+ sys.path.insert(0, str(Path(__file__).parent.parent))
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Helpers
16
+ # ---------------------------------------------------------------------------
17
+
18
+ def _make_snap(headroom_gb: float, thermal_level: int = 0,
19
+ screen_locked: bool = False) -> MagicMock:
20
+ snap = MagicMock()
21
+ snap.metal_headroom_gb = headroom_gb
22
+ snap.thermal_level = thermal_level
23
+ snap.screen_locked = screen_locked
24
+ return snap
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Stage 1 — preferred fits
29
+ # ---------------------------------------------------------------------------
30
+
31
+ class TestStage1PreferredFits:
32
+ def test_returns_preferred_when_budget_allows(self):
33
+ """Stage 1: preferred model returned unchanged when budget covers it.
34
+
35
+ 14.0 GB headroom × 0.5 = 7.0 GB budget ≥ 6.5 GB min_ram → stage 1 fires.
36
+ """
37
+ snap = _make_snap(headroom_gb=14.0)
38
+
39
+ with (
40
+ patch("agents.llm_selector.platform") as mock_platform,
41
+ patch("agents.llm_selector._sysctl", return_value="Apple M2 Pro"),
42
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
43
+ patch("agents.llm_selector.probe_compute", return_value=snap),
44
+ ):
45
+ mock_platform.system.return_value = "Darwin"
46
+ from agents.llm_selector import select_mlx_model_id
47
+ result = select_mlx_model_id(
48
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
49
+ preferred_min_ram_gb=6.5,
50
+ budget_pct=0.5,
51
+ )
52
+
53
+ assert result == "mlx-community/Qwen3.5-9B-OptiQ-4bit"
54
+
55
+ def test_returns_preferred_on_large_machine(self):
56
+ """Stage 1: 64 GB machine, preferred (6.5 GB) trivially fits."""
57
+ snap = _make_snap(headroom_gb=50.0)
58
+
59
+ with (
60
+ patch("agents.llm_selector.platform") as mock_platform,
61
+ patch("agents.llm_selector._sysctl", return_value="Apple M2 Ultra"),
62
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
63
+ patch("agents.llm_selector.probe_compute", return_value=snap),
64
+ ):
65
+ mock_platform.system.return_value = "Darwin"
66
+ from agents.llm_selector import select_mlx_model_id
67
+ result = select_mlx_model_id(
68
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
69
+ preferred_min_ram_gb=6.5,
70
+ budget_pct=0.5,
71
+ )
72
+
73
+ # 50.0 × 0.5 = 25 GB budget; 6.5 ≤ 25 → stage 1 fires
74
+ assert result == "mlx-community/Qwen3.5-9B-OptiQ-4bit"
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Stage 2 — catalog cached fit
79
+ # ---------------------------------------------------------------------------
80
+
81
+ class TestStage2CachedFit:
82
+ def test_returns_largest_cached_model_when_preferred_too_big(self):
83
+ """Stage 2: preferred doesn't fit but a smaller cached model does."""
84
+ snap = _make_snap(headroom_gb=5.5) # budget = 5.5 × 0.5 = 2.75 GB
85
+
86
+ def cached_side_effect(hf_id):
87
+ # Only qwen3.5-4b (2.5 GB) is cached
88
+ return hf_id == "mlx-community/Qwen3.5-4B-MLX-4bit"
89
+
90
+ with (
91
+ patch("agents.llm_selector.platform") as mock_platform,
92
+ patch("agents.llm_selector._sysctl", return_value="Apple M1 Air"),
93
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
94
+ patch("agents.llm_selector.probe_compute", return_value=snap),
95
+ patch("agents.llm_selector._hf_model_cached",
96
+ side_effect=cached_side_effect),
97
+ ):
98
+ mock_platform.system.return_value = "Darwin"
99
+ from agents.llm_selector import select_mlx_model_id
100
+ result = select_mlx_model_id(
101
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
102
+ preferred_min_ram_gb=6.5,
103
+ budget_pct=0.5,
104
+ )
105
+
106
+ assert result == "mlx-community/Qwen3.5-4B-MLX-4bit"
107
+
108
+ def test_apple_intelligence_returned_when_available(self):
109
+ """Stage 2 (apple_fm branch): Apple Intelligence chosen over mlx on macOS 26+."""
110
+ snap = _make_snap(headroom_gb=5.5)
111
+
112
+ with (
113
+ patch("agents.llm_selector.platform") as mock_platform,
114
+ patch("agents.llm_selector._sysctl", return_value="Apple M1 Air"),
115
+ patch("agents.llm_selector._apple_intelligence_available", return_value=True),
116
+ patch("agents.llm_selector.probe_compute", return_value=snap),
117
+ patch("agents.llm_selector._hf_model_cached", return_value=False),
118
+ ):
119
+ mock_platform.system.return_value = "Darwin"
120
+ from agents.llm_selector import select_mlx_model_id, APPLE_INTELLIGENCE_ID
121
+ result = select_mlx_model_id(
122
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
123
+ preferred_min_ram_gb=6.5,
124
+ budget_pct=0.5,
125
+ )
126
+
127
+ assert result == APPLE_INTELLIGENCE_ID
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Stage 3 — the fixed low-RAM fallback
132
+ # ---------------------------------------------------------------------------
133
+
134
+ class TestStage3LowRamFallback:
135
+ def test_returns_fitting_catalog_model_not_preferred_on_low_ram(self):
136
+ """Stage 3 fix: M1 Air 8 GB — nothing cached, preferred (6.5 GB) doesn't
137
+ fit the 2.7 GB budget — must return a smaller catalog model, NOT preferred.
138
+ """
139
+ snap = _make_snap(headroom_gb=5.4) # Metal headroom ≈ 5.4 GB on M1 Air 8 GB
140
+ # budget = 5.4 × 0.5 = 2.7 GB → llama3.2-3b (1.8 GB) fits, qwen3.5-4b (2.5 GB) fits
141
+
142
+ with (
143
+ patch("agents.llm_selector.platform") as mock_platform,
144
+ patch("agents.llm_selector._sysctl", return_value="Apple M1"),
145
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
146
+ patch("agents.llm_selector.probe_compute", return_value=snap),
147
+ patch("agents.llm_selector._hf_model_cached", return_value=False),
148
+ ):
149
+ mock_platform.system.return_value = "Darwin"
150
+ from agents.llm_selector import select_mlx_model_id
151
+ result = select_mlx_model_id(
152
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
153
+ preferred_min_ram_gb=6.5,
154
+ budget_pct=0.5,
155
+ )
156
+
157
+ # Must NOT return the oversized preferred model
158
+ assert result != "mlx-community/Qwen3.5-9B-OptiQ-4bit", (
159
+ "Stage 3 returned the oversized preferred model on a low-RAM machine"
160
+ )
161
+ # Must return a model that fits the budget (largest fitting = qwen3.5-4b at 2.5 GB)
162
+ assert result == "mlx-community/Qwen3.5-4B-MLX-4bit"
163
+
164
+ def test_preferred_returned_only_when_nothing_in_catalog_fits(self):
165
+ """Stage 3 true last resort: budget so tiny no catalog model fits."""
166
+ snap = _make_snap(headroom_gb=0.5) # budget = 0.25 GB — nothing fits
167
+
168
+ with (
169
+ patch("agents.llm_selector.platform") as mock_platform,
170
+ patch("agents.llm_selector._sysctl", return_value="Apple M1"),
171
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
172
+ patch("agents.llm_selector.probe_compute", return_value=snap),
173
+ patch("agents.llm_selector._hf_model_cached", return_value=False),
174
+ ):
175
+ mock_platform.system.return_value = "Darwin"
176
+ from agents.llm_selector import select_mlx_model_id
177
+ result = select_mlx_model_id(
178
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
179
+ preferred_min_ram_gb=6.5,
180
+ budget_pct=0.5,
181
+ )
182
+
183
+ # Nothing fits → last-resort returns preferred (preserves old behaviour)
184
+ assert result == "mlx-community/Qwen3.5-9B-OptiQ-4bit"
185
+
186
+ def test_returns_none_when_no_preferred_and_nothing_fits(self):
187
+ """Stage 3: no preferred given + nothing fits → None."""
188
+ snap = _make_snap(headroom_gb=0.5)
189
+
190
+ with (
191
+ patch("agents.llm_selector.platform") as mock_platform,
192
+ patch("agents.llm_selector._sysctl", return_value="Apple M1"),
193
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
194
+ patch("agents.llm_selector.probe_compute", return_value=snap),
195
+ patch("agents.llm_selector._hf_model_cached", return_value=False),
196
+ ):
197
+ mock_platform.system.return_value = "Darwin"
198
+ from agents.llm_selector import select_mlx_model_id
199
+ result = select_mlx_model_id(budget_pct=0.5)
200
+
201
+ assert result is None
202
+
203
+ def test_picks_largest_fitting_not_smallest(self):
204
+ """Stage 3: when multiple catalog models fit, the largest (highest quality) wins."""
205
+ snap = _make_snap(headroom_gb=7.0) # budget = 3.5 GB
206
+ # qwen3.5-4b (2.5 GB) and llama3.2-3b (1.8 GB) both fit; qwen3.5-4b should win
207
+
208
+ with (
209
+ patch("agents.llm_selector.platform") as mock_platform,
210
+ patch("agents.llm_selector._sysctl", return_value="Apple M1 Pro"),
211
+ patch("agents.llm_selector._apple_intelligence_available", return_value=False),
212
+ patch("agents.llm_selector.probe_compute", return_value=snap),
213
+ patch("agents.llm_selector._hf_model_cached", return_value=False),
214
+ ):
215
+ mock_platform.system.return_value = "Darwin"
216
+ from agents.llm_selector import select_mlx_model_id
217
+ result = select_mlx_model_id(
218
+ preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
219
+ preferred_min_ram_gb=6.5,
220
+ budget_pct=0.5,
221
+ )
222
+
223
+ assert result == "mlx-community/Qwen3.5-4B-MLX-4bit"
package/ui.tar.gz CHANGED
Binary file