@meridiona/meridian-darwin-arm64 1.31.2 → 1.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +11 -9
- package/VERSION +1 -1
- package/bin/meridian +0 -0
- package/package.json +1 -1
- package/scripts/install-from-bundle.sh +13 -3
- package/scripts/lib-github-setup.sh +125 -0
- package/services/agents/README.md +23 -0
- package/services/agents/llm_selector.py +21 -5
- package/services/pyproject.toml +1 -1
- package/services/tests/test_llm_selector.py +223 -0
- package/ui.tar.gz +0 -0
package/.env.example
CHANGED
|
@@ -68,18 +68,20 @@
|
|
|
68
68
|
# PM_WORKLOG_SYNTH_TIMEOUT_S=300
|
|
69
69
|
|
|
70
70
|
# ---------------------------------------------------------------------------
|
|
71
|
-
# GitHub (GITHUB_TOKEN
|
|
72
|
-
# Syncs the OPEN issues assigned to you
|
|
73
|
-
# structured issue comments (GitHub has no
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
#
|
|
71
|
+
# GitHub (GITHUB_TOKEN required; GITHUB_PROJECT_IDS selects which Projects sync)
|
|
72
|
+
# Syncs the OPEN issues assigned to you from the listed GitHub Projects v2 (read
|
|
73
|
+
# via the GraphQL API); logs worklogs as structured issue comments (GitHub has no
|
|
74
|
+
# native time tracking).
|
|
75
|
+
# Token: easiest is the gh CLI — `meridian setup` runs `gh auth token` and adds
|
|
76
|
+
# the read:project scope for you (no PAT). Otherwise create a classic PAT with
|
|
77
|
+
# the `repo`, `read:org`, `read:project` scopes (read:project reads Projects;
|
|
78
|
+
# repo posts worklog comments).
|
|
79
|
+
# GITHUB_PROJECT_IDS: comma-separated Projects v2 node IDs (PVT_xxx). Find them:
|
|
80
|
+
# gh api graphql -f query='{ viewer { projectsV2(first:10){nodes{id title}} } }'
|
|
78
81
|
# ---------------------------------------------------------------------------
|
|
79
82
|
|
|
80
83
|
# GITHUB_TOKEN=ghp_your_personal_access_token
|
|
81
|
-
#
|
|
82
|
-
# GITHUB_REPOS=your-org/api,your-org/web # optional — comma-separated owner/repo; empty = all repos under the owner
|
|
84
|
+
# GITHUB_PROJECT_IDS=PVT_xxx,PVT_yyy
|
|
83
85
|
|
|
84
86
|
# ---------------------------------------------------------------------------
|
|
85
87
|
# Linear (LINEAR_API_KEY required to enable the Linear connector)
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.
|
|
1
|
+
1.32.0
|
package/bin/meridian
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@meridiona/meridian-darwin-arm64",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.32.0",
|
|
4
4
|
"description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
|
|
5
5
|
"homepage": "https://github.com/Meridiona/meridian",
|
|
6
6
|
"repository": {
|
|
@@ -94,9 +94,16 @@ collect_credentials() {
|
|
|
94
94
|
fi
|
|
95
95
|
echo >&2
|
|
96
96
|
if prompt_category "GitHub"; then
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
if ! _try_gh_token "$env_file"; then
|
|
98
|
+
echo >&2
|
|
99
|
+
echo " Alternatively, create a personal access token (classic) at:" >&2
|
|
100
|
+
echo " https://github.com/settings/tokens/new" >&2
|
|
101
|
+
echo " Required scopes: repo, read:org, read:project" >&2
|
|
102
|
+
echo " (read:project lets meridian read your GitHub Projects; repo posts worklog comments)" >&2
|
|
103
|
+
echo >&2
|
|
104
|
+
prompt_env_var "GITHUB_TOKEN" "GitHub personal access token" 1 "$env_file"
|
|
105
|
+
fi
|
|
106
|
+
_pick_github_projects "$env_file"
|
|
100
107
|
fi
|
|
101
108
|
echo >&2
|
|
102
109
|
if prompt_category "Linear"; then
|
|
@@ -106,6 +113,9 @@ collect_credentials() {
|
|
|
106
113
|
ok "Credential collection complete"
|
|
107
114
|
}
|
|
108
115
|
|
|
116
|
+
# GitHub setup helpers — shared with install.sh.
|
|
117
|
+
source "${APP_ROOT}/scripts/lib-github-setup.sh"
|
|
118
|
+
|
|
109
119
|
GUI_TARGET="gui/$(id -u)"
|
|
110
120
|
LAUNCH_AGENTS="${HOME}/Library/LaunchAgents"
|
|
111
121
|
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# meridian — normalises screenpipe activity into structured app sessions
|
|
3
|
+
#
|
|
4
|
+
# Shared GitHub setup helpers, sourced by both install.sh (source installs) and
|
|
5
|
+
# scripts/install-from-bundle.sh (bundle installs). The sourcing script must
|
|
6
|
+
# already define: info, ok, warn, get_env_value, set_env_value (resolved at
|
|
7
|
+
# call time, so definition order across files does not matter).
|
|
8
|
+
|
|
9
|
+
# Obtain a GitHub token from the gh CLI — no PAT needed.
#
# meridian needs two scopes: `repo` (post worklog / task-update issue comments)
# and `read:project` (read Projects v2 via GraphQL). gh's default web-login
# grants repo + read:org but not read:project, so any missing scope is added
# through gh's browser flow before the OAuth token is written to GITHUB_TOKEN.
#
# Every gh call is pinned to github.com so that the scope check, the scope
# refresh and the token read all refer to the same login, even when gh's
# default host is a different one.
#
# Globals:   none (relies on the sourcing script's get_env_value,
#            set_env_value, info, ok and warn helpers)
# Arguments: $1 - path of the env file that holds GITHUB_TOKEN
# Returns:   0 when GITHUB_TOKEN is already set (kept untouched) or was
#            written from gh; non-zero if gh is missing, not logged in to
#            github.com, the scope refresh fails, no token is returned, or the
#            env file cannot be updated — the caller can then fall back to a
#            manual PAT prompt.
_try_gh_token() {
  local env_file="$1"
  local host="github.com"

  if [[ -n "$(get_env_value GITHUB_TOKEN "$env_file")" ]]; then
    ok "GITHUB_TOKEN already set — keeping"
    return 0
  fi

  command -v gh >/dev/null 2>&1 || return 1

  # One status call serves as both the "is logged in" check and the scope
  # source; stderr is merged because gh has printed the report on either
  # stream depending on version.
  local status
  status="$(gh auth status -h "$host" 2>&1)" || return 1

  # Normalise the "Token scopes:" line(s) to a space-delimited list. gh prints
  # the names either single-quoted ('repo', 'read:org') or bare (repo, read:org),
  # so quotes and commas are stripped before matching whole words.
  local scopes
  scopes=" $(sed -n 's/.*Token scopes:[[:space:]]*//p' <<< "$status" \
    | tr -d "'\"," | tr '\n' ' ') "

  # `project` (write) satisfies the read:project requirement too, so accept
  # either. Whole-word matching keeps `public_repo` from counting as `repo`.
  local want=()
  [[ "$scopes" == *" repo "* ]] || want+=("repo")
  [[ "$scopes" == *" read:project "* || "$scopes" == *" project "* ]] \
    || want+=("read:project")

  if (( ${#want[@]} > 0 )); then
    local joined
    printf -v joined '%s,' "${want[@]}"
    joined="${joined%,}"
    info "  Granting the ${joined} scope(s) to your gh login (opens a browser)…"
    gh auth refresh -h "$host" -s "$joined" >&2 || {
      warn "  Could not extend gh scopes — use a personal access token instead"
      return 1
    }
  fi

  local token
  token="$(gh auth token -h "$host" 2>/dev/null)" || return 1
  [[ -n "$token" ]] || return 1

  # Only announce success once the token has actually been persisted.
  set_env_value GITHUB_TOKEN "$token" "$env_file" || return 1
  ok "GITHUB_TOKEN set from gh CLI (no PAT needed)"
}
|
|
45
|
+
|
|
46
|
+
# Interactively pick GitHub Projects and write their node IDs to
# GITHUB_PROJECT_IDS (comma-separated).
#
# Lists the viewer's personal projects and the projects of the viewer's
# organizations through one `gh api graphql` call, prints a numbered menu on
# stderr and reads a comma-separated list of menu numbers from stdin.
#
# Globals:   none (relies on the sourcing script's get_env_value,
#            set_env_value, info, ok and warn helpers)
# Arguments: $1 - path of the env file that holds GITHUB_PROJECT_IDS
# Returns:   0 in every handled case — already set, gh missing or
#            unauthenticated, listing/parsing failed, nothing selected — so an
#            optional step never fails the surrounding setup. Otherwise the
#            status of the final `ok` call.
_pick_github_projects() {
  local env_file="$1"

  if [[ -n "$(get_env_value GITHUB_PROJECT_IDS "$env_file")" ]]; then
    ok "GITHUB_PROJECT_IDS already set — keeping"
    return 0
  fi
  command -v gh >/dev/null 2>&1 || return 0
  gh auth status >/dev/null 2>&1 || return 0

  local raw
  raw="$(gh api graphql -f query='
    { viewer {
        projectsV2(first: 20) { nodes { id title } }
        organizations(first: 20) {
          nodes { login projectsV2(first: 20) { nodes { id title } } }
        }
    } }' 2>/dev/null)" || {
    warn "Could not list GitHub Projects — add GITHUB_PROJECT_IDS to the config manually if needed"
    return 0
  }

  # One python3 pass emits "id<TAB>label" per project (personal + org).
  # The parser tolerates JSON null for `data`, a connection, or an individual
  # node, and collapses whitespace in titles so a tab or newline inside a title
  # cannot corrupt the tab-separated output.
  local pairs_raw
  pairs_raw="$(printf '%s' "$raw" | python3 -c '
import json, sys

payload = json.load(sys.stdin) or {}
viewer = (payload.get("data") or {}).get("viewer") or {}

def projects(owner):
    for node in ((owner or {}).get("projectsV2") or {}).get("nodes") or []:
        if node and node.get("id"):
            yield node["id"], " ".join(str(node.get("title") or "").split())

for pid, title in projects(viewer):
    print("%s\t%s" % (pid, title))
for org in (viewer.get("organizations") or {}).get("nodes") or []:
    for pid, title in projects(org):
        print("%s\t%s / %s" % (pid, (org or {}).get("login", ""), title))
' 2>/dev/null)" || {
    # A parser failure (python3 unavailable, malformed JSON) is not the same
    # as "the account has no projects" — say so instead of misreporting it.
    warn "Could not parse the GitHub Projects list — add GITHUB_PROJECT_IDS to the config manually if needed"
    return 0
  }

  # Split each "id<TAB>label" line into parallel arrays (bash 3.2 — no mapfile).
  local _ids=() _labels=()
  local _id _label
  while IFS=$'\t' read -r _id _label; do
    [[ -z "$_id" ]] && continue
    _ids+=("$_id")
    _labels+=("$_label")
  done <<< "$pairs_raw"

  local count=${#_ids[@]}
  if (( count == 0 )); then
    warn "No GitHub Projects found for your account"
    return 0
  fi

  echo >&2
  echo "  Your GitHub Projects:" >&2
  local i
  for (( i = 0; i < count; i++ )); do
    printf "    %d. %s\n" "$((i + 1))" "${_labels[$i]}" >&2
  done
  echo >&2

  # EOF on stdin (non-interactive run) makes `read` return non-zero; treat it
  # as "skip" so a caller running under `set -e` is not aborted here.
  local selection=""
  read -r -p "  Enter project numbers (comma-sep, e.g. 1,2) or Enter to skip: " selection \
    || selection=""
  if [[ -z "$selection" ]]; then
    info "  (skipped GITHUB_PROJECT_IDS)"
    return 0
  fi

  # `IFS=,` applies to this one `read` only, so no save/restore is needed.
  local nums=()
  IFS=',' read -ra nums <<< "$selection"

  local selected_ids=()
  local seen=","   # ",idx,idx," — bash 3.2 has no associative arrays
  local n idx
  # The ${arr[@]+…} guard keeps an empty array safe under `set -u` on bash 3.2.
  for n in ${nums[@]+"${nums[@]}"}; do
    n="${n//[[:space:]]/}"
    [[ "$n" =~ ^[0-9]+$ ]] || continue
    # Force base 10: a leading zero ("08", "09") would otherwise be parsed as
    # octal and raise an arithmetic error.
    idx=$((10#$n))
    (( idx >= 1 && idx <= count )) || continue
    # Ignore repeats so "1,1" does not write the same project ID twice.
    [[ "$seen" == *",${idx},"* ]] && continue
    seen+="${idx},"
    selected_ids+=("${_ids[$((idx - 1))]}")
  done

  if [[ ${#selected_ids[@]} -eq 0 ]]; then
    info "  (no valid selection — skipped GITHUB_PROJECT_IDS)"
    return 0
  fi

  local joined
  printf -v joined '%s,' "${selected_ids[@]}"
  set_env_value GITHUB_PROJECT_IDS "${joined%,}" "$env_file"
  ok "GITHUB_PROJECT_IDS set (${#selected_ids[@]} project(s))"
}
|
|
@@ -252,6 +252,29 @@ Example: 28 GB headroom, `LLM_BUDGET_PCT=0.5` → budget = 14 GB → **phi-4** (
|
|
|
252
252
|
|
|
253
253
|
When the screen is locked the selector uses `min(0.8, budget_pct × 1.5)` as the effective budget, allowing a larger model to load while the machine is idle.
|
|
254
254
|
|
|
255
|
+
### In-process MLX model selection (`select_mlx_model_id`)
|
|
256
|
+
|
|
257
|
+
`select_mlx_model_id` is called by the MLX server (`server.py`) at startup to pick which model to load directly into the process (via mlx_lm + outlines). It uses the same catalog but applies a three-stage priority:
|
|
258
|
+
|
|
259
|
+
1. **Preferred fits** — return the caller-supplied `preferred_hf_id` if `preferred_min_ram_gb ≤ budget`. This keeps the eval-tuned classifier model on capable machines.
|
|
260
|
+
2. **Largest cached model fits** — if the preferred is too large, return the largest catalog model whose files are already in the HF cache and whose `min_ram_gb ≤ budget`. Avoids surprising multi-GB downloads on constrained machines.
|
|
261
|
+
3. **Largest catalog model that fits (may download)** — if nothing cached fits, return the largest catalog entry where `min_ram_gb ≤ budget`, regardless of cache. This triggers a one-time download of the best available model rather than loading an oversized one that exceeds available memory. Falls back to `preferred_hf_id` only when **no catalog model fits** at all (budget so low even the 1.8 GB model won't load).
|
|
262
|
+
|
|
263
|
+
**Why stage 3 matters on low-RAM machines:** an M1 Air (8 GB) has Metal headroom ≈ 5.4 GB. At `LLM_BUDGET_PCT=0.5` the budget is ~2.7 GB. The default preferred model is 6.5 GB (`Qwen3.5-9B-OptiQ-4bit`). Without the fix, stage 3 returned the preferred unconditionally — the server then attempted to load a 6.5 GB model into a 2.7 GB budget, causing memory pressure or an outright load failure. With the fix, stage 3 selects `Qwen3.5-4B-MLX-4bit` (2.5 GB) or `Llama-3.2-3B-Instruct-4bit` (1.8 GB) — whichever is largest and fits — and downloads it on first use.
|
|
264
|
+
|
|
265
|
+
**Check what would be selected:**
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
cd services
|
|
269
|
+
.venv/bin/python -c "
|
|
270
|
+
from agents.llm_selector import select_mlx_model_id, probe_compute
|
|
271
|
+
snap = probe_compute()
|
|
272
|
+
print(f'Headroom: {snap.metal_headroom_gb:.1f} GB thermal: {snap.thermal_level}')
|
|
273
|
+
model = select_mlx_model_id('mlx-community/Qwen3.5-9B-OptiQ-4bit', 6.5, 0.5)
|
|
274
|
+
print(f'Would load: {model}')
|
|
275
|
+
"
|
|
276
|
+
```
|
|
277
|
+
|
|
255
278
|
### Persistent MLX server
|
|
256
279
|
|
|
257
280
|
`_ensure_mlx_server()` manages a subprocess tracked in `~/.meridian/mlx_lm_server.pid` (JSON: pid, model, port). The model loads once and persists between `run_task_linker.py` invocations (which are fresh subprocesses each tick). If the budget changes and a different model is selected, the old server is killed and the new model loads automatically.
|
|
@@ -991,13 +991,29 @@ def select_mlx_model_id(
|
|
|
991
991
|
)
|
|
992
992
|
return hf_id
|
|
993
993
|
|
|
994
|
-
# 3. Nothing cached fits and Apple Intelligence is unavailable (macOS < 26)
|
|
995
|
-
#
|
|
996
|
-
#
|
|
997
|
-
|
|
994
|
+
# 3. Nothing cached fits and Apple Intelligence is unavailable (macOS < 26).
|
|
995
|
+
# Pick the largest catalog model that fits the budget, ignoring the cache
|
|
996
|
+
# (it will trigger a one-time download). This prevents returning an
|
|
997
|
+
# oversized preferred model (e.g. 6.5 GB Qwen3.5-9B on an 8 GB machine
|
|
998
|
+
# whose Metal budget is ~2.7 GB) that would exceed available memory.
|
|
999
|
+
# Only fall back to preferred_hf_id when nothing in the catalog fits.
|
|
1000
|
+
entry = _select_mlx_entry(snap.metal_headroom_gb, effective_pct,
|
|
1001
|
+
snap.thermal_level, apple_intelligence)
|
|
1002
|
+
if entry is not None:
|
|
1003
|
+
_, _, min_ram, _, hf_id = entry
|
|
1004
|
+
span.set_attribute("llm.reason", "catalog_fit_uncached")
|
|
1005
|
+
span.set_attribute("llm.selected_model", hf_id or "")
|
|
1006
|
+
log.warning(
|
|
1007
|
+
"llm_selector: no cached MLX model fits budget=%.1f GB — "
|
|
1008
|
+
"selecting %s (min_ram=%.1f GB fits; will download)",
|
|
1009
|
+
budget, hf_id, min_ram,
|
|
1010
|
+
)
|
|
1011
|
+
return hf_id
|
|
1012
|
+
span.set_attribute("llm.reason", "nothing_fits_use_preferred")
|
|
998
1013
|
span.set_attribute("llm.selected_model", preferred_hf_id or "")
|
|
999
1014
|
log.warning(
|
|
1000
|
-
"llm_selector: no
|
|
1015
|
+
"llm_selector: no catalog model fits budget=%.1f GB — "
|
|
1016
|
+
"last-resort fallback to preferred %s",
|
|
1001
1017
|
budget, preferred_hf_id,
|
|
1002
1018
|
)
|
|
1003
1019
|
return preferred_hf_id
|
package/services/pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "meridian-agents"
|
|
7
|
-
version = "1.
|
|
7
|
+
version = "1.32.0"
|
|
8
8
|
description = "Meridian agents — hermes task linking and Jira progress updates for meridian.db"
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
authors = [{ name = "Meridiona" }]
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Tests for select_mlx_model_id in llm_selector.py.
|
|
2
|
+
|
|
3
|
+
Covers the three-stage selection logic with focus on the low-RAM stage 3 fix:
|
|
4
|
+
previously the function returned the oversized preferred model unconditionally
|
|
5
|
+
when nothing cached fit; it now picks the largest catalog model that fits.
|
|
6
|
+
"""
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from unittest.mock import MagicMock, patch
|
|
10
|
+
|
|
11
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Helpers
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
def _make_snap(headroom_gb: float, thermal_level: int = 0,
|
|
19
|
+
screen_locked: bool = False) -> MagicMock:
|
|
20
|
+
snap = MagicMock()
|
|
21
|
+
snap.metal_headroom_gb = headroom_gb
|
|
22
|
+
snap.thermal_level = thermal_level
|
|
23
|
+
snap.screen_locked = screen_locked
|
|
24
|
+
return snap
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Stage 1 — preferred fits
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
class TestStage1PreferredFits:
    """Stage 1: the preferred model is returned as-is when the budget covers it."""

    @staticmethod
    def _select_preferred(snap, chip_name):
        """Call select_mlx_model_id for the 6.5 GB preferred model at
        budget_pct=0.5, with platform, chip name, the Apple Intelligence probe
        and the compute probe patched out."""
        with (
            patch("agents.llm_selector.platform") as fake_platform,
            patch("agents.llm_selector._sysctl", return_value=chip_name),
            patch("agents.llm_selector._apple_intelligence_available", return_value=False),
            patch("agents.llm_selector.probe_compute", return_value=snap),
        ):
            fake_platform.system.return_value = "Darwin"
            from agents.llm_selector import select_mlx_model_id
            return select_mlx_model_id(
                preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
                preferred_min_ram_gb=6.5,
                budget_pct=0.5,
            )

    def test_returns_preferred_when_budget_allows(self):
        """Stage 1: preferred model comes back unchanged when it fits.

        14.0 GB headroom × 0.5 = 7.0 GB budget ≥ 6.5 GB min_ram → stage 1 fires.
        """
        chosen = self._select_preferred(_make_snap(headroom_gb=14.0), "Apple M2 Pro")

        assert chosen == "mlx-community/Qwen3.5-9B-OptiQ-4bit"

    def test_returns_preferred_on_large_machine(self):
        """Stage 1: 64 GB machine, preferred (6.5 GB) trivially fits."""
        chosen = self._select_preferred(_make_snap(headroom_gb=50.0), "Apple M2 Ultra")

        # 50.0 × 0.5 = 25 GB budget; 6.5 ≤ 25 → stage 1 fires
        assert chosen == "mlx-community/Qwen3.5-9B-OptiQ-4bit"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
# Stage 2 — catalog cached fit
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
class TestStage2CachedFit:
    """Stage 2: preferred is too large, so a cached model (or Apple
    Intelligence, when available) is chosen instead."""

    @staticmethod
    def _select(snap, apple_intelligence, **cache_patch_kwargs):
        """Call select_mlx_model_id for the 6.5 GB preferred model at
        budget_pct=0.5 on a patched "Apple M1 Air".

        ``cache_patch_kwargs`` is forwarded to the ``_hf_model_cached`` patch
        (``return_value=...`` or ``side_effect=...``).
        """
        with (
            patch("agents.llm_selector.platform") as fake_platform,
            patch("agents.llm_selector._sysctl", return_value="Apple M1 Air"),
            patch("agents.llm_selector._apple_intelligence_available",
                  return_value=apple_intelligence),
            patch("agents.llm_selector.probe_compute", return_value=snap),
            patch("agents.llm_selector._hf_model_cached", **cache_patch_kwargs),
        ):
            fake_platform.system.return_value = "Darwin"
            from agents.llm_selector import select_mlx_model_id
            return select_mlx_model_id(
                preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
                preferred_min_ram_gb=6.5,
                budget_pct=0.5,
            )

    def test_returns_largest_cached_model_when_preferred_too_big(self):
        """Stage 2: preferred doesn't fit but a smaller cached model does."""
        # budget = 5.5 × 0.5 = 2.75 GB
        snap = _make_snap(headroom_gb=5.5)

        def only_qwen_4b_cached(hf_id):
            # Only qwen3.5-4b (2.5 GB) is cached
            return hf_id == "mlx-community/Qwen3.5-4B-MLX-4bit"

        chosen = self._select(snap, False, side_effect=only_qwen_4b_cached)

        assert chosen == "mlx-community/Qwen3.5-4B-MLX-4bit"

    def test_apple_intelligence_returned_when_available(self):
        """Stage 2 (apple_fm branch): Apple Intelligence chosen over mlx on macOS 26+."""
        snap = _make_snap(headroom_gb=5.5)

        chosen = self._select(snap, True, return_value=False)

        from agents.llm_selector import APPLE_INTELLIGENCE_ID
        assert chosen == APPLE_INTELLIGENCE_ID
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
# Stage 3 — the fixed low-RAM fallback
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
class TestStage3LowRamFallback:
    """Stage 3: nothing is cached and Apple Intelligence is unavailable, so the
    selector must pick the largest catalog model that fits the budget and only
    fall back to the preferred model when no catalog model fits."""

    @staticmethod
    def _select(snap, chip_name, **selector_kwargs):
        """Call select_mlx_model_id(**selector_kwargs) with platform, chip name,
        Apple Intelligence (off), the compute probe and the HF cache check
        (always "not cached") patched out."""
        with (
            patch("agents.llm_selector.platform") as fake_platform,
            patch("agents.llm_selector._sysctl", return_value=chip_name),
            patch("agents.llm_selector._apple_intelligence_available", return_value=False),
            patch("agents.llm_selector.probe_compute", return_value=snap),
            patch("agents.llm_selector._hf_model_cached", return_value=False),
        ):
            fake_platform.system.return_value = "Darwin"
            from agents.llm_selector import select_mlx_model_id
            return select_mlx_model_id(**selector_kwargs)

    def test_returns_fitting_catalog_model_not_preferred_on_low_ram(self):
        """Stage 3 fix: M1 Air 8 GB — nothing cached, preferred (6.5 GB) doesn't
        fit the 2.7 GB budget — must return a smaller catalog model, NOT preferred.
        """
        # Metal headroom ≈ 5.4 GB on M1 Air 8 GB
        # budget = 5.4 × 0.5 = 2.7 GB → llama3.2-3b (1.8 GB) fits, qwen3.5-4b (2.5 GB) fits
        snap = _make_snap(headroom_gb=5.4)

        chosen = self._select(
            snap, "Apple M1",
            preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
            preferred_min_ram_gb=6.5,
            budget_pct=0.5,
        )

        # Must NOT return the oversized preferred model
        assert chosen != "mlx-community/Qwen3.5-9B-OptiQ-4bit", (
            "Stage 3 returned the oversized preferred model on a low-RAM machine"
        )
        # Must return a model that fits the budget (largest fitting = qwen3.5-4b at 2.5 GB)
        assert chosen == "mlx-community/Qwen3.5-4B-MLX-4bit"

    def test_preferred_returned_only_when_nothing_in_catalog_fits(self):
        """Stage 3 true last resort: budget so tiny no catalog model fits."""
        # budget = 0.25 GB — nothing fits
        snap = _make_snap(headroom_gb=0.5)

        chosen = self._select(
            snap, "Apple M1",
            preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
            preferred_min_ram_gb=6.5,
            budget_pct=0.5,
        )

        # Nothing fits → last-resort returns preferred (preserves old behaviour)
        assert chosen == "mlx-community/Qwen3.5-9B-OptiQ-4bit"

    def test_returns_none_when_no_preferred_and_nothing_fits(self):
        """Stage 3: no preferred given + nothing fits → None."""
        snap = _make_snap(headroom_gb=0.5)

        chosen = self._select(snap, "Apple M1", budget_pct=0.5)

        assert chosen is None

    def test_picks_largest_fitting_not_smallest(self):
        """Stage 3: when multiple catalog models fit, the largest (highest quality) wins."""
        # budget = 3.5 GB
        # qwen3.5-4b (2.5 GB) and llama3.2-3b (1.8 GB) both fit; qwen3.5-4b should win
        snap = _make_snap(headroom_gb=7.0)

        chosen = self._select(
            snap, "Apple M1 Pro",
            preferred_hf_id="mlx-community/Qwen3.5-9B-OptiQ-4bit",
            preferred_min_ram_gb=6.5,
            budget_pct=0.5,
        )

        assert chosen == "mlx-community/Qwen3.5-4B-MLX-4bit"
|
package/ui.tar.gz
CHANGED
|
Binary file
|