project-ara 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ara/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Project ARA — AI Runs Anywhere.
4
+
5
+ Backend-agnostic local inference. Core is pure Python; hardware-specific
6
+ engines load lazily behind a backend protocol (see docs in the project_ara vault).
7
+ """
ara/acquire.py ADDED
@@ -0,0 +1,142 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Model acquisition — backend-neutral downloads into the HF cache.
4
+
5
+ ``download(repo_id)`` fetches a model; ``repo_size_gb`` / ``free_disk_gb`` back the
6
+ pre-download disk check. Uses ``huggingface_hub`` directly, which produces the exact
7
+ cache layout that mlx_lm / wmx-suite reads.
8
+
9
+ No token required for ungated models (e.g. mlx-community/*). Set HF_TOKEN or
10
+ HUGGING_FACE_HUB_TOKEN for gated ones, or HF_ENDPOINT for a mirror.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import re
16
+
17
+ # Headroom we insist on beyond the raw download, so a fetch never fills the disk
18
+ # (unpacking, the snapshot's own .incomplete temp files, normal system churn).
19
+ DISK_BUFFER_GB = 2.0
20
+
21
+ # A well-formed Hugging Face repo id: ``name`` or ``org/name``, each segment starting with an
22
+ # alphanumeric. Rejects anything an out-of-process worker's argparse could mis-read as a flag or
23
+ # path — a leading ``-``, an ``=``, whitespace, ``..`` traversal, extra slashes. The model is a
24
+ # *sink arg* (it becomes argv for the engine worker), so ARA validates its shape before it ever
25
+ # leaves the process. Defensive: the value is a local CLI arg, but cheap to get right.
26
_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*(/[A-Za-z0-9][A-Za-z0-9._-]*)?$")


def valid_model_id(model: str) -> bool:
    """Return True if *model* is a well-formed HF repo id (``name`` or ``org/name``).

    A True result means the value is safe to pass as a worker argv positional: each
    segment starts with an alphanumeric and contains only ``[A-Za-z0-9._-]``, with at
    most one ``/`` separator. Non-string values (including ``None``) yield False.
    """
    if not isinstance(model, str):
        return False
    # fullmatch, not match: ``$`` also matches just before a trailing newline, so
    # ``match`` would let a value ending in a line break through the guard. fullmatch
    # requires the pattern to consume the entire string.
    return _MODEL_ID_RE.fullmatch(model) is not None
33
+
34
+
35
def is_local_gguf(model: str) -> bool:
    """Return True if *model* names an existing local ``.gguf`` file usable as worker argv.

    Three conditions must all hold: the value is a string ending in ``.gguf``, it does
    not start with ``-`` (so it can never be read as a flag), and it resolves to a
    regular file on disk. This lets the CLI accept loose GGUF files without relaxing
    the repo-id guard. (Slug: 2026-06-25-local-gguf-cli-support)
    """
    if not isinstance(model, str):
        return False
    # Cheap string checks first; only touch the filesystem for plausible candidates.
    if model.startswith("-") or not model.endswith(".gguf"):
        return False
    return os.path.isfile(model)
43
+
44
+
45
def valid_model_ref(model: str) -> bool:
    """Return True if *model* is an acceptable model reference for an engine worker.

    Accepts either a well-formed HF repo id or a path to an existing local ``.gguf``
    file. This is the single guard the CLI applies before a model value becomes
    worker argv. (Slug: 2026-06-25-local-gguf-cli-support)
    """
    if valid_model_id(model):
        return True
    # Not a repo id: fall back to the local-file form.
    return is_local_gguf(model)
50
+
51
+
52
+ _REASON_GATED = "gated"
53
+ _REASON_NOT_FOUND = "not_found"
54
+ _REASON_AUTH = "auth"
55
+ _REASON_OFFLINE = "offline"
56
+ _REASON_UNKNOWN = "unknown"
57
+
58
+
59
def classify_repo_error(exc: BaseException) -> str:
    """Translate a Hugging Face (or network) exception into a short reason string.

    The result is one of ``"gated"``, ``"not_found"``, ``"auth"``, ``"offline"`` or
    ``"unknown"``. Any exception type is accepted; anything unrecognised maps to
    ``"unknown"``. The ``huggingface_hub`` error classes are imported inside the
    function so importing this module stays cheap.
    """
    from huggingface_hub.errors import (
        GatedRepoError, HfHubHTTPError, LocalEntryNotFoundError,
        OfflineModeIsEnabled, RepositoryNotFoundError,
    )

    # First match wins, so the order is significant: the gated check must run before
    # the not-found check (presumably GatedRepoError specialises the not-found error
    # in huggingface_hub — confirm against its docs before reordering).
    ordered_rules = (
        (GatedRepoError, _REASON_GATED),
        (RepositoryNotFoundError, _REASON_NOT_FOUND),
        ((LocalEntryNotFoundError, OfflineModeIsEnabled, ConnectionError), _REASON_OFFLINE),
    )
    for exc_types, reason in ordered_rules:
        if isinstance(exc, exc_types):
            return reason

    # Only an HTTP 401 counts as an auth failure; the response may be absent.
    if isinstance(exc, HfHubHTTPError):
        response = getattr(exc, "response", None)
        if getattr(response, "status_code", None) == 401:
            return _REASON_AUTH
    return _REASON_UNKNOWN
83
+
84
+
85
def probe_repo(repo_id: str) -> dict:
    """Look up *repo_id* on the hub and report its size or why the lookup failed.

    Returns ``{"size_gb": float | None, "reason": str | None}``:

    * on success ``reason`` is None and ``size_gb`` is the summed file size in decimal
      GB rounded to three places, or None when no file reported a size;
    * on any exception ``size_gb`` is None and ``reason`` is the
      ``classify_repo_error`` string for that exception.

    Use this when the caller must surface *why* a fetch failed; ``repo_size_gb`` is
    the shorter call when only the size matters.
    """
    from huggingface_hub import HfApi

    try:
        info = HfApi().model_info(repo_id, files_metadata=True)
        total_bytes = 0
        for sibling in info.siblings or []:
            # Entries without a size (None or 0) contribute nothing.
            if sibling.size:
                total_bytes += sibling.size
        size_gb = round(total_bytes / 1e9, 3) if total_bytes else None
    except Exception as exc:
        # Deliberately broad: every failure becomes a classified reason, never a raise.
        return {"size_gb": None, "reason": classify_repo_error(exc)}
    return {"size_gb": size_gb, "reason": None}
101
+
102
+
103
def repo_size_gb(repo_id: str) -> float | None:
    """Return the total download size of *repo_id* in decimal GB, or None if unknown.

    Thin wrapper over ``probe_repo`` that discards the failure reason; callers treat
    None as "size unknown".
    """
    probe = probe_repo(repo_id)
    return probe["size_gb"]
107
+
108
+
109
def free_disk_gb() -> float | None:
    """Return free space, in decimal GB, on the volume that holds the home directory.

    Returns None if the home directory or its disk usage cannot be determined.
    """
    import shutil
    from pathlib import Path

    try:
        usage = shutil.disk_usage(Path.home())
    except Exception:
        # Best-effort probe: an unreadable volume means "unknown", not a crash.
        return None
    return usage.free / 1e9
118
+
119
+
120
def download(repo_id: str, *, progress: bool = False) -> None:
    """Fetch *repo_id* into the HF cache via ``snapshot_download``; no engine is loaded.

    ``progress=True`` turns on huggingface_hub's progress bars for the duration of the
    call; ``progress=False`` (the default) turns them off so the caller owns the
    output. Whatever the bar state was on entry is put back in ``finally``, whether
    or not the download raised.
    """
    from huggingface_hub import snapshot_download
    from huggingface_hub.utils import (
        are_progress_bars_disabled,
        disable_progress_bars,
        enable_progress_bars,
    )

    # Remember the caller's setting before overriding it for this call.
    bars_were_off = are_progress_bars_disabled()
    apply_requested = enable_progress_bars if progress else disable_progress_bars
    apply_requested()
    try:
        snapshot_download(repo_id)
    finally:
        restore_previous = disable_progress_bars if bars_were_off else enable_progress_bars
        restore_previous()
ara/apps.py ADDED
@@ -0,0 +1,176 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Inventory of AI/ML applications installed on the machine — GUI apps in /Applications
4
+ plus Homebrew packages — matched against a curated catalog of known AI/ML software.
5
+
6
+ A different lens from ENGINES (what ARA can launch) and FRAMEWORKS (python libraries):
7
+ this is "what AI software is installed here," organized by what it's for. Read-only.
8
+ macOS-focused (scans /Applications + Homebrew); degrades to whatever it can find elsewhere.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+
15
+ from ara import versions
16
+
17
+ # Category keys in display order, with their section sub-headers.
18
+ CATEGORY_LABEL = {
19
+ "runner": "model runners",
20
+ "image": "image generation",
21
+ "speech": "speech / audio",
22
+ "toolkit": "ML toolkits",
23
+ "assistant": "AI assistants",
24
+ "coding": "AI coding",
25
+ }
26
+ _ORDER = list(CATEGORY_LABEL)
27
+
28
+ # (label, category, [.app bundle names], [brew formula/cask tokens]). Curated — matched
29
+ # exactly (case-insensitive), no keyword guessing, so a hit is always a real known app.
30
+ CATALOG: list[tuple[str, str, list[str], list[str]]] = [
31
+ # local model runners / chat frontends
32
+ ("LM Studio", "runner", ["LM Studio"], ["lm-studio"]),
33
+ ("Ollama", "runner", ["Ollama"], ["ollama"]),
34
+ ("GPT4All", "runner", ["GPT4All", "gpt4all"], ["gpt4all"]),
35
+ ("Jan", "runner", ["Jan"], ["jan"]),
36
+ ("Msty", "runner", ["Msty"], ["msty"]),
37
+ ("Enchanted", "runner", ["Enchanted"], []),
38
+ ("Ollamac", "runner", ["Ollamac"], []),
39
+ ("Pinokio", "runner", ["Pinokio"], ["pinokio"]),
40
+ ("Transformer Lab", "runner", ["Transformer Lab"], []),
41
+ # image generation
42
+ ("DiffusionBee", "image", ["DiffusionBee"], ["diffusionbee"]),
43
+ ("Draw Things", "image", ["Draw Things"], []),
44
+ ("ComfyUI", "image", ["ComfyUI"], []),
45
+ ("InvokeAI", "image", ["InvokeAI"], []),
46
+ ("Diffusers", "image", ["Diffusers"], []),
47
+ ("Fooocus", "image", ["Fooocus"], []),
48
+ # speech / audio
49
+ ("MacWhisper", "speech", ["MacWhisper"], ["macwhisper"]),
50
+ ("superwhisper", "speech", ["superwhisper"], ["superwhisper"]),
51
+ ("VoiceInk", "speech", ["VoiceInk"], ["voiceink"]),
52
+ ("Aiko", "speech", ["Aiko"], []),
53
+ ("Whisper Transcription", "speech", ["Whisper Transcription"], []),
54
+ # ML toolkits / CLIs (largely Homebrew)
55
+ ("llama.cpp", "toolkit", [], ["llama.cpp"]),
56
+ ("whisper.cpp", "toolkit", [], ["whisper-cpp"]),
57
+ ("MLX", "toolkit", [], ["mlx", "mlx-c"]),
58
+ ("ggml", "toolkit", [], ["ggml"]),
59
+ ("ONNX Runtime", "toolkit", [], ["onnxruntime"]),
60
+ ("PyTorch", "toolkit", [], ["pytorch"]),
61
+ ("TensorFlow", "toolkit", [], ["tensorflow"]),
62
+ ("Hugging Face CLI", "toolkit", [], ["huggingface-cli"]),
63
+ # AI assistants (cloud clients) and AI coding tools
64
+ ("ChatGPT", "assistant", ["ChatGPT"], ["chatgpt"]),
65
+ ("Claude", "assistant", ["Claude"], ["claude"]),
66
+ ("Perplexity", "assistant", ["Perplexity"], ["perplexity"]),
67
+ ("Cursor", "coding", ["Cursor"], ["cursor"]),
68
+ ("Windsurf", "coding", ["Windsurf"], ["windsurf"]),
69
+ ("Antigravity", "coding", ["Antigravity"], []),
70
+ # Codex ships as two distinct artifacts — keep them separate so their independent
71
+ # versions aren't compared as "drift" (the .app is com.openai.codex; the cask is the CLI).
72
+ ("Codex", "coding", ["Codex"], []),
73
+ ("Codex CLI", "coding", [], ["codex"]),
74
+ ("CodexBar", "coding", ["CodexBar"], ["codexbar"]),
75
+ ("Claude Code", "coding", [], ["claude-code"]),
76
+ ("GitHub Copilot", "coding", ["GitHub Copilot"], ["copilot"]),
77
+ ("Warp", "coding", ["Warp"], ["warp"]),
78
+ ]
79
+
80
+
81
@dataclass(frozen=True)
class App:
    """One catalog entry detected on this machine, with how it was installed.

    Immutable record. The boolean fields say where the tool was found; the derived
    properties (``homebrew``, ``drift``, ``duplicate``, ``source``) summarise them.
    """

    label: str
    category: str
    in_app: bool                      # a .app bundle is present in an Applications folder
    cask: bool                        # installed as a Homebrew cask (which IS how its .app got there)
    formula: bool                     # installed as a Homebrew formula (CLI)
    version: str | None = None        # what's actually installed (.app plist for GUIs)
    brew_recorded: str | None = None  # Homebrew's receipt version, when it manages this
    cask_token: str | None = None     # the matched brew cask token (for drift remediation)
    installed_at: float | None = None  # epoch mtime/birthtime, for "recently installed"

    @property
    def homebrew(self) -> bool:
        """True when Homebrew manages this entry in either form (cask or formula)."""
        return self.cask or self.formula

    @property
    def drift(self) -> bool:
        """True when a cask-installed GUI app reports a version other than brew's receipt.

        That means the app has self-updated and `brew` no longer reflects reality (and
        `brew upgrade` may clobber it). An installed .app is required, so a CLI-only
        cask never counts; both versions must be known to compare them.
        """
        if not (self.cask and self.in_app):
            return False
        if not (self.brew_recorded and self.version):
            return False
        return self.version != self.brew_recorded

    @property
    def duplicate(self) -> bool:
        """True when two independent installs of the same tool coexist.

        That is a CLI formula alongside a GUI install (a cask or a hand-dropped .app).
        A cask together with its own .app is NOT a duplicate: the cask is what put the
        .app there.
        """
        gui_install = self.cask or self.in_app
        return self.formula and gui_install

    @property
    def source(self) -> str:
        """Human-readable description of where this install came from."""
        if self.cask:
            return "Homebrew (cask + formula)" if self.formula else "Homebrew (cask)"
        if self.formula:
            return "Homebrew (formula) + separate app" if self.in_app else "Homebrew (formula)"
        return "app (not via Homebrew)"
123
+
124
+
125
_APP_DIRS = (Path("/Applications"), Path.home() / "Applications")
_BREW_PREFIX = Path("/opt/homebrew") if Path("/opt/homebrew").exists() else Path("/usr/local")


def _install_time(bundles: list[str], tokens: list[str], in_app: bool) -> float | None:
    """Best-effort install/update time as an epoch float, or None if nothing is found.

    When *in_app* is true, the first ``<bundle>.app`` directory found under
    ``_APP_DIRS`` wins; its time is ``max(mtime, birthtime)`` because some bundles
    report a bogus birthtime. Otherwise (or if no bundle is found) the first Homebrew
    ``Caskroom``/``Cellar`` directory for one of *tokens* is used, taking the newest
    mtime among its entries (or the directory's own mtime when it is empty).

    Filesystem errors on one candidate never propagate: that candidate is skipped
    and the next one is tried, so an inventory scan is not aborted by a single
    unreadable path.
    """
    if in_app:
        for bundle in bundles:
            for base in _APP_DIRS:
                app = base / f"{bundle}.app"
                try:
                    if not app.is_dir():
                        continue
                    st = app.stat()
                except OSError:
                    # Unreadable or vanished between the check and the stat.
                    continue
                # st_birthtime is not available on every platform; treat missing as 0.
                return max(st.st_mtime, getattr(st, "st_birthtime", 0) or 0)
    for token in tokens:
        for sub in ("Caskroom", "Cellar"):
            brew_dir = _BREW_PREFIX / sub / token
            try:
                if not brew_dir.is_dir():
                    continue
                return max((p.stat().st_mtime for p in brew_dir.iterdir()),
                           default=brew_dir.stat().st_mtime)
            except OSError:
                # e.g. a dangling symlink inside the directory; try the next candidate.
                continue
    return None
148
+
149
+
150
def scan() -> list[App]:
    """Return the catalog entries installed on this machine, by category then name.

    Each ``CATALOG`` row is checked against the Applications folders (via
    ``versions.find_app``) and the Homebrew cask/formula listings; rows found in none
    of them are dropped. Sorting follows ``_ORDER`` for the category and the
    lower-cased label within a category.
    """
    formulae = versions.brew_formulae()
    casks = versions.brew_casks()
    found: list[App] = []
    for label, category, bundles, tokens in CATALOG:
        in_app, app_ver = versions.find_app(bundles)
        cask_ver = next((casks[t] for t in tokens if casks.get(t)), None)
        formula_ver = next((formulae[t] for t in tokens if formulae.get(t)), None)
        cask = any(t in casks for t in tokens)
        formula = any(t in formulae for t in tokens)
        if not (in_app or cask or formula):
            continue
        cask_token = next((t for t in tokens if t in casks), None)
        # The installed truth is the .app's own version. For a cask, brew's receipt is
        # kept as well so self-update drift can be flagged. A formula (CLI) has no
        # .app, so its brew version IS the truth; a hand-installed .app has no receipt.
        brew_recorded = cask_ver if cask else None
        if cask:
            version = app_ver or cask_ver
        else:
            version = formula_ver if formula else app_ver
        found.append(App(
            label,
            category,
            in_app=in_app,
            cask=cask,
            formula=formula,
            version=version,
            brew_recorded=brew_recorded,
            cask_token=cask_token,
            installed_at=_install_time(bundles, tokens, in_app),
        ))
    return sorted(found, key=lambda a: (_ORDER.index(a.category), a.label.lower()))
@@ -0,0 +1,3 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Hardware backends. Each module is an adapter loaded lazily by the registry."""
ara/backends/ane.py ADDED
@@ -0,0 +1,10 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Apple Neural Engine (ANE / CoreML) — STUB (no implementation yet).
4
+
5
+ Contract class: **graph-fit** (not a context ramp). The question here isn't "how far
6
+ can KV-cache grow" but "does this fixed, quantized graph map onto the accelerator and
7
+ its memory slice." A different assessment from apple.py, which targets the *GPU*
8
+ (MLX/Metal) on the same chip — a modern Mac carries both backends at once.
9
+ Wall source: unified memory shared with the system; programmed via CoreML.
10
+ """
ara/backends/apple.py ADDED
@@ -0,0 +1,178 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Apple-Silicon backend adapter — drives wmx-suite's MLX measurement out-of-process.
4
+
5
+ A lean device oracle, symmetric with backends/cuda.py: it reads the machine's memory wall and
6
+ runs wmx-suite's crash-safe calibration, but it owns **no persistence** — ARA stores and reuses
7
+ the calibration (see cli.render_profile). It never imports wmx in-process: every engine call
8
+ goes through the isolated ``apple`` env via :mod:`ara.engine_env`, so nothing MLX-shaped loads
9
+ in ARA's interpreter and the core stays engine-free at runtime, not just at lock time.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ # Core, engine-free helpers (no wmx) — safe to import at module load and patchable in tests.
14
+ from ara import calibration, db, engine_env
15
+ from ara.contracts import driver
16
+
17
+ # The wmx worker modules ARA drives in the isolated apple env (never imported in-process).
18
+ DEVICE_MODULE = "wmx_suite.device"
19
+
20
+ # Model ARA calibrates against — smallest SmolLM (MLX 4-bit). Calibration only measures
21
+ # fixed memory overhead, so a tiny instruct model is plenty.
22
+ CALIBRATION_MODEL = "mlx-community/SmolLM-135M-Instruct-4bit"
23
+
24
+
25
def safe_limits() -> dict:
    """Ask the wmx worker for this machine's safe memory limits. Pure read, no model.

    Stateless: the worker's facts are returned with the calibration fields forced to
    "none stored" (``overhead_gb=None``, ``calibrated=False``, ``calibrated_at=None``).
    ARA overlays a previously measured overhead from its own store.
    """
    limits = {**engine_env.run_worker("apple", ["-m", DEVICE_MODULE, "limits"])}
    # ARA owns the stored calibration now, so anything the worker said is overridden.
    limits["overhead_gb"] = None
    limits["calibrated"] = False
    limits["calibrated_at"] = None
    return limits
38
+
39
+
40
def calibration_model_cached(model: str = CALIBRATION_MODEL) -> bool:
    """Return True if *model*'s ``config.json`` is already in the HF cache (no load).

    Any exception from the cache lookup is treated as "not cached".
    """
    from huggingface_hub import try_to_load_from_cache

    try:
        hit = try_to_load_from_cache(model, "config.json")
    except Exception:
        return False
    # Only a string result counts as cached; every other return value means "no".
    return isinstance(hit, str)
48
+
49
+
50
def download_calibration_model(model: str = CALIBRATION_MODEL, *,
                               progress: bool = False) -> None:
    """Fetch the calibration model into the HF cache. Network + disk only.

    Delegates to ``ara.acquire.download``; *progress* is forwarded unchanged.
    """
    # Imported at call time, matching the module's lazy-import style.
    from ara.acquire import download as fetch_snapshot

    fetch_snapshot(model, progress=progress)
56
+
57
+
58
def calibrate(model: str = CALIBRATION_MODEL) -> dict:
    """Run wmx-suite's crash-safe calibration via the worker; return limits + the measurement.

    The worker runs out-of-process in the ``apple`` env. On success the returned
    limits carry ``calibrated=True``, the worker's raw result under ``"calibration"``,
    and ``overhead_gb`` set to the **effective** cold-start overhead,
    ``max(default, measured)``, so ARA can persist it.

    On failure the result is uncalibrated: ``calibrated=False``, ``overhead_gb=None``
    and a ``calibration_error`` message. This covers a worker exception, a worker
    result with a truthy ``"error"``, and a result with no ``measured_overhead_gb``.
    Data that was not observed is never reported as calibrated. When the worker did
    return a result dict, it is still attached under ``"calibration"``.

    Note: an exception from ``safe_limits()`` itself is not caught here, because
    there would be no limits to return.
    """
    limits = safe_limits()
    try:
        result = engine_env.run_worker("apple", ["-m", DEVICE_MODULE, "calibrate", model])
    except Exception as exc:
        return _mark_uncalibrated(limits, model, exc)
    if result.get("error"):
        return _mark_uncalibrated(limits, model, result["error"], result)

    measured = result.get("measured_overhead_gb")
    if measured is None:
        # Without a measurement, falling back to the default and claiming
        # calibrated=True would present unobserved data as observed.
        return _mark_uncalibrated(
            limits, model, "worker reported no measured overhead", result)

    default = result.get("default_overhead_gb")
    # Clamp to the engine's floor when it reports one.
    limits["overhead_gb"] = measured if default is None else max(default, measured)
    limits["calibrated"] = True
    limits["calibration"] = result
    return limits


def _mark_uncalibrated(limits: dict, model: str, detail: object,
                       result: dict | None = None) -> dict:
    """Stamp *limits* as uncalibrated with a ``calibration_error`` and return it."""
    limits["calibrated"] = False
    limits["overhead_gb"] = None
    limits["calibration_error"] = f"calibration unavailable for {model!r}: {detail}"
    if result is not None:
        limits["calibration"] = result
    return limits
97
+
98
+
99
+ # ARA-owned ramp policy (the engine only measures; ARA decides the schedule + safety margin).
100
+ WORKER_MODULE = "wmx_suite.measure_one"
101
+ RAMP_SCHEDULE = [2000, 4000, 8000, 16000, 32000, 65536, 131072]
102
+ DEFAULT_MARGIN_GB = 2.0 # safety cushion below the wall (ARA policy)
103
+ DEFAULT_OVERHEAD_GB = 1.0 # fallback cold-start overhead until calibrated
104
+
105
+
106
def _budget_params() -> tuple[float, float]:
    """Return ARA's ``(margin, overhead)`` pair for the wmx engine.

    The margin is the fixed policy value ``DEFAULT_MARGIN_GB``. The overhead is the
    ``fixed_overhead_gb`` stored for ``"wmx"`` in ARA's calibration store, or
    ``DEFAULT_OVERHEAD_GB`` when nothing (or a null value) is stored.
    """
    # NOTE(review): the connection from db.connect() is not closed here — confirm
    # that db.connect() manages its own lifetime.
    stored = calibration.get_calibration(db.connect(), "wmx")
    measured = stored.get("fixed_overhead_gb") if stored else None
    overhead = DEFAULT_OVERHEAD_GB if measured is None else measured
    return DEFAULT_MARGIN_GB, overhead
114
+
115
+
116
+ # KV-cache quant lever (parity with the Vulkan lane). ARA's cross-engine `--kv-quant`
117
+ # {f16,q8_0,q4_0} maps to MLX's integer kv-bits (fp16 = no quant). The effective bytes/elem
118
+ # (8-bit/4-bit payload + an fp16 scale+bias per 64-elem group) feeds the KV-aware decode
119
+ # estimate so it reflects the cache actually in use — not always fp16.
120
+ _MLX_KV_BITS = {"f16": None, "q8_0": 8, "q4_0": 4}
121
+ _MLX_KV_BYTES = {"f16": 2.0, "q8_0": 8 / 8 + 2 * 2 / 64, "q4_0": 4 / 8 + 2 * 2 / 64}
122
+
123
+
124
def _worker_argv(model: str, ctx: int, margin: float, overhead: float, *,
                 preflight: bool = False, kv_quant: str = "f16") -> list[str]:
    """Build the interpreter argv for one ``WORKER_MODULE`` run.

    Layout: ``-m <module> <model> <ctx> --margin <m> --overhead <o>``, then
    ``--preflight`` if requested, then ``--kv-bits <n>`` when *kv_quant* maps to an
    integer bit width (``"f16"`` maps to None and adds nothing). An unknown
    *kv_quant* raises ``KeyError``.
    """
    base = [
        "-m", WORKER_MODULE, model, str(ctx),
        "--margin", str(margin), "--overhead", str(overhead),
    ]
    preflight_flag = ["--preflight"] if preflight else []
    kv_bits = _MLX_KV_BITS[kv_quant]
    kv_flag = [] if kv_bits is None else ["--kv-bits", str(kv_bits)]
    return [*base, *preflight_flag, *kv_flag]
134
+
135
+
136
def characterize(model: str, *, progress: bool = False, kv_quant: str = "f16") -> dict:
    """Measure *model*'s safe context ceiling on this Mac — the thin path.

    Pure wiring: the methodology lives in the engine-agnostic ``contracts.driver``.
    This adapter supplies only the Apple specifics: the isolated ``apple`` env, the
    ``measure_one`` worker argv, the budget params, the ramp schedule and the
    KV-cache bytes per element for *kv_quant*. ARA never imports wmx in-process.
    Returns whatever ``driver.characterize`` returns (documented upstream as
    ``{model, safe_context, points}``).

    ``progress`` is accepted for interface symmetry with the cpu backend but is not
    used here.
    """
    margin, overhead = _budget_params()

    def run_preflight(m):
        # The preflight probe uses ctx=0 plus the --preflight flag.
        return engine_env.run_worker(
            "apple",
            _worker_argv(m, 0, margin, overhead, preflight=True, kv_quant=kv_quant))

    def run_measure(m, ctx):
        return engine_env.run_worker(
            "apple", _worker_argv(m, ctx, margin, overhead, kv_quant=kv_quant))

    return driver.characterize(
        model,
        preflight=run_preflight,
        measure=run_measure,
        schedule=RAMP_SCHEDULE,
        # The decode-ceiling estimate should reflect the cache type actually in use.
        kv_dtype_bytes=_MLX_KV_BYTES[kv_quant],
    )
159
+
160
+
161
+ DEFAULT_MAX_TOKENS = 256
162
+
163
+
164
def generate(model, prompt, *, max_context, max_tokens=DEFAULT_MAX_TOKENS,
             kv_quant: str = "f16") -> dict:
    """Run a one-shot MLX completion through the ``wmx_suite.generate`` worker.

    *max_context* is passed to the worker as the context ceiling (the caller is
    expected to supply the characterized safe value). The run is out-of-process in
    the isolated ``apple`` env; the prompt is sent on stdin, never on argv.
    ``kv_quant`` (default ``"f16"``) should match how *model* was characterized; an
    unknown value raises ``KeyError``. Returns the worker's result dict, documented
    upstream as ``{context, completion}`` or a refusal ``{refused, reason}``.
    """
    margin, overhead = _budget_params()
    argv = ["-m", "wmx_suite.generate", model, str(max_context)]
    argv += ["--margin", str(margin), "--overhead", str(overhead)]
    argv += ["--max-tokens", str(max_tokens)]
    kv_bits = _MLX_KV_BITS[kv_quant]
    if kv_bits is not None:
        # fp16 means "no quantization", so the flag is only added for 8/4-bit.
        argv.extend(("--kv-bits", str(kv_bits)))
    return engine_env.run_worker("apple", argv, input=prompt)
ara/backends/coral.py ADDED
@@ -0,0 +1,10 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2026 Will Sarg
3
+ """Google Coral / Edge TPU — STUB (no implementation yet).
4
+
5
+ Contract class: **graph-fit** (not a context ramp). A fixed-function edge accelerator
6
+ (USB/PCIe/SoM) that runs only INT8 TFLite models compiled by the Edge TPU compiler;
7
+ assessment is "does this compiled graph fit the device's on-chip SRAM + its model
8
+ budget," with overflow spilling to host. Closest neighbour to the MCU/TinyML class.
9
+ Wall source: Edge TPU on-chip memory (+ host RAM for the runtime).
10
+ """