npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.22.0 → 1.23.0 - Mend

@meridiona/meridian-darwin-arm64 1.22.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/scripts/install-from-bundle.sh +5 -1
package/scripts/meridian-cli.sh +1 -1
package/services/agents/llm_selector.py +26 -9
package/services/agents/run_task_linker_mlx.py +95 -20
package/services/agents/server.py +15 -8
package/services/pyproject.toml +4 -2
package/services/scripts/install-claude-hook.sh +12 -16
package/ui.tar.gz +0 -0

package/VERSION CHANGED Viewed

@@ -1 +1 @@
-1.22.0
+1.23.0

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.22.0",
+  "version": "1.23.0",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/scripts/install-from-bundle.sh CHANGED Viewed

@@ -319,10 +319,14 @@ if [[ -f "${VENV_TARBALL}" ]]; then
     fi
 else
     # Dev / source install — no pre-built tarball. Resolve from uv.lock.
-    info "Installing Python + MLX deps (mlx-lm/outlines/fastapi; first run may download a few hundred MB)…"
+    # Both extras: mlx (classifier + server) AND pm_worklog_update (agno) — the
+    # one MLX server process serves /classify_sessions AND /synthesise_worklog,
+    # so the venv needs agno or worklog synthesis 500s with ModuleNotFoundError.
+    info "Installing Python + MLX deps (mlx-lm/outlines/fastapi/agno; first run may download a few hundred MB)…"
     if "${UV_BIN}" sync \
             --project "${APP_ROOT}/services" \
             --extra mlx \
+            --extra pm_worklog_update \
             --frozen \
             --python "${PYTHON_BIN}"; then
         ok "Python services ready ($(${VENV}/bin/python --version 2>&1))"

package/scripts/meridian-cli.sh CHANGED Viewed

@@ -483,7 +483,7 @@ case "$CMD" in
     uninstall)        cmd_uninstall ;;
     permissions)      cmd_permissions ;;
     version|--version|-v) cat "${REPO_ROOT}/VERSION" 2>/dev/null || echo "unknown" ;;
-    worklog-status|pm-worklog) cmd_daemon_passthrough "$CMD" "$@" ;;
+    worklog-status|pm-worklog|coding-agent-hook|coding-agent-summarise|coding-agent-classify) cmd_daemon_passthrough "$CMD" "$@" ;;
     --help|-h|help|"") cmd_help ;;
     *) err "unknown command: ${CMD}"; echo; cmd_help; exit 1 ;;
 esac

package/services/agents/llm_selector.py CHANGED Viewed

@@ -274,6 +274,9 @@ class LocalModelEndpoint:
 _MANAGED_SERVER_PORT = 8765
 _MANAGED_SERVER_PID_FILE = Path.home() / ".meridian" / "mlx_lm_server.pid"
+# Sentinel returned by select_mlx_model_id() when Apple Intelligence is chosen.
+APPLE_INTELLIGENCE_ID = "apple-intelligence"
 def _metal_headroom_gb() -> tuple[float, str]:
     """Primary memory signal — headroom within Metal's recommended working set.
@@ -890,6 +893,9 @@ def select_mlx_model_id(
             span.set_attribute("llm.selected_model", preferred_hf_id or "")
             return preferred_hf_id
+        macos_major = int(platform.mac_ver()[0].split(".")[0] or "0")
+        apple_intelligence = macos_major >= 26
         try:
             snap = probe_compute()
         except Exception as exc:  # noqa: BLE001
@@ -928,13 +934,24 @@ def select_mlx_model_id(
             return preferred_hf_id
         # 2. Largest catalog model that BOTH fits the budget AND is already in
-        #    the HF cache. Gating on the cache keeps "dynamic" meaning "best
-        #    among what's present" — never a surprise multi-GB download (or an
-        #    offline load failure that would kill server startup) on exactly the
-        #    constrained machines this degradation path targets. The `budget`
-        #    here is already thermal-capped, matching _select_mlx_entry.
+        #    the HF cache. Apple Intelligence (apple_fm, min_ram=0) is always
+        #    "available" on supported machines — no HF cache check needed.
+        #    Gating MLX entries on the cache keeps "dynamic" meaning "best
+        #    among what's present" — never a surprise multi-GB download on
+        #    constrained machines. The `budget` here is already thermal-capped.
         for model_id, backend, min_ram, quality, hf_id in _MODELS:
-            if backend != "mlx" or min_ram > budget:
+            if backend == "apple_fm":
+                if apple_intelligence:
+                    span.set_attribute("llm.reason", "apple_intelligence_catalog")
+                    span.set_attribute("llm.selected_model", APPLE_INTELLIGENCE_ID)
+                    log.info(
+                        "llm_selector: MLX in-process fallback=Apple Intelligence "
+                        "(no cached MLX model fits budget=%.1f GB)",
+                        budget,
+                    )
+                    return APPLE_INTELLIGENCE_ID
+                continue
+            if min_ram > budget:
                 continue
             if not _hf_model_cached(hf_id):
                 log.debug(
@@ -951,9 +968,9 @@ def select_mlx_model_id(
             )
             return hf_id
-        # 3. Nothing cached fits the budget — best effort with the preferred id.
-        #    (Loading preferred-when-absent is the pre-existing single-model
-        #    behaviour, not a regression introduced by dynamic selection.)
+        # 3. Nothing cached fits and Apple Intelligence is unavailable (macOS < 26) —
+        #    best effort with the preferred id. (This preserves the pre-existing
+        #    single-model behaviour on older macOS; the load may trigger a download.)
         span.set_attribute("llm.reason", "nothing_cached_fits_use_preferred")
         span.set_attribute("llm.selected_model", preferred_hf_id or "")
         log.warning(

package/services/agents/run_task_linker_mlx.py CHANGED Viewed

@@ -87,15 +87,20 @@ def _resolve_model_id() -> str:
         return _MLX_MODEL_ID
     try:
-        from agents.llm_selector import resolve_model, select_mlx_model_id
+        from agents.llm_selector import (
+            APPLE_INTELLIGENCE_ID, resolve_model, select_mlx_model_id,
+        )
         entry = resolve_model(_DEFAULT_MLX_MODEL_ID)
         preferred_min_ram = (
             entry["min_ram_gb"] if entry else _DEFAULT_MLX_MODEL_MIN_RAM_GB
         )
-        _MLX_MODEL_ID = select_mlx_model_id(
+        selected = select_mlx_model_id(
             preferred_hf_id=_DEFAULT_MLX_MODEL_ID,
             preferred_min_ram_gb=preferred_min_ram,
-        ) or _DEFAULT_MLX_MODEL_ID
+        )
+        # Propagate the Apple Intelligence sentinel as-is; fall back to the
+        # default MLX model only when nothing at all was selected (None).
+        _MLX_MODEL_ID = selected if selected is not None else _DEFAULT_MLX_MODEL_ID
     except Exception as exc:  # noqa: BLE001
         log.warning(
             "run_task_linker_mlx: dynamic model selection failed (%s) — "
@@ -267,6 +272,44 @@ def _get_model() -> Any:
     return outlines_model
+def _classify_apple_fm(messages: list[dict[str, str]]) -> "SessionClassification":
+    """Classify via Apple Foundation Models (non-FSM, JSON parsing with one retry)."""
+    import asyncio
+    from apple_fm_sdk import LanguageModelSession  # type: ignore[import]
+    system = next((m["content"] for m in messages if m["role"] == "system"), "")
+    user   = next((m["content"] for m in messages if m["role"] == "user"),   "")
+    user_with_hint = (
+        user
+        + "\n\nRespond with a JSON object matching the SessionClassification schema. "
+        "Output only valid JSON — no markdown fences, no extra text."
+    )
+    async def _run(prompt: str) -> str:
+        session = LanguageModelSession(instructions=system)
+        r = await session.respond(prompt)
+        return getattr(r, "content", r)
+    raw = asyncio.run(_run(user_with_hint))
+    try:
+        text = raw.strip()
+        if text.startswith("```"):
+            text = text.split("\n", 1)[1].rsplit("```", 1)[0].strip()
+        return SessionClassification.model_validate_json(text)
+    except Exception:
+        # One retry: ask the model to fix the JSON it produced.
+        fix_prompt = (
+            "The JSON you produced was invalid. Fix it and return only valid JSON:\n"
+            + raw
+        )
+        raw2 = asyncio.run(_run(fix_prompt))
+        text2 = raw2.strip()
+        if text2.startswith("```"):
+            text2 = text2.split("\n", 1)[1].rsplit("```", 1)[0].strip()
+        return SessionClassification.model_validate_json(text2)
 # ---------------------------------------------------------------------------
 # DB helpers
 # ---------------------------------------------------------------------------
@@ -424,25 +467,39 @@ def _classify_one(
     # ── llm_inference ─────────────────────────────────────────────────────────
     t0 = time.time()
     with tracer.start_as_current_span("llm_inference") as llm_span:
-        llm_span.set_attribute("model", _resolve_model_id())
+        model_id = _resolve_model_id()
+        llm_span.set_attribute("model", model_id)
         llm_span.set_attribute("max_tokens", _MAX_TOKENS)
         llm_span.set_attribute("temperature", _TEMPERATURE)
         llm_span.add_event("inference_started", {"session_id": session_id})
+        # Apple Intelligence path — no in-process MLX model; JSON parsing with retry.
         try:
-            from mlx_lm.sample_utils import make_sampler
-            from outlines.inputs import Chat
-            model = _get_model()
-            raw = model(
-                Chat(messages),
-                output_type=SessionClassification,
-                max_tokens=_MAX_TOKENS,
-                sampler=make_sampler(temp=_TEMPERATURE),
-                verbose=False,
-            )
+            from agents.llm_selector import APPLE_INTELLIGENCE_ID
+            _use_apple_fm = model_id == APPLE_INTELLIGENCE_ID
+        except Exception:
+            _use_apple_fm = False
+        try:
+            if _use_apple_fm:
+                result = _classify_apple_fm(messages)
+                raw = result.model_dump_json()
+            else:
+                from mlx_lm.sample_utils import make_sampler
+                from outlines.inputs import Chat
+                model = _get_model()
+                raw = model(
+                    Chat(messages),
+                    output_type=SessionClassification,
+                    max_tokens=_MAX_TOKENS,
+                    sampler=make_sampler(temp=_TEMPERATURE),
+                    verbose=False,
+                )
         except Exception as exc:
             elapsed = time.time() - t0
-            llm_span.set_attribute("outcome", "mlx_error")
+            outcome = "apple_fm_error" if _use_apple_fm else "mlx_error"
+            llm_span.set_attribute("outcome", outcome)
             llm_span.set_attribute("elapsed_s", elapsed)
             llm_span.set_status(StatusCode.ERROR, str(exc))
             llm_span.add_event("inference_error", {
@@ -455,11 +512,12 @@ def _classify_one(
                 session_id, exc,
             )
             return _error_result(
-                session_id, f"mlx inference error: {exc}", elapsed, "mlx_error"
+                session_id, f"inference error: {exc}", elapsed, outcome
             )
         elapsed = time.time() - t0
-        llm_span.set_attribute("outcome", "mlx_direct")
+        outcome = "apple_fm" if _use_apple_fm else "mlx_direct"
+        llm_span.set_attribute("outcome", outcome)
         llm_span.set_attribute("elapsed_s", elapsed)
         llm_span.set_attribute("response_chars", len(raw))
         llm_span.add_event("inference_complete", {
@@ -480,7 +538,9 @@ def _classify_one(
             "preview": raw[:500],
         })
-        # outlines guarantees schema validity; model_validate_json rarely fails.
+        # Both paths converge on a JSON string in `raw`; parse to SessionClassification.
+        # Apple FM already validated once inside _classify_apple_fm; re-parsing from
+        # model_dump_json() is a no-op that keeps the two paths uniform.
         try:
             result = SessionClassification.model_validate_json(raw)
         except Exception as exc:
@@ -540,7 +600,7 @@ def _classify_one(
         "category_explanation": result.category_explanation,
         "session_type":         result.session_type,
         "reasoning":            result.reasoning,
-        "method":              "mlx_direct",
+        "method":              outcome,
         "dimensions":          result.dimensions,
         "session_summary":     result.session_summary,
         "elapsed_s":           elapsed,
@@ -632,6 +692,21 @@ def main() -> None:
         log.error("run_task_linker_mlx: meridian_db path is empty")
         sys.exit(1)
+    # Canonicalize and restrict to ~/.meridian/ to prevent path traversal.
+    try:
+        canonical = Path(db_path).expanduser().resolve()
+    except (OSError, ValueError) as exc:
+        log.error("run_task_linker_mlx: invalid db path: %s", exc)
+        sys.exit(1)
+    allowed_root = Path.home() / ".meridian"
+    if not str(canonical).startswith(str(allowed_root) + "/") and canonical != allowed_root:
+        log.error(
+            "run_task_linker_mlx: db path %s is outside allowed directory %s",
+            canonical, allowed_root,
+        )
+        sys.exit(1)
+    db_path = str(canonical)
     if not Path(db_path).exists():
         log.error("run_task_linker_mlx: db file does not exist: %s", db_path)
         sys.exit(1)

package/services/agents/server.py CHANGED Viewed

@@ -40,7 +40,7 @@ os.environ.setdefault("HERMES_HOME", str(_SERVICES_DIR / ".hermes"))
 import opentelemetry.context as _otel_context
 from fastapi import FastAPI, HTTPException
 from opentelemetry import trace
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from agents import observability
@@ -59,12 +59,16 @@ _app_state: dict[str, Any] = {}
 async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
     if _app_state.get("backend") == "mlx":
         import datetime
-        log.info("server: loading MLX model at startup…")
         import agents.run_task_linker_mlx as _mlx
-        _mlx._get_model()
         _app_state["mlx_module"] = _mlx
         _app_state["loaded_at"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
-        log.info("server: MLX model ready")
+        from agents.llm_selector import APPLE_INTELLIGENCE_ID
+        if _mlx._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+            log.info("server: 8 GB machine — Apple Intelligence backend, no MLX model to pre-load")
+        else:
+            log.info("server: loading MLX model at startup…")
+            _mlx._get_model()
+            log.info("server: MLX model ready")
     yield
@@ -148,8 +152,11 @@ async def chat(req: ChatRequest) -> ChatResponse:
 # MLX backend — direct in-process inference, model pre-loaded at startup
 # ---------------------------------------------------------------------------
+_MAX_INPUT_CHARS = 128_000  # ~32k tokens; hard ceiling to prevent resource exhaustion
 class ClassifyRequest(BaseModel):
-    input: str  # fully-formatted user_message string (from build_user_message)
+    input: str = Field(..., max_length=_MAX_INPUT_CHARS)  # fully-formatted user_message string
 class ClassifyResponse(BaseModel):
@@ -341,7 +348,7 @@ class _OAIChatRequest(BaseModel):
     model: str | None = None
     messages: list[_OAIMessage]
     temperature: float | None = None
-    max_tokens: int | None = None
+    max_tokens: int | None = Field(None, ge=1, le=8192)
     top_p: float | None = None
     stop: list[str] | str | None = None
     stream: bool = False
@@ -452,9 +459,9 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
 class _SummariseRequest(BaseModel):
-    transcript: str
+    transcript: str = Field(..., max_length=_MAX_INPUT_CHARS)
     system: str | None = None
-    max_tokens: int = 2048
+    max_tokens: int = Field(2048, ge=1, le=8192)
     temperature: float = 0.2

package/services/pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "meridian-agents"
-version = "1.22.0"
+version = "1.23.0"
 description = "Meridian agents — hermes task linking and Jira progress updates for meridian.db"
 requires-python = ">=3.11"
 authors = [{ name = "Meridiona" }]
@@ -53,7 +53,9 @@ meridian-server = "agents.server:main"
 [tool.uv]
 # Lock all extras so `uv lock` produces a complete, reproducible uv.lock.
-# install-from-bundle.sh installs the mlx extra via `uv sync --extra mlx --frozen`.
+# The shipped venv installs BOTH server-side extras — `uv sync --extra mlx
+# --extra pm_worklog_update` — because the one MLX server serves
+# /classify_sessions (mlx) AND /synthesise_worklog (agno, pm_worklog_update).
 constraint-dependencies = []
 [tool.setuptools.packages.find]

package/services/scripts/install-claude-hook.sh CHANGED Viewed

@@ -66,36 +66,32 @@ with open(settings_path) as f:
 hooks = settings.setdefault("hooks", {})
 session_end = hooks.setdefault("SessionEnd", [])
-# SessionEnd doesn't support `matcher` — each entry is just a list of
-# hooks. We use an unmatched group whose first command carries our
-# marker as a comment so we can find + replace it later.
 new_entry = {
     "hooks": [
         {
             "type":    "command",
             "command": hook_cmd,
-            # Claude Code reads `timeout` in milliseconds (matching the
-            # existing entries already in this settings.json). 30s ceiling
+            # Claude Code reads `timeout` in milliseconds. 30s ceiling
             # — the hook itself returns in <100 ms.
             "timeout": 30000,
-            # Comment field that Claude Code preserves but doesn't act on
-            # — gives us a robust idempotency / uninstall handle.
-            "_meridian": marker,
         }
     ]
 }
-# Replace any prior meridian entry, leave all others untouched.
+# Replace any prior meridian entry by matching on the command substring
+# "coding-agent-hook". Claude Code strips unknown JSON fields (like the
+# former "_meridian" marker) on every save, so command-string matching
+# is the only reliable idempotency mechanism.
 filtered = []
 removed = 0
 for group in session_end:
-    is_ours = False
-    for h in group.get("hooks", []):
-        if h.get("_meridian") == marker:
-            is_ours = True
-            removed += 1
-            break
-    if not is_ours:
+    is_ours = any(
+        "coding-agent-hook" in h.get("command", "")
+        for h in group.get("hooks", [])
+    )
+    if is_ours:
+        removed += 1
+    else:
         filtered.append(group)
 filtered.append(new_entry)

package/ui.tar.gz CHANGED Viewed

Binary file