npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.24.0 → 1.24.2 - Mend

@meridiona/meridian-darwin-arm64 1.24.0 → 1.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/scripts/install-from-bundle.sh +11 -3
package/services/agents/server.py +85 -11
package/services/pyproject.toml +1 -1
package/ui.tar.gz +0 -0

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.24.0
1	+ 1.24.2

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.24.0",
+  "version": "1.24.2",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/scripts/install-from-bundle.sh CHANGED Viewed

@@ -523,9 +523,17 @@ fi
 info "Installing screenpipe launchd agent…"
 bash "${APP_ROOT}/scripts/install-screenpipe-daemon.sh" || warn "screenpipe agent install failed"
-# MLX: skip restart + model-load wait when server was already healthy and the
-# Python services/venv didn't change — saves ~9 s on every non-Python update.
-if [[ "${_mlx_was_healthy}" -eq 1 && "${_venv_changed}" -eq 0 ]]; then
+# MLX: skip restart + model-load wait when server was already healthy and
+# neither the venv nor the Python source files changed.
+_PY_SRC_STAMP="${HOME}/.meridian/py-src.sha256"
+_py_src_hash="$(find "${APP_ROOT}/services/agents" -name '*.py' | sort | xargs shasum -a 256 2>/dev/null | shasum -a 256 | cut -d' ' -f1 || true)"
+_py_src_changed=1
+if [[ -f "${_PY_SRC_STAMP}" && "$(cat "${_PY_SRC_STAMP}")" == "${_py_src_hash}" ]]; then
+    _py_src_changed=0
+fi
+echo "${_py_src_hash}" > "${_PY_SRC_STAMP}"
+if [[ "${_mlx_was_healthy}" -eq 1 && "${_venv_changed}" -eq 0 && "${_py_src_changed}" -eq 0 ]]; then
     ok "Python services unchanged — MLX server kept running"
 else
     info "Installing MLX inference server launchd agent…"

package/services/agents/server.py CHANGED Viewed

@@ -193,22 +193,27 @@ async def classify(req: ClassifyRequest) -> ClassifyResponse:
     from outlines.inputs import Chat
     from mlx_lm.sample_utils import make_sampler
+    from agents.llm_selector import APPLE_INTELLIGENCE_ID
     m = _app_state["mlx_module"]
-    model = m._get_model()
     messages = [
         {"role": "system", "content": m._SYSTEM_PROMPT},
         {"role": "user",   "content": req.input},
     ]
     t0 = _time.time()
     try:
-        raw = model(
-            Chat(messages),
-            output_type=m.SessionClassification,
-            max_tokens=m._MAX_TOKENS,
-            sampler=make_sampler(temp=m._TEMPERATURE),
-            verbose=False,
-        )
-        result = m.SessionClassification.model_validate_json(raw)
+        if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+            result = m._classify_apple_fm(messages)
+        else:
+            model = m._get_model()
+            raw = model(
+                Chat(messages),
+                output_type=m.SessionClassification,
+                max_tokens=m._MAX_TOKENS,
+                sampler=make_sampler(temp=m._TEMPERATURE),
+                verbose=False,
+            )
+            result = m.SessionClassification.model_validate_json(raw)
     except Exception as exc:
         log.warning("classify: inference error: %s", exc)
         raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -373,6 +378,40 @@ def _flatten_message_content(content: Any) -> str:
     return str(content)
+# Apple FM context cap: 4096-token combined context window (input + output).
+# Reserve ~1024 tokens for the response; ~3072 for the prompt → ~12 000 chars.
+_APPLE_FM_USER_CHARS = 12_000
+def _infer_apple_fm(msgs: list[dict], max_tokens: int) -> str:  # noqa: ARG001
+    """Infer via Apple Foundation Models from an OpenAI-style messages list.
+    Extracts the last system message and joins all user/assistant turns.
+    Raises on failure — callers must handle and return 500.
+    """
+    import asyncio
+    from apple_fm_sdk import LanguageModelSession  # type: ignore[import]
+    system = next(
+        (m["content"] for m in reversed(msgs) if m.get("role") == "system"), ""
+    )
+    user_parts = [m["content"] for m in msgs if m.get("role") in ("user", "assistant")]
+    user = "\n".join(user_parts)
+    if len(user) > _APPLE_FM_USER_CHARS:
+        user = user[:_APPLE_FM_USER_CHARS]
+    async def _run() -> str:
+        session = LanguageModelSession(instructions=system)
+        result = await session.respond(user)
+        return result.content if hasattr(result, "content") else str(result)
+    loop = asyncio.new_event_loop()
+    try:
+        return loop.run_until_complete(_run())
+    finally:
+        loop.close()
 @app.post("/v1/chat/completions")
 async def openai_chat_completions(req: _OAIChatRequest) -> dict:
     """OpenAI ChatCompletions-shaped wrapper around the MLX model.
@@ -409,7 +448,11 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
     temperature = req.temperature if req.temperature is not None else 0.3
     max_tokens  = req.max_tokens if req.max_tokens else 2048
+    from agents.llm_selector import APPLE_INTELLIGENCE_ID
     def _generate() -> str:
+        if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+            return _infer_apple_fm(msgs, max_tokens)
         model = m._get_model()
         return model(
             Chat(msgs),
@@ -501,14 +544,45 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
     if m is None:
         raise HTTPException(status_code=503, detail="MLX model is still loading")
-    from mlx_lm.sample_utils import make_sampler
-    from outlines.inputs import Chat
+    from agents.llm_selector import APPLE_INTELLIGENCE_ID
     messages = [
         {"role": "system", "content": req.system or _SUMMARISE_DEFAULT_SYSTEM},
         {"role": "user", "content": req.transcript},
     ]
+    if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
+        # outlines FSM decoding is incompatible with Foundation Models.
+        # Ask Apple FM for JSON directly; strip fences and retry once on parse error.
+        _JSON_HINT = (
+            "\n\nRespond ONLY with a JSON object — no markdown, no explanation: "
+            '{"summary": "<string>", "blockers": ["<string>", ...]}'
+        )
+        def _generate_fm() -> _SummarySchema:
+            fm_msgs = [
+                {"role": "system", "content": messages[0]["content"] + _JSON_HINT},
+                {"role": "user",   "content": messages[1]["content"]},
+            ]
+            raw = _infer_apple_fm(fm_msgs, req.max_tokens)
+            try:
+                return _SummarySchema.model_validate_json(raw)
+            except Exception:
+                stripped = raw.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()
+                return _SummarySchema.model_validate_json(stripped)
+        from fastapi.concurrency import run_in_threadpool as _rtp
+        try:
+            obj = await _rtp(_generate_fm)
+        except Exception as exc:  # noqa: BLE001
+            log.warning("summarise(apple_fm): parse error: %s", exc)
+            raise HTTPException(status_code=500, detail=str(exc)) from exc
+        log.info("summarise(apple_fm): out_chars=%d blockers=%d", len(obj.summary), len(obj.blockers))
+        return _SummariseResponse(summary=obj.summary.strip(), blockers=obj.blockers)
+    from mlx_lm.sample_utils import make_sampler
+    from outlines.inputs import Chat
     def _generate() -> str:
         model = m._get_model()
         return model(

package/services/pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "meridian-agents"
-version = "1.24.0"
+version = "1.24.2"
 description = "Meridian agents — hermes task linking and Jira progress updates for meridian.db"
 requires-python = ">=3.11"
 authors = [{ name = "Meridiona" }]

package/ui.tar.gz ADDED Viewed

Binary file