@meridiona/meridian-darwin-arm64 1.24.0 → 1.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.24.0
1
+ 1.24.2
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.24.0",
3
+ "version": "1.24.2",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -523,9 +523,17 @@ fi
523
523
  info "Installing screenpipe launchd agent…"
524
524
  bash "${APP_ROOT}/scripts/install-screenpipe-daemon.sh" || warn "screenpipe agent install failed"
525
525
 
526
- # MLX: skip restart + model-load wait when server was already healthy and the
527
- # Python services/venv didn't change saves ~9 s on every non-Python update.
528
- if [[ "${_mlx_was_healthy}" -eq 1 && "${_venv_changed}" -eq 0 ]]; then
526
+ # MLX: skip restart + model-load wait when server was already healthy and
527
+ # neither the venv nor the Python source files changed.
528
+ _PY_SRC_STAMP="${HOME}/.meridian/py-src.sha256"
529
+ _py_src_hash="$(find "${APP_ROOT}/services/agents" -name '*.py' | sort | xargs shasum -a 256 2>/dev/null | shasum -a 256 | cut -d' ' -f1 || true)"
530
+ _py_src_changed=1
531
+ if [[ -f "${_PY_SRC_STAMP}" && "$(cat "${_PY_SRC_STAMP}")" == "${_py_src_hash}" ]]; then
532
+ _py_src_changed=0
533
+ fi
534
+ echo "${_py_src_hash}" > "${_PY_SRC_STAMP}"
535
+
536
+ if [[ "${_mlx_was_healthy}" -eq 1 && "${_venv_changed}" -eq 0 && "${_py_src_changed}" -eq 0 ]]; then
529
537
  ok "Python services unchanged — MLX server kept running"
530
538
  else
531
539
  info "Installing MLX inference server launchd agent…"
@@ -193,22 +193,27 @@ async def classify(req: ClassifyRequest) -> ClassifyResponse:
193
193
  from outlines.inputs import Chat
194
194
  from mlx_lm.sample_utils import make_sampler
195
195
 
196
+ from agents.llm_selector import APPLE_INTELLIGENCE_ID
197
+
196
198
  m = _app_state["mlx_module"]
197
- model = m._get_model()
198
199
  messages = [
199
200
  {"role": "system", "content": m._SYSTEM_PROMPT},
200
201
  {"role": "user", "content": req.input},
201
202
  ]
202
203
  t0 = _time.time()
203
204
  try:
204
- raw = model(
205
- Chat(messages),
206
- output_type=m.SessionClassification,
207
- max_tokens=m._MAX_TOKENS,
208
- sampler=make_sampler(temp=m._TEMPERATURE),
209
- verbose=False,
210
- )
211
- result = m.SessionClassification.model_validate_json(raw)
205
+ if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
206
+ result = m._classify_apple_fm(messages)
207
+ else:
208
+ model = m._get_model()
209
+ raw = model(
210
+ Chat(messages),
211
+ output_type=m.SessionClassification,
212
+ max_tokens=m._MAX_TOKENS,
213
+ sampler=make_sampler(temp=m._TEMPERATURE),
214
+ verbose=False,
215
+ )
216
+ result = m.SessionClassification.model_validate_json(raw)
212
217
  except Exception as exc:
213
218
  log.warning("classify: inference error: %s", exc)
214
219
  raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -373,6 +378,40 @@ def _flatten_message_content(content: Any) -> str:
373
378
  return str(content)
374
379
 
375
380
 
381
+ # Apple FM context cap: 4096-token combined context window (input + output).
382
+ # Reserve ~1024 tokens for the response; ~3072 for the prompt → ~12 000 chars.
383
+ _APPLE_FM_USER_CHARS = 12_000
384
+
385
+
386
+ def _infer_apple_fm(msgs: list[dict], max_tokens: int) -> str: # noqa: ARG001
387
+ """Infer via Apple Foundation Models from an OpenAI-style messages list.
388
+
389
+ Extracts the last system message and joins all user/assistant turns.
390
+ Raises on failure — callers must handle and return 500.
391
+ """
392
+ import asyncio
393
+ from apple_fm_sdk import LanguageModelSession # type: ignore[import]
394
+
395
+ system = next(
396
+ (m["content"] for m in reversed(msgs) if m.get("role") == "system"), ""
397
+ )
398
+ user_parts = [m["content"] for m in msgs if m.get("role") in ("user", "assistant")]
399
+ user = "\n".join(user_parts)
400
+ if len(user) > _APPLE_FM_USER_CHARS:
401
+ user = user[:_APPLE_FM_USER_CHARS]
402
+
403
+ async def _run() -> str:
404
+ session = LanguageModelSession(instructions=system)
405
+ result = await session.respond(user)
406
+ return result.content if hasattr(result, "content") else str(result)
407
+
408
+ loop = asyncio.new_event_loop()
409
+ try:
410
+ return loop.run_until_complete(_run())
411
+ finally:
412
+ loop.close()
413
+
414
+
376
415
  @app.post("/v1/chat/completions")
377
416
  async def openai_chat_completions(req: _OAIChatRequest) -> dict:
378
417
  """OpenAI ChatCompletions-shaped wrapper around the MLX model.
@@ -409,7 +448,11 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
409
448
  temperature = req.temperature if req.temperature is not None else 0.3
410
449
  max_tokens = req.max_tokens if req.max_tokens else 2048
411
450
 
451
+ from agents.llm_selector import APPLE_INTELLIGENCE_ID
452
+
412
453
  def _generate() -> str:
454
+ if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
455
+ return _infer_apple_fm(msgs, max_tokens)
413
456
  model = m._get_model()
414
457
  return model(
415
458
  Chat(msgs),
@@ -501,14 +544,45 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
501
544
  if m is None:
502
545
  raise HTTPException(status_code=503, detail="MLX model is still loading")
503
546
 
504
- from mlx_lm.sample_utils import make_sampler
505
- from outlines.inputs import Chat
547
+ from agents.llm_selector import APPLE_INTELLIGENCE_ID
506
548
 
507
549
  messages = [
508
550
  {"role": "system", "content": req.system or _SUMMARISE_DEFAULT_SYSTEM},
509
551
  {"role": "user", "content": req.transcript},
510
552
  ]
511
553
 
554
+ if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
555
+ # outlines FSM decoding is incompatible with Foundation Models.
556
+ # Ask Apple FM for JSON directly; strip fences and retry once on parse error.
557
+ _JSON_HINT = (
558
+ "\n\nRespond ONLY with a JSON object — no markdown, no explanation: "
559
+ '{"summary": "<string>", "blockers": ["<string>", ...]}'
560
+ )
561
+
562
+ def _generate_fm() -> _SummarySchema:
563
+ fm_msgs = [
564
+ {"role": "system", "content": messages[0]["content"] + _JSON_HINT},
565
+ {"role": "user", "content": messages[1]["content"]},
566
+ ]
567
+ raw = _infer_apple_fm(fm_msgs, req.max_tokens)
568
+ try:
569
+ return _SummarySchema.model_validate_json(raw)
570
+ except Exception:
571
+ stripped = raw.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()
572
+ return _SummarySchema.model_validate_json(stripped)
573
+
574
+ from fastapi.concurrency import run_in_threadpool as _rtp
575
+ try:
576
+ obj = await _rtp(_generate_fm)
577
+ except Exception as exc: # noqa: BLE001
578
+ log.warning("summarise(apple_fm): parse error: %s", exc)
579
+ raise HTTPException(status_code=500, detail=str(exc)) from exc
580
+ log.info("summarise(apple_fm): out_chars=%d blockers=%d", len(obj.summary), len(obj.blockers))
581
+ return _SummariseResponse(summary=obj.summary.strip(), blockers=obj.blockers)
582
+
583
+ from mlx_lm.sample_utils import make_sampler
584
+ from outlines.inputs import Chat
585
+
512
586
  def _generate() -> str:
513
587
  model = m._get_model()
514
588
  return model(
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.24.0"
7
+ version = "1.24.2"
8
8
  description = "Meridian agents — hermes task linking and Jira progress updates for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]
package/ui.tar.gz ADDED
Binary file