@meridiona/meridian-darwin-arm64 1.60.0 → 1.61.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.60.0
1
+ 1.61.1
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.60.0",
3
+ "version": "1.61.1",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -32,7 +32,7 @@ MLX_SERVER_MODEL = os.environ.get("MLX_SERVER_MODEL", "qwen3.5-9b-instruct")
32
32
  # Token caps. The MLX model exposes 128-262K context — a single Synthesise
33
33
  # call comfortably swallows even the heaviest hour of work.
34
34
  PM_WORKLOG_SYNTH_MAX_TOKENS = int(os.environ.get("PM_WORKLOG_SYNTH_MAX_TOKENS", "8000"))
35
- PM_WORKLOG_REQUEST_TIMEOUT_S = int(os.environ.get("PM_WORKLOG_REQUEST_TIMEOUT_S", "300"))
35
+ PM_WORKLOG_REQUEST_TIMEOUT_S = int(os.environ.get("PM_WORKLOG_REQUEST_TIMEOUT_S", "900"))
36
36
 
37
37
  # Temperature tuned for each step. Lower = more deterministic.
38
38
  PM_WORKLOG_TEMP_COLLECT = 0.0
@@ -62,6 +62,25 @@ async def _idle_evictor(mlx_module: Any) -> None:
62
62
  log.warning("server: idle-evictor error: %s", exc)
63
63
 
64
64
 
65
def _model_sem() -> "asyncio.Semaphore":
    """Hand back the process-wide, one-slot semaphore guarding model inference.

    The semaphore is kept in ``_app_state`` under the ``"model_sem"`` key;
    ``_lifespan`` stores it there at startup. Endpoints that run an inference
    enter it before handing work to ``run_in_threadpool`` so that only one
    generation runs at a time.

    Design note: /synthesise_worklog is meant to be serialised indirectly.
    It does not take the semaphore itself, because the agent it drives calls
    /v1/chat/completions internally and that endpoint takes it instead.
    Keeping the acquisition in one place avoids nested acquisition and
    therefore deadlock.

    Returns:
        The semaphore stored in ``_app_state``. When nothing is stored yet
        (or the stored value is ``None``), a new ``asyncio.Semaphore(1)`` is
        created, saved under the same key, and returned.
    """
    import asyncio

    existing = _app_state.get("model_sem")
    if existing is not None:
        return existing

    # Reached when called before the lifespan hook has run (e.g. in tests):
    # build the semaphore lazily and remember it so later calls share it.
    created = asyncio.Semaphore(1)
    _app_state["model_sem"] = created
    return created
82
+
83
+
65
84
  @asynccontextmanager
66
85
  async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
67
86
  import asyncio
@@ -69,6 +88,7 @@ async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
69
88
  import agents.run_task_linker_mlx as _mlx
70
89
  _app_state["mlx_module"] = _mlx
71
90
  _app_state["loaded_at"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
91
+ _app_state["model_sem"] = asyncio.Semaphore(1)
72
92
  from agents.llm_selector import APPLE_INTELLIGENCE_ID
73
93
  evictor: "asyncio.Task | None" = None
74
94
  if _mlx._resolve_model_id() == APPLE_INTELLIGENCE_ID:
@@ -332,7 +352,8 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
332
352
  if _tok is not None:
333
353
  _otel_context.detach(_tok)
334
354
 
335
- results = await run_in_threadpool(_classify_all)
355
+ async with _model_sem():
356
+ results = await run_in_threadpool(_classify_all)
336
357
  return {"results": results}
337
358
 
338
359
 
@@ -489,7 +510,8 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
489
510
 
490
511
  t0 = _time.time()
491
512
  try:
492
- text = await run_in_threadpool(_generate)
513
+ async with _model_sem():
514
+ text = await run_in_threadpool(_generate)
493
515
  except Exception as exc: # noqa: BLE001
494
516
  log.warning("openai_chat_completions: inference error: %s", exc)
495
517
  raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -618,7 +640,8 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
618
640
  )
619
641
 
620
642
  try:
621
- raw = await run_in_threadpool(_generate)
643
+ async with _model_sem():
644
+ raw = await run_in_threadpool(_generate)
622
645
  obj = _SummarySchema.model_validate_json(raw)
623
646
  except Exception as exc: # noqa: BLE001
624
647
  log.warning("summarise: inference/parse error: %s", exc)
@@ -832,6 +855,9 @@ async def synthesise_worklog(req: _SynthWorklogRequest) -> dict:
832
855
  except Exception as exc: # noqa: BLE001 — never crash the shared server
833
856
  last_detail = f"agent run raised {type(exc).__name__}: {exc}"
834
857
  log.warning("synthesise_worklog: attempt %d %s", attempt, last_detail)
858
+ if attempt < 3:
859
+ import time as _t
860
+ _t.sleep(5 * attempt) # 5s, 10s between retries
835
861
  continue
836
862
  raw = getattr(response, "content", response)
837
863
  if raw is None:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.60.0"
7
+ version = "1.61.1"
8
8
  description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]
package/ui.tar.gz CHANGED
Binary file