@meridiona/meridian-darwin-arm64 1.60.0 → 1.61.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/bin/meridian +0 -0
- package/package.json +1 -1
- package/services/agents/pm_worklog_update/config.py +1 -1
- package/services/agents/server.py +29 -3
- package/services/pyproject.toml +1 -1
- package/ui.tar.gz +0 -0
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.60.0
|
|
1
|
+
1.61.1
|
package/bin/meridian
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@meridiona/meridian-darwin-arm64",
|
|
3
|
-
"version": "1.60.0",
|
|
3
|
+
"version": "1.61.1",
|
|
4
4
|
"description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
|
|
5
5
|
"homepage": "https://github.com/Meridiona/meridian",
|
|
6
6
|
"repository": {
|
|
package/services/agents/pm_worklog_update/config.py
CHANGED
|
@@ -32,7 +32,7 @@ MLX_SERVER_MODEL = os.environ.get("MLX_SERVER_MODEL", "qwen3.5-9b-instruct")
|
|
|
32
32
|
# Token caps. The MLX model exposes 128-262K context — a single Synthesise
|
|
33
33
|
# call comfortably swallows even the heaviest hour of work.
|
|
34
34
|
PM_WORKLOG_SYNTH_MAX_TOKENS = int(os.environ.get("PM_WORKLOG_SYNTH_MAX_TOKENS", "8000"))
|
|
35
|
-
PM_WORKLOG_REQUEST_TIMEOUT_S = int(os.environ.get("PM_WORKLOG_REQUEST_TIMEOUT_S", "
|
|
35
|
+
PM_WORKLOG_REQUEST_TIMEOUT_S = int(os.environ.get("PM_WORKLOG_REQUEST_TIMEOUT_S", "900"))
|
|
36
36
|
|
|
37
37
|
# Temperature tuned for each step. Lower = more deterministic.
|
|
38
38
|
PM_WORKLOG_TEMP_COLLECT = 0.0
|
|
package/services/agents/server.py
CHANGED
|
@@ -62,6 +62,25 @@ async def _idle_evictor(mlx_module: Any) -> None:
|
|
|
62
62
|
log.warning("server: idle-evictor error: %s", exc)
|
|
63
63
|
|
|
64
64
|
|
|
65
|
+
def _model_sem() -> "asyncio.Semaphore":
|
|
66
|
+
"""Return the process-global single-slot model semaphore.
|
|
67
|
+
|
|
68
|
+
Created once in _lifespan and stored in _app_state. Every endpoint that
|
|
69
|
+
runs a model inference acquires this before calling run_in_threadpool so
|
|
70
|
+
that classify, synthesise_worklog, and summarise never compete on the GPU.
|
|
71
|
+
The synthesise path is indirectly serialised: /synthesise_worklog itself
|
|
72
|
+
does NOT hold the semaphore (agno calls /v1/chat/completions internally),
|
|
73
|
+
so /v1/chat/completions acquires it instead — no nested acquisition,
|
|
74
|
+
no deadlock.
|
|
75
|
+
"""
|
|
76
|
+
import asyncio
|
|
77
|
+
sem = _app_state.get("model_sem")
|
|
78
|
+
if sem is None: # fallback if called before lifespan (e.g. tests)
|
|
79
|
+
sem = asyncio.Semaphore(1)
|
|
80
|
+
_app_state["model_sem"] = sem
|
|
81
|
+
return sem
|
|
82
|
+
|
|
83
|
+
|
|
65
84
|
@asynccontextmanager
|
|
66
85
|
async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
|
|
67
86
|
import asyncio
|
|
@@ -69,6 +88,7 @@ async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
|
|
|
69
88
|
import agents.run_task_linker_mlx as _mlx
|
|
70
89
|
_app_state["mlx_module"] = _mlx
|
|
71
90
|
_app_state["loaded_at"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
|
91
|
+
_app_state["model_sem"] = asyncio.Semaphore(1)
|
|
72
92
|
from agents.llm_selector import APPLE_INTELLIGENCE_ID
|
|
73
93
|
evictor: "asyncio.Task | None" = None
|
|
74
94
|
if _mlx._resolve_model_id() == APPLE_INTELLIGENCE_ID:
|
|
@@ -332,7 +352,8 @@ async def classify_sessions(req: ClassifySessionsRequest) -> dict:
|
|
|
332
352
|
if _tok is not None:
|
|
333
353
|
_otel_context.detach(_tok)
|
|
334
354
|
|
|
335
|
-
|
|
355
|
+
async with _model_sem():
|
|
356
|
+
results = await run_in_threadpool(_classify_all)
|
|
336
357
|
return {"results": results}
|
|
337
358
|
|
|
338
359
|
|
|
@@ -489,7 +510,8 @@ async def openai_chat_completions(req: _OAIChatRequest) -> dict:
|
|
|
489
510
|
|
|
490
511
|
t0 = _time.time()
|
|
491
512
|
try:
|
|
492
|
-
|
|
513
|
+
async with _model_sem():
|
|
514
|
+
text = await run_in_threadpool(_generate)
|
|
493
515
|
except Exception as exc: # noqa: BLE001
|
|
494
516
|
log.warning("openai_chat_completions: inference error: %s", exc)
|
|
495
517
|
raise HTTPException(status_code=500, detail=str(exc)) from exc
|
|
@@ -618,7 +640,8 @@ async def summarise(req: _SummariseRequest) -> _SummariseResponse:
|
|
|
618
640
|
)
|
|
619
641
|
|
|
620
642
|
try:
|
|
621
|
-
|
|
643
|
+
async with _model_sem():
|
|
644
|
+
raw = await run_in_threadpool(_generate)
|
|
622
645
|
obj = _SummarySchema.model_validate_json(raw)
|
|
623
646
|
except Exception as exc: # noqa: BLE001
|
|
624
647
|
log.warning("summarise: inference/parse error: %s", exc)
|
|
@@ -832,6 +855,9 @@ async def synthesise_worklog(req: _SynthWorklogRequest) -> dict:
|
|
|
832
855
|
except Exception as exc: # noqa: BLE001 — never crash the shared server
|
|
833
856
|
last_detail = f"agent run raised {type(exc).__name__}: {exc}"
|
|
834
857
|
log.warning("synthesise_worklog: attempt %d %s", attempt, last_detail)
|
|
858
|
+
if attempt < 3:
|
|
859
|
+
import time as _t
|
|
860
|
+
_t.sleep(5 * attempt) # 5s, 10s between retries
|
|
835
861
|
continue
|
|
836
862
|
raw = getattr(response, "content", response)
|
|
837
863
|
if raw is None:
|
package/services/pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "meridian-agents"
|
|
7
|
-
version = "1.60.0"
|
|
7
|
+
version = "1.61.1"
|
|
8
8
|
description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
authors = [{ name = "Meridiona" }]
|
package/ui.tar.gz
CHANGED
|
Binary file
|