@meridiona/meridian-darwin-arm64 1.24.1 → 1.24.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.24.1
1
+ 1.24.3
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.24.1",
3
+ "version": "1.24.3",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -523,9 +523,17 @@ fi
523
523
  info "Installing screenpipe launchd agent…"
524
524
  bash "${APP_ROOT}/scripts/install-screenpipe-daemon.sh" || warn "screenpipe agent install failed"
525
525
 
526
- # MLX: skip restart + model-load wait when server was already healthy and the
527
- # Python services/venv didn't change — saves ~9 s on every non-Python update.
528
- if [[ "${_mlx_was_healthy}" -eq 1 && "${_venv_changed}" -eq 0 ]]; then
526
+ # MLX: skip restart + model-load wait when server was already healthy and
527
+ # neither the venv nor the Python source files changed.
528
+ _PY_SRC_STAMP="${HOME}/.meridian/py-src.sha256"
529
+ _py_src_hash="$(find "${APP_ROOT}/services/agents" -name '*.py' | sort | xargs shasum -a 256 2>/dev/null | shasum -a 256 | cut -d' ' -f1 || true)"
530
+ _py_src_changed=1
531
+ if [[ -f "${_PY_SRC_STAMP}" && "$(cat "${_PY_SRC_STAMP}")" == "${_py_src_hash}" ]]; then
532
+ _py_src_changed=0
533
+ fi
534
+ echo "${_py_src_hash}" > "${_PY_SRC_STAMP}"
535
+
536
+ if [[ "${_mlx_was_healthy}" -eq 1 && "${_venv_changed}" -eq 0 && "${_py_src_changed}" -eq 0 ]]; then
529
537
  ok "Python services unchanged — MLX server kept running"
530
538
  else
531
539
  info "Installing MLX inference server launchd agent…"
@@ -273,18 +273,120 @@ def _get_model() -> Any:
273
273
 
274
274
 
275
275
  # Apple Foundation Models has a 4096-token combined context window (input + output).
276
- # Reserve _MAX_TOKENS (1024) for the response; leave ~3072 tokens for the prompt.
277
- # At ~4 chars/token that is ~12 000 chars. Cap the user message to stay inside it.
278
- _APPLE_FM_USER_CHARS = 11_000
276
+ # The full _SYSTEM_PROMPT is ~19k chars / ~4800 tokens — it does NOT fit. Use a
277
+ # compact prompt instead: ~500 tokens for instructions, ~2000 for user, ~500 for output.
278
+ _APPLE_FM_USER_CHARS = 8_000 # ~2000 tokens — user message cap
279
+
280
+ # Compact classifier prompt sized for Apple FM's 4096-token window.
281
+ # Covers the essential decision logic; the full SKILL.md is used for larger models.
282
+ # Schema matches SessionClassification exactly — wrong types cause Pydantic rejection.
283
+ _APPLE_FM_SYSTEM_PROMPT = """\
284
+ You are Meridian's session classifier. Return ONLY a JSON object — no markdown, no extra text.
285
+
286
+ Required schema (all fields mandatory):
287
+ {"task_key": <string or null>, "confidence": <float 0.0-1.0>, "category": <see below>, "category_confidence": <float 0.0-1.0>, "category_explanation": "<one sentence max 300 chars>", "session_type": <see below>, "reasoning": "<concise justification>", "dimensions": {"activity": ["<tag>"], "tool": ["<tag>"]}, "session_summary": "<100-500 char factual past-tense prose>"}
288
+
289
+ category must be exactly one of: coding, code_review, meeting, communication, design, documentation, planning, deployment_devops, research, idle_personal
290
+
291
+ session_type must be exactly one of: task, overhead, untracked
292
+
293
+ Rules:
294
+ - task_key: ONLY copy a key from the supplied candidate list verbatim. null if no list or no clear match. NEVER invent a key.
295
+ - session_type "task": session matches a candidate ticket. session_type "overhead": idle/personal/music/idle_personal → confidence ≥ 0.9. session_type "untracked": real work, no ticket match → confidence 0.65-0.75.
296
+ - confidence: 0.95=certain, 0.80=probable, 0.65=likely, 0.50=uncertain
297
+ - dimensions values must be lists of lowercase snake_case strings
298
+ - session_summary must be factual past tense, cite specific files/tools/actions, minimum 2 sentences"""
299
+
300
+
301
+ _VALID_CATEGORIES = frozenset({
302
+ "coding", "code_review", "meeting", "communication", "design",
303
+ "documentation", "planning", "deployment_devops", "research", "idle_personal",
304
+ })
305
+
306
+
307
+ def _coerce_apple_fm_result(data: dict) -> dict:
308
+ """Fill missing or malformed fields so Pydantic can validate Apple FM output.
309
+
310
+ Apple FM doesn't guarantee all required fields. This function synthesizes
311
+ missing ones from what was returned rather than failing.
312
+ """
313
+ # session_type coercion
314
+ st = str(data.get("session_type", "untracked"))
315
+ if st not in ("task", "overhead", "untracked"):
316
+ st = "overhead" if st in ("idle", "personal") else "untracked"
317
+ data["session_type"] = st
318
+
319
+ # category coercion
320
+ cat = str(data.get("category", ""))
321
+ if cat not in _VALID_CATEGORIES:
322
+ cat = "idle_personal" if st == "overhead" else "coding"
323
+ data["category"] = cat
324
+
325
+ # confidence: clamp to [0, 1]
326
+ try:
327
+ data["confidence"] = max(0.0, min(1.0, float(data.get("confidence", 0.7))))
328
+ except (TypeError, ValueError):
329
+ data["confidence"] = 0.7
330
+
331
+ # category_confidence: derive from confidence if missing
332
+ if "category_confidence" not in data or not isinstance(data["category_confidence"], (int, float)):
333
+ data["category_confidence"] = round(data["confidence"] * 0.9, 2)
334
+ else:
335
+ data["category_confidence"] = max(0.0, min(1.0, float(data["category_confidence"])))
336
+
337
+ # category_explanation: fall back to first sentence of reasoning
338
+ if not data.get("category_explanation"):
339
+ reasoning = str(data.get("reasoning", "No details recorded."))
340
+ data["category_explanation"] = reasoning[:300]
341
+
342
+ # reasoning: ensure it's a non-empty string
343
+ if not data.get("reasoning"):
344
+ data["reasoning"] = "Classified via Apple Foundation Models."
345
+
346
+ # session_summary: must be 100-1000 chars
347
+ summary = str(data.get("session_summary", ""))
348
+ if len(summary) < 100:
349
+ # Pad from reasoning
350
+ reasoning = str(data.get("reasoning", ""))
351
+ summary = (summary + " " + reasoning).strip()
352
+ if len(summary) < 100:
353
+ summary = summary + " The session was processed by Apple Foundation Models."
354
+ data["session_summary"] = summary[:1000]
355
+
356
+ # dimensions: must be dict[str, list[str]]
357
+ dims = data.get("dimensions", {})
358
+ if not isinstance(dims, dict):
359
+ dims = {}
360
+ data["dimensions"] = {
361
+ k: ([str(i) for i in v] if isinstance(v, list) else [str(v)])
362
+ for k, v in dims.items()
363
+ }
364
+
365
+ # task_key: null if session_type is not "task"
366
+ if st != "task":
367
+ data["task_key"] = None
368
+ elif data.get("task_key") is not None:
369
+ data["task_key"] = str(data["task_key"])
370
+
371
+ return data
279
372
 
280
373
 
281
374
  def _classify_apple_fm(messages: list[dict[str, str]]) -> "SessionClassification":
282
- """Classify via Apple Foundation Models (non-FSM, JSON parsing with one retry)."""
375
+ """Classify via Apple Foundation Models (non-FSM, JSON parsing with coercion).
376
+
377
+ Uses a compact system prompt sized for Apple FM's 4096-token context window.
378
+ The full _SYSTEM_PROMPT (~4800 tokens) does not fit; _APPLE_FM_SYSTEM_PROMPT
379
+ covers the essential decision logic in ~500 tokens.
380
+
381
+ Apple FM may omit fields. _coerce_apple_fm_result fills missing required
382
+ fields with sensible defaults before Pydantic validation.
383
+ """
283
384
  import asyncio
284
385
 
285
386
  from apple_fm_sdk import LanguageModelSession # type: ignore[import]
286
387
 
287
- system = next((m["content"] for m in messages if m["role"] == "system"), "")
388
+ # Always use the compact prompt — ignore whatever system message the caller sent.
389
+ system = _APPLE_FM_SYSTEM_PROMPT
288
390
  user = next((m["content"] for m in messages if m["role"] == "user"), "")
289
391
 
290
392
  # Truncate to stay within the 4096-token context window.
@@ -297,7 +399,7 @@ def _classify_apple_fm(messages: list[dict[str, str]]) -> "SessionClassification
297
399
 
298
400
  user_with_hint = (
299
401
  user
300
- + "\n\nRespond with a JSON object matching the SessionClassification schema. "
402
+ + "\n\nRespond with a JSON object matching the schema above. "
301
403
  "Output only valid JSON — no markdown fences, no extra text."
302
404
  )
303
405
 
@@ -306,23 +408,25 @@ def _classify_apple_fm(messages: list[dict[str, str]]) -> "SessionClassification
306
408
  r = await session.respond(prompt)
307
409
  return getattr(r, "content", r)
308
410
 
309
- raw = asyncio.run(_run(user_with_hint))
310
- try:
311
- text = raw.strip()
411
+ def _parse(text: str) -> "SessionClassification":
412
+ text = text.strip()
312
413
  if text.startswith("```"):
313
414
  text = text.split("\n", 1)[1].rsplit("```", 1)[0].strip()
314
- return SessionClassification.model_validate_json(text)
415
+ data = json.loads(text)
416
+ return SessionClassification.model_validate(_coerce_apple_fm_result(data))
417
+
418
+ raw = asyncio.run(_run(user_with_hint))
419
+ try:
420
+ return _parse(raw)
315
421
  except Exception:
316
- # One retry: ask the model to fix the JSON it produced.
422
+ # One retry: ask the model to complete any missing fields.
317
423
  fix_prompt = (
318
- "The JSON you produced was invalid. Fix it and return only valid JSON:\n"
319
- + raw
424
+ "Your previous JSON was incomplete it was missing required fields "
425
+ "(category, category_confidence, category_explanation, session_summary). "
426
+ "Return a complete JSON with ALL fields from the schema:\n" + raw
320
427
  )
321
428
  raw2 = asyncio.run(_run(fix_prompt))
322
- text2 = raw2.strip()
323
- if text2.startswith("```"):
324
- text2 = text2.split("\n", 1)[1].rsplit("```", 1)[0].strip()
325
- return SessionClassification.model_validate_json(text2)
429
+ return _parse(raw2)
326
430
 
327
431
 
328
432
  # ---------------------------------------------------------------------------
@@ -193,22 +193,27 @@ async def classify(req: ClassifyRequest) -> ClassifyResponse:
193
193
  from outlines.inputs import Chat
194
194
  from mlx_lm.sample_utils import make_sampler
195
195
 
196
+ from agents.llm_selector import APPLE_INTELLIGENCE_ID
197
+
196
198
  m = _app_state["mlx_module"]
197
- model = m._get_model()
198
199
  messages = [
199
200
  {"role": "system", "content": m._SYSTEM_PROMPT},
200
201
  {"role": "user", "content": req.input},
201
202
  ]
202
203
  t0 = _time.time()
203
204
  try:
204
- raw = model(
205
- Chat(messages),
206
- output_type=m.SessionClassification,
207
- max_tokens=m._MAX_TOKENS,
208
- sampler=make_sampler(temp=m._TEMPERATURE),
209
- verbose=False,
210
- )
211
- result = m.SessionClassification.model_validate_json(raw)
205
+ if m._resolve_model_id() == APPLE_INTELLIGENCE_ID:
206
+ result = m._classify_apple_fm(messages)
207
+ else:
208
+ model = m._get_model()
209
+ raw = model(
210
+ Chat(messages),
211
+ output_type=m.SessionClassification,
212
+ max_tokens=m._MAX_TOKENS,
213
+ sampler=make_sampler(temp=m._TEMPERATURE),
214
+ verbose=False,
215
+ )
216
+ result = m.SessionClassification.model_validate_json(raw)
212
217
  except Exception as exc:
213
218
  log.warning("classify: inference error: %s", exc)
214
219
  raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.24.1"
7
+ version = "1.24.3"
8
8
  description = "Meridian agents — hermes task linking and Jira progress updates for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]