npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.59.0 → 1.60.0 - Mend

@meridiona/meridian-darwin-arm64 1.59.0 → 1.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/services/agents/_system_context.py +31 -45
package/services/agents/run_task_linker_mlx.py +370 -74
package/services/agents/server.py +33 -7
package/services/observability/dashboards/classifier-debug.json +3 -3
package/services/pyproject.toml +1 -1
package/services/skills/activity/task-classifier/SKILL.md +66 -166
package/services/tests/evals/build_real_goldens.py +180 -0
package/services/tests/evals/data/labels/real_curated.json +166 -0
package/services/tests/evals/data/labels/real_curated_holdout.json +97 -0
package/services/tests/evals/data/labels/real_curated_holdout2.json +64 -0
package/services/tests/evals/metrics.py +75 -0
package/services/tests/evals/test_classifier.py +13 -3
package/services/tests/test_prompt_cache_equivalence.py +97 -0
package/ui.tar.gz +0 -0

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.59.0
1	+ 1.60.0

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.59.0",
+  "version": "1.60.0",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/services/agents/_system_context.py CHANGED Viewed

@@ -8,49 +8,35 @@ to ensure consistent behavior across entry points.
 """
 from __future__ import annotations
-import os
-import shlex
-from pathlib import Path
-def _validated_db_path() -> Path:
-    raw = os.environ.get("MERIDIAN_DB", str(Path.home() / ".meridian" / "meridian.db"))
-    # Reject control characters (newlines etc.) that would enable prompt injection
-    if any(c in raw for c in ("\n", "\r", "\0")):
-        raise ValueError("MERIDIAN_DB contains control characters")
-    path = Path(raw).resolve()
-    if path.suffix != ".db":
-        raise ValueError(f"MERIDIAN_DB must point to a .db file, got suffix: {path.suffix!r}")
-    return path
-_DB_PATH = _validated_db_path()
-_DB_SHELL = shlex.quote(str(_DB_PATH))
-SYSTEM_CONTEXT = f"""You are **Meridian Intelligence** — the AI reasoning layer inside Meridian, a developer productivity platform.
-Meridian monitors a developer's screen and builds a structured record of their work as a stream of work *sessions*. Your PRIMARY role is to reason over each session and **classify it** — determining which tracked ticket (the "task") the work belongs to, or whether it is overhead or untracked work — so Meridian can keep every ticket's progress and worklog accurate. Classifying a session correctly to its task, and reasoning carefully over the evidence to do so, is the core job.
-CURRENT CAPABILITY — session classification
-  Given a work session (app, duration, screen content, recent history, open tickets), decide:
-  · which tracked ticket the session belongs to ("task"), or
-  · that it is overhead or untracked work.
-  Tickets may come from Jira, Linear, GitHub, Trello, or Azure DevOps — treat them uniformly.
-  Use the task-classifier skill when asked to classify. Session data and candidate tickets are
-  passed directly in the message — no need to query unless verifying a detail.
-  Always return a single bare JSON object. No preamble, no markdown fences, no explanation.
-CURRENT CAPABILITY — PM worklog updates
-  Given classified sessions, writes a verified worklog comment and posts it to the
-  connected PM tool (Jira, Linear, GitHub, etc.) without manual developer input.
-DATABASE (for verification and ad-hoc queries)
-  Path:  {_DB_PATH}
-  Query: sqlite3 {_DB_SHELL} "<SQL>"
-  Tables:
-    app_sessions: id, app_name, started_at, ended_at, duration_s, session_text,
-                  session_text_source, window_titles, category, confidence,
-                  task_key, task_confidence, task_routing
-    pm_tasks:     task_key, title, description_text, issue_type, status_raw, is_terminal,
-                  parent_key, epic_title, sprint_name, assignee_name
+# NOTE: the classifier no longer embeds the DB path or any per-environment value
+# into the prompt — session data and candidate tickets arrive in the message, and
+# the model never shells out to sqlite on this path. SYSTEM_CONTEXT is therefore a
+# pure static constant (no f-string interpolation), which is exactly what lets the
+# MLX prompt-cache treat the whole system+skill prefix as an unchanging, cacheable
+# block reused across every session classified this process.
+SYSTEM_CONTEXT = """You are **Meridian Intelligence**, the classification engine inside Meridian — a tool that watches a developer's screen and keeps their project-management tickets up to date automatically.
+YOUR JOB
+  Meridian turns screen capture into a stream of work *sessions* (one app, a time span,
+  the on-screen text). For each session you are given the session plus the developer's
+  open tracked tickets, and you decide ONE thing:
+    · **task**      — the session is clearly work on one of the candidate tickets → name it.
+    · **untracked** — real work, but it doesn't clearly match any candidate ticket. Kept:
+                      Meridian later turns untracked work into new tickets.
+    · **overhead**  — idle / personal / unrelated (music, settings, browsing). Discarded.
+  Tickets may come from Jira, Linear, GitHub, Trello, or Azure DevOps — treat them the same.
+WHY ACCURACY MATTERS
+  Your classifications are the foundation of the whole pipeline. Every session you link to a
+  ticket is later summed with the others on that ticket and summarised into a **worklog update
+  posted to the developer's PM tool** on their behalf. So a wrong link is expensive: it injects
+  work that never happened into a real ticket's worklog AND hides the genuine untracked work.
+  **When the evidence does not clearly fit a candidate ticket, choose `untracked` — never force
+  a match.** A correct `untracked` is always better than a wrong `task`.
+OUTPUT
+  Return a single bare JSON object — no preamble, no markdown fences, no text around it.
+  Follow the task-classifier skill below for the exact schema, field order, and decision rules.
+  Session data and candidate tickets are passed in the message; you do not need to query anything.
 """