npm - @meridiona/meridian-darwin-arm64 - Versions diffs - 1.56.0 → 1.58.0 - Mend

@meridiona/meridian-darwin-arm64 1.56.0 → 1.58.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (9)

package/VERSION +1 -1
package/bin/meridian +0 -0
package/package.json +1 -1
package/services/agents/run_task_linker_mlx.py +114 -12
package/services/observability/dashboards/classifier-debug.json +17 -4
package/services/pyproject.toml +1 -1
package/services/tests/evals/classify_session.py +1 -1
package/services/tests/evals/compare_pipeline.py +2 -2
package/ui.tar.gz +0 -0

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.56.0
1	+ 1.58.0

package/bin/meridian CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@meridiona/meridian-darwin-arm64",
-  "version": "1.56.0",
+  "version": "1.58.0",
   "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
   "homepage": "https://github.com/Meridiona/meridian",
   "repository": {

package/services/agents/run_task_linker_mlx.py CHANGED Viewed

@@ -66,6 +66,20 @@ _CONTEXT_WINDOW = 5
 _MAX_TOKENS = 1024
 _TEMPERATURE = 0.0  # greedy decoding — deterministic classification
+# Candidate-set policy. When the dev has CONFIRMED a daily plan, restrict the
+# classifier's candidate tickets to exactly those planned tickets instead of
+# offering every open ticket (the historical "boost-never-filter" behaviour).
+# Rationale: a focused candidate set sharpens precision on the day's declared
+# work; off-plan work then intentionally falls through to `untracked` — a
+# deliberate holding state — rather than being mis-linked onto an unrelated open
+# ticket. NOTE: until a recall-recovery stage exists, `untracked` sessions do
+# not produce PM worklogs, so off-plan work is not written back while this is on.
+#   "1" (default) → plan-only filtering whenever a plan is confirmed
+#   "0"           → legacy boost-never-filter (plan tickets floated up, all kept)
+# Read once at import — flipping it requires an MLX-server restart. Only ever
+# active on days with a confirmed, non-empty plan; unplanned days are unaffected.
+_PLAN_ONLY_CANDIDATES = os.environ.get("CLASSIFY_PLAN_ONLY_CANDIDATES", "1") == "1"
 # The eval-tuned default classifier model. It lives in the llm_selector catalog
 # (_MODELS) as "qwen3.5-9b-optiq"; llm_selector keeps it on machines where it
 # fits and degrades only when Metal headroom can't accommodate it. The catalog
@@ -579,7 +593,9 @@ def _fetch_session(
     row = con.execute(
         "SELECT id, app_name, started_at, ended_at, duration_s, session_text,"
         "       session_text_source, window_titles, category, confidence,"
-        "       session_summary, claude_session_uuid"
+        "       session_summary, coding_agent_session_uuid,"
+        "       segment_started_at, sealed_at, summary_source,"
+        "       min_frame_id, max_frame_id, frame_count"
         " FROM app_sessions WHERE id = ?",
         (session_id,),
     ).fetchone()
@@ -683,17 +699,46 @@ def _fetch_pm_tasks(
         rows = con.execute(base_cols).fetchall()
     tasks = [dict(r) for r in rows]
-    # Today's-focus boost: tag the tickets the dev declared for the day and float
-    # them to the top of the candidate list, in their declared order. This is a
-    # BOOST, never a filter — every other candidate still follows, so recall is
-    # untouched. A focus key that isn't in `tasks` (e.g. excluded by curation)
-    # simply has no effect; we never resurrect a filtered-out ticket.
+    # Candidate-set policy (see _PLAN_ONLY_CANDIDATES). `focus_keys` are the
+    # tickets the dev CONFIRMED for this session's day (empty when no plan).
     focus = focus_keys or []
-    if focus:
-        order = {key: i for i, key in enumerate(focus)}
-        for t in tasks:
-            t["is_today_focus"] = t["task_key"] in order
-        tasks.sort(key=lambda t: (0, order[t["task_key"]]) if t.get("is_today_focus") else (1, 0))
+    if not focus:
+        # No confirmed plan → offer every candidate. Unchanged behaviour for
+        # users who don't use the plan, or days that aren't confirmed yet.
+        # `is_today_focus` is left unset (falsy) on every ticket.
+        return tasks
+    order = {key: i for i, key in enumerate(focus)}
+    if _PLAN_ONLY_CANDIDATES:
+        # Plan-only: the candidate set IS the confirmed plan, in declared order.
+        # Off-plan work then has no candidate to match, so the model returns
+        # `untracked` (the intended holding state) instead of being shoehorned
+        # onto an unrelated ticket.
+        in_plan = [t for t in tasks if t["task_key"] in order]
+        # GUARD: never return an empty candidate set. If the confirmed plan's
+        # tickets are all absent from the live pool (curation-excluded, closed,
+        # or not yet synced), fall back to the full set — an empty list would
+        # force EVERY session that day to `untracked`.
+        if not in_plan:
+            log.warning(
+                "plan-only candidates: confirmed plan has no live candidate "
+                "tickets (focus=%s) — falling back to full candidate set",
+                focus,
+            )
+            return tasks
+        for t in in_plan:
+            t["is_today_focus"] = True
+        in_plan.sort(key=lambda t: order[t["task_key"]])
+        return in_plan
+    # Legacy boost-never-filter: tag the declared tickets and float them to the
+    # top in declared order, but keep every other candidate so recall is
+    # untouched. A focus key not in `tasks` (e.g. excluded by curation) simply
+    # has no effect — we never resurrect a filtered-out ticket.
+    for t in tasks:
+        t["is_today_focus"] = t["task_key"] in order
+    tasks.sort(key=lambda t: (0, order[t["task_key"]]) if t.get("is_today_focus") else (1, 0))
     return tasks
@@ -785,7 +830,7 @@ def _classify_one(
         # session_text and a concise, high-quality prose summary in
         # session_summary. Classify on the summary, not the multi-MB transcript:
         # cheaper, faster, and it's already the distilled "what was done".
-        if session_raw.get("claude_session_uuid") and (session_raw.get("session_summary") or "").strip():
+        if session_raw.get("coding_agent_session_uuid") and (session_raw.get("session_summary") or "").strip():
             session_text = session_raw["session_summary"]
         # db_fetch is the SOLE source of "what the model was given" — recorded
@@ -796,15 +841,64 @@ def _classify_one(
         # (today's-focus keys float to the front in _fetch_pm_tasks).
         candidate_keys = [t["task_key"] for t in pm_tasks]
         recent_task_keys = [r.get("task_key") for r in recent if r.get("task_key")]
+        # Session identity + the app_sessions row metadata, so a trace is
+        # self-contained — you know WHICH session and its key fields (app, window
+        # titles, time span) without opening meridian.db.
+        db_span.set_attribute("session_id", session_id)
         db_span.set_attribute("app_name", str(session_raw.get("app_name") or ""))
+        db_span.set_attribute("started_at", str(session_raw.get("started_at") or ""))
+        db_span.set_attribute("ended_at", str(session_raw.get("ended_at") or ""))
+        try:
+            _wts = json.loads(session_raw.get("window_titles") or "[]")
+            _wt_names = [str(w.get("window_name", "")) for w in _wts if w.get("window_name")]
+        except (TypeError, ValueError):
+            _wt_names = []
+        db_span.set_attribute("window_titles", " | ".join(_wt_names) if _wt_names else "-")
+        db_span.set_attribute("window_title_count", len(_wt_names))
         db_span.set_attribute("duration_s", float(session_raw.get("duration_s") or 0.0))
         db_span.set_attribute("text_source", str(session_raw.get("session_text_source") or ""))
         db_span.set_attribute("session_text_chars", len(session_text))
+        # Frame-range attribution: the contiguous screenpipe frame_id window this
+        # session was built from (min..max, inclusive) plus the kept frame count.
+        # Answers "which capture window fed this classification" — the raw frames
+        # live in screenpipe keyed by these ids. Coding-agent rows have no frames
+        # (min/max = 0), so guard on a real range before stamping.
+        _min_fid = session_raw.get("min_frame_id")
+        _max_fid = session_raw.get("max_frame_id")
+        if isinstance(_min_fid, int) and isinstance(_max_fid, int) and _max_fid > 0:
+            db_span.set_attribute("min_frame_id", _min_fid)
+            db_span.set_attribute("max_frame_id", _max_fid)
+            db_span.set_attribute("frame_count", int(session_raw.get("frame_count") or 0))
+        # Coding-agent provenance: which agent conversation + segment this row came
+        # from, when the indexer sealed it, and who wrote the summary the model is
+        # classifying on. Only present on coding-agent rows (Claude Code / Codex /
+        # …); guard on coding_agent_session_uuid so screen-capture sessions stay clean.
+        _ca_uuid = session_raw.get("coding_agent_session_uuid")
+        if _ca_uuid:
+            db_span.set_attribute("coding_agent_session_uuid", str(_ca_uuid))
+            db_span.set_attribute("segment_started_at", str(session_raw.get("segment_started_at") or ""))
+            db_span.set_attribute("sealed_at", str(session_raw.get("sealed_at") or ""))
+            db_span.set_attribute("summary_source", str(session_raw.get("summary_source") or ""))
         db_span.set_attribute("pm_tasks_count", len(pm_tasks))
         db_span.set_attribute("today_focus_count", len(focus_keys))
         db_span.set_attribute("recent_sessions_count", len(recent))
         db_span.set_attribute("candidate_task_keys", ", ".join(candidate_keys) if candidate_keys else "-")
         db_span.set_attribute("today_focus_keys", ", ".join(focus_keys) if focus_keys else "-")
+        # Which candidate-set policy actually applied for this session, so a trace
+        # explains the candidate list without re-deriving it:
+        #   all               → no confirmed plan; every open ticket offered
+        #   plan_only         → narrowed to the confirmed plan
+        #   plan_fallback_all → plan confirmed but its tickets weren't live → fell back
+        #   boost             → legacy boost-never-filter (flag off)
+        if not focus_keys:
+            candidate_mode = "all"
+        elif not _PLAN_ONLY_CANDIDATES:
+            candidate_mode = "boost"
+        elif pm_tasks and all(t.get("is_today_focus") for t in pm_tasks):
+            candidate_mode = "plan_only"
+        else:
+            candidate_mode = "plan_fallback_all"
+        db_span.set_attribute("candidate_mode", candidate_mode)
         db_span.set_attribute("recent_task_keys", ", ".join(recent_task_keys) if recent_task_keys else "-")
     session = {
@@ -1254,6 +1348,14 @@ def _classify_one_logged_inner(
     }
     run_log.write(json.dumps(record, default=str) + "\n")
     run_log.flush()
+    # Promote app_name onto the classify_session span (the one row-per-session
+    # span the dashboards query), so app name is a filterable column there — not
+    # just on the child db_fetch span. session_raw is the DB row; current span
+    # here is classify_session (db_fetch's child span has already closed).
+    if session_raw:
+        _cs = trace.get_current_span()
+        if _cs.is_recording():
+            _cs.set_attribute("app_name", str(session_raw.get("app_name") or ""))
     _annotate_classification_span(result)
     return result

package/services/observability/dashboards/classifier-debug.json CHANGED Viewed

@@ -17,6 +17,19 @@
         "customMultiSelectValue": [],
         "escapeSingleQuotes": true
       },
+      {
+        "type": "textbox",
+        "name": "app_name",
+        "label": "App name",
+        "query_data": null,
+        "value": "",
+        "options": [],
+        "multiSelect": false,
+        "hideOnDashboard": false,
+        "selectAllValueForMultiSelect": "custom",
+        "customMultiSelectValue": [],
+        "escapeSingleQuotes": true
+      },
       {
         "type": "custom",
         "name": "session_type",
@@ -141,10 +154,10 @@
           "queryType": "sql",
           "queries": [
             {
-              "query": "SELECT to_char(to_timestamp_micros(_timestamp + 19800000000),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') ORDER BY _timestamp DESC",
+              "query": "SELECT to_char(to_timestamp_micros(_timestamp + 19800000000),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", app_name as \"App\", task_key as \"Task\", session_type as \"Type\", category as \"Category\", confidence as \"Confidence\", round(CAST(elapsed_s AS DOUBLE),2) as \"Time taken (s)\", method as \"Method\", is_error as \"Error\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND ('$session_id'='' OR session_id='$session_id') AND ('$session_type'='' OR session_type='$session_type') AND ('$errors_only'='' OR is_error='$errors_only') AND ('$app_name'='' OR app_name LIKE '%$app_name%') ORDER BY _timestamp DESC",
               "vrlFunctionQuery": "",
               "customQuery": true,
-              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "App", "alias": "App", "column": "app_name", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Category", "alias": "Category", "column": "category", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Confidence", "alias": "Confidence", "column": "confidence", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Time taken (s)", "alias": "Time taken (s)", "column": "elapsed_s", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Error", "alias": "Error", "column": "is_error", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
               "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
             }
           ],
@@ -159,10 +172,10 @@
           "queryType": "sql",
           "queries": [
             {
-              "query": "SELECT to_char(to_timestamp_micros(_timestamp + 19800000000),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' ORDER BY _timestamp DESC",
+              "query": "SELECT to_char(to_timestamp_micros(_timestamp + 19800000000),'%Y-%m-%d %H:%M:%S') as \"Time\", session_id as \"Session\", app_name as \"App\", task_key as \"Task\", session_type as \"Type\", method as \"Method\", trace_id as \"trace_id\", encode(concat('trace_id=''', trace_id, ''''),'base64') as \"trace_filter\" FROM \"default\" WHERE operation_name='classify_session' AND is_error='true' AND ('$app_name'='' OR app_name LIKE '%$app_name%') ORDER BY _timestamp DESC",
               "vrlFunctionQuery": "",
               "customQuery": true,
-              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
+              "fields": {"stream": "default", "stream_type": "traces", "x": [{"label": "Time", "alias": "Time", "column": "_timestamp", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Session", "alias": "Session", "column": "session_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "App", "alias": "App", "column": "app_name", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Task", "alias": "Task", "column": "task_key", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Type", "alias": "Type", "column": "session_type", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Method", "alias": "Method", "column": "method", "color": null, "isDerived": false, "havingConditions": []}, {"label": "Trace ID", "alias": "trace_id", "column": "trace_id", "color": null, "isDerived": false, "havingConditions": []}, {"label": "trace_filter", "alias": "trace_filter", "column": "trace_filter", "color": null, "isDerived": false, "havingConditions": []}], "y": [], "z": [], "breakdown": [], "filter": {"filterType": "group", "logicalOperator": "AND", "conditions": []}},
               "config": {"promql_legend": "", "layer_type": "scatter", "weight_fixed": 1}
             }
           ],

package/services/pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "meridian-agents"
-version = "1.56.0"
+version = "1.58.0"
 description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
 requires-python = ">=3.11"
 authors = [{ name = "Meridiona" }]

package/services/tests/evals/classify_session.py CHANGED Viewed

@@ -51,7 +51,7 @@ def _reconstruct_prompt(db_path: str, session_id: int) -> str | None:
     recent = _fetch_recent_sessions(con, session_id)
     pm_tasks = _fetch_pm_tasks(con)
     session_text = raw.get("session_text") or ""
-    if raw.get("claude_session_uuid") and (raw.get("session_summary") or "").strip():
+    if raw.get("coding_agent_session_uuid") and (raw.get("session_summary") or "").strip():
         session_text = raw["session_summary"]
     session = {
         "id": session_id,

package/services/tests/evals/compare_pipeline.py CHANGED Viewed

@@ -13,7 +13,7 @@ what meridian actually produced in app_sessions. Reports three layers:
 Join key is the screenpipe frame_id: labeled blocks carry a frame_range; app_sessions carry
 min_frame_id/max_frame_id. Each app_session is assigned to exactly one labeled block by its
 midpoint frame, so fragments are never double-counted. Only screen-derived sessions
-(claude_session_uuid IS NULL) participate — coding-agent rows are a separate ingest path.
+(coding_agent_session_uuid IS NULL) participate — coding-agent rows are a separate ingest path.
 This is the measurement harness for the real-session eval (KAN-141). Re-run it after every
 ETL fix to quantify the delta against a fixed ground-truth label set.
@@ -77,7 +77,7 @@ def _load_sessions(db_path: Path, date: str) -> list[dict]:
                task_confidence, task_method
         FROM app_sessions
         WHERE substr(started_at, 1, 10) = ?
-          AND claude_session_uuid IS NULL
+          AND coding_agent_session_uuid IS NULL
         ORDER BY min_frame_id
         """,
         (date,),

package/ui.tar.gz CHANGED Viewed

Binary file