@meridiona/meridian-darwin-arm64 1.58.1 → 1.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.58.1
1
+ 1.60.0
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.58.1",
3
+ "version": "1.60.0",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -23,6 +23,25 @@ _VSCODE_BANNER_RE = re.compile(
23
23
  # then responsible for not blowing the model's context window).
24
24
  SESSION_TEXT_CAP = int(os.environ.get("SESSION_TEXT_CAP", "10000"))
25
25
 
26
+ # Max chars of each candidate ticket's description included in the prompt.
27
+ # Default 0 = NO cap — the full description is sent. This field was previously
28
+ # hard-capped at 240 chars, which dropped 56-83% of real ticket text (measured:
29
+ # avg 548 chars, max 1440 across the live board), and the discriminating scope a
30
+ # session must be matched against frequently lives past char 240. With the
31
+ # 128K-context classifier and plan-only candidate sets (2-3 tickets), the prompt
32
+ # has ample budget, so descriptions are sent in full by default. Set
33
+ # CANDIDATE_DESC_CAP=<n> to re-impose a ceiling if an unusually long description
34
+ # ever bloats the prompt (e.g. on a full-candidate fallback day).
35
+ CANDIDATE_DESC_CAP = int(os.environ.get("CANDIDATE_DESC_CAP", "0"))
36
+
37
+ # Recent-work continuity window (minutes). The prompt summarises the developer's
38
+ # tracked work in this many minutes BEFORE the current session, aggregated per
39
+ # ticket, as a weak continuity prior. Time-windowed (not count-windowed) on
40
+ # purpose: session length is wildly variable, so "last N sessions" can be 90s of
41
+ # micro-glances or 3h of deep work. Shared with run_task_linker_mlx.py, which
42
+ # fetches the window. Override via CONTINUITY_WINDOW_MIN.
43
+ _CONTINUITY_WINDOW_MIN = int(os.environ.get("CONTINUITY_WINDOW_MIN", "30"))
44
+
26
45
 
27
46
  def _fmt_dur(duration_s: int | float) -> str:
28
47
  secs = int(duration_s or 0)
@@ -102,8 +121,8 @@ def _format_candidates(tasks: list[dict]) -> str:
102
121
  epic_title = (task.get("epic_title") or "").strip()
103
122
  sprint_name = (task.get("sprint_name") or "").strip()
104
123
  tags = (task.get("tags") or "").strip()
105
- if len(desc) > 240:
106
- desc = desc[:240] + "…"
124
+ if CANDIDATE_DESC_CAP > 0 and len(desc) > CANDIDATE_DESC_CAP:
125
+ desc = desc[:CANDIDATE_DESC_CAP] + "…"
107
126
  meta_parts = [p for p in [issue_type, f"Epic: {epic_title}" if epic_title else "", sprint_name, f"tags: {tags}" if tags else ""] if p]
108
127
  meta = " [" + " · ".join(meta_parts) + "]" if meta_parts else ""
109
128
  # The dev declared this ticket as today's focus on the plan page. It's a
@@ -117,44 +136,70 @@ def _format_candidates(tasks: list[dict]) -> str:
117
136
  return "\n\n".join(rows) if rows else "(no candidates)"
118
137
 
119
138
 
120
- def _format_recent_sessions(sessions: list[dict]) -> str:
121
- if not sessions:
122
- return " (no recent session context)"
123
- rows = []
124
- for s in sessions:
125
- time_str = _fmt_time(s.get("started_at") or "")
126
- app = (s.get("app_name") or "?")[:14]
127
- dur_str = _fmt_dur(s.get("duration_s") or 0)
128
- task_key = s.get("task_key")
129
- routing = s.get("task_routing") # None means unclassified
130
- if task_key:
131
- target = f"→ {task_key}"
132
- elif routing == "untracked":
133
- target = "→ [untracked]"
134
- elif routing is None:
135
- # session captured but not yet classified
136
- target = " [pending]"
139
+ def _fmt_continuity_mins(seconds: float) -> str:
140
+ """Coarse minutes label for the continuity block: '<1 min' or '~N min'."""
141
+ secs = int(seconds or 0)
142
+ if secs < 60:
143
+ return "<1 min"
144
+ return f"~{round(secs / 60)} min"
145
+
146
+
147
+ def _format_continuity(activity: list[dict], now_iso: str | None = None) -> str:
148
+ """Render the recent-ticket continuity prior — one bullet per ticket worked in
149
+ the window, ordered most-recent-first: total time spent, how many sessions it
150
+ spanned, and how long before the current session it was last active.
151
+
152
+ `activity` entries come from `_fetch_recent_ticket_activity` (already
153
+ aggregated, candidate-gated, confidence-filtered, recency-sorted). Empty input
154
+ → an explicit "no tracked work" line (not ""), so the block is ALWAYS present:
155
+ that tells the model definitively "there is no recent continuity — rely on this
156
+ session's own evidence" (silence is ambiguous — it can't tell "no work" from
157
+ "not provided") and keeps the trace node legible instead of blank. We
158
+ deliberately do NOT emit a raw per-session log: those rows leak internal state
159
+ (sub-threshold micro-sessions, not-yet-classified neighbours, two interleaved
160
+ classify pipelines) that the model misreads as signal. This is a derived,
161
+ calibrated statement of recent tracked work.
162
+ """
163
+ if not activity:
164
+ return " (no tracked work in this window)"
165
+ lines = []
166
+ for a in activity:
167
+ total = _fmt_continuity_mins(a.get("total_s", 0))
168
+ n = int(a.get("sessions", 0) or 0)
169
+ sess = "1 session" if n == 1 else f"{n} sessions"
170
+ ago_s = a.get("ago_s")
171
+ if ago_s is None:
172
+ recency = ""
173
+ elif ago_s < 60:
174
+ recency = ", last active just before this session"
137
175
  else:
138
- target = " [overhead]"
139
- # Category is intentionally omitted — recent-context is a task-continuity
140
- # signal only; carrying the (rule-based or prior-LLM) category tag would
141
- # feed a category prior back into classification.
142
- rows.append(f" {time_str} {app:<14} {dur_str:<7} {target}")
143
- return "\n".join(rows)
176
+ recency = f", last active ~{round(ago_s / 60)} min before this session"
177
+ lines.append(f" • {a['task_key']} — {total} over {sess}{recency}")
178
+ return "\n".join(lines)
144
179
 
145
180
 
146
181
  def build_user_message(
147
182
  session: dict,
148
183
  candidates: list[dict],
149
- recent_sessions: list[dict] | None = None,
184
+ recent_activity: list[dict] | None = None,
185
+ now_iso: str | None = None,
150
186
  ) -> str:
151
- sessions = recent_sessions or []
152
- has_any_task_key = any(s.get("task_key") for s in sessions)
187
+ continuity = _format_continuity(recent_activity or [], now_iso)
188
+ # ALWAYS emitted (even when empty, where `continuity` is an explicit
189
+ # "no tracked work" line) so the model gets a definitive signal rather than
190
+ # ambiguous silence, and the trace node is never blank. Framed as a WEAK prior,
191
+ # never an instruction: an assertive "user was working on KAN-X" anchors the
192
+ # model into force-linking — the exact false-positive failure mode the SKILL
193
+ # warns against. The block states facts (ticket, time, recency); the SKILL's
194
+ # "classify by THIS session's evidence" rule governs.
153
195
  recent_block = (
154
- "RECENT WORK CONTEXT:\n"
155
- f"{_format_recent_sessions(sessions)}\n"
196
+ f"RECENT WORK CONTEXT — the developer's tracked work in the last "
197
+ f"{_CONTINUITY_WINDOW_MIN} minutes before this session. This is a WEAK "
198
+ "continuity hint, NOT proof: continue the most-recent ticket ONLY if this "
199
+ "session's own evidence also fits it; never link on continuity alone.\n"
200
+ f"{continuity}\n"
156
201
  "\n"
157
- ) if has_any_task_key else ""
202
+ )
158
203
  # When the dev declared a focus for the day, name it in the header so the model
159
204
  # treats ★ rows as a prior — preferred when the evidence plausibly fits, but
160
205
  # never forced. Recall is preserved: every candidate is still listed.
@@ -8,49 +8,35 @@ to ensure consistent behavior across entry points.
8
8
  """
9
9
  from __future__ import annotations
10
10
 
11
- import os
12
- import shlex
13
- from pathlib import Path
14
-
15
-
16
- def _validated_db_path() -> Path:
17
- raw = os.environ.get("MERIDIAN_DB", str(Path.home() / ".meridian" / "meridian.db"))
18
- # Reject control characters (newlines etc.) that would enable prompt injection
19
- if any(c in raw for c in ("\n", "\r", "\0")):
20
- raise ValueError("MERIDIAN_DB contains control characters")
21
- path = Path(raw).resolve()
22
- if path.suffix != ".db":
23
- raise ValueError(f"MERIDIAN_DB must point to a .db file, got suffix: {path.suffix!r}")
24
- return path
25
-
26
-
27
- _DB_PATH = _validated_db_path()
28
- _DB_SHELL = shlex.quote(str(_DB_PATH))
29
-
30
- SYSTEM_CONTEXT = f"""You are **Meridian Intelligence** — the AI reasoning layer inside Meridian, a developer productivity platform.
31
-
32
- Meridian monitors a developer's screen and builds a structured record of their work. Your role is to reason over that record and take actions.
33
-
34
- CURRENT CAPABILITY — session classification
35
- Given a work session (app, duration, screen content, recent history, open tickets), decide:
36
- · which tracked ticket the session belongs to ("task"), or
37
- · that it is overhead or untracked work.
38
- Tickets may come from Jira, Linear, GitHub, Trello, or Azure DevOps — treat them uniformly.
39
- Use the task-classifier skill when asked to classify. Session data and candidate tickets are
40
- passed directly in the message — no need to query unless verifying a detail.
41
- Always return a single bare JSON object. No preamble, no markdown fences, no explanation.
42
-
43
- CURRENT CAPABILITY — PM worklog updates
44
- Given classified sessions, writes a verified worklog comment and posts it to the
45
- connected PM tool (Jira, Linear, GitHub, etc.) without manual developer input.
46
-
47
- DATABASE (for verification and ad-hoc queries)
48
- Path: {_DB_PATH}
49
- Query: sqlite3 {_DB_SHELL} "<SQL>"
50
- Tables:
51
- app_sessions: id, app_name, started_at, ended_at, duration_s, session_text,
52
- session_text_source, window_titles, category, confidence,
53
- task_key, task_confidence, task_routing
54
- pm_tasks: task_key, title, description_text, issue_type, status_raw, is_terminal,
55
- parent_key, epic_title, sprint_name, assignee_name
11
+ # NOTE: the classifier no longer embeds the DB path or any per-environment value
12
+ # into the prompt — session data and candidate tickets arrive in the message, and
13
+ # the model never shells out to sqlite on this path. SYSTEM_CONTEXT is therefore a
14
+ # pure static constant (no f-string interpolation), which is exactly what lets the
15
+ # MLX prompt-cache treat the whole system+skill prefix as an unchanging, cacheable
16
+ # block reused across every session classified this process.
17
+
18
+ SYSTEM_CONTEXT = """You are **Meridian Intelligence**, the classification engine inside Meridian — a tool that watches a developer's screen and keeps their project-management tickets up to date automatically.
19
+
20
+ YOUR JOB
21
+ Meridian turns screen capture into a stream of work *sessions* (one app, a time span,
22
+ the on-screen text). For each session you are given the session plus the developer's
23
+ open tracked tickets, and you decide ONE thing:
24
+ · **task** — the session is clearly work on one of the candidate tickets → name it.
25
+ · **untracked** — real work, but it doesn't clearly match any candidate ticket. Kept:
26
+ Meridian later turns untracked work into new tickets.
27
+ · **overhead** — idle / personal / unrelated (music, settings, browsing). Discarded.
28
+ Tickets may come from Jira, Linear, GitHub, Trello, or Azure DevOps — treat them the same.
29
+
30
+ WHY ACCURACY MATTERS
31
+ Your classifications are the foundation of the whole pipeline. Every session you link to a
32
+ ticket is later summed with the others on that ticket and summarised into a **worklog update
33
+ posted to the developer's PM tool** on their behalf. So a wrong link is expensive: it injects
34
+ work that never happened into a real ticket's worklog AND hides the genuine untracked work.
35
+ **When the evidence does not clearly fit a candidate ticket, choose `untracked` — never force
36
+ a match.** A correct `untracked` is always better than a wrong `task`.
37
+
38
+ OUTPUT
39
+ Return a single bare JSON object — no preamble, no markdown fences, no text around it.
40
+ Follow the task-classifier skill below for the exact schema, field order, and decision rules.
41
+ Session data and candidate tickets are passed in the message; you do not need to query anything.
56
42
  """