@meridiona/meridian-darwin-arm64 1.58.1 → 1.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 1.58.1
1
+ 1.59.0
package/bin/meridian CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meridiona/meridian-darwin-arm64",
3
- "version": "1.58.1",
3
+ "version": "1.59.0",
4
4
  "description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
5
5
  "homepage": "https://github.com/Meridiona/meridian",
6
6
  "repository": {
@@ -23,6 +23,25 @@ _VSCODE_BANNER_RE = re.compile(
23
23
  # then responsible for not blowing the model's context window).
24
24
  SESSION_TEXT_CAP = int(os.environ.get("SESSION_TEXT_CAP", "10000"))
25
25
 
26
+ # Max chars of each candidate ticket's description included in the prompt.
27
+ # Default 0 = NO cap — the full description is sent. This field was previously
28
+ # hard-capped at 240 chars, which dropped 56-83% of real ticket text (measured:
29
+ # avg 548 chars, max 1440 across the live board), and the discriminating scope a
30
+ # session must be matched against frequently lives past char 240. With the
31
+ # 128K-context classifier and plan-only candidate sets (2-3 tickets), the prompt
32
+ # has ample budget, so descriptions are sent in full by default. Set
33
+ # CANDIDATE_DESC_CAP=<n> to re-impose a ceiling if an unusually long description
34
+ # ever bloats the prompt (e.g. on a full-candidate fallback day).
35
+ CANDIDATE_DESC_CAP = int(os.environ.get("CANDIDATE_DESC_CAP", "0"))
36
+
37
+ # Recent-work continuity window (minutes). The prompt summarises the developer's
38
+ # tracked work in this many minutes BEFORE the current session, aggregated per
39
+ # ticket, as a weak continuity prior. Time-windowed (not count-windowed) on
40
+ # purpose: session length is wildly variable, so "last N sessions" can be 90s of
41
+ # micro-glances or 3h of deep work. Shared with run_task_linker_mlx.py, which
42
+ # fetches the window. Override via CONTINUITY_WINDOW_MIN.
43
+ _CONTINUITY_WINDOW_MIN = int(os.environ.get("CONTINUITY_WINDOW_MIN", "30"))
44
+
26
45
 
27
46
  def _fmt_dur(duration_s: int | float) -> str:
28
47
  secs = int(duration_s or 0)
@@ -102,8 +121,8 @@ def _format_candidates(tasks: list[dict]) -> str:
102
121
  epic_title = (task.get("epic_title") or "").strip()
103
122
  sprint_name = (task.get("sprint_name") or "").strip()
104
123
  tags = (task.get("tags") or "").strip()
105
- if len(desc) > 240:
106
- desc = desc[:240] + "…"
124
+ if CANDIDATE_DESC_CAP > 0 and len(desc) > CANDIDATE_DESC_CAP:
125
+ desc = desc[:CANDIDATE_DESC_CAP] + "…"
107
126
  meta_parts = [p for p in [issue_type, f"Epic: {epic_title}" if epic_title else "", sprint_name, f"tags: {tags}" if tags else ""] if p]
108
127
  meta = " [" + " · ".join(meta_parts) + "]" if meta_parts else ""
109
128
  # The dev declared this ticket as today's focus on the plan page. It's a
@@ -117,44 +136,70 @@ def _format_candidates(tasks: list[dict]) -> str:
117
136
  return "\n\n".join(rows) if rows else "(no candidates)"
118
137
 
119
138
 
120
- def _format_recent_sessions(sessions: list[dict]) -> str:
121
- if not sessions:
122
- return " (no recent session context)"
123
- rows = []
124
- for s in sessions:
125
- time_str = _fmt_time(s.get("started_at") or "")
126
- app = (s.get("app_name") or "?")[:14]
127
- dur_str = _fmt_dur(s.get("duration_s") or 0)
128
- task_key = s.get("task_key")
129
- routing = s.get("task_routing") # None means unclassified
130
- if task_key:
131
- target = f"→ {task_key}"
132
- elif routing == "untracked":
133
- target = "→ [untracked]"
134
- elif routing is None:
135
- # session captured but not yet classified
136
- target = " [pending]"
139
+ def _fmt_continuity_mins(seconds: float) -> str:
140
+ """Coarse minutes label for the continuity block: '<1 min' or '~N min'."""
141
+ secs = int(seconds or 0)
142
+ if secs < 60:
143
+ return "<1 min"
144
+ return f"~{round(secs / 60)} min"
145
+
146
+
147
+ def _format_continuity(activity: list[dict], now_iso: str | None = None) -> str:
148
+ """Render the recent-ticket continuity prior — one bullet per ticket worked in
149
+ the window, ordered most-recent-first: total time spent, how many sessions it
150
+ spanned, and how long before the current session it was last active.
151
+
152
+ `activity` entries come from `_fetch_recent_ticket_activity` (already
153
+ aggregated, candidate-gated, confidence-filtered, recency-sorted). Empty input
154
+ yields an explicit "no tracked work" line (not ""), so the block is ALWAYS present:
155
+ that tells the model definitively "there is no recent continuity — rely on this
156
+ session's own evidence" (silence is ambiguous — it can't tell "no work" from
157
+ "not provided") and keeps the trace node legible instead of blank. We
158
+ deliberately do NOT emit a raw per-session log: those rows leak internal state
159
+ (sub-threshold micro-sessions, not-yet-classified neighbours, two interleaved
160
+ classify pipelines) that the model misreads as signal. This is a derived,
161
+ calibrated statement of recent tracked work.
162
+ """
163
+ if not activity:
164
+ return " (no tracked work in this window)"
165
+ lines = []
166
+ for a in activity:
167
+ total = _fmt_continuity_mins(a.get("total_s", 0))
168
+ n = int(a.get("sessions", 0) or 0)
169
+ sess = "1 session" if n == 1 else f"{n} sessions"
170
+ ago_s = a.get("ago_s")
171
+ if ago_s is None:
172
+ recency = ""
173
+ elif ago_s < 60:
174
+ recency = ", last active just before this session"
137
175
  else:
138
- target = " [overhead]"
139
- # Category is intentionally omitted recent-context is a task-continuity
140
- # signal only; carrying the (rule-based or prior-LLM) category tag would
141
- # feed a category prior back into classification.
142
- rows.append(f" {time_str} {app:<14} {dur_str:<7} {target}")
143
- return "\n".join(rows)
176
+ recency = f", last active ~{round(ago_s / 60)} min before this session"
177
+ lines.append(f" • {a['task_key']}{total} over {sess}{recency}")
178
+ return "\n".join(lines)
144
179
 
145
180
 
146
181
  def build_user_message(
147
182
  session: dict,
148
183
  candidates: list[dict],
149
- recent_sessions: list[dict] | None = None,
184
+ recent_activity: list[dict] | None = None,
185
+ now_iso: str | None = None,
150
186
  ) -> str:
151
- sessions = recent_sessions or []
152
- has_any_task_key = any(s.get("task_key") for s in sessions)
187
+ continuity = _format_continuity(recent_activity or [], now_iso)
188
+ # ALWAYS emitted (even when empty, where `continuity` is an explicit
189
+ # "no tracked work" line) so the model gets a definitive signal rather than
190
+ # ambiguous silence, and the trace node is never blank. Framed as a WEAK prior,
191
+ # never an instruction: an assertive "user was working on KAN-X" anchors the
192
+ # model into force-linking — the exact false-positive failure mode the SKILL
193
+ # warns against. The block states facts (ticket, time, recency); the SKILL's
194
+ # "classify by THIS session's evidence" rule governs.
153
195
  recent_block = (
154
- "RECENT WORK CONTEXT:\n"
155
- f"{_format_recent_sessions(sessions)}\n"
196
+ f"RECENT WORK CONTEXT — the developer's tracked work in the last "
197
+ f"{_CONTINUITY_WINDOW_MIN} minutes before this session. This is a WEAK "
198
+ "continuity hint, NOT proof: continue the most-recent ticket ONLY if this "
199
+ "session's own evidence also fits it; never link on continuity alone.\n"
200
+ f"{continuity}\n"
156
201
  "\n"
157
- ) if has_any_task_key else ""
202
+ )
158
203
  # When the dev declared a focus for the day, name it in the header so the model
159
204
  # treats ★ rows as a prior — preferred when the evidence plausibly fits, but
160
205
  # never forced. Recall is preserved: every candidate is still listed.
@@ -29,7 +29,7 @@ _DB_SHELL = shlex.quote(str(_DB_PATH))
29
29
 
30
30
  SYSTEM_CONTEXT = f"""You are **Meridian Intelligence** — the AI reasoning layer inside Meridian, a developer productivity platform.
31
31
 
32
- Meridian monitors a developer's screen and builds a structured record of their work. Your role is to reason over that record and take actions.
32
+ Meridian monitors a developer's screen and builds a structured record of their work as a stream of work *sessions*. Your PRIMARY role is to reason over each session and **classify it** — determining which tracked ticket (the "task") the work belongs to, or whether it is overhead or untracked work — so Meridian can keep every ticket's progress and worklog accurate. Classifying a session correctly to its task, and reasoning carefully over the evidence to do so, is the core job.
33
33
 
34
34
  CURRENT CAPABILITY — session classification
35
35
  Given a work session (app, duration, screen content, recent history, open tickets), decide:
@@ -13,7 +13,7 @@ OTel span hierarchy (when invoked as a script via main()):
13
13
  db_fetch
14
14
  classifier_input ← the COMPLETE model input (system + user)
15
15
  system_prompt — classifier skill + context
16
- recent_context — past-5 sessions
16
+ recent_context — 30-min per-ticket continuity prior
17
17
  session_block — the input session being classified
18
18
  candidate_tickets — ranked candidate tickets (★ = today)
19
19
  llm_inference
@@ -54,15 +54,22 @@ from agents import observability
54
54
  from agents._prompts import (
55
55
  build_user_message,
56
56
  _format_candidates,
57
- _format_recent_sessions,
57
+ _format_continuity,
58
58
  _format_session,
59
+ _CONTINUITY_WINDOW_MIN,
59
60
  )
60
61
  from agents._system_context import SYSTEM_CONTEXT
61
62
 
62
63
  log = logging.getLogger("agents.run_task_linker_mlx")
63
64
  tracer = observability.setup("meridian-task-linker-mlx")
64
65
 
65
- _CONTEXT_WINDOW = 5
66
+ # Recent-work continuity: only count a prior session toward the continuity block
67
+ # if its task link is confident enough to trust (a shaky 0.5 generic match
68
+ # shouldn't compound into a continuity nudge). 0.7 sits just above the SKILL's
69
+ # "generic project-level match" band (0.50-0.65), so this keeps real alignments
70
+ # and drops weak guesses. The window length lives in _prompts._CONTINUITY_WINDOW_MIN
71
+ # (shared with the prompt label). Override via CONTINUITY_MIN_CONFIDENCE.
72
+ _CONTINUITY_MIN_CONFIDENCE = float(os.environ.get("CONTINUITY_MIN_CONFIDENCE", "0.7"))
66
73
  _MAX_TOKENS = 1024
67
74
  _TEMPERATURE = 0.0 # greedy decoding — deterministic classification
68
75
 
@@ -602,23 +609,88 @@ def _fetch_session(
602
609
  return dict(row) if row else None
603
610
 
604
611
 
605
- def _fetch_recent_sessions(
606
- con: _sqlite3.Connection, before_id: int
612
+ def _fetch_recent_ticket_activity(
613
+ con: _sqlite3.Connection,
614
+ current_started_at: str,
615
+ candidate_keys: list[str],
607
616
  ) -> list[dict[str, Any]]:
608
- # Recent context is a task-continuity signal only: app, time, duration and
609
- # which ticket each recent session mapped to. We deliberately do NOT select
610
- # session_text/excerpt or category recent OCR is noise here and a category
611
- # tag would feed a prior back into classification. (session_text is still
612
- # referenced in WHERE only to skip empty-capture rows.)
617
+ """The developer's tracked-ticket work in the _CONTINUITY_WINDOW_MIN minutes
618
+ before the current session, aggregated per ticket — a calibrated continuity
619
+ prior (NOT a raw session log).
620
+
621
+ Returns one entry per ticket worked in the window:
622
+ {"task_key", "total_s", "sessions", "last_ended_at", "ago_s"}
623
+ ordered by recency (most-recently-active ticket first). Empty when there is no
624
+ qualifying recent work — the caller then renders an explicit "no tracked work"
625
+ line rather than asserting a continuity that doesn't exist.
626
+
627
+ A session counts only if it is (a) already CLASSIFIED to a ticket
628
+ (task_session_type='task' — "last classified", never pending/in-flight),
629
+ (b) confident enough to trust as a prior (task_confidence >=
630
+ _CONTINUITY_MIN_CONFIDENCE), and (c) mapped to a ticket in the CURRENT
631
+ candidate set — a prior on a ticket the model can't even pick is pure noise.
632
+ Windowing is done in Python (fromisoformat) so it's robust to the stored
633
+ timestamp's timezone/precision; the SQL only does the cheap "strictly before
634
+ current" + confidence prefilter (consistent ISO format → lexicographic '<' is
635
+ chronological).
636
+ """
637
+ candidates = set(candidate_keys)
638
+ if not current_started_at or not candidates:
639
+ return []
640
+ try:
641
+ anchor = _dt.datetime.fromisoformat(current_started_at)
642
+ except (ValueError, TypeError):
643
+ return []
644
+ window_start = anchor - _dt.timedelta(minutes=_CONTINUITY_WINDOW_MIN)
613
645
  rows = con.execute(
614
- "SELECT app_name, started_at, duration_s, task_key, task_routing"
646
+ "SELECT task_key, started_at, ended_at, duration_s, task_confidence"
615
647
  " FROM app_sessions"
616
- " WHERE id < ? AND duration_s > 1 AND COALESCE(session_text,'') != ''"
617
- " ORDER BY id DESC LIMIT ?",
618
- (before_id, _CONTEXT_WINDOW),
648
+ " WHERE started_at < ?"
649
+ " AND task_key IS NOT NULL"
650
+ " AND task_session_type = 'task'"
651
+ " AND task_confidence >= ?"
652
+ " ORDER BY started_at DESC LIMIT 200",
653
+ (current_started_at, _CONTINUITY_MIN_CONFIDENCE),
619
654
  ).fetchall()
620
- result = [dict(r) for r in rows]
621
- result.reverse()
655
+
656
+ agg: dict[str, dict[str, Any]] = {}
657
+ for r in rows:
658
+ d = dict(r)
659
+ tk = d.get("task_key")
660
+ if tk not in candidates:
661
+ continue
662
+ try:
663
+ s_at = _dt.datetime.fromisoformat(d["started_at"])
664
+ except (ValueError, TypeError):
665
+ continue
666
+ if s_at < window_start:
667
+ continue # outside the continuity window
668
+ try:
669
+ e_at = _dt.datetime.fromisoformat(d.get("ended_at") or d["started_at"])
670
+ except (ValueError, TypeError):
671
+ e_at = s_at
672
+ entry = agg.get(tk)
673
+ if entry is None:
674
+ entry = {"task_key": tk, "total_s": 0.0, "sessions": 0, "last_ended": e_at}
675
+ agg[tk] = entry
676
+ entry["total_s"] += float(d.get("duration_s") or 0.0)
677
+ entry["sessions"] += 1
678
+ if e_at > entry["last_ended"]:
679
+ entry["last_ended"] = e_at
680
+
681
+ result: list[dict[str, Any]] = []
682
+ for entry in agg.values():
683
+ ago_s = max(0.0, (anchor - entry["last_ended"]).total_seconds())
684
+ result.append(
685
+ {
686
+ "task_key": entry["task_key"],
687
+ "total_s": entry["total_s"],
688
+ "sessions": entry["sessions"],
689
+ "last_ended_at": entry["last_ended"].isoformat(),
690
+ "ago_s": ago_s,
691
+ }
692
+ )
693
+ result.sort(key=lambda e: e["ago_s"]) # most-recently-active ticket first
622
694
  return result
623
695
 
624
696
 
@@ -813,7 +885,7 @@ def _classify_one(
813
885
  session_raw = _fetch_session(con, session_id)
814
886
  if session_raw is None:
815
887
  db_span.set_attribute("pm_tasks_count", 0)
816
- db_span.set_attribute("recent_sessions_count", 0)
888
+ db_span.set_attribute("recent_continuity_tickets", 0)
817
889
  db_span.add_event("session_not_found", {"session_id": session_id})
818
890
  db_span.set_status(StatusCode.ERROR, f"session {session_id} not found in DB")
819
891
  return _error_result(
@@ -823,7 +895,12 @@ def _classify_one(
823
895
  plan_date = _local_day(session_raw.get("started_at") or "")
824
896
  focus_keys = _fetch_plan_focus(con, plan_date)
825
897
  pm_tasks = _fetch_pm_tasks(con, focus_keys)
826
- recent = _fetch_recent_sessions(con, session_id)
898
+ # Continuity prior needs the candidate set up front (it only names tickets
899
+ # the model can actually pick), so compute candidate_keys before fetching.
900
+ candidate_keys = [t["task_key"] for t in pm_tasks]
901
+ recent = _fetch_recent_ticket_activity(
902
+ con, session_raw.get("started_at") or "", candidate_keys
903
+ )
827
904
 
828
905
  session_text = session_raw.get("session_text") or ""
829
906
  # Coding-agent rows (Claude Code / Codex) carry the full transcript in
@@ -839,7 +916,7 @@ def _classify_one(
839
916
  # it answers "was the right ticket even offered, and where was it ranked?"
840
917
  # without anyone having to read the prompt. Ranked order is preserved
841
918
  # (today's-focus keys float to the front in _fetch_pm_tasks).
842
- candidate_keys = [t["task_key"] for t in pm_tasks]
919
+ # candidate_keys computed above (the continuity fetch needs it).
843
920
  recent_task_keys = [r.get("task_key") for r in recent if r.get("task_key")]
844
921
  # Session identity + the app_sessions row metadata, so a trace is
845
922
  # self-contained — you know WHICH session and its key fields (app, window
@@ -881,7 +958,13 @@ def _classify_one(
881
958
  db_span.set_attribute("summary_source", str(session_raw.get("summary_source") or ""))
882
959
  db_span.set_attribute("pm_tasks_count", len(pm_tasks))
883
960
  db_span.set_attribute("today_focus_count", len(focus_keys))
884
- db_span.set_attribute("recent_sessions_count", len(recent))
961
+ # Continuity prior: how many tickets the dev worked in the prior window,
962
+ # and across how many classified sessions (0/0 when there's no qualifying
963
+ # recent work → the prompt shows an explicit "no tracked work" line).
964
+ db_span.set_attribute("recent_continuity_tickets", len(recent))
965
+ db_span.set_attribute(
966
+ "recent_continuity_sessions", sum(int(r.get("sessions", 0) or 0) for r in recent)
967
+ )
885
968
  db_span.set_attribute("candidate_task_keys", ", ".join(candidate_keys) if candidate_keys else "-")
886
969
  db_span.set_attribute("today_focus_keys", ", ".join(focus_keys) if focus_keys else "-")
887
970
  # Which candidate-set policy actually applied for this session, so a trace
@@ -920,8 +1003,8 @@ def _classify_one(
920
1003
  # The single drill-down span for "exactly what the classifier was asked".
921
1004
  # It carries the COMPLETE input, byte-for-byte as handed to the model:
922
1005
  # • system_prompt — full system context + the task-classifier SKILL
923
- # • llm_input — full user message: the input session block, the recent
924
- # past-5 sessions, and the ranked candidate tickets
1006
+ # • llm_input — full user message: the input session block, the 30-min
1007
+ # per-ticket continuity prior, and the ranked candidate tickets
925
1008
  # Both are captured POST-assembly, so any cap already applied while building
926
1009
  # the prompt (e.g. SESSION_TEXT_CAP truncating the OCR excerpt) is reflected
927
1010
  # here EXACTLY as the model saw it — never the pre-cap original. Concatenating
@@ -931,7 +1014,9 @@ def _classify_one(
931
1014
  # capped at ~8k chars — so on that path the on-span text is the assembled
932
1015
  # input, not the rewritten one.)
933
1016
  with tracer.start_as_current_span("classifier_input") as bp_span:
934
- user_message = build_user_message(session, pm_tasks, recent_sessions=recent)
1017
+ user_message = build_user_message(
1018
+ session, pm_tasks, recent_activity=recent, now_iso=session.get("started_at")
1019
+ )
935
1020
  messages = [
936
1021
  {"role": "system", "content": _SYSTEM_PROMPT},
937
1022
  {"role": "user", "content": user_message},
@@ -975,7 +1060,8 @@ def _classify_one(
975
1060
  part.set_attribute(_k, _v)
976
1061
 
977
1062
  _input_part("system_prompt", _SYSTEM_PROMPT) # classifier skill + context
978
- _input_part("recent_context", _format_recent_sessions(recent)) # past-5 sessions
1063
+ _input_part("recent_context",
1064
+ _format_continuity(recent, session.get("started_at"))) # 30-min continuity prior
979
1065
  _input_part("session_block", _format_session(session)) # the input session
980
1066
  _input_part("candidate_tickets", _format_candidates(pm_tasks), # ranked candidates
981
1067
  ticket_count=len(pm_tasks))
@@ -1310,7 +1396,13 @@ def _classify_one_logged_inner(
1310
1396
  session_raw = _fetch_session(con, session_id)
1311
1397
  focus_keys = _fetch_plan_focus(con, _local_day(session_raw.get("started_at") or "")) if session_raw else []
1312
1398
  pm_tasks = _fetch_pm_tasks(con, focus_keys) if session_raw else []
1313
- recent = _fetch_recent_sessions(con, session_id) if session_raw else []
1399
+ recent = (
1400
+ _fetch_recent_ticket_activity(
1401
+ con, session_raw.get("started_at") or "", [t["task_key"] for t in pm_tasks]
1402
+ )
1403
+ if session_raw
1404
+ else []
1405
+ )
1314
1406
 
1315
1407
  if session_raw:
1316
1408
  user_message = build_user_message(
@@ -1328,7 +1420,8 @@ def _classify_one_logged_inner(
1328
1420
  "audio_snippets": [],
1329
1421
  },
1330
1422
  pm_tasks,
1331
- recent_sessions=recent,
1423
+ recent_activity=recent,
1424
+ now_iso=session_raw.get("started_at", ""),
1332
1425
  )
1333
1426
  else:
1334
1427
  user_message = ""
@@ -348,7 +348,7 @@ class TestObservabilityClassifyOne:
348
348
  def test_db_fetch_recent_sessions_count(self, db: Path, span_exporter):
349
349
  _, spans = self._run(db, span_exporter)
350
350
  s = _span_by_name(spans, "db_fetch")
351
- assert s.attributes["recent_sessions_count"] == 0 # no prior sessions
351
+ assert s.attributes["recent_continuity_tickets"] == 0 # no prior tracked work
352
352
 
353
353
  def test_db_fetch_session_loaded_event_fields(self, db: Path, span_exporter):
354
354
  _, spans = self._run(db, span_exporter)
@@ -421,7 +421,7 @@ class TestObservabilityClassifyOne:
421
421
  s = _span_by_name(spans, "build_prompt")
422
422
  assert s is not None
423
423
  assert s.attributes["pm_tasks_count"] == 1
424
- assert s.attributes["recent_sessions_count"] == 0
424
+ assert s.attributes["recent_continuity_tickets"] == 0
425
425
  assert s.attributes["prompt_chars"] > 0
426
426
 
427
427
  def test_build_prompt_assembled_event(self, db: Path, span_exporter):
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "meridian-agents"
7
- version = "1.58.1"
7
+ version = "1.59.0"
8
8
  description = "Meridian agents — MLX classifier server and Jira worklog synthesis for meridian.db"
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "Meridiona" }]
@@ -55,7 +55,7 @@ The user message contains:
55
55
 
56
56
  - **SESSION** — app, duration, top window titles, and the screen content (OCR / a11y). Decide the category yourself from this evidence; no category is provided.
57
57
  - **CANDIDATE TICKETS** — all open tracked tickets (Jira, Linear, GitHub, Trello, Azure DevOps). These are the only tickets you may choose from.
58
- - **RECENT SESSIONS** (previous 5) app / time / duration / which ticket each mapped to (no screen text). A **weak disambiguation hint only**: it can support a match when the current session ALSO has matching evidence, but it must never override what the current session itself shows. Recent activity on a ticket does not make the current session that ticket.
58
+ - **RECENT WORK CONTEXT** — a summary of the developer's tracked work in the **last 30 minutes** before this session, aggregated **per ticket**: each line is a ticket they worked, with the total time spent, how many sessions it spanned, and how long before this session it was last active (most-recently-active ticket first). It lists only tickets that are in the candidate set above. This is a **weak disambiguation hint only**: it can support a match when the current session ALSO has matching evidence, but it must never override what the current session itself shows. Recent activity on a ticket does not make the current session that ticket — and a ticket worked 25 minutes ago is a much weaker hint than one active a minute ago. When there was no confident recent tracked work to report, the block is still present and reads `(no tracked work in this window)`.
59
59
 
60
60
  ## Available capabilities
61
61
 
@@ -74,9 +74,9 @@ Use database queries sparingly — session data and candidate tickets are alread
74
74
 
75
75
  Pick **exactly one** of the candidate `task_key` values, OR return `null` if **none** fit the session.
76
76
 
77
- Use **context from previous sessions** to make smarter decisions:
78
- - If the current session is **generic** (e.g., Slack) but follows/precedes work on a specific ticket, consider linking it to that task.
79
- - If sessions alternate (coding Slack coding), treat them as potentially the **same task** if separated by only a few minutes.
77
+ Use the **recent work context** to make smarter decisions:
78
+ - If the current session is **generic** (e.g., Slack) but the recent context shows sustained, very-recent work on a specific ticket, consider linking it to that task — *only if* this session's own evidence is at least consistent with it.
79
+ - The recent context is **recency- and time-weighted**: prefer the ticket that was active most recently and for the most time. A ticket last active a minute ago is a strong tie-breaker; one last active ~25 minutes ago is weak. When two or more tickets appear, the dev was context-switching — continuity is ambiguous, so lean harder on the current session's own evidence.
80
80
  - Overhead (system settings, music, etc) should always be `null` regardless of context.
81
81
 
82
82
  ## Output format
@@ -192,18 +192,22 @@ Capture every category the session shows evidence of:
192
192
  **Bad — speculative + marketing:**
193
193
  > Successfully refactored the workflow to be more efficient. The new linear design will be much faster. Next steps include adding the worklog poster and testing end-to-end on Jira.
194
194
 
195
- ## Using Context from Previous Sessions
195
+ ## Using the Recent Work Context
196
196
 
197
- You have access to **the previous 5 sessions** to disambiguate the current session:
197
+ The **RECENT WORK CONTEXT** block summarises the developer's tracked work over the prior 30 minutes, per ticket, with time spent and how recently each was active. Use it to disambiguate the current session — never to override it.
198
198
 
199
- **Example: Coding Communication about same work → Coding**
200
- - Session 1 (5 min ago): VS Code, editing KAN-42 implementation → task_key: KAN-42, confidence: 0.90
201
- - Session 2 (3 min ago): Slack, discussing PR review for KAN-42 → **if related to same work**, task_key: KAN-42, confidence: 0.75 (work mention + prior context)
202
- - Session 3 (now): VS Code, editing same file task_key: KAN-42, confidence: 0.85 (context continuity)
199
+ **Example: the recent context shows**
200
+ ```
201
+ RECENT WORK CONTEXT — the developer's tracked work in the last 30 minutes before this session...
202
+ • KAN-42 ~22 min over 5 sessions, last active just before this session
203
+ ```
204
+ - Current session is **VS Code editing the same file** referenced by KAN-42 → strong: the recent context *and* the current evidence agree → task_key: KAN-42, confidence ~0.85.
205
+ - Current session is **Slack** with the channel/thread discussing the KAN-42 PR → the current content itself is about KAN-42, and the recent context supports it → task_key: KAN-42, confidence ~0.75.
206
+ - Current session is **Slack** showing a generic standup or an unrelated thread → the current evidence is NOT about KAN-42 → return `null` / `untracked` (or a different ticket if its own evidence matches). **Do not inherit KAN-42 just because it was the recent task.**
203
207
 
204
- **Decision:** Only link Session 2 to KAN-42 if Session 2's **own content** shows it is about that work (the OCR/window discusses or searches the KAN-42 work). If Session 2 is generic, OR shows the user has moved to *different* work (another project, another team's doc, an unrelated meeting), return `null` with `session_type: "untracked"` (or a different ticket if its own evidence matches one) **do not inherit KAN-42 just because it was the recent task.** Continuity is a tie-breaker between plausible matches, never a substitute for current-session evidence.
208
+ **Decision rule:** continuity is a tie-breaker between plausible matches, never a substitute for current-session evidence. Weight the recent context by recency and time — a ticket "last active just before this session" with 22 minutes behind it is a strong tie-breaker; one "last active ~25 min before this session" is weak. When the block lists **more than one ticket**, the developer was switching context, so continuity is ambiguous: rely on the current session's own evidence.
205
209
 
206
- Example reasoning for Session 2 (if task-related): `"Slack discusses PR review for KAN-42 implementation mentioned in prior VS Code session; linked via work context."`
210
+ Example reasoning (if task-related): `"Slack thread discusses the KAN-42 PR; recent context shows 22 min on KAN-42 ending just before this session; linked via work context."`
207
211
 
208
212
  ## Scoring heuristics
209
213
 
@@ -34,6 +34,7 @@ if str(_SERVICES_DIR) not in sys.path:
34
34
  sys.path.insert(0, str(_SERVICES_DIR))
35
35
 
36
36
  from agents._prompts import build_user_message
37
+ from agents.run_task_linker_mlx import _fetch_recent_ticket_activity
37
38
 
38
39
  MERIDIAN_DB = Path(os.environ.get("MERIDIAN_DB", Path.home() / ".meridian/meridian.db"))
39
40
  SESSION_IDS: list[int] = [
@@ -103,19 +104,6 @@ def _fetch_pm_tasks(con: sqlite3.Connection) -> list[dict]:
103
104
  return [dict(r) for r in rows]
104
105
 
105
106
 
106
- def _fetch_recent(con: sqlite3.Connection, before_id: int) -> list[dict]:
107
- rows = con.execute(
108
- "SELECT app_name, started_at, duration_s, task_key, task_routing, category"
109
- " FROM app_sessions"
110
- " WHERE id < ? AND duration_s > 1 AND COALESCE(session_text,'') != ''"
111
- " ORDER BY id DESC LIMIT 5",
112
- (before_id,),
113
- ).fetchall()
114
- result = [dict(r) for r in rows]
115
- result.reverse()
116
- return result
117
-
118
-
119
107
  def main() -> None:
120
108
  if not MERIDIAN_DB.exists():
121
109
  print(f"ERROR: meridian.db not found at {MERIDIAN_DB}", file=sys.stderr)
@@ -152,8 +140,12 @@ def main() -> None:
152
140
  "confidence": s["confidence"] or 0.0,
153
141
  "audio_snippets": [],
154
142
  }
155
- recent = _fetch_recent(con, s["id"])
156
- prompt_input = build_user_message(session, pm_tasks, recent_sessions=recent)
143
+ recent = _fetch_recent_ticket_activity(
144
+ con, session["started_at"], [t["task_key"] for t in pm_tasks]
145
+ )
146
+ prompt_input = build_user_message(
147
+ session, pm_tasks, recent_activity=recent, now_iso=session["started_at"]
148
+ )
157
149
 
158
150
  expected = {
159
151
  "task_key": _normalise_task_key(s.get("task_key")),
@@ -39,7 +39,7 @@ def _reconstruct_prompt(db_path: str, session_id: int) -> str | None:
39
39
  from agents._prompts import build_user_message
40
40
  from agents.run_task_linker_mlx import (
41
41
  _fetch_pm_tasks,
42
- _fetch_recent_sessions,
42
+ _fetch_recent_ticket_activity,
43
43
  _fetch_session,
44
44
  )
45
45
 
@@ -48,8 +48,10 @@ def _reconstruct_prompt(db_path: str, session_id: int) -> str | None:
48
48
  raw = _fetch_session(con, session_id)
49
49
  if raw is None:
50
50
  return None
51
- recent = _fetch_recent_sessions(con, session_id)
52
51
  pm_tasks = _fetch_pm_tasks(con)
52
+ recent = _fetch_recent_ticket_activity(
53
+ con, raw.get("started_at") or "", [t["task_key"] for t in pm_tasks]
54
+ )
53
55
  session_text = raw.get("session_text") or ""
54
56
  if raw.get("coding_agent_session_uuid") and (raw.get("session_summary") or "").strip():
55
57
  session_text = raw["session_summary"]
@@ -66,7 +68,9 @@ def _reconstruct_prompt(db_path: str, session_id: int) -> str | None:
66
68
  "confidence": raw.get("confidence", 0.0),
67
69
  "audio_snippets": [],
68
70
  }
69
- return build_user_message(session, pm_tasks, recent_sessions=recent)
71
+ return build_user_message(
72
+ session, pm_tasks, recent_activity=recent, now_iso=raw.get("started_at")
73
+ )
70
74
 
71
75
 
72
76
  def _classify(url: str, db_path: str, session_ids: list[int]) -> list[dict]:
@@ -24,7 +24,10 @@ _SERVICES_DIR = Path(__file__).parent.parent.parent
24
24
  if str(_SERVICES_DIR) not in sys.path:
25
25
  sys.path.insert(0, str(_SERVICES_DIR))
26
26
 
27
+ import sqlite3 # noqa: E402
28
+
27
29
  from agents._prompts import build_user_message # noqa: E402
30
+ from agents.run_task_linker_mlx import _fetch_recent_ticket_activity # noqa: E402
28
31
 
29
32
  EVAL_DIR = Path(__file__).parent
30
33
  SEED_DIR = EVAL_DIR / "data" / "seeds"
@@ -34,22 +37,36 @@ PERSONA_FILES = {
34
37
  }
35
38
 
36
39
 
37
def _project_recent(
    prior: list[dict], current_started_at: str, candidate_keys: list[str]
) -> list[dict]:
    """Build the per-ticket continuity prior for a seed session.

    Reuses the EXACT production aggregation (`_fetch_recent_ticket_activity`) so
    rendered goldens match the live prompt. The prior scoreable seeds are loaded
    into a throwaway in-memory DB and the real query is run against it
    (windowing, confidence floor, candidate-gating, recency ordering all happen
    there — one source of truth).

    Args:
        prior: Seed sessions that precede the current one. Each is read for
            ``ground_truth.task_key``, ``started_at``, ``duration_s`` and the
            optional ``ended_at``.
        current_started_at: Start timestamp of the session being rendered; passed
            through as the anchor for the production query.
        candidate_keys: Task keys of the candidate tickets offered in the prompt.

    Returns:
        Whatever `_fetch_recent_ticket_activity` returns for the seeded DB.
    """
    con = sqlite3.connect(":memory:")
    # This helper runs once per rendered session; close the scratch connection
    # deterministically instead of leaving it to the garbage collector.
    try:
        con.row_factory = sqlite3.Row
        con.execute(
            "CREATE TABLE app_sessions ("
            " id INTEGER PRIMARY KEY AUTOINCREMENT,"
            " task_key TEXT, started_at TEXT, ended_at TEXT, duration_s REAL,"
            " task_confidence REAL, task_session_type TEXT)"
        )
        for s in prior:
            gt = s.get("ground_truth", {})
            tk = gt.get("task_key")
            task_key = tk if tk and tk != "none" else None
            if not task_key:
                continue  # untracked/overhead priors carry no continuity signal
            con.execute(
                "INSERT INTO app_sessions"
                " (task_key, started_at, ended_at, duration_s, task_confidence, task_session_type)"
                " VALUES (?, ?, ?, ?, 1.0, 'task')",
                # Seeds without an explicit end fall back to their start time.
                (task_key, s["started_at"], s.get("ended_at") or s["started_at"], s["duration_s"]),
            )
        con.commit()
        return _fetch_recent_ticket_activity(con, current_started_at, candidate_keys)
    finally:
        con.close()
53
70
 
54
71
 
55
72
  def render(persona: str) -> list[dict]:
@@ -79,8 +96,12 @@ def render(persona: str) -> list[dict]:
79
96
  if not gt.get("scoreable"):
80
97
  continue
81
98
 
82
- recent = _project_recent(scoreable_prior[-5:])
83
- prompt = build_user_message(s, candidates, recent_sessions=recent)
99
+ recent = _project_recent(
100
+ scoreable_prior, s["started_at"], [c["task_key"] for c in candidates]
101
+ )
102
+ prompt = build_user_message(
103
+ s, candidates, recent_activity=recent, now_iso=s["started_at"]
104
+ )
84
105
 
85
106
  expected = {
86
107
  "task_key": gt.get("task_key", "none"),
@@ -0,0 +1,202 @@
1
+ """Unit tests for the 30-min per-ticket continuity context.
2
+
3
+ Covers the two halves of the rewritten recent-work block:
4
+ * _fetch_recent_ticket_activity — windowing, confidence floor, candidate-gating,
5
+ per-ticket aggregation, recency ordering.
6
+ * _format_continuity — rendering (none / single / multiple / recency).
7
+
8
+ Run: services/.venv/bin/pytest services/tests/test_continuity_context.py -v
9
+ (Also runnable without pytest: services/.venv/bin/python services/tests/test_continuity_context.py)
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import datetime as _dt
14
+ import sqlite3
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ # Make `from agents import ...` resolve (mirror tests/evals/eval_classifier.py).
19
+ _SERVICES_DIR = Path(__file__).resolve().parent.parent
20
+ if str(_SERVICES_DIR) not in sys.path:
21
+ sys.path.insert(0, str(_SERVICES_DIR))
22
+
23
+ from agents import run_task_linker_mlx as rtl # noqa: E402
24
+ from agents import _prompts # noqa: E402
25
+
26
+ ANCHOR = "2026-06-17T10:00:00+00:00"
27
+
28
+
29
def _at(minutes_before: int) -> str:
    """Return the ISO-8601 timestamp lying `minutes_before` minutes before ANCHOR."""
    offset = _dt.timedelta(minutes=minutes_before)
    anchor_dt = _dt.datetime.fromisoformat(ANCHOR)
    moment = anchor_dt - offset
    return moment.isoformat()
32
+
33
+
34
+ def _make_con(rows: list[dict]) -> sqlite3.Connection:
35
+ """In-memory meridian DB with the columns _fetch_recent_ticket_activity reads."""
36
+ con = sqlite3.connect(":memory:")
37
+ con.row_factory = sqlite3.Row
38
+ con.execute(
39
+ "CREATE TABLE app_sessions ("
40
+ " id INTEGER PRIMARY KEY AUTOINCREMENT,"
41
+ " task_key TEXT, started_at TEXT, ended_at TEXT, duration_s REAL,"
42
+ " task_confidence REAL, task_session_type TEXT)"
43
+ )
44
+ for r in rows:
45
+ con.execute(
46
+ "INSERT INTO app_sessions"
47
+ " (task_key, started_at, ended_at, duration_s, task_confidence, task_session_type)"
48
+ " VALUES (?, ?, ?, ?, ?, ?)",
49
+ (
50
+ r.get("task_key"),
51
+ r["started_at"],
52
+ r.get("ended_at"),
53
+ r.get("duration_s", 0.0),
54
+ r.get("task_confidence", 0.9),
55
+ r.get("task_session_type", "task"),
56
+ ),
57
+ )
58
+ con.commit()
59
+ return con
60
+
61
+
62
+ def _keys(activity):
63
+ return [a["task_key"] for a in activity]
64
+
65
+
66
+ # ── _fetch_recent_ticket_activity ────────────────────────────────────────────
67
+
68
def test_aggregates_per_ticket_and_orders_by_recency():
    """Two KAN-1 sessions collapse into one entry that sorts ahead of the older KAN-2."""
    seeded = [
        # KAN-1: two sessions, most-recent ends 2 min before anchor
        {"task_key": "KAN-1", "started_at": _at(10), "ended_at": _at(9), "duration_s": 300},
        {"task_key": "KAN-1", "started_at": _at(3), "ended_at": _at(2), "duration_s": 120},
        # KAN-2: one session, ends 20 min before anchor
        {"task_key": "KAN-2", "started_at": _at(21), "ended_at": _at(20), "duration_s": 600},
    ]
    con = _make_con(seeded)
    activity = rtl._fetch_recent_ticket_activity(con, ANCHOR, ["KAN-1", "KAN-2"])

    # most-recently-active first
    assert _keys(activity) == ["KAN-1", "KAN-2"]

    newest = activity[0]
    older = activity[1]
    assert newest["sessions"] == 2
    # 300 + 120 summed
    assert newest["total_s"] == 420.0
    # last active ~2 min ago
    assert abs(newest["ago_s"] - 120) < 1
    # KAN-2 ~20 min ago
    assert abs(older["ago_s"] - 1200) < 1
83
+
84
+
85
def test_excludes_sessions_outside_the_window():
    """A session from ~40 min before the anchor is dropped; a ~5 min old one is kept."""
    inside = {"task_key": "KAN-1", "started_at": _at(5), "ended_at": _at(4), "duration_s": 60}
    # >30 min
    outside = {"task_key": "KAN-9", "started_at": _at(40), "ended_at": _at(39), "duration_s": 60}
    con = _make_con([inside, outside])

    activity = rtl._fetch_recent_ticket_activity(con, ANCHOR, ["KAN-1", "KAN-9"])

    assert _keys(activity) == ["KAN-1"]
92
+
93
+
94
def test_excludes_below_confidence_floor():
    """Of two otherwise identical sessions, only the 0.9-confidence one is returned (0.5 is dropped)."""
    confident = {
        "task_key": "KAN-1", "started_at": _at(5), "ended_at": _at(4),
        "duration_s": 60, "task_confidence": 0.9,
    }
    doubtful = {
        "task_key": "KAN-2", "started_at": _at(5), "ended_at": _at(4),
        "duration_s": 60, "task_confidence": 0.5,
    }
    con = _make_con([confident, doubtful])

    activity = rtl._fetch_recent_ticket_activity(con, ANCHOR, ["KAN-1", "KAN-2"])

    assert _keys(activity) == ["KAN-1"]
101
+
102
+
103
def test_candidate_gating_drops_non_candidate_tickets():
    """A ticket with recent activity is omitted when it is not in the candidate list."""
    offered = {"task_key": "KAN-1", "started_at": _at(5), "ended_at": _at(4), "duration_s": 60}
    # not a candidate
    not_offered = {"task_key": "KAN-7", "started_at": _at(5), "ended_at": _at(4), "duration_s": 60}
    con = _make_con([offered, not_offered])

    activity = rtl._fetch_recent_ticket_activity(con, ANCHOR, ["KAN-1"])

    assert _keys(activity) == ["KAN-1"]
110
+
111
+
112
def test_excludes_untracked_and_null_task():
    """A row with a NULL task_key and type "untracked" never appears in the result."""
    untracked = {
        "task_key": None, "started_at": _at(5), "ended_at": _at(4),
        "duration_s": 60, "task_session_type": "untracked",
    }
    tracked = {
        "task_key": "KAN-1", "started_at": _at(5), "ended_at": _at(4),
        "duration_s": 60, "task_session_type": "task",
    }
    con = _make_con([untracked, tracked])

    activity = rtl._fetch_recent_ticket_activity(con, ANCHOR, ["KAN-1"])

    assert _keys(activity) == ["KAN-1"]
119
+
120
+
121
def test_no_candidates_returns_empty():
    """An empty candidate list yields an empty result even when the DB holds recent work."""
    only_row = {"task_key": "KAN-1", "started_at": _at(5), "ended_at": _at(4), "duration_s": 60}
    con = _make_con([only_row])
    activity = rtl._fetch_recent_ticket_activity(con, ANCHOR, [])
    assert activity == []
124
+
125
+
126
def test_no_anchor_returns_empty():
    """An empty anchor timestamp yields an empty result even when the DB holds recent work."""
    only_row = {"task_key": "KAN-1", "started_at": _at(5), "ended_at": _at(4), "duration_s": 60}
    con = _make_con([only_row])
    activity = rtl._fetch_recent_ticket_activity(con, "", ["KAN-1"])
    assert activity == []
129
+
130
+
131
+ # ── _format_continuity ───────────────────────────────────────────────────────
132
+
133
def test_format_empty_is_explicit_no_work_line():
    """No activity renders as the explicit "(no tracked work in this window)" line."""
    rendered = _prompts._format_continuity([])
    # explicit, never silent
    assert rendered != ""
    assert rendered.strip() == "(no tracked work in this window)"
137
+
138
+
139
def test_format_single_ticket_recent():
    """One ticket renders its key, rounded minutes, session count and a "just before" recency."""
    entry = {"task_key": "KAN-1", "total_s": 420, "sessions": 2, "ago_s": 30}
    rendered = _prompts._format_continuity([entry])

    for fragment in ("KAN-1", "~7 min", "2 sessions"):
        assert fragment in rendered
    # ago < 60s
    assert "just before this session" in rendered
147
+
148
+
149
def test_format_recency_minutes():
    """A ticket last active 1200 s ago renders as "~20 min before this session"."""
    entry = {"task_key": "KAN-2", "total_s": 600, "sessions": 1, "ago_s": 1200}
    rendered = _prompts._format_continuity([entry])

    assert "1 session" in rendered
    assert "~20 min before this session" in rendered
155
+
156
+
157
def test_format_multiple_tickets_one_bullet_each():
    """Two tickets render as exactly two bullet lines naming both keys."""
    entries = [
        {"task_key": "KAN-1", "total_s": 420, "sessions": 2, "ago_s": 30},
        {"task_key": "KAN-2", "total_s": 600, "sessions": 1, "ago_s": 1200},
    ]
    rendered = _prompts._format_continuity(entries)

    bullet_count = rendered.count("•")
    assert bullet_count == 2
    assert "KAN-1" in rendered
    assert "KAN-2" in rendered
164
+
165
+
166
def test_build_user_message_includes_block_when_activity_present():
    """The built message carries the recent-work block, its WEAK framing and the ticket key."""
    session = {"app_name": "Code", "session_text": "x"}
    candidates = [{"task_key": "KAN-1", "title": "t", "description_text": "d"}]
    activity = [{"task_key": "KAN-1", "total_s": 60, "sessions": 1, "ago_s": 30}]

    message = _prompts.build_user_message(
        session,
        candidates,
        recent_activity=activity,
        now_iso=ANCHOR,
    )

    assert "RECENT WORK CONTEXT" in message
    # framed as a weak prior
    assert "WEAK" in message
    assert "KAN-1" in message
176
+
177
+
178
def test_build_user_message_shows_explicit_block_when_no_activity():
    """With no recent activity the block is still emitted, stating that no work was tracked."""
    session = {"app_name": "Code", "session_text": "x"}
    candidates = [{"task_key": "KAN-1", "title": "t", "description_text": "d"}]

    message = _prompts.build_user_message(
        session,
        candidates,
        recent_activity=[],
        now_iso=ANCHOR,
    )

    # always present now
    assert "RECENT WORK CONTEXT" in message
    # explicit empty state
    assert "no tracked work in this window" in message
187
+
188
+
189
+ # ── plain-python runner (no pytest needed) ───────────────────────────────────
190
+
191
if __name__ == "__main__":
    # Only the script entry point needs this, so keep the import local.
    import traceback

    # Every module-level callable named test_*, in name order.
    fns = [v for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
    passed = 0
    for fn in fns:
        try:
            fn()
        except Exception as exc:  # noqa: BLE001
            print(f" FAIL {fn.__name__}: {exc!r}")
            # A bare `assert` failure has an empty repr; the traceback is the
            # only thing that says which assertion tripped.
            traceback.print_exc()
        else:
            print(f" PASS {fn.__name__}")
            passed += 1
    print(f"\n{passed}/{len(fns)} passed")
    # Exit non-zero when any test failed so shells/CI can detect it.
    raise SystemExit(0 if passed == len(fns) else 1)
@@ -0,0 +1,120 @@
1
+ """Unit tests for `_fetch_pm_tasks` candidate-set policy.
2
+
3
+ Covers the plan-only candidate filtering (CLASSIFY_PLAN_ONLY_CANDIDATES) added on
4
+ top of the legacy boost-never-filter behaviour, including both safety guards:
5
+ * no confirmed plan → every candidate is offered (unchanged behaviour)
6
+ * plan confirmed → candidates narrowed to the confirmed plan, in order
7
+ * plan tickets gone → fall back to the full set (never zero candidates)
8
+ * curation-excluded → never a candidate, even if named in the plan
9
+
10
+ Run: services/.venv/bin/pytest services/tests/test_fetch_pm_tasks.py -v
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import sqlite3
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ import pytest
19
+
20
+ # Make `from agents import ...` resolve (mirror tests/evals/eval_classifier.py).
21
+ _SERVICES_DIR = Path(__file__).resolve().parent.parent
22
+ if str(_SERVICES_DIR) not in sys.path:
23
+ sys.path.insert(0, str(_SERVICES_DIR))
24
+
25
+ from agents import run_task_linker_mlx as rtl # noqa: E402
26
+
27
+
28
+ def _make_con(task_keys: list[str], excluded: list[str] | None = None) -> sqlite3.Connection:
29
+ """In-memory meridian DB with the columns `_fetch_pm_tasks` selects."""
30
+ con = sqlite3.connect(":memory:")
31
+ con.row_factory = sqlite3.Row
32
+ con.execute(
33
+ "CREATE TABLE pm_tasks ("
34
+ " task_key TEXT PRIMARY KEY, title TEXT, description_text TEXT,"
35
+ " status_raw TEXT, is_terminal INTEGER, issue_type TEXT,"
36
+ " parent_key TEXT, epic_title TEXT, sprint_name TEXT, tags TEXT)"
37
+ )
38
+ con.execute(
39
+ "CREATE TABLE pm_task_curation (task_key TEXT PRIMARY KEY, decision TEXT)"
40
+ )
41
+ for k in task_keys:
42
+ con.execute(
43
+ "INSERT INTO pm_tasks (task_key, title, description_text, status_raw,"
44
+ " is_terminal, issue_type, parent_key, epic_title, sprint_name, tags)"
45
+ " VALUES (?, ?, '', 'In Progress', 0, 'Task', '', '', '', '')",
46
+ (k, f"title {k}"),
47
+ )
48
+ for k in excluded or []:
49
+ con.execute(
50
+ "INSERT INTO pm_task_curation (task_key, decision) VALUES (?, 'excluded')",
51
+ (k,),
52
+ )
53
+ con.commit()
54
+ return con
55
+
56
+
57
@pytest.fixture
def plan_only(monkeypatch):
    """Pin `rtl._PLAN_ONLY_CANDIDATES` to True so plan-only filtering is active whatever the env default."""
    flag_value = True
    monkeypatch.setattr(rtl, "_PLAN_ONLY_CANDIDATES", flag_value)
61
+
62
+
63
@pytest.fixture
def boost_mode(monkeypatch):
    """Pin `rtl._PLAN_ONLY_CANDIDATES` to False to get the legacy boost-never-filter behaviour."""
    flag_value = False
    monkeypatch.setattr(rtl, "_PLAN_ONLY_CANDIDATES", flag_value)
67
+
68
+
69
+ def _keys(tasks):
70
+ return [t["task_key"] for t in tasks]
71
+
72
+
73
def test_no_plan_returns_all_unmarked(plan_only):
    """No confirmed plan → every candidate offered, none marked as focus."""
    con = _make_con(["K-1", "K-2", "K-3"])
    offered = rtl._fetch_pm_tasks(con, focus_keys=[])

    assert set(_keys(offered)) == {"K-1", "K-2", "K-3"}
    flagged = [t for t in offered if t.get("is_today_focus")]
    assert not flagged
79
+
80
+
81
def test_plan_only_narrows_to_plan_in_declared_order(plan_only):
    """Plan confirmed → candidates are exactly the plan, in declared order, marked."""
    plan = ["K-3", "K-1"]
    con = _make_con(["K-1", "K-2", "K-3", "K-4"])

    offered = rtl._fetch_pm_tasks(con, focus_keys=plan)

    # declared order preserved
    assert _keys(offered) == ["K-3", "K-1"]
    for task in offered:
        assert task["is_today_focus"]
87
+
88
+
89
def test_plan_only_falls_back_when_plan_tickets_absent(plan_only):
    """GUARD: plan tickets not in the live pool → fall back to ALL, never empty."""
    con = _make_con(["K-1", "K-2"])

    offered = rtl._fetch_pm_tasks(con, focus_keys=["GHOST-9"])

    # full set, not empty
    assert set(_keys(offered)) == {"K-1", "K-2"}
    flagged = [t for t in offered if t.get("is_today_focus")]
    assert not flagged
95
+
96
+
97
def test_plan_only_drops_curation_excluded_even_if_in_plan(plan_only):
    """An excluded ticket is never a candidate, even when named in the plan."""
    con = _make_con(["K-1", "K-2"], excluded=["K-2"])

    offered = rtl._fetch_pm_tasks(con, focus_keys=["K-2", "K-1"])

    # K-2 excluded → only K-1 survives; still a non-empty, plan-scoped set.
    assert _keys(offered) == ["K-1"]
    survivor = offered[0]
    assert survivor["is_today_focus"]
104
+
105
+
106
def test_plan_only_partial_plan_keeps_only_live_plan_tickets(plan_only):
    """Plan names a live + a dead ticket → only the live one is offered."""
    live_pool = ["K-1", "K-2", "K-3"]
    plan = ["K-2", "GHOST-9"]

    offered = rtl._fetch_pm_tasks(_make_con(live_pool), focus_keys=plan)

    assert _keys(offered) == ["K-2"]
111
+
112
+
113
def test_boost_mode_keeps_all_with_plan_floated_to_top(boost_mode):
    """Flag off → legacy behaviour: plan floated to top, every candidate kept."""
    con = _make_con(["K-1", "K-2", "K-3"])

    offered = rtl._fetch_pm_tasks(con, focus_keys=["K-3"])

    # recall untouched
    assert set(_keys(offered)) == {"K-1", "K-2", "K-3"}
    top = offered[0]
    # floated to top
    assert top["task_key"] == "K-3"
    assert top["is_today_focus"]
    focus_marked = [t for t in offered if t.get("is_today_focus")]
    assert len(focus_marked) == 1
@@ -0,0 +1,45 @@
1
+ """Unit tests for the candidate-description cap in `_format_candidates`.
2
+
3
+ The cap is configurable via CANDIDATE_DESC_CAP (default 0 = no cap). These cover
4
+ the default uncapped behaviour and an explicit ceiling.
5
+
6
+ Run: services/.venv/bin/pytest services/tests/test_format_candidates.py -v
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ import pytest
14
+
15
+ _SERVICES_DIR = Path(__file__).resolve().parent.parent
16
+ if str(_SERVICES_DIR) not in sys.path:
17
+ sys.path.insert(0, str(_SERVICES_DIR))
18
+
19
+ from agents import _prompts # noqa: E402
20
+
21
+
22
+ def _task(desc: str) -> dict:
23
+ return {"task_key": "K-1", "title": "t", "description_text": desc}
24
+
25
+
26
def test_default_no_cap_keeps_full_description(monkeypatch):
    """With CANDIDATE_DESC_CAP set to 0, a 1000-char description is rendered whole, with no ellipsis."""
    monkeypatch.setattr(_prompts, "CANDIDATE_DESC_CAP", 0)
    long_desc = "x" * 1000

    rendered = _prompts._format_candidates([_task(long_desc)])

    # full text present
    assert long_desc in rendered
    # no truncation marker
    assert "…" not in rendered
32
+
33
+
34
def test_positive_cap_truncates_with_marker(monkeypatch):
    """With a cap of 50, a 100-char description is cut to 50 chars followed by an ellipsis."""
    monkeypatch.setattr(_prompts, "CANDIDATE_DESC_CAP", 50)
    candidate = _task("y" * 100)

    rendered = _prompts._format_candidates([candidate])

    capped = "y" * 50 + "…"
    assert capped in rendered
    # nothing past the cap
    overflow = "y" * 51
    assert overflow not in rendered
39
+
40
+
41
def test_description_under_cap_unchanged(monkeypatch):
    """A description shorter than the cap (240) is rendered as-is, with no ellipsis."""
    monkeypatch.setattr(_prompts, "CANDIDATE_DESC_CAP", 240)
    candidate = _task("short desc")

    rendered = _prompts._format_candidates([candidate])

    assert "short desc" in rendered
    assert "…" not in rendered
package/ui.tar.gz CHANGED
Binary file