@meridiona/meridian-darwin-arm64 1.59.0 → 1.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/bin/meridian +0 -0
- package/package.json +1 -1
- package/services/agents/_system_context.py +31 -45
- package/services/agents/run_task_linker_mlx.py +370 -74
- package/services/agents/server.py +33 -7
- package/services/observability/dashboards/classifier-debug.json +3 -3
- package/services/pyproject.toml +1 -1
- package/services/skills/activity/task-classifier/SKILL.md +66 -166
- package/services/tests/evals/build_real_goldens.py +180 -0
- package/services/tests/evals/data/labels/real_curated.json +166 -0
- package/services/tests/evals/data/labels/real_curated_holdout.json +97 -0
- package/services/tests/evals/data/labels/real_curated_holdout2.json +64 -0
- package/services/tests/evals/metrics.py +75 -0
- package/services/tests/evals/test_classifier.py +13 -3
- package/services/tests/test_prompt_cache_equivalence.py +97 -0
- package/ui.tar.gz +0 -0
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.59.0
|
|
1
|
+
1.60.0
|
package/bin/meridian
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@meridiona/meridian-darwin-arm64",
|
|
3
|
-
"version": "1.59.0",
|
|
3
|
+
"version": "1.60.0",
|
|
4
4
|
"description": "Prebuilt Meridian app for macOS arm64 (daemon binary + dashboard + Python services). Installed via @meridiona/meridian.",
|
|
5
5
|
"homepage": "https://github.com/Meridiona/meridian",
|
|
6
6
|
"repository": {
|
|
package/services/agents/_system_context.py
CHANGED
|
@@ -8,49 +8,35 @@ to ensure consistent behavior across entry points.
|
|
|
8
8
|
"""
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
CURRENT CAPABILITY — PM worklog updates
|
|
44
|
-
Given classified sessions, writes a verified worklog comment and posts it to the
|
|
45
|
-
connected PM tool (Jira, Linear, GitHub, etc.) without manual developer input.
|
|
46
|
-
|
|
47
|
-
DATABASE (for verification and ad-hoc queries)
|
|
48
|
-
Path: {_DB_PATH}
|
|
49
|
-
Query: sqlite3 {_DB_SHELL} "<SQL>"
|
|
50
|
-
Tables:
|
|
51
|
-
app_sessions: id, app_name, started_at, ended_at, duration_s, session_text,
|
|
52
|
-
session_text_source, window_titles, category, confidence,
|
|
53
|
-
task_key, task_confidence, task_routing
|
|
54
|
-
pm_tasks: task_key, title, description_text, issue_type, status_raw, is_terminal,
|
|
55
|
-
parent_key, epic_title, sprint_name, assignee_name
|
|
11
|
+
# NOTE: the classifier no longer embeds the DB path or any per-environment value
|
|
12
|
+
# into the prompt — session data and candidate tickets arrive in the message, and
|
|
13
|
+
# the model never shells out to sqlite on this path. SYSTEM_CONTEXT is therefore a
|
|
14
|
+
# pure static constant (no f-string interpolation), which is exactly what lets the
|
|
15
|
+
# MLX prompt-cache treat the whole system+skill prefix as an unchanging, cacheable
|
|
16
|
+
# block reused across every session classified this process.
|
|
17
|
+
|
|
18
|
+
SYSTEM_CONTEXT = """You are **Meridian Intelligence**, the classification engine inside Meridian — a tool that watches a developer's screen and keeps their project-management tickets up to date automatically.
|
|
19
|
+
|
|
20
|
+
YOUR JOB
|
|
21
|
+
Meridian turns screen capture into a stream of work *sessions* (one app, a time span,
|
|
22
|
+
the on-screen text). For each session you are given the session plus the developer's
|
|
23
|
+
open tracked tickets, and you decide ONE thing:
|
|
24
|
+
· **task** — the session is clearly work on one of the candidate tickets → name it.
|
|
25
|
+
· **untracked** — real work, but it doesn't clearly match any candidate ticket. Kept:
|
|
26
|
+
Meridian later turns untracked work into new tickets.
|
|
27
|
+
· **overhead** — idle / personal / unrelated (music, settings, browsing). Discarded.
|
|
28
|
+
Tickets may come from Jira, Linear, GitHub, Trello, or Azure DevOps — treat them the same.
|
|
29
|
+
|
|
30
|
+
WHY ACCURACY MATTERS
|
|
31
|
+
Your classifications are the foundation of the whole pipeline. Every session you link to a
|
|
32
|
+
ticket is later summed with the others on that ticket and summarised into a **worklog update
|
|
33
|
+
posted to the developer's PM tool** on their behalf. So a wrong link is expensive: it injects
|
|
34
|
+
work that never happened into a real ticket's worklog AND hides the genuine untracked work.
|
|
35
|
+
**When the evidence does not clearly fit a candidate ticket, choose `untracked` — never force
|
|
36
|
+
a match.** A correct `untracked` is always better than a wrong `task`.
|
|
37
|
+
|
|
38
|
+
OUTPUT
|
|
39
|
+
Return a single bare JSON object — no preamble, no markdown fences, no text around it.
|
|
40
|
+
Follow the task-classifier skill below for the exact schema, field order, and decision rules.
|
|
41
|
+
Session data and candidate tickets are passed in the message; you do not need to query anything.
|
|
56
42
|
"""
|