@seanyao/roll 0.5.0 → 2.602.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +736 -0
- package/LICENSE +21 -0
- package/README.md +65 -165
- package/bin/dream-test-quality-scan +110 -0
- package/bin/roll +15030 -814
- package/conventions/config.yaml +17 -1
- package/conventions/global/AGENTS.md +146 -100
- package/conventions/global/CLAUDE.md +1 -21
- package/conventions/global/GEMINI.md +8 -22
- package/conventions/global/project_rules.md +9 -0
- package/conventions/templates/backend-service/AGENTS.md +30 -81
- package/conventions/templates/backend-service/GEMINI.md +3 -3
- package/conventions/templates/backend-service/project_rules.md +16 -0
- package/conventions/templates/cli/AGENTS.md +31 -58
- package/conventions/templates/cli/CLAUDE.md +3 -5
- package/conventions/templates/cli/GEMINI.md +3 -3
- package/conventions/templates/cli/project_rules.md +16 -0
- package/conventions/templates/frontend-only/AGENTS.md +29 -64
- package/conventions/templates/frontend-only/GEMINI.md +3 -3
- package/conventions/templates/frontend-only/project_rules.md +14 -0
- package/conventions/templates/fullstack/AGENTS.md +31 -79
- package/conventions/templates/fullstack/CLAUDE.md +1 -1
- package/conventions/templates/fullstack/GEMINI.md +3 -3
- package/conventions/templates/fullstack/project_rules.md +15 -0
- package/lib/README.md +42 -0
- package/lib/__pycache__/github_sync.cpython-314.pyc +0 -0
- package/lib/__pycache__/loop-fmt.cpython-314.pyc +0 -0
- package/lib/__pycache__/loop_result_eval.cpython-314.pyc +0 -0
- package/lib/__pycache__/loop_unstick.cpython-314.pyc +0 -0
- package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
- package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll-home.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll_git.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
- package/lib/__pycache__/slides-render.cpython-314.pyc +0 -0
- package/lib/agent_usage/README.md +49 -0
- package/lib/agent_usage/__init__.py +108 -0
- package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
- package/lib/agent_usage/__pycache__/gemini.cpython-314.pyc +0 -0
- package/lib/agent_usage/__pycache__/kimi.cpython-314.pyc +0 -0
- package/lib/agent_usage/__pycache__/openai.cpython-314.pyc +0 -0
- package/lib/agent_usage/__pycache__/pi.cpython-314.pyc +0 -0
- package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc +0 -0
- package/lib/agent_usage/__pycache__/qwen.cpython-314.pyc +0 -0
- package/lib/agent_usage/gemini.py +127 -0
- package/lib/agent_usage/kimi.py +278 -0
- package/lib/agent_usage/kimi_emit.py +123 -0
- package/lib/agent_usage/openai.py +126 -0
- package/lib/agent_usage/pi.py +200 -0
- package/lib/agent_usage/pi_emit.py +135 -0
- package/lib/agent_usage/qwen.py +128 -0
- package/lib/backfill-pi-usage.py +243 -0
- package/lib/changelog_audit.py +155 -0
- package/lib/changelog_generate.py +263 -0
- package/lib/context_feed_budget.sh +194 -0
- package/lib/github_sync.py +876 -0
- package/lib/i18n/README.md +54 -0
- package/lib/i18n/agent.sh +75 -0
- package/lib/i18n/alert.sh +20 -0
- package/lib/i18n/backlog.sh +96 -0
- package/lib/i18n/brief.sh +5 -0
- package/lib/i18n/changelog.sh +5 -0
- package/lib/i18n/ci.sh +15 -0
- package/lib/i18n/debug.sh +0 -0
- package/lib/i18n/doctor.sh +44 -0
- package/lib/i18n/dream.sh +0 -0
- package/lib/i18n/init.sh +91 -0
- package/lib/i18n/lang.sh +10 -0
- package/lib/i18n/loop.sh +140 -0
- package/lib/i18n/migrate.sh +74 -0
- package/lib/i18n/offboard.sh +31 -0
- package/lib/i18n/onboard.sh +0 -0
- package/lib/i18n/peer.sh +41 -0
- package/lib/i18n/peer_help.sh +25 -0
- package/lib/i18n/peer_reset.sh +7 -0
- package/lib/i18n/peer_status.sh +5 -0
- package/lib/i18n/prices.sh +3 -0
- package/lib/i18n/prices_refresh.sh +17 -0
- package/lib/i18n/prices_show.sh +7 -0
- package/lib/i18n/propose.sh +0 -0
- package/lib/i18n/release.sh +0 -0
- package/lib/i18n/research.sh +0 -0
- package/lib/i18n/review_pr.sh +0 -0
- package/lib/i18n/sentinel.sh +0 -0
- package/lib/i18n/setup.sh +3 -0
- package/lib/i18n/shared.sh +157 -0
- package/lib/i18n/skills/roll-brief.sh +47 -0
- package/lib/i18n/skills/roll-build.sh +97 -0
- package/lib/i18n/skills/roll-design.sh +18 -0
- package/lib/i18n/skills/roll-fix.sh +53 -0
- package/lib/i18n/skills/roll-loop.sh +28 -0
- package/lib/i18n/skills/roll-onboard.sh +33 -0
- package/lib/i18n/skills_catalog.sh +30 -0
- package/lib/i18n/slides.sh +3 -0
- package/lib/i18n/slides_build.sh +38 -0
- package/lib/i18n/slides_delete.sh +19 -0
- package/lib/i18n/slides_list.sh +14 -0
- package/lib/i18n/slides_logs.sh +12 -0
- package/lib/i18n/slides_new.sh +15 -0
- package/lib/i18n/slides_preview.sh +14 -0
- package/lib/i18n/slides_templates.sh +7 -0
- package/lib/i18n/status.sh +21 -0
- package/lib/i18n/update.sh +24 -0
- package/lib/i18n.sh +211 -0
- package/lib/loop-exit-summary.py +393 -0
- package/lib/loop-fmt.py +589 -0
- package/lib/loop_pick_agent.py +316 -0
- package/lib/loop_result_eval.py +469 -0
- package/lib/loop_unstick.py +180 -0
- package/lib/model_prices.py +194 -0
- package/lib/prices/README.md +35 -0
- package/lib/prices/snapshot-2026-05-22.json +22 -0
- package/lib/prices/snapshot-2026-05-23-deepseek.json +15 -0
- package/lib/prices/snapshot-2026-05-23-kimi.json +15 -0
- package/lib/prices_fetcher.py +285 -0
- package/lib/roll-backlog.py +225 -0
- package/lib/roll-brief.py +286 -0
- package/lib/roll-help.py +158 -0
- package/lib/roll-home.py +556 -0
- package/lib/roll-init.py +156 -0
- package/lib/roll-loop-status.py +1683 -0
- package/lib/roll-loop-story.py +191 -0
- package/lib/roll-onboard-render.py +378 -0
- package/lib/roll-peer.py +252 -0
- package/lib/roll-plan-validate.py +386 -0
- package/lib/roll-setup.py +102 -0
- package/lib/roll-status.py +367 -0
- package/lib/roll_git.py +41 -0
- package/lib/roll_render.py +414 -0
- package/lib/slides/components/README.md +123 -0
- package/lib/slides/components/cards-2.html +9 -0
- package/lib/slides/components/cards-3.html +9 -0
- package/lib/slides/components/cards-4.html +9 -0
- package/lib/slides/components/compare.html +22 -0
- package/lib/slides/components/highlight.html +9 -0
- package/lib/slides/components/pipeline.html +12 -0
- package/lib/slides/components/plain.html +7 -0
- package/lib/slides/components/quote.html +4 -0
- package/lib/slides/components/timeline.html +9 -0
- package/lib/slides/templates/introduction-v3.html +571 -0
- package/lib/slides/templates/pitch.html +0 -0
- package/lib/slides-render.py +778 -0
- package/lib/slides-validate.py +357 -0
- package/lib/test_quality_gate.py +143 -0
- package/package.json +8 -7
- package/skills/roll-.changelog/SKILL.md +406 -33
- package/skills/roll-.clarify/SKILL.md +5 -2
- package/skills/roll-.dream/SKILL.md +374 -0
- package/skills/roll-.echo/SKILL.md +5 -2
- package/skills/roll-.qa/SKILL.md +57 -3
- package/skills/roll-.review/SKILL.md +42 -3
- package/skills/roll-brief/SKILL.md +209 -0
- package/skills/roll-build/SKILL.md +308 -63
- package/skills/roll-debug/SKILL.md +341 -162
- package/skills/roll-debug/injectable-bb.js +263 -0
- package/skills/roll-deck/SKILL.md +296 -0
- package/skills/roll-design/ENGINEERING_CHECKLIST.md +1 -1
- package/skills/roll-design/SKILL.md +733 -94
- package/skills/roll-doc/SKILL.md +595 -0
- package/skills/roll-doctor/SKILL.md +192 -0
- package/skills/roll-fix/SKILL.md +149 -32
- package/skills/{roll-jot → roll-idea}/SKILL.md +18 -10
- package/skills/roll-loop/SKILL.md +579 -0
- package/skills/roll-notes/SKILL.md +103 -0
- package/skills/roll-onboard/SKILL.md +234 -0
- package/skills/roll-peer/SKILL.md +336 -0
- package/skills/roll-propose/SKILL.md +157 -0
- package/skills/roll-review-pr/SKILL.md +58 -0
- package/skills/roll-sentinel/SKILL.md +11 -2
- package/skills/roll-spar/SKILL.md +8 -6
- package/template/.github/workflows/ci.yml +5 -2
- package/template/AGENTS.md +20 -74
- package/skills/roll-research/SKILL.md +0 -307
- package/skills/roll-research/references/schema.json +0 -162
- package/skills/roll-research/scripts/md_to_pdf.py +0 -289
- package/tools/roll-fetch/SKILL.md +0 -182
- package/tools/roll-fetch/package.json +0 -15
- package/tools/roll-fetch/smart-web-fetch.js +0 -558
- package/tools/roll-probe/SKILL.md +0 -84
- /package/template/{BACKLOG.md → .roll/backlog.md} +0 -0
|
@@ -0,0 +1,1683 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
roll-loop-status — render the `roll loop` health dashboard.
|
|
4
|
+
|
|
5
|
+
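Reads (all per-project; slug = $ROLL_MAIN_SLUG when set, else <basename>-<md5_6chars> of the normalized git remote URL, falling back to the project root path when there is no remote):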
|
|
6
|
+
$ROLL_SHARED_ROOT/loop/events-<slug>.ndjson structured per-cycle events
|
|
7
|
+
$ROLL_SHARED_ROOT/loop/cron-<slug>.log wall-clock dur + cost per cycle
|
|
8
|
+
$ROLL_SHARED_ROOT/loop/state-<slug>.yaml idle | running | paused
|
|
9
|
+
./.roll/backlog.md story id → description
|
|
10
|
+
|
|
11
|
+
Writes (stdout):
|
|
12
|
+
Static 100-col colored print, EN/ZH paired rows. Designed for a 5-10s glance,
|
|
13
|
+
leaves the dashboard in scrollback. Honors NO_COLOR; degrades to 80 cols.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
python3 lib/roll-loop-status.py # default 3-day window
|
|
17
|
+
python3 lib/roll-loop-status.py --days 7
|
|
18
|
+
python3 lib/roll-loop-status.py --no-color
|
|
19
|
+
python3 lib/roll-loop-status.py --en | --zh # collapse bilingual rows
|
|
20
|
+
ROLL_RENDER_FIXTURE=1 python3 lib/roll-loop-status.py # render with fixture data (test only)
|
|
21
|
+
|
|
22
|
+
Wire it in bin/roll under `loop status` (replace _loop_status with a call to
|
|
23
|
+
this script).
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
import argparse, hashlib, json, os, re, subprocess, sys, time
|
|
28
|
+
from collections import defaultdict
|
|
29
|
+
from datetime import datetime, timedelta, timezone
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
32
|
+
|
|
33
|
+
# Display TZ defaults to Asia/Shanghai (UTC+8); a TZ value already present in
# the environment wins because setdefault() never overwrites it. Internal
# datetimes stay UTC; only display conversions honor this. Setting the process
# TZ makes .astimezone() without args resolve to that zone across all renderers.
os.environ.setdefault("TZ", "Asia/Shanghai")
# time.tzset() is only available on Unix; skip it elsewhere instead of failing
# at import time with AttributeError.
if hasattr(time, "tzset"):
    time.tzset()
|
|
38
|
+
|
|
39
|
+
# Shared rendering primitives — see lib/roll_render.py for the design system.
# _LIB_DIR is the directory holding this script, with symlinks resolved by
# realpath(); it is put first on sys.path so the sibling-module imports that
# follow resolve from that directory.
_LIB_DIR = os.path.dirname(os.path.realpath(__file__))
if _LIB_DIR not in sys.path:
    sys.path.insert(0, _LIB_DIR)
|
|
43
|
+
import roll_render
|
|
44
|
+
from roll_render import (
|
|
45
|
+
PAL, BOLD, RESET, COLS, c, strw, pad, row,
|
|
46
|
+
fmt_dur, fmt_delta, fmt_tokens, trunc, empty_rollup,
|
|
47
|
+
section_head, metric, metric_dur, metric_dollar, metric_tokens,
|
|
48
|
+
day_band, cycle_row,
|
|
49
|
+
)
|
|
50
|
+
from roll_git import git_remote_url as _git_remote_url
|
|
51
|
+
|
|
52
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
53
|
+
# Paths — must match bin/roll's _project_slug + _SHARED_ROOT defaults
|
|
54
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
55
|
+
def project_slug(path: Optional[str] = None) -> str:
    """Return the slug that names this project's loop files.

    Resolution order:
      1. ``ROLL_MAIN_SLUG`` from the environment (US-LOOP-006: exported by the
         cycle wrapper), returned verbatim after stripping whitespace.
      2. ``<name>-<md5[:6]>`` of the normalized git remote URL (US-OBS-010),
         which gives a stable identity across machines.
      3. ``<name>-<md5[:6]>`` of the project root path.

    ``path`` defaults to the current working directory. When git reports a
    common dir ending in ``/.git`` the path is rewritten to the directory that
    holds it, so linked worktrees map to the main tree (FIX-034 in bin/roll).
    """
    override = os.environ.get("ROLL_MAIN_SLUG", "").strip()
    if override:
        return override

    root = os.path.realpath(path or os.getcwd())
    try:
        git_common = subprocess.check_output(
            ["git", "-C", root, "rev-parse", "--git-common-dir"],
            stderr=subprocess.DEVNULL, text=True
        ).strip()
    except Exception:
        # Not a git checkout, or git unavailable: keep the resolved path.
        git_common = ""
    if git_common.endswith("/.git"):
        root = git_common[:-len("/.git")]

    # The string that is both named and hashed: the normalized remote URL when
    # one is reported, otherwise the project root path.
    identity = root
    remote = _git_remote_url(root)
    if remote:
        # Normalize: drop trailing slashes and ".git", rewrite the scp-like
        # form git@HOST:PATH to https://HOST/PATH, then lowercase.
        remote = remote.rstrip("/")
        if remote.endswith(".git"):
            remote = remote[:-4]
        scp_like = re.match(r"^git@([^:]+):(.+)$", remote)
        if scp_like:
            remote = f"https://{scp_like.group(1)}/{scp_like.group(2)}"
        identity = remote.lower()

    stem = re.sub(r"[^A-Za-z0-9]+", "-", os.path.basename(identity)).strip("-")
    digest = hashlib.md5(identity.encode()).hexdigest()[:6]
    return f"{stem}-{digest}"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def shared_root() -> Path:
    """Return the shared roll directory.

    Uses ``ROLL_SHARED_ROOT`` when it is set to a non-empty value, otherwise
    ``~/.shared/roll`` expanded for the current user.
    """
    configured = os.environ.get("ROLL_SHARED_ROOT")
    if configured:
        return Path(configured)
    return Path(os.path.expanduser("~/.shared/roll"))
|
|
96
|
+
|
|
97
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
98
|
+
# Project path resolution — mirrors bin/roll's _loop_resolve_project_path
|
|
99
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
100
|
+
def _resolve_project_path(slug: str) -> Optional[Path]:
    """Resolve a project slug back to the absolute project root directory.

    Mirrors bin/roll's _loop_resolve_project_path. Sources are tried in this
    order and the first one naming an existing directory wins:

      1. ROLL_MAIN_PROJECT env var (set by cycle runner)
      2. macOS launchd plist WorkingDirectory for com.roll.loop.<slug>
      3. crontab entry referencing run-<slug>.sh
      4. inner runner script at <shared root>/loop/run-<slug>-inner.sh

    Returns None when no source yields a directory. Failures while reading a
    source are swallowed so the next source is still tried.
    """
    def _as_dir(raw: str) -> Optional[Path]:
        # Keep a candidate only when it is an existing directory.
        candidate = Path(raw)
        return candidate if candidate.is_dir() else None

    # 1. Env var
    from_env = os.environ.get("ROLL_MAIN_PROJECT", "").strip()
    if from_env:
        found = _as_dir(from_env)
        if found is not None:
            return found

    # 2. macOS launchd plist
    if sys.platform == "darwin":
        plist_path = Path.home() / "Library" / "LaunchAgents" / f"com.roll.loop.{slug}.plist"
        if plist_path.exists():
            try:
                hit = re.search(
                    r"<key>WorkingDirectory</key>\s*<string>([^<]+)</string>",
                    plist_path.read_text(errors="ignore")
                )
                if hit:
                    found = _as_dir(hit.group(1))
                    if found is not None:
                        return found
            except Exception:
                pass

    # 3. crontab
    try:
        crontab = subprocess.check_output(
            ["crontab", "-l"], stderr=subprocess.DEVNULL, text=True
        )
        for entry in crontab.splitlines():
            if f"run-{slug}.sh" not in entry:
                continue
            # Match: cd "<path>"
            hit = re.search(r'cd\s+"([^"]+)"', entry)
            if not hit:
                continue
            found = _as_dir(hit.group(1))
            if found is not None:
                return found
    except Exception:
        pass

    # 4. Inner runner script (grep for ROLL_MAIN_PROJECT=)
    runner = shared_root() / "loop" / f"run-{slug}-inner.sh"
    if runner.exists():
        try:
            hit = re.search(r'export ROLL_MAIN_PROJECT="([^"]+)"',
                            runner.read_text(errors="ignore"))
            if hit:
                found = _as_dir(hit.group(1))
                if found is not None:
                    return found
        except Exception:
            pass

    return None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _loop_runtime_dir_py(slug: str) -> Optional[Path]:
    """Return the project-local loop runtime dir, <project>/.roll/loop.

    Mirrors bin/roll's _loop_runtime_dir. A non-empty
    ROLL_PROJECT_RUNTIME_DIR (test sandbox override) is returned as-is;
    otherwise the project root is resolved from the slug, and None is
    returned when it cannot be resolved.
    """
    override = os.environ.get("ROLL_PROJECT_RUNTIME_DIR", "").strip()
    if override:
        return Path(override)
    project = _resolve_project_path(slug)
    return None if project is None else project / ".roll" / "loop"
|
|
174
|
+
|
|
175
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
176
|
+
# Loaders
|
|
177
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
178
|
+
def load_events(slug: str, days: int) -> List[Dict[str, Any]]:
    """Load structured loop events newer than the display window.

    FIX-137: reads the project-local .roll/loop/events.ndjson first (mirrors
    the _loop_event writer, US-LOOP-020) and then the shared
    events-<slug>.ndjson, which still holds historical data that hasn't been
    migrated. For each head file the rotated siblings ``.ndjson.1`` to
    ``.ndjson.4`` are read as well.

    Args:
        slug: project slug used to locate the event files.
        days: window size in days; one extra day of grace is kept.

    Returns:
        Event dicts sorted oldest-first. Each carries a parsed ``_ts``
        datetime next to the original ``ts`` string. Lines that are blank,
        duplicated, not valid JSON, or lack a usable ``ts`` are skipped.
    """
    candidates: List[Path] = []

    # Primary: project-local (same path as _loop_event writer, US-LOOP-020)
    rt_dir = _loop_runtime_dir_py(slug)
    if rt_dir is not None:
        head = rt_dir / "events.ndjson"
        candidates.append(head)
        candidates.extend(head.with_suffix(f".ndjson.{i}") for i in range(1, 5))

    # Fallback: shared (old data pre-US-LOOP-020, migration not yet done)
    shared_head = shared_root() / "loop" / f"events-{slug}.ndjson"
    candidates.append(shared_head)
    candidates.extend(shared_head.with_suffix(f".ndjson.{i}") for i in range(1, 5))

    existing = [p for p in candidates if p.exists()]
    if not existing:
        return []
    cutoff = datetime.now(timezone.utc) - timedelta(days=days + 1)  # +1 for grace
    out: List[Dict[str, Any]] = []
    # Dedup on the raw JSON line (rotation is mv, so duplicates only appear
    # from manual ops — defensive).
    seen: set[str] = set()
    for p in existing:
        try:
            # JSON text is UTF-8; undecodable bytes are dropped (errors=
            # "ignore", as the sibling loaders do) so one corrupt byte cannot
            # abort the whole dashboard.
            f = p.open(encoding="utf-8", errors="ignore")
        except OSError:
            # Rotated away or unreadable between exists() and open(): skip.
            continue
        with f:
            for line in f:
                line = line.strip()
                if not line or line in seen:
                    continue
                seen.add(line)
                try:
                    e = json.loads(line)
                    e["_ts"] = datetime.fromisoformat(e["ts"].replace("Z", "+00:00"))
                    # A timestamp without a UTC offset cannot be compared with
                    # the aware cutoff (TypeError) and is skipped below too.
                    if e["_ts"] >= cutoff:
                        out.append(e)
                except Exception:
                    # Deliberate best-effort: one malformed line must never
                    # hide the rest of the event stream.
                    continue
    out.sort(key=lambda e: e["_ts"])
    if os.environ.get("ROLL_DEBUG_LOAD"):
        print(f"roll-loop-status: loaded {len(out)} events from {len(existing)} files",
              file=sys.stderr)
    return out
|
|
224
|
+
|
|
225
|
+
# cron.log entry format (from bin/roll):
|
|
226
|
+
# "03:49:25 cycle done — done · 981s · $4.53"
|
|
227
|
+
# "03:57:35 cycle done — done · 1 tcr · 538s · $3.20"
|
|
228
|
+
# Capture groups: 1 = HH:MM, 2 = SS, 3 = outcome word, 4 = optional tcr count,
# 5 = duration in whole seconds, 6 = dollar amount (digits and dots).
_CRON_PAT = re.compile(
    r"^(\d{2}:\d{2}):(\d{2})\s+cycle done — (\w+)"  # timestamp + outcome
    r"(?:\s*·\s*(\d+)\s+tcr)?"  # "· <n> tcr" segment, absent on some lines
    r"\s*·\s*(\d+)s"  # "· <n>s" duration
    r"\s*·\s*\$([\d.]+)"  # "· $<amount>" cost
)
|
|
234
|
+
|
|
235
|
+
def load_cron_log(slug: str) -> List[Dict[str, Any]]:
    """Parse "cycle done" lines from the cron log, in file order.

    FIX-137: the project-local .roll/loop/cron.log is preferred; the shared
    cron-<slug>.log is used when the local file is absent. Returns an empty
    list when neither exists.

    Each entry holds the local ``hhmm`` string plus ``ss``, ``outcome``,
    ``tcr`` (0 when the line has no tcr segment), ``duration_s`` and ``cost``.
    """
    runtime = _loop_runtime_dir_py(slug)
    log_path = None if runtime is None else runtime / "cron.log"
    if log_path is None or not log_path.exists():
        # Fallback: shared
        log_path = shared_root() / "loop" / f"cron-{slug}.log"
    if not log_path.exists():
        return []

    entries: List[Dict[str, Any]] = []
    with log_path.open(errors="ignore") as fh:
        for raw in fh:
            # Bug D: cron.log lines are written with ANSI color escapes
            # (\033[90m...\033[0m). Strip them before regex matching.
            hit = _CRON_PAT.match(roll_render.strip_ansi(raw).strip())
            if hit is None:
                continue
            hhmm, ss, outcome, tcr, duration, cost = hit.groups()
            entries.append({
                "hhmm": hhmm,
                "ss": int(ss),
                "outcome": outcome,
                "tcr": int(tcr or 0),
                "duration_s": int(duration),
                "cost": float(cost),
            })
    return entries
|
|
264
|
+
|
|
265
|
+
def load_state(slug: str) -> Dict[str, str]:
    """Tiny YAML reader — only the flat keys bin/roll writes.

    FIX-137: checks project-local .roll/loop/state.yaml first, falls back to
    shared state-<slug>.yaml.

    Returns:
        A mapping of ``key: value`` lines with surrounding whitespace and
        quotes stripped from the value; an empty dict when neither file
        exists. Lines that do not look like ``key: value`` are ignored.
    """
    # Primary: project-local
    rt_dir = _loop_runtime_dir_py(slug)
    path = (rt_dir / "state.yaml") if rt_dir is not None else None
    if path is None or not path.exists():
        # Fallback: shared
        path = shared_root() / "loop" / f"state-{slug}.yaml"
    if not path.exists():
        return {}
    out: Dict[str, str] = {}
    # Open via a context manager so the handle is closed deterministically
    # rather than whenever the iterator happens to be garbage-collected.
    with path.open(errors="ignore") as f:
        for line in f:
            m = re.match(r"^([\w_]+):\s*(.*?)\s*$", line)
            if m:
                out[m.group(1)] = m.group(2).strip().strip('"').strip("'")
    return out
|
|
284
|
+
|
|
285
|
+
def load_backlog(project_root: Optional[Path] = None) -> Dict[str, str]:
    """Map story id → description from .roll/backlog.md table rows.

    A row is recognized when its first cell is a story id, optionally written
    as a markdown link (``[ID](target)``); the second cell is the description.

    Args:
        project_root: directory containing ``.roll/backlog.md``; defaults to
            the current working directory.

    Returns:
        ``{story_id: description}``; empty when the file does not exist.
    """
    path = (project_root or Path()) / ".roll/backlog.md"
    if not path.exists():
        return {}
    out: Dict[str, str] = {}
    # The id part accepts multi-segment and alphanumeric ids (US-LOOP-006,
    # US-I18N-001), not just LETTERS-digits; otherwise those rows are silently
    # dropped and their cycles render without a description. The first char of
    # every segment must still be a letter.
    pat = re.compile(
        r"^\|\s*(?:\[)?([A-Z][A-Z0-9]*(?:-[A-Z][A-Z0-9]*)*-\d+)"
        r"(?:\]\([^)]+\))?\s*\|\s*([^|]+?)\s*\|"
    )
    # Decode explicitly as UTF-8 and drop undecodable bytes so non-ASCII
    # descriptions do not depend on the process locale.
    with path.open(encoding="utf-8", errors="ignore") as f:
        for line in f:
            m = pat.match(line)
            if m:
                out[m.group(1)] = m.group(2)
    return out
|
|
298
|
+
|
|
299
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
300
|
+
# Cycle aggregation — group events by cycle label; attach cron + story id
|
|
301
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
302
|
+
# FIX-108: each segment was [A-Z]+ (letters only), so alphanumeric segments
|
|
303
|
+
# like I18N / K8S / D2 / S3 / 2FA failed to match — dashboard silently dropped
|
|
304
|
+
# any story id with a mixed-letter-digit segment (US-I18N-001 etc.). First
|
|
305
|
+
# char must still be a letter so "001-002" doesn't false-positive as an id.
|
|
306
|
+
# Story id: dash-separated uppercase alphanumeric segments, each starting with
# a letter, ending in a numeric segment (e.g. FIX-137, US-I18N-001).
_STORY_ID_PAT = re.compile(r"\b([A-Z][A-Z0-9]*(?:-[A-Z][A-Z0-9]*)*-\d+)\b")
# Pull-request number as it appears in a ".../pull/<n>" URL.
_PR_NUM_PAT = re.compile(r"/pull/(\d+)")


def _extract_story_id(ev_detail: str) -> Optional[str]:
    """Return the first story id found in ``ev_detail``, or None.

    An empty or missing detail yields None without running the pattern.
    """
    found = _STORY_ID_PAT.search(ev_detail) if ev_detail else None
    if found is None:
        return None
    return found.group(1)


def _extract_pr_num(url: str) -> Optional[int]:
    """Return the number following ``/pull/`` in ``url`` as an int, or None.

    An empty or missing url yields None without running the pattern.
    """
    found = _PR_NUM_PAT.search(url) if url else None
    if found is None:
        return None
    return int(found.group(1))
|
|
320
|
+
|
|
321
|
+
def _normalize_pr_outcome(raw: str) -> str:
    """US-VIEW-011: collapse a pr event outcome to the 3-state landing tracker.

    'open' (PR created), 'merged' (auto-merge landed) and 'closed' (PR closed
    without merge) pass through unchanged. Every other value — including the
    'ok' that legacy events wrote at PR creation — is reported as 'open' so
    old rows don't render as an unknown state.
    """
    return raw if raw in ("merged", "closed", "open") else "open"
|
|
331
|
+
|
|
332
|
+
def normalize_cycle_label(lbl: str) -> str:
    """Drop a leading 'loop/cycle-' branch-name prefix from ``lbl``.

    This lets pr events bucket with their cycle_start/end siblings
    (Bug A — see plan §3). Labels without the prefix are returned unchanged.
    """
    prefix = "loop/cycle-"
    if not lbl.startswith(prefix):
        return lbl
    return lbl[len(prefix):]
|
|
338
|
+
|
|
339
|
+
def aggregate(events: List[Dict[str, Any]], cron: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Build a per-cycle list (newest first), tmp-* filtered.

    Events are bucketed by their normalized ``label``. Each bucket collects
    start/end timestamps, outcome, story id, PR details, agent, summed token
    usage and the last test/build failure detail. Buckets without a
    ``cycle_start`` event are dropped.

    Args:
        events: event dicts carrying a parsed ``_ts`` datetime.
        cron: cron-log entries with ``hhmm``, ``ss`` and ``duration_s`` keys.

    Returns:
        Cycle dicts sorted by start time, newest first. A cycle gets ``cron``
        when a cron entry lies within 120s of its anchor time of day, and
        ``duration_s`` from the event timestamps or, failing that, from cron.
    """
    by_label: Dict[str, Dict[str, Any]] = defaultdict(
        lambda: {"start": None, "end": None, "outcome": None, "story": None,
                 "pr": None, "label": None, "fail_detail": None}
    )
    for e in events:
        # A null or non-string label (hand-edited / corrupt line) cannot be
        # bucketed; skip it instead of crashing on .startswith().
        raw_label = e.get("label")
        if not isinstance(raw_label, str):
            continue
        lbl = normalize_cycle_label(raw_label)
        if not lbl or lbl.startswith("tmp-"):
            continue
        cy = by_label[lbl]
        cy["label"] = lbl
        stage = e.get("stage", "")
        detail = e.get("detail", "")
        if stage == "cycle_start":
            cy["start"] = e["_ts"]
        elif stage == "cycle_end":
            cy["end"] = e["_ts"]
            cy["outcome"] = e.get("outcome", "done")
        elif stage == "idle":
            # Bug B: cycles that find no Todo emit 'idle' instead of 'cycle_end'.
            # Treat as terminal with a distinct outcome so they stop showing
            # as 'still running' forever.
            cy["end"] = e["_ts"]
            cy["outcome"] = "idle"
        elif stage == "pr":
            cy["pr"] = detail
            cy["pr_ts"] = e["_ts"]  # used to match cron-log lines (inner cycle done)
            # US-VIEW-011: capture PR # and landing outcome. Later pr events
            # win (open → merged/closed finalization in cycle_end path).
            pr_num = _extract_pr_num(detail)
            if pr_num is not None:
                cy["pr_num"] = pr_num
            # NOTE(review): reconstructed as unconditional (recorded even when
            # the detail carries no PR number) — confirm against the original
            # indentation.
            cy["pr_outcome"] = _normalize_pr_outcome(e.get("outcome", ""))
            sid = _extract_story_id(detail) or _extract_story_id(lbl)
            if sid and not cy.get("story"):
                cy["story"] = sid
        elif stage == "pick_todo":
            sid = _extract_story_id(detail)
            if sid:
                cy["story"] = sid
        elif stage == "agent_used":
            # FIX-119: non-claude agents don't expose model in stream-json.
            # The inner runner emits an agent_used event with the agent name
            # so the dashboard can show it when cy["model"] is None.
            if detail:
                cy["agent"] = detail
        elif stage == "usage":
            # US-LOOP-004: loop-fmt emits this with full token / cost data.
            # Detail is a dict (not the legacy string form).
            # US-VIEW-010: token counts are per-turn deltas — sum across events
            # so list-price cost computed from totals matches actual API usage.
            # Non-additive fields (model, cost_reported_usd, duration_ms) take
            # the last value seen.
            d = e.get("detail") or {}
            if isinstance(d, dict):
                prev = cy.get("usage_event") or {}
                merged = dict(prev)
                merged.update(d)
                for k in ("input_tokens", "output_tokens",
                          "cache_creation_tokens", "cache_read_tokens"):
                    merged[k] = int(prev.get(k) or 0) + int(d.get(k) or 0)
                cy["usage_event"] = merged
        elif stage in ("test", "build") and e.get("outcome") == "fail":
            cy["fail_detail"] = detail or stage

    # Drop incomplete entries; sort newest-first by start time.
    cycles = [v for v in by_label.values() if v["start"]]
    cycles.sort(key=lambda x: x["start"], reverse=True)

    # Match cron-log entries by HH:MM:SS proximity to the inner cycle-done
    # signal (within ±120s). cron.log is overwritten each cycle, so only the
    # most recent cycle gets a cron entry — but it carries the only cost we
    # have. duration_s falls back to the cron value only when the event
    # timestamps are incomplete.
    for cy in cycles:
        anchor = cy.get("pr_ts") or cy.get("end") or cy.get("start")
        # NOTE(review): the time of day is read in whatever UTC offset the
        # event timestamp carries, while cron.log stamps have no date or
        # offset — confirm both sides use the same clock.
        target = anchor.hour * 3600 + anchor.minute * 60 + anchor.second
        best = None
        best_dt = 999
        for cr in cron:
            ch, cm = cr["hhmm"].split(":")
            csec = int(ch) * 3600 + int(cm) * 60 + cr["ss"]
            dt = abs(csec - target)
            # Times of day are circular: 23:59:50 and 00:00:10 are 20s apart,
            # not 86380s, so measure the shorter way around midnight.
            dt = min(dt, 86400 - dt)
            if dt < best_dt:
                best_dt = dt
                best = cr
        if best and best_dt <= 120:
            cy["cron"] = best

        # Prefer the duration computed from event timestamps; use the cron
        # figure only when start/end are not both known.
        if cy.get("end") and cy.get("start"):
            cy["duration_s"] = int((cy["end"] - cy["start"]).total_seconds())
        elif cy.get("cron"):
            cy["duration_s"] = cy["cron"]["duration_s"]

        # Default outcome if missing (e.g. cycle never ended → still running, or crashed).
        if not cy.get("outcome"):
            cy["outcome"] = "running" if not cy.get("end") else "unknown"
    return cycles
|
|
438
|
+
|
|
439
|
+
def load_claude_session_usage(label: str, slug: str) -> Optional[Dict[str, Any]]:
    """Backfill token / cost data from claude's own session log.

    Used when the events stream lacks usage data. Each cycle runs in a
    worktree at ~/.shared/roll/worktrees/<slug>-cycle-<label>, whose session
    log is looked up under ~/.claude/projects/-<escaped-worktree-path>/ as a
    ``*.jsonl`` file. Tokens are summed across all entries carrying
    ``message.usage``; the model is the first one seen; ``total_cost_usd`` and
    ``duration_ms`` come from the last ``result`` entry that has them.

    Args:
        label: normalized cycle label.
        slug: project slug.

    Returns:
        {model, input_tokens, output_tokens, cache_creation_tokens,
        cache_read_tokens, cost_reported_usd, duration_ms}, or None when no
        session log exists or it recorded no input/output tokens.
    """
    # Derive the worktree path from the real home directory rather than a
    # hard-coded /Users/<name>, so non-macOS homes and relocated $HOME work.
    home = Path.home()
    worktree_path = f"{home}/.shared/roll/worktrees/{slug}-cycle-{label}"
    # Claude escapes both '/' and '.' to '-' in the project dir name.
    proj_name = "-" + worktree_path.replace("/", "-").replace(".", "-").lstrip("-")
    proj_dir = home / ".claude" / "projects" / proj_name
    if not proj_dir.exists():
        return None
    # Take the largest .jsonl in that dir (one cycle = one session).
    jsonls = list(proj_dir.glob("*.jsonl"))
    if not jsonls:
        return None
    path = max(jsonls, key=lambda p: p.stat().st_size)

    sums = {"input_tokens": 0, "output_tokens": 0,
            "cache_creation_tokens": 0, "cache_read_tokens": 0}
    model = None
    cost = None
    duration_ms = None
    with path.open(errors="ignore") as f:
        for line in f:
            try:
                e = json.loads(line)
            except Exception:
                continue
            if not isinstance(e, dict):
                # Valid JSON but not an object (list, number, ...): no usage.
                continue
            # result event has total_cost_usd + duration_ms
            if e.get("type") == "result":
                cost = e.get("total_cost_usd") or cost
                duration_ms = e.get("duration_ms") or duration_ms
                continue
            # assistant turns carry per-message usage
            msg = e.get("message")
            if not isinstance(msg, dict):
                continue
            usage = msg.get("usage")
            if not isinstance(usage, dict) or not usage:
                continue
            if msg.get("model") and not model:
                model = msg["model"]
            sums["input_tokens"] += int(usage.get("input_tokens") or 0)
            sums["output_tokens"] += int(usage.get("output_tokens") or 0)
            sums["cache_creation_tokens"] += int(usage.get("cache_creation_input_tokens") or 0)
            sums["cache_read_tokens"] += int(usage.get("cache_read_input_tokens") or 0)
    if sums["input_tokens"] == 0 and sums["output_tokens"] == 0:
        return None
    return {"model": model, **sums,
            "cost_reported_usd": cost, "duration_ms": duration_ms}
|
|
493
|
+
|
|
494
|
+
def backfill_usage_from_claude_sessions(cycles: List[Dict[str, Any]], slug: str) -> None:
    """Populate per-cycle token usage, model and list-price cost, in place.

    Writes cy['input_tokens'], cy['output_tokens'], cy['cache_creation_tokens'],
    cy['cache_read_tokens'], cy['model'], cy['cost_list'], cy['cost_currency']
    and cy['cost_list_legacy'] (plus cy['duration_s'] when it is still unset).
    Two paths:
      1. usage_event from events stream (US-LOOP-004 writer side) — authoritative
      2. claude session JSONL backfill — for cycles that ran before the
         writer existed, or on machines where events.ndjson got truncated

    US-VIEW-012: dashboard exposes input + output only (the model's actual
    work). cache_creation / cache_read remain in the usage_event for
    compute_list_cost — they're still part of true API cost — but no longer
    surface in the UI where they previously inflated visible token totals.

    Args:
        cycles: cycle dicts; each one is mutated in place.
        slug: passed through to load_claude_session_usage for path 2.
    """
    import importlib.util
    spec = importlib.util.spec_from_file_location(
        "model_prices", os.path.join(_LIB_DIR, "model_prices.py"))
    mp = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mp)

    # The cycle keys double as compute_list_cost's keyword names.
    token_keys = ("input_tokens", "output_tokens",
                  "cache_creation_tokens", "cache_read_tokens")

    for cy in cycles:
        # Path 1: usage event written by loop-fmt at result time.
        ue = cy.get("usage_event")
        if isinstance(ue, dict) and (ue.get("input_tokens") or ue.get("output_tokens")):
            for key in token_keys:
                cy[key] = int(ue.get(key) or 0)
            cy["model"] = ue.get("model")
            # US-VIEW-014: prefer the cost frozen at cycle_end so a later
            # prices refresh never rewrites a historical cycle's cost. Only
            # legacy events (pre-US-VIEW-014) fall back to recomputing — and
            # the row gets a muted [legacy] tag so it can't be mistaken for
            # the authoritative value.
            persisted = ue.get("cost_list_usd")
            if persisted is not None:
                cy["cost_list"] = float(persisted)
                cy["cost_currency"] = ue.get("cost_currency") or "USD"
                cy["cost_list_legacy"] = False
            else:
                # Price the normalised counts stored above, not the raw event
                # values: a key present with null (or a numeric string) would
                # otherwise reach compute_list_cost un-coerced.
                cy["cost_list"] = mp.compute_list_cost(
                    cy["model"], **{key: cy[key] for key in token_keys})
                cy["cost_currency"] = mp.currency_for(cy["model"]) or "USD"
                cy["cost_list_legacy"] = True
            if ue.get("duration_ms") and not cy.get("duration_s"):
                cy["duration_s"] = int(ue["duration_ms"] / 1000)
            continue

        # Path 2: salvage from claude's own session log.
        if cy.get("input_tokens") or cy.get("output_tokens"):
            continue
        u = load_claude_session_usage(cy.get("label", ""), slug)
        if not u:
            continue
        for key in token_keys:
            cy[key] = int(u.get(key) or 0)
        cy["model"] = u.get("model")
        cy["cost_list"] = mp.compute_list_cost(
            cy["model"], **{key: cy[key] for key in token_keys})
        cy["cost_currency"] = mp.currency_for(cy["model"]) or "USD"
        # US-VIEW-014: session salvage never has a frozen cycle_end cost, so
        # this path is always legacy.
        cy["cost_list_legacy"] = True
        if u.get("duration_ms") and not cy.get("duration_s"):
            cy["duration_s"] = int(u["duration_ms"] / 1000)
|
|
567
|
+
|
|
568
|
+
def load_pr_merges_from_git(days: int) -> Dict[str, Dict[str, Any]]:
    """Repair fallback: when events.ndjson dropped the pr / cycle_end events
    for a cycle (events writer regressions, or cycle_end fired before PR
    merged), git log still has the merge commit. Two known subject formats:

    - Branch-named (Merge commit / older squash): "Merge pull request #N
      from seanyao/loop/cycle-LABEL" — the branch name carries the label.
    - Squash with default-title (newer GitHub UI / `gh pr merge --squash`):
      "loop cycle LABEL (#N)" — space-separated, no slash.

    FIX-107: the old --grep="loop/cycle-" + label_re missed the squash
    subject entirely, so PRs merged AFTER cycle_end never got their
    pr_outcome promoted to 'merged' on the dashboard.

    Args:
        days: dashboard window in days; the git query looks back days + 1.

    Returns:
        Mapping of cycle label → {"pr": first "#N" number in the subject as a
        string (or None), "stories": tokens matching the story-id pattern in
        subject + body, first-seen order, no duplicates}. Empty when the git
        invocation fails for any reason.
    """
    try:
        out = subprocess.check_output(
            ["git", "log", f"--since={days + 1} days ago",
             "--grep=loop[ /]cycle", "--extended-regexp",
             "--format=%H|||%s|||%b<<<END>>>"],
            text=True, errors="ignore",
            # Best-effort probe: keep git's own error chatter (e.g. outside a
            # repository) from being printed into the dashboard output.
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        # Deliberately broad: any failure here just means "no repair data".
        return {}
    result: Dict[str, Dict[str, Any]] = {}
    # Accept both `loop/cycle-LABEL` and `loop cycle LABEL` (with or without
    # the leading `-` separator after `cycle`). LABEL = YYYYMMDD-HHMMSS-PID.
    label_re = re.compile(r"loop[ /]cycle[-\s](\d{8}-\d+-\d+)")
    pr_re = re.compile(r"#(\d+)")
    story_re = re.compile(r"\b([A-Z]+(?:-[A-Z]+)*-\d+)\b")
    for chunk in out.split("<<<END>>>"):
        chunk = chunk.strip()
        if not chunk:
            continue
        try:
            _, subj, body = chunk.split("|||", 2)
        except ValueError:
            continue
        text = f"{subj}\n{body}"
        m = label_re.search(text)
        if not m:
            continue
        pr_m = pr_re.search(subj)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        stories = list(dict.fromkeys(story_re.findall(text)))
        result[m.group(1)] = {"pr": pr_m.group(1) if pr_m else None,
                              "stories": stories}
    return result
|
|
617
|
+
|
|
618
|
+
def repair_orphan_cycles_from_git(cycles: List[Dict[str, Any]], git_merges: Dict[str, Dict[str, Any]]) -> None:
|
|
619
|
+
"""Salvage data from git merges: for any cycle whose branch was merged,
|
|
620
|
+
promote 'running'/'unknown' outcomes to 'done' and back-fill the
|
|
621
|
+
built[] story list when events + runs.jsonl came up empty."""
|
|
622
|
+
for cy in cycles:
|
|
623
|
+
m = git_merges.get(cy.get("label", ""))
|
|
624
|
+
if not m:
|
|
625
|
+
continue
|
|
626
|
+
if cy.get("outcome") in ("running", "unknown"):
|
|
627
|
+
cy["outcome"] = "done"
|
|
628
|
+
if m["pr"] and not cy.get("pr"):
|
|
629
|
+
cy["pr"] = f"https://github.com/seanyao/roll/pull/{m['pr']}"
|
|
630
|
+
# US-VIEW-011: a merge commit in git proves the PR landed.
|
|
631
|
+
# Promote pr_outcome to 'merged' even when no terminal pr event
|
|
632
|
+
# was emitted (older cycles, missed runs, events truncation).
|
|
633
|
+
if m["pr"]:
|
|
634
|
+
cy["pr_num"] = int(m["pr"])
|
|
635
|
+
cy["pr_outcome"] = "merged"
|
|
636
|
+
# Fill stories when our existing sources didn't carry them. Filter
|
|
637
|
+
# to ones that actually appear in BACKLOG so we don't pull in stray
|
|
638
|
+
# tokens from the merge body (PR numbers, file paths, etc.).
|
|
639
|
+
if m["stories"] and not cy.get("built"):
|
|
640
|
+
cy["built"] = m["stories"]
|
|
641
|
+
cy["story"] = m["stories"][0]
|
|
642
|
+
|
|
643
|
+
def load_runs(slug: str) -> Dict[str, Dict[str, Any]]:
    """Map run_id → run row for the current project (filters out other slugs
    sharing runs.jsonl). Lenient slug matching salvages entries written under
    buggy slugs (FIX-053): the bare project basename (e.g. 'Roll') or worktree
    paths (e.g. '{slug}-cycle-XXX').

    FIX-137: reads from project-local .roll/loop/runs.jsonl first, falls back
    to shared runs.jsonl.

    Lines that are not valid JSON, or that decode to something other than an
    object, are skipped. Returns {} when neither file exists.
    """
    # Primary: project-local
    rt_dir = _loop_runtime_dir_py(slug)
    path = (rt_dir / "runs.jsonl") if rt_dir is not None else None
    if path is None or not path.exists():
        # Fallback: shared (cross-project)
        path = shared_root() / "loop" / "runs.jsonl"
    if not path.exists():
        return {}
    base = slug.split("-")[0]  # 'Roll-a43d1b' → 'Roll'
    cycle_prefix = f"{slug}-cycle-"
    # FIX-144: old-slug runs (e.g. path-based slug before git-remote-based
    # migration) share the same project path but have a different slug.
    # Resolve once and compare paths to salvage those runs.
    proj_path = _resolve_project_path(slug)
    # Memo of foreign slug → resolved path, so a slug that appears on many
    # rows is resolved a single time per load.
    resolved: Dict[str, Any] = {}
    out: Dict[str, Dict[str, Any]] = {}
    with path.open(errors="ignore") as f:
        for line in f:
            try:
                r = json.loads(line)
            except Exception:
                continue
            if not isinstance(r, dict):
                # e.g. a bare `null` / number / list line — not a run row.
                continue
            p = r.get("project") or ""
            if not isinstance(p, str):
                continue
            if p != slug and p != base and not p.startswith(cycle_prefix):
                # String match failed — try path match for old-slug salvage.
                if proj_path is None:
                    continue
                if p not in resolved:
                    resolved[p] = _resolve_project_path(p)
                other_proj = resolved[p]
                if other_proj is None or other_proj != proj_path:
                    continue
            rid = r.get("run_id", "")
            if rid:
                out[rid] = r
    return out
|
|
683
|
+
|
|
684
|
+
def merge_runs_into_cycles(cycles: List[Dict[str, Any]], runs: Dict[str, Dict[str, Any]]) -> None:
    """Attach tcr_count + built stories from runs.jsonl onto matching cycles.

    The runs.jsonl `run_id` field has inconsistent time format across writer
    versions (sometimes UTC, sometimes Beijing local, sometimes with PID
    suffix), so string matching is unreliable. Match by `ts` proximity
    instead: each cycle gets the closest run whose ts is between this
    cycle's start and the next-newer cycle's start (i.e. the run wrote out
    before the next cycle began). Each run consumed exactly once.

    Args:
        cycles: cycle dicts ordered newest-first, each with a 'start'
            datetime (and optionally 'end'); mutated in place.
        runs: run_id → run row; rows whose 'ts' is missing or unparseable
            are ignored.
    """
    # Parse run timestamps once.
    runs_list = []
    for rid, r in runs.items():
        try:
            ts = datetime.fromisoformat(r["ts"].replace("Z", "+00:00"))
        except Exception:
            continue
        runs_list.append((ts, rid, r))
    runs_list.sort(key=lambda x: x[0])
    consumed = set()

    # Cycles arrive newest-first; pair each with the next-older to bound
    # the matching window (so a cycle's run doesn't steal the next idle's).
    for i, cy in enumerate(cycles):
        start = cy["start"]
        # next newer cycle in real time = the cycle just above us in list
        next_start = cycles[i - 1]["start"] if i > 0 else start + timedelta(hours=2)
        # If there's a cycle_end, also clamp to end + 30min as upper bound.
        if cy.get("end"):
            window_end = min(next_start, cy["end"] + timedelta(minutes=30))
        else:
            window_end = next_start
        # runs_list is sorted ascending, so the first unconsumed run at or
        # after `start` is the closest one; it only counts if it also falls
        # before the window's end.
        best = None
        for ts, rid, r in runs_list:
            if rid in consumed or ts < start:
                continue
            if ts < window_end:
                best = (ts, rid, r)
            break
        if best is None:
            continue
        ts, rid, r = best
        consumed.add(rid)
        built = r.get("built", []) or []
        cy["tcr_count"] = r.get("tcr_count", 0)
        cy["built"] = built
        # Duration: cap runs.jsonl's reported duration_sec by (runs_ts -
        # cycle_start) since the field has been seen with garbage values.
        if r.get("duration_sec"):
            cap = int((ts - start).total_seconds())
            cy["duration_s"] = min(r["duration_sec"], cap) if cap > 0 else r["duration_sec"]
        # Outcome: runs.jsonl wins when events stream was vacuous or
        # misleading (idle/failed emitted by _loop_event even though the
        # agent completed work and _runs_append recorded built).
        if cy.get("outcome") in ("unknown", "running", "idle", "failed") and r.get("status"):
            cy["outcome"] = {"built": "done", "interrupted": "fail"}.get(r["status"], r["status"])
        # Use the normalised list: a run row without a 'built' key must not
        # raise here when the line above already tolerated its absence.
        if not cy.get("story") and built:
            cy["story"] = built[0]
|
|
744
|
+
|
|
745
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
746
|
+
# Rollup math — by day buckets in LOCAL time
|
|
747
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
748
|
+
def bucket_by_day(cycles: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Group cycles under the YYYY-MM-DD of their 'start', converted to the
    system's local timezone. Input order is kept within each day."""
    buckets: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for cycle in cycles:
        local_start = cycle["start"].astimezone()
        day_key = local_start.strftime("%Y-%m-%d")
        buckets[day_key].append(cycle)
    return buckets
|
|
754
|
+
|
|
755
|
+
def rollup_for_story(cycles: List[Dict[str, Any]], story_id: str) -> Dict[str, Any]:
    """US-LOOP-024: aggregate cycles belonging to a single story.

    Case-insensitive match on cy["story"]. Sums duration / tokens / cost,
    splits outcomes into ✗ (fail) / ⏵ (running) / ✓ (every other outcome,
    e.g. done|idle), collects PR landings, captures the model from the first
    matching cycle that has one.

    Cost is reported twice: `cost` is the plain scalar sum across all cycles
    (kept for existing callers), and `cost_by_cur` breaks the same amounts
    down per currency, the way rollup_for_day does (FIX-126), because a sum
    that mixes currencies is not meaningful on its own. cron.log costs are
    booked as USD.

    Args:
        cycles: all cycle dicts; only those whose 'story' matches are used.
        story_id: story to aggregate; None is treated as the empty string.

    Returns:
        Dict with story_id, cycles (the matched list), count, ok_count,
        fail_count, running_count, span_start, span_end, duration_s, cost,
        cost_by_cur, the four token totals, prs and model.
    """
    sid_lower = (story_id or "").lower()
    matched = [cy for cy in cycles if (cy.get("story") or "").lower() == sid_lower]
    r: Dict[str, Any] = {
        "story_id": story_id,
        "cycles": matched,
        "count": len(matched),
        "ok_count": 0, "fail_count": 0, "running_count": 0,
        "span_start": None, "span_end": None,
        "duration_s": 0, "cost": 0.0, "cost_by_cur": {},
        "input_tokens": 0, "output_tokens": 0,
        "cache_creation_tokens": 0, "cache_read_tokens": 0,
        "prs": [], "model": None,
    }
    by_cur = r["cost_by_cur"]
    for cy in matched:
        outcome = cy.get("outcome") or ""
        if outcome == "fail":
            r["fail_count"] += 1
        elif outcome == "running":
            r["running_count"] += 1
        else:
            r["ok_count"] += 1
        # Span = earliest start .. latest end over the matched cycles.
        if cy.get("start"):
            if r["span_start"] is None or cy["start"] < r["span_start"]:
                r["span_start"] = cy["start"]
        if cy.get("end"):
            if r["span_end"] is None or cy["end"] > r["span_end"]:
                r["span_end"] = cy["end"]
        for key in ("duration_s", "input_tokens", "output_tokens",
                    "cache_creation_tokens", "cache_read_tokens"):
            if cy.get(key):
                r[key] += cy[key]
        # List-price cost wins; cron.log's figure is only a fallback.
        if cy.get("cost_list") is not None:
            amount = cy["cost_list"]
            cur = cy.get("cost_currency") or "USD"
            r["cost"] += amount
            by_cur[cur] = by_cur.get(cur, 0.0) + amount
        elif cy.get("cron"):
            amount = cy["cron"]["cost"]
            r["cost"] += amount
            by_cur["USD"] = by_cur.get("USD", 0.0) + amount
        if cy.get("pr_num"):
            r["prs"].append({"num": cy["pr_num"],
                             "outcome": cy.get("pr_outcome") or "open"})
        if cy.get("model") and not r["model"]:
            r["model"] = cy["model"]
    return r
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
# US-SKILL-014: aggregate the last N self-score notes for the dashboard.
|
|
808
|
+
# Reads .roll/notes/*.md (frontmatter format from US-SKILL-010), returns
|
|
809
|
+
# "self-score: mean 7.8 / min 4 / redo 2 (last 14)"
|
|
810
|
+
# or "" when no notes / "self-score: (n/a) — N sample(s), need 3 (last N)"
|
|
811
|
+
# when sample is too small.
|
|
812
|
+
def _self_score_summary_line(notes_dir = None, window: int = 14) -> str:
|
|
813
|
+
notes_dir = notes_dir if notes_dir is not None else Path(".roll/notes")
|
|
814
|
+
if not notes_dir.exists():
|
|
815
|
+
return ""
|
|
816
|
+
files = sorted(notes_dir.glob("*.md"))[-window:]
|
|
817
|
+
if not files:
|
|
818
|
+
return ""
|
|
819
|
+
total = 0
|
|
820
|
+
count = 0
|
|
821
|
+
minv = 11
|
|
822
|
+
redo = 0
|
|
823
|
+
for f in files:
|
|
824
|
+
score = None
|
|
825
|
+
verdict = None
|
|
826
|
+
for line in f.read_text(errors="ignore").splitlines():
|
|
827
|
+
if line.startswith("score: "):
|
|
828
|
+
try:
|
|
829
|
+
score = int(line.split(": ", 1)[1].strip())
|
|
830
|
+
except ValueError:
|
|
831
|
+
score = None
|
|
832
|
+
elif line.startswith("verdict: "):
|
|
833
|
+
verdict = line.split(": ", 1)[1].strip()
|
|
834
|
+
if score is not None and verdict is not None:
|
|
835
|
+
break
|
|
836
|
+
if score is None:
|
|
837
|
+
continue
|
|
838
|
+
count += 1
|
|
839
|
+
total += score
|
|
840
|
+
if score < minv:
|
|
841
|
+
minv = score
|
|
842
|
+
if verdict == "regression":
|
|
843
|
+
redo += 1
|
|
844
|
+
elif verdict == "ok" and score < 6:
|
|
845
|
+
redo += 1
|
|
846
|
+
if count < 3:
|
|
847
|
+
return f"self-score: (n/a) — {count} sample(s), need 3 (last {window})"
|
|
848
|
+
mean = total / count
|
|
849
|
+
return f"self-score: mean {mean:.1f} / min {minv} / redo {redo} (last {window})"
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
853
|
+
# US-EVAL-003: result-eval trend view (window aggregation).
|
|
854
|
+
#
|
|
855
|
+
# Distinct from self-score (above): self-score is the agent's *subjective*
|
|
856
|
+
# review of a skill run; result_eval is the *objective* per-cycle result score
|
|
857
|
+
# computed in US-EVAL-002 and stored in each runs.jsonl record under the
|
|
858
|
+
# `result_eval` block ({version, score 1..10, dims{dim: 0..1|"unknown"}}).
|
|
859
|
+
#
|
|
860
|
+
# We aggregate the most recent N records that carry a result_eval: the mean and
|
|
861
|
+
# minimum cycle score, a per-dimension hit-rate (share of records scoring 1.0 on
|
|
862
|
+
# that dimension, ignoring "unknown"), and a trend arrow comparing the newer
|
|
863
|
+
# half's mean against the older half's. Records without result_eval (older
|
|
864
|
+
# schema) are simply skipped — never an error (backward compat).
|
|
865
|
+
|
|
866
|
+
# Dimension order for the eval view. Imported from the rubric so the two stay
|
|
867
|
+
# in sync; falls back to a literal list if the scorer module isn't importable
|
|
868
|
+
# (e.g. a stripped-down test env).
|
|
869
|
+
def _eval_dim_names() -> List[str]:
|
|
870
|
+
try:
|
|
871
|
+
import importlib.util
|
|
872
|
+
path = Path(__file__).resolve().parent / "loop_result_eval.py"
|
|
873
|
+
spec = importlib.util.spec_from_file_location("loop_result_eval", path)
|
|
874
|
+
mod = importlib.util.module_from_spec(spec)
|
|
875
|
+
spec.loader.exec_module(mod) # type: ignore[union-attr]
|
|
876
|
+
return [name for name, _ in mod.DIMENSIONS]
|
|
877
|
+
except Exception:
|
|
878
|
+
return ["outcome", "correctness", "scope_fidelity",
|
|
879
|
+
"quality", "efficiency", "cleanliness"]
|
|
880
|
+
|
|
881
|
+
# Marker value a result_eval dimension carries when it has no score;
# aggregate_eval leaves such entries out of the per-dimension hit-rate.
UNKNOWN = "unknown"
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
def _eval_records(records: List[Dict[str, Any]], window: int) -> List[Dict[str, Any]]:
|
|
885
|
+
"""The most recent `window` records (oldest→newest) that carry a usable
|
|
886
|
+
result_eval block. Records sorted by `ts`; absence of result_eval skips."""
|
|
887
|
+
rows: List[Dict[str, Any]] = []
|
|
888
|
+
ordered = sorted((r or {} for r in records), key=lambda r: r.get("ts", ""))
|
|
889
|
+
for r in ordered:
|
|
890
|
+
ev = r.get("result_eval")
|
|
891
|
+
if isinstance(ev, dict) and isinstance(ev.get("score"), (int, float)):
|
|
892
|
+
rows.append(ev)
|
|
893
|
+
return rows[-window:] if window > 0 else rows
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def aggregate_eval(records: List[Dict[str, Any]], window: int = 14) -> Dict[str, Any]:
    """Aggregate result_eval over the last `window` scored records.

    Returns a dict: {n, mean, min, trend ('up'|'down'|'flat'|None), dims:
    {dim: hit_rate float 0..1 | None}}. n is the count of scored records used.
    With n == 0 the dict is returned with mean/min/trend None and dims empty;
    trend stays None when n < 2.
    """
    rows = _eval_records(records, window)
    n = len(rows)
    scores = [float(ev["score"]) for ev in rows]
    out: Dict[str, Any] = {"n": n, "mean": None, "min": None,
                           "trend": None, "dims": {}}
    if n == 0:
        return out
    out["mean"] = sum(scores) / n
    out["min"] = min(scores)
    # Per-dimension hit-rate: share of records that scored a perfect 1.0 on the
    # dimension, counting only records where the dim is known (not "unknown").
    for dim in _eval_dim_names():
        known = 0
        hits = 0
        for ev in rows:
            dims = ev.get("dims")
            v = dims.get(dim, UNKNOWN) if isinstance(dims, dict) else UNKNOWN
            if v == UNKNOWN or v is None:
                continue
            try:
                value = float(v)
            except (TypeError, ValueError):
                # A malformed dimension value is treated like "unknown"
                # rather than aborting the whole aggregation.
                continue
            known += 1
            if value >= 1.0:
                hits += 1
        out["dims"][dim] = (hits / known) if known else None
    # Trend: compare the newer half's mean against the older half's. Needs at
    # least 2 records to split (so both halves are non-empty); a delta within
    # ±0.3 is "flat".
    if n >= 2:
        half = n // 2
        older = scores[:half]
        newer = scores[half:]
        delta = (sum(newer) / len(newer)) - (sum(older) / len(older))
        if delta > 0.3:
            out["trend"] = "up"
        elif delta < -0.3:
            out["trend"] = "down"
        else:
            out["trend"] = "flat"
    return out
|
|
938
|
+
|
|
939
|
+
|
|
940
|
+
# Arrow glyph shown for each trend label ('up' / 'down' / 'flat') that
# aggregate_eval reports.
_TREND_ARROW = {"up": "↑", "down": "↓", "flat": "→"}
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
# Single-line dashboard summary, mirroring `_self_score_summary_line`'s form.
|
|
944
|
+
# Returns "" when there are no scored records, "result-eval: (n/a) need 3 ..."
|
|
945
|
+
# when the sample is below 3 (same threshold/idiom as self-score), else
|
|
946
|
+
# "result-eval: mean 7.4↑ / min 5 / out 80% ci 60% ... (last N)".
|
|
947
|
+
def _result_eval_summary_line(records: List[Dict[str, Any]], window: int = 14) -> str:
    """One-line result-eval summary for the dashboard.

    Yields "" with no scored records, the "(n/a)" form with fewer than 3, and
    otherwise the mean (with trend arrow), the minimum, and one short
    "label pct%" entry per dimension that has a hit-rate.
    """
    summary = aggregate_eval(records or [], window)
    sample = summary["n"]
    if sample == 0:
        return ""
    if sample < 3:
        return f"result-eval: (n/a) — {sample} sample(s), need 3 (last {window})"
    trend_mark = _TREND_ARROW.get(summary["trend"] or "flat", "")
    # Abbreviations keep the line compact; unlisted dimensions keep their name.
    labels = {"outcome": "out", "correctness": "ci", "scope_fidelity": "scope",
              "quality": "qual", "efficiency": "eff", "cleanliness": "clean"}
    rated = [
        f"{labels.get(dim, dim)} {round(rate * 100)}%"
        for dim, rate in summary["dims"].items()
        if rate is not None
    ]
    head = f"result-eval: mean {summary['mean']:.1f}{trend_mark} / min {int(summary['min'])}"
    tail = (" / " + " ".join(rated)) if rated else ""
    return f"{head}{tail} (last {window})"
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
# Multi-line view for `roll loop eval [N]`. Pretty-prints the aggregation as a
|
|
969
|
+
# small dashboard block (no color dependency beyond the shared renderer).
|
|
970
|
+
def format_eval_view(records: List[Dict[str, Any]], window: int = 14) -> str:
    """Render the result-eval aggregation as a multi-line, bilingual block.

    Always starts with the EN + ZH title and a blank line. With no scored
    records, or fewer than 3, a two-line notice follows and nothing else;
    otherwise mean / min / n and one hit-rate row per dimension.
    """
    summary = aggregate_eval(records or [], window)
    sample = summary["n"]
    out: List[str] = [
        f"Loop result-eval — last {window} cycles",
        f"循环结果评分 — 最近 {window} 轮",
        "",
    ]
    if sample == 0:
        out += [
            "no scored cycles yet (need result_eval in runs.jsonl)",
            "尚无评分 cycle(runs.jsonl 需含 result_eval)",
        ]
        return "\n".join(out)
    if sample < 3:
        out += [
            f"(n/a) — {sample} sample(s), need 3",
            f"(n/a) — 样本 {sample} 个,至少需要 3 个",
        ]
        return "\n".join(out)
    trend_mark = _TREND_ARROW.get(summary["trend"] or "flat", "")
    out += [
        f" mean {summary['mean']:.1f} / 10 {trend_mark}",
        f" min {int(summary['min'])} / 10",
        f" n {sample}",
        "",
        " dimension hit-rate / 各维度命中率",
    ]
    for dim in _eval_dim_names():
        rate = summary["dims"].get(dim)
        shown = "n/a" if rate is None else f"{round(rate * 100)}%"
        out.append(f" {dim:<16} {shown}")
    return "\n".join(out)
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
# US-AGENT-010: per-agent hit-rate summary for the ROLLUP block.
|
|
1001
|
+
# Aggregates the last `window_cycles` runs.jsonl records grouped by `agent`.
|
|
1002
|
+
# Returns a single-line string like
|
|
1003
|
+
# "agents: pi 8/22 (36%) · deepseek 5/8 (63%) · claude 2/2 (n/a)"
|
|
1004
|
+
# Empty agents / missing agent field are skipped. Sample < min_sample renders
|
|
1005
|
+
# as "(n/a)" instead of a percentage to avoid noise from tiny windows.
|
|
1006
|
+
def _agent_summary_line(records: List[Dict[str, Any]], window_cycles: int = 50,
|
|
1007
|
+
min_sample: int = 5) -> str:
|
|
1008
|
+
if not records or window_cycles <= 0:
|
|
1009
|
+
return ""
|
|
1010
|
+
# Take the most recent `window_cycles` records that have an agent field.
|
|
1011
|
+
tail: List[Dict[str, Any]] = []
|
|
1012
|
+
for rec in records[-window_cycles:]:
|
|
1013
|
+
agent = (rec or {}).get("agent") or ""
|
|
1014
|
+
if not agent:
|
|
1015
|
+
continue
|
|
1016
|
+
tail.append(rec)
|
|
1017
|
+
if not tail:
|
|
1018
|
+
return ""
|
|
1019
|
+
counts: Dict[str, List[int]] = {}
|
|
1020
|
+
# preserve first-seen order for stable output
|
|
1021
|
+
order: List[str] = []
|
|
1022
|
+
for rec in tail:
|
|
1023
|
+
agent = rec.get("agent") or ""
|
|
1024
|
+
if not agent:
|
|
1025
|
+
continue
|
|
1026
|
+
if agent not in counts:
|
|
1027
|
+
counts[agent] = [0, 0]
|
|
1028
|
+
order.append(agent)
|
|
1029
|
+
counts[agent][1] += 1
|
|
1030
|
+
if rec.get("status") == "built":
|
|
1031
|
+
counts[agent][0] += 1
|
|
1032
|
+
if not order:
|
|
1033
|
+
return ""
|
|
1034
|
+
parts: List[str] = []
|
|
1035
|
+
for agent in order:
|
|
1036
|
+
built, total = counts[agent]
|
|
1037
|
+
if total < min_sample:
|
|
1038
|
+
parts.append(f"{agent} {built}/{total} (n/a)")
|
|
1039
|
+
else:
|
|
1040
|
+
pct = round(100 * built / total) if total else 0
|
|
1041
|
+
parts.append(f"{agent} {built}/{total} ({pct}%)")
|
|
1042
|
+
return "agents: " + " · ".join(parts)
|
|
1043
|
+
|
|
1044
|
+
|
|
1045
|
+
def rollup_for_day(day_cycles: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Fold one day's cycles into a totals dict.

    Keys: cycles (count), prs (cycles whose pr_outcome is 'merged'), failed
    (outcome == 'fail'), duration_s, the four token totals, cost (scalar sum
    across all currencies) and cost_by_cur (currency → sum).
    """
    # US-VIEW-012: input and output tokens are kept as separate totals so the
    # daily summary can show them on their own rows; the cache token totals
    # are accumulated as well.
    # FIX-126: cost is per-currency (deepseek bills in CNY, claude in USD), so
    # `cost_by_cur` is the meaningful figure; `cost` remains as the legacy
    # scalar sum for callers that don't care about currency.
    summary = {"cycles": len(day_cycles), "prs": 0, "failed": 0,
               "duration_s": 0, "cost": 0.0, "cost_by_cur": {},
               "input_tokens": 0, "output_tokens": 0,
               "cache_creation_tokens": 0, "cache_read_tokens": 0}
    by_currency = summary["cost_by_cur"]
    additive = ("duration_s", "input_tokens", "output_tokens",
                "cache_creation_tokens", "cache_read_tokens")
    for cycle in day_cycles:
        if cycle.get("outcome") == "fail":
            summary["failed"] += 1
        for field in additive:
            if cycle.get(field):
                summary[field] += cycle[field]
        # US-VIEW-011: only a PR that actually merged counts. A row with a PR
        # URL but no pr_outcome is treated as still open and is not counted.
        if cycle.get("pr_outcome") == "merged":
            summary["prs"] += 1
        if cycle.get("cost_list") is not None:
            amount = cycle["cost_list"]
            currency = cycle.get("cost_currency") or "USD"
        elif cycle.get("cron"):
            # No list-price cost on the cycle: fall back to the cron.log
            # figure, which is booked as USD.
            amount = cycle["cron"]["cost"]
            currency = "USD"
        else:
            continue
        summary["cost"] += amount
        by_currency[currency] = by_currency.get(currency, 0.0) + amount
    return summary
|
|
1089
|
+
|
|
1090
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
1091
|
+
# Render
|
|
1092
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
1093
|
+
def render(events, cron, state, backlog, *, days=3, lang="both", now=None,
           runs=None, git_merges=None, claude_slug=None):
    """Print the `roll loop` health dashboard to stdout.

    Pipeline: build cycles with aggregate(events, cron), optionally enrich
    them from `runs` and `git_merges`, backfill usage/cost, bucket the cycles
    by day, then print four sections in order: title row, status eyebrow
    (plus daily-service and tick-age lines), the today / yesterday / −2d
    rollup with summary lines, and the recent cycles grouped by day.

    Args:
        events: event records handed to aggregate().
        cron: cron-log records handed to aggregate().
        state: mapping read with .get() for "status", "current_item",
            "paused_at" and "paused_reason".
        backlog: mapping looked up with backlog.get(story, "") to obtain
            the title shown next to a story id.
        days: how many day buckets the RECENT section shows; also used in
            the title row ("N cycles / {days*24}h") and the ROLLUP heading.
        lang: "both", "en" or "zh"; drives bilingual() and the ZH eyebrow.
        now: reference datetime; defaults to the current local time
            (timezone-aware).
        runs: run records — a dict (its values are used) or an iterable;
            merged into cycles when truthy and fed to the summary lines.
        git_merges: passed to repair_orphan_cycles_from_git() when truthy.
        claude_slug: passed (as "" when falsy) to
            backfill_usage_from_claude_sessions().

    Returns:
        None. All output goes through print().
    """
    now = now or datetime.now(timezone.utc).astimezone()
    cycles = aggregate(events, cron)
    if runs:
        merge_runs_into_cycles(cycles, runs)
    if git_merges:
        repair_orphan_cycles_from_git(cycles, git_merges)
    # Path 1 (usage_event from the events stream) is authoritative and needs no
    # slug; path 2 (claude session-log salvage) self-guards on the worktree dir
    # existing, so it's a no-op when claude_slug is empty. Always run both — the
    # old `if claude_slug:` gate dropped real per-currency cost for any caller
    # that didn't pass a slug (FIX-126).
    backfill_usage_from_claude_sessions(cycles, claude_slug or "")
    by_day = bucket_by_day(cycles)
    # Keep only the `days` greatest day keys that actually have a bucket.
    days_keys = sorted(by_day.keys(), reverse=True)[:days]

    def bilingual(en_line, zh_line):
        """Emit EN row then ZH row, honoring --en / --zh."""
        if lang in ("both", "en"):
            print(en_line)
        if lang in ("both", "zh") and zh_line is not None:
            print(zh_line)

    # ── Title row ───────────────────────────────────────────────────────────
    n_cycles = len(cycles)
    title_l = c("fg", "roll loop", bold=True) + c("muted", " · ") + c("dim", "health")
    title_r = c("dim", now.strftime("%Y-%m-%d %H:%M")) + c("muted", " · ") + c("muted", f"{n_cycles} cycles / {days*24}h")
    print(row(title_l, title_r))
    print()

    # ── Status eyebrow ─────────────────────────────────────────────────────
    # A missing or empty status is treated as "idle".
    status_word = (state.get("status") or "idle").lower()
    if status_word == "running":
        item = state.get("current_item") or "—"
        eb_l = (c("purple", "⏵", bold=True) + " " +
                c("purple", "RUNNING", bold=True) + c("muted", " ") +
                c("dim", "story ") + c("blue", item, bold=True))
        eb_zh = (c("dim", " 正在运行 · 当前 ") + c("blue", item))
    elif status_word == "paused":
        eb_l = (c("amber", "⏸ PAUSED", bold=True) + c("muted", " ") +
                c("dim", "since ") + c("fg", state.get("paused_at", "—")) +
                c("muted", " · ") + c("dim", state.get("paused_reason", "")))
        eb_zh = c("dim", " 已暂停 · run: roll loop resume")
    else:
        # FIX-095: surface three-state install/enable status. Pre-FIX, every
        # case fell through to '● IDLE' which hid 'not installed' and
        # 'installed/off' from the user.
        install_state = _detect_install_state()
        if install_state == "not-installed":
            eb_l = (c("muted", "○ not installed", bold=True) + c("muted", " ") +
                    c("dim", "run ") + c("fg", "roll loop on", bold=True) +
                    c("dim", " to enable"))
            eb_zh = c("dim", " 未安装 · 运行 ") + c("fg", "roll loop on") + c("dim", " 启用")
        elif install_state in ("stale", "disabled"):
            # FIX-098: 'stale' = plist on disk but agent not registered in launchd.
            # 'disabled' kept for back-compat (old install_state values). Both mean
            # the user needs to run 'roll loop on' to bootstrap the agent.
            eb_l = (c("amber", "◌ STALE — plist present, not loaded", bold=True) + c("muted", " ") +
                    c("dim", "run ") + c("fg", "roll loop on", bold=True) + c("dim", " to repair"))
            eb_zh = c("dim", " Plist 存在但未加载 · 运行 ") + c("fg", "roll loop on") + c("dim", " 修复")
        else:
            # The hint is computed twice (EN and ZH), each from its own clock read.
            eb_l = (c("blue", "● IDLE", bold=True) + c("muted", " · ") +
                    c("dim", "enabled · next run ") + c("fg", _next_cron_hint(state), bold=True))
            eb_zh = c("dim", f" 已启用 · 闲置 · 距下一轮 {_next_cron_hint(state, zh=True)}")

    # 'last' = the most recent cycle the user can act on — skip cycles that
    # are still running (the running banner already announces those) and skip
    # idle cycles (they picked no story, so 'last · 23:48 —' carries no info).
    # NOTE(review): this takes the first match, so it presumably relies on
    # `cycles` being ordered newest-first — confirm against aggregate().
    # If every cycle is running/idle, fall back to the first cycle, if any.
    last = next(
        (cy for cy in cycles if cy.get("outcome") not in ("running", "idle")),
        None,
    ) or (cycles[0] if cycles else None)
    if last:
        story = last.get("story") or "—"
        title = backlog.get(story, "") if story != "—" else ""
        # Outcome → (color, glyph); unknown outcomes get the muted dot.
        glyph_c, glyph_ch = {
            "done": ("green", "✓"),
            "ok": ("green", "✓"),
            "idle": ("muted", "·"),
            "fail": ("red", "✗"),
            "running": ("purple", "⏵"),
        }.get(last["outcome"], ("muted", "·"))
        glyph = c(glyph_c, glyph_ch, bold=True)
        eb_r = (c("dim", "last ") + glyph + " " +
                c("fg", last["start"].astimezone().strftime("%H:%M")) + " " +
                c("blue", story, bold=True) + " " +
                c("fg", trunc(title, 32)))
    else:
        eb_r = c("muted", "no cycles yet")
    # The EN eyebrow row is printed for every `lang` value.
    print(row(eb_l, eb_r))
    if lang != "en" and last:
        # ZH eyebrow row is left-aligned only — mirroring the EN right side
        # would duplicate signal without adding info.
        print(eb_zh)

    # US-LOOP-036: daily service (dream/brief) next-fire lines, read straight
    # from the launchd plist so they reflect the latest `roll config <svc>-time`
    # reload rather than a stale yaml-derived guess.
    for _svc in ("dream", "brief"):
        _sl = _daily_schedule_line(_svc, now=now)
        if _sl:
            print(" " + c("dim", _sl))
    # FIX-151: dedicated loop (pr/ci/alert) last-tick age
    for _loop in ("pr", "ci", "alert"):
        _tl = _tick_age_line(_loop, now=now)
        if _tl:
            print(" " + c("dim", _tl))
    print()

    print(c("faint", "─" * COLS))
    print()

    # ── 3-day rollup ────────────────────────────────────────────────────────
    section_head("ROLLUP", "近 " + str(days) + " 天", "↑ today vs yesterday · 今日 vs 昨日")
    print()

    # Bug C: today_key is derived from `now` (real today in local TZ), not
    # from sorted(by_day)[0]. If today has 0 cycles, the Today column shows 0
    # and yesterday's data stays under Yesterday — matching the day-band below.
    today_key = now.strftime("%Y-%m-%d")
    yest_key = (now - timedelta(days=1)).strftime("%Y-%m-%d")
    d2_key = (now - timedelta(days=2)).strftime("%Y-%m-%d")

    # Days without a bucket roll up from an empty list.
    today = rollup_for_day(by_day.get(today_key, []))
    yest = rollup_for_day(by_day.get(yest_key, []))
    d2 = rollup_for_day(by_day.get(d2_key, []))

    # 'partial' = today is still in progress — today's cycle count is under
    # yesterday's, so a 'down −23' delta against yesterday's full-day count
    # would otherwise read as a regression. Mute delta colors when partial;
    # 'failed' stays loud because a fail is a real alert regardless.
    is_partial = today["cycles"] < yest["cycles"]

    # column headers — 'trend' hint removed (we don't emit a trend column).
    # 'in progress' indicator stays on the day band + muted deltas, not the
    # column header (cramming '(in progress)' into 18 chars collides with
    # the Yesterday column).
    # Today column spans 22 cols = value(8) + gap(2) + delta(12), matching
    # the metric row geometry exactly so Yesterday and −2d line up under
    # their data — fixes the "yesterday/−2d squished" misalignment.
    hdr_en = (" " + c("muted", pad("", 14)) +
              c("fg", pad("Today", 22), bold=True) +
              c("dim", pad("Yesterday", 10)) +
              c("muted", pad("−2d", 8)))
    hdr_zh = (" " + c("muted", pad("", 14)) +
              c("dim", pad("今日", 22)) +
              c("muted", pad("昨日", 10)) +
              c("muted", pad("前天", 8)))
    bilingual(hdr_en, hdr_zh)

    metric("cycles", today["cycles"], yest["cycles"], d2["cycles"], "up_good", partial=is_partial)
    metric("merged PRs", today["prs"], yest["prs"], d2["prs"], "up_good", partial=is_partial)
    # Failures stay loud — do NOT pass partial=True. A regression today is
    # a real alert even when comparing to a full yesterday.
    metric("failed", today["failed"], yest["failed"], d2["failed"], "up_bad",
           yest_color="amber" if yest["failed"] > 0 else "dim",
           yest_suffix="⚠" if yest["failed"] > 0 else "")
    metric_dur("duration", today["duration_s"], yest["duration_s"], d2["duration_s"], partial=is_partial)
    # US-VIEW-017: show all 4 token components so the cost is explainable.
    # cache_creation (↑) and cache_read (↓) typically account for 80-90% of
    # cost — hiding them makes the cost line incomprehensible.
    metric_tokens("input tokens", today["input_tokens"], yest["input_tokens"], d2["input_tokens"], partial=is_partial)
    metric_tokens("cache writes", today["cache_creation_tokens"], yest["cache_creation_tokens"], d2["cache_creation_tokens"], partial=is_partial)
    metric_tokens("cache reads", today["cache_read_tokens"], yest["cache_read_tokens"], d2["cache_read_tokens"], partial=is_partial)
    metric_tokens("output tokens", today["output_tokens"], yest["output_tokens"], d2["output_tokens"], partial=is_partial)
    # FIX-126: one cost row per currency (deepseek ¥, claude $) — never summed
    # across currencies. Show a currency only if it has spend in any of the 3
    # days; default to a single USD row when there's no cost at all.
    _cost_days = (today, yest, d2)
    _currencies = []
    # Known currencies first, in a fixed order, so the rows are stable.
    for _cur in ["USD", "CNY"]:
        if any(r["cost_by_cur"].get(_cur) for r in _cost_days):
            _currencies.append(_cur)
    # Then any other currency with non-zero spend, in first-seen order.
    for r in _cost_days:
        for _cur in r["cost_by_cur"]:
            if _cur not in _currencies and r["cost_by_cur"][_cur]:
                _currencies.append(_cur)
    if not _currencies:
        _currencies = ["USD"]
    for _cur in _currencies:
        # Every currency other than CNY is rendered with "$".
        _sym = "¥" if _cur == "CNY" else "$"
        _label = "cost" if len(_currencies) == 1 else "cost " + _sym
        metric_dollar(_label,
                      today["cost_by_cur"].get(_cur, 0.0),
                      yest["cost_by_cur"].get(_cur, 0.0),
                      d2["cost_by_cur"].get(_cur, 0.0),
                      partial=is_partial, symbol=_sym)

    # US-AGENT-010: per-agent hit-rate summary (single line).
    # Best-effort: any failure while building the line just omits it.
    try:
        runs_records = list(runs.values()) if isinstance(runs, dict) else list(runs or [])
        runs_records.sort(key=lambda r: (r or {}).get("ts", ""))
        _agent_line = _agent_summary_line(runs_records, window_cycles=50)
    except Exception:
        _agent_line = ""
    if _agent_line:
        print(" " + c("dim", _agent_line))

    # US-SKILL-014: per-skill self-score trend (single line) under the agent line.
    try:
        _skill_line = _self_score_summary_line()
    except Exception:
        _skill_line = ""
    if _skill_line:
        print(" " + c("dim", _skill_line))

    # US-EVAL-003: per-cycle result-eval trend (single line), distinct from the
    # self-score line above (subjective skill review vs objective cycle result).
    try:
        _eval_records_in = list(runs.values()) if isinstance(runs, dict) else list(runs or [])
        _eval_line = _result_eval_summary_line(_eval_records_in)
    except Exception:
        _eval_line = ""
    if _eval_line:
        print(" " + c("dim", _eval_line))

    print()
    print(c("faint", "─" * COLS))
    print()

    # ── Recent cycles ───────────────────────────────────────────────────────
    section_head("RECENT", f"最近 {len(cycles)} 个 cycle",
                 "t · time Δ · duration tok · tokens $ · cost id · backlog")
    print()

    if not cycles:
        # These two rows are printed for every `lang` value, and the early
        # return skips the footer hints below.
        print(" " + c("dim", "no cycles yet — first run fires on next cron tick"))
        print(" " + c("dim", "尚无 cycle · 等待下一次 cron 触发"))
        return

    for day_key in days_keys:
        day_cycles = by_day[day_key]
        if not day_cycles:
            continue
        day_band(day_key, len(day_cycles),
                 sum(1 for c0 in day_cycles if c0["outcome"] == "fail"),
                 now,
                 in_progress=(day_key == today_key and is_partial))
        # Rows are printed in the reverse of the bucket's stored order.
        for cy in reversed(day_cycles):
            cycle_row(cy, backlog)
        print()

    print(c("faint", "─" * COLS))
    print()
    # Footer: follow-up command hints.
    print(" " +
          c("dim", "drill ") + c("blue", "roll loop show <cycle>") +
          c("muted", " ") +
          c("dim", "watch ") + c("blue", "roll loop --watch") +
          c("muted", " ") +
          c("dim", "more ") + c("blue", "roll loop status --days 7"))
|
|
1344
|
+
|
|
1345
|
+
# US-LOOP-032: period 1–1440; offset 0–59 (deprecated, kept for backward compat)
|
|
1346
|
+
def _schedule_valid(period: int, offset: int) -> bool:
    """Return True when a schedule spec is within bounds.

    `period` must lie in 1..1440 (minutes) and `offset` in 0..59.
    """
    if not (1 <= period <= 1440):
        return False
    return 0 <= offset < 60
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
def _read_schedule_spec(project_root: Optional[Path] = None) -> Tuple[int, int]:
    """US-LOOP-013: read loop schedule spec, mirroring bin/roll's _loop_schedule_spec.

    Lookup order (first usable source wins):
      1. ``<project_root>/.roll/local.yaml`` — needs both ``period_minutes``
         and ``offset_minute``, and the pair must pass ``_schedule_valid``.
      2. ``~/.roll/config.yaml`` — top-level ``loop_minute`` (backward
         compat); the period is fixed at 60.
      3. Default — period 60, offset derived from an md5 hash of the
         resolved project path (matches bin/roll).

    Args:
        project_root: project directory; defaults to the current directory.

    Returns:
        ``(period_minutes, offset_minute)``. The offset is always in
        ``[0, 60)``, so callers can pass it to ``datetime.replace(minute=...)``.
    """
    project_root = (project_root or Path()).resolve()

    # 1. Try project-level .roll/local.yaml
    local_file = project_root / ".roll" / "local.yaml"
    if local_file.exists():
        try:
            text = local_file.read_text(errors="ignore")
            # Parse loop_schedule block: loop_schedule:\n period_minutes: N\n offset_minute: N
            period_m = re.search(r'period_minutes:\s*(\d+)', text)
            offset_m = re.search(r'offset_minute:\s*(\d+)', text)
            if period_m and offset_m:
                period = int(period_m.group(1))
                offset = int(offset_m.group(1))
                if _schedule_valid(period, offset):
                    return (period, offset)
        except (OSError, ValueError):
            # Best-effort: an unreadable or garbled file falls through to
            # the next source instead of breaking the dashboard.
            pass

    # 2. Try global ~/.roll/config.yaml loop_minute (backward compat)
    config_file = Path(os.path.expanduser("~/.roll/config.yaml"))
    if config_file.exists():
        try:
            text = config_file.read_text(errors="ignore")
            m = re.search(r'^loop_minute:\s*(\d+)', text, re.MULTILINE)
            if m:
                offset = int(m.group(1))
                # Same offset bound that _schedule_valid enforces. An
                # out-of-range minute (e.g. 75) used to be returned as-is and
                # later crashed datetime.replace(minute=...); ignore it and
                # fall through to the hash-derived default instead.
                if 0 <= offset < 60:
                    return (60, offset)
        except (OSError, ValueError):
            pass

    # 3. Default: derive offset from project path hash (matches bin/roll)
    h = int(hashlib.md5(str(project_root).encode()).hexdigest()[:2], 16) % 60
    return (60, h)
|
|
1390
|
+
|
|
1391
|
+
|
|
1392
|
+
def _read_daily_plist_schedule(svc: str) -> Optional[Dict[str, Any]]:
    """US-LOOP-036: read the actual fire schedule of a daily service (dream/brief)
    from its launchd plist — the truth source after a `roll config <svc>-time`
    reload. Returns one of:

        {"mode": "calendar", "hour": H, "minute": M}   StartCalendarInterval
        {"mode": "interval"}                           legacy StartInterval

    or None when the plist is missing, unreadable or unparseable — including
    an Hour outside 0..23 or a Minute outside 0..59, which would otherwise
    make datetime.replace() raise when the next fire time is projected.
    Distinguishing the two modes lets the caller pick the right
    _compute_next_fire branch.

    Args:
        svc: service name used in the plist file name
            (``com.roll.<svc>.<slug>.plist``).
    """
    import re as _re
    slug = project_slug()
    # Honor _LAUNCHD_DIR (the same sandbox override bin/roll exports) so tests —
    # and any non-default install dir — read the plist the writer just wrote.
    ladir = os.environ.get("_LAUNCHD_DIR") or os.path.expanduser("~/Library/LaunchAgents")
    plist = Path(ladir) / f"com.roll.{svc}.{slug}.plist"
    if not plist.exists():
        return None
    try:
        text = plist.read_text(errors="ignore")
    except OSError:
        return None
    if "StartCalendarInterval" in text:
        h = _re.search(r"<key>Hour</key>\s*<integer>(\d+)</integer>", text)
        m = _re.search(r"<key>Minute</key>\s*<integer>(\d+)</integer>", text)
        if h:
            hour = int(h.group(1))
            # A calendar entry without a Minute key is shown as HH:00.
            minute = int(m.group(1)) if m else 0
            if not (0 <= hour <= 23 and 0 <= minute <= 59):
                # A hand-edited or corrupt plist: treat as unparseable rather
                # than letting the status dashboard crash downstream.
                return None
            return {"mode": "calendar", "hour": hour, "minute": minute}
    if "StartInterval" in text:
        return {"mode": "interval"}
    return None
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
+
def _daily_schedule_line(svc: str, now: Optional[datetime] = None) -> Optional[str]:
    """US-LOOP-036: build the one-line schedule summary for a daily service.

    Calendar mode yields ``<svc>: HH:MM (next fire in Xh Ym)``, where the
    countdown comes from _compute_next_fire. Legacy interval mode has no
    wall-clock anchor, so it only labels the mode. Returns None when
    _read_daily_plist_schedule finds no usable plist for the service.

    Args:
        svc: service name.
        now: reference time; defaults to the current local time.
    """
    schedule = _read_daily_plist_schedule(svc)
    if schedule is None:
        return None

    reference = now or datetime.now().astimezone()
    if schedule["mode"] != "calendar":
        return f"{svc}: daily (legacy interval)"

    fire_hour = schedule["hour"]
    fire_minute = schedule["minute"]
    next_epoch = _compute_next_fire(hour=fire_hour, minute=fire_minute, now=reference)
    text = f"{svc}: {fire_hour:02d}:{fire_minute:02d}"
    if next_epoch is None:
        return text

    # Whole seconds until the next fire, clamped so the countdown never goes negative.
    remaining = int(next_epoch - reference.timestamp())
    hours_left, minutes_left = divmod(max(remaining, 0) // 60, 60)
    return text + f" (next fire in {hours_left}h {minutes_left}m)"
|
|
1446
|
+
|
|
1447
|
+
|
|
1448
|
+
def _tick_age_line(loop_type: str, now: Optional[datetime] = None) -> Optional[str]:
    """FIX-151: read the last tick for a dedicated loop (pr/ci/alert) and return
    a human-readable age line, or None if no usable tick is available.

    The tick file is ``<runtime dir>/<loop_type>-tick.jsonl`` when
    _loop_runtime_dir_py() returns a directory, otherwise
    ``<shared root>/loop/<loop_type>-tick-<slug>.jsonl``. Only the ``ts``
    field of the last line is read; it must look like
    ``YYYY-MM-DDTHH:MM:SSZ`` (UTC).

    Args:
        loop_type: loop name used in the tick file name.
        now: reference time; defaults to the current UTC time. A naive
            value is interpreted as local time.

    Returns:
        ``"<loop_type>: tick <age> ago"`` with the age in s, m or h, or None
        when the file is missing, empty, unreadable, or has no parseable ts.
    """
    slug = project_slug()
    rt_dir = _loop_runtime_dir_py(slug)
    if rt_dir is not None:
        tick_file = rt_dir / f"{loop_type}-tick.jsonl"
    else:
        tick_file = shared_root() / "loop" / f"{loop_type}-tick-{slug}.jsonl"
    if not tick_file.exists():
        return None
    try:
        # errors="ignore": a stray undecodable byte must not crash the dashboard.
        last_line = tick_file.read_text(errors="ignore").strip().splitlines()[-1]
    except (IndexError, OSError):
        return None
    # Extract ts field from JSONL; tolerate optional whitespace around the
    # colon so both compact and default JSON serializations match.
    m = re.search(r'"ts"\s*:\s*"([^"]+)"', last_line)
    if not m:
        return None
    ts_str = m.group(1)
    try:
        # Parse ISO 8601 UTC timestamp
        tick_dt = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
    except ValueError:
        return None
    base = now or datetime.now(timezone.utc)
    if base.tzinfo is None:
        # Subtracting an aware datetime from a naive one raises TypeError;
        # interpret a naive reference as local time.
        base = base.astimezone()
    # Clamp at zero so clock skew or a future-dated tick never shows "-5s ago".
    age_sec = max(int((base - tick_dt).total_seconds()), 0)
    if age_sec < 60:
        age_str = f"{age_sec}s"
    elif age_sec < 3600:
        age_str = f"{age_sec // 60}m"
    else:
        age_str = f"{age_sec // 3600}h"
    return f"{loop_type}: tick {age_str} ago"
|
|
1482
|
+
|
|
1483
|
+
|
|
1484
|
+
def _detect_install_state() -> str:
    """FIX-095 / FIX-098: classify the launchd install state of the loop service.

    Returns one of:
      'not-installed' — no plist for com.roll.loop.<slug> in the LaunchAgents dir
      'stale'         — plist on disk but agent NOT registered in launchd
                        (happens after roll loop off + roll update without roll loop on)
      'enabled'       — plist on disk AND registered in launchd

    The LaunchAgents dir is ``$_LAUNCHD_DIR`` when set, else
    ``~/Library/LaunchAgents`` — the same resolution
    _read_daily_plist_schedule uses, so both readers look where the plist
    was written.

    FIX-098: switched from `launchctl print-disabled` (disabled-overrides DB) to
    `launchctl print gui/<uid>/<label>` which probes the actual launchd registry.
    The old approach returned false-positive 'enabled' when the disabled-overrides
    DB had no entry for the label (empty = not explicitly disabled, not loaded).
    """
    slug = project_slug()
    label = f"com.roll.loop.{slug}"
    # Honor the _LAUNCHD_DIR override for consistency with the daily-service
    # plist reader.
    ladir = os.environ.get("_LAUNCHD_DIR") or os.path.expanduser("~/Library/LaunchAgents")
    plist = Path(ladir) / f"{label}.plist"
    if not plist.exists():
        return "not-installed"
    try:
        uid = os.getuid()
        result = subprocess.run(
            ["launchctl", "print", f"gui/{uid}/{label}"],
            capture_output=True, timeout=2,
        )
    except Exception:
        # launchctl missing or timed out — assume stale (safe: user sees STALE
        # banner and is told to run 'roll loop on' to repair).
        return "stale"
    # Exit status 0 means launchd knows the label.
    return "enabled" if result.returncode == 0 else "stale"
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
def _compute_next_fire(
    *,
    hour: Optional[int] = None,
    minute: int = 0,
    last_fire: Optional[float] = None,
    now: Optional[datetime] = None,
) -> Optional[float]:
    """US-LOOP-036: compute the next fire epoch for a daily (dream/brief) service.

    Calendar mode (``hour`` given): the next HH:MM wall-clock instant after
    ``now`` — today if that time is still ahead, otherwise one day later.
    The roll-over is done by adding a timedelta to a real datetime, so month,
    year and leap-day boundaries need no manual date math.

    Legacy interval mode (``hour`` is None): ``last_fire`` + 24h, or None
    when ``last_fire`` is unknown and there is nothing to project from.

    Args:
        hour: wall-clock hour of the daily fire; None selects legacy mode.
        minute: wall-clock minute of the daily fire.
        last_fire: POSIX epoch of the previous fire (legacy mode only).
        now: reference time; defaults to the current local time.

    Returns:
        POSIX epoch in float seconds, or None.
    """
    if hour is not None:
        reference = now or datetime.now().astimezone()
        fire_at = reference.replace(hour=hour, minute=minute, second=0, microsecond=0)
        if fire_at > reference:
            return fire_at.timestamp()
        # Today's slot is already due or past, so the next one is tomorrow.
        return (fire_at + timedelta(days=1)).timestamp()

    # Legacy StartInterval=86400 — project from the last fire.
    if last_fire is None:
        return None
    return float(last_fire) + 86400.0
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
def _next_cron_hint(state: Dict[str, str], zh: bool = False) -> str:
    """US-LOOP-013: describe when the loop is next expected to fire.

    Starts at the schedule's offset minute within the current hour and steps
    forward by the schedule period until the slot lies after the current
    time, which also covers multi-trigger schedules (period < 60).

    Args:
        state: accepted for interface compatibility; not read here.
        zh: when True, return only the Chinese countdown.

    Returns:
        ``"HH:MM · in Xm SSs"``, or the countdown in Chinese when ``zh``.
    """
    current = datetime.now().astimezone()
    period, offset = _read_schedule_spec()

    upcoming = current.replace(minute=offset, second=0, microsecond=0)
    while upcoming <= current:
        upcoming = upcoming + timedelta(minutes=period)

    whole_minutes, leftover = divmod((upcoming - current).total_seconds(), 60)
    mins = int(whole_minutes)
    secs = int(leftover)
    if not zh:
        return upcoming.strftime("%H:%M") + f" · in {mins}m {secs:02d}s"
    return f"{mins} 分 {secs:02d} 秒"
|
|
1579
|
+
|
|
1580
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
1581
|
+
# Fixture data (test-only; opt in via ROLL_RENDER_FIXTURE=1)
|
|
1582
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
1583
|
+
def _fixture_data():
    """Build synthetic dashboard inputs for ROLL_RENDER_FIXTURE runs.

    Generates 3, 4 and 5 cycles for two days ago, yesterday and today (UTC),
    starting at HH:48 every five hours. Every cycle is "done" except the
    third cycle of yesterday, which is "fail"; each "done" cycle also gets a
    "pr" event.

    Returns:
        ``(events, cron, state, backlog)`` in the shapes render() consumes.
    """
    utc_now = datetime.now(timezone.utc)
    story_ids = ["FIX-048", "US-112", "FIX-047", "REFACT-9", "FIX-040"]
    events, cron = [], []
    cycle_id = 0
    for days_ago, cycles_that_day in zip((2, 1, 0), (3, 4, 5)):
        anchor = utc_now - timedelta(days=days_ago)
        for idx in range(cycles_that_day):
            start = anchor.replace(hour=idx * 5, minute=48, second=0, microsecond=0)
            end = start + timedelta(seconds=540 + idx * 120)
            start_ts = start.isoformat().replace("+00:00", "Z")
            end_ts = end.isoformat().replace("+00:00", "Z")
            label = start.strftime("%Y%m%d-%H%M%S-30585")
            story = story_ids[idx % 5]
            failed = days_ago == 1 and idx == 2
            outcome = "fail" if failed else "done"

            events.append({"ts": start_ts, "stage": "cycle_start",
                           "label": label, "detail": "", "outcome": "",
                           "_ts": start})
            events.append({"ts": start_ts, "stage": "pick_todo",
                           "label": label, "detail": f"{story} picked",
                           "outcome": "ok",
                           "_ts": start + timedelta(seconds=2)})
            events.append({"ts": end_ts, "stage": "cycle_end",
                           "label": label, "detail": "", "outcome": outcome,
                           "_ts": end})
            if not failed:
                events.append({"ts": end_ts,
                               "stage": "pr", "label": label,
                               "detail": f"https://github.com/x/y/pull/{50 + cycle_id}",
                               "outcome": "ok",
                               "_ts": end - timedelta(seconds=1)})

            # The cron entry carries the end time as local wall-clock fields.
            end_local = end.astimezone()
            cron.append({"hhmm": end_local.strftime("%H:%M"),
                         "ss": end_local.second,
                         "outcome": outcome,
                         "tcr": 0 if failed else 1,
                         "duration_s": int((end - start).total_seconds()),
                         "cost": 3.20 + idx * 0.32})
            cycle_id += 1

    state = {"status": "idle", "last_run_outcome": "success"}
    backlog = {
        "FIX-048": "Dedupe Todo across cycles",
        "US-112": "Loop run summary report",
        "FIX-047": "Cycle log rotation by day",
        "REFACT-9": "Extract stage runner module",
        "FIX-040": "8/12 tests failed → bail",
    }
    return events, cron, state, backlog
|
|
1626
|
+
|
|
1627
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
1628
|
+
# CLI
|
|
1629
|
+
# ════════════════════════════════════════════════════════════════════════════
|
|
1630
|
+
def main(argv=None):
    """CLI entry point for the `roll loop status` health dashboard.

    With ``--eval [N]`` it prints the result-eval trend view over the last N
    scored cycles (14 when N is omitted or not positive) and returns.
    Otherwise it decides whether to use color, picks the output language,
    loads the dashboard inputs (or the built-in fixture when
    ROLL_RENDER_FIXTURE is set) and calls render().

    Args:
        argv: argument list for argparse; None means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="roll loop status — health dashboard")
    parser.add_argument("--days", type=int, default=3, help="window in days (default 3)")
    parser.add_argument("--no-color", action="store_true", help="strip ANSI (also honors NO_COLOR=1)")
    parser.add_argument("--en", action="store_true", help="EN rows only")
    parser.add_argument("--zh", action="store_true", help="ZH rows only")
    parser.add_argument("--eval", nargs="?", type=int, const=14, default=None,
                        metavar="N",
                        help="result-eval trend view over the last N scored cycles "
                             "(default 14); prints mean/min/per-dim hit-rate and exits")
    opts = parser.parse_args(argv)

    fixture_mode = bool(os.environ.get("ROLL_RENDER_FIXTURE"))

    # US-EVAL-003: `roll loop eval [N]` — result-eval trend view, then exit.
    if opts.eval is not None:
        window = opts.eval if opts.eval > 0 else 14
        if fixture_mode:
            records: List[Dict[str, Any]] = []
        else:
            records = list(load_runs(project_slug()).values())
        print(format_eval_view(records, window=window))
        return

    # Color is on only when not disabled by flag or NO_COLOR, and stdout is a
    # TTY or FORCE_COLOR is set.
    roll_render.USE_COLOR = (
        not opts.no_color
        and not os.environ.get("NO_COLOR")
        and (sys.stdout.isatty() or os.environ.get("FORCE_COLOR"))
    )

    # --en wins when both language flags are given.
    if opts.en:
        lang = "en"
    elif opts.zh:
        lang = "zh"
    else:
        lang = "both"

    if fixture_mode:
        events, cron, state, backlog = _fixture_data()
        runs, git_merges, slug = {}, {}, None
    else:
        slug = project_slug()
        events = load_events(slug, opts.days)
        cron = load_cron_log(slug)
        state = load_state(slug)
        backlog = load_backlog()
        runs = load_runs(slug)
        git_merges = load_pr_merges_from_git(opts.days)

    render(events, cron, state, backlog,
           days=opts.days, lang=lang,
           runs=runs, git_merges=git_merges,
           claude_slug=slug)
|
|
1678
|
+
|
|
1679
|
+
if __name__ == "__main__":
    try:
        main()
        # Flush inside the try so a closed pipe surfaces here rather than
        # during interpreter shutdown, where it cannot be caught.
        sys.stdout.flush()
    except BrokenPipeError:
        # The reader went away (piped to `less`, `head`, etc.). Point stdout
        # at devnull so the interpreter's final flush cannot raise again and
        # print an "Exception ignored ... BrokenPipeError" message on exit.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
|