@seanyao/roll 2026.601.2 → 2026.601.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## v2026.601.3
4
+
5
+ ### 可见性
6
+
7
+ - **kimi cycle 现在也能看到 token 和成本(FIX-154)** — 以前 dashboard 对 kimi 那一行全是 `—/—`,看不到主力 agent 花了多少钱;现在 cycle 跑完读 kimi-code 的 `wire.jsonl`,把 token 数和成本写进事件流,RECENT 视图和成本总闸都看得见 `[loop]`
8
+
9
+ ### 稳定性
10
+
11
+ - **loop 把活派给 AI 后现在真会动手,不再空转零产出(FIX-152)** — kimi 等对话式 agent 拿到 SKILL.md 会把它当成"贴过来的文档"反问"What would you like me to do?",8 秒空返没交付;技能正文前置一条 agent 无关的自主执行指令,kimi/claude/pi/codex/agy 现在都会直接动手 `[loop]`
12
+ - **agy 在 loop / cron 自动化里不再卡 tty 等待(FIX-153)** — antigravity(agy)默认要 tty 批准操作,自动化场景拿不到 tty 就一直挂着等;现在 headless 模式自动加 `-p` 和跳过权限标记,跑得到结果 `[loop]`
13
+ - **测试不再在桌面弹空报错终端(FIX-155)** — bats 测试跑完临时沙箱删了,但 peer auto-attach 弹的 Terminal 窗口指向那个已不存在的路径,桌面堆一堆空报错的死窗口;给 peer 弹窗补上和 loop 弹窗一样的测试守卫,测试上下文不再弹 `[loop]`
14
+
3
15
  ## v2026.601.2
4
16
 
5
17
  ### 新功能
package/bin/roll CHANGED
@@ -4,7 +4,7 @@ set -euo pipefail
4
4
  # Roll — AI Agent Convention Manager
5
5
  # Single source of truth for how all AI coding agents behave.
6
6
 
7
- VERSION="2026.601.2"
7
+ VERSION="2026.601.3"
8
8
  ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
9
9
  ROLL_CONFIG="${ROLL_HOME}/config.yaml"
10
10
  ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
@@ -3841,6 +3841,8 @@ _peer_route() {
3841
3841
  _peer_auto_attach() {
3842
3842
  local session="$1"
3843
3843
  [ "$(uname)" = "Darwin" ] || return 0
3844
+ [ -n "${BATS_TEST_NUMBER:-}" ] && return 0
3845
+ [ -n "${ROLL_LOOP_NO_POPUP:-}" ] && return 0
3844
3846
  [ -f "$_LOOP_MUTE_FILE" ] && return 0
3845
3847
  local attach_cmd="${_SHARED_ROOT}/loop/attach-${session}.command"
3846
3848
  # Drop `exec` so the wrapping shell survives `tmux attach` exiting; pause
@@ -4411,10 +4413,12 @@ _agent_argv() {
4411
4413
  # late 2025. agy reuses ~/.gemini/ for config and reads GEMINI.md
4412
4414
  # natively, so the convention sync target is unchanged — only the
4413
4415
  # invoked binary changes.
4416
+ # FIX-153: non-interactive modes must use -p (headless) +
4417
+ # --dangerously-skip-permissions so the agent does not hang waiting
4418
+ # for a tty approval that never comes in loop/cron contexts.
4414
4419
  case "$mode" in
4415
4420
  interactive) _AGENT_ARGV=(agy -i "$prompt") ;;
4416
- text|peer) _AGENT_ARGV=(agy "$prompt") ;;
4417
- *) _AGENT_ARGV=(agy "$prompt") ;;
4421
+ *) _AGENT_ARGV=(agy -p --dangerously-skip-permissions "$prompt") ;;
4418
4422
  esac ;;
4419
4423
  qwen)
4420
4424
  # qwen has the same argv shape in both modes (positional prompt).
@@ -8162,7 +8166,11 @@ _write_loop_runner_script() {
8162
8166
  # US-LOOP-026: post-cycle single-shot usage writer for non-claude agents.
8163
8167
  # pi -p text mode prints no usage, so we recover it from pi's session jsonl
8164
8168
  # exactly once per cycle (loop-fmt passthrough is display-only).
8169
+ # FIX-154: kimi-code's `-p` mode also writes nothing to stdout but persists
8170
+ # usage to wire.jsonl; kimi_emit covers that path. bin/roll dispatches by
8171
+ # agent (pi/deepseek → pi_emit, kimi → kimi_emit).
8165
8172
  local pi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/pi_emit.py"
8173
+ local kimi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/kimi_emit.py"
8166
8174
  local roll_bin="${ROLL_PKG_DIR}/bin/roll"
8167
8175
  # US-EVAL-002: pure-function rubric scorer (US-EVAL-001). Baked in at
8168
8176
  # generation time so the inner runner can compute result_eval at cycle finish.
@@ -8831,23 +8839,37 @@ else
8831
8839
  _phase_end agent_invoke ok
8832
8840
  fi
8833
8841
 
8834
- # US-LOOP-026: non-claude agents (pi/deepseek/kimi) print no usage in -p text
8835
- # mode. Recover token+cost once per cycle from the agent's session jsonl and
8836
- # append a single authoritative usage event. Done here (not in loop-fmt's
8837
- # per-attempt passthrough) so retries can't write N duplicate events that the
8838
- # dashboard's same-label SUM would inflate. Runs before the timeout-abort exit
8839
- # so partial cycles still get whatever usage the session recorded. The events
8840
- # path is resolved exactly like _loop_event (rt_dir first, shared fallback) so
8841
- # pi_emit appends to the same file the reader consumes.
8842
- if [ "\$(_project_agent)" != "claude" ] && [ -f "${pi_emit_script}" ]; then
8842
+ # US-LOOP-026 + FIX-154: non-claude agents (pi/deepseek/kimi) print no usage
8843
+ # in -p text mode. Recover token+cost once per cycle from the agent's session
8844
+ # jsonl and append a single authoritative usage event. Done here (not in
8845
+ # loop-fmt's per-attempt passthrough) so retries can't write N duplicate
8846
+ # events that the dashboard's same-label SUM would inflate. Runs before the
8847
+ # timeout-abort exit so partial cycles still get whatever usage the session
8848
+ # recorded. The events path is resolved exactly like _loop_event (rt_dir
8849
+ # first, shared fallback) so the emitter appends to the same file the reader
8850
+ # consumes. Dispatch by agent so each emitter reads the right session format
8851
+ # (pi.usage_from_session vs kimi.usage_from_session).
8852
+ if [ "\$(_project_agent)" != "claude" ]; then
8843
8853
  _pi_rt=\$(_loop_runtime_dir "${slug}" 2>/dev/null || echo "")
8844
8854
  if [ -n "\$_pi_rt" ]; then
8845
8855
  _pi_evfile="\${_pi_rt}/events.ndjson"
8846
8856
  else
8847
8857
  _pi_evfile="\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop/events-${slug}.ndjson"
8848
8858
  fi
8849
- python3 "${pi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
8850
- --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
8859
+ case "\$(_project_agent)" in
8860
+ kimi)
8861
+ if [ -f "${kimi_emit_script}" ]; then
8862
+ python3 "${kimi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
8863
+ --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
8864
+ fi
8865
+ ;;
8866
+ *)
8867
+ if [ -f "${pi_emit_script}" ]; then
8868
+ python3 "${pi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
8869
+ --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
8870
+ fi
8871
+ ;;
8872
+ esac
8851
8873
  fi
8852
8874
 
8853
8875
  # FIX-057: timed out — skip publish; EXIT trap writes cycle_end blocked + ALERT.
@@ -9423,7 +9445,15 @@ _agent_skill_cmd() {
9423
9445
  for ((i = 1; i < prompt_idx; i++)); do
9424
9446
  out+=" ${_AGENT_ARGV[i]}"
9425
9447
  done
9426
- echo "${out} \"\$(${strip})\""
9448
+ # FIX-152: prepend an explicit autonomous-execution directive ahead of the bare
9449
+ # SKILL.md body. Without it, conversational `-p` agents (notably kimi-code) read
9450
+ # the skill doc as pasted context and reply "what would you like me to do?",
9451
+ # returning in seconds with zero output → the cycle ends idle, no delivery.
9452
+ # pi/deepseek/claude tolerate the bare doc, but the directive is agent-agnostic
9453
+ # and hardens every autonomous cron skill (loop/dream/brief share this chokepoint).
9454
+ # Must stay free of " $ ` \ so it survives the later `eval` of the cycle command.
9455
+ local _autorun='[roll 自主模式] 你正在无人值守的自动化循环中运行,这不是对话。请立即、完整地执行下面这份技能文档描述的工作流,直到完成交付或写出 ALERT 为止;严禁反问、严禁等待确认、严禁只复述或总结而不动手。技能文档如下: '
9456
+ echo "${out} \"${_autorun}\$(${strip})\""
9427
9457
  }
9428
9458
 
9429
9459
  # FIX-134: build the full per-cycle agent command at RUNTIME, routing-aware.
@@ -1,29 +1,33 @@
1
1
  """
2
2
  kimi (Moonshot Kimi CLI) agent usage extractor.
3
3
 
4
- Like openai and gemini (and unlike pi, which persists usage to session
5
- files), the Kimi CLI prints a token-usage summary to stdout at the end of a
6
- session. So this plugin implements the standard ``extract()`` registry
7
- contract: scrape the passthrough stdout lines for the usage / model lines.
4
+ Two paths are supported, mirroring pi.py:
8
5
 
9
- Recognised lines (case-insensitive, robust to thousands separators)::
6
+ 1. ``extract()`` — the registry stdout-scrape contract, kept for legacy
7
+ callers (and as a fallback when session files are absent).
8
+ 2. ``usage_from_session()`` — authoritative recovery from kimi-code's
9
+ persisted session files at ``~/.kimi-code/sessions/wd_*/session_*/agents/main/wire.jsonl``.
10
+ Each wire file is NDJSON with one or more ``{"type":"usage.record","model":...,"usage":{...}}``
11
+ lines whose token fields are summed per cycle.
10
12
 
11
- Model: kimi-k2
12
- Tokens: input=15300 output=3120
13
+ FIX-154 added the session path so loop cycles run by kimi-code (the
14
+ default agent today) no longer show ``—/—`` for tokens and cost in the
15
+ RECENT dashboard.
13
16
 
14
- The Kimi CLI's "usage" / session-summary block is also accepted::
17
+ The stdout-scrape contract still recognises (case-insensitive)::
15
18
 
19
+ Model: kimi-k2
20
+ Tokens: input=15300 output=3120
16
21
  Input tokens: 15,300
17
22
  Output tokens: 3,120
18
23
  Total tokens: 18,420
19
- model: kimi-k2
20
24
 
21
25
  When an explicit USD cost line isn't present, cost is computed from
22
- ``lib/model_prices.py`` (list price) so the dashboard never shows ``—``
23
- for a recognised kimi cycle. Returns None if no usage line is found,
24
- so the caller falls back to the null payload (US-LOOP-010 compatible).
26
+ ``lib/model_prices.py`` (list price).
25
27
  """
26
28
 
29
+ import glob
30
+ import json
27
31
  import os
28
32
  import re
29
33
  import sys
@@ -125,3 +129,150 @@ def extract(stdin_lines: list[str]) -> Optional[dict]:
125
129
  "cost_list_usd": cost,
126
130
  "duration_ms": None,
127
131
  }
132
+
133
+
134
+ # ── Session-file extraction (authoritative, FIX-154) ───────────────────────
135
+
136
+ # kimi-code persists every CLI session under
137
+ # ``~/.kimi-code/sessions/wd_<cwd-basename>_<8-hex>/session_<uuid>/agents/main/wire.jsonl``
138
+ # where ``<cwd-basename>`` is the basename of the cycle's worktree
139
+ # (e.g. ``roll-ecf079-cycle-20260601-170905-54957``).
140
+ # Each wire file is NDJSON; one or more lines have::
141
+ #
142
+ # {"type": "usage.record", "model": "kimi-code/kimi-for-coding",
143
+ # "usage": {"inputOther": <int>, "output": <int>,
144
+ # "inputCacheRead": <int>, "inputCacheCreation": <int>},
145
+ # "usageScope": "turn", "time": <ms>}
146
+ #
147
+ # We sum across all matching wire files (retries reuse the same worktree).
148
+
149
+
150
+ def _kimi_sessions_base_dir(base_dir: Optional[str]) -> str:
151
+ """Resolve kimi-code's sessions root: arg → env → default."""
152
+ return (
153
+ base_dir
154
+ or os.environ.get("ROLL_KIMI_SESSIONS_DIR")
155
+ or os.path.expanduser("~/.kimi-code/sessions")
156
+ )
157
+
158
+
159
+ def _sum_wire_file(path: str) -> Optional[dict]:
160
+ """Sum ``usage.record`` lines in a single kimi wire.jsonl.
161
+
162
+ Returns a usage dict or None when no usage records are found.
163
+ Field mapping kimi → roll::
164
+
165
+ inputOther → input_tokens
166
+ output → output_tokens
167
+ inputCacheRead → cache_read_tokens
168
+ inputCacheCreation → cache_creation_tokens
169
+ """
170
+ tin = tout = tcr = tcw = 0
171
+ model = None
172
+ seen = False
173
+ try:
174
+ with open(path) as f:
175
+ for line in f:
176
+ line = line.strip()
177
+ if not line:
178
+ continue
179
+ try:
180
+ o = json.loads(line)
181
+ except json.JSONDecodeError:
182
+ continue
183
+ if o.get("type") != "usage.record":
184
+ continue
185
+ u = o.get("usage") or {}
186
+ seen = True
187
+ if o.get("model"):
188
+ model = o["model"]
189
+ tin += int(u.get("inputOther") or 0)
190
+ tout += int(u.get("output") or 0)
191
+ tcr += int(u.get("inputCacheRead") or 0)
192
+ tcw += int(u.get("inputCacheCreation") or 0)
193
+ except OSError:
194
+ return None
195
+ if not seen:
196
+ return None
197
+ return {
198
+ "model": model or _DEFAULT_MODEL,
199
+ "input_tokens": tin,
200
+ "output_tokens": tout,
201
+ "cache_creation_tokens": tcw,
202
+ "cache_read_tokens": tcr,
203
+ "duration_ms": None,
204
+ }
205
+
206
+
207
def usage_from_session(
    cwd: Optional[str] = None,
    cycle_id: Optional[str] = None,
    slug: Optional[str] = None,
    base_dir: Optional[str] = None,
) -> Optional[dict]:
    """Recover a kimi cycle's usage by reading its persisted wire file(s).

    Matching: scan ``<base>/wd_*/session_*/agents/main/wire.jsonl`` and
    select files whose ``wd_*`` directory name contains
    ``wd_<worktree basename>_`` (tried first, when ``cwd`` is given) or the
    substring ``cycle-<cycle_id>`` (fallback).

    Retries can produce multiple wire files for the same cycle; their
    usage is SUMMED so token totals reflect retry work too.

    Args:
        cwd: cycle worktree path; only its basename is used.
        cycle_id: cycle id used for the fallback directory-name match.
        slug: accepted for signature parity with the caller; not used here.
        base_dir: sessions root override (see ``_kimi_sessions_base_dir``).

    Returns:
        The merged usage dict (tokens + model), or None when nothing
        matches / zero tokens — caller writes nothing in that case,
        preserving "n/a, not fake zero".
    """
    base = _kimi_sessions_base_dir(base_dir)
    # Escape the root before globbing: a home directory containing ``[``,
    # ``*`` or ``?`` would otherwise be read as a pattern and match nothing.
    files = sorted(glob.glob(
        os.path.join(
            glob.escape(base), "wd_*", "session_*", "agents", "main", "wire.jsonl"
        )
    ))
    if not files:
        return None

    cwd_basename = os.path.basename(cwd.rstrip("/")) if cwd else None
    matched = []
    for path in files:
        # Path: <base>/wd_<cwd-basename>_<hash>/session_<uuid>/agents/main/wire.jsonl
        # Climb four levels (wire.jsonl → main → agents → session_* → wd_*)
        # instead of slicing by len(base), so the result does not depend on
        # how ``base`` happens to be spelled.
        wd_dir = path
        for _ in range(4):
            wd_dir = os.path.dirname(wd_dir)
        wd_seg = os.path.basename(wd_dir)
        # The appended "_" also lets a directory with no hash suffix match.
        if cwd_basename and ("wd_%s_" % cwd_basename) in (wd_seg + "_"):
            matched.append(path)
            continue
        if cycle_id and ("cycle-%s" % cycle_id) in wd_seg:
            matched.append(path)

    if not matched:
        return None

    agg = {
        "model": None,
        "input_tokens": 0,
        "output_tokens": 0,
        "cache_creation_tokens": 0,
        "cache_read_tokens": 0,
        "duration_ms": None,
    }
    got = False
    for path in matched:
        s = _sum_wire_file(path)
        if s is None:
            continue
        got = True
        # The first file that yields a model names the cycle's model.
        agg["model"] = agg["model"] or s["model"]
        agg["input_tokens"] += s["input_tokens"]
        agg["output_tokens"] += s["output_tokens"]
        agg["cache_creation_tokens"] += s["cache_creation_tokens"]
        agg["cache_read_tokens"] += s["cache_read_tokens"]

    if not got:
        return None
    has_tokens = (
        agg["input_tokens"] or agg["output_tokens"]
        or agg["cache_creation_tokens"] or agg["cache_read_tokens"]
    )
    if not has_tokens:
        # All-zero totals are reported as "nothing found", not as a zero event.
        return None
    agg["model"] = agg["model"] or _DEFAULT_MODEL
    return agg
@@ -0,0 +1,123 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ kimi_emit — write ONE authoritative usage event for a finished kimi cycle.
4
+
5
+ Mirror of ``pi_emit.py``: invoked once by bin/roll after the agent phase
6
+ when ROLL_LOOP_AGENT == "kimi". Recovers the cycle's real usage from
7
+ kimi-code's persisted ``wire.jsonl`` files via ``kimi.usage_from_session``
8
+ and appends a single ``stage=="usage"`` event to the loop events file.
9
+
10
+ Exactly one event per cycle — the dashboard SUMS token fields across
11
+ same-label usage events, so a per-retry write path would inflate ×N.
12
+
13
+ Cost is frozen at the active price snapshot via
14
+ ``model_prices.compute_list_cost`` in the model's native currency.
15
+
16
+ When ``usage_from_session`` finds nothing (no matching session, zero
17
+ tokens) we write nothing — preserving "show n/a, not a fake zero".
18
+ """
19
+
20
+ import argparse
21
+ import importlib.util
22
+ import json
23
+ import os
24
+ import sys
25
+ from datetime import datetime, timezone
26
+
27
+ _THIS_DIR = os.path.dirname(os.path.abspath(__file__))
28
+ _LIB_DIR = os.path.dirname(_THIS_DIR)
29
+
30
+
31
+ def _load_model_prices():
32
+ spec = importlib.util.spec_from_file_location(
33
+ "model_prices", os.path.join(_LIB_DIR, "model_prices.py")
34
+ )
35
+ mp = importlib.util.module_from_spec(spec)
36
+ spec.loader.exec_module(mp)
37
+ return mp
38
+
39
+
40
+ def _load_kimi():
41
+ spec = importlib.util.spec_from_file_location(
42
+ "agent_usage_kimi", os.path.join(_THIS_DIR, "kimi.py")
43
+ )
44
+ kimi = importlib.util.module_from_spec(spec)
45
+ spec.loader.exec_module(kimi)
46
+ return kimi
47
+
48
+
49
+ def build_event(cwd=None, cycle_id=None, slug=None, base_dir=None):
50
+ """Return the (line dict) usage event for a kimi cycle, or None to skip."""
51
+ kimi = _load_kimi()
52
+ u = kimi.usage_from_session(
53
+ cwd=cwd, cycle_id=cycle_id, slug=slug, base_dir=base_dir
54
+ )
55
+ if u is None:
56
+ return None
57
+
58
+ mp = _load_model_prices()
59
+ model = u.get("model") or "kimi-k2.5"
60
+ totals = {
61
+ "input_tokens": int(u.get("input_tokens") or 0),
62
+ "output_tokens": int(u.get("output_tokens") or 0),
63
+ "cache_creation_tokens": int(u.get("cache_creation_tokens") or 0),
64
+ "cache_read_tokens": int(u.get("cache_read_tokens") or 0),
65
+ }
66
+ cost_list = mp.compute_list_cost(model, **totals)
67
+ currency = mp.currency_for(model)
68
+
69
+ payload = {
70
+ "model": model,
71
+ "input_tokens": totals["input_tokens"],
72
+ "output_tokens": totals["output_tokens"],
73
+ "cache_creation_tokens": totals["cache_creation_tokens"],
74
+ "cache_read_tokens": totals["cache_read_tokens"],
75
+ "duration_ms": u.get("duration_ms"),
76
+ "cost_list_usd": cost_list,
77
+ "cost_currency": currency,
78
+ "prices_version": getattr(mp, "VERSION", None),
79
+ }
80
+ return {
81
+ "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
82
+ "stage": "usage",
83
+ "label": cycle_id,
84
+ "detail": payload,
85
+ "outcome": "ok",
86
+ }
87
+
88
+
89
+ def _default_events_path(slug, shared):
90
+ base = shared or os.environ.get("LOOP_SHARED_ROOT") \
91
+ or os.path.expanduser("~/.shared/roll")
92
+ return os.path.join(base, "loop", "events-%s.ndjson" % slug)
93
+
94
+
95
+ def main(argv=None):
96
+ ap = argparse.ArgumentParser(description="emit one kimi usage event")
97
+ ap.add_argument("--cwd", help="cycle worktree path (authoritative match)")
98
+ ap.add_argument("--cycle", help="cycle id (label + dir-name fallback)")
99
+ ap.add_argument("--slug", help="project slug (events filename)")
100
+ ap.add_argument("--shared", help="shared root (for default events path)")
101
+ ap.add_argument("--events", help="explicit events file path (preferred)")
102
+ ap.add_argument("--base-dir", help="kimi sessions root override (tests)")
103
+ args = ap.parse_args(argv)
104
+
105
+ event = build_event(
106
+ cwd=args.cwd, cycle_id=args.cycle, slug=args.slug, base_dir=args.base_dir
107
+ )
108
+ if event is None:
109
+ return 0 # nothing recoverable — write nothing (n/a, not fake zero)
110
+
111
+ evfile = args.events or _default_events_path(args.slug, args.shared)
112
+ try:
113
+ os.makedirs(os.path.dirname(evfile), exist_ok=True)
114
+ with open(evfile, "a") as f:
115
+ f.write(json.dumps(event) + "\n")
116
+ except OSError as e:
117
+ print("[kimi_emit] failed to write %s: %s" % (evfile, e), file=sys.stderr)
118
+ return 1
119
+ return 0
120
+
121
+
122
+ if __name__ == "__main__":
123
+ sys.exit(main())
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@seanyao/roll",
3
- "version": "2026.601.2",
3
+ "version": "2026.601.3",
4
4
  "description": "Roll — Roll out features with AI agents",
5
5
  "scripts": {
6
6
  "test": "bash tests/run.sh"