npm - @seanyao/roll - Versions diffs - 2026.601.2 → 2026.601.3 - Mend

@seanyao/roll 2026.601.2 → 2026.601.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHANGELOG.md +12 -0
package/bin/roll +45 -15
package/lib/agent_usage/kimi.py +163 -12
package/lib/agent_usage/kimi_emit.py +123 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Changelog
+## v2026.601.3
+### 可见性
+- **kimi cycle 现在也能看到 token 和成本(FIX-154)** — 以前 dashboard 对 kimi 那一行全是 `—/—`,看不到主力 agent 花了多少钱;现在 cycle 跑完读 kimi-code 的 `wire.jsonl`,把 token 数和成本写进事件流,RECENT 视图和成本总闸都看得见 `[loop]`
+### 稳定性
+- **loop 把活派给 AI 后现在真会动手,不再空转零产出(FIX-152)** — kimi 等对话式 agent 拿到 SKILL.md 会把它当成"贴过来的文档"反问"What would you like me to do?",8 秒空返没交付;技能正文前置一条 agent 无关的自主执行指令,kimi/claude/pi/codex/agy 现在都会直接动手 `[loop]`
+- **agy 在 loop / cron 自动化里不再卡 tty 等待(FIX-153)** — antigravity(agy)默认要 tty 批准操作,自动化场景拿不到 tty 就一直挂着等;现在 headless 模式自动加 `-p` 和跳过权限标记,跑得到结果 `[loop]`
+- **测试不再在桌面弹空报错终端(FIX-155)** — bats 测试跑完临时沙箱删了,但 peer auto-attach 弹的 Terminal 窗口指向那个已不存在的路径,桌面堆一堆空报错的死窗口;给 peer 弹窗补上和 loop 弹窗一样的测试守卫,测试上下文不再弹 `[loop]`
 ## v2026.601.2
 ### 新功能

package/bin/roll CHANGED Viewed

@@ -4,7 +4,7 @@ set -euo pipefail
 # Roll — AI Agent Convention Manager
 # Single source of truth for how all AI coding agents behave.
-VERSION="2026.601.2"
+VERSION="2026.601.3"
 ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
 ROLL_CONFIG="${ROLL_HOME}/config.yaml"
 ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
@@ -3841,6 +3841,8 @@ _peer_route() {
 _peer_auto_attach() {
   local session="$1"
   [ "$(uname)" = "Darwin" ] || return 0
+  [ -n "${BATS_TEST_NUMBER:-}" ] && return 0
+  [ -n "${ROLL_LOOP_NO_POPUP:-}" ] && return 0
   [ -f "$_LOOP_MUTE_FILE" ] && return 0
   local attach_cmd="${_SHARED_ROOT}/loop/attach-${session}.command"
   # Drop `exec` so the wrapping shell survives `tmux attach` exiting; pause
@@ -4411,10 +4413,12 @@ _agent_argv() {
       # late 2025. agy reuses ~/.gemini/ for config and reads GEMINI.md
       # natively, so the convention sync target is unchanged — only the
       # invoked binary changes.
+      # FIX-153: non-interactive modes must use -p (headless) +
+      # --dangerously-skip-permissions so the agent does not hang waiting
+      # for a tty approval that never comes in loop/cron contexts.
       case "$mode" in
         interactive) _AGENT_ARGV=(agy -i "$prompt") ;;
-        text|peer)   _AGENT_ARGV=(agy "$prompt") ;;
-        *)           _AGENT_ARGV=(agy "$prompt") ;;
+        *)           _AGENT_ARGV=(agy -p --dangerously-skip-permissions "$prompt") ;;
       esac ;;
     qwen)
       # qwen has the same argv shape in both modes (positional prompt).
@@ -8162,7 +8166,11 @@ _write_loop_runner_script() {
   # US-LOOP-026: post-cycle single-shot usage writer for non-claude agents.
   # pi -p text mode prints no usage, so we recover it from pi's session jsonl
   # exactly once per cycle (loop-fmt passthrough is display-only).
+  # FIX-154: kimi-code's `-p` mode also prints no usage to stdout but persists
+  # it to wire.jsonl; kimi_emit covers that path. bin/roll dispatches by
+  # agent (kimi → kimi_emit, every other non-claude agent → pi_emit).
   local pi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/pi_emit.py"
+  local kimi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/kimi_emit.py"
   local roll_bin="${ROLL_PKG_DIR}/bin/roll"
   # US-EVAL-002: pure-function rubric scorer (US-EVAL-001). Baked in at
   # generation time so the inner runner can compute result_eval at cycle finish.
@@ -8831,23 +8839,37 @@ else
   _phase_end agent_invoke ok
 fi
-# US-LOOP-026: non-claude agents (pi/deepseek/kimi) print no usage in -p text
-# mode. Recover token+cost once per cycle from the agent's session jsonl and
-# append a single authoritative usage event. Done here (not in loop-fmt's
-# per-attempt passthrough) so retries can't write N duplicate events that the
-# dashboard's same-label SUM would inflate. Runs before the timeout-abort exit
-# so partial cycles still get whatever usage the session recorded. The events
-# path is resolved exactly like _loop_event (rt_dir first, shared fallback) so
-# pi_emit appends to the same file the reader consumes.
-if [ "\$(_project_agent)" != "claude" ] && [ -f "${pi_emit_script}" ]; then
+# US-LOOP-026 + FIX-154: non-claude agents (pi/deepseek/kimi) print no usage
+# in -p text mode. Recover token+cost once per cycle from the agent's session
+# jsonl and append a single authoritative usage event. Done here (not in
+# loop-fmt's per-attempt passthrough) so retries can't write N duplicate
+# events that the dashboard's same-label SUM would inflate. Runs before the
+# timeout-abort exit so partial cycles still get whatever usage the session
+# recorded. The events path is resolved exactly like _loop_event (rt_dir
+# first, shared fallback) so the emitter appends to the same file the reader
+# consumes. Dispatch by agent so each emitter reads the right session format
+# (pi.usage_from_session vs kimi.usage_from_session).
+if [ "\$(_project_agent)" != "claude" ]; then
   _pi_rt=\$(_loop_runtime_dir "${slug}" 2>/dev/null || echo "")
   if [ -n "\$_pi_rt" ]; then
     _pi_evfile="\${_pi_rt}/events.ndjson"
   else
     _pi_evfile="\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop/events-${slug}.ndjson"
   fi
-  python3 "${pi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
-    --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
+  case "\$(_project_agent)" in
+    kimi)
+      if [ -f "${kimi_emit_script}" ]; then
+        python3 "${kimi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
+          --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
+      fi
+      ;;
+    *)
+      if [ -f "${pi_emit_script}" ]; then
+        python3 "${pi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
+          --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
+      fi
+      ;;
+  esac
 fi
 # FIX-057: timed out — skip publish; EXIT trap writes cycle_end blocked + ALERT.
@@ -9423,7 +9445,15 @@ _agent_skill_cmd() {
   for ((i = 1; i < prompt_idx; i++)); do
     out+=" ${_AGENT_ARGV[i]}"
   done
-  echo "${out} \"\$(${strip})\""
+  # FIX-152: prepend an explicit autonomous-execution directive ahead of the bare
+  # SKILL.md body. Without it, conversational `-p` agents (notably kimi-code) read
+  # the skill doc as pasted context and reply "what would you like me to do?",
+  # returning in seconds with zero output → the cycle ends idle, no delivery.
+  # pi/deepseek/claude tolerate the bare doc, but the directive is agent-agnostic
+  # and hardens every autonomous cron skill (loop/dream/brief share this chokepoint).
+  # Must stay free of " $ ` \ so it survives the later `eval` of the cycle command.
+  local _autorun='[roll 自主模式] 你正在无人值守的自动化循环中运行,这不是对话。请立即、完整地执行下面这份技能文档描述的工作流,直到完成交付或写出 ALERT 为止;严禁反问、严禁等待确认、严禁只复述或总结而不动手。技能文档如下: '
+  echo "${out} \"${_autorun}\$(${strip})\""
 }
 # FIX-134: build the full per-cycle agent command at RUNTIME, routing-aware.

package/lib/agent_usage/kimi.py CHANGED Viewed

@@ -1,29 +1,33 @@
 """
 kimi (Moonshot Kimi CLI) agent usage extractor.
-Like openai and gemini (and unlike pi, which persists usage to session
-files), the Kimi CLI prints a token-usage summary to stdout at the end of a
-session.  So this plugin implements the standard ``extract()`` registry
-contract: scrape the passthrough stdout lines for the usage / model lines.
+Two paths are supported, mirroring pi.py:
-Recognised lines (case-insensitive, robust to thousands separators)::
+1. ``extract()`` — the registry stdout-scrape contract, kept for legacy
+   callers (and as a fallback when session files are absent).
+2. ``usage_from_session()`` — authoritative recovery from kimi-code's
+   persisted session files at ``~/.kimi-code/sessions/wd_*/session_*/agents/main/wire.jsonl``.
+   Each wire file is NDJSON with one or more ``{"type":"usage.record","model":...,"usage":{...}}``
+   lines whose token fields are summed per cycle.
-    Model: kimi-k2
-    Tokens: input=15300 output=3120
+FIX-154 added the session path so loop cycles run by kimi-code (the
+default agent today) no longer show ``—/—`` for tokens and cost in the
+RECENT dashboard.
-The Kimi CLI's "usage" / session-summary block is also accepted::
+The stdout-scrape contract still recognises (case-insensitive)::
+    Model: kimi-k2
+    Tokens: input=15300 output=3120
     Input tokens:  15,300
     Output tokens:  3,120
     Total tokens:  18,420
-    model: kimi-k2
 When an explicit USD cost line isn't present, cost is computed from
-``lib/model_prices.py`` (list price) so the dashboard never shows ``—``
-for a recognised kimi cycle.  Returns None if no usage line is found,
-so the caller falls back to the null payload (US-LOOP-010 compatible).
+``lib/model_prices.py`` (list price).
 """
+import glob
+import json
 import os
 import re
 import sys
@@ -125,3 +129,150 @@ def extract(stdin_lines: list[str]) -> Optional[dict]:
         "cost_list_usd": cost,
         "duration_ms": None,
     }
+# ── Session-file extraction (authoritative, FIX-154) ───────────────────────
+# kimi-code persists every CLI session under
+# ``~/.kimi-code/sessions/wd_<cwd-basename>_<8-hex>/session_<uuid>/agents/main/wire.jsonl``
+# where ``<cwd-basename>`` is the basename of the cycle's worktree
+# (e.g. ``roll-ecf079-cycle-20260601-170905-54957``).
+# Each wire file is NDJSON; one or more lines have::
+#
+#     {"type": "usage.record", "model": "kimi-code/kimi-for-coding",
+#      "usage": {"inputOther": <int>, "output": <int>,
+#                "inputCacheRead": <int>, "inputCacheCreation": <int>},
+#      "usageScope": "turn", "time": <ms>}
+#
+# We sum across all matching wire files (retries reuse the same worktree).
+def _kimi_sessions_base_dir(base_dir: Optional[str]) -> str:
+    """Resolve kimi-code's sessions root: arg → env → default."""
+    return (
+        base_dir
+        or os.environ.get("ROLL_KIMI_SESSIONS_DIR")
+        or os.path.expanduser("~/.kimi-code/sessions")
+    )
+def _sum_wire_file(path: str) -> Optional[dict]:
+    """Sum ``usage.record`` lines in a single kimi wire.jsonl.
+    Returns a usage dict or None when no usage records are found.
+    Field mapping kimi → roll::
+        inputOther         → input_tokens
+        output             → output_tokens
+        inputCacheRead     → cache_read_tokens
+        inputCacheCreation → cache_creation_tokens
+    """
+    tin = tout = tcr = tcw = 0
+    model = None
+    seen = False
+    try:
+        with open(path) as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    o = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                if o.get("type") != "usage.record":
+                    continue
+                u = o.get("usage") or {}
+                seen = True
+                if o.get("model"):
+                    model = o["model"]
+                tin += int(u.get("inputOther") or 0)
+                tout += int(u.get("output") or 0)
+                tcr += int(u.get("inputCacheRead") or 0)
+                tcw += int(u.get("inputCacheCreation") or 0)
+    except OSError:
+        return None
+    if not seen:
+        return None
+    return {
+        "model": model or _DEFAULT_MODEL,
+        "input_tokens": tin,
+        "output_tokens": tout,
+        "cache_creation_tokens": tcw,
+        "cache_read_tokens": tcr,
+        "duration_ms": None,
+    }
+def usage_from_session(
+    cwd: Optional[str] = None,
+    cycle_id: Optional[str] = None,
+    slug: Optional[str] = None,
+    base_dir: Optional[str] = None,
+) -> Optional[dict]:
+    """Recover a kimi cycle's usage by reading its persisted wire file(s).
+    Matching: scan ``<base>/wd_*/session_*/agents/main/wire.jsonl`` and
+    select files whose ``wd_*`` directory name contains
+    ``wd_<worktree-basename>_`` (authoritative when ``cwd`` is given) or,
+    failing that, the substring ``cycle-<cycle_id>`` (fallback). ``slug``
+    is accepted but not used for matching.
+    Retries can produce multiple wire files for the same cycle; their
+    usage is SUMMED so token totals reflect retry work too.
+    Returns the merged usage dict (tokens + model), or None when nothing
+    matches / zero tokens — caller writes nothing in that case, preserving
+    "n/a, not fake zero".
+    """
+    base = _kimi_sessions_base_dir(base_dir)
+    files = sorted(glob.glob(
+        os.path.join(base, "wd_*", "session_*", "agents", "main", "wire.jsonl")
+    ))
+    if not files:
+        return None
+    cwd_basename = os.path.basename(cwd.rstrip("/")) if cwd else None
+    matched = []
+    for path in files:
+        # Session dir name: wd_<cwd-basename>_<8-hex>
+        # Path: <base>/wd_<cwd-basename>_<hash>/session_<uuid>/agents/main/wire.jsonl
+        wd_seg = path[len(base):].lstrip(os.sep).split(os.sep, 1)[0]
+        if cwd_basename and ("wd_%s_" % cwd_basename) in (wd_seg + "_"):
+            matched.append(path)
+            continue
+        if cycle_id and ("cycle-%s" % cycle_id) in wd_seg:
+            matched.append(path)
+    if not matched:
+        return None
+    agg = {
+        "model": None,
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "cache_creation_tokens": 0,
+        "cache_read_tokens": 0,
+        "duration_ms": None,
+    }
+    got = False
+    for path in matched:
+        s = _sum_wire_file(path)
+        if s is None:
+            continue
+        got = True
+        agg["model"] = agg["model"] or s["model"]
+        agg["input_tokens"] += s["input_tokens"]
+        agg["output_tokens"] += s["output_tokens"]
+        agg["cache_creation_tokens"] += s["cache_creation_tokens"]
+        agg["cache_read_tokens"] += s["cache_read_tokens"]
+    if not got:
+        return None
+    has_tokens = (
+        agg["input_tokens"] or agg["output_tokens"]
+        or agg["cache_creation_tokens"] or agg["cache_read_tokens"]
+    )
+    if not has_tokens:
+        return None
+    agg["model"] = agg["model"] or _DEFAULT_MODEL
+    return agg

package/lib/agent_usage/kimi_emit.py ADDED Viewed

@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+kimi_emit — write ONE authoritative usage event for a finished kimi cycle.
+Mirror of ``pi_emit.py``: invoked once by bin/roll after the agent phase
+when ROLL_LOOP_AGENT == "kimi". Recovers the cycle's real usage from
+kimi-code's persisted ``wire.jsonl`` files via ``kimi.usage_from_session``
+and appends a single ``stage=="usage"`` event to the loop events file.
+Exactly one event per cycle — the dashboard SUMS token fields across
+same-label usage events, so a per-retry write path would inflate ×N.
+Cost is frozen at the active price snapshot via
+``model_prices.compute_list_cost`` in the model's native currency.
+When ``usage_from_session`` finds nothing (no matching session, zero
+tokens) we write nothing — preserving "show n/a, not a fake zero".
+"""
+import argparse
+import importlib.util
+import json
+import os
+import sys
+from datetime import datetime, timezone
+_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+_LIB_DIR = os.path.dirname(_THIS_DIR)
+def _load_model_prices():
+    spec = importlib.util.spec_from_file_location(
+        "model_prices", os.path.join(_LIB_DIR, "model_prices.py")
+    )
+    mp = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mp)
+    return mp
+def _load_kimi():
+    spec = importlib.util.spec_from_file_location(
+        "agent_usage_kimi", os.path.join(_THIS_DIR, "kimi.py")
+    )
+    kimi = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(kimi)
+    return kimi
+def build_event(cwd=None, cycle_id=None, slug=None, base_dir=None):
+    """Return the usage event (a dict, written as one NDJSON line) for a kimi cycle, or None to skip."""
+    kimi = _load_kimi()
+    u = kimi.usage_from_session(
+        cwd=cwd, cycle_id=cycle_id, slug=slug, base_dir=base_dir
+    )
+    if u is None:
+        return None
+    mp = _load_model_prices()
+    model = u.get("model") or "kimi-k2.5"
+    totals = {
+        "input_tokens": int(u.get("input_tokens") or 0),
+        "output_tokens": int(u.get("output_tokens") or 0),
+        "cache_creation_tokens": int(u.get("cache_creation_tokens") or 0),
+        "cache_read_tokens": int(u.get("cache_read_tokens") or 0),
+    }
+    cost_list = mp.compute_list_cost(model, **totals)
+    currency = mp.currency_for(model)
+    payload = {
+        "model": model,
+        "input_tokens": totals["input_tokens"],
+        "output_tokens": totals["output_tokens"],
+        "cache_creation_tokens": totals["cache_creation_tokens"],
+        "cache_read_tokens": totals["cache_read_tokens"],
+        "duration_ms": u.get("duration_ms"),
+        "cost_list_usd": cost_list,
+        "cost_currency": currency,
+        "prices_version": getattr(mp, "VERSION", None),
+    }
+    return {
+        "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "stage": "usage",
+        "label": cycle_id,
+        "detail": payload,
+        "outcome": "ok",
+    }
+def _default_events_path(slug, shared):
+    base = shared or os.environ.get("LOOP_SHARED_ROOT") \
+        or os.path.expanduser("~/.shared/roll")
+    return os.path.join(base, "loop", "events-%s.ndjson" % slug)
+def main(argv=None):
+    ap = argparse.ArgumentParser(description="emit one kimi usage event")
+    ap.add_argument("--cwd", help="cycle worktree path (authoritative match)")
+    ap.add_argument("--cycle", help="cycle id (label + dir-name fallback)")
+    ap.add_argument("--slug", help="project slug (events filename)")
+    ap.add_argument("--shared", help="shared root (for default events path)")
+    ap.add_argument("--events", help="explicit events file path (preferred)")
+    ap.add_argument("--base-dir", help="kimi sessions root override (tests)")
+    args = ap.parse_args(argv)
+    event = build_event(
+        cwd=args.cwd, cycle_id=args.cycle, slug=args.slug, base_dir=args.base_dir
+    )
+    if event is None:
+        return 0  # nothing recoverable — write nothing (n/a, not fake zero)
+    evfile = args.events or _default_events_path(args.slug, args.shared)
+    try:
+        os.makedirs(os.path.dirname(evfile), exist_ok=True)
+        with open(evfile, "a") as f:
+            f.write(json.dumps(event) + "\n")
+    except OSError as e:
+        print("[kimi_emit] failed to write %s: %s" % (evfile, e), file=sys.stderr)
+        return 1
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@seanyao/roll",
-  "version": "2026.601.2",
+  "version": "2026.601.3",
   "description": "Roll — Roll out features with AI agents",
   "scripts": {
     "test": "bash tests/run.sh"