npm - @seanyao/roll - Versions diffs - 2026.526.1 → 2026.528.1 - Mend

@seanyao/roll 2026.526.1 → 2026.528.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +39 -12
package/README.md +1 -0
package/bin/roll +813 -68
package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
package/lib/agent_usage/README.md +49 -0
package/lib/agent_usage/__init__.py +104 -0
package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
package/lib/agent_usage/__pycache__/pi.cpython-314.pyc +0 -0
package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc +0 -0
package/lib/agent_usage/pi.py +200 -0
package/lib/agent_usage/pi_emit.py +135 -0
package/lib/backfill-pi-usage.py +243 -0
package/lib/i18n.sh +12 -20
package/lib/loop-fmt.py +67 -9
package/lib/prices/snapshot-2026-05-23-deepseek.json +7 -7
package/lib/roll-loop-status.py +42 -11
package/lib/roll_render.py +11 -7
package/package.json +1 -1
package/skills/roll-design/SKILL.md +1 -1
package/template/.github/workflows/ci.yml +2 -2

package/lib/__pycache__/roll-loop-status.cpython-314.pyc CHANGED Viewed

Binary file

package/lib/__pycache__/roll_render.cpython-314.pyc CHANGED Viewed

Binary file

package/lib/agent_usage/README.md ADDED Viewed

@@ -0,0 +1,49 @@
+# Adding a New Agent Usage Plugin
+5-step checklist for adding token/cost extraction for a new agent.
+## 1. Create plugin file
+```bash
+cp lib/agent_usage/pi.py lib/agent_usage/<agent>.py
+```
+Implement `extract(stdin_lines: list[str]) -> dict | None`.
+## 2. Register in `__init__.py`
+In `lib/agent_usage/__init__.py`, add one entry to `_PLUGINS`:
+```python
+_PLUGINS = {
+    "pi": ".pi",
+    "<agent>": ".<agent>",  # ← add this line
+}
+```
+The key must match the value of the `ROLL_LOOP_AGENT` env var (e.g. `kimi`, `deepseek`).
+## 3. Capture sample output
+Run a real cycle with the agent and save the stdout to a fixture:
+```bash
+roll loop test 2>&1 | tee tests/fixtures/<agent>_output_sample.txt
+```
+Or capture from a real cycle log.
+## 4. Write unit tests
+See `tests/unit/agent_usage_pi.bats` for reference. Test:
+- Happy path: fixture produces valid dict (all required fields non-None)
+- Edge case: empty lines, missing cost, unmatchable format → returns None
+- Round-trip: known token counts match fixture
+## 5. Run tests
+```bash
+npm test
+```
+That's it — no changes to `loop-fmt.py` or any other file.

package/lib/agent_usage/__init__.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""
+agent_usage — plugin registry for extracting token/cost usage from
+non-claude agent stdout.
+Contract
+--------
+Each plugin module exports a single function:
+    def extract(stdin_lines: list[str]) -> dict | None:
+        '''Parse agent stdout lines and return structured usage data.
+        Returns None if the format wasn't recognized (caller falls back
+        to null payload — fully backward-compatible with US-LOOP-010).
+        Return dict shape:
+            {
+                "model": str,           # e.g. "deepseek-v4-pro"
+                "input_tokens": int,    # never None
+                "output_tokens": int,   # never None
+                "cost_list_usd": float, # never None
+                "duration_ms": int | None,
+            }
+        '''
+Adding a new agent
+------------------
+1. Create ``lib/agent_usage/<agent>.py`` implementing ``extract()``
+2. Register it here by adding one entry to ``_PLUGINS`` (``REGISTRY`` is populated from it at import time)
+3. Add a fixture file under ``tests/fixtures/<agent>_output_sample.txt``
+4. Add unit tests in ``tests/unit/agent_usage_<agent>.bats``
+5. Run ``npm test`` to verify no regressions
+"""
+from __future__ import annotations
+import importlib
+import logging
+import os
+from typing import Callable, Dict, Optional
+# Module-level logger; plugin load and extract failures are reported through it as warnings.
+_log = logging.getLogger(__name__)
+# Registry: agent name → extract function.
+# Agent names match ROLL_LOOP_AGENT env var values (e.g. "pi", "deepseek", "kimi").
+# Starts empty and is filled at import time from _PLUGINS; a plugin that fails
+# to load is simply absent, in which case extract_usage() returns None for it.
+REGISTRY: Dict[str, Callable] = {}
+def _lazy_import(module_name: str) -> Optional[Callable]:
+    """Import a plugin module and return its extract function, or None on failure."""
+    try:
+        mod = importlib.import_module(module_name)
+        extract = getattr(mod, "extract", None)
+        if extract is None:
+            _log.warning("agent_usage plugin %s has no extract() function", module_name)
+            return None
+        if not callable(extract):
+            _log.warning("agent_usage plugin %s.extract is not callable", module_name)
+            return None
+        return extract
+    except Exception:
+        _log.warning("agent_usage plugin %s failed to load", module_name, exc_info=True)
+        return None
+# Populate REGISTRY from known plugins.
+# Directory containing this package file; not referenced again in the visible
+# part of this module.
+_PLUGIN_DIR = os.path.dirname(os.path.abspath(__file__))
+_PLUGINS = {
+    # agent name → python module name (relative to this package)
+    "pi": ".pi",
+}
+# Runs at import time: each suffix is appended to __package__ to form the
+# module name handed to _lazy_import().  A plugin that fails to load is logged
+# there and left out of REGISTRY rather than aborting the package import.
+# NOTE(review): assumes __package__ is a non-empty string, i.e. this file is
+# imported as a package rather than executed directly — confirm.
+for _agent, _mod_suffix in _PLUGINS.items():
+    _extract = _lazy_import(__package__ + _mod_suffix)
+    if _extract is not None:
+        REGISTRY[_agent] = _extract
+def extract_usage(agent: str, stdin_lines: list[str]) -> Optional[dict]:
+    """Look up agent in REGISTRY and call its extract().
+    Returns None if agent not registered, plugin not loadable, or
+    extract() returns None / raises an exception.  The caller falls
+    back to the null-payload passthrough path (US-LOOP-010 compatible).
+    """
+    extract_fn = REGISTRY.get(agent)
+    if extract_fn is None:
+        return None
+    try:
+        result = extract_fn(stdin_lines)
+        if result is None:
+            return None
+        # Validate required fields
+        for key in ("model", "input_tokens", "output_tokens", "cost_list_usd"):
+            if result.get(key) is None:
+                _log.warning(
+                    "agent_usage plugin %s returned None for required field %r",
+                    agent, key,
+                )
+                return None
+        return result
+    except Exception:
+        _log.warning(
+            "agent_usage plugin %s raised during extract()", agent, exc_info=True,
+        )
+        return None

package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file

package/lib/agent_usage/__pycache__/pi.cpython-314.pyc ADDED Viewed

Binary file

package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc ADDED Viewed

Binary file

package/lib/agent_usage/pi.py ADDED Viewed

@@ -0,0 +1,200 @@
+"""
+pi agent usage extractor.
+pi runs in the loop as ``pi -p`` (text mode), whose stdout is ONLY the
+assistant's answer text — it carries no token/cost summary.  So stdout
+scraping (the ``extract()`` registry contract) cannot recover usage and
+always returns None for real pi output.
+Instead, pi persists every session to disk at::
+    ~/.pi/agent/sessions/<encoded-cwd>/<ISO-ts>_<uuid>.jsonl
+Each file is NDJSON: one ``{"type":"session","cwd":<abs-worktree-path>}``
+header line followed by ``{"type":"message","message":{...}}`` lines.
+Assistant messages carry a per-call ``usage`` block including pi's own
+cost calc.  The authoritative usage path is therefore ``usage_from_session``,
+which sums per-message usage for a cycle's worktree.  See ``pi_emit.py``
+(live capture) and ``backfill-pi-usage.py`` (historical backfill).
+"""
+import glob
+import json
+import os
+from typing import Optional
+def extract(stdin_lines: list[str]) -> Optional[dict]:
+    """Registry contract stub.
+    pi ``-p`` text-mode stdout carries no usage data, so this always
+    returns None and the caller falls back to the null-payload path.
+    Real usage is recovered from session files via ``usage_from_session``.
+    Kept so the agent_usage REGISTRY contract / tests stay valid.
+    """
+    return None
+# ── Session-file extraction (authoritative) ────────────────────────────────
+# pi reports a per-message ``cost.total``; we sum it into ``cost_reported``
+# for audit only.  The authoritative list cost is frozen by the writers from
+# lib/prices/snapshot-*-deepseek.json in deepseek's native currency (CNY) —
+# we never convert currencies (the CLI already shows the currency symbol).
+def _sessions_base_dir(base_dir: Optional[str]) -> str:
+    """Resolve the pi sessions root: arg → env → default."""
+    return (
+        base_dir
+        or os.environ.get("ROLL_PI_SESSIONS_DIR")
+        or os.path.expanduser("~/.pi/agent/sessions")
+    )
+def _sum_session_file(path: str) -> Optional[dict]:
+    """Sum per-message assistant usage in a single session jsonl.
+    Returns a usage dict (tokens summed) or None when the file has no
+    assistant usage.  Field mapping from pi → roll schema:
+    cacheWrite→cache_creation_tokens, cacheRead→cache_read_tokens.
+    ``cost_reported`` carries pi's own per-message ``cost.total`` summed,
+    purely for audit — it is NOT the authoritative cost.  The authoritative
+    list cost is frozen by the writers (pi_emit / backfill) from the deepseek
+    price snapshot in its native currency (CNY), matching claude's
+    ``_price_at_snapshot`` convention.  We never convert currencies.
+    """
+    tin = tout = tcr = tcw = 0
+    cost = 0.0
+    model = None
+    seen = False
+    try:
+        with open(path) as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    o = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                if o.get("type") != "message":
+                    continue
+                m = o.get("message") or {}
+                if m.get("role") != "assistant":
+                    continue
+                u = m.get("usage")
+                if not u:
+                    continue
+                seen = True
+                if m.get("model"):
+                    model = m["model"]
+                tin += int(u.get("input") or 0)
+                tout += int(u.get("output") or 0)
+                tcr += int(u.get("cacheRead") or 0)
+                tcw += int(u.get("cacheWrite") or 0)
+                cost += float((u.get("cost") or {}).get("total") or 0.0)
+    except OSError:
+        return None
+    if not seen:
+        return None
+    return {
+        "model": model or "deepseek-v4-pro",
+        "input_tokens": tin,
+        "output_tokens": tout,
+        "cache_creation_tokens": tcw,
+        "cache_read_tokens": tcr,
+        "cost_reported": cost,
+        "duration_ms": None,
+    }
+def _session_cwd(path: str) -> Optional[str]:
+    """Read the header ``session`` line and return its ``cwd``, or None."""
+    try:
+        with open(path) as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    o = json.loads(line)
+                except json.JSONDecodeError:
+                    return None
+                if o.get("type") == "session":
+                    return o.get("cwd")
+                # session header is expected first; bail after first JSON line
+                return None
+    except OSError:
+        return None
+    return None
+def usage_from_session(
+    cwd: Optional[str] = None,
+    cycle_id: Optional[str] = None,
+    slug: Optional[str] = None,
+    base_dir: Optional[str] = None,
+) -> Optional[dict]:
+    """Recover a pi cycle's usage by reading its persisted session file(s).
+    Matching: scan ``<base>/*/*.jsonl`` and select files whose session
+    header ``cwd`` equals the target worktree path (authoritative).  When
+    ``cwd`` isn't given but ``cycle_id`` is, also accept files whose path
+    contains ``cycle-<cycle_id>`` (dir-name fallback).
+    Retries reuse the same worktree → multiple session files may match;
+    their usage is SUMMED (so token totals reflect wasted retry work too).
+    Returns the merged usage dict (tokens + model + ``cost_reported``), or
+    None when nothing matches / zero tokens (callers then skip writing,
+    preserving "n/a not fake zero").  The authoritative list cost is left to
+    the writer, which freezes it from the CNY price snapshot.
+    """
+    base = _sessions_base_dir(base_dir)
+    files = sorted(glob.glob(os.path.join(base, "*", "*.jsonl")))
+    if not files:
+        return None
+    matched = []
+    for path in files:
+        if cwd is not None and _session_cwd(path) == cwd:
+            matched.append(path)
+            continue
+        if cycle_id is not None and ("cycle-%s" % cycle_id) in path:
+            matched.append(path)
+    if not matched:
+        return None
+    agg = {
+        "model": None,
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "cache_creation_tokens": 0,
+        "cache_read_tokens": 0,
+        "cost_reported": 0.0,
+        "duration_ms": None,
+    }
+    got = False
+    for path in matched:
+        s = _sum_session_file(path)
+        if s is None:
+            continue
+        got = True
+        agg["model"] = agg["model"] or s["model"]
+        agg["input_tokens"] += s["input_tokens"]
+        agg["output_tokens"] += s["output_tokens"]
+        agg["cache_creation_tokens"] += s["cache_creation_tokens"]
+        agg["cache_read_tokens"] += s["cache_read_tokens"]
+        agg["cost_reported"] += s["cost_reported"]
+    if not got:
+        return None
+    has_tokens = (
+        agg["input_tokens"] or agg["output_tokens"]
+        or agg["cache_creation_tokens"] or agg["cache_read_tokens"]
+    )
+    if not has_tokens:
+        return None
+    agg["model"] = agg["model"] or "deepseek-v4-pro"
+    return agg

package/lib/agent_usage/pi_emit.py ADDED Viewed

@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+"""
+pi_emit — write ONE authoritative usage event for a finished pi cycle.
+pi runs as ``pi -p`` (text mode): its stdout carries no token/cost summary,
+so loop-fmt's passthrough can only show progress, not capture usage. This
+thin CLI is invoked once by bin/roll after the agent phase (when ``$WT`` and
+``$CYCLE_ID`` are still in scope). It recovers the cycle's real usage from
+pi's persisted session files via ``pi.usage_from_session`` and appends a
+single ``stage=="usage"`` event to the loop events file.
+Exactly one event per cycle — the dashboard SUMS token fields across same-
+label usage events, so emitting once (here, post-cycle) instead of once per
+retry attempt (the old loop-fmt path) avoids ×N inflation.
+Cost is frozen at the active price snapshot in deepseek's native currency
+(CNY) via ``model_prices.compute_list_cost`` — the same convention claude
+uses (US-VIEW-014). pi's own ``cost.total`` (computed in USD) is kept as
+``cost_reported_usd`` for audit only. We never convert currencies; the
+dashboard already renders the right symbol from ``cost_currency``.
+When ``usage_from_session`` finds nothing (no session match, zero tokens),
+nothing is written — preserving "show n/a, not a fake zero".
+"""
+import argparse
+import importlib.util
+import json
+import os
+import sys
+from datetime import datetime, timezone
+# Directory holding this file; pi.py is loaded from here by _load_pi().
+_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+# Its parent directory; model_prices.py is loaded from here by _load_model_prices().
+_LIB_DIR = os.path.dirname(_THIS_DIR)
+def _load_model_prices():
+    spec = importlib.util.spec_from_file_location(
+        "model_prices", os.path.join(_LIB_DIR, "model_prices.py")
+    )
+    mp = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mp)
+    return mp
+def _load_pi():
+    spec = importlib.util.spec_from_file_location(
+        "agent_usage_pi", os.path.join(_THIS_DIR, "pi.py")
+    )
+    pi = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(pi)
+    return pi
+def build_event(cwd=None, cycle_id=None, slug=None, base_dir=None):
+    """Return the (line dict) usage event for a pi cycle, or None to skip.
+    None means no recoverable usage — caller writes nothing.
+    """
+    pi = _load_pi()
+    u = pi.usage_from_session(
+        cwd=cwd, cycle_id=cycle_id, slug=slug, base_dir=base_dir
+    )
+    if u is None:
+        return None
+    mp = _load_model_prices()
+    model = u.get("model") or "deepseek-v4-pro"
+    totals = {
+        "input_tokens": int(u.get("input_tokens") or 0),
+        "output_tokens": int(u.get("output_tokens") or 0),
+        "cache_creation_tokens": int(u.get("cache_creation_tokens") or 0),
+        "cache_read_tokens": int(u.get("cache_read_tokens") or 0),
+    }
+    cost_list = mp.compute_list_cost(model, **totals)
+    currency = mp.currency_for(model)
+    payload = {
+        "model": model,
+        "input_tokens": totals["input_tokens"],
+        "output_tokens": totals["output_tokens"],
+        "cache_creation_tokens": totals["cache_creation_tokens"],
+        "cache_read_tokens": totals["cache_read_tokens"],
+        # pi's own per-message cost.total summed, in USD — audit only.
+        "cost_reported_usd": u.get("cost_reported"),
+        "duration_ms": u.get("duration_ms"),
+        # Authoritative, frozen at snapshot in native currency (CNY).
+        "cost_list_usd": cost_list,
+        "cost_currency": currency,
+        "prices_version": getattr(mp, "VERSION", None),
+    }
+    return {
+        "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "stage": "usage",
+        "label": cycle_id,
+        "detail": payload,
+        "outcome": "ok",
+    }
+def _default_events_path(slug, shared):
+    base = shared or os.environ.get("LOOP_SHARED_ROOT") \
+        or os.path.expanduser("~/.shared/roll")
+    return os.path.join(base, "loop", "events-%s.ndjson" % slug)
+def main(argv=None):
+    ap = argparse.ArgumentParser(description="emit one pi usage event")
+    ap.add_argument("--cwd", help="cycle worktree path (authoritative match)")
+    ap.add_argument("--cycle", help="cycle id (label + dir-name fallback)")
+    ap.add_argument("--slug", help="project slug (events filename)")
+    ap.add_argument("--shared", help="shared root (for default events path)")
+    ap.add_argument("--events", help="explicit events file path (preferred)")
+    ap.add_argument("--base-dir", help="pi sessions root override (tests)")
+    args = ap.parse_args(argv)
+    event = build_event(
+        cwd=args.cwd, cycle_id=args.cycle, slug=args.slug, base_dir=args.base_dir
+    )
+    if event is None:
+        return 0  # nothing recoverable — write nothing (n/a, not fake zero)
+    evfile = args.events or _default_events_path(args.slug, args.shared)
+    try:
+        os.makedirs(os.path.dirname(evfile), exist_ok=True)
+        with open(evfile, "a") as f:
+            f.write(json.dumps(event) + "\n")
+    except OSError as e:
+        print("[pi_emit] failed to write %s: %s" % (evfile, e), file=sys.stderr)
+        return 1
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())