@seanyao/roll 2026.526.1 → 2026.528.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ # Adding a New Agent Usage Plugin
2
+
3
+ 5-step checklist for adding token/cost extraction for a new agent.
4
+
5
+ ## 1. Create plugin file
6
+
7
+ ```bash
8
+ cp lib/agent_usage/pi.py lib/agent_usage/<agent>.py
9
+ ```
10
+
11
+ Implement `extract(stdin_lines: list[str]) -> dict | None`.
12
+
13
+ ## 2. Register in `__init__.py`
14
+
15
+ In `lib/agent_usage/__init__.py`, add one entry to `_PLUGINS`:
16
+
17
+ ```python
18
+ _PLUGINS = {
19
+ "pi": ".pi",
20
+ "<agent>": ".<agent>", # ← add this line
21
+ }
22
+ ```
23
+
24
+ The key must match `ROLL_LOOP_AGENT` env var (e.g. `kimi`, `deepseek`).
25
+
26
+ ## 3. Capture sample output
27
+
28
+ Run a real cycle with the agent and save the stdout to a fixture:
29
+
30
+ ```bash
31
+ roll loop test 2>&1 | tee tests/fixtures/<agent>_output_sample.txt
32
+ ```
33
+
34
+ Or capture from a real cycle log.
35
+
36
+ ## 4. Write unit tests
37
+
38
+ See `tests/unit/agent_usage_pi.bats` for reference. Test:
39
+ - Happy path: fixture produces valid dict (all required fields non-None)
40
+ - Edge case: empty lines, missing cost, unmatchable format → returns None
41
+ - Round-trip: known token counts match fixture
42
+
43
+ ## 5. Run tests
44
+
45
+ ```bash
46
+ npm test
47
+ ```
48
+
49
+ That's it — no changes to `loop-fmt.py` or any other file.
@@ -0,0 +1,104 @@
1
+ """
2
+ agent_usage — plugin registry for extracting token/cost usage from
3
+ non-claude agent stdout.
4
+
5
+ Contract
6
+ --------
7
+ Each plugin module exports a single function:
8
+
9
+ def extract(stdin_lines: list[str]) -> dict | None:
10
+ '''Parse agent stdout lines and return structured usage data.
11
+
12
+ Returns None if the format wasn't recognized (caller falls back
13
+ to null payload — fully backward-compatible with US-LOOP-010).
14
+
15
+ Return dict shape:
16
+ {
17
+ "model": str, # e.g. "deepseek-v4-pro"
18
+ "input_tokens": int, # never None
19
+ "output_tokens": int, # never None
20
+ "cost_list_usd": float, # never None
21
+ "duration_ms": int | None,
22
+ }
23
+ '''
24
+
25
+ Adding a new agent
26
+ ------------------
27
+ 1. Create ``lib/agent_usage/<agent>.py`` implementing ``extract()``
28
+ 2. Register it here by adding one entry to ``REGISTRY``
29
+ 3. Add a fixture file under ``tests/fixtures/<agent>_output_sample.txt``
30
+ 4. Add unit tests in ``tests/unit/agent_usage_<agent>.bats``
31
+ 5. Run ``npm test`` to verify no regressions
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import importlib
36
+ import logging
37
+ import os
38
+ from typing import Callable, Dict, Optional
39
+
40
+ _log = logging.getLogger(__name__)
41
+
42
+ # Registry: agent name → extract function
43
+ # Agent names match ROLL_LOOP_AGENT env var values (e.g. "pi", "deepseek", "kimi").
44
+ REGISTRY: Dict[str, Callable] = {}
45
+
46
+
47
+ def _lazy_import(module_name: str) -> Optional[Callable]:
48
+ """Import a plugin module and return its extract function, or None on failure."""
49
+ try:
50
+ mod = importlib.import_module(module_name)
51
+ extract = getattr(mod, "extract", None)
52
+ if extract is None:
53
+ _log.warning("agent_usage plugin %s has no extract() function", module_name)
54
+ return None
55
+ if not callable(extract):
56
+ _log.warning("agent_usage plugin %s.extract is not callable", module_name)
57
+ return None
58
+ return extract
59
+ except Exception:
60
+ _log.warning("agent_usage plugin %s failed to load", module_name, exc_info=True)
61
+ return None
62
+
63
+
64
+ # Populate REGISTRY from known plugins
65
+ _PLUGIN_DIR = os.path.dirname(os.path.abspath(__file__))
66
+ _PLUGINS = {
67
+ # agent name → python module name (relative to this package)
68
+ "pi": ".pi",
69
+ }
70
+
71
+ for _agent, _mod_suffix in _PLUGINS.items():
72
+ _extract = _lazy_import(__package__ + _mod_suffix)
73
+ if _extract is not None:
74
+ REGISTRY[_agent] = _extract
75
+
76
+
77
+ def extract_usage(agent: str, stdin_lines: list[str]) -> Optional[dict]:
78
+ """Look up agent in REGISTRY and call its extract().
79
+
80
+ Returns None if agent not registered, plugin not loadable, or
81
+ extract() returns None / raises an exception. The caller falls
82
+ back to the null-payload passthrough path (US-LOOP-010 compatible).
83
+ """
84
+ extract_fn = REGISTRY.get(agent)
85
+ if extract_fn is None:
86
+ return None
87
+ try:
88
+ result = extract_fn(stdin_lines)
89
+ if result is None:
90
+ return None
91
+ # Validate required fields
92
+ for key in ("model", "input_tokens", "output_tokens", "cost_list_usd"):
93
+ if result.get(key) is None:
94
+ _log.warning(
95
+ "agent_usage plugin %s returned None for required field %r",
96
+ agent, key,
97
+ )
98
+ return None
99
+ return result
100
+ except Exception:
101
+ _log.warning(
102
+ "agent_usage plugin %s raised during extract()", agent, exc_info=True,
103
+ )
104
+ return None
@@ -0,0 +1,200 @@
1
+ """
2
+ pi agent usage extractor.
3
+
4
+ pi runs in the loop as ``pi -p`` (text mode), whose stdout is ONLY the
5
+ assistant's answer text — it carries no token/cost summary. So stdout
6
+ scraping (the ``extract()`` registry contract) cannot recover usage and
7
+ always returns None for real pi output.
8
+
9
+ Instead, pi persists every session to disk at::
10
+
11
+ ~/.pi/agent/sessions/<encoded-cwd>/<ISO-ts>_<uuid>.jsonl
12
+
13
+ Each file is NDJSON: one ``{"type":"session","cwd":<abs-worktree-path>}``
14
+ header line followed by ``{"type":"message","message":{...}}`` lines.
15
+ Assistant messages carry a per-call ``usage`` block including pi's own
16
+ cost calc. The authoritative usage path is therefore ``usage_from_session``,
17
+ which sums per-message usage for a cycle's worktree. See ``pi_emit.py``
18
+ (live capture) and ``backfill-pi-usage.py`` (historical backfill).
19
+ """
20
+
21
+ import glob
22
+ import json
23
+ import os
24
+ from typing import Optional
25
+
26
+
27
+ def extract(stdin_lines: list[str]) -> Optional[dict]:
28
+ """Registry contract stub.
29
+
30
+ pi ``-p`` text-mode stdout carries no usage data, so this always
31
+ returns None and the caller falls back to the null-payload path.
32
+ Real usage is recovered from session files via ``usage_from_session``.
33
+ Kept so the agent_usage REGISTRY contract / tests stay valid.
34
+ """
35
+ return None
36
+
37
+
38
+ # ── Session-file extraction (authoritative) ────────────────────────────────
39
+
40
+ # pi reports a per-message ``cost.total``; we sum it into ``cost_reported``
41
+ # for audit only. The authoritative list cost is frozen by the writers from
42
+ # lib/prices/snapshot-*-deepseek.json in deepseek's native currency (CNY) —
43
+ # we never convert currencies (the CLI already shows the currency symbol).
44
+ def _sessions_base_dir(base_dir: Optional[str]) -> str:
45
+ """Resolve the pi sessions root: arg → env → default."""
46
+ return (
47
+ base_dir
48
+ or os.environ.get("ROLL_PI_SESSIONS_DIR")
49
+ or os.path.expanduser("~/.pi/agent/sessions")
50
+ )
51
+
52
+
53
+ def _sum_session_file(path: str) -> Optional[dict]:
54
+ """Sum per-message assistant usage in a single session jsonl.
55
+
56
+ Returns a usage dict (tokens summed) or None when the file has no
57
+ assistant usage. Field mapping from pi → roll schema:
58
+ cacheWrite→cache_creation_tokens, cacheRead→cache_read_tokens.
59
+
60
+ ``cost_reported`` carries pi's own per-message ``cost.total`` summed,
61
+ purely for audit — it is NOT the authoritative cost. The authoritative
62
+ list cost is frozen by the writers (pi_emit / backfill) from the deepseek
63
+ price snapshot in its native currency (CNY), matching claude's
64
+ ``_price_at_snapshot`` convention. We never convert currencies.
65
+ """
66
+ tin = tout = tcr = tcw = 0
67
+ cost = 0.0
68
+ model = None
69
+ seen = False
70
+ try:
71
+ with open(path) as f:
72
+ for line in f:
73
+ line = line.strip()
74
+ if not line:
75
+ continue
76
+ try:
77
+ o = json.loads(line)
78
+ except json.JSONDecodeError:
79
+ continue
80
+ if o.get("type") != "message":
81
+ continue
82
+ m = o.get("message") or {}
83
+ if m.get("role") != "assistant":
84
+ continue
85
+ u = m.get("usage")
86
+ if not u:
87
+ continue
88
+ seen = True
89
+ if m.get("model"):
90
+ model = m["model"]
91
+ tin += int(u.get("input") or 0)
92
+ tout += int(u.get("output") or 0)
93
+ tcr += int(u.get("cacheRead") or 0)
94
+ tcw += int(u.get("cacheWrite") or 0)
95
+ cost += float((u.get("cost") or {}).get("total") or 0.0)
96
+ except OSError:
97
+ return None
98
+ if not seen:
99
+ return None
100
+ return {
101
+ "model": model or "deepseek-v4-pro",
102
+ "input_tokens": tin,
103
+ "output_tokens": tout,
104
+ "cache_creation_tokens": tcw,
105
+ "cache_read_tokens": tcr,
106
+ "cost_reported": cost,
107
+ "duration_ms": None,
108
+ }
109
+
110
+
111
+ def _session_cwd(path: str) -> Optional[str]:
112
+ """Read the header ``session`` line and return its ``cwd``, or None."""
113
+ try:
114
+ with open(path) as f:
115
+ for line in f:
116
+ line = line.strip()
117
+ if not line:
118
+ continue
119
+ try:
120
+ o = json.loads(line)
121
+ except json.JSONDecodeError:
122
+ return None
123
+ if o.get("type") == "session":
124
+ return o.get("cwd")
125
+ # session header is expected first; bail after first JSON line
126
+ return None
127
+ except OSError:
128
+ return None
129
+ return None
130
+
131
+
132
+ def usage_from_session(
133
+ cwd: Optional[str] = None,
134
+ cycle_id: Optional[str] = None,
135
+ slug: Optional[str] = None,
136
+ base_dir: Optional[str] = None,
137
+ ) -> Optional[dict]:
138
+ """Recover a pi cycle's usage by reading its persisted session file(s).
139
+
140
+ Matching: scan ``<base>/*/*.jsonl`` and select files whose session
141
+ header ``cwd`` equals the target worktree path (authoritative). When
142
+ ``cwd`` isn't given but ``cycle_id`` is, also accept files whose path
143
+ contains ``cycle-<cycle_id>`` (dir-name fallback).
144
+
145
+ Retries reuse the same worktree → multiple session files may match;
146
+ their usage is SUMMED (so token totals reflect wasted retry work too).
147
+
148
+ Returns the merged usage dict (tokens + model + ``cost_reported``), or
149
+ None when nothing matches / zero tokens (callers then skip writing,
150
+ preserving "n/a not fake zero"). The authoritative list cost is left to
151
+ the writer, which freezes it from the CNY price snapshot.
152
+ """
153
+ base = _sessions_base_dir(base_dir)
154
+ files = sorted(glob.glob(os.path.join(base, "*", "*.jsonl")))
155
+ if not files:
156
+ return None
157
+
158
+ matched = []
159
+ for path in files:
160
+ if cwd is not None and _session_cwd(path) == cwd:
161
+ matched.append(path)
162
+ continue
163
+ if cycle_id is not None and ("cycle-%s" % cycle_id) in path:
164
+ matched.append(path)
165
+
166
+ if not matched:
167
+ return None
168
+
169
+ agg = {
170
+ "model": None,
171
+ "input_tokens": 0,
172
+ "output_tokens": 0,
173
+ "cache_creation_tokens": 0,
174
+ "cache_read_tokens": 0,
175
+ "cost_reported": 0.0,
176
+ "duration_ms": None,
177
+ }
178
+ got = False
179
+ for path in matched:
180
+ s = _sum_session_file(path)
181
+ if s is None:
182
+ continue
183
+ got = True
184
+ agg["model"] = agg["model"] or s["model"]
185
+ agg["input_tokens"] += s["input_tokens"]
186
+ agg["output_tokens"] += s["output_tokens"]
187
+ agg["cache_creation_tokens"] += s["cache_creation_tokens"]
188
+ agg["cache_read_tokens"] += s["cache_read_tokens"]
189
+ agg["cost_reported"] += s["cost_reported"]
190
+
191
+ if not got:
192
+ return None
193
+ has_tokens = (
194
+ agg["input_tokens"] or agg["output_tokens"]
195
+ or agg["cache_creation_tokens"] or agg["cache_read_tokens"]
196
+ )
197
+ if not has_tokens:
198
+ return None
199
+ agg["model"] = agg["model"] or "deepseek-v4-pro"
200
+ return agg
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ pi_emit — write ONE authoritative usage event for a finished pi cycle.
4
+
5
+ pi runs as ``pi -p`` (text mode): its stdout carries no token/cost summary,
6
+ so loop-fmt's passthrough can only show progress, not capture usage. This
7
+ thin CLI is invoked once by bin/roll after the agent phase (when ``$WT`` and
8
+ ``$CYCLE_ID`` are still in scope). It recovers the cycle's real usage from
9
+ pi's persisted session files via ``pi.usage_from_session`` and appends a
10
+ single ``stage=="usage"`` event to the loop events file.
11
+
12
+ Exactly one event per cycle — the dashboard SUMS token fields across same-
13
+ label usage events, so emitting once (here, post-cycle) instead of once per
14
+ retry attempt (the old loop-fmt path) avoids ×N inflation.
15
+
16
+ Cost is frozen at the active price snapshot in deepseek's native currency
17
+ (CNY) via ``model_prices.compute_list_cost`` — the same convention claude
18
+ uses (US-VIEW-014). pi's own ``cost.total`` (computed in USD) is kept as
19
+ ``cost_reported_usd`` for audit only. We never convert currencies; the
20
+ dashboard already renders the right symbol from ``cost_currency``.
21
+
22
+ When ``usage_from_session`` finds nothing (no session match, zero tokens),
23
+ nothing is written — preserving "show n/a, not a fake zero".
24
+ """
25
+
26
+ import argparse
27
+ import importlib.util
28
+ import json
29
+ import os
30
+ import sys
31
+ from datetime import datetime, timezone
32
+
33
+ _THIS_DIR = os.path.dirname(os.path.abspath(__file__))
34
+ _LIB_DIR = os.path.dirname(_THIS_DIR)
35
+
36
+
37
+ def _load_model_prices():
38
+ spec = importlib.util.spec_from_file_location(
39
+ "model_prices", os.path.join(_LIB_DIR, "model_prices.py")
40
+ )
41
+ mp = importlib.util.module_from_spec(spec)
42
+ spec.loader.exec_module(mp)
43
+ return mp
44
+
45
+
46
+ def _load_pi():
47
+ spec = importlib.util.spec_from_file_location(
48
+ "agent_usage_pi", os.path.join(_THIS_DIR, "pi.py")
49
+ )
50
+ pi = importlib.util.module_from_spec(spec)
51
+ spec.loader.exec_module(pi)
52
+ return pi
53
+
54
+
55
+ def build_event(cwd=None, cycle_id=None, slug=None, base_dir=None):
56
+ """Return the (line dict) usage event for a pi cycle, or None to skip.
57
+
58
+ None means no recoverable usage — caller writes nothing.
59
+ """
60
+ pi = _load_pi()
61
+ u = pi.usage_from_session(
62
+ cwd=cwd, cycle_id=cycle_id, slug=slug, base_dir=base_dir
63
+ )
64
+ if u is None:
65
+ return None
66
+
67
+ mp = _load_model_prices()
68
+ model = u.get("model") or "deepseek-v4-pro"
69
+ totals = {
70
+ "input_tokens": int(u.get("input_tokens") or 0),
71
+ "output_tokens": int(u.get("output_tokens") or 0),
72
+ "cache_creation_tokens": int(u.get("cache_creation_tokens") or 0),
73
+ "cache_read_tokens": int(u.get("cache_read_tokens") or 0),
74
+ }
75
+ cost_list = mp.compute_list_cost(model, **totals)
76
+ currency = mp.currency_for(model)
77
+
78
+ payload = {
79
+ "model": model,
80
+ "input_tokens": totals["input_tokens"],
81
+ "output_tokens": totals["output_tokens"],
82
+ "cache_creation_tokens": totals["cache_creation_tokens"],
83
+ "cache_read_tokens": totals["cache_read_tokens"],
84
+ # pi's own per-message cost.total summed, in USD — audit only.
85
+ "cost_reported_usd": u.get("cost_reported"),
86
+ "duration_ms": u.get("duration_ms"),
87
+ # Authoritative, frozen at snapshot in native currency (CNY).
88
+ "cost_list_usd": cost_list,
89
+ "cost_currency": currency,
90
+ "prices_version": getattr(mp, "VERSION", None),
91
+ }
92
+ return {
93
+ "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
94
+ "stage": "usage",
95
+ "label": cycle_id,
96
+ "detail": payload,
97
+ "outcome": "ok",
98
+ }
99
+
100
+
101
+ def _default_events_path(slug, shared):
102
+ base = shared or os.environ.get("LOOP_SHARED_ROOT") \
103
+ or os.path.expanduser("~/.shared/roll")
104
+ return os.path.join(base, "loop", "events-%s.ndjson" % slug)
105
+
106
+
107
+ def main(argv=None):
108
+ ap = argparse.ArgumentParser(description="emit one pi usage event")
109
+ ap.add_argument("--cwd", help="cycle worktree path (authoritative match)")
110
+ ap.add_argument("--cycle", help="cycle id (label + dir-name fallback)")
111
+ ap.add_argument("--slug", help="project slug (events filename)")
112
+ ap.add_argument("--shared", help="shared root (for default events path)")
113
+ ap.add_argument("--events", help="explicit events file path (preferred)")
114
+ ap.add_argument("--base-dir", help="pi sessions root override (tests)")
115
+ args = ap.parse_args(argv)
116
+
117
+ event = build_event(
118
+ cwd=args.cwd, cycle_id=args.cycle, slug=args.slug, base_dir=args.base_dir
119
+ )
120
+ if event is None:
121
+ return 0 # nothing recoverable — write nothing (n/a, not fake zero)
122
+
123
+ evfile = args.events or _default_events_path(args.slug, args.shared)
124
+ try:
125
+ os.makedirs(os.path.dirname(evfile), exist_ok=True)
126
+ with open(evfile, "a") as f:
127
+ f.write(json.dumps(event) + "\n")
128
+ except OSError as e:
129
+ print("[pi_emit] failed to write %s: %s" % (evfile, e), file=sys.stderr)
130
+ return 1
131
+ return 0
132
+
133
+
134
+ if __name__ == "__main__":
135
+ sys.exit(main())