@seanyao/roll 2026.522.2 → 2026.523.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/i18n.sh ADDED
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env bash
2
+ # Roll i18n engine — US-I18N-001.
3
+ #
4
+ # Provides:
5
+ # _i18n_resolve_lang — resolve user's language per precedence
6
+ # msg <key> [args...] — look up message catalog with EN fallback
7
+ # _i18n_set <lang> <key> <value> — fill the catalog (used by modules)
8
+ #
9
+ # Storage is bash 3.2-compatible (macOS default ships 3.2): each catalog entry
10
+ # is held in a plain variable named `MSG_<LANG>_<key>`, looked up via indirect
11
+ # reference. Bash 4 assoc arrays would be cleaner but break macOS default bash
12
+ # (see AGENTS.md §4: single bash script, no runtime).
13
+ #
14
+ # Precedence: ROLL_LANG env > ~/.roll/config.yaml `lang` > LC_ALL > LANG
15
+ # > (macOS) AppleLanguages > 'en'.
16
+ # Decision: value starting with `zh` → "zh", everything else → "en".
17
+
18
+ # Sanitize a free-form key into a variable-safe suffix. Anything that isn't a
19
+ # letter, digit, or underscore becomes an underscore so callers can use natural
20
+ # dotted keys like "loop.cycle_start" without exploding bash syntax.
21
+ _i18n_safe_key() {
22
+ echo "${1//[^A-Za-z0-9_]/_}"
23
+ }
24
+
25
+ # Fill the catalog. Modules call this at source-time:
26
+ # _i18n_set en hello "Hello, %s!"
27
+ # _i18n_set zh hello "你好,%s!"
28
+ _i18n_set() {
29
+ local lang="$1" key="$2" val="$3"
30
+ local upper safe varname
31
+ upper=$(echo "$lang" | tr '[:lower:]' '[:upper:]')
32
+ safe=$(_i18n_safe_key "$key")
33
+ varname="MSG_${upper}_${safe}"
34
+ printf -v "$varname" '%s' "$val"
35
+ export "$varname"
36
+ }
37
+
38
+ # Decide "zh" or "en" from a free-form locale string.
39
+ _i18n_classify() {
40
+ case "${1:-}" in
41
+ zh*) echo zh ;;
42
+ *) echo en ;;
43
+ esac
44
+ }
45
+
46
+ # Resolve the active language. Caches in ROLL_LANG_RESOLVED so later calls in
47
+ # the same shell are free; a `$(...)` caller runs in a subshell and keeps no cache.
48
+ _i18n_resolve_lang() {
49
+ if [[ -n "${ROLL_LANG_RESOLVED:-}" ]]; then
50
+ echo "$ROLL_LANG_RESOLVED"
51
+ return
52
+ fi
53
+
54
+ local lang=""
55
+
56
+ if [[ -n "${ROLL_LANG:-}" ]]; then
57
+ lang=$(_i18n_classify "$ROLL_LANG")
58
+ fi
59
+
60
+ if [[ -z "$lang" && -n "${ROLL_CONFIG:-}" && -f "${ROLL_CONFIG}" ]]; then
61
+ local cfg
62
+ cfg=$(grep -E '^lang:' "$ROLL_CONFIG" 2>/dev/null | head -1 \
63
+ | sed 's/^lang:[[:space:]]*//' \
64
+ | sed 's/[[:space:]]*#.*$//' \
65
+ | sed 's/[[:space:]]*$//')
66
+ [[ -n "$cfg" ]] && lang=$(_i18n_classify "$cfg")
67
+ fi
68
+
69
+ if [[ -z "$lang" && -n "${LC_ALL:-}" ]]; then
70
+ lang=$(_i18n_classify "$LC_ALL")
71
+ fi
72
+
73
+ if [[ -z "$lang" && -n "${LANG:-}" ]]; then
74
+ lang=$(_i18n_classify "$LANG")
75
+ fi
76
+
77
+ if [[ -z "$lang" ]] && command -v defaults >/dev/null 2>&1; then
78
+ local apple
79
+ apple=$(defaults read -g AppleLanguages 2>/dev/null | head -2 | tail -1 \
80
+ | tr -d ' ",()' | head -1 || true)
81
+ [[ -n "$apple" ]] && lang=$(_i18n_classify "$apple")
82
+ fi
83
+
84
+ [[ -z "$lang" ]] && lang="en"
85
+
86
+ ROLL_LANG_RESOLVED="$lang"
87
+ echo "$lang"
88
+ }
89
+
90
+ # Look up message catalog entry. Falls back to EN, then to the key itself so
91
+ # missing translations stay visible without crashing the caller.
92
+ msg() {
93
+ local key="$1"; shift || true
94
+ local lang safe
95
+ lang=$(_i18n_resolve_lang)
96
+ safe=$(_i18n_safe_key "$key")
97
+
98
+ local zh_var="MSG_ZH_${safe}"
99
+ local en_var="MSG_EN_${safe}"
100
+ local tmpl=""
101
+
102
+ if [[ "$lang" == "zh" && -n "${!zh_var:-}" ]]; then
103
+ tmpl="${!zh_var}"
104
+ elif [[ -n "${!en_var:-}" ]]; then
105
+ tmpl="${!en_var}"
106
+ else
107
+ tmpl="$key"
108
+ fi
109
+
110
+ # shellcheck disable=SC2059 — template comes from our own catalog
111
+ printf "$tmpl" "$@"
112
+ echo
113
+ }
package/lib/loop-fmt.py CHANGED
@@ -344,6 +344,40 @@ class LoopFmt:
344
344
  # required env vars aren't set (e.g. running outside roll loop).
345
345
  self._emit_usage_event(ev, dur_ms, cost_usd)
346
346
 
347
+ @staticmethod
348
+ def _price_at_snapshot(model, totals):
349
+ """Resolve (cost_list_usd, prices_version) from the active price snapshot.
350
+
351
+ Returns (None, None) when model_prices isn't loadable or the snapshot
352
+ has no usable prices — callers still emit the event so token data and
353
+ duration aren't lost. When tokens are all zero, cost_list_usd is None.
354
+ """
355
+ try:
356
+ import importlib.util
357
+ lib_dir = os.path.dirname(os.path.abspath(__file__))
358
+ spec = importlib.util.spec_from_file_location(
359
+ "model_prices", os.path.join(lib_dir, "model_prices.py")
360
+ )
361
+ mp = importlib.util.module_from_spec(spec)
362
+ spec.loader.exec_module(mp)
363
+ except Exception:
364
+ return None, None
365
+ prices_version = getattr(mp, "VERSION", None)
366
+ has_tokens = any(int(totals.get(k) or 0) > 0 for k in totals)
367
+ if not has_tokens:
368
+ return None, prices_version
369
+ try:
370
+ cost = mp.compute_list_cost(
371
+ model,
372
+ input_tokens=int(totals.get("input_tokens") or 0),
373
+ output_tokens=int(totals.get("output_tokens") or 0),
374
+ cache_creation_tokens=int(totals.get("cache_creation_tokens") or 0),
375
+ cache_read_tokens=int(totals.get("cache_read_tokens") or 0),
376
+ )
377
+ except Exception:
378
+ return None, prices_version
379
+ return float(cost), prices_version
380
+
347
381
  def _emit_usage_event(self, result_ev, dur_ms, cost_usd):
348
382
  slug = os.environ.get("LOOP_PROJECT_SLUG")
349
383
  cycle = os.environ.get("LOOP_CYCLE_ID")
@@ -353,14 +387,39 @@ class LoopFmt:
353
387
  # Use the cumulative totals accumulated across all assistant turns;
354
388
  # result.usage is per-turn (last only) so it would under-count badly.
355
389
  model = result_ev.get("model") or self._last_model or ""
390
+
391
+ # FIX-099: skip writing the usage event when claude returned no real
392
+ # usage data (model empty, no tokens, AND cost/duration both zero). This prevents
393
+ # stale/placeholder values from leaking into the events stream and
394
+ # showing up as "cost=$1.24 dur=372s" in three consecutive cycles when
395
+ # the real cycle had no token data (the default-value fallback).
396
+ # The dashboard can render "n/a" for missing usage rather than false data.
397
+ has_model = bool(model)
398
+ has_tokens = any(self._usage_totals[k] > 0 for k in self._usage_totals)
399
+ has_cost = bool(cost_usd)
400
+ has_dur = bool(dur_ms)
401
+ if not has_model and not has_tokens and not has_cost and not has_dur:
402
+ return # nothing real to report — skip rather than persist zeros
403
+
404
+ # US-VIEW-014: freeze cost at the current snapshot's list price so a
405
+ # later prices refresh (or roll upgrade) never rewrites history. The
406
+ # dashboard reads cost_list_usd first; only legacy events without it
407
+ # fall back to recomputing and get tagged [legacy].
408
+ cost_list_usd, prices_version = self._price_at_snapshot(
409
+ model if has_model else None,
410
+ self._usage_totals,
411
+ )
412
+
356
413
  payload = {
357
- "model": model,
414
+ "model": model if has_model else None,
358
415
  "input_tokens": self._usage_totals["input_tokens"],
359
416
  "output_tokens": self._usage_totals["output_tokens"],
360
417
  "cache_creation_tokens": self._usage_totals["cache_creation_tokens"],
361
418
  "cache_read_tokens": self._usage_totals["cache_read_tokens"],
362
- "cost_reported_usd": float(cost_usd or 0),
363
- "duration_ms": int(dur_ms or 0),
419
+ "cost_reported_usd": float(cost_usd) if has_cost else None,
420
+ "duration_ms": int(dur_ms) if has_dur else None,
421
+ "cost_list_usd": cost_list_usd,
422
+ "prices_version": prices_version,
364
423
  }
365
424
  evfile = os.path.join(shared, "loop", f"events-{slug}.ndjson")
366
425
  line = json.dumps({
@@ -5,68 +5,108 @@ Pricing is per million tokens (MTok), USD. These are the public list rates;
5
5
  discounts (Pro subscription, prepay credits, etc.) are intentionally not
6
6
  modeled — IDEA-025 is about cross-account / cross-project comparable cost.
7
7
 
8
- Update this table when Anthropic changes pricing. Unknown models fall back
9
- to sonnet rates with a stderr warning so dashboards don't blank out.
8
+ US-VIEW-013: prices are no longer hardcoded here. They live in versioned
9
+ snapshot files under ``lib/prices/snapshot-YYYY-MM-DD.json`` and are loaded
10
+ at module import time. ``roll prices refresh`` produces new snapshots; this
11
+ module never writes — it only loads the latest one.
12
+
13
+ Unknown models fall back to the snapshot's ``default_model`` with a stderr
14
+ warning so dashboards don't blank out.
10
15
  """
11
16
 
17
+ import json
18
+ import os
12
19
  import sys
13
- from typing import Dict, Optional
14
-
15
- # Rates per million tokens (USD). cache_create = 5-minute cache write (1.25x
16
- # input). 1-hour cache writes (2x input) are not modeled — Roll loop uses the
17
- # default 5m caching only.
18
- # Source: https://platform.claude.com/docs/en/about-claude/pricing
19
- PRICES: Dict[str, Dict[str, float]] = {
20
- # Claude 4.x Opus family 2026-05 repricing: Opus 4.5+ moved to
21
- # $5/$25 base, 3x cheaper than Opus 4 / 4.1.
22
- "claude-opus-4-7": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
23
- "claude-opus-4-6": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
24
- "claude-opus-4-5": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
25
- "claude-opus-4-1": {"in": 15.00, "out": 75.00, "cache_create": 18.75, "cache_read": 1.50},
26
- "claude-opus-4": {"in": 15.00, "out": 75.00, "cache_create": 18.75, "cache_read": 1.50},
27
- # Claude 4.x Sonnet family.
28
- "claude-sonnet-4-6": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
29
- "claude-sonnet-4-5": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
30
- "claude-sonnet-4": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
31
- # Claude 4.x Haiku family.
32
- "claude-haiku-4-5": {"in": 1.00, "out": 5.00, "cache_create": 1.25, "cache_read": 0.10},
33
- # Older / retired models (Bedrock & Vertex only for 3.5 Haiku).
34
- "claude-haiku-3-5": {"in": 0.80, "out": 4.00, "cache_create": 1.00, "cache_read": 0.08},
35
- "claude-3-5-sonnet": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
36
- }
37
-
38
- DEFAULT = "claude-sonnet-4-6"
20
+ from typing import Any, Dict, List, Optional, Tuple
21
+
22
+ _LIB_DIR = os.path.dirname(os.path.abspath(__file__))
23
+ SNAPSHOT_DIR = os.path.join(_LIB_DIR, "prices")
24
+
25
+
26
+ def list_snapshots(snapshot_dir: str = SNAPSHOT_DIR) -> List[str]:
27
+ """Return absolute paths of all snapshot files, sorted oldest to newest by filename."""
28
+ if not os.path.isdir(snapshot_dir):
29
+ return []
30
+ entries = [
31
+ os.path.join(snapshot_dir, name)
32
+ for name in os.listdir(snapshot_dir)
33
+ if name.startswith("snapshot-") and name.endswith(".json")
34
+ ]
35
+ return sorted(entries)
36
+
37
+
38
+ def load_snapshot(path: str) -> Dict[str, Any]:
39
+ """Load a snapshot file and validate its shape."""
40
+ with open(path, "r", encoding="utf-8") as f:
41
+ data = json.load(f)
42
+ for key in ("version", "effective_at", "source_url", "prices"):
43
+ if key not in data:
44
+ raise ValueError(f"snapshot {path!r} missing required key {key!r}")
45
+ if not isinstance(data["prices"], dict) or not data["prices"]:
46
+ raise ValueError(f"snapshot {path!r} has empty or invalid prices map")
47
+ data.setdefault("default_model", next(iter(data["prices"])))
48
+ return data
49
+
50
+
51
+ def load_latest_snapshot(snapshot_dir: str = SNAPSHOT_DIR) -> Dict[str, Any]:
52
+ """Load the newest snapshot by filename. Raises FileNotFoundError if none exist."""
53
+ snaps = list_snapshots(snapshot_dir)
54
+ if not snaps:
55
+ raise FileNotFoundError(
56
+ f"no price snapshots found in {snapshot_dir}; run `roll prices refresh`"
57
+ )
58
+ return load_snapshot(snaps[-1])
59
+
60
+
61
+ _SNAPSHOT: Dict[str, Any] = load_latest_snapshot()
62
+ PRICES: Dict[str, Dict[str, float]] = _SNAPSHOT["prices"]
63
+ DEFAULT: str = _SNAPSHOT["default_model"]
64
+ VERSION: str = _SNAPSHOT["version"]
65
+ EFFECTIVE_AT: str = _SNAPSHOT["effective_at"]
66
+ SOURCE_URL: str = _SNAPSHOT["source_url"]
67
+
39
68
  _warned: set = set()
40
69
 
41
- def _resolve(model: Optional[str]) -> Dict[str, float]:
70
+
71
+ def snapshot_meta() -> Tuple[str, str, str]:
72
+ """Return (version, effective_at, source_url) of the active snapshot."""
73
+ return VERSION, EFFECTIVE_AT, SOURCE_URL
74
+
75
+
76
+ def _resolve(model: Optional[str], prices: Optional[Dict[str, Dict[str, float]]] = None,
77
+ default: Optional[str] = None) -> Dict[str, float]:
78
+ table = prices if prices is not None else PRICES
79
+ fallback = default if default is not None else DEFAULT
42
80
  if not model:
43
- return PRICES[DEFAULT]
44
- # Strip date suffixes like '-20251001' or '[1m]' context tags.
81
+ return table[fallback]
45
82
  base = model.split("[")[0].rstrip("0123456789-")
46
- # Try a prefix match against the table; longest match wins.
47
- candidates = [k for k in PRICES if model.startswith(k) or base.startswith(k)]
83
+ candidates = [k for k in table if model.startswith(k) or base.startswith(k)]
48
84
  if candidates:
49
- return PRICES[max(candidates, key=len)]
85
+ return table[max(candidates, key=len)]
50
86
  if model not in _warned:
51
87
  _warned.add(model)
52
- print(f"[model_prices] warn: unknown model {model!r}, falling back to {DEFAULT}",
88
+ print(f"[model_prices] warn: unknown model {model!r}, falling back to {fallback}",
53
89
  file=sys.stderr)
54
- return PRICES[DEFAULT]
90
+ return table[fallback]
91
+
55
92
 
56
93
  def compute_list_cost(model: Optional[str],
57
94
  *,
58
95
  input_tokens: int = 0,
59
96
  output_tokens: int = 0,
60
97
  cache_creation_tokens: int = 0,
61
- cache_read_tokens: int = 0) -> float:
98
+ cache_read_tokens: int = 0,
99
+ prices: Optional[Dict[str, Dict[str, float]]] = None,
100
+ default: Optional[str] = None) -> float:
62
101
  """Return USD cost at list price for one cycle's token usage."""
63
- p = _resolve(model)
102
+ p = _resolve(model, prices=prices, default=default)
64
103
  total = (input_tokens * p["in"]
65
104
  + output_tokens * p["out"]
66
105
  + cache_creation_tokens * p["cache_create"]
67
106
  + cache_read_tokens * p["cache_read"]) / 1_000_000
68
107
  return round(total, 4)
69
108
 
109
+
70
110
  def total_tokens(*,
71
111
  input_tokens: int = 0,
72
112
  output_tokens: int = 0,
@@ -0,0 +1,20 @@
1
+ {
2
+ "version": "2026-05-22",
3
+ "effective_at": "2026-05-22",
4
+ "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
5
+ "default_model": "claude-sonnet-4-6",
6
+ "notes": "Rates per million tokens (USD). cache_create = 5-minute cache write (1.25x input). 1-hour cache writes (2x input) are not modeled — Roll loop uses the default 5m caching only. 2026-05 repricing: Opus 4.5+ moved to $5/$25 base (3x cheaper than Opus 4/4.1).",
7
+ "prices": {
8
+ "claude-opus-4-7": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
9
+ "claude-opus-4-6": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
10
+ "claude-opus-4-5": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
11
+ "claude-opus-4-1": {"in": 15.00, "out": 75.00, "cache_create": 18.75, "cache_read": 1.50},
12
+ "claude-opus-4": {"in": 15.00, "out": 75.00, "cache_create": 18.75, "cache_read": 1.50},
13
+ "claude-sonnet-4-6": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
14
+ "claude-sonnet-4-5": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
15
+ "claude-sonnet-4": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
16
+ "claude-haiku-4-5": {"in": 1.00, "out": 5.00, "cache_create": 1.25, "cache_read": 0.10},
17
+ "claude-haiku-3-5": {"in": 0.80, "out": 4.00, "cache_create": 1.00, "cache_read": 0.08},
18
+ "claude-3-5-sonnet": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30}
19
+ }
20
+ }
@@ -0,0 +1,285 @@
1
+ """
2
+ prices_fetcher — fetch + parse + diff + write Claude API pricing snapshots.
3
+
4
+ US-VIEW-013: replaces the hardcoded PRICES table in ``model_prices.py`` with
5
+ versioned JSON snapshots under ``lib/prices/``. The fetcher pulls the live
6
+ pricing docs page, extracts the model rate rows, and writes a new snapshot
7
+ only when the rates differ from the most recent one on disk.
8
+
9
+ Design:
10
+ * ``fetch_pricing_html(url, timeout)`` — pure I/O, raises ``FetchError``
11
+ * ``parse_pricing_html(html)`` — pure parser, raises ``ParseError``
12
+ * ``diff_prices(old, new)`` — pure diff, returns list of changes
13
+ * ``write_snapshot(prices, ...)`` — pure I/O, returns the path written
14
+ * ``refresh(...)`` — orchestrator; the only function with side effects on
15
+ both network and disk
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import datetime as _dt
21
+ import json
22
+ import os
23
+ import re
24
+ import sys
25
+ from html.parser import HTMLParser
26
+ from typing import Any, Dict, List, Optional, Tuple
27
+ from urllib.error import URLError
28
+ from urllib.request import Request, urlopen
29
+
30
+ DEFAULT_SOURCE_URL = "https://platform.claude.com/docs/en/about-claude/pricing"
31
+ DEFAULT_TIMEOUT = 15
32
+
33
+ _MODEL_RE = re.compile(r"claude-(?:opus|sonnet|haiku)-[0-9](?:-[0-9])?")
34
+ _DOLLAR_RE = re.compile(r"\$\s*([0-9]+(?:\.[0-9]+)?)")
35
+
36
+
37
+ class FetchError(RuntimeError):
38
+ """Raised when fetching the pricing page fails."""
39
+
40
+
41
+ class ParseError(ValueError):
42
+ """Raised when the pricing HTML cannot be parsed into a prices map."""
43
+
44
+
45
+ def fetch_pricing_html(url: str = DEFAULT_SOURCE_URL,
46
+ timeout: float = DEFAULT_TIMEOUT) -> str:
47
+ """Fetch the pricing docs page and return its raw HTML."""
48
+ req = Request(url, headers={"User-Agent": "roll/prices_fetcher"})
49
+ try:
50
+ with urlopen(req, timeout=timeout) as resp:
51
+ data = resp.read()
52
+ charset = resp.headers.get_content_charset() or "utf-8"
53
+ return data.decode(charset, errors="replace")
54
+ except (URLError, OSError, TimeoutError) as exc:
55
+ raise FetchError(f"could not fetch {url}: {exc}") from exc
56
+
57
+
58
+ class _TableTextExtractor(HTMLParser):
59
+ """Walk an HTML document and yield <tr> cell-text lists per row."""
60
+
61
+ def __init__(self) -> None:
62
+ super().__init__()
63
+ self.rows: List[List[str]] = []
64
+ self._in_row = False
65
+ self._in_cell = False
66
+ self._cells: List[str] = []
67
+ self._cur: List[str] = []
68
+
69
+ def handle_starttag(self, tag: str, attrs): # noqa: ANN001
70
+ if tag == "tr":
71
+ self._in_row = True
72
+ self._cells = []
73
+ elif tag in ("td", "th") and self._in_row:
74
+ self._in_cell = True
75
+ self._cur = []
76
+
77
+ def handle_endtag(self, tag: str) -> None:
78
+ if tag in ("td", "th") and self._in_cell:
79
+ self._cells.append(" ".join(self._cur).strip())
80
+ self._in_cell = False
81
+ elif tag == "tr" and self._in_row:
82
+ if self._cells:
83
+ self.rows.append(self._cells)
84
+ self._in_row = False
85
+
86
+ def handle_data(self, data: str) -> None:
87
+ if self._in_cell:
88
+ self._cur.append(data)
89
+
90
+
91
+ def parse_pricing_html(html: str) -> Dict[str, Dict[str, float]]:
92
+ """Parse pricing docs HTML into a {model: rates} map.
93
+
94
+ The parser is intentionally tolerant: it scans every table row, looks for
95
+ the first ``claude-*`` model identifier and at least four dollar amounts on that
96
+ row, and treats the first four as ``in / cache_create / cache_read / out`` in the order they
97
+ appear. (Anthropic's table renders columns in that order.)
98
+ """
99
+ parser = _TableTextExtractor()
100
+ parser.feed(html)
101
+
102
+ prices: Dict[str, Dict[str, float]] = {}
103
+ for row in parser.rows:
104
+ text = " ".join(row)
105
+ model_match = _MODEL_RE.search(text)
106
+ if not model_match:
107
+ continue
108
+ model = model_match.group(0)
109
+ amounts = [float(m.group(1)) for m in _DOLLAR_RE.finditer(text)]
110
+ if len(amounts) < 4:
111
+ continue
112
+ in_rate, cache_create, cache_read, out_rate = amounts[:4]
113
+ prices[model] = {
114
+ "in": in_rate,
115
+ "out": out_rate,
116
+ "cache_create": cache_create,
117
+ "cache_read": cache_read,
118
+ }
119
+
120
+ if not prices:
121
+ raise ParseError("no price rows found in HTML; page layout may have changed")
122
+ return prices
123
+
124
+
125
+ def diff_prices(old: Dict[str, Dict[str, float]],
126
+ new: Dict[str, Dict[str, float]]
127
+ ) -> List[Tuple[str, str, str, Optional[float], Optional[float]]]:
128
+ """Return a list of (kind, model, field, old_val, new_val) tuples.
129
+
130
+ kind is one of: ``added``, ``removed``, ``changed``. For added rows the
131
+ old_val is None; for removed, the new_val is None.
132
+ """
133
+ changes: List[Tuple[str, str, str, Optional[float], Optional[float]]] = []
134
+ for model in sorted(set(old) | set(new)):
135
+ if model not in old:
136
+ for field, val in new[model].items():
137
+ changes.append(("added", model, field, None, val))
138
+ continue
139
+ if model not in new:
140
+ for field, val in old[model].items():
141
+ changes.append(("removed", model, field, val, None))
142
+ continue
143
+ for field in sorted(set(old[model]) | set(new[model])):
144
+ old_val = old[model].get(field)
145
+ new_val = new[model].get(field)
146
+ if old_val != new_val:
147
+ changes.append(("changed", model, field, old_val, new_val))
148
+ return changes
149
+
150
+
151
+ def format_diff(changes: List[Tuple[str, str, str, Optional[float], Optional[float]]],
152
+ colored: bool = True) -> str:
153
+ """Render diff_prices output as red-/green-coded lines."""
154
+ if not changes:
155
+ return ""
156
+ red = "\033[31m" if colored else ""
157
+ green = "\033[32m" if colored else ""
158
+ dim = "\033[2m" if colored else ""
159
+ reset = "\033[0m" if colored else ""
160
+ lines: List[str] = []
161
+ for kind, model, field, old, new in changes:
162
+ if kind == "added":
163
+ lines.append(f"{green}+ {model} {field} = {new}{reset}")
164
+ elif kind == "removed":
165
+ lines.append(f"{red}- {model} {field} = {old}{reset}")
166
+ else:
167
+ lines.append(f"{dim}~ {model} {field}{reset} {red}{old}{reset} → {green}{new}{reset}")
168
+ return "\n".join(lines)
169
+
170
+
171
+ def write_snapshot(prices: Dict[str, Dict[str, float]],
172
+ *,
173
+ snapshot_dir: str,
174
+ source_url: str = DEFAULT_SOURCE_URL,
175
+ effective_at: Optional[str] = None,
176
+ default_model: Optional[str] = None,
177
+ notes: Optional[str] = None) -> str:
178
+ """Write a new snapshot JSON and return its path."""
179
+ os.makedirs(snapshot_dir, exist_ok=True)
180
+ today = effective_at or _dt.date.today().isoformat()
181
+ payload: Dict[str, Any] = {
182
+ "version": today,
183
+ "effective_at": today,
184
+ "source_url": source_url,
185
+ "default_model": default_model or _pick_default(prices),
186
+ "prices": prices,
187
+ }
188
+ if notes:
189
+ payload["notes"] = notes
190
+ dest = os.path.join(snapshot_dir, f"snapshot-{today}.json")
191
+ with open(dest, "w", encoding="utf-8") as f:
192
+ json.dump(payload, f, indent=2, sort_keys=False)
193
+ f.write("\n")
194
+ return dest
195
+
196
+
197
+ def _pick_default(prices: Dict[str, Dict[str, float]]) -> str:
198
+ """Pick a sensible fallback model: prefer the first key containing "sonnet", else first key."""
199
+ for k in prices:
200
+ if "sonnet" in k:
201
+ return k
202
+ return next(iter(prices))
203
+
204
+
205
+ def refresh(*,
206
+ snapshot_dir: str,
207
+ url: str = DEFAULT_SOURCE_URL,
208
+ timeout: float = DEFAULT_TIMEOUT,
209
+ html: Optional[str] = None,
210
+ ) -> Tuple[str, List[Tuple[str, str, str, Optional[float], Optional[float]]]]:
211
+ """Fetch (or accept fixture HTML), parse, diff against latest snapshot, write.
212
+
213
+ Returns (action, changes) where action is one of:
214
+ ``"unchanged"`` — no diff vs latest snapshot, nothing written
215
+ ``"written:<path>"`` — new snapshot written at <path>
216
+ ``"first:<path>"`` — no prior snapshot existed; baseline written
217
+ """
218
+ if html is None:
219
+ html = fetch_pricing_html(url, timeout=timeout)
220
+ new_prices = parse_pricing_html(html)
221
+
222
+ # Load latest if any
223
+ latest = _latest_snapshot_path(snapshot_dir)
224
+ if latest is None:
225
+ dest = write_snapshot(new_prices, snapshot_dir=snapshot_dir, source_url=url)
226
+ return f"first:{dest}", diff_prices({}, new_prices)
227
+
228
+ with open(latest, "r", encoding="utf-8") as f:
229
+ old = json.load(f).get("prices", {})
230
+ changes = diff_prices(old, new_prices)
231
+ if not changes:
232
+ return "unchanged", []
233
+ dest = write_snapshot(new_prices, snapshot_dir=snapshot_dir, source_url=url)
234
+ return f"written:{dest}", changes
235
+
236
+
237
+ def _latest_snapshot_path(snapshot_dir: str) -> Optional[str]:
238
+ if not os.path.isdir(snapshot_dir):
239
+ return None
240
+ snaps = sorted(
241
+ os.path.join(snapshot_dir, n)
242
+ for n in os.listdir(snapshot_dir)
243
+ if n.startswith("snapshot-") and n.endswith(".json")
244
+ )
245
+ return snaps[-1] if snaps else None
246
+
247
+
248
+ # CLI entry — `python3 lib/prices_fetcher.py refresh|show` is the fallback when
249
+ # bin/roll is unavailable (e.g. running tests directly).
250
+ def _main(argv: List[str]) -> int:
251
+ snapshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prices")
252
+ if not argv or argv[0] in ("-h", "--help", "help"):
253
+ print("usage: prices_fetcher.py refresh|show [--url URL]")
254
+ return 0
255
+ cmd = argv[0]
256
+ url = DEFAULT_SOURCE_URL
257
+ if "--url" in argv:
258
+ url = argv[argv.index("--url") + 1]
259
+ if cmd == "show":
260
+ latest = _latest_snapshot_path(snapshot_dir)
261
+ if not latest:
262
+ print("no snapshot found", file=sys.stderr)
263
+ return 1
264
+ with open(latest) as f:
265
+ print(f.read())
266
+ return 0
267
+ if cmd == "refresh":
268
+ try:
269
+ action, changes = refresh(snapshot_dir=snapshot_dir, url=url)
270
+ except FetchError as exc:
271
+ print(f"fetch failed: {exc}", file=sys.stderr)
272
+ return 2
273
+ except ParseError as exc:
274
+ print(f"parse failed: {exc}", file=sys.stderr)
275
+ return 3
276
+ print(action)
277
+ if changes:
278
+ print(format_diff(changes, colored=sys.stdout.isatty()))
279
+ return 0
280
+ print(f"unknown command: {cmd}", file=sys.stderr)
281
+ return 1
282
+
283
+
284
+ if __name__ == "__main__": # pragma: no cover
285
+ sys.exit(_main(sys.argv[1:]))