@seanyao/roll 2026.523.1 → 2026.523.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/bin/roll +418 -14
- package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
- package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
- package/lib/i18n.sh +113 -0
- package/lib/loop-fmt.py +45 -0
- package/lib/model_prices.py +78 -38
- package/lib/prices/snapshot-2026-05-22.json +20 -0
- package/lib/prices_fetcher.py +285 -0
- package/lib/roll-loop-status.py +43 -19
- package/lib/roll_render.py +6 -1
- package/package.json +1 -1
package/lib/i18n.sh
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Roll i18n engine — US-I18N-001.
|
|
3
|
+
#
|
|
4
|
+
# Provides:
|
|
5
|
+
# _i18n_resolve_lang — resolve user's language per precedence
|
|
6
|
+
# msg <key> [args...] — look up message catalog with EN fallback
|
|
7
|
+
# _i18n_set <lang> <key> <value> — fill the catalog (used by modules)
|
|
8
|
+
#
|
|
9
|
+
# Storage is bash 3.2-compatible (macOS default ships 3.2): each catalog entry
|
|
10
|
+
# is held in a plain variable named `MSG_<LANG>_<key>`, looked up via indirect
|
|
11
|
+
# reference. Bash 4 assoc arrays would be cleaner but break macOS default bash
|
|
12
|
+
# (see AGENTS.md §4: single bash script, no runtime).
|
|
13
|
+
#
|
|
14
|
+
# Precedence: ROLL_LANG env > ~/.roll/config.yaml `lang` > LC_ALL > LANG
|
|
15
|
+
# > (macOS) AppleLanguages > 'en'.
|
|
16
|
+
# Decision: value starting with `zh` → "zh", everything else → "en".
|
|
17
|
+
|
|
18
|
+
# Sanitize a free-form key into a variable-safe suffix. Anything that isn't a
|
|
19
|
+
# letter, digit, or underscore becomes an underscore so callers can use natural
|
|
20
|
+
# dotted keys like "loop.cycle_start" without exploding bash syntax.
|
|
21
|
+
# Turn an arbitrary message key into a suffix that is legal inside a shell
# variable name: every character outside [A-Za-z0-9_] is replaced by "_".
#   $1  the free-form key, e.g. "loop.cycle_start"
# Prints the sanitized key followed by a newline.
_i18n_safe_key() {
  local raw="$1"
  local cleaned="${raw//[^A-Za-z0-9_]/_}"
  echo "$cleaned"
}
|
|
24
|
+
|
|
25
|
+
# Fill the catalog. Modules call this at source-time:
|
|
26
|
+
# _i18n_set en hello "Hello, %s!"
|
|
27
|
+
# _i18n_set zh hello "你好,%s!"
|
|
28
|
+
# Store one catalog entry in the variable MSG_<LANG>_<key> and export it.
#   $1  language code (upper-cased to form the variable name)
#   $2  message key (sanitized through _i18n_safe_key)
#   $3  message template, stored verbatim
_i18n_set() {
  local lang="$1" key="$2" val="$3"
  local target
  target="MSG_$(echo "$lang" | tr '[:lower:]' '[:upper:]')_$(_i18n_safe_key "$key")"
  # printf -v assigns to the variable whose name is held in $target.
  printf -v "$target" '%s' "$val"
  export "$target"
}
|
|
37
|
+
|
|
38
|
+
# Decide "zh" or "en" from a free-form locale string.
|
|
39
|
+
# Map a free-form locale string to one of the two supported languages.
#   $1  locale string (may be empty or omitted)
# Prints "zh" when the value starts with "zh", otherwise "en".
_i18n_classify() {
  local locale="${1:-}"
  if [[ "$locale" == zh* ]]; then
    echo zh
  else
    echo en
  fi
}
|
|
45
|
+
|
|
46
|
+
# Resolve the active language. Caches in ROLL_LANG_RESOLVED so later calls are
|
|
47
|
+
# free.
|
|
48
|
+
# Resolve the active language and print it ("zh" or "en").
#
# Sources are tried in order and the first non-empty one wins:
#   1. $ROLL_LANG
#   2. the `lang:` line of the file named by $ROLL_CONFIG
#   3. $LC_ALL
#   4. $LANG
#   5. the first AppleLanguages entry (only when `defaults` exists)
#   6. "en"
# The result is stored in ROLL_LANG_RESOLVED; when that variable is already
# set it is printed as-is and nothing else is evaluated. The cache only
# persists when this function runs in the current shell (not inside $(...)).
_i18n_resolve_lang() {
  if [[ -n "${ROLL_LANG_RESOLVED:-}" ]]; then
    echo "$ROLL_LANG_RESOLVED"
    return
  fi

  local lang=""

  if [[ -n "${ROLL_LANG:-}" ]]; then
    lang=$(_i18n_classify "$ROLL_LANG")
  fi

  if [[ -z "$lang" && -n "${ROLL_CONFIG:-}" && -f "${ROLL_CONFIG}" ]]; then
    local cfg
    # Strip, in order: the key, a trailing "# comment", trailing whitespace,
    # and one pair of surrounding quotes, so that `lang: "zh"` and
    # `lang: 'zh'` classify the same as `lang: zh`.
    # `|| true`: grep exits 1 when the file has no `lang:` line, which would
    # otherwise fail this assignment under `set -o pipefail` / `set -e`.
    cfg=$(grep -E '^lang:' "$ROLL_CONFIG" 2>/dev/null | head -1 \
      | sed -e 's/^lang:[[:space:]]*//' \
            -e 's/[[:space:]]*#.*$//' \
            -e 's/[[:space:]]*$//' \
            -e "s/^[\"']//" \
            -e "s/[\"']\$//" || true)
    if [[ -n "$cfg" ]]; then
      lang=$(_i18n_classify "$cfg")
    fi
  fi

  if [[ -z "$lang" && -n "${LC_ALL:-}" ]]; then
    lang=$(_i18n_classify "$LC_ALL")
  fi

  if [[ -z "$lang" && -n "${LANG:-}" ]]; then
    lang=$(_i18n_classify "$LANG")
  fi

  if [[ -z "$lang" ]] && command -v defaults >/dev/null 2>&1; then
    local apple
    # Line 2 of the `defaults read` output is the first list entry; the tr
    # call strips the quoting and punctuation around it.
    apple=$(defaults read -g AppleLanguages 2>/dev/null | head -2 | tail -1 \
      | tr -d ' ",()' | head -1 || true)
    if [[ -n "$apple" ]]; then
      lang=$(_i18n_classify "$apple")
    fi
  fi

  if [[ -z "$lang" ]]; then
    lang="en"
  fi

  ROLL_LANG_RESOLVED="$lang"
  echo "$lang"
}
|
|
89
|
+
|
|
90
|
+
# Look up message catalog entry. Falls back to EN, then to the key itself so
|
|
91
|
+
# missing translations stay visible without crashing the caller.
|
|
92
|
+
# Print the catalog message for a key, formatted with printf.
#   $1      message key (e.g. "loop.cycle_start")
#   $2...   arguments substituted into the message template
# Lookup order: the ZH entry when the resolved language is "zh", then the EN
# entry, then the key itself. Output always ends with a newline.
msg() {
  local key="${1:-}"; shift || true
  local lang safe

  # Run the resolver in the current shell rather than through $(...): a
  # command substitution is a subshell, so the ROLL_LANG_RESOLVED cache it
  # sets would be discarded and every call would redo the full resolution.
  if [[ -z "${ROLL_LANG_RESOLVED:-}" ]]; then
    _i18n_resolve_lang >/dev/null
  fi
  lang="${ROLL_LANG_RESOLVED:-en}"
  safe=$(_i18n_safe_key "$key")

  local zh_var="MSG_ZH_${safe}"
  local en_var="MSG_EN_${safe}"
  local tmpl=""

  if [[ "$lang" == "zh" && -n "${!zh_var:-}" ]]; then
    tmpl="${!zh_var}"
  elif [[ -n "${!en_var:-}" ]]; then
    tmpl="${!en_var}"
  else
    # No translation: show the key literally. It is not a catalog template,
    # so it must not be interpreted as a printf format string.
    printf '%s\n' "$key"
    return
  fi

  # `--` keeps a template that starts with "-" from being read as an option.
  # shellcheck disable=SC2059 — template comes from our own catalog
  printf -- "$tmpl" "$@"
  echo
}
|
package/lib/loop-fmt.py
CHANGED
|
@@ -344,6 +344,40 @@ class LoopFmt:
|
|
|
344
344
|
# required env vars aren't set (e.g. running outside roll loop).
|
|
345
345
|
self._emit_usage_event(ev, dur_ms, cost_usd)
|
|
346
346
|
|
|
347
|
+
@staticmethod
def _price_at_snapshot(model, totals):
    """Price one cycle's token totals with the currently active snapshot.

    Loads ``model_prices.py`` from this file's directory and asks it for the
    list-price cost of ``totals``.

    Returns a ``(cost_list_usd, prices_version)`` pair:

    * ``(None, None)`` when ``model_prices`` cannot be loaded at all;
    * ``(None, version)`` when no entry of ``totals`` is positive, or when
      computing the cost raises;
    * ``(float, version)`` otherwise.
    """
    try:
        import importlib.util

        here = os.path.dirname(os.path.abspath(__file__))
        module_spec = importlib.util.spec_from_file_location(
            "model_prices", os.path.join(here, "model_prices.py")
        )
        mp = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(mp)
    except Exception:
        # Pricing is best-effort: the caller still records tokens/duration.
        return None, None

    prices_version = getattr(mp, "VERSION", None)

    def count(name):
        # Missing or None entries count as zero tokens.
        return int(totals.get(name) or 0)

    if not any(count(name) > 0 for name in totals):
        return None, prices_version

    try:
        cost = mp.compute_list_cost(
            model,
            input_tokens=count("input_tokens"),
            output_tokens=count("output_tokens"),
            cache_creation_tokens=count("cache_creation_tokens"),
            cache_read_tokens=count("cache_read_tokens"),
        )
    except Exception:
        return None, prices_version
    return float(cost), prices_version
|
|
380
|
+
|
|
347
381
|
def _emit_usage_event(self, result_ev, dur_ms, cost_usd):
|
|
348
382
|
slug = os.environ.get("LOOP_PROJECT_SLUG")
|
|
349
383
|
cycle = os.environ.get("LOOP_CYCLE_ID")
|
|
@@ -367,6 +401,15 @@ class LoopFmt:
|
|
|
367
401
|
if not has_model and not has_tokens and not has_cost and not has_dur:
|
|
368
402
|
return # nothing real to report — skip rather than persist zeros
|
|
369
403
|
|
|
404
|
+
# US-VIEW-014: freeze cost at the current snapshot's list price so a
|
|
405
|
+
# later prices refresh (or roll upgrade) never rewrites history. The
|
|
406
|
+
# dashboard reads cost_list_usd first; only legacy events without it
|
|
407
|
+
# fall back to recomputing and get tagged [legacy].
|
|
408
|
+
cost_list_usd, prices_version = self._price_at_snapshot(
|
|
409
|
+
model if has_model else None,
|
|
410
|
+
self._usage_totals,
|
|
411
|
+
)
|
|
412
|
+
|
|
370
413
|
payload = {
|
|
371
414
|
"model": model if has_model else None,
|
|
372
415
|
"input_tokens": self._usage_totals["input_tokens"],
|
|
@@ -375,6 +418,8 @@ class LoopFmt:
|
|
|
375
418
|
"cache_read_tokens": self._usage_totals["cache_read_tokens"],
|
|
376
419
|
"cost_reported_usd": float(cost_usd) if has_cost else None,
|
|
377
420
|
"duration_ms": int(dur_ms) if has_dur else None,
|
|
421
|
+
"cost_list_usd": cost_list_usd,
|
|
422
|
+
"prices_version": prices_version,
|
|
378
423
|
}
|
|
379
424
|
evfile = os.path.join(shared, "loop", f"events-{slug}.ndjson")
|
|
380
425
|
line = json.dumps({
|
package/lib/model_prices.py
CHANGED
|
@@ -5,68 +5,108 @@ Pricing is per million tokens (MTok), USD. These are the public list rates;
|
|
|
5
5
|
discounts (Pro subscription, prepay credits, etc.) are intentionally not
|
|
6
6
|
modeled — IDEA-025 is about cross-account / cross-project comparable cost.
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
US-VIEW-013: prices are no longer hardcoded here. They live in versioned
|
|
9
|
+
snapshot files under ``lib/prices/snapshot-YYYY-MM-DD.json`` and are loaded
|
|
10
|
+
at module import time. ``roll prices refresh`` produces new snapshots; this
|
|
11
|
+
module never writes — it only loads the latest one.
|
|
12
|
+
|
|
13
|
+
Unknown models fall back to the snapshot's ``default_model`` with a stderr
|
|
14
|
+
warning so dashboards don't blank out.
|
|
10
15
|
"""
|
|
11
16
|
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
12
19
|
import sys
|
|
13
|
-
from typing import Dict, Optional
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
"
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
20
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
21
|
+
|
|
22
|
+
_LIB_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
23
|
+
SNAPSHOT_DIR = os.path.join(_LIB_DIR, "prices")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def list_snapshots(snapshot_dir: str = SNAPSHOT_DIR) -> List[str]:
    """List snapshot files in *snapshot_dir*, sorted ascending by path.

    Only names that start with ``snapshot-`` and end with ``.json`` are
    included. Returns an empty list when the directory does not exist.
    """
    if not os.path.isdir(snapshot_dir):
        return []
    found = []
    for filename in os.listdir(snapshot_dir):
        if filename.startswith("snapshot-") and filename.endswith(".json"):
            found.append(os.path.join(snapshot_dir, filename))
    found.sort()
    return found
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_snapshot(path: str) -> Dict[str, Any]:
    """Load a snapshot file and validate its shape.

    Args:
        path: Path of a snapshot JSON file.

    Returns:
        The parsed snapshot. ``default_model`` is always present and is
        always a key of ``prices``: when the file omits it, or names a model
        that has no price row, the first model in ``prices`` is used.

    Raises:
        ValueError: If the document is not a JSON object, lacks one of
            ``version`` / ``effective_at`` / ``source_url`` / ``prices``, or
            its ``prices`` value is not a non-empty object.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(f"snapshot {path!r} is not a JSON object")
    for key in ("version", "effective_at", "source_url", "prices"):
        if key not in data:
            raise ValueError(f"snapshot {path!r} missing required key {key!r}")
    prices = data["prices"]
    if not isinstance(prices, dict) or not prices:
        raise ValueError(f"snapshot {path!r} has empty or invalid prices map")
    # A default that is absent from the price table would surface later as a
    # KeyError on the first unknown-model lookup; repair it at load time.
    default_model = data.get("default_model")
    if not isinstance(default_model, str) or default_model not in prices:
        data["default_model"] = next(iter(prices))
    return data
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_latest_snapshot(snapshot_dir: str = SNAPSHOT_DIR) -> Dict[str, Any]:
    """Load the snapshot whose filename sorts last in *snapshot_dir*.

    Raises:
        FileNotFoundError: If the directory contains no snapshot files.
    """
    available = list_snapshots(snapshot_dir)
    if available:
        return load_snapshot(available[-1])
    raise FileNotFoundError(
        f"no price snapshots found in {snapshot_dir}; run `roll prices refresh`"
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
_SNAPSHOT: Dict[str, Any] = load_latest_snapshot()
|
|
62
|
+
PRICES: Dict[str, Dict[str, float]] = _SNAPSHOT["prices"]
|
|
63
|
+
DEFAULT: str = _SNAPSHOT["default_model"]
|
|
64
|
+
VERSION: str = _SNAPSHOT["version"]
|
|
65
|
+
EFFECTIVE_AT: str = _SNAPSHOT["effective_at"]
|
|
66
|
+
SOURCE_URL: str = _SNAPSHOT["source_url"]
|
|
67
|
+
|
|
39
68
|
_warned: set = set()
|
|
40
69
|
|
|
41
|
-
|
|
70
|
+
|
|
71
|
+
def snapshot_meta() -> Tuple[str, str, str]:
    """Describe the active snapshot as ``(version, effective_at, source_url)``."""
    meta = (VERSION, EFFECTIVE_AT, SOURCE_URL)
    return meta
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _resolve(model: Optional[str], prices: Optional[Dict[str, Dict[str, float]]] = None,
|
|
77
|
+
default: Optional[str] = None) -> Dict[str, float]:
|
|
78
|
+
table = prices if prices is not None else PRICES
|
|
79
|
+
fallback = default if default is not None else DEFAULT
|
|
42
80
|
if not model:
|
|
43
|
-
return
|
|
44
|
-
# Strip date suffixes like '-20251001' or '[1m]' context tags.
|
|
81
|
+
return table[fallback]
|
|
45
82
|
base = model.split("[")[0].rstrip("0123456789-")
|
|
46
|
-
|
|
47
|
-
candidates = [k for k in PRICES if model.startswith(k) or base.startswith(k)]
|
|
83
|
+
candidates = [k for k in table if model.startswith(k) or base.startswith(k)]
|
|
48
84
|
if candidates:
|
|
49
|
-
return
|
|
85
|
+
return table[max(candidates, key=len)]
|
|
50
86
|
if model not in _warned:
|
|
51
87
|
_warned.add(model)
|
|
52
|
-
print(f"[model_prices] warn: unknown model {model!r}, falling back to {
|
|
88
|
+
print(f"[model_prices] warn: unknown model {model!r}, falling back to {fallback}",
|
|
53
89
|
file=sys.stderr)
|
|
54
|
-
return
|
|
90
|
+
return table[fallback]
|
|
91
|
+
|
|
55
92
|
|
|
56
93
|
def compute_list_cost(model: Optional[str],
                      *,
                      input_tokens: int = 0,
                      output_tokens: int = 0,
                      cache_creation_tokens: int = 0,
                      cache_read_tokens: int = 0,
                      prices: Optional[Dict[str, Dict[str, float]]] = None,
                      default: Optional[str] = None) -> float:
    """Price one cycle's token usage at list rates, in USD.

    Each token count is multiplied by the matching per-million-token rate of
    the row resolved for *model*; the sum is divided by one million and
    rounded to four decimal places. ``prices`` and ``default`` are passed
    through to the row lookup.
    """
    rates = _resolve(model, prices=prices, default=default)
    input_cost = input_tokens * rates["in"]
    output_cost = output_tokens * rates["out"]
    cache_write_cost = cache_creation_tokens * rates["cache_create"]
    cache_read_cost = cache_read_tokens * rates["cache_read"]
    per_mtok_total = input_cost + output_cost + cache_write_cost + cache_read_cost
    return round(per_mtok_total / 1_000_000, 4)
|
|
69
108
|
|
|
109
|
+
|
|
70
110
|
def total_tokens(*,
|
|
71
111
|
input_tokens: int = 0,
|
|
72
112
|
output_tokens: int = 0,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "2026-05-22",
|
|
3
|
+
"effective_at": "2026-05-22",
|
|
4
|
+
"source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
|
|
5
|
+
"default_model": "claude-sonnet-4-6",
|
|
6
|
+
"notes": "Rates per million tokens (USD). cache_create = 5-minute cache write (1.25x input). 1-hour cache writes (2x input) are not modeled — Roll loop uses the default 5m caching only. 2026-05 repricing: Opus 4.5+ moved to $5/$25 base (3x cheaper than Opus 4/4.1).",
|
|
7
|
+
"prices": {
|
|
8
|
+
"claude-opus-4-7": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
|
|
9
|
+
"claude-opus-4-6": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
|
|
10
|
+
"claude-opus-4-5": {"in": 5.00, "out": 25.00, "cache_create": 6.25, "cache_read": 0.50},
|
|
11
|
+
"claude-opus-4-1": {"in": 15.00, "out": 75.00, "cache_create": 18.75, "cache_read": 1.50},
|
|
12
|
+
"claude-opus-4": {"in": 15.00, "out": 75.00, "cache_create": 18.75, "cache_read": 1.50},
|
|
13
|
+
"claude-sonnet-4-6": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
|
|
14
|
+
"claude-sonnet-4-5": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
|
|
15
|
+
"claude-sonnet-4": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30},
|
|
16
|
+
"claude-haiku-4-5": {"in": 1.00, "out": 5.00, "cache_create": 1.25, "cache_read": 0.10},
|
|
17
|
+
"claude-haiku-3-5": {"in": 0.80, "out": 4.00, "cache_create": 1.00, "cache_read": 0.08},
|
|
18
|
+
"claude-3-5-sonnet": {"in": 3.00, "out": 15.00, "cache_create": 3.75, "cache_read": 0.30}
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""
|
|
2
|
+
prices_fetcher — fetch + parse + diff + write Claude API pricing snapshots.
|
|
3
|
+
|
|
4
|
+
US-VIEW-013: replaces the hardcoded PRICES table in ``model_prices.py`` with
|
|
5
|
+
versioned JSON snapshots under ``lib/prices/``. The fetcher pulls the live
|
|
6
|
+
pricing docs page, extracts the model rate rows, and writes a new snapshot
|
|
7
|
+
only when the rates differ from the most recent one on disk.
|
|
8
|
+
|
|
9
|
+
Design:
|
|
10
|
+
* ``fetch_pricing_html(url, timeout)`` — pure I/O, raises ``FetchError``
|
|
11
|
+
* ``parse_pricing_html(html)`` — pure parser, raises ``ParseError``
|
|
12
|
+
* ``diff_prices(old, new)`` — pure diff, returns list of changes
|
|
13
|
+
* ``write_snapshot(prices, ...)`` — pure I/O, returns the path written
|
|
14
|
+
* ``refresh(...)`` — orchestrator; the only function with side effects on
|
|
15
|
+
both network and disk
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import datetime as _dt
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
24
|
+
import sys
|
|
25
|
+
from html.parser import HTMLParser
|
|
26
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
27
|
+
from urllib.error import URLError
|
|
28
|
+
from urllib.request import Request, urlopen
|
|
29
|
+
|
|
30
|
+
DEFAULT_SOURCE_URL = "https://platform.claude.com/docs/en/about-claude/pricing"
|
|
31
|
+
DEFAULT_TIMEOUT = 15
|
|
32
|
+
|
|
33
|
+
_MODEL_RE = re.compile(r"claude-(?:opus|sonnet|haiku)-[0-9](?:-[0-9])?")
|
|
34
|
+
_DOLLAR_RE = re.compile(r"\$\s*([0-9]+(?:\.[0-9]+)?)")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FetchError(RuntimeError):
    """Signal that the pricing page could not be downloaded."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ParseError(ValueError):
    """Signal that no prices map could be extracted from the pricing HTML."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def fetch_pricing_html(url: str = DEFAULT_SOURCE_URL,
                       timeout: float = DEFAULT_TIMEOUT) -> str:
    """Fetch the pricing docs page and return its decoded body.

    Args:
        url: Address of the pricing page.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded with the charset announced in the response
        headers (UTF-8 when none is announced or the label is unknown).
        Undecodable bytes are replaced rather than raising.

    Raises:
        FetchError: For every failure to obtain the body — malformed URL,
            network/OS error, timeout, or a broken HTTP exchange.
    """
    # Function-scope import keeps this block self-contained.
    from http.client import HTTPException

    try:
        # Request() itself raises ValueError for a URL without a scheme, so
        # it belongs inside the guarded region.
        req = Request(url, headers={"User-Agent": "roll/prices_fetcher"})
        with urlopen(req, timeout=timeout) as resp:
            data = resp.read()
            charset = resp.headers.get_content_charset() or "utf-8"
    except (URLError, OSError, HTTPException, ValueError) as exc:
        # HTTPException (e.g. IncompleteRead from read()) is not an OSError.
        raise FetchError(f"could not fetch {url}: {exc}") from exc
    try:
        return data.decode(charset, errors="replace")
    except LookupError:
        # The server announced a charset name Python does not know.
        return data.decode("utf-8", errors="replace")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class _TableTextExtractor(HTMLParser):
|
|
59
|
+
"""Walk an HTML document and yield <tr> cell-text lists per row."""
|
|
60
|
+
|
|
61
|
+
def __init__(self) -> None:
|
|
62
|
+
super().__init__()
|
|
63
|
+
self.rows: List[List[str]] = []
|
|
64
|
+
self._in_row = False
|
|
65
|
+
self._in_cell = False
|
|
66
|
+
self._cells: List[str] = []
|
|
67
|
+
self._cur: List[str] = []
|
|
68
|
+
|
|
69
|
+
def handle_starttag(self, tag: str, attrs): # noqa: ANN001
|
|
70
|
+
if tag == "tr":
|
|
71
|
+
self._in_row = True
|
|
72
|
+
self._cells = []
|
|
73
|
+
elif tag in ("td", "th") and self._in_row:
|
|
74
|
+
self._in_cell = True
|
|
75
|
+
self._cur = []
|
|
76
|
+
|
|
77
|
+
def handle_endtag(self, tag: str) -> None:
|
|
78
|
+
if tag in ("td", "th") and self._in_cell:
|
|
79
|
+
self._cells.append(" ".join(self._cur).strip())
|
|
80
|
+
self._in_cell = False
|
|
81
|
+
elif tag == "tr" and self._in_row:
|
|
82
|
+
if self._cells:
|
|
83
|
+
self.rows.append(self._cells)
|
|
84
|
+
self._in_row = False
|
|
85
|
+
|
|
86
|
+
def handle_data(self, data: str) -> None:
|
|
87
|
+
if self._in_cell:
|
|
88
|
+
self._cur.append(data)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def parse_pricing_html(html: str) -> Dict[str, Dict[str, float]]:
    """Extract a ``{model: rates}`` map from the pricing docs HTML.

    Every table row is flattened to text. A row contributes an entry when it
    contains a model identifier and at least four dollar amounts; the first
    four amounts are read, in order, as ``in``, ``cache_create``,
    ``cache_read`` and ``out``. A later row for the same model replaces an
    earlier one.

    NOTE(review): a row with more than four amounts is truncated to its
    first four, so this mapping assumes the table has exactly those four
    price columns in that order — confirm against the live page.

    Raises:
        ParseError: If no row yields a price entry.
    """
    extractor = _TableTextExtractor()
    extractor.feed(html)

    found: Dict[str, Dict[str, float]] = {}
    for cells in extractor.rows:
        row_text = " ".join(cells)
        hit = _MODEL_RE.search(row_text)
        if hit is None:
            continue
        amounts = [float(raw) for raw in _DOLLAR_RE.findall(row_text)]
        if len(amounts) >= 4:
            in_rate, cache_create, cache_read, out_rate = amounts[:4]
            found[hit.group(0)] = {
                "in": in_rate,
                "out": out_rate,
                "cache_create": cache_create,
                "cache_read": cache_read,
            }

    if found:
        return found
    raise ParseError("no price rows found in HTML; page layout may have changed")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def diff_prices(old: Dict[str, Dict[str, float]],
                new: Dict[str, Dict[str, float]]
                ) -> List[Tuple[str, str, str, Optional[float], Optional[float]]]:
    """Compare two price maps field by field.

    Returns ``(kind, model, field, old_val, new_val)`` tuples, models in
    sorted order:

    * ``added``   — model only in *new*; one tuple per field, old_val None;
    * ``removed`` — model only in *old*; one tuple per field, new_val None;
    * ``changed`` — model in both; one tuple per field (sorted) whose values
      differ, a field missing on one side counting as None.
    """
    changes: List[Tuple[str, str, str, Optional[float], Optional[float]]] = []
    for model in sorted({*old, *new}):
        if model in old and model in new:
            before, after = old[model], new[model]
            for field in sorted({*before, *after}):
                old_val, new_val = before.get(field), after.get(field)
                if old_val != new_val:
                    changes.append(("changed", model, field, old_val, new_val))
        elif model in new:
            changes.extend(
                ("added", model, field, None, val)
                for field, val in new[model].items()
            )
        else:
            changes.extend(
                ("removed", model, field, val, None)
                for field, val in old[model].items()
            )
    return changes
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def format_diff(changes: List[Tuple[str, str, str, Optional[float], Optional[float]]],
                colored: bool = True) -> str:
    """Render ``diff_prices`` output as one text line per change.

    Added fields are prefixed ``+`` (green), removed fields ``-`` (red) and
    changed fields ``~`` with the old value in red and the new in green.
    With ``colored=False`` no ANSI escape sequences are emitted. Returns an
    empty string for an empty change list.
    """
    if not changes:
        return ""
    if colored:
        red, green, dim, reset = "\033[31m", "\033[32m", "\033[2m", "\033[0m"
    else:
        red = green = dim = reset = ""

    def render(change):
        kind, model, field, old, new = change
        if kind == "added":
            return f"{green}+ {model} {field} = {new}{reset}"
        if kind == "removed":
            return f"{red}- {model} {field} = {old}{reset}"
        return f"{dim}~ {model} {field}{reset} {red}{old}{reset} → {green}{new}{reset}"

    return "\n".join(render(change) for change in changes)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def write_snapshot(prices: Dict[str, Dict[str, float]],
                   *,
                   snapshot_dir: str,
                   source_url: str = DEFAULT_SOURCE_URL,
                   effective_at: Optional[str] = None,
                   default_model: Optional[str] = None,
                   notes: Optional[str] = None) -> str:
    """Write a snapshot JSON file and return its path.

    Args:
        prices: ``{model: rates}`` map to store.
        snapshot_dir: Target directory; created when missing.
        source_url: Recorded as the snapshot's ``source_url``.
        effective_at: Date string used for ``version``, ``effective_at`` and
            the filename; today's ISO date when omitted.
        default_model: Fallback model to record; chosen by ``_pick_default``
            when omitted.
        notes: Optional free text stored under ``notes``.

    Returns:
        Path of ``snapshot-<date>.json`` inside *snapshot_dir*. An existing
        file for the same date is replaced.
    """
    os.makedirs(snapshot_dir, exist_ok=True)
    today = effective_at or _dt.date.today().isoformat()
    payload: Dict[str, Any] = {
        "version": today,
        "effective_at": today,
        "source_url": source_url,
        "default_model": default_model or _pick_default(prices),
        "prices": prices,
    }
    if notes:
        payload["notes"] = notes
    dest = os.path.join(snapshot_dir, f"snapshot-{today}.json")
    # Write to a sibling temp file and rename it into place, so a reader (or
    # an interrupted write) never sees a half-written snapshot. The ".tmp"
    # suffix keeps the temp file out of the "snapshot-*.json" listing.
    tmp_path = f"{dest}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, sort_keys=False)
            f.write("\n")
        os.replace(tmp_path, dest)
    finally:
        # Only present here when the write or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return dest
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _pick_default(prices: Dict[str, Dict[str, float]]) -> str:
|
|
198
|
+
"""Pick a sensible fallback model: prefer the cheapest sonnet, else first key."""
|
|
199
|
+
for k in prices:
|
|
200
|
+
if "sonnet" in k:
|
|
201
|
+
return k
|
|
202
|
+
return next(iter(prices))
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def refresh(*,
            snapshot_dir: str,
            url: str = DEFAULT_SOURCE_URL,
            timeout: float = DEFAULT_TIMEOUT,
            html: Optional[str] = None,
            ) -> Tuple[str, List[Tuple[str, str, str, Optional[float], Optional[float]]]]:
    """Fetch (or accept fixture HTML), parse, diff against latest snapshot, write.

    Args:
        snapshot_dir: Directory holding the snapshot files.
        url: Pricing page to fetch; also recorded in a written snapshot.
        timeout: Fetch timeout in seconds.
        html: Pre-fetched page content; when given, nothing is downloaded.

    Returns:
        ``(action, changes)`` where action is one of:
          ``"unchanged"``      — no diff vs latest snapshot, nothing written
          ``"written:<path>"`` — new snapshot written at <path>
          ``"first:<path>"``   — no prior snapshot existed; baseline written
        and ``changes`` is the ``diff_prices`` result (empty for unchanged).

    Raises:
        FetchError: If the page cannot be fetched.
        ParseError: If no prices can be parsed from the page.
    """
    if html is None:
        html = fetch_pricing_html(url, timeout=timeout)
    new_prices = parse_pricing_html(html)

    latest = _latest_snapshot_path(snapshot_dir)
    if latest is None:
        dest = write_snapshot(new_prices, snapshot_dir=snapshot_dir, source_url=url)
        return f"first:{dest}", diff_prices({}, new_prices)

    with open(latest, "r", encoding="utf-8") as f:
        previous = json.load(f)
    changes = diff_prices(previous.get("prices", {}), new_prices)
    if not changes:
        return "unchanged", []

    # Carry the previous fallback model forward while it is still priced, so
    # a rate change does not silently switch the unknown-model fallback to
    # whichever sonnet row the page happens to list first.
    prev_default = previous.get("default_model")
    keep_default = (
        prev_default
        if isinstance(prev_default, str) and prev_default in new_prices
        else None
    )
    dest = write_snapshot(new_prices, snapshot_dir=snapshot_dir, source_url=url,
                          default_model=keep_default)
    return f"written:{dest}", changes
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _latest_snapshot_path(snapshot_dir: str) -> Optional[str]:
|
|
238
|
+
if not os.path.isdir(snapshot_dir):
|
|
239
|
+
return None
|
|
240
|
+
snaps = sorted(
|
|
241
|
+
os.path.join(snapshot_dir, n)
|
|
242
|
+
for n in os.listdir(snapshot_dir)
|
|
243
|
+
if n.startswith("snapshot-") and n.endswith(".json")
|
|
244
|
+
)
|
|
245
|
+
return snaps[-1] if snaps else None
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# CLI entry — `python3 lib/prices_fetcher.py refresh|show` is the fallback when
|
|
249
|
+
# bin/roll is unavailable (e.g. running tests directly).
|
|
250
|
+
def _main(argv: List[str]) -> int:
|
|
251
|
+
snapshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prices")
|
|
252
|
+
if not argv or argv[0] in ("-h", "--help", "help"):
|
|
253
|
+
print("usage: prices_fetcher.py refresh|show [--url URL]")
|
|
254
|
+
return 0
|
|
255
|
+
cmd = argv[0]
|
|
256
|
+
url = DEFAULT_SOURCE_URL
|
|
257
|
+
if "--url" in argv:
|
|
258
|
+
url = argv[argv.index("--url") + 1]
|
|
259
|
+
if cmd == "show":
|
|
260
|
+
latest = _latest_snapshot_path(snapshot_dir)
|
|
261
|
+
if not latest:
|
|
262
|
+
print("no snapshot found", file=sys.stderr)
|
|
263
|
+
return 1
|
|
264
|
+
with open(latest) as f:
|
|
265
|
+
print(f.read())
|
|
266
|
+
return 0
|
|
267
|
+
if cmd == "refresh":
|
|
268
|
+
try:
|
|
269
|
+
action, changes = refresh(snapshot_dir=snapshot_dir, url=url)
|
|
270
|
+
except FetchError as exc:
|
|
271
|
+
print(f"fetch failed: {exc}", file=sys.stderr)
|
|
272
|
+
return 2
|
|
273
|
+
except ParseError as exc:
|
|
274
|
+
print(f"parse failed: {exc}", file=sys.stderr)
|
|
275
|
+
return 3
|
|
276
|
+
print(action)
|
|
277
|
+
if changes:
|
|
278
|
+
print(format_diff(changes, colored=sys.stdout.isatty()))
|
|
279
|
+
return 0
|
|
280
|
+
print(f"unknown command: {cmd}", file=sys.stderr)
|
|
281
|
+
return 1
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
if __name__ == "__main__": # pragma: no cover
|
|
285
|
+
sys.exit(_main(sys.argv[1:]))
|