alpha-engine-lib 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alpha_engine_lib/__init__.py +3 -0
- alpha_engine_lib/agent_schemas.py +663 -0
- alpha_engine_lib/alerts.py +576 -0
- alpha_engine_lib/arcticdb.py +340 -0
- alpha_engine_lib/collector_results.py +69 -0
- alpha_engine_lib/cost.py +665 -0
- alpha_engine_lib/dates.py +273 -0
- alpha_engine_lib/decision_capture.py +462 -0
- alpha_engine_lib/ec2_spot.py +363 -0
- alpha_engine_lib/email_sender.py +206 -0
- alpha_engine_lib/eval_artifacts.py +361 -0
- alpha_engine_lib/logging.py +303 -0
- alpha_engine_lib/model_pricing.yaml +73 -0
- alpha_engine_lib/pillars.py +756 -0
- alpha_engine_lib/pipeline_status/__init__.py +70 -0
- alpha_engine_lib/pipeline_status/read.py +541 -0
- alpha_engine_lib/pipeline_status/registry.py +368 -0
- alpha_engine_lib/pipeline_status/templates.py +120 -0
- alpha_engine_lib/preflight.py +444 -0
- alpha_engine_lib/rag/__init__.py +39 -0
- alpha_engine_lib/rag/db.py +96 -0
- alpha_engine_lib/rag/embeddings.py +63 -0
- alpha_engine_lib/rag/migrations/0001_content_tsv.sql +39 -0
- alpha_engine_lib/rag/rerank.py +377 -0
- alpha_engine_lib/rag/retrieval.py +465 -0
- alpha_engine_lib/rag/schema.sql +65 -0
- alpha_engine_lib/reconcile.py +203 -0
- alpha_engine_lib/secrets.py +186 -0
- alpha_engine_lib/sources/__init__.py +35 -0
- alpha_engine_lib/sources/protocols.py +227 -0
- alpha_engine_lib/ssm_log_capture.py +274 -0
- alpha_engine_lib/telegram.py +165 -0
- alpha_engine_lib/trading_calendar.py +236 -0
- alpha_engine_lib/transparency.py +746 -0
- alpha_engine_lib/transparency_inventory.yaml +260 -0
- alpha_engine_lib/universe.py +83 -0
- alpha_engine_lib-0.32.0.dist-info/METADATA +217 -0
- alpha_engine_lib-0.32.0.dist-info/RECORD +40 -0
- alpha_engine_lib-0.32.0.dist-info/WHEEL +5 -0
- alpha_engine_lib-0.32.0.dist-info/top_level.txt +1 -0
alpha_engine_lib/cost.py
ADDED
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM cost-pricing primitive for the Alpha Engine cost-telemetry stream.
|
|
3
|
+
|
|
4
|
+
This module is the price-table side of the P1 "Per-run LLM cost telemetry as
|
|
5
|
+
code artifact" workstream. The capture wrapper in
|
|
6
|
+
:mod:`alpha_engine_lib.decision_capture` records token counts on every LLM
|
|
7
|
+
call; this module translates token counts × model × wall-clock time into a
|
|
8
|
+
USD cost figure.
|
|
9
|
+
|
|
10
|
+
**Design rule — tokens are immutable, dollars are derived.** Per the
|
|
11
|
+
roadmap entry's scope, dollar amounts are NEVER persisted as the load-bearing
|
|
12
|
+
analytics column. Every captured artifact stores token counts; cost is
|
|
13
|
+
recomputed from the active rate card at query time. That way, if Anthropic
|
|
14
|
+
changes pricing or a rate-card entry was wrong when it was first written,
|
|
15
|
+
historical numbers can be repriced without rewriting captured data.
|
|
16
|
+
|
|
17
|
+
``ModelMetadata.cost_usd`` exists as a derived convenience (handy for
|
|
18
|
+
emails + dashboards that don't want to load the rate card on every read);
|
|
19
|
+
it is overwritable by :func:`recompute_cost` and must not be treated as
|
|
20
|
+
canonical.
|
|
21
|
+
|
|
22
|
+
**Effective dates.** Each ``PriceCard`` carries an ``effective_from``
|
|
23
|
+
date. :meth:`PriceTable.get` returns the card whose ``effective_from`` is
|
|
24
|
+
the latest ≤ the query date — so a January call gets January rates even
|
|
25
|
+
when the YAML has been updated for April rates. Cards for the same model
|
|
26
|
+
must be ordered by ``effective_from`` ascending; the loader hard-fails on
|
|
27
|
+
overlap or unsorted input per ``feedback_no_silent_fails``.
|
|
28
|
+
|
|
29
|
+
**Public surface:**
|
|
30
|
+
|
|
31
|
+
- :class:`PriceCard` — one (model_name, effective_from) → per-1M-token rate row.
|
|
32
|
+
- :class:`PriceTable` — wraps a list of cards with effective-date lookup.
|
|
33
|
+
- :class:`ToolFee` — one (tool_name, effective_from) → per-1K-request rate row,
|
|
34
|
+
for Anthropic server-side tools billed as flat per-request fees
|
|
35
|
+
(``web_search``, ``web_fetch``).
|
|
36
|
+
- :class:`ToolFeeTable` — wraps a list of tool fees with effective-date
|
|
37
|
+
lookup (mirror of :class:`PriceTable`).
|
|
38
|
+
- :func:`load_pricing` / :func:`load_tool_fees` — read external pricing
|
|
39
|
+
YAML into the respective table.
|
|
40
|
+
- :func:`load_default_pricing` / :func:`load_default_tool_fees` — load
|
|
41
|
+
the packaged-default tables for consumers without an external YAML.
|
|
42
|
+
- :func:`compute_cost` — pure math from token counts + price card +
|
|
43
|
+
optional server-tool request counts + matching tool fees.
|
|
44
|
+
- :func:`recompute_cost` — recompute and overwrite ``cost_usd`` on a
|
|
45
|
+
``ModelMetadata`` from a ``PriceTable``, optional ``ToolFeeTable``,
|
|
46
|
+
and a query date.
|
|
47
|
+
- :func:`metadata_from_anthropic_message` — raw-Anthropic-SDK adapter;
|
|
48
|
+
maps a ``Message.usage`` (including ``server_tool_use`` request counts)
|
|
49
|
+
onto a ``ModelMetadata`` for consumers using the SDK directly (no
|
|
50
|
+
LangChain).
|
|
51
|
+
- :exc:`PriceCardLookupError` — raised when no card matches a (model, date)
|
|
52
|
+
query OR a non-zero tool-request count has no matching fee (do not
|
|
53
|
+
swallow).
|
|
54
|
+
|
|
55
|
+
Workstream design: ``alpha-engine-config/private-docs/ROADMAP.md`` line ~1708
|
|
56
|
+
("Per-run LLM cost telemetry as code artifact").
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
from __future__ import annotations
|
|
60
|
+
|
|
61
|
+
from datetime import date, datetime, timezone
|
|
62
|
+
from importlib import resources
|
|
63
|
+
from pathlib import Path
|
|
64
|
+
from typing import TYPE_CHECKING, Any, Protocol
|
|
65
|
+
|
|
66
|
+
import yaml
|
|
67
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
68
|
+
|
|
69
|
+
from alpha_engine_lib.decision_capture import ModelMetadata
|
|
70
|
+
|
|
71
|
+
if TYPE_CHECKING:
    # Intentionally empty: nothing is imported here, not even for type
    # checkers. The structural Protocols defined further down in this
    # module describe the only attributes we touch on an Anthropic SDK
    # ``Message`` — kept that way so that ``anthropic`` does not have to
    # be a hard dependency of this library. Consumers that call
    # :func:`metadata_from_anthropic_message` install ``anthropic`` in
    # their own environment.
    pass
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ── Price card ────────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class PriceCard(BaseModel):
    """One row of the price table — per-model, per-effective-date rate.

    All four prices are USD per 1,000,000 tokens. Cache-write and cache-
    read prices follow Anthropic's prompt-caching semantics: cache-write
    tokens are billed at ~1.25× the input price, cache-read tokens at
    ~0.10× the input price. The fields are stored explicitly rather than
    derived from a multiplier so that future provider changes (or the
    addition of non-Anthropic providers) don't require a math change.

    A card applies to its model from ``effective_from`` until the next
    card for the same model, exclusive on the new card's ``effective_from``
    date.
    """

    # Unknown keys in a rate-card row are a validation error, not ignored.
    model_config = ConfigDict(extra="forbid")

    # Model identifier; lookups compare it to the queried name with ``==``.
    model_name: str
    # First calendar date (inclusive) on which this card applies.
    effective_from: date
    # Rates below are USD per 1,000,000 tokens, each constrained to >= 0.
    input_per_1m: float = Field(ge=0.0)
    output_per_1m: float = Field(ge=0.0)
    cache_read_per_1m: float = Field(ge=0.0)
    cache_create_per_1m: float = Field(ge=0.0)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ── Errors ────────────────────────────────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class PriceCardLookupError(LookupError):
    """Raised when a price card or tool fee cannot be resolved.

    Raised by :meth:`PriceTable.get` and :meth:`ToolFeeTable.get` when no
    row matches the (name, date) query, and by :func:`compute_cost` /
    :func:`recompute_cost` when a non-zero server-tool request count has
    no matching fee.

    Per ``feedback_no_silent_fails``, the cost path does not silently
    return zero on missing models or out-of-range dates — that would
    bury cost regressions. Callers may catch this if they want a
    best-effort price (e.g. dashboard fallback), but the default is
    hard-fail.
    """
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class PriceTableLoadError(RuntimeError):
    """Raised when a pricing YAML or table is structurally malformed.

    Raised by the loaders in this module when the document is not a
    top-level mapping, when the expected section key (``cards`` /
    ``tool_fees``) is missing, or when that section is not a list; and by
    the :class:`PriceTable` / :class:`ToolFeeTable` validators when rows
    for the same name share an ``effective_from`` date or are not sorted
    ascending by it.

    NOTE(review): per-row schema violations (unknown fields, negative
    rates) are reported by ``model_validate`` on the row model; the
    loaders do not wrap those failures in this exception.
    """
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ── Price table ───────────────────────────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class PriceTable(BaseModel):
    """Collection of :class:`PriceCard` rows queried by model and date.

    Invariants checked once, when the table is constructed:

    1. No two cards for one model carry the same ``effective_from``.
    2. Within one model, cards appear in ascending ``effective_from``
       order.

    :meth:`get` resolves a (model, date) pair to the card with the most
    recent ``effective_from`` that is not after the query date, raising
    :exc:`PriceCardLookupError` when there is none.
    """

    model_config = ConfigDict(extra="forbid")

    cards: list[PriceCard]

    @model_validator(mode="after")
    def _validate_card_ordering(self) -> "PriceTable":
        """Hard-fail on duplicate or unsorted dates within any one model."""
        # Group effective dates per model, preserving input order.
        per_model: dict[str, list[date]] = {}
        for row in self.cards:
            if row.model_name not in per_model:
                per_model[row.model_name] = []
            per_model[row.model_name].append(row.effective_from)

        for model_name in per_model:
            dates = per_model[model_name]
            # Duplicates are reported before ordering problems.
            if len(dates) > len(set(dates)):
                raise PriceTableLoadError(
                    f"PriceTable: duplicate effective_from date for model "
                    f"{model_name!r}: {dates}"
                )
            if sorted(dates) != dates:
                raise PriceTableLoadError(
                    f"PriceTable: cards for model {model_name!r} are not "
                    f"sorted ascending by effective_from: {dates}"
                )
        return self

    def get(self, model_name: str, at: datetime | date) -> PriceCard:
        """Return the :class:`PriceCard` in force for ``model_name`` at ``at``.

        ``at`` is either a ``date`` or a ``datetime``; for a ``datetime``
        only its ``.date()`` component takes part in the comparison. The
        result is the card with the greatest ``effective_from`` that is
        on or before that date.

        Raises :exc:`PriceCardLookupError` when the model has no cards,
        or when all of its cards start after ``at``.
        """
        if isinstance(at, datetime):
            query_date = at.date()
        else:
            query_date = at

        active: PriceCard | None = None
        for row in self.cards:
            if row.model_name != model_name or row.effective_from > query_date:
                continue
            # Strict comparison keeps the earliest-listed card on a tie.
            if active is None or row.effective_from > active.effective_from:
                active = row

        if active is None:
            raise PriceCardLookupError(
                f"No price card for model {model_name!r} active on {query_date}"
            )
        return active
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ── Tool fee table ────────────────────────────────────────────────────────
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class ToolFee(BaseModel):
    """One row of the tool-fee table — per-tool, per-effective-date rate.

    Anthropic's server-side tools (web_search, web_fetch) are billed as
    flat per-request fees, independent of which model invoked them. That
    pricing dimension is conceptually separate from the per-token
    :class:`PriceCard` rate, so it gets its own table to avoid duplicating
    a global fee across every (model × effective_from) row.

    Future server-side tools that adopt a per-request fee (e.g. anything
    Anthropic adds to ``Message.usage.server_tool_use``) plug in here by
    name, no schema change required.

    Rate is published as USD per 1,000 requests to mirror Anthropic's
    quoting convention ("$10 per 1,000 web search requests").
    """

    # Unknown keys in a fee row are a validation error, not ignored.
    model_config = ConfigDict(extra="forbid")

    # Tool identifier; lookups compare it to the queried name with ``==``.
    tool_name: str
    # First calendar date (inclusive) on which this fee applies.
    effective_from: date
    # USD per 1,000 requests, constrained to >= 0.
    per_1k_requests_usd: float = Field(ge=0.0)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class ToolFeeTable(BaseModel):
    """Collection of :class:`ToolFee` rows queried by tool and date.

    Same contract as :class:`PriceTable`: within one tool, fees must be
    listed in ascending ``effective_from`` order with no repeated date.
    :meth:`get` resolves a (tool_name, date) pair to the most recent fee
    not after the date, and raises :exc:`PriceCardLookupError` for an
    unknown tool or a date before the tool's first fee (per
    ``feedback_no_silent_fails``).
    """

    model_config = ConfigDict(extra="forbid")

    fees: list[ToolFee]

    @model_validator(mode="after")
    def _validate_fee_ordering(self) -> "ToolFeeTable":
        """Hard-fail on duplicate or unsorted dates within any one tool."""
        # Group effective dates per tool, preserving input order.
        per_tool: dict[str, list[date]] = {}
        for row in self.fees:
            bucket = per_tool.get(row.tool_name)
            if bucket is None:
                bucket = []
                per_tool[row.tool_name] = bucket
            bucket.append(row.effective_from)

        for tool_name in per_tool:
            dates = per_tool[tool_name]
            # Duplicates are reported before ordering problems.
            if len(dates) > len(set(dates)):
                raise PriceTableLoadError(
                    f"ToolFeeTable: duplicate effective_from date for tool "
                    f"{tool_name!r}: {dates}"
                )
            if sorted(dates) != dates:
                raise PriceTableLoadError(
                    f"ToolFeeTable: fees for tool {tool_name!r} are not "
                    f"sorted ascending by effective_from: {dates}"
                )
        return self

    def get(self, tool_name: str, at: datetime | date) -> ToolFee:
        """Return the :class:`ToolFee` in force for ``tool_name`` at ``at``.

        For a ``datetime`` argument only its ``.date()`` component is
        compared. Raises :exc:`PriceCardLookupError` when no fee for the
        tool starts on or before that date.
        """
        query_date = at
        if isinstance(at, datetime):
            query_date = at.date()

        active: ToolFee | None = None
        for row in self.fees:
            if row.tool_name != tool_name or row.effective_from > query_date:
                continue
            # Strict comparison keeps the earliest-listed fee on a tie.
            if active is None or row.effective_from > active.effective_from:
                active = row

        if active is None:
            raise PriceCardLookupError(
                f"No tool fee for tool {tool_name!r} active on {query_date}"
            )
        return active
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# ── YAML loader ───────────────────────────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
_DEFAULT_PRICING_RESOURCE = "model_pricing.yaml"
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def load_default_pricing() -> PriceTable:
    """Load the :class:`PriceTable` shipped inside this package.

    Convenience entry point for consumers that don't maintain their own
    operator-managed rate card (e.g. ``morning-signal`` or any other
    non-alpha-engine app pulling in this library purely for cost
    telemetry). Alpha-engine repos that need a separately-versioned card
    (so an Anthropic price change can ship without a lib bump) should
    keep calling :func:`load_pricing` with their own YAML path.

    The default file lives at ``alpha_engine_lib/model_pricing.yaml`` and
    is shipped as package data; updates ride normal lib version bumps.

    Returns
    -------
    PriceTable
        Table built from the ``cards`` section of the packaged YAML.

    Raises
    ------
    PriceTableLoadError
        If the document is not a mapping with a ``cards`` key, if
        ``cards`` is not a list, or if the table's ordering validator
        rejects the rows.
    """
    resource = resources.files("alpha_engine_lib").joinpath(
        _DEFAULT_PRICING_RESOURCE
    )
    # Decode explicitly as UTF-8 so the packaged file parses identically
    # regardless of the host's locale / default text encoding.
    with resource.open("r", encoding="utf-8") as fh:
        raw: Any = yaml.safe_load(fh)

    if not isinstance(raw, dict) or "cards" not in raw:
        raise PriceTableLoadError(
            f"Packaged {_DEFAULT_PRICING_RESOURCE}: expected top-level "
            f"mapping with 'cards' key; got {type(raw).__name__}"
        )
    if not isinstance(raw["cards"], list):
        raise PriceTableLoadError(
            f"Packaged {_DEFAULT_PRICING_RESOURCE}: 'cards' must be a "
            f"list; got {type(raw['cards']).__name__}"
        )

    # Row-level schema checks happen in PriceCard; ordering checks in
    # the PriceTable validator.
    cards = [PriceCard.model_validate(entry) for entry in raw["cards"]]
    return PriceTable(cards=cards)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def load_default_tool_fees() -> ToolFeeTable:
    """Load the :class:`ToolFeeTable` shipped inside this package.

    Reads the ``tool_fees`` section of the packaged ``model_pricing.yaml``.
    Hard-fails if the section is absent (per ``feedback_no_silent_fails``);
    a caller wiring tool-fee accounting should never silently get an empty
    table.

    Companion to :func:`load_default_pricing`; both load from the same
    YAML so a single packaged file covers both pricing dimensions.

    Returns
    -------
    ToolFeeTable
        Table built from the ``tool_fees`` section of the packaged YAML.

    Raises
    ------
    PriceTableLoadError
        If the document is not a mapping with a ``tool_fees`` key, if
        ``tool_fees`` is not a list, or if the table's ordering validator
        rejects the rows.
    """
    resource = resources.files("alpha_engine_lib").joinpath(
        _DEFAULT_PRICING_RESOURCE
    )
    # Decode explicitly as UTF-8 so the packaged file parses identically
    # regardless of the host's locale / default text encoding.
    with resource.open("r", encoding="utf-8") as fh:
        raw: Any = yaml.safe_load(fh)

    if not isinstance(raw, dict) or "tool_fees" not in raw:
        raise PriceTableLoadError(
            f"Packaged {_DEFAULT_PRICING_RESOURCE}: expected top-level "
            f"mapping with 'tool_fees' key; got {type(raw).__name__}"
        )
    if not isinstance(raw["tool_fees"], list):
        raise PriceTableLoadError(
            f"Packaged {_DEFAULT_PRICING_RESOURCE}: 'tool_fees' must be a "
            f"list; got {type(raw['tool_fees']).__name__}"
        )

    # Row-level schema checks happen in ToolFee; ordering checks in the
    # ToolFeeTable validator.
    fees = [ToolFee.model_validate(entry) for entry in raw["tool_fees"]]
    return ToolFeeTable(fees=fees)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def load_tool_fees(path: Path | str) -> ToolFeeTable:
    """Load the ``tool_fees`` section of an external pricing YAML.

    External-path counterpart of :func:`load_default_tool_fees` — same
    contract, sourced from an operator-managed YAML. Used by
    alpha-engine-research and any other consumer that needs to override
    the packaged defaults (e.g. price change before next lib bump).

    Expected YAML shape::

        tool_fees:
          - tool_name: web_search
            effective_from: 2026-01-01
            per_1k_requests_usd: 10.00
          - tool_name: web_fetch
            effective_from: 2026-01-01
            per_1k_requests_usd: 0.00

    Parameters
    ----------
    path
        Location of the pricing YAML; read as UTF-8.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    PriceTableLoadError
        If the document is not a mapping with a ``tool_fees`` key, if
        ``tool_fees`` is not a list, or if the table's ordering validator
        rejects the rows.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"pricing YAML not found at {path}")

    # Decode explicitly as UTF-8 rather than the platform default, so the
    # same file parses identically on every host.
    with path.open(encoding="utf-8") as fh:
        raw: Any = yaml.safe_load(fh)

    if not isinstance(raw, dict) or "tool_fees" not in raw:
        raise PriceTableLoadError(
            f"{path}: expected top-level mapping with 'tool_fees' key; "
            f"got {type(raw).__name__}"
        )
    if not isinstance(raw["tool_fees"], list):
        raise PriceTableLoadError(
            f"{path}: 'tool_fees' must be a list; got "
            f"{type(raw['tool_fees']).__name__}"
        )

    # Row-level schema checks happen in ToolFee; ordering checks in the
    # ToolFeeTable validator.
    fees = [ToolFee.model_validate(entry) for entry in raw["tool_fees"]]
    return ToolFeeTable(fees=fees)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def load_pricing(path: Path | str) -> PriceTable:
    """Load ``model_pricing.yaml`` from ``path`` into a :class:`PriceTable`.

    Expected YAML shape::

        # version: 1
        cards:
          - model_name: claude-haiku-4-5
            effective_from: 2026-01-01
            input_per_1m: 1.00
            output_per_1m: 5.00
            cache_read_per_1m: 0.10
            cache_create_per_1m: 1.25
          - model_name: claude-sonnet-4-6
            effective_from: 2026-01-01
            input_per_1m: 3.00
            ...

    Validation:

    1. File must exist; missing file → :exc:`FileNotFoundError`.
    2. Top-level must contain ``cards: [...]``.
    3. Each card validated via :class:`PriceCard` (extra fields rejected).
    4. Cards-per-model sorted ascending by ``effective_from`` (validator).

    The file is read as UTF-8.

    Returns the loaded :class:`PriceTable`. Hard-fails on any malformation
    per ``feedback_no_silent_fails``.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"model_pricing.yaml not found at {path}")

    # Decode explicitly as UTF-8 rather than the platform default, so the
    # same file parses identically on every host.
    with path.open(encoding="utf-8") as fh:
        raw: Any = yaml.safe_load(fh)

    if not isinstance(raw, dict) or "cards" not in raw:
        raise PriceTableLoadError(
            f"{path}: expected top-level mapping with 'cards' key; "
            f"got {type(raw).__name__}"
        )
    if not isinstance(raw["cards"], list):
        raise PriceTableLoadError(
            f"{path}: 'cards' must be a list; got {type(raw['cards']).__name__}"
        )

    cards = [PriceCard.model_validate(entry) for entry in raw["cards"]]
    return PriceTable(cards=cards)
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# ── Cost math ─────────────────────────────────────────────────────────────
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
_TOKENS_PER_PRICE_UNIT = 1_000_000
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
_REQUESTS_PER_FEE_UNIT = 1_000
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def compute_cost(
    *,
    input_tokens: int,
    output_tokens: int,
    cache_read_tokens: int,
    cache_create_tokens: int,
    card: PriceCard,
    tool_requests: dict[str, int] | None = None,
    tool_fees: dict[str, ToolFee] | None = None,
) -> float:
    """Return the USD cost of one call from its usage counts and rates.

    Pure arithmetic, no I/O. The caller resolves ``card`` and the
    entries of ``tool_fees`` beforehand (normally through
    :meth:`PriceTable.get` and :meth:`ToolFeeTable.get`, which handle
    effective dates).

    Parameters
    ----------
    input_tokens, output_tokens, cache_read_tokens, cache_create_tokens
        Token counts, each billed at the matching per-1M-token rate on
        ``card``.
    card
        Rate row supplying the four per-1M-token prices.
    tool_requests
        Optional mapping of tool name → request count. Entries with a
        count of zero or less are skipped.
    tool_fees
        Optional mapping of tool name → :class:`ToolFee`, billed per
        1,000 requests. Must contain every tool that has a positive
        count in ``tool_requests``.

    Raises
    ------
    PriceCardLookupError
        If a tool has a positive request count but no entry in
        ``tool_fees`` (per ``feedback_no_silent_fails`` — a silent zero
        would bury a real cost slice).
    """
    input_cost = input_tokens * card.input_per_1m
    output_cost = output_tokens * card.output_per_1m
    cache_read_cost = cache_read_tokens * card.cache_read_per_1m
    cache_create_cost = cache_create_tokens * card.cache_create_per_1m
    total = (
        input_cost + output_cost + cache_read_cost + cache_create_cost
    ) / _TOKENS_PER_PRICE_UNIT

    if not tool_requests:
        return total

    for tool_name, count in tool_requests.items():
        if count <= 0:
            continue
        if tool_fees is not None and tool_name in tool_fees:
            fee = tool_fees[tool_name]
            total += count * fee.per_1k_requests_usd / _REQUESTS_PER_FEE_UNIT
        else:
            raise PriceCardLookupError(
                f"{count} {tool_name} requests recorded but no matching "
                f"ToolFee provided to compute_cost. Pass tool_fees={{...}}."
            )
    return total
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _tool_request_counts(metadata: ModelMetadata) -> dict[str, int]:
|
|
480
|
+
"""Pull non-zero server-tool request counts off a ``ModelMetadata``.
|
|
481
|
+
|
|
482
|
+
Centralizes the mapping between ``ModelMetadata`` field names and
|
|
483
|
+
Anthropic tool names. Add new server tools here when the SDK adds
|
|
484
|
+
them to ``Usage.server_tool_use`` (and to ``ModelMetadata``).
|
|
485
|
+
"""
|
|
486
|
+
return {
|
|
487
|
+
name: count
|
|
488
|
+
for name, count in (
|
|
489
|
+
("web_search", metadata.web_search_requests),
|
|
490
|
+
("web_fetch", metadata.web_fetch_requests),
|
|
491
|
+
)
|
|
492
|
+
if count > 0
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def recompute_cost(
    metadata: ModelMetadata,
    table: PriceTable,
    *,
    tool_fee_table: ToolFeeTable | None = None,
    at: datetime | date | None = None,
    overwrite: bool = True,
) -> float:
    """Price ``metadata`` against ``table`` and return the USD cost.

    Unless ``overwrite`` is ``False``, the result is also stored on
    ``metadata.cost_usd`` (a derived convenience field, not a canonical
    value — see module docstring).

    Parameters
    ----------
    metadata
        The :class:`ModelMetadata` whose token and server-tool request
        counts are priced.
    table
        Price table used to resolve the card for ``metadata.model_name``.
    tool_fee_table
        Optional :class:`ToolFeeTable` for pricing the server-tool
        requests on ``metadata`` (``web_search_requests``,
        ``web_fetch_requests``). Required as soon as any of those counts
        is non-zero; pure-LLM consumers with no server-tool usage can
        omit it.
    at
        Date (or datetime) used for both the price-card and tool-fee
        lookups. Defaults to ``datetime.now(timezone.utc)``, which suits
        live recompute paths; historical repricing passes the original
        capture timestamp.
    overwrite
        If ``True`` (default), assign the result to ``metadata.cost_usd``
        before returning. Pass ``False`` for read-only repricing.

    Raises
    ------
    PriceCardLookupError
        If ``table`` has no card for ``metadata.model_name`` active at
        ``at``; or if a non-zero server-tool request count is recorded
        and ``tool_fee_table`` is missing or has no matching
        :class:`ToolFee`. Per ``feedback_no_silent_fails`` — silent
        zero-pricing on a missing model or tool would bury cost
        regressions.
    """
    when = datetime.now(timezone.utc) if at is None else at
    card = table.get(metadata.model_name, when)

    tool_requests = _tool_request_counts(metadata)
    if tool_requests and tool_fee_table is None:
        raise PriceCardLookupError(
            f"ModelMetadata has non-zero server-tool requests "
            f"({tool_requests}) but no tool_fee_table was passed to "
            f"recompute_cost. Pass tool_fee_table=... or zero the "
            f"request counts."
        )

    # Resolve one fee per tool that was actually used.
    tool_fees: dict[str, ToolFee] | None = None
    if tool_requests:
        tool_fees = {}
        for name in tool_requests:
            tool_fees[name] = tool_fee_table.get(name, when)

    cost = compute_cost(
        input_tokens=metadata.input_tokens,
        output_tokens=metadata.output_tokens,
        cache_read_tokens=metadata.cache_read_tokens,
        cache_create_tokens=metadata.cache_create_tokens,
        card=card,
        tool_requests=tool_requests if tool_requests else None,
        tool_fees=tool_fees,
    )
    if overwrite:
        metadata.cost_usd = cost
    return cost
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
# ── Anthropic SDK adapter ─────────────────────────────────────────────────
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
class _AnthropicServerToolUsageLike(Protocol):
    """Structural type for ``anthropic.types.ServerToolUsage``.

    Declares only the two request counters that
    :func:`metadata_from_anthropic_message` reads.
    """

    # Copied into ``ModelMetadata.web_search_requests``.
    web_search_requests: int
    # Copied into ``ModelMetadata.web_fetch_requests``.
    web_fetch_requests: int
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
class _AnthropicUsageLike(Protocol):
    """Structural type for an Anthropic SDK ``Usage`` object.

    Mirrors ``anthropic.types.Usage`` (input_tokens / output_tokens are
    required; cache fields and server_tool_use are optional). Defined as
    a Protocol so this module does not import ``anthropic`` at runtime —
    consumers pass the SDK's actual ``Usage`` and duck-typing handles
    the rest.
    """

    # Read directly by the adapter, with no fallback.
    input_tokens: int
    output_tokens: int
    # Read via ``getattr``; a missing attribute or ``None`` becomes 0.
    cache_read_input_tokens: int | None
    cache_creation_input_tokens: int | None
    # Read via ``getattr``; when absent or ``None`` both tool counts are 0.
    server_tool_use: _AnthropicServerToolUsageLike | None
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
class _AnthropicMessageLike(Protocol):
    """Structural type for an Anthropic SDK ``Message`` object.

    Declares only the two attributes that
    :func:`metadata_from_anthropic_message` reads.
    """

    # Used as ``ModelMetadata.model_name`` unless the caller overrides it.
    model: str
    # Source of the token counts and server-tool request counts.
    usage: _AnthropicUsageLike
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def metadata_from_anthropic_message(
    msg: _AnthropicMessageLike,
    *,
    model_name: str | None = None,
) -> ModelMetadata:
    """Build a :class:`ModelMetadata` from an Anthropic SDK ``Message``.

    This is the raw-SDK counterpart to the LangChain callback handler in
    ``alpha-engine-research/graph/llm_cost_tracker.py``: consumers that
    call ``client.messages.create()`` directly pass the returned
    ``Message`` here to capture the usage figures the cost-telemetry
    pipeline needs.

    Parameters
    ----------
    msg
        Any object shaped like ``anthropic.types.Message`` — it must
        expose ``model`` and a ``usage`` object carrying
        ``input_tokens`` and ``output_tokens``. The cache-token fields
        and ``server_tool_use`` may be missing or ``None``. ``anthropic``
        itself is never imported; the structural Protocols in this
        module are the only contract.
    model_name
        Value to store as ``ModelMetadata.model_name`` in place of
        ``msg.model``. Use it when the SDK reports an identifier that
        differs from the one in your price table (e.g. dated suffixes,
        model aliases).

    Returns
    -------
    ModelMetadata
        Populated with the model name, the four token counts, and the
        ``web_search`` / ``web_fetch`` request counts taken from
        ``usage.server_tool_use``. Optional values that are missing or
        ``None`` are stored as ``0``. No cost is computed here; call
        :func:`recompute_cost` with a :class:`PriceTable` (and a
        :class:`ToolFeeTable` when server tools were used) to price it.
    """
    usage = msg.usage
    server_tools = getattr(usage, "server_tool_use", None)

    def _count_or_zero(holder: Any, attr: str) -> int:
        # Missing attribute, ``None`` holder, ``None`` value and 0 all
        # collapse to 0.
        return getattr(holder, attr, None) or 0

    resolved_name = msg.model if model_name is None else model_name
    return ModelMetadata(
        model_name=resolved_name,
        input_tokens=usage.input_tokens,
        output_tokens=usage.output_tokens,
        cache_read_tokens=_count_or_zero(usage, "cache_read_input_tokens"),
        cache_create_tokens=_count_or_zero(usage, "cache_creation_input_tokens"),
        web_search_requests=_count_or_zero(server_tools, "web_search_requests"),
        web_fetch_requests=_count_or_zero(server_tools, "web_fetch_requests"),
    )
|