dispatch-relay 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ """dispatch-relay — the swarph's canonical provider-agnostic LLM layer.
2
+
3
+ Pure core + 3 injected seams. The T1 contract, AI²-converged
4
+ with the peer 2026-06-08 (pending peer co-review).
5
+
6
+ Exports the three injected-interface seams (each a ``runtime_checkable`` Protocol +
7
+ a dependency-light default impl), the shared value types, the core-owned provider
8
+ facts, and the pure cost model:
9
+
10
+ - ConfigSource / DefaultConfigSource — resolve(key, role, default) → model_id
11
+ - UsageSink / NoOpUsageSink — record(...) usage (separate cache fields)
12
+ - DispatchBackend / DefaultDispatchBackend — supports(...) + dispatch(...) → LLMResponse
13
+ - LLMResponse / UsageRecord — shared value types
14
+ - DEFAULTS / extract_usage / resolve_usage — core-owned provider facts
15
+ - estimate_cost — pure pre-call cost estimator
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from .cost import estimate_cost
20
+ from .core import DEFAULTS, extract_usage, resolve_usage
21
+ from .interfaces import (
22
+ ConfigSource,
23
+ DefaultConfigSource,
24
+ UsageSink,
25
+ NoOpUsageSink,
26
+ DispatchBackend,
27
+ DefaultDispatchBackend,
28
+ LLMResponse,
29
+ UsageRecord,
30
+ )
31
+
32
+ __all__ = [
33
+ "ConfigSource",
34
+ "DefaultConfigSource",
35
+ "UsageSink",
36
+ "NoOpUsageSink",
37
+ "DispatchBackend",
38
+ "DefaultDispatchBackend",
39
+ "LLMResponse",
40
+ "UsageRecord",
41
+ "DEFAULTS",
42
+ "extract_usage",
43
+ "resolve_usage",
44
+ "estimate_cost",
45
+ ]
46
+
47
+ __version__ = "0.0.1"
@@ -0,0 +1,191 @@
1
+ """Token-usage analytics — pure aggregation over in-memory usage records.
2
+
3
+ This is the INVERTED form of the host's old fetch-then-aggregate helper: the
4
+ library owns the AGGREGATION LOGIC and takes the usage records AS INPUT; it does
5
+ NOT fetch. The host keeps a thin wrapper that reads its store (a usage-table
6
+ hypertable, etc.) and delegates the rows here. Inverting the dependency keeps
7
+ this module pure-stdlib + zero-dep so it lives in the dependency-light core
8
+ without re-opening the locked T1 seam contract (no 4th read-seam).
9
+
10
+ Each record is a mapping with the keys this module reads:
11
+ ``provider`` / ``role`` / ``caller`` / ``model`` / ``day`` and the integer token
12
+ columns ``input`` / ``output`` / ``cached`` / ``thought`` plus a float ``cost``.
13
+
14
+ Schema-asymmetry note (load-bearing for the per-provider arithmetic in
15
+ ``_row_total_tokens`` below):
16
+
17
+ - Anthropic: ``input`` is the FRESH remainder (``lc_input - cache_read -
18
+ cache_create``) and ``cached`` is SEPARATE. The two columns are disjoint.
19
+ True prompt size = ``input + cached``.
20
+ - Gemini: ``input`` = ``prompt_token_count`` (the FULL prompt) with
21
+ ``cached`` as a SUBSET. True prompt size = ``input`` (``cached`` is
22
+ informational, already counted).
23
+ - OpenAI: ``input`` = ``prompt_tokens`` (FULL) with ``cached`` a subset.
24
+ Same convention as Gemini.
25
+ - ``thought`` carries reasoning tokens (Gemini Pro thinking mode, o1-style
26
+ models) — additive for total prompt cost regardless of provider, currently
27
+ 0 for non-reasoning calls.
28
+
29
+ Summing ``input + output`` everywhere undercounts Anthropic by the cache_read
30
+ amount; summing ``input + output + cached`` everywhere double-counts
31
+ Gemini/OpenAI by the same. The fix is per-provider arithmetic.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ from collections import defaultdict
36
+ from typing import Optional
37
+
38
+
39
# Providers whose ``input`` column is the FULL prompt, i.e. the cached tokens
# are already counted inside it (cached is a subset, not its own bucket).
# Adding ``cached`` on top for these providers would count it twice.
_INPUT_INCLUDES_CACHED = {"gemini", "openai"}


def _row_total_tokens(provider: str, in_tok: int, out_tok: int,
                      cached: int, thought: int) -> int:
    """Return the provider-aware token total for one usage row.

    ``in_tok``, ``out_tok`` and ``thought`` are always summed. ``cached`` is
    added only when ``provider`` is NOT listed in ``_INPUT_INCLUDES_CACHED``,
    i.e. for providers (Anthropic and anything unrecognised) whose ``input``
    column is disjoint from the cached bucket. The provider name is matched
    exactly (case-sensitive).
    """
    subtotal = in_tok + out_tok + thought
    if provider not in _INPUT_INCLUDES_CACHED:
        # cached is stored separately from input here, so it still has to be
        # counted.
        subtotal += cached
    return subtotal
+
54
+
55
+ def summarize_usage(records, *, days: int = 7, role: Optional[str] = None,
56
+ provider: Optional[str] = None,
57
+ caller: Optional[str] = None) -> dict:
58
+ """Aggregate already-fetched usage ``records`` into rollup buckets.
59
+
60
+ The ``days`` / ``role`` / ``provider`` / ``caller`` args are echoed into the
61
+ result as metadata (the host applied them as fetch filters); this function
62
+ aggregates whatever rows it is given.
63
+
64
+ caller bucketing: a ``by_caller`` bucket is always produced so callers can
65
+ rank workers by spend; NULL caller buckets as ``"<unattributed>"`` so the
66
+ JSON key is stable for dashboards (no null-handling on the consumer side).
67
+
68
+ Returns:
69
+ {
70
+ "days": int,
71
+ "role": str | None,
72
+ "provider": str | None,
73
+ "caller": str | None,
74
+ "total_cost_usd": float,
75
+ "total_tokens": int, # provider-aware sum, see module docstring
76
+ "by_provider": {provider: {input, output, cached, thought, cost, n_rows}},
77
+ "by_model": [{model, provider, role, caller, ...}, …],
78
+ "by_role": {role: {cost, tokens}},
79
+ "by_caller": {caller_or_"<unattributed>": {cost, tokens, n_rows}},
80
+ }
81
+
82
+ All values default to 0 on empty input so dashboards / callers don't need to
83
+ guard against None.
84
+ """
85
+ by_provider: dict[str, dict] = defaultdict(
86
+ lambda: {"input": 0, "output": 0, "cached": 0, "thought": 0,
87
+ "cost": 0.0, "n_rows": 0}
88
+ )
89
+ by_role: dict[str, dict] = defaultdict(lambda: {"cost": 0.0, "tokens": 0})
90
+ # NULL caller → bucket as "<unattributed>" so the JSON key is stable
91
+ # for dashboards (avoids null-handling on the consumer side).
92
+ by_caller: dict[str, dict] = defaultdict(
93
+ lambda: {"cost": 0.0, "tokens": 0, "n_rows": 0}
94
+ )
95
+ by_model: list[dict] = []
96
+ total_cost = 0.0
97
+ total_tokens = 0
98
+
99
+ for r in records:
100
+ prov = r.get("provider") or "unknown"
101
+ rrole = r.get("role") or "agents"
102
+ rcaller = r.get("caller") or "<unattributed>"
103
+ cost = float(r.get("cost") or 0.0)
104
+ in_tok = int(r.get("input") or 0)
105
+ out_tok = int(r.get("output") or 0)
106
+ cached = int(r.get("cached") or 0)
107
+ thought = int(r.get("thought") or 0)
108
+ row_total = _row_total_tokens(prov, in_tok, out_tok, cached, thought)
109
+
110
+ by_provider[prov]["input"] += in_tok
111
+ by_provider[prov]["output"] += out_tok
112
+ by_provider[prov]["cached"] += cached
113
+ by_provider[prov]["thought"] += thought
114
+ by_provider[prov]["cost"] += cost
115
+ by_provider[prov]["n_rows"] += 1
116
+
117
+ by_role[rrole]["cost"] += cost
118
+ by_role[rrole]["tokens"] += row_total
119
+
120
+ by_caller[rcaller]["cost"] += cost
121
+ by_caller[rcaller]["tokens"] += row_total
122
+ by_caller[rcaller]["n_rows"] += 1
123
+
124
+ by_model.append({
125
+ "model": r.get("model"),
126
+ "provider": prov,
127
+ "role": rrole,
128
+ "caller": r.get("caller"), # raw NULL preserved on per-row records
129
+ "day": r.get("day"),
130
+ "input": in_tok,
131
+ "output": out_tok,
132
+ "cached": cached,
133
+ "thought": thought,
134
+ "cost": cost,
135
+ })
136
+
137
+ total_cost += cost
138
+ total_tokens += row_total
139
+
140
+ by_model.sort(key=lambda m: -m["cost"])
141
+
142
+ return {
143
+ "days": days,
144
+ "role": role,
145
+ "provider": provider,
146
+ "caller": caller,
147
+ "total_cost_usd": round(total_cost, 6),
148
+ "total_tokens": total_tokens,
149
+ "by_provider": {k: dict(v) for k, v in by_provider.items()},
150
+ "by_model": by_model,
151
+ "by_role": {k: dict(v) for k, v in by_role.items()},
152
+ "by_caller": {k: dict(v) for k, v in by_caller.items()},
153
+ }
154
+
155
+
156
+ def detect_anomalies(records, *, spike_factor: float = 2.0) -> list[dict]:
157
+ """Flag (model, day) cells whose cost > spike_factor × baseline avg.
158
+
159
+ Baseline is the mean per-day cost for that model over the supplied records
160
+ EXCLUDING the day being checked. Returns rows in descending cost order.
161
+ Empty list when ``records`` is empty.
162
+ """
163
+ if not records:
164
+ return []
165
+
166
+ # Group by model -> [(day, cost), …]
167
+ series: dict[str, list[tuple]] = defaultdict(list)
168
+ for r in records:
169
+ model = r.get("model") or "unknown"
170
+ cost = float(r.get("cost") or 0.0)
171
+ series[model].append((r.get("day"), cost))
172
+
173
+ spikes = []
174
+ for model, points in series.items():
175
+ if len(points) < 3:
176
+ continue
177
+ for day, cost in points:
178
+ others = [c for d, c in points if d != day]
179
+ if not others:
180
+ continue
181
+ baseline = sum(others) / len(others)
182
+ if baseline > 0 and cost >= spike_factor * baseline:
183
+ spikes.append({
184
+ "model": model,
185
+ "day": day,
186
+ "cost": round(cost, 6),
187
+ "baseline_avg": round(baseline, 6),
188
+ "factor": round(cost / baseline, 2),
189
+ })
190
+ spikes.sort(key=lambda s: -s["cost"])
191
+ return spikes
@@ -0,0 +1,168 @@
1
+ """Anthropic prompt-caching helper — wrap any LangChain-compatible LLM.
2
+
3
+ Wraps any LangChain-compatible LLM (typically a :class:`dispatch_relay.facade._BoundLLM`)
4
+ so every ``.invoke(messages)`` call prepends a SystemMessage carrying
5
+ ``cache_control: {"type": "ephemeral", "ttl": ttl}`` in the correct
6
+ list-of-blocks shape.
7
+
8
+ Why list-of-blocks: ``langchain_anthropic`` SILENTLY DROPS the
9
+ ``additional_kwargs={"cache_control": ...}`` shape. The only shape that
10
+ propagates to the wire is::
11
+
12
+ SystemMessage(content=[{"type": "text", "text": ...,
13
+ "cache_control": {"type": "ephemeral", "ttl": "1h"}}])
14
+
15
+ For non-Anthropic LLMs the SystemMessage is sent without cache_control (Gemini's
16
+ implicit caching handles long stable prefixes automatically; OpenAI auto-caches
17
+ prompt prefixes ≥1024 tokens at 50% off input price).
18
+
19
+ This module lives in the ``[facade]`` extra (it needs ``langchain_core``), but the
20
+ langchain import is LAZY (inside :func:`build_cached_system_message`) so importing
21
+ the module is cheap and the zero-dep core stays importable without it.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ from typing import Any, Optional
26
+
27
+
28
+ def build_cached_system_message(text: str, ttl: str, is_anthropic: bool):
29
+ """Build a SystemMessage with the right shape for the provider.
30
+
31
+ For Anthropic: returns the list-of-blocks shape that langchain_anthropic
32
+ actually propagates to the wire. For non-Anthropic providers: plain string
33
+ content (cache_control would be ignored anyway).
34
+ """
35
+ from langchain_core.messages import SystemMessage
36
+ if is_anthropic:
37
+ return SystemMessage(content=[{
38
+ "type": "text",
39
+ "text": text,
40
+ "cache_control": {"type": "ephemeral", "ttl": ttl},
41
+ }])
42
+ return SystemMessage(content=text)
43
+
44
+
45
+ # Backward-compat alias for the private name (callers should use the public one).
46
+ _build_cached_system_message = build_cached_system_message
47
+
48
+
49
+ def _detect_anthropic(inner: Any) -> bool:
50
+ """True if the given LLM is an Anthropic model.
51
+
52
+ A wrapper exposing ``_provider`` is trusted; raw ``ChatAnthropic`` is detected
53
+ by class-name sniff. Unknown wrappers default to False — safer to silently
54
+ disable cache_control than to inject it on a non-Anthropic wire.
55
+ """
56
+ provider = getattr(inner, "_provider", None)
57
+ if provider is None:
58
+ provider = "anthropic" if "Anthropic" in type(inner).__name__ else ""
59
+ return provider == "anthropic"
60
+
61
+
62
+ class _CacheableLLM:
63
+ """Proxy over a LangChain LLM that prepends a cached SystemMessage on invoke.
64
+
65
+ Delegates ``bind_tools`` / ``with_structured_output`` to the underlying LLM and
66
+ re-wraps the result so the cached SystemMessage is preserved across chained
67
+ calls (matches LangChain's chainable contract).
68
+
69
+ The provider flag is captured ONCE at the outermost wrap and threaded through
70
+ every chained re-wrap. Re-sniffing on a chained inner is unsafe — a structured-
71
+ output proxy may have no ``_provider`` attribute and a class name without
72
+ "Anthropic", so a re-sniff would silently flip ``_is_anthropic`` to False and
73
+ drop cache_control from the wire.
74
+ """
75
+
76
+ def __init__(self, inner: Any, cached_text: str, ttl: str = "1h",
77
+ *, is_anthropic: Optional[bool] = None):
78
+ self._inner = inner
79
+ self._cached_text = cached_text
80
+ self._ttl = ttl
81
+ self._is_anthropic = (
82
+ _detect_anthropic(inner) if is_anthropic is None else is_anthropic
83
+ )
84
+
85
+ def _prepend(self, messages):
86
+ sysmsg = build_cached_system_message(
87
+ self._cached_text, self._ttl, self._is_anthropic,
88
+ )
89
+ if isinstance(messages, list):
90
+ return [sysmsg, *messages]
91
+ return [sysmsg, messages]
92
+
93
+ def invoke(self, messages, *args, **kwargs):
94
+ return self._inner.invoke(self._prepend(messages), *args, **kwargs)
95
+
96
+ async def ainvoke(self, messages, *args, **kwargs):
97
+ return await self._inner.ainvoke(self._prepend(messages), *args, **kwargs)
98
+
99
+ def stream(self, messages, *args, **kwargs):
100
+ return self._inner.stream(self._prepend(messages), *args, **kwargs)
101
+
102
+ def bind_tools(self, *args, **kwargs):
103
+ bound = self._inner.bind_tools(*args, **kwargs)
104
+ return _CacheableLLM(bound, self._cached_text, self._ttl,
105
+ is_anthropic=self._is_anthropic)
106
+
107
+ def with_structured_output(self, *args, **kwargs):
108
+ so = self._inner.with_structured_output(*args, **kwargs)
109
+ return _CacheableLLM(so, self._cached_text, self._ttl,
110
+ is_anthropic=self._is_anthropic)
111
+
112
+ def __getattr__(self, name):
113
+ return getattr(self._inner, name)
114
+
115
+
116
+ def with_cache(llm: Any, cached_text: str, ttl: str = "1h") -> _CacheableLLM:
117
+ """Wrap an LLM so every ``.invoke()`` prepends a cached SystemMessage.
118
+
119
+ Args:
120
+ llm: any LangChain-compatible LLM (typically a ``_BoundLLM``).
121
+ cached_text: the long prefix to cache (e.g. a playbook / master prompt).
122
+ ttl: Anthropic ephemeral cache TTL — ``"5m"`` or ``"1h"``. Ignored for
123
+ non-Anthropic providers (the SystemMessage is still prepended, just
124
+ without the cache_control marker).
125
+
126
+ Returns:
127
+ A :class:`_CacheableLLM` proxy. Chainable via ``.bind_tools()`` and
128
+ ``.with_structured_output()``.
129
+ """
130
+ return _CacheableLLM(llm, cached_text, ttl=ttl)
131
+
132
+
133
# Process-wide fallback TTL. The ``.with_cache`` method attached to
# ``_BoundLLM`` has no reference back to the Relay that created the LLM, so
# `relay(cache_ttl_default="5m").claude().with_cache(text)` can only honor the
# façade's TTL through this module-level value.
DEFAULT_CACHE_TTL = "1h"


def set_default_cache_ttl(ttl: str) -> None:
    """Replace the module-wide ``DEFAULT_CACHE_TTL``.

    The attached ``.with_cache`` method falls back to this value when it is
    called without an explicit ``ttl=``. Intended to be called by
    ``Relay.__post_init__`` so the façade's default stays in sync.

    NOTE(review): the value is shared by the whole module, so the most recent
    call wins for every caller — confirm that is acceptable when several
    façades use different defaults.
    """
    global DEFAULT_CACHE_TTL
    DEFAULT_CACHE_TTL = ttl
+
146
+
147
+ def _attach_with_cache_method():
148
+ """Attach ``with_cache`` as a bound method on ``_BoundLLM``.
149
+
150
+ Idempotent — safe to call multiple times. Attached at import so any LLM
151
+ produced via ``relay(...).gemini()`` etc. exposes ``.with_cache(text)`` as if
152
+ native, without forcing callers to import :func:`with_cache`.
153
+ """
154
+ try:
155
+ from dispatch_relay.facade import _BoundLLM
156
+ except ImportError:
157
+ return
158
+
159
+ if "with_cache" in _BoundLLM.__dict__:
160
+ return
161
+
162
+ def _method(self, cached_text: str, ttl: Optional[str] = None):
163
+ return with_cache(self, cached_text, ttl=ttl or DEFAULT_CACHE_TTL)
164
+
165
+ _BoundLLM.with_cache = _method # type: ignore[attr-defined]
166
+
167
+
168
+ _attach_with_cache_method()
dispatch_relay/core.py ADDED
@@ -0,0 +1,138 @@
1
+ """Core-owned provider facts — the DEFAULTS table + usage extraction.
2
+
3
+ These are provider-facts that must live in exactly ONE place (never duplicated
4
+ per backend or per config source):
5
+
6
+ - :data:`DEFAULTS` — the 7-key abstract-key → model-id table. The core passes
7
+ ``default=DEFAULTS[key]`` into :meth:`ConfigSource.resolve`.
8
+ - :func:`extract_usage` — the single place that knows each provider's
9
+ usage-from-raw shape, including the Anthropic dual-path (the Session-19 surface).
10
+ - :func:`resolve_usage` — the locked reconciliation rule between a backend's
11
+ optional pre-populated ``LLMResponse.usage`` and core extraction.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import dataclasses
16
+ from typing import Any
17
+
18
+ from .interfaces import LLMResponse, UsageRecord
19
+
20
# ---------------------------------------------------------------------
# DEFAULTS: abstract model key -> concrete model id (the 7-key table)
# ---------------------------------------------------------------------
# Carried over verbatim from the old DefaultConfigSource.DEFAULTS. The core
# owns this table and supplies default=DEFAULTS[key] when calling resolve().
DEFAULTS: dict[str, str] = dict(
    gemini_flash="gemini-2.5-flash",
    gemini_flash_lite="gemini-2.5-flash-lite",
    gemini_pro="gemini-2.5-pro",
    gemini_deep_research="deep-research-pro-preview-12-2025",
    claude_sonnet="claude-sonnet-4-6",
    claude_opus="claude-opus-4-6",
    claude_haiku="claude-haiku-4-5-20251001",
)
34
+
35
+
36
+ def _get(obj: Any, key: str, default: Any = None) -> Any:
37
+ """Read ``key`` from ``obj`` attribute-style OR dict-style.
38
+
39
+ Works on LangChain ``AIMessage``-like objects (attributes) AND plain dicts
40
+ (canned-dict test fixtures) uniformly.
41
+ """
42
+ if obj is None:
43
+ return default
44
+ if isinstance(obj, dict):
45
+ return obj.get(key, default)
46
+ return getattr(obj, key, default)
47
+
48
+
49
+ def extract_usage(provider: str, raw: Any) -> UsageRecord | None:
50
+ """Extract a :class:`UsageRecord` from a provider's raw response.
51
+
52
+ The single place that knows each provider's usage-from-raw shape (provider-fact
53
+ → core, never duplicated per backend).
54
+
55
+ Anthropic DUAL-PATH (the Session-19 surface):
56
+ * PREFER ``raw.response_metadata["usage"]`` — the UNCACHED remainder, with
57
+ ``input_tokens`` / ``output_tokens`` / ``cache_read_input_tokens`` /
58
+ ``cache_creation_input_tokens``.
59
+ * FALL BACK to ``raw.usage_metadata`` (the LangChain shape:
60
+ ``input_tokens`` / ``output_tokens`` + ``input_token_details.cache_read`` /
61
+ ``cache_creation``) only if ``response_metadata.usage`` is absent.
62
+ Using the wrong one double-counts.
63
+
64
+ Non-Anthropic (gemini / openai): read ``raw.usage_metadata``
65
+ (``input_tokens`` / ``output_tokens``, ``cache_read`` from
66
+ ``input_token_details`` if present).
67
+
68
+ The model name is read from ``raw.response_metadata["model_name"]`` (both
69
+ Anthropic and Gemini surface it there — a real LangChain ``AIMessage`` has NO
70
+ top-level ``.model`` attribute), falling back to ``""`` if absent.
71
+
72
+ Returns ``None`` if no usage metadata is present (e.g. a subscription raw that
73
+ is a bare string).
74
+ """
75
+ rmd = _get(raw, "response_metadata")
76
+ model = (_get(rmd, "model_name", "") if rmd is not None else "") or ""
77
+
78
+ if provider == "anthropic":
79
+ rmd_usage = _get(rmd, "usage") if rmd is not None else None
80
+ if rmd_usage is not None:
81
+ return UsageRecord(
82
+ input_tokens=int(_get(rmd_usage, "input_tokens", 0) or 0),
83
+ output_tokens=int(_get(rmd_usage, "output_tokens", 0) or 0),
84
+ cache_read=int(_get(rmd_usage, "cache_read_input_tokens", 0) or 0),
85
+ cache_creation=int(
86
+ _get(rmd_usage, "cache_creation_input_tokens", 0) or 0
87
+ ),
88
+ model=model,
89
+ )
90
+ # Fall back to the LangChain usage_metadata shape.
91
+ um = _get(raw, "usage_metadata")
92
+ if um is not None:
93
+ details = _get(um, "input_token_details") or {}
94
+ return UsageRecord(
95
+ input_tokens=int(_get(um, "input_tokens", 0) or 0),
96
+ output_tokens=int(_get(um, "output_tokens", 0) or 0),
97
+ cache_read=int(_get(details, "cache_read", 0) or 0),
98
+ cache_creation=int(_get(details, "cache_creation", 0) or 0),
99
+ model=model,
100
+ )
101
+ return None
102
+
103
+ # gemini / openai (and any other non-anthropic provider)
104
+ um = _get(raw, "usage_metadata")
105
+ if um is not None:
106
+ details = _get(um, "input_token_details") or {}
107
+ return UsageRecord(
108
+ input_tokens=int(_get(um, "input_tokens", 0) or 0),
109
+ output_tokens=int(_get(um, "output_tokens", 0) or 0),
110
+ cache_read=int(_get(details, "cache_read", 0) or 0),
111
+ cache_creation=int(_get(details, "cache_creation", 0) or 0),
112
+ model=model,
113
+ )
114
+ return None
115
+
116
+
117
+ def resolve_usage(
118
+ response: LLMResponse, provider: str, model: str
119
+ ) -> UsageRecord | None:
120
+ """The LOCKED reconciliation rule.
121
+
122
+ A backend MAY pre-populate ``response.usage`` (a real escape hatch); otherwise
123
+ the core extracts it from ``response.raw``.
124
+
125
+ The dispatch call KNOWS the configured model (it's the ``model`` argument), so
126
+ the dispatch-arg ``model`` is authoritative: once a record is resolved, its
127
+ ``model`` field is stamped with this argument — the configured model always
128
+ wins over whatever the raw echoed (or didn't). Returns ``None`` unchanged when
129
+ there is no usage (the subscription lane).
130
+ """
131
+ record = (
132
+ response.usage
133
+ if response.usage is not None
134
+ else extract_usage(provider, response.raw)
135
+ )
136
+ if record is None:
137
+ return None
138
+ return dataclasses.replace(record, model=model)