powerailabs-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- powerailabs_core-0.1.0/.gitignore +16 -0
- powerailabs_core-0.1.0/PKG-INFO +51 -0
- powerailabs_core-0.1.0/README.md +33 -0
- powerailabs_core-0.1.0/pyproject.toml +22 -0
- powerailabs_core-0.1.0/src/powerailabs/core/__init__.py +21 -0
- powerailabs_core-0.1.0/src/powerailabs/core/bus.py +27 -0
- powerailabs_core-0.1.0/src/powerailabs/core/instrument.py +246 -0
- powerailabs_core-0.1.0/src/powerailabs/core/otel.py +36 -0
- powerailabs_core-0.1.0/src/powerailabs/core/prices.json +12 -0
- powerailabs_core-0.1.0/src/powerailabs/core/prices.py +117 -0
- powerailabs_core-0.1.0/src/powerailabs/core/protocols.py +63 -0
- powerailabs_core-0.1.0/src/powerailabs/core/py.typed +0 -0
- powerailabs_core-0.1.0/src/powerailabs/core/tokens.py +102 -0
- powerailabs_core-0.1.0/src/powerailabs/core/types.py +117 -0
- powerailabs_core-0.1.0/tests/test_bus.py +9 -0
- powerailabs_core-0.1.0/tests/test_instrument.py +89 -0
- powerailabs_core-0.1.0/tests/test_otel.py +9 -0
- powerailabs_core-0.1.0/tests/test_prices.py +72 -0
- powerailabs_core-0.1.0/tests/test_protocols.py +27 -0
- powerailabs_core-0.1.0/tests/test_tokens.py +49 -0
- powerailabs_core-0.1.0/tests/test_tools_and_replay.py +79 -0
- powerailabs_core-0.1.0/tests/test_types.py +42 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: powerailabs-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Foundation for the PowerAI Labs stack: shared types, token counting, prices, instrument(), event bus, OTel.
|
|
5
|
+
Author: Raghav Mishra
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Provides-Extra: anthropic
|
|
9
|
+
Requires-Dist: anthropic>=0.30; extra == 'anthropic'
|
|
10
|
+
Provides-Extra: openai
|
|
11
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
12
|
+
Provides-Extra: otel
|
|
13
|
+
Requires-Dist: opentelemetry-api>=1.25; extra == 'otel'
|
|
14
|
+
Requires-Dist: opentelemetry-sdk>=1.25; extra == 'otel'
|
|
15
|
+
Provides-Extra: tiktoken
|
|
16
|
+
Requires-Dist: tiktoken>=0.7; extra == 'tiktoken'
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# powerailabs-core
|
|
20
|
+
|
|
21
|
+
The shared foundation for the PowerAI Labs stack: canonical types, provider-aware token
|
|
22
|
+
counting, an offline price table, one `instrument()` interception point, an in-process event
|
|
23
|
+
bus, and OpenTelemetry GenAI emitters. Tiny on purpose — it's the blast radius for every other tool.
|
|
24
|
+
|
|
25
|
+
**One `instrument()` call, every sibling tool observes the stream — no per-call wiring, offline by default.**
|
|
26
|
+
|
|
27
|
+
 
|
|
28
|
+
|
|
29
|
+
🚧 building · usually installed transitively · `import powerailabs.core`
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from powerailabs.core import tokens, prices, instrument, bus
|
|
33
|
+
|
|
34
|
+
# Count tokens and price a call — fully offline, no API key, no network:
|
|
35
|
+
n = tokens.count([{"role": "user", "content": "Summarize the attached report in 3 bullets."}],
|
|
36
|
+
model="claude-opus-4-8")
|
|
37
|
+
cost = prices.estimate("claude-opus-4-8", input_tokens=n, output_tokens=200)
|
|
38
|
+
print(n, cost) # -> 20 0.0051000 USD
|
|
39
|
+
|
|
40
|
+
# Instrument any client once; tools subscribe to the normalized event stream:
|
|
41
|
+
@bus.subscribe
|
|
42
|
+
def on_call(call): # receives a normalized LLMCall with usage + cost
|
|
43
|
+
print(call.provider, call.model, call.cost)
|
|
44
|
+
|
|
45
|
+
client = instrument(openai_or_anthropic_client) # idempotent, additive, sync + async
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Install `powerailabs-core[tiktoken]` for exact OpenAI token counts (a documented heuristic is
|
|
49
|
+
used otherwise), or `[otel]` to emit `gen_ai.*` spans. Provider SDKs are always optional extras.
|
|
50
|
+
|
|
51
|
+
See [`docs/core.md`](../../docs/core.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# powerailabs-core
|
|
2
|
+
|
|
3
|
+
The shared foundation for the PowerAI Labs stack: canonical types, provider-aware token
|
|
4
|
+
counting, an offline price table, one `instrument()` interception point, an in-process event
|
|
5
|
+
bus, and OpenTelemetry GenAI emitters. Tiny on purpose — it's the blast radius for every other tool.
|
|
6
|
+
|
|
7
|
+
**One `instrument()` call, every sibling tool observes the stream — no per-call wiring, offline by default.**
|
|
8
|
+
|
|
9
|
+
 
|
|
10
|
+
|
|
11
|
+
🚧 building · usually installed transitively · `import powerailabs.core`
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from powerailabs.core import tokens, prices, instrument, bus
|
|
15
|
+
|
|
16
|
+
# Count tokens and price a call — fully offline, no API key, no network:
|
|
17
|
+
n = tokens.count([{"role": "user", "content": "Summarize the attached report in 3 bullets."}],
|
|
18
|
+
model="claude-opus-4-8")
|
|
19
|
+
cost = prices.estimate("claude-opus-4-8", input_tokens=n, output_tokens=200)
|
|
20
|
+
print(n, cost) # -> 20 0.0051000 USD
|
|
21
|
+
|
|
22
|
+
# Instrument any client once; tools subscribe to the normalized event stream:
|
|
23
|
+
@bus.subscribe
|
|
24
|
+
def on_call(call): # receives a normalized LLMCall with usage + cost
|
|
25
|
+
print(call.provider, call.model, call.cost)
|
|
26
|
+
|
|
27
|
+
client = instrument(openai_or_anthropic_client) # idempotent, additive, sync + async
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Install `powerailabs-core[tiktoken]` for exact OpenAI token counts (a documented heuristic is
|
|
31
|
+
used otherwise), or `[otel]` to emit `gen_ai.*` spans. Provider SDKs are always optional extras.
|
|
32
|
+
|
|
33
|
+
See [`docs/core.md`](../../docs/core.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "powerailabs-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Foundation for the PowerAI Labs stack: shared types, token counting, prices, instrument(), event bus, OTel."
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
authors = [{ name = "Raghav Mishra" }]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
dependencies = []
|
|
10
|
+
|
|
11
|
+
[project.optional-dependencies]
|
|
12
|
+
openai = ["openai>=1.0"]
|
|
13
|
+
anthropic = ["anthropic>=0.30"]
|
|
14
|
+
otel = ["opentelemetry-api>=1.25", "opentelemetry-sdk>=1.25"]
|
|
15
|
+
tiktoken = ["tiktoken>=0.7"] # optional: exact OpenAI token counts (heuristic fallback otherwise)
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["hatchling"]
|
|
19
|
+
build-backend = "hatchling.build"
|
|
20
|
+
|
|
21
|
+
[tool.hatch.build.targets.wheel]
|
|
22
|
+
packages = ["src/powerailabs"] # contributes powerailabs/core only — NEVER add src/powerailabs/__init__.py
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""powerailabs.core — the shared foundation. Keep this public surface small and stable."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from . import bus, otel, prices, protocols, tokens
|
|
6
|
+
from .instrument import instrument, instrument_tool
|
|
7
|
+
from .types import LLMCall, Money, ToolCall, Usage
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"LLMCall",
|
|
11
|
+
"ToolCall",
|
|
12
|
+
"Usage",
|
|
13
|
+
"Money",
|
|
14
|
+
"bus",
|
|
15
|
+
"tokens",
|
|
16
|
+
"prices",
|
|
17
|
+
"otel",
|
|
18
|
+
"protocols",
|
|
19
|
+
"instrument",
|
|
20
|
+
"instrument_tool",
|
|
21
|
+
]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""In-process pub/sub event bus: one instrument() emits, many tools subscribe. docs/core.md §6."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
_subscribers: list[Callable[[Any], None]] = []
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def subscribe(fn: Callable[[Any], None]) -> Callable[[Any], None]:
    """Put ``fn`` on the bus and hand it back, so this also works as a decorator.

    Registration is idempotent: a callable that is already on the bus is not added a
    second time, which lets a sibling tool call this repeatedly to ensure its subscription.
    """
    already_registered = fn in _subscribers
    if already_registered:
        return fn
    _subscribers.append(fn)
    return fn
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def emit(event: Any) -> None:
    """Deliver ``event`` to every registered subscriber, synchronously, in list order.

    Delivery walks a snapshot of the registry, so a subscriber that registers another
    callable while handling the event does not affect the delivery in progress.
    """
    snapshot = tuple(_subscribers)
    for deliver in snapshot:
        deliver(event)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _reset() -> None:
    """Test helper: empty the subscriber registry in place."""
    del _subscribers[:]
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Single interception point: wrap a provider client (or tool) once; emit normalized events.
|
|
2
|
+
|
|
3
|
+
docs/core.md §6. Idempotent (re-wrapping is a no-op) and additive (coexists with other
|
|
4
|
+
instrumentation like OpenLLMetry). Supports sync and async. Uses duck typing — the provider
|
|
5
|
+
SDKs are never imported here, so they stay optional.
|
|
6
|
+
|
|
7
|
+
Two cooperation hooks (used by ``cassette``; harmless otherwise):
|
|
8
|
+
* **record** — the raw provider response is attached at ``call.metadata["response"]`` before
|
|
9
|
+
the event is emitted, so a subscriber can persist it.
|
|
10
|
+
* **replay** — registered *interceptors* run *before* the real call; one may return a response
|
|
11
|
+
to short-circuit it (returning :data:`MISS` to decline). This is how record/replay avoids a
|
|
12
|
+
second instrumentation point: tools cooperate through ``core``, they never patch the client.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import functools
|
|
18
|
+
import inspect
|
|
19
|
+
import time
|
|
20
|
+
import uuid
|
|
21
|
+
from collections.abc import Callable
|
|
22
|
+
from datetime import UTC, datetime
|
|
23
|
+
from typing import Any, TypeVar
|
|
24
|
+
|
|
25
|
+
from . import bus, prices
|
|
26
|
+
from .types import LLMCall, ToolCall, Usage
|
|
27
|
+
|
|
28
|
+
T = TypeVar("T")
|
|
29
|
+
|
|
30
|
+
_WRAPPED = "_powerailabs_wrapped"
|
|
31
|
+
|
|
32
|
+
#: Sentinel an interceptor returns to decline a call (let it proceed normally). A recorded
|
|
33
|
+
#: response may legitimately be ``None``, so "no replay" needs its own distinct value.
|
|
34
|
+
MISS: Any = object()
|
|
35
|
+
|
|
36
|
+
_interceptors: list[Callable[[Any], Any]] = []
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def add_interceptor(fn: Callable[[Any], Any]) -> Callable[[Any], Any]:
    """Register ``fn`` as a pre-call interceptor and return it.

    An interceptor is handed the event (``LLMCall``/``ToolCall``) before the real call runs.
    It returns a response to short-circuit that call, or :data:`MISS` to let it proceed.
    Registering the same callable twice is a no-op.
    """
    is_known = fn in _interceptors
    if not is_known:
        _interceptors.append(fn)
    return fn
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def remove_interceptor(fn: Callable[[Any], Any]) -> None:
    """Drop ``fn`` from the interceptor list; silently does nothing if it was never added."""
    try:
        _interceptors.remove(fn)
    except ValueError:
        # ``list.remove`` raises when the item is absent; absence is not an error here.
        pass
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _intercept(event: Any) -> Any:
    """Offer ``event`` to each interceptor in turn; the first non-``MISS`` answer wins.

    Returns :data:`MISS` when every interceptor declines (or none is registered). Any other
    value — including ``None`` — is treated as the replayed response.
    """
    for interceptor in tuple(_interceptors):
        outcome = interceptor(event)
        if outcome is MISS:
            continue
        return outcome
    return MISS
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# --------------------------------------------------------------------------- model clients
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def instrument(client: T) -> T:
    """Patch a provider client in place so every call emits an ``LLMCall`` on the bus.

    The client is recognised by shape, not by type: ``chat.completions.create`` marks an
    OpenAI-style client, ``messages.create`` an Anthropic-style one. A client matching
    neither shape is returned untouched. Calling this again on an already-instrumented
    client changes nothing, and the very same client object is always returned.
    """
    found = _find_target(client)
    if found is not None:
        owner, attr, provider = found
        current = getattr(owner, attr)
        # The wrapper carries a marker attribute; seeing it means we already patched this.
        if not getattr(current, _WRAPPED, False):
            setattr(owner, attr, _wrap(current, provider))
    return client
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _find_target(client: Any) -> tuple[Any, str, str] | None:
    """Locate the method to patch on ``client``.

    Returns ``(owner, attribute_name, provider)`` where ``owner`` is the object holding the
    callable ``create`` attribute, or ``None`` if the client matches neither known shape.
    The OpenAI shape (``chat.completions.create``) is checked before the Anthropic shape
    (``messages.create``).
    """

    def _has_create(namespace: Any) -> bool:
        return namespace is not None and callable(getattr(namespace, "create", None))

    chat = getattr(client, "chat", None)
    if chat is not None:
        completions = getattr(chat, "completions", None)
        if _has_create(completions):
            return completions, "create", "openai"

    messages = getattr(client, "messages", None)
    if not _has_create(messages):
        return None
    return messages, "create", "anthropic"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _wrap(fn: Any, provider: str) -> Any:
    """Build the instrumented replacement for a provider ``create`` callable.

    The returned wrapper (async or sync, matching ``fn``) does, per call:

    1. build the ``LLMCall`` and start the timer via ``_pre``;
    2. offer the call to the registered interceptors; a non-``MISS`` answer is used as the
       response, the real call is skipped and ``metadata["replayed"]`` is set;
    3. otherwise invoke ``fn``;
    4. finalize and emit the event via ``_post``, then return the response.

    The wrapper is tagged with the ``_WRAPPED`` marker so it is never wrapped twice.

    Args:
        fn: The original (bound) ``create`` callable.
        provider: Provider label stored on the emitted event.

    Returns:
        The wrapping callable.
    """
    # ``iscoroutinefunction`` does not look through ``__wrapped__``: an ``async def`` hidden
    # behind a plain-``def`` decorator that uses ``functools.wraps`` reports False even though
    # calling it returns a coroutine. Inspect the innermost function too, so such callables
    # get the async wrapper (awaited response, awaitable replay) instead of the sync one.
    try:
        innermost = inspect.unwrap(fn)
    except ValueError:  # ``__wrapped__`` cycle — fall back to the callable itself
        innermost = fn
    is_async = inspect.iscoroutinefunction(fn) or inspect.iscoroutinefunction(innermost)

    if is_async:

        @functools.wraps(fn)
        async def awrapper(*args: Any, **kwargs: Any) -> Any:
            call, start = _pre(provider, kwargs)
            replayed = _intercept(call)
            if replayed is not MISS:
                call.metadata["replayed"] = True
                response = replayed
            else:
                response = fn(*args, **kwargs)
                # Guard the await: a decorator may already have resolved the coroutine.
                if inspect.isawaitable(response):
                    response = await response
            _post(call, response, provider, start)
            return response

        setattr(awrapper, _WRAPPED, True)
        return awrapper

    @functools.wraps(fn)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        call, start = _pre(provider, kwargs)
        replayed = _intercept(call)
        if replayed is not MISS:
            call.metadata["replayed"] = True
            response = replayed
        else:
            response = fn(*args, **kwargs)
        _post(call, response, provider, start)
        return response

    setattr(wrapper, _WRAPPED, True)
    return wrapper
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _pre(provider: str, kwargs: dict) -> tuple[LLMCall, float]:
    """Create the ``LLMCall`` for an outgoing request and start its latency timer.

    ``model`` and ``messages`` are read from the call's keyword arguments (defaulting to
    ``""`` and an empty list); the message list is copied. Returns the event together with
    the ``time.perf_counter()`` reading taken right after it was built.
    """
    history = kwargs.get("messages") or []
    event = LLMCall(
        id=uuid.uuid4().hex,
        provider=provider,
        model=kwargs.get("model", ""),
        messages=list(history),
        ts=datetime.now(UTC),
    )
    started = time.perf_counter()
    return event, started
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _post(call: LLMCall, response: Any, provider: str, start: float) -> None:
    """Complete ``call`` from ``response`` and publish it on the bus.

    Fills in latency (milliseconds since ``start``), token usage and — when usage is
    available — the estimated cost. A model missing from the price table leaves the cost as
    ``None`` rather than failing the call. The raw response is attached under
    ``metadata["response"]`` before the event is emitted.
    """
    elapsed_s = time.perf_counter() - start
    call.latency_ms = elapsed_s * 1000.0

    tokens_used = _extract_usage(response, provider)
    call.usage = tokens_used
    if tokens_used is not None:
        try:
            cost = prices.estimate(
                call.model,
                tokens_used.input_tokens,
                tokens_used.output_tokens,
                tokens_used.cached_tokens,
            )
        except KeyError:
            # Model not in the price table: no cost, not an error.
            cost = None
        call.cost = cost

    # For recorders (cassette): this stores a reference to the response, not a copy.
    call.metadata["response"] = response
    bus.emit(call)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _get(obj: Any, name: str, default: Any = None) -> Any:
    """Read ``name`` from ``obj`` by key if it is a dict, otherwise by attribute.

    Returns ``default`` when the key/attribute is missing.
    """
    return obj.get(name, default) if isinstance(obj, dict) else getattr(obj, name, default)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _extract_usage(response: Any, provider: str) -> Usage | None:
    """Normalize a provider response's ``usage`` section into a :class:`Usage`.

    For ``provider == "openai"`` the fields read are ``prompt_tokens``,
    ``completion_tokens`` and ``prompt_tokens_details.cached_tokens``; for any other provider
    the Anthropic names ``input_tokens``, ``output_tokens`` and ``cache_read_input_tokens``
    are used. Returns ``None`` if the response has no usage or no input-token count. Missing
    or falsy output/cached counts are treated as ``0``.
    """
    raw = _get(response, "usage")
    if raw is None:
        return None

    if provider == "openai":
        prompt = _get(raw, "prompt_tokens")
        completion = _get(raw, "completion_tokens", 0) or 0
        details = _get(raw, "prompt_tokens_details")
        if details is None:
            cache_hits = 0
        else:
            cache_hits = _get(details, "cached_tokens", 0) or 0
    else:  # anthropic
        prompt = _get(raw, "input_tokens")
        completion = _get(raw, "output_tokens", 0) or 0
        cache_hits = _get(raw, "cache_read_input_tokens", 0) or 0

    if prompt is None:
        return None
    return Usage(
        input_tokens=int(prompt),
        output_tokens=int(completion),
        cached_tokens=int(cache_hits),
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# --------------------------------------------------------------------------- tools
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def instrument_tool(name: str | Callable | None = None) -> Callable:
    """Decorate a tool so each invocation emits a ``ToolCall`` on the bus.

    Works both bare (``@instrument_tool``) and with an explicit label
    (``@instrument_tool("search")``). Without a label the function's ``__name__`` is used,
    falling back to ``"tool"``. Like :func:`instrument` it is idempotent, handles sync and
    async callables, and honours replay interceptors; the return value is stored on
    ``ToolCall.result`` so ``cassette`` can record/replay tool side effects.
    """

    def _default_label(target: Callable) -> str:
        return str(getattr(target, "__name__", "tool"))

    if callable(name):
        # Bare ``@instrument_tool``: ``name`` is actually the function being decorated.
        return _wrap_tool(name, _default_label(name))

    def decorator(fn: Callable) -> Callable:
        label = name or _default_label(fn)
        return _wrap_tool(fn, label)

    return decorator
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _wrap_tool(fn: Callable, tool_name: str) -> Callable:
    """Build the instrumented replacement for a tool callable.

    An already-instrumented callable (one carrying the ``_WRAPPED`` marker) is returned
    unchanged. Otherwise the wrapper (async or sync, matching ``fn``) does, per invocation:
    build the ``ToolCall`` via ``_pre_tool``; offer it to the interceptors, using a
    non-``MISS`` answer as the result and setting ``metadata["replayed"]``; otherwise call
    ``fn``; then finalize and emit via ``_post_tool`` and return the result.

    Args:
        fn: The tool function to wrap.
        tool_name: Label recorded on every emitted ``ToolCall``.

    Returns:
        The wrapping callable, or ``fn`` itself if it was already wrapped.
    """
    if getattr(fn, _WRAPPED, False):
        return fn

    # ``iscoroutinefunction`` does not look through ``__wrapped__``: an ``async def`` tool
    # hidden behind a plain-``def`` decorator that uses ``functools.wraps`` reports False
    # even though calling it returns a coroutine. Check the innermost function too, so the
    # recorded result is the awaited value rather than a coroutine object.
    try:
        innermost = inspect.unwrap(fn)
    except ValueError:  # ``__wrapped__`` cycle — fall back to the callable itself
        innermost = fn
    is_async = inspect.iscoroutinefunction(fn) or inspect.iscoroutinefunction(innermost)

    if is_async:

        @functools.wraps(fn)
        async def awrapper(*args: Any, **kwargs: Any) -> Any:
            tc, start = _pre_tool(tool_name, args, kwargs)
            replayed = _intercept(tc)
            if replayed is not MISS:
                tc.metadata["replayed"] = True
                result = replayed
            else:
                result = fn(*args, **kwargs)
                # Guard the await: a decorator may already have resolved the coroutine.
                if inspect.isawaitable(result):
                    result = await result
            _post_tool(tc, result, start)
            return result

        setattr(awrapper, _WRAPPED, True)
        return awrapper

    @functools.wraps(fn)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        tc, start = _pre_tool(tool_name, args, kwargs)
        replayed = _intercept(tc)
        if replayed is not MISS:
            tc.metadata["replayed"] = True
            result = replayed
        else:
            result = fn(*args, **kwargs)
        _post_tool(tc, result, start)
        return result

    setattr(wrapper, _WRAPPED, True)
    return wrapper
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _pre_tool(name: str, args: tuple, kwargs: dict) -> tuple[ToolCall, float]:
    """Create the ``ToolCall`` for an invocation and start its latency timer.

    Positional and keyword arguments are copied into ``arguments`` under the ``"args"`` and
    ``"kwargs"`` keys. Returns the event and the ``time.perf_counter()`` reading taken right
    after it was built.
    """
    captured = {"args": list(args), "kwargs": dict(kwargs)}
    event = ToolCall(
        id=uuid.uuid4().hex,
        name=name,
        arguments=captured,
        ts=datetime.now(UTC),
    )
    started = time.perf_counter()
    return event, started
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _post_tool(tc: ToolCall, result: Any, start: float) -> None:
    """Record latency (ms since ``start``) and ``result`` on ``tc``, then emit it on the bus."""
    elapsed_s = time.perf_counter() - start
    tc.latency_ms = elapsed_s * 1000.0
    tc.result = result
    bus.emit(tc)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""OpenTelemetry GenAI span helpers (optional). No-op if OTel isn't installed. docs/core.md §6.
|
|
2
|
+
|
|
3
|
+
Emits ``gen_ai.*`` spans following the OpenTelemetry GenAI semantic conventions, so the whole
|
|
4
|
+
stack speaks the standard everyone is converging on — no proprietary telemetry format.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Iterator
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@contextmanager
def span(model: str, *, provider: str | None = None, **attributes: Any) -> Iterator[Any]:
    """Emit a ``gen_ai`` span around a call. Yields the span, or ``None`` if OTel is absent.

    Args:
        model: Model id, recorded as ``gen_ai.request.model`` and used in the span name
            (``"chat <model>"``).
        provider: Optional system name, recorded as ``gen_ai.system`` when given.
        **attributes: Extra span attributes to set verbatim.

    Yields:
        The current OpenTelemetry span, or ``None`` when ``opentelemetry`` cannot be imported.
    """
    try:
        from opentelemetry import trace
    except ImportError:
        trace = None  # optional extra not installed

    if trace is None:
        # Yield *outside* the ``except`` block. An exception raised by the caller's ``with``
        # body is thrown back in at this ``yield``; were the ImportError still being handled,
        # that exception would be chained to it and reported as having occurred "during
        # handling of" a missing-module error.
        yield None
        return

    tracer = trace.get_tracer("powerailabs.core")
    with tracer.start_as_current_span(f"chat {model}") as current:
        current.set_attribute("gen_ai.request.model", model)
        if provider is not None:
            current.set_attribute("gen_ai.system", provider)
        for key, value in attributes.items():
            current.set_attribute(key, value)
        yield current
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Illustrative snapshot of per-token USD rates. Not authoritative — refresh via prices.refresh(url=...) or replace with a real dated snapshot. See docs/core.md §7.",
|
|
3
|
+
"_updated": "2026-06-01",
|
|
4
|
+
"models": {
|
|
5
|
+
"gpt-4o": {"input": 0.0000025, "output": 0.00001, "cached": 0.00000125},
|
|
6
|
+
"gpt-4o-mini": {"input": 0.00000015, "output": 0.0000006},
|
|
7
|
+
"gpt-4.1": {"input": 0.000002, "output": 0.000008},
|
|
8
|
+
"claude-opus-4-8": {"input": 0.000005, "output": 0.000025, "cached": 0.0000005},
|
|
9
|
+
"claude-sonnet-4-6": {"input": 0.000003, "output": 0.000015},
|
|
10
|
+
"claude-haiku-4-5": {"input": 0.0000008, "output": 0.000004}
|
|
11
|
+
}
|
|
12
|
+
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Offline-first price registry: bundled snapshot + optional refresh. docs/core.md §7.
|
|
2
|
+
|
|
3
|
+
A dated ``prices.json`` ships in the wheel, so cost estimation works with no network.
|
|
4
|
+
``refresh(url=...)`` optionally pulls a *static* JSON file (GitHub raw / CDN) — never a
|
|
5
|
+
running service — and falls back silently to the bundled snapshot if it can't.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from decimal import Decimal
|
|
12
|
+
|
|
13
|
+
from .types import Money
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class UnknownModelError(KeyError):
    """Signals that the requested model id has no entry in the price table.

    Subclasses ``KeyError`` so callers may catch either exception type.
    """
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_table: dict | None = None
|
|
21
|
+
_source: str = "bundled"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _bundled_text() -> str:
    """Return the raw text of the ``prices.json`` snapshot that ships with the package.

    The file is read through ``importlib.resources`` first; if that fails for any reason it
    is read from the directory containing this module instead.
    """
    resource_name = "prices.json"
    try:
        from importlib.resources import files

        package_root = files("powerailabs.core")
        return (package_root / resource_name).read_text(encoding="utf-8")
    except Exception:
        from pathlib import Path

        sibling = Path(__file__).with_name(resource_name)
        return sibling.read_text(encoding="utf-8")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _ensure_loaded() -> dict:
    """Return the active price table, parsing the bundled snapshot on first use."""
    global _table
    if _table is not None:
        return _table
    _table = json.loads(_bundled_text())
    return _table
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _rates(model: str) -> dict:
    """Look up the rate entry for ``model`` in the active table.

    Raises:
        UnknownModelError: If ``model`` has no entry.
    """
    known = _ensure_loaded().get("models", {})
    if model in known:
        return known[model]
    raise UnknownModelError(model)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def estimate(
    model: str,
    input_tokens: int,
    output_tokens: int = 0,
    cached_tokens: int = 0,
) -> Money:
    """Price a call from the active snapshot, using exact ``Decimal`` arithmetic.

    Args:
        model: Model id; must exist in the table (else :class:`UnknownModelError`).
        input_tokens: Billed input tokens.
        output_tokens: Billed output tokens.
        cached_tokens: Cache-read tokens, priced at the model's ``cached`` rate (``0`` if the
            model has none).

    Returns:
        The estimated :class:`~powerailabs.core.types.Money` cost.
    """
    rates = _rates(model)
    # Rates go through ``str`` first so the Decimal carries the value as written in the JSON,
    # not the binary-float approximation.
    input_cost = Decimal(str(rates["input"])) * input_tokens
    output_cost = Decimal(str(rates.get("output", 0))) * output_tokens
    # NOTE(review): cached tokens are charged *in addition to* ``input_tokens`` here —
    # confirm callers pass an input count that excludes cache reads.
    cached_cost = Decimal(str(rates.get("cached", 0))) * cached_tokens
    return Money(input_cost + output_cost + cached_cost)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def models() -> list[str]:
    """List the model ids present in the active price table, in sorted order."""
    table = _ensure_loaded()
    names = list(table.get("models", {}))
    names.sort()
    return names
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def snapshot_date() -> str | None:
    """Return the loaded snapshot's ``_updated`` value (``None`` if it has none).

    Lets callers surface how old the active price data is.
    """
    loaded = _ensure_loaded()
    return loaded.get("_updated")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def source() -> str:
    """Report where the active table came from: ``"bundled"`` or ``"refreshed"``."""
    origin = _source
    return origin
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def refresh(url: str | None = None, *, timeout: float = 5.0) -> bool:
    """Optionally replace the table from a static JSON URL. Never raises.

    The payload is only accepted if it has the structure the rest of this module relies on:
    a JSON object whose ``"models"`` value is an object mapping each model id to an object
    of rates. Anything else is rejected, so a malformed file cannot replace a working table
    and make later ``estimate()`` calls fail with an unexpected error type.

    Args:
        url: Location of the JSON price file. ``None``/empty means "do nothing".
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``True`` if the table was updated, ``False`` if no URL was given, the fetch or parse
        failed, or the payload was malformed (the bundled/last-good snapshot stays active).
        docs/core.md §7.
    """
    global _table, _source
    if not url:
        return False

    try:
        import urllib.request

        with urllib.request.urlopen(url, timeout=timeout) as resp:  # noqa: S310 - static JSON only
            data = json.loads(resp.read().decode("utf-8"))
    except Exception:
        # Best effort by contract: any fetch/decode/parse failure keeps the current table.
        return False

    if not isinstance(data, dict):
        return False
    entries = data.get("models")
    if not isinstance(entries, dict):
        return False
    if not all(isinstance(rates, dict) for rates in entries.values()):
        return False

    _table = data
    _source = "refreshed"
    return True
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _reset() -> None:
    """Test helper: forget the loaded table so the bundled snapshot is reloaded on next use."""
    global _table, _source
    _table, _source = None, "bundled"
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Structural interfaces shared across the stack. docs/core.md §2, architecture.md §2 (Layer 1).
|
|
2
|
+
|
|
3
|
+
These are ``typing.Protocol``s, so a library satisfies an interface by *shape* — no imports, no
|
|
4
|
+
base classes, zero directional coupling. ``squeeze`` *is* a ``Compressor`` without importing
|
|
5
|
+
``contextkit``; ``acttrace`` *is* a ``Subscriber`` without importing the bus' producers.
|
|
6
|
+
|
|
7
|
+
Grown incrementally as tools land: ``Compressor``/``EvictionStrategy``/``Handle`` (contextkit +
|
|
8
|
+
squeeze), ``Sink``/``Subscriber`` (cassette + acttrace).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any, Protocol, runtime_checkable
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@runtime_checkable
class Handle(Protocol):
    """Restore handle for a reversible compression."""

    def expand(self) -> Any:
        """Return the original content."""
        ...
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@runtime_checkable
class Compressor(Protocol):
    """Anything that can shrink content toward a token budget, reversibly.

    ``squeeze`` has this shape; ``contextkit`` accepts any object of this shape for
    ``Block(evict="compress")`` without importing ``squeeze``.
    """

    def compress(
        self,
        content: Any,
        *,
        target_tokens: int | None = None,
        model: str | None = None,
        kind: str = "auto",
    ) -> tuple[str, Handle]:
        """Return the compressed text together with a :class:`Handle` that restores it."""
        ...
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@runtime_checkable
class EvictionStrategy(Protocol):
    """A pluggable rule for shrinking a single block.

    ``contextkit`` ships string-named built-ins (``drop_oldest``/``truncate``) and also
    accepts custom strategies of this shape.
    """

    def evict(self, content: str, remaining_tokens: int, model: str) -> tuple[str | None, str]:
        """Return ``(new_content_or_None, action_label)``; ``None`` means the block was dropped."""
        ...
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@runtime_checkable
class Sink(Protocol):
    """Where records/entries end up (in-memory, JSONL, SQLite, OTel, ...)."""

    def write(self, entry: Any) -> None:
        """Accept one entry."""
        ...
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@runtime_checkable
class Subscriber(Protocol):
    """A bus subscriber: any callable taking one normalized event. ``acttrace`` is one."""

    def __call__(self, event: Any) -> None:
        """Handle one event."""
        ...
|
|
File without changes
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Provider-aware token counting. docs/core.md §4, §8.
|
|
2
|
+
|
|
3
|
+
Best-effort and **offline by default**: a character-based heuristic per model family,
|
|
4
|
+
so counting never hits the network and is fully deterministic. If ``tiktoken`` is installed
|
|
5
|
+
(``pip install powerailabs-core[tiktoken]``) it is used for exact OpenAI counts. Register a
|
|
6
|
+
precise counter for any family via :func:`register` to override the heuristic.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import math
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
|
|
14
|
+
# Counts for chat messages add a small fixed overhead per message plus a one-off priming
|
|
15
|
+
# cost, mirroring the framing real chat tokenizers add around each turn.
|
|
16
|
+
_MESSAGE_OVERHEAD = 4
|
|
17
|
+
_PRIMING = 3
|
|
18
|
+
|
|
19
|
+
# Approximate characters-per-token by family (heuristic fallback). Documented accuracy:
|
|
20
|
+
# within ~10-15% of provider tokenizers for English prose.
|
|
21
|
+
_CHARS_PER_TOKEN: dict[str, float] = {
|
|
22
|
+
"openai": 4.0,
|
|
23
|
+
"anthropic": 3.5,
|
|
24
|
+
"default": 4.0,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
Counter = Callable[["str | list[dict]", str], int]
|
|
28
|
+
_counters: dict[str, Counter] = {}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def family(model: str) -> str:
    """Map a model id to its tokenizer family: ``"openai"``, ``"anthropic"`` or ``"default"``.

    Matching is by case-insensitive prefix of the model id.
    """
    lowered = model.lower()
    openai_prefixes = ("gpt", "o1", "o3", "o4", "chatgpt", "text-", "davinci")
    if lowered.startswith(openai_prefixes):
        return "openai"
    return "anthropic" if lowered.startswith("claude") else "default"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def register(fam: str, counter: Counter) -> None:
    """Install ``counter`` as the token counter for family ``fam``, replacing any previous one.

    Use this to plug in a precise tokenizer in place of the heuristic. See docs/core.md §8.
    """
    _counters.update({fam: counter})
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def count(text_or_messages: str | list[dict], model: str) -> int:
    """Count tokens for a string or a list of chat messages under ``model``.

    A counter registered for the model's family takes over completely. Otherwise a plain
    string is counted directly, and a message list is the one-off priming cost plus, for each
    message, a fixed overhead and the count of its text.

    Args:
        text_or_messages: Raw text, or a list of ``{"role", "content"}`` message dicts.
            ``content`` may itself be a list of content blocks (multimodal); text parts are
            summed.
        model: The model id; selects the tokenizer family.

    Returns:
        The estimated token count.
    """
    fam = family(model)
    if fam in _counters:
        custom = _counters[fam]
        return custom(text_or_messages, model)

    if not isinstance(text_or_messages, str):
        per_message = (
            _MESSAGE_OVERHEAD + _count_text(_message_text(message), fam, model)
            for message in text_or_messages
        )
        return _PRIMING + sum(per_message)

    return _count_text(text_or_messages, fam, model)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _message_text(msg: dict) -> str:
    """Extract the countable text of one chat message.

    If ``content`` is a list of content blocks, the ``"text"`` values of the dict blocks are
    concatenated. Blocks that are not dicts, that have no ``"text"``, or whose ``"text"`` is
    not a string (e.g. ``None``) contribute nothing, so one odd block cannot make token
    counting raise. Any other ``content`` is stringified, with ``None``/empty becoming ``""``.

    Args:
        msg: A message dict; a missing ``"content"`` key is treated as empty.

    Returns:
        The message's text, possibly empty.
    """
    content = msg.get("content", "")
    if isinstance(content, list):
        texts = (part.get("text") for part in content if isinstance(part, dict))
        return "".join(text for text in texts if isinstance(text, str))
    return str(content or "")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _count_text(text: str, fam: str, model: str) -> int:
    """Count the tokens in a plain string.

    Args:
        text: The text to measure.
        fam: Tokenizer family, as returned by ``family()``.
        model: The model id, used to pick a tiktoken encoding for OpenAI models.

    Returns:
        ``0`` for empty text. For the ``"openai"`` family, the exact tiktoken count
        when an encoding is available. Otherwise a characters-per-token estimate,
        rounded up.
    """
    if not text:
        return 0
    if fam == "openai":
        enc = _tiktoken_encoding(model)
        if enc is not None:
            # Arbitrary input may contain special-token strings such as
            # "<|endoftext|>"; tiktoken rejects those with ValueError by default,
            # so count them as ordinary text instead.
            return len(enc.encode(text, disallowed_special=()))
    cpt = _CHARS_PER_TOKEN.get(fam, _CHARS_PER_TOKEN["default"])
    return math.ceil(len(text) / cpt)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _tiktoken_encoding(model: str):  # noqa: ANN202 - third-party type is optional
    """Return a tiktoken encoding for ``model``, or ``None`` when none can be obtained.

    ``None`` tells the caller to use its heuristic estimate. It is returned when
    tiktoken is not installed, and when an encoding cannot be loaded for any reason.
    A model id that tiktoken does not know falls back to the ``o200k_base`` encoding.
    """
    try:
        import tiktoken
    except ImportError:
        return None
    try:
        return tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model id: try the generic encoding below.
        pass
    except Exception:
        # Known model, but its encoding failed to load (e.g. vocabulary data
        # unavailable). Degrade to the heuristic instead of propagating.
        return None
    try:
        return tiktoken.get_encoding("o200k_base")
    except Exception:
        return None
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Canonical data types shared across the powerailabs stack. See docs/core.md §5."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from decimal import Decimal
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class Usage:
    """Immutable token counts for one LLM call."""

    input_tokens: int
    output_tokens: int = 0
    cached_tokens: int = 0

    @property
    def total_tokens(self) -> int:
        """Input plus output tokens; cached tokens are already part of input, so not re-added."""
        return sum((self.input_tokens, self.output_tokens))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class Money:
    """A Decimal-backed monetary amount. Never use ``float`` for money.

    Accepts ``int``/``float``/``str``/``Decimal`` for ``amount`` and coerces to
    ``Decimal`` (floats via their string form, to avoid binary-float noise).
    Arithmetic and ordering comparisons between two ``Money`` values require a
    matching ``currency`` and raise ``ValueError`` otherwise. Operations against
    unsupported operand types return ``NotImplemented``, so Python raises its
    usual ``TypeError``.
    """

    amount: Decimal
    currency: str = "USD"

    def __post_init__(self) -> None:
        # The dataclass is frozen, so the coerced value must bypass __setattr__.
        if not isinstance(self.amount, Decimal):
            object.__setattr__(self, "amount", Decimal(str(self.amount)))

    @classmethod
    def zero(cls, currency: str = "USD") -> Money:
        """A zero amount in the given currency."""
        return cls(Decimal("0"), currency)

    def _check(self, other: Money) -> None:
        """Raise ``ValueError`` unless ``other`` has the same currency."""
        if self.currency != other.currency:
            raise ValueError(f"currency mismatch: {self.currency} vs {other.currency}")

    def __add__(self, other: Money | int) -> Money:
        """Add another ``Money`` of the same currency; adding ``0`` returns ``self``."""
        if other == 0:  # supports sum([...]) which starts at 0
            return self
        if not isinstance(other, Money):
            return NotImplemented
        self._check(other)
        return Money(self.amount + other.amount, self.currency)

    __radd__ = __add__

    def __sub__(self, other: Money) -> Money:
        """Subtract another ``Money`` of the same currency."""
        if not isinstance(other, Money):
            return NotImplemented
        self._check(other)
        return Money(self.amount - other.amount, self.currency)

    def __mul__(self, scalar: int | Decimal) -> Money:
        """Scale the amount by a plain number; the currency is kept.

        Non-``Decimal`` scalars are converted through their string form, as in
        ``__post_init__``. A ``Money`` operand or a value that does not parse as a
        number yields ``NotImplemented``.
        """
        if isinstance(scalar, Money):
            return NotImplemented
        if not isinstance(scalar, Decimal):
            from decimal import InvalidOperation

            try:
                scalar = Decimal(str(scalar))
            except InvalidOperation:
                return NotImplemented
        return Money(self.amount * scalar, self.currency)

    __rmul__ = __mul__

    def __lt__(self, other: Money) -> bool:
        if not isinstance(other, Money):
            return NotImplemented
        self._check(other)
        return self.amount < other.amount

    def __le__(self, other: Money) -> bool:
        if not isinstance(other, Money):
            return NotImplemented
        self._check(other)
        return self.amount <= other.amount

    def __gt__(self, other: Money) -> bool:
        if not isinstance(other, Money):
            return NotImplemented
        self._check(other)
        return self.amount > other.amount

    def __ge__(self, other: Money) -> bool:
        if not isinstance(other, Money):
            return NotImplemented
        self._check(other)
        return self.amount >= other.amount

    def __str__(self) -> str:
        return f"{self.amount} {self.currency}"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class LLMCall:
    """Provider-agnostic, normalized record of a single model call. Emitted on the bus.

    Attributes:
        id: Identifier of this call record.
        provider: Provider name, e.g. ``"openai"`` or ``"anthropic"``.
        model: The model id the call was made with.
        messages: The chat messages of the call.
        usage: Token usage, or ``None`` when not available.
        cost: Estimated cost, or ``None`` when no cost is available.
        latency_ms: Call duration in milliseconds, or ``None`` when not measured.
        trace_id: Trace correlation id; empty by default.
        ts: Timestamp of the call, or ``None``.
            NOTE(review): timezone convention is not visible here; confirm.
        metadata: Free-form extra data; each instance gets its own dict.
    """

    id: str
    provider: str
    model: str
    messages: list[dict]
    usage: Usage | None = None
    cost: Money | None = None
    latency_ms: float | None = None
    trace_id: str = ""
    ts: datetime | None = None
    metadata: dict = field(default_factory=dict)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class ToolCall:
    """Normalized record of a single tool invocation. Emitted when the dispatcher is wrapped.

    Attributes:
        id: Identifier of this call record.
        name: Name of the tool.
        arguments: The arguments the tool was invoked with.
        result: The tool's return value, or ``None``.
        latency_ms: Invocation duration in milliseconds, or ``None`` when not measured.
        trace_id: Trace correlation id; empty by default.
        ts: Timestamp of the invocation, or ``None``.
            NOTE(review): timezone convention is not visible here; confirm.
        metadata: Free-form extra data; each instance gets its own dict.
    """

    id: str
    name: str
    arguments: dict
    result: object | None = None
    latency_ms: float | None = None
    trace_id: str = ""
    ts: datetime | None = None
    metadata: dict = field(default_factory=dict)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""instrument(): mock clients only, no network. Idempotent wrap + normalized LLMCall events."""
|
|
2
|
+
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
from types import SimpleNamespace
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
from powerailabs.core import bus, instrument
|
|
8
|
+
from powerailabs.core.types import LLMCall, Money, Usage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture
def events():
    """Yield a list that collects every event published on the bus during the test.

    The bus is reset before subscribing and again after the test finishes.
    """
    bus._reset()
    captured: list = []
    collect = captured.append
    bus.subscribe(collect)
    yield captured
    bus._reset()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _openai_client(prompt_tokens=100, completion_tokens=50):
    """Build a network-free stand-in exposing ``chat.completions.create``.

    ``create`` accepts any keyword arguments and returns a response whose ``usage``
    carries the given ``prompt_tokens`` and ``completion_tokens``.
    """

    class Completions:
        def create(self, **kwargs):
            reported = SimpleNamespace(
                prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
            )
            return SimpleNamespace(usage=reported)

    chat = SimpleNamespace(completions=Completions())
    return SimpleNamespace(chat=chat)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _anthropic_async_client(input_tokens=10, output_tokens=20):
    """Build a network-free stand-in exposing an async ``messages.create``.

    ``create`` accepts any keyword arguments and resolves to a response whose
    ``usage`` carries the given ``input_tokens`` and ``output_tokens``.
    """

    class Messages:
        async def create(self, **kwargs):
            reported = SimpleNamespace(input_tokens=input_tokens, output_tokens=output_tokens)
            return SimpleNamespace(usage=reported)

    messages_api = Messages()
    return SimpleNamespace(messages=messages_api)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_sync_openai_emits_normalized_llmcall(events):
    """A sync OpenAI-style call publishes exactly one fully populated ``LLMCall``."""
    wrapped = instrument(_openai_client())
    prompt = [{"role": "user", "content": "hi"}]
    wrapped.chat.completions.create(model="gpt-4o", messages=prompt)

    assert len(events) == 1
    call = events[0]
    assert isinstance(call, LLMCall)
    assert call.provider == "openai"
    assert call.model == "gpt-4o"
    assert call.usage == Usage(input_tokens=100, output_tokens=50)
    assert isinstance(call.cost, Money)
    # 0.0000025*100 + 0.00001*50 = 0.00075
    expected_cost = Decimal("0.00075")
    assert call.cost.amount == expected_cost
    assert call.latency_ms is not None and call.latency_ms >= 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_instrument_is_idempotent(events):
    """Instrumenting a client twice neither re-wraps it nor duplicates events."""
    client = _openai_client()
    instrument(client)
    create_after_first_wrap = client.chat.completions.create

    second_result = instrument(client)
    assert second_result is client
    assert client.chat.completions.create is create_after_first_wrap  # not double-wrapped

    client.chat.completions.create(model="gpt-4o", messages=[])
    assert len(events) == 1  # exactly one event per call, not two
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def test_async_anthropic_emits_event(events):
    """An awaited Anthropic-style call publishes one event with provider and usage set."""
    wrapped = instrument(_anthropic_async_client())
    prompt = [{"role": "user", "content": "hi"}]
    await wrapped.messages.create(model="claude-opus-4-8", messages=prompt)

    assert len(events) == 1
    emitted = events[0]
    assert emitted.provider == "anthropic"
    assert emitted.usage == Usage(input_tokens=10, output_tokens=20)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_unknown_client_returned_untouched(events):
    """An object that is not a recognised client is returned as the same object."""
    unrecognised = SimpleNamespace(foo="bar")
    result = instrument(unrecognised)
    assert result is unrecognised
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_unpriced_model_yields_no_cost(events):
    """A model without a price still emits an event: ``cost`` is ``None``, usage is kept."""
    wrapped = instrument(_openai_client())
    wrapped.chat.completions.create(model="totally-unknown", messages=[])

    emitted = events[0]
    assert emitted.cost is None
    assert emitted.usage is not None  # usage still captured
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""otel.span is a no-op (yields None) when OpenTelemetry isn't installed — never raises."""
|
|
2
|
+
|
|
3
|
+
from powerailabs.core import otel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_span_is_noop_without_otel():
    """``otel.span`` works as a context manager and yields ``None`` here."""
    # OTel is an optional extra and not installed in the test env.
    span_cm = otel.span("gpt-4o", provider="openai", custom="x")
    with span_cm as active_span:
        assert active_span is None
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Prices: exact Decimal estimates from the bundled snapshot + offline refresh fallback."""
|
|
2
|
+
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from powerailabs.core import prices
|
|
7
|
+
from powerailabs.core.types import Money
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture(autouse=True)
def _fresh_table():
    """Reset the price table before and after every test in this module."""
    prices._reset()
    yield
    prices._reset()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_estimate_is_exact_decimal_money():
    """``estimate`` returns ``Money`` whose amount is the exact Decimal product sum."""
    # 0.000005*1200 + 0.000025*300 = 0.006 + 0.0075 = 0.0135
    expected = Decimal("0.0135")
    estimated = prices.estimate("claude-opus-4-8", input_tokens=1200, output_tokens=300)
    assert isinstance(estimated, Money)
    assert estimated.amount == expected
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_estimate_includes_cached_rate():
    """Cached tokens add their own rate on top of the input and output cost."""
    # 0.0000025*1000 + 0.00001*500 + 0.00000125*200 = 0.0025 + 0.005 + 0.00025 = 0.00775
    expected = Decimal("0.00775")
    estimated = prices.estimate("gpt-4o", 1000, 500, cached_tokens=200)
    assert estimated.amount == expected
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_unknown_model_raises():
    """Estimating a model that is not in the table raises ``UnknownModelError``."""
    expected_error = prices.UnknownModelError
    with pytest.raises(expected_error):
        prices.estimate("does-not-exist", 100)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_bundled_snapshot_metadata():
    """The freshly reset table reports the bundled source, its date and known models."""
    origin = prices.source()
    assert origin == "bundled"
    dated = prices.snapshot_date()
    assert dated == "2026-06-01"
    known_models = prices.models()
    assert "claude-opus-4-8" in known_models
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_refresh_no_url_is_noop():
    """``refresh()`` without a URL returns ``False`` and keeps the bundled table."""
    refreshed = prices.refresh()
    assert refreshed is False
    assert prices.source() == "bundled"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_refresh_falls_back_silently_when_offline(monkeypatch):
    """A failing ``urlopen`` makes ``refresh`` return ``False`` without raising."""

    def offline_urlopen(*args, **kwargs):
        raise OSError("no network")

    monkeypatch.setattr("urllib.request.urlopen", offline_urlopen)

    refreshed = prices.refresh("https://example.com/prices.json")
    assert refreshed is False
    assert prices.source() == "bundled"
    # bundled table still usable
    fallback_estimate = prices.estimate("gpt-4o", 1000)
    assert fallback_estimate.amount == Decimal("0.0025")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_refresh_updates_from_static_json(monkeypatch):
    """A successful fetch replaces the table: source flips and new prices apply."""
    import contextlib
    import io
    import json

    remote_models = {"gpt-4o": {"input": 0.001, "output": 0}}
    table = {"_updated": "2099-01-01", "models": remote_models}
    body = json.dumps(table).encode("utf-8")

    @contextlib.contextmanager
    def fake_urlopen(url, timeout=5.0):
        # Serve the canned JSON as a readable byte stream.
        yield io.BytesIO(body)

    monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)

    refreshed = prices.refresh("https://example.com/prices.json")
    assert refreshed is True
    assert prices.source() == "refreshed"
    assert prices.estimate("gpt-4o", 1000).amount == Decimal("1")
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Protocols are structural: an object satisfies one by shape, with no import or base class."""
|
|
2
|
+
|
|
3
|
+
from powerailabs.core import protocols
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_compressor_is_satisfied_by_shape():
    """Any object with a matching ``compress`` method passes the ``Compressor`` check."""

    class MyCompressor:
        def compress(self, content, *, target_tokens=None, model=None, kind="auto"):
            limit = target_tokens or 10
            return content[:limit], None

    conforming = MyCompressor()
    unrelated = object()
    assert isinstance(conforming, protocols.Compressor)
    assert not isinstance(unrelated, protocols.Compressor)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_subscriber_and_sink_shapes():
    """A one-argument callable is a ``Subscriber``; an object with ``write`` is a ``Sink``."""
    subscriber = lambda event: None  # noqa: E731 - a bare callable is the shape under test
    assert isinstance(subscriber, protocols.Subscriber)

    class JsonlSink:
        def write(self, entry):
            pass

    sink = JsonlSink()
    assert isinstance(sink, protocols.Sink)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_handle_and_eviction_protocols_exist():
    """The ``protocols`` module exposes ``Handle`` and ``EvictionStrategy``."""
    for protocol_name in ("Handle", "EvictionStrategy"):
        assert hasattr(protocols, protocol_name)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Golden token counts: known input -> expected count per model family. No network.
|
|
2
|
+
|
|
3
|
+
OpenAI counts are forced onto the heuristic path (tiktoken absent / patched off) so the
|
|
4
|
+
goldens are deterministic regardless of whether tiktoken happens to be installed.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
from powerailabs.core import tokens
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture(autouse=True)
def _no_tiktoken(monkeypatch):
    """Disable the tiktoken lookup for every test in this module."""

    def _unavailable(model):
        return None

    # Force the offline heuristic so OpenAI goldens are stable everywhere.
    monkeypatch.setattr(tokens, "_tiktoken_encoding", _unavailable)
    yield
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_family_detection():
    """Model ids map to the expected tokenizer family."""
    expected_family = {
        "gpt-4o": "openai",
        "o3-mini": "openai",
        "claude-opus-4-8": "anthropic",
        "mistral-large": "default",
    }
    for model_id, fam in expected_family.items():
        assert tokens.family(model_id) == fam
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_golden_text_counts_differ_by_family():
    """The same text yields different heuristic counts per family; empty text is zero."""
    # "hello world" is 11 chars: openai @4.0 -> 3, anthropic @3.5 -> 4.
    sample = "hello world"
    assert tokens.count(sample, "gpt-4o") == 3
    assert tokens.count(sample, "claude-opus-4-8") == 4
    assert tokens.count("", "gpt-4o") == 0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_golden_message_counts_include_overhead():
    """A message list costs priming plus per-message overhead plus its content."""
    # priming(3) + per-message overhead(4) + content(3) = 10
    single_message = {"role": "user", "content": "hello world"}
    total = tokens.count([single_message], "gpt-4o")
    assert total == 10
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_multimodal_content_blocks_sum_text():
    """Only the text blocks of list-style content count; the image block adds nothing."""
    text_block = {"type": "text", "text": "hello world"}
    image_block = {"type": "image"}
    msgs = [{"role": "user", "content": [text_block, image_block]}]
    assert tokens.count(msgs, "gpt-4o") == 10
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_register_overrides_family():
    """A registered counter replaces the built-in estimate for its family."""
    # Snapshot the registry so that only this test's override is undone afterwards;
    # clearing it outright would also drop counters registered outside this test.
    previous = dict(tokens._counters)
    tokens.register("default", lambda t, m: 42)
    try:
        assert tokens.count("anything", "some-unknown-model") == 42
    finally:
        tokens._counters.clear()
        tokens._counters.update(previous)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""instrument_tool emits ToolCall; interceptors short-circuit calls (replay). No network."""
|
|
2
|
+
|
|
3
|
+
from types import SimpleNamespace
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from powerailabs.core import bus, instrument, instrument_tool
|
|
7
|
+
from powerailabs.core.instrument import MISS, add_interceptor, remove_interceptor
|
|
8
|
+
from powerailabs.core.types import LLMCall, ToolCall
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture
def events():
    """Yield a list that collects every event published on the bus during the test.

    The bus is reset before subscribing and again after the test finishes.
    """
    bus._reset()
    captured: list = []
    collect = captured.append
    bus.subscribe(collect)
    yield captured
    bus._reset()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_instrument_tool_emits_toolcall(events):
    """A bare ``@instrument_tool`` keeps the return value and publishes one ``ToolCall``."""

    @instrument_tool
    def search(query, top_k=3):
        return [f"result for {query}"]

    expected_result = ["result for refunds"]
    out = search("refunds", top_k=2)
    assert out == expected_result

    assert len(events) == 1
    tc = events[0]
    assert isinstance(tc, ToolCall)
    assert tc.name == "search"
    # Positional and keyword arguments are recorded separately.
    assert tc.arguments == {"args": ["refunds"], "kwargs": {"top_k": 2}}
    assert tc.result == expected_result
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_instrument_tool_named_and_idempotent(events):
    """``instrument_tool("name")`` sets the event name and does not re-wrap a wrapped tool."""

    @instrument_tool("lookup")
    def f(x):
        return x

    decorate = instrument_tool("lookup")
    wrapped_again = decorate(f)
    assert wrapped_again is f  # idempotent

    f(5)
    first_event = events[0]
    assert first_event.name == "lookup"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
async def test_async_tool(events):
    """An instrumented coroutine tool stays awaitable and publishes an event under its name."""

    @instrument_tool
    async def fetch(url):
        return "body"

    body = await fetch("http://x")
    assert body == "body"
    assert events[0].name == "fetch"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_interceptor_replays_llm_call_without_running_it(events):
    """An interceptor's canned response replaces the real call and is flagged as replayed."""
    calls = {"n": 0}

    class Completions:
        def create(self, **kwargs):
            calls["n"] += 1
            return SimpleNamespace(usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1))

    raw_client = SimpleNamespace(chat=SimpleNamespace(completions=Completions()))
    client = instrument(raw_client)

    canned_usage = SimpleNamespace(prompt_tokens=10, completion_tokens=20)
    canned = SimpleNamespace(usage=canned_usage)

    def replayer(event):
        if isinstance(event, LLMCall):
            return canned
        return MISS

    add_interceptor(replayer)
    try:
        resp = client.chat.completions.create(model="gpt-4o", messages=[])
    finally:
        # Always unregister so the interceptor cannot leak into other tests.
        remove_interceptor(replayer)

    assert resp is canned
    assert calls["n"] == 0  # real method never ran
    emitted = events[0]
    assert emitted.metadata.get("replayed") is True
    assert emitted.usage.input_tokens == 10  # usage taken from the replayed response
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Money arithmetic must be exact (Decimal) and never use float. See write-tests skill."""
|
|
2
|
+
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from powerailabs.core.types import Money, Usage
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_money_coerces_to_decimal_without_float_noise():
    """Float and string amounts become exact Decimals."""
    # 0.1 + 0.2 is the classic float trap; Money must stay exact.
    total = Money(0.1) + Money(0.2)
    assert total.amount == Decimal("0.3")
    assert Money(0.1).amount == Decimal("0.1")
    tiny = "0.0000025"
    assert Money(tiny).amount == Decimal(tiny)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_money_arithmetic():
    """Addition, subtraction and scalar multiplication (either side) are exact."""
    added = Money(Decimal("0.01")) + Money(Decimal("0.02"))
    assert added.amount == Decimal("0.03")

    subtracted = Money(Decimal("0.05")) - Money(Decimal("0.02"))
    assert subtracted.amount == Decimal("0.03")

    scaled_right = Money(Decimal("0.001")) * 5
    assert scaled_right.amount == Decimal("0.005")

    scaled_left = 3 * Money(Decimal("0.002"))
    assert scaled_left.amount == Decimal("0.006")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_money_sum_starts_at_zero():
    """The built-in ``sum`` works on ``Money`` even though it starts from integer 0."""
    parts = [Money(Decimal(cents)) for cents in ("0.01", "0.02", "0.03")]
    total = sum(parts)
    assert total == Money(Decimal("0.06"))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_money_comparisons():
    """Ordering and equality compare by amount within one currency."""
    one = Money(Decimal("1"))
    two = Money(Decimal("2"))
    assert one < two
    assert two >= Money(Decimal("2"))
    assert Money.zero() == Money(Decimal("0"))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_money_currency_mismatch_raises():
    """Adding or ordering amounts in different currencies raises ``ValueError``."""
    dollars = Money(Decimal("1"), "USD")
    euros = Money(Decimal("1"), "EUR")
    with pytest.raises(ValueError):
        dollars + euros
    with pytest.raises(ValueError):
        _ = dollars < euros
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_usage_total():
    """``total_tokens`` is input plus output."""
    usage = Usage(input_tokens=100, output_tokens=50)
    assert usage.total_tokens == 150
|