costkey 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- costkey-0.1.0/.gitignore +10 -0
- costkey-0.1.0/PKG-INFO +76 -0
- costkey-0.1.0/README.md +57 -0
- costkey-0.1.0/costkey/__init__.py +13 -0
- costkey-0.1.0/costkey/client.py +136 -0
- costkey-0.1.0/costkey/patch.py +285 -0
- costkey-0.1.0/costkey/pricing.py +65 -0
- costkey-0.1.0/costkey/providers.py +133 -0
- costkey-0.1.0/costkey/stack.py +45 -0
- costkey-0.1.0/costkey/transport.py +127 -0
- costkey-0.1.0/costkey/types.py +75 -0
- costkey-0.1.0/pyproject.toml +34 -0
costkey-0.1.0/.gitignore
ADDED
costkey-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: costkey
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code.
|
|
5
|
+
Project-URL: Homepage, https://costkey.dev
|
|
6
|
+
Project-URL: Repository, https://github.com/costkey/costkey-python
|
|
7
|
+
Project-URL: Documentation, https://github.com/costkey/costkey-python
|
|
8
|
+
Author-email: CostKey <hello@costkey.dev>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Keywords: ai,anthropic,cost,gemini,llm,observability,openai,tracking
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Requires-Dist: httpx>=0.24.0
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# costkey
|
|
21
|
+
|
|
22
|
+
> Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install costkey
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import costkey
|
|
34
|
+
|
|
35
|
+
costkey.init(dsn="https://ck_your_key@costkey.dev/your-project")
|
|
36
|
+
|
|
37
|
+
# That's it. Every AI call is now tracked automatically.
|
|
38
|
+
# Works with OpenAI, Anthropic, Google Gemini, Azure OpenAI.
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## How It Works
|
|
42
|
+
|
|
43
|
+
CostKey patches `httpx` and `requests` — the HTTP clients that every AI SDK uses under the hood. When your code calls any AI provider, CostKey automatically:
|
|
44
|
+
|
|
45
|
+
1. **Detects** the AI provider from the URL
|
|
46
|
+
2. **Extracts** token usage from the response
|
|
47
|
+
3. **Captures** a stack trace (which function, which file, which line)
|
|
48
|
+
4. **Computes** cost using built-in pricing for 30+ models
|
|
49
|
+
5. **Ships** the event to your CostKey dashboard (async, non-blocking)
|
|
50
|
+
|
|
51
|
+
## Tracing
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
with costkey.start_trace(name="POST /api/search"):
|
|
55
|
+
intent = classify_intent(query)
|
|
56
|
+
results = search(query)
|
|
57
|
+
summary = summarize(results)
|
|
58
|
+
# All 3 AI calls grouped under one trace
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Manual Context
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
with costkey.with_context(task="summarize", team="search"):
|
|
65
|
+
response = openai.chat.completions.create(...)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Privacy
|
|
69
|
+
|
|
70
|
+
- Never captures API keys — request headers are never read
|
|
71
|
+
- Auto-scrubs credentials from request/response bodies
|
|
72
|
+
- `before_send` hook for custom PII scrubbing
|
|
73
|
+
|
|
74
|
+
## License
|
|
75
|
+
|
|
76
|
+
MIT
|
costkey-0.1.0/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# costkey
|
|
2
|
+
|
|
3
|
+
> Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install costkey
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import costkey
|
|
15
|
+
|
|
16
|
+
costkey.init(dsn="https://ck_your_key@costkey.dev/your-project")
|
|
17
|
+
|
|
18
|
+
# That's it. Every AI call is now tracked automatically.
|
|
19
|
+
# Works with OpenAI, Anthropic, Google Gemini, Azure OpenAI.
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## How It Works
|
|
23
|
+
|
|
24
|
+
CostKey patches `httpx` and `requests` — the HTTP clients that every AI SDK uses under the hood. When your code calls any AI provider, CostKey automatically:
|
|
25
|
+
|
|
26
|
+
1. **Detects** the AI provider from the URL
|
|
27
|
+
2. **Extracts** token usage from the response
|
|
28
|
+
3. **Captures** a stack trace (which function, which file, which line)
|
|
29
|
+
4. **Computes** cost using built-in pricing for 30+ models
|
|
30
|
+
5. **Ships** the event to your CostKey dashboard (async, non-blocking)
|
|
31
|
+
|
|
32
|
+
## Tracing
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
with costkey.start_trace(name="POST /api/search"):
|
|
36
|
+
intent = classify_intent(query)
|
|
37
|
+
results = search(query)
|
|
38
|
+
summary = summarize(results)
|
|
39
|
+
# All 3 AI calls grouped under one trace
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Manual Context
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
with costkey.with_context(task="summarize", team="search"):
|
|
46
|
+
response = openai.chat.completions.create(...)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Privacy
|
|
50
|
+
|
|
51
|
+
- Never captures API keys — request headers are never read
|
|
52
|
+
- Auto-scrubs credentials from request/response bodies
|
|
53
|
+
- `before_send` hook for custom PII scrubbing
|
|
54
|
+
|
|
55
|
+
## License
|
|
56
|
+
|
|
57
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from costkey.client import init, shutdown, flush, with_context, start_trace
|
|
2
|
+
from costkey.client import register_extractor, register_pricing
|
|
3
|
+
|
|
4
|
+
__version__ = "0.1.0"
|
|
5
|
+
__all__ = [
|
|
6
|
+
"init",
|
|
7
|
+
"shutdown",
|
|
8
|
+
"flush",
|
|
9
|
+
"with_context",
|
|
10
|
+
"start_trace",
|
|
11
|
+
"register_extractor",
|
|
12
|
+
"register_pricing",
|
|
13
|
+
]
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""CostKey Python SDK — Sentry for AI costs."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import uuid
|
|
4
|
+
import logging
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from contextvars import copy_context
|
|
7
|
+
from typing import Any, Callable, Generator
|
|
8
|
+
from costkey.types import CostKeyOptions, CostKeyEvent
|
|
9
|
+
from costkey.transport import Transport
|
|
10
|
+
from costkey.patch import patch, unpatch, _context, set_context, get_context
|
|
11
|
+
from costkey.providers import register_extractor as _register_extractor
|
|
12
|
+
from costkey.pricing import register_pricing as _register_pricing
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger("costkey")
|
|
15
|
+
|
|
16
|
+
_transport: Transport | None = None
|
|
17
|
+
_initialized = False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _parse_dsn(dsn: str) -> tuple[str, str, str]:
|
|
21
|
+
"""Parse DSN → (endpoint, auth_key, project_id)."""
|
|
22
|
+
from urllib.parse import urlparse
|
|
23
|
+
parsed = urlparse(dsn)
|
|
24
|
+
auth_key = parsed.username or ""
|
|
25
|
+
if not auth_key:
|
|
26
|
+
raise ValueError(f"[costkey] DSN missing auth key: {dsn}")
|
|
27
|
+
project_id = parsed.path.lstrip("/")
|
|
28
|
+
if not project_id:
|
|
29
|
+
raise ValueError(f"[costkey] DSN missing project ID: {dsn}")
|
|
30
|
+
endpoint = f"{parsed.scheme}://{parsed.hostname}"
|
|
31
|
+
if parsed.port:
|
|
32
|
+
endpoint += f":{parsed.port}"
|
|
33
|
+
endpoint += "/api/v1/events"
|
|
34
|
+
return endpoint, auth_key, project_id
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def init(dsn: str, *, capture_body: bool = True,
         before_send: Callable[[CostKeyEvent], CostKeyEvent | None] | None = None,
         max_batch_size: int = 50, flush_interval: float = 5.0,
         debug: bool = False, default_context: dict[str, Any] | None = None) -> None:
    """
    Initialize CostKey. Call once at app startup.

    Parses the DSN, creates and starts the background transport, and
    monkey-patches httpx/requests so AI provider calls are intercepted.
    A second call is a no-op.

    Args:
        dsn: CostKey DSN, e.g. ``https://ck_abc123@costkey.dev/my-project``.
        capture_body: Attach (scrubbed) request/response bodies to events.
        before_send: Hook to mutate or drop events (return None to drop).
        max_batch_size: Flush once this many events are buffered.
        flush_interval: Seconds between periodic background flushes.
        debug: Emit diagnostic log messages.
        default_context: Context dict merged into every event.

    >>> import costkey
    >>> costkey.init(dsn="https://ck_abc123@costkey.dev/my-project")
    >>> # That's it. Every AI call is now tracked.
    """
    global _transport, _initialized

    # Idempotent: re-initializing must not double-patch the HTTP clients.
    if _initialized:
        if debug:
            logger.warning("[costkey] Already initialized, skipping")
        return

    endpoint, auth_key, project_id = _parse_dsn(dsn)

    _transport = Transport(
        endpoint=endpoint, auth_key=auth_key,
        max_batch_size=max_batch_size, flush_interval=flush_interval,
        debug=debug,
    )

    # Install the interception patches, wiring them to the transport.
    patch(
        transport=_transport, project_id=project_id,
        capture_body=capture_body, before_send=before_send,
        default_context=default_context or {}, debug=debug,
    )

    # Start the periodic flush timer only after patching is in place.
    _transport.start()
    _initialized = True

    if debug:
        logger.info(f"[costkey] Initialized for project {project_id}")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Alias for MLflow familiarity
|
|
77
|
+
autolog = init
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def shutdown() -> None:
    """Flush any buffered events, stop the transport, and undo all HTTP patches."""
    global _transport, _initialized

    transport = _transport
    if transport is not None:
        transport.flush()
        transport.stop()
        _transport = None

    unpatch()
    _initialized = False
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def flush() -> None:
    """Flush all pending events without shutting down."""
    # Safe to call before init(): _transport is None until then.
    if _transport:
        _transport.flush()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@contextmanager
def with_context(**kwargs: Any) -> Generator[None, None, None]:
    """
    Tag AI calls with custom context.

    Entries are layered on top of the currently active context for the
    duration of the block and removed again on exit.

    >>> with costkey.with_context(task="summarize", team="search"):
    ...     openai.chat.completions.create(...)
    """
    combined = dict(get_context())
    combined.update(kwargs)
    token = _context.set(combined)
    try:
        yield
    finally:
        _context.reset(token)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@contextmanager
def start_trace(name: str | None = None, trace_id: str | None = None) -> Generator[None, None, None]:
    """
    Start a trace. All AI calls inside are grouped under one trace ID.

    A random hex ID is generated when *trace_id* is not supplied.

    >>> with costkey.start_trace(name="POST /api/search"):
    ...     classify_intent(query)
    ...     results = search(query)
    ...     summary = summarize(results)
    """
    if trace_id is None:
        trace_id = uuid.uuid4().hex
    with with_context(traceId=trace_id, traceName=name):
        yield
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def register_extractor(extractor: Any) -> None:
    """Register a custom provider extractor.

    Thin delegate to ``costkey.providers.register_extractor``; the
    extractor should provide match/extract_usage/extract_model.
    """
    _register_extractor(extractor)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def register_pricing(model: str, input_per_1m: float, output_per_1m: float, **kwargs: Any) -> None:
    """Register custom model pricing (USD per 1M tokens).

    Extra keyword args (e.g. cache_read_per_1m, cache_write_per_1m) are
    forwarded to ``costkey.pricing.register_pricing``.
    """
    _register_pricing(model, input_per_1m, output_per_1m, **kwargs)
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""Monkey-patch HTTP clients to intercept AI provider calls."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
import re
|
|
7
|
+
import logging
|
|
8
|
+
from contextvars import ContextVar
|
|
9
|
+
from typing import Any, Callable
|
|
10
|
+
from costkey.types import CostKeyEvent, NormalizedUsage, Provider
|
|
11
|
+
from costkey.providers import find_extractor
|
|
12
|
+
from costkey.stack import capture_call_site
|
|
13
|
+
from costkey.pricing import compute_cost
|
|
14
|
+
from costkey.transport import Transport
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger("costkey")
|
|
17
|
+
|
|
18
|
+
# Context var for tracing / manual context.
# NOTE(review): the shared mutable {} default is only safe because
# with_context()/set_context() always replace the dict rather than
# mutating it in place — confirm callers of get_context() never mutate.
_context: ContextVar[dict[str, Any]] = ContextVar("costkey_context", default={})
|
|
20
|
+
|
|
21
|
+
# Secret patterns to scrub from bodies
|
|
22
|
+
_SECRET_PATTERNS = [
|
|
23
|
+
re.compile(r"^sk-[a-zA-Z0-9]{20,}$"),
|
|
24
|
+
re.compile(r"^sk-ant-[a-zA-Z0-9\-]{20,}$"),
|
|
25
|
+
re.compile(r"^AIza[a-zA-Z0-9_\-]{30,}$"),
|
|
26
|
+
re.compile(r"^Bearer\s+.{20,}$"),
|
|
27
|
+
re.compile(r"^eyJ[a-zA-Z0-9_\-]{20,}"),
|
|
28
|
+
]
|
|
29
|
+
_SECRET_KEYS = frozenset({
|
|
30
|
+
"api_key", "apikey", "api-key", "secret", "secret_key",
|
|
31
|
+
"token", "access_token", "refresh_token", "password",
|
|
32
|
+
"authorization", "auth", "private_key",
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _scrub(obj: Any) -> Any:
|
|
37
|
+
if obj is None:
|
|
38
|
+
return None
|
|
39
|
+
if isinstance(obj, str):
|
|
40
|
+
for pat in _SECRET_PATTERNS:
|
|
41
|
+
if pat.match(obj):
|
|
42
|
+
return "[REDACTED]"
|
|
43
|
+
return obj
|
|
44
|
+
if isinstance(obj, list):
|
|
45
|
+
return [_scrub(item) for item in obj]
|
|
46
|
+
if isinstance(obj, dict):
|
|
47
|
+
return {
|
|
48
|
+
k: "[REDACTED]" if k.lower() in _SECRET_KEYS else _scrub(v)
|
|
49
|
+
for k, v in obj.items()
|
|
50
|
+
}
|
|
51
|
+
return obj
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class _PatchState:
    """Mutable module-level holder for patch configuration and originals."""

    def __init__(self) -> None:
        # Event sink; None until patch() is called.
        self.transport: Transport | None = None
        self.project_id: str = ""
        # Whether scrubbed request/response bodies are attached to events.
        self.capture_body: bool = True
        # Optional user hook that may mutate or drop events.
        self.before_send: Callable | None = None
        # Merged into every event, overridden by the contextvar context.
        self.default_context: dict[str, Any] = {}
        self.debug: bool = False
        # Original (unpatched) send methods, retained for unpatch().
        self._original_httpx_send: Any = None
        self._original_httpx_async_send: Any = None
        self._original_requests_send: Any = None
        self.patched = False
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
_state = _PatchState()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def patch(transport: Transport, project_id: str, capture_body: bool,
          before_send: Callable | None, default_context: dict[str, Any], debug: bool) -> None:
    """Install the httpx/requests interception patches.

    Records configuration on the shared _state, then monkey-patches the
    HTTP clients. Idempotent: returns immediately if already patched.
    """
    if _state.patched:
        return

    _state.transport = transport
    _state.project_id = project_id
    _state.capture_body = capture_body
    _state.before_send = before_send
    _state.default_context = default_context
    _state.debug = debug

    _patch_httpx()
    _patch_requests()
    _state.patched = True
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def unpatch() -> None:
    """Restore the original httpx/requests send methods."""
    _unpatch_httpx()
    _unpatch_requests()
    _state.patched = False
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _patch_httpx() -> None:
    """Wrap httpx.Client.send and httpx.AsyncClient.send with interception.

    Non-AI traffic is forwarded untouched. For recognized AI provider
    URLs the wrapper records the call site, timing, and JSON bodies,
    then emits an event via _process(). No-op if httpx is not installed.
    """
    try:
        import httpx

        # Keep the original unbound method so unpatch() can restore it.
        _state._original_httpx_send = httpx.Client.send

        def patched_send(self: Any, request: Any, **kwargs: Any) -> Any:
            url = str(request.url)
            extractor = find_extractor(url)

            # Fast path: not an AI provider URL — forward untouched.
            if not extractor:
                return _state._original_httpx_send(self, request, **kwargs)

            # Capture the call site before dispatching the request.
            call_site = capture_call_site()
            # Per-call contextvar entries override the defaults.
            ctx = {**_state.default_context, **_context.get()}
            start = time.perf_counter()

            request_body = None
            if request.content:
                try:
                    request_body = json.loads(request.content)
                except Exception:
                    pass  # non-JSON request bodies are ignored

            response = _state._original_httpx_send(self, request, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000

            try:
                response_body = response.json()
            except Exception:
                response_body = None  # e.g. streaming or non-JSON response

            _process(extractor, url, request.method, response.status_code,
                     request_body, response_body, duration_ms, call_site, ctx)

            return response

        httpx.Client.send = patched_send

        # Also patch async client
        _state._original_httpx_async_send = httpx.AsyncClient.send

        async def patched_async_send(self: Any, request: Any, **kwargs: Any) -> Any:
            # Mirror of patched_send for the async client.
            url = str(request.url)
            extractor = find_extractor(url)

            if not extractor:
                return await _state._original_httpx_async_send(self, request, **kwargs)

            call_site = capture_call_site()
            ctx = {**_state.default_context, **_context.get()}
            start = time.perf_counter()

            request_body = None
            if request.content:
                try:
                    request_body = json.loads(request.content)
                except Exception:
                    pass

            response = await _state._original_httpx_async_send(self, request, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000

            try:
                response_body = response.json()
            except Exception:
                response_body = None

            _process(extractor, url, request.method, response.status_code,
                     request_body, response_body, duration_ms, call_site, ctx)

            return response

        httpx.AsyncClient.send = patched_async_send

    except ImportError:
        if _state.debug:
            logger.debug("[costkey] httpx not installed, skipping patch")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _unpatch_httpx() -> None:
    """Restore the original httpx send methods, if they were saved."""
    try:
        import httpx
        if _state._original_httpx_send:
            httpx.Client.send = _state._original_httpx_send
        if _state._original_httpx_async_send:
            httpx.AsyncClient.send = _state._original_httpx_async_send
    except ImportError:
        pass
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _patch_requests() -> None:
    """Wrap requests.Session.send with interception (mirror of _patch_httpx).

    No-op if requests is not installed.
    """
    try:
        import requests

        # Keep the original unbound method so unpatch() can restore it.
        _state._original_requests_send = requests.Session.send

        def patched_send(self: Any, request: Any, **kwargs: Any) -> Any:
            url = str(request.url)
            extractor = find_extractor(url)

            # Fast path: not an AI provider URL — forward untouched.
            if not extractor:
                return _state._original_requests_send(self, request, **kwargs)

            call_site = capture_call_site()
            # Per-call contextvar entries override the defaults.
            ctx = {**_state.default_context, **_context.get()}
            start = time.perf_counter()

            request_body = None
            if request.body:
                try:
                    request_body = json.loads(request.body)
                except Exception:
                    pass  # non-JSON request bodies are ignored

            response = _state._original_requests_send(self, request, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000

            try:
                response_body = response.json()
            except Exception:
                response_body = None  # non-JSON response

            _process(extractor, url, request.method, response.status_code,
                     request_body, response_body, duration_ms, call_site, ctx)

            return response

        requests.Session.send = patched_send

    except ImportError:
        if _state.debug:
            logger.debug("[costkey] requests not installed, skipping patch")
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _unpatch_requests() -> None:
    """Restore the original requests.Session.send, if it was saved."""
    try:
        import requests
        if _state._original_requests_send:
            requests.Session.send = _state._original_requests_send
    except ImportError:
        pass
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _process(extractor: Any, url: str, method: str, status_code: int | None,
             request_body: Any, response_body: Any,
             duration_ms: float, call_site: Any, ctx: dict[str, Any]) -> None:
    """Build a CostKeyEvent from an intercepted call and enqueue it.

    Never raises: any failure here must not break the user's HTTP call.
    """
    try:
        usage = extractor.extract_usage(response_body) if response_body else None
        model = extractor.extract_model(request_body, response_body)
        # Cost is only computable when both model and usage are known.
        cost_usd = compute_cost(model, usage) if model and usage else None

        event = CostKeyEvent(
            id=uuid.uuid4().hex,
            # UTC timestamp; millisecond part is fixed at .000.
            timestamp=time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()),
            project_id=_state.project_id,
            provider=extractor.provider,
            model=model,
            url=url,
            method=method,
            status_code=status_code,
            usage=usage,
            cost_usd=cost_usd,
            duration_ms=round(duration_ms, 2),
            streaming=False,
            call_site=call_site,
            context=ctx,
            # Bodies are secret-scrubbed before leaving the process.
            request_body=_scrub(request_body) if _state.capture_body else None,
            response_body=_scrub(response_body) if _state.capture_body else None,
        )

        # User hook may mutate the event, or drop it by returning None.
        if _state.before_send:
            try:
                event = _state.before_send(event)
            except Exception:
                if _state.debug:
                    logger.warning("[costkey] before_send threw, dropping event")
                return

        if event and _state.transport:
            _state.transport.enqueue(event)
    except Exception:
        if _state.debug:
            logger.warning("[costkey] Error processing event", exc_info=True)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def get_context() -> dict[str, Any]:
    """Return the currently active CostKey context dict."""
    return _context.get()
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def set_context(ctx: dict[str, Any]) -> None:
    """Replace the active CostKey context in the current execution context."""
    _context.set(ctx)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Model pricing — cost per 1M tokens in USD."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from costkey.types import NormalizedUsage
|
|
4
|
+
|
|
5
|
+
# (input_per_1M, output_per_1M, cache_read_per_1M, cache_write_per_1M)
|
|
6
|
+
_PRICING: dict[str, tuple[float, float, float | None, float | None]] = {
|
|
7
|
+
"gpt-4o": (2.5, 10, None, None),
|
|
8
|
+
"gpt-4o-mini": (0.15, 0.6, None, None),
|
|
9
|
+
"gpt-4-turbo": (10, 30, None, None),
|
|
10
|
+
"gpt-4": (30, 60, None, None),
|
|
11
|
+
"gpt-3.5-turbo": (0.5, 1.5, None, None),
|
|
12
|
+
"o1": (15, 60, None, None),
|
|
13
|
+
"o1-mini": (3, 12, None, None),
|
|
14
|
+
"o3": (10, 40, None, None),
|
|
15
|
+
"o3-mini": (1.1, 4.4, None, None),
|
|
16
|
+
"o4-mini": (1.1, 4.4, None, None),
|
|
17
|
+
"claude-opus-4-0-20250514": (15, 75, 1.5, 18.75),
|
|
18
|
+
"claude-sonnet-4-0-20250514": (3, 15, 0.3, 3.75),
|
|
19
|
+
"claude-sonnet-4-5-20250514": (3, 15, 0.3, 3.75),
|
|
20
|
+
"claude-haiku-3-5-20241022": (0.8, 4, 0.08, 1),
|
|
21
|
+
"claude-3-5-sonnet-20241022": (3, 15, 0.3, 3.75),
|
|
22
|
+
"claude-3-opus-20240229": (15, 75, 1.5, 18.75),
|
|
23
|
+
"gemini-2.0-flash": (0.1, 0.4, None, None),
|
|
24
|
+
"gemini-2.0-flash-lite": (0.02, 0.1, None, None),
|
|
25
|
+
"gemini-1.5-pro": (1.25, 5, None, None),
|
|
26
|
+
"gemini-1.5-flash": (0.075, 0.3, None, None),
|
|
27
|
+
"gemini-2.5-pro": (1.25, 10, None, None),
|
|
28
|
+
"gemini-2.5-flash": (0.15, 0.6, None, None),
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _find_pricing(model: str) -> tuple[float, float, float | None, float | None] | None:
|
|
33
|
+
if model in _PRICING:
|
|
34
|
+
return _PRICING[model]
|
|
35
|
+
parts = model.split("-")
|
|
36
|
+
for i in range(len(parts) - 1, 0, -1):
|
|
37
|
+
prefix = "-".join(parts[:i])
|
|
38
|
+
if prefix in _PRICING:
|
|
39
|
+
return _PRICING[prefix]
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def compute_cost(model: str, usage: NormalizedUsage) -> float | None:
|
|
44
|
+
pricing = _find_pricing(model)
|
|
45
|
+
if pricing is None:
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
inp, out, cache_r, cache_w = pricing
|
|
49
|
+
cost = 0.0
|
|
50
|
+
if usage.input_tokens is not None:
|
|
51
|
+
cost += (usage.input_tokens / 1_000_000) * inp
|
|
52
|
+
if usage.output_tokens is not None:
|
|
53
|
+
cost += (usage.output_tokens / 1_000_000) * out
|
|
54
|
+
if usage.cache_read_tokens is not None and cache_r is not None:
|
|
55
|
+
cost += (usage.cache_read_tokens / 1_000_000) * cache_r
|
|
56
|
+
if usage.cache_creation_tokens is not None and cache_w is not None:
|
|
57
|
+
cost += (usage.cache_creation_tokens / 1_000_000) * cache_w
|
|
58
|
+
|
|
59
|
+
return round(cost, 6)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def register_pricing(model: str, input_per_1m: float, output_per_1m: float,
                     cache_read_per_1m: float | None = None,
                     cache_write_per_1m: float | None = None) -> None:
    """Add or override pricing for *model* (USD per 1M tokens)."""
    _PRICING[model] = (input_per_1m, output_per_1m, cache_read_per_1m, cache_write_per_1m)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Provider extractors — detect AI providers by URL and extract usage from responses."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
from typing import Any, Protocol
|
|
5
|
+
from costkey.types import Provider, NormalizedUsage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProviderExtractor(Protocol):
    """Structural interface for per-provider usage extraction."""

    # Which AI provider this extractor reports events as.
    provider: Provider

    def match(self, url: str) -> bool:
        """Return True when *url* belongs to this provider."""
        ...

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Pull normalized token counts from a response body, if present."""
        ...

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        """Determine the model name from the request/response bodies."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _as_int(val: Any) -> int | None:
|
|
16
|
+
if isinstance(val, (int, float)) and not isinstance(val, bool):
|
|
17
|
+
return int(val)
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class OpenAIExtractor:
    """Usage extractor for OpenAI and Azure OpenAI endpoints."""

    provider = Provider.OPENAI

    def match(self, url: str) -> bool:
        """True for api.openai.com and *.openai.azure.com hosts."""
        host = urlparse(url).hostname or ""
        return host == "api.openai.com" or host.endswith(".openai.azure.com")

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Extract token counts from an OpenAI response payload.

        Supports both the legacy field names (prompt_tokens /
        completion_tokens) and the newer ones (input_tokens /
        output_tokens). Returns None when no usage dict is present.
        """
        if not isinstance(body, dict):
            return None
        usage = body.get("usage")
        if not isinstance(usage, dict):
            return None

        # Explicit None checks rather than `or`: a legitimate token
        # count of 0 is falsy and must not fall through to the
        # alternate field name.
        input_t = _as_int(usage.get("prompt_tokens"))
        if input_t is None:
            input_t = _as_int(usage.get("input_tokens"))
        output_t = _as_int(usage.get("completion_tokens"))
        if output_t is None:
            output_t = _as_int(usage.get("output_tokens"))
        total_t = _as_int(usage.get("total_tokens"))
        if total_t is None and input_t is not None and output_t is not None:
            total_t = input_t + output_t

        details = usage.get("completion_tokens_details") or usage.get("output_tokens_details") or {}
        reasoning = _as_int(details.get("reasoning_tokens")) if isinstance(details, dict) else None

        return NormalizedUsage(
            input_tokens=input_t, output_tokens=output_t, total_tokens=total_t,
            reasoning_tokens=reasoning,
        )

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        """Prefer the model echoed in the response; fall back to the request."""
        if isinstance(response_body, dict) and isinstance(response_body.get("model"), str):
            return response_body["model"]
        if isinstance(request_body, dict) and isinstance(request_body.get("model"), str):
            return request_body["model"]
        return None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class AnthropicExtractor:
    """Usage extractor for the Anthropic Messages API."""

    provider = Provider.ANTHROPIC

    def match(self, url: str) -> bool:
        """True only for the api.anthropic.com host."""
        return (urlparse(url).hostname or "") == "api.anthropic.com"

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Extract token counts, including prompt-cache figures."""
        if not isinstance(body, dict):
            return None
        usage = body.get("usage")
        if not isinstance(usage, dict):
            return None

        input_t = _as_int(usage.get("input_tokens"))
        output_t = _as_int(usage.get("output_tokens"))
        if input_t is None and output_t is None:
            total_t = None
        else:
            total_t = (input_t or 0) + (output_t or 0)

        return NormalizedUsage(
            input_tokens=input_t, output_tokens=output_t, total_tokens=total_t,
            cache_read_tokens=_as_int(usage.get("cache_read_input_tokens")),
            cache_creation_tokens=_as_int(usage.get("cache_creation_input_tokens")),
        )

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        """Prefer the model from the response body, then the request body."""
        for candidate in (response_body, request_body):
            if isinstance(candidate, dict):
                model = candidate.get("model")
                if isinstance(model, str):
                    return model
        return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class GoogleExtractor:
    """Usage extractor for Google Gemini (AI Studio and Vertex AI) endpoints."""

    provider = Provider.GOOGLE

    def match(self, url: str) -> bool:
        """True for generativelanguage.googleapis.com and *-aiplatform hosts."""
        host = urlparse(url).hostname or ""
        if host == "generativelanguage.googleapis.com":
            return True
        return host.endswith("-aiplatform.googleapis.com")

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Extract token counts from the response's usageMetadata block."""
        if not isinstance(body, dict):
            return None
        meta = body.get("usageMetadata")
        if not isinstance(meta, dict):
            return None

        input_t = _as_int(meta.get("promptTokenCount"))
        output_t = _as_int(meta.get("candidatesTokenCount"))
        total_t = _as_int(meta.get("totalTokenCount"))
        if total_t is None and input_t is not None and output_t is not None:
            total_t = input_t + output_t

        return NormalizedUsage(
            input_tokens=input_t, output_tokens=output_t, total_tokens=total_t,
            reasoning_tokens=_as_int(meta.get("thoughtsTokenCount")),
            cache_read_tokens=_as_int(meta.get("cachedContentTokenCount")),
        )

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        """Gemini echoes the resolved model as ``modelVersion`` in responses."""
        if isinstance(response_body, dict):
            version = response_body.get("modelVersion")
            if isinstance(version, str):
                return version
        return None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Registry of built-in extractors, consulted in order by find_extractor().
_extractors: list[ProviderExtractor] = [OpenAIExtractor(), AnthropicExtractor(), GoogleExtractor()]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def find_extractor(url: str) -> ProviderExtractor | None:
    """Return the first registered extractor whose match() accepts *url*, else None."""
    return next((candidate for candidate in _extractors if candidate.match(url)), None)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def register_extractor(extractor: ProviderExtractor) -> None:
    """Append a custom extractor to the registry (checked after the built-ins)."""
    _extractors.append(extractor)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Stack trace capture — auto-attribute AI calls to code."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import traceback
|
|
4
|
+
from costkey.types import CallSite, StackFrame
|
|
5
|
+
|
|
6
|
+
_INTERNAL = ("costkey/", "site-packages/costkey")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def capture_call_site() -> CallSite | None:
    """Capture the user-code stack that triggered an AI call.

    Frames originating from costkey itself are filtered out and the
    remaining frames are ordered most-recent-first. Returns None when
    no user frame is found.

    Frame metadata comes from ``traceback.extract_stack()`` rather than
    re-parsing ``format_stack()`` strings: the previous string parse
    split on ", " and silently broke on any file path containing a
    comma-space. The raw formatted stack is still kept verbatim.
    """
    raw = traceback.format_stack()
    frames: list[StackFrame] = []

    # extract_stack() is oldest-first; reverse for most-recent-first.
    for summary in reversed(traceback.extract_stack()):
        file_name = summary.filename
        if any(marker in file_name for marker in _INTERNAL):
            continue
        frames.append(StackFrame(
            function_name=summary.name,
            file_name=file_name,
            line_number=summary.lineno,
        ))

    if not frames:
        return None
    return CallSite(raw="".join(raw), frames=frames)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Batched async transport — ships events to costkey.dev. Never blocks. Never throws."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import json
|
|
4
|
+
import threading
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
import httpx
|
|
8
|
+
from costkey.types import CostKeyEvent
|
|
9
|
+
|
|
10
|
+
# Package logger; only emits warnings, and only when debug mode is enabled.
logger = logging.getLogger("costkey")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Transport:
    """Batched background sender for CostKey events.

    Events are serialized into an in-memory queue and shipped to the ingest
    endpoint either when the queue reaches the batch size or on a periodic
    daemon timer. All transport failures are swallowed (optionally logged)
    so tracking never crashes the host application, and the HTTP request is
    performed with the queue lock *released* so enqueue() never blocks on
    the network.
    """

    def __init__(self, endpoint: str, auth_key: str, max_batch_size: int,
                 flush_interval: float, debug: bool):
        self._endpoint = endpoint
        self._auth_key = auth_key
        self._max_batch_size = max_batch_size
        self._flush_interval = flush_interval
        self._debug = debug
        self._queue: list[dict[str, Any]] = []  # serialized events, oldest first
        self._lock = threading.Lock()
        self._timer: threading.Timer | None = None
        self._max_queue = 500  # hard cap; oldest events dropped beyond this

    def start(self) -> None:
        """Start the periodic background flush timer."""
        self._schedule_flush()

    def stop(self) -> None:
        """Cancel the background timer. Queued events stay until flush()."""
        if self._timer:
            self._timer.cancel()
            self._timer = None

    def enqueue(self, event: CostKeyEvent) -> None:
        """Serialize *event* onto the queue; flush once a full batch is ready."""
        with self._lock:
            if len(self._queue) >= self._max_queue:
                # Bounded queue: drop the oldest event rather than grow forever.
                self._queue.pop(0)
                if self._debug:
                    logger.warning("[costkey] Queue full, dropping oldest event")

            self._queue.append(self._serialize(event))
            batch_ready = len(self._queue) >= self._max_batch_size

        # Send outside the lock so other threads can keep enqueueing while
        # the (potentially slow) HTTP request is in flight.
        if batch_ready:
            self._do_flush()

    def flush(self) -> None:
        """Synchronously send one pending batch (best-effort, never raises)."""
        self._do_flush()

    def _schedule_flush(self) -> None:
        # Daemon timer so a pending flush never keeps the process alive.
        self._timer = threading.Timer(self._flush_interval, self._tick)
        self._timer.daemon = True
        self._timer.start()

    def _tick(self) -> None:
        # Periodic flush, then re-arm the one-shot timer.
        self._do_flush()
        self._schedule_flush()

    def _do_flush(self) -> None:
        """Pop one batch under the lock, then POST it with the lock released.

        Previously the POST ran while holding the lock, which could stall
        every enqueueing thread for up to the 10 s timeout.
        """
        with self._lock:
            if not self._queue:
                return
            batch = self._queue[:self._max_batch_size]
            del self._queue[:self._max_batch_size]

        payload = {"sdkVersion": "python-0.1.0", "events": batch}

        try:
            resp = httpx.post(
                self._endpoint,
                json=payload,
                headers={
                    "Authorization": f"Bearer {self._auth_key}",
                    "User-Agent": "costkey-python/0.1.0",
                },
                timeout=10,
            )
            if resp.status_code == 429:
                # Rate limited: requeue the batch at the front for retry.
                with self._lock:
                    self._queue = batch + self._queue
                if self._debug:
                    logger.warning("[costkey] Rate limited, will retry")
            elif not resp.is_success and self._debug:
                logger.warning("[costkey] Ingest returned %s", resp.status_code)
        except Exception as e:
            # Never propagate transport failures into the host application.
            if self._debug:
                logger.warning("[costkey] Failed to send events: %s", e)

    def _serialize(self, event: CostKeyEvent) -> dict[str, Any]:
        """Convert an event to the camelCase JSON shape the ingest API expects."""
        d: dict[str, Any] = {
            "id": event.id,
            "timestamp": event.timestamp,
            "projectId": event.project_id,
            "provider": event.provider.value,
            "model": event.model,
            "url": event.url,
            "method": event.method,
            "statusCode": event.status_code,
            "usage": None,
            "costUsd": event.cost_usd,
            "durationMs": event.duration_ms,
            "streaming": event.streaming,
            "streamTiming": None,
            "callSite": None,
            "context": event.context,
            "requestBody": event.request_body,
            "responseBody": event.response_body,
        }
        if event.usage:
            d["usage"] = {
                "inputTokens": event.usage.input_tokens,
                "outputTokens": event.usage.output_tokens,
                "totalTokens": event.usage.total_tokens,
                "reasoningTokens": event.usage.reasoning_tokens,
                "cacheReadTokens": event.usage.cache_read_tokens,
                "cacheCreationTokens": event.usage.cache_creation_tokens,
            }
        if event.stream_timing:
            # Fix: streamTiming was previously hard-coded to None and never
            # populated. Keys follow the same camelCase convention as the
            # other payload fields.
            d["streamTiming"] = {
                "ttft": event.stream_timing.ttft,
                "tps": event.stream_timing.tps,
                "streamDuration": event.stream_timing.stream_duration,
                "chunkCount": event.stream_timing.chunk_count,
            }
        if event.call_site:
            d["callSite"] = {
                "raw": event.call_site.raw,
                "frames": [
                    {"functionName": f.function_name, "fileName": f.file_name,
                     "lineNumber": f.line_number, "columnNumber": None}
                    for f in event.call_site.frames
                ],
            }
        return d
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Provider(str, Enum):
    """Known LLM API providers; str-valued so members serialize as plain strings."""
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    AZURE = "azure"
    UNKNOWN = "unknown"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class NormalizedUsage:
    """Token usage normalized across providers; None means the provider did not report it."""
    input_tokens: int | None = None  # prompt-side tokens
    output_tokens: int | None = None  # completion-side tokens
    total_tokens: int | None = None  # provider-reported total (may differ from input+output)
    reasoning_tokens: int | None = None  # reasoning/thinking tokens, where supported
    cache_read_tokens: int | None = None  # tokens served from a provider prompt cache
    cache_creation_tokens: int | None = None  # tokens written into a provider prompt cache
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class StackFrame:
    """One frame of a captured call stack, used to attribute an AI call to code."""
    function_name: str | None = None  # enclosing function, when parseable
    file_name: str | None = None  # source file path
    line_number: int | None = None  # 1-based line number, when parseable
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class CallSite:
    """A captured call stack: the raw formatted traceback plus parsed frames."""
    raw: str = ""  # full formatted traceback text
    frames: list[StackFrame] = field(default_factory=list)  # user-code frames, innermost first
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class StreamTiming:
    """Timing metrics for a streamed response."""
    ttft: float | None = None  # presumably time-to-first-token; units not established here — confirm producer
    tps: float | None = None  # presumably tokens-per-second throughput — confirm producer
    stream_duration: float | None = None  # total stream duration; units not established here — confirm producer
    chunk_count: int = 0  # number of stream chunks received
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class CostKeyEvent:
    """One tracked LLM API call, serialized by the transport for ingest."""
    id: str = ""  # unique event identifier (assigned by the producer)
    timestamp: str = ""  # event time as a string — presumably ISO-8601; confirm with producer
    project_id: str = ""  # CostKey project this event belongs to
    provider: Provider = Provider.UNKNOWN  # which LLM provider handled the call
    model: str | None = None  # model name, when it could be determined
    url: str = ""  # request URL of the API call
    method: str = "POST"  # HTTP method of the request
    status_code: int | None = None  # HTTP response status, when a response arrived
    usage: NormalizedUsage | None = None  # token usage, when reported
    cost_usd: float | None = None  # computed cost in USD, when pricing was known
    duration_ms: float = 0  # request duration in milliseconds
    streaming: bool = False  # whether the response was streamed
    stream_timing: StreamTiming | None = None  # extra timing for streamed responses
    call_site: CallSite | None = None  # captured user-code stack, when available
    context: dict[str, Any] = field(default_factory=dict)  # arbitrary user-supplied context/tags
    request_body: Any = None  # raw request payload — presumably only when capture_body is enabled; confirm in client
    response_body: Any = None  # raw response payload — presumably only when capture_body is enabled; confirm in client
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass
class CostKeyOptions:
    """Client configuration options."""
    dsn: str = ""  # ingest DSN / credential; exact format not shown here — see client docs
    capture_body: bool = True  # attach request/response bodies to events
    before_send: Callable[[CostKeyEvent], CostKeyEvent | None] | None = None  # pre-send hook; returning None presumably drops the event — confirm in client
    max_batch_size: int = 50  # events per transport batch
    flush_interval: float = 5.0  # seconds between background flushes (threading.Timer interval)
    debug: bool = False  # enable diagnostic warning logs
    default_context: dict[str, Any] = field(default_factory=dict)  # context presumably merged into every event — confirm in client
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "costkey"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "CostKey", email = "hello@costkey.dev" }]
|
|
13
|
+
keywords = ["ai", "llm", "cost", "tracking", "observability", "openai", "anthropic", "gemini"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"httpx>=0.24.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://costkey.dev"
|
|
27
|
+
Repository = "https://github.com/costkey/costkey-python"
|
|
28
|
+
Documentation = "https://github.com/costkey/costkey-python"
|
|
29
|
+
|
|
30
|
+
[tool.hatch.build.targets.wheel]
|
|
31
|
+
packages = ["costkey"]
|
|
32
|
+
|
|
33
|
+
[tool.pytest.ini_options]
|
|
34
|
+
testpaths = ["tests"]
|