costkey 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ *.egg
8
+ .env
9
+ .venv/
10
+ venv/
costkey-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: costkey
3
+ Version: 0.1.0
4
+ Summary: Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code.
5
+ Project-URL: Homepage, https://costkey.dev
6
+ Project-URL: Repository, https://github.com/costkey/costkey-python
7
+ Project-URL: Documentation, https://github.com/costkey/costkey-python
8
+ Author-email: CostKey <hello@costkey.dev>
9
+ License-Expression: MIT
10
+ Keywords: ai,anthropic,cost,gemini,llm,observability,openai,tracking
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Requires-Python: >=3.9
17
+ Requires-Dist: httpx>=0.24.0
18
+ Description-Content-Type: text/markdown
19
+
20
+ # costkey
21
+
22
+ > Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code.
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pip install costkey
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ import costkey
34
+
35
+ costkey.init(dsn="https://ck_your_key@costkey.dev/your-project")
36
+
37
+ # That's it. Every AI call is now tracked automatically.
38
+ # Works with OpenAI, Anthropic, Google Gemini, Azure OpenAI.
39
+ ```
40
+
41
+ ## How It Works
42
+
43
+ CostKey patches `httpx` and `requests` — the HTTP clients that every AI SDK uses under the hood. When your code calls any AI provider, CostKey automatically:
44
+
45
+ 1. **Detects** the AI provider from the URL
46
+ 2. **Extracts** token usage from the response
47
+ 3. **Captures** a stack trace (which function, which file, which line)
48
+ 4. **Computes** cost using built-in pricing for 30+ models
49
+ 5. **Ships** the event to your CostKey dashboard (async, non-blocking)
50
+
51
+ ## Tracing
52
+
53
+ ```python
54
+ with costkey.start_trace(name="POST /api/search"):
55
+ intent = classify_intent(query)
56
+ results = search(query)
57
+ summary = summarize(results)
58
+ # All 3 AI calls grouped under one trace
59
+ ```
60
+
61
+ ## Manual Context
62
+
63
+ ```python
64
+ with costkey.with_context(task="summarize", team="search"):
65
+ response = openai.chat.completions.create(...)
66
+ ```
67
+
68
+ ## Privacy
69
+
70
+ - Never captures API keys — request headers are never read
71
+ - Auto-scrubs credentials from request/response bodies
72
+ - `before_send` hook for custom PII scrubbing
73
+
74
+ ## License
75
+
76
+ MIT
@@ -0,0 +1,57 @@
1
+ # costkey
2
+
3
+ > Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install costkey
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ import costkey
15
+
16
+ costkey.init(dsn="https://ck_your_key@costkey.dev/your-project")
17
+
18
+ # That's it. Every AI call is now tracked automatically.
19
+ # Works with OpenAI, Anthropic, Google Gemini, Azure OpenAI.
20
+ ```
21
+
22
+ ## How It Works
23
+
24
+ CostKey patches `httpx` and `requests` — the HTTP clients that every AI SDK uses under the hood. When your code calls any AI provider, CostKey automatically:
25
+
26
+ 1. **Detects** the AI provider from the URL
27
+ 2. **Extracts** token usage from the response
28
+ 3. **Captures** a stack trace (which function, which file, which line)
29
+ 4. **Computes** cost using built-in pricing for 30+ models
30
+ 5. **Ships** the event to your CostKey dashboard (async, non-blocking)
31
+
32
+ ## Tracing
33
+
34
+ ```python
35
+ with costkey.start_trace(name="POST /api/search"):
36
+ intent = classify_intent(query)
37
+ results = search(query)
38
+ summary = summarize(results)
39
+ # All 3 AI calls grouped under one trace
40
+ ```
41
+
42
+ ## Manual Context
43
+
44
+ ```python
45
+ with costkey.with_context(task="summarize", team="search"):
46
+ response = openai.chat.completions.create(...)
47
+ ```
48
+
49
+ ## Privacy
50
+
51
+ - Never captures API keys — request headers are never read
52
+ - Auto-scrubs credentials from request/response bodies
53
+ - `before_send` hook for custom PII scrubbing
54
+
55
+ ## License
56
+
57
+ MIT
@@ -0,0 +1,13 @@
1
+ from costkey.client import init, shutdown, flush, with_context, start_trace
2
+ from costkey.client import register_extractor, register_pricing
3
+
4
+ __version__ = "0.1.0"
5
+ __all__ = [
6
+ "init",
7
+ "shutdown",
8
+ "flush",
9
+ "with_context",
10
+ "start_trace",
11
+ "register_extractor",
12
+ "register_pricing",
13
+ ]
@@ -0,0 +1,136 @@
1
+ """CostKey Python SDK — Sentry for AI costs."""
2
+ from __future__ import annotations
3
+ import uuid
4
+ import logging
5
+ from contextlib import contextmanager
6
+ from contextvars import copy_context
7
+ from typing import Any, Callable, Generator
8
+ from costkey.types import CostKeyOptions, CostKeyEvent
9
+ from costkey.transport import Transport
10
+ from costkey.patch import patch, unpatch, _context, set_context, get_context
11
+ from costkey.providers import register_extractor as _register_extractor
12
+ from costkey.pricing import register_pricing as _register_pricing
13
+
14
+ logger = logging.getLogger("costkey")
15
+
16
+ _transport: Transport | None = None
17
+ _initialized = False
18
+
19
+
20
+ def _parse_dsn(dsn: str) -> tuple[str, str, str]:
21
+ """Parse DSN → (endpoint, auth_key, project_id)."""
22
+ from urllib.parse import urlparse
23
+ parsed = urlparse(dsn)
24
+ auth_key = parsed.username or ""
25
+ if not auth_key:
26
+ raise ValueError(f"[costkey] DSN missing auth key: {dsn}")
27
+ project_id = parsed.path.lstrip("/")
28
+ if not project_id:
29
+ raise ValueError(f"[costkey] DSN missing project ID: {dsn}")
30
+ endpoint = f"{parsed.scheme}://{parsed.hostname}"
31
+ if parsed.port:
32
+ endpoint += f":{parsed.port}"
33
+ endpoint += "/api/v1/events"
34
+ return endpoint, auth_key, project_id
35
+
36
+
37
def init(dsn: str, *, capture_body: bool = True,
         before_send: Callable[[CostKeyEvent], CostKeyEvent | None] | None = None,
         max_batch_size: int = 50, flush_interval: float = 5.0,
         debug: bool = False, default_context: dict[str, Any] | None = None) -> None:
    """
    Initialize CostKey. Call once at app startup.

    Patches httpx/requests so calls to supported AI providers are tracked,
    and starts the background transport that ships events to the ingest
    endpoint.

    Args:
        dsn: ``https://<auth_key>@<host>/<project_id>`` connection string.
        capture_body: attach (scrubbed) request/response JSON bodies to events.
        before_send: hook to mutate or drop an event (return None to drop).
        max_batch_size: events per upload batch.
        flush_interval: seconds between background flushes.
        debug: enable SDK logging.
        default_context: context merged into every event.

    >>> import costkey
    >>> costkey.init(dsn="https://ck_abc123@costkey.dev/my-project")
    >>> # That's it. Every AI call is now tracked.
    """
    global _transport, _initialized

    # Idempotent: a second init() is a no-op (re-patching would double-wrap
    # the HTTP clients).
    if _initialized:
        if debug:
            logger.warning("[costkey] Already initialized, skipping")
        return

    endpoint, auth_key, project_id = _parse_dsn(dsn)

    _transport = Transport(
        endpoint=endpoint, auth_key=auth_key,
        max_batch_size=max_batch_size, flush_interval=flush_interval,
        debug=debug,
    )

    # Install the HTTP-client monkey-patches before starting the transport
    # timer, so no event can arrive with the transport half-configured.
    patch(
        transport=_transport, project_id=project_id,
        capture_body=capture_body, before_send=before_send,
        default_context=default_context or {}, debug=debug,
    )

    _transport.start()
    _initialized = True

    if debug:
        logger.info(f"[costkey] Initialized for project {project_id}")
74
+
75
+
76
+ # Alias for MLflow familiarity
77
+ autolog = init
78
+
79
+
80
def shutdown() -> None:
    """Flush pending events and restore original HTTP clients.

    Safe to call multiple times; after shutdown, init() may be called again.
    """
    global _transport, _initialized
    if _transport:
        _transport.flush()
        _transport.stop()
        _transport = None
    # Restore httpx/requests even when the transport was never created.
    unpatch()
    _initialized = False
89
+
90
+
91
def flush() -> None:
    """Flush all pending events without shutting down."""
    # Snapshot the module global: shutdown() may set _transport to None from
    # another thread between the truthiness check and the method call.
    transport = _transport
    if transport:
        transport.flush()
95
+
96
+
97
@contextmanager
def with_context(**kwargs: Any) -> Generator[None, None, None]:
    """
    Tag AI calls with custom context.

    Context is layered: the given keys are merged over whatever context is
    already active, and the previous context is restored on exit.

    >>> with costkey.with_context(task="summarize", team="search"):
    ...     openai.chat.completions.create(...)
    """
    previous = get_context()
    token = _context.set({**previous, **kwargs})
    try:
        yield
    finally:
        # Always restore, even if the body raised.
        _context.reset(token)
112
+
113
+
114
@contextmanager
def start_trace(name: str | None = None, trace_id: str | None = None) -> Generator[None, None, None]:
    """
    Start a trace. All AI calls inside are grouped under one trace ID.

    >>> with costkey.start_trace(name="POST /api/search"):
    ...     classify_intent(query)
    ...     results = search(query)
    ...     summary = summarize(results)
    """
    if trace_id is None:
        # No explicit ID supplied: mint a fresh one for this trace.
        trace_id = uuid.uuid4().hex
    with with_context(traceId=trace_id, traceName=name):
        yield
127
+
128
+
129
def register_extractor(extractor: Any) -> None:
    """Register a custom provider extractor.

    The extractor must follow the ProviderExtractor protocol
    (``match`` / ``extract_usage`` / ``extract_model``).
    """
    _register_extractor(extractor)
132
+
133
+
134
def register_pricing(model: str, input_per_1m: float, output_per_1m: float, **kwargs: Any) -> None:
    """Register custom model pricing (USD per 1M tokens).

    Extra keyword args (e.g. cache_read_per_1m / cache_write_per_1m) are
    forwarded to the pricing registry.
    """
    _register_pricing(model, input_per_1m, output_per_1m, **kwargs)
@@ -0,0 +1,285 @@
1
+ """Monkey-patch HTTP clients to intercept AI provider calls."""
2
+ from __future__ import annotations
3
+ import json
4
+ import time
5
+ import uuid
6
+ import re
7
+ import logging
8
+ from contextvars import ContextVar
9
+ from typing import Any, Callable
10
+ from costkey.types import CostKeyEvent, NormalizedUsage, Provider
11
+ from costkey.providers import find_extractor
12
+ from costkey.stack import capture_call_site
13
+ from costkey.pricing import compute_cost
14
+ from costkey.transport import Transport
15
+
16
+ logger = logging.getLogger("costkey")
17
+
18
+ # Context var for tracing / manual context
19
+ _context: ContextVar[dict[str, Any]] = ContextVar("costkey_context", default={})
20
+
21
+ # Secret patterns to scrub from bodies
22
+ _SECRET_PATTERNS = [
23
+ re.compile(r"^sk-[a-zA-Z0-9]{20,}$"),
24
+ re.compile(r"^sk-ant-[a-zA-Z0-9\-]{20,}$"),
25
+ re.compile(r"^AIza[a-zA-Z0-9_\-]{30,}$"),
26
+ re.compile(r"^Bearer\s+.{20,}$"),
27
+ re.compile(r"^eyJ[a-zA-Z0-9_\-]{20,}"),
28
+ ]
29
+ _SECRET_KEYS = frozenset({
30
+ "api_key", "apikey", "api-key", "secret", "secret_key",
31
+ "token", "access_token", "refresh_token", "password",
32
+ "authorization", "auth", "private_key",
33
+ })
34
+
35
+
36
+ def _scrub(obj: Any) -> Any:
37
+ if obj is None:
38
+ return None
39
+ if isinstance(obj, str):
40
+ for pat in _SECRET_PATTERNS:
41
+ if pat.match(obj):
42
+ return "[REDACTED]"
43
+ return obj
44
+ if isinstance(obj, list):
45
+ return [_scrub(item) for item in obj]
46
+ if isinstance(obj, dict):
47
+ return {
48
+ k: "[REDACTED]" if k.lower() in _SECRET_KEYS else _scrub(v)
49
+ for k, v in obj.items()
50
+ }
51
+ return obj
52
+
53
+
54
class _PatchState:
    """Module-wide mutable state shared by the patch/unpatch machinery."""

    def __init__(self) -> None:
        # Configuration supplied by patch() at init time.
        self.transport: Transport | None = None
        self.project_id: str = ""
        self.capture_body: bool = True
        self.before_send: Callable | None = None
        self.default_context: dict[str, Any] = {}
        self.debug: bool = False
        # Original send() methods, saved so unpatch() can restore them.
        self._original_httpx_send: Any = None
        self._original_httpx_async_send: Any = None
        self._original_requests_send: Any = None
        # Guard so patch() is idempotent.
        self.patched = False
66
+
67
+
68
+ _state = _PatchState()
69
+
70
+
71
def patch(transport: Transport, project_id: str, capture_body: bool,
          before_send: Callable | None, default_context: dict[str, Any], debug: bool) -> None:
    """Install the httpx/requests monkey-patches (idempotent).

    Stores the configuration on the shared _state, then wraps the send
    methods of whichever HTTP clients are importable.
    """
    if _state.patched:
        return

    _state.transport = transport
    _state.project_id = project_id
    _state.capture_body = capture_body
    _state.before_send = before_send
    _state.default_context = default_context
    _state.debug = debug

    _patch_httpx()
    _patch_requests()
    _state.patched = True
86
+
87
+
88
def unpatch() -> None:
    """Restore the original httpx/requests send methods (idempotent)."""
    _unpatch_httpx()
    _unpatch_requests()
    _state.patched = False
92
+
93
+
94
def _patch_httpx() -> None:
    """Wrap ``httpx.Client.send`` and ``httpx.AsyncClient.send``.

    The wrappers are transparent pass-throughs for non-AI URLs. For URLs a
    registered extractor recognizes, they time the call, parse request and
    response JSON (best-effort), and hand everything to _process(). No-op
    when httpx is not installed.
    """
    try:
        import httpx

        _state._original_httpx_send = httpx.Client.send

        def patched_send(self: Any, request: Any, **kwargs: Any) -> Any:
            url = str(request.url)
            extractor = find_extractor(url)

            # Not an AI provider: delegate untouched.
            if not extractor:
                return _state._original_httpx_send(self, request, **kwargs)

            # Capture attribution and context *before* the network call.
            call_site = capture_call_site()
            ctx = {**_state.default_context, **_context.get()}
            start = time.perf_counter()

            request_body = None
            if request.content:
                try:
                    request_body = json.loads(request.content)
                except Exception:
                    pass  # non-JSON bodies are simply not captured

            response = _state._original_httpx_send(self, request, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000

            try:
                response_body = response.json()
            except Exception:
                response_body = None

            _process(extractor, url, request.method, response.status_code,
                     request_body, response_body, duration_ms, call_site, ctx)

            return response

        httpx.Client.send = patched_send

        # Also patch async client
        _state._original_httpx_async_send = httpx.AsyncClient.send

        async def patched_async_send(self: Any, request: Any, **kwargs: Any) -> Any:
            url = str(request.url)
            extractor = find_extractor(url)

            if not extractor:
                return await _state._original_httpx_async_send(self, request, **kwargs)

            call_site = capture_call_site()
            ctx = {**_state.default_context, **_context.get()}
            start = time.perf_counter()

            request_body = None
            if request.content:
                try:
                    request_body = json.loads(request.content)
                except Exception:
                    pass

            response = await _state._original_httpx_async_send(self, request, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000

            try:
                response_body = response.json()
            except Exception:
                response_body = None

            _process(extractor, url, request.method, response.status_code,
                     request_body, response_body, duration_ms, call_site, ctx)

            return response

        httpx.AsyncClient.send = patched_async_send

    except ImportError:
        if _state.debug:
            logger.debug("[costkey] httpx not installed, skipping patch")
172
+
173
+
174
def _unpatch_httpx() -> None:
    """Restore the saved httpx send methods, if they were ever patched."""
    try:
        import httpx
        if _state._original_httpx_send:
            httpx.Client.send = _state._original_httpx_send
        if _state._original_httpx_async_send:
            httpx.AsyncClient.send = _state._original_httpx_async_send
    except ImportError:
        pass
183
+
184
+
185
def _patch_requests() -> None:
    """Wrap ``requests.Session.send`` — same strategy as the httpx patch.

    Note: operates on the PreparedRequest (``request.body`` rather than
    httpx's ``request.content``). No-op when requests is not installed.
    """
    try:
        import requests

        _state._original_requests_send = requests.Session.send

        def patched_send(self: Any, request: Any, **kwargs: Any) -> Any:
            url = str(request.url)
            extractor = find_extractor(url)

            # Not an AI provider: delegate untouched.
            if not extractor:
                return _state._original_requests_send(self, request, **kwargs)

            call_site = capture_call_site()
            ctx = {**_state.default_context, **_context.get()}
            start = time.perf_counter()

            request_body = None
            if request.body:
                try:
                    request_body = json.loads(request.body)
                except Exception:
                    pass  # non-JSON bodies are simply not captured

            response = _state._original_requests_send(self, request, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000

            try:
                response_body = response.json()
            except Exception:
                response_body = None

            _process(extractor, url, request.method, response.status_code,
                     request_body, response_body, duration_ms, call_site, ctx)

            return response

        requests.Session.send = patched_send

    except ImportError:
        if _state.debug:
            logger.debug("[costkey] requests not installed, skipping patch")
227
+
228
+
229
def _unpatch_requests() -> None:
    """Restore the saved requests send method, if it was ever patched."""
    try:
        import requests
        if _state._original_requests_send:
            requests.Session.send = _state._original_requests_send
    except ImportError:
        pass
236
+
237
+
238
def _process(extractor: Any, url: str, method: str, status_code: int | None,
             request_body: Any, response_body: Any,
             duration_ms: float, call_site: Any, ctx: dict[str, Any]) -> None:
    """Build a CostKeyEvent from one intercepted call and enqueue it.

    Never raises: any failure here is swallowed (logged in debug mode) so
    tracking can never break the application's own request.
    """
    try:
        usage = extractor.extract_usage(response_body) if response_body else None
        model = extractor.extract_model(request_body, response_body)
        # Cost requires both a recognized model and token counts.
        cost_usd = compute_cost(model, usage) if model and usage else None

        event = CostKeyEvent(
            id=uuid.uuid4().hex,
            # UTC wall clock; sub-second precision is not captured (always .000).
            timestamp=time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()),
            project_id=_state.project_id,
            provider=extractor.provider,
            model=model,
            url=url,
            method=method,
            status_code=status_code,
            usage=usage,
            cost_usd=cost_usd,
            duration_ms=round(duration_ms, 2),
            streaming=False,
            call_site=call_site,
            context=ctx,
            # Bodies are scrubbed of credentials before being attached.
            request_body=_scrub(request_body) if _state.capture_body else None,
            response_body=_scrub(response_body) if _state.capture_body else None,
        )

        # Give the user hook a chance to mutate or drop (return None) the event.
        if _state.before_send:
            try:
                event = _state.before_send(event)
            except Exception:
                if _state.debug:
                    logger.warning("[costkey] before_send threw, dropping event")
                return

        if event and _state.transport:
            _state.transport.enqueue(event)
    except Exception:
        if _state.debug:
            logger.warning("[costkey] Error processing event", exc_info=True)
278
+
279
+
280
def get_context() -> dict[str, Any]:
    """Return the active context dict.

    NOTE: this may be the ContextVar's shared default dict — callers should
    treat the result as read-only and merge into a new dict instead.
    """
    return _context.get()
282
+
283
+
284
def set_context(ctx: dict[str, Any]) -> None:
    """Replace the active context for the current execution context."""
    _context.set(ctx)
@@ -0,0 +1,65 @@
1
+ """Model pricing — cost per 1M tokens in USD."""
2
+ from __future__ import annotations
3
+ from costkey.types import NormalizedUsage
4
+
5
+ # (input_per_1M, output_per_1M, cache_read_per_1M, cache_write_per_1M)
6
+ _PRICING: dict[str, tuple[float, float, float | None, float | None]] = {
7
+ "gpt-4o": (2.5, 10, None, None),
8
+ "gpt-4o-mini": (0.15, 0.6, None, None),
9
+ "gpt-4-turbo": (10, 30, None, None),
10
+ "gpt-4": (30, 60, None, None),
11
+ "gpt-3.5-turbo": (0.5, 1.5, None, None),
12
+ "o1": (15, 60, None, None),
13
+ "o1-mini": (3, 12, None, None),
14
+ "o3": (10, 40, None, None),
15
+ "o3-mini": (1.1, 4.4, None, None),
16
+ "o4-mini": (1.1, 4.4, None, None),
17
+ "claude-opus-4-0-20250514": (15, 75, 1.5, 18.75),
18
+ "claude-sonnet-4-0-20250514": (3, 15, 0.3, 3.75),
19
+ "claude-sonnet-4-5-20250514": (3, 15, 0.3, 3.75),
20
+ "claude-haiku-3-5-20241022": (0.8, 4, 0.08, 1),
21
+ "claude-3-5-sonnet-20241022": (3, 15, 0.3, 3.75),
22
+ "claude-3-opus-20240229": (15, 75, 1.5, 18.75),
23
+ "gemini-2.0-flash": (0.1, 0.4, None, None),
24
+ "gemini-2.0-flash-lite": (0.02, 0.1, None, None),
25
+ "gemini-1.5-pro": (1.25, 5, None, None),
26
+ "gemini-1.5-flash": (0.075, 0.3, None, None),
27
+ "gemini-2.5-pro": (1.25, 10, None, None),
28
+ "gemini-2.5-flash": (0.15, 0.6, None, None),
29
+ }
30
+
31
+
32
+ def _find_pricing(model: str) -> tuple[float, float, float | None, float | None] | None:
33
+ if model in _PRICING:
34
+ return _PRICING[model]
35
+ parts = model.split("-")
36
+ for i in range(len(parts) - 1, 0, -1):
37
+ prefix = "-".join(parts[:i])
38
+ if prefix in _PRICING:
39
+ return _PRICING[prefix]
40
+ return None
41
+
42
+
43
def compute_cost(model: str, usage: NormalizedUsage) -> float | None:
    """Compute the USD cost of *usage* at *model*'s rates.

    Returns None when the model has no known pricing. Cache read/write
    tokens only contribute for models that define those rates. Result is
    rounded to 6 decimal places (micro-dollar precision).
    """
    pricing = _find_pricing(model)
    if pricing is None:
        return None

    inp, out, cache_r, cache_w = pricing
    cost = 0.0
    if usage.input_tokens is not None:
        cost += (usage.input_tokens / 1_000_000) * inp
    if usage.output_tokens is not None:
        cost += (usage.output_tokens / 1_000_000) * out
    if usage.cache_read_tokens is not None and cache_r is not None:
        cost += (usage.cache_read_tokens / 1_000_000) * cache_r
    if usage.cache_creation_tokens is not None and cache_w is not None:
        cost += (usage.cache_creation_tokens / 1_000_000) * cache_w

    return round(cost, 6)
60
+
61
+
62
def register_pricing(model: str, input_per_1m: float, output_per_1m: float,
                     cache_read_per_1m: float | None = None,
                     cache_write_per_1m: float | None = None) -> None:
    """Add or override pricing for *model* (USD per 1M tokens)."""
    _PRICING[model] = (input_per_1m, output_per_1m, cache_read_per_1m, cache_write_per_1m)
@@ -0,0 +1,133 @@
1
+ """Provider extractors — detect AI providers by URL and extract usage from responses."""
2
+ from __future__ import annotations
3
+ from urllib.parse import urlparse
4
+ from typing import Any, Protocol
5
+ from costkey.types import Provider, NormalizedUsage
6
+
7
+
8
class ProviderExtractor(Protocol):
    """Structural interface for provider-specific usage extraction.

    match() decides (by URL) whether this extractor handles a request;
    extract_usage()/extract_model() pull token counts and the model name
    out of the parsed JSON bodies.
    """
    provider: Provider
    def match(self, url: str) -> bool: ...
    def extract_usage(self, body: Any) -> NormalizedUsage | None: ...
    def extract_model(self, request_body: Any, response_body: Any) -> str | None: ...
13
+
14
+
15
+ def _as_int(val: Any) -> int | None:
16
+ if isinstance(val, (int, float)) and not isinstance(val, bool):
17
+ return int(val)
18
+ return None
19
+
20
+
21
class OpenAIExtractor:
    """Extractor for OpenAI (api.openai.com) and Azure OpenAI responses."""

    provider = Provider.OPENAI

    def match(self, url: str) -> bool:
        host = urlparse(url).hostname or ""
        return host == "api.openai.com" or host.endswith(".openai.azure.com")

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Normalize the ``usage`` object of an OpenAI-style response."""
        if not isinstance(body, dict):
            return None
        usage = body.get("usage")
        if not isinstance(usage, dict):
            return None

        # Chat Completions reports prompt_tokens/completion_tokens; the
        # Responses API reports input_tokens/output_tokens. Use explicit
        # `is None` fallbacks — the previous `or` chain discarded a
        # legitimate count of 0 (0 is falsy) and fell through to the
        # other key.
        input_t = _as_int(usage.get("prompt_tokens"))
        if input_t is None:
            input_t = _as_int(usage.get("input_tokens"))
        output_t = _as_int(usage.get("completion_tokens"))
        if output_t is None:
            output_t = _as_int(usage.get("output_tokens"))
        total_t = _as_int(usage.get("total_tokens"))
        if total_t is None and input_t is not None and output_t is not None:
            total_t = input_t + output_t

        details = usage.get("completion_tokens_details") or usage.get("output_tokens_details") or {}
        reasoning = _as_int(details.get("reasoning_tokens")) if isinstance(details, dict) else None

        return NormalizedUsage(
            input_tokens=input_t, output_tokens=output_t, total_tokens=total_t,
            reasoning_tokens=reasoning,
        )

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        """Prefer the model echoed in the response (it names the exact
        snapshot); fall back to what the request asked for."""
        if isinstance(response_body, dict) and isinstance(response_body.get("model"), str):
            return response_body["model"]
        if isinstance(request_body, dict) and isinstance(request_body.get("model"), str):
            return request_body["model"]
        return None
55
+
56
+
57
class AnthropicExtractor:
    """Extractor for Anthropic Messages API responses (api.anthropic.com)."""

    provider = Provider.ANTHROPIC

    def match(self, url: str) -> bool:
        return (urlparse(url).hostname or "") == "api.anthropic.com"

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Normalize the ``usage`` object of a Messages API response."""
        if not isinstance(body, dict):
            return None
        usage = body.get("usage")
        if not isinstance(usage, dict):
            return None

        input_t = _as_int(usage.get("input_tokens"))
        output_t = _as_int(usage.get("output_tokens"))
        # Anthropic does not report a total; derive one when at least one
        # side is present.
        if input_t is None and output_t is None:
            total_t = None
        else:
            total_t = (input_t or 0) + (output_t or 0)

        return NormalizedUsage(
            input_tokens=input_t,
            output_tokens=output_t,
            total_tokens=total_t,
            cache_read_tokens=_as_int(usage.get("cache_read_input_tokens")),
            cache_creation_tokens=_as_int(usage.get("cache_creation_input_tokens")),
        )

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        """Response model wins over the requested model."""
        for candidate in (response_body, request_body):
            if isinstance(candidate, dict):
                model = candidate.get("model")
                if isinstance(model, str):
                    return model
        return None
87
+
88
+
89
class GoogleExtractor:
    """Extractor for Gemini responses (AI Studio and Vertex AI hosts)."""

    provider = Provider.GOOGLE

    def match(self, url: str) -> bool:
        host = urlparse(url).hostname or ""
        if host == "generativelanguage.googleapis.com":
            return True
        return host.endswith("-aiplatform.googleapis.com")

    def extract_usage(self, body: Any) -> NormalizedUsage | None:
        """Normalize the ``usageMetadata`` object of a Gemini response."""
        meta = body.get("usageMetadata") if isinstance(body, dict) else None
        if not isinstance(meta, dict):
            return None

        prompt = _as_int(meta.get("promptTokenCount"))
        candidates = _as_int(meta.get("candidatesTokenCount"))
        total = _as_int(meta.get("totalTokenCount"))
        if total is None and prompt is not None and candidates is not None:
            total = prompt + candidates

        return NormalizedUsage(
            input_tokens=prompt,
            output_tokens=candidates,
            total_tokens=total,
            reasoning_tokens=_as_int(meta.get("thoughtsTokenCount")),
            cache_read_tokens=_as_int(meta.get("cachedContentTokenCount")),
        )

    def extract_model(self, request_body: Any, response_body: Any) -> str | None:
        # Gemini requests carry the model in the URL, not the body, so only
        # the response's modelVersion field is usable here.
        if isinstance(response_body, dict):
            version = response_body.get("modelVersion")
            if isinstance(version, str):
                return version
        return None
119
+
120
+
121
+ # Registry
122
+ _extractors: list[ProviderExtractor] = [OpenAIExtractor(), AnthropicExtractor(), GoogleExtractor()]
123
+
124
+
125
def find_extractor(url: str) -> ProviderExtractor | None:
    """Return the first registered extractor whose match() accepts *url*,
    or None when no provider recognizes it."""
    return next((candidate for candidate in _extractors if candidate.match(url)), None)
130
+
131
+
132
def register_extractor(extractor: ProviderExtractor) -> None:
    """Append a custom extractor to the registry.

    Built-in extractors are consulted first (find_extractor scans in order).
    """
    _extractors.append(extractor)
@@ -0,0 +1,45 @@
1
+ """Stack trace capture — auto-attribute AI calls to code."""
2
+ from __future__ import annotations
3
+ import traceback
4
+ from costkey.types import CallSite, StackFrame
5
+
6
+ _INTERNAL = ("costkey/", "site-packages/costkey")
7
+
8
+
9
def capture_call_site() -> CallSite | None:
    """Capture the user-code stack that led to the current AI call.

    Uses ``traceback.extract_stack()`` (structured FrameSummary objects)
    rather than re-parsing ``format_stack()`` text. The old text parser
    split on ", " — which corrupted file paths containing ", " — and its
    ``.replace("in ", "")`` mangled function names and glued the source
    line onto them.

    Frames are listed innermost-first; frames originating inside the
    costkey package are skipped so attribution points at user code.
    Returns None when no user frames remain.
    """
    stack = traceback.extract_stack()
    # Preserve the original full-text form for the event's `raw` field.
    raw = "".join(traceback.format_list(stack))

    frames: list[StackFrame] = []
    for summary in reversed(stack):
        file_name = summary.filename
        if any(marker in file_name for marker in _INTERNAL):
            continue
        frames.append(StackFrame(
            function_name=summary.name,
            file_name=file_name,
            line_number=summary.lineno,
        ))

    if not frames:
        return None
    return CallSite(raw=raw, frames=frames)
@@ -0,0 +1,127 @@
1
+ """Batched async transport — ships events to costkey.dev. Never blocks. Never throws."""
2
+ from __future__ import annotations
3
+ import json
4
+ import threading
5
+ import logging
6
+ from typing import Any
7
+ import httpx
8
+ from costkey.types import CostKeyEvent
9
+
10
+ logger = logging.getLogger("costkey")
11
+
12
+
13
class Transport:
    """Batched background transport for CostKey events.

    Events are serialized and buffered in memory; a daemon timer flushes
    them every ``flush_interval`` seconds, and enqueue() triggers an eager
    flush once a full batch has accumulated. The HTTP POST happens
    *outside* the queue lock, so producers are never blocked on network
    I/O (the previous implementation sent while holding the lock).
    All transport errors are swallowed (logged when ``debug`` is set) —
    tracking must never break the host application.
    """

    def __init__(self, endpoint: str, auth_key: str, max_batch_size: int,
                 flush_interval: float, debug: bool):
        self._endpoint = endpoint
        self._auth_key = auth_key
        self._max_batch_size = max_batch_size
        self._flush_interval = flush_interval
        self._debug = debug
        self._queue: list[dict[str, Any]] = []
        self._lock = threading.Lock()
        self._timer: threading.Timer | None = None
        # Hard cap on buffered events; oldest are dropped beyond this.
        self._max_queue = 500

    def start(self) -> None:
        """Begin periodic background flushing."""
        self._schedule_flush()

    def stop(self) -> None:
        """Cancel the background timer (pending events stay queued)."""
        if self._timer:
            self._timer.cancel()
            self._timer = None

    def enqueue(self, event: CostKeyEvent) -> None:
        """Serialize and buffer one event; flush eagerly on a full batch."""
        with self._lock:
            if len(self._queue) >= self._max_queue:
                self._queue.pop(0)
                if self._debug:
                    logger.warning("[costkey] Queue full, dropping oldest event")
            self._queue.append(self._serialize(event))
            batch_ready = len(self._queue) >= self._max_batch_size
        # Send outside the lock so concurrent producers are not blocked.
        if batch_ready:
            self.flush()

    def flush(self) -> None:
        """Send *all* pending events, batch by batch.

        The old implementation only sent a single batch per call despite
        its contract. Flushing stops early on rate limiting or a network
        error; remaining events stay queued for the next periodic flush.
        """
        while True:
            with self._lock:
                if not self._queue:
                    return
                batch = self._queue[:self._max_batch_size]
                del self._queue[:self._max_batch_size]
            if not self._send(batch):
                return

    def _schedule_flush(self) -> None:
        self._timer = threading.Timer(self._flush_interval, self._tick)
        self._timer.daemon = True  # never keep the process alive
        self._timer.start()

    def _tick(self) -> None:
        self.flush()
        self._schedule_flush()

    def _send(self, batch: list[dict[str, Any]]) -> bool:
        """POST one batch. Returns True when flushing may continue.

        On HTTP 429 the batch is re-queued for a later retry; on any other
        failure the batch is dropped (best-effort delivery).
        """
        payload = {"sdkVersion": "python-0.1.0", "events": batch}
        try:
            resp = httpx.post(
                self._endpoint,
                json=payload,
                headers={
                    "Authorization": f"Bearer {self._auth_key}",
                    "User-Agent": "costkey-python/0.1.0",
                },
                timeout=10,
            )
            if resp.status_code == 429:
                with self._lock:
                    self._queue = batch + self._queue
                if self._debug:
                    logger.warning("[costkey] Rate limited, will retry")
                return False
            if not resp.is_success and self._debug:
                logger.warning(f"[costkey] Ingest returned {resp.status_code}")
            return True
        except Exception as e:
            if self._debug:
                logger.warning(f"[costkey] Failed to send events: {e}")
            return False

    def _serialize(self, event: CostKeyEvent) -> dict[str, Any]:
        """Convert a CostKeyEvent into the camelCase wire format."""
        d: dict[str, Any] = {
            "id": event.id,
            "timestamp": event.timestamp,
            "projectId": event.project_id,
            "provider": event.provider.value,
            "model": event.model,
            "url": event.url,
            "method": event.method,
            "statusCode": event.status_code,
            "usage": None,
            "costUsd": event.cost_usd,
            "durationMs": event.duration_ms,
            "streaming": event.streaming,
            "streamTiming": None,
            "callSite": None,
            "context": event.context,
            "requestBody": event.request_body,
            "responseBody": event.response_body,
        }
        if event.usage:
            d["usage"] = {
                "inputTokens": event.usage.input_tokens,
                "outputTokens": event.usage.output_tokens,
                "totalTokens": event.usage.total_tokens,
                "reasoningTokens": event.usage.reasoning_tokens,
                "cacheReadTokens": event.usage.cache_read_tokens,
                "cacheCreationTokens": event.usage.cache_creation_tokens,
            }
        if event.call_site:
            d["callSite"] = {
                "raw": event.call_site.raw,
                "frames": [
                    {"functionName": f.function_name, "fileName": f.file_name,
                     "lineNumber": f.line_number, "columnNumber": None}
                    for f in event.call_site.frames
                ],
            }
        return d
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass, field
3
+ from enum import Enum
4
+ from typing import Any, Callable, Optional
5
+
6
+
7
class Provider(str, Enum):
    """Recognized AI providers; str-valued so values serialize directly to JSON."""
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    AZURE = "azure"
    UNKNOWN = "unknown"
13
+
14
+
15
@dataclass
class NormalizedUsage:
    """Provider-agnostic token counts; None means 'not reported'."""
    input_tokens: int | None = None
    output_tokens: int | None = None
    total_tokens: int | None = None
    reasoning_tokens: int | None = None       # reasoning/"thinking" tokens, where reported
    cache_read_tokens: int | None = None      # prompt-cache read tokens
    cache_creation_tokens: int | None = None  # prompt-cache write tokens
23
+
24
+
25
@dataclass
class StackFrame:
    """One frame of the captured call stack."""
    function_name: str | None = None
    file_name: str | None = None
    line_number: int | None = None
30
+
31
+
32
@dataclass
class CallSite:
    """Where in user code an AI call originated."""
    # Full formatted traceback text.
    raw: str = ""
    # Innermost user frame first (see stack.capture_call_site).
    frames: list[StackFrame] = field(default_factory=list)
36
+
37
+
38
@dataclass
class StreamTiming:
    """Timing stats for streamed responses.

    NOTE(review): no code path in this SDK version populates these fields
    (patch.py always sets streaming=False); units presumably seconds —
    confirm before relying on them.
    """
    ttft: float | None = None  # time to first token
    tps: float | None = None   # tokens per second
    stream_duration: float | None = None
    chunk_count: int = 0
44
+
45
+
46
@dataclass
class CostKeyEvent:
    """One tracked AI call, as shipped to the ingest endpoint."""
    id: str = ""          # uuid4 hex, assigned in patch._process
    timestamp: str = ""   # ISO-8601 UTC string
    project_id: str = ""
    provider: Provider = Provider.UNKNOWN
    model: str | None = None
    url: str = ""
    method: str = "POST"
    status_code: int | None = None
    usage: NormalizedUsage | None = None
    cost_usd: float | None = None   # None when the model has no known pricing
    duration_ms: float = 0
    streaming: bool = False
    stream_timing: StreamTiming | None = None
    call_site: CallSite | None = None
    context: dict[str, Any] = field(default_factory=dict)
    # Scrubbed JSON bodies; None when capture_body is disabled.
    request_body: Any = None
    response_body: Any = None
65
+
66
+
67
@dataclass
class CostKeyOptions:
    """Configuration bundle mirroring the keyword arguments of client.init()."""
    dsn: str = ""
    capture_body: bool = True
    before_send: Callable[[CostKeyEvent], CostKeyEvent | None] | None = None
    max_batch_size: int = 50
    flush_interval: float = 5.0
    debug: bool = False
    default_context: dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,34 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "costkey"
7
+ version = "0.1.0"
8
+ description = "Sentry for AI costs. Track every LLM call's cost, tokens, and latency with one line of code."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [{ name = "CostKey", email = "hello@costkey.dev" }]
13
+ keywords = ["ai", "llm", "cost", "tracking", "observability", "openai", "anthropic", "gemini"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ ]
21
+ dependencies = [
22
+ "httpx>=0.24.0",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://costkey.dev"
27
+ Repository = "https://github.com/costkey/costkey-python"
28
+ Documentation = "https://github.com/costkey/costkey-python"
29
+
30
+ [tool.hatch.build.targets.wheel]
31
+ packages = ["costkey"]
32
+
33
+ [tool.pytest.ini_options]
34
+ testpaths = ["tests"]