PyPI - botzone-cost - Versions diffs - 0.1.0__tar.gz - Mend

botzone-cost 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

botzone_cost-0.1.0/LICENSE +21 -0
botzone_cost-0.1.0/PKG-INFO +85 -0
botzone_cost-0.1.0/README.md +55 -0
botzone_cost-0.1.0/botzone_cost/__init__.py +6 -0
botzone_cost-0.1.0/botzone_cost/_wrap.py +201 -0
botzone_cost-0.1.0/botzone_cost/queue.py +75 -0
botzone_cost-0.1.0/botzone_cost.egg-info/PKG-INFO +85 -0
botzone_cost-0.1.0/botzone_cost.egg-info/SOURCES.txt +12 -0
botzone_cost-0.1.0/botzone_cost.egg-info/dependency_links.txt +1 -0
botzone_cost-0.1.0/botzone_cost.egg-info/requires.txt +14 -0
botzone_cost-0.1.0/botzone_cost.egg-info/top_level.txt +1 -0
botzone_cost-0.1.0/pyproject.toml +36 -0
botzone_cost-0.1.0/setup.cfg +4 -0
botzone_cost-0.1.0/tests/test_wrap.py +117 -0

botzone_cost-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Botzone
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

botzone_cost-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,85 @@
+Metadata-Version: 2.4
+Name: botzone-cost
+Version: 0.1.0
+Summary: Cost-tracking SDK for Anthropic, OpenAI, and Gemini Python clients
+Author-email: Botzone <hello@botzone.ai>
+License: MIT
+Project-URL: Homepage, https://github.com/botzone-ai/cost-sdk-py
+Project-URL: Repository, https://github.com/botzone-ai/cost-sdk-py
+Project-URL: Issues, https://github.com/botzone-ai/cost-sdk-py/issues
+Keywords: llm,cost-tracking,anthropic,openai,gemini
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: httpx>=0.25
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.30; extra == "anthropic"
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == "openai"
+Provides-Extra: gemini
+Requires-Dist: google-generativeai>=0.5; extra == "gemini"
+Provides-Extra: test
+Requires-Dist: pytest>=7; extra == "test"
+Requires-Dist: pytest-asyncio>=0.23; extra == "test"
+Dynamic: license-file
+# botzone-cost
+Cost-tracking SDK for Anthropic, OpenAI, and Gemini Python clients. Wrap your
+existing client; per-call usage flows to your Cost dashboard. Adds zero
+measurable latency to the host call.
+## Install
+```
+pip install botzone-cost
+```
+## Usage
+```python
+from anthropic import Anthropic
+from botzone_cost import wrap
+client = wrap(Anthropic(), api_key="cost_sk_...", route="follow-up-draft")
+```
+Same surface for OpenAI and Gemini:
+```python
+from openai import OpenAI
+import google.generativeai as genai
+from botzone_cost import wrap
+openai_client = wrap(OpenAI(), route="summariser")
+gemini = wrap(genai.GenerativeModel("gemini-2.5-flash"), route="classifier")
+```
+## Options
+| arg              | default                                          |
+| ---------------- | ------------------------------------------------ |
+| `api_key`        | env `COST_API_KEY`                               |
+| `endpoint`       | env `COST_ENDPOINT` or `https://cost.botzone.ai` |
+| `route`          | (none: strongly recommended)                     |
+| `user_id`        | (sha256-hashed in the SDK before send)           |
+| `feature_tag`    | (none)                                           |
+| `enabled`        | `True`                                           |
+| `capture_bodies` | `False` (reserved, no effect today, see below)   |
+## What gets captured
+Token counts (including Anthropic prompt-cache reads / writes and OpenAI cached
+prompt tokens), latency, model, route, user id (hashed), feature tag. Computed
+USD cost is added server-side from the live pricing table. The Python SDK is
+**metadata-only today**: it does not send raw request or response bodies, and
+the `capture_bodies` parameter is reserved for future parity with the
+TypeScript SDK.
+End-user identifiers passed via `user_id` are SHA-256 hashed in the SDK before
+send; the plaintext never leaves your process.

botzone_cost-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,55 @@
+# botzone-cost
+Cost-tracking SDK for Anthropic, OpenAI, and Gemini Python clients. Wrap your
+existing client; per-call usage flows to your Cost dashboard. Adds zero
+measurable latency to the host call.
+## Install
+```
+pip install botzone-cost
+```
+## Usage
+```python
+from anthropic import Anthropic
+from botzone_cost import wrap
+client = wrap(Anthropic(), api_key="cost_sk_...", route="follow-up-draft")
+```
+Same surface for OpenAI and Gemini:
+```python
+from openai import OpenAI
+import google.generativeai as genai
+from botzone_cost import wrap
+openai_client = wrap(OpenAI(), route="summariser")
+gemini = wrap(genai.GenerativeModel("gemini-2.5-flash"), route="classifier")
+```
+## Options
+| arg              | default                                          |
+| ---------------- | ------------------------------------------------ |
+| `api_key`        | env `COST_API_KEY`                               |
+| `endpoint`       | env `COST_ENDPOINT` or `https://cost.botzone.ai` |
+| `route`          | (none: strongly recommended)                     |
+| `user_id`        | (sha256-hashed in the SDK before send)           |
+| `feature_tag`    | (none)                                           |
+| `enabled`        | `True`                                           |
+| `capture_bodies` | `False` (reserved, no effect today, see below)   |
+## What gets captured
+Token counts (including Anthropic prompt-cache reads / writes and OpenAI cached
+prompt tokens), latency, model, route, user id (hashed), feature tag. Computed
+USD cost is added server-side from the live pricing table. The Python SDK is
+**metadata-only today**: it does not send raw request or response bodies, and
+the `capture_bodies` parameter is reserved for future parity with the
+TypeScript SDK.
+End-user identifiers passed via `user_id` are SHA-256 hashed in the SDK before
+send; the plaintext never leaves your process.

botzone_cost-0.1.0/botzone_cost/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Cost-tracking SDK for Anthropic, OpenAI, and Gemini clients."""
+from ._wrap import wrap, flush
+__all__ = ["wrap", "flush"]
+__version__ = "0.1.0"

botzone_cost-0.1.0/botzone_cost/_wrap.py ADDED Viewed

@@ -0,0 +1,201 @@
+"""wrap() entry point. Auto-detects Anthropic / OpenAI / Gemini clients."""
+from __future__ import annotations
+import hashlib
+import os
+import time
+from datetime import datetime, timezone
+from typing import Any, Optional
+from .queue import IngestionQueue
+# Process-wide registry of ingestion queues, keyed by "<api_key>|<endpoint>",
+# so repeated wrap() calls with the same credentials share one queue.
+_queues: dict[str, IngestionQueue] = {}
+def _get_queue(api_key: Optional[str], endpoint: Optional[str], enabled: bool) -> Optional[IngestionQueue]:
+    """Return the shared queue for these credentials, creating it on first use.
+    Args:
+        api_key: Explicit API key; falls back to env ``COST_API_KEY``.
+        endpoint: Explicit base URL; falls back to env ``COST_ENDPOINT``, then
+            to ``https://cost.botzone.ai``.
+        enabled: When false, tracking is off and no queue is created.
+    Returns:
+        The queue to enqueue events on, or ``None`` when tracking is disabled
+        or no API key is available.
+    """
+    if not enabled:
+        return None
+    api_key = api_key or os.environ.get("COST_API_KEY")
+    if not api_key:
+        return None
+    # Chain with `or` instead of a .get() default: the default only applies
+    # when the variable is unset, so COST_ENDPOINT="" would otherwise produce
+    # an empty endpoint and every POST would target a relative URL.
+    endpoint = endpoint or os.environ.get("COST_ENDPOINT") or "https://cost.botzone.ai"
+    key = f"{api_key}|{endpoint}"
+    q = _queues.get(key)
+    if q is None:
+        q = IngestionQueue(api_key=api_key, endpoint=endpoint)
+        _queues[key] = q
+    return q
+def _sha256(s: str) -> str:
+    """Return the lowercase hex SHA-256 digest of ``s`` encoded as UTF-8."""
+    digest = hashlib.sha256()
+    digest.update(s.encode("utf-8"))
+    return digest.hexdigest()
+def _now() -> str:
+    """Return the current UTC time as a timezone-aware ISO 8601 string."""
+    current = datetime.now(tz=timezone.utc)
+    return current.isoformat()
+def wrap(
+    client: Any,
+    *,
+    api_key: Optional[str] = None,
+    endpoint: Optional[str] = None,
+    route: Optional[str] = None,
+    user_id: Optional[str] = None,
+    feature_tag: Optional[str] = None,
+    enabled: bool = True,
+    capture_bodies: bool = False,
+    provider: Optional[str] = None,
+) -> Any:
+    """Instrument an LLM client so each call reports a cost-tracking event.
+    The client is patched in place and returned, so the call can be chained.
+    ``user_id`` is SHA-256 hashed here, before it is attached to any event.
+    Only metadata is reported (token counts, model, route, latency, hashed
+    user id, feature tag); request and response bodies are never sent.
+    ``capture_bodies`` is accepted for future parity with the TypeScript SDK
+    and currently has no effect.
+    Raises:
+        ValueError: if the provider is neither passed via ``provider`` nor
+            detectable from ``client``, or is not one of ``'anthropic'``,
+            ``'openai'``, ``'gemini'``.
+    """
+    queue = _get_queue(api_key, endpoint, enabled)
+    user_hash = None
+    if user_id:
+        user_hash = _sha256(user_id)
+    detected = provider or _detect(client)
+    # Provider name -> function that patches that provider's client.
+    wrappers = (
+        ("anthropic", _wrap_anthropic),
+        ("openai", _wrap_openai),
+        ("gemini", _wrap_gemini),
+    )
+    for name, wrapper in wrappers:
+        if detected == name:
+            return wrapper(client, queue, route, user_hash, feature_tag, capture_bodies)
+    raise ValueError(
+        "[botzone-cost] could not detect provider: pass provider='anthropic'|'openai'|'gemini'"
+    )
+def _detect(client: Any) -> Optional[str]:
+    """Guess the provider of ``client``; return ``None`` if it is unrecognised.
+    The module path of the client's class is checked first, so auto-mocks
+    don't all look like Anthropic. Attribute-based duck typing is the
+    fallback for thin wrappers that don't carry the provider's module name.
+    """
+    module = getattr(type(client), "__module__", "") or ""
+    module_prefixes = (
+        (("anthropic",), "anthropic"),
+        (("openai",), "openai"),
+        (("google.generativeai", "google.genai"), "gemini"),
+    )
+    for prefixes, provider in module_prefixes:
+        if module.startswith(prefixes):
+            return provider
+    # Duck typing, checked in the same priority order as the module prefixes.
+    if hasattr(client, "messages"):
+        if hasattr(client.messages, "create"):
+            return "anthropic"
+    if hasattr(client, "chat"):
+        if hasattr(client.chat, "completions"):
+            return "openai"
+    return "gemini" if hasattr(client, "generate_content") else None
+def _emit(queue: Optional[IngestionQueue], event: dict) -> None:
+    """Enqueue ``event`` on ``queue``; do nothing when there is no queue."""
+    if queue is None:
+        return
+    queue.enqueue(event)
+def _wrap_anthropic(client: Any, queue, route, user_hash, feature_tag, capture_bodies):
+    """Patch ``client.messages.create`` to report usage after each call.
+    The original method is called with the caller's arguments unchanged and
+    its result is returned as-is; an exception from it propagates and no
+    event is emitted. When ``queue`` is ``None`` the call is only timed.
+    Token counts are read from ``result.usage``; any count that is missing
+    or ``None`` (including when the result has no ``usage`` at all) is
+    reported as 0.
+    NOTE(review): streaming and async results presumably carry no ``usage``
+    on the returned object and would therefore report 0 tokens; confirm.
+    ``capture_bodies`` is accepted for signature parity and is unused.
+    Returns:
+        The same ``client``, mutated.
+    """
+    original = client.messages.create
+    def wrapped(*args, **kwargs):
+        # perf_counter() is monotonic, so the measured latency cannot be
+        # skewed (or go negative) when the wall clock is adjusted mid-call.
+        start = time.perf_counter()
+        result = original(*args, **kwargs)
+        latency_ms = int((time.perf_counter() - start) * 1000)
+        if queue is None:
+            return result
+        usage = getattr(result, "usage", None)
+        _emit(queue, {
+            "provider": "anthropic",
+            # Prefer the model the caller asked for; fall back to the response.
+            "model": kwargs.get("model") or getattr(result, "model", "unknown"),
+            "promptTokens": getattr(usage, "input_tokens", 0) or 0,
+            "completionTokens": getattr(usage, "output_tokens", 0) or 0,
+            "cachedTokens": getattr(usage, "cache_read_input_tokens", 0) or 0,
+            "cacheCreationTokens": getattr(usage, "cache_creation_input_tokens", 0) or 0,
+            "latencyMs": latency_ms,
+            "route": route,
+            "userIdHash": user_hash,
+            "featureTag": feature_tag,
+            "occurredAt": _now(),
+        })
+        return result
+    client.messages.create = wrapped
+    return client
+def _wrap_openai(client: Any, queue, route, user_hash, feature_tag, capture_bodies):
+    """Patch ``client.chat.completions.create`` to report usage after each call.
+    The original method is called with the caller's arguments unchanged and
+    its result is returned as-is; an exception from it propagates and no
+    event is emitted. When ``queue`` is ``None`` the call is only timed.
+    Token counts are read from ``result.usage`` and cached prompt tokens from
+    ``usage.prompt_tokens_details.cached_tokens``; anything missing or
+    ``None`` is reported as 0. ``cacheCreationTokens`` is always 0 here.
+    ``capture_bodies`` is accepted for signature parity and is unused.
+    Returns:
+        The same ``client``, mutated.
+    """
+    original = client.chat.completions.create
+    def wrapped(*args, **kwargs):
+        # perf_counter() is monotonic, so the measured latency cannot be
+        # skewed (or go negative) when the wall clock is adjusted mid-call.
+        start = time.perf_counter()
+        result = original(*args, **kwargs)
+        latency_ms = int((time.perf_counter() - start) * 1000)
+        if queue is None:
+            return result
+        usage = getattr(result, "usage", None)
+        details = getattr(usage, "prompt_tokens_details", None)
+        cached = getattr(details, "cached_tokens", 0) if details else 0
+        _emit(queue, {
+            "provider": "openai",
+            # Prefer the model the caller asked for; fall back to the response.
+            "model": kwargs.get("model") or getattr(result, "model", "unknown"),
+            "promptTokens": getattr(usage, "prompt_tokens", 0) or 0,
+            "completionTokens": getattr(usage, "completion_tokens", 0) or 0,
+            "cachedTokens": cached or 0,
+            "cacheCreationTokens": 0,
+            "latencyMs": latency_ms,
+            "route": route,
+            "userIdHash": user_hash,
+            "featureTag": feature_tag,
+            "occurredAt": _now(),
+        })
+        return result
+    client.chat.completions.create = wrapped
+    return client
+def _wrap_gemini(model: Any, queue, route, user_hash, feature_tag, capture_bodies):
+    """Patch ``model.generate_content`` to report usage after each call.
+    Wraps a ``google.generativeai.GenerativeModel`` instance directly. The
+    model name is read once, at wrap time, from ``model.model_name`` (or
+    ``model._model_name``, else ``"unknown"``) with any leading ``models/``
+    removed.
+    The original method is called with the caller's arguments unchanged and
+    its result is returned as-is; an exception from it propagates and no
+    event is emitted. When ``queue`` is ``None`` the call is only timed.
+    Token counts are read from ``result.usage_metadata``; anything missing or
+    ``None`` is reported as 0. ``cacheCreationTokens`` is always 0 here.
+    ``capture_bodies`` is accepted for signature parity and is unused.
+    Returns:
+        The same ``model``, mutated.
+    """
+    original = model.generate_content
+    model_name = getattr(model, "model_name", None) or getattr(model, "_model_name", "unknown")
+    if isinstance(model_name, str) and model_name.startswith("models/"):
+        model_name = model_name[len("models/"):]
+    def wrapped(*args, **kwargs):
+        # perf_counter() is monotonic, so the measured latency cannot be
+        # skewed (or go negative) when the wall clock is adjusted mid-call.
+        start = time.perf_counter()
+        result = original(*args, **kwargs)
+        latency_ms = int((time.perf_counter() - start) * 1000)
+        if queue is None:
+            return result
+        usage = getattr(result, "usage_metadata", None)
+        prompt = getattr(usage, "prompt_token_count", 0) if usage else 0
+        completion = getattr(usage, "candidates_token_count", 0) if usage else 0
+        cached = getattr(usage, "cached_content_token_count", 0) if usage else 0
+        _emit(queue, {
+            "provider": "gemini",
+            "model": model_name,
+            "promptTokens": prompt or 0,
+            "completionTokens": completion or 0,
+            "cachedTokens": cached or 0,
+            "cacheCreationTokens": 0,
+            "latencyMs": latency_ms,
+            "route": route,
+            "userIdHash": user_hash,
+            "featureTag": feature_tag,
+            "occurredAt": _now(),
+        })
+        return result
+    model.generate_content = wrapped
+    return model
+def flush() -> None:
+    """Send the pending events of every queue created so far.
+    Blocks while the batches are posted; useful in short scripts before exit.
+    """
+    # Iterate a snapshot: wrap() on another thread may register a new queue
+    # while we are flushing, and mutating a dict during iteration raises
+    # RuntimeError.
+    for q in list(_queues.values()):
+        q.flush()

botzone_cost-0.1.0/botzone_cost/queue.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""Background ingestion queue.
+Fire-and-forget: enqueue() returns immediately. A daemon thread flushes the
+buffer every 2 seconds (and on process exit). Drops events past a 1000-item
+cap rather than leak memory.
+"""
+from __future__ import annotations
+import atexit
+import json
+import threading
+import time
+from typing import Optional
+import httpx
+class IngestionQueue:
+    """Bounded, lock-protected event buffer that is posted to the ingestion API.
+    ``enqueue()`` only appends to an in-memory list. A daemon thread started
+    by the constructor calls ``flush()`` every 2 seconds, and an ``atexit``
+    hook performs a final flush. Delivery is best effort: events that cannot
+    be sent after the retries are discarded.
+    """
+    # Events arriving while this many are already buffered are dropped.
+    _MAX_BUFFERED = 1000
+    # Number of events posted per HTTP request.
+    _CHUNK_SIZE = 50
+    # Highest retry index; with back-off 0.2s * 2**attempt this gives up to
+    # four posts per chunk (sleeping 0.2s, 0.4s, 0.8s in between).
+    _MAX_RETRIES = 3
+    def __init__(self, api_key: str, endpoint: str, *, http: Optional[httpx.Client] = None):
+        """Create the queue and start its background flusher.
+        Args:
+            api_key: Sent as the ``x-api-key`` header on every request.
+            endpoint: Base URL of the ingestion service; trailing slashes
+                are stripped.
+            http: Client used for the POSTs. Defaults to a new
+                ``httpx.Client`` with a 5 second timeout.
+        """
+        self._api_key = api_key
+        self._endpoint = endpoint.rstrip("/")
+        self._buf: list[dict] = []
+        self._lock = threading.Lock()
+        self._stop = threading.Event()
+        self._dropped = 0
+        self._http = http or httpx.Client(timeout=5.0)
+        self._thread = threading.Thread(target=self._loop, daemon=True)
+        self._thread.start()
+        atexit.register(self._on_exit)
+    def enqueue(self, event: dict) -> None:
+        """Buffer ``event`` for the next flush, or drop it if the buffer is full."""
+        with self._lock:
+            if len(self._buf) >= self._MAX_BUFFERED:
+                self._dropped += 1
+                return
+            self._buf.append(event)
+    def flush(self) -> None:
+        """Post everything buffered so far, in chunks, on the calling thread."""
+        # Swap the buffer out under the lock so the (slow) network calls
+        # below never block enqueue().
+        with self._lock:
+            batch, self._buf = self._buf, []
+        if not batch:
+            return
+        for chunk_start in range(0, len(batch), self._CHUNK_SIZE):
+            self._send(batch[chunk_start : chunk_start + self._CHUNK_SIZE])
+    def dropped_count(self) -> int:
+        """Return how many events were dropped because the buffer was full."""
+        return self._dropped
+    def _send(self, batch: list[dict], attempt: int = 0) -> None:
+        """POST one chunk of events, retrying transient failures with back-off.
+        Transport errors and HTTP 429 / 5xx responses are retried; any other
+        response (including other 4xx) ends the attempt. Never raises for a
+        failed delivery: the chunk is discarded once retries are exhausted.
+        Args:
+            batch: Events to send in a single request.
+            attempt: Retry index to start from (0 for a fresh send).
+        """
+        try:
+            body = json.dumps({"events": batch})
+        except (TypeError, ValueError):
+            # The payload can never be serialised; retrying would only sleep.
+            return
+        url = f"{self._endpoint}/api/v1/events"
+        headers = {"x-api-key": self._api_key, "content-type": "application/json"}
+        while True:
+            try:
+                response = self._http.post(url, headers=headers, content=body)
+            except Exception:
+                # Best-effort boundary: whatever the client raised, telemetry
+                # must not propagate into the host application.
+                retryable = True
+            else:
+                # httpx does not raise on error statuses, so inspect the code
+                # ourselves; isinstance guards against stand-in clients whose
+                # responses carry no integer status.
+                status = getattr(response, "status_code", None)
+                retryable = isinstance(status, int) and (status == 429 or status >= 500)
+            if not retryable or attempt >= self._MAX_RETRIES:
+                return
+            time.sleep(0.2 * (2 ** attempt))
+            attempt += 1
+    def _loop(self) -> None:
+        """Background worker: flush every 2 seconds until asked to stop."""
+        while not self._stop.is_set():
+            # wait() doubles as the sleep and returns early once _stop is set.
+            self._stop.wait(2.0)
+            try:
+                self.flush()
+            except Exception:
+                # A failed flush must not kill the worker thread.
+                pass
+    def _on_exit(self) -> None:
+        """atexit hook: stop the worker and make a final flush attempt."""
+        self._stop.set()
+        try:
+            self.flush()
+        except Exception:
+            # Never let telemetry raise during interpreter shutdown.
+            pass

botzone_cost-0.1.0/botzone_cost.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,85 @@
+Metadata-Version: 2.4
+Name: botzone-cost
+Version: 0.1.0
+Summary: Cost-tracking SDK for Anthropic, OpenAI, and Gemini Python clients
+Author-email: Botzone <hello@botzone.ai>
+License: MIT
+Project-URL: Homepage, https://github.com/botzone-ai/cost-sdk-py
+Project-URL: Repository, https://github.com/botzone-ai/cost-sdk-py
+Project-URL: Issues, https://github.com/botzone-ai/cost-sdk-py/issues
+Keywords: llm,cost-tracking,anthropic,openai,gemini
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: httpx>=0.25
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.30; extra == "anthropic"
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == "openai"
+Provides-Extra: gemini
+Requires-Dist: google-generativeai>=0.5; extra == "gemini"
+Provides-Extra: test
+Requires-Dist: pytest>=7; extra == "test"
+Requires-Dist: pytest-asyncio>=0.23; extra == "test"
+Dynamic: license-file
+# botzone-cost
+Cost-tracking SDK for Anthropic, OpenAI, and Gemini Python clients. Wrap your
+existing client; per-call usage flows to your Cost dashboard. Adds zero
+measurable latency to the host call.
+## Install
+```
+pip install botzone-cost
+```
+## Usage
+```python
+from anthropic import Anthropic
+from botzone_cost import wrap
+client = wrap(Anthropic(), api_key="cost_sk_...", route="follow-up-draft")
+```
+Same surface for OpenAI and Gemini:
+```python
+from openai import OpenAI
+import google.generativeai as genai
+from botzone_cost import wrap
+openai_client = wrap(OpenAI(), route="summariser")
+gemini = wrap(genai.GenerativeModel("gemini-2.5-flash"), route="classifier")
+```
+## Options
+| arg              | default                                          |
+| ---------------- | ------------------------------------------------ |
+| `api_key`        | env `COST_API_KEY`                               |
+| `endpoint`       | env `COST_ENDPOINT` or `https://cost.botzone.ai` |
+| `route`          | (none: strongly recommended)                     |
+| `user_id`        | (sha256-hashed in the SDK before send)           |
+| `feature_tag`    | (none)                                           |
+| `enabled`        | `True`                                           |
+| `capture_bodies` | `False` (reserved, no effect today, see below)   |
+## What gets captured
+Token counts (including Anthropic prompt-cache reads / writes and OpenAI cached
+prompt tokens), latency, model, route, user id (hashed), feature tag. Computed
+USD cost is added server-side from the live pricing table. The Python SDK is
+**metadata-only today**: it does not send raw request or response bodies, and
+the `capture_bodies` parameter is reserved for future parity with the
+TypeScript SDK.
+End-user identifiers passed via `user_id` are SHA-256 hashed in the SDK before
+send; the plaintext never leaves your process.

botzone_cost-0.1.0/botzone_cost.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+LICENSE
+README.md
+pyproject.toml
+botzone_cost/__init__.py
+botzone_cost/_wrap.py
+botzone_cost/queue.py
+botzone_cost.egg-info/PKG-INFO
+botzone_cost.egg-info/SOURCES.txt
+botzone_cost.egg-info/dependency_links.txt
+botzone_cost.egg-info/requires.txt
+botzone_cost.egg-info/top_level.txt
+tests/test_wrap.py

botzone_cost-0.1.0/botzone_cost.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

botzone_cost-0.1.0/botzone_cost.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,14 @@
+httpx>=0.25
+[anthropic]
+anthropic>=0.30
+[gemini]
+google-generativeai>=0.5
+[openai]
+openai>=1.0
+[test]
+pytest>=7
+pytest-asyncio>=0.23

botzone_cost-0.1.0/botzone_cost.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+botzone_cost

botzone_cost-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,36 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "botzone-cost"
+version = "0.1.0"
+description = "Cost-tracking SDK for Anthropic, OpenAI, and Gemini Python clients"
+readme = "README.md"
+license = { text = "MIT" }
+authors = [{ name = "Botzone", email = "hello@botzone.ai" }]
+requires-python = ">=3.9"
+dependencies = ["httpx>=0.25"]
+keywords = ["llm", "cost-tracking", "anthropic", "openai", "gemini"]
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Operating System :: OS Independent",
+]
+[project.urls]
+Homepage = "https://github.com/botzone-ai/cost-sdk-py"
+Repository = "https://github.com/botzone-ai/cost-sdk-py"
+Issues = "https://github.com/botzone-ai/cost-sdk-py/issues"
+[project.optional-dependencies]
+anthropic = ["anthropic>=0.30"]
+openai = ["openai>=1.0"]
+gemini = ["google-generativeai>=0.5"]
+test = ["pytest>=7", "pytest-asyncio>=0.23"]
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["botzone_cost*"]

botzone_cost-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

botzone_cost-0.1.0/tests/test_wrap.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Unit tests for botzone_cost.wrap (no real LLM clients required)."""
+from __future__ import annotations
+import json
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+from botzone_cost import wrap, flush
+def _fake_anthropic(input_tokens=100, output_tokens=50, cached=80):
+    """Build a mock Anthropic client whose ``messages.create`` returns canned usage."""
+    usage = SimpleNamespace(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cache_read_input_tokens=cached,
+        cache_creation_input_tokens=0,
+    )
+    response = SimpleNamespace(model="claude-sonnet-4-6", usage=usage)
+    client = MagicMock()
+    client.messages.create.return_value = response
+    return client
+def _fake_openai(prompt_tokens=200, completion_tokens=75, cached=50):
+    """Build a mock OpenAI client whose ``chat.completions.create`` returns canned usage."""
+    usage = SimpleNamespace(
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        prompt_tokens_details=SimpleNamespace(cached_tokens=cached),
+    )
+    response = SimpleNamespace(model="gpt-4o-mini", usage=usage)
+    client = MagicMock()
+    client.chat.completions.create.return_value = response
+    return client
+def _fake_gemini(prompt=300, completion=100, cached=0):
+    """Build a mock Gemini model whose ``generate_content`` returns canned usage."""
+    usage_metadata = SimpleNamespace(
+        prompt_token_count=prompt,
+        candidates_token_count=completion,
+        cached_content_token_count=cached,
+    )
+    model = MagicMock()
+    model.model_name = "gemini-2.5-flash"
+    model.generate_content.return_value = SimpleNamespace(usage_metadata=usage_metadata)
+    return model
+def _captured_events(monkeypatch, body_calls):
+    """Return a stand-in for ``IngestionQueue.enqueue`` that appends to ``body_calls``.
+    ``monkeypatch`` is accepted but not used here.
+    """
+    def record(self, event):
+        body_calls.append(event)
+    return record
+def _setup(monkeypatch, body_calls):
+    """Point the SDK at a test key/endpoint and capture events in ``body_calls``.
+    Also clears the module-level queue registry so each test gets a fresh
+    queue, and replaces ``IngestionQueue.enqueue`` with a recorder.
+    """
+    for name, value in (
+        ("COST_API_KEY", "cost_sk_test"),
+        ("COST_ENDPOINT", "http://localhost:3001"),
+    ):
+        monkeypatch.setenv(name, value)
+    from botzone_cost import _wrap as wrap_mod
+    wrap_mod._queues.clear()
+    from botzone_cost.queue import IngestionQueue
+    recorder = _captured_events(monkeypatch, body_calls)
+    monkeypatch.setattr(IngestionQueue, "enqueue", recorder)
+def test_anthropic(monkeypatch):
+    """One Anthropic call emits exactly one event carrying its usage and route."""
+    events: list = []
+    _setup(monkeypatch, events)
+    wrapped = wrap(_fake_anthropic(), route="test", provider="anthropic")
+    wrapped.messages.create(model="claude-sonnet-4-6", messages=[])
+    assert len(events) == 1
+    event = events[0]
+    assert event["provider"] == "anthropic"
+    assert event["promptTokens"] == 100
+    assert event["cachedTokens"] == 80
+    assert event["route"] == "test"
+def test_openai(monkeypatch):
+    """One OpenAI call emits exactly one event with prompt and cached token counts."""
+    events: list = []
+    _setup(monkeypatch, events)
+    wrapped = wrap(_fake_openai(), route="summarise", provider="openai")
+    wrapped.chat.completions.create(model="gpt-4o-mini", messages=[])
+    assert len(events) == 1
+    event = events[0]
+    assert event["provider"] == "openai"
+    assert event["promptTokens"] == 200
+    assert event["cachedTokens"] == 50
+def test_gemini(monkeypatch):
+    """One Gemini call emits exactly one event with the model name and prompt tokens."""
+    events: list = []
+    _setup(monkeypatch, events)
+    wrapped = wrap(_fake_gemini(), route="classify", provider="gemini")
+    wrapped.generate_content("hello")
+    assert len(events) == 1
+    event = events[0]
+    assert event["provider"] == "gemini"
+    assert event["model"] == "gemini-2.5-flash"
+    assert event["promptTokens"] == 300
+def test_disabled(monkeypatch):
+    """With ``enabled=False`` the wrapped client emits no events."""
+    events: list = []
+    _setup(monkeypatch, events)
+    wrapped = wrap(_fake_anthropic(), enabled=False, provider="anthropic")
+    wrapped.messages.create(model="claude-sonnet-4-6", messages=[])
+    assert events == []
+def test_passthrough_returns_result(monkeypatch):
+    """The wrapped method hands back the provider's result unchanged."""
+    events: list = []
+    _setup(monkeypatch, events)
+    wrapped = wrap(_fake_anthropic(), provider="anthropic")
+    response = wrapped.messages.create(model="claude-sonnet-4-6", messages=[])
+    assert response.model == "claude-sonnet-4-6"