lago-agent-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lago_agent_sdk-0.1.0/LICENSE +21 -0
- lago_agent_sdk-0.1.0/PKG-INFO +289 -0
- lago_agent_sdk-0.1.0/README.md +243 -0
- lago_agent_sdk-0.1.0/pyproject.toml +98 -0
- lago_agent_sdk-0.1.0/setup.cfg +4 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/__init__.py +23 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/__init__.py +16 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/anthropic_native.py +91 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/bedrock_converse.py +95 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/bedrock_invoke.py +247 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/gemini_native.py +135 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/mistral_native.py +64 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/adapters/openai_native.py +157 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/canonical.py +45 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/config.py +55 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/detector.py +52 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/exceptions.py +24 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/lago_client.py +39 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/queue.py +150 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/sdk.py +173 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/wrappers/__init__.py +0 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/wrappers/anthropic.py +273 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/wrappers/boto3_bedrock.py +221 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/wrappers/gemini.py +157 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/wrappers/mistral.py +167 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk/wrappers/openai.py +211 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk.egg-info/PKG-INFO +289 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk.egg-info/SOURCES.txt +29 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk.egg-info/dependency_links.txt +1 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk.egg-info/requires.txt +30 -0
- lago_agent_sdk-0.1.0/src/lago_agent_sdk.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025–2026 Lago
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lago-agent-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Instrument LLM clients and emit usage events to Lago.
|
|
5
|
+
Project-URL: Homepage, https://www.getlago.com
|
|
6
|
+
Project-URL: Repository, https://github.com/getlago/lago-agent-sdk-python
|
|
7
|
+
Project-URL: Issues, https://github.com/getlago/lago-agent-sdk-python/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/getlago/lago-agent-sdk-python/blob/main/CHANGELOG.md
|
|
9
|
+
Keywords: lago,billing,llm,metering,bedrock,mistral,anthropic,openai
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: requests>=2.31
|
|
22
|
+
Provides-Extra: bedrock
|
|
23
|
+
Requires-Dist: boto3>=1.34; extra == "bedrock"
|
|
24
|
+
Provides-Extra: mistral
|
|
25
|
+
Requires-Dist: mistralai>=2.0; extra == "mistral"
|
|
26
|
+
Provides-Extra: anthropic
|
|
27
|
+
Requires-Dist: anthropic>=0.30; extra == "anthropic"
|
|
28
|
+
Provides-Extra: openai
|
|
29
|
+
Requires-Dist: openai>=1.50; extra == "openai"
|
|
30
|
+
Provides-Extra: gemini
|
|
31
|
+
Requires-Dist: google-genai>=1.0; extra == "gemini"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov>=5; extra == "dev"
|
|
36
|
+
Requires-Dist: hypothesis>=6; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
38
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
39
|
+
Requires-Dist: types-requests>=2.31; extra == "dev"
|
|
40
|
+
Requires-Dist: boto3>=1.34; extra == "dev"
|
|
41
|
+
Requires-Dist: mistralai>=2.0; extra == "dev"
|
|
42
|
+
Requires-Dist: anthropic>=0.30; extra == "dev"
|
|
43
|
+
Requires-Dist: openai>=1.50; extra == "dev"
|
|
44
|
+
Requires-Dist: google-genai>=1.0; extra == "dev"
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
# lago-agent-sdk
|
|
48
|
+
|
|
49
|
+
Instrument LLM clients and emit usage events to [Lago](https://www.getlago.com) for billing.
|
|
50
|
+
|
|
51
|
+
```text
|
|
52
|
+
┌──────────────┐
|
|
53
|
+
your code ──────► │ wrapped client│ ──► provider (Bedrock / Mistral / …)
|
|
54
|
+
└──────┬───────┘
|
|
55
|
+
│ (extract usage)
|
|
56
|
+
▼
|
|
57
|
+
┌──────────────┐
|
|
58
|
+
│ Lago events │ ──► api.getlago.com
|
|
59
|
+
└──────────────┘
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## What it does
|
|
63
|
+
|
|
64
|
+
- Wraps your existing LLM client in place — no API surface change for your application code.
|
|
65
|
+
- Extracts usage from each response into a normalized shape (`CanonicalUsage`).
|
|
66
|
+
- Buffers events in memory, flushes them in batches to Lago's `/events/batch` endpoint.
|
|
67
|
+
- Survives provider/Lago outages with exponential backoff and a bounded buffer.
|
|
68
|
+
- p99 wrap-overhead under 5 ms — your call is never blocked on Lago.
|
|
69
|
+
|
|
70
|
+
## Install
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install lago-agent-sdk
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
|
|
77
|
+
For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
|
|
78
|
+
For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
|
|
79
|
+
For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`).
|
|
80
|
+
For Gemini native support: `pip install 'lago-agent-sdk[gemini]'` (adds `google-genai`).
|
|
81
|
+
|
|
82
|
+
## Quickstart — Bedrock
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import boto3
|
|
86
|
+
from lago_agent_sdk import LagoSDK
|
|
87
|
+
|
|
88
|
+
sdk = LagoSDK(
|
|
89
|
+
api_key="<YOUR_LAGO_API_KEY>",
|
|
90
|
+
api_url="https://api.getlago.com/api/v1/",
|
|
91
|
+
default_subscription_id="sub_acme",
|
|
92
|
+
)
|
|
93
|
+
client = sdk.wrap(boto3.client("bedrock-runtime", region_name="eu-west-1"))
|
|
94
|
+
|
|
95
|
+
resp = client.converse(
|
|
96
|
+
modelId="eu.amazon.nova-lite-v1:0",
|
|
97
|
+
messages=[{"role": "user", "content": [{"text": "Hello"}]}],
|
|
98
|
+
)
|
|
99
|
+
sdk.flush()
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The wrapped client behaves identically to the original — same arguments, same return shape, same exceptions. The SDK adds an in-memory queue that batches events to Lago in the background.
|
|
103
|
+
|
|
104
|
+
## Quickstart — Anthropic
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from anthropic import Anthropic
|
|
108
|
+
from lago_agent_sdk import LagoSDK
|
|
109
|
+
|
|
110
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
111
|
+
client = sdk.wrap(Anthropic(api_key="..."))
|
|
112
|
+
|
|
113
|
+
resp = client.messages.create(
|
|
114
|
+
model="claude-sonnet-4-6",
|
|
115
|
+
max_tokens=200,
|
|
116
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
117
|
+
)
|
|
118
|
+
sdk.flush()
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Works with `Anthropic` and `AsyncAnthropic`. Both `messages.create(..., stream=True)` and the `messages.stream(...)` context manager are instrumented — usage is captured from the final `message_delta` event in either case.
|
|
122
|
+
|
|
123
|
+
## Quickstart — Mistral
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from mistralai.client import Mistral
|
|
127
|
+
from lago_agent_sdk import LagoSDK
|
|
128
|
+
|
|
129
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
130
|
+
client = sdk.wrap(Mistral(api_key="..."))
|
|
131
|
+
|
|
132
|
+
resp = client.chat.complete(
|
|
133
|
+
model="mistral-small-latest",
|
|
134
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
135
|
+
)
|
|
136
|
+
sdk.flush()
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Quickstart — OpenAI
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from openai import OpenAI
|
|
143
|
+
from lago_agent_sdk import LagoSDK
|
|
144
|
+
|
|
145
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
146
|
+
client = sdk.wrap(OpenAI(api_key="..."))
|
|
147
|
+
|
|
148
|
+
resp = client.chat.completions.create(
|
|
149
|
+
model="gpt-4o-mini",
|
|
150
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
151
|
+
max_completion_tokens=200,
|
|
152
|
+
)
|
|
153
|
+
sdk.flush()
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client.chat.completions.create`) and the newer **Responses API** (`client.responses.create`), sync + streaming. For streaming, the wrapper auto-injects `stream_options={"include_usage": True}` so the final chunk carries usage data — without it OpenAI emits no usage on streamed responses.
|
|
157
|
+
|
|
158
|
+
**Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately.
|
|
159
|
+
|
|
160
|
+
## Quickstart — Gemini
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
from google import genai
|
|
164
|
+
from lago_agent_sdk import LagoSDK
|
|
165
|
+
|
|
166
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
167
|
+
client = sdk.wrap(genai.Client(api_key="..."))
|
|
168
|
+
|
|
169
|
+
resp = client.models.generate_content(
|
|
170
|
+
model="gemini-2.5-flash",
|
|
171
|
+
contents="Hello",
|
|
172
|
+
)
|
|
173
|
+
sdk.flush()
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Wraps the modern `google-genai` SDK (`from google import genai`). Covers `client.models.generate_content` + `generate_content_stream`, sync + async (via `client.aio.models`).
|
|
177
|
+
|
|
178
|
+
**Reasoning tokens** populate automatically on Gemini 2.5 — the model reasons internally by default and surfaces `thoughts_token_count`. Note the semantic difference vs OpenAI:
|
|
179
|
+
- **OpenAI:** `reasoning_tokens` is a *subset* of `completion_tokens` (already counted in output)
|
|
180
|
+
- **Gemini:** `thoughts_token_count` is *additive* to `candidates_token_count` (total Google bill = output + reasoning)
|
|
181
|
+
|
|
182
|
+
## Multi-tenant — pick a subscription per call
|
|
183
|
+
|
|
184
|
+
Three ways to set the `external_subscription_id`, in priority order:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
# 1. Per-call override (highest precedence)
|
|
188
|
+
client.converse(..., extra_lago={"subscription": "sub_acme", "dimensions": {"feature": "summarize"}})
|
|
189
|
+
|
|
190
|
+
# 2. Context-bound (use in middleware to set once per request)
|
|
191
|
+
sdk.set_subscription("sub_acme")
|
|
192
|
+
# all calls in this thread/asyncio task → sub_acme
|
|
193
|
+
|
|
194
|
+
# 3. Default at init (fallback)
|
|
195
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_default")
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Backed by `contextvars` for safe propagation across `asyncio` tasks.
|
|
199
|
+
|
|
200
|
+
## Supported providers
|
|
201
|
+
|
|
202
|
+
| Provider | Access | Status |
|
|
203
|
+
|---|---|---|
|
|
204
|
+
| AWS Bedrock | `Converse` (sync + stream) | ✓ |
|
|
205
|
+
| AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ |
|
|
206
|
+
| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ |
|
|
207
|
+
| Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ |
|
|
208
|
+
| OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ |
|
|
209
|
+
| Google Gemini | native SDK (`google-genai`: `models.generate_content` + `generate_content_stream`, sync + async) | ✓ |
|
|
210
|
+
| LiteLLM | callback bridge | Phase 4 |
|
|
211
|
+
|
|
212
|
+
## Token dimensions captured
|
|
213
|
+
|
|
214
|
+
`CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider:
|
|
215
|
+
|
|
216
|
+
| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | Gemini |
|
|
217
|
+
|---|---|---|---|---|---|---|
|
|
218
|
+
| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
219
|
+
| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
220
|
+
| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | ✓ (CachedContent API) |
|
|
221
|
+
| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ | ✗ |
|
|
222
|
+
| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | ✗ |
|
|
223
|
+
| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series, subset)** | **✓ (Gemini 2.5, additive)** |
|
|
224
|
+
| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
225
|
+
| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
|
|
226
|
+
| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
|
|
227
|
+
| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3) | ✓ (multimodal IMAGE) |
|
|
228
|
+
|
|
229
|
+
**Semantic note on `reasoning`:**
|
|
230
|
+
- **OpenAI's `reasoning_tokens` is a SUBSET of `output`** — already counted in `completion_tokens`.
|
|
231
|
+
- **Gemini's `thoughts_token_count` is ADDITIVE to `output`** — `candidates + thoughts = total billable output`.
|
|
232
|
+
|
|
233
|
+
**Semantic note on input breakdowns (avoid double-counting):**
|
|
234
|
+
For both OpenAI and Gemini, `cache_read`, `audio_input`, and `image_input` are **subsets of `input`**, not additive to it — they are a breakdown of tokens already counted in `llm_input_tokens`. For example, OpenAI reports `cached_tokens` under `prompt_tokens_details` *within* `prompt_tokens`, and Gemini's docs state `prompt_token_count` "includes the number of tokens in the cached content". A billable metric that sums `llm_input_tokens + llm_cached_input_tokens` (or `+ llm_audio_input_tokens`, `+ llm_image_input_tokens`) will **double-count**. Bill on `llm_input_tokens` as the total; use the breakdown fields only for cost attribution or discounted-rate tiers (e.g. cached input billed at a lower rate), subtracting them from `input` rather than adding.
|
|
235
|
+
|
|
236
|
+
OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap.
|
|
237
|
+
|
|
238
|
+
## Error policy
|
|
239
|
+
|
|
240
|
+
The SDK never breaks your LLM call. If anything in instrumentation fails (adapter bug, Lago down, network error), the SDK swallows it, logs a warning, and your call returns normally.
|
|
241
|
+
|
|
242
|
+
## If subscription resolution returns nothing, the event is dropped with an `ERROR` log
|
|
243
|
+
|
|
244
|
+
Configurable via `LagoConfig.on_error` callback to integrate with Sentry, Datadog, etc.:
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
from lago_agent_sdk import LagoConfig, LagoSDK
|
|
248
|
+
|
|
249
|
+
def on_error(exc: Exception, where: str) -> None:
|
|
250
|
+
sentry.capture_exception(exc, tags={"sdk_phase": where})
|
|
251
|
+
|
|
252
|
+
sdk = LagoSDK(
|
|
253
|
+
api_key="...",
|
|
254
|
+
config=LagoConfig(api_key="...", on_error=on_error),
|
|
255
|
+
)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Setting up Lago
|
|
259
|
+
|
|
260
|
+
The SDK ships with default metric codes (`llm_input_tokens`, `llm_output_tokens`, etc.). You need to register matching billable metrics in your Lago tenant before events count toward charges. See [Lago docs — Billable Metrics](https://docs.getlago.com/api-reference/billable-metrics/create).
|
|
261
|
+
|
|
262
|
+
## Development
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
git clone https://github.com/getlago/lago-agent-sdk-python
|
|
266
|
+
cd lago-agent-sdk-python
|
|
267
|
+
python -m venv venv && source venv/bin/activate
|
|
268
|
+
pip install -e '.[dev]'
|
|
269
|
+
pytest
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
Run live integration tests (requires real credentials):
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
AWS_BEARER_TOKEN_BEDROCK="..." \
|
|
276
|
+
MISTRAL_API_KEY="..." \
|
|
277
|
+
LAGO_API_URL="https://api.getlago.com/api/v1/" \
|
|
278
|
+
LAGO_API_KEY="..." \
|
|
279
|
+
LAGO_EXTERNAL_SUBSCRIPTION_ID="sub_..." \
|
|
280
|
+
pytest tests/integration
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## Security
|
|
284
|
+
|
|
285
|
+
Found a vulnerability? See [SECURITY.md](SECURITY.md).
|
|
286
|
+
|
|
287
|
+
## License
|
|
288
|
+
|
|
289
|
+
[MIT License](LICENSE).
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# lago-agent-sdk
|
|
2
|
+
|
|
3
|
+
Instrument LLM clients and emit usage events to [Lago](https://www.getlago.com) for billing.
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
┌──────────────┐
|
|
7
|
+
your code ──────► │ wrapped client│ ──► provider (Bedrock / Mistral / …)
|
|
8
|
+
└──────┬───────┘
|
|
9
|
+
│ (extract usage)
|
|
10
|
+
▼
|
|
11
|
+
┌──────────────┐
|
|
12
|
+
│ Lago events │ ──► api.getlago.com
|
|
13
|
+
└──────────────┘
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## What it does
|
|
17
|
+
|
|
18
|
+
- Wraps your existing LLM client in place — no API surface change for your application code.
|
|
19
|
+
- Extracts usage from each response into a normalized shape (`CanonicalUsage`).
|
|
20
|
+
- Buffers events in memory, flushes them in batches to Lago's `/events/batch` endpoint.
|
|
21
|
+
- Survives provider/Lago outages with exponential backoff and a bounded buffer.
|
|
22
|
+
- p99 wrap-overhead under 5 ms — your call is never blocked on Lago.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install lago-agent-sdk
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
|
|
31
|
+
For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
|
|
32
|
+
For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
|
|
33
|
+
For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`).
|
|
34
|
+
For Gemini native support: `pip install 'lago-agent-sdk[gemini]'` (adds `google-genai`).
|
|
35
|
+
|
|
36
|
+
## Quickstart — Bedrock
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import boto3
|
|
40
|
+
from lago_agent_sdk import LagoSDK
|
|
41
|
+
|
|
42
|
+
sdk = LagoSDK(
|
|
43
|
+
api_key="<YOUR_LAGO_API_KEY>",
|
|
44
|
+
api_url="https://api.getlago.com/api/v1/",
|
|
45
|
+
default_subscription_id="sub_acme",
|
|
46
|
+
)
|
|
47
|
+
client = sdk.wrap(boto3.client("bedrock-runtime", region_name="eu-west-1"))
|
|
48
|
+
|
|
49
|
+
resp = client.converse(
|
|
50
|
+
modelId="eu.amazon.nova-lite-v1:0",
|
|
51
|
+
messages=[{"role": "user", "content": [{"text": "Hello"}]}],
|
|
52
|
+
)
|
|
53
|
+
sdk.flush()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
The wrapped client behaves identically to the original — same arguments, same return shape, same exceptions. The SDK adds an in-memory queue that batches events to Lago in the background.
|
|
57
|
+
|
|
58
|
+
## Quickstart — Anthropic
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from anthropic import Anthropic
|
|
62
|
+
from lago_agent_sdk import LagoSDK
|
|
63
|
+
|
|
64
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
65
|
+
client = sdk.wrap(Anthropic(api_key="..."))
|
|
66
|
+
|
|
67
|
+
resp = client.messages.create(
|
|
68
|
+
model="claude-sonnet-4-6",
|
|
69
|
+
max_tokens=200,
|
|
70
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
71
|
+
)
|
|
72
|
+
sdk.flush()
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Works with `Anthropic` and `AsyncAnthropic`. Both `messages.create(..., stream=True)` and the `messages.stream(...)` context manager are instrumented — usage is captured from the final `message_delta` event in either case.
|
|
76
|
+
|
|
77
|
+
## Quickstart — Mistral
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from mistralai.client import Mistral
|
|
81
|
+
from lago_agent_sdk import LagoSDK
|
|
82
|
+
|
|
83
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
84
|
+
client = sdk.wrap(Mistral(api_key="..."))
|
|
85
|
+
|
|
86
|
+
resp = client.chat.complete(
|
|
87
|
+
model="mistral-small-latest",
|
|
88
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
89
|
+
)
|
|
90
|
+
sdk.flush()
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Quickstart — OpenAI
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from openai import OpenAI
|
|
97
|
+
from lago_agent_sdk import LagoSDK
|
|
98
|
+
|
|
99
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
100
|
+
client = sdk.wrap(OpenAI(api_key="..."))
|
|
101
|
+
|
|
102
|
+
resp = client.chat.completions.create(
|
|
103
|
+
model="gpt-4o-mini",
|
|
104
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
105
|
+
max_completion_tokens=200,
|
|
106
|
+
)
|
|
107
|
+
sdk.flush()
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client.chat.completions.create`) and the newer **Responses API** (`client.responses.create`), sync + streaming. For streaming, the wrapper auto-injects `stream_options={"include_usage": True}` so the final chunk carries usage data — without it OpenAI emits no usage on streamed responses.
|
|
111
|
+
|
|
112
|
+
**Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately.
|
|
113
|
+
|
|
114
|
+
## Quickstart — Gemini
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from google import genai
|
|
118
|
+
from lago_agent_sdk import LagoSDK
|
|
119
|
+
|
|
120
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
|
|
121
|
+
client = sdk.wrap(genai.Client(api_key="..."))
|
|
122
|
+
|
|
123
|
+
resp = client.models.generate_content(
|
|
124
|
+
model="gemini-2.5-flash",
|
|
125
|
+
contents="Hello",
|
|
126
|
+
)
|
|
127
|
+
sdk.flush()
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Wraps the modern `google-genai` SDK (`from google import genai`). Covers `client.models.generate_content` + `generate_content_stream`, sync + async (via `client.aio.models`).
|
|
131
|
+
|
|
132
|
+
**Reasoning tokens** populate automatically on Gemini 2.5 — the model reasons internally by default and surfaces `thoughts_token_count`. Note the semantic difference vs OpenAI:
|
|
133
|
+
- **OpenAI:** `reasoning_tokens` is a *subset* of `completion_tokens` (already counted in output)
|
|
134
|
+
- **Gemini:** `thoughts_token_count` is *additive* to `candidates_token_count` (total Google bill = output + reasoning)
|
|
135
|
+
|
|
136
|
+
## Multi-tenant — pick a subscription per call
|
|
137
|
+
|
|
138
|
+
Three ways to set the `external_subscription_id`, in priority order:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
# 1. Per-call override (highest precedence)
|
|
142
|
+
client.converse(..., extra_lago={"subscription": "sub_acme", "dimensions": {"feature": "summarize"}})
|
|
143
|
+
|
|
144
|
+
# 2. Context-bound (use in middleware to set once per request)
|
|
145
|
+
sdk.set_subscription("sub_acme")
|
|
146
|
+
# all calls in this thread/asyncio task → sub_acme
|
|
147
|
+
|
|
148
|
+
# 3. Default at init (fallback)
|
|
149
|
+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_default")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Backed by `contextvars` for safe propagation across `asyncio` tasks.
|
|
153
|
+
|
|
154
|
+
## Supported providers
|
|
155
|
+
|
|
156
|
+
| Provider | Access | Status |
|
|
157
|
+
|---|---|---|
|
|
158
|
+
| AWS Bedrock | `Converse` (sync + stream) | ✓ |
|
|
159
|
+
| AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ |
|
|
160
|
+
| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ |
|
|
161
|
+
| Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ |
|
|
162
|
+
| OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ |
|
|
163
|
+
| Google Gemini | native SDK (`google-genai`: `models.generate_content` + `generate_content_stream`, sync + async) | ✓ |
|
|
164
|
+
| LiteLLM | callback bridge | Phase 4 |
|
|
165
|
+
|
|
166
|
+
## Token dimensions captured
|
|
167
|
+
|
|
168
|
+
`CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider:
|
|
169
|
+
|
|
170
|
+
| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | Gemini |
|
|
171
|
+
|---|---|---|---|---|---|---|
|
|
172
|
+
| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
173
|
+
| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
174
|
+
| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | ✓ (CachedContent API) |
|
|
175
|
+
| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ | ✗ |
|
|
176
|
+
| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | ✗ |
|
|
177
|
+
| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series, subset)** | **✓ (Gemini 2.5, additive)** |
|
|
178
|
+
| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
179
|
+
| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
|
|
180
|
+
| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
|
|
181
|
+
| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3) | ✓ (multimodal IMAGE) |
|
|
182
|
+
|
|
183
|
+
**Semantic note on `reasoning`:**
|
|
184
|
+
- **OpenAI's `reasoning_tokens` is a SUBSET of `output`** — already counted in `completion_tokens`.
|
|
185
|
+
- **Gemini's `thoughts_token_count` is ADDITIVE to `output`** — `candidates + thoughts = total billable output`.
|
|
186
|
+
|
|
187
|
+
**Semantic note on input breakdowns (avoid double-counting):**
|
|
188
|
+
For both OpenAI and Gemini, `cache_read`, `audio_input`, and `image_input` are **subsets of `input`**, not additive to it — they are a breakdown of tokens already counted in `llm_input_tokens`. For example, OpenAI reports `cached_tokens` under `prompt_tokens_details` *within* `prompt_tokens`, and Gemini's docs state `prompt_token_count` "includes the number of tokens in the cached content". A billable metric that sums `llm_input_tokens + llm_cached_input_tokens` (or `+ llm_audio_input_tokens`, `+ llm_image_input_tokens`) will **double-count**. Bill on `llm_input_tokens` as the total; use the breakdown fields only for cost attribution or discounted-rate tiers (e.g. cached input billed at a lower rate), subtracting them from `input` rather than adding.
|
|
189
|
+
|
|
190
|
+
OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap.
|
|
191
|
+
|
|
192
|
+
## Error policy
|
|
193
|
+
|
|
194
|
+
The SDK never breaks your LLM call. If anything in instrumentation fails (adapter bug, Lago down, network error), the SDK swallows it, logs a warning, and your call returns normally.
|
|
195
|
+
|
|
196
|
+
## If subscription resolution returns nothing, the event is dropped with an `ERROR` log
|
|
197
|
+
|
|
198
|
+
Configurable via `LagoConfig.on_error` callback to integrate with Sentry, Datadog, etc.:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from lago_agent_sdk import LagoConfig, LagoSDK
|
|
202
|
+
|
|
203
|
+
def on_error(exc: Exception, where: str) -> None:
|
|
204
|
+
sentry.capture_exception(exc, tags={"sdk_phase": where})
|
|
205
|
+
|
|
206
|
+
sdk = LagoSDK(
|
|
207
|
+
api_key="...",
|
|
208
|
+
config=LagoConfig(api_key="...", on_error=on_error),
|
|
209
|
+
)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Setting up Lago
|
|
213
|
+
|
|
214
|
+
The SDK ships with default metric codes (`llm_input_tokens`, `llm_output_tokens`, etc.). You need to register matching billable metrics in your Lago tenant before events count toward charges. See [Lago docs — Billable Metrics](https://docs.getlago.com/api-reference/billable-metrics/create).
|
|
215
|
+
|
|
216
|
+
## Development
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
git clone https://github.com/getlago/lago-agent-sdk-python
|
|
220
|
+
cd lago-agent-sdk-python
|
|
221
|
+
python -m venv venv && source venv/bin/activate
|
|
222
|
+
pip install -e '.[dev]'
|
|
223
|
+
pytest
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Run live integration tests (requires real credentials):
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
AWS_BEARER_TOKEN_BEDROCK="..." \
|
|
230
|
+
MISTRAL_API_KEY="..." \
|
|
231
|
+
LAGO_API_URL="https://api.getlago.com/api/v1/" \
|
|
232
|
+
LAGO_API_KEY="..." \
|
|
233
|
+
LAGO_EXTERNAL_SUBSCRIPTION_ID="sub_..." \
|
|
234
|
+
pytest tests/integration
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Security
|
|
238
|
+
|
|
239
|
+
Found a vulnerability? See [SECURITY.md](SECURITY.md).
|
|
240
|
+
|
|
241
|
+
## License
|
|
242
|
+
|
|
243
|
+
[MIT License](LICENSE).
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "lago-agent-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Instrument LLM clients and emit usage events to Lago."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
keywords = ["lago", "billing", "llm", "metering", "bedrock", "mistral", "anthropic", "openai"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"requests>=2.31",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
# User-facing extras — install only the providers you use:
|
|
28
|
+
# pip install 'lago-agent-sdk[anthropic,openai]'
|
|
29
|
+
bedrock = ["boto3>=1.34"]
|
|
30
|
+
mistral = ["mistralai>=2.0"]
|
|
31
|
+
anthropic = ["anthropic>=0.30"]
|
|
32
|
+
openai = ["openai>=1.50"]
|
|
33
|
+
gemini = ["google-genai>=1.0"]
|
|
34
|
+
|
|
35
|
+
# Single source of truth for development. Includes test runners, tooling,
|
|
36
|
+
# AND every provider SDK so the full test suite can run.
|
|
37
|
+
# pip install -e '.[dev]'
|
|
38
|
+
dev = [
|
|
39
|
+
# test runners
|
|
40
|
+
"pytest>=7",
|
|
41
|
+
"pytest-asyncio>=0.23",
|
|
42
|
+
"pytest-cov>=5",
|
|
43
|
+
"hypothesis>=6",
|
|
44
|
+
# tooling
|
|
45
|
+
"ruff>=0.6",
|
|
46
|
+
"mypy>=1.10",
|
|
47
|
+
"types-requests>=2.31",
|
|
48
|
+
# every provider SDK (so unit + live integration tests can import them)
|
|
49
|
+
"boto3>=1.34",
|
|
50
|
+
"mistralai>=2.0",
|
|
51
|
+
"anthropic>=0.30",
|
|
52
|
+
"openai>=1.50",
|
|
53
|
+
"google-genai>=1.0",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[project.urls]
|
|
57
|
+
Homepage = "https://www.getlago.com"
|
|
58
|
+
Repository = "https://github.com/getlago/lago-agent-sdk-python"
|
|
59
|
+
Issues = "https://github.com/getlago/lago-agent-sdk-python/issues"
|
|
60
|
+
Changelog = "https://github.com/getlago/lago-agent-sdk-python/blob/main/CHANGELOG.md"
|
|
61
|
+
|
|
62
|
+
[tool.setuptools.packages.find]
|
|
63
|
+
where = ["src"]
|
|
64
|
+
|
|
65
|
+
[tool.pytest.ini_options]
|
|
66
|
+
testpaths = ["tests"]
|
|
67
|
+
pythonpath = ["src"]
|
|
68
|
+
|
|
69
|
+
[tool.ruff]
|
|
70
|
+
line-length = 110
|
|
71
|
+
target-version = "py310"
|
|
72
|
+
src = ["src", "tests"]
|
|
73
|
+
|
|
74
|
+
[tool.ruff.lint]
|
|
75
|
+
select = [
|
|
76
|
+
"E", # pycodestyle errors
|
|
77
|
+
"W", # pycodestyle warnings
|
|
78
|
+
"F", # pyflakes
|
|
79
|
+
"I", # isort
|
|
80
|
+
"B", # flake8-bugbear
|
|
81
|
+
"UP", # pyupgrade
|
|
82
|
+
"ASYNC",
|
|
83
|
+
]
|
|
84
|
+
ignore = [
|
|
85
|
+
"E501", # line too long — handled by formatter
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
[tool.ruff.lint.per-file-ignores]
|
|
89
|
+
"tests/**" = ["B", "F841"] # tests can be looser
|
|
90
|
+
|
|
91
|
+
[tool.mypy]
|
|
92
|
+
python_version = "3.10"
|
|
93
|
+
strict = true
|
|
94
|
+
files = ["src/lago_agent_sdk"]
|
|
95
|
+
|
|
96
|
+
[[tool.mypy.overrides]]
|
|
97
|
+
module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*", "google.*"]
|
|
98
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Lago Agent SDK — Python."""
|
|
2
|
+
|
|
3
|
+
from .canonical import CanonicalUsage
|
|
4
|
+
from .config import DEFAULT_METRIC_CODES, LagoConfig
|
|
5
|
+
from .exceptions import (
|
|
6
|
+
LagoApiError,
|
|
7
|
+
LagoConfigError,
|
|
8
|
+
LagoSDKError,
|
|
9
|
+
UnknownClientError,
|
|
10
|
+
)
|
|
11
|
+
from .sdk import LagoSDK
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"LagoSDK",
|
|
15
|
+
"LagoConfig",
|
|
16
|
+
"CanonicalUsage",
|
|
17
|
+
"LagoApiError",
|
|
18
|
+
"LagoConfigError",
|
|
19
|
+
"LagoSDKError",
|
|
20
|
+
"UnknownClientError",
|
|
21
|
+
"DEFAULT_METRIC_CODES",
|
|
22
|
+
]
|
|
23
|
+
__version__ = "0.1.0"
|