avenza 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avenza-1.0.0/PKG-INFO +143 -0
- avenza-1.0.0/README.md +110 -0
- avenza-1.0.0/avenza/__init__.py +30 -0
- avenza-1.0.0/avenza/_buffer.py +52 -0
- avenza-1.0.0/avenza/_context.py +30 -0
- avenza-1.0.0/avenza/_instrument/__init__.py +17 -0
- avenza-1.0.0/avenza/_instrument/anthropic_patch.py +77 -0
- avenza-1.0.0/avenza/_instrument/gemini_patch.py +76 -0
- avenza-1.0.0/avenza/_instrument/openai_patch.py +77 -0
- avenza-1.0.0/avenza/agent.py +180 -0
- avenza-1.0.0/avenza/cli.py +189 -0
- avenza-1.0.0/avenza/client.py +123 -0
- avenza-1.0.0/avenza/exceptions.py +17 -0
- avenza-1.0.0/avenza/integrations/__init__.py +1 -0
- avenza-1.0.0/avenza/integrations/crewai.py +13 -0
- avenza-1.0.0/avenza/integrations/langchain.py +100 -0
- avenza-1.0.0/avenza/py.typed +0 -0
- avenza-1.0.0/avenza/run.py +207 -0
- avenza-1.0.0/avenza/testing.py +122 -0
- avenza-1.0.0/avenza.egg-info/PKG-INFO +143 -0
- avenza-1.0.0/avenza.egg-info/SOURCES.txt +29 -0
- avenza-1.0.0/avenza.egg-info/dependency_links.txt +1 -0
- avenza-1.0.0/avenza.egg-info/entry_points.txt +2 -0
- avenza-1.0.0/avenza.egg-info/requires.txt +12 -0
- avenza-1.0.0/avenza.egg-info/top_level.txt +1 -0
- avenza-1.0.0/pyproject.toml +63 -0
- avenza-1.0.0/setup.cfg +4 -0
- avenza-1.0.0/tests/test_buffer.py +81 -0
- avenza-1.0.0/tests/test_client.py +84 -0
- avenza-1.0.0/tests/test_instrumentation.py +126 -0
- avenza-1.0.0/tests/test_run.py +113 -0
avenza-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: avenza
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Instrument AI agents in one line. Cost, value, and SLO tracking, automatically.
|
|
5
|
+
Author-email: Finacc Support and Solutions <noreply@avenza.app>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://avenza.app
|
|
8
|
+
Project-URL: Documentation, https://avenza.app/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/bnyamesa/avenza
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/bnyamesa/avenza/issues
|
|
11
|
+
Keywords: ai,agents,llm,observability,cost-tracking,slo
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: requests>=2.28
|
|
24
|
+
Provides-Extra: langchain
|
|
25
|
+
Requires-Dist: langchain-core>=0.1; extra == "langchain"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
29
|
+
Requires-Dist: responses>=0.25; extra == "dev"
|
|
30
|
+
Requires-Dist: anthropic>=0.25; extra == "dev"
|
|
31
|
+
Requires-Dist: openai>=1.0; extra == "dev"
|
|
32
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
33
|
+
|
|
34
|
+
# Avenza Python SDK
|
|
35
|
+
|
|
36
|
+
Instrument AI agents in one line. Cost, value, and SLO tracking — automatically.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install avenza
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quickstart (3 lines)
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from avenza import Agent
|
|
48
|
+
|
|
49
|
+
agent = Agent(name='Invoice Bot', risk_tier='T2')
|
|
50
|
+
|
|
51
|
+
with agent.run() as run:
|
|
52
|
+
result = process_invoice(data)
|
|
53
|
+
run.success = result.is_valid
|
|
54
|
+
run.log_value('task_completed', quantity=1, unit_value_usd=1.50)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
That's it. If you're using the official Anthropic, OpenAI, or Gemini client, token usage is captured automatically with zero additional code.
|
|
58
|
+
|
|
59
|
+
## Setup wizard
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
avenza init
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Interactive wizard that verifies your API key and writes a working starter script — not a docs page to interpret.
|
|
66
|
+
|
|
67
|
+
## Diagnose
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
avenza doctor
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Self-diagnose connection issues, missing instrumentation, and proxy configuration.
|
|
74
|
+
|
|
75
|
+
## Auto-instrumentation
|
|
76
|
+
|
|
77
|
+
The SDK patches the official provider clients the moment it's imported:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import anthropic
|
|
81
|
+
from avenza import Agent
|
|
82
|
+
|
|
83
|
+
agent = Agent(name='Support Bot')
|
|
84
|
+
client = anthropic.Anthropic()
|
|
85
|
+
|
|
86
|
+
with agent.run() as run:
|
|
87
|
+
# Token usage captured automatically from this call
|
|
88
|
+
response = client.messages.create(model='claude-sonnet-4-6', ...)
|
|
89
|
+
run.success = True
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Works across threads and `async`/`await` via Python's `contextvars`.
|
|
93
|
+
|
|
94
|
+
## Manual token fallback
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
with agent.run() as run:
|
|
98
|
+
response = my_llm_client.call(...)
|
|
99
|
+
run.set_tokens(response.input_tokens, response.output_tokens)
|
|
100
|
+
run.success = True
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## LangChain
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from avenza.integrations.langchain import AvenzaCallbackHandler
|
|
107
|
+
from langchain_anthropic import ChatAnthropic
|
|
108
|
+
|
|
109
|
+
handler = AvenzaCallbackHandler(agent_name='Support Classifier', risk_tier='T1')
|
|
110
|
+
llm = ChatAnthropic(model='claude-sonnet-4-6', callbacks=[handler])
|
|
111
|
+
response = llm.invoke('Classify this ticket: ...')
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Testing
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from avenza.testing import MockAgent
|
|
118
|
+
|
|
119
|
+
def test_my_agent():
|
|
120
|
+
agent = MockAgent(name='Invoice Bot')
|
|
121
|
+
with agent.run() as run:
|
|
122
|
+
run.success = True
|
|
123
|
+
run.log_value('task_completed', quantity=1, unit_value_usd=1.50)
|
|
124
|
+
|
|
125
|
+
assert agent.runs[-1].success is True
|
|
126
|
+
assert agent.runs[-1].value_events[0]['quantity'] == 1
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Configuration
|
|
130
|
+
|
|
131
|
+
| Parameter | Default | Description |
|
|
132
|
+
|---|---|---|
|
|
133
|
+
| `name` | required | Agent display name |
|
|
134
|
+
| `risk_tier` | `'T1'` | T1 (autonomous), T2 (approve-first), T3 (assist-only) |
|
|
135
|
+
| `api_key` | `AVENZA_API_KEY` env | Bearer token from Settings → API Tokens |
|
|
136
|
+
| `model` | None | LLM model for cost lookup (auto-detected when using auto-instrumentation) |
|
|
137
|
+
| `auto_instrument` | `True` | Patch provider clients automatically |
|
|
138
|
+
| `offline_buffer` | `True` | Queue failed sends to disk and retry |
|
|
139
|
+
| `base_url` | `https://app.avenza.app` | Override for self-hosted |
|
|
140
|
+
|
|
141
|
+
## Never blocks. Never raises.
|
|
142
|
+
|
|
143
|
+
Every network call is fire-and-forget on a background thread. Avenza being down, slow, or returning errors will never crash your agent or add latency to your agent's actual work. Failed sends are buffered to `.avenza_buffer.jsonl` and retried on next startup.
|
avenza-1.0.0/README.md
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Avenza Python SDK
|
|
2
|
+
|
|
3
|
+
Instrument AI agents in one line. Cost, value, and SLO tracking — automatically.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install avenza
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quickstart (3 lines)
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from avenza import Agent
|
|
15
|
+
|
|
16
|
+
agent = Agent(name='Invoice Bot', risk_tier='T2')
|
|
17
|
+
|
|
18
|
+
with agent.run() as run:
|
|
19
|
+
result = process_invoice(data)
|
|
20
|
+
run.success = result.is_valid
|
|
21
|
+
run.log_value('task_completed', quantity=1, unit_value_usd=1.50)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
That's it. If you're using the official Anthropic, OpenAI, or Gemini client, token usage is captured automatically with zero additional code.
|
|
25
|
+
|
|
26
|
+
## Setup wizard
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
avenza init
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Interactive wizard that verifies your API key and writes a working starter script — not a docs page to interpret.
|
|
33
|
+
|
|
34
|
+
## Diagnose
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
avenza doctor
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Self-diagnose connection issues, missing instrumentation, and proxy configuration.
|
|
41
|
+
|
|
42
|
+
## Auto-instrumentation
|
|
43
|
+
|
|
44
|
+
The SDK patches the official provider clients the moment it's imported:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import anthropic
|
|
48
|
+
from avenza import Agent
|
|
49
|
+
|
|
50
|
+
agent = Agent(name='Support Bot')
|
|
51
|
+
client = anthropic.Anthropic()
|
|
52
|
+
|
|
53
|
+
with agent.run() as run:
|
|
54
|
+
# Token usage captured automatically from this call
|
|
55
|
+
response = client.messages.create(model='claude-sonnet-4-6', ...)
|
|
56
|
+
run.success = True
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Works across threads and `async`/`await` via Python's `contextvars`.
|
|
60
|
+
|
|
61
|
+
## Manual token fallback
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
with agent.run() as run:
|
|
65
|
+
response = my_llm_client.call(...)
|
|
66
|
+
run.set_tokens(response.input_tokens, response.output_tokens)
|
|
67
|
+
run.success = True
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## LangChain
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from avenza.integrations.langchain import AvenzaCallbackHandler
|
|
74
|
+
from langchain_anthropic import ChatAnthropic
|
|
75
|
+
|
|
76
|
+
handler = AvenzaCallbackHandler(agent_name='Support Classifier', risk_tier='T1')
|
|
77
|
+
llm = ChatAnthropic(model='claude-sonnet-4-6', callbacks=[handler])
|
|
78
|
+
response = llm.invoke('Classify this ticket: ...')
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Testing
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from avenza.testing import MockAgent
|
|
85
|
+
|
|
86
|
+
def test_my_agent():
|
|
87
|
+
agent = MockAgent(name='Invoice Bot')
|
|
88
|
+
with agent.run() as run:
|
|
89
|
+
run.success = True
|
|
90
|
+
run.log_value('task_completed', quantity=1, unit_value_usd=1.50)
|
|
91
|
+
|
|
92
|
+
assert agent.runs[-1].success is True
|
|
93
|
+
assert agent.runs[-1].value_events[0]['quantity'] == 1
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Configuration
|
|
97
|
+
|
|
98
|
+
| Parameter | Default | Description |
|
|
99
|
+
|---|---|---|
|
|
100
|
+
| `name` | required | Agent display name |
|
|
101
|
+
| `risk_tier` | `'T1'` | T1 (autonomous), T2 (approve-first), T3 (assist-only) |
|
|
102
|
+
| `api_key` | `AVENZA_API_KEY` env | Bearer token from Settings → API Tokens |
|
|
103
|
+
| `model` | None | LLM model for cost lookup (auto-detected when using auto-instrumentation) |
|
|
104
|
+
| `auto_instrument` | `True` | Patch provider clients automatically |
|
|
105
|
+
| `offline_buffer` | `True` | Queue failed sends to disk and retry |
|
|
106
|
+
| `base_url` | `https://app.avenza.app` | Override for self-hosted |
|
|
107
|
+
|
|
108
|
+
## Never blocks. Never raises.
|
|
109
|
+
|
|
110
|
+
Every network call is fire-and-forget on a background thread. Avenza being down, slow, or returning errors will never crash your agent or add latency to your agent's actual work. Failed sends are buffered to `.avenza_buffer.jsonl` and retried on next startup.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Avenza Python SDK
|
|
3
|
+
|
|
4
|
+
Instrument AI agents in one line. Cost, value, and SLO tracking — automatically.
|
|
5
|
+
|
|
6
|
+
Quickstart:
|
|
7
|
+
from avenza import Agent
|
|
8
|
+
|
|
9
|
+
agent = Agent(name='Invoice Bot', risk_tier='T2')
|
|
10
|
+
|
|
11
|
+
with agent.run() as run:
|
|
12
|
+
result = process(data)
|
|
13
|
+
run.success = result.ok
|
|
14
|
+
run.log_value('task_completed', quantity=1, unit_value_usd=1.50)
|
|
15
|
+
|
|
16
|
+
Auto-instrumentation is active by default — if you're using the official Anthropic,
|
|
17
|
+
OpenAI, or Gemini client, token usage is captured without any additional code.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from .agent import Agent
|
|
22
|
+
from .exceptions import AvenzaConfigError, AvenzaError
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from importlib.metadata import version as _version
|
|
26
|
+
__version__: str = _version("avenza")
|
|
27
|
+
except Exception:
|
|
28
|
+
__version__ = "dev"
|
|
29
|
+
|
|
30
|
+
__all__ = ["Agent", "AvenzaError", "AvenzaConfigError", "__version__"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Offline buffer — persists failed sends to .avenza_buffer.jsonl and retries
|
|
3
|
+
on next SDK init. Designed for intermittent-connectivity environments.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from .client import AvenzaClient
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)

_DEFAULT_PATH = ".avenza_buffer.jsonl"


class OfflineBuffer:
    """Append-only JSON Lines queue for sends that could not be delivered.

    Every record is a single line of the form
    ``{"path": <str>, "payload": <object>}``. ``save`` appends one record;
    ``flush`` reads all records, deletes the file and hands each well-formed
    record to ``client.post_async``.
    """

    def __init__(self, path: str = _DEFAULT_PATH) -> None:
        """Remember the buffer file location; the file itself is not touched."""
        self._path = path

    def save(self, path: str, payload: dict[str, Any]) -> None:
        """Append one record to the buffer file on a best-effort basis.

        Args:
            path: Value stored under the record's ``"path"`` key and later
                passed back to ``client.post_async`` by ``flush``.
            payload: JSON-serializable body stored under ``"payload"``.

        Serialization failures and file-system errors are swallowed so that
        buffering can never break the caller.
        """
        # Serialize before opening the file so an unserializable payload
        # neither raises into the caller nor creates an empty buffer file.
        try:
            record = json.dumps({"path": path, "payload": payload})
        except (TypeError, ValueError) as exc:
            logger.debug("avenza: dropping unserializable buffered payload — %s", exc)
            return

        try:
            with open(self._path, "a", encoding="utf-8") as f:
                f.write(record + "\n")
        except OSError:
            pass  # Read-only FS or permission error — drop silently

    def flush(self, client: "AvenzaClient") -> None:
        """Re-send every buffered record through ``client.post_async``.

        The buffer file is read completely and removed *before* anything is
        sent. Lines that are not valid JSON, are not JSON objects, or lack a
        truthy ``"path"`` / non-null ``"payload"`` are skipped.

        Args:
            client: Object exposing ``post_async(path, payload)``.
        """
        if not os.path.exists(self._path):
            return
        try:
            with open(self._path, encoding="utf-8") as f:
                lines = f.readlines()
            os.remove(self._path)
        except OSError:
            return

        flushed = 0
        for line in lines:
            try:
                entry = json.loads(line.strip())
            except json.JSONDecodeError:
                continue  # blank or corrupted line
            if not isinstance(entry, dict):
                # Valid JSON but not a record (e.g. a list or a bare string);
                # calling .get() on it would raise AttributeError.
                continue
            if entry.get("path") and entry.get("payload") is not None:
                client.post_async(entry["path"], entry["payload"])
                flushed += 1

        if flushed:
            logger.info("avenza: flushed %d buffered run(s) from offline buffer", flushed)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
contextvars-based active-run tracking.
|
|
3
|
+
|
|
4
|
+
Each OS thread and each asyncio task gets its own isolated view of the current
|
|
5
|
+
run, so token capture is always attributed to the correct run even under
|
|
6
|
+
concurrent async tasks or multi-threaded agents.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import contextvars
|
|
11
|
+
from typing import TYPE_CHECKING, Optional
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from .run import RunContext
|
|
15
|
+
|
|
16
|
+
# Context variable holding the run that is active in the current execution
# context; its default is ``None`` (no run entered).
_current_run: contextvars.ContextVar[Optional["RunContext"]] = contextvars.ContextVar(
    "avenza_current_run",
    default=None,
)


def get_current_run() -> Optional["RunContext"]:
    """Return the run stored for the current context, or ``None`` if unset."""
    active_run = _current_run.get()
    return active_run


def set_current_run(run: "RunContext") -> contextvars.Token:
    """Store *run* as the active run and return the token that undoes it."""
    restore_token = _current_run.set(run)
    return restore_token


def reset_current_run(token: contextvars.Token) -> None:
    """Restore the value held before the ``set`` call that produced *token*."""
    _current_run.reset(token)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Auto-instrumentation dispatcher.
|
|
3
|
+
|
|
4
|
+
patch_all() tries each provider's patch. If the provider's SDK is not
|
|
5
|
+
installed, the patch is silently skipped — no error, no warning.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def patch_all() -> None:
    """Run the Anthropic, OpenAI and Gemini patch functions, in that order.

    The provider modules are imported lazily inside this function, so they
    are only loaded when instrumentation is actually requested.
    """
    from .anthropic_patch import patch_anthropic
    from .openai_patch import patch_openai
    from .gemini_patch import patch_gemini

    for apply_patch in (patch_anthropic, patch_openai, patch_gemini):
        apply_patch()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Patches Anthropic's official Python client to capture token usage automatically.
|
|
3
|
+
Wraps both the sync and async message creation methods.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import functools
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
from .._context import get_current_run
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
_patched = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def patch_anthropic() -> None:
    """Install the Anthropic token-capture wrappers.

    Does nothing when a previous call already succeeded (module-level
    ``_patched`` flag) or when the ``anthropic`` package cannot be imported.
    Any other failure while patching is logged at debug level and swallowed;
    the flag then stays unset.
    """
    global _patched
    if _patched:
        return

    try:
        import anthropic
    except ImportError:
        # anthropic is not installed, so there is nothing to instrument.
        return

    try:
        _patch_sync(anthropic)
        _patch_async(anthropic)
    except Exception as exc:
        logger.debug("avenza: anthropic patch failed (non-fatal) — %s", exc)
        return

    _patched = True
    logger.debug("avenza: anthropic auto-instrumentation active")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _patch_sync(anthropic: object) -> None:
    """Wrap ``Messages.create`` so token usage is recorded on the active run.

    The wrapper always calls the original method first and returns its
    response unchanged. Usage is recorded only when ``get_current_run()``
    yields a run and the response has a non-``None`` ``usage`` attribute.

    Args:
        anthropic: The imported ``anthropic`` module.

    Raises:
        AttributeError: If ``anthropic.resources.messages.Messages`` or its
            ``create`` attribute is missing.
    """
    messages_cls = anthropic.resources.messages.Messages  # type: ignore[attr-defined]
    original = messages_cls.create

    @functools.wraps(original)
    def wrapped(self: object, *args: object, **kwargs: object) -> object:
        response = original(self, *args, **kwargs)
        try:
            run = get_current_run()
            usage = getattr(response, "usage", None)
            if run is not None and usage is not None:
                run._record_auto_tokens(
                    provider="anthropic",
                    # Prefer the requested model; fall back to what the
                    # response reports before giving up with "unknown".
                    model=str(kwargs.get("model", getattr(response, "model", "unknown"))),
                    # Missing or None counts are normalized to 0.
                    input_tokens=getattr(usage, "input_tokens", 0) or 0,
                    output_tokens=getattr(usage, "output_tokens", 0) or 0,
                )
        except Exception as exc:
            # The provider call already succeeded; a bookkeeping failure must
            # not turn it into an error for the caller.
            logger.debug("avenza: anthropic token capture failed (non-fatal) — %s", exc)
        return response

    messages_cls.create = wrapped
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _patch_async(anthropic: object) -> None:
    """Wrap ``AsyncMessages.create`` so token usage is recorded on the active run.

    Returns without patching when ``anthropic.resources.messages`` has no
    ``AsyncMessages`` attribute. The wrapper awaits the original coroutine,
    returns its response unchanged, and records usage only when
    ``get_current_run()`` yields a run and the response has a non-``None``
    ``usage`` attribute.

    Args:
        anthropic: The imported ``anthropic`` module.
    """
    try:
        async_cls = anthropic.resources.messages.AsyncMessages  # type: ignore[attr-defined]
    except AttributeError:
        # No async client class available (presumably an older anthropic
        # release) — nothing to patch.
        return

    original_async = async_cls.create

    @functools.wraps(original_async)
    async def wrapped_async(self: object, *args: object, **kwargs: object) -> object:
        response = await original_async(self, *args, **kwargs)
        try:
            run = get_current_run()
            usage = getattr(response, "usage", None)
            if run is not None and usage is not None:
                run._record_auto_tokens(
                    provider="anthropic",
                    # Prefer the requested model; fall back to what the
                    # response reports before giving up with "unknown".
                    model=str(kwargs.get("model", getattr(response, "model", "unknown"))),
                    # Missing or None counts are normalized to 0.
                    input_tokens=getattr(usage, "input_tokens", 0) or 0,
                    output_tokens=getattr(usage, "output_tokens", 0) or 0,
                )
        except Exception as exc:
            # The provider call already succeeded; a bookkeeping failure must
            # not turn it into an error for the caller.
            logger.debug("avenza: anthropic token capture failed (non-fatal) — %s", exc)
        return response

    async_cls.create = wrapped_async
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Patches Google's Generative AI client to capture token usage automatically.
|
|
3
|
+
Only the google-generativeai (genai) client is patched; google-cloud-aiplatform is not instrumented here.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import functools
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
from .._context import get_current_run
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
_patched = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def patch_gemini() -> None:
    """Install the Gemini token-capture wrappers.

    Does nothing when a previous call already succeeded (module-level
    ``_patched`` flag). An ``ImportError`` from ``_patch_genai`` is ignored
    silently; any other failure is logged at debug level and swallowed. In
    both failure cases the flag stays unset.
    """
    global _patched
    if _patched:
        return

    try:
        _patch_genai()
    except ImportError:
        # google.generativeai could not be imported — skip silently.
        return
    except Exception as exc:
        logger.debug("avenza: gemini patch failed (non-fatal) — %s", exc)
        return

    _patched = True
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _patch_genai() -> None:
    """Wrap ``GenerativeModel.generate_content`` and, if present, its async twin.

    Each wrapper calls the original method, returns its response unchanged,
    and passes the response to ``_capture_gemini_usage`` when
    ``get_current_run()`` yields a run. The model name is read from the
    instance's ``model_name`` attribute, defaulting to ``"gemini"``.

    Raises:
        ImportError: If ``google.generativeai`` cannot be imported.
    """
    import google.generativeai as genai  # type: ignore[import]

    model_cls = genai.GenerativeModel
    original_sync = model_cls.generate_content

    @functools.wraps(original_sync)
    def wrapped_sync(self: object, *args: object, **kwargs: object) -> object:
        response = original_sync(self, *args, **kwargs)
        try:
            run = get_current_run()
            if run is not None:
                _capture_gemini_usage(run, response, getattr(self, "model_name", "gemini"))
        except Exception as exc:
            # The provider call already succeeded; a bookkeeping failure must
            # not turn it into an error for the caller.
            logger.debug("avenza: gemini token capture failed (non-fatal) — %s", exc)
        return response

    model_cls.generate_content = wrapped_sync
    logger.debug("avenza: gemini (google-generativeai) auto-instrumentation active")

    # Async variant — only patched when the class actually provides it.
    if hasattr(model_cls, "generate_content_async"):
        original_async = model_cls.generate_content_async

        @functools.wraps(original_async)
        async def wrapped_async(self: object, *args: object, **kwargs: object) -> object:
            response = await original_async(self, *args, **kwargs)
            try:
                run = get_current_run()
                if run is not None:
                    _capture_gemini_usage(run, response, getattr(self, "model_name", "gemini"))
            except Exception as exc:
                # Same guarantee as the sync wrapper: never fail the caller.
                logger.debug("avenza: gemini token capture failed (non-fatal) — %s", exc)
            return response

        model_cls.generate_content_async = wrapped_async
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _capture_gemini_usage(run: object, response: object, model_name: str) -> None:
    """Forward Gemini token counts from *response* to *run*.

    Reads ``response.usage_metadata`` and passes its ``prompt_token_count``
    and ``candidates_token_count`` to ``run._record_auto_tokens`` with
    provider ``"google"``. Nothing is recorded when the metadata is absent or
    when both counts are zero or missing.

    Args:
        run: Object exposing ``_record_auto_tokens``.
        response: Value returned by the wrapped ``generate_content`` call.
        model_name: Model identifier forwarded unchanged.
    """
    metadata = getattr(response, "usage_metadata", None)
    if metadata is None:
        return

    # Missing attributes and explicit ``None`` values both count as zero.
    prompt_count = getattr(metadata, "prompt_token_count", 0) or 0
    candidate_count = getattr(metadata, "candidates_token_count", 0) or 0
    if not (prompt_count or candidate_count):
        return

    run._record_auto_tokens(  # type: ignore[union-attr]
        provider="google",
        model=model_name,
        input_tokens=prompt_count,
        output_tokens=candidate_count,
    )
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Patches OpenAI's official Python client to capture token usage automatically.
|
|
3
|
+
Handles both sync (Completions.create) and async (AsyncCompletions.create).
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import functools
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
from .._context import get_current_run
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
_patched = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def patch_openai() -> None:
    """Install the OpenAI token-capture wrappers.

    Does nothing when a previous call already succeeded (module-level
    ``_patched`` flag) or when the ``openai`` package cannot be imported.
    Any other failure while patching is logged at debug level and swallowed;
    the flag then stays unset.
    """
    global _patched
    if _patched:
        return

    try:
        import openai
    except ImportError:
        # openai is not installed, so there is nothing to instrument.
        return

    try:
        _patch_sync(openai)
        _patch_async(openai)
    except Exception as exc:
        logger.debug("avenza: openai patch failed (non-fatal) — %s", exc)
        return

    _patched = True
    logger.debug("avenza: openai auto-instrumentation active")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _patch_sync(openai: object) -> None:
    """Wrap chat ``Completions.create`` so token usage is recorded on the active run.

    The wrapper always calls the original method first and returns its
    response unchanged. Usage is recorded only when ``get_current_run()``
    yields a run and the response has a non-``None`` ``usage`` attribute.

    Args:
        openai: The imported ``openai`` module.

    Raises:
        AttributeError: If ``openai.resources.chat.completions.Completions``
            or its ``create`` attribute is missing.
    """
    completions_cls = openai.resources.chat.completions.Completions  # type: ignore[attr-defined]
    original = completions_cls.create

    @functools.wraps(original)
    def wrapped(self: object, *args: object, **kwargs: object) -> object:
        response = original(self, *args, **kwargs)
        try:
            run = get_current_run()
            usage = getattr(response, "usage", None)
            if run is not None and usage is not None:
                run._record_auto_tokens(
                    provider="openai",
                    model=str(kwargs.get("model", getattr(response, "model", "unknown"))),
                    # Missing or None counts are normalized to 0.
                    input_tokens=getattr(usage, "prompt_tokens", 0) or 0,
                    output_tokens=getattr(usage, "completion_tokens", 0) or 0,
                )
        except Exception as exc:
            # The provider call already succeeded; a bookkeeping failure must
            # not turn it into an error for the caller.
            logger.debug("avenza: openai token capture failed (non-fatal) — %s", exc)
        return response

    completions_cls.create = wrapped
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _patch_async(openai: object) -> None:
    """Wrap chat ``AsyncCompletions.create`` so token usage is recorded on the active run.

    Returns without patching when ``openai.resources.chat.completions`` has
    no ``AsyncCompletions`` attribute. The wrapper awaits the original
    coroutine, returns its response unchanged, and records usage only when
    ``get_current_run()`` yields a run and the response has a non-``None``
    ``usage`` attribute.

    Args:
        openai: The imported ``openai`` module.
    """
    try:
        async_cls = openai.resources.chat.completions.AsyncCompletions  # type: ignore[attr-defined]
    except AttributeError:
        # No async completions class available — nothing to patch.
        return

    original_async = async_cls.create

    @functools.wraps(original_async)
    async def wrapped_async(self: object, *args: object, **kwargs: object) -> object:
        response = await original_async(self, *args, **kwargs)
        try:
            run = get_current_run()
            usage = getattr(response, "usage", None)
            if run is not None and usage is not None:
                run._record_auto_tokens(
                    provider="openai",
                    model=str(kwargs.get("model", getattr(response, "model", "unknown"))),
                    # Missing or None counts are normalized to 0.
                    input_tokens=getattr(usage, "prompt_tokens", 0) or 0,
                    output_tokens=getattr(usage, "completion_tokens", 0) or 0,
                )
        except Exception as exc:
            # The provider call already succeeded; a bookkeeping failure must
            # not turn it into an error for the caller.
            logger.debug("avenza: openai token capture failed (non-fatal) — %s", exc)
        return response

    async_cls.create = wrapped_async
|