agentbasis 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentbasis/__init__.py +87 -0
- agentbasis/client.py +134 -0
- agentbasis/config.py +33 -0
- agentbasis/context.py +259 -0
- agentbasis/decorators.py +80 -0
- agentbasis/frameworks/langchain/__init__.py +109 -0
- agentbasis/frameworks/langchain/callback.py +373 -0
- agentbasis/frameworks/pydanticai/__init__.py +32 -0
- agentbasis/frameworks/pydanticai/instrumentation.py +233 -0
- agentbasis/llms/anthropic/__init__.py +18 -0
- agentbasis/llms/anthropic/messages.py +298 -0
- agentbasis/llms/gemini/__init__.py +18 -0
- agentbasis/llms/gemini/chat.py +326 -0
- agentbasis/llms/openai/__init__.py +18 -0
- agentbasis/llms/openai/chat.py +235 -0
- agentbasis-0.1.0.dist-info/METADATA +220 -0
- agentbasis-0.1.0.dist-info/RECORD +19 -0
- agentbasis-0.1.0.dist-info/WHEEL +5 -0
- agentbasis-0.1.0.dist-info/top_level.txt +1 -0
agentbasis/llms/anthropic/messages.py

@@ -0,0 +1,298 @@

from typing import Any, Generator, AsyncGenerator
import functools
import time
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode, Span
from agentbasis.context import inject_context_to_span


def _get_tracer():
    """
    Get the tracer lazily at runtime.
    This ensures the tracer is retrieved after agentbasis.init() has configured the provider.
    """
    return trace.get_tracer("agentbasis.llms.anthropic")


def _set_request_attributes(span: Span, model: str, messages: list, is_streaming: bool = False):
    """
    Set common request attributes on a span.
    """
    # Inject user/session context
    inject_context_to_span(span)

    span.set_attribute("llm.system", "anthropic")
    span.set_attribute("llm.request.model", model)
    span.set_attribute("llm.request.messages", str(messages))
    if is_streaming:
        span.set_attribute("llm.request.streaming", True)


def _set_response_attributes(span: Span, response):
    """
    Set common response attributes on a span (for non-streaming responses).

    Anthropic response structure:
    - response.content: list of content blocks (e.g., [{"type": "text", "text": "..."}])
    - response.usage.input_tokens
    - response.usage.output_tokens
    - response.model
    - response.stop_reason
    """
    # Extract text content from response
    if response.content:
        text_parts = []
        for block in response.content:
            if hasattr(block, 'text'):
                text_parts.append(block.text)
            elif isinstance(block, dict) and block.get('type') == 'text':
                text_parts.append(block.get('text', ''))
        content = "".join(text_parts)
        span.set_attribute("llm.response.content", content)

    # Set stop reason
    if hasattr(response, 'stop_reason') and response.stop_reason:
        span.set_attribute("llm.response.stop_reason", response.stop_reason)

    # Set token usage (Anthropic uses input_tokens/output_tokens)
    if hasattr(response, 'usage') and response.usage:
        input_tokens = getattr(response.usage, 'input_tokens', 0)
        output_tokens = getattr(response.usage, 'output_tokens', 0)
        span.set_attribute("llm.usage.prompt_tokens", input_tokens)
        span.set_attribute("llm.usage.completion_tokens", output_tokens)
        span.set_attribute("llm.usage.total_tokens", input_tokens + output_tokens)


def _wrap_sync_stream(stream, span: Span, start_time: float) -> Generator:
    """
    Wrap a synchronous Anthropic streaming response to track chunks and finalize span.

    Anthropic streaming events:
    - message_start: Contains initial message info
    - content_block_start: Start of a content block
    - content_block_delta: Text delta with 'delta.text'
    - content_block_stop: End of content block
    - message_delta: Final message info with usage stats
    - message_stop: Stream complete
    """
    content_parts = []
    chunk_count = 0
    first_token_time = None
    input_tokens = 0
    output_tokens = 0

    try:
        for event in stream:
            chunk_count += 1

            # Track time to first content
            if first_token_time is None and hasattr(event, 'type'):
                if event.type == 'content_block_delta':
                    first_token_time = time.time()
                    span.set_attribute("llm.response.first_token_ms",
                                       int((first_token_time - start_time) * 1000))

            # Extract content from delta events
            if hasattr(event, 'type'):
                if event.type == 'content_block_delta':
                    if hasattr(event, 'delta') and hasattr(event.delta, 'text'):
                        content_parts.append(event.delta.text)

                # Capture usage from message_start or message_delta
                elif event.type == 'message_start':
                    if hasattr(event, 'message') and hasattr(event.message, 'usage'):
                        input_tokens = getattr(event.message.usage, 'input_tokens', 0)

                elif event.type == 'message_delta':
                    if hasattr(event, 'usage'):
                        output_tokens = getattr(event.usage, 'output_tokens', 0)

            yield event

        # Stream complete - finalize span
        full_content = "".join(content_parts)
        span.set_attribute("llm.response.content", full_content)
        span.set_attribute("llm.response.chunk_count", chunk_count)

        if input_tokens or output_tokens:
            span.set_attribute("llm.usage.prompt_tokens", input_tokens)
            span.set_attribute("llm.usage.completion_tokens", output_tokens)
            span.set_attribute("llm.usage.total_tokens", input_tokens + output_tokens)

        span.set_status(Status(StatusCode.OK))

    except Exception as e:
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        raise
    finally:
        span.end()


async def _wrap_async_stream(stream, span: Span, start_time: float) -> AsyncGenerator:
    """
    Wrap an asynchronous Anthropic streaming response to track chunks and finalize span.
    """
    content_parts = []
    chunk_count = 0
    first_token_time = None
    input_tokens = 0
    output_tokens = 0

    try:
        async for event in stream:
            chunk_count += 1

            # Track time to first content
            if first_token_time is None and hasattr(event, 'type'):
                if event.type == 'content_block_delta':
                    first_token_time = time.time()
                    span.set_attribute("llm.response.first_token_ms",
                                       int((first_token_time - start_time) * 1000))

            # Extract content from delta events
            if hasattr(event, 'type'):
                if event.type == 'content_block_delta':
                    if hasattr(event, 'delta') and hasattr(event.delta, 'text'):
                        content_parts.append(event.delta.text)

                elif event.type == 'message_start':
                    if hasattr(event, 'message') and hasattr(event.message, 'usage'):
                        input_tokens = getattr(event.message.usage, 'input_tokens', 0)

                elif event.type == 'message_delta':
                    if hasattr(event, 'usage'):
                        output_tokens = getattr(event.usage, 'output_tokens', 0)

            yield event

        # Stream complete - finalize span
        full_content = "".join(content_parts)
        span.set_attribute("llm.response.content", full_content)
        span.set_attribute("llm.response.chunk_count", chunk_count)

        if input_tokens or output_tokens:
            span.set_attribute("llm.usage.prompt_tokens", input_tokens)
            span.set_attribute("llm.usage.completion_tokens", output_tokens)
            span.set_attribute("llm.usage.total_tokens", input_tokens + output_tokens)

        span.set_status(Status(StatusCode.OK))

    except Exception as e:
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        raise
    finally:
        span.end()


def instrument_messages(anthropic_module: Any):
    """
    Instruments the synchronous Anthropic Messages API with OpenTelemetry.
    Handles both regular and streaming responses.
    """
    try:
        from anthropic.resources.messages import Messages
    except ImportError:
        return

    original_create = Messages.create

    @functools.wraps(original_create)
    def wrapped_create(self, *args, **kwargs):
        tracer = _get_tracer()
        model = kwargs.get("model", "unknown")
        messages = kwargs.get("messages", [])
        is_streaming = kwargs.get("stream", False)

        span_name = f"anthropic.messages.create {model}"

        if is_streaming:
            # For streaming, we need to manually manage the span lifecycle
            span = tracer.start_span(span_name)
            start_time = time.time()
            _set_request_attributes(span, model, messages, is_streaming=True)

            try:
                stream = original_create(self, *args, **kwargs)
                # Return wrapped generator that will finalize span when exhausted
                return _wrap_sync_stream(stream, span, start_time)
            except Exception as e:
                span.record_exception(e)
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.end()
                raise
        else:
            # Non-streaming: use context manager
            with tracer.start_as_current_span(span_name) as span:
                _set_request_attributes(span, model, messages)

                try:
                    response = original_create(self, *args, **kwargs)
                    _set_response_attributes(span, response)
                    span.set_status(Status(StatusCode.OK))
                    return response

                except Exception as e:
                    span.record_exception(e)
                    span.set_status(Status(StatusCode.ERROR, str(e)))
                    raise

    Messages.create = wrapped_create


def instrument_async_messages(anthropic_module: Any):
    """
    Instruments the asynchronous Anthropic Messages API with OpenTelemetry.
    Handles both regular and streaming responses.
    """
    try:
        from anthropic.resources.messages import AsyncMessages
    except ImportError:
        return

    original_async_create = AsyncMessages.create

    @functools.wraps(original_async_create)
    async def wrapped_async_create(self, *args, **kwargs):
        tracer = _get_tracer()
        model = kwargs.get("model", "unknown")
        messages = kwargs.get("messages", [])
        is_streaming = kwargs.get("stream", False)

        span_name = f"anthropic.messages.create {model}"

        if is_streaming:
            # For streaming, we need to manually manage the span lifecycle
            span = tracer.start_span(span_name)
            start_time = time.time()
            span.set_attribute("llm.request.async", True)
            _set_request_attributes(span, model, messages, is_streaming=True)

            try:
                stream = await original_async_create(self, *args, **kwargs)
                # Return wrapped async generator that will finalize span when exhausted
                return _wrap_async_stream(stream, span, start_time)
            except Exception as e:
                span.record_exception(e)
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.end()
                raise
        else:
            # Non-streaming: use context manager
            with tracer.start_as_current_span(span_name) as span:
                span.set_attribute("llm.request.async", True)
                _set_request_attributes(span, model, messages)

                try:
                    response = await original_async_create(self, *args, **kwargs)
                    _set_response_attributes(span, response)
                    span.set_status(Status(StatusCode.OK))
                    return response

                except Exception as e:
                    span.record_exception(e)
                    span.set_status(Status(StatusCode.ERROR, str(e)))
                    raise

    AsyncMessages.create = wrapped_async_create
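For reference, a minimal sketch of how this instrumentation might be wired up. The tracer setup uses standard OpenTelemetry SDK classes, and the Anthropic client call and model name are illustrative rather than taken from this package; in practice agentbasis.init() presumably performs the provider configuration shown here.

# Hypothetical usage sketch, not part of the package.
import anthropic
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter

from agentbasis.llms.anthropic.messages import instrument_messages

# Plain OpenTelemetry SDK setup; agentbasis.init() would normally handle this.
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

instrument_messages(anthropic)  # patches anthropic.resources.messages.Messages.create

client = anthropic.Anthropic()
response = client.messages.create(
    model="claude-3-5-sonnet-20240620",  # illustrative model name
    max_tokens=256,
    messages=[{"role": "user", "content": "Hello"}],
)
# Exports a span named "anthropic.messages.create <model>" carrying the
# llm.request.*, llm.response.*, and llm.usage.* attributes set in this module.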
agentbasis/llms/gemini/__init__.py

@@ -0,0 +1,18 @@

from .chat import instrument_chat, instrument_async_chat

def instrument():
    """
    Auto-instruments the Google Gemini SDK (both sync and async).
    Call this function after `agentbasis.init()` and before using `google.generativeai`.

    This instruments:
    - GenerativeModel.generate_content() (sync)
    - GenerativeModel.generate_content_async() (async)
    """
    try:
        import google.generativeai as genai
        instrument_chat(genai)
        instrument_async_chat(genai)
    except ImportError:
        # If google.generativeai is not installed, we simply do nothing or could log a warning
        pass
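A short usage sketch of this entry point, following the call order the docstring describes. The import path is assumed from the package layout, the model name is illustrative, and the arguments accepted by agentbasis.init() are not shown in this section.

# Hypothetical call order, per the docstring above.
import agentbasis
from agentbasis.llms import gemini

agentbasis.init()    # configure tracing first (its parameters are outside this diff)
gemini.instrument()  # then patch GenerativeModel.generate_content / generate_content_async

import google.generativeai as genai
model = genai.GenerativeModel("gemini-1.5-flash")  # illustrative model name
print(model.generate_content("Say hello").text)    # this call is now traced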
agentbasis/llms/gemini/chat.py

@@ -0,0 +1,326 @@

from typing import Any, Generator, AsyncGenerator
import functools
import json
import time
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode, Span

from agentbasis.context import inject_context_to_span


def _get_tracer():
    """
    Get the tracer lazily at runtime.
    This ensures the tracer is retrieved after agentbasis.init() has configured the provider.
    """
    return trace.get_tracer("agentbasis.llms.gemini")


def _set_request_attributes(span: Span, model: str, contents: Any, tools: list, is_streaming: bool = False):
    """
    Set common request attributes on a span.
    """
    # Inject user/session context
    inject_context_to_span(span)

    span.set_attribute("llm.system", "gemini")
    span.set_attribute("llm.request.model", model)
    span.set_attribute("llm.request.messages", str(contents))

    if is_streaming:
        span.set_attribute("llm.request.streaming", True)

    # Track tools if provided to agent
    if tools:
        try:
            tools_str = json.dumps(tools) if isinstance(tools, (list, dict)) else str(tools)
        except (TypeError, ValueError):
            tools_str = str(tools)
        span.set_attribute("llm.request.tools", tools_str)
        if isinstance(tools, list):
            span.set_attribute("llm.request.tool_count", len(tools))


def _set_response_attributes(span: Span, response):
    """
    Set common response attributes on a span (for non-streaming responses).
    """
    # Record text response
    if hasattr(response, 'text') and response.text:
        span.set_attribute("llm.response.content", str(response.text))

    # Track function/tool calls if used in the agent
    if hasattr(response, 'candidates') and response.candidates:
        candidate = response.candidates[0]
        if hasattr(candidate, 'function_calls') and candidate.function_calls:
            function_calls_data = []
            for func_call in candidate.function_calls:
                func_data = {
                    'name': getattr(func_call, 'name', None),
                }
                # Handle arguments
                arguments = getattr(func_call, 'arguments', None)
                if arguments is not None:
                    if isinstance(arguments, (dict, list)):
                        try:
                            func_data['arguments'] = json.dumps(arguments)
                        except (TypeError, ValueError):
                            func_data['arguments'] = str(arguments)
                    else:
                        func_data['arguments'] = str(arguments)
                else:
                    func_data['arguments'] = None

                if hasattr(func_call, 'response') and func_call.response:
                    func_data['response'] = str(func_call.response)
                if hasattr(func_call, 'error') and func_call.error:
                    func_data['error'] = str(func_call.error)
                function_calls_data.append(func_data)

            try:
                function_calls_str = json.dumps(function_calls_data)
            except (TypeError, ValueError):
                function_calls_str = str(function_calls_data)
            span.set_attribute("llm.response.function_calls", function_calls_str)
            span.set_attribute("llm.response.function_call_count", len(candidate.function_calls))

    # Track token usage
    if hasattr(response, 'usage_metadata') and response.usage_metadata:
        usage = response.usage_metadata
        if hasattr(usage, 'prompt_token_count'):
            span.set_attribute("llm.usage.prompt_tokens", usage.prompt_token_count)
        if hasattr(usage, 'candidates_token_count'):
            span.set_attribute("llm.usage.completion_tokens", usage.candidates_token_count)
        if hasattr(usage, 'total_token_count'):
            span.set_attribute("llm.usage.total_tokens", usage.total_token_count)


def _wrap_sync_stream(stream, span: Span, start_time: float) -> Generator:
    """
    Wrap a synchronous streaming response to track chunks and finalize span.
    """
    content_parts = []
    chunk_count = 0
    first_token_time = None
    last_chunk = None

    try:
        for chunk in stream:
            chunk_count += 1
            last_chunk = chunk

            # Track time to first token
            if first_token_time is None:
                first_token_time = time.time()
                span.set_attribute("llm.response.first_token_ms",
                                   int((first_token_time - start_time) * 1000))

            # Extract content from chunk (Gemini chunks have .text property)
            if hasattr(chunk, 'text') and chunk.text:
                content_parts.append(chunk.text)

            yield chunk

        # Stream complete - finalize span
        full_content = "".join(content_parts)
        span.set_attribute("llm.response.content", full_content)
        span.set_attribute("llm.response.chunk_count", chunk_count)

        # Try to get final token counts from last chunk
        if last_chunk and hasattr(last_chunk, 'usage_metadata') and last_chunk.usage_metadata:
            usage = last_chunk.usage_metadata
            if hasattr(usage, 'prompt_token_count'):
                span.set_attribute("llm.usage.prompt_tokens", usage.prompt_token_count)
            if hasattr(usage, 'candidates_token_count'):
                span.set_attribute("llm.usage.completion_tokens", usage.candidates_token_count)
            if hasattr(usage, 'total_token_count'):
                span.set_attribute("llm.usage.total_tokens", usage.total_token_count)

        span.set_status(Status(StatusCode.OK))

    except Exception as e:
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        raise
    finally:
        span.end()


async def _wrap_async_stream(stream, span: Span, start_time: float) -> AsyncGenerator:
    """
    Wrap an asynchronous streaming response to track chunks and finalize span.
    """
    content_parts = []
    chunk_count = 0
    first_token_time = None
    last_chunk = None

    try:
        async for chunk in stream:
            chunk_count += 1
            last_chunk = chunk

            # Track time to first token
            if first_token_time is None:
                first_token_time = time.time()
                span.set_attribute("llm.response.first_token_ms",
                                   int((first_token_time - start_time) * 1000))

            # Extract content from chunk
            if hasattr(chunk, 'text') and chunk.text:
                content_parts.append(chunk.text)

            yield chunk

        # Stream complete - finalize span
        full_content = "".join(content_parts)
        span.set_attribute("llm.response.content", full_content)
        span.set_attribute("llm.response.chunk_count", chunk_count)

        # Try to get final token counts from last chunk
        if last_chunk and hasattr(last_chunk, 'usage_metadata') and last_chunk.usage_metadata:
            usage = last_chunk.usage_metadata
            if hasattr(usage, 'prompt_token_count'):
                span.set_attribute("llm.usage.prompt_tokens", usage.prompt_token_count)
            if hasattr(usage, 'candidates_token_count'):
                span.set_attribute("llm.usage.completion_tokens", usage.candidates_token_count)
            if hasattr(usage, 'total_token_count'):
                span.set_attribute("llm.usage.total_tokens", usage.total_token_count)

        span.set_status(Status(StatusCode.OK))

    except Exception as e:
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        raise
    finally:
        span.end()


def _instrument_grpc():
    """
    Optionally instrument gRPC client (Gemini uses gRPC under the hood).
    """
    try:
        from opentelemetry.instrumentation.grpc import GrpcInstrumentorClient
        grpc_instrumentor = GrpcInstrumentorClient()
        grpc_instrumentor.instrument()
    except Exception:
        # gRPC instrumentation is optional, don't fail if it doesn't work
        pass


def instrument_chat(gemini_module: Any):
    """
    Instruments the synchronous Google Gemini Chat API with OpenTelemetry.
    Handles both regular and streaming responses.
    """
    _instrument_grpc()

    try:
        from google.generativeai import GenerativeModel
    except ImportError:
        return

    original_generate_content = GenerativeModel.generate_content

    @functools.wraps(original_generate_content)
    def wrapped_generate_content(self, *args, **kwargs):
        tracer = _get_tracer()
        model = getattr(self, "model_name", "unknown")
        contents = args[0] if args else kwargs.get('contents', [])
        tools = kwargs.get('tools', [])
        is_streaming = kwargs.get('stream', False)

        span_name = f"gemini.generate_content {model}"

        if is_streaming:
            # For streaming, manually manage span lifecycle
            span = tracer.start_span(span_name)
            start_time = time.time()
            _set_request_attributes(span, model, contents, tools, is_streaming=True)

            try:
                stream = original_generate_content(self, *args, **kwargs)
                return _wrap_sync_stream(stream, span, start_time)
            except Exception as e:
                span.record_exception(e)
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.end()
                raise
        else:
            # Non-streaming: use context manager
            with tracer.start_as_current_span(span_name) as span:
                _set_request_attributes(span, model, contents, tools)

                try:
                    response = original_generate_content(self, *args, **kwargs)
                    _set_response_attributes(span, response)
                    span.set_status(Status(StatusCode.OK))
                    return response

                except Exception as e:
                    span.record_exception(e)
                    span.set_status(Status(StatusCode.ERROR, str(e)))
                    raise

    GenerativeModel.generate_content = wrapped_generate_content


def instrument_async_chat(gemini_module: Any):
    """
    Instruments the asynchronous Google Gemini Chat API with OpenTelemetry.
    Handles both regular and streaming responses.
    """
    _instrument_grpc()

    try:
        from google.generativeai import GenerativeModel
    except ImportError:
        return

    original_generate_content_async = GenerativeModel.generate_content_async

    @functools.wraps(original_generate_content_async)
    async def wrapped_generate_content_async(self, *args, **kwargs):
        tracer = _get_tracer()
        model = getattr(self, "model_name", "unknown")
        contents = args[0] if args else kwargs.get('contents', [])
        tools = kwargs.get('tools', [])
        is_streaming = kwargs.get('stream', False)

        span_name = f"gemini.generate_content {model}"

        if is_streaming:
            # For streaming, manually manage span lifecycle
            span = tracer.start_span(span_name)
            start_time = time.time()
            span.set_attribute("llm.request.async", True)
            _set_request_attributes(span, model, contents, tools, is_streaming=True)

            try:
                stream = await original_generate_content_async(self, *args, **kwargs)
                return _wrap_async_stream(stream, span, start_time)
            except Exception as e:
                span.record_exception(e)
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.end()
                raise
        else:
            # Non-streaming: use context manager
            with tracer.start_as_current_span(span_name) as span:
                span.set_attribute("llm.request.async", True)
                _set_request_attributes(span, model, contents, tools)

                try:
                    response = await original_generate_content_async(self, *args, **kwargs)
                    _set_response_attributes(span, response)
                    span.set_status(Status(StatusCode.OK))
                    return response

                except Exception as e:
                    span.record_exception(e)
                    span.set_status(Status(StatusCode.ERROR, str(e)))
                    raise

    GenerativeModel.generate_content_async = wrapped_generate_content_async
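To see what the streaming wrapper records without calling the Gemini API, a small offline sketch can drive _wrap_sync_stream with fake chunks. The fake chunk objects and the "demo" tracer name are invented for illustration; everything else comes from this module and the OpenTelemetry SDK.

# Offline sketch: feed _wrap_sync_stream a fake stream and inspect the exported span.
import time
from types import SimpleNamespace

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter

from agentbasis.llms.gemini.chat import _wrap_sync_stream

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

span = trace.get_tracer("demo").start_span("gemini.generate_content demo")
fake_stream = iter([SimpleNamespace(text="Hel"), SimpleNamespace(text="lo")])  # hypothetical chunks

for chunk in _wrap_sync_stream(fake_stream, span, start_time=time.time()):
    pass  # consuming the generator is what finalizes and ends the span

# The printed span carries llm.response.content="Hello", llm.response.chunk_count=2,
# and llm.response.first_token_ms; usage attributes are skipped because the fake
# chunks have no usage_metadata.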
agentbasis/llms/openai/__init__.py

@@ -0,0 +1,18 @@

from .chat import instrument_chat, instrument_async_chat

def instrument():
    """
    Auto-instruments the OpenAI SDK (both sync and async).
    Call this function after `agentbasis.init()` and before using `openai`.

    This instruments:
    - OpenAI().chat.completions.create() (sync)
    - AsyncOpenAI().chat.completions.create() (async)
    """
    try:
        import openai
        instrument_chat(openai)
        instrument_async_chat(openai)
    except ImportError:
        # If openai is not installed, we simply do nothing or could log a warning
        pass