agentreplay-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentreplay/__init__.py +81 -0
- agentreplay/auto_instrument/__init__.py +237 -0
- agentreplay/auto_instrument/openai.py +431 -0
- agentreplay/batching.py +270 -0
- agentreplay/bootstrap.py +202 -0
- agentreplay/circuit_breaker.py +300 -0
- agentreplay/client.py +1560 -0
- agentreplay/config.py +215 -0
- agentreplay/context.py +168 -0
- agentreplay/env_config.py +327 -0
- agentreplay/env_init.py +128 -0
- agentreplay/exceptions.py +92 -0
- agentreplay/genai.py +510 -0
- agentreplay/genai_conventions.py +502 -0
- agentreplay/install_pth.py +159 -0
- agentreplay/langchain_tracer.py +385 -0
- agentreplay/models.py +120 -0
- agentreplay/otel_bridge.py +281 -0
- agentreplay/patch.py +308 -0
- agentreplay/propagation.py +328 -0
- agentreplay/py.typed +3 -0
- agentreplay/retry.py +151 -0
- agentreplay/sampling.py +298 -0
- agentreplay/session.py +164 -0
- agentreplay/sitecustomize.py +73 -0
- agentreplay/span.py +270 -0
- agentreplay/unified.py +465 -0
- agentreplay-0.1.2.dist-info/METADATA +285 -0
- agentreplay-0.1.2.dist-info/RECORD +33 -0
- agentreplay-0.1.2.dist-info/WHEEL +5 -0
- agentreplay-0.1.2.dist-info/entry_points.txt +2 -0
- agentreplay-0.1.2.dist-info/licenses/LICENSE +190 -0
- agentreplay-0.1.2.dist-info/top_level.txt +1 -0
agentreplay/auto_instrument/openai.py
ADDED

@@ -0,0 +1,431 @@
# Copyright 2025 Sushanth (https://github.com/sushanthpy)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""OpenAI-specific instrumentation for streaming and tool calls.

This module provides custom wrappers for OpenAI API calls to:
1. Handle streaming responses (sync and async)
2. Capture tool calls and their results
3. Inject agent context into spans
4. Respect content capture settings

The wrappers are designed to work alongside the official OpenTelemetry
OpenAI instrumentation, adding Agentreplay-specific enhancements.
"""

import logging
from typing import Iterator, AsyncIterator, Optional, Any, Dict, List
import json

logger = logging.getLogger(__name__)

# Configuration from environment
import os
CAPTURE_CONTENT = os.getenv("AGENTREPLAY_CAPTURE_CONTENT", "true").lower() in {
    "1", "true", "yes"
}
MAX_CONTENT_LENGTH = int(os.getenv("AGENTREPLAY_MAX_CONTENT_LENGTH", "10000"))


def is_streaming(response: Any) -> bool:
    """Check if an OpenAI response is a stream.

    Args:
        response: OpenAI API response

    Returns:
        True if response is a stream, False otherwise
    """
    # Check for stream attribute or iterator protocol
    if hasattr(response, "__iter__") and not isinstance(response, (str, bytes, dict)):
        return True
    if hasattr(response, "__aiter__"):
        return True
    return False


class _StreamWrapper:
    """Wrapper for synchronous OpenAI streaming responses.

    This wrapper:
    - Yields chunks to the caller transparently
    - Accumulates content for span attributes
    - Handles tool calls in streaming mode
    - Respects MAX_CONTENT_LENGTH

    Example:
        >>> stream = client.chat.completions.create(..., stream=True)
        >>> wrapped = _StreamWrapper(stream, span)
        >>> for chunk in wrapped:
        ...     print(chunk.choices[0].delta.content)
    """

    def __init__(self, stream: Iterator, span: Optional[Any] = None):
        """Initialize stream wrapper.

        Args:
            stream: Original OpenAI stream
            span: OpenTelemetry span to annotate (optional)
        """
        self.stream = stream
        self.span = span
        self.accumulated_content = []
        # Tool calls accumulator: Dict[int, Dict] keyed by tool call index
        # OpenAI streams tool calls as deltas that need to be merged by index
        self.tool_calls_by_index: Dict[int, Dict[str, Any]] = {}
        self.total_length = 0
        self.chunk_count = 0
        self._capture_content = CAPTURE_CONTENT

    def __iter__(self):
        return self

    def __next__(self):
        """Get next chunk from stream and capture metadata."""
        try:
            chunk = next(self.stream)
            self.chunk_count += 1

            # Extract content if available
            if hasattr(chunk, "choices") and len(chunk.choices) > 0:
                choice = chunk.choices[0]

                # Capture text content
                if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
                    content = choice.delta.content
                    if content and self._capture_content:
                        self.accumulated_content.append(content)
                        self.total_length += len(content)

                        # Stop accumulating if we exceed max length
                        if self.total_length > MAX_CONTENT_LENGTH:
                            self.accumulated_content.append(
                                f"... (truncated, total {self.total_length} chars)"
                            )
                            self._capture_content = False  # Disable for rest of stream

                # Capture tool calls - merge deltas by index
                if hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls:
                    for tool_call in choice.delta.tool_calls:
                        # Get the index for this tool call (OpenAI sends this)
                        idx = getattr(tool_call, "index", 0)

                        # Initialize if first delta for this index
                        if idx not in self.tool_calls_by_index:
                            self.tool_calls_by_index[idx] = {
                                "id": None,
                                "name": None,
                                "arguments": "",
                            }

                        acc = self.tool_calls_by_index[idx]

                        # Merge id (usually only in first delta)
                        if hasattr(tool_call, "id") and tool_call.id:
                            acc["id"] = tool_call.id

                        # Merge function name and arguments
                        if hasattr(tool_call, "function"):
                            func = tool_call.function
                            if hasattr(func, "name") and func.name:
                                acc["name"] = func.name
                            if hasattr(func, "arguments") and func.arguments:
                                # Arguments come as fragments, concatenate them
                                acc["arguments"] += func.arguments

            return chunk

        except StopIteration:
            # Stream ended, finalize span
            self._finalize_span()
            raise

    def _finalize_span(self):
        """Add accumulated data to span when stream completes."""
        if not self.span:
            return

        try:
            # Add accumulated content
            if self.accumulated_content:
                full_content = "".join(self.accumulated_content)
                if self._capture_content:
                    self.span.set_attribute("llm.response.content", full_content[:MAX_CONTENT_LENGTH])
                self.span.set_attribute("llm.response.length", self.total_length)

            # Add tool calls (merged by index)
            if self.tool_calls_by_index:
                # Sort by index for consistent ordering
                sorted_tool_calls = [
                    self.tool_calls_by_index[idx]
                    for idx in sorted(self.tool_calls_by_index.keys())
                ]
                self.span.set_attribute("llm.tool_calls.count", len(sorted_tool_calls))
                for i, tool_call in enumerate(sorted_tool_calls[:10]):  # Max 10
                    if tool_call.get("name"):
                        self.span.set_attribute(f"llm.tool_call.{i}.name", tool_call["name"])
                    if tool_call.get("id"):
                        self.span.set_attribute(f"llm.tool_call.{i}.id", tool_call["id"])
                    if tool_call.get("arguments") and self._capture_content:
                        args = tool_call["arguments"][:500]  # Truncate args
                        self.span.set_attribute(f"llm.tool_call.{i}.arguments", args)

            # Add streaming metadata
            self.span.set_attribute("llm.streaming", True)
            self.span.set_attribute("llm.stream.chunks", self.chunk_count)

        except Exception as e:
            logger.debug(f"Failed to finalize stream span: {e}")


class _AsyncStreamWrapper:
    """Wrapper for asynchronous OpenAI streaming responses.

    Similar to _StreamWrapper but for async/await code.

    Example:
        >>> stream = await client.chat.completions.create(..., stream=True)
        >>> wrapped = _AsyncStreamWrapper(stream, span)
        >>> async for chunk in wrapped:
        ...     print(chunk.choices[0].delta.content)
    """

    def __init__(self, stream: AsyncIterator, span: Optional[Any] = None):
        """Initialize async stream wrapper.

        Args:
            stream: Original OpenAI async stream
            span: OpenTelemetry span to annotate (optional)
        """
        self.stream = stream
        self.span = span
        self.accumulated_content = []
        # Tool calls accumulator: Dict[int, Dict] keyed by tool call index
        # OpenAI streams tool calls as deltas that need to be merged by index
        self.tool_calls_by_index: Dict[int, Dict[str, Any]] = {}
        self.total_length = 0
        self.chunk_count = 0
        self._capture_content = CAPTURE_CONTENT

    def __aiter__(self):
        return self

    async def __anext__(self):
        """Get next chunk from async stream and capture metadata."""
        try:
            chunk = await self.stream.__anext__()
            self.chunk_count += 1

            # Extract content if available
            if hasattr(chunk, "choices") and len(chunk.choices) > 0:
                choice = chunk.choices[0]

                # Capture text content
                if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
                    content = choice.delta.content
                    if content and self._capture_content:
                        self.accumulated_content.append(content)
                        self.total_length += len(content)

                        # Stop accumulating if we exceed max length
                        if self.total_length > MAX_CONTENT_LENGTH:
                            self.accumulated_content.append(
                                f"... (truncated, total {self.total_length} chars)"
                            )
                            self._capture_content = False

                # Capture tool calls - merge deltas by index
                if hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls:
                    for tool_call in choice.delta.tool_calls:
                        # Get the index for this tool call (OpenAI sends this)
                        idx = getattr(tool_call, "index", 0)

                        # Initialize if first delta for this index
                        if idx not in self.tool_calls_by_index:
                            self.tool_calls_by_index[idx] = {
                                "id": None,
                                "name": None,
                                "arguments": "",
                            }

                        acc = self.tool_calls_by_index[idx]

                        # Merge id (usually only in first delta)
                        if hasattr(tool_call, "id") and tool_call.id:
                            acc["id"] = tool_call.id

                        # Merge function name and arguments
                        if hasattr(tool_call, "function"):
                            func = tool_call.function
                            if hasattr(func, "name") and func.name:
                                acc["name"] = func.name
                            if hasattr(func, "arguments") and func.arguments:
                                # Arguments come as fragments, concatenate them
                                acc["arguments"] += func.arguments

            return chunk

        except StopAsyncIteration:
            # Stream ended, finalize span
            self._finalize_span()
            raise

    def _finalize_span(self):
        """Add accumulated data to span when stream completes."""
        if not self.span:
            return

        try:
            # Add accumulated content
            if self.accumulated_content:
                full_content = "".join(self.accumulated_content)
                if self._capture_content:
                    self.span.set_attribute("llm.response.content", full_content[:MAX_CONTENT_LENGTH])
                self.span.set_attribute("llm.response.length", self.total_length)

            # Add tool calls (merged by index)
            if self.tool_calls_by_index:
                # Sort by index for consistent ordering
                sorted_tool_calls = [
                    self.tool_calls_by_index[idx]
                    for idx in sorted(self.tool_calls_by_index.keys())
                ]
                self.span.set_attribute("llm.tool_calls.count", len(sorted_tool_calls))
                for i, tool_call in enumerate(sorted_tool_calls[:10]):  # Max 10
                    if tool_call.get("name"):
                        self.span.set_attribute(f"llm.tool_call.{i}.name", tool_call["name"])
                    if tool_call.get("id"):
                        self.span.set_attribute(f"llm.tool_call.{i}.id", tool_call["id"])
                    if tool_call.get("arguments") and self._capture_content:
                        args = tool_call["arguments"][:500]  # Truncate args
                        self.span.set_attribute(f"llm.tool_call.{i}.arguments", args)

            # Add streaming metadata
            self.span.set_attribute("llm.streaming", True)
            self.span.set_attribute("llm.stream.chunks", self.chunk_count)

        except Exception as e:
            logger.debug(f"Failed to finalize async stream span: {e}")


def _inject_agent_context(span: Any):
    """Inject current agent context into span attributes.

    Reads from contextvars set by AgentContext and adds them to the span.

    Args:
        span: OpenTelemetry span
    """
    try:
        from agentreplay.context import (
            get_current_agent_id,
            get_current_session_id,
            get_current_workflow_id,
            get_current_user_id,
        )

        agent_id = get_current_agent_id()
        if agent_id:
            span.set_attribute("agentreplay.agent_id", agent_id)

        session_id = get_current_session_id()
        if session_id:
            span.set_attribute("agentreplay.session_id", session_id)

        workflow_id = get_current_workflow_id()
        if workflow_id:
            span.set_attribute("agentreplay.workflow_id", workflow_id)

        user_id = get_current_user_id()
        if user_id:
            span.set_attribute("agentreplay.user_id", user_id)

    except ImportError:
        # Context module not available
        pass
    except Exception as e:
        logger.debug(f"Failed to inject agent context: {e}")


def extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
    """Extract tool calls from OpenAI response.

    Args:
        response: OpenAI chat completion response

    Returns:
        List of tool call dictionaries with id, name, arguments
    """
    tool_calls = []

    try:
        if not hasattr(response, "choices") or not response.choices:
            return tool_calls

        choice = response.choices[0]
        if not hasattr(choice, "message") or not hasattr(choice.message, "tool_calls"):
            return tool_calls

        if not choice.message.tool_calls:
            return tool_calls

        for tool_call in choice.message.tool_calls:
            if hasattr(tool_call, "function"):
                tool_calls.append({
                    "id": getattr(tool_call, "id", None),
                    "type": getattr(tool_call, "type", "function"),
                    "name": getattr(tool_call.function, "name", None),
                    "arguments": getattr(tool_call.function, "arguments", None),
                })

    except Exception as e:
        logger.debug(f"Failed to extract tool calls: {e}")

    return tool_calls


def annotate_span_with_tool_calls(span: Any, tool_calls: List[Dict[str, Any]]):
    """Add tool call information to span attributes.

    Args:
        span: OpenTelemetry span
        tool_calls: List of tool call dictionaries
    """
    if not tool_calls:
        return

    try:
        span.set_attribute("llm.tool_calls.count", len(tool_calls))

        for i, tool_call in enumerate(tool_calls[:10]):  # Max 10 tool calls
            prefix = f"llm.tool_call.{i}"

            if tool_call.get("id"):
                span.set_attribute(f"{prefix}.id", tool_call["id"])

            if tool_call.get("name"):
                span.set_attribute(f"{prefix}.name", tool_call["name"])

            if tool_call.get("type"):
                span.set_attribute(f"{prefix}.type", tool_call["type"])

            if tool_call.get("arguments") and CAPTURE_CONTENT:
                args = tool_call["arguments"]
                if isinstance(args, str):
                    # Truncate if too long
                    args = args[:500]
                span.set_attribute(f"{prefix}.arguments", args)

    except Exception as e:
        logger.debug(f"Failed to annotate span with tool calls: {e}")
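
Usage note: the wrappers above are private helpers that the package's auto-instrumentation is expected to wire in for you; that wiring lives in agentreplay/auto_instrument/__init__.py and is not shown in this diff. The following hand-wired sketch only illustrates how the pieces fit together, under stated assumptions: the openai and opentelemetry-api packages are installed, OPENAI_API_KEY is set, the model and tracer names are placeholders, and _StreamWrapper is called directly purely for demonstration.

import os

# These environment variables are read at import time by the module above.
os.environ.setdefault("AGENTREPLAY_CAPTURE_CONTENT", "true")
os.environ.setdefault("AGENTREPLAY_MAX_CONTENT_LENGTH", "10000")

from openai import OpenAI
from opentelemetry import trace

from agentreplay.auto_instrument.openai import (
    _StreamWrapper,
    _inject_agent_context,
    is_streaming,
)

tracer = trace.get_tracer("agentreplay-example")  # placeholder tracer name
client = OpenAI()  # reads OPENAI_API_KEY from the environment

with tracer.start_as_current_span("chat.completions.create") as span:
    _inject_agent_context(span)  # copies agent/session/workflow/user ids onto the span
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    if is_streaming(response):
        # Accumulates text and tool-call deltas, then annotates the span on StopIteration.
        response = _StreamWrapper(response, span)
    for chunk in response:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")

# For non-streaming responses, extract_tool_calls(response) plus
# annotate_span_with_tool_calls(span, tool_calls) cover the same ground.
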
agentreplay/batching.py
ADDED

@@ -0,0 +1,270 @@
# Copyright 2025 Sushanth (https://github.com/sushanthpy)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Batching client for high-throughput trace ingestion."""

import threading
import time
from collections import deque
from typing import Deque, List, Optional
from agentreplay.client import AgentreplayClient
from agentreplay.models import AgentFlowEdge


class BatchingAgentreplayClient:
    """Client wrapper that batches spans for efficient ingestion.

    Automatically buffers spans and flushes when batch size is reached
    or flush interval expires, reducing HTTP overhead by 100x.

    **CRITICAL FIX**: Now includes max_buffer_size to prevent OOM.
    When buffer is full, oldest edges are dropped (sampling behavior).

    Args:
        client: Underlying AgentreplayClient
        batch_size: Number of spans to buffer before flushing (default: 100)
        flush_interval: Seconds between automatic flushes (default: 5.0)
        max_buffer_size: Maximum buffer size before dropping edges (default: 10000)

    Example:
        >>> client = AgentreplayClient(url="http://localhost:8080", tenant_id=1)
        >>> batching_client = BatchingAgentreplayClient(
        ...     client,
        ...     batch_size=100,
        ...     max_buffer_size=10000  # Prevent OOM
        ... )
        >>>
        >>> # Spans are buffered
        >>> for i in range(1000):
        ...     edge = AgentFlowEdge(
        ...         tenant_id=1,
        ...         agent_id=1,
        ...         session_id=42,
        ...         span_type=SpanType.ROOT
        ...     )
        ...     batching_client.insert(edge)  # Buffered, not sent immediately
        ...
        >>> # Flush remaining spans
        >>> batching_client.flush()
        >>> batching_client.close()
    """

    def __init__(
        self,
        client: AgentreplayClient,
        batch_size: int = 100,
        flush_interval: float = 5.0,
        max_buffer_size: int = 10000,
    ):
        """Initialize batching client."""
        self.client = client
        self.batch_size = batch_size
        self.flush_interval = flush_interval
        self.max_buffer_size = max_buffer_size
        self._buffer: List[AgentFlowEdge] = []
        self._retry_queue: Deque[List[AgentFlowEdge]] = deque()  # Failed batches awaiting retry
        self._max_retry_batches = 10  # Limit retry queue to prevent unbounded growth
        self._lock = threading.Lock()
        self._running = True
        self._dropped_count = 0  # Track dropped edges for monitoring
        self._flush_thread = threading.Thread(target=self._auto_flush, daemon=True)
        self._flush_thread.start()

    def __enter__(self) -> "BatchingAgentreplayClient":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit - flush and close."""
        self.flush()
        self.close()

    def insert(self, edge: AgentFlowEdge) -> AgentFlowEdge:
        """Buffer a single edge for batched insertion.

        **CRITICAL FIX**: Now enforces max_buffer_size to prevent OOM.
        If buffer is full, drops oldest edges (FIFO sampling).

        Args:
            edge: Edge to buffer

        Returns:
            The same edge (for consistency with AgentreplayClient API)
        """
        with self._lock:
            # CRITICAL FIX: Enforce max buffer size to prevent OOM
            if len(self._buffer) >= self.max_buffer_size:
                # Drop oldest edge (FIFO sampling)
                self._buffer.pop(0)
                self._dropped_count += 1

                # Log warning every 1000 drops
                if self._dropped_count % 1000 == 0:
                    print(
                        f"WARNING: Dropped {self._dropped_count} edges due to full buffer. "
                        f"Backend may be slow or down. Consider increasing max_buffer_size "
                        f"or reducing ingestion rate."
                    )

            self._buffer.append(edge)

            # Flush if batch size reached
            if len(self._buffer) >= self.batch_size:
                self._flush_unlocked()

        return edge

    def flush(self) -> int:
        """Manually flush all buffered spans.

        Returns:
            Number of spans flushed
        """
        with self._lock:
            return self._flush_unlocked()

    def _flush_unlocked(self) -> int:
        """Flush buffer without acquiring lock (caller must hold lock).

        Returns:
            Number of spans flushed
        """
        if not self._buffer:
            return 0

        # Send entire batch in one HTTP request
        try:
            self.client.insert_batch(self._buffer)
            count = len(self._buffer)
            self._buffer = []
            return count
        except Exception as e:
            # Log error but don't lose spans
            print(f"Error flushing batch: {e}")
            return 0

    def _auto_flush(self) -> None:
        """Background thread that flushes buffer periodically.

        CRITICAL FIX: Transactional buffer management - data is only removed
        from buffer after successful network I/O. Failed batches are queued
        for retry to prevent data loss.
        """
        while self._running:
            time.sleep(self.flush_interval)
            if self._running:  # Check again after sleep
                # 1. First, try to send any previously failed batches
                self._process_retry_queue()

                # 2. Grab data to send (under lock) but DON'T clear buffer yet
                batch_to_send = []
                with self._lock:
                    if self._buffer:
                        batch_to_send = self._buffer[:]  # Copy for sending

                # 3. Send I/O outside the lock (won't block application threads)
                if batch_to_send:
                    success = False
                    try:
                        self.client.insert_batch(batch_to_send)
                        success = True
                    except Exception as e:
                        # Log error but don't crash the thread
                        print(f"Error flushing batch: {e}")

                    # 4. ONLY clear buffer after confirmed success (transactional)
                    with self._lock:
                        if success:
                            # Remove only the items we successfully sent
                            # (new items may have been added during I/O)
                            self._buffer = self._buffer[len(batch_to_send):]
                        else:
                            # Failed: queue batch for retry, clear from main buffer
                            # to prevent duplicate sends
                            self._buffer = self._buffer[len(batch_to_send):]
                            self._queue_for_retry(batch_to_send)

    def _process_retry_queue(self) -> None:
        """Process failed batches from retry queue."""
        while self._retry_queue:
            # Pop from front (FIFO)
            with self._lock:
                if not self._retry_queue:
                    break
                batch = self._retry_queue.popleft()

            try:
                self.client.insert_batch(batch)
                # Success - batch is now sent, continue to next
            except Exception as e:
                # Still failing - re-queue at the back for later retry
                print(f"Retry failed for batch of {len(batch)} edges: {e}")
                with self._lock:
                    if len(self._retry_queue) < self._max_retry_batches:
                        self._retry_queue.append(batch)
                    else:
                        # Drop batch to prevent unbounded growth
                        self._dropped_count += len(batch)
                        print(f"WARNING: Dropped batch of {len(batch)} edges after max retries")
                break  # Stop processing retry queue on failure

    def _queue_for_retry(self, batch: List[AgentFlowEdge]) -> None:
        """Add failed batch to retry queue (caller must NOT hold lock)."""
        if len(self._retry_queue) < self._max_retry_batches:
            self._retry_queue.append(batch)
        else:
            # Drop oldest retry batch to make room
            dropped = self._retry_queue.popleft()
            self._dropped_count += len(dropped)
            self._retry_queue.append(batch)
            print(f"WARNING: Dropped oldest retry batch ({len(dropped)} edges) to make room")

    def close(self) -> None:
        """Stop auto-flush thread and flush remaining spans including retry queue."""
        self._running = False
        if self._flush_thread.is_alive():
            self._flush_thread.join(timeout=self.flush_interval + 1.0)

        # Flush main buffer
        self.flush()

        # Attempt to flush retry queue (best effort)
        retry_attempts = 0
        max_close_retries = 3
        while self._retry_queue and retry_attempts < max_close_retries:
            retry_attempts += 1
            with self._lock:
                if not self._retry_queue:
                    break
                batch = self._retry_queue.popleft()
            try:
                self.client.insert_batch(batch)
            except Exception as e:
                print(f"Failed to flush retry queue on close (attempt {retry_attempts}): {e}")
                # Re-queue for next attempt
                with self._lock:
                    self._retry_queue.appendleft(batch)
                break

        # Report any remaining data that couldn't be sent
        remaining = sum(len(b) for b in self._retry_queue)
        if remaining > 0:
            print(f"WARNING: {remaining} edges in retry queue could not be sent on close")

    def __del__(self) -> None:
        """Destructor - ensure spans are flushed."""
        try:
            self.close()
        except:
            pass  # Ignore errors during cleanup
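
Usage note: because BatchingAgentreplayClient defines __enter__ and __exit__, it can be used as a context manager, which guarantees a final flush and a clean shutdown of the background flush thread. A minimal sketch, reusing the constructor arguments from the docstring example above; the import location of SpanType is an assumption, since the docstring references it without showing its import.

from agentreplay.batching import BatchingAgentreplayClient
from agentreplay.client import AgentreplayClient
from agentreplay.models import AgentFlowEdge, SpanType  # SpanType path assumed

client = AgentreplayClient(url="http://localhost:8080", tenant_id=1)

with BatchingAgentreplayClient(
    client,
    batch_size=100,         # flush every 100 buffered edges
    flush_interval=5.0,     # or every 5 seconds, whichever comes first
    max_buffer_size=10000,  # drop oldest edges beyond this to avoid OOM
) as batching_client:
    for _ in range(1000):
        edge = AgentFlowEdge(
            tenant_id=1,
            agent_id=1,
            session_id=42,
            span_type=SpanType.ROOT,
        )
        batching_client.insert(edge)  # buffered, not sent per call
# On exit: flush() and close() run, draining the buffer and retry queue.
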