agentreplay 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,431 @@
+# Copyright 2025 Sushanth (https://github.com/sushanthpy)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""OpenAI-specific instrumentation for streaming and tool calls.
+
+This module provides custom wrappers for OpenAI API calls to:
+1. Handle streaming responses (sync and async)
+2. Capture tool calls and their results
+3. Inject agent context into spans
+4. Respect content capture settings
+
+The wrappers are designed to work alongside the official OpenTelemetry
+OpenAI instrumentation, adding Agentreplay-specific enhancements.
+"""
+
+import logging
+import os
+from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
+
+logger = logging.getLogger(__name__)
+
+# Configuration from environment
+CAPTURE_CONTENT = os.getenv("AGENTREPLAY_CAPTURE_CONTENT", "true").lower() in {
+    "1", "true", "yes"
+}
+MAX_CONTENT_LENGTH = int(os.getenv("AGENTREPLAY_MAX_CONTENT_LENGTH", "10000"))
+
+
+def is_streaming(response: Any) -> bool:
+    """Check if an OpenAI response is a stream.
+
+    Args:
+        response: OpenAI API response
+
+    Returns:
+        True if response is a stream, False otherwise
+    """
+    # Check for the (a)sync iterator protocol, excluding common
+    # non-stream iterables like str/bytes/dict
+    if hasattr(response, "__iter__") and not isinstance(response, (str, bytes, dict)):
+        return True
+    if hasattr(response, "__aiter__"):
+        return True
+    return False
+
+
+class _StreamWrapper:
+    """Wrapper for synchronous OpenAI streaming responses.
+
+    This wrapper:
+    - Yields chunks to the caller transparently
+    - Accumulates content for span attributes
+    - Handles tool calls in streaming mode
+    - Respects MAX_CONTENT_LENGTH
+
+    Example:
+        >>> stream = client.chat.completions.create(..., stream=True)
+        >>> wrapped = _StreamWrapper(stream, span)
+        >>> for chunk in wrapped:
+        ...     print(chunk.choices[0].delta.content)
+    """
+
+    def __init__(self, stream: Iterator, span: Optional[Any] = None):
+        """Initialize stream wrapper.
+
+        Args:
+            stream: Original OpenAI stream
+            span: OpenTelemetry span to annotate (optional)
+        """
+        self.stream = stream
+        self.span = span
+        self.accumulated_content = []
+        # Tool calls accumulator: Dict[int, Dict] keyed by tool call index
+        # OpenAI streams tool calls as deltas that need to be merged by index
+        self.tool_calls_by_index: Dict[int, Dict[str, Any]] = {}
+        self.total_length = 0
+        self.chunk_count = 0
+        self._capture_content = CAPTURE_CONTENT
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """Get next chunk from stream and capture metadata."""
+        try:
+            chunk = next(self.stream)
+            self.chunk_count += 1
+
+            # Extract content if available
+            if hasattr(chunk, "choices") and len(chunk.choices) > 0:
+                choice = chunk.choices[0]
+
+                # Capture text content
+                if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
+                    content = choice.delta.content
+                    if content and self._capture_content:
+                        self.accumulated_content.append(content)
+                        self.total_length += len(content)
+
+                        # Stop accumulating if we exceed max length
+                        if self.total_length > MAX_CONTENT_LENGTH:
+                            self.accumulated_content.append(
+                                f"... (truncated, total {self.total_length} chars)"
+                            )
+                            self._capture_content = False  # Disable for rest of stream
+
+                # Capture tool calls - merge deltas by index
+                if hasattr(choice, "delta") and getattr(choice.delta, "tool_calls", None):
+                    for tool_call in choice.delta.tool_calls:
+                        # Get the index for this tool call (OpenAI sends this)
+                        idx = getattr(tool_call, "index", 0)
+
+                        # Initialize if first delta for this index
+                        if idx not in self.tool_calls_by_index:
+                            self.tool_calls_by_index[idx] = {
+                                "id": None,
+                                "name": None,
+                                "arguments": "",
+                            }
+
+                        acc = self.tool_calls_by_index[idx]
+
+                        # Merge id (usually only in first delta)
+                        if hasattr(tool_call, "id") and tool_call.id:
+                            acc["id"] = tool_call.id
+
+                        # Merge function name and arguments
+                        if hasattr(tool_call, "function"):
+                            func = tool_call.function
+                            if hasattr(func, "name") and func.name:
+                                acc["name"] = func.name
+                            if hasattr(func, "arguments") and func.arguments:
+                                # Arguments come as fragments, concatenate them
+                                acc["arguments"] += func.arguments
+
+            return chunk
+
+        except StopIteration:
+            # Stream ended, finalize span
+            self._finalize_span()
+            raise
+
+    def _finalize_span(self):
+        """Add accumulated data to span when stream completes."""
+        if not self.span:
+            return
+
+        try:
+            # Add accumulated content (only collected while capture was enabled;
+            # includes the truncation marker if MAX_CONTENT_LENGTH was exceeded)
+            if self.accumulated_content:
+                full_content = "".join(self.accumulated_content)
+                self.span.set_attribute("llm.response.content", full_content[:MAX_CONTENT_LENGTH])
+                self.span.set_attribute("llm.response.length", self.total_length)
+
+            # Add tool calls (merged by index)
+            if self.tool_calls_by_index:
+                # Sort by index for consistent ordering
+                sorted_tool_calls = [
+                    self.tool_calls_by_index[idx]
+                    for idx in sorted(self.tool_calls_by_index.keys())
+                ]
+                self.span.set_attribute("llm.tool_calls.count", len(sorted_tool_calls))
+                for i, tool_call in enumerate(sorted_tool_calls[:10]):  # Max 10
+                    if tool_call.get("name"):
+                        self.span.set_attribute(f"llm.tool_call.{i}.name", tool_call["name"])
+                    if tool_call.get("id"):
+                        self.span.set_attribute(f"llm.tool_call.{i}.id", tool_call["id"])
+                    if tool_call.get("arguments") and CAPTURE_CONTENT:
+                        args = tool_call["arguments"][:500]  # Truncate args
+                        self.span.set_attribute(f"llm.tool_call.{i}.arguments", args)
+
+            # Add streaming metadata
+            self.span.set_attribute("llm.streaming", True)
+            self.span.set_attribute("llm.stream.chunks", self.chunk_count)
+
+        except Exception as e:
+            logger.debug(f"Failed to finalize stream span: {e}")
+
+
+class _AsyncStreamWrapper:
+    """Wrapper for asynchronous OpenAI streaming responses.
+
+    Similar to _StreamWrapper but for async/await code.
+
+    Example:
+        >>> stream = await client.chat.completions.create(..., stream=True)
+        >>> wrapped = _AsyncStreamWrapper(stream, span)
+        >>> async for chunk in wrapped:
+        ...     print(chunk.choices[0].delta.content)
+    """
+
+    def __init__(self, stream: AsyncIterator, span: Optional[Any] = None):
+        """Initialize async stream wrapper.
+
+        Args:
+            stream: Original OpenAI async stream
+            span: OpenTelemetry span to annotate (optional)
+        """
+        self.stream = stream
+        self.span = span
+        self.accumulated_content = []
+        # Tool calls accumulator: Dict[int, Dict] keyed by tool call index
+        # OpenAI streams tool calls as deltas that need to be merged by index
+        self.tool_calls_by_index: Dict[int, Dict[str, Any]] = {}
+        self.total_length = 0
+        self.chunk_count = 0
+        self._capture_content = CAPTURE_CONTENT
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        """Get next chunk from async stream and capture metadata."""
+        try:
+            chunk = await self.stream.__anext__()
+            self.chunk_count += 1
+
+            # Extract content if available
+            if hasattr(chunk, "choices") and len(chunk.choices) > 0:
+                choice = chunk.choices[0]
+
+                # Capture text content
+                if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
+                    content = choice.delta.content
+                    if content and self._capture_content:
+                        self.accumulated_content.append(content)
+                        self.total_length += len(content)
+
+                        # Stop accumulating if we exceed max length
+                        if self.total_length > MAX_CONTENT_LENGTH:
+                            self.accumulated_content.append(
+                                f"... (truncated, total {self.total_length} chars)"
+                            )
+                            self._capture_content = False
+
+                # Capture tool calls - merge deltas by index
+                if hasattr(choice, "delta") and getattr(choice.delta, "tool_calls", None):
+                    for tool_call in choice.delta.tool_calls:
+                        # Get the index for this tool call (OpenAI sends this)
+                        idx = getattr(tool_call, "index", 0)
+
+                        # Initialize if first delta for this index
+                        if idx not in self.tool_calls_by_index:
+                            self.tool_calls_by_index[idx] = {
+                                "id": None,
+                                "name": None,
+                                "arguments": "",
+                            }
+
+                        acc = self.tool_calls_by_index[idx]
+
+                        # Merge id (usually only in first delta)
+                        if hasattr(tool_call, "id") and tool_call.id:
+                            acc["id"] = tool_call.id
+
+                        # Merge function name and arguments
+                        if hasattr(tool_call, "function"):
+                            func = tool_call.function
+                            if hasattr(func, "name") and func.name:
+                                acc["name"] = func.name
+                            if hasattr(func, "arguments") and func.arguments:
+                                # Arguments come as fragments, concatenate them
+                                acc["arguments"] += func.arguments
+
+            return chunk
+
+        except StopAsyncIteration:
+            # Stream ended, finalize span
+            self._finalize_span()
+            raise
+
+    def _finalize_span(self):
+        """Add accumulated data to span when stream completes."""
+        if not self.span:
+            return
+
+        try:
+            # Add accumulated content (only collected while capture was enabled;
+            # includes the truncation marker if MAX_CONTENT_LENGTH was exceeded)
+            if self.accumulated_content:
+                full_content = "".join(self.accumulated_content)
+                self.span.set_attribute("llm.response.content", full_content[:MAX_CONTENT_LENGTH])
+                self.span.set_attribute("llm.response.length", self.total_length)
+
+            # Add tool calls (merged by index)
+            if self.tool_calls_by_index:
+                # Sort by index for consistent ordering
+                sorted_tool_calls = [
+                    self.tool_calls_by_index[idx]
+                    for idx in sorted(self.tool_calls_by_index.keys())
+                ]
+                self.span.set_attribute("llm.tool_calls.count", len(sorted_tool_calls))
+                for i, tool_call in enumerate(sorted_tool_calls[:10]):  # Max 10
+                    if tool_call.get("name"):
+                        self.span.set_attribute(f"llm.tool_call.{i}.name", tool_call["name"])
+                    if tool_call.get("id"):
+                        self.span.set_attribute(f"llm.tool_call.{i}.id", tool_call["id"])
+                    if tool_call.get("arguments") and CAPTURE_CONTENT:
+                        args = tool_call["arguments"][:500]  # Truncate args
+                        self.span.set_attribute(f"llm.tool_call.{i}.arguments", args)
+
+            # Add streaming metadata
+            self.span.set_attribute("llm.streaming", True)
+            self.span.set_attribute("llm.stream.chunks", self.chunk_count)
+
+        except Exception as e:
+            logger.debug(f"Failed to finalize async stream span: {e}")
+
+
+def _inject_agent_context(span: Any):
+    """Inject current agent context into span attributes.
+
+    Reads from contextvars set by AgentContext and adds them to the span.
+
+    Args:
+        span: OpenTelemetry span
+    """
+    try:
+        from agentreplay.context import (
+            get_current_agent_id,
+            get_current_session_id,
+            get_current_workflow_id,
+            get_current_user_id,
+        )
+
+        agent_id = get_current_agent_id()
+        if agent_id:
+            span.set_attribute("agentreplay.agent_id", agent_id)
+
+        session_id = get_current_session_id()
+        if session_id:
+            span.set_attribute("agentreplay.session_id", session_id)
+
+        workflow_id = get_current_workflow_id()
+        if workflow_id:
+            span.set_attribute("agentreplay.workflow_id", workflow_id)
+
+        user_id = get_current_user_id()
+        if user_id:
+            span.set_attribute("agentreplay.user_id", user_id)
+
+    except ImportError:
+        # Context module not available
+        pass
+    except Exception as e:
+        logger.debug(f"Failed to inject agent context: {e}")
+
+
+def extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
+    """Extract tool calls from OpenAI response.
+
+    Args:
+        response: OpenAI chat completion response
+
+    Returns:
+        List of tool call dictionaries with id, name, arguments
+    """
+    tool_calls = []
+
+    try:
+        if not hasattr(response, "choices") or not response.choices:
+            return tool_calls
+
+        choice = response.choices[0]
+        if not hasattr(choice, "message") or not hasattr(choice.message, "tool_calls"):
+            return tool_calls
+
+        if not choice.message.tool_calls:
+            return tool_calls
+
+        for tool_call in choice.message.tool_calls:
+            if hasattr(tool_call, "function"):
+                tool_calls.append({
+                    "id": getattr(tool_call, "id", None),
+                    "type": getattr(tool_call, "type", "function"),
+                    "name": getattr(tool_call.function, "name", None),
+                    "arguments": getattr(tool_call.function, "arguments", None),
+                })
+
+    except Exception as e:
+        logger.debug(f"Failed to extract tool calls: {e}")
+
+    return tool_calls
+
+
+def annotate_span_with_tool_calls(span: Any, tool_calls: List[Dict[str, Any]]):
+    """Add tool call information to span attributes.
+
+    Args:
+        span: OpenTelemetry span
+        tool_calls: List of tool call dictionaries
+    """
+    if not tool_calls:
+        return
+
+    try:
+        span.set_attribute("llm.tool_calls.count", len(tool_calls))
+
+        for i, tool_call in enumerate(tool_calls[:10]):  # Max 10 tool calls
+            prefix = f"llm.tool_call.{i}"
+
+            if tool_call.get("id"):
+                span.set_attribute(f"{prefix}.id", tool_call["id"])
+
+            if tool_call.get("name"):
+                span.set_attribute(f"{prefix}.name", tool_call["name"])
+
+            if tool_call.get("type"):
+                span.set_attribute(f"{prefix}.type", tool_call["type"])
+
+            if tool_call.get("arguments") and CAPTURE_CONTENT:
+                args = tool_call["arguments"]
+                if isinstance(args, str):
+                    # Truncate if too long
+                    args = args[:500]
+                span.set_attribute(f"{prefix}.arguments", args)
+
+    except Exception as e:
+        logger.debug(f"Failed to annotate span with tool calls: {e}")
@@ -0,0 +1,270 @@
+# Copyright 2025 Sushanth (https://github.com/sushanthpy)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Batching client for high-throughput trace ingestion."""
+
+import threading
+import time
+from collections import deque
+from typing import Deque, List
+
+from agentreplay.client import AgentreplayClient
+from agentreplay.models import AgentFlowEdge
+
+
+class BatchingAgentreplayClient:
+    """Client wrapper that batches spans for efficient ingestion.
+
+    Automatically buffers spans and flushes when the batch size is reached
+    or the flush interval expires, cutting the number of HTTP requests by up
+    to the batch size (100x with the default batch_size of 100).
+
+    A max_buffer_size bound prevents out-of-memory growth: when the buffer
+    is full, the oldest edges are dropped (FIFO sampling).
+
+    Args:
+        client: Underlying AgentreplayClient
+        batch_size: Number of spans to buffer before flushing (default: 100)
+        flush_interval: Seconds between automatic flushes (default: 5.0)
+        max_buffer_size: Maximum buffer size before dropping edges (default: 10000)
+
+    Example:
+        >>> client = AgentreplayClient(url="http://localhost:8080", tenant_id=1)
+        >>> batching_client = BatchingAgentreplayClient(
+        ...     client,
+        ...     batch_size=100,
+        ...     max_buffer_size=10000  # Prevent OOM
+        ... )
+        >>>
+        >>> # Spans are buffered
+        >>> for i in range(1000):
+        ...     edge = AgentFlowEdge(
+        ...         tenant_id=1,
+        ...         agent_id=1,
+        ...         session_id=42,
+        ...         span_type=SpanType.ROOT
+        ...     )
+        ...     batching_client.insert(edge)  # Buffered, not sent immediately
+        ...
+        >>> # Flush remaining spans
+        >>> batching_client.flush()
+        >>> batching_client.close()
+    """
+
+    def __init__(
+        self,
+        client: AgentreplayClient,
+        batch_size: int = 100,
+        flush_interval: float = 5.0,
+        max_buffer_size: int = 10000,
+    ):
+        """Initialize batching client."""
+        self.client = client
+        self.batch_size = batch_size
+        self.flush_interval = flush_interval
+        self.max_buffer_size = max_buffer_size
+        self._buffer: List[AgentFlowEdge] = []
+        self._retry_queue: Deque[List[AgentFlowEdge]] = deque()  # Failed batches awaiting retry
+        self._max_retry_batches = 10  # Limit retry queue to prevent unbounded growth
+        self._lock = threading.Lock()
+        self._running = True
+        self._dropped_count = 0  # Track dropped edges for monitoring
+        self._flush_thread = threading.Thread(target=self._auto_flush, daemon=True)
+        self._flush_thread.start()
+
+    def __enter__(self) -> "BatchingAgentreplayClient":
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Context manager exit - flush and close."""
+        self.flush()
+        self.close()
+
+    def insert(self, edge: AgentFlowEdge) -> AgentFlowEdge:
+        """Buffer a single edge for batched insertion.
+
+        Enforces max_buffer_size to prevent OOM: if the buffer is full,
+        the oldest edge is dropped (FIFO sampling).
+
+        Args:
+            edge: Edge to buffer
+
+        Returns:
+            The same edge (for consistency with the AgentreplayClient API)
+        """
+        with self._lock:
+            # Enforce max buffer size to prevent OOM
+            if len(self._buffer) >= self.max_buffer_size:
+                # Drop oldest edge (FIFO sampling)
+                self._buffer.pop(0)
+                self._dropped_count += 1
+
+                # Log warning every 1000 drops
+                if self._dropped_count % 1000 == 0:
+                    print(
+                        f"WARNING: Dropped {self._dropped_count} edges due to full buffer. "
+                        "Backend may be slow or down. Consider increasing max_buffer_size "
+                        "or reducing ingestion rate."
+                    )
+
+            self._buffer.append(edge)
+
+            # Flush if batch size reached
+            if len(self._buffer) >= self.batch_size:
+                self._flush_unlocked()
+
+        return edge
+
+    def flush(self) -> int:
+        """Manually flush all buffered spans.
+
+        Returns:
+            Number of spans flushed
+        """
+        with self._lock:
+            return self._flush_unlocked()
+
+    def _flush_unlocked(self) -> int:
+        """Flush buffer without acquiring lock (caller must hold lock).
+
+        Returns:
+            Number of spans flushed
+        """
+        if not self._buffer:
+            return 0
+
+        # Send entire batch in one HTTP request
+        try:
+            self.client.insert_batch(self._buffer)
+            count = len(self._buffer)
+            self._buffer = []
+            return count
+        except Exception as e:
+            # Log error but don't lose spans
+            print(f"Error flushing batch: {e}")
+            return 0
+
+    def _auto_flush(self) -> None:
+        """Background thread that flushes the buffer periodically.
+
+        Buffer management is transactional: data is only removed from the
+        buffer after successful network I/O. Failed batches are queued for
+        retry to prevent data loss.
+        """
+        while self._running:
+            time.sleep(self.flush_interval)
+            if self._running:  # Check again after sleep
+                # 1. First, try to send any previously failed batches
+                self._process_retry_queue()
+
+                # 2. Grab data to send (under lock) but DON'T clear buffer yet
+                batch_to_send = []
+                with self._lock:
+                    if self._buffer:
+                        batch_to_send = self._buffer[:]  # Copy for sending
+
+                # 3. Do the I/O outside the lock (won't block application threads)
+                if batch_to_send:
+                    success = False
+                    try:
+                        self.client.insert_batch(batch_to_send)
+                        success = True
+                    except Exception as e:
+                        # Log error but don't crash the thread
+                        print(f"Error flushing batch: {e}")
+
+                    # 4. ONLY clear buffer after confirmed success (transactional)
+                    with self._lock:
+                        if success:
+                            # Remove only the items we successfully sent
+                            # (new items may have been added during I/O)
+                            self._buffer = self._buffer[len(batch_to_send):]
+                        else:
+                            # Failed: queue batch for retry, clear from main buffer
+                            # to prevent duplicate sends
+                            self._buffer = self._buffer[len(batch_to_send):]
+                            self._queue_for_retry(batch_to_send)
+
+    def _process_retry_queue(self) -> None:
+        """Process failed batches from retry queue."""
+        while self._retry_queue:
+            # Pop from front (FIFO)
+            with self._lock:
+                if not self._retry_queue:
+                    break
+                batch = self._retry_queue.popleft()
+
+            try:
+                self.client.insert_batch(batch)
+                # Success - batch is now sent, continue to next
+            except Exception as e:
+                # Still failing - re-queue at the back for later retry
+                print(f"Retry failed for batch of {len(batch)} edges: {e}")
+                with self._lock:
+                    if len(self._retry_queue) < self._max_retry_batches:
+                        self._retry_queue.append(batch)
+                    else:
+                        # Drop batch to prevent unbounded growth
+                        self._dropped_count += len(batch)
+                        print(f"WARNING: Dropped batch of {len(batch)} edges after max retries")
+                break  # Stop processing retry queue on failure
+
+    def _queue_for_retry(self, batch: List[AgentFlowEdge]) -> None:
+        """Add a failed batch to the retry queue (caller must hold the lock)."""
+        if len(self._retry_queue) < self._max_retry_batches:
+            self._retry_queue.append(batch)
+        else:
+            # Drop oldest retry batch to make room
+            dropped = self._retry_queue.popleft()
+            self._dropped_count += len(dropped)
+            self._retry_queue.append(batch)
+            print(f"WARNING: Dropped oldest retry batch ({len(dropped)} edges) to make room")
+
+    def close(self) -> None:
+        """Stop auto-flush thread and flush remaining spans including retry queue."""
+        self._running = False
+        if self._flush_thread.is_alive():
+            self._flush_thread.join(timeout=self.flush_interval + 1.0)
+
+        # Flush main buffer
+        self.flush()
+
+        # Attempt to flush retry queue (best effort)
+        retry_attempts = 0
+        max_close_retries = 3
+        while self._retry_queue and retry_attempts < max_close_retries:
+            retry_attempts += 1
+            with self._lock:
+                if not self._retry_queue:
+                    break
+                batch = self._retry_queue.popleft()
+            try:
+                self.client.insert_batch(batch)
+            except Exception as e:
+                print(f"Failed to flush retry queue on close (attempt {retry_attempts}): {e}")
+                # Re-queue for next attempt
+                with self._lock:
+                    self._retry_queue.appendleft(batch)
+                break
+
+        # Report any remaining data that couldn't be sent
+        remaining = sum(len(b) for b in self._retry_queue)
+        if remaining > 0:
+            print(f"WARNING: {remaining} edges in retry queue could not be sent on close")
+
+    def __del__(self) -> None:
+        """Destructor - ensure spans are flushed."""
+        try:
+            self.close()
+        except Exception:
+            pass  # Ignore errors during cleanup
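A minimal usage sketch of the batching client; the endpoint and IDs follow the docstring example above, while the module path for BatchingAgentreplayClient and the import location of SpanType are assumptions:

    from agentreplay.batching import BatchingAgentreplayClient  # module path assumed
    from agentreplay.client import AgentreplayClient
    from agentreplay.models import AgentFlowEdge, SpanType  # SpanType location assumed

    client = AgentreplayClient(url="http://localhost:8080", tenant_id=1)
    # The context manager flushes the buffer and drains the retry queue on exit
    with BatchingAgentreplayClient(client, batch_size=100, flush_interval=5.0) as batching:
        for _ in range(1000):
            # Each insert is buffered; one HTTP request is made per 100 edges
            batching.insert(
                AgentFlowEdge(tenant_id=1, agent_id=1, session_id=42, span_type=SpanType.ROOT)
            )

Because the flush thread is a daemon, relying on __del__ alone is best-effort; the context manager (or an explicit flush() plus close()) is the reliable way to avoid losing buffered spans at shutdown.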